| Message ID | 4F3E7AF4.2080407@codesourcery.com |
| --- | --- |
| State | New |
> extern const struct tune_params *current_tune;
> extern int vfp3_const_double_for_fract_bits (rtx);
> +
> +extern void arm_emit_coreregs_64bit_shift (enum rtx_code, rtx, rtx, rtx, rtx,
> +                                           rtx);
> #endif /* RTX_CODE */

[...]

> +  /* Bits flow from up-stream to down-stream.  */

Something more about "upstream" and "downstream" here would be nice :)

[...]

> +  else
> +    {
> +      /* Shifts by a variable amount.  */
> +      rtx cc_reg = gen_rtx_REG (CC_NCVmode, CC_REGNUM);

This isn't something I'm terribly confident about. I'd rather use CC_NOOVmode
or CCmode in this case, since here you only care that the value resulting from
"subs r0, r1, 32" is positive or zero, so it's possibly OK to do so. GE with
CC_NCVmode really doesn't make sense, as that mode expects only the N, C and V
flags to be set, but GE requires the Z bit as well, if you went for it.

[...]

> +      if (GET_CODE (operands[2]) == CONST_INT

Use CONST_INT_P (operands[2]) instead.

OK with those changes.

Regards,
Ramana
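On the up-stream/down-stream naming: the word the bits flow out of is
up-stream and the word they flow into is down-stream, so for the right shifts
the high word is up-stream and the low word is down-stream, and for ASHIFT the
two words swap roles. The C model below is purely illustrative and not part of
the patch (the names `lsl`, `lsr` and `lshrdi3_model` are invented here); it
mimics ARM's shift-by-register semantics to show how the three IORed terms
built from `amount`, `scratch1 = 32 - amount` and `scratch2 = amount - 32`
cover every amount from 0 to 63 in the variable LSHIFTRT case.

```c
#include <stdint.h>

/* ARM shift-by-register semantics: only the bottom byte of the amount
   is used, and LSL/LSR by 32 or more yield zero.  */
static uint32_t lsl (uint32_t x, uint32_t n)
{
  n &= 0xff;
  return n < 32 ? x << n : 0;
}

static uint32_t lsr (uint32_t x, uint32_t n)
{
  n &= 0xff;
  return n < 32 ? x >> n : 0;
}

/* Variable-amount 64-bit logical right shift.  The high word is
   "up-stream": its bits flow down into the low ("down-stream") word.
   For ASHIFT the roles of the two words are simply swapped.  */
uint64_t lshrdi3_model (uint64_t in, uint32_t amount)
{
  uint32_t in_up = (uint32_t) (in >> 32);
  uint32_t in_down = (uint32_t) in;
  uint32_t scratch1 = 32 - amount;	/* RSB_32 */
  uint32_t scratch2 = amount - 32;	/* SUB_32 */

  uint32_t out_down = lsr (in_down, amount);
  out_down |= lsl (in_up, scratch1);	/* contributes for amounts 1..32 */
  out_down |= lsr (in_up, scratch2);	/* contributes for amounts 32..63 */
  uint32_t out_up = lsr (in_up, amount);

  return ((uint64_t) out_up << 32) | out_down;
}
```

At amount == 32 both scratch terms reduce to in_up and the IOR is idempotent,
which is why no special case is needed there.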
2012-02-17  Andrew Stubbs  <ams@codesourcery.com>

	gcc/
	* config/arm/arm-protos.h (arm_emit_coreregs_64bit_shift): New
	prototype.
	* config/arm/arm.c (arm_emit_coreregs_64bit_shift): New function.
	* config/arm/arm.md (ashldi3): Use arm_emit_coreregs_64bit_shift.
	(ashrdi3, lshrdi3): Likewise.
	(arm_cond_branch): Remove '*' to enable gen_arm_cond_branch.

```diff
---
 gcc/config/arm/arm-protos.h |    3 +
 gcc/config/arm/arm.c        |  201 +++++++++++++++++++++++++++++++++++++++++++
 gcc/config/arm/arm.md       |  104 ++++++++++++++++------
 3 files changed, 280 insertions(+), 28 deletions(-)

diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 296550a..df8d7a9 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -242,6 +242,9 @@ struct tune_params
 
 extern const struct tune_params *current_tune;
 extern int vfp3_const_double_for_fract_bits (rtx);
+
+extern void arm_emit_coreregs_64bit_shift (enum rtx_code, rtx, rtx, rtx, rtx,
+                                           rtx);
 #endif /* RTX_CODE */
 
 #endif /* ! GCC_ARM_PROTOS_H */
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index c3a19e4..02dc6ca 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -25213,5 +25213,206 @@ vfp3_const_double_for_fract_bits (rtx operand)
   return 0;
 }
 
+/* The default expansion of general 64-bit shifts in core-regs is suboptimal
+   on ARM, since we know that shifts by negative amounts are no-ops.
+
+   It's safe for the input and output to be the same register, but
+   early-clobber rules apply for the shift amount and scratch registers.
+
+   Shift by register requires both scratch registers.  Shift by a constant
+   less than 32 in Thumb2 mode requires SCRATCH1 only.  In all other cases
+   the scratch registers may be NULL.
+
+   Additionally, ashiftrt by a register also clobbers the CC register.  */
+void
+arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
+                               rtx amount, rtx scratch1, rtx scratch2)
+{
+  rtx out_high = gen_highpart (SImode, out);
+  rtx out_low = gen_lowpart (SImode, out);
+  rtx in_high = gen_highpart (SImode, in);
+  rtx in_low = gen_lowpart (SImode, in);
+
+  /* Bits flow from up-stream to down-stream.  */
+  rtx out_up = code == ASHIFT ? out_low : out_high;
+  rtx out_down = code == ASHIFT ? out_high : out_low;
+  rtx in_up = code == ASHIFT ? in_low : in_high;
+  rtx in_down = code == ASHIFT ? in_high : in_low;
+
+  gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
+  gcc_assert (out
+              && (REG_P (out) || GET_CODE (out) == SUBREG)
+              && GET_MODE (out) == DImode);
+  gcc_assert (in
+              && (REG_P (in) || GET_CODE (in) == SUBREG)
+              && GET_MODE (in) == DImode);
+  gcc_assert (amount
+              && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
+                   && GET_MODE (amount) == SImode)
+                  || CONST_INT_P (amount)));
+  gcc_assert (scratch1 == NULL
+              || (GET_CODE (scratch1) == SCRATCH)
+              || (GET_MODE (scratch1) == SImode
+                  && REG_P (scratch1)));
+  gcc_assert (scratch2 == NULL
+              || (GET_CODE (scratch2) == SCRATCH)
+              || (GET_MODE (scratch2) == SImode
+                  && REG_P (scratch2)));
+  gcc_assert (!REG_P (out) || !REG_P (amount)
+              || !HARD_REGISTER_P (out)
+              || (REGNO (out) != REGNO (amount)
+                  && REGNO (out) + 1 != REGNO (amount)));
+
+  /* Macros to make following code more readable.  */
+  #define SUB_32(DEST,SRC) \
+            gen_addsi3 ((DEST), (SRC), gen_rtx_CONST_INT (VOIDmode, -32))
+  #define RSB_32(DEST,SRC) \
+            gen_subsi3 ((DEST), gen_rtx_CONST_INT (VOIDmode, 32), (SRC))
+  #define SUB_S_32(DEST,SRC) \
+            gen_addsi3_compare0 ((DEST), (SRC), \
+                                 gen_rtx_CONST_INT (VOIDmode, -32))
+  #define SET(DEST,SRC) \
+            gen_rtx_SET (SImode, (DEST), (SRC))
+  #define SHIFT(CODE,SRC,AMOUNT) \
+            gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
+  #define LSHIFT(CODE,SRC,AMOUNT) \
+            gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
+                            SImode, (SRC), (AMOUNT))
+  #define REV_LSHIFT(CODE,SRC,AMOUNT) \
+            gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
+                            SImode, (SRC), (AMOUNT))
+  #define ORR(A,B) \
+            gen_rtx_IOR (SImode, (A), (B))
+  #define BRANCH(COND,LABEL) \
+            gen_arm_cond_branch ((LABEL), \
+                                 gen_rtx_ ## COND (CCmode, cc_reg, \
+                                                   const0_rtx), \
+                                 cc_reg)
+
+  if (CONST_INT_P (amount))
+    {
+      /* Shifts by a constant amount.  */
+      if (INTVAL (amount) <= 0)
+        /* Match what shift-by-register would do.  */
+        emit_insn (gen_movdi (out, in));
+      else if (INTVAL (amount) >= 64)
+        {
+          /* Match what shift-by-register would do.  */
+          if (code == ASHIFTRT)
+            {
+              rtx const31_rtx = gen_rtx_CONST_INT (VOIDmode, 31);
+              emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
+              emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
+            }
+          else
+            emit_insn (gen_movdi (out, const0_rtx));
+        }
+      else if (INTVAL (amount) < 32)
+        {
+          /* Shifts by a constant less than 32.  */
+          rtx reverse_amount = gen_rtx_CONST_INT (VOIDmode,
+                                                  32 - INTVAL (amount));
+
+          emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
+          emit_insn (SET (out_down,
+                          ORR (REV_LSHIFT (code, in_up, reverse_amount),
+                               out_down)));
+          emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
+        }
+      else
+        {
+          /* Shifts by a constant greater than 31.  */
+          rtx adj_amount = gen_rtx_CONST_INT (VOIDmode, INTVAL (amount) - 32);
+
+          emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
+          if (code == ASHIFTRT)
+            emit_insn (gen_ashrsi3 (out_up, in_up,
+                                    gen_rtx_CONST_INT (VOIDmode, 31)));
+          else
+            emit_insn (SET (out_up, const0_rtx));
+        }
+    }
+  else
+    {
+      /* Shifts by a variable amount.  */
+      rtx cc_reg = gen_rtx_REG (CC_NCVmode, CC_REGNUM);
+
+      gcc_assert (scratch1 && REG_P (scratch1));
+      gcc_assert (scratch2 && REG_P (scratch2));
+
+      switch (code)
+        {
+        case ASHIFT:
+          emit_insn (SUB_32 (scratch1, amount));
+          emit_insn (RSB_32 (scratch2, amount));
+          break;
+        case ASHIFTRT:
+          emit_insn (RSB_32 (scratch1, amount));
+          emit_insn (SUB_S_32 (scratch2, amount));
+          break;
+        case LSHIFTRT:
+          emit_insn (RSB_32 (scratch1, amount));
+          emit_insn (SUB_32 (scratch2, amount));
+          break;
+        default:
+          gcc_unreachable ();
+        }
+
+      emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
+
+      if (!TARGET_THUMB2)
+        {
+          /* If this were only called during expand we could just use the else
+             case and let combine deal with it, but this can also be called
+             from post-reload splitters.  */
+          emit_insn (SET (out_down,
+                          ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
+          if (code == ASHIFTRT)
+            {
+              rtx done_label = gen_label_rtx ();
+              emit_jump_insn (BRANCH (LT, done_label));
+              emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up,
+                                                    scratch2),
+                                             out_down)));
+              emit_label (done_label);
+            }
+          else
+            emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
+                                           out_down)));
+        }
+      else
+        {
+          /* Thumb2 can't do shift and or in one insn.  */
+          emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
+          emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
+
+          if (code == ASHIFTRT)
+            {
+              rtx done_label = gen_label_rtx ();
+              emit_jump_insn (BRANCH (LT, done_label));
+              emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
+              emit_insn (SET (out_down, ORR (out_down, scratch2)));
+              emit_label (done_label);
+            }
+          else
+            {
+              emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
+              emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
+            }
+        }
+
+      emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
+    }
+
+  #undef SUB_32
+  #undef RSB_32
+  #undef SUB_S_32
+  #undef SET
+  #undef SHIFT
+  #undef LSHIFT
+  #undef REV_LSHIFT
+  #undef ORR
+  #undef BRANCH
+}
+
 #include "gt-arm.h"
```
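The branch in the variable ASHIFTRT sequence (the one behind the CC-mode
question in the review above) exists because ARM's register-controlled ASR
saturates at 31 rather than going to zero: for amounts below 32,
scratch2 = amount - 32 is negative, and in_up ASR scratch2 would smear the
sign bit across out_down instead of contributing nothing. The flag-setting
SUB_S_32 plus BRANCH (LT, ...) simply skips that term. The model below is
illustrative only, reusing the lsl/lsr helpers from the earlier sketch
(`asr` and `ashrdi3_model` are invented names, not code from the patch):

```c
/* ARM ASR-by-register saturates: amounts of 32..255 behave like 31,
   replicating the sign bit, so the result is never simply zero.
   Assumes arithmetic shift of negative values, as on GCC/ARM.  */
static uint32_t asr (uint32_t x, uint32_t n)
{
  n &= 0xff;
  return (uint32_t) ((int32_t) x >> (n < 32 ? n : 31));
}

/* Variable-amount 64-bit arithmetic right shift.  The conditional
   mirrors the BRANCH (LT, ...) that follows the flag-setting SUB_S_32.  */
uint64_t ashrdi3_model (uint64_t in, uint32_t amount)
{
  uint32_t in_up = (uint32_t) (in >> 32);
  uint32_t in_down = (uint32_t) in;
  uint32_t scratch1 = 32 - amount;              /* RSB_32 */
  int32_t scratch2 = (int32_t) amount - 32;     /* SUB_S_32: sets N */

  uint32_t out_down = lsr (in_down, amount);
  out_down |= lsl (in_up, scratch1);
  if (scratch2 >= 0)            /* the branch on LT skips this OR */
    out_down |= asr (in_up, (uint32_t) scratch2);
  uint32_t out_up = asr (in_up, amount);

  return ((uint64_t) out_up << 32) | out_down;
}
```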
```diff
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 751997f..7910bae 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -3466,21 +3466,37 @@
 	(match_operand:SI 2 "reg_or_int_operand" "")))]
   "TARGET_32BIT"
   "
-  if (GET_CODE (operands[2]) == CONST_INT)
+  if (!CONST_INT_P (operands[2])
+      && (TARGET_REALLY_IWMMXT || (TARGET_HARD_FLOAT && TARGET_MAVERICK)))
+    ; /* No special preparation statements; expand pattern as above.  */
+  else
     {
-      if ((HOST_WIDE_INT) INTVAL (operands[2]) == 1)
+      rtx scratch1, scratch2;
+
+      if (GET_CODE (operands[2]) == CONST_INT
+          && (HOST_WIDE_INT) INTVAL (operands[2]) == 1)
         {
           emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
           DONE;
         }
-      /* Ideally we shouldn't fail here if we could know that operands[1]
-         ends up already living in an iwmmxt register. Otherwise it's
-         cheaper to have the alternate code being generated than moving
-         values to iwmmxt regs and back.  */
-      FAIL;
+
+      /* Ideally we should use iwmmxt here if we could know that operands[1]
+         ends up already living in an iwmmxt register.  Otherwise it's
+         cheaper to have the alternate code being generated than moving
+         values to iwmmxt regs and back.  */
+
+      /* If we're optimizing for size, we prefer the libgcc calls.  */
+      if (optimize_function_for_size_p (cfun))
+        FAIL;
+
+      /* Expand operation using core-registers.
+         'FAIL' would achieve the same thing, but this is a bit smarter.  */
+      scratch1 = gen_reg_rtx (SImode);
+      scratch2 = gen_reg_rtx (SImode);
+      arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
+                                     operands[2], scratch1, scratch2);
+      DONE;
     }
-  else if (!TARGET_REALLY_IWMMXT && !(TARGET_HARD_FLOAT && TARGET_MAVERICK))
-    FAIL;
   "
 )
@@ -3525,21 +3541,37 @@
 	(match_operand:SI 2 "reg_or_int_operand" "")))]
   "TARGET_32BIT"
   "
-  if (GET_CODE (operands[2]) == CONST_INT)
+  if (!CONST_INT_P (operands[2])
+      && (TARGET_REALLY_IWMMXT || (TARGET_HARD_FLOAT && TARGET_MAVERICK)))
+    ; /* No special preparation statements; expand pattern as above.  */
+  else
     {
-      if ((HOST_WIDE_INT) INTVAL (operands[2]) == 1)
+      rtx scratch1, scratch2;
+
+      if (GET_CODE (operands[2]) == CONST_INT
+          && (HOST_WIDE_INT) INTVAL (operands[2]) == 1)
         {
           emit_insn (gen_arm_ashrdi3_1bit (operands[0], operands[1]));
           DONE;
         }
-      /* Ideally we shouldn't fail here if we could know that operands[1]
-         ends up already living in an iwmmxt register. Otherwise it's
-         cheaper to have the alternate code being generated than moving
-         values to iwmmxt regs and back.  */
-      FAIL;
+
+      /* Ideally we should use iwmmxt here if we could know that operands[1]
+         ends up already living in an iwmmxt register.  Otherwise it's
+         cheaper to have the alternate code being generated than moving
+         values to iwmmxt regs and back.  */
+
+      /* If we're optimizing for size, we prefer the libgcc calls.  */
+      if (optimize_function_for_size_p (cfun))
+        FAIL;
+
+      /* Expand operation using core-registers.
+         'FAIL' would achieve the same thing, but this is a bit smarter.  */
+      scratch1 = gen_reg_rtx (SImode);
+      scratch2 = gen_reg_rtx (SImode);
+      arm_emit_coreregs_64bit_shift (ASHIFTRT, operands[0], operands[1],
+                                     operands[2], scratch1, scratch2);
+      DONE;
     }
-  else if (!TARGET_REALLY_IWMMXT)
-    FAIL;
   "
 )
@@ -3582,21 +3614,37 @@
 	(match_operand:SI 2 "reg_or_int_operand" "")))]
   "TARGET_32BIT"
   "
-  if (GET_CODE (operands[2]) == CONST_INT)
+  if (!CONST_INT_P (operands[2])
+      && (TARGET_REALLY_IWMMXT || (TARGET_HARD_FLOAT && TARGET_MAVERICK)))
+    ; /* No special preparation statements; expand pattern as above.  */
+  else
     {
-      if ((HOST_WIDE_INT) INTVAL (operands[2]) == 1)
+      rtx scratch1, scratch2;
+
+      if (GET_CODE (operands[2]) == CONST_INT
+          && (HOST_WIDE_INT) INTVAL (operands[2]) == 1)
         {
          emit_insn (gen_arm_lshrdi3_1bit (operands[0], operands[1]));
          DONE;
         }
-      /* Ideally we shouldn't fail here if we could know that operands[1]
-         ends up already living in an iwmmxt register. Otherwise it's
-         cheaper to have the alternate code being generated than moving
-         values to iwmmxt regs and back.  */
-      FAIL;
+
+      /* Ideally we should use iwmmxt here if we could know that operands[1]
+         ends up already living in an iwmmxt register.  Otherwise it's
+         cheaper to have the alternate code being generated than moving
+         values to iwmmxt regs and back.  */
+
+      /* If we're optimizing for size, we prefer the libgcc calls.  */
+      if (optimize_function_for_size_p (cfun))
+        FAIL;
+
+      /* Expand operation using core-registers.
+         'FAIL' would achieve the same thing, but this is a bit smarter.  */
+      scratch1 = gen_reg_rtx (SImode);
+      scratch2 = gen_reg_rtx (SImode);
+      arm_emit_coreregs_64bit_shift (LSHIFTRT, operands[0], operands[1],
+                                     operands[2], scratch1, scratch2);
+      DONE;
     }
-  else if (!TARGET_REALLY_IWMMXT)
-    FAIL;
   "
 )
@@ -7645,7 +7693,7 @@
 ;; Patterns to match conditional branch insns.
 ;;
 
-(define_insn "*arm_cond_branch"
+(define_insn "arm_cond_branch"
   [(set (pc)
 	(if_then_else (match_operator 1 "arm_comparison_operator"
 		       [(match_operand 2 "cc_register" "") (const_int 0)])
```
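For constant amounts the expanders above now reach the same function, which
emits short straight-line sequences instead of the generic expansion or a
libgcc call. The snippets below are illustrative C, not code from the patch
(the function names are invented), showing what the two interesting constant
paths compute for a 64-bit logical right shift:

```c
#include <stdint.h>

/* Constant amount < 32, e.g. x >> 10: two shifts and an IOR.  */
uint64_t lshr_by_10 (uint32_t in_up, uint32_t in_down)
{
  uint32_t out_down = (in_down >> 10) | (in_up << 22);  /* 22 == 32 - 10 */
  uint32_t out_up = in_up >> 10;
  return ((uint64_t) out_up << 32) | out_down;
}

/* Constant amount in 32..63, e.g. x >> 40: the low result word comes
   entirely from the high input word; the high result word is zero
   (it would be in_up >> 31 for an arithmetic shift).  */
uint64_t lshr_by_40 (uint32_t in_up, uint32_t in_down)
{
  (void) in_down;                       /* shifted out entirely */
  uint32_t out_down = in_up >> 8;       /* 8 == 40 - 32 */
  uint32_t out_up = 0;
  return ((uint64_t) out_up << 32) | out_down;
}
```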
On 16/02/12 15:33, Andrew Stubbs wrote:
> OK for 4.8?

I forgot to address Ramana's comment about optimize_size. This update fixes
that and leaves everything else untouched.

OK?

Andrew