diff mbox

[ARM,PR65768] Keep constants in register when expanding

Message ID 552E17C7.80800@linaro.org
State New
Headers show

Commit Message

Kugan Vivekanandarajah April 15, 2015, 7:48 a.m. UTC
As mentioned in PR65768, ARM gcc generates suboptimal code for constant
Uses in loop. Part of the reason is that ARM back-end is splitting
constants during expansion of RTL, making it hard for the RTL
optimization passes to optimize it. Zhenqiang posted a patch at
https://gcc.gnu.org/ml/gcc-patches/2014-08/msg00325.html to fix this

As mentioned in PR65768, I tried with few more test-cases and enhanced
it. Regression tested on arm-none-linux-gnu and no new regressions. Is
this OK for trunk?

Thanks,
Kugan


gcc/ChangeLog:

2015-04-15  Kugan Vivekanandarajah  <kuganv@linaro.org>
	    Zhenqiang Chen  <zhenqiang.chen@linaro.org>

	PR target/65768
	* config/arm/arm-protos.h (const_ok_for_split): New definition.
	* config/arm/arm.c (const_ok_for_split): New function.
	* config/arm/arm.md (subsi3, andsi3, iorsi3, xorsi3, movsi): Keep some
	 large constants in register instead of splitting them.

gcc/testsuite/ChangeLog:

2015-04-15  Kugan Vivekanandarajah  <kuganv@linaro.org>
	    Zhenqiang Chen  <zhenqiang.chen@linaro.org>

	PR target/65768
	* gcc.target/arm/maskdata.c: New test.

Comments

Kugan Vivekanandarajah April 15, 2015, 8:28 a.m. UTC | #1
On 15/04/15 18:21, Kyrill Tkachov wrote:
> Hi Kugan,
> 
> On 15/04/15 08:48, Kugan wrote:
>> As mentioned in PR65768, ARM gcc generates suboptimal code for constant
>> Uses in loop. Part of the reason is that ARM back-end is splitting
>> constants during expansion of RTL, making it hard for the RTL
>> optimization passes to optimize it. Zhenqiang posted a patch at
>> https://gcc.gnu.org/ml/gcc-patches/2014-08/msg00325.html to fix this
>>
>> As mentioned in PR65768, I tried with few more test-cases and enhanced
>> it. Regression tested on arm-none-linux-gnu and no new regressions. Is
>> this OK for trunk?
> 
> Can you please post the code generated for the testcase
> before and after the patch for the record?

Hi Kyrill,

Before:

maskdata:
	@ args = 0, pretend = 0, frame = 0
	@ frame_needed = 0, uses_anonymous_args = 0
	@ link register save eliminated.
	cmp	r1, #0
	bxle	lr
	add	r1, r0, r1, lsl #2
.L3:
	ldr	r3, [r1, #-4]!
	cmp	r1, r0
	bic	r3, r3, #-16777216
	bic	r3, r3, #65280
	str	r3, [r1]
	bne	.L3
	bx	lr



After (using the the cprop patch also):

maskdata:
	@ args = 0, pretend = 0, frame = 0
	@ frame_needed = 0, uses_anonymous_args = 0
	@ link register save eliminated.
	cmp	r1, #0
	bxle	lr
	mov	r2, #255
	add	r1, r0, r1, lsl #2
	movt	r2, 255
.L3:
	ldr	r3, [r1, #-4]!
	cmp	r1, r0
	and	r3, r3, r2
	str	r3, [r1]
	bne	.L3
	bx	lr



Thanks,
Kugan



> 
> Thanks,
> Kyrill
> 
> 
>>
>> Thanks,
>> Kugan
>>
>>
>> gcc/ChangeLog:
>>
>> 2015-04-15  Kugan Vivekanandarajah  <kuganv@linaro.org>
>>         Zhenqiang Chen  <zhenqiang.chen@linaro.org>
>>
>>     PR target/65768
>>     * config/arm/arm-protos.h (const_ok_for_split): New definition.
>>     * config/arm/arm.c (const_ok_for_split): New function.
>>     * config/arm/arm.md (subsi3, andsi3, iorsi3, xorsi3, movsi): Keep
>> some
>>      large constants in register instead of splitting them.
>>
>> gcc/testsuite/ChangeLog:
>>
>> 2015-04-15  Kugan Vivekanandarajah  <kuganv@linaro.org>
>>         Zhenqiang Chen  <zhenqiang.chen@linaro.org>
>>
>>     PR target/65768
>>     * gcc.target/arm/maskdata.c: New test.
>
diff mbox

Patch

diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 16eb854..1b131a9 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -58,6 +58,7 @@  extern bool arm_modes_tieable_p (machine_mode, machine_mode);
 extern int const_ok_for_arm (HOST_WIDE_INT);
 extern int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
 extern int const_ok_for_dimode_op (HOST_WIDE_INT, enum rtx_code);
+extern int const_ok_for_split (HOST_WIDE_INT, enum rtx_code);
 extern int arm_split_constant (RTX_CODE, machine_mode, rtx,
 			       HOST_WIDE_INT, rtx, rtx, int);
 extern int legitimate_pic_operand_p (rtx);
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 8fd1388..0c13666 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -3745,6 +3745,41 @@  const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
     }
 }
 
+/* Return true if I is a valid constant for split with the operation CODE.
+   The condition should align with the constrain of the corresponding
+   define_insn_and_split pattern to make sure later pass can optimize
+   the constants.  */
+int
+const_ok_for_split (HOST_WIDE_INT i, enum rtx_code code)
+{
+  if (optimize < 2
+      || !can_create_pseudo_p ()
+      || const_ok_for_arm (i)
+	 /* Since expand pass always uses "sign-extend" to get the value
+	    (trunc_int_for_mode called from immed_wide_int_const) for rtl,
+	    and logs show most negative values are UNSIGNED when they are
+	    TREE node. And combine pass is smart enough to recover the
+	    negative value to positive value.  */
+      || ((i < 0) && const_ok_for_arm (-i)))
+    return 1;
+
+  switch (code)
+    {
+    case AND:
+      /* zero_extendhi instruction is efficient.  */
+      return const_ok_for_arm (~i) || (i == 0xffff);
+
+    case IOR:
+      return TARGET_THUMB2 && const_ok_for_arm (~i);
+
+    case SET:
+      return const_ok_for_arm (i) || const_ok_for_arm (~i);
+
+    default:
+      return 1;
+    }
+}
+
 /* Emit a sequence of insns to handle a large constant.
    CODE is the code of the operation required, it can be any of SET, PLUS,
    IOR, AND, XOR, MINUS;
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 164ac13..a169775 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -1164,10 +1164,16 @@ 
     {
       if (TARGET_32BIT)
         {
-          arm_split_constant (MINUS, SImode, NULL_RTX,
-	                      INTVAL (operands[1]), operands[0],
-	  		      operands[2], optimize && can_create_pseudo_p ());
-          DONE;
+	  if (!const_ok_for_split (INTVAL (operands[1]), MINUS))
+	    operands[1] = force_reg (SImode, operands[1]);
+	  else
+	    {
+	      arm_split_constant (MINUS, SImode, NULL_RTX,
+				  INTVAL (operands[1]), operands[0],
+				  operands[2],
+				  optimize && can_create_pseudo_p ());
+	      DONE;
+	    }
 	}
       else /* TARGET_THUMB1 */
         operands[1] = force_reg (SImode, operands[1]);
@@ -2078,14 +2084,19 @@ 
 	      operands[1] = convert_to_mode (QImode, operands[1], 1);
 	      emit_insn (gen_thumb2_zero_extendqisi2_v6 (operands[0],
 							 operands[1]));
+	      DONE;
 	    }
+	  else if (!const_ok_for_split (INTVAL (operands[2]), AND))
+	    operands[2] = force_reg (SImode, operands[2]);
 	  else
-	    arm_split_constant (AND, SImode, NULL_RTX,
-				INTVAL (operands[2]), operands[0],
-				operands[1],
-				optimize && can_create_pseudo_p ());
+	    {
+	      arm_split_constant (AND, SImode, NULL_RTX,
+				  INTVAL (operands[2]), operands[0],
+				  operands[1],
+				  optimize && can_create_pseudo_p ());
 
-          DONE;
+	      DONE;
+	    }
         }
     }
   else /* TARGET_THUMB1 */
@@ -2884,10 +2895,16 @@ 
     {
       if (TARGET_32BIT)
         {
-          arm_split_constant (IOR, SImode, NULL_RTX,
-	                      INTVAL (operands[2]), operands[0], operands[1],
-			      optimize && can_create_pseudo_p ());
-          DONE;
+	  if (!const_ok_for_split (INTVAL (operands[2]), IOR))
+	    operands[2] = force_reg (SImode, operands[2]);
+	  else
+	    {
+	      arm_split_constant (IOR, SImode, NULL_RTX,
+				  INTVAL (operands[2]), operands[0],
+				  operands[1],
+				  optimize && can_create_pseudo_p ());
+	      DONE;
+	    }
 	}
       else /* TARGET_THUMB1 */
         {
@@ -3054,10 +3071,16 @@ 
     {
       if (TARGET_32BIT)
         {
-          arm_split_constant (XOR, SImode, NULL_RTX,
-	                      INTVAL (operands[2]), operands[0], operands[1],
-			      optimize && can_create_pseudo_p ());
-          DONE;
+	  if (!const_ok_for_split (INTVAL (operands[2]), XOR))
+	    operands[2] = force_reg (SImode, operands[2]);
+	  else
+	    {
+	      arm_split_constant (XOR, SImode, NULL_RTX,
+				  INTVAL (operands[2]), operands[0],
+				  operands[1],
+				  optimize && can_create_pseudo_p ());
+	      DONE;
+	    }
 	}
       else /* TARGET_THUMB1 */
         {
@@ -5554,10 +5577,18 @@ 
           && !(const_ok_for_arm (INTVAL (operands[1]))
                || const_ok_for_arm (~INTVAL (operands[1]))))
         {
-           arm_split_constant (SET, SImode, NULL_RTX,
-	                       INTVAL (operands[1]), operands[0], NULL_RTX,
-			       optimize && can_create_pseudo_p ());
-          DONE;
+	   if (!const_ok_for_split (INTVAL (operands[1]), SET))
+	     {
+		emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
+		DONE;
+	     }
+	  else
+	     {
+		arm_split_constant (SET, SImode, NULL_RTX,
+	                            INTVAL (operands[1]), operands[0], NULL_RTX,
+			            optimize && can_create_pseudo_p ());
+		DONE;
+	     }
         }
     }
   else /* TARGET_THUMB1...  */
diff --git a/gcc/testsuite/gcc.target/arm/maskdata.c b/gcc/testsuite/gcc.target/arm/maskdata.c
index e69de29..6d6bb39 100644
--- a/gcc/testsuite/gcc.target/arm/maskdata.c
+++ b/gcc/testsuite/gcc.target/arm/maskdata.c
@@ -0,0 +1,15 @@ 
+/* { dg-do compile } */
+/* { dg-options " -O2 -fno-gcse " }  */
+/* { dg-require-effective-target arm_thumb2_ok } */
+
+#define MASK 0xff00ff
+void maskdata (int * data, int len)
+{
+   int i = len;
+   for (; i > 0; i -= 2)
+    {
+      data[i] &= MASK;
+      data[i + 1] &= MASK;
+    }
+}
+/* { dg-final { scan-assembler-not "65280" } } */