diff mbox

[rs6000] Fix PR11488 for rs6000 target

Message ID 09df046e-555c-be35-d8ee-d5b96553e87e@linux.vnet.ibm.com
State New
Headers show

Commit Message

Pat Haugen Dec. 20, 2016, 5:27 p.m. UTC
This patch attempts to fix problems with the first scheduling pass creating too much register pressure. It does this by enabling the target hook to compute the pressure classes for rs6000 target since the first thing I observed while investigating the testcase in the subject PR is that IRA was picking NON_SPECIAL_REGS as a pressure class which led to the sched-pressure code computing too high of a value for number of regs available for pseudos preferring GENERAL_REGS. It also enables -fsched-pressure by default, using the 'model' algorithm.

I ran various runs of cpu2006 to determine the set of pressure classes and which sched-pressure algorithm to use. Net result is that with these patches I see 6 benchmarks improve in the 2.4-6% range but there are also a couple 2% degradations which will need follow up in GCC 8. There was also one benchmark that showed a much bigger improvement with the 'weighted' sched-pressure algorithm that also needs follow up ('weighted' was not chosen as default since it showed more degradations).

Bootstrap/regtest on powerpc64/powerpc64le. There were 2 testcases that failed (sms-3.c/sms-6.c) but I have submitted a separate patch to fix those. Ok for trunk?

-Pat


2016-12-20  Pat Haugen  <pthaugen@us.ibm.com>

	PR rtl-optimization/11488
	* common/config/rs6000/rs6000-common.c
	(rs6000_option_optimization_table): Enable -fsched-pressure.
	* config/rs6000/rs6000.c (TARGET_COMPUTE_PRESSURE_CLASSES): Define
	target hook.
	(rs6000_option_override_internal): Set default -fsched-pressure algorithm.
	(rs6000_compute_pressure_classes): Implement target hook.

Comments

Segher Boessenkool Dec. 21, 2016, 6:03 p.m. UTC | #1
On Tue, Dec 20, 2016 at 11:27:18AM -0600, Pat Haugen wrote:
> This patch attempts to fix problems with the first scheduling pass creating too much register pressure. It does this by enabling the target hook to compute the pressure classes for rs6000 target since the first thing I observed while investigating the testcase in the subject PR is that IRA was picking NON_SPECIAL_REGS as a pressure class which led to the sched-pressure code computing too high of a value for number of regs available for pseudos preferring GENERAL_REGS. It also enables -fsched-pressure by default, using the 'model' algorithm.

> 

> I ran various runs of cpu2006 to determine the set of pressure classes and which sched-pressure algorithm to use. Net result is that with these patches I see 6 benchmarks improve in the 2.4-6% range but there are also a couple 2% degradations which will need follow up in GCC 8. There was also one benchmark that showed a much bigger improvement with the 'weighted' sched-pressure algorithm that also needs follow up ('weighted' was not chosen as default since it showed more degradations).

> 

> Bootstrap/regtest on powerpc64/powerpc64le. There were 2 testcases that failed (sms-3.c/sms-6.c) but I have submitted a separate patch to fix those. Ok for trunk?


Okay.  Thanks!


Segher


> 2016-12-20  Pat Haugen  <pthaugen@us.ibm.com>

> 

> 	PR rtl-optimization/11488

> 	* common/config/rs6000/rs6000-common.c

> 	(rs6000_option_optimization_table): Enable -fsched-pressure.

> 	* config/rs6000/rs6000.c (TARGET_COMPUTE_PRESSURE_CLASSES): Define

> 	target hook.

> 	(rs6000_option_override_internal): Set default -fsched-pressure algorithm.

> 	(rs6000_compute_pressure_classes): Implement target hook.
diff mbox

Patch

Index: common/config/rs6000/rs6000-common.c
===================================================================
--- common/config/rs6000/rs6000-common.c	(revision 243651)
+++ common/config/rs6000/rs6000-common.c	(working copy)
@@ -32,6 +32,8 @@ 
 static const struct default_options rs6000_option_optimization_table[] =
   {
     { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+    /* Enable -fsched-pressure for first pass instruction scheduling.  */
+    { OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 },
     { OPT_LEVELS_NONE, 0, NULL, 0 }
   };
 
Index: config/rs6000/rs6000.c
===================================================================
--- config/rs6000/rs6000.c	(revision 243651)
+++ config/rs6000/rs6000.c	(working copy)
@@ -1807,6 +1807,9 @@  static const struct attribute_spec rs600
 #undef TARGET_LRA_P
 #define TARGET_LRA_P rs6000_lra_p
 
+#undef TARGET_COMPUTE_PRESSURE_CLASSES
+#define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
+
 #undef TARGET_CAN_ELIMINATE
 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
 
@@ -5100,6 +5103,12 @@  rs6000_option_override_internal (bool gl
 			     global_options.x_param_values,
 			     global_options_set.x_param_values);
 
+      /* Use the 'model' -fsched-pressure algorithm by default.  */
+      maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM,
+			     SCHED_PRESSURE_MODEL,
+			     global_options.x_param_values,
+			     global_options_set.x_param_values);
+
       /* If using typedef char *va_list, signal that
 	 __builtin_va_start (&ap, 0) can be optimized to
 	 ap = __builtin_next_arg (0).  */
@@ -37450,6 +37459,32 @@  rs6000_lra_p (void)
   return TARGET_LRA;
 }
 
+/* Compute register pressure classes.  We implement the target hook to avoid
+   IRA picking something like NON_SPECIAL_REGS as a pressure class, which can
+   lead to incorrect estimates of number of available registers and therefore
+   increased register pressure/spill.   */
+static int
+rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
+{
+  int n;
+
+  n = 0;
+  pressure_classes[n++] = GENERAL_REGS;
+  if (TARGET_VSX)
+    pressure_classes[n++] = VSX_REGS;
+  else
+    {
+      if (TARGET_ALTIVEC)
+	pressure_classes[n++] = ALTIVEC_REGS;
+      if (TARGET_HARD_FLOAT && TARGET_FPRS)
+	pressure_classes[n++] = FLOAT_REGS;
+    }
+  pressure_classes[n++] = CR_REGS;
+  pressure_classes[n++] = SPECIAL_REGS;
+
+  return n;
+}
+
 /* Given FROM and TO register numbers, say whether this elimination is allowed.
    Frame pointer elimination is automatically handled.