diff mbox

[SMS,3/4] Optimize stage count

Message ID BANLkTinZ6mdH610Q0nRcSNjx95GTFVoVdQ@mail.gmail.com
State New
Headers show

Commit Message

Revital Eres May 19, 2011, 4:44 a.m. UTC
Hello,

The attach patch tries to achieve optimised SC by normalizing the partial
schedule (having the cycles start from cycle zero). The branch location
must be placed in row ii-1 in the final scheduling.  If that's not the
case after the normalization then it tries to move the branch to that
row if possible, while preserving the scheduling of the rest of the
instructions.

The patch was tested together with the rest of the patches in this series.
On ppc64-redhat-linux regtest as well as bootstrap with SMS flags
enabling SMS also on loops with stage count 1.  Regtested on SPU.
On arm-linux-gnueabi regtseted on c,c++. Bootstrap c language with SMS
flags enabling SMS also on loops with stage count 1.

OK for mainline?

Thanks,
Revital

Changelog:

       * modulo-sched.c (calculate_stage_count,
        calculate_must_precede_follow, get_sched_window,
        try_scheduling_node_in_cycle, remove_node_from_ps): Add
        declaration.
        (update_node_sched_params, set_must_precede_follow, optimize_sc):
        New functions.
        (reset_sched_times): Call update_node_sched_params.
        (sms_schedule): Call optimize_sc.
        (get_sched_window): Change function arguments.
        (sms_schedule_by_order): Update call to get_sched_window.
        all set_must_precede_follow.
        (calculate_stage_count): Add function argument.
diff mbox

Patch

Index: modulo-sched.c
===================================================================
--- modulo-sched.c	(revision 173786)
+++ modulo-sched.c	(working copy)
@@ -198,7 +198,16 @@  static void generate_prolog_epilog (part
                                     rtx, rtx);
 static void duplicate_insns_of_cycles (partial_schedule_ptr,
 				       int, int, int, rtx);
-static int calculate_stage_count (partial_schedule_ptr ps);
+static int calculate_stage_count (partial_schedule_ptr, int);
+static void calculate_must_precede_follow (ddg_node_ptr, int, int,
+					   int, int, sbitmap, sbitmap, sbitmap);
+static int get_sched_window (partial_schedule_ptr, ddg_node_ptr, 
+			     sbitmap, int, int *, int *, int *);
+static bool try_scheduling_node_in_cycle (partial_schedule_ptr, ddg_node_ptr,
+					  int, int, sbitmap, int *, sbitmap,
+					  sbitmap);
+static bool remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr);
+
 #define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
 #define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
 #define SCHED_FIRST_REG_MOVE(x) \
@@ -572,6 +581,33 @@  free_undo_replace_buff (struct undo_repl
     }
 }
 
+/* Update the sched_params for node U using the II,
+   the CYCLE of U and MIN_CYCLE.  */
+static void
+update_node_sched_params (ddg_node_ptr u, int ii, int cycle, int min_cycle)
+{
+  int sc_until_cycle_zero;
+  int stage;
+
+  SCHED_TIME (u) = cycle;
+  SCHED_ROW (u) = SMODULO (cycle, ii);
+
+  /* The calculation of stage count is done adding the number
+     of stages before cycle zero and after cycle zero.  */
+  sc_until_cycle_zero = CALC_STAGE_COUNT (-1, min_cycle, ii);
+
+  if (SCHED_TIME (u) < 0)
+    {
+      stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii);
+      SCHED_STAGE (u) = sc_until_cycle_zero - stage;
+    }
+  else
+    {
+      stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii);
+      SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1;
+    }
+}
+
 /* Bump the SCHED_TIMEs of all nodes by AMOUNT.  Set the values of
    SCHED_ROW and SCHED_STAGE.  Instruction scheduled on cycle AMOUNT
    will move to cycle zero.  */
@@ -588,7 +624,6 @@  reset_sched_times (partial_schedule_ptr 
 	ddg_node_ptr u = crr_insn->node;
 	int normalized_time = SCHED_TIME (u) - amount;
 	int new_min_cycle = PS_MIN_CYCLE (ps) - amount;
-        int sc_until_cycle_zero, stage;
 
         if (dump_file)
           {
@@ -604,23 +639,9 @@  reset_sched_times (partial_schedule_ptr 
 	
 	gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
 	gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
-	SCHED_TIME (u) = normalized_time;
-	SCHED_ROW (u) = SMODULO (normalized_time, ii);
-      
-        /* The calculation of stage count is done adding the number
-           of stages before cycle zero and after cycle zero.  */
-	sc_until_cycle_zero = CALC_STAGE_COUNT (-1, new_min_cycle, ii);
-	
-	if (SCHED_TIME (u) < 0)
-	  {
-	    stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii);
-	    SCHED_STAGE (u) = sc_until_cycle_zero - stage;
-	  }
-	else
-	  {
-	    stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii);
-	    SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1;
-	  }
+
+	crr_insn->cycle = normalized_time;
+	update_node_sched_params (u, ii, normalized_time, new_min_cycle);
       }
 }
  
@@ -657,6 +678,206 @@  permute_partial_schedule (partial_schedu
 			    PREV_INSN (last));
 }
 
+/* Set bitmaps TMP_FOLLOW and TMP_PRECEDE to MUST_FOLLOW and MUST_PRECEDE
+   respectively only if cycle C falls in the scheduling window boundaries
+   marked by START and END cycles.  STEP is the direction of the window.
+   */
+static inline void
+set_must_precede_follow (sbitmap *tmp_follow, sbitmap must_follow,
+			 sbitmap *tmp_precede, sbitmap must_precede, int c,
+			 int start, int end, int step)
+{
+  *tmp_precede = NULL;
+  *tmp_follow = NULL;
+
+  if (c == start)
+    {
+      if (step == 1)
+	*tmp_precede = must_precede;
+      else			/* step == -1.  */
+	*tmp_follow = must_follow;
+    }
+  if (c == end - step)
+    {
+      if (step == 1)
+	*tmp_follow = must_follow;
+      else			/* step == -1.  */
+	*tmp_precede = must_precede;
+    }
+
+}
+
+/* Return True if the branch can be moved to row ii-1 while
+   normalizing the partial schedule PS to start from cycle zero and thus
+   optimize the SC.  Otherwise return False.  */
+static bool
+optimize_sc (partial_schedule_ptr ps, ddg_ptr g)
+{
+  int amount = PS_MIN_CYCLE (ps);
+  sbitmap sched_nodes = sbitmap_alloc (g->num_nodes);
+  int start, end, step;
+  int ii = ps->ii;
+  bool ok = false;
+  int stage_count, stage_count_curr;
+
+  /* Compare the SC after normalization and SC after bringing the branch
+     to row ii-1.  If they are equal just bail out.  */
+  stage_count = calculate_stage_count (ps, amount);
+  stage_count_curr =
+    calculate_stage_count (ps, SCHED_TIME (g->closing_branch) + 1);
+
+  if (stage_count == stage_count_curr)
+    {
+      if (dump_file)
+	fprintf (dump_file, "SMS SC already optimized.\n");
+
+      ok = false;
+      goto clear;
+    }
+
+  if (dump_file)
+    {
+      fprintf (dump_file, "SMS Trying to optimize branch location\n");
+      fprintf (dump_file, "SMS partial schedule before trial:\n");
+      print_partial_schedule (ps, dump_file);
+    }
+
+  /* First, normailize the partial schedualing.  */
+  reset_sched_times (ps, amount);
+  rotate_partial_schedule (ps, amount);
+  if (dump_file)
+    {
+      fprintf (dump_file,
+	       "SMS partial schedule after normalization (ii, %d, SC %d):\n",
+	       ii, stage_count);
+      print_partial_schedule (ps, dump_file);
+    }
+
+  if (SMODULO (SCHED_TIME (g->closing_branch), ii) == ii - 1)
+    {
+      ok = true;
+      goto clear;
+    }
+
+  sbitmap_ones (sched_nodes);
+
+  /* Calculate the new placement of the branch.  It should be in row
+     ii-1 and fall into it's scheduling window.  */
+  if (get_sched_window (ps, g->closing_branch, sched_nodes, ii, &start,
+			&step, &end) == 0)
+    {
+      bool success;
+      ps_insn_ptr next_ps_i;
+      int branch_cycle = SCHED_TIME (g->closing_branch);
+      int row = SMODULO (branch_cycle, ps->ii);
+      int num_splits = 0;
+      sbitmap must_precede, must_follow, tmp_precede, tmp_follow;
+      int c;
+
+      if (dump_file)
+	fprintf (dump_file, "\nTrying to schedule node %d "
+		 "INSN = %d  in (%d .. %d) step %d\n",
+		 g->closing_branch->cuid,
+		 (INSN_UID (g->closing_branch->insn)), start, end, step);
+
+      gcc_assert ((step > 0 && start < end) || (step < 0 && start > end));
+      if (step == 1)
+	{
+	  c = start + ii - SMODULO (start, ii) - 1;
+	  gcc_assert (c >= start);
+	  if (c >= end)
+	    {
+	      ok = false;
+	      if (dump_file)
+		fprintf (dump_file,
+			 "SMS failed to schedule branch at cycle: %d\n", c);
+	      goto clear;
+	    }
+	}
+      else
+	{
+	  c = start - SMODULO (start, ii) - 1;
+	  gcc_assert (c <= start);
+
+	  if (c <= end)
+	    {
+	      if (dump_file)
+		fprintf (dump_file,
+			 "SMS failed to schedule branch at cycle: %d\n", c);
+	      ok = false;
+	      goto clear;
+	    }
+	}
+
+      must_precede = sbitmap_alloc (g->num_nodes);
+      must_follow = sbitmap_alloc (g->num_nodes);
+
+      /* Try to schedule the branch is it's new cycle.  */
+      calculate_must_precede_follow (g->closing_branch, start, end,
+				     step, ii, sched_nodes,
+				     must_precede, must_follow);
+
+      set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
+			       must_precede, c, start, end, step);
+
+      /* Find the element in the partial schedule related to the closing
+         branch so we can remove it from it's current cycle.  */
+      for (next_ps_i = ps->rows[row];
+	   next_ps_i; next_ps_i = next_ps_i->next_in_row)
+	if (next_ps_i->node->cuid == g->closing_branch->cuid)
+	  break;
+
+      gcc_assert (next_ps_i);
+      gcc_assert (remove_node_from_ps (ps, next_ps_i));
+      success =
+	try_scheduling_node_in_cycle (ps, g->closing_branch,
+				      g->closing_branch->cuid, c,
+				      sched_nodes, &num_splits,
+				      tmp_precede, tmp_follow);
+      gcc_assert (num_splits == 0);
+      if (!success)
+	{
+	  if (dump_file)
+	    fprintf (dump_file,
+		     "SMS failed to schedule branch at cycle: %d, "
+		     "bringing it back to cycle %d\n", c, branch_cycle);
+
+	  /* The branch was failed to be placed in row ii - 1.
+	     Put it back in it's original place in the partial
+	     schedualing.  */
+	  set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
+				   must_precede, branch_cycle, start, end,
+				   step);
+	  success =
+	    try_scheduling_node_in_cycle (ps, g->closing_branch,
+					  g->closing_branch->cuid,
+					  branch_cycle, sched_nodes,
+					  &num_splits, tmp_precede,
+					  tmp_follow);
+	  gcc_assert (success && (num_splits == 0));
+	  ok = false;
+	}
+      else
+	{
+	  /* The branch is placed in row ii - 1.  */
+	  if (dump_file)
+	    fprintf (dump_file,
+		     "SMS success in moving branch to cycle %d\n", c);
+
+	  update_node_sched_params (g->closing_branch, ii, c,
+				    PS_MIN_CYCLE (ps));
+	  ok = true;
+	}
+
+      free (must_precede);
+      free (must_follow);
+    }
+
+clear:
+  free (sched_nodes);
+  return ok;
+}
+
 static void
 duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
 			   int to_stage, int for_prolog, rtx count_reg)
@@ -1112,6 +1333,7 @@  sms_schedule (void)
       int mii, rec_mii;
       unsigned stage_count = 0;
       HOST_WIDEST_INT loop_count = 0;
+      bool opt_sc_p = false;
 
       if (! (g = g_arr[loop->num]))
         continue;
@@ -1193,14 +1415,32 @@  sms_schedule (void)
       set_node_sched_params (g);
 
       ps = sms_schedule_by_order (g, mii, maxii, node_order);
-
-       if (ps)
-       {
-         stage_count = calculate_stage_count (ps);
-         gcc_assert(stage_count >= 1);
-         PS_STAGE_COUNT(ps) = stage_count;
-       }
-
+      
+      if (ps)
+	{
+	  /* Try to achieve optimized SC by normalizing the partial
+	     schedule (having the cycles start from cycle zero). The branch
+	     location must be placed in row ii-1 in the final scheduling.
+	     If that's not the case after the normalization then try to
+	     move the branch to that row if possible.  */
+	  opt_sc_p = optimize_sc (ps, g);
+	  if (opt_sc_p)
+	    stage_count = calculate_stage_count (ps, 0);
+	  else
+	    {
+	      /* Bring the branch to cycle -1.  */
+	      int amount = SCHED_TIME (g->closing_branch) + 1;
+	      
+	      if (dump_file)
+		fprintf (dump_file, "SMS schedule branch at cycle -1\n");
+	      
+	      stage_count = calculate_stage_count (ps, amount);
+	    }
+	  
+	  gcc_assert (stage_count >= 1);
+	  PS_STAGE_COUNT (ps) = stage_count;
+	}
+      
       /* The default value of PARAM_SMS_MIN_SC is 2 as stage count of
 	 1 means that there is no interleaving between iterations thus
 	 we let the scheduling passes do the job in this case.  */
@@ -1221,12 +1461,16 @@  sms_schedule (void)
       else
 	{
 	  struct undo_replace_buff_elem *reg_move_replaces;
-          int amount = SCHED_TIME (g->closing_branch) + 1;
+
+          if (!opt_sc_p)
+            {
+	      /* Rotate the partial schedule to have the branch in row ii-1.  */
+              int amount = SCHED_TIME (g->closing_branch) + 1;
+	      
+              reset_sched_times (ps, amount);
+              rotate_partial_schedule (ps, amount);
+            }
 	  
-	  /* Set the stage boundaries.	The closing_branch was scheduled
-	     and should appear in the last (ii-1) row.  */
-	  reset_sched_times (ps, amount);
-	  rotate_partial_schedule (ps, amount);
 	  set_columns_for_ps (ps);
 
 	  canon_loop (loop);
@@ -1378,13 +1622,11 @@  sms_schedule (void)
    scheduling window is empty and zero otherwise.  */
 
 static int
-get_sched_window (partial_schedule_ptr ps, int *nodes_order, int i,
+get_sched_window (partial_schedule_ptr ps, ddg_node_ptr u_node,
 		  sbitmap sched_nodes, int ii, int *start_p, int *step_p, int *end_p)
 {
   int start, step, end;
   ddg_edge_ptr e;
-  int u = nodes_order [i];
-  ddg_node_ptr u_node = &ps->g->nodes[u];
   sbitmap psp = sbitmap_alloc (ps->g->num_nodes);
   sbitmap pss = sbitmap_alloc (ps->g->num_nodes);
   sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
@@ -1796,7 +2038,7 @@  sms_schedule_by_order (ddg_ptr g, int mi
 
 	  /* Try to get non-empty scheduling window.  */
 	 success = 0;
-         if (get_sched_window (ps, nodes_order, i, sched_nodes, ii, &start,
+         if (get_sched_window (ps, u_node, sched_nodes, ii, &start,
                                 &step, &end) == 0)
             {
               if (dump_file)
@@ -1816,21 +2058,9 @@  sms_schedule_by_order (ddg_ptr g, int mi
                   sbitmap tmp_precede = NULL;
                   sbitmap tmp_follow = NULL;
 
-                  if (c == start)
-                    {
-                      if (step == 1)
-                        tmp_precede = must_precede;
-                      else      /* step == -1.  */
-                        tmp_follow = must_follow;
-                    }
-                  if (c == end - step)
-                    {
-                      if (step == 1)
-                        tmp_follow = must_follow;
-                      else      /* step == -1.  */
-                        tmp_precede = must_precede;
-                    }
-
+                  set_must_precede_follow (&tmp_follow, must_follow, 
+		                           &tmp_precede, must_precede, 
+                                           c, start, end, step);
                   success =
                     try_scheduling_node_in_cycle (ps, u_node, u, c,
                                                   sched_nodes,
@@ -2876,12 +3106,10 @@  ps_add_node_check_conflicts (partial_sch
 }
 
 /* Calculate the stage count of the partial schedule PS.  The calculation
-   takes into account the rotation to bring the closing branch to row
-   ii-1.  */
+   takes into account the rotation amount passed in ROTATION_AMOUNT.  */
 int
-calculate_stage_count (partial_schedule_ptr ps)
+calculate_stage_count (partial_schedule_ptr ps, int rotation_amount)
 {
-  int rotation_amount = (SCHED_TIME (ps->g->closing_branch)) + 1;
   int new_min_cycle = PS_MIN_CYCLE (ps) - rotation_amount;
   int new_max_cycle = PS_MAX_CYCLE (ps) - rotation_amount;
   int stage_count = CALC_STAGE_COUNT (-1, new_min_cycle, ps->ii);