diff mbox

Don't insert pattern statements into the code (was Fix PR tree-optimization/49318)

Message ID OFD9EB1FDE.DDE5476E-ONC22578B0.00379333-C22578B0.0038C5B0@il.ibm.com
State Accepted
Headers show

Commit Message

Ira Rosen June 15, 2011, 10:20 a.m. UTC
(I am resending this, since it's been already 4 hours since I sent it first
time, and I don't see it on the list. I apologize for possible
duplication.)

> > On 14 June 2011 14:27, Richard Guenther <richard.guenther@gmail.com>
wrote:
> >
> >>>>
> >>>>   /* Mark the stmts that are involved in the pattern. */
> >>>> -  gsi_insert_before (&si, pattern_stmt, GSI_SAME_STMT);
> >>>>   set_vinfo_for_stmt (pattern_stmt,
> >>>>                      new_stmt_vec_info (pattern_stmt, loop_vinfo,
NULL));
> >>>> +  gimple_set_bb (pattern_stmt, gimple_bb (stmt));
> >>>>
> >>>> do you really need this?
> >>>
> >>> Yes, there are a lot of uses of gimple_bb (stmt). Otherwise, we'd
have
> >>> to check there that bb exists (or that this is not a pattern stmt)
and
> >>> use the bb of the original statement if not.
> >>
> >> I see.  It's not really uglier than the part where you have to
special-case
> >> them when walking use-operands, so ...
> >
> > I think it is uglier, because there are 42 cases to handle instead of
> > a single place that you mentioned. (Probably not all the 42 can be
> > really reached with a pattern stmt, but still it's a lot).
>
> Well, yes - I meant setting the BB isn't uglier which means setting BB
> is ok.
>
> Richard.
>
> > Thanks,
> > Ira
> >
> >>
> >> Still a lot better than when inserting them for real.
> >>
> >>>> Otherwise it looks reasonable.  Btw,
> >>>> we can probably remove the simple DCE done in
> >>>> slpeel_tree_peel_loop_to_edge (remove_dead_stmts_from_loop)
> >>>> with this patch.


I committed the attached patch. It also removes
remove_dead_stmts_from_loop.

Thanks,
Ira


(See attached file: pattern2.txt)
diff mbox

Patch

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 175073)
+++ ChangeLog	(working copy)
@@ -1,3 +1,31 @@ 
+2011-06-15  Ira Rosen  <ira.rosen@linaro.org>
+
+	* tree-vect-loop-manip.c (remove_dead_stmts_from_loop): Remove.
+	(slpeel_tree_peel_loop_to_edge): Don't call
+	remove_dead_stmts_from_loop.
+	* tree-vect-loop.c (vect_determine_vectorization_factor): Don't
+	remove irrelevant pattern statements.  For irrelevant statements
+	check if it is the last statement of a detected pattern, use
+	corresponding pattern statement instead.
+	(destroy_loop_vec_info): No need to remove pattern statements,
+	 only free stmt_vec_info.
+	(vect_transform_loop): For irrelevant statements check if it is
+	the last statement of a detected pattern, use corresponding
+	pattern statement instead.
+	* tree-vect-patterns.c (vect_pattern_recog_1): Don't insert
+	pattern statements.  Set basic block for the new statement.
+	(vect_pattern_recog): Update documentation.
+	* tree-vect-stmts.c (vect_mark_stmts_to_be_vectorized): Scan
+	operands of pattern statements.
+	(vectorizable_call): Fix printing.  In case of a pattern statement
+	use the lhs of the original statement when creating a dummy
+	statement to replace the original call.
+	(vect_analyze_stmt): For irrelevant statements check if it is
+	the last statement of a detected pattern, use corresponding
+	pattern statement instead.
+	* tree-vect-slp.c (vect_schedule_slp_instance): For pattern
+	statements use gsi of the original statement.
+
 2011-06-14  Joseph Myers  <joseph@codesourcery.com>
 
 	* target-def.h (TARGET_HAVE_NAMED_SECTIONS): Move to
Index: tree-vect-loop-manip.c
===================================================================
--- tree-vect-loop-manip.c	(revision 175073)
+++ tree-vect-loop-manip.c	(working copy)
@@ -1105,35 +1105,6 @@  set_prologue_iterations (basic_block bb_before_fir
   first_niters = PHI_RESULT (newphi);
 }
 
-
-/* Remove dead assignments from loop NEW_LOOP.  */
-
-static void
-remove_dead_stmts_from_loop (struct loop *new_loop)
-{
-  basic_block *bbs = get_loop_body (new_loop);
-  unsigned i;
-  for (i = 0; i < new_loop->num_nodes; ++i)
-    {
-      gimple_stmt_iterator gsi;
-      for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);)
-	{
-	  gimple stmt = gsi_stmt (gsi);
-	  if (is_gimple_assign (stmt)
-	      && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
-	      && has_zero_uses (gimple_assign_lhs (stmt)))
-	    {
-	      gsi_remove (&gsi, true);
-	      release_defs (stmt);
-	    }
-	  else
-	    gsi_next (&gsi);
-	}
-    }
-  free (bbs);
-}
-
-
 /* Function slpeel_tree_peel_loop_to_edge.
 
    Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
@@ -1445,13 +1416,6 @@  slpeel_tree_peel_loop_to_edge (struct loop *loop,
   BITMAP_FREE (definitions);
   delete_update_ssa ();
 
-  /* Remove all pattern statements from the loop copy.  They will confuse
-     the expander if DCE is disabled.
-     ???  The pattern recognizer should be split into an analysis and
-     a transformation phase that is then run only on the loop that is
-     going to be transformed.  */
-  remove_dead_stmts_from_loop (new_loop);
-
   adjust_vec_debug_stmts ();
 
   return new_loop;
Index: tree-vect-loop.c
===================================================================
--- tree-vect-loop.c	(revision 175073)
+++ tree-vect-loop.c	(working copy)
@@ -244,7 +244,7 @@  vect_determine_vectorization_factor (loop_vec_info
       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
         {
 	  tree vf_vectype;
-	  gimple stmt = gsi_stmt (si);
+	  gimple stmt = gsi_stmt (si), pattern_stmt;
 	  stmt_info = vinfo_for_stmt (stmt);
 
 	  if (vect_print_dump_info (REPORT_DETAILS))
@@ -258,20 +258,26 @@  vect_determine_vectorization_factor (loop_vec_info
 	  /* Skip stmts which do not need to be vectorized.  */
 	  if (!STMT_VINFO_RELEVANT_P (stmt_info)
 	      && !STMT_VINFO_LIVE_P (stmt_info))
-	    {
-              if (is_pattern_stmt_p (stmt_info))
+            {
+              if (STMT_VINFO_IN_PATTERN_P (stmt_info)
+                  && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
+                  && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
+                      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
                 {
-                   /* We are not going to vectorize this pattern statement,
-                      therefore, remove it.  */
-                  gimple_stmt_iterator tmp_gsi = gsi_for_stmt (stmt);
-                  STMT_VINFO_RELATED_STMT (stmt_info) = NULL;
-                  gsi_remove (&tmp_gsi, true);
-                  free_stmt_vec_info (stmt);
+                  stmt = pattern_stmt;
+                  stmt_info = vinfo_for_stmt (pattern_stmt);
+                  if (vect_print_dump_info (REPORT_DETAILS))
+                    {
+                      fprintf (vect_dump, "==> examining pattern statement: ");
+                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
+                    }
                 }
-
-	      if (vect_print_dump_info (REPORT_DETAILS))
-	        fprintf (vect_dump, "skip.");
-	      continue;
+              else
+	        {
+	          if (vect_print_dump_info (REPORT_DETAILS))
+	            fprintf (vect_dump, "skip.");
+	          continue;
+                }
 	    }
 
 	  if (gimple_get_lhs (stmt) == NULL_TREE)
@@ -828,25 +834,17 @@  destroy_loop_vec_info (loop_vec_info loop_vinfo, b
 
           if (stmt_info)
             {
-              /* Check if this is a "pattern stmt" (introduced by the
-                 vectorizer during the pattern recognition pass).  */
-              bool remove_stmt_p = false;
-              gimple orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
-              if (orig_stmt)
-                {
-                  stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
-                  if (orig_stmt_info
-                      && STMT_VINFO_IN_PATTERN_P (orig_stmt_info))
-                    remove_stmt_p = true;
-                }
+              /* Check if this statement has a related "pattern stmt"
+                 (introduced by the vectorizer during the pattern recognition
+                 pass).  Free pattern's stmt_vec_info.  */
+              if (STMT_VINFO_IN_PATTERN_P (stmt_info)
+                  && vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)))
+                free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
 
               /* Free stmt_vec_info.  */
               free_stmt_vec_info (stmt);
+            }
 
-              /* Remove dead "pattern stmts".  */
-              if (remove_stmt_p)
-                gsi_remove (&si, true);
-            }
           gsi_next (&si);
         }
     }
@@ -5131,7 +5129,7 @@  vect_transform_loop (loop_vec_info loop_vinfo)
 
       for (si = gsi_start_bb (bb); !gsi_end_p (si);)
 	{
-	  gimple stmt = gsi_stmt (si);
+	  gimple stmt = gsi_stmt (si), pattern_stmt;
 	  bool is_store;
 
 	  if (vect_print_dump_info (REPORT_DETAILS))
@@ -5156,14 +5154,25 @@  vect_transform_loop (loop_vec_info loop_vinfo)
 
 	  if (!STMT_VINFO_RELEVANT_P (stmt_info)
 	      && !STMT_VINFO_LIVE_P (stmt_info))
-	    {
-	      gsi_next (&si);
-	      continue;
+            {
+              if (STMT_VINFO_IN_PATTERN_P (stmt_info)
+                  && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
+                  && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
+                      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
+                {
+                  stmt = pattern_stmt;
+                  stmt_info = vinfo_for_stmt (stmt);
+                }
+              else
+	        {
+   	          gsi_next (&si);
+	          continue;
+                }
 	    }
 
 	  gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
-	  nunits =
-	    (unsigned int) TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
+	  nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (
+                                               STMT_VINFO_VECTYPE (stmt_info));
 	  if (!STMT_SLP_TYPE (stmt_info)
 	      && nunits != (unsigned int) vectorization_factor
               && vect_print_dump_info (REPORT_DETAILS))
Index: tree-vect-patterns.c
===================================================================
--- tree-vect-patterns.c	(revision 175073)
+++ tree-vect-patterns.c	(working copy)
@@ -831,9 +831,9 @@  vect_pattern_recog_1 (
     }
 
   /* Mark the stmts that are involved in the pattern. */
-  gsi_insert_before (&si, pattern_stmt, GSI_SAME_STMT);
   set_vinfo_for_stmt (pattern_stmt,
 		      new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL));
+  gimple_set_bb (pattern_stmt, gimple_bb (stmt));
   pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
 
   STMT_VINFO_RELATED_STMT (pattern_stmt_info) = stmt;
@@ -856,8 +856,8 @@  vect_pattern_recog_1 (
    LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
         computation idioms.
 
-   Output - for each computation idiom that is detected we insert a new stmt
-        that provides the same functionality and that can be vectorized. We
+   Output - for each computation idiom that is detected we create a new stmt
+        that provides the same functionality and that can be vectorized.  We
         also record some information in the struct_stmt_info of the relevant
         stmts, as explained below:
 
@@ -872,53 +872,49 @@  vect_pattern_recog_1 (
          S5: ... = ..use(a_0)..         -       -               -
 
    Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
-   represented by a single stmt. We then:
-   - create a new stmt S6 that will replace the pattern.
-   - insert the new stmt S6 before the last stmt in the pattern
+   represented by a single stmt.  We then:
+   - create a new stmt S6 equivalent to the pattern (the stmt is not
+     inserted into the code)
    - fill in the STMT_VINFO fields as follows:
 
                                   in_pattern_p  related_stmt    vec_stmt
          S1: a_i = ....                 -       -               -
          S2: a_2 = ..use(a_i)..         -       -               -
          S3: a_1 = ..use(a_2)..         -       -               -
-       > S6: a_new = ....               -       S4              -
          S4: a_0 = ..use(a_1)..         true    S6              -
+          '---> S6: a_new = ....        -       S4              -
          S5: ... = ..use(a_0)..         -       -               -
 
    (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
-    to each other through the RELATED_STMT field).
+   to each other through the RELATED_STMT field).
 
    S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
    of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
    remain irrelevant unless used by stmts other than S4.
 
    If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
-   (because they are marked as irrelevant). It will vectorize S6, and record
+   (because they are marked as irrelevant).  It will vectorize S6, and record
    a pointer to the new vector stmt VS6 both from S6 (as usual), and also
-   from S4. We do that so that when we get to vectorizing stmts that use the
+   from S4.  We do that so that when we get to vectorizing stmts that use the
    def of S4 (like S5 that uses a_0), we'll know where to take the relevant
-   vector-def from. S4 will be skipped, and S5 will be vectorized as usual:
+   vector-def from.  S4 will be skipped, and S5 will be vectorized as usual:
 
                                   in_pattern_p  related_stmt    vec_stmt
          S1: a_i = ....                 -       -               -
          S2: a_2 = ..use(a_i)..         -       -               -
          S3: a_1 = ..use(a_2)..         -       -               -
        > VS6: va_new = ....             -       -               -
-         S6: a_new = ....               -       S4              VS6
          S4: a_0 = ..use(a_1)..         true    S6              VS6
+          '---> S6: a_new = ....        -       S4              VS6
        > VS5: ... = ..vuse(va_new)..    -       -               -
          S5: ... = ..use(a_0)..         -       -               -
 
-   DCE could then get rid of {S1,S2,S3,S4,S5,S6} (if their defs are not used
+   DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
    elsewhere), and we'll end up with:
 
         VS6: va_new = ....
-        VS5: ... = ..vuse(va_new)..
+        VS5: ... = ..vuse(va_new)..  */
 
-   If vectorization does not succeed, DCE will clean S6 away (its def is
-   not used), and we'll end up with the original sequence.
-*/
-
 void
 vect_pattern_recog (loop_vec_info loop_vinfo)
 {
Index: tree-vect-stmts.c
===================================================================
--- tree-vect-stmts.c	(revision 175073)
+++ tree-vect-stmts.c	(working copy)
@@ -605,15 +605,49 @@  vect_mark_stmts_to_be_vectorized (loop_vec_info lo
             break;
         }
 
-      FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
-	{
-	  tree op = USE_FROM_PTR (use_p);
-	  if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist))
-	    {
-	      VEC_free (gimple, heap, worklist);
-	      return false;
-	    }
-	}
+      if (is_pattern_stmt_p (vinfo_for_stmt (stmt)))
+        {
+          /* Pattern statements are not inserted into the code, so
+             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
+             have to scan the RHS or function arguments instead.  */
+          if (is_gimple_assign (stmt))
+            {
+              for (i = 1; i < gimple_num_ops (stmt); i++)
+                {
+                  tree op = gimple_op (stmt, i);
+                  if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
+                                    &worklist))
+                    {
+                      VEC_free (gimple, heap, worklist);
+                      return false;
+                    }
+                 }
+            }
+          else if (is_gimple_call (stmt))
+            {
+              for (i = 0; i < gimple_call_num_args (stmt); i++)
+                {
+                  tree arg = gimple_call_arg (stmt, i);
+                  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
+                                    &worklist))
+                    {
+                      VEC_free (gimple, heap, worklist);
+                      return false;
+                    }
+                }
+            }
+        }
+      else
+        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
+          {
+            tree op = USE_FROM_PTR (use_p);
+            if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
+                              &worklist))
+              {
+                VEC_free (gimple, heap, worklist);
+                return false;
+              }
+          }
     } /* while worklist */
 
   VEC_free (gimple, heap, worklist);
@@ -1406,6 +1440,7 @@  vectorizable_call (gimple stmt, gimple_stmt_iterat
   VEC(tree, heap) *vargs = NULL;
   enum { NARROW, NONE, WIDEN } modifier;
   size_t i, nargs;
+  tree lhs;
 
   /* FORNOW: unsupported in basic block SLP.  */
   gcc_assert (loop_vinfo);
@@ -1543,7 +1578,7 @@  vectorizable_call (gimple stmt, gimple_stmt_iterat
   /** Transform.  **/
 
   if (vect_print_dump_info (REPORT_DETAILS))
-    fprintf (vect_dump, "transform operation.");
+    fprintf (vect_dump, "transform call.");
 
   /* Handle def.  */
   scalar_dest = gimple_call_lhs (stmt);
@@ -1662,8 +1697,11 @@  vectorizable_call (gimple stmt, gimple_stmt_iterat
      rhs of the statement with something harmless.  */
 
   type = TREE_TYPE (scalar_dest);
-  new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
-				  build_zero_cst (type));
+  if (is_pattern_stmt_p (stmt_info))
+    lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
+  else
+    lhs = gimple_call_lhs (stmt);
+  new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
   set_vinfo_for_stmt (new_stmt, stmt_info);
   set_vinfo_for_stmt (stmt, NULL);
   STMT_VINFO_STMT (stmt_info) = new_stmt;
@@ -4846,10 +4884,26 @@  vect_analyze_stmt (gimple stmt, bool *need_to_vect
   if (!STMT_VINFO_RELEVANT_P (stmt_info)
       && !STMT_VINFO_LIVE_P (stmt_info))
     {
-      if (vect_print_dump_info (REPORT_DETAILS))
-        fprintf (vect_dump, "irrelevant.");
+      gimple pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
+      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
+          && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
+              || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
+        {
+          stmt = pattern_stmt;
+          stmt_info = vinfo_for_stmt (pattern_stmt);
+          if (vect_print_dump_info (REPORT_DETAILS))
+            {
+              fprintf (vect_dump, "==> examining pattern statement: ");
+              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
+            }
+        }
+      else
+        {
+          if (vect_print_dump_info (REPORT_DETAILS))
+            fprintf (vect_dump, "irrelevant.");
 
-      return true;
+          return true;
+        }
     }
 
   switch (STMT_VINFO_DEF_TYPE (stmt_info))
Index: tree-vect-slp.c
===================================================================
--- tree-vect-slp.c	(revision 175073)
+++ tree-vect-slp.c	(working copy)
@@ -2546,6 +2546,8 @@  vect_schedule_slp_instance (slp_tree node, slp_ins
       && STMT_VINFO_STRIDED_ACCESS (stmt_info)
       && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
     si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
+  else if (is_pattern_stmt_p (stmt_info))
+     si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
   else
     si = gsi_for_stmt (stmt);