Fix SLP def type when computing masks (PR85853)

Message ID 87muwqlvr3.fsf@linaro.org
State New
Headers show
Series
  • Fix SLP def type when computing masks (PR85853)
Related show

Commit Message

Richard Sandiford May 23, 2018, 6:41 a.m.
In this PR, SLP failed to include a comparison node in the SLP
tree and so marked the node as external.  It then went on to call
vect_is_simple_use on the comparison with its STMT_VINFO_DEF_TYPE
still claiming that it was an internal definition.

We already avoid that for vect_analyze_stmt by temporarily copying
the node's definition type to each STMT_VINFO_DEF_TYPE.  This patch
extends that to the vector type calculation.  The easiest thing
seemed to be to split the analysis of the root node out into
a subroutine, so that it's possible to return false early without
awkward control flow.

Tested on aarch64-linux-gnu (with and without SLP), aarch64_be-elf
and x86_64-linux-gnu.  OK to install?

Richard


2018-05-23  Richard Sandiford  <richard.sandiford@linaro.org>

gcc/
	PR tree-optimization/85853
	* tree-vect-slp.c (vect_slp_analyze_node_operations): Split out
	handling of the root of the node to...
	(vect_slp_analyze_node_operations_1): ...this new function,
	and run the whole thing with the child nodes' def types
	set according to their SLP node's def type.

gcc/testsuite/
	PR tree-optimization/85853
	* gfortran.dg/vect/pr85853.f90: New test.

Comments

Richard Biener May 23, 2018, 9:50 a.m. | #1
On Wed, May 23, 2018 at 8:41 AM Richard Sandiford <richard.sandiford@linaro.org> wrote:

> In this PR, SLP failed to include a comparison node in the SLP

> tree and so marked the node as external.  It then went on to call

> vect_is_simple_use on the comparison with its STMT_VINFO_DEF_TYPE

> still claiming that it was an internal definition.


> We already avoid that for vect_analyze_stmt by temporarily copying

> the node's definition type to each STMT_VINFO_DEF_TYPE.  This patch

> extends that to the vector type calculation.  The easiest thing

> seemed to be to split the analysis of the root node out into

> a subroutine, so that it's possible to return false early without

> awkward control flow.


> Tested on aarch64-linux-gnu (with and without SLP), aarch64_be-elf

> and x86_64-linux-gnu.  OK to install?


OK.

Richard.

> Richard



> 2018-05-23  Richard Sandiford  <richard.sandiford@linaro.org>


> gcc/

>          PR tree-optimization/85853

>          * tree-vect-slp.c (vect_slp_analyze_node_operations): Split out

>          handling of the root of the node to...

>          (vect_slp_analyze_node_operations_1): ...this new function,

>          and run the whole thing with the child nodes' def types

>          set according to their SLP node's def type.


> gcc/testsuite/

>          PR tree-optimization/85853

>          * gfortran.dg/vect/pr85853.f90: New test.


> Index: gcc/tree-vect-slp.c

> ===================================================================

> --- gcc/tree-vect-slp.c 2018-05-17 11:50:31.609158213 +0100

> +++ gcc/tree-vect-slp.c 2018-05-23 07:37:12.480578116 +0100

> @@ -2476,49 +2476,16 @@ _bb_vec_info::~_bb_vec_info ()

>     bb->aux = NULL;

>   }


> -

> -/* Analyze statements contained in SLP tree NODE after recursively analyzing

> -   the subtree.  NODE_INSTANCE contains NODE and VINFO contains INSTANCE.

> -

> -   Return true if the operations are supported.  */

> +/* Subroutine of vect_slp_analyze_node_operations.  Handle the root of NODE,

> +   given that child nodes have already been processed, and that

> +   their def types currently match their SLP node's def type.  */


>   static bool

> -vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,

> -                                 slp_instance node_instance,

> -                                 scalar_stmts_to_slp_tree_map_t *visited,

> -                                 scalar_stmts_to_slp_tree_map_t *lvisited,

> -                                 stmt_vector_for_cost *cost_vec)

> +vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,

> +                                   slp_instance node_instance,

> +                                   stmt_vector_for_cost *cost_vec)

>   {

> -  bool dummy;

> -  int i, j;

> -  gimple *stmt;

> -  slp_tree child;

> -

> -  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)

> -    return true;

> -

> -  /* If we already analyzed the exact same set of scalar stmts we're done.

> -     We share the generated vector stmts for those.  */

> -  slp_tree *leader;

> -  if ((leader = visited->get (SLP_TREE_SCALAR_STMTS (node)))

> -      || (leader = lvisited->get (SLP_TREE_SCALAR_STMTS (node))))

> -    {

> -      SLP_TREE_NUMBER_OF_VEC_STMTS (node)

> -       = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader);

> -      return true;

> -    }

> -

> -  /* The SLP graph is acyclic so not caching whether we failed or succeeded

> -     doesn't result in any issue since we throw away the lvisited set

> -     when we fail.  */

> -  lvisited->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);

> -

> -  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)

> -    if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,

> -                                          visited, lvisited, cost_vec))

> -      return false;

> -

> -  stmt = SLP_TREE_SCALAR_STMTS (node)[0];

> +  gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];

>     stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

>     gcc_assert (stmt_info);

>     gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);

> @@ -2545,6 +2512,7 @@ vect_slp_analyze_node_operations (vec_in

>          }


>         gimple *sstmt;

> +      unsigned int i;

>         FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt)

>          STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt)) = vectype;

>       }

> @@ -2572,12 +2540,56 @@ vect_slp_analyze_node_operations (vec_in

>          = vect_get_num_vectors (vf * group_size, vectype);

>       }


> +  bool dummy;

> +  return vect_analyze_stmt (stmt, &dummy, node, node_instance, cost_vec);

> +}

> +

> +/* Analyze statements contained in SLP tree NODE after recursively analyzing

> +   the subtree.  NODE_INSTANCE contains NODE and VINFO contains INSTANCE.

> +

> +   Return true if the operations are supported.  */

> +

> +static bool

> +vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,

> +                                 slp_instance node_instance,

> +                                 scalar_stmts_to_slp_tree_map_t *visited,

> +                                 scalar_stmts_to_slp_tree_map_t *lvisited,

> +                                 stmt_vector_for_cost *cost_vec)

> +{

> +  int i, j;

> +  slp_tree child;

> +

> +  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)

> +    return true;

> +

> +  /* If we already analyzed the exact same set of scalar stmts we're done.

> +     We share the generated vector stmts for those.  */

> +  slp_tree *leader;

> +  if ((leader = visited->get (SLP_TREE_SCALAR_STMTS (node)))

> +      || (leader = lvisited->get (SLP_TREE_SCALAR_STMTS (node))))

> +    {

> +      SLP_TREE_NUMBER_OF_VEC_STMTS (node)

> +       = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader);

> +      return true;

> +    }

> +

> +  /* The SLP graph is acyclic so not caching whether we failed or succeeded

> +     doesn't result in any issue since we throw away the lvisited set

> +     when we fail.  */

> +  lvisited->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);

> +

> +  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)

> +    if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,

> +                                          visited, lvisited, cost_vec))

> +      return false;

> +

>     /* Push SLP node def-type to stmt operands.  */

>     FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)

>       if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)

>         STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))

>          = SLP_TREE_DEF_TYPE (child);

> -  bool res = vect_analyze_stmt (stmt, &dummy, node, node_instance, cost_vec);

> +  bool res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance,

> +                                                cost_vec);

>     /* Restore def-types.  */

>     FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)

>       if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)

> Index: gcc/testsuite/gfortran.dg/vect/pr85853.f90

> ===================================================================

> --- /dev/null   2018-04-20 16:19:46.369131350 +0100

> +++ gcc/testsuite/gfortran.dg/vect/pr85853.f90  2018-05-23 07:37:12.477578485 +0100

> @@ -0,0 +1,26 @@

> +! Taken from execute/where_2.f90, but with special flags.

> +! { dg-do run }

> +! { dg-additional-options "-O3 -fno-tree-loop-vectorize" }

> +

> +! Program to test the WHERE constructs

> +program where_2

> +   integer temp(10), reduce(10)

> +

> +   temp = 10

> +   reduce(1:3) = -1

> +   reduce(4:6) = 0

> +   reduce(7:8) = 5

> +   reduce(9:10) = 10

> +

> +   WHERE (reduce < 0)

> +      temp = 100

> +   ELSE WHERE (reduce .EQ. 0)

> +      temp = 200 + temp

> +   ELSE WHERE

> +      WHERE (reduce > 6) temp = temp + sum(reduce)

> +      temp = 300 + temp

> +   END WHERE

> +

> +   if (any (temp .ne. (/100, 100, 100, 210, 210, 210, 310, 310, 337, 337/))) &

> +      STOP 1

> +end program

Patch

Index: gcc/tree-vect-slp.c
===================================================================
--- gcc/tree-vect-slp.c	2018-05-17 11:50:31.609158213 +0100
+++ gcc/tree-vect-slp.c	2018-05-23 07:37:12.480578116 +0100
@@ -2476,49 +2476,16 @@  _bb_vec_info::~_bb_vec_info ()
   bb->aux = NULL;
 }
 
-
-/* Analyze statements contained in SLP tree NODE after recursively analyzing
-   the subtree.  NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
-
-   Return true if the operations are supported.  */
+/* Subroutine of vect_slp_analyze_node_operations.  Handle the root of NODE,
+   given that child nodes have already been processed, and that
+   their def types currently match their SLP node's def type.  */
 
 static bool
-vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
-				  slp_instance node_instance,
-				  scalar_stmts_to_slp_tree_map_t *visited,
-				  scalar_stmts_to_slp_tree_map_t *lvisited,
-				  stmt_vector_for_cost *cost_vec)
+vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
+				    slp_instance node_instance,
+				    stmt_vector_for_cost *cost_vec)
 {
-  bool dummy;
-  int i, j;
-  gimple *stmt;
-  slp_tree child;
-
-  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
-    return true;
-
-  /* If we already analyzed the exact same set of scalar stmts we're done.
-     We share the generated vector stmts for those.  */
-  slp_tree *leader;
-  if ((leader = visited->get (SLP_TREE_SCALAR_STMTS (node)))
-      || (leader = lvisited->get (SLP_TREE_SCALAR_STMTS (node))))
-    {
-      SLP_TREE_NUMBER_OF_VEC_STMTS (node)
-	= SLP_TREE_NUMBER_OF_VEC_STMTS (*leader);
-      return true;
-    }
-
-  /* The SLP graph is acyclic so not caching whether we failed or succeeded
-     doesn't result in any issue since we throw away the lvisited set
-     when we fail.  */
-  lvisited->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
-
-  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
-    if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,
-					   visited, lvisited, cost_vec))
-      return false;
-
-  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
+  gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
   gcc_assert (stmt_info);
   gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
@@ -2545,6 +2512,7 @@  vect_slp_analyze_node_operations (vec_in
 	}
 
       gimple *sstmt;
+      unsigned int i;
       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt)
 	STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt)) = vectype;
     }
@@ -2572,12 +2540,56 @@  vect_slp_analyze_node_operations (vec_in
 	= vect_get_num_vectors (vf * group_size, vectype);
     }
 
+  bool dummy;
+  return vect_analyze_stmt (stmt, &dummy, node, node_instance, cost_vec);
+}
+
+/* Analyze statements contained in SLP tree NODE after recursively analyzing
+   the subtree.  NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
+
+   Return true if the operations are supported.  */
+
+static bool
+vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
+				  slp_instance node_instance,
+				  scalar_stmts_to_slp_tree_map_t *visited,
+				  scalar_stmts_to_slp_tree_map_t *lvisited,
+				  stmt_vector_for_cost *cost_vec)
+{
+  int i, j;
+  slp_tree child;
+
+  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
+    return true;
+
+  /* If we already analyzed the exact same set of scalar stmts we're done.
+     We share the generated vector stmts for those.  */
+  slp_tree *leader;
+  if ((leader = visited->get (SLP_TREE_SCALAR_STMTS (node)))
+      || (leader = lvisited->get (SLP_TREE_SCALAR_STMTS (node))))
+    {
+      SLP_TREE_NUMBER_OF_VEC_STMTS (node)
+	= SLP_TREE_NUMBER_OF_VEC_STMTS (*leader);
+      return true;
+    }
+
+  /* The SLP graph is acyclic so not caching whether we failed or succeeded
+     doesn't result in any issue since we throw away the lvisited set
+     when we fail.  */
+  lvisited->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
+
+  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
+    if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,
+					   visited, lvisited, cost_vec))
+      return false;
+
   /* Push SLP node def-type to stmt operands.  */
   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
       STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
 	= SLP_TREE_DEF_TYPE (child);
-  bool res = vect_analyze_stmt (stmt, &dummy, node, node_instance, cost_vec);
+  bool res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance,
+						 cost_vec);
   /* Restore def-types.  */
   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
Index: gcc/testsuite/gfortran.dg/vect/pr85853.f90
===================================================================
--- /dev/null	2018-04-20 16:19:46.369131350 +0100
+++ gcc/testsuite/gfortran.dg/vect/pr85853.f90	2018-05-23 07:37:12.477578485 +0100
@@ -0,0 +1,26 @@ 
+! Taken from execute/where_2.f90, but with special flags.
+! { dg-do run }
+! { dg-additional-options "-O3 -fno-tree-loop-vectorize" }
+
+! Program to test the WHERE constructs
+program where_2
+   integer temp(10), reduce(10)
+
+   temp = 10
+   reduce(1:3) = -1
+   reduce(4:6) = 0
+   reduce(7:8) = 5
+   reduce(9:10) = 10
+
+   WHERE (reduce < 0)
+      temp = 100
+   ELSE WHERE (reduce .EQ. 0)
+      temp = 200 + temp
+   ELSE WHERE
+      WHERE (reduce > 6) temp = temp + sum(reduce)
+      temp = 300 + temp
+   END WHERE
+
+   if (any (temp .ne. (/100, 100, 100, 210, 210, 210, 310, 310, 337, 337/))) &
+      STOP 1
+end program