diff mbox series

Improve vectorisation of COND_EXPR <bool op bool, ...>

Message ID 874lqbpbum.fsf@linaro.org
State New
Headers show
Series Improve vectorisation of COND_EXPR <bool op bool, ...> | expand

Commit Message

Richard Sandiford Nov. 3, 2017, 4:26 p.m. UTC
This patch allows us to recognise:

    ... = bool1 != bool2 ? x : y

as equivalent to:

    bool tmp = bool1 ^ bool2;
    ... = tmp ? x : y

For the latter we were already able to find the natural number
of vector units for tmp based on the types that feed bool1 and
bool2, whereas with the former we would simply treat bool1 and
bool2 as vectorised 8-bit values, possibly requiring them to
be packed and unpacked from their natural width.

This is used by a later SVE patch.


2017-11-03  Richard Sandiford  <richard.sandiford@linaro.org>
	    Alan Hayward  <alan.hayward@arm.com>
	    David Sherwood  <david.sherwood@arm.com>

gcc/
	* tree-vect-patterns.c (vect_recog_mask_conversion_pattern): When
	handling COND_EXPRs with boolean comparisons, try to find a better
	basis for the mask type than the boolean itself.

Comments

Richard Biener Nov. 7, 2017, 11:19 a.m. UTC | #1
On Fri, Nov 3, 2017 at 5:26 PM, Richard Sandiford
<richard.sandiford@linaro.org> wrote:
> This patch allows us to recognise:

>

>     ... = bool1 != bool2 ? x : y

>

> as equivalent to:

>

>     bool tmp = bool1 ^ bool2;

>     ... = tmp ? x : y

>

> For the latter we were already able to find the natural number

> of vector units for tmp based on the types that feed bool1 and

> bool2, whereas with the former we would simply treat bool1 and

> bool2 as vectorised 8-bit values, possibly requiring them to

> be packed and unpacked from their natural width.

>

> This is used by a later SVE patch.


Ok.

I wonder if you can see code generation improvements for
AVX512 as well?  (testcase?)

Thanks,
Richard.

>

> 2017-11-03  Richard Sandiford  <richard.sandiford@linaro.org>

>             Alan Hayward  <alan.hayward@arm.com>

>             David Sherwood  <david.sherwood@arm.com>

>

> gcc/

>         * tree-vect-patterns.c (vect_recog_mask_conversion_pattern): When

>         handling COND_EXPRs with boolean comparisons, try to find a better

>         basis for the mask type than the boolean itself.

>

> Index: gcc/tree-vect-patterns.c

> ===================================================================

> --- gcc/tree-vect-patterns.c    2017-11-03 12:17:34.392744807 +0000

> +++ gcc/tree-vect-patterns.c    2017-11-03 12:17:36.313554835 +0000

> @@ -3968,15 +3968,43 @@ vect_recog_mask_conversion_pattern (vec<

>             return NULL;

>         }

>        else if (COMPARISON_CLASS_P (rhs1))

> -       rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));

> +       {

> +         /* Check whether we're comparing scalar booleans and (if so)

> +            whether a better mask type exists than the mask associated

> +            with boolean-sized elements.  This avoids unnecessary packs

> +            and unpacks if the booleans are set from comparisons of

> +            wider types.  E.g. in:

> +

> +              int x1, x2, x3, x4, y1, y2;

> +              ...

> +              bool b1 = (x1 == x2);

> +              bool b2 = (x3 == x4);

> +              ... = b1 == b2 ? y1 : y2;

> +

> +            it is better for b1 and b2 to use the mask type associated

> +            with int elements rather than bool (byte) elements.  */

> +         rhs1_type = search_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);

> +         if (!rhs1_type)

> +           rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));

> +       }

>        else

>         return NULL;

>

>        vectype2 = get_mask_type_for_scalar_type (rhs1_type);

>

> -      if (!vectype1 || !vectype2

> -         || must_eq (TYPE_VECTOR_SUBPARTS (vectype1),

> -                     TYPE_VECTOR_SUBPARTS (vectype2)))

> +      if (!vectype1 || !vectype2)

> +       return NULL;

> +

> +      /* Continue if a conversion is needed.  Also continue if we have

> +        a comparison whose vector type would normally be different from

> +        VECTYPE2 when considered in isolation.  In that case we'll

> +        replace the comparison with an SSA name (so that we can record

> +        its vector type) and behave as though the comparison was an SSA

> +        name from the outset.  */

> +      if (must_eq (TYPE_VECTOR_SUBPARTS (vectype1),

> +                  TYPE_VECTOR_SUBPARTS (vectype2))

> +         && (TREE_CODE (rhs1) == SSA_NAME

> +             || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))

>         return NULL;

>

>        /* If rhs1 is a comparison we need to move it into a

> @@ -3993,7 +4021,11 @@ vect_recog_mask_conversion_pattern (vec<

>           append_pattern_def_seq (stmt_vinfo, pattern_stmt);

>         }

>

> -      tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);

> +      if (may_ne (TYPE_VECTOR_SUBPARTS (vectype1),

> +                 TYPE_VECTOR_SUBPARTS (vectype2)))

> +       tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);

> +      else

> +       tmp = rhs1;

>

>        lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);

>        pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
diff mbox series

Patch

Index: gcc/tree-vect-patterns.c
===================================================================
--- gcc/tree-vect-patterns.c	2017-11-03 12:17:34.392744807 +0000
+++ gcc/tree-vect-patterns.c	2017-11-03 12:17:36.313554835 +0000
@@ -3968,15 +3968,43 @@  vect_recog_mask_conversion_pattern (vec<
 	    return NULL;
 	}
       else if (COMPARISON_CLASS_P (rhs1))
-	rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
+	{
+	  /* Check whether we're comparing scalar booleans and (if so)
+	     whether a better mask type exists than the mask associated
+	     with boolean-sized elements.  This avoids unnecessary packs
+	     and unpacks if the booleans are set from comparisons of
+	     wider types.  E.g. in:
+
+	       int x1, x2, x3, x4, y1, y2;
+	       ...
+	       bool b1 = (x1 == x2);
+	       bool b2 = (x3 == x4);
+	       ... = b1 == b2 ? y1 : y2;
+
+	     it is better for b1 and b2 to use the mask type associated
+	     with int elements rather than bool (byte) elements.  */
+	  rhs1_type = search_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);
+	  if (!rhs1_type)
+	    rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
+	}
       else
 	return NULL;
 
       vectype2 = get_mask_type_for_scalar_type (rhs1_type);
 
-      if (!vectype1 || !vectype2
-	  || must_eq (TYPE_VECTOR_SUBPARTS (vectype1),
-		      TYPE_VECTOR_SUBPARTS (vectype2)))
+      if (!vectype1 || !vectype2)
+	return NULL;
+
+      /* Continue if a conversion is needed.  Also continue if we have
+	 a comparison whose vector type would normally be different from
+	 VECTYPE2 when considered in isolation.  In that case we'll
+	 replace the comparison with an SSA name (so that we can record
+	 its vector type) and behave as though the comparison was an SSA
+	 name from the outset.  */
+      if (must_eq (TYPE_VECTOR_SUBPARTS (vectype1),
+		   TYPE_VECTOR_SUBPARTS (vectype2))
+	  && (TREE_CODE (rhs1) == SSA_NAME
+	      || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))
 	return NULL;
 
       /* If rhs1 is a comparison we need to move it into a
@@ -3993,7 +4021,11 @@  vect_recog_mask_conversion_pattern (vec<
 	  append_pattern_def_seq (stmt_vinfo, pattern_stmt);
 	}
 
-      tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
+      if (may_ne (TYPE_VECTOR_SUBPARTS (vectype1),
+		  TYPE_VECTOR_SUBPARTS (vectype2)))
+	tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
+      else
+	tmp = rhs1;
 
       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
       pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,