diff mbox

Fold strstr (s, t) eq/ne s to strcmp (s, t) eq/ne 0 if strlen (t) is known

Message ID CAAgBjMnS7bQP5dUfh3A2==EwmNDLNrkzs2qWwrH54sbE5vqm_A@mail.gmail.com
State Superseded
Headers show

Commit Message

Prathamesh Kulkarni Dec. 13, 2016, 12:11 p.m. UTC
On 13 December 2016 at 15:27, Jakub Jelinek <jakub@redhat.com> wrote:
> On Tue, Dec 13, 2016 at 03:08:17PM +0530, Prathamesh Kulkarni wrote:

>> Thanks for the suggestions. It didn't occur to me to check for gimple_cond.

>> I have tried to do the changes in the attached version.

>> I am not sure if I have handled cond_expr correctly.

>> IIUC, if gimple_assign has code cond_expr, then the condition is

>> stored in gimple_assign_rhs1,

>> however it's not a single operand but a tree of the form "op1 cond_code op2".

>> Is that correct ?

>

> Yes.  gimple_assign_rhs1 will be in what you are looking for EQ_EXPR or

> NE_EXPR tree, its TREE_CODE will be this code you want to check, and

> TREE_OPERAND (exp, 0) and TREE_OPERAND (exp, 1) the rhs1 and rhs2 you use

> elsewhere.

>

>> However I am not able to write a test-case that generates cond_expr in the IL.

>> I tried:

>> t1 = strstr (s, t);

>> (t1 == s)  ? foo() : bar ();

>> and other such variants but it seems the ?: operator is getting

>> lowered to gimple_cond instead.

>

> It is, but in some cases tree-if-conv.c turns them back into COND_EXPRs.

> I guess you need -ftree-loop-if-convert now, and it has to be in some loop

> where the addition of cond_expr would likely turn it into a single bb loop.

> You probably want constants or vars, not function calls in the ? :

> expressions though.

>

>> +/* Try to fold strstr (s, t) == s to memcmp (s, t, strlen (t)) == 0.  */

>> +

>> +static void

>> +fold_strstr_to_memcmp(enum tree_code code, tree rhs1, tree rhs2, gimple *stmt)

>

> Formatting, space before (.

>

>> +{

>> +  gimple *call_stmt = NULL;

>> +  for (int pass = 0; pass < 2; pass++)

>> +    {

>> +      gimple *g = SSA_NAME_DEF_STMT (rhs1);

>> +      if (g

>

> I think g should be always non-NULL (except for invalid IL), so probably no

> need to check it.

Ah indeed, thanks for pointing out. I assumed if ssa-var has default definition,
then SSA_NAME_DEF_STMT would be NULL, but it's GIMPLE_NOP.
>

>> +       && gimple_call_builtin_p (g, BUILT_IN_STRSTR)

>> +       && has_single_use (rhs1)

>> +       && gimple_call_arg (as_a<gcall *> (g), 0) == rhs2)

>

> I think gimple_call_arg works fine even with just gimple * argument.

> So you can avoid the as_a<gcall *> (g) uglification and just use g.

>

>> +           if (is_gimple_assign (stmt))

>> +             {

>> +               if (gimple_assign_rhs_code (stmt) == COND_EXPR)

>> +                 {

>> +                   tree cond = gimple_assign_rhs1 (stmt);

>> +                   TREE_SET_CODE (cond, EQ_EXPR);

>

> This looks weird.  You are hardcoding EQ_EXPR, while for the

> other case below you use code.  So, do you handle properly both

> EQ_EXPR and NE_EXPR for this and gimple_cond cases?

> Also, for non-COND_EXPR assign you build a new stmt instead of reusing

> the existing one, why?

>

>> +                   TREE_OPERAND (cond, 0) = memcmp_lhs;

>> +                   TREE_OPERAND (cond, 1) = zero;

>> +                   update_stmt (stmt);

>> +                 }

>> +               else

>> +                 {

>> +                   gsi = gsi_for_stmt (stmt);

>> +                   tree lhs = gimple_assign_lhs (stmt);

>> +                   gassign *ga = gimple_build_assign (lhs, code, memcmp_lhs,

>> +                                                      zero);

>> +                   gsi_replace (&gsi, ga, false);

>> +                 }

>> +             }

>> +           else

>> +             {

>> +               gcond *cond = as_a<gcond *> (stmt);

>> +               gimple_cond_set_lhs (cond, memcmp_lhs);

>> +               gimple_cond_set_rhs (cond, zero);

>> +               gimple_cond_set_code (cond, EQ_EXPR);

>

> Likewise here.

Oops, sorry about that :/
Does this version look OK ?
Bootstrap+test in progress.

Thanks,
Prathamesh
>

>         Jakub
2016-12-13  Jakub Jelinek  <jakub@redhat.com>
	    Prathamesh Kulkarni  <prathamesh.kulkarni@linaro.org>

	* tree-ssa-strlen.c (fold_strstr_to_memcmp): New function.
	(strlen_optimize_stmt): Call fold_strstr_to_memcmp.

testsuite/
	* gcc.dg/strlenopt-30.c: New test-case.

Comments

Jakub Jelinek Dec. 13, 2016, 12:24 p.m. UTC | #1
On Tue, Dec 13, 2016 at 05:41:09PM +0530, Prathamesh Kulkarni wrote:
> --- a/gcc/tree-ssa-strlen.c

> +++ b/gcc/tree-ssa-strlen.c

> @@ -2222,6 +2222,90 @@ handle_char_store (gimple_stmt_iterator *gsi)

>    return true;

>  }

>  

> +/* Try to fold strstr (s, t) eq/ne s to memcmp (s, t, strlen (t)) eq/ne 0.  */

> +

> +static void

> +fold_strstr_to_memcmp (enum tree_code code, tree rhs1, tree rhs2, gimple *stmt)


You can drop code argument here, see below.  And I'd say it is better to
do the
  if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
    return;
here than repeat it in all the callers.

> +		  if (gimple_assign_rhs_code (stmt) == COND_EXPR)

> +		    {

> +		      tree cond = gimple_assign_rhs1 (stmt);

> +		      TREE_SET_CODE (cond, code);


TREE_CODE (cond) is already code, so no need to set it again.

> +		  gcond *cond = as_a<gcond *> (stmt);

> +		  gimple_cond_set_lhs (cond, memcmp_lhs);

> +		  gimple_cond_set_rhs (cond, zero);

> +		  gimple_cond_set_code (cond, code);


And gimple_cond_code (cond) == code here too.

> +		  update_stmt (cond);

> +		}


You can perhaps move the update_stmt (stmt); to a single spot after
all the 3 cases are handled.

> +	    if (cond_code == EQ_EXPR || cond_code == NE_EXPR)

> +	      {

> +		tree rhs1 = TREE_OPERAND (cond, 0);

> +		tree rhs2 = TREE_OPERAND (cond, 1);


While it is necessary to check cond_code here and in the other spots
similarly, because otherwise you don't know if it has 2 arguments etc.,
you can avoid the SSA_NAME tests here.

> +		if (TREE_CODE (rhs1) == SSA_NAME

> +		    && TREE_CODE (rhs2) == SSA_NAME)

> +		  fold_strstr_to_memcmp (cond_code, rhs1, rhs2, stmt);

> +	      }

> +	  }

> +	else if (code == EQ_EXPR || code == NE_EXPR)

> +	  {

> +	    tree rhs1 = gimple_assign_rhs1 (stmt);

> +	    tree rhs2 = gimple_assign_rhs2 (stmt);

> +

> +	    if (TREE_CODE (rhs1) == SSA_NAME

> +		&& TREE_CODE (rhs2) == SSA_NAME)


And here.
> +	      fold_strstr_to_memcmp (code, rhs1, rhs2, stmt);

> +	  }

> +      }

> +    else if (TREE_CODE (lhs) != SSA_NAME && !TREE_SIDE_EFFECTS (lhs))

>  	{

>  	  tree type = TREE_TYPE (lhs);

>  	  if (TREE_CODE (type) == ARRAY_TYPE)

> @@ -2316,6 +2427,17 @@ strlen_optimize_stmt (gimple_stmt_iterator *gsi)

>  	    }

>  	}

>      }

> +  else if (gcond *cond = dyn_cast<gcond *> (stmt))

> +    {

> +      enum tree_code code = gimple_cond_code (cond);

> +      tree lhs = gimple_cond_lhs (stmt);

> +      tree rhs = gimple_cond_rhs (stmt);

> +

> +      if ((code == EQ_EXPR || code == NE_EXPR)

> +	  && TREE_CODE (lhs) == SSA_NAME

> +	  && TREE_CODE (rhs) == SSA_NAME)


And here.
> +	fold_strstr_to_memcmp (code, lhs, rhs, stmt);

> +    }

>  

>    if (gimple_vdef (stmt))

>      maybe_invalidate (stmt);


Otherwise LGTM, but it would be nice to cover also the COND_EXPR case by a
testcase (can be done incrementally).

	Jakub
diff mbox

Patch

diff --git a/gcc/testsuite/gcc.dg/strlenopt-30.c b/gcc/testsuite/gcc.dg/strlenopt-30.c
new file mode 100644
index 0000000..089b3a2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/strlenopt-30.c
@@ -0,0 +1,63 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-strlen" } */
+
+__attribute__((no_icf))
+_Bool f1(char *s)
+{
+  return __builtin_strstr (s, "hello") == s;
+}
+
+__attribute__((no_icf))
+_Bool f2(char *s)
+{
+  return s == __builtin_strstr (s, "hello");
+}
+
+__attribute__((no_icf))
+_Bool f3(char *s)
+{
+  return s != __builtin_strstr (s, "hello");
+}
+
+__attribute__((no_icf))
+_Bool f4()
+{
+  char *foo_f4(void);
+  char *t1 = foo_f4();
+  char *t2 = __builtin_strstr (t1, "hello");
+  _Bool t3 = t2 == t1;
+  return t3;
+}
+
+__attribute__((no_icf))
+void f5(char *s)
+{
+  char *t1 = __builtin_strstr (s, "hello");
+  void foo_f5(void);
+  if (t1 != s)
+    foo_f5();
+}
+
+/* Do not perform transform, since strlen (t)
+   is unknown.  */
+
+__attribute__((no_icf))
+_Bool f6(char *s, char *t)
+{
+  return __builtin_strstr (s, t) == s;
+}
+
+/* Do not perform transform in this case, since
+   t1 doesn't have single use.  */
+
+__attribute__((no_icf))
+_Bool f7(char *s)
+{
+  void foo_f7(char *);
+
+  char *t1 = __builtin_strstr (s, "hello");
+  foo_f7 (t1);
+  return (t1 == s);
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin_memcmp" 5 "strlen" } } */
diff --git a/gcc/tree-ssa-strlen.c b/gcc/tree-ssa-strlen.c
index 339812e..b19dab6 100644
--- a/gcc/tree-ssa-strlen.c
+++ b/gcc/tree-ssa-strlen.c
@@ -2222,6 +2222,90 @@  handle_char_store (gimple_stmt_iterator *gsi)
   return true;
 }
 
+/* Try to fold strstr (s, t) eq/ne s to memcmp (s, t, strlen (t)) eq/ne 0.  */
+
+static void
+fold_strstr_to_memcmp (enum tree_code code, tree rhs1, tree rhs2, gimple *stmt)
+{
+  gimple *call_stmt = NULL;
+  for (int pass = 0; pass < 2; pass++)
+    {
+      gimple *g = SSA_NAME_DEF_STMT (rhs1);
+      if (gimple_call_builtin_p (g, BUILT_IN_STRSTR)
+	  && has_single_use (rhs1)
+	  && gimple_call_arg (g, 0) == rhs2)
+	{
+	  call_stmt = g;
+	  break;
+	}
+      std::swap (rhs1, rhs2);
+    }
+
+  if (call_stmt)
+    {
+      tree arg0 = gimple_call_arg (call_stmt, 0);
+
+      if (arg0 == rhs2)
+	{
+	  tree arg1 = gimple_call_arg (call_stmt, 1);
+	  tree arg1_len = NULL_TREE;
+	  int idx = get_stridx (arg1);
+
+	  if (idx)
+	    {
+	      if (idx < 0)
+		arg1_len = build_int_cst (size_type_node, ~idx);
+	      else
+		{
+		  strinfo *si = get_strinfo (idx);
+		  if (si)
+		    arg1_len = get_string_length (si);
+		}
+	    }
+
+	  if (arg1_len != NULL_TREE)
+	    {
+	      gimple_stmt_iterator gsi = gsi_for_stmt (call_stmt);
+	      tree memcmp_decl = builtin_decl_explicit (BUILT_IN_MEMCMP);
+	      gcall *memcmp_call = gimple_build_call (memcmp_decl, 3,
+						      arg0, arg1, arg1_len);
+	      tree memcmp_lhs = make_ssa_name (integer_type_node);
+	      gimple_set_vuse (memcmp_call, gimple_vuse (call_stmt));
+	      gimple_call_set_lhs (memcmp_call, memcmp_lhs);
+	      gsi_remove (&gsi, true);
+	      gsi_insert_before (&gsi, memcmp_call, GSI_SAME_STMT);
+	      tree zero = build_zero_cst (TREE_TYPE (memcmp_lhs));
+
+	      if (is_gimple_assign (stmt))
+		{
+		  if (gimple_assign_rhs_code (stmt) == COND_EXPR)
+		    {
+		      tree cond = gimple_assign_rhs1 (stmt);
+		      TREE_SET_CODE (cond, code);
+		      TREE_OPERAND (cond, 0) = memcmp_lhs;
+		      TREE_OPERAND (cond, 1) = zero;
+		      update_stmt (stmt);
+		    }
+		  else
+		    {
+		      gimple_assign_set_rhs1 (stmt, memcmp_lhs);
+		      gimple_assign_set_rhs2 (stmt, zero);
+		      update_stmt (stmt);
+		    }
+		}
+	      else
+		{
+		  gcond *cond = as_a<gcond *> (stmt);
+		  gimple_cond_set_lhs (cond, memcmp_lhs);
+		  gimple_cond_set_rhs (cond, zero);
+		  gimple_cond_set_code (cond, code);
+		  update_stmt (cond);
+		}
+	    }
+	}
+    }
+}
+
 /* Attempt to optimize a single statement at *GSI using string length
    knowledge.  */
 
@@ -2302,7 +2386,34 @@  strlen_optimize_stmt (gimple_stmt_iterator *gsi)
 	  else if (gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR)
 	    handle_pointer_plus (gsi);
 	}
-      else if (TREE_CODE (lhs) != SSA_NAME && !TREE_SIDE_EFFECTS (lhs))
+    else if (TREE_CODE (lhs) == SSA_NAME && INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
+      {
+	enum tree_code code = gimple_assign_rhs_code (stmt);
+	if (code == COND_EXPR)
+	  {
+	    tree cond = gimple_assign_rhs1 (stmt);
+	    enum tree_code cond_code = TREE_CODE (cond);
+
+	    if (cond_code == EQ_EXPR || cond_code == NE_EXPR)
+	      {
+		tree rhs1 = TREE_OPERAND (cond, 0);
+		tree rhs2 = TREE_OPERAND (cond, 1);
+		if (TREE_CODE (rhs1) == SSA_NAME
+		    && TREE_CODE (rhs2) == SSA_NAME)
+		  fold_strstr_to_memcmp (cond_code, rhs1, rhs2, stmt);
+	      }
+	  }
+	else if (code == EQ_EXPR || code == NE_EXPR)
+	  {
+	    tree rhs1 = gimple_assign_rhs1 (stmt);
+	    tree rhs2 = gimple_assign_rhs2 (stmt);
+
+	    if (TREE_CODE (rhs1) == SSA_NAME
+		&& TREE_CODE (rhs2) == SSA_NAME)
+	      fold_strstr_to_memcmp (code, rhs1, rhs2, stmt);
+	  }
+      }
+    else if (TREE_CODE (lhs) != SSA_NAME && !TREE_SIDE_EFFECTS (lhs))
 	{
 	  tree type = TREE_TYPE (lhs);
 	  if (TREE_CODE (type) == ARRAY_TYPE)
@@ -2316,6 +2427,17 @@  strlen_optimize_stmt (gimple_stmt_iterator *gsi)
 	    }
 	}
     }
+  else if (gcond *cond = dyn_cast<gcond *> (stmt))
+    {
+      enum tree_code code = gimple_cond_code (cond);
+      tree lhs = gimple_cond_lhs (stmt);
+      tree rhs = gimple_cond_rhs (stmt);
+
+      if ((code == EQ_EXPR || code == NE_EXPR)
+	  && TREE_CODE (lhs) == SSA_NAME
+	  && TREE_CODE (rhs) == SSA_NAME)
+	fold_strstr_to_memcmp (code, lhs, rhs, stmt);
+    }
 
   if (gimple_vdef (stmt))
     maybe_invalidate (stmt);