PR82665 - missing value range optimization for memchr

Message ID CAAgBjMkt=jdK1=QWbq0Vmw42xU1T-wWhWJ9xaO+QZQhvxxTDTw@mail.gmail.com
State New
Headers show
Series
  • PR82665 - missing value range optimization for memchr
Related show

Commit Message

Prathamesh Kulkarni Nov. 20, 2017, 10:43 a.m.
Hi,
The attached patch tries to fix PR82665 by adding value-range for 'n'
to [0, PTRDIFF_MAX - 1] in the following case:
def = memchr(arg, 0, sz);
n = def - arg

where def and arg are char *. I suppose it's safe to assume that if
arg is char *, then
memchr(arg, 0, sz) would return a non NULL pointer ?
The patch could also be extended to handle more functions like
memrchr, but I was wondering
if it is in right direction ?
Bootstrapped+tested on x86_64-unknown-linux-gnu
and cross-tested on arm*-*-*, aarch64*-*-*.

Thanks,
Prathamesh

Comments

Jakub Jelinek Nov. 20, 2017, 10:49 a.m. | #1
On Mon, Nov 20, 2017 at 04:13:49PM +0530, Prathamesh Kulkarni wrote:
> Hi,

> The attached patch tries to fix PR82665 by adding value-range for 'n'

> to [0, PTRDIFF_MAX - 1] in the following case:

> def = memchr(arg, 0, sz);

> n = def - arg

> 

> where def and arg are char *. I suppose it's safe to assume that if

> arg is char *, then

> memchr(arg, 0, sz) would return a non NULL pointer ?


I don't think it is safe, at least not until we have the POINTER_DIFF_EXPR.
Because
char *def = memchr (arg, 0, sz);
uintptr_t n = (uintptr_t) def - (uintptr_t) arg;
is valid even if def is NULL and you can't differentiate between original
pointer difference which would invoke UB if def was NULL and the case where
user did the subtraction in an integral type.

	Jakub
Prathamesh Kulkarni Jan. 2, 2018, 12:09 p.m. | #2
On 20 November 2017 at 16:19, Jakub Jelinek <jakub@redhat.com> wrote:
> On Mon, Nov 20, 2017 at 04:13:49PM +0530, Prathamesh Kulkarni wrote:

>> Hi,

>> The attached patch tries to fix PR82665 by adding value-range for 'n'

>> to [0, PTRDIFF_MAX - 1] in the following case:

>> def = memchr(arg, 0, sz);

>> n = def - arg

>>

>> where def and arg are char *. I suppose it's safe to assume that if

>> arg is char *, then

>> memchr(arg, 0, sz) would return a non NULL pointer ?

>

> I don't think it is safe, at least not until we have the POINTER_DIFF_EXPR.

> Because

> char *def = memchr (arg, 0, sz);

> uintptr_t n = (uintptr_t) def - (uintptr_t) arg;

> is valid even if def is NULL and you can't differentiate between original

> pointer difference which would invoke UB if def was NULL and the case where

> user did the subtraction in an integral type.

Hi,
I updated the patch based on POINTER_DIFF_EXPR.
Bootstrapped and tested on x86_64-unknown-linux-gnu.
Does it look OK ?

Thanks,
Prathamesh
>

>         Jakub
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr82665.c b/gcc/testsuite/gcc.dg/tree-ssa/pr82665.c
new file mode 100644
index 00000000000..17be6ec4e4b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr82665.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+void f1 (char *p, __SIZE_TYPE__ sz)
+{
+  char *q = __builtin_memchr (p, 0, sz);
+  long n = q - p;
+
+  if (n >= __PTRDIFF_MAX__)
+    __builtin_abort ();
+}
+
+void f2 (unsigned char *p, __SIZE_TYPE__ sz)
+{
+  unsigned char *q = __builtin_memchr (p, 0, sz);
+  long n = q - p;
+
+  if (n >= __PTRDIFF_MAX__)
+    __builtin_abort ();
+}
+
+/* { dg-final { scan-tree-dump-times "memchr" 1 "optimized" } } */
diff --git a/gcc/vr-values.c b/gcc/vr-values.c
index 794b4635f9e..5385c91f1ec 100644
--- a/gcc/vr-values.c
+++ b/gcc/vr-values.c
@@ -793,6 +793,42 @@ vr_values::extract_range_from_binary_expr (value_range *vr,
 
   extract_range_from_binary_expr_1 (vr, code, expr_type, &vr0, &vr1);
 
+  /* Set value_range for n in following sequence:
+     def = __builtin_memchr (arg, 0, sz)
+     n = def - arg
+     Here the range for n can be set to [0, PTRDIFF_MAX - 1]. */
+
+  if (vr->type == VR_VARYING
+      && (code == POINTER_DIFF_EXPR)
+      && (TREE_CODE (op0) == SSA_NAME)
+      && (TREE_CODE (op1) == SSA_NAME))
+    {
+      tree def = op0;
+      tree arg = op1;
+
+      gcall *call_stmt = NULL;
+      if (def && arg
+	  && (TREE_CODE (def) == SSA_NAME)
+	  && ((TREE_CODE (TREE_TYPE (def)) == POINTER_TYPE)
+	      && (TREE_TYPE (TREE_TYPE (def)) == char_type_node))
+	  && (TREE_CODE (arg) == SSA_NAME)
+	  && ((TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE)
+	      && (TREE_TYPE (TREE_TYPE (arg)) == char_type_node))
+	  && (call_stmt = dyn_cast<gcall *>(SSA_NAME_DEF_STMT (def)))
+	  && (gimple_call_combined_fn (call_stmt) == CFN_BUILT_IN_MEMCHR)
+	  && operand_equal_p (def, gimple_call_lhs (call_stmt), 0)
+	  && operand_equal_p (arg, gimple_call_arg (call_stmt, 0), 0)
+	  && integer_zerop (gimple_call_arg (call_stmt, 1)))
+	    {
+	      tree max = vrp_val_max (ptrdiff_type_node);
+	      wide_int wmax = wi::to_wide (max, TYPE_PRECISION (TREE_TYPE (max)));
+	      tree range_min = build_zero_cst (expr_type);
+	      tree range_max = wide_int_to_tree (expr_type, wmax - 1);
+	      set_value_range (vr, VR_RANGE, range_min, range_max, NULL);
+	      return;
+	    }
+     }
+
   /* Try harder for PLUS and MINUS if the range of one operand is symbolic
      and based on the other operand, for example if it was deduced from a
      symbolic comparison.  When a bound of the range of the first operand
Jakub Jelinek Jan. 2, 2018, 12:19 p.m. | #3
On Tue, Jan 02, 2018 at 05:39:17PM +0530, Prathamesh Kulkarni wrote:
> --- /dev/null

> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr82665.c

> @@ -0,0 +1,22 @@

> +/* { dg-do compile } */

> +/* { dg-options "-O2 -fdump-tree-optimized" } */

> +

> +void f1 (char *p, __SIZE_TYPE__ sz)

> +{

> +  char *q = __builtin_memchr (p, 0, sz);

> +  long n = q - p;


Please use __PTRDIFF_TYPE__ instead of long, here and in other spots.

> --- a/gcc/vr-values.c

> +++ b/gcc/vr-values.c

> @@ -793,6 +793,42 @@ vr_values::extract_range_from_binary_expr (value_range *vr,

>  

>    extract_range_from_binary_expr_1 (vr, code, expr_type, &vr0, &vr1);

>  

> +  /* Set value_range for n in following sequence:

> +     def = __builtin_memchr (arg, 0, sz)

> +     n = def - arg

> +     Here the range for n can be set to [0, PTRDIFF_MAX - 1]. */

> +

> +  if (vr->type == VR_VARYING

> +      && (code == POINTER_DIFF_EXPR)

> +      && (TREE_CODE (op0) == SSA_NAME)

> +      && (TREE_CODE (op1) == SSA_NAME))


Please drop the useless ()s around the comparisons.  They are needed
only if the condition is multi-line and needed for emacs indentation,
or around assignments tested as conditions.

> +      gcall *call_stmt = NULL;

> +      if (def && arg

> +	  && (TREE_CODE (def) == SSA_NAME)


Likewise (many times).

> +	  && ((TREE_CODE (TREE_TYPE (def)) == POINTER_TYPE)

> +	      && (TREE_TYPE (TREE_TYPE (def)) == char_type_node))

> +	  && (TREE_CODE (arg) == SSA_NAME)

> +	  && ((TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE)

> +	      && (TREE_TYPE (TREE_TYPE (arg)) == char_type_node))


What is so special about char_type_node?  Why e.g. unsigned char or signed
char pointer shouldn't be handled the same?

> +	  && (call_stmt = dyn_cast<gcall *>(SSA_NAME_DEF_STMT (def)))

> +	  && (gimple_call_combined_fn (call_stmt) == CFN_BUILT_IN_MEMCHR)


We don't have an ifn for memchr, so why this?  On the other side, it should
be making sure one doesn't use unprototyped memchr with weirdo argument
types, so you need gimple_call_builtin_p.

> +	  && operand_equal_p (def, gimple_call_lhs (call_stmt), 0)

> +	  && operand_equal_p (arg, gimple_call_arg (call_stmt, 0), 0)

> +	  && integer_zerop (gimple_call_arg (call_stmt, 1)))

> +	    {

> +	      tree max = vrp_val_max (ptrdiff_type_node);

> +	      wide_int wmax = wi::to_wide (max, TYPE_PRECISION (TREE_TYPE (max)));

> +	      tree range_min = build_zero_cst (expr_type);

> +	      tree range_max = wide_int_to_tree (expr_type, wmax - 1);

> +	      set_value_range (vr, VR_RANGE, range_min, range_max, NULL);

> +	      return;

> +	    }

> +     }

> +

>    /* Try harder for PLUS and MINUS if the range of one operand is symbolic

>       and based on the other operand, for example if it was deduced from a

>       symbolic comparison.  When a bound of the range of the first operand



	Jakub
Prathamesh Kulkarni Jan. 3, 2018, 7:03 a.m. | #4
On 2 January 2018 at 17:49, Jakub Jelinek <jakub@redhat.com> wrote:
> On Tue, Jan 02, 2018 at 05:39:17PM +0530, Prathamesh Kulkarni wrote:

>> --- /dev/null

>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr82665.c

>> @@ -0,0 +1,22 @@

>> +/* { dg-do compile } */

>> +/* { dg-options "-O2 -fdump-tree-optimized" } */

>> +

>> +void f1 (char *p, __SIZE_TYPE__ sz)

>> +{

>> +  char *q = __builtin_memchr (p, 0, sz);

>> +  long n = q - p;

>

> Please use __PTRDIFF_TYPE__ instead of long, here and in other spots.

>

>> --- a/gcc/vr-values.c

>> +++ b/gcc/vr-values.c

>> @@ -793,6 +793,42 @@ vr_values::extract_range_from_binary_expr (value_range *vr,

>>

>>    extract_range_from_binary_expr_1 (vr, code, expr_type, &vr0, &vr1);

>>

>> +  /* Set value_range for n in following sequence:

>> +     def = __builtin_memchr (arg, 0, sz)

>> +     n = def - arg

>> +     Here the range for n can be set to [0, PTRDIFF_MAX - 1]. */

>> +

>> +  if (vr->type == VR_VARYING

>> +      && (code == POINTER_DIFF_EXPR)

>> +      && (TREE_CODE (op0) == SSA_NAME)

>> +      && (TREE_CODE (op1) == SSA_NAME))

>

> Please drop the useless ()s around the comparisons.  They are needed

> only if the condition is multi-line and needed for emacs indentation,

> or around assignments tested as conditions.

>

>> +      gcall *call_stmt = NULL;

>> +      if (def && arg

>> +       && (TREE_CODE (def) == SSA_NAME)

>

> Likewise (many times).

>

>> +       && ((TREE_CODE (TREE_TYPE (def)) == POINTER_TYPE)

>> +           && (TREE_TYPE (TREE_TYPE (def)) == char_type_node))

>> +       && (TREE_CODE (arg) == SSA_NAME)

>> +       && ((TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE)

>> +           && (TREE_TYPE (TREE_TYPE (arg)) == char_type_node))

>

> What is so special about char_type_node?  Why e.g. unsigned char or signed

> char pointer shouldn't be handled the same?

>

>> +       && (call_stmt = dyn_cast<gcall *>(SSA_NAME_DEF_STMT (def)))

>> +       && (gimple_call_combined_fn (call_stmt) == CFN_BUILT_IN_MEMCHR)

>

> We don't have an ifn for memchr, so why this?  On the other side, it should

> be making sure one doesn't use unprototyped memchr with weirdo argument

> types, so you need gimple_call_builtin_p.

Hi Jakub,
Thanks for the review. I have tried to address your suggestions in the
attached patch.
Does it look OK ?
Validation in progress.

Thanks,
Prathamesh
>

>> +       && operand_equal_p (def, gimple_call_lhs (call_stmt), 0)

>> +       && operand_equal_p (arg, gimple_call_arg (call_stmt, 0), 0)

>> +       && integer_zerop (gimple_call_arg (call_stmt, 1)))

>> +         {

>> +           tree max = vrp_val_max (ptrdiff_type_node);

>> +           wide_int wmax = wi::to_wide (max, TYPE_PRECISION (TREE_TYPE (max)));

>> +           tree range_min = build_zero_cst (expr_type);

>> +           tree range_max = wide_int_to_tree (expr_type, wmax - 1);

>> +           set_value_range (vr, VR_RANGE, range_min, range_max, NULL);

>> +           return;

>> +         }

>> +     }

>> +

>>    /* Try harder for PLUS and MINUS if the range of one operand is symbolic

>>       and based on the other operand, for example if it was deduced from a

>>       symbolic comparison.  When a bound of the range of the first operand

>

>

>         Jakub
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr82665.c b/gcc/testsuite/gcc.dg/tree-ssa/pr82665.c
new file mode 100644
index 00000000000..e42ca34cc61
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr82665.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+void f1 (char *p, __SIZE_TYPE__ sz)
+{
+  char *q = __builtin_memchr (p, 0, sz);
+  __PTRDIFF_TYPE__ n = q - p;
+
+  if (n >= __PTRDIFF_MAX__)
+    __builtin_abort ();
+}
+
+void f2 (unsigned char *p, __PTRDIFF_TYPE__ sz)
+{
+  unsigned char *q = __builtin_memchr (p, 0, sz);
+  __PTRDIFF_TYPE__ n = q - p;
+
+  if (n >= __PTRDIFF_MAX__)
+    __builtin_abort ();
+}
+
+void f3 (signed char *p, __PTRDIFF_TYPE__ sz)
+{
+  signed char *q = __builtin_memchr (p, 0, sz);
+  __PTRDIFF_TYPE__ n = q - p;
+
+  if (n >= __PTRDIFF_MAX__)
+    __builtin_abort ();
+}
+
+
+/* { dg-final { scan-tree-dump-not "memchr" 0 "optimized" } } */
diff --git a/gcc/vr-values.c b/gcc/vr-values.c
index 794b4635f9e..2c93f92438a 100644
--- a/gcc/vr-values.c
+++ b/gcc/vr-values.c
@@ -793,6 +793,47 @@ vr_values::extract_range_from_binary_expr (value_range *vr,
 
   extract_range_from_binary_expr_1 (vr, code, expr_type, &vr0, &vr1);
 
+  /* Set value_range for n in following sequence:
+     def = __builtin_memchr (arg, 0, sz)
+     n = def - arg
+     Here the range for n can be set to [0, PTRDIFF_MAX - 1]. */
+
+  if (vr->type == VR_VARYING
+      && code == POINTER_DIFF_EXPR
+      && TREE_CODE (op0) == SSA_NAME
+      && TREE_CODE (op1) == SSA_NAME)
+    {
+      tree def = op0;
+      tree arg = op1;
+      tree def_type, arg_type;
+
+      gcall *call_stmt = NULL;
+      if (def && arg
+	  && TREE_CODE (def) == SSA_NAME
+	  && TREE_CODE (TREE_TYPE (def)) == POINTER_TYPE
+	  && (def_type = TREE_TYPE (TREE_TYPE (def)))
+	  && (def_type == char_type_node || def_type == signed_char_type_node
+	      || def_type == unsigned_char_type_node)
+	  && TREE_CODE (arg) == SSA_NAME
+	  && TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE
+	  && (arg_type = TREE_TYPE (TREE_TYPE (arg)))
+	  && (arg_type == char_type_node || arg_type == signed_char_type_node
+	      || arg_type == unsigned_char_type_node)
+	  && (call_stmt = dyn_cast<gcall *>(SSA_NAME_DEF_STMT (def)))
+	  && gimple_call_builtin_p (call_stmt, BUILT_IN_MEMCHR)
+	  && operand_equal_p (def, gimple_call_lhs (call_stmt), 0)
+	  && operand_equal_p (arg, gimple_call_arg (call_stmt, 0), 0)
+	  && integer_zerop (gimple_call_arg (call_stmt, 1)))
+	    {
+	      tree max = vrp_val_max (ptrdiff_type_node);
+	      wide_int wmax = wi::to_wide (max, TYPE_PRECISION (TREE_TYPE (max)));
+	      tree range_min = build_zero_cst (expr_type);
+	      tree range_max = wide_int_to_tree (expr_type, wmax - 1);
+	      set_value_range (vr, VR_RANGE, range_min, range_max, NULL);
+	      return;
+	    }
+     }
+
   /* Try harder for PLUS and MINUS if the range of one operand is symbolic
      and based on the other operand, for example if it was deduced from a
      symbolic comparison.  When a bound of the range of the first operand
Prathamesh Kulkarni Jan. 3, 2018, 7:08 a.m. | #5
On 3 January 2018 at 12:33, Prathamesh Kulkarni
<prathamesh.kulkarni@linaro.org> wrote:
> On 2 January 2018 at 17:49, Jakub Jelinek <jakub@redhat.com> wrote:

>> On Tue, Jan 02, 2018 at 05:39:17PM +0530, Prathamesh Kulkarni wrote:

>>> --- /dev/null

>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr82665.c

>>> @@ -0,0 +1,22 @@

>>> +/* { dg-do compile } */

>>> +/* { dg-options "-O2 -fdump-tree-optimized" } */

>>> +

>>> +void f1 (char *p, __SIZE_TYPE__ sz)

>>> +{

>>> +  char *q = __builtin_memchr (p, 0, sz);

>>> +  long n = q - p;

>>

>> Please use __PTRDIFF_TYPE__ instead of long, here and in other spots.

>>

>>> --- a/gcc/vr-values.c

>>> +++ b/gcc/vr-values.c

>>> @@ -793,6 +793,42 @@ vr_values::extract_range_from_binary_expr (value_range *vr,

>>>

>>>    extract_range_from_binary_expr_1 (vr, code, expr_type, &vr0, &vr1);

>>>

>>> +  /* Set value_range for n in following sequence:

>>> +     def = __builtin_memchr (arg, 0, sz)

>>> +     n = def - arg

>>> +     Here the range for n can be set to [0, PTRDIFF_MAX - 1]. */

>>> +

>>> +  if (vr->type == VR_VARYING

>>> +      && (code == POINTER_DIFF_EXPR)

>>> +      && (TREE_CODE (op0) == SSA_NAME)

>>> +      && (TREE_CODE (op1) == SSA_NAME))

>>

>> Please drop the useless ()s around the comparisons.  They are needed

>> only if the condition is multi-line and needed for emacs indentation,

>> or around assignments tested as conditions.

>>

>>> +      gcall *call_stmt = NULL;

>>> +      if (def && arg

>>> +       && (TREE_CODE (def) == SSA_NAME)

>>

>> Likewise (many times).

>>

>>> +       && ((TREE_CODE (TREE_TYPE (def)) == POINTER_TYPE)

>>> +           && (TREE_TYPE (TREE_TYPE (def)) == char_type_node))

>>> +       && (TREE_CODE (arg) == SSA_NAME)

>>> +       && ((TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE)

>>> +           && (TREE_TYPE (TREE_TYPE (arg)) == char_type_node))

>>

>> What is so special about char_type_node?  Why e.g. unsigned char or signed

>> char pointer shouldn't be handled the same?

>>

>>> +       && (call_stmt = dyn_cast<gcall *>(SSA_NAME_DEF_STMT (def)))

>>> +       && (gimple_call_combined_fn (call_stmt) == CFN_BUILT_IN_MEMCHR)

>>

>> We don't have an ifn for memchr, so why this?  On the other side, it should

>> be making sure one doesn't use unprototyped memchr with weirdo argument

>> types, so you need gimple_call_builtin_p.

> Hi Jakub,

> Thanks for the review. I have tried to address your suggestions in the

> attached patch.

> Does it look OK ?

> Validation in progress.

Oops, I mistakenly made argument sz in the tests of type
__PTRDIFF_TYPE__ in the previous patch.
The attached patch restores it's type to __SIZE_TYPE__.

Thanks,
Prathamesh
>

> Thanks,

> Prathamesh

>>

>>> +       && operand_equal_p (def, gimple_call_lhs (call_stmt), 0)

>>> +       && operand_equal_p (arg, gimple_call_arg (call_stmt, 0), 0)

>>> +       && integer_zerop (gimple_call_arg (call_stmt, 1)))

>>> +         {

>>> +           tree max = vrp_val_max (ptrdiff_type_node);

>>> +           wide_int wmax = wi::to_wide (max, TYPE_PRECISION (TREE_TYPE (max)));

>>> +           tree range_min = build_zero_cst (expr_type);

>>> +           tree range_max = wide_int_to_tree (expr_type, wmax - 1);

>>> +           set_value_range (vr, VR_RANGE, range_min, range_max, NULL);

>>> +           return;

>>> +         }

>>> +     }

>>> +

>>>    /* Try harder for PLUS and MINUS if the range of one operand is symbolic

>>>       and based on the other operand, for example if it was deduced from a

>>>       symbolic comparison.  When a bound of the range of the first operand

>>

>>

>>         Jakub
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr82665.c b/gcc/testsuite/gcc.dg/tree-ssa/pr82665.c
new file mode 100644
index 00000000000..b37c3d1d7ce
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr82665.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+void f1 (char *p, __SIZE_TYPE__ sz)
+{
+  char *q = __builtin_memchr (p, 0, sz);
+  __PTRDIFF_TYPE__ n = q - p;
+
+  if (n >= __PTRDIFF_MAX__)
+    __builtin_abort ();
+}
+
+void f2 (unsigned char *p, __SIZE_TYPE__ sz)
+{
+  unsigned char *q = __builtin_memchr (p, 0, sz);
+  __PTRDIFF_TYPE__ n = q - p;
+
+  if (n >= __PTRDIFF_MAX__)
+    __builtin_abort ();
+}
+
+void f3 (signed char *p, __SIZE_TYPE__ sz)
+{
+  signed char *q = __builtin_memchr (p, 0, sz);
+  __PTRDIFF_TYPE__ n = q - p;
+
+  if (n >= __PTRDIFF_MAX__)
+    __builtin_abort ();
+}
+
+
+/* { dg-final { scan-tree-dump-not "memchr" 0 "optimized" } } */
diff --git a/gcc/vr-values.c b/gcc/vr-values.c
index 794b4635f9e..2c93f92438a 100644
--- a/gcc/vr-values.c
+++ b/gcc/vr-values.c
@@ -793,6 +793,47 @@ vr_values::extract_range_from_binary_expr (value_range *vr,
 
   extract_range_from_binary_expr_1 (vr, code, expr_type, &vr0, &vr1);
 
+  /* Set value_range for n in following sequence:
+     def = __builtin_memchr (arg, 0, sz)
+     n = def - arg
+     Here the range for n can be set to [0, PTRDIFF_MAX - 1]. */
+
+  if (vr->type == VR_VARYING
+      && code == POINTER_DIFF_EXPR
+      && TREE_CODE (op0) == SSA_NAME
+      && TREE_CODE (op1) == SSA_NAME)
+    {
+      tree def = op0;
+      tree arg = op1;
+      tree def_type, arg_type;
+
+      gcall *call_stmt = NULL;
+      if (def && arg
+	  && TREE_CODE (def) == SSA_NAME
+	  && TREE_CODE (TREE_TYPE (def)) == POINTER_TYPE
+	  && (def_type = TREE_TYPE (TREE_TYPE (def)))
+	  && (def_type == char_type_node || def_type == signed_char_type_node
+	      || def_type == unsigned_char_type_node)
+	  && TREE_CODE (arg) == SSA_NAME
+	  && TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE
+	  && (arg_type = TREE_TYPE (TREE_TYPE (arg)))
+	  && (arg_type == char_type_node || arg_type == signed_char_type_node
+	      || arg_type == unsigned_char_type_node)
+	  && (call_stmt = dyn_cast<gcall *>(SSA_NAME_DEF_STMT (def)))
+	  && gimple_call_builtin_p (call_stmt, BUILT_IN_MEMCHR)
+	  && operand_equal_p (def, gimple_call_lhs (call_stmt), 0)
+	  && operand_equal_p (arg, gimple_call_arg (call_stmt, 0), 0)
+	  && integer_zerop (gimple_call_arg (call_stmt, 1)))
+	    {
+	      tree max = vrp_val_max (ptrdiff_type_node);
+	      wide_int wmax = wi::to_wide (max, TYPE_PRECISION (TREE_TYPE (max)));
+	      tree range_min = build_zero_cst (expr_type);
+	      tree range_max = wide_int_to_tree (expr_type, wmax - 1);
+	      set_value_range (vr, VR_RANGE, range_min, range_max, NULL);
+	      return;
+	    }
+     }
+
   /* Try harder for PLUS and MINUS if the range of one operand is symbolic
      and based on the other operand, for example if it was deduced from a
      symbolic comparison.  When a bound of the range of the first operand
Jeff Law Jan. 4, 2018, 6:50 p.m. | #6
On 01/03/2018 12:08 AM, Prathamesh Kulkarni wrote:
> On 3 January 2018 at 12:33, Prathamesh Kulkarni

> <prathamesh.kulkarni@linaro.org> wrote:

>> On 2 January 2018 at 17:49, Jakub Jelinek <jakub@redhat.com> wrote:

>>> On Tue, Jan 02, 2018 at 05:39:17PM +0530, Prathamesh Kulkarni wrote:

>>>> --- /dev/null

>>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr82665.c

>>>> @@ -0,0 +1,22 @@

>>>> +/* { dg-do compile } */

>>>> +/* { dg-options "-O2 -fdump-tree-optimized" } */

>>>> +

>>>> +void f1 (char *p, __SIZE_TYPE__ sz)

>>>> +{

>>>> +  char *q = __builtin_memchr (p, 0, sz);

>>>> +  long n = q - p;

>>> Please use __PTRDIFF_TYPE__ instead of long, here and in other spots.

>>>

>>>> --- a/gcc/vr-values.c

>>>> +++ b/gcc/vr-values.c

>>>> @@ -793,6 +793,42 @@ vr_values::extract_range_from_binary_expr (value_range *vr,

>>>>

>>>>    extract_range_from_binary_expr_1 (vr, code, expr_type, &vr0, &vr1);

>>>>

>>>> +  /* Set value_range for n in following sequence:

>>>> +     def = __builtin_memchr (arg, 0, sz)

>>>> +     n = def - arg

>>>> +     Here the range for n can be set to [0, PTRDIFF_MAX - 1]. */

>>>> +

>>>> +  if (vr->type == VR_VARYING

>>>> +      && (code == POINTER_DIFF_EXPR)

>>>> +      && (TREE_CODE (op0) == SSA_NAME)

>>>> +      && (TREE_CODE (op1) == SSA_NAME))

>>> Please drop the useless ()s around the comparisons.  They are needed

>>> only if the condition is multi-line and needed for emacs indentation,

>>> or around assignments tested as conditions.

>>>

>>>> +      gcall *call_stmt = NULL;

>>>> +      if (def && arg

>>>> +       && (TREE_CODE (def) == SSA_NAME)

>>> Likewise (many times).

>>>

>>>> +       && ((TREE_CODE (TREE_TYPE (def)) == POINTER_TYPE)

>>>> +           && (TREE_TYPE (TREE_TYPE (def)) == char_type_node))

>>>> +       && (TREE_CODE (arg) == SSA_NAME)

>>>> +       && ((TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE)

>>>> +           && (TREE_TYPE (TREE_TYPE (arg)) == char_type_node))

>>> What is so special about char_type_node?  Why e.g. unsigned char or signed

>>> char pointer shouldn't be handled the same?

>>>

>>>> +       && (call_stmt = dyn_cast<gcall *>(SSA_NAME_DEF_STMT (def)))

>>>> +       && (gimple_call_combined_fn (call_stmt) == CFN_BUILT_IN_MEMCHR)

>>> We don't have an ifn for memchr, so why this?  On the other side, it should

>>> be making sure one doesn't use unprototyped memchr with weirdo argument

>>> types, so you need gimple_call_builtin_p.

>> Hi Jakub,

>> Thanks for the review. I have tried to address your suggestions in the

>> attached patch.

>> Does it look OK ?

>> Validation in progress.

> Oops, I mistakenly made argument sz in the tests of type

> __PTRDIFF_TYPE__ in the previous patch.

> The attached patch restores it's type to __SIZE_TYPE__.

> 

> Thanks,

> Prathamesh

>> Thanks,

>> Prathamesh

>>>> +       && operand_equal_p (def, gimple_call_lhs (call_stmt), 0)

>>>> +       && operand_equal_p (arg, gimple_call_arg (call_stmt, 0), 0)

>>>> +       && integer_zerop (gimple_call_arg (call_stmt, 1)))

>>>> +         {

>>>> +           tree max = vrp_val_max (ptrdiff_type_node);

>>>> +           wide_int wmax = wi::to_wide (max, TYPE_PRECISION (TREE_TYPE (max)));

>>>> +           tree range_min = build_zero_cst (expr_type);

>>>> +           tree range_max = wide_int_to_tree (expr_type, wmax - 1);

>>>> +           set_value_range (vr, VR_RANGE, range_min, range_max, NULL);

>>>> +           return;

>>>> +         }

>>>> +     }

>>>> +

>>>>    /* Try harder for PLUS and MINUS if the range of one operand is symbolic

>>>>       and based on the other operand, for example if it was deduced from a

>>>>       symbolic comparison.  When a bound of the range of the first operand

>>>

>>>         Jakub

> 

> pr82665-8.diff

> 

> 

> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr82665.c b/gcc/testsuite/gcc.dg/tree-ssa/pr82665.c

> new file mode 100644

> index 00000000000..b37c3d1d7ce

> --- /dev/null

> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr82665.c

> @@ -0,0 +1,32 @@

> +/* { dg-do compile } */

> +/* { dg-options "-O2 -fdump-tree-optimized" } */

> +

> +void f1 (char *p, __SIZE_TYPE__ sz)

> +{

> +  char *q = __builtin_memchr (p, 0, sz);

> +  __PTRDIFF_TYPE__ n = q - p;

> +

> +  if (n >= __PTRDIFF_MAX__)

> +    __builtin_abort ();

> +}

> +

> +void f2 (unsigned char *p, __SIZE_TYPE__ sz)

> +{

> +  unsigned char *q = __builtin_memchr (p, 0, sz);

> +  __PTRDIFF_TYPE__ n = q - p;

> +

> +  if (n >= __PTRDIFF_MAX__)

> +    __builtin_abort ();

> +}

> +

> +void f3 (signed char *p, __SIZE_TYPE__ sz)

> +{

> +  signed char *q = __builtin_memchr (p, 0, sz);

> +  __PTRDIFF_TYPE__ n = q - p;

> +

> +  if (n >= __PTRDIFF_MAX__)

> +    __builtin_abort ();

> +}

> +

> +

> +/* { dg-final { scan-tree-dump-not "memchr" 0 "optimized" } } */

> diff --git a/gcc/vr-values.c b/gcc/vr-values.c

> index 794b4635f9e..2c93f92438a 100644

> --- a/gcc/vr-values.c

> +++ b/gcc/vr-values.c

> @@ -793,6 +793,47 @@ vr_values::extract_range_from_binary_expr (value_range *vr,

>  

>    extract_range_from_binary_expr_1 (vr, code, expr_type, &vr0, &vr1);

>  

> +  /* Set value_range for n in following sequence:

> +     def = __builtin_memchr (arg, 0, sz)

> +     n = def - arg

> +     Here the range for n can be set to [0, PTRDIFF_MAX - 1]. */

> +

> +  if (vr->type == VR_VARYING

> +      && code == POINTER_DIFF_EXPR

> +      && TREE_CODE (op0) == SSA_NAME

> +      && TREE_CODE (op1) == SSA_NAME)

> +    {

> +      tree def = op0;

> +      tree arg = op1;

> +      tree def_type, arg_type;

> +

> +      gcall *call_stmt = NULL;

> +      if (def && arg

AFAICT these can never be NULL at this point.  They are op0 and op1
respectively and we've verified they are SSA_NAMEs.  So I think this
test is redundant and should be removed.


> +	  && TREE_CODE (def) == SSA_NAME

Similarly this test is redundant with verification that op0 is an
SSA_NAME in the outer conditional.

> +	  && TREE_CODE (TREE_TYPE (def)) == POINTER_TYPE

> +	  && (def_type = TREE_TYPE (TREE_TYPE (def)))

> +	  && (def_type == char_type_node || def_type == signed_char_type_node

> +	      || def_type == unsigned_char_type_node)

Why are we checking for equality with these types at all.  Aren't we
going to miss equivalent types or types with qualifiers?

It looks like the canonical way to do this check in tree-ssa-strlen.c is

 (TYPE_MODE (type) == TYPE_MODE (char_type_node)
  && TYPE_PRECISION (type) == TYPE_PRECISION (char_type_node))

ie, verify that the mode/precision of the object is the same as the
mode/precision of a character type.


> +	  && TREE_CODE (arg) == SSA_NAME

Redundant with the verification that op1 is an SSA_NAME in the outer
conditional.

> +	  && TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE

> +	  && (arg_type = TREE_TYPE (TREE_TYPE (arg)))

> +	  && (arg_type == char_type_node || arg_type == signed_char_type_node

> +	      || arg_type == unsigned_char_type_node)

See note above about verifying based on mode/precision.

I think with those changes we're probably in good shape.  But please
repost for final approval.

jeff
Prathamesh Kulkarni Jan. 7, 2018, 6:58 a.m. | #7
On 5 January 2018 at 00:20, Jeff Law <law@redhat.com> wrote:
> On 01/03/2018 12:08 AM, Prathamesh Kulkarni wrote:

>> On 3 January 2018 at 12:33, Prathamesh Kulkarni

>> <prathamesh.kulkarni@linaro.org> wrote:

>>> On 2 January 2018 at 17:49, Jakub Jelinek <jakub@redhat.com> wrote:

>>>> On Tue, Jan 02, 2018 at 05:39:17PM +0530, Prathamesh Kulkarni wrote:

>>>>> --- /dev/null

>>>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr82665.c

>>>>> @@ -0,0 +1,22 @@

>>>>> +/* { dg-do compile } */

>>>>> +/* { dg-options "-O2 -fdump-tree-optimized" } */

>>>>> +

>>>>> +void f1 (char *p, __SIZE_TYPE__ sz)

>>>>> +{

>>>>> +  char *q = __builtin_memchr (p, 0, sz);

>>>>> +  long n = q - p;

>>>> Please use __PTRDIFF_TYPE__ instead of long, here and in other spots.

>>>>

>>>>> --- a/gcc/vr-values.c

>>>>> +++ b/gcc/vr-values.c

>>>>> @@ -793,6 +793,42 @@ vr_values::extract_range_from_binary_expr (value_range *vr,

>>>>>

>>>>>    extract_range_from_binary_expr_1 (vr, code, expr_type, &vr0, &vr1);

>>>>>

>>>>> +  /* Set value_range for n in following sequence:

>>>>> +     def = __builtin_memchr (arg, 0, sz)

>>>>> +     n = def - arg

>>>>> +     Here the range for n can be set to [0, PTRDIFF_MAX - 1]. */

>>>>> +

>>>>> +  if (vr->type == VR_VARYING

>>>>> +      && (code == POINTER_DIFF_EXPR)

>>>>> +      && (TREE_CODE (op0) == SSA_NAME)

>>>>> +      && (TREE_CODE (op1) == SSA_NAME))

>>>> Please drop the useless ()s around the comparisons.  They are needed

>>>> only if the condition is multi-line and needed for emacs indentation,

>>>> or around assignments tested as conditions.

>>>>

>>>>> +      gcall *call_stmt = NULL;

>>>>> +      if (def && arg

>>>>> +       && (TREE_CODE (def) == SSA_NAME)

>>>> Likewise (many times).

>>>>

>>>>> +       && ((TREE_CODE (TREE_TYPE (def)) == POINTER_TYPE)

>>>>> +           && (TREE_TYPE (TREE_TYPE (def)) == char_type_node))

>>>>> +       && (TREE_CODE (arg) == SSA_NAME)

>>>>> +       && ((TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE)

>>>>> +           && (TREE_TYPE (TREE_TYPE (arg)) == char_type_node))

>>>> What is so special about char_type_node?  Why e.g. unsigned char or signed

>>>> char pointer shouldn't be handled the same?

>>>>

>>>>> +       && (call_stmt = dyn_cast<gcall *>(SSA_NAME_DEF_STMT (def)))

>>>>> +       && (gimple_call_combined_fn (call_stmt) == CFN_BUILT_IN_MEMCHR)

>>>> We don't have an ifn for memchr, so why this?  On the other side, it should

>>>> be making sure one doesn't use unprototyped memchr with weirdo argument

>>>> types, so you need gimple_call_builtin_p.

>>> Hi Jakub,

>>> Thanks for the review. I have tried to address your suggestions in the

>>> attached patch.

>>> Does it look OK ?

>>> Validation in progress.

>> Oops, I mistakenly made argument sz in the tests of type

>> __PTRDIFF_TYPE__ in the previous patch.

>> The attached patch restores it's type to __SIZE_TYPE__.

>>

>> Thanks,

>> Prathamesh

>>> Thanks,

>>> Prathamesh

>>>>> +       && operand_equal_p (def, gimple_call_lhs (call_stmt), 0)

>>>>> +       && operand_equal_p (arg, gimple_call_arg (call_stmt, 0), 0)

>>>>> +       && integer_zerop (gimple_call_arg (call_stmt, 1)))

>>>>> +         {

>>>>> +           tree max = vrp_val_max (ptrdiff_type_node);

>>>>> +           wide_int wmax = wi::to_wide (max, TYPE_PRECISION (TREE_TYPE (max)));

>>>>> +           tree range_min = build_zero_cst (expr_type);

>>>>> +           tree range_max = wide_int_to_tree (expr_type, wmax - 1);

>>>>> +           set_value_range (vr, VR_RANGE, range_min, range_max, NULL);

>>>>> +           return;

>>>>> +         }

>>>>> +     }

>>>>> +

>>>>>    /* Try harder for PLUS and MINUS if the range of one operand is symbolic

>>>>>       and based on the other operand, for example if it was deduced from a

>>>>>       symbolic comparison.  When a bound of the range of the first operand

>>>>

>>>>         Jakub

>>

>> pr82665-8.diff

>>

>>

>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr82665.c b/gcc/testsuite/gcc.dg/tree-ssa/pr82665.c

>> new file mode 100644

>> index 00000000000..b37c3d1d7ce

>> --- /dev/null

>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr82665.c

>> @@ -0,0 +1,32 @@

>> +/* { dg-do compile } */

>> +/* { dg-options "-O2 -fdump-tree-optimized" } */

>> +

>> +void f1 (char *p, __SIZE_TYPE__ sz)

>> +{

>> +  char *q = __builtin_memchr (p, 0, sz);

>> +  __PTRDIFF_TYPE__ n = q - p;

>> +

>> +  if (n >= __PTRDIFF_MAX__)

>> +    __builtin_abort ();

>> +}

>> +

>> +void f2 (unsigned char *p, __SIZE_TYPE__ sz)

>> +{

>> +  unsigned char *q = __builtin_memchr (p, 0, sz);

>> +  __PTRDIFF_TYPE__ n = q - p;

>> +

>> +  if (n >= __PTRDIFF_MAX__)

>> +    __builtin_abort ();

>> +}

>> +

>> +void f3 (signed char *p, __SIZE_TYPE__ sz)

>> +{

>> +  signed char *q = __builtin_memchr (p, 0, sz);

>> +  __PTRDIFF_TYPE__ n = q - p;

>> +

>> +  if (n >= __PTRDIFF_MAX__)

>> +    __builtin_abort ();

>> +}

>> +

>> +

>> +/* { dg-final { scan-tree-dump-not "memchr" 0 "optimized" } } */

>> diff --git a/gcc/vr-values.c b/gcc/vr-values.c

>> index 794b4635f9e..2c93f92438a 100644

>> --- a/gcc/vr-values.c

>> +++ b/gcc/vr-values.c

>> @@ -793,6 +793,47 @@ vr_values::extract_range_from_binary_expr (value_range *vr,

>>

>>    extract_range_from_binary_expr_1 (vr, code, expr_type, &vr0, &vr1);

>>

>> +  /* Set value_range for n in following sequence:

>> +     def = __builtin_memchr (arg, 0, sz)

>> +     n = def - arg

>> +     Here the range for n can be set to [0, PTRDIFF_MAX - 1]. */

>> +

>> +  if (vr->type == VR_VARYING

>> +      && code == POINTER_DIFF_EXPR

>> +      && TREE_CODE (op0) == SSA_NAME

>> +      && TREE_CODE (op1) == SSA_NAME)

>> +    {

>> +      tree def = op0;

>> +      tree arg = op1;

>> +      tree def_type, arg_type;

>> +

>> +      gcall *call_stmt = NULL;

>> +      if (def && arg

> AFAICT these can never be NULL at this point.  They are op0 and op1

> respectively and we've verified they are SSA_NAMEs.  So I think this

> test is redundant and should be removed.

Indeed. These checks were carried over from my previous patch before
basing on POINTER_DIFF_EXPR,
and I forgot to remove them later :/
>

>

>> +       && TREE_CODE (def) == SSA_NAME

> Similarly this test is redundant with verification that op0 is an

> SSA_NAME in the outer conditional.

>

>> +       && TREE_CODE (TREE_TYPE (def)) == POINTER_TYPE

>> +       && (def_type = TREE_TYPE (TREE_TYPE (def)))

>> +       && (def_type == char_type_node || def_type == signed_char_type_node

>> +           || def_type == unsigned_char_type_node)

> Why are we checking for equality with these types at all.  Aren't we

> going to miss equivalent types or types with qualifiers?

>

> It looks like the canonical way to do this check in tree-ssa-strlen.c is

>

>  (TYPE_MODE (type) == TYPE_MODE (char_type_node)

>   && TYPE_PRECISION (type) == TYPE_PRECISION (char_type_node))

>

> ie, verify that the mode/precision of the object is the same as the

> mode/precision of a character type.

Thanks, I updated the patch to check for mode/precision.
>

>

>> +       && TREE_CODE (arg) == SSA_NAME

> Redundant with the verification that op1 is an SSA_NAME in the outer

> conditional.

>

>> +       && TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE

>> +       && (arg_type = TREE_TYPE (TREE_TYPE (arg)))

>> +       && (arg_type == char_type_node || arg_type == signed_char_type_node

>> +           || arg_type == unsigned_char_type_node)

> See note above about verifying based on mode/precision.

>

> I think with those changes we're probably in good shape.  But please

> repost for final approval.

I have the updated the attached version with your suggestions.
Does it look OK ?
Bootstrap+test passes on x86_64-unknown-linux-gnu.

Thanks,
Prathamesh
>

> jeff
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr82665.c b/gcc/testsuite/gcc.dg/tree-ssa/pr82665.c
new file mode 100644
index 00000000000..66db32f46db
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr82665.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+void f1 (char *p, __SIZE_TYPE__ sz)
+{
+  char *q = __builtin_memchr (p, 0, sz);
+  __PTRDIFF_TYPE__ n = q - p;
+
+  if (n >= __PTRDIFF_MAX__)
+    __builtin_abort ();
+}
+
+void f2 (unsigned char *p, __SIZE_TYPE__ sz)
+{
+  unsigned char *q = __builtin_memchr (p, 0, sz);
+  __PTRDIFF_TYPE__ n = q - p;
+
+  if (n >= __PTRDIFF_MAX__)
+    __builtin_abort ();
+}
+
+void f3 (signed char *p, __SIZE_TYPE__ sz)
+{
+  signed char *q = __builtin_memchr (p, 0, sz);
+  __PTRDIFF_TYPE__ n = q - p;
+
+  if (n >= __PTRDIFF_MAX__)
+    __builtin_abort ();
+}
+
+
+/* { dg-final { scan-tree-dump-not "memchr" "optimized" } } */
diff --git a/gcc/vr-values.c b/gcc/vr-values.c
index 794b4635f9e..41a4a0b041f 100644
--- a/gcc/vr-values.c
+++ b/gcc/vr-values.c
@@ -793,6 +793,39 @@ vr_values::extract_range_from_binary_expr (value_range *vr,
 
   extract_range_from_binary_expr_1 (vr, code, expr_type, &vr0, &vr1);
 
+  /* Set value_range for n in following sequence:
+     def = __builtin_memchr (arg, 0, sz)
+     n = def - arg
+     Here the range for n can be set to [0, PTRDIFF_MAX - 1]. */
+
+  if (vr->type == VR_VARYING
+      && code == POINTER_DIFF_EXPR
+      && TREE_CODE (op0) == SSA_NAME
+      && TREE_CODE (op1) == SSA_NAME)
+    {
+      tree op0_ptype = TREE_TYPE (TREE_TYPE (op0));
+      tree op1_ptype = TREE_TYPE (TREE_TYPE (op1));
+      gcall *call_stmt = NULL;
+
+      if (TYPE_MODE (op0_ptype) == TYPE_MODE (char_type_node)
+	  && TYPE_PRECISION (op0_ptype) == TYPE_PRECISION (char_type_node)
+	  && TYPE_MODE (op1_ptype) == TYPE_MODE (char_type_node)
+	  && TYPE_PRECISION (op1_ptype) == TYPE_PRECISION (char_type_node)
+	  && (call_stmt = dyn_cast<gcall *>(SSA_NAME_DEF_STMT (op0)))
+	  && gimple_call_builtin_p (call_stmt, BUILT_IN_MEMCHR)
+	  && operand_equal_p (op0, gimple_call_lhs (call_stmt), 0)
+	  && operand_equal_p (op1, gimple_call_arg (call_stmt, 0), 0)
+	  && integer_zerop (gimple_call_arg (call_stmt, 1)))
+	    {
+	      tree max = vrp_val_max (ptrdiff_type_node);
+	      wide_int wmax = wi::to_wide (max, TYPE_PRECISION (TREE_TYPE (max)));
+	      tree range_min = build_zero_cst (expr_type);
+	      tree range_max = wide_int_to_tree (expr_type, wmax - 1);
+	      set_value_range (vr, VR_RANGE, range_min, range_max, NULL);
+	      return;
+	    }
+     }
+
   /* Try harder for PLUS and MINUS if the range of one operand is symbolic
      and based on the other operand, for example if it was deduced from a
      symbolic comparison.  When a bound of the range of the first operand
Prathamesh Kulkarni Jan. 9, 2018, 12:44 p.m. | #8
On 7 January 2018 at 12:28, Prathamesh Kulkarni
<prathamesh.kulkarni@linaro.org> wrote:
> On 5 January 2018 at 00:20, Jeff Law <law@redhat.com> wrote:

>> On 01/03/2018 12:08 AM, Prathamesh Kulkarni wrote:

>>> On 3 January 2018 at 12:33, Prathamesh Kulkarni

>>> <prathamesh.kulkarni@linaro.org> wrote:

>>>> On 2 January 2018 at 17:49, Jakub Jelinek <jakub@redhat.com> wrote:

>>>>> On Tue, Jan 02, 2018 at 05:39:17PM +0530, Prathamesh Kulkarni wrote:

>>>>>> --- /dev/null

>>>>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr82665.c

>>>>>> @@ -0,0 +1,22 @@

>>>>>> +/* { dg-do compile } */

>>>>>> +/* { dg-options "-O2 -fdump-tree-optimized" } */

>>>>>> +

>>>>>> +void f1 (char *p, __SIZE_TYPE__ sz)

>>>>>> +{

>>>>>> +  char *q = __builtin_memchr (p, 0, sz);

>>>>>> +  long n = q - p;

>>>>> Please use __PTRDIFF_TYPE__ instead of long, here and in other spots.

>>>>>

>>>>>> --- a/gcc/vr-values.c

>>>>>> +++ b/gcc/vr-values.c

>>>>>> @@ -793,6 +793,42 @@ vr_values::extract_range_from_binary_expr (value_range *vr,

>>>>>>

>>>>>>    extract_range_from_binary_expr_1 (vr, code, expr_type, &vr0, &vr1);

>>>>>>

>>>>>> +  /* Set value_range for n in following sequence:

>>>>>> +     def = __builtin_memchr (arg, 0, sz)

>>>>>> +     n = def - arg

>>>>>> +     Here the range for n can be set to [0, PTRDIFF_MAX - 1]. */

>>>>>> +

>>>>>> +  if (vr->type == VR_VARYING

>>>>>> +      && (code == POINTER_DIFF_EXPR)

>>>>>> +      && (TREE_CODE (op0) == SSA_NAME)

>>>>>> +      && (TREE_CODE (op1) == SSA_NAME))

>>>>> Please drop the useless ()s around the comparisons.  They are needed

>>>>> only if the condition is multi-line and needed for emacs indentation,

>>>>> or around assignments tested as conditions.

>>>>>

>>>>>> +      gcall *call_stmt = NULL;

>>>>>> +      if (def && arg

>>>>>> +       && (TREE_CODE (def) == SSA_NAME)

>>>>> Likewise (many times).

>>>>>

>>>>>> +       && ((TREE_CODE (TREE_TYPE (def)) == POINTER_TYPE)

>>>>>> +           && (TREE_TYPE (TREE_TYPE (def)) == char_type_node))

>>>>>> +       && (TREE_CODE (arg) == SSA_NAME)

>>>>>> +       && ((TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE)

>>>>>> +           && (TREE_TYPE (TREE_TYPE (arg)) == char_type_node))

>>>>> What is so special about char_type_node?  Why e.g. unsigned char or signed

>>>>> char pointer shouldn't be handled the same?

>>>>>

>>>>>> +       && (call_stmt = dyn_cast<gcall *>(SSA_NAME_DEF_STMT (def)))

>>>>>> +       && (gimple_call_combined_fn (call_stmt) == CFN_BUILT_IN_MEMCHR)

>>>>> We don't have an ifn for memchr, so why this?  On the other side, it should

>>>>> be making sure one doesn't use unprototyped memchr with weirdo argument

>>>>> types, so you need gimple_call_builtin_p.

>>>> Hi Jakub,

>>>> Thanks for the review. I have tried to address your suggestions in the

>>>> attached patch.

>>>> Does it look OK ?

>>>> Validation in progress.

>>> Oops, I mistakenly made argument sz in the tests of type

>>> __PTRDIFF_TYPE__ in the previous patch.

>>> The attached patch restores it's type to __SIZE_TYPE__.

>>>

>>> Thanks,

>>> Prathamesh

>>>> Thanks,

>>>> Prathamesh

>>>>>> +       && operand_equal_p (def, gimple_call_lhs (call_stmt), 0)

>>>>>> +       && operand_equal_p (arg, gimple_call_arg (call_stmt, 0), 0)

>>>>>> +       && integer_zerop (gimple_call_arg (call_stmt, 1)))

>>>>>> +         {

>>>>>> +           tree max = vrp_val_max (ptrdiff_type_node);

>>>>>> +           wide_int wmax = wi::to_wide (max, TYPE_PRECISION (TREE_TYPE (max)));

>>>>>> +           tree range_min = build_zero_cst (expr_type);

>>>>>> +           tree range_max = wide_int_to_tree (expr_type, wmax - 1);

>>>>>> +           set_value_range (vr, VR_RANGE, range_min, range_max, NULL);

>>>>>> +           return;

>>>>>> +         }

>>>>>> +     }

>>>>>> +

>>>>>>    /* Try harder for PLUS and MINUS if the range of one operand is symbolic

>>>>>>       and based on the other operand, for example if it was deduced from a

>>>>>>       symbolic comparison.  When a bound of the range of the first operand

>>>>>

>>>>>         Jakub

>>>

>>> pr82665-8.diff

>>>

>>>

>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr82665.c b/gcc/testsuite/gcc.dg/tree-ssa/pr82665.c

>>> new file mode 100644

>>> index 00000000000..b37c3d1d7ce

>>> --- /dev/null

>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr82665.c

>>> @@ -0,0 +1,32 @@

>>> +/* { dg-do compile } */

>>> +/* { dg-options "-O2 -fdump-tree-optimized" } */

>>> +

>>> +void f1 (char *p, __SIZE_TYPE__ sz)

>>> +{

>>> +  char *q = __builtin_memchr (p, 0, sz);

>>> +  __PTRDIFF_TYPE__ n = q - p;

>>> +

>>> +  if (n >= __PTRDIFF_MAX__)

>>> +    __builtin_abort ();

>>> +}

>>> +

>>> +void f2 (unsigned char *p, __SIZE_TYPE__ sz)

>>> +{

>>> +  unsigned char *q = __builtin_memchr (p, 0, sz);

>>> +  __PTRDIFF_TYPE__ n = q - p;

>>> +

>>> +  if (n >= __PTRDIFF_MAX__)

>>> +    __builtin_abort ();

>>> +}

>>> +

>>> +void f3 (signed char *p, __SIZE_TYPE__ sz)

>>> +{

>>> +  signed char *q = __builtin_memchr (p, 0, sz);

>>> +  __PTRDIFF_TYPE__ n = q - p;

>>> +

>>> +  if (n >= __PTRDIFF_MAX__)

>>> +    __builtin_abort ();

>>> +}

>>> +

>>> +

>>> +/* { dg-final { scan-tree-dump-not "memchr" 0 "optimized" } } */

>>> diff --git a/gcc/vr-values.c b/gcc/vr-values.c

>>> index 794b4635f9e..2c93f92438a 100644

>>> --- a/gcc/vr-values.c

>>> +++ b/gcc/vr-values.c

>>> @@ -793,6 +793,47 @@ vr_values::extract_range_from_binary_expr (value_range *vr,

>>>

>>>    extract_range_from_binary_expr_1 (vr, code, expr_type, &vr0, &vr1);

>>>

>>> +  /* Set value_range for n in following sequence:

>>> +     def = __builtin_memchr (arg, 0, sz)

>>> +     n = def - arg

>>> +     Here the range for n can be set to [0, PTRDIFF_MAX - 1]. */

>>> +

>>> +  if (vr->type == VR_VARYING

>>> +      && code == POINTER_DIFF_EXPR

>>> +      && TREE_CODE (op0) == SSA_NAME

>>> +      && TREE_CODE (op1) == SSA_NAME)

>>> +    {

>>> +      tree def = op0;

>>> +      tree arg = op1;

>>> +      tree def_type, arg_type;

>>> +

>>> +      gcall *call_stmt = NULL;

>>> +      if (def && arg

>> AFAICT these can never be NULL at this point.  They are op0 and op1

>> respectively and we've verified they are SSA_NAMEs.  So I think this

>> test is redundant and should be removed.

> Indeed. These checks were carried over from my previous patch before

> basing on POINTER_DIFF_EXPR,

> and I forgot to remove them later :/

>>

>>

>>> +       && TREE_CODE (def) == SSA_NAME

>> Similarly this test is redundant with verification that op0 is an

>> SSA_NAME in the outer conditional.

>>

>>> +       && TREE_CODE (TREE_TYPE (def)) == POINTER_TYPE

>>> +       && (def_type = TREE_TYPE (TREE_TYPE (def)))

>>> +       && (def_type == char_type_node || def_type == signed_char_type_node

>>> +           || def_type == unsigned_char_type_node)

>> Why are we checking for equality with these types at all.  Aren't we

>> going to miss equivalent types or types with qualifiers?

>>

>> It looks like the canonical way to do this check in tree-ssa-strlen.c is

>>

>>  (TYPE_MODE (type) == TYPE_MODE (char_type_node)

>>   && TYPE_PRECISION (type) == TYPE_PRECISION (char_type_node))

>>

>> ie, verify that the mode/precision of the object is the same as the

>> mode/precision of a character type.

> Thanks, I updated the patch to check for mode/precision.

>>

>>

>>> +       && TREE_CODE (arg) == SSA_NAME

>> Redundant with the verification that op1 is an SSA_NAME in the outer

>> conditional.

>>

>>> +       && TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE

>>> +       && (arg_type = TREE_TYPE (TREE_TYPE (arg)))

>>> +       && (arg_type == char_type_node || arg_type == signed_char_type_node

>>> +           || arg_type == unsigned_char_type_node)

>> See note above about verifying based on mode/precision.

>>

>> I think with those changes we're probably in good shape.  But please

>> repost for final approval.

> I have the updated the attached version with your suggestions.

> Does it look OK ?

> Bootstrap+test passes on x86_64-unknown-linux-gnu.

Hi,
Could you please review https://gcc.gnu.org/ml/gcc-patches/2018-01/msg00385.html
Since stage-4 deadline is approaching, I am pinging in a very short
time, sorry about that.

Regards,
Prathamesh
>

> Thanks,

> Prathamesh

>>

>> jeff
Jeff Law Jan. 10, 2018, 8:59 p.m. | #9
On 01/06/2018 11:58 PM, Prathamesh Kulkarni wrote:
[ Snip ]

>> I think with those changes we're probably in good shape.  But please

>> repost for final approval.

> I have the updated the attached version with your suggestions.

> Does it look OK ?

> Bootstrap+test passes on x86_64-unknown-linux-gnu.

> 

> Thanks,

> Prathamesh

>> jeff

> 

> pr82665-9.diff

> 

> 

> diff --git a/gcc/vr-values.c b/gcc/vr-values.c

> index 794b4635f9e..41a4a0b041f 100644

> --- a/gcc/vr-values.c

> +++ b/gcc/vr-values.c

> @@ -793,6 +793,39 @@ vr_values::extract_range_from_binary_expr (value_range *vr,

>  

>    extract_range_from_binary_expr_1 (vr, code, expr_type, &vr0, &vr1);

>  

> +  /* Set value_range for n in following sequence:

> +     def = __builtin_memchr (arg, 0, sz)

> +     n = def - arg

> +     Here the range for n can be set to [0, PTRDIFF_MAX - 1]. */

> +

> +  if (vr->type == VR_VARYING

> +      && code == POINTER_DIFF_EXPR

> +      && TREE_CODE (op0) == SSA_NAME

> +      && TREE_CODE (op1) == SSA_NAME)

> +    {

> +      tree op0_ptype = TREE_TYPE (TREE_TYPE (op0));

> +      tree op1_ptype = TREE_TYPE (TREE_TYPE (op1));

> +      gcall *call_stmt = NULL;

> +

> +      if (TYPE_MODE (op0_ptype) == TYPE_MODE (char_type_node)

> +	  && TYPE_PRECISION (op0_ptype) == TYPE_PRECISION (char_type_node)

> +	  && TYPE_MODE (op1_ptype) == TYPE_MODE (char_type_node)

> +	  && TYPE_PRECISION (op1_ptype) == TYPE_PRECISION (char_type_node)

Note that while the operands of POINTER_DIFF_EXPR can be pointers to
different types, we do require that they have the same mode and
precision.  So the tests of TYPE_MODE and TYPE_PRECISION for op1_ptype
are not needed.

OK with those two checks removed.

Jeff

ps. FWIW, the close a stage in our development cycle is a patch
submission deadline.  So if a patch is submitted prior to the deadline,
then it can move forward, even if review/approval happens after the
deadline.
Richard Biener Jan. 11, 2018, 9:30 a.m. | #10
On Wed, Jan 10, 2018 at 9:59 PM, Jeff Law <law@redhat.com> wrote:
> On 01/06/2018 11:58 PM, Prathamesh Kulkarni wrote:

> [ Snip ]

>

>>> I think with those changes we're probably in good shape.  But please

>>> repost for final approval.

>> I have the updated the attached version with your suggestions.

>> Does it look OK ?

>> Bootstrap+test passes on x86_64-unknown-linux-gnu.

>>

>> Thanks,

>> Prathamesh

>>> jeff

>>

>> pr82665-9.diff

>>

>>

>> diff --git a/gcc/vr-values.c b/gcc/vr-values.c

>> index 794b4635f9e..41a4a0b041f 100644

>> --- a/gcc/vr-values.c

>> +++ b/gcc/vr-values.c

>> @@ -793,6 +793,39 @@ vr_values::extract_range_from_binary_expr (value_range *vr,

>>

>>    extract_range_from_binary_expr_1 (vr, code, expr_type, &vr0, &vr1);

>>

>> +  /* Set value_range for n in following sequence:

>> +     def = __builtin_memchr (arg, 0, sz)

>> +     n = def - arg

>> +     Here the range for n can be set to [0, PTRDIFF_MAX - 1]. */

>> +

>> +  if (vr->type == VR_VARYING

>> +      && code == POINTER_DIFF_EXPR

>> +      && TREE_CODE (op0) == SSA_NAME

>> +      && TREE_CODE (op1) == SSA_NAME)

>> +    {

>> +      tree op0_ptype = TREE_TYPE (TREE_TYPE (op0));

>> +      tree op1_ptype = TREE_TYPE (TREE_TYPE (op1));

>> +      gcall *call_stmt = NULL;

>> +

>> +      if (TYPE_MODE (op0_ptype) == TYPE_MODE (char_type_node)

>> +       && TYPE_PRECISION (op0_ptype) == TYPE_PRECISION (char_type_node)

>> +       && TYPE_MODE (op1_ptype) == TYPE_MODE (char_type_node)

>> +       && TYPE_PRECISION (op1_ptype) == TYPE_PRECISION (char_type_node)

> Note that while the operands of POINTER_DIFF_EXPR can be pointers to

> different types, we do require that they have the same mode and

> precision.  So the tests of TYPE_MODE and TYPE_PRECISION for op1_ptype

> are not needed.

>

> OK with those two checks removed.

>

> Jeff

>

> ps. FWIW, the close a stage in our development cycle is a patch

> submission deadline.  So if a patch is submitted prior to the deadline,

> then it can move forward, even if review/approval happens after the

> deadline.


IMHO not.  Stage3 is exactly to be able to flush out patches like this if they
have been posted during stage1.  This isn't a critical bug fix or anything like
that and so it can wait for the next stage1.

Richard.
Jeff Law May 1, 2018, 6:21 p.m. | #11
On 01/06/2018 11:58 PM, Prathamesh Kulkarni wrote:
> On 5 January 2018 at 00:20, Jeff Law <law@redhat.com> wrote:

>> On 01/03/2018 12:08 AM, Prathamesh Kulkarni wrote:

>>> On 3 January 2018 at 12:33, Prathamesh Kulkarni

>>> <prathamesh.kulkarni@linaro.org> wrote:

>>>> On 2 January 2018 at 17:49, Jakub Jelinek <jakub@redhat.com> wrote:

>>>>> On Tue, Jan 02, 2018 at 05:39:17PM +0530, Prathamesh Kulkarni wrote:

>>>>>> --- /dev/null

>>>>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr82665.c

>>>>>> @@ -0,0 +1,22 @@

>>>>>> +/* { dg-do compile } */

>>>>>> +/* { dg-options "-O2 -fdump-tree-optimized" } */

>>>>>> +

>>>>>> +void f1 (char *p, __SIZE_TYPE__ sz)

>>>>>> +{

>>>>>> +  char *q = __builtin_memchr (p, 0, sz);

>>>>>> +  long n = q - p;

>>>>> Please use __PTRDIFF_TYPE__ instead of long, here and in other spots.

>>>>>

>>>>>> --- a/gcc/vr-values.c

>>>>>> +++ b/gcc/vr-values.c

>>>>>> @@ -793,6 +793,42 @@ vr_values::extract_range_from_binary_expr (value_range *vr,

>>>>>>

>>>>>>    extract_range_from_binary_expr_1 (vr, code, expr_type, &vr0, &vr1);

>>>>>>

>>>>>> +  /* Set value_range for n in following sequence:

>>>>>> +     def = __builtin_memchr (arg, 0, sz)

>>>>>> +     n = def - arg

>>>>>> +     Here the range for n can be set to [0, PTRDIFF_MAX - 1]. */

>>>>>> +

>>>>>> +  if (vr->type == VR_VARYING

>>>>>> +      && (code == POINTER_DIFF_EXPR)

>>>>>> +      && (TREE_CODE (op0) == SSA_NAME)

>>>>>> +      && (TREE_CODE (op1) == SSA_NAME))

>>>>> Please drop the useless ()s around the comparisons.  They are needed

>>>>> only if the condition is multi-line and needed for emacs indentation,

>>>>> or around assignments tested as conditions.

>>>>>

>>>>>> +      gcall *call_stmt = NULL;

>>>>>> +      if (def && arg

>>>>>> +       && (TREE_CODE (def) == SSA_NAME)

>>>>> Likewise (many times).

>>>>>

>>>>>> +       && ((TREE_CODE (TREE_TYPE (def)) == POINTER_TYPE)

>>>>>> +           && (TREE_TYPE (TREE_TYPE (def)) == char_type_node))

>>>>>> +       && (TREE_CODE (arg) == SSA_NAME)

>>>>>> +       && ((TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE)

>>>>>> +           && (TREE_TYPE (TREE_TYPE (arg)) == char_type_node))

>>>>> What is so special about char_type_node?  Why e.g. unsigned char or signed

>>>>> char pointer shouldn't be handled the same?

>>>>>

>>>>>> +       && (call_stmt = dyn_cast<gcall *>(SSA_NAME_DEF_STMT (def)))

>>>>>> +       && (gimple_call_combined_fn (call_stmt) == CFN_BUILT_IN_MEMCHR)

>>>>> We don't have an ifn for memchr, so why this?  On the other side, it should

>>>>> be making sure one doesn't use unprototyped memchr with weirdo argument

>>>>> types, so you need gimple_call_builtin_p.

>>>> Hi Jakub,

>>>> Thanks for the review. I have tried to address your suggestions in the

>>>> attached patch.

>>>> Does it look OK ?

>>>> Validation in progress.

>>> Oops, I mistakenly made argument sz in the tests of type

>>> __PTRDIFF_TYPE__ in the previous patch.

>>> The attached patch restores it's type to __SIZE_TYPE__.

>>>

>>> Thanks,

>>> Prathamesh

>>>> Thanks,

>>>> Prathamesh

>>>>>> +       && operand_equal_p (def, gimple_call_lhs (call_stmt), 0)

>>>>>> +       && operand_equal_p (arg, gimple_call_arg (call_stmt, 0), 0)

>>>>>> +       && integer_zerop (gimple_call_arg (call_stmt, 1)))

>>>>>> +         {

>>>>>> +           tree max = vrp_val_max (ptrdiff_type_node);

>>>>>> +           wide_int wmax = wi::to_wide (max, TYPE_PRECISION (TREE_TYPE (max)));

>>>>>> +           tree range_min = build_zero_cst (expr_type);

>>>>>> +           tree range_max = wide_int_to_tree (expr_type, wmax - 1);

>>>>>> +           set_value_range (vr, VR_RANGE, range_min, range_max, NULL);

>>>>>> +           return;

>>>>>> +         }

>>>>>> +     }

>>>>>> +

>>>>>>    /* Try harder for PLUS and MINUS if the range of one operand is symbolic

>>>>>>       and based on the other operand, for example if it was deduced from a

>>>>>>       symbolic comparison.  When a bound of the range of the first operand

>>>>>

>>>>>         Jakub

>>>

>>> pr82665-8.diff

>>>

>>>

>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr82665.c b/gcc/testsuite/gcc.dg/tree-ssa/pr82665.c

>>> new file mode 100644

>>> index 00000000000..b37c3d1d7ce

>>> --- /dev/null

>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr82665.c

>>> @@ -0,0 +1,32 @@

>>> +/* { dg-do compile } */

>>> +/* { dg-options "-O2 -fdump-tree-optimized" } */

>>> +

>>> +void f1 (char *p, __SIZE_TYPE__ sz)

>>> +{

>>> +  char *q = __builtin_memchr (p, 0, sz);

>>> +  __PTRDIFF_TYPE__ n = q - p;

>>> +

>>> +  if (n >= __PTRDIFF_MAX__)

>>> +    __builtin_abort ();

>>> +}

>>> +

>>> +void f2 (unsigned char *p, __SIZE_TYPE__ sz)

>>> +{

>>> +  unsigned char *q = __builtin_memchr (p, 0, sz);

>>> +  __PTRDIFF_TYPE__ n = q - p;

>>> +

>>> +  if (n >= __PTRDIFF_MAX__)

>>> +    __builtin_abort ();

>>> +}

>>> +

>>> +void f3 (signed char *p, __SIZE_TYPE__ sz)

>>> +{

>>> +  signed char *q = __builtin_memchr (p, 0, sz);

>>> +  __PTRDIFF_TYPE__ n = q - p;

>>> +

>>> +  if (n >= __PTRDIFF_MAX__)

>>> +    __builtin_abort ();

>>> +}

>>> +

>>> +

>>> +/* { dg-final { scan-tree-dump-not "memchr" 0 "optimized" } } */

>>> diff --git a/gcc/vr-values.c b/gcc/vr-values.c

>>> index 794b4635f9e..2c93f92438a 100644

>>> --- a/gcc/vr-values.c

>>> +++ b/gcc/vr-values.c

>>> @@ -793,6 +793,47 @@ vr_values::extract_range_from_binary_expr (value_range *vr,

>>>

>>>    extract_range_from_binary_expr_1 (vr, code, expr_type, &vr0, &vr1);

>>>

>>> +  /* Set value_range for n in following sequence:

>>> +     def = __builtin_memchr (arg, 0, sz)

>>> +     n = def - arg

>>> +     Here the range for n can be set to [0, PTRDIFF_MAX - 1]. */

>>> +

>>> +  if (vr->type == VR_VARYING

>>> +      && code == POINTER_DIFF_EXPR

>>> +      && TREE_CODE (op0) == SSA_NAME

>>> +      && TREE_CODE (op1) == SSA_NAME)

>>> +    {

>>> +      tree def = op0;

>>> +      tree arg = op1;

>>> +      tree def_type, arg_type;

>>> +

>>> +      gcall *call_stmt = NULL;

>>> +      if (def && arg

>> AFAICT these can never be NULL at this point.  They are op0 and op1

>> respectively and we've verified they are SSA_NAMEs.  So I think this

>> test is redundant and should be removed.

> Indeed. These checks were carried over from my previous patch before

> basing on POINTER_DIFF_EXPR,

> and I forgot to remove them later :/

>>

>>

>>> +       && TREE_CODE (def) == SSA_NAME

>> Similarly this test is redundant with verification that op0 is an

>> SSA_NAME in the outer conditional.

>>

>>> +       && TREE_CODE (TREE_TYPE (def)) == POINTER_TYPE

>>> +       && (def_type = TREE_TYPE (TREE_TYPE (def)))

>>> +       && (def_type == char_type_node || def_type == signed_char_type_node

>>> +           || def_type == unsigned_char_type_node)

>> Why are we checking for equality with these types at all.  Aren't we

>> going to miss equivalent types or types with qualifiers?

>>

>> It looks like the canonical way to do this check in tree-ssa-strlen.c is

>>

>>  (TYPE_MODE (type) == TYPE_MODE (char_type_node)

>>   && TYPE_PRECISION (type) == TYPE_PRECISION (char_type_node))

>>

>> ie, verify that the mode/precision of the object is the same as the

>> mode/precision of a character type.

> Thanks, I updated the patch to check for mode/precision.

>>

>>

>>> +       && TREE_CODE (arg) == SSA_NAME

>> Redundant with the verification that op1 is an SSA_NAME in the outer

>> conditional.

>>

>>> +       && TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE

>>> +       && (arg_type = TREE_TYPE (TREE_TYPE (arg)))

>>> +       && (arg_type == char_type_node || arg_type == signed_char_type_node

>>> +           || arg_type == unsigned_char_type_node)

>> See note above about verifying based on mode/precision.

>>

>> I think with those changes we're probably in good shape.  But please

>> repost for final approval.

> I have the updated the attached version with your suggestions.

> Does it look OK ?

> Bootstrap+test passes on x86_64-unknown-linux-gnu.

THanks.  I've installed this onto the trunk.  I don't think it is good
candidate for backporting though.

Sorry it got lost in the insanity after spectre/meltdown went public.

jeff

Patch

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr82665.c b/gcc/testsuite/gcc.dg/tree-ssa/pr82665.c
new file mode 100644
index 00000000000..17be6ec4e4b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr82665.c
@@ -0,0 +1,22 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+void f1 (char *p, __SIZE_TYPE__ sz)
+{
+  char *q = __builtin_memchr (p, 0, sz);
+  long n = q - p;
+
+  if (n >= __PTRDIFF_MAX__)
+    __builtin_abort ();
+}
+
+void f2 (unsigned char *p, __SIZE_TYPE__ sz)
+{
+  unsigned char *q = __builtin_memchr (p, 0, sz);
+  long n = q - p;
+
+  if (n >= __PTRDIFF_MAX__)
+    __builtin_abort ();
+}
+
+/* { dg-final { scan-tree-dump-times "memchr" 1 "optimized" } } */
diff --git a/gcc/vr-values.c b/gcc/vr-values.c
index 3e760a378fc..ca9dca84ebc 100644
--- a/gcc/vr-values.c
+++ b/gcc/vr-values.c
@@ -773,6 +773,54 @@  vr_values::extract_range_from_binary_expr (value_range *vr,
 
   extract_range_from_binary_expr_1 (vr, code, expr_type, &vr0, &vr1);
 
+  /* Set value_range for n in following sequence:
+     def = __builtin_memchr (arg, 0, sz)
+     op0 = convert_expr def
+     op1 = convert_expr arg
+     n = op0 - op1
+     Here the range for n can be set to [0, PTRDIFF_MAX - 1]. */
+
+  if (vr->type == VR_VARYING
+      && (code == MINUS_EXPR)
+      && (TREE_CODE (op0) == SSA_NAME)
+      && (TREE_CODE (op1) == SSA_NAME)
+      && INTEGRAL_TYPE_P (TREE_TYPE (op0))
+      && INTEGRAL_TYPE_P (TREE_TYPE (op1)))
+    {
+      tree def = NULL_TREE;
+      tree arg = NULL_TREE;
+
+      gassign *s = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (op0));
+      if (s && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (s)))
+	def = gimple_assign_rhs1 (s);
+
+      s = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (op1));
+      if (s && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (s)))
+	arg = gimple_assign_rhs1 (s);
+
+      gcall *call_stmt = NULL;
+      if (def && arg
+	  && (TREE_CODE (def) == SSA_NAME)
+	  && ((TREE_CODE (TREE_TYPE (def)) == POINTER_TYPE)
+	      && (TREE_TYPE (TREE_TYPE (def)) == char_type_node))
+	  && (TREE_CODE (arg) == SSA_NAME)
+	  && ((TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE)
+	      && (TREE_TYPE (TREE_TYPE (arg)) == char_type_node))
+	  && (call_stmt = dyn_cast<gcall *>(SSA_NAME_DEF_STMT (def)))
+	  && (gimple_call_combined_fn (call_stmt) == CFN_BUILT_IN_MEMCHR)
+	  && operand_equal_p (def, gimple_call_lhs (call_stmt), 0)
+	  && operand_equal_p (arg, gimple_call_arg (call_stmt, 0), 0)
+	  && integer_zerop (gimple_call_arg (call_stmt, 1)))
+	    {
+	      tree max = vrp_val_max (ptrdiff_type_node);
+	      wide_int wmax = wi::to_wide (max, TYPE_PRECISION (TREE_TYPE (max)));
+	      tree range_min = build_zero_cst (expr_type);
+	      tree range_max = wide_int_to_tree (expr_type, wmax - 1);
+	      set_value_range (vr, VR_RANGE, range_min, range_max, NULL);
+	      return;
+	    }
+     }
+
   /* Try harder for PLUS and MINUS if the range of one operand is symbolic
      and based on the other operand, for example if it was deduced from a
      symbolic comparison.  When a bound of the range of the first operand