diff mbox

PR78153

Message ID CAAgBjMksprUQcHtB+MNSg=_ppB0Opoq-nopmFCd3_hLuUKhFOw@mail.gmail.com
State Superseded
Headers show

Commit Message

Prathamesh Kulkarni Nov. 23, 2016, 11:49 a.m. UTC
On 23 November 2016 at 15:16, Richard Biener <rguenther@suse.de> wrote:
> On Tue, 22 Nov 2016, Prathamesh Kulkarni wrote:

>

>> On 22 November 2016 at 20:53, Richard Biener <rguenther@suse.de> wrote:

>> > On Tue, 22 Nov 2016, Prathamesh Kulkarni wrote:

>> >

>> >> On 22 November 2016 at 20:18, Richard Biener <rguenther@suse.de> wrote:

>> >> > On Tue, 22 Nov 2016, Prathamesh Kulkarni wrote:

>> >> >

>> >> >> On 21 November 2016 at 15:10, Richard Biener <rguenther@suse.de> wrote:

>> >> >> > On Sun, 20 Nov 2016, Prathamesh Kulkarni wrote:

>> >> >> >

>> >> >> >> Hi,

>> >> >> >> As suggested by Martin in PR78153 strlen's return value cannot exceed

>> >> >> >> PTRDIFF_MAX.

>> >> >> >> So I set it's range to [0, PTRDIFF_MAX - 1] in extract_range_basic()

>> >> >> >> in the attached patch.

>> >> >> >>

>> >> >> >> However it regressed strlenopt-3.c:

>> >> >> >>

>> >> >> >> Consider fn1() from strlenopt-3.c:

>> >> >> >>

>> >> >> >> __attribute__((noinline, noclone)) size_t

>> >> >> >> fn1 (char *p, char *q)

>> >> >> >> {

>> >> >> >>   size_t s = strlen (q);

>> >> >> >>   strcpy (p, q);

>> >> >> >>   return s - strlen (p);

>> >> >> >> }

>> >> >> >>

>> >> >> >> The optimized dump shows the following:

>> >> >> >>

>> >> >> >> __attribute__((noclone, noinline))

>> >> >> >> fn1 (char * p, char * q)

>> >> >> >> {

>> >> >> >>   size_t s;

>> >> >> >>   size_t _7;

>> >> >> >>   long unsigned int _9;

>> >> >> >>

>> >> >> >>   <bb 2>:

>> >> >> >>   s_4 = strlen (q_3(D));

>> >> >> >>   _9 = s_4 + 1;

>> >> >> >>   __builtin_memcpy (p_5(D), q_3(D), _9);

>> >> >> >>   _7 = 0;

>> >> >> >>   return _7;

>> >> >> >>

>> >> >> >> }

>> >> >> >>

>> >> >> >> which introduces the regression, because the test expects "return 0;" in fn1().

>> >> >> >>

>> >> >> >> The issue seems to be in vrp2:

>> >> >> >>

>> >> >> >> Before the patch:

>> >> >> >> Visiting statement:

>> >> >> >> s_4 = strlen (q_3(D));

>> >> >> >> Found new range for s_4: VARYING

>> >> >> >>

>> >> >> >> Visiting statement:

>> >> >> >> _1 = s_4;

>> >> >> >> Found new range for _1: [s_4, s_4]

>> >> >> >> marking stmt to be not simulated again

>> >> >> >>

>> >> >> >> Visiting statement:

>> >> >> >> _7 = s_4 - _1;

>> >> >> >> Applying pattern match.pd:111, gimple-match.c:27997

>> >> >> >> Match-and-simplified s_4 - _1 to 0

>> >> >> >> Intersecting

>> >> >> >>   [0, 0]

>> >> >> >> and

>> >> >> >>   [0, +INF]

>> >> >> >> to

>> >> >> >>   [0, 0]

>> >> >> >> Found new range for _7: [0, 0]

>> >> >> >>

>> >> >> >> __attribute__((noclone, noinline))

>> >> >> >> fn1 (char * p, char * q)

>> >> >> >> {

>> >> >> >>   size_t s;

>> >> >> >>   long unsigned int _1;

>> >> >> >>   long unsigned int _9;

>> >> >> >>

>> >> >> >>   <bb 2>:

>> >> >> >>   s_4 = strlen (q_3(D));

>> >> >> >>   _9 = s_4 + 1;

>> >> >> >>   __builtin_memcpy (p_5(D), q_3(D), _9);

>> >> >> >>   _1 = s_4;

>> >> >> >>   return 0;

>> >> >> >>

>> >> >> >> }

>> >> >> >>

>> >> >> >>

>> >> >> >> After the patch:

>> >> >> >> Visiting statement:

>> >> >> >> s_4 = strlen (q_3(D));

>> >> >> >> Intersecting

>> >> >> >>   [0, 9223372036854775806]

>> >> >> >> and

>> >> >> >>   [0, 9223372036854775806]

>> >> >> >> to

>> >> >> >>   [0, 9223372036854775806]

>> >> >> >> Found new range for s_4: [0, 9223372036854775806]

>> >> >> >> marking stmt to be not simulated again

>> >> >> >>

>> >> >> >> Visiting statement:

>> >> >> >> _1 = s_4;

>> >> >> >> Intersecting

>> >> >> >>   [0, 9223372036854775806]  EQUIVALENCES: { s_4 } (1 elements)

>> >> >> >> and

>> >> >> >>   [0, 9223372036854775806]

>> >> >> >> to

>> >> >> >>   [0, 9223372036854775806]  EQUIVALENCES: { s_4 } (1 elements)

>> >> >> >> Found new range for _1: [0, 9223372036854775806]

>> >> >> >> marking stmt to be not simulated again

>> >> >> >>

>> >> >> >> Visiting statement:

>> >> >> >> _7 = s_4 - _1;

>> >> >> >> Intersecting

>> >> >> >>   ~[9223372036854775807, 9223372036854775809]

>> >> >> >> and

>> >> >> >>   ~[9223372036854775807, 9223372036854775809]

>> >> >> >> to

>> >> >> >>   ~[9223372036854775807, 9223372036854775809]

>> >> >> >> Found new range for _7: ~[9223372036854775807, 9223372036854775809]

>> >> >> >> marking stmt to be not simulated again

>> >> >> >>

>> >> >> >> __attribute__((noclone, noinline))

>> >> >> >> fn1 (char * p, char * q)

>> >> >> >> {

>> >> >> >>   size_t s;

>> >> >> >>   long unsigned int _1;

>> >> >> >>   size_t _7;

>> >> >> >>   long unsigned int _9;

>> >> >> >>

>> >> >> >>   <bb 2>:

>> >> >> >>   s_4 = strlen (q_3(D));

>> >> >> >>   _9 = s_4 + 1;

>> >> >> >>   __builtin_memcpy (p_5(D), q_3(D), _9);

>> >> >> >>   _1 = s_4;

>> >> >> >>   _7 = s_4 - _1;

>> >> >> >>   return _7;

>> >> >> >>

>> >> >> >> }

>> >> >> >>

>> >> >> >> Then forwprop4 turns

>> >> >> >> _1 = s_4

>> >> >> >> _7 = s_4 - _1

>> >> >> >> into

>> >> >> >> _7 = 0

>> >> >> >>

>> >> >> >> and we end up with:

>> >> >> >> _7 = 0

>> >> >> >> return _7

>> >> >> >> in optimized dump.

>> >> >> >>

>> >> >> >> Running ccp again after forwprop4 trivially solves the issue, however

>> >> >> >> I am not sure if we want to run ccp again ?

>> >> >> >>

>> >> >> >> The issue is probably with extract_range_from_ssa_name():

>> >> >> >> For _1 = s_4

>> >> >> >>

>> >> >> >> Before patch:

>> >> >> >> VR for s_4 is set to varying.

>> >> >> >> So VR for _1 is set to [s_4, s_4] by extract_range_from_ssa_name.

>> >> >> >> Since VR for _1 is [s_4, s_4] it implicitly implies that _1 is equal to s_4,

>> >> >> >> and vrp is able to transform _7 = s_4 - _1 to _7 = 0 (by using

>> >> >> >> match.pd pattern x - x -> 0).

>> >> >> >>

>> >> >> >> After patch:

>> >> >> >> VR for s_4 is set to [0, PTRDIFF_MAX - 1]

>> >> >> >> And correspondingly VR for _1 is set to [0, PTRDIFF_MAX - 1]

>> >> >> >> so IIUC, we then lose the information that _1 is equal to s_4,

>> >> >> >

>> >> >> > We don't lose it, it's in its set of equivalencies.

>> >> >> Ah, I missed that, thanks. For some reason I had mis-conception that

>> >> >> equivalences stores

>> >> >> variables which have same value-ranges but are not necessarily equal.

>> >> >> >

>> >> >> >> and vrp doesn't transform _7 = s_4 - _1 to _7 = 0.

>> >> >> >> forwprop4 does that because it sees that s_4 and _1 are equivalent.

>> >> >> >> Does this sound correct ?

>> >> >> >

>> >> >> > Yes.  So the issue is really that vrp_visit_assignment_or_call calls

>> >> >> > gimple_fold_stmt_to_constant_1 with vrp_valueize[_1] which when

>> >> >> > we do not have a singleton VR_RANGE does not fall back to looking

>> >> >> > at equivalences (there's not a good cheap way to do that currently because

>> >> >> > VRP doesn't keep a proper copy lattice but simply IORs equivalences

>> >> >> > from all equivalences).  In theory simply using the first set bit

>> >> >> > might work.  Thus sth like

>> >> >> >

>> >> >> > @@ -7057,6 +7030,12 @@ vrp_valueize (tree name)

>> >> >> >               || is_gimple_min_invariant (vr->min))

>> >> >> >           && vrp_operand_equal_p (vr->min, vr->max))

>> >> >> >         return vr->min;

>> >> >> > +      else if (vr->equiv && ! bitmap_empty_p (vr->equiv))

>> >> >> > +       {

>> >> >> > +         unsigned num = bitmap_first_set_bit (vr->equiv);

>> >> >> > +         if (num < SSA_NAME_VERSION (name))

>> >> >> > +           return ssa_name (num);

>> >> >> > +       }

>> >> >> >      }

>> >> >> >    return name;

>> >> >> >  }

>> >> >> >

>> >> >> > might work with the idea of simply doing canonicalization to one of

>> >> >> > the equivalences.  But as we don't allow copies in the SSA def stmt

>> >> >> > (via vrp_valueize_1) I'm not sure that's good enough canonicalization.

>> >> >> IIUC, we record the equivalent variables in vr->equiv

>> >> >> but do not canonicalize to one of the equivalence like "copy-of value"

>> >> >> in copyprop ?

>> >> >> Using first set bit unfortunately doesn't help for the above case.

>> >> >>

>> >> >> Sorry if this sounds silly, should we just run copyprop/ccp once again

>> >> >> after vrp2 to ensure that there are no copies left ?

>> >> >

>> >> > why?  forwprop also does copy and constant propagation.  For the

>> >> > regression simply adjust the pass dump you scan.

>> >> Well, with the patch the redundant store to and load from _7 still remains

>> >> in optimized dump for fn1() in strlenopt-3.c:

>> >>

>> >> __attribute__((noclone, noinline))

>> >> fn1 (char * p, char * q)

>> >> {

>> >>   size_t s;

>> >>   size_t _7;

>> >>   long unsigned int _9;

>> >>

>> >>   <bb 2>:

>> >>   s_4 = strlen (q_3(D));

>> >>   _9 = s_4 + 1;

>> >>   __builtin_memcpy (p_5(D), q_3(D), _9);

>> >>   _7 = 0;

>> >>   return _7;

>> >>

>> >> }

>> >>

>> >> Running ccp again after forwprop4 would get rid of _7.

>> >> Without the patch we have return _0; in optimized dump.

>> >

>> > Ah, but then that's a missing "folding" of the return.  It's not

>> > a load/store anyway.

>> Hi Richard,

>> Thanks for the suggestion. In the attached untested patch, I tried to

>> modify forwprop to fold return-value to constant.

>> The optimized dump shows return 0; for the above test-case with this patch.

>> Does it look OK ?

>

> No, the fix is to make fold_stmt_1 handle GIMPLE_RETURN and simply

> valueize the return value (note 'valueize' might return NULL or be NULL).

>

Hi Richard,
Does the attached patch look OK ? I verified it prevents the
regression for above case.
Bootstrap+test on x86_64-unknown-linux-gnu in progress.

Thanks,
Prathamesh
> Richard.

>

>>

>> Thanks,

>> Prathamesh

>> >

>> > Richard.

>> >

>> >> Thanks,

>> >> Prathamesh

>> >> >

>> >> >> However that might be quite expensive ?

>> >> >> Or make vrp track copies like copyprop using a separate copy-of lattice ?

>> >> >

>> >> > Ideally we'd unify the three SSA propagation passes into one.  We'd

>> >> > have to have separate lattices for copy&constant and range&known-bits.

>> >> >

>> >> > Richard.

>> >> >

>> >> >> Thanks,

>> >> >> Prathamesh

>> >> >> >

>> >> >> > Richard.

>> >> >>

>> >> >>

>> >> >

>> >> > --

>> >> > Richard Biener <rguenther@suse.de>

>> >> > SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)

>> >>

>> >>

>> >

>> > --

>> > Richard Biener <rguenther@suse.de>

>> > SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)

>>

>

> --

> Richard Biener <rguenther@suse.de>

> SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)

Comments

Richard Biener Nov. 23, 2016, 11:51 a.m. UTC | #1
On Wed, 23 Nov 2016, Prathamesh Kulkarni wrote:

> On 23 November 2016 at 15:16, Richard Biener <rguenther@suse.de> wrote:

> > On Tue, 22 Nov 2016, Prathamesh Kulkarni wrote:

> >

> >> On 22 November 2016 at 20:53, Richard Biener <rguenther@suse.de> wrote:

> >> > On Tue, 22 Nov 2016, Prathamesh Kulkarni wrote:

> >> >

> >> >> On 22 November 2016 at 20:18, Richard Biener <rguenther@suse.de> wrote:

> >> >> > On Tue, 22 Nov 2016, Prathamesh Kulkarni wrote:

> >> >> >

> >> >> >> On 21 November 2016 at 15:10, Richard Biener <rguenther@suse.de> wrote:

> >> >> >> > On Sun, 20 Nov 2016, Prathamesh Kulkarni wrote:

> >> >> >> >

> >> >> >> >> Hi,

> >> >> >> >> As suggested by Martin in PR78153 strlen's return value cannot exceed

> >> >> >> >> PTRDIFF_MAX.

> >> >> >> >> So I set it's range to [0, PTRDIFF_MAX - 1] in extract_range_basic()

> >> >> >> >> in the attached patch.

> >> >> >> >>

> >> >> >> >> However it regressed strlenopt-3.c:

> >> >> >> >>

> >> >> >> >> Consider fn1() from strlenopt-3.c:

> >> >> >> >>

> >> >> >> >> __attribute__((noinline, noclone)) size_t

> >> >> >> >> fn1 (char *p, char *q)

> >> >> >> >> {

> >> >> >> >>   size_t s = strlen (q);

> >> >> >> >>   strcpy (p, q);

> >> >> >> >>   return s - strlen (p);

> >> >> >> >> }

> >> >> >> >>

> >> >> >> >> The optimized dump shows the following:

> >> >> >> >>

> >> >> >> >> __attribute__((noclone, noinline))

> >> >> >> >> fn1 (char * p, char * q)

> >> >> >> >> {

> >> >> >> >>   size_t s;

> >> >> >> >>   size_t _7;

> >> >> >> >>   long unsigned int _9;

> >> >> >> >>

> >> >> >> >>   <bb 2>:

> >> >> >> >>   s_4 = strlen (q_3(D));

> >> >> >> >>   _9 = s_4 + 1;

> >> >> >> >>   __builtin_memcpy (p_5(D), q_3(D), _9);

> >> >> >> >>   _7 = 0;

> >> >> >> >>   return _7;

> >> >> >> >>

> >> >> >> >> }

> >> >> >> >>

> >> >> >> >> which introduces the regression, because the test expects "return 0;" in fn1().

> >> >> >> >>

> >> >> >> >> The issue seems to be in vrp2:

> >> >> >> >>

> >> >> >> >> Before the patch:

> >> >> >> >> Visiting statement:

> >> >> >> >> s_4 = strlen (q_3(D));

> >> >> >> >> Found new range for s_4: VARYING

> >> >> >> >>

> >> >> >> >> Visiting statement:

> >> >> >> >> _1 = s_4;

> >> >> >> >> Found new range for _1: [s_4, s_4]

> >> >> >> >> marking stmt to be not simulated again

> >> >> >> >>

> >> >> >> >> Visiting statement:

> >> >> >> >> _7 = s_4 - _1;

> >> >> >> >> Applying pattern match.pd:111, gimple-match.c:27997

> >> >> >> >> Match-and-simplified s_4 - _1 to 0

> >> >> >> >> Intersecting

> >> >> >> >>   [0, 0]

> >> >> >> >> and

> >> >> >> >>   [0, +INF]

> >> >> >> >> to

> >> >> >> >>   [0, 0]

> >> >> >> >> Found new range for _7: [0, 0]

> >> >> >> >>

> >> >> >> >> __attribute__((noclone, noinline))

> >> >> >> >> fn1 (char * p, char * q)

> >> >> >> >> {

> >> >> >> >>   size_t s;

> >> >> >> >>   long unsigned int _1;

> >> >> >> >>   long unsigned int _9;

> >> >> >> >>

> >> >> >> >>   <bb 2>:

> >> >> >> >>   s_4 = strlen (q_3(D));

> >> >> >> >>   _9 = s_4 + 1;

> >> >> >> >>   __builtin_memcpy (p_5(D), q_3(D), _9);

> >> >> >> >>   _1 = s_4;

> >> >> >> >>   return 0;

> >> >> >> >>

> >> >> >> >> }

> >> >> >> >>

> >> >> >> >>

> >> >> >> >> After the patch:

> >> >> >> >> Visiting statement:

> >> >> >> >> s_4 = strlen (q_3(D));

> >> >> >> >> Intersecting

> >> >> >> >>   [0, 9223372036854775806]

> >> >> >> >> and

> >> >> >> >>   [0, 9223372036854775806]

> >> >> >> >> to

> >> >> >> >>   [0, 9223372036854775806]

> >> >> >> >> Found new range for s_4: [0, 9223372036854775806]

> >> >> >> >> marking stmt to be not simulated again

> >> >> >> >>

> >> >> >> >> Visiting statement:

> >> >> >> >> _1 = s_4;

> >> >> >> >> Intersecting

> >> >> >> >>   [0, 9223372036854775806]  EQUIVALENCES: { s_4 } (1 elements)

> >> >> >> >> and

> >> >> >> >>   [0, 9223372036854775806]

> >> >> >> >> to

> >> >> >> >>   [0, 9223372036854775806]  EQUIVALENCES: { s_4 } (1 elements)

> >> >> >> >> Found new range for _1: [0, 9223372036854775806]

> >> >> >> >> marking stmt to be not simulated again

> >> >> >> >>

> >> >> >> >> Visiting statement:

> >> >> >> >> _7 = s_4 - _1;

> >> >> >> >> Intersecting

> >> >> >> >>   ~[9223372036854775807, 9223372036854775809]

> >> >> >> >> and

> >> >> >> >>   ~[9223372036854775807, 9223372036854775809]

> >> >> >> >> to

> >> >> >> >>   ~[9223372036854775807, 9223372036854775809]

> >> >> >> >> Found new range for _7: ~[9223372036854775807, 9223372036854775809]

> >> >> >> >> marking stmt to be not simulated again

> >> >> >> >>

> >> >> >> >> __attribute__((noclone, noinline))

> >> >> >> >> fn1 (char * p, char * q)

> >> >> >> >> {

> >> >> >> >>   size_t s;

> >> >> >> >>   long unsigned int _1;

> >> >> >> >>   size_t _7;

> >> >> >> >>   long unsigned int _9;

> >> >> >> >>

> >> >> >> >>   <bb 2>:

> >> >> >> >>   s_4 = strlen (q_3(D));

> >> >> >> >>   _9 = s_4 + 1;

> >> >> >> >>   __builtin_memcpy (p_5(D), q_3(D), _9);

> >> >> >> >>   _1 = s_4;

> >> >> >> >>   _7 = s_4 - _1;

> >> >> >> >>   return _7;

> >> >> >> >>

> >> >> >> >> }

> >> >> >> >>

> >> >> >> >> Then forwprop4 turns

> >> >> >> >> _1 = s_4

> >> >> >> >> _7 = s_4 - _1

> >> >> >> >> into

> >> >> >> >> _7 = 0

> >> >> >> >>

> >> >> >> >> and we end up with:

> >> >> >> >> _7 = 0

> >> >> >> >> return _7

> >> >> >> >> in optimized dump.

> >> >> >> >>

> >> >> >> >> Running ccp again after forwprop4 trivially solves the issue, however

> >> >> >> >> I am not sure if we want to run ccp again ?

> >> >> >> >>

> >> >> >> >> The issue is probably with extract_range_from_ssa_name():

> >> >> >> >> For _1 = s_4

> >> >> >> >>

> >> >> >> >> Before patch:

> >> >> >> >> VR for s_4 is set to varying.

> >> >> >> >> So VR for _1 is set to [s_4, s_4] by extract_range_from_ssa_name.

> >> >> >> >> Since VR for _1 is [s_4, s_4] it implicitly implies that _1 is equal to s_4,

> >> >> >> >> and vrp is able to transform _7 = s_4 - _1 to _7 = 0 (by using

> >> >> >> >> match.pd pattern x - x -> 0).

> >> >> >> >>

> >> >> >> >> After patch:

> >> >> >> >> VR for s_4 is set to [0, PTRDIFF_MAX - 1]

> >> >> >> >> And correspondingly VR for _1 is set to [0, PTRDIFF_MAX - 1]

> >> >> >> >> so IIUC, we then lose the information that _1 is equal to s_4,

> >> >> >> >

> >> >> >> > We don't lose it, it's in its set of equivalencies.

> >> >> >> Ah, I missed that, thanks. For some reason I had mis-conception that

> >> >> >> equivalences stores

> >> >> >> variables which have same value-ranges but are not necessarily equal.

> >> >> >> >

> >> >> >> >> and vrp doesn't transform _7 = s_4 - _1 to _7 = 0.

> >> >> >> >> forwprop4 does that because it sees that s_4 and _1 are equivalent.

> >> >> >> >> Does this sound correct ?

> >> >> >> >

> >> >> >> > Yes.  So the issue is really that vrp_visit_assignment_or_call calls

> >> >> >> > gimple_fold_stmt_to_constant_1 with vrp_valueize[_1] which when

> >> >> >> > we do not have a singleton VR_RANGE does not fall back to looking

> >> >> >> > at equivalences (there's not a good cheap way to do that currently because

> >> >> >> > VRP doesn't keep a proper copy lattice but simply IORs equivalences

> >> >> >> > from all equivalences).  In theory simply using the first set bit

> >> >> >> > might work.  Thus sth like

> >> >> >> >

> >> >> >> > @@ -7057,6 +7030,12 @@ vrp_valueize (tree name)

> >> >> >> >               || is_gimple_min_invariant (vr->min))

> >> >> >> >           && vrp_operand_equal_p (vr->min, vr->max))

> >> >> >> >         return vr->min;

> >> >> >> > +      else if (vr->equiv && ! bitmap_empty_p (vr->equiv))

> >> >> >> > +       {

> >> >> >> > +         unsigned num = bitmap_first_set_bit (vr->equiv);

> >> >> >> > +         if (num < SSA_NAME_VERSION (name))

> >> >> >> > +           return ssa_name (num);

> >> >> >> > +       }

> >> >> >> >      }

> >> >> >> >    return name;

> >> >> >> >  }

> >> >> >> >

> >> >> >> > might work with the idea of simply doing canonicalization to one of

> >> >> >> > the equivalences.  But as we don't allow copies in the SSA def stmt

> >> >> >> > (via vrp_valueize_1) I'm not sure that's good enough canonicalization.

> >> >> >> IIUC, we record the equivalent variables in vr->equiv

> >> >> >> but do not canonicalize to one of the equivalence like "copy-of value"

> >> >> >> in copyprop ?

> >> >> >> Using first set bit unfortunately doesn't help for the above case.

> >> >> >>

> >> >> >> Sorry if this sounds silly, should we just run copyprop/ccp once again

> >> >> >> after vrp2 to ensure that there are no copies left ?

> >> >> >

> >> >> > why?  forwprop also does copy and constant propagation.  For the

> >> >> > regression simply adjust the pass dump you scan.

> >> >> Well, with the patch the redundant store to and load from _7 still remains

> >> >> in optimized dump for fn1() in strlenopt-3.c:

> >> >>

> >> >> __attribute__((noclone, noinline))

> >> >> fn1 (char * p, char * q)

> >> >> {

> >> >>   size_t s;

> >> >>   size_t _7;

> >> >>   long unsigned int _9;

> >> >>

> >> >>   <bb 2>:

> >> >>   s_4 = strlen (q_3(D));

> >> >>   _9 = s_4 + 1;

> >> >>   __builtin_memcpy (p_5(D), q_3(D), _9);

> >> >>   _7 = 0;

> >> >>   return _7;

> >> >>

> >> >> }

> >> >>

> >> >> Running ccp again after forwprop4 would get rid of _7.

> >> >> Without the patch we have return _0; in optimized dump.

> >> >

> >> > Ah, but then that's a missing "folding" of the return.  It's not

> >> > a load/store anyway.

> >> Hi Richard,

> >> Thanks for the suggestion. In the attached untested patch, I tried to

> >> modify forwprop to fold return-value to constant.

> >> The optimized dump shows return 0; for the above test-case with this patch.

> >> Does it look OK ?

> >

> > No, the fix is to make fold_stmt_1 handle GIMPLE_RETURN and simply

> > valueize the return value (note 'valueize' might return NULL or be NULL).

> >

> Hi Richard,

> Does the attached patch look OK ? I verified it prevents the

> regression for above case.

> Bootstrap+test on x86_64-unknown-linux-gnu in progress.


+           tree val = valueize (ret);
+           if (val && TREE_CONSTANT (val))
+             {

ok apart from the TREE_CONSTANT check which should be necessary
(it misses applying copy propagation).

Thanks,
Richard.

> Thanks,

> Prathamesh

> > Richard.

> >

> >>

> >> Thanks,

> >> Prathamesh

> >> >

> >> > Richard.

> >> >

> >> >> Thanks,

> >> >> Prathamesh

> >> >> >

> >> >> >> However that might be quite expensive ?

> >> >> >> Or make vrp track copies like copyprop using a separate copy-of lattice ?

> >> >> >

> >> >> > Ideally we'd unify the three SSA propagation passes into one.  We'd

> >> >> > have to have separate lattices for copy&constant and range&known-bits.

> >> >> >

> >> >> > Richard.

> >> >> >

> >> >> >> Thanks,

> >> >> >> Prathamesh

> >> >> >> >

> >> >> >> > Richard.

> >> >> >>

> >> >> >>

> >> >> >

> >> >> > --

> >> >> > Richard Biener <rguenther@suse.de>

> >> >> > SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)

> >> >>

> >> >>

> >> >

> >> > --

> >> > Richard Biener <rguenther@suse.de>

> >> > SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)

> >>

> >

> > --

> > Richard Biener <rguenther@suse.de>

> > SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)

> 


-- 
Richard Biener <rguenther@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)
Prathamesh Kulkarni Nov. 23, 2016, 12:05 p.m. UTC | #2
On 23 November 2016 at 17:21, Richard Biener <rguenther@suse.de> wrote:
> On Wed, 23 Nov 2016, Prathamesh Kulkarni wrote:

>

>> On 23 November 2016 at 15:16, Richard Biener <rguenther@suse.de> wrote:

>> > On Tue, 22 Nov 2016, Prathamesh Kulkarni wrote:

>> >

>> >> On 22 November 2016 at 20:53, Richard Biener <rguenther@suse.de> wrote:

>> >> > On Tue, 22 Nov 2016, Prathamesh Kulkarni wrote:

>> >> >

>> >> >> On 22 November 2016 at 20:18, Richard Biener <rguenther@suse.de> wrote:

>> >> >> > On Tue, 22 Nov 2016, Prathamesh Kulkarni wrote:

>> >> >> >

>> >> >> >> On 21 November 2016 at 15:10, Richard Biener <rguenther@suse.de> wrote:

>> >> >> >> > On Sun, 20 Nov 2016, Prathamesh Kulkarni wrote:

>> >> >> >> >

>> >> >> >> >> Hi,

>> >> >> >> >> As suggested by Martin in PR78153 strlen's return value cannot exceed

>> >> >> >> >> PTRDIFF_MAX.

>> >> >> >> >> So I set it's range to [0, PTRDIFF_MAX - 1] in extract_range_basic()

>> >> >> >> >> in the attached patch.

>> >> >> >> >>

>> >> >> >> >> However it regressed strlenopt-3.c:

>> >> >> >> >>

>> >> >> >> >> Consider fn1() from strlenopt-3.c:

>> >> >> >> >>

>> >> >> >> >> __attribute__((noinline, noclone)) size_t

>> >> >> >> >> fn1 (char *p, char *q)

>> >> >> >> >> {

>> >> >> >> >>   size_t s = strlen (q);

>> >> >> >> >>   strcpy (p, q);

>> >> >> >> >>   return s - strlen (p);

>> >> >> >> >> }

>> >> >> >> >>

>> >> >> >> >> The optimized dump shows the following:

>> >> >> >> >>

>> >> >> >> >> __attribute__((noclone, noinline))

>> >> >> >> >> fn1 (char * p, char * q)

>> >> >> >> >> {

>> >> >> >> >>   size_t s;

>> >> >> >> >>   size_t _7;

>> >> >> >> >>   long unsigned int _9;

>> >> >> >> >>

>> >> >> >> >>   <bb 2>:

>> >> >> >> >>   s_4 = strlen (q_3(D));

>> >> >> >> >>   _9 = s_4 + 1;

>> >> >> >> >>   __builtin_memcpy (p_5(D), q_3(D), _9);

>> >> >> >> >>   _7 = 0;

>> >> >> >> >>   return _7;

>> >> >> >> >>

>> >> >> >> >> }

>> >> >> >> >>

>> >> >> >> >> which introduces the regression, because the test expects "return 0;" in fn1().

>> >> >> >> >>

>> >> >> >> >> The issue seems to be in vrp2:

>> >> >> >> >>

>> >> >> >> >> Before the patch:

>> >> >> >> >> Visiting statement:

>> >> >> >> >> s_4 = strlen (q_3(D));

>> >> >> >> >> Found new range for s_4: VARYING

>> >> >> >> >>

>> >> >> >> >> Visiting statement:

>> >> >> >> >> _1 = s_4;

>> >> >> >> >> Found new range for _1: [s_4, s_4]

>> >> >> >> >> marking stmt to be not simulated again

>> >> >> >> >>

>> >> >> >> >> Visiting statement:

>> >> >> >> >> _7 = s_4 - _1;

>> >> >> >> >> Applying pattern match.pd:111, gimple-match.c:27997

>> >> >> >> >> Match-and-simplified s_4 - _1 to 0

>> >> >> >> >> Intersecting

>> >> >> >> >>   [0, 0]

>> >> >> >> >> and

>> >> >> >> >>   [0, +INF]

>> >> >> >> >> to

>> >> >> >> >>   [0, 0]

>> >> >> >> >> Found new range for _7: [0, 0]

>> >> >> >> >>

>> >> >> >> >> __attribute__((noclone, noinline))

>> >> >> >> >> fn1 (char * p, char * q)

>> >> >> >> >> {

>> >> >> >> >>   size_t s;

>> >> >> >> >>   long unsigned int _1;

>> >> >> >> >>   long unsigned int _9;

>> >> >> >> >>

>> >> >> >> >>   <bb 2>:

>> >> >> >> >>   s_4 = strlen (q_3(D));

>> >> >> >> >>   _9 = s_4 + 1;

>> >> >> >> >>   __builtin_memcpy (p_5(D), q_3(D), _9);

>> >> >> >> >>   _1 = s_4;

>> >> >> >> >>   return 0;

>> >> >> >> >>

>> >> >> >> >> }

>> >> >> >> >>

>> >> >> >> >>

>> >> >> >> >> After the patch:

>> >> >> >> >> Visiting statement:

>> >> >> >> >> s_4 = strlen (q_3(D));

>> >> >> >> >> Intersecting

>> >> >> >> >>   [0, 9223372036854775806]

>> >> >> >> >> and

>> >> >> >> >>   [0, 9223372036854775806]

>> >> >> >> >> to

>> >> >> >> >>   [0, 9223372036854775806]

>> >> >> >> >> Found new range for s_4: [0, 9223372036854775806]

>> >> >> >> >> marking stmt to be not simulated again

>> >> >> >> >>

>> >> >> >> >> Visiting statement:

>> >> >> >> >> _1 = s_4;

>> >> >> >> >> Intersecting

>> >> >> >> >>   [0, 9223372036854775806]  EQUIVALENCES: { s_4 } (1 elements)

>> >> >> >> >> and

>> >> >> >> >>   [0, 9223372036854775806]

>> >> >> >> >> to

>> >> >> >> >>   [0, 9223372036854775806]  EQUIVALENCES: { s_4 } (1 elements)

>> >> >> >> >> Found new range for _1: [0, 9223372036854775806]

>> >> >> >> >> marking stmt to be not simulated again

>> >> >> >> >>

>> >> >> >> >> Visiting statement:

>> >> >> >> >> _7 = s_4 - _1;

>> >> >> >> >> Intersecting

>> >> >> >> >>   ~[9223372036854775807, 9223372036854775809]

>> >> >> >> >> and

>> >> >> >> >>   ~[9223372036854775807, 9223372036854775809]

>> >> >> >> >> to

>> >> >> >> >>   ~[9223372036854775807, 9223372036854775809]

>> >> >> >> >> Found new range for _7: ~[9223372036854775807, 9223372036854775809]

>> >> >> >> >> marking stmt to be not simulated again

>> >> >> >> >>

>> >> >> >> >> __attribute__((noclone, noinline))

>> >> >> >> >> fn1 (char * p, char * q)

>> >> >> >> >> {

>> >> >> >> >>   size_t s;

>> >> >> >> >>   long unsigned int _1;

>> >> >> >> >>   size_t _7;

>> >> >> >> >>   long unsigned int _9;

>> >> >> >> >>

>> >> >> >> >>   <bb 2>:

>> >> >> >> >>   s_4 = strlen (q_3(D));

>> >> >> >> >>   _9 = s_4 + 1;

>> >> >> >> >>   __builtin_memcpy (p_5(D), q_3(D), _9);

>> >> >> >> >>   _1 = s_4;

>> >> >> >> >>   _7 = s_4 - _1;

>> >> >> >> >>   return _7;

>> >> >> >> >>

>> >> >> >> >> }

>> >> >> >> >>

>> >> >> >> >> Then forwprop4 turns

>> >> >> >> >> _1 = s_4

>> >> >> >> >> _7 = s_4 - _1

>> >> >> >> >> into

>> >> >> >> >> _7 = 0

>> >> >> >> >>

>> >> >> >> >> and we end up with:

>> >> >> >> >> _7 = 0

>> >> >> >> >> return _7

>> >> >> >> >> in optimized dump.

>> >> >> >> >>

>> >> >> >> >> Running ccp again after forwprop4 trivially solves the issue, however

>> >> >> >> >> I am not sure if we want to run ccp again ?

>> >> >> >> >>

>> >> >> >> >> The issue is probably with extract_range_from_ssa_name():

>> >> >> >> >> For _1 = s_4

>> >> >> >> >>

>> >> >> >> >> Before patch:

>> >> >> >> >> VR for s_4 is set to varying.

>> >> >> >> >> So VR for _1 is set to [s_4, s_4] by extract_range_from_ssa_name.

>> >> >> >> >> Since VR for _1 is [s_4, s_4] it implicitly implies that _1 is equal to s_4,

>> >> >> >> >> and vrp is able to transform _7 = s_4 - _1 to _7 = 0 (by using

>> >> >> >> >> match.pd pattern x - x -> 0).

>> >> >> >> >>

>> >> >> >> >> After patch:

>> >> >> >> >> VR for s_4 is set to [0, PTRDIFF_MAX - 1]

>> >> >> >> >> And correspondingly VR for _1 is set to [0, PTRDIFF_MAX - 1]

>> >> >> >> >> so IIUC, we then lose the information that _1 is equal to s_4,

>> >> >> >> >

>> >> >> >> > We don't lose it, it's in its set of equivalencies.

>> >> >> >> Ah, I missed that, thanks. For some reason I had mis-conception that

>> >> >> >> equivalences stores

>> >> >> >> variables which have same value-ranges but are not necessarily equal.

>> >> >> >> >

>> >> >> >> >> and vrp doesn't transform _7 = s_4 - _1 to _7 = 0.

>> >> >> >> >> forwprop4 does that because it sees that s_4 and _1 are equivalent.

>> >> >> >> >> Does this sound correct ?

>> >> >> >> >

>> >> >> >> > Yes.  So the issue is really that vrp_visit_assignment_or_call calls

>> >> >> >> > gimple_fold_stmt_to_constant_1 with vrp_valueize[_1] which when

>> >> >> >> > we do not have a singleton VR_RANGE does not fall back to looking

>> >> >> >> > at equivalences (there's not a good cheap way to do that currently because

>> >> >> >> > VRP doesn't keep a proper copy lattice but simply IORs equivalences

>> >> >> >> > from all equivalences).  In theory simply using the first set bit

>> >> >> >> > might work.  Thus sth like

>> >> >> >> >

>> >> >> >> > @@ -7057,6 +7030,12 @@ vrp_valueize (tree name)

>> >> >> >> >               || is_gimple_min_invariant (vr->min))

>> >> >> >> >           && vrp_operand_equal_p (vr->min, vr->max))

>> >> >> >> >         return vr->min;

>> >> >> >> > +      else if (vr->equiv && ! bitmap_empty_p (vr->equiv))

>> >> >> >> > +       {

>> >> >> >> > +         unsigned num = bitmap_first_set_bit (vr->equiv);

>> >> >> >> > +         if (num < SSA_NAME_VERSION (name))

>> >> >> >> > +           return ssa_name (num);

>> >> >> >> > +       }

>> >> >> >> >      }

>> >> >> >> >    return name;

>> >> >> >> >  }

>> >> >> >> >

>> >> >> >> > might work with the idea of simply doing canonicalization to one of

>> >> >> >> > the equivalences.  But as we don't allow copies in the SSA def stmt

>> >> >> >> > (via vrp_valueize_1) I'm not sure that's good enough canonicalization.

>> >> >> >> IIUC, we record the equivalent variables in vr->equiv

>> >> >> >> but do not canonicalize to one of the equivalence like "copy-of value"

>> >> >> >> in copyprop ?

>> >> >> >> Using first set bit unfortunately doesn't help for the above case.

>> >> >> >>

>> >> >> >> Sorry if this sounds silly, should we just run copyprop/ccp once again

>> >> >> >> after vrp2 to ensure that there are no copies left ?

>> >> >> >

>> >> >> > why?  forwprop also does copy and constant propagation.  For the

>> >> >> > regression simply adjust the pass dump you scan.

>> >> >> Well, with the patch the redundant store to and load from _7 still remains

>> >> >> in optimized dump for fn1() in strlenopt-3.c:

>> >> >>

>> >> >> __attribute__((noclone, noinline))

>> >> >> fn1 (char * p, char * q)

>> >> >> {

>> >> >>   size_t s;

>> >> >>   size_t _7;

>> >> >>   long unsigned int _9;

>> >> >>

>> >> >>   <bb 2>:

>> >> >>   s_4 = strlen (q_3(D));

>> >> >>   _9 = s_4 + 1;

>> >> >>   __builtin_memcpy (p_5(D), q_3(D), _9);

>> >> >>   _7 = 0;

>> >> >>   return _7;

>> >> >>

>> >> >> }

>> >> >>

>> >> >> Running ccp again after forwprop4 would get rid of _7.

>> >> >> Without the patch we have return _0; in optimized dump.

>> >> >

>> >> > Ah, but then that's a missing "folding" of the return.  It's not

>> >> > a load/store anyway.

>> >> Hi Richard,

>> >> Thanks for the suggestion. In the attached untested patch, I tried to

>> >> modify forwprop to fold return-value to constant.

>> >> The optimized dump shows return 0; for the above test-case with this patch.

>> >> Does it look OK ?

>> >

>> > No, the fix is to make fold_stmt_1 handle GIMPLE_RETURN and simply

>> > valueize the return value (note 'valueize' might return NULL or be NULL).

>> >

>> Hi Richard,

>> Does the attached patch look OK ? I verified it prevents the

>> regression for above case.

>> Bootstrap+test on x86_64-unknown-linux-gnu in progress.

>

> +           tree val = valueize (ret);

> +           if (val && TREE_CONSTANT (val))

> +             {

>

> ok apart from the TREE_CONSTANT check which should be necessary

> (it misses applying copy propagation).

Well without TREE_CONSTANT check, it goes into infinite loop for above  test,
which would happen if valueize (ret) returns ret
Instead of TREE_CONSTANT is the following check OK ?
tree val = valueize (ret);
if (val && val != ret)
  {
    gimple_return_set_retval (ret_stmt, val);
    changed = true;
  }

Thanks,
Prathamesh
>

> Thanks,

> Richard.

>

>> Thanks,

>> Prathamesh

>> > Richard.

>> >

>> >>

>> >> Thanks,

>> >> Prathamesh

>> >> >

>> >> > Richard.

>> >> >

>> >> >> Thanks,

>> >> >> Prathamesh

>> >> >> >

>> >> >> >> However that might be quite expensive ?

>> >> >> >> Or make vrp track copies like copyprop using a separate copy-of lattice ?

>> >> >> >

>> >> >> > Ideally we'd unify the three SSA propagation passes into one.  We'd

>> >> >> > have to have separate lattices for copy&constant and range&known-bits.

>> >> >> >

>> >> >> > Richard.

>> >> >> >

>> >> >> >> Thanks,

>> >> >> >> Prathamesh

>> >> >> >> >

>> >> >> >> > Richard.

>> >> >> >>

>> >> >> >>

>> >> >> >

>> >> >> > --

>> >> >> > Richard Biener <rguenther@suse.de>

>> >> >> > SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)

>> >> >>

>> >> >>

>> >> >

>> >> > --

>> >> > Richard Biener <rguenther@suse.de>

>> >> > SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)

>> >>

>> >

>> > --

>> > Richard Biener <rguenther@suse.de>

>> > SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)

>>

>

> --

> Richard Biener <rguenther@suse.de>

> SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)
Richard Biener Nov. 23, 2016, 12:21 p.m. UTC | #3
On Wed, 23 Nov 2016, Prathamesh Kulkarni wrote:

> On 23 November 2016 at 17:21, Richard Biener <rguenther@suse.de> wrote:

> > On Wed, 23 Nov 2016, Prathamesh Kulkarni wrote:

> >

> >> On 23 November 2016 at 15:16, Richard Biener <rguenther@suse.de> wrote:

> >> > On Tue, 22 Nov 2016, Prathamesh Kulkarni wrote:

> >> >

> >> >> On 22 November 2016 at 20:53, Richard Biener <rguenther@suse.de> wrote:

> >> >> > On Tue, 22 Nov 2016, Prathamesh Kulkarni wrote:

> >> >> >

> >> >> >> On 22 November 2016 at 20:18, Richard Biener <rguenther@suse.de> wrote:

> >> >> >> > On Tue, 22 Nov 2016, Prathamesh Kulkarni wrote:

> >> >> >> >

> >> >> >> >> On 21 November 2016 at 15:10, Richard Biener <rguenther@suse.de> wrote:

> >> >> >> >> > On Sun, 20 Nov 2016, Prathamesh Kulkarni wrote:

> >> >> >> >> >

> >> >> >> >> >> Hi,

> >> >> >> >> >> As suggested by Martin in PR78153 strlen's return value cannot exceed

> >> >> >> >> >> PTRDIFF_MAX.

> >> >> >> >> >> So I set it's range to [0, PTRDIFF_MAX - 1] in extract_range_basic()

> >> >> >> >> >> in the attached patch.

> >> >> >> >> >>

> >> >> >> >> >> However it regressed strlenopt-3.c:

> >> >> >> >> >>

> >> >> >> >> >> Consider fn1() from strlenopt-3.c:

> >> >> >> >> >>

> >> >> >> >> >> __attribute__((noinline, noclone)) size_t

> >> >> >> >> >> fn1 (char *p, char *q)

> >> >> >> >> >> {

> >> >> >> >> >>   size_t s = strlen (q);

> >> >> >> >> >>   strcpy (p, q);

> >> >> >> >> >>   return s - strlen (p);

> >> >> >> >> >> }

> >> >> >> >> >>

> >> >> >> >> >> The optimized dump shows the following:

> >> >> >> >> >>

> >> >> >> >> >> __attribute__((noclone, noinline))

> >> >> >> >> >> fn1 (char * p, char * q)

> >> >> >> >> >> {

> >> >> >> >> >>   size_t s;

> >> >> >> >> >>   size_t _7;

> >> >> >> >> >>   long unsigned int _9;

> >> >> >> >> >>

> >> >> >> >> >>   <bb 2>:

> >> >> >> >> >>   s_4 = strlen (q_3(D));

> >> >> >> >> >>   _9 = s_4 + 1;

> >> >> >> >> >>   __builtin_memcpy (p_5(D), q_3(D), _9);

> >> >> >> >> >>   _7 = 0;

> >> >> >> >> >>   return _7;

> >> >> >> >> >>

> >> >> >> >> >> }

> >> >> >> >> >>

> >> >> >> >> >> which introduces the regression, because the test expects "return 0;" in fn1().

> >> >> >> >> >>

> >> >> >> >> >> The issue seems to be in vrp2:

> >> >> >> >> >>

> >> >> >> >> >> Before the patch:

> >> >> >> >> >> Visiting statement:

> >> >> >> >> >> s_4 = strlen (q_3(D));

> >> >> >> >> >> Found new range for s_4: VARYING

> >> >> >> >> >>

> >> >> >> >> >> Visiting statement:

> >> >> >> >> >> _1 = s_4;

> >> >> >> >> >> Found new range for _1: [s_4, s_4]

> >> >> >> >> >> marking stmt to be not simulated again

> >> >> >> >> >>

> >> >> >> >> >> Visiting statement:

> >> >> >> >> >> _7 = s_4 - _1;

> >> >> >> >> >> Applying pattern match.pd:111, gimple-match.c:27997

> >> >> >> >> >> Match-and-simplified s_4 - _1 to 0

> >> >> >> >> >> Intersecting

> >> >> >> >> >>   [0, 0]

> >> >> >> >> >> and

> >> >> >> >> >>   [0, +INF]

> >> >> >> >> >> to

> >> >> >> >> >>   [0, 0]

> >> >> >> >> >> Found new range for _7: [0, 0]

> >> >> >> >> >>

> >> >> >> >> >> __attribute__((noclone, noinline))

> >> >> >> >> >> fn1 (char * p, char * q)

> >> >> >> >> >> {

> >> >> >> >> >>   size_t s;

> >> >> >> >> >>   long unsigned int _1;

> >> >> >> >> >>   long unsigned int _9;

> >> >> >> >> >>

> >> >> >> >> >>   <bb 2>:

> >> >> >> >> >>   s_4 = strlen (q_3(D));

> >> >> >> >> >>   _9 = s_4 + 1;

> >> >> >> >> >>   __builtin_memcpy (p_5(D), q_3(D), _9);

> >> >> >> >> >>   _1 = s_4;

> >> >> >> >> >>   return 0;

> >> >> >> >> >>

> >> >> >> >> >> }

> >> >> >> >> >>

> >> >> >> >> >>

> >> >> >> >> >> After the patch:

> >> >> >> >> >> Visiting statement:

> >> >> >> >> >> s_4 = strlen (q_3(D));

> >> >> >> >> >> Intersecting

> >> >> >> >> >>   [0, 9223372036854775806]

> >> >> >> >> >> and

> >> >> >> >> >>   [0, 9223372036854775806]

> >> >> >> >> >> to

> >> >> >> >> >>   [0, 9223372036854775806]

> >> >> >> >> >> Found new range for s_4: [0, 9223372036854775806]

> >> >> >> >> >> marking stmt to be not simulated again

> >> >> >> >> >>

> >> >> >> >> >> Visiting statement:

> >> >> >> >> >> _1 = s_4;

> >> >> >> >> >> Intersecting

> >> >> >> >> >>   [0, 9223372036854775806]  EQUIVALENCES: { s_4 } (1 elements)

> >> >> >> >> >> and

> >> >> >> >> >>   [0, 9223372036854775806]

> >> >> >> >> >> to

> >> >> >> >> >>   [0, 9223372036854775806]  EQUIVALENCES: { s_4 } (1 elements)

> >> >> >> >> >> Found new range for _1: [0, 9223372036854775806]

> >> >> >> >> >> marking stmt to be not simulated again

> >> >> >> >> >>

> >> >> >> >> >> Visiting statement:

> >> >> >> >> >> _7 = s_4 - _1;

> >> >> >> >> >> Intersecting

> >> >> >> >> >>   ~[9223372036854775807, 9223372036854775809]

> >> >> >> >> >> and

> >> >> >> >> >>   ~[9223372036854775807, 9223372036854775809]

> >> >> >> >> >> to

> >> >> >> >> >>   ~[9223372036854775807, 9223372036854775809]

> >> >> >> >> >> Found new range for _7: ~[9223372036854775807, 9223372036854775809]

> >> >> >> >> >> marking stmt to be not simulated again

> >> >> >> >> >>

> >> >> >> >> >> __attribute__((noclone, noinline))

> >> >> >> >> >> fn1 (char * p, char * q)

> >> >> >> >> >> {

> >> >> >> >> >>   size_t s;

> >> >> >> >> >>   long unsigned int _1;

> >> >> >> >> >>   size_t _7;

> >> >> >> >> >>   long unsigned int _9;

> >> >> >> >> >>

> >> >> >> >> >>   <bb 2>:

> >> >> >> >> >>   s_4 = strlen (q_3(D));

> >> >> >> >> >>   _9 = s_4 + 1;

> >> >> >> >> >>   __builtin_memcpy (p_5(D), q_3(D), _9);

> >> >> >> >> >>   _1 = s_4;

> >> >> >> >> >>   _7 = s_4 - _1;

> >> >> >> >> >>   return _7;

> >> >> >> >> >>

> >> >> >> >> >> }

> >> >> >> >> >>

> >> >> >> >> >> Then forwprop4 turns

> >> >> >> >> >> _1 = s_4

> >> >> >> >> >> _7 = s_4 - _1

> >> >> >> >> >> into

> >> >> >> >> >> _7 = 0

> >> >> >> >> >>

> >> >> >> >> >> and we end up with:

> >> >> >> >> >> _7 = 0

> >> >> >> >> >> return _7

> >> >> >> >> >> in optimized dump.

> >> >> >> >> >>

> >> >> >> >> >> Running ccp again after forwprop4 trivially solves the issue, however

> >> >> >> >> >> I am not sure if we want to run ccp again ?

> >> >> >> >> >>

> >> >> >> >> >> The issue is probably with extract_range_from_ssa_name():

> >> >> >> >> >> For _1 = s_4

> >> >> >> >> >>

> >> >> >> >> >> Before patch:

> >> >> >> >> >> VR for s_4 is set to varying.

> >> >> >> >> >> So VR for _1 is set to [s_4, s_4] by extract_range_from_ssa_name.

> >> >> >> >> >> Since VR for _1 is [s_4, s_4] it implicitly implies that _1 is equal to s_4,

> >> >> >> >> >> and vrp is able to transform _7 = s_4 - _1 to _7 = 0 (by using

> >> >> >> >> >> match.pd pattern x - x -> 0).

> >> >> >> >> >>

> >> >> >> >> >> After patch:

> >> >> >> >> >> VR for s_4 is set to [0, PTRDIFF_MAX - 1]

> >> >> >> >> >> And correspondingly VR for _1 is set to [0, PTRDIFF_MAX - 1]

> >> >> >> >> >> so IIUC, we then lose the information that _1 is equal to s_4,

> >> >> >> >> >

> >> >> >> >> > We don't lose it, it's in its set of equivalencies.

> >> >> >> >> Ah, I missed that, thanks. For some reason I had mis-conception that

> >> >> >> >> equivalences stores

> >> >> >> >> variables which have same value-ranges but are not necessarily equal.

> >> >> >> >> >

> >> >> >> >> >> and vrp doesn't transform _7 = s_4 - _1 to _7 = 0.

> >> >> >> >> >> forwprop4 does that because it sees that s_4 and _1 are equivalent.

> >> >> >> >> >> Does this sound correct ?

> >> >> >> >> >

> >> >> >> >> > Yes.  So the issue is really that vrp_visit_assignment_or_call calls

> >> >> >> >> > gimple_fold_stmt_to_constant_1 with vrp_valueize[_1] which when

> >> >> >> >> > we do not have a singleton VR_RANGE does not fall back to looking

> >> >> >> >> > at equivalences (there's not a good cheap way to do that currently because

> >> >> >> >> > VRP doesn't keep a proper copy lattice but simply IORs equivalences

> >> >> >> >> > from all equivalences).  In theory simply using the first set bit

> >> >> >> >> > might work.  Thus sth like

> >> >> >> >> >

> >> >> >> >> > @@ -7057,6 +7030,12 @@ vrp_valueize (tree name)

> >> >> >> >> >               || is_gimple_min_invariant (vr->min))

> >> >> >> >> >           && vrp_operand_equal_p (vr->min, vr->max))

> >> >> >> >> >         return vr->min;

> >> >> >> >> > +      else if (vr->equiv && ! bitmap_empty_p (vr->equiv))

> >> >> >> >> > +       {

> >> >> >> >> > +         unsigned num = bitmap_first_set_bit (vr->equiv);

> >> >> >> >> > +         if (num < SSA_NAME_VERSION (name))

> >> >> >> >> > +           return ssa_name (num);

> >> >> >> >> > +       }

> >> >> >> >> >      }

> >> >> >> >> >    return name;

> >> >> >> >> >  }

> >> >> >> >> >

> >> >> >> >> > might work with the idea of simply doing canonicalization to one of

> >> >> >> >> > the equivalences.  But as we don't allow copies in the SSA def stmt

> >> >> >> >> > (via vrp_valueize_1) I'm not sure that's good enough canonicalization.

> >> >> >> >> IIUC, we record the equivalent variables in vr->equiv

> >> >> >> >> but do not canonicalize to one of the equivalence like "copy-of value"

> >> >> >> >> in copyprop ?

> >> >> >> >> Using first set bit unfortunately doesn't help for the above case.

> >> >> >> >>

> >> >> >> >> Sorry if this sounds silly, should we just run copyprop/ccp once again

> >> >> >> >> after vrp2 to ensure that there are no copies left ?

> >> >> >> >

> >> >> >> > why?  forwprop also does copy and constant propagation.  For the

> >> >> >> > regression simply adjust the pass dump you scan.

> >> >> >> Well, with the patch the redundant store to and load from _7 still remains

> >> >> >> in optimized dump for fn1() in strlenopt-3.c:

> >> >> >>

> >> >> >> __attribute__((noclone, noinline))

> >> >> >> fn1 (char * p, char * q)

> >> >> >> {

> >> >> >>   size_t s;

> >> >> >>   size_t _7;

> >> >> >>   long unsigned int _9;

> >> >> >>

> >> >> >>   <bb 2>:

> >> >> >>   s_4 = strlen (q_3(D));

> >> >> >>   _9 = s_4 + 1;

> >> >> >>   __builtin_memcpy (p_5(D), q_3(D), _9);

> >> >> >>   _7 = 0;

> >> >> >>   return _7;

> >> >> >>

> >> >> >> }

> >> >> >>

> >> >> >> Running ccp again after forwprop4 would get rid of _7.

> >> >> >> Without the patch we have return _0; in optimized dump.

> >> >> >

> >> >> > Ah, but then that's a missing "folding" of the return.  It's not

> >> >> > a load/store anyway.

> >> >> Hi Richard,

> >> >> Thanks for the suggestion. In the attached untested patch, I tried to

> >> >> modify forwprop to fold return-value to constant.

> >> >> The optimized dump shows return 0; for the above test-case with this patch.

> >> >> Does it look OK ?

> >> >

> >> > No, the fix is to make fold_stmt_1 handle GIMPLE_RETURN and simply

> >> > valueize the return value (note 'valueize' might return NULL or be NULL).

> >> >

> >> Hi Richard,

> >> Does the attached patch look OK ? I verified it prevents the

> >> regression for above case.

> >> Bootstrap+test on x86_64-unknown-linux-gnu in progress.

> >

> > +           tree val = valueize (ret);

> > +           if (val && TREE_CONSTANT (val))

> > +             {

> >

> > ok apart from the TREE_CONSTANT check which should be necessary

> > (it misses applying copy propagation).

> Well without TREE_CONSTANT check, it goes into infinite loop for above  test,

> which would happen if valueize (ret) returns ret

> Instead of TREE_CONSTANT is the following check OK ?

> tree val = valueize (ret);

> if (val && val != ret)

>   {

>     gimple_return_set_retval (ret_stmt, val);

>     changed = true;

>   }


Yeah, you indeed shall not return true if you changed nothing.

Richard.

> Thanks,

> Prathamesh

> >

> > Thanks,

> > Richard.

> >

> >> Thanks,

> >> Prathamesh

> >> > Richard.

> >> >

> >> >>

> >> >> Thanks,

> >> >> Prathamesh

> >> >> >

> >> >> > Richard.

> >> >> >

> >> >> >> Thanks,

> >> >> >> Prathamesh

> >> >> >> >

> >> >> >> >> However that might be quite expensive ?

> >> >> >> >> Or make vrp track copies like copyprop using a separate copy-of lattice ?

> >> >> >> >

> >> >> >> > Ideally we'd unify the three SSA propagation passes into one.  We'd

> >> >> >> > have to have separate lattices for copy&constant and range&known-bits.

> >> >> >> >

> >> >> >> > Richard.

> >> >> >> >

> >> >> >> >> Thanks,

> >> >> >> >> Prathamesh

> >> >> >> >> >

> >> >> >> >> > Richard.

> >> >> >> >>

> >> >> >> >>

> >> >> >> >

> >> >> >> > --

> >> >> >> > Richard Biener <rguenther@suse.de>

> >> >> >> > SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)

> >> >> >>

> >> >> >>

> >> >> >

> >> >> > --

> >> >> > Richard Biener <rguenther@suse.de>

> >> >> > SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)

> >> >>

> >> >

> >> > --

> >> > Richard Biener <rguenther@suse.de>

> >> > SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)

> >>

> >

> > --

> > Richard Biener <rguenther@suse.de>

> > SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)

> 

> 


-- 
Richard Biener <rguenther@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)
diff mbox

Patch

diff --git a/gcc/gimple-fold.c b/gcc/gimple-fold.c
index aabc8ff..321dc85 100644
--- a/gcc/gimple-fold.c
+++ b/gcc/gimple-fold.c
@@ -4406,6 +4406,23 @@  fold_stmt_1 (gimple_stmt_iterator *gsi, bool inplace, tree (*valueize) (tree))
 	}
       break;
 
+    case GIMPLE_RETURN:
+      {
+	greturn *ret_stmt = as_a<greturn *> (stmt);
+	tree ret = gimple_return_retval(ret_stmt);
+
+	if (ret && TREE_CODE (ret) == SSA_NAME && valueize)
+	  {
+	    tree val = valueize (ret);
+	    if (val && TREE_CONSTANT (val))
+	      {
+		gimple_return_set_retval (ret_stmt, val);
+		changed = true;
+	      }
+	  }
+      }
+      break;
+
     default:;
     }
 
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr78153-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr78153-1.c
new file mode 100644
index 0000000..2530ba0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr78153-1.c
@@ -0,0 +1,10 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-evrp-slim" } */
+
+void f(const char *s)
+{
+  if (__PTRDIFF_MAX__ <= __builtin_strlen (s))
+    __builtin_abort ();
+}
+
+/* { dg-final { scan-tree-dump-not "__builtin_abort" "evrp" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr78153-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr78153-2.c
new file mode 100644
index 0000000..de70450
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr78153-2.c
@@ -0,0 +1,11 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-evrp-slim" } */
+
+void f(const char *s)
+{
+  __PTRDIFF_TYPE__ n = __builtin_strlen (s);
+  if (n < 0)
+    __builtin_abort ();
+}
+
+/* { dg-final { scan-tree-dump-not "__builtin_abort" "evrp" } } */
diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
index c2a4133..373582a 100644
--- a/gcc/tree-vrp.c
+++ b/gcc/tree-vrp.c
@@ -4013,6 +4013,16 @@  extract_range_basic (value_range *vr, gimple *stmt)
 			          : vrp_val_max (type), NULL);
 	  }
 	  return;
+	case CFN_BUILT_IN_STRLEN:
+	  {
+	    tree type = TREE_TYPE (gimple_call_lhs (stmt));
+	    tree max = vrp_val_max (ptrdiff_type_node);
+	    wide_int wmax = wi::to_wide (max, TYPE_PRECISION (TREE_TYPE (max)));
+	    tree range_min = build_zero_cst (type); 
+	    tree range_max = wide_int_to_tree (type, wmax - 1);
+	    set_value_range (vr, VR_RANGE, range_min, range_max, NULL);
+	  }
+	  return;
 	default:
 	  break;
 	}