
PR78153

Message ID CAAgBjMms4U2LvcyKn6snYHKq92NqTs22PKgoOuabZ5DKNP-uDQ@mail.gmail.com
State New

Commit Message

Prathamesh Kulkarni Nov. 22, 2016, 4:10 p.m. UTC
On 22 November 2016 at 20:53, Richard Biener <rguenther@suse.de> wrote:
> On Tue, 22 Nov 2016, Prathamesh Kulkarni wrote:

>

>> On 22 November 2016 at 20:18, Richard Biener <rguenther@suse.de> wrote:

>> > On Tue, 22 Nov 2016, Prathamesh Kulkarni wrote:

>> >

>> >> On 21 November 2016 at 15:10, Richard Biener <rguenther@suse.de> wrote:

>> >> > On Sun, 20 Nov 2016, Prathamesh Kulkarni wrote:

>> >> >

>> >> >> Hi,

>> >> >> As suggested by Martin in PR78153 strlen's return value cannot exceed

>> >> >> PTRDIFF_MAX.

>> >> >> So I set its range to [0, PTRDIFF_MAX - 1] in extract_range_basic()

>> >> >> in the attached patch.
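
For illustration only, the kind of change described above -- clamping the
result range of strlen in extract_range_basic () -- might look roughly like
the sketch below.  This is not the attached patch; the enumerator and helper
names (CFN_BUILT_IN_STRLEN, set_value_range, ptrdiff_type_node) are recalled
from the tree-vrp.c sources of that era and may not match the exact revision:

  case CFN_BUILT_IN_STRLEN:
    if (tree lhs = gimple_call_lhs (stmt))
      {
        tree type = TREE_TYPE (lhs);
        /* strlen cannot return more than PTRDIFF_MAX - 1.  */
        tree max = fold_convert (type,
                                 fold_build2 (MINUS_EXPR, ptrdiff_type_node,
                                              TYPE_MAX_VALUE (ptrdiff_type_node),
                                              build_int_cst (ptrdiff_type_node, 1)));
        set_value_range (vr, VR_RANGE, build_int_cst (type, 0), max, NULL);
        return;
      }
    break;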

>> >> >>

>> >> >> However it regressed strlenopt-3.c:

>> >> >>

>> >> >> Consider fn1() from strlenopt-3.c:

>> >> >>

>> >> >> __attribute__((noinline, noclone)) size_t

>> >> >> fn1 (char *p, char *q)

>> >> >> {

>> >> >>   size_t s = strlen (q);

>> >> >>   strcpy (p, q);

>> >> >>   return s - strlen (p);

>> >> >> }

>> >> >>

>> >> >> The optimized dump shows the following:

>> >> >>

>> >> >> __attribute__((noclone, noinline))

>> >> >> fn1 (char * p, char * q)

>> >> >> {

>> >> >>   size_t s;

>> >> >>   size_t _7;

>> >> >>   long unsigned int _9;

>> >> >>

>> >> >>   <bb 2>:

>> >> >>   s_4 = strlen (q_3(D));

>> >> >>   _9 = s_4 + 1;

>> >> >>   __builtin_memcpy (p_5(D), q_3(D), _9);

>> >> >>   _7 = 0;

>> >> >>   return _7;

>> >> >>

>> >> >> }

>> >> >>

>> >> >> which introduces the regression, because the test expects "return 0;" in fn1().

>> >> >>

>> >> >> The issue seems to be in vrp2:

>> >> >>

>> >> >> Before the patch:

>> >> >> Visiting statement:

>> >> >> s_4 = strlen (q_3(D));

>> >> >> Found new range for s_4: VARYING

>> >> >>

>> >> >> Visiting statement:

>> >> >> _1 = s_4;

>> >> >> Found new range for _1: [s_4, s_4]

>> >> >> marking stmt to be not simulated again

>> >> >>

>> >> >> Visiting statement:

>> >> >> _7 = s_4 - _1;

>> >> >> Applying pattern match.pd:111, gimple-match.c:27997

>> >> >> Match-and-simplified s_4 - _1 to 0

>> >> >> Intersecting

>> >> >>   [0, 0]

>> >> >> and

>> >> >>   [0, +INF]

>> >> >> to

>> >> >>   [0, 0]

>> >> >> Found new range for _7: [0, 0]

>> >> >>

>> >> >> __attribute__((noclone, noinline))

>> >> >> fn1 (char * p, char * q)

>> >> >> {

>> >> >>   size_t s;

>> >> >>   long unsigned int _1;

>> >> >>   long unsigned int _9;

>> >> >>

>> >> >>   <bb 2>:

>> >> >>   s_4 = strlen (q_3(D));

>> >> >>   _9 = s_4 + 1;

>> >> >>   __builtin_memcpy (p_5(D), q_3(D), _9);

>> >> >>   _1 = s_4;

>> >> >>   return 0;

>> >> >>

>> >> >> }

>> >> >>

>> >> >>

>> >> >> After the patch:

>> >> >> Visiting statement:

>> >> >> s_4 = strlen (q_3(D));

>> >> >> Intersecting

>> >> >>   [0, 9223372036854775806]

>> >> >> and

>> >> >>   [0, 9223372036854775806]

>> >> >> to

>> >> >>   [0, 9223372036854775806]

>> >> >> Found new range for s_4: [0, 9223372036854775806]

>> >> >> marking stmt to be not simulated again

>> >> >>

>> >> >> Visiting statement:

>> >> >> _1 = s_4;

>> >> >> Intersecting

>> >> >>   [0, 9223372036854775806]  EQUIVALENCES: { s_4 } (1 elements)

>> >> >> and

>> >> >>   [0, 9223372036854775806]

>> >> >> to

>> >> >>   [0, 9223372036854775806]  EQUIVALENCES: { s_4 } (1 elements)

>> >> >> Found new range for _1: [0, 9223372036854775806]

>> >> >> marking stmt to be not simulated again

>> >> >>

>> >> >> Visiting statement:

>> >> >> _7 = s_4 - _1;

>> >> >> Intersecting

>> >> >>   ~[9223372036854775807, 9223372036854775809]

>> >> >> and

>> >> >>   ~[9223372036854775807, 9223372036854775809]

>> >> >> to

>> >> >>   ~[9223372036854775807, 9223372036854775809]

>> >> >> Found new range for _7: ~[9223372036854775807, 9223372036854775809]

>> >> >> marking stmt to be not simulated again

>> >> >>

>> >> >> __attribute__((noclone, noinline))

>> >> >> fn1 (char * p, char * q)

>> >> >> {

>> >> >>   size_t s;

>> >> >>   long unsigned int _1;

>> >> >>   size_t _7;

>> >> >>   long unsigned int _9;

>> >> >>

>> >> >>   <bb 2>:

>> >> >>   s_4 = strlen (q_3(D));

>> >> >>   _9 = s_4 + 1;

>> >> >>   __builtin_memcpy (p_5(D), q_3(D), _9);

>> >> >>   _1 = s_4;

>> >> >>   _7 = s_4 - _1;

>> >> >>   return _7;

>> >> >>

>> >> >> }

>> >> >>

>> >> >> Then forwprop4 turns

>> >> >> _1 = s_4

>> >> >> _7 = s_4 - _1

>> >> >> into

>> >> >> _7 = 0

>> >> >>

>> >> >> and we end up with:

>> >> >> _7 = 0

>> >> >> return _7

>> >> >> in optimized dump.

>> >> >>

>> >> >> Running ccp again after forwprop4 trivially solves the issue, however

>> >> >> I am not sure if we want to run ccp again ?

>> >> >>

>> >> >> The issue is probably with extract_range_from_ssa_name():

>> >> >> For _1 = s_4

>> >> >>

>> >> >> Before patch:

>> >> >> VR for s_4 is set to varying.

>> >> >> So VR for _1 is set to [s_4, s_4] by extract_range_from_ssa_name.

>> >> >> Since VR for _1 is [s_4, s_4], it implies that _1 is equal to s_4,

>> >> >> and vrp is able to transform _7 = s_4 - _1 to _7 = 0 (by using

>> >> >> match.pd pattern x - x -> 0).
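
For reference, the match.pd rule being applied here is the generic x - x
simplification; in GCC's match.pd it reads approximately as follows (quoted
from memory, so the exact guard and line number may differ):

  (simplify
   (minus @0 @0)
   (if (!FLOAT_TYPE_P (type) || !HONOR_NANS (type))
    { build_zero_cst (type); }))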

>> >> >>

>> >> >> After patch:

>> >> >> VR for s_4 is set to [0, PTRDIFF_MAX - 1]

>> >> >> And correspondingly VR for _1 is set to [0, PTRDIFF_MAX - 1]

>> >> >> so IIUC, we then lose the information that _1 is equal to s_4,

>> >> >

>> >> > We don't lose it, it's in its set of equivalencies.

>> >> Ah, I missed that, thanks. For some reason I had the misconception that

>> >> equivalences store variables which have the same value ranges but are

>> >> not necessarily equal.

>> >> >

>> >> >> and vrp doesn't transform _7 = s_4 - _1 to _7 = 0.

>> >> >> forwprop4 does that because it sees that s_4 and _1 are equivalent.

>> >> >> Does this sound correct ?

>> >> >

>> >> > Yes.  So the issue is really that vrp_visit_assignment_or_call calls

>> >> > gimple_fold_stmt_to_constant_1 with vrp_valueize[_1] which when

>> >> > we do not have a singleton VR_RANGE does not fall back to looking

>> >> > at equivalences (there's not a good cheap way to do that currently because

>> >> > VRP doesn't keep a proper copy lattice but simply IORs equivalences

>> >> > from all equivalences).  In theory simply using the first set bit

>> >> > might work.  Thus sth like

>> >> >

>> >> > @@ -7057,6 +7030,12 @@ vrp_valueize (tree name)

>> >> >               || is_gimple_min_invariant (vr->min))

>> >> >           && vrp_operand_equal_p (vr->min, vr->max))

>> >> >         return vr->min;

>> >> > +      else if (vr->equiv && ! bitmap_empty_p (vr->equiv))

>> >> > +       {

>> >> > +         unsigned num = bitmap_first_set_bit (vr->equiv);

>> >> > +         if (num < SSA_NAME_VERSION (name))

>> >> > +           return ssa_name (num);

>> >> > +       }

>> >> >      }

>> >> >    return name;

>> >> >  }

>> >> >

>> >> > might work with the idea of simply doing canonicalization to one of

>> >> > the equivalences.  But as we don't allow copies in the SSA def stmt

>> >> > (via vrp_valueize_1) I'm not sure that's good enough canonicalization.

>> >> IIUC, we record the equivalent variables in vr->equiv

>> >> but do not canonicalize to one of the equivalences, like the "copy-of value"

>> >> in copyprop ?

>> >> Using the first set bit unfortunately doesn't help for the above case.

>> >>

>> >> Sorry if this sounds silly, but should we just run copyprop/ccp once again

>> >> after vrp2 to ensure that there are no copies left ?

>> >

>> > why?  forwprop also does copy and constant propagation.  For the

>> > regression simply adjust the pass dump you scan.

>> Well, with the patch the redundant store to and load from _7 still remains

>> in the optimized dump for fn1() in strlenopt-3.c:

>>

>> __attribute__((noclone, noinline))

>> fn1 (char * p, char * q)

>> {

>>   size_t s;

>>   size_t _7;

>>   long unsigned int _9;

>>

>>   <bb 2>:

>>   s_4 = strlen (q_3(D));

>>   _9 = s_4 + 1;

>>   __builtin_memcpy (p_5(D), q_3(D), _9);

>>   _7 = 0;

>>   return _7;

>>

>> }

>>

>> Running ccp again after forwprop4 would get rid of _7.

>> Without the patch we have return 0; in the optimized dump.

>

> Ah, but then that's a missing "folding" of the return.  It's not

> a load/store anyway.

Hi Richard,
Thanks for the suggestion. In the attached untested patch, I tried to
modify forwprop to fold the return value to a constant.
The optimized dump shows return 0; for the above test-case with this patch.
Does it look OK ?

Thanks,
Prathamesh
>

> Richard.

>

>> Thanks,

>> Prathamesh

>> >

>> >> However that might be quite expensive ?

>> >> Or make vrp track copies like copyprop using a separate copy-of lattice ?

>> >

>> > Ideally we'd unify the three SSA propagation passes into one.  We'd

>> > have to have separate lattices for copy&constant and range&known-bits.

>> >

>> > Richard.

>> >

>> >> Thanks,

>> >> Prathamesh

>> >> >

>> >> > Richard.

>> >>

>> >>

>> >

>> > --

>> > Richard Biener <rguenther@suse.de>

>> > SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)

>>

>>

>

> --

> Richard Biener <rguenther@suse.de>

> SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)

Comments

Richard Biener Nov. 23, 2016, 9:46 a.m. UTC | #1
On Tue, 22 Nov 2016, Prathamesh Kulkarni wrote:

> On 22 November 2016 at 20:53, Richard Biener <rguenther@suse.de> wrote:

> > On Tue, 22 Nov 2016, Prathamesh Kulkarni wrote:

> >

> >> On 22 November 2016 at 20:18, Richard Biener <rguenther@suse.de> wrote:

> >> > On Tue, 22 Nov 2016, Prathamesh Kulkarni wrote:

> >> >

> >> >> On 21 November 2016 at 15:10, Richard Biener <rguenther@suse.de> wrote:

> >> >> > On Sun, 20 Nov 2016, Prathamesh Kulkarni wrote:

> >> >> >

> >> >> >> Hi,

> >> >> >> As suggested by Martin in PR78153 strlen's return value cannot exceed

> >> >> >> PTRDIFF_MAX.

> >> >> >> So I set its range to [0, PTRDIFF_MAX - 1] in extract_range_basic()

> >> >> >> in the attached patch.

> >> >> >>

> >> >> >> However it regressed strlenopt-3.c:

> >> >> >>

> >> >> >> Consider fn1() from strlenopt-3.c:

> >> >> >>

> >> >> >> __attribute__((noinline, noclone)) size_t

> >> >> >> fn1 (char *p, char *q)

> >> >> >> {

> >> >> >>   size_t s = strlen (q);

> >> >> >>   strcpy (p, q);

> >> >> >>   return s - strlen (p);

> >> >> >> }

> >> >> >>

> >> >> >> The optimized dump shows the following:

> >> >> >>

> >> >> >> __attribute__((noclone, noinline))

> >> >> >> fn1 (char * p, char * q)

> >> >> >> {

> >> >> >>   size_t s;

> >> >> >>   size_t _7;

> >> >> >>   long unsigned int _9;

> >> >> >>

> >> >> >>   <bb 2>:

> >> >> >>   s_4 = strlen (q_3(D));

> >> >> >>   _9 = s_4 + 1;

> >> >> >>   __builtin_memcpy (p_5(D), q_3(D), _9);

> >> >> >>   _7 = 0;

> >> >> >>   return _7;

> >> >> >>

> >> >> >> }

> >> >> >>

> >> >> >> which introduces the regression, because the test expects "return 0;" in fn1().

> >> >> >>

> >> >> >> The issue seems to be in vrp2:

> >> >> >>

> >> >> >> Before the patch:

> >> >> >> Visiting statement:

> >> >> >> s_4 = strlen (q_3(D));

> >> >> >> Found new range for s_4: VARYING

> >> >> >>

> >> >> >> Visiting statement:

> >> >> >> _1 = s_4;

> >> >> >> Found new range for _1: [s_4, s_4]

> >> >> >> marking stmt to be not simulated again

> >> >> >>

> >> >> >> Visiting statement:

> >> >> >> _7 = s_4 - _1;

> >> >> >> Applying pattern match.pd:111, gimple-match.c:27997

> >> >> >> Match-and-simplified s_4 - _1 to 0

> >> >> >> Intersecting

> >> >> >>   [0, 0]

> >> >> >> and

> >> >> >>   [0, +INF]

> >> >> >> to

> >> >> >>   [0, 0]

> >> >> >> Found new range for _7: [0, 0]

> >> >> >>

> >> >> >> __attribute__((noclone, noinline))

> >> >> >> fn1 (char * p, char * q)

> >> >> >> {

> >> >> >>   size_t s;

> >> >> >>   long unsigned int _1;

> >> >> >>   long unsigned int _9;

> >> >> >>

> >> >> >>   <bb 2>:

> >> >> >>   s_4 = strlen (q_3(D));

> >> >> >>   _9 = s_4 + 1;

> >> >> >>   __builtin_memcpy (p_5(D), q_3(D), _9);

> >> >> >>   _1 = s_4;

> >> >> >>   return 0;

> >> >> >>

> >> >> >> }

> >> >> >>

> >> >> >>

> >> >> >> After the patch:

> >> >> >> Visiting statement:

> >> >> >> s_4 = strlen (q_3(D));

> >> >> >> Intersecting

> >> >> >>   [0, 9223372036854775806]

> >> >> >> and

> >> >> >>   [0, 9223372036854775806]

> >> >> >> to

> >> >> >>   [0, 9223372036854775806]

> >> >> >> Found new range for s_4: [0, 9223372036854775806]

> >> >> >> marking stmt to be not simulated again

> >> >> >>

> >> >> >> Visiting statement:

> >> >> >> _1 = s_4;

> >> >> >> Intersecting

> >> >> >>   [0, 9223372036854775806]  EQUIVALENCES: { s_4 } (1 elements)

> >> >> >> and

> >> >> >>   [0, 9223372036854775806]

> >> >> >> to

> >> >> >>   [0, 9223372036854775806]  EQUIVALENCES: { s_4 } (1 elements)

> >> >> >> Found new range for _1: [0, 9223372036854775806]

> >> >> >> marking stmt to be not simulated again

> >> >> >>

> >> >> >> Visiting statement:

> >> >> >> _7 = s_4 - _1;

> >> >> >> Intersecting

> >> >> >>   ~[9223372036854775807, 9223372036854775809]

> >> >> >> and

> >> >> >>   ~[9223372036854775807, 9223372036854775809]

> >> >> >> to

> >> >> >>   ~[9223372036854775807, 9223372036854775809]

> >> >> >> Found new range for _7: ~[9223372036854775807, 9223372036854775809]

> >> >> >> marking stmt to be not simulated again

> >> >> >>

> >> >> >> __attribute__((noclone, noinline))

> >> >> >> fn1 (char * p, char * q)

> >> >> >> {

> >> >> >>   size_t s;

> >> >> >>   long unsigned int _1;

> >> >> >>   size_t _7;

> >> >> >>   long unsigned int _9;

> >> >> >>

> >> >> >>   <bb 2>:

> >> >> >>   s_4 = strlen (q_3(D));

> >> >> >>   _9 = s_4 + 1;

> >> >> >>   __builtin_memcpy (p_5(D), q_3(D), _9);

> >> >> >>   _1 = s_4;

> >> >> >>   _7 = s_4 - _1;

> >> >> >>   return _7;

> >> >> >>

> >> >> >> }

> >> >> >>

> >> >> >> Then forwprop4 turns

> >> >> >> _1 = s_4

> >> >> >> _7 = s_4 - _1

> >> >> >> into

> >> >> >> _7 = 0

> >> >> >>

> >> >> >> and we end up with:

> >> >> >> _7 = 0

> >> >> >> return _7

> >> >> >> in optimized dump.

> >> >> >>

> >> >> >> Running ccp again after forwprop4 trivially solves the issue, however

> >> >> >> I am not sure if we want to run ccp again ?

> >> >> >>

> >> >> >> The issue is probably with extract_range_from_ssa_name():

> >> >> >> For _1 = s_4

> >> >> >>

> >> >> >> Before patch:

> >> >> >> VR for s_4 is set to varying.

> >> >> >> So VR for _1 is set to [s_4, s_4] by extract_range_from_ssa_name.

> >> >> >> Since VR for _1 is [s_4, s_4], it implies that _1 is equal to s_4,

> >> >> >> and vrp is able to transform _7 = s_4 - _1 to _7 = 0 (by using

> >> >> >> match.pd pattern x - x -> 0).

> >> >> >>

> >> >> >> After patch:

> >> >> >> VR for s_4 is set to [0, PTRDIFF_MAX - 1]

> >> >> >> And correspondingly VR for _1 is set to [0, PTRDIFF_MAX - 1]

> >> >> >> so IIUC, we then lose the information that _1 is equal to s_4,

> >> >> >

> >> >> > We don't lose it, it's in its set of equivalencies.

> >> >> Ah, I missed that, thanks. For some reason I had the misconception that

> >> >> equivalences store variables which have the same value ranges but are

> >> >> not necessarily equal.

> >> >> >

> >> >> >> and vrp doesn't transform _7 = s_4 - _1 to _7 = 0.

> >> >> >> forwprop4 does that because it sees that s_4 and _1 are equivalent.

> >> >> >> Does this sound correct ?

> >> >> >

> >> >> > Yes.  So the issue is really that vrp_visit_assignment_or_call calls

> >> >> > gimple_fold_stmt_to_constant_1 with vrp_valueize[_1] which when

> >> >> > we do not have a singleton VR_RANGE does not fall back to looking

> >> >> > at equivalences (there's not a good cheap way to do that currently because

> >> >> > VRP doesn't keep a proper copy lattice but simply IORs equivalences

> >> >> > from all equivalences).  In theory simply using the first set bit

> >> >> > might work.  Thus sth like

> >> >> >

> >> >> > @@ -7057,6 +7030,12 @@ vrp_valueize (tree name)

> >> >> >               || is_gimple_min_invariant (vr->min))

> >> >> >           && vrp_operand_equal_p (vr->min, vr->max))

> >> >> >         return vr->min;

> >> >> > +      else if (vr->equiv && ! bitmap_empty_p (vr->equiv))

> >> >> > +       {

> >> >> > +         unsigned num = bitmap_first_set_bit (vr->equiv);

> >> >> > +         if (num < SSA_NAME_VERSION (name))

> >> >> > +           return ssa_name (num);

> >> >> > +       }

> >> >> >      }

> >> >> >    return name;

> >> >> >  }

> >> >> >

> >> >> > might work with the idea of simply doing canonicalization to one of

> >> >> > the equivalences.  But as we don't allow copies in the SSA def stmt

> >> >> > (via vrp_valueize_1) I'm not sure that's good enough canonicalization.

> >> >> IIUC, we record the equivalent variables in vr->equiv

> >> >> but do not canonicalize to one of the equivalences, like the "copy-of value"

> >> >> in copyprop ?

> >> >> Using the first set bit unfortunately doesn't help for the above case.

> >> >>

> >> >> Sorry if this sounds silly, but should we just run copyprop/ccp once again

> >> >> after vrp2 to ensure that there are no copies left ?

> >> >

> >> > why?  forwprop also does copy and constant propagation.  For the

> >> > regression simply adjust the pass dump you scan.

> >> Well, with the patch the redundant store to and load from _7 still remains

> >> in the optimized dump for fn1() in strlenopt-3.c:

> >>

> >> __attribute__((noclone, noinline))

> >> fn1 (char * p, char * q)

> >> {

> >>   size_t s;

> >>   size_t _7;

> >>   long unsigned int _9;

> >>

> >>   <bb 2>:

> >>   s_4 = strlen (q_3(D));

> >>   _9 = s_4 + 1;

> >>   __builtin_memcpy (p_5(D), q_3(D), _9);

> >>   _7 = 0;

> >>   return _7;

> >>

> >> }

> >>

> >> Running ccp again after forwprop4 would get rid of _7.

> >> Without the patch we have return 0; in the optimized dump.

> >

> > Ah, but then that's a missing "folding" of the return.  It's not

> > a load/store anyway.

> Hi Richard,

> Thanks for the suggestion. In the attached untested patch, I tried to

> modify forwprop to fold the return value to a constant.

> The optimized dump shows return 0; for the above test-case with this patch.

> Does it look OK ?


No, the fix is to make fold_stmt_1 handle GIMPLE_RETURN and simply
valueize the return value (note 'valueize' might return NULL or be NULL).

Richard.
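
For illustration, such a case in the gimple_code switch of
gimple-fold.c:fold_stmt_1 might look roughly like the untested sketch below
(this is not the patch under discussion; the NULL checks follow the note
above that 'valueize' may be NULL or may return NULL):

    case GIMPLE_RETURN:
      {
        greturn *ret_stmt = as_a <greturn *> (stmt);
        tree ret = gimple_return_retval (ret_stmt);

        if (ret && TREE_CODE (ret) == SSA_NAME && valueize)
          {
            tree val = valueize (ret);
            if (val && val != ret
                && may_propagate_copy (ret, val))
              {
                gimple_return_set_retval (ret_stmt, val);
                changed = true;
              }
          }
      }
      break;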

> 

> Thanks,

> Prathamesh

> >

> > Richard.

> >

> >> Thanks,

> >> Prathamesh

> >> >

> >> >> However that might be quite expensive ?

> >> >> Or make vrp track copies like copyprop using a separate copy-of lattice ?

> >> >

> >> > Ideally we'd unify the three SSA propagation passes into one.  We'd

> >> > have to have separate lattices for copy&constant and range&known-bits.

> >> >

> >> > Richard.

> >> >

> >> >> Thanks,

> >> >> Prathamesh

> >> >> >

> >> >> > Richard.

> >> >>

> >> >>

> >> >

> >> > --

> >> > Richard Biener <rguenther@suse.de>

> >> > SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)

> >>

> >>

> >

> > --

> > Richard Biener <rguenther@suse.de>

> > SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)

> 


-- 
Richard Biener <rguenther@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)

Patch

diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c
index ed11b32..b4dce91 100644
--- a/gcc/tree-ssa-forwprop.c
+++ b/gcc/tree-ssa-forwprop.c
@@ -2155,6 +2155,8 @@  pass_forwprop::execute (function *fun)
 							 postorder, false);
   auto_vec<gimple *, 4> to_fixup;
   to_purge = BITMAP_ALLOC (NULL);
+  auto_vec<greturn *> ret_stmts;
+  
   for (int i = 0; i < postorder_num; ++i)
     {
       gimple_stmt_iterator gsi;
@@ -2197,6 +2199,9 @@  pass_forwprop::execute (function *fun)
 	  tree lhs, rhs;
 	  enum tree_code code;
 
+	  if (greturn *ret_stmt = dyn_cast<greturn *> (stmt))
+	    ret_stmts.safe_push (ret_stmt);
+
 	  if (!is_gimple_assign (stmt))
 	    {
 	      gsi_next (&gsi);
@@ -2533,6 +2538,26 @@  pass_forwprop::execute (function *fun)
       cfg_changed |= fixup_noreturn_call (stmt);
     }
 
+  for (unsigned i = 0; i < ret_stmts.length (); ++i)
+    {
+      greturn *ret_stmt = ret_stmts[i];
+      tree ret = gimple_return_retval (ret_stmt);
+      if (ret && TREE_CODE (ret) == SSA_NAME)
+	{
+	  gimple *def_stmt = SSA_NAME_DEF_STMT (ret);
+	  if (gassign *ga = dyn_cast<gassign *> (def_stmt))
+	    {
+	      enum tree_code code = gimple_assign_rhs_code (def_stmt);
+	      if (TREE_CODE_CLASS (code) == tcc_constant) 
+		{
+		  tree cst = gimple_assign_rhs1 (ga);
+		  gimple_return_set_retval (ret_stmt, cst);
+		  update_stmt (ret_stmt);
+		} 
+	    }
+	}
+    }
+
   cfg_changed |= gimple_purge_all_dead_eh_edges (to_purge);
   BITMAP_FREE (to_purge);
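
As a quick sanity check of the intended effect (a hypothetical workflow, not
part of the patch), one can compile fn1 () from gcc.dg/strlenopt-3.c with
something like

  gcc -O2 -fdump-tree-optimized -S strlenopt-3.c

and verify that the optimized dump for fn1 () now ends in "return 0;" rather
than in "_7 = 0; return _7;".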