diff mbox

[2/2] shrink wrap a function with a single loop: split live_edge

Message ID CACgzC7AJUvPr_uQpMq7zS_570TKAqe12vp1T=D2s=7xUmmh6dA@mail.gmail.com
State New
Headers show

Commit Message

Zhenqiang Chen May 8, 2014, 8:07 a.m. UTC
Hi,

The patch splits the live_edge for move_insn_for_shrink_wrap to sink
the copy out of the entry block.

Bootstrap and no make check regression on X86-64 and ARM.

OK for trunk?

Thanks!
-Zhenqiang

ChangeLog:
2014-05-08  Zhenqiang Chen  <zhenqiang.chen@linaro.org>

        * function.c (next_block_for_reg): Allow live_edge->dest has two
        predecessors.
        (move_insn_for_shrink_wrap): Split live_edge.
        (prepre_shrink_wrap): One more parameter for move_insn_for_shrink_wrap.


   edge e, live_edge;
@@ -5415,10 +5415,12 @@ next_block_for_reg (basic_block bb, int regno,
int end_regno)
   if (live_edge->flags & EDGE_ABNORMAL)
     return NULL;

-  if (EDGE_COUNT (live_edge->dest->preds) > 1)
+  /* When live_edge->dest->preds == 2, we can create a new block on
+     the edge to make it meet the requirement.  */
+  if (EDGE_COUNT (live_edge->dest->preds) > 2)
     return NULL;

-  return live_edge->dest;
+  return live_edge;
 }

 /* Check whether INSN is the last insn in BB or
@@ -5545,20 +5547,25 @@ try_copy_prop (basic_block bb, rtx insn, rtx
src, rtx dest,
   return ret;
 }

- /* Try to move INSN from BB to a successor.  Return true on success.
-    USES and DEFS are the set of registers that are used and defined
-    after INSN in BB.  */
+/* Try to move INSN from BB to a successor.  Return true on success.
+   LAST_USES is the set of registers that are used by the COMPARE or JUMP
+   instructions in the block.  USES is the set of registers that are used
+   by others after INSN except COMARE and JUMP.  DEFS are the set of registers
+   that are used and defined others after INSN.  SPLIT_P indicates whether
+   a live edge from BB is splitted or not.  */

 static bool
 move_insn_for_shrink_wrap (basic_block bb, rtx insn,
                           const HARD_REG_SET uses,
                           const HARD_REG_SET defs,
-                          HARD_REG_SET *last_uses)
+                          HARD_REG_SET *last_uses,
+                          bool *split_p)
{
   rtx set, src, dest;
   bitmap live_out, live_in, bb_uses, bb_defs;
   unsigned int i, dregno, end_dregno, sregno, end_sregno;
   basic_block next_block;
+  edge live_edge;

   /* Look for a simple register copy.  */
   set = single_set (insn);
@@ -5582,17 +5589,31 @@ move_insn_for_shrink_wrap (basic_block bb, rtx insn,
       || overlaps_hard_reg_set_p (defs, GET_MODE (dest), dregno))
     return false;

-  /* See whether there is a successor block to which we could move INSN.  */
-  next_block = next_block_for_reg (bb, dregno, end_dregno);
-  if (!next_block)
+  live_edge = next_block_for_reg (bb, dregno, end_dregno);
+  if (!live_edge)
      return false;

+  next_block = live_edge->dest;
+
   /* If the destination register is referred in later insn,
      try to forward it.  */
   if (overlaps_hard_reg_set_p (*last_uses, GET_MODE (dest), dregno)
       && !try_copy_prop (bb, insn, src, dest, last_uses))
     return false;

+  /* Create a new basic block on the edge.  */
+  if (EDGE_COUNT (next_block->preds) == 2)
+    {
+      next_block = split_edge (live_edge);
+
+      bitmap_copy (df_get_live_in (next_block), df_get_live_out (bb));
+      df_set_bb_dirty (next_block);
+
+      /* We should not split more than once for a function.  */
+      gcc_assert (!(*split_p));
+      *split_p = true;
+    }
+
   /* At this point we are committed to moving INSN, but let's try to
      move it as far as we can.  */
   do
@@ -5610,7 +5631,10 @@ move_insn_for_shrink_wrap (basic_block bb, rtx insn,
        {
          for (i = dregno; i < end_dregno; i++)
            {
-             if (REGNO_REG_SET_P (bb_uses, i) || REGNO_REG_SET_P (bb_defs, i)
+
+             if (*split_p
+                 || REGNO_REG_SET_P (bb_uses, i)
+                 || REGNO_REG_SET_P (bb_defs, i)
                  || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
                next_block = NULL;
              CLEAR_REGNO_REG_SET (live_out, i);
@@ -5621,7 +5645,8 @@ move_insn_for_shrink_wrap (basic_block bb, rtx insn,
             Either way, SRC is now live on entry.  */
          for (i = sregno; i < end_sregno; i++)
            {
-             if (REGNO_REG_SET_P (bb_defs, i)
+             if (*split_p
+                 || REGNO_REG_SET_P (bb_defs, i)
                  || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
                next_block = NULL;
              SET_REGNO_REG_SET (live_out, i);
@@ -5650,21 +5675,31 @@ move_insn_for_shrink_wrap (basic_block bb, rtx insn,
       /* If we don't need to add the move to BB, look for a single
         successor block.  */
       if (next_block)
-       next_block = next_block_for_reg (next_block, dregno, end_dregno);
+       {
+         live_edge = next_block_for_reg (next_block, dregno, end_dregno);
+         if (!live_edge || EDGE_COUNT (live_edge->dest->preds) > 1)
+           break;
+         next_block = live_edge->dest;
+       }
     }
   while (next_block);

-  /* BB now defines DEST.  It only uses the parts of DEST that overlap SRC
-     (next loop).  */
-  for (i = dregno; i < end_dregno; i++)
+  /* For the new created basic block, there is no dataflow info at all.
+     So skip the following dataflow update and check.  */
+  if (!(*split_p))
     {
-      CLEAR_REGNO_REG_SET (bb_uses, i);
-      SET_REGNO_REG_SET (bb_defs, i);
-    }
+      /* BB now defines DEST.  It only uses the parts of DEST that overlap SRC
+        (next loop).  */
+      for (i = dregno; i < end_dregno; i++)
+       {
+         CLEAR_REGNO_REG_SET (bb_uses, i);
+         SET_REGNO_REG_SET (bb_defs, i);
+       }

-  /* BB now uses SRC.  */
-  for (i = sregno; i < end_sregno; i++)
-    SET_REGNO_REG_SET (bb_uses, i);
+      /* BB now uses SRC.  */
+      for (i = sregno; i < end_sregno; i++)
+       SET_REGNO_REG_SET (bb_uses, i);
+    }

   emit_insn_after (PATTERN (insn), bb_note (bb));
   delete_insn (insn);
@@ -5684,6 +5719,7 @@ prepare_shrink_wrap (basic_block entry_block)
   rtx insn, curr, x;
   HARD_REG_SET uses, defs, last_uses;
   df_ref *ref;
+  bool split_p = false;

   if (!JUMP_P (BB_END (entry_block)))
     return;
@@ -5693,7 +5729,7 @@ prepare_shrink_wrap (basic_block entry_block)
   FOR_BB_INSNS_REVERSE_SAFE (entry_block, insn, curr)
     if (NONDEBUG_INSN_P (insn)
        && !move_insn_for_shrink_wrap (entry_block, insn, uses, defs,
-                                      &last_uses))
+                                      &last_uses, &split_p))
       {
        /* Add all defined registers to DEFs.  */
        for (ref = DF_INSN_DEFS (insn); *ref; ref++)

Comments

Steven Bosscher May 8, 2014, 8:33 a.m. UTC | #1
On Thu, May 8, 2014 at 10:07 AM, Zhenqiang Chen wrote:
> The patch splits the live_edge for move_insn_for_shrink_wrap to sink
> the copy out of the entry block.

Maybe also time to take the shrink-wrapping code out of function.c and
put it in its own file?

Ciao!
Steven
Jeff Law May 9, 2014, 6:08 a.m. UTC | #2
On 05/08/14 02:07, Zhenqiang Chen wrote:
> Hi,
>
> The patch splits the live_edge for move_insn_for_shrink_wrap to sink
> the copy out of the entry block.
>
> Bootstrap and no make check regression on X86-64 and ARM.
>
> OK for trunk?
>
> Thanks!
> -Zhenqiang
>
> ChangeLog:
> 2014-05-08  Zhenqiang Chen  <zhenqiang.chen@linaro.org>
>
>          * function.c (next_block_for_reg): Allow live_edge->dest has two
>          predecessors.
>          (move_insn_for_shrink_wrap): Split live_edge.
>          (prepre_shrink_wrap): One more parameter for move_insn_for_shrink_wrap.
>
>
> diff --git a/gcc/function.c b/gcc/function.c
> index 764ac82..0be58e2 100644
> --- a/gcc/function.c
> +++ b/gcc/function.c
> @@ -5381,7 +5381,7 @@ requires_stack_frame_p (rtx insn, HARD_REG_SET
> prologue_used,
>      and if BB is its only predecessor.  Return that block if so,
>      otherwise return null.  */
>
> -static basic_block
> +static edge
>   next_block_for_reg (basic_block bb, int regno, int end_regno)
Comment for this function needs to be changed.  You're no longer 
returning a block, but the edge leading to the block.  It also seems the 
name of the function ought to change.

This looks basically OK.  I'd like to see the requested cleanups made, 
then the resulting new patch reposted for a final review.

Jeff
Zhenqiang Chen May 9, 2014, 6:43 a.m. UTC | #3
On 9 May 2014 14:08, Jeff Law <law@redhat.com> wrote:
> On 05/08/14 02:07, Zhenqiang Chen wrote:
>>
>> Hi,
>>
>> The patch splits the live_edge for move_insn_for_shrink_wrap to sink
>> the copy out of the entry block.
>>
>> Bootstrap and no make check regression on X86-64 and ARM.
>>
>> OK for trunk?
>>
>> Thanks!
>> -Zhenqiang
>>
>> ChangeLog:
>> 2014-05-08  Zhenqiang Chen  <zhenqiang.chen@linaro.org>
>>
>>          * function.c (next_block_for_reg): Allow live_edge->dest has two
>>          predecessors.
>>          (move_insn_for_shrink_wrap): Split live_edge.
>>          (prepre_shrink_wrap): One more parameter for
>> move_insn_for_shrink_wrap.
>>
>>
>> diff --git a/gcc/function.c b/gcc/function.c
>> index 764ac82..0be58e2 100644
>> --- a/gcc/function.c
>> +++ b/gcc/function.c
>> @@ -5381,7 +5381,7 @@ requires_stack_frame_p (rtx insn, HARD_REG_SET
>> prologue_used,
>>      and if BB is its only predecessor.  Return that block if so,
>>      otherwise return null.  */
>>
>> -static basic_block
>> +static edge
>>   next_block_for_reg (basic_block bb, int regno, int end_regno)
>
> Comment for this function needs to be changed.  You're no longer returning a
> block, but the edge leading to the block.  It also seems the name of the
> function ought to change.
>
> This looks basically OK.  I'd like to see the requested cleanups made, then
> the resulting new patch reposted for a final review.

Thank you for the comments. I will follow Steven's comments to
separate shrink-wrapping code from function.c to shrink-wrap.c.

-Zhenqiang
Jiong Wang Sept. 15, 2014, 3:28 p.m. UTC | #4
On 08/05/14 09:07, Zhenqiang Chen wrote:
>   static bool
>   move_insn_for_shrink_wrap (basic_block bb, rtx insn,
>                             const HARD_REG_SET uses,
>                             const HARD_REG_SET defs,
> -                          HARD_REG_SET *last_uses)
> +                          HARD_REG_SET *last_uses,
> +                          bool *split_p)
> {
>     rtx set, src, dest;
>     bitmap live_out, live_in, bb_uses, bb_defs;
>     unsigned int i, dregno, end_dregno, sregno, end_sregno;
>     basic_block next_block;
> +  edge live_edge;
>
>     /* Look for a simple register copy.  */
>     set = single_set (insn);
> @@ -5582,17 +5589,31 @@ move_insn_for_shrink_wrap (basic_block bb, rtx insn,
>         || overlaps_hard_reg_set_p (defs, GET_MODE (dest), dregno))
>       return false;
>
> -  /* See whether there is a successor block to which we could move INSN.  */
> -  next_block = next_block_for_reg (bb, dregno, end_dregno);
> -  if (!next_block)
> +  live_edge = next_block_for_reg (bb, dregno, end_dregno);
> +  if (!live_edge)
>        return false;
>
> +  next_block = live_edge->dest;
> +
>     /* If the destination register is referred in later insn,
>        try to forward it.  */
>     if (overlaps_hard_reg_set_p (*last_uses, GET_MODE (dest), dregno)
>         && !try_copy_prop (bb, insn, src, dest, last_uses))
>       return false;
>
> +  /* Create a new basic block on the edge.  */
> +  if (EDGE_COUNT (next_block->preds) == 2)
> +    {
> +      next_block = split_edge (live_edge);
> +
> +      bitmap_copy (df_get_live_in (next_block), df_get_live_out (bb));

(re-sent, looks like the first send not received by the server...)

  for the function "_IO_wdefault_xsputn" in glibc wgenops.c (.dot file attached)

  174: NOTE_INSN_BASIC_BLOCK 21 are the new created basic block because of the sink of move
instruction "ax:DI = 0" and split edge.

  but the live_in of this BB is copied from live_out of BB 2 which is too big, and
actually prevent the later sink of "16: r12:DI=dx:DI".

  Should it be better to copy live_in from "next_block->next_bb" instead of live_out from "bb",
as it will model what's needed more accurately?

+      bitmap_copy (df_get_live_in (next_block), df_get_live_in (next_block->next_bb));

  After this modification, pass x86-64 bootstrap, and this function could be shrink-wrapped.

-- Jiong

> +      df_set_bb_dirty (next_block);
> +
> +      /* We should not split more than once for a function.  */
> +      gcc_assert (!(*split_p));
> +      *split_p = true;
> +    }
> +
>     /* At this point we are committed to moving INSN, but let's try to
>        move it as far as we can.  */
>     do
> @@ -5610,7 +5631,10 @@ move_insn_for_shrink_wrap (basic_block bb, rtx insn,
>          {
>            for (i = dregno; i < end_dregno; i++)
>              {
> -             if (REGNO_REG_SET_P (bb_uses, i) || REGNO_REG_SET_P (bb_defs, i)
> +
> +             if (*split_p
> +                 || REGNO_REG_SET_P (bb_uses, i)
> +                 || REGNO_REG_SET_P (bb_defs, i)
>                    || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
>                  next_block = NULL;
>                CLEAR_REGNO_REG_SET (live_out, i);
> @@ -5621,7 +5645,8 @@ move_insn_for_shrink_wrap (basic_block bb, rtx insn,
>               Either way, SRC is now live on entry.  */
>            for (i = sregno; i < end_sregno; i++)
>              {
> -             if (REGNO_REG_SET_P (bb_defs, i)
> +             if (*split_p
> +                 || REGNO_REG_SET_P (bb_defs, i)
>                    || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
>                  next_block = NULL;
>                SET_REGNO_REG_SET (live_out, i);
> @@ -5650,21 +5675,31 @@ move_insn_for_shrink_wrap (basic_block bb, rtx insn,
>         /* If we don't need to add the move to BB, look for a single
>           successor block.  */
>         if (next_block)
> -       next_block = next_block_for_reg (next_block, dregno, end_dregno);
> +       {
> +         live_edge = next_block_for_reg (next_block, dregno, end_dregno);
> +         if (!live_edge || EDGE_COUNT (live_edge->dest->preds) > 1)
> +           break;
> +         next_block = live_edge->dest;
> +       }
>       }
>     while (next_block);
>
> -  /* BB now defines DEST.  It only uses the parts of DEST that overlap SRC
> -     (next loop).  */
> -  for (i = dregno; i < end_dregno; i++)
> +  /* For the new created basic block, there is no dataflow info at all.
> +     So skip the following dataflow update and check.  */
> +  if (!(*split_p))
>       {
> -      CLEAR_REGNO_REG_SET (bb_uses, i);
> -      SET_REGNO_REG_SET (bb_defs, i);
> -    }
> +      /* BB now defines DEST.  It only uses the parts of DEST that overlap SRC
> +        (next loop).  */
> +      for (i = dregno; i < end_dregno; i++)
> +       {
> +         CLEAR_REGNO_REG_SET (bb_uses, i);
> +         SET_REGNO_REG_SET (bb_defs, i);
> +       }
>
> -  /* BB now uses SRC.  */
> -  for (i = sregno; i < end_sregno; i++)
> -    SET_REGNO_REG_SET (bb_uses, i);
> +      /* BB now uses SRC.  */
> +      for (i = sregno; i < end_sregno; i++)
> +       SET_REGNO_REG_SET (bb_uses, i);
> +    }
>
>     emit_insn_after (PATTERN (insn), bb_note (bb));
>     delete_insn (insn);
> @@ -5684,6 +5719,7 @@ prepare_shrink_wrap (basic_block entry_block)
>     rtx insn, curr, x;
>     HARD_REG_SET uses, defs, last_uses;
>     df_ref *ref;
> +  bool split_p = false;
>
>     if (!JUMP_P (BB_END (entry_block)))
>       return;
> @@ -5693,7 +5729,7 @@ prepare_shrink_wrap (basic_block entry_block)
>     FOR_BB_INSNS_REVERSE_SAFE (entry_block, insn, curr)
>       if (NONDEBUG_INSN_P (insn)
>          && !move_insn_for_shrink_wrap (entry_block, insn, uses, defs,
> -                                      &last_uses))
> +                                      &last_uses, &split_p))
>         {
>          /* Add all defined registers to DEFs.  */
>          for (ref = DF_INSN_DEFS (insn); *ref; ref++)
>
Zhenqiang Chen Sept. 16, 2014, 6:55 a.m. UTC | #5
> -----Original Message-----
> From: gcc-patches-owner@gcc.gnu.org [mailto:gcc-patches-
> owner@gcc.gnu.org] On Behalf Of Jiong Wang
> Sent: Monday, September 15, 2014 11:28 PM
> To: Zhenqiang Chen
> Cc: gcc-patches@gcc.gnu.org; Jeff Law
> Subject: Re: [PATCH, 2/2] shrink wrap a function with a single loop: split
> live_edge
> 
> On 08/05/14 09:07, Zhenqiang Chen wrote:
> >   static bool
> >   move_insn_for_shrink_wrap (basic_block bb, rtx insn,
> >                             const HARD_REG_SET uses,
> >                             const HARD_REG_SET defs,
> > -                          HARD_REG_SET *last_uses)
> > +                          HARD_REG_SET *last_uses,
> > +                          bool *split_p)
> > {
> >     rtx set, src, dest;
> >     bitmap live_out, live_in, bb_uses, bb_defs;
> >     unsigned int i, dregno, end_dregno, sregno, end_sregno;
> >     basic_block next_block;
> > +  edge live_edge;
> >
> >     /* Look for a simple register copy.  */
> >     set = single_set (insn);
> > @@ -5582,17 +5589,31 @@ move_insn_for_shrink_wrap (basic_block bb,
> rtx insn,
> >         || overlaps_hard_reg_set_p (defs, GET_MODE (dest), dregno))
> >       return false;
> >
> > -  /* See whether there is a successor block to which we could move
> > INSN.  */
> > -  next_block = next_block_for_reg (bb, dregno, end_dregno);
> > -  if (!next_block)
> > +  live_edge = next_block_for_reg (bb, dregno, end_dregno);  if
> > + (!live_edge)
> >        return false;
> >
> > +  next_block = live_edge->dest;
> > +
> >     /* If the destination register is referred in later insn,
> >        try to forward it.  */
> >     if (overlaps_hard_reg_set_p (*last_uses, GET_MODE (dest), dregno)
> >         && !try_copy_prop (bb, insn, src, dest, last_uses))
> >       return false;
> >
> > +  /* Create a new basic block on the edge.  */  if (EDGE_COUNT
> > + (next_block->preds) == 2)
> > +    {
> > +      next_block = split_edge (live_edge);
> > +
> > +      bitmap_copy (df_get_live_in (next_block), df_get_live_out
> > + (bb));
> 
> (re-sent, looks like the first send not received by the server...)
> 
>   for the function "_IO_wdefault_xsputn" in glibc wgenops.c (.dot file
> attached)
> 
>   174: NOTE_INSN_BASIC_BLOCK 21 are the new created basic block because
> of the sink of move instruction "ax:DI = 0" and split edge.
> 
>   but the live_in of this BB is copied from live_out of BB 2 which is too big, and
> actually prevent the later sink of "16: r12:DI=dx:DI".
> 
>   Should it be better to copy live_in from "next_block->next_bb" instead of
> live_out from "bb", as it will model what's needed more accurately?

According to the algorithm, next_block->next_bb (which is live_edge->dest) should have two predecessors. Live_in of next_block->next_bb would include live_out of the other edge,  which is not necessary accurately. To be more accurate, you may need an intersection of df_get_live_out (bb) and df_get_live_in (next_block->next_bb).

Thanks!
-Zhenqiang
 
> +      bitmap_copy (df_get_live_in (next_block), df_get_live_in
> + (next_block->next_bb));
> 
>   After this modification, pass x86-64 bootstrap, and this function could be
> shrink-wrapped.
> 
> -- Jiong
> 
> > +      df_set_bb_dirty (next_block);
> > +
> > +      /* We should not split more than once for a function.  */
> > +      gcc_assert (!(*split_p));
> > +      *split_p = true;
> > +    }
> > +
> >     /* At this point we are committed to moving INSN, but let's try to
> >        move it as far as we can.  */
> >     do
> > @@ -5610,7 +5631,10 @@ move_insn_for_shrink_wrap (basic_block bb,
> rtx insn,
> >          {
> >            for (i = dregno; i < end_dregno; i++)
> >              {
> > -             if (REGNO_REG_SET_P (bb_uses, i) || REGNO_REG_SET_P
> (bb_defs, i)
> > +
> > +             if (*split_p
> > +                 || REGNO_REG_SET_P (bb_uses, i)
> > +                 || REGNO_REG_SET_P (bb_defs, i)
> >                    || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
> >                  next_block = NULL;
> >                CLEAR_REGNO_REG_SET (live_out, i); @@ -5621,7 +5645,8
> > @@ move_insn_for_shrink_wrap (basic_block bb, rtx insn,
> >               Either way, SRC is now live on entry.  */
> >            for (i = sregno; i < end_sregno; i++)
> >              {
> > -             if (REGNO_REG_SET_P (bb_defs, i)
> > +             if (*split_p
> > +                 || REGNO_REG_SET_P (bb_defs, i)
> >                    || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
> >                  next_block = NULL;
> >                SET_REGNO_REG_SET (live_out, i); @@ -5650,21 +5675,31
> > @@ move_insn_for_shrink_wrap (basic_block bb, rtx insn,
> >         /* If we don't need to add the move to BB, look for a single
> >           successor block.  */
> >         if (next_block)
> > -       next_block = next_block_for_reg (next_block, dregno, end_dregno);
> > +       {
> > +         live_edge = next_block_for_reg (next_block, dregno, end_dregno);
> > +         if (!live_edge || EDGE_COUNT (live_edge->dest->preds) > 1)
> > +           break;
> > +         next_block = live_edge->dest;
> > +       }
> >       }
> >     while (next_block);
> >
> > -  /* BB now defines DEST.  It only uses the parts of DEST that overlap SRC
> > -     (next loop).  */
> > -  for (i = dregno; i < end_dregno; i++)
> > +  /* For the new created basic block, there is no dataflow info at all.
> > +     So skip the following dataflow update and check.  */  if
> > + (!(*split_p))
> >       {
> > -      CLEAR_REGNO_REG_SET (bb_uses, i);
> > -      SET_REGNO_REG_SET (bb_defs, i);
> > -    }
> > +      /* BB now defines DEST.  It only uses the parts of DEST that overlap
> SRC
> > +        (next loop).  */
> > +      for (i = dregno; i < end_dregno; i++)
> > +       {
> > +         CLEAR_REGNO_REG_SET (bb_uses, i);
> > +         SET_REGNO_REG_SET (bb_defs, i);
> > +       }
> >
> > -  /* BB now uses SRC.  */
> > -  for (i = sregno; i < end_sregno; i++)
> > -    SET_REGNO_REG_SET (bb_uses, i);
> > +      /* BB now uses SRC.  */
> > +      for (i = sregno; i < end_sregno; i++)
> > +       SET_REGNO_REG_SET (bb_uses, i);
> > +    }
> >
> >     emit_insn_after (PATTERN (insn), bb_note (bb));
> >     delete_insn (insn);
> > @@ -5684,6 +5719,7 @@ prepare_shrink_wrap (basic_block entry_block)
> >     rtx insn, curr, x;
> >     HARD_REG_SET uses, defs, last_uses;
> >     df_ref *ref;
> > +  bool split_p = false;
> >
> >     if (!JUMP_P (BB_END (entry_block)))
> >       return;
> > @@ -5693,7 +5729,7 @@ prepare_shrink_wrap (basic_block entry_block)
> >     FOR_BB_INSNS_REVERSE_SAFE (entry_block, insn, curr)
> >       if (NONDEBUG_INSN_P (insn)
> >          && !move_insn_for_shrink_wrap (entry_block, insn, uses, defs,
> > -                                      &last_uses))
> > +                                      &last_uses, &split_p))
> >         {
> >          /* Add all defined registers to DEFs.  */
> >          for (ref = DF_INSN_DEFS (insn); *ref; ref++)
> >
Jeff Law Sept. 19, 2014, 5:51 a.m. UTC | #6
On 09/16/14 00:55, Zhenqiang Chen wrote:
>
>
>> -----Original Message-----
>> From: gcc-patches-owner@gcc.gnu.org [mailto:gcc-patches-
>> owner@gcc.gnu.org] On Behalf Of Jiong Wang
>> Sent: Monday, September 15, 2014 11:28 PM
>> To: Zhenqiang Chen
>> Cc: gcc-patches@gcc.gnu.org; Jeff Law
>> Subject: Re: [PATCH, 2/2] shrink wrap a function with a single loop: split
>> live_edge
>>
>> On 08/05/14 09:07, Zhenqiang Chen wrote:
>>>    static bool
>>>    move_insn_for_shrink_wrap (basic_block bb, rtx insn,
>>>                              const HARD_REG_SET uses,
>>>                              const HARD_REG_SET defs,
>>> -                          HARD_REG_SET *last_uses)
>>> +                          HARD_REG_SET *last_uses,
>>> +                          bool *split_p)
>>> {
>>>      rtx set, src, dest;
>>>      bitmap live_out, live_in, bb_uses, bb_defs;
>>>      unsigned int i, dregno, end_dregno, sregno, end_sregno;
>>>      basic_block next_block;
>>> +  edge live_edge;
>>>
>>>      /* Look for a simple register copy.  */
>>>      set = single_set (insn);
>>> @@ -5582,17 +5589,31 @@ move_insn_for_shrink_wrap (basic_block bb,
>> rtx insn,
>>>          || overlaps_hard_reg_set_p (defs, GET_MODE (dest), dregno))
>>>        return false;
>>>
>>> -  /* See whether there is a successor block to which we could move
>>> INSN.  */
>>> -  next_block = next_block_for_reg (bb, dregno, end_dregno);
>>> -  if (!next_block)
>>> +  live_edge = next_block_for_reg (bb, dregno, end_dregno);  if
>>> + (!live_edge)
>>>         return false;
>>>
>>> +  next_block = live_edge->dest;
>>> +
>>>      /* If the destination register is referred in later insn,
>>>         try to forward it.  */
>>>      if (overlaps_hard_reg_set_p (*last_uses, GET_MODE (dest), dregno)
>>>          && !try_copy_prop (bb, insn, src, dest, last_uses))
>>>        return false;
>>>
>>> +  /* Create a new basic block on the edge.  */  if (EDGE_COUNT
>>> + (next_block->preds) == 2)
>>> +    {
>>> +      next_block = split_edge (live_edge);
>>> +
>>> +      bitmap_copy (df_get_live_in (next_block), df_get_live_out
>>> + (bb));
>>
>> (re-sent, looks like the first send not received by the server...)
>>
>>    for the function "_IO_wdefault_xsputn" in glibc wgenops.c (.dot file
>> attached)
>>
>>    174: NOTE_INSN_BASIC_BLOCK 21 are the new created basic block because
>> of the sink of move instruction "ax:DI = 0" and split edge.
>>
>>    but the live_in of this BB is copied from live_out of BB 2 which is too big, and
>> actually prevent the later sink of "16: r12:DI=dx:DI".
>>
>>    Should it be better to copy live_in from "next_block->next_bb" instead of
>> live_out from "bb", as it will model what's needed more accurately?
>
> According to the algorithm, next_block->next_bb (which is live_edge->dest) should have two predecessors. Live_in of next_block->next_bb would include live_out of the other edge,  which is not necessary accurately. To be more accurate, you may need an intersection of df_get_live_out (bb) and df_get_live_in (next_block->next_bb).
>
Presumably we can't recompute the liveness info here as it's too 
expensive to do that each time we split an edge to sink a statement? 
ie, we need the more accurate liveness within this pass so that we can 
sink further instructions?

jeff
Jiong Wang Sept. 19, 2014, 11:27 a.m. UTC | #7
On 19/09/14 06:51, Jeff Law wrote:
> On 09/16/14 00:55, Zhenqiang Chen wrote:
>>
>>> -----Original Message-----
>>> From: gcc-patches-owner@gcc.gnu.org [mailto:gcc-patches-
>>> owner@gcc.gnu.org] On Behalf Of Jiong Wang
>>> Sent: Monday, September 15, 2014 11:28 PM
>>> To: Zhenqiang Chen
>>> Cc: gcc-patches@gcc.gnu.org; Jeff Law
>>> Subject: Re: [PATCH, 2/2] shrink wrap a function with a single loop: split
>>> live_edge
>>>
>>> On 08/05/14 09:07, Zhenqiang Chen wrote:
>>>>     static bool
>>>>     move_insn_for_shrink_wrap (basic_block bb, rtx insn,
>>>>                               const HARD_REG_SET uses,
>>>>                               const HARD_REG_SET defs,
>>>> -                          HARD_REG_SET *last_uses)
>>>> +                          HARD_REG_SET *last_uses,
>>>> +                          bool *split_p)
>>>> {
>>>>       rtx set, src, dest;
>>>>       bitmap live_out, live_in, bb_uses, bb_defs;
>>>>       unsigned int i, dregno, end_dregno, sregno, end_sregno;
>>>>       basic_block next_block;
>>>> +  edge live_edge;
>>>>
>>>>       /* Look for a simple register copy.  */
>>>>       set = single_set (insn);
>>>> @@ -5582,17 +5589,31 @@ move_insn_for_shrink_wrap (basic_block bb,
>>> rtx insn,
>>>>           || overlaps_hard_reg_set_p (defs, GET_MODE (dest), dregno))
>>>>         return false;
>>>>
>>>> -  /* See whether there is a successor block to which we could move
>>>> INSN.  */
>>>> -  next_block = next_block_for_reg (bb, dregno, end_dregno);
>>>> -  if (!next_block)
>>>> +  live_edge = next_block_for_reg (bb, dregno, end_dregno);  if
>>>> + (!live_edge)
>>>>          return false;
>>>>
>>>> +  next_block = live_edge->dest;
>>>> +
>>>>       /* If the destination register is referred in later insn,
>>>>          try to forward it.  */
>>>>       if (overlaps_hard_reg_set_p (*last_uses, GET_MODE (dest), dregno)
>>>>           && !try_copy_prop (bb, insn, src, dest, last_uses))
>>>>         return false;
>>>>
>>>> +  /* Create a new basic block on the edge.  */  if (EDGE_COUNT
>>>> + (next_block->preds) == 2)
>>>> +    {
>>>> +      next_block = split_edge (live_edge);
>>>> +
>>>> +      bitmap_copy (df_get_live_in (next_block), df_get_live_out
>>>> + (bb));
>>> (re-sent, looks like the first send not received by the server...)
>>>
>>>     for the function "_IO_wdefault_xsputn" in glibc wgenops.c (.dot file
>>> attached)
>>>
>>>     174: NOTE_INSN_BASIC_BLOCK 21 are the new created basic block because
>>> of the sink of move instruction "ax:DI = 0" and split edge.
>>>
>>>     but the live_in of this BB is copied from live_out of BB 2 which is too big, and
>>> actually prevent the later sink of "16: r12:DI=dx:DI".
>>>
>>>     Should it be better to copy live_in from "next_block->next_bb" instead of
>>> live_out from "bb", as it will model what's needed more accurately?
>> According to the algorithm, next_block->next_bb (which is live_edge->dest) should have two predecessors. Live_in of next_block->next_bb would include live_out of the other edge,  which is not necessary accurately. To be more accurate, you may need an intersection of df_get_live_out (bb) and df_get_live_in (next_block->next_bb).
>>
> Presumably we can't recompute the liveness info here as it's too
> expensive to do that each time we split an edge to sink a statement?
> ie, we need the more accurate liveness within this pass so that we can
> sink further instructions?

for a single case, x86 bootstrap, looks like these code are not compile time critical
as for all 4762 live edges which could sink instructions

4139: EDGE_COUNT (next_block->preds) == 1
270: EDGE_COUNT (next_block->preds) == 2
353: EDGE_COUNT (next_block->preds) > 2

and if we make the live info more accurate here, just very few additional functions (< 10)
shrink-wrapped from glibc build & gcc bootstrap test.

So, is it OK we simply change "bitmap_copy  (df_get_live_in  (next_block),  df_get_live_out  (bb));"
into "bitmap_and  (df_get_live_in  (next_block),  df_get_live_out  (bb),  df_get_live_in  (next_block->next_bb));" ?

-- Jiong

>
> jeff
>
>
Jeff Law Sept. 19, 2014, 3:49 p.m. UTC | #8
On 09/19/14 05:27, Jiong Wang wrote:
>
> On 19/09/14 06:51, Jeff Law wrote:
>> On 09/16/14 00:55, Zhenqiang Chen wrote:
>>>
>>>> -----Original Message-----
>>>> From: gcc-patches-owner@gcc.gnu.org [mailto:gcc-patches-
>>>> owner@gcc.gnu.org] On Behalf Of Jiong Wang
>>>> Sent: Monday, September 15, 2014 11:28 PM
>>>> To: Zhenqiang Chen
>>>> Cc: gcc-patches@gcc.gnu.org; Jeff Law
>>>> Subject: Re: [PATCH, 2/2] shrink wrap a function with a single loop:
>>>> split
>>>> live_edge
>>>>
>>>> On 08/05/14 09:07, Zhenqiang Chen wrote:
>>>>>     static bool
>>>>>     move_insn_for_shrink_wrap (basic_block bb, rtx insn,
>>>>>                               const HARD_REG_SET uses,
>>>>>                               const HARD_REG_SET defs,
>>>>> -                          HARD_REG_SET *last_uses)
>>>>> +                          HARD_REG_SET *last_uses,
>>>>> +                          bool *split_p)
>>>>> {
>>>>>       rtx set, src, dest;
>>>>>       bitmap live_out, live_in, bb_uses, bb_defs;
>>>>>       unsigned int i, dregno, end_dregno, sregno, end_sregno;
>>>>>       basic_block next_block;
>>>>> +  edge live_edge;
>>>>>
>>>>>       /* Look for a simple register copy.  */
>>>>>       set = single_set (insn);
>>>>> @@ -5582,17 +5589,31 @@ move_insn_for_shrink_wrap (basic_block bb,
>>>> rtx insn,
>>>>>           || overlaps_hard_reg_set_p (defs, GET_MODE (dest), dregno))
>>>>>         return false;
>>>>>
>>>>> -  /* See whether there is a successor block to which we could move
>>>>> INSN.  */
>>>>> -  next_block = next_block_for_reg (bb, dregno, end_dregno);
>>>>> -  if (!next_block)
>>>>> +  live_edge = next_block_for_reg (bb, dregno, end_dregno);  if
>>>>> + (!live_edge)
>>>>>          return false;
>>>>>
>>>>> +  next_block = live_edge->dest;
>>>>> +
>>>>>       /* If the destination register is referred in later insn,
>>>>>          try to forward it.  */
>>>>>       if (overlaps_hard_reg_set_p (*last_uses, GET_MODE (dest),
>>>>> dregno)
>>>>>           && !try_copy_prop (bb, insn, src, dest, last_uses))
>>>>>         return false;
>>>>>
>>>>> +  /* Create a new basic block on the edge.  */  if (EDGE_COUNT
>>>>> + (next_block->preds) == 2)
>>>>> +    {
>>>>> +      next_block = split_edge (live_edge);
>>>>> +
>>>>> +      bitmap_copy (df_get_live_in (next_block), df_get_live_out
>>>>> + (bb));
>>>> (re-sent, looks like the first send not received by the server...)
>>>>
>>>>     for the function "_IO_wdefault_xsputn" in glibc wgenops.c (.dot
>>>> file
>>>> attached)
>>>>
>>>>     174: NOTE_INSN_BASIC_BLOCK 21 are the new created basic block
>>>> because
>>>> of the sink of move instruction "ax:DI = 0" and split edge.
>>>>
>>>>     but the live_in of this BB is copied from live_out of BB 2 which
>>>> is too big, and
>>>> actually prevent the later sink of "16: r12:DI=dx:DI".
>>>>
>>>>     Should it be better to copy live_in from "next_block->next_bb"
>>>> instead of
>>>> live_out from "bb", as it will model what's needed more accurately?
>>> According to the algorithm, next_block->next_bb (which is
>>> live_edge->dest) should have two predecessors. Live_in of
>>> next_block->next_bb would include live_out of the other edge,  which
>>> is not necessary accurately. To be more accurate, you may need an
>>> intersection of df_get_live_out (bb) and df_get_live_in
>>> (next_block->next_bb).
>>>
>> Presumably we can't recompute the liveness info here as it's too
>> expensive to do that each time we split an edge to sink a statement?
>> ie, we need the more accurate liveness within this pass so that we can
>> sink further instructions?
>
> for a single case, x86 bootstrap, looks like these code are not compile
> time critical
> as for all 4762 live edges which could sink instructions
>
> 4139: EDGE_COUNT (next_block->preds) == 1
> 270: EDGE_COUNT (next_block->preds) == 2
> 353: EDGE_COUNT (next_block->preds) > 2
>
> and if we make the live info more accurate here, just very few
> additional functions (< 10)
> shrink-wrapped from glibc build & gcc bootstrap test.
Perhaps I wasn't clear.

The whole point behind the proposed changes is to enable additional 
sinking that is currently blocked because of the overly large set of 
live objects.

So my question was an attempt to understand why we didn't just a normal 
liveness update -- I speculated that we aren't doing a normal liveness 
update because of the compile-time cost.



>
> So, is it OK we simply change "bitmap_copy  (df_get_live_in
> (next_block),  df_get_live_out  (bb));"
> into "bitmap_and  (df_get_live_in  (next_block),  df_get_live_out
> (bb),  df_get_live_in  (next_block->next_bb));" ?
Probably.  Though I'd be a bit concerned with next_block->next_bb. 
Wouldn't it be safer to stash away the relevant basic block prior to the 
call to split_edge, then use that saved copy.  Something like this 
(untested):

basic_block old_dest = live_edge->dest;
next_block = split_edge (live_edge);

/* We create a new basic block.  Call df_grow_bb_info to make sure
    all data structures are allocated.  */
df_grow_bb_info (df_live);
bitmap_and (df_get_live_in (next_block),
             df_get_live_out (bb),
             df_get_live_in (old_dest));


The idea being we don't want to depend on the precise ordering blocks in 
the block chain.

Could you try that and see if it does what you need?

jeff
Jiong Wang Sept. 19, 2014, 4:02 p.m. UTC | #9
On 19/09/14 16:49, Jeff Law wrote:
> On 09/19/14 05:27, Jiong Wang wrote:
>> On 19/09/14 06:51, Jeff Law wrote:
>>> On 09/16/14 00:55, Zhenqiang Chen wrote:
>>>>> -----Original Message-----
>>>>> From: gcc-patches-owner@gcc.gnu.org [mailto:gcc-patches-
>>>>> owner@gcc.gnu.org] On Behalf Of Jiong Wang
>>>>> Sent: Monday, September 15, 2014 11:28 PM
>>>>> To: Zhenqiang Chen
>>>>> Cc: gcc-patches@gcc.gnu.org; Jeff Law
>>>>> Subject: Re: [PATCH, 2/2] shrink wrap a function with a single loop:
>>>>> split
>>>>> live_edge
>>>>>
>>>>> On 08/05/14 09:07, Zhenqiang Chen wrote:
>>>>>>      static bool
>>>>>>      move_insn_for_shrink_wrap (basic_block bb, rtx insn,
>>>>>>                                const HARD_REG_SET uses,
>>>>>>                                const HARD_REG_SET defs,
>>>>>> -                          HARD_REG_SET *last_uses)
>>>>>> +                          HARD_REG_SET *last_uses,
>>>>>> +                          bool *split_p)
>>>>>> {
>>>>>>        rtx set, src, dest;
>>>>>>        bitmap live_out, live_in, bb_uses, bb_defs;
>>>>>>        unsigned int i, dregno, end_dregno, sregno, end_sregno;
>>>>>>        basic_block next_block;
>>>>>> +  edge live_edge;
>>>>>>
>>>>>>        /* Look for a simple register copy.  */
>>>>>>        set = single_set (insn);
>>>>>> @@ -5582,17 +5589,31 @@ move_insn_for_shrink_wrap (basic_block bb,
>>>>> rtx insn,
>>>>>>            || overlaps_hard_reg_set_p (defs, GET_MODE (dest), dregno))
>>>>>>          return false;
>>>>>>
>>>>>> -  /* See whether there is a successor block to which we could move
>>>>>> INSN.  */
>>>>>> -  next_block = next_block_for_reg (bb, dregno, end_dregno);
>>>>>> -  if (!next_block)
>>>>>> +  live_edge = next_block_for_reg (bb, dregno, end_dregno);  if
>>>>>> + (!live_edge)
>>>>>>           return false;
>>>>>>
>>>>>> +  next_block = live_edge->dest;
>>>>>> +
>>>>>>        /* If the destination register is referred in later insn,
>>>>>>           try to forward it.  */
>>>>>>        if (overlaps_hard_reg_set_p (*last_uses, GET_MODE (dest),
>>>>>> dregno)
>>>>>>            && !try_copy_prop (bb, insn, src, dest, last_uses))
>>>>>>          return false;
>>>>>>
>>>>>> +  /* Create a new basic block on the edge.  */  if (EDGE_COUNT
>>>>>> + (next_block->preds) == 2)
>>>>>> +    {
>>>>>> +      next_block = split_edge (live_edge);
>>>>>> +
>>>>>> +      bitmap_copy (df_get_live_in (next_block), df_get_live_out
>>>>>> + (bb));
>>>>> (re-sent, looks like the first send not received by the server...)
>>>>>
>>>>>      for the function "_IO_wdefault_xsputn" in glibc wgenops.c (.dot
>>>>> file
>>>>> attached)
>>>>>
>>>>>      174: NOTE_INSN_BASIC_BLOCK 21 are the new created basic block
>>>>> because
>>>>> of the sink of move instruction "ax:DI = 0" and split edge.
>>>>>
>>>>>      but the live_in of this BB is copied from live_out of BB 2 which
>>>>> is too big, and
>>>>> actually prevent the later sink of "16: r12:DI=dx:DI".
>>>>>
>>>>>      Should it be better to copy live_in from "next_block->next_bb"
>>>>> instead of
>>>>> live_out from "bb", as it will model what's needed more accurately?
>>>> According to the algorithm, next_block->next_bb (which is
>>>> live_edge->dest) should have two predecessors. Live_in of
>>>> next_block->next_bb would include live_out of the other edge,  which
>>>> is not necessary accurately. To be more accurate, you may need an
>>>> intersection of df_get_live_out (bb) and df_get_live_in
>>>> (next_block->next_bb).
>>>>
>>> Presumably we can't recompute the liveness info here as it's too
>>> expensive to do that each time we split an edge to sink a statement?
>>> ie, we need the more accurate liveness within this pass so that we can
>>> sink further instructions?
>> for a single case, x86 bootstrap, looks like these code are not compile
>> time critical
>> as for all 4762 live edges which could sink instructions
>>
>> 4139: EDGE_COUNT (next_block->preds) == 1
>> 270: EDGE_COUNT (next_block->preds) == 2
>> 353: EDGE_COUNT (next_block->preds) > 2
>>
>> and if we make the live info more accurate here, just very few
>> additional functions (< 10)
>> shrink-wrapped from glibc build & gcc bootstrap test.
> Perhaps I wasn't clear.
>
> The whole point behind the proposed changes is to enable additional
> sinking that is currently blocked because of the overly large set of
> live objects.
>
> So my question was an attempt to understand why we didn't just a normal
> liveness update -- I speculated that we aren't doing a normal liveness
> update because of the compile-time cost.
>
>
>
>> So, is it OK we simply change "bitmap_copy  (df_get_live_in
>> (next_block),  df_get_live_out  (bb));"
>> into "bitmap_and  (df_get_live_in  (next_block),  df_get_live_out
>> (bb),  df_get_live_in  (next_block->next_bb));" ?
> Probably.  Though I'd be a bit concerned with next_block->next_bb.
> Wouldn't it be safer to stash away the relevant basic block prior to the
> call to split_edge, then use that saved copy.  Something like this
> (untested):
>
> basic_block old_dest = live_edge->dest;
> next_block = split_edge (live_edge);
>
> /* We create a new basic block.  Call df_grow_bb_info to make sure
>      all data structures are allocated.  */
> df_grow_bb_info (df_live);
> bitmap_and (df_get_live_in (next_block),
>               df_get_live_out (bb),
>               df_get_live_in (old_dest));
>
>
> The idea being we don't want to depend on the precise ordering blocks in
> the block chain.
>
> Could you try that and see if it does what you need?
Jeff,

   Thanks, verified, it works.

Regards,
Jiong
>
> jeff
>
>
Jeff Law Sept. 19, 2014, 4:19 p.m. UTC | #10
On 09/19/14 10:02, Jiong Wang wrote:
>
> On 19/09/14 16:49, Jeff Law wrote:
>> On 09/19/14 05:27, Jiong Wang wrote:
>>> On 19/09/14 06:51, Jeff Law wrote:
>>>> On 09/16/14 00:55, Zhenqiang Chen wrote:
>>>>>> -----Original Message-----
>>>>>> From: gcc-patches-owner@gcc.gnu.org [mailto:gcc-patches-
>>>>>> owner@gcc.gnu.org] On Behalf Of Jiong Wang
>>>>>> Sent: Monday, September 15, 2014 11:28 PM
>>>>>> To: Zhenqiang Chen
>>>>>> Cc: gcc-patches@gcc.gnu.org; Jeff Law
>>>>>> Subject: Re: [PATCH, 2/2] shrink wrap a function with a single loop:
>>>>>> split
>>>>>> live_edge
>>>>>>
>>>>>> On 08/05/14 09:07, Zhenqiang Chen wrote:
>>>>>>>      static bool
>>>>>>>      move_insn_for_shrink_wrap (basic_block bb, rtx insn,
>>>>>>>                                const HARD_REG_SET uses,
>>>>>>>                                const HARD_REG_SET defs,
>>>>>>> -                          HARD_REG_SET *last_uses)
>>>>>>> +                          HARD_REG_SET *last_uses,
>>>>>>> +                          bool *split_p)
>>>>>>> {
>>>>>>>        rtx set, src, dest;
>>>>>>>        bitmap live_out, live_in, bb_uses, bb_defs;
>>>>>>>        unsigned int i, dregno, end_dregno, sregno, end_sregno;
>>>>>>>        basic_block next_block;
>>>>>>> +  edge live_edge;
>>>>>>>
>>>>>>>        /* Look for a simple register copy.  */
>>>>>>>        set = single_set (insn);
>>>>>>> @@ -5582,17 +5589,31 @@ move_insn_for_shrink_wrap (basic_block bb,
>>>>>> rtx insn,
>>>>>>>            || overlaps_hard_reg_set_p (defs, GET_MODE (dest),
>>>>>>> dregno))
>>>>>>>          return false;
>>>>>>>
>>>>>>> -  /* See whether there is a successor block to which we could move
>>>>>>> INSN.  */
>>>>>>> -  next_block = next_block_for_reg (bb, dregno, end_dregno);
>>>>>>> -  if (!next_block)
>>>>>>> +  live_edge = next_block_for_reg (bb, dregno, end_dregno);  if
>>>>>>> + (!live_edge)
>>>>>>>           return false;
>>>>>>>
>>>>>>> +  next_block = live_edge->dest;
>>>>>>> +
>>>>>>>        /* If the destination register is referred in later insn,
>>>>>>>           try to forward it.  */
>>>>>>>        if (overlaps_hard_reg_set_p (*last_uses, GET_MODE (dest),
>>>>>>> dregno)
>>>>>>>            && !try_copy_prop (bb, insn, src, dest, last_uses))
>>>>>>>          return false;
>>>>>>>
>>>>>>> +  /* Create a new basic block on the edge.  */  if (EDGE_COUNT
>>>>>>> + (next_block->preds) == 2)
>>>>>>> +    {
>>>>>>> +      next_block = split_edge (live_edge);
>>>>>>> +
>>>>>>> +      bitmap_copy (df_get_live_in (next_block), df_get_live_out
>>>>>>> + (bb));
>>>>>> (re-sent, looks like the first send not received by the server...)
>>>>>>
>>>>>>      for the function "_IO_wdefault_xsputn" in glibc wgenops.c (.dot
>>>>>> file
>>>>>> attached)
>>>>>>
>>>>>>      174: NOTE_INSN_BASIC_BLOCK 21 are the new created basic block
>>>>>> because
>>>>>> of the sink of move instruction "ax:DI = 0" and split edge.
>>>>>>
>>>>>>      but the live_in of this BB is copied from live_out of BB 2 which
>>>>>> is too big, and
>>>>>> actually prevent the later sink of "16: r12:DI=dx:DI".
>>>>>>
>>>>>>      Should it be better to copy live_in from "next_block->next_bb"
>>>>>> instead of
>>>>>> live_out from "bb", as it will model what's needed more accurately?
>>>>> According to the algorithm, next_block->next_bb (which is
>>>>> live_edge->dest) should have two predecessors. Live_in of
>>>>> next_block->next_bb would include live_out of the other edge,  which
>>>>> is not necessary accurately. To be more accurate, you may need an
>>>>> intersection of df_get_live_out (bb) and df_get_live_in
>>>>> (next_block->next_bb).
>>>>>
>>>> Presumably we can't recompute the liveness info here as it's too
>>>> expensive to do that each time we split an edge to sink a statement?
>>>> ie, we need the more accurate liveness within this pass so that we can
>>>> sink further instructions?
>>> for a single case, x86 bootstrap, looks like these code are not compile
>>> time critical
>>> as for all 4762 live edges which could sink instructions
>>>
>>> 4139: EDGE_COUNT (next_block->preds) == 1
>>> 270: EDGE_COUNT (next_block->preds) == 2
>>> 353: EDGE_COUNT (next_block->preds) > 2
>>>
>>> and if we make the live info more accurate here, just very few
>>> additional functions (< 10)
>>> shrink-wrapped from glibc build & gcc bootstrap test.
>> Perhaps I wasn't clear.
>>
>> The whole point behind the proposed changes is to enable additional
>> sinking that is currently blocked because of the overly large set of
>> live objects.
>>
>> So my question was an attempt to understand why we didn't just a normal
>> liveness update -- I speculated that we aren't doing a normal liveness
>> update because of the compile-time cost.
>>
>>
>>
>>> So, is it OK we simply change "bitmap_copy  (df_get_live_in
>>> (next_block),  df_get_live_out  (bb));"
>>> into "bitmap_and  (df_get_live_in  (next_block),  df_get_live_out
>>> (bb),  df_get_live_in  (next_block->next_bb));" ?
>> Probably.  Though I'd be a bit concerned with next_block->next_bb.
>> Wouldn't it be safer to stash away the relevant basic block prior to the
>> call to split_edge, then use that saved copy.  Something like this
>> (untested):
>>
>> basic_block old_dest = live_edge->dest;
>> next_block = split_edge (live_edge);
>>
>> /* We create a new basic block.  Call df_grow_bb_info to make sure
>>      all data structures are allocated.  */
>> df_grow_bb_info (df_live);
>> bitmap_and (df_get_live_in (next_block),
>>               df_get_live_out (bb),
>>               df_get_live_in (old_dest));
>>
>>
>> The idea being we don't want to depend on the precise ordering blocks in
>> the block chain.
>>
>> Could you try that and see if it does what you need?
> Jeff,
>
>    Thanks, verified, it works.
Great.  Can you send an updated patchkit for review.

Thanks,

jeff
Jeff Law Sept. 22, 2014, 5:51 p.m. UTC | #11
On 09/22/14 04:24, Jiong Wang wrote:
>> Great.  Can you send an updated patchkit for review.
>
> patch attached.
>
> please review, thanks.
>
> gcc/ * shrink-wrap.c (move_insn_for_shrink_wrap): Initialize the
> live-in of new created BB as the intersection of live-in from
> "old_dest" and live-out from "bb".
Looks good.  However, before committing we need a couple things.

1. Bootstrap & regression test this variant of the patch.  I know you 
tested an earlier one, but please test this one just to be sure.

2. Testcase.  I think you could test for either the reduction in the 
live-in set of the newly created block or that you're shrink wrapping 
one or more functions you didn't previously shrink-wrap.  I think it's 
fine if this test is target specific.

Jeff
Zhenqiang Chen Sept. 25, 2014, 8:52 a.m. UTC | #12
> -----Original Message-----
> From: Jiong Wang [mailto:jiong.wang@arm.com]
> Sent: Thursday, September 25, 2014 2:13 AM
> To: Jeff Law; Zhenqiang Chen
> Cc: gcc-patches@gcc.gnu.org
> Subject: Re: [PATCH, 2/2] shrink wrap a function with a single loop: split
> live_edge
> 
> 
> On 22/09/14 18:51, Jeff Law wrote:
> > On 09/22/14 04:24, Jiong Wang wrote:
> >>> Great.  Can you send an updated patchkit for review.
> >> patch attached.
> >>
> >> please review, thanks.
> >>
> >> gcc/ * shrink-wrap.c (move_insn_for_shrink_wrap): Initialize the
> >> live-in of new created BB as the intersection of live-in from
> >> "old_dest" and live-out from "bb".
> > Looks good.  However, before committing we need a couple things.
> >
> > 1. Bootstrap & regression test this variant of the patch.  I know you
> > tested an earlier one, but please test this one just to be sure.
> >
> > 2. Testcase.  I think you could test for either the reduction in the
> > live-in set of the newly created block or that you're shrink wrapping
> > one or more functions you didn't previously shrink-wrap.  I think it's
> > fine if this test is target specific.
> 
>   bootstrap ok based on revision 215515.
> 
>   while the x86 regression result is interesting. there is no regression on
> check-g++, while there is four regression on check-gcc:
> 
> FAIL: gcc.dg/tree-ssa/loadpre10.c (internal compiler error)
> FAIL: gcc.dg/tree-ssa/loadpre10.c (test for excess errors)
> FAIL: gcc.dg/tree-ssa/pr21417.c (internal compiler error)
> FAIL: gcc.dg/tree-ssa/pr21417.c (test for excess errors)
> 
>    this is caused by our improving the accuracy of live-in for new created basic
> block. Now we will split
>    more than one edge for the above two testcase. thus trigger the following
> assert in move_insn_for_shrink_wrap:
> 
>        /* We should not split more than once for a function.  */
>        gcc_assert (!(*split_p));

According to the algorithm, it is impossible to split one edge twice. It's possible to split two different edges. But for such cases, the control flow is too complex to perform shrink-wrapping.

Anyway, your patch improves the accuracy. You can replace the "gcc_assert" to "return"; or change "split_p" to "splitted_edge" then you can check one edge is not splitted twice.

Thanks!
-Zhenqiang
 
>   take pr21417.c for example, after the patch, two edges will be split,
> 
> before this patch
> =================
> .L2:
>          movq    %rdi, %rax
>          cmpl    $142, (%rdi)
>          jne     .L13
> .L4:
>          all insns sinked here  <-- the only split
>          ...
>          ...
> 
>          popq    %rbx
>          popq    %rbp
> .L13:
>          ret
> 
> after this patch
> ================
> .L2:
> 
>          cmpl    $142, (%rdi)
>          jne     .L13
> .L4:
>          part of insns sinked into here  <-- first split
>          ....
>          ....
> 
>          popq    %rbx
>          popq    %rbp
>          ret
> 
> .L13:
>          movq    %rdi, %rax  <-- second split and one instruction moved here
>          ret
> 
> I don't know why there is a assert to prevent multi split.
> 
> after I remove that assert, pass bootstrap and no regression.
> 
> and for pr21417.c, the multi split more cause one extra "ret" instruction, but
> the performance is better, because there
> is no need to execute "movq    %rdi, %rax" if we go down to L4.
> 
> any comments?
> 
> BTW: I updated the patch with testcase which could not be shrink-wrapped
> before this patch.
> 
> thanks.
> 
> -- Jiong
> 
> >
> > Jeff
> >
> >
Jeff Law Sept. 25, 2014, 4:24 p.m. UTC | #13
On 09/25/14 09:04, Jiong Wang wrote:
>
> On 25/09/14 09:52, Zhenqiang Chen wrote:
>>
>>> -----Original Message-----
>>> From: Jiong Wang [mailto:jiong.wang@arm.com]
>>> Sent: Thursday, September 25, 2014 2:13 AM
>>> To: Jeff Law; Zhenqiang Chen
>>> Cc: gcc-patches@gcc.gnu.org
>>> Subject: Re: [PATCH, 2/2] shrink wrap a function with a single loop:
>>> split
>>> live_edge
>>>
>>>
>>> On 22/09/14 18:51, Jeff Law wrote:
>>>> On 09/22/14 04:24, Jiong Wang wrote:
>>>>>> Great.  Can you send an updated patchkit for review.
>>>>> patch attached.
>>>>>
>>>>> please review, thanks.
>>>>>
>>>>> gcc/ * shrink-wrap.c (move_insn_for_shrink_wrap): Initialize the
>>>>> live-in of new created BB as the intersection of live-in from
>>>>> "old_dest" and live-out from "bb".
>>>> Looks good.  However, before committing we need a couple things.
>>>>
>>>> 1. Bootstrap & regression test this variant of the patch.  I know you
>>>> tested an earlier one, but please test this one just to be sure.
>>>>
>>>> 2. Testcase.  I think you could test for either the reduction in the
>>>> live-in set of the newly created block or that you're shrink wrapping
>>>> one or more functions you didn't previously shrink-wrap.  I think it's
>>>> fine if this test is target specific.
>>>    bootstrap ok based on revision 215515.
>>>
>>>    while the x86 regression result is interesting. there is no
>>> regression on
>>> check-g++, while there is four regression on check-gcc:
>>>
>>> FAIL: gcc.dg/tree-ssa/loadpre10.c (internal compiler error)
>>> FAIL: gcc.dg/tree-ssa/loadpre10.c (test for excess errors)
>>> FAIL: gcc.dg/tree-ssa/pr21417.c (internal compiler error)
>>> FAIL: gcc.dg/tree-ssa/pr21417.c (test for excess errors)
>>>
>>>     this is caused by our improving the accuracy of live-in for new
>>> created basic
>>> block. Now we will split
>>>     more than one edge for the above two testcase. thus trigger the
>>> following
>>> assert in move_insn_for_shrink_wrap:
>>>
>>>         /* We should not split more than once for a function.  */
>>>         gcc_assert (!(*split_p));
>> According to the algorithm, it is impossible to split one edge twice.
>> It's possible to split two different edges. But for such cases, the
>> control flow is too complex to perform shrink-wrapping.
>>
>> Anyway, your patch improves the accuracy. You can replace the
>> "gcc_assert" to "return"; or change "split_p" to "splitted_edge" then
>> you can check one edge is not splitted twice.
>
> thanks for the explanation.
>
> actually, the old "bitmap_copy (df_get_live_in (next_block),
> df_get_live_out (bb));" will let any "dest" reg
> in entry block alive in the new splitted block. If there is another
> block which "dest" also set in live_in, then
> dest alive in two blocks, then those code in "live_edge_for_reg" will
> always return NULL, thus the old
> inaccurate data flow will actually never make split two different edges
> happen... thus assert never triggered.
>
> as from the whole x86 boostrap, and regression test, only two cases
> trigger split two different edges, I think it's
> trival case, thus prefer to be conservative to keep the old logic, as
> suggested, just replace "gcc_assert" into "return false".
>
> or if we want to allow multi split, I think just remove the assert is
> OK, because "EDGE_COUNT (next_block->preds) == 2"
> will guarantee split one edge twice never happen.
>
> new patch updated.
>
> pass bootstrap and no regression, both check-gcc and check-g++, on the x86.
>
> OK for trunk?
>
> thanks.
>
> gcc/
>     * shrink-wrap.c (move_insn_for_shrink_wrap): Initialize the live-in of
>     new created BB as the intersection of live-in from "old_dest" and
> live-out
>     from "bb".
Please include a ChangeLog entry for the testsuite.  Something like:

	* gcc.target/i386/shrink_wrap_1.c: New test.

With that addition, OK for the trunk.

Jeff
Jiong Wang Sept. 25, 2014, 4:43 p.m. UTC | #14
On 25/09/14 17:24, Jeff Law wrote:
> On 09/25/14 09:04, Jiong Wang wrote:
>> new patch updated.
>>
>> pass bootstrap and no regression, both check-gcc and check-g++, on the x86.
>>
>> OK for trunk?
>>
>> thanks.
>>
>> gcc/
>>      * shrink-wrap.c (move_insn_for_shrink_wrap): Initialize the live-in of
>>      new created BB as the intersection of live-in from "old_dest" and
>> live-out
>>      from "bb".
> Please include a ChangeLog entry for the testsuite.  Something like:
>
> 	* gcc.target/i386/shrink_wrap_1.c: New test.
>
> With that addition, OK for the trunk.

committed as r215611.

-- Jiong

>
> Jeff
>
>
>
H.J. Lu Sept. 26, 2014, 3:05 p.m. UTC | #15
On Thu, Sep 25, 2014 at 9:43 AM, Jiong Wang <jiong.wang@arm.com> wrote:
>
> On 25/09/14 17:24, Jeff Law wrote:
>>
>> On 09/25/14 09:04, Jiong Wang wrote:
>>>
>>> new patch updated.
>>>
>>> pass bootstrap and no regression, both check-gcc and check-g++, on the
>>> x86.
>>>
>>> OK for trunk?
>>>
>>> thanks.
>>>
>>> gcc/
>>>      * shrink-wrap.c (move_insn_for_shrink_wrap): Initialize the live-in
>>> of
>>>      new created BB as the intersection of live-in from "old_dest" and
>>> live-out
>>>      from "bb".
>>
>> Please include a ChangeLog entry for the testsuite.  Something like:
>>
>>         * gcc.target/i386/shrink_wrap_1.c: New test.

This fails on Linux/x86 and with -m32 on Linux/x86-64.

>> With that addition, OK for the trunk.
>
>
> committed as r215611.
Jiong Wang Sept. 26, 2014, 3:14 p.m. UTC | #16
On 26/09/14 16:05, H.J. Lu wrote:
> On Thu, Sep 25, 2014 at 9:43 AM, Jiong Wang <jiong.wang@arm.com> wrote:
>> On 25/09/14 17:24, Jeff Law wrote:
>>> On 09/25/14 09:04, Jiong Wang wrote:
>>>> new patch updated.
>>>>
>>>> pass bootstrap and no regression, both check-gcc and check-g++, on the
>>>> x86.
>>>>
>>>> OK for trunk?
>>>>
>>>> thanks.
>>>>
>>>> gcc/
>>>>       * shrink-wrap.c (move_insn_for_shrink_wrap): Initialize the live-in
>>>> of
>>>>       new created BB as the intersection of live-in from "old_dest" and
>>>> live-out
>>>>       from "bb".
>>> Please include a ChangeLog entry for the testsuite.  Something like:
>>>
>>>          * gcc.target/i386/shrink_wrap_1.c: New test.
> This fails on Linux/x86 and with -m32 on Linux/x86-64.
sorry, my test machine is x86-64, I think the shrink wrap test itself is 
very fragile because
it's highly related insn generated.

could you mark that testcase using something like 
"dg-require-effective-target lp64"?

>
>>> With that addition, OK for the trunk.
>>
>> committed as r215611.
>
>
Jiong Wang Sept. 26, 2014, 3:29 p.m. UTC | #17
On 26/09/14 16:28, H.J. Lu wrote:
> On Fri, Sep 26, 2014 at 8:14 AM, Jiong Wang <jiong.wang@arm.com> wrote:
>> could you mark that testcase using something like
>> "dg-require-effective-target lp64"?
> I checked in this patch to skip it on ia32.
>
great, thanks !
-- Jiong
diff mbox

Patch

diff --git a/gcc/function.c b/gcc/function.c
index 764ac82..0be58e2 100644
--- a/gcc/function.c
+++ b/gcc/function.c
@@ -5381,7 +5381,7 @@  requires_stack_frame_p (rtx insn, HARD_REG_SET
prologue_used,
    and if BB is its only predecessor.  Return that block if so,
    otherwise return null.  */

-static basic_block
+static edge
 next_block_for_reg (basic_block bb, int regno, int end_regno)
 {