diff mbox

[0/7] Type promotion pass and elimination of zext/sext

Message ID 56442C1A.4090205@linaro.org
State Superseded
Headers show

Commit Message

Kugan Vivekanandarajah Nov. 12, 2015, 6:05 a.m. UTC
Hi Richard,

Thanks for the review.

>>>

>>> The basic "structure" thing still remains.  You walk over all uses and

>>> defs in all stmts

>>> in promote_all_stmts which ends up calling promote_ssa_if_not_promoted on all

>>> uses and defs which in turn promotes (the "def") and then fixes up all

>>> uses in all stmts.

>>

>> Done.

> 

> Not exactly.  I still see

> 

> /* Promote all the stmts in the basic block.  */

> static void

> promote_all_stmts (basic_block bb)

> {

>   gimple_stmt_iterator gsi;

>   ssa_op_iter iter;

>   tree def, use;

>   use_operand_p op;

> 

>   for (gphi_iterator gpi = gsi_start_phis (bb);

>        !gsi_end_p (gpi); gsi_next (&gpi))

>     {

>       gphi *phi = gpi.phi ();

>       def = PHI_RESULT (phi);

>       promote_ssa (def, &gsi);

> 

>       FOR_EACH_PHI_ARG (op, phi, iter, SSA_OP_USE)

>         {

>           use = USE_FROM_PTR (op);

>           if (TREE_CODE (use) == SSA_NAME

>               && gimple_code (SSA_NAME_DEF_STMT (use)) == GIMPLE_NOP)

>             promote_ssa (use, &gsi);

>           fixup_uses (phi, &gsi, op, use);

>         }

> 

> you still call promote_ssa on both DEFs and USEs and promote_ssa looks

> at SSA_NAME_DEF_STMT of the passed arg.  Please call promote_ssa just

> on DEFs and fixup_uses on USEs.


I am doing this to promote SSA that are defined with GIMPLE_NOP. Is
there anyway to iterate over this. I have added gcc_assert to make sure
that promote_ssa is called only once.

> 

> Any reason you do not promote debug stmts during the DOM walk?

> 

> So for each DEF you record in ssa_name_info

> 

> struct ssa_name_info

> {

>   tree ssa;

>   tree type;

>   tree promoted_type;

> };

> 

> (the fields need documenting).  Add a tree promoted_def to it which you

> can replace any use of the DEF with.


In this version of the patch, I am promoting the def in place. If we
decide to change, I will add it. If I understand you correctly, this is
to be used in iterating over uses and fixing.

> 

> Currently as you call promote_ssa for DEFs and USEs you repeatedly

> overwrite the entry in ssa_name_info_map with a new copy.  So you

> should assert it wasn't already there.

> 

>   switch (gimple_code (def_stmt))

>     {

>     case GIMPLE_PHI:

>         {

> 

> the last { is indented too much it should be indented 2 spaces

> relative to the 'case'


Done.

> 

> 

>   SSA_NAME_RANGE_INFO (def) = NULL;

> 

> only needed in the case 'def' was promoted itself.  Please use

> reset_flow_sensitive_info (def).


We are promoting all the defs. In some-cases we can however use the
value ranges in SSA just by promoting to new type (as the values will be
the same). Shall I do it as a follow up.
> 

>>>

>>> Instead of this you should, in promote_all_stmts, walk over all uses doing what

>>> fixup_uses does and then walk over all defs, doing what promote_ssa does.

>>>

>>> +    case GIMPLE_NOP:

>>> +       {

>>> +         if (SSA_NAME_VAR (def) == NULL)

>>> +           {

>>> +             /* Promote def by fixing its type for anonymous def.  */

>>> +             TREE_TYPE (def) = promoted_type;

>>> +           }

>>> +         else

>>> +           {

>>> +             /* Create a promoted copy of parameters.  */

>>> +             bb = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun));

>>>

>>> I think the uninitialized vars are somewhat tricky and it would be best

>>> to create a new uninit anonymous SSA name for them.  You can

>>> have SSA_NAME_VAR != NULL and def _not_ being a parameter

>>> btw.

>>

>> Done. I also had to do some changes to in couple of other places to

>> reflect this.

>> They are:

>> --- a/gcc/tree-ssa-reassoc.c

>> +++ b/gcc/tree-ssa-reassoc.c

>> @@ -302,6 +302,7 @@ phi_rank (gimple *stmt)

>>      {

>>        tree arg = gimple_phi_arg_def (stmt, i);

>>        if (TREE_CODE (arg) == SSA_NAME

>> +         && SSA_NAME_VAR (arg)

>>           && !SSA_NAME_IS_DEFAULT_DEF (arg))

>>         {

>>           gimple *def_stmt = SSA_NAME_DEF_STMT (arg);

>> @@ -434,7 +435,8 @@ get_rank (tree e)

>>        if (gimple_code (stmt) == GIMPLE_PHI)

>>         return phi_rank (stmt);

>>

>> -      if (!is_gimple_assign (stmt))

>> +      if (!is_gimple_assign (stmt)

>> +         && !gimple_nop_p (stmt))

>>         return bb_rank[gimple_bb (stmt)->index];

>>

>> and

>>

>> --- a/gcc/tree-ssa.c

>> +++ b/gcc/tree-ssa.c

>> @@ -752,7 +752,8 @@ verify_use (basic_block bb, basic_block def_bb,

>> use_operand_p use_p,

>>    TREE_VISITED (ssa_name) = 1;

>>

>>    if (gimple_nop_p (SSA_NAME_DEF_STMT (ssa_name))

>> -      && SSA_NAME_IS_DEFAULT_DEF (ssa_name))

>> +      && (SSA_NAME_IS_DEFAULT_DEF (ssa_name)

>> +         || SSA_NAME_VAR (ssa_name) == NULL))

>>      ; /* Default definitions have empty statements.  Nothing to do.  */

>>    else if (!def_bb)

>>      {

>>

>> Does this look OK?

> 

> Hmm, no, this looks bogus.


I have removed all the above.

> 

> I think the best thing to do is not promoting default defs at all and instead

> promote at the uses.

> 

>               /* Create a promoted copy of parameters.  */

>               bb = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun));

>               gcc_assert (bb);

>               gsi2 = gsi_after_labels (bb);

>               new_def = copy_ssa_name (def);

>               set_ssa_promoted (new_def);

>               set_ssa_default_def (cfun, SSA_NAME_VAR (def), new_def);

>               duplicate_default_ssa (new_def, def);

>               TREE_TYPE (def) = promoted_type;

> 

> AFAIK this is just an awkward way of replacing all uses by a new DEF, sth

> that should be supported by the machinery so that other default defs can just

> do

> 

>              new_def = get_or_create_default_def (create_tmp_reg

> (promoted_type));

> 

> and have all uses ('def') replaced by new_def.


I experimented with get_or_create_default_def. Here  we have to have a
SSA_NAME_VAR (def) of promoted type.

In the attached patch I am doing the following and seems to work. Does
this looks OK?

+	  }
+	else if (TREE_CODE (SSA_NAME_VAR (def)) != PARM_DECL)
+	  {
+	    tree var = copy_node (SSA_NAME_VAR (def));
+	    TREE_TYPE (var) = promoted_type;
+	    TREE_TYPE (def) = promoted_type;
+	    SET_SSA_NAME_VAR_OR_IDENTIFIER (def, var);
+	  }

I prefer to promote def as otherwise iterating over the uses and
promoting can look complicated (have to look at all the different types
of stmts again and do the right thing as It was in the earlier version
of this before we move to this approach)

>>>

>>> Note that as followup things like the rotates should be "expanded" like

>>> we'd do on RTL (open-coding the thing).  And we'd need a way to

>>> specify zero-/sign-extended loads.

>>>

>>> +/* Return true if it is safe to promote the use in the STMT.  */

>>> +static bool

>>> +safe_to_promote_use_p (gimple *stmt)

>>> +{

>>> +  enum tree_code code = gimple_assign_rhs_code (stmt);

>>> +  tree lhs = gimple_assign_lhs (stmt);

>>> +

>>> +  if (gimple_vuse (stmt) != NULL_TREE

>>> +      || gimple_vdef (stmt) != NULL_TREE

>>>

>>> I think the vuse/vdef check is bogus, you can have a use of 'i_3' in say

>>> _2 = a[i_3];

>>>

>> When I remove this, I see errors in stmts like:

>>

>> unsigned char

>> unsigned int

>> # .MEM_197 = VDEF <.MEM_187>

>> fs_9(D)->fde_encoding = _154;

> 

> Yeah, as said a stmt based check is really bogus without context.  As the

> predicate is only used in a single place it's better to inline it

> there.  In this

> case you want to handle loads/stores differently.  From this context it

> looks like not iterating over uses in the caller but rather iterating over

> uses here makes most sense as you then can do

> 

>    if (gimple_store_p (stmt))

>      {

>         promote all uses that are not gimple_assign_rhs1 ()

>      }

> 

> you can also transparently handle constants for the cases where promoting

> is required.  At the moment their handling is interwinded with the def promotion

> code.  That makes the whole thing hard to follow.



I have updated the comments with:

+/* Promote constants in STMT to TYPE.  If PROMOTE_COND_EXPR is true,
+   promote only the constants in conditions part of the COND_EXPR.
+
+   We promote the constants when the associated operands are promoted.
+   This usually means that we promote the constants when we promote the
+   defining stmnts (as part of promote_ssa). However for COND_EXPR, we
+   can promote only when we promote the other operand. Therefore, this
+   is done during fixup_use.  */


I am handling gimple_debug separately to avoid any code difference with
and without -g option. I have updated the comments for this.

Tested attached patch on ppc64, aarch64 and x86-none-linux-gnu.
regression testing for ppc64 is progressing. I also noticed that
tree-ssa-uninit sometimes gives false positives due to the assumptions
it makes. Is it OK to move this pass before type promotion? I can do the
testings and post a separate patch with this if this OK.

I also removed the optimization that prevents some of the redundant
truncation/extensions from type promotion pass, as it dosent do much as
of now. I can send a proper follow up patch. Is that OK?

Thanks,
Kugan
diff mbox

Patch

From c0ce364e3a422912a08189645efde46c36583753 Mon Sep 17 00:00:00 2001
From: Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>
Date: Thu, 22 Oct 2015 10:51:42 +1100
Subject: [PATCH 1/5] Add new SEXT_EXPR tree code

---
 gcc/cfgexpand.c         | 12 ++++++++++++
 gcc/expr.c              | 20 ++++++++++++++++++++
 gcc/fold-const.c        |  4 ++++
 gcc/tree-cfg.c          | 12 ++++++++++++
 gcc/tree-inline.c       |  1 +
 gcc/tree-pretty-print.c | 11 +++++++++++
 gcc/tree.def            |  5 +++++
 7 files changed, 65 insertions(+)

diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index eaad859..aeb64bb 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -5054,6 +5054,18 @@  expand_debug_expr (tree exp)
     case FMA_EXPR:
       return simplify_gen_ternary (FMA, mode, inner_mode, op0, op1, op2);
 
+    case SEXT_EXPR:
+      gcc_assert (CONST_INT_P (op1));
+      inner_mode = mode_for_size (INTVAL (op1), MODE_INT, 0);
+      gcc_assert (GET_MODE_BITSIZE (inner_mode) == INTVAL (op1));
+
+      if (mode != inner_mode)
+	op0 = simplify_gen_unary (SIGN_EXTEND,
+				  mode,
+				  gen_lowpart_SUBREG (inner_mode, op0),
+				  inner_mode);
+      return op0;
+
     default:
     flag_unsupported:
 #ifdef ENABLE_CHECKING
diff --git a/gcc/expr.c b/gcc/expr.c
index da68870..c2f535f 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -9318,6 +9318,26 @@  expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
       target = expand_vec_cond_expr (type, treeop0, treeop1, treeop2, target);
       return target;
 
+    case SEXT_EXPR:
+	{
+	  machine_mode inner_mode = mode_for_size (tree_to_uhwi (treeop1),
+						   MODE_INT, 0);
+	  rtx temp, result;
+	  rtx op0 = expand_normal (treeop0);
+	  op0 = force_reg (mode, op0);
+	  if (mode != inner_mode)
+	    {
+	      result = gen_reg_rtx (mode);
+	      temp = simplify_gen_unary (SIGN_EXTEND, mode,
+					 gen_lowpart_SUBREG (inner_mode, op0),
+					 inner_mode);
+	      convert_move (result, temp, 0);
+	    }
+	  else
+	    result = op0;
+	  return result;
+	}
+
     default:
       gcc_unreachable ();
     }
diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 602ea24..a149bad 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -987,6 +987,10 @@  int_const_binop_1 (enum tree_code code, const_tree arg1, const_tree parg2,
       res = wi::bit_and (arg1, arg2);
       break;
 
+    case SEXT_EXPR:
+      res = wi::sext (arg1, arg2.to_uhwi ());
+      break;
+
     case RSHIFT_EXPR:
     case LSHIFT_EXPR:
       if (wi::neg_p (arg2))
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index 8e3e810..d18b3f7 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -3752,6 +3752,18 @@  verify_gimple_assign_binary (gassign *stmt)
         return false;
       }
 
+    case SEXT_EXPR:
+      {
+	if (!INTEGRAL_TYPE_P (lhs_type)
+	    || !useless_type_conversion_p (lhs_type, rhs1_type)
+	    || !tree_fits_uhwi_p (rhs2))
+	  {
+	    error ("invalid operands in sext expr");
+	    return true;
+	  }
+	return false;
+      }
+
     case VEC_WIDEN_LSHIFT_HI_EXPR:
     case VEC_WIDEN_LSHIFT_LO_EXPR:
       {
diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c
index b8269ef..e61c200 100644
--- a/gcc/tree-inline.c
+++ b/gcc/tree-inline.c
@@ -3893,6 +3893,7 @@  estimate_operator_cost (enum tree_code code, eni_weights *weights,
     case BIT_XOR_EXPR:
     case BIT_AND_EXPR:
     case BIT_NOT_EXPR:
+    case SEXT_EXPR:
 
     case TRUTH_ANDIF_EXPR:
     case TRUTH_ORIF_EXPR:
diff --git a/gcc/tree-pretty-print.c b/gcc/tree-pretty-print.c
index 11f90051..bec9082 100644
--- a/gcc/tree-pretty-print.c
+++ b/gcc/tree-pretty-print.c
@@ -1923,6 +1923,14 @@  dump_generic_node (pretty_printer *pp, tree node, int spc, int flags,
       }
       break;
 
+    case SEXT_EXPR:
+      pp_string (pp, "SEXT_EXPR <");
+      dump_generic_node (pp, TREE_OPERAND (node, 0), spc, flags, false);
+      pp_string (pp, ", ");
+      dump_generic_node (pp, TREE_OPERAND (node, 1), spc, flags, false);
+      pp_greater (pp);
+      break;
+
     case MODIFY_EXPR:
     case INIT_EXPR:
       dump_generic_node (pp, TREE_OPERAND (node, 0), spc, flags,
@@ -3561,6 +3569,9 @@  op_symbol_code (enum tree_code code)
     case MIN_EXPR:
       return "min";
 
+    case SEXT_EXPR:
+      return "sext";
+
     default:
       return "<<< ??? >>>";
     }
diff --git a/gcc/tree.def b/gcc/tree.def
index d0a3bd6..789cfdd 100644
--- a/gcc/tree.def
+++ b/gcc/tree.def
@@ -760,6 +760,11 @@  DEFTREECODE (BIT_XOR_EXPR, "bit_xor_expr", tcc_binary, 2)
 DEFTREECODE (BIT_AND_EXPR, "bit_and_expr", tcc_binary, 2)
 DEFTREECODE (BIT_NOT_EXPR, "bit_not_expr", tcc_unary, 1)
 
+/*  Sign-extend operation.  It will sign extend first operand from
+ the sign bit specified by the second operand.  The type of the
+ result is that of the first operand.  */
+DEFTREECODE (SEXT_EXPR, "sext_expr", tcc_binary, 2)
+
 /* ANDIF and ORIF allow the second operand not to be computed if the
    value of the expression is determined from the first operand.  AND,
    OR, and XOR always compute the second operand whether its value is
-- 
1.9.1