[(2/7)] Widening multiplies by more than one mode

Message ID 1A77B5B39081C241A68E6CF16983025F02090746@EU1-MAIL.mgc.mentorg.com
State New
Headers show

Commit Message

Andrew Stubbs July 11, 2011, 3:23 p.m.
On 23/06/11 15:39, Andrew Stubbs wrote:
> This patch has two effects:
>
> 1. It permits the use of widening multiply instructions that widen by
> more than one mode. E.g. HImode -> DImode.
>
> 2. It enables the use of widening multiply instructions for (extended)
> inputs of narrower mode than the instruction takes. E.g. QImode ->
> DImode where only HI->DI or SI->DI is available.
>
> Hopefully, most of the patch is self-explanatory, but here are few notes:
>
> The code introduces a temporary FIXME comment; this will be removed
> later in the patch series. In fact, this is not a new restriction;
> previously "type1" and "type2" were implicitly identical because they
> were required to be one mode smaller than "type".
>
> I regard the ARM portion of this patch as obvious, so I don't think I
> need an ARM maintainer to read this.
>
> Is the patch OK?

I found a bug in this patch. It seems I do need to add casts for the 
inputs to widening multiplies (even though I know the registers are 
already fine), because otherwise something is insisting on truncating 
the values to the minimum width, which isn't helpful when it's actually 
an instruction with wider inputs.

The mode changing bits from patch 4 have therefore been moved here. I've 
made the changes Richard Guenther requested there, I think.

Otherwise, the patch is the same as before.

Andrew

Patch

2011-07-11  Andrew Stubbs  <ams@codesourcery.com>

	gcc/
	* config/arm/arm.md (maddhidi4): Remove '*' from name.
	* expr.c (expand_expr_real_2): Use find_widening_optab_handler.
	* optabs.c (find_widening_optab_handler_and_mode): New function.
	(expand_widen_pattern_expr): Use find_widening_optab_handler.
	(expand_binop_directly): Likewise.
	(expand_binop): Likewise.
	* optabs.h (find_widening_optab_handler): New macro define.
	(find_widening_optab_handler_and_mode): New prototype.
	* tree-cfg.c (verify_gimple_assign_binary): Adjust WIDEN_MULT_EXPR
	type precision rules.
	(verify_gimple_assign_ternary): Likewise for WIDEN_MULT_PLUS_EXPR.
	* tree-ssa-math-opts.c (build_and_insert_cast): New function.
	(is_widening_mult_rhs_p): Allow widening by more than one mode.
	Explicitly disallow mis-matched input types.
	(convert_mult_to_widen): Use find_widening_optab_handler, and cast
	input types to fit the new handler.
	(convert_plusminus_to_widen): Likewise.

--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -1857,7 +1857,7 @@ 
    (set_attr "predicable" "yes")]
 )
 
-(define_insn "*maddhidi4"
+(define_insn "maddhidi4"
   [(set (match_operand:DI 0 "s_register_operand" "=r")
 	(plus:DI
 	  (mult:DI (sign_extend:DI
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -7638,19 +7638,16 @@  expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode,
 	{
 	  enum machine_mode innermode = TYPE_MODE (TREE_TYPE (treeop0));
 	  this_optab = usmul_widen_optab;
-	  if (mode == GET_MODE_2XWIDER_MODE (innermode))
+	  if (find_widening_optab_handler (this_optab, mode, innermode, 0)
+		!= CODE_FOR_nothing)
 	    {
-	      if (widening_optab_handler (this_optab, mode, innermode)
-		    != CODE_FOR_nothing)
-		{
-		  if (TYPE_UNSIGNED (TREE_TYPE (treeop0)))
-		    expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1,
-				     EXPAND_NORMAL);
-		  else
-		    expand_operands (treeop0, treeop1, NULL_RTX, &op1, &op0,
-				     EXPAND_NORMAL);
-		  goto binop3;
-		}
+	      if (TYPE_UNSIGNED (TREE_TYPE (treeop0)))
+		expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1,
+				 EXPAND_NORMAL);
+	      else
+		expand_operands (treeop0, treeop1, NULL_RTX, &op1, &op0,
+				 EXPAND_NORMAL);
+	      goto binop3;
 	    }
 	}
       /* Check for a multiplication with matching signedness.  */
@@ -7665,10 +7662,9 @@  expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode,
 	  optab other_optab = zextend_p ? smul_widen_optab : umul_widen_optab;
 	  this_optab = zextend_p ? umul_widen_optab : smul_widen_optab;
 
-	  if (mode == GET_MODE_2XWIDER_MODE (innermode)
-	      && TREE_CODE (treeop0) != INTEGER_CST)
+	  if (TREE_CODE (treeop0) != INTEGER_CST)
 	    {
-	      if (widening_optab_handler (this_optab, mode, innermode)
+	      if (find_widening_optab_handler (this_optab, mode, innermode, 0)
 		    != CODE_FOR_nothing)
 		{
 		  expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1,
@@ -7677,7 +7673,7 @@  expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode,
 					       unsignedp, this_optab);
 		  return REDUCE_BIT_FIELD (temp);
 		}
-	      if (widening_optab_handler (other_optab, mode, innermode)
+	      if (find_widening_optab_handler (other_optab, mode, innermode, 0)
 		    != CODE_FOR_nothing
 		  && innermode == word_mode)
 		{
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -225,6 +225,37 @@  add_equal_note (rtx insns, rtx target, enum rtx_code code, rtx op0, rtx op1)
   return 1;
 }
 
+/* Find a widening optab even if it doesn't widen as much as we want.
+   E.g. if from_mode is HImode, and to_mode is DImode, and there is no
+   direct HI->SI insn, then return SI->DI, if that exists.
+   If PERMIT_NON_WIDENING is non-zero then this can be used with
+   non-widening optabs also.  */
+
+enum insn_code
+find_widening_optab_handler_and_mode (optab op, enum machine_mode to_mode,
+				      enum machine_mode from_mode,
+				      int permit_non_widening,
+				      enum machine_mode *found_mode)
+{
+  for (; (permit_non_widening || from_mode != to_mode)
+	 && GET_MODE_SIZE (from_mode) <= GET_MODE_SIZE (to_mode)
+	 && from_mode != VOIDmode;
+       from_mode = GET_MODE_WIDER_MODE (from_mode))
+    {
+      enum insn_code handler = widening_optab_handler (op, to_mode,
+						       from_mode);
+
+      if (handler != CODE_FOR_nothing)
+	{
+	  if (found_mode)
+	    *found_mode = from_mode;
+	  return handler;
+	}
+    }
+
+  return CODE_FOR_nothing;
+}
+
 /* Widen OP to MODE and return the rtx for the widened operand.  UNSIGNEDP
    says whether OP is signed or unsigned.  NO_EXTEND is nonzero if we need
    not actually do a sign-extend or zero-extend, but can leave the
@@ -515,8 +546,9 @@  expand_widen_pattern_expr (sepops ops, rtx op0, rtx op1, rtx wide_op,
     optab_for_tree_code (ops->code, TREE_TYPE (oprnd0), optab_default);
   if (ops->code == WIDEN_MULT_PLUS_EXPR
       || ops->code == WIDEN_MULT_MINUS_EXPR)
-    icode = widening_optab_handler (widen_pattern_optab,
-				    TYPE_MODE (TREE_TYPE (ops->op2)), tmode0);
+    icode = find_widening_optab_handler (widen_pattern_optab,
+					 TYPE_MODE (TREE_TYPE (ops->op2)),
+					 tmode0, 0);
   else
     icode = optab_handler (widen_pattern_optab, tmode0);
   gcc_assert (icode != CODE_FOR_nothing);
@@ -1243,7 +1275,8 @@  expand_binop_directly (enum machine_mode mode, optab binoptab,
 		       rtx last)
 {
   enum machine_mode from_mode = GET_MODE (op0);
-  enum insn_code icode = widening_optab_handler (binoptab, mode, from_mode);
+  enum insn_code icode = find_widening_optab_handler (binoptab, mode,
+						      from_mode, 1);
   enum machine_mode xmode0 = insn_data[(int) icode].operand[1].mode;
   enum machine_mode xmode1 = insn_data[(int) icode].operand[2].mode;
   enum machine_mode mode0, mode1, tmp_mode;
@@ -1390,7 +1423,7 @@  expand_binop (enum machine_mode mode, optab binoptab, rtx op0, rtx op1,
   /* If we can do it with a three-operand insn, do so.  */
 
   if (methods != OPTAB_MUST_WIDEN
-      && widening_optab_handler (binoptab, mode, GET_MODE (op0))
+      && find_widening_optab_handler (binoptab, mode, GET_MODE (op0), 1)
 	    != CODE_FOR_nothing)
     {
       temp = expand_binop_directly (mode, binoptab, op0, op1, target,
@@ -1464,10 +1497,11 @@  expand_binop (enum machine_mode mode, optab binoptab, rtx op0, rtx op1,
 		!= CODE_FOR_nothing
 	    || (binoptab == smul_optab
 		&& GET_MODE_WIDER_MODE (wider_mode) != VOIDmode
-		&& (widening_optab_handler ((unsignedp ? umul_widen_optab
-						       : smul_widen_optab),
-					    GET_MODE_WIDER_MODE (wider_mode),
-					    mode)
+		&& (find_widening_optab_handler ((unsignedp
+						  ? umul_widen_optab
+						  : smul_widen_optab),
+						 GET_MODE_WIDER_MODE (wider_mode),
+						 mode, 0)
 		    != CODE_FOR_nothing)))
 	  {
 	    rtx xop0 = op0, xop1 = op1;
@@ -2002,7 +2036,7 @@  expand_binop (enum machine_mode mode, optab binoptab, rtx op0, rtx op1,
 	   wider_mode != VOIDmode;
 	   wider_mode = GET_MODE_WIDER_MODE (wider_mode))
 	{
-	  if (widening_optab_handler (binoptab, wider_mode, mode)
+	  if (find_widening_optab_handler (binoptab, wider_mode, mode, 1)
 		  != CODE_FOR_nothing
 	      || (methods == OPTAB_LIB
 		  && optab_libfunc (binoptab, wider_mode)))
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -807,6 +807,15 @@  extern rtx expand_copysign (rtx, rtx, rtx);
 extern void emit_unop_insn (enum insn_code, rtx, rtx, enum rtx_code);
 extern bool maybe_emit_unop_insn (enum insn_code, rtx, rtx, enum rtx_code);
 
+/* Find a widening optab even if it doesn't widen as much as we want.  */
+#define find_widening_optab_handler(A,B,C,D) \
+  find_widening_optab_handler_and_mode (A, B, C, D, NULL)
+extern enum insn_code find_widening_optab_handler_and_mode (optab,
+							    enum machine_mode,
+							    enum machine_mode,
+							    int,
+							    enum machine_mode *);
+
 /* An extra flag to control optab_for_tree_code's behavior.  This is needed to
    distinguish between machines with a vector shift that takes a scalar for the
    shift amount vs. machines that take a vector for the shift amount.  */
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -3577,7 +3577,7 @@  do_pointer_plus_expr_check:
     case WIDEN_MULT_EXPR:
       if (TREE_CODE (lhs_type) != INTEGER_TYPE)
 	return true;
-      return ((2 * TYPE_PRECISION (rhs1_type) != TYPE_PRECISION (lhs_type))
+      return ((2 * TYPE_PRECISION (rhs1_type) > TYPE_PRECISION (lhs_type))
 	      || (TYPE_PRECISION (rhs1_type) != TYPE_PRECISION (rhs2_type)));
 
     case WIDEN_SUM_EXPR:
@@ -3668,7 +3668,7 @@  verify_gimple_assign_ternary (gimple stmt)
 	   && !FIXED_POINT_TYPE_P (rhs1_type))
 	  || !useless_type_conversion_p (rhs1_type, rhs2_type)
 	  || !useless_type_conversion_p (lhs_type, rhs3_type)
-	  || 2 * TYPE_PRECISION (rhs1_type) != TYPE_PRECISION (lhs_type)
+	  || 2 * TYPE_PRECISION (rhs1_type) > TYPE_PRECISION (lhs_type)
 	  || TYPE_PRECISION (rhs1_type) != TYPE_PRECISION (rhs2_type))
 	{
 	  error ("type mismatch in widening multiply-accumulate expression");
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -1086,6 +1086,16 @@  build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
   return result;
 }
 
+/* Build a gimple assignment to cast VAL to TARGET.  Insert the statement
+   prior to GSI's current position, and return the fresh SSA name.  */
+
+static tree
+build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
+		       tree target, tree val)
+{
+  return build_and_insert_binop (gsi, loc, target, CONVERT_EXPR, val, NULL);
+}
+
 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
    with location info LOC.  If possible, create an equivalent and
    less expensive sequence of statements prior to GSI, and return an
@@ -1958,8 +1968,8 @@  struct gimple_opt_pass pass_optimize_bswap =
 /* Return true if RHS is a suitable operand for a widening multiplication.
    There are two cases:
 
-     - RHS makes some value twice as wide.  Store that value in *NEW_RHS_OUT
-       if so, and store its type in *TYPE_OUT.
+     - RHS makes some value at least twice as wide.  Store that value
+       in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
 
      - RHS is an integer constant.  Store that value in *NEW_RHS_OUT if so,
        but leave *TYPE_OUT untouched.  */
@@ -1987,7 +1997,7 @@  is_widening_mult_rhs_p (tree rhs, tree *type_out, tree *new_rhs_out)
       rhs1 = gimple_assign_rhs1 (stmt);
       type1 = TREE_TYPE (rhs1);
       if (TREE_CODE (type1) != TREE_CODE (type)
-	  || TYPE_PRECISION (type1) * 2 != TYPE_PRECISION (type))
+	  || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
 	return false;
 
       *new_rhs_out = rhs1;
@@ -2043,6 +2053,10 @@  is_widening_mult_p (gimple stmt,
       *type2_out = *type1_out;
     }
 
+  /* FIXME: remove this restriction.  */
+  if (TYPE_PRECISION (*type1_out) != TYPE_PRECISION (*type2_out))
+    return false;
+
   return true;
 }
 
@@ -2051,7 +2065,7 @@  is_widening_mult_p (gimple stmt,
    value is true iff we converted the statement.  */
 
 static bool
-convert_mult_to_widen (gimple stmt)
+convert_mult_to_widen (gimple stmt, gimple_stmt_iterator *gsi)
 {
   tree lhs, rhs1, rhs2, type, type1, type2;
   enum insn_code handler;
@@ -2076,13 +2090,34 @@  convert_mult_to_widen (gimple stmt)
   else
     op = usmul_widen_optab;
 
-  handler = widening_optab_handler (op, to_mode, from_mode);
+  handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
+						  0, &from_mode);
 
   if (handler == CODE_FOR_nothing)
     return false;
 
-  gimple_assign_set_rhs1 (stmt, fold_convert (type1, rhs1));
-  gimple_assign_set_rhs2 (stmt, fold_convert (type2, rhs2));
+  if (from_mode != TYPE_MODE (type1))
+    {
+      location_t loc = gimple_location (stmt);
+      tree tmp1, tmp2;
+
+      tmp1 = create_tmp_var (
+		build_nonstandard_integer_type (
+		  GET_MODE_PRECISION (from_mode), TYPE_UNSIGNED (type1)),
+		NULL);
+      tmp2 = TYPE_UNSIGNED (type1) == TYPE_UNSIGNED (type2)
+	     ? tmp1
+	     : create_tmp_var (
+		  build_nonstandard_integer_type (
+		    GET_MODE_PRECISION (from_mode), TYPE_UNSIGNED (type1)),
+		  NULL);
+
+      rhs1 = build_and_insert_cast (gsi, loc, tmp1, rhs1);
+      rhs2 = build_and_insert_cast (gsi, loc, tmp2, rhs2);
+    }
+
+  gimple_assign_set_rhs1 (stmt, rhs1);
+  gimple_assign_set_rhs2 (stmt, rhs2);
   gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
   update_stmt (stmt);
   widen_mul_stats.widen_mults_inserted++;
@@ -2105,6 +2140,8 @@  convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
   enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
   optab this_optab;
   enum tree_code wmult_code;
+  enum insn_code handler;
+  enum machine_mode from_mode;
 
   lhs = gimple_assign_lhs (stmt);
   type = TREE_TYPE (lhs);
@@ -2138,36 +2175,27 @@  convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
   else
     return false;
 
-  if (code == PLUS_EXPR && rhs1_code == MULT_EXPR)
+  /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
+     is_widening_mult_p, but we still need the rhs returns.
+
+     It might also appear that it would be sufficient to use the existing
+     operands of the widening multiply, but that would limit the choice of
+     multiply-and-accumulate instructions.  */
+  if (code == PLUS_EXPR
+      && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
     {
       if (!is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
 			       &type2, &mult_rhs2))
 	return false;
       add_rhs = rhs2;
     }
-  else if (rhs2_code == MULT_EXPR)
+  else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
     {
       if (!is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
 			       &type2, &mult_rhs2))
 	return false;
       add_rhs = rhs1;
     }
-  else if (code == PLUS_EXPR && rhs1_code == WIDEN_MULT_EXPR)
-    {
-      mult_rhs1 = gimple_assign_rhs1 (rhs1_stmt);
-      mult_rhs2 = gimple_assign_rhs2 (rhs1_stmt);
-      type1 = TREE_TYPE (mult_rhs1);
-      type2 = TREE_TYPE (mult_rhs2);
-      add_rhs = rhs2;
-    }
-  else if (rhs2_code == WIDEN_MULT_EXPR)
-    {
-      mult_rhs1 = gimple_assign_rhs1 (rhs2_stmt);
-      mult_rhs2 = gimple_assign_rhs2 (rhs2_stmt);
-      type1 = TREE_TYPE (mult_rhs1);
-      type2 = TREE_TYPE (mult_rhs2);
-      add_rhs = rhs1;
-    }
   else
     return false;
 
@@ -2178,15 +2206,29 @@  convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
      accumulate in this mode/signedness combination, otherwise
      this transformation is likely to pessimize code.  */
   this_optab = optab_for_tree_code (wmult_code, type1, optab_default);
-  if (widening_optab_handler (this_optab, TYPE_MODE (type), TYPE_MODE (type1))
-	== CODE_FOR_nothing)
+  handler = find_widening_optab_handler_and_mode (this_optab,
+						  TYPE_MODE (type),
+						  TYPE_MODE (type1), 0,
+						  &from_mode);
+
+  if (handler == CODE_FOR_nothing)
     return false;
 
-  /* ??? May need some type verification here?  */
+  if (TYPE_MODE (type1) != from_mode)
+    {
+      location_t loc = gimple_location (stmt);
+      tree tmp;
+
+      tmp = create_tmp_var (
+		build_nonstandard_integer_type (
+		  GET_MODE_PRECISION (from_mode), TYPE_UNSIGNED (type1)),
+		NULL);
+
+      mult_rhs1 = build_and_insert_cast (gsi, loc, tmp, mult_rhs1);
+      mult_rhs2 = build_and_insert_cast (gsi, loc, tmp, mult_rhs2);
+    }
 
-  gimple_assign_set_rhs_with_ops_1 (gsi, wmult_code,
-				    fold_convert (type1, mult_rhs1),
-				    fold_convert (type2, mult_rhs2),
+  gimple_assign_set_rhs_with_ops_1 (gsi, wmult_code, mult_rhs1, mult_rhs2,
 				    add_rhs);
   update_stmt (gsi_stmt (*gsi));
   widen_mul_stats.maccs_inserted++;
@@ -2398,7 +2440,7 @@  execute_optimize_widening_mul (void)
 	      switch (code)
 		{
 		case MULT_EXPR:
-		  if (!convert_mult_to_widen (stmt)
+		  if (!convert_mult_to_widen (stmt, &gsi)
 		      && convert_mult_to_fma (stmt,
 					      gimple_assign_rhs1 (stmt),
 					      gimple_assign_rhs2 (stmt)))