From patchwork Tue Jun 28 14:14:09 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Andrew Stubbs X-Patchwork-Id: 2357 Return-Path: X-Original-To: patchwork@peony.canonical.com Delivered-To: patchwork@peony.canonical.com Received: from fiordland.canonical.com (fiordland.canonical.com [91.189.94.145]) by peony.canonical.com (Postfix) with ESMTP id 08E8D1A5A1C for ; Tue, 28 Jun 2011 14:14:18 +0000 (UTC) Received: from mail-qy0-f180.google.com (mail-qy0-f180.google.com [209.85.216.180]) by fiordland.canonical.com (Postfix) with ESMTP id 92D43A181B5 for ; Tue, 28 Jun 2011 14:14:17 +0000 (UTC) Received: by qyk30 with SMTP id 30so171905qyk.11 for ; Tue, 28 Jun 2011 07:14:17 -0700 (PDT) Received: by 10.229.1.140 with SMTP id 12mr5710623qcf.118.1309270456884; Tue, 28 Jun 2011 07:14:16 -0700 (PDT) X-Forwarded-To: linaro-patchwork@canonical.com X-Forwarded-For: patch@linaro.org linaro-patchwork@canonical.com Delivered-To: patches@linaro.org Received: by 10.229.48.135 with SMTP id r7cs9359qcf; Tue, 28 Jun 2011 07:14:16 -0700 (PDT) Received: by 10.42.144.68 with SMTP id a4mr7952311icv.427.1309270455002; Tue, 28 Jun 2011 07:14:15 -0700 (PDT) Received: from mail.codesourcery.com (mail.codesourcery.com [38.113.113.100]) by mx.google.com with ESMTPS id p3si1032711icc.47.2011.06.28.07.14.14 (version=TLSv1/SSLv3 cipher=OTHER); Tue, 28 Jun 2011 07:14:14 -0700 (PDT) Received-SPF: pass (google.com: domain of ams@codesourcery.com designates 38.113.113.100 as permitted sender) client-ip=38.113.113.100; Authentication-Results: mx.google.com; spf=pass (google.com: domain of ams@codesourcery.com designates 38.113.113.100 as permitted sender) smtp.mail=ams@codesourcery.com Received: (qmail 10955 invoked from network); 28 Jun 2011 14:14:11 -0000 Received: from unknown (HELO ?192.168.0.100?) 
(ams@127.0.0.2) by mail.codesourcery.com with ESMTPA; 28 Jun 2011 14:14:11 -0000 Message-ID: <4E09E1B1.2090005@codesourcery.com> Date: Tue, 28 Jun 2011 15:14:09 +0100 From: Andrew Stubbs Organization: CodeSourcery User-Agent: Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.17) Gecko/20110516 Lightning/1.0b2 Thunderbird/3.1.10 MIME-Version: 1.0 CC: gcc-patches@gcc.gnu.org, patches@linaro.org Subject: Re: [PATCH (4/7)] Unsigned multiplies using wider signed multiplies References: <4E034EF2.3070503@codesourcery.com> <4E035084.2010503@codesourcery.com> <4E09CA21.3030605@codesourcery.com> In-Reply-To: <4E09CA21.3030605@codesourcery.com> On 28/06/11 13:33, Andrew Stubbs wrote: > On 23/06/11 15:41, Andrew Stubbs wrote: >> If one or both of the inputs to a widening multiply are of unsigned type >> then the compiler will attempt to use usmul_widen_optab or >> umul_widen_optab, respectively. >> >> That works fine, but only if the target supports those operations >> directly. Otherwise, it just bombs out and reverts to the normal >> inefficient non-widening multiply. >> >> This patch attempts to catch these cases and use an alternative signed >> widening multiply instruction, if one of those is available. >> >> I believe this should be legal as long as the top bit of both inputs is >> guaranteed to be zero. The code achieves this guarantee by >> zero-extending the inputs to a wider mode (which must still be narrower >> than the output mode). >> >> OK? > > This update fixes the testsuite issue Janis pointed out. And this one fixes up the wmul-5.c testcase also. The patch has changed the correct result. Andrew 2011-06-28 Andrew Stubbs gcc/ * Makefile.in (tree-ssa-math-opts.o): Add langhooks.h dependency. * optabs.c (find_widening_optab_handler): Rename to ... (find_widening_optab_handler_and_mode): ... this, and add new argument 'found_mode'. * optabs.h (find_widening_optab_handler): Rename to ... (find_widening_optab_handler_and_mode): ... this. 
(find_widening_optab_handler): New macro. * tree-ssa-math-opts.c: Include langhooks.h. (build_and_insert_cast): New function. (convert_mult_to_widen): Add new argument 'gsi'. Convert unsupported unsigned multiplies to signed. (convert_plusminus_to_widen): Likewise. (execute_optimize_widening_mul): Pass gsi to convert_mult_to_widen. gcc/testsuite/ * gcc.target/arm/wmul-5.c: Update expected result. * gcc.target/arm/wmul-6.c: New file. --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -2672,7 +2672,8 @@ tree-ssa-loop-im.o : tree-ssa-loop-im.c $(TREE_FLOW_H) $(CONFIG_H) \ tree-ssa-math-opts.o : tree-ssa-math-opts.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(TM_H) $(FLAGS_H) $(TREE_H) $(TREE_FLOW_H) $(TIMEVAR_H) \ $(TREE_PASS_H) alloc-pool.h $(BASIC_BLOCK_H) $(TARGET_H) \ - $(DIAGNOSTIC_H) $(RTL_H) $(EXPR_H) $(OPTABS_H) gimple-pretty-print.h + $(DIAGNOSTIC_H) $(RTL_H) $(EXPR_H) $(OPTABS_H) gimple-pretty-print.h \ + langhooks.h tree-ssa-alias.o : tree-ssa-alias.c $(TREE_FLOW_H) $(CONFIG_H) $(SYSTEM_H) \ $(TREE_H) $(TM_P_H) $(EXPR_H) $(GGC_H) $(TREE_INLINE_H) $(FLAGS_H) \ $(FUNCTION_H) $(TIMEVAR_H) convert.h $(TM_H) coretypes.h langhooks.h \ --- a/gcc/optabs.c +++ b/gcc/optabs.c @@ -232,9 +232,10 @@ add_equal_note (rtx insns, rtx target, enum rtx_code code, rtx op0, rtx op1) non-widening optabs also. 
*/ enum insn_code -find_widening_optab_handler (optab op, enum machine_mode to_mode, - enum machine_mode from_mode, - int permit_non_widening) +find_widening_optab_handler_and_mode (optab op, enum machine_mode to_mode, + enum machine_mode from_mode, + int permit_non_widening, + enum machine_mode *found_mode) { for (; (permit_non_widening || from_mode != to_mode) && GET_MODE_SIZE (from_mode) <= GET_MODE_SIZE (to_mode) @@ -245,7 +246,11 @@ find_widening_optab_handler (optab op, enum machine_mode to_mode, from_mode); if (handler != CODE_FOR_nothing) - return handler; + { + if (found_mode) + *found_mode = from_mode; + return handler; + } } return CODE_FOR_nothing; --- a/gcc/optabs.h +++ b/gcc/optabs.h @@ -808,8 +808,13 @@ extern void emit_unop_insn (enum insn_code, rtx, rtx, enum rtx_code); extern bool maybe_emit_unop_insn (enum insn_code, rtx, rtx, enum rtx_code); /* Find a widening optab even if it doesn't widen as much as we want. */ -extern enum insn_code find_widening_optab_handler (optab, enum machine_mode, - enum machine_mode, int); +#define find_widening_optab_handler(A,B,C,D) \ + find_widening_optab_handler_and_mode (A, B, C, D, NULL) +extern enum insn_code find_widening_optab_handler_and_mode (optab, + enum machine_mode, + enum machine_mode, + int, + enum machine_mode *); /* An extra flag to control optab_for_tree_code's behavior. 
This is needed to distinguish between machines with a vector shift that takes a scalar for the --- a/gcc/testsuite/gcc.target/arm/wmul-5.c +++ b/gcc/testsuite/gcc.target/arm/wmul-5.c @@ -7,4 +7,4 @@ foo (long long a, char *b, char *c) return a + *b * *c; } -/* { dg-final { scan-assembler "umlal" } } */ +/* { dg-final { scan-assembler "smlalbb" } } */ --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/wmul-6.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=armv7-a" } */ + +long long +foo (long long a, unsigned char *b, signed char *c) +{ + return a + (long long)*b * (long long)*c; +} + +/* { dg-final { scan-assembler "smlal" } } */ --- a/gcc/tree-ssa-math-opts.c +++ b/gcc/tree-ssa-math-opts.c @@ -98,6 +98,7 @@ along with GCC; see the file COPYING3. If not see #include "basic-block.h" #include "target.h" #include "gimple-pretty-print.h" +#include "langhooks.h" /* FIXME: RTL headers have to be included here for optabs. */ #include "rtl.h" /* Because optabs.h wants enum rtx_code. */ @@ -1086,6 +1087,21 @@ build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type, return result; } +/* Build a gimple assignment to cast VAL to TYPE, and put the result in + TARGET. Insert the statement prior to GSI's current position, and + return the fresh SSA name. */ + +static tree +build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc, + tree target, tree val, tree type) +{ + tree result = make_ssa_name (target, NULL); + gimple stmt = gimple_build_assign (result, fold_convert (type, val)); + gimple_set_location (stmt, loc); + gsi_insert_before (gsi, stmt, GSI_SAME_STMT); + return result; +} + /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI with location info LOC. If possible, create an equivalent and less expensive sequence of statements prior to GSI, and return an @@ -2047,7 +2063,7 @@ is_widening_mult_p (gimple stmt, value is true iff we converted the statement. 
*/ static bool -convert_mult_to_widen (gimple stmt) +convert_mult_to_widen (gimple stmt, gimple_stmt_iterator *gsi) { tree lhs, rhs1, rhs2, type, type1, type2; enum insn_code handler; @@ -2075,7 +2091,31 @@ convert_mult_to_widen (gimple stmt) handler = find_widening_optab_handler (op, to_mode, from_mode, 0); if (handler == CODE_FOR_nothing) - return false; + { + if (op != smul_widen_optab) + { + from_mode = GET_MODE_WIDER_MODE (from_mode); + if (GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode)) + return false; + + op = smul_widen_optab; + handler = find_widening_optab_handler_and_mode (op, to_mode, + from_mode, 0, + &from_mode); + + if (handler == CODE_FOR_nothing) + return false; + + type1 = type2 = lang_hooks.types.type_for_mode (from_mode, 0); + + rhs1 = build_and_insert_cast (gsi, gimple_location (stmt), + create_tmp_var (type1, NULL), rhs1, type1); + rhs2 = build_and_insert_cast (gsi, gimple_location (stmt), + create_tmp_var (type2, NULL), rhs2, type2); + } + else + return false; + } gimple_assign_set_rhs1 (stmt, fold_convert (type1, rhs1)); gimple_assign_set_rhs2 (stmt, fold_convert (type2, rhs2)); @@ -2165,7 +2205,22 @@ convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt, return false; if (TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2)) - return false; + { + enum machine_mode mode = TYPE_MODE (type1); + mode = GET_MODE_WIDER_MODE (mode); + if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (TYPE_MODE (type))) + { + type1 = type2 = lang_hooks.types.type_for_mode (mode, 0); + mult_rhs1 = build_and_insert_cast (gsi, gimple_location (stmt), + create_tmp_var (type1, NULL), + mult_rhs1, type1); + mult_rhs2 = build_and_insert_cast (gsi, gimple_location (stmt), + create_tmp_var (type2, NULL), + mult_rhs2, type2); + } + else + return false; + } /* Verify that the machine can perform a widening multiply accumulate in this mode/signedness combination, otherwise @@ -2393,7 +2448,7 @@ execute_optimize_widening_mul (void) switch (code) { case MULT_EXPR: - if 
(!convert_mult_to_widen (stmt) + if (!convert_mult_to_widen (stmt, &gsi) && convert_mult_to_fma (stmt, gimple_assign_rhs1 (stmt), gimple_assign_rhs2 (stmt)))