From patchwork Mon May 30 05:55:02 2016 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Prathamesh Kulkarni X-Patchwork-Id: 68824 Delivered-To: patch@linaro.org Received: by 10.140.92.199 with SMTP id b65csp1234512qge; Sun, 29 May 2016 22:55:30 -0700 (PDT) X-Received: by 10.66.152.164 with SMTP id uz4mr43693774pab.9.1464587730140; Sun, 29 May 2016 22:55:30 -0700 (PDT) Return-Path: Received: from sourceware.org (server1.sourceware.org. [209.132.180.131]) by mx.google.com with ESMTPS id m81si34412951pfa.117.2016.05.29.22.55.29 for (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Sun, 29 May 2016 22:55:30 -0700 (PDT) Received-SPF: pass (google.com: domain of gcc-patches-return-428565-patch=linaro.org@gcc.gnu.org designates 209.132.180.131 as permitted sender) client-ip=209.132.180.131; Authentication-Results: mx.google.com; dkim=pass header.i=@gcc.gnu.org; spf=pass (google.com: domain of gcc-patches-return-428565-patch=linaro.org@gcc.gnu.org designates 209.132.180.131 as permitted sender) smtp.mailfrom=gcc-patches-return-428565-patch=linaro.org@gcc.gnu.org; dmarc=fail (p=NONE dis=NONE) header.from=linaro.org DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender :mime-version:in-reply-to:references:date:message-id:subject :from:to:cc:content-type; q=dns; s=default; b=bdTMkC70cZgH9Y3wBB s8faQ7lfD6bztAYRvf6tgS31ASWvDcsZagMQ2PvmRjeDMKK3pk0Yhlf82tAn/wFB I8Wcw193TYHuLnS2XhFQYXQ+3cnoB7oyJ0YBKbMkjHGiZ83llEoCE9TK+K87KH/v yPKx3dsZjCQ11ROCby8LgBOuM= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender :mime-version:in-reply-to:references:date:message-id:subject :from:to:cc:content-type; s=default; bh=1QfV8/HvUIbZxDQ/D7t2h3WA GTw=; b=KhhBEagCnbZRTCX9ebyMH6jxr0i7/Utnl5ubjYJsf1//4mZrj6MkZ7Ud Ig3m1V30EL/iEuokQtgu6uSY/Q8vyolpZJl7SiPAXSNgUtAfnyB/yBxFPZtUkK8t BjPcsDHrIlfpMBLC+/G/zZwHNQWfzYUDPKXb6xev0igahM7jAoI= Received: (qmail 74917 invoked by alias); 30 May 2016 05:55:16 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 74904 invoked by uid 89); 30 May 2016 05:55:15 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-0.6 required=5.0 tests=AWL, BAYES_50, RCVD_IN_DNSWL_NONE, SPF_PASS autolearn=ham version=3.3.2 spammy=biener, Biener, youd, you'd X-HELO: mail-it0-f43.google.com Received: from mail-it0-f43.google.com (HELO mail-it0-f43.google.com) (209.85.214.43) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with (AES128-GCM-SHA256 encrypted) ESMTPS; Mon, 30 May 2016 05:55:05 +0000 Received: by mail-it0-f43.google.com with SMTP id z123so24036765itg.0 for ; Sun, 29 May 2016 22:55:04 -0700 (PDT) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20130820; h=x-gm-message-state:mime-version:in-reply-to:references:date :message-id:subject:from:to:cc; bh=qeEMJHvKxN/+6/PH1sD4jCtYMekWd4JBkOA/rCNSWfc=; b=bRSxXs4OB1A4xQ03+N76V09TYu9nUPuv4iOQ+qeAZUTofKZ0dmSEPHY52eGgwW4Kx/ delxxlgemZ7NFDa84BZ0+KOWjXTBMAE1IlaLucc+lGsaoUBK3lXFUeNBjHsIjBLek8p9 I0crcuuFDLe61YZAZcxL2P1QxXL3yUS1V822b/mstp4eWUKSlgnhRWMbiqUXc4Mi6tEV 9ZYEiw8SG90Sr8IB7vvLwB5eGV+EUzm/+a6105ey1OqiiZCPiNIAZZzeC7Et4MWU5AFw HmwSsLEA/Z6UYJb5CAaLvSQibCGArPPRVxzyJqHuQ+0qkGzjFFrFE4gcXYh8BSqmm+l3 10Ig== X-Gm-Message-State: ALyK8tLUSaXqA/phnTNvI2+vZDgJ5vNx3nsJgx/DtuvfP7ckh3+Tj5AmFkJciHjoLbVNJOwBEAk/Y57+Bv9Br0uK MIME-Version: 1.0 X-Received: by 10.36.123.77 with SMTP id q74mr6052205itc.44.1464587702451; Sun, 29 May 2016 22:55:02 -0700 (PDT) Received: by 10.36.236.5 with HTTP; Sun, 29 May 2016 22:55:02 -0700 (PDT) In-Reply-To: References: Date: Mon, 30 May 2016 11:25:02 +0530 Message-ID: Subject: Re: RFC [1/2] divmod transform From: Prathamesh Kulkarni To: Richard Biener Cc: Richard Biener , gcc Patches , Ramana Radhakrishnan , Kugan Vivekanandarajah , Jim Wilson X-IsSubscribed: yes On 27 May 2016 at 17:31, Richard Biener wrote: > On Fri, 27 May 2016, Prathamesh Kulkarni wrote: > >> On 27 May 2016 at 15:45, Richard Biener wrote: >> > On Wed, 25 May 2016, Prathamesh Kulkarni wrote: >> > >> >> On 25 May 2016 at 12:52, Richard Biener wrote: >> >> > On Tue, 24 May 2016, Prathamesh Kulkarni wrote: >> >> > >> >> >> On 24 May 2016 at 19:39, Richard Biener wrote: >> >> >> > On Tue, 24 May 2016, Prathamesh Kulkarni wrote: >> >> >> > >> >> >> >> On 24 May 2016 at 17:42, Richard Biener wrote: >> >> >> >> > On Tue, 24 May 2016, Prathamesh Kulkarni wrote: >> >> >> >> > >> >> >> >> >> On 23 May 2016 at 17:35, Richard Biener wrote: >> >> >> >> >> > On Mon, May 23, 2016 at 10:58 AM, Prathamesh Kulkarni >> >> >> >> >> > wrote: >> >> >> >> >> >> Hi, >> >> >> >> >> >> I have updated my patch for divmod (attached), which was originally >> >> >> >> >> >> based on Kugan's patch. >> >> >> >> >> >> The patch transforms stmts with code TRUNC_DIV_EXPR and TRUNC_MOD_EXPR >> >> >> >> >> >> having same operands to divmod representation, so we can cse computation of mod. >> >> >> >> >> >> >> >> >> >> >> >> t1 = a TRUNC_DIV_EXPR b; >> >> >> >> >> >> t2 = a TRUNC_MOD_EXPR b >> >> >> >> >> >> is transformed to: >> >> >> >> >> >> complex_tmp = DIVMOD (a, b); >> >> >> >> >> >> t1 = REALPART_EXPR (complex_tmp); >> >> >> >> >> >> t2 = IMAGPART_EXPR (complex_tmp); >> >> >> >> >> >> >> >> >> >> >> >> * New hook divmod_expand_libfunc >> >> >> >> >> >> The rationale for introducing the hook is that different targets have >> >> >> >> >> >> incompatible calling conventions for divmod libfunc. >> >> >> >> >> >> Currently three ports define divmod libfunc: c6x, spu and arm. >> >> >> >> >> >> c6x and spu follow the convention of libgcc2.c:__udivmoddi4: >> >> >> >> >> >> return quotient and store remainder in argument passed as pointer, >> >> >> >> >> >> while the arm version takes two arguments and returns both >> >> >> >> >> >> quotient and remainder having mode double the size of the operand mode. >> >> >> >> >> >> The port should hence override the hook expand_divmod_libfunc >> >> >> >> >> >> to generate call to target-specific divmod. >> >> >> >> >> >> Ports should define this hook if: >> >> >> >> >> >> a) The port does not have divmod or div insn for the given mode. >> >> >> >> >> >> b) The port defines divmod libfunc for the given mode. >> >> >> >> >> >> The default hook default_expand_divmod_libfunc() generates call >> >> >> >> >> >> to libgcc2.c:__udivmoddi4 provided the operands are unsigned and >> >> >> >> >> >> are of DImode. >> >> >> >> >> >> >> >> >> >> >> >> Patch passes bootstrap+test on x86_64-unknown-linux-gnu and >> >> >> >> >> >> cross-tested on arm*-*-*. >> >> >> >> >> >> Bootstrap+test in progress on arm-linux-gnueabihf. >> >> >> >> >> >> Does this patch look OK ? >> >> >> >> >> > >> >> >> >> >> > diff --git a/gcc/targhooks.c b/gcc/targhooks.c >> >> >> >> >> > index 6b4601b..e4a021a 100644 >> >> >> >> >> > --- a/gcc/targhooks.c >> >> >> >> >> > +++ b/gcc/targhooks.c >> >> >> >> >> > @@ -1965,4 +1965,31 @@ default_optab_supported_p (int, machine_mode, >> >> >> >> >> > machine_mode, optimization_type) >> >> >> >> >> > return true; >> >> >> >> >> > } >> >> >> >> >> > >> >> >> >> >> > +void >> >> >> >> >> > +default_expand_divmod_libfunc (bool unsignedp, machine_mode mode, >> >> >> >> >> > + rtx op0, rtx op1, >> >> >> >> >> > + rtx *quot_p, rtx *rem_p) >> >> >> >> >> > >> >> >> >> >> > functions need a comment. >> >> >> >> >> > >> >> >> >> >> > ISTR it was suggested that ARM change to libgcc2.c__udivmoddi4 style? In that >> >> >> >> >> > case we could avoid the target hook. >> >> >> >> >> Well I would prefer adding the hook because that's more easier -;) >> >> >> >> >> Would it be ok for now to go with the hook ? >> >> >> >> >> > >> >> >> >> >> > + /* If target overrides expand_divmod_libfunc hook >> >> >> >> >> > + then perform divmod by generating call to the target-specifc divmod >> >> >> >> >> > libfunc. */ >> >> >> >> >> > + if (targetm.expand_divmod_libfunc != default_expand_divmod_libfunc) >> >> >> >> >> > + return true; >> >> >> >> >> > + >> >> >> >> >> > + /* Fall back to using libgcc2.c:__udivmoddi4. */ >> >> >> >> >> > + return (mode == DImode && unsignedp); >> >> >> >> >> > >> >> >> >> >> > I don't understand this - we know optab_libfunc returns non-NULL for 'mode' >> >> >> >> >> > but still restrict this to DImode && unsigned? Also if >> >> >> >> >> > targetm.expand_divmod_libfunc >> >> >> >> >> > is not the default we expect the target to handle all modes? >> >> >> >> >> Ah indeed, the check for DImode is unnecessary. >> >> >> >> >> However I suppose the check for unsignedp should be there, >> >> >> >> >> since we want to generate call to __udivmoddi4 only if operand is unsigned ? >> >> >> >> > >> >> >> >> > The optab libfunc for sdivmod should be NULL in that case. >> >> >> >> Ah indeed, thanks. >> >> >> >> > >> >> >> >> >> > >> >> >> >> >> > That said - I expected the above piece to be simply a 'return true;' ;) >> >> >> >> >> > >> >> >> >> >> > Usually we use some can_expand_XXX helper in optabs.c to query if the target >> >> >> >> >> > supports a specific operation (for example SImode divmod would use DImode >> >> >> >> >> > divmod by means of widening operands - for the unsigned case of course). >> >> >> >> >> Thanks for pointing out. So if a target does not support divmod >> >> >> >> >> libfunc for a mode >> >> >> >> >> but for a wider mode, then we could zero-extend operands to the wider-mode, >> >> >> >> >> perform divmod on the wider-mode, and then cast result back to the >> >> >> >> >> original mode. >> >> >> >> >> I haven't done that in this patch, would it be OK to do that as a follow up ? >> >> >> >> > >> >> >> >> > I think that you should conservatively handle the div_optab query, thus if >> >> >> >> > the target has a HW division in a wider mode don't use the divmod IFN. >> >> >> >> > You'd simply iterate over GET_MODE_WIDER_MODE and repeat the >> >> >> >> > if (optab_handler (div_optab, mode) != CODE_FOR_nothing) check, bailing >> >> >> >> > out if that is available. >> >> >> >> Done. >> >> >> >> > >> >> >> >> >> > + /* Disable the transform if either is a constant, since >> >> >> >> >> > division-by-constant >> >> >> >> >> > + may have specialized expansion. */ >> >> >> >> >> > + if (TREE_CONSTANT (op1) || TREE_CONSTANT (op2)) >> >> >> >> >> > + return false; >> >> >> >> >> > >> >> >> >> >> > please use CONSTANT_CLASS_P (op1) || CONSTANT_CLASS_P (op2) >> >> >> >> >> > >> >> >> >> >> > + if (TYPE_OVERFLOW_TRAPS (type)) >> >> >> >> >> > + return false; >> >> >> >> >> > >> >> >> >> >> > why's that? Generally please first test cheap things (trapping, constant-ness) >> >> >> >> >> > before checking expensive stuff (target_supports_divmod_p). >> >> >> >> >> I added TYPE_OVERFLOW_TRAPS (type) based on your suggestion in: >> >> >> >> >> https://www.mail-archive.com/gcc@gcc.gnu.org/msg78534.html >> >> >> >> >> "When looking at TRUNC_DIV_EXPR you should also exclude >> >> >> >> >> the case where TYPE_OVERFLOW_TRAPS (type) as that should >> >> >> >> >> expand using the [su]divv optabs (no trapping overflow >> >> >> >> >> divmod optab exists)." >> >> >> >> > >> >> >> >> > Ok, didn't remember that. >> >> >> >> > >> >> >> >> >> > >> >> >> >> >> > +static bool >> >> >> >> >> > +convert_to_divmod (gassign *stmt) >> >> >> >> >> > +{ >> >> >> >> >> > + if (!divmod_candidate_p (stmt)) >> >> >> >> >> > + return false; >> >> >> >> >> > + >> >> >> >> >> > + tree op1 = gimple_assign_rhs1 (stmt); >> >> >> >> >> > + tree op2 = gimple_assign_rhs2 (stmt); >> >> >> >> >> > + >> >> >> >> >> > + vec stmts = vNULL; >> >> >> >> >> > >> >> >> >> >> > use an auto_vec - you currently leak it in at least one place. >> >> >> >> >> > >> >> >> >> >> > + if (maybe_clean_or_replace_eh_stmt (use_stmt, use_stmt)) >> >> >> >> >> > + cfg_changed = true; >> >> >> >> >> > >> >> >> >> >> > note that this suggests you should check whether any of the stmts may throw >> >> >> >> >> > internally as you don't update / transfer EH info correctly. So for 'stmt' and >> >> >> >> >> > all 'use_stmt' check stmt_can_throw_internal and if so do not add it to >> >> >> >> >> > the list of stmts to modify. >> >> >> >> >> > >> >> >> >> >> > Btw, I think you should not add 'stmt' immediately but when iterating over >> >> >> >> >> > all uses also gather uses in TRUNC_MOD_EXPR. >> >> >> >> >> > >> >> >> >> >> > Otherwise looks ok. >> >> >> >> >> Done changes in this version. I am gathering mod uses same time as div uses, >> >> >> >> >> so this imposes a constraint that mod dominates mod. I am not sure if >> >> >> >> >> that's desirable. >> >> >> >> > >> >> >> >> > I think you also need a mod_seen variable now that you don't necessarily >> >> >> >> > end up with 'stmt' in the vector of stmts. I don't see how there is a >> >> >> >> > constraint that mod dominates mod - it's just that the top_stmt needs >> >> >> >> > to dominate all other uses that can be replaced with replacing top_stmt >> >> >> >> > with a divmod. It's just that the actual stmt set we choose may now >> >> >> >> > depend on immediate uses order which on a second thought is bad >> >> >> >> > as immediate uses order could be affected by debug stmts ... hmm. >> >> >> >> > >> >> >> >> > To avoid this please re-add the code adding 'stmt' to stmts immediately >> >> >> >> > and add a use_stmt != stmt check to the immediate use processing loop >> >> >> >> > so that we don't end up adding it twice. >> >> >> >> Well I wonder what will happen for the following case: >> >> >> >> t1 = x / y; >> >> >> >> if (cond) >> >> >> >> t2 = x % y; >> >> >> >> else >> >> >> >> t3 = x % y; >> >> >> >> >> >> >> >> Assuming stmt == top_stmt is "t2 = x % y" and use_stmt is "t3 = x % y", >> >> >> >> use_stmt will not get added to stmts vector, since top_stmt and >> >> >> >> use_stmt are not in same bb, >> >> >> >> and bb's containing top_stmt and use_stmt don't dominate each other. >> >> >> >> Not sure if this is practical case (I assume fre will hoist mod >> >> >> >> outside if-else?) >> >> >> >> >> >> >> >> Now that we immediately add stmt to stmts vector, I suppose mod_seen >> >> >> >> shall not be required ? >> >> >> > >> >> >> > In that case mod_seen is not needed. But the situation you say will >> >> >> > still happen so I wonder if we'd need a better way of iterating over >> >> >> > immediate uses, like first pushing all candidates into a worklist >> >> >> > vector and then iterating over that until we find no more candidates. >> >> >> > >> >> >> > You can then also handle the case of more than one group of stmts >> >> >> > (the pass currently doesn't iterate in any particular useful order >> >> >> > over BBs). >> >> >> IIUC, we want to perform the transform if: >> >> >> i) there exists top_stmt with code trunc_div_expr/trunc_mod_expr and >> >> >> having same operands as stmt. >> >> >> ii) top_stmt dominates all other stmts with code >> >> >> trunc_div_expr/trunc_mod_expr and having same operands as top_stmt. >> >> >> >> >> >> Firstly, we try to get to top_stmt if it exists, by iterating over uses of stmt, >> >> >> and then iterate over all uses of top_stmt and add them to stmts vector >> >> >> only if top_stmt dominates all the stmts with same operands as top_stmt >> >> >> and have code trunc_div_expr/trunc_mod_expr. >> >> >> >> >> >> /* Get to top_stmt. */ >> >> >> top_stmt = stmt; >> >> >> top_bb = gimple_bb (stmt); >> >> >> >> >> >> FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1) >> >> >> { >> >> >> if (use_stmt code is TRUNC_DIV_EXPR or TRUNC_MOD_EXPR >> >> >> && use_stmt has same operands as stmt) >> >> >> { >> >> >> if (gimple_bb (use_stmt) dominates top_bb) >> >> >> { >> >> >> top_bb = gimple_bb (use_stmt); >> >> >> top_stmt = use_stmt; >> >> >> } >> >> >> else if (gimple_bb (use_stmt) == top_stmt >> >> >> && gimple_uid (use_stmt) < top_stmt) >> >> >> top_stmt = use_stmt; >> >> >> } >> >> >> } >> >> >> >> >> >> /* Speculatively consider top_stmt as dominating all other >> >> >> div_expr/mod_expr stmts with same operands as stmt. */ >> >> >> stmts.safe_push (top_stmt); >> >> >> >> >> >> /* Walk uses of top_stmt to ensure that all stmts are dominated by top_stmt. */ >> >> >> top_op1 = gimple_assign_rhs1 (top_stmt); >> >> >> FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1) >> >> >> { >> >> >> if (use_stmt code is TRUNC_DIV_EXPR or TRUNC_MOD_EXPR >> >> >> && use_stmt has same operands as top_stmt) >> >> >> { >> >> >> if (use_stmt == top_stmt) >> >> >> continue; >> >> >> >> >> >> /* No top_stmt exits, do not proceed with transform */ >> >> >> if (top_bb does not dominate gimple_bb (use_stmt)) >> >> >> return false; >> >> >> >> >> >> stmts.safe_push (use_stmt); >> >> >> } >> >> >> } >> >> >> >> >> >> For the case: >> >> >> t1 = x / y; >> >> >> if (cond) >> >> >> t2 = x % y; >> >> >> else >> >> >> t3 = x % y; >> >> >> >> >> >> Assuming stmt is "t2 = x % y", it will walk uses of stmt and set >> >> >> top_stmt to "t1 = x / y" >> >> >> Then it will walk all uses of top_stmt: >> >> >> "t2 = x % y" -> dominated by top_stmt >> >> >> "t3 = x % y" -> dominated by top_stmt >> >> >> Since all stmts are dominated by top_stmt, we add all three stmts to >> >> >> vector of stmts and proceed with transform. >> >> >> >> >> >> For the case where, top_stmt dominates original stmt but not all stmts: >> >> >> >> >> >> if (cond) >> >> >> t1 = x / y; >> >> >> else >> >> >> { >> >> >> t2 = x % y; >> >> >> return; >> >> >> } >> >> >> >> >> >> t3 = x % y; >> >> >> >> >> >> Assuming stmt is "t3 = x % y", >> >> >> Walking stmt uses will set top_stmt to "t1 = x / y"; >> >> >> >> >> >> Walking immediate uses of top_stmt, we find that "t2 = x % y" is not >> >> >> dominated by top_stmt, >> >> >> and hence don't do the transform. >> >> >> >> >> >> Does this sound reasonable ? >> >> > >> >> > Yes, that's reasonable. >> >> Thanks, I have attached patch that implements above approach. >> >> Does it look OK ? >> > >> > Please start the top-stmt search with >> > >> > top_stmt = stmt; >> > top_bb = gimple_bb (stmt); >> > >> > this makes sure to process all stmts via the IL walk in case >> > the uses have multiple independent "dominated" trees. This also >> > simplifies the loop body (no need to check for NULL). This also >> > makes mod_seen always true and you can compute div_seen in that >> > loop as well. >> Done. Um I don't understand why setting top_stmt to NULL won't process >> all stmts ? AFAIU it will have one extra iteration compared to >> initializing top_stmt to stmt (since first iteration would initialize >> top_stmt to stmt assuming stmt does not throw) ? > > If you have > > if (cond) > { > r = x % y; > q = x / y; > } > else > { > r = x % y; > q = x / y; > } > > then the loop over the function might end up transforming the else > block when visiting the then block modulo and thus it will never > transform the then block. Because you walk immediate uses which > do not guarantee that you end up with a top_stmt related to the > IL point you were coming from - the first iteration does _not_ > necessarily have use_stmt == stmt. Thanks for the explanation, I overlooked the fact that for first iteration use_stmt may not equal stmt -;) > >> > >> > Otherwise looks ok now. >> > >> >> The patch does not still handle the following case: >> >> int f(int x, int y) >> >> { >> >> extern int cond; >> >> int q, r; >> >> >> >> if (cond) >> >> q = x % y; >> >> else >> >> q = x % y; >> >> >> >> r = x % y; >> >> return q + r; >> >> } >> >> >> >> In above case although the mod stmt is not dominated by either div >> >> stmt, I suppose the transform >> >> is still possible by inserting DIVMOD (x, y) before if-else ? >> > >> > Yeah, same for sincos where doing this requires some LCM algorithm. >> Well I don't have a good approach for this. >> I was thinking, before doing the divmod transform, we could walk >> GIMPLE_COND of "diamond" shape >> (having both arms), and check "then" bb and "else" bb have same div or >> mod stmts and in that case put an artificial >> same stmt above GIMPLE_COND. >> >> So the above case would be transformed to: >> >> int tmp = x / y; // artificial top_stmt >> if (cond) >> q = x / y; >> else >> q = x / y; >> >> r = x % y; >> return q + r; >> >> and then the divmod transform will see "tmp = x / y" as the topmost stmt. >> Since top_stmt is artificially introduced, we will replace that with DIVMOD ifn >> rather than inserting DIVMOD ifn above top_stmt as in other cases. > > Yeah, but it is really a general missed optimization that should be not > required for this transform. The attached patch ICE's during bootstrap for x86_64, and is reproducible with following case with -m32 -O2: typedef long long type; type f(type x, type y) { type q = x / y; type r = x % y; return q + r; } The ICE happens because the test-case hits gcc_assert (unsignedp); in default_expand_divmod_libfunc (). Surprisingly, optab_libfunc (sdivmod_optab, DImode) returns optab_libfunc with name "__divmoddi4" although __divmoddi4() is nowhere defined in libgcc for x86. (I verified that by forcing the patch to generate call to __divmoddi4, which results in undefined reference to __divmoddi4). This happens because in optabs.def we have: OPTAB_NL(sdivmod_optab, "divmod$a4", UNKNOWN, "divmod", '4', gen_int_libfunc) and gen_int_libfunc generates "__divmoddi4" on first call to optab_libfunc and sets optab_libfunc (sdivmod_optab, DImode) to "__divmoddi4". I wonder if we should remove gen_int_libfunc entry in optabs.def for sdivmod_optab ? Thanks, Prathamesh > > Richard. > >> > >> >> For the following test-case, I am surprised why CSE didn't take place before >> >> widening_mul pass ? >> >> >> >> int >> >> f_1 (int x, int y) >> >> { >> >> int q = x / y; >> >> int r1 = 0, r2 = 0; >> >> if (cond) >> >> r1 = x % y; >> >> else >> >> r2 = x % y; >> >> return q + r1 + r2; >> >> } >> > >> > This is not CSE but code hoisting which is not implemented on GIMPLE >> > (see PR23286) >> Ah right, thanks for pointing out the PR. >> >> Thanks, >> Prathamesh >> > >> >> The input to widening_mul pass is: >> >> f_1 (int x, int y) >> >> { >> >> int r2; >> >> int r1; >> >> int q; >> >> int cond.0_1; >> >> int _2; >> >> int _11; >> >> >> >> : >> >> q_7 = x_5(D) / y_6(D); >> >> cond.0_1 = cond; >> >> if (cond.0_1 != 0) >> >> goto ; >> >> else >> >> goto ; >> >> >> >> : >> >> r1_9 = x_5(D) % y_6(D); >> >> goto ; >> >> >> >> : >> >> r2_10 = x_5(D) % y_6(D); >> >> >> >> : >> >> # r1_3 = PHI >> >> # r2_4 = PHI <0(3), r2_10(4)> >> >> _2 = r1_3 + q_7; >> >> _11 = _2 + r2_4; >> >> return _11; >> >> >> >> } >> >> >> >> Thanks, >> >> Prathamesh >> >> > >> >> > Richard. >> >> > >> >> >> Thanks, >> >> >> Prathamesh >> >> >> > >> >> >> > Richard. >> >> >> > >> >> >> >> >> >> >> >> > >> >> > -- >> >> > Richard Biener >> >> > SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg) >> >> >> > >> > -- >> > Richard Biener >> > SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg) >> > > -- > Richard Biener > SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg) diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 12060ba..1310006 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -61,6 +61,7 @@ #include "builtins.h" #include "tm-constrs.h" #include "rtl-iter.h" +#include "optabs-libfuncs.h" /* This file should be included last. */ #include "target-def.h" @@ -300,6 +301,7 @@ static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void); static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*); static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT, const_tree); +static void arm_expand_divmod_libfunc (bool, machine_mode, rtx, rtx, rtx *, rtx *); /* Table of machine attributes. */ @@ -730,6 +732,9 @@ static const struct attribute_spec arm_attribute_table[] = #undef TARGET_SCHED_FUSION_PRIORITY #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority +#undef TARGET_EXPAND_DIVMOD_LIBFUNC +#define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc + struct gcc_target targetm = TARGET_INITIALIZER; /* Obstack for minipool constant handling. */ @@ -30354,6 +30359,37 @@ arm_sched_fusion_priority (rtx_insn *insn, int max_pri, return; } +/* Expand call to __aeabi_[mode]divmod (op0, op1). */ + +static void +arm_expand_divmod_libfunc (bool unsignedp, machine_mode mode, + rtx op0, rtx op1, + rtx *quot_p, rtx *rem_p) +{ + if (mode == SImode) + gcc_assert (!TARGET_IDIV); + + optab tab = (unsignedp) ? udivmod_optab : sdivmod_optab; + rtx libfunc = optab_libfunc (tab, mode); + gcc_assert (libfunc); + + machine_mode libval_mode = smallest_mode_for_size (2 * GET_MODE_BITSIZE (mode), + MODE_INT); + + rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST, + libval_mode, 2, + op0, GET_MODE (op0), + op1, GET_MODE (op1)); + + rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0); + rtx remainder = simplify_gen_subreg (mode, libval, libval_mode, GET_MODE_SIZE (mode)); + + gcc_assert (quotient); + gcc_assert (remainder); + + *quot_p = quotient; + *rem_p = remainder; +} /* Construct and return a PARALLEL RTX vector with elements numbering the lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index 8c7f2a1..111f19f 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -6963,6 +6963,12 @@ This is firstly introduced on ARM/AArch64 targets, please refer to the hook implementation for how different fusion types are supported. @end deftypefn +@deftypefn {Target Hook} void TARGET_EXPAND_DIVMOD_LIBFUNC (bool @var{unsignedp}, machine_mode @var{mode}, @var{rtx}, @var{rtx}, rtx *@var{quot}, rtx *@var{rem}) +Define this hook if the port does not have hardware div and divmod insn for +the given mode but has divmod libfunc, which is incompatible +with libgcc2.c:__udivmoddi4 +@end deftypefn + @node Sections @section Dividing the Output into Sections (Texts, Data, @dots{}) @c the above section title is WAY too long. maybe cut the part between diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index f963a58..2c9a800 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -4848,6 +4848,8 @@ them: try the first ones in this list first. @hook TARGET_SCHED_FUSION_PRIORITY +@hook TARGET_EXPAND_DIVMOD_LIBFUNC + @node Sections @section Dividing the Output into Sections (Texts, Data, @dots{}) @c the above section title is WAY too long. maybe cut the part between diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c index c867ddc..0cb59f7 100644 --- a/gcc/internal-fn.c +++ b/gcc/internal-fn.c @@ -2276,6 +2276,48 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types, #define direct_mask_store_optab_supported_p direct_optab_supported_p #define direct_store_lanes_optab_supported_p multi_vector_optab_supported_p +/* Expand DIVMOD() using: + a) optab handler for udivmod/sdivmod if it is available. + b) If optab_handler doesn't exist, Generate call to + optab_libfunc for udivmod/sdivmod. */ + +static void +expand_DIVMOD (internal_fn, gcall *stmt) +{ + tree lhs = gimple_call_lhs (stmt); + tree arg0 = gimple_call_arg (stmt, 0); + tree arg1 = gimple_call_arg (stmt, 1); + + gcc_assert (TREE_CODE (TREE_TYPE (lhs)) == COMPLEX_TYPE); + tree type = TREE_TYPE (TREE_TYPE (lhs)); + machine_mode mode = TYPE_MODE (type); + bool unsignedp = TYPE_UNSIGNED (type); + optab tab = (unsignedp) ? udivmod_optab : sdivmod_optab; + + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); + + rtx quotient, remainder; + + /* Check if optab handler exists for [u]divmod. */ + if (optab_handler (tab, mode) != CODE_FOR_nothing) + { + quotient = gen_reg_rtx (mode); + remainder = gen_reg_rtx (mode); + expand_twoval_binop (tab, op0, op1, quotient, remainder, unsignedp); + } + else + targetm.expand_divmod_libfunc (unsignedp, mode, op0, op1, + "ient, &remainder); + + /* Wrap the return value (quotient, remainder) within COMPLEX_EXPR. */ + expand_expr (build2 (COMPLEX_EXPR, TREE_TYPE (lhs), + make_tree (TREE_TYPE (arg0), quotient), + make_tree (TREE_TYPE (arg1), remainder)), + target, VOIDmode, EXPAND_NORMAL); +} + /* Return true if FN is supported for the types in TYPES when the optimization type is OPT_TYPE. The types are those associated with the "type0" and "type1" fields of FN's direct_internal_fn_info diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index e729d85..56a80f1 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -194,6 +194,9 @@ DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_SET, ECF_LEAF | ECF_NOTHROW, NULL) DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_COMPLEMENT, ECF_LEAF | ECF_NOTHROW, NULL) DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_RESET, ECF_LEAF | ECF_NOTHROW, NULL) +/* Divmod function. */ +DEF_INTERNAL_FN (DIVMOD, ECF_CONST | ECF_LEAF, NULL) + #undef DEF_INTERNAL_INT_FN #undef DEF_INTERNAL_FLT_FN #undef DEF_INTERNAL_OPTAB_FN diff --git a/gcc/target.def b/gcc/target.def index 6392e73..4496f9a 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -4948,6 +4948,16 @@ Normally, this is not needed.", bool, (const_tree field, machine_mode mode), default_member_type_forces_blk) +/* See tree-ssa-math-opts.c:divmod_candidate_p for conditions that gate + the divmod transform. */ +DEFHOOK +(expand_divmod_libfunc, + "Define this hook if the port does not have hardware div and divmod insn for\n\ +the given mode but has divmod libfunc, which is incompatible\n\ +with libgcc2.c:__udivmoddi4", + void, (bool unsignedp, machine_mode mode, rtx, rtx, rtx *quot, rtx *rem), + default_expand_divmod_libfunc) + /* Return the class for a secondary reload, and fill in extra information. */ DEFHOOK (secondary_reload, diff --git a/gcc/targhooks.c b/gcc/targhooks.c index 6b4601b..20327a6 100644 --- a/gcc/targhooks.c +++ b/gcc/targhooks.c @@ -1965,4 +1965,31 @@ default_optab_supported_p (int, machine_mode, machine_mode, optimization_type) return true; } +/* Generate call to + DImode __udivmoddi4 (DImode op0, DImode op1, DImode *rem). */ + +void +default_expand_divmod_libfunc (bool unsignedp, machine_mode mode, + rtx op0, rtx op1, + rtx *quot_p, rtx *rem_p) +{ + gcc_assert (mode == DImode); + gcc_assert (unsignedp); + + rtx libfunc = optab_libfunc (udivmod_optab, DImode); + gcc_assert (libfunc); + + rtx remainder = assign_stack_temp (DImode, GET_MODE_SIZE (DImode)); + rtx address = XEXP (remainder, 0); + + rtx quotient = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST, + DImode, 3, + op0, GET_MODE (op0), + op1, GET_MODE (op1), + address, GET_MODE (address)); + + *quot_p = quotient; + *rem_p = remainder; +} + #include "gt-targhooks.h" diff --git a/gcc/targhooks.h b/gcc/targhooks.h index 7687c39..dc5e8e7 100644 --- a/gcc/targhooks.h +++ b/gcc/targhooks.h @@ -254,4 +254,7 @@ extern void default_setup_incoming_vararg_bounds (cumulative_args_t ca ATTRIBUTE extern bool default_optab_supported_p (int, machine_mode, machine_mode, optimization_type); +extern void default_expand_divmod_libfunc (bool, machine_mode, + rtx, rtx, rtx *, rtx *); + #endif /* GCC_TARGHOOKS_H */ diff --git a/gcc/testsuite/gcc.dg/divmod-1-simode.c b/gcc/testsuite/gcc.dg/divmod-1-simode.c new file mode 100644 index 0000000..7405f66 --- /dev/null +++ b/gcc/testsuite/gcc.dg/divmod-1-simode.c @@ -0,0 +1,22 @@ +/* { dg-require-effective-target divmod_simode } */ +/* { dg-options "-O2 -fdump-tree-widening_mul-details" } */ +/* div dominates mod. */ + +extern int cond; +void foo(void); + +#define FOO(smalltype, bigtype, no) \ +bigtype f_##no(smalltype x, bigtype y) \ +{ \ + bigtype q = x / y; \ + if (cond) \ + foo (); \ + bigtype r = x % y; \ + return q + r; \ +} + +FOO(int, int, 1) +FOO(int, unsigned, 2) +FOO(unsigned, unsigned, 5) + +/* { dg-final { scan-tree-dump-times "DIVMOD" 3 "widening_mul" } } */ diff --git a/gcc/testsuite/gcc.dg/divmod-1.c b/gcc/testsuite/gcc.dg/divmod-1.c new file mode 100644 index 0000000..40aec74 --- /dev/null +++ b/gcc/testsuite/gcc.dg/divmod-1.c @@ -0,0 +1,26 @@ +/* { dg-require-effective-target divmod } */ +/* { dg-options "-O2 -fdump-tree-widening_mul-details" } */ +/* div dominates mod. */ + +extern int cond; +void foo(void); + +#define FOO(smalltype, bigtype, no) \ +bigtype f_##no(smalltype x, bigtype y) \ +{ \ + bigtype q = x / y; \ + if (cond) \ + foo (); \ + bigtype r = x % y; \ + return q + r; \ +} + +FOO(int, long long, 3) +FOO(int, unsigned long long, 4) +FOO(unsigned, long long, 6) +FOO(unsigned, unsigned long long, 7) +FOO(long long, long long, 8) +FOO(long long, unsigned long long, 9) +FOO(unsigned long long, unsigned long long, 10) + +/* { dg-final { scan-tree-dump-times "DIVMOD" 7 "widening_mul" } } */ diff --git a/gcc/testsuite/gcc.dg/divmod-2-simode.c b/gcc/testsuite/gcc.dg/divmod-2-simode.c new file mode 100644 index 0000000..7c8313b --- /dev/null +++ b/gcc/testsuite/gcc.dg/divmod-2-simode.c @@ -0,0 +1,22 @@ +/* { dg-require-effective-target divmod_simode } */ +/* { dg-options "-O2 -fdump-tree-widening_mul-details" } */ +/* mod dominates div. */ + +extern int cond; +void foo(void); + +#define FOO(smalltype, bigtype, no) \ +bigtype f_##no(smalltype x, bigtype y) \ +{ \ + bigtype r = x % y; \ + if (cond) \ + foo (); \ + bigtype q = x / y; \ + return q + r; \ +} + +FOO(int, int, 1) +FOO(int, unsigned, 2) +FOO(unsigned, unsigned, 5) + +/* { dg-final { scan-tree-dump-times "DIVMOD" 3 "widening_mul" } } */ diff --git a/gcc/testsuite/gcc.dg/divmod-2.c b/gcc/testsuite/gcc.dg/divmod-2.c new file mode 100644 index 0000000..6a2216c --- /dev/null +++ b/gcc/testsuite/gcc.dg/divmod-2.c @@ -0,0 +1,26 @@ +/* { dg-require-effective-target divmod } */ +/* { dg-options "-O2 -fdump-tree-widening_mul-details" } */ +/* mod dominates div. */ + +extern int cond; +void foo(void); + +#define FOO(smalltype, bigtype, no) \ +bigtype f_##no(smalltype x, bigtype y) \ +{ \ + bigtype r = x % y; \ + if (cond) \ + foo (); \ + bigtype q = x / y; \ + return q + r; \ +} + +FOO(int, long long, 3) +FOO(int, unsigned long long, 4) +FOO(unsigned, long long, 6) +FOO(unsigned, unsigned long long, 7) +FOO(long long, long long, 8) +FOO(long long, unsigned long long, 9) +FOO(unsigned long long, unsigned long long, 10) + +/* { dg-final { scan-tree-dump-times "DIVMOD" 7 "widening_mul" } } */ diff --git a/gcc/testsuite/gcc.dg/divmod-3-simode.c b/gcc/testsuite/gcc.dg/divmod-3-simode.c new file mode 100644 index 0000000..6f0f63d --- /dev/null +++ b/gcc/testsuite/gcc.dg/divmod-3-simode.c @@ -0,0 +1,20 @@ +/* { dg-require-effective-target divmod_simode } */ +/* { dg-options "-O2 -fdump-tree-widening_mul-details" } */ +/* div comes before mod in same bb. */ + +extern int cond; +void foo(void); + +#define FOO(smalltype, bigtype, no) \ +bigtype f_##no(smalltype x, bigtype y) \ +{ \ + bigtype q = x / y; \ + bigtype r = x % y; \ + return q + r; \ +} + +FOO(int, int, 1) +FOO(int, unsigned, 2) +FOO(unsigned, unsigned, 5) + +/* { dg-final { scan-tree-dump-times "DIVMOD" 3 "widening_mul" } } */ diff --git a/gcc/testsuite/gcc.dg/divmod-3.c b/gcc/testsuite/gcc.dg/divmod-3.c new file mode 100644 index 0000000..9fe6f64 --- /dev/null +++ b/gcc/testsuite/gcc.dg/divmod-3.c @@ -0,0 +1,24 @@ +/* { dg-require-effective-target divmod } */ +/* { dg-options "-O2 -fdump-tree-widening_mul-details" } */ +/* div comes before mod in same bb. */ + +extern int cond; +void foo(void); + +#define FOO(smalltype, bigtype, no) \ +bigtype f_##no(smalltype x, bigtype y) \ +{ \ + bigtype q = x / y; \ + bigtype r = x % y; \ + return q + r; \ +} + +FOO(int, long long, 3) +FOO(int, unsigned long long, 4) +FOO(unsigned, long long, 6) +FOO(unsigned, unsigned long long, 7) +FOO(long long, long long, 8) +FOO(long long, unsigned long long, 9) +FOO(unsigned long long, unsigned long long, 10) + +/* { dg-final { scan-tree-dump-times "DIVMOD" 7 "widening_mul" } } */ diff --git a/gcc/testsuite/gcc.dg/divmod-4-simode.c b/gcc/testsuite/gcc.dg/divmod-4-simode.c new file mode 100644 index 0000000..9c326f2 --- /dev/null +++ b/gcc/testsuite/gcc.dg/divmod-4-simode.c @@ -0,0 +1,20 @@ +/* { dg-require-effective-target divmod_simode } */ +/* { dg-options "-O2 -fdump-tree-widening_mul-details" } */ +/* mod comes before div in same bb. */ + +extern int cond; +void foo(void); + +#define FOO(smalltype, bigtype, no) \ +bigtype f_##no(smalltype x, bigtype y) \ +{ \ + bigtype r = x % y; \ + bigtype q = x / y; \ + return q + r; \ +} + +FOO(int, int, 1) +FOO(int, unsigned, 2) +FOO(unsigned, unsigned, 5) + +/* { dg-final { scan-tree-dump-times "DIVMOD" 3 "widening_mul" } } */ diff --git a/gcc/testsuite/gcc.dg/divmod-4.c b/gcc/testsuite/gcc.dg/divmod-4.c new file mode 100644 index 0000000..a5686cc --- /dev/null +++ b/gcc/testsuite/gcc.dg/divmod-4.c @@ -0,0 +1,24 @@ +/* { dg-require-effective-target divmod } */ +/* { dg-options "-O2 -fdump-tree-widening_mul-details" } */ +/* mod comes before div in same bb. */ + +extern int cond; +void foo(void); + +#define FOO(smalltype, bigtype, no) \ +bigtype f_##no(smalltype x, bigtype y) \ +{ \ + bigtype r = x % y; \ + bigtype q = x / y; \ + return q + r; \ +} + +FOO(int, long long, 3) +FOO(int, unsigned long long, 4) +FOO(unsigned, long long, 6) +FOO(unsigned, unsigned long long, 7) +FOO(long long, long long, 8) +FOO(long long, unsigned long long, 9) +FOO(unsigned long long, unsigned long long, 10) + +/* { dg-final { scan-tree-dump-times "DIVMOD" 7 "widening_mul" } } */ diff --git a/gcc/testsuite/gcc.dg/divmod-5.c b/gcc/testsuite/gcc.dg/divmod-5.c new file mode 100644 index 0000000..8a8cee5 --- /dev/null +++ b/gcc/testsuite/gcc.dg/divmod-5.c @@ -0,0 +1,19 @@ +/* { dg-require-effective-target divmod_simode } */ +/* { dg-options "-O2 -fdump-tree-widening_mul-details" } */ +/* div and mod are not in same bb and + bb's containing div and mod don't dominate each other. */ + +int f(int x, int y) +{ + int q = 0; + int r = 0; + extern int cond; + + if (cond) + q = x / y; + + r = x % y; + return q + r; +} + +/* { dg-final { scan-tree-dump-times "DIVMOD" 0 "widening_mul" } } */ diff --git a/gcc/testsuite/gcc.dg/divmod-6-simode.c b/gcc/testsuite/gcc.dg/divmod-6-simode.c new file mode 100644 index 0000000..3bf6fa3 --- /dev/null +++ b/gcc/testsuite/gcc.dg/divmod-6-simode.c @@ -0,0 +1,24 @@ +/* { dg-require-effective-target divmod_simode } */ +/* { dg-options "-O2 -fdump-tree-widening_mul-details" } */ + + +extern int cond; +void foo(void); + +#define FOO(smalltype, bigtype, no) \ +bigtype f_##no(smalltype x, bigtype y) \ +{ \ + bigtype q = x / y; \ + bigtype r1 = 0, r2 = 0; \ + if (cond) \ + r1 = x % y; \ + else \ + r2 = x % y; \ + return q + r1 + r2; \ +} + +FOO(int, int, 1) +FOO(int, unsigned, 2) +FOO(unsigned, unsigned, 5) + +/* { dg-final { scan-tree-dump-times "DIVMOD" 3 "widening_mul" } } */ diff --git a/gcc/testsuite/gcc.dg/divmod-6.c b/gcc/testsuite/gcc.dg/divmod-6.c new file mode 100644 index 0000000..70e4321 --- /dev/null +++ b/gcc/testsuite/gcc.dg/divmod-6.c @@ -0,0 +1,27 @@ +/* { dg-require-effective-target divmod } */ +/* { dg-options "-O2 -fdump-tree-widening_mul-details" } */ + +extern int cond; +void foo(void); + +#define FOO(smalltype, bigtype, no) \ +bigtype f_##no(smalltype x, bigtype y) \ +{ \ + bigtype q = x / y; \ + bigtype r1 = 0, r2 = 0; \ + if (cond) \ + r1 = x % y; \ + else \ + r2 = x % y; \ + return q + r1 + r2; \ +} + +FOO(int, long long, 3) +FOO(int, unsigned long long, 4) +FOO(unsigned, long long, 6) +FOO(unsigned, unsigned long long, 7) +FOO(long long, long long, 8) +FOO(long long, unsigned long long, 9) +FOO(unsigned long long, unsigned long long, 10) + +/* { dg-final { scan-tree-dump-times "DIVMOD" 7 "widening_mul" } } */ diff --git a/gcc/testsuite/gcc.dg/divmod-7.c b/gcc/testsuite/gcc.dg/divmod-7.c new file mode 100644 index 0000000..a6e7fcd --- /dev/null +++ b/gcc/testsuite/gcc.dg/divmod-7.c @@ -0,0 +1,21 @@ +/* { dg-require-effective-target divmod_simode } */ +/* { dg-options "-O2 -fdump-tree-widening_mul-details" } */ + +int f(int x, int y) +{ + int q = 0, r1 = 0, r2 = 0; + extern int cond; + + if (cond) + q = x / y; + else + { + r1 = x % y; + return q + r1; + } + + r2 = x % y; + return q + r2; +} + +/* { dg-final { scan-tree-dump-times "DIVMOD" 0 "widening_mul" } } */ diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 04ca176..ad7c487 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -6986,3 +6986,32 @@ proc check_effective_target_offload_hsa { } { int main () {return 0;} } "-foffload=hsa" ] } + +# For ARM configs defining __ARM_ARCH_EXT_IDIV__, disable divmod_simode test-cases. + +proc check_effective_target_arm_divmod_simode { } { + return [check_no_compiler_messages arm_divmod assembly { + #ifdef __ARM_ARCH_EXT_IDIV__ + #error has div insn + #endif + int i; + }] +} + +proc check_effective_target_divmod { } { + #TODO: Add checks for all targets that have either hardware divmod insn + # or define libfunc for divmod. + if { [istarget arm*-*-*] + || [istarget x86_64-*-*] } { + return 1 + } + return 0 +} + +proc check_effective_target_divmod_simode { } { + if { [istarget arm*-*-*] } { + return [check_effective_target_arm_divmod_simode] + } + + return [check_effective_target_divmod] +} diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c index 81688cd..4e5cd2b 100644 --- a/gcc/tree-ssa-math-opts.c +++ b/gcc/tree-ssa-math-opts.c @@ -112,6 +112,9 @@ along with GCC; see the file COPYING3. If not see #include "params.h" #include "internal-fn.h" #include "case-cfn-macros.h" +#include "optabs-libfuncs.h" +#include "tree-eh.h" +#include "targhooks.h" /* This structure represents one basic block that either computes a division, or is a common dominator for basic block that compute a @@ -184,6 +187,9 @@ static struct /* Number of fp fused multiply-add ops inserted. */ int fmas_inserted; + + /* Number of divmod calls inserted. */ + int divmod_calls_inserted; } widen_mul_stats; /* The instance of "struct occurrence" representing the highest @@ -3784,6 +3790,222 @@ match_uaddsub_overflow (gimple_stmt_iterator *gsi, gimple *stmt, return true; } +/* Return true if target has support for divmod. */ + +static bool +target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode) +{ + /* If target supports hardware divmod insn, use it for divmod. */ + if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing) + return true; + + /* Check if libfunc for divmod is available. */ + if (optab_libfunc (divmod_optab, mode) != NULL_RTX) + { + /* If optab_handler exists for div_optab, perhaps in a wider mode, + we don't want to use the libfunc even if it exists for given mode. */ + for (machine_mode div_mode = mode; + div_mode != VOIDmode; + div_mode = GET_MODE_WIDER_MODE (div_mode)) + if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing) + return false; + + return true; + } + + return false; +} + +/* Check if stmt is candidate for divmod transform. */ + +static bool +divmod_candidate_p (gassign *stmt) +{ + tree type = TREE_TYPE (gimple_assign_lhs (stmt)); + enum machine_mode mode = TYPE_MODE (type); + optab divmod_optab, div_optab; + + if (TYPE_UNSIGNED (type)) + { + divmod_optab = udivmod_optab; + div_optab = udiv_optab; + } + else + { + divmod_optab = sdivmod_optab; + div_optab = sdiv_optab; + } + + tree op1 = gimple_assign_rhs1 (stmt); + tree op2 = gimple_assign_rhs2 (stmt); + + /* Disable the transform if either is a constant, since division-by-constant + may have specialized expansion. */ + if (CONSTANT_CLASS_P (op1) || CONSTANT_CLASS_P (op2)) + return false; + + /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should + expand using the [su]divv optabs. */ + if (TYPE_OVERFLOW_TRAPS (type)) + return false; + + if (!target_supports_divmod_p (divmod_optab, div_optab, mode)) + return false; + + return true; +} + +/* This function looks for: + t1 = a TRUNC_DIV_EXPR b; + t2 = a TRUNC_MOD_EXPR b; + and transforms it to the following sequence: + complex_tmp = DIVMOD (a, b); + t1 = REALPART_EXPR(a); + t2 = IMAGPART_EXPR(b); + For conditions enabling the transform see divmod_candidate_p(). + + The pass works in two phases: + 1) Walk through all immediate uses of stmt's operand and find a + TRUNC_DIV_EXPR with matching operands and if such a stmt is found add + it to stmts vector. + 2) Insert DIVMOD call before first div/mod stmt in top_bb (basic block that + dominates other div/mod stmts with same operands) and update entries in + stmts vector to use return value of DIMOVD (REALEXPR_PART for div, + IMAGPART_EXPR for mod). */ + +static bool +convert_to_divmod (gassign *stmt) +{ + if (!divmod_candidate_p (stmt)) + return false; + + tree op1 = gimple_assign_rhs1 (stmt); + tree op2 = gimple_assign_rhs2 (stmt); + + imm_use_iterator use_iter; + gimple *use_stmt; + auto_vec stmts; + + gimple *top_stmt = stmt; + basic_block top_bb = gimple_bb (stmt); + + /* Try to set top_stmt to "topmost" stmt + with code TRUNC_DIV_EXPR/TRUNC_MOD_EXPR having same operands as stmt. */ + + FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1) + { + if (is_gimple_assign (use_stmt) + && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR + || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR) + && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0) + && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0)) + { + if (stmt_can_throw_internal (use_stmt)) + continue; + + basic_block bb = gimple_bb (use_stmt); + + if (bb == top_bb) + { + if (gimple_uid (use_stmt) < gimple_uid (top_stmt)) + top_stmt = use_stmt; + } + else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb)) + { + top_bb = bb; + top_stmt = use_stmt; + } + } + } + + if (top_stmt == stmt && stmt_can_throw_internal (top_stmt)) + return false; + + tree top_op1 = gimple_assign_rhs1 (top_stmt); + tree top_op2 = gimple_assign_rhs2 (top_stmt); + + stmts.safe_push (top_stmt); + bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR); + + /* Ensure that gimple_bb (use_stmt) is dominated by top_bb. */ + + FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1) + { + if (is_gimple_assign (use_stmt) + && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR + || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR) + && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0) + && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0)) + { + if (use_stmt == top_stmt) + continue; + + if (stmt_can_throw_internal (use_stmt)) + continue; + + if (!dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb)) + { + end_imm_use_stmt_traverse (&use_iter); + return false; + } + + stmts.safe_push (use_stmt); + if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR) + div_seen = true; + } + } + + if (!div_seen) + return false; + + /* Create libcall to internal fn DIVMOD: + divmod_tmp = DIVMOD (op1, op2). */ + + gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2); + tree res = make_temp_ssa_name ( + build_complex_type (TREE_TYPE (op1)), + call_stmt, "divmod_tmp"); + gimple_call_set_lhs (call_stmt, res); + + /* Insert the call before top_stmt. */ + gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt); + gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT); + + widen_mul_stats.divmod_calls_inserted++; + + /* Update all statements in stmts. + if stmt is lhs = op1 TRUNC_DIV_EXPR op2, change to lhs = REALPART_EXPR + if stmt is lhs = op1 TRUNC_MOD_EXPR op2, change to lhs = IMAGPART_EXPR. */ + + bool cfg_changed = false; + for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i) + { + tree new_rhs; + + switch (gimple_assign_rhs_code (use_stmt)) + { + case TRUNC_DIV_EXPR: + new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res); + break; + + case TRUNC_MOD_EXPR: + new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op2), res); + break; + + default: + gcc_unreachable (); + } + + gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt); + gimple_assign_set_rhs_from_tree (&gsi, new_rhs); + update_stmt (use_stmt); + + if (maybe_clean_or_replace_eh_stmt (use_stmt, use_stmt)) + cfg_changed = true; + } + + return cfg_changed; +} /* Find integer multiplications where the operands are extended from smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR @@ -3828,6 +4050,8 @@ pass_optimize_widening_mul::execute (function *fun) bool cfg_changed = false; memset (&widen_mul_stats, 0, sizeof (widen_mul_stats)); + calculate_dominance_info (CDI_DOMINATORS); + renumber_gimple_stmt_uids (); FOR_EACH_BB_FN (bb, fun) { @@ -3861,6 +4085,10 @@ pass_optimize_widening_mul::execute (function *fun) match_uaddsub_overflow (&gsi, stmt, code); break; + case TRUNC_MOD_EXPR: + cfg_changed = convert_to_divmod (as_a (stmt)); + break; + default:; } } @@ -3907,6 +4135,8 @@ pass_optimize_widening_mul::execute (function *fun) widen_mul_stats.maccs_inserted); statistics_counter_event (fun, "fused multiply-adds inserted", widen_mul_stats.fmas_inserted); + statistics_counter_event (fun, "divmod calls inserted", + widen_mul_stats.divmod_calls_inserted); return cfg_changed ? TODO_cleanup_cfg : 0; }