From patchwork Fri Jun 21 09:51:37 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Prathamesh Kulkarni X-Patchwork-Id: 167377 Delivered-To: patch@linaro.org Received: by 2002:a92:4782:0:0:0:0:0 with SMTP id e2csp514123ilk; Fri, 21 Jun 2019 02:52:32 -0700 (PDT) X-Google-Smtp-Source: APXvYqzxgnMRZuTQ2XyrrSJlMmF0KWq/bTXMSNdZfhzoTabtQki0JnggQs59CgwwbrOBpgm5EuEf X-Received: by 2002:a65:5c0a:: with SMTP id u10mr17893097pgr.412.1561110751855; Fri, 21 Jun 2019 02:52:31 -0700 (PDT) ARC-Seal: i=1; a=rsa-sha256; t=1561110751; cv=none; d=google.com; s=arc-20160816; b=TND9uAACgNLEJdK+2DDDM6fdtHfOGSEyqBU9OQ1ldpQX74Pu3oSJ068VRQ+pmT3Bkl p/4Q/J148Y8MQXFuHWlzywqe9HhwjZME5xaWicHa0M+BEkOYwLwAGPp1e6WU+UAW2lJr QT3kHjCZacITT3RsUH+VLxtLurgwX0pu+VPvEBuUp0mmjFxQR0m6ynXdIpddZuFUtPwe byJXFvqWOLfViu4jvULJXFMMz3G9nGS83N/F4oKkmIDiFPWeG7b1UFdtUnttpV4AYf7B adCZX/JE1wZQGUNTcrifGzs8gVnmNJof8Xji4PULdsgJIAcLiUgpktx+H5MnQk9ySSrc QoGw== ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20160816; h=to:subject:message-id:date:from:mime-version:dkim-signature :delivered-to:sender:list-help:list-post:list-archive :list-unsubscribe:list-id:precedence:mailing-list:dkim-signature :domainkey-signature; bh=DMeM2tQjkIWz1hqop+v5Ep89tJuiaomEsn9xhC1W3RI=; b=rnASjdKQ/r4jycK1ZCyOL8aWFqRfg6o9bIo5YAsoDWKBE8917Dh83R8ai6m6DdsnKS UrfTYeCYcYUf0m734k0gekNDUQfRj4rMlko3zu0Xwr44DJQG7gnWZTxQQcUna+DJ4US8 J+ObdXLSwn7AGqtZa++gHgCpxe1ualbMps69zf0D1KhizT+iuI4A4fbtWOSG2tZ5G9u+ 0qX3w/JJprAcxhBwHh2dpwtqMd9/PXR2WyoxSJS3qt4SgS/K3Lt5fnJiIdaJnpG180x1 RQWxwsqsiOgdWdDw8CrLGD74MJ//yv3iKiPpzdWnSIOaNersIKzuJGkWOwtEHeQvsK3c 7i4w== ARC-Authentication-Results: i=1; mx.google.com; dkim=pass header.i=@gcc.gnu.org header.s=default header.b=CKPfOByG; dkim=neutral (body hash did not verify) header.i=@linaro.org header.s=google header.b=OHRwgTVR; spf=pass (google.com: domain of gcc-patches-return-503422-patch=linaro.org@gcc.gnu.org designates 
209.132.180.131 as permitted sender) smtp.mailfrom="gcc-patches-return-503422-patch=linaro.org@gcc.gnu.org"; dmarc=fail (p=NONE sp=NONE dis=NONE) header.from=linaro.org Return-Path: Received: from sourceware.org (server1.sourceware.org. [209.132.180.131]) by mx.google.com with ESMTPS id a25si2359143pfo.234.2019.06.21.02.52.31 for (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Fri, 21 Jun 2019 02:52:31 -0700 (PDT) Received-SPF: pass (google.com: domain of gcc-patches-return-503422-patch=linaro.org@gcc.gnu.org designates 209.132.180.131 as permitted sender) client-ip=209.132.180.131; Authentication-Results: mx.google.com; dkim=pass header.i=@gcc.gnu.org header.s=default header.b=CKPfOByG; dkim=neutral (body hash did not verify) header.i=@linaro.org header.s=google header.b=OHRwgTVR; spf=pass (google.com: domain of gcc-patches-return-503422-patch=linaro.org@gcc.gnu.org designates 209.132.180.131 as permitted sender) smtp.mailfrom="gcc-patches-return-503422-patch=linaro.org@gcc.gnu.org"; dmarc=fail (p=NONE sp=NONE dis=NONE) header.from=linaro.org DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender :mime-version:from:date:message-id:subject:to:content-type; q= dns; s=default; b=kdTf+qrpZ2DkpAJ0TNyV6H7ekBu7MzfLc9dw8mUFrA0L0C 7pVKmLxAOpMzK1TopvyBJNqYsXgCW6L6aCqeH/8QwfkmaF9S+1lNQ9+q+2uLilxD IHeWRLbb6dpxPLrznHbUzcN0lauRmnOxYFvm/UmYFd429HmM5t8i6SU10SHPg= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender :mime-version:from:date:message-id:subject:to:content-type; s= default; bh=7K+WKsWBGOr6Ew3zBSogOLLhH3I=; b=CKPfOByGedybl6LkTOpW 1F783nm+8zyMGrugbQVUO8C7PAxPSXxOI3FwcOGHndcIbfu3qqqo/Kgc3OwzlwAc iPaF0i+ifj8s8dAJKIT9gcsq/+gaLYe9TevT9BthOIWSf/9+crVODEsGyLVqXTeg GJZxH30G6YarDiGuj/9mS2U= Received: (qmail 44278 invoked by alias); 21 Jun 2019 09:52:20 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by 
ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 44265 invoked by uid 89); 21 Jun 2019 09:52:18 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-25.8 required=5.0 tests=AWL, BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3, RCVD_IN_DNSWL_NONE, SPF_PASS autolearn=ham version=3.3.1 spammy=Non, unspec, SVE, whilelo X-HELO: mail-lf1-f43.google.com Received: from mail-lf1-f43.google.com (HELO mail-lf1-f43.google.com) (209.85.167.43) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Fri, 21 Jun 2019 09:52:16 +0000 Received: by mail-lf1-f43.google.com with SMTP id r15so4553107lfm.11 for ; Fri, 21 Jun 2019 02:52:15 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linaro.org; s=google; h=mime-version:from:date:message-id:subject:to; bh=cQ+qac/lSUhn7O+xQkdM3y/NJYoFaXQKa+57FBVW+T4=; b=OHRwgTVRDbz9jFSA3GiLQtDhw7MwH0sQGL2WAVKuIV/1uXMBxvJM7EtkX6K4gBvzi0 kfRCX90oOnvywvkI9k1KnHoRxyDMeebZOMzYEbUaxfxQRd4OyuKQfZtTiQEHM5+kVB0p 3s3qlGkQAz+pj4bb1b9LZDfJZ62+DRejqN5jqTcMkTFtGg8WLC+UCRKQIiYp62AArM2I Eh441kMdY7tNjVdi7yTqjW5E3cvHVxzrxXkBepjBuYhGQQ7GwfYar4N/Ew+rG4IDcNNh BdJ8vqf+g4X85jeT68kNGoIbX+zkQx06iQj7gkZedKJy14gIfUEm2gKS3XdgIL6v4CU3 N/SA== MIME-Version: 1.0 From: Prathamesh Kulkarni Date: Fri, 21 Jun 2019 15:21:37 +0530 Message-ID: Subject: [SVE] [fwprop] PR88833 - Redundant moves for WHILELO-based loops To: gcc Patches , Richard Sandiford X-IsSubscribed: yes Hi, The attached patch tries to fix PR88833. 
For the following test-case: subroutine foo(x) real :: x(100) x = x + 10 end subroutine foo Assembly with -O3 -march=armv8.2-a+sve: foo_: .LFB0: .cfi_startproc mov w2, 100 mov w3, w2 mov x1, 0 whilelo p0.s, wzr, w2 fmov z1.s, #1.0e+1 .p2align 3,,7 .L2: ld1w z0.s, p0/z, [x0, x1, lsl 2] fadd z0.s, z0.s, z1.s st1w z0.s, p0, [x0, x1, lsl 2] incw x1 whilelo p0.s, w1, w3 bne .L2 ret As we can see, it generates an extra mov w3, w2. Instead, it could have reused w2 in both whilelo's. expand produces: insn 7: reg:SI 97 = 100 insn 8: use (reg:SI 97) insn 22: reg:SI 105 = 100 insn 23: use (reg:SI 105) Both reg:SI 97 and reg:SI 105 have only a single definition (and also a single use). cse2 then replaces 100 with reg:SI 97 in insn 22, which becomes: insn 22: reg:SI 105 = reg:SI 97. sched1 then reorders instructions, and insn 7 and insn 22 end up falling in the same basic block. Looking at the reload dump: Choosing alt 3 in insn 7: (0) r (1) M {*movsi_aarch64} alt=0,overall=0,losers=0,rld_nregs=0 Choosing alt 0 in insn 2: (0) =r (1) r {*movdi_aarch64} alt=0,overall=0,losers=0,rld_nregs=0 Choosing alt 0 in insn 22: (0) =r (1) r {*movsi_aarch64} 1 Non-pseudo reload: reject+=2 1 Non input pseudo reload: reject++ Cycle danger: overall += LRA_MAX_REJECT alt=0,overall=609,losers=1,rld_nregs=1 which shows that it ends up taking an extra register. The issue here is that the cse2 pass is leaving opportunities for propagating register copies. To address this, the patch makes the following changes to fwprop.c: (a) Add support for handling UNSPEC in propagate_rtx_1 in a similar manner to simplify_replace_fn_rtx. (b) Allow propagating a def inside a loop if the source of the def is a register in forward_propagate_into. AFAIU, replacing a register by another register shouldn't increase cost. (c) Integrate fwprop and fwprop_addr, and make fwprop_addr propagate register copies. With the patch, fwprop_addr propagates reg:SI 97 in insn 23 and deletes insn 22, which eliminates the redundant mov. Does this patch look OK?
Bootstrapped + tested on x86_64-unknown-linux-gnu and aarch64-linux-gnu. Cross-testing with SVE in progress. Thanks, Prathamesh diff --git a/gcc/fwprop.c b/gcc/fwprop.c index 45703fe5f01..93a1a10c9a6 100644 --- a/gcc/fwprop.c +++ b/gcc/fwprop.c @@ -547,6 +547,54 @@ propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags) tem = simplify_gen_subreg (mode, op0, GET_MODE (SUBREG_REG (x)), SUBREG_BYTE (x)); } + + else + { + rtvec vec; + rtvec newvec; + const char *fmt = GET_RTX_FORMAT (code); + rtx op; + + for (int i = 0; fmt[i]; i++) + switch (fmt[i]) + { + case 'E': + vec = XVEC (x, i); + newvec = vec; + for (int j = 0; j < GET_NUM_ELEM (vec); j++) + { + op = RTVEC_ELT (vec, j); + valid_ops &= propagate_rtx_1 (&op, old_rtx, new_rtx, flags); + if (op != RTVEC_ELT (vec, j)) + { + if (newvec == vec) + { + newvec = shallow_copy_rtvec (vec); + if (!tem) + tem = shallow_copy_rtx (x); + XVEC (tem, i) = newvec; + } + RTVEC_ELT (newvec, j) = op; + } + } + break; + + case 'e': + if (XEXP (x, i)) + { + op = XEXP (x, i); + valid_ops &= propagate_rtx_1 (&op, old_rtx, new_rtx, flags); + if (op != XEXP (x, i)) + { + if (!tem) + tem = shallow_copy_rtx (x); + XEXP (tem, i) = op; + } + } + break; + } + } + break; case RTX_OBJ: @@ -1370,10 +1418,11 @@ forward_propagate_and_simplify (df_ref use, rtx_insn *def_insn, rtx def_set) /* Given a use USE of an insn, if it has a single reaching definition, try to forward propagate it into that insn. - Return true if cfg cleanup will be needed. */ + Return true if cfg cleanup will be needed. + REG_PROP_ONLY is true if we should only propagate register copies. */ static bool -forward_propagate_into (df_ref use) +forward_propagate_into (df_ref use, bool reg_prop_only = false) { df_ref def; rtx_insn *def_insn, *use_insn; @@ -1394,10 +1443,6 @@ forward_propagate_into (df_ref use) if (DF_REF_IS_ARTIFICIAL (def)) return false; - /* Do not propagate loop invariant definitions inside the loop. 
*/ - if (DF_REF_BB (def)->loop_father != DF_REF_BB (use)->loop_father) - return false; - /* Check if the use is still present in the insn! */ use_insn = DF_REF_INSN (use); if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE) @@ -1415,6 +1460,16 @@ forward_propagate_into (df_ref use) if (!def_set) return false; + if (reg_prop_only && !REG_P (SET_SRC (def_set))) + return false; + + /* Allow propagating def inside loop only if source of def_set is + reg, since replacing reg by source reg shouldn't increase cost. */ + + if (DF_REF_BB (def)->loop_father != DF_REF_BB (use)->loop_father + && !REG_P (SET_SRC (def_set))) + return false; + /* Only try one kind of propagation. If two are possible, we'll do it on the following iterations. */ if (forward_propagate_and_simplify (use, def_insn, def_set) @@ -1483,7 +1538,7 @@ gate_fwprop (void) } static unsigned int -fwprop (void) +fwprop (bool fwprop_addr_p) { unsigned i; @@ -1502,11 +1557,16 @@ fwprop (void) df_ref use = DF_USES_GET (i); if (use) - if (DF_REF_TYPE (use) == DF_REF_REG_USE - || DF_REF_BB (use)->loop_father == NULL - /* The outer most loop is not really a loop. */ - || loop_outer (DF_REF_BB (use)->loop_father) == NULL) - forward_propagate_into (use); + { + if (DF_REF_TYPE (use) == DF_REF_REG_USE + || DF_REF_BB (use)->loop_father == NULL + /* The outer most loop is not really a loop. 
*/ + || loop_outer (DF_REF_BB (use)->loop_father) == NULL) + forward_propagate_into (use, fwprop_addr_p); + + else if (fwprop_addr_p) + forward_propagate_into (use, false); + } } fwprop_done (); @@ -1537,7 +1597,7 @@ public: /* opt_pass methods: */ virtual bool gate (function *) { return gate_fwprop (); } - virtual unsigned int execute (function *) { return fwprop (); } + virtual unsigned int execute (function *) { return fwprop (false); } }; // class pass_rtl_fwprop @@ -1549,33 +1609,6 @@ make_pass_rtl_fwprop (gcc::context *ctxt) return new pass_rtl_fwprop (ctxt); } -static unsigned int -fwprop_addr (void) -{ - unsigned i; - - fwprop_init (); - - /* Go through all the uses. df_uses_create will create new ones at the - end, and we'll go through them as well. */ - for (i = 0; i < DF_USES_TABLE_SIZE (); i++) - { - if (!propagations_left) - break; - - df_ref use = DF_USES_GET (i); - if (use) - if (DF_REF_TYPE (use) != DF_REF_REG_USE - && DF_REF_BB (use)->loop_father != NULL - /* The outer most loop is not really a loop. */ - && loop_outer (DF_REF_BB (use)->loop_father) != NULL) - forward_propagate_into (use); - } - - fwprop_done (); - return 0; -} - namespace { const pass_data pass_data_rtl_fwprop_addr = @@ -1600,7 +1633,7 @@ public: /* opt_pass methods: */ virtual bool gate (function *) { return gate_fwprop (); } - virtual unsigned int execute (function *) { return fwprop_addr (); } + virtual unsigned int execute (function *) { return fwprop (true); } }; // class pass_rtl_fwprop_addr diff --git a/gcc/testsuite/gfortran.dg/pr88833.f90 b/gcc/testsuite/gfortran.dg/pr88833.f90 new file mode 100644 index 00000000000..224e6ce5f3d --- /dev/null +++ b/gcc/testsuite/gfortran.dg/pr88833.f90 @@ -0,0 +1,9 @@ +! { dg-do assemble { target aarch64_asm_sve_ok } } +! { dg-options "-O3 -march=armv8.2-a+sve --save-temps" } + +subroutine foo(x) + real :: x(100) + x = x + 10 +end subroutine foo + +! 
{ dg-final { scan-assembler {\twhilelo\tp[0-9]+\.s, wzr, (w[0-9]+).*\twhilelo\tp[0-9]+\.s, w[0-9]+, \1} } }