From patchwork Wed Aug 1 22:23:45 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Richard Henderson X-Patchwork-Id: 143317 Delivered-To: patch@linaro.org Received: by 2002:a2e:9754:0:0:0:0:0 with SMTP id f20-v6csp1414984ljj; Wed, 1 Aug 2018 15:24:40 -0700 (PDT) X-Google-Smtp-Source: AAOMgpf6r3RxF9lCboxobee6acDJZR5Fbgr+hWCcvroCyIQRXwluMfUol1vNsVTQEswgFLMh5qsY X-Received: by 2002:a17:902:da4:: with SMTP id 33-v6mr138158plv.193.1533162280873; Wed, 01 Aug 2018 15:24:40 -0700 (PDT) ARC-Seal: i=1; a=rsa-sha256; t=1533162280; cv=none; d=google.com; s=arc-20160816; b=zPpw8i+JPaw6EZOVat6OFzqZ74IA3iHuHHPfjF9sgetdBlKNpPIpORTcrA5ToJ0lhi Pl6lB3+cjxQ1kX/xJubvcaU3wR1DJhKHmXP+XqDYPaKUtG9bcuu8mhlxvqivcqDyi1Vv c98gF92dzbfjTlhvVbYVKlH/R/ZbpvEQveheCfVCl45U5lFwlgUzMjN7ALVrCVJkMVTe ClneMOacKNKY3fLL0Va1ztIiVoYPYGgGSu/I9VB3AjILa1VAKA68Tmf1cB8FJpAwPm3G BYxQQabrtY01qUvokkeLmrPmko3DrOhYOSO9TNnLhJRt4/JYJOXuFJure+kgUAPFvGpr kASA== ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20160816; h=references:in-reply-to:message-id:date:subject:cc:to:from:sender :dkim-signature:delivered-to:sender:list-help:list-post:list-archive :list-subscribe:list-unsubscribe:list-id:precedence:mailing-list :arc-authentication-results; bh=sOhRp65/pGulKyAWQDLjGPI89+CTFVdgL3WNY/sc4X4=; b=ZPYRwaxGl21wHZn1bja8PbSN1qr8O+Lr4T6eaRN3c7a9Fdkga43NMIg3N6RAudh7TU q3F6gTUA9jzSYPozKYZUEECa/u/Yi5d/vP9wZyMeX4nLWrzAdNtsDDDldjRO+uAcNuhj Sit9l/pD8F+lMQWMAnFav/XROMliHTcOBjLPCAjnYWflFPir5hNTrq0lo7vYQBbdkqyr 3oe1/+fqHT9zY/li3GTc7X9EwrHyZHzbqA0GIlKQjfVD4UgsV+IkIQeTdc1l12GYgNWX eREw3HM1kunGiUqOyNYwo5yweZwmfURRWpRB+tII29QuS8mA3UbRAY1L0zCUASIep7M0 EH1Q== ARC-Authentication-Results: i=1; mx.google.com; dkim=pass header.i=@gmail.com header.s=20161025 header.b=AUW5hRh8; spf=pass (google.com: domain of libc-alpha-return-94987-patch=linaro.org@sourceware.org designates 209.132.180.131 as permitted sender) 
smtp.mailfrom="libc-alpha-return-94987-patch=linaro.org@sourceware.org" Return-Path: Received: from sourceware.org (server1.sourceware.org. [209.132.180.131]) by mx.google.com with ESMTPS id d66-v6si151499pfa.186.2018.08.01.15.24.40 for (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Wed, 01 Aug 2018 15:24:40 -0700 (PDT) Received-SPF: pass (google.com: domain of libc-alpha-return-94987-patch=linaro.org@sourceware.org designates 209.132.180.131 as permitted sender) client-ip=209.132.180.131; Authentication-Results: mx.google.com; dkim=pass header.i=@gmail.com header.s=20161025 header.b=AUW5hRh8; spf=pass (google.com: domain of libc-alpha-return-94987-patch=linaro.org@sourceware.org designates 209.132.180.131 as permitted sender) smtp.mailfrom="libc-alpha-return-94987-patch=linaro.org@sourceware.org" Received: (qmail 24395 invoked by alias); 1 Aug 2018 22:24:00 -0000 Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-alpha-owner@sourceware.org Delivered-To: mailing list libc-alpha@sourceware.org Received: (qmail 24267 invoked by uid 89); 1 Aug 2018 22:23:59 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-26.6 required=5.0 tests=BAYES_00, FREEMAIL_ENVFROM_END_DIGIT, FREEMAIL_FROM, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3, RCVD_IN_DNSWL_NONE, SPF_PASS autolearn=ham version=3.3.2 spammy=PROF, Jump X-HELO: mail-ua0-f173.google.com DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20161025; h=sender:from:to:cc:subject:date:message-id:in-reply-to:references; bh=sOhRp65/pGulKyAWQDLjGPI89+CTFVdgL3WNY/sc4X4=; b=AUW5hRh81h5n8nP4IbeGivMSIWMtM9SKr4mkjg9yZYmN8atGBDfDCYzvba79PiRxUo krJ33cOIFsOQnfRi/n3vUiy77hTOSXwQwNN05uX/qgQC1EomLcLNFcG1awAkNMBb4qS3 /kysswfSrYKnoc1r3+bk55t4yjg2IQBWN8AjZiQLPU7vCnMF94tGACb59FMNxRYtWXMO 
q+EaKhR1LTY4IDUnjlvvThm0GwJLgads7XwOmBN6N3cCrp+VEsiTqzQhcWplDR71Ne13 NgVqK+kWVa09qSO/aDAm/snG4L0EAcW8CUzZNOswknoCYAeU1BHZzDWuZHa1I2NEoHo4 Tynw== Return-Path: Sender: Richard Henderson From: rth@twiddle.net To: libc-alpha@sourceware.org Cc: marcus.shawcroft@linaro.org, szabolcs.nagy@arm.com, Richard Henderson Subject: [PATCH 1/3] aarch64: Clean up _dl_runtime_resolve Date: Wed, 1 Aug 2018 18:23:45 -0400 Message-Id: <20180801222347.18903-2-rth@twiddle.net> In-Reply-To: <20180801222347.18903-1-rth@twiddle.net> References: <20180801222347.18903-1-rth@twiddle.net> From: Richard Henderson * sysdeps/aarch64/dl-trampoline.S (_dl_runtime_resolve): Do not record unwind info for arguments; this is unneeded; do not save x9 just to have a register to pair with x8; properly include the 16 bytes of PLT stack into the unwind; create a frame pointer with the spare stack slot; rearrange the exit to only adjust the stack once. --- sysdeps/aarch64/dl-trampoline.S | 50 +++++++++------------------------ 1 file changed, 14 insertions(+), 36 deletions(-) -- 2.17.1 diff --git a/sysdeps/aarch64/dl-trampoline.S b/sysdeps/aarch64/dl-trampoline.S index a86d0722d4..e8e2af485a 100644 --- a/sysdeps/aarch64/dl-trampoline.S +++ b/sysdeps/aarch64/dl-trampoline.S @@ -32,7 +32,6 @@ .text .globl _dl_runtime_resolve .type _dl_runtime_resolve, #function - cfi_startproc .align 2 _dl_runtime_resolve: /* AArch64 we get called with: @@ -41,46 +40,24 @@ _dl_runtime_resolve: [sp, #8] lr [sp, #0] &PLTGOT[n] */ - + cfi_startproc + cfi_adjust_cfa_offset(16) /* Incorporate PLT */ cfi_rel_offset (lr, 8) /* Save arguments. */ - stp x8, x9, [sp, #-(80+8*16)]! + stp x29, x8, [sp, #-(80+8*16)]! 
cfi_adjust_cfa_offset (80+8*16) - cfi_rel_offset (x8, 0) - cfi_rel_offset (x9, 8) + cfi_rel_offset (x29, 0) + mov x29, sp stp x6, x7, [sp, #16] - cfi_rel_offset (x6, 16) - cfi_rel_offset (x7, 24) - stp x4, x5, [sp, #32] - cfi_rel_offset (x4, 32) - cfi_rel_offset (x5, 40) - stp x2, x3, [sp, #48] - cfi_rel_offset (x2, 48) - cfi_rel_offset (x3, 56) - stp x0, x1, [sp, #64] - cfi_rel_offset (x0, 64) - cfi_rel_offset (x1, 72) - stp q0, q1, [sp, #(80+0*16)] - cfi_rel_offset (q0, 80+0*16) - cfi_rel_offset (q1, 80+1*16) - stp q2, q3, [sp, #(80+2*16)] - cfi_rel_offset (q0, 80+2*16) - cfi_rel_offset (q1, 80+3*16) - stp q4, q5, [sp, #(80+4*16)] - cfi_rel_offset (q0, 80+4*16) - cfi_rel_offset (q1, 80+5*16) - stp q6, q7, [sp, #(80+6*16)] - cfi_rel_offset (q0, 80+6*16) - cfi_rel_offset (q1, 80+7*16) /* Get pointer to linker struct. */ ldr PTR_REG (0), [ip0, #-PTR_SIZE] @@ -101,25 +78,26 @@ _dl_runtime_resolve: mov ip0, x0 /* Get arguments and return address back. */ - ldp q0, q1, [sp, #(80+0*16)] - ldp q2, q3, [sp, #(80+2*16)] - ldp q4, q5, [sp, #(80+4*16)] + ldr lr, [sp, #80+8*16+8] ldp q6, q7, [sp, #(80+6*16)] + ldp q4, q5, [sp, #(80+4*16)] + ldp q2, q3, [sp, #(80+2*16)] + ldp q0, q1, [sp, #(80+0*16)] ldp x0, x1, [sp, #64] ldp x2, x3, [sp, #48] ldp x4, x5, [sp, #32] ldp x6, x7, [sp, #16] - ldp x8, x9, [sp], #(80+8*16) - cfi_adjust_cfa_offset (-(80+8*16)) - - ldp ip1, lr, [sp], #16 - cfi_adjust_cfa_offset (-16) + ldp x29, x8, [sp], 80+8*16+16 + cfi_adjust_cfa_offset (-(80+8*16+16)) + cfi_restore (lr) + cfi_restore (x29) /* Jump to the newly found address. 
*/ br ip0 cfi_endproc .size _dl_runtime_resolve, .-_dl_runtime_resolve + #ifndef PROF .globl _dl_runtime_profile .type _dl_runtime_profile, #function From patchwork Wed Aug 1 22:23:46 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Richard Henderson X-Patchwork-Id: 143318 Delivered-To: patch@linaro.org Received: by 2002:a2e:9754:0:0:0:0:0 with SMTP id f20-v6csp1415065ljj; Wed, 1 Aug 2018 15:24:46 -0700 (PDT) X-Google-Smtp-Source: AAOMgpc+djl4G7uGyrP00uW06fZygqzK9BBiDR98/9w6wmiPX+ZMnHp20VQke1O1TO87+ZvOwn7T X-Received: by 2002:a62:6a01:: with SMTP id f1-v6mr219865pfc.156.1533162286389; Wed, 01 Aug 2018 15:24:46 -0700 (PDT) ARC-Seal: i=1; a=rsa-sha256; t=1533162286; cv=none; d=google.com; s=arc-20160816; b=BEN8XTajvJIZVf4mRT7VT1F/hiw4wLDimVzE7Hv8AFyyVG5sjtxYJay8IqG6dyRNLx 07gTlbH1+mN9dN4dFdHpPk9bqWPWFN+98tDT0nkOFlAHbHGp9D9ZIEGT2sVFzbSAj5uv UfWOX313tRbPJLRK8QpB/SD2Ba99dOgOnATGp3a7TXcf2RbwUQK9b3kegS9o03ls+a3q Rum5W4mklIEGK/ryfhSzAcBTZal+jzMIL6VjY1O2r1vxt+GcJm8I5DTn/YwuUnygABdV JhaNUcm6ydOookoTS85kCylc1Qd2gOIV71ZMqQMbYZK/CDFoiEfOFwv5QoEuMLvgsK1F Y12Q== ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20160816; h=references:in-reply-to:message-id:date:subject:cc:to:from:sender :dkim-signature:delivered-to:sender:list-help:list-post:list-archive :list-subscribe:list-unsubscribe:list-id:precedence:mailing-list :arc-authentication-results; bh=aL1gAGPGZHQcFp7XdCKPoN1KsedXCn7XSo0W7UBK6Qo=; b=oXYAlrXQqvAVm6cmVmyb5YbzPVwVBq9hPIWBhSj8vSFGGaalb7pWitRmP9ACRmYHHi fZoG1wLlHMS72V4iBAzdF9+GnxioxczxGxp5vIGV95WrWMrx3q18+9O02ZvpWL6vc/dD 1qElSou9exFG1L0c+OIvzsWzMZaMgd9bJxJvn61NS8OqVyjdIQs3pQf9C/pkpJOoaCYz kjOH/wYw8rr9b1X0x2io5pC2/MOezZ8Uf2reVimxhVQ7hocNGNa8JyxQkVvjI+FmhkvM Ltr0asssfpXv9BA/HxC4j1dBkbxWDVaRLTjxmTiQXlN+xyjj76HHJWBuUUAShMiA5mGp ldDQ== ARC-Authentication-Results: i=1; mx.google.com; dkim=pass header.i=@gmail.com header.s=20161025 header.b=ERiXIqAw; spf=pass (google.com: 
domain of libc-alpha-return-94988-patch=linaro.org@sourceware.org designates 209.132.180.131 as permitted sender) smtp.mailfrom="libc-alpha-return-94988-patch=linaro.org@sourceware.org" Return-Path: Received: from sourceware.org (server1.sourceware.org. [209.132.180.131]) by mx.google.com with ESMTPS id y62-v6si138083pfd.254.2018.08.01.15.24.46 for (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Wed, 01 Aug 2018 15:24:46 -0700 (PDT) Received-SPF: pass (google.com: domain of libc-alpha-return-94988-patch=linaro.org@sourceware.org designates 209.132.180.131 as permitted sender) client-ip=209.132.180.131; Authentication-Results: mx.google.com; dkim=pass header.i=@gmail.com header.s=20161025 header.b=ERiXIqAw; spf=pass (google.com: domain of libc-alpha-return-94988-patch=linaro.org@sourceware.org designates 209.132.180.131 as permitted sender) smtp.mailfrom="libc-alpha-return-94988-patch=linaro.org@sourceware.org" Received: (qmail 25145 invoked by alias); 1 Aug 2018 22:24:05 -0000 Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-alpha-owner@sourceware.org Delivered-To: mailing list libc-alpha@sourceware.org Received: (qmail 24737 invoked by uid 89); 1 Aug 2018 22:24:02 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-26.6 required=5.0 tests=BAYES_00, FREEMAIL_ENVFROM_END_DIGIT, FREEMAIL_FROM, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3, RCVD_IN_DNSWL_NONE, SPF_PASS autolearn=ham version=3.3.2 spammy= X-HELO: mail-ua0-f172.google.com DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20161025; h=sender:from:to:cc:subject:date:message-id:in-reply-to:references; bh=aL1gAGPGZHQcFp7XdCKPoN1KsedXCn7XSo0W7UBK6Qo=; b=ERiXIqAwOubPdMusvYtOQVq5B6WNF9QOYwFpU+CAfGCxFW7xaudiCmUjruXfuUm1kp T3SvwayXPH8Ku19vFy8NKmlDWDCfa5D4ON3yWsQ4hvq/OY4Ok/Bhcuuy6AMWePRZeFHD 
/F52Y0U1jZHRZ6pWaiYv0sI6CA9ME/5diutyTZh7L7n+JDUxK5CJktrfE7rVzgz76SaH WncUInI0DlI7Em/aOfTYm8xIeKWH7FTeQh4MUATuWMLvcZLCsA0AClv2MvguJlJdFzqv b3jTDUoGmtGlGmvMmGRDCr4PTvbu4UfZv1+eQOdMltJpf21hY5eJn9H9h96gyB9K9y1b 1jCQ== Return-Path: Sender: Richard Henderson From: rth@twiddle.net To: libc-alpha@sourceware.org Cc: marcus.shawcroft@linaro.org, szabolcs.nagy@arm.com, Richard Henderson Subject: [PATCH 2/3] aarch64: Clean up _dl_runtime_profile Date: Wed, 1 Aug 2018 18:23:46 -0400 Message-Id: <20180801222347.18903-3-rth@twiddle.net> In-Reply-To: <20180801222347.18903-1-rth@twiddle.net> References: <20180801222347.18903-1-rth@twiddle.net> From: Richard Henderson Not adjusting La_aarch64_regs or La_aarch64_retval for the new AdvSIMD vector ABI; that will require more thought and coordination. In the meantime, this will at least pass the proper values to each callee, even if the values are not visible to auditing. * sysdeps/aarch64/dl-trampoline.S (_dl_runtime_profile): Do not record unwind info for arguments -- this is unneeded; properly include the 16 bytes of PLT stack into the unwind; save and restore the structure return pointer, x8; save all of the AdvSIMD registers defined for the vector ABI. --- sysdeps/aarch64/dl-trampoline.S | 138 ++++++++++++++++---------------- 1 file changed, 71 insertions(+), 67 deletions(-) -- 2.17.1 Reviewed-By: Szabolcs Nagy diff --git a/sysdeps/aarch64/dl-trampoline.S b/sysdeps/aarch64/dl-trampoline.S index e8e2af485a..67a7c1b207 100644 --- a/sysdeps/aarch64/dl-trampoline.S +++ b/sysdeps/aarch64/dl-trampoline.S @@ -101,7 +101,6 @@ _dl_runtime_resolve: #ifndef PROF .globl _dl_runtime_profile .type _dl_runtime_profile, #function - cfi_startproc .align 2 _dl_runtime_profile: /* AArch64 we get called with: @@ -111,15 +110,16 @@ _dl_runtime_profile: [sp, #0] &PLTGOT[n] Stack frame layout: - [sp, #...] lr - [sp, #...] 
&PLTGOT[n] - [sp, #96] La_aarch64_regs - [sp, #48] La_aarch64_retval - [sp, #40] frame size return from pltenter - [sp, #32] dl_profile_call saved x1 - [sp, #24] dl_profile_call saved x0 - [sp, #16] t1 - [sp, #0] x29, lr <- x29 + [x29, #...] lr + [x29, #...] &PLTGOT[n] + [x29, #96] La_aarch64_regs + [x29, #48] La_aarch64_retval + [x29, #40] frame size return from pltenter + [x29, #32] dl_profile_call saved x1 + [x29, #24] dl_profile_call saved x0 + [x29, #16] t1 + [x29, #0] x29, x8 + [x29, #-128] full q[0-7] contents */ # define OFFSET_T1 16 @@ -127,46 +127,39 @@ _dl_runtime_profile: # define OFFSET_FS OFFSET_SAVED_CALL_X0 + 16 # define OFFSET_RV OFFSET_FS + 8 # define OFFSET_RG OFFSET_RV + DL_SIZEOF_RV +# define OFFSET_SAVED_VEC (-16 * 8) -# define SF_SIZE OFFSET_RG + DL_SIZEOF_RG +# define SF_SIZE (OFFSET_RG + DL_SIZEOF_RG) # define OFFSET_PLTGOTN SF_SIZE # define OFFSET_LR OFFSET_PLTGOTN + 8 - /* Save arguments. */ - sub sp, sp, #SF_SIZE - cfi_adjust_cfa_offset (SF_SIZE) - stp x29, x30, [SP, #0] - mov x29, sp - cfi_def_cfa_register (x29) - cfi_rel_offset (x29, 0) + cfi_startproc + cfi_adjust_cfa_offset(16) /* Incorporate PLT */ cfi_rel_offset (lr, 8) - stp x0, x1, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*0] - cfi_rel_offset (x0, OFFSET_RG + DL_OFFSET_RG_X0 + 16*0 + 0) - cfi_rel_offset (x1, OFFSET_RG + DL_OFFSET_RG_X0 + 16*0 + 8) - stp x2, x3, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*1] - cfi_rel_offset (x2, OFFSET_RG + DL_OFFSET_RG_X0 + 16*1 + 0) - cfi_rel_offset (x3, OFFSET_RG + DL_OFFSET_RG_X0 + 16*1 + 8) - stp x4, x5, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*2] - cfi_rel_offset (x4, OFFSET_RG + DL_OFFSET_RG_X0 + 16*2 + 0) - cfi_rel_offset (x5, OFFSET_RG + DL_OFFSET_RG_X0 + 16*2 + 8) - stp x6, x7, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*3] - cfi_rel_offset (x6, OFFSET_RG + DL_OFFSET_RG_X0 + 16*3 + 0) - cfi_rel_offset (x7, OFFSET_RG + DL_OFFSET_RG_X0 + 16*3 + 8) + stp x29, x8, [SP, #-SF_SIZE]! 
+ cfi_adjust_cfa_offset (SF_SIZE) + cfi_rel_offset (x29, 0) + mov x29, sp + cfi_def_cfa_register (x29) + sub sp, sp, #-OFFSET_SAVED_VEC - stp d0, d1, [X29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*0] - cfi_rel_offset (d0, OFFSET_RG + DL_OFFSET_RG_D0 + 16*0) - cfi_rel_offset (d1, OFFSET_RG + DL_OFFSET_RG_D0 + 16*0 + 8) - stp d2, d3, [X29, #OFFSET_RG+ DL_OFFSET_RG_D0 + 16*1] - cfi_rel_offset (d2, OFFSET_RG + DL_OFFSET_RG_D0 + 16*1 + 0) - cfi_rel_offset (d3, OFFSET_RG + DL_OFFSET_RG_D0 + 16*1 + 8) - stp d4, d5, [X29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*2] - cfi_rel_offset (d4, OFFSET_RG + DL_OFFSET_RG_D0 + 16*2 + 0) - cfi_rel_offset (d5, OFFSET_RG + DL_OFFSET_RG_D0 + 16*2 + 8) - stp d6, d7, [X29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*3] - cfi_rel_offset (d6, OFFSET_RG + DL_OFFSET_RG_D0 + 16*3 + 0) - cfi_rel_offset (d7, OFFSET_RG + DL_OFFSET_RG_D0 + 16*3 + 8) + /* Save La_aarch64_regs. */ + stp x0, x1, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*0] + stp x2, x3, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*1] + stp x4, x5, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*2] + stp x6, x7, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*3] + stp d0, d1, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*0] + stp d2, d3, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*1] + stp d4, d5, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*2] + stp d6, d7, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*3] + + /* Re-save the full contents of the vector arguments. */ + stp q0, q1, [x29, #OFFSET_SAVED_VEC + 16*0] + stp q2, q3, [x29, #OFFSET_SAVED_VEC + 16*2] + stp q4, q5, [x29, #OFFSET_SAVED_VEC + 16*4] + stp q6, q7, [x29, #OFFSET_SAVED_VEC + 16*6] add x0, x29, #SF_SIZE + 16 ldr x1, [x29, #OFFSET_LR] @@ -201,31 +194,28 @@ _dl_runtime_profile: mov ip0, x0 /* Get arguments and return address back. 
*/ + ldr lr, [x29, #OFFSET_LR] ldp x0, x1, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*0] ldp x2, x3, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*1] ldp x4, x5, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*2] ldp x6, x7, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*3] - ldp d0, d1, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*0] - ldp d2, d3, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*1] - ldp d4, d5, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*2] - ldp d6, d7, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*3] + ldp q0, q1, [x29, #OFFSET_SAVED_VEC + 16*0] + ldp q2, q3, [x29, #OFFSET_SAVED_VEC + 16*2] + ldp q4, q5, [x29, #OFFSET_SAVED_VEC + 16*4] + ldp q6, q7, [x29, #OFFSET_SAVED_VEC + 16*6] - cfi_def_cfa_register (sp) - ldp x29, x30, [x29, #0] - cfi_restore(x29) - cfi_restore(x30) - - add sp, sp, SF_SIZE + 16 - cfi_adjust_cfa_offset (- SF_SIZE - 16) + mov sp, x29 + ldp x29, x8, [sp], SF_SIZE + 16 + cfi_def_cfa (sp, 0) + cfi_restore (x29) + cfi_restore (lr) /* Jump to the newly found address. */ br ip0 cfi_restore_state -1: - /* The new frame size is in ip0. */ - - sub PTR_REG (1), PTR_REG (29), ip0l + /* The new frame size is in ip0, extended for pointer size. */ +1: sub x1, sp, ip0 and sp, x1, #0xfffffffffffffff0 str x0, [x29, #OFFSET_T1] @@ -237,42 +227,56 @@ _dl_runtime_profile: ldr ip0, [x29, #OFFSET_T1] - /* Call the function. */ + /* Load the original arguments. 
*/ ldp x0, x1, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*0] ldp x2, x3, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*1] ldp x4, x5, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*2] ldp x6, x7, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*3] - ldp d0, d1, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*0] - ldp d2, d3, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*1] - ldp d4, d5, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*2] - ldp d6, d7, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*3] + ldr x8, [x29, 8] + ldp q0, q1, [x29, #OFFSET_SAVED_VEC + 16*0] + ldp q2, q3, [x29, #OFFSET_SAVED_VEC + 16*2] + ldp q4, q5, [x29, #OFFSET_SAVED_VEC + 16*4] + ldp q6, q7, [x29, #OFFSET_SAVED_VEC + 16*6] + + /* Call the function. */ blr ip0 + + /* Save La_aarch64_retval. */ stp x0, x1, [x29, #OFFSET_RV + DL_OFFSET_RV_X0] stp d0, d1, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*0] stp d2, d3, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*1] + /* Re-save the full contents of the vector return. */ + stp q0, q1, [x29, #OFFSET_SAVED_VEC + 16*0] + stp q2, q3, [x29, #OFFSET_SAVED_VEC + 16*2] + stp q4, q5, [x29, #OFFSET_SAVED_VEC + 16*4] + stp q6, q7, [x29, #OFFSET_SAVED_VEC + 16*6] + /* Setup call to pltexit */ ldp x0, x1, [x29, #OFFSET_SAVED_CALL_X0] add x2, x29, #OFFSET_RG add x3, x29, #OFFSET_RV bl _dl_call_pltexit + /* Restore the full return value. 
*/ ldp x0, x1, [x29, #OFFSET_RV + DL_OFFSET_RV_X0] - ldp d0, d1, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*0] - ldp d2, d3, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*1] + ldp q0, q1, [x29, #OFFSET_SAVED_VEC + 16*0] + ldp q2, q3, [x29, #OFFSET_SAVED_VEC + 16*2] + ldp q4, q5, [x29, #OFFSET_SAVED_VEC + 16*4] + ldp q6, q7, [x29, #OFFSET_SAVED_VEC + 16*6] + /* LR from within La_aarch64_reg */ ldr lr, [x29, #OFFSET_RG + DL_OFFSET_RG_LR] - cfi_restore(lr) mov sp, x29 cfi_def_cfa_register (sp) ldr x29, [x29, #0] - cfi_restore(x29) add sp, sp, SF_SIZE + 16 cfi_adjust_cfa_offset (- SF_SIZE - 16) + cfi_restore(x29) + cfi_restore(lr) br lr cfi_endproc .size _dl_runtime_profile, .-_dl_runtime_profile #endif - .previous From patchwork Wed Aug 1 22:23:47 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Richard Henderson X-Patchwork-Id: 143319 Delivered-To: patch@linaro.org Received: by 2002:a2e:9754:0:0:0:0:0 with SMTP id f20-v6csp1415154ljj; Wed, 1 Aug 2018 15:24:53 -0700 (PDT) X-Google-Smtp-Source: AAOMgpf4RHJlMj3of3x2WGmrGNj+ZZOKYEMH/BXPJV+ue29SdseQsVFEyMAAxAD1Gp5kdp6ga/AD X-Received: by 2002:a65:57c9:: with SMTP id q9-v6mr213462pgr.128.1533162293127; Wed, 01 Aug 2018 15:24:53 -0700 (PDT) ARC-Seal: i=1; a=rsa-sha256; t=1533162293; cv=none; d=google.com; s=arc-20160816; b=P1hs7tYciKpsRJtFYABg1tCjwOS5FFvEF/wo4c4oQ8nZvUakE4fbxeMewrkx42RIQa ApjpZoRzGBOspskRwtMhduMAqeXM2IAPwmETi5gtmDBzfqkc4yEbiNGwRSCwbO3L4o6z Nvfb/hDMBbbmAN6O8uYJ2k4MJlBn/4aMVRiKttJCeom2MrUL+IstRi1DrzEN6JVQ5v13 CSRK/+lX3ij1IGH6attI/GdQ6sKTGH/KkfhJrKRTTYaw8SeKcBSxFz2ZHe7KH3BBTHcj YzmC+2Jk6CXPbpbLmjxgRfxDrIm1+0Nm8rJKaqNzJtCCLJAQO6tGQFHc3GCwViLy1bDm enaA== ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20160816; h=references:in-reply-to:message-id:date:subject:cc:to:from:sender :dkim-signature:delivered-to:sender:list-help:list-post:list-archive :list-subscribe:list-unsubscribe:list-id:precedence:mailing-list 
:arc-authentication-results; bh=hBKtMhCjVGD6X8lBZBjb5AN6l81Hdrcy8nCWSz88XjE=; b=adFCdDeKq8tPZoTAHb5kLi/UE7jfmD1mSwN0DjPG3Rde+qHpicoW8cVXfRi+ySZSmo dd4yXTuWGagptV7sIkwb52CJbUUwXya2jzcEYWcvY2yQiTuxl95xYH4SeHX0JnY2npCU gywtfIQnRU22MNyYv1JWz+Cw67iy+eI/XTc4VsKbgJ55EbrtDwzkgW+WFh0GRhRnsXWh TBINXFasaw8gUbCeWUIoSNlwZaEvQpZ1zdnawdaUobNr4Z9/Rb7lzUwsEjrQIMLuCp4s tmly0tWFgyEn6o++j/55o/hc1W9KxpK00n6I+THGwMDMvr1C2oriXwuaTmxdouo/0/R+ 97XA== ARC-Authentication-Results: i=1; mx.google.com; dkim=pass header.i=@gmail.com header.s=20161025 header.b=iDjhLltq; spf=pass (google.com: domain of libc-alpha-return-94989-patch=linaro.org@sourceware.org designates 209.132.180.131 as permitted sender) smtp.mailfrom="libc-alpha-return-94989-patch=linaro.org@sourceware.org" Return-Path: Received: from sourceware.org (server1.sourceware.org. [209.132.180.131]) by mx.google.com with ESMTPS id n70-v6si134761pfa.320.2018.08.01.15.24.52 for (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Wed, 01 Aug 2018 15:24:53 -0700 (PDT) Received-SPF: pass (google.com: domain of libc-alpha-return-94989-patch=linaro.org@sourceware.org designates 209.132.180.131 as permitted sender) client-ip=209.132.180.131; Authentication-Results: mx.google.com; dkim=pass header.i=@gmail.com header.s=20161025 header.b=iDjhLltq; spf=pass (google.com: domain of libc-alpha-return-94989-patch=linaro.org@sourceware.org designates 209.132.180.131 as permitted sender) smtp.mailfrom="libc-alpha-return-94989-patch=linaro.org@sourceware.org" Received: (qmail 25350 invoked by alias); 1 Aug 2018 22:24:06 -0000 Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-alpha-owner@sourceware.org Delivered-To: mailing list libc-alpha@sourceware.org Received: (qmail 25028 invoked by uid 89); 1 Aug 2018 22:24:04 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-26.6 
required=5.0 tests=BAYES_00, FREEMAIL_ENVFROM_END_DIGIT, FREEMAIL_FROM, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3, RCVD_IN_DNSWL_NONE, SPF_PASS autolearn=ham version=3.3.2 spammy=H*r:sk:h1-v6so, HX-Received:sk:l27-v6m, amended X-HELO: mail-ua0-f195.google.com DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20161025; h=sender:from:to:cc:subject:date:message-id:in-reply-to:references; bh=hBKtMhCjVGD6X8lBZBjb5AN6l81Hdrcy8nCWSz88XjE=; b=iDjhLltq88m0yQiPEIzUGI70PMyrCp6gvKcHldmRTr4sLgDf7DbnxfVxMGdq1+JzNA xw3FMLv7xYQdVZUu4vT9eZEwbLmwy4kl3IW9gzpLrFo6T5o+ZbsnBE1Jvzn1fbMdl8Sv gZPahMxhoT8KUUYZHMoMWnj9H7m/XVZ7RwMt+razhK8dHYdnylz2qCjRF84goB2CUwxm 8x8WbcWLs152EmOgv9csuqKwt+j2SqrBVsVq+Jb6rRN+fYN90G9sy5uKIR2/JHX4V1rm KFqn2qeMlCTIxwoLMr0xISNByGeNyLk3f0L/WJBe8BnCuBleGIBXnUH7lKnn8OYwnp1+ D75w== Return-Path: Sender: Richard Henderson From: rth@twiddle.net To: libc-alpha@sourceware.org Cc: marcus.shawcroft@linaro.org, szabolcs.nagy@arm.com, Richard Henderson Subject: [PATCH 3/3] aarch64: Save and restore SVE registers in ld.so Date: Wed, 1 Aug 2018 18:23:47 -0400 Message-Id: <20180801222347.18903-4-rth@twiddle.net> In-Reply-To: <20180801222347.18903-1-rth@twiddle.net> References: <20180801222347.18903-1-rth@twiddle.net> From: Richard Henderson Add SVE versions of _dl_runtime_resolve and _dl_runtime_profile. This honors the extended vector calling convention described in ARM_100986_0000_00_en (SVEpcs 00bet1). * sysdeps/aarch64/dl-trampoline.S (_dl_runtime_resolve_sve): New. (_dl_runtime_profile_sve): New. * sysdeps/aarch64/dl-machine.h (elf_machine_runtime_setup): Use the new routines if HWCAP_SVE is set. 
--- sysdeps/aarch64/dl-machine.h | 13 +- sysdeps/aarch64/dl-trampoline.S | 343 ++++++++++++++++++++++++++++++++ 2 files changed, 353 insertions(+), 3 deletions(-) -- 2.17.1 diff --git a/sysdeps/aarch64/dl-machine.h b/sysdeps/aarch64/dl-machine.h index 4935aa7c54..ea7c5c71d5 100644 --- a/sysdeps/aarch64/dl-machine.h +++ b/sysdeps/aarch64/dl-machine.h @@ -69,6 +69,9 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) ElfW(Addr) *got; extern void _dl_runtime_resolve (ElfW(Word)); extern void _dl_runtime_profile (ElfW(Word)); + extern void _dl_runtime_resolve_sve (ElfW(Word)); + extern void _dl_runtime_profile_sve (ElfW(Word)); + unsigned has_sve = GLRO(dl_hwcap) & HWCAP_SVE; got = (ElfW(Addr) *) D_PTR (l, l_info[DT_PLTGOT]); if (got[1]) @@ -83,9 +86,11 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) to intercept the calls to collect information. In this case we don't store the address in the GOT so that all future calls also end in this function. */ - if ( profile) + if (profile) { - got[2] = (ElfW(Addr)) &_dl_runtime_profile; + got[2] = (has_sve + ? (ElfW(Addr)) &_dl_runtime_profile_sve + : (ElfW(Addr)) &_dl_runtime_profile); if (GLRO(dl_profile) != NULL && _dl_name_match_p (GLRO(dl_profile), l)) @@ -98,7 +103,9 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) /* This function will get called to fix up the GOT entry indicated by the offset on the stack, and then jump to the resolved address. */ - got[2] = (ElfW(Addr)) &_dl_runtime_resolve; + got[2] = (has_sve + ? 
(ElfW(Addr)) &_dl_runtime_resolve_sve + : (ElfW(Addr)) &_dl_runtime_resolve); } } diff --git a/sysdeps/aarch64/dl-trampoline.S b/sysdeps/aarch64/dl-trampoline.S index 67a7c1b207..e23e5f1aad 100644 --- a/sysdeps/aarch64/dl-trampoline.S +++ b/sysdeps/aarch64/dl-trampoline.S @@ -280,3 +280,346 @@ _dl_runtime_profile: cfi_endproc .size _dl_runtime_profile, .-_dl_runtime_profile #endif + +/* + * For functions conforming to the procedure call standard as + * amended for SVE support (ARM_100986_0000_00_en (SVEpcs 00bet1)), + * we must save the entire contents of Z0-Z7 as well as P0-P3. + */ + .arch armv8-a+sve + + .globl _dl_runtime_resolve_sve + .type _dl_runtime_resolve_sve, #function + .align 2 +_dl_runtime_resolve_sve: + /* AArch64 we get called with: + ip0 &PLTGOT[2] + ip1 temp(dl resolver entry point) + [sp, #8] lr + [sp, #0] &PLTGOT[n] + */ + cfi_startproc + cfi_adjust_cfa_offset(16) /* Incorporate PLT */ + cfi_rel_offset (lr, 8) + + /* Save arguments. */ + stp x29, x8, [sp, #-80]! + cfi_adjust_cfa_offset (80) + cfi_rel_offset (x29, 0) + mov x29, sp + cfi_def_cfa_register (x29) + + stp x6, x7, [sp, #16] + stp x4, x5, [sp, #32] + stp x2, x3, [sp, #48] + stp x0, x1, [sp, #64] + + /* Allocate space for, and store, Z[0-7]. */ + addvl sp, sp, #-8 + str z0, [sp, #0, mul vl] + str z1, [sp, #1, mul vl] + str z2, [sp, #2, mul vl] + str z3, [sp, #3, mul vl] + str z4, [sp, #4, mul vl] + str z5, [sp, #5, mul vl] + str z6, [sp, #6, mul vl] + str z7, [sp, #7, mul vl] + + /* Allocate space for, and store, P[0-3]. */ + addpl sp, sp, #-4 + str p0, [sp, #0, mul vl] + str p1, [sp, #1, mul vl] + str p2, [sp, #2, mul vl] + str p3, [sp, #3, mul vl] + + /* Get pointer to linker struct. */ + ldr PTR_REG (0), [ip0, #-PTR_SIZE] + + /* Prepare to call _dl_fixup(). */ + ldr x1, [x29, 80] /* Recover &PLTGOT[n] */ + + sub x1, x1, ip0 + add x1, x1, x1, lsl #1 + lsl x1, x1, #3 + sub x1, x1, #(RELA_SIZE<<3) + lsr x1, x1, #3 + + /* Call fixup routine. */ + bl _dl_fixup + + /* Save the return. 
*/ + mov ip0, x0 + + /* Get arguments and return address back. */ + ldr p0, [sp, #0, mul vl] + ldr p1, [sp, #1, mul vl] + ldr p2, [sp, #2, mul vl] + ldr p3, [sp, #3, mul vl] + addpl sp, sp, #4 + + ldr z0, [sp, #0, mul vl] + ldr z1, [sp, #1, mul vl] + ldr z2, [sp, #2, mul vl] + ldr z3, [sp, #3, mul vl] + ldr z4, [sp, #4, mul vl] + ldr z5, [sp, #5, mul vl] + ldr z6, [sp, #6, mul vl] + ldr z7, [sp, #7, mul vl] + addvl sp, sp, #8 + + ldr lr, [sp, #88] + ldp x0, x1, [sp, #64] + ldp x2, x3, [sp, #48] + ldp x4, x5, [sp, #32] + ldp x6, x7, [sp, #16] + ldp x29, x8, [sp], #96 + cfi_def_cfa (sp, 0) + cfi_restore (lr) + cfi_restore (x29) + + /* Jump to the newly found address. */ + br ip0 + + cfi_endproc + .size _dl_runtime_resolve_sve, .-_dl_runtime_resolve_sve + +#ifndef PROF + .globl _dl_runtime_profile_sve + .type _dl_runtime_profile_sve, #function + .align 2 +_dl_runtime_profile_sve: + /* AArch64 we get called with: + ip0 &PLTGOT[2] + ip1 temp(dl resolver entry point) + [sp, #8] lr + [sp, #0] &PLTGOT[n] + + Stack frame layout: + [x29, #...] lr + [x29, #...] &PLTGOT[n] + [x29, #96] La_aarch64_regs + [x29, #48] La_aarch64_retval + [x29, #40] frame size return from pltenter + [x29, #32] dl_profile_call saved x1 + [x29, #24] dl_profile_call saved x0 + [x29, #16] t1 + [x29, #0] x29, x8 <- x29 + [x29, #-1, mul vl] full p[0-3] + [x29, #-2, mul vl] full z[0-7] <- sp + + ??? Extending the profiling hook for full SVE register export + is tricky given the variable register size. Perhaps the new + La_aarch64_regs should contain pointers to Z0 and P0, and + the current VL, and one infers the addresses from there. + + This one new form could be used for all, with AdvSIMD + devolving into VL=16 with no predicate registers. + + In the meantime, this function simply saves the contents of + the SVE registers, but only exposes the AdvSIMD portion to + the profile hooks. 
+ */ + + cfi_startproc + cfi_adjust_cfa_offset(16) /* Incorporate PLT */ + cfi_rel_offset (lr, 8) + + stp x29, x8, [SP, #-SF_SIZE]! + cfi_adjust_cfa_offset (SF_SIZE) + cfi_rel_offset (x29, 0) + mov x29, sp + cfi_def_cfa_register (x29) + + /* Save La_aarch64_regs. */ + stp x0, x1, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*0] + stp x2, x3, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*1] + stp x4, x5, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*2] + stp x6, x7, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*3] + stp d0, d1, [X29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*0] + stp d2, d3, [X29, #OFFSET_RG+ DL_OFFSET_RG_D0 + 16*1] + stp d4, d5, [X29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*2] + stp d6, d7, [X29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*3] + + /* Re-save the full contents of the vector arguments. + + Note that PL = VL/8, so we can save all 4 predicates + in (less than) the space of one vector; this minimizes + the number of stack adjustments required, and gives a + predictable place for each register. + + Despite the unfortunate assembler mnemonics, the vector + stores do not overlap the preceding predicate stores. */ + addvl sp, sp, #-9 + + str p0, [x29, #-1, mul vl] + str p1, [x29, #-2, mul vl] + str p2, [x29, #-3, mul vl] + str p3, [x29, #-4, mul vl] + + str z0, [x29, #-2, mul vl] + str z1, [x29, #-3, mul vl] + str z2, [x29, #-4, mul vl] + str z3, [x29, #-5, mul vl] + str z4, [x29, #-6, mul vl] + str z5, [x29, #-7, mul vl] + str z6, [x29, #-8, mul vl] + str z7, [x29, #-9, mul vl] + + add x0, x29, #SF_SIZE + 16 + ldr x1, [x29, #OFFSET_LR] + stp x0, x1, [x29, #OFFSET_RG + DL_OFFSET_RG_SP] + + /* Get pointer to linker struct. */ + ldr PTR_REG (0), [ip0, #-PTR_SIZE] + + /* Prepare to call _dl_profile_fixup(). 
*/ + ldr x1, [x29, OFFSET_PLTGOTN] /* Recover &PLTGOT[n] */ + + sub x1, x1, ip0 + add x1, x1, x1, lsl #1 + lsl x1, x1, #3 + sub x1, x1, #(RELA_SIZE<<3) + lsr x1, x1, #3 + + stp x0, x1, [x29, #OFFSET_SAVED_CALL_X0] + + /* Set up extra args for _dl_profile_fixup */ + ldr x2, [x29, #OFFSET_LR] /* load saved LR */ + add x3, x29, #OFFSET_RG /* address of La_aarch64_reg */ + add x4, x29, #OFFSET_FS /* address of framesize */ + bl _dl_profile_fixup + + ldr ip0l, [x29, #OFFSET_FS] /* framesize == 0 */ + cmp ip0l, #0 + bge 1f + cfi_remember_state + + /* Save the return. */ + mov ip0, x0 + + /* Get arguments and return address back. */ + ldr p0, [x29, #-1, mul vl] + ldr p1, [x29, #-2, mul vl] + ldr p2, [x29, #-3, mul vl] + ldr p3, [x29, #-4, mul vl] + + ldr z0, [x29, #-2, mul vl] + ldr z1, [x29, #-3, mul vl] + ldr z2, [x29, #-4, mul vl] + ldr z3, [x29, #-5, mul vl] + ldr z4, [x29, #-6, mul vl] + ldr z5, [x29, #-7, mul vl] + ldr z6, [x29, #-8, mul vl] + ldr z7, [x29, #-9, mul vl] + + ldr lr, [x29, #OFFSET_LR] + ldp x0, x1, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*0] + ldp x2, x3, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*1] + ldp x4, x5, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*2] + ldp x6, x7, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*3] + + mov sp, x29 + ldp x29, x8, [sp], SF_SIZE + 16 + cfi_def_cfa (sp, 0) + cfi_restore(x29) + cfi_restore(lr) + + /* Jump to the newly found address. */ + br ip0 + + cfi_restore_state + /* The new frame size is in ip0, extended for pointer size. */ +1: sub x1, sp, ip0 + and sp, x1, #0xfffffffffffffff0 + + str x0, [x29, #OFFSET_T1] + + mov x0, sp + add x1, x29, #SF_SIZE + 16 + mov x2, ip0 + bl memcpy + + ldr ip0, [x29, #OFFSET_T1] + + /* Reload the full arguments. 
*/ + ldp x0, x1, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*0] + ldp x2, x3, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*1] + ldp x4, x5, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*2] + ldp x6, x7, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*3] + ldr x8, [x29, 8] + + ldr p0, [x29, #-1, mul vl] + ldr p1, [x29, #-2, mul vl] + ldr p2, [x29, #-3, mul vl] + ldr p3, [x29, #-4, mul vl] + + ldr z0, [x29, #-2, mul vl] + ldr z1, [x29, #-3, mul vl] + ldr z2, [x29, #-4, mul vl] + ldr z3, [x29, #-5, mul vl] + ldr z4, [x29, #-6, mul vl] + ldr z5, [x29, #-7, mul vl] + ldr z6, [x29, #-8, mul vl] + ldr z7, [x29, #-9, mul vl] + + /* Call the function. */ + blr ip0 + + /* Store La_aarch64_retval, as if for the non-vector ABI. */ + stp x0, x1, [x29, #OFFSET_RV + DL_OFFSET_RV_X0] + stp d0, d1, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*0] + stp d2, d3, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*1] + + /* Store the full contents of the vector return. */ + str p0, [x29, #-1, mul vl] + str p1, [x29, #-2, mul vl] + str p2, [x29, #-3, mul vl] + str p3, [x29, #-4, mul vl] + + str z0, [x29, #-2, mul vl] + str z1, [x29, #-3, mul vl] + str z2, [x29, #-4, mul vl] + str z3, [x29, #-5, mul vl] + str z4, [x29, #-6, mul vl] + str z5, [x29, #-7, mul vl] + str z6, [x29, #-8, mul vl] + str z7, [x29, #-9, mul vl] + + /* Setup call to pltexit */ + ldp x0, x1, [x29, #OFFSET_SAVED_CALL_X0] + add x2, x29, #OFFSET_RG + add x3, x29, #OFFSET_RV + bl _dl_call_pltexit + + /* Reload the full return value. 
*/ + ldp x0, x1, [x29, #OFFSET_RV + DL_OFFSET_RV_X0] + + ldr p0, [x29, #-1, mul vl] + ldr p1, [x29, #-2, mul vl] + ldr p2, [x29, #-3, mul vl] + ldr p3, [x29, #-4, mul vl] + + ldr z0, [x29, #-2, mul vl] + ldr z1, [x29, #-3, mul vl] + ldr z2, [x29, #-4, mul vl] + ldr z3, [x29, #-5, mul vl] + ldr z4, [x29, #-6, mul vl] + ldr z5, [x29, #-7, mul vl] + ldr z6, [x29, #-8, mul vl] + ldr z7, [x29, #-9, mul vl] + + /* LR from within La_aarch64_reg */ + ldr lr, [x29, #OFFSET_RG + DL_OFFSET_RG_LR] + mov sp, x29 + cfi_def_cfa_register (sp) + ldr x29, [x29, #0] + add sp, sp, SF_SIZE + 16 + cfi_adjust_cfa_offset (- SF_SIZE - 16) + cfi_restore(x29) + cfi_restore(lr) + + br lr + + cfi_endproc + .size _dl_runtime_profile_sve, .-_dl_runtime_profile_sve +#endif