From patchwork Fri Oct 6 21:15:53 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Adhemerval Zanella X-Patchwork-Id: 115134 Delivered-To: patch@linaro.org Received: by 10.140.22.163 with SMTP id 32csp2246022qgn; Fri, 6 Oct 2017 14:16:20 -0700 (PDT) X-Received: by 10.101.66.135 with SMTP id j7mr3025678pgp.39.1507324580233; Fri, 06 Oct 2017 14:16:20 -0700 (PDT) ARC-Seal: i=1; a=rsa-sha256; t=1507324580; cv=none; d=google.com; s=arc-20160816; b=oU1bJeiLlOFQ3L5ZfMWZp7T2Ie21Du6bUALicdrM3pXFH8ZaQvhygn03C1MGV7nMM+ rBlnP1eO+3zw7MTGALrPjIL6ASE7f0kmn7f6qAiFlrF3w7K4CqRKgnI0LKVdCCUtQWo7 kutEYKoH9/a5t1xAf8gNNwUUGg7x1OtCHTTmlQXggFOx3V7Jq3auvccbYcheLgUobiL3 XVb9Fc7UbezfFHB6GZaqkDZEvI6QHItJ6F42MVUpj6sP/jUdlr/HACSZahpSyVXVg37R Ny3YlDEsbuRdTmwPTY7r83tr6CqTMmzVdsAL1rCu2iUEAzVEWmRnCViocufg9wFLc+7F PNCA== ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20160816; h=message-id:date:subject:to:from:delivered-to:sender:list-help :list-post:list-archive:list-subscribe:list-unsubscribe:list-id :precedence:mailing-list:dkim-signature:domainkey-signature :arc-authentication-results; bh=n+TYLXByY86q5jTcC5azLn9RlIZ/oVVyhEaQShuojEo=; b=LNzp6Y2U6Wz2Pg4eteQACjM20KfDTmpNiihs3AuFI5sJ1GFVE1bfp84zW/GcuHNVBp Ux7zzG8q2/42jVMbAJ4ANZ3rq/vV+4Dk3tALiiYKh8qGa50YMis1e+NEXK4PLebEalEr eOhOrxem9lD1HhNNeLzk7IoJBK6snqz+/bLUE/gPbi4XqAhAFH75QFpIQBmO73iMvSVy HD6WxDW9dywvkCRcUOlkWfHuGPbzOoEGsBhAZem9Ib6sSIXScc/rxUBgbsV69HTNA0pI jrcV8wDaJa/aOBewIDC1LZxhVlCsau6rHdwETmiro6LKkQ3mJdGz2CwB5t10D5I7GFxq b05A== ARC-Authentication-Results: i=1; mx.google.com; dkim=pass header.i=@sourceware.org header.s=default header.b=I7BBHiR1; spf=pass (google.com: domain of libc-alpha-return-85522-patch=linaro.org@sourceware.org designates 209.132.180.131 as permitted sender) smtp.mailfrom=libc-alpha-return-85522-patch=linaro.org@sourceware.org; dmarc=fail (p=NONE sp=NONE dis=NONE) header.from=linaro.org Return-Path: Received: from 
sourceware.org (server1.sourceware.org. [209.132.180.131]) by mx.google.com with ESMTPS id f35si1870327plh.822.2017.10.06.14.16.19 for (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Fri, 06 Oct 2017 14:16:20 -0700 (PDT) Received-SPF: pass (google.com: domain of libc-alpha-return-85522-patch=linaro.org@sourceware.org designates 209.132.180.131 as permitted sender) client-ip=209.132.180.131; Authentication-Results: mx.google.com; dkim=pass header.i=@sourceware.org header.s=default header.b=I7BBHiR1; spf=pass (google.com: domain of libc-alpha-return-85522-patch=linaro.org@sourceware.org designates 209.132.180.131 as permitted sender) smtp.mailfrom=libc-alpha-return-85522-patch=linaro.org@sourceware.org; dmarc=fail (p=NONE sp=NONE dis=NONE) header.from=linaro.org DomainKey-Signature: a=rsa-sha1; c=nofws; d=sourceware.org; h=list-id :list-unsubscribe:list-subscribe:list-archive:list-post :list-help:sender:from:to:subject:date:message-id; q=dns; s= default; b=qKLcw6dIxYCxrPzqqTAumld2i4ybXldvdm9E7rJ4/UzVtJvT0xP6H 1qBwIcI+X36Xeg8zQsmtia6TTm3mmBqvc1xpDFA5agJgvl5QfNl+Z3RSMFGcZKl4 420aOJPI/Q0Mj+ZywaxV39OWvRH4mmFkeHvPlYSd5dW/BQ1iT56/ss= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=sourceware.org; h=list-id :list-unsubscribe:list-subscribe:list-archive:list-post :list-help:sender:from:to:subject:date:message-id; s=default; bh=++2rjELAZxhvGyYnEO0bEpQITMs=; b=I7BBHiR1wiGSwsiLG6RfvdnspGKH 6ntNwfRD6FfnoBky0xim/0GAXSBMeFLKge0wiOWrXfAIAUQyBozMAEQNsF07OP2E jeN8BNTKluuMJ0r2CRJ52HP3CelAz98T/KTE/kjPuReo3lJJevCN/v+85IHv0xGb qxuNxoNLkyznTgU= Received: (qmail 12255 invoked by alias); 6 Oct 2017 21:16:07 -0000 Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-alpha-owner@sourceware.org Delivered-To: mailing list libc-alpha@sourceware.org Received: (qmail 12241 invoked by uid 89); 6 Oct 2017 21:16:06 -0000 Authentication-Results: sourceware.org; 
auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-26.3 required=5.0 tests=BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3, KAM_STOCKGEN, RCVD_IN_DNSWL_NONE, RCVD_IN_SORBS_SPAM, SPF_PASS autolearn=ham version=3.3.2 spammy=H*MI:2612 X-HELO: mail-qt0-f177.google.com X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:from:to:subject:date:message-id; bh=n+TYLXByY86q5jTcC5azLn9RlIZ/oVVyhEaQShuojEo=; b=L2622PLKEy2A6FvGGjHMxoEVogayYJjoBF2avjDit/PVyPz+0Obxj42Oj2qmcluZgs RPV4FdzAN+IZTgR3/rltT8x5HVhFlwzoyDwh2649JRxJc6y6CcpEY+kwuwkCiodOHvf0 WcBPl2o4P53YYCkoOCbgOEu/Mzobeo+Uc8X3Tjc9RmaUReD2oBeL1vCOlqZkfuTKjJE9 s+5aI+stdN8phO28DDAGudBFgpw62mrK81YxRAnUpcBNdOyKLxniVKGdTD6PkW3PoYak mMBAjabk1I9AOBmcl7FlX6TMhyfKEQnP3ZdXCLeUzP4vpJoEpXmpScn2bYN/M25LiANl SlzQ== X-Gm-Message-State: AMCzsaUtrh2dpvod1RgNo6WAaO9oUNYre5NgedGOtfdAxgjuVPXx0rQZ G7qAqnWo/E6XokLoK9QeJrWNCvByDnA= X-Google-Smtp-Source: AOwi7QAiH1Q9r8EEkQILPBvKpx1JAuknOYrcrGBniKZNEoHSTSMQE6mtxSE4GS0Sqkj0BT0u5gh/tA== X-Received: by 10.200.15.83 with SMTP id l19mr4833297qtk.168.1507324561628; Fri, 06 Oct 2017 14:16:01 -0700 (PDT) From: Adhemerval Zanella To: libc-alpha@sourceware.org Subject: [PATCH 1/2] arm: Implement memcpy ifunc selection in C Date: Fri, 6 Oct 2017 18:15:53 -0300 Message-Id: <1507324554-2612-1-git-send-email-adhemerval.zanella@linaro.org> This patch refactors the ARM memcpy ifunc selector to a C implementation. No functional change is expected, including ifunc resolution rules. To avoid build issues with mainline GCC, which does not enable GNU indirect function support by default for ARM (--enable-gnu-indirect-function), two fixes are added: 1. The new macro arm_libc_ifunc_hidden_def is defined using direct asm directives instead of function attributes. This avoids the incompatible types for a symbol and its alias. 2. A new macro NO_MEM_INTERAL_SYM_HACKS is added to symbol-hacks.h and defined for the arm ifunc objects. 
It avoids the symbol definition loop because compiler might emit the hacks for first fix before the ifunc function definition itself. Checked on armv7-linux-gnueabihf and with a build for arm-linux-gnueabi, arm-linux-gnueabihf with and without multiarch support and with both GCC 7.1 and GCC mainline. * sysdeps/arm/arm-ifunc.h: New file. * sysdeps/arm/armv7/multiarch/ifunc-memcpy.h: Likewise. * sysdeps/arm/armv7/multiarch/memcpy.c: Likewise. * sysdeps/arm/armv7/multiarch/memcpy_arm.S: Likewise. * sysdeps/arm/armv7/multiarch/rtld-memcpy.S: Likewise. * sysdeps/arm/armv7/multiarch/Makefile [$(subdir) = string] (sysdep_routines): Add memcpy_arm. (CFLAGS-.os): New rule. * sysdeps/arm/armv7/multiarch/memcpy.S: Remove file. * sysdeps/generic/symbol-hacks.h: Use mem* alias hacks if not defined NO_MEM_INTERAL_SYM_HACKS. --- ChangeLog | 12 +++++ sysdeps/arm/arm-ifunc.h | 47 ++++++++++++++++++ sysdeps/arm/armv7/multiarch/Makefile | 8 +++- sysdeps/arm/armv7/multiarch/ifunc-memcpy.h | 37 +++++++++++++++ sysdeps/arm/armv7/multiarch/memcpy.S | 76 ------------------------------ sysdeps/arm/armv7/multiarch/memcpy.c | 33 +++++++++++++ sysdeps/arm/armv7/multiarch/memcpy_arm.S | 6 +++ sysdeps/arm/armv7/multiarch/rtld-memcpy.S | 1 + sysdeps/generic/symbol-hacks.h | 3 +- 9 files changed, 145 insertions(+), 78 deletions(-) create mode 100644 sysdeps/arm/arm-ifunc.h create mode 100644 sysdeps/arm/armv7/multiarch/ifunc-memcpy.h delete mode 100644 sysdeps/arm/armv7/multiarch/memcpy.S create mode 100644 sysdeps/arm/armv7/multiarch/memcpy.c create mode 100644 sysdeps/arm/armv7/multiarch/memcpy_arm.S create mode 100644 sysdeps/arm/armv7/multiarch/rtld-memcpy.S -- 2.7.4 diff --git a/sysdeps/arm/arm-ifunc.h b/sysdeps/arm/arm-ifunc.h new file mode 100644 index 0000000..f7d3473 --- /dev/null +++ b/sysdeps/arm/arm-ifunc.h @@ -0,0 +1,47 @@ +/* Common definition for ifunc resolvers. Linux/ARM version. + This file is part of the GNU C Library. + Copyright (C) 2017 Free Software Foundation, Inc. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include + +#define INIT_ARCH() + +#define arm_libc_ifunc_redirected(redirected_name, name, expr) \ + __ifunc (redirected_name, name, expr(hwcap), int hwcap, INIT_ARCH) + +#if defined SHARED +# ifdef HAVE_GCC_IFUNC +# define arm_libc_ifunc_hidden_def(redirect_name, name) \ + __hidden_ver1 (name, __GI_##name, redirect_name) \ + __attribute__ ((visibility ("hidden"))) +# else +/* GCC 8 without support for ifunc issues an error when trying to + alias symbols with different prototypes (in this case the redirect + one which have the same as the function implementation and the + ifunc resolver). The macro below avoid it by issuing the required + alias directly. 
*/ +# define arm_libc_ifunc_hidden_def(redirect_name, name) \ + _arm_ifunc_hidden_ver (__GI_##name, name) +# define _arm_ifunc_hidden_ver(local, name) \ + asm (".globl " #local ";" \ + ".hidden " #local ";" \ + #local "=" #name) +# endif +#else +# define arm_libc_ifunc_hidden_def(redirect_name, name) +#endif diff --git a/sysdeps/arm/armv7/multiarch/Makefile b/sysdeps/arm/armv7/multiarch/Makefile index 9e1e61c..24c5e5a 100644 --- a/sysdeps/arm/armv7/multiarch/Makefile +++ b/sysdeps/arm/armv7/multiarch/Makefile @@ -1,3 +1,9 @@ ifeq ($(subdir),string) -sysdep_routines += memcpy_neon memcpy_vfp memchr_neon +sysdep_routines += memcpy_neon memcpy_vfp memchr_neon memcpy_arm + +# For ifunc resolvers compiler might place the alias from generic +# symbol-hacks.h before the resolver implementation itself. This +# causes definition loops and the macro below suppress the alias +# definition. +CFLAGS-.os += -DNO_MEM_INTERAL_SYM_HACKS endif diff --git a/sysdeps/arm/armv7/multiarch/ifunc-memcpy.h b/sysdeps/arm/armv7/multiarch/ifunc-memcpy.h new file mode 100644 index 0000000..78cef2a --- /dev/null +++ b/sysdeps/arm/armv7/multiarch/ifunc-memcpy.h @@ -0,0 +1,37 @@ +/* Common definition for memcpy resolver. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ + +#ifdef __SOFTFP__ +__typeof (REDIRECT_NAME) OPTIMIZE (arm) attribute_hidden; +#endif +__typeof (REDIRECT_NAME) OPTIMIZE (vfp) attribute_hidden; +__typeof (REDIRECT_NAME) OPTIMIZE (neon) attribute_hidden; + +static inline void * +IFUNC_SELECTOR (int hwcap) +{ + if (hwcap & HWCAP_ARM_NEON) + return OPTIMIZE (neon); +#ifdef __SOFTFP__ + if (hwcap & HWCAP_ARM_VFP) + return OPTIMIZE (vfp); + return OPTIMIZE (arm); +#else + return OPTIMIZE (vfp); +#endif +} diff --git a/sysdeps/arm/armv7/multiarch/memcpy.S b/sysdeps/arm/armv7/multiarch/memcpy.S deleted file mode 100644 index 8a53bda..0000000 --- a/sysdeps/arm/armv7/multiarch/memcpy.S +++ /dev/null @@ -1,76 +0,0 @@ -/* Multiple versions of memcpy - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2013-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -/* Thumb requires excess IT instructions here. */ -#define NO_THUMB -#include -#include - -#if IS_IN (libc) -/* Under __ARM_NEON__, memcpy_neon.S defines the name memcpy. 
*/ -# ifndef __ARM_NEON__ - .text -ENTRY(memcpy) - .type memcpy, %gnu_indirect_function -# ifdef __SOFTFP__ - ldr r1, .Lmemcpy_arm - tst r0, #HWCAP_ARM_VFP - ldrne r1, .Lmemcpy_vfp -# else - ldr r1, .Lmemcpy_vfp -# endif - tst r0, #HWCAP_ARM_NEON - ldrne r1, .Lmemcpy_neon -1: - add r0, r1, pc - DO_RET(lr) - -# ifdef __SOFTFP__ -.Lmemcpy_arm: - .long C_SYMBOL_NAME(__memcpy_arm) - 1b - PC_OFS -# endif -.Lmemcpy_neon: - .long C_SYMBOL_NAME(__memcpy_neon) - 1b - PC_OFS -.Lmemcpy_vfp: - .long C_SYMBOL_NAME(__memcpy_vfp) - 1b - PC_OFS - -END(memcpy) - -libc_hidden_builtin_def (memcpy) -#endif /* Not __ARM_NEON__. */ - -/* These versions of memcpy are defined not to clobber any VFP or NEON - registers so they must always call the ARM variant of the memcpy code. */ -strong_alias (__memcpy_arm, __aeabi_memcpy) -strong_alias (__memcpy_arm, __aeabi_memcpy4) -strong_alias (__memcpy_arm, __aeabi_memcpy8) -libc_hidden_def (__memcpy_arm) - -#undef libc_hidden_builtin_def -#define libc_hidden_builtin_def(name) -#undef weak_alias -#define weak_alias(x, y) -#undef libc_hidden_def -#define libc_hidden_def(name) - -#define memcpy __memcpy_arm - -#endif - -#include "memcpy_impl.S" diff --git a/sysdeps/arm/armv7/multiarch/memcpy.c b/sysdeps/arm/armv7/multiarch/memcpy.c new file mode 100644 index 0000000..7ef6714 --- /dev/null +++ b/sysdeps/arm/armv7/multiarch/memcpy.c @@ -0,0 +1,33 @@ +/* Multiple versions of memcpy. + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#if IS_IN (libc) +# define memcpy __redirect_memcpy +# include +# undef memcpy + +# include + +# define SYMBOL_NAME memcpy +# include "ifunc-memcpy.h" + +arm_libc_ifunc_redirected (__redirect_memcpy, memcpy, IFUNC_SELECTOR); + +arm_libc_ifunc_hidden_def (__redirect_memcpy, memcpy); +#endif diff --git a/sysdeps/arm/armv7/multiarch/memcpy_arm.S b/sysdeps/arm/armv7/multiarch/memcpy_arm.S new file mode 100644 index 0000000..37565cd --- /dev/null +++ b/sysdeps/arm/armv7/multiarch/memcpy_arm.S @@ -0,0 +1,6 @@ +#define memcpy __memcpy_arm +#include "memcpy_impl.S" + +strong_alias (__memcpy_arm, __aeabi_memcpy) +strong_alias (__memcpy_arm, __aeabi_memcpy4) +strong_alias (__memcpy_arm, __aeabi_memcpy8) diff --git a/sysdeps/arm/armv7/multiarch/rtld-memcpy.S b/sysdeps/arm/armv7/multiarch/rtld-memcpy.S new file mode 100644 index 0000000..0190edc --- /dev/null +++ b/sysdeps/arm/armv7/multiarch/rtld-memcpy.S @@ -0,0 +1 @@ +#include <./sysdeps/arm/armv7/multiarch/memcpy_impl.S> diff --git a/sysdeps/generic/symbol-hacks.h b/sysdeps/generic/symbol-hacks.h index d614c09..589585a 100644 --- a/sysdeps/generic/symbol-hacks.h +++ b/sysdeps/generic/symbol-hacks.h @@ -1,6 +1,7 @@ /* Some compiler optimizations may transform loops into memset/memmove calls and without proper declaration it may generate PLT calls. 
*/ -#if !defined __ASSEMBLER__ && IS_IN (libc) && defined SHARED +#if !defined __ASSEMBLER__ && IS_IN (libc) && defined SHARED \ + && !defined(NO_MEM_INTERAL_SYM_HACKS) asm ("memmove = __GI_memmove"); asm ("memset = __GI_memset"); asm ("memcpy = __GI_memcpy"); From patchwork Fri Oct 6 21:15:54 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Adhemerval Zanella X-Patchwork-Id: 115135 Delivered-To: patch@linaro.org Received: by 10.140.22.163 with SMTP id 32csp2246174qgn; Fri, 6 Oct 2017 14:16:30 -0700 (PDT) X-Received: by 10.98.10.21 with SMTP id s21mr3356203pfi.119.1507324590751; Fri, 06 Oct 2017 14:16:30 -0700 (PDT) ARC-Seal: i=1; a=rsa-sha256; t=1507324590; cv=none; d=google.com; s=arc-20160816; b=BRikCpjaLkvxPaf1R2LFo1ueNsKDf/DhIT+HlriFw3gjJmboGFJFYDNkEjHV2rFpy4 8jlbW0So6wMm94mrl08GOAC50oxKZE2ukzx8qC0urEqeSVwgQ8xkIjAIGiG4z2xH8WIj fNGMGpLNbF9lW27khSp3HKS7a8ObavX7iYzU8apFX1tsslPhTpAe5j8S9q8QOGX7TbBh QkLOFUEUlVojMKSOt6bL6lWgK33FYZEPoHo4T8OyGI7It/ifHRmnlU1R2+MhjgvdOIZO aG62+jzwUQIFWEQ72zPvf26u/S/LirNRVmVlqfmWOxmmQdPa1Hp8X/1FZdHSacb1pnsO tRXA== ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20160816; h=references:in-reply-to:message-id:date:subject:to:from:delivered-to :sender:list-help:list-post:list-archive:list-subscribe :list-unsubscribe:list-id:precedence:mailing-list:dkim-signature :domainkey-signature:arc-authentication-results; bh=rBb2UOLiYNGNbNHF3E6HND6KedyLSb/iw33CH5TQ2Vo=; b=Rh1zh3cfU5cVY3jwC2NJwFh3Nivz1WvuVkiBtd4WoK/Zdk7RX0Y7XldoExXSri8KtY QkhO8Y38ZqnRWpXNY1amhX9tWbsrvGr2LdG0Sv7aZ7h7oJ1xfDmO2mtUGgbZQijCTYxP fZ7GbBmjFzsmFxoGFDzBNZXISDg2ujR2jDsjXxZE9xjp4/Qxh7F0/bDacSse0MC9KkPy PlmkA28NKvpSeqAhUVQ0QlCc0gx4TyqMPdw24FV1PAbnZEBjfzEAdcP3UInx1qDwobXL aHd/sJkcx1UYD8j99mkBtMclbSWgjYQH8fo9ZkDg42WfpgC8qZXk40UFrI61H5X8DLYf 4vbw== ARC-Authentication-Results: i=1; mx.google.com; dkim=pass header.i=@sourceware.org header.s=default header.b=dnc0moSK; spf=pass 
(google.com: domain of libc-alpha-return-85523-patch=linaro.org@sourceware.org designates 209.132.180.131 as permitted sender) smtp.mailfrom=libc-alpha-return-85523-patch=linaro.org@sourceware.org; dmarc=fail (p=NONE sp=NONE dis=NONE) header.from=linaro.org Return-Path: Received: from sourceware.org (server1.sourceware.org. [209.132.180.131]) by mx.google.com with ESMTPS id c77si1933727pfd.233.2017.10.06.14.16.30 for (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Fri, 06 Oct 2017 14:16:30 -0700 (PDT) Received-SPF: pass (google.com: domain of libc-alpha-return-85523-patch=linaro.org@sourceware.org designates 209.132.180.131 as permitted sender) client-ip=209.132.180.131; Authentication-Results: mx.google.com; dkim=pass header.i=@sourceware.org header.s=default header.b=dnc0moSK; spf=pass (google.com: domain of libc-alpha-return-85523-patch=linaro.org@sourceware.org designates 209.132.180.131 as permitted sender) smtp.mailfrom=libc-alpha-return-85523-patch=linaro.org@sourceware.org; dmarc=fail (p=NONE sp=NONE dis=NONE) header.from=linaro.org DomainKey-Signature: a=rsa-sha1; c=nofws; d=sourceware.org; h=list-id :list-unsubscribe:list-subscribe:list-archive:list-post :list-help:sender:from:to:subject:date:message-id:in-reply-to :references; q=dns; s=default; b=qQuD+Dh5cT6MGonag1IG2henmM2EhA5 1ZDcZo0O7lcPwHqDJ/fafxIKBvG596b73rwEjkK2cNnqB/ZI8rlahGKGSG6gum8x aRyIWaDTXxHIwxhSUVcZndBr2Jm5pNSHnP/T1KlcNRrBqnt46p8lhfK4jBF/mstI c8vSoPqNeeCc= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=sourceware.org; h=list-id :list-unsubscribe:list-subscribe:list-archive:list-post :list-help:sender:from:to:subject:date:message-id:in-reply-to :references; s=default; bh=qCRgIQYlmBWD2TyZ+2DXpuiYsiQ=; b=dnc0m oSKAFXNiltPdmPb40nud0MZ2HoFqB+yJIvUaCz+1VY7VcBbe/fh+D6Uz1jG0A3mc rbGiHcqtDioVRc1p63SUiMM4I4ybZ/zcdTW1/0Na0uAoVCvWu1p7716YO2O/Kd+8 9/7LFLaDl5w5alherj6Y4IBllM5h4CCIzTbsjU= Received: (qmail 12547 invoked by alias); 6 Oct 2017 21:16:09 -0000 Mailing-List: contact 
libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-alpha-owner@sourceware.org Delivered-To: mailing list libc-alpha@sourceware.org Received: (qmail 12490 invoked by uid 89); 6 Oct 2017 21:16:09 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-26.3 required=5.0 tests=BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3, KAM_STOCKGEN, RCVD_IN_DNSWL_NONE, RCVD_IN_SORBS_SPAM, SPF_PASS autolearn=ham version=3.3.2 spammy= X-HELO: mail-qt0-f169.google.com X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:from:to:subject:date:message-id:in-reply-to :references; bh=rBb2UOLiYNGNbNHF3E6HND6KedyLSb/iw33CH5TQ2Vo=; b=odca3xqlk2s0F2y6kSctLrv+5GFrEPPXEpqHntSn44ifz2SbcODWqi+Du4kRBoaJs5 MBiZ6X3dSAOMTGWie6cZMgQxzo/IO+iVFzxAY5cXIe+lKdKegFzzhY3p4a7iXVrCsfEI tC2c+kPDP28H4mKSWJfYJdG8lutI+OhAgD9XXkXkd8MbS/MY4s8kUUmyjnQCd9LFtkF/ SvExDuV8o24YlaWekOBFQD18w79Rw5xo1B/gGT6Wl8bv4N1KxiDs3COS9i7majW/Std5 m8miXOo5uAW5g+gB6ejyY6XutRIwq6pZhv7glQv9c2SlM0ogCOIg2v1OupJ92m6vkJO5 LnvQ== X-Gm-Message-State: AMCzsaUUo3KTnM2oRfnUawMy8iFrH4fCcDcPYx3PVzzKxjcvEpt66fGp VUfqKLISzeB8p7fqmOUD6tbnLhQNqqE= X-Google-Smtp-Source: AOwi7QCwX6p5oxRFZrjeXBBXPYVyc9ZOmx+p3wss8/Gmk9v3FkPqmKg8LDgGuwGBTz5ZZZ/nXrlF1g== X-Received: by 10.237.61.49 with SMTP id g46mr5014383qtf.233.1507324563177; Fri, 06 Oct 2017 14:16:03 -0700 (PDT) From: Adhemerval Zanella To: libc-alpha@sourceware.org Subject: [PATCH 2/2] arm: Implement memcpy ifunc selection in C Date: Fri, 6 Oct 2017 18:15:54 -0300 Message-Id: <1507324554-2612-2-git-send-email-adhemerval.zanella@linaro.org> In-Reply-To: <1507324554-2612-1-git-send-email-adhemerval.zanella@linaro.org> References: <1507324554-2612-1-git-send-email-adhemerval.zanella@linaro.org> This patch refactors the ARM memchr ifunc selector to a C implementation. 
No functional change is expected, including ifunc resolution rules. It also reorganizes the ifunc options code: 1. The memchr_impl.S is renamed to memchr_neon.S and the multiple compilation options (which route to the armv6t2/memchr one) are removed. The code to build if __ARM_NEON__ is defined is also simplified. 2. A memchr_noneon is added (which was previously built along with the ifunc resolver) and includes the armv6t2 implementation directly. 3. Same as 2. for the loader object. Checked on armv7-linux-gnueabihf and with a build for arm-linux-gnueabi, arm-linux-gnueabihf with and without multiarch support and with both GCC 7.1 and GCC mainline. * sysdeps/arm/armv7/multiarch/Makefile [$(subdir) = string] (sysdep_routines): Add memchr_noneon. * sysdeps/arm/armv7/multiarch/ifunc-memchr.h: New file. * sysdeps/arm/armv7/multiarch/memchr_noneon.S: Likewise. * sysdeps/arm/armv7/multiarch/rtld-memchr.S: Likewise. * sysdeps/arm/armv7/multiarch/memchr.S: Remove file. * sysdeps/arm/armv7/multiarch/memchr.c: New file. * sysdeps/arm/armv7/multiarch/memchr_impl.S: Move to ... * sysdeps/arm/armv7/multiarch/memchr_neon.S: ... here. 
--- ChangeLog | 10 ++ sysdeps/arm/armv7/multiarch/Makefile | 3 +- sysdeps/arm/armv7/multiarch/ifunc-memchr.h | 28 ++++ sysdeps/arm/armv7/multiarch/memchr.S | 59 -------- sysdeps/arm/armv7/multiarch/memchr.c | 35 +++++ sysdeps/arm/armv7/multiarch/memchr_impl.S | 219 --------------------------- sysdeps/arm/armv7/multiarch/memchr_neon.S | 221 +++++++++++++++++++++++++++- sysdeps/arm/armv7/multiarch/memchr_noneon.S | 8 + sysdeps/arm/armv7/multiarch/rtld-memchr.S | 1 + 9 files changed, 299 insertions(+), 285 deletions(-) create mode 100644 sysdeps/arm/armv7/multiarch/ifunc-memchr.h delete mode 100644 sysdeps/arm/armv7/multiarch/memchr.S create mode 100644 sysdeps/arm/armv7/multiarch/memchr.c delete mode 100644 sysdeps/arm/armv7/multiarch/memchr_impl.S create mode 100644 sysdeps/arm/armv7/multiarch/memchr_noneon.S create mode 100644 sysdeps/arm/armv7/multiarch/rtld-memchr.S -- 2.7.4 diff --git a/sysdeps/arm/armv7/multiarch/Makefile b/sysdeps/arm/armv7/multiarch/Makefile index 24c5e5a..ccca96e 100644 --- a/sysdeps/arm/armv7/multiarch/Makefile +++ b/sysdeps/arm/armv7/multiarch/Makefile @@ -1,5 +1,6 @@ ifeq ($(subdir),string) -sysdep_routines += memcpy_neon memcpy_vfp memchr_neon memcpy_arm +sysdep_routines += memcpy_neon memcpy_vfp memchr_neon memcpy_arm \ + memchr_noneon # For ifunc resolvers compiler might place the alias from generic # symbol-hacks.h before the resolver implementation itself. This diff --git a/sysdeps/arm/armv7/multiarch/ifunc-memchr.h b/sysdeps/arm/armv7/multiarch/ifunc-memchr.h new file mode 100644 index 0000000..42f89fa --- /dev/null +++ b/sysdeps/arm/armv7/multiarch/ifunc-memchr.h @@ -0,0 +1,28 @@ +/* Common definition for memchr resolver. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +__typeof (REDIRECT_NAME) OPTIMIZE (neon) attribute_hidden; +__typeof (REDIRECT_NAME) OPTIMIZE (noneon) attribute_hidden; + +static inline void * +IFUNC_SELECTOR (int hwcap) +{ + if (hwcap & HWCAP_ARM_NEON) + return OPTIMIZE (neon); + return OPTIMIZE (noneon); +} diff --git a/sysdeps/arm/armv7/multiarch/memchr.S b/sysdeps/arm/armv7/multiarch/memchr.S deleted file mode 100644 index 8e8097a..0000000 --- a/sysdeps/arm/armv7/multiarch/memchr.S +++ /dev/null @@ -1,59 +0,0 @@ -/* Multiple versions of memchr - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2013-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . 
*/ - -#include -#include - -#if IS_IN (libc) -/* Under __ARM_NEON__, memchr_neon.S defines the name memchr. */ -# ifndef __ARM_NEON__ - .text - .arm -ENTRY(memchr) - .type memchr, %gnu_indirect_function - ldr r1, .Lmemchr_noneon - tst r0, #HWCAP_ARM_NEON - ldrne r1, .Lmemchr_neon -1: - add r0, r1, pc - DO_RET(lr) - -.Lmemchr_noneon: - .long C_SYMBOL_NAME(__memchr_noneon) - 1b - 8 -.Lmemchr_neon: - .long C_SYMBOL_NAME(__memchr_neon) - 1b - 8 - -END(memchr) - -libc_hidden_builtin_def (memchr) -# endif /* Not __ARM_NEON__. */ -libc_hidden_def (__memchr_noneon) - -# undef libc_hidden_builtin_def -# define libc_hidden_builtin_def(name) -# undef weak_alias -# define weak_alias(x, y) -# undef libc_hidden_def -# define libc_hidden_def(name) - -# define memchr __memchr_noneon - -#endif - -#include "memchr_impl.S" diff --git a/sysdeps/arm/armv7/multiarch/memchr.c b/sysdeps/arm/armv7/multiarch/memchr.c new file mode 100644 index 0000000..906bcd5 --- /dev/null +++ b/sysdeps/arm/armv7/multiarch/memchr.c @@ -0,0 +1,35 @@ +/* Multiple versions of memchr. + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +/* For __ARM_NEON__ memchr_neon.S defines memchr directly and ifunc + is not used. 
*/ +#if IS_IN (libc) && !defined (__ARM_NEON__) +# define memchr __redirect_memchr +# include +# undef memchr + +# include + +# define SYMBOL_NAME memchr +# include "ifunc-memchr.h" + +arm_libc_ifunc_redirected (__redirect_memchr, memchr, IFUNC_SELECTOR); + +arm_libc_ifunc_hidden_def (__redirect_memchr, memchr); +#endif diff --git a/sysdeps/arm/armv7/multiarch/memchr_impl.S b/sysdeps/arm/armv7/multiarch/memchr_impl.S deleted file mode 100644 index e8cbb97..0000000 --- a/sysdeps/arm/armv7/multiarch/memchr_impl.S +++ /dev/null @@ -1,219 +0,0 @@ -/* memchr implemented using NEON. - Copyright (C) 2011-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library. If not, see - . 
*/ - -#ifdef MEMCHR_NEON - -#include - - .arch armv7-a - .fpu neon - - -/* Arguments */ -#define srcin r0 -#define chrin r1 -#define cntin r2 - -/* Retval */ -#define result r0 /* Live range does not overlap with srcin */ - -/* Working registers */ -#define src r1 /* Live range does not overlap with chrin */ -#define tmp r3 -#define synd r0 /* No overlap with srcin or result */ -#define soff r12 - -/* Working NEON registers */ -#define vrepchr q0 -#define vdata0 q1 -#define vdata0_0 d2 /* Lower half of vdata0 */ -#define vdata0_1 d3 /* Upper half of vdata0 */ -#define vdata1 q2 -#define vdata1_0 d4 /* Lower half of vhas_chr0 */ -#define vdata1_1 d5 /* Upper half of vhas_chr0 */ -#define vrepmask q3 -#define vrepmask0 d6 -#define vrepmask1 d7 -#define vend q4 -#define vend0 d8 -#define vend1 d9 - -/* - * Core algorithm: - * - * For each 32-byte chunk we calculate a 32-bit syndrome value, with one bit per - * byte. Each bit is set if the relevant byte matched the requested character - * and cleared otherwise. Since the bits in the syndrome reflect exactly the - * order in which things occur in the original string, counting trailing zeros - * allows to identify exactly which byte has matched. - */ - -#ifndef NO_THUMB - .thumb_func -#else - .arm -#endif - .p2align 4,,15 - -ENTRY(memchr) - /* Use a simple loop if there are less than 8 bytes to search. */ - cmp cntin, #7 - bhi .Llargestr - and chrin, chrin, #0xff - -.Lsmallstr: - subs cntin, cntin, #1 - blo .Lnotfound /* Return not found if reached end. */ - ldrb tmp, [srcin], #1 - cmp tmp, chrin - bne .Lsmallstr /* Loop again if not found. */ - /* Otherwise fixup address and return. */ - sub result, srcin, #1 - bx lr - - -.Llargestr: - vdup.8 vrepchr, chrin /* Duplicate char across all lanes. */ - /* - * Magic constant 0x8040201008040201 allows us to identify which lane - * matches the requested byte. 
- */ - movw tmp, #0x0201 - movt tmp, #0x0804 - lsl soff, tmp, #4 - vmov vrepmask0, tmp, soff - vmov vrepmask1, tmp, soff - /* Work with aligned 32-byte chunks */ - bic src, srcin, #31 - ands soff, srcin, #31 - beq .Lloopintro /* Go straight to main loop if it's aligned. */ - - /* - * Input string is not 32-byte aligned. We calculate the syndrome - * value for the aligned 32 bytes block containing the first bytes - * and mask the irrelevant part. - */ - vld1.8 {vdata0, vdata1}, [src:256]! - sub tmp, soff, #32 - adds cntin, cntin, tmp - vceq.i8 vdata0, vdata0, vrepchr - vceq.i8 vdata1, vdata1, vrepchr - vand vdata0, vdata0, vrepmask - vand vdata1, vdata1, vrepmask - vpadd.i8 vdata0_0, vdata0_0, vdata0_1 - vpadd.i8 vdata1_0, vdata1_0, vdata1_1 - vpadd.i8 vdata0_0, vdata0_0, vdata1_0 - vpadd.i8 vdata0_0, vdata0_0, vdata0_0 - vmov synd, vdata0_0[0] - - /* Clear the soff lower bits */ - lsr synd, synd, soff - lsl synd, synd, soff - /* The first block can also be the last */ - bls .Lmasklast - /* Have we found something already? */ -#ifndef NO_THUMB - cbnz synd, .Ltail -#else - cmp synd, #0 - bne .Ltail -#endif - - -.Lloopintro: - vpush {vend} - /* 264/265 correspond to d8/d9 for q4 */ - cfi_adjust_cfa_offset (16) - cfi_rel_offset (264, 0) - cfi_rel_offset (265, 8) - .p2align 3,,7 -.Lloop: - vld1.8 {vdata0, vdata1}, [src:256]! - subs cntin, cntin, #32 - vceq.i8 vdata0, vdata0, vrepchr - vceq.i8 vdata1, vdata1, vrepchr - /* If we're out of data we finish regardless of the result. */ - bls .Lend - /* Use a fast check for the termination condition. */ - vorr vend, vdata0, vdata1 - vorr vend0, vend0, vend1 - vmov synd, tmp, vend0 - orrs synd, synd, tmp - /* We're not out of data, loop if we haven't found the character. */ - beq .Lloop - -.Lend: - vpop {vend} - cfi_adjust_cfa_offset (-16) - cfi_restore (264) - cfi_restore (265) - - /* Termination condition found, let's calculate the syndrome value. 
*/ - vand vdata0, vdata0, vrepmask - vand vdata1, vdata1, vrepmask - vpadd.i8 vdata0_0, vdata0_0, vdata0_1 - vpadd.i8 vdata1_0, vdata1_0, vdata1_1 - vpadd.i8 vdata0_0, vdata0_0, vdata1_0 - vpadd.i8 vdata0_0, vdata0_0, vdata0_0 - vmov synd, vdata0_0[0] -#ifndef NO_THUMB - cbz synd, .Lnotfound - bhi .Ltail /* Uses the condition code from - subs cntin, cntin, #32 above. */ -#else - cmp synd, #0 - beq .Lnotfound - cmp cntin, #0 - bhi .Ltail -#endif - - -.Lmasklast: - /* Clear the (-cntin) upper bits to avoid out-of-bounds matches. */ - neg cntin, cntin - lsl synd, synd, cntin - lsrs synd, synd, cntin - it eq - moveq src, #0 /* If no match, set src to 0 so the retval is 0. */ - - -.Ltail: - /* Count the trailing zeros using bit reversing */ - rbit synd, synd - /* Compensate the last post-increment */ - sub src, src, #32 - /* Count the leading zeros */ - clz synd, synd - /* Compute the potential result and return */ - add result, src, synd - bx lr - - -.Lnotfound: - /* Set result to NULL if not found and return */ - mov result, #0 - bx lr - -END(memchr) -libc_hidden_builtin_def (memchr) - -#else - -#include "../../armv6t2/memchr.S" - -#endif diff --git a/sysdeps/arm/armv7/multiarch/memchr_neon.S b/sysdeps/arm/armv7/multiarch/memchr_neon.S index ee21818..a400033 100644 --- a/sysdeps/arm/armv7/multiarch/memchr_neon.S +++ b/sysdeps/arm/armv7/multiarch/memchr_neon.S @@ -1,9 +1,218 @@ -#ifdef __ARM_NEON__ -/* Under __ARM_NEON__, this file defines memchr directly. */ -libc_hidden_builtin_def (memchr) -#else +/* memchr implemented using NEON. + Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+/* For __ARM_NEON__ this file defines memchr.  */
+#ifndef __ARM_NEON__
 # define memchr __memchr_neon
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(a)
+#endif
+
+	.arch	armv7-a
+	.fpu	neon
+
+
+/* Arguments */
+#define srcin		r0
+#define chrin		r1
+#define cntin		r2
+
+/* Retval */
+#define result		r0	/* Live range does not overlap with srcin */
+
+/* Working registers */
+#define src		r1	/* Live range does not overlap with chrin */
+#define tmp		r3
+#define synd		r0	/* No overlap with srcin or result */
+#define soff		r12
+
+/* Working NEON registers */
+#define vrepchr		q0
+#define vdata0		q1
+#define vdata0_0	d2	/* Lower half of vdata0 */
+#define vdata0_1	d3	/* Upper half of vdata0 */
+#define vdata1		q2
+#define vdata1_0	d4	/* Lower half of vdata1 */
+#define vdata1_1	d5	/* Upper half of vdata1 */
+#define vrepmask	q3
+#define vrepmask0	d6
+#define vrepmask1	d7
+#define vend		q4
+#define vend0		d8
+#define vend1		d9
+
+/*
+ * Core algorithm:
+ *
+ * For each 32-byte chunk we calculate a 32-bit syndrome value, with one bit per
+ * byte. Each bit is set if the relevant byte matched the requested character
+ * and cleared otherwise. Since the bits in the syndrome reflect exactly the
+ * order in which things occur in the original string, counting trailing zeros
+ * allows us to identify exactly which byte has matched.
+ */
+
+#ifndef NO_THUMB
+	.thumb_func
+#else
+	.arm
+#endif
+	.p2align 4,,15
+
+ENTRY(memchr)
+	/* Use a simple loop if there are less than 8 bytes to search.  */
+	cmp	cntin, #7
+	bhi	.Llargestr
+	and	chrin, chrin, #0xff
+
+.Lsmallstr:
+	subs	cntin, cntin, #1
+	blo	.Lnotfound	/* Return not found if reached end.  */
+	ldrb	tmp, [srcin], #1
+	cmp	tmp, chrin
+	bne	.Lsmallstr	/* Loop again if not found.  */
+	/* Otherwise fixup address and return.  */
+	sub	result, srcin, #1
+	bx	lr
+
+
+.Llargestr:
+	vdup.8	vrepchr, chrin	/* Duplicate char across all lanes. */
+	/*
+	 * Magic constant 0x8040201008040201 allows us to identify which lane
+	 * matches the requested byte.
+	 */
+	movw	tmp, #0x0201
+	movt	tmp, #0x0804
+	lsl	soff, tmp, #4
+	vmov	vrepmask0, tmp, soff
+	vmov	vrepmask1, tmp, soff
+	/* Work with aligned 32-byte chunks */
+	bic	src, srcin, #31
+	ands	soff, srcin, #31
+	beq	.Lloopintro	/* Go straight to main loop if it's aligned. */
+
+	/*
+	 * Input string is not 32-byte aligned. We calculate the syndrome
+	 * value for the aligned 32 bytes block containing the first bytes
+	 * and mask the irrelevant part.
+	 */
+	vld1.8	{vdata0, vdata1}, [src:256]!
+	sub	tmp, soff, #32
+	adds	cntin, cntin, tmp
+	vceq.i8	vdata0, vdata0, vrepchr
+	vceq.i8	vdata1, vdata1, vrepchr
+	vand	vdata0, vdata0, vrepmask
+	vand	vdata1, vdata1, vrepmask
+	vpadd.i8	vdata0_0, vdata0_0, vdata0_1
+	vpadd.i8	vdata1_0, vdata1_0, vdata1_1
+	vpadd.i8	vdata0_0, vdata0_0, vdata1_0
+	vpadd.i8	vdata0_0, vdata0_0, vdata0_0
+	vmov	synd, vdata0_0[0]
+
+	/* Clear the soff lower bits */
+	lsr	synd, synd, soff
+	lsl	synd, synd, soff
+	/* The first block can also be the last */
+	bls	.Lmasklast
+	/* Have we found something already? */
+#ifndef NO_THUMB
+	cbnz	synd, .Ltail
+#else
+	cmp	synd, #0
+	bne	.Ltail
 #endif
 
-#define MEMCHR_NEON
-#include "memchr_impl.S"
+
+.Lloopintro:
+	vpush	{vend}
+	/* 264/265 correspond to d8/d9 for q4 */
+	cfi_adjust_cfa_offset (16)
+	cfi_rel_offset (264, 0)
+	cfi_rel_offset (265, 8)
+	.p2align 3,,7
+.Lloop:
+	vld1.8	{vdata0, vdata1}, [src:256]!
+	subs	cntin, cntin, #32
+	vceq.i8	vdata0, vdata0, vrepchr
+	vceq.i8	vdata1, vdata1, vrepchr
+	/* If we're out of data we finish regardless of the result. */
+	bls	.Lend
+	/* Use a fast check for the termination condition. */
+	vorr	vend, vdata0, vdata1
+	vorr	vend0, vend0, vend1
+	vmov	synd, tmp, vend0
+	orrs	synd, synd, tmp
+	/* We're not out of data, loop if we haven't found the character. */
+	beq	.Lloop
+
+.Lend:
+	vpop	{vend}
+	cfi_adjust_cfa_offset (-16)
+	cfi_restore (264)
+	cfi_restore (265)
+
+	/* Termination condition found, let's calculate the syndrome value. */
+	vand	vdata0, vdata0, vrepmask
+	vand	vdata1, vdata1, vrepmask
+	vpadd.i8	vdata0_0, vdata0_0, vdata0_1
+	vpadd.i8	vdata1_0, vdata1_0, vdata1_1
+	vpadd.i8	vdata0_0, vdata0_0, vdata1_0
+	vpadd.i8	vdata0_0, vdata0_0, vdata0_0
+	vmov	synd, vdata0_0[0]
+#ifndef NO_THUMB
+	cbz	synd, .Lnotfound
+	bhi	.Ltail	/* Uses the condition code from
+			   subs cntin, cntin, #32 above.  */
+#else
+	tst	synd, synd	/* Sets Z from synd; unlike cmp, keeps C.  */
+	beq	.Lnotfound
+	bcs	.Ltail	/* C is still the one from subs cntin, cntin, #32:
+			   set means the whole block was in bounds.  */
+#endif
+
+
+.Lmasklast:
+	/* Clear the (-cntin) upper bits to avoid out-of-bounds matches. */
+	neg	cntin, cntin
+	lsl	synd, synd, cntin
+	lsrs	synd, synd, cntin
+	it	eq
+	moveq	src, #0	/* If no match, set src to 0 so the retval is 0. */
+
+
+.Ltail:
+	/* Count the trailing zeros using bit reversing */
+	rbit	synd, synd
+	/* Compensate the last post-increment */
+	sub	src, src, #32
+	/* Count the leading zeros */
+	clz	synd, synd
+	/* Compute the potential result and return */
+	add	result, src, synd
+	bx	lr
+
+
+.Lnotfound:
+	/* Set result to NULL if not found and return */
+	mov	result, #0
+	bx	lr
+
+END(memchr)
+libc_hidden_builtin_def (memchr)
diff --git a/sysdeps/arm/armv7/multiarch/memchr_noneon.S b/sysdeps/arm/armv7/multiarch/memchr_noneon.S
new file mode 100644
index 0000000..e13be13
--- /dev/null
+++ b/sysdeps/arm/armv7/multiarch/memchr_noneon.S
@@ -0,0 +1,8 @@
+/* For __ARM_NEON__ memchr_neon defines memchr.
*/ +#ifndef __ARM_NEON__ +# define memchr __memchr_noneon +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) + +# include +#endif diff --git a/sysdeps/arm/armv7/multiarch/rtld-memchr.S b/sysdeps/arm/armv7/multiarch/rtld-memchr.S new file mode 100644 index 0000000..ae8e5f0 --- /dev/null +++ b/sysdeps/arm/armv7/multiarch/rtld-memchr.S @@ -0,0 +1 @@ +#include