@@ -1,3 +1,4 @@
ifeq ($(subdir),string)
-sysdep_routines += memcpy_neon memcpy_vfp memchr_neon memcpy_arm
+sysdep_routines += memcpy_neon memcpy_vfp memchr_neon memcpy_arm \
+ memchr_noneon
endif
new file mode 100644
@@ -0,0 +1,28 @@
+/* Common definition for memchr resolver.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+__typeof (REDIRECT_NAME) OPTIMIZE (neon) attribute_hidden;
+__typeof (REDIRECT_NAME) OPTIMIZE (noneon) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (int hwcap)
+{
+ if (hwcap & HWCAP_ARM_NEON)
+ return OPTIMIZE (neon);
+ return OPTIMIZE (noneon);
+}
deleted file mode 100644
@@ -1,59 +0,0 @@
-/* Multiple versions of memchr
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2013-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <rtld-global-offsets.h>
-
-#if IS_IN (libc)
-/* Under __ARM_NEON__, memchr_neon.S defines the name memchr. */
-# ifndef __ARM_NEON__
- .text
- .arm
-ENTRY(memchr)
- .type memchr, %gnu_indirect_function
- ldr r1, .Lmemchr_noneon
- tst r0, #HWCAP_ARM_NEON
- ldrne r1, .Lmemchr_neon
-1:
- add r0, r1, pc
- DO_RET(lr)
-
-.Lmemchr_noneon:
- .long C_SYMBOL_NAME(__memchr_noneon) - 1b - 8
-.Lmemchr_neon:
- .long C_SYMBOL_NAME(__memchr_neon) - 1b - 8
-
-END(memchr)
-
-libc_hidden_builtin_def (memchr)
-# endif /* Not __ARM_NEON__. */
-libc_hidden_def (__memchr_noneon)
-
-# undef libc_hidden_builtin_def
-# define libc_hidden_builtin_def(name)
-# undef weak_alias
-# define weak_alias(x, y)
-# undef libc_hidden_def
-# define libc_hidden_def(name)
-
-# define memchr __memchr_noneon
-
-#endif
-
-#include "memchr_impl.S"
new file mode 100644
@@ -0,0 +1,35 @@
+/* Multiple versions of memchr.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* For __ARM_NEON__ memchr_neon.S defines memchr directly and ifunc
+ is not used. */
+#if IS_IN (libc) && !defined (__ARM_NEON__)
+# define memchr __redirect_memchr
+# include <string.h>
+# undef memchr
+
+# include <arm-ifunc.h>
+
+# define SYMBOL_NAME memchr
+# include "ifunc-memchr.h"
+
+arm_libc_ifunc_redirected (__redirect_memchr, memchr, IFUNC_SELECTOR);
+
+arm_libc_ifunc_hidden_def (__redirect_memchr, memchr);
+#endif
deleted file mode 100644
@@ -1,219 +0,0 @@
-/* memchr implemented using NEON.
- Copyright (C) 2011-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library. If not, see
- <http://www.gnu.org/licenses/>. */
-
-#ifdef MEMCHR_NEON
-
-#include <sysdep.h>
-
- .arch armv7-a
- .fpu neon
-
-
-/* Arguments */
-#define srcin r0
-#define chrin r1
-#define cntin r2
-
-/* Retval */
-#define result r0 /* Live range does not overlap with srcin */
-
-/* Working registers */
-#define src r1 /* Live range does not overlap with chrin */
-#define tmp r3
-#define synd r0 /* No overlap with srcin or result */
-#define soff r12
-
-/* Working NEON registers */
-#define vrepchr q0
-#define vdata0 q1
-#define vdata0_0 d2 /* Lower half of vdata0 */
-#define vdata0_1 d3 /* Upper half of vdata0 */
-#define vdata1 q2
-#define vdata1_0 d4 /* Lower half of vhas_chr0 */
-#define vdata1_1 d5 /* Upper half of vhas_chr0 */
-#define vrepmask q3
-#define vrepmask0 d6
-#define vrepmask1 d7
-#define vend q4
-#define vend0 d8
-#define vend1 d9
-
-/*
- * Core algorithm:
- *
- * For each 32-byte chunk we calculate a 32-bit syndrome value, with one bit per
- * byte. Each bit is set if the relevant byte matched the requested character
- * and cleared otherwise. Since the bits in the syndrome reflect exactly the
- * order in which things occur in the original string, counting trailing zeros
- * allows to identify exactly which byte has matched.
- */
-
-#ifndef NO_THUMB
- .thumb_func
-#else
- .arm
-#endif
- .p2align 4,,15
-
-ENTRY(memchr)
- /* Use a simple loop if there are less than 8 bytes to search. */
- cmp cntin, #7
- bhi .Llargestr
- and chrin, chrin, #0xff
-
-.Lsmallstr:
- subs cntin, cntin, #1
- blo .Lnotfound /* Return not found if reached end. */
- ldrb tmp, [srcin], #1
- cmp tmp, chrin
- bne .Lsmallstr /* Loop again if not found. */
- /* Otherwise fixup address and return. */
- sub result, srcin, #1
- bx lr
-
-
-.Llargestr:
- vdup.8 vrepchr, chrin /* Duplicate char across all lanes. */
- /*
- * Magic constant 0x8040201008040201 allows us to identify which lane
- * matches the requested byte.
- */
- movw tmp, #0x0201
- movt tmp, #0x0804
- lsl soff, tmp, #4
- vmov vrepmask0, tmp, soff
- vmov vrepmask1, tmp, soff
- /* Work with aligned 32-byte chunks */
- bic src, srcin, #31
- ands soff, srcin, #31
- beq .Lloopintro /* Go straight to main loop if it's aligned. */
-
- /*
- * Input string is not 32-byte aligned. We calculate the syndrome
- * value for the aligned 32 bytes block containing the first bytes
- * and mask the irrelevant part.
- */
- vld1.8 {vdata0, vdata1}, [src:256]!
- sub tmp, soff, #32
- adds cntin, cntin, tmp
- vceq.i8 vdata0, vdata0, vrepchr
- vceq.i8 vdata1, vdata1, vrepchr
- vand vdata0, vdata0, vrepmask
- vand vdata1, vdata1, vrepmask
- vpadd.i8 vdata0_0, vdata0_0, vdata0_1
- vpadd.i8 vdata1_0, vdata1_0, vdata1_1
- vpadd.i8 vdata0_0, vdata0_0, vdata1_0
- vpadd.i8 vdata0_0, vdata0_0, vdata0_0
- vmov synd, vdata0_0[0]
-
- /* Clear the soff lower bits */
- lsr synd, synd, soff
- lsl synd, synd, soff
- /* The first block can also be the last */
- bls .Lmasklast
- /* Have we found something already? */
-#ifndef NO_THUMB
- cbnz synd, .Ltail
-#else
- cmp synd, #0
- bne .Ltail
-#endif
-
-
-.Lloopintro:
- vpush {vend}
- /* 264/265 correspond to d8/d9 for q4 */
- cfi_adjust_cfa_offset (16)
- cfi_rel_offset (264, 0)
- cfi_rel_offset (265, 8)
- .p2align 3,,7
-.Lloop:
- vld1.8 {vdata0, vdata1}, [src:256]!
- subs cntin, cntin, #32
- vceq.i8 vdata0, vdata0, vrepchr
- vceq.i8 vdata1, vdata1, vrepchr
- /* If we're out of data we finish regardless of the result. */
- bls .Lend
- /* Use a fast check for the termination condition. */
- vorr vend, vdata0, vdata1
- vorr vend0, vend0, vend1
- vmov synd, tmp, vend0
- orrs synd, synd, tmp
- /* We're not out of data, loop if we haven't found the character. */
- beq .Lloop
-
-.Lend:
- vpop {vend}
- cfi_adjust_cfa_offset (-16)
- cfi_restore (264)
- cfi_restore (265)
-
- /* Termination condition found, let's calculate the syndrome value. */
- vand vdata0, vdata0, vrepmask
- vand vdata1, vdata1, vrepmask
- vpadd.i8 vdata0_0, vdata0_0, vdata0_1
- vpadd.i8 vdata1_0, vdata1_0, vdata1_1
- vpadd.i8 vdata0_0, vdata0_0, vdata1_0
- vpadd.i8 vdata0_0, vdata0_0, vdata0_0
- vmov synd, vdata0_0[0]
-#ifndef NO_THUMB
- cbz synd, .Lnotfound
- bhi .Ltail /* Uses the condition code from
- subs cntin, cntin, #32 above. */
-#else
- cmp synd, #0
- beq .Lnotfound
- cmp cntin, #0
- bhi .Ltail
-#endif
-
-
-.Lmasklast:
- /* Clear the (-cntin) upper bits to avoid out-of-bounds matches. */
- neg cntin, cntin
- lsl synd, synd, cntin
- lsrs synd, synd, cntin
- it eq
- moveq src, #0 /* If no match, set src to 0 so the retval is 0. */
-
-
-.Ltail:
- /* Count the trailing zeros using bit reversing */
- rbit synd, synd
- /* Compensate the last post-increment */
- sub src, src, #32
- /* Count the leading zeros */
- clz synd, synd
- /* Compute the potential result and return */
- add result, src, synd
- bx lr
-
-
-.Lnotfound:
- /* Set result to NULL if not found and return */
- mov result, #0
- bx lr
-
-END(memchr)
-libc_hidden_builtin_def (memchr)
-
-#else
-
-#include "../../armv6t2/memchr.S"
-
-#endif
@@ -1,9 +1,218 @@
-#ifdef __ARM_NEON__
-/* Under __ARM_NEON__, this file defines memchr directly. */
-libc_hidden_builtin_def (memchr)
-#else
+/* memchr implemented using NEON.
+ Copyright (C) 2011-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* For __ARM_NEON__ this file defines memchr. */
+#ifndef __ARM_NEON__
# define memchr __memchr_neon
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(a)
+#endif
+
+ .arch armv7-a
+ .fpu neon
+
+
+/* Arguments */
+#define srcin r0
+#define chrin r1
+#define cntin r2
+
+/* Retval */
+#define result r0 /* Live range does not overlap with srcin */
+
+/* Working registers */
+#define src r1 /* Live range does not overlap with chrin */
+#define tmp r3
+#define synd r0 /* No overlap with srcin or result */
+#define soff r12
+
+/* Working NEON registers */
+#define vrepchr q0
+#define vdata0 q1
+#define vdata0_0 d2 /* Lower half of vdata0 */
+#define vdata0_1 d3 /* Upper half of vdata0 */
+#define vdata1 q2
+#define vdata1_0 d4 /* Lower half of vhas_chr0 */
+#define vdata1_1 d5 /* Upper half of vhas_chr0 */
+#define vrepmask q3
+#define vrepmask0 d6
+#define vrepmask1 d7
+#define vend q4
+#define vend0 d8
+#define vend1 d9
+
+/*
+ * Core algorithm:
+ *
+ * For each 32-byte chunk we calculate a 32-bit syndrome value, with one bit per
+ * byte. Each bit is set if the relevant byte matched the requested character
+ * and cleared otherwise. Since the bits in the syndrome reflect exactly the
+ * order in which things occur in the original string, counting trailing zeros
+ * allows to identify exactly which byte has matched.
+ */
+
+#ifndef NO_THUMB
+ .thumb_func
+#else
+ .arm
+#endif
+ .p2align 4,,15
+
+ENTRY(memchr)
+ /* Use a simple loop if there are less than 8 bytes to search. */
+ cmp cntin, #7
+ bhi .Llargestr
+ and chrin, chrin, #0xff
+
+.Lsmallstr:
+ subs cntin, cntin, #1
+ blo .Lnotfound /* Return not found if reached end. */
+ ldrb tmp, [srcin], #1
+ cmp tmp, chrin
+ bne .Lsmallstr /* Loop again if not found. */
+ /* Otherwise fixup address and return. */
+ sub result, srcin, #1
+ bx lr
+
+
+.Llargestr:
+ vdup.8 vrepchr, chrin /* Duplicate char across all lanes. */
+ /*
+ * Magic constant 0x8040201008040201 allows us to identify which lane
+ * matches the requested byte.
+ */
+ movw tmp, #0x0201
+ movt tmp, #0x0804
+ lsl soff, tmp, #4
+ vmov vrepmask0, tmp, soff
+ vmov vrepmask1, tmp, soff
+ /* Work with aligned 32-byte chunks */
+ bic src, srcin, #31
+ ands soff, srcin, #31
+ beq .Lloopintro /* Go straight to main loop if it's aligned. */
+
+ /*
+ * Input string is not 32-byte aligned. We calculate the syndrome
+ * value for the aligned 32 bytes block containing the first bytes
+ * and mask the irrelevant part.
+ */
+ vld1.8 {vdata0, vdata1}, [src:256]!
+ sub tmp, soff, #32
+ adds cntin, cntin, tmp
+ vceq.i8 vdata0, vdata0, vrepchr
+ vceq.i8 vdata1, vdata1, vrepchr
+ vand vdata0, vdata0, vrepmask
+ vand vdata1, vdata1, vrepmask
+ vpadd.i8 vdata0_0, vdata0_0, vdata0_1
+ vpadd.i8 vdata1_0, vdata1_0, vdata1_1
+ vpadd.i8 vdata0_0, vdata0_0, vdata1_0
+ vpadd.i8 vdata0_0, vdata0_0, vdata0_0
+ vmov synd, vdata0_0[0]
+
+ /* Clear the soff lower bits */
+ lsr synd, synd, soff
+ lsl synd, synd, soff
+ /* The first block can also be the last */
+ bls .Lmasklast
+ /* Have we found something already? */
+#ifndef NO_THUMB
+ cbnz synd, .Ltail
+#else
+ cmp synd, #0
+ bne .Ltail
#endif
-#define MEMCHR_NEON
-#include "memchr_impl.S"
+
+.Lloopintro:
+ vpush {vend}
+ /* 264/265 correspond to d8/d9 for q4 */
+ cfi_adjust_cfa_offset (16)
+ cfi_rel_offset (264, 0)
+ cfi_rel_offset (265, 8)
+ .p2align 3,,7
+.Lloop:
+ vld1.8 {vdata0, vdata1}, [src:256]!
+ subs cntin, cntin, #32
+ vceq.i8 vdata0, vdata0, vrepchr
+ vceq.i8 vdata1, vdata1, vrepchr
+ /* If we're out of data we finish regardless of the result. */
+ bls .Lend
+ /* Use a fast check for the termination condition. */
+ vorr vend, vdata0, vdata1
+ vorr vend0, vend0, vend1
+ vmov synd, tmp, vend0
+ orrs synd, synd, tmp
+ /* We're not out of data, loop if we haven't found the character. */
+ beq .Lloop
+
+.Lend:
+ vpop {vend}
+ cfi_adjust_cfa_offset (-16)
+ cfi_restore (264)
+ cfi_restore (265)
+
+ /* Termination condition found, let's calculate the syndrome value. */
+ vand vdata0, vdata0, vrepmask
+ vand vdata1, vdata1, vrepmask
+ vpadd.i8 vdata0_0, vdata0_0, vdata0_1
+ vpadd.i8 vdata1_0, vdata1_0, vdata1_1
+ vpadd.i8 vdata0_0, vdata0_0, vdata1_0
+ vpadd.i8 vdata0_0, vdata0_0, vdata0_0
+ vmov synd, vdata0_0[0]
+#ifndef NO_THUMB
+ cbz synd, .Lnotfound
+ bhi .Ltail /* Uses the condition code from
+ subs cntin, cntin, #32 above. */
+#else
+ cmp synd, #0
+ beq .Lnotfound
+ cmp cntin, #0
+ bhi .Ltail
+#endif
+
+
+.Lmasklast:
+ /* Clear the (-cntin) upper bits to avoid out-of-bounds matches. */
+ neg cntin, cntin
+ lsl synd, synd, cntin
+ lsrs synd, synd, cntin
+ it eq
+ moveq src, #0 /* If no match, set src to 0 so the retval is 0. */
+
+
+.Ltail:
+ /* Count the trailing zeros using bit reversing */
+ rbit synd, synd
+ /* Compensate the last post-increment */
+ sub src, src, #32
+ /* Count the leading zeros */
+ clz synd, synd
+ /* Compute the potential result and return */
+ add result, src, synd
+ bx lr
+
+
+.Lnotfound:
+ /* Set result to NULL if not found and return */
+ mov result, #0
+ bx lr
+
+END(memchr)
+libc_hidden_builtin_def (memchr)
new file mode 100644
@@ -0,0 +1,5 @@
+#define memchr __memchr_noneon
+#undef libc_hidden_builtin_def
+#define libc_hidden_builtin_def(name)
+
+#include <sysdeps/arm/armv6t2/memchr.S>
new file mode 100644
@@ -0,0 +1 @@
+#include <sysdeps/arm/armv6t2/memchr.S>
This patch refactor ARM memchr ifunc selector to a C implementation. No functional change is expected, including ifunc resolution rules. It also reorganize the ifunc options code: 1. The memchr_impl.S is renamed to memchr_neon.S and multiple compilation options (which route to armv6t2/memchr one) is removed. The code to build if __ARM_NEON__ is defined is also simplified. 2. A memchr_noneon is added (which as build along previous ifunc resolution) and includes the armv6t2 direct. 3. Same as 2. for loader object. Alongside the aforementioned changes, it also some cleanus: - Internal memchr definition (__GI_memcpy) is now a hidden symbol. - No need to create hidden definition for the ifunc variants. Checked on armv7-linux-gnueabihf and with a build for arm-linux-gnueabi, arm-linux-gnueabihf with and without multiarch support and with both GCC 7.1 and GCC mainline. * sysdeps/arm/armv7/multiarch/Makefile [$(subdir) = string] (sysdeps_routines): Add memchr_noneon. * sysdeps/arm/armv7/multiarch/ifunc-memchr.h: New file. * sysdeps/arm/armv7/multiarch/memchr_noneon.S: Likewise. * sysdeps/arm/armv7/multiarch/rtld-memchr.S: Likewise. * sysdeps/arm/armv7/multiarch/memchr.S: Remove file. * sysdeps/arm/armv7/multiarch/memchr.c: New file. * sysdeps/arm/armv7/multiarch/memchr_impl.S: Move to ... * sysdeps/arm/armv7/multiarch/memchr_neon.S: ... here. Signed-off-by: Adhemerval Zanella <adhemerval.zanella@linaro.org> --- ChangeLog | 10 ++ sysdeps/arm/armv7/multiarch/Makefile | 3 +- sysdeps/arm/armv7/multiarch/ifunc-memchr.h | 28 ++++ sysdeps/arm/armv7/multiarch/memchr.S | 59 -------- sysdeps/arm/armv7/multiarch/memchr.c | 35 +++++ sysdeps/arm/armv7/multiarch/memchr_impl.S | 219 --------------------------- sysdeps/arm/armv7/multiarch/memchr_neon.S | 221 +++++++++++++++++++++++++++- sysdeps/arm/armv7/multiarch/memchr_noneon.S | 5 + sysdeps/arm/armv7/multiarch/rtld-memchr.S | 1 + 9 files changed, 296 insertions(+), 285 deletions(-) create mode 100644 sysdeps/arm/armv7/multiarch/ifunc-memchr.h delete mode 100644 sysdeps/arm/armv7/multiarch/memchr.S create mode 100644 sysdeps/arm/armv7/multiarch/memchr.c delete mode 100644 sysdeps/arm/armv7/multiarch/memchr_impl.S create mode 100644 sysdeps/arm/armv7/multiarch/memchr_noneon.S create mode 100644 sysdeps/arm/armv7/multiarch/rtld-memchr.S -- 2.7.4