diff mbox series

[02/25] arm: Implement memchr ifunc selection in C

Message ID 1509044813-9951-3-git-send-email-adhemerval.zanella@linaro.org
State Accepted
Commit a1a638dda91ed7739a066477908511e53840603b
Headers show
Series Refactor IFUNC selection in C | expand

Commit Message

Adhemerval Zanella Oct. 26, 2017, 7:06 p.m. UTC
This patch refactors the ARM memchr ifunc selector into a C implementation.
No functional change is expected, including ifunc resolution rules.

It also reorganizes the ifunc options code:

  1. The memchr_impl.S is renamed to memchr_neon.S and the multiple
     compilation options (which route to the armv6t2/memchr one) are
     removed.  The code to build if __ARM_NEON__ is defined is
     also simplified.

  2. A memchr_noneon is added (which was previously built together with
     the ifunc resolver) and it includes the armv6t2 one directly.

  3. Same as 2. for loader object.

Alongside the aforementioned changes, it also does some cleanups:

  - Internal memchr definition (__GI_memchr) is now a hidden
    symbol.
  - No need to create hidden definition for the ifunc variants.

Checked on armv7-linux-gnueabihf and with a build for arm-linux-gnueabi,
arm-linux-gnueabihf with and without multiarch support and with both
GCC 7.1 and GCC mainline.

	* sysdeps/arm/armv7/multiarch/Makefile [$(subdir) = string]
	(sysdep_routines): Add memchr_noneon.
	* sysdeps/arm/armv7/multiarch/ifunc-memchr.h: New file.
	* sysdeps/arm/armv7/multiarch/memchr_noneon.S: Likewise.
	* sysdeps/arm/armv7/multiarch/rtld-memchr.S: Likewise.
	* sysdeps/arm/armv7/multiarch/memchr.S: Remove file.
	* sysdeps/arm/armv7/multiarch/memchr.c: New file.
	* sysdeps/arm/armv7/multiarch/memchr_impl.S: Move to ...
	* sysdeps/arm/armv7/multiarch/memchr_neon.S: ... here.

Signed-off-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>

---
 ChangeLog                                   |  10 ++
 sysdeps/arm/armv7/multiarch/Makefile        |   3 +-
 sysdeps/arm/armv7/multiarch/ifunc-memchr.h  |  28 ++++
 sysdeps/arm/armv7/multiarch/memchr.S        |  59 --------
 sysdeps/arm/armv7/multiarch/memchr.c        |  35 +++++
 sysdeps/arm/armv7/multiarch/memchr_impl.S   | 219 ---------------------------
 sysdeps/arm/armv7/multiarch/memchr_neon.S   | 221 +++++++++++++++++++++++++++-
 sysdeps/arm/armv7/multiarch/memchr_noneon.S |   5 +
 sysdeps/arm/armv7/multiarch/rtld-memchr.S   |   1 +
 9 files changed, 296 insertions(+), 285 deletions(-)
 create mode 100644 sysdeps/arm/armv7/multiarch/ifunc-memchr.h
 delete mode 100644 sysdeps/arm/armv7/multiarch/memchr.S
 create mode 100644 sysdeps/arm/armv7/multiarch/memchr.c
 delete mode 100644 sysdeps/arm/armv7/multiarch/memchr_impl.S
 create mode 100644 sysdeps/arm/armv7/multiarch/memchr_noneon.S
 create mode 100644 sysdeps/arm/armv7/multiarch/rtld-memchr.S

-- 
2.7.4

Comments

Joseph Myers Oct. 31, 2017, 6:18 p.m. UTC | #1
On Thu, 26 Oct 2017, Adhemerval Zanella wrote:

> This patch refactor ARM memchr ifunc selector to a C implementation.

> No functional change is expected, including ifunc resolution rules.

> 

> It also reorganize the ifunc options code:

> 

>   1. The memchr_impl.S is renamed to memchr_neon.S and multiple

>      compilation options (which route to armv6t2/memchr one) is

>      removed.  The code to build if __ARM_NEON__ is defined is

>      also simplified.

> 

>   2. A memchr_noneon is added (which as build along previous ifunc

>      resolution) and includes the armv6t2 direct.

> 

>   3. Same as 2. for loader object.

> 

> Alongside the aforementioned changes, it also some cleanus:

> 

>   - Internal memchr definition (__GI_memcpy) is now a hidden

>     symbol.

>   - No need to create hidden definition for the ifunc variants.

> 

> Checked on armv7-linux-gnueabihf and with a build for arm-linux-gnueabi,

> arm-linux-gnueabihf with and without multiarch support and with both

> GCC 7.1 and GCC mainline.


OK.

-- 
Joseph S. Myers
joseph@codesourcery.com
diff mbox series

Patch

diff --git a/sysdeps/arm/armv7/multiarch/Makefile b/sysdeps/arm/armv7/multiarch/Makefile
index 1e62ef9..6e5851f 100644
--- a/sysdeps/arm/armv7/multiarch/Makefile
+++ b/sysdeps/arm/armv7/multiarch/Makefile
@@ -1,3 +1,4 @@ 
 ifeq ($(subdir),string)
-sysdep_routines += memcpy_neon memcpy_vfp memchr_neon memcpy_arm
+sysdep_routines += memcpy_neon memcpy_vfp memchr_neon memcpy_arm \
+		   memchr_noneon
 endif
diff --git a/sysdeps/arm/armv7/multiarch/ifunc-memchr.h b/sysdeps/arm/armv7/multiarch/ifunc-memchr.h
new file mode 100644
index 0000000..42f89fa
--- /dev/null
+++ b/sysdeps/arm/armv7/multiarch/ifunc-memchr.h
@@ -0,0 +1,28 @@ 
+/* Common definition for memchr resolver.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+__typeof (REDIRECT_NAME) OPTIMIZE (neon) attribute_hidden;
+__typeof (REDIRECT_NAME) OPTIMIZE (noneon) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (int hwcap)
+{
+  if (hwcap & HWCAP_ARM_NEON)
+    return OPTIMIZE (neon);
+  return OPTIMIZE (noneon);
+}
diff --git a/sysdeps/arm/armv7/multiarch/memchr.S b/sysdeps/arm/armv7/multiarch/memchr.S
deleted file mode 100644
index 8e8097a..0000000
--- a/sysdeps/arm/armv7/multiarch/memchr.S
+++ /dev/null
@@ -1,59 +0,0 @@ 
-/* Multiple versions of memchr
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2013-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <rtld-global-offsets.h>
-
-#if IS_IN (libc)
-/* Under __ARM_NEON__, memchr_neon.S defines the name memchr.  */
-# ifndef __ARM_NEON__
-	.text
-	.arm
-ENTRY(memchr)
-	.type	memchr, %gnu_indirect_function
-	ldr	r1, .Lmemchr_noneon
-	tst	r0, #HWCAP_ARM_NEON
-	ldrne	r1, .Lmemchr_neon
-1:
-	add	r0, r1, pc
-	DO_RET(lr)
-
-.Lmemchr_noneon:
-	.long	C_SYMBOL_NAME(__memchr_noneon) - 1b - 8
-.Lmemchr_neon:
-	.long	C_SYMBOL_NAME(__memchr_neon) - 1b - 8
-
-END(memchr)
-
-libc_hidden_builtin_def (memchr)
-# endif  /* Not __ARM_NEON__.  */
-libc_hidden_def (__memchr_noneon)
-
-# undef libc_hidden_builtin_def
-# define libc_hidden_builtin_def(name)
-# undef weak_alias
-# define weak_alias(x, y)
-# undef libc_hidden_def
-# define libc_hidden_def(name)
-
-# define memchr __memchr_noneon
-
-#endif
-
-#include "memchr_impl.S"
diff --git a/sysdeps/arm/armv7/multiarch/memchr.c b/sysdeps/arm/armv7/multiarch/memchr.c
new file mode 100644
index 0000000..906bcd5
--- /dev/null
+++ b/sysdeps/arm/armv7/multiarch/memchr.c
@@ -0,0 +1,35 @@ 
+/* Multiple versions of memchr.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* For __ARM_NEON__ memchr_neon.S defines memchr directly and ifunc
+   is not used.  */
+#if IS_IN (libc) && !defined (__ARM_NEON__)
+# define memchr __redirect_memchr
+# include <string.h>
+# undef memchr
+
+# include <arm-ifunc.h>
+
+# define SYMBOL_NAME memchr
+# include "ifunc-memchr.h"
+
+arm_libc_ifunc_redirected (__redirect_memchr, memchr, IFUNC_SELECTOR);
+
+arm_libc_ifunc_hidden_def (__redirect_memchr, memchr);
+#endif
diff --git a/sysdeps/arm/armv7/multiarch/memchr_impl.S b/sysdeps/arm/armv7/multiarch/memchr_impl.S
deleted file mode 100644
index e8cbb97..0000000
--- a/sysdeps/arm/armv7/multiarch/memchr_impl.S
+++ /dev/null
@@ -1,219 +0,0 @@ 
-/* memchr implemented using NEON.
-   Copyright (C) 2011-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifdef MEMCHR_NEON
-
-#include <sysdep.h>
-
-	.arch	armv7-a
-	.fpu	neon
-
-
-/* Arguments */
-#define srcin		r0
-#define chrin		r1
-#define cntin		r2
-
-/* Retval */
-#define result		r0	/* Live range does not overlap with srcin */
-
-/* Working registers */
-#define src		r1	/* Live range does not overlap with chrin */
-#define tmp		r3
-#define synd		r0	/* No overlap with srcin or result */
-#define soff		r12
-
-/* Working NEON registers */
-#define vrepchr		q0
-#define vdata0		q1
-#define vdata0_0	d2	/* Lower half of vdata0 */
-#define vdata0_1	d3	/* Upper half of vdata0 */
-#define vdata1		q2
-#define vdata1_0	d4	/* Lower half of vhas_chr0 */
-#define vdata1_1	d5	/* Upper half of vhas_chr0 */
-#define vrepmask	q3
-#define vrepmask0	d6
-#define vrepmask1	d7
-#define vend		q4
-#define vend0		d8
-#define vend1		d9
-
-/*
- * Core algorithm:
- *
- * For each 32-byte chunk we calculate a 32-bit syndrome value, with one bit per
- * byte. Each bit is set if the relevant byte matched the requested character
- * and cleared otherwise. Since the bits in the syndrome reflect exactly the
- * order in which things occur in the original string, counting trailing zeros
- * allows to identify exactly which byte has matched.
- */
-
-#ifndef NO_THUMB
-	.thumb_func
-#else
-	.arm
-#endif
-	.p2align 4,,15
-
-ENTRY(memchr)
-	/* Use a simple loop if there are less than 8 bytes to search.  */
-	cmp	cntin, #7
-	bhi	.Llargestr
-	and	chrin, chrin, #0xff
-
-.Lsmallstr:
-	subs	cntin, cntin, #1
-	blo	.Lnotfound	/* Return not found if reached end.  */
-	ldrb	tmp, [srcin], #1
-	cmp	tmp, chrin
-	bne	.Lsmallstr	/* Loop again if not found.  */
-	/* Otherwise fixup address and return.  */
-	sub	result, srcin, #1
-	bx	lr
-
-
-.Llargestr:
-	vdup.8	vrepchr, chrin	/* Duplicate char across all lanes. */
-	/*
-	 * Magic constant 0x8040201008040201 allows us to identify which lane
-	 * matches the requested byte.
-	 */
-	movw	tmp, #0x0201
-	movt	tmp, #0x0804
-	lsl	soff, tmp, #4
-	vmov	vrepmask0, tmp, soff
-	vmov	vrepmask1, tmp, soff
-	/* Work with aligned 32-byte chunks */
-	bic	src, srcin, #31
-	ands	soff, srcin, #31
-	beq	.Lloopintro	/* Go straight to main loop if it's aligned. */
-
-	/*
-	 * Input string is not 32-byte aligned. We calculate the syndrome
-	 * value for the aligned 32 bytes block containing the first bytes
-	 * and mask the irrelevant part.
-	 */
-	vld1.8		{vdata0, vdata1}, [src:256]!
-	sub		tmp, soff, #32
-	adds		cntin, cntin, tmp
-	vceq.i8		vdata0, vdata0, vrepchr
-	vceq.i8		vdata1, vdata1, vrepchr
-	vand		vdata0, vdata0, vrepmask
-	vand		vdata1, vdata1, vrepmask
-	vpadd.i8	vdata0_0, vdata0_0, vdata0_1
-	vpadd.i8	vdata1_0, vdata1_0, vdata1_1
-	vpadd.i8	vdata0_0, vdata0_0, vdata1_0
-	vpadd.i8	vdata0_0, vdata0_0, vdata0_0
-	vmov		synd, vdata0_0[0]
-
-	/* Clear the soff lower bits */
-	lsr		synd, synd, soff
-	lsl		synd, synd, soff
-	/* The first block can also be the last */
-	bls		.Lmasklast
-	/* Have we found something already? */
-#ifndef NO_THUMB
-	cbnz		synd, .Ltail
-#else
-	cmp		synd, #0
-	bne		.Ltail
-#endif
-
-
-.Lloopintro:
-	vpush	{vend}
-	/* 264/265 correspond to d8/d9 for q4 */
-	cfi_adjust_cfa_offset (16)
-	cfi_rel_offset (264, 0)
-	cfi_rel_offset (265, 8)
-	.p2align 3,,7
-.Lloop:
-	vld1.8		{vdata0, vdata1}, [src:256]!
-	subs		cntin, cntin, #32
-	vceq.i8		vdata0, vdata0, vrepchr
-	vceq.i8		vdata1, vdata1, vrepchr
-	/* If we're out of data we finish regardless of the result. */
-	bls		.Lend
-	/* Use a fast check for the termination condition. */
-	vorr		vend, vdata0, vdata1
-	vorr		vend0, vend0, vend1
-	vmov		synd, tmp, vend0
-	orrs		synd, synd, tmp
-	/* We're not out of data, loop if we haven't found the character. */
-	beq		.Lloop
-
-.Lend:
-	vpop		{vend}
-	cfi_adjust_cfa_offset (-16)
-	cfi_restore (264)
-	cfi_restore (265)
-
-	/* Termination condition found, let's calculate the syndrome value. */
-	vand		vdata0, vdata0, vrepmask
-	vand		vdata1, vdata1, vrepmask
-	vpadd.i8	vdata0_0, vdata0_0, vdata0_1
-	vpadd.i8	vdata1_0, vdata1_0, vdata1_1
-	vpadd.i8	vdata0_0, vdata0_0, vdata1_0
-	vpadd.i8	vdata0_0, vdata0_0, vdata0_0
-	vmov		synd, vdata0_0[0]
-#ifndef NO_THUMB
-	cbz		synd, .Lnotfound
-	bhi		.Ltail	/* Uses the condition code from
-				   subs cntin, cntin, #32 above.  */
-#else
-	cmp		synd, #0
-	beq		.Lnotfound
-	cmp		cntin, #0
-	bhi		.Ltail
-#endif
-
-
-.Lmasklast:
-	/* Clear the (-cntin) upper bits to avoid out-of-bounds matches. */
-	neg	cntin, cntin
-	lsl	synd, synd, cntin
-	lsrs	synd, synd, cntin
-	it	eq
-	moveq	src, #0	/* If no match, set src to 0 so the retval is 0. */
-
-
-.Ltail:
-	/* Count the trailing zeros using bit reversing */
-	rbit	synd, synd
-	/* Compensate the last post-increment */
-	sub	src, src, #32
-	/* Count the leading zeros */
-	clz	synd, synd
-	/* Compute the potential result and return */
-	add	result, src, synd
-	bx	lr
-
-
-.Lnotfound:
-	/* Set result to NULL if not found and return */
-	mov	result, #0
-	bx	lr
-
-END(memchr)
-libc_hidden_builtin_def (memchr)
-
-#else
-
-#include "../../armv6t2/memchr.S"
-
-#endif
diff --git a/sysdeps/arm/armv7/multiarch/memchr_neon.S b/sysdeps/arm/armv7/multiarch/memchr_neon.S
index ee21818..a400033 100644
--- a/sysdeps/arm/armv7/multiarch/memchr_neon.S
+++ b/sysdeps/arm/armv7/multiarch/memchr_neon.S
@@ -1,9 +1,218 @@ 
-#ifdef __ARM_NEON__
-/* Under __ARM_NEON__, this file defines memchr directly.  */
-libc_hidden_builtin_def (memchr)
-#else
+/* memchr implemented using NEON.
+   Copyright (C) 2011-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+/* For __ARM_NEON__ this file defines memchr.  */
+#ifndef __ARM_NEON__
 # define memchr __memchr_neon
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(a)
+#endif
+
+	.arch	armv7-a
+	.fpu	neon
+
+
+/* Arguments */
+#define srcin		r0
+#define chrin		r1
+#define cntin		r2
+
+/* Retval */
+#define result		r0	/* Live range does not overlap with srcin */
+
+/* Working registers */
+#define src		r1	/* Live range does not overlap with chrin */
+#define tmp		r3
+#define synd		r0	/* No overlap with srcin or result */
+#define soff		r12
+
+/* Working NEON registers */
+#define vrepchr		q0
+#define vdata0		q1
+#define vdata0_0	d2	/* Lower half of vdata0 */
+#define vdata0_1	d3	/* Upper half of vdata0 */
+#define vdata1		q2
+#define vdata1_0	d4	/* Lower half of vhas_chr0 */
+#define vdata1_1	d5	/* Upper half of vhas_chr0 */
+#define vrepmask	q3
+#define vrepmask0	d6
+#define vrepmask1	d7
+#define vend		q4
+#define vend0		d8
+#define vend1		d9
+
+/*
+ * Core algorithm:
+ *
+ * For each 32-byte chunk we calculate a 32-bit syndrome value, with one bit per
+ * byte. Each bit is set if the relevant byte matched the requested character
+ * and cleared otherwise. Since the bits in the syndrome reflect exactly the
+ * order in which things occur in the original string, counting trailing zeros
+ * allows to identify exactly which byte has matched.
+ */
+
+#ifndef NO_THUMB
+	.thumb_func
+#else
+	.arm
+#endif
+	.p2align 4,,15
+
+ENTRY(memchr)
+	/* Use a simple loop if there are less than 8 bytes to search.  */
+	cmp	cntin, #7
+	bhi	.Llargestr
+	and	chrin, chrin, #0xff
+
+.Lsmallstr:
+	subs	cntin, cntin, #1
+	blo	.Lnotfound	/* Return not found if reached end.  */
+	ldrb	tmp, [srcin], #1
+	cmp	tmp, chrin
+	bne	.Lsmallstr	/* Loop again if not found.  */
+	/* Otherwise fixup address and return.  */
+	sub	result, srcin, #1
+	bx	lr
+
+
+.Llargestr:
+	vdup.8	vrepchr, chrin	/* Duplicate char across all lanes. */
+	/*
+	 * Magic constant 0x8040201008040201 allows us to identify which lane
+	 * matches the requested byte.
+	 */
+	movw	tmp, #0x0201
+	movt	tmp, #0x0804
+	lsl	soff, tmp, #4
+	vmov	vrepmask0, tmp, soff
+	vmov	vrepmask1, tmp, soff
+	/* Work with aligned 32-byte chunks */
+	bic	src, srcin, #31
+	ands	soff, srcin, #31
+	beq	.Lloopintro	/* Go straight to main loop if it's aligned. */
+
+	/*
+	 * Input string is not 32-byte aligned. We calculate the syndrome
+	 * value for the aligned 32 bytes block containing the first bytes
+	 * and mask the irrelevant part.
+	 */
+	vld1.8		{vdata0, vdata1}, [src:256]!
+	sub		tmp, soff, #32
+	adds		cntin, cntin, tmp
+	vceq.i8		vdata0, vdata0, vrepchr
+	vceq.i8		vdata1, vdata1, vrepchr
+	vand		vdata0, vdata0, vrepmask
+	vand		vdata1, vdata1, vrepmask
+	vpadd.i8	vdata0_0, vdata0_0, vdata0_1
+	vpadd.i8	vdata1_0, vdata1_0, vdata1_1
+	vpadd.i8	vdata0_0, vdata0_0, vdata1_0
+	vpadd.i8	vdata0_0, vdata0_0, vdata0_0
+	vmov		synd, vdata0_0[0]
+
+	/* Clear the soff lower bits */
+	lsr		synd, synd, soff
+	lsl		synd, synd, soff
+	/* The first block can also be the last */
+	bls		.Lmasklast
+	/* Have we found something already? */
+#ifndef NO_THUMB
+	cbnz		synd, .Ltail
+#else
+	cmp		synd, #0
+	bne		.Ltail
 #endif
 
-#define MEMCHR_NEON
-#include "memchr_impl.S"
+
+.Lloopintro:
+	vpush	{vend}
+	/* 264/265 correspond to d8/d9 for q4 */
+	cfi_adjust_cfa_offset (16)
+	cfi_rel_offset (264, 0)
+	cfi_rel_offset (265, 8)
+	.p2align 3,,7
+.Lloop:
+	vld1.8		{vdata0, vdata1}, [src:256]!
+	subs		cntin, cntin, #32
+	vceq.i8		vdata0, vdata0, vrepchr
+	vceq.i8		vdata1, vdata1, vrepchr
+	/* If we're out of data we finish regardless of the result. */
+	bls		.Lend
+	/* Use a fast check for the termination condition. */
+	vorr		vend, vdata0, vdata1
+	vorr		vend0, vend0, vend1
+	vmov		synd, tmp, vend0
+	orrs		synd, synd, tmp
+	/* We're not out of data, loop if we haven't found the character. */
+	beq		.Lloop
+
+.Lend:
+	vpop		{vend}
+	cfi_adjust_cfa_offset (-16)
+	cfi_restore (264)
+	cfi_restore (265)
+
+	/* Termination condition found, let's calculate the syndrome value. */
+	vand		vdata0, vdata0, vrepmask
+	vand		vdata1, vdata1, vrepmask
+	vpadd.i8	vdata0_0, vdata0_0, vdata0_1
+	vpadd.i8	vdata1_0, vdata1_0, vdata1_1
+	vpadd.i8	vdata0_0, vdata0_0, vdata1_0
+	vpadd.i8	vdata0_0, vdata0_0, vdata0_0
+	vmov		synd, vdata0_0[0]
+#ifndef NO_THUMB
+	cbz		synd, .Lnotfound
+	bhi		.Ltail	/* Uses the condition code from
+				   subs cntin, cntin, #32 above.  */
+#else
+	cmp		synd, #0
+	beq		.Lnotfound
+	cmp		cntin, #0
+	bhi		.Ltail
+#endif
+
+
+.Lmasklast:
+	/* Clear the (-cntin) upper bits to avoid out-of-bounds matches. */
+	neg	cntin, cntin
+	lsl	synd, synd, cntin
+	lsrs	synd, synd, cntin
+	it	eq
+	moveq	src, #0	/* If no match, set src to 0 so the retval is 0. */
+
+
+.Ltail:
+	/* Count the trailing zeros using bit reversing */
+	rbit	synd, synd
+	/* Compensate the last post-increment */
+	sub	src, src, #32
+	/* Count the leading zeros */
+	clz	synd, synd
+	/* Compute the potential result and return */
+	add	result, src, synd
+	bx	lr
+
+
+.Lnotfound:
+	/* Set result to NULL if not found and return */
+	mov	result, #0
+	bx	lr
+
+END(memchr)
+libc_hidden_builtin_def (memchr)
diff --git a/sysdeps/arm/armv7/multiarch/memchr_noneon.S b/sysdeps/arm/armv7/multiarch/memchr_noneon.S
new file mode 100644
index 0000000..b1fb540
--- /dev/null
+++ b/sysdeps/arm/armv7/multiarch/memchr_noneon.S
@@ -0,0 +1,5 @@ 
+#define memchr __memchr_noneon
+#undef libc_hidden_builtin_def
+#define libc_hidden_builtin_def(name)
+
+#include <sysdeps/arm/armv6t2/memchr.S>
diff --git a/sysdeps/arm/armv7/multiarch/rtld-memchr.S b/sysdeps/arm/armv7/multiarch/rtld-memchr.S
new file mode 100644
index 0000000..ae8e5f0
--- /dev/null
+++ b/sysdeps/arm/armv7/multiarch/rtld-memchr.S
@@ -0,0 +1 @@ 
+#include <sysdeps/arm/armv6t2/memchr.S>