From patchwork Mon Dec 19 17:21:23 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Dr. David Alan Gilbert" X-Patchwork-Id: 5884 Return-Path: X-Original-To: patchwork@peony.canonical.com Delivered-To: patchwork@peony.canonical.com Received: from fiordland.canonical.com (fiordland.canonical.com [91.189.94.145]) by peony.canonical.com (Postfix) with ESMTP id 0D2FA23E21 for ; Mon, 19 Dec 2011 17:21:34 +0000 (UTC) Received: from mail-ey0-f180.google.com (mail-ey0-f180.google.com [209.85.215.180]) by fiordland.canonical.com (Postfix) with ESMTP id EB75CA18089 for ; Mon, 19 Dec 2011 17:21:33 +0000 (UTC) Received: by eaac11 with SMTP id c11so2209918eaa.11 for ; Mon, 19 Dec 2011 09:21:33 -0800 (PST) Received: by 10.204.133.207 with SMTP id g15mr5370910bkt.17.1324315293495; Mon, 19 Dec 2011 09:21:33 -0800 (PST) X-Forwarded-To: linaro-patchwork@canonical.com X-Forwarded-For: patch@linaro.org linaro-patchwork@canonical.com Delivered-To: patches@linaro.org Received: by 10.205.82.144 with SMTP id ac16cs12156bkc; Mon, 19 Dec 2011 09:21:32 -0800 (PST) Received: by 10.213.28.69 with SMTP id l5mr4916698ebc.110.1324315291447; Mon, 19 Dec 2011 09:21:31 -0800 (PST) Received: from mail-ee0-f50.google.com (mail-ee0-f50.google.com [74.125.83.50]) by mx.google.com with ESMTPS id 73si5963029eev.163.2011.12.19.09.21.31 (version=TLSv1/SSLv3 cipher=OTHER); Mon, 19 Dec 2011 09:21:31 -0800 (PST) Received-SPF: neutral (google.com: 74.125.83.50 is neither permitted nor denied by best guess record for domain of david.gilbert@linaro.org) client-ip=74.125.83.50; Authentication-Results: mx.google.com; spf=neutral (google.com: 74.125.83.50 is neither permitted nor denied by best guess record for domain of david.gilbert@linaro.org) smtp.mail=david.gilbert@linaro.org Received: by eeke53 with SMTP id e53so5725309eek.37 for ; Mon, 19 Dec 2011 09:21:31 -0800 (PST) Received: by 10.14.127.68 with SMTP id c44mr4693012eei.47.1324315291105; 
Mon, 19 Dec 2011 09:21:31 -0800 (PST) Received: from davesworkthinkpad (gbibp9ph1--blueice3n2.emea.ibm.com. [195.212.29.84]) by mx.google.com with ESMTPS id x12sm46896510eef.9.2011.12.19.09.21.29 (version=TLSv1/SSLv3 cipher=OTHER); Mon, 19 Dec 2011 09:21:30 -0800 (PST) Date: Mon, 19 Dec 2011 17:21:23 +0000 From: "Dr. David Alan Gilbert" To: libc-ports@sourceware.org Cc: joseph@codesourcery.com, patches@linaro.org Subject: [ARM] Optimised strchr and strlen Message-ID: <20111219172122.GA10120@davesworkthinkpad> MIME-Version: 1.0 Content-Disposition: inline User-Agent: Mutt/1.5.20 (2009-06-14) This is a strchr and strlen optimised for ARM v6t2 or v7. It's against svn rev r15869 with my previous memchr patch. Tested both little & big endian. (I've checked it still applies on svn trunk, but not done a retest on that; nothing seems to have changed around there). Dave 2011-12-19 Dr. David Alan Gilbert * sysdeps/arm/eabi/armv6t2/strchr.S: New file * sysdeps/arm/eabi/armv6t2/strlen.S: New file diff -urN ports/sysdeps/arm/eabi/armv6t2/strchr.S src/ports/sysdeps/arm/eabi/armv6t2/strchr.S --- ports/sysdeps/arm/eabi/armv6t2/strchr.S 1970-01-01 01:00:00.000000000 +0100 +++ ports/sysdeps/arm/eabi/armv6t2/strchr.S 2011-12-16 13:43:56.704694919 +0000 @@ -0,0 +1,71 @@ +/* Copyright (C) 2011 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Code contributed by Dave Gilbert + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA. */
+
+#include <sysdep.h>
+
+@ A very simple strchr routine, from benchmarks on A9 it's a bit faster than
+@ the current version in eglibc.
+@ While I have a version that does 8 bytes/loop and is a lot faster on long
+@ strings, it is slower on short strings, and short strings seem more common
+@ in strchr usage.
+@ Note: The use of cbz/cbnz means it's Thumb only
+
+@ 2011-02-07 david.gilbert@linaro.org
+@ Extracted from local git a5b438d861
+@ 2011-12-16 david.gilbert@linaro.org
+@ Copy from Cortex strings rev 65 and change license
+
+        .syntax unified
+
+        .text
+        .thumb
+
+@ ---------------------------------------------------------------------------
+
+        .thumb_func
+        .global strchr
+        .type strchr,%function
+ENTRY(strchr)
+        @ r0 = start of string (used as the read cursor; r2 is scratch)
+        @ r1 = character to match (only the low 8 bits are compared)
+        @ returns NULL for no match, or a pointer to the match
+        and     r1,r1, #255             @ reduce the int argument to a byte value
+
+1:
+        ldrb    r2,[r0],#1              @ r2 = next byte, r0 now one past it
+        cmp     r2,r1                   @ flags are consumed by the bne two lines down
+        cbz     r2,10f                  @ terminator reached (cbz leaves the flags alone)
+        bne     1b                      @ not the wanted byte - keep scanning
+
+        @ We're here if it matched
+5:
+        subs    r0,r0,#1                @ step back over the post-increment
+        DO_RET(lr)
+
+10:
+        @ We're here if we ran off the end
+        cmp     r1, #0  @ Corner case - you can search for the NUL and get a pointer to it
+        beq     5b      @ messy, if common we should branch at the start to a special loop
+        mov     r0,#0                   @ no match: return NULL
+        DO_RET(lr)
+
+END(strchr)
+
+weak_alias (strchr, index)
+libc_hidden_builtin_def(strchr)
diff -urN ports/sysdeps/arm/eabi/armv6t2/strlen.S src/ports/sysdeps/arm/eabi/armv6t2/strlen.S
--- ports/sysdeps/arm/eabi/armv6t2/strlen.S 1970-01-01 01:00:00.000000000 +0100
+++ ports/sysdeps/arm/eabi/armv6t2/strlen.S 2011-12-16 13:43:01.991130183 +0000
@@ -0,0 +1,118 @@
+/* Copyright (C) 2011 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Code contributed by Dave Gilbert
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA. */
+
+#include <sysdep.h>
+
+@ This strlen routine is optimised on a Cortex-A9 and should work on
+@ all ARMv7 processors. This routine is reasonably fast for short
+@ strings, but is probably slower than a simple implementation if all
+@ your strings are very short
+@ Note: The use of cbz/cbnz means it's Thumb only
+
+@ 2011-02-08 david.gilbert@linaro.org
+@ Extracted from local git 6848613a
+@ 2011-12-16 david.gilbert@linaro.org
+@ Copy from Cortex strings rev 65 and change license
+@ Add cfi magic, switch to ldrd
+
+
+@ this lets us check a flag in a 00/ff byte easily in either endianness
+#ifdef __ARMEB__
+#define CHARTSTMASK(c) (1<<(31-((c)*8)))
+#else
+#define CHARTSTMASK(c) (1<<((c)*8))
+#endif
+
+@-----------------------------------------------------------------------------
+        .syntax unified
+
+        .text
+        .thumb
+
+        .thumb_func
+        .global strlen
+        .type strlen,%function
+ENTRY(strlen)
+        @ r0 = string (r1-r3 are scratch; r4 and r6 are saved and restored)
+        @ returns count of bytes in string not including terminator
+        mov     r1, r0                  @ r1 = read cursor, r0 keeps the start
+        push    { r4,r6 }
+        cfi_adjust_cfa_offset (8)
+        cfi_rel_offset (r4, 0)
+        cfi_rel_offset (r6, 4)
+
+        cfi_remember_state
+
+        mvns    r6, #0                  @ all F
+        movs    r4, #0                  @ all 0
+        tst     r0, #7                  @ already 8-byte aligned?
+        beq     2f
+
+1:
+        ldrb    r2, [r1], #1
+        tst     r1, #7                  @ Hit alignment yet?
+        cbz     r2, 10f                 @ Exit if we found the 0
+        bne     1b                      @ uses the tst result (cbz leaves the flags alone)
+
+        @ So we're now aligned
+2:
+        ldrd    r2,r3,[r1],#8           @ two words per pass, r1 moves on by 8
+        uadd8   r2, r2, r6              @ Parallel add 0xff - sets the GE bits for anything that was not 0
+        sel     r2, r4, r6              @ bytes are 00 for non-00 bytes, or ff for 00 bytes - NOTE INVERSION
+        uadd8   r3, r3, r6              @ Parallel add 0xff - sets the GE bits for anything that was not 0
+        sel     r3, r2, r6              @ ff for 00 bytes, else the r2 mask byte - so r3 != 0 if either word had a 00
+        cmp     r3, #0
+        beq     2b                      @ no 00 byte in these 8 bytes - next pair
+
+strlenendtmp:
+        @ One (or more) of the bytes we loaded was 0 - but which one?
+        @ r2 has the mask corresponding to the first loaded word
+        @ r3 has a combined mask of the two words - but if the first word had
+        @ no 00 byte (r2 == 0) then it is just the mask of the 2nd word
+        cmp     r2, #0
+        itte    eq
+        moveq   r2, r3                  @ the end is in the 2nd word
+        subeq   r1,r1,#3                @ r1 was 8 past the pair: 2nd byte of 2nd word
+        subne   r1,r1,#7                @ r1 was 8 past the pair: 2nd byte of 1st word
+
+        @ r1 currently points to the 2nd byte of the word containing the 0
+        tst     r2, # CHARTSTMASK(0)    @ 1st character
+        bne     10f
+        adds    r1,r1,#1
+        tst     r2, # CHARTSTMASK(1)    @ 2nd character
+        ittt    eq
+        addeq   r1,r1,#1
+        tsteq   r2, # (3<<15)           @ bits 15+16 straddle the 2nd/3rd bytes; the 2nd is known 00 so this tests the 3rd in either endianness
+        @ If not the 3rd must be the last one (the addeq below sees the tsteq flags)
+        addeq   r1,r1,#1
+
+10:
+        @ r0 is still at the beginning, r1 is pointing 1 byte after terminator
+        sub     r0, r1, r0
+        subs    r0, r0, #1
+        pop     { r4, r6 }
+
+        cfi_adjust_cfa_offset (-8)
+        cfi_restore (r4)
+        cfi_restore (r6)
+
+        DO_RET(lr)
+
+END(strlen)
+libc_hidden_builtin_def (strlen)