Message ID | 20230914183704.1386534-2-adhemerval.zanella@linaro.org |
---|---|
State | Superseded |
Headers | show |
Series | Use introsort for qsort | expand |
On Thu, Sep 14, 2023 at 1:37 PM Adhemerval Zanella <adhemerval.zanella@linaro.org> wrote: > > The prototype is: > > void __memswap (void *restrict p1, void *restrict p2, size_t n) > > The function swaps the content of two memory blocks P1 and P2 of > len N. Memory overlap is NOT handled. > > It will be used on qsort optimization. > > Checked on x86_64-linux-gnu and aarch64-linux-gnu. > --- > include/string.h | 3 + > string/Makefile | 13 +++ > string/memswap.c | 41 +++++++++ > string/test-memswap.c | 191 ++++++++++++++++++++++++++++++++++++++++++ > 4 files changed, 248 insertions(+) > create mode 100644 string/memswap.c > create mode 100644 string/test-memswap.c > > diff --git a/include/string.h b/include/string.h > index 86d1fa4abe..f31a7f6006 100644 > --- a/include/string.h > +++ b/include/string.h > @@ -45,6 +45,9 @@ extern void *__memrchr (const void *__s, int __c, size_t __n) > extern void *__memchr (const void *__s, int __c, size_t __n) > __attribute_pure__; > > +extern void __memswap (void *restrict __p1, void *restrict __p2, size_t __n) > + attribute_hidden; > + > extern void __bzero (void *__s, size_t __n) __THROW __nonnull ((1)); > > extern int __ffs (int __i) __attribute__ ((const)); > diff --git a/string/Makefile b/string/Makefile > index 8cdfd5b000..3d0a3d5682 100644 > --- a/string/Makefile > +++ b/string/Makefile > @@ -69,6 +69,7 @@ routines := \ > mempcpy \ > memrchr \ > memset \ > + memswap \ > rawmemchr \ > sigabbrev_np \ > sigdescr_np \ > @@ -209,6 +210,18 @@ tests := \ > tst-xbzero-opt \ > # tests > > +tests-static-internal := \ > + test-memswap \ > +# tests-static-internal > + > +tests-internal := \ > + $(tests-static-internal) \ > + # tests-internal > + > +tests-static := \ > + $(tests-static-internal) \ > + # tests-static > + > # Both tests require the .mo translation files generated by msgfmt. > tests-translation := \ > tst-strerror \ > diff --git a/string/memswap.c b/string/memswap.c > new file mode 100644 > index 0000000000..37640c47ad > --- /dev/null > +++ b/string/memswap.c > @@ -0,0 +1,41 @@ > +/* Swap the content of two memory blocks, overlap is NOT handled. > + Copyright (C) 2023 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#include <string.h> > + > +void > +__memswap (void *restrict p1, void *restrict p2, size_t n) > +{ > + /* Use multiple small memcpys with constant size to enable inlining on most > + targets. */ > + enum { SWAP_GENERIC_SIZE = 32 }; > + unsigned char tmp[SWAP_GENERIC_SIZE]; > + while (n > SWAP_GENERIC_SIZE) > + { > + memcpy (tmp, p1, SWAP_GENERIC_SIZE); > + p1 = __mempcpy (p1, p2, SWAP_GENERIC_SIZE); > + p2 = __mempcpy (p2, tmp, SWAP_GENERIC_SIZE); > + n -= SWAP_GENERIC_SIZE; > + } > + while (n > 0) > + { > + unsigned char t = ((unsigned char *)p1)[--n]; > + ((unsigned char *)p1)[n] = ((unsigned char *)p2)[n]; > + ((unsigned char *)p2)[n] = t; > + } > +} Don't think it's a huge deal either way, but would have implemented this as a static function in a header file like: sysdeps/generic/memswap.h so it can be inlined. If arch maintainers needs to add ifunc/etc they can always create symbols and just call them from the arch header version. > diff --git a/string/test-memswap.c b/string/test-memswap.c > new file mode 100644 > index 0000000000..7696c43711 > --- /dev/null > +++ b/string/test-memswap.c > @@ -0,0 +1,191 @@ > +/* Test and measure memcpy functions. > + Copyright (C) 2023 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#include <string.h> > +#include <support/check.h> > + > +#define TEST_MAIN > +#define BUF1PAGES 3 > +#include "test-string.h" > + > +static unsigned char *ref1; > +static unsigned char *ref2; > + > +static void > +do_one_test (unsigned char *p1, unsigned char *ref1, unsigned char *p2, > + unsigned char *ref2, size_t len) > +{ > + __memswap (p1, p2, len); > + > + TEST_COMPARE_BLOB (p1, len, ref2, len); > + TEST_COMPARE_BLOB (p2, len, ref1, len); > +} > + > +static inline void > +do_test (size_t align1, size_t align2, size_t len) > +{ > + align1 &= page_size; > + if (align1 + len >= page_size) > + return; > + > + align2 &= page_size; > + if (align2 + len >= page_size) > + return; > + > + unsigned char *p1 = buf1 + align1; > + unsigned char *p2 = buf2 + align2; > + for (size_t repeats = 0; repeats < 2; ++repeats) > + { > + size_t i, j; > + for (i = 0, j = 1; i < len; i++, j += 23) > + { > + ref1[i] = p1[i] = j; > + ref2[i] = p2[i] = UCHAR_MAX - j; > + } > + > + do_one_test (p1, ref1, p2, ref2, len); > + } > +} > + > +static void > +do_random_tests (void) > +{ > + for (size_t n = 0; n < ITERATIONS; n++) > + { > + size_t len, size, size1, size2, align1, align2; > + > + if (n == 0) > + { > + len = getpagesize (); > + size = len + 512; > + size1 = size; > + size2 = size; > + align1 = 512; > + align2 = 512; > + } > + else > + { > + if ((random () & 255) == 0) > + size = 65536; > + else > + size = 768; > + if (size > page_size) > + size = page_size; > + size1 = size; > + size2 = size; > + size_t i = random (); > + if (i & 3) > + size -= 256; > + if (i & 1) > + size1 -= 256; > + if (i & 2) > + size2 -= 256; > + if (i & 4) > + { > + len = random () % size; > + align1 = size1 - len - (random () & 31); > + align2 = size2 - len - (random () & 31); > + if (align1 > size1) > + align1 = 0; > + if (align2 > size2) > + align2 = 0; > + } > + else > + { > + align1 = random () & 63; > + align2 = random () & 63; > + len = random () % size; > + if (align1 + len > size1) > + align1 = size1 - len; > + if (align2 + len > size2) > + align2 = size2 - len; > + } > + } > + unsigned char *p1 = buf1 + page_size - size1; > + unsigned char *p2 = buf2 + page_size - size2; > + size_t j = align1 + len + 256; > + if (j > size1) > + j = size1; > + for (size_t i = 0; i < j; ++i) > + ref1[i] = p1[i] = random () & 255; > + > + j = align2 + len + 256; > + if (j > size2) > + j = size2; > + > + for (size_t i = 0; i < j; ++i) > + ref2[i] = p2[i] = random () & 255; > + > + do_one_test (p1 + align1, ref1 + align1, p2 + align2, ref2 + align2, len); > + } > +} > + > +static int > +test_main (void) > +{ > + test_init (); > + /* Use the start of buf1 for reference buffers. */ > + ref1 = buf1; > + ref2 = buf1 + page_size; > + buf1 = ref2 + page_size; > + > + printf ("%23s", ""); > + printf ("\t__memswap\n"); > + > + for (size_t i = 0; i < 18; ++i) > + { > + do_test (0, 0, 1 << i); > + do_test (i, 0, 1 << i); > + do_test (0, i, 1 << i); > + do_test (i, i, 1 << i); > + } > + > + for (size_t i = 0; i < 32; ++i) > + { > + do_test (0, 0, i); > + do_test (i, 0, i); > + do_test (0, i, i); > + do_test (i, i, i); > + } > + > + for (size_t i = 3; i < 32; ++i) > + { > + if ((i & (i - 1)) == 0) > + continue; > + do_test (0, 0, 16 * i); > + do_test (i, 0, 16 * i); > + do_test (0, i, 16 * i); > + do_test (i, i, 16 * i); > + } > + > + for (size_t i = 19; i <= 25; ++i) > + { > + do_test (255, 0, 1 << i); > + do_test (0, 4000, 1 << i); > + do_test (0, 255, i); > + do_test (0, 4000, i); > + } > + > + do_test (0, 0, getpagesize ()); > + > + do_random_tests (); > + > + return 0; > +} > + > +#include <support/test-driver.c> > -- > 2.34.1 >
On Thu, Sep 14, 2023 at 3:37 PM Adhemerval Zanella < adhemerval.zanella@linaro.org> wrote: > The prototype is: > > void __memswap (void *restrict p1, void *restrict p2, size_t n) > > The function swaps the content of two memory blocks P1 and P2 of > len N. Memory overlap is NOT handled. > > It will be used on qsort optimization. > > Checked on x86_64-linux-gnu and aarch64-linux-gnu. > --- > include/string.h | 3 + > string/Makefile | 13 +++ > string/memswap.c | 41 +++++++++ > string/test-memswap.c | 191 ++++++++++++++++++++++++++++++++++++++++++ > 4 files changed, 248 insertions(+) > create mode 100644 string/memswap.c > create mode 100644 string/test-memswap.c > > diff --git a/include/string.h b/include/string.h > index 86d1fa4abe..f31a7f6006 100644 > --- a/include/string.h > +++ b/include/string.h > @@ -45,6 +45,9 @@ extern void *__memrchr (const void *__s, int __c, size_t > __n) > extern void *__memchr (const void *__s, int __c, size_t __n) > __attribute_pure__; > > +extern void __memswap (void *restrict __p1, void *restrict __p2, size_t > __n) > + attribute_hidden; > + > > +++ b/string/memswap.c > @ > An internal inline function should be just fine no ? compiler's are smarter nowadays.
On 15/09/23 11:36, Noah Goldstein wrote: >> + >> +void >> +__memswap (void *restrict p1, void *restrict p2, size_t n) >> +{ >> + /* Use multiple small memcpys with constant size to enable inlining on most >> + targets. */ >> + enum { SWAP_GENERIC_SIZE = 32 }; >> + unsigned char tmp[SWAP_GENERIC_SIZE]; >> + while (n > SWAP_GENERIC_SIZE) >> + { >> + memcpy (tmp, p1, SWAP_GENERIC_SIZE); >> + p1 = __mempcpy (p1, p2, SWAP_GENERIC_SIZE); >> + p2 = __mempcpy (p2, tmp, SWAP_GENERIC_SIZE); >> + n -= SWAP_GENERIC_SIZE; >> + } >> + while (n > 0) >> + { >> + unsigned char t = ((unsigned char *)p1)[--n]; >> + ((unsigned char *)p1)[n] = ((unsigned char *)p2)[n]; >> + ((unsigned char *)p2)[n] = t; >> + } >> +} > > Don't think it's a huge deal either way, > but would have implemented this as a static function in a header file like: > sysdeps/generic/memswap.h > > so it can be inlined. If arch maintainers needs to add ifunc/etc > they can always create symbols and just call them from the arch > header version. Indeed a inline function would be better in this case, I will change it.
diff --git a/include/string.h b/include/string.h index 86d1fa4abe..f31a7f6006 100644 --- a/include/string.h +++ b/include/string.h @@ -45,6 +45,9 @@ extern void *__memrchr (const void *__s, int __c, size_t __n) extern void *__memchr (const void *__s, int __c, size_t __n) __attribute_pure__; +extern void __memswap (void *restrict __p1, void *restrict __p2, size_t __n) + attribute_hidden; + extern void __bzero (void *__s, size_t __n) __THROW __nonnull ((1)); extern int __ffs (int __i) __attribute__ ((const)); diff --git a/string/Makefile b/string/Makefile index 8cdfd5b000..3d0a3d5682 100644 --- a/string/Makefile +++ b/string/Makefile @@ -69,6 +69,7 @@ routines := \ mempcpy \ memrchr \ memset \ + memswap \ rawmemchr \ sigabbrev_np \ sigdescr_np \ @@ -209,6 +210,18 @@ tests := \ tst-xbzero-opt \ # tests +tests-static-internal := \ + test-memswap \ +# tests-static-internal + +tests-internal := \ + $(tests-static-internal) \ + # tests-internal + +tests-static := \ + $(tests-static-internal) \ + # tests-static + # Both tests require the .mo translation files generated by msgfmt. tests-translation := \ tst-strerror \ diff --git a/string/memswap.c b/string/memswap.c new file mode 100644 index 0000000000..37640c47ad --- /dev/null +++ b/string/memswap.c @@ -0,0 +1,41 @@ +/* Swap the content of two memory blocks, overlap is NOT handled. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <string.h> + +void +__memswap (void *restrict p1, void *restrict p2, size_t n) +{ + /* Use multiple small memcpys with constant size to enable inlining on most + targets. */ + enum { SWAP_GENERIC_SIZE = 32 }; + unsigned char tmp[SWAP_GENERIC_SIZE]; + while (n > SWAP_GENERIC_SIZE) + { + memcpy (tmp, p1, SWAP_GENERIC_SIZE); + p1 = __mempcpy (p1, p2, SWAP_GENERIC_SIZE); + p2 = __mempcpy (p2, tmp, SWAP_GENERIC_SIZE); + n -= SWAP_GENERIC_SIZE; + } + while (n > 0) + { + unsigned char t = ((unsigned char *)p1)[--n]; + ((unsigned char *)p1)[n] = ((unsigned char *)p2)[n]; + ((unsigned char *)p2)[n] = t; + } +} diff --git a/string/test-memswap.c b/string/test-memswap.c new file mode 100644 index 0000000000..7696c43711 --- /dev/null +++ b/string/test-memswap.c @@ -0,0 +1,191 @@ +/* Test and measure memcpy functions. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <string.h> +#include <support/check.h> + +#define TEST_MAIN +#define BUF1PAGES 3 +#include "test-string.h" + +static unsigned char *ref1; +static unsigned char *ref2; + +static void +do_one_test (unsigned char *p1, unsigned char *ref1, unsigned char *p2, + unsigned char *ref2, size_t len) +{ + __memswap (p1, p2, len); + + TEST_COMPARE_BLOB (p1, len, ref2, len); + TEST_COMPARE_BLOB (p2, len, ref1, len); +} + +static inline void +do_test (size_t align1, size_t align2, size_t len) +{ + align1 &= page_size; + if (align1 + len >= page_size) + return; + + align2 &= page_size; + if (align2 + len >= page_size) + return; + + unsigned char *p1 = buf1 + align1; + unsigned char *p2 = buf2 + align2; + for (size_t repeats = 0; repeats < 2; ++repeats) + { + size_t i, j; + for (i = 0, j = 1; i < len; i++, j += 23) + { + ref1[i] = p1[i] = j; + ref2[i] = p2[i] = UCHAR_MAX - j; + } + + do_one_test (p1, ref1, p2, ref2, len); + } +} + +static void +do_random_tests (void) +{ + for (size_t n = 0; n < ITERATIONS; n++) + { + size_t len, size, size1, size2, align1, align2; + + if (n == 0) + { + len = getpagesize (); + size = len + 512; + size1 = size; + size2 = size; + align1 = 512; + align2 = 512; + } + else + { + if ((random () & 255) == 0) + size = 65536; + else + size = 768; + if (size > page_size) + size = page_size; + size1 = size; + size2 = size; + size_t i = random (); + if (i & 3) + size -= 256; + if (i & 1) + size1 -= 256; + if (i & 2) + size2 -= 256; + if (i & 4) + { + len = random () % size; + align1 = size1 - len - (random () & 31); + align2 = size2 - len - (random () & 31); + if (align1 > size1) + align1 = 0; + if (align2 > size2) + align2 = 0; + } + else + { + align1 = random () & 63; + align2 = random () & 63; + len = random () % size; + if (align1 + len > size1) + align1 = size1 - len; + if (align2 + len > size2) + align2 = size2 - len; + } + } + unsigned char *p1 = buf1 + page_size - size1; + unsigned char *p2 = buf2 + page_size - size2; + size_t j = align1 + len + 256; + if (j > size1) + j = size1; + for (size_t i = 0; i < j; ++i) + ref1[i] = p1[i] = random () & 255; + + j = align2 + len + 256; + if (j > size2) + j = size2; + + for (size_t i = 0; i < j; ++i) + ref2[i] = p2[i] = random () & 255; + + do_one_test (p1 + align1, ref1 + align1, p2 + align2, ref2 + align2, len); + } +} + +static int +test_main (void) +{ + test_init (); + /* Use the start of buf1 for reference buffers. */ + ref1 = buf1; + ref2 = buf1 + page_size; + buf1 = ref2 + page_size; + + printf ("%23s", ""); + printf ("\t__memswap\n"); + + for (size_t i = 0; i < 18; ++i) + { + do_test (0, 0, 1 << i); + do_test (i, 0, 1 << i); + do_test (0, i, 1 << i); + do_test (i, i, 1 << i); + } + + for (size_t i = 0; i < 32; ++i) + { + do_test (0, 0, i); + do_test (i, 0, i); + do_test (0, i, i); + do_test (i, i, i); + } + + for (size_t i = 3; i < 32; ++i) + { + if ((i & (i - 1)) == 0) + continue; + do_test (0, 0, 16 * i); + do_test (i, 0, 16 * i); + do_test (0, i, 16 * i); + do_test (i, i, 16 * i); + } + + for (size_t i = 19; i <= 25; ++i) + { + do_test (255, 0, 1 << i); + do_test (0, 4000, 1 << i); + do_test (0, 255, i); + do_test (0, 4000, i); + } + + do_test (0, 0, getpagesize ()); + + do_random_tests (); + + return 0; +} + +#include <support/test-driver.c>