benchtests: Switch string benchmarks to use bench-timing.h.

Message ID 52249F08.6060002@linaro.org
State Accepted
Headers show

Commit Message

Will Newton Sept. 2, 2013, 2:22 p.m.
Switch the string benchmarks to use bench-timing.h instead
of using hp-timing.h directly. This allows the string benchmarks
to be run usefully on architectures such as ARM that do not have
support for hp-timing.h.

In order to do this, the way the tests measure time has changed.
Previously each individual call was timed and the lowest execution
time recorded was reported; now a number of calls are timed together
and the mean execution time is reported.

ChangeLog:

2013-09-02   Will Newton  <will.newton@linaro.org>

	* benchtests/bench-timing.h (TIMING_PRINT_MEAN): New macro.
	* benchtests/bench-string.h: Include bench-timing.h instead
	of including hp-timing.h directly. (INNER_LOOP_ITERS): New
	define. (HP_TIMING_BEST): Delete macro. (test_init): Remove
	call to HP_TIMING_DIFF_INIT.
	* benchtests/bench-memccpy.c: Use bench-timing.h macros
	instead of hp-timing.h macros.
	* benchtests/bench-memchr.c: Likewise.
	* benchtests/bench-memcmp.c: Likewise.
	* benchtests/bench-memcpy.c: Likewise.
	* benchtests/bench-memmem.c: Likewise.
	* benchtests/bench-memmove.c: Likewise.
	* benchtests/bench-memset.c: Likewise.
	* benchtests/bench-rawmemchr.c: Likewise.
	* benchtests/bench-strcasecmp.c: Likewise.
	* benchtests/bench-strcasestr.c: Likewise.
	* benchtests/bench-strcat.c: Likewise.
	* benchtests/bench-strchr.c: Likewise.
	* benchtests/bench-strcmp.c: Likewise.
	* benchtests/bench-strcpy.c: Likewise.
	* benchtests/bench-strcpy_chk.c: Likewise.
	* benchtests/bench-strlen.c: Likewise.
	* benchtests/bench-strncasecmp.c: Likewise.
	* benchtests/bench-strncat.c: Likewise.
	* benchtests/bench-strncmp.c: Likewise.
	* benchtests/bench-strncpy.c: Likewise.
	* benchtests/bench-strnlen.c: Likewise.
	* benchtests/bench-strpbrk.c: Likewise.
	* benchtests/bench-strrchr.c: Likewise.
	* benchtests/bench-strspn.c: Likewise.
	* benchtests/bench-strstr.c: Likewise.
---
 benchtests/bench-memccpy.c     | 32 +++++++++++++-----------------
 benchtests/bench-memchr.c      | 32 +++++++++++++-----------------
 benchtests/bench-memcmp.c      | 32 +++++++++++++-----------------
 benchtests/bench-memcpy.c      | 32 +++++++++++++-----------------
 benchtests/bench-memmem.c      | 36 ++++++++++++++--------------------
 benchtests/bench-memmove.c     | 34 ++++++++++++++------------------
 benchtests/bench-memset.c      | 32 ++++++++++++------------------
 benchtests/bench-rawmemchr.c   | 31 ++++++++++++-----------------
 benchtests/bench-strcasecmp.c  | 31 ++++++++++++-----------------
 benchtests/bench-strcasestr.c  | 32 +++++++++++++-----------------
 benchtests/bench-strcat.c      | 35 ++++++++++++++-------------------
 benchtests/bench-strchr.c      | 34 ++++++++++++++------------------
 benchtests/bench-strcmp.c      | 32 +++++++++++++-----------------
 benchtests/bench-strcpy.c      | 30 ++++++++++++----------------
 benchtests/bench-strcpy_chk.c  | 28 ++++++++++++---------------
 benchtests/bench-string.h      | 15 +++-----------
 benchtests/bench-strlen.c      | 33 +++++++++++++------------------
 benchtests/bench-strncasecmp.c | 32 +++++++++++++-----------------
 benchtests/bench-strncat.c     | 36 ++++++++++++++--------------------
 benchtests/bench-strncmp.c     | 44 ++++++++++++++++--------------------------
 benchtests/bench-strncpy.c     | 30 ++++++++++++----------------
 benchtests/bench-strnlen.c     | 33 +++++++++++++------------------
 benchtests/bench-strpbrk.c     | 30 ++++++++++++----------------
 benchtests/bench-strrchr.c     | 32 +++++++++++++-----------------
 benchtests/bench-strspn.c      | 33 +++++++++++++------------------
 benchtests/bench-strstr.c      | 32 +++++++++++++-----------------
 benchtests/bench-timing.h      |  3 +++
 27 files changed, 334 insertions(+), 502 deletions(-)

Comments

Siddhesh Poyarekar Sept. 2, 2013, 2:35 p.m. | #1
On Mon, Sep 02, 2013 at 03:22:00PM +0100, Will Newton wrote:
> 
> Switch the string benchmarks to using bench-timing.h instead
> of hp-timing.h directly. This allows the string benchmarks to
> be run usefully on architectures such as ARM that do not have
> support for hp-timing.h.
> 
> In order to do this the tests have been changed from timing each
> individual call and picking the lowest execution time recorded to
> timing a number of calls and taking the mean execution time.
> 
> ChangeLog:
> 
> 2013-09-02   Will Newton  <will.newton@linaro.org>
> 
> 	* benchtests/bench-timing.h (TIMING_PRINT_MEAN): New macro.
> 	* benchtests/bench-string.h: Include bench-timing.h instead
> 	of including hp-timing.h directly. (INNER_LOOP_ITERS): New
> 	define. (HP_TIMING_BEST): Delete macro. (test_init): Remove
> 	call to HP_TIMING_DIFF_INIT.
> 	* benchtests/bench-memccpy.c: Use bench-timing.h macros
> 	instead of hp-timing.h macros.
> 	* benchtests/bench-memchr.c: Likewise.
> 	* benchtests/bench-memcmp.c: Likewise.
> 	* benchtests/bench-memcpy.c: Likewise.
> 	* benchtests/bench-memmem.c: Likewise.
> 	* benchtests/bench-memmove.c: Likewise.
> 	* benchtests/bench-memset.c: Likewise.
> 	* benchtests/bench-rawmemchr.c: Likewise.
> 	* benchtests/bench-strcasecmp.c: Likewise.
> 	* benchtests/bench-strcasestr.c: Likewise.
> 	* benchtests/bench-strcat.c: Likewise.
> 	* benchtests/bench-strchr.c: Likewise.
> 	* benchtests/bench-strcmp.c: Likewise.
> 	* benchtests/bench-strcpy.c: Likewise.
> 	* benchtests/bench-strcpy_chk.c: Likewise.
> 	* benchtests/bench-strlen.c: Likewise.
> 	* benchtests/bench-strncasecmp.c: Likewise.
> 	* benchtests/bench-strncat.c: Likewise.
> 	* benchtests/bench-strncmp.c: Likewise.
> 	* benchtests/bench-strncpy.c: Likewise.
> 	* benchtests/bench-strnlen.c: Likewise.
> 	* benchtests/bench-strpbrk.c: Likewise.
> 	* benchtests/bench-strrchr.c: Likewise.
> 	* benchtests/bench-strspn.c: Likewise.
> 	* benchtests/bench-strstr.c: Likewise.
> ---
>  benchtests/bench-memccpy.c     | 32 +++++++++++++-----------------
>  benchtests/bench-memchr.c      | 32 +++++++++++++-----------------
>  benchtests/bench-memcmp.c      | 32 +++++++++++++-----------------
>  benchtests/bench-memcpy.c      | 32 +++++++++++++-----------------
>  benchtests/bench-memmem.c      | 36 ++++++++++++++--------------------
>  benchtests/bench-memmove.c     | 34 ++++++++++++++------------------
>  benchtests/bench-memset.c      | 32 ++++++++++++------------------
>  benchtests/bench-rawmemchr.c   | 31 ++++++++++++-----------------
>  benchtests/bench-strcasecmp.c  | 31 ++++++++++++-----------------
>  benchtests/bench-strcasestr.c  | 32 +++++++++++++-----------------
>  benchtests/bench-strcat.c      | 35 ++++++++++++++-------------------
>  benchtests/bench-strchr.c      | 34 ++++++++++++++------------------
>  benchtests/bench-strcmp.c      | 32 +++++++++++++-----------------
>  benchtests/bench-strcpy.c      | 30 ++++++++++++----------------
>  benchtests/bench-strcpy_chk.c  | 28 ++++++++++++---------------
>  benchtests/bench-string.h      | 15 +++-----------
>  benchtests/bench-strlen.c      | 33 +++++++++++++------------------
>  benchtests/bench-strncasecmp.c | 32 +++++++++++++-----------------
>  benchtests/bench-strncat.c     | 36 ++++++++++++++--------------------
>  benchtests/bench-strncmp.c     | 44 ++++++++++++++++--------------------------
>  benchtests/bench-strncpy.c     | 30 ++++++++++++----------------
>  benchtests/bench-strnlen.c     | 33 +++++++++++++------------------
>  benchtests/bench-strpbrk.c     | 30 ++++++++++++----------------
>  benchtests/bench-strrchr.c     | 32 +++++++++++++-----------------
>  benchtests/bench-strspn.c      | 33 +++++++++++++------------------
>  benchtests/bench-strstr.c      | 32 +++++++++++++-----------------
>  benchtests/bench-timing.h      |  3 +++
>  27 files changed, 334 insertions(+), 502 deletions(-)
> 
> diff --git a/benchtests/bench-memccpy.c b/benchtests/bench-memccpy.c
> index 612513c..2c47e79 100644
> --- a/benchtests/bench-memccpy.c
> +++ b/benchtests/bench-memccpy.c
> @@ -59,6 +59,9 @@ do_one_test (impl_t *impl, void *dst, const void *src, int c, size_t len,
>  	     size_t n)
>  {
>    void *expect = len > n ? NULL : (char *) dst + len;
> +  size_t i, iters = INNER_LOOP_ITERS;
> +  timing_t start, stop, cur;
> +
>    if (CALL (impl, dst, src, c, n) != expect)
>      {
>        error (0, 0, "Wrong result in function %s %p %p", impl->name,
> @@ -74,23 +77,16 @@ do_one_test (impl_t *impl, void *dst, const void *src, int c, size_t len,
>        return;
>      }
> 
> -  if (HP_TIMING_AVAIL)
> +  TIMING_NOW (start);
> +  for (i = 0; i < iters; ++i)
>      {
> -      hp_timing_t start __attribute__ ((unused));
> -      hp_timing_t stop __attribute__ ((unused));
> -      hp_timing_t best_time = ~ (hp_timing_t) 0;
> -      size_t i;
> -
> -      for (i = 0; i < 32; ++i)
> -	{
> -	  HP_TIMING_NOW (start);
> -	  CALL (impl, dst, src, c, n);
> -	  HP_TIMING_NOW (stop);
> -	  HP_TIMING_BEST (best_time, start, stop);
> -	}
> -
> -      printf ("\t%zd", (size_t) best_time);
> +      CALL (impl, dst, src, c, n);
>      }
> +  TIMING_NOW (stop);
> +
> +  TIMING_DIFF (cur, start, stop);
> +
> +  TIMING_PRINT_MEAN ((double) cur, (double) iters);
>  }
> 
>  static void
> @@ -121,14 +117,12 @@ do_test (size_t align1, size_t align2, int c, size_t len, size_t n,
>    for (i = len; i + align1 < page_size && i < len + 64; ++i)
>      s1[i] = 32 + 32 * i % (max_char - 32);
> 
> -  if (HP_TIMING_AVAIL)
> -    printf ("Length %4zd, n %4zd, char %d, alignment %2zd/%2zd:", len, n, c, align1, align2);
> +  printf ("Length %4zd, n %4zd, char %d, alignment %2zd/%2zd:", len, n, c, align1, align2);
> 
>    FOR_EACH_IMPL (impl, 0)
>      do_one_test (impl, s2, s1, c, len, n);
> 
> -  if (HP_TIMING_AVAIL)
> -    putchar ('\n');
> +  putchar ('\n');
>  }
> 
>  int
> diff --git a/benchtests/bench-memchr.c b/benchtests/bench-memchr.c
> index 5470ce6..db099ad 100644
> --- a/benchtests/bench-memchr.c
> +++ b/benchtests/bench-memchr.c
> @@ -39,6 +39,9 @@ static void
>  do_one_test (impl_t *impl, const char *s, int c, size_t n, char *exp_res)
>  {
>    char *res = CALL (impl, s, c, n);
> +  size_t i, iters = INNER_LOOP_ITERS;
> +  timing_t start, stop, cur;
> +
>    if (res != exp_res)
>      {
>        error (0, 0, "Wrong result in function %s %p %p", impl->name,
> @@ -47,23 +50,16 @@ do_one_test (impl_t *impl, const char *s, int c, size_t n, char *exp_res)
>        return;
>      }
> 
> -  if (HP_TIMING_AVAIL)
> +  TIMING_NOW (start);
> +  for (i = 0; i < iters; ++i)
>      {
> -      hp_timing_t start __attribute ((unused));
> -      hp_timing_t stop __attribute ((unused));
> -      hp_timing_t best_time = ~ (hp_timing_t) 0;
> -      size_t i;
> -
> -      for (i = 0; i < 32; ++i)
> -	{
> -	  HP_TIMING_NOW (start);
> -	  CALL (impl, s, c, n);
> -	  HP_TIMING_NOW (stop);
> -	  HP_TIMING_BEST (best_time, start, stop);
> -	}
> -
> -      printf ("\t%zd", (size_t) best_time);
> +      CALL (impl, s, c, n);
>      }
> +  TIMING_NOW (stop);
> +
> +  TIMING_DIFF (cur, start, stop);
> +
> +  TIMING_PRINT_MEAN ((double) cur, (double) iters);
>  }
> 
>  static void
> @@ -96,14 +92,12 @@ do_test (size_t align, size_t pos, size_t len, int seek_char)
>        buf1[align + len] = seek_char;
>      }
> 
> -  if (HP_TIMING_AVAIL)
> -    printf ("Length %4zd, alignment %2zd:", pos, align);
> +  printf ("Length %4zd, alignment %2zd:", pos, align);
> 
>    FOR_EACH_IMPL (impl, 0)
>      do_one_test (impl, (char *) (buf1 + align), seek_char, len, result);
> 
> -  if (HP_TIMING_AVAIL)
> -    putchar ('\n');
> +  putchar ('\n');
>  }
> 
>  int
> diff --git a/benchtests/bench-memcmp.c b/benchtests/bench-memcmp.c
> index 053bb50..544130b 100644
> --- a/benchtests/bench-memcmp.c
> +++ b/benchtests/bench-memcmp.c
> @@ -78,23 +78,19 @@ static void
>  do_one_test (impl_t *impl, const CHAR *s1, const CHAR *s2, size_t len,
>  	     int exp_result)
>  {
> -  if (HP_TIMING_AVAIL)
> +  size_t i, iters = INNER_LOOP_ITERS;
> +  timing_t start, stop, cur;
> +
> +  TIMING_NOW (start);
> +  for (i = 0; i < iters; ++i)
>      {
> -      hp_timing_t start __attribute ((unused));
> -      hp_timing_t stop __attribute ((unused));
> -      hp_timing_t best_time = ~ (hp_timing_t) 0;
> -      size_t i;
> -
> -      for (i = 0; i < 32; ++i)
> -	{
> -	  HP_TIMING_NOW (start);
> -	  CALL (impl, s1, s2, len);
> -	  HP_TIMING_NOW (stop);
> -	  HP_TIMING_BEST (best_time, start, stop);
> -	}
> -
> -      printf ("\t%zd", (size_t) best_time);
> +      CALL (impl, s1, s2, len);
>      }
> +  TIMING_NOW (stop);
> +
> +  TIMING_DIFF (cur, start, stop);
> +
> +  TIMING_PRINT_MEAN ((double) cur, (double) iters);
>  }
> 
>  static void
> @@ -124,14 +120,12 @@ do_test (size_t align1, size_t align2, size_t len, int exp_result)
>    s2[len] = align2;
>    s2[len - 1] -= exp_result;
> 
> -  if (HP_TIMING_AVAIL)
> -    printf ("Length %4zd, alignment %2zd/%2zd:", len, align1, align2);
> +  printf ("Length %4zd, alignment %2zd/%2zd:", len, align1, align2);
> 
>    FOR_EACH_IMPL (impl, 0)
>      do_one_test (impl, s1, s2, len, exp_result);
> 
> -  if (HP_TIMING_AVAIL)
> -    putchar ('\n');
> +  putchar ('\n');
>  }
> 
>  int
> diff --git a/benchtests/bench-memcpy.c b/benchtests/bench-memcpy.c
> index 1b12671..8cd9c23 100644
> --- a/benchtests/bench-memcpy.c
> +++ b/benchtests/bench-memcpy.c
> @@ -52,6 +52,9 @@ static void
>  do_one_test (impl_t *impl, char *dst, const char *src,
>  	     size_t len)
>  {
> +  size_t i, iters = INNER_LOOP_ITERS;
> +  timing_t start, stop, cur;
> +
>    if (CALL (impl, dst, src, len) != MEMCPY_RESULT (dst, len))
>      {
>        error (0, 0, "Wrong result in function %s %p %p", impl->name,
> @@ -68,23 +71,16 @@ do_one_test (impl_t *impl, char *dst, const char *src,
>        return;
>      }
> 
> -  if (HP_TIMING_AVAIL)
> +  TIMING_NOW (start);
> +  for (i = 0; i < iters; ++i)
>      {
> -      hp_timing_t start __attribute ((unused));
> -      hp_timing_t stop __attribute ((unused));
> -      hp_timing_t best_time = ~ (hp_timing_t) 0;
> -      size_t i;
> -
> -      for (i = 0; i < 32; ++i)
> -	{
> -	  HP_TIMING_NOW (start);
> -	  CALL (impl, dst, src, len);
> -	  HP_TIMING_NOW (stop);
> -	  HP_TIMING_BEST (best_time, start, stop);
> -	}
> -
> -      printf ("\t%zd", (size_t) best_time);
> +      CALL (impl, dst, src, len);
>      }
> +  TIMING_NOW (stop);
> +
> +  TIMING_DIFF (cur, start, stop);
> +
> +  TIMING_PRINT_MEAN ((double) cur, (double) iters);
>  }
> 
>  static void
> @@ -107,14 +103,12 @@ do_test (size_t align1, size_t align2, size_t len)
>    for (i = 0, j = 1; i < len; i++, j += 23)
>      s1[i] = j;
> 
> -  if (HP_TIMING_AVAIL)
> -    printf ("Length %4zd, alignment %2zd/%2zd:", len, align1, align2);
> +  printf ("Length %4zd, alignment %2zd/%2zd:", len, align1, align2);
> 
>    FOR_EACH_IMPL (impl, 0)
>      do_one_test (impl, s2, s1, len);
> 
> -  if (HP_TIMING_AVAIL)
> -    putchar ('\n');
> +  putchar ('\n');
>  }
> 
>  int
> diff --git a/benchtests/bench-memmem.c b/benchtests/bench-memmem.c
> index ca758a8..b8f8a8b 100644
> --- a/benchtests/bench-memmem.c
> +++ b/benchtests/bench-memmem.c
> @@ -60,23 +60,19 @@ static void
>  do_one_test (impl_t *impl, const void *haystack, size_t haystack_len,
>  	     const void *needle, size_t needle_len, const void *expected)
>  {
> -  if (HP_TIMING_AVAIL)
> +  size_t i, iters = INNER_LOOP_ITERS;
> +  timing_t start, stop, cur;
> +
> +  TIMING_NOW (start);
> +  for (i = 0; i < iters; ++i)
>      {
> -      hp_timing_t start __attribute ((unused));
> -      hp_timing_t stop __attribute ((unused));
> -      hp_timing_t best_time = ~ (hp_timing_t) 0;
> -      size_t i;
> +      CALL (impl, haystack, haystack_len, needle, needle_len);
> +    }
> +  TIMING_NOW (stop);
> 
> -      for (i = 0; i < 32; ++i)
> -	{
> -	  HP_TIMING_NOW (start);
> -	  CALL (impl, haystack, haystack_len, needle, needle_len);
> -	  HP_TIMING_NOW (stop);
> -	  HP_TIMING_BEST (best_time, start, stop);
> -	}
> +  TIMING_DIFF (cur, start, stop);
> 
> -      printf ("\t%zd", (size_t) best_time);
> -    }
> +  TIMING_PRINT_MEAN ((double) cur, (double) iters);
>  }
> 
>  static void
> @@ -87,16 +83,14 @@ do_test (const char *str, size_t len, size_t idx)
>    memcpy (tmpbuf, buf1 + idx, len);
>    memcpy (buf1 + idx, str, len);
> 
> -  if (HP_TIMING_AVAIL)
> -    printf ("String %s, offset %zd:", str, idx);
> +  printf ("String %s, offset %zd:", str, idx);
> 
>    FOR_EACH_IMPL (impl, 0)
>      do_one_test (impl, buf1, BUF1PAGES * page_size, str, len, buf1 + idx);
> 
>    memcpy (buf1 + idx, tmpbuf, len);
> 
> -  if (HP_TIMING_AVAIL)
> -    putchar ('\n');
> +  putchar ('\n');
>  }
> 
>  static void
> @@ -120,15 +114,13 @@ do_random_tests (void)
>  	  buf1[idx + off] = ch;
>  	}
> 
> -      if (HP_TIMING_AVAIL)
> -	printf ("String %.*s, offset %zd:", (int) len, buf1 + idx, idx);
> +      printf ("String %.*s, offset %zd:", (int) len, buf1 + idx, idx);
> 
>        FOR_EACH_IMPL (impl, 0)
>  	do_one_test (impl, buf1, BUF1PAGES * page_size, buf1 + idx, len,
>  		     buf1 + idx);
> 
> -      if (HP_TIMING_AVAIL)
> -	putchar ('\n');
> +      putchar ('\n');
> 
>        memcpy (buf1 + idx, tmpbuf, len);
>      }
> diff --git a/benchtests/bench-memmove.c b/benchtests/bench-memmove.c
> index 8925606..332d6af 100644
> --- a/benchtests/bench-memmove.c
> +++ b/benchtests/bench-memmove.c
> @@ -67,6 +67,9 @@ static void
>  do_one_test (impl_t *impl, char *dst, char *src, const char *orig_src,
>  	     size_t len)
>  {
> +  size_t i, iters = INNER_LOOP_ITERS;
> +  timing_t start, stop, cur;
> +
>    memcpy (src, orig_src, len);
>  #ifdef TEST_BCOPY
>    CALL (impl, src, dst, len);
> @@ -91,27 +94,20 @@ do_one_test (impl_t *impl, char *dst, char *src, const char *orig_src,
>        return;
>      }
> 
> -  if (HP_TIMING_AVAIL)
> +  TIMING_NOW (start);
> +  for (i = 0; i < iters; ++i)
>      {
> -      hp_timing_t start __attribute ((unused));
> -      hp_timing_t stop __attribute ((unused));
> -      hp_timing_t best_time = ~ (hp_timing_t) 0;
> -      size_t i;
> -
> -      for (i = 0; i < 32; ++i)
> -	{
> -	  HP_TIMING_NOW (start);
>  #ifdef TEST_BCOPY
> -	  CALL (impl, src, dst, len);
> +      CALL (impl, src, dst, len);
>  #else
> -	  CALL (impl, dst, src, len);
> +      CALL (impl, dst, src, len);
>  #endif
> -	  HP_TIMING_NOW (stop);
> -	  HP_TIMING_BEST (best_time, start, stop);
> -	}
> -
> -      printf ("\t%zd", (size_t) best_time);
>      }
> +  TIMING_NOW (stop);
> +
> +  TIMING_DIFF (cur, start, stop);
> +
> +  TIMING_PRINT_MEAN ((double) cur, (double) iters);
>  }
> 
>  static void
> @@ -134,14 +130,12 @@ do_test (size_t align1, size_t align2, size_t len)
>    for (i = 0, j = 1; i < len; i++, j += 23)
>      s1[i] = j;
> 
> -  if (HP_TIMING_AVAIL)
> -    printf ("Length %4zd, alignment %2zd/%2zd:", len, align1, align2);
> +  printf ("Length %4zd, alignment %2zd/%2zd:", len, align1, align2);
> 
>    FOR_EACH_IMPL (impl, 0)
>      do_one_test (impl, s2, (char *) (buf2 + align1), s1, len);
> 
> -  if (HP_TIMING_AVAIL)
> -    putchar ('\n');
> +  putchar ('\n');
>  }
> 
>  int
> diff --git a/benchtests/bench-memset.c b/benchtests/bench-memset.c
> index ea29cf3..e45807c 100644
> --- a/benchtests/bench-memset.c
> +++ b/benchtests/bench-memset.c
> @@ -75,6 +75,8 @@ simple_memset (char *s, int c, size_t n)
>  static void
>  do_one_test (impl_t *impl, char *s, int c __attribute ((unused)), size_t n)
>  {
> +  size_t i, iters = INNER_LOOP_ITERS;
> +  timing_t start, stop, cur;
>    char tstbuf[n];
>  #ifdef TEST_BZERO
>    simple_bzero (tstbuf, n);
> @@ -92,28 +94,20 @@ do_one_test (impl_t *impl, char *s, int c __attribute ((unused)), size_t n)
>        return;
>      }
> 
> -  if (HP_TIMING_AVAIL)
> +  TIMING_NOW (start);
> +  for (i = 0; i < iters; ++i)
>      {
> -      hp_timing_t start __attribute ((unused));
> -      hp_timing_t stop __attribute ((unused));
> -      hp_timing_t best_time = ~ (hp_timing_t) 0;
> -      size_t i;
> -
> -      for (i = 0; i < 32; ++i)
> -	{
> -	  HP_TIMING_NOW (start);
>  #ifdef TEST_BZERO
> -	  CALL (impl, s, n);
> +      CALL (impl, s, n);
>  #else
> -	  CALL (impl, s, c, n);
> +      CALL (impl, s, c, n);
>  #endif
> +    }
> +  TIMING_NOW (stop);
> 
> -	  HP_TIMING_NOW (stop);
> -	  HP_TIMING_BEST (best_time, start, stop);
> -	}
> +  TIMING_DIFF (cur, start, stop);
> 
> -      printf ("\t%zd", (size_t) best_time);
> -    }
> +  TIMING_PRINT_MEAN ((double) cur, (double) iters);
>  }
> 
>  static void
> @@ -123,14 +117,12 @@ do_test (size_t align, int c, size_t len)
>    if (align + len > page_size)
>      return;
> 
> -  if (HP_TIMING_AVAIL)
> -    printf ("Length %4zd, alignment %2zd, c %2d:", len, align, c);
> +  printf ("Length %4zd, alignment %2zd, c %2d:", len, align, c);
> 
>    FOR_EACH_IMPL (impl, 0)
>      do_one_test (impl, (char *) buf1 + align, c, len);
> 
> -  if (HP_TIMING_AVAIL)
> -    putchar ('\n');
> +  putchar ('\n');
>  }
> 
>  int
> diff --git a/benchtests/bench-rawmemchr.c b/benchtests/bench-rawmemchr.c
> index a6b29d7..df6a310 100644
> --- a/benchtests/bench-rawmemchr.c
> +++ b/benchtests/bench-rawmemchr.c
> @@ -40,6 +40,8 @@ simple_rawmemchr (const char *s, int c)
>  static void
>  do_one_test (impl_t *impl, const char *s, int c, char *exp_res)
>  {
> +  size_t i, iters = INNER_LOOP_ITERS;
> +  timing_t start, stop, cur;
>    char *res = CALL (impl, s, c);
>    if (res != exp_res)
>      {
> @@ -49,23 +51,16 @@ do_one_test (impl_t *impl, const char *s, int c, char *exp_res)
>        return;
>      }
> 
> -  if (HP_TIMING_AVAIL)
> +  TIMING_NOW (start);
> +  for (i = 0; i < iters; ++i)
>      {
> -      hp_timing_t start __attribute ((unused));
> -      hp_timing_t stop __attribute ((unused));
> -      hp_timing_t best_time = ~ (hp_timing_t) 0;
> -      size_t i;
> -
> -      for (i = 0; i < 32; ++i)
> -	{
> -	  HP_TIMING_NOW (start);
> -	  CALL (impl, s, c);
> -	  HP_TIMING_NOW (stop);
> -	  HP_TIMING_BEST (best_time, start, stop);
> -	}
> -
> -      printf ("\t%zd", (size_t) best_time);
> +      CALL (impl, s, c);
>      }
> +  TIMING_NOW (stop);
> +
> +  TIMING_DIFF (cur, start, stop);
> +
> +  TIMING_PRINT_MEAN ((double) cur, (double) iters);
>  }
> 
>  static void
> @@ -92,14 +87,12 @@ do_test (size_t align, size_t pos, size_t len, int seek_char)
>    buf1[align + len] = -seek_char;
>    result = (char *) (buf1 + align + pos);
> 
> -  if (HP_TIMING_AVAIL)
> -    printf ("Length %4zd, alignment %2zd:", pos, align);
> +  printf ("Length %4zd, alignment %2zd:", pos, align);
> 
>    FOR_EACH_IMPL (impl, 0)
>      do_one_test (impl, (char *) (buf1 + align), seek_char, result);
> 
> -  if (HP_TIMING_AVAIL)
> -    putchar ('\n');
> +  putchar ('\n');
>  }
> 
>  int
> diff --git a/benchtests/bench-strcasecmp.c b/benchtests/bench-strcasecmp.c
> index 27250bb..1458df1 100644
> --- a/benchtests/bench-strcasecmp.c
> +++ b/benchtests/bench-strcasecmp.c
> @@ -62,6 +62,8 @@ stupid_strcasecmp (const char *s1, const char *s2)
>  static void
>  do_one_test (impl_t *impl, const char *s1, const char *s2, int exp_result)
>  {
> +  size_t i, iters = INNER_LOOP_ITERS;
> +  timing_t start, stop, cur;
>    int result = CALL (impl, s1, s2);
>    if ((exp_result == 0 && result != 0)
>        || (exp_result < 0 && result >= 0)
> @@ -73,23 +75,16 @@ do_one_test (impl_t *impl, const char *s1, const char *s2, int exp_result)
>        return;
>      }
> 
> -  if (HP_TIMING_AVAIL)
> +  TIMING_NOW (start);
> +  for (i = 0; i < iters; ++i)
>      {
> -      hp_timing_t start __attribute ((unused));
> -      hp_timing_t stop __attribute ((unused));
> -      hp_timing_t best_time = ~ (hp_timing_t) 0;
> -      size_t i;
> -
> -      for (i = 0; i < 32; ++i)
> -	{
> -	  HP_TIMING_NOW (start);
> -	  CALL (impl, s1, s2);
> -	  HP_TIMING_NOW (stop);
> -	  HP_TIMING_BEST (best_time, start, stop);
> -	}
> -
> -      printf ("\t%zd", (size_t) best_time);
> +      CALL (impl, s1, s2);
>      }
> +  TIMING_NOW (stop);
> +
> +  TIMING_DIFF (cur, start, stop);
> +
> +  TIMING_PRINT_MEAN ((double) cur, (double) iters);
>  }
> 
>  static void
> @@ -128,14 +123,12 @@ do_test (size_t align1, size_t align2, size_t len, int max_char,
>    else
>      s2[len - 1] -= exp_result;
> 
> -  if (HP_TIMING_AVAIL)
> -    printf ("Length %4zd, alignment %2zd/%2zd:", len, align1, align2);
> +  printf ("Length %4zd, alignment %2zd/%2zd:", len, align1, align2);
> 
>    FOR_EACH_IMPL (impl, 0)
>      do_one_test (impl, s1, s2, exp_result);
> 
> -  if (HP_TIMING_AVAIL)
> -    putchar ('\n');
> +  putchar ('\n');
>  }
> 
>  int
> diff --git a/benchtests/bench-strcasestr.c b/benchtests/bench-strcasestr.c
> index 289b490..68b7e95 100644
> --- a/benchtests/bench-strcasestr.c
> +++ b/benchtests/bench-strcasestr.c
> @@ -60,23 +60,19 @@ IMPL (strcasestr, 1)
>  static void
>  do_one_test (impl_t *impl, const char *s1, const char *s2, char *exp_result)
>  {
> -  if (HP_TIMING_AVAIL)
> +  size_t i, iters = INNER_LOOP_ITERS;
> +  timing_t start, stop, cur;
> +
> +  TIMING_NOW (start);
> +  for (i = 0; i < iters; ++i)
>      {
> -      hp_timing_t start __attribute ((unused));
> -      hp_timing_t stop __attribute ((unused));
> -      hp_timing_t best_time = ~(hp_timing_t) 0;
> -      size_t i;
> +      CALL (impl, s1, s2);
> +    }
> +  TIMING_NOW (stop);
> 
> -      for (i = 0; i < 32; ++i)
> -	{
> -	  HP_TIMING_NOW (start);
> -	  CALL (impl, s1, s2);
> -	  HP_TIMING_NOW (stop);
> -	  HP_TIMING_BEST (best_time, start, stop);
> -	}
> +  TIMING_DIFF (cur, start, stop);
> 
> -      printf ("\t%zd", (size_t) best_time);
> -    }
> +  TIMING_PRINT_MEAN ((double) cur, (double) iters);
>  }
> 
> 
> @@ -116,15 +112,13 @@ do_test (size_t align1, size_t align2, size_t len1, size_t len2,
>      }
>    s1[len1] = '\0';
> 
> -  if (HP_TIMING_AVAIL)
> -    printf ("Length %4zd/%zd, alignment %2zd/%2zd, %s:",
> -	    len1, len2, align1, align2, fail ? "fail" : "found");
> +  printf ("Length %4zd/%zd, alignment %2zd/%2zd, %s:",
> +	  len1, len2, align1, align2, fail ? "fail" : "found");
> 
>    FOR_EACH_IMPL (impl, 0)
>      do_one_test (impl, s1, s2, fail ? NULL : s1 + len1 - len2);
> 
> -  if (HP_TIMING_AVAIL)
> -    putchar ('\n');
> +  putchar ('\n');
>  }
> 
>  static int
> diff --git a/benchtests/bench-strcat.c b/benchtests/bench-strcat.c
> index b70a272..6602009 100644
> --- a/benchtests/bench-strcat.c
> +++ b/benchtests/bench-strcat.c
> @@ -39,7 +39,9 @@ simple_strcat (char *dst, const char *src)
>  static void
>  do_one_test (impl_t *impl, char *dst, const char *src)
>  {
> -  size_t k = strlen (dst);
> +  size_t k = strlen (dst), i, iters = INNER_LOOP_ITERS;
> +  timing_t start, stop, cur;
> +
>    if (CALL (impl, dst, src) != dst)
>      {
>        error (0, 0, "Wrong result in function %s %p %p", impl->name,
> @@ -56,24 +58,17 @@ do_one_test (impl_t *impl, char *dst, const char *src)
>        return;
>      }
> 
> -  if (HP_TIMING_AVAIL)
> +  TIMING_NOW (start);
> +  for (i = 0; i < iters; ++i)
>      {
> -      hp_timing_t start __attribute ((unused));
> -      hp_timing_t stop __attribute ((unused));
> -      hp_timing_t best_time = ~ (hp_timing_t) 0;
> -      size_t i;
> -
> -      for (i = 0; i < 32; ++i)
> -	{
> -	  dst[k] = '\0';
> -	  HP_TIMING_NOW (start);
> -	  CALL (impl, dst, src);
> -	  HP_TIMING_NOW (stop);
> -	  HP_TIMING_BEST (best_time, start, stop);
> -	}
> -
> -      printf ("\t%zd", (size_t) best_time);
> +      dst[k] = '\0';
> +      CALL (impl, dst, src);
>      }
> +  TIMING_NOW (stop);
> +
> +  TIMING_DIFF (cur, start, stop);
> +
> +  TIMING_PRINT_MEAN ((double) cur, (double) iters);
>  }
> 
>  static void
> @@ -100,8 +95,7 @@ do_test (size_t align1, size_t align2, size_t len1, size_t len2, int max_char)
>    for (i = 0; i < len2; i++)
>      s2[i] = 32 + 23 * i % (max_char - 32);
> 
> -  if (HP_TIMING_AVAIL)
> -    printf ("Length %4zd/%4zd, alignment %2zd/%2zd:", len1, len2, align1, align2);
> +  printf ("Length %4zd/%4zd, alignment %2zd/%2zd:", len1, len2, align1, align2);
> 
>    FOR_EACH_IMPL (impl, 0)
>      {
> @@ -109,8 +103,7 @@ do_test (size_t align1, size_t align2, size_t len1, size_t len2, int max_char)
>        do_one_test (impl, s2, s1);
>      }
> 
> -  if (HP_TIMING_AVAIL)
> -    putchar ('\n');
> +  putchar ('\n');
>  }
> 
>  int
> diff --git a/benchtests/bench-strchr.c b/benchtests/bench-strchr.c
> index 710b592..d432ba5 100644
> --- a/benchtests/bench-strchr.c
> +++ b/benchtests/bench-strchr.c
> @@ -89,23 +89,19 @@ IMPL (STRCHR, 1)
>  static void
>  do_one_test (impl_t *impl, const CHAR *s, int c, const CHAR *exp_res)
>  {
> -  if (HP_TIMING_AVAIL)
> +  size_t i, iters = INNER_LOOP_ITERS;
> +  timing_t start, stop, cur;
> +
> +  TIMING_NOW (start);
> +  for (i = 0; i < iters; ++i)
>      {
> -      hp_timing_t start __attribute ((unused));
> -      hp_timing_t stop __attribute ((unused));
> -      hp_timing_t best_time = ~ (hp_timing_t) 0;
> -      size_t i;
> -
> -      for (i = 0; i < 32; ++i)
> -	{
> -	  HP_TIMING_NOW (start);
> -	  CALL (impl, s, c);
> -	  HP_TIMING_NOW (stop);
> -	  HP_TIMING_BEST (best_time, start, stop);
> -	}
> -
> -      printf ("\t%zd", (size_t) best_time);
> +      CALL (impl, s, c);
>      }
> +  TIMING_NOW (stop);
> +
> +  TIMING_DIFF (cur, start, stop);
> +
> +  TIMING_PRINT_MEAN ((double) cur, (double) iters);
>  }
> 
>  static void
> @@ -141,15 +137,13 @@ do_test (size_t align, size_t pos, size_t len, int seek_char, int max_char)
>    else
>      result = NULLRET (buf + align + len);
> 
> -  if (HP_TIMING_AVAIL)
> -    printf ("Length %4zd, alignment in bytes %2zd:",
> -	    pos, align * sizeof (CHAR));
> +  printf ("Length %4zd, alignment in bytes %2zd:",
> +	  pos, align * sizeof (CHAR));
> 
>    FOR_EACH_IMPL (impl, 0)
>      do_one_test (impl, buf + align, seek_char, result);
> 
> -  if (HP_TIMING_AVAIL)
> -    putchar ('\n');
> +  putchar ('\n');
>  }
> 
>  int
> diff --git a/benchtests/bench-strcmp.c b/benchtests/bench-strcmp.c
> index 63a3cd4..c1e0b26 100644
> --- a/benchtests/bench-strcmp.c
> +++ b/benchtests/bench-strcmp.c
> @@ -138,23 +138,19 @@ do_one_test (impl_t *impl,
>  	     const CHAR *s1, const CHAR *s2,
>  	     int exp_result)
>  {
> -  if (HP_TIMING_AVAIL)
> +  size_t i, iters = INNER_LOOP_ITERS;
> +  timing_t start, stop, cur;
> +
> +  TIMING_NOW (start);
> +  for (i = 0; i < iters; ++i)
>      {
> -      hp_timing_t start __attribute ((unused));
> -      hp_timing_t stop __attribute ((unused));
> -      hp_timing_t best_time = ~ (hp_timing_t) 0;
> -      size_t i;
> -
> -      for (i = 0; i < 32; ++i)
> -	{
> -	  HP_TIMING_NOW (start);
> -	  CALL (impl, s1, s2);
> -	  HP_TIMING_NOW (stop);
> -	  HP_TIMING_BEST (best_time, start, stop);
> -	}
> -
> -      printf ("\t%zd", (size_t) best_time);
> +      CALL (impl, s1, s2);
>      }
> +  TIMING_NOW (stop);
> +
> +  TIMING_DIFF (cur, start, stop);
> +
> +  TIMING_PRINT_MEAN ((double) cur, (double) iters);
>  }
> 
>  static void
> @@ -190,14 +186,12 @@ do_test (size_t align1, size_t align2, size_t len, int max_char,
>    s2[len + 1] = 24 + exp_result;
>    s2[len - 1] -= exp_result;
> 
> -  if (HP_TIMING_AVAIL)
> -    printf ("Length %4zd, alignment %2zd/%2zd:", len, align1, align2);
> +  printf ("Length %4zd, alignment %2zd/%2zd:", len, align1, align2);
> 
>    FOR_EACH_IMPL (impl, 0)
>      do_one_test (impl, s1, s2, exp_result);
> 
> -  if (HP_TIMING_AVAIL)
> -    putchar ('\n');
> +  putchar ('\n');
>  }
> 
>  int
> diff --git a/benchtests/bench-strcpy.c b/benchtests/bench-strcpy.c
> index 4e024d4..88db83b 100644
> --- a/benchtests/bench-strcpy.c
> +++ b/benchtests/bench-strcpy.c
> @@ -74,6 +74,9 @@ static void
>  do_one_test (impl_t *impl, CHAR *dst, const CHAR *src,
>  	     size_t len __attribute__((unused)))
>  {
> +  size_t i, iters = INNER_LOOP_ITERS;
> +  timing_t start, stop, cur;
> +
>    if (CALL (impl, dst, src) != STRCPY_RESULT (dst, len))
>      {
>        error (0, 0, "Wrong result in function %s %p %p", impl->name,
> @@ -91,23 +94,16 @@ do_one_test (impl_t *impl, CHAR *dst, const CHAR *src,
>        return;
>      }
> 
> -  if (HP_TIMING_AVAIL)
> +  TIMING_NOW (start);
> +  for (i = 0; i < iters; ++i)
>      {
> -      hp_timing_t start __attribute ((unused));
> -      hp_timing_t stop __attribute ((unused));;
> -      hp_timing_t best_time = ~ (hp_timing_t) 0;
> -      size_t i;
> -
> -      for (i = 0; i < 32; ++i)
> -	{
> -	  HP_TIMING_NOW (start);
>  	  CALL (impl, dst, src);
> -	  HP_TIMING_NOW (stop);
> -	  HP_TIMING_BEST (best_time, start, stop);
> -	}
> -
> -      printf ("\t%zd", (size_t) best_time);
>      }
> +  TIMING_NOW (stop);
> +
> +  TIMING_DIFF (cur, start, stop);
> +
> +  TIMING_PRINT_MEAN ((double) cur, (double) iters);
>  }
> 
>  static void
> @@ -133,14 +129,12 @@ do_test (size_t align1, size_t align2, size_t len, int max_char)
>      s1[i] = 32 + 23 * i % (max_char - 32);
>    s1[len] = 0;
> 
> -  if (HP_TIMING_AVAIL)
> -    printf ("Length %4zd, alignments in bytes %2zd/%2zd:", len, align1 * sizeof(CHAR), align2 * sizeof(CHAR));
> +  printf ("Length %4zd, alignments in bytes %2zd/%2zd:", len, align1 * sizeof(CHAR), align2 * sizeof(CHAR));
> 
>    FOR_EACH_IMPL (impl, 0)
>      do_one_test (impl, s2, s1, len);
> 
> -  if (HP_TIMING_AVAIL)
> -    putchar ('\n');
> +  putchar ('\n');
>  }
> 
>  int
> diff --git a/benchtests/bench-strcpy_chk.c b/benchtests/bench-strcpy_chk.c
> index 29e5728..3c2a0b3 100644
> --- a/benchtests/bench-strcpy_chk.c
> +++ b/benchtests/bench-strcpy_chk.c
> @@ -75,6 +75,9 @@ do_one_test (impl_t *impl, char *dst, const char *src,
>  	     size_t len, size_t dlen)
>  {
>    char *res;
> +  size_t i, iters = INNER_LOOP_ITERS;
> +  timing_t start, stop, cur;
> +
>    if (dlen <= len)
>      {
>        if (impl->test == 1)
> @@ -110,23 +113,16 @@ do_one_test (impl_t *impl, char *dst, const char *src,
>        return;
>      }
> 
> -  if (HP_TIMING_AVAIL)
> +  TIMING_NOW (start);
> +  for (i = 0; i < iters; ++i)
>      {
> -      hp_timing_t start __attribute ((unused));
> -      hp_timing_t stop __attribute ((unused));;
> -      hp_timing_t best_time = ~ (hp_timing_t) 0;
> -      size_t i;
> +      CALL (impl, dst, src, dlen);
> +    }
> +  TIMING_NOW (stop);
> 
> -      for (i = 0; i < 32; ++i)
> -	{
> -	  HP_TIMING_NOW (start);
> -	  CALL (impl, dst, src, dlen);
> -	  HP_TIMING_NOW (stop);
> -	  HP_TIMING_BEST (best_time, start, stop);
> -	}
> +  TIMING_DIFF (cur, start, stop);
> 
> -      printf ("\t%zd", (size_t) best_time);
> -    }
> +  TIMING_PRINT_MEAN ((double) cur, (double) iters);
>  }
> 
>  static void
> @@ -150,13 +146,13 @@ do_test (size_t align1, size_t align2, size_t len, size_t dlen, int max_char)
>      s1[i] = 32 + 23 * i % (max_char - 32);
>    s1[len] = 0;
> 
> -  if (HP_TIMING_AVAIL && dlen > len)
> +  if (dlen > len)
>      printf ("Length %4zd, alignment %2zd/%2zd:", len, align1, align2);
> 
>    FOR_EACH_IMPL (impl, 0)
>      do_one_test (impl, s2, s1, len, dlen);
> 
> -  if (HP_TIMING_AVAIL && dlen > len)
> +  if (dlen > len)
>      putchar ('\n');
>  }
> 
> diff --git a/benchtests/bench-string.h b/benchtests/bench-string.h
> index 2fe8d9f..53e796a 100644
> --- a/benchtests/bench-string.h
> +++ b/benchtests/bench-string.h
> @@ -52,7 +52,7 @@ extern impl_t __start_impls[], __stop_impls[];
>  # include <ifunc-impl-list.h>
>  # define GL(x) _##x
>  # define GLRO(x) _##x
> -# include <hp-timing.h>
> +# include "bench-timing.h"
> 
> 
>  # define TEST_FUNCTION test_main ()
> @@ -61,6 +61,8 @@ extern impl_t __start_impls[], __stop_impls[];
>  # define OPT_RANDOM 10001
>  # define OPT_SEED 10002
> 
> +# define INNER_LOOP_ITERS 64
> +

Maybe in the future this iteration count (INNER_LOOP_ITERS) could be
determined dynamically at test initialization.  That's an improvement
for later though.  This patch looks good to me.

Thanks,
Siddhesh

>  unsigned char *buf1, *buf2;
>  int ret, do_srandom;
>  unsigned int seed;
> @@ -158,16 +160,6 @@ static impl_t *impl_array;
>         if (!notall || impl->test)
>  # endif /* ! (defined TEST_IFUNC && defined TEST_NAME) */
> 
> -# define HP_TIMING_BEST(best_time, start, end)	\
> -    do									      \
> -      {									      \
> -	hp_timing_t tmptime;						      \
> -	HP_TIMING_DIFF (tmptime, start + _dl_hp_timing_overhead, end);	      \
> -	if (best_time > tmptime)					      \
> -	  best_time = tmptime;						      \
> -      }									      \
> -    while (0)
> -
>  # ifndef BUF1PAGES
>  #  define BUF1PAGES 1
>  # endif
> @@ -198,7 +190,6 @@ test_init (void)
>      error (EXIT_FAILURE, errno, "mmap failed");
>    if (mprotect (buf2 + page_size, page_size, PROT_NONE))
>      error (EXIT_FAILURE, errno, "mprotect failed");
> -  HP_TIMING_DIFF_INIT ();
>    if (do_srandom)
>      {
>        printf ("Setting seed to 0x%x\n", seed);
> diff --git a/benchtests/bench-strlen.c b/benchtests/bench-strlen.c
> index 63b1e93..44c9c2b 100644
> --- a/benchtests/bench-strlen.c
> +++ b/benchtests/bench-strlen.c
> @@ -62,7 +62,9 @@ IMPL (STRLEN, 1)
>  static void
>  do_one_test (impl_t *impl, const CHAR *s, size_t exp_len)
>  {
> -  size_t len = CALL (impl, s);
> +  size_t len = CALL (impl, s), i, iters = INNER_LOOP_ITERS;
> +  timing_t start, stop, cur;
> +
>    if (len != exp_len)
>      {
>        error (0, 0, "Wrong result in function %s %zd %zd", impl->name,
> @@ -71,23 +73,16 @@ do_one_test (impl_t *impl, const CHAR *s, size_t exp_len)
>        return;
>      }
> 
> -  if (HP_TIMING_AVAIL)
> +  TIMING_NOW (start);
> +  for (i = 0; i < iters; ++i)
>      {
> -      hp_timing_t start __attribute ((unused));
> -      hp_timing_t stop __attribute ((unused));
> -      hp_timing_t best_time = ~ (hp_timing_t) 0;
> -      size_t i;
> -
> -      for (i = 0; i < 32; ++i)
> -	{
> -	  HP_TIMING_NOW (start);
> -	  CALL (impl, s);
> -	  HP_TIMING_NOW (stop);
> -	  HP_TIMING_BEST (best_time, start, stop);
> -	}
> -
> -      printf ("\t%zd", (size_t) best_time);
> +      CALL (impl, s);
>      }
> +  TIMING_NOW (stop);
> +
> +  TIMING_DIFF (cur, start, stop);
> +
> +  TIMING_PRINT_MEAN ((double) cur, (double) iters);
>  }
> 
>  static void
> @@ -105,14 +100,12 @@ do_test (size_t align, size_t len)
>      buf[align + i] = 1 + 11111 * i % MAX_CHAR;
>    buf[align + len] = 0;
> 
> -  if (HP_TIMING_AVAIL)
> -    printf ("Length %4zd, alignment %2zd:", len, align);
> +  printf ("Length %4zd, alignment %2zd:", len, align);
> 
>    FOR_EACH_IMPL (impl, 0)
>      do_one_test (impl, (CHAR *) (buf + align), len);
> 
> -  if (HP_TIMING_AVAIL)
> -    putchar ('\n');
> +  putchar ('\n');
>  }
> 
>  int
> diff --git a/benchtests/bench-strncasecmp.c b/benchtests/bench-strncasecmp.c
> index 5fa9220..9badd05 100644
> --- a/benchtests/bench-strncasecmp.c
> +++ b/benchtests/bench-strncasecmp.c
> @@ -73,23 +73,19 @@ static void
>  do_one_test (impl_t *impl, const char *s1, const char *s2, size_t n,
>  	     int exp_result)
>  {
> -  if (HP_TIMING_AVAIL)
> +  size_t i, iters = INNER_LOOP_ITERS;
> +  timing_t start, stop, cur;
> +
> +  TIMING_NOW (start);
> +  for (i = 0; i < iters; ++i)
>      {
> -      hp_timing_t start __attribute ((unused));
> -      hp_timing_t stop __attribute ((unused));
> -      hp_timing_t best_time = ~ (hp_timing_t) 0;
> -      size_t i;
> -
> -      for (i = 0; i < 32; ++i)
> -	{
> -	  HP_TIMING_NOW (start);
> -	  CALL (impl, s1, s2, n);
> -	  HP_TIMING_NOW (stop);
> -	  HP_TIMING_BEST (best_time, start, stop);
> -	}
> -
> -      printf ("\t%zd", (size_t) best_time);
> +      CALL (impl, s1, s2, n);
>      }
> +  TIMING_NOW (stop);
> +
> +  TIMING_DIFF (cur, start, stop);
> +
> +  TIMING_PRINT_MEAN ((double) cur, (double) iters);
>  }
> 
>  static void
> @@ -128,14 +124,12 @@ do_test (size_t align1, size_t align2, size_t n, size_t len, int max_char,
>    else
>      s2[len - 1] -= exp_result;
> 
> -  if (HP_TIMING_AVAIL)
> -    printf ("Length %4zd, alignment %2zd/%2zd:", len, align1, align2);
> +  printf ("Length %4zd, alignment %2zd/%2zd:", len, align1, align2);
> 
>    FOR_EACH_IMPL (impl, 0)
>      do_one_test (impl, s1, s2, n, exp_result);
> 
> -  if (HP_TIMING_AVAIL)
> -    putchar ('\n');
> +  putchar ('\n');
>  }
> 
>  int
> diff --git a/benchtests/bench-strncat.c b/benchtests/bench-strncat.c
> index 904daa7..2a17817 100644
> --- a/benchtests/bench-strncat.c
> +++ b/benchtests/bench-strncat.c
> @@ -43,7 +43,9 @@ stupid_strncat (char *dst, const char *src, size_t n)
>  static void
>  do_one_test (impl_t *impl, char *dst, const char *src, size_t n)
>  {
> -  size_t k = strlen (dst);
> +  size_t k = strlen (dst), i, iters = INNER_LOOP_ITERS;
> +  timing_t start, stop, cur;
> +
>    if (CALL (impl, dst, src, n) != dst)
>      {
>        error (0, 0, "Wrong result in function %s %p != %p", impl->name,
> @@ -67,24 +69,18 @@ do_one_test (impl_t *impl, char *dst, const char *src, size_t n)
>        ret = 1;
>        return;
>      }
> -  if (HP_TIMING_AVAIL)
> +
> +  TIMING_NOW (start);
> +  for (i = 0; i < iters; ++i)
>      {
> -      hp_timing_t start __attribute ((unused));
> -      hp_timing_t stop __attribute ((unused));
> -      hp_timing_t best_time = ~ (hp_timing_t) 0;
> -      size_t i;
> +      dst[k] = '\0';
> +      CALL (impl, dst, src, n);
> +    }
> +  TIMING_NOW (stop);
> 
> -      for (i = 0; i < 32; ++i)
> -	{
> -	  dst[k] = '\0';
> -	  HP_TIMING_NOW (start);
> -	  CALL (impl, dst, src, n);
> -	  HP_TIMING_NOW (stop);
> -	  HP_TIMING_BEST (best_time, start, stop);
> -	}
> +  TIMING_DIFF (cur, start, stop);
> 
> -      printf ("\t%zd", (size_t) best_time);
> -    }
> +  TIMING_PRINT_MEAN ((double) cur, (double) iters);
>  }
> 
>  static void
> @@ -114,9 +110,8 @@ do_test (size_t align1, size_t align2, size_t len1, size_t len2,
>    for (i = 0; i < len2; i++)
>      s2[i] = 32 + 23 * i % (max_char - 32);
> 
> -  if (HP_TIMING_AVAIL)
> -    printf ("Length %4zd/%4zd, alignment %2zd/%2zd, N %4zd:",
> -	    len1, len2, align1, align2, n);
> +  printf ("Length %4zd/%4zd, alignment %2zd/%2zd, N %4zd:",
> +	  len1, len2, align1, align2, n);
> 
>    FOR_EACH_IMPL (impl, 0)
>      {
> @@ -124,8 +119,7 @@ do_test (size_t align1, size_t align2, size_t len1, size_t len2,
>        do_one_test (impl, s2, s1, n);
>      }
> 
> -  if (HP_TIMING_AVAIL)
> -    putchar ('\n');
> +  putchar ('\n');
>  }
> 
>  int
> diff --git a/benchtests/bench-strncmp.c b/benchtests/bench-strncmp.c
> index b3af0f9..25df3db 100644
> --- a/benchtests/bench-strncmp.c
> +++ b/benchtests/bench-strncmp.c
> @@ -54,23 +54,19 @@ static void
>  do_one_test (impl_t *impl, const char *s1, const char *s2, size_t n,
>  	     int exp_result)
>  {
> -  if (HP_TIMING_AVAIL)
> +  size_t i, iters = INNER_LOOP_ITERS;
> +  timing_t start, stop, cur;
> +
> +  TIMING_NOW (start);
> +  for (i = 0; i < iters; ++i)
>      {
> -      hp_timing_t start __attribute ((unused));
> -      hp_timing_t stop __attribute ((unused));
> -      hp_timing_t best_time = ~ (hp_timing_t) 0;
> -      size_t i;
> -
> -      for (i = 0; i < 32; ++i)
> -	{
> -	  HP_TIMING_NOW (start);
> -	  CALL (impl, s1, s2, n);
> -	  HP_TIMING_NOW (stop);
> -	  HP_TIMING_BEST (best_time, start, stop);
> -	}
> -
> -      printf ("\t%zd", (size_t) best_time);
> +      CALL (impl, s1, s2, n);
>      }
> +  TIMING_NOW (stop);
> +
> +  TIMING_DIFF (cur, start, stop);
> +
> +  TIMING_PRINT_MEAN ((double) cur, (double) iters);
>  }
> 
>  static void
> @@ -84,14 +80,12 @@ do_test_limit (size_t align1, size_t align2, size_t len, size_t n, int max_char,
>      {
>        s1 = (char*)(buf1 + page_size);
>        s2 = (char*)(buf2 + page_size);
> -      if (HP_TIMING_AVAIL)
> -	printf ("Length %4zd/%4zd:", len, n);
> +      printf ("Length %4zd/%4zd:", len, n);
> 
>        FOR_EACH_IMPL (impl, 0)
>  	do_one_test (impl, s1, s2, n, 0);
> 
> -      if (HP_TIMING_AVAIL)
> -	putchar ('\n');
> +      putchar ('\n');
> 
>        return;
>      }
> @@ -122,14 +116,12 @@ do_test_limit (size_t align1, size_t align2, size_t len, size_t n, int max_char,
>  	s1[len] = 64;
>      }
> 
> -  if (HP_TIMING_AVAIL)
> -    printf ("Length %4zd/%4zd, alignment %2zd/%2zd:", len, n, align1, align2);
> +  printf ("Length %4zd/%4zd, alignment %2zd/%2zd:", len, n, align1, align2);
> 
>    FOR_EACH_IMPL (impl, 0)
>      do_one_test (impl, s1, s2, n, exp_result);
> 
> -  if (HP_TIMING_AVAIL)
> -    putchar ('\n');
> +  putchar ('\n');
>  }
> 
>  static void
> @@ -167,14 +159,12 @@ do_test (size_t align1, size_t align2, size_t len, size_t n, int max_char,
>    if (len >= n)
>      s2[n - 1] -= exp_result;
> 
> -  if (HP_TIMING_AVAIL)
> -    printf ("Length %4zd/%4zd, alignment %2zd/%2zd:", len, n, align1, align2);
> +  printf ("Length %4zd/%4zd, alignment %2zd/%2zd:", len, n, align1, align2);
> 
>    FOR_EACH_IMPL (impl, 0)
>      do_one_test (impl, (char*)s1, (char*)s2, n, exp_result);
> 
> -  if (HP_TIMING_AVAIL)
> -    putchar ('\n');
> +  putchar ('\n');
>  }
> 
>  int
> diff --git a/benchtests/bench-strncpy.c b/benchtests/bench-strncpy.c
> index 4065c0a..645925b 100644
> --- a/benchtests/bench-strncpy.c
> +++ b/benchtests/bench-strncpy.c
> @@ -62,6 +62,9 @@ typedef char *(*proto_t) (char *, const char *, size_t);
>  static void
>  do_one_test (impl_t *impl, char *dst, const char *src, size_t len, size_t n)
>  {
> +  size_t i, iters = INNER_LOOP_ITERS;
> +  timing_t start, stop, cur;
> +
>    if (CALL (impl, dst, src, n) != STRNCPY_RESULT (dst, len, n))
>      {
>        error (0, 0, "Wrong result in function %s %p %p", impl->name,
> @@ -90,23 +93,16 @@ do_one_test (impl_t *impl, char *dst, const char *src, size_t len, size_t n)
>  	  }
>      }
> 
> -  if (HP_TIMING_AVAIL)
> +  TIMING_NOW (start);
> +  for (i = 0; i < iters; ++i)
>      {
> -      hp_timing_t start __attribute__ ((unused));
> -      hp_timing_t stop __attribute__ ((unused));
> -      hp_timing_t best_time = ~ (hp_timing_t) 0;
> -      size_t i;
> +      CALL (impl, dst, src, n);
> +    }
> +  TIMING_NOW (stop);
> 
> -      for (i = 0; i < 32; ++i)
> -	{
> -	  HP_TIMING_NOW (start);
> -	  CALL (impl, dst, src, n);
> -	  HP_TIMING_NOW (stop);
> -	  HP_TIMING_BEST (best_time, start, stop);
> -	}
> +  TIMING_DIFF (cur, start, stop);
> 
> -      printf ("\t%zd", (size_t) best_time);
> -    }
> +  TIMING_PRINT_MEAN ((double) cur, (double) iters);
>  }
> 
>  static void
> @@ -132,14 +128,12 @@ do_test (size_t align1, size_t align2, size_t len, size_t n, int max_char)
>    for (i = len + 1; i + align1 < page_size && i < len + 64; ++i)
>      s1[i] = 32 + 32 * i % (max_char - 32);
> 
> -  if (HP_TIMING_AVAIL)
> -    printf ("Length %4zd, n %4zd, alignment %2zd/%2zd:", len, n, align1, align2);
> +  printf ("Length %4zd, n %4zd, alignment %2zd/%2zd:", len, n, align1, align2);
> 
>    FOR_EACH_IMPL (impl, 0)
>      do_one_test (impl, s2, s1, len, n);
> 
> -  if (HP_TIMING_AVAIL)
> -    putchar ('\n');
> +  putchar ('\n');
>  }
> 
>  int
> diff --git a/benchtests/bench-strnlen.c b/benchtests/bench-strnlen.c
> index 4233f27..793f9be 100644
> --- a/benchtests/bench-strnlen.c
> +++ b/benchtests/bench-strnlen.c
> @@ -38,7 +38,9 @@ simple_strnlen (const char *s, size_t maxlen)
>  static void
>  do_one_test (impl_t *impl, const char *s, size_t maxlen, size_t exp_len)
>  {
> -  size_t len = CALL (impl, s, maxlen);
> +  size_t len = CALL (impl, s, maxlen), i, iters = INNER_LOOP_ITERS;
> +  timing_t start, stop, cur;
> +
>    if (len != exp_len)
>      {
>        error (0, 0, "Wrong result in function %s %zd %zd", impl->name,
> @@ -47,23 +49,16 @@ do_one_test (impl_t *impl, const char *s, size_t maxlen, size_t exp_len)
>        return;
>      }
> 
> -  if (HP_TIMING_AVAIL)
> +  TIMING_NOW (start);
> +  for (i = 0; i < iters; ++i)
>      {
> -      hp_timing_t start __attribute ((unused));
> -      hp_timing_t stop __attribute ((unused));
> -      hp_timing_t best_time = ~ (hp_timing_t) 0;
> -      size_t i;
> -
> -      for (i = 0; i < 32; ++i)
> -	{
> -	  HP_TIMING_NOW (start);
> -	  CALL (impl, s, maxlen);
> -	  HP_TIMING_NOW (stop);
> -	  HP_TIMING_BEST (best_time, start, stop);
> -	}
> -
> -      printf ("\t%zd", (size_t) best_time);
> +      CALL (impl, s, maxlen);
>      }
> +  TIMING_NOW (stop);
> +
> +  TIMING_DIFF (cur, start, stop);
> +
> +  TIMING_PRINT_MEAN ((double) cur, (double) iters);
>  }
> 
>  static void
> @@ -79,14 +74,12 @@ do_test (size_t align, size_t len, size_t maxlen, int max_char)
>      buf1[align + i] = 1 + 7 * i % max_char;
>    buf1[align + len] = 0;
> 
> -  if (HP_TIMING_AVAIL)
> -    printf ("Length %4zd, alignment %2zd:", len, align);
> +  printf ("Length %4zd, alignment %2zd:", len, align);
> 
>    FOR_EACH_IMPL (impl, 0)
>      do_one_test (impl, (char *) (buf1 + align), maxlen, MIN (len, maxlen));
> 
> -  if (HP_TIMING_AVAIL)
> -    putchar ('\n');
> +  putchar ('\n');
>  }
> 
>  int
> diff --git a/benchtests/bench-strpbrk.c b/benchtests/bench-strpbrk.c
> index 0163de8..fe966be 100644
> --- a/benchtests/bench-strpbrk.c
> +++ b/benchtests/bench-strpbrk.c
> @@ -62,6 +62,9 @@ static void
>  do_one_test (impl_t *impl, const char *s, const char *rej, RES_TYPE exp_res)
>  {
>    RES_TYPE res = CALL (impl, s, rej);
> +  size_t i, iters = INNER_LOOP_ITERS;
> +  timing_t start, stop, cur;
> +
>    if (res != exp_res)
>      {
>        error (0, 0, "Wrong result in function %s %p %p", impl->name,
> @@ -70,23 +73,16 @@ do_one_test (impl_t *impl, const char *s, const char *rej, RES_TYPE exp_res)
>        return;
>      }
> 
> -  if (HP_TIMING_AVAIL)
> +  TIMING_NOW (start);
> +  for (i = 0; i < iters; ++i)
>      {
> -      hp_timing_t start __attribute ((unused));
> -      hp_timing_t stop __attribute ((unused));
> -      hp_timing_t best_time = ~ (hp_timing_t) 0;
> -      size_t i;
> +      CALL (impl, s, rej);
> +    }
> +  TIMING_NOW (stop);
> 
> -      for (i = 0; i < 32; ++i)
> -	{
> -	  HP_TIMING_NOW (start);
> -	  CALL (impl, s, rej);
> -	  HP_TIMING_NOW (stop);
> -	  HP_TIMING_BEST (best_time, start, stop);
> -	}
> +  TIMING_DIFF (cur, start, stop);
> 
> -      printf ("\t%zd", (size_t) best_time);
> -    }
> +  TIMING_PRINT_MEAN ((double) cur, (double) iters);
>  }
> 
>  static void
> @@ -136,14 +132,12 @@ do_test (size_t align, size_t pos, size_t len)
>      }
>    result = STRPBRK_RESULT (s, pos);
> 
> -  if (HP_TIMING_AVAIL)
> -    printf ("Length %4zd, alignment %2zd, rej len %2zd:", pos, align, len);
> +  printf ("Length %4zd, alignment %2zd, rej len %2zd:", pos, align, len);
> 
>    FOR_EACH_IMPL (impl, 0)
>      do_one_test (impl, s, rej, result);
> 
> -  if (HP_TIMING_AVAIL)
> -    putchar ('\n');
> +  putchar ('\n');
>  }
> 
>  int
> diff --git a/benchtests/bench-strrchr.c b/benchtests/bench-strrchr.c
> index 400ac80..6a7aa84 100644
> --- a/benchtests/bench-strrchr.c
> +++ b/benchtests/bench-strrchr.c
> @@ -63,6 +63,9 @@ static void
>  do_one_test (impl_t *impl, const CHAR *s, int c, CHAR *exp_res)
>  {
>    CHAR *res = CALL (impl, s, c);
> +  size_t i, iters = INNER_LOOP_ITERS;
> +  timing_t start, stop, cur;
> +
>    if (res != exp_res)
>      {
>        error (0, 0, "Wrong result in function %s %p %p", impl->name,
> @@ -71,23 +74,16 @@ do_one_test (impl_t *impl, const CHAR *s, int c, CHAR *exp_res)
>        return;
>      }
> 
> -  if (HP_TIMING_AVAIL)
> +  TIMING_NOW (start);
> +  for (i = 0; i < iters; ++i)
>      {
> -      hp_timing_t start __attribute ((unused));
> -      hp_timing_t stop __attribute ((unused));
> -      hp_timing_t best_time = ~ (hp_timing_t) 0;
> -      size_t i;
> -
> -      for (i = 0; i < 32; ++i)
> -	{
> -	  HP_TIMING_NOW (start);
> -	  CALL (impl, s, c);
> -	  HP_TIMING_NOW (stop);
> -	  HP_TIMING_BEST (best_time, start, stop);
> -	}
> -
> -      printf ("\t%zd", (size_t) best_time);
> +      CALL (impl, s, c);
>      }
> +  TIMING_NOW (stop);
> +
> +  TIMING_DIFF (cur, start, stop);
> +
> +  TIMING_PRINT_MEAN ((double) cur, (double) iters);
>  }
> 
>  static void
> @@ -126,14 +122,12 @@ do_test (size_t align, size_t pos, size_t len, int seek_char, int max_char)
>    else
>      result = NULL;
> 
> -  if (HP_TIMING_AVAIL)
> -    printf ("Length %4zd, alignment in bytes %2zd:", pos, align * sizeof(CHAR));
> +  printf ("Length %4zd, alignment in bytes %2zd:", pos, align * sizeof(CHAR));
> 
>    FOR_EACH_IMPL (impl, 0)
>      do_one_test (impl, (CHAR *) (buf + align), seek_char, result);
> 
> -  if (HP_TIMING_AVAIL)
> -    putchar ('\n');
> +  putchar ('\n');
>  }
> 
>  int
> diff --git a/benchtests/bench-strspn.c b/benchtests/bench-strspn.c
> index 7cf26f4..634bca1 100644
> --- a/benchtests/bench-strspn.c
> +++ b/benchtests/bench-strspn.c
> @@ -65,7 +65,9 @@ stupid_strspn (const char *s, const char *acc)
>  static void
>  do_one_test (impl_t *impl, const char *s, const char *acc, size_t exp_res)
>  {
> -  size_t res = CALL (impl, s, acc);
> +  size_t res = CALL (impl, s, acc), i, iters = INNER_LOOP_ITERS;
> +  timing_t start, stop, cur;
> +
>    if (res != exp_res)
>      {
>        error (0, 0, "Wrong result in function %s %p %p", impl->name,
> @@ -74,23 +76,16 @@ do_one_test (impl_t *impl, const char *s, const char *acc, size_t exp_res)
>        return;
>      }
> 
> -  if (HP_TIMING_AVAIL)
> +  TIMING_NOW (start);
> +  for (i = 0; i < iters; ++i)
>      {
> -      hp_timing_t start __attribute ((unused));
> -      hp_timing_t stop __attribute ((unused));
> -      hp_timing_t best_time = ~ (hp_timing_t) 0;
> -      size_t i;
> -
> -      for (i = 0; i < 32; ++i)
> -	{
> -	  HP_TIMING_NOW (start);
> -	  CALL (impl, s, acc);
> -	  HP_TIMING_NOW (stop);
> -	  HP_TIMING_BEST (best_time, start, stop);
> -	}
> -
> -      printf ("\t%zd", (size_t) best_time);
> +      CALL (impl, s, acc);
>      }
> +  TIMING_NOW (stop);
> +
> +  TIMING_DIFF (cur, start, stop);
> +
> +  TIMING_PRINT_MEAN ((double) cur, (double) iters);
>  }
> 
>  static void
> @@ -128,14 +123,12 @@ do_test (size_t align, size_t pos, size_t len)
>        s[i] = '\0';
>      }
> 
> -  if (HP_TIMING_AVAIL)
> -    printf ("Length %4zd, alignment %2zd, acc len %2zd:", pos, align, len);
> +  printf ("Length %4zd, alignment %2zd, acc len %2zd:", pos, align, len);
> 
>    FOR_EACH_IMPL (impl, 0)
>      do_one_test (impl, s, acc, pos);
> 
> -  if (HP_TIMING_AVAIL)
> -    putchar ('\n');
> +  putchar ('\n');
>  }
> 
>  int
> diff --git a/benchtests/bench-strstr.c b/benchtests/bench-strstr.c
> index 91a8dfe..528a5c0 100644
> --- a/benchtests/bench-strstr.c
> +++ b/benchtests/bench-strstr.c
> @@ -58,23 +58,19 @@ IMPL (strstr, 1)
>  static void
>  do_one_test (impl_t *impl, const char *s1, const char *s2, char *exp_result)
>  {
> -  if (HP_TIMING_AVAIL)
> +  size_t i, iters = INNER_LOOP_ITERS;
> +  timing_t start, stop, cur;
> +
> +  TIMING_NOW (start);
> +  for (i = 0; i < iters; ++i)
>      {
> -      hp_timing_t start __attribute ((unused));
> -      hp_timing_t stop __attribute ((unused));
> -      hp_timing_t best_time = ~(hp_timing_t) 0;
> -      size_t i;
> +      CALL (impl, s1, s2);
> +    }
> +  TIMING_NOW (stop);
> 
> -      for (i = 0; i < 32; ++i)
> -	{
> -	  HP_TIMING_NOW (start);
> -	  CALL (impl, s1, s2);
> -	  HP_TIMING_NOW (stop);
> -	  HP_TIMING_BEST (best_time, start, stop);
> -	}
> +  TIMING_DIFF (cur, start, stop);
> 
> -      printf ("\t%zd", (size_t) best_time);
> -    }
> +  TIMING_PRINT_MEAN ((double) cur, (double) iters);
>  }
> 
> 
> @@ -113,15 +109,13 @@ do_test (size_t align1, size_t align2, size_t len1, size_t len2,
>      }
>    s1[len1] = '\0';
> 
> -  if (HP_TIMING_AVAIL)
> -    printf ("Length %4zd/%zd, alignment %2zd/%2zd, %s:",
> -	    len1, len2, align1, align2, fail ? "fail" : "found");
> +  printf ("Length %4zd/%zd, alignment %2zd/%2zd, %s:",
> +	  len1, len2, align1, align2, fail ? "fail" : "found");
> 
>    FOR_EACH_IMPL (impl, 0)
>      do_one_test (impl, s1, s2, fail ? NULL : s1 + len1 - len2);
> 
> -  if (HP_TIMING_AVAIL)
> -    putchar ('\n');
> +  putchar ('\n');
>  }
> 
>  static int
> diff --git a/benchtests/bench-timing.h b/benchtests/bench-timing.h
> index 264d4b8..009813b 100644
> --- a/benchtests/bench-timing.h
> +++ b/benchtests/bench-timing.h
> @@ -70,3 +70,6 @@ typedef uint64_t timing_t;
>  	  (min) / (d_iters), 1e9 * (d_total_i) / (d_total_s))
> 
>  #endif
> +
> +#define TIMING_PRINT_MEAN(d_total_s, d_iters) \
> +  printf ("\t%g", (d_total_s) / (d_iters))
> -- 
> 1.8.1.4
>

Patch

diff --git a/benchtests/bench-memccpy.c b/benchtests/bench-memccpy.c
index 612513c..2c47e79 100644
--- a/benchtests/bench-memccpy.c
+++ b/benchtests/bench-memccpy.c
@@ -59,6 +59,9 @@  do_one_test (impl_t *impl, void *dst, const void *src, int c, size_t len,
 	     size_t n)
 {
   void *expect = len > n ? NULL : (char *) dst + len;
+  size_t i, iters = INNER_LOOP_ITERS;
+  timing_t start, stop, cur;
+
   if (CALL (impl, dst, src, c, n) != expect)
     {
       error (0, 0, "Wrong result in function %s %p %p", impl->name,
@@ -74,23 +77,16 @@  do_one_test (impl_t *impl, void *dst, const void *src, int c, size_t len,
       return;
     }

-  if (HP_TIMING_AVAIL)
+  TIMING_NOW (start);
+  for (i = 0; i < iters; ++i)
     {
-      hp_timing_t start __attribute__ ((unused));
-      hp_timing_t stop __attribute__ ((unused));
-      hp_timing_t best_time = ~ (hp_timing_t) 0;
-      size_t i;
-
-      for (i = 0; i < 32; ++i)
-	{
-	  HP_TIMING_NOW (start);
-	  CALL (impl, dst, src, c, n);
-	  HP_TIMING_NOW (stop);
-	  HP_TIMING_BEST (best_time, start, stop);
-	}
-
-      printf ("\t%zd", (size_t) best_time);
+      CALL (impl, dst, src, c, n);
     }
+  TIMING_NOW (stop);
+
+  TIMING_DIFF (cur, start, stop);
+
+  TIMING_PRINT_MEAN ((double) cur, (double) iters);
 }

 static void
@@ -121,14 +117,12 @@  do_test (size_t align1, size_t align2, int c, size_t len, size_t n,
   for (i = len; i + align1 < page_size && i < len + 64; ++i)
     s1[i] = 32 + 32 * i % (max_char - 32);

-  if (HP_TIMING_AVAIL)
-    printf ("Length %4zd, n %4zd, char %d, alignment %2zd/%2zd:", len, n, c, align1, align2);
+  printf ("Length %4zd, n %4zd, char %d, alignment %2zd/%2zd:", len, n, c, align1, align2);

   FOR_EACH_IMPL (impl, 0)
     do_one_test (impl, s2, s1, c, len, n);

-  if (HP_TIMING_AVAIL)
-    putchar ('\n');
+  putchar ('\n');
 }

 int
diff --git a/benchtests/bench-memchr.c b/benchtests/bench-memchr.c
index 5470ce6..db099ad 100644
--- a/benchtests/bench-memchr.c
+++ b/benchtests/bench-memchr.c
@@ -39,6 +39,9 @@  static void
 do_one_test (impl_t *impl, const char *s, int c, size_t n, char *exp_res)
 {
   char *res = CALL (impl, s, c, n);
+  size_t i, iters = INNER_LOOP_ITERS;
+  timing_t start, stop, cur;
+
   if (res != exp_res)
     {
       error (0, 0, "Wrong result in function %s %p %p", impl->name,
@@ -47,23 +50,16 @@  do_one_test (impl_t *impl, const char *s, int c, size_t n, char *exp_res)
       return;
     }

-  if (HP_TIMING_AVAIL)
+  TIMING_NOW (start);
+  for (i = 0; i < iters; ++i)
     {
-      hp_timing_t start __attribute ((unused));
-      hp_timing_t stop __attribute ((unused));
-      hp_timing_t best_time = ~ (hp_timing_t) 0;
-      size_t i;
-
-      for (i = 0; i < 32; ++i)
-	{
-	  HP_TIMING_NOW (start);
-	  CALL (impl, s, c, n);
-	  HP_TIMING_NOW (stop);
-	  HP_TIMING_BEST (best_time, start, stop);
-	}
-
-      printf ("\t%zd", (size_t) best_time);
+      CALL (impl, s, c, n);
     }
+  TIMING_NOW (stop);
+
+  TIMING_DIFF (cur, start, stop);
+
+  TIMING_PRINT_MEAN ((double) cur, (double) iters);
 }

 static void
@@ -96,14 +92,12 @@  do_test (size_t align, size_t pos, size_t len, int seek_char)
       buf1[align + len] = seek_char;
     }

-  if (HP_TIMING_AVAIL)
-    printf ("Length %4zd, alignment %2zd:", pos, align);
+  printf ("Length %4zd, alignment %2zd:", pos, align);

   FOR_EACH_IMPL (impl, 0)
     do_one_test (impl, (char *) (buf1 + align), seek_char, len, result);

-  if (HP_TIMING_AVAIL)
-    putchar ('\n');
+  putchar ('\n');
 }

 int
diff --git a/benchtests/bench-memcmp.c b/benchtests/bench-memcmp.c
index 053bb50..544130b 100644
--- a/benchtests/bench-memcmp.c
+++ b/benchtests/bench-memcmp.c
@@ -78,23 +78,19 @@  static void
 do_one_test (impl_t *impl, const CHAR *s1, const CHAR *s2, size_t len,
 	     int exp_result)
 {
-  if (HP_TIMING_AVAIL)
+  size_t i, iters = INNER_LOOP_ITERS;
+  timing_t start, stop, cur;
+
+  TIMING_NOW (start);
+  for (i = 0; i < iters; ++i)
     {
-      hp_timing_t start __attribute ((unused));
-      hp_timing_t stop __attribute ((unused));
-      hp_timing_t best_time = ~ (hp_timing_t) 0;
-      size_t i;
-
-      for (i = 0; i < 32; ++i)
-	{
-	  HP_TIMING_NOW (start);
-	  CALL (impl, s1, s2, len);
-	  HP_TIMING_NOW (stop);
-	  HP_TIMING_BEST (best_time, start, stop);
-	}
-
-      printf ("\t%zd", (size_t) best_time);
+      CALL (impl, s1, s2, len);
     }
+  TIMING_NOW (stop);
+
+  TIMING_DIFF (cur, start, stop);
+
+  TIMING_PRINT_MEAN ((double) cur, (double) iters);
 }

 static void
@@ -124,14 +120,12 @@  do_test (size_t align1, size_t align2, size_t len, int exp_result)
   s2[len] = align2;
   s2[len - 1] -= exp_result;

-  if (HP_TIMING_AVAIL)
-    printf ("Length %4zd, alignment %2zd/%2zd:", len, align1, align2);
+  printf ("Length %4zd, alignment %2zd/%2zd:", len, align1, align2);

   FOR_EACH_IMPL (impl, 0)
     do_one_test (impl, s1, s2, len, exp_result);

-  if (HP_TIMING_AVAIL)
-    putchar ('\n');
+  putchar ('\n');
 }

 int
diff --git a/benchtests/bench-memcpy.c b/benchtests/bench-memcpy.c
index 1b12671..8cd9c23 100644
--- a/benchtests/bench-memcpy.c
+++ b/benchtests/bench-memcpy.c
@@ -52,6 +52,9 @@  static void
 do_one_test (impl_t *impl, char *dst, const char *src,
 	     size_t len)
 {
+  size_t i, iters = INNER_LOOP_ITERS;
+  timing_t start, stop, cur;
+
   if (CALL (impl, dst, src, len) != MEMCPY_RESULT (dst, len))
     {
       error (0, 0, "Wrong result in function %s %p %p", impl->name,
@@ -68,23 +71,16 @@  do_one_test (impl_t *impl, char *dst, const char *src,
       return;
     }

-  if (HP_TIMING_AVAIL)
+  TIMING_NOW (start);
+  for (i = 0; i < iters; ++i)
     {
-      hp_timing_t start __attribute ((unused));
-      hp_timing_t stop __attribute ((unused));
-      hp_timing_t best_time = ~ (hp_timing_t) 0;
-      size_t i;
-
-      for (i = 0; i < 32; ++i)
-	{
-	  HP_TIMING_NOW (start);
-	  CALL (impl, dst, src, len);
-	  HP_TIMING_NOW (stop);
-	  HP_TIMING_BEST (best_time, start, stop);
-	}
-
-      printf ("\t%zd", (size_t) best_time);
+      CALL (impl, dst, src, len);
     }
+  TIMING_NOW (stop);
+
+  TIMING_DIFF (cur, start, stop);
+
+  TIMING_PRINT_MEAN ((double) cur, (double) iters);
 }

 static void
@@ -107,14 +103,12 @@  do_test (size_t align1, size_t align2, size_t len)
   for (i = 0, j = 1; i < len; i++, j += 23)
     s1[i] = j;

-  if (HP_TIMING_AVAIL)
-    printf ("Length %4zd, alignment %2zd/%2zd:", len, align1, align2);
+  printf ("Length %4zd, alignment %2zd/%2zd:", len, align1, align2);

   FOR_EACH_IMPL (impl, 0)
     do_one_test (impl, s2, s1, len);

-  if (HP_TIMING_AVAIL)
-    putchar ('\n');
+  putchar ('\n');
 }

 int
diff --git a/benchtests/bench-memmem.c b/benchtests/bench-memmem.c
index ca758a8..b8f8a8b 100644
--- a/benchtests/bench-memmem.c
+++ b/benchtests/bench-memmem.c
@@ -60,23 +60,19 @@  static void
 do_one_test (impl_t *impl, const void *haystack, size_t haystack_len,
 	     const void *needle, size_t needle_len, const void *expected)
 {
-  if (HP_TIMING_AVAIL)
+  size_t i, iters = INNER_LOOP_ITERS;
+  timing_t start, stop, cur;
+
+  TIMING_NOW (start);
+  for (i = 0; i < iters; ++i)
     {
-      hp_timing_t start __attribute ((unused));
-      hp_timing_t stop __attribute ((unused));
-      hp_timing_t best_time = ~ (hp_timing_t) 0;
-      size_t i;
+      CALL (impl, haystack, haystack_len, needle, needle_len);
+    }
+  TIMING_NOW (stop);

-      for (i = 0; i < 32; ++i)
-	{
-	  HP_TIMING_NOW (start);
-	  CALL (impl, haystack, haystack_len, needle, needle_len);
-	  HP_TIMING_NOW (stop);
-	  HP_TIMING_BEST (best_time, start, stop);
-	}
+  TIMING_DIFF (cur, start, stop);

-      printf ("\t%zd", (size_t) best_time);
-    }
+  TIMING_PRINT_MEAN ((double) cur, (double) iters);
 }

 static void
@@ -87,16 +83,14 @@  do_test (const char *str, size_t len, size_t idx)
   memcpy (tmpbuf, buf1 + idx, len);
   memcpy (buf1 + idx, str, len);

-  if (HP_TIMING_AVAIL)
-    printf ("String %s, offset %zd:", str, idx);
+  printf ("String %s, offset %zd:", str, idx);

   FOR_EACH_IMPL (impl, 0)
     do_one_test (impl, buf1, BUF1PAGES * page_size, str, len, buf1 + idx);

   memcpy (buf1 + idx, tmpbuf, len);

-  if (HP_TIMING_AVAIL)
-    putchar ('\n');
+  putchar ('\n');
 }

 static void
@@ -120,15 +114,13 @@  do_random_tests (void)
 	  buf1[idx + off] = ch;
 	}

-      if (HP_TIMING_AVAIL)
-	printf ("String %.*s, offset %zd:", (int) len, buf1 + idx, idx);
+      printf ("String %.*s, offset %zd:", (int) len, buf1 + idx, idx);

       FOR_EACH_IMPL (impl, 0)
 	do_one_test (impl, buf1, BUF1PAGES * page_size, buf1 + idx, len,
 		     buf1 + idx);

-      if (HP_TIMING_AVAIL)
-	putchar ('\n');
+      putchar ('\n');

       memcpy (buf1 + idx, tmpbuf, len);
     }
diff --git a/benchtests/bench-memmove.c b/benchtests/bench-memmove.c
index 8925606..332d6af 100644
--- a/benchtests/bench-memmove.c
+++ b/benchtests/bench-memmove.c
@@ -67,6 +67,9 @@  static void
 do_one_test (impl_t *impl, char *dst, char *src, const char *orig_src,
 	     size_t len)
 {
+  size_t i, iters = INNER_LOOP_ITERS;
+  timing_t start, stop, cur;
+
   memcpy (src, orig_src, len);
 #ifdef TEST_BCOPY
   CALL (impl, src, dst, len);
@@ -91,27 +94,20 @@  do_one_test (impl_t *impl, char *dst, char *src, const char *orig_src,
       return;
     }

-  if (HP_TIMING_AVAIL)
+  TIMING_NOW (start);
+  for (i = 0; i < iters; ++i)
     {
-      hp_timing_t start __attribute ((unused));
-      hp_timing_t stop __attribute ((unused));
-      hp_timing_t best_time = ~ (hp_timing_t) 0;
-      size_t i;
-
-      for (i = 0; i < 32; ++i)
-	{
-	  HP_TIMING_NOW (start);
 #ifdef TEST_BCOPY
-	  CALL (impl, src, dst, len);
+      CALL (impl, src, dst, len);
 #else
-	  CALL (impl, dst, src, len);
+      CALL (impl, dst, src, len);
 #endif
-	  HP_TIMING_NOW (stop);
-	  HP_TIMING_BEST (best_time, start, stop);
-	}
-
-      printf ("\t%zd", (size_t) best_time);
     }
+  TIMING_NOW (stop);
+
+  TIMING_DIFF (cur, start, stop);
+
+  TIMING_PRINT_MEAN ((double) cur, (double) iters);
 }

 static void
@@ -134,14 +130,12 @@  do_test (size_t align1, size_t align2, size_t len)
   for (i = 0, j = 1; i < len; i++, j += 23)
     s1[i] = j;

-  if (HP_TIMING_AVAIL)
-    printf ("Length %4zd, alignment %2zd/%2zd:", len, align1, align2);
+  printf ("Length %4zd, alignment %2zd/%2zd:", len, align1, align2);

   FOR_EACH_IMPL (impl, 0)
     do_one_test (impl, s2, (char *) (buf2 + align1), s1, len);

-  if (HP_TIMING_AVAIL)
-    putchar ('\n');
+  putchar ('\n');
 }

 int
diff --git a/benchtests/bench-memset.c b/benchtests/bench-memset.c
index ea29cf3..e45807c 100644
--- a/benchtests/bench-memset.c
+++ b/benchtests/bench-memset.c
@@ -75,6 +75,8 @@  simple_memset (char *s, int c, size_t n)
 static void
 do_one_test (impl_t *impl, char *s, int c __attribute ((unused)), size_t n)
 {
+  size_t i, iters = INNER_LOOP_ITERS;
+  timing_t start, stop, cur;
   char tstbuf[n];
 #ifdef TEST_BZERO
   simple_bzero (tstbuf, n);
@@ -92,28 +94,20 @@  do_one_test (impl_t *impl, char *s, int c __attribute ((unused)), size_t n)
       return;
     }

-  if (HP_TIMING_AVAIL)
+  TIMING_NOW (start);
+  for (i = 0; i < iters; ++i)
     {
-      hp_timing_t start __attribute ((unused));
-      hp_timing_t stop __attribute ((unused));
-      hp_timing_t best_time = ~ (hp_timing_t) 0;
-      size_t i;
-
-      for (i = 0; i < 32; ++i)
-	{
-	  HP_TIMING_NOW (start);
 #ifdef TEST_BZERO
-	  CALL (impl, s, n);
+      CALL (impl, s, n);
 #else
-	  CALL (impl, s, c, n);
+      CALL (impl, s, c, n);
 #endif
+    }
+  TIMING_NOW (stop);

-	  HP_TIMING_NOW (stop);
-	  HP_TIMING_BEST (best_time, start, stop);
-	}
+  TIMING_DIFF (cur, start, stop);

-      printf ("\t%zd", (size_t) best_time);
-    }
+  TIMING_PRINT_MEAN ((double) cur, (double) iters);
 }

 static void
@@ -123,14 +117,12 @@  do_test (size_t align, int c, size_t len)
   if (align + len > page_size)
     return;

-  if (HP_TIMING_AVAIL)
-    printf ("Length %4zd, alignment %2zd, c %2d:", len, align, c);
+  printf ("Length %4zd, alignment %2zd, c %2d:", len, align, c);

   FOR_EACH_IMPL (impl, 0)
     do_one_test (impl, (char *) buf1 + align, c, len);

-  if (HP_TIMING_AVAIL)
-    putchar ('\n');
+  putchar ('\n');
 }

 int
diff --git a/benchtests/bench-rawmemchr.c b/benchtests/bench-rawmemchr.c
index a6b29d7..df6a310 100644
--- a/benchtests/bench-rawmemchr.c
+++ b/benchtests/bench-rawmemchr.c
@@ -40,6 +40,8 @@  simple_rawmemchr (const char *s, int c)
 static void
 do_one_test (impl_t *impl, const char *s, int c, char *exp_res)
 {
+  size_t i, iters = INNER_LOOP_ITERS;
+  timing_t start, stop, cur;
   char *res = CALL (impl, s, c);
   if (res != exp_res)
     {
@@ -49,23 +51,16 @@  do_one_test (impl_t *impl, const char *s, int c, char *exp_res)
       return;
     }

-  if (HP_TIMING_AVAIL)
+  TIMING_NOW (start);
+  for (i = 0; i < iters; ++i)
     {
-      hp_timing_t start __attribute ((unused));
-      hp_timing_t stop __attribute ((unused));
-      hp_timing_t best_time = ~ (hp_timing_t) 0;
-      size_t i;
-
-      for (i = 0; i < 32; ++i)
-	{
-	  HP_TIMING_NOW (start);
-	  CALL (impl, s, c);
-	  HP_TIMING_NOW (stop);
-	  HP_TIMING_BEST (best_time, start, stop);
-	}
-
-      printf ("\t%zd", (size_t) best_time);
+      CALL (impl, s, c);
     }
+  TIMING_NOW (stop);
+
+  TIMING_DIFF (cur, start, stop);
+
+  TIMING_PRINT_MEAN ((double) cur, (double) iters);
 }

 static void
@@ -92,14 +87,12 @@  do_test (size_t align, size_t pos, size_t len, int seek_char)
   buf1[align + len] = -seek_char;
   result = (char *) (buf1 + align + pos);

-  if (HP_TIMING_AVAIL)
-    printf ("Length %4zd, alignment %2zd:", pos, align);
+  printf ("Length %4zd, alignment %2zd:", pos, align);

   FOR_EACH_IMPL (impl, 0)
     do_one_test (impl, (char *) (buf1 + align), seek_char, result);

-  if (HP_TIMING_AVAIL)
-    putchar ('\n');
+  putchar ('\n');
 }

 int
diff --git a/benchtests/bench-strcasecmp.c b/benchtests/bench-strcasecmp.c
index 27250bb..1458df1 100644
--- a/benchtests/bench-strcasecmp.c
+++ b/benchtests/bench-strcasecmp.c
@@ -62,6 +62,8 @@  stupid_strcasecmp (const char *s1, const char *s2)
 static void
 do_one_test (impl_t *impl, const char *s1, const char *s2, int exp_result)
 {
+  size_t i, iters = INNER_LOOP_ITERS;
+  timing_t start, stop, cur;
   int result = CALL (impl, s1, s2);
   if ((exp_result == 0 && result != 0)
       || (exp_result < 0 && result >= 0)
@@ -73,23 +75,16 @@  do_one_test (impl_t *impl, const char *s1, const char *s2, int exp_result)
       return;
     }

-  if (HP_TIMING_AVAIL)
+  TIMING_NOW (start);
+  for (i = 0; i < iters; ++i)
     {
-      hp_timing_t start __attribute ((unused));
-      hp_timing_t stop __attribute ((unused));
-      hp_timing_t best_time = ~ (hp_timing_t) 0;
-      size_t i;
-
-      for (i = 0; i < 32; ++i)
-	{
-	  HP_TIMING_NOW (start);
-	  CALL (impl, s1, s2);
-	  HP_TIMING_NOW (stop);
-	  HP_TIMING_BEST (best_time, start, stop);
-	}
-
-      printf ("\t%zd", (size_t) best_time);
+      CALL (impl, s1, s2);
     }
+  TIMING_NOW (stop);
+
+  TIMING_DIFF (cur, start, stop);
+
+  TIMING_PRINT_MEAN ((double) cur, (double) iters);
 }

 static void
@@ -128,14 +123,12 @@  do_test (size_t align1, size_t align2, size_t len, int max_char,
   else
     s2[len - 1] -= exp_result;

-  if (HP_TIMING_AVAIL)
-    printf ("Length %4zd, alignment %2zd/%2zd:", len, align1, align2);
+  printf ("Length %4zd, alignment %2zd/%2zd:", len, align1, align2);

   FOR_EACH_IMPL (impl, 0)
     do_one_test (impl, s1, s2, exp_result);

-  if (HP_TIMING_AVAIL)
-    putchar ('\n');
+  putchar ('\n');
 }

 int
diff --git a/benchtests/bench-strcasestr.c b/benchtests/bench-strcasestr.c
index 289b490..68b7e95 100644
--- a/benchtests/bench-strcasestr.c
+++ b/benchtests/bench-strcasestr.c
@@ -60,23 +60,19 @@  IMPL (strcasestr, 1)
 static void
 do_one_test (impl_t *impl, const char *s1, const char *s2, char *exp_result)
 {
-  if (HP_TIMING_AVAIL)
+  size_t i, iters = INNER_LOOP_ITERS;
+  timing_t start, stop, cur;
+
+  TIMING_NOW (start);
+  for (i = 0; i < iters; ++i)
     {
-      hp_timing_t start __attribute ((unused));
-      hp_timing_t stop __attribute ((unused));
-      hp_timing_t best_time = ~(hp_timing_t) 0;
-      size_t i;
+      CALL (impl, s1, s2);
+    }
+  TIMING_NOW (stop);

-      for (i = 0; i < 32; ++i)
-	{
-	  HP_TIMING_NOW (start);
-	  CALL (impl, s1, s2);
-	  HP_TIMING_NOW (stop);
-	  HP_TIMING_BEST (best_time, start, stop);
-	}
+  TIMING_DIFF (cur, start, stop);

-      printf ("\t%zd", (size_t) best_time);
-    }
+  TIMING_PRINT_MEAN ((double) cur, (double) iters);
 }


@@ -116,15 +112,13 @@  do_test (size_t align1, size_t align2, size_t len1, size_t len2,
     }
   s1[len1] = '\0';

-  if (HP_TIMING_AVAIL)
-    printf ("Length %4zd/%zd, alignment %2zd/%2zd, %s:",
-	    len1, len2, align1, align2, fail ? "fail" : "found");
+  printf ("Length %4zd/%zd, alignment %2zd/%2zd, %s:",
+	  len1, len2, align1, align2, fail ? "fail" : "found");

   FOR_EACH_IMPL (impl, 0)
     do_one_test (impl, s1, s2, fail ? NULL : s1 + len1 - len2);

-  if (HP_TIMING_AVAIL)
-    putchar ('\n');
+  putchar ('\n');
 }

 static int
diff --git a/benchtests/bench-strcat.c b/benchtests/bench-strcat.c
index b70a272..6602009 100644
--- a/benchtests/bench-strcat.c
+++ b/benchtests/bench-strcat.c
@@ -39,7 +39,9 @@  simple_strcat (char *dst, const char *src)
 static void
 do_one_test (impl_t *impl, char *dst, const char *src)
 {
-  size_t k = strlen (dst);
+  size_t k = strlen (dst), i, iters = INNER_LOOP_ITERS;
+  timing_t start, stop, cur;
+
   if (CALL (impl, dst, src) != dst)
     {
       error (0, 0, "Wrong result in function %s %p %p", impl->name,
@@ -56,24 +58,17 @@  do_one_test (impl_t *impl, char *dst, const char *src)
       return;
     }

-  if (HP_TIMING_AVAIL)
+  TIMING_NOW (start);
+  for (i = 0; i < iters; ++i)
     {
-      hp_timing_t start __attribute ((unused));
-      hp_timing_t stop __attribute ((unused));
-      hp_timing_t best_time = ~ (hp_timing_t) 0;
-      size_t i;
-
-      for (i = 0; i < 32; ++i)
-	{
-	  dst[k] = '\0';
-	  HP_TIMING_NOW (start);
-	  CALL (impl, dst, src);
-	  HP_TIMING_NOW (stop);
-	  HP_TIMING_BEST (best_time, start, stop);
-	}
-
-      printf ("\t%zd", (size_t) best_time);
+      dst[k] = '\0';
+      CALL (impl, dst, src);
     }
+  TIMING_NOW (stop);
+
+  TIMING_DIFF (cur, start, stop);
+
+  TIMING_PRINT_MEAN ((double) cur, (double) iters);
 }

 static void
@@ -100,8 +95,7 @@  do_test (size_t align1, size_t align2, size_t len1, size_t len2, int max_char)
   for (i = 0; i < len2; i++)
     s2[i] = 32 + 23 * i % (max_char - 32);

-  if (HP_TIMING_AVAIL)
-    printf ("Length %4zd/%4zd, alignment %2zd/%2zd:", len1, len2, align1, align2);
+  printf ("Length %4zd/%4zd, alignment %2zd/%2zd:", len1, len2, align1, align2);

   FOR_EACH_IMPL (impl, 0)
     {
@@ -109,8 +103,7 @@  do_test (size_t align1, size_t align2, size_t len1, size_t len2, int max_char)
       do_one_test (impl, s2, s1);
     }

-  if (HP_TIMING_AVAIL)
-    putchar ('\n');
+  putchar ('\n');
 }

 int
diff --git a/benchtests/bench-strchr.c b/benchtests/bench-strchr.c
index 710b592..d432ba5 100644
--- a/benchtests/bench-strchr.c
+++ b/benchtests/bench-strchr.c
@@ -89,23 +89,19 @@  IMPL (STRCHR, 1)
 static void
 do_one_test (impl_t *impl, const CHAR *s, int c, const CHAR *exp_res)
 {
-  if (HP_TIMING_AVAIL)
+  size_t i, iters = INNER_LOOP_ITERS;
+  timing_t start, stop, cur;
+
+  TIMING_NOW (start);
+  for (i = 0; i < iters; ++i)
     {
-      hp_timing_t start __attribute ((unused));
-      hp_timing_t stop __attribute ((unused));
-      hp_timing_t best_time = ~ (hp_timing_t) 0;
-      size_t i;
-
-      for (i = 0; i < 32; ++i)
-	{
-	  HP_TIMING_NOW (start);
-	  CALL (impl, s, c);
-	  HP_TIMING_NOW (stop);
-	  HP_TIMING_BEST (best_time, start, stop);
-	}
-
-      printf ("\t%zd", (size_t) best_time);
+      CALL (impl, s, c);
     }
+  TIMING_NOW (stop);
+
+  TIMING_DIFF (cur, start, stop);
+
+  TIMING_PRINT_MEAN ((double) cur, (double) iters);
 }

 static void
@@ -141,15 +137,13 @@  do_test (size_t align, size_t pos, size_t len, int seek_char, int max_char)
   else
     result = NULLRET (buf + align + len);

-  if (HP_TIMING_AVAIL)
-    printf ("Length %4zd, alignment in bytes %2zd:",
-	    pos, align * sizeof (CHAR));
+  printf ("Length %4zd, alignment in bytes %2zd:",
+	  pos, align * sizeof (CHAR));

   FOR_EACH_IMPL (impl, 0)
     do_one_test (impl, buf + align, seek_char, result);

-  if (HP_TIMING_AVAIL)
-    putchar ('\n');
+  putchar ('\n');
 }

 int
diff --git a/benchtests/bench-strcmp.c b/benchtests/bench-strcmp.c
index 63a3cd4..c1e0b26 100644
--- a/benchtests/bench-strcmp.c
+++ b/benchtests/bench-strcmp.c
@@ -138,23 +138,19 @@  do_one_test (impl_t *impl,
 	     const CHAR *s1, const CHAR *s2,
 	     int exp_result)
 {
-  if (HP_TIMING_AVAIL)
+  size_t i, iters = INNER_LOOP_ITERS;
+  timing_t start, stop, cur;
+
+  TIMING_NOW (start);
+  for (i = 0; i < iters; ++i)
     {
-      hp_timing_t start __attribute ((unused));
-      hp_timing_t stop __attribute ((unused));
-      hp_timing_t best_time = ~ (hp_timing_t) 0;
-      size_t i;
-
-      for (i = 0; i < 32; ++i)
-	{
-	  HP_TIMING_NOW (start);
-	  CALL (impl, s1, s2);
-	  HP_TIMING_NOW (stop);
-	  HP_TIMING_BEST (best_time, start, stop);
-	}
-
-      printf ("\t%zd", (size_t) best_time);
+      CALL (impl, s1, s2);
     }
+  TIMING_NOW (stop);
+
+  TIMING_DIFF (cur, start, stop);
+
+  TIMING_PRINT_MEAN ((double) cur, (double) iters);
 }

 static void
@@ -190,14 +186,12 @@  do_test (size_t align1, size_t align2, size_t len, int max_char,
   s2[len + 1] = 24 + exp_result;
   s2[len - 1] -= exp_result;

-  if (HP_TIMING_AVAIL)
-    printf ("Length %4zd, alignment %2zd/%2zd:", len, align1, align2);
+  printf ("Length %4zd, alignment %2zd/%2zd:", len, align1, align2);

   FOR_EACH_IMPL (impl, 0)
     do_one_test (impl, s1, s2, exp_result);

-  if (HP_TIMING_AVAIL)
-    putchar ('\n');
+  putchar ('\n');
 }

 int
diff --git a/benchtests/bench-strcpy.c b/benchtests/bench-strcpy.c
index 4e024d4..88db83b 100644
--- a/benchtests/bench-strcpy.c
+++ b/benchtests/bench-strcpy.c
@@ -74,6 +74,9 @@  static void
 do_one_test (impl_t *impl, CHAR *dst, const CHAR *src,
 	     size_t len __attribute__((unused)))
 {
+  size_t i, iters = INNER_LOOP_ITERS;
+  timing_t start, stop, cur;
+
   if (CALL (impl, dst, src) != STRCPY_RESULT (dst, len))
     {
       error (0, 0, "Wrong result in function %s %p %p", impl->name,
@@ -91,23 +94,16 @@  do_one_test (impl_t *impl, CHAR *dst, const CHAR *src,
       return;
     }

-  if (HP_TIMING_AVAIL)
+  TIMING_NOW (start);
+  for (i = 0; i < iters; ++i)
     {
-      hp_timing_t start __attribute ((unused));
-      hp_timing_t stop __attribute ((unused));;
-      hp_timing_t best_time = ~ (hp_timing_t) 0;
-      size_t i;
-
-      for (i = 0; i < 32; ++i)
-	{
-	  HP_TIMING_NOW (start);
-	  CALL (impl, dst, src);
+      CALL (impl, dst, src);
-	  HP_TIMING_NOW (stop);
-	  HP_TIMING_BEST (best_time, start, stop);
-	}
-
-      printf ("\t%zd", (size_t) best_time);
     }
+  TIMING_NOW (stop);
+
+  TIMING_DIFF (cur, start, stop);
+
+  TIMING_PRINT_MEAN ((double) cur, (double) iters);
 }

 static void
@@ -133,14 +129,12 @@  do_test (size_t align1, size_t align2, size_t len, int max_char)
     s1[i] = 32 + 23 * i % (max_char - 32);
   s1[len] = 0;

-  if (HP_TIMING_AVAIL)
-    printf ("Length %4zd, alignments in bytes %2zd/%2zd:", len, align1 * sizeof(CHAR), align2 * sizeof(CHAR));
+  printf ("Length %4zd, alignments in bytes %2zd/%2zd:", len, align1 * sizeof(CHAR), align2 * sizeof(CHAR));

   FOR_EACH_IMPL (impl, 0)
     do_one_test (impl, s2, s1, len);

-  if (HP_TIMING_AVAIL)
-    putchar ('\n');
+  putchar ('\n');
 }

 int
diff --git a/benchtests/bench-strcpy_chk.c b/benchtests/bench-strcpy_chk.c
index 29e5728..3c2a0b3 100644
--- a/benchtests/bench-strcpy_chk.c
+++ b/benchtests/bench-strcpy_chk.c
@@ -75,6 +75,9 @@  do_one_test (impl_t *impl, char *dst, const char *src,
 	     size_t len, size_t dlen)
 {
   char *res;
+  size_t i, iters = INNER_LOOP_ITERS;
+  timing_t start, stop, cur;
+
   if (dlen <= len)
     {
       if (impl->test == 1)
@@ -110,23 +113,16 @@  do_one_test (impl_t *impl, char *dst, const char *src,
       return;
     }

-  if (HP_TIMING_AVAIL)
+  TIMING_NOW (start);
+  for (i = 0; i < iters; ++i)
     {
-      hp_timing_t start __attribute ((unused));
-      hp_timing_t stop __attribute ((unused));;
-      hp_timing_t best_time = ~ (hp_timing_t) 0;
-      size_t i;
+      CALL (impl, dst, src, dlen);
+    }
+  TIMING_NOW (stop);

-      for (i = 0; i < 32; ++i)
-	{
-	  HP_TIMING_NOW (start);
-	  CALL (impl, dst, src, dlen);
-	  HP_TIMING_NOW (stop);
-	  HP_TIMING_BEST (best_time, start, stop);
-	}
+  TIMING_DIFF (cur, start, stop);

-      printf ("\t%zd", (size_t) best_time);
-    }
+  TIMING_PRINT_MEAN ((double) cur, (double) iters);
 }

 static void
@@ -150,13 +146,13 @@  do_test (size_t align1, size_t align2, size_t len, size_t dlen, int max_char)
     s1[i] = 32 + 23 * i % (max_char - 32);
   s1[len] = 0;

-  if (HP_TIMING_AVAIL && dlen > len)
+  if (dlen > len)
     printf ("Length %4zd, alignment %2zd/%2zd:", len, align1, align2);

   FOR_EACH_IMPL (impl, 0)
     do_one_test (impl, s2, s1, len, dlen);

-  if (HP_TIMING_AVAIL && dlen > len)
+  if (dlen > len)
     putchar ('\n');
 }

diff --git a/benchtests/bench-string.h b/benchtests/bench-string.h
index 2fe8d9f..53e796a 100644
--- a/benchtests/bench-string.h
+++ b/benchtests/bench-string.h
@@ -52,7 +52,7 @@  extern impl_t __start_impls[], __stop_impls[];
 # include <ifunc-impl-list.h>
 # define GL(x) _##x
 # define GLRO(x) _##x
-# include <hp-timing.h>
+# include "bench-timing.h"


 # define TEST_FUNCTION test_main ()
@@ -61,6 +61,8 @@  extern impl_t __start_impls[], __stop_impls[];
 # define OPT_RANDOM 10001
 # define OPT_SEED 10002

+# define INNER_LOOP_ITERS 64
+
 unsigned char *buf1, *buf2;
 int ret, do_srandom;
 unsigned int seed;
@@ -158,16 +160,6 @@  static impl_t *impl_array;
        if (!notall || impl->test)
 # endif /* ! (defined TEST_IFUNC && defined TEST_NAME) */

-# define HP_TIMING_BEST(best_time, start, end)	\
-    do									      \
-      {									      \
-	hp_timing_t tmptime;						      \
-	HP_TIMING_DIFF (tmptime, start + _dl_hp_timing_overhead, end);	      \
-	if (best_time > tmptime)					      \
-	  best_time = tmptime;						      \
-      }									      \
-    while (0)
-
 # ifndef BUF1PAGES
 #  define BUF1PAGES 1
 # endif
@@ -198,7 +190,6 @@  test_init (void)
     error (EXIT_FAILURE, errno, "mmap failed");
   if (mprotect (buf2 + page_size, page_size, PROT_NONE))
     error (EXIT_FAILURE, errno, "mprotect failed");
-  HP_TIMING_DIFF_INIT ();
   if (do_srandom)
     {
       printf ("Setting seed to 0x%x\n", seed);
diff --git a/benchtests/bench-strlen.c b/benchtests/bench-strlen.c
index 63b1e93..44c9c2b 100644
--- a/benchtests/bench-strlen.c
+++ b/benchtests/bench-strlen.c
@@ -62,7 +62,9 @@  IMPL (STRLEN, 1)
 static void
 do_one_test (impl_t *impl, const CHAR *s, size_t exp_len)
 {
-  size_t len = CALL (impl, s);
+  size_t len = CALL (impl, s), i, iters = INNER_LOOP_ITERS;
+  timing_t start, stop, cur;
+
   if (len != exp_len)
     {
       error (0, 0, "Wrong result in function %s %zd %zd", impl->name,
@@ -71,23 +73,16 @@  do_one_test (impl_t *impl, const CHAR *s, size_t exp_len)
       return;
     }

-  if (HP_TIMING_AVAIL)
+  TIMING_NOW (start);
+  for (i = 0; i < iters; ++i)
     {
-      hp_timing_t start __attribute ((unused));
-      hp_timing_t stop __attribute ((unused));
-      hp_timing_t best_time = ~ (hp_timing_t) 0;
-      size_t i;
-
-      for (i = 0; i < 32; ++i)
-	{
-	  HP_TIMING_NOW (start);
-	  CALL (impl, s);
-	  HP_TIMING_NOW (stop);
-	  HP_TIMING_BEST (best_time, start, stop);
-	}
-
-      printf ("\t%zd", (size_t) best_time);
+      CALL (impl, s);
     }
+  TIMING_NOW (stop);
+
+  TIMING_DIFF (cur, start, stop);
+
+  TIMING_PRINT_MEAN ((double) cur, (double) iters);
 }

 static void
@@ -105,14 +100,12 @@  do_test (size_t align, size_t len)
     buf[align + i] = 1 + 11111 * i % MAX_CHAR;
   buf[align + len] = 0;

-  if (HP_TIMING_AVAIL)
-    printf ("Length %4zd, alignment %2zd:", len, align);
+  printf ("Length %4zd, alignment %2zd:", len, align);

   FOR_EACH_IMPL (impl, 0)
     do_one_test (impl, (CHAR *) (buf + align), len);

-  if (HP_TIMING_AVAIL)
-    putchar ('\n');
+  putchar ('\n');
 }

 int
diff --git a/benchtests/bench-strncasecmp.c b/benchtests/bench-strncasecmp.c
index 5fa9220..9badd05 100644
--- a/benchtests/bench-strncasecmp.c
+++ b/benchtests/bench-strncasecmp.c
@@ -73,23 +73,19 @@  static void
 do_one_test (impl_t *impl, const char *s1, const char *s2, size_t n,
 	     int exp_result)
 {
-  if (HP_TIMING_AVAIL)
+  size_t i, iters = INNER_LOOP_ITERS;
+  timing_t start, stop, cur;
+
+  TIMING_NOW (start);
+  for (i = 0; i < iters; ++i)
     {
-      hp_timing_t start __attribute ((unused));
-      hp_timing_t stop __attribute ((unused));
-      hp_timing_t best_time = ~ (hp_timing_t) 0;
-      size_t i;
-
-      for (i = 0; i < 32; ++i)
-	{
-	  HP_TIMING_NOW (start);
-	  CALL (impl, s1, s2, n);
-	  HP_TIMING_NOW (stop);
-	  HP_TIMING_BEST (best_time, start, stop);
-	}
-
-      printf ("\t%zd", (size_t) best_time);
+      CALL (impl, s1, s2, n);
     }
+  TIMING_NOW (stop);
+
+  TIMING_DIFF (cur, start, stop);
+
+  TIMING_PRINT_MEAN ((double) cur, (double) iters);
 }

 static void
@@ -128,14 +124,12 @@  do_test (size_t align1, size_t align2, size_t n, size_t len, int max_char,
   else
     s2[len - 1] -= exp_result;

-  if (HP_TIMING_AVAIL)
-    printf ("Length %4zd, alignment %2zd/%2zd:", len, align1, align2);
+  printf ("Length %4zd, alignment %2zd/%2zd:", len, align1, align2);

   FOR_EACH_IMPL (impl, 0)
     do_one_test (impl, s1, s2, n, exp_result);

-  if (HP_TIMING_AVAIL)
-    putchar ('\n');
+  putchar ('\n');
 }

 int
diff --git a/benchtests/bench-strncat.c b/benchtests/bench-strncat.c
index 904daa7..2a17817 100644
--- a/benchtests/bench-strncat.c
+++ b/benchtests/bench-strncat.c
@@ -43,7 +43,9 @@  stupid_strncat (char *dst, const char *src, size_t n)
 static void
 do_one_test (impl_t *impl, char *dst, const char *src, size_t n)
 {
-  size_t k = strlen (dst);
+  size_t k = strlen (dst), i, iters = INNER_LOOP_ITERS;
+  timing_t start, stop, cur;
+
   if (CALL (impl, dst, src, n) != dst)
     {
       error (0, 0, "Wrong result in function %s %p != %p", impl->name,
@@ -67,24 +69,18 @@  do_one_test (impl_t *impl, char *dst, const char *src, size_t n)
       ret = 1;
       return;
     }
-  if (HP_TIMING_AVAIL)
+
+  TIMING_NOW (start);
+  for (i = 0; i < iters; ++i)
     {
-      hp_timing_t start __attribute ((unused));
-      hp_timing_t stop __attribute ((unused));
-      hp_timing_t best_time = ~ (hp_timing_t) 0;
-      size_t i;
+      dst[k] = '\0';
+      CALL (impl, dst, src, n);
+    }
+  TIMING_NOW (stop);

-      for (i = 0; i < 32; ++i)
-	{
-	  dst[k] = '\0';
-	  HP_TIMING_NOW (start);
-	  CALL (impl, dst, src, n);
-	  HP_TIMING_NOW (stop);
-	  HP_TIMING_BEST (best_time, start, stop);
-	}
+  TIMING_DIFF (cur, start, stop);

-      printf ("\t%zd", (size_t) best_time);
-    }
+  TIMING_PRINT_MEAN ((double) cur, (double) iters);
 }

 static void
@@ -114,9 +110,8 @@  do_test (size_t align1, size_t align2, size_t len1, size_t len2,
   for (i = 0; i < len2; i++)
     s2[i] = 32 + 23 * i % (max_char - 32);

-  if (HP_TIMING_AVAIL)
-    printf ("Length %4zd/%4zd, alignment %2zd/%2zd, N %4zd:",
-	    len1, len2, align1, align2, n);
+  printf ("Length %4zd/%4zd, alignment %2zd/%2zd, N %4zd:",
+	  len1, len2, align1, align2, n);

   FOR_EACH_IMPL (impl, 0)
     {
@@ -124,8 +119,7 @@  do_test (size_t align1, size_t align2, size_t len1, size_t len2,
       do_one_test (impl, s2, s1, n);
     }

-  if (HP_TIMING_AVAIL)
-    putchar ('\n');
+  putchar ('\n');
 }

 int
diff --git a/benchtests/bench-strncmp.c b/benchtests/bench-strncmp.c
index b3af0f9..25df3db 100644
--- a/benchtests/bench-strncmp.c
+++ b/benchtests/bench-strncmp.c
@@ -54,23 +54,19 @@  static void
 do_one_test (impl_t *impl, const char *s1, const char *s2, size_t n,
 	     int exp_result)
 {
-  if (HP_TIMING_AVAIL)
+  size_t i, iters = INNER_LOOP_ITERS;
+  timing_t start, stop, cur;
+
+  TIMING_NOW (start);
+  for (i = 0; i < iters; ++i)
     {
-      hp_timing_t start __attribute ((unused));
-      hp_timing_t stop __attribute ((unused));
-      hp_timing_t best_time = ~ (hp_timing_t) 0;
-      size_t i;
-
-      for (i = 0; i < 32; ++i)
-	{
-	  HP_TIMING_NOW (start);
-	  CALL (impl, s1, s2, n);
-	  HP_TIMING_NOW (stop);
-	  HP_TIMING_BEST (best_time, start, stop);
-	}
-
-      printf ("\t%zd", (size_t) best_time);
+      CALL (impl, s1, s2, n);
     }
+  TIMING_NOW (stop);
+
+  TIMING_DIFF (cur, start, stop);
+
+  TIMING_PRINT_MEAN ((double) cur, (double) iters);
 }

 static void
@@ -84,14 +80,12 @@  do_test_limit (size_t align1, size_t align2, size_t len, size_t n, int max_char,
     {
       s1 = (char*)(buf1 + page_size);
       s2 = (char*)(buf2 + page_size);
-      if (HP_TIMING_AVAIL)
-	printf ("Length %4zd/%4zd:", len, n);
+      printf ("Length %4zd/%4zd:", len, n);

       FOR_EACH_IMPL (impl, 0)
 	do_one_test (impl, s1, s2, n, 0);

-      if (HP_TIMING_AVAIL)
-	putchar ('\n');
+      putchar ('\n');

       return;
     }
@@ -122,14 +116,12 @@  do_test_limit (size_t align1, size_t align2, size_t len, size_t n, int max_char,
 	s1[len] = 64;
     }

-  if (HP_TIMING_AVAIL)
-    printf ("Length %4zd/%4zd, alignment %2zd/%2zd:", len, n, align1, align2);
+  printf ("Length %4zd/%4zd, alignment %2zd/%2zd:", len, n, align1, align2);

   FOR_EACH_IMPL (impl, 0)
     do_one_test (impl, s1, s2, n, exp_result);

-  if (HP_TIMING_AVAIL)
-    putchar ('\n');
+  putchar ('\n');
 }

 static void
@@ -167,14 +159,12 @@  do_test (size_t align1, size_t align2, size_t len, size_t n, int max_char,
   if (len >= n)
     s2[n - 1] -= exp_result;

-  if (HP_TIMING_AVAIL)
-    printf ("Length %4zd/%4zd, alignment %2zd/%2zd:", len, n, align1, align2);
+  printf ("Length %4zd/%4zd, alignment %2zd/%2zd:", len, n, align1, align2);

   FOR_EACH_IMPL (impl, 0)
     do_one_test (impl, (char*)s1, (char*)s2, n, exp_result);

-  if (HP_TIMING_AVAIL)
-    putchar ('\n');
+  putchar ('\n');
 }

 int
diff --git a/benchtests/bench-strncpy.c b/benchtests/bench-strncpy.c
index 4065c0a..645925b 100644
--- a/benchtests/bench-strncpy.c
+++ b/benchtests/bench-strncpy.c
@@ -62,6 +62,9 @@  typedef char *(*proto_t) (char *, const char *, size_t);
 static void
 do_one_test (impl_t *impl, char *dst, const char *src, size_t len, size_t n)
 {
+  size_t i, iters = INNER_LOOP_ITERS;
+  timing_t start, stop, cur;
+
   if (CALL (impl, dst, src, n) != STRNCPY_RESULT (dst, len, n))
     {
       error (0, 0, "Wrong result in function %s %p %p", impl->name,
@@ -90,23 +93,16 @@  do_one_test (impl_t *impl, char *dst, const char *src, size_t len, size_t n)
 	  }
     }

-  if (HP_TIMING_AVAIL)
+  TIMING_NOW (start);
+  for (i = 0; i < iters; ++i)
     {
-      hp_timing_t start __attribute__ ((unused));
-      hp_timing_t stop __attribute__ ((unused));
-      hp_timing_t best_time = ~ (hp_timing_t) 0;
-      size_t i;
+      CALL (impl, dst, src, n);
+    }
+  TIMING_NOW (stop);

-      for (i = 0; i < 32; ++i)
-	{
-	  HP_TIMING_NOW (start);
-	  CALL (impl, dst, src, n);
-	  HP_TIMING_NOW (stop);
-	  HP_TIMING_BEST (best_time, start, stop);
-	}
+  TIMING_DIFF (cur, start, stop);

-      printf ("\t%zd", (size_t) best_time);
-    }
+  TIMING_PRINT_MEAN ((double) cur, (double) iters);
 }

 static void
@@ -132,14 +128,12 @@  do_test (size_t align1, size_t align2, size_t len, size_t n, int max_char)
   for (i = len + 1; i + align1 < page_size && i < len + 64; ++i)
     s1[i] = 32 + 32 * i % (max_char - 32);

-  if (HP_TIMING_AVAIL)
-    printf ("Length %4zd, n %4zd, alignment %2zd/%2zd:", len, n, align1, align2);
+  printf ("Length %4zd, n %4zd, alignment %2zd/%2zd:", len, n, align1, align2);

   FOR_EACH_IMPL (impl, 0)
     do_one_test (impl, s2, s1, len, n);

-  if (HP_TIMING_AVAIL)
-    putchar ('\n');
+  putchar ('\n');
 }

 int
diff --git a/benchtests/bench-strnlen.c b/benchtests/bench-strnlen.c
index 4233f27..793f9be 100644
--- a/benchtests/bench-strnlen.c
+++ b/benchtests/bench-strnlen.c
@@ -38,7 +38,9 @@  simple_strnlen (const char *s, size_t maxlen)
 static void
 do_one_test (impl_t *impl, const char *s, size_t maxlen, size_t exp_len)
 {
-  size_t len = CALL (impl, s, maxlen);
+  size_t len = CALL (impl, s, maxlen), i, iters = INNER_LOOP_ITERS;
+  timing_t start, stop, cur;
+
   if (len != exp_len)
     {
       error (0, 0, "Wrong result in function %s %zd %zd", impl->name,
@@ -47,23 +49,16 @@  do_one_test (impl_t *impl, const char *s, size_t maxlen, size_t exp_len)
       return;
     }

-  if (HP_TIMING_AVAIL)
+  TIMING_NOW (start);
+  for (i = 0; i < iters; ++i)
     {
-      hp_timing_t start __attribute ((unused));
-      hp_timing_t stop __attribute ((unused));
-      hp_timing_t best_time = ~ (hp_timing_t) 0;
-      size_t i;
-
-      for (i = 0; i < 32; ++i)
-	{
-	  HP_TIMING_NOW (start);
-	  CALL (impl, s, maxlen);
-	  HP_TIMING_NOW (stop);
-	  HP_TIMING_BEST (best_time, start, stop);
-	}
-
-      printf ("\t%zd", (size_t) best_time);
+      CALL (impl, s, maxlen);
     }
+  TIMING_NOW (stop);
+
+  TIMING_DIFF (cur, start, stop);
+
+  TIMING_PRINT_MEAN ((double) cur, (double) iters);
 }

 static void
@@ -79,14 +74,12 @@  do_test (size_t align, size_t len, size_t maxlen, int max_char)
     buf1[align + i] = 1 + 7 * i % max_char;
   buf1[align + len] = 0;

-  if (HP_TIMING_AVAIL)
-    printf ("Length %4zd, alignment %2zd:", len, align);
+  printf ("Length %4zd, alignment %2zd:", len, align);

   FOR_EACH_IMPL (impl, 0)
     do_one_test (impl, (char *) (buf1 + align), maxlen, MIN (len, maxlen));

-  if (HP_TIMING_AVAIL)
-    putchar ('\n');
+  putchar ('\n');
 }

 int
diff --git a/benchtests/bench-strpbrk.c b/benchtests/bench-strpbrk.c
index 0163de8..fe966be 100644
--- a/benchtests/bench-strpbrk.c
+++ b/benchtests/bench-strpbrk.c
@@ -62,6 +62,9 @@  static void
 do_one_test (impl_t *impl, const char *s, const char *rej, RES_TYPE exp_res)
 {
   RES_TYPE res = CALL (impl, s, rej);
+  size_t i, iters = INNER_LOOP_ITERS;
+  timing_t start, stop, cur;
+
   if (res != exp_res)
     {
       error (0, 0, "Wrong result in function %s %p %p", impl->name,
@@ -70,23 +73,16 @@  do_one_test (impl_t *impl, const char *s, const char *rej, RES_TYPE exp_res)
       return;
     }

-  if (HP_TIMING_AVAIL)
+  TIMING_NOW (start);
+  for (i = 0; i < iters; ++i)
     {
-      hp_timing_t start __attribute ((unused));
-      hp_timing_t stop __attribute ((unused));
-      hp_timing_t best_time = ~ (hp_timing_t) 0;
-      size_t i;
+      CALL (impl, s, rej);
+    }
+  TIMING_NOW (stop);

-      for (i = 0; i < 32; ++i)
-	{
-	  HP_TIMING_NOW (start);
-	  CALL (impl, s, rej);
-	  HP_TIMING_NOW (stop);
-	  HP_TIMING_BEST (best_time, start, stop);
-	}
+  TIMING_DIFF (cur, start, stop);

-      printf ("\t%zd", (size_t) best_time);
-    }
+  TIMING_PRINT_MEAN ((double) cur, (double) iters);
 }

 static void
@@ -136,14 +132,12 @@  do_test (size_t align, size_t pos, size_t len)
     }
   result = STRPBRK_RESULT (s, pos);

-  if (HP_TIMING_AVAIL)
-    printf ("Length %4zd, alignment %2zd, rej len %2zd:", pos, align, len);
+  printf ("Length %4zd, alignment %2zd, rej len %2zd:", pos, align, len);

   FOR_EACH_IMPL (impl, 0)
     do_one_test (impl, s, rej, result);

-  if (HP_TIMING_AVAIL)
-    putchar ('\n');
+  putchar ('\n');
 }

 int
diff --git a/benchtests/bench-strrchr.c b/benchtests/bench-strrchr.c
index 400ac80..6a7aa84 100644
--- a/benchtests/bench-strrchr.c
+++ b/benchtests/bench-strrchr.c
@@ -63,6 +63,9 @@  static void
 do_one_test (impl_t *impl, const CHAR *s, int c, CHAR *exp_res)
 {
   CHAR *res = CALL (impl, s, c);
+  size_t i, iters = INNER_LOOP_ITERS;
+  timing_t start, stop, cur;
+
   if (res != exp_res)
     {
       error (0, 0, "Wrong result in function %s %p %p", impl->name,
@@ -71,23 +74,16 @@  do_one_test (impl_t *impl, const CHAR *s, int c, CHAR *exp_res)
       return;
     }

-  if (HP_TIMING_AVAIL)
+  TIMING_NOW (start);
+  for (i = 0; i < iters; ++i)
     {
-      hp_timing_t start __attribute ((unused));
-      hp_timing_t stop __attribute ((unused));
-      hp_timing_t best_time = ~ (hp_timing_t) 0;
-      size_t i;
-
-      for (i = 0; i < 32; ++i)
-	{
-	  HP_TIMING_NOW (start);
-	  CALL (impl, s, c);
-	  HP_TIMING_NOW (stop);
-	  HP_TIMING_BEST (best_time, start, stop);
-	}
-
-      printf ("\t%zd", (size_t) best_time);
+      CALL (impl, s, c);
     }
+  TIMING_NOW (stop);
+
+  TIMING_DIFF (cur, start, stop);
+
+  TIMING_PRINT_MEAN ((double) cur, (double) iters);
 }

 static void
@@ -126,14 +122,12 @@  do_test (size_t align, size_t pos, size_t len, int seek_char, int max_char)
   else
     result = NULL;

-  if (HP_TIMING_AVAIL)
-    printf ("Length %4zd, alignment in bytes %2zd:", pos, align * sizeof(CHAR));
+  printf ("Length %4zd, alignment in bytes %2zd:", pos, align * sizeof(CHAR));

   FOR_EACH_IMPL (impl, 0)
     do_one_test (impl, (CHAR *) (buf + align), seek_char, result);

-  if (HP_TIMING_AVAIL)
-    putchar ('\n');
+  putchar ('\n');
 }

 int
diff --git a/benchtests/bench-strspn.c b/benchtests/bench-strspn.c
index 7cf26f4..634bca1 100644
--- a/benchtests/bench-strspn.c
+++ b/benchtests/bench-strspn.c
@@ -65,7 +65,9 @@  stupid_strspn (const char *s, const char *acc)
 static void
 do_one_test (impl_t *impl, const char *s, const char *acc, size_t exp_res)
 {
-  size_t res = CALL (impl, s, acc);
+  size_t res = CALL (impl, s, acc), i, iters = INNER_LOOP_ITERS;
+  timing_t start, stop, cur;
+
   if (res != exp_res)
     {
       error (0, 0, "Wrong result in function %s %p %p", impl->name,
@@ -74,23 +76,16 @@  do_one_test (impl_t *impl, const char *s, const char *acc, size_t exp_res)
       return;
     }

-  if (HP_TIMING_AVAIL)
+  TIMING_NOW (start);
+  for (i = 0; i < iters; ++i)
     {
-      hp_timing_t start __attribute ((unused));
-      hp_timing_t stop __attribute ((unused));
-      hp_timing_t best_time = ~ (hp_timing_t) 0;
-      size_t i;
-
-      for (i = 0; i < 32; ++i)
-	{
-	  HP_TIMING_NOW (start);
-	  CALL (impl, s, acc);
-	  HP_TIMING_NOW (stop);
-	  HP_TIMING_BEST (best_time, start, stop);
-	}
-
-      printf ("\t%zd", (size_t) best_time);
+      CALL (impl, s, acc);
     }
+  TIMING_NOW (stop);
+
+  TIMING_DIFF (cur, start, stop);
+
+  TIMING_PRINT_MEAN ((double) cur, (double) iters);
 }

 static void
@@ -128,14 +123,12 @@  do_test (size_t align, size_t pos, size_t len)
       s[i] = '\0';
     }

-  if (HP_TIMING_AVAIL)
-    printf ("Length %4zd, alignment %2zd, acc len %2zd:", pos, align, len);
+  printf ("Length %4zd, alignment %2zd, acc len %2zd:", pos, align, len);

   FOR_EACH_IMPL (impl, 0)
     do_one_test (impl, s, acc, pos);

-  if (HP_TIMING_AVAIL)
-    putchar ('\n');
+  putchar ('\n');
 }

 int
diff --git a/benchtests/bench-strstr.c b/benchtests/bench-strstr.c
index 91a8dfe..528a5c0 100644
--- a/benchtests/bench-strstr.c
+++ b/benchtests/bench-strstr.c
@@ -58,23 +58,19 @@  IMPL (strstr, 1)
 static void
 do_one_test (impl_t *impl, const char *s1, const char *s2, char *exp_result)
 {
-  if (HP_TIMING_AVAIL)
+  size_t i, iters = INNER_LOOP_ITERS;
+  timing_t start, stop, cur;
+
+  TIMING_NOW (start);
+  for (i = 0; i < iters; ++i)
     {
-      hp_timing_t start __attribute ((unused));
-      hp_timing_t stop __attribute ((unused));
-      hp_timing_t best_time = ~(hp_timing_t) 0;
-      size_t i;
+      CALL (impl, s1, s2);
+    }
+  TIMING_NOW (stop);

-      for (i = 0; i < 32; ++i)
-	{
-	  HP_TIMING_NOW (start);
-	  CALL (impl, s1, s2);
-	  HP_TIMING_NOW (stop);
-	  HP_TIMING_BEST (best_time, start, stop);
-	}
+  TIMING_DIFF (cur, start, stop);

-      printf ("\t%zd", (size_t) best_time);
-    }
+  TIMING_PRINT_MEAN ((double) cur, (double) iters);
 }


@@ -113,15 +109,13 @@  do_test (size_t align1, size_t align2, size_t len1, size_t len2,
     }
   s1[len1] = '\0';

-  if (HP_TIMING_AVAIL)
-    printf ("Length %4zd/%zd, alignment %2zd/%2zd, %s:",
-	    len1, len2, align1, align2, fail ? "fail" : "found");
+  printf ("Length %4zd/%zd, alignment %2zd/%2zd, %s:",
+	  len1, len2, align1, align2, fail ? "fail" : "found");

   FOR_EACH_IMPL (impl, 0)
     do_one_test (impl, s1, s2, fail ? NULL : s1 + len1 - len2);

-  if (HP_TIMING_AVAIL)
-    putchar ('\n');
+  putchar ('\n');
 }

 static int
diff --git a/benchtests/bench-timing.h b/benchtests/bench-timing.h
index 264d4b8..009813b 100644
--- a/benchtests/bench-timing.h
+++ b/benchtests/bench-timing.h
@@ -70,3 +70,6 @@  typedef uint64_t timing_t;
 	  (min) / (d_iters), 1e9 * (d_total_i) / (d_total_s))

 #endif
+
+#define TIMING_PRINT_MEAN(d_total_s, d_iters) \
+  printf ("\t%g", (d_total_s) / (d_iters))