[2/7] support: Add Mersenne Twister pseudo-random number generator

Message ID 1516298002-4618-3-git-send-email-adhemerval.zanella@linaro.org
State New
Headers show
Series
  • Refactor qsort implementation
Related show

Commit Message

Adhemerval Zanella Jan. 18, 2018, 5:53 p.m.
This patch adds support routines for pseudo-random number generation
based on Mersenne Twister.  The libstc++ version is used as based and
both 32 and 64 bits are provided.  It is used on following qsort tests
and benchmarks.

I decided to use a Mersenne Twister (MT) instead of random_r internal
implementation, which uses a linear feedback shift register approach
with trinomials, because:

  - it is used extensivelly in other implementations (like c+11);
  - it has a quite larger period (2^219937-1) than the type 4 variation
    of random (2^63 - 1);
  - it does not have the RAND_MAX limitation.

Checked on x86_64-linux-gnu.

	* support/Makefile (libsupport-routines): Add support_random.
	(tests): Add tst-support_random.
	* support/support_random.c: New file.
	* support/support_random.h: Likewise.
	* support/tst-support_random.c: Likewise.
---
 support/Makefile             |   2 +
 support/support_random.c     | 219 +++++++++++++++++++++++++++++++++++++++++++
 support/support_random.h     | 109 +++++++++++++++++++++
 support/tst-support_random.c |  87 +++++++++++++++++
 4 files changed, 417 insertions(+)
 create mode 100644 support/support_random.c
 create mode 100644 support/support_random.h
 create mode 100644 support/tst-support_random.c

-- 
2.7.4

Patch

diff --git a/support/Makefile b/support/Makefile
index 1bda81e..8efe577 100644
--- a/support/Makefile
+++ b/support/Makefile
@@ -53,6 +53,7 @@  libsupport-routines = \
   support_format_netent \
   support_isolate_in_subprocess \
   support_record_failure \
+  support_random \
   support_run_diff \
   support_shared_allocate \
   support_test_compare_failure \
@@ -153,6 +154,7 @@  tests = \
   tst-support_record_failure \
   tst-test_compare \
   tst-xreadlink \
+  tst-support_random
 
 ifeq ($(run-built-tests),yes)
 tests-special = \
diff --git a/support/support_random.c b/support/support_random.c
new file mode 100644
index 0000000..f3037a5
--- /dev/null
+++ b/support/support_random.c
@@ -0,0 +1,219 @@ 
+/* Function for pseudo-random number generation.
+   Copyright (C) 2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <assert.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/random.h>
+#include <support/support_random.h>
+
+int
+random_seed (void *buf, size_t len)
+{
+  ssize_t ret = getrandom (buf, len, 0);
+  if (ret == len)
+    return 0;
+
+  int fd = open ("/dev/urandom", O_RDONLY);
+  if (fd < 0)
+    return -1;
+  void *end = buf + len;
+  while (buf < end)
+    {
+      ssize_t ret = read (fd, buf, end - buf);
+      if (ret <= 0)
+	break;
+      buf += ret;
+    }
+  close (fd);
+  return buf == end ? 0 : -1;
+}
+
+/* The classic Mersenne Twister. Reference:
+   M. Matsumoto and T. Nishimura, Mersenne Twister: A 623-Dimensionally
+   Equidistributed Uniform Pseudo-Random Number Generator, ACM Transactions
+   on Modeling and Computer Simulation, Vol. 8, No. 1, January 1998, pp 3-30.
+
+   This version is based on libstdc++ std::mt19937{_64}.  */
+
+static const size_t mt32_word_size         = 32;
+static const size_t mt32_mask_bits         = 31;
+static const size_t mt32_state_size        = MT32_STATE_SIZE;
+static const size_t mt32_shift_size        = 397;
+static const uint32_t mt32_xor_mask        = 0x9908b0dfUL;
+static const size_t mt32_tempering_u       = 11;
+static const uint32_t mt32_tempering_d     = 0xffffffffUL;
+static const size_t mt32_tempering_s       = 7;
+static const uint32_t mt32_tempering_b     = 0x9d2c5680UL;
+static const size_t mt32_tempering_t       = 15;
+static const uint32_t mt32_tempering_c     = 0xefc60000UL;
+static const size_t mt32_tempering_l       = 18;
+static const uint32_t mt32_init_multiplier = 1812433253UL;
+static const uint32_t mt32_default_seed    = 5489u;
+
+static void
+mt32_gen_rand (struct mt19937_32 *state)
+{
+  const uint32_t upper_mask = (uint32_t)-1 << mt32_mask_bits;
+  const uint32_t lower_mask = ~upper_mask;
+
+  for (size_t k = 0; k < (mt32_state_size - mt32_shift_size); k++)
+    {
+      uint32_t y = ((state->mt[k] & upper_mask)
+		   | (state->mt[k + 1] & lower_mask));
+      state->mt[k] = (state->mt[k + mt32_shift_size] ^ (y >> 1)
+		     ^ ((y & 0x01) ? mt32_xor_mask : 0));
+    }
+
+  for (size_t k = (mt32_state_size - mt32_shift_size);
+       k < (mt32_state_size - 1); k++)
+    {
+      uint32_t y = ((state->mt[k] & upper_mask)
+		   | (state->mt[k + 1] & lower_mask));
+      state->mt[k] = (state->mt[k + (mt32_shift_size - mt32_state_size)]
+		      ^ (y >> 1) ^ ((y & 0x01) ? mt32_xor_mask : 0));
+    }
+
+  uint32_t y = ((state->mt[mt32_state_size - 1] & upper_mask)
+		| (state->mt[0] & lower_mask));
+  state->mt[mt32_state_size - 1] = (state->mt[mt32_shift_size -1] ^ (y >> 1)
+				    ^ (( y & 0x01) ? mt32_xor_mask : 0));
+  state->p = 0;
+}
+
+void
+mt32_seed (struct mt19937_32 *state, uint32_t seed)
+{
+  /* Generators based on linear-feedback shift-register techniques can not
+     handle all zero initial state (they will output zero continually).  In
+     such cases we use the default initial state).  */
+  if (seed == 0x0)
+    seed = mt32_default_seed;
+
+  state->mt[0] = mt32_default_seed;
+  for (size_t i = 1; i < mt32_state_size; i++)
+    {
+      uint32_t x = state->mt[i - 1];
+      x ^= x >> (mt32_word_size - 2);
+      x *= mt32_init_multiplier;
+      x += i;
+      state->mt[i] = x;
+    }
+  state->p = mt32_state_size;
+}
+
+uint32_t
+mt32_rand (struct mt19937_32 *state)
+{
+  /* Reload the vector - cost is O(n) amortized over n calls.  */
+  if (state->p >= mt32_state_size)
+   mt32_gen_rand (state);
+
+  /* Calculate o(x(i)).  */
+  uint32_t z = state->mt[state->p++];
+  z ^= (z >> mt32_tempering_u) & mt32_tempering_d;
+  z ^= (z << mt32_tempering_s) & mt32_tempering_b;
+  z ^= (z << mt32_tempering_t) & mt32_tempering_c;
+  z ^= (z >> mt32_tempering_l);
+  return z;
+}
+
+
+static const size_t mt64_word_size         = 64;
+static const size_t mt64_mask_bits         = 31;
+static const size_t mt64_state_size        = MT64_STATE_SIZE;
+static const size_t mt64_shift_size        = 156;
+static const uint64_t mt64_xor_mask        = 0xb5026f5aa96619e9ULL;
+static const size_t mt64_tempering_u       = 29;
+static const uint64_t mt64_tempering_d     = 0x5555555555555555ULL;
+static const size_t mt64_tempering_s       = 17;
+static const uint64_t mt64_tempering_b     = 0x71d67fffeda60000ULL;
+static const size_t mt64_tempering_t       = 37;
+static const uint64_t mt64_tempering_c     = 0xfff7eee000000000ULL;
+static const size_t mt64_tempering_l       = 43;
+static const uint64_t mt64_init_multiplier = 6364136223846793005ULL;
+static const uint64_t mt64_default_seed    = 5489u;
+
+static void
+mt64_gen_rand (struct mt19937_64 *state)
+{
+  const uint64_t upper_mask = (uint64_t)-1 << mt64_mask_bits;
+  const uint64_t lower_mask = ~upper_mask;
+
+  for (size_t k = 0; k < (mt64_state_size - mt64_shift_size); k++)
+    {
+      uint64_t y = ((state->mt[k] & upper_mask)
+		   | (state->mt[k + 1] & lower_mask));
+      state->mt[k] = (state->mt[k + mt64_shift_size] ^ (y >> 1)
+		     ^ ((y & 0x01) ? mt64_xor_mask : 0));
+    }
+
+  for (size_t k = (mt64_state_size - mt64_shift_size);
+       k < (mt64_state_size - 1); k++)
+    {
+      uint64_t y = ((state->mt[k] & upper_mask)
+		   | (state->mt[k + 1] & lower_mask));
+      state->mt[k] = (state->mt[k + (mt64_shift_size - mt64_state_size)]
+		      ^ (y >> 1) ^ ((y & 0x01) ? mt64_xor_mask : 0));
+    }
+
+  uint64_t y = ((state->mt[mt64_state_size - 1] & upper_mask)
+		| (state->mt[0] & lower_mask));
+  state->mt[mt64_state_size - 1] = (state->mt[mt64_shift_size -1] ^ (y >> 1)
+				    ^ (( y & 0x01) ? mt64_xor_mask : 0));
+  state->p = 0;
+}
+
+void
+mt64_seed (struct mt19937_64 *state, uint64_t seed)
+{
+  /* Generators based on linear-feedback shift-register techniques can not
+     handle all zero initial state (they will output zero continually).  In
+     such cases we use the default initial state).  */
+  if (seed == 0x0)
+    seed = mt64_default_seed;
+
+  state->mt[0] = mt64_default_seed;
+  for (size_t i = 1; i < mt64_state_size; i++)
+    {
+      uint64_t x = state->mt[i - 1];
+      x ^= x >> (mt64_word_size - 2);
+      x *= mt64_init_multiplier;
+      x += i;
+      state->mt[i] = x;
+    }
+  state->p = mt64_state_size;
+}
+
+uint64_t
+mt64_rand (struct mt19937_64 *state)
+{
+  /* Reload the vector - cost is O(n) amortized over n calls.  */
+  if (state->p >= mt64_state_size)
+   mt64_gen_rand (state);
+
+  /* Calculate o(x(i)).  */
+  uint64_t z = state->mt[state->p++];
+  z ^= (z >> mt64_tempering_u) & mt64_tempering_d;
+  z ^= (z << mt64_tempering_s) & mt64_tempering_b;
+  z ^= (z << mt64_tempering_t) & mt64_tempering_c;
+  z ^= (z >> mt64_tempering_l);
+  return z;
+}
diff --git a/support/support_random.h b/support/support_random.h
new file mode 100644
index 0000000..9d58d51
--- /dev/null
+++ b/support/support_random.h
@@ -0,0 +1,109 @@ 
+/* Function for pseudo-random number generation.
+   Copyright (C) 2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef SUPPORT_MT_RAND_H
+#define SUPPORT_MT_RAND_H
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <assert.h>
+
+/* Obtain a random seed at BUF with size of LEN from system random device.
+   It will used getrandom or '/dev/urandom' device case getrandom fails.  */
+int random_seed (void *buf, size_t len);
+
+/* A Mersenne Twister implementation for both uint32_t and uint64_t aimed for
+   fast pseudo-random number generation where rand() is not suffice (due
+   mainly low entropy).
+
+   The usual way to use is:
+
+   uint32_t seed;
+   random_seed (&seed, sizeof (uint32_t));
+
+   mt19937_32 mt;
+   mt32_seed (&mt, seed);
+
+   uint32_t random_number = mt32_rand (&mt);
+
+   If seed is 0 the default one (5489u) is used instead.  Usually the seed
+   should be obtained for a more robust random generation (getrandom or
+   from /dev/{u}random).  */
+
+enum {
+  MT32_STATE_SIZE = 624,
+  MT64_STATE_SIZE = 312
+};
+
+struct mt19937_32
+{
+  uint32_t mt[MT32_STATE_SIZE];
+  size_t p;
+};
+
+struct mt19937_64
+{
+  uint64_t mt[MT64_STATE_SIZE];
+  size_t p;
+};
+
+/* Initialize the mersenne twister STATE with SEED.  If seed is zero the
+   default seed is used (5489u).  */
+void mt32_seed (struct mt19937_32 *state, uint32_t seed);
+void mt64_seed (struct mt19937_64 *state, uint64_t seed);
+/* Output a pseudo-number from mersenned twister STATE.  */
+uint32_t mt32_rand (struct mt19937_32 *state);
+uint64_t mt64_rand (struct mt19937_64 *state);
+
+/* Scales the number NUMBER to the uniformly distributed closed internal
+   [min, max].  */
+static inline int32_t
+uniform_uint32_distribution (int32_t random, uint32_t min, uint32_t max)
+{
+  assert (max >= min);
+  uint32_t range = max - min;
+  /* It assumed that the input random number RANDOM is as larger or equal
+     than the RANGE, so the result will always be downscaled.  */
+  if (range != UINT32_MAX)
+    {
+      uint32_t urange = range + 1;  /* range can be 0.  */
+      uint32_t scaling = UINT32_MAX / urange;
+      random /= scaling;
+    }
+  return random + min;
+}
+
+/* Scales the number NUMBER to the uniformly distributed closed internal
+   [min, max].  */
+static inline uint64_t
+uniform_uint64_distribution (uint64_t random, uint64_t min, uint64_t max)
+{
+  assert (max >= min);
+  uint64_t range = max - min;
+  /* It assumed that the input random number RANDOM is as larger or equal
+     than the RANGE, so the result will always be downscaled.  */
+  if (range != UINT64_MAX)
+    {
+      uint64_t urange = range + 1;  /* range can be 0.  */
+      uint64_t scaling = UINT64_MAX / urange;
+      random /= scaling;
+    }
+  return random + min;
+}
+
+#endif
diff --git a/support/tst-support_random.c b/support/tst-support_random.c
new file mode 100644
index 0000000..3068ca9
--- /dev/null
+++ b/support/tst-support_random.c
@@ -0,0 +1,87 @@ 
+/* Test the Mersenne Twister random functions.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include <support/check.h>
+#include <support/support_random.h>
+
+static int
+do_test (void)
+{
+  {
+    struct mt19937_32 mt32;
+    mt32_seed (&mt32, 0);
+    for (int i = 0; i < 9999; ++i)
+      mt32_rand (&mt32);
+    TEST_VERIFY (mt32_rand (&mt32) == 4123659995ul);
+  }
+
+  {
+    struct mt19937_64 mt64;
+    mt64_seed (&mt64, 0);
+    for (int i = 0; i < 9999; ++i)
+      mt64_rand (&mt64);
+    TEST_VERIFY (mt64_rand (&mt64) == UINT64_C(9981545732273789042));
+  }
+
+#define CHECK_UNIFORM_32(min, max)						\
+  ({										\
+    uint32_t v = uniform_uint32_distribution (mt32_rand (&mt32), min, max);	\
+    TEST_VERIFY (v >= min && v <= max);						\
+  })
+
+  {
+    struct mt19937_32 mt32;
+    uint32_t seed;
+    random_seed (&seed, sizeof (seed));
+    mt32_seed (&mt32, seed);
+
+    CHECK_UNIFORM_32 (0, 100);
+    CHECK_UNIFORM_32 (100, 200);
+    CHECK_UNIFORM_32 (100, 1<<10);
+    CHECK_UNIFORM_32 (1<<10, UINT16_MAX);
+    CHECK_UNIFORM_32 (UINT16_MAX, UINT32_MAX);
+  }
+
+#define CHECK_UNIFORM_64(min, max)						\
+  ({										\
+    uint64_t v = uniform_uint64_distribution (mt64_rand (&mt64), min, max);	\
+    TEST_VERIFY (v >= min && v <= max);						\
+  })
+
+  {
+    struct mt19937_64 mt64;
+    uint64_t seed;
+    random_seed (&seed, sizeof (seed));
+    mt64_seed (&mt64, seed);
+
+    CHECK_UNIFORM_64 (0, 100);
+    CHECK_UNIFORM_64 (100, 200);
+    CHECK_UNIFORM_64 (100, 1<<10);
+    CHECK_UNIFORM_64 (1<<10, UINT16_MAX);
+    CHECK_UNIFORM_64 (UINT16_MAX, UINT32_MAX);
+    CHECK_UNIFORM_64 (UINT64_C(1)<<33, UINT64_C(1)<<34);
+    CHECK_UNIFORM_64 (UINT64_C(1)<<34, UINT64_MAX);
+  }
+
+  return 0;
+}
+
+#include <support/test-driver.c>