
[RFC,API-NEXT,PATCHv2,3/4] test: performance: add odp_timers

Message ID 1480633409-24532-3-git-send-email-brian.brooks@linaro.org
State New

Commit Message

Brian Brooks Dec. 1, 2016, 11:03 p.m. UTC
Add a timer stress test. Timer pool resolution, number of timer pools,
number of timers, queue type, and number of threads may be specified for
each test case.

Timestamps are used to verify that timeout events are received by the
program no later than they should be. Timestamp statistics are printed.
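
For example, with a 100us resolution timer pool, a timeout set to expire on
the next tick must be received within 2 * 100us = 200us of the set (the set
itself may land anywhere within the preceding tick period); timers that miss
this budget are counted as failed in the printed statistics.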

Signed-off-by: Brian Brooks <brian.brooks@linaro.org>

---
 configure.ac                                       |  22 +
 platform/linux-generic/Makefile.am                 |   1 +
 platform/linux-generic/include/odp_time_internal.h |  27 +
 platform/linux-generic/m4/odp_pthread.m4           |   2 +-
 platform/linux-generic/odp_timer.c                 |   7 +
 test/common_plat/performance/Makefile.am           |   6 +-
 test/common_plat/performance/odp_timers.c          | 913 +++++++++++++++++++++
 7 files changed, 976 insertions(+), 2 deletions(-)
 create mode 100644 platform/linux-generic/include/odp_time_internal.h
 create mode 100644 test/common_plat/performance/odp_timers.c

-- 
2.7.4

Patch

diff --git a/configure.ac b/configure.ac
index b460a65..4f6cc18 100644
--- a/configure.ac
+++ b/configure.ac
@@ -71,6 +71,10 @@  AC_TYPE_INT32_T
 AC_TYPE_UINT32_T
 AC_TYPE_UINT64_T
 
+AC_CHECK_LIB([m], [cos])
+AC_CHECK_LIB([gslcblas], [cblas_dgemm])
+AC_CHECK_LIB([gsl], [gsl_hypot])
+
 #########################################################################
 # Get GCC version
 #########################################################################
@@ -210,6 +214,24 @@  DX_INIT_DOXYGEN($PACKAGE_NAME,
 		${builddir}/doc/platform-api-guide/output)
 
 ##########################################################################
+# Event tracing
+##########################################################################
+
+# Checks for --enable-tracing-timers and defines TRACING_TIMERS if found.
+#
+# This is experimental and stores tracing info inside the user-supplied
+# context associated with a timeout event.
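+#
+# Usage: ./configure --enable-tracing-timers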
+TRACING_TIMERS=0
+AC_ARG_ENABLE([tracing-timers],
+    [  --enable-tracing-timers  trace timeout event scheduling],
+    [if test "x$enableval" = "xyes"; then
+        TRACING_TIMERS=1
+     else
+        TRACING_TIMERS=0
+     fi])
+ODP_CFLAGS="$ODP_CFLAGS -DTRACING_TIMERS=$TRACING_TIMERS"
+
+##########################################################################
 # Enable/disable ODP_DEBUG_PRINT
 ##########################################################################
 ODP_DEBUG_PRINT=0
diff --git a/platform/linux-generic/Makefile.am b/platform/linux-generic/Makefile.am
index 22cf6f3..070ddcf 100644
--- a/platform/linux-generic/Makefile.am
+++ b/platform/linux-generic/Makefile.am
@@ -128,6 +128,7 @@  noinst_HEADERS = \
 		  ${srcdir}/include/odp_schedule_ordered_internal.h \
 		  ${srcdir}/include/odp_sorted_list_internal.h \
 		  ${srcdir}/include/odp_shm_internal.h \
+		  ${srcdir}/include/odp_time_internal.h \
 		  ${srcdir}/include/odp_timer_internal.h \
 		  ${srcdir}/include/odp_timer_wheel_internal.h \
 		  ${srcdir}/include/odp_traffic_mngr_internal.h \
diff --git a/platform/linux-generic/include/odp_time_internal.h b/platform/linux-generic/include/odp_time_internal.h
new file mode 100644
index 0000000..5a0bc75
--- /dev/null
+++ b/platform/linux-generic/include/odp_time_internal.h
@@ -0,0 +1,27 @@ 
+/* Copyright (c) 2016, Linaro Limited
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier:     BSD-3-Clause
+ */
+
+#ifndef ODP_TIME_INTERNAL_H_
+#define ODP_TIME_INTERNAL_H_
+
+static inline uint64_t core_tick(void)
+{
+#if defined(__aarch64__)
+	uint64_t vct;
+	/* __asm__ volatile("isb" : : : "memory"); */
+	__asm__ volatile("mrs %0, cntvct_el0" : "=r"(vct));
+	return vct;
+#elif defined(__x86_64__)
+	uint64_t hi, lo;
+	/* __asm__ volatile("mfence" : : : "memory"); */
+	__asm__ volatile("rdtsc" : "=a"(lo), "=d"(hi));
+	return (hi << 32) | lo;
+#else
+#error Please add support for your core in odp_time_internal.h
+#endif
+}
+
+#endif
diff --git a/platform/linux-generic/m4/odp_pthread.m4 b/platform/linux-generic/m4/odp_pthread.m4
index 7f39103..b5705b2 100644
--- a/platform/linux-generic/m4/odp_pthread.m4
+++ b/platform/linux-generic/m4/odp_pthread.m4
@@ -10,4 +10,4 @@  LIBS="$PTHREAD_LIBS $LIBS"
 AM_CFLAGS="$AM_CFLAGS $PTHREAD_CFLAGS"
 AM_LDFLAGS="$AM_LDFLAGS $PTHREAD_LDFLAGS"
 
-AM_LDFLAGS="$AM_LDFLAGS -pthread -lrt"
+AM_LDFLAGS="$AM_LDFLAGS -pthread -lrt -lm"
diff --git a/platform/linux-generic/odp_timer.c b/platform/linux-generic/odp_timer.c
index 89e0f52..ad44ede 100644
--- a/platform/linux-generic/odp_timer.c
+++ b/platform/linux-generic/odp_timer.c
@@ -51,6 +51,7 @@ 
 #include <odp/api/sync.h>
 #include <odp/api/time.h>
 #include <odp/api/timer.h>
+#include <odp_time_internal.h>
 #include <odp_timer_internal.h>
 
 #define TMO_UNUSED   ((uint64_t)0xFFFFFFFFFFFFFFFF)
@@ -627,6 +628,12 @@  static unsigned timer_expire(odp_timer_pool *tp, uint32_t idx, uint64_t tick)
 		}
 		/* Else ignore events of other types */
 		/* Post the timeout to the destination queue */
+#if TRACING_TIMERS == 1
+		/* Store timeout event enq() timestamp in user-supplied
+		 * timeout event context. */
+		uint64_t *u64 = (uint64_t *)tim->user_ptr;
+		__atomic_store_n(u64, core_tick(), __ATOMIC_RELAXED);
+#endif
 		int rc = odp_queue_enq(tim->queue,
 				       odp_buffer_to_event(tmo_buf));
 		if (odp_unlikely(rc != 0)) {
diff --git a/test/common_plat/performance/Makefile.am b/test/common_plat/performance/Makefile.am
index f184609..24dbe70 100644
--- a/test/common_plat/performance/Makefile.am
+++ b/test/common_plat/performance/Makefile.am
@@ -6,7 +6,8 @@  EXECUTABLES = odp_crypto$(EXEEXT) odp_pktio_perf$(EXEEXT)
 
 COMPILE_ONLY = odp_l2fwd$(EXEEXT) \
 	       odp_sched_latency$(EXEEXT) \
-	       odp_scheduling$(EXEEXT)
+	       odp_scheduling$(EXEEXT) \
+	       odp_timers$(EXEEXT)
 
 TESTSCRIPTS = odp_l2fwd_run.sh \
 	      odp_sched_latency_run.sh \
@@ -26,6 +27,8 @@  odp_sched_latency_LDFLAGS = $(AM_LDFLAGS) -static
 odp_sched_latency_CFLAGS = $(AM_CFLAGS) -I${top_srcdir}/test
 odp_scheduling_LDFLAGS = $(AM_LDFLAGS) -static
 odp_scheduling_CFLAGS = $(AM_CFLAGS) -I${top_srcdir}/test
+odp_timers_LDFLAGS = $(AM_LDFLAGS) -static -lgsl -lgslcblas
+odp_timers_CFLAGS = $(AM_CFLAGS)
 
 noinst_HEADERS = \
 		  $(top_srcdir)/test/test_debug.h
@@ -34,5 +37,6 @@  dist_odp_crypto_SOURCES = odp_crypto.c
 dist_odp_sched_latency_SOURCES = odp_sched_latency.c
 dist_odp_scheduling_SOURCES = odp_scheduling.c
 dist_odp_pktio_perf_SOURCES = odp_pktio_perf.c
+dist_odp_timers_SOURCES = odp_timers.c
 
 EXTRA_DIST = $(TESTSCRIPTS)
diff --git a/test/common_plat/performance/odp_timers.c b/test/common_plat/performance/odp_timers.c
new file mode 100644
index 0000000..cafb77b
--- /dev/null
+++ b/test/common_plat/performance/odp_timers.c
@@ -0,0 +1,913 @@ 
+/* Copyright (c) 2016, Linaro Limited
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier:     BSD-3-Clause
+ */
+
+#define _GNU_SOURCE
+
+#include <odp_api.h>
+#include <odp/helper/linux.h>
+
+#include <assert.h>
+#include <math.h>
+#include <pthread.h>
+#include <sched.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <gsl/gsl_statistics.h>
+
+/*
+ * Every timer pool maintains its own tick counter. The tick period is called
+ * the resolution. For example, if the timer pool's resolution is 100us, then
+ * the timer pool's tick will increment every 100us.
+ *
+ * A timer pool contains a group of timers. Each timer may be set to expire
+ * at a future tick. If the current tick is t34, the timer can be set to expire
+ * on tick t35, t36, t80, or t5000.
+ *
+ * If timers are always set to expire on the next tick, we can construct
+ * a program to not only guarantee timer correctness but also stress test
+ * timers.
+ *
+ * A timer set to expire on t1 needs to be delivered to the application before
+ * t2. Otherwise, it has been delivered too late.
+ *
+ *       set(t1)
+ *         |
+ *      t0 v     t1       t2       t3       t4
+ *       |--------|--------|--------|--------|
+ *                ^     |
+ *                |     |
+ *             trigger  |
+ *                      v
+ *                     recv
+ *
+ * When the program sets a timer to expire on t1, we know that this set time is
+ * bounded between t0 and t1.
+ *
+ * When the timer expires at t1, the corresponding timeout event needs to be
+ * generated, scheduled, and received by the program before t2.
+ *
+ * So, the maximum amount of time between 'set' and 'recv' must be bounded by
+ * twice the tick period. This is what this program asserts given an arbitrary
+ * configuration of timer pools, resolutions, timers, queues, and threads.
+ */
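+
+/*
+ * Example: with a 100us resolution timer pool, 'set' lands somewhere in
+ * [t0, t1), so the asserted set->recv budget is 2 * 100us = 200us. At
+ * runtime the bound is computed as 2 * odp_timer_pool_resolution() in
+ * handle_timeout().
+ */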
+
+/*
+ * Number of times a timer may be set and subsequently received by the program.
+ *
+ * Note: If a timer has overrun (it has been received 'too late') it will no
+ * longer be set.
+ */
+#define TIMER_ROUNDS  (32)
+
+/* Number of nanoseconds per second. */
+#define ODP_NSEC_PER_SEC  (ODP_TIME_SEC_IN_NS)
+
+#define arraysize(x) \
+	(sizeof((x)) / sizeof((x)[0]))
+
+#define CACHE_ALIGN_ROUNDUP(x) \
+	(ODP_CACHE_LINE_SIZE * \
+	 (((x) + ODP_CACHE_LINE_SIZE - 1) / ODP_CACHE_LINE_SIZE))
+
+#define __FILENAME__ \
+	(strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__)
+
+#define print(fmt, ...) \
+	_print(false, __FILENAME__, __LINE__, fmt, ##__VA_ARGS__)
+
+#ifdef assert
+#undef assert
+#endif
+
+#define assert(cond) \
+	!(odp_unlikely(!(cond))) ? (void) 0 : _print( \
+		true, __FILENAME__, __LINE__, "Assertion failed: " #cond)
+
+static void _print(bool fatal, const char *filename, int line,
+		   const char *fmt, ...)
+{
+	char *str = NULL;
+	va_list args;
+
+	va_start(args, fmt);
+	if (vasprintf(&str, fmt, args) == -1) {
+		fprintf(stderr, "vasprintf failed\n");
+		va_end(args);
+		return;
+	}
+	va_end(args);
+
+	/* Useful format: cpu process:thread file.c:123| the log message */
+	fprintf(stderr, "%02u %06u:%06u %s:%d| %s\n",
+		(unsigned int)sched_getcpu(),
+		(unsigned int)getpid(),
+		(unsigned int)syscall(__NR_gettid),
+		filename, line, str);
+	free(str);
+
+	if (fatal)
+		abort();
+}
+
+static uint64_t core_tick_hz;
+
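+/* Same implementation as core_tick() in
+ * platform/linux-generic/include/odp_time_internal.h; keep the two in sync.
+ */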
+static uint64_t core_tick(void)
+{
+#if defined(__aarch64__)
+	uint64_t vct;
+	/* __asm__ volatile("isb" : : : "memory"); */
+	__asm__ volatile("mrs %0, cntvct_el0" : "=r"(vct));
+	return vct;
+#elif defined(__x86_64__)
+	uint64_t hi, lo;
+	/* __asm__ volatile("mfence" : : : "memory"); */
+	__asm__ volatile("rdtsc" : "=a"(lo), "=d"(hi));
+	return (hi << 32) | lo;
+#else
+#error Please add support for your core in odp_timers.c
+#endif
+}
+
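+/* Convert a core tick delta to nanoseconds. For example, a diff of 2500
+ * ticks at core_tick_hz = 25MHz works out to 100000 ns. */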
+static uint64_t core_tick_diff_ns(uint64_t before, uint64_t after)
+{
+	uint64_t diff = after - before;
+
+	return llround(((double)diff / (double)core_tick_hz) *
+		       ODP_NSEC_PER_SEC);
+}
+
+static bool core_tick_init(void)
+{
+#if defined(__aarch64__)
+	uint64_t hz;
+	__asm__ volatile("mrs %0, cntfrq_el0" : "=r"(hz));
+	core_tick_hz = hz;
+	return true;
+#elif defined(__x86_64__)
+	uint32_t cpu_info[4];
+
+	/* Check for Invariant TSC. */
+	__asm__ volatile("cpuid" :
+			 "=a"(cpu_info[0]),
+			 "=b"(cpu_info[1]),
+			 "=c"(cpu_info[2]),
+			 "=d"(cpu_info[3]) :
+			 "a"(0x80000000));
+	if (cpu_info[0] >= 0x80000007) {
+		__asm__ volatile("cpuid" :
+				 "=a"(cpu_info[0]),
+				 "=b"(cpu_info[1]),
+				 "=c"(cpu_info[2]),
+				 "=d"(cpu_info[3]) :
+				 "a"(0x80000007));
+		if (!(cpu_info[3] & (1 << 8)))
+			return false;
+	}
+
+	core_tick_hz = odp_cpu_hz();
+	return true;
+#endif
+	return false;
+}
+
+/*
+ * Each |test| specifies how the timer pools, timers, queues, and threads
+ * should be setup. These structures are used to create the test.
+ */
+
+/*
+ * A 'timer pool queue' or 'tpq' represents a timer pool running at
+ * frequency |resolution| containing |nr_timers| timers which are set to
+ * expire their timeout events on a queue.
+ */
+typedef struct {
+	uint64_t resolution;
+	uint32_t nr_timers;
+	odp_schedule_prio_t queue_prio;
+	odp_schedule_sync_t queue_sync;
+} test_timer_pool_queue_t;
+
+typedef struct {
+	const char *name;
+	unsigned int nr_workers;
+	test_timer_pool_queue_t *tpqs;
+	unsigned int nr_tpqs;
+} test_t;
+
+/*
+ * Test state shared across all ODP threads during a specific test.
+ */
+
+/*
+ * TODO: Pointers are used, which requires the shm to be mapped at the same
+ * vaddr in each ODP thread, which could be a thread or a process. Guarantee
+ * the vaddr or use offsets.
+ */
+typedef struct {
+	odp_timer_t timer;
+	odp_timeout_t timeout;
+} timer_pair_t;
+
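+/*
+ * Per-timer state: core_tick() timestamps taken when the timer was last set
+ * (and, with TRACING_TIMERS, when its timeout was enqueued) and when its
+ * timeout was received, plus the rounds left and the timer/timeout handles.
+ */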
+typedef struct {
+#if TRACING_TIMERS == 1
+	uint64_t enq_timestamp;
+#endif
+	uint64_t set_timestamp;
+	uint64_t recv_timestamp;
+	uint32_t remaining_rounds;
+	timer_pair_t pair;
+} timer_ctx_t;
+
+typedef struct {
+	odp_queue_t queue;
+	odp_timer_pool_t timer_pool;
+	timer_ctx_t *timers;
+	uint32_t nr_timers;
+} timer_pool_queue_t;
+
+typedef struct {
+	odp_barrier_t barrier;
+	odp_pool_t timeout_pool;
+	timer_pool_queue_t *tpqs;
+	uint32_t nr_tpqs;
+} app_t;
+
+/* Returns the total number of timers across all tpqs in a test. */
+static uint32_t test_nr_timers(test_t *test)
+{
+	size_t i;
+	uint32_t n = 0;
+
+	for (i = 0; i < test->nr_tpqs; i++)
+		n += test->tpqs[i].nr_timers;
+
+	return n;
+}
+
+static int is_main_worker_thread(void)
+{
+	return odp_thread_id() == 1;
+}
+
+static void handle_timeout(odp_event_t event)
+{
+	odp_timeout_t tmo;
+	odp_timer_t timer;
+	timer_ctx_t *tc;
+
+	tmo = odp_timeout_from_event(event);
+	timer = odp_timeout_timer(tmo);
+	tc = (timer_ctx_t *)odp_timeout_user_ptr(tmo);
+
+	/* assert(tc->pair.timeout == tmo); */
+	/* assert(tc->pair.timer == timer); */
+
+	uint64_t now = core_tick();
+	uint64_t set_timestamp = tc->set_timestamp;
+	uint32_t remaining_rounds = --tc->remaining_rounds;
+
+	if (set_timestamp == 0) {
+		/*
+		 * This is this timer's first round. Skip checking for an
+		 * overrun, set the timestamp, and set the timer to go for
+		 * another round.
+		 */
+		tc->set_timestamp = now;
+		odp_timer_set_rel(timer, 1, &event);
+	} else {
+		odp_timer_pool_t tp = odp_timer_pool_from_timer(timer);
+		uint64_t elapsed_ns = core_tick_diff_ns(set_timestamp, now);
+		uint64_t time_budget = 2 * odp_timer_pool_resolution(tp);
+
+		if (elapsed_ns > time_budget) {
+			/* Timeout event received too late. */
+			tc->recv_timestamp = now;
+		} else if (remaining_rounds > 0) {
+			tc->set_timestamp = now;
+			odp_timer_set_rel(timer, 1, &event);
+		} else {
+			/* Timer successfully circulated TIMER_ROUNDS times. */
+			tc->recv_timestamp = now;
+		}
+	}
+}
+
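+/*
+ * Per-worker entry point. Barriers split the run into phases: arm all
+ * timers (main worker only), schedule and handle timeout events until the
+ * one second scheduler wait times out, cancel the timers and drain the
+ * queues (main worker only), then drain any pre-scheduled events on every
+ * thread before exiting.
+ */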
+static int worker_start(void *arg)
+{
+	app_t *app;
+	odp_event_t event;
+	odp_queue_t queue;
+	uint64_t wait_time;
+	int rv;
+
+	(void)arg;
+
+	app = odp_shm_addr(odp_shm_lookup("app"));
+
+	odp_barrier_wait(&app->barrier);
+
+	if (is_main_worker_thread()) {
+		size_t i, j;
+		timer_pool_queue_t *tpq;
+		timer_ctx_t *tc;
+
+		/* Start all timers. */
+		for (i = 0, tpq = app->tpqs; i < app->nr_tpqs; i++, tpq++) {
+			for (j = 0, tc = tpq->timers; j < tpq->nr_timers;
+			     j++, tc++) {
+				odp_event_t e =
+					odp_timeout_to_event(tc->pair.timeout);
+				rv = odp_timer_set_rel(tc->pair.timer, 1, &e);
+				assert(rv == ODP_TIMER_SUCCESS);
+			}
+		}
+		odp_timer_pool_start();
+	}
+
+	wait_time = odp_schedule_wait_time(ODP_NSEC_PER_SEC);
+
+	odp_barrier_wait(&app->barrier);
+
+	/* Main scheduling loop. */
+	for (;;) {
+		event = odp_schedule(&queue, wait_time);
+
+		if (odp_unlikely(event == ODP_EVENT_INVALID))
+			break;
+
+		switch (odp_event_type(event)) {
+		case ODP_EVENT_TIMEOUT:
+			handle_timeout(event);
+			break;
+		default:
+			break;
+		}
+	}
+
+	odp_barrier_wait(&app->barrier);
+
+	if (is_main_worker_thread()) {
+		size_t i, j;
+		timer_pool_queue_t *tpq;
+		timer_ctx_t *tc;
+		odp_event_t e;
+
+		/* Cancel all timers. */
+		for (i = 0, tpq = app->tpqs; i < app->nr_tpqs; i++, tpq++) {
+			for (j = 0, tc = tpq->timers; j < tpq->nr_timers;
+			     j++, tc++) {
+				odp_timer_cancel(tc->pair.timer, &e);
+			}
+		}
+
+		/* Drain the queues. */
+		do {
+			event = odp_schedule(&queue, ODP_NSEC_PER_SEC);
+		} while (event != ODP_EVENT_INVALID);
+	}
+
+	odp_barrier_wait(&app->barrier);
+
+	/* Drain any remaining pre-scheduled events. */
+	odp_schedule_pause();
+	do {
+		event = odp_schedule(&queue, ODP_NSEC_PER_SEC);
+	} while (event != ODP_EVENT_INVALID);
+	odp_schedule_resume();
+
+	odp_barrier_wait(&app->barrier);
+
+	return 0;
+}
+
+static int on_odp_log(odp_log_level_t lvl, const char *fmt, ...)
+{
+	(void)lvl;
+	(void)fmt;
+
+	/* TODO: Interpret log message and take action. */
+
+	return 1;
+}
+
+static void __attribute__((noreturn)) on_odp_abort(void)
+{
+	print("A fatal incident has occurred");
+	abort();
+}
+
+static void setup_tpq(app_t *app, timer_pool_queue_t *tpq,
+		      test_timer_pool_queue_t *ttpq, size_t n)
+{
+	size_t i;
+	char name[32];
+	odp_queue_param_t qp;
+	odp_timer_pool_param_t tpp;
+	timer_ctx_t *tc;
+
+	/* Create the queue. */
+	odp_queue_param_init(&qp);
+	qp.type     = ODP_QUEUE_TYPE_SCHED;
+	qp.enq_mode = ODP_QUEUE_OP_MT;
+	qp.deq_mode = ODP_QUEUE_OP_DISABLED;
+	qp.sched.prio  = ttpq->queue_prio;
+	qp.sched.sync  = ttpq->queue_sync;
+	qp.sched.group = ODP_SCHED_GROUP_WORKER;
+	qp.sched.lock_count = 0;  /* FIXME: # for ordered? */
+	qp.context     = NULL;
+	qp.context_len = 0;
+
+	snprintf(name, sizeof(name), "q%lu", n);
+
+	tpq->queue = odp_queue_create(name, &qp);
+	assert(tpq->queue != ODP_QUEUE_INVALID);
+
+	/* Create the timer pool. */
+	tpp.res_ns     = ttpq->resolution;
+	tpp.min_tmo    = ttpq->resolution;
+	tpp.max_tmo    = ttpq->resolution << 1;
+	tpp.num_timers = ttpq->nr_timers;
+	tpp.priv       = 0;
+	tpp.clk_src    = ODP_CLOCK_CPU;
+
+	snprintf(name, sizeof(name), "tp%lu", n);
+
+	tpq->timer_pool = odp_timer_pool_create(name, &tpp);
+	assert(tpq->timer_pool != ODP_TIMER_POOL_INVALID);
+
+	/* Create timers. */
+	for (i = 0, tc = tpq->timers; i < tpq->nr_timers; i++, tc++) {
+		tc->set_timestamp = 0;
+		tc->remaining_rounds = TIMER_ROUNDS;
+
+		tc->pair.timer = odp_timer_alloc(
+			tpq->timer_pool, tpq->queue, tc);
+		assert(tc->pair.timer != ODP_TIMER_INVALID);
+
+		tc->pair.timeout = odp_timeout_alloc(app->timeout_pool);
+		assert(tc->pair.timeout != ODP_TIMEOUT_INVALID);
+	}
+}
+
+static void setup(app_t *app, test_t *test, int nr_workers)
+{
+	size_t i;
+	odp_pool_param_t tepp;
+
+	odp_barrier_init(&app->barrier, nr_workers);
+
+	odp_pool_param_init(&tepp);
+	tepp.type    = ODP_POOL_TIMEOUT;
+	tepp.tmo.num = test_nr_timers(test);
+
+	app->timeout_pool = odp_pool_create("pool_tmo0", &tepp);
+	assert(app->timeout_pool != ODP_POOL_INVALID);
+
+	for (i = 0; i < test->nr_tpqs; i++)
+		setup_tpq(app, app->tpqs + i, &test->tpqs[i], i);
+}
+
+static void print_stats(app_t *app)
+{
+	size_t i, j;
+	timer_pool_queue_t *tpq;
+	timer_ctx_t *tc;
+	unsigned int nr_pass = 0;
+	unsigned int nr_fail = 0;
+	bool pass = true;
+	double *t1;
+#if TRACING_TIMERS == 1
+	double *t2;
+	double *t3;
+#endif
+	double avg, sd, max, min;
+
+	for (i = 0, tpq = app->tpqs; i < app->nr_tpqs; i++, tpq++) {
+
+		t1 = malloc(tpq->nr_timers * sizeof(*t1));
+		assert(t1 != NULL);
+#if TRACING_TIMERS == 1
+		t2 = malloc(tpq->nr_timers * sizeof(*t2));
+		assert(t2 != NULL);
+		t3 = malloc(tpq->nr_timers * sizeof(*t3));
+		assert(t3 != NULL);
+#endif
+
+		for (j = 0, tc = tpq->timers; j < tpq->nr_timers; j++, tc++) {
+
+			t1[j] = (double)core_tick_diff_ns(
+				tc->set_timestamp, tc->recv_timestamp);
+#if TRACING_TIMERS == 1
+			t2[j] = (double)core_tick_diff_ns(
+				tc->set_timestamp, tc->enq_timestamp);
+			t3[j] = (double)core_tick_diff_ns(
+				tc->enq_timestamp, tc->recv_timestamp);
+#endif
+
+			if (tc->remaining_rounds > 0) {
+				pass = false;
+				nr_fail++;
+			} else {
+				nr_pass++;
+			}
+		}
+
+		printf("tpq%lu            res:%13.3f\n",
+		       i, (double)odp_timer_pool_resolution(tpq->timer_pool));
+
+		avg = gsl_stats_mean(t1, 1, tpq->nr_timers);
+		sd  = gsl_stats_sd_m(t1, 1, tpq->nr_timers, avg);
+		max = gsl_stats_max(t1, 1, tpq->nr_timers);
+		min = gsl_stats_min(t1, 1, tpq->nr_timers);
+
+		printf("tpq%lu set->recv "
+		       " avg:%13.3f sd:%13.3f max:%13.3f min:%13.3f\n",
+		       i, avg, sd, max, min);
+
+#if TRACING_TIMERS == 1
+		avg = gsl_stats_mean(t2, 1, tpq->nr_timers);
+		sd  = gsl_stats_sd_m(t2, 1, tpq->nr_timers, avg);
+		max = gsl_stats_max(t2, 1, tpq->nr_timers);
+		min = gsl_stats_min(t2, 1, tpq->nr_timers);
+
+		printf("tpq%lu set->enq  "
+		       " avg:%13.3f sd:%13.3f max:%13.3f min:%13.3f\n",
+		       i, avg, sd, max, min);
+
+		avg = gsl_stats_mean(t3, 1, tpq->nr_timers);
+		sd  = gsl_stats_sd_m(t3, 1, tpq->nr_timers, avg);
+		max = gsl_stats_max(t3, 1, tpq->nr_timers);
+		min = gsl_stats_min(t3, 1, tpq->nr_timers);
+
+		printf("tpq%lu enq->recv "
+		       " avg:%13.3f sd:%13.3f max:%13.3f min:%13.3f\n",
+		       i, avg, sd, max, min);
+#endif
+
+		free(t1);
+#if TRACING_TIMERS == 1
+		free(t2);
+		free(t3);
+#endif
+	}
+
+	printf("%s: %u timers passed, %u timers failed\n",
+	       pass ? "PASS" : "FAIL", nr_pass, nr_fail);
+}
+
+static void teardown_tpq(timer_pool_queue_t *tpq)
+{
+	size_t i;
+	odp_event_t event;
+	int rv;
+	timer_ctx_t *tc;
+
+	for (i = 0, tc = tpq->timers; i < tpq->nr_timers; i++, tc++) {
+		event = odp_timer_free(tc->pair.timer);
+		assert(event == ODP_EVENT_INVALID);
+
+		odp_timeout_free(tc->pair.timeout);
+	}
+
+	odp_timer_pool_destroy(tpq->timer_pool);
+
+	rv = odp_queue_destroy(tpq->queue);
+	assert(rv == 0);
+}
+
+static void teardown(app_t *app)
+{
+	size_t i;
+	int rv;
+
+	for (i = 0; i < app->nr_tpqs; i++)
+		teardown_tpq(app->tpqs + i);
+
+	rv = odp_pool_destroy(app->timeout_pool);
+	assert(rv == 0);
+}
+
+static void run_test(test_t *test)
+{
+	odp_init_t init;
+	odp_instance_t instance;
+	odp_cpumask_t worker_mask;
+	odph_odpthread_t *workers;
+	int nr_workers;
+	odph_odpthread_params_t thrp;
+	odp_shm_t shm;
+	app_t *app;
+	size_t i;
+	int rv;
+
+	init.num_worker   = sysconf(_SC_NPROCESSORS_CONF);
+	init.num_control  = 0;
+	init.worker_cpus  = NULL;
+	init.control_cpus = NULL;
+	init.log_fn       = on_odp_log;
+	init.abort_fn     = on_odp_abort;
+
+	rv = odp_init_global(&instance, &init, NULL);
+	assert(rv == 0);
+	rv = odp_init_local(instance, ODP_THREAD_CONTROL);
+	assert(rv == 0);
+
+	/*
+	 * FIXME: Adjust default cpumasks to better utilize cores when
+	 * running benchmarks or stress test programs.
+	 */
+	nr_workers = odp_cpumask_default_worker(&worker_mask, test->nr_workers);
+
+	size_t sz0 = CACHE_ALIGN_ROUNDUP(sizeof(app_t));
+	size_t sz1 = CACHE_ALIGN_ROUNDUP(
+		sizeof(timer_pool_queue_t) * test->nr_tpqs);
+	size_t sz2 = CACHE_ALIGN_ROUNDUP(
+		sizeof(timer_ctx_t) * test_nr_timers(test));
+
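+	/*
+	 * One shm region carved into three cache-line aligned blocks:
+	 * app_t, then nr_tpqs timer_pool_queue_t, then one timer_ctx_t per
+	 * timer.
+	 */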
+	shm = odp_shm_reserve("app", sz0 + sz1 + sz2, ODP_CACHE_LINE_SIZE, 0);
+	assert(shm != ODP_SHM_INVALID);
+	app = odp_shm_addr(odp_shm_lookup("app"));
+	app->tpqs = (void *)((char *)app + sz0);
+	app->nr_tpqs = test->nr_tpqs;
+
+	timer_pool_queue_t *tpq = app->tpqs;
+	timer_ctx_t *timer_ctx = (void *)((char *)app + sz0 + sz1);
+	for (i = 0; i < test->nr_tpqs; i++) {
+		test_timer_pool_queue_t *ttpq = &test->tpqs[i];
+
+		tpq->timers = timer_ctx;
+		tpq->nr_timers = ttpq->nr_timers;
+
+		tpq++;
+		timer_ctx += ttpq->nr_timers;
+	}
+
+	setup(app, test, nr_workers);
+
+	/* Launch ODP threads and run the test case. */
+	workers = calloc(nr_workers, sizeof(odph_odpthread_t));
+	assert(workers != NULL);
+	thrp.thr_type = ODP_THREAD_WORKER;
+	thrp.instance = instance;
+	thrp.start    = worker_start;
+	thrp.arg      = NULL;
+	rv = odph_odpthreads_create(workers, &worker_mask, &thrp);
+	assert(rv == nr_workers);
+	/* Wait for test case to complete. */
+	rv = odph_odpthreads_join(workers);
+	assert(rv == nr_workers);
+
+	print_stats(app);
+
+	teardown(app);
+
+	free(workers);
+	rv = odp_shm_free(shm);
+	assert(rv == 0);
+	rv = odp_term_local();
+	assert(rv >= 0);
+	rv = odp_term_global(instance);
+	assert(rv == 0);
+}
+
+static void run_single_threaded_measurements(void)
+{
+	odp_instance_t instance;
+	odp_queue_t queue;
+	odp_queue_param_t qp;
+	odp_pool_t pool;
+	odp_pool_param_t pp;
+	odp_timer_pool_t timer_pool;
+	odp_timer_pool_param_t tpp;
+	const int resolution_ns = 1000;
+	timer_ctx_t *timers;
+	int nr_timers, i;
+	int rv;
+
+	printf("Running single threaded measurements\n");
+
+	rv = odp_init_global(&instance, NULL, NULL);
+	assert(rv == 0);
+	rv = odp_init_local(instance, ODP_THREAD_WORKER);
+	assert(rv == 0);
+
+	/* Create a queue. */
+	odp_queue_param_init(&qp);
+	qp.type     = ODP_QUEUE_TYPE_SCHED;
+	qp.enq_mode = ODP_QUEUE_OP_MT_UNSAFE;
+	qp.deq_mode = ODP_QUEUE_OP_DISABLED;
+	qp.sched.prio  = ODP_SCHED_PRIO_DEFAULT;
+	qp.sched.sync  = ODP_SCHED_SYNC_PARALLEL;
+	qp.sched.group = ODP_SCHED_GROUP_WORKER;
+	qp.sched.lock_count = 0;
+	qp.context     = NULL;
+	qp.context_len = 0;
+
+	queue = odp_queue_create("queue0", &qp);
+	assert(queue != ODP_QUEUE_INVALID);
+
+	for (nr_timers = 100; nr_timers <= 100000; nr_timers *= 10) {
+		uint64_t ts1, ts2;
+		uint64_t duration_ns;
+		int remaining;
+
+		/* Create a pool for timeout events. */
+		odp_pool_param_init(&pp);
+		pp.type    = ODP_POOL_TIMEOUT;
+		pp.tmo.num = nr_timers;
+
+		pool = odp_pool_create("pool_tmo0", &pp);
+		assert(pool != ODP_POOL_INVALID);
+
+		/* Create a timer pool. */
+		tpp.res_ns     = resolution_ns;
+		tpp.min_tmo    = resolution_ns;
+		tpp.max_tmo    = 0xFFFF;
+		tpp.num_timers = nr_timers;
+		tpp.priv       = 1;
+		tpp.clk_src    = ODP_CLOCK_CPU;
+
+		timer_pool = odp_timer_pool_create("timer_pool0", &tpp);
+		assert(timer_pool != ODP_TIMER_POOL_INVALID);
+		odp_timer_pool_start();
+
+		/* Alloc timers and their timeout events. */
+		timers = calloc(nr_timers, sizeof(*timers));
+		assert(timers != NULL);
+
+		for (i = 0; i < nr_timers; i++) {
+			timer_pair_t *pair = &timers[i].pair;
+
+			pair->timer = odp_timer_alloc(
+				timer_pool, queue, &timers[i]);
+			assert(pair->timer != ODP_TIMER_INVALID);
+
+			pair->timeout = odp_timeout_alloc(pool);
+			assert(pair->timeout != ODP_TIMEOUT_INVALID);
+		}
+
+		/* Measure the time it takes to arm timers. */
+		ts1 = core_tick();
+
+		for (i = 0; i < nr_timers; i++) {
+			timer_pair_t *pair = &timers[i].pair;
+			odp_event_t e;
+
+			e = odp_timeout_to_event(pair->timeout);
+			odp_timer_set_rel(pair->timer, 1, &e);
+		}
+
+		ts2 = core_tick();
+
+		duration_ns = core_tick_diff_ns(ts1, ts2);
+
+		printf("Arm time for %u timers: %lu ns\n", nr_timers, duration_ns);
+		printf("Arm time per timer: %lu ns\n", duration_ns / nr_timers);
+
+		/* Measure the time it takes to process timers. */
+		remaining = nr_timers;
+
+		ts1 = core_tick();
+
+		while (remaining) {
+			odp_queue_t q;
+			odp_event_t e;
+
+			e = odp_schedule(&q, ODP_SCHED_WAIT);
+
+			if (e == ODP_EVENT_INVALID)
+				continue;
+
+			if (odp_event_type(e) == ODP_EVENT_TIMEOUT)
+				remaining--;
+		}
+
+		ts2 = core_tick();
+
+		duration_ns = core_tick_diff_ns(ts1, ts2);
+
+		printf("Sched time for %u timers: %lu ns\n", nr_timers, duration_ns);
+		printf("Sched time per timer: %lu ns\n", duration_ns / nr_timers);
+
+		/* Cleanup. */
+		for (i = 0; i < nr_timers; i++) {
+			timer_pair_t *pair = &timers[i].pair;
+			odp_event_t e;
+
+			odp_timer_cancel(pair->timer, &e);
+
+			e = odp_timer_free(pair->timer);
+			assert(e == ODP_EVENT_INVALID);
+			odp_timeout_free(pair->timeout);
+		}
+		free(timers);
+		odp_timer_pool_destroy(timer_pool);
+		rv = odp_pool_destroy(pool);
+		assert(rv == 0);
+	}
+
+	rv = odp_queue_destroy(queue);
+	assert(rv == 0);
+
+	rv = odp_term_local();
+	assert(rv >= 0);
+	rv = odp_term_global(instance);
+	assert(rv == 0);
+}
+
+static test_timer_pool_queue_t r10ms_50[] = {
+	{ 10000000, 50, ODP_SCHED_PRIO_NORMAL, ODP_SCHED_SYNC_PARALLEL },
+};
+static test_timer_pool_queue_t r1ms_50[] = {
+	{ 1000000, 50, ODP_SCHED_PRIO_NORMAL, ODP_SCHED_SYNC_PARALLEL },
+};
+static test_timer_pool_queue_t r100us_50[] = {
+	{ 100000, 50, ODP_SCHED_PRIO_NORMAL, ODP_SCHED_SYNC_PARALLEL },
+};
+static test_timer_pool_queue_t r10us_50[] = {
+	{ 10000, 50, ODP_SCHED_PRIO_NORMAL, ODP_SCHED_SYNC_PARALLEL },
+};
+static test_timer_pool_queue_t r1us_50[] = {
+	{ 1000, 50, ODP_SCHED_PRIO_NORMAL, ODP_SCHED_SYNC_PARALLEL },
+};
+
+static test_timer_pool_queue_t r10ms_50x4[] = {
+	{ 10000000, 50, ODP_SCHED_PRIO_NORMAL, ODP_SCHED_SYNC_PARALLEL },
+	{ 10000000, 50, ODP_SCHED_PRIO_NORMAL, ODP_SCHED_SYNC_PARALLEL },
+	{ 10000000, 50, ODP_SCHED_PRIO_NORMAL, ODP_SCHED_SYNC_PARALLEL },
+	{ 10000000, 50, ODP_SCHED_PRIO_NORMAL, ODP_SCHED_SYNC_PARALLEL },
+};
+static test_timer_pool_queue_t r1ms_50x4[] = {
+	{ 1000000, 50, ODP_SCHED_PRIO_NORMAL, ODP_SCHED_SYNC_PARALLEL },
+	{ 1000000, 50, ODP_SCHED_PRIO_NORMAL, ODP_SCHED_SYNC_PARALLEL },
+	{ 1000000, 50, ODP_SCHED_PRIO_NORMAL, ODP_SCHED_SYNC_PARALLEL },
+	{ 1000000, 50, ODP_SCHED_PRIO_NORMAL, ODP_SCHED_SYNC_PARALLEL },
+};
+static test_timer_pool_queue_t r100us_50x4[] = {
+	{ 100000, 50, ODP_SCHED_PRIO_NORMAL, ODP_SCHED_SYNC_PARALLEL },
+	{ 100000, 50, ODP_SCHED_PRIO_NORMAL, ODP_SCHED_SYNC_PARALLEL },
+	{ 100000, 50, ODP_SCHED_PRIO_NORMAL, ODP_SCHED_SYNC_PARALLEL },
+	{ 100000, 50, ODP_SCHED_PRIO_NORMAL, ODP_SCHED_SYNC_PARALLEL },
+};
+static test_timer_pool_queue_t r10us_50x4[] = {
+	{ 10000, 50, ODP_SCHED_PRIO_NORMAL, ODP_SCHED_SYNC_PARALLEL },
+	{ 10000, 50, ODP_SCHED_PRIO_NORMAL, ODP_SCHED_SYNC_PARALLEL },
+	{ 10000, 50, ODP_SCHED_PRIO_NORMAL, ODP_SCHED_SYNC_PARALLEL },
+	{ 10000, 50, ODP_SCHED_PRIO_NORMAL, ODP_SCHED_SYNC_PARALLEL },
+};
+static test_timer_pool_queue_t r1us_50x4[] = {
+	{ 1000, 50, ODP_SCHED_PRIO_NORMAL, ODP_SCHED_SYNC_PARALLEL },
+	{ 1000, 50, ODP_SCHED_PRIO_NORMAL, ODP_SCHED_SYNC_PARALLEL },
+	{ 1000, 50, ODP_SCHED_PRIO_NORMAL, ODP_SCHED_SYNC_PARALLEL },
+	{ 1000, 50, ODP_SCHED_PRIO_NORMAL, ODP_SCHED_SYNC_PARALLEL },
+};
+
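+/*
+ * A mixed load: a few timers on high priority tpqs alongside many timers
+ * on lower priority tpqs, spread over both parallel and atomic queues.
+ */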
+static test_timer_pool_queue_t mix[] = {
+	{    1000,    4, ODP_SCHED_PRIO_HIGHEST, ODP_SCHED_SYNC_PARALLEL },
+	{  100000,    4, ODP_SCHED_PRIO_HIGHEST, ODP_SCHED_SYNC_PARALLEL },
+	{ 1000000, 2048, ODP_SCHED_PRIO_LOWEST, ODP_SCHED_SYNC_PARALLEL },
+	{ 1000000, 2048, ODP_SCHED_PRIO_LOWEST, ODP_SCHED_SYNC_ATOMIC },
+	{  100000, 1024, ODP_SCHED_PRIO_LOWEST, ODP_SCHED_SYNC_PARALLEL },
+	{  100000, 1024, ODP_SCHED_PRIO_LOWEST, ODP_SCHED_SYNC_ATOMIC },
+	{   10000,   16, ODP_SCHED_PRIO_LOWEST, ODP_SCHED_SYNC_PARALLEL },
+	{   10000,   16, ODP_SCHED_PRIO_LOWEST, ODP_SCHED_SYNC_ATOMIC },
+};
+
+/* Run test with 1 thread and then with 3 threads. */
+#define TEST(name, test) \
+	{ name "_1t", 1, test, arraysize(test) }, \
+	{ name "_3t", 3, test, arraysize(test) }
+
+static test_t tests[] = {
+	TEST("r10ms_50", r10ms_50),
+	TEST("r1ms_50", r1ms_50),
+	TEST("r100us_50", r100us_50),
+	TEST("r10us_50", r10us_50),
+	TEST("r1us_50", r1us_50),
+	TEST("r10ms_50x4", r10ms_50x4),
+	TEST("r1ms_50x4", r1ms_50x4),
+	TEST("r100us_50x4", r100us_50x4),
+	TEST("r10us_50x4", r10us_50x4),
+	TEST("r1us_50x4", r1us_50x4),
+	TEST("mix", mix)
+};
+
+int main(int argc, char *argv[])
+{
+	size_t i;
+
+	if (!core_tick_init()) {
+		print("core_tick_init failed");
+		exit(EXIT_FAILURE);
+	}
+
+	odph_parse_options(argc, argv, NULL, NULL);
+
+	run_single_threaded_measurements();
+
+	for (i = 0; i < arraysize(tests); i++) {
+		printf("Running %s\n", tests[i].name);
+		run_test(&tests[i]);
+	}
+
+	exit(EXIT_SUCCESS);
+}