diff mbox series

[API-NEXT,v2,8/8] linux-gen: time: use hw time counter when available

Message ID 20170424104917.24102-9-petri.savolainen@linaro.org
State Superseded
Headers show
Series Use HW time counter | expand

Commit Message

Petri Savolainen April 24, 2017, 10:49 a.m. UTC
Use the 64-bit HW time counter when available. It is used on
x86 when the invariant TSC CPU flag indicates that the TSC frequency
is constant. Otherwise, the system time is used as before. Direct
HW time counter usage avoids a system call and the related latency
and performance issues.

Signed-off-by: Petri Savolainen <petri.savolainen@linaro.org>

---
 platform/linux-generic/Makefile.am                 |   1 +
 platform/linux-generic/arch/arm/odp_cpu_arch.c     |  16 ++
 platform/linux-generic/arch/default/odp_cpu_arch.c |  16 ++
 platform/linux-generic/arch/mips64/odp_cpu_arch.c  |  16 ++
 platform/linux-generic/arch/powerpc/odp_cpu_arch.c |  16 ++
 platform/linux-generic/arch/x86/cpu_flags.c        |   9 +
 platform/linux-generic/arch/x86/odp_cpu_arch.c     |  59 ++++
 .../include/odp/api/plat/time_types.h              |  23 +-
 platform/linux-generic/include/odp_time_internal.h |  24 ++
 platform/linux-generic/odp_time.c                  | 300 ++++++++++++++++-----
 10 files changed, 415 insertions(+), 65 deletions(-)
 create mode 100644 platform/linux-generic/include/odp_time_internal.h

-- 
2.11.0
diff mbox series

Patch

diff --git a/platform/linux-generic/Makefile.am b/platform/linux-generic/Makefile.am
index 60b7f849..ed66fecf 100644
--- a/platform/linux-generic/Makefile.am
+++ b/platform/linux-generic/Makefile.am
@@ -171,6 +171,7 @@  noinst_HEADERS = \
 		  ${srcdir}/include/odp_schedule_if.h \
 		  ${srcdir}/include/odp_sorted_list_internal.h \
 		  ${srcdir}/include/odp_shm_internal.h \
+		  ${srcdir}/include/odp_time_internal.h \
 		  ${srcdir}/include/odp_timer_internal.h \
 		  ${srcdir}/include/odp_timer_wheel_internal.h \
 		  ${srcdir}/include/odp_traffic_mngr_internal.h \
diff --git a/platform/linux-generic/arch/arm/odp_cpu_arch.c b/platform/linux-generic/arch/arm/odp_cpu_arch.c
index 2ac223e0..c31f9084 100644
--- a/platform/linux-generic/arch/arm/odp_cpu_arch.c
+++ b/platform/linux-generic/arch/arm/odp_cpu_arch.c
@@ -13,6 +13,7 @@ 
 #include <odp/api/hints.h>
 #include <odp/api/system_info.h>
 #include <odp_debug_internal.h>
+#include <odp_time_internal.h>
 
 #define GIGA 1000000000
 
@@ -46,3 +47,18 @@  uint64_t odp_cpu_cycles_resolution(void)
 {
 	return 1;
 }
+
+int cpu_has_global_time(void)
+{
+	return 0; /* no HW time counter on this arch: use system time */
+}
+
+uint64_t cpu_global_time(void)
+{
+	return 0; /* placeholder: not used while cpu_has_global_time() is 0 */
+}
+
+uint64_t cpu_global_time_freq(void)
+{
+	return 0; /* placeholder: time init treats 0 as a failure */
+}
diff --git a/platform/linux-generic/arch/default/odp_cpu_arch.c b/platform/linux-generic/arch/default/odp_cpu_arch.c
index 2ac223e0..c31f9084 100644
--- a/platform/linux-generic/arch/default/odp_cpu_arch.c
+++ b/platform/linux-generic/arch/default/odp_cpu_arch.c
@@ -13,6 +13,7 @@ 
 #include <odp/api/hints.h>
 #include <odp/api/system_info.h>
 #include <odp_debug_internal.h>
+#include <odp_time_internal.h>
 
 #define GIGA 1000000000
 
@@ -46,3 +47,18 @@  uint64_t odp_cpu_cycles_resolution(void)
 {
 	return 1;
 }
+
+int cpu_has_global_time(void)
+{
+	return 0; /* no HW time counter on this arch: use system time */
+}
+
+uint64_t cpu_global_time(void)
+{
+	return 0; /* placeholder: not used while cpu_has_global_time() is 0 */
+}
+
+uint64_t cpu_global_time_freq(void)
+{
+	return 0; /* placeholder: time init treats 0 as a failure */
+}
diff --git a/platform/linux-generic/arch/mips64/odp_cpu_arch.c b/platform/linux-generic/arch/mips64/odp_cpu_arch.c
index 646acf9c..f7eafa0f 100644
--- a/platform/linux-generic/arch/mips64/odp_cpu_arch.c
+++ b/platform/linux-generic/arch/mips64/odp_cpu_arch.c
@@ -7,6 +7,7 @@ 
 #include <odp/api/cpu.h>
 #include <odp/api/hints.h>
 #include <odp/api/system_info.h>
+#include <odp_time_internal.h>
 
 uint64_t odp_cpu_cycles(void)
 {
@@ -29,3 +30,18 @@  uint64_t odp_cpu_cycles_resolution(void)
 {
 	return 1;
 }
+
+int cpu_has_global_time(void)
+{
+	return 0; /* no HW time counter on this arch: use system time */
+}
+
+uint64_t cpu_global_time(void)
+{
+	return 0; /* placeholder: not used while cpu_has_global_time() is 0 */
+}
+
+uint64_t cpu_global_time_freq(void)
+{
+	return 0; /* placeholder: time init treats 0 as a failure */
+}
diff --git a/platform/linux-generic/arch/powerpc/odp_cpu_arch.c b/platform/linux-generic/arch/powerpc/odp_cpu_arch.c
index 2ac223e0..c31f9084 100644
--- a/platform/linux-generic/arch/powerpc/odp_cpu_arch.c
+++ b/platform/linux-generic/arch/powerpc/odp_cpu_arch.c
@@ -13,6 +13,7 @@ 
 #include <odp/api/hints.h>
 #include <odp/api/system_info.h>
 #include <odp_debug_internal.h>
+#include <odp_time_internal.h>
 
 #define GIGA 1000000000
 
@@ -46,3 +47,18 @@  uint64_t odp_cpu_cycles_resolution(void)
 {
 	return 1;
 }
+
+int cpu_has_global_time(void)
+{
+	return 0; /* no HW time counter on this arch: use system time */
+}
+
+uint64_t cpu_global_time(void)
+{
+	return 0; /* placeholder: not used while cpu_has_global_time() is 0 */
+}
+
+uint64_t cpu_global_time_freq(void)
+{
+	return 0; /* placeholder: time init treats 0 as a failure */
+}
diff --git a/platform/linux-generic/arch/x86/cpu_flags.c b/platform/linux-generic/arch/x86/cpu_flags.c
index 954dac27..a492a35b 100644
--- a/platform/linux-generic/arch/x86/cpu_flags.c
+++ b/platform/linux-generic/arch/x86/cpu_flags.c
@@ -39,6 +39,7 @@ 
 
 #include <arch/x86/cpu_flags.h>
 #include <odp_debug_internal.h>
+#include <odp_time_internal.h>
 #include <stdio.h>
 #include <stdint.h>
 
@@ -357,3 +358,11 @@  void cpu_flags_print_all(void)
 	str[len] = '\0';
 	ODP_PRINT("%s", str);
 }
+
+/* Report whether the invariant TSC flag is set, i.e. whether the TSC can
+ * serve as the global time source. Returns 1 when it can, otherwise 0. */
+int cpu_has_global_time(void)
+{
+	/* Any non-positive result of the flag query counts as "no" */
+	return cpu_get_flag_enabled(RTE_CPUFLAG_INVTSC) > 0;
+}
diff --git a/platform/linux-generic/arch/x86/odp_cpu_arch.c b/platform/linux-generic/arch/x86/odp_cpu_arch.c
index c8cf27b6..9ba601a3 100644
--- a/platform/linux-generic/arch/x86/odp_cpu_arch.c
+++ b/platform/linux-generic/arch/x86/odp_cpu_arch.c
@@ -3,7 +3,14 @@ 
  *
  * SPDX-License-Identifier:     BSD-3-Clause
  */
+
+#include <odp_posix_extensions.h>
+
 #include <odp/api/cpu.h>
+#include <odp_time_internal.h>
+#include <odp_debug_internal.h>
+
+#include <time.h>
 
 uint64_t odp_cpu_cycles(void)
 {
@@ -31,3 +38,55 @@  uint64_t odp_cpu_cycles_resolution(void)
 {
 	return 1;
 }
+
+uint64_t cpu_global_time(void)
+{
+	return odp_cpu_cycles(); /* cycle counter doubles as time counter */
+}
+
+#define SEC_IN_NS 1000000000ULL
+
+/* Measure the TSC frequency against CLOCK_MONOTONIC_RAW. x86 does define
+ * frequency information registers, but those are often not enumerated.
+ *
+ * Averages four measurements, each spanning a 100 ms nanosleep().
+ * Returns the frequency in Hz, or 0 if a clock or sleep call fails. */
+uint64_t cpu_global_time_freq(void)
+{
+	const struct timespec delay = {.tv_sec = 0, .tv_nsec = SEC_IN_NS / 10};
+	const int rounds = 4;
+	struct timespec begin, end;
+	uint64_t c_begin, c_end, elapsed_ns;
+	uint64_t sum_hz = 0;
+	int round;
+
+	for (round = 0; round < rounds; round++) {
+		if (clock_gettime(CLOCK_MONOTONIC_RAW, &begin)) {
+			ODP_DBG("clock_gettime failed\n");
+			return 0;
+		}
+
+		c_begin = cpu_global_time();
+
+		if (nanosleep(&delay, NULL) < 0) {
+			ODP_DBG("nanosleep failed\n");
+			return 0;
+		}
+
+		if (clock_gettime(CLOCK_MONOTONIC_RAW, &end)) {
+			ODP_DBG("clock_gettime failed\n");
+			return 0;
+		}
+
+		c_end = cpu_global_time();
+
+		/* System clock time elapsed between the two counter samples */
+		elapsed_ns  = (end.tv_sec - begin.tv_sec) * SEC_IN_NS;
+		elapsed_ns += end.tv_nsec - begin.tv_nsec;
+
+		/* Counter ticks per elapsed nanosecond, scaled to one second */
+		sum_hz += ((c_end - c_begin) * SEC_IN_NS) / elapsed_ns;
+	}
+
+	return sum_hz / rounds;
+}
diff --git a/platform/linux-generic/include/odp/api/plat/time_types.h b/platform/linux-generic/include/odp/api/plat/time_types.h
index 4847f3b1..1cafb1f7 100644
--- a/platform/linux-generic/include/odp/api/plat/time_types.h
+++ b/platform/linux-generic/include/odp/api/plat/time_types.h
@@ -26,11 +26,28 @@  extern "C" {
  * the linux timespec structure, which is dependent on POSIX extension level.
  */
 typedef struct odp_time_t {
-	int64_t tv_sec;      /**< @internal Seconds */
-	int64_t tv_nsec;     /**< @internal Nanoseconds */
+	union {
+		/** @internal Posix timespec */
+		struct {
+			/** @internal Seconds */
+			int64_t tv_sec;
+
+			/** @internal Nanoseconds */
+			int64_t tv_nsec;
+		} spec;
+
+		/** @internal HW time counter */
+		struct {
+			/** @internal Counter value */
+			uint64_t count;
+
+			/** @internal Reserved */
+			uint64_t reserved;
+		} hw;
+	};
 } odp_time_t;
 
-#define ODP_TIME_NULL ((odp_time_t){0, 0})
+#define ODP_TIME_NULL ((odp_time_t){.spec = {0, 0} })
 
 /**
  * @}
diff --git a/platform/linux-generic/include/odp_time_internal.h b/platform/linux-generic/include/odp_time_internal.h
new file mode 100644
index 00000000..99ac7977
--- /dev/null
+++ b/platform/linux-generic/include/odp_time_internal.h
@@ -0,0 +1,24 @@ 
+/* Copyright (c) 2017, Linaro Limited
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier:     BSD-3-Clause
+ */
+
+#ifndef ODP_TIME_INTERNAL_H_
+#define ODP_TIME_INTERNAL_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+int cpu_has_global_time(void);       /* non-zero: HW time counter usable */
+uint64_t cpu_global_time(void);      /* current HW time counter value */
+uint64_t cpu_global_time_freq(void); /* counter frequency in Hz, 0 on error */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/platform/linux-generic/odp_time.c b/platform/linux-generic/odp_time.c
index 81e05224..0362b003 100644
--- a/platform/linux-generic/odp_time.c
+++ b/platform/linux-generic/odp_time.c
@@ -10,36 +10,39 @@ 
 #include <odp/api/time.h>
 #include <odp/api/hints.h>
 #include <odp_debug_internal.h>
+#include <odp_time_internal.h>
+#include <string.h>
+#include <inttypes.h>
 
-static odp_time_t start_time;
+typedef struct time_global_t {
+	odp_time_t start_time;  /* timespec mode: system time read at init */
+	int        use_hw;      /* non-zero: HW time counter is in use */
+	uint64_t   hw_start;    /* HW mode: counter value read at init */
+	uint64_t   hw_freq_hz;  /* HW mode: counter frequency in Hz */
+} time_global_t;
 
-static inline
-uint64_t time_to_ns(odp_time_t time)
-{
-	uint64_t ns;
-
-	ns = time.tv_sec * ODP_TIME_SEC_IN_NS;
-	ns += time.tv_nsec;
+static time_global_t global;
 
-	return ns;
-}
+/*
+ * Posix timespec based functions
+ */
 
-static inline odp_time_t time_diff(odp_time_t t2, odp_time_t t1)
+static inline odp_time_t time_spec_diff(odp_time_t t2, odp_time_t t1)
 {
 	odp_time_t time;
 
-	time.tv_sec = t2.tv_sec - t1.tv_sec;
-	time.tv_nsec = t2.tv_nsec - t1.tv_nsec;
+	time.spec.tv_sec = t2.spec.tv_sec - t1.spec.tv_sec;
+	time.spec.tv_nsec = t2.spec.tv_nsec - t1.spec.tv_nsec;
 
-	if (time.tv_nsec < 0) {
-		time.tv_nsec += ODP_TIME_SEC_IN_NS;
-		--time.tv_sec;
+	if (time.spec.tv_nsec < 0) {
+		time.spec.tv_nsec += ODP_TIME_SEC_IN_NS;
+		--time.spec.tv_sec;
 	}
 
 	return time;
 }
 
-static inline odp_time_t time_local(void)
+static inline odp_time_t time_spec_cur(void)
 {
 	int ret;
 	odp_time_t time;
@@ -49,77 +52,234 @@  static inline odp_time_t time_local(void)
 	if (odp_unlikely(ret != 0))
 		ODP_ABORT("clock_gettime failed\n");
 
-	time.tv_sec = sys_time.tv_sec;
-	time.tv_nsec = sys_time.tv_nsec;
+	time.spec.tv_sec = sys_time.tv_sec;
+	time.spec.tv_nsec = sys_time.tv_nsec;
 
-	return time_diff(time, start_time);
+	return time_spec_diff(time, global.start_time);
 }
 
-static inline int time_cmp(odp_time_t t2, odp_time_t t1)
+static inline uint64_t time_spec_res(void)
 {
-	if (t2.tv_sec < t1.tv_sec)
+	int ret;
+	struct timespec tres;
+
+	ret = clock_getres(CLOCK_MONOTONIC_RAW, &tres);
+	if (odp_unlikely(ret != 0))
+		ODP_ABORT("clock_getres failed\n");
+
+	return ODP_TIME_SEC_IN_NS / (uint64_t)tres.tv_nsec;
+}
+
+static inline int time_spec_cmp(odp_time_t t2, odp_time_t t1)
+{
+	if (t2.spec.tv_sec < t1.spec.tv_sec)
 		return -1;
 
-	if (t2.tv_sec > t1.tv_sec)
+	if (t2.spec.tv_sec > t1.spec.tv_sec)
 		return 1;
 
-	return t2.tv_nsec - t1.tv_nsec;
+	return t2.spec.tv_nsec - t1.spec.tv_nsec;
 }
 
-static inline odp_time_t time_sum(odp_time_t t1, odp_time_t t2)
+static inline odp_time_t time_spec_sum(odp_time_t t1, odp_time_t t2)
 {
 	odp_time_t time;
 
-	time.tv_sec = t2.tv_sec + t1.tv_sec;
-	time.tv_nsec = t2.tv_nsec + t1.tv_nsec;
+	time.spec.tv_sec = t2.spec.tv_sec + t1.spec.tv_sec;
+	time.spec.tv_nsec = t2.spec.tv_nsec + t1.spec.tv_nsec;
 
-	if (time.tv_nsec >= (long)ODP_TIME_SEC_IN_NS) {
-		time.tv_nsec -= ODP_TIME_SEC_IN_NS;
-		++time.tv_sec;
+	if (time.spec.tv_nsec >= (long)ODP_TIME_SEC_IN_NS) {
+		time.spec.tv_nsec -= ODP_TIME_SEC_IN_NS;
+		++time.spec.tv_sec;
 	}
 
 	return time;
 }
 
-static inline odp_time_t time_local_from_ns(uint64_t ns)
+/* Flatten a timespec style value into a single nanosecond count */
+static inline uint64_t time_spec_to_ns(odp_time_t time)
+{
+	uint64_t total_ns = time.spec.tv_sec * ODP_TIME_SEC_IN_NS;
+
+	total_ns += time.spec.tv_nsec;
+
+	return total_ns;
+}
+
+static inline odp_time_t time_spec_from_ns(uint64_t ns)
 {
 	odp_time_t time;
 
-	time.tv_sec = ns / ODP_TIME_SEC_IN_NS;
-	time.tv_nsec = ns - time.tv_sec * ODP_TIME_SEC_IN_NS;
+	time.spec.tv_sec = ns / ODP_TIME_SEC_IN_NS;
+	time.spec.tv_nsec = ns - time.spec.tv_sec * ODP_TIME_SEC_IN_NS;
 
 	return time;
 }
 
-static inline void time_wait_until(odp_time_t time)
+/*
+ * HW time counter based functions
+ */
+
+static inline odp_time_t time_hw_cur(void)
 {
-	odp_time_t cur;
+	odp_time_t time;
 
-	do {
-		cur = time_local();
-	} while (time_cmp(time, cur) > 0);
+	time.hw.reserved = 0; /* never hand out an uninitialized word */
+	time.hw.count = cpu_global_time() - global.hw_start;
+	return time;
 }
 
-static inline uint64_t time_local_res(void)
+static inline uint64_t time_hw_res(void)
 {
-	int ret;
-	struct timespec tres;
+	/* Report a resolution one order of magnitude below the average
+	 * cycle counter frequency, rather than promising the full rate */
+	return global.hw_freq_hz / 10;
+}
 
-	ret = clock_getres(CLOCK_MONOTONIC_RAW, &tres);
-	if (odp_unlikely(ret != 0))
-		ODP_ABORT("clock_getres failed\n");
+static inline int time_hw_cmp(odp_time_t t2, odp_time_t t1)
+{
+	if (odp_likely(t2.hw.count > t1.hw.count))
+		return 1;
 
-	return ODP_TIME_SEC_IN_NS / (uint64_t)tres.tv_nsec;
+	if (t2.hw.count < t1.hw.count)
+		return -1;
+
+	return 0;
+}
+
+/* Counter ticks from t1 to t2; unsigned, so wraps if t2 is before t1 */
+static inline odp_time_t time_hw_diff(odp_time_t t2, odp_time_t t1)
+{
+	odp_time_t time = {.hw = {.count = t2.hw.count - t1.hw.count} };
+
+	/* The initializer also zeroes hw.reserved */
+	return time;
+}
+
+/* Sum of two HW counter values; unsigned addition, wraps on overflow */
+static inline odp_time_t time_hw_sum(odp_time_t t1, odp_time_t t2)
+{
+	odp_time_t time = {.hw = {.count = t1.hw.count + t2.hw.count} };
+
+	/* The initializer also zeroes hw.reserved */
+	return time;
+}
+
+/* Convert a HW counter value to nanoseconds using the measured frequency.
+ *
+ * Whole seconds are split off first, so the multiplication by
+ * ODP_TIME_SEC_IN_NS only ever sees a tick count of less than one second. */
+static inline uint64_t time_hw_to_ns(odp_time_t time)
+{
+	const uint64_t hz = global.hw_freq_hz;
+	const uint64_t whole_sec = time.hw.count / hz;
+	const uint64_t rem_ticks = time.hw.count % hz;
+	uint64_t ns;
+
+	ns  = whole_sec * ODP_TIME_SEC_IN_NS;
+	ns += (ODP_TIME_SEC_IN_NS * rem_ticks) / hz;
+
+	return ns;
+}
+
+/* Convert nanoseconds to a HW counter value using the measured frequency.
+ *
+ * Whole seconds and the sub-second remainder are scaled separately, which
+ * keeps the remainder-times-frequency product small. */
+static inline odp_time_t time_hw_from_ns(uint64_t ns)
+{
+	const uint64_t hz = global.hw_freq_hz;
+	const uint64_t whole_sec = ns / ODP_TIME_SEC_IN_NS;
+	const uint64_t rem_ns = ns % ODP_TIME_SEC_IN_NS;
+	odp_time_t time;
+
+	/* ticks = seconds * Hz + sub-second nanoseconds scaled to ticks */
+	time.hw.count  = whole_sec * hz;
+	time.hw.count += (rem_ns * hz) / ODP_TIME_SEC_IN_NS;
+
+	/* The reserved word carries no information in HW counter mode */
+	time.hw.reserved = 0;
+
+	return time;
+}
+
+/*
+ * Common functions
+ */
+
+/* Current time relative to the start point recorded at init.
+ * Reads the HW time counter when global.use_hw is set, otherwise falls
+ * back to the timespec based system time. */
+static inline odp_time_t time_cur(void)
+{
+	return global.use_hw ? time_hw_cur() : time_spec_cur();
+}
+
+/* Time resolution expressed as ticks per second.
+ * Derived from the HW counter frequency when global.use_hw is set,
+ * otherwise from clock_getres() through the timespec based path. */
+static inline uint64_t time_res(void)
+{
+	return global.use_hw ? time_hw_res() : time_spec_res();
+}
+
+/* Three-way comparison of two time values.
+ * The result is positive if t2 is later than t1, negative if it is
+ * earlier and zero if they are equal. Dispatches on global.use_hw. */
+static inline int time_cmp(odp_time_t t2, odp_time_t t1)
+{
+	return global.use_hw ? time_hw_cmp(t2, t1) : time_spec_cmp(t2, t1);
+}
+
+/* Time difference t2 - t1.
+ * Uses HW counter arithmetic when global.use_hw is set, otherwise
+ * timespec arithmetic with a nanosecond borrow. */
+static inline odp_time_t time_diff(odp_time_t t2, odp_time_t t1)
+{
+	return global.use_hw ? time_hw_diff(t2, t1) : time_spec_diff(t2, t1);
+}
+
+/* Time sum t1 + t2.
+ * Uses HW counter arithmetic when global.use_hw is set, otherwise
+ * timespec arithmetic with a nanosecond carry. */
+static inline odp_time_t time_sum(odp_time_t t1, odp_time_t t2)
+{
+	return global.use_hw ? time_hw_sum(t1, t2) : time_spec_sum(t1, t2);
+}
+
+/* Convert a time value to nanoseconds.
+ * The value is read as a HW counter when global.use_hw is set and as
+ * a seconds/nanoseconds pair otherwise. */
+static inline uint64_t time_to_ns(odp_time_t time)
+{
+	return global.use_hw ? time_hw_to_ns(time) : time_spec_to_ns(time);
+}
+
+/* Convert nanoseconds to a time value.
+ * Produces a HW counter value when global.use_hw is set and a
+ * seconds/nanoseconds pair otherwise. */
+static inline odp_time_t time_from_ns(uint64_t ns)
+{
+	return global.use_hw ? time_hw_from_ns(ns) : time_spec_from_ns(ns);
+}
+
+/* Busy-wait: poll the current time until 'time' is no longer ahead of it */
+static inline void time_wait_until(odp_time_t time)
+{
+	for (;;) {
+		if (time_cmp(time, time_cur()) <= 0)
+			return;
+	}
 }
 
 odp_time_t odp_time_local(void)
 {
-	return time_local();
+	return time_cur();
 }
 
 odp_time_t odp_time_global(void)
 {
-	return time_local();
+	return time_cur();
 }
 
 odp_time_t odp_time_diff(odp_time_t t2, odp_time_t t1)
@@ -134,12 +294,12 @@  uint64_t odp_time_to_ns(odp_time_t time)
 
 odp_time_t odp_time_local_from_ns(uint64_t ns)
 {
-	return time_local_from_ns(ns);
+	return time_from_ns(ns);
 }
 
 odp_time_t odp_time_global_from_ns(uint64_t ns)
 {
-	return time_local_from_ns(ns);
+	return time_from_ns(ns);
 }
 
 int odp_time_cmp(odp_time_t t2, odp_time_t t1)
@@ -154,18 +314,18 @@  odp_time_t odp_time_sum(odp_time_t t1, odp_time_t t2)
 
 uint64_t odp_time_local_res(void)
 {
-	return time_local_res();
+	return time_res();
 }
 
 uint64_t odp_time_global_res(void)
 {
-	return time_local_res();
+	return time_res();
 }
 
 void odp_time_wait_ns(uint64_t ns)
 {
-	odp_time_t cur = time_local();
-	odp_time_t wait = time_local_from_ns(ns);
+	odp_time_t cur = time_cur();
+	odp_time_t wait = time_from_ns(ns);
 	odp_time_t end_time = time_sum(cur, wait);
 
 	time_wait_until(end_time);
@@ -193,15 +353,31 @@  uint64_t odp_time_to_u64(odp_time_t time)
 
 int odp_time_init_global(void)
 {
-	int ret;
-	struct timespec time;
-
-	ret = clock_gettime(CLOCK_MONOTONIC_RAW, &time);
-	if (ret) {
-		start_time = ODP_TIME_NULL;
-	} else {
-		start_time.tv_sec = time.tv_sec;
-		start_time.tv_nsec = time.tv_nsec;
+	struct timespec sys_time;
+	int ret = 0;
+
+	memset(&global, 0, sizeof(time_global_t));
+
+	if (cpu_has_global_time()) {
+		global.use_hw = 1;
+		global.hw_freq_hz  = cpu_global_time_freq();
+
+		if (global.hw_freq_hz == 0)
+			return -1;
+
+		printf("HW time counter freq: %" PRIu64 " hz\n\n",
+		       global.hw_freq_hz);
+
+		global.hw_start = cpu_global_time();
+		return 0;
+	}
+
+	global.start_time = ODP_TIME_NULL;
+
+	ret = clock_gettime(CLOCK_MONOTONIC_RAW, &sys_time);
+	if (ret == 0) {
+		global.start_time.spec.tv_sec  = sys_time.tv_sec;
+		global.start_time.spec.tv_nsec = sys_time.tv_nsec;
 	}
 
 	return ret;