diff mbox

[DRAFT] time: Fix race around clocksource changes

Message ID 1495254833-26493-1-git-send-email-john.stultz@linaro.org
State New
Headers show

Commit Message

John Stultz May 20, 2017, 4:33 a.m. UTC
Just an early version of the patch I'm working on. Wanted
to get your feedback on what you thought about it.

-john

In some testing on arm64 platforms, I was seeing null ptr
crashes in the kselftest/timers clocksource-switch test.

This was happening in a read function like:
u64 clocksource_mmio_readl_down(struct clocksource *c)
{
    return ~(u64)readl_relaxed(to_mmio_clksrc(c)->reg) & c->mask;
}

Where the callers enter the seqlock, and then call something
like:
    cycle_now = tkr->read(tkr->clock);

The problem seeming to be that since the read and clock
references are happening separately, its possible the clocksource
change happens in between and we end up calling the old read with
the new clocksource, (or vice-versa) which causes the
to_mmio_clksrc() in the read function to run off into space.

This patch tries to address the issue by removing the tkr->read
pointer (which basically is a cached copy of the tkr->clock->read
ptr), and by reading the clocksource ptr into a local clock ptr,
then calling "clock->read(clock)". This way we always call the
read funciton with the appropriate clocksource and don't
accidentaly mix them.

This draft does have one issue I need to resolve, which is that
there is logic to set the tkr.read to a dummy function when
timekeeping is suspended, which avoids the perf_clock methods
from trying to access the clocksource when the timekeeping core
is halted. I've not really addressed this here yet, but wanted
to see if folks had any alternative ideas for a solution.

Signed-off-by: John Stultz <john.stultz@linaro.org>

---
 include/linux/timekeeper_internal.h |  2 --
 kernel/time/timekeeping.c           | 43 ++++++++++++++++++++++---------------
 2 files changed, 26 insertions(+), 19 deletions(-)

-- 
2.7.4
diff mbox

Patch

diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index 528cc86..5b3b5a6 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -13,7 +13,6 @@ 
 /**
  * struct tk_read_base - base structure for timekeeping readout
  * @clock:	Current clocksource used for timekeeping.
- * @read:	Read function of @clock
  * @mask:	Bitmask for two's complement subtraction of non 64bit clocks
  * @cycle_last: @clock cycle value at last update
  * @mult:	(NTP adjusted) multiplier for scaled math conversion
@@ -29,7 +28,6 @@ 
  */
 struct tk_read_base {
 	struct clocksource	*clock;
-	u64			(*read)(struct clocksource *cs);
 	u64			mask;
 	u64			cycle_last;
 	u32			mult;
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 721c7f1..2ac5c54 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -163,6 +163,7 @@  static void timekeeping_check_update(struct timekeeper *tk, u64 offset)
 static inline u64 timekeeping_get_delta(struct tk_read_base *tkr)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
+	struct clocksource *clock;
 	u64 now, last, mask, max, delta;
 	unsigned int seq;
 
@@ -175,7 +176,8 @@  static inline u64 timekeeping_get_delta(struct tk_read_base *tkr)
 	 */
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
-		now = tkr->read(tkr->clock);
+		clock = tkr->clock;
+		now = clock->read(clock);
 		last = tkr->cycle_last;
 		mask = tkr->mask;
 		max = tkr->clock->max_cycles;
@@ -207,9 +209,11 @@  static inline void timekeeping_check_update(struct timekeeper *tk, u64 offset)
 static inline u64 timekeeping_get_delta(struct tk_read_base *tkr)
 {
 	u64 cycle_now, delta;
+	struct clocksource *clock;
 
 	/* read clocksource */
-	cycle_now = tkr->read(tkr->clock);
+	clock = tkr->clock;
+	cycle_now = clock->read(clock);
 
 	/* calculate the delta since the last update_wall_time */
 	delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);
@@ -238,12 +242,10 @@  static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
 	++tk->cs_was_changed_seq;
 	old_clock = tk->tkr_mono.clock;
 	tk->tkr_mono.clock = clock;
-	tk->tkr_mono.read = clock->read;
 	tk->tkr_mono.mask = clock->mask;
-	tk->tkr_mono.cycle_last = tk->tkr_mono.read(clock);
+	tk->tkr_mono.cycle_last = clock->read(clock);
 
 	tk->tkr_raw.clock = clock;
-	tk->tkr_raw.read = clock->read;
 	tk->tkr_raw.mask = clock->mask;
 	tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last;
 
@@ -394,17 +396,19 @@  static void update_fast_timekeeper(struct tk_read_base *tkr, struct tk_fast *tkf
 static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
 {
 	struct tk_read_base *tkr;
+	struct clocksource *clock;
 	unsigned int seq;
 	u64 now;
 
 	do {
 		seq = raw_read_seqcount_latch(&tkf->seq);
 		tkr = tkf->base + (seq & 0x01);
+		clock = tkr->clock;
 		now = ktime_to_ns(tkr->base);
 
 		now += timekeeping_delta_to_ns(tkr,
 				clocksource_delta(
-					tkr->read(tkr->clock),
+					clock->read(clock),
 					tkr->cycle_last,
 					tkr->mask));
 	} while (read_seqcount_retry(&tkf->seq, seq));
@@ -475,15 +479,14 @@  static void halt_fast_timekeeper(struct timekeeper *tk)
 {
 	static struct tk_read_base tkr_dummy;
 	struct tk_read_base *tkr = &tk->tkr_mono;
+	struct clocksource *clock = tkr->clock;
 
 	memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
-	cycles_at_suspend = tkr->read(tkr->clock);
-	tkr_dummy.read = dummy_clock_read;
+	cycles_at_suspend = clock->read(clock);
 	update_fast_timekeeper(&tkr_dummy, &tk_fast_mono);
 
 	tkr = &tk->tkr_raw;
 	memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
-	tkr_dummy.read = dummy_clock_read;
 	update_fast_timekeeper(&tkr_dummy, &tk_fast_raw);
 }
 
@@ -653,7 +656,7 @@  static void timekeeping_forward_now(struct timekeeper *tk)
 	u64 cycle_now, delta;
 	u64 nsec;
 
-	cycle_now = tk->tkr_mono.read(clock);
+	cycle_now = clock->read(clock);
 	delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
 	tk->tkr_mono.cycle_last = cycle_now;
 	tk->tkr_raw.cycle_last  = cycle_now;
@@ -918,6 +921,7 @@  time64_t __ktime_get_real_seconds(void)
 void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
+	struct clocksource *clock;
 	unsigned long seq;
 	ktime_t base_raw;
 	ktime_t base_real;
@@ -929,8 +933,8 @@  void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot)
 
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
-
-		now = tk->tkr_mono.read(tk->tkr_mono.clock);
+		clock = tk->tkr_mono.clock;
+		now = clock->read(clock);
 		systime_snapshot->cs_was_changed_seq = tk->cs_was_changed_seq;
 		systime_snapshot->clock_was_set_seq = tk->clock_was_set_seq;
 		base_real = ktime_add(tk->tkr_mono.base,
@@ -1076,6 +1080,7 @@  int get_device_system_crosststamp(int (*get_time_fn)
 {
 	struct system_counterval_t system_counterval;
 	struct timekeeper *tk = &tk_core.timekeeper;
+	struct clocksource *clock;
 	u64 cycles, now, interval_start;
 	unsigned int clock_was_set_seq = 0;
 	ktime_t base_real, base_raw;
@@ -1100,7 +1105,8 @@  int get_device_system_crosststamp(int (*get_time_fn)
 		 * system counter value is the same as the currently installed
 		 * timekeeper clocksource
 		 */
-		if (tk->tkr_mono.clock != system_counterval.cs)
+		clock = tk->tkr_mono.clock;
+		if (clock != system_counterval.cs)
 			return -ENODEV;
 		cycles = system_counterval.cycles;
 
@@ -1108,7 +1114,7 @@  int get_device_system_crosststamp(int (*get_time_fn)
 		 * Check whether the system counter value provided by the
 		 * device driver is on the current timekeeping interval.
 		 */
-		now = tk->tkr_mono.read(tk->tkr_mono.clock);
+		now = clock->read(clock);
 		interval_start = tk->tkr_mono.cycle_last;
 		if (!cycle_between(interval_start, cycles, now)) {
 			clock_was_set_seq = tk->clock_was_set_seq;
@@ -1603,7 +1609,7 @@  void timekeeping_inject_sleeptime64(struct timespec64 *delta)
 void timekeeping_resume(void)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
-	struct clocksource *clock = tk->tkr_mono.clock;
+	struct clocksource *clock;
 	unsigned long flags;
 	struct timespec64 ts_new, ts_delta;
 	u64 cycle_now;
@@ -1629,7 +1635,8 @@  void timekeeping_resume(void)
 	 * The less preferred source will only be tried if there is no better
 	 * usable source. The rtc part is handled separately in rtc core code.
 	 */
-	cycle_now = tk->tkr_mono.read(clock);
+	clock = tk->tkr_mono.clock;
+	cycle_now = clock->read(clock);
 	if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) &&
 		cycle_now > tk->tkr_mono.cycle_last) {
 		u64 nsec, cyc_delta;
@@ -2017,6 +2024,7 @@  void update_wall_time(void)
 {
 	struct timekeeper *real_tk = &tk_core.timekeeper;
 	struct timekeeper *tk = &shadow_timekeeper;
+	struct clocksource *clock;
 	u64 offset;
 	int shift = 0, maxshift;
 	unsigned int clock_set = 0;
@@ -2031,7 +2039,8 @@  void update_wall_time(void)
 #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
 	offset = real_tk->cycle_interval;
 #else
-	offset = clocksource_delta(tk->tkr_mono.read(tk->tkr_mono.clock),
+	clock = tk->tkr_mono.clock;
+	offset = clocksource_delta(clock->read(clock),
 				   tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
 #endif