[RFC] sched_clock: Track monotonic raw clock

Message ID 1405705419-4194-1-git-send-email-pawel.moll@arm.com
State New
Headers show

Commit Message

Pawel Moll July 18, 2014, 5:43 p.m.
This change is trying to make the sched clock "similar" to the
monotonic raw one.

The main goal is to provide some kind of unification between time
flow in kernel and in user space, mainly to achieve correlation
between perf timestamps and clock_gettime(CLOCK_MONOTONIC_RAW).
This has been suggested by Ingo and John during the latest
discussion (of many, we tried custom ioctl, custom clock etc.)
about this:

http://thread.gmane.org/gmane.linux.kernel/1611683/focus=1612554

For now I focused on the generic sched clock implementation,
but similar approach can be applied elsewhere.

Initially I just wanted to copy epoch from monotonic to sched
clock at update_clock(), but this can cause the sched clock
going backwards in certain corner cases, eg. when the sched
clock "increases faster" than the monotonic one. I believe
it's a killer issue, but feel free to ridicule me if I worry
too much :-)

In the end I tried to employ some basic control theory technique
to tune the multiplier used to calculate ns from cycles and
it seems to be working in my system, with the average error
in the area of 2-3 clock cycles (I've got both clocks running
at 24MHz, which gives 41ns resolution).

/ # cat /sys/kernel/debug/sched_clock_error
min error: 0ns
max error: 200548913ns
100 samples moving average error: 117ns
/ # cat /sys/kernel/debug/tracing/trace
<...>
          <idle>-0     [000] d.h3  1195.102296: sched_clock_adjust: sched=1195102288457ns, mono=1195102288411ns, error=-46ns, mult_adj=65
          <idle>-0     [000] d.h3  1195.202290: sched_clock_adjust: sched=1195202282416ns, mono=1195202282485ns, error=69ns, mult_adj=38
          <idle>-0     [000] d.h3  1195.302286: sched_clock_adjust: sched=1195302278832ns, mono=1195302278861ns, error=29ns, mult_adj=47
          <idle>-0     [000] d.h3  1195.402278: sched_clock_adjust: sched=1195402271082ns, mono=1195402270872ns, error=-210ns, mult_adj=105
          <idle>-0     [000] d.h3  1195.502278: sched_clock_adjust: sched=1195502270832ns, mono=1195502270950ns, error=118ns, mult_adj=29
          <idle>-0     [000] d.h3  1195.602276: sched_clock_adjust: sched=1195602268707ns, mono=1195602268732ns, error=25ns, mult_adj=50
          <idle>-0     [000] d.h3  1195.702280: sched_clock_adjust: sched=1195702272999ns, mono=1195702272997ns, error=-2ns, mult_adj=55
          <idle>-0     [000] d.h3  1195.802276: sched_clock_adjust: sched=1195802268749ns, mono=1195802268684ns, error=-65ns, mult_adj=71
          <idle>-0     [000] d.h3  1195.902272: sched_clock_adjust: sched=1195902265207ns, mono=1195902265223ns, error=16ns, mult_adj=53
          <idle>-0     [000] d.h3  1196.002276: sched_clock_adjust: sched=1196002269374ns, mono=1196002269283ns, error=-91ns, mult_adj=78
<...>

This code definitely needs more work and testing (I'm not 100%
sure if the Kp and Ki I've picked for the proportional and
integral terms are universal), but for now wanted to see
if this approach makes any sense whatsoever.

All feedback more than appreciated!

Signed-off-by: Pawel Moll <pawel.moll@arm.com>
---
 include/trace/events/sched.h |  28 +++++++++
 kernel/time/sched_clock.c    | 142 ++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 162 insertions(+), 8 deletions(-)

Comments

John Stultz July 18, 2014, 5:51 p.m. | #1
On 07/18/2014 10:43 AM, Pawel Moll wrote:
> This change is trying to make the sched clock "similar" to the
> monotonic raw one.
>
> The main goal is to provide some kind of unification between time
> flow in kernel and in user space, mainly to achieve correlation
> between perf timestamps and clock_gettime(CLOCK_MONOTONIC_RAW).
> This has been suggested by Ingo and John during the latest
> discussion (of many, we tried custom ioctl, custom clock etc.)
> about this:
>
> http://thread.gmane.org/gmane.linux.kernel/1611683/focus=1612554
>
> For now I focused on the generic sched clock implementation,
> but similar approach can be applied elsewhere.
>
> Initially I just wanted to copy epoch from monotonic to sched
> clock at update_clock(), but this can cause the sched clock
> going backwards in certain corner cases, eg. when the sched
> clock "increases faster" than the monotonic one. I believe
> it's a killer issue, but feel free to ridicule me if I worry
> too much :-)
>
> In the end I tried to employ some basic control theory technique
> to tune the multiplier used to calculate ns from cycles and
> it seems to be be working in my system, with the average error
> in the area of 2-3 clock cycles (I've got both clocks running
> at 24MHz, which gives 41ns resolution).
>
> / # cat /sys/kernel/debug/sched_clock_error
> min error: 0ns
> max error: 200548913ns
> 100 samples moving average error: 117ns
> / # cat /sys/kernel/debug/tracing/trace
> <...>
>           <idle>-0     [000] d.h3  1195.102296: sched_clock_adjust: sched=1195102288457ns, mono=1195102288411ns, error=-46ns, mult_adj=65
>           <idle>-0     [000] d.h3  1195.202290: sched_clock_adjust: sched=1195202282416ns, mono=1195202282485ns, error=69ns, mult_adj=38
>           <idle>-0     [000] d.h3  1195.302286: sched_clock_adjust: sched=1195302278832ns, mono=1195302278861ns, error=29ns, mult_adj=47
>           <idle>-0     [000] d.h3  1195.402278: sched_clock_adjust: sched=1195402271082ns, mono=1195402270872ns, error=-210ns, mult_adj=105
>           <idle>-0     [000] d.h3  1195.502278: sched_clock_adjust: sched=1195502270832ns, mono=1195502270950ns, error=118ns, mult_adj=29
>           <idle>-0     [000] d.h3  1195.602276: sched_clock_adjust: sched=1195602268707ns, mono=1195602268732ns, error=25ns, mult_adj=50
>           <idle>-0     [000] d.h3  1195.702280: sched_clock_adjust: sched=1195702272999ns, mono=1195702272997ns, error=-2ns, mult_adj=55
>           <idle>-0     [000] d.h3  1195.802276: sched_clock_adjust: sched=1195802268749ns, mono=1195802268684ns, error=-65ns, mult_adj=71
>           <idle>-0     [000] d.h3  1195.902272: sched_clock_adjust: sched=1195902265207ns, mono=1195902265223ns, error=16ns, mult_adj=53
>           <idle>-0     [000] d.h3  1196.002276: sched_clock_adjust: sched=1196002269374ns, mono=1196002269283ns, error=-91ns, mult_adj=78
> <...>
>
> This code definitely needs more work and testing (I'm not 100%
> sure if the Kp and Ki I've picked for the proportional and
> integral terms are universal), but for now wanted to see
> if this approach makes any sense whatsoever.
>
> All feedback more than appreciated!

Very cool work! I've not been able to review it carefully, but one good
stress test would be to pick a system where the hardware used for
sched_clock is different from the hardware used for timekeeping.

Probably easily done on x86 hardware that normally uses the TSC, but has
HPET/ACPI PM hardware available. After the system boots, change the
clocksource via:
/sys/devices/system/clocksource/clocksource0/current_clocksource


Although, looking again, this looks like it only works on the "generic"
sched_clock (so ARM/ARM64?)...

thanks
-john

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Peter Zijlstra July 18, 2014, 7:06 p.m. | #2
On Fri, Jul 18, 2014 at 10:51:19AM -0700, John Stultz wrote:
> Very cool work! I've not been able to review it carefully, but one good
> stress test would be to pick a system where the hardware used for
> sched_clock is different from the hardware used for timekeeping.
> 
> Probably easily done on x86 hardware that normally uses the TSC, but has
> HPET/ACPI PM hardware available. After the system boots, change the
> clocksource via:
> /sys/devices/system/clocksource/clocksource0/current_clocksource

Note that x86 uses none of the code patched.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Peter Zijlstra July 18, 2014, 7:13 p.m. | #3
On Fri, Jul 18, 2014 at 06:43:39PM +0100, Pawel Moll wrote:
> This change is trying to make the sched clock "similar" to the
> monotonic raw one.
> 
> The main goal is to provide some kind of unification between time
> flow in kernel and in user space, mainly to achieve correlation
> between perf timestamps and clock_gettime(CLOCK_MONOTONIC_RAW).
> This has been suggested by Ingo and John during the latest
> discussion (of many, we tried custom ioctl, custom clock etc.)
> about this:
> 
> http://thread.gmane.org/gmane.linux.kernel/1611683/focus=1612554
> 
> For now I focused on the generic sched clock implementation,
> but similar approach can be applied elsewhere.
> 
> Initially I just wanted to copy epoch from monotonic to sched
> clock at update_clock(), but this can cause the sched clock
> going backwards in certain corner cases, eg. when the sched
> clock "increases faster" than the monotonic one. I believe
> it's a killer issue, but feel free to ridicule me if I worry
> too much :-)

But on hardware using generic sched_clock we use the exact same hardware
as the regular timekeeping, right?

So we could start off with the same offset/mult/shift and never deviate,
or is that a silly question?, I've never really looked at the generic
sched_clock stuff too closely.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
John Stultz July 18, 2014, 7:25 p.m. | #4
On Fri, Jul 18, 2014 at 12:13 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> On Fri, Jul 18, 2014 at 06:43:39PM +0100, Pawel Moll wrote:
>> This change is trying to make the sched clock "similar" to the
>> monotonic raw one.
>>
>> The main goal is to provide some kind of unification between time
>> flow in kernel and in user space, mainly to achieve correlation
>> between perf timestamps and clock_gettime(CLOCK_MONOTONIC_RAW).
>> This has been suggested by Ingo and John during the latest
>> discussion (of many, we tried custom ioctl, custom clock etc.)
>> about this:
>>
>> http://thread.gmane.org/gmane.linux.kernel/1611683/focus=1612554
>>
>> For now I focused on the generic sched clock implementation,
>> but similar approach can be applied elsewhere.
>>
>> Initially I just wanted to copy epoch from monotonic to sched
>> clock at update_clock(), but this can cause the sched clock
>> going backwards in certain corner cases, eg. when the sched
>> clock "increases faster" than the monotonic one. I believe
>> it's a killer issue, but feel free to ridicule me if I worry
>> too much :-)
>
> But on hardware using generic sched_clock we use the exact same hardware
> as the regular timekeeping, right?

Probably most likely, but not necessarily (one can register a
clocksource for sched_clock and then userspace could switch to a
different clocksource for timekeeping).

Also, assuming we someday will merge the x86 sched_clock logic into
the generic sched_clock code, we'll have to handle cases where they
aren't the same.

> So we could start off with the same offset/mult/shift and never deviate,
> or is that a silly question?, I've never really looked at the generic
> sched_clock stuff too closely.

Ideally I'd like to remove the mult/shift pair from clocksources all
together and allow the subsystems that use them to keep their own
mult/shift pair. Mostly because the fine frequency tuning tradeoffs we
want for timekeeping are different from the long-running intervals
without mult overflow we want for sched_clock.

With Thomas' change recently to get the cycle_last bit moved out of
the clocksource structure, we should be fairly close to doing this.

thanks
-john
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Peter Zijlstra July 18, 2014, 7:34 p.m. | #5
On Fri, Jul 18, 2014 at 12:25:48PM -0700, John Stultz wrote:
> Also, assuming we someday will merge the x86 sched_clock logic into
> the generic sched_clock code, we'll have to handle cases where they
> aren't the same.

I prefer that to not happen. I spent quite a bit of time and effort to
make the x86 code go fast, and that generic code doesn't look like fast
at all.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
John Stultz July 18, 2014, 7:46 p.m. | #6
On 07/18/2014 12:34 PM, Peter Zijlstra wrote:
> On Fri, Jul 18, 2014 at 12:25:48PM -0700, John Stultz wrote:
>> Also, assuming we someday will merge the x86 sched_clock logic into
>> the generic sched_clock code, we'll have to handle cases where they
>> aren't the same.
> I prefer that to not happen. I spend quite a bit of time and effort to
> make the x86 code go fast, and that generic code doesn't look like fast
> at all.

A stretch goal then :)

But yes, the generic sched_clock logic has really just started w/ ARM
and is hopefully moving out to pick up more architectures. I suspect it
will need to adapt many of your tricks from (if not a whole migration to
some of) the x86 code. And even if the x86 code stays separate for
optimization reasons, thats fine.

But as folks try to align things like perf timestamps with time domains
we expose to userspace, we'll have to keep some of the semantics in sync
between the various implementations, and having lots of separate
implementations will be a burden.

But yea, I don't have any plans to try to do a grand unification myself,
so don't fret.

thanks
-john

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Peter Zijlstra July 18, 2014, 8:22 p.m. | #7
On Fri, Jul 18, 2014 at 12:46:29PM -0700, John Stultz wrote:
> On 07/18/2014 12:34 PM, Peter Zijlstra wrote:
> > On Fri, Jul 18, 2014 at 12:25:48PM -0700, John Stultz wrote:
> >> Also, assuming we someday will merge the x86 sched_clock logic into
> >> the generic sched_clock code, we'll have to handle cases where they
> >> aren't the same.
> > I prefer that to not happen. I spend quite a bit of time and effort to
> > make the x86 code go fast, and that generic code doesn't look like fast
> > at all.
> 
> A stretch goal then :)
> 
> But yes, the generic sched_clock logic has really just started w/ ARM
> and is hopefully moving out to pick up more architectures. I suspect it
> will need to adapt many of your tricks from (if not a whole migration to
> some of) the x86 code. And even if the x86 code stays separate for
> optimization reasons, thats fine.

So the generic stuff seems optimized for 32bit arch, short clocks and
seems to hard assume the clock is globally consistent.

The x86 sched_clock code is optimized for 64bit, has a full 64bit clock
and cannot ever assume the thing is globally consistent (until Intel/AMD
stop making the TSC register writable -- including, and maybe
especially, for SMM).

There is just not much that overlaps there.

> But as folks try to align things like perf timestamps with time domains
> we expose to userspace, we'll have to keep some of the semantics in sync
> between the various implementations, and having lots of separate
> implementations will be a burden.

We're already there, there's about nr_arch - 5 implementations,
hopefully many trivial ones though.
Peter Zijlstra July 18, 2014, 10:41 p.m. | #8
On Fri, Jul 18, 2014 at 10:22:50PM +0200, Peter Zijlstra wrote:
> So the generic stuff seems optimized for 32bit arch, short clocks and
> seems to hard assume the clock is globally consistent.
> 
> The x86 sched_clock code is optimized for 64bit, has a full 64bit clock
> and cannot ever assume the thing is globally consistent (until Intel/AMD
> stop making the TSC register writable -- including, and maybe
> especially, for SMM).
> 
> There is just not much that overlaps there.

So something that might be usable for all of us would be the
'abstracted' control loop.

So the problem is, given a Set Point (CLOCK_MONOTONIC_RAW), a Process
Variable (sched_clock) compute a Control Output (multiplier).

If that were implemented with an interface like:


struct sched_clock_cl; /* Control Loop state */

/**
 * sched_clock_cl_init - initialize the control loop
 */
extern void sched_clock_cl_init(struct sched_clock_cl *sccl, u32 mult, u32 shift);

/**
 * sched_clock_cl_update - compute a new multiplier for sched_clock
 * @sccl - pointer to control loop state structure
 * @sp - current set-point, aka. CLOCK_MONOTONIC_RAW
 * @pv - current process-variable, aka. sched_clock()
 */
extern u32 sched_clock_cl_update(struct sched_clock_cl *sccl, u64 sp, u64 pv);


That way I can run one per-cpu and try and keep each individual CPU
synced up to CLOCK_MONOTONIC_RAW, thereby also providing some inter-cpu
guarantee (due to max error bounds on the control loop, etc..).

And you can run a single instance for the generic sched_clock code,
since that's globally consistent.

And we'd all still be using the same control loop logic.

Now, I already mentioned max error bounds, and I've not yet looked at
your actual control loop, but that is something to keep in mind, we want
something that's limited.

If we can do this (and I'm fairly sure we can) we can in fact kill some
rather ugly code.
Richard Cochran July 19, 2014, 5:02 a.m. | #9
On Fri, Jul 18, 2014 at 06:43:39PM +0100, Pawel Moll wrote:
> 
> This code definitely needs more work and testing (I'm not 100%
> sure if the Kp and Ki I've picked for the proportional and
> integral terms are universal),

I wouldn't bet on it.

> but for now wanted to see
> if this approach makes any sense whatsoever.

You are reading sched_clock and mono-raw together every so
often. Really stupid question: Why not just place that information
into the trace buffer and let user space do the clock correction?

...

> +		/* Tune the cyc_to_ns formula */
> +		mult_adj = sign * (error >> 2) + (cd.error_int >> 2);

So Kp = Ki = 0.25? And did you say that the sample rate is 10/second?

I guess that, while this works well on your machine, it might not
always do so, depending on the mono-raw clock. Probably Kp/i need to
be tunable to a particular system. Even better would be to leave this
out of the kernel altogether.

Thanks,
Richard
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Pawel Moll July 22, 2014, 4:17 p.m. | #10
On Fri, 2014-07-18 at 18:51 +0100, John Stultz wrote:
> Very cool work! 

Glad that it doesn't sound too ridiculous :-)

> I've not been able to review it carefully, but one good
> stress test would be to pick a system where the hardware used for
> sched_clock is different from the hardware used for timekeeping.

Actually I've got exactly this situation on my board. I've got two
sources, and actually the worse (narrower and more expensive) one is
getting eventually used (because it's registered later - separate patch
to follow).

> Although, looking again, this looks like it only works on the "generic"
> sched_clock (so ARM/ARM64?)...

... and microblaze and xtensa right now, yes. This was just the simplest
thing for me to start with, but I appreciate it doesn't cover all
possible cases. Thus the debugfs attribute to tell userspace what it can
expect from the sched_clock. A hack it is, yes.

Pawel

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Pawel Moll July 22, 2014, 4:17 p.m. | #11
On Fri, 2014-07-18 at 20:34 +0100, Peter Zijlstra wrote:
> On Fri, Jul 18, 2014 at 12:25:48PM -0700, John Stultz wrote:
> > Also, assuming we someday will merge the x86 sched_clock logic into
> > the generic sched_clock code, we'll have to handle cases where they
> > aren't the same.
> 
> I prefer that to not happen. I spend quite a bit of time and effort to
> make the x86 code go fast, and that generic code doesn't look like fast
> at all.

Actually one of my long-ish time goals is to speed it up (within the
generic framework) for arm64 as well, so we may get back to the
discussion then :-)

Pawel

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Pawel Moll July 22, 2014, 4:17 p.m. | #12
On Fri, 2014-07-18 at 23:41 +0100, Peter Zijlstra wrote:
> So something that might be usable for all of us would be the
> 'abstracted' control loop.
> 
> So the problem is, given a Set Point (CLOCK_MONOTONIC_RAW), a Process
> Variable (sched_clock) compute a Control Output (multiplier).
> 
> If that were implemented with an interface like:
> 
> 
> struct sched_clock_cl; /* Control Loop state */
> 
> /**
>  * sched_clock_cl_init - intialize the control loop
>  */
> extern void sched_clock_cl_init(struct sched_clock_cl *sccl, u32 mult, u32 shift);
> 
> /**
>  * sched_clock_cl_update - compute a new multiplier for sched_clock
>  * @sccl - pointer to control loop state structure
>  * @sp - current set-point, aka. CLOCK_MONOTONIC_RAW
>  * @pv - current process-variable, aka. sched_clock()
>  */
> extern u32 sched_clock_cl_update(struct sched_clock_cl *sccl, u64 sp, u64 pv);

Yeah, happy to provide this.

> That way I can run one per-cpu and try and keep each individual CPU
> synced up to CLOCK_MONOTONIC_RAW, thereby also providing some inter-cpu
> guarantee (due to max error bounds on the control loop, etc..).
> 
> And you can run a single instance for the generic sched_clock code,
> since that's globally consistent.
> 
> And we'd all still be using the same control loop logic.
> 
> Now, I already mentioned max error bounds, and I've not yet looked at
> your actual control loop, but that is something to keep in mind, we want
> something that's limited.

Indeed. Richard has already expressed concerns that my coefficients
won't work everywhere (as in with different clock rate ratios) and he's
most likely right. My money is on a more advanced, "self-tuning"
solution, where we definitely need an escape condition.

> If we can do this (and I'm fairly sure we can) we can in fact kill some
> rather ugly code.

I'm really glad we're more or less on the same page. Will do some more
research in different conditions. Of course all ideas and suggestions
are more than welcome!

Pawel

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Pawel Moll July 22, 2014, 4:17 p.m. | #13
On Sat, 2014-07-19 at 06:02 +0100, Richard Cochran wrote:
> On Fri, Jul 18, 2014 at 06:43:39PM +0100, Pawel Moll wrote:
> > 
> > This code definitely needs more work and testing (I'm not 100%
> > sure if the Kp and Ki I've picked for the proportional and
> > integral terms are universal),
> 
> I wouldn't bet on it.

Yeah, I should have said: I'm 100% sure they are not universal.

> > but for now wanted to see
> > if this approach makes any sense whatsoever.
> 
> You are reading sched_clock and mono-raw together every so
> often. Really stupid question: Why not just place that information
> into the trace buffer and let user space do the clock correction?

That approach has been also discussed, last time in the mentioned
thread:

http://thread.gmane.org/gmane.linux.kernel/1611683/focus=1612554
http://thread.gmane.org/gmane.linux.kernel/1611683/focus=1612554

With both Ingo and John showing preference towards the clock alignment,
so that's where I looked this time (I've already done custom perf
ioctls, posix clocks... don't really know how many different ways I've
tried).

> ...
> 
> > +		/* Tune the cyc_to_ns formula */
> > +		mult_adj = sign * (error >> 2) + (cd.error_int >> 2);
> 
> So Kp = Ki = 0.25? And did you say that the sample rate is 10/second?
> 
> I guess that, while this works well on your machine, it might not
> always do so, depending on the mono-raw clock. Probably Kp/i need to
> be tunable to a particular system. Even better would be to leave this
> out of the kernel altogether.

My hope is that, given that time correlation is pretty "static" process
(the clock skew should be reasonable, otherwise the time source are
plainly rubbish), there will be a way of implementing this in a
self-tuning way. I may be proven wrong, but it seems like a noble thing
to try.

I fully agree that there is no place for manual PI regulator tuning in
the kernel.

Pawel

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Peter Zijlstra July 22, 2014, 4:29 p.m. | #14
On Tue, Jul 22, 2014 at 05:17:23PM +0100, Pawel Moll wrote:
> > Now, I already mentioned max error bounds, and I've not yet looked at
> > your actual control loop, but that is something to keep in mind, we want
> > something that's limited.
> 
> Indeed. Richard has already expressed concerns that my coefficients
> won't work everywhere (as in with different clock rate ratios) and he's
> most likely right. My money is on a more advanced, "self-tuning"
> solution, where we definitely need an escape condition.

I was wanting to look up software PLLs, as I seem to remember that's
what NTP and PTP use, but Richard should know I think.

Its not really a subject I've looked at before.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Peter Zijlstra July 22, 2014, 4:34 p.m. | #15
On Tue, Jul 22, 2014 at 05:17:29PM +0100, Pawel Moll wrote:
> With both Ingo and John showing preference towards the clock alignment,
> so that's where I looked this time (I've already done custom perf
> ioctls, posix clocks... don't really know how many different ways I've
> tried).

So we should probably also talk about which clock to track, MONO has the
advantage of making it far easier to trace clusters but has the
disadvantage of stacked control loops with NTP adjusting MONO and us
adjusting sched_clock.

And I would really prefer to pick 1 and not make it configurable.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Pawel Moll July 22, 2014, 4:48 p.m. | #16
On Tue, 2014-07-22 at 17:34 +0100, Peter Zijlstra wrote:
> On Tue, Jul 22, 2014 at 05:17:29PM +0100, Pawel Moll wrote:
> > With both Ingo and John showing preference towards the clock alignment,
> > so that's where I looked this time (I've already done custom perf
> > ioctls, posix clocks... don't really know how many different ways I've
> > tried).
> 
> So we should probably also talk about which clock to track, MONO has the
> advantage of making it far easier to trace clusters but has the
> disadvantage of stacked control loops with NTP adjusting MONO and us
> adjusting sched_clock.

John suggested (and I fully agree with him) MONO_RAW, as it is not
getting NTP-d. 

> And I would really prefer to pick 1 and not make it configurable.

Same here. One thing I keep in mind is the fact that userspace must be
able to say whether it can expect the correlation or not. "Not" being
either an architecture which sched_clock is not using the generic
solution (I'm sure there will be some) or "not" because of the
synchronisation failure. My idea so far was a debugfs file saying this
(or missing, which is a message on its own).

Pawel


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Richard Cochran July 22, 2014, 7:39 p.m. | #17
On Tue, Jul 22, 2014 at 05:17:29PM +0100, Pawel Moll wrote:
> That approach has been also discussed, last time in the mentioned
> thread:
> 
> http://thread.gmane.org/gmane.linux.kernel/1611683/focus=1612554
> http://thread.gmane.org/gmane.linux.kernel/1611683/focus=1612554
> 
> With both Ingo and John showing preference towards the clock alignment,
> so that's where I looked this time (I've already done custom perf
> ioctls, posix clocks... don't really know how many different ways I've
> tried).

I re-read that thread, and it sounds like Ingo also suggested simply
tracing the system time updates, leaving the correlation to the perf
clock to user space. This is what I mean, too.

John and Ingo said that exporting MONO-RAW would be "ideal", and maybe
it would be (in the sense of easy-to-use), but only if it were always
correct.  However, hacking some half baked servo into the kernel will
fail, at least some of the time.  When it does fail, there is no way
to figure out what happened, because the servo code obliterates the
information in the original time stamps.

So the only reasonable way, IMHO, is to simply provide the needed
information in the traces, and then add some user space code to find
the relationship between the perf clock and the system clock. The
great advantage of this approach is that you don't have to create a
perfect servo from day one. The time stamps can be re-analyzed at any
time after the fact.

And if you are going to use this for clusters, then you really want
the global time and not MONO-RAW. I would suggest using CLOCK_TAI.

Thanks,
Richard
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Patch hide | download patch | download mbox

diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 0a68d5a..4cca9bb 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -550,6 +550,34 @@  TRACE_EVENT(sched_wake_idle_without_ipi,
 
 	TP_printk("cpu=%d", __entry->cpu)
 );
+
+/*
+ * Tracepoint for sched clock adjustments
+ */
+TRACE_EVENT(sched_clock_adjust,
+
+	TP_PROTO(u64 sched, u64 mono, s32 mult_adj),
+
+	TP_ARGS(sched, mono, mult_adj),
+
+	TP_STRUCT__entry(
+		__field( u64,	sched		)
+		__field( u64,	mono		)
+		__field( s32,	mult_adj	)
+	),
+
+	TP_fast_assign(
+		__entry->sched		= sched;
+		__entry->mono		= mono;
+		__entry->mult_adj	= mult_adj;
+	),
+
+	TP_printk("sched=%lluns, mono=%lluns, error=%lldns, mult_adj=%d",
+		(unsigned long long)__entry->sched,
+		(unsigned long long)__entry->mono,
+		(long long)(__entry->mono - __entry->sched),
+		__entry->mult_adj)
+);
 #endif /* _TRACE_SCHED_H */
 
 /* This part must be outside protection */
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index 445106d..b9c9e04 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -17,18 +17,26 @@ 
 #include <linux/sched_clock.h>
 #include <linux/seqlock.h>
 #include <linux/bitops.h>
+#include <linux/debugfs.h>
+
+#include <trace/events/sched.h>
 
 struct clock_data {
 	ktime_t wrap_kt;
 	u64 epoch_ns;
+	u64 epoch_mono_ns;
 	u64 epoch_cyc;
 	seqcount_t seq;
 	unsigned long rate;
 	u32 mult;
+	s32 mult_adj;
+	s64 error_int;
 	u32 shift;
 	bool suspended;
 };
 
+#define REFRESH_PERIOD 100000000ULL /* 10^8 ns = 0.1 s */
+
 static struct hrtimer sched_clock_timer;
 static int irqtime = -1;
 
@@ -38,6 +46,45 @@  static struct clock_data cd = {
 	.mult	= NSEC_PER_SEC / HZ,
 };
 
+#ifdef DEBUG
+#define ERROR_LOG_LEN (NSEC_PER_SEC / REFRESH_PERIOD * 10) /* 10 s */
+static u64 sched_clock_error_log[ERROR_LOG_LEN];
+static int sched_clock_error_log_next;
+static int sched_clock_error_log_len;
+
+static u64 sched_clock_error_max;
+static u64 sched_clock_error_min = ~0;
+
+static int sched_clock_error_log_show(struct seq_file *m, void *p)
+{
+	u64 avg = 0;
+	int i;
+
+	for (i = 0; i < sched_clock_error_log_len; i++)
+		avg += sched_clock_error_log[i];
+	if (sched_clock_error_log_len)
+		do_div(avg, sched_clock_error_log_len);
+
+	seq_printf(m, "min error: %lluns\n", sched_clock_error_min);
+	seq_printf(m, "max error: %lluns\n", sched_clock_error_max);
+	seq_printf(m, "%d samples moving average error: %lluns\n",
+			sched_clock_error_log_len, avg);
+
+	return 0;
+}
+
+static int sched_clock_error_log_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, sched_clock_error_log_show, inode->i_private);
+}
+
+static const struct file_operations sched_clock_error_log_fops = {
+	.open		= sched_clock_error_log_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+#endif
+
 static u64 __read_mostly sched_clock_mask;
 
 static u64 notrace jiffy_sched_clock_read(void)
@@ -74,7 +121,7 @@  unsigned long long notrace sched_clock(void)
 
 	cyc = read_sched_clock();
 	cyc = (cyc - epoch_cyc) & sched_clock_mask;
-	return epoch_ns + cyc_to_ns(cyc, cd.mult, cd.shift);
+	return epoch_ns + cyc_to_ns(cyc, cd.mult + cd.mult_adj, cd.shift);
 }
 
 /*
@@ -83,18 +130,72 @@  unsigned long long notrace sched_clock(void)
 static void notrace update_sched_clock(void)
 {
 	unsigned long flags;
-	u64 cyc;
+	u64 cyc, delta_cyc;
 	u64 ns;
+	u64 mono_ns = 0;
+	s64 error_ns = 0, error = 0, error_int = 0;
+	s32 mult_adj = 0;
+	struct timespec mono;
 
 	cyc = read_sched_clock();
-	ns = cd.epoch_ns +
-		cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask,
-			  cd.mult, cd.shift);
+	delta_cyc = (cyc - cd.epoch_cyc) & sched_clock_mask;
+	ns = cd.epoch_ns + cyc_to_ns(delta_cyc, cd.mult + cd.mult_adj,
+			cd.shift);
+
+	/*
+	 * Sample the raw monotonic clock exactly once per update; the first
+	 * sample merely initializes cd.epoch_mono_ns (controller runs below
+	 * only once the epoch is non-zero).
+	 */
+	getrawmonotonic(&mono);
+	mono_ns = timespec_to_ns(&mono);
+
+	if (cd.epoch_mono_ns) {
+		int sign;
+
+		/* We have a simple PI controller here */
+		error_ns = mono_ns - ns;
+		sign = (error_ns > 0) - (error_ns < 0);
+		error_ns = abs(error_ns);
+
+		/* Convert error in ns into "mult units" */
+		error = error_ns;
+		if (delta_cyc >> cd.shift)
+			do_div(error, delta_cyc >> cd.shift);
+		else
+			error = 0;
+
+		/* Integral term of the controller */
+		error_int = error * (mono_ns - cd.epoch_mono_ns);
+		do_div(error_int, NSEC_PER_SEC);
+		error_int = sign * error_int + cd.error_int;
+
+		/* Tune the cyc_to_ns formula */
+		mult_adj = sign * (error >> 2) + (cd.error_int >> 2);
+
+#ifdef DEBUG
+		sched_clock_error_log[sched_clock_error_log_next++] = error_ns;
+		if (sched_clock_error_log_next == ERROR_LOG_LEN)
+			sched_clock_error_log_next = 0;
+		if (sched_clock_error_log_len < ERROR_LOG_LEN)
+			sched_clock_error_log_len++;
+
+		if (error_ns < sched_clock_error_min)
+			sched_clock_error_min = error_ns;
+		if (error_ns > sched_clock_error_max)
+			sched_clock_error_max = error_ns;
+#endif
+
+		trace_sched_clock_adjust(ns, mono_ns, mult_adj);
+	}
 
 	raw_local_irq_save(flags);
 	raw_write_seqcount_begin(&cd.seq);
 	cd.epoch_ns = ns;
+	cd.epoch_mono_ns = mono_ns;
 	cd.epoch_cyc = cyc;
+	cd.mult_adj = mult_adj;
+	cd.error_int = error_int;
 	raw_write_seqcount_end(&cd.seq);
 	raw_local_irq_restore(flags);
 }
@@ -127,13 +228,15 @@  void __init sched_clock_register(u64 (*read)(void), int bits,
 
 	/* calculate how many ns until we wrap */
 	wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask);
-	new_wrap_kt = ns_to_ktime(wrap - (wrap >> 3));
+
+	/* update epoch before we wrap and at least once per refresh period */
+	new_wrap_kt = ns_to_ktime(min(wrap - (wrap >> 3), REFRESH_PERIOD));
 
 	/* update epoch for new counter and update epoch_ns from old counter*/
 	new_epoch = read();
 	cyc = read_sched_clock();
 	ns = cd.epoch_ns + cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask,
-			  cd.mult, cd.shift);
+			  cd.mult + cd.mult_adj, cd.shift);
 
 	raw_write_seqcount_begin(&cd.seq);
 	read_sched_clock = read;
@@ -141,6 +244,7 @@  void __init sched_clock_register(u64 (*read)(void), int bits,
 	cd.rate = rate;
 	cd.wrap_kt = new_wrap_kt;
 	cd.mult = new_mult;
+	cd.mult_adj = 0;
 	cd.shift = new_shift;
 	cd.epoch_cyc = new_epoch;
 	cd.epoch_ns = ns;
@@ -209,7 +313,29 @@  static struct syscore_ops sched_clock_ops = {
 
 static int __init sched_clock_syscore_init(void)
 {
+	int err = 0;
+
 	register_syscore_ops(&sched_clock_ops);
-	return 0;
+
+	/*
+	 * As long as not everyone is using this generic implementation,
+	 * userspace must be able to tell what does the sched_clock values
+	 * relate to (if anything).
+	 */
+	if (read_sched_clock != jiffy_sched_clock_read) {
+		static struct debugfs_blob_wrapper blob;
+
+		blob.data = "CLOCK_MONOTONIC_RAW";
+		blob.size = strlen(blob.data) + 1;
+		err = PTR_ERR_OR_ZERO(debugfs_create_blob("sched_clock_base",
+				S_IRUGO, NULL, &blob));
+	}
+
+#ifdef DEBUG
+	if (!err)
+		err = PTR_ERR_OR_ZERO(debugfs_create_file("sched_clock_error",
+					S_IRUGO, NULL, NULL,
+					&sched_clock_error_log_fops));
+#endif
+
+	return err;
 }
 device_initcall(sched_clock_syscore_init);