[V8,3/3] irq: Compute the periodic interval for interrupts

Message ID	1490290924-12958-3-git-send-email-daniel.lezcano@linaro.org
State	New
Headers	show Delivered-To: patch@linaro.org Received-SPF: pass (google.com: best guess record for domain of linux-kernel-owner@vger.kernel.org designates 209.132.180.67 as permitted sender) client-ip=209.132.180.67; From: Daniel Lezcano <daniel.lezcano@linaro.org> To: tglx@linutronix.de Cc: linux-kernel@vger.kernel.org, peterz@infradead.org, nicolas.pitre@linaro.org, rafael@kernel.org, vincent.guittot@linaro.org Subject: [PATCH V8 3/3] irq: Compute the periodic interval for interrupts Date: Thu, 23 Mar 2017 18:42:03 +0100 Message-Id: <1490290924-12958-3-git-send-email-daniel.lezcano@linaro.org> In-Reply-To: <1490290924-12958-1-git-send-email-daniel.lezcano@linaro.org> References: <1490290924-12958-1-git-send-email-daniel.lezcano@linaro.org> Sender: linux-kernel-owner@vger.kernel.org Precedence: bulk
Series	None \| expand [V8,2/3] irq: Track the interrupt timings [V8,3/3] irq: Compute the periodic interval for interrupts

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index c050333..62bb0c9 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -707,6 +707,7 @@ static inline void init_irq_proc(void) #ifdef CONFIG_IRQ_TIMINGS void irq_timings_enable(void); void irq_timings_disable(void); +u64 irq_timings_next_event(u64 now); #endif struct seq_file; diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index abaadaa..677c1a2 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -247,13 +247,21 @@ struct irq_timings { DECLARE_PER_CPU(struct irq_timings, irq_timings); +extern void irq_timings_free(int irq); +extern int irq_timings_alloc(int irq); + static inline void remove_timings(struct irq_desc *desc) { desc->istate &= ~IRQS_TIMINGS; + + irq_timings_free(irq_desc_get_irq(desc)); } static inline void setup_timings(struct irq_desc *desc, struct irqaction *act) { + int irq = irq_desc_get_irq(desc); + int ret; + /* * We don't need the measurement because the idle code already * knows the next expiry event. @@ -261,6 +269,17 @@ static inline void setup_timings(struct irq_desc *desc, struct irqaction *act) if (act->flags & __IRQF_TIMER) return; + /* + * In case the timing allocation fails, we just want to warn, + * not fail, so letting the system boot anyway. + */ + ret = irq_timings_alloc(irq); + if (ret) { + pr_warn("Failed to allocate irq timing stats for irq%d (%d)", + irq, ret); + return; + } + desc->istate |= IRQS_TIMINGS; } diff --git a/kernel/irq/timings.c b/kernel/irq/timings.c index 21ceca8..a3879cb 100644 --- a/kernel/irq/timings.c +++ b/kernel/irq/timings.c @@ -9,9 +9,14 @@ * */ #include <linux/percpu.h> +#include <linux/slab.h> #include <linux/static_key.h> #include <linux/interrupt.h> +#include <linux/idr.h> #include <linux/irq.h> +#include <linux/math64.h> + +#include <trace/events/irq.h> #include "internals.h" @@ -19,6 +24,18 @@ DEFINE_PER_CPU(struct irq_timings, irq_timings); +struct irq_stat { + u64 ne; /* next event */ + u64 lts; /* last timestamp */ + u64 variance; /* variance */ + u32 avg; /* mean value */ + u32 count; /* number of samples */ + int anomalies; /* number of consecutives anomalies */ + int valid; /* behaviour of the interrupt */ +}; + +static DEFINE_IDR(irq_stats); + void irq_timings_enable(void) { static_branch_inc(&irq_timing_enabled); @@ -28,3 +45,330 @@ void irq_timings_disable(void) { static_branch_dec(&irq_timing_enabled); } + +/** + * irqs_update - update the irq timing statistics with a new timestamp + * + * @irqs: an irq_stat struct pointer + * @ts: the new timestamp + * + * ** This function must be called with the local irq disabled ** + * + * The statistics are computed online, in other words, the code is + * designed to compute the statistics on a stream of values rather + * than doing multiple passes on the values to compute the average, + * then the variance. The integer division introduces a loss of + * precision but with an acceptable error margin regarding the results + * we would have with the double floating precision: we are dealing + * with nanosec, so big numbers, consequently the mantisse is + * negligeable, especially when converting the time in usec + * afterwards. + * + * The computation happens at idle time. When the CPU is not idle, the + * interrupts' timestamps are stored in the circular buffer, when the + * CPU goes idle and this routine is called, all the buffer's values + * are injected in the statistical model continuying to extend the + * statistics from the previous busy-idle cycle. + * + * The observations showed a device will trigger a burst of periodic + * interrupts followed by one or two peaks of longer time, for + * instance when a SD card device flushes its cache, then the periodic + * intervals occur again. A one second inactivity period resets the + * stats, that gives us the certitude the statistical values won't + * exceed 1x10^9, thus the computation won't overflow. + * + * Basically, the purpose of the algorithm is to watch the periodic + * interrupts and eliminate the peaks. + * + * An interrupt is considered periodically stable if the interval of + * its occurences follow the normal distribution, thus the values + * comply with: + * + * avg - 3 x stddev < value < avg + 3 x stddev + * + * Which can be simplified to: + * + * -3 x stddev < value - avg < 3 x stddev + * + * abs(value - avg) < 3 x stddev + * + * In order to save a costly square root computation, we use the + * variance. For the record, stddev = sqrt(variance). The equation + * above becomes: + * + * abs(value - avg) < 3 x sqrt(variance) + * + * And finally we square it: + * + * (value - avg) ^ 2 < (3 x sqrt(variance)) ^ 2 + * + * (value - avg) x (value - avg) < 9 x variance + * + * Statistically speaking, any values out of this interval is + * considered as an anomaly and is discarded. However, a normal + * distribution appears when the number of samples is 30 (it is the + * rule of thumb in statistics, cf. "30 samples" on Internet). When + * there are three consecutive anomalies, the statistics are resetted. + * + */ +static void irqs_update(struct irq_stat *irqs, u64 ts) +{ + u64 old_ts = irqs->lts; + u64 variance = 0; + u64 interval; + s64 diff; + + /* + * The timestamps are absolute time values, we need to compute + * the timing interval between two interrupts. + */ + irqs->lts = ts; + + /* + * The interval type is u64 in order to deal with the same + * type in our computation, that prevent mindfuck issues with + * overflow, sign and do_div. + */ + interval = ts - old_ts; + + /* + * The interrupt triggered more than one second apart, that + * ends the sequence as predictible for our purpose. In this + * case, assume we have the beginning of a sequence and the + * timestamp is the first value. As it is impossible to + * predict anything at this point, return. + * + * Note the first timestamp of the sequence will always fall + * in this test because the old_ts is zero. That is what we + * want as we need another timestamp to compute an interval. + */ + if (interval >= NSEC_PER_SEC) { + memset(irqs, 0, sizeof(*irqs)); + irqs->lts = ts; + return; + } + + /* + * Pre-compute the delta with the average as the result is + * used several times in this function. + */ + diff = interval - irqs->avg; + + /* + * Increment the number of samples. + */ + irqs->count++; + + /* + * Online variance divided by the number of elements if there + * is more than one sample. + */ + if (likely(irqs->count > 1)) + variance = div_u64(irqs->variance, irqs->count - 1); + + /* + * The rule of thumb in statistics for the normal distribution + * is having at least 30 samples in order to have the model to + * apply. Values outside the interval are considered as an + * anomaly. + */ + if ((irqs->count >= 30) && ((diff * diff) > (9 * variance))) { + /* + * After three consecutive anomalies, we reset the + * stats as it is no longer stable enough. + */ + if (irqs->anomalies++ >= 3) { + memset(irqs, 0, sizeof(*irqs)); + irqs->lts = ts; + return; + } + } else { + /* + * The anomalies must be consecutives, so at this + * point, we reset the anomalies counter. + */ + irqs->anomalies = 0; + } + + /* + * The interrupt is considered stable enough to try to predict + * the next event on it. + */ + irqs->valid = 1; + + /* + * Online average algorithm: + * + * new_average = average + ((value - average) / count) + * + * The variance computation depends on the new average + * to be computed here first. + * + */ + irqs->avg = irqs->avg + div_s64(diff, irqs->count); + + /* + * Online variance algorithm: + * + * new_variance = variance + (value - average) x (value - new_average) + * + * Warning: irqs->avg is updated with the line above, hence + * 'interval - irqs->avg' is no longer equal to 'diff' + */ + irqs->variance = irqs->variance + (diff * (interval - irqs->avg)); + + /* + * Update the next event + */ + irqs->ne = ts + irqs->avg; +} + +/** + * irq_timings_next_event - Return when the next event is supposed to arrive + * + * *** This function must be called with the local irq disabled *** + * + * During the last busy cycle, the number of interrupts is incremented + * and stored in the irq_timings structure. This information is + * necessary to: + * + * - know if the index in the table wrapped up: + * + * If more than the array size interrupts happened during the + * last busy/idle cycle, the index wrapped up and we have to + * begin with the next element in the array which is the last one + * in the sequence, otherwise it is a the index 0. + * + * - have an indication of the interrupts activity on this CPU + * (eg. irq/sec) + * + * The values are 'consumed' after inserting in the statistical model, + * thus the count is reinitialized. + * + * The array of values **must** be browsed in the time direction, the + * timestamp must increase between an element and the next one. + * + * Returns a nanosec time based estimation of the earliest interrupt, + * U64_MAX otherwise. + */ +u64 irq_timings_next_event(u64 now) +{ + struct irq_timings *irqts = this_cpu_ptr(&irq_timings); + struct irq_timing *irqt; + struct irq_stat *irqs; + struct irq_stat __percpu *s; + u64 ne = U64_MAX; + int index, count, i, irq = 0; + + /* + * Number of elements in the circular buffer. If it happens it + * was flushed before, then the number of elements could be + * smaller than IRQ_TIMINGS_SIZE, so the count is used, + * otherwise the array size is used as we wrapped. The index + * begins from zero when we did not wrap. That could be done + * in a nicer way with the proper circular array structure + * type but with the cost of extra computation in the + * interrupt handler hot path. We choose efficiency. + */ + if (irqts->count >= IRQ_TIMINGS_SIZE) { + count = IRQ_TIMINGS_SIZE; + index = irqts->count & IRQ_TIMINGS_MASK; + } else { + count = irqts->count; + index = 0; + } + + /* + * Inject measured irq/timestamp to the statistical model. + */ + for (i = 0; i < count; i++) { + + irqt = &irqts->values[(index + i) & IRQ_TIMINGS_MASK]; + + s = idr_find(&irq_stats, irqt->irq); + + irqs = this_cpu_ptr(s); + + irqs_update(irqs, irqt->ts); + } + + /* + * Reset the counter, we consumed all the data from our + * circular buffer. + */ + irqts->count = 0; + + /* + * Look in the list of interrupts' statistics, the earliest + * next event. + */ + idr_for_each_entry(&irq_stats, s, i) { + + irqs = this_cpu_ptr(s); + + if (!irqs->valid) + continue; + + if (irqs->ne <= now) { + irq = i; + ne = now; + + /* + * This interrupt mustn't use in the future + * until new events occur and update the + * statistics. + */ + irqs->valid = 0; + break; + } + + if (irqs->ne < ne) { + irq = i; + ne = irqs->ne; + } + } + + return ne; +} + +void irq_timings_free(int irq) +{ + struct irq_stat __percpu *s; + + s = idr_find(&irq_stats, irq); + if (s) { + free_percpu(s); + idr_remove(&irq_stats, irq); + } +} + +int irq_timings_alloc(int irq) +{ + int id; + struct irq_stat __percpu *s; + + /* + * Some platforms can have the same private interrupt per cpu, + * so this function may be be called several times with the + * same interrupt number. Just bail out in case the per cpu + * stat structure is already allocated. + */ + s = idr_find(&irq_stats, irq); + if (s) + return 0; + + s = alloc_percpu(*s); + if (!s) + return -ENOMEM; + + idr_preload(GFP_KERNEL); + id = idr_alloc(&irq_stats, s, irq, irq + 1, GFP_NOWAIT); + idr_preload_end(); + + if (id < 0) { + free_percpu(s); + return id; + } + + return 0; +}

[V8,3/3] irq: Compute the periodic interval for interrupts

Commit Message

Comments

Patch