| // SPDX-License-Identifier: GPL-2.0 |
| /* |
| * Copyright (C) 2016, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> |
| * |
| * This program is free software; you can redistribute it and/or modify |
| * it under the terms of the GNU General Public License version 2 as |
| * published by the Free Software Foundation. |
| * |
| */ |
| #include <linux/kernel.h> |
| #include <linux/percpu.h> |
| #include <linux/slab.h> |
| #include <linux/static_key.h> |
| #include <linux/interrupt.h> |
| #include <linux/idr.h> |
| #include <linux/irq.h> |
| #include <linux/math64.h> |
| |
| #include <trace/events/irq.h> |
| |
| #include "internals.h" |
| |
| DEFINE_STATIC_KEY_FALSE(irq_timing_enabled); |
| |
| DEFINE_PER_CPU(struct irq_timings, irq_timings); |
| |
struct irqt_stat {
	u64 next_evt;	/* anticipated timestamp of the next interrupt */
	u64 last_ts;	/* timestamp of the last recorded interrupt */
	u64 variance;	/* running sum of squared deviations */
	u32 avg;	/* estimated average interval (nanoseconds) */
	u32 nr_samples;	/* number of samples in the current sequence */
	int anomalies;	/* number of consecutive anomalies */
	int valid;	/* statistics usable for prediction */
};
| |
| static DEFINE_IDR(irqt_stats); |
| |
| void irq_timings_enable(void) |
| { |
| static_branch_enable(&irq_timing_enabled); |
| } |
| |
| void irq_timings_disable(void) |
| { |
| static_branch_disable(&irq_timing_enabled); |
| } |
| |
| /** |
| * irqs_update - update the irq timing statistics with a new timestamp |
| * |
| * @irqs: an irqt_stat struct pointer |
| * @ts: the new timestamp |
| * |
| * The statistics are computed online, in other words, the code is |
| * designed to compute the statistics on a stream of values rather |
| * than doing multiple passes on the values to compute the average, |
 * then the variance. The integer division introduces a loss of
 * precision, but with an acceptable error margin compared to the
 * results we would get with double precision floating point: we are
 * dealing with nanoseconds, hence large numbers, so the fractional
 * part is negligible, especially when converting the time to
 * microseconds afterwards.
| * |
| * The computation happens at idle time. When the CPU is not idle, the |
 * interrupts' timestamps are stored in the circular buffer; when the
 * CPU goes idle and this routine is called, all the buffer's values
 * are injected into the statistical model, continuing to extend the
 * statistics from the previous busy-idle cycle.
| * |
 * Observation showed that a device triggers a burst of periodic
 * interrupts followed by one or two peaks of longer duration, for
 * instance when an SD card device flushes its cache, and then the
 * periodic intervals occur again. A one second inactivity period
 * resets the stats, which guarantees the statistical values won't
 * exceed 1x10^9, thus the computation won't overflow.
| * |
| * Basically, the purpose of the algorithm is to watch the periodic |
| * interrupts and eliminate the peaks. |
| * |
 * An interrupt is considered periodically stable if the intervals
 * between its occurrences follow the normal distribution, thus the
 * values comply with:
| * |
| * avg - 3 x stddev < value < avg + 3 x stddev |
| * |
| * Which can be simplified to: |
| * |
| * -3 x stddev < value - avg < 3 x stddev |
| * |
| * abs(value - avg) < 3 x stddev |
| * |
| * In order to save a costly square root computation, we use the |
| * variance. For the record, stddev = sqrt(variance). The equation |
| * above becomes: |
| * |
| * abs(value - avg) < 3 x sqrt(variance) |
| * |
| * And finally we square it: |
| * |
| * (value - avg) ^ 2 < (3 x sqrt(variance)) ^ 2 |
| * |
| * (value - avg) x (value - avg) < 9 x variance |
| * |
 * Statistically speaking, any value outside this interval is
 * considered an anomaly and is discarded. However, the normal
 * distribution model only applies once the number of samples
 * reaches 30 (the usual rule of thumb in statistics). After
 * three consecutive anomalies, the statistics are reset.
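 *
 * As a purely illustrative example: with avg = 1200ns and
 * variance = 10000 (i.e. stddev = 100ns), an interval of 1450ns
 * gives (1450 - 1200)^2 = 62500 < 9 x 10000 = 90000 and is
 * accepted, whereas an interval of 1550ns gives
 * (1550 - 1200)^2 = 122500 > 90000 and is counted as an anomaly.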
| * |
| */ |
| static void irqs_update(struct irqt_stat *irqs, u64 ts) |
| { |
| u64 old_ts = irqs->last_ts; |
| u64 variance = 0; |
| u64 interval; |
| s64 diff; |
| |
| /* |
| * The timestamps are absolute time values, we need to compute |
| * the timing interval between two interrupts. |
| */ |
| irqs->last_ts = ts; |
| |
| /* |
	 * The interval type is u64 so that the whole computation is
	 * done with a single type, which avoids tricky issues with
	 * overflow, sign and division.
| */ |
| interval = ts - old_ts; |
| |
| /* |
	 * The interrupts triggered more than one second apart, which
	 * ends the sequence considered as predictable for our purpose.
	 * In this case, assume we are at the beginning of a sequence
	 * and the timestamp is its first value. As it is impossible to
	 * predict anything at this point, return.
	 *
	 * Note the first timestamp of a sequence will always fall into
	 * this test because old_ts is zero. That is what we want, as
	 * we need another timestamp to compute an interval.
| */ |
| if (interval >= NSEC_PER_SEC) { |
| memset(irqs, 0, sizeof(*irqs)); |
| irqs->last_ts = ts; |
| return; |
| } |
| |
| /* |
| * Pre-compute the delta with the average as the result is |
| * used several times in this function. |
| */ |
| diff = interval - irqs->avg; |
| |
| /* |
| * Increment the number of samples. |
| */ |
| irqs->nr_samples++; |
| |
| /* |
| * Online variance divided by the number of elements if there |
| * is more than one sample. Normally the formula is division |
	 * by nr_samples - 1, but we assume the number of elements will
	 * be greater than 32, so dividing by 32 instead of 31 is
	 * precise enough.
| */ |
| if (likely(irqs->nr_samples > 1)) |
| variance = irqs->variance >> IRQ_TIMINGS_SHIFT; |
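	/*
	 * Note: assuming IRQ_TIMINGS_SHIFT is 5 (the 32 entries
	 * circular buffer), the shift above is a division by 32. For
	 * instance, a running sum of squared deviations of 320000
	 * gives an estimated variance of 320000 >> 5 = 10000.
	 */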
| |
| /* |
| * The rule of thumb in statistics for the normal distribution |
	 * is to have at least 30 samples for the model to apply.
	 * Values outside the interval are considered anomalies.
| */ |
| if ((irqs->nr_samples >= 30) && ((diff * diff) > (9 * variance))) { |
| /* |
| * After three consecutive anomalies, we reset the |
		 * stats as the interrupt is no longer considered
		 * stable enough.
| */ |
| if (irqs->anomalies++ >= 3) { |
| memset(irqs, 0, sizeof(*irqs)); |
| irqs->last_ts = ts; |
| return; |
| } |
| } else { |
| /* |
		 * The anomalies must be consecutive, so at this
| * point, we reset the anomalies counter. |
| */ |
| irqs->anomalies = 0; |
| } |
| |
| /* |
| * The interrupt is considered stable enough to try to predict |
| * the next event on it. |
| */ |
| irqs->valid = 1; |
| |
| /* |
| * Online average algorithm: |
| * |
| * new_average = average + ((value - average) / count) |
| * |
| * The variance computation depends on the new average |
| * to be computed here first. |
| * |
| */ |
| irqs->avg = irqs->avg + (diff >> IRQ_TIMINGS_SHIFT); |
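	/*
	 * Illustration with hypothetical numbers: with avg = 2000ns,
	 * an interval of 2320ns gives diff = 320 and, assuming
	 * IRQ_TIMINGS_SHIFT is 5, a new average of
	 * 2000 + (320 >> 5) = 2010ns.
	 */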
| |
| /* |
| * Online variance algorithm: |
| * |
| * new_variance = variance + (value - average) x (value - new_average) |
| * |
| * Warning: irqs->avg is updated with the line above, hence |
| * 'interval - irqs->avg' is no longer equal to 'diff' |
| */ |
| irqs->variance = irqs->variance + (diff * (interval - irqs->avg)); |
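	/*
	 * Continuing the hypothetical numbers above: with the new
	 * average being 2010ns, the running sum of squared deviations
	 * grows by diff * (interval - new average), i.e.
	 * 320 * (2320 - 2010) = 99200.
	 */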
| |
| /* |
| * Update the next event |
| */ |
| irqs->next_evt = ts + irqs->avg; |
| } |
| |
| /** |
 * irq_timings_next_event - Return when the next event is supposed to arrive
 *
 * @now: the current time, in the same nanosecond time base as the
 *       recorded interrupt timestamps
 *
| * During the last busy cycle, the number of interrupts is incremented |
| * and stored in the irq_timings structure. This information is |
| * necessary to: |
| * |
 * - know if the index in the table wrapped around:
| * |
 *   If more interrupts than the array size happened during the
 *   last busy/idle cycle, the index wrapped around and we have to
 *   begin with the next element in the array, which is the oldest
 *   one in the sequence; otherwise we begin at index 0.
| * |
| * - have an indication of the interrupts activity on this CPU |
 *   (e.g. irq/sec)
| * |
 * The values are 'consumed' after being inserted into the
 * statistical model, thus the count is reinitialized.
| * |
 * The array of values **must** be browsed in chronological order:
 * the timestamp must increase from one element to the next.
| * |
 * Returns a nanosecond based estimate of the earliest expected
 * interrupt, or U64_MAX if no prediction is available.
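 *
 * A purely illustrative use from an idle path, with local irqs
 * already disabled and assuming the recorded timestamps share the
 * local_clock() time base, could look like:
 *
 *	u64 next = irq_timings_next_event(local_clock());
 *
 *	if (next != U64_MAX)
 *		sleep_length_ns = next - local_clock();
 *
 * where sleep_length_ns is a hypothetical variable of the caller.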
| */ |
| u64 irq_timings_next_event(u64 now) |
| { |
| struct irq_timings *irqts = this_cpu_ptr(&irq_timings); |
| struct irqt_stat *irqs; |
| struct irqt_stat __percpu *s; |
| u64 ts, next_evt = U64_MAX; |
| int i, irq = 0; |
| |
| /* |
	 * This function must be called with local irqs disabled in
	 * order to prevent the timings circular buffer from being
	 * updated while we are reading it.
| */ |
| lockdep_assert_irqs_disabled(); |
| |
| /* |
	 * Number of elements in the circular buffer: if the buffer was
	 * flushed before, the number of elements could be smaller than
	 * IRQ_TIMINGS_SIZE, so the count is used; otherwise the array
	 * size is used, as we wrapped around. The index begins from
	 * zero when we did not wrap. That could be done in a nicer way
	 * with a proper circular array structure type, but at the cost
	 * of extra computation in the interrupt handler hot path. We
	 * choose efficiency.
	 *
	 * Inject each measured irq/timestamp into the statistical
	 * model while decrementing the counter, because we consume
	 * the data from our circular buffer.
| */ |
| for (i = irqts->count & IRQ_TIMINGS_MASK, |
| irqts->count = min(IRQ_TIMINGS_SIZE, irqts->count); |
| irqts->count > 0; irqts->count--, i = (i + 1) & IRQ_TIMINGS_MASK) { |
| |
| irq = irq_timing_decode(irqts->values[i], &ts); |
| |
| s = idr_find(&irqt_stats, irq); |
| if (s) { |
| irqs = this_cpu_ptr(s); |
| irqs_update(irqs, ts); |
| } |
| } |
| |
| /* |
	 * Look for the earliest next event in the list of interrupts'
	 * statistics.
| */ |
| idr_for_each_entry(&irqt_stats, s, i) { |
| |
| irqs = this_cpu_ptr(s); |
| |
| if (!irqs->valid) |
| continue; |
| |
| if (irqs->next_evt <= now) { |
| irq = i; |
| next_evt = now; |
| |
| /* |
			 * This interrupt must not be used in the future
| * until new events occur and update the |
| * statistics. |
| */ |
| irqs->valid = 0; |
| break; |
| } |
| |
| if (irqs->next_evt < next_evt) { |
| irq = i; |
| next_evt = irqs->next_evt; |
| } |
| } |
| |
| return next_evt; |
| } |
| |
| void irq_timings_free(int irq) |
| { |
| struct irqt_stat __percpu *s; |
| |
| s = idr_find(&irqt_stats, irq); |
| if (s) { |
| free_percpu(s); |
| idr_remove(&irqt_stats, irq); |
| } |
| } |
| |
| int irq_timings_alloc(int irq) |
| { |
| struct irqt_stat __percpu *s; |
| int id; |
| |
| /* |
| * Some platforms can have the same private interrupt per cpu, |
	 * so this function may be called several times with the
| * same interrupt number. Just bail out in case the per cpu |
| * stat structure is already allocated. |
| */ |
| s = idr_find(&irqt_stats, irq); |
| if (s) |
| return 0; |
| |
| s = alloc_percpu(*s); |
| if (!s) |
| return -ENOMEM; |
| |
| idr_preload(GFP_KERNEL); |
| id = idr_alloc(&irqt_stats, s, irq, irq + 1, GFP_NOWAIT); |
| idr_preload_end(); |
| |
| if (id < 0) { |
| free_percpu(s); |
| return id; |
| } |
| |
| return 0; |
| } |