Thomas Gleixner | 52a65ff | 2018-03-14 22:15:19 +0100 | [diff] [blame] | 1 | // SPDX-License-Identifier: GPL-2.0 |
Thomas Gleixner | f3f59fb | 2018-03-20 14:17:04 +0100 | [diff] [blame] | 2 | // Copyright (C) 2016, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> |
| 3 | |
Daniel Lezcano | e1c9214 | 2017-06-23 16:11:08 +0200 | [diff] [blame] | 4 | #include <linux/kernel.h> |
Daniel Lezcano | b2d3d61 | 2017-06-23 16:11:07 +0200 | [diff] [blame] | 5 | #include <linux/percpu.h> |
Daniel Lezcano | e1c9214 | 2017-06-23 16:11:08 +0200 | [diff] [blame] | 6 | #include <linux/slab.h> |
Daniel Lezcano | b2d3d61 | 2017-06-23 16:11:07 +0200 | [diff] [blame] | 7 | #include <linux/static_key.h> |
| 8 | #include <linux/interrupt.h> |
Daniel Lezcano | e1c9214 | 2017-06-23 16:11:08 +0200 | [diff] [blame] | 9 | #include <linux/idr.h> |
Daniel Lezcano | b2d3d61 | 2017-06-23 16:11:07 +0200 | [diff] [blame] | 10 | #include <linux/irq.h> |
Daniel Lezcano | e1c9214 | 2017-06-23 16:11:08 +0200 | [diff] [blame] | 11 | #include <linux/math64.h> |
| 12 | |
| 13 | #include <trace/events/irq.h> |
Daniel Lezcano | b2d3d61 | 2017-06-23 16:11:07 +0200 | [diff] [blame] | 14 | |
| 15 | #include "internals.h" |
| 16 | |
| 17 | DEFINE_STATIC_KEY_FALSE(irq_timing_enabled); |
| 18 | |
| 19 | DEFINE_PER_CPU(struct irq_timings, irq_timings); |
| 20 | |
Daniel Lezcano | e1c9214 | 2017-06-23 16:11:08 +0200 | [diff] [blame] | 21 | struct irqt_stat { |
| 22 | u64 next_evt; |
| 23 | u64 last_ts; |
| 24 | u64 variance; |
| 25 | u32 avg; |
| 26 | u32 nr_samples; |
| 27 | int anomalies; |
| 28 | int valid; |
| 29 | }; |
| 30 | |
| 31 | static DEFINE_IDR(irqt_stats); |
| 32 | |
Daniel Lezcano | b2d3d61 | 2017-06-23 16:11:07 +0200 | [diff] [blame] | 33 | void irq_timings_enable(void) |
| 34 | { |
| 35 | static_branch_enable(&irq_timing_enabled); |
| 36 | } |
| 37 | |
| 38 | void irq_timings_disable(void) |
| 39 | { |
| 40 | static_branch_disable(&irq_timing_enabled); |
| 41 | } |
Daniel Lezcano | e1c9214 | 2017-06-23 16:11:08 +0200 | [diff] [blame] | 42 | |
| 43 | /** |
| 44 | * irqs_update - update the irq timing statistics with a new timestamp |
| 45 | * |
| 46 | * @irqs: an irqt_stat struct pointer |
| 47 | * @ts: the new timestamp |
| 48 | * |
| 49 | * The statistics are computed online, in other words, the code is |
| 50 | * designed to compute the statistics on a stream of values rather |
| 51 | * than doing multiple passes on the values to compute the average, |
| 52 | * then the variance. The integer division introduces a loss of |
| 53 | * precision but with an acceptable error margin regarding the results |
| 54 | * we would have with the double floating precision: we are dealing |
| 55 | * with nanosec, so big numbers, consequently the mantisse is |
| 56 | * negligeable, especially when converting the time in usec |
| 57 | * afterwards. |
| 58 | * |
| 59 | * The computation happens at idle time. When the CPU is not idle, the |
| 60 | * interrupts' timestamps are stored in the circular buffer, when the |
| 61 | * CPU goes idle and this routine is called, all the buffer's values |
| 62 | * are injected in the statistical model continuying to extend the |
| 63 | * statistics from the previous busy-idle cycle. |
| 64 | * |
| 65 | * The observations showed a device will trigger a burst of periodic |
| 66 | * interrupts followed by one or two peaks of longer time, for |
| 67 | * instance when a SD card device flushes its cache, then the periodic |
| 68 | * intervals occur again. A one second inactivity period resets the |
| 69 | * stats, that gives us the certitude the statistical values won't |
| 70 | * exceed 1x10^9, thus the computation won't overflow. |
| 71 | * |
| 72 | * Basically, the purpose of the algorithm is to watch the periodic |
| 73 | * interrupts and eliminate the peaks. |
| 74 | * |
| 75 | * An interrupt is considered periodically stable if the interval of |
| 76 | * its occurences follow the normal distribution, thus the values |
| 77 | * comply with: |
| 78 | * |
| 79 | * avg - 3 x stddev < value < avg + 3 x stddev |
| 80 | * |
| 81 | * Which can be simplified to: |
| 82 | * |
| 83 | * -3 x stddev < value - avg < 3 x stddev |
| 84 | * |
| 85 | * abs(value - avg) < 3 x stddev |
| 86 | * |
| 87 | * In order to save a costly square root computation, we use the |
| 88 | * variance. For the record, stddev = sqrt(variance). The equation |
| 89 | * above becomes: |
| 90 | * |
| 91 | * abs(value - avg) < 3 x sqrt(variance) |
| 92 | * |
| 93 | * And finally we square it: |
| 94 | * |
| 95 | * (value - avg) ^ 2 < (3 x sqrt(variance)) ^ 2 |
| 96 | * |
| 97 | * (value - avg) x (value - avg) < 9 x variance |
| 98 | * |
| 99 | * Statistically speaking, any values out of this interval is |
| 100 | * considered as an anomaly and is discarded. However, a normal |
| 101 | * distribution appears when the number of samples is 30 (it is the |
| 102 | * rule of thumb in statistics, cf. "30 samples" on Internet). When |
| 103 | * there are three consecutive anomalies, the statistics are resetted. |
| 104 | * |
| 105 | */ |
| 106 | static void irqs_update(struct irqt_stat *irqs, u64 ts) |
| 107 | { |
| 108 | u64 old_ts = irqs->last_ts; |
| 109 | u64 variance = 0; |
| 110 | u64 interval; |
| 111 | s64 diff; |
| 112 | |
| 113 | /* |
| 114 | * The timestamps are absolute time values, we need to compute |
| 115 | * the timing interval between two interrupts. |
| 116 | */ |
| 117 | irqs->last_ts = ts; |
| 118 | |
| 119 | /* |
| 120 | * The interval type is u64 in order to deal with the same |
| 121 | * type in our computation, that prevent mindfuck issues with |
| 122 | * overflow, sign and division. |
| 123 | */ |
| 124 | interval = ts - old_ts; |
| 125 | |
| 126 | /* |
| 127 | * The interrupt triggered more than one second apart, that |
| 128 | * ends the sequence as predictible for our purpose. In this |
| 129 | * case, assume we have the beginning of a sequence and the |
| 130 | * timestamp is the first value. As it is impossible to |
| 131 | * predict anything at this point, return. |
| 132 | * |
| 133 | * Note the first timestamp of the sequence will always fall |
| 134 | * in this test because the old_ts is zero. That is what we |
| 135 | * want as we need another timestamp to compute an interval. |
| 136 | */ |
| 137 | if (interval >= NSEC_PER_SEC) { |
| 138 | memset(irqs, 0, sizeof(*irqs)); |
| 139 | irqs->last_ts = ts; |
| 140 | return; |
| 141 | } |
| 142 | |
| 143 | /* |
| 144 | * Pre-compute the delta with the average as the result is |
| 145 | * used several times in this function. |
| 146 | */ |
| 147 | diff = interval - irqs->avg; |
| 148 | |
| 149 | /* |
| 150 | * Increment the number of samples. |
| 151 | */ |
| 152 | irqs->nr_samples++; |
| 153 | |
| 154 | /* |
| 155 | * Online variance divided by the number of elements if there |
| 156 | * is more than one sample. Normally the formula is division |
| 157 | * by nr_samples - 1 but we assume the number of element will be |
| 158 | * more than 32 and dividing by 32 instead of 31 is enough |
| 159 | * precise. |
| 160 | */ |
| 161 | if (likely(irqs->nr_samples > 1)) |
| 162 | variance = irqs->variance >> IRQ_TIMINGS_SHIFT; |
| 163 | |
| 164 | /* |
| 165 | * The rule of thumb in statistics for the normal distribution |
| 166 | * is having at least 30 samples in order to have the model to |
| 167 | * apply. Values outside the interval are considered as an |
| 168 | * anomaly. |
| 169 | */ |
| 170 | if ((irqs->nr_samples >= 30) && ((diff * diff) > (9 * variance))) { |
| 171 | /* |
| 172 | * After three consecutive anomalies, we reset the |
| 173 | * stats as it is no longer stable enough. |
| 174 | */ |
| 175 | if (irqs->anomalies++ >= 3) { |
| 176 | memset(irqs, 0, sizeof(*irqs)); |
| 177 | irqs->last_ts = ts; |
| 178 | return; |
| 179 | } |
| 180 | } else { |
| 181 | /* |
| 182 | * The anomalies must be consecutives, so at this |
| 183 | * point, we reset the anomalies counter. |
| 184 | */ |
| 185 | irqs->anomalies = 0; |
| 186 | } |
| 187 | |
| 188 | /* |
| 189 | * The interrupt is considered stable enough to try to predict |
| 190 | * the next event on it. |
| 191 | */ |
| 192 | irqs->valid = 1; |
| 193 | |
| 194 | /* |
| 195 | * Online average algorithm: |
| 196 | * |
| 197 | * new_average = average + ((value - average) / count) |
| 198 | * |
| 199 | * The variance computation depends on the new average |
| 200 | * to be computed here first. |
| 201 | * |
| 202 | */ |
| 203 | irqs->avg = irqs->avg + (diff >> IRQ_TIMINGS_SHIFT); |
| 204 | |
| 205 | /* |
| 206 | * Online variance algorithm: |
| 207 | * |
| 208 | * new_variance = variance + (value - average) x (value - new_average) |
| 209 | * |
| 210 | * Warning: irqs->avg is updated with the line above, hence |
| 211 | * 'interval - irqs->avg' is no longer equal to 'diff' |
| 212 | */ |
| 213 | irqs->variance = irqs->variance + (diff * (interval - irqs->avg)); |
| 214 | |
| 215 | /* |
| 216 | * Update the next event |
| 217 | */ |
| 218 | irqs->next_evt = ts + irqs->avg; |
| 219 | } |
| 220 | |
| 221 | /** |
| 222 | * irq_timings_next_event - Return when the next event is supposed to arrive |
| 223 | * |
| 224 | * During the last busy cycle, the number of interrupts is incremented |
| 225 | * and stored in the irq_timings structure. This information is |
| 226 | * necessary to: |
| 227 | * |
| 228 | * - know if the index in the table wrapped up: |
| 229 | * |
| 230 | * If more than the array size interrupts happened during the |
| 231 | * last busy/idle cycle, the index wrapped up and we have to |
| 232 | * begin with the next element in the array which is the last one |
| 233 | * in the sequence, otherwise it is a the index 0. |
| 234 | * |
| 235 | * - have an indication of the interrupts activity on this CPU |
| 236 | * (eg. irq/sec) |
| 237 | * |
| 238 | * The values are 'consumed' after inserting in the statistical model, |
| 239 | * thus the count is reinitialized. |
| 240 | * |
| 241 | * The array of values **must** be browsed in the time direction, the |
| 242 | * timestamp must increase between an element and the next one. |
| 243 | * |
| 244 | * Returns a nanosec time based estimation of the earliest interrupt, |
| 245 | * U64_MAX otherwise. |
| 246 | */ |
| 247 | u64 irq_timings_next_event(u64 now) |
| 248 | { |
| 249 | struct irq_timings *irqts = this_cpu_ptr(&irq_timings); |
| 250 | struct irqt_stat *irqs; |
| 251 | struct irqt_stat __percpu *s; |
| 252 | u64 ts, next_evt = U64_MAX; |
| 253 | int i, irq = 0; |
| 254 | |
| 255 | /* |
| 256 | * This function must be called with the local irq disabled in |
| 257 | * order to prevent the timings circular buffer to be updated |
| 258 | * while we are reading it. |
| 259 | */ |
Frederic Weisbecker | a934d4d | 2017-11-06 16:01:25 +0100 | [diff] [blame] | 260 | lockdep_assert_irqs_disabled(); |
Daniel Lezcano | e1c9214 | 2017-06-23 16:11:08 +0200 | [diff] [blame] | 261 | |
| 262 | /* |
| 263 | * Number of elements in the circular buffer: If it happens it |
| 264 | * was flushed before, then the number of elements could be |
| 265 | * smaller than IRQ_TIMINGS_SIZE, so the count is used, |
| 266 | * otherwise the array size is used as we wrapped. The index |
| 267 | * begins from zero when we did not wrap. That could be done |
| 268 | * in a nicer way with the proper circular array structure |
| 269 | * type but with the cost of extra computation in the |
| 270 | * interrupt handler hot path. We choose efficiency. |
| 271 | * |
| 272 | * Inject measured irq/timestamp to the statistical model |
| 273 | * while decrementing the counter because we consume the data |
| 274 | * from our circular buffer. |
| 275 | */ |
| 276 | for (i = irqts->count & IRQ_TIMINGS_MASK, |
| 277 | irqts->count = min(IRQ_TIMINGS_SIZE, irqts->count); |
| 278 | irqts->count > 0; irqts->count--, i = (i + 1) & IRQ_TIMINGS_MASK) { |
| 279 | |
| 280 | irq = irq_timing_decode(irqts->values[i], &ts); |
| 281 | |
| 282 | s = idr_find(&irqt_stats, irq); |
| 283 | if (s) { |
| 284 | irqs = this_cpu_ptr(s); |
| 285 | irqs_update(irqs, ts); |
| 286 | } |
| 287 | } |
| 288 | |
| 289 | /* |
| 290 | * Look in the list of interrupts' statistics, the earliest |
| 291 | * next event. |
| 292 | */ |
| 293 | idr_for_each_entry(&irqt_stats, s, i) { |
| 294 | |
| 295 | irqs = this_cpu_ptr(s); |
| 296 | |
| 297 | if (!irqs->valid) |
| 298 | continue; |
| 299 | |
| 300 | if (irqs->next_evt <= now) { |
| 301 | irq = i; |
| 302 | next_evt = now; |
| 303 | |
| 304 | /* |
| 305 | * This interrupt mustn't use in the future |
| 306 | * until new events occur and update the |
| 307 | * statistics. |
| 308 | */ |
| 309 | irqs->valid = 0; |
| 310 | break; |
| 311 | } |
| 312 | |
| 313 | if (irqs->next_evt < next_evt) { |
| 314 | irq = i; |
| 315 | next_evt = irqs->next_evt; |
| 316 | } |
| 317 | } |
| 318 | |
| 319 | return next_evt; |
| 320 | } |
| 321 | |
| 322 | void irq_timings_free(int irq) |
| 323 | { |
| 324 | struct irqt_stat __percpu *s; |
| 325 | |
| 326 | s = idr_find(&irqt_stats, irq); |
| 327 | if (s) { |
| 328 | free_percpu(s); |
| 329 | idr_remove(&irqt_stats, irq); |
| 330 | } |
| 331 | } |
| 332 | |
| 333 | int irq_timings_alloc(int irq) |
| 334 | { |
| 335 | struct irqt_stat __percpu *s; |
| 336 | int id; |
| 337 | |
| 338 | /* |
| 339 | * Some platforms can have the same private interrupt per cpu, |
| 340 | * so this function may be be called several times with the |
| 341 | * same interrupt number. Just bail out in case the per cpu |
| 342 | * stat structure is already allocated. |
| 343 | */ |
| 344 | s = idr_find(&irqt_stats, irq); |
| 345 | if (s) |
| 346 | return 0; |
| 347 | |
| 348 | s = alloc_percpu(*s); |
| 349 | if (!s) |
| 350 | return -ENOMEM; |
| 351 | |
| 352 | idr_preload(GFP_KERNEL); |
| 353 | id = idr_alloc(&irqt_stats, s, irq, irq + 1, GFP_NOWAIT); |
| 354 | idr_preload_end(); |
| 355 | |
| 356 | if (id < 0) { |
| 357 | free_percpu(s); |
| 358 | return id; |
| 359 | } |
| 360 | |
| 361 | return 0; |
| 362 | } |