Max Filippov | 9bd46da | 2015-06-14 01:41:25 +0300 | [diff] [blame^] | 1 | /* |
| 2 | * Xtensa Performance Monitor Module driver |
| 3 | * See Tensilica Debug User's Guide for PMU registers documentation. |
| 4 | * |
| 5 | * Copyright (C) 2015 Cadence Design Systems Inc. |
| 6 | * |
| 7 | * This program is free software; you can redistribute it and/or modify |
| 8 | * it under the terms of the GNU General Public License version 2 as |
| 9 | * published by the Free Software Foundation. |
| 10 | */ |
| 11 | |
| 12 | #include <linux/interrupt.h> |
| 13 | #include <linux/irqdomain.h> |
| 14 | #include <linux/module.h> |
| 15 | #include <linux/of.h> |
| 16 | #include <linux/perf_event.h> |
| 17 | #include <linux/platform_device.h> |
| 18 | |
| 19 | #include <asm/processor.h> |
| 20 | #include <asm/stacktrace.h> |
| 21 | |
/*
 * PMU register offsets, as passed to get_er()/set_er().
 */
/* Global control/status for all perf counters */
#define XTENSA_PMU_PMG			0x1000
/* Perf counter values */
#define XTENSA_PMU_PM(i)		(0x1080 + (i) * 4)
/* Perf counter control registers */
#define XTENSA_PMU_PMCTRL(i)		(0x1100 + (i) * 4)
/* Perf counter status registers */
#define XTENSA_PMU_PMSTAT(i)		(0x1180 + (i) * 4)

/* PMG bit set by xtensa_pmu_enable() and cleared by xtensa_pmu_disable() */
#define XTENSA_PMU_PMG_PMEN		0x1

/* Counters are read and written as 32-bit values */
#define XTENSA_PMU_COUNTER_MASK		0xffffffffULL
/* Largest period xtensa_perf_event_set_period() programs into a counter */
#define XTENSA_PMU_COUNTER_MAX		0x7fffffff

/* PMCTRL register fields */
#define XTENSA_PMU_PMCTRL_INTEN		0x00000001
#define XTENSA_PMU_PMCTRL_KRNLCNT	0x00000008
#define XTENSA_PMU_PMCTRL_TRACELEVEL	0x000000f0
#define XTENSA_PMU_PMCTRL_SELECT_SHIFT	8
#define XTENSA_PMU_PMCTRL_SELECT	0x00001f00
#define XTENSA_PMU_PMCTRL_MASK_SHIFT	16
#define XTENSA_PMU_PMCTRL_MASK		0xffff0000

/*
 * Build a PMCTRL value from an event select and an event mask, with all
 * TRACELEVEL bits and the INTEN bit set.
 *
 * The operands are converted to uint32_t before shifting: a 16-bit mask
 * such as 0xffff shifted left by 16 does not fit in a signed int, and
 * shifting into the sign bit of an int is undefined behaviour.
 */
#define XTENSA_PMU_MASK(select, mask) \
	(((uint32_t)(select) << XTENSA_PMU_PMCTRL_SELECT_SHIFT) | \
	 ((uint32_t)(mask) << XTENSA_PMU_PMCTRL_MASK_SHIFT) | \
	 XTENSA_PMU_PMCTRL_TRACELEVEL | \
	 XTENSA_PMU_PMCTRL_INTEN)

/* PMSTAT register fields */
#define XTENSA_PMU_PMSTAT_OVFL		0x00000001
#define XTENSA_PMU_PMSTAT_INTASRT	0x00000010
| 52 | |
| 53 | struct xtensa_pmu_events { |
| 54 | /* Array of events currently on this core */ |
| 55 | struct perf_event *event[XCHAL_NUM_PERF_COUNTERS]; |
| 56 | /* Bitmap of used hardware counters */ |
| 57 | unsigned long used_mask[BITS_TO_LONGS(XCHAL_NUM_PERF_COUNTERS)]; |
| 58 | }; |
| 59 | static DEFINE_PER_CPU(struct xtensa_pmu_events, xtensa_pmu_events); |
| 60 | |
| 61 | static const u32 xtensa_hw_ctl[] = { |
| 62 | [PERF_COUNT_HW_CPU_CYCLES] = XTENSA_PMU_MASK(0, 0x1), |
| 63 | [PERF_COUNT_HW_INSTRUCTIONS] = XTENSA_PMU_MASK(2, 0xffff), |
| 64 | [PERF_COUNT_HW_CACHE_REFERENCES] = XTENSA_PMU_MASK(10, 0x1), |
| 65 | [PERF_COUNT_HW_CACHE_MISSES] = XTENSA_PMU_MASK(12, 0x1), |
| 66 | /* Taken and non-taken branches + taken loop ends */ |
| 67 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XTENSA_PMU_MASK(2, 0x490), |
| 68 | /* Instruction-related + other global stall cycles */ |
| 69 | [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = XTENSA_PMU_MASK(4, 0x1ff), |
| 70 | /* Data-related global stall cycles */ |
| 71 | [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = XTENSA_PMU_MASK(3, 0x1ff), |
| 72 | }; |
| 73 | |
| 74 | #define C(_x) PERF_COUNT_HW_CACHE_##_x |
| 75 | |
| 76 | static const u32 xtensa_cache_ctl[][C(OP_MAX)][C(RESULT_MAX)] = { |
| 77 | [C(L1D)] = { |
| 78 | [C(OP_READ)] = { |
| 79 | [C(RESULT_ACCESS)] = XTENSA_PMU_MASK(10, 0x1), |
| 80 | [C(RESULT_MISS)] = XTENSA_PMU_MASK(10, 0x2), |
| 81 | }, |
| 82 | [C(OP_WRITE)] = { |
| 83 | [C(RESULT_ACCESS)] = XTENSA_PMU_MASK(11, 0x1), |
| 84 | [C(RESULT_MISS)] = XTENSA_PMU_MASK(11, 0x2), |
| 85 | }, |
| 86 | }, |
| 87 | [C(L1I)] = { |
| 88 | [C(OP_READ)] = { |
| 89 | [C(RESULT_ACCESS)] = XTENSA_PMU_MASK(8, 0x1), |
| 90 | [C(RESULT_MISS)] = XTENSA_PMU_MASK(8, 0x2), |
| 91 | }, |
| 92 | }, |
| 93 | [C(DTLB)] = { |
| 94 | [C(OP_READ)] = { |
| 95 | [C(RESULT_ACCESS)] = XTENSA_PMU_MASK(9, 0x1), |
| 96 | [C(RESULT_MISS)] = XTENSA_PMU_MASK(9, 0x8), |
| 97 | }, |
| 98 | }, |
| 99 | [C(ITLB)] = { |
| 100 | [C(OP_READ)] = { |
| 101 | [C(RESULT_ACCESS)] = XTENSA_PMU_MASK(7, 0x1), |
| 102 | [C(RESULT_MISS)] = XTENSA_PMU_MASK(7, 0x8), |
| 103 | }, |
| 104 | }, |
| 105 | }; |
| 106 | |
| 107 | static int xtensa_pmu_cache_event(u64 config) |
| 108 | { |
| 109 | unsigned int cache_type, cache_op, cache_result; |
| 110 | int ret; |
| 111 | |
| 112 | cache_type = (config >> 0) & 0xff; |
| 113 | cache_op = (config >> 8) & 0xff; |
| 114 | cache_result = (config >> 16) & 0xff; |
| 115 | |
| 116 | if (cache_type >= ARRAY_SIZE(xtensa_cache_ctl) || |
| 117 | cache_op >= C(OP_MAX) || |
| 118 | cache_result >= C(RESULT_MAX)) |
| 119 | return -EINVAL; |
| 120 | |
| 121 | ret = xtensa_cache_ctl[cache_type][cache_op][cache_result]; |
| 122 | |
| 123 | if (ret == 0) |
| 124 | return -EINVAL; |
| 125 | |
| 126 | return ret; |
| 127 | } |
| 128 | |
| 129 | static inline uint32_t xtensa_pmu_read_counter(int idx) |
| 130 | { |
| 131 | return get_er(XTENSA_PMU_PM(idx)); |
| 132 | } |
| 133 | |
| 134 | static inline void xtensa_pmu_write_counter(int idx, uint32_t v) |
| 135 | { |
| 136 | set_er(v, XTENSA_PMU_PM(idx)); |
| 137 | } |
| 138 | |
| 139 | static void xtensa_perf_event_update(struct perf_event *event, |
| 140 | struct hw_perf_event *hwc, int idx) |
| 141 | { |
| 142 | uint64_t prev_raw_count, new_raw_count; |
| 143 | int64_t delta; |
| 144 | |
| 145 | do { |
| 146 | prev_raw_count = local64_read(&hwc->prev_count); |
| 147 | new_raw_count = xtensa_pmu_read_counter(event->hw.idx); |
| 148 | } while (local64_cmpxchg(&hwc->prev_count, prev_raw_count, |
| 149 | new_raw_count) != prev_raw_count); |
| 150 | |
| 151 | delta = (new_raw_count - prev_raw_count) & XTENSA_PMU_COUNTER_MASK; |
| 152 | |
| 153 | local64_add(delta, &event->count); |
| 154 | local64_sub(delta, &hwc->period_left); |
| 155 | } |
| 156 | |
| 157 | static bool xtensa_perf_event_set_period(struct perf_event *event, |
| 158 | struct hw_perf_event *hwc, int idx) |
| 159 | { |
| 160 | bool rc = false; |
| 161 | s64 left; |
| 162 | |
| 163 | if (!is_sampling_event(event)) { |
| 164 | left = XTENSA_PMU_COUNTER_MAX; |
| 165 | } else { |
| 166 | s64 period = hwc->sample_period; |
| 167 | |
| 168 | left = local64_read(&hwc->period_left); |
| 169 | if (left <= -period) { |
| 170 | left = period; |
| 171 | local64_set(&hwc->period_left, left); |
| 172 | hwc->last_period = period; |
| 173 | rc = true; |
| 174 | } else if (left <= 0) { |
| 175 | left += period; |
| 176 | local64_set(&hwc->period_left, left); |
| 177 | hwc->last_period = period; |
| 178 | rc = true; |
| 179 | } |
| 180 | if (left > XTENSA_PMU_COUNTER_MAX) |
| 181 | left = XTENSA_PMU_COUNTER_MAX; |
| 182 | } |
| 183 | |
| 184 | local64_set(&hwc->prev_count, -left); |
| 185 | xtensa_pmu_write_counter(idx, -left); |
| 186 | perf_event_update_userpage(event); |
| 187 | |
| 188 | return rc; |
| 189 | } |
| 190 | |
| 191 | static void xtensa_pmu_enable(struct pmu *pmu) |
| 192 | { |
| 193 | set_er(get_er(XTENSA_PMU_PMG) | XTENSA_PMU_PMG_PMEN, XTENSA_PMU_PMG); |
| 194 | } |
| 195 | |
| 196 | static void xtensa_pmu_disable(struct pmu *pmu) |
| 197 | { |
| 198 | set_er(get_er(XTENSA_PMU_PMG) & ~XTENSA_PMU_PMG_PMEN, XTENSA_PMU_PMG); |
| 199 | } |
| 200 | |
| 201 | static int xtensa_pmu_event_init(struct perf_event *event) |
| 202 | { |
| 203 | int ret; |
| 204 | |
| 205 | switch (event->attr.type) { |
| 206 | case PERF_TYPE_HARDWARE: |
| 207 | if (event->attr.config >= ARRAY_SIZE(xtensa_hw_ctl) || |
| 208 | xtensa_hw_ctl[event->attr.config] == 0) |
| 209 | return -EINVAL; |
| 210 | event->hw.config = xtensa_hw_ctl[event->attr.config]; |
| 211 | return 0; |
| 212 | |
| 213 | case PERF_TYPE_HW_CACHE: |
| 214 | ret = xtensa_pmu_cache_event(event->attr.config); |
| 215 | if (ret < 0) |
| 216 | return ret; |
| 217 | event->hw.config = ret; |
| 218 | return 0; |
| 219 | |
| 220 | case PERF_TYPE_RAW: |
| 221 | /* Not 'previous counter' select */ |
| 222 | if ((event->attr.config & XTENSA_PMU_PMCTRL_SELECT) == |
| 223 | (1 << XTENSA_PMU_PMCTRL_SELECT_SHIFT)) |
| 224 | return -EINVAL; |
| 225 | event->hw.config = (event->attr.config & |
| 226 | (XTENSA_PMU_PMCTRL_KRNLCNT | |
| 227 | XTENSA_PMU_PMCTRL_TRACELEVEL | |
| 228 | XTENSA_PMU_PMCTRL_SELECT | |
| 229 | XTENSA_PMU_PMCTRL_MASK)) | |
| 230 | XTENSA_PMU_PMCTRL_INTEN; |
| 231 | return 0; |
| 232 | |
| 233 | default: |
| 234 | return -ENOENT; |
| 235 | } |
| 236 | } |
| 237 | |
| 238 | /* |
| 239 | * Starts/Stops a counter present on the PMU. The PMI handler |
| 240 | * should stop the counter when perf_event_overflow() returns |
| 241 | * !0. ->start() will be used to continue. |
| 242 | */ |
| 243 | static void xtensa_pmu_start(struct perf_event *event, int flags) |
| 244 | { |
| 245 | struct hw_perf_event *hwc = &event->hw; |
| 246 | int idx = hwc->idx; |
| 247 | |
| 248 | if (WARN_ON_ONCE(idx == -1)) |
| 249 | return; |
| 250 | |
| 251 | if (flags & PERF_EF_RELOAD) { |
| 252 | WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); |
| 253 | xtensa_perf_event_set_period(event, hwc, idx); |
| 254 | } |
| 255 | |
| 256 | hwc->state = 0; |
| 257 | |
| 258 | set_er(hwc->config, XTENSA_PMU_PMCTRL(idx)); |
| 259 | } |
| 260 | |
| 261 | static void xtensa_pmu_stop(struct perf_event *event, int flags) |
| 262 | { |
| 263 | struct hw_perf_event *hwc = &event->hw; |
| 264 | int idx = hwc->idx; |
| 265 | |
| 266 | if (!(hwc->state & PERF_HES_STOPPED)) { |
| 267 | set_er(0, XTENSA_PMU_PMCTRL(idx)); |
| 268 | set_er(get_er(XTENSA_PMU_PMSTAT(idx)), |
| 269 | XTENSA_PMU_PMSTAT(idx)); |
| 270 | hwc->state |= PERF_HES_STOPPED; |
| 271 | } |
| 272 | |
| 273 | if ((flags & PERF_EF_UPDATE) && |
| 274 | !(event->hw.state & PERF_HES_UPTODATE)) { |
| 275 | xtensa_perf_event_update(event, &event->hw, idx); |
| 276 | event->hw.state |= PERF_HES_UPTODATE; |
| 277 | } |
| 278 | } |
| 279 | |
| 280 | /* |
| 281 | * Adds/Removes a counter to/from the PMU, can be done inside |
| 282 | * a transaction, see the ->*_txn() methods. |
| 283 | */ |
| 284 | static int xtensa_pmu_add(struct perf_event *event, int flags) |
| 285 | { |
| 286 | struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events); |
| 287 | struct hw_perf_event *hwc = &event->hw; |
| 288 | int idx = hwc->idx; |
| 289 | |
| 290 | if (__test_and_set_bit(idx, ev->used_mask)) { |
| 291 | idx = find_first_zero_bit(ev->used_mask, |
| 292 | XCHAL_NUM_PERF_COUNTERS); |
| 293 | if (idx == XCHAL_NUM_PERF_COUNTERS) |
| 294 | return -EAGAIN; |
| 295 | |
| 296 | __set_bit(idx, ev->used_mask); |
| 297 | hwc->idx = idx; |
| 298 | } |
| 299 | ev->event[idx] = event; |
| 300 | |
| 301 | hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; |
| 302 | |
| 303 | if (flags & PERF_EF_START) |
| 304 | xtensa_pmu_start(event, PERF_EF_RELOAD); |
| 305 | |
| 306 | perf_event_update_userpage(event); |
| 307 | return 0; |
| 308 | } |
| 309 | |
| 310 | static void xtensa_pmu_del(struct perf_event *event, int flags) |
| 311 | { |
| 312 | struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events); |
| 313 | |
| 314 | xtensa_pmu_stop(event, PERF_EF_UPDATE); |
| 315 | __clear_bit(event->hw.idx, ev->used_mask); |
| 316 | perf_event_update_userpage(event); |
| 317 | } |
| 318 | |
| 319 | static void xtensa_pmu_read(struct perf_event *event) |
| 320 | { |
| 321 | xtensa_perf_event_update(event, &event->hw, event->hw.idx); |
| 322 | } |
| 323 | |
| 324 | static int callchain_trace(struct stackframe *frame, void *data) |
| 325 | { |
| 326 | struct perf_callchain_entry *entry = data; |
| 327 | |
| 328 | perf_callchain_store(entry, frame->pc); |
| 329 | return 0; |
| 330 | } |
| 331 | |
| 332 | void perf_callchain_kernel(struct perf_callchain_entry *entry, |
| 333 | struct pt_regs *regs) |
| 334 | { |
| 335 | xtensa_backtrace_kernel(regs, PERF_MAX_STACK_DEPTH, |
| 336 | callchain_trace, NULL, entry); |
| 337 | } |
| 338 | |
| 339 | void perf_callchain_user(struct perf_callchain_entry *entry, |
| 340 | struct pt_regs *regs) |
| 341 | { |
| 342 | xtensa_backtrace_user(regs, PERF_MAX_STACK_DEPTH, |
| 343 | callchain_trace, entry); |
| 344 | } |
| 345 | |
| 346 | void perf_event_print_debug(void) |
| 347 | { |
| 348 | unsigned long flags; |
| 349 | unsigned i; |
| 350 | |
| 351 | local_irq_save(flags); |
| 352 | pr_info("CPU#%d: PMG: 0x%08lx\n", smp_processor_id(), |
| 353 | get_er(XTENSA_PMU_PMG)); |
| 354 | for (i = 0; i < XCHAL_NUM_PERF_COUNTERS; ++i) |
| 355 | pr_info("PM%d: 0x%08lx, PMCTRL%d: 0x%08lx, PMSTAT%d: 0x%08lx\n", |
| 356 | i, get_er(XTENSA_PMU_PM(i)), |
| 357 | i, get_er(XTENSA_PMU_PMCTRL(i)), |
| 358 | i, get_er(XTENSA_PMU_PMSTAT(i))); |
| 359 | local_irq_restore(flags); |
| 360 | } |
| 361 | |
| 362 | static irqreturn_t xtensa_pmu_irq_handler(int irq, void *dev_id) |
| 363 | { |
| 364 | irqreturn_t rc = IRQ_NONE; |
| 365 | struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events); |
| 366 | unsigned i; |
| 367 | |
| 368 | for (i = find_first_bit(ev->used_mask, XCHAL_NUM_PERF_COUNTERS); |
| 369 | i < XCHAL_NUM_PERF_COUNTERS; |
| 370 | i = find_next_bit(ev->used_mask, XCHAL_NUM_PERF_COUNTERS, i + 1)) { |
| 371 | uint32_t v = get_er(XTENSA_PMU_PMSTAT(i)); |
| 372 | struct perf_event *event = ev->event[i]; |
| 373 | struct hw_perf_event *hwc = &event->hw; |
| 374 | u64 last_period; |
| 375 | |
| 376 | if (!(v & XTENSA_PMU_PMSTAT_OVFL)) |
| 377 | continue; |
| 378 | |
| 379 | set_er(v, XTENSA_PMU_PMSTAT(i)); |
| 380 | xtensa_perf_event_update(event, hwc, i); |
| 381 | last_period = hwc->last_period; |
| 382 | if (xtensa_perf_event_set_period(event, hwc, i)) { |
| 383 | struct perf_sample_data data; |
| 384 | struct pt_regs *regs = get_irq_regs(); |
| 385 | |
| 386 | perf_sample_data_init(&data, 0, last_period); |
| 387 | if (perf_event_overflow(event, &data, regs)) |
| 388 | xtensa_pmu_stop(event, 0); |
| 389 | } |
| 390 | |
| 391 | rc = IRQ_HANDLED; |
| 392 | } |
| 393 | return rc; |
| 394 | } |
| 395 | |
| 396 | static struct pmu xtensa_pmu = { |
| 397 | .pmu_enable = xtensa_pmu_enable, |
| 398 | .pmu_disable = xtensa_pmu_disable, |
| 399 | .event_init = xtensa_pmu_event_init, |
| 400 | .add = xtensa_pmu_add, |
| 401 | .del = xtensa_pmu_del, |
| 402 | .start = xtensa_pmu_start, |
| 403 | .stop = xtensa_pmu_stop, |
| 404 | .read = xtensa_pmu_read, |
| 405 | }; |
| 406 | |
| 407 | static void xtensa_pmu_setup(void) |
| 408 | { |
| 409 | unsigned i; |
| 410 | |
| 411 | set_er(0, XTENSA_PMU_PMG); |
| 412 | for (i = 0; i < XCHAL_NUM_PERF_COUNTERS; ++i) { |
| 413 | set_er(0, XTENSA_PMU_PMCTRL(i)); |
| 414 | set_er(get_er(XTENSA_PMU_PMSTAT(i)), XTENSA_PMU_PMSTAT(i)); |
| 415 | } |
| 416 | } |
| 417 | |
| 418 | static int xtensa_pmu_notifier(struct notifier_block *self, |
| 419 | unsigned long action, void *data) |
| 420 | { |
| 421 | switch (action & ~CPU_TASKS_FROZEN) { |
| 422 | case CPU_STARTING: |
| 423 | xtensa_pmu_setup(); |
| 424 | break; |
| 425 | |
| 426 | default: |
| 427 | break; |
| 428 | } |
| 429 | |
| 430 | return NOTIFY_OK; |
| 431 | } |
| 432 | |
| 433 | static int __init xtensa_pmu_init(void) |
| 434 | { |
| 435 | int ret; |
| 436 | int irq = irq_create_mapping(NULL, XCHAL_PROFILING_INTERRUPT); |
| 437 | |
| 438 | perf_cpu_notifier(xtensa_pmu_notifier); |
| 439 | ret = request_irq(irq, xtensa_pmu_irq_handler, IRQF_PERCPU, |
| 440 | "pmu", NULL); |
| 441 | if (ret < 0) |
| 442 | return ret; |
| 443 | |
| 444 | ret = perf_pmu_register(&xtensa_pmu, "cpu", PERF_TYPE_RAW); |
| 445 | if (ret) |
| 446 | free_irq(irq, NULL); |
| 447 | |
| 448 | return ret; |
| 449 | } |
| 450 | early_initcall(xtensa_pmu_init); |