| // SPDX-License-Identifier: GPL-2.0 |
| /* |
| * Copyright (C) 2008-2017 Andes Technology Corporation |
| * |
| * Reference ARMv7: Jean Pihet <jpihet@mvista.com> |
| * 2010 (c) MontaVista Software, LLC. |
| */ |
| |
| #include <linux/perf_event.h> |
| #include <linux/bitmap.h> |
| #include <linux/export.h> |
| #include <linux/kernel.h> |
| #include <linux/of.h> |
| #include <linux/platform_device.h> |
| #include <linux/slab.h> |
| #include <linux/spinlock.h> |
| #include <linux/pm_runtime.h> |
| #include <linux/ftrace.h> |
| #include <linux/uaccess.h> |
| #include <linux/sched/clock.h> |
| #include <linux/percpu-defs.h> |
| |
| #include <asm/pmu.h> |
| #include <asm/irq_regs.h> |
| #include <asm/nds32.h> |
| #include <asm/stacktrace.h> |
| #include <asm/perf_event.h> |
| #include <nds32_intrinsic.h> |
| |
| /* Set at runtime when we know what CPU type we are. */ |
| static struct nds32_pmu *cpu_pmu; |
| |
| static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events); |
| static void nds32_pmu_start(struct nds32_pmu *cpu_pmu); |
| static void nds32_pmu_stop(struct nds32_pmu *cpu_pmu); |
| static struct platform_device_id cpu_pmu_plat_device_ids[] = { |
| {.name = "nds32-pfm"}, |
| {}, |
| }; |
| |
| static int nds32_pmu_map_cache_event(const unsigned int (*cache_map) |
| [PERF_COUNT_HW_CACHE_MAX] |
| [PERF_COUNT_HW_CACHE_OP_MAX] |
| [PERF_COUNT_HW_CACHE_RESULT_MAX], u64 config) |
| { |
| unsigned int cache_type, cache_op, cache_result, ret; |
| |
| cache_type = (config >> 0) & 0xff; |
| if (cache_type >= PERF_COUNT_HW_CACHE_MAX) |
| return -EINVAL; |
| |
| cache_op = (config >> 8) & 0xff; |
| if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) |
| return -EINVAL; |
| |
| cache_result = (config >> 16) & 0xff; |
| if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) |
| return -EINVAL; |
| |
| ret = (int)(*cache_map)[cache_type][cache_op][cache_result]; |
| |
| if (ret == CACHE_OP_UNSUPPORTED) |
| return -ENOENT; |
| |
| return ret; |
| } |
| |
| static int |
| nds32_pmu_map_hw_event(const unsigned int (*event_map)[PERF_COUNT_HW_MAX], |
| u64 config) |
| { |
| int mapping; |
| |
| if (config >= PERF_COUNT_HW_MAX) |
| return -ENOENT; |
| |
| mapping = (*event_map)[config]; |
| return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping; |
| } |
| |
| static int nds32_pmu_map_raw_event(u32 raw_event_mask, u64 config) |
| { |
| int ev_type = (int)(config & raw_event_mask); |
| int idx = config >> 8; |
| |
| switch (idx) { |
| case 0: |
| ev_type = PFM_OFFSET_MAGIC_0 + ev_type; |
| if (ev_type >= SPAV3_0_SEL_LAST || ev_type <= SPAV3_0_SEL_BASE) |
| return -ENOENT; |
| break; |
| case 1: |
| ev_type = PFM_OFFSET_MAGIC_1 + ev_type; |
| if (ev_type >= SPAV3_1_SEL_LAST || ev_type <= SPAV3_1_SEL_BASE) |
| return -ENOENT; |
| break; |
| case 2: |
| ev_type = PFM_OFFSET_MAGIC_2 + ev_type; |
| if (ev_type >= SPAV3_2_SEL_LAST || ev_type <= SPAV3_2_SEL_BASE) |
| return -ENOENT; |
| break; |
| default: |
| return -ENOENT; |
| } |
| |
| return ev_type; |
| } |
| |
| int |
| nds32_pmu_map_event(struct perf_event *event, |
| const unsigned int (*event_map)[PERF_COUNT_HW_MAX], |
| const unsigned int (*cache_map) |
| [PERF_COUNT_HW_CACHE_MAX] |
| [PERF_COUNT_HW_CACHE_OP_MAX] |
| [PERF_COUNT_HW_CACHE_RESULT_MAX], u32 raw_event_mask) |
| { |
| u64 config = event->attr.config; |
| |
| switch (event->attr.type) { |
| case PERF_TYPE_HARDWARE: |
| return nds32_pmu_map_hw_event(event_map, config); |
| case PERF_TYPE_HW_CACHE: |
| return nds32_pmu_map_cache_event(cache_map, config); |
| case PERF_TYPE_RAW: |
| return nds32_pmu_map_raw_event(raw_event_mask, config); |
| } |
| |
| return -ENOENT; |
| } |
| |
| static int nds32_spav3_map_event(struct perf_event *event) |
| { |
| return nds32_pmu_map_event(event, &nds32_pfm_perf_map, |
| &nds32_pfm_perf_cache_map, SOFTWARE_EVENT_MASK); |
| } |
| |
| static inline u32 nds32_pfm_getreset_flags(void) |
| { |
| /* Read overflow status */ |
| u32 val = __nds32__mfsr(NDS32_SR_PFM_CTL); |
| u32 old_val = val; |
| |
| /* Write overflow bit to clear status, and others keep it 0 */ |
| u32 ov_flag = PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]; |
| |
| __nds32__mtsr(val | ov_flag, NDS32_SR_PFM_CTL); |
| |
| return old_val; |
| } |
| |
| static inline int nds32_pfm_has_overflowed(u32 pfm) |
| { |
| u32 ov_flag = PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]; |
| |
| return pfm & ov_flag; |
| } |
| |
| static inline int nds32_pfm_counter_has_overflowed(u32 pfm, int idx) |
| { |
| u32 mask = 0; |
| |
| switch (idx) { |
| case 0: |
| mask = PFM_CTL_OVF[0]; |
| break; |
| case 1: |
| mask = PFM_CTL_OVF[1]; |
| break; |
| case 2: |
| mask = PFM_CTL_OVF[2]; |
| break; |
| default: |
| pr_err("%s index wrong\n", __func__); |
| break; |
| } |
| return pfm & mask; |
| } |
| |
| /* |
| * Set the next IRQ period, based on the hwc->period_left value. |
| * To be called with the event disabled in hw: |
| */ |
| int nds32_pmu_event_set_period(struct perf_event *event) |
| { |
| struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu); |
| struct hw_perf_event *hwc = &event->hw; |
| s64 left = local64_read(&hwc->period_left); |
| s64 period = hwc->sample_period; |
| int ret = 0; |
| |
| /* The period may have been changed by PERF_EVENT_IOC_PERIOD */ |
| if (unlikely(period != hwc->last_period)) |
| left = period - (hwc->last_period - left); |
| |
| if (unlikely(left <= -period)) { |
| left = period; |
| local64_set(&hwc->period_left, left); |
| hwc->last_period = period; |
| ret = 1; |
| } |
| |
| if (unlikely(left <= 0)) { |
| left += period; |
| local64_set(&hwc->period_left, left); |
| hwc->last_period = period; |
| ret = 1; |
| } |
| |
| if (left > (s64)nds32_pmu->max_period) |
| left = nds32_pmu->max_period; |
| |
| /* |
| * The hw event starts counting from this event offset, |
| * mark it to be able to extract future "deltas": |
| */ |
| local64_set(&hwc->prev_count, (u64)(-left)); |
| |
| nds32_pmu->write_counter(event, (u64)(-left) & nds32_pmu->max_period); |
| |
| perf_event_update_userpage(event); |
| |
| return ret; |
| } |
| |
| static irqreturn_t nds32_pmu_handle_irq(int irq_num, void *dev) |
| { |
| u32 pfm; |
| struct perf_sample_data data; |
| struct nds32_pmu *cpu_pmu = (struct nds32_pmu *)dev; |
| struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events(); |
| struct pt_regs *regs; |
| int idx; |
| /* |
| * Get and reset the IRQ flags |
| */ |
| pfm = nds32_pfm_getreset_flags(); |
| |
| /* |
| * Did an overflow occur? |
| */ |
| if (!nds32_pfm_has_overflowed(pfm)) |
| return IRQ_NONE; |
| |
| /* |
| * Handle the counter(s) overflow(s) |
| */ |
| regs = get_irq_regs(); |
| |
| nds32_pmu_stop(cpu_pmu); |
| for (idx = 0; idx < cpu_pmu->num_events; ++idx) { |
| struct perf_event *event = cpuc->events[idx]; |
| struct hw_perf_event *hwc; |
| |
| /* Ignore if we don't have an event. */ |
| if (!event) |
| continue; |
| |
| /* |
| * We have a single interrupt for all counters. Check that |
| * each counter has overflowed before we process it. |
| */ |
| if (!nds32_pfm_counter_has_overflowed(pfm, idx)) |
| continue; |
| |
| hwc = &event->hw; |
| nds32_pmu_event_update(event); |
| perf_sample_data_init(&data, 0, hwc->last_period); |
| if (!nds32_pmu_event_set_period(event)) |
| continue; |
| |
| if (perf_event_overflow(event, &data, regs)) |
| cpu_pmu->disable(event); |
| } |
| nds32_pmu_start(cpu_pmu); |
| /* |
| * Handle the pending perf events. |
| * |
| * Note: this call *must* be run with interrupts disabled. For |
| * platforms that can have the PMU interrupts raised as an NMI, this |
| * will not work. |
| */ |
| irq_work_run(); |
| |
| return IRQ_HANDLED; |
| } |
| |
| static inline int nds32_pfm_counter_valid(struct nds32_pmu *cpu_pmu, int idx) |
| { |
| return ((idx >= 0) && (idx < cpu_pmu->num_events)); |
| } |
| |
| static inline int nds32_pfm_disable_counter(int idx) |
| { |
| unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL); |
| u32 mask = 0; |
| |
| mask = PFM_CTL_EN[idx]; |
| val &= ~mask; |
| val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]); |
| __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL); |
| return idx; |
| } |
| |
| /* |
| * Add an event filter to a given event. |
| */ |
| static int nds32_pmu_set_event_filter(struct hw_perf_event *event, |
| struct perf_event_attr *attr) |
| { |
| unsigned long config_base = 0; |
| int idx = event->idx; |
| unsigned long no_kernel_tracing = 0; |
| unsigned long no_user_tracing = 0; |
| /* If index is -1, do not do anything */ |
| if (idx == -1) |
| return 0; |
| |
| no_kernel_tracing = PFM_CTL_KS[idx]; |
| no_user_tracing = PFM_CTL_KU[idx]; |
| /* |
| * Default: enable both kernel and user mode tracing. |
| */ |
| if (attr->exclude_user) |
| config_base |= no_user_tracing; |
| |
| if (attr->exclude_kernel) |
| config_base |= no_kernel_tracing; |
| |
| /* |
| * Install the filter into config_base as this is used to |
| * construct the event type. |
| */ |
| event->config_base |= config_base; |
| return 0; |
| } |
| |
| static inline void nds32_pfm_write_evtsel(int idx, u32 evnum) |
| { |
| u32 offset = 0; |
| u32 ori_val = __nds32__mfsr(NDS32_SR_PFM_CTL); |
| u32 ev_mask = 0; |
| u32 no_kernel_mask = 0; |
| u32 no_user_mask = 0; |
| u32 val; |
| |
| offset = PFM_CTL_OFFSEL[idx]; |
| /* Clear previous mode selection, and write new one */ |
| no_kernel_mask = PFM_CTL_KS[idx]; |
| no_user_mask = PFM_CTL_KU[idx]; |
| ori_val &= ~no_kernel_mask; |
| ori_val &= ~no_user_mask; |
| if (evnum & no_kernel_mask) |
| ori_val |= no_kernel_mask; |
| |
| if (evnum & no_user_mask) |
| ori_val |= no_user_mask; |
| |
| /* Clear previous event selection */ |
| ev_mask = PFM_CTL_SEL[idx]; |
| ori_val &= ~ev_mask; |
| evnum &= SOFTWARE_EVENT_MASK; |
| |
| /* undo the linear mapping */ |
| evnum = get_converted_evet_hw_num(evnum); |
| val = ori_val | (evnum << offset); |
| val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]); |
| __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL); |
| } |
| |
| static inline int nds32_pfm_enable_counter(int idx) |
| { |
| unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL); |
| u32 mask = 0; |
| |
| mask = PFM_CTL_EN[idx]; |
| val |= mask; |
| val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]); |
| __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL); |
| return idx; |
| } |
| |
| static inline int nds32_pfm_enable_intens(int idx) |
| { |
| unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL); |
| u32 mask = 0; |
| |
| mask = PFM_CTL_IE[idx]; |
| val |= mask; |
| val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]); |
| __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL); |
| return idx; |
| } |
| |
| static inline int nds32_pfm_disable_intens(int idx) |
| { |
| unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL); |
| u32 mask = 0; |
| |
| mask = PFM_CTL_IE[idx]; |
| val &= ~mask; |
| val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]); |
| __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL); |
| return idx; |
| } |
| |
| static int event_requires_mode_exclusion(struct perf_event_attr *attr) |
| { |
| /* Other modes NDS32 does not support */ |
| return attr->exclude_user || attr->exclude_kernel; |
| } |
| |
| static void nds32_pmu_enable_event(struct perf_event *event) |
| { |
| unsigned long flags; |
| unsigned int evnum = 0; |
| struct hw_perf_event *hwc = &event->hw; |
| struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu); |
| struct pmu_hw_events *events = cpu_pmu->get_hw_events(); |
| int idx = hwc->idx; |
| |
| if (!nds32_pfm_counter_valid(cpu_pmu, idx)) { |
| pr_err("CPU enabling wrong pfm counter IRQ enable\n"); |
| return; |
| } |
| |
| /* |
| * Enable counter and interrupt, and set the counter to count |
| * the event that we're interested in. |
| */ |
| raw_spin_lock_irqsave(&events->pmu_lock, flags); |
| |
| /* |
| * Disable counter |
| */ |
| nds32_pfm_disable_counter(idx); |
| |
| /* |
| * Check whether we need to exclude the counter from certain modes. |
| */ |
| if ((!cpu_pmu->set_event_filter || |
| cpu_pmu->set_event_filter(hwc, &event->attr)) && |
| event_requires_mode_exclusion(&event->attr)) { |
| pr_notice |
| ("NDS32 performance counters do not support mode exclusion\n"); |
| hwc->config_base = 0; |
| } |
| /* Write event */ |
| evnum = hwc->config_base; |
| nds32_pfm_write_evtsel(idx, evnum); |
| |
| /* |
| * Enable interrupt for this counter |
| */ |
| nds32_pfm_enable_intens(idx); |
| |
| /* |
| * Enable counter |
| */ |
| nds32_pfm_enable_counter(idx); |
| |
| raw_spin_unlock_irqrestore(&events->pmu_lock, flags); |
| } |
| |
| static void nds32_pmu_disable_event(struct perf_event *event) |
| { |
| unsigned long flags; |
| struct hw_perf_event *hwc = &event->hw; |
| struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu); |
| struct pmu_hw_events *events = cpu_pmu->get_hw_events(); |
| int idx = hwc->idx; |
| |
| if (!nds32_pfm_counter_valid(cpu_pmu, idx)) { |
| pr_err("CPU disabling wrong pfm counter IRQ enable %d\n", idx); |
| return; |
| } |
| |
| /* |
| * Disable counter and interrupt |
| */ |
| raw_spin_lock_irqsave(&events->pmu_lock, flags); |
| |
| /* |
| * Disable counter |
| */ |
| nds32_pfm_disable_counter(idx); |
| |
| /* |
| * Disable interrupt for this counter |
| */ |
| nds32_pfm_disable_intens(idx); |
| |
| raw_spin_unlock_irqrestore(&events->pmu_lock, flags); |
| } |
| |
| static inline u32 nds32_pmu_read_counter(struct perf_event *event) |
| { |
| struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu); |
| struct hw_perf_event *hwc = &event->hw; |
| int idx = hwc->idx; |
| u32 count = 0; |
| |
| if (!nds32_pfm_counter_valid(cpu_pmu, idx)) { |
| pr_err("CPU reading wrong counter %d\n", idx); |
| } else { |
| switch (idx) { |
| case PFMC0: |
| count = __nds32__mfsr(NDS32_SR_PFMC0); |
| break; |
| case PFMC1: |
| count = __nds32__mfsr(NDS32_SR_PFMC1); |
| break; |
| case PFMC2: |
| count = __nds32__mfsr(NDS32_SR_PFMC2); |
| break; |
| default: |
| pr_err |
| ("%s: CPU has no performance counters %d\n", |
| __func__, idx); |
| } |
| } |
| return count; |
| } |
| |
| static inline void nds32_pmu_write_counter(struct perf_event *event, u32 value) |
| { |
| struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu); |
| struct hw_perf_event *hwc = &event->hw; |
| int idx = hwc->idx; |
| |
| if (!nds32_pfm_counter_valid(cpu_pmu, idx)) { |
| pr_err("CPU writing wrong counter %d\n", idx); |
| } else { |
| switch (idx) { |
| case PFMC0: |
| __nds32__mtsr_isb(value, NDS32_SR_PFMC0); |
| break; |
| case PFMC1: |
| __nds32__mtsr_isb(value, NDS32_SR_PFMC1); |
| break; |
| case PFMC2: |
| __nds32__mtsr_isb(value, NDS32_SR_PFMC2); |
| break; |
| default: |
| pr_err |
| ("%s: CPU has no performance counters %d\n", |
| __func__, idx); |
| } |
| } |
| } |
| |
| static int nds32_pmu_get_event_idx(struct pmu_hw_events *cpuc, |
| struct perf_event *event) |
| { |
| int idx; |
| struct hw_perf_event *hwc = &event->hw; |
| /* |
| * Current implementation maps cycles, instruction count and cache-miss |
| * to specific counter. |
| * However, multiple of the 3 counters are able to count these events. |
| * |
| * |
| * SOFTWARE_EVENT_MASK mask for getting event num , |
| * This is defined by Jia-Rung, you can change the polocies. |
| * However, do not exceed 8 bits. This is hardware specific. |
| * The last number is SPAv3_2_SEL_LAST. |
| */ |
| unsigned long evtype = hwc->config_base & SOFTWARE_EVENT_MASK; |
| |
| idx = get_converted_event_idx(evtype); |
| /* |
| * Try to get the counter for correpsonding event |
| */ |
| if (evtype == SPAV3_0_SEL_TOTAL_CYCLES) { |
| if (!test_and_set_bit(idx, cpuc->used_mask)) |
| return idx; |
| if (!test_and_set_bit(NDS32_IDX_COUNTER0, cpuc->used_mask)) |
| return NDS32_IDX_COUNTER0; |
| if (!test_and_set_bit(NDS32_IDX_COUNTER1, cpuc->used_mask)) |
| return NDS32_IDX_COUNTER1; |
| } else if (evtype == SPAV3_1_SEL_COMPLETED_INSTRUCTION) { |
| if (!test_and_set_bit(idx, cpuc->used_mask)) |
| return idx; |
| else if (!test_and_set_bit(NDS32_IDX_COUNTER1, cpuc->used_mask)) |
| return NDS32_IDX_COUNTER1; |
| else if (!test_and_set_bit |
| (NDS32_IDX_CYCLE_COUNTER, cpuc->used_mask)) |
| return NDS32_IDX_CYCLE_COUNTER; |
| } else { |
| if (!test_and_set_bit(idx, cpuc->used_mask)) |
| return idx; |
| } |
| return -EAGAIN; |
| } |
| |
| static void nds32_pmu_start(struct nds32_pmu *cpu_pmu) |
| { |
| unsigned long flags; |
| unsigned int val; |
| struct pmu_hw_events *events = cpu_pmu->get_hw_events(); |
| |
| raw_spin_lock_irqsave(&events->pmu_lock, flags); |
| |
| /* Enable all counters , NDS PFM has 3 counters */ |
| val = __nds32__mfsr(NDS32_SR_PFM_CTL); |
| val |= (PFM_CTL_EN[0] | PFM_CTL_EN[1] | PFM_CTL_EN[2]); |
| val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]); |
| __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL); |
| |
| raw_spin_unlock_irqrestore(&events->pmu_lock, flags); |
| } |
| |
| static void nds32_pmu_stop(struct nds32_pmu *cpu_pmu) |
| { |
| unsigned long flags; |
| unsigned int val; |
| struct pmu_hw_events *events = cpu_pmu->get_hw_events(); |
| |
| raw_spin_lock_irqsave(&events->pmu_lock, flags); |
| |
| /* Disable all counters , NDS PFM has 3 counters */ |
| val = __nds32__mfsr(NDS32_SR_PFM_CTL); |
| val &= ~(PFM_CTL_EN[0] | PFM_CTL_EN[1] | PFM_CTL_EN[2]); |
| val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]); |
| __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL); |
| |
| raw_spin_unlock_irqrestore(&events->pmu_lock, flags); |
| } |
| |
| static void nds32_pmu_reset(void *info) |
| { |
| u32 val = 0; |
| |
| val |= (PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]); |
| __nds32__mtsr(val, NDS32_SR_PFM_CTL); |
| __nds32__mtsr(0, NDS32_SR_PFM_CTL); |
| __nds32__mtsr(0, NDS32_SR_PFMC0); |
| __nds32__mtsr(0, NDS32_SR_PFMC1); |
| __nds32__mtsr(0, NDS32_SR_PFMC2); |
| } |
| |
| static void nds32_pmu_init(struct nds32_pmu *cpu_pmu) |
| { |
| cpu_pmu->handle_irq = nds32_pmu_handle_irq; |
| cpu_pmu->enable = nds32_pmu_enable_event; |
| cpu_pmu->disable = nds32_pmu_disable_event; |
| cpu_pmu->read_counter = nds32_pmu_read_counter; |
| cpu_pmu->write_counter = nds32_pmu_write_counter; |
| cpu_pmu->get_event_idx = nds32_pmu_get_event_idx; |
| cpu_pmu->start = nds32_pmu_start; |
| cpu_pmu->stop = nds32_pmu_stop; |
| cpu_pmu->reset = nds32_pmu_reset; |
| cpu_pmu->max_period = 0xFFFFFFFF; /* Maximum counts */ |
| }; |
| |
| static u32 nds32_read_num_pfm_events(void) |
| { |
| /* NDS32 SPAv3 PMU support 3 counter */ |
| return 3; |
| } |
| |
| static int device_pmu_init(struct nds32_pmu *cpu_pmu) |
| { |
| nds32_pmu_init(cpu_pmu); |
| /* |
| * This name should be devive-specific name, whatever you like :) |
| * I think "PMU" will be a good generic name. |
| */ |
| cpu_pmu->name = "nds32v3-pmu"; |
| cpu_pmu->map_event = nds32_spav3_map_event; |
| cpu_pmu->num_events = nds32_read_num_pfm_events(); |
| cpu_pmu->set_event_filter = nds32_pmu_set_event_filter; |
| return 0; |
| } |
| |
/*
 * CPU PMU identification and probing.
 *
 * Used when no device-tree match supplies an init function.  Only one PMU
 * type exists today, so it is initialised unconditionally; if more CPU
 * types with their own PMU appear, pick the matching initialiser here.
 *
 * Returns the result of the initialiser (0 on success), so that a
 * successful initialisation is no longer reported as -ENODEV.
 */
static int probe_current_pmu(struct nds32_pmu *pmu)
{
	int ret;

	get_cpu();
	ret = device_pmu_init(pmu);
	put_cpu();

	return ret;
}
| |
| static void nds32_pmu_enable(struct pmu *pmu) |
| { |
| struct nds32_pmu *nds32_pmu = to_nds32_pmu(pmu); |
| struct pmu_hw_events *hw_events = nds32_pmu->get_hw_events(); |
| int enabled = bitmap_weight(hw_events->used_mask, |
| nds32_pmu->num_events); |
| |
| if (enabled) |
| nds32_pmu->start(nds32_pmu); |
| } |
| |
| static void nds32_pmu_disable(struct pmu *pmu) |
| { |
| struct nds32_pmu *nds32_pmu = to_nds32_pmu(pmu); |
| |
| nds32_pmu->stop(nds32_pmu); |
| } |
| |
| static void nds32_pmu_release_hardware(struct nds32_pmu *nds32_pmu) |
| { |
| nds32_pmu->free_irq(nds32_pmu); |
| pm_runtime_put_sync(&nds32_pmu->plat_device->dev); |
| } |
| |
| static irqreturn_t nds32_pmu_dispatch_irq(int irq, void *dev) |
| { |
| struct nds32_pmu *nds32_pmu = (struct nds32_pmu *)dev; |
| int ret; |
| u64 start_clock, finish_clock; |
| |
| start_clock = local_clock(); |
| ret = nds32_pmu->handle_irq(irq, dev); |
| finish_clock = local_clock(); |
| |
| perf_sample_event_took(finish_clock - start_clock); |
| return ret; |
| } |
| |
| static int nds32_pmu_reserve_hardware(struct nds32_pmu *nds32_pmu) |
| { |
| int err; |
| struct platform_device *pmu_device = nds32_pmu->plat_device; |
| |
| if (!pmu_device) |
| return -ENODEV; |
| |
| pm_runtime_get_sync(&pmu_device->dev); |
| err = nds32_pmu->request_irq(nds32_pmu, nds32_pmu_dispatch_irq); |
| if (err) { |
| nds32_pmu_release_hardware(nds32_pmu); |
| return err; |
| } |
| |
| return 0; |
| } |
| |
| static int |
| validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events, |
| struct perf_event *event) |
| { |
| struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu); |
| |
| if (is_software_event(event)) |
| return 1; |
| |
| if (event->pmu != pmu) |
| return 0; |
| |
| if (event->state < PERF_EVENT_STATE_OFF) |
| return 1; |
| |
| if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec) |
| return 1; |
| |
| return nds32_pmu->get_event_idx(hw_events, event) >= 0; |
| } |
| |
| static int validate_group(struct perf_event *event) |
| { |
| struct perf_event *sibling, *leader = event->group_leader; |
| struct pmu_hw_events fake_pmu; |
| DECLARE_BITMAP(fake_used_mask, MAX_COUNTERS); |
| /* |
| * Initialize the fake PMU. We only need to populate the |
| * used_mask for the purposes of validation. |
| */ |
| memset(fake_used_mask, 0, sizeof(fake_used_mask)); |
| |
| if (!validate_event(event->pmu, &fake_pmu, leader)) |
| return -EINVAL; |
| |
| for_each_sibling_event(sibling, leader) { |
| if (!validate_event(event->pmu, &fake_pmu, sibling)) |
| return -EINVAL; |
| } |
| |
| if (!validate_event(event->pmu, &fake_pmu, event)) |
| return -EINVAL; |
| |
| return 0; |
| } |
| |
| static int __hw_perf_event_init(struct perf_event *event) |
| { |
| struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu); |
| struct hw_perf_event *hwc = &event->hw; |
| int mapping; |
| |
| mapping = nds32_pmu->map_event(event); |
| |
| if (mapping < 0) { |
| pr_debug("event %x:%llx not supported\n", event->attr.type, |
| event->attr.config); |
| return mapping; |
| } |
| |
| /* |
| * We don't assign an index until we actually place the event onto |
| * hardware. Use -1 to signify that we haven't decided where to put it |
| * yet. For SMP systems, each core has it's own PMU so we can't do any |
| * clever allocation or constraints checking at this point. |
| */ |
| hwc->idx = -1; |
| hwc->config_base = 0; |
| hwc->config = 0; |
| hwc->event_base = 0; |
| |
| /* |
| * Check whether we need to exclude the counter from certain modes. |
| */ |
| if ((!nds32_pmu->set_event_filter || |
| nds32_pmu->set_event_filter(hwc, &event->attr)) && |
| event_requires_mode_exclusion(&event->attr)) { |
| pr_debug |
| ("NDS performance counters do not support mode exclusion\n"); |
| return -EOPNOTSUPP; |
| } |
| |
| /* |
| * Store the event encoding into the config_base field. |
| */ |
| hwc->config_base |= (unsigned long)mapping; |
| |
| if (!hwc->sample_period) { |
| /* |
| * For non-sampling runs, limit the sample_period to half |
| * of the counter width. That way, the new counter value |
| * is far less likely to overtake the previous one unless |
| * you have some serious IRQ latency issues. |
| */ |
| hwc->sample_period = nds32_pmu->max_period >> 1; |
| hwc->last_period = hwc->sample_period; |
| local64_set(&hwc->period_left, hwc->sample_period); |
| } |
| |
| if (event->group_leader != event) { |
| if (validate_group(event) != 0) |
| return -EINVAL; |
| } |
| |
| return 0; |
| } |
| |
| static int nds32_pmu_event_init(struct perf_event *event) |
| { |
| struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu); |
| int err = 0; |
| atomic_t *active_events = &nds32_pmu->active_events; |
| |
| /* does not support taken branch sampling */ |
| if (has_branch_stack(event)) |
| return -EOPNOTSUPP; |
| |
| if (nds32_pmu->map_event(event) == -ENOENT) |
| return -ENOENT; |
| |
| if (!atomic_inc_not_zero(active_events)) { |
| if (atomic_read(active_events) == 0) { |
| /* Register irq handler */ |
| err = nds32_pmu_reserve_hardware(nds32_pmu); |
| } |
| |
| if (!err) |
| atomic_inc(active_events); |
| } |
| |
| if (err) |
| return err; |
| |
| err = __hw_perf_event_init(event); |
| |
| return err; |
| } |
| |
| static void nds32_start(struct perf_event *event, int flags) |
| { |
| struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu); |
| struct hw_perf_event *hwc = &event->hw; |
| /* |
| * NDS pmu always has to reprogram the period, so ignore |
| * PERF_EF_RELOAD, see the comment below. |
| */ |
| if (flags & PERF_EF_RELOAD) |
| WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); |
| |
| hwc->state = 0; |
| /* Set the period for the event. */ |
| nds32_pmu_event_set_period(event); |
| |
| nds32_pmu->enable(event); |
| } |
| |
| static int nds32_pmu_add(struct perf_event *event, int flags) |
| { |
| struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu); |
| struct pmu_hw_events *hw_events = nds32_pmu->get_hw_events(); |
| struct hw_perf_event *hwc = &event->hw; |
| int idx; |
| int err = 0; |
| |
| perf_pmu_disable(event->pmu); |
| |
| /* If we don't have a space for the counter then finish early. */ |
| idx = nds32_pmu->get_event_idx(hw_events, event); |
| if (idx < 0) { |
| err = idx; |
| goto out; |
| } |
| |
| /* |
| * If there is an event in the counter we are going to use then make |
| * sure it is disabled. |
| */ |
| event->hw.idx = idx; |
| nds32_pmu->disable(event); |
| hw_events->events[idx] = event; |
| |
| hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; |
| if (flags & PERF_EF_START) |
| nds32_start(event, PERF_EF_RELOAD); |
| |
| /* Propagate our changes to the userspace mapping. */ |
| perf_event_update_userpage(event); |
| |
| out: |
| perf_pmu_enable(event->pmu); |
| return err; |
| } |
| |
| u64 nds32_pmu_event_update(struct perf_event *event) |
| { |
| struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu); |
| struct hw_perf_event *hwc = &event->hw; |
| u64 delta, prev_raw_count, new_raw_count; |
| |
| again: |
| prev_raw_count = local64_read(&hwc->prev_count); |
| new_raw_count = nds32_pmu->read_counter(event); |
| |
| if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, |
| new_raw_count) != prev_raw_count) { |
| goto again; |
| } |
| /* |
| * Whether overflow or not, "unsigned substraction" |
| * will always get their delta |
| */ |
| delta = (new_raw_count - prev_raw_count) & nds32_pmu->max_period; |
| |
| local64_add(delta, &event->count); |
| local64_sub(delta, &hwc->period_left); |
| |
| return new_raw_count; |
| } |
| |
| static void nds32_stop(struct perf_event *event, int flags) |
| { |
| struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu); |
| struct hw_perf_event *hwc = &event->hw; |
| /* |
| * NDS pmu always has to update the counter, so ignore |
| * PERF_EF_UPDATE, see comments in nds32_start(). |
| */ |
| if (!(hwc->state & PERF_HES_STOPPED)) { |
| nds32_pmu->disable(event); |
| nds32_pmu_event_update(event); |
| hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; |
| } |
| } |
| |
| static void nds32_pmu_del(struct perf_event *event, int flags) |
| { |
| struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu); |
| struct pmu_hw_events *hw_events = nds32_pmu->get_hw_events(); |
| struct hw_perf_event *hwc = &event->hw; |
| int idx = hwc->idx; |
| |
| nds32_stop(event, PERF_EF_UPDATE); |
| hw_events->events[idx] = NULL; |
| clear_bit(idx, hw_events->used_mask); |
| |
| perf_event_update_userpage(event); |
| } |
| |
/* read callback: bring event->count up to date; the raw value is unused. */
static void nds32_pmu_read(struct perf_event *event)
{
	(void)nds32_pmu_event_update(event);
}
| |
| /* Please refer to SPAv3 for more hardware specific details */ |
| PMU_FORMAT_ATTR(event, "config:0-63"); |
| |
| static struct attribute *nds32_arch_formats_attr[] = { |
| &format_attr_event.attr, |
| NULL, |
| }; |
| |
| static struct attribute_group nds32_pmu_format_group = { |
| .name = "format", |
| .attrs = nds32_arch_formats_attr, |
| }; |
| |
| static ssize_t nds32_pmu_cpumask_show(struct device *dev, |
| struct device_attribute *attr, |
| char *buf) |
| { |
| return 0; |
| } |
| |
| static DEVICE_ATTR(cpus, 0444, nds32_pmu_cpumask_show, NULL); |
| |
| static struct attribute *nds32_pmu_common_attrs[] = { |
| &dev_attr_cpus.attr, |
| NULL, |
| }; |
| |
| static struct attribute_group nds32_pmu_common_group = { |
| .attrs = nds32_pmu_common_attrs, |
| }; |
| |
| static const struct attribute_group *nds32_pmu_attr_groups[] = { |
| &nds32_pmu_format_group, |
| &nds32_pmu_common_group, |
| NULL, |
| }; |
| |
| static void nds32_init(struct nds32_pmu *nds32_pmu) |
| { |
| atomic_set(&nds32_pmu->active_events, 0); |
| |
| nds32_pmu->pmu = (struct pmu) { |
| .pmu_enable = nds32_pmu_enable, |
| .pmu_disable = nds32_pmu_disable, |
| .attr_groups = nds32_pmu_attr_groups, |
| .event_init = nds32_pmu_event_init, |
| .add = nds32_pmu_add, |
| .del = nds32_pmu_del, |
| .start = nds32_start, |
| .stop = nds32_stop, |
| .read = nds32_pmu_read, |
| }; |
| } |
| |
| int nds32_pmu_register(struct nds32_pmu *nds32_pmu, int type) |
| { |
| nds32_init(nds32_pmu); |
| pm_runtime_enable(&nds32_pmu->plat_device->dev); |
| pr_info("enabled with %s PMU driver, %d counters available\n", |
| nds32_pmu->name, nds32_pmu->num_events); |
| return perf_pmu_register(&nds32_pmu->pmu, nds32_pmu->name, type); |
| } |
| |
| static struct pmu_hw_events *cpu_pmu_get_cpu_events(void) |
| { |
| return this_cpu_ptr(&cpu_hw_events); |
| } |
| |
| static int cpu_pmu_request_irq(struct nds32_pmu *cpu_pmu, irq_handler_t handler) |
| { |
| int err, irq, irqs; |
| struct platform_device *pmu_device = cpu_pmu->plat_device; |
| |
| if (!pmu_device) |
| return -ENODEV; |
| |
| irqs = min(pmu_device->num_resources, num_possible_cpus()); |
| if (irqs < 1) { |
| pr_err("no irqs for PMUs defined\n"); |
| return -ENODEV; |
| } |
| |
| irq = platform_get_irq(pmu_device, 0); |
| err = request_irq(irq, handler, IRQF_NOBALANCING, "nds32-pfm", |
| cpu_pmu); |
| if (err) { |
| pr_err("unable to request IRQ%d for NDS PMU counters\n", |
| irq); |
| return err; |
| } |
| return 0; |
| } |
| |
| static void cpu_pmu_free_irq(struct nds32_pmu *cpu_pmu) |
| { |
| int irq; |
| struct platform_device *pmu_device = cpu_pmu->plat_device; |
| |
| irq = platform_get_irq(pmu_device, 0); |
| if (irq >= 0) |
| free_irq(irq, cpu_pmu); |
| } |
| |
| static void cpu_pmu_init(struct nds32_pmu *cpu_pmu) |
| { |
| int cpu; |
| struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu); |
| |
| raw_spin_lock_init(&events->pmu_lock); |
| |
| cpu_pmu->get_hw_events = cpu_pmu_get_cpu_events; |
| cpu_pmu->request_irq = cpu_pmu_request_irq; |
| cpu_pmu->free_irq = cpu_pmu_free_irq; |
| |
| /* Ensure the PMU has sane values out of reset. */ |
| if (cpu_pmu->reset) |
| on_each_cpu(cpu_pmu->reset, cpu_pmu, 1); |
| } |
| |
| static const struct of_device_id cpu_pmu_of_device_ids[] = { |
| {.compatible = "andestech,nds32v3-pmu", |
| .data = device_pmu_init}, |
| {}, |
| }; |
| |
| static int cpu_pmu_device_probe(struct platform_device *pdev) |
| { |
| const struct of_device_id *of_id; |
| int (*init_fn)(struct nds32_pmu *nds32_pmu); |
| struct device_node *node = pdev->dev.of_node; |
| struct nds32_pmu *pmu; |
| int ret = -ENODEV; |
| |
| if (cpu_pmu) { |
| pr_notice("[perf] attempt to register multiple PMU devices!\n"); |
| return -ENOSPC; |
| } |
| |
| pmu = kzalloc(sizeof(*pmu), GFP_KERNEL); |
| if (!pmu) |
| return -ENOMEM; |
| |
| of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node); |
| if (node && of_id) { |
| init_fn = of_id->data; |
| ret = init_fn(pmu); |
| } else { |
| ret = probe_current_pmu(pmu); |
| } |
| |
| if (ret) { |
| pr_notice("[perf] failed to probe PMU!\n"); |
| goto out_free; |
| } |
| |
| cpu_pmu = pmu; |
| cpu_pmu->plat_device = pdev; |
| cpu_pmu_init(cpu_pmu); |
| ret = nds32_pmu_register(cpu_pmu, PERF_TYPE_RAW); |
| |
| if (!ret) |
| return 0; |
| |
| out_free: |
| pr_notice("[perf] failed to register PMU devices!\n"); |
| kfree(pmu); |
| return ret; |
| } |
| |
| static struct platform_driver cpu_pmu_driver = { |
| .driver = { |
| .name = "nds32-pfm", |
| .of_match_table = cpu_pmu_of_device_ids, |
| }, |
| .probe = cpu_pmu_device_probe, |
| .id_table = cpu_pmu_plat_device_ids, |
| }; |
| |
| static int __init register_pmu_driver(void) |
| { |
| int err = 0; |
| |
| err = platform_driver_register(&cpu_pmu_driver); |
| if (err) |
| pr_notice("[perf] PMU initialization failed\n"); |
| else |
| pr_notice("[perf] PMU initialization done\n"); |
| |
| return err; |
| } |
| |
| device_initcall(register_pmu_driver); |
| |
| /* |
| * References: arch/nds32/kernel/traps.c:__dump() |
| * You will need to know the NDS ABI first. |
| */ |
| static int unwind_frame_kernel(struct stackframe *frame) |
| { |
| int graph = 0; |
| #ifdef CONFIG_FRAME_POINTER |
| /* 0x3 means misalignment */ |
| if (!kstack_end((void *)frame->fp) && |
| !((unsigned long)frame->fp & 0x3) && |
| ((unsigned long)frame->fp >= TASK_SIZE)) { |
| /* |
| * The array index is based on the ABI, the below graph |
| * illustrate the reasons. |
| * Function call procedure: "smw" and "lmw" will always |
| * update SP and FP for you automatically. |
| * |
| * Stack Relative Address |
| * | | 0 |
| * ---- |
| * |LP| <-- SP(before smw) <-- FP(after smw) -1 |
| * ---- |
| * |FP| -2 |
| * ---- |
| * | | <-- SP(after smw) -3 |
| */ |
| frame->lp = ((unsigned long *)frame->fp)[-1]; |
| frame->fp = ((unsigned long *)frame->fp)[FP_OFFSET]; |
| /* make sure CONFIG_FUNCTION_GRAPH_TRACER is turned on */ |
| if (__kernel_text_address(frame->lp)) |
| frame->lp = ftrace_graph_ret_addr |
| (NULL, &graph, frame->lp, NULL); |
| |
| return 0; |
| } else { |
| return -EPERM; |
| } |
| #else |
| /* |
| * You can refer to arch/nds32/kernel/traps.c:__dump() |
| * Treat "sp" as "fp", but the "sp" is one frame ahead of "fp". |
| * And, the "sp" is not always correct. |
| * |
| * Stack Relative Address |
| * | | 0 |
| * ---- |
| * |LP| <-- SP(before smw) -1 |
| * ---- |
| * | | <-- SP(after smw) -2 |
| * ---- |
| */ |
| if (!kstack_end((void *)frame->sp)) { |
| frame->lp = ((unsigned long *)frame->sp)[1]; |
| /* TODO: How to deal with the value in first |
| * "sp" is not correct? |
| */ |
| if (__kernel_text_address(frame->lp)) |
| frame->lp = ftrace_graph_ret_addr |
| (tsk, &graph, frame->lp, NULL); |
| |
| frame->sp = ((unsigned long *)frame->sp) + 1; |
| |
| return 0; |
| } else { |
| return -EPERM; |
| } |
| #endif |
| } |
| |
| static void notrace |
| walk_stackframe(struct stackframe *frame, |
| int (*fn_record)(struct stackframe *, void *), |
| void *data) |
| { |
| while (1) { |
| int ret; |
| |
| if (fn_record(frame, data)) |
| break; |
| |
| ret = unwind_frame_kernel(frame); |
| if (ret < 0) |
| break; |
| } |
| } |
| |
| /* |
| * Gets called by walk_stackframe() for every stackframe. This will be called |
| * whist unwinding the stackframe and is like a subroutine return so we use |
| * the PC. |
| */ |
| static int callchain_trace(struct stackframe *fr, void *data) |
| { |
| struct perf_callchain_entry_ctx *entry = data; |
| |
| perf_callchain_store(entry, fr->lp); |
| return 0; |
| } |
| |
| /* |
| * Get the return address for a single stackframe and return a pointer to the |
| * next frame tail. |
| */ |
| static unsigned long |
| user_backtrace(struct perf_callchain_entry_ctx *entry, unsigned long fp) |
| { |
| struct frame_tail buftail; |
| unsigned long lp = 0; |
| unsigned long *user_frame_tail = |
| (unsigned long *)(fp - (unsigned long)sizeof(buftail)); |
| |
| /* Check accessibility of one struct frame_tail beyond */ |
| if (!access_ok(user_frame_tail, sizeof(buftail))) |
| return 0; |
| if (__copy_from_user_inatomic |
| (&buftail, user_frame_tail, sizeof(buftail))) |
| return 0; |
| |
| /* |
| * Refer to unwind_frame_kernel() for more illurstration |
| */ |
| lp = buftail.stack_lp; /* ((unsigned long *)fp)[-1] */ |
| fp = buftail.stack_fp; /* ((unsigned long *)fp)[FP_OFFSET] */ |
| perf_callchain_store(entry, lp); |
| return fp; |
| } |
| |
| static unsigned long |
| user_backtrace_opt_size(struct perf_callchain_entry_ctx *entry, |
| unsigned long fp) |
| { |
| struct frame_tail_opt_size buftail; |
| unsigned long lp = 0; |
| |
| unsigned long *user_frame_tail = |
| (unsigned long *)(fp - (unsigned long)sizeof(buftail)); |
| |
| /* Check accessibility of one struct frame_tail beyond */ |
| if (!access_ok(user_frame_tail, sizeof(buftail))) |
| return 0; |
| if (__copy_from_user_inatomic |
| (&buftail, user_frame_tail, sizeof(buftail))) |
| return 0; |
| |
| /* |
| * Refer to unwind_frame_kernel() for more illurstration |
| */ |
| lp = buftail.stack_lp; /* ((unsigned long *)fp)[-1] */ |
| fp = buftail.stack_fp; /* ((unsigned long *)fp)[FP_OFFSET] */ |
| |
| perf_callchain_store(entry, lp); |
| return fp; |
| } |
| |
| /* |
| * This will be called when the target is in user mode |
| * This function will only be called when we use |
| * "PERF_SAMPLE_CALLCHAIN" in |
| * kernel/events/core.c:perf_prepare_sample() |
| * |
| * How to trigger perf_callchain_[user/kernel] : |
| * $ perf record -e cpu-clock --call-graph fp ./program |
| * $ perf report --call-graph |
| */ |
| unsigned long leaf_fp; |
| void |
| perf_callchain_user(struct perf_callchain_entry_ctx *entry, |
| struct pt_regs *regs) |
| { |
| unsigned long fp = 0; |
| unsigned long gp = 0; |
| unsigned long lp = 0; |
| unsigned long sp = 0; |
| unsigned long *user_frame_tail; |
| |
| leaf_fp = 0; |
| |
| perf_callchain_store(entry, regs->ipc); |
| fp = regs->fp; |
| gp = regs->gp; |
| lp = regs->lp; |
| sp = regs->sp; |
| if (entry->nr < PERF_MAX_STACK_DEPTH && |
| (unsigned long)fp && !((unsigned long)fp & 0x7) && fp > sp) { |
| user_frame_tail = |
| (unsigned long *)(fp - (unsigned long)sizeof(fp)); |
| |
| if (!access_ok(user_frame_tail, sizeof(fp))) |
| return; |
| |
| if (__copy_from_user_inatomic |
| (&leaf_fp, user_frame_tail, sizeof(fp))) |
| return; |
| |
| if (leaf_fp == lp) { |
| /* |
| * Maybe this is non leaf function |
| * with optimize for size, |
| * or maybe this is the function |
| * with optimize for size |
| */ |
| struct frame_tail buftail; |
| |
| user_frame_tail = |
| (unsigned long *)(fp - |
| (unsigned long)sizeof(buftail)); |
| |
| if (!access_ok(user_frame_tail, sizeof(buftail))) |
| return; |
| |
| if (__copy_from_user_inatomic |
| (&buftail, user_frame_tail, sizeof(buftail))) |
| return; |
| |
| if (buftail.stack_fp == gp) { |
| /* non leaf function with optimize |
| * for size condition |
| */ |
| struct frame_tail_opt_size buftail_opt_size; |
| |
| user_frame_tail = |
| (unsigned long *)(fp - (unsigned long) |
| sizeof(buftail_opt_size)); |
| |
| if (!access_ok(user_frame_tail, |
| sizeof(buftail_opt_size))) |
| return; |
| |
| if (__copy_from_user_inatomic |
| (&buftail_opt_size, user_frame_tail, |
| sizeof(buftail_opt_size))) |
| return; |
| |
| perf_callchain_store(entry, lp); |
| fp = buftail_opt_size.stack_fp; |
| |
| while ((entry->nr < PERF_MAX_STACK_DEPTH) && |
| (unsigned long)fp && |
| !((unsigned long)fp & 0x7) && |
| fp > sp) { |
| sp = fp; |
| fp = user_backtrace_opt_size(entry, fp); |
| } |
| |
| } else { |
| /* this is the function |
| * without optimize for size |
| */ |
| fp = buftail.stack_fp; |
| perf_callchain_store(entry, lp); |
| while ((entry->nr < PERF_MAX_STACK_DEPTH) && |
| (unsigned long)fp && |
| !((unsigned long)fp & 0x7) && |
| fp > sp) { |
| sp = fp; |
| fp = user_backtrace(entry, fp); |
| } |
| } |
| } else { |
| /* this is leaf function */ |
| fp = leaf_fp; |
| perf_callchain_store(entry, lp); |
| |
| /* previous function callcahin */ |
| while ((entry->nr < PERF_MAX_STACK_DEPTH) && |
| (unsigned long)fp && |
| !((unsigned long)fp & 0x7) && fp > sp) { |
| sp = fp; |
| fp = user_backtrace(entry, fp); |
| } |
| } |
| return; |
| } |
| } |
| |
| /* This will be called when the target is in kernel mode */ |
| void |
| perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, |
| struct pt_regs *regs) |
| { |
| struct stackframe fr; |
| |
| fr.fp = regs->fp; |
| fr.lp = regs->lp; |
| fr.sp = regs->sp; |
| walk_stackframe(&fr, callchain_trace, entry); |
| } |
| |
/* Report the sampled instruction address: instruction_pointer() of @regs. */
unsigned long perf_instruction_pointer(struct pt_regs *regs)
{
	unsigned long ip = instruction_pointer(regs);

	return ip;
}
| |
| unsigned long perf_misc_flags(struct pt_regs *regs) |
| { |
| int misc = 0; |
| |
| if (user_mode(regs)) |
| misc |= PERF_RECORD_MISC_USER; |
| else |
| misc |= PERF_RECORD_MISC_KERNEL; |
| |
| return misc; |
| } |