Merge branch 'linus' into perf/core, to pick up fixes before merging new changes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 33787ee..929655d 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1622,6 +1622,29 @@
}
EXPORT_SYMBOL_GPL(events_sysfs_show);
+ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr,
+ char *page)
+{
+ struct perf_pmu_events_ht_attr *pmu_attr =
+ container_of(attr, struct perf_pmu_events_ht_attr, attr);
+
+ /*
+ * Report conditional events depending on Hyper-Threading.
+ *
+ * This is overly conservative as usually the HT special
+ * handling is not needed if the other CPU thread is idle.
+ *
+ * Note this does not (and cannot) handle the case when thread
+ * siblings are invisible, for example with virtualization
+ * if they are owned by some other guest. The user tool
+ * has to re-read when a thread sibling gets onlined later.
+ */
+ return sprintf(page, "%s",
+ topology_max_smt_threads() > 1 ?
+ pmu_attr->event_str_ht :
+ pmu_attr->event_str_noht);
+}
+
EVENT_ATTR(cpu-cycles, CPU_CYCLES );
EVENT_ATTR(instructions, INSTRUCTIONS );
EVENT_ATTR(cache-references, CACHE_REFERENCES );
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 7c66695..3ed528c 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -16,6 +16,7 @@
#include <asm/cpufeature.h>
#include <asm/hardirq.h>
+#include <asm/intel-family.h>
#include <asm/apic.h>
#include "../perf_event.h"
@@ -177,7 +178,7 @@
EVENT_CONSTRAINT_END
};
-struct event_constraint intel_skl_event_constraints[] = {
+static struct event_constraint intel_skl_event_constraints[] = {
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
@@ -186,10 +187,8 @@
};
static struct extra_reg intel_knl_extra_regs[] __read_mostly = {
- INTEL_UEVENT_EXTRA_REG(0x01b7,
- MSR_OFFCORE_RSP_0, 0x7f9ffbffffull, RSP_0),
- INTEL_UEVENT_EXTRA_REG(0x02b7,
- MSR_OFFCORE_RSP_1, 0x3f9ffbffffull, RSP_1),
+ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x799ffbb6e7ull, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x399ffbffe7ull, RSP_1),
EVENT_EXTRA_END
};
@@ -225,14 +224,51 @@
EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2");
-struct attribute *nhm_events_attrs[] = {
+static struct attribute *nhm_events_attrs[] = {
EVENT_PTR(mem_ld_nhm),
NULL,
};
-struct attribute *snb_events_attrs[] = {
+/*
+ * topdown events for Intel Core CPUs.
+ *
+ * The metrics are all expressed in slots, where a slot is an issue
+ * opportunity in the 4-wide pipeline. Some events are already reported
+ * in slots; for cycle events we multiply by the pipeline width (4).
+ *
+ * With Hyper Threading on, topdown metrics are either summed or averaged
+ * between the threads of a core: (count_t0 + count_t1).
+ *
+ * For the average case the metric is always scaled to pipeline width,
+ * so we use factor 2 ((count_t0 + count_t1) / 2 * 4)
+ */
+
+EVENT_ATTR_STR_HT(topdown-total-slots, td_total_slots,
+ "event=0x3c,umask=0x0", /* cpu_clk_unhalted.thread */
+ "event=0x3c,umask=0x0,any=1"); /* cpu_clk_unhalted.thread_any */
+EVENT_ATTR_STR_HT(topdown-total-slots.scale, td_total_slots_scale, "4", "2");
+EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued,
+ "event=0xe,umask=0x1"); /* uops_issued.any */
+EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired,
+ "event=0xc2,umask=0x2"); /* uops_retired.retire_slots */
+EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles,
+ "event=0x9c,umask=0x1"); /* idq_uops_not_delivered_core */
+EVENT_ATTR_STR_HT(topdown-recovery-bubbles, td_recovery_bubbles,
+ "event=0xd,umask=0x3,cmask=1", /* int_misc.recovery_cycles */
+ "event=0xd,umask=0x3,cmask=1,any=1"); /* int_misc.recovery_cycles_any */
+EVENT_ATTR_STR_HT(topdown-recovery-bubbles.scale, td_recovery_bubbles_scale,
+ "4", "2");
+
+static struct attribute *snb_events_attrs[] = {
EVENT_PTR(mem_ld_snb),
EVENT_PTR(mem_st_snb),
+ EVENT_PTR(td_slots_issued),
+ EVENT_PTR(td_slots_retired),
+ EVENT_PTR(td_fetch_bubbles),
+ EVENT_PTR(td_total_slots),
+ EVENT_PTR(td_total_slots_scale),
+ EVENT_PTR(td_recovery_bubbles),
+ EVENT_PTR(td_recovery_bubbles_scale),
NULL,
};
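For orientation, the level-1 ratios that user space derives from the topdown events declared above follow the standard TopDown method; the sketch below is illustrative only (the variable names are hypothetical and all counts are assumed to be pre-scaled doubles, i.e. already multiplied by their .scale attributes):

#include <stdio.h>

/* Sketch only: level-1 topdown breakdown from pre-scaled event counts. */
static void print_topdown_level1(double total_slots, double slots_issued,
				 double slots_retired, double fetch_bubbles,
				 double recovery_bubbles)
{
	double frontend_bound  = fetch_bubbles / total_slots;
	double bad_speculation = (slots_issued - slots_retired +
				  recovery_bubbles) / total_slots;
	double retiring        = slots_retired / total_slots;
	double backend_bound   = 1.0 - frontend_bound - bad_speculation - retiring;

	printf("frontend %.1f%% bad-spec %.1f%% retiring %.1f%% backend %.1f%%\n",
	       100.0 * frontend_bound, 100.0 * bad_speculation,
	       100.0 * retiring, 100.0 * backend_bound);
}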
@@ -258,7 +294,7 @@
EVENT_CONSTRAINT_END
};
-struct event_constraint intel_bdw_event_constraints[] = {
+static struct event_constraint intel_bdw_event_constraints[] = {
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
@@ -1332,6 +1368,29 @@
},
};
+EVENT_ATTR_STR(topdown-total-slots, td_total_slots_slm, "event=0x3c");
+EVENT_ATTR_STR(topdown-total-slots.scale, td_total_slots_scale_slm, "2");
+/* no_alloc_cycles.not_delivered */
+EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles_slm,
+ "event=0xca,umask=0x50");
+EVENT_ATTR_STR(topdown-fetch-bubbles.scale, td_fetch_bubbles_scale_slm, "2");
+/* uops_retired.all */
+EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued_slm,
+ "event=0xc2,umask=0x10");
+/* uops_retired.all */
+EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired_slm,
+ "event=0xc2,umask=0x10");
+
+static struct attribute *slm_events_attrs[] = {
+ EVENT_PTR(td_total_slots_slm),
+ EVENT_PTR(td_total_slots_scale_slm),
+ EVENT_PTR(td_fetch_bubbles_slm),
+ EVENT_PTR(td_fetch_bubbles_scale_slm),
+ EVENT_PTR(td_slots_issued_slm),
+ EVENT_PTR(td_slots_retired_slm),
+ NULL
+};
+
static struct extra_reg intel_slm_extra_regs[] __read_mostly =
{
/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
@@ -3261,11 +3320,11 @@
u32 rev = UINT_MAX; /* default to broken for unknown models */
switch (cpu_data(cpu).x86_model) {
- case 42: /* SNB */
+ case INTEL_FAM6_SANDYBRIDGE:
rev = 0x28;
break;
- case 45: /* SNB-EP */
+ case INTEL_FAM6_SANDYBRIDGE_X:
switch (cpu_data(cpu).x86_mask) {
case 6: rev = 0x618; break;
case 7: rev = 0x70c; break;
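For readers mapping the new symbolic names back to the raw model numbers being replaced in this hunk, the correspondence below is inferred from the removed literals; the canonical definitions live in the new <asm/intel-family.h> header and are not reproduced in this diff:

#define INTEL_FAM6_SANDYBRIDGE		0x2A	/* previously the literal 42 */
#define INTEL_FAM6_SANDYBRIDGE_X	0x2D	/* previously the literal 45 */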
@@ -3437,6 +3496,13 @@
EVENT_PTR(cycles_ct),
EVENT_PTR(mem_ld_hsw),
EVENT_PTR(mem_st_hsw),
+ EVENT_PTR(td_slots_issued),
+ EVENT_PTR(td_slots_retired),
+ EVENT_PTR(td_fetch_bubbles),
+ EVENT_PTR(td_total_slots),
+ EVENT_PTR(td_total_slots_scale),
+ EVENT_PTR(td_recovery_bubbles),
+ EVENT_PTR(td_recovery_bubbles_scale),
NULL
};
@@ -3508,15 +3574,15 @@
* Install the hw-cache-events table:
*/
switch (boot_cpu_data.x86_model) {
- case 14: /* 65nm Core "Yonah" */
+ case INTEL_FAM6_CORE_YONAH:
pr_cont("Core events, ");
break;
- case 15: /* 65nm Core2 "Merom" */
+ case INTEL_FAM6_CORE2_MEROM:
x86_add_quirk(intel_clovertown_quirk);
- case 22: /* 65nm Core2 "Merom-L" */
- case 23: /* 45nm Core2 "Penryn" */
- case 29: /* 45nm Core2 "Dunnington (MP) */
+ case INTEL_FAM6_CORE2_MEROM_L:
+ case INTEL_FAM6_CORE2_PENRYN:
+ case INTEL_FAM6_CORE2_DUNNINGTON:
memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
@@ -3527,9 +3593,9 @@
pr_cont("Core2 events, ");
break;
- case 30: /* 45nm Nehalem */
- case 26: /* 45nm Nehalem-EP */
- case 46: /* 45nm Nehalem-EX */
+ case INTEL_FAM6_NEHALEM:
+ case INTEL_FAM6_NEHALEM_EP:
+ case INTEL_FAM6_NEHALEM_EX:
memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
@@ -3557,11 +3623,11 @@
pr_cont("Nehalem events, ");
break;
- case 28: /* 45nm Atom "Pineview" */
- case 38: /* 45nm Atom "Lincroft" */
- case 39: /* 32nm Atom "Penwell" */
- case 53: /* 32nm Atom "Cloverview" */
- case 54: /* 32nm Atom "Cedarview" */
+ case INTEL_FAM6_ATOM_PINEVIEW:
+ case INTEL_FAM6_ATOM_LINCROFT:
+ case INTEL_FAM6_ATOM_PENWELL:
+ case INTEL_FAM6_ATOM_CLOVERVIEW:
+ case INTEL_FAM6_ATOM_CEDARVIEW:
memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
@@ -3573,9 +3639,9 @@
pr_cont("Atom events, ");
break;
- case 55: /* 22nm Atom "Silvermont" */
- case 76: /* 14nm Atom "Airmont" */
- case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
+ case INTEL_FAM6_ATOM_SILVERMONT1:
+ case INTEL_FAM6_ATOM_SILVERMONT2:
+ case INTEL_FAM6_ATOM_AIRMONT:
memcpy(hw_cache_event_ids, slm_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
@@ -3587,11 +3653,12 @@
x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
x86_pmu.extra_regs = intel_slm_extra_regs;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+ x86_pmu.cpu_events = slm_events_attrs;
pr_cont("Silvermont events, ");
break;
- case 92: /* 14nm Atom "Goldmont" */
- case 95: /* 14nm Atom "Goldmont Denverton" */
+ case INTEL_FAM6_ATOM_GOLDMONT:
+ case INTEL_FAM6_ATOM_DENVERTON:
memcpy(hw_cache_event_ids, glm_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs,
@@ -3614,9 +3681,9 @@
pr_cont("Goldmont events, ");
break;
- case 37: /* 32nm Westmere */
- case 44: /* 32nm Westmere-EP */
- case 47: /* 32nm Westmere-EX */
+ case INTEL_FAM6_WESTMERE:
+ case INTEL_FAM6_WESTMERE_EP:
+ case INTEL_FAM6_WESTMERE_EX:
memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
@@ -3643,8 +3710,8 @@
pr_cont("Westmere events, ");
break;
- case 42: /* 32nm SandyBridge */
- case 45: /* 32nm SandyBridge-E/EN/EP */
+ case INTEL_FAM6_SANDYBRIDGE:
+ case INTEL_FAM6_SANDYBRIDGE_X:
x86_add_quirk(intel_sandybridge_quirk);
x86_add_quirk(intel_ht_bug);
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
@@ -3657,7 +3724,7 @@
x86_pmu.event_constraints = intel_snb_event_constraints;
x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
- if (boot_cpu_data.x86_model == 45)
+ if (boot_cpu_data.x86_model == INTEL_FAM6_SANDYBRIDGE_X)
x86_pmu.extra_regs = intel_snbep_extra_regs;
else
x86_pmu.extra_regs = intel_snb_extra_regs;
@@ -3679,8 +3746,8 @@
pr_cont("SandyBridge events, ");
break;
- case 58: /* 22nm IvyBridge */
- case 62: /* 22nm IvyBridge-EP/EX */
+ case INTEL_FAM6_IVYBRIDGE:
+ case INTEL_FAM6_IVYBRIDGE_X:
x86_add_quirk(intel_ht_bug);
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
@@ -3696,7 +3763,7 @@
x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints;
x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
x86_pmu.pebs_prec_dist = true;
- if (boot_cpu_data.x86_model == 62)
+ if (boot_cpu_data.x86_model == INTEL_FAM6_IVYBRIDGE_X)
x86_pmu.extra_regs = intel_snbep_extra_regs;
else
x86_pmu.extra_regs = intel_snb_extra_regs;
@@ -3714,10 +3781,10 @@
break;
- case 60: /* 22nm Haswell Core */
- case 63: /* 22nm Haswell Server */
- case 69: /* 22nm Haswell ULT */
- case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
+ case INTEL_FAM6_HASWELL_CORE:
+ case INTEL_FAM6_HASWELL_X:
+ case INTEL_FAM6_HASWELL_ULT:
+ case INTEL_FAM6_HASWELL_GT3E:
x86_add_quirk(intel_ht_bug);
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
@@ -3741,10 +3808,10 @@
pr_cont("Haswell events, ");
break;
- case 61: /* 14nm Broadwell Core-M */
- case 86: /* 14nm Broadwell Xeon D */
- case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
- case 79: /* 14nm Broadwell Server */
+ case INTEL_FAM6_BROADWELL_CORE:
+ case INTEL_FAM6_BROADWELL_XEON_D:
+ case INTEL_FAM6_BROADWELL_GT3E:
+ case INTEL_FAM6_BROADWELL_X:
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
@@ -3777,7 +3844,7 @@
pr_cont("Broadwell events, ");
break;
- case 87: /* Knights Landing Xeon Phi */
+ case INTEL_FAM6_XEON_PHI_KNL:
memcpy(hw_cache_event_ids,
slm_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs,
@@ -3795,16 +3862,22 @@
pr_cont("Knights Landing events, ");
break;
- case 142: /* 14nm Kabylake Mobile */
- case 158: /* 14nm Kabylake Desktop */
- case 78: /* 14nm Skylake Mobile */
- case 94: /* 14nm Skylake Desktop */
- case 85: /* 14nm Skylake Server */
+ case INTEL_FAM6_SKYLAKE_MOBILE:
+ case INTEL_FAM6_SKYLAKE_DESKTOP:
+ case INTEL_FAM6_SKYLAKE_X:
+ case INTEL_FAM6_KABYLAKE_MOBILE:
+ case INTEL_FAM6_KABYLAKE_DESKTOP:
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
intel_pmu_lbr_init_skl();
+ /* INT_MISC.RECOVERY_CYCLES has umask 1 in Skylake */
+ event_attr_td_recovery_bubbles.event_str_noht =
+ "event=0xd,umask=0x1,cmask=1";
+ event_attr_td_recovery_bubbles.event_str_ht =
+ "event=0xd,umask=0x1,cmask=1,any=1";
+
x86_pmu.event_constraints = intel_skl_event_constraints;
x86_pmu.pebs_constraints = intel_skl_pebs_event_constraints;
x86_pmu.extra_regs = intel_skl_extra_regs;
@@ -3917,16 +3990,14 @@
*/
static __init int fixup_ht_bug(void)
{
- int cpu = smp_processor_id();
- int w, c;
+ int c;
/*
* problem not present on this CPU model, nothing to do
*/
if (!(x86_pmu.flags & PMU_FL_EXCL_ENABLED))
return 0;
- w = cpumask_weight(topology_sibling_cpumask(cpu));
- if (w > 1) {
+ if (topology_max_smt_threads() > 1) {
pr_info("PMU erratum BJ122, BV98, HSD29 worked around, HT is on\n");
return 0;
}
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 9ba4e41..4c7638b 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -89,6 +89,7 @@
#include <linux/slab.h>
#include <linux/perf_event.h>
#include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
#include "../perf_event.h"
MODULE_LICENSE("GPL");
@@ -511,37 +512,37 @@
{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long) &(states) }
static const struct x86_cpu_id intel_cstates_match[] __initconst = {
- X86_CSTATES_MODEL(30, nhm_cstates), /* 45nm Nehalem */
- X86_CSTATES_MODEL(26, nhm_cstates), /* 45nm Nehalem-EP */
- X86_CSTATES_MODEL(46, nhm_cstates), /* 45nm Nehalem-EX */
+ X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM, nhm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM_EP, nhm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM_EX, nhm_cstates),
- X86_CSTATES_MODEL(37, nhm_cstates), /* 32nm Westmere */
- X86_CSTATES_MODEL(44, nhm_cstates), /* 32nm Westmere-EP */
- X86_CSTATES_MODEL(47, nhm_cstates), /* 32nm Westmere-EX */
+ X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE, nhm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE_EP, nhm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE_EX, nhm_cstates),
- X86_CSTATES_MODEL(42, snb_cstates), /* 32nm SandyBridge */
- X86_CSTATES_MODEL(45, snb_cstates), /* 32nm SandyBridge-E/EN/EP */
+ X86_CSTATES_MODEL(INTEL_FAM6_SANDYBRIDGE, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_SANDYBRIDGE_X, snb_cstates),
- X86_CSTATES_MODEL(58, snb_cstates), /* 22nm IvyBridge */
- X86_CSTATES_MODEL(62, snb_cstates), /* 22nm IvyBridge-EP/EX */
+ X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE_X, snb_cstates),
- X86_CSTATES_MODEL(60, snb_cstates), /* 22nm Haswell Core */
- X86_CSTATES_MODEL(63, snb_cstates), /* 22nm Haswell Server */
- X86_CSTATES_MODEL(70, snb_cstates), /* 22nm Haswell + GT3e */
+ X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_CORE, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_X, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_GT3E, snb_cstates),
- X86_CSTATES_MODEL(69, hswult_cstates), /* 22nm Haswell ULT */
+ X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_ULT, hswult_cstates),
- X86_CSTATES_MODEL(55, slm_cstates), /* 22nm Atom Silvermont */
- X86_CSTATES_MODEL(77, slm_cstates), /* 22nm Atom Avoton/Rangely */
- X86_CSTATES_MODEL(76, slm_cstates), /* 22nm Atom Airmont */
+ X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT1, slm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT2, slm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_ATOM_AIRMONT, slm_cstates),
- X86_CSTATES_MODEL(61, snb_cstates), /* 14nm Broadwell Core-M */
- X86_CSTATES_MODEL(86, snb_cstates), /* 14nm Broadwell Xeon D */
- X86_CSTATES_MODEL(71, snb_cstates), /* 14nm Broadwell + GT3e */
- X86_CSTATES_MODEL(79, snb_cstates), /* 14nm Broadwell Server */
+ X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_CORE, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_XEON_D, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_GT3E, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_X, snb_cstates),
- X86_CSTATES_MODEL(78, snb_cstates), /* 14nm Skylake Mobile */
- X86_CSTATES_MODEL(94, snb_cstates), /* 14nm Skylake Desktop */
+ X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_MOBILE, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates),
{ },
};
MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index e30eef4..d0c58b3 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -55,6 +55,7 @@
#include <linux/slab.h>
#include <linux/perf_event.h>
#include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
#include "../perf_event.h"
MODULE_LICENSE("GPL");
@@ -786,26 +787,27 @@
};
static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
- X86_RAPL_MODEL_MATCH(42, snb_rapl_init), /* Sandy Bridge */
- X86_RAPL_MODEL_MATCH(45, snbep_rapl_init), /* Sandy Bridge-EP */
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE, snb_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, snbep_rapl_init),
- X86_RAPL_MODEL_MATCH(58, snb_rapl_init), /* Ivy Bridge */
- X86_RAPL_MODEL_MATCH(62, snbep_rapl_init), /* IvyTown */
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, snb_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, snbep_rapl_init),
- X86_RAPL_MODEL_MATCH(60, hsw_rapl_init), /* Haswell */
- X86_RAPL_MODEL_MATCH(63, hsx_rapl_init), /* Haswell-Server */
- X86_RAPL_MODEL_MATCH(69, hsw_rapl_init), /* Haswell-Celeron */
- X86_RAPL_MODEL_MATCH(70, hsw_rapl_init), /* Haswell GT3e */
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, hsw_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X, hsw_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT, hsw_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, hsw_rapl_init),
- X86_RAPL_MODEL_MATCH(61, hsw_rapl_init), /* Broadwell */
- X86_RAPL_MODEL_MATCH(71, hsw_rapl_init), /* Broadwell-H */
- X86_RAPL_MODEL_MATCH(79, hsx_rapl_init), /* Broadwell-Server */
- X86_RAPL_MODEL_MATCH(86, hsx_rapl_init), /* Broadwell Xeon D */
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, hsw_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, hsw_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, hsw_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsw_rapl_init),
- X86_RAPL_MODEL_MATCH(87, knl_rapl_init), /* Knights Landing */
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_rapl_init),
- X86_RAPL_MODEL_MATCH(78, skl_rapl_init), /* Skylake */
- X86_RAPL_MODEL_MATCH(94, skl_rapl_init), /* Skylake H/S */
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, skl_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, hsx_rapl_init),
{},
};
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index fce7406..4e70d27 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1,4 +1,5 @@
#include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
#include "uncore.h"
static struct intel_uncore_type *empty_uncore[] = { NULL, };
@@ -882,7 +883,7 @@
static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct intel_uncore_type *type;
- struct intel_uncore_pmu *pmu;
+ struct intel_uncore_pmu *pmu = NULL;
struct intel_uncore_box *box;
int phys_id, pkg, ret;
@@ -903,20 +904,37 @@
}
type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
+
/*
- * for performance monitoring unit with multiple boxes,
- * each box has a different function id.
+	 * Some platforms, e.g. Knights Landing, use a common PCI device ID
+	 * for multiple instances of an uncore PMU device type. We check the
+	 * PCI slot and function to identify the specific uncore box.
*/
- pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
- /* Knights Landing uses a common PCI device ID for multiple instances of
- * an uncore PMU device type. There is only one entry per device type in
- * the knl_uncore_pci_ids table inspite of multiple devices present for
- * some device types. Hence PCI device idx would be 0 for all devices.
- * So increment pmu pointer to point to an unused array element.
- */
- if (boot_cpu_data.x86_model == 87) {
- while (pmu->func_id >= 0)
- pmu++;
+ if (id->driver_data & ~0xffff) {
+ struct pci_driver *pci_drv = pdev->driver;
+ const struct pci_device_id *ids = pci_drv->id_table;
+ unsigned int devfn;
+
+ while (ids && ids->vendor) {
+ if ((ids->vendor == pdev->vendor) &&
+ (ids->device == pdev->device)) {
+ devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data),
+ UNCORE_PCI_DEV_FUNC(ids->driver_data));
+ if (devfn == pdev->devfn) {
+ pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)];
+ break;
+ }
+ }
+ ids++;
+ }
+ if (pmu == NULL)
+ return -ENODEV;
+ } else {
+ /*
+ * for performance monitoring unit with multiple boxes,
+ * each box has a different function id.
+ */
+ pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
}
if (WARN_ON_ONCE(pmu->boxes[pkg] != NULL))
@@ -1365,26 +1383,26 @@
};
static const struct x86_cpu_id intel_uncore_match[] __initconst = {
- X86_UNCORE_MODEL_MATCH(26, nhm_uncore_init), /* Nehalem */
- X86_UNCORE_MODEL_MATCH(30, nhm_uncore_init),
- X86_UNCORE_MODEL_MATCH(37, nhm_uncore_init), /* Westmere */
- X86_UNCORE_MODEL_MATCH(44, nhm_uncore_init),
- X86_UNCORE_MODEL_MATCH(42, snb_uncore_init), /* Sandy Bridge */
- X86_UNCORE_MODEL_MATCH(58, ivb_uncore_init), /* Ivy Bridge */
- X86_UNCORE_MODEL_MATCH(60, hsw_uncore_init), /* Haswell */
- X86_UNCORE_MODEL_MATCH(69, hsw_uncore_init), /* Haswell Celeron */
- X86_UNCORE_MODEL_MATCH(70, hsw_uncore_init), /* Haswell */
- X86_UNCORE_MODEL_MATCH(61, bdw_uncore_init), /* Broadwell */
- X86_UNCORE_MODEL_MATCH(71, bdw_uncore_init), /* Broadwell */
- X86_UNCORE_MODEL_MATCH(45, snbep_uncore_init), /* Sandy Bridge-EP */
- X86_UNCORE_MODEL_MATCH(46, nhmex_uncore_init), /* Nehalem-EX */
- X86_UNCORE_MODEL_MATCH(47, nhmex_uncore_init), /* Westmere-EX aka. Xeon E7 */
- X86_UNCORE_MODEL_MATCH(62, ivbep_uncore_init), /* Ivy Bridge-EP */
- X86_UNCORE_MODEL_MATCH(63, hswep_uncore_init), /* Haswell-EP */
- X86_UNCORE_MODEL_MATCH(79, bdx_uncore_init), /* BDX-EP */
- X86_UNCORE_MODEL_MATCH(86, bdx_uncore_init), /* BDX-DE */
- X86_UNCORE_MODEL_MATCH(87, knl_uncore_init), /* Knights Landing */
- X86_UNCORE_MODEL_MATCH(94, skl_uncore_init), /* SkyLake */
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EP, nhm_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM, nhm_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE, nhm_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE_EP, nhm_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE, snb_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, ivb_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, hsw_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT, hsw_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, hsw_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, bdw_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, bdw_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, snbep_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EX, nhmex_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE_EX, nhmex_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, ivbep_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_X, hswep_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, bdx_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, bdx_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_uncore_init),
+	X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, skl_uncore_init),
{},
};
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index 79766b9..66c3a36 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -15,7 +15,11 @@
#define UNCORE_PMC_IDX_FIXED UNCORE_PMC_IDX_MAX_GENERIC
#define UNCORE_PMC_IDX_MAX (UNCORE_PMC_IDX_FIXED + 1)
+#define UNCORE_PCI_DEV_FULL_DATA(dev, func, type, idx) \
+ ((dev << 24) | (func << 16) | (type << 8) | idx)
#define UNCORE_PCI_DEV_DATA(type, idx) ((type << 8) | idx)
+#define UNCORE_PCI_DEV_DEV(data) ((data >> 24) & 0xff)
+#define UNCORE_PCI_DEV_FUNC(data) ((data >> 16) & 0xff)
#define UNCORE_PCI_DEV_TYPE(data) ((data >> 8) & 0xff)
#define UNCORE_PCI_DEV_IDX(data) (data & 0xff)
#define UNCORE_EXTRA_PCI_DEV 0xff
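To illustrate the new encoding, the sketch below round-trips the driver_data value for the KNL "MC0 DClk CH 0" entry added in uncore_snbep.c further down (PCI device 8, function 2); it is an example, not part of the patch:

/* Sketch: packing and unpacking the extended driver_data encoding. */
unsigned long data = UNCORE_PCI_DEV_FULL_DATA(8, 2, KNL_PCI_UNCORE_MC_DCLK, 0);

/*
 * UNCORE_PCI_DEV_DEV(data)  == 8                       (bits 31-24)
 * UNCORE_PCI_DEV_FUNC(data) == 2                       (bits 23-16)
 * UNCORE_PCI_DEV_TYPE(data) == KNL_PCI_UNCORE_MC_DCLK  (bits 15-8)
 * UNCORE_PCI_DEV_IDX(data)  == 0                       (bits 7-0)
 *
 * Because the dev field is non-zero, (data & ~0xffff) != 0, which is
 * the test uncore_pci_probe() uses to select the devfn-matching path.
 */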
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 874e8bd..824e540 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -2164,21 +2164,101 @@
*/
static const struct pci_device_id knl_uncore_pci_ids[] = {
- { /* MC UClk */
+ { /* MC0 UClk */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7841),
- .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_MC_UCLK, 0),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(10, 0, KNL_PCI_UNCORE_MC_UCLK, 0),
},
- { /* MC DClk Channel */
+ { /* MC1 UClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7841),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(11, 0, KNL_PCI_UNCORE_MC_UCLK, 1),
+ },
+ { /* MC0 DClk CH 0 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843),
- .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_MC_DCLK, 0),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 2, KNL_PCI_UNCORE_MC_DCLK, 0),
},
- { /* EDC UClk */
+ { /* MC0 DClk CH 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 3, KNL_PCI_UNCORE_MC_DCLK, 1),
+ },
+ { /* MC0 DClk CH 2 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 4, KNL_PCI_UNCORE_MC_DCLK, 2),
+ },
+ { /* MC1 DClk CH 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 2, KNL_PCI_UNCORE_MC_DCLK, 3),
+ },
+ { /* MC1 DClk CH 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 3, KNL_PCI_UNCORE_MC_DCLK, 4),
+ },
+ { /* MC1 DClk CH 2 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 4, KNL_PCI_UNCORE_MC_DCLK, 5),
+ },
+ { /* EDC0 UClk */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
- .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_EDC_UCLK, 0),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(15, 0, KNL_PCI_UNCORE_EDC_UCLK, 0),
},
- { /* EDC EClk */
+ { /* EDC1 UClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(16, 0, KNL_PCI_UNCORE_EDC_UCLK, 1),
+ },
+ { /* EDC2 UClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(17, 0, KNL_PCI_UNCORE_EDC_UCLK, 2),
+ },
+ { /* EDC3 UClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 0, KNL_PCI_UNCORE_EDC_UCLK, 3),
+ },
+ { /* EDC4 UClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(19, 0, KNL_PCI_UNCORE_EDC_UCLK, 4),
+ },
+ { /* EDC5 UClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(20, 0, KNL_PCI_UNCORE_EDC_UCLK, 5),
+ },
+ { /* EDC6 UClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(21, 0, KNL_PCI_UNCORE_EDC_UCLK, 6),
+ },
+ { /* EDC7 UClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(22, 0, KNL_PCI_UNCORE_EDC_UCLK, 7),
+ },
+ { /* EDC0 EClk */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
- .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_EDC_ECLK, 0),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(24, 2, KNL_PCI_UNCORE_EDC_ECLK, 0),
+ },
+ { /* EDC1 EClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(25, 2, KNL_PCI_UNCORE_EDC_ECLK, 1),
+ },
+ { /* EDC2 EClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(26, 2, KNL_PCI_UNCORE_EDC_ECLK, 2),
+ },
+ { /* EDC3 EClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(27, 2, KNL_PCI_UNCORE_EDC_ECLK, 3),
+ },
+ { /* EDC4 EClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(28, 2, KNL_PCI_UNCORE_EDC_ECLK, 4),
+ },
+ { /* EDC5 EClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(29, 2, KNL_PCI_UNCORE_EDC_ECLK, 5),
+ },
+ { /* EDC6 EClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(30, 2, KNL_PCI_UNCORE_EDC_ECLK, 6),
+ },
+ { /* EDC7 EClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(31, 2, KNL_PCI_UNCORE_EDC_ECLK, 7),
},
{ /* M2PCIe */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7817),
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index 85ef3c2..50b3a05 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -1,4 +1,5 @@
#include <linux/perf_event.h>
+#include <asm/intel-family.h>
enum perf_msr_id {
PERF_MSR_TSC = 0,
@@ -34,39 +35,43 @@
return false;
switch (boot_cpu_data.x86_model) {
- case 30: /* 45nm Nehalem */
- case 26: /* 45nm Nehalem-EP */
- case 46: /* 45nm Nehalem-EX */
+ case INTEL_FAM6_NEHALEM:
+ case INTEL_FAM6_NEHALEM_EP:
+ case INTEL_FAM6_NEHALEM_EX:
- case 37: /* 32nm Westmere */
- case 44: /* 32nm Westmere-EP */
- case 47: /* 32nm Westmere-EX */
+ case INTEL_FAM6_WESTMERE:
+ case INTEL_FAM6_WESTMERE2:
+ case INTEL_FAM6_WESTMERE_EP:
+ case INTEL_FAM6_WESTMERE_EX:
- case 42: /* 32nm SandyBridge */
- case 45: /* 32nm SandyBridge-E/EN/EP */
+ case INTEL_FAM6_SANDYBRIDGE:
+ case INTEL_FAM6_SANDYBRIDGE_X:
- case 58: /* 22nm IvyBridge */
- case 62: /* 22nm IvyBridge-EP/EX */
+ case INTEL_FAM6_IVYBRIDGE:
+ case INTEL_FAM6_IVYBRIDGE_X:
- case 60: /* 22nm Haswell Core */
- case 63: /* 22nm Haswell Server */
- case 69: /* 22nm Haswell ULT */
- case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
+ case INTEL_FAM6_HASWELL_CORE:
+ case INTEL_FAM6_HASWELL_X:
+ case INTEL_FAM6_HASWELL_ULT:
+ case INTEL_FAM6_HASWELL_GT3E:
- case 61: /* 14nm Broadwell Core-M */
- case 86: /* 14nm Broadwell Xeon D */
- case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
- case 79: /* 14nm Broadwell Server */
+ case INTEL_FAM6_BROADWELL_CORE:
+ case INTEL_FAM6_BROADWELL_XEON_D:
+ case INTEL_FAM6_BROADWELL_GT3E:
+ case INTEL_FAM6_BROADWELL_X:
- case 55: /* 22nm Atom "Silvermont" */
- case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
- case 76: /* 14nm Atom "Airmont" */
+ case INTEL_FAM6_ATOM_SILVERMONT1:
+ case INTEL_FAM6_ATOM_SILVERMONT2:
+ case INTEL_FAM6_ATOM_AIRMONT:
if (idx == PERF_MSR_SMI)
return true;
break;
- case 78: /* 14nm Skylake Mobile */
- case 94: /* 14nm Skylake Desktop */
+ case INTEL_FAM6_SKYLAKE_MOBILE:
+ case INTEL_FAM6_SKYLAKE_DESKTOP:
+ case INTEL_FAM6_SKYLAKE_X:
+ case INTEL_FAM6_KABYLAKE_MOBILE:
+ case INTEL_FAM6_KABYLAKE_DESKTOP:
if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF)
return true;
break;
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 8bd764d..e2d7285 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -668,6 +668,14 @@
.event_str = str, \
};
+#define EVENT_ATTR_STR_HT(_name, v, noht, ht) \
+static struct perf_pmu_events_ht_attr event_attr_##v = { \
+ .attr = __ATTR(_name, 0444, events_ht_sysfs_show, NULL),\
+ .id = 0, \
+ .event_str_noht = noht, \
+ .event_str_ht = ht, \
+}
+
extern struct x86_pmu x86_pmu __read_mostly;
static inline bool x86_pmu_has_lbr_callstack(void)
@@ -803,6 +811,8 @@
ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
char *page);
+ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr,
+ char *page);
#ifdef CONFIG_CPU_SUP_AMD
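To make the new helper concrete, here is a hand-expanded sketch of what EVENT_ATTR_STR_HT() above produces for the topdown-total-slots event defined earlier in this patch; the resulting attribute is serviced by events_ht_sysfs_show(), which returns event_str_ht or event_str_noht depending on topology_max_smt_threads():

/* Hand expansion, for illustration only. */
static struct perf_pmu_events_ht_attr event_attr_td_total_slots = {
	.attr		= __ATTR(topdown-total-slots, 0444,
				 events_ht_sysfs_show, NULL),
	.id		= 0,
	.event_str_noht	= "event=0x3c,umask=0x0",
	.event_str_ht	= "event=0x3c,umask=0x0,any=1",
};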
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 7f991bd5..e346572 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -129,6 +129,14 @@
extern unsigned int __max_logical_packages;
#define topology_max_packages() (__max_logical_packages)
+
+extern int __max_smt_threads;
+
+static inline int topology_max_smt_threads(void)
+{
+ return __max_smt_threads;
+}
+
int topology_update_package_map(unsigned int apicid, unsigned int cpu);
extern int topology_phys_to_logical_pkg(unsigned int pkg);
#else
@@ -136,6 +144,7 @@
static inline int
topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; }
static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
+static inline int topology_max_smt_threads(void) { return 1; }
#endif
static inline void arch_fix_phys_package_id(int num, u32 slot)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index fafe8b9..2ed0ec1 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -105,6 +105,9 @@
unsigned int __max_logical_packages __read_mostly;
EXPORT_SYMBOL(__max_logical_packages);
+/* Maximum number of SMT threads on any online core */
+int __max_smt_threads __read_mostly;
+
static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
{
unsigned long flags;
@@ -493,7 +496,7 @@
bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1;
struct cpuinfo_x86 *c = &cpu_data(cpu);
struct cpuinfo_x86 *o;
- int i;
+ int i, threads;
cpumask_set_cpu(cpu, cpu_sibling_setup_mask);
@@ -550,6 +553,10 @@
if (match_die(c, o) && !topology_same_node(c, o))
primarily_use_numa_for_topology();
}
+
+ threads = cpumask_weight(topology_sibling_cpumask(cpu));
+ if (threads > __max_smt_threads)
+ __max_smt_threads = threads;
}
/* maps the cpu to the sched domain representing multi-core */
@@ -1441,6 +1448,21 @@
#ifdef CONFIG_HOTPLUG_CPU
+/* Recompute SMT state for all CPUs when a CPU goes offline */
+static void recompute_smt_state(void)
+{
+ int max_threads, cpu;
+
+ max_threads = 0;
+ for_each_online_cpu (cpu) {
+ int threads = cpumask_weight(topology_sibling_cpumask(cpu));
+
+ if (threads > max_threads)
+ max_threads = threads;
+ }
+ __max_smt_threads = max_threads;
+}
+
static void remove_siblinginfo(int cpu)
{
int sibling;
@@ -1465,6 +1487,7 @@
c->phys_proc_id = 0;
c->cpu_core_id = 0;
cpumask_clear_cpu(cpu, cpu_sibling_setup_mask);
+ recompute_smt_state();
}
static void remove_cpu_from_maps(int cpu)
diff --git a/drivers/platform/x86/intel_pmc_core.c b/drivers/platform/x86/intel_pmc_core.c
index 2776bec..e57f923 100644
--- a/drivers/platform/x86/intel_pmc_core.c
+++ b/drivers/platform/x86/intel_pmc_core.c
@@ -26,6 +26,7 @@
#include <linux/seq_file.h>
#include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
#include <asm/pmc_core.h>
#include "intel_pmc_core.h"
@@ -138,10 +139,10 @@
#endif /* CONFIG_DEBUG_FS */
static const struct x86_cpu_id intel_pmc_core_ids[] = {
- { X86_VENDOR_INTEL, 6, 0x4e, X86_FEATURE_MWAIT,
- (kernel_ulong_t)NULL}, /* Skylake CPUID Signature */
- { X86_VENDOR_INTEL, 6, 0x5e, X86_FEATURE_MWAIT,
- (kernel_ulong_t)NULL}, /* Skylake CPUID Signature */
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_SKYLAKE_MOBILE, X86_FEATURE_MWAIT,
+ (kernel_ulong_t)NULL},
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_SKYLAKE_DESKTOP, X86_FEATURE_MWAIT,
+ (kernel_ulong_t)NULL},
{}
};
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 1a827ce..7921f4f 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -517,6 +517,11 @@
struct perf_cgroup;
struct ring_buffer;
+struct pmu_event_list {
+ raw_spinlock_t lock;
+ struct list_head list;
+};
+
/**
* struct perf_event - performance event kernel representation:
*/
@@ -675,6 +680,7 @@
int cgrp_defer_enabled;
#endif
+ struct list_head sb_list;
#endif /* CONFIG_PERF_EVENTS */
};
@@ -1074,7 +1080,7 @@
extern struct perf_callchain_entry *
get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
u32 max_stack, bool crosstask, bool add_mark);
-extern int get_callchain_buffers(void);
+extern int get_callchain_buffers(int max_stack);
extern void put_callchain_buffers(void);
extern int sysctl_perf_event_max_stack;
@@ -1326,6 +1332,13 @@
const char *event_str;
};
+struct perf_pmu_events_ht_attr {
+ struct device_attribute attr;
+ u64 id;
+ const char *event_str_ht;
+ const char *event_str_noht;
+};
+
ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr,
char *page);
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 36ce552..c66a485 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -276,6 +276,9 @@
/*
* Hardware event_id to monitor via a performance monitoring event:
+ *
+ * @sample_max_stack: Max number of frame pointers in a callchain,
+ *		      should not exceed /proc/sys/kernel/perf_event_max_stack
*/
struct perf_event_attr {
@@ -385,7 +388,8 @@
* Wakeup watermark for AUX area
*/
__u32 aux_watermark;
- __u32 __reserved_2; /* align to __u64 */
+ __u16 sample_max_stack;
+ __u16 __reserved_2; /* align to __u64 */
};
#define perf_flags(attr) (*(&(attr)->read_format + 1))
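A sketch of the intended user-space use of the new field (hypothetical caller, not part of this patch): sample_max_stack requests a per-event callchain depth, 0 falls back to the sysctl default, and a value above /proc/sys/kernel/perf_event_max_stack now makes perf_event_open() fail with EOVERFLOW (see the get_callchain_buffers() change further down):

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <string.h>
#include <unistd.h>

static int open_cycles_with_callchain(void)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size             = sizeof(attr);
	attr.type             = PERF_TYPE_HARDWARE;
	attr.config           = PERF_COUNT_HW_CPU_CYCLES;
	attr.sample_period    = 100000;
	attr.sample_type      = PERF_SAMPLE_CALLCHAIN;
	attr.sample_max_stack = 32;	/* 0 means: use the sysctl default */

	/* current task, any CPU */
	return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
}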
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 080a2df..bf4495f 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -99,7 +99,7 @@
if (err)
goto free_smap;
- err = get_callchain_buffers();
+ err = get_callchain_buffers(sysctl_perf_event_max_stack);
if (err)
goto free_smap;
diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
index 179ef46..e9fdb52 100644
--- a/kernel/events/callchain.c
+++ b/kernel/events/callchain.c
@@ -104,7 +104,7 @@
return -ENOMEM;
}
-int get_callchain_buffers(void)
+int get_callchain_buffers(int event_max_stack)
{
int err = 0;
int count;
@@ -121,6 +121,15 @@
/* If the allocation failed, give up */
if (!callchain_cpus_entries)
err = -ENOMEM;
+ /*
+		 * If the per-event request exceeds the global cap,
+		 * return a different error to help userspace figure
+		 * this out.
+		 *
+		 * We also do it here so that the check runs with
+		 * &callchain_mutex held.
+ */
+ if (event_max_stack > sysctl_perf_event_max_stack)
+ err = -EOVERFLOW;
goto exit;
}
@@ -174,11 +183,12 @@
bool user = !event->attr.exclude_callchain_user;
/* Disallow cross-task user callchains. */
bool crosstask = event->ctx->task && event->ctx->task != current;
+ const u32 max_stack = event->attr.sample_max_stack;
if (!kernel && !user)
return NULL;
- return get_perf_callchain(regs, 0, kernel, user, sysctl_perf_event_max_stack, crosstask, true);
+ return get_perf_callchain(regs, 0, kernel, user, max_stack, crosstask, true);
}
struct perf_callchain_entry *
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 9c51ec3..9345028 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -335,6 +335,7 @@
static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
static DEFINE_PER_CPU(int, perf_sched_cb_usages);
+static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
static atomic_t nr_mmap_events __read_mostly;
static atomic_t nr_comm_events __read_mostly;
@@ -396,6 +397,13 @@
if (ret || !write)
return ret;
+ /*
+ * If throttling is disabled don't allow the write:
+ */
+ if (sysctl_perf_cpu_time_max_percent == 100 ||
+ sysctl_perf_cpu_time_max_percent == 0)
+ return -EINVAL;
+
max_samples_per_tick = DIV_ROUND_UP(sysctl_perf_event_sample_rate, HZ);
perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate;
update_perf_cpu_limits();
@@ -3665,6 +3673,39 @@
static void ring_buffer_attach(struct perf_event *event,
struct ring_buffer *rb);
+static void detach_sb_event(struct perf_event *event)
+{
+ struct pmu_event_list *pel = per_cpu_ptr(&pmu_sb_events, event->cpu);
+
+ raw_spin_lock(&pel->lock);
+ list_del_rcu(&event->sb_list);
+ raw_spin_unlock(&pel->lock);
+}
+
+static bool is_sb_event(struct perf_event *event)
+{
+ struct perf_event_attr *attr = &event->attr;
+
+ if (event->parent)
+ return false;
+
+ if (event->attach_state & PERF_ATTACH_TASK)
+ return false;
+
+ if (attr->mmap || attr->mmap_data || attr->mmap2 ||
+ attr->comm || attr->comm_exec ||
+ attr->task ||
+ attr->context_switch)
+ return true;
+ return false;
+}
+
+static void unaccount_pmu_sb_event(struct perf_event *event)
+{
+ if (is_sb_event(event))
+ detach_sb_event(event);
+}
+
static void unaccount_event_cpu(struct perf_event *event, int cpu)
{
if (event->parent)
@@ -3728,6 +3769,8 @@
}
unaccount_event_cpu(event, event->cpu);
+
+ unaccount_pmu_sb_event(event);
}
static void perf_sched_delayed(struct work_struct *work)
@@ -5854,11 +5897,11 @@
perf_output_end(&handle);
}
-typedef void (perf_event_aux_output_cb)(struct perf_event *event, void *data);
+typedef void (perf_iterate_f)(struct perf_event *event, void *data);
static void
-perf_event_aux_ctx(struct perf_event_context *ctx,
- perf_event_aux_output_cb output,
+perf_iterate_ctx(struct perf_event_context *ctx,
+ perf_iterate_f output,
void *data, bool all)
{
struct perf_event *event;
@@ -5875,52 +5918,55 @@
}
}
-static void
-perf_event_aux_task_ctx(perf_event_aux_output_cb output, void *data,
- struct perf_event_context *task_ctx)
+static void perf_iterate_sb_cpu(perf_iterate_f output, void *data)
{
- rcu_read_lock();
- preempt_disable();
- perf_event_aux_ctx(task_ctx, output, data, false);
- preempt_enable();
- rcu_read_unlock();
+ struct pmu_event_list *pel = this_cpu_ptr(&pmu_sb_events);
+ struct perf_event *event;
+
+ list_for_each_entry_rcu(event, &pel->list, sb_list) {
+ if (event->state < PERF_EVENT_STATE_INACTIVE)
+ continue;
+ if (!event_filter_match(event))
+ continue;
+ output(event, data);
+ }
}
+/*
+ * Iterate all events that need to receive side-band events.
+ *
+ * For new callers: ensure that account_pmu_sb_event() includes
+ * your event, otherwise it might not get delivered.
+ */
static void
-perf_event_aux(perf_event_aux_output_cb output, void *data,
+perf_iterate_sb(perf_iterate_f output, void *data,
struct perf_event_context *task_ctx)
{
- struct perf_cpu_context *cpuctx;
struct perf_event_context *ctx;
- struct pmu *pmu;
int ctxn;
+ rcu_read_lock();
+ preempt_disable();
+
/*
- * If we have task_ctx != NULL we only notify
- * the task context itself. The task_ctx is set
- * only for EXIT events before releasing task
+ * If we have task_ctx != NULL we only notify the task context itself.
+ * The task_ctx is set only for EXIT events before releasing task
* context.
*/
if (task_ctx) {
- perf_event_aux_task_ctx(output, data, task_ctx);
- return;
+ perf_iterate_ctx(task_ctx, output, data, false);
+ goto done;
}
- rcu_read_lock();
- list_for_each_entry_rcu(pmu, &pmus, entry) {
- cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
- if (cpuctx->unique_pmu != pmu)
- goto next;
- perf_event_aux_ctx(&cpuctx->ctx, output, data, false);
- ctxn = pmu->task_ctx_nr;
- if (ctxn < 0)
- goto next;
+ perf_iterate_sb_cpu(output, data);
+
+ for_each_task_context_nr(ctxn) {
ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
if (ctx)
- perf_event_aux_ctx(ctx, output, data, false);
-next:
- put_cpu_ptr(pmu->pmu_cpu_context);
+ perf_iterate_ctx(ctx, output, data, false);
}
+done:
+ preempt_enable();
rcu_read_unlock();
}
@@ -5969,7 +6015,7 @@
perf_event_enable_on_exec(ctxn);
- perf_event_aux_ctx(ctx, perf_event_addr_filters_exec, NULL,
+ perf_iterate_ctx(ctx, perf_event_addr_filters_exec, NULL,
true);
}
rcu_read_unlock();
@@ -6013,9 +6059,9 @@
};
rcu_read_lock();
- perf_event_aux_ctx(&cpuctx->ctx, __perf_event_output_stop, &ro, false);
+ perf_iterate_ctx(&cpuctx->ctx, __perf_event_output_stop, &ro, false);
if (cpuctx->task_ctx)
- perf_event_aux_ctx(cpuctx->task_ctx, __perf_event_output_stop,
+ perf_iterate_ctx(cpuctx->task_ctx, __perf_event_output_stop,
&ro, false);
rcu_read_unlock();
@@ -6144,7 +6190,7 @@
},
};
- perf_event_aux(perf_event_task_output,
+ perf_iterate_sb(perf_event_task_output,
&task_event,
task_ctx);
}
@@ -6223,7 +6269,7 @@
comm_event->event_id.header.size = sizeof(comm_event->event_id) + size;
- perf_event_aux(perf_event_comm_output,
+ perf_iterate_sb(perf_event_comm_output,
comm_event,
NULL);
}
@@ -6454,7 +6500,7 @@
mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size;
- perf_event_aux(perf_event_mmap_output,
+ perf_iterate_sb(perf_event_mmap_output,
mmap_event,
NULL);
@@ -6537,7 +6583,7 @@
if (!ctx)
continue;
- perf_event_aux_ctx(ctx, __perf_addr_filters_adjust, vma, true);
+ perf_iterate_ctx(ctx, __perf_addr_filters_adjust, vma, true);
}
rcu_read_unlock();
}
@@ -6724,7 +6770,7 @@
},
};
- perf_event_aux(perf_event_switch_output,
+ perf_iterate_sb(perf_event_switch_output,
&switch_event,
NULL);
}
@@ -8646,6 +8692,28 @@
return pmu;
}
+static void attach_sb_event(struct perf_event *event)
+{
+ struct pmu_event_list *pel = per_cpu_ptr(&pmu_sb_events, event->cpu);
+
+ raw_spin_lock(&pel->lock);
+ list_add_rcu(&event->sb_list, &pel->list);
+ raw_spin_unlock(&pel->lock);
+}
+
+/*
+ * We keep a list of all !task (and therefore per-cpu) events
+ * that need to receive side-band records.
+ *
+ * This avoids having to scan all the various PMU per-cpu contexts
+ * looking for them.
+ */
+static void account_pmu_sb_event(struct perf_event *event)
+{
+ if (is_sb_event(event))
+ attach_sb_event(event);
+}
+
static void account_event_cpu(struct perf_event *event, int cpu)
{
if (event->parent)
@@ -8726,6 +8794,8 @@
enabled:
account_event_cpu(event, event->cpu);
+
+ account_pmu_sb_event(event);
}
/*
@@ -8874,7 +8944,7 @@
if (!event->parent) {
if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
- err = get_callchain_buffers();
+ err = get_callchain_buffers(attr->sample_max_stack);
if (err)
goto err_addr_filters;
}
@@ -9196,6 +9266,9 @@
return -EINVAL;
}
+ if (!attr.sample_max_stack)
+ attr.sample_max_stack = sysctl_perf_event_max_stack;
+
/*
* In cgroup mode, the pid argument is used to pass the fd
* opened to the cgroup directory in cgroupfs. The cpu argument
@@ -9269,7 +9342,7 @@
if (is_sampling_event(event)) {
if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) {
- err = -ENOTSUPP;
+ err = -EOPNOTSUPP;
goto err_alloc;
}
}
@@ -10231,6 +10304,9 @@
swhash = &per_cpu(swevent_htable, cpu);
mutex_init(&swhash->hlist_mutex);
INIT_LIST_HEAD(&per_cpu(active_ctx_list, cpu));
+
+ INIT_LIST_HEAD(&per_cpu(pmu_sb_events.list, cpu));
+ raw_spin_lock_init(&per_cpu(pmu_sb_events.lock, cpu));
}
}
diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile
index 316f308..67ff93e 100644
--- a/tools/lib/api/Makefile
+++ b/tools/lib/api/Makefile
@@ -10,6 +10,7 @@
CC = $(CROSS_COMPILE)gcc
AR = $(CROSS_COMPILE)ar
+LD = $(CROSS_COMPILE)ld
MAKEFLAGS += --no-print-directory
diff --git a/tools/lib/api/fd/array.c b/tools/lib/api/fd/array.c
index 0e636c4..b0a035f 100644
--- a/tools/lib/api/fd/array.c
+++ b/tools/lib/api/fd/array.c
@@ -85,7 +85,8 @@
}
int fdarray__filter(struct fdarray *fda, short revents,
- void (*entry_destructor)(struct fdarray *fda, int fd))
+ void (*entry_destructor)(struct fdarray *fda, int fd, void *arg),
+ void *arg)
{
int fd, nr = 0;
@@ -95,7 +96,7 @@
for (fd = 0; fd < fda->nr; ++fd) {
if (fda->entries[fd].revents & revents) {
if (entry_destructor)
- entry_destructor(fda, fd);
+ entry_destructor(fda, fd, arg);
continue;
}
diff --git a/tools/lib/api/fd/array.h b/tools/lib/api/fd/array.h
index 45db018..e87fd80 100644
--- a/tools/lib/api/fd/array.h
+++ b/tools/lib/api/fd/array.h
@@ -34,7 +34,8 @@
int fdarray__add(struct fdarray *fda, int fd, short revents);
int fdarray__poll(struct fdarray *fda, int timeout);
int fdarray__filter(struct fdarray *fda, short revents,
- void (*entry_destructor)(struct fdarray *fda, int fd));
+ void (*entry_destructor)(struct fdarray *fda, int fd, void *arg),
+ void *arg);
int fdarray__grow(struct fdarray *fda, int extra);
int fdarray__fprintf(struct fdarray *fda, FILE *fp);
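A sketch of a caller adapted to the new destructor signature; the callback and the counter passed through the new void *arg are hypothetical, not taken from this patch:

#include <poll.h>
#include <unistd.h>
#include <api/fd/array.h>

/* Hypothetical destructor: close hung-up fds, counting them via *arg. */
static void close_and_count(struct fdarray *fda, int fd, void *arg)
{
	int *closed = arg;

	close(fda->entries[fd].fd);
	(*closed)++;
}

static int drop_hung_up(struct fdarray *fda)
{
	int closed = 0;

	fdarray__filter(fda, POLLHUP | POLLERR, close_and_count, &closed);
	return closed;
}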
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 7e543c3..462e526 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1186,20 +1186,14 @@
return next;
}
-const char *
-bpf_object__get_name(struct bpf_object *obj)
+const char *bpf_object__name(struct bpf_object *obj)
{
- if (!obj)
- return ERR_PTR(-EINVAL);
- return obj->path;
+ return obj ? obj->path : ERR_PTR(-EINVAL);
}
-unsigned int
-bpf_object__get_kversion(struct bpf_object *obj)
+unsigned int bpf_object__kversion(struct bpf_object *obj)
{
- if (!obj)
- return 0;
- return obj->kern_version;
+ return obj ? obj->kern_version : 0;
}
struct bpf_program *
@@ -1224,9 +1218,8 @@
return &obj->programs[idx];
}
-int bpf_program__set_private(struct bpf_program *prog,
- void *priv,
- bpf_program_clear_priv_t clear_priv)
+int bpf_program__set_priv(struct bpf_program *prog, void *priv,
+ bpf_program_clear_priv_t clear_priv)
{
if (prog->priv && prog->clear_priv)
prog->clear_priv(prog, prog->priv);
@@ -1236,10 +1229,9 @@
return 0;
}
-int bpf_program__get_private(struct bpf_program *prog, void **ppriv)
+void *bpf_program__priv(struct bpf_program *prog)
{
- *ppriv = prog->priv;
- return 0;
+ return prog ? prog->priv : ERR_PTR(-EINVAL);
}
const char *bpf_program__title(struct bpf_program *prog, bool needs_copy)
@@ -1311,32 +1303,23 @@
return fd;
}
-int bpf_map__get_fd(struct bpf_map *map)
+int bpf_map__fd(struct bpf_map *map)
{
- if (!map)
- return -EINVAL;
-
- return map->fd;
+ return map ? map->fd : -EINVAL;
}
-int bpf_map__get_def(struct bpf_map *map, struct bpf_map_def *pdef)
+const struct bpf_map_def *bpf_map__def(struct bpf_map *map)
{
- if (!map || !pdef)
- return -EINVAL;
-
- *pdef = map->def;
- return 0;
+ return map ? &map->def : ERR_PTR(-EINVAL);
}
-const char *bpf_map__get_name(struct bpf_map *map)
+const char *bpf_map__name(struct bpf_map *map)
{
- if (!map)
- return NULL;
- return map->name;
+ return map ? map->name : NULL;
}
-int bpf_map__set_private(struct bpf_map *map, void *priv,
- bpf_map_clear_priv_t clear_priv)
+int bpf_map__set_priv(struct bpf_map *map, void *priv,
+ bpf_map_clear_priv_t clear_priv)
{
if (!map)
return -EINVAL;
@@ -1351,14 +1334,9 @@
return 0;
}
-int bpf_map__get_private(struct bpf_map *map, void **ppriv)
+void *bpf_map__priv(struct bpf_map *map)
{
- if (!map)
- return -EINVAL;
-
- if (ppriv)
- *ppriv = map->priv;
- return 0;
+ return map ? map->priv : ERR_PTR(-EINVAL);
}
struct bpf_map *
@@ -1389,7 +1367,7 @@
}
struct bpf_map *
-bpf_object__get_map_by_name(struct bpf_object *obj, const char *name)
+bpf_object__find_map_by_name(struct bpf_object *obj, const char *name)
{
struct bpf_map *pos;
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index a51594c..722f46b 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -55,8 +55,8 @@
/* Load/unload object into/from kernel */
int bpf_object__load(struct bpf_object *obj);
int bpf_object__unload(struct bpf_object *obj);
-const char *bpf_object__get_name(struct bpf_object *obj);
-unsigned int bpf_object__get_kversion(struct bpf_object *obj);
+const char *bpf_object__name(struct bpf_object *obj);
+unsigned int bpf_object__kversion(struct bpf_object *obj);
struct bpf_object *bpf_object__next(struct bpf_object *prev);
#define bpf_object__for_each_safe(pos, tmp) \
@@ -78,11 +78,10 @@
typedef void (*bpf_program_clear_priv_t)(struct bpf_program *,
void *);
-int bpf_program__set_private(struct bpf_program *prog, void *priv,
- bpf_program_clear_priv_t clear_priv);
+int bpf_program__set_priv(struct bpf_program *prog, void *priv,
+ bpf_program_clear_priv_t clear_priv);
-int bpf_program__get_private(struct bpf_program *prog,
- void **ppriv);
+void *bpf_program__priv(struct bpf_program *prog);
const char *bpf_program__title(struct bpf_program *prog, bool needs_copy);
@@ -171,7 +170,7 @@
*/
struct bpf_map;
struct bpf_map *
-bpf_object__get_map_by_name(struct bpf_object *obj, const char *name);
+bpf_object__find_map_by_name(struct bpf_object *obj, const char *name);
struct bpf_map *
bpf_map__next(struct bpf_map *map, struct bpf_object *obj);
@@ -180,13 +179,13 @@
(pos) != NULL; \
(pos) = bpf_map__next((pos), (obj)))
-int bpf_map__get_fd(struct bpf_map *map);
-int bpf_map__get_def(struct bpf_map *map, struct bpf_map_def *pdef);
-const char *bpf_map__get_name(struct bpf_map *map);
+int bpf_map__fd(struct bpf_map *map);
+const struct bpf_map_def *bpf_map__def(struct bpf_map *map);
+const char *bpf_map__name(struct bpf_map *map);
typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *);
-int bpf_map__set_private(struct bpf_map *map, void *priv,
- bpf_map_clear_priv_t clear_priv);
-int bpf_map__get_private(struct bpf_map *map, void **ppriv);
+int bpf_map__set_priv(struct bpf_map *map, void *priv,
+ bpf_map_clear_priv_t clear_priv);
+void *bpf_map__priv(struct bpf_map *map);
#endif
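For callers, the renamed getters now return their result directly (using the ERR_PTR() convention) instead of filling an out parameter; a hypothetical updated call site, assuming the tools copy of <linux/err.h> for IS_ERR()/PTR_ERR():

#include <errno.h>
#include <linux/err.h>
#include "libbpf.h"

/* Hypothetical helper: look up a map by name and report its value size. */
static int map_value_size(struct bpf_object *obj, const char *name)
{
	struct bpf_map *map = bpf_object__find_map_by_name(obj, name);
	const struct bpf_map_def *def;

	if (!map)
		return -ENOENT;

	def = bpf_map__def(map);	/* was: bpf_map__get_def(map, &def) */
	if (IS_ERR(def))
		return PTR_ERR(def);

	return def->value_size;
}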
diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore
index 3d1bb80..3db3db9 100644
--- a/tools/perf/.gitignore
+++ b/tools/perf/.gitignore
@@ -30,3 +30,4 @@
*.pyo
.config-detected
util/intel-pt-decoder/inat-tables.c
+arch/*/include/generated/
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 04f23b4..d96ccd4 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -204,6 +204,38 @@
--no-aggr::
Do not aggregate counts across all monitored CPUs.
+--topdown::
+Print top-down level 1 metrics if supported by the CPU. This allows
+determining bottlenecks in the CPU pipeline for CPU-bound workloads
+by breaking down the consumed cycles into frontend bound, backend bound,
+bad speculation and retiring.
+
+Frontend bound means that the CPU cannot fetch and decode instructions fast
+enough. Backend bound means that computation or memory access is the
+bottleneck. Bad speculation means that the CPU wasted cycles due to branch
+mispredictions and similar issues. Retiring means that the CPU computed
+without an apparent bottleneck. The bottleneck is only the real bottleneck
+if the workload is actually bound by the CPU and not by something else.
+
+For best results it is usually a good idea to use it with interval
+mode like -I 1000, as the bottleneck of workloads can change often.
+
+The top-down metrics are collected per core instead of per
+CPU thread. Per-core mode is enabled automatically
+and -a (global monitoring) is needed, requiring root rights or
+kernel.perf_event_paranoid=-1.
+
+Topdown uses the full Performance Monitoring Unit and needs
+the NMI watchdog disabled (as root):
+echo 0 > /proc/sys/kernel/nmi_watchdog
+for best results. Otherwise the bottlenecks may be inconsistent
+on workloads with changing phases.
+
+This enables --metric-only, unless overridden with --no-metric-only.
+
+To interpret the results it is usually necessary to know which
+CPUs the workload runs on. If needed, the CPUs can be pinned using
+taskset.
EXAMPLES
--------
diff --git a/tools/perf/arch/arm/util/Build b/tools/perf/arch/arm/util/Build
index d22e3d0..f98da17 100644
--- a/tools/perf/arch/arm/util/Build
+++ b/tools/perf/arch/arm/util/Build
@@ -1,4 +1,4 @@
libperf-$(CONFIG_DWARF) += dwarf-regs.o
-libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
+libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build
index e58123a8..02f41db 100644
--- a/tools/perf/arch/arm64/util/Build
+++ b/tools/perf/arch/arm64/util/Build
@@ -1,2 +1,2 @@
libperf-$(CONFIG_DWARF) += dwarf-regs.o
-libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
+libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
diff --git a/tools/perf/arch/arm64/util/unwind-libunwind.c b/tools/perf/arch/arm64/util/unwind-libunwind.c
index a87afa9..c116b71 100644
--- a/tools/perf/arch/arm64/util/unwind-libunwind.c
+++ b/tools/perf/arch/arm64/util/unwind-libunwind.c
@@ -1,11 +1,13 @@
+#ifndef REMOTE_UNWIND_LIBUNWIND
#include <errno.h>
#include <libunwind.h>
#include "perf_regs.h"
#include "../../util/unwind.h"
#include "../../util/debug.h"
+#endif
-int libunwind__arch_reg_id(int regnum)
+int LIBUNWIND__ARCH_REG_ID(int regnum)
{
switch (regnum) {
case UNW_AARCH64_X0:
diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c
index e83c8ce..fa090a9 100644
--- a/tools/perf/arch/common.c
+++ b/tools/perf/arch/common.c
@@ -102,7 +102,7 @@
* Return architecture name in a normalized form.
* The conversion logic comes from the Makefile.
*/
-static const char *normalize_arch(char *arch)
+const char *normalize_arch(char *arch)
{
if (!strcmp(arch, "x86_64"))
return "x86";
diff --git a/tools/perf/arch/common.h b/tools/perf/arch/common.h
index 7529cfb..6b01c73 100644
--- a/tools/perf/arch/common.h
+++ b/tools/perf/arch/common.h
@@ -6,5 +6,6 @@
extern const char *objdump_path;
int perf_env__lookup_objdump(struct perf_env *env);
+const char *normalize_arch(char *arch);
#endif /* ARCH_PERF_COMMON_H */
diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build
index 4659703..f95e6f4 100644
--- a/tools/perf/arch/x86/util/Build
+++ b/tools/perf/arch/x86/util/Build
@@ -3,11 +3,12 @@
libperf-y += pmu.o
libperf-y += kvm-stat.o
libperf-y += perf_regs.o
+libperf-y += group.o
libperf-$(CONFIG_DWARF) += dwarf-regs.o
libperf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o
-libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
+libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
libperf-$(CONFIG_AUXTRACE) += auxtrace.o
diff --git a/tools/perf/arch/x86/util/group.c b/tools/perf/arch/x86/util/group.c
new file mode 100644
index 0000000..37f92aa
--- /dev/null
+++ b/tools/perf/arch/x86/util/group.c
@@ -0,0 +1,27 @@
+#include <stdio.h>
+#include "api/fs/fs.h"
+#include "util/group.h"
+
+/*
+ * Check whether we can use a group for topdown.
+ * Without a group, multiplexing may give bad results.
+ */
+bool arch_topdown_check_group(bool *warn)
+{
+ int n;
+
+ if (sysctl__read_int("kernel/nmi_watchdog", &n) < 0)
+ return false;
+ if (n > 0) {
+ *warn = true;
+ return false;
+ }
+ return true;
+}
+
+void arch_topdown_group_warn(void)
+{
+ fprintf(stderr,
+ "nmi_watchdog enabled with topdown. May give wrong results.\n"
+ "Disable with echo 0 > /proc/sys/kernel/nmi_watchdog\n");
+}
diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c
index 357f1b1..2e5567c 100644
--- a/tools/perf/arch/x86/util/tsc.c
+++ b/tools/perf/arch/x86/util/tsc.c
@@ -62,6 +62,8 @@
struct perf_tsc_conversion tc;
int err;
+ if (!pc)
+ return 0;
err = perf_read_tsc_conversion(pc, &tc);
if (err == -EOPNOTSUPP)
return 0;
diff --git a/tools/perf/arch/x86/util/unwind-libunwind.c b/tools/perf/arch/x86/util/unwind-libunwind.c
index db25e93..4f16661 100644
--- a/tools/perf/arch/x86/util/unwind-libunwind.c
+++ b/tools/perf/arch/x86/util/unwind-libunwind.c
@@ -1,12 +1,14 @@
+#ifndef REMOTE_UNWIND_LIBUNWIND
#include <errno.h>
#include <libunwind.h>
#include "perf_regs.h"
#include "../../util/unwind.h"
#include "../../util/debug.h"
+#endif
#ifdef HAVE_ARCH_X86_64_SUPPORT
-int libunwind__arch_reg_id(int regnum)
+int LIBUNWIND__ARCH_REG_ID(int regnum)
{
int id;
@@ -70,7 +72,7 @@
return id;
}
#else
-int libunwind__arch_reg_id(int regnum)
+int LIBUNWIND__ARCH_REG_ID(int regnum)
{
int id;
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index dc3fcb5..d4cf1b0 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -655,6 +655,13 @@
return 0;
}
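+/*
+ * Return the first mmap'ed page for synthesizing time conversion data, or
+ * NULL when no mmap has been set up, in which case nothing is synthesized.
+ */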
+static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
+{
+ if (rec->evlist && rec->evlist->mmap && rec->evlist->mmap[0].base)
+ return rec->evlist->mmap[0].base;
+ return NULL;
+}
+
static int record__synthesize(struct record *rec)
{
struct perf_session *session = rec->session;
@@ -692,7 +699,7 @@
}
}
- err = perf_event__synth_time_conv(rec->evlist->mmap[0].base, tool,
+ err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
process_synthesized_event, machine);
if (err)
goto out;
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index e3ce2f3..4601123 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -339,7 +339,7 @@
*/
static int perf_session__check_output_opt(struct perf_session *session)
{
- int j;
+ unsigned int j;
struct perf_evsel *evsel;
for (j = 0; j < PERF_TYPE_MAX; ++j) {
@@ -388,17 +388,20 @@
struct perf_event_attr *attr;
j = PERF_TYPE_TRACEPOINT;
- evsel = perf_session__find_first_evtype(session, j);
- if (evsel == NULL)
- goto out;
- attr = &evsel->attr;
+ evlist__for_each(session->evlist, evsel) {
+ if (evsel->attr.type != j)
+ continue;
- if (attr->sample_type & PERF_SAMPLE_CALLCHAIN) {
- output[j].fields |= PERF_OUTPUT_IP;
- output[j].fields |= PERF_OUTPUT_SYM;
- output[j].fields |= PERF_OUTPUT_DSO;
- set_print_ip_opts(attr);
+ attr = &evsel->attr;
+
+ if (attr->sample_type & PERF_SAMPLE_CALLCHAIN) {
+ output[j].fields |= PERF_OUTPUT_IP;
+ output[j].fields |= PERF_OUTPUT_SYM;
+ output[j].fields |= PERF_OUTPUT_DSO;
+ set_print_ip_opts(attr);
+ goto out;
+ }
}
}
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index ee7ada7..dff6373 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -59,10 +59,13 @@
#include "util/thread.h"
#include "util/thread_map.h"
#include "util/counts.h"
+#include "util/group.h"
#include "util/session.h"
#include "util/tool.h"
+#include "util/group.h"
#include "asm/bug.h"
+#include <api/fs/fs.h>
#include <stdlib.h>
#include <sys/prctl.h>
#include <locale.h>
@@ -98,6 +101,15 @@
"}"
};
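+/*
+ * Level 1 topdown events requested by --topdown; events the PMU does not
+ * expose are filtered out later in topdown_filter_events().
+ */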
+static const char * topdown_attrs[] = {
+ "topdown-total-slots",
+ "topdown-slots-retired",
+ "topdown-recovery-bubbles",
+ "topdown-fetch-bubbles",
+ "topdown-slots-issued",
+ NULL,
+};
+
static struct perf_evlist *evsel_list;
static struct target target = {
@@ -112,6 +124,7 @@
static bool null_run = false;
static int detailed_run = 0;
static bool transaction_run;
+static bool topdown_run = false;
static bool big_num = true;
static int big_num_opt = -1;
static const char *csv_sep = NULL;
@@ -124,6 +137,7 @@
static unsigned int unit_width = 4; /* strlen("unit") */
static bool forever = false;
static bool metric_only = false;
+static bool force_metric_only = false;
static struct timespec ref_time;
static struct cpu_map *aggr_map;
static aggr_get_id_t aggr_get_id;
@@ -1302,7 +1316,15 @@
[AGGR_GLOBAL] = 0,
};
-static void print_metric_headers(char *prefix)
+static const char *aggr_header_csv[] = {
+ [AGGR_CORE] = "core,cpus,",
+ [AGGR_SOCKET] = "socket,cpus",
+ [AGGR_NONE] = "cpu,",
+ [AGGR_THREAD] = "comm-pid,",
+ [AGGR_GLOBAL] = ""
+};
+
+static void print_metric_headers(const char *prefix, bool no_indent)
{
struct perf_stat_output_ctx out;
struct perf_evsel *counter;
@@ -1313,9 +1335,15 @@
if (prefix)
fprintf(stat_config.output, "%s", prefix);
- if (!csv_output)
+ if (!csv_output && !no_indent)
fprintf(stat_config.output, "%*s",
aggr_header_lens[stat_config.aggr_mode], "");
+ if (csv_output) {
+ if (stat_config.interval)
+ fputs("time,", stat_config.output);
+ fputs(aggr_header_csv[stat_config.aggr_mode],
+ stat_config.output);
+ }
/* Print metrics headers only */
evlist__for_each(evsel_list, counter) {
@@ -1338,28 +1366,40 @@
sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep);
- if (num_print_interval == 0 && !csv_output && !metric_only) {
+ if (num_print_interval == 0 && !csv_output) {
switch (stat_config.aggr_mode) {
case AGGR_SOCKET:
- fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit");
+ fprintf(output, "# time socket cpus");
+ if (!metric_only)
+ fprintf(output, " counts %*s events\n", unit_width, "unit");
break;
case AGGR_CORE:
- fprintf(output, "# time core cpus counts %*s events\n", unit_width, "unit");
+ fprintf(output, "# time core cpus");
+ if (!metric_only)
+ fprintf(output, " counts %*s events\n", unit_width, "unit");
break;
case AGGR_NONE:
- fprintf(output, "# time CPU counts %*s events\n", unit_width, "unit");
+ fprintf(output, "# time CPU");
+ if (!metric_only)
+ fprintf(output, " counts %*s events\n", unit_width, "unit");
break;
case AGGR_THREAD:
- fprintf(output, "# time comm-pid counts %*s events\n", unit_width, "unit");
+ fprintf(output, "# time comm-pid");
+ if (!metric_only)
+ fprintf(output, " counts %*s events\n", unit_width, "unit");
break;
case AGGR_GLOBAL:
default:
- fprintf(output, "# time counts %*s events\n", unit_width, "unit");
+ fprintf(output, "# time");
+ if (!metric_only)
+ fprintf(output, " counts %*s events\n", unit_width, "unit");
case AGGR_UNSET:
break;
}
}
+ if (num_print_interval == 0 && metric_only)
+ print_metric_headers(" ", true);
if (++num_print_interval == 25)
num_print_interval = 0;
}
@@ -1428,8 +1468,8 @@
if (metric_only) {
static int num_print_iv;
- if (num_print_iv == 0)
- print_metric_headers(prefix);
+ if (num_print_iv == 0 && !interval)
+ print_metric_headers(prefix, false);
if (num_print_iv++ == 25)
num_print_iv = 0;
if (stat_config.aggr_mode == AGGR_GLOBAL && prefix)
@@ -1520,6 +1560,14 @@
return 0;
}
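+/*
+ * --metric-only/--no-metric-only callback: remember that the user set the
+ * option explicitly, so that --topdown does not override the choice later.
+ */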
+static int enable_metric_only(const struct option *opt __maybe_unused,
+ const char *s __maybe_unused, int unset)
+{
+ force_metric_only = true;
+ metric_only = !unset;
+ return 0;
+}
+
static const struct option stat_options[] = {
OPT_BOOLEAN('T', "transaction", &transaction_run,
"hardware transaction statistics"),
@@ -1578,8 +1626,10 @@
"aggregate counts per thread", AGGR_THREAD),
OPT_UINTEGER('D', "delay", &initial_delay,
"ms to wait before starting measurement after program start"),
- OPT_BOOLEAN(0, "metric-only", &metric_only,
- "Only print computed metrics. No raw values"),
+ OPT_CALLBACK_NOOPT(0, "metric-only", &metric_only, NULL,
+ "Only print computed metrics. No raw values", enable_metric_only),
+ OPT_BOOLEAN(0, "topdown", &topdown_run,
+ "measure topdown level 1 statistics"),
OPT_END()
};
@@ -1772,12 +1822,62 @@
return 0;
}
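+/*
+ * Drop the topdown events the PMU does not expose and build a parse_events()
+ * string from the remaining ones; with use_group the events are wrapped in
+ * "{...}" so they are scheduled as a single group.
+ */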
+static int topdown_filter_events(const char **attr, char **str, bool use_group)
+{
+ int off = 0;
+ int i;
+ int len = 0;
+ char *s;
+
+ for (i = 0; attr[i]; i++) {
+ if (pmu_have_event("cpu", attr[i])) {
+ len += strlen(attr[i]) + 1;
+ attr[i - off] = attr[i];
+ } else
+ off++;
+ }
+ attr[i - off] = NULL;
+
+ *str = malloc(len + 1 + 2);
+ if (!*str)
+ return -1;
+ s = *str;
+ if (i - off == 0) {
+ *s = 0;
+ return 0;
+ }
+ if (use_group)
+ *s++ = '{';
+ for (i = 0; attr[i]; i++) {
+ strcpy(s, attr[i]);
+ s += strlen(s);
+ *s++ = ',';
+ }
+ if (use_group) {
+ s[-1] = '}';
+ *s = 0;
+ } else
+ s[-1] = 0;
+ return 0;
+}
+
+__weak bool arch_topdown_check_group(bool *warn)
+{
+ *warn = false;
+ return false;
+}
+
+__weak void arch_topdown_group_warn(void)
+{
+}
+
/*
* Add default attributes, if there were no attributes specified or
* if -d/--detailed, -d -d or -d -d -d is used:
*/
static int add_default_attributes(void)
{
+ int err;
struct perf_event_attr default_attrs0[] = {
{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
@@ -1896,7 +1996,6 @@
return 0;
if (transaction_run) {
- int err;
if (pmu_have_event("cpu", "cycles-ct") &&
pmu_have_event("cpu", "el-start"))
err = parse_events(evsel_list, transaction_attrs, NULL);
@@ -1909,6 +2008,46 @@
return 0;
}
+ if (topdown_run) {
+ char *str = NULL;
+ bool warn = false;
+
+ if (stat_config.aggr_mode != AGGR_GLOBAL &&
+ stat_config.aggr_mode != AGGR_CORE) {
+ pr_err("top down event configuration requires --per-core mode\n");
+ return -1;
+ }
+ stat_config.aggr_mode = AGGR_CORE;
+ if (nr_cgroups || !target__has_cpu(&target)) {
+ pr_err("top down event configuration requires system-wide mode (-a)\n");
+ return -1;
+ }
+
+ if (!force_metric_only)
+ metric_only = true;
+ if (topdown_filter_events(topdown_attrs, &str,
+ arch_topdown_check_group(&warn)) < 0) {
+ pr_err("Out of memory\n");
+ return -1;
+ }
+ if (topdown_attrs[0] && str) {
+ if (warn)
+ arch_topdown_group_warn();
+ err = parse_events(evsel_list, str, NULL);
+ if (err) {
+ fprintf(stderr,
+ "Cannot set up top down events %s: %d\n",
+ str, err);
+ free(str);
+ return -1;
+ }
+ } else {
+ fprintf(stderr, "System does not support topdown\n");
+ return -1;
+ }
+ free(str);
+ }
+
if (!evsel_list->nr_entries) {
if (target__has_cpu(&target))
default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK;
diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile
index 5ad0255..098874b 100644
--- a/tools/perf/config/Makefile
+++ b/tools/perf/config/Makefile
@@ -73,17 +73,25 @@
#
# make DEBUG=1 LIBUNWIND_DIR=/opt/libunwind/
#
+
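+# libunwind_arch_set_flags(arch) sets the feature-check CFLAGS/LDFLAGS for one
+# cross libunwind flavour (e.g. libunwind-x86, libunwind-aarch64) based on
+# LIBUNWIND_DIR.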
+libunwind_arch_set_flags = $(eval $(libunwind_arch_set_flags_code))
+define libunwind_arch_set_flags_code
+ FEATURE_CHECK_CFLAGS-libunwind-$(1) = -I$(LIBUNWIND_DIR)/include
+ FEATURE_CHECK_LDFLAGS-libunwind-$(1) = -L$(LIBUNWIND_DIR)/lib
+endef
+
ifdef LIBUNWIND_DIR
LIBUNWIND_CFLAGS = -I$(LIBUNWIND_DIR)/include
LIBUNWIND_LDFLAGS = -L$(LIBUNWIND_DIR)/lib
+ LIBUNWIND_ARCHS = x86 x86_64 arm aarch64 debug-frame-arm debug-frame-aarch64
+ $(foreach libunwind_arch,$(LIBUNWIND_ARCHS),$(call libunwind_arch_set_flags,$(libunwind_arch)))
endif
-LIBUNWIND_LDFLAGS += $(LIBUNWIND_LIBS)
# Set per-feature check compilation flags
FEATURE_CHECK_CFLAGS-libunwind = $(LIBUNWIND_CFLAGS)
-FEATURE_CHECK_LDFLAGS-libunwind = $(LIBUNWIND_LDFLAGS)
+FEATURE_CHECK_LDFLAGS-libunwind = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS)
FEATURE_CHECK_CFLAGS-libunwind-debug-frame = $(LIBUNWIND_CFLAGS)
-FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS)
+FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS)
ifeq ($(NO_PERF_REGS),0)
CFLAGS += -DHAVE_PERF_REGS_SUPPORT
@@ -351,10 +359,40 @@
endif
ifndef NO_LIBUNWIND
+ have_libunwind :=
+
+ ifeq ($(feature-libunwind-x86), 1)
+ $(call detected,CONFIG_LIBUNWIND_X86)
+ CFLAGS += -DHAVE_LIBUNWIND_X86_SUPPORT
+ LDFLAGS += -lunwind-x86
+ have_libunwind = 1
+ endif
+
+ ifeq ($(feature-libunwind-aarch64), 1)
+ $(call detected,CONFIG_LIBUNWIND_AARCH64)
+ CFLAGS += -DHAVE_LIBUNWIND_AARCH64_SUPPORT
+ LDFLAGS += -lunwind-aarch64
+ have_libunwind = 1
+ $(call feature_check,libunwind-debug-frame-aarch64)
+ ifneq ($(feature-libunwind-debug-frame-aarch64), 1)
+ msg := $(warning No debug_frame support found in libunwind-aarch64);
+ CFLAGS += -DNO_LIBUNWIND_DEBUG_FRAME_AARCH64
+ endif
+ endif
+
ifneq ($(feature-libunwind), 1)
msg := $(warning No libunwind found. Please install libunwind-dev[el] >= 1.1 and/or set LIBUNWIND_DIR);
+ NO_LOCAL_LIBUNWIND := 1
+ else
+ have_libunwind := 1
+ $(call detected,CONFIG_LOCAL_LIBUNWIND)
+ endif
+
+ ifneq ($(have_libunwind), 1)
NO_LIBUNWIND := 1
endif
+else
+ NO_LOCAL_LIBUNWIND := 1
endif
ifndef NO_LIBBPF
@@ -392,7 +430,7 @@
NO_DWARF_UNWIND := 1
endif
-ifndef NO_LIBUNWIND
+ifndef NO_LOCAL_LIBUNWIND
ifeq ($(ARCH),$(filter $(ARCH),arm arm64))
$(call feature_check,libunwind-debug-frame)
ifneq ($(feature-libunwind-debug-frame), 1)
@@ -403,8 +441,12 @@
# non-ARM has no dwarf_find_debug_frame() function:
CFLAGS += -DNO_LIBUNWIND_DEBUG_FRAME
endif
- CFLAGS += -DHAVE_LIBUNWIND_SUPPORT
EXTLIBS += $(LIBUNWIND_LIBS)
+ LDFLAGS += $(LIBUNWIND_LIBS)
+endif
+
+ifndef NO_LIBUNWIND
+ CFLAGS += -DHAVE_LIBUNWIND_SUPPORT
CFLAGS += $(LIBUNWIND_CFLAGS)
LDFLAGS += $(LIBUNWIND_LDFLAGS)
endif
diff --git a/tools/perf/tests/fdarray.c b/tools/perf/tests/fdarray.c
index c809463..59dbd05 100644
--- a/tools/perf/tests/fdarray.c
+++ b/tools/perf/tests/fdarray.c
@@ -36,7 +36,7 @@
}
fdarray__init_revents(fda, POLLIN);
- nr_fds = fdarray__filter(fda, POLLHUP, NULL);
+ nr_fds = fdarray__filter(fda, POLLHUP, NULL, NULL);
if (nr_fds != fda->nr_alloc) {
pr_debug("\nfdarray__filter()=%d != %d shouldn't have filtered anything",
nr_fds, fda->nr_alloc);
@@ -44,7 +44,7 @@
}
fdarray__init_revents(fda, POLLHUP);
- nr_fds = fdarray__filter(fda, POLLHUP, NULL);
+ nr_fds = fdarray__filter(fda, POLLHUP, NULL, NULL);
if (nr_fds != 0) {
pr_debug("\nfdarray__filter()=%d != %d, should have filtered all fds",
nr_fds, fda->nr_alloc);
@@ -57,7 +57,7 @@
pr_debug("\nfiltering all but fda->entries[2]:");
fdarray__fprintf_prefix(fda, "before", stderr);
- nr_fds = fdarray__filter(fda, POLLHUP, NULL);
+ nr_fds = fdarray__filter(fda, POLLHUP, NULL, NULL);
fdarray__fprintf_prefix(fda, " after", stderr);
if (nr_fds != 1) {
pr_debug("\nfdarray__filter()=%d != 1, should have left just one event", nr_fds);
@@ -78,7 +78,7 @@
pr_debug("\nfiltering all but (fda->entries[0], fda->entries[3]):");
fdarray__fprintf_prefix(fda, "before", stderr);
- nr_fds = fdarray__filter(fda, POLLHUP, NULL);
+ nr_fds = fdarray__filter(fda, POLLHUP, NULL, NULL);
fdarray__fprintf_prefix(fda, " after", stderr);
if (nr_fds != 2) {
pr_debug("\nfdarray__filter()=%d != 2, should have left just two events",
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index 7865f68..b2a2c74 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -1783,8 +1783,8 @@
struct evlist_test e;
char name[MAX_NAME];
- if (!strcmp(ent->d_name, ".") ||
- !strcmp(ent->d_name, ".."))
+ /* Names containing . are special and cannot be used directly */
+ if (strchr(ent->d_name, '.'))
continue;
snprintf(name, MAX_NAME, "cpu/event=%s/u", ent->d_name);
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 8c6c8a0..fced833 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -99,7 +99,10 @@
libperf-$(CONFIG_DWARF) += dwarf-aux.o
libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
+libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind-local.o
libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
+libperf-$(CONFIG_LIBUNWIND_X86) += libunwind/x86_32.o
+libperf-$(CONFIG_LIBUNWIND_AARCH64) += libunwind/arm64.o
libperf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index 493307d..dcc8845 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -339,7 +339,7 @@
}
pr_debug("bpf: config '%s' is ok\n", config_str);
- err = bpf_program__set_private(prog, priv, clear_prog_priv);
+ err = bpf_program__set_priv(prog, priv, clear_prog_priv);
if (err) {
pr_debug("Failed to set priv for program '%s'\n", config_str);
goto errout;
@@ -380,15 +380,14 @@
struct bpf_insn *orig_insns, int orig_insns_cnt,
struct bpf_prog_prep_result *res)
{
+ struct bpf_prog_priv *priv = bpf_program__priv(prog);
struct probe_trace_event *tev;
struct perf_probe_event *pev;
- struct bpf_prog_priv *priv;
struct bpf_insn *buf;
size_t prologue_cnt = 0;
int i, err;
- err = bpf_program__get_private(prog, (void **)&priv);
- if (err || !priv)
+ if (IS_ERR(priv) || !priv)
goto errout;
pev = &priv->pev;
@@ -535,13 +534,12 @@
static int hook_load_preprocessor(struct bpf_program *prog)
{
+ struct bpf_prog_priv *priv = bpf_program__priv(prog);
struct perf_probe_event *pev;
- struct bpf_prog_priv *priv;
bool need_prologue = false;
int err, i;
- err = bpf_program__get_private(prog, (void **)&priv);
- if (err || !priv) {
+ if (IS_ERR(priv) || !priv) {
pr_debug("Internal error when hook preprocessor\n");
return -BPF_LOADER_ERRNO__INTERNAL;
}
@@ -607,9 +605,11 @@
if (err)
goto out;
- err = bpf_program__get_private(prog, (void **)&priv);
- if (err || !priv)
+ priv = bpf_program__priv(prog);
+ if (IS_ERR(priv) || !priv) {
+ err = PTR_ERR(priv);
goto out;
+ }
pev = &priv->pev;
err = convert_perf_probe_events(pev, 1);
@@ -645,13 +645,12 @@
{
int err, ret = 0;
struct bpf_program *prog;
- struct bpf_prog_priv *priv;
bpf_object__for_each_program(prog, obj) {
+ struct bpf_prog_priv *priv = bpf_program__priv(prog);
int i;
- err = bpf_program__get_private(prog, (void **)&priv);
- if (err || !priv)
+ if (IS_ERR(priv) || !priv)
continue;
for (i = 0; i < priv->pev.ntevs; i++) {
@@ -702,14 +701,12 @@
int err;
bpf_object__for_each_program(prog, obj) {
+ struct bpf_prog_priv *priv = bpf_program__priv(prog);
struct probe_trace_event *tev;
struct perf_probe_event *pev;
- struct bpf_prog_priv *priv;
int i, fd;
- err = bpf_program__get_private(prog,
- (void **)&priv);
- if (err || !priv) {
+ if (IS_ERR(priv) || !priv) {
pr_debug("bpf: failed to get private field\n");
return -BPF_LOADER_ERRNO__INTERNAL;
}
@@ -897,15 +894,12 @@
static int
bpf_map__add_op(struct bpf_map *map, struct bpf_map_op *op)
{
- struct bpf_map_priv *priv;
- const char *map_name;
- int err;
+ const char *map_name = bpf_map__name(map);
+ struct bpf_map_priv *priv = bpf_map__priv(map);
- map_name = bpf_map__get_name(map);
- err = bpf_map__get_private(map, (void **)&priv);
- if (err) {
+ if (IS_ERR(priv)) {
pr_debug("Failed to get private from map %s\n", map_name);
- return err;
+ return PTR_ERR(priv);
}
if (!priv) {
@@ -916,7 +910,7 @@
}
INIT_LIST_HEAD(&priv->ops_list);
- if (bpf_map__set_private(map, priv, bpf_map_priv__clear)) {
+ if (bpf_map__set_priv(map, priv, bpf_map_priv__clear)) {
free(priv);
return -BPF_LOADER_ERRNO__INTERNAL;
}
@@ -948,30 +942,26 @@
__bpf_map__config_value(struct bpf_map *map,
struct parse_events_term *term)
{
- struct bpf_map_def def;
struct bpf_map_op *op;
- const char *map_name;
- int err;
+ const char *map_name = bpf_map__name(map);
+ const struct bpf_map_def *def = bpf_map__def(map);
- map_name = bpf_map__get_name(map);
-
- err = bpf_map__get_def(map, &def);
- if (err) {
+ if (IS_ERR(def)) {
pr_debug("Unable to get map definition from '%s'\n",
map_name);
return -BPF_LOADER_ERRNO__INTERNAL;
}
- if (def.type != BPF_MAP_TYPE_ARRAY) {
+ if (def->type != BPF_MAP_TYPE_ARRAY) {
pr_debug("Map %s type is not BPF_MAP_TYPE_ARRAY\n",
map_name);
return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
}
- if (def.key_size < sizeof(unsigned int)) {
+ if (def->key_size < sizeof(unsigned int)) {
pr_debug("Map %s has incorrect key size\n", map_name);
return -BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE;
}
- switch (def.value_size) {
+ switch (def->value_size) {
case 1:
case 2:
case 4:
@@ -1014,12 +1004,10 @@
struct perf_evlist *evlist)
{
struct perf_evsel *evsel;
- struct bpf_map_def def;
+ const struct bpf_map_def *def;
struct bpf_map_op *op;
- const char *map_name;
- int err;
+ const char *map_name = bpf_map__name(map);
- map_name = bpf_map__get_name(map);
evsel = perf_evlist__find_evsel_by_str(evlist, term->val.str);
if (!evsel) {
pr_debug("Event (for '%s') '%s' doesn't exist\n",
@@ -1027,18 +1015,18 @@
return -BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT;
}
- err = bpf_map__get_def(map, &def);
- if (err) {
+ def = bpf_map__def(map);
+ if (IS_ERR(def)) {
pr_debug("Unable to get map definition from '%s'\n",
map_name);
- return err;
+ return PTR_ERR(def);
}
/*
* No need to check key_size and value_size:
* kernel has already checked them.
*/
- if (def.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
+ if (def->type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
pr_debug("Map %s type is not BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
map_name);
return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
@@ -1087,9 +1075,8 @@
const char *map_name)
{
struct parse_events_array *array = &term->array;
- struct bpf_map_def def;
+ const struct bpf_map_def *def;
unsigned int i;
- int err;
if (!array->nr_ranges)
return 0;
@@ -1099,8 +1086,8 @@
return -BPF_LOADER_ERRNO__INTERNAL;
}
- err = bpf_map__get_def(map, &def);
- if (err) {
+ def = bpf_map__def(map);
+ if (IS_ERR(def)) {
pr_debug("ERROR: Unable to get map definition from '%s'\n",
map_name);
return -BPF_LOADER_ERRNO__INTERNAL;
@@ -1111,7 +1098,7 @@
size_t length = array->ranges[i].length;
unsigned int idx = start + length - 1;
- if (idx >= def.max_entries) {
+ if (idx >= def->max_entries) {
pr_debug("ERROR: index %d too large\n", idx);
return -BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG;
}
@@ -1147,7 +1134,7 @@
goto out;
}
- map = bpf_object__get_map_by_name(obj, map_name);
+ map = bpf_object__find_map_by_name(obj, map_name);
if (!map) {
pr_debug("ERROR: Map %s doesn't exist\n", map_name);
err = -BPF_LOADER_ERRNO__OBJCONF_MAP_NOTEXIST;
@@ -1204,14 +1191,14 @@
}
typedef int (*map_config_func_t)(const char *name, int map_fd,
- struct bpf_map_def *pdef,
+ const struct bpf_map_def *pdef,
struct bpf_map_op *op,
void *pkey, void *arg);
static int
foreach_key_array_all(map_config_func_t func,
void *arg, const char *name,
- int map_fd, struct bpf_map_def *pdef,
+ int map_fd, const struct bpf_map_def *pdef,
struct bpf_map_op *op)
{
unsigned int i;
@@ -1231,7 +1218,7 @@
static int
foreach_key_array_ranges(map_config_func_t func, void *arg,
const char *name, int map_fd,
- struct bpf_map_def *pdef,
+ const struct bpf_map_def *pdef,
struct bpf_map_op *op)
{
unsigned int i, j;
@@ -1261,15 +1248,12 @@
void *arg)
{
int err, map_fd;
- const char *name;
struct bpf_map_op *op;
- struct bpf_map_def def;
- struct bpf_map_priv *priv;
+ const struct bpf_map_def *def;
+ const char *name = bpf_map__name(map);
+ struct bpf_map_priv *priv = bpf_map__priv(map);
- name = bpf_map__get_name(map);
-
- err = bpf_map__get_private(map, (void **)&priv);
- if (err) {
+ if (IS_ERR(priv)) {
pr_debug("ERROR: failed to get private from map %s\n", name);
return -BPF_LOADER_ERRNO__INTERNAL;
}
@@ -1278,29 +1262,29 @@
return 0;
}
- err = bpf_map__get_def(map, &def);
- if (err) {
+ def = bpf_map__def(map);
+ if (IS_ERR(def)) {
pr_debug("ERROR: failed to get definition from map %s\n", name);
return -BPF_LOADER_ERRNO__INTERNAL;
}
- map_fd = bpf_map__get_fd(map);
+ map_fd = bpf_map__fd(map);
if (map_fd < 0) {
pr_debug("ERROR: failed to get fd from map %s\n", name);
return map_fd;
}
list_for_each_entry(op, &priv->ops_list, list) {
- switch (def.type) {
+ switch (def->type) {
case BPF_MAP_TYPE_ARRAY:
case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
switch (op->key_type) {
case BPF_MAP_KEY_ALL:
err = foreach_key_array_all(func, arg, name,
- map_fd, &def, op);
+ map_fd, def, op);
break;
case BPF_MAP_KEY_RANGES:
err = foreach_key_array_ranges(func, arg, name,
- map_fd, &def,
+ map_fd, def,
op);
break;
default:
@@ -1410,7 +1394,7 @@
static int
apply_obj_config_map_for_key(const char *name, int map_fd,
- struct bpf_map_def *pdef __maybe_unused,
+ const struct bpf_map_def *pdef,
struct bpf_map_op *op,
void *pkey, void *arg __maybe_unused)
{
@@ -1475,9 +1459,9 @@
#define bpf__for_each_stdout_map(pos, obj, objtmp) \
bpf__for_each_map(pos, obj, objtmp) \
- if (bpf_map__get_name(pos) && \
+ if (bpf_map__name(pos) && \
(strcmp("__bpf_stdout__", \
- bpf_map__get_name(pos)) == 0))
+ bpf_map__name(pos)) == 0))
int bpf__setup_stdout(struct perf_evlist *evlist __maybe_unused)
{
@@ -1489,10 +1473,9 @@
bool need_init = false;
bpf__for_each_stdout_map(map, obj, tmp) {
- struct bpf_map_priv *priv;
+ struct bpf_map_priv *priv = bpf_map__priv(map);
- err = bpf_map__get_private(map, (void **)&priv);
- if (err)
+ if (IS_ERR(priv))
return -BPF_LOADER_ERRNO__INTERNAL;
/*
@@ -1520,10 +1503,9 @@
}
bpf__for_each_stdout_map(map, obj, tmp) {
- struct bpf_map_priv *priv;
+ struct bpf_map_priv *priv = bpf_map__priv(map);
- err = bpf_map__get_private(map, (void **)&priv);
- if (err)
+ if (IS_ERR(priv))
return -BPF_LOADER_ERRNO__INTERNAL;
if (priv)
continue;
@@ -1533,7 +1515,7 @@
if (!priv)
return -ENOMEM;
- err = bpf_map__set_private(map, priv, bpf_map_priv__clear);
+ err = bpf_map__set_priv(map, priv, bpf_map_priv__clear);
if (err) {
bpf_map_priv__clear(map, priv);
return err;
@@ -1677,7 +1659,7 @@
{
bpf__strerror_head(err, buf, size);
case LIBBPF_ERRNO__KVER: {
- unsigned int obj_kver = bpf_object__get_kversion(obj);
+ unsigned int obj_kver = bpf_object__kversion(obj);
unsigned int real_kver;
if (fetch_kernel_version(&real_kver, NULL, 0)) {
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 67e5966..20aef90 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -144,7 +144,29 @@
return ret;
}
-static char *build_id__filename(const char *sbuild_id, char *bf, size_t size)
+char *build_id_cache__kallsyms_path(const char *sbuild_id, char *bf,
+ size_t size)
+{
+ bool retry_old = true;
+
+ snprintf(bf, size, "%s/%s/%s/kallsyms",
+ buildid_dir, DSO__NAME_KALLSYMS, sbuild_id);
+retry:
+ if (!access(bf, F_OK))
+ return bf;
+ if (retry_old) {
+ /* Try old style kallsyms cache */
+ snprintf(bf, size, "%s/%s/%s",
+ buildid_dir, DSO__NAME_KALLSYMS, sbuild_id);
+ retry_old = false;
+ goto retry;
+ }
+
+ return NULL;
+}
+
+static char *build_id_cache__linkname(const char *sbuild_id, char *bf,
+ size_t size)
{
char *tmp = bf;
int ret = asnprintf(&bf, size, "%s/.build-id/%.2s/%s", buildid_dir,
@@ -154,23 +176,52 @@
return bf;
}
+static const char *build_id_cache__basename(bool is_kallsyms, bool is_vdso)
+{
+ return is_kallsyms ? "kallsyms" : (is_vdso ? "vdso" : "elf");
+}
+
char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size)
{
- char build_id_hex[SBUILD_ID_SIZE];
+ bool is_kallsyms = dso__is_kallsyms((struct dso *)dso);
+ bool is_vdso = dso__is_vdso((struct dso *)dso);
+ char sbuild_id[SBUILD_ID_SIZE];
+ char *linkname;
+ bool alloc = (bf == NULL);
+ int ret;
if (!dso->has_build_id)
return NULL;
- build_id__sprintf(dso->build_id, sizeof(dso->build_id), build_id_hex);
- return build_id__filename(build_id_hex, bf, size);
+ build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id);
+ linkname = build_id_cache__linkname(sbuild_id, NULL, 0);
+ if (!linkname)
+ return NULL;
+
+ /* Check if old style build_id cache */
+ if (is_regular_file(linkname))
+ ret = asnprintf(&bf, size, "%s", linkname);
+ else
+ ret = asnprintf(&bf, size, "%s/%s", linkname,
+ build_id_cache__basename(is_kallsyms, is_vdso));
+ if (ret < 0 || (!alloc && size < (unsigned int)ret))
+ bf = NULL;
+ free(linkname);
+
+ return bf;
}
bool dso__build_id_is_kmod(const struct dso *dso, char *bf, size_t size)
{
- char *id_name, *ch;
+ char *id_name = NULL, *ch;
struct stat sb;
+ char sbuild_id[SBUILD_ID_SIZE];
- id_name = dso__build_id_filename(dso, bf, size);
+ if (!dso->has_build_id)
+ goto err;
+
+ build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id);
+ id_name = build_id_cache__linkname(sbuild_id, NULL, 0);
if (!id_name)
goto err;
if (access(id_name, F_OK))
@@ -194,18 +245,14 @@
if (ch - 3 < bf)
goto err;
+ free(id_name);
return strncmp(".ko", ch - 3, 3) == 0;
err:
- /*
- * If dso__build_id_filename work, get id_name again,
- * because id_name points to bf and is broken.
- */
- if (id_name)
- id_name = dso__build_id_filename(dso, bf, size);
pr_err("Invalid build id: %s\n", id_name ? :
dso->long_name ? :
dso->short_name ? :
"[unknown]");
+ free(id_name);
return false;
}
@@ -341,7 +388,8 @@
}
static char *build_id_cache__dirname_from_path(const char *name,
- bool is_kallsyms, bool is_vdso)
+ bool is_kallsyms, bool is_vdso,
+ const char *sbuild_id)
{
char *realname = (char *)name, *filename;
bool slash = is_kallsyms || is_vdso;
@@ -352,8 +400,9 @@
return NULL;
}
- if (asprintf(&filename, "%s%s%s", buildid_dir, slash ? "/" : "",
- is_vdso ? DSO__NAME_VDSO : realname) < 0)
+ if (asprintf(&filename, "%s%s%s%s%s", buildid_dir, slash ? "/" : "",
+ is_vdso ? DSO__NAME_VDSO : realname,
+ sbuild_id ? "/" : "", sbuild_id ?: "") < 0)
filename = NULL;
if (!slash)
@@ -368,7 +417,8 @@
char *dir_name;
int ret = 0;
- dir_name = build_id_cache__dirname_from_path(pathname, false, false);
+ dir_name = build_id_cache__dirname_from_path(pathname, false, false,
+ NULL);
if (!dir_name)
return -ENOMEM;
@@ -385,7 +435,7 @@
{
const size_t size = PATH_MAX;
char *realname = NULL, *filename = NULL, *dir_name = NULL,
- *linkname = zalloc(size), *targetname, *tmp;
+ *linkname = zalloc(size), *tmp;
int err = -1;
if (!is_kallsyms) {
@@ -394,14 +444,22 @@
goto out_free;
}
- dir_name = build_id_cache__dirname_from_path(name, is_kallsyms, is_vdso);
+ dir_name = build_id_cache__dirname_from_path(name, is_kallsyms,
+ is_vdso, sbuild_id);
if (!dir_name)
goto out_free;
+ /* Remove old style build-id cache */
+ if (is_regular_file(dir_name))
+ if (unlink(dir_name))
+ goto out_free;
+
if (mkdir_p(dir_name, 0755))
goto out_free;
- if (asprintf(&filename, "%s/%s", dir_name, sbuild_id) < 0) {
+ /* Save the allocated buildid dirname */
+ if (asprintf(&filename, "%s/%s", dir_name,
+ build_id_cache__basename(is_kallsyms, is_vdso)) < 0) {
filename = NULL;
goto out_free;
}
@@ -415,7 +473,7 @@
goto out_free;
}
- if (!build_id__filename(sbuild_id, linkname, size))
+ if (!build_id_cache__linkname(sbuild_id, linkname, size))
goto out_free;
tmp = strrchr(linkname, '/');
*tmp = '\0';
@@ -424,10 +482,10 @@
goto out_free;
*tmp = '/';
- targetname = filename + strlen(buildid_dir) - 5;
- memcpy(targetname, "../..", 5);
+ tmp = dir_name + strlen(buildid_dir) - 5;
+ memcpy(tmp, "../..", 5);
- if (symlink(targetname, linkname) == 0)
+ if (symlink(tmp, linkname) == 0)
err = 0;
out_free:
if (!is_kallsyms)
@@ -452,7 +510,7 @@
bool build_id_cache__cached(const char *sbuild_id)
{
bool ret = false;
- char *filename = build_id__filename(sbuild_id, NULL, 0);
+ char *filename = build_id_cache__linkname(sbuild_id, NULL, 0);
if (filename && !access(filename, F_OK))
ret = true;
@@ -471,7 +529,7 @@
if (filename == NULL || linkname == NULL)
goto out_free;
- if (!build_id__filename(sbuild_id, linkname, size))
+ if (!build_id_cache__linkname(sbuild_id, linkname, size))
goto out_free;
if (access(linkname, F_OK))
@@ -489,7 +547,7 @@
tmp = strrchr(linkname, '/') + 1;
snprintf(tmp, size - (tmp - linkname), "%s", filename);
- if (unlink(linkname))
+ if (rm_rf(linkname))
goto out_free;
err = 0;
@@ -501,7 +559,7 @@
static int dso__cache_build_id(struct dso *dso, struct machine *machine)
{
- bool is_kallsyms = dso->kernel && dso->long_name[0] != '/';
+ bool is_kallsyms = dso__is_kallsyms(dso);
bool is_vdso = dso__is_vdso(dso);
const char *name = dso->long_name;
char nm[PATH_MAX];
diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h
index 64af3e2..e5435f4 100644
--- a/tools/perf/util/build-id.h
+++ b/tools/perf/util/build-id.h
@@ -14,6 +14,8 @@
int build_id__sprintf(const u8 *build_id, int len, char *bf);
int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id);
int filename__sprintf_build_id(const char *pathname, char *sbuild_id);
+char *build_id_cache__kallsyms_path(const char *sbuild_id, char *bf,
+ size_t size);
char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size);
bool dso__build_id_is_kmod(const struct dso *dso, char *bf, size_t size);
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 65e2a4f..a70f6b5 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -94,6 +94,7 @@
enum perf_call_graph_mode record_mode;
u32 dump_size;
enum chain_mode mode;
+ u16 max_stack;
u32 print_limit;
double min_percent;
sort_chain_func_t sort;
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index dad7d82..8749eca 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -275,7 +275,8 @@
break;
}
}
- die("bad config file line %d in %s", config_linenr, config_file_name);
+ pr_err("bad config file line %d in %s\n", config_linenr, config_file_name);
+ return -1;
}
static int parse_unit_factor(const char *end, unsigned long *val)
@@ -479,16 +480,15 @@
int perf_config(config_fn_t fn, void *data)
{
- int ret = 0, found = 0;
+ int ret = -1;
const char *home = NULL;
/* Setting $PERF_CONFIG makes perf read _only_ the given config file. */
if (config_exclusive_filename)
return perf_config_from_file(fn, config_exclusive_filename, data);
if (perf_config_system() && !access(perf_etc_perfconfig(), R_OK)) {
- ret += perf_config_from_file(fn, perf_etc_perfconfig(),
- data);
- found += 1;
+ if (perf_config_from_file(fn, perf_etc_perfconfig(), data) < 0)
+ goto out;
}
home = getenv("HOME");
@@ -514,14 +514,12 @@
if (!st.st_size)
goto out_free;
- ret += perf_config_from_file(fn, user_config, data);
- found += 1;
+ ret = perf_config_from_file(fn, user_config, data);
+
out_free:
free(user_config);
}
out:
- if (found == 0)
- return -1;
return ret;
}
@@ -609,8 +607,12 @@
struct perf_config_section *section = NULL;
struct perf_config_item *item = NULL;
struct perf_config_set *set = perf_config_set;
- struct list_head *sections = &set->sections;
+ struct list_head *sections;
+ if (set == NULL)
+ return -1;
+
+ sections = &set->sections;
key = ptr = strdup(var);
if (!key) {
pr_debug("%s: strdup failed\n", __func__);
@@ -641,17 +643,64 @@
out_free:
free(key);
- perf_config_set__delete(set);
return -1;
}
+static int perf_config_set__init(struct perf_config_set *set)
+{
+ int ret = -1;
+ const char *home = NULL;
+
+ /* Setting $PERF_CONFIG makes perf read _only_ the given config file. */
+ if (config_exclusive_filename)
+ return perf_config_from_file(collect_config, config_exclusive_filename, set);
+ if (perf_config_system() && !access(perf_etc_perfconfig(), R_OK)) {
+ if (perf_config_from_file(collect_config, perf_etc_perfconfig(), set) < 0)
+ goto out;
+ }
+
+ home = getenv("HOME");
+ if (perf_config_global() && home) {
+ char *user_config = strdup(mkpath("%s/.perfconfig", home));
+ struct stat st;
+
+ if (user_config == NULL) {
+ warning("Not enough memory to process %s/.perfconfig, "
+ "ignoring it.", home);
+ goto out;
+ }
+
+ if (stat(user_config, &st) < 0)
+ goto out_free;
+
+ if (st.st_uid && (st.st_uid != geteuid())) {
+ warning("File %s not owned by current user or root, "
+ "ignoring it.", user_config);
+ goto out_free;
+ }
+
+ if (!st.st_size)
+ goto out_free;
+
+ ret = perf_config_from_file(collect_config, user_config, set);
+
+out_free:
+ free(user_config);
+ }
+out:
+ return ret;
+}
+
struct perf_config_set *perf_config_set__new(void)
{
struct perf_config_set *set = zalloc(sizeof(*set));
if (set) {
INIT_LIST_HEAD(&set->sections);
- perf_config(collect_config, set);
+ if (perf_config_set__init(set) < 0) {
+ perf_config_set__delete(set);
+ set = NULL;
+ }
}
return set;
diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c
index c9a6dc1..b0c2b5c 100644
--- a/tools/perf/util/db-export.c
+++ b/tools/perf/util/db-export.c
@@ -233,17 +233,6 @@
return 0;
}
-static struct thread *get_main_thread(struct machine *machine, struct thread *thread)
-{
- if (thread->pid_ == thread->tid)
- return thread__get(thread);
-
- if (thread->pid_ == -1)
- return NULL;
-
- return machine__find_thread(machine, thread->pid_, thread->pid_);
-}
-
static int db_ids_from_al(struct db_export *dbe, struct addr_location *al,
u64 *dso_db_id, u64 *sym_db_id, u64 *offset)
{
@@ -382,7 +371,7 @@
if (err)
return err;
- main_thread = get_main_thread(al->machine, thread);
+ main_thread = thread__main_thread(al->machine, thread);
if (main_thread)
comm = machine__thread_exec_comm(al->machine, main_thread);
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index 0953280..76d79d0 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -349,6 +349,11 @@
dso->binary_type == DSO_BINARY_TYPE__GUEST_KCORE;
}
+static inline bool dso__is_kallsyms(struct dso *dso)
+{
+ return dso->kernel && dso->long_name[0] != '/';
+}
+
void dso__free_a2l(struct dso *dso);
enum dso_type dso__type(struct dso *dso, struct machine *machine);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index e82ba90..1b918aa0 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -462,9 +462,9 @@
return 0;
}
-static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, int idx)
+static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, int idx, short revent)
{
- int pos = fdarray__add(&evlist->pollfd, fd, POLLIN | POLLERR | POLLHUP);
+ int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP);
/*
* Save the idx so that when we filter out fds POLLHUP'ed we can
* close the associated evlist->mmap[] entry.
@@ -480,10 +480,11 @@
int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd)
{
- return __perf_evlist__add_pollfd(evlist, fd, -1);
+ return __perf_evlist__add_pollfd(evlist, fd, -1, POLLIN);
}
-static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd)
+static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd,
+ void *arg __maybe_unused)
{
struct perf_evlist *evlist = container_of(fda, struct perf_evlist, pollfd);
@@ -493,7 +494,7 @@
int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask)
{
return fdarray__filter(&evlist->pollfd, revents_and_mask,
- perf_evlist__munmap_filtered);
+ perf_evlist__munmap_filtered, NULL);
}
int perf_evlist__poll(struct perf_evlist *evlist, int timeout)
@@ -777,7 +778,7 @@
return event;
}
-union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
+union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int idx)
{
struct perf_mmap *md = &evlist->mmap[idx];
u64 head;
@@ -832,6 +833,13 @@
return perf_mmap__read(md, false, start, end, &md->prev);
}
+union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
+{
+ if (!evlist->backward)
+ return perf_evlist__mmap_read_forward(evlist, idx);
+ return perf_evlist__mmap_read_backward(evlist, idx);
+}
+
void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx)
{
struct perf_mmap *md = &evlist->mmap[idx];
@@ -856,9 +864,11 @@
static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx)
{
- BUG_ON(atomic_read(&evlist->mmap[idx].refcnt) == 0);
+ struct perf_mmap *md = &evlist->mmap[idx];
- if (atomic_dec_and_test(&evlist->mmap[idx].refcnt))
+ BUG_ON(md->base && atomic_read(&md->refcnt) == 0);
+
+ if (atomic_dec_and_test(&md->refcnt))
__perf_evlist__munmap(evlist, idx);
}
@@ -936,9 +946,12 @@
if (cpu_map__empty(evlist->cpus))
evlist->nr_mmaps = thread_map__nr(evlist->threads);
evlist->mmap = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
+ if (!evlist->mmap)
+ return -ENOMEM;
+
for (i = 0; i < evlist->nr_mmaps; i++)
evlist->mmap[i].fd = -1;
- return evlist->mmap != NULL ? 0 : -ENOMEM;
+ return 0;
}
struct mmap_params {
@@ -983,15 +996,28 @@
return 0;
}
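+/*
+ * Decide whether an event's fd should be polled for POLLIN: overwrite
+ * (backward) ring buffers are only watched for errors and hangups.
+ */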
+static bool
+perf_evlist__should_poll(struct perf_evlist *evlist __maybe_unused,
+ struct perf_evsel *evsel)
+{
+ if (evsel->overwrite)
+ return false;
+ return true;
+}
+
static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
struct mmap_params *mp, int cpu,
int thread, int *output)
{
struct perf_evsel *evsel;
+ int revent;
evlist__for_each(evlist, evsel) {
int fd;
+ if (evsel->overwrite != (evlist->overwrite && evlist->backward))
+ continue;
+
if (evsel->system_wide && thread)
continue;
@@ -1008,6 +1034,8 @@
perf_evlist__mmap_get(evlist, idx);
}
+ revent = perf_evlist__should_poll(evlist, evsel) ? POLLIN : 0;
+
/*
* The system_wide flag causes a selected event to be opened
* always without a pid. Consequently it will never get a
@@ -1016,7 +1044,7 @@
* Therefore don't add it for polling.
*/
if (!evsel->system_wide &&
- __perf_evlist__add_pollfd(evlist, fd, idx) < 0) {
+ __perf_evlist__add_pollfd(evlist, fd, idx, revent) < 0) {
perf_evlist__mmap_put(evlist, idx);
return -1;
}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index d740fb8..68cb136 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -131,6 +131,8 @@
union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx);
+union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist,
+ int idx);
union perf_event *perf_evlist__mmap_read_backward(struct perf_evlist *evlist,
int idx);
void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 5d7037e..9b2e3e6 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -572,6 +572,8 @@
perf_evsel__set_sample_bit(evsel, CALLCHAIN);
+ attr->sample_max_stack = param->max_stack;
+
if (param->record_mode == CALLCHAIN_LBR) {
if (!opts->branch_stack) {
if (attr->exclude_user) {
@@ -635,7 +637,8 @@
struct perf_event_attr *attr = &evsel->attr;
struct callchain_param param;
u32 dump_size = 0;
- char *callgraph_buf = NULL;
+ int max_stack = 0;
+ const char *callgraph_buf = NULL;
/* callgraph default */
param.record_mode = callchain_param.record_mode;
@@ -662,6 +665,9 @@
case PERF_EVSEL__CONFIG_TERM_STACK_USER:
dump_size = term->val.stack_user;
break;
+ case PERF_EVSEL__CONFIG_TERM_MAX_STACK:
+ max_stack = term->val.max_stack;
+ break;
case PERF_EVSEL__CONFIG_TERM_INHERIT:
/*
* attr->inherit should has already been set by
@@ -677,7 +683,12 @@
}
/* User explicitly set per-event callgraph, clear the old setting and reset. */
- if ((callgraph_buf != NULL) || (dump_size > 0)) {
+ if ((callgraph_buf != NULL) || (dump_size > 0) || max_stack) {
+ if (max_stack) {
+ param.max_stack = max_stack;
+ if (callgraph_buf == NULL)
+ callgraph_buf = "fp";
+ }
/* parse callgraph parameters */
if (callgraph_buf != NULL) {
@@ -1329,6 +1340,7 @@
PRINT_ATTRf(clockid, p_signed);
PRINT_ATTRf(sample_regs_intr, p_hex);
PRINT_ATTRf(aux_watermark, p_unsigned);
+ PRINT_ATTRf(sample_max_stack, p_unsigned);
return ret;
}
@@ -2239,17 +2251,11 @@
return sample->raw_data + offset;
}
-u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample,
- const char *name)
+u64 format_field__intval(struct format_field *field, struct perf_sample *sample,
+ bool needs_swap)
{
- struct format_field *field = perf_evsel__field(evsel, name);
- void *ptr;
u64 value;
-
- if (!field)
- return 0;
-
- ptr = sample->raw_data + field->offset;
+ void *ptr = sample->raw_data + field->offset;
switch (field->size) {
case 1:
@@ -2267,7 +2273,7 @@
return 0;
}
- if (!evsel->needs_swap)
+ if (!needs_swap)
return value;
switch (field->size) {
@@ -2284,6 +2290,17 @@
return 0;
}
+u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample,
+ const char *name)
+{
+ struct format_field *field = perf_evsel__field(evsel, name);
+
+ if (!field)
+ return 0;
+
+ return field ? format_field__intval(field, sample, evsel->needs_swap) : 0;
+}
+
bool perf_evsel__fallback(struct perf_evsel *evsel, int err,
char *msg, size_t msgsize)
{
@@ -2372,6 +2389,9 @@
"No such device - did you specify an out-of-range profile CPU?");
break;
case EOPNOTSUPP:
+ if (evsel->attr.sample_period != 0)
+ return scnprintf(msg, size, "%s",
+ "PMU Hardware doesn't support sampling/overflow-interrupts.");
if (evsel->attr.precise_ip)
return scnprintf(msg, size, "%s",
"\'precise\' request may not be supported. Try removing 'p' modifier.");
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index c1f1015..828ddd1 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -44,6 +44,7 @@
PERF_EVSEL__CONFIG_TERM_CALLGRAPH,
PERF_EVSEL__CONFIG_TERM_STACK_USER,
PERF_EVSEL__CONFIG_TERM_INHERIT,
+ PERF_EVSEL__CONFIG_TERM_MAX_STACK,
PERF_EVSEL__CONFIG_TERM_MAX,
};
@@ -56,6 +57,7 @@
bool time;
char *callgraph;
u64 stack_user;
+ int max_stack;
bool inherit;
} val;
};
@@ -259,6 +261,8 @@
struct format_field;
+u64 format_field__intval(struct format_field *field, struct perf_sample *sample, bool needs_swap);
+
struct format_field *perf_evsel__field(struct perf_evsel *evsel, const char *name);
#define perf_evsel__match(evsel, t, c) \
diff --git a/tools/perf/util/group.h b/tools/perf/util/group.h
new file mode 100644
index 0000000..116debe
--- /dev/null
+++ b/tools/perf/util/group.h
@@ -0,0 +1,7 @@
+#ifndef GROUP_H
+#define GROUP_H 1
+
+bool arch_topdown_check_group(bool *warn);
+void arch_topdown_group_warn(void);
+
+#endif
diff --git a/tools/perf/util/libunwind/arm64.c b/tools/perf/util/libunwind/arm64.c
new file mode 100644
index 0000000..4fb5395
--- /dev/null
+++ b/tools/perf/util/libunwind/arm64.c
@@ -0,0 +1,35 @@
+/*
+ * This file sets up defines to compile the arch-specific binary from the
+ * generic one.
+ *
+ * The 'LIBUNWIND__ARCH_REG_ID' function name is set according to the arch
+ * name, and the definition of this function is included directly from
+ * 'arch/arm64/util/unwind-libunwind.c', to make sure that this function
+ * is defined no matter what arch the host is.
+ *
+ * Finally, the arch-specific unwind methods are exported and will
+ * be assigned to each arm64 thread.
+ */
+
+#define REMOTE_UNWIND_LIBUNWIND
+
+#define LIBUNWIND__ARCH_REG_ID(regnum) libunwind__arm64_reg_id(regnum)
+
+#include "unwind.h"
+#include "debug.h"
+#include "libunwind-aarch64.h"
+#include <../../../../arch/arm64/include/uapi/asm/perf_regs.h>
+#include "../../arch/arm64/util/unwind-libunwind.c"
+
+/* NO_LIBUNWIND_DEBUG_FRAME is a feature flag for the local libunwind;
+ * map NO_LIBUNWIND_DEBUG_FRAME_AARCH64 onto it when compiling the arm64
+ * unwind methods.
+ */
+#undef NO_LIBUNWIND_DEBUG_FRAME
+#ifdef NO_LIBUNWIND_DEBUG_FRAME_AARCH64
+#define NO_LIBUNWIND_DEBUG_FRAME
+#endif
+#include "util/unwind-libunwind-local.c"
+
+struct unwind_libunwind_ops *
+arm64_unwind_libunwind_ops = &_unwind_libunwind_ops;
diff --git a/tools/perf/util/libunwind/x86_32.c b/tools/perf/util/libunwind/x86_32.c
new file mode 100644
index 0000000..d98c17e
--- /dev/null
+++ b/tools/perf/util/libunwind/x86_32.c
@@ -0,0 +1,37 @@
+/*
+ * This file sets up defines to compile the arch-specific binary from the
+ * generic one.
+ *
+ * The 'LIBUNWIND__ARCH_REG_ID' function name is set according to the arch
+ * name, and the definition of this function is included directly from
+ * 'arch/x86/util/unwind-libunwind.c', to make sure that this function
+ * is defined no matter what arch the host is.
+ *
+ * Finally, the arch-specific unwind methods are exported and will
+ * be assigned to each x86 thread.
+ */
+
+#define REMOTE_UNWIND_LIBUNWIND
+#define LIBUNWIND__ARCH_REG_ID(regnum) libunwind__x86_reg_id(regnum)
+
+#include "unwind.h"
+#include "debug.h"
+#include "libunwind-x86.h"
+#include <../../../../arch/x86/include/uapi/asm/perf_regs.h>
+
+/* HAVE_ARCH_X86_64_SUPPORT is used in 'arch/x86/util/unwind-libunwind.c';
+ * for x86_32 we undef it to compile code for x86_32 only.
+ */
+#undef HAVE_ARCH_X86_64_SUPPORT
+#include "../../arch/x86/util/unwind-libunwind.c"
+
+/* Explicitly define NO_LIBUNWIND_DEBUG_FRAME, because non-ARM has no
+ * dwarf_find_debug_frame() function.
+ */
+#ifndef NO_LIBUNWIND_DEBUG_FRAME
+#define NO_LIBUNWIND_DEBUG_FRAME
+#endif
+#include "util/unwind-libunwind-local.c"
+
+struct unwind_libunwind_ops *
+x86_32_unwind_libunwind_ops = &_unwind_libunwind_ops;
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index b177218..a0c186a 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1353,11 +1353,16 @@
if (map == NULL)
goto out_problem_map;
- thread__insert_map(thread, map);
+ ret = thread__insert_map(thread, map);
+ if (ret)
+ goto out_problem_insert;
+
thread__put(thread);
map__put(map);
return 0;
+out_problem_insert:
+ map__put(map);
out_problem_map:
thread__put(thread);
out_problem:
@@ -1403,11 +1408,16 @@
if (map == NULL)
goto out_problem_map;
- thread__insert_map(thread, map);
+ ret = thread__insert_map(thread, map);
+ if (ret)
+ goto out_problem_insert;
+
thread__put(thread);
map__put(map);
return 0;
+out_problem_insert:
+ map__put(map);
out_problem_map:
thread__put(thread);
out_problem:
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index c6fd047..d15e335 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -900,6 +900,7 @@
[PARSE_EVENTS__TERM_TYPE_STACKSIZE] = "stack-size",
[PARSE_EVENTS__TERM_TYPE_NOINHERIT] = "no-inherit",
[PARSE_EVENTS__TERM_TYPE_INHERIT] = "inherit",
+ [PARSE_EVENTS__TERM_TYPE_MAX_STACK] = "max-stack",
};
static bool config_term_shrinked;
@@ -995,6 +996,9 @@
case PARSE_EVENTS__TERM_TYPE_NAME:
CHECK_TYPE_VAL(STR);
break;
+ case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
+ CHECK_TYPE_VAL(NUM);
+ break;
default:
err->str = strdup("unknown term");
err->idx = term->err_term;
@@ -1040,6 +1044,7 @@
case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
case PARSE_EVENTS__TERM_TYPE_INHERIT:
case PARSE_EVENTS__TERM_TYPE_NOINHERIT:
+ case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
return config_term_common(attr, term, err);
default:
if (err) {
@@ -1109,6 +1114,9 @@
case PARSE_EVENTS__TERM_TYPE_NOINHERIT:
ADD_CONFIG_TERM(INHERIT, inherit, term->val.num ? 0 : 1);
break;
+ case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
+ ADD_CONFIG_TERM(MAX_STACK, max_stack, term->val.num);
+ break;
default:
break;
}
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index d740c3c..46c05cc 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -68,6 +68,7 @@
PARSE_EVENTS__TERM_TYPE_STACKSIZE,
PARSE_EVENTS__TERM_TYPE_NOINHERIT,
PARSE_EVENTS__TERM_TYPE_INHERIT,
+ PARSE_EVENTS__TERM_TYPE_MAX_STACK,
__PARSE_EVENTS__TERM_TYPE_NR,
};
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 1477fbc..3c15b33 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -199,6 +199,7 @@
time { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_TIME); }
call-graph { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CALLGRAPH); }
stack-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_STACKSIZE); }
+max-stack { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_MAX_STACK); }
inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_INHERIT); }
no-inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOINHERIT); }
, { return ','; }
@@ -259,6 +260,7 @@
cycles-t { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
mem-loads { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
mem-stores { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
+topdown-[a-z-]+ { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
L1-dcache|l1-d|l1d|L1-data |
L1-icache|l1-i|l1i|L1-instruction |
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 5214974..dfedf09 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -593,6 +593,7 @@
if (bswap_safe(f, 0)) \
attr->f = bswap_##sz(attr->f); \
} while(0)
+#define bswap_field_16(f) bswap_field(f, 16)
#define bswap_field_32(f) bswap_field(f, 32)
#define bswap_field_64(f) bswap_field(f, 64)
@@ -608,6 +609,7 @@
bswap_field_64(sample_regs_user);
bswap_field_32(sample_stack_user);
bswap_field_32(aux_watermark);
+ bswap_field_16(sample_max_stack);
/*
* After read_format are bitfields. Check read_format because
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index aa9efe0..8a2bbd2 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -36,6 +36,11 @@
static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_topdown_total_slots[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS];
static bool have_frontend_stalled;
struct stats walltime_nsecs_stats;
@@ -82,6 +87,11 @@
sizeof(runtime_transaction_stats));
memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats));
memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
+ memset(runtime_topdown_total_slots, 0, sizeof(runtime_topdown_total_slots));
+ memset(runtime_topdown_slots_retired, 0, sizeof(runtime_topdown_slots_retired));
+ memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued));
+ memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles));
+ memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles));
}
/*
@@ -105,6 +115,16 @@
update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);
else if (perf_stat_evsel__is(counter, ELISION_START))
update_stats(&runtime_elision_stats[ctx][cpu], count[0]);
+ else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
+ update_stats(&runtime_topdown_total_slots[ctx][cpu], count[0]);
+ else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
+ update_stats(&runtime_topdown_slots_issued[ctx][cpu], count[0]);
+ else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
+ update_stats(&runtime_topdown_slots_retired[ctx][cpu], count[0]);
+ else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
+ update_stats(&runtime_topdown_fetch_bubbles[ctx][cpu], count[0]);
+ else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
+ update_stats(&runtime_topdown_recovery_bubbles[ctx][cpu], count[0]);
else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]);
else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
@@ -302,6 +322,107 @@
out->print_metric(out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio);
}
+/*
+ * High level "TopDown" CPU core pipeline bottleneck breakdown.
+ *
+ * Basic concept follows
+ * Yasin, "A Top Down Method for Performance Analysis and Counters Architecture",
+ * ISPASS 2014
+ *
+ * The CPU pipeline is divided into 4 areas that can be bottlenecks:
+ *
+ * Frontend -> Backend -> Retiring
+ * BadSpeculation in addition means out-of-order execution that is thrown away
+ * (for example due to branch mispredictions).
+ * Frontend is instruction fetch and decode.
+ * Backend is execution, like computation and accessing data in memory.
+ * Retiring is good execution that is not directly bottlenecked.
+ *
+ * The formulas are computed in slots.
+ * A slot is one entry in the pipeline per cycle per unit of pipeline width
+ * (for example a 4-wide pipeline has 4 slots per cycle).
+ *
+ * Formulas:
+ * BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) /
+ * TotalSlots
+ * Retiring = SlotsRetired / TotalSlots
+ * FrontendBound = FetchBubbles / TotalSlots
+ * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound
+ *
+ * The kernel provides the mapping to the low level CPU events and any scaling
+ * needed for the CPU pipeline width, for example:
+ *
+ * TotalSlots = Cycles * 4
+ *
+ * The scaling factor is communicated in the sysfs unit.
+ *
+ * In some cases the CPU may not be able to measure all the formulas due to
+ * missing events. In this case multiple formulas are combined, where possible.
+ *
+ * Full TopDown supports more levels to sub-divide each area: for example
+ * BackendBound into computing bound and memory bound. For now we only
+ * support Level 1 TopDown.
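+ *
+ * A worked example with purely illustrative numbers: with TotalSlots = 1000,
+ * SlotsIssued = 700, SlotsRetired = 600, RecoveryBubbles = 50 and
+ * FetchBubbles = 150 the level 1 breakdown is
+ *
+ *   BadSpeculation = (700 - 600 + 50) / 1000   = 0.15
+ *   Retiring       = 600 / 1000                = 0.60
+ *   FrontendBound  = 150 / 1000                = 0.15
+ *   BackendBound   = 1.0 - 0.15 - 0.60 - 0.15  = 0.10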
+ */
+
+static double sanitize_val(double x)
+{
+ if (x < 0 && x >= -0.02)
+ return 0.0;
+ return x;
+}
+
+static double td_total_slots(int ctx, int cpu)
+{
+ return avg_stats(&runtime_topdown_total_slots[ctx][cpu]);
+}
+
+static double td_bad_spec(int ctx, int cpu)
+{
+ double bad_spec = 0;
+ double total_slots;
+ double total;
+
+ total = avg_stats(&runtime_topdown_slots_issued[ctx][cpu]) -
+ avg_stats(&runtime_topdown_slots_retired[ctx][cpu]) +
+ avg_stats(&runtime_topdown_recovery_bubbles[ctx][cpu]);
+ total_slots = td_total_slots(ctx, cpu);
+ if (total_slots)
+ bad_spec = total / total_slots;
+ return sanitize_val(bad_spec);
+}
+
+static double td_retiring(int ctx, int cpu)
+{
+ double retiring = 0;
+ double total_slots = td_total_slots(ctx, cpu);
+ double ret_slots = avg_stats(&runtime_topdown_slots_retired[ctx][cpu]);
+
+ if (total_slots)
+ retiring = ret_slots / total_slots;
+ return retiring;
+}
+
+static double td_fe_bound(int ctx, int cpu)
+{
+ double fe_bound = 0;
+ double total_slots = td_total_slots(ctx, cpu);
+ double fetch_bub = avg_stats(&runtime_topdown_fetch_bubbles[ctx][cpu]);
+
+ if (total_slots)
+ fe_bound = fetch_bub / total_slots;
+ return fe_bound;
+}
+
+static double td_be_bound(int ctx, int cpu)
+{
+ double sum = (td_fe_bound(ctx, cpu) +
+ td_bad_spec(ctx, cpu) +
+ td_retiring(ctx, cpu));
+ if (sum == 0)
+ return 0;
+ return sanitize_val(1.0 - sum);
+}
+
void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
double avg, int cpu,
struct perf_stat_output_ctx *out)
@@ -309,6 +430,7 @@
void *ctxp = out->ctx;
print_metric_t print_metric = out->print_metric;
double total, ratio = 0.0, total2;
+ const char *color = NULL;
int ctx = evsel_context(evsel);
if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
@@ -452,6 +574,46 @@
avg / ratio);
else
print_metric(ctxp, NULL, NULL, "CPUs utilized", 0);
+ } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
+ double fe_bound = td_fe_bound(ctx, cpu);
+
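+ /* The 20% cut-off is a heuristic; highlight likely bottlenecks in red */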
+ if (fe_bound > 0.2)
+ color = PERF_COLOR_RED;
+ print_metric(ctxp, color, "%8.1f%%", "frontend bound",
+ fe_bound * 100.);
+ } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
+ double retiring = td_retiring(ctx, cpu);
+
+ if (retiring > 0.7)
+ color = PERF_COLOR_GREEN;
+ print_metric(ctxp, color, "%8.1f%%", "retiring",
+ retiring * 100.);
+ } else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
+ double bad_spec = td_bad_spec(ctx, cpu);
+
+ if (bad_spec > 0.1)
+ color = PERF_COLOR_RED;
+ print_metric(ctxp, color, "%8.1f%%", "bad speculation",
+ bad_spec * 100.);
+ } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
+ double be_bound = td_be_bound(ctx, cpu);
+ const char *name = "backend bound";
+ static int have_recovery_bubbles = -1;
+
+ /* In case the CPU does not support topdown-recovery-bubbles */
+ if (have_recovery_bubbles < 0)
+ have_recovery_bubbles = pmu_have_event("cpu",
+ "topdown-recovery-bubbles");
+ if (!have_recovery_bubbles)
+ name = "backend bound/bad spec";
+
+ if (be_bound > 0.2)
+ color = PERF_COLOR_RED;
+ if (td_total_slots(ctx, cpu) > 0)
+ print_metric(ctxp, color, "%8.1f%%", name,
+ be_bound * 100.);
+ else
+ print_metric(ctxp, NULL, NULL, name, 0);
} else if (runtime_nsecs_stats[cpu].n != 0) {
char unit = 'M';
char unit_buf[10];
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index ffa1d06..c1ba255 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -79,6 +79,11 @@
ID(TRANSACTION_START, cpu/tx-start/),
ID(ELISION_START, cpu/el-start/),
ID(CYCLES_IN_TX_CP, cpu/cycles-ct/),
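+ /* Must match the topdown event names used to set up the counters */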
+ ID(TOPDOWN_TOTAL_SLOTS, topdown-total-slots),
+ ID(TOPDOWN_SLOTS_ISSUED, topdown-slots-issued),
+ ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired),
+ ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles),
+ ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles),
};
#undef ID
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 0150e78..c29bb94 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -17,6 +17,11 @@
PERF_STAT_EVSEL_ID__TRANSACTION_START,
PERF_STAT_EVSEL_ID__ELISION_START,
PERF_STAT_EVSEL_ID__CYCLES_IN_TX_CP,
+ PERF_STAT_EVSEL_ID__TOPDOWN_TOTAL_SLOTS,
+ PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_ISSUED,
+ PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_RETIRED,
+ PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_BUBBLES,
+ PERF_STAT_EVSEL_ID__TOPDOWN_RECOVERY_BUBBLES,
PERF_STAT_EVSEL_ID__MAX,
};
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 54c4ff2..09c5c34 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1641,6 +1641,20 @@
return ret;
}
+/*
+ * Use open(O_RDONLY) to check readability directly instead of access(R_OK),
+ * since access(R_OK) only checks with the real UID/GID but open() uses the
+ * effective UID/GID and actual capabilities (e.g. /proc/kcore requires
+ * CAP_SYS_RAWIO).
+ */
+static bool filename__readable(const char *file)
+{
+ int fd = open(file, O_RDONLY);
+
+ if (fd < 0)
+ return false;
+ close(fd);
+ return true;
+}
+
static char *dso__find_kallsyms(struct dso *dso, struct map *map)
{
u8 host_build_id[BUILD_ID_SIZE];
@@ -1660,58 +1674,43 @@
sizeof(host_build_id)) == 0)
is_host = dso__build_id_equal(dso, host_build_id);
- build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id);
-
- scnprintf(path, sizeof(path), "%s/%s/%s", buildid_dir,
- DSO__NAME_KCORE, sbuild_id);
-
- /* Use /proc/kallsyms if possible */
+ /* Try a fast path for /proc/kallsyms if possible */
if (is_host) {
- DIR *d;
- int fd;
-
- /* If no cached kcore go with /proc/kallsyms */
- d = opendir(path);
- if (!d)
- goto proc_kallsyms;
- closedir(d);
-
/*
- * Do not check the build-id cache, until we know we cannot use
- * /proc/kcore.
+ * Do not check the build-id cache, unless we know we cannot use
+ * /proc/kcore or the module maps don't match /proc/kallsyms.
+ * To check readability of /proc/kcore, do not use access(R_OK),
+ * since /proc/kcore requires CAP_SYS_RAWIO to read and access(R_OK)
+ * cannot check for that.
*/
- fd = open("/proc/kcore", O_RDONLY);
- if (fd != -1) {
- close(fd);
- /* If module maps match go with /proc/kallsyms */
- if (!validate_kcore_addresses("/proc/kallsyms", map))
- goto proc_kallsyms;
- }
-
- /* Find kallsyms in build-id cache with kcore */
- if (!find_matching_kcore(map, path, sizeof(path)))
- return strdup(path);
-
- goto proc_kallsyms;
+ if (filename__readable("/proc/kcore") &&
+ !validate_kcore_addresses("/proc/kallsyms", map))
+ goto proc_kallsyms;
}
+ build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id);
+
/* Find kallsyms in build-id cache with kcore */
+ scnprintf(path, sizeof(path), "%s/%s/%s",
+ buildid_dir, DSO__NAME_KCORE, sbuild_id);
+
if (!find_matching_kcore(map, path, sizeof(path)))
return strdup(path);
- scnprintf(path, sizeof(path), "%s/%s/%s",
- buildid_dir, DSO__NAME_KALLSYMS, sbuild_id);
+ /* Use current /proc/kallsyms if possible */
+ if (is_host) {
+proc_kallsyms:
+ return strdup("/proc/kallsyms");
+ }
- if (access(path, F_OK)) {
+ /* Finally, look for a cached copy of kallsyms in the build-id cache */
+ if (!build_id_cache__kallsyms_path(sbuild_id, path, sizeof(path))) {
pr_err("No kallsyms or vmlinux with build-id %s was found\n",
sbuild_id);
return NULL;
}
return strdup(path);
-
-proc_kallsyms:
- return strdup("/proc/kallsyms");
}
static int dso__load_kernel_sym(struct dso *dso, struct map *map,
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 45fcb71..f30f956 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -43,9 +43,6 @@
thread->cpu = -1;
INIT_LIST_HEAD(&thread->comm_list);
- if (unwind__prepare_access(thread) < 0)
- goto err_thread;
-
comm_str = malloc(32);
if (!comm_str)
goto err_thread;
@@ -201,10 +198,18 @@
map_groups__fprintf(thread->mg, fp);
}
-void thread__insert_map(struct thread *thread, struct map *map)
+int thread__insert_map(struct thread *thread, struct map *map)
{
+ int ret;
+
+ ret = unwind__prepare_access(thread, map);
+ if (ret)
+ return ret;
+
map_groups__fixup_overlappings(thread->mg, map, stderr);
map_groups__insert(thread->mg, map);
+
+ return 0;
}
static int thread__clone_map_groups(struct thread *thread,
@@ -265,3 +270,14 @@
break;
}
}
+
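+/*
+ * Return the main (group leader) thread of @thread's thread group, or
+ * NULL when the thread group leader is unknown (pid_ == -1).
+ */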
+struct thread *thread__main_thread(struct machine *machine, struct thread *thread)
+{
+ if (thread->pid_ == thread->tid)
+ return thread__get(thread);
+
+ if (thread->pid_ == -1)
+ return NULL;
+
+ return machine__find_thread(machine, thread->pid_, thread->pid_);
+}
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 45fba13..99263cb 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -9,11 +9,9 @@
#include "symbol.h"
#include <strlist.h>
#include <intlist.h>
-#ifdef HAVE_LIBUNWIND_SUPPORT
-#include <libunwind.h>
-#endif
struct thread_stack;
+struct unwind_libunwind_ops;
struct thread {
union {
@@ -36,7 +34,8 @@
void *priv;
struct thread_stack *ts;
#ifdef HAVE_LIBUNWIND_SUPPORT
- unw_addr_space_t addr_space;
+ void *addr_space;
+ struct unwind_libunwind_ops *unwind_libunwind_ops;
#endif
};
@@ -77,10 +76,12 @@
struct comm *thread__comm(const struct thread *thread);
struct comm *thread__exec_comm(const struct thread *thread);
const char *thread__comm_str(const struct thread *thread);
-void thread__insert_map(struct thread *thread, struct map *map);
+int thread__insert_map(struct thread *thread, struct map *map);
int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp);
size_t thread__fprintf(struct thread *thread, FILE *fp);
+struct thread *thread__main_thread(struct machine *machine, struct thread *thread);
+
void thread__find_addr_map(struct thread *thread,
u8 cpumode, enum map_type type, u64 addr,
struct addr_location *al);
diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
new file mode 100644
index 0000000..01c2e86
--- /dev/null
+++ b/tools/perf/util/unwind-libunwind-local.c
@@ -0,0 +1,697 @@
+/*
+ * Post mortem Dwarf CFI based unwinding on top of regs and stack dumps.
+ *
+ * Lots of this code has been borrowed or heavily inspired from parts of
+ * the libunwind 0.99 code which are (amongst other contributors I may have
+ * forgotten):
+ *
+ * Copyright (C) 2002-2007 Hewlett-Packard Co
+ * Contributed by David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * And the bugs have been added by:
+ *
+ * Copyright (C) 2010, Frederic Weisbecker <fweisbec@gmail.com>
+ * Copyright (C) 2012, Jiri Olsa <jolsa@redhat.com>
+ *
+ */
+
+#include <elf.h>
+#include <gelf.h>
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <linux/list.h>
+#ifndef REMOTE_UNWIND_LIBUNWIND
+#include <libunwind.h>
+#include <libunwind-ptrace.h>
+#endif
+#include "callchain.h"
+#include "thread.h"
+#include "session.h"
+#include "perf_regs.h"
+#include "unwind.h"
+#include "symbol.h"
+#include "util.h"
+#include "debug.h"
+#include "asm/bug.h"
+
+extern int
+UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as,
+ unw_word_t ip,
+ unw_dyn_info_t *di,
+ unw_proc_info_t *pi,
+ int need_unwind_info, void *arg);
+
+#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table)
+
+extern int
+UNW_OBJ(dwarf_find_debug_frame) (int found, unw_dyn_info_t *di_debug,
+ unw_word_t ip,
+ unw_word_t segbase,
+ const char *obj_name, unw_word_t start,
+ unw_word_t end);
+
+#define dwarf_find_debug_frame UNW_OBJ(dwarf_find_debug_frame)
+
+#define DW_EH_PE_FORMAT_MASK 0x0f /* format of the encoded value */
+#define DW_EH_PE_APPL_MASK 0x70 /* how the value is to be applied */
+
+/* Pointer-encoding formats: */
+#define DW_EH_PE_omit 0xff
+#define DW_EH_PE_ptr 0x00 /* pointer-sized unsigned value */
+#define DW_EH_PE_udata4 0x03 /* unsigned 32-bit value */
+#define DW_EH_PE_udata8 0x04 /* unsigned 64-bit value */
+#define DW_EH_PE_sdata4 0x0b /* signed 32-bit value */
+#define DW_EH_PE_sdata8 0x0c /* signed 64-bit value */
+
+/* Pointer-encoding application: */
+#define DW_EH_PE_absptr 0x00 /* absolute value */
+#define DW_EH_PE_pcrel 0x10 /* rel. to addr. of encoded value */
+
+/*
+ * The following are not documented by LSB v1.3, yet they are used by
+ * GCC, presumably they aren't documented by LSB since they aren't
+ * used on Linux:
+ */
+#define DW_EH_PE_funcrel 0x40 /* start-of-procedure-relative */
+#define DW_EH_PE_aligned 0x50 /* aligned pointer */
+
+/* Flags intentionally not handled, since they're not needed:
+ * #define DW_EH_PE_indirect 0x80
+ * #define DW_EH_PE_uleb128 0x01
+ * #define DW_EH_PE_udata2 0x02
+ * #define DW_EH_PE_sleb128 0x09
+ * #define DW_EH_PE_sdata2 0x0a
+ * #define DW_EH_PE_textrel 0x20
+ * #define DW_EH_PE_datarel 0x30
+ */
+
+struct unwind_info {
+ struct perf_sample *sample;
+ struct machine *machine;
+ struct thread *thread;
+};
+
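+/*
+ * Read one value of the given type from the buffer; note this returns
+ * -EINVAL from the *enclosing* function if the read would run past @end.
+ */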
+#define dw_read(ptr, type, end) ({ \
+ type *__p = (type *) ptr; \
+ type __v; \
+ if ((__p + 1) > (type *) end) \
+ return -EINVAL; \
+ __v = *__p++; \
+ ptr = (typeof(ptr)) __p; \
+ __v; \
+ })
+
+static int __dw_read_encoded_value(u8 **p, u8 *end, u64 *val,
+ u8 encoding)
+{
+ u8 *cur = *p;
+ *val = 0;
+
+ switch (encoding) {
+ case DW_EH_PE_omit:
+ *val = 0;
+ goto out;
+ case DW_EH_PE_ptr:
+ *val = dw_read(cur, unsigned long, end);
+ goto out;
+ default:
+ break;
+ }
+
+ switch (encoding & DW_EH_PE_APPL_MASK) {
+ case DW_EH_PE_absptr:
+ break;
+ case DW_EH_PE_pcrel:
+ *val = (unsigned long) cur;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if ((encoding & 0x07) == 0x00)
+ encoding |= DW_EH_PE_udata4;
+
+ switch (encoding & DW_EH_PE_FORMAT_MASK) {
+ case DW_EH_PE_sdata4:
+ *val += dw_read(cur, s32, end);
+ break;
+ case DW_EH_PE_udata4:
+ *val += dw_read(cur, u32, end);
+ break;
+ case DW_EH_PE_sdata8:
+ *val += dw_read(cur, s64, end);
+ break;
+ case DW_EH_PE_udata8:
+ *val += dw_read(cur, u64, end);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ out:
+ *p = cur;
+ return 0;
+}
+
+#define dw_read_encoded_value(ptr, end, enc) ({ \
+ u64 __v; \
+ if (__dw_read_encoded_value(&ptr, end, &__v, enc)) { \
+ return -EINVAL; \
+ } \
+ __v; \
+ })
+
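+/* Return the file offset of the section @name, or 0 if it is not found. */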
+static u64 elf_section_offset(int fd, const char *name)
+{
+ Elf *elf;
+ GElf_Ehdr ehdr;
+ GElf_Shdr shdr;
+ u64 offset = 0;
+
+ elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+ if (elf == NULL)
+ return 0;
+
+ do {
+ if (gelf_getehdr(elf, &ehdr) == NULL)
+ break;
+
+ if (!elf_section_by_name(elf, &ehdr, &shdr, name, NULL))
+ break;
+
+ offset = shdr.sh_offset;
+ } while (0);
+
+ elf_end(elf);
+ return offset;
+}
+
+#ifndef NO_LIBUNWIND_DEBUG_FRAME
+static int elf_is_exec(int fd, const char *name)
+{
+ Elf *elf;
+ GElf_Ehdr ehdr;
+ int retval = 0;
+
+ elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+ if (elf == NULL)
+ return 0;
+ if (gelf_getehdr(elf, &ehdr) == NULL)
+ goto out;
+
+ retval = (ehdr.e_type == ET_EXEC);
+
+out:
+ elf_end(elf);
+ pr_debug("unwind: elf_is_exec(%s): %d\n", name, retval);
+ return retval;
+}
+#endif
+
+struct table_entry {
+ u32 start_ip_offset;
+ u32 fde_offset;
+};
+
+struct eh_frame_hdr {
+ unsigned char version;
+ unsigned char eh_frame_ptr_enc;
+ unsigned char fde_count_enc;
+ unsigned char table_enc;
+
+ /*
+ * The rest of the header is variable-length and consists of the
+ * following members:
+ *
+ * encoded_t eh_frame_ptr;
+ * encoded_t fde_count;
+ */
+
+ /* A single encoded pointer should not be more than 8 bytes. */
+ u64 enc[2];
+
+ /*
+ * struct {
+ * encoded_t start_ip;
+ * encoded_t fde_addr;
+ * } binary_search_table[fde_count];
+ */
+ char data[0];
+} __packed;
+
+static int unwind_spec_ehframe(struct dso *dso, struct machine *machine,
+ u64 offset, u64 *table_data, u64 *segbase,
+ u64 *fde_count)
+{
+ struct eh_frame_hdr hdr;
+ u8 *enc = (u8 *) &hdr.enc;
+ u8 *end = (u8 *) &hdr.data;
+ ssize_t r;
+
+ r = dso__data_read_offset(dso, machine, offset,
+ (u8 *) &hdr, sizeof(hdr));
+ if (r != sizeof(hdr))
+ return -EINVAL;
+
+ /* We don't need eh_frame_ptr, just skip it. */
+ dw_read_encoded_value(enc, end, hdr.eh_frame_ptr_enc);
+
+ *fde_count = dw_read_encoded_value(enc, end, hdr.fde_count_enc);
+ *segbase = offset;
+ *table_data = (enc - (u8 *) &hdr) + offset;
+ return 0;
+}
+
+static int read_unwind_spec_eh_frame(struct dso *dso, struct machine *machine,
+ u64 *table_data, u64 *segbase,
+ u64 *fde_count)
+{
+ int ret = -EINVAL, fd;
+ u64 offset = dso->data.eh_frame_hdr_offset;
+
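+ /* The .eh_frame_hdr offset is looked up once and cached in the dso */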
+ if (offset == 0) {
+ fd = dso__data_get_fd(dso, machine);
+ if (fd < 0)
+ return -EINVAL;
+
+ /* Check the .eh_frame section for unwinding info */
+ offset = elf_section_offset(fd, ".eh_frame_hdr");
+ dso->data.eh_frame_hdr_offset = offset;
+ dso__data_put_fd(dso);
+ }
+
+ if (offset)
+ ret = unwind_spec_ehframe(dso, machine, offset,
+ table_data, segbase,
+ fde_count);
+
+ return ret;
+}
+
+#ifndef NO_LIBUNWIND_DEBUG_FRAME
+static int read_unwind_spec_debug_frame(struct dso *dso,
+ struct machine *machine, u64 *offset)
+{
+ int fd;
+ u64 ofs = dso->data.debug_frame_offset;
+
+ if (ofs == 0) {
+ fd = dso__data_get_fd(dso, machine);
+ if (fd < 0)
+ return -EINVAL;
+
+ /* Check the .debug_frame section for unwinding info */
+ ofs = elf_section_offset(fd, ".debug_frame");
+ dso->data.debug_frame_offset = ofs;
+ dso__data_put_fd(dso);
+ }
+
+ *offset = ofs;
+ if (*offset)
+ return 0;
+
+ return -EINVAL;
+}
+#endif
+
+static struct map *find_map(unw_word_t ip, struct unwind_info *ui)
+{
+ struct addr_location al;
+
+ thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER,
+ MAP__FUNCTION, ip, &al);
+ if (!al.map) {
+ /*
+ * We've seen cases (softice) where the DWARF unwinder went
+ * through non-executable mmaps, which we need to look up
+ * in the MAP__VARIABLE tree.
+ */
+ thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER,
+ MAP__VARIABLE, ip, &al);
+ }
+ return al.map;
+}
+
+static int
+find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi,
+ int need_unwind_info, void *arg)
+{
+ struct unwind_info *ui = arg;
+ struct map *map;
+ unw_dyn_info_t di;
+ u64 table_data, segbase, fde_count;
+ int ret = -EINVAL;
+
+ map = find_map(ip, ui);
+ if (!map || !map->dso)
+ return -EINVAL;
+
+ pr_debug("unwind: find_proc_info dso %s\n", map->dso->name);
+
+ /* Check the .eh_frame section for unwinding info */
+ if (!read_unwind_spec_eh_frame(map->dso, ui->machine,
+ &table_data, &segbase, &fde_count)) {
+ memset(&di, 0, sizeof(di));
+ di.format = UNW_INFO_FORMAT_REMOTE_TABLE;
+ di.start_ip = map->start;
+ di.end_ip = map->end;
+ di.u.rti.segbase = map->start + segbase;
+ di.u.rti.table_data = map->start + table_data;
+ di.u.rti.table_len = fde_count * sizeof(struct table_entry)
+ / sizeof(unw_word_t);
+ ret = dwarf_search_unwind_table(as, ip, &di, pi,
+ need_unwind_info, arg);
+ }
+
+#ifndef NO_LIBUNWIND_DEBUG_FRAME
+ /* Check the .debug_frame section for unwinding info */
+ if (ret < 0 &&
+ !read_unwind_spec_debug_frame(map->dso, ui->machine, &segbase)) {
+ int fd = dso__data_get_fd(map->dso, ui->machine);
+ int is_exec = elf_is_exec(fd, map->dso->name);
+ unw_word_t base = is_exec ? 0 : map->start;
+ const char *symfile;
+
+ if (fd >= 0)
+ dso__data_put_fd(map->dso);
+
+ symfile = map->dso->symsrc_filename ?: map->dso->name;
+
+ memset(&di, 0, sizeof(di));
+ if (dwarf_find_debug_frame(0, &di, ip, base, symfile,
+ map->start, map->end))
+ return dwarf_search_unwind_table(as, ip, &di, pi,
+ need_unwind_info, arg);
+ }
+#endif
+
+ return ret;
+}
+
+static int access_fpreg(unw_addr_space_t __maybe_unused as,
+ unw_regnum_t __maybe_unused num,
+ unw_fpreg_t __maybe_unused *val,
+ int __maybe_unused __write,
+ void __maybe_unused *arg)
+{
+ pr_err("unwind: access_fpreg unsupported\n");
+ return -UNW_EINVAL;
+}
+
+static int get_dyn_info_list_addr(unw_addr_space_t __maybe_unused as,
+ unw_word_t __maybe_unused *dil_addr,
+ void __maybe_unused *arg)
+{
+ return -UNW_ENOINFO;
+}
+
+static int resume(unw_addr_space_t __maybe_unused as,
+ unw_cursor_t __maybe_unused *cu,
+ void __maybe_unused *arg)
+{
+ pr_err("unwind: resume unsupported\n");
+ return -UNW_EINVAL;
+}
+
+static int
+get_proc_name(unw_addr_space_t __maybe_unused as,
+ unw_word_t __maybe_unused addr,
+ char __maybe_unused *bufp, size_t __maybe_unused buf_len,
+ unw_word_t __maybe_unused *offp, void __maybe_unused *arg)
+{
+ pr_err("unwind: get_proc_name unsupported\n");
+ return -UNW_EINVAL;
+}
+
+static int access_dso_mem(struct unwind_info *ui, unw_word_t addr,
+ unw_word_t *data)
+{
+ struct map *map;
+ ssize_t size;
+
+ map = find_map(addr, ui);
+ if (!map) {
+ pr_debug("unwind: no map for %lx\n", (unsigned long)addr);
+ return -1;
+ }
+
+ if (!map->dso)
+ return -1;
+
+ size = dso__data_read_addr(map->dso, map, ui->machine,
+ addr, (u8 *) data, sizeof(*data));
+
+ return !(size == sizeof(*data));
+}
+
+static int access_mem(unw_addr_space_t __maybe_unused as,
+ unw_word_t addr, unw_word_t *valp,
+ int __write, void *arg)
+{
+ struct unwind_info *ui = arg;
+ struct stack_dump *stack = &ui->sample->user_stack;
+ u64 start, end;
+ int offset;
+ int ret;
+
+ /* Don't support write, probably not needed. */
+ if (__write || !stack || !ui->sample->user_regs.regs) {
+ *valp = 0;
+ return 0;
+ }
+
+ ret = perf_reg_value(&start, &ui->sample->user_regs, PERF_REG_SP);
+ if (ret)
+ return ret;
+
+ end = start + stack->size;
+
+ /* Check overflow. */
+ if (addr + sizeof(unw_word_t) < addr)
+ return -EINVAL;
+
+ if (addr < start || addr + sizeof(unw_word_t) >= end) {
+ ret = access_dso_mem(ui, addr, valp);
+ if (ret) {
+ pr_debug("unwind: access_mem %p not inside range"
+ " 0x%" PRIx64 "-0x%" PRIx64 "\n",
+ (void *) (uintptr_t) addr, start, end);
+ *valp = 0;
+ return ret;
+ }
+ return 0;
+ }
+
+ offset = addr - start;
+ *valp = *(unw_word_t *)&stack->data[offset];
+ pr_debug("unwind: access_mem addr %p val %lx, offset %d\n",
+ (void *) (uintptr_t) addr, (unsigned long)*valp, offset);
+ return 0;
+}
+
+static int access_reg(unw_addr_space_t __maybe_unused as,
+ unw_regnum_t regnum, unw_word_t *valp,
+ int __write, void *arg)
+{
+ struct unwind_info *ui = arg;
+ int id, ret;
+ u64 val;
+
+ /* Don't support write, I suspect we don't need it. */
+ if (__write) {
+ pr_err("unwind: access_reg w %d\n", regnum);
+ return 0;
+ }
+
+ if (!ui->sample->user_regs.regs) {
+ *valp = 0;
+ return 0;
+ }
+
+ id = LIBUNWIND__ARCH_REG_ID(regnum);
+ if (id < 0)
+ return -EINVAL;
+
+ ret = perf_reg_value(&val, &ui->sample->user_regs, id);
+ if (ret) {
+ pr_err("unwind: can't read reg %d\n", regnum);
+ return ret;
+ }
+
+ *valp = (unw_word_t) val;
+ pr_debug("unwind: reg %d, val %lx\n", regnum, (unsigned long)*valp);
+ return 0;
+}
+
+static void put_unwind_info(unw_addr_space_t __maybe_unused as,
+ unw_proc_info_t *pi __maybe_unused,
+ void *arg __maybe_unused)
+{
+ pr_debug("unwind: put_unwind_info called\n");
+}
+
+static int entry(u64 ip, struct thread *thread,
+ unwind_entry_cb_t cb, void *arg)
+{
+ struct unwind_entry e;
+ struct addr_location al;
+
+ thread__find_addr_location(thread, PERF_RECORD_MISC_USER,
+ MAP__FUNCTION, ip, &al);
+
+ e.ip = ip;
+ e.map = al.map;
+ e.sym = al.sym;
+
+ pr_debug("unwind: %s:ip = 0x%" PRIx64 " (0x%" PRIx64 ")\n",
+ al.sym ? al.sym->name : "''",
+ ip,
+ al.map ? al.map->map_ip(al.map, ip) : (u64) 0);
+
+ return cb(&e, arg);
+}
+
+static void display_error(int err)
+{
+ switch (err) {
+ case UNW_EINVAL:
+ pr_err("unwind: Only supports local.\n");
+ break;
+ case UNW_EUNSPEC:
+ pr_err("unwind: Unspecified error.\n");
+ break;
+ case UNW_EBADREG:
+ pr_err("unwind: Register unavailable.\n");
+ break;
+ default:
+ break;
+ }
+}
+
+static unw_accessors_t accessors = {
+ .find_proc_info = find_proc_info,
+ .put_unwind_info = put_unwind_info,
+ .get_dyn_info_list_addr = get_dyn_info_list_addr,
+ .access_mem = access_mem,
+ .access_reg = access_reg,
+ .access_fpreg = access_fpreg,
+ .resume = resume,
+ .get_proc_name = get_proc_name,
+};
+
+static int _unwind__prepare_access(struct thread *thread)
+{
+ if (callchain_param.record_mode != CALLCHAIN_DWARF)
+ return 0;
+
+ thread->addr_space = unw_create_addr_space(&accessors, 0);
+ if (!thread->addr_space) {
+ pr_err("unwind: Can't create unwind address space.\n");
+ return -ENOMEM;
+ }
+
+ unw_set_caching_policy(thread->addr_space, UNW_CACHE_GLOBAL);
+ return 0;
+}
+
+static void _unwind__flush_access(struct thread *thread)
+{
+ if (callchain_param.record_mode != CALLCHAIN_DWARF)
+ return;
+
+ unw_flush_cache(thread->addr_space, 0, 0);
+}
+
+static void _unwind__finish_access(struct thread *thread)
+{
+ if (callchain_param.record_mode != CALLCHAIN_DWARF)
+ return;
+
+ unw_destroy_addr_space(thread->addr_space);
+}
+
+static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb,
+ void *arg, int max_stack)
+{
+ u64 val;
+ unw_word_t ips[max_stack];
+ unw_addr_space_t addr_space;
+ unw_cursor_t c;
+ int ret, i = 0;
+
+ ret = perf_reg_value(&val, &ui->sample->user_regs, PERF_REG_IP);
+ if (ret)
+ return ret;
+
+ ips[i++] = (unw_word_t) val;
+
+ /*
+ * If we need more than one entry, do the DWARF
+ * unwind itself.
+ */
+ if (max_stack - 1 > 0) {
+ WARN_ONCE(!ui->thread, "WARNING: ui->thread is NULL");
+ addr_space = ui->thread->addr_space;
+
+ if (addr_space == NULL)
+ return -1;
+
+ ret = unw_init_remote(&c, addr_space, ui);
+ if (ret)
+ display_error(ret);
+
+ while (!ret && (unw_step(&c) > 0) && i < max_stack) {
+ unw_get_reg(&c, UNW_REG_IP, &ips[i]);
+ ++i;
+ }
+
+ max_stack = i;
+ }
+
+ /*
+ * Display what we got based on the order setup.
+ */
+ for (i = 0; i < max_stack && !ret; i++) {
+ int j = i;
+
+ if (callchain_param.order == ORDER_CALLER)
+ j = max_stack - i - 1;
+ ret = ips[j] ? entry(ips[j], ui->thread, cb, arg) : 0;
+ }
+
+ return ret;
+}
+
+static int _unwind__get_entries(unwind_entry_cb_t cb, void *arg,
+ struct thread *thread,
+ struct perf_sample *data, int max_stack)
+{
+ struct unwind_info ui = {
+ .sample = data,
+ .thread = thread,
+ .machine = thread->mg->machine,
+ };
+
+ if (!data->user_regs.regs)
+ return -EINVAL;
+
+ if (max_stack <= 0)
+ return -EINVAL;
+
+ return get_entries(&ui, cb, arg, max_stack);
+}
+
+static struct unwind_libunwind_ops
+_unwind_libunwind_ops = {
+ .prepare_access = _unwind__prepare_access,
+ .flush_access = _unwind__flush_access,
+ .finish_access = _unwind__finish_access,
+ .get_entries = _unwind__get_entries,
+};
+
+#ifndef REMOTE_UNWIND_LIBUNWIND
+struct unwind_libunwind_ops *
+local_unwind_libunwind_ops = &_unwind_libunwind_ops;
+#endif
diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c
index 63687d3..8547119 100644
--- a/tools/perf/util/unwind-libunwind.c
+++ b/tools/perf/util/unwind-libunwind.c
@@ -1,682 +1,76 @@
-/*
- * Post mortem Dwarf CFI based unwinding on top of regs and stack dumps.
- *
- * Lots of this code have been borrowed or heavily inspired from parts of
- * the libunwind 0.99 code which are (amongst other contributors I may have
- * forgotten):
- *
- * Copyright (C) 2002-2007 Hewlett-Packard Co
- * Contributed by David Mosberger-Tang <davidm@hpl.hp.com>
- *
- * And the bugs have been added by:
- *
- * Copyright (C) 2010, Frederic Weisbecker <fweisbec@gmail.com>
- * Copyright (C) 2012, Jiri Olsa <jolsa@redhat.com>
- *
- */
-
-#include <elf.h>
-#include <gelf.h>
-#include <fcntl.h>
-#include <string.h>
-#include <unistd.h>
-#include <sys/mman.h>
-#include <linux/list.h>
-#include <libunwind.h>
-#include <libunwind-ptrace.h>
-#include "callchain.h"
+#include "unwind.h"
#include "thread.h"
#include "session.h"
-#include "perf_regs.h"
-#include "unwind.h"
-#include "symbol.h"
-#include "util.h"
#include "debug.h"
-#include "asm/bug.h"
+#include "arch/common.h"
-extern int
-UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as,
- unw_word_t ip,
- unw_dyn_info_t *di,
- unw_proc_info_t *pi,
- int need_unwind_info, void *arg);
+struct unwind_libunwind_ops __weak *local_unwind_libunwind_ops;
+struct unwind_libunwind_ops __weak *x86_32_unwind_libunwind_ops;
+struct unwind_libunwind_ops __weak *arm64_unwind_libunwind_ops;
-#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table)
-
-extern int
-UNW_OBJ(dwarf_find_debug_frame) (int found, unw_dyn_info_t *di_debug,
- unw_word_t ip,
- unw_word_t segbase,
- const char *obj_name, unw_word_t start,
- unw_word_t end);
-
-#define dwarf_find_debug_frame UNW_OBJ(dwarf_find_debug_frame)
-
-#define DW_EH_PE_FORMAT_MASK 0x0f /* format of the encoded value */
-#define DW_EH_PE_APPL_MASK 0x70 /* how the value is to be applied */
-
-/* Pointer-encoding formats: */
-#define DW_EH_PE_omit 0xff
-#define DW_EH_PE_ptr 0x00 /* pointer-sized unsigned value */
-#define DW_EH_PE_udata4 0x03 /* unsigned 32-bit value */
-#define DW_EH_PE_udata8 0x04 /* unsigned 64-bit value */
-#define DW_EH_PE_sdata4 0x0b /* signed 32-bit value */
-#define DW_EH_PE_sdata8 0x0c /* signed 64-bit value */
-
-/* Pointer-encoding application: */
-#define DW_EH_PE_absptr 0x00 /* absolute value */
-#define DW_EH_PE_pcrel 0x10 /* rel. to addr. of encoded value */
-
-/*
- * The following are not documented by LSB v1.3, yet they are used by
- * GCC, presumably they aren't documented by LSB since they aren't
- * used on Linux:
- */
-#define DW_EH_PE_funcrel 0x40 /* start-of-procedure-relative */
-#define DW_EH_PE_aligned 0x50 /* aligned pointer */
-
-/* Flags intentionaly not handled, since they're not needed:
- * #define DW_EH_PE_indirect 0x80
- * #define DW_EH_PE_uleb128 0x01
- * #define DW_EH_PE_udata2 0x02
- * #define DW_EH_PE_sleb128 0x09
- * #define DW_EH_PE_sdata2 0x0a
- * #define DW_EH_PE_textrel 0x20
- * #define DW_EH_PE_datarel 0x30
- */
-
-struct unwind_info {
- struct perf_sample *sample;
- struct machine *machine;
- struct thread *thread;
-};
-
-#define dw_read(ptr, type, end) ({ \
- type *__p = (type *) ptr; \
- type __v; \
- if ((__p + 1) > (type *) end) \
- return -EINVAL; \
- __v = *__p++; \
- ptr = (typeof(ptr)) __p; \
- __v; \
- })
-
-static int __dw_read_encoded_value(u8 **p, u8 *end, u64 *val,
- u8 encoding)
+static void unwind__register_ops(struct thread *thread,
+ struct unwind_libunwind_ops *ops)
{
- u8 *cur = *p;
- *val = 0;
-
- switch (encoding) {
- case DW_EH_PE_omit:
- *val = 0;
- goto out;
- case DW_EH_PE_ptr:
- *val = dw_read(cur, unsigned long, end);
- goto out;
- default:
- break;
- }
-
- switch (encoding & DW_EH_PE_APPL_MASK) {
- case DW_EH_PE_absptr:
- break;
- case DW_EH_PE_pcrel:
- *val = (unsigned long) cur;
- break;
- default:
- return -EINVAL;
- }
-
- if ((encoding & 0x07) == 0x00)
- encoding |= DW_EH_PE_udata4;
-
- switch (encoding & DW_EH_PE_FORMAT_MASK) {
- case DW_EH_PE_sdata4:
- *val += dw_read(cur, s32, end);
- break;
- case DW_EH_PE_udata4:
- *val += dw_read(cur, u32, end);
- break;
- case DW_EH_PE_sdata8:
- *val += dw_read(cur, s64, end);
- break;
- case DW_EH_PE_udata8:
- *val += dw_read(cur, u64, end);
- break;
- default:
- return -EINVAL;
- }
-
- out:
- *p = cur;
- return 0;
+ thread->unwind_libunwind_ops = ops;
}
-#define dw_read_encoded_value(ptr, end, enc) ({ \
- u64 __v; \
- if (__dw_read_encoded_value(&ptr, end, &__v, enc)) { \
- return -EINVAL; \
- } \
- __v; \
- })
-
-static u64 elf_section_offset(int fd, const char *name)
+int unwind__prepare_access(struct thread *thread, struct map *map)
{
- Elf *elf;
- GElf_Ehdr ehdr;
- GElf_Shdr shdr;
- u64 offset = 0;
+ const char *arch;
+ enum dso_type dso_type;
+ struct unwind_libunwind_ops *ops = local_unwind_libunwind_ops;
- elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
- if (elf == NULL)
+ if (thread->addr_space) {
+ pr_debug("unwind: thread map already set, dso=%s\n",
+ map->dso->name);
+ return 0;
+ }
+
+ /* env->arch is NULL for live-mode (e.g. perf top) */
+ if (!thread->mg->machine->env || !thread->mg->machine->env->arch)
+ goto out_register;
+
+ dso_type = dso__type(map->dso, thread->mg->machine);
+ if (dso_type == DSO__TYPE_UNKNOWN)
return 0;
- do {
- if (gelf_getehdr(elf, &ehdr) == NULL)
- break;
+ arch = normalize_arch(thread->mg->machine->env->arch);
- if (!elf_section_by_name(elf, &ehdr, &shdr, name, NULL))
- break;
-
- offset = shdr.sh_offset;
- } while (0);
-
- elf_end(elf);
- return offset;
-}
-
-#ifndef NO_LIBUNWIND_DEBUG_FRAME
-static int elf_is_exec(int fd, const char *name)
-{
- Elf *elf;
- GElf_Ehdr ehdr;
- int retval = 0;
-
- elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
- if (elf == NULL)
- return 0;
- if (gelf_getehdr(elf, &ehdr) == NULL)
- goto out;
-
- retval = (ehdr.e_type == ET_EXEC);
-
-out:
- elf_end(elf);
- pr_debug("unwind: elf_is_exec(%s): %d\n", name, retval);
- return retval;
-}
-#endif
-
-struct table_entry {
- u32 start_ip_offset;
- u32 fde_offset;
-};
-
-struct eh_frame_hdr {
- unsigned char version;
- unsigned char eh_frame_ptr_enc;
- unsigned char fde_count_enc;
- unsigned char table_enc;
-
- /*
- * The rest of the header is variable-length and consists of the
- * following members:
- *
- * encoded_t eh_frame_ptr;
- * encoded_t fde_count;
- */
-
- /* A single encoded pointer should not be more than 8 bytes. */
- u64 enc[2];
-
- /*
- * struct {
- * encoded_t start_ip;
- * encoded_t fde_addr;
- * } binary_search_table[fde_count];
- */
- char data[0];
-} __packed;
-
-static int unwind_spec_ehframe(struct dso *dso, struct machine *machine,
- u64 offset, u64 *table_data, u64 *segbase,
- u64 *fde_count)
-{
- struct eh_frame_hdr hdr;
- u8 *enc = (u8 *) &hdr.enc;
- u8 *end = (u8 *) &hdr.data;
- ssize_t r;
-
- r = dso__data_read_offset(dso, machine, offset,
- (u8 *) &hdr, sizeof(hdr));
- if (r != sizeof(hdr))
- return -EINVAL;
-
- /* We dont need eh_frame_ptr, just skip it. */
- dw_read_encoded_value(enc, end, hdr.eh_frame_ptr_enc);
-
- *fde_count = dw_read_encoded_value(enc, end, hdr.fde_count_enc);
- *segbase = offset;
- *table_data = (enc - (u8 *) &hdr) + offset;
- return 0;
-}
-
-static int read_unwind_spec_eh_frame(struct dso *dso, struct machine *machine,
- u64 *table_data, u64 *segbase,
- u64 *fde_count)
-{
- int ret = -EINVAL, fd;
- u64 offset = dso->data.eh_frame_hdr_offset;
-
- if (offset == 0) {
- fd = dso__data_get_fd(dso, machine);
- if (fd < 0)
- return -EINVAL;
-
- /* Check the .eh_frame section for unwinding info */
- offset = elf_section_offset(fd, ".eh_frame_hdr");
- dso->data.eh_frame_hdr_offset = offset;
- dso__data_put_fd(dso);
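+ /*
+ * Default to the local (native) unwinder; 32-bit x86 and 64-bit
+ * arm targets are handed the matching arch-specific ops instead.
+ */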
+ if (!strcmp(arch, "x86")) {
+ if (dso_type != DSO__TYPE_64BIT)
+ ops = x86_32_unwind_libunwind_ops;
+ } else if (!strcmp(arch, "arm64") || !strcmp(arch, "arm")) {
+ if (dso_type == DSO__TYPE_64BIT)
+ ops = arm64_unwind_libunwind_ops;
}
- if (offset)
- ret = unwind_spec_ehframe(dso, machine, offset,
- table_data, segbase,
- fde_count);
-
- return ret;
-}
-
-#ifndef NO_LIBUNWIND_DEBUG_FRAME
-static int read_unwind_spec_debug_frame(struct dso *dso,
- struct machine *machine, u64 *offset)
-{
- int fd;
- u64 ofs = dso->data.debug_frame_offset;
-
- if (ofs == 0) {
- fd = dso__data_get_fd(dso, machine);
- if (fd < 0)
- return -EINVAL;
-
- /* Check the .debug_frame section for unwinding info */
- ofs = elf_section_offset(fd, ".debug_frame");
- dso->data.debug_frame_offset = ofs;
- dso__data_put_fd(dso);
- }
-
- *offset = ofs;
- if (*offset)
- return 0;
-
- return -EINVAL;
-}
-#endif
-
-static struct map *find_map(unw_word_t ip, struct unwind_info *ui)
-{
- struct addr_location al;
-
- thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER,
- MAP__FUNCTION, ip, &al);
- if (!al.map) {
- /*
- * We've seen cases (softice) where DWARF unwinder went
- * through non executable mmaps, which we need to lookup
- * in MAP__VARIABLE tree.
- */
- thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER,
- MAP__VARIABLE, ip, &al);
- }
- return al.map;
-}
-
-static int
-find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi,
- int need_unwind_info, void *arg)
-{
- struct unwind_info *ui = arg;
- struct map *map;
- unw_dyn_info_t di;
- u64 table_data, segbase, fde_count;
- int ret = -EINVAL;
-
- map = find_map(ip, ui);
- if (!map || !map->dso)
- return -EINVAL;
-
- pr_debug("unwind: find_proc_info dso %s\n", map->dso->name);
-
- /* Check the .eh_frame section for unwinding info */
- if (!read_unwind_spec_eh_frame(map->dso, ui->machine,
- &table_data, &segbase, &fde_count)) {
- memset(&di, 0, sizeof(di));
- di.format = UNW_INFO_FORMAT_REMOTE_TABLE;
- di.start_ip = map->start;
- di.end_ip = map->end;
- di.u.rti.segbase = map->start + segbase;
- di.u.rti.table_data = map->start + table_data;
- di.u.rti.table_len = fde_count * sizeof(struct table_entry)
- / sizeof(unw_word_t);
- ret = dwarf_search_unwind_table(as, ip, &di, pi,
- need_unwind_info, arg);
- }
-
-#ifndef NO_LIBUNWIND_DEBUG_FRAME
- /* Check the .debug_frame section for unwinding info */
- if (ret < 0 &&
- !read_unwind_spec_debug_frame(map->dso, ui->machine, &segbase)) {
- int fd = dso__data_get_fd(map->dso, ui->machine);
- int is_exec = elf_is_exec(fd, map->dso->name);
- unw_word_t base = is_exec ? 0 : map->start;
- const char *symfile;
-
- if (fd >= 0)
- dso__data_put_fd(map->dso);
-
- symfile = map->dso->symsrc_filename ?: map->dso->name;
-
- memset(&di, 0, sizeof(di));
- if (dwarf_find_debug_frame(0, &di, ip, base, symfile,
- map->start, map->end))
- return dwarf_search_unwind_table(as, ip, &di, pi,
- need_unwind_info, arg);
- }
-#endif
-
- return ret;
-}
-
-static int access_fpreg(unw_addr_space_t __maybe_unused as,
- unw_regnum_t __maybe_unused num,
- unw_fpreg_t __maybe_unused *val,
- int __maybe_unused __write,
- void __maybe_unused *arg)
-{
- pr_err("unwind: access_fpreg unsupported\n");
- return -UNW_EINVAL;
-}
-
-static int get_dyn_info_list_addr(unw_addr_space_t __maybe_unused as,
- unw_word_t __maybe_unused *dil_addr,
- void __maybe_unused *arg)
-{
- return -UNW_ENOINFO;
-}
-
-static int resume(unw_addr_space_t __maybe_unused as,
- unw_cursor_t __maybe_unused *cu,
- void __maybe_unused *arg)
-{
- pr_err("unwind: resume unsupported\n");
- return -UNW_EINVAL;
-}
-
-static int
-get_proc_name(unw_addr_space_t __maybe_unused as,
- unw_word_t __maybe_unused addr,
- char __maybe_unused *bufp, size_t __maybe_unused buf_len,
- unw_word_t __maybe_unused *offp, void __maybe_unused *arg)
-{
- pr_err("unwind: get_proc_name unsupported\n");
- return -UNW_EINVAL;
-}
-
-static int access_dso_mem(struct unwind_info *ui, unw_word_t addr,
- unw_word_t *data)
-{
- struct map *map;
- ssize_t size;
-
- map = find_map(addr, ui);
- if (!map) {
- pr_debug("unwind: no map for %lx\n", (unsigned long)addr);
+ if (!ops) {
+ pr_err("unwind: target platform=%s is not supported\n", arch);
return -1;
}
+out_register:
+ unwind__register_ops(thread, ops);
- if (!map->dso)
- return -1;
-
- size = dso__data_read_addr(map->dso, map, ui->machine,
- addr, (u8 *) data, sizeof(*data));
-
- return !(size == sizeof(*data));
-}
-
-static int access_mem(unw_addr_space_t __maybe_unused as,
- unw_word_t addr, unw_word_t *valp,
- int __write, void *arg)
-{
- struct unwind_info *ui = arg;
- struct stack_dump *stack = &ui->sample->user_stack;
- u64 start, end;
- int offset;
- int ret;
-
- /* Don't support write, probably not needed. */
- if (__write || !stack || !ui->sample->user_regs.regs) {
- *valp = 0;
- return 0;
- }
-
- ret = perf_reg_value(&start, &ui->sample->user_regs, PERF_REG_SP);
- if (ret)
- return ret;
-
- end = start + stack->size;
-
- /* Check overflow. */
- if (addr + sizeof(unw_word_t) < addr)
- return -EINVAL;
-
- if (addr < start || addr + sizeof(unw_word_t) >= end) {
- ret = access_dso_mem(ui, addr, valp);
- if (ret) {
- pr_debug("unwind: access_mem %p not inside range"
- " 0x%" PRIx64 "-0x%" PRIx64 "\n",
- (void *) (uintptr_t) addr, start, end);
- *valp = 0;
- return ret;
- }
- return 0;
- }
-
- offset = addr - start;
- *valp = *(unw_word_t *)&stack->data[offset];
- pr_debug("unwind: access_mem addr %p val %lx, offset %d\n",
- (void *) (uintptr_t) addr, (unsigned long)*valp, offset);
- return 0;
-}
-
-static int access_reg(unw_addr_space_t __maybe_unused as,
- unw_regnum_t regnum, unw_word_t *valp,
- int __write, void *arg)
-{
- struct unwind_info *ui = arg;
- int id, ret;
- u64 val;
-
- /* Don't support write, I suspect we don't need it. */
- if (__write) {
- pr_err("unwind: access_reg w %d\n", regnum);
- return 0;
- }
-
- if (!ui->sample->user_regs.regs) {
- *valp = 0;
- return 0;
- }
-
- id = libunwind__arch_reg_id(regnum);
- if (id < 0)
- return -EINVAL;
-
- ret = perf_reg_value(&val, &ui->sample->user_regs, id);
- if (ret) {
- pr_err("unwind: can't read reg %d\n", regnum);
- return ret;
- }
-
- *valp = (unw_word_t) val;
- pr_debug("unwind: reg %d, val %lx\n", regnum, (unsigned long)*valp);
- return 0;
-}
-
-static void put_unwind_info(unw_addr_space_t __maybe_unused as,
- unw_proc_info_t *pi __maybe_unused,
- void *arg __maybe_unused)
-{
- pr_debug("unwind: put_unwind_info called\n");
-}
-
-static int entry(u64 ip, struct thread *thread,
- unwind_entry_cb_t cb, void *arg)
-{
- struct unwind_entry e;
- struct addr_location al;
-
- thread__find_addr_location(thread, PERF_RECORD_MISC_USER,
- MAP__FUNCTION, ip, &al);
-
- e.ip = ip;
- e.map = al.map;
- e.sym = al.sym;
-
- pr_debug("unwind: %s:ip = 0x%" PRIx64 " (0x%" PRIx64 ")\n",
- al.sym ? al.sym->name : "''",
- ip,
- al.map ? al.map->map_ip(al.map, ip) : (u64) 0);
-
- return cb(&e, arg);
-}
-
-static void display_error(int err)
-{
- switch (err) {
- case UNW_EINVAL:
- pr_err("unwind: Only supports local.\n");
- break;
- case UNW_EUNSPEC:
- pr_err("unwind: Unspecified error.\n");
- break;
- case UNW_EBADREG:
- pr_err("unwind: Register unavailable.\n");
- break;
- default:
- break;
- }
-}
-
-static unw_accessors_t accessors = {
- .find_proc_info = find_proc_info,
- .put_unwind_info = put_unwind_info,
- .get_dyn_info_list_addr = get_dyn_info_list_addr,
- .access_mem = access_mem,
- .access_reg = access_reg,
- .access_fpreg = access_fpreg,
- .resume = resume,
- .get_proc_name = get_proc_name,
-};
-
-int unwind__prepare_access(struct thread *thread)
-{
- if (callchain_param.record_mode != CALLCHAIN_DWARF)
- return 0;
-
- thread->addr_space = unw_create_addr_space(&accessors, 0);
- if (!thread->addr_space) {
- pr_err("unwind: Can't create unwind address space.\n");
- return -ENOMEM;
- }
-
- unw_set_caching_policy(thread->addr_space, UNW_CACHE_GLOBAL);
- return 0;
+ return thread->unwind_libunwind_ops->prepare_access(thread);
}
void unwind__flush_access(struct thread *thread)
{
- if (callchain_param.record_mode != CALLCHAIN_DWARF)
- return;
-
- unw_flush_cache(thread->addr_space, 0, 0);
+ if (thread->unwind_libunwind_ops)
+ thread->unwind_libunwind_ops->flush_access(thread);
}
void unwind__finish_access(struct thread *thread)
{
- if (callchain_param.record_mode != CALLCHAIN_DWARF)
- return;
-
- unw_destroy_addr_space(thread->addr_space);
-}
-
-static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb,
- void *arg, int max_stack)
-{
- u64 val;
- unw_word_t ips[max_stack];
- unw_addr_space_t addr_space;
- unw_cursor_t c;
- int ret, i = 0;
-
- ret = perf_reg_value(&val, &ui->sample->user_regs, PERF_REG_IP);
- if (ret)
- return ret;
-
- ips[i++] = (unw_word_t) val;
-
- /*
- * If we need more than one entry, do the DWARF
- * unwind itself.
- */
- if (max_stack - 1 > 0) {
- WARN_ONCE(!ui->thread, "WARNING: ui->thread is NULL");
- addr_space = ui->thread->addr_space;
-
- if (addr_space == NULL)
- return -1;
-
- ret = unw_init_remote(&c, addr_space, ui);
- if (ret)
- display_error(ret);
-
- while (!ret && (unw_step(&c) > 0) && i < max_stack) {
- unw_get_reg(&c, UNW_REG_IP, &ips[i]);
- ++i;
- }
-
- max_stack = i;
- }
-
- /*
- * Display what we got based on the order setup.
- */
- for (i = 0; i < max_stack && !ret; i++) {
- int j = i;
-
- if (callchain_param.order == ORDER_CALLER)
- j = max_stack - i - 1;
- ret = ips[j] ? entry(ips[j], ui->thread, cb, arg) : 0;
- }
-
- return ret;
+ if (thread->unwind_libunwind_ops)
+ thread->unwind_libunwind_ops->finish_access(thread);
}
int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
- struct thread *thread,
- struct perf_sample *data, int max_stack)
+ struct thread *thread,
+ struct perf_sample *data, int max_stack)
{
- struct unwind_info ui = {
- .sample = data,
- .thread = thread,
- .machine = thread->mg->machine,
- };
-
- if (!data->user_regs.regs)
- return -EINVAL;
-
- if (max_stack <= 0)
- return -EINVAL;
-
- return get_entries(&ui, cb, arg, max_stack);
+ if (thread->unwind_libunwind_ops)
+ return thread->unwind_libunwind_ops->get_entries(cb, arg, thread, data, max_stack);
+ return 0;
}
diff --git a/tools/perf/util/unwind.h b/tools/perf/util/unwind.h
index 12790cf..b074662 100644
--- a/tools/perf/util/unwind.h
+++ b/tools/perf/util/unwind.h
@@ -14,18 +14,31 @@
typedef int (*unwind_entry_cb_t)(struct unwind_entry *entry, void *arg);
+struct unwind_libunwind_ops {
+ int (*prepare_access)(struct thread *thread);
+ void (*flush_access)(struct thread *thread);
+ void (*finish_access)(struct thread *thread);
+ int (*get_entries)(unwind_entry_cb_t cb, void *arg,
+ struct thread *thread,
+ struct perf_sample *data, int max_stack);
+};
+
#ifdef HAVE_DWARF_UNWIND_SUPPORT
int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
struct thread *thread,
struct perf_sample *data, int max_stack);
/* libunwind specific */
#ifdef HAVE_LIBUNWIND_SUPPORT
-int libunwind__arch_reg_id(int regnum);
-int unwind__prepare_access(struct thread *thread);
+#ifndef LIBUNWIND__ARCH_REG_ID
+#define LIBUNWIND__ARCH_REG_ID(regnum) libunwind__arch_reg_id(regnum)
+#endif
+int LIBUNWIND__ARCH_REG_ID(int regnum);
+int unwind__prepare_access(struct thread *thread, struct map *map);
void unwind__flush_access(struct thread *thread);
void unwind__finish_access(struct thread *thread);
#else
-static inline int unwind__prepare_access(struct thread *thread __maybe_unused)
+static inline int unwind__prepare_access(struct thread *thread __maybe_unused,
+ struct map *map __maybe_unused)
{
return 0;
}
@@ -44,7 +57,8 @@
return 0;
}
-static inline int unwind__prepare_access(struct thread *thread __maybe_unused)
+static inline int unwind__prepare_access(struct thread *thread __maybe_unused,
+ struct map *map __maybe_unused)
{
return 0;
}