Merge branch 'perf/urgent' into perf/core

Merge this branch to pick up a fixlet and to update to a more recent base.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 084ef95..e489c14 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -237,6 +237,8 @@
 	(high) = (u32)(_l >> 32);			\
 } while (0)
 
+#define rdpmcl(counter, val) ((val) = native_read_pmc(counter))
+
 #define rdtscp(low, high, aux)					\
 do {                                                            \
 	unsigned long long _val = native_read_tscp(&(aux));     \
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 6cbbabf..14ce05d 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -252,6 +252,8 @@
 	high = _l >> 32;			\
 } while (0)
 
+#define rdpmcl(counter, val) ((val) = paravirt_read_pmc(counter))
+
 static inline unsigned long long paravirt_rdtscp(unsigned int *aux)
 {
 	return PVOP_CALL1(u64, pv_cpu_ops.read_tscp, aux);
diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
index 1e9bed1..f3971bb 100644
--- a/arch/x86/include/asm/uprobes.h
+++ b/arch/x86/include/asm/uprobes.h
@@ -48,7 +48,7 @@
 #endif
 };
 
-extern int  arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm);
+extern int  arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr);
 extern int  arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs);
 extern int  arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs);
 extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk);
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 6ab6aa2..bac4c38 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -32,7 +32,9 @@
 
 ifdef CONFIG_PERF_EVENTS
 obj-$(CONFIG_CPU_SUP_AMD)		+= perf_event_amd.o
-obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_p6.o perf_event_p4.o perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
+obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_p6.o perf_event_p4.o
+obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
+obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_uncore.o
 endif
 
 obj-$(CONFIG_X86_MCE)			+= mcheck/
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 6ef9d41..69d5fea 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -35,17 +35,6 @@
 
 #include "perf_event.h"
 
-#if 0
-#undef wrmsrl
-#define wrmsrl(msr, val) 					\
-do {								\
-	trace_printk("wrmsrl(%lx, %lx)\n", (unsigned long)(msr),\
-			(unsigned long)(val));			\
-	native_write_msr((msr), (u32)((u64)(val)), 		\
-			(u32)((u64)(val) >> 32));		\
-} while (0)
-#endif
-
 struct x86_pmu x86_pmu __read_mostly;
 
 DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
@@ -86,7 +75,7 @@
 	 */
 again:
 	prev_raw_count = local64_read(&hwc->prev_count);
-	rdmsrl(hwc->event_base, new_raw_count);
+	rdpmcl(hwc->event_base_rdpmc, new_raw_count);
 
 	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
 					new_raw_count) != prev_raw_count)
@@ -637,7 +626,7 @@
 	c = sched->constraints[sched->state.event];
 
 	/* Prefer fixed purpose counters */
-	if (x86_pmu.num_counters_fixed) {
+	if (c->idxmsk64 & (~0ULL << X86_PMC_IDX_FIXED)) {
 		idx = X86_PMC_IDX_FIXED;
 		for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) {
 			if (!__test_and_set_bit(idx, sched->state.used))
@@ -704,8 +693,8 @@
 /*
  * Assign a counter for each event.
  */
-static int perf_assign_events(struct event_constraint **constraints, int n,
-			      int wmin, int wmax, int *assign)
+int perf_assign_events(struct event_constraint **constraints, int n,
+			int wmin, int wmax, int *assign)
 {
 	struct perf_sched sched;
 
@@ -830,9 +819,11 @@
 	} else if (hwc->idx >= X86_PMC_IDX_FIXED) {
 		hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
 		hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - X86_PMC_IDX_FIXED);
+		hwc->event_base_rdpmc = (hwc->idx - X86_PMC_IDX_FIXED) | 1<<30;
 	} else {
 		hwc->config_base = x86_pmu_config_addr(hwc->idx);
 		hwc->event_base  = x86_pmu_event_addr(hwc->idx);
+		hwc->event_base_rdpmc = hwc->idx;
 	}
 }
 
@@ -1649,7 +1640,12 @@
 			      struct device_attribute *attr,
 			      const char *buf, size_t count)
 {
-	unsigned long val = simple_strtoul(buf, NULL, 0);
+	unsigned long val;
+	ssize_t ret;
+
+	ret = kstrtoul(buf, 0, &val);
+	if (ret)
+		return ret;
 
 	if (!!val != !!x86_pmu.attr_rdpmc) {
 		x86_pmu.attr_rdpmc = !!val;
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 7241e2f..83238f2 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -14,6 +14,18 @@
 
 #include <linux/perf_event.h>
 
+#if 0
+#undef wrmsrl
+#define wrmsrl(msr, val) 						\
+do {									\
+	unsigned int _msr = (msr);					\
+	u64 _val = (val);						\
+	trace_printk("wrmsrl(%x, %Lx)\n", (unsigned int)(_msr),		\
+			(unsigned long long)(_val));			\
+	native_write_msr((_msr), (u32)(_val), (u32)(_val >> 32));	\
+} while (0)
+#endif
+
 /*
  *          |   NHM/WSM    |      SNB     |
  * register -------------------------------
@@ -57,7 +69,7 @@
 };
 
 /* The maximal number of PEBS events: */
-#define MAX_PEBS_EVENTS		4
+#define MAX_PEBS_EVENTS		8
 
 /*
  * A debug store configuration.
@@ -366,6 +378,7 @@
 	void		(*drain_pebs)(struct pt_regs *regs);
 	struct event_constraint *pebs_constraints;
 	void		(*pebs_aliases)(struct perf_event *event);
+	int 		max_pebs_events;
 
 	/*
 	 * Intel LBR
@@ -468,6 +481,8 @@
 
 void x86_pmu_enable_all(int added);
 
+int perf_assign_events(struct event_constraint **constraints, int n,
+			int wmin, int wmax, int *assign);
 int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);
 
 void x86_pmu_stop(struct perf_event *event, int flags);
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 187c294..e23e71f 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1800,6 +1800,8 @@
 	x86_pmu.events_maskl		= ebx.full;
 	x86_pmu.events_mask_len		= eax.split.mask_length;
 
+	x86_pmu.max_pebs_events		= min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters);
+
 	/*
 	 * Quirk: v2 perfmon does not report fixed-purpose events, so
 	 * assume at least 3 events:
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 35e2192..026373e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -620,7 +620,7 @@
 	 * Should not happen, we program the threshold at 1 and do not
 	 * set a reset value.
 	 */
-	WARN_ON_ONCE(n > 1);
+	WARN_ONCE(n > 1, "bad leftover pebs %d\n", n);
 	at += n - 1;
 
 	__intel_pmu_pebs_event(event, iregs, at);
@@ -651,10 +651,10 @@
 	 * Should not happen, we program the threshold at 1 and do not
 	 * set a reset value.
 	 */
-	WARN_ON_ONCE(n > MAX_PEBS_EVENTS);
+	WARN_ONCE(n > x86_pmu.max_pebs_events, "Unexpected number of pebs records %d\n", n);
 
 	for ( ; at < top; at++) {
-		for_each_set_bit(bit, (unsigned long *)&at->status, MAX_PEBS_EVENTS) {
+		for_each_set_bit(bit, (unsigned long *)&at->status, x86_pmu.max_pebs_events) {
 			event = cpuc->events[bit];
 			if (!test_bit(bit, cpuc->active_mask))
 				continue;
@@ -670,7 +670,7 @@
 			break;
 		}
 
-		if (!event || bit >= MAX_PEBS_EVENTS)
+		if (!event || bit >= x86_pmu.max_pebs_events)
 			continue;
 
 		__intel_pmu_pebs_event(event, iregs, at);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
new file mode 100644
index 0000000..6f43f95
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -0,0 +1,1703 @@
+#include "perf_event_intel_uncore.h"
+
+static struct intel_uncore_type *empty_uncore[] = { NULL, };
+static struct intel_uncore_type **msr_uncores = empty_uncore;
+static struct intel_uncore_type **pci_uncores = empty_uncore;
+/* pci bus to socket mapping */
+static int pcibus_to_physid[256] = { [0 ... 255] = -1, };
+
+static DEFINE_RAW_SPINLOCK(uncore_box_lock);
+
+/* mask of cpus that collect uncore events */
+static cpumask_t uncore_cpu_mask;
+
+/* constraint for the fixed counter */
+static struct event_constraint constraint_fixed =
+	EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL);
+
+DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
+DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
+DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
+DEFINE_UNCORE_FORMAT_ATTR(cmask5, cmask, "config:24-28");
+DEFINE_UNCORE_FORMAT_ATTR(cmask8, cmask, "config:24-31");
+DEFINE_UNCORE_FORMAT_ATTR(thresh8, thresh, "config:24-31");
+DEFINE_UNCORE_FORMAT_ATTR(thresh5, thresh, "config:24-28");
+DEFINE_UNCORE_FORMAT_ATTR(occ_sel, occ_sel, "config:14-15");
+DEFINE_UNCORE_FORMAT_ATTR(occ_invert, occ_invert, "config:30");
+DEFINE_UNCORE_FORMAT_ATTR(occ_edge, occ_edge, "config:31");	/* one-bit flag; 14-51 would overlap occ_sel and thresh */
+
+/* Sandy Bridge-EP uncore support */
+/* Set the FRZ bit in the box control register of a PCI uncore box. */
+static void snbep_uncore_pci_disable_box(struct intel_uncore_box *box)
+{
+	int ctl_reg = uncore_pci_box_ctl(box);
+	u32 val;
+
+	pci_read_config_dword(box->pci_dev, ctl_reg, &val);
+	val |= SNBEP_PMON_BOX_CTL_FRZ;
+	pci_write_config_dword(box->pci_dev, ctl_reg, val);
+}
+
+/* Clear the FRZ bit in the box control register of a PCI uncore box. */
+static void snbep_uncore_pci_enable_box(struct intel_uncore_box *box)
+{
+	int ctl_reg = uncore_pci_box_ctl(box);
+	u32 val;
+
+	pci_read_config_dword(box->pci_dev, ctl_reg, &val);
+	val &= ~SNBEP_PMON_BOX_CTL_FRZ;
+	pci_write_config_dword(box->pci_dev, ctl_reg, val);
+}
+
+/* Write the event's config with SNBEP_PMON_CTL_EN OR-ed in to its control register. */
+static void snbep_uncore_pci_enable_event(struct intel_uncore_box *box,
+					struct perf_event *event)
+{
+	struct hw_perf_event *hw = &event->hw;
+	u32 ctl = hw->config | SNBEP_PMON_CTL_EN;
+
+	pci_write_config_dword(box->pci_dev, hw->config_base, ctl);
+}
+
+/* Write the event's bare config (no SNBEP_PMON_CTL_EN OR-ed in) to its control register. */
+static void snbep_uncore_pci_disable_event(struct intel_uncore_box *box,
+					struct perf_event *event)
+{
+	const struct hw_perf_event *hw = &event->hw;
+
+	pci_write_config_dword(box->pci_dev, hw->config_base, hw->config);
+}
+
+/* Assemble the 64-bit count from two config dword reads: event_base, then event_base + 4. */
+static u64 snbep_uncore_pci_read_counter(struct intel_uncore_box *box,
+					struct perf_event *event)
+{
+	unsigned long ctr = event->hw.event_base;
+	u64 count = 0;	/* keep the result defined if a config read stores nothing */
+
+	pci_read_config_dword(box->pci_dev, ctr, (u32 *)&count);
+	pci_read_config_dword(box->pci_dev, ctr + 4, (u32 *)&count + 1);
+	return count;
+}
+
+/* Write SNBEP_PMON_BOX_CTL_INT to the box control register of a PCI uncore box. */
+static void snbep_uncore_pci_init_box(struct intel_uncore_box *box)
+{
+	pci_write_config_dword(box->pci_dev, SNBEP_PCI_PMON_BOX_CTL,
+				SNBEP_PMON_BOX_CTL_INT);
+}
+
+/* Set FRZ in the box control MSR; does nothing when uncore_msr_box_ctl() returns 0. */
+static void snbep_uncore_msr_disable_box(struct intel_uncore_box *box)
+{
+	unsigned ctl_msr = uncore_msr_box_ctl(box);
+	u64 val;
+
+	if (!ctl_msr)
+		return;
+
+	rdmsrl(ctl_msr, val);
+	val |= SNBEP_PMON_BOX_CTL_FRZ;
+	wrmsrl(ctl_msr, val);
+}
+
+/* Clear FRZ in the box control MSR; does nothing when uncore_msr_box_ctl() returns 0. */
+static void snbep_uncore_msr_enable_box(struct intel_uncore_box *box)
+{
+	unsigned ctl_msr = uncore_msr_box_ctl(box);
+	u64 val;
+
+	if (!ctl_msr)
+		return;
+
+	rdmsrl(ctl_msr, val);
+	val &= ~SNBEP_PMON_BOX_CTL_FRZ;
+	wrmsrl(ctl_msr, val);
+}
+
+/* Write the event's config with SNBEP_PMON_CTL_EN OR-ed in to its control MSR. */
+static void snbep_uncore_msr_enable_event(struct intel_uncore_box *box,
+					struct perf_event *event)
+{
+	wrmsrl(event->hw.config_base,
+	       event->hw.config | SNBEP_PMON_CTL_EN);
+}
+
+/* Write the event's bare config back to its control MSR. */
+static void snbep_uncore_msr_disable_event(struct intel_uncore_box *box,
+					struct perf_event *event)
+{
+	/* unlike the enable path, SNBEP_PMON_CTL_EN is not OR-ed in */
+	wrmsrl(event->hw.config_base, event->hw.config);
+}
+
+/* Return the raw value read from the counter MSR recorded in @event. */
+static u64 snbep_uncore_msr_read_counter(struct intel_uncore_box *box,
+					struct perf_event *event)
+{
+	u64 raw;
+
+	rdmsrl(event->hw.event_base, raw);
+	return raw;
+}
+
+static void snbep_uncore_msr_init_box(struct intel_uncore_box *box)
+{
+	unsigned ctl_msr = uncore_msr_box_ctl(box);	/* 0: skip the write */
+	if (ctl_msr != 0)
+		wrmsrl(ctl_msr, SNBEP_PMON_BOX_CTL_INT);
+}
+
+static struct attribute *snbep_uncore_formats_attr[] = {
+	&format_attr_event.attr,
+	&format_attr_umask.attr,
+	&format_attr_edge.attr,
+	&format_attr_inv.attr,
+	&format_attr_thresh8.attr,
+	NULL,
+};
+
+static struct attribute *snbep_uncore_ubox_formats_attr[] = {
+	&format_attr_event.attr,
+	&format_attr_umask.attr,
+	&format_attr_edge.attr,
+	&format_attr_inv.attr,
+	&format_attr_thresh5.attr,
+	NULL,
+};
+
+static struct attribute *snbep_uncore_pcu_formats_attr[] = {
+	&format_attr_event.attr,
+	&format_attr_occ_sel.attr,
+	&format_attr_edge.attr,
+	&format_attr_inv.attr,
+	&format_attr_thresh5.attr,
+	&format_attr_occ_invert.attr,
+	&format_attr_occ_edge.attr,
+	NULL,
+};
+
+static struct uncore_event_desc snbep_uncore_imc_events[] = {
+	INTEL_UNCORE_EVENT_DESC(clockticks,      "event=0xff,umask=0xff"),
+	INTEL_UNCORE_EVENT_DESC(cas_count_read,  "event=0x04,umask=0x03"),
+	INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x0c"),
+	{ /* end: all zeroes */ },
+};
+
+static struct uncore_event_desc snbep_uncore_qpi_events[] = {
+	INTEL_UNCORE_EVENT_DESC(clockticks,       "event=0x14"),
+	INTEL_UNCORE_EVENT_DESC(txl_flits_active, "event=0x00,umask=0x06"),
+	INTEL_UNCORE_EVENT_DESC(drs_data,         "event=0x02,umask=0x08"),
+	INTEL_UNCORE_EVENT_DESC(ncb_data,         "event=0x03,umask=0x04"),
+	{ /* end: all zeroes */ },
+};
+
+static struct attribute_group snbep_uncore_format_group = {
+	.name = "format",
+	.attrs = snbep_uncore_formats_attr,
+};
+
+static struct attribute_group snbep_uncore_ubox_format_group = {
+	.name = "format",
+	.attrs = snbep_uncore_ubox_formats_attr,
+};
+
+static struct attribute_group snbep_uncore_pcu_format_group = {
+	.name = "format",
+	.attrs = snbep_uncore_pcu_formats_attr,
+};
+
+static struct intel_uncore_ops snbep_uncore_msr_ops = {
+	.init_box	= snbep_uncore_msr_init_box,
+	.disable_box	= snbep_uncore_msr_disable_box,
+	.enable_box	= snbep_uncore_msr_enable_box,
+	.disable_event	= snbep_uncore_msr_disable_event,
+	.enable_event	= snbep_uncore_msr_enable_event,
+	.read_counter	= snbep_uncore_msr_read_counter,
+};
+
+static struct intel_uncore_ops snbep_uncore_pci_ops = {
+	.init_box	= snbep_uncore_pci_init_box,
+	.disable_box	= snbep_uncore_pci_disable_box,
+	.enable_box	= snbep_uncore_pci_enable_box,
+	.disable_event	= snbep_uncore_pci_disable_event,
+	.enable_event	= snbep_uncore_pci_enable_event,
+	.read_counter	= snbep_uncore_pci_read_counter,
+};
+
+static struct event_constraint snbep_uncore_cbox_constraints[] = {
+	UNCORE_EVENT_CONSTRAINT(0x01, 0x1),
+	UNCORE_EVENT_CONSTRAINT(0x02, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x04, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x05, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x07, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x11, 0x1),
+	UNCORE_EVENT_CONSTRAINT(0x12, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x13, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x1b, 0xc),
+	UNCORE_EVENT_CONSTRAINT(0x1c, 0xc),
+	UNCORE_EVENT_CONSTRAINT(0x1d, 0xc),
+	UNCORE_EVENT_CONSTRAINT(0x1e, 0xc),
+	UNCORE_EVENT_CONSTRAINT(0x1f, 0xe),
+	UNCORE_EVENT_CONSTRAINT(0x21, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x31, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x35, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x36, 0x1),
+	UNCORE_EVENT_CONSTRAINT(0x37, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x38, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x39, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x3b, 0x1),
+	EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint snbep_uncore_r2pcie_constraints[] = {
+	UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x12, 0x1),
+	UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x24, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x25, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
+	EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint snbep_uncore_r3qpi_constraints[] = {
+	UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x12, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x13, 0x1),
+	UNCORE_EVENT_CONSTRAINT(0x20, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x21, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x22, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x24, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x25, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x30, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x31, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x36, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x37, 0x3),
+	EVENT_CONSTRAINT_END
+};
+
+static struct intel_uncore_type snbep_uncore_ubox = {
+	.name		= "ubox",
+	.num_counters   = 2,
+	.num_boxes	= 1,
+	.perf_ctr_bits	= 44,
+	.fixed_ctr_bits	= 48,
+	.perf_ctr	= SNBEP_U_MSR_PMON_CTR0,
+	.event_ctl	= SNBEP_U_MSR_PMON_CTL0,
+	.event_mask	= SNBEP_U_MSR_PMON_RAW_EVENT_MASK,
+	.fixed_ctr	= SNBEP_U_MSR_PMON_UCLK_FIXED_CTR,
+	.fixed_ctl	= SNBEP_U_MSR_PMON_UCLK_FIXED_CTL,
+	.ops		= &snbep_uncore_msr_ops,
+	.format_group	= &snbep_uncore_ubox_format_group,
+};
+
+static struct intel_uncore_type snbep_uncore_cbox = {
+	.name		= "cbox",
+	.num_counters   = 4,
+	.num_boxes	= 8,
+	.perf_ctr_bits	= 44,
+	.event_ctl	= SNBEP_C0_MSR_PMON_CTL0,
+	.perf_ctr	= SNBEP_C0_MSR_PMON_CTR0,
+	.event_mask	= SNBEP_PMON_RAW_EVENT_MASK,
+	.box_ctl	= SNBEP_C0_MSR_PMON_BOX_CTL,
+	.msr_offset	= SNBEP_CBO_MSR_OFFSET,
+	.constraints	= snbep_uncore_cbox_constraints,
+	.ops		= &snbep_uncore_msr_ops,
+	.format_group	= &snbep_uncore_format_group,
+};
+
+static struct intel_uncore_type snbep_uncore_pcu = {
+	.name		= "pcu",
+	.num_counters   = 4,
+	.num_boxes	= 1,
+	.perf_ctr_bits	= 48,
+	.perf_ctr	= SNBEP_PCU_MSR_PMON_CTR0,
+	.event_ctl	= SNBEP_PCU_MSR_PMON_CTL0,
+	.event_mask	= SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK,
+	.box_ctl	= SNBEP_PCU_MSR_PMON_BOX_CTL,
+	.ops		= &snbep_uncore_msr_ops,
+	.format_group	= &snbep_uncore_pcu_format_group,
+};
+
+static struct intel_uncore_type *snbep_msr_uncores[] = {
+	&snbep_uncore_ubox,
+	&snbep_uncore_cbox,
+	&snbep_uncore_pcu,
+	NULL,
+};
+
+#define SNBEP_UNCORE_PCI_COMMON_INIT()				\
+	.perf_ctr	= SNBEP_PCI_PMON_CTR0,			\
+	.event_ctl	= SNBEP_PCI_PMON_CTL0,			\
+	.event_mask	= SNBEP_PMON_RAW_EVENT_MASK,		\
+	.box_ctl	= SNBEP_PCI_PMON_BOX_CTL,		\
+	.ops		= &snbep_uncore_pci_ops,		\
+	.format_group	= &snbep_uncore_format_group
+
+static struct intel_uncore_type snbep_uncore_ha = {
+	.name		= "ha",
+	.num_counters   = 4,
+	.num_boxes	= 1,
+	.perf_ctr_bits	= 48,
+	SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct intel_uncore_type snbep_uncore_imc = {
+	.name		= "imc",
+	.num_counters   = 4,
+	.num_boxes	= 4,
+	.perf_ctr_bits	= 48,
+	.fixed_ctr_bits	= 48,
+	.fixed_ctr	= SNBEP_MC_CHy_PCI_PMON_FIXED_CTR,
+	.fixed_ctl	= SNBEP_MC_CHy_PCI_PMON_FIXED_CTL,
+	.event_descs	= snbep_uncore_imc_events,
+	SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct intel_uncore_type snbep_uncore_qpi = {
+	.name		= "qpi",
+	.num_counters   = 4,
+	.num_boxes	= 2,
+	.perf_ctr_bits	= 48,
+	.event_descs	= snbep_uncore_qpi_events,
+	SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+
+static struct intel_uncore_type snbep_uncore_r2pcie = {
+	.name		= "r2pcie",
+	.num_counters   = 4,
+	.num_boxes	= 1,
+	.perf_ctr_bits	= 44,
+	.constraints	= snbep_uncore_r2pcie_constraints,
+	SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct intel_uncore_type snbep_uncore_r3qpi = {
+	.name		= "r3qpi",
+	.num_counters   = 3,
+	.num_boxes	= 2,
+	.perf_ctr_bits	= 44,
+	.constraints	= snbep_uncore_r3qpi_constraints,
+	SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct intel_uncore_type *snbep_pci_uncores[] = {
+	&snbep_uncore_ha,
+	&snbep_uncore_imc,
+	&snbep_uncore_qpi,
+	&snbep_uncore_r2pcie,
+	&snbep_uncore_r3qpi,
+	NULL,
+};
+
+static DEFINE_PCI_DEVICE_TABLE(snbep_uncore_pci_ids) = {
+	{ /* Home Agent */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_HA),
+		.driver_data = (unsigned long)&snbep_uncore_ha,
+	},
+	{ /* MC Channel 0 */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC0),
+		.driver_data = (unsigned long)&snbep_uncore_imc,
+	},
+	{ /* MC Channel 1 */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC1),
+		.driver_data = (unsigned long)&snbep_uncore_imc,
+	},
+	{ /* MC Channel 2 */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC2),
+		.driver_data = (unsigned long)&snbep_uncore_imc,
+	},
+	{ /* MC Channel 3 */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC3),
+		.driver_data = (unsigned long)&snbep_uncore_imc,
+	},
+	{ /* QPI Port 0 */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI0),
+		.driver_data = (unsigned long)&snbep_uncore_qpi,
+	},
+	{ /* QPI Port 1 */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI1),
+		.driver_data = (unsigned long)&snbep_uncore_qpi,
+	},
+	{ /* R2PCIe */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R2PCIE),
+		.driver_data = (unsigned long)&snbep_uncore_r2pcie,
+	},
+	{ /* R3QPI Link 0 */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI0),
+		.driver_data = (unsigned long)&snbep_uncore_r3qpi,
+	},
+	{ /* R3QPI Link 1 */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI1),
+		.driver_data = (unsigned long)&snbep_uncore_r3qpi,
+	},
+	{ /* end: all zeroes */ }
+};
+
+static struct pci_driver snbep_uncore_pci_driver = {
+	.name		= "snbep_uncore",
+	.id_table	= snbep_uncore_pci_ids,
+};
+
+/*
+ * build pci bus to socket mapping
+ */
+static void snbep_pci2phy_map_init(void)
+{
+	struct pci_dev *ubox_dev = NULL;
+	int i, bus, nodeid;
+	u32 config;
+
+	while (1) {
+		/* find the UBOX device */
+		ubox_dev = pci_get_device(PCI_VENDOR_ID_INTEL,
+					PCI_DEVICE_ID_INTEL_JAKETOWN_UBOX,
+					ubox_dev);
+		if (!ubox_dev)
+			break;
+		bus = ubox_dev->bus->number;
+		/* get the Node ID of the local register */
+		pci_read_config_dword(ubox_dev, 0x40, &config);
+		/* mapping slots are 3 bits wide, so keep only the 3-bit Node ID */
+		nodeid = config & 0x7;
+		/* get the Node ID mapping */
+		pci_read_config_dword(ubox_dev, 0x54, &config);
+		/*
+		 * every three bits in the Node ID mapping register maps
+		 * to a particular node.
+		 */
+		for (i = 0; i < 8; i++) {
+			if (nodeid == ((config >> (3 * i)) & 0x7)) {
+				pcibus_to_physid[bus] = i;
+				break;
+			}
+		}
+	}
+}
+/* end of Sandy Bridge-EP uncore support */
+
+
+/* Sandy Bridge uncore support */
+static void snb_uncore_msr_enable_event(struct intel_uncore_box *box,
+					struct perf_event *event)
+{
+	struct hw_perf_event *hw = &event->hw;
+	u64 ctl = SNB_UNC_CTL_EN;	/* the only bit written for the fixed counter */
+
+	if (hw->idx < UNCORE_PMC_IDX_FIXED)
+		ctl |= hw->config;	/* generic counters also get the event config */
+	wrmsrl(hw->config_base, ctl);
+}
+
+static void snb_uncore_msr_disable_event(struct intel_uncore_box *box,
+					struct perf_event *event)
+{
+	wrmsrl(event->hw.config_base, 0);	/* zero the event's whole control register */
+}
+
+static u64 snb_uncore_msr_read_counter(struct intel_uncore_box *box,
+					struct perf_event *event)
+{
+	u64 raw;	/* filled in by rdmsrl() from the event's counter MSR */
+	rdmsrl(event->hw.event_base, raw);
+	return raw;
+}
+
+static void snb_uncore_msr_init_box(struct intel_uncore_box *box)
+{
+	if (box->pmu->pmu_idx != 0)	/* only pmu index 0 writes the global control */
+		return;
+	wrmsrl(SNB_UNC_PERF_GLOBAL_CTL,
+		SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL);
+}
+
+static struct attribute *snb_uncore_formats_attr[] = {
+	&format_attr_event.attr,
+	&format_attr_umask.attr,
+	&format_attr_edge.attr,
+	&format_attr_inv.attr,
+	&format_attr_cmask5.attr,
+	NULL,
+};
+
+static struct attribute_group snb_uncore_format_group = {
+	.name = "format",
+	.attrs = snb_uncore_formats_attr,
+};
+
+static struct intel_uncore_ops snb_uncore_msr_ops = {
+	.init_box	= snb_uncore_msr_init_box,
+	.disable_event	= snb_uncore_msr_disable_event,
+	.enable_event	= snb_uncore_msr_enable_event,
+	.read_counter	= snb_uncore_msr_read_counter,
+};
+
+static struct event_constraint snb_uncore_cbox_constraints[] = {
+	UNCORE_EVENT_CONSTRAINT(0x80, 0x1),
+	UNCORE_EVENT_CONSTRAINT(0x83, 0x1),
+	EVENT_CONSTRAINT_END
+};
+
+static struct intel_uncore_type snb_uncore_cbox = {
+	.name		= "cbox",
+	.num_counters   = 2,
+	.num_boxes	= 4,
+	.perf_ctr_bits	= 44,
+	.fixed_ctr_bits	= 48,
+	.perf_ctr	= SNB_UNC_CBO_0_PER_CTR0,
+	.event_ctl	= SNB_UNC_CBO_0_PERFEVTSEL0,
+	.fixed_ctr	= SNB_UNC_FIXED_CTR,
+	.fixed_ctl	= SNB_UNC_FIXED_CTR_CTRL,
+	.single_fixed	= 1,
+	.event_mask	= SNB_UNC_RAW_EVENT_MASK,
+	.msr_offset	= SNB_UNC_CBO_MSR_OFFSET,
+	.constraints	= snb_uncore_cbox_constraints,
+	.ops		= &snb_uncore_msr_ops,
+	.format_group	= &snb_uncore_format_group,
+};
+
+static struct intel_uncore_type *snb_msr_uncores[] = {
+	&snb_uncore_cbox,
+	NULL,
+};
+/* end of Sandy Bridge uncore support */
+
+/* Nehalem uncore support */
+static void nhm_uncore_msr_disable_box(struct intel_uncore_box *box)
+{
+	wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, 0);	/* clear every bit of the global control MSR */
+}
+
+static void nhm_uncore_msr_enable_box(struct intel_uncore_box *box)
+{
+	wrmsrl(NHM_UNC_PERF_GLOBAL_CTL,
+		NHM_UNC_GLOBAL_CTL_EN_PC_ALL | NHM_UNC_GLOBAL_CTL_EN_FC);	/* presumably all generic counters plus the fixed one - confirm in the header */
+}
+
+/* Enable @event: generic counters reuse SNB_UNC_CTL_EN, the fixed one has its own value. */
+static void nhm_uncore_msr_enable_event(struct intel_uncore_box *box,
+					struct perf_event *event)
+{
+	struct hw_perf_event *hw = &event->hw;
+	bool is_fixed = hw->idx >= UNCORE_PMC_IDX_FIXED;
+
+	wrmsrl(hw->config_base, is_fixed ? NHM_UNC_FIXED_CTR_CTL_EN :
+					   hw->config | SNB_UNC_CTL_EN);
+}
+
+static struct attribute *nhm_uncore_formats_attr[] = {
+	&format_attr_event.attr,
+	&format_attr_umask.attr,
+	&format_attr_edge.attr,
+	&format_attr_inv.attr,
+	&format_attr_cmask8.attr,
+	NULL,
+};
+
+static struct attribute_group nhm_uncore_format_group = {
+	.name = "format",
+	.attrs = nhm_uncore_formats_attr,
+};
+
+static struct uncore_event_desc nhm_uncore_events[] = {
+	INTEL_UNCORE_EVENT_DESC(clockticks,                "event=0xff,umask=0xff"),
+	INTEL_UNCORE_EVENT_DESC(qmc_writes_full_any,       "event=0x2f,umask=0x0f"),
+	INTEL_UNCORE_EVENT_DESC(qmc_normal_reads_any,      "event=0x2c,umask=0x0f"),
+	INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_reads,     "event=0x20,umask=0x01"),
+	INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_writes,    "event=0x20,umask=0x02"),
+	INTEL_UNCORE_EVENT_DESC(qhl_request_remote_reads,  "event=0x20,umask=0x04"),
+	INTEL_UNCORE_EVENT_DESC(qhl_request_remote_writes, "event=0x20,umask=0x08"),
+	INTEL_UNCORE_EVENT_DESC(qhl_request_local_reads,   "event=0x20,umask=0x10"),
+	INTEL_UNCORE_EVENT_DESC(qhl_request_local_writes,  "event=0x20,umask=0x20"),
+	{ /* end: all zeroes */ },
+};
+
+static struct intel_uncore_ops nhm_uncore_msr_ops = {
+	.disable_box	= nhm_uncore_msr_disable_box,
+	.enable_box	= nhm_uncore_msr_enable_box,
+	.disable_event	= snb_uncore_msr_disable_event,
+	.enable_event	= nhm_uncore_msr_enable_event,
+	.read_counter	= snb_uncore_msr_read_counter,
+};
+
+static struct intel_uncore_type nhm_uncore = {
+	.name		= "",
+	.num_counters   = 8,
+	.num_boxes	= 1,
+	.perf_ctr_bits	= 48,
+	.fixed_ctr_bits	= 48,
+	.event_ctl	= NHM_UNC_PERFEVTSEL0,
+	.perf_ctr	= NHM_UNC_UNCORE_PMC0,
+	.fixed_ctr	= NHM_UNC_FIXED_CTR,
+	.fixed_ctl	= NHM_UNC_FIXED_CTR_CTRL,
+	.event_mask	= NHM_UNC_RAW_EVENT_MASK,
+	.event_descs	= nhm_uncore_events,
+	.ops		= &nhm_uncore_msr_ops,
+	.format_group	= &nhm_uncore_format_group,
+};
+
+static struct intel_uncore_type *nhm_msr_uncores[] = {
+	&nhm_uncore,
+	NULL,
+};
+/* end of Nehalem uncore support */
+
+/* Bind @event to counter @idx: bump that slot's tag and cache its register addresses. */
+static void uncore_assign_hw_event(struct intel_uncore_box *box,
+				struct perf_event *event, int idx)
+{
+	struct hw_perf_event *hw = &event->hw;
+
+	hw->idx = idx;
+	hw->last_tag = ++box->tags[idx];
+
+	if (idx == UNCORE_PMC_IDX_FIXED) {
+		hw->event_base = uncore_fixed_ctr(box);
+		hw->config_base = uncore_fixed_ctl(box);
+	} else {
+		hw->config_base = uncore_event_ctl(box, idx);
+		hw->event_base = uncore_perf_ctr(box, idx);
+	}
+}
+
+static void uncore_perf_event_update(struct intel_uncore_box *box,
+					struct perf_event *event)
+{
+	u64 prev_count, new_count, delta;
+	int shift;	/* 64 minus the counter width in bits */
+
+	if (event->hw.idx >= UNCORE_PMC_IDX_FIXED)
+		shift = 64 - uncore_fixed_ctr_bits(box);
+	else
+		shift = 64 - uncore_perf_ctr_bits(box);
+
+	/* the hrtimer might modify the previous event value */
+again:
+	prev_count = local64_read(&event->hw.prev_count);
+	new_count = uncore_read_counter(box, event);
+	if (local64_xchg(&event->hw.prev_count, new_count) != prev_count)
+		goto again;	/* prev_count changed since it was read: sample again */
+
+	delta = (new_count << shift) - (prev_count << shift);	/* top-aligned subtraction stays correct across a counter wrap */
+	delta >>= shift;
+
+	local64_add(delta, &event->count);
+}
+
+/*
+ * The overflow interrupt is unavailable for SandyBridge-EP, is broken
+ * for SandyBridge. So we use hrtimer to periodically poll the counter
+ * to avoid overflow.
+ */
+static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
+{
+	struct intel_uncore_box *box;
+	unsigned long flags;
+	int bit;
+
+	box = container_of(hrtimer, struct intel_uncore_box, hrtimer);
+	if (!box->n_active || box->cpu != smp_processor_id())
+		return HRTIMER_NORESTART;	/* nothing active, or not running on the box's cpu */
+	/*
+	 * disable local interrupts to prevent uncore_pmu_event_start/stop
+	 * from interrupting the update process
+	 */
+	local_irq_save(flags);
+
+	for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
+		uncore_perf_event_update(box, box->events[bit]);	/* fold in every active counter */
+
+	local_irq_restore(flags);
+
+	hrtimer_forward_now(hrtimer, ns_to_ktime(UNCORE_PMU_HRTIMER_INTERVAL));
+	return HRTIMER_RESTART;
+}
+
+static void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
+{
+	__hrtimer_start_range_ns(&box->hrtimer,
+			ns_to_ktime(UNCORE_PMU_HRTIMER_INTERVAL), 0,
+			HRTIMER_MODE_REL_PINNED, 0);	/* first expiry one polling interval from now */
+}
+
+static void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
+{
+	hrtimer_cancel(&box->hrtimer);	/* stop the box's polling timer */
+}
+
+static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
+{
+	hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	box->hrtimer.function = uncore_pmu_hrtimer;	/* callback that polls the active counters */
+}
+
+/* Allocate a zeroed box on @cpu's node: refcnt 1, cpu and phys_id set to -1. NULL on failure. */
+struct intel_uncore_box *uncore_alloc_box(int cpu)
+{
+	struct intel_uncore_box *box;
+
+	box = kzalloc_node(sizeof(*box), GFP_KERNEL, cpu_to_node(cpu));
+	if (!box)
+		return NULL;
+
+	box->cpu = -1;
+	box->phys_id = -1;
+	atomic_set(&box->refcnt, 1);
+	uncore_pmu_init_hrtimer(box);
+
+	return box;
+}
+
+static struct intel_uncore_box *
+uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
+{
+	struct intel_uncore_box *box;	/* automatic: a static would be shared by every caller */
+
+	box = *per_cpu_ptr(pmu->box, cpu);
+	if (box)
+		return box;	/* fast path: a box is already cached for this cpu */
+
+	raw_spin_lock(&uncore_box_lock);
+	list_for_each_entry(box, &pmu->box_list, list) {
+		if (box->phys_id == topology_physical_package_id(cpu)) {
+			atomic_inc(&box->refcnt);	/* the per-cpu slot now refers to this box */
+			*per_cpu_ptr(pmu->box, cpu) = box;
+			break;
+		}
+	}
+	raw_spin_unlock(&uncore_box_lock);
+
+	return *per_cpu_ptr(pmu->box, cpu);	/* still NULL when no box matched the package */
+}
+
+static struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event)
+{
+	return container_of(event->pmu, struct intel_uncore_pmu, pmu);	/* event->pmu is the pmu member embedded in the uncore pmu */
+}
+
+static struct intel_uncore_box *uncore_event_to_box(struct perf_event *event)
+{
+	struct intel_uncore_pmu *pmu = uncore_event_to_pmu(event);
+	/*
+	 * The perf core schedules events per cpu, while uncore events are
+	 * collected by one cpu of a physical package: look up by current cpu.
+	 */
+	return uncore_pmu_to_box(pmu, smp_processor_id());
+}
+
+/*
+ * Append @leader (and, if @dogrp, its siblings whose state is above OFF) to
+ * box->event_list.  Returns the resulting number of listed events, or -EINVAL
+ * when they exceed the generic counters plus the fixed one (if fixed_ctl set).
+ */
+static int uncore_collect_events(struct intel_uncore_box *box,
+				struct perf_event *leader, bool dogrp)
+{
+	struct intel_uncore_type *type = box->pmu->type;
+	int limit = type->num_counters + (type->fixed_ctl ? 1 : 0);
+	struct perf_event *sibling;
+	int n = box->n_events;
+
+	if (n >= limit)
+		return -EINVAL;
+
+	box->event_list[n++] = leader;
+	if (!dogrp)
+		return n;
+
+	list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
+		if (sibling->state <= PERF_EVENT_STATE_OFF)
+			continue;
+
+		if (n >= limit)
+			return -EINVAL;
+		box->event_list[n++] = sibling;
+	}
+
+	return n;
+}
+
+/*
+ * Pick the scheduling constraint for @event on a pmu of @type.
+ *
+ * An all-ones hw.config (which uncore_pmu_event_init() stores for the fixed
+ * event) selects constraint_fixed.  Otherwise the first entry of
+ * type->constraints whose masked code matches the config wins, and
+ * type->unconstrainted is the fallback.
+ */
+static struct event_constraint *
+uncore_event_constraint(struct intel_uncore_type *type,
+			struct perf_event *event)
+{
+	u64 config = event->hw.config;
+	struct event_constraint *match;
+
+	if (config == ~0ULL)
+		return &constraint_fixed;
+
+	if (!type->constraints)
+		return &type->unconstrainted;
+
+	for_each_event_constraint(match, type->constraints) {
+		if ((config & match->cmask) == match->code)
+			return match;
+	}
+
+	return &type->unconstrainted;
+}
+
+/*
+ * Assign a counter index to each of the first @n events in box->event_list.
+ *
+ * @box:    box whose event_list is being scheduled
+ * @assign: output array; assign[i] receives the counter index of event i
+ * @n:      number of entries of box->event_list to place
+ *
+ * Returns 0 on success, -EINVAL when perf_assign_events() reports failure.
+ */
+static int uncore_assign_events(struct intel_uncore_box *box,
+				int assign[], int n)
+{
+	unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
+	struct event_constraint *c, *constraints[UNCORE_PMC_IDX_MAX];
+	int i, ret, wmin, wmax;
+	struct hw_perf_event *hwc;
+
+	bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
+
+	/* look up every event's constraint and track the weight range */
+	for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
+		c = uncore_event_constraint(box->pmu->type,
+				box->event_list[i]);
+		constraints[i] = c;
+		wmin = min(wmin, c->weight);
+		wmax = max(wmax, c->weight);
+	}
+
+	/* fastpath, try to reuse previous register */
+	for (i = 0; i < n; i++) {
+		hwc = &box->event_list[i]->hw;
+		c = constraints[i];
+
+		/* never assigned */
+		if (hwc->idx == -1)
+			break;
+
+		/* constraint still honored */
+		if (!test_bit(hwc->idx, c->idxmsk))
+			break;
+
+		/* not already used */
+		if (test_bit(hwc->idx, used_mask))
+			break;
+
+		__set_bit(hwc->idx, used_mask);
+		assign[i] = hwc->idx;
+	}
+	/* every event kept its previous counter: nothing more to do */
+	if (i == n)
+		return 0;
+
+	/* slow path */
+	ret = perf_assign_events(constraints, n, wmin, wmax, assign);
+	return ret ? -EINVAL : 0;
+}
+
+/*
+ * perf ->start() callback: begin counting @event on the counter selected by
+ * event->hw.idx.  Warns once and does nothing if the event is not in the
+ * stopped state or has no valid counter index.
+ */
+static void uncore_pmu_event_start(struct perf_event *event, int flags)
+{
+	struct intel_uncore_box *box = uncore_event_to_box(event);
+	int idx = event->hw.idx;
+
+	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+		return;
+
+	if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
+		return;
+
+	/* mark the event running and record it as owner of counter idx */
+	event->hw.state = 0;
+	box->events[idx] = event;
+	box->n_active++;
+	__set_bit(idx, box->active_mask);
+
+	/* remember the current counter value before enabling the event */
+	local64_set(&event->hw.prev_count, uncore_read_counter(box, event));
+	uncore_enable_event(box, event);
+
+	/* first active event: enable the box and start its hrtimer */
+	if (box->n_active == 1) {
+		uncore_enable_box(box);
+		uncore_pmu_start_hrtimer(box);
+	}
+}
+
+/*
+ * perf ->stop() callback: stop counting @event if it is active and, when
+ * @flags contains PERF_EF_UPDATE, bring its count up to date.
+ */
+static void uncore_pmu_event_stop(struct perf_event *event, int flags)
+{
+	struct intel_uncore_box *box = uncore_event_to_box(event);
+	struct hw_perf_event *hwc = &event->hw;
+
+	/* only touch the hardware if this counter was actually started */
+	if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
+		uncore_disable_event(box, event);
+		box->n_active--;
+		box->events[hwc->idx] = NULL;
+		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+		hwc->state |= PERF_HES_STOPPED;
+
+		/* last active event gone: disable the box and its hrtimer */
+		if (box->n_active == 0) {
+			uncore_disable_box(box);
+			uncore_pmu_cancel_hrtimer(box);
+		}
+	}
+
+	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
+		/*
+		 * Drain the remaining delta count out of an event
+		 * that we are disabling:
+		 */
+		uncore_perf_event_update(box, event);
+		hwc->state |= PERF_HES_UPTODATE;
+	}
+}
+
+/*
+ * perf ->add() callback: add @event to the box of the current cpu and
+ * (re)assign counters to all events collected on that box.
+ *
+ * Events whose counter changes are stopped first and then programmed into
+ * their new counter.  Events are only started when they are not marked
+ * PERF_HES_ARCH; for the new event that means @flags had PERF_EF_START.
+ *
+ * Returns 0 on success, -ENODEV when there is no box for this cpu, or the
+ * negative error from collecting/assigning the events.
+ */
+static int uncore_pmu_event_add(struct perf_event *event, int flags)
+{
+	struct intel_uncore_box *box = uncore_event_to_box(event);
+	struct hw_perf_event *hwc = &event->hw;
+	int assign[UNCORE_PMC_IDX_MAX];
+	int i, n, ret;
+
+	if (!box)
+		return -ENODEV;
+
+	/* n is the event count including the new event */
+	ret = n = uncore_collect_events(box, event, false);
+	if (ret < 0)
+		return ret;
+
+	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+	if (!(flags & PERF_EF_START))
+		hwc->state |= PERF_HES_ARCH;
+
+	ret = uncore_assign_events(box, assign, n);
+	if (ret)
+		return ret;
+
+	/* save events moving to new counters */
+	for (i = 0; i < box->n_events; i++) {
+		event = box->event_list[i];
+		hwc = &event->hw;
+
+		/* same counter and matching tag: the event stays in place */
+		if (hwc->idx == assign[i] &&
+			hwc->last_tag == box->tags[assign[i]])
+			continue;
+		/*
+		 * Ensure we don't accidentally enable a stopped
+		 * counter simply because we rescheduled.
+		 */
+		if (hwc->state & PERF_HES_STOPPED)
+			hwc->state |= PERF_HES_ARCH;
+
+		uncore_pmu_event_stop(event, PERF_EF_UPDATE);
+	}
+
+	/* reprogram moved events into new counters */
+	for (i = 0; i < n; i++) {
+		event = box->event_list[i];
+		hwc = &event->hw;
+
+		if (hwc->idx != assign[i] ||
+			hwc->last_tag != box->tags[assign[i]])
+			uncore_assign_hw_event(box, event, assign[i]);
+		else if (i < box->n_events)
+			continue;
+
+		/* PERF_HES_ARCH marks events that must stay stopped */
+		if (hwc->state & PERF_HES_ARCH)
+			continue;
+
+		uncore_pmu_event_start(event, 0);
+	}
+	box->n_events = n;
+
+	return 0;
+}
+
+/*
+ * perf ->del() callback: stop @event (updating its count), drop it from
+ * the box's event_list and forget its counter assignment.
+ */
+static void uncore_pmu_event_del(struct perf_event *event, int flags)
+{
+	struct intel_uncore_box *box = uncore_event_to_box(event);
+	int pos, last;
+
+	uncore_pmu_event_stop(event, PERF_EF_UPDATE);
+
+	/* locate the event in the box's scheduling list */
+	for (pos = 0; pos < box->n_events; pos++) {
+		if (box->event_list[pos] == event)
+			break;
+	}
+
+	if (pos < box->n_events) {
+		/* close the gap by moving the later entries down one slot */
+		last = box->n_events - 1;
+		for (; pos < last; pos++)
+			box->event_list[pos] = box->event_list[pos + 1];
+
+		box->n_events = last;
+	}
+
+	event->hw.last_tag = ~0ULL;
+	event->hw.idx = -1;
+}
+
+/*
+ * perf ->read() callback: refresh @event through
+ * uncore_perf_event_update() on the box of the current cpu.
+ */
+static void uncore_pmu_event_read(struct perf_event *event)
+{
+	uncore_perf_event_update(uncore_event_to_box(event), event);
+}
+
+/*
+ * validation ensures the group can be loaded onto the
+ * PMU if it was the only group available.
+ *
+ * A temporary box is filled with the group leader, its current siblings and
+ * @event, and counter assignment is simulated on it.
+ *
+ * Returns 0 when the group fits, -ENOMEM when the temporary box cannot be
+ * allocated and -EINVAL otherwise.
+ */
+static int uncore_validate_group(struct intel_uncore_pmu *pmu,
+				struct perf_event *event)
+{
+	struct perf_event *leader = event->group_leader;
+	struct intel_uncore_box *fake_box;
+	int assign[UNCORE_PMC_IDX_MAX];
+	int ret = -EINVAL, n;
+
+	/*
+	 * The cpu number only selects the memory node for this short-lived
+	 * allocation.  The allocation uses GFP_KERNEL, so we are in a
+	 * context that may be preempted; use the raw accessor, which does
+	 * not require (or check for) a stable cpu.
+	 */
+	fake_box = uncore_alloc_box(raw_smp_processor_id());
+	if (!fake_box)
+		return -ENOMEM;
+
+	fake_box->pmu = pmu;
+	/*
+	 * the event is not yet connected with its
+	 * siblings therefore we must first collect
+	 * existing siblings, then add the new event
+	 * before we can simulate the scheduling
+	 */
+	n = uncore_collect_events(fake_box, leader, true);
+	if (n < 0)
+		goto out;
+
+	fake_box->n_events = n;
+	n = uncore_collect_events(fake_box, event, false);
+	if (n < 0)
+		goto out;
+
+	fake_box->n_events = n;
+
+	ret = uncore_assign_events(fake_box, assign, n);
+out:
+	kfree(fake_box);
+	return ret;
+}
+
+/*
+ * perf ->event_init() callback for uncore pmus.
+ *
+ * Returns -ENOENT when @event does not belong to this pmu or the pmu has no
+ * device, -EINVAL for unsupported attributes or when no collecting cpu is
+ * available, otherwise the result of group validation (0 for an event that
+ * is its own group leader).
+ */
+int uncore_pmu_event_init(struct perf_event *event)
+{
+	struct perf_event_attr *attr = &event->attr;
+	struct hw_perf_event *hwc = &event->hw;
+	struct intel_uncore_pmu *pmu;
+	struct intel_uncore_box *box;
+
+	if (attr->type != event->pmu->type)
+		return -ENOENT;
+
+	/* a negative func_id means no device was found for this pmu */
+	pmu = uncore_event_to_pmu(event);
+	if (pmu->func_id < 0)
+		return -ENOENT;
+
+	/*
+	 * The uncore PMU counts at every privilege level all the time, so
+	 * none of the exclude bits can be honoured.
+	 */
+	if (attr->exclude_user || attr->exclude_kernel ||
+	    attr->exclude_hv || attr->exclude_idle)
+		return -EINVAL;
+
+	/* sampling is not supported yet */
+	if (hwc->sample_period)
+		return -EINVAL;
+
+	/*
+	 * All uncore events of a physical package are placed on a single
+	 * cpu: redirect the event to the cpu that collects for its box.
+	 */
+	if (event->cpu < 0)
+		return -EINVAL;
+	box = uncore_pmu_to_box(pmu, event->cpu);
+	if (!box || box->cpu < 0)
+		return -EINVAL;
+	event->cpu = box->cpu;
+
+	if (attr->config != UNCORE_FIXED_EVENT) {
+		hwc->config = attr->config & pmu->type->event_mask;
+	} else {
+		/* the type has no fixed counter at all */
+		if (!pmu->type->fixed_ctl)
+			return -EINVAL;
+		/*
+		 * with a single fixed counter per type, only the first
+		 * pmu may use it
+		 */
+		if (pmu->type->single_fixed && pmu->pmu_idx > 0)
+			return -EINVAL;
+		hwc->config = ~0ULL;
+	}
+
+	hwc->idx = -1;
+	hwc->last_tag = ~0ULL;
+
+	if (event->group_leader == event)
+		return 0;
+
+	return uncore_validate_group(pmu, event);
+}
+
+/*
+ * Fill in the struct pmu callbacks of @pmu, build its name and register it
+ * with the perf core.
+ *
+ * The name is "uncore" or "uncore_<type>" when the type has a single box,
+ * and "uncore_<type>_<idx>" otherwise.  It is formatted with snprintf() so
+ * that an unexpectedly long type name is truncated instead of overrunning
+ * the fixed-size pmu->name buffer.
+ *
+ * Returns the result of perf_pmu_register().
+ */
+static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu)
+{
+	struct intel_uncore_type *type = pmu->type;
+
+	pmu->pmu = (struct pmu) {
+		.attr_groups	= type->attr_groups,
+		.task_ctx_nr	= perf_invalid_context,
+		.event_init	= uncore_pmu_event_init,
+		.add		= uncore_pmu_event_add,
+		.del		= uncore_pmu_event_del,
+		.start		= uncore_pmu_event_start,
+		.stop		= uncore_pmu_event_stop,
+		.read		= uncore_pmu_event_read,
+	};
+
+	if (type->num_boxes != 1)
+		snprintf(pmu->name, sizeof(pmu->name), "uncore_%s_%d",
+			 type->name, pmu->pmu_idx);
+	else if (strlen(type->name) > 0)
+		snprintf(pmu->name, sizeof(pmu->name), "uncore_%s",
+			 type->name);
+	else
+		snprintf(pmu->name, sizeof(pmu->name), "uncore");
+
+	return perf_pmu_register(&pmu->pmu, pmu->name, -1);
+}
+
+/*
+ * Release everything uncore_type_init() set up for @type: the per-cpu box
+ * pointer of every pmu, the pmu array and the generated "events" attribute
+ * group.  The freed pointers are reset to NULL.
+ *
+ * Safe to call on a type whose pmu array was never attached
+ * (type->pmus == NULL), which is how an early failure in
+ * uncore_type_init() can reach this function.
+ */
+static void __init uncore_type_exit(struct intel_uncore_type *type)
+{
+	int i;
+
+	if (type->pmus) {
+		for (i = 0; i < type->num_boxes; i++)
+			free_percpu(type->pmus[i].box);
+		kfree(type->pmus);
+		type->pmus = NULL;
+	}
+	kfree(type->attr_groups[1]);
+	type->attr_groups[1] = NULL;
+}
+
+/*
+ * Tear down every type in the NULL-terminated array @types.
+ *
+ * Marked __init because it calls the __init function uncore_type_exit();
+ * without the annotation this is a section mismatch.
+ */
+static void __init uncore_types_exit(struct intel_uncore_type **types)
+{
+	int i;
+	for (i = 0; types[i]; i++)
+		uncore_type_exit(types[i]);
+}
+
+/*
+ * Set up the software state of one uncore type: allocate one pmu per box
+ * with its per-cpu box pointer, build the default constraint that allows
+ * every generic counter, and, when the type describes events, build the
+ * "events" attribute group stored in attr_groups[1].
+ *
+ * Returns 0 on success or -ENOMEM; on failure everything allocated here has
+ * been released again.
+ */
+static int __init uncore_type_init(struct intel_uncore_type *type)
+{
+	struct intel_uncore_pmu *pmus;
+	struct attribute_group *events_group;
+	struct attribute **attrs;
+	int i, j;
+
+	pmus = kzalloc(sizeof(*pmus) * type->num_boxes, GFP_KERNEL);
+	if (!pmus)
+		return -ENOMEM;
+
+	/*
+	 * Attach the array before anything else can fail: the error path
+	 * relies on uncore_type_exit(), which frees through type->pmus.
+	 * Entries not reached yet have a NULL box pointer (kzalloc), and
+	 * freeing those is harmless.
+	 */
+	type->pmus = pmus;
+
+	type->unconstrainted = (struct event_constraint)
+		__EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
+				0, type->num_counters, 0);
+
+	for (i = 0; i < type->num_boxes; i++) {
+		pmus[i].func_id = -1;
+		pmus[i].pmu_idx = i;
+		pmus[i].type = type;
+		INIT_LIST_HEAD(&pmus[i].box_list);
+		pmus[i].box = alloc_percpu(struct intel_uncore_box *);
+		if (!pmus[i].box)
+			goto fail;
+	}
+
+	if (type->event_descs) {
+		/* count the descriptors; the table ends with a NULL name */
+		i = 0;
+		while (type->event_descs[i].attr.attr.name)
+			i++;
+
+		/* one allocation: the group followed by its attrs[] array */
+		events_group = kzalloc(sizeof(struct attribute *) * (i + 1) +
+					sizeof(*events_group), GFP_KERNEL);
+		if (!events_group)
+			goto fail;
+
+		attrs = (struct attribute **)(events_group + 1);
+		events_group->name = "events";
+		events_group->attrs = attrs;
+
+		for (j = 0; j < i; j++)
+			attrs[j] = &type->event_descs[j].attr.attr;
+
+		type->attr_groups[1] = events_group;
+	}
+
+	return 0;
+fail:
+	uncore_type_exit(type);
+	return -ENOMEM;
+}
+
+/*
+ * Initialize every type in the NULL-terminated array @types.
+ *
+ * Returns 0 on success.  If one type fails, the types initialized before it
+ * are torn down again and the error is returned.
+ */
+static int __init uncore_types_init(struct intel_uncore_type **types)
+{
+	int idx, err;
+
+	for (idx = 0; types[idx]; idx++) {
+		err = uncore_type_init(types[idx]);
+		if (!err)
+			continue;
+
+		/* unwind the types that completed initialization */
+		while (idx-- > 0)
+			uncore_type_exit(types[idx]);
+		return err;
+	}
+
+	return 0;
+}
+
+/* PCI driver chosen for the running cpu model in uncore_pci_init() */
+static struct pci_driver *uncore_pci_driver;
+/* true while uncore_pci_driver is registered with the PCI core */
+static bool pcidrv_registered;
+
+/*
+ * add a pci uncore device
+ *
+ * Allocates a box for @pdev, binds it to the pmu of @type that already uses
+ * the device's function id (or to the first pmu that has none yet),
+ * initializes the box and links it into that pmu's box_list.
+ *
+ * Returns 0 on success, -ENODEV when the bus has no known physical package,
+ * -ENOMEM when the box cannot be allocated and -EINVAL when no pmu of @type
+ * can take the device.
+ */
+static int __devinit uncore_pci_add(struct intel_uncore_type *type,
+				    struct pci_dev *pdev)
+{
+	/* must start out NULL: the search loop may run zero times */
+	struct intel_uncore_pmu *pmu = NULL;
+	struct intel_uncore_box *box;
+	int i, phys_id;
+
+	phys_id = pcibus_to_physid[pdev->bus->number];
+	if (phys_id < 0)
+		return -ENODEV;
+
+	box = uncore_alloc_box(0);
+	if (!box)
+		return -ENOMEM;
+
+	/*
+	 * for performance monitoring unit with multiple boxes,
+	 * each box has a different function id.
+	 */
+	for (i = 0; i < type->num_boxes; i++) {
+		pmu = &type->pmus[i];
+		if (pmu->func_id == pdev->devfn)
+			break;
+		if (pmu->func_id < 0) {
+			pmu->func_id = pdev->devfn;
+			break;
+		}
+		pmu = NULL;
+	}
+
+	if (!pmu) {
+		kfree(box);
+		return -EINVAL;
+	}
+
+	box->phys_id = phys_id;
+	box->pci_dev = pdev;
+	box->pmu = pmu;
+	uncore_box_init(box);
+	pci_set_drvdata(pdev, box);
+
+	raw_spin_lock(&uncore_box_lock);
+	list_add_tail(&box->list, &pmu->box_list);
+	raw_spin_unlock(&uncore_box_lock);
+
+	return 0;
+}
+
+/*
+ * PCI ->remove() callback: unlink the box attached to @pdev from its pmu,
+ * clear every per-cpu pointer that still refers to it (dropping the
+ * reference each one holds) and free the box.
+ */
+static void uncore_pci_remove(struct pci_dev *pdev)
+{
+	struct intel_uncore_box *box = pci_get_drvdata(pdev);
+	struct intel_uncore_pmu *pmu = box->pmu;
+	int phys_id = pcibus_to_physid[pdev->bus->number];
+	struct intel_uncore_box **slot;
+	int cpu;
+
+	if (WARN_ON_ONCE(phys_id != box->phys_id))
+		return;
+
+	raw_spin_lock(&uncore_box_lock);
+	list_del(&box->list);
+	raw_spin_unlock(&uncore_box_lock);
+
+	for_each_possible_cpu(cpu) {
+		slot = per_cpu_ptr(pmu->box, cpu);
+		if (*slot != box)
+			continue;
+
+		*slot = NULL;
+		atomic_dec(&box->refcnt);
+	}
+
+	/* only the initial reference from allocation should be left */
+	WARN_ON_ONCE(atomic_read(&box->refcnt) != 1);
+	kfree(box);
+}
+
+/*
+ * PCI ->probe() callback: the matched id's driver_data carries the uncore
+ * type of the device; hand both to uncore_pci_add().
+ */
+static int __devinit uncore_pci_probe(struct pci_dev *pdev,
+				const struct pci_device_id *id)
+{
+	return uncore_pci_add((struct intel_uncore_type *)id->driver_data,
+			      pdev);
+}
+
+/*
+ * Select the PCI uncore types and driver for the boot cpu model, initialize
+ * the types and register the driver.
+ *
+ * Returns 0 on success or when the model has no PCI uncore support,
+ * otherwise the error from type initialization or driver registration.
+ */
+static int __init uncore_pci_init(void)
+{
+	int err;
+
+	switch (boot_cpu_data.x86_model) {
+	case 45: /* Sandy Bridge-EP */
+		pci_uncores = snbep_pci_uncores;
+		uncore_pci_driver = &snbep_uncore_pci_driver;
+		snbep_pci2phy_map_init();
+		break;
+	default:
+		return 0;
+	}
+
+	err = uncore_types_init(pci_uncores);
+	if (err)
+		return err;
+
+	uncore_pci_driver->probe = uncore_pci_probe;
+	uncore_pci_driver->remove = uncore_pci_remove;
+
+	err = pci_register_driver(uncore_pci_driver);
+	if (err) {
+		/* registration failed: undo the type setup */
+		uncore_types_exit(pci_uncores);
+		return err;
+	}
+
+	pcidrv_registered = true;
+	return 0;
+}
+
+/* Undo uncore_pci_init(); does nothing if the driver was not registered. */
+static void __init uncore_pci_exit(void)
+{
+	if (!pcidrv_registered)
+		return;
+
+	pcidrv_registered = false;
+	pci_unregister_driver(uncore_pci_driver);
+	uncore_types_exit(pci_uncores);
+}
+
+/*
+ * Detach @cpu from the box of every MSR uncore pmu: clear the per-cpu
+ * pointer, drop the reference it held and free the box once the last
+ * reference is gone.
+ */
+static void __cpuinit uncore_cpu_dying(int cpu)
+{
+	struct intel_uncore_type *type;
+	struct intel_uncore_box **slot, *box;
+	int t, b;
+
+	for (t = 0; msr_uncores[t]; t++) {
+		type = msr_uncores[t];
+		for (b = 0; b < type->num_boxes; b++) {
+			slot = per_cpu_ptr(type->pmus[b].box, cpu);
+			box = *slot;
+			*slot = NULL;
+			if (!box)
+				continue;
+
+			if (atomic_dec_and_test(&box->refcnt))
+				kfree(box);
+		}
+	}
+}
+
+/*
+ * Finish the MSR box setup for @cpu.
+ *
+ * For every MSR uncore pmu, either keep the box already prepared for this
+ * cpu, or share the box that an online cpu of the same physical package
+ * already uses.  Always returns 0.
+ */
+static int __cpuinit uncore_cpu_starting(int cpu)
+{
+	struct intel_uncore_type *type;
+	struct intel_uncore_pmu *pmu;
+	struct intel_uncore_box *box, *exist;
+	int i, j, k, phys_id;
+
+	phys_id = topology_physical_package_id(cpu);
+
+	for (i = 0; msr_uncores[i]; i++) {
+		type = msr_uncores[i];
+		for (j = 0; j < type->num_boxes; j++) {
+			pmu = &type->pmus[j];
+			box = *per_cpu_ptr(pmu->box, cpu);
+			/* called by uncore_cpu_init? */
+			if (box && box->phys_id >= 0) {
+				uncore_box_init(box);
+				continue;
+			}
+
+			/*
+			 * Look for a box of the same package on an online
+			 * cpu; if there is one, take a reference to it and
+			 * discard the box prepared for this cpu.
+			 */
+			for_each_online_cpu(k) {
+				exist = *per_cpu_ptr(pmu->box, k);
+				if (exist && exist->phys_id == phys_id) {
+					atomic_inc(&exist->refcnt);
+					*per_cpu_ptr(pmu->box, cpu) = exist;
+					kfree(box);
+					box = NULL;
+					break;
+				}
+			}
+
+			/* nothing to share: the prepared box serves the package */
+			if (box) {
+				box->phys_id = phys_id;
+				uncore_box_init(box);
+			}
+		}
+	}
+	return 0;
+}
+
+/*
+ * Allocate a box for @cpu on every MSR uncore pmu and store it in the pmu's
+ * per-cpu pointer, tagged with @phys_id.  A pmu without a function id gets
+ * its box index as id.
+ *
+ * Returns 0 on success, -ENOMEM as soon as one allocation fails (boxes
+ * allocated before that stay installed).
+ */
+static int __cpuinit uncore_cpu_prepare(int cpu, int phys_id)
+{
+	struct intel_uncore_type *type;
+	struct intel_uncore_pmu *pmu;
+	struct intel_uncore_box *fresh;
+	int t, b;
+
+	for (t = 0; (type = msr_uncores[t]) != NULL; t++) {
+		for (b = 0; b < type->num_boxes; b++) {
+			pmu = type->pmus + b;
+			if (pmu->func_id < 0)
+				pmu->func_id = b;
+
+			fresh = uncore_alloc_box(cpu);
+			if (!fresh)
+				return -ENOMEM;
+
+			fresh->phys_id = phys_id;
+			fresh->pmu = pmu;
+			*per_cpu_ptr(pmu->box, cpu) = fresh;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Change the cpu that collects events for the boxes of all pmus in
+ * @uncores.
+ *
+ * @old_cpu < 0:  @new_cpu becomes the first collector of its boxes.
+ * @new_cpu >= 0: collection moves from @old_cpu to @new_cpu, including the
+ *                perf context of each pmu.
+ * @new_cpu < 0:  @old_cpu stops collecting and nobody takes over.
+ */
+static void __cpuinit uncore_change_context(struct intel_uncore_type **uncores,
+					    int old_cpu, int new_cpu)
+{
+	struct intel_uncore_type *type;
+	struct intel_uncore_pmu *pmu;
+	struct intel_uncore_box *box;
+	int i, j;
+
+	for (i = 0; uncores[i]; i++) {
+		type = uncores[i];
+		for (j = 0; j < type->num_boxes; j++) {
+			pmu = &type->pmus[j];
+			/* find the box through whichever cpu is valid */
+			if (old_cpu < 0)
+				box = uncore_pmu_to_box(pmu, new_cpu);
+			else
+				box = uncore_pmu_to_box(pmu, old_cpu);
+			if (!box)
+				continue;
+
+			if (old_cpu < 0) {
+				WARN_ON_ONCE(box->cpu != -1);
+				box->cpu = new_cpu;
+				continue;
+			}
+
+			WARN_ON_ONCE(box->cpu != old_cpu);
+			if (new_cpu >= 0) {
+				/* stop the timer before moving the context */
+				uncore_pmu_cancel_hrtimer(box);
+				perf_pmu_migrate_context(&pmu->pmu,
+						old_cpu, new_cpu);
+				box->cpu = new_cpu;
+			} else {
+				box->cpu = -1;
+			}
+		}
+	}
+}
+
+/*
+ * @cpu is going away.  If it was collecting uncore events, hand that role
+ * to another online cpu of the same physical package, or to nobody (-1)
+ * when there is none.
+ */
+static void __cpuinit uncore_event_exit_cpu(int cpu)
+{
+	int other, pkg, target = -1;
+
+	/* nothing to do unless this cpu is a collector */
+	if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
+		return;
+
+	/* pick the first other online cpu in the same package */
+	pkg = topology_physical_package_id(cpu);
+	for_each_online_cpu(other) {
+		if (other != cpu &&
+		    topology_physical_package_id(other) == pkg) {
+			target = other;
+			break;
+		}
+	}
+
+	/* migrate uncore events to the new cpu */
+	if (target >= 0)
+		cpumask_set_cpu(target, &uncore_cpu_mask);
+
+	uncore_change_context(msr_uncores, cpu, target);
+	uncore_change_context(pci_uncores, cpu, target);
+}
+
+/*
+ * Make @cpu the collector of uncore events for its physical package,
+ * unless the package already has a collecting cpu in uncore_cpu_mask.
+ */
+static void __cpuinit uncore_event_init_cpu(int cpu)
+{
+	int pkg = topology_physical_package_id(cpu);
+	int collector;
+
+	for_each_cpu(collector, &uncore_cpu_mask) {
+		if (topology_physical_package_id(collector) == pkg)
+			return;
+	}
+
+	cpumask_set_cpu(cpu, &uncore_cpu_mask);
+
+	uncore_change_context(msr_uncores, -1, cpu);
+	uncore_change_context(pci_uncores, -1, cpu);
+}
+
+/*
+ * CPU hotplug notifier.  Each notification is handled in two steps: first
+ * the per-cpu box data is allocated, finished or released, then the cpu
+ * that collects uncore events for the package is (re)selected.  The
+ * CPU_TASKS_FROZEN modifier is ignored.  Always returns NOTIFY_OK.
+ */
+static int __cpuinit uncore_cpu_notifier(struct notifier_block *self,
+					 unsigned long action, void *hcpu)
+{
+	unsigned long what = action & ~CPU_TASKS_FROZEN;
+	unsigned int cpu = (long)hcpu;
+
+	/* allocate/free data structure for uncore box */
+	switch (what) {
+	case CPU_UP_PREPARE:
+		uncore_cpu_prepare(cpu, -1);
+		break;
+	case CPU_STARTING:
+		uncore_cpu_starting(cpu);
+		break;
+	case CPU_UP_CANCELED:
+	case CPU_DYING:
+		uncore_cpu_dying(cpu);
+		break;
+	default:
+		break;
+	}
+
+	/* select the cpu that collects uncore events */
+	if (what == CPU_DOWN_FAILED || what == CPU_STARTING)
+		uncore_event_init_cpu(cpu);
+	else if (what == CPU_DOWN_PREPARE)
+		uncore_event_exit_cpu(cpu);
+
+	return NOTIFY_OK;
+}
+
+/* hotplug notifier block, registered by uncore_cpu_init() */
+static struct notifier_block uncore_cpu_nb __cpuinitdata = {
+	.notifier_call = uncore_cpu_notifier,
+	/*
+	 * to migrate uncore events, our notifier should be executed
+	 * before perf core's notifier.
+	 */
+	.priority = CPU_PRI_PERF + 1,
+};
+
+/* on_each_cpu() callback: finish the box setup of the executing cpu. */
+static void __init uncore_cpu_setup(void *dummy)
+{
+	int this_cpu = smp_processor_id();
+
+	uncore_cpu_starting(this_cpu);
+}
+
+/*
+ * Set up the MSR based uncore pmus for the boot cpu model.
+ *
+ * With cpu hotplug held off, the first online cpu found for each physical
+ * package gets its boxes allocated and becomes the package's collecting
+ * cpu; uncore_cpu_starting() is then run on every cpu and the hotplug
+ * notifier is registered.
+ *
+ * Returns 0 on success or for models without MSR uncore support, otherwise
+ * the error from uncore_types_init().
+ */
+static int __init uncore_cpu_init(void)
+{
+	int ret, cpu;
+
+	switch (boot_cpu_data.x86_model) {
+	case 26: /* Nehalem */
+	case 30:
+	case 37: /* Westmere */
+	case 44:
+		msr_uncores = nhm_msr_uncores;
+		break;
+	case 42: /* Sandy Bridge */
+		msr_uncores = snb_msr_uncores;
+		break;
+	case 45: /* Sandy Bridge-EP */
+		msr_uncores = snbep_msr_uncores;
+		break;
+	default:
+		return 0;
+	}
+
+	ret = uncore_types_init(msr_uncores);
+	if (ret)
+		return ret;
+
+	get_online_cpus();
+
+	for_each_online_cpu(cpu) {
+		int i, phys_id = topology_physical_package_id(cpu);
+
+		/* skip cpus whose package already has a collecting cpu */
+		for_each_cpu(i, &uncore_cpu_mask) {
+			if (phys_id == topology_physical_package_id(i)) {
+				phys_id = -1;
+				break;
+			}
+		}
+		if (phys_id < 0)
+			continue;
+
+		uncore_cpu_prepare(cpu, phys_id);
+		uncore_event_init_cpu(cpu);
+	}
+	on_each_cpu(uncore_cpu_setup, NULL, 1);
+
+	register_cpu_notifier(&uncore_cpu_nb);
+
+	put_online_cpus();
+
+	return 0;
+}
+
+/* Register every pmu of every type in the NULL-terminated array @types. */
+static void __init uncore_register_type_pmus(struct intel_uncore_type **types)
+{
+	struct intel_uncore_type *type;
+	int t, b;
+
+	for (t = 0; types[t]; t++) {
+		type = types[t];
+		for (b = 0; b < type->num_boxes; b++)
+			uncore_pmu_register(&type->pmus[b]);
+	}
+}
+
+/*
+ * Register all MSR based and then all PCI based uncore pmus with the perf
+ * core.  Individual registration results are not checked; always returns 0.
+ */
+static int __init uncore_pmus_register(void)
+{
+	uncore_register_type_pmus(msr_uncores);
+	uncore_register_type_pmus(pci_uncores);
+
+	return 0;
+}
+
+/*
+ * Driver entry point (device_initcall).  Only Intel cpus are supported.
+ * PCI uncore support is set up first, then MSR uncore support; if the
+ * latter fails the PCI setup is undone.  On success all pmus are
+ * registered.
+ */
+static int __init intel_uncore_init(void)
+{
+	int err;
+
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+		return -ENODEV;
+
+	err = uncore_pci_init();
+	if (err)
+		return err;
+
+	err = uncore_cpu_init();
+	if (err) {
+		uncore_pci_exit();
+		return err;
+	}
+
+	uncore_pmus_register();
+	return 0;
+}
+device_initcall(intel_uncore_init);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
new file mode 100644
index 0000000..4d52db0
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -0,0 +1,406 @@
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/perf_event.h>
+#include "perf_event.h"
+
+#define UNCORE_PMU_NAME_LEN		32
+#define UNCORE_BOX_HASH_SIZE		8
+
+/*
+ * hrtimer period in nanoseconds.  The multiplication is done in 64 bits:
+ * 60 * 10^9 does not fit into a 32-bit long.
+ */
+#define UNCORE_PMU_HRTIMER_INTERVAL	(60LL * NSEC_PER_SEC)
+
+#define UNCORE_FIXED_EVENT		0xffff
+#define UNCORE_PMC_IDX_MAX_GENERIC	8
+#define UNCORE_PMC_IDX_FIXED		UNCORE_PMC_IDX_MAX_GENERIC
+#define UNCORE_PMC_IDX_MAX		(UNCORE_PMC_IDX_FIXED + 1)
+
+#define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff)
+
+/* SNB event control */
+#define SNB_UNC_CTL_EV_SEL_MASK			0x000000ff
+#define SNB_UNC_CTL_UMASK_MASK			0x0000ff00
+#define SNB_UNC_CTL_EDGE_DET			(1 << 18)
+#define SNB_UNC_CTL_EN				(1 << 22)
+#define SNB_UNC_CTL_INVERT			(1 << 23)
+#define SNB_UNC_CTL_CMASK_MASK			0x1f000000
+#define NHM_UNC_CTL_CMASK_MASK			0xff000000
+#define NHM_UNC_FIXED_CTR_CTL_EN		(1 << 0)
+
+#define SNB_UNC_RAW_EVENT_MASK			(SNB_UNC_CTL_EV_SEL_MASK | \
+						 SNB_UNC_CTL_UMASK_MASK | \
+						 SNB_UNC_CTL_EDGE_DET | \
+						 SNB_UNC_CTL_INVERT | \
+						 SNB_UNC_CTL_CMASK_MASK)
+
+#define NHM_UNC_RAW_EVENT_MASK			(SNB_UNC_CTL_EV_SEL_MASK | \
+						 SNB_UNC_CTL_UMASK_MASK | \
+						 SNB_UNC_CTL_EDGE_DET | \
+						 SNB_UNC_CTL_INVERT | \
+						 NHM_UNC_CTL_CMASK_MASK)
+
+/* SNB global control register */
+#define SNB_UNC_PERF_GLOBAL_CTL                 0x391
+#define SNB_UNC_FIXED_CTR_CTRL                  0x394
+#define SNB_UNC_FIXED_CTR                       0x395
+
+/* SNB uncore global control */
+#define SNB_UNC_GLOBAL_CTL_CORE_ALL             ((1 << 4) - 1)
+#define SNB_UNC_GLOBAL_CTL_EN                   (1 << 29)
+
+/* SNB Cbo register */
+#define SNB_UNC_CBO_0_PERFEVTSEL0               0x700
+#define SNB_UNC_CBO_0_PER_CTR0                  0x706
+#define SNB_UNC_CBO_MSR_OFFSET                  0x10
+
+/* NHM global control register */
+#define NHM_UNC_PERF_GLOBAL_CTL                 0x391
+#define NHM_UNC_FIXED_CTR                       0x394
+#define NHM_UNC_FIXED_CTR_CTRL                  0x395
+
+/* NHM uncore global control */
+#define NHM_UNC_GLOBAL_CTL_EN_PC_ALL            ((1ULL << 8) - 1)
+#define NHM_UNC_GLOBAL_CTL_EN_FC                (1ULL << 32)
+
+/* NHM uncore register */
+#define NHM_UNC_PERFEVTSEL0                     0x3c0
+#define NHM_UNC_UNCORE_PMC0                     0x3b0
+
+/* SNB-EP Box level control */
+#define SNBEP_PMON_BOX_CTL_RST_CTRL	(1 << 0)
+#define SNBEP_PMON_BOX_CTL_RST_CTRS	(1 << 1)
+#define SNBEP_PMON_BOX_CTL_FRZ		(1 << 8)
+#define SNBEP_PMON_BOX_CTL_FRZ_EN	(1 << 16)
+#define SNBEP_PMON_BOX_CTL_INT		(SNBEP_PMON_BOX_CTL_RST_CTRL | \
+					 SNBEP_PMON_BOX_CTL_RST_CTRS | \
+					 SNBEP_PMON_BOX_CTL_FRZ_EN)
+/* SNB-EP event control */
+#define SNBEP_PMON_CTL_EV_SEL_MASK	0x000000ff
+#define SNBEP_PMON_CTL_UMASK_MASK	0x0000ff00
+#define SNBEP_PMON_CTL_RST		(1 << 17)
+#define SNBEP_PMON_CTL_EDGE_DET		(1 << 18)
+#define SNBEP_PMON_CTL_EV_SEL_EXT	(1 << 21)	/* only for QPI */
+#define SNBEP_PMON_CTL_EN		(1 << 22)
+#define SNBEP_PMON_CTL_INVERT		(1 << 23)
+#define SNBEP_PMON_CTL_TRESH_MASK	0xff000000
+#define SNBEP_PMON_RAW_EVENT_MASK	(SNBEP_PMON_CTL_EV_SEL_MASK | \
+					 SNBEP_PMON_CTL_UMASK_MASK | \
+					 SNBEP_PMON_CTL_EDGE_DET | \
+					 SNBEP_PMON_CTL_INVERT | \
+					 SNBEP_PMON_CTL_TRESH_MASK)
+
+/* SNB-EP Ubox event control */
+#define SNBEP_U_MSR_PMON_CTL_TRESH_MASK		0x1f000000
+#define SNBEP_U_MSR_PMON_RAW_EVENT_MASK		\
+				(SNBEP_PMON_CTL_EV_SEL_MASK | \
+				 SNBEP_PMON_CTL_UMASK_MASK | \
+				 SNBEP_PMON_CTL_EDGE_DET | \
+				 SNBEP_PMON_CTL_INVERT | \
+				 SNBEP_U_MSR_PMON_CTL_TRESH_MASK)
+
+/* SNB-EP PCU event control */
+#define SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK	0x0000c000
+#define SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK	0x1f000000
+#define SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT	(1 << 30)
+/* bit 31: use an unsigned constant, 1 << 31 overflows a signed int */
+#define SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET	(1U << 31)
+#define SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK	\
+				(SNBEP_PMON_CTL_EV_SEL_MASK | \
+				 SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \
+				 SNBEP_PMON_CTL_EDGE_DET | \
+				 SNBEP_PMON_CTL_INVERT | \
+				 SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \
+				 SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \
+				 SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET)
+
+/* SNB-EP pci control register */
+#define SNBEP_PCI_PMON_BOX_CTL			0xf4
+#define SNBEP_PCI_PMON_CTL0			0xd8
+/* SNB-EP pci counter register */
+#define SNBEP_PCI_PMON_CTR0			0xa0
+
+/* SNB-EP home agent register */
+#define SNBEP_HA_PCI_PMON_BOX_ADDRMATCH0	0x40
+#define SNBEP_HA_PCI_PMON_BOX_ADDRMATCH1	0x44
+#define SNBEP_HA_PCI_PMON_BOX_OPCODEMATCH	0x48
+/* SNB-EP memory controller register */
+#define SNBEP_MC_CHy_PCI_PMON_FIXED_CTL		0xf0
+#define SNBEP_MC_CHy_PCI_PMON_FIXED_CTR		0xd0
+/* SNB-EP QPI register */
+#define SNBEP_Q_Py_PCI_PMON_PKT_MATCH0		0x228
+#define SNBEP_Q_Py_PCI_PMON_PKT_MATCH1		0x22c
+#define SNBEP_Q_Py_PCI_PMON_PKT_MASK0		0x238
+#define SNBEP_Q_Py_PCI_PMON_PKT_MASK1		0x23c
+
+/* SNB-EP Ubox register */
+#define SNBEP_U_MSR_PMON_CTR0			0xc16
+#define SNBEP_U_MSR_PMON_CTL0			0xc10
+
+#define SNBEP_U_MSR_PMON_UCLK_FIXED_CTL		0xc08
+#define SNBEP_U_MSR_PMON_UCLK_FIXED_CTR		0xc09
+
+/* SNB-EP Cbo register */
+#define SNBEP_C0_MSR_PMON_CTR0			0xd16
+#define SNBEP_C0_MSR_PMON_CTL0			0xd10
+#define SNBEP_C0_MSR_PMON_BOX_FILTER		0xd14
+#define SNBEP_C0_MSR_PMON_BOX_CTL		0xd04
+#define SNBEP_CBO_MSR_OFFSET			0x20
+
+/* SNB-EP PCU register */
+#define SNBEP_PCU_MSR_PMON_CTR0			0xc36
+#define SNBEP_PCU_MSR_PMON_CTL0			0xc30
+#define SNBEP_PCU_MSR_PMON_BOX_FILTER		0xc34
+#define SNBEP_PCU_MSR_PMON_BOX_CTL		0xc24
+#define SNBEP_PCU_MSR_CORE_C3_CTR		0x3fc
+#define SNBEP_PCU_MSR_CORE_C6_CTR		0x3fd
+
+struct intel_uncore_ops;
+struct intel_uncore_pmu;
+struct intel_uncore_box;
+struct uncore_event_desc;
+
+/*
+ * Static description of one kind of uncore unit plus the run-time state
+ * shared by all boxes of that kind.
+ */
+struct intel_uncore_type {
+	const char *name;	/* used to build the pmu name "uncore_<name>" */
+	int num_counters;	/* generic counters per box */
+	int num_boxes;	/* number of pmus allocated for this type */
+	int perf_ctr_bits;	/* presumably the generic counter width */
+	int fixed_ctr_bits;	/* presumably the fixed counter width */
+	int single_fixed;	/* only pmu index 0 may use the fixed event */
+	unsigned perf_ctr;	/* address of the first generic counter */
+	unsigned event_ctl;	/* address of the first event control */
+	unsigned event_mask;	/* attr.config bits kept in hw.config */
+	unsigned fixed_ctr;	/* address of the fixed counter */
+	unsigned fixed_ctl;	/* fixed counter control, 0 if none */
+	unsigned box_ctl;	/* box control register, 0 if none */
+	unsigned msr_offset;	/* MSR address step from one box to the next */
+	struct event_constraint unconstrainted;	/* any generic counter */
+	struct event_constraint *constraints;	/* optional per-event table */
+	struct intel_uncore_pmu *pmus;	/* array of num_boxes pmus */
+	struct intel_uncore_ops *ops;	/* hardware access callbacks */
+	struct uncore_event_desc *event_descs;	/* optional named events */
+	const struct attribute_group *attr_groups[3];	/* [1] is "events" */
+};
+
+#define format_group attr_groups[0]
+
+/*
+ * Hardware access callbacks of an uncore type.  The inline wrappers in
+ * this header treat init_box, disable_box and enable_box as optional (they
+ * are checked for NULL) and call the remaining callbacks unconditionally.
+ */
+struct intel_uncore_ops {
+	void (*init_box)(struct intel_uncore_box *);
+	void (*disable_box)(struct intel_uncore_box *);
+	void (*enable_box)(struct intel_uncore_box *);
+	void (*disable_event)(struct intel_uncore_box *, struct perf_event *);
+	void (*enable_event)(struct intel_uncore_box *, struct perf_event *);
+	u64 (*read_counter)(struct intel_uncore_box *, struct perf_event *);
+};
+
+/* One perf pmu instance; a type has num_boxes of these. */
+struct intel_uncore_pmu {
+	struct pmu pmu;	/* embedded perf core pmu */
+	char name[UNCORE_PMU_NAME_LEN];	/* name registered with perf */
+	int pmu_idx;	/* index of this pmu inside type->pmus */
+	int func_id;	/* -1 until a device or box claims this pmu */
+	struct intel_uncore_type *type;	/* owning type */
+	struct intel_uncore_box ** __percpu box;	/* box used on each cpu */
+	struct list_head box_list;	/* boxes linked through box->list */
+};
+
+/* Run-time state of one uncore box (one unit in one physical package). */
+struct intel_uncore_box {
+	int phys_id;	/* physical package id, -1 while unassigned */
+	int n_active;	/* number of active events */
+	int n_events;	/* number of valid entries in event_list */
+	int cpu;	/* cpu to collect events */
+	unsigned long flags;	/* UNCORE_BOX_FLAG_* bits */
+	atomic_t refcnt;	/* 1 at allocation, +1 per cpu sharing the box */
+	struct perf_event *events[UNCORE_PMC_IDX_MAX];	/* started event per counter */
+	struct perf_event *event_list[UNCORE_PMC_IDX_MAX];	/* events collected for scheduling */
+	unsigned long active_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];	/* started counters */
+	u64 tags[UNCORE_PMC_IDX_MAX];	/* compared with hw.last_tag on reschedule */
+	struct pci_dev *pci_dev;	/* set for PCI boxes, NULL for MSR boxes */
+	struct intel_uncore_pmu *pmu;	/* pmu this box belongs to */
+	struct hrtimer hrtimer;	/* started with the first active event */
+	struct list_head list;	/* link in pmu->box_list */
+};
+
+#define UNCORE_BOX_FLAG_INITIATED	0
+
+/* A named event exported through the pmu's "events" attribute group. */
+struct uncore_event_desc {
+	struct kobj_attribute attr;	/* sysfs attribute, shown by uncore_event_show() */
+	const char *config;	/* string printed when the attribute is read */
+};
+
+/*
+ * Initializer for a struct uncore_event_desc: a read-only (0444) attribute
+ * called _name whose content is the string _config.
+ */
+#define INTEL_UNCORE_EVENT_DESC(_name, _config)			\
+{								\
+	.attr	= __ATTR(_name, 0444, uncore_event_show, NULL),	\
+	.config	= _config,					\
+}
+
+/*
+ * Define format_attr_<_var>: a read-only (0444) kobj_attribute named _name
+ * whose show routine prints the string literal _format followed by a
+ * newline.  The build fails if _format could not fit into one page.
+ */
+#define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format)			\
+static ssize_t __uncore_##_var##_show(struct kobject *kobj,		\
+				struct kobj_attribute *attr,		\
+				char *page)				\
+{									\
+	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);			\
+	return sprintf(page, _format "\n");				\
+}									\
+static struct kobj_attribute format_attr_##_var =			\
+	__ATTR(_name, 0444, __uncore_##_var##_show, NULL)
+
+
+/*
+ * sysfs show routine for event attributes: print the config string of the
+ * uncore_event_desc that embeds @attr.
+ */
+static ssize_t uncore_event_show(struct kobject *kobj,
+				struct kobj_attribute *attr, char *buf)
+{
+	struct uncore_event_desc *desc;
+
+	desc = container_of(attr, struct uncore_event_desc, attr);
+	return sprintf(buf, "%s", desc->config);
+}
+
+/* Box control register of a PCI box: the type's box_ctl, unmodified. */
+static inline unsigned uncore_pci_box_ctl(struct intel_uncore_box *box)
+{
+	struct intel_uncore_type *type = box->pmu->type;
+
+	return type->box_ctl;
+}
+
+/* Fixed counter control of a PCI box: the type's fixed_ctl, unmodified. */
+static inline unsigned uncore_pci_fixed_ctl(struct intel_uncore_box *box)
+{
+	struct intel_uncore_type *type = box->pmu->type;
+
+	return type->fixed_ctl;
+}
+
+/* Fixed counter register of a PCI box: the type's fixed_ctr, unmodified. */
+static inline unsigned uncore_pci_fixed_ctr(struct intel_uncore_box *box)
+{
+	struct intel_uncore_type *type = box->pmu->type;
+
+	return type->fixed_ctr;
+}
+
+/* Event control @idx of a PCI box; consecutive controls are 4 apart. */
+static inline
+unsigned uncore_pci_event_ctl(struct intel_uncore_box *box, int idx)
+{
+	struct intel_uncore_type *type = box->pmu->type;
+
+	return type->event_ctl + idx * 4;
+}
+
+/* Counter @idx of a PCI box; consecutive counters are 8 apart. */
+static inline
+unsigned uncore_pci_perf_ctr(struct intel_uncore_box *box, int idx)
+{
+	struct intel_uncore_type *type = box->pmu->type;
+
+	return type->perf_ctr + idx * 8;
+}
+
+/*
+ * Box control MSR of @box: the type's box_ctl advanced by msr_offset for
+ * every preceding pmu index, or 0 when the type has no box control.
+ */
+static inline
+unsigned uncore_msr_box_ctl(struct intel_uncore_box *box)
+{
+	struct intel_uncore_pmu *pmu = box->pmu;
+	struct intel_uncore_type *type = pmu->type;
+
+	return type->box_ctl ?
+		type->box_ctl + type->msr_offset * pmu->pmu_idx : 0;
+}
+
+/*
+ * Fixed counter control MSR of @box: the type's fixed_ctl advanced by
+ * msr_offset for every preceding pmu index, or 0 when the type has no
+ * fixed counter control.
+ */
+static inline
+unsigned uncore_msr_fixed_ctl(struct intel_uncore_box *box)
+{
+	struct intel_uncore_pmu *pmu = box->pmu;
+	struct intel_uncore_type *type = pmu->type;
+
+	return type->fixed_ctl ?
+		type->fixed_ctl + type->msr_offset * pmu->pmu_idx : 0;
+}
+
+/*
+ * Fixed counter MSR of @box: the type's fixed_ctr advanced by msr_offset
+ * for every preceding pmu index.
+ */
+static inline
+unsigned uncore_msr_fixed_ctr(struct intel_uncore_box *box)
+{
+	struct intel_uncore_pmu *pmu = box->pmu;
+	struct intel_uncore_type *type = pmu->type;
+
+	return type->fixed_ctr + type->msr_offset * pmu->pmu_idx;
+}
+
+/*
+ * Event control MSR @idx of @box: consecutive controls are adjacent and
+ * each pmu index is msr_offset further on.
+ */
+static inline
+unsigned uncore_msr_event_ctl(struct intel_uncore_box *box, int idx)
+{
+	struct intel_uncore_pmu *pmu = box->pmu;
+	struct intel_uncore_type *type = pmu->type;
+	unsigned box_base = type->msr_offset * pmu->pmu_idx;
+
+	return idx + type->event_ctl + box_base;
+}
+
+/*
+ * Counter MSR @idx of @box: consecutive counters are adjacent and each pmu
+ * index is msr_offset further on.
+ */
+static inline
+unsigned uncore_msr_perf_ctr(struct intel_uncore_box *box, int idx)
+{
+	struct intel_uncore_pmu *pmu = box->pmu;
+	struct intel_uncore_type *type = pmu->type;
+	unsigned box_base = type->msr_offset * pmu->pmu_idx;
+
+	return idx + type->perf_ctr + box_base;
+}
+
+/* Fixed counter control of @box, PCI or MSR flavour depending on pci_dev. */
+static inline
+unsigned uncore_fixed_ctl(struct intel_uncore_box *box)
+{
+	return box->pci_dev ? uncore_pci_fixed_ctl(box) :
+			      uncore_msr_fixed_ctl(box);
+}
+
+/* Fixed counter of @box, PCI or MSR flavour depending on pci_dev. */
+static inline
+unsigned uncore_fixed_ctr(struct intel_uncore_box *box)
+{
+	return box->pci_dev ? uncore_pci_fixed_ctr(box) :
+			      uncore_msr_fixed_ctr(box);
+}
+
+/* Event control @idx of @box, PCI or MSR flavour depending on pci_dev. */
+static inline
+unsigned uncore_event_ctl(struct intel_uncore_box *box, int idx)
+{
+	return box->pci_dev ? uncore_pci_event_ctl(box, idx) :
+			      uncore_msr_event_ctl(box, idx);
+}
+
+/* Counter @idx of @box, PCI or MSR flavour depending on pci_dev. */
+static inline
+unsigned uncore_perf_ctr(struct intel_uncore_box *box, int idx)
+{
+	return box->pci_dev ? uncore_pci_perf_ctr(box, idx) :
+			      uncore_msr_perf_ctr(box, idx);
+}
+
+/* The perf_ctr_bits value of the type @box belongs to. */
+static inline int uncore_perf_ctr_bits(struct intel_uncore_box *box)
+{
+	struct intel_uncore_type *type = box->pmu->type;
+
+	return type->perf_ctr_bits;
+}
+
+/* The fixed_ctr_bits value of the type @box belongs to. */
+static inline int uncore_fixed_ctr_bits(struct intel_uncore_box *box)
+{
+	struct intel_uncore_type *type = box->pmu->type;
+
+	return type->fixed_ctr_bits;
+}
+
+/* Number of generic counters of the type @box belongs to. */
+static inline int uncore_num_counters(struct intel_uncore_box *box)
+{
+	struct intel_uncore_type *type = box->pmu->type;
+
+	return type->num_counters;
+}
+
+/* Call the type's disable_box callback, if it provides one. */
+static inline void uncore_disable_box(struct intel_uncore_box *box)
+{
+	struct intel_uncore_ops *ops = box->pmu->type->ops;
+
+	if (!ops->disable_box)
+		return;
+
+	ops->disable_box(box);
+}
+
+/* Call the type's enable_box callback, if it provides one. */
+static inline void uncore_enable_box(struct intel_uncore_box *box)
+{
+	struct intel_uncore_ops *ops = box->pmu->type->ops;
+
+	if (!ops->enable_box)
+		return;
+
+	ops->enable_box(box);
+}
+
+/* Call the type's disable_event callback for @event on @box. */
+static inline void uncore_disable_event(struct intel_uncore_box *box,
+				struct perf_event *event)
+{
+	struct intel_uncore_ops *ops = box->pmu->type->ops;
+
+	ops->disable_event(box, event);
+}
+
+/* Call the type's enable_event callback for @event on @box. */
+static inline void uncore_enable_event(struct intel_uncore_box *box,
+				struct perf_event *event)
+{
+	struct intel_uncore_ops *ops = box->pmu->type->ops;
+
+	ops->enable_event(box, event);
+}
+
+/* Return what the type's read_counter callback reports for @event. */
+static inline u64 uncore_read_counter(struct intel_uncore_box *box,
+				struct perf_event *event)
+{
+	struct intel_uncore_ops *ops = box->pmu->type->ops;
+
+	return ops->read_counter(box, event);
+}
+
+/*
+ * Run the type's optional init_box callback for @box, at most once per
+ * box: the UNCORE_BOX_FLAG_INITIATED bit records that it has been done.
+ */
+static inline void uncore_box_init(struct intel_uncore_box *box)
+{
+	struct intel_uncore_ops *ops;
+
+	if (test_and_set_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags))
+		return;
+
+	ops = box->pmu->type->ops;
+	if (ops->init_box)
+		ops->init_box(box);
+}
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index dc4e910..36fd420 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -409,9 +409,10 @@
  * arch_uprobe_analyze_insn - instruction analysis including validity and fixups.
  * @mm: the probed address space.
  * @arch_uprobe: the probepoint information.
+ * @addr: virtual address at which to install the probepoint
  * Return 0 on success or a -ve number on error.
  */
-int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm)
+int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long addr)
 {
 	int ret;
 	struct insn insn;
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 176a939..1aff183 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -207,6 +207,9 @@
 	 *   bit 1:		enabled
 	 *   bit 2:		filter_active
 	 *   bit 3:		enabled cmd record
+	 *   bit 4:		allow trace by non root (cap any)
+	 *   bit 5:		failed to apply filter
+	 *   bit 6:		ftrace internal event (do not enable)
 	 *
 	 * Changes to flags must hold the event_mutex.
 	 *
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index ab741b0..5f18702 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2755,6 +2755,17 @@
 #define PCI_DEVICE_ID_INTEL_IOAT_SNB7	0x3c27
 #define PCI_DEVICE_ID_INTEL_IOAT_SNB8	0x3c2e
 #define PCI_DEVICE_ID_INTEL_IOAT_SNB9	0x3c2f
+#define PCI_DEVICE_ID_INTEL_UNC_HA	0x3c46
+#define PCI_DEVICE_ID_INTEL_UNC_IMC0	0x3cb0
+#define PCI_DEVICE_ID_INTEL_UNC_IMC1	0x3cb1
+#define PCI_DEVICE_ID_INTEL_UNC_IMC2	0x3cb4
+#define PCI_DEVICE_ID_INTEL_UNC_IMC3	0x3cb5
+#define PCI_DEVICE_ID_INTEL_UNC_QPI0	0x3c41
+#define PCI_DEVICE_ID_INTEL_UNC_QPI1	0x3c42
+#define PCI_DEVICE_ID_INTEL_UNC_R2PCIE	0x3c43
+#define PCI_DEVICE_ID_INTEL_UNC_R3QPI0	0x3c44
+#define PCI_DEVICE_ID_INTEL_UNC_R3QPI1	0x3c45
+#define PCI_DEVICE_ID_INTEL_JAKETOWN_UBOX	0x3ce0
 #define PCI_DEVICE_ID_INTEL_IOAT_SNB	0x402f
 #define PCI_DEVICE_ID_INTEL_5100_16	0x65f0
 #define PCI_DEVICE_ID_INTEL_5100_21	0x65f5
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 45db49f..76c5c8b 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -677,6 +677,7 @@
 			u64		last_tag;
 			unsigned long	config_base;
 			unsigned long	event_base;
+			int		event_base_rdpmc;
 			int		idx;
 			int		last_cpu;
 
@@ -1106,6 +1107,8 @@
 				struct task_struct *task,
 				perf_overflow_handler_t callback,
 				void *context);
+extern void perf_pmu_migrate_context(struct pmu *pmu,
+				int src_cpu, int dst_cpu);
 extern u64 perf_event_read_value(struct perf_event *event,
 				 u64 *enabled, u64 *running);
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4059c0f..c7cfa69 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1581,7 +1581,6 @@
 #endif
 #ifdef CONFIG_UPROBES
 	struct uprobe_task *utask;
-	int uprobe_srcu_id;
 #endif
 };
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index d7d71d6..f1cf0ed 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1645,6 +1645,8 @@
 	lockdep_assert_held(&ctx->mutex);
 
 	event->ctx = ctx;
+	if (event->cpu != -1)
+		event->cpu = cpu;
 
 	if (!task) {
 		/*
@@ -6252,6 +6254,8 @@
 		}
 	}
 
+	get_online_cpus();
+
 	event = perf_event_alloc(&attr, cpu, task, group_leader, NULL,
 				 NULL, NULL);
 	if (IS_ERR(event)) {
@@ -6304,7 +6308,7 @@
 	/*
 	 * Get the target context (task or percpu):
 	 */
-	ctx = find_get_context(pmu, task, cpu);
+	ctx = find_get_context(pmu, task, event->cpu);
 	if (IS_ERR(ctx)) {
 		err = PTR_ERR(ctx);
 		goto err_alloc;
@@ -6377,20 +6381,23 @@
 	mutex_lock(&ctx->mutex);
 
 	if (move_group) {
-		perf_install_in_context(ctx, group_leader, cpu);
+		synchronize_rcu();
+		perf_install_in_context(ctx, group_leader, event->cpu);
 		get_ctx(ctx);
 		list_for_each_entry(sibling, &group_leader->sibling_list,
 				    group_entry) {
-			perf_install_in_context(ctx, sibling, cpu);
+			perf_install_in_context(ctx, sibling, event->cpu);
 			get_ctx(ctx);
 		}
 	}
 
-	perf_install_in_context(ctx, event, cpu);
+	perf_install_in_context(ctx, event, event->cpu);
 	++ctx->generation;
 	perf_unpin_context(ctx);
 	mutex_unlock(&ctx->mutex);
 
+	put_online_cpus();
+
 	event->owner = current;
 
 	mutex_lock(&current->perf_event_mutex);
@@ -6419,6 +6426,7 @@
 err_alloc:
 	free_event(event);
 err_task:
+	put_online_cpus();
 	if (task)
 		put_task_struct(task);
 err_group_fd:
@@ -6479,6 +6487,39 @@
 }
 EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter);
 
+void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
+{
+	struct perf_event_context *src_ctx;
+	struct perf_event_context *dst_ctx;
+	struct perf_event *event, *tmp;
+	LIST_HEAD(events);
+
+	src_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, src_cpu)->ctx;
+	dst_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, dst_cpu)->ctx;
+
+	mutex_lock(&src_ctx->mutex);
+	list_for_each_entry_safe(event, tmp, &src_ctx->event_list,
+				 event_entry) {
+		perf_remove_from_context(event);
+		put_ctx(src_ctx);
+		list_add(&event->event_entry, &events);
+	}
+	mutex_unlock(&src_ctx->mutex);
+
+	synchronize_rcu();
+
+	mutex_lock(&dst_ctx->mutex);
+	list_for_each_entry_safe(event, tmp, &events, event_entry) {
+		list_del(&event->event_entry);
+		if (event->state >= PERF_EVENT_STATE_OFF)
+			event->state = PERF_EVENT_STATE_INACTIVE;
+		perf_install_in_context(dst_ctx, event, dst_cpu);
+		get_ctx(dst_ctx);
+	}
+	mutex_unlock(&dst_ctx->mutex);
+}
+EXPORT_SYMBOL_GPL(perf_pmu_migrate_context);
+
 static void sync_child_event(struct perf_event *child_event,
 			       struct task_struct *child)
 {
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 985be4d..f935327 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -38,13 +38,29 @@
 #define UINSNS_PER_PAGE			(PAGE_SIZE/UPROBE_XOL_SLOT_BYTES)
 #define MAX_UPROBE_XOL_SLOTS		UINSNS_PER_PAGE
 
-static struct srcu_struct uprobes_srcu;
 static struct rb_root uprobes_tree = RB_ROOT;
 
 static DEFINE_SPINLOCK(uprobes_treelock);	/* serialize rbtree access */
 
 #define UPROBES_HASH_SZ	13
 
+/*
+ * We need separate register/unregister and mmap/munmap lock hashes because
+ * of mmap_sem nesting.
+ *
+ * uprobe_register() needs to install probes on (potentially) all processes
+ * and thus needs to acquire multiple mmap_sems (consecutively, not
+ * concurrently), whereas uprobe_mmap() is called while holding mmap_sem
+ * for the particular process doing the mmap.
+ *
+ * uprobe_register()->register_for_each_vma() needs to drop/acquire mmap_sem
+ * because of lock order against i_mmap_mutex. This means there's a hole in
+ * the register vma iteration where a mmap() can happen.
+ *
+ * Thus uprobe_register() can race with uprobe_mmap() and we can try and
+ * install a probe where one is already installed.
+ */
+
 /* serialize (un)register */
 static struct mutex uprobes_mutex[UPROBES_HASH_SZ];
 
@@ -61,17 +77,6 @@
  */
 static atomic_t uprobe_events = ATOMIC_INIT(0);
 
-/*
- * Maintain a temporary per vma info that can be used to search if a vma
- * has already been handled. This structure is introduced since extending
- * vm_area_struct wasnt recommended.
- */
-struct vma_info {
-	struct list_head	probe_list;
-	struct mm_struct	*mm;
-	loff_t			vaddr;
-};
-
 struct uprobe {
 	struct rb_node		rb_node;	/* node in the rb tree */
 	atomic_t		ref;
@@ -100,7 +105,8 @@
 	if (!is_register)
 		return true;
 
-	if ((vma->vm_flags & (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)) == (VM_READ|VM_EXEC))
+	if ((vma->vm_flags & (VM_HUGETLB|VM_READ|VM_WRITE|VM_EXEC|VM_SHARED))
+				== (VM_READ|VM_EXEC))
 		return true;
 
 	return false;
@@ -129,33 +135,17 @@
 static int __replace_page(struct vm_area_struct *vma, struct page *page, struct page *kpage)
 {
 	struct mm_struct *mm = vma->vm_mm;
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *ptep;
-	spinlock_t *ptl;
 	unsigned long addr;
-	int err = -EFAULT;
+	spinlock_t *ptl;
+	pte_t *ptep;
 
 	addr = page_address_in_vma(page, vma);
 	if (addr == -EFAULT)
-		goto out;
+		return -EFAULT;
 
-	pgd = pgd_offset(mm, addr);
-	if (!pgd_present(*pgd))
-		goto out;
-
-	pud = pud_offset(pgd, addr);
-	if (!pud_present(*pud))
-		goto out;
-
-	pmd = pmd_offset(pud, addr);
-	if (!pmd_present(*pmd))
-		goto out;
-
-	ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
+	ptep = page_check_address(page, mm, addr, &ptl, 0);
 	if (!ptep)
-		goto out;
+		return -EAGAIN;
 
 	get_page(kpage);
 	page_add_new_anon_rmap(kpage, vma, addr);
@@ -174,10 +164,8 @@
 		try_to_free_swap(page);
 	put_page(page);
 	pte_unmap_unlock(ptep, ptl);
-	err = 0;
 
-out:
-	return err;
+	return 0;
 }
 
 /**
@@ -222,9 +210,8 @@
 	void *vaddr_old, *vaddr_new;
 	struct vm_area_struct *vma;
 	struct uprobe *uprobe;
-	loff_t addr;
 	int ret;
-
+retry:
 	/* Read the page with vaddr into memory */
 	ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &old_page, &vma);
 	if (ret <= 0)
@@ -246,10 +233,6 @@
 	if (mapping != vma->vm_file->f_mapping)
 		goto put_out;
 
-	addr = vma_address(vma, uprobe->offset);
-	if (vaddr != (unsigned long)addr)
-		goto put_out;
-
 	ret = -ENOMEM;
 	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr);
 	if (!new_page)
@@ -267,11 +250,7 @@
 	vaddr_new = kmap_atomic(new_page);
 
 	memcpy(vaddr_new, vaddr_old, PAGE_SIZE);
-
-	/* poke the new insn in, ASSUMES we don't cross page boundary */
-	vaddr &= ~PAGE_MASK;
-	BUG_ON(vaddr + UPROBE_SWBP_INSN_SIZE > PAGE_SIZE);
-	memcpy(vaddr_new + vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
+	memcpy(vaddr_new + (vaddr & ~PAGE_MASK), &opcode, UPROBE_SWBP_INSN_SIZE);
 
 	kunmap_atomic(vaddr_new);
 	kunmap_atomic(vaddr_old);
@@ -291,6 +270,8 @@
 put_out:
 	put_page(old_page);
 
+	if (unlikely(ret == -EAGAIN))
+		goto retry;
 	return ret;
 }
 
@@ -312,7 +293,7 @@
 	void *vaddr_new;
 	int ret;
 
-	ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &page, NULL);
+	ret = get_user_pages(NULL, mm, vaddr, 1, 0, 1, &page, NULL);
 	if (ret <= 0)
 		return ret;
 
@@ -333,10 +314,20 @@
 	uprobe_opcode_t opcode;
 	int result;
 
+	if (current->mm == mm) {
+		pagefault_disable();
+		result = __copy_from_user_inatomic(&opcode, (void __user*)vaddr,
+								sizeof(opcode));
+		pagefault_enable();
+
+		if (likely(result == 0))
+			goto out;
+	}
+
 	result = read_opcode(mm, vaddr, &opcode);
 	if (result)
 		return result;
-
+out:
 	if (is_swbp_insn(&opcode))
 		return 1;
 
@@ -355,7 +346,9 @@
 int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr)
 {
 	int result;
-
+	/*
+	 * See the comment near uprobes_hash().
+	 */
 	result = is_swbp_at_addr(mm, vaddr);
 	if (result == 1)
 		return -EEXIST;
@@ -520,7 +513,6 @@
 	uprobe->inode = igrab(inode);
 	uprobe->offset = offset;
 	init_rwsem(&uprobe->consumer_rwsem);
-	INIT_LIST_HEAD(&uprobe->pending_list);
 
 	/* add to uprobes_tree, sorted on inode:offset */
 	cur_uprobe = insert_uprobe(uprobe);
@@ -588,20 +580,22 @@
 }
 
 static int
-__copy_insn(struct address_space *mapping, struct vm_area_struct *vma, char *insn,
-			unsigned long nbytes, unsigned long offset)
+__copy_insn(struct address_space *mapping, struct file *filp, char *insn,
+			unsigned long nbytes, loff_t offset)
 {
-	struct file *filp = vma->vm_file;
 	struct page *page;
 	void *vaddr;
-	unsigned long off1;
-	unsigned long idx;
+	unsigned long off;
+	pgoff_t idx;
 
 	if (!filp)
 		return -EINVAL;
 
-	idx = (unsigned long)(offset >> PAGE_CACHE_SHIFT);
-	off1 = offset &= ~PAGE_MASK;
+	if (!mapping->a_ops->readpage)
+		return -EIO;
+
+	idx = offset >> PAGE_CACHE_SHIFT;
+	off = offset & ~PAGE_MASK;
 
 	/*
 	 * Ensure that the page that has the original instruction is
@@ -612,22 +606,20 @@
 		return PTR_ERR(page);
 
 	vaddr = kmap_atomic(page);
-	memcpy(insn, vaddr + off1, nbytes);
+	memcpy(insn, vaddr + off, nbytes);
 	kunmap_atomic(vaddr);
 	page_cache_release(page);
 
 	return 0;
 }
 
-static int
-copy_insn(struct uprobe *uprobe, struct vm_area_struct *vma, unsigned long addr)
+static int copy_insn(struct uprobe *uprobe, struct file *filp)
 {
 	struct address_space *mapping;
 	unsigned long nbytes;
 	int bytes;
 
-	addr &= ~PAGE_MASK;
-	nbytes = PAGE_SIZE - addr;
+	nbytes = PAGE_SIZE - (uprobe->offset & ~PAGE_MASK);
 	mapping = uprobe->inode->i_mapping;
 
 	/* Instruction at end of binary; copy only available bytes */
@@ -638,13 +630,13 @@
 
 	/* Instruction at the page-boundary; copy bytes in second page */
 	if (nbytes < bytes) {
-		if (__copy_insn(mapping, vma, uprobe->arch.insn + nbytes,
-				bytes - nbytes, uprobe->offset + nbytes))
-			return -ENOMEM;
-
+		int err = __copy_insn(mapping, filp, uprobe->arch.insn + nbytes,
+				bytes - nbytes, uprobe->offset + nbytes);
+		if (err)
+			return err;
 		bytes = nbytes;
 	}
-	return __copy_insn(mapping, vma, uprobe->arch.insn, bytes, uprobe->offset);
+	return __copy_insn(mapping, filp, uprobe->arch.insn, bytes, uprobe->offset);
 }
 
 /*
@@ -672,9 +664,8 @@
  */
 static int
 install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
-			struct vm_area_struct *vma, loff_t vaddr)
+			struct vm_area_struct *vma, unsigned long vaddr)
 {
-	unsigned long addr;
 	int ret;
 
 	/*
@@ -687,20 +678,22 @@
 	if (!uprobe->consumers)
 		return -EEXIST;
 
-	addr = (unsigned long)vaddr;
-
 	if (!(uprobe->flags & UPROBE_COPY_INSN)) {
-		ret = copy_insn(uprobe, vma, addr);
+		ret = copy_insn(uprobe, vma->vm_file);
 		if (ret)
 			return ret;
 
 		if (is_swbp_insn((uprobe_opcode_t *)uprobe->arch.insn))
-			return -EEXIST;
+			return -ENOTSUPP;
 
-		ret = arch_uprobe_analyze_insn(&uprobe->arch, mm);
+		ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, vaddr);
 		if (ret)
 			return ret;
 
+		/* write_opcode() assumes we don't cross page boundary */
+		BUG_ON((uprobe->offset & ~PAGE_MASK) +
+				UPROBE_SWBP_INSN_SIZE > PAGE_SIZE);
+
 		uprobe->flags |= UPROBE_COPY_INSN;
 	}
 
@@ -713,7 +706,7 @@
 	 * Hence increment before and decrement on failure.
 	 */
 	atomic_inc(&mm->uprobes_state.count);
-	ret = set_swbp(&uprobe->arch, mm, addr);
+	ret = set_swbp(&uprobe->arch, mm, vaddr);
 	if (ret)
 		atomic_dec(&mm->uprobes_state.count);
 
@@ -721,27 +714,21 @@
 }
 
 static void
-remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, loff_t vaddr)
+remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr)
 {
-	if (!set_orig_insn(&uprobe->arch, mm, (unsigned long)vaddr, true))
+	if (!set_orig_insn(&uprobe->arch, mm, vaddr, true))
 		atomic_dec(&mm->uprobes_state.count);
 }
 
 /*
- * There could be threads that have hit the breakpoint and are entering the
- * notifier code and trying to acquire the uprobes_treelock. The thread
- * calling delete_uprobe() that is removing the uprobe from the rb_tree can
- * race with these threads and might acquire the uprobes_treelock compared
- * to some of the breakpoint hit threads. In such a case, the breakpoint
- * hit threads will not find the uprobe. The current unregistering thread
- * waits till all other threads have hit a breakpoint, to acquire the
- * uprobes_treelock before the uprobe is removed from the rbtree.
+ * There could be threads that have already hit the breakpoint. They
+ * will recheck the current insn and restart if find_uprobe() fails.
+ * See find_active_uprobe().
  */
 static void delete_uprobe(struct uprobe *uprobe)
 {
 	unsigned long flags;
 
-	synchronize_srcu(&uprobes_srcu);
 	spin_lock_irqsave(&uprobes_treelock, flags);
 	rb_erase(&uprobe->rb_node, &uprobes_tree);
 	spin_unlock_irqrestore(&uprobes_treelock, flags);
@@ -750,139 +737,135 @@
 	atomic_dec(&uprobe_events);
 }
 
-static struct vma_info *
-__find_next_vma_info(struct address_space *mapping, struct list_head *head,
-			struct vma_info *vi, loff_t offset, bool is_register)
+struct map_info {
+	struct map_info *next;
+	struct mm_struct *mm;
+	unsigned long vaddr;
+};
+
+static inline struct map_info *free_map_info(struct map_info *info)
 {
+	struct map_info *next = info->next;
+	kfree(info);
+	return next;
+}
+
+static struct map_info *
+build_map_info(struct address_space *mapping, loff_t offset, bool is_register)
+{
+	unsigned long pgoff = offset >> PAGE_SHIFT;
 	struct prio_tree_iter iter;
 	struct vm_area_struct *vma;
-	struct vma_info *tmpvi;
-	unsigned long pgoff;
-	int existing_vma;
-	loff_t vaddr;
+	struct map_info *curr = NULL;
+	struct map_info *prev = NULL;
+	struct map_info *info;
+	int more = 0;
 
-	pgoff = offset >> PAGE_SHIFT;
-
+ again:
+	mutex_lock(&mapping->i_mmap_mutex);
 	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
 		if (!valid_vma(vma, is_register))
 			continue;
 
-		existing_vma = 0;
-		vaddr = vma_address(vma, offset);
-
-		list_for_each_entry(tmpvi, head, probe_list) {
-			if (tmpvi->mm == vma->vm_mm && tmpvi->vaddr == vaddr) {
-				existing_vma = 1;
-				break;
-			}
+		if (!prev && !more) {
+			/*
+			 * Needs GFP_NOWAIT to avoid i_mmap_mutex recursion through
+			 * reclaim. This is optimistic, no harm done if it fails.
+			 */
+			prev = kmalloc(sizeof(struct map_info),
+					GFP_NOWAIT | __GFP_NOMEMALLOC | __GFP_NOWARN);
+			if (prev)
+				prev->next = NULL;
+		}
+		if (!prev) {
+			more++;
+			continue;
 		}
 
-		/*
-		 * Another vma needs a probe to be installed. However skip
-		 * installing the probe if the vma is about to be unlinked.
-		 */
-		if (!existing_vma && atomic_inc_not_zero(&vma->vm_mm->mm_users)) {
-			vi->mm = vma->vm_mm;
-			vi->vaddr = vaddr;
-			list_add(&vi->probe_list, head);
+		if (!atomic_inc_not_zero(&vma->vm_mm->mm_users))
+			continue;
 
-			return vi;
-		}
+		info = prev;
+		prev = prev->next;
+		info->next = curr;
+		curr = info;
+
+		info->mm = vma->vm_mm;
+		info->vaddr = vma_address(vma, offset);
 	}
-
-	return NULL;
-}
-
-/*
- * Iterate in the rmap prio tree  and find a vma where a probe has not
- * yet been inserted.
- */
-static struct vma_info *
-find_next_vma_info(struct address_space *mapping, struct list_head *head,
-		loff_t offset, bool is_register)
-{
-	struct vma_info *vi, *retvi;
-
-	vi = kzalloc(sizeof(struct vma_info), GFP_KERNEL);
-	if (!vi)
-		return ERR_PTR(-ENOMEM);
-
-	mutex_lock(&mapping->i_mmap_mutex);
-	retvi = __find_next_vma_info(mapping, head, vi, offset, is_register);
 	mutex_unlock(&mapping->i_mmap_mutex);
 
-	if (!retvi)
-		kfree(vi);
+	if (!more)
+		goto out;
 
-	return retvi;
+	prev = curr;
+	while (curr) {
+		mmput(curr->mm);
+		curr = curr->next;
+	}
+
+	do {
+		info = kmalloc(sizeof(struct map_info), GFP_KERNEL);
+		if (!info) {
+			curr = ERR_PTR(-ENOMEM);
+			goto out;
+		}
+		info->next = prev;
+		prev = info;
+	} while (--more);
+
+	goto again;
+ out:
+	while (prev)
+		prev = free_map_info(prev);
+	return curr;
 }
 
 static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
 {
-	struct list_head try_list;
-	struct vm_area_struct *vma;
-	struct address_space *mapping;
-	struct vma_info *vi, *tmpvi;
-	struct mm_struct *mm;
-	loff_t vaddr;
-	int ret;
+	struct map_info *info;
+	int err = 0;
 
-	mapping = uprobe->inode->i_mapping;
-	INIT_LIST_HEAD(&try_list);
+	info = build_map_info(uprobe->inode->i_mapping,
+					uprobe->offset, is_register);
+	if (IS_ERR(info))
+		return PTR_ERR(info);
 
-	ret = 0;
+	while (info) {
+		struct mm_struct *mm = info->mm;
+		struct vm_area_struct *vma;
 
-	for (;;) {
-		vi = find_next_vma_info(mapping, &try_list, uprobe->offset, is_register);
-		if (!vi)
-			break;
+		if (err)
+			goto free;
 
-		if (IS_ERR(vi)) {
-			ret = PTR_ERR(vi);
-			break;
-		}
+		down_write(&mm->mmap_sem);
+		vma = find_vma(mm, (unsigned long)info->vaddr);
+		if (!vma || !valid_vma(vma, is_register))
+			goto unlock;
 
-		mm = vi->mm;
-		down_read(&mm->mmap_sem);
-		vma = find_vma(mm, (unsigned long)vi->vaddr);
-		if (!vma || !valid_vma(vma, is_register)) {
-			list_del(&vi->probe_list);
-			kfree(vi);
-			up_read(&mm->mmap_sem);
-			mmput(mm);
-			continue;
-		}
-		vaddr = vma_address(vma, uprobe->offset);
 		if (vma->vm_file->f_mapping->host != uprobe->inode ||
-						vaddr != vi->vaddr) {
-			list_del(&vi->probe_list);
-			kfree(vi);
-			up_read(&mm->mmap_sem);
-			mmput(mm);
-			continue;
-		}
+		    vma_address(vma, uprobe->offset) != info->vaddr)
+			goto unlock;
 
-		if (is_register)
-			ret = install_breakpoint(uprobe, mm, vma, vi->vaddr);
-		else
-			remove_breakpoint(uprobe, mm, vi->vaddr);
-
-		up_read(&mm->mmap_sem);
-		mmput(mm);
 		if (is_register) {
-			if (ret && ret == -EEXIST)
-				ret = 0;
-			if (ret)
-				break;
+			err = install_breakpoint(uprobe, mm, vma, info->vaddr);
+			/*
+			 * We can race against uprobe_mmap(), see the
+			 * comment near uprobes_hash().
+			 */
+			if (err == -EEXIST)
+				err = 0;
+		} else {
+			remove_breakpoint(uprobe, mm, info->vaddr);
 		}
+ unlock:
+		up_write(&mm->mmap_sem);
+ free:
+		mmput(mm);
+		info = free_map_info(info);
 	}
 
-	list_for_each_entry_safe(vi, tmpvi, &try_list, probe_list) {
-		list_del(&vi->probe_list);
-		kfree(vi);
-	}
-
-	return ret;
+	return err;
 }
 
 static int __uprobe_register(struct uprobe *uprobe)
@@ -1048,7 +1031,7 @@
 int uprobe_mmap(struct vm_area_struct *vma)
 {
 	struct list_head tmp_list;
-	struct uprobe *uprobe, *u;
+	struct uprobe *uprobe;
 	struct inode *inode;
 	int ret, count;
 
@@ -1066,12 +1049,9 @@
 	ret = 0;
 	count = 0;
 
-	list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) {
-		loff_t vaddr;
-
-		list_del(&uprobe->pending_list);
+	list_for_each_entry(uprobe, &tmp_list, pending_list) {
 		if (!ret) {
-			vaddr = vma_address(vma, uprobe->offset);
+			loff_t vaddr = vma_address(vma, uprobe->offset);
 
 			if (vaddr < vma->vm_start || vaddr >= vma->vm_end) {
 				put_uprobe(uprobe);
@@ -1079,8 +1059,10 @@
 			}
 
 			ret = install_breakpoint(uprobe, vma->vm_mm, vma, vaddr);
-
-			/* Ignore double add: */
+			/*
+			 * We can race against uprobe_register(), see the
+			 * comment near uprobes_hash().
+			 */
 			if (ret == -EEXIST) {
 				ret = 0;
 
@@ -1115,7 +1097,7 @@
 void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end)
 {
 	struct list_head tmp_list;
-	struct uprobe *uprobe, *u;
+	struct uprobe *uprobe;
 	struct inode *inode;
 
 	if (!atomic_read(&uprobe_events) || !valid_vma(vma, false))
@@ -1132,11 +1114,8 @@
 	mutex_lock(uprobes_mmap_hash(inode));
 	build_probe_list(inode, &tmp_list);
 
-	list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) {
-		loff_t vaddr;
-
-		list_del(&uprobe->pending_list);
-		vaddr = vma_address(vma, uprobe->offset);
+	list_for_each_entry(uprobe, &tmp_list, pending_list) {
+		loff_t vaddr = vma_address(vma, uprobe->offset);
 
 		if (vaddr >= start && vaddr < end) {
 			/*
@@ -1378,9 +1357,6 @@
 {
 	struct uprobe_task *utask = t->utask;
 
-	if (t->uprobe_srcu_id != -1)
-		srcu_read_unlock_raw(&uprobes_srcu, t->uprobe_srcu_id);
-
 	if (!utask)
 		return;
 
@@ -1398,7 +1374,6 @@
 void uprobe_copy_process(struct task_struct *t)
 {
 	t->utask = NULL;
-	t->uprobe_srcu_id = -1;
 }
 
 /*
@@ -1417,7 +1392,6 @@
 	if (unlikely(!utask))
 		return NULL;
 
-	utask->active_uprobe = NULL;
 	current->utask = utask;
 	return utask;
 }
@@ -1479,41 +1453,64 @@
 	return false;
 }
 
+static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
+{
+	struct mm_struct *mm = current->mm;
+	struct uprobe *uprobe = NULL;
+	struct vm_area_struct *vma;
+
+	down_read(&mm->mmap_sem);
+	vma = find_vma(mm, bp_vaddr);
+	if (vma && vma->vm_start <= bp_vaddr) {
+		if (valid_vma(vma, false)) {
+			struct inode *inode;
+			loff_t offset;
+
+			inode = vma->vm_file->f_mapping->host;
+			offset = bp_vaddr - vma->vm_start;
+			offset += (vma->vm_pgoff << PAGE_SHIFT);
+			uprobe = find_uprobe(inode, offset);
+		}
+
+		if (!uprobe)
+			*is_swbp = is_swbp_at_addr(mm, bp_vaddr);
+	} else {
+		*is_swbp = -EFAULT;
+	}
+	up_read(&mm->mmap_sem);
+
+	return uprobe;
+}
+
 /*
  * Run handler and ask thread to singlestep.
  * Ensure all non-fatal signals cannot interrupt thread while it singlesteps.
  */
 static void handle_swbp(struct pt_regs *regs)
 {
-	struct vm_area_struct *vma;
 	struct uprobe_task *utask;
 	struct uprobe *uprobe;
-	struct mm_struct *mm;
 	unsigned long bp_vaddr;
+	int uninitialized_var(is_swbp);
 
-	uprobe = NULL;
 	bp_vaddr = uprobe_get_swbp_addr(regs);
-	mm = current->mm;
-	down_read(&mm->mmap_sem);
-	vma = find_vma(mm, bp_vaddr);
-
-	if (vma && vma->vm_start <= bp_vaddr && valid_vma(vma, false)) {
-		struct inode *inode;
-		loff_t offset;
-
-		inode = vma->vm_file->f_mapping->host;
-		offset = bp_vaddr - vma->vm_start;
-		offset += (vma->vm_pgoff << PAGE_SHIFT);
-		uprobe = find_uprobe(inode, offset);
-	}
-
-	srcu_read_unlock_raw(&uprobes_srcu, current->uprobe_srcu_id);
-	current->uprobe_srcu_id = -1;
-	up_read(&mm->mmap_sem);
+	uprobe = find_active_uprobe(bp_vaddr, &is_swbp);
 
 	if (!uprobe) {
-		/* No matching uprobe; signal SIGTRAP. */
-		send_sig(SIGTRAP, current, 0);
+		if (is_swbp > 0) {
+			/* No matching uprobe; signal SIGTRAP. */
+			send_sig(SIGTRAP, current, 0);
+		} else {
+			/*
+			 * Either we raced with uprobe_unregister() or we can't
+			 * access this memory. The latter is only possible if
+			 * another thread plays with our ->mm. In both cases
+			 * we can simply restart. If this vma was unmapped we
+			 * can pretend this insn was not executed yet and get
+			 * the (correct) SIGSEGV after restart.
+			 */
+			instruction_pointer_set(regs, bp_vaddr);
+		}
 		return;
 	}
 
@@ -1620,7 +1617,6 @@
 		utask->state = UTASK_BP_HIT;
 
 	set_thread_flag(TIF_UPROBE);
-	current->uprobe_srcu_id = srcu_read_lock_raw(&uprobes_srcu);
 
 	return 1;
 }
@@ -1655,7 +1651,6 @@
 		mutex_init(&uprobes_mutex[i]);
 		mutex_init(&uprobes_mmap_mutex[i]);
 	}
-	init_srcu_struct(&uprobes_srcu);
 
 	return register_die_notifier(&uprobe_exception_nb);
 }
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index a008663..b4f20fb 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -312,7 +312,7 @@
 
 static int __register_ftrace_function(struct ftrace_ops *ops)
 {
-	if (ftrace_disabled)
+	if (unlikely(ftrace_disabled))
 		return -ENODEV;
 
 	if (FTRACE_WARN_ON(ops == &global_ops))
@@ -4299,16 +4299,12 @@
 
 	mutex_lock(&ftrace_lock);
 
-	if (unlikely(ftrace_disabled))
-		goto out_unlock;
-
 	ret = __register_ftrace_function(ops);
 	if (!ret)
 		ret = ftrace_startup(ops, 0);
 
-
- out_unlock:
 	mutex_unlock(&ftrace_lock);
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(register_ftrace_function);
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index df611a0..123b189 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -1325,4 +1325,4 @@
 
 	return 0;
 }
-device_initcall(init_events);
+early_initcall(init_events);
diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt
index a3dbadb..f3c716a 100644
--- a/tools/perf/Documentation/perf-bench.txt
+++ b/tools/perf/Documentation/perf-bench.txt
@@ -12,7 +12,7 @@
 
 DESCRIPTION
 -----------
-This 'perf bench' command is general framework for benchmark suites.
+This 'perf bench' command is a general framework for benchmark suites.
 
 COMMON OPTIONS
 --------------
@@ -45,14 +45,20 @@
 'sched'::
 	Scheduler and IPC mechanisms.
 
+'mem'::
+	Memory access performance.
+
+'all'::
+	All benchmark subsystems.
+
 SUITES FOR 'sched'
 ~~~~~~~~~~~~~~~~~~
 *messaging*::
 Suite for evaluating performance of scheduler and IPC mechanisms.
 Based on hackbench by Rusty Russell.
 
-Options of *pipe*
-^^^^^^^^^^^^^^^^^
+Options of *messaging*
+^^^^^^^^^^^^^^^^^^^^^^
 -p::
 --pipe::
 Use pipe() instead of socketpair()
@@ -115,6 +121,72 @@
                 59004 ops/sec
 ---------------------
 
+SUITES FOR 'mem'
+~~~~~~~~~~~~~~~~
+*memcpy*::
+Suite for evaluating performance of simple memory copy in various ways.
+
+Options of *memcpy*
+^^^^^^^^^^^^^^^^^^^
+-l::
+--length::
+Specify length of memory to copy (default: 1MB).
+Available units are B, KB, MB, GB and TB (case insensitive).
+
+-r::
+--routine::
+Specify routine to copy (default: default).
+Available routines depend on the architecture.
+On x86-64, x86-64-unrolled, x86-64-movsq and x86-64-movsb are supported.
+
+-i::
+--iterations::
+Repeat memcpy invocation this number of times.
+
+-c::
+--clock::
+Use perf's cpu-cycles event instead of gettimeofday syscall.
+
+-o::
+--only-prefault::
+Show only the result with page faults before memcpy.
+
+-n::
+--no-prefault::
+Show only the result without page faults before memcpy.
+
+*memset*::
+Suite for evaluating performance of simple memory set in various ways.
+
+Options of *memset*
+^^^^^^^^^^^^^^^^^^^
+-l::
+--length::
+Specify length of memory to set (default: 1MB).
+Available units are B, KB, MB, GB and TB (case insensitive).
+
+-r::
+--routine::
+Specify routine to set (default: default).
+Available routines depend on the architecture.
+On x86-64, x86-64-unrolled, x86-64-stosq and x86-64-stosb are supported.
+
+-i::
+--iterations::
+Repeat memset invocation this number of times.
+
+-c::
+--clock::
+Use perf's cpu-cycles event instead of gettimeofday syscall.
+
+-o::
+--only-prefault::
+Show only the result with page faults before memset.
+
+-n::
+--no-prefault::
+Show only the result without page faults before memset.
+
 SEE ALSO
 --------
 linkperf:perf[1]
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 2d89f02..495210a 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -57,7 +57,7 @@
 
 -s::
 --sort=::
-	Sort by key(s): pid, comm, dso, symbol, parent.
+	Sort by key(s): pid, comm, dso, symbol, parent, srcline.
 
 -p::
 --parent=<regex>::
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 4a5680c..5b80d84 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -112,7 +112,7 @@
 
 -s::
 --sort::
-	Sort by key(s): pid, comm, dso, symbol, parent
+	Sort by key(s): pid, comm, dso, symbol, parent, srcline.
 
 -n::
 --show-nr-samples::
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 0eee64c..75d74e5 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -155,7 +155,7 @@
 
 ### --- END CONFIGURATION SECTION ---
 
-BASIC_CFLAGS = -Iutil/include -Iarch/$(ARCH)/include -I$(OUTPUT)/util -I$(TRACE_EVENT_DIR) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE
+BASIC_CFLAGS = -Iutil/include -Iarch/$(ARCH)/include -I$(OUTPUT)util -I$(TRACE_EVENT_DIR) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE
 BASIC_LDFLAGS =
 
 # Guard against environment variables
@@ -503,6 +503,7 @@
 		LIB_OBJS += $(OUTPUT)ui/progress.o
 		LIB_OBJS += $(OUTPUT)ui/util.o
 		LIB_OBJS += $(OUTPUT)ui/tui/setup.o
+		LIB_OBJS += $(OUTPUT)ui/tui/util.o
 		LIB_H += ui/browser.h
 		LIB_H += ui/browsers/map.h
 		LIB_H += ui/helpline.h
@@ -522,13 +523,18 @@
 		msg := $(warning GTK2 not found, disables GTK2 support. Please install gtk2-devel or libgtk2.0-dev);
 		BASIC_CFLAGS += -DNO_GTK2_SUPPORT
 	else
+		ifeq ($(call try-cc,$(SOURCE_GTK2_INFOBAR),$(FLAGS_GTK2)),y)
+			BASIC_CFLAGS += -DHAVE_GTK_INFO_BAR
+		endif
 		BASIC_CFLAGS += $(shell pkg-config --cflags gtk+-2.0)
 		EXTLIBS += $(shell pkg-config --libs gtk+-2.0)
 		LIB_OBJS += $(OUTPUT)ui/gtk/browser.o
 		LIB_OBJS += $(OUTPUT)ui/gtk/setup.o
+		LIB_OBJS += $(OUTPUT)ui/gtk/util.o
 		# Make sure that it'd be included only once.
 		ifneq ($(findstring -DNO_NEWT_SUPPORT,$(BASIC_CFLAGS)),)
 			LIB_OBJS += $(OUTPUT)ui/setup.o
+			LIB_OBJS += $(OUTPUT)ui/util.o
 		endif
 	endif
 endif
diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c
index 7155722..d990365 100644
--- a/tools/perf/bench/mem-memcpy.c
+++ b/tools/perf/bench/mem-memcpy.c
@@ -32,13 +32,13 @@
 static const struct option options[] = {
 	OPT_STRING('l', "length", &length_str, "1MB",
 		    "Specify length of memory to copy. "
-		    "available unit: B, MB, GB (upper and lower)"),
+		    "Available units: B, KB, MB, GB and TB (upper and lower)"),
 	OPT_STRING('r', "routine", &routine, "default",
 		    "Specify routine to copy"),
 	OPT_INTEGER('i', "iterations", &iterations,
 		    "repeat memcpy() invocation this number of times"),
 	OPT_BOOLEAN('c', "clock", &use_clock,
-		    "Use CPU clock for measuring"),
+		    "Use cycles event instead of gettimeofday() for measuring"),
 	OPT_BOOLEAN('o', "only-prefault", &only_prefault,
 		    "Show only the result with page faults before memcpy()"),
 	OPT_BOOLEAN('n', "no-prefault", &no_prefault,
diff --git a/tools/perf/bench/mem-memset.c b/tools/perf/bench/mem-memset.c
index e907918..bf0d5f55 100644
--- a/tools/perf/bench/mem-memset.c
+++ b/tools/perf/bench/mem-memset.c
@@ -31,14 +31,14 @@
 
 static const struct option options[] = {
 	OPT_STRING('l', "length", &length_str, "1MB",
-		    "Specify length of memory to copy. "
-		    "available unit: B, MB, GB (upper and lower)"),
+		    "Specify length of memory to set. "
+		    "Available units: B, KB, MB, GB and TB (upper and lower)"),
 	OPT_STRING('r', "routine", &routine, "default",
-		    "Specify routine to copy"),
+		    "Specify routine to set"),
 	OPT_INTEGER('i', "iterations", &iterations,
 		    "repeat memset() invocation this number of times"),
 	OPT_BOOLEAN('c', "clock", &use_clock,
-		    "Use CPU clock for measuring"),
+		    "Use cycles event instead of gettimeofday() for measuring"),
 	OPT_BOOLEAN('o', "only-prefault", &only_prefault,
 		    "Show only the result with page faults before memset()"),
 	OPT_BOOLEAN('n', "no-prefault", &no_prefault,
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index b0e74ab..1f31002 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -33,7 +33,7 @@
 };
 						\
 /* sentinel: easy for help */
-#define suite_all { "all", "test all suite (pseudo suite)", NULL }
+#define suite_all { "all", "Test all benchmark suites", NULL }
 
 static struct bench_suite sched_suites[] = {
 	{ "messaging",
@@ -75,7 +75,7 @@
 	  "memory access performance",
 	  mem_suites },
 	{ "all",		/* sentinel: easy for help */
-	  "test all subsystem (pseudo subsystem)",
+	  "all benchmark subsystem",
 	  NULL },
 	{ NULL,
 	  NULL,
diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c
index acd78dc..0dd5a05 100644
--- a/tools/perf/builtin-evlist.c
+++ b/tools/perf/builtin-evlist.c
@@ -60,7 +60,7 @@
 	list_for_each_entry(pos, &session->evlist->entries, node) {
 		bool first = true;
 
-		printf("%s", event_name(pos));
+		printf("%s", perf_evsel__name(pos));
 
 		if (details->verbose || details->freq) {
 			comma_printf(&first, " sample_freq=%" PRIu64,
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 547af48..ce35015 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -57,6 +57,11 @@
 
 #define PATH_SYS_NODE	"/sys/devices/system/node"
 
+struct perf_kmem {
+	struct perf_tool    tool;	/* passed to perf_session__new(); container_of() anchor */
+	struct perf_session *session;	/* set in __cmd_kmem(); used to reach ->pevent */
+};
+
 static void init_cpunode_map(void)
 {
 	FILE *fp;
@@ -278,14 +283,16 @@
 	s_alloc->alloc_cpu = -1;
 }
 
-static void process_raw_event(union perf_event *raw_event __used, void *data,
+static void process_raw_event(struct perf_tool *tool,
+			      union perf_event *raw_event __used, void *data,
 			      int cpu, u64 timestamp, struct thread *thread)
 {
+	struct perf_kmem *kmem = container_of(tool, struct perf_kmem, tool);
 	struct event_format *event;
 	int type;
 
-	type = trace_parse_common_type(data);
-	event = trace_find_event(type);
+	type = trace_parse_common_type(kmem->session->pevent, data);
+	event = pevent_find_event(kmem->session->pevent, type);
 
 	if (!strcmp(event->name, "kmalloc") ||
 	    !strcmp(event->name, "kmem_cache_alloc")) {
@@ -306,7 +313,7 @@
 	}
 }
 
-static int process_sample_event(struct perf_tool *tool __used,
+static int process_sample_event(struct perf_tool *tool,
 				union perf_event *event,
 				struct perf_sample *sample,
 				struct perf_evsel *evsel __used,
@@ -322,16 +329,18 @@
 
 	dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
 
-	process_raw_event(event, sample->raw_data, sample->cpu,
+	process_raw_event(tool, event, sample->raw_data, sample->cpu,
 			  sample->time, thread);
 
 	return 0;
 }
 
-static struct perf_tool perf_kmem = {
-	.sample			= process_sample_event,
-	.comm			= perf_event__process_comm,
-	.ordered_samples	= true,
+static struct perf_kmem perf_kmem = {
+	.tool = {
+		.sample			= process_sample_event,
+		.comm			= perf_event__process_comm,
+		.ordered_samples	= true,
+	},
 };
 
 static double fragmentation(unsigned long n_req, unsigned long n_alloc)
@@ -486,11 +495,15 @@
 static int __cmd_kmem(void)
 {
 	int err = -EINVAL;
-	struct perf_session *session = perf_session__new(input_name, O_RDONLY,
-							 0, false, &perf_kmem);
+	struct perf_session *session;
+
+	session = perf_session__new(input_name, O_RDONLY, 0, false,
+				    &perf_kmem.tool);
 	if (session == NULL)
 		return -ENOMEM;
 
+	perf_kmem.session = session;
+
 	if (perf_session__create_kernel_maps(session) < 0)
 		goto out_delete;
 
@@ -498,7 +511,7 @@
 		goto out_delete;
 
 	setup_pager();
-	err = perf_session__process_events(session, &perf_kmem);
+	err = perf_session__process_events(session, &perf_kmem.tool);
 	if (err != 0)
 		goto out_delete;
 	sort_result();
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index fd53319..b3c4285 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -724,8 +724,8 @@
 	struct event_format *event;
 	int type;
 
-	type = trace_parse_common_type(data);
-	event = trace_find_event(type);
+	type = trace_parse_common_type(session->pevent, data);
+	event = pevent_find_event(session->pevent, type);
 
 	if (!strcmp(event->name, "lock_acquire"))
 		process_lock_acquire_event(data, event, cpu, timestamp, thread);
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index f95840d..f5a6452 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -265,7 +265,7 @@
 
 			if (err == ENOENT) {
 				ui__error("The %s event is not supported.\n",
-					    event_name(pos));
+					  perf_evsel__name(pos));
 				exit(EXIT_FAILURE);
 			}
 
@@ -916,7 +916,7 @@
 		usage_with_options(record_usage, record_options);
 
 	list_for_each_entry(pos, &evsel_list->entries, node) {
-		if (perf_header__push_event(pos->attr.config, event_name(pos)))
+		if (perf_header__push_event(pos->attr.config, perf_evsel__name(pos)))
 			goto out_free_fd;
 	}
 
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 25249f7..69b1c11 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -69,7 +69,7 @@
 
 	if ((sort__has_parent || symbol_conf.use_callchain)
 	    && sample->callchain) {
-		err = machine__resolve_callchain(machine, evsel, al->thread,
+		err = machine__resolve_callchain(machine, al->thread,
 						 sample->callchain, &parent);
 		if (err)
 			return err;
@@ -140,7 +140,7 @@
 	struct hist_entry *he;
 
 	if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) {
-		err = machine__resolve_callchain(machine, evsel, al->thread,
+		err = machine__resolve_callchain(machine, al->thread,
 						 sample->callchain, &parent);
 		if (err)
 			return err;
@@ -230,7 +230,7 @@
 	struct perf_report *rep = container_of(tool, struct perf_report, tool);
 
 	if (rep->show_threads) {
-		const char *name = evsel ? event_name(evsel) : "unknown";
+		const char *name = evsel ? perf_evsel__name(evsel) : "unknown";
 		perf_read_values_add_value(&rep->show_threads_values,
 					   event->read.pid, event->read.tid,
 					   event->read.id,
@@ -239,17 +239,18 @@
 	}
 
 	dump_printf(": %d %d %s %" PRIu64 "\n", event->read.pid, event->read.tid,
-		    evsel ? event_name(evsel) : "FAIL",
+		    evsel ? perf_evsel__name(evsel) : "FAIL",
 		    event->read.value);
 
 	return 0;
 }
 
+/* For pipe mode, sample_type is not currently set */
 static int perf_report__setup_sample_type(struct perf_report *rep)
 {
 	struct perf_session *self = rep->session;
 
-	if (!(self->sample_type & PERF_SAMPLE_CALLCHAIN)) {
+	if (!self->fd_pipe && !(self->sample_type & PERF_SAMPLE_CALLCHAIN)) {
 		if (sort__has_parent) {
 			ui__error("Selected --sort parent, but no "
 				    "callchain data. Did you call "
@@ -272,7 +273,8 @@
 	}
 
 	if (sort__branch_mode == 1) {
-		if (!(self->sample_type & PERF_SAMPLE_BRANCH_STACK)) {
+		if (!self->fd_pipe &&
+		    !(self->sample_type & PERF_SAMPLE_BRANCH_STACK)) {
 			ui__error("Selected -b but no branch data. "
 				  "Did you call perf record without -b?\n");
 			return -1;
@@ -314,7 +316,7 @@
 
 	list_for_each_entry(pos, &evlist->entries, node) {
 		struct hists *hists = &pos->hists;
-		const char *evname = event_name(pos);
+		const char *evname = perf_evsel__name(pos);
 
 		hists__fprintf_nr_sample_events(hists, evname, stdout);
 		hists__fprintf(hists, NULL, false, true, 0, 0, stdout);
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index b125e07..7a9ad2b 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -43,6 +43,11 @@
 
 static unsigned long		nr_tasks;
 
+struct perf_sched {
+	struct perf_tool    tool;	/* passed to perf_session__new(); container_of() anchor */
+	struct perf_session *session;	/* set in read_events(); used to reach ->pevent */
+};
+
 struct sched_atom;
 
 struct task_desc {
@@ -1597,11 +1602,13 @@
 						 struct perf_evsel *evsel,
 						 struct machine *machine)
 {
+	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
+	struct pevent *pevent = sched->session->pevent;
 	struct thread *thread = machine__findnew_thread(machine, sample->pid);
 
 	if (thread == NULL) {
 		pr_debug("problem processing %s event, skipping it.\n",
-			 evsel->name);
+			 perf_evsel__name(evsel));
 		return -1;
 	}
 
@@ -1612,7 +1619,8 @@
 		tracepoint_handler f = evsel->handler.func;
 
 		if (evsel->handler.data == NULL)
-			evsel->handler.data = trace_find_event(evsel->attr.config);
+			evsel->handler.data = pevent_find_event(pevent,
+							  evsel->attr.config);
 
 		f(tool, evsel->handler.data, sample, machine, thread);
 	}
@@ -1620,12 +1628,14 @@
 	return 0;
 }
 
-static struct perf_tool perf_sched = {
-	.sample			= perf_sched__process_tracepoint_sample,
-	.comm			= perf_event__process_comm,
-	.lost			= perf_event__process_lost,
-	.fork			= perf_event__process_task,
-	.ordered_samples	= true,
+static struct perf_sched perf_sched = {
+	.tool = {
+		.sample		 = perf_sched__process_tracepoint_sample,
+		.comm		 = perf_event__process_comm,
+		.lost		 = perf_event__process_lost,
+		.fork		 = perf_event__process_task,
+		.ordered_samples = true,
+	},
 };
 
 static void read_events(bool destroy, struct perf_session **psession)
@@ -1640,16 +1650,20 @@
 		{ "sched:sched_process_exit", process_sched_exit_event, },
 		{ "sched:sched_migrate_task", process_sched_migrate_task_event, },
 	};
-	struct perf_session *session = perf_session__new(input_name, O_RDONLY,
-							 0, false, &perf_sched);
+	struct perf_session *session;
+
+	session = perf_session__new(input_name, O_RDONLY, 0, false,
+				    &perf_sched.tool);
 	if (session == NULL)
 		die("No Memory");
 
-	err = perf_evlist__set_tracepoints_handlers_array(session->evlist, handlers);
+	perf_sched.session = session;
+
+	err = perf_session__set_tracepoints_handlers(session, handlers);
 	assert(err == 0);
 
 	if (perf_session__has_traces(session, "record -R")) {
-		err = perf_session__process_events(session, &perf_sched);
+		err = perf_session__process_events(session, &perf_sched.tool);
 		if (err)
 			die("Failed to process events, error %d", err);
 
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 8e395a5..1e60ab7 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -28,6 +28,11 @@
 static const char		*cpu_list;
 static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
 
+struct perf_script {
+	struct perf_tool    tool;	/* passed to perf_session__new(); container_of() anchor */
+	struct perf_session *session;	/* its ->pevent is handed to the scripting ops */
+};
+
 enum perf_output_field {
 	PERF_OUTPUT_COMM            = 1U << 0,
 	PERF_OUTPUT_TID             = 1U << 1,
@@ -137,10 +142,11 @@
 
 #define PRINT_FIELD(x)  (output[attr->type].fields & PERF_OUTPUT_##x)
 
-static int perf_event_attr__check_stype(struct perf_event_attr *attr,
-				  u64 sample_type, const char *sample_msg,
-				  enum perf_output_field field)
+static int perf_evsel__check_stype(struct perf_evsel *evsel,
+				   u64 sample_type, const char *sample_msg,
+				   enum perf_output_field field)
 {
+	struct perf_event_attr *attr = &evsel->attr;
 	int type = attr->type;
 	const char *evname;
 
@@ -148,7 +154,7 @@
 		return 0;
 
 	if (output[type].user_set) {
-		evname = __event_name(attr->type, attr->config);
+		evname = perf_evsel__name(evsel);
 		pr_err("Samples for '%s' event do not have %s attribute set. "
 		       "Cannot print '%s' field.\n",
 		       evname, sample_msg, output_field2str(field));
@@ -157,7 +163,7 @@
 
 	/* user did not ask for it explicitly so remove from the default list */
 	output[type].fields &= ~field;
-	evname = __event_name(attr->type, attr->config);
+	evname = perf_evsel__name(evsel);
 	pr_debug("Samples for '%s' event do not have %s attribute set. "
 		 "Skipping '%s' field.\n",
 		 evname, sample_msg, output_field2str(field));
@@ -175,8 +181,8 @@
 		return -EINVAL;
 
 	if (PRINT_FIELD(IP)) {
-		if (perf_event_attr__check_stype(attr, PERF_SAMPLE_IP, "IP",
-					   PERF_OUTPUT_IP))
+		if (perf_evsel__check_stype(evsel, PERF_SAMPLE_IP, "IP",
+					    PERF_OUTPUT_IP))
 			return -EINVAL;
 
 		if (!no_callchain &&
@@ -185,8 +191,8 @@
 	}
 
 	if (PRINT_FIELD(ADDR) &&
-		perf_event_attr__check_stype(attr, PERF_SAMPLE_ADDR, "ADDR",
-				       PERF_OUTPUT_ADDR))
+		perf_evsel__check_stype(evsel, PERF_SAMPLE_ADDR, "ADDR",
+					PERF_OUTPUT_ADDR))
 		return -EINVAL;
 
 	if (PRINT_FIELD(SYM) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) {
@@ -208,18 +214,18 @@
 	}
 
 	if ((PRINT_FIELD(PID) || PRINT_FIELD(TID)) &&
-		perf_event_attr__check_stype(attr, PERF_SAMPLE_TID, "TID",
-				       PERF_OUTPUT_TID|PERF_OUTPUT_PID))
+		perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID",
+					PERF_OUTPUT_TID|PERF_OUTPUT_PID))
 		return -EINVAL;
 
 	if (PRINT_FIELD(TIME) &&
-		perf_event_attr__check_stype(attr, PERF_SAMPLE_TIME, "TIME",
-				       PERF_OUTPUT_TIME))
+		perf_evsel__check_stype(evsel, PERF_SAMPLE_TIME, "TIME",
+					PERF_OUTPUT_TIME))
 		return -EINVAL;
 
 	if (PRINT_FIELD(CPU) &&
-		perf_event_attr__check_stype(attr, PERF_SAMPLE_CPU, "CPU",
-				       PERF_OUTPUT_CPU))
+		perf_evsel__check_stype(evsel, PERF_SAMPLE_CPU, "CPU",
+					PERF_OUTPUT_CPU))
 		return -EINVAL;
 
 	return 0;
@@ -256,11 +262,13 @@
 	return 0;
 }
 
-static void print_sample_start(struct perf_sample *sample,
+static void print_sample_start(struct pevent *pevent,
+			       struct perf_sample *sample,
 			       struct thread *thread,
-			       struct perf_event_attr *attr)
+			       struct perf_evsel *evsel)
 {
 	int type;
+	struct perf_event_attr *attr = &evsel->attr;
 	struct event_format *event;
 	const char *evname = NULL;
 	unsigned long secs;
@@ -300,12 +308,18 @@
 
 	if (PRINT_FIELD(EVNAME)) {
 		if (attr->type == PERF_TYPE_TRACEPOINT) {
-			type = trace_parse_common_type(sample->raw_data);
-			event = trace_find_event(type);
+			/*
+			 * XXX Do we really need this here?
+			 * perf_evlist__set_tracepoint_names should have done
+			 * this already
+			 */
+			type = trace_parse_common_type(pevent,
+						       sample->raw_data);
+			event = pevent_find_event(pevent, type);
 			if (event)
 				evname = event->name;
 		} else
-			evname = __event_name(attr->type, attr->config);
+			evname = perf_evsel__name(evsel);
 
 		printf("%s: ", evname ? evname : "[unknown]");
 	}
@@ -387,7 +401,7 @@
 			printf(" ");
 		else
 			printf("\n");
-		perf_event__print_ip(event, sample, machine, evsel,
+		perf_event__print_ip(event, sample, machine,
 				     PRINT_FIELD(SYM), PRINT_FIELD(DSO),
 				     PRINT_FIELD(SYMOFFSET));
 	}
@@ -402,6 +416,7 @@
 }
 
 static void process_event(union perf_event *event __unused,
+			  struct pevent *pevent,
 			  struct perf_sample *sample,
 			  struct perf_evsel *evsel,
 			  struct machine *machine,
@@ -412,7 +427,7 @@
 	if (output[attr->type].fields == 0)
 		return;
 
-	print_sample_start(sample, thread, attr);
+	print_sample_start(pevent, sample, thread, evsel);
 
 	if (is_bts_event(attr)) {
 		print_sample_bts(event, sample, evsel, machine, thread);
@@ -420,7 +435,7 @@
 	}
 
 	if (PRINT_FIELD(TRACE))
-		print_trace_event(sample->cpu, sample->raw_data,
+		print_trace_event(pevent, sample->cpu, sample->raw_data,
 				  sample->raw_size);
 
 	if (PRINT_FIELD(ADDR))
@@ -431,7 +446,7 @@
 			printf(" ");
 		else
 			printf("\n");
-		perf_event__print_ip(event, sample, machine, evsel,
+		perf_event__print_ip(event, sample, machine,
 				     PRINT_FIELD(SYM), PRINT_FIELD(DSO),
 				     PRINT_FIELD(SYMOFFSET));
 	}
@@ -451,7 +466,8 @@
 	return 0;
 }
 
-static int default_generate_script(const char *outfile __unused)
+static int default_generate_script(struct pevent *pevent __unused,
+				   const char *outfile __unused)
 {
 	return 0;
 }
@@ -489,6 +505,7 @@
 				struct machine *machine)
 {
 	struct addr_location al;
+	struct perf_script *scr = container_of(tool, struct perf_script, tool);
 	struct thread *thread = machine__findnew_thread(machine, event->ip.tid);
 
 	if (thread == NULL) {
@@ -520,24 +537,27 @@
 	if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
 		return 0;
 
-	scripting_ops->process_event(event, sample, evsel, machine, thread);
+	scripting_ops->process_event(event, scr->session->pevent,
+				     sample, evsel, machine, thread);
 
 	evsel->hists.stats.total_period += sample->period;
 	return 0;
 }
 
-static struct perf_tool perf_script = {
-	.sample		 = process_sample_event,
-	.mmap		 = perf_event__process_mmap,
-	.comm		 = perf_event__process_comm,
-	.exit		 = perf_event__process_task,
-	.fork		 = perf_event__process_task,
-	.attr		 = perf_event__process_attr,
-	.event_type	 = perf_event__process_event_type,
-	.tracing_data	 = perf_event__process_tracing_data,
-	.build_id	 = perf_event__process_build_id,
-	.ordered_samples = true,
-	.ordering_requires_timestamps = true,
+static struct perf_script perf_script = {
+	.tool = {
+		.sample		 = process_sample_event,
+		.mmap		 = perf_event__process_mmap,
+		.comm		 = perf_event__process_comm,
+		.exit		 = perf_event__process_task,
+		.fork		 = perf_event__process_task,
+		.attr		 = perf_event__process_attr,
+		.event_type	 = perf_event__process_event_type,
+		.tracing_data	 = perf_event__process_tracing_data,
+		.build_id	 = perf_event__process_build_id,
+		.ordered_samples = true,
+		.ordering_requires_timestamps = true,
+	},
 };
 
 extern volatile int session_done;
@@ -553,7 +573,7 @@
 
 	signal(SIGINT, sig_handler);
 
-	ret = perf_session__process_events(session, &perf_script);
+	ret = perf_session__process_events(session, &perf_script.tool);
 
 	if (debug_mode)
 		pr_err("Misordered timestamps: %" PRIu64 "\n", nr_unordered);
@@ -1335,10 +1355,13 @@
 	if (!script_name)
 		setup_pager();
 
-	session = perf_session__new(input_name, O_RDONLY, 0, false, &perf_script);
+	session = perf_session__new(input_name, O_RDONLY, 0, false,
+				    &perf_script.tool);
 	if (session == NULL)
 		return -ENOMEM;
 
+	perf_script.session = session;
+
 	if (cpu_list) {
 		if (perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap))
 			return -1;
@@ -1384,7 +1407,8 @@
 			return -1;
 		}
 
-		err = scripting_ops->generate_script("perf-script");
+		err = scripting_ops->generate_script(session->pevent,
+						     "perf-script");
 		goto out;
 	}
 
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 07b5c77..861f0ae 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -391,7 +391,7 @@
 
 	if (verbose) {
 		fprintf(output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
-			event_name(counter), count[0], count[1], count[2]);
+			perf_evsel__name(counter), count[0], count[1], count[2]);
 	}
 
 	/*
@@ -496,7 +496,7 @@
 			    errno == ENXIO) {
 				if (verbose)
 					ui__warning("%s event is not supported by the kernel.\n",
-						    event_name(counter));
+						    perf_evsel__name(counter));
 				counter->supported = false;
 				continue;
 			}
@@ -594,7 +594,7 @@
 			csv_output ? 0 : -4,
 			evsel_list->cpus->map[cpu], csv_sep);
 
-	fprintf(output, fmt, cpustr, msecs, csv_sep, event_name(evsel));
+	fprintf(output, fmt, cpustr, msecs, csv_sep, perf_evsel__name(evsel));
 
 	if (evsel->cgrp)
 		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
@@ -792,7 +792,7 @@
 	else
 		cpu = 0;
 
-	fprintf(output, fmt, cpustr, avg, csv_sep, event_name(evsel));
+	fprintf(output, fmt, cpustr, avg, csv_sep, perf_evsel__name(evsel));
 
 	if (evsel->cgrp)
 		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
@@ -908,7 +908,7 @@
 			counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
 			csv_sep,
 			csv_output ? 0 : -24,
-			event_name(counter));
+			perf_evsel__name(counter));
 
 		if (counter->cgrp)
 			fprintf(output, "%s%s", csv_sep, counter->cgrp->name);
@@ -961,7 +961,7 @@
 				counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
 				csv_sep,
 				csv_output ? 0 : -24,
-				event_name(counter));
+				perf_evsel__name(counter));
 
 			if (counter->cgrp)
 				fprintf(output, "%s%s",
diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c
index 5a8727c..5ce3030 100644
--- a/tools/perf/builtin-test.c
+++ b/tools/perf/builtin-test.c
@@ -583,7 +583,7 @@
 		if (nr_events[evsel->idx] != expected_nr_events[evsel->idx]) {
 			pr_debug("expected %d %s events, got %d\n",
 				 expected_nr_events[evsel->idx],
-				 event_name(evsel), nr_events[evsel->idx]);
+				 perf_evsel__name(evsel), nr_events[evsel->idx]);
 			goto out_munmap;
 		}
 	}
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 6bb0277..e3cab5f 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -245,7 +245,7 @@
 	if (notes->src == NULL)
 		goto out_unlock;
 
-	printf("Showing %s for %s\n", event_name(top->sym_evsel), symbol->name);
+	printf("Showing %s for %s\n", perf_evsel__name(top->sym_evsel), symbol->name);
 	printf("  Events  Pcnt (>=%d%%)\n", top->sym_pcnt_filter);
 
 	more = symbol__annotate_printf(symbol, he->ms.map, top->sym_evsel->idx,
@@ -408,7 +408,7 @@
 	fprintf(stdout, "\t[e]     display entries (lines).           \t(%d)\n", top->print_entries);
 
 	if (top->evlist->nr_entries > 1)
-		fprintf(stdout, "\t[E]     active event counter.              \t(%s)\n", event_name(top->sym_evsel));
+		fprintf(stdout, "\t[E]     active event counter.              \t(%s)\n", perf_evsel__name(top->sym_evsel));
 
 	fprintf(stdout, "\t[f]     profile display filter (count).    \t(%d)\n", top->count_filter);
 
@@ -503,13 +503,13 @@
 				fprintf(stderr, "\nAvailable events:");
 
 				list_for_each_entry(top->sym_evsel, &top->evlist->entries, node)
-					fprintf(stderr, "\n\t%d %s", top->sym_evsel->idx, event_name(top->sym_evsel));
+					fprintf(stderr, "\n\t%d %s", top->sym_evsel->idx, perf_evsel__name(top->sym_evsel));
 
 				prompt_integer(&counter, "Enter details event counter");
 
 				if (counter >= top->evlist->nr_entries) {
 					top->sym_evsel = list_entry(top->evlist->entries.next, struct perf_evsel, node);
-					fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(top->sym_evsel));
+					fprintf(stderr, "Sorry, no such event, using %s.\n", perf_evsel__name(top->sym_evsel));
 					sleep(1);
 					break;
 				}
@@ -774,7 +774,7 @@
 
 		if ((sort__has_parent || symbol_conf.use_callchain) &&
 		    sample->callchain) {
-			err = machine__resolve_callchain(machine, evsel, al.thread,
+			err = machine__resolve_callchain(machine, al.thread,
 							 sample->callchain, &parent);
 			if (err)
 				return;
@@ -960,7 +960,7 @@
 
 			if (err == ENOENT) {
 				ui__error("The %s event is not supported.\n",
-					    event_name(counter));
+					  perf_evsel__name(counter));
 				goto out_err;
 			} else if (err == EMFILE) {
 				ui__error("Too many events are opened.\n"
diff --git a/tools/perf/config/feature-tests.mak b/tools/perf/config/feature-tests.mak
index d9084e0..6c18785 100644
--- a/tools/perf/config/feature-tests.mak
+++ b/tools/perf/config/feature-tests.mak
@@ -78,6 +78,19 @@
         return 0;
 }
 endef
+
+define SOURCE_GTK2_INFOBAR
+#pragma GCC diagnostic ignored \"-Wstrict-prototypes\"
+#include <gtk/gtk.h>
+#pragma GCC diagnostic error \"-Wstrict-prototypes\"
+
+int main(void)
+{
+	gtk_info_bar_new();
+
+	return 0;
+}
+endef
 endif
 
 ifndef NO_LIBPERL
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 34b1c46..67a2703 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -814,7 +814,7 @@
 {
 	struct disasm_line *pos, *n;
 	struct annotation *notes;
-	const size_t size = symbol__size(sym);
+	size_t size;
 	struct map_symbol ms = {
 		.map = map,
 		.sym = sym,
@@ -834,6 +834,8 @@
 	if (sym == NULL)
 		return -1;
 
+	size = symbol__size(sym);
+
 	if (map->dso->annotate_warned)
 		return -1;
 
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 53f6697..482f051 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -23,6 +23,7 @@
 	struct hists	    *hists;
 	struct hist_entry   *he_selection;
 	struct map_symbol   *selection;
+	int		     print_seq;
 	bool		     has_symbols;
 };
 
@@ -800,6 +801,196 @@
 	}
 }
 
+/*
+ * Print the children of @chain_node to @fp, one callchain entry per line,
+ * recursing into each child whose last printed entry is unfolded ('-').
+ * Returns the sum of the fprintf() return values.
+ */
+static int hist_browser__fprintf_callchain_node_rb_tree(struct hist_browser *browser,
+							struct callchain_node *chain_node,
+							u64 total, int level,
+							FILE *fp)
+{
+	struct rb_node *node;
+	int offset = level * LEVEL_OFFSET_STEP;
+	u64 new_total;
+	int printed = 0;
+
+	if (callchain_param.mode == CHAIN_GRAPH_REL)
+		new_total = chain_node->children_hit;
+	else
+		new_total = total;
+
+	for (node = rb_first(&chain_node->rb_root); node; node = rb_next(node)) {
+		struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node);
+		u64 cumul = callchain_cumul_hits(child);
+		struct callchain_list *chain;
+		char folded_sign = ' ';
+		int first = true;
+		int extra_offset = 0;
+
+		list_for_each_entry(chain, &child->val, list) {
+			char ipstr[BITS_PER_LONG / 4 + 1], *alloc_str;
+			const char *str;
+			bool was_first = first;
+
+			if (first)
+				first = false;
+			else
+				extra_offset = LEVEL_OFFSET_STEP;
+
+			folded_sign = callchain_list__folded(chain);
+
+			alloc_str = NULL;
+			str = callchain_list__sym_name(chain, ipstr, sizeof(ipstr));
+			if (was_first) {
+				double percent = cumul * 100.0 / new_total;
+
+				if (asprintf(&alloc_str, "%2.2f%% %s", percent, str) < 0) {
+					alloc_str = NULL; /* undefined after a failed asprintf() */
+					str = "Not enough memory!";
+				} else {
+					str = alloc_str;
+				}
+			}
+
+			printed += fprintf(fp, "%*s%c %s\n", offset + extra_offset, " ", folded_sign, str);
+			free(alloc_str);
+			if (folded_sign == '+')
+				break;
+		}
+
+		if (folded_sign == '-') {
+			const int new_level = level + (extra_offset ? 2 : 1);
+			printed += hist_browser__fprintf_callchain_node_rb_tree(browser, child, new_total,
+										new_level, fp);
+		}
+	}
+
+	return printed;
+}
+
+/* Print @node's own entries at @level, then its children if it is unfolded. */
+static int hist_browser__fprintf_callchain_node(struct hist_browser *browser,
+						struct callchain_node *node,
+						int level, FILE *fp)
+{
+	const int indent = level * LEVEL_OFFSET_STEP;
+	struct callchain_list *cl;
+	char sign = ' ';
+	int nr = 0;
+
+	list_for_each_entry(cl, &node->val, list) {
+		char addr[BITS_PER_LONG / 4 + 1];
+
+		sign = callchain_list__folded(cl);
+		nr += fprintf(fp, "%*s%c %s\n", indent, " ", sign,
+			      callchain_list__sym_name(cl, addr, sizeof(addr)));
+	}
+
+	if (sign != '-')
+		return nr;
+	return nr + hist_browser__fprintf_callchain_node_rb_tree(browser, node,
+			browser->hists->stats.total_period, level + 1, fp);
+}
+
+/* Print every callchain node of the @chain rbtree, walking rb_first()/rb_next(). */
+static int hist_browser__fprintf_callchain(struct hist_browser *browser,
+					   struct rb_root *chain, int level, FILE *fp)
+{
+	struct rb_node *pos = rb_first(chain);
+	int nr = 0;
+
+	while (pos != NULL) {
+		nr += hist_browser__fprintf_callchain_node(browser,
+				rb_entry(pos, struct callchain_node, rb_node), level, fp);
+		pos = rb_next(pos);
+	}
+	return nr;
+}
+
+/*
+ * Write one histogram line for @he to @fp: the fold marker (callchain mode
+ * only), the share of the total period, the optional sample-count and period
+ * columns, then the rtrim()ed hist_entry__snprintf() text and, if unfolded,
+ * its callchains. Returns the sum of the fprintf() return values. */
+static int hist_browser__fprintf_entry(struct hist_browser *browser,
+				       struct hist_entry *he, FILE *fp)
+{
+	const bool with_chain = symbol_conf.use_callchain;
+	char sign = with_chain ? hist_entry__folded(he) : ' ';
+	char line[8192];
+	double pcnt;
+	int nr = 0;
+
+	hist_entry__snprintf(he, line, sizeof(line), browser->hists);
+	pcnt = (he->period * 100.0) / browser->hists->stats.total_period;
+
+	if (with_chain)
+		nr += fprintf(fp, "%c ", sign);
+
+	nr += fprintf(fp, " %5.2f%%", pcnt);
+	if (symbol_conf.show_nr_samples)
+		nr += fprintf(fp, " %11u", he->nr_events);
+	if (symbol_conf.show_total_period)
+		nr += fprintf(fp, " %12" PRIu64, he->period);
+	nr += fprintf(fp, "%s\n", rtrim(line));
+
+	if (sign != '-')
+		return nr;
+
+	return nr + hist_browser__fprintf_callchain(browser, &he->sorted_chain, 1, fp);
+}
+
+/* Print each entry that hists__filter_entries() yields from the browser's rbtree. */
+static int hist_browser__fprintf(struct hist_browser *browser, FILE *fp)
+{
+	struct rb_node *pos;
+	int nr = 0;
+
+	for (pos = hists__filter_entries(rb_first(browser->b.entries)); pos != NULL;
+	     pos = hists__filter_entries(rb_next(pos))) {
+		struct hist_entry *entry = rb_entry(pos, struct hist_entry, rb_node);
+
+		nr += hist_browser__fprintf_entry(browser, entry, fp);
+	}
+	return nr;
+}
+
+/* Dump the entries to the first unused perf.hist.N file; returns 0 or -1. */
+static int hist_browser__dump(struct hist_browser *browser)
+{
+	char filename[64];
+	FILE *fp;
+
+	while (1) {
+		scnprintf(filename, sizeof(filename), "perf.hist.%d", browser->print_seq);
+		if (access(filename, F_OK))
+			break;
+		/* XXX: Just an arbitrary lazy upper limit */
+		if (++browser->print_seq == 8192) {
+			ui_helpline__fpush("Too many perf.hist.N files, nothing written!");
+			return -1;
+		}
+	}
+
+	fp = fopen(filename, "w");
+	if (fp == NULL) {
+		char bf[64];
+		/* GNU strerror_r() may not fill bf: use the pointer it returns. */
+		const char *err = strerror_r(errno, bf, sizeof(bf));
+		ui_helpline__fpush("Couldn't write to %s: %s", filename, err);
+		return -1;
+	}
+
+	++browser->print_seq;
+	hist_browser__fprintf(browser, fp);
+	fclose(fp);
+	ui_helpline__fpush("%s written!", filename);
+
+	return 0;
+}
+
 static struct hist_browser *hist_browser__new(struct hists *hists)
 {
 	struct hist_browser *browser = zalloc(sizeof(*browser));
@@ -937,6 +1128,9 @@
 			    browser->selection->map->dso->annotate_warned)
 				continue;
 			goto do_annotate;
+		case 'P':
+			hist_browser__dump(browser);
+			continue;
 		case 'd':
 			goto zoom_dso;
 		case 't':
@@ -969,6 +1163,7 @@
 					"E             Expand all callchains\n"
 					"d             Zoom into current DSO\n"
 					"t             Zoom into current Thread\n"
+					"P             Print histograms to perf.hist.N\n"
 					"/             Filter symbol by name");
 			continue;
 		case K_ENTER:
@@ -1172,7 +1367,7 @@
 	struct perf_evsel *evsel = list_entry(entry, struct perf_evsel, node);
 	bool current_entry = ui_browser__is_current_entry(browser, row);
 	unsigned long nr_events = evsel->hists.stats.nr_events[PERF_RECORD_SAMPLE];
-	const char *ev_name = event_name(evsel);
+	const char *ev_name = perf_evsel__name(evsel);
 	char bf[256], unit;
 	const char *warn = " ";
 	size_t printed;
@@ -1240,7 +1435,7 @@
 			 */
 			if (timer)
 				timer(arg);
-			ev_name = event_name(pos);
+			ev_name = perf_evsel__name(pos);
 			key = perf_evsel__hists_browse(pos, nr_events, help,
 						       ev_name, true, timer,
 						       arg, delay_secs);
@@ -1309,17 +1504,11 @@
 	ui_helpline__push("Press ESC to exit");
 
 	list_for_each_entry(pos, &evlist->entries, node) {
-		const char *ev_name = event_name(pos);
+		const char *ev_name = perf_evsel__name(pos);
 		size_t line_len = strlen(ev_name) + 7;
 
 		if (menu.b.width < line_len)
 			menu.b.width = line_len;
-		/*
-		 * Cache the evsel name, tracepoints have a _high_ cost per
-		 * event_name() call.
-		 */
-		if (pos->name == NULL)
-			pos->name = strdup(ev_name);
 	}
 
 	return perf_evsel_menu__run(&menu, evlist->nr_entries, help, timer,
@@ -1330,11 +1519,10 @@
 				  void(*timer)(void *arg), void *arg,
 				  int delay_secs)
 {
-
 	if (evlist->nr_entries == 1) {
 		struct perf_evsel *first = list_entry(evlist->entries.next,
 						      struct perf_evsel, node);
-		const char *ev_name = event_name(first);
+		const char *ev_name = perf_evsel__name(first);
 		return perf_evsel__hists_browse(first, evlist->nr_entries, help,
 						ev_name, false, timer, arg,
 						delay_secs);
diff --git a/tools/perf/ui/gtk/browser.c b/tools/perf/ui/gtk/browser.c
index 0656c38..ec12e0b 100644
--- a/tools/perf/ui/gtk/browser.c
+++ b/tools/perf/ui/gtk/browser.c
@@ -11,8 +11,8 @@
 
 static void perf_gtk__signal(int sig)
 {
+	perf_gtk__exit(false);
 	psignal(sig, "perf");
-	gtk_main_quit();
 }
 
 static void perf_gtk__resize_window(GtkWidget *window)
@@ -122,13 +122,59 @@
 	gtk_container_add(GTK_CONTAINER(window), view);
 }
 
+#ifdef HAVE_GTK_INFO_BAR
+static GtkWidget *perf_gtk__setup_info_bar(void)
+{
+	GtkWidget *info_bar;
+	GtkWidget *label;
+	GtkWidget *content_area;
+
+	info_bar = gtk_info_bar_new();
+	gtk_widget_set_no_show_all(info_bar, TRUE);
+
+	label = gtk_label_new("");
+	gtk_widget_show(label);
+
+	content_area = gtk_info_bar_get_content_area(GTK_INFO_BAR(info_bar));
+	gtk_container_add(GTK_CONTAINER(content_area), label);
+
+	gtk_info_bar_add_button(GTK_INFO_BAR(info_bar), GTK_STOCK_OK,
+				GTK_RESPONSE_OK);
+	g_signal_connect(info_bar, "response",
+			 G_CALLBACK(gtk_widget_hide), NULL);
+
+	pgctx->info_bar = info_bar;
+	pgctx->message_label = label;
+
+	return info_bar;
+}
+#endif
+
+static GtkWidget *perf_gtk__setup_statusbar(void)
+{
+	GtkWidget *stbar;
+	unsigned ctxid;
+
+	stbar = gtk_statusbar_new();
+
+	ctxid = gtk_statusbar_get_context_id(GTK_STATUSBAR(stbar),
+					     "perf report");
+	pgctx->statbar = stbar;
+	pgctx->statbar_ctx_id = ctxid;
+
+	return stbar;
+}
+
 int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist,
 				  const char *help __used,
 				  void (*timer) (void *arg)__used,
 				  void *arg __used, int delay_secs __used)
 {
 	struct perf_evsel *pos;
+	GtkWidget *vbox;
 	GtkWidget *notebook;
+	GtkWidget *info_bar;
+	GtkWidget *statbar;
 	GtkWidget *window;
 
 	signal(SIGSEGV, perf_gtk__signal);
@@ -143,11 +189,17 @@
 
 	g_signal_connect(window, "delete_event", gtk_main_quit, NULL);
 
+	pgctx = perf_gtk__activate_context(window);
+	if (!pgctx)
+		return -1;
+
+	vbox = gtk_vbox_new(FALSE, 0);
+
 	notebook = gtk_notebook_new();
 
 	list_for_each_entry(pos, &evlist->entries, node) {
 		struct hists *hists = &pos->hists;
-		const char *evname = event_name(pos);
+		const char *evname = perf_evsel__name(pos);
 		GtkWidget *scrolled_window;
 		GtkWidget *tab_label;
 
@@ -164,7 +216,16 @@
 		gtk_notebook_append_page(GTK_NOTEBOOK(notebook), scrolled_window, tab_label);
 	}
 
-	gtk_container_add(GTK_CONTAINER(window), notebook);
+	gtk_box_pack_start(GTK_BOX(vbox), notebook, TRUE, TRUE, 0);
+
+	info_bar = perf_gtk__setup_info_bar();
+	if (info_bar)
+		gtk_box_pack_start(GTK_BOX(vbox), info_bar, FALSE, FALSE, 0);
+
+	statbar = perf_gtk__setup_statusbar();
+	gtk_box_pack_start(GTK_BOX(vbox), statbar, FALSE, FALSE, 0);
+
+	gtk_container_add(GTK_CONTAINER(window), vbox);
 
 	gtk_widget_show_all(window);
 
@@ -174,5 +235,7 @@
 
 	gtk_main();
 
+	perf_gtk__deactivate_context(&pgctx);
+
 	return 0;
 }
diff --git a/tools/perf/ui/gtk/gtk.h b/tools/perf/ui/gtk/gtk.h
index 75177ee..a4d0f2b 100644
--- a/tools/perf/ui/gtk/gtk.h
+++ b/tools/perf/ui/gtk/gtk.h
@@ -1,8 +1,39 @@
 #ifndef _PERF_GTK_H_
 #define _PERF_GTK_H_ 1
 
+#include <stdbool.h>
+
 #pragma GCC diagnostic ignored "-Wstrict-prototypes"
 #include <gtk/gtk.h>
 #pragma GCC diagnostic error "-Wstrict-prototypes"
 
+
+struct perf_gtk_context {
+	GtkWidget *main_window;
+
+#ifdef HAVE_GTK_INFO_BAR
+	GtkWidget *info_bar;
+	GtkWidget *message_label;
+#endif
+	GtkWidget *statbar;
+	guint statbar_ctx_id;
+};
+
+extern struct perf_gtk_context *pgctx;
+
+static inline bool perf_gtk__is_active_context(struct perf_gtk_context *ctx)
+{
+	return ctx && ctx->main_window;
+}
+
+struct perf_gtk_context *perf_gtk__activate_context(GtkWidget *window);
+int perf_gtk__deactivate_context(struct perf_gtk_context **ctx);
+
+#ifndef HAVE_GTK_INFO_BAR
+static inline GtkWidget *perf_gtk__setup_info_bar(void)
+{
+	return NULL;
+}
+#endif
+
 #endif /* _PERF_GTK_H_ */
diff --git a/tools/perf/ui/gtk/setup.c b/tools/perf/ui/gtk/setup.c
index 8295299..92879ce 100644
--- a/tools/perf/ui/gtk/setup.c
+++ b/tools/perf/ui/gtk/setup.c
@@ -1,12 +1,17 @@
 #include "gtk.h"
 #include "../../util/cache.h"
+#include "../../util/debug.h"
+
+extern struct perf_error_ops perf_gtk_eops;
 
 int perf_gtk__init(void)
 {
+	perf_error__register(&perf_gtk_eops);
 	return gtk_init_check(NULL, NULL) ? 0 : -1;
 }
 
 void perf_gtk__exit(bool wait_for_ok __used)
 {
+	perf_error__unregister(&perf_gtk_eops);
 	gtk_main_quit();
 }
diff --git a/tools/perf/ui/gtk/util.c b/tools/perf/ui/gtk/util.c
new file mode 100644
index 0000000..0ead373
--- /dev/null
+++ b/tools/perf/ui/gtk/util.c
@@ -0,0 +1,129 @@
+#include "../util.h"
+#include "../../util/debug.h"
+#include "gtk.h"
+
+#include <string.h>
+
+
+struct perf_gtk_context *pgctx;
+
+struct perf_gtk_context *perf_gtk__activate_context(GtkWidget *window)
+{
+	struct perf_gtk_context *ctx;
+
+	ctx = malloc(sizeof(*pgctx));
+	if (ctx)
+		ctx->main_window = window;
+
+	return ctx;
+}
+
+int perf_gtk__deactivate_context(struct perf_gtk_context **ctx)
+{
+	if (!perf_gtk__is_active_context(*ctx))
+		return -1;
+
+	free(*ctx);
+	*ctx = NULL;
+	return 0;
+}
+
+static int perf_gtk__error(const char *format, va_list args)
+{
+	char *msg;
+	GtkWidget *dialog;
+
+	if (!perf_gtk__is_active_context(pgctx) ||
+	    vasprintf(&msg, format, args) < 0) {
+		fprintf(stderr, "Error:\n");
+		vfprintf(stderr, format, args);
+		fprintf(stderr, "\n");
+		return -1;
+	}
+
+	dialog = gtk_message_dialog_new_with_markup(GTK_WINDOW(pgctx->main_window),
+					GTK_DIALOG_DESTROY_WITH_PARENT,
+					GTK_MESSAGE_ERROR,
+					GTK_BUTTONS_CLOSE,
+					"<b>Error</b>\n\n%s", msg);
+	gtk_dialog_run(GTK_DIALOG(dialog));
+
+	gtk_widget_destroy(dialog);
+	free(msg);
+	return 0;
+}
+
+#ifdef HAVE_GTK_INFO_BAR
+static int perf_gtk__warning_info_bar(const char *format, va_list args)
+{
+	char *msg;
+
+	if (!perf_gtk__is_active_context(pgctx) ||
+	    vasprintf(&msg, format, args) < 0) {
+		fprintf(stderr, "Warning:\n");
+		vfprintf(stderr, format, args);
+		fprintf(stderr, "\n");
+		return -1;
+	}
+
+	gtk_label_set_text(GTK_LABEL(pgctx->message_label), msg);
+	gtk_info_bar_set_message_type(GTK_INFO_BAR(pgctx->info_bar),
+				      GTK_MESSAGE_WARNING);
+	gtk_widget_show(pgctx->info_bar);
+
+	free(msg);
+	return 0;
+}
+#else
+static int perf_gtk__warning_statusbar(const char *format, va_list args)
+{
+	char *msg, *p;
+
+	if (!perf_gtk__is_active_context(pgctx) ||
+	    vasprintf(&msg, format, args) < 0) {
+		fprintf(stderr, "Warning:\n");
+		vfprintf(stderr, format, args);
+		fprintf(stderr, "\n");
+		return -1;
+	}
+
+	gtk_statusbar_pop(GTK_STATUSBAR(pgctx->statbar),
+			  pgctx->statbar_ctx_id);
+
+	/* Only first line can be displayed */
+	p = strchr(msg, '\n');
+	if (p)
+		*p = '\0';
+
+	gtk_statusbar_push(GTK_STATUSBAR(pgctx->statbar),
+			   pgctx->statbar_ctx_id, msg);
+
+	free(msg);
+	return 0;
+}
+#endif
+
+struct perf_error_ops perf_gtk_eops = {
+	.error		= perf_gtk__error,
+#ifdef HAVE_GTK_INFO_BAR
+	.warning	= perf_gtk__warning_info_bar,
+#else
+	.warning	= perf_gtk__warning_statusbar,
+#endif
+};
+
+/*
+ * FIXME: Functions below should be implemented properly.
+ *        For now, just add stubs for NO_NEWT=1 build.
+ */
+#ifdef NO_NEWT_SUPPORT
+int ui_helpline__show_help(const char *format __used, va_list ap __used)
+{
+	return 0;
+}
+
+void ui_progress__update(u64 curr __used, u64 total __used,
+			 const char *title __used)
+{
+}
+#endif
diff --git a/tools/perf/ui/tui/setup.c b/tools/perf/ui/tui/setup.c
index d33e943..e813c1d 100644
--- a/tools/perf/ui/tui/setup.c
+++ b/tools/perf/ui/tui/setup.c
@@ -15,6 +15,8 @@
 
 static volatile int ui__need_resize;
 
+extern struct perf_error_ops perf_tui_eops;
+
 void ui__refresh_dimensions(bool force)
 {
 	if (force || ui__need_resize) {
@@ -122,6 +124,8 @@
 	signal(SIGINT, ui__signal);
 	signal(SIGQUIT, ui__signal);
 	signal(SIGTERM, ui__signal);
+
+	perf_error__register(&perf_tui_eops);
 out:
 	return err;
 }
@@ -137,4 +141,6 @@
 	SLsmg_refresh();
 	SLsmg_reset_smg();
 	SLang_reset_tty();
+
+	perf_error__unregister(&perf_tui_eops);
 }
diff --git a/tools/perf/ui/tui/util.c b/tools/perf/ui/tui/util.c
new file mode 100644
index 0000000..092902e
--- /dev/null
+++ b/tools/perf/ui/tui/util.c
@@ -0,0 +1,243 @@
+#include "../../util/util.h"
+#include <signal.h>
+#include <stdbool.h>
+#include <string.h>
+#include <sys/ttydefaults.h>
+
+#include "../../util/cache.h"
+#include "../../util/debug.h"
+#include "../browser.h"
+#include "../keysyms.h"
+#include "../helpline.h"
+#include "../ui.h"
+#include "../util.h"
+#include "../libslang.h"
+
+static void ui_browser__argv_write(struct ui_browser *browser,
+				   void *entry, int row)
+{
+	char **arg = entry;
+	bool current_entry = ui_browser__is_current_entry(browser, row);
+
+	ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED :
+						       HE_COLORSET_NORMAL);
+	slsmg_write_nstring(*arg, browser->width);
+}
+
+static int popup_menu__run(struct ui_browser *menu)
+{
+	int key;
+
+	if (ui_browser__show(menu, " ", "ESC: exit, ENTER|->: Select option") < 0)
+		return -1;
+
+	while (1) {
+		key = ui_browser__run(menu, 0);
+
+		switch (key) {
+		case K_RIGHT:
+		case K_ENTER:
+			key = menu->index;
+			break;
+		case K_LEFT:
+		case K_ESC:
+		case 'q':
+		case CTRL('c'):
+			key = -1;
+			break;
+		default:
+			continue;
+		}
+
+		break;
+	}
+
+	ui_browser__hide(menu);
+	return key;
+}
+
+int ui__popup_menu(int argc, char * const argv[])
+{
+	struct ui_browser menu = {
+		.entries    = (void *)argv,
+		.refresh    = ui_browser__argv_refresh,
+		.seek	    = ui_browser__argv_seek,
+		.write	    = ui_browser__argv_write,
+		.nr_entries = argc,
+	};
+
+	return popup_menu__run(&menu);
+}
+
+int ui_browser__input_window(const char *title, const char *text, char *input,
+			     const char *exit_msg, int delay_secs)
+{
+	int x, y, len, key;
+	int max_len = 60, nr_lines = 0;
+	static char buf[50];
+	const char *t;
+
+	t = text;
+	while (1) {
+		const char *sep = strchr(t, '\n');
+
+		if (sep == NULL)
+			sep = strchr(t, '\0');
+		len = sep - t;
+		if (max_len < len)
+			max_len = len;
+		++nr_lines;
+		if (*sep == '\0')
+			break;
+		t = sep + 1;
+	}
+
+	max_len += 2;
+	nr_lines += 8;
+	y = SLtt_Screen_Rows / 2 - nr_lines / 2;
+	x = SLtt_Screen_Cols / 2 - max_len / 2;
+
+	SLsmg_set_color(0);
+	SLsmg_draw_box(y, x++, nr_lines, max_len);
+	if (title) {
+		SLsmg_gotorc(y, x + 1);
+		SLsmg_write_string((char *)title);
+	}
+	SLsmg_gotorc(++y, x);
+	nr_lines -= 7;
+	max_len -= 2;
+	SLsmg_write_wrapped_string((unsigned char *)text, y, x,
+				   nr_lines, max_len, 1);
+	y += nr_lines;
+	len = 5;
+	while (len--) {
+		SLsmg_gotorc(y + len - 1, x);
+		SLsmg_write_nstring((char *)" ", max_len);
+	}
+	SLsmg_draw_box(y++, x + 1, 3, max_len - 2);
+
+	SLsmg_gotorc(y + 3, x);
+	SLsmg_write_nstring((char *)exit_msg, max_len);
+	SLsmg_refresh();
+
+	x += 2;
+	len = 0;
+	key = ui__getch(delay_secs);
+	while (key != K_TIMER && key != K_ENTER && key != K_ESC) {
+		if (key == K_BKSPC) {
+			if (len == 0)
+				goto next_key;
+			SLsmg_gotorc(y, x + --len);
+			SLsmg_write_char(' ');
+		} else {
+			buf[len] = key;
+			SLsmg_gotorc(y, x + len++);
+			SLsmg_write_char(key);
+		}
+		SLsmg_refresh();
+
+		/* XXX more graceful overflow handling needed */
+		if (len == sizeof(buf) - 1) {
+			ui_helpline__push("maximum size of symbol name reached!");
+			key = K_ENTER;
+			break;
+		}
+next_key:
+		key = ui__getch(delay_secs);
+	}
+
+	buf[len] = '\0';
+	strncpy(input, buf, len+1);
+	return key;
+}
+
+int ui__question_window(const char *title, const char *text,
+			const char *exit_msg, int delay_secs)
+{
+	int x, y;
+	int max_len = 0, nr_lines = 0;
+	const char *t;
+
+	t = text;
+	while (1) {	/* measure @text: longest line and number of lines */
+		const char *sep = strchr(t, '\n');
+		int len;
+
+		if (sep == NULL)
+			sep = strchr(t, '\0');
+		len = sep - t;
+		if (max_len < len)
+			max_len = len;
+		++nr_lines;
+		if (*sep == '\0')
+			break;
+		t = sep + 1;
+	}
+
+	max_len += 2;
+	nr_lines += 4;
+	y = SLtt_Screen_Rows / 2 - nr_lines / 2;
+	x = SLtt_Screen_Cols / 2 - max_len / 2;
+
+	SLsmg_set_color(0);
+	SLsmg_draw_box(y, x++, nr_lines, max_len);
+	if (title) {	/* @title may be NULL, see ui__dialog_yesno() */
+		SLsmg_gotorc(y, x + 1);
+		SLsmg_write_string((char *)title);
+	}
+	SLsmg_gotorc(++y, x);
+	nr_lines -= 2;
+	max_len -= 2;
+	SLsmg_write_wrapped_string((unsigned char *)text, y, x,
+				   nr_lines, max_len, 1);
+	SLsmg_gotorc(y + nr_lines - 2, x);
+	SLsmg_write_nstring((char *)" ", max_len);
+	SLsmg_gotorc(y + nr_lines - 1, x);
+	SLsmg_write_nstring((char *)exit_msg, max_len);
+	SLsmg_refresh();
+	return ui__getch(delay_secs);	/* the key read is the return value */
+}
+
+int ui__help_window(const char *text)
+{
+	return ui__question_window("Help", text, "Press any key...", 0);
+}
+
+int ui__dialog_yesno(const char *msg)
+{
+	return ui__question_window(NULL, msg, "Enter: Yes, ESC: No", 0);
+}
+
+static int __ui__warning(const char *title, const char *format, va_list args)
+{
+	char *s;
+
+	if (vasprintf(&s, format, args) > 0) {
+		int key;
+
+		pthread_mutex_lock(&ui__lock);
+		key = ui__question_window(title, s, "Press any key...", 0);
+		pthread_mutex_unlock(&ui__lock);
+		free(s);
+		return key;
+	}
+
+	fprintf(stderr, "%s\n", title);
+	vfprintf(stderr, format, args);
+	return K_ESC;
+}
+
+static int perf_tui__error(const char *format, va_list args)
+{
+	return __ui__warning("Error:", format, args);
+}
+
+static int perf_tui__warning(const char *format, va_list args)
+{
+	return __ui__warning("Warning:", format, args);
+}
+
+struct perf_error_ops perf_tui_eops = {
+	.error		= perf_tui__error,
+	.warning	= perf_tui__warning,
+};
diff --git a/tools/perf/ui/util.c b/tools/perf/ui/util.c
index ad4374a..4f98977 100644
--- a/tools/perf/ui/util.c
+++ b/tools/perf/ui/util.c
@@ -1,250 +1,85 @@
-#include "../util.h"
-#include <signal.h>
-#include <stdbool.h>
-#include <string.h>
-#include <sys/ttydefaults.h>
-
-#include "../cache.h"
-#include "../debug.h"
-#include "browser.h"
-#include "keysyms.h"
-#include "helpline.h"
-#include "ui.h"
 #include "util.h"
-#include "libslang.h"
+#include "../debug.h"
 
-static void ui_browser__argv_write(struct ui_browser *browser,
-				   void *entry, int row)
+
+/*
+ * Default error logging functions
+ */
+static int perf_stdio__error(const char *format, va_list args)
 {
-	char **arg = entry;
-	bool current_entry = ui_browser__is_current_entry(browser, row);
-
-	ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED :
-						       HE_COLORSET_NORMAL);
-	slsmg_write_nstring(*arg, browser->width);
-}
-
-static int popup_menu__run(struct ui_browser *menu)
-{
-	int key;
-
-	if (ui_browser__show(menu, " ", "ESC: exit, ENTER|->: Select option") < 0)
-		return -1;
-
-	while (1) {
-		key = ui_browser__run(menu, 0);
-
-		switch (key) {
-		case K_RIGHT:
-		case K_ENTER:
-			key = menu->index;
-			break;
-		case K_LEFT:
-		case K_ESC:
-		case 'q':
-		case CTRL('c'):
-			key = -1;
-			break;
-		default:
-			continue;
-		}
-
-		break;
-	}
-
-	ui_browser__hide(menu);
-	return key;
-}
-
-int ui__popup_menu(int argc, char * const argv[])
-{
-	struct ui_browser menu = {
-		.entries    = (void *)argv,
-		.refresh    = ui_browser__argv_refresh,
-		.seek	    = ui_browser__argv_seek,
-		.write	    = ui_browser__argv_write,
-		.nr_entries = argc,
-	};
-
-	return popup_menu__run(&menu);
-}
-
-int ui_browser__input_window(const char *title, const char *text, char *input,
-			     const char *exit_msg, int delay_secs)
-{
-	int x, y, len, key;
-	int max_len = 60, nr_lines = 0;
-	static char buf[50];
-	const char *t;
-
-	t = text;
-	while (1) {
-		const char *sep = strchr(t, '\n');
-
-		if (sep == NULL)
-			sep = strchr(t, '\0');
-		len = sep - t;
-		if (max_len < len)
-			max_len = len;
-		++nr_lines;
-		if (*sep == '\0')
-			break;
-		t = sep + 1;
-	}
-
-	max_len += 2;
-	nr_lines += 8;
-	y = SLtt_Screen_Rows / 2 - nr_lines / 2;
-	x = SLtt_Screen_Cols / 2 - max_len / 2;
-
-	SLsmg_set_color(0);
-	SLsmg_draw_box(y, x++, nr_lines, max_len);
-	if (title) {
-		SLsmg_gotorc(y, x + 1);
-		SLsmg_write_string((char *)title);
-	}
-	SLsmg_gotorc(++y, x);
-	nr_lines -= 7;
-	max_len -= 2;
-	SLsmg_write_wrapped_string((unsigned char *)text, y, x,
-				   nr_lines, max_len, 1);
-	y += nr_lines;
-	len = 5;
-	while (len--) {
-		SLsmg_gotorc(y + len - 1, x);
-		SLsmg_write_nstring((char *)" ", max_len);
-	}
-	SLsmg_draw_box(y++, x + 1, 3, max_len - 2);
-
-	SLsmg_gotorc(y + 3, x);
-	SLsmg_write_nstring((char *)exit_msg, max_len);
-	SLsmg_refresh();
-
-	x += 2;
-	len = 0;
-	key = ui__getch(delay_secs);
-	while (key != K_TIMER && key != K_ENTER && key != K_ESC) {
-		if (key == K_BKSPC) {
-			if (len == 0)
-				goto next_key;
-			SLsmg_gotorc(y, x + --len);
-			SLsmg_write_char(' ');
-		} else {
-			buf[len] = key;
-			SLsmg_gotorc(y, x + len++);
-			SLsmg_write_char(key);
-		}
-		SLsmg_refresh();
-
-		/* XXX more graceful overflow handling needed */
-		if (len == sizeof(buf) - 1) {
-			ui_helpline__push("maximum size of symbol name reached!");
-			key = K_ENTER;
-			break;
-		}
-next_key:
-		key = ui__getch(delay_secs);
-	}
-
-	buf[len] = '\0';
-	strncpy(input, buf, len+1);
-	return key;
-}
-
-int ui__question_window(const char *title, const char *text,
-			const char *exit_msg, int delay_secs)
-{
-	int x, y;
-	int max_len = 0, nr_lines = 0;
-	const char *t;
-
-	t = text;
-	while (1) {
-		const char *sep = strchr(t, '\n');
-		int len;
-
-		if (sep == NULL)
-			sep = strchr(t, '\0');
-		len = sep - t;
-		if (max_len < len)
-			max_len = len;
-		++nr_lines;
-		if (*sep == '\0')
-			break;
-		t = sep + 1;
-	}
-
-	max_len += 2;
-	nr_lines += 4;
-	y = SLtt_Screen_Rows / 2 - nr_lines / 2,
-	x = SLtt_Screen_Cols / 2 - max_len / 2;
-
-	SLsmg_set_color(0);
-	SLsmg_draw_box(y, x++, nr_lines, max_len);
-	if (title) {
-		SLsmg_gotorc(y, x + 1);
-		SLsmg_write_string((char *)title);
-	}
-	SLsmg_gotorc(++y, x);
-	nr_lines -= 2;
-	max_len -= 2;
-	SLsmg_write_wrapped_string((unsigned char *)text, y, x,
-				   nr_lines, max_len, 1);
-	SLsmg_gotorc(y + nr_lines - 2, x);
-	SLsmg_write_nstring((char *)" ", max_len);
-	SLsmg_gotorc(y + nr_lines - 1, x);
-	SLsmg_write_nstring((char *)exit_msg, max_len);
-	SLsmg_refresh();
-	return ui__getch(delay_secs);
-}
-
-int ui__help_window(const char *text)
-{
-	return ui__question_window("Help", text, "Press any key...", 0);
-}
-
-int ui__dialog_yesno(const char *msg)
-{
-	return ui__question_window(NULL, msg, "Enter: Yes, ESC: No", 0);
-}
-
-int __ui__warning(const char *title, const char *format, va_list args)
-{
-	char *s;
-
-	if (use_browser > 0 && vasprintf(&s, format, args) > 0) {
-		int key;
-
-		pthread_mutex_lock(&ui__lock);
-		key = ui__question_window(title, s, "Press any key...", 0);
-		pthread_mutex_unlock(&ui__lock);
-		free(s);
-		return key;
-	}
-
-	fprintf(stderr, "%s:\n", title);
+	fprintf(stderr, "Error:\n");
 	vfprintf(stderr, format, args);
-	return K_ESC;
+	return 0;
+}
+
+static int perf_stdio__warning(const char *format, va_list args)
+{
+	fprintf(stderr, "Warning:\n");
+	vfprintf(stderr, format, args);
+	return 0;
+}
+
+static struct perf_error_ops default_eops =
+{
+	.error		= perf_stdio__error,
+	.warning	= perf_stdio__warning,
+};
+
+static struct perf_error_ops *perf_eops = &default_eops;
+
+
+int ui__error(const char *format, ...)
+{
+	int ret;
+	va_list args;
+
+	va_start(args, format);
+	ret = perf_eops->error(format, args);
+	va_end(args);
+
+	return ret;
 }
 
 int ui__warning(const char *format, ...)
 {
-	int key;
+	int ret;
 	va_list args;
 
 	va_start(args, format);
-	key = __ui__warning("Warning", format, args);
+	ret = perf_eops->warning(format, args);
 	va_end(args);
-	return key;
+
+	return ret;
 }
 
-int ui__error(const char *format, ...)
-{
-	int key;
-	va_list args;
 
-	va_start(args, format);
-	key = __ui__warning("Error", format, args);
-	va_end(args);
-	return key;
+/**
+ * perf_error__register - Register error logging functions
+ * @eops: The pointer to error logging function struct
+ *
+ * Register UI-specific error logging functions. Before calling this,
+ * other logging functions should be unregistered, if any.
+ */
+int perf_error__register(struct perf_error_ops *eops)
+{
+	if (perf_eops != &default_eops)
+		return -1;
+
+	perf_eops = eops;
+	return 0;
+}
+
+/**
+ * perf_error__unregister - Unregister error logging functions
+ * @eops: The pointer to error logging function struct
+ *
+ * Unregister already registered error logging functions.
+ */
+int perf_error__unregister(struct perf_error_ops *eops)
+{
+	if (perf_eops != eops)
+		return -1;
+
+	perf_eops = &default_eops;
+	return 0;
 }
diff --git a/tools/perf/ui/util.h b/tools/perf/ui/util.h
index 2d1738b..361f08c 100644
--- a/tools/perf/ui/util.h
+++ b/tools/perf/ui/util.h
@@ -9,6 +9,13 @@
 int ui__dialog_yesno(const char *msg);
 int ui__question_window(const char *title, const char *text,
 			const char *exit_msg, int delay_secs);
-int __ui__warning(const char *title, const char *format, va_list args);
+
+struct perf_error_ops {
+	int (*error)(const char *format, va_list args);
+	int (*warning)(const char *format, va_list args);
+};
+
+int perf_error__register(struct perf_error_ops *eops);
+int perf_error__unregister(struct perf_error_ops *eops);
 
 #endif /* _PERF_UI_UTIL_H_ */
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index efb1fce..4dfe0bb 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -47,7 +47,7 @@
 	return ret;
 }
 
-#ifdef NO_NEWT_SUPPORT
+#if defined(NO_NEWT_SUPPORT) && defined(NO_GTK2_SUPPORT)
 int ui__warning(const char *format, ...)
 {
 	va_list args;
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h
index 6bebe7f..015c91d 100644
--- a/tools/perf/util/debug.h
+++ b/tools/perf/util/debug.h
@@ -12,8 +12,9 @@
 void trace_event(union perf_event *event);
 
 struct ui_progress;
+struct perf_error_ops;
 
-#ifdef NO_NEWT_SUPPORT
+#if defined(NO_NEWT_SUPPORT) && defined(NO_GTK2_SUPPORT)
 static inline int ui_helpline__show_help(const char *format __used, va_list ap __used)
 {
 	return 0;
@@ -23,12 +24,28 @@
 				       const char *title __used) {}
 
 #define ui__error(format, arg...) ui__warning(format, ##arg)
-#else
+
+static inline int
+perf_error__register(struct perf_error_ops *eops __used)
+{
+	return 0;
+}
+
+static inline int
+perf_error__unregister(struct perf_error_ops *eops __used)
+{
+	return 0;
+}
+
+#else /* NO_NEWT_SUPPORT && NO_GTK2_SUPPORT */
+
 extern char ui_helpline__last_msg[];
 int ui_helpline__show_help(const char *format, va_list ap);
 #include "../ui/progress.h"
 int ui__error(const char *format, ...) __attribute__((format(printf, 1, 2)));
-#endif
+#include "../ui/util.h"
+
+#endif /* NO_NEWT_SUPPORT && NO_GTK2_SUPPORT */
 
 int ui__warning(const char *format, ...) __attribute__((format(printf, 1, 2)));
 int ui__error_paranoid(void);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 7400fb3..f74e956 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -224,8 +224,8 @@
 	return err;
 }
 
-static struct perf_evsel *
-	perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id)
+struct perf_evsel *
+perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id)
 {
 	struct perf_evsel *evsel;
 
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 989bee9..40d4d3c 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -73,6 +73,9 @@
 #define perf_evlist__set_tracepoints_handlers_array(evlist, array) \
 	perf_evlist__set_tracepoints_handlers(evlist, array, ARRAY_SIZE(array))
 
+struct perf_evsel *
+perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id);
+
 void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
 			 int cpu, int thread, u64 id);
 
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 9f6cebd..3d1f696 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -15,7 +15,6 @@
 #include "cpumap.h"
 #include "thread_map.h"
 #include "target.h"
-#include "../../include/linux/perf_event.h"
 
 #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
 #define GROUP_FD(group_fd, cpu) (*(int *)xyarray__entry(group_fd, cpu, 0))
@@ -78,7 +77,7 @@
 	"ref-cycles",
 };
 
-const char *__perf_evsel__hw_name(u64 config)
+static const char *__perf_evsel__hw_name(u64 config)
 {
 	if (config < PERF_COUNT_HW_MAX && perf_evsel__hw_names[config])
 		return perf_evsel__hw_names[config];
@@ -86,16 +85,15 @@
 	return "unknown-hardware";
 }
 
-static int perf_evsel__hw_name(struct perf_evsel *evsel, char *bf, size_t size)
+static int perf_evsel__add_modifiers(struct perf_evsel *evsel, char *bf, size_t size)
 {
-	int colon = 0;
+	int colon = 0, r = 0;
 	struct perf_event_attr *attr = &evsel->attr;
-	int r = scnprintf(bf, size, "%s", __perf_evsel__hw_name(attr->config));
 	bool exclude_guest_default = false;
 
 #define MOD_PRINT(context, mod)	do {					\
 		if (!attr->exclude_##context) {				\
-			if (!colon) colon = r++;			\
+			if (!colon) colon = ++r;			\
 			r += scnprintf(bf + r, size - r, "%c", mod);	\
 		} } while(0)
 
@@ -108,7 +106,7 @@
 
 	if (attr->precise_ip) {
 		if (!colon)
-			colon = r++;
+			colon = ++r;
 		r += scnprintf(bf + r, size - r, "%.*s", attr->precise_ip, "ppp");
 		exclude_guest_default = true;
 	}
@@ -119,39 +117,182 @@
 	}
 #undef MOD_PRINT
 	if (colon)
-		bf[colon] = ':';
+		bf[colon - 1] = ':';
 	return r;
 }
 
-int perf_evsel__name(struct perf_evsel *evsel, char *bf, size_t size)
+static int perf_evsel__hw_name(struct perf_evsel *evsel, char *bf, size_t size)
 {
-	int ret;
+	int r = scnprintf(bf, size, "%s", __perf_evsel__hw_name(evsel->attr.config));
+	return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
+}
+
+static const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX] = {
+	"cpu-clock",
+	"task-clock",
+	"page-faults",
+	"context-switches",
+	"CPU-migrations",
+	"minor-faults",
+	"major-faults",
+	"alignment-faults",
+	"emulation-faults",
+};
+
+static const char *__perf_evsel__sw_name(u64 config)
+{
+	if (config < PERF_COUNT_SW_MAX && perf_evsel__sw_names[config])
+		return perf_evsel__sw_names[config];
+	return "unknown-software";
+}
+
+static int perf_evsel__sw_name(struct perf_evsel *evsel, char *bf, size_t size)
+{
+	int r = scnprintf(bf, size, "%s", __perf_evsel__sw_name(evsel->attr.config));
+	return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
+}
+
+const char *perf_evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_EVSEL__MAX_ALIASES] = {
+ { "L1-dcache",	"l1-d",		"l1d",		"L1-data",		},
+ { "L1-icache",	"l1-i",		"l1i",		"L1-instruction",	},
+ { "LLC",	"L2",							},
+ { "dTLB",	"d-tlb",	"Data-TLB",				},
+ { "iTLB",	"i-tlb",	"Instruction-TLB",			},
+ { "branch",	"branches",	"bpu",		"btb",		"bpc",	},
+ { "node",								},
+};
+
+const char *perf_evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX]
+				   [PERF_EVSEL__MAX_ALIASES] = {
+ { "load",	"loads",	"read",					},
+ { "store",	"stores",	"write",				},
+ { "prefetch",	"prefetches",	"speculative-read", "speculative-load",	},
+};
+
+const char *perf_evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX]
+				       [PERF_EVSEL__MAX_ALIASES] = {
+ { "refs",	"Reference",	"ops",		"access",		},
+ { "misses",	"miss",							},
+};
+
+#define C(x)		PERF_COUNT_HW_CACHE_##x
+#define CACHE_READ	(1 << C(OP_READ))
+#define CACHE_WRITE	(1 << C(OP_WRITE))
+#define CACHE_PREFETCH	(1 << C(OP_PREFETCH))
+#define COP(x)		(1 << x)
+
+/*
+ * cache operation stat
+ * L1I : Read and prefetch only
+ * ITLB and BPU : Read-only
+ */
+static unsigned long perf_evsel__hw_cache_stat[C(MAX)] = {
+ [C(L1D)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
+ [C(L1I)]	= (CACHE_READ | CACHE_PREFETCH),
+ [C(LL)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
+ [C(DTLB)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
+ [C(ITLB)]	= (CACHE_READ),
+ [C(BPU)]	= (CACHE_READ),
+ [C(NODE)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
+};
+
+bool perf_evsel__is_cache_op_valid(u8 type, u8 op)
+{
+	if (perf_evsel__hw_cache_stat[type] & COP(op))
+		return true;	/* valid */
+	else
+		return false;	/* invalid */
+}
+
+int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result,
+					    char *bf, size_t size)
+{
+	if (result) {
+		return scnprintf(bf, size, "%s-%s-%s", perf_evsel__hw_cache[type][0],
+				 perf_evsel__hw_cache_op[op][0],
+				 perf_evsel__hw_cache_result[result][0]);
+	}
+
+	return scnprintf(bf, size, "%s-%s", perf_evsel__hw_cache[type][0],
+			 perf_evsel__hw_cache_op[op][1]);
+}
+
+static int __perf_evsel__hw_cache_name(u64 config, char *bf, size_t size)
+{
+	u8 op, result, type = (config >>  0) & 0xff;	/* bits 0-7 */
+	const char *err = "unknown-ext-hardware-cache-type";
+
+	if (type >= PERF_COUNT_HW_CACHE_MAX)	/* *_MAX is the table size */
+		goto out_err;
+
+	op = (config >>  8) & 0xff;	/* bits 8-15 */
+	err = "unknown-ext-hardware-cache-op";
+	if (op >= PERF_COUNT_HW_CACHE_OP_MAX)
+		goto out_err;
+
+	result = (config >> 16) & 0xff;	/* bits 16-23 */
+	err = "unknown-ext-hardware-cache-result";
+	if (result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+		goto out_err;
+
+	err = "invalid-cache";
+	if (!perf_evsel__is_cache_op_valid(type, op))
+		goto out_err;
+
+	return __perf_evsel__hw_cache_type_op_res_name(type, op, result, bf, size);
+out_err:	/* on any failure the error text itself is written to @bf */
+	return scnprintf(bf, size, "%s", err);
+}
+
+static int perf_evsel__hw_cache_name(struct perf_evsel *evsel, char *bf, size_t size)
+{
+	int ret = __perf_evsel__hw_cache_name(evsel->attr.config, bf, size);
+	return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret);
+}
+
+static int perf_evsel__raw_name(struct perf_evsel *evsel, char *bf, size_t size)
+{
+	int ret = scnprintf(bf, size, "raw 0x%" PRIx64, evsel->attr.config);
+	return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret);
+}
+
+const char *perf_evsel__name(struct perf_evsel *evsel)
+{
+	char bf[128];
+
+	if (evsel->name)
+		return evsel->name;
 
 	switch (evsel->attr.type) {
 	case PERF_TYPE_RAW:
-		ret = scnprintf(bf, size, "raw 0x%" PRIx64, evsel->attr.config);
+		perf_evsel__raw_name(evsel, bf, sizeof(bf));
 		break;
 
 	case PERF_TYPE_HARDWARE:
-		ret = perf_evsel__hw_name(evsel, bf, size);
+		perf_evsel__hw_name(evsel, bf, sizeof(bf));
 		break;
+
+	case PERF_TYPE_HW_CACHE:
+		perf_evsel__hw_cache_name(evsel, bf, sizeof(bf));
+		break;
+
+	case PERF_TYPE_SOFTWARE:
+		perf_evsel__sw_name(evsel, bf, sizeof(bf));
+		break;
+
+	case PERF_TYPE_TRACEPOINT:
+		scnprintf(bf, sizeof(bf), "%s", "unknown tracepoint");
+		break;
+
 	default:
-		/*
-		 * FIXME
- 		 *
-		 * This is the minimal perf_evsel__name so that we can
-		 * reconstruct event names taking into account event modifiers.
-		 *
-		 * The old event_name uses it now for raw anr hw events, so that
-		 * we don't drag all the parsing stuff into the python binding.
-		 *
-		 * On the next devel cycle the rest of the event naming will be
-		 * brought here.
- 		 */
-		return 0;
+		scnprintf(bf, sizeof(bf), "%s", "unknown attr type");
+		break;
 	}
 
-	return ret;
+	evsel->name = strdup(bf);
+
+	return evsel->name ?: "unknown";
 }
 
 void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts,
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 4ba8b56..67cc503 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -83,8 +83,19 @@
 			struct perf_record_opts *opts,
 			struct perf_evsel *first);
 
-const char* __perf_evsel__hw_name(u64 config);
-int perf_evsel__name(struct perf_evsel *evsel, char *bf, size_t size);
+bool perf_evsel__is_cache_op_valid(u8 type, u8 op);
+
+#define PERF_EVSEL__MAX_ALIASES 8
+
+extern const char *perf_evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX]
+				       [PERF_EVSEL__MAX_ALIASES];
+extern const char *perf_evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX]
+					  [PERF_EVSEL__MAX_ALIASES];
+extern const char *perf_evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX]
+					      [PERF_EVSEL__MAX_ALIASES];
+int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result,
+					    char *bf, size_t size);
+const char *perf_evsel__name(struct perf_evsel *evsel);
 
 int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
 int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads);
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index e909d43..5a47aba 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -641,7 +641,7 @@
 		/*
 		 * write event string as passed on cmdline
 		 */
-		ret = do_write_string(fd, event_name(attr));
+		ret = do_write_string(fd, perf_evsel__name(attr));
 		if (ret < 0)
 			return ret;
 		/*
@@ -1474,15 +1474,15 @@
 
 static int process_tracing_data(struct perf_file_section *section __unused,
 			      struct perf_header *ph __unused,
-			      int feat __unused, int fd)
+			      int feat __unused, int fd, void *data)
 {
-	trace_report(fd, false);
+	trace_report(fd, data, false);
 	return 0;
 }
 
 static int process_build_id(struct perf_file_section *section,
 			    struct perf_header *ph,
-			    int feat __unused, int fd)
+			    int feat __unused, int fd, void *data __used)
 {
 	if (perf_header__read_build_ids(ph, fd, section->offset, section->size))
 		pr_debug("Failed to read buildids, continuing...\n");
@@ -1493,7 +1493,7 @@
 	int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist);
 	void (*print)(struct perf_header *h, int fd, FILE *fp);
 	int (*process)(struct perf_file_section *section,
-		       struct perf_header *h, int feat, int fd);
+		       struct perf_header *h, int feat, int fd, void *data);
 	const char *name;
 	bool full_only;
 };
@@ -1988,7 +1988,7 @@
 
 static int perf_file_section__process(struct perf_file_section *section,
 				      struct perf_header *ph,
-				      int feat, int fd, void *data __used)
+				      int feat, int fd, void *data)
 {
 	if (lseek(fd, section->offset, SEEK_SET) == (off_t)-1) {
 		pr_debug("Failed to lseek to %" PRIu64 " offset for feature "
@@ -2004,7 +2004,7 @@
 	if (!feat_ops[feat].process)
 		return 0;
 
-	return feat_ops[feat].process(section, ph, feat, fd);
+	return feat_ops[feat].process(section, ph, feat, fd, data);
 }
 
 static int perf_file_header__read_pipe(struct perf_pipe_file_header *header,
@@ -2093,9 +2093,11 @@
 	return ret <= 0 ? -1 : 0;
 }
 
-static int perf_evsel__set_tracepoint_name(struct perf_evsel *evsel)
+static int perf_evsel__set_tracepoint_name(struct perf_evsel *evsel,
+					   struct pevent *pevent)
 {
-	struct event_format *event = trace_find_event(evsel->attr.config);
+	struct event_format *event = pevent_find_event(pevent,
+						       evsel->attr.config);
 	char bf[128];
 
 	if (event == NULL)
@@ -2109,13 +2111,14 @@
 	return 0;
 }
 
-static int perf_evlist__set_tracepoint_names(struct perf_evlist *evlist)
+static int perf_evlist__set_tracepoint_names(struct perf_evlist *evlist,
+					     struct pevent *pevent)
 {
 	struct perf_evsel *pos;
 
 	list_for_each_entry(pos, &evlist->entries, node) {
 		if (pos->attr.type == PERF_TYPE_TRACEPOINT &&
-		    perf_evsel__set_tracepoint_name(pos))
+		    perf_evsel__set_tracepoint_name(pos, pevent))
 			return -1;
 	}
 
@@ -2198,12 +2201,12 @@
 		event_count =  f_header.event_types.size / sizeof(struct perf_trace_event_type);
 	}
 
-	perf_header__process_sections(header, fd, NULL,
+	perf_header__process_sections(header, fd, &session->pevent,
 				      perf_file_section__process);
 
 	lseek(fd, header->data_offset, SEEK_SET);
 
-	if (perf_evlist__set_tracepoint_names(session->evlist))
+	if (perf_evlist__set_tracepoint_names(session->evlist, session->pevent))
 		goto out_delete_evlist;
 
 	header->frozen = 1;
@@ -2419,8 +2422,8 @@
 	lseek(session->fd, offset + sizeof(struct tracing_data_event),
 	      SEEK_SET);
 
-	size_read = trace_report(session->fd, session->repipe);
-
+	size_read = trace_report(session->fd, &session->pevent,
+				 session->repipe);
 	padding = ALIGN(size_read, sizeof(u64)) - size_read;
 
 	if (read(session->fd, buf, padding) < 0)
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 34bb556..0b096c2 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -47,6 +47,7 @@
 	HISTC_SYMBOL_TO,
 	HISTC_DSO_FROM,
 	HISTC_DSO_TO,
+	HISTC_SRCLINE,
 	HISTC_NR_COLS, /* Last entry */
 };
 
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index 81371ba..c14c665 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -157,7 +157,7 @@
 void machine__delete(struct machine *self);
 
 int machine__resolve_callchain(struct machine *machine,
-			       struct perf_evsel *evsel, struct thread *thread,
+			       struct thread *thread,
 			       struct ip_callchain *chain,
 			       struct symbol **parent);
 int maps__set_kallsyms_ref_reloc_sym(struct map **maps, const char *symbol_name,
diff --git a/tools/perf/util/parse-events-test.c b/tools/perf/util/parse-events-test.c
index 76b98e2..a0f61a2 100644
--- a/tools/perf/util/parse-events-test.c
+++ b/tools/perf/util/parse-events-test.c
@@ -413,19 +413,63 @@
 {
 	struct perf_evsel *evsel;
 
-	/* cpu/config=1,name=krava1/u */
+	/* cpu/config=1,name=krava/u */
 	evsel = list_entry(evlist->entries.next, struct perf_evsel, node);
 	TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
 	TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type);
 	TEST_ASSERT_VAL("wrong config",  1 == evsel->attr.config);
-	TEST_ASSERT_VAL("wrong name", !strcmp(evsel->name, "krava"));
+	TEST_ASSERT_VAL("wrong name", !strcmp(perf_evsel__name(evsel), "krava"));
 
-	/* cpu/config=2/" */
+	/* cpu/config=2/u */
 	evsel = list_entry(evsel->node.next, struct perf_evsel, node);
 	TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
 	TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type);
 	TEST_ASSERT_VAL("wrong config",  2 == evsel->attr.config);
-	TEST_ASSERT_VAL("wrong name", !strcmp(evsel->name, "raw 0x2"));
+	TEST_ASSERT_VAL("wrong name",
+			!strcmp(perf_evsel__name(evsel), "raw 0x2:u"));
+
+	return 0;
+}
+
+static int test__checkterms_simple(struct list_head *terms)
+{
+	struct parse_events__term *term;
+
+	/* config=10 */
+	term = list_entry(terms->next, struct parse_events__term, list);
+	TEST_ASSERT_VAL("wrong type term",
+			term->type_term == PARSE_EVENTS__TERM_TYPE_CONFIG);
+	TEST_ASSERT_VAL("wrong type val",
+			term->type_val == PARSE_EVENTS__TERM_TYPE_NUM);
+	TEST_ASSERT_VAL("wrong val", term->val.num == 10);
+	TEST_ASSERT_VAL("wrong config", !term->config);
+
+	/* config1 */
+	term = list_entry(term->list.next, struct parse_events__term, list);
+	TEST_ASSERT_VAL("wrong type term",
+			term->type_term == PARSE_EVENTS__TERM_TYPE_CONFIG1);
+	TEST_ASSERT_VAL("wrong type val",
+			term->type_val == PARSE_EVENTS__TERM_TYPE_NUM);
+	TEST_ASSERT_VAL("wrong val", term->val.num == 1);
+	TEST_ASSERT_VAL("wrong config", !term->config);
+
+	/* config2=3 */
+	term = list_entry(term->list.next, struct parse_events__term, list);
+	TEST_ASSERT_VAL("wrong type term",
+			term->type_term == PARSE_EVENTS__TERM_TYPE_CONFIG2);
+	TEST_ASSERT_VAL("wrong type val",
+			term->type_val == PARSE_EVENTS__TERM_TYPE_NUM);
+	TEST_ASSERT_VAL("wrong val", term->val.num == 3);
+	TEST_ASSERT_VAL("wrong config", !term->config);
+
+	/* umask=1 */
+	term = list_entry(term->list.next, struct parse_events__term, list);
+	TEST_ASSERT_VAL("wrong type term",
+			term->type_term == PARSE_EVENTS__TERM_TYPE_USER);
+	TEST_ASSERT_VAL("wrong type val",
+			term->type_val == PARSE_EVENTS__TERM_TYPE_NUM);
+	TEST_ASSERT_VAL("wrong val", term->val.num == 1);
+	TEST_ASSERT_VAL("wrong config", !strcmp(term->config, "umask"));
 
 	return 0;
 }
@@ -559,7 +603,23 @@
 #define TEST__EVENTS_PMU_CNT (sizeof(test__events_pmu) / \
 			      sizeof(struct test__event_st))
 
-static int test(struct test__event_st *e)
+struct test__term {	/* one term-string parsing test case */
+	const char *str;	/* term string handed to parse_events_terms() */
+	__u32 type;	/* NOTE(review): not set or read by the visible tests */
+	int (*check)(struct list_head *terms);	/* validates the parsed list */
+};
+
+static struct test__term test__terms[] = {
+	[0] = {
+		.str   = "config=10,config1,config2=3,umask=1",
+		.check = test__checkterms_simple,
+	},
+};
+
+#define TEST__TERMS_CNT (sizeof(test__terms) / \
+			 sizeof(struct test__term))
+
+static int test_event(struct test__event_st *e)
 {
 	struct perf_evlist *evlist;
 	int ret;
@@ -590,7 +650,48 @@
 		struct test__event_st *e = &events[i];
 
 		pr_debug("running test %d '%s'\n", i, e->name);
-		ret = test(e);
+		ret = test_event(e);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
+/* Parse t->str into a heap-allocated term list and run t->check() on it. */
+static int test_term(struct test__term *t)
+{
+	struct list_head *terms;
+	int ret;
+
+	terms = malloc(sizeof(*terms));
+	if (!terms)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(terms);
+	ret = parse_events_terms(terms, t->str);
+	if (ret) {
+		pr_debug("failed to parse terms '%s', err %d\n", t->str, ret);
+		free(terms);	/* nothing was spliced in; head is still ours */
+		return ret;
+	}
+
+	ret = t->check(terms);
+	parse_events__free_terms(terms);
+
+	return ret;
+}
+
+static int test_terms(struct test__term *terms, unsigned cnt)
+{
+	int ret = 0;
+	unsigned i;
+
+	for (i = 0; i < cnt; i++) {
+		struct test__term *t = &terms[i];
+
+		pr_debug("running test %d '%s'\n", i, t->str);
+		ret = test_term(t);
 		if (ret)
 			break;
 	}
@@ -617,9 +718,21 @@
 {
 	int ret;
 
-	ret = test_events(test__events, TEST__EVENTS_CNT);
-	if (!ret && test_pmu())
-		ret = test_events(test__events_pmu, TEST__EVENTS_PMU_CNT);
+	do {
+		ret = test_events(test__events, TEST__EVENTS_CNT);
+		if (ret)
+			break;
+
+		if (test_pmu()) {
+			ret = test_events(test__events_pmu,
+					  TEST__EVENTS_PMU_CNT);
+			if (ret)
+				break;
+		}
+
+		ret = test_terms(test__terms, TEST__TERMS_CNT);
+
+	} while (0);
 
 	return ret;
 }
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 05dbc8b..0cc27da 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -11,6 +11,8 @@
 #include "cache.h"
 #include "header.h"
 #include "debugfs.h"
+#include "parse-events-bison.h"
+#define YY_EXTRA_TYPE int
 #include "parse-events-flex.h"
 #include "pmu.h"
 
@@ -26,7 +28,7 @@
 #ifdef PARSER_DEBUG
 extern int parse_events_debug;
 #endif
-int parse_events_parse(struct list_head *list, int *idx);
+int parse_events_parse(void *data, void *scanner);
 
 #define CHW(x) .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_##x
 #define CSW(x) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##x
@@ -62,63 +64,6 @@
 #define PERF_EVENT_TYPE(config)		__PERF_EVENT_FIELD(config, TYPE)
 #define PERF_EVENT_ID(config)		__PERF_EVENT_FIELD(config, EVENT)
 
-static const char *sw_event_names[PERF_COUNT_SW_MAX] = {
-	"cpu-clock",
-	"task-clock",
-	"page-faults",
-	"context-switches",
-	"CPU-migrations",
-	"minor-faults",
-	"major-faults",
-	"alignment-faults",
-	"emulation-faults",
-};
-
-#define MAX_ALIASES 8
-
-static const char *hw_cache[PERF_COUNT_HW_CACHE_MAX][MAX_ALIASES] = {
- { "L1-dcache",	"l1-d",		"l1d",		"L1-data",		},
- { "L1-icache",	"l1-i",		"l1i",		"L1-instruction",	},
- { "LLC",	"L2",							},
- { "dTLB",	"d-tlb",	"Data-TLB",				},
- { "iTLB",	"i-tlb",	"Instruction-TLB",			},
- { "branch",	"branches",	"bpu",		"btb",		"bpc",	},
- { "node",								},
-};
-
-static const char *hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX][MAX_ALIASES] = {
- { "load",	"loads",	"read",					},
- { "store",	"stores",	"write",				},
- { "prefetch",	"prefetches",	"speculative-read", "speculative-load",	},
-};
-
-static const char *hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX]
-				  [MAX_ALIASES] = {
- { "refs",	"Reference",	"ops",		"access",		},
- { "misses",	"miss",							},
-};
-
-#define C(x)		PERF_COUNT_HW_CACHE_##x
-#define CACHE_READ	(1 << C(OP_READ))
-#define CACHE_WRITE	(1 << C(OP_WRITE))
-#define CACHE_PREFETCH	(1 << C(OP_PREFETCH))
-#define COP(x)		(1 << x)
-
-/*
- * cache operartion stat
- * L1I : Read and prefetch only
- * ITLB and BPU : Read-only
- */
-static unsigned long hw_cache_stat[C(MAX)] = {
- [C(L1D)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
- [C(L1I)]	= (CACHE_READ | CACHE_PREFETCH),
- [C(LL)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
- [C(DTLB)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
- [C(ITLB)]	= (CACHE_READ),
- [C(BPU)]	= (CACHE_READ),
- [C(NODE)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
-};
-
 #define for_each_subsystem(sys_dir, sys_dirent, sys_next)	       \
 	while (!readdir_r(sys_dir, &sys_dirent, &sys_next) && sys_next)	       \
 	if (sys_dirent.d_type == DT_DIR &&				       \
@@ -218,48 +163,6 @@
 	return NULL;
 }
 
-#define TP_PATH_LEN (MAX_EVENT_LENGTH * 2 + 1)
-static const char *tracepoint_id_to_name(u64 config)
-{
-	static char buf[TP_PATH_LEN];
-	struct tracepoint_path *path;
-
-	path = tracepoint_id_to_path(config);
-	if (path) {
-		snprintf(buf, TP_PATH_LEN, "%s:%s", path->system, path->name);
-		free(path->name);
-		free(path->system);
-		free(path);
-	} else
-		snprintf(buf, TP_PATH_LEN, "%s:%s", "unknown", "unknown");
-
-	return buf;
-}
-
-static int is_cache_op_valid(u8 cache_type, u8 cache_op)
-{
-	if (hw_cache_stat[cache_type] & COP(cache_op))
-		return 1;	/* valid */
-	else
-		return 0;	/* invalid */
-}
-
-static char *event_cache_name(u8 cache_type, u8 cache_op, u8 cache_result)
-{
-	static char name[50];
-
-	if (cache_result) {
-		sprintf(name, "%s-%s-%s", hw_cache[cache_type][0],
-			hw_cache_op[cache_op][0],
-			hw_cache_result[cache_result][0]);
-	} else {
-		sprintf(name, "%s-%s", hw_cache[cache_type][0],
-			hw_cache_op[cache_op][1]);
-	}
-
-	return name;
-}
-
 const char *event_type(int type)
 {
 	switch (type) {
@@ -282,76 +185,6 @@
 	return "unknown";
 }
 
-const char *event_name(struct perf_evsel *evsel)
-{
-	u64 config = evsel->attr.config;
-	int type = evsel->attr.type;
-
-	if (type == PERF_TYPE_RAW || type == PERF_TYPE_HARDWARE) {
-		/*
- 		 * XXX minimal fix, see comment on perf_evsen__name, this static buffer
- 		 * will go away together with event_name in the next devel cycle.
- 		 */
-		static char bf[128];
-		perf_evsel__name(evsel, bf, sizeof(bf));
-		return bf;
-	}
-
-	if (evsel->name)
-		return evsel->name;
-
-	return __event_name(type, config);
-}
-
-const char *__event_name(int type, u64 config)
-{
-	static char buf[32];
-
-	if (type == PERF_TYPE_RAW) {
-		sprintf(buf, "raw 0x%" PRIx64, config);
-		return buf;
-	}
-
-	switch (type) {
-	case PERF_TYPE_HARDWARE:
-		return __perf_evsel__hw_name(config);
-
-	case PERF_TYPE_HW_CACHE: {
-		u8 cache_type, cache_op, cache_result;
-
-		cache_type   = (config >>  0) & 0xff;
-		if (cache_type > PERF_COUNT_HW_CACHE_MAX)
-			return "unknown-ext-hardware-cache-type";
-
-		cache_op     = (config >>  8) & 0xff;
-		if (cache_op > PERF_COUNT_HW_CACHE_OP_MAX)
-			return "unknown-ext-hardware-cache-op";
-
-		cache_result = (config >> 16) & 0xff;
-		if (cache_result > PERF_COUNT_HW_CACHE_RESULT_MAX)
-			return "unknown-ext-hardware-cache-result";
-
-		if (!is_cache_op_valid(cache_type, cache_op))
-			return "invalid-cache";
-
-		return event_cache_name(cache_type, cache_op, cache_result);
-	}
-
-	case PERF_TYPE_SOFTWARE:
-		if (config < PERF_COUNT_SW_MAX && sw_event_names[config])
-			return sw_event_names[config];
-		return "unknown-software";
-
-	case PERF_TYPE_TRACEPOINT:
-		return tracepoint_id_to_name(config);
-
-	default:
-		break;
-	}
-
-	return "unknown";
-}
-
 static int add_event(struct list_head **_list, int *idx,
 		     struct perf_event_attr *attr, char *name)
 {
@@ -373,19 +206,20 @@
 		return -ENOMEM;
 	}
 
-	evsel->name = strdup(name);
+	if (name)
+		evsel->name = strdup(name);
 	list_add_tail(&evsel->node, list);
 	*_list = list;
 	return 0;
 }
 
-static int parse_aliases(char *str, const char *names[][MAX_ALIASES], int size)
+static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES], int size)
 {
 	int i, j;
 	int n, longest = -1;
 
 	for (i = 0; i < size; i++) {
-		for (j = 0; j < MAX_ALIASES && names[i][j]; j++) {
+		for (j = 0; j < PERF_EVSEL__MAX_ALIASES && names[i][j]; j++) {
 			n = strlen(names[i][j]);
 			if (n > longest && !strncasecmp(str, names[i][j], n))
 				longest = n;
@@ -410,7 +244,7 @@
 	 * No fallback - if we cannot get a clear cache type
 	 * then bail out:
 	 */
-	cache_type = parse_aliases(type, hw_cache,
+	cache_type = parse_aliases(type, perf_evsel__hw_cache,
 				   PERF_COUNT_HW_CACHE_MAX);
 	if (cache_type == -1)
 		return -EINVAL;
@@ -423,18 +257,18 @@
 		snprintf(name + n, MAX_NAME_LEN - n, "-%s\n", str);
 
 		if (cache_op == -1) {
-			cache_op = parse_aliases(str, hw_cache_op,
+			cache_op = parse_aliases(str, perf_evsel__hw_cache_op,
 						 PERF_COUNT_HW_CACHE_OP_MAX);
 			if (cache_op >= 0) {
-				if (!is_cache_op_valid(cache_type, cache_op))
+				if (!perf_evsel__is_cache_op_valid(cache_type, cache_op))
 					return -EINVAL;
 				continue;
 			}
 		}
 
 		if (cache_result == -1) {
-			cache_result = parse_aliases(str, hw_cache_result,
-						PERF_COUNT_HW_CACHE_RESULT_MAX);
+			cache_result = parse_aliases(str, perf_evsel__hw_cache_result,
+						     PERF_COUNT_HW_CACHE_RESULT_MAX);
 			if (cache_result >= 0)
 				continue;
 		}
@@ -666,8 +500,7 @@
 	    config_attr(&attr, head_config, 1))
 		return -EINVAL;
 
-	return add_event(list, idx, &attr,
-			 (char *) __event_name(type, config));
+	return add_event(list, idx, &attr, NULL);
 }
 
 static int parse_events__is_name_term(struct parse_events__term *term)
@@ -675,8 +508,7 @@
 	return term->type_term == PARSE_EVENTS__TERM_TYPE_NAME;
 }
 
-static char *pmu_event_name(struct perf_event_attr *attr,
-			    struct list_head *head_terms)
+static char *pmu_event_name(struct list_head *head_terms)
 {
 	struct parse_events__term *term;
 
@@ -684,7 +516,7 @@
 		if (parse_events__is_name_term(term))
 			return term->val.str;
 
-	return (char *) __event_name(PERF_TYPE_RAW, attr->config);
+	return NULL;
 }
 
 int parse_events_add_pmu(struct list_head **list, int *idx,
@@ -699,6 +531,9 @@
 
 	memset(&attr, 0, sizeof(attr));
 
+	if (perf_pmu__check_alias(pmu, head_config))
+		return -EINVAL;
+
 	/*
 	 * Configure hardcoded terms first, no need to check
 	 * return value when called with fail == 0 ;)
@@ -709,7 +544,7 @@
 		return -EINVAL;
 
 	return add_event(list, idx, &attr,
-			 pmu_event_name(&attr, head_config));
+			 pmu_event_name(head_config));
 }
 
 void parse_events_update_lists(struct list_head *list_event,
@@ -787,27 +622,62 @@
 	return 0;
 }
 
-int parse_events(struct perf_evlist *evlist, const char *str, int unset __used)
+static int parse_events__scanner(const char *str, void *data, int start_token)
 {
-	LIST_HEAD(list);
-	LIST_HEAD(list_tmp);
 	YY_BUFFER_STATE buffer;
-	int ret, idx = evlist->nr_entries;
+	void *scanner;
+	int ret;
 
-	buffer = parse_events__scan_string(str);
+	ret = parse_events_lex_init_extra(start_token, &scanner);
+	if (ret)
+		return ret;
+
+	buffer = parse_events__scan_string(str, scanner);
 
 #ifdef PARSER_DEBUG
 	parse_events_debug = 1;
 #endif
-	ret = parse_events_parse(&list, &idx);
+	ret = parse_events_parse(data, scanner);
 
-	parse_events__flush_buffer(buffer);
-	parse_events__delete_buffer(buffer);
-	parse_events_lex_destroy();
+	parse_events__flush_buffer(buffer, scanner);
+	parse_events__delete_buffer(buffer, scanner);
+	parse_events_lex_destroy(scanner);
+	return ret;
+}
 
+/*
+ * Parse an event config string and splice the resulting terms onto @terms.
+ */
+int parse_events_terms(struct list_head *terms, const char *str)
+{
+	struct parse_events_data__terms data = { .terms = NULL };
+	int ret;
+
+	ret = parse_events__scanner(str, &data, PE_START_TERMS);
 	if (!ret) {
-		int entries = idx - evlist->nr_entries;
-		perf_evlist__splice_list_tail(evlist, &list, entries);
+		list_splice(data.terms, terms);
+		free(data.terms);
+		return 0;
+	}
+
+	/* data.terms is still NULL if the parser never stored a list. */
+	if (data.terms)
+		parse_events__free_terms(data.terms);
+	return ret;
+}
+
+int parse_events(struct perf_evlist *evlist, const char *str, int unset __used)
+{
+	struct parse_events_data__events data = {
+		.list = LIST_HEAD_INIT(data.list),
+		.idx  = evlist->nr_entries,
+	};
+	int ret;
+
+	ret = parse_events__scanner(str, &data, PE_START_EVENTS);
+	if (!ret) {
+		int entries = data.idx - evlist->nr_entries;
+		perf_evlist__splice_list_tail(evlist, &data.list, entries);
 		return 0;
 	}
 
@@ -970,16 +840,17 @@
 int print_hwcache_events(const char *event_glob)
 {
 	unsigned int type, op, i, printed = 0;
+	char name[64];
 
 	for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) {
 		for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
 			/* skip invalid cache type */
-			if (!is_cache_op_valid(type, op))
+			if (!perf_evsel__is_cache_op_valid(type, op))
 				continue;
 
 			for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
-				char *name = event_cache_name(type, op, i);
-
+				__perf_evsel__hw_cache_type_op_res_name(type, op, i,
+									name, sizeof(name));
 				if (event_glob != NULL && !strglobmatch(name, event_glob))
 					continue;
 
@@ -1106,6 +977,13 @@
 			config, str, 0);
 }
 
+int parse_events__term_clone(struct parse_events__term **new,
+			     struct parse_events__term *term)
+{
+	return new_term(new, term->type_val, term->type_term, term->config,
+			term->val.str, term->val.num);
+}
+
 void parse_events__free_terms(struct list_head *terms)
 {
 	struct parse_events__term *term, *h;
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 8cac57a..ee9c218 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -26,13 +26,12 @@
 extern bool have_tracepoints(struct list_head *evlist);
 
 const char *event_type(int type);
-const char *event_name(struct perf_evsel *event);
-extern const char *__event_name(int type, u64 config);
 
 extern int parse_events_option(const struct option *opt, const char *str,
 			       int unset);
 extern int parse_events(struct perf_evlist *evlist, const char *str,
 			int unset);
+extern int parse_events_terms(struct list_head *terms, const char *str);
 extern int parse_filter(const struct option *opt, const char *str, int unset);
 
 #define EVENTS_HELP_MAX (128*1024)
@@ -63,11 +62,22 @@
 	struct list_head list;
 };
 
+struct parse_events_data__events {	/* parser data for parse_events() */
+	struct list_head list;	/* spliced onto the evlist on success */
+	int idx;		/* starts at evlist->nr_entries */
+};
+
+struct parse_events_data__terms {	/* parser data for parse_events_terms() */
+	struct list_head *terms;	/* starts NULL; spliced, freed on success */
+};
+
 int parse_events__is_hardcoded_term(struct parse_events__term *term);
 int parse_events__term_num(struct parse_events__term **_term,
 			   int type_term, char *config, long num);
 int parse_events__term_str(struct parse_events__term **_term,
 			   int type_term, char *config, char *str);
+int parse_events__term_clone(struct parse_events__term **new,
+			     struct parse_events__term *term);
 void parse_events__free_terms(struct list_head *terms);
 int parse_events_modifier(struct list_head *list, char *str);
 int parse_events_add_tracepoint(struct list_head **list, int *idx,
@@ -83,8 +93,7 @@
 			 char *pmu , struct list_head *head_config);
 void parse_events_update_lists(struct list_head *list_event,
 			       struct list_head *list_all);
-void parse_events_error(struct list_head *list_all,
-			int *idx, char const *msg);
+void parse_events_error(void *data, void *scanner, char const *msg);
 int parse_events__test(void);
 
 void print_events(const char *event_glob);
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 618a8e7..488362e 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -1,4 +1,6 @@
 
+%option reentrant
+%option bison-bridge
 %option prefix="parse_events_"
 %option stack
 
@@ -8,7 +10,10 @@
 #include "parse-events-bison.h"
 #include "parse-events.h"
 
-static int __value(char *str, int base, int token)
+char *parse_events_get_text(yyscan_t yyscanner);
+YYSTYPE *parse_events_get_lval(yyscan_t yyscanner);
+
+static int __value(YYSTYPE *yylval, char *str, int base, int token)
 {
 	long num;
 
@@ -17,35 +22,48 @@
 	if (errno)
 		return PE_ERROR;
 
-	parse_events_lval.num = num;
+	yylval->num = num;
 	return token;
 }
 
-static int value(int base)
+static int value(yyscan_t scanner, int base)
 {
-	return __value(parse_events_text, base, PE_VALUE);
+	YYSTYPE *yylval = parse_events_get_lval(scanner);
+	char *text = parse_events_get_text(scanner);
+
+	return __value(yylval, text, base, PE_VALUE);
 }
 
-static int raw(void)
+static int raw(yyscan_t scanner)
 {
-	return __value(parse_events_text + 1, 16, PE_RAW);
+	YYSTYPE *yylval = parse_events_get_lval(scanner);
+	char *text = parse_events_get_text(scanner);
+
+	return __value(yylval, text + 1, 16, PE_RAW);
 }
 
-static int str(int token)
+static int str(yyscan_t scanner, int token)
 {
-	parse_events_lval.str = strdup(parse_events_text);
+	YYSTYPE *yylval = parse_events_get_lval(scanner);
+	char *text = parse_events_get_text(scanner);
+
+	yylval->str = strdup(text);
 	return token;
 }
 
-static int sym(int type, int config)
+static int sym(yyscan_t scanner, int type, int config)
 {
-	parse_events_lval.num = (type << 16) + config;
+	YYSTYPE *yylval = parse_events_get_lval(scanner);
+
+	yylval->num = (type << 16) + config;
 	return PE_VALUE_SYM;
 }
 
-static int term(int type)
+static int term(yyscan_t scanner, int type)
 {
-	parse_events_lval.num = type;
+	YYSTYPE *yylval = parse_events_get_lval(scanner);
+
+	yylval->num = type;
 	return PE_TERM;
 }
 
@@ -61,25 +79,38 @@
 modifier_bp	[rwx]
 
 %%
-cpu-cycles|cycles				{ return sym(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES); }
-stalled-cycles-frontend|idle-cycles-frontend	{ return sym(PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); }
-stalled-cycles-backend|idle-cycles-backend	{ return sym(PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); }
-instructions					{ return sym(PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS); }
-cache-references				{ return sym(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES); }
-cache-misses					{ return sym(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES); }
-branch-instructions|branches			{ return sym(PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS); }
-branch-misses					{ return sym(PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES); }
-bus-cycles					{ return sym(PERF_TYPE_HARDWARE, PERF_COUNT_HW_BUS_CYCLES); }
-ref-cycles					{ return sym(PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES); }
-cpu-clock					{ return sym(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK); }
-task-clock					{ return sym(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK); }
-page-faults|faults				{ return sym(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS); }
-minor-faults					{ return sym(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MIN); }
-major-faults					{ return sym(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MAJ); }
-context-switches|cs				{ return sym(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES); }
-cpu-migrations|migrations			{ return sym(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS); }
-alignment-faults				{ return sym(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS); }
-emulation-faults				{ return sym(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); }
+
+%{
+	{
+		int start_token;
+
+		start_token = (int) parse_events_get_extra(yyscanner);
+		if (start_token) {
+			parse_events_set_extra(NULL, yyscanner);
+			return start_token;
+		}
+         }
+%}
+
+cpu-cycles|cycles				{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES); }
+stalled-cycles-frontend|idle-cycles-frontend	{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); }
+stalled-cycles-backend|idle-cycles-backend	{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); }
+instructions					{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS); }
+cache-references				{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES); }
+cache-misses					{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES); }
+branch-instructions|branches			{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS); }
+branch-misses					{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES); }
+bus-cycles					{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BUS_CYCLES); }
+ref-cycles					{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES); }
+cpu-clock					{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK); }
+task-clock					{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK); }
+page-faults|faults				{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS); }
+minor-faults					{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MIN); }
+major-faults					{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MAJ); }
+context-switches|cs				{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES); }
+cpu-migrations|migrations			{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS); }
+alignment-faults				{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS); }
+emulation-faults				{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); }
 
 L1-dcache|l1-d|l1d|L1-data		|
 L1-icache|l1-i|l1i|L1-instruction	|
@@ -87,14 +118,14 @@
 dTLB|d-tlb|Data-TLB			|
 iTLB|i-tlb|Instruction-TLB		|
 branch|branches|bpu|btb|bpc		|
-node					{ return str(PE_NAME_CACHE_TYPE); }
+node					{ return str(yyscanner, PE_NAME_CACHE_TYPE); }
 
 load|loads|read				|
 store|stores|write			|
 prefetch|prefetches			|
 speculative-read|speculative-load	|
 refs|Reference|ops|access		|
-misses|miss				{ return str(PE_NAME_CACHE_OP_RESULT); }
+misses|miss				{ return str(yyscanner, PE_NAME_CACHE_OP_RESULT); }
 
 	/*
 	 * These are event config hardcoded term names to be specified
@@ -102,20 +133,20 @@
 	 * so we can put them here directly. In case the we have a conflict
 	 * in future, this needs to go into '//' condition block.
 	 */
-config			{ return term(PARSE_EVENTS__TERM_TYPE_CONFIG); }
-config1			{ return term(PARSE_EVENTS__TERM_TYPE_CONFIG1); }
-config2			{ return term(PARSE_EVENTS__TERM_TYPE_CONFIG2); }
-name			{ return term(PARSE_EVENTS__TERM_TYPE_NAME); }
-period			{ return term(PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); }
-branch_type		{ return term(PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE); }
+config			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG); }
+config1			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG1); }
+config2			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG2); }
+name			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NAME); }
+period			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); }
+branch_type		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE); }
 
 mem:			{ BEGIN(mem); return PE_PREFIX_MEM; }
-r{num_raw_hex}		{ return raw(); }
-{num_dec}		{ return value(10); }
-{num_hex}		{ return value(16); }
+r{num_raw_hex}		{ return raw(yyscanner); }
+{num_dec}		{ return value(yyscanner, 10); }
+{num_hex}		{ return value(yyscanner, 16); }
 
-{modifier_event}	{ return str(PE_MODIFIER_EVENT); }
-{name}			{ return str(PE_NAME); }
+{modifier_event}	{ return str(yyscanner, PE_MODIFIER_EVENT); }
+{name}			{ return str(yyscanner, PE_NAME); }
 "/"			{ return '/'; }
 -			{ return '-'; }
 ,			{ return ','; }
@@ -123,17 +154,17 @@
 =			{ return '='; }
 
 <mem>{
-{modifier_bp}		{ return str(PE_MODIFIER_BP); }
+{modifier_bp}		{ return str(yyscanner, PE_MODIFIER_BP); }
 :			{ return ':'; }
-{num_dec}		{ return value(10); }
-{num_hex}		{ return value(16); }
+{num_dec}		{ return value(yyscanner, 10); }
+{num_hex}		{ return value(yyscanner, 16); }
 	/*
 	 * We need to separate 'mem:' scanner part, in order to get specific
 	 * modifier bits parsed out. Otherwise we would need to handle PE_NAME
 	 * and we'd need to parse it manually. During the escape from <mem>
 	 * state we need to put the escaping char back, so we dont miss it.
 	 */
-.			{ unput(*parse_events_text); BEGIN(INITIAL); }
+.			{ unput(*yytext); BEGIN(INITIAL); }
 	/*
 	 * We destroy the scanner after reaching EOF,
 	 * but anyway just to be sure get back to INIT state.
@@ -143,7 +174,7 @@
 
 %%
 
-int parse_events_wrap(void)
+int parse_events_wrap(void *scanner __used)
 {
 	return 1;
 }
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index 362cc59..9525c45 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -1,7 +1,8 @@
-
+%pure-parser
 %name-prefix "parse_events_"
-%parse-param {struct list_head *list_all}
-%parse-param {int *idx}
+%parse-param {void *_data}
+%parse-param {void *scanner}
+%lex-param {void* scanner}
 
 %{
 
@@ -12,8 +13,9 @@
 #include "types.h"
 #include "util.h"
 #include "parse-events.h"
+#include "parse-events-bison.h"
 
-extern int parse_events_lex (void);
+extern int parse_events_lex (YYSTYPE* lvalp, void* scanner);
 
 #define ABORT_ON(val) \
 do { \
@@ -23,6 +25,7 @@
 
 %}
 
+%token PE_START_EVENTS PE_START_TERMS
 %token PE_VALUE PE_VALUE_SYM PE_RAW PE_TERM
 %token PE_NAME
 %token PE_MODIFIER_EVENT PE_MODIFIER_BP
@@ -58,24 +61,33 @@
 }
 %%
 
+start:	/* the leading token selects which sub-grammar gets parsed */
+PE_START_EVENTS events
+|
+PE_START_TERMS  terms
+
 events:
 events ',' event | event
 
 event:
 event_def PE_MODIFIER_EVENT
 {
+	struct parse_events_data__events *data = _data;
+
 	/*
 	 * Apply modifier on all events added by single event definition
 	 * (there could be more events added for multiple tracepoint
 	 * definitions via '*?'.
 	 */
 	ABORT_ON(parse_events_modifier($1, $2));
-	parse_events_update_lists($1, list_all);
+	parse_events_update_lists($1, &data->list);
 }
 |
 event_def
 {
-	parse_events_update_lists($1, list_all);
+	struct parse_events_data__events *data = _data;
+
+	parse_events_update_lists($1, &data->list);
 }
 
 event_def: event_pmu |
@@ -89,9 +101,10 @@
 event_pmu:
 PE_NAME '/' event_config '/'
 {
+	struct parse_events_data__events *data = _data;
 	struct list_head *list = NULL;
 
-	ABORT_ON(parse_events_add_pmu(&list, idx, $1, $3));
+	ABORT_ON(parse_events_add_pmu(&list, &data->idx, $1, $3));
 	parse_events__free_terms($3);
 	$$ = list;
 }
@@ -99,94 +112,115 @@
 event_legacy_symbol:
 PE_VALUE_SYM '/' event_config '/'
 {
+	struct parse_events_data__events *data = _data;
 	struct list_head *list = NULL;
 	int type = $1 >> 16;
 	int config = $1 & 255;
 
-	ABORT_ON(parse_events_add_numeric(&list, idx, type, config, $3));
+	ABORT_ON(parse_events_add_numeric(&list, &data->idx,
+					  type, config, $3));
 	parse_events__free_terms($3);
 	$$ = list;
 }
 |
 PE_VALUE_SYM sep_slash_dc
 {
+	struct parse_events_data__events *data = _data;
 	struct list_head *list = NULL;
 	int type = $1 >> 16;
 	int config = $1 & 255;
 
-	ABORT_ON(parse_events_add_numeric(&list, idx, type, config, NULL));
+	ABORT_ON(parse_events_add_numeric(&list, &data->idx,
+					  type, config, NULL));
 	$$ = list;
 }
 
 event_legacy_cache:
 PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT '-' PE_NAME_CACHE_OP_RESULT
 {
+	struct parse_events_data__events *data = _data;
 	struct list_head *list = NULL;
 
-	ABORT_ON(parse_events_add_cache(&list, idx, $1, $3, $5));
+	ABORT_ON(parse_events_add_cache(&list, &data->idx, $1, $3, $5));
 	$$ = list;
 }
 |
 PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT
 {
+	struct parse_events_data__events *data = _data;
 	struct list_head *list = NULL;
 
-	ABORT_ON(parse_events_add_cache(&list, idx, $1, $3, NULL));
+	ABORT_ON(parse_events_add_cache(&list, &data->idx, $1, $3, NULL));
 	$$ = list;
 }
 |
 PE_NAME_CACHE_TYPE
 {
+	struct parse_events_data__events *data = _data;
 	struct list_head *list = NULL;
 
-	ABORT_ON(parse_events_add_cache(&list, idx, $1, NULL, NULL));
+	ABORT_ON(parse_events_add_cache(&list, &data->idx, $1, NULL, NULL));
 	$$ = list;
 }
 
 event_legacy_mem:
 PE_PREFIX_MEM PE_VALUE ':' PE_MODIFIER_BP sep_dc
 {
+	struct parse_events_data__events *data = _data;
 	struct list_head *list = NULL;
 
-	ABORT_ON(parse_events_add_breakpoint(&list, idx, (void *) $2, $4));
+	ABORT_ON(parse_events_add_breakpoint(&list, &data->idx,
+					     (void *) $2, $4));
 	$$ = list;
 }
 |
 PE_PREFIX_MEM PE_VALUE sep_dc
 {
+	struct parse_events_data__events *data = _data;
 	struct list_head *list = NULL;
 
-	ABORT_ON(parse_events_add_breakpoint(&list, idx, (void *) $2, NULL));
+	ABORT_ON(parse_events_add_breakpoint(&list, &data->idx,
+					     (void *) $2, NULL));
 	$$ = list;
 }
 
 event_legacy_tracepoint:
 PE_NAME ':' PE_NAME
 {
+	struct parse_events_data__events *data = _data;
 	struct list_head *list = NULL;
 
-	ABORT_ON(parse_events_add_tracepoint(&list, idx, $1, $3));
+	ABORT_ON(parse_events_add_tracepoint(&list, &data->idx, $1, $3));
 	$$ = list;
 }
 
 event_legacy_numeric:
 PE_VALUE ':' PE_VALUE
 {
+	struct parse_events_data__events *data = _data;
 	struct list_head *list = NULL;
 
-	ABORT_ON(parse_events_add_numeric(&list, idx, $1, $3, NULL));
+	ABORT_ON(parse_events_add_numeric(&list, &data->idx, $1, $3, NULL));
 	$$ = list;
 }
 
 event_legacy_raw:
 PE_RAW
 {
+	struct parse_events_data__events *data = _data;
 	struct list_head *list = NULL;
 
-	ABORT_ON(parse_events_add_numeric(&list, idx, PERF_TYPE_RAW, $1, NULL));
+	ABORT_ON(parse_events_add_numeric(&list, &data->idx,
+					  PERF_TYPE_RAW, $1, NULL));
 	$$ = list;
 }
 
+terms: event_config	/* reached through the PE_START_TERMS branch of 'start' */
+{
+	struct parse_events_data__terms *data = _data;
+	data->terms = $1;	/* export the parsed term list through _data */
+}
+
 event_config:
 event_config ',' event_term
 {
@@ -267,8 +301,7 @@
 
 %%
 
-void parse_events_error(struct list_head *list_all __used,
-			int *idx __used,
+void parse_events_error(void *data __used, void *scanner __used,
 			char const *msg __used)
 {
 }
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index a119a53..74d0948e 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -80,6 +80,114 @@
 	return 0;
 }
 
+/* Create alias @name on @list from the term string held in @file. */
+static int perf_pmu__new_alias(struct list_head *list, char *name, FILE *file)
+{
+	struct perf_pmu__alias *alias;
+	char buf[256];
+	int ret;
+
+	ret = fread(buf, 1, sizeof(buf) - 1, file);	/* keep room for the NUL */
+	if (ret == 0)
+		return -EINVAL;
+	buf[ret] = 0;
+
+	alias = malloc(sizeof(*alias));
+	if (!alias)
+		return -ENOMEM;
+	INIT_LIST_HEAD(&alias->terms);
+	alias->name = strdup(name);	/* before parsing: no terms to unwind */
+	ret = alias->name ? parse_events_terms(&alias->terms, buf) : -ENOMEM;
+	if (ret) {
+		free(alias->name);
+		free(alias);
+		return ret;
+	}
+	list_add_tail(&alias->list, list);
+	return 0;
+}
+
+/*
+ * Register one alias on 'head' per entry of the sysfs directory 'dir'
+ * ("." and ".." excepted), stopping at the first entry that fails.
+ */
+static int pmu_aliases_parse(char *dir, struct list_head *head)
+{
+	DIR *dp = opendir(dir);
+	struct dirent *de;
+	int err = 0;
+
+	if (dp == NULL)
+		return -EINVAL;
+
+	while ((de = readdir(dp)) != NULL) {
+		char *entry = de->d_name;
+		char path[PATH_MAX];
+		FILE *fp;
+
+		if (strcmp(entry, ".") == 0 || strcmp(entry, "..") == 0)
+			continue;
+		snprintf(path, PATH_MAX, "%s/%s", dir, entry);
+		fp = fopen(path, "r");
+		if (fp == NULL) {
+			err = -EINVAL;
+			break;
+		}
+		err = perf_pmu__new_alias(head, entry, fp);
+		fclose(fp);
+		if (err)
+			break;
+	}
+
+	closedir(dp);
+	return err;
+}
+
+/*
+ * Load the event aliases of the PMU called 'name' into 'head'. They are
+ * looked up in <sysfs>/bus/event_source/devices/<name>/events.
+ * Returns 0 on success and -1 when sysfs, that directory or any of its
+ * entries cannot be used.
+ */
+static int pmu_aliases(char *name, struct list_head *head)
+{
+	const char *mnt = sysfs_find_mountpoint();
+	char events_dir[PATH_MAX];
+	struct stat sb;
+
+	if (mnt == NULL)
+		return -1;
+
+	snprintf(events_dir, PATH_MAX,
+		 "%s/bus/event_source/devices/%s/events", mnt, name);
+
+	/* Fail early when the events directory cannot be stat()ed. */
+	if (stat(events_dir, &sb) < 0)
+		return -1;
+
+	/* Collapse whatever error the parser reports into a plain -1. */
+	return pmu_aliases_parse(events_dir, head) ? -1 : 0;
+}
+
+static int pmu_alias_terms(struct perf_pmu__alias *alias,
+			   struct list_head *terms)
+{
+	struct parse_events__term *src, *copy;
+	LIST_HEAD(staged);	/* on-stack head; TODO confirm free_terms() copes */
+	int err;
+
+	list_for_each_entry(src, &alias->terms, list) {
+		err = parse_events__term_clone(&copy, src);
+		if (err != 0) {		/* all or nothing: drop the partial copy */
+			parse_events__free_terms(&staged);
+			return err;
+		}
+		list_add_tail(&copy->list, &staged);
+	}
+	list_splice(&staged, terms);	/* insert the copies right after @terms */
+	return 0;
+}
+
 /*
  * Reading/parsing the default pmu type value, which should be
  * located at:
@@ -118,6 +226,7 @@
 {
 	struct perf_pmu *pmu;
 	LIST_HEAD(format);
+	LIST_HEAD(aliases);
 	__u32 type;
 
 	/*
@@ -135,8 +244,12 @@
 	if (!pmu)
 		return NULL;
 
+	pmu_aliases(name, &aliases);
+
 	INIT_LIST_HEAD(&pmu->format);
+	INIT_LIST_HEAD(&pmu->aliases);
 	list_splice(&format, &pmu->format);
+	list_splice(&aliases, &pmu->aliases);
 	pmu->name = strdup(name);
 	pmu->type = type;
 	return pmu;
@@ -279,6 +392,59 @@
 	return pmu_config(&pmu->format, attr, head_terms);
 }
 
+/* Look up the alias of @pmu that @term names; NULL when it names none. */
+static struct perf_pmu__alias *pmu_find_alias(struct perf_pmu *pmu,
+					      struct parse_events__term *term)
+{
+	struct perf_pmu__alias *pos;
+	char *wanted;
+
+	if (parse_events__is_hardcoded_term(term))
+		return NULL;
+	switch (term->type_val) {
+	case PARSE_EVENTS__TERM_TYPE_NUM:	/* "<alias>" carrying value 1 */
+		if (term->val.num != 1 ||
+		    pmu_find_format(&pmu->format, term->config))
+			return NULL;	/* other value, or pmu_find_format() hit */
+		wanted = term->config;
+		break;
+	case PARSE_EVENTS__TERM_TYPE_STR:	/* "event=<alias>" */
+		if (strcasecmp(term->config, "event") != 0)
+			return NULL;
+		wanted = term->val.str;
+		break;
+	default:
+		return NULL;
+	}
+	list_for_each_entry(pos, &pmu->aliases, list)
+		if (strcasecmp(pos->name, wanted) == 0)
+			return pos;
+	return NULL;
+}
+
+/*
+ * Expand aliases in 'head_terms': every term that names an alias of 'pmu'
+ * is replaced, in place, by copies of the terms defined for that alias.
+ */
+int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms)
+{
+	struct parse_events__term *cur, *next;
+
+	list_for_each_entry_safe(cur, next, head_terms, list) {
+		struct perf_pmu__alias *match = pmu_find_alias(pmu, cur);
+		int err;
+
+		if (match == NULL)
+			continue;
+		err = pmu_alias_terms(match, &cur->list);
+		if (err)
+			return err;
+		list_del(&cur->list);
+		free(cur);
+	}
+	return 0;
+}
+
 int perf_pmu__new_format(struct list_head *list, char *name,
 			 int config, unsigned long *bits)
 {
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 68c0db9..535f2c5 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -19,17 +19,26 @@
 	struct list_head list;
 };
 
+struct perf_pmu__alias {
+	char *name;		/* strdup()ed name of the alias */
+	struct list_head terms;	/* terms parsed from the alias definition */
+	struct list_head list;	/* entry in the owning PMU's alias list */
+};
+
 struct perf_pmu {
 	char *name;
 	__u32 type;
 	struct list_head format;
+	struct list_head aliases;
 	struct list_head list;
 };
 
 struct perf_pmu *perf_pmu__find(char *name);
 int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,
 		     struct list_head *head_terms);
-
+int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms);
+struct list_head *perf_pmu__alias(struct perf_pmu *pmu,
+				struct list_head *head_terms);
 int perf_pmu_wrap(void);
 void perf_pmu_error(struct list_head *list, char *name, char const *msg);
 
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
index 4c1b3d7..b3620fe 100644
--- a/tools/perf/util/scripting-engines/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -233,7 +233,8 @@
 		define_event_symbols(event, ev_name, args->next);
 }
 
-static inline struct event_format *find_cache_event(int type)
+static inline
+struct event_format *find_cache_event(struct pevent *pevent, int type)
 {
 	static char ev_name[256];
 	struct event_format *event;
@@ -241,7 +242,7 @@
 	if (events[type])
 		return events[type];
 
-	events[type] = event = trace_find_event(type);
+	events[type] = event = pevent_find_event(pevent, type);
 	if (!event)
 		return NULL;
 
@@ -252,7 +253,8 @@
 	return event;
 }
 
-static void perl_process_tracepoint(union perf_event *pevent __unused,
+static void perl_process_tracepoint(union perf_event *perf_event __unused,
+				    struct pevent *pevent,
 				    struct perf_sample *sample,
 				    struct perf_evsel *evsel,
 				    struct machine *machine __unused,
@@ -275,13 +277,13 @@
 	if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
 		return;
 
-	type = trace_parse_common_type(data);
+	type = trace_parse_common_type(pevent, data);
 
-	event = find_cache_event(type);
+	event = find_cache_event(pevent, type);
 	if (!event)
 		die("ug! no event found for type %d", type);
 
-	pid = trace_parse_common_pid(data);
+	pid = trace_parse_common_pid(pevent, data);
 
 	sprintf(handler, "%s::%s", event->system, event->name);
 
@@ -314,7 +316,8 @@
 				offset = field->offset;
 			XPUSHs(sv_2mortal(newSVpv((char *)data + offset, 0)));
 		} else { /* FIELD_IS_NUMERIC */
-			val = read_size(data + field->offset, field->size);
+			val = read_size(pevent, data + field->offset,
+					field->size);
 			if (field->flags & FIELD_IS_SIGNED) {
 				XPUSHs(sv_2mortal(newSViv(val)));
 			} else {
@@ -368,14 +371,15 @@
 	LEAVE;
 }
 
-static void perl_process_event(union perf_event *pevent,
+static void perl_process_event(union perf_event *event,
+			       struct pevent *pevent,
 			       struct perf_sample *sample,
 			       struct perf_evsel *evsel,
 			       struct machine *machine,
 			       struct thread *thread)
 {
-	perl_process_tracepoint(pevent, sample, evsel, machine, thread);
-	perl_process_event_generic(pevent, sample, evsel, machine, thread);
+	perl_process_tracepoint(event, pevent, sample, evsel, machine, thread);
+	perl_process_event_generic(event, sample, evsel, machine, thread);
 }
 
 static void run_start_sub(void)
@@ -448,7 +452,7 @@
 	return 0;
 }
 
-static int perl_generate_script(const char *outfile)
+static int perl_generate_script(struct pevent *pevent, const char *outfile)
 {
 	struct event_format *event = NULL;
 	struct format_field *f;
@@ -495,7 +499,7 @@
 	fprintf(ofp, "sub trace_begin\n{\n\t# optional\n}\n\n");
 	fprintf(ofp, "sub trace_end\n{\n\t# optional\n}\n\n");
 
-	while ((event = trace_find_next_event(event))) {
+	while ((event = trace_find_next_event(pevent, event))) {
 		fprintf(ofp, "sub %s::%s\n{\n", event->system, event->name);
 		fprintf(ofp, "\tmy (");
 
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index acb9795..a8ca2f8 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -190,7 +190,8 @@
 		define_event_symbols(event, ev_name, args->next);
 }
 
-static inline struct event_format *find_cache_event(int type)
+static inline
+struct event_format *find_cache_event(struct pevent *pevent, int type)
 {
 	static char ev_name[256];
 	struct event_format *event;
@@ -198,7 +199,7 @@
 	if (events[type])
 		return events[type];
 
-	events[type] = event = trace_find_event(type);
+	events[type] = event = pevent_find_event(pevent, type);
 	if (!event)
 		return NULL;
 
@@ -209,7 +210,8 @@
 	return event;
 }
 
-static void python_process_event(union perf_event *pevent __unused,
+static void python_process_event(union perf_event *perf_event __unused,
+				 struct pevent *pevent,
 				 struct perf_sample *sample,
 				 struct perf_evsel *evsel __unused,
 				 struct machine *machine __unused,
@@ -233,13 +235,13 @@
 	if (!t)
 		Py_FatalError("couldn't create Python tuple");
 
-	type = trace_parse_common_type(data);
+	type = trace_parse_common_type(pevent, data);
 
-	event = find_cache_event(type);
+	event = find_cache_event(pevent, type);
 	if (!event)
 		die("ug! no event found for type %d", type);
 
-	pid = trace_parse_common_pid(data);
+	pid = trace_parse_common_pid(pevent, data);
 
 	sprintf(handler_name, "%s__%s", event->system, event->name);
 
@@ -284,7 +286,8 @@
 				offset = field->offset;
 			obj = PyString_FromString((char *)data + offset);
 		} else { /* FIELD_IS_NUMERIC */
-			val = read_size(data + field->offset, field->size);
+			val = read_size(pevent, data + field->offset,
+					field->size);
 			if (field->flags & FIELD_IS_SIGNED) {
 				if ((long long)val >= LONG_MIN &&
 				    (long long)val <= LONG_MAX)
@@ -438,7 +441,7 @@
 	return err;
 }
 
-static int python_generate_script(const char *outfile)
+static int python_generate_script(struct pevent *pevent, const char *outfile)
 {
 	struct event_format *event = NULL;
 	struct format_field *f;
@@ -487,7 +490,7 @@
 	fprintf(ofp, "def trace_end():\n");
 	fprintf(ofp, "\tprint \"in trace_end\"\n\n");
 
-	while ((event = trace_find_next_event(event))) {
+	while ((event = trace_find_next_event(pevent, event))) {
 		fprintf(ofp, "def %s__%s(", event->system, event->name);
 		fprintf(ofp, "event_name, ");
 		fprintf(ofp, "context, ");
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index c3e399b..f5baff1 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -14,6 +14,7 @@
 #include "sort.h"
 #include "util.h"
 #include "cpumap.h"
+#include "event-parse.h"
 
 static int perf_session__open(struct perf_session *self, bool force)
 {
@@ -289,7 +290,6 @@
 }
 
 int machine__resolve_callchain(struct machine *self,
-			       struct perf_evsel *evsel __used,
 			       struct thread *thread,
 			       struct ip_callchain *chain,
 			       struct symbol **parent)
@@ -1449,7 +1449,7 @@
 	ret += hists__fprintf_nr_events(&session->hists, fp);
 
 	list_for_each_entry(pos, &session->evlist->entries, node) {
-		ret += fprintf(fp, "%s stats:\n", event_name(pos));
+		ret += fprintf(fp, "%s stats:\n", perf_evsel__name(pos));
 		ret += hists__fprintf_nr_events(&pos->hists, fp);
 	}
 
@@ -1490,8 +1490,8 @@
 }
 
 void perf_event__print_ip(union perf_event *event, struct perf_sample *sample,
-			  struct machine *machine, struct perf_evsel *evsel,
-			  int print_sym, int print_dso, int print_symoffset)
+			  struct machine *machine, int print_sym,
+			  int print_dso, int print_symoffset)
 {
 	struct addr_location al;
 	struct callchain_cursor_node *node;
@@ -1505,7 +1505,7 @@
 
 	if (symbol_conf.use_callchain && sample->callchain) {
 
-		if (machine__resolve_callchain(machine, evsel, al.thread,
+		if (machine__resolve_callchain(machine, al.thread,
 						sample->callchain, NULL) != 0) {
 			if (verbose)
 				error("Failed to resolve callchain. Skipping\n");
@@ -1611,3 +1611,58 @@
 	perf_header__fprintf_info(session, fp, full);
 	fprintf(fp, "# ========\n#\n");
 }
+
+
+/*
+ * Attach handlers to tracepoint events of 'session'.
+ *
+ * Every one of the 'nr_assocs' entries in 'assocs' pairs a "system:event"
+ * name with a handler. A name is skipped without error when it matches no
+ * event known to session->pevent or no tracepoint evsel in the evlist.
+ * Handlers installed before a failure stay installed.
+ * Returns 0 on success, or
+ *   -ENOMEM  a name could not be duplicated,
+ *   -ENOENT  a name contains no ':',
+ *   -EEXIST  the matching evsel already has a handler.
+ */
+int __perf_session__set_tracepoints_handlers(struct perf_session *session,
+					     const struct perf_evsel_str_handler *assocs,
+					     size_t nr_assocs)
+{
+	struct perf_evlist *evlist = session->evlist;
+	size_t i;
+
+	for (i = 0; i < nr_assocs; i++) {
+		struct event_format *format;
+		struct perf_evsel *evsel = NULL;
+		char *sys, *sep;
+
+		sys = strdup(assocs[i].name);
+		if (sys == NULL)
+			return -ENOMEM;
+
+		/* Split "system:event" in place at the colon. */
+		sep = strchr(sys, ':');
+		if (sep == NULL) {
+			free(sys);
+			return -ENOENT;
+		}
+		*sep++ = '\0';
+
+		format = pevent_find_event_by_name(session->pevent, sys, sep);
+		if (format != NULL)
+			evsel = perf_evlist__find_tracepoint_by_id(evlist,
+								   format->id);
+		free(sys);
+
+		/* Events that are not part of this session are not an error. */
+		if (evsel == NULL)
+			continue;
+
+		if (evsel->handler.func != NULL)
+			return -EEXIST;
+		evsel->handler.func = assocs[i].handler;
+	}
+
+	return 0;
+}
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 0c702e3..7c435bd 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -33,6 +33,7 @@
 	struct machine		host_machine;
 	struct rb_root		machines;
 	struct perf_evlist	*evlist;
+	struct pevent		*pevent;
 	/*
 	 * FIXME: Need to split this up further, we need global
 	 *	  stats + per event stats. 'perf diff' also needs
@@ -151,11 +152,20 @@
 					    unsigned int type);
 
 void perf_event__print_ip(union perf_event *event, struct perf_sample *sample,
-			  struct machine *machine, struct perf_evsel *evsel,
-			  int print_sym, int print_dso, int print_symoffset);
+			  struct machine *machine, int print_sym,
+			  int print_dso, int print_symoffset);
 
 int perf_session__cpu_bitmap(struct perf_session *session,
 			     const char *cpu_list, unsigned long *cpu_bitmap);
 
 void perf_session__fprintf_info(struct perf_session *s, FILE *fp, bool full);
+
+struct perf_evsel_str_handler;
+
+int __perf_session__set_tracepoints_handlers(struct perf_session *session,
+					     const struct perf_evsel_str_handler *assocs,
+					     size_t nr_assocs);
+
+#define perf_session__set_tracepoints_handlers(session, array) \
+	__perf_session__set_tracepoints_handlers(session, array, ARRAY_SIZE(array))
 #endif /* __PERF_SESSION_H */
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index a272374..0f5a0a4 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -241,6 +241,54 @@
 	.se_width_idx	= HISTC_SYMBOL,
 };
 
+/* --sort srcline */
+
+static int64_t
+sort__srcline_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	return (int64_t)(right->ip - left->ip);	/* by address, not by file:line */
+}
+
+/* Print "file:line" for self->ip, resolved once via addr2line and then cached. */
+static int hist_entry__srcline_snprintf(struct hist_entry *self, char *bf,
+				   size_t size, unsigned int width __used)
+{
+	FILE *fp;
+	char cmd[PATH_MAX + 2], *path = self->srcline, *nl;
+	size_t line_len = 0;
+	ssize_t nread;
+
+	if (path != NULL)
+		goto out_path;
+
+	snprintf(cmd, sizeof(cmd), "addr2line -e %s %016" PRIx64,
+		 self->ms.map->dso->long_name, self->ip);
+	fp = popen(cmd, "r");
+	if (!fp)
+		goto out_ip;
+	nread = getline(&path, &line_len, fp);
+	pclose(fp);	/* a popen() stream must be closed with pclose() */
+	if (nread < 0) {
+		free(path);	/* getline() may allocate even when it fails */
+		goto out_ip;
+	}
+	nl = strchr(path, '\n');
+	if (nl != NULL)
+		*nl = '\0';
+	self->srcline = path;	/* cache owns getline()'s buffer: no copy, no leak */
+out_path:
+	return repsep_snprintf(bf, size, "%s", path);
+out_ip:	/* no source line available: fall back to the raw address */
+	return repsep_snprintf(bf, size, "%-#*llx", BITS_PER_LONG / 4, self->ip);
+}
+
+struct sort_entry sort_srcline = {	/* sort key registered as "srcline" */
+	.se_header	= "Source:Line",
+	.se_cmp		= sort__srcline_cmp,
+	.se_snprintf	= hist_entry__srcline_snprintf,
+	.se_width_idx	= HISTC_SRCLINE,
+};
+
 /* --sort parent */
 
 static int64_t
@@ -439,6 +487,7 @@
 	DIM(SORT_PARENT, "parent", sort_parent),
 	DIM(SORT_CPU, "cpu", sort_cpu),
 	DIM(SORT_MISPREDICT, "mispredict", sort_mispredict),
+	DIM(SORT_SRCLINE, "srcline", sort_srcline),
 };
 
 int sort_dimension__add(const char *tok)
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 472aa5a..e724b26 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -71,6 +71,7 @@
 	char			level;
 	bool			used;
 	u8			filtered;
+	char			*srcline;
 	struct symbol		*parent;
 	union {
 		unsigned long	  position;
@@ -93,6 +94,7 @@
 	SORT_SYM_FROM,
 	SORT_SYM_TO,
 	SORT_MISPREDICT,
+	SORT_SRCLINE,
 };
 
 /*
diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c
index d583638..199bc4d 100644
--- a/tools/perf/util/string.c
+++ b/tools/perf/util/string.c
@@ -313,3 +313,25 @@
 	return 0;
 }
 
+/**
+ * rtrim - Removes trailing whitespace from @s.
+ * @s: The string to be stripped.
+ *
+ * Note that the first trailing whitespace is replaced with a %NUL-terminator
+ * in the given string @s. An empty string is returned untouched; a string
+ * made only of whitespace becomes empty. Returns @s.
+ */
+char *rtrim(char *s)
+{
+	size_t size = strlen(s);
+
+	if (!size)
+		return s;
+
+	/* Shrink a length rather than step a pointer to before @s (undefined). */
+	while (size && isspace((unsigned char)s[size - 1]))
+		size--;
+	s[size] = '\0';
+
+	return s;
+}
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 3e2e5ea..994f4ff 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1590,11 +1590,62 @@
 	return err;
 }
 
+/* Copy @filename's .gnu_debuglink name into @debuglink; 0 if found, else -1. */
+static int filename__read_debuglink(const char *filename,
+				    char *debuglink, size_t size)
+{
+	int fd, err = -1;
+	Elf *elf;
+	GElf_Ehdr ehdr;
+	GElf_Shdr shdr;
+	Elf_Data *data;
+	Elf_Scn *sec;
+
+	fd = open(filename, O_RDONLY);
+	if (fd < 0)
+		goto out;
+
+	elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+	if (elf == NULL) {
+		pr_debug2("%s: cannot read %s ELF file.\n", __func__, filename);
+		goto out_close;
+	}
+
+	if (elf_kind(elf) != ELF_K_ELF)
+		goto out_elf_end;
+
+	if (gelf_getehdr(elf, &ehdr) == NULL) {
+		pr_err("%s: cannot get elf header.\n", __func__);
+		goto out_elf_end;
+	}
+
+	sec = elf_section_by_name(elf, &ehdr, &shdr,
+				  ".gnu_debuglink", NULL);
+	if (sec == NULL)
+		goto out_elf_end;
+
+	data = elf_getdata(sec, NULL);
+	if (data == NULL)
+		goto out_elf_end;
+
+	/* the section starts with a zero-terminated name; keep the copy terminated */
+	snprintf(debuglink, size, "%s", (const char *)data->d_buf);
+	err = 0;
+
+out_elf_end:
+	elf_end(elf);	/* every path that got an Elf handle releases it */
+out_close:
+	close(fd);
+out:
+	return err;
+}
+
 char dso__symtab_origin(const struct dso *dso)
 {
 	static const char origin[] = {
 		[SYMTAB__KALLSYMS]	      = 'k',
 		[SYMTAB__JAVA_JIT]	      = 'j',
+		[SYMTAB__DEBUGLINK]           = 'l',
 		[SYMTAB__BUILD_ID_CACHE]      = 'B',
 		[SYMTAB__FEDORA_DEBUGINFO]    = 'f',
 		[SYMTAB__UBUNTU_DEBUGINFO]    = 'u',
@@ -1662,10 +1713,22 @@
 	 */
 	want_symtab = 1;
 restart:
-	for (dso->symtab_type = SYMTAB__BUILD_ID_CACHE;
+	for (dso->symtab_type = SYMTAB__DEBUGLINK;
 	     dso->symtab_type != SYMTAB__NOT_FOUND;
 	     dso->symtab_type++) {
 		switch (dso->symtab_type) {
+		case SYMTAB__DEBUGLINK: {
+			char *debuglink;
+			strncpy(name, dso->long_name, size);
+			debuglink = name + dso->long_name_len;
+			while (debuglink != name && *debuglink != '/')
+				debuglink--;
+			if (*debuglink == '/')
+				debuglink++;
+			filename__read_debuglink(dso->long_name, debuglink,
+						 size - (debuglink - name));
+			}
+			break;
 		case SYMTAB__BUILD_ID_CACHE:
 			/* skip the locally configured cache if a symfs is given */
 			if (symbol_conf.symfs[0] ||
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index af0752b..a884b99 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -257,6 +257,7 @@
 	SYMTAB__KALLSYMS = 0,
 	SYMTAB__GUEST_KALLSYMS,
 	SYMTAB__JAVA_JIT,
+	SYMTAB__DEBUGLINK,
 	SYMTAB__BUILD_ID_CACHE,
 	SYMTAB__FEDORA_DEBUGINFO,
 	SYMTAB__UBUNTU_DEBUGINFO,
diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c
index abe0e8e..7eeebce 100644
--- a/tools/perf/util/top.c
+++ b/tools/perf/util/top.c
@@ -65,7 +65,7 @@
 				top->freq ? "Hz" : "");
 	}
 
-	ret += SNPRINTF(bf + ret, size - ret, "%s", event_name(top->sym_evsel));
+	ret += SNPRINTF(bf + ret, size - ret, "%s", perf_evsel__name(top->sym_evsel));
 
 	ret += SNPRINTF(bf + ret, size - ret, "], ");
 
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index df2fddb..a51bd86 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -32,29 +32,25 @@
 int header_page_ts_size;
 int header_page_data_offset;
 
-struct pevent *perf_pevent;
-static struct pevent *pevent;
-
 bool latency_format;
 
-int read_trace_init(int file_bigendian, int host_bigendian)
+struct pevent *read_trace_init(int file_bigendian, int host_bigendian)
 {
-	if (pevent)
-		return 0;
+	struct pevent *pevent = pevent_alloc();
 
-	perf_pevent = pevent_alloc();
-	pevent = perf_pevent;
+	if (pevent != NULL) {
+		pevent_set_flag(pevent, PEVENT_NSEC_OUTPUT);
+		pevent_set_file_bigendian(pevent, file_bigendian);
+		pevent_set_host_bigendian(pevent, host_bigendian);
+	}
 
-	pevent_set_flag(pevent, PEVENT_NSEC_OUTPUT);
-	pevent_set_file_bigendian(pevent, file_bigendian);
-	pevent_set_host_bigendian(pevent, host_bigendian);
-
-	return 0;
+	return pevent;
 }
 
 static int get_common_field(struct scripting_context *context,
 			    int *offset, int *size, const char *type)
 {
+	struct pevent *pevent = context->pevent;
 	struct event_format *event;
 	struct format_field *field;
 
@@ -150,7 +146,7 @@
 	return data + field->offset;
 }
 
-int trace_parse_common_type(void *data)
+int trace_parse_common_type(struct pevent *pevent, void *data)
 {
 	struct pevent_record record;
 
@@ -158,7 +154,7 @@
 	return pevent_data_type(pevent, &record);
 }
 
-int trace_parse_common_pid(void *data)
+int trace_parse_common_pid(struct pevent *pevent, void *data)
 {
 	struct pevent_record record;
 
@@ -166,27 +162,21 @@
 	return pevent_data_pid(pevent, &record);
 }
 
-unsigned long long read_size(void *ptr, int size)
+unsigned long long read_size(struct pevent *pevent, void *ptr, int size)
 {
 	return pevent_read_number(pevent, ptr, size);
 }
 
-struct event_format *trace_find_event(int type)
-{
-	return pevent_find_event(pevent, type);
-}
-
-
-void print_trace_event(int cpu, void *data, int size)
+void print_trace_event(struct pevent *pevent, int cpu, void *data, int size)
 {
 	struct event_format *event;
 	struct pevent_record record;
 	struct trace_seq s;
 	int type;
 
-	type = trace_parse_common_type(data);
+	type = trace_parse_common_type(pevent, data);
 
-	event = trace_find_event(type);
+	event = pevent_find_event(pevent, type);
 	if (!event) {
 		warning("ug! no event found for type %d", type);
 		return;
@@ -203,8 +193,8 @@
 	printf("\n");
 }
 
-void print_event(int cpu, void *data, int size, unsigned long long nsecs,
-		  char *comm)
+void print_event(struct pevent *pevent, int cpu, void *data, int size,
+		 unsigned long long nsecs, char *comm)
 {
 	struct pevent_record record;
 	struct trace_seq s;
@@ -227,7 +217,8 @@
 	printf("\n");
 }
 
-void parse_proc_kallsyms(char *file, unsigned int size __unused)
+void parse_proc_kallsyms(struct pevent *pevent,
+			 char *file, unsigned int size __unused)
 {
 	unsigned long long addr;
 	char *func;
@@ -258,7 +249,8 @@
 	}
 }
 
-void parse_ftrace_printk(char *file, unsigned int size __unused)
+void parse_ftrace_printk(struct pevent *pevent,
+			 char *file, unsigned int size __unused)
 {
 	unsigned long long addr;
 	char *printk;
@@ -282,17 +274,19 @@
 	}
 }
 
-int parse_ftrace_file(char *buf, unsigned long size)
+int parse_ftrace_file(struct pevent *pevent, char *buf, unsigned long size)
 {
 	return pevent_parse_event(pevent, buf, size, "ftrace");
 }
 
-int parse_event_file(char *buf, unsigned long size, char *sys)
+int parse_event_file(struct pevent *pevent,
+		     char *buf, unsigned long size, char *sys)
 {
 	return pevent_parse_event(pevent, buf, size, sys);
 }
 
-struct event_format *trace_find_next_event(struct event_format *event)
+struct event_format *trace_find_next_event(struct pevent *pevent,
+					   struct event_format *event)
 {
 	static int idx;
 
diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c
index f097e0d..719ed74 100644
--- a/tools/perf/util/trace-event-read.c
+++ b/tools/perf/util/trace-event-read.c
@@ -114,20 +114,20 @@
 	};
 }
 
-static unsigned int read4(void)
+static unsigned int read4(struct pevent *pevent)
 {
 	unsigned int data;
 
 	read_or_die(&data, 4);
-	return __data2host4(perf_pevent, data);
+	return __data2host4(pevent, data);
 }
 
-static unsigned long long read8(void)
+static unsigned long long read8(struct pevent *pevent)
 {
 	unsigned long long data;
 
 	read_or_die(&data, 8);
-	return __data2host8(perf_pevent, data);
+	return __data2host8(pevent, data);
 }
 
 static char *read_string(void)
@@ -168,12 +168,12 @@
 	return str;
 }
 
-static void read_proc_kallsyms(void)
+static void read_proc_kallsyms(struct pevent *pevent)
 {
 	unsigned int size;
 	char *buf;
 
-	size = read4();
+	size = read4(pevent);
 	if (!size)
 		return;
 
@@ -181,29 +181,29 @@
 	read_or_die(buf, size);
 	buf[size] = '\0';
 
-	parse_proc_kallsyms(buf, size);
+	parse_proc_kallsyms(pevent, buf, size);
 
 	free(buf);
 }
 
-static void read_ftrace_printk(void)
+static void read_ftrace_printk(struct pevent *pevent)
 {
 	unsigned int size;
 	char *buf;
 
-	size = read4();
+	size = read4(pevent);
 	if (!size)
 		return;
 
 	buf = malloc_or_die(size);
 	read_or_die(buf, size);
 
-	parse_ftrace_printk(buf, size);
+	parse_ftrace_printk(pevent, buf, size);
 
 	free(buf);
 }
 
-static void read_header_files(void)
+static void read_header_files(struct pevent *pevent)
 {
 	unsigned long long size;
 	char *header_event;
@@ -214,7 +214,7 @@
 	if (memcmp(buf, "header_page", 12) != 0)
 		die("did not read header page");
 
-	size = read8();
+	size = read8(pevent);
 	skip(size);
 
 	/*
@@ -227,47 +227,48 @@
 	if (memcmp(buf, "header_event", 13) != 0)
 		die("did not read header event");
 
-	size = read8();
+	size = read8(pevent);
 	header_event = malloc_or_die(size);
 	read_or_die(header_event, size);
 	free(header_event);
 }
 
-static void read_ftrace_file(unsigned long long size)
+static void read_ftrace_file(struct pevent *pevent, unsigned long long size)
 {
 	char *buf;
 
 	buf = malloc_or_die(size);
 	read_or_die(buf, size);
-	parse_ftrace_file(buf, size);
+	parse_ftrace_file(pevent, buf, size);
 	free(buf);
 }
 
-static void read_event_file(char *sys, unsigned long long size)
+static void read_event_file(struct pevent *pevent, char *sys,
+			    unsigned long long size)
 {
 	char *buf;
 
 	buf = malloc_or_die(size);
 	read_or_die(buf, size);
-	parse_event_file(buf, size, sys);
+	parse_event_file(pevent, buf, size, sys);
 	free(buf);
 }
 
-static void read_ftrace_files(void)
+static void read_ftrace_files(struct pevent *pevent)
 {
 	unsigned long long size;
 	int count;
 	int i;
 
-	count = read4();
+	count = read4(pevent);
 
 	for (i = 0; i < count; i++) {
-		size = read8();
-		read_ftrace_file(size);
+		size = read8(pevent);
+		read_ftrace_file(pevent, size);
 	}
 }
 
-static void read_event_files(void)
+static void read_event_files(struct pevent *pevent)
 {
 	unsigned long long size;
 	char *sys;
@@ -275,15 +276,15 @@
 	int count;
 	int i,x;
 
-	systems = read4();
+	systems = read4(pevent);
 
 	for (i = 0; i < systems; i++) {
 		sys = read_string();
 
-		count = read4();
+		count = read4(pevent);
 		for (x=0; x < count; x++) {
-			size = read8();
-			read_event_file(sys, size);
+			size = read8(pevent);
+			read_event_file(pevent, sys, size);
 		}
 	}
 }
@@ -377,7 +378,7 @@
 	return (unsigned long)ptr - (unsigned long)cpu_data[cpu].page;
 }
 
-struct pevent_record *trace_peek_data(int cpu)
+struct pevent_record *trace_peek_data(struct pevent *pevent, int cpu)
 {
 	struct pevent_record *data;
 	void *page = cpu_data[cpu].page;
@@ -399,15 +400,15 @@
 		/* FIXME: handle header page */
 		if (header_page_ts_size != 8)
 			die("expected a long long type for timestamp");
-		cpu_data[cpu].timestamp = data2host8(perf_pevent, ptr);
+		cpu_data[cpu].timestamp = data2host8(pevent, ptr);
 		ptr += 8;
 		switch (header_page_size_size) {
 		case 4:
-			cpu_data[cpu].page_size = data2host4(perf_pevent, ptr);
+			cpu_data[cpu].page_size = data2host4(pevent, ptr);
 			ptr += 4;
 			break;
 		case 8:
-			cpu_data[cpu].page_size = data2host8(perf_pevent, ptr);
+			cpu_data[cpu].page_size = data2host8(pevent, ptr);
 			ptr += 8;
 			break;
 		default:
@@ -421,10 +422,10 @@
 
 	if (idx >= cpu_data[cpu].page_size) {
 		get_next_page(cpu);
-		return trace_peek_data(cpu);
+		return trace_peek_data(pevent, cpu);
 	}
 
-	type_len_ts = data2host4(perf_pevent, ptr);
+	type_len_ts = data2host4(pevent, ptr);
 	ptr += 4;
 
 	type_len = type_len4host(type_len_ts);
@@ -434,14 +435,14 @@
 	case RINGBUF_TYPE_PADDING:
 		if (!delta)
 			die("error, hit unexpected end of page");
-		length = data2host4(perf_pevent, ptr);
+		length = data2host4(pevent, ptr);
 		ptr += 4;
 		length *= 4;
 		ptr += length;
 		goto read_again;
 
 	case RINGBUF_TYPE_TIME_EXTEND:
-		extend = data2host4(perf_pevent, ptr);
+		extend = data2host4(pevent, ptr);
 		ptr += 4;
 		extend <<= TS_SHIFT;
 		extend += delta;
@@ -452,7 +453,7 @@
 		ptr += 12;
 		break;
 	case 0:
-		length = data2host4(perf_pevent, ptr);
+		length = data2host4(pevent, ptr);
 		ptr += 4;
 		die("here! length=%d", length);
 		break;
@@ -477,17 +478,17 @@
 	return data;
 }
 
-struct pevent_record *trace_read_data(int cpu)
+struct pevent_record *trace_read_data(struct pevent *pevent, int cpu)
 {
 	struct pevent_record *data;
 
-	data = trace_peek_data(cpu);
+	data = trace_peek_data(pevent, cpu);
 	cpu_data[cpu].next = NULL;
 
 	return data;
 }
 
-ssize_t trace_report(int fd, bool __repipe)
+ssize_t trace_report(int fd, struct pevent **ppevent, bool __repipe)
 {
 	char buf[BUFSIZ];
 	char test[] = { 23, 8, 68 };
@@ -519,30 +520,32 @@
 	file_bigendian = buf[0];
 	host_bigendian = bigendian();
 
-	read_trace_init(file_bigendian, host_bigendian);
+	*ppevent = read_trace_init(file_bigendian, host_bigendian);
+	if (*ppevent == NULL)
+		die("read_trace_init failed");
 
 	read_or_die(buf, 1);
 	long_size = buf[0];
 
-	page_size = read4();
+	page_size = read4(*ppevent);
 
-	read_header_files();
+	read_header_files(*ppevent);
 
-	read_ftrace_files();
-	read_event_files();
-	read_proc_kallsyms();
-	read_ftrace_printk();
+	read_ftrace_files(*ppevent);
+	read_event_files(*ppevent);
+	read_proc_kallsyms(*ppevent);
+	read_ftrace_printk(*ppevent);
 
 	size = calc_data_size - 1;
 	calc_data_size = 0;
 	repipe = false;
 
 	if (show_funcs) {
-		pevent_print_funcs(perf_pevent);
+		pevent_print_funcs(*ppevent);
 		return size;
 	}
 	if (show_printk) {
-		pevent_print_printk(perf_pevent);
+		pevent_print_printk(*ppevent);
 		return size;
 	}
 
diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c
index 18ae6c1..474aa7a 100644
--- a/tools/perf/util/trace-event-scripting.c
+++ b/tools/perf/util/trace-event-scripting.c
@@ -36,6 +36,7 @@
 }
 
 static void process_event_unsupported(union perf_event *event __unused,
+				      struct pevent *pevent __unused,
 				      struct perf_sample *sample __unused,
 				      struct perf_evsel *evsel __unused,
 				      struct machine *machine __unused,
@@ -61,7 +62,8 @@
 	return -1;
 }
 
-static int python_generate_script_unsupported(const char *outfile __unused)
+static int python_generate_script_unsupported(struct pevent *pevent __unused,
+					      const char *outfile __unused)
 {
 	print_python_unsupported_msg();
 
@@ -122,7 +124,8 @@
 	return -1;
 }
 
-static int perl_generate_script_unsupported(const char *outfile __unused)
+static int perl_generate_script_unsupported(struct pevent *pevent __unused,
+					    const char *outfile __unused)
 {
 	print_perl_unsupported_msg();
 
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index 639852a..8fef1d6 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -8,6 +8,7 @@
 struct machine;
 struct perf_sample;
 union perf_event;
+struct perf_tool;
 struct thread;
 
 extern int header_page_size_size;
@@ -29,35 +30,36 @@
 
 int bigendian(void);
 
-int read_trace_init(int file_bigendian, int host_bigendian);
-void print_trace_event(int cpu, void *data, int size);
+struct pevent *read_trace_init(int file_bigendian, int host_bigendian);
+void print_trace_event(struct pevent *pevent, int cpu, void *data, int size);
 
-void print_event(int cpu, void *data, int size, unsigned long long nsecs,
-		  char *comm);
+void print_event(struct pevent *pevent, int cpu, void *data, int size,
+		 unsigned long long nsecs, char *comm);
 
-int parse_ftrace_file(char *buf, unsigned long size);
-int parse_event_file(char *buf, unsigned long size, char *sys);
+int parse_ftrace_file(struct pevent *pevent, char *buf, unsigned long size);
+int parse_event_file(struct pevent *pevent,
+		     char *buf, unsigned long size, char *sys);
 
-struct pevent_record *trace_peek_data(int cpu);
-struct event_format *trace_find_event(int type);
+struct pevent_record *trace_peek_data(struct pevent *pevent, int cpu);
 
 unsigned long long
 raw_field_value(struct event_format *event, const char *name, void *data);
 void *raw_field_ptr(struct event_format *event, const char *name, void *data);
 
-void parse_proc_kallsyms(char *file, unsigned int size __unused);
-void parse_ftrace_printk(char *file, unsigned int size __unused);
+void parse_proc_kallsyms(struct pevent *pevent, char *file, unsigned int size);
+void parse_ftrace_printk(struct pevent *pevent, char *file, unsigned int size);
 
-ssize_t trace_report(int fd, bool repipe);
+ssize_t trace_report(int fd, struct pevent **pevent, bool repipe);
 
-int trace_parse_common_type(void *data);
-int trace_parse_common_pid(void *data);
+int trace_parse_common_type(struct pevent *pevent, void *data);
+int trace_parse_common_pid(struct pevent *pevent, void *data);
 
-struct event_format *trace_find_next_event(struct event_format *event);
-unsigned long long read_size(void *ptr, int size);
+struct event_format *trace_find_next_event(struct pevent *pevent,
+					   struct event_format *event);
+unsigned long long read_size(struct pevent *pevent, void *ptr, int size);
 unsigned long long eval_flag(const char *flag);
 
-struct pevent_record *trace_read_data(int cpu);
+struct pevent_record *trace_read_data(struct pevent *pevent, int cpu);
 int read_tracing_data(int fd, struct list_head *pattrs);
 
 struct tracing_data {
@@ -77,11 +79,12 @@
 	int (*start_script) (const char *script, int argc, const char **argv);
 	int (*stop_script) (void);
 	void (*process_event) (union perf_event *event,
+			       struct pevent *pevent,
 			       struct perf_sample *sample,
 			       struct perf_evsel *evsel,
 			       struct machine *machine,
 			       struct thread *thread);
-	int (*generate_script) (const char *outfile);
+	int (*generate_script) (struct pevent *pevent, const char *outfile);
 };
 
 int script_spec_register(const char *spec, struct scripting_ops *ops);
@@ -90,6 +93,7 @@
 void setup_python_scripting(void);
 
 struct scripting_context {
+	struct pevent *pevent;
 	void *event_data;
 };
 
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 2daaedb..b13c733 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -264,4 +264,6 @@
 
 size_t hex_width(u64 v);
 
+char *rtrim(char *s);
+
 #endif