Blame - arch/powerpc/kernel/perf_event.c - SHIFTPHONES/mainline/linux

blob: b6cf8f1f4d35b834f5fb62977c66d38aaf143654 [file] [log] [blame]

Ingo Molnar	cdd6c48	2009-09-21 12:02:48 +0200	[diff] [blame]	1	/*
				2	* Performance event support - powerpc architecture code
				3	*
				4	* Copyright 2008-2009 Paul Mackerras, IBM Corporation.
				5	*
				6	* This program is free software; you can redistribute it and/or
				7	* modify it under the terms of the GNU General Public License
				8	* as published by the Free Software Foundation; either version
				9	* 2 of the License, or (at your option) any later version.
				10	*/
				11	#include <linux/kernel.h>
				12	#include <linux/sched.h>
				13	#include <linux/perf_event.h>
				14	#include <linux/percpu.h>
				15	#include <linux/hardirq.h>
				16	#include <asm/reg.h>
				17	#include <asm/pmc.h>
				18	#include <asm/machdep.h>
				19	#include <asm/firmware.h>
				20	#include <asm/ptrace.h>
				21
				22	struct cpu_hw_events {
				23	int n_events;
				24	int n_percpu;
				25	int disabled;
				26	int n_added;
				27	int n_limited;
				28	u8 pmcs_enabled;
				29	struct perf_event *event[MAX_HWEVENTS];
				30	u64 events[MAX_HWEVENTS];
				31	unsigned int flags[MAX_HWEVENTS];
				32	unsigned long mmcr[3];
Paul Mackerras	a8f90e9	2009-09-22 09:48:08 +1000	[diff] [blame]	33	struct perf_event *limited_counter[MAX_LIMITED_HWCOUNTERS];
				34	u8 limited_hwidx[MAX_LIMITED_HWCOUNTERS];
Ingo Molnar	cdd6c48	2009-09-21 12:02:48 +0200	[diff] [blame]	35	u64 alternatives[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];
				36	unsigned long amasks[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];
				37	unsigned long avalues[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];
				38	};
				39	DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
				40
				41	struct power_pmu *ppmu;
				42
				43	/*
Ingo Molnar	57c0c15	2009-09-21 12:20:38 +0200	[diff] [blame]	44	* Normally, to ignore kernel events we set the FCS (freeze counters
Ingo Molnar	cdd6c48	2009-09-21 12:02:48 +0200	[diff] [blame]	45	* in supervisor mode) bit in MMCR0, but if the kernel runs with the
				46	* hypervisor bit set in the MSR, or if we are running on a processor
				47	* where the hypervisor bit is forced to 1 (as on Apple G5 processors),
				48	* then we need to use the FCHV bit to ignore kernel events.
				49	*/
				50	static unsigned int freeze_events_kernel = MMCR0_FCS;
				51
				52	/*
				53	* 32-bit doesn't have MMCRA but does have an MMCR2,
				54	* and a few other names are different.
				55	*/
				56	#ifdef CONFIG_PPC32
				57
				58	#define MMCR0_FCHV 0
				59	#define MMCR0_PMCjCE MMCR0_PMCnCE
				60
				61	#define SPRN_MMCRA SPRN_MMCR2
				62	#define MMCRA_SAMPLE_ENABLE 0
				63
				64	static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
				65	{
				66	return 0;
				67	}
				68	static inline void perf_get_data_addr(struct pt_regs regs, u64 addrp) { }
				69	static inline u32 perf_get_misc_flags(struct pt_regs *regs)
				70	{
				71	return 0;
				72	}
				73	static inline void perf_read_regs(struct pt_regs *regs) { }
				74	static inline int perf_intr_is_nmi(struct pt_regs *regs)
				75	{
				76	return 0;
				77	}
				78
				79	#endif /* CONFIG_PPC32 */
				80
				81	/*
				82	* Things that are specific to 64-bit implementations.
				83	*/
				84	#ifdef CONFIG_PPC64
				85
				86	static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
				87	{
				88	unsigned long mmcra = regs->dsisr;
				89
				90	if ((mmcra & MMCRA_SAMPLE_ENABLE) && !(ppmu->flags & PPMU_ALT_SIPR)) {
				91	unsigned long slot = (mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT;
				92	if (slot > 1)
				93	return 4 * (slot - 1);
				94	}
				95	return 0;
				96	}
				97
				98	/*
				99	* The user wants a data address recorded.
				100	* If we're not doing instruction sampling, give them the SDAR
				101	* (sampled data address). If we are doing instruction sampling, then
				102	* only give them the SDAR if it corresponds to the instruction
				103	* pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC
				104	* bit in MMCRA.
				105	*/
				106	static inline void perf_get_data_addr(struct pt_regs regs, u64 addrp)
				107	{
				108	unsigned long mmcra = regs->dsisr;
				109	unsigned long sdsync = (ppmu->flags & PPMU_ALT_SIPR) ?
				110	POWER6_MMCRA_SDSYNC : MMCRA_SDSYNC;
				111
				112	if (!(mmcra & MMCRA_SAMPLE_ENABLE) \|\| (mmcra & sdsync))
				113	*addrp = mfspr(SPRN_SDAR);
				114	}
				115
				116	static inline u32 perf_get_misc_flags(struct pt_regs *regs)
				117	{
				118	unsigned long mmcra = regs->dsisr;
Michael Neuling	7abb840	2009-10-14 19:32:15 +0000	[diff] [blame]	119	unsigned long sihv = MMCRA_SIHV;
				120	unsigned long sipr = MMCRA_SIPR;
Ingo Molnar	cdd6c48	2009-09-21 12:02:48 +0200	[diff] [blame]	121
				122	if (TRAP(regs) != 0xf00)
				123	return 0; /* not a PMU interrupt */
				124
				125	if (ppmu->flags & PPMU_ALT_SIPR) {
Michael Neuling	7abb840	2009-10-14 19:32:15 +0000	[diff] [blame]	126	sihv = POWER6_MMCRA_SIHV;
				127	sipr = POWER6_MMCRA_SIPR;
Ingo Molnar	cdd6c48	2009-09-21 12:02:48 +0200	[diff] [blame]	128	}
Michael Neuling	7abb840	2009-10-14 19:32:15 +0000	[diff] [blame]	129
				130	/* PR has priority over HV, so order below is important */
				131	if (mmcra & sipr)
				132	return PERF_RECORD_MISC_USER;
				133	if ((mmcra & sihv) && (freeze_events_kernel != MMCR0_FCHV))
Ingo Molnar	cdd6c48	2009-09-21 12:02:48 +0200	[diff] [blame]	134	return PERF_RECORD_MISC_HYPERVISOR;
Michael Neuling	7abb840	2009-10-14 19:32:15 +0000	[diff] [blame]	135	return PERF_RECORD_MISC_KERNEL;
Ingo Molnar	cdd6c48	2009-09-21 12:02:48 +0200	[diff] [blame]	136	}
				137
				138	/*
				139	* Overload regs->dsisr to store MMCRA so we only need to read it once
				140	* on each interrupt.
				141	*/
				142	static inline void perf_read_regs(struct pt_regs *regs)
				143	{
				144	regs->dsisr = mfspr(SPRN_MMCRA);
				145	}
				146
				147	/*
				148	* If interrupts were soft-disabled when a PMU interrupt occurs, treat
				149	* it as an NMI.
				150	*/
				151	static inline int perf_intr_is_nmi(struct pt_regs *regs)
				152	{
				153	return !regs->softe;
				154	}
				155
				156	#endif /* CONFIG_PPC64 */
				157
				158	static void perf_event_interrupt(struct pt_regs *regs);
				159
				160	void perf_event_print_debug(void)
				161	{
				162	}
				163
				164	/*
Ingo Molnar	57c0c15	2009-09-21 12:20:38 +0200	[diff] [blame]	165	* Read one performance monitor counter (PMC).
Ingo Molnar	cdd6c48	2009-09-21 12:02:48 +0200	[diff] [blame]	166	*/
				167	static unsigned long read_pmc(int idx)
				168	{
				169	unsigned long val;
				170
				171	switch (idx) {
				172	case 1:
				173	val = mfspr(SPRN_PMC1);
				174	break;
				175	case 2:
				176	val = mfspr(SPRN_PMC2);
				177	break;
				178	case 3:
				179	val = mfspr(SPRN_PMC3);
				180	break;
				181	case 4:
				182	val = mfspr(SPRN_PMC4);
				183	break;
				184	case 5:
				185	val = mfspr(SPRN_PMC5);
				186	break;
				187	case 6:
				188	val = mfspr(SPRN_PMC6);
				189	break;
				190	#ifdef CONFIG_PPC64
				191	case 7:
				192	val = mfspr(SPRN_PMC7);
				193	break;
				194	case 8:
				195	val = mfspr(SPRN_PMC8);
				196	break;
				197	#endif /* CONFIG_PPC64 */
				198	default:
				199	printk(KERN_ERR "oops trying to read PMC%d\n", idx);
				200	val = 0;
				201	}
				202	return val;
				203	}
				204
				205	/*
				206	* Write one PMC.
				207	*/
				208	static void write_pmc(int idx, unsigned long val)
				209	{
				210	switch (idx) {
				211	case 1:
				212	mtspr(SPRN_PMC1, val);
				213	break;
				214	case 2:
				215	mtspr(SPRN_PMC2, val);
				216	break;
				217	case 3:
				218	mtspr(SPRN_PMC3, val);
				219	break;
				220	case 4:
				221	mtspr(SPRN_PMC4, val);
				222	break;
				223	case 5:
				224	mtspr(SPRN_PMC5, val);
				225	break;
				226	case 6:
				227	mtspr(SPRN_PMC6, val);
				228	break;
				229	#ifdef CONFIG_PPC64
				230	case 7:
				231	mtspr(SPRN_PMC7, val);
				232	break;
				233	case 8:
				234	mtspr(SPRN_PMC8, val);
				235	break;
				236	#endif /* CONFIG_PPC64 */
				237	default:
				238	printk(KERN_ERR "oops trying to write PMC%d\n", idx);
				239	}
				240	}
				241
				242	/*
				243	* Check if a set of events can all go on the PMU at once.
				244	* If they can't, this will look at alternative codes for the events
				245	* and see if any combination of alternative codes is feasible.
				246	* The feasible set is returned in event_id[].
				247	*/
				248	static int power_check_constraints(struct cpu_hw_events *cpuhw,
				249	u64 event_id[], unsigned int cflags[],
				250	int n_ev)
				251	{
				252	unsigned long mask, value, nv;
				253	unsigned long smasks[MAX_HWEVENTS], svalues[MAX_HWEVENTS];
				254	int n_alt[MAX_HWEVENTS], choice[MAX_HWEVENTS];
				255	int i, j;
				256	unsigned long addf = ppmu->add_fields;
				257	unsigned long tadd = ppmu->test_adder;
				258
Paul Mackerras	a8f90e9	2009-09-22 09:48:08 +1000	[diff] [blame]	259	if (n_ev > ppmu->n_counter)
Ingo Molnar	cdd6c48	2009-09-21 12:02:48 +0200	[diff] [blame]	260	return -1;
				261
				262	/* First see if the events will go on as-is */
				263	for (i = 0; i < n_ev; ++i) {
				264	if ((cflags[i] & PPMU_LIMITED_PMC_REQD)
				265	&& !ppmu->limited_pmc_event(event_id[i])) {
				266	ppmu->get_alternatives(event_id[i], cflags[i],
				267	cpuhw->alternatives[i]);
				268	event_id[i] = cpuhw->alternatives[i][0];
				269	}
				270	if (ppmu->get_constraint(event_id[i], &cpuhw->amasks[i][0],
				271	&cpuhw->avalues[i][0]))
				272	return -1;
				273	}
				274	value = mask = 0;
				275	for (i = 0; i < n_ev; ++i) {
				276	nv = (value \| cpuhw->avalues[i][0]) +
				277	(value & cpuhw->avalues[i][0] & addf);
				278	if ((((nv + tadd) ^ value) & mask) != 0 \|\|
				279	(((nv + tadd) ^ cpuhw->avalues[i][0]) &
				280	cpuhw->amasks[i][0]) != 0)
				281	break;
				282	value = nv;
				283	mask \|= cpuhw->amasks[i][0];
				284	}
				285	if (i == n_ev)
				286	return 0; /* all OK */
				287
				288	/* doesn't work, gather alternatives... */
				289	if (!ppmu->get_alternatives)
				290	return -1;
				291	for (i = 0; i < n_ev; ++i) {
				292	choice[i] = 0;
				293	n_alt[i] = ppmu->get_alternatives(event_id[i], cflags[i],
				294	cpuhw->alternatives[i]);
				295	for (j = 1; j < n_alt[i]; ++j)
				296	ppmu->get_constraint(cpuhw->alternatives[i][j],
				297	&cpuhw->amasks[i][j],
				298	&cpuhw->avalues[i][j]);
				299	}
				300
				301	/* enumerate all possibilities and see if any will work */
				302	i = 0;
				303	j = -1;
				304	value = mask = nv = 0;
				305	while (i < n_ev) {
				306	if (j >= 0) {
				307	/* we're backtracking, restore context */
				308	value = svalues[i];
				309	mask = smasks[i];
				310	j = choice[i];
				311	}
				312	/*
				313	* See if any alternative k for event_id i,
				314	* where k > j, will satisfy the constraints.
				315	*/
				316	while (++j < n_alt[i]) {
				317	nv = (value \| cpuhw->avalues[i][j]) +
				318	(value & cpuhw->avalues[i][j] & addf);
				319	if ((((nv + tadd) ^ value) & mask) == 0 &&
				320	(((nv + tadd) ^ cpuhw->avalues[i][j])
				321	& cpuhw->amasks[i][j]) == 0)
				322	break;
				323	}
				324	if (j >= n_alt[i]) {
				325	/*
				326	* No feasible alternative, backtrack
				327	* to event_id i-1 and continue enumerating its
				328	* alternatives from where we got up to.
				329	*/
				330	if (--i < 0)
				331	return -1;
				332	} else {
				333	/*
				334	* Found a feasible alternative for event_id i,
				335	* remember where we got up to with this event_id,
				336	* go on to the next event_id, and start with
				337	* the first alternative for it.
				338	*/
				339	choice[i] = j;
				340	svalues[i] = value;
				341	smasks[i] = mask;
				342	value = nv;
				343	mask \|= cpuhw->amasks[i][j];
				344	++i;
				345	j = -1;
				346	}
				347	}
				348
				349	/* OK, we have a feasible combination, tell the caller the solution */
				350	for (i = 0; i < n_ev; ++i)
				351	event_id[i] = cpuhw->alternatives[i][choice[i]];
				352	return 0;
				353	}
				354
				355	/*
				356	* Check if newly-added events have consistent settings for
				357	* exclude_{user,kernel,hv} with each other and any previously
				358	* added events.
				359	*/
				360	static int check_excludes(struct perf_event **ctrs, unsigned int cflags[],
				361	int n_prev, int n_new)
				362	{
				363	int eu = 0, ek = 0, eh = 0;
				364	int i, n, first;
				365	struct perf_event *event;
				366
				367	n = n_prev + n_new;
				368	if (n <= 1)
				369	return 0;
				370
				371	first = 1;
				372	for (i = 0; i < n; ++i) {
				373	if (cflags[i] & PPMU_LIMITED_PMC_OK) {
				374	cflags[i] &= ~PPMU_LIMITED_PMC_REQD;
				375	continue;
				376	}
				377	event = ctrs[i];
				378	if (first) {
				379	eu = event->attr.exclude_user;
				380	ek = event->attr.exclude_kernel;
				381	eh = event->attr.exclude_hv;
				382	first = 0;
				383	} else if (event->attr.exclude_user != eu \|\|
				384	event->attr.exclude_kernel != ek \|\|
				385	event->attr.exclude_hv != eh) {
				386	return -EAGAIN;
				387	}
				388	}
				389
				390	if (eu \|\| ek \|\| eh)
				391	for (i = 0; i < n; ++i)
				392	if (cflags[i] & PPMU_LIMITED_PMC_OK)
				393	cflags[i] \|= PPMU_LIMITED_PMC_REQD;
				394
				395	return 0;
				396	}
				397
				398	static void power_pmu_read(struct perf_event *event)
				399	{
				400	s64 val, delta, prev;
				401
				402	if (!event->hw.idx)
				403	return;
				404	/*
				405	* Performance monitor interrupts come even when interrupts
				406	* are soft-disabled, as long as interrupts are hard-enabled.
				407	* Therefore we treat them like NMIs.
				408	*/
				409	do {
				410	prev = atomic64_read(&event->hw.prev_count);
				411	barrier();
				412	val = read_pmc(event->hw.idx);
				413	} while (atomic64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
				414
Ingo Molnar	57c0c15	2009-09-21 12:20:38 +0200	[diff] [blame]	415	/* The counters are only 32 bits wide */
Ingo Molnar	cdd6c48	2009-09-21 12:02:48 +0200	[diff] [blame]	416	delta = (val - prev) & 0xfffffffful;
				417	atomic64_add(delta, &event->count);
				418	atomic64_sub(delta, &event->hw.period_left);
				419	}
				420
				421	/*
				422	* On some machines, PMC5 and PMC6 can't be written, don't respect
				423	* the freeze conditions, and don't generate interrupts. This tells
				424	* us if `event' is using such a PMC.
				425	*/
				426	static int is_limited_pmc(int pmcnum)
				427	{
				428	return (ppmu->flags & PPMU_LIMITED_PMC5_6)
				429	&& (pmcnum == 5 \|\| pmcnum == 6);
				430	}
				431
Paul Mackerras	a8f90e9	2009-09-22 09:48:08 +1000	[diff] [blame]	432	static void freeze_limited_counters(struct cpu_hw_events *cpuhw,
Ingo Molnar	cdd6c48	2009-09-21 12:02:48 +0200	[diff] [blame]	433	unsigned long pmc5, unsigned long pmc6)
				434	{
				435	struct perf_event *event;
				436	u64 val, prev, delta;
				437	int i;
				438
				439	for (i = 0; i < cpuhw->n_limited; ++i) {
Paul Mackerras	a8f90e9	2009-09-22 09:48:08 +1000	[diff] [blame]	440	event = cpuhw->limited_counter[i];
Ingo Molnar	cdd6c48	2009-09-21 12:02:48 +0200	[diff] [blame]	441	if (!event->hw.idx)
				442	continue;
				443	val = (event->hw.idx == 5) ? pmc5 : pmc6;
				444	prev = atomic64_read(&event->hw.prev_count);
				445	event->hw.idx = 0;
				446	delta = (val - prev) & 0xfffffffful;
				447	atomic64_add(delta, &event->count);
				448	}
				449	}
				450
Paul Mackerras	a8f90e9	2009-09-22 09:48:08 +1000	[diff] [blame]	451	static void thaw_limited_counters(struct cpu_hw_events *cpuhw,
Ingo Molnar	cdd6c48	2009-09-21 12:02:48 +0200	[diff] [blame]	452	unsigned long pmc5, unsigned long pmc6)
				453	{
				454	struct perf_event *event;
				455	u64 val;
				456	int i;
				457
				458	for (i = 0; i < cpuhw->n_limited; ++i) {
Paul Mackerras	a8f90e9	2009-09-22 09:48:08 +1000	[diff] [blame]	459	event = cpuhw->limited_counter[i];
Ingo Molnar	cdd6c48	2009-09-21 12:02:48 +0200	[diff] [blame]	460	event->hw.idx = cpuhw->limited_hwidx[i];
				461	val = (event->hw.idx == 5) ? pmc5 : pmc6;
				462	atomic64_set(&event->hw.prev_count, val);
				463	perf_event_update_userpage(event);
				464	}
				465	}
				466
				467	/*
				468	* Since limited events don't respect the freeze conditions, we
				469	* have to read them immediately after freezing or unfreezing the
				470	* other events. We try to keep the values from the limited
				471	* events as consistent as possible by keeping the delay (in
				472	* cycles and instructions) between freezing/unfreezing and reading
				473	* the limited events as small and consistent as possible.
				474	* Therefore, if any limited events are in use, we read them
				475	* both, and always in the same order, to minimize variability,
				476	* and do it inside the same asm that writes MMCR0.
				477	*/
				478	static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0)
				479	{
				480	unsigned long pmc5, pmc6;
				481
				482	if (!cpuhw->n_limited) {
				483	mtspr(SPRN_MMCR0, mmcr0);
				484	return;
				485	}
				486
				487	/*
				488	* Write MMCR0, then read PMC5 and PMC6 immediately.
				489	* To ensure we don't get a performance monitor interrupt
				490	* between writing MMCR0 and freezing/thawing the limited
				491	* events, we first write MMCR0 with the event overflow
				492	* interrupt enable bits turned off.
				493	*/
				494	asm volatile("mtspr %3,%2; mfspr %0,%4; mfspr %1,%5"
				495	: "=&r" (pmc5), "=&r" (pmc6)
				496	: "r" (mmcr0 & ~(MMCR0_PMC1CE \| MMCR0_PMCjCE)),
				497	"i" (SPRN_MMCR0),
				498	"i" (SPRN_PMC5), "i" (SPRN_PMC6));
				499
				500	if (mmcr0 & MMCR0_FC)
Paul Mackerras	a8f90e9	2009-09-22 09:48:08 +1000	[diff] [blame]	501	freeze_limited_counters(cpuhw, pmc5, pmc6);
Ingo Molnar	cdd6c48	2009-09-21 12:02:48 +0200	[diff] [blame]	502	else
Paul Mackerras	a8f90e9	2009-09-22 09:48:08 +1000	[diff] [blame]	503	thaw_limited_counters(cpuhw, pmc5, pmc6);
Ingo Molnar	cdd6c48	2009-09-21 12:02:48 +0200	[diff] [blame]	504
				505	/*
				506	* Write the full MMCR0 including the event overflow interrupt
				507	* enable bits, if necessary.
				508	*/
				509	if (mmcr0 & (MMCR0_PMC1CE \| MMCR0_PMCjCE))
				510	mtspr(SPRN_MMCR0, mmcr0);
				511	}
				512
				513	/*
				514	* Disable all events to prevent PMU interrupts and to allow
				515	* events to be added or removed.
				516	*/
				517	void hw_perf_disable(void)
				518	{
				519	struct cpu_hw_events *cpuhw;
				520	unsigned long flags;
				521
				522	if (!ppmu)
				523	return;
				524	local_irq_save(flags);
				525	cpuhw = &__get_cpu_var(cpu_hw_events);
				526
				527	if (!cpuhw->disabled) {
				528	cpuhw->disabled = 1;
				529	cpuhw->n_added = 0;
				530
				531	/*
				532	* Check if we ever enabled the PMU on this cpu.
				533	*/
				534	if (!cpuhw->pmcs_enabled) {
				535	ppc_enable_pmcs();
				536	cpuhw->pmcs_enabled = 1;
				537	}
				538
				539	/*
				540	* Disable instruction sampling if it was enabled
				541	*/
				542	if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
				543	mtspr(SPRN_MMCRA,
				544	cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
				545	mb();
				546	}
				547
				548	/*
Ingo Molnar	57c0c15	2009-09-21 12:20:38 +0200	[diff] [blame]	549	* Set the 'freeze counters' bit.
Ingo Molnar	cdd6c48	2009-09-21 12:02:48 +0200	[diff] [blame]	550	* The barrier is to make sure the mtspr has been
				551	* executed and the PMU has frozen the events
				552	* before we return.
				553	*/
				554	write_mmcr0(cpuhw, mfspr(SPRN_MMCR0) \| MMCR0_FC);
				555	mb();
				556	}
				557	local_irq_restore(flags);
				558	}
				559
				560	/*
				561	* Re-enable all events if disable == 0.
				562	* If we were previously disabled and events were added, then
				563	* put the new config on the PMU.
				564	*/
				565	void hw_perf_enable(void)
				566	{
				567	struct perf_event *event;
				568	struct cpu_hw_events *cpuhw;
				569	unsigned long flags;
				570	long i;
				571	unsigned long val;
				572	s64 left;
				573	unsigned int hwc_index[MAX_HWEVENTS];
				574	int n_lim;
				575	int idx;
				576
				577	if (!ppmu)
				578	return;
				579	local_irq_save(flags);
				580	cpuhw = &__get_cpu_var(cpu_hw_events);
				581	if (!cpuhw->disabled) {
				582	local_irq_restore(flags);
				583	return;
				584	}
				585	cpuhw->disabled = 0;
				586
				587	/*
				588	* If we didn't change anything, or only removed events,
				589	* no need to recalculate MMCR* settings and reset the PMCs.
				590	* Just reenable the PMU with the current MMCR* settings
				591	* (possibly updated for removal of events).
				592	*/
				593	if (!cpuhw->n_added) {
				594	mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
				595	mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
				596	if (cpuhw->n_events == 0)
				597	ppc_set_pmu_inuse(0);
				598	goto out_enable;
				599	}
				600
				601	/*
				602	* Compute MMCR* values for the new set of events
				603	*/
				604	if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_events, hwc_index,
				605	cpuhw->mmcr)) {
				606	/* shouldn't ever get here */
				607	printk(KERN_ERR "oops compute_mmcr failed\n");
				608	goto out;
				609	}
				610
				611	/*
				612	* Add in MMCR0 freeze bits corresponding to the
				613	* attr.exclude_* bits for the first event.
				614	* We have already checked that all events have the
				615	* same values for these bits as the first event.
				616	*/
				617	event = cpuhw->event[0];
				618	if (event->attr.exclude_user)
				619	cpuhw->mmcr[0] \|= MMCR0_FCP;
				620	if (event->attr.exclude_kernel)
				621	cpuhw->mmcr[0] \|= freeze_events_kernel;
				622	if (event->attr.exclude_hv)
				623	cpuhw->mmcr[0] \|= MMCR0_FCHV;
				624
				625	/*
				626	* Write the new configuration to MMCR* with the freeze
				627	* bit set and set the hardware events to their initial values.
				628	* Then unfreeze the events.
				629	*/
				630	ppc_set_pmu_inuse(1);
				631	mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
				632	mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
				633	mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE \| MMCR0_PMCjCE))
				634	\| MMCR0_FC);
				635
				636	/*
				637	* Read off any pre-existing events that need to move
				638	* to another PMC.
				639	*/
				640	for (i = 0; i < cpuhw->n_events; ++i) {
				641	event = cpuhw->event[i];
				642	if (event->hw.idx && event->hw.idx != hwc_index[i] + 1) {
				643	power_pmu_read(event);
				644	write_pmc(event->hw.idx, 0);
				645	event->hw.idx = 0;
				646	}
				647	}
				648
				649	/*
				650	* Initialize the PMCs for all the new and moved events.
				651	*/
				652	cpuhw->n_limited = n_lim = 0;
				653	for (i = 0; i < cpuhw->n_events; ++i) {
				654	event = cpuhw->event[i];
				655	if (event->hw.idx)
				656	continue;
				657	idx = hwc_index[i] + 1;
				658	if (is_limited_pmc(idx)) {
Paul Mackerras	a8f90e9	2009-09-22 09:48:08 +1000	[diff] [blame]	659	cpuhw->limited_counter[n_lim] = event;
Ingo Molnar	cdd6c48	2009-09-21 12:02:48 +0200	[diff] [blame]	660	cpuhw->limited_hwidx[n_lim] = idx;
				661	++n_lim;
				662	continue;
				663	}
				664	val = 0;
				665	if (event->hw.sample_period) {
				666	left = atomic64_read(&event->hw.period_left);
				667	if (left < 0x80000000L)
				668	val = 0x80000000L - left;
				669	}
				670	atomic64_set(&event->hw.prev_count, val);
				671	event->hw.idx = idx;
				672	write_pmc(idx, val);
				673	perf_event_update_userpage(event);
				674	}
				675	cpuhw->n_limited = n_lim;
				676	cpuhw->mmcr[0] \|= MMCR0_PMXE \| MMCR0_FCECE;
				677
				678	out_enable:
				679	mb();
				680	write_mmcr0(cpuhw, cpuhw->mmcr[0]);
				681
				682	/*
				683	* Enable instruction sampling if necessary
				684	*/
				685	if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
				686	mb();
				687	mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
				688	}
				689
				690	out:
				691	local_irq_restore(flags);
				692	}
				693
				694	static int collect_events(struct perf_event *group, int max_count,
				695	struct perf_event ctrs[], u64 events,
				696	unsigned int *flags)
				697	{
				698	int n = 0;
				699	struct perf_event *event;
				700
				701	if (!is_software_event(group)) {
				702	if (n >= max_count)
				703	return -1;
				704	ctrs[n] = group;
				705	flags[n] = group->hw.event_base;
				706	events[n++] = group->hw.config;
				707	}
Paul Mackerras	a8f90e9	2009-09-22 09:48:08 +1000	[diff] [blame]	708	list_for_each_entry(event, &group->sibling_list, group_entry) {
Ingo Molnar	cdd6c48	2009-09-21 12:02:48 +0200	[diff] [blame]	709	if (!is_software_event(event) &&
				710	event->state != PERF_EVENT_STATE_OFF) {
				711	if (n >= max_count)
				712	return -1;
				713	ctrs[n] = event;
				714	flags[n] = event->hw.event_base;
				715	events[n++] = event->hw.config;
				716	}
				717	}
				718	return n;
				719	}
				720
Peter Zijlstra	6e37738	2010-02-11 13:21:58 +0100	[diff] [blame]	721	static void event_sched_in(struct perf_event *event)
Ingo Molnar	cdd6c48	2009-09-21 12:02:48 +0200	[diff] [blame]	722	{
				723	event->state = PERF_EVENT_STATE_ACTIVE;
Peter Zijlstra	6e37738	2010-02-11 13:21:58 +0100	[diff] [blame]	724	event->oncpu = smp_processor_id();
Ingo Molnar	cdd6c48	2009-09-21 12:02:48 +0200	[diff] [blame]	725	event->tstamp_running += event->ctx->time - event->tstamp_stopped;
				726	if (is_software_event(event))
				727	event->pmu->enable(event);
				728	}
				729
				730	/*
				731	* Called to enable a whole group of events.
				732	* Returns 1 if the group was enabled, or -EAGAIN if it could not be.
				733	* Assumes the caller has disabled interrupts and has
				734	* frozen the PMU with hw_perf_save_disable.
				735	*/
				736	int hw_perf_group_sched_in(struct perf_event *group_leader,
				737	struct perf_cpu_context *cpuctx,
Peter Zijlstra	6e37738	2010-02-11 13:21:58 +0100	[diff] [blame]	738	struct perf_event_context *ctx)
Ingo Molnar	cdd6c48	2009-09-21 12:02:48 +0200	[diff] [blame]	739	{
				740	struct cpu_hw_events *cpuhw;
				741	long i, n, n0;
				742	struct perf_event *sub;
				743
				744	if (!ppmu)
				745	return 0;
				746	cpuhw = &__get_cpu_var(cpu_hw_events);
				747	n0 = cpuhw->n_events;
Paul Mackerras	a8f90e9	2009-09-22 09:48:08 +1000	[diff] [blame]	748	n = collect_events(group_leader, ppmu->n_counter - n0,
Ingo Molnar	cdd6c48	2009-09-21 12:02:48 +0200	[diff] [blame]	749	&cpuhw->event[n0], &cpuhw->events[n0],
				750	&cpuhw->flags[n0]);
				751	if (n < 0)
				752	return -EAGAIN;
				753	if (check_excludes(cpuhw->event, cpuhw->flags, n0, n))
				754	return -EAGAIN;
				755	i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n + n0);
				756	if (i < 0)
				757	return -EAGAIN;
				758	cpuhw->n_events = n0 + n;
				759	cpuhw->n_added += n;
				760
				761	/*
				762	* OK, this group can go on; update event states etc.,
				763	* and enable any software events
				764	*/
				765	for (i = n0; i < n0 + n; ++i)
				766	cpuhw->event[i]->hw.config = cpuhw->events[i];
				767	cpuctx->active_oncpu += n;
				768	n = 1;
Peter Zijlstra	6e37738	2010-02-11 13:21:58 +0100	[diff] [blame]	769	event_sched_in(group_leader);
Paul Mackerras	a8f90e9	2009-09-22 09:48:08 +1000	[diff] [blame]	770	list_for_each_entry(sub, &group_leader->sibling_list, group_entry) {
Ingo Molnar	cdd6c48	2009-09-21 12:02:48 +0200	[diff] [blame]	771	if (sub->state != PERF_EVENT_STATE_OFF) {
Peter Zijlstra	6e37738	2010-02-11 13:21:58 +0100	[diff] [blame]	772	event_sched_in(sub);
Ingo Molnar	cdd6c48	2009-09-21 12:02:48 +0200	[diff] [blame]	773	++n;
				774	}
				775	}
				776	ctx->nr_active += n;
				777
				778	return 1;
				779	}
				780
				781	/*
				782	* Add an event to the PMU.
				783	* If all events are not already frozen, then we disable and
				784	* re-enable the PMU in order to get hw_perf_enable to do the
				785	* actual work of reconfiguring the PMU.
				786	*/
				787	static int power_pmu_enable(struct perf_event *event)
				788	{
				789	struct cpu_hw_events *cpuhw;
				790	unsigned long flags;
				791	int n0;
				792	int ret = -EAGAIN;
				793
				794	local_irq_save(flags);
				795	perf_disable();
				796
				797	/*
				798	* Add the event to the list (if there is room)
				799	* and check whether the total set is still feasible.
				800	*/
				801	cpuhw = &__get_cpu_var(cpu_hw_events);
				802	n0 = cpuhw->n_events;
Paul Mackerras	a8f90e9	2009-09-22 09:48:08 +1000	[diff] [blame]	803	if (n0 >= ppmu->n_counter)
Ingo Molnar	cdd6c48	2009-09-21 12:02:48 +0200	[diff] [blame]	804	goto out;
				805	cpuhw->event[n0] = event;
				806	cpuhw->events[n0] = event->hw.config;
				807	cpuhw->flags[n0] = event->hw.event_base;
				808	if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1))
				809	goto out;
				810	if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1))
				811	goto out;
				812
				813	event->hw.config = cpuhw->events[n0];
				814	++cpuhw->n_events;
				815	++cpuhw->n_added;
				816
				817	ret = 0;
				818	out:
				819	perf_enable();
				820	local_irq_restore(flags);
				821	return ret;
				822	}
				823
				824	/*
				825	* Remove an event from the PMU.
				826	*/
				827	static void power_pmu_disable(struct perf_event *event)
				828	{
				829	struct cpu_hw_events *cpuhw;
				830	long i;
				831	unsigned long flags;
				832
				833	local_irq_save(flags);
				834	perf_disable();
				835
				836	power_pmu_read(event);
				837
				838	cpuhw = &__get_cpu_var(cpu_hw_events);
				839	for (i = 0; i < cpuhw->n_events; ++i) {
				840	if (event == cpuhw->event[i]) {
				841	while (++i < cpuhw->n_events)
				842	cpuhw->event[i-1] = cpuhw->event[i];
				843	--cpuhw->n_events;
				844	ppmu->disable_pmc(event->hw.idx - 1, cpuhw->mmcr);
				845	if (event->hw.idx) {
				846	write_pmc(event->hw.idx, 0);
				847	event->hw.idx = 0;
				848	}
				849	perf_event_update_userpage(event);
				850	break;
				851	}
				852	}
				853	for (i = 0; i < cpuhw->n_limited; ++i)
Paul Mackerras	a8f90e9	2009-09-22 09:48:08 +1000	[diff] [blame]	854	if (event == cpuhw->limited_counter[i])
Ingo Molnar	cdd6c48	2009-09-21 12:02:48 +0200	[diff] [blame]	855	break;
				856	if (i < cpuhw->n_limited) {
				857	while (++i < cpuhw->n_limited) {
Paul Mackerras	a8f90e9	2009-09-22 09:48:08 +1000	[diff] [blame]	858	cpuhw->limited_counter[i-1] = cpuhw->limited_counter[i];
Ingo Molnar	cdd6c48	2009-09-21 12:02:48 +0200	[diff] [blame]	859	cpuhw->limited_hwidx[i-1] = cpuhw->limited_hwidx[i];
				860	}
				861	--cpuhw->n_limited;
				862	}
				863	if (cpuhw->n_events == 0) {
				864	/* disable exceptions if no events are running */
				865	cpuhw->mmcr[0] &= ~(MMCR0_PMXE \| MMCR0_FCECE);
				866	}
				867
				868	perf_enable();
				869	local_irq_restore(flags);
				870	}
				871
				872	/*
				873	* Re-enable interrupts on an event after they were throttled
				874	* because they were coming too fast.
				875	*/
				876	static void power_pmu_unthrottle(struct perf_event *event)
				877	{
				878	s64 val, left;
				879	unsigned long flags;
				880
				881	if (!event->hw.idx \|\| !event->hw.sample_period)
				882	return;
				883	local_irq_save(flags);
				884	perf_disable();
				885	power_pmu_read(event);
				886	left = event->hw.sample_period;
				887	event->hw.last_period = left;
				888	val = 0;
				889	if (left < 0x80000000L)
				890	val = 0x80000000L - left;
				891	write_pmc(event->hw.idx, val);
				892	atomic64_set(&event->hw.prev_count, val);
				893	atomic64_set(&event->hw.period_left, left);
				894	perf_event_update_userpage(event);
				895	perf_enable();
				896	local_irq_restore(flags);
				897	}
				898
				899	struct pmu power_pmu = {
				900	.enable = power_pmu_enable,
				901	.disable = power_pmu_disable,
				902	.read = power_pmu_read,
				903	.unthrottle = power_pmu_unthrottle,
				904	};
				905
				906	/*
				907	* Return 1 if we might be able to put event on a limited PMC,
				908	* or 0 if not.
				909	* An event can only go on a limited PMC if it counts something
				910	* that a limited PMC can count, doesn't require interrupts, and
				911	* doesn't exclude any processor mode.
				912	*/
				913	static int can_go_on_limited_pmc(struct perf_event *event, u64 ev,
				914	unsigned int flags)
				915	{
				916	int n;
				917	u64 alt[MAX_EVENT_ALTERNATIVES];
				918
				919	if (event->attr.exclude_user
				920	\|\| event->attr.exclude_kernel
				921	\|\| event->attr.exclude_hv
				922	\|\| event->attr.sample_period)
				923	return 0;
				924
				925	if (ppmu->limited_pmc_event(ev))
				926	return 1;
				927
				928	/*
				929	* The requested event_id isn't on a limited PMC already;
				930	* see if any alternative code goes on a limited PMC.
				931	*/
				932	if (!ppmu->get_alternatives)
				933	return 0;
				934
				935	flags \|= PPMU_LIMITED_PMC_OK \| PPMU_LIMITED_PMC_REQD;
				936	n = ppmu->get_alternatives(ev, flags, alt);
				937
				938	return n > 0;
				939	}
				940
				941	/*
				942	* Find an alternative event_id that goes on a normal PMC, if possible,
				943	* and return the event_id code, or 0 if there is no such alternative.
				944	* (Note: event_id code 0 is "don't count" on all machines.)
				945	*/
				946	static u64 normal_pmc_alternative(u64 ev, unsigned long flags)
				947	{
				948	u64 alt[MAX_EVENT_ALTERNATIVES];
				949	int n;
				950
				951	flags &= ~(PPMU_LIMITED_PMC_OK \| PPMU_LIMITED_PMC_REQD);
				952	n = ppmu->get_alternatives(ev, flags, alt);
				953	if (!n)
				954	return 0;
				955	return alt[0];
				956	}
				957
				958	/* Number of perf_events counting hardware events */
				959	static atomic_t num_events;
				960	/* Used to avoid races in calling reserve/release_pmc_hardware */
				961	static DEFINE_MUTEX(pmc_reserve_mutex);
				962
				963	/*
				964	* Release the PMU if this is the last perf_event.
				965	*/
				966	static void hw_perf_event_destroy(struct perf_event *event)
				967	{
				968	if (!atomic_add_unless(&num_events, -1, 1)) {
				969	mutex_lock(&pmc_reserve_mutex);
				970	if (atomic_dec_return(&num_events) == 0)
				971	release_pmc_hardware();
				972	mutex_unlock(&pmc_reserve_mutex);
				973	}
				974	}
				975
				976	/*
				977	* Translate a generic cache event_id config to a raw event_id code.
				978	*/
				979	static int hw_perf_cache_event(u64 config, u64 *eventp)
				980	{
				981	unsigned long type, op, result;
				982	int ev;
				983
				984	if (!ppmu->cache_events)
				985	return -EINVAL;
				986
				987	/* unpack config */
				988	type = config & 0xff;
				989	op = (config >> 8) & 0xff;
				990	result = (config >> 16) & 0xff;
				991
				992	if (type >= PERF_COUNT_HW_CACHE_MAX \|\|
				993	op >= PERF_COUNT_HW_CACHE_OP_MAX \|\|
				994	result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
				995	return -EINVAL;
				996
				997	ev = (*ppmu->cache_events)[type][op][result];
				998	if (ev == 0)
				999	return -EOPNOTSUPP;
				1000	if (ev == -1)
				1001	return -EINVAL;
				1002	*eventp = ev;
				1003	return 0;
				1004	}
				1005
				1006	const struct pmu hw_perf_event_init(struct perf_event event)
				1007	{
				1008	u64 ev;
				1009	unsigned long flags;
				1010	struct perf_event *ctrs[MAX_HWEVENTS];
				1011	u64 events[MAX_HWEVENTS];
				1012	unsigned int cflags[MAX_HWEVENTS];
				1013	int n;
				1014	int err;
				1015	struct cpu_hw_events *cpuhw;
				1016
				1017	if (!ppmu)
				1018	return ERR_PTR(-ENXIO);
				1019	switch (event->attr.type) {
				1020	case PERF_TYPE_HARDWARE:
				1021	ev = event->attr.config;
				1022	if (ev >= ppmu->n_generic \|\| ppmu->generic_events[ev] == 0)
				1023	return ERR_PTR(-EOPNOTSUPP);
				1024	ev = ppmu->generic_events[ev];
				1025	break;
				1026	case PERF_TYPE_HW_CACHE:
				1027	err = hw_perf_cache_event(event->attr.config, &ev);
				1028	if (err)
				1029	return ERR_PTR(err);
				1030	break;
				1031	case PERF_TYPE_RAW:
				1032	ev = event->attr.config;
				1033	break;
				1034	default:
				1035	return ERR_PTR(-EINVAL);
				1036	}
				1037	event->hw.config_base = ev;
				1038	event->hw.idx = 0;
				1039
				1040	/*
				1041	* If we are not running on a hypervisor, force the
				1042	* exclude_hv bit to 0 so that we don't care what
				1043	* the user set it to.
				1044	*/
				1045	if (!firmware_has_feature(FW_FEATURE_LPAR))
				1046	event->attr.exclude_hv = 0;
				1047
				1048	/*
				1049	* If this is a per-task event, then we can use
				1050	* PM_RUN_* events interchangeably with their non RUN_*
				1051	* equivalents, e.g. PM_RUN_CYC instead of PM_CYC.
				1052	* XXX we should check if the task is an idle task.
				1053	*/
				1054	flags = 0;
				1055	if (event->ctx->task)
				1056	flags \|= PPMU_ONLY_COUNT_RUN;
				1057
				1058	/*
				1059	* If this machine has limited events, check whether this
				1060	* event_id could go on a limited event.
				1061	*/
				1062	if (ppmu->flags & PPMU_LIMITED_PMC5_6) {
				1063	if (can_go_on_limited_pmc(event, ev, flags)) {
				1064	flags \|= PPMU_LIMITED_PMC_OK;
				1065	} else if (ppmu->limited_pmc_event(ev)) {
				1066	/*
				1067	* The requested event_id is on a limited PMC,
				1068	* but we can't use a limited PMC; see if any
				1069	* alternative goes on a normal PMC.
				1070	*/
				1071	ev = normal_pmc_alternative(ev, flags);
				1072	if (!ev)
				1073	return ERR_PTR(-EINVAL);
				1074	}
				1075	}
				1076
				1077	/*
				1078	* If this is in a group, check if it can go on with all the
				1079	* other hardware events in the group. We assume the event
				1080	* hasn't been linked into its leader's sibling list at this point.
				1081	*/
				1082	n = 0;
				1083	if (event->group_leader != event) {
Paul Mackerras	a8f90e9	2009-09-22 09:48:08 +1000	[diff] [blame]	1084	n = collect_events(event->group_leader, ppmu->n_counter - 1,
Ingo Molnar	cdd6c48	2009-09-21 12:02:48 +0200	[diff] [blame]	1085	ctrs, events, cflags);
				1086	if (n < 0)
				1087	return ERR_PTR(-EINVAL);
				1088	}
				1089	events[n] = ev;
				1090	ctrs[n] = event;
				1091	cflags[n] = flags;
				1092	if (check_excludes(ctrs, cflags, n, 1))
				1093	return ERR_PTR(-EINVAL);
				1094
				1095	cpuhw = &get_cpu_var(cpu_hw_events);
				1096	err = power_check_constraints(cpuhw, events, cflags, n + 1);
				1097	put_cpu_var(cpu_hw_events);
				1098	if (err)
				1099	return ERR_PTR(-EINVAL);
				1100
				1101	event->hw.config = events[n];
				1102	event->hw.event_base = cflags[n];
				1103	event->hw.last_period = event->hw.sample_period;
				1104	atomic64_set(&event->hw.period_left, event->hw.last_period);
				1105
				1106	/*
				1107	* See if we need to reserve the PMU.
				1108	* If no events are currently in use, then we have to take a
				1109	* mutex to ensure that we don't race with another task doing
				1110	* reserve_pmc_hardware or release_pmc_hardware.
				1111	*/
				1112	err = 0;
				1113	if (!atomic_inc_not_zero(&num_events)) {
				1114	mutex_lock(&pmc_reserve_mutex);
				1115	if (atomic_read(&num_events) == 0 &&
				1116	reserve_pmc_hardware(perf_event_interrupt))
				1117	err = -EBUSY;
				1118	else
				1119	atomic_inc(&num_events);
				1120	mutex_unlock(&pmc_reserve_mutex);
				1121	}
				1122	event->destroy = hw_perf_event_destroy;
				1123
				1124	if (err)
				1125	return ERR_PTR(err);
				1126	return &power_pmu;
				1127	}
				1128
				1129	/*
Ingo Molnar	57c0c15	2009-09-21 12:20:38 +0200	[diff] [blame]	1130	* A counter has overflowed; update its count and record
Ingo Molnar	cdd6c48	2009-09-21 12:02:48 +0200	[diff] [blame]	1131	* things if requested. Note that interrupts are hard-disabled
				1132	* here so there is no possibility of being interrupted.
				1133	*/
				1134	static void record_and_restart(struct perf_event *event, unsigned long val,
				1135	struct pt_regs *regs, int nmi)
				1136	{
				1137	u64 period = event->hw.sample_period;
				1138	s64 prev, delta, left;
				1139	int record = 0;
				1140
				1141	/* we don't have to worry about interrupts here */
				1142	prev = atomic64_read(&event->hw.prev_count);
				1143	delta = (val - prev) & 0xfffffffful;
				1144	atomic64_add(delta, &event->count);
				1145
				1146	/*
				1147	* See if the total period for this event has expired,
				1148	* and update for the next period.
				1149	*/
				1150	val = 0;
				1151	left = atomic64_read(&event->hw.period_left) - delta;
				1152	if (period) {
				1153	if (left <= 0) {
				1154	left += period;
				1155	if (left <= 0)
				1156	left = period;
				1157	record = 1;
				1158	}
				1159	if (left < 0x80000000LL)
				1160	val = 0x80000000LL - left;
				1161	}
				1162
				1163	/*
				1164	* Finally record data if requested.
				1165	*/
				1166	if (record) {
				1167	struct perf_sample_data data = {
Anton Blanchard	bc284e5	2009-09-21 16:56:10 +0000	[diff] [blame]	1168	.addr = ~0ULL,
Ingo Molnar	cdd6c48	2009-09-21 12:02:48 +0200	[diff] [blame]	1169	.period = event->hw.last_period,
				1170	};
				1171
				1172	if (event->attr.sample_type & PERF_SAMPLE_ADDR)
				1173	perf_get_data_addr(regs, &data.addr);
				1174
				1175	if (perf_event_overflow(event, nmi, &data, regs)) {
				1176	/*
				1177	* Interrupts are coming too fast - throttle them
				1178	* by setting the event to 0, so it will be
				1179	* at least 2^30 cycles until the next interrupt
				1180	* (assuming each event counts at most 2 counts
				1181	* per cycle).
				1182	*/
				1183	val = 0;
				1184	left = ~0ULL >> 1;
				1185	}
				1186	}
				1187
				1188	write_pmc(event->hw.idx, val);
				1189	atomic64_set(&event->hw.prev_count, val);
				1190	atomic64_set(&event->hw.period_left, left);
				1191	perf_event_update_userpage(event);
				1192	}
				1193
				1194	/*
				1195	* Called from generic code to get the misc flags (i.e. processor mode)
				1196	* for an event_id.
				1197	*/
				1198	unsigned long perf_misc_flags(struct pt_regs *regs)
				1199	{
				1200	u32 flags = perf_get_misc_flags(regs);
				1201
				1202	if (flags)
				1203	return flags;
				1204	return user_mode(regs) ? PERF_RECORD_MISC_USER :
				1205	PERF_RECORD_MISC_KERNEL;
				1206	}
				1207
				1208	/*
				1209	* Called from generic code to get the instruction pointer
				1210	* for an event_id.
				1211	*/
				1212	unsigned long perf_instruction_pointer(struct pt_regs *regs)
				1213	{
				1214	unsigned long ip;
				1215
				1216	if (TRAP(regs) != 0xf00)
				1217	return regs->nip; /* not a PMU interrupt */
				1218
				1219	ip = mfspr(SPRN_SIAR) + perf_ip_adjust(regs);
				1220	return ip;
				1221	}
				1222
				1223	/*
				1224	* Performance monitor interrupt stuff
				1225	*/
				1226	static void perf_event_interrupt(struct pt_regs *regs)
				1227	{
				1228	int i;
				1229	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
				1230	struct perf_event *event;
				1231	unsigned long val;
				1232	int found = 0;
				1233	int nmi;
				1234
				1235	if (cpuhw->n_limited)
Paul Mackerras	a8f90e9	2009-09-22 09:48:08 +1000	[diff] [blame]	1236	freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5),
Ingo Molnar	cdd6c48	2009-09-21 12:02:48 +0200	[diff] [blame]	1237	mfspr(SPRN_PMC6));
				1238
				1239	perf_read_regs(regs);
				1240
				1241	nmi = perf_intr_is_nmi(regs);
				1242	if (nmi)
				1243	nmi_enter();
				1244	else
				1245	irq_enter();
				1246
				1247	for (i = 0; i < cpuhw->n_events; ++i) {
				1248	event = cpuhw->event[i];
				1249	if (!event->hw.idx \|\| is_limited_pmc(event->hw.idx))
				1250	continue;
				1251	val = read_pmc(event->hw.idx);
				1252	if ((int)val < 0) {
				1253	/* event has overflowed */
				1254	found = 1;
				1255	record_and_restart(event, val, regs, nmi);
				1256	}
				1257	}
				1258
				1259	/*
				1260	* In case we didn't find and reset the event that caused
				1261	* the interrupt, scan all events and reset any that are
				1262	* negative, to avoid getting continual interrupts.
				1263	* Any that we processed in the previous loop will not be negative.
				1264	*/
				1265	if (!found) {
Paul Mackerras	a8f90e9	2009-09-22 09:48:08 +1000	[diff] [blame]	1266	for (i = 0; i < ppmu->n_counter; ++i) {
Ingo Molnar	cdd6c48	2009-09-21 12:02:48 +0200	[diff] [blame]	1267	if (is_limited_pmc(i + 1))
				1268	continue;
				1269	val = read_pmc(i + 1);
				1270	if ((int)val < 0)
				1271	write_pmc(i + 1, 0);
				1272	}
				1273	}
				1274
				1275	/*
				1276	* Reset MMCR0 to its normal value. This will set PMXE and
Ingo Molnar	57c0c15	2009-09-21 12:20:38 +0200	[diff] [blame]	1277	* clear FC (freeze counters) and PMAO (perf mon alert occurred)
Ingo Molnar	cdd6c48	2009-09-21 12:02:48 +0200	[diff] [blame]	1278	* and thus allow interrupts to occur again.
				1279	* XXX might want to use MSR.PM to keep the events frozen until
				1280	* we get back out of this interrupt.
				1281	*/
				1282	write_mmcr0(cpuhw, cpuhw->mmcr[0]);
				1283
				1284	if (nmi)
				1285	nmi_exit();
				1286	else
				1287	irq_exit();
				1288	}
				1289
				1290	void hw_perf_event_setup(int cpu)
				1291	{
				1292	struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
				1293
				1294	if (!ppmu)
				1295	return;
				1296	memset(cpuhw, 0, sizeof(*cpuhw));
				1297	cpuhw->mmcr[0] = MMCR0_FC;
				1298	}
				1299
				1300	int register_power_pmu(struct power_pmu *pmu)
				1301	{
				1302	if (ppmu)
				1303	return -EBUSY; /* something's already registered */
				1304
				1305	ppmu = pmu;
				1306	pr_info("%s performance monitor hardware support registered\n",
				1307	pmu->name);
				1308
				1309	#ifdef MSR_HV
				1310	/*
				1311	* Use FCHV to ignore kernel events if MSR.HV is set.
				1312	*/
				1313	if (mfmsr() & MSR_HV)
				1314	freeze_events_kernel = MMCR0_FCHV;
				1315	#endif /* CONFIG_PPC64 */
				1316
				1317	return 0;
				1318	}