blob: 43ac5af338d8c910c2295a7484453ab6b8a01b2a [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/**
2 * @file op_model_p4.c
3 * P4 model-specific MSR operations
4 *
5 * @remark Copyright 2002 OProfile authors
6 * @remark Read the file COPYING
7 *
8 * @author Graydon Hoare
9 */
10
11#include <linux/oprofile.h>
12#include <linux/smp.h>
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +020013#include <linux/ptrace.h>
14#include <linux/nmi.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070015#include <asm/msr.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070016#include <asm/fixmap.h>
17#include <asm/apic.h>
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +020018
Linus Torvalds1da177e2005-04-16 15:20:36 -070019
20#include "op_x86_model.h"
21#include "op_counter.h"
22
/* number of entries in the p4_events[] binding table */
#define NUM_EVENTS		39

/* register counts used when the CPU does not have two siblings */
#define NUM_COUNTERS_NON_HT	8
#define NUM_ESCRS_NON_HT	45
#define NUM_CCCRS_NON_HT	18
#define NUM_CONTROLS_NON_HT	(NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT)

/* register counts used per thread when smp_num_siblings == 2 */
#define NUM_COUNTERS_HT2	4
#define NUM_ESCRS_HT2		23
#define NUM_CCCRS_HT2		9
#define NUM_CONTROLS_HT2	(NUM_ESCRS_HT2 + NUM_CCCRS_HT2)

/*
 * Active counts.  They start out with the non-HT values and are
 * switched to the HT2 values by setup_num_counters().
 */
static unsigned int num_counters = NUM_COUNTERS_NON_HT;
static unsigned int num_controls = NUM_CONTROLS_NON_HT;

/*
 * Pick the counter/control counts for this machine.
 *
 * This cannot be a compile-time decision: whether the chip is
 * hyper-threaded is only discovered while the kernel boots.  When two
 * siblings are present the HT2 counts replace the non-HT defaults;
 * without CONFIG_SMP this function does nothing.
 */
static inline void setup_num_counters(void)
{
#ifdef CONFIG_SMP
	if (smp_num_siblings != 2)
		return;

	num_counters = NUM_COUNTERS_HT2;
	num_controls = NUM_CONTROLS_HT2;
#endif
}
50
/*
 * Step used when walking a run of ESCR addresses.
 *
 * Returns 2 when the CPU has two siblings (each thread then visits every
 * other register, starting from its own stagger offset), otherwise 1.
 * Without CONFIG_SMP the step is always 1.
 */
static inline int addr_increment(void)
{
#ifdef CONFIG_SMP
	return smp_num_siblings == 2 ? 2 : 1;
#else
	return 1;
#endif
}
59
60
/*
 * Tables to simulate a simplified hardware view of the P4 registers.
 *
 * One entry per hardware counter: the CTR_* bit that identifies it and
 * the MSR addresses of the counter itself and of its CCCR.  The field
 * order matters, p4_counters[] is filled in positionally.
 */
struct p4_counter_binding {
	int virt_counter;	/* CTR_* bit of this counter */
	int counter_address;	/* MSR address of the counter */
	int cccr_address;	/* MSR address of the matching CCCR */
};
67
/*
 * Describes how one event is programmed: the two selector values and up
 * to two (counter, ESCR) pairs that are able to count it.
 */
struct p4_event_binding {
	/* value to put in CCCR */
	int escr_select;
	/* value to put in ESCR */
	int event_select;
	struct {
		/* for this counter (CTR_* bit)... */
		int virt_counter;
		/* ...use this ESCR */
		int escr_address;
	} bindings[2];
};
76
/*
 * nb: these CTR_* defines are a duplicate of defines in
 * event/i386.p4*events.
 *
 * One bit per entry of p4_counters[], in table order.
 */
#define CTR_BPU_0	0x01	/* bit 0 */
#define CTR_MS_0	0x02	/* bit 1 */
#define CTR_FLAME_0	0x04	/* bit 2 */
#define CTR_IQ_4	0x08	/* bit 3 */
#define CTR_BPU_2	0x10	/* bit 4 */
#define CTR_MS_2	0x20	/* bit 5 */
#define CTR_FLAME_2	0x40	/* bit 6 */
#define CTR_IQ_5	0x80	/* bit 7 */

89
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +020090static struct p4_counter_binding p4_counters[NUM_COUNTERS_NON_HT] = {
Linus Torvalds1da177e2005-04-16 15:20:36 -070091 { CTR_BPU_0, MSR_P4_BPU_PERFCTR0, MSR_P4_BPU_CCCR0 },
92 { CTR_MS_0, MSR_P4_MS_PERFCTR0, MSR_P4_MS_CCCR0 },
93 { CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 },
94 { CTR_IQ_4, MSR_P4_IQ_PERFCTR4, MSR_P4_IQ_CCCR4 },
95 { CTR_BPU_2, MSR_P4_BPU_PERFCTR2, MSR_P4_BPU_CCCR2 },
96 { CTR_MS_2, MSR_P4_MS_PERFCTR2, MSR_P4_MS_CCCR2 },
97 { CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 },
98 { CTR_IQ_5, MSR_P4_IQ_PERFCTR5, MSR_P4_IQ_CCCR5 }
99};
100
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200101#define NUM_UNUSED_CCCRS (NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700102
Linus Torvalds1da177e2005-04-16 15:20:36 -0700103/* p4 event codes in libop/op_event.h are indices into this table. */
104
105static struct p4_event_binding p4_events[NUM_EVENTS] = {
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200106
Linus Torvalds1da177e2005-04-16 15:20:36 -0700107 { /* BRANCH_RETIRED */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200108 0x05, 0x06,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700109 { {CTR_IQ_4, MSR_P4_CRU_ESCR2},
110 {CTR_IQ_5, MSR_P4_CRU_ESCR3} }
111 },
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200112
Linus Torvalds1da177e2005-04-16 15:20:36 -0700113 { /* MISPRED_BRANCH_RETIRED */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200114 0x04, 0x03,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700115 { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
116 { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
117 },
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200118
Linus Torvalds1da177e2005-04-16 15:20:36 -0700119 { /* TC_DELIVER_MODE */
120 0x01, 0x01,
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200121 { { CTR_MS_0, MSR_P4_TC_ESCR0},
Linus Torvalds1da177e2005-04-16 15:20:36 -0700122 { CTR_MS_2, MSR_P4_TC_ESCR1} }
123 },
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200124
Linus Torvalds1da177e2005-04-16 15:20:36 -0700125 { /* BPU_FETCH_REQUEST */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200126 0x00, 0x03,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700127 { { CTR_BPU_0, MSR_P4_BPU_ESCR0},
128 { CTR_BPU_2, MSR_P4_BPU_ESCR1} }
129 },
130
131 { /* ITLB_REFERENCE */
132 0x03, 0x18,
133 { { CTR_BPU_0, MSR_P4_ITLB_ESCR0},
134 { CTR_BPU_2, MSR_P4_ITLB_ESCR1} }
135 },
136
137 { /* MEMORY_CANCEL */
138 0x05, 0x02,
139 { { CTR_FLAME_0, MSR_P4_DAC_ESCR0},
140 { CTR_FLAME_2, MSR_P4_DAC_ESCR1} }
141 },
142
143 { /* MEMORY_COMPLETE */
144 0x02, 0x08,
145 { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
146 { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
147 },
148
149 { /* LOAD_PORT_REPLAY */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200150 0x02, 0x04,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700151 { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
152 { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
153 },
154
155 { /* STORE_PORT_REPLAY */
156 0x02, 0x05,
157 { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
158 { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
159 },
160
161 { /* MOB_LOAD_REPLAY */
162 0x02, 0x03,
163 { { CTR_BPU_0, MSR_P4_MOB_ESCR0},
164 { CTR_BPU_2, MSR_P4_MOB_ESCR1} }
165 },
166
167 { /* PAGE_WALK_TYPE */
168 0x04, 0x01,
169 { { CTR_BPU_0, MSR_P4_PMH_ESCR0},
170 { CTR_BPU_2, MSR_P4_PMH_ESCR1} }
171 },
172
173 { /* BSQ_CACHE_REFERENCE */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200174 0x07, 0x0c,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700175 { { CTR_BPU_0, MSR_P4_BSU_ESCR0},
176 { CTR_BPU_2, MSR_P4_BSU_ESCR1} }
177 },
178
179 { /* IOQ_ALLOCATION */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200180 0x06, 0x03,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700181 { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
182 { 0, 0 } }
183 },
184
185 { /* IOQ_ACTIVE_ENTRIES */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200186 0x06, 0x1a,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700187 { { CTR_BPU_2, MSR_P4_FSB_ESCR1},
188 { 0, 0 } }
189 },
190
191 { /* FSB_DATA_ACTIVITY */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200192 0x06, 0x17,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700193 { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
194 { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
195 },
196
197 { /* BSQ_ALLOCATION */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200198 0x07, 0x05,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700199 { { CTR_BPU_0, MSR_P4_BSU_ESCR0},
200 { 0, 0 } }
201 },
202
203 { /* BSQ_ACTIVE_ENTRIES */
204 0x07, 0x06,
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200205 { { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */},
Linus Torvalds1da177e2005-04-16 15:20:36 -0700206 { 0, 0 } }
207 },
208
209 { /* X87_ASSIST */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200210 0x05, 0x03,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700211 { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
212 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
213 },
214
215 { /* SSE_INPUT_ASSIST */
216 0x01, 0x34,
217 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
218 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
219 },
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200220
Linus Torvalds1da177e2005-04-16 15:20:36 -0700221 { /* PACKED_SP_UOP */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200222 0x01, 0x08,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700223 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
224 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
225 },
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200226
Linus Torvalds1da177e2005-04-16 15:20:36 -0700227 { /* PACKED_DP_UOP */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200228 0x01, 0x0c,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700229 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
230 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
231 },
232
233 { /* SCALAR_SP_UOP */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200234 0x01, 0x0a,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700235 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
236 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
237 },
238
239 { /* SCALAR_DP_UOP */
240 0x01, 0x0e,
241 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
242 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
243 },
244
245 { /* 64BIT_MMX_UOP */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200246 0x01, 0x02,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700247 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
248 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
249 },
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200250
Linus Torvalds1da177e2005-04-16 15:20:36 -0700251 { /* 128BIT_MMX_UOP */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200252 0x01, 0x1a,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700253 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
254 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
255 },
256
257 { /* X87_FP_UOP */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200258 0x01, 0x04,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700259 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
260 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
261 },
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200262
Linus Torvalds1da177e2005-04-16 15:20:36 -0700263 { /* X87_SIMD_MOVES_UOP */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200264 0x01, 0x2e,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700265 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
266 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
267 },
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200268
Linus Torvalds1da177e2005-04-16 15:20:36 -0700269 { /* MACHINE_CLEAR */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200270 0x05, 0x02,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700271 { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
272 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
273 },
274
275 { /* GLOBAL_POWER_EVENTS */
276 0x06, 0x13 /* older manual says 0x05, newer 0x13 */,
277 { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
278 { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
279 },
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200280
Linus Torvalds1da177e2005-04-16 15:20:36 -0700281 { /* TC_MS_XFER */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200282 0x00, 0x05,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700283 { { CTR_MS_0, MSR_P4_MS_ESCR0},
284 { CTR_MS_2, MSR_P4_MS_ESCR1} }
285 },
286
287 { /* UOP_QUEUE_WRITES */
288 0x00, 0x09,
289 { { CTR_MS_0, MSR_P4_MS_ESCR0},
290 { CTR_MS_2, MSR_P4_MS_ESCR1} }
291 },
292
293 { /* FRONT_END_EVENT */
294 0x05, 0x08,
295 { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
296 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
297 },
298
299 { /* EXECUTION_EVENT */
300 0x05, 0x0c,
301 { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
302 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
303 },
304
305 { /* REPLAY_EVENT */
306 0x05, 0x09,
307 { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
308 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
309 },
310
311 { /* INSTR_RETIRED */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200312 0x04, 0x02,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700313 { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
314 { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
315 },
316
317 { /* UOPS_RETIRED */
318 0x04, 0x01,
319 { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
320 { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
321 },
322
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200323 { /* UOP_TYPE */
324 0x02, 0x02,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700325 { { CTR_IQ_4, MSR_P4_RAT_ESCR0},
326 { CTR_IQ_5, MSR_P4_RAT_ESCR1} }
327 },
328
329 { /* RETIRED_MISPRED_BRANCH_TYPE */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200330 0x02, 0x05,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700331 { { CTR_MS_0, MSR_P4_TBPU_ESCR0},
332 { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
333 },
334
335 { /* RETIRED_BRANCH_TYPE */
336 0x02, 0x04,
337 { { CTR_MS_0, MSR_P4_TBPU_ESCR0},
338 { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
339 }
340};
341
342
/*
 * Tests bit 7 of its argument; p4_setup_ctrs() applies it to the low
 * word of MSR_IA32_MISC_ENABLE to see whether the PMC is available.
 */
#define MISC_PMC_ENABLED_P(x) ((x) & (1 << 7))

/*
 * ESCR helpers.  ESCR_CLEAR() keeps only the reserved bits; the _0/_1
 * variants of the USR/OS setters are the ones used for stagger 0 and
 * stagger 1 respectively.  All setters OR into (and modify) "escr".
 */
#define ESCR_RESERVED_BITS 0x80000003
#define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS)
#define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1) << 2))
#define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1) << 3))
#define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1)))
#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1))
#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9))
/* read/write the ESCR of binding "i" of event binding "ev" */
#define ESCR_READ(escr, high, ev, i) \
	do { rdmsr((ev)->bindings[(i)].escr_address, (escr), (high)); } while (0)
#define ESCR_WRITE(escr, high, ev, i) \
	do { wrmsr((ev)->bindings[(i)].escr_address, (escr), (high)); } while (0)

/*
 * CCCR helpers.  "i" in CCCR_READ()/CCCR_WRITE() is an index into
 * p4_counters[], i.e. a real counter number as produced by VIRT_CTR().
 */
#define CCCR_RESERVED_BITS 0x38030FFF
#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
#define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000)
#define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07) << 13))
#define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1 << 26))
#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1 << 27))
#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1 << 12))
#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1 << 12))
#define CCCR_READ(low, high, i) \
	do { rdmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0)
#define CCCR_WRITE(low, high, i) \
	do { wrmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0)
/* overflow flag: bit 31 of the CCCR low word */
#define CCCR_OVF_P(cccr) ((cccr) & (1U << 31))
#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U << 31)))

/* a slot counts as reserved when fill_in_addresses stored a non-zero addr */
#define CTRL_IS_RESERVED(msrs, c) ((msrs)->controls[(c)].addr ? 1 : 0)
#define CTR_IS_RESERVED(msrs, c) ((msrs)->counters[(c)].addr ? 1 : 0)
#define CTR_READ(l, h, i) \
	do { rdmsr(p4_counters[(i)].counter_address, (l), (h)); } while (0)
/* loads the counter with the negated count "l"; the high word is all ones */
#define CTR_WRITE(l, i) \
	do { wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1); } while (0)
/* true once bit 31 of the counter's low word has become clear */
#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))
374
375
/*
 * This assigns a "stagger" to the current CPU, which is used throughout
 * the code in this module as an extra array offset, to select the "even"
 * or "odd" part of all the divided resources.
 *
 * Returns 0 when the current CPU is the first CPU in its sibling map
 * (and always without CONFIG_SMP), 1 otherwise.
 */
static unsigned int get_stagger(void)
{
#ifdef CONFIG_SMP
	int cpu = smp_processor_id();

	return cpu != first_cpu(per_cpu(cpu_sibling_map, cpu));
#else
	return 0;
#endif
}
387
388
389/* finally, mediate access to a real hardware counter
390 by passing a "virtual" counter numer to this macro,
391 along with your stagger setting. */
392#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger)))
393
394static unsigned long reset_value[NUM_COUNTERS_NON_HT];
395
396
397static void p4_fill_in_addresses(struct op_msrs * const msrs)
398{
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200399 unsigned int i;
Don Zickuscb9c4482006-09-26 10:52:26 +0200400 unsigned int addr, cccraddr, stag;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700401
402 setup_num_counters();
403 stag = get_stagger();
404
Don Zickuscb9c4482006-09-26 10:52:26 +0200405 /* initialize some registers */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200406 for (i = 0; i < num_counters; ++i)
Don Zickuscb9c4482006-09-26 10:52:26 +0200407 msrs->counters[i].addr = 0;
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200408 for (i = 0; i < num_controls; ++i)
Don Zickuscb9c4482006-09-26 10:52:26 +0200409 msrs->controls[i].addr = 0;
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200410
Don Zickuscb9c4482006-09-26 10:52:26 +0200411 /* the counter & cccr registers we pay attention to */
412 for (i = 0; i < num_counters; ++i) {
413 addr = p4_counters[VIRT_CTR(stag, i)].counter_address;
414 cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address;
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200415 if (reserve_perfctr_nmi(addr)) {
Don Zickuscb9c4482006-09-26 10:52:26 +0200416 msrs->counters[i].addr = addr;
417 msrs->controls[i].addr = cccraddr;
418 }
419 }
420
Linus Torvalds1da177e2005-04-16 15:20:36 -0700421 /* 43 ESCR registers in three or four discontiguous group */
422 for (addr = MSR_P4_BSU_ESCR0 + stag;
423 addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) {
Don Zickuscb9c4482006-09-26 10:52:26 +0200424 if (reserve_evntsel_nmi(addr))
425 msrs->controls[i].addr = addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700426 }
427
428 /* no IQ_ESCR0/1 on some models, we save a seconde time BSU_ESCR0/1
429 * to avoid special case in nmi_{save|restore}_registers() */
430 if (boot_cpu_data.x86_model >= 0x3) {
431 for (addr = MSR_P4_BSU_ESCR0 + stag;
432 addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
Don Zickuscb9c4482006-09-26 10:52:26 +0200433 if (reserve_evntsel_nmi(addr))
434 msrs->controls[i].addr = addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700435 }
436 } else {
437 for (addr = MSR_P4_IQ_ESCR0 + stag;
438 addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
Don Zickuscb9c4482006-09-26 10:52:26 +0200439 if (reserve_evntsel_nmi(addr))
440 msrs->controls[i].addr = addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700441 }
442 }
443
444 for (addr = MSR_P4_RAT_ESCR0 + stag;
445 addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
Don Zickuscb9c4482006-09-26 10:52:26 +0200446 if (reserve_evntsel_nmi(addr))
447 msrs->controls[i].addr = addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700448 }
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200449
Linus Torvalds1da177e2005-04-16 15:20:36 -0700450 for (addr = MSR_P4_MS_ESCR0 + stag;
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200451 addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) {
Don Zickuscb9c4482006-09-26 10:52:26 +0200452 if (reserve_evntsel_nmi(addr))
453 msrs->controls[i].addr = addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700454 }
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200455
Linus Torvalds1da177e2005-04-16 15:20:36 -0700456 for (addr = MSR_P4_IX_ESCR0 + stag;
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200457 addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) {
Don Zickuscb9c4482006-09-26 10:52:26 +0200458 if (reserve_evntsel_nmi(addr))
459 msrs->controls[i].addr = addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700460 }
461
462 /* there are 2 remaining non-contiguously located ESCRs */
463
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200464 if (num_counters == NUM_COUNTERS_NON_HT) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700465 /* standard non-HT CPUs handle both remaining ESCRs*/
Don Zickuscb9c4482006-09-26 10:52:26 +0200466 if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5))
467 msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
468 if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
469 msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700470
471 } else if (stag == 0) {
472 /* HT CPUs give the first remainder to the even thread, as
473 the 32nd control register */
Don Zickuscb9c4482006-09-26 10:52:26 +0200474 if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
475 msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700476
477 } else {
478 /* and two copies of the second to the odd thread,
479 for the 22st and 23nd control registers */
Don Zickuscb9c4482006-09-26 10:52:26 +0200480 if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) {
481 msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
482 msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
483 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700484 }
485}
486
487
488static void pmc_setup_one_p4_counter(unsigned int ctr)
489{
490 int i;
491 int const maxbind = 2;
492 unsigned int cccr = 0;
493 unsigned int escr = 0;
494 unsigned int high = 0;
495 unsigned int counter_bit;
496 struct p4_event_binding *ev = NULL;
497 unsigned int stag;
498
499 stag = get_stagger();
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200500
Linus Torvalds1da177e2005-04-16 15:20:36 -0700501 /* convert from counter *number* to counter *bit* */
502 counter_bit = 1 << VIRT_CTR(stag, ctr);
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200503
Linus Torvalds1da177e2005-04-16 15:20:36 -0700504 /* find our event binding structure. */
505 if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) {
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200506 printk(KERN_ERR
507 "oprofile: P4 event code 0x%lx out of range\n",
Linus Torvalds1da177e2005-04-16 15:20:36 -0700508 counter_config[ctr].event);
509 return;
510 }
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200511
Linus Torvalds1da177e2005-04-16 15:20:36 -0700512 ev = &(p4_events[counter_config[ctr].event - 1]);
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200513
Linus Torvalds1da177e2005-04-16 15:20:36 -0700514 for (i = 0; i < maxbind; i++) {
515 if (ev->bindings[i].virt_counter & counter_bit) {
516
517 /* modify ESCR */
518 ESCR_READ(escr, high, ev, i);
519 ESCR_CLEAR(escr);
520 if (stag == 0) {
521 ESCR_SET_USR_0(escr, counter_config[ctr].user);
522 ESCR_SET_OS_0(escr, counter_config[ctr].kernel);
523 } else {
524 ESCR_SET_USR_1(escr, counter_config[ctr].user);
525 ESCR_SET_OS_1(escr, counter_config[ctr].kernel);
526 }
527 ESCR_SET_EVENT_SELECT(escr, ev->event_select);
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200528 ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700529 ESCR_WRITE(escr, high, ev, i);
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200530
Linus Torvalds1da177e2005-04-16 15:20:36 -0700531 /* modify CCCR */
532 CCCR_READ(cccr, high, VIRT_CTR(stag, ctr));
533 CCCR_CLEAR(cccr);
534 CCCR_SET_REQUIRED_BITS(cccr);
535 CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200536 if (stag == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700537 CCCR_SET_PMI_OVF_0(cccr);
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200538 else
Linus Torvalds1da177e2005-04-16 15:20:36 -0700539 CCCR_SET_PMI_OVF_1(cccr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700540 CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr));
541 return;
542 }
543 }
544
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200545 printk(KERN_ERR
Linus Torvalds1da177e2005-04-16 15:20:36 -0700546 "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n",
547 counter_config[ctr].event, stag, ctr);
548}
549
550
551static void p4_setup_ctrs(struct op_msrs const * const msrs)
552{
553 unsigned int i;
554 unsigned int low, high;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700555 unsigned int stag;
556
557 stag = get_stagger();
558
559 rdmsr(MSR_IA32_MISC_ENABLE, low, high);
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200560 if (!MISC_PMC_ENABLED_P(low)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700561 printk(KERN_ERR "oprofile: P4 PMC not available\n");
562 return;
563 }
564
565 /* clear the cccrs we will use */
566 for (i = 0 ; i < num_counters ; i++) {
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200567 if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
Don Zickuscb9c4482006-09-26 10:52:26 +0200568 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700569 rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
570 CCCR_CLEAR(low);
571 CCCR_SET_REQUIRED_BITS(low);
572 wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
573 }
574
Linus Torvalds1da177e2005-04-16 15:20:36 -0700575 /* clear all escrs (including those outside our concern) */
Don Zickuscb9c4482006-09-26 10:52:26 +0200576 for (i = num_counters; i < num_controls; i++) {
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200577 if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
Don Zickuscb9c4482006-09-26 10:52:26 +0200578 continue;
579 wrmsr(msrs->controls[i].addr, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700580 }
581
Linus Torvalds1da177e2005-04-16 15:20:36 -0700582 /* setup all counters */
583 for (i = 0 ; i < num_counters ; ++i) {
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200584 if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs, i))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700585 reset_value[i] = counter_config[i].count;
586 pmc_setup_one_p4_counter(i);
587 CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i));
588 } else {
589 reset_value[i] = 0;
590 }
591 }
592}
593
594
595static int p4_check_ctrs(struct pt_regs * const regs,
596 struct op_msrs const * const msrs)
597{
598 unsigned long ctr, low, high, stag, real;
599 int i;
600
601 stag = get_stagger();
602
603 for (i = 0; i < num_counters; ++i) {
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200604
605 if (!reset_value[i])
Linus Torvalds1da177e2005-04-16 15:20:36 -0700606 continue;
607
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200608 /*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700609 * there is some eccentricity in the hardware which
610 * requires that we perform 2 extra corrections:
611 *
612 * - check both the CCCR:OVF flag for overflow and the
613 * counter high bit for un-flagged overflows.
614 *
615 * - write the counter back twice to ensure it gets
616 * updated properly.
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200617 *
Linus Torvalds1da177e2005-04-16 15:20:36 -0700618 * the former seems to be related to extra NMIs happening
619 * during the current NMI; the latter is reported as errata
620 * N15 in intel doc 249199-029, pentium 4 specification
621 * update, though their suggested work-around does not
622 * appear to solve the problem.
623 */
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200624
Linus Torvalds1da177e2005-04-16 15:20:36 -0700625 real = VIRT_CTR(stag, i);
626
627 CCCR_READ(low, high, real);
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200628 CTR_READ(ctr, high, real);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700629 if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) {
630 oprofile_add_sample(regs, i);
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200631 CTR_WRITE(reset_value[i], real);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700632 CCCR_CLEAR_OVF(low);
633 CCCR_WRITE(low, high, real);
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200634 CTR_WRITE(reset_value[i], real);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700635 }
636 }
637
638 /* P4 quirk: you have to re-unmask the apic vector */
639 apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
640
641 /* See op_model_ppro.c */
642 return 1;
643}
644
645
646static void p4_start(struct op_msrs const * const msrs)
647{
648 unsigned int low, high, stag;
649 int i;
650
651 stag = get_stagger();
652
653 for (i = 0; i < num_counters; ++i) {
654 if (!reset_value[i])
655 continue;
656 CCCR_READ(low, high, VIRT_CTR(stag, i));
657 CCCR_SET_ENABLE(low);
658 CCCR_WRITE(low, high, VIRT_CTR(stag, i));
659 }
660}
661
662
663static void p4_stop(struct op_msrs const * const msrs)
664{
665 unsigned int low, high, stag;
666 int i;
667
668 stag = get_stagger();
669
670 for (i = 0; i < num_counters; ++i) {
Don Zickuscb9c4482006-09-26 10:52:26 +0200671 if (!reset_value[i])
672 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700673 CCCR_READ(low, high, VIRT_CTR(stag, i));
674 CCCR_SET_DISABLE(low);
675 CCCR_WRITE(low, high, VIRT_CTR(stag, i));
676 }
677}
678
Don Zickuscb9c4482006-09-26 10:52:26 +0200679static void p4_shutdown(struct op_msrs const * const msrs)
680{
681 int i;
682
683 for (i = 0 ; i < num_counters ; ++i) {
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200684 if (CTR_IS_RESERVED(msrs, i))
Don Zickuscb9c4482006-09-26 10:52:26 +0200685 release_perfctr_nmi(msrs->counters[i].addr);
686 }
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200687 /*
688 * some of the control registers are specially reserved in
Don Zickuscb9c4482006-09-26 10:52:26 +0200689 * conjunction with the counter registers (hence the starting offset).
690 * This saves a few bits.
691 */
692 for (i = num_counters ; i < num_controls ; ++i) {
Paolo Ciarrocchi20211e42008-08-18 21:25:38 +0200693 if (CTRL_IS_RESERVED(msrs, i))
Don Zickuscb9c4482006-09-26 10:52:26 +0200694 release_evntsel_nmi(msrs->controls[i].addr);
695 }
696}
697
Linus Torvalds1da177e2005-04-16 15:20:36 -0700698
#ifdef CONFIG_SMP
/* model description using the HT2 counts; only built with CONFIG_SMP */
struct op_x86_model_spec const op_p4_ht2_spec = {
	.num_counters		= NUM_COUNTERS_HT2,
	.num_controls		= NUM_CONTROLS_HT2,
	.fill_in_addresses	= p4_fill_in_addresses,
	.setup_ctrs		= p4_setup_ctrs,
	.check_ctrs		= p4_check_ctrs,
	.start			= p4_start,
	.stop			= p4_stop,
	.shutdown		= p4_shutdown,
};
#endif
711
712struct op_x86_model_spec const op_p4_spec = {
713 .num_counters = NUM_COUNTERS_NON_HT,
714 .num_controls = NUM_CONTROLS_NON_HT,
715 .fill_in_addresses = &p4_fill_in_addresses,
716 .setup_ctrs = &p4_setup_ctrs,
717 .check_ctrs = &p4_check_ctrs,
718 .start = &p4_start,
Don Zickuscb9c4482006-09-26 10:52:26 +0200719 .stop = &p4_stop,
720 .shutdown = &p4_shutdown
Linus Torvalds1da177e2005-04-16 15:20:36 -0700721};