blob: f7ee07cb581885d123a864a6df9b61536d056f2d [file] [log] [blame]
Jiri Olsa353120b2019-10-07 14:53:09 +02001// SPDX-License-Identifier: GPL-2.0
Jiri Olsa32c261c2019-10-07 14:53:12 +02002#include <sys/mman.h>
Jiri Olsa7c4d4182019-10-07 14:53:18 +02003#include <inttypes.h>
4#include <asm/bug.h>
5#include <errno.h>
Jiri Olsa151ed5d2019-10-07 14:53:20 +02006#include <string.h>
Jiri Olsa7728fa02019-10-07 14:53:17 +02007#include <linux/ring_buffer.h>
8#include <linux/perf_event.h>
9#include <perf/mmap.h>
Jiri Olsa151ed5d2019-10-07 14:53:20 +020010#include <perf/event.h>
Rob Herring47d01e72021-04-14 11:07:39 -050011#include <perf/evsel.h>
Jiri Olsa353120b2019-10-07 14:53:09 +020012#include <internal/mmap.h>
Jiri Olsabf59b302019-10-07 14:53:11 +020013#include <internal/lib.h>
Jiri Olsa80e53d12019-10-07 14:53:15 +020014#include <linux/kernel.h>
Rob Herring47d01e72021-04-14 11:07:39 -050015#include <linux/math64.h>
Jiri Olsa7c4d4182019-10-07 14:53:18 +020016#include "internal.h"
Jiri Olsa353120b2019-10-07 14:53:09 +020017
Jiri Olsa6eb65f72019-10-17 12:59:09 +020018void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev,
19 bool overwrite, libperf_unmap_cb_t unmap_cb)
Jiri Olsa353120b2019-10-07 14:53:09 +020020{
21 map->fd = -1;
22 map->overwrite = overwrite;
Jiri Olsa80e53d12019-10-07 14:53:15 +020023 map->unmap_cb = unmap_cb;
Jiri Olsa353120b2019-10-07 14:53:09 +020024 refcount_set(&map->refcnt, 0);
Jiri Olsa6eb65f72019-10-17 12:59:09 +020025 if (prev)
26 prev->next = map;
Jiri Olsa353120b2019-10-07 14:53:09 +020027}
Jiri Olsabf59b302019-10-07 14:53:11 +020028
29size_t perf_mmap__mmap_len(struct perf_mmap *map)
30{
31 return map->mask + 1 + page_size;
32}
Jiri Olsa32c261c2019-10-07 14:53:12 +020033
34int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp,
Ian Rogers6d188042022-01-04 22:13:51 -080035 int fd, struct perf_cpu cpu)
Jiri Olsa32c261c2019-10-07 14:53:12 +020036{
37 map->prev = 0;
38 map->mask = mp->mask;
39 map->base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot,
40 MAP_SHARED, fd, 0);
41 if (map->base == MAP_FAILED) {
42 map->base = NULL;
43 return -1;
44 }
45
46 map->fd = fd;
47 map->cpu = cpu;
48 return 0;
49}
Jiri Olsae75710f2019-10-07 14:53:13 +020050
Jiri Olsa59d7ea622019-10-07 14:53:14 +020051void perf_mmap__munmap(struct perf_mmap *map)
52{
53 if (map && map->base != NULL) {
54 munmap(map->base, perf_mmap__mmap_len(map));
55 map->base = NULL;
56 map->fd = -1;
57 refcount_set(&map->refcnt, 0);
58 }
Jiri Olsa80e53d12019-10-07 14:53:15 +020059 if (map && map->unmap_cb)
60 map->unmap_cb(map);
Jiri Olsa59d7ea622019-10-07 14:53:14 +020061}
62
Jiri Olsae75710f2019-10-07 14:53:13 +020063void perf_mmap__get(struct perf_mmap *map)
64{
65 refcount_inc(&map->refcnt);
66}
Jiri Olsa80e53d12019-10-07 14:53:15 +020067
68void perf_mmap__put(struct perf_mmap *map)
69{
70 BUG_ON(map->base && refcount_read(&map->refcnt) == 0);
71
72 if (refcount_dec_and_test(&map->refcnt))
73 perf_mmap__munmap(map);
74}
Jiri Olsa7728fa02019-10-07 14:53:17 +020075
76static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail)
77{
78 ring_buffer_write_tail(md->base, tail);
79}
80
81u64 perf_mmap__read_head(struct perf_mmap *map)
82{
83 return ring_buffer_read_head(map->base);
84}
85
86static bool perf_mmap__empty(struct perf_mmap *map)
87{
88 struct perf_event_mmap_page *pc = map->base;
89
90 return perf_mmap__read_head(map) == map->prev && !pc->aux_size;
91}
92
93void perf_mmap__consume(struct perf_mmap *map)
94{
95 if (!map->overwrite) {
96 u64 old = map->prev;
97
98 perf_mmap__write_tail(map, old);
99 }
100
101 if (refcount_read(&map->refcnt) == 1 && perf_mmap__empty(map))
102 perf_mmap__put(map);
103}
Jiri Olsa7c4d4182019-10-07 14:53:18 +0200104
105static int overwrite_rb_find_range(void *buf, int mask, u64 *start, u64 *end)
106{
107 struct perf_event_header *pheader;
108 u64 evt_head = *start;
109 int size = mask + 1;
110
111 pr_debug2("%s: buf=%p, start=%"PRIx64"\n", __func__, buf, *start);
112 pheader = (struct perf_event_header *)(buf + (*start & mask));
113 while (true) {
114 if (evt_head - *start >= (unsigned int)size) {
115 pr_debug("Finished reading overwrite ring buffer: rewind\n");
116 if (evt_head - *start > (unsigned int)size)
117 evt_head -= pheader->size;
118 *end = evt_head;
119 return 0;
120 }
121
122 pheader = (struct perf_event_header *)(buf + (evt_head & mask));
123
124 if (pheader->size == 0) {
125 pr_debug("Finished reading overwrite ring buffer: get start\n");
126 *end = evt_head;
127 return 0;
128 }
129
130 evt_head += pheader->size;
131 pr_debug3("move evt_head: %"PRIx64"\n", evt_head);
132 }
133 WARN_ONCE(1, "Shouldn't get here\n");
134 return -1;
135}
136
137/*
138 * Report the start and end of the available data in ringbuffer
139 */
140static int __perf_mmap__read_init(struct perf_mmap *md)
141{
142 u64 head = perf_mmap__read_head(md);
143 u64 old = md->prev;
144 unsigned char *data = md->base + page_size;
145 unsigned long size;
146
147 md->start = md->overwrite ? head : old;
148 md->end = md->overwrite ? old : head;
149
150 if ((md->end - md->start) < md->flush)
151 return -EAGAIN;
152
153 size = md->end - md->start;
154 if (size > (unsigned long)(md->mask) + 1) {
155 if (!md->overwrite) {
156 WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");
157
158 md->prev = head;
159 perf_mmap__consume(md);
160 return -EAGAIN;
161 }
162
163 /*
164 * Backward ring buffer is full. We still have a chance to read
165 * most of data from it.
166 */
167 if (overwrite_rb_find_range(data, md->mask, &md->start, &md->end))
168 return -EINVAL;
169 }
170
171 return 0;
172}
173
174int perf_mmap__read_init(struct perf_mmap *map)
175{
176 /*
177 * Check if event was unmapped due to a POLLHUP/POLLERR.
178 */
179 if (!refcount_read(&map->refcnt))
180 return -ENOENT;
181
182 return __perf_mmap__read_init(map);
183}
Jiri Olsa32fdc2c2019-10-07 14:53:19 +0200184
185/*
186 * Mandatory for overwrite mode
187 * The direction of overwrite mode is backward.
188 * The last perf_mmap__read() will set tail to map->core.prev.
189 * Need to correct the map->core.prev to head which is the end of next read.
190 */
191void perf_mmap__read_done(struct perf_mmap *map)
192{
193 /*
194 * Check if event was unmapped due to a POLLHUP/POLLERR.
195 */
196 if (!refcount_read(&map->refcnt))
197 return;
198
199 map->prev = perf_mmap__read_head(map);
200}
Jiri Olsa151ed5d2019-10-07 14:53:20 +0200201
202/* When check_messup is true, 'end' must points to a good entry */
203static union perf_event *perf_mmap__read(struct perf_mmap *map,
204 u64 *startp, u64 end)
205{
206 unsigned char *data = map->base + page_size;
207 union perf_event *event = NULL;
208 int diff = end - *startp;
209
210 if (diff >= (int)sizeof(event->header)) {
211 size_t size;
212
213 event = (union perf_event *)&data[*startp & map->mask];
214 size = event->header.size;
215
216 if (size < sizeof(event->header) || diff < (int)size)
217 return NULL;
218
219 /*
220 * Event straddles the mmap boundary -- header should always
221 * be inside due to u64 alignment of output.
222 */
223 if ((*startp & map->mask) + size != ((*startp + size) & map->mask)) {
224 unsigned int offset = *startp;
225 unsigned int len = min(sizeof(*event), size), cpy;
226 void *dst = map->event_copy;
227
228 do {
229 cpy = min(map->mask + 1 - (offset & map->mask), len);
230 memcpy(dst, &data[offset & map->mask], cpy);
231 offset += cpy;
232 dst += cpy;
233 len -= cpy;
234 } while (len);
235
236 event = (union perf_event *)map->event_copy;
237 }
238
239 *startp += size;
240 }
241
242 return event;
243}
244
245/*
246 * Read event from ring buffer one by one.
247 * Return one event for each call.
248 *
249 * Usage:
250 * perf_mmap__read_init()
251 * while(event = perf_mmap__read_event()) {
252 * //process the event
253 * perf_mmap__consume()
254 * }
255 * perf_mmap__read_done()
256 */
257union perf_event *perf_mmap__read_event(struct perf_mmap *map)
258{
259 union perf_event *event;
260
261 /*
262 * Check if event was unmapped due to a POLLHUP/POLLERR.
263 */
264 if (!refcount_read(&map->refcnt))
265 return NULL;
266
267 /* non-overwirte doesn't pause the ringbuffer */
268 if (!map->overwrite)
269 map->end = perf_mmap__read_head(map);
270
271 event = perf_mmap__read(map, &map->start, map->end);
272
273 if (!map->overwrite)
274 map->prev = map->start;
275
276 return event;
277}
Rob Herring47d01e72021-04-14 11:07:39 -0500278
279#if defined(__i386__) || defined(__x86_64__)
280static u64 read_perf_counter(unsigned int counter)
281{
282 unsigned int low, high;
283
284 asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));
285
286 return low | ((u64)high) << 32;
287}
288
289static u64 read_timestamp(void)
290{
291 unsigned int low, high;
292
293 asm volatile("rdtsc" : "=a" (low), "=d" (high));
294
295 return low | ((u64)high) << 32;
296}
297#else
298static u64 read_perf_counter(unsigned int counter __maybe_unused) { return 0; }
299static u64 read_timestamp(void) { return 0; }
300#endif
301
302int perf_mmap__read_self(struct perf_mmap *map, struct perf_counts_values *count)
303{
304 struct perf_event_mmap_page *pc = map->base;
305 u32 seq, idx, time_mult = 0, time_shift = 0;
306 u64 cnt, cyc = 0, time_offset = 0, time_cycles = 0, time_mask = ~0ULL;
307
308 if (!pc || !pc->cap_user_rdpmc)
309 return -1;
310
311 do {
312 seq = READ_ONCE(pc->lock);
313 barrier();
314
315 count->ena = READ_ONCE(pc->time_enabled);
316 count->run = READ_ONCE(pc->time_running);
317
318 if (pc->cap_user_time && count->ena != count->run) {
319 cyc = read_timestamp();
320 time_mult = READ_ONCE(pc->time_mult);
321 time_shift = READ_ONCE(pc->time_shift);
322 time_offset = READ_ONCE(pc->time_offset);
323
324 if (pc->cap_user_time_short) {
325 time_cycles = READ_ONCE(pc->time_cycles);
326 time_mask = READ_ONCE(pc->time_mask);
327 }
328 }
329
330 idx = READ_ONCE(pc->index);
331 cnt = READ_ONCE(pc->offset);
332 if (pc->cap_user_rdpmc && idx) {
333 s64 evcnt = read_perf_counter(idx - 1);
334 u16 width = READ_ONCE(pc->pmc_width);
335
336 evcnt <<= 64 - width;
337 evcnt >>= 64 - width;
338 cnt += evcnt;
339 } else
340 return -1;
341
342 barrier();
343 } while (READ_ONCE(pc->lock) != seq);
344
345 if (count->ena != count->run) {
346 u64 delta;
347
348 /* Adjust for cap_usr_time_short, a nop if not */
349 cyc = time_cycles + ((cyc - time_cycles) & time_mask);
350
351 delta = time_offset + mul_u64_u32_shr(cyc, time_mult, time_shift);
352
353 count->ena += delta;
354 if (idx)
355 count->run += delta;
Rob Herring47d01e72021-04-14 11:07:39 -0500356 }
357
358 count->val = cnt;
359
360 return 0;
361}