Arnaldo Carvalho de Melo | f8a9530 | 2011-01-30 10:46:46 -0200 | [diff] [blame^] | 1 | /* |
| 2 | * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> |
| 3 | * |
| 4 | * Parts came from builtin-{top,stat,record}.c, see those files for further |
| 5 | * copyright notes. |
| 6 | * |
| 7 | * Released under the GPL v2. (and only v2, not any later version) |
| 8 | */ |
Arnaldo Carvalho de Melo | 5c58104 | 2011-01-11 22:30:02 -0200 | [diff] [blame] | 9 | #include <poll.h> |
Arnaldo Carvalho de Melo | f8a9530 | 2011-01-30 10:46:46 -0200 | [diff] [blame^] | 10 | #include "cpumap.h" |
| 11 | #include "thread_map.h" |
Arnaldo Carvalho de Melo | 361c99a | 2011-01-11 20:56:53 -0200 | [diff] [blame] | 12 | #include "evlist.h" |
| 13 | #include "evsel.h" |
| 14 | #include "util.h" |
| 15 | |
Arnaldo Carvalho de Melo | f8a9530 | 2011-01-30 10:46:46 -0200 | [diff] [blame^] | 16 | #include <sys/mman.h> |
| 17 | |
Arnaldo Carvalho de Melo | 70db753 | 2011-01-12 22:39:13 -0200 | [diff] [blame] | 18 | #include <linux/bitops.h> |
| 19 | #include <linux/hash.h> |
| 20 | |
/*
 * Accessors for the per-(x, y) slots an evsel keeps in its xyarrays; in
 * this file x is a cpu index and y a thread index.
 *
 * FD()  - the int stored in e->fd at (x, y), usable as an lvalue.
 * SID() - pointer to the e->id entry at (x, y), exactly as returned by
 *         xyarray__entry().
 *
 * The evsel argument is parenthesized so that any pointer expression can
 * be passed, not just a plain identifier.
 */
#define FD(e, x, y) (*(int *)xyarray__entry((e)->fd, x, y))
#define SID(e, x, y) xyarray__entry((e)->id, x, y)
| 23 | |
Arnaldo Carvalho de Melo | ef1d1af | 2011-01-18 21:41:45 -0200 | [diff] [blame] | 24 | void perf_evlist__init(struct perf_evlist *evlist) |
| 25 | { |
| 26 | int i; |
| 27 | |
| 28 | for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i) |
| 29 | INIT_HLIST_HEAD(&evlist->heads[i]); |
| 30 | INIT_LIST_HEAD(&evlist->entries); |
| 31 | } |
| 32 | |
Arnaldo Carvalho de Melo | 361c99a | 2011-01-11 20:56:53 -0200 | [diff] [blame] | 33 | struct perf_evlist *perf_evlist__new(void) |
| 34 | { |
| 35 | struct perf_evlist *evlist = zalloc(sizeof(*evlist)); |
| 36 | |
Arnaldo Carvalho de Melo | ef1d1af | 2011-01-18 21:41:45 -0200 | [diff] [blame] | 37 | if (evlist != NULL) |
| 38 | perf_evlist__init(evlist); |
Arnaldo Carvalho de Melo | 361c99a | 2011-01-11 20:56:53 -0200 | [diff] [blame] | 39 | |
| 40 | return evlist; |
| 41 | } |
| 42 | |
| 43 | static void perf_evlist__purge(struct perf_evlist *evlist) |
| 44 | { |
| 45 | struct perf_evsel *pos, *n; |
| 46 | |
| 47 | list_for_each_entry_safe(pos, n, &evlist->entries, node) { |
| 48 | list_del_init(&pos->node); |
| 49 | perf_evsel__delete(pos); |
| 50 | } |
| 51 | |
| 52 | evlist->nr_entries = 0; |
| 53 | } |
| 54 | |
Arnaldo Carvalho de Melo | ef1d1af | 2011-01-18 21:41:45 -0200 | [diff] [blame] | 55 | void perf_evlist__exit(struct perf_evlist *evlist) |
| 56 | { |
| 57 | free(evlist->mmap); |
| 58 | free(evlist->pollfd); |
| 59 | evlist->mmap = NULL; |
| 60 | evlist->pollfd = NULL; |
| 61 | } |
| 62 | |
/*
 * perf_evlist__delete - destroy an evlist and everything it owns
 *
 * @evlist - the list to destroy; NULL is accepted and ignored, mirroring
 *           free(), so error paths can call this unconditionally
 *
 * Deletes all evsel entries, frees the mmap and pollfd arrays and finally
 * frees the evlist itself.
 */
void perf_evlist__delete(struct perf_evlist *evlist)
{
	if (evlist == NULL)
		return;

	perf_evlist__purge(evlist);
	perf_evlist__exit(evlist);
	free(evlist);
}
| 69 | |
| 70 | void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry) |
| 71 | { |
| 72 | list_add_tail(&entry->node, &evlist->entries); |
| 73 | ++evlist->nr_entries; |
| 74 | } |
| 75 | |
| 76 | int perf_evlist__add_default(struct perf_evlist *evlist) |
| 77 | { |
| 78 | struct perf_event_attr attr = { |
| 79 | .type = PERF_TYPE_HARDWARE, |
| 80 | .config = PERF_COUNT_HW_CPU_CYCLES, |
| 81 | }; |
| 82 | struct perf_evsel *evsel = perf_evsel__new(&attr, 0); |
| 83 | |
| 84 | if (evsel == NULL) |
| 85 | return -ENOMEM; |
| 86 | |
| 87 | perf_evlist__add(evlist, evsel); |
| 88 | return 0; |
| 89 | } |
Arnaldo Carvalho de Melo | 5c58104 | 2011-01-11 22:30:02 -0200 | [diff] [blame] | 90 | |
| 91 | int perf_evlist__alloc_pollfd(struct perf_evlist *evlist, int ncpus, int nthreads) |
| 92 | { |
| 93 | int nfds = ncpus * nthreads * evlist->nr_entries; |
| 94 | evlist->pollfd = malloc(sizeof(struct pollfd) * nfds); |
| 95 | return evlist->pollfd != NULL ? 0 : -ENOMEM; |
| 96 | } |
Arnaldo Carvalho de Melo | 70082dd | 2011-01-12 17:03:24 -0200 | [diff] [blame] | 97 | |
| 98 | void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd) |
| 99 | { |
| 100 | fcntl(fd, F_SETFL, O_NONBLOCK); |
| 101 | evlist->pollfd[evlist->nr_fds].fd = fd; |
| 102 | evlist->pollfd[evlist->nr_fds].events = POLLIN; |
| 103 | evlist->nr_fds++; |
| 104 | } |
Arnaldo Carvalho de Melo | 70db753 | 2011-01-12 22:39:13 -0200 | [diff] [blame] | 105 | |
Arnaldo Carvalho de Melo | f8a9530 | 2011-01-30 10:46:46 -0200 | [diff] [blame^] | 106 | static int perf_evlist__id_hash(struct perf_evlist *evlist, struct perf_evsel *evsel, |
| 107 | int cpu, int thread, int fd) |
| 108 | { |
| 109 | struct perf_sample_id *sid; |
| 110 | u64 read_data[4] = { 0, }; |
| 111 | int hash, id_idx = 1; /* The first entry is the counter value */ |
| 112 | |
| 113 | if (!(evsel->attr.read_format & PERF_FORMAT_ID) || |
| 114 | read(fd, &read_data, sizeof(read_data)) == -1) |
| 115 | return -1; |
| 116 | |
| 117 | if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) |
| 118 | ++id_idx; |
| 119 | if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) |
| 120 | ++id_idx; |
| 121 | |
| 122 | sid = SID(evsel, cpu, thread); |
| 123 | sid->id = read_data[id_idx]; |
| 124 | sid->evsel = evsel; |
| 125 | hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS); |
| 126 | hlist_add_head(&sid->node, &evlist->heads[hash]); |
| 127 | return 0; |
| 128 | } |
| 129 | |
Arnaldo Carvalho de Melo | 70db753 | 2011-01-12 22:39:13 -0200 | [diff] [blame] | 130 | struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id) |
| 131 | { |
| 132 | struct hlist_head *head; |
| 133 | struct hlist_node *pos; |
| 134 | struct perf_sample_id *sid; |
| 135 | int hash; |
| 136 | |
| 137 | if (evlist->nr_entries == 1) |
| 138 | return list_entry(evlist->entries.next, struct perf_evsel, node); |
| 139 | |
| 140 | hash = hash_64(id, PERF_EVLIST__HLIST_BITS); |
| 141 | head = &evlist->heads[hash]; |
| 142 | |
| 143 | hlist_for_each_entry(sid, pos, head, node) |
| 144 | if (sid->id == id) |
| 145 | return sid->evsel; |
| 146 | return NULL; |
| 147 | } |
Arnaldo Carvalho de Melo | 04391de | 2011-01-15 10:40:59 -0200 | [diff] [blame] | 148 | |
/*
 * perf_evlist__read_on_cpu - fetch the next event from a cpu's ring buffer
 *
 * @evlist - list owning the per cpu mmaps
 * @cpu    - index into evlist->mmap[] of the buffer to read
 *
 * Returns a pointer to the next unread event, or NULL when the read
 * position has caught up with the head. The pointer refers either directly
 * into the mmap'ed buffer or, for an event that wraps around the end of the
 * buffer, to evlist->event_copy, which is reused by later calls.
 *
 * The read position (md->prev) is advanced past the returned event. When
 * the evlist is not in overwrite mode, perf_mmap__write_tail() is also
 * called with the new position.
 */
union perf_event *perf_evlist__read_on_cpu(struct perf_evlist *evlist, int cpu)
{
	/* XXX Move this to perf.c, making it generally available */
	unsigned int page_size = sysconf(_SC_PAGE_SIZE);
	struct perf_mmap *md = &evlist->mmap[cpu];
	unsigned int head = perf_mmap__read_head(md);
	unsigned int old = md->prev;
	/*
	 * Event data starts one page into the mapping.
	 * NOTE(review): the first page is presumably the kernel's control
	 * page -- confirm against the perf_event mmap layout.
	 */
	unsigned char *data = md->base + page_size;
	union perf_event *event = NULL;

	if (evlist->overwrite) {
		/*
		 * If we're further behind than half the buffer, there's a chance
		 * the writer will bite our tail and mess up the samples under us.
		 *
		 * If we somehow ended up ahead of the head, we got messed up.
		 *
		 * In either case, truncate and restart at head.
		 */
		int diff = head - old;
		if (diff > md->mask / 2 || diff < 0) {
			fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");

			/*
			 * head points to a known good entry, start there.
			 */
			old = head;
		}
	}

	/* Anything between old and head is unread data. */
	if (old != head) {
		size_t size;

		/* head/old are free running; the mask wraps them into the buffer. */
		event = (union perf_event *)&data[old & md->mask];
		size = event->header.size;

		/*
		 * Event straddles the mmap boundary -- header should always
		 * be inside due to u64 alignment of output.
		 */
		if ((old & md->mask) + size != ((old + size) & md->mask)) {
			unsigned int offset = old;
			/* Never copy more than event_copy can hold. */
			unsigned int len = min(sizeof(*event), size), cpy;
			void *dst = &evlist->event_copy;

			/*
			 * Copy in pieces: first up to the end of the buffer,
			 * then the remainder from its start.
			 */
			do {
				cpy = min(md->mask + 1 - (offset & md->mask), len);
				memcpy(dst, &data[offset & md->mask], cpy);
				offset += cpy;
				dst += cpy;
				len -= cpy;
			} while (len);

			event = &evlist->event_copy;
		}

		/* Step over the whole event, even if only part of it was copied. */
		old += size;
	}

	md->prev = old;

	/* Only a non-overwrite buffer has its tail updated by the reader. */
	if (!evlist->overwrite)
		perf_mmap__write_tail(md, old);

	return event;
}
Arnaldo Carvalho de Melo | f8a9530 | 2011-01-30 10:46:46 -0200 | [diff] [blame^] | 215 | |
| 216 | void perf_evlist__munmap(struct perf_evlist *evlist, int ncpus) |
| 217 | { |
| 218 | int cpu; |
| 219 | |
| 220 | for (cpu = 0; cpu < ncpus; cpu++) { |
| 221 | if (evlist->mmap[cpu].base != NULL) { |
| 222 | munmap(evlist->mmap[cpu].base, evlist->mmap_len); |
| 223 | evlist->mmap[cpu].base = NULL; |
| 224 | } |
| 225 | } |
| 226 | } |
| 227 | |
| 228 | int perf_evlist__alloc_mmap(struct perf_evlist *evlist, int ncpus) |
| 229 | { |
| 230 | evlist->mmap = zalloc(ncpus * sizeof(struct perf_mmap)); |
| 231 | return evlist->mmap != NULL ? 0 : -ENOMEM; |
| 232 | } |
| 233 | |
| 234 | static int __perf_evlist__mmap(struct perf_evlist *evlist, int cpu, int prot, |
| 235 | int mask, int fd) |
| 236 | { |
| 237 | evlist->mmap[cpu].prev = 0; |
| 238 | evlist->mmap[cpu].mask = mask; |
| 239 | evlist->mmap[cpu].base = mmap(NULL, evlist->mmap_len, prot, |
| 240 | MAP_SHARED, fd, 0); |
| 241 | if (evlist->mmap[cpu].base == MAP_FAILED) |
| 242 | return -1; |
| 243 | |
| 244 | perf_evlist__add_pollfd(evlist, fd); |
| 245 | return 0; |
| 246 | } |
| 247 | |
| 248 | /** perf_evlist__mmap - Create per cpu maps to receive events |
| 249 | * |
| 250 | * @evlist - list of events |
| 251 | * @cpus - cpu map being monitored |
| 252 | * @threads - threads map being monitored |
| 253 | * @pages - map length in pages |
| 254 | * @overwrite - overwrite older events? |
| 255 | * |
| 256 | * If overwrite is false the user needs to signal event consuption using: |
| 257 | * |
| 258 | * struct perf_mmap *m = &evlist->mmap[cpu]; |
| 259 | * unsigned int head = perf_mmap__read_head(m); |
| 260 | * |
| 261 | * perf_mmap__write_tail(m, head) |
| 262 | */ |
| 263 | int perf_evlist__mmap(struct perf_evlist *evlist, struct cpu_map *cpus, |
| 264 | struct thread_map *threads, int pages, bool overwrite) |
| 265 | { |
| 266 | unsigned int page_size = sysconf(_SC_PAGE_SIZE); |
| 267 | int mask = pages * page_size - 1, cpu; |
| 268 | struct perf_evsel *first_evsel, *evsel; |
| 269 | int thread, prot = PROT_READ | (overwrite ? 0 : PROT_WRITE); |
| 270 | |
| 271 | if (evlist->mmap == NULL && |
| 272 | perf_evlist__alloc_mmap(evlist, cpus->nr) < 0) |
| 273 | return -ENOMEM; |
| 274 | |
| 275 | if (evlist->pollfd == NULL && |
| 276 | perf_evlist__alloc_pollfd(evlist, cpus->nr, threads->nr) < 0) |
| 277 | return -ENOMEM; |
| 278 | |
| 279 | evlist->overwrite = overwrite; |
| 280 | evlist->mmap_len = (pages + 1) * page_size; |
| 281 | first_evsel = list_entry(evlist->entries.next, struct perf_evsel, node); |
| 282 | |
| 283 | list_for_each_entry(evsel, &evlist->entries, node) { |
| 284 | if ((evsel->attr.read_format & PERF_FORMAT_ID) && |
| 285 | evsel->id == NULL && |
| 286 | perf_evsel__alloc_id(evsel, cpus->nr, threads->nr) < 0) |
| 287 | return -ENOMEM; |
| 288 | |
| 289 | for (cpu = 0; cpu < cpus->nr; cpu++) { |
| 290 | for (thread = 0; thread < threads->nr; thread++) { |
| 291 | int fd = FD(evsel, cpu, thread); |
| 292 | |
| 293 | if (evsel->idx || thread) { |
| 294 | if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, |
| 295 | FD(first_evsel, cpu, 0)) != 0) |
| 296 | goto out_unmap; |
| 297 | } else if (__perf_evlist__mmap(evlist, cpu, prot, mask, fd) < 0) |
| 298 | goto out_unmap; |
| 299 | |
| 300 | if ((evsel->attr.read_format & PERF_FORMAT_ID) && |
| 301 | perf_evlist__id_hash(evlist, evsel, cpu, thread, fd) < 0) |
| 302 | goto out_unmap; |
| 303 | } |
| 304 | } |
| 305 | } |
| 306 | |
| 307 | return 0; |
| 308 | |
| 309 | out_unmap: |
| 310 | for (cpu = 0; cpu < cpus->nr; cpu++) { |
| 311 | if (evlist->mmap[cpu].base != NULL) { |
| 312 | munmap(evlist->mmap[cpu].base, evlist->mmap_len); |
| 313 | evlist->mmap[cpu].base = NULL; |
| 314 | } |
| 315 | } |
| 316 | return -1; |
| 317 | } |