blob: 1ef43c212d9a590e87aa074a0ce9e7ccc4aae973 [file] [log] [blame]
Li Zefanba77c9e2009-11-20 15:53:25 +08001#include "builtin.h"
2#include "perf.h"
3
4#include "util/util.h"
5#include "util/cache.h"
6#include "util/symbol.h"
7#include "util/thread.h"
8#include "util/header.h"
9
10#include "util/parse-options.h"
11#include "util/trace-event.h"
12
13#include "util/debug.h"
14#include "util/data_map.h"
15
16#include <linux/rbtree.h>
17
18struct alloc_stat;
19typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *);
20
21static char const *input_name = "perf.data";
22
23static struct perf_header *header;
24static u64 sample_type;
25
26static int alloc_flag;
27static int caller_flag;
28
29sort_fn_t alloc_sort_fn;
30sort_fn_t caller_sort_fn;
31
32static int alloc_lines = -1;
33static int caller_lines = -1;
34
Li Zefan7707b6b2009-11-24 13:25:48 +080035static bool raw_ip;
36
Li Zefanba77c9e2009-11-20 15:53:25 +080037static char *cwd;
38static int cwdlen;
39
40struct alloc_stat {
41 union {
Li Zefan7707b6b2009-11-24 13:25:48 +080042 u64 call_site;
Li Zefanba77c9e2009-11-20 15:53:25 +080043 u64 ptr;
44 };
45 u64 bytes_req;
46 u64 bytes_alloc;
47 u32 hit;
48
49 struct rb_node node;
50};
51
52static struct rb_root root_alloc_stat;
53static struct rb_root root_alloc_sorted;
54static struct rb_root root_caller_stat;
55static struct rb_root root_caller_sorted;
56
57static unsigned long total_requested, total_allocated;
58
59struct raw_event_sample {
60 u32 size;
61 char data[0];
62};
63
64static int
65process_comm_event(event_t *event, unsigned long offset, unsigned long head)
66{
67 struct thread *thread = threads__findnew(event->comm.pid);
68
69 dump_printf("%p [%p]: PERF_RECORD_COMM: %s:%d\n",
70 (void *)(offset + head),
71 (void *)(long)(event->header.size),
72 event->comm.comm, event->comm.pid);
73
74 if (thread == NULL ||
75 thread__set_comm(thread, event->comm.comm)) {
76 dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n");
77 return -1;
78 }
79
80 return 0;
81}
82
83static void insert_alloc_stat(unsigned long ptr,
84 int bytes_req, int bytes_alloc)
85{
86 struct rb_node **node = &root_alloc_stat.rb_node;
87 struct rb_node *parent = NULL;
88 struct alloc_stat *data = NULL;
89
90 if (!alloc_flag)
91 return;
92
93 while (*node) {
94 parent = *node;
95 data = rb_entry(*node, struct alloc_stat, node);
96
97 if (ptr > data->ptr)
98 node = &(*node)->rb_right;
99 else if (ptr < data->ptr)
100 node = &(*node)->rb_left;
101 else
102 break;
103 }
104
105 if (data && data->ptr == ptr) {
106 data->hit++;
107 data->bytes_req += bytes_req;
108 data->bytes_alloc += bytes_req;
109 } else {
110 data = malloc(sizeof(*data));
111 data->ptr = ptr;
112 data->hit = 1;
113 data->bytes_req = bytes_req;
114 data->bytes_alloc = bytes_alloc;
115
116 rb_link_node(&data->node, parent, node);
117 rb_insert_color(&data->node, &root_alloc_stat);
118 }
119}
120
121static void insert_caller_stat(unsigned long call_site,
122 int bytes_req, int bytes_alloc)
123{
124 struct rb_node **node = &root_caller_stat.rb_node;
125 struct rb_node *parent = NULL;
126 struct alloc_stat *data = NULL;
127
128 if (!caller_flag)
129 return;
130
131 while (*node) {
132 parent = *node;
133 data = rb_entry(*node, struct alloc_stat, node);
134
135 if (call_site > data->call_site)
136 node = &(*node)->rb_right;
137 else if (call_site < data->call_site)
138 node = &(*node)->rb_left;
139 else
140 break;
141 }
142
143 if (data && data->call_site == call_site) {
144 data->hit++;
145 data->bytes_req += bytes_req;
146 data->bytes_alloc += bytes_req;
147 } else {
148 data = malloc(sizeof(*data));
149 data->call_site = call_site;
150 data->hit = 1;
151 data->bytes_req = bytes_req;
152 data->bytes_alloc = bytes_alloc;
153
154 rb_link_node(&data->node, parent, node);
155 rb_insert_color(&data->node, &root_caller_stat);
156 }
157}
158
159static void process_alloc_event(struct raw_event_sample *raw,
160 struct event *event,
161 int cpu __used,
162 u64 timestamp __used,
163 struct thread *thread __used,
164 int node __used)
165{
166 unsigned long call_site;
167 unsigned long ptr;
168 int bytes_req;
169 int bytes_alloc;
170
171 ptr = raw_field_value(event, "ptr", raw->data);
172 call_site = raw_field_value(event, "call_site", raw->data);
173 bytes_req = raw_field_value(event, "bytes_req", raw->data);
174 bytes_alloc = raw_field_value(event, "bytes_alloc", raw->data);
175
176 insert_alloc_stat(ptr, bytes_req, bytes_alloc);
177 insert_caller_stat(call_site, bytes_req, bytes_alloc);
178
179 total_requested += bytes_req;
180 total_allocated += bytes_alloc;
181}
182
183static void process_free_event(struct raw_event_sample *raw __used,
184 struct event *event __used,
185 int cpu __used,
186 u64 timestamp __used,
187 struct thread *thread __used)
188{
189}
190
191static void
192process_raw_event(event_t *raw_event __used, void *more_data,
193 int cpu, u64 timestamp, struct thread *thread)
194{
195 struct raw_event_sample *raw = more_data;
196 struct event *event;
197 int type;
198
199 type = trace_parse_common_type(raw->data);
200 event = trace_find_event(type);
201
202 if (!strcmp(event->name, "kmalloc") ||
203 !strcmp(event->name, "kmem_cache_alloc")) {
204 process_alloc_event(raw, event, cpu, timestamp, thread, 0);
205 return;
206 }
207
208 if (!strcmp(event->name, "kmalloc_node") ||
209 !strcmp(event->name, "kmem_cache_alloc_node")) {
210 process_alloc_event(raw, event, cpu, timestamp, thread, 1);
211 return;
212 }
213
214 if (!strcmp(event->name, "kfree") ||
215 !strcmp(event->name, "kmem_cache_free")) {
216 process_free_event(raw, event, cpu, timestamp, thread);
217 return;
218 }
219}
220
221static int
222process_sample_event(event_t *event, unsigned long offset, unsigned long head)
223{
224 u64 ip = event->ip.ip;
225 u64 timestamp = -1;
226 u32 cpu = -1;
227 u64 period = 1;
228 void *more_data = event->ip.__more_data;
229 struct thread *thread = threads__findnew(event->ip.pid);
230
231 if (sample_type & PERF_SAMPLE_TIME) {
232 timestamp = *(u64 *)more_data;
233 more_data += sizeof(u64);
234 }
235
236 if (sample_type & PERF_SAMPLE_CPU) {
237 cpu = *(u32 *)more_data;
238 more_data += sizeof(u32);
239 more_data += sizeof(u32); /* reserved */
240 }
241
242 if (sample_type & PERF_SAMPLE_PERIOD) {
243 period = *(u64 *)more_data;
244 more_data += sizeof(u64);
245 }
246
247 dump_printf("%p [%p]: PERF_RECORD_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n",
248 (void *)(offset + head),
249 (void *)(long)(event->header.size),
250 event->header.misc,
251 event->ip.pid, event->ip.tid,
252 (void *)(long)ip,
253 (long long)period);
254
255 if (thread == NULL) {
256 pr_debug("problem processing %d event, skipping it.\n",
257 event->header.type);
258 return -1;
259 }
260
261 dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
262
263 process_raw_event(event, more_data, cpu, timestamp, thread);
264
265 return 0;
266}
267
268static int sample_type_check(u64 type)
269{
270 sample_type = type;
271
272 if (!(sample_type & PERF_SAMPLE_RAW)) {
273 fprintf(stderr,
274 "No trace sample to read. Did you call perf record "
275 "without -R?");
276 return -1;
277 }
278
279 return 0;
280}
281
282static struct perf_file_handler file_handler = {
283 .process_sample_event = process_sample_event,
284 .process_comm_event = process_comm_event,
285 .sample_type_check = sample_type_check,
286};
287
288static int read_events(void)
289{
290 register_idle_thread();
291 register_perf_file_handler(&file_handler);
292
Arnaldo Carvalho de Melocc612d82009-11-23 16:39:10 -0200293 return mmap_dispatch_perf_file(&header, input_name, NULL, false, 0, 0,
Li Zefanba77c9e2009-11-20 15:53:25 +0800294 &cwdlen, &cwd);
295}
296
/*
 * Internal fragmentation as a percentage: the share of the allocated bytes
 * that was not requested.  Returns 0.0 when nothing was allocated.
 */
static double fragmentation(unsigned long n_req, unsigned long n_alloc)
{
	double used_pct;

	if (!n_alloc)
		return 0.0;

	used_pct = 100.0 * n_req / n_alloc;
	return 100.0 - used_pct;
}
304
305static void __print_result(struct rb_root *root, int n_lines, int is_caller)
306{
307 struct rb_node *next;
308
Arnaldo Carvalho de Melo1b145ae2009-11-23 17:51:09 -0200309 printf("%.78s\n", graph_dotted_line);
310 printf("%-28s|", is_caller ? "Callsite": "Alloc Ptr");
311 printf("Total_alloc/Per | Total_req/Per | Hit | Frag\n");
312 printf("%.78s\n", graph_dotted_line);
Li Zefanba77c9e2009-11-20 15:53:25 +0800313
314 next = rb_first(root);
315
316 while (next && n_lines--) {
Arnaldo Carvalho de Melo1b145ae2009-11-23 17:51:09 -0200317 struct alloc_stat *data = rb_entry(next, struct alloc_stat,
318 node);
319 struct symbol *sym = NULL;
320 char bf[BUFSIZ];
321 u64 addr;
Li Zefanba77c9e2009-11-20 15:53:25 +0800322
Arnaldo Carvalho de Melo1b145ae2009-11-23 17:51:09 -0200323 if (is_caller) {
324 addr = data->call_site;
Li Zefan7707b6b2009-11-24 13:25:48 +0800325 if (!raw_ip)
326 sym = kernel_maps__find_symbol(addr,
327 NULL, NULL);
Arnaldo Carvalho de Melo1b145ae2009-11-23 17:51:09 -0200328 } else
329 addr = data->ptr;
Li Zefanba77c9e2009-11-20 15:53:25 +0800330
Arnaldo Carvalho de Melo1b145ae2009-11-23 17:51:09 -0200331 if (sym != NULL)
Li Zefan7707b6b2009-11-24 13:25:48 +0800332 snprintf(bf, sizeof(bf), "%s+%Lx", sym->name,
Arnaldo Carvalho de Melo1b145ae2009-11-23 17:51:09 -0200333 addr - sym->start);
334 else
335 snprintf(bf, sizeof(bf), "%#Lx", addr);
336
337 printf("%-28s|%8llu/%-6lu |%8llu/%-6lu|%6lu|%8.3f%%\n",
338 bf, (unsigned long long)data->bytes_alloc,
Li Zefanba77c9e2009-11-20 15:53:25 +0800339 (unsigned long)data->bytes_alloc / data->hit,
340 (unsigned long long)data->bytes_req,
341 (unsigned long)data->bytes_req / data->hit,
342 (unsigned long)data->hit,
343 fragmentation(data->bytes_req, data->bytes_alloc));
344
345 next = rb_next(next);
346 }
347
348 if (n_lines == -1)
Li Zefan7707b6b2009-11-24 13:25:48 +0800349 printf(" ... | ... | ... | ... | ... \n");
Li Zefanba77c9e2009-11-20 15:53:25 +0800350
Li Zefan7707b6b2009-11-24 13:25:48 +0800351 printf("%.78s\n", graph_dotted_line);
Li Zefanba77c9e2009-11-20 15:53:25 +0800352}
353
354static void print_summary(void)
355{
356 printf("\nSUMMARY\n=======\n");
357 printf("Total bytes requested: %lu\n", total_requested);
358 printf("Total bytes allocated: %lu\n", total_allocated);
359 printf("Total bytes wasted on internal fragmentation: %lu\n",
360 total_allocated - total_requested);
361 printf("Internal fragmentation: %f%%\n",
362 fragmentation(total_requested, total_allocated));
363}
364
365static void print_result(void)
366{
367 if (caller_flag)
368 __print_result(&root_caller_sorted, caller_lines, 1);
369 if (alloc_flag)
370 __print_result(&root_alloc_sorted, alloc_lines, 0);
371 print_summary();
372}
373
374static void sort_insert(struct rb_root *root, struct alloc_stat *data,
375 sort_fn_t sort_fn)
376{
377 struct rb_node **new = &(root->rb_node);
378 struct rb_node *parent = NULL;
379
380 while (*new) {
381 struct alloc_stat *this;
382 int cmp;
383
384 this = rb_entry(*new, struct alloc_stat, node);
385 parent = *new;
386
387 cmp = sort_fn(data, this);
388
389 if (cmp > 0)
390 new = &((*new)->rb_left);
391 else
392 new = &((*new)->rb_right);
393 }
394
395 rb_link_node(&data->node, parent, new);
396 rb_insert_color(&data->node, root);
397}
398
399static void __sort_result(struct rb_root *root, struct rb_root *root_sorted,
400 sort_fn_t sort_fn)
401{
402 struct rb_node *node;
403 struct alloc_stat *data;
404
405 for (;;) {
406 node = rb_first(root);
407 if (!node)
408 break;
409
410 rb_erase(node, root);
411 data = rb_entry(node, struct alloc_stat, node);
412 sort_insert(root_sorted, data, sort_fn);
413 }
414}
415
416static void sort_result(void)
417{
418 __sort_result(&root_alloc_stat, &root_alloc_sorted, alloc_sort_fn);
419 __sort_result(&root_caller_stat, &root_caller_sorted, caller_sort_fn);
420}
421
/*
 * Run the report: start the pager, read the data file, sort and print.
 * Always returns 0.
 */
static int __cmd_kmem(void)
{
	setup_pager();

	/*
	 * NOTE(review): the result of read_events() is deliberately left
	 * unused here, as before; confirm its return convention before
	 * propagating it to the caller.
	 */
	(void)read_events();

	sort_result();
	print_result();

	return 0;
}
431
432static const char * const kmem_usage[] = {
433 "perf kmem [<options>] {record}",
434 NULL
435};
436
437
438static int ptr_cmp(struct alloc_stat *l, struct alloc_stat *r)
439{
440 if (l->ptr < r->ptr)
441 return -1;
442 else if (l->ptr > r->ptr)
443 return 1;
444 return 0;
445}
446
447static int callsite_cmp(struct alloc_stat *l, struct alloc_stat *r)
448{
449 if (l->call_site < r->call_site)
450 return -1;
451 else if (l->call_site > r->call_site)
452 return 1;
453 return 0;
454}
455
Pekka Enbergf3ced7c2009-11-22 11:58:00 +0200456static int hit_cmp(struct alloc_stat *l, struct alloc_stat *r)
457{
458 if (l->hit < r->hit)
459 return -1;
460 else if (l->hit > r->hit)
461 return 1;
462 return 0;
463}
464
Li Zefanba77c9e2009-11-20 15:53:25 +0800465static int bytes_cmp(struct alloc_stat *l, struct alloc_stat *r)
466{
467 if (l->bytes_alloc < r->bytes_alloc)
468 return -1;
469 else if (l->bytes_alloc > r->bytes_alloc)
470 return 1;
471 return 0;
472}
473
Pekka Enbergf3ced7c2009-11-22 11:58:00 +0200474static int frag_cmp(struct alloc_stat *l, struct alloc_stat *r)
475{
476 double x, y;
477
478 x = fragmentation(l->bytes_req, l->bytes_alloc);
479 y = fragmentation(r->bytes_req, r->bytes_alloc);
480
481 if (x < y)
482 return -1;
483 else if (x > y)
484 return 1;
485 return 0;
486}
487
Li Zefanba77c9e2009-11-20 15:53:25 +0800488static int parse_sort_opt(const struct option *opt __used,
489 const char *arg, int unset __used)
490{
491 sort_fn_t sort_fn;
492
493 if (!arg)
494 return -1;
495
496 if (strcmp(arg, "ptr") == 0)
497 sort_fn = ptr_cmp;
498 else if (strcmp(arg, "call_site") == 0)
499 sort_fn = callsite_cmp;
Pekka Enbergf3ced7c2009-11-22 11:58:00 +0200500 else if (strcmp(arg, "hit") == 0)
501 sort_fn = hit_cmp;
Li Zefanba77c9e2009-11-20 15:53:25 +0800502 else if (strcmp(arg, "bytes") == 0)
503 sort_fn = bytes_cmp;
Pekka Enbergf3ced7c2009-11-22 11:58:00 +0200504 else if (strcmp(arg, "frag") == 0)
505 sort_fn = frag_cmp;
Li Zefanba77c9e2009-11-20 15:53:25 +0800506 else
507 return -1;
508
509 if (caller_flag > alloc_flag)
510 caller_sort_fn = sort_fn;
511 else
512 alloc_sort_fn = sort_fn;
513
514 return 0;
515}
516
517static int parse_stat_opt(const struct option *opt __used,
518 const char *arg, int unset __used)
519{
520 if (!arg)
521 return -1;
522
523 if (strcmp(arg, "alloc") == 0)
524 alloc_flag = (caller_flag + 1);
525 else if (strcmp(arg, "caller") == 0)
526 caller_flag = (alloc_flag + 1);
527 else
528 return -1;
529 return 0;
530}
531
532static int parse_line_opt(const struct option *opt __used,
533 const char *arg, int unset __used)
534{
535 int lines;
536
537 if (!arg)
538 return -1;
539
540 lines = strtoul(arg, NULL, 10);
541
542 if (caller_flag > alloc_flag)
543 caller_lines = lines;
544 else
545 alloc_lines = lines;
546
547 return 0;
548}
549
550static const struct option kmem_options[] = {
551 OPT_STRING('i', "input", &input_name, "file",
552 "input file name"),
553 OPT_CALLBACK(0, "stat", NULL, "<alloc>|<caller>",
554 "stat selector, Pass 'alloc' or 'caller'.",
555 parse_stat_opt),
556 OPT_CALLBACK('s', "sort", NULL, "key",
Pekka Enbergf3ced7c2009-11-22 11:58:00 +0200557 "sort by key: ptr, call_site, hit, bytes, frag",
Li Zefanba77c9e2009-11-20 15:53:25 +0800558 parse_sort_opt),
559 OPT_CALLBACK('l', "line", NULL, "num",
560 "show n lins",
561 parse_line_opt),
Li Zefan7707b6b2009-11-24 13:25:48 +0800562 OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"),
Li Zefanba77c9e2009-11-20 15:53:25 +0800563 OPT_END()
564};
565
/*
 * Fixed arguments that "perf kmem record" places in front of the user's
 * own arguments before calling cmd_record(): the record flags followed by
 * one -e option per kmem tracepoint.
 */
static const char *record_args[] = {
	"record",
	"-a",
	"-R",
	"-M",
	"-f",
	"-c",
	"1",
	"-e",
	"kmem:kmalloc",
	"-e",
	"kmem:kmalloc_node",
	"-e",
	"kmem:kfree",
	"-e",
	"kmem:kmem_cache_alloc",
	"-e",
	"kmem:kmem_cache_alloc_node",
	"-e",
	"kmem:kmem_cache_free",
};
580
581static int __cmd_record(int argc, const char **argv)
582{
583 unsigned int rec_argc, i, j;
584 const char **rec_argv;
585
586 rec_argc = ARRAY_SIZE(record_args) + argc - 1;
587 rec_argv = calloc(rec_argc + 1, sizeof(char *));
588
589 for (i = 0; i < ARRAY_SIZE(record_args); i++)
590 rec_argv[i] = strdup(record_args[i]);
591
592 for (j = 1; j < (unsigned int)argc; j++, i++)
593 rec_argv[i] = argv[j];
594
595 return cmd_record(i, rec_argv, NULL);
596}
597
598int cmd_kmem(int argc, const char **argv, const char *prefix __used)
599{
600 symbol__init(0);
601
602 argc = parse_options(argc, argv, kmem_options, kmem_usage, 0);
603
604 if (argc && !strncmp(argv[0], "rec", 3))
605 return __cmd_record(argc, argv);
606 else if (argc)
607 usage_with_options(kmem_usage, kmem_options);
608
609 if (!alloc_sort_fn)
610 alloc_sort_fn = bytes_cmp;
611 if (!caller_sort_fn)
612 caller_sort_fn = bytes_cmp;
613
614 return __cmd_kmem();
615}
616