perf report: Add support for callchain graph output
Currently, callchains are printed in a single vertical
level; this is the "flat" mode:
8.25% [k] copy_user_generic_string
4.19%
copy_user_generic_string
generic_file_aio_read
do_sync_read
vfs_read
sys_pread64
system_call_fastpath
pread64
This patch introduces a new "graph" mode which provides a
hierarchical output of factorized paths recursively sorted:
8.25% [k] copy_user_generic_string
|
|--4.31%-- generic_file_aio_read
| do_sync_read
| vfs_read
| |
| |--4.19%-- sys_pread64
| | system_call_fastpath
| | pread64
| |
| --0.12%-- sys_read
| system_call_fastpath
| __read
|
|--3.24%-- generic_file_buffered_write
| __generic_file_aio_write_nolock
| generic_file_aio_write
| do_sync_write
| reiserfs_file_write
| vfs_write
| |
| |--3.14%-- sys_pwrite64
| | system_call_fastpath
| | __pwrite64
| |
| --0.10%-- sys_write
[...]
The command line has changed accordingly.
When the -c option is given, callchains are output in
flat mode by default.
But you can override it:
perf report -c graph
or
perf report -c flat
You can also pass an abbreviated mode name:
perf report -c g
or
perf report -c gra
Both of these select the graph mode.
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Anton Blanchard <anton@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <1246550301-8954-3-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 3c4a91f..a9873aa 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -19,9 +19,9 @@
#define chain_for_each_child(child, parent) \
list_for_each_entry(child, &parent->children, brothers)
-
static void
-rb_insert_callchain(struct rb_root *root, struct callchain_node *chain)
+rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
+ enum chain_mode mode)
{
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
@@ -31,10 +31,22 @@
parent = *p;
rnode = rb_entry(parent, struct callchain_node, rb_node);
- if (rnode->hit < chain->hit)
- p = &(*p)->rb_left;
- else
- p = &(*p)->rb_right;
+ switch (mode) {
+ case FLAT:
+ if (rnode->hit < chain->hit)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ break;
+ case GRAPH:
+ if (rnode->cumul_hit < chain->cumul_hit)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ break;
+ default:
+ break;
+ }
}
rb_link_node(&chain->rb_node, parent, p);
@@ -45,15 +57,36 @@
* Once we get every callchains from the stream, we can now
* sort them by hit
*/
-void sort_chain_to_rbtree(struct rb_root *rb_root, struct callchain_node *node)
+void sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node)
{
struct callchain_node *child;
chain_for_each_child(child, node)
- sort_chain_to_rbtree(rb_root, child);
+ sort_chain_flat(rb_root, child);
if (node->hit)
- rb_insert_callchain(rb_root, node);
+ rb_insert_callchain(rb_root, node, FLAT);
+}
+
+static void __sort_chain_graph(struct callchain_node *node)
+{
+ struct callchain_node *child;
+
+ node->rb_root = RB_ROOT;
+ node->cumul_hit = node->hit;
+
+ chain_for_each_child(child, node) {
+ __sort_chain_graph(child);
+ rb_insert_callchain(&node->rb_root, child, GRAPH);
+ node->cumul_hit += child->cumul_hit;
+ }
+}
+
+void
+sort_chain_graph(struct rb_root *rb_root, struct callchain_node *chain_root)
+{
+ __sort_chain_graph(chain_root);
+ rb_root->rb_node = chain_root->rb_root.rb_node;
}
/*