blob: 946ed45f7d2fd49cda8abe74fd6ec48c68c959dd [file] [log] [blame]
/*
 * kernel/time/timer_stats.c
 *
 * Collect timer usage statistics.
 *
 * Copyright(C) 2006, Red Hat, Inc., Ingo Molnar
 * Copyright(C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
 *
 * timer_stats is based on timer_top, a similar facility which was part of
 * Con Kolivas' dyntick patch set. It was developed by Daniel Petrini at the
 * Instituto Nokia de Tecnologia - INdT - Manaus. timer_top's design was based
 * on dynamic allocation of the statistics entries and linear search based
 * lookup combined with a global lock, rather than the static array, hash
 * and per-CPU locking which is used by timer_stats. It was written for the
 * pre-hrtimer kernel code and therefore did not take hrtimers into account.
 * Nevertheless it provided the base for the timer_stats implementation and
 * was a helpful source of inspiration. Kudos to Daniel and the Nokia folks
 * for this effort.
 *
 * timer_top.c is
 *	Copyright (C) 2005 Instituto Nokia de Tecnologia - INdT - Manaus
 *	Written by Daniel Petrini <d.pensator@gmail.com>
 *	timer_top.c was released under the GNU General Public License version 2
 *
 * We export the addresses and counts of the timer functions being called,
 * plus the pid and cmdline of the owner process if applicable.
 *
 * Start/stop data collection:
 * # echo 1[0] >/proc/timer_stats
 *
 * Display the information collected so far:
 * # cat /proc/timer_stats
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
38
39#include <linux/proc_fs.h>
40#include <linux/module.h>
41#include <linux/spinlock.h>
42#include <linux/sched.h>
43#include <linux/seq_file.h>
44#include <linux/kallsyms.h>
45
46#include <asm/uaccess.h>
47
48/*
49 * This is our basic unit of interest: a timer expiry event identified
50 * by the timer, its start/expire functions and the PID of the task that
51 * started the timer. We count the number of times an event happens:
52 */
53struct entry {
54 /*
55 * Hash list:
56 */
57 struct entry *next;
58
59 /*
60 * Hash keys:
61 */
62 void *timer;
63 void *start_func;
64 void *expire_func;
65 pid_t pid;
66
67 /*
68 * Number of timeout events:
69 */
70 unsigned long count;
71
72 /*
73 * We save the command-line string to preserve
74 * this information past task exit:
75 */
76 char comm[TASK_COMM_LEN + 1];
77
78} ____cacheline_aligned_in_smp;
79
80/*
81 * Spinlock protecting the tables - not taken during lookup:
82 */
83static DEFINE_SPINLOCK(table_lock);
84
85/*
86 * Per-CPU lookup locks for fast hash lookup:
87 */
88static DEFINE_PER_CPU(spinlock_t, lookup_lock);
89
90/*
91 * Mutex to serialize state changes with show-stats activities:
92 */
93static DEFINE_MUTEX(show_mutex);
94
95/*
96 * Collection status, active/inactive:
97 */
98static int __read_mostly active;
99
100/*
101 * Beginning/end timestamps of measurement:
102 */
103static ktime_t time_start, time_stop;
104
105/*
106 * tstat entry structs only get allocated while collection is
107 * active and never freed during that time - this simplifies
108 * things quite a bit.
109 *
110 * They get freed when a new collection period is started.
111 */
112#define MAX_ENTRIES_BITS 10
113#define MAX_ENTRIES (1UL << MAX_ENTRIES_BITS)
114
115static unsigned long nr_entries;
116static struct entry entries[MAX_ENTRIES];
117
118static atomic_t overflow_count;
119
120static void reset_entries(void)
121{
122 nr_entries = 0;
123 memset(entries, 0, sizeof(entries));
124 atomic_set(&overflow_count, 0);
125}
126
127static struct entry *alloc_entry(void)
128{
129 if (nr_entries >= MAX_ENTRIES)
130 return NULL;
131
132 return entries + nr_entries++;
133}
134
135/*
136 * The entries are in a hash-table, for fast lookup:
137 */
138#define TSTAT_HASH_BITS (MAX_ENTRIES_BITS - 1)
139#define TSTAT_HASH_SIZE (1UL << TSTAT_HASH_BITS)
140#define TSTAT_HASH_MASK (TSTAT_HASH_SIZE - 1)
141
142#define __tstat_hashfn(entry) \
143 (((unsigned long)(entry)->timer ^ \
144 (unsigned long)(entry)->start_func ^ \
145 (unsigned long)(entry)->expire_func ^ \
146 (unsigned long)(entry)->pid ) & TSTAT_HASH_MASK)
147
148#define tstat_hashentry(entry) (tstat_hash_table + __tstat_hashfn(entry))
149
150static struct entry *tstat_hash_table[TSTAT_HASH_SIZE] __read_mostly;
151
152static int match_entries(struct entry *entry1, struct entry *entry2)
153{
154 return entry1->timer == entry2->timer &&
155 entry1->start_func == entry2->start_func &&
156 entry1->expire_func == entry2->expire_func &&
157 entry1->pid == entry2->pid;
158}
159
160/*
161 * Look up whether an entry matching this item is present
162 * in the hash already. Must be called with irqs off and the
163 * lookup lock held:
164 */
165static struct entry *tstat_lookup(struct entry *entry, char *comm)
166{
167 struct entry **head, *curr, *prev;
168
169 head = tstat_hashentry(entry);
170 curr = *head;
171
172 /*
173 * The fastpath is when the entry is already hashed,
174 * we do this with the lookup lock held, but with the
175 * table lock not held:
176 */
177 while (curr) {
178 if (match_entries(curr, entry))
179 return curr;
180
181 curr = curr->next;
182 }
183 /*
184 * Slowpath: allocate, set up and link a new hash entry:
185 */
186 prev = NULL;
187 curr = *head;
188
189 spin_lock(&table_lock);
190 /*
191 * Make sure we have not raced with another CPU:
192 */
193 while (curr) {
194 if (match_entries(curr, entry))
195 goto out_unlock;
196
197 prev = curr;
198 curr = curr->next;
199 }
200
201 curr = alloc_entry();
202 if (curr) {
203 *curr = *entry;
204 curr->count = 0;
205 memcpy(curr->comm, comm, TASK_COMM_LEN);
206 if (prev)
207 prev->next = curr;
208 else
209 *head = curr;
210 curr->next = NULL;
211 }
212 out_unlock:
213 spin_unlock(&table_lock);
214
215 return curr;
216}
217
218/**
219 * timer_stats_update_stats - Update the statistics for a timer.
220 * @timer: pointer to either a timer_list or a hrtimer
221 * @pid: the pid of the task which set up the timer
222 * @startf: pointer to the function which did the timer setup
223 * @timerf: pointer to the timer callback function of the timer
224 * @comm: name of the process which set up the timer
225 *
226 * When the timer is already registered, then the event counter is
227 * incremented. Otherwise the timer is registered in a free slot.
228 */
229void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
230 void *timerf, char * comm)
231{
232 /*
233 * It doesnt matter which lock we take:
234 */
235 spinlock_t *lock = &per_cpu(lookup_lock, raw_smp_processor_id());
236 struct entry *entry, input;
237 unsigned long flags;
238
239 input.timer = timer;
240 input.start_func = startf;
241 input.expire_func = timerf;
242 input.pid = pid;
243
244 spin_lock_irqsave(lock, flags);
245 if (!active)
246 goto out_unlock;
247
248 entry = tstat_lookup(&input, comm);
249 if (likely(entry))
250 entry->count++;
251 else
252 atomic_inc(&overflow_count);
253
254 out_unlock:
255 spin_unlock_irqrestore(lock, flags);
256}
257
258static void print_name_offset(struct seq_file *m, unsigned long addr)
259{
260 char namebuf[KSYM_NAME_LEN+1];
Ingo Molnar82f67cd2007-02-16 01:28:13 -0800261 const char *sym_name;
Ingo Molnar82f67cd2007-02-16 01:28:13 -0800262
Alexey Dobriyanffb45122007-05-08 00:28:41 -0700263 sym_name = kallsyms_lookup(addr, NULL, NULL, NULL, namebuf);
Ingo Molnar82f67cd2007-02-16 01:28:13 -0800264 if (sym_name)
265 seq_printf(m, "%s", sym_name);
266 else
267 seq_printf(m, "<%p>", (void *)addr);
268}
269
270static int tstats_show(struct seq_file *m, void *v)
271{
272 struct timespec period;
273 struct entry *entry;
274 unsigned long ms;
275 long events = 0;
276 ktime_t time;
277 int i;
278
279 mutex_lock(&show_mutex);
280 /*
281 * If still active then calculate up to now:
282 */
283 if (active)
284 time_stop = ktime_get();
285
286 time = ktime_sub(time_stop, time_start);
287
288 period = ktime_to_timespec(time);
289 ms = period.tv_nsec / 1000000;
290
291 seq_puts(m, "Timer Stats Version: v0.1\n");
292 seq_printf(m, "Sample period: %ld.%03ld s\n", period.tv_sec, ms);
293 if (atomic_read(&overflow_count))
294 seq_printf(m, "Overflow: %d entries\n",
295 atomic_read(&overflow_count));
296
297 for (i = 0; i < nr_entries; i++) {
298 entry = entries + i;
299 seq_printf(m, "%4lu, %5d %-16s ",
300 entry->count, entry->pid, entry->comm);
301
302 print_name_offset(m, (unsigned long)entry->start_func);
303 seq_puts(m, " (");
304 print_name_offset(m, (unsigned long)entry->expire_func);
305 seq_puts(m, ")\n");
306
307 events += entry->count;
308 }
309
310 ms += period.tv_sec * 1000;
311 if (!ms)
312 ms = 1;
313
314 if (events && period.tv_sec)
315 seq_printf(m, "%ld total events, %ld.%ld events/sec\n", events,
316 events / period.tv_sec, events * 1000 / ms);
317 else
318 seq_printf(m, "%ld total events\n", events);
319
320 mutex_unlock(&show_mutex);
321
322 return 0;
323}
324
325/*
326 * After a state change, make sure all concurrent lookup/update
327 * activities have stopped:
328 */
329static void sync_access(void)
330{
331 unsigned long flags;
332 int cpu;
333
334 for_each_online_cpu(cpu) {
335 spin_lock_irqsave(&per_cpu(lookup_lock, cpu), flags);
336 /* nothing */
337 spin_unlock_irqrestore(&per_cpu(lookup_lock, cpu), flags);
338 }
339}
340
341static ssize_t tstats_write(struct file *file, const char __user *buf,
342 size_t count, loff_t *offs)
343{
344 char ctl[2];
345
346 if (count != 2 || *offs)
347 return -EINVAL;
348
349 if (copy_from_user(ctl, buf, count))
350 return -EFAULT;
351
352 mutex_lock(&show_mutex);
353 switch (ctl[0]) {
354 case '0':
355 if (active) {
356 active = 0;
357 time_stop = ktime_get();
358 sync_access();
359 }
360 break;
361 case '1':
362 if (!active) {
363 reset_entries();
364 time_start = ktime_get();
365 active = 1;
366 }
367 break;
368 default:
369 count = -EINVAL;
370 }
371 mutex_unlock(&show_mutex);
372
373 return count;
374}
375
376static int tstats_open(struct inode *inode, struct file *filp)
377{
378 return single_open(filp, tstats_show, NULL);
379}
380
381static struct file_operations tstats_fops = {
382 .open = tstats_open,
383 .read = seq_read,
384 .write = tstats_write,
385 .llseek = seq_lseek,
386 .release = seq_release,
387};
388
389void __init init_timer_stats(void)
390{
391 int cpu;
392
393 for_each_possible_cpu(cpu)
394 spin_lock_init(&per_cpu(lookup_lock, cpu));
395}
396
397static int __init init_tstats_procfs(void)
398{
399 struct proc_dir_entry *pe;
400
401 pe = create_proc_entry("timer_stats", 0644, NULL);
402 if (!pe)
403 return -ENOMEM;
404
405 pe->proc_fops = &tstats_fops;
406
407 return 0;
408}
409__initcall(init_tstats_procfs);