procfs: add num_to_str() to speed up /proc/stat
== stat_check.py
num = 0
with open("/proc/stat") as f:
while num < 1000 :
data = f.read()
f.seek(0, 0)
num = num + 1
==
perf shows
20.39% stat_check.py [kernel.kallsyms] [k] format_decode
13.41% stat_check.py [kernel.kallsyms] [k] number
12.61% stat_check.py [kernel.kallsyms] [k] vsnprintf
10.85% stat_check.py [kernel.kallsyms] [k] memcpy
4.85% stat_check.py [kernel.kallsyms] [k] radix_tree_lookup
4.43% stat_check.py [kernel.kallsyms] [k] seq_printf
This patch removes most of calls to vsnprintf() by adding num_to_str()
and seq_print_decimal_ull(), which prints decimal numbers without rich
functions provided by printf().
On my 8cpu box.
== Before patch ==
[root@bluextal test]# time ./stat_check.py
real 0m0.150s
user 0m0.026s
sys 0m0.121s
== After patch ==
[root@bluextal test]# time ./stat_check.py
real 0m0.055s
user 0m0.022s
sys 0m0.030s
[akpm@linux-foundation.org: remove incorrect comment, use less statck in num_to_str(), move comment from .h to .c, simplify seq_put_decimal_ull()]
[andrea@betterlinux.com: avoid breaking the ABI in /proc/stat]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrea Righi <andrea@betterlinux.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Glauber Costa <glommer@parallels.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Paul Turner <pjt@google.com>
Cc: Russell King <rmk@arm.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index ac44611..6a0c62d 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -89,18 +89,19 @@
}
sum += arch_irq_stat();
- seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu "
- "%llu\n",
- (unsigned long long)cputime64_to_clock_t(user),
- (unsigned long long)cputime64_to_clock_t(nice),
- (unsigned long long)cputime64_to_clock_t(system),
- (unsigned long long)cputime64_to_clock_t(idle),
- (unsigned long long)cputime64_to_clock_t(iowait),
- (unsigned long long)cputime64_to_clock_t(irq),
- (unsigned long long)cputime64_to_clock_t(softirq),
- (unsigned long long)cputime64_to_clock_t(steal),
- (unsigned long long)cputime64_to_clock_t(guest),
- (unsigned long long)cputime64_to_clock_t(guest_nice));
+ seq_puts(p, "cpu ");
+ seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(user));
+ seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(nice));
+ seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(system));
+ seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(idle));
+ seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(iowait));
+ seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(irq));
+ seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(softirq));
+ seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(steal));
+ seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest));
+ seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest_nice));
+ seq_putc(p, '\n');
+
for_each_online_cpu(i) {
/* Copy values here to work around gcc-2.95.3, gcc-2.96 */
user = kcpustat_cpu(i).cpustat[CPUTIME_USER];
@@ -113,26 +114,24 @@
steal = kcpustat_cpu(i).cpustat[CPUTIME_STEAL];
guest = kcpustat_cpu(i).cpustat[CPUTIME_GUEST];
guest_nice = kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE];
- seq_printf(p,
- "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu "
- "%llu\n",
- i,
- (unsigned long long)cputime64_to_clock_t(user),
- (unsigned long long)cputime64_to_clock_t(nice),
- (unsigned long long)cputime64_to_clock_t(system),
- (unsigned long long)cputime64_to_clock_t(idle),
- (unsigned long long)cputime64_to_clock_t(iowait),
- (unsigned long long)cputime64_to_clock_t(irq),
- (unsigned long long)cputime64_to_clock_t(softirq),
- (unsigned long long)cputime64_to_clock_t(steal),
- (unsigned long long)cputime64_to_clock_t(guest),
- (unsigned long long)cputime64_to_clock_t(guest_nice));
+ seq_printf(p, "cpu%d", i);
+ seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(user));
+ seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(nice));
+ seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(system));
+ seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(idle));
+ seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(iowait));
+ seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(irq));
+ seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(softirq));
+ seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(steal));
+ seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest));
+ seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest_nice));
+ seq_putc(p, '\n');
}
seq_printf(p, "intr %llu", (unsigned long long)sum);
/* sum again ? it could be updated? */
for_each_irq_nr(j)
- seq_printf(p, " %u", kstat_irqs(j));
+ seq_put_decimal_ull(p, ' ', kstat_irqs(j));
seq_printf(p,
"\nctxt %llu\n"
@@ -149,7 +148,7 @@
seq_printf(p, "softirq %llu", (unsigned long long)sum_softirq);
for (i = 0; i < NR_SOFTIRQS; i++)
- seq_printf(p, " %u", per_softirq_sums[i]);
+ seq_put_decimal_ull(p, ' ', per_softirq_sums[i]);
seq_putc(p, '\n');
return 0;