perf report: Fix --stdio output alignment when --showcpuutilization used

The current perf report output is broken if --showcpuutilization is used.
Combining it with -n and/or --show-total-period makes things worse.
This patch fixes it as follows:

before:
    48.25%    48.25%     0.00%    sleep  [kernel.kallsyms]  [k] trace_hardirqs_off
    34.99%    34.99%     0.00%    sleep  [kernel.kallsyms]  [k] __find_get_block_slow
    15.99%    15.99%     0.00%    sleep  [kernel.kallsyms]  [k] lock_release_holdtime
     0.77%     0.77%     0.00%    sleep  [kernel.kallsyms]  [k] native_write_msr_safe

after:
    48.25%    48.25%     0.00%    sleep  [kernel.kallsyms]  [k] trace_hardirqs_off
    34.99%    34.99%     0.00%    sleep  [kernel.kallsyms]  [k] __find_get_block_slow
    15.99%    15.99%     0.00%    sleep  [kernel.kallsyms]  [k] lock_release_holdtime
     0.77%     0.77%     0.00%    sleep  [kernel.kallsyms]  [k] native_write_msr_safe

Cc: Ingo Molnar <mingo@elte.hu>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1325957132-10600-8-git-send-email-namhyung@gmail.com
Signed-off-by: Namhyung Kim <namhyung@gmail.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 4df4495..6f505d1 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -919,6 +919,24 @@
 
 	fprintf(fp, "# %s", pair ? "Baseline" : "Overhead");
 
+	if (symbol_conf.show_cpu_utilization) {
+		if (sep) {
+			ret += fprintf(fp, "%csys", *sep);
+			ret += fprintf(fp, "%cus", *sep);
+			if (perf_guest) {
+				ret += fprintf(fp, "%cguest sys", *sep);
+				ret += fprintf(fp, "%cguest us", *sep);
+			}
+		} else {
+			ret += fprintf(fp, "     sys  ");
+			ret += fprintf(fp, "      us  ");
+			if (perf_guest) {
+				ret += fprintf(fp, "  guest sys  ");
+				ret += fprintf(fp, "  guest us  ");
+			}
+		}
+	}
+
 	if (symbol_conf.show_nr_samples) {
 		if (sep)
 			fprintf(fp, "%cSamples", *sep);
@@ -933,24 +951,6 @@
 			ret += fprintf(fp, "   Period    ");
 	}
 
-	if (symbol_conf.show_cpu_utilization) {
-		if (sep) {
-			ret += fprintf(fp, "%csys", *sep);
-			ret += fprintf(fp, "%cus", *sep);
-			if (perf_guest) {
-				ret += fprintf(fp, "%cguest sys", *sep);
-				ret += fprintf(fp, "%cguest us", *sep);
-			}
-		} else {
-			ret += fprintf(fp, "  sys  ");
-			ret += fprintf(fp, "  us  ");
-			if (perf_guest) {
-				ret += fprintf(fp, "  guest sys  ");
-				ret += fprintf(fp, "  guest us  ");
-			}
-		}
-	}
-
 	if (pair) {
 		if (sep)
 			ret += fprintf(fp, "%cDelta", *sep);
@@ -995,6 +995,8 @@
 		goto print_entries;
 
 	fprintf(fp, "# ........");
+	if (symbol_conf.show_cpu_utilization)
+		fprintf(fp, "   .......   .......");
 	if (symbol_conf.show_nr_samples)
 		fprintf(fp, " ..........");
 	if (symbol_conf.show_total_period)