Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:
User visible changes:
- Add the ability to select which registers to record, to reduce the
size of perf.data files, and also allow printing the registers in
'perf script': (Stephane Eranian)
# perf record --intr-regs=AX,SP usleep 1
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.016 MB perf.data (8 samples) ]
# perf script -F ip,sym,iregs | tail -5
ffffffff8105f42a native_write_msr_safe AX:0xf SP:0xffff8802629c3c00
ffffffff8105f42a native_write_msr_safe AX:0xf SP:0xffff8802629c3c00
ffffffff81761ac0 _raw_spin_lock AX:0xffff8801bfcf8020 SP:0xffff8802629c3ce8
ffffffff81202bf8 __vma_adjust_trans_huge AX:0x7ffc75200000 SP:0xffff8802629c3b30
ffffffff8122b089 dput AX:0x101 SP:0xffff8802629c3c78
#
Infrastructure changes:
- Open event on evsel cpus and threads. (Kan Liang)
- Add new bpf API to get name from a BPF object. (Wang Nan)
Build fixes:
- Fix build on powerpc broken by pt/bts. (Adrian Hunter)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 4fa4bc4..4252fc2 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -880,15 +880,26 @@
}
struct bpf_object *bpf_object__open_buffer(void *obj_buf,
- size_t obj_buf_sz)
+ size_t obj_buf_sz,
+ const char *name)
{
+ char tmp_name[64];
+
/* param validation */
if (!obj_buf || obj_buf_sz <= 0)
return NULL;
- pr_debug("loading object from buffer\n");
+ if (!name) {
+ snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx",
+ (unsigned long)obj_buf,
+ (unsigned long)obj_buf_sz);
+ tmp_name[sizeof(tmp_name) - 1] = '\0';
+ name = tmp_name;
+ }
+ pr_debug("loading object '%s' from buffer\n",
+ name);
- return __bpf_object__open("[buffer]", obj_buf, obj_buf_sz);
+ return __bpf_object__open(name, obj_buf, obj_buf_sz);
}
int bpf_object__unload(struct bpf_object *obj)
@@ -975,6 +986,14 @@
return next;
}
+/* Return the name stored in obj->path, or NULL when @obj itself is NULL. */
+const char *bpf_object__get_name(struct bpf_object *obj)
+{
+	const char *obj_name = obj ? obj->path : NULL;
+
+	return obj_name;
+}
+
struct bpf_program *
bpf_program__next(struct bpf_program *prev, struct bpf_object *obj)
{
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index ea8adc2..f16170c 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -28,12 +28,14 @@
struct bpf_object *bpf_object__open(const char *path);
struct bpf_object *bpf_object__open_buffer(void *obj_buf,
- size_t obj_buf_sz);
+ size_t obj_buf_sz,
+ const char *name);
void bpf_object__close(struct bpf_object *object);
/* Load/unload object into/from kernel */
int bpf_object__load(struct bpf_object *obj);
int bpf_object__unload(struct bpf_object *obj);
+const char *bpf_object__get_name(struct bpf_object *obj);
struct bpf_object *bpf_object__next(struct bpf_object *prev);
#define bpf_object__for_each_safe(pos, tmp) \
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 347a273..2e9ce77 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -276,7 +276,11 @@
--intr-regs::
Capture machine state (registers) at interrupt, i.e., on counter overflows for
each sample. List of captured registers depends on the architecture. This option
-is off by default.
+is off by default. It is possible to select the registers to sample using their
+symbolic names, e.g. on x86, ax, si. To list the available registers use
+--intr-regs=\?. To name registers, pass a comma separated list such as
+--intr-regs=ax,bx. The list of registers is architecture dependent.
+
--running-time::
Record running and enabled time for read events (:S)
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 614b2c7..dc3ec78 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -116,7 +116,7 @@
--fields::
Comma separated list of fields to print. Options are:
comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
- srcline, period, flags.
+ srcline, period, iregs, flags.
Field list can be prepended with the type, trace, sw or hw,
to indicate to which event type the field list applies.
e.g., -f sw:comm,tid,time,ip,sym and -f trace:time,cpu,trace
diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build
index 2c55e1b..ff63649f 100644
--- a/tools/perf/arch/x86/util/Build
+++ b/tools/perf/arch/x86/util/Build
@@ -2,6 +2,7 @@
libperf-y += tsc.o
libperf-y += pmu.o
libperf-y += kvm-stat.o
+libperf-y += perf_regs.o
libperf-$(CONFIG_DWARF) += dwarf-regs.o
diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c
new file mode 100644
index 0000000..087c84e
--- /dev/null
+++ b/tools/perf/arch/x86/util/perf_regs.c
@@ -0,0 +1,30 @@
+#include "../../perf.h"
+#include "../../util/perf_regs.h"
+
+/* One table entry: the register's name string and a mask with only bit b set. */
+#define SMPL_REG(n, b) { .name = #n, .mask = 1ULL << (b) }
+const struct sample_reg sample_reg_masks[] = {
+	SMPL_REG(AX, PERF_REG_X86_AX),
+	SMPL_REG(BX, PERF_REG_X86_BX),
+	SMPL_REG(CX, PERF_REG_X86_CX),
+	SMPL_REG(DX, PERF_REG_X86_DX),
+	SMPL_REG(SI, PERF_REG_X86_SI),
+	SMPL_REG(DI, PERF_REG_X86_DI),
+	SMPL_REG(BP, PERF_REG_X86_BP),
+	SMPL_REG(SP, PERF_REG_X86_SP),
+	SMPL_REG(IP, PERF_REG_X86_IP),
+	SMPL_REG(FLAGS, PERF_REG_X86_FLAGS),
+	SMPL_REG(CS, PERF_REG_X86_CS),
+	SMPL_REG(SS, PERF_REG_X86_SS),
+#ifdef HAVE_ARCH_X86_64_SUPPORT
+	SMPL_REG(R8, PERF_REG_X86_R8),
+	SMPL_REG(R9, PERF_REG_X86_R9),
+	SMPL_REG(R10, PERF_REG_X86_R10),
+	SMPL_REG(R11, PERF_REG_X86_R11),
+	SMPL_REG(R12, PERF_REG_X86_R12),
+	SMPL_REG(R13, PERF_REG_X86_R13),
+	SMPL_REG(R14, PERF_REG_X86_R14),
+	SMPL_REG(R15, PERF_REG_X86_R15),
+#endif
+	{ .name = NULL }	/* NULL name terminates the table */
+};
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index a660022..142eeb3 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -27,8 +27,10 @@
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
+#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/parse-branch-options.h"
+#include "util/parse-regs-options.h"
#include <unistd.h>
#include <sched.h>
@@ -279,7 +281,7 @@
evlist__for_each(evlist, pos) {
try_again:
- if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
+ if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
if (verbose)
ui__warning("%s\n", msg);
@@ -1080,8 +1082,9 @@
"sample transaction flags (special events only)"),
OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
"use per-thread mmaps"),
- OPT_BOOLEAN('I', "intr-regs", &record.opts.sample_intr_regs,
- "Sample machine registers on interrupt"),
+ OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
+ "sample selected machine registers on interrupt,"
+ " use -I ? to list register names", parse_regs),
OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
"Record running/enabled time of read (:S) events"),
OPT_CALLBACK('k', "clockid", &record.opts,
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 4430340..eb51325 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -6,6 +6,7 @@
#include "util/exec_cmd.h"
#include "util/header.h"
#include "util/parse-options.h"
+#include "util/perf_regs.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
@@ -46,6 +47,7 @@
PERF_OUTPUT_SYMOFFSET = 1U << 11,
PERF_OUTPUT_SRCLINE = 1U << 12,
PERF_OUTPUT_PERIOD = 1U << 13,
+ PERF_OUTPUT_IREGS = 1U << 14,
};
struct output_option {
@@ -66,6 +68,7 @@
{.str = "symoff", .field = PERF_OUTPUT_SYMOFFSET},
{.str = "srcline", .field = PERF_OUTPUT_SRCLINE},
{.str = "period", .field = PERF_OUTPUT_PERIOD},
+ {.str = "iregs", .field = PERF_OUTPUT_IREGS},
};
/* default set to maintain compatibility with current format */
@@ -255,6 +258,11 @@
PERF_OUTPUT_PERIOD))
return -EINVAL;
+ if (PRINT_FIELD(IREGS) &&
+ perf_evsel__check_stype(evsel, PERF_SAMPLE_REGS_INTR, "IREGS",
+ PERF_OUTPUT_IREGS))
+ return -EINVAL;
+
return 0;
}
@@ -352,6 +360,24 @@
return 0;
}
+/* Print "NAME:0xVALUE " for every register bit set in attr->sample_regs_intr. */
+static void print_sample_iregs(union perf_event *event __maybe_unused,
+			       struct perf_sample *sample,
+			       struct thread *thread __maybe_unused,
+			       struct perf_event_attr *attr)
+{
+	struct regs_dump *regs = &sample->intr_regs;
+	uint64_t mask = attr->sample_regs_intr;
+	unsigned i = 0, r;
+
+	/* &sample->intr_regs can never be NULL; guard the value array instead */
+	if (!regs->regs)
+		return;
+	/* regs->regs[] is consumed in order: one value per set bit, lowest first */
+	for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8)
+		printf("%5s:0x%"PRIx64" ", perf_reg_name(r), regs->regs[i++]);
+}
+
static void print_sample_start(struct perf_sample *sample,
struct thread *thread,
struct perf_evsel *evsel)
@@ -525,6 +551,9 @@
PERF_MAX_STACK_DEPTH);
}
+ if (PRINT_FIELD(IREGS))
+ print_sample_iregs(event, sample, thread, attr);
+
printf("\n");
}
@@ -1643,7 +1672,7 @@
"comma separated output fields prepend with 'type:'. "
"Valid types: hw,sw,trace,raw. "
"Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
- "addr,symoff,period,flags", parse_output_fields),
+ "addr,symoff,period,iregs,flags", parse_output_fields),
OPT_BOOLEAN('a', "all-cpus", &system_wide,
"system-wide collection from all CPUs"),
OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index cccb4cf..90129ac 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -54,7 +54,6 @@
bool sample_time_set;
bool callgraph_set;
bool period;
- bool sample_intr_regs;
bool running_time;
bool full_auxtrace;
bool auxtrace_snapshot_mode;
@@ -64,6 +63,7 @@
unsigned int auxtrace_mmap_pages;
unsigned int user_freq;
u64 branch_stack;
+ u64 sample_intr_regs;
u64 default_interval;
u64 user_interval;
size_t auxtrace_snapshot_size;
diff --git a/tools/perf/tests/llvm.c b/tools/perf/tests/llvm.c
index a337356..52d5597 100644
--- a/tools/perf/tests/llvm.c
+++ b/tools/perf/tests/llvm.c
@@ -26,7 +26,7 @@
{
struct bpf_object *obj;
- obj = bpf_object__open_buffer(obj_buf, obj_buf_sz);
+ obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, NULL);
if (!obj)
return -1;
bpf_object__close(obj);
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index e912856..349bc96 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -75,6 +75,7 @@
libperf-y += srcline.o
libperf-y += data.o
libperf-$(CONFIG_X86) += tsc.o
+libperf-$(CONFIG_AUXTRACE) += tsc.o
libperf-y += cloexec.o
libperf-y += thread-stack.o
libperf-$(CONFIG_AUXTRACE) += auxtrace.o
@@ -82,6 +83,7 @@
libperf-$(CONFIG_AUXTRACE) += intel-pt.o
libperf-$(CONFIG_AUXTRACE) += intel-bts.o
libperf-y += parse-branch-options.o
+libperf-y += parse-regs-options.o
libperf-$(CONFIG_LIBELF) += symbol-elf.o
libperf-$(CONFIG_LIBELF) += probe-file.o
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 8d00039..d51a520 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1181,6 +1181,10 @@
if (evsel->filter == NULL)
continue;
+ /*
+ * Filters only work for tracepoint events, which don't have a cpu limit,
+ * so evlist and evsel should always be the same.
+ */
err = perf_evsel__apply_filter(evsel, ncpus, nthreads, evsel->filter);
if (err) {
*err_evsel = evsel;
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index bac25f4..c53f791 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -787,7 +787,7 @@
perf_evsel__config_callgraph(evsel, opts, &callchain_param);
if (opts->sample_intr_regs) {
- attr->sample_regs_intr = PERF_REGS_MASK;
+ attr->sample_regs_intr = opts->sample_intr_regs;
perf_evsel__set_sample_bit(evsel, REGS_INTR);
}
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
index 9e4eb8f..d23138c 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
@@ -146,6 +146,9 @@
case 4:
intel_pt_insn->rel = bswap_32(insn->immediate.value);
break;
+ default:
+ intel_pt_insn->rel = 0;
+ break;
}
#else
intel_pt_insn->rel = insn->immediate.value;
diff --git a/tools/perf/util/parse-regs-options.c b/tools/perf/util/parse-regs-options.c
new file mode 100644
index 0000000..4f2c1c2
--- /dev/null
+++ b/tools/perf/util/parse-regs-options.c
@@ -0,0 +1,71 @@
+#include "perf.h"
+#include "util/util.h"
+#include "util/debug.h"
+#include "util/parse-options.h"
+#include "util/parse-regs-options.h"
+
+/*
+ * Option callback for -I/--intr-regs: OR the mask of every register named in
+ * the comma separated @str into *opt->value; a NULL @str (bare -I) selects
+ * PERF_REGS_MASK. Returns 0 on success, -1 on error or after listing for "?".
+ */
+int parse_regs(const struct option *opt, const char *str, int unset)
+{
+	uint64_t *mode = (uint64_t *)opt->value;
+	const struct sample_reg *r;
+	char *s, *os = NULL, *p;
+	int ret = -1;
+
+	if (unset)
+		return 0;
+
+	/* cannot set it twice */
+	if (*mode)
+		return -1;
+
+	/* str may be NULL in case no arg is passed to -I */
+	if (str) {
+		/* because str is read-only */
+		s = os = strdup(str);
+		if (!s)
+			return -1;
+
+		for (;;) {
+			p = strchr(s, ',');
+			if (p)
+				*p = '\0';
+
+			if (!strcmp(s, "?")) {
+				fprintf(stderr, "available registers: ");
+				for (r = sample_reg_masks; r->name; r++)
+					fprintf(stderr, "%s ", r->name);
+				fputc('\n', stderr);
+				/* listing only: still fail, but free os on the way out */
+				goto error;
+			}
+			for (r = sample_reg_masks; r->name; r++) {
+				if (!strcasecmp(s, r->name))
+					break;
+			}
+			if (!r->name) {
+				ui__warning("unknown register %s,"
+					    " check man page\n", s);
+				goto error;
+			}
+
+			*mode |= r->mask;
+			if (!p)
+				break;
+
+			s = p + 1;
+		}
+	}
+	ret = 0;
+
+	/* default to all possible regs */
+	if (*mode == 0)
+		*mode = PERF_REGS_MASK;
+error:
+	free(os);
+	return ret;
+}
diff --git a/tools/perf/util/parse-regs-options.h b/tools/perf/util/parse-regs-options.h
new file mode 100644
index 0000000..7d762b1
--- /dev/null
+++ b/tools/perf/util/parse-regs-options.h
@@ -0,0 +1,5 @@
+#ifndef _PERF_PARSE_REGS_OPTIONS_H
+#define _PERF_PARSE_REGS_OPTIONS_H 1
+struct option;
+int parse_regs(const struct option *opt, const char *str, int unset);
+#endif /* _PERF_PARSE_REGS_OPTIONS_H */
diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h
index 980dbf7..92c1fff 100644
--- a/tools/perf/util/perf_regs.h
+++ b/tools/perf/util/perf_regs.h
@@ -5,6 +5,13 @@
struct regs_dump;
+struct sample_reg {
+ const char *name; /* register name; parse_regs() matches it case-insensitively */
+ uint64_t mask; /* bits OR'ed into the sample register mask when selected */
+};
+
+extern const struct sample_reg sample_reg_masks[];
+
#ifdef HAVE_PERF_REGS_SUPPORT
#include <perf_regs.h>