Merge branch 'perf/urgent' into perf/core
We cherry-picked 3 commits into perf/urgent, merge them back to allow
conflict-free work on those files.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h
index 99cfe36..7523340a 100644
--- a/arch/arm/include/asm/perf_event.h
+++ b/arch/arm/include/asm/perf_event.h
@@ -12,10 +12,6 @@
#ifndef __ARM_PERF_EVENT_H__
#define __ARM_PERF_EVENT_H__
-/* ARM performance counters start from 1 (in the cp15 accesses) so use the
- * same indexes here for consistency. */
-#define PERF_EVENT_INDEX_OFFSET 1
-
/* ARM perf PMU IDs for use by internal perf clients. */
enum arm_perf_pmu_ids {
ARM_PERF_PMU_ID_XSCALE1 = 0,
diff --git a/arch/frv/include/asm/perf_event.h b/arch/frv/include/asm/perf_event.h
index a69e015..c52ea55 100644
--- a/arch/frv/include/asm/perf_event.h
+++ b/arch/frv/include/asm/perf_event.h
@@ -12,6 +12,4 @@
#ifndef _ASM_PERF_EVENT_H
#define _ASM_PERF_EVENT_H
-#define PERF_EVENT_INDEX_OFFSET 0
-
#endif /* _ASM_PERF_EVENT_H */
diff --git a/arch/hexagon/include/asm/perf_event.h b/arch/hexagon/include/asm/perf_event.h
index 6c2910f..8b8526b 100644
--- a/arch/hexagon/include/asm/perf_event.h
+++ b/arch/hexagon/include/asm/perf_event.h
@@ -19,6 +19,4 @@
#ifndef _ASM_PERF_EVENT_H
#define _ASM_PERF_EVENT_H
-#define PERF_EVENT_INDEX_OFFSET 0
-
#endif /* _ASM_PERF_EVENT_H */
diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
index 8f1df12..1a8093f 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -61,8 +61,6 @@
extern unsigned long perf_misc_flags(struct pt_regs *regs);
extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
-#define PERF_EVENT_INDEX_OFFSET 1
-
/*
* Only override the default definitions in include/linux/perf_event.h
* if we have hardware PMU support.
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index 10a140f..d614ab5 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -1187,6 +1187,11 @@
return err;
}
+static int power_pmu_event_idx(struct perf_event *event)
+{
+ return event->hw.idx;
+}
+
struct pmu power_pmu = {
.pmu_enable = power_pmu_enable,
.pmu_disable = power_pmu_disable,
@@ -1199,6 +1204,7 @@
.start_txn = power_pmu_start_txn,
.cancel_txn = power_pmu_cancel_txn,
.commit_txn = power_pmu_commit_txn,
+ .event_idx = power_pmu_event_idx,
};
/*
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index a75f168..4eb444e 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -6,4 +6,3 @@
/* Empty, just to avoid compiling error */
-#define PERF_EVENT_INDEX_OFFSET 0
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 096c975..9b922c1 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -188,8 +188,6 @@
#ifdef CONFIG_PERF_EVENTS
extern void perf_events_lapic_init(void);
-#define PERF_EVENT_INDEX_OFFSET 0
-
/*
* Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups.
* This flag is otherwise unused and ABI specified to be 0, so nobody should
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 5adce10..f8bddb5 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -24,6 +24,7 @@
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/bitops.h>
+#include <linux/device.h>
#include <asm/apic.h>
#include <asm/stacktrace.h>
@@ -31,6 +32,7 @@
#include <asm/compat.h>
#include <asm/smp.h>
#include <asm/alternative.h>
+#include <asm/timer.h>
#include "perf_event.h"
@@ -1210,6 +1212,8 @@
break;
case CPU_STARTING:
+ if (x86_pmu.attr_rdpmc)
+ set_in_cr4(X86_CR4_PCE);
if (x86_pmu.cpu_starting)
x86_pmu.cpu_starting(cpu);
break;
@@ -1319,6 +1323,8 @@
}
}
+ x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
+
pr_info("... version: %d\n", x86_pmu.version);
pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
pr_info("... generic registers: %d\n", x86_pmu.num_counters);
@@ -1542,10 +1548,71 @@
return err;
}
+static int x86_pmu_event_idx(struct perf_event *event)
+{
+ int idx = event->hw.idx;
+
+ if (x86_pmu.num_counters_fixed && idx >= X86_PMC_IDX_FIXED) {
+ idx -= X86_PMC_IDX_FIXED;
+ idx |= 1 << 30;
+ }
+
+ return idx + 1;
+}
+
+static ssize_t get_attr_rdpmc(struct device *cdev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc);
+}
+
+static void change_rdpmc(void *info)
+{
+ bool enable = !!(unsigned long)info;
+
+ if (enable)
+ set_in_cr4(X86_CR4_PCE);
+ else
+ clear_in_cr4(X86_CR4_PCE);
+}
+
+static ssize_t set_attr_rdpmc(struct device *cdev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ unsigned long val = simple_strtoul(buf, NULL, 0);
+
+ if (!!val != !!x86_pmu.attr_rdpmc) {
+ x86_pmu.attr_rdpmc = !!val;
+ smp_call_function(change_rdpmc, (void *)val, 1);
+ }
+
+ return count;
+}
+
+static DEVICE_ATTR(rdpmc, S_IRUSR | S_IWUSR, get_attr_rdpmc, set_attr_rdpmc);
+
+static struct attribute *x86_pmu_attrs[] = {
+ &dev_attr_rdpmc.attr,
+ NULL,
+};
+
+static struct attribute_group x86_pmu_attr_group = {
+ .attrs = x86_pmu_attrs,
+};
+
+static const struct attribute_group *x86_pmu_attr_groups[] = {
+ &x86_pmu_attr_group,
+ NULL,
+};
+
static struct pmu pmu = {
.pmu_enable = x86_pmu_enable,
.pmu_disable = x86_pmu_disable,
+ .attr_groups = x86_pmu_attr_groups,
+
.event_init = x86_pmu_event_init,
.add = x86_pmu_add,
@@ -1557,8 +1624,23 @@
.start_txn = x86_pmu_start_txn,
.cancel_txn = x86_pmu_cancel_txn,
.commit_txn = x86_pmu_commit_txn,
+
+ .event_idx = x86_pmu_event_idx,
};
+void perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now)
+{
+ if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
+ return;
+
+ if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
+ return;
+
+ userpg->time_mult = this_cpu_read(cyc2ns);
+ userpg->time_shift = CYC2NS_SCALE_FACTOR;
+ userpg->time_offset = this_cpu_read(cyc2ns_offset) - now;
+}
+
/*
* callchain support
*/
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 8944062..513d617 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -307,6 +307,14 @@
struct x86_pmu_quirk *quirks;
int perfctr_second_write;
+ /*
+ * sysfs attrs
+ */
+ int attr_rdpmc;
+
+ /*
+ * CPU Hotplug hooks
+ */
int (*cpu_prepare)(int cpu);
void (*cpu_starting)(int cpu);
void (*cpu_dying)(int cpu);
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 5ce8b14..f7c6958 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -1,6 +1,38 @@
#ifndef _LINUX_JUMP_LABEL_H
#define _LINUX_JUMP_LABEL_H
+/*
+ * Jump label support
+ *
+ * Copyright (C) 2009-2012 Jason Baron <jbaron@redhat.com>
+ * Copyright (C) 2011-2012 Peter Zijlstra <pzijlstr@redhat.com>
+ *
+ * Jump labels provide an interface to generate dynamic branches using
+ * self-modifying code. Assuming toolchain and architecture support the result
+ * of a "if (static_branch(&key))" statement is a unconditional branch (which
+ * defaults to false - and the true block is placed out of line).
+ *
+ * However at runtime we can change the 'static' branch target using
+ * jump_label_{inc,dec}(). These function as a 'reference' count on the key
+ * object and for as long as there are references all branches referring to
+ * that particular key will point to the (out of line) true block.
+ *
+ * Since this relies on modifying code the jump_label_{inc,dec}() functions
+ * must be considered absolute slow paths (machine wide synchronization etc.).
+ * OTOH, since the affected branches are unconditional their runtime overhead
+ * will be absolutely minimal, esp. in the default (off) case where the total
+ * effect is a single NOP of appropriate size. The on case will patch in a jump
+ * to the out-of-line block.
+ *
+ * When the control is directly exposed to userspace it is prudent to delay the
+ * decrement to avoid high frequency code modifications which can (and do)
+ * cause significant performance degradation. Struct jump_label_key_deferred and
+ * jump_label_dec_deferred() provide for this.
+ *
+ * Lacking toolchain and or architecture support, it falls back to a simple
+ * conditional branch.
+ */
+
#include <linux/types.h>
#include <linux/compiler.h>
#include <linux/workqueue.h>
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index abb2776..412b790 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -291,12 +291,14 @@
__s64 offset; /* add to hardware event value */
__u64 time_enabled; /* time event active */
__u64 time_running; /* time event on cpu */
+ __u32 time_mult, time_shift;
+ __u64 time_offset;
/*
* Hole for extension of the self monitor capabilities
*/
- __u64 __reserved[123]; /* align to 1k */
+ __u64 __reserved[121]; /* align to 1k */
/*
* Control data for the mmap() data buffer.
@@ -616,6 +618,7 @@
struct list_head entry;
struct device *dev;
+ const struct attribute_group **attr_groups;
char *name;
int type;
@@ -681,6 +684,12 @@
* for each successful ->add() during the transaction.
*/
void (*cancel_txn) (struct pmu *pmu); /* optional */
+
+ /*
+ * Will return the value for perf_event_mmap_page::index for this event,
+ * if no implementation is provided it will default to: event->hw.idx + 1.
+ */
+ int (*event_idx) (struct perf_event *event); /*optional */
};
/**
diff --git a/include/trace/events/signal.h b/include/trace/events/signal.h
index 17df434..39a8a43 100644
--- a/include/trace/events/signal.h
+++ b/include/trace/events/signal.h
@@ -23,11 +23,23 @@
} \
} while (0)
+#ifndef TRACE_HEADER_MULTI_READ
+enum {
+ TRACE_SIGNAL_DELIVERED,
+ TRACE_SIGNAL_IGNORED,
+ TRACE_SIGNAL_ALREADY_PENDING,
+ TRACE_SIGNAL_OVERFLOW_FAIL,
+ TRACE_SIGNAL_LOSE_INFO,
+};
+#endif
+
/**
* signal_generate - called when a signal is generated
* @sig: signal number
* @info: pointer to struct siginfo
* @task: pointer to struct task_struct
+ * @group: shared or private
+ * @result: TRACE_SIGNAL_*
*
* Current process sends a 'sig' signal to 'task' process with
* 'info' siginfo. If 'info' is SEND_SIG_NOINFO or SEND_SIG_PRIV,
@@ -37,9 +49,10 @@
*/
TRACE_EVENT(signal_generate,
- TP_PROTO(int sig, struct siginfo *info, struct task_struct *task),
+ TP_PROTO(int sig, struct siginfo *info, struct task_struct *task,
+ int group, int result),
- TP_ARGS(sig, info, task),
+ TP_ARGS(sig, info, task, group, result),
TP_STRUCT__entry(
__field( int, sig )
@@ -47,6 +60,8 @@
__field( int, code )
__array( char, comm, TASK_COMM_LEN )
__field( pid_t, pid )
+ __field( int, group )
+ __field( int, result )
),
TP_fast_assign(
@@ -54,11 +69,14 @@
TP_STORE_SIGINFO(__entry, info);
memcpy(__entry->comm, task->comm, TASK_COMM_LEN);
__entry->pid = task->pid;
+ __entry->group = group;
+ __entry->result = result;
),
- TP_printk("sig=%d errno=%d code=%d comm=%s pid=%d",
+ TP_printk("sig=%d errno=%d code=%d comm=%s pid=%d grp=%d res=%d",
__entry->sig, __entry->errno, __entry->code,
- __entry->comm, __entry->pid)
+ __entry->comm, __entry->pid, __entry->group,
+ __entry->result)
);
/**
@@ -101,65 +119,6 @@
__entry->sa_handler, __entry->sa_flags)
);
-DECLARE_EVENT_CLASS(signal_queue_overflow,
-
- TP_PROTO(int sig, int group, struct siginfo *info),
-
- TP_ARGS(sig, group, info),
-
- TP_STRUCT__entry(
- __field( int, sig )
- __field( int, group )
- __field( int, errno )
- __field( int, code )
- ),
-
- TP_fast_assign(
- __entry->sig = sig;
- __entry->group = group;
- TP_STORE_SIGINFO(__entry, info);
- ),
-
- TP_printk("sig=%d group=%d errno=%d code=%d",
- __entry->sig, __entry->group, __entry->errno, __entry->code)
-);
-
-/**
- * signal_overflow_fail - called when signal queue is overflow
- * @sig: signal number
- * @group: signal to process group or not (bool)
- * @info: pointer to struct siginfo
- *
- * Kernel fails to generate 'sig' signal with 'info' siginfo, because
- * siginfo queue is overflow, and the signal is dropped.
- * 'group' is not 0 if the signal will be sent to a process group.
- * 'sig' is always one of RT signals.
- */
-DEFINE_EVENT(signal_queue_overflow, signal_overflow_fail,
-
- TP_PROTO(int sig, int group, struct siginfo *info),
-
- TP_ARGS(sig, group, info)
-);
-
-/**
- * signal_lose_info - called when siginfo is lost
- * @sig: signal number
- * @group: signal to process group or not (bool)
- * @info: pointer to struct siginfo
- *
- * Kernel generates 'sig' signal but loses 'info' siginfo, because siginfo
- * queue is overflow.
- * 'group' is not 0 if the signal will be sent to a process group.
- * 'sig' is always one of non-RT signals.
- */
-DEFINE_EVENT(signal_queue_overflow, signal_lose_info,
-
- TP_PROTO(int sig, int group, struct siginfo *info),
-
- TP_ARGS(sig, group, info)
-);
-
#endif /* _TRACE_SIGNAL_H */
/* This part must be outside protection */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index ba36013..7c3b9de 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3229,10 +3229,6 @@
return 0;
}
-#ifndef PERF_EVENT_INDEX_OFFSET
-# define PERF_EVENT_INDEX_OFFSET 0
-#endif
-
static int perf_event_index(struct perf_event *event)
{
if (event->hw.state & PERF_HES_STOPPED)
@@ -3241,21 +3237,26 @@
if (event->state != PERF_EVENT_STATE_ACTIVE)
return 0;
- return event->hw.idx + 1 - PERF_EVENT_INDEX_OFFSET;
+ return event->pmu->event_idx(event);
}
static void calc_timer_values(struct perf_event *event,
+ u64 *now,
u64 *enabled,
u64 *running)
{
- u64 now, ctx_time;
+ u64 ctx_time;
- now = perf_clock();
- ctx_time = event->shadow_ctx_time + now;
+ *now = perf_clock();
+ ctx_time = event->shadow_ctx_time + *now;
*enabled = ctx_time - event->tstamp_enabled;
*running = ctx_time - event->tstamp_running;
}
+void __weak perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now)
+{
+}
+
/*
* Callers need to ensure there can be no nesting of this function, otherwise
* the seqlock logic goes bad. We can not serialize this because the arch
@@ -3265,7 +3266,7 @@
{
struct perf_event_mmap_page *userpg;
struct ring_buffer *rb;
- u64 enabled, running;
+ u64 enabled, running, now;
rcu_read_lock();
/*
@@ -3277,7 +3278,7 @@
* because of locking issue as we can be called in
* NMI context
*/
- calc_timer_values(event, &enabled, &running);
+ calc_timer_values(event, &now, &enabled, &running);
rb = rcu_dereference(event->rb);
if (!rb)
goto unlock;
@@ -3293,7 +3294,7 @@
barrier();
userpg->index = perf_event_index(event);
userpg->offset = perf_event_count(event);
- if (event->state == PERF_EVENT_STATE_ACTIVE)
+ if (userpg->index)
userpg->offset -= local64_read(&event->hw.prev_count);
userpg->time_enabled = enabled +
@@ -3302,6 +3303,8 @@
userpg->time_running = running +
atomic64_read(&event->child_total_time_running);
+ perf_update_user_clock(userpg, now);
+
barrier();
++userpg->lock;
preempt_enable();
@@ -3559,6 +3562,8 @@
event->mmap_user = get_current_user();
vma->vm_mm->pinned_vm += event->mmap_locked;
+ perf_event_update_userpage(event);
+
unlock:
if (!ret)
atomic_inc(&event->mmap_count);
@@ -3790,7 +3795,7 @@
static void perf_output_read(struct perf_output_handle *handle,
struct perf_event *event)
{
- u64 enabled = 0, running = 0;
+ u64 enabled = 0, running = 0, now;
u64 read_format = event->attr.read_format;
/*
@@ -3803,7 +3808,7 @@
* NMI context
*/
if (read_format & PERF_FORMAT_TOTAL_TIMES)
- calc_timer_values(event, &enabled, &running);
+ calc_timer_values(event, &now, &enabled, &running);
if (event->attr.read_format & PERF_FORMAT_GROUP)
perf_output_read_group(handle, event, enabled, running);
@@ -5022,6 +5027,11 @@
return 0;
}
+static int perf_swevent_event_idx(struct perf_event *event)
+{
+ return 0;
+}
+
static struct pmu perf_swevent = {
.task_ctx_nr = perf_sw_context,
@@ -5031,6 +5041,8 @@
.start = perf_swevent_start,
.stop = perf_swevent_stop,
.read = perf_swevent_read,
+
+ .event_idx = perf_swevent_event_idx,
};
#ifdef CONFIG_EVENT_TRACING
@@ -5117,6 +5129,8 @@
.start = perf_swevent_start,
.stop = perf_swevent_stop,
.read = perf_swevent_read,
+
+ .event_idx = perf_swevent_event_idx,
};
static inline void perf_tp_register(void)
@@ -5336,6 +5350,8 @@
.start = cpu_clock_event_start,
.stop = cpu_clock_event_stop,
.read = cpu_clock_event_read,
+
+ .event_idx = perf_swevent_event_idx,
};
/*
@@ -5408,6 +5424,8 @@
.start = task_clock_event_start,
.stop = task_clock_event_stop,
.read = task_clock_event_read,
+
+ .event_idx = perf_swevent_event_idx,
};
static void perf_pmu_nop_void(struct pmu *pmu)
@@ -5435,6 +5453,11 @@
perf_pmu_enable(pmu);
}
+static int perf_event_idx_default(struct perf_event *event)
+{
+ return event->hw.idx + 1;
+}
+
/*
* Ensures all contexts with the same task_ctx_nr have the same
* pmu_cpu_context too.
@@ -5521,6 +5544,7 @@
if (!pmu->dev)
goto out;
+ pmu->dev->groups = pmu->attr_groups;
device_initialize(pmu->dev);
ret = dev_set_name(pmu->dev, "%s", pmu->name);
if (ret)
@@ -5624,6 +5648,9 @@
pmu->pmu_disable = perf_pmu_nop_void;
}
+ if (!pmu->event_idx)
+ pmu->event_idx = perf_event_idx_default;
+
list_add_rcu(&pmu->entry, &pmus);
ret = 0;
unlock:
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index b7971d6..b0309f7 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -613,6 +613,11 @@
bp->hw.state = PERF_HES_STOPPED;
}
+static int hw_breakpoint_event_idx(struct perf_event *bp)
+{
+ return 0;
+}
+
static struct pmu perf_breakpoint = {
.task_ctx_nr = perf_sw_context, /* could eventually get its own */
@@ -622,6 +627,8 @@
.start = hw_breakpoint_start,
.stop = hw_breakpoint_stop,
.read = hw_breakpoint_pmu_read,
+
+ .event_idx = hw_breakpoint_event_idx,
};
int __init init_hw_breakpoint(void)
diff --git a/kernel/signal.c b/kernel/signal.c
index c73c428..8511e39 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1054,13 +1054,13 @@
struct sigpending *pending;
struct sigqueue *q;
int override_rlimit;
-
- trace_signal_generate(sig, info, t);
+ int ret = 0, result;
assert_spin_locked(&t->sighand->siglock);
+ result = TRACE_SIGNAL_IGNORED;
if (!prepare_signal(sig, t, from_ancestor_ns))
- return 0;
+ goto ret;
pending = group ? &t->signal->shared_pending : &t->pending;
/*
@@ -1068,8 +1068,11 @@
* exactly one non-rt signal, so that we can get more
* detailed information about the cause of the signal.
*/
+ result = TRACE_SIGNAL_ALREADY_PENDING;
if (legacy_queue(pending, sig))
- return 0;
+ goto ret;
+
+ result = TRACE_SIGNAL_DELIVERED;
/*
* fast-pathed signals for kernel-internal things like SIGSTOP
* or SIGKILL.
@@ -1127,14 +1130,15 @@
* signal was rt and sent by user using something
* other than kill().
*/
- trace_signal_overflow_fail(sig, group, info);
- return -EAGAIN;
+ result = TRACE_SIGNAL_OVERFLOW_FAIL;
+ ret = -EAGAIN;
+ goto ret;
} else {
/*
* This is a silent loss of information. We still
* send the signal, but the *info bits are lost.
*/
- trace_signal_lose_info(sig, group, info);
+ result = TRACE_SIGNAL_LOSE_INFO;
}
}
@@ -1142,7 +1146,9 @@
signalfd_notify(t, sig);
sigaddset(&pending->signal, sig);
complete_signal(sig, t, group);
- return 0;
+ret:
+ trace_signal_generate(sig, info, t, group, result);
+ return ret;
}
static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
@@ -1585,7 +1591,7 @@
int sig = q->info.si_signo;
struct sigpending *pending;
unsigned long flags;
- int ret;
+ int ret, result;
BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
@@ -1594,6 +1600,7 @@
goto ret;
ret = 1; /* the signal is ignored */
+ result = TRACE_SIGNAL_IGNORED;
if (!prepare_signal(sig, t, 0))
goto out;
@@ -1605,6 +1612,7 @@
*/
BUG_ON(q->info.si_code != SI_TIMER);
q->info.si_overrun++;
+ result = TRACE_SIGNAL_ALREADY_PENDING;
goto out;
}
q->info.si_overrun = 0;
@@ -1614,7 +1622,9 @@
list_add_tail(&q->list, &pending->list);
sigaddset(&pending->signal, sig);
complete_signal(sig, t, group);
+ result = TRACE_SIGNAL_DELIVERED;
out:
+ trace_signal_generate(sig, &q->info, t, group, result);
unlock_task_sighand(t, &flags);
ret:
return ret;
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 2937f7e..ff9a66e 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -58,6 +58,10 @@
--tid=::
Record events on existing thread ID.
+-u::
+--uid=::
+ Record events in threads owned by uid. Name or number.
+
-r::
--realtime=::
Collect data with this RT SCHED_FIFO priority.
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index b1a5bbb..ab1454e 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -78,6 +78,10 @@
--tid=<tid>::
Profile events on existing thread ID.
+-u::
+--uid=::
+ Record events in threads owned by uid. Name or number.
+
-r <priority>::
--realtime=<priority>::
Collect data with this RT SCHED_FIFO priority.
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 7c12650..d64f581 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -61,7 +61,7 @@
ifeq (${IS_X86_64}, 1)
RAW_ARCH := x86_64
ARCH_CFLAGS := -DARCH_X86_64
- ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S
+ ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S
endif
endif
@@ -359,8 +359,10 @@
BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o
ifeq ($(RAW_ARCH),x86_64)
BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o
+BUILTIN_OBJS += $(OUTPUT)bench/mem-memset-x86-64-asm.o
endif
BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o
+BUILTIN_OBJS += $(OUTPUT)bench/mem-memset.o
BUILTIN_OBJS += $(OUTPUT)builtin-diff.o
BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index f7781c6..a09bece 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -4,6 +4,7 @@
extern int bench_sched_messaging(int argc, const char **argv, const char *prefix);
extern int bench_sched_pipe(int argc, const char **argv, const char *prefix);
extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __used);
+extern int bench_mem_memset(int argc, const char **argv, const char *prefix);
#define BENCH_FORMAT_DEFAULT_STR "default"
#define BENCH_FORMAT_DEFAULT 0
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
index d588b87..d66ab79 100644
--- a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
+++ b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
@@ -2,3 +2,11 @@
MEMCPY_FN(__memcpy,
"x86-64-unrolled",
"unrolled memcpy() in arch/x86/lib/memcpy_64.S")
+
+MEMCPY_FN(memcpy_c,
+ "x86-64-movsq",
+ "movsq-based memcpy() in arch/x86/lib/memcpy_64.S")
+
+MEMCPY_FN(memcpy_c_e,
+ "x86-64-movsb",
+ "movsb-based memcpy() in arch/x86/lib/memcpy_64.S")
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S
index a57b66e..a20780b 100644
--- a/tools/perf/bench/mem-memcpy-x86-64-asm.S
+++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S
@@ -1,2 +1,6 @@
-
+#define memcpy MEMCPY /* don't hide glibc's memcpy() */
+#define altinstr_replacement text
+#define globl p2align 4; .globl
+#define Lmemcpy_c globl memcpy_c; memcpy_c
+#define Lmemcpy_c_e globl memcpy_c_e; memcpy_c_e
#include "../../../arch/x86/lib/memcpy_64.S"
diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c
index db82021..6ad2b1c 100644
--- a/tools/perf/bench/mem-memcpy.c
+++ b/tools/perf/bench/mem-memcpy.c
@@ -24,6 +24,7 @@
static const char *length_str = "1MB";
static const char *routine = "default";
+static int iterations = 1;
static bool use_clock;
static int clock_fd;
static bool only_prefault;
@@ -35,6 +36,8 @@
"available unit: B, MB, GB (upper and lower)"),
OPT_STRING('r', "routine", &routine, "default",
"Specify routine to copy"),
+ OPT_INTEGER('i', "iterations", &iterations,
+ "repeat memcpy() invocation this number of times"),
OPT_BOOLEAN('c', "clock", &use_clock,
"Use CPU clock for measuring"),
OPT_BOOLEAN('o', "only-prefault", &only_prefault,
@@ -121,6 +124,7 @@
{
u64 clock_start = 0ULL, clock_end = 0ULL;
void *src = NULL, *dst = NULL;
+ int i;
alloc_mem(&src, &dst, len);
@@ -128,7 +132,8 @@
fn(dst, src, len);
clock_start = get_clock();
- fn(dst, src, len);
+ for (i = 0; i < iterations; ++i)
+ fn(dst, src, len);
clock_end = get_clock();
free(src);
@@ -140,6 +145,7 @@
{
struct timeval tv_start, tv_end, tv_diff;
void *src = NULL, *dst = NULL;
+ int i;
alloc_mem(&src, &dst, len);
@@ -147,7 +153,8 @@
fn(dst, src, len);
BUG_ON(gettimeofday(&tv_start, NULL));
- fn(dst, src, len);
+ for (i = 0; i < iterations; ++i)
+ fn(dst, src, len);
BUG_ON(gettimeofday(&tv_end, NULL));
timersub(&tv_end, &tv_start, &tv_diff);
diff --git a/tools/perf/bench/mem-memset-arch.h b/tools/perf/bench/mem-memset-arch.h
new file mode 100644
index 0000000..a040fa7
--- /dev/null
+++ b/tools/perf/bench/mem-memset-arch.h
@@ -0,0 +1,12 @@
+
+#ifdef ARCH_X86_64
+
+#define MEMSET_FN(fn, name, desc) \
+ extern void *fn(void *, int, size_t);
+
+#include "mem-memset-x86-64-asm-def.h"
+
+#undef MEMSET_FN
+
+#endif
+
diff --git a/tools/perf/bench/mem-memset-x86-64-asm-def.h b/tools/perf/bench/mem-memset-x86-64-asm-def.h
new file mode 100644
index 0000000..a71dff9
--- /dev/null
+++ b/tools/perf/bench/mem-memset-x86-64-asm-def.h
@@ -0,0 +1,12 @@
+
+MEMSET_FN(__memset,
+ "x86-64-unrolled",
+ "unrolled memset() in arch/x86/lib/memset_64.S")
+
+MEMSET_FN(memset_c,
+ "x86-64-stosq",
+ "movsq-based memset() in arch/x86/lib/memset_64.S")
+
+MEMSET_FN(memset_c_e,
+ "x86-64-stosb",
+ "movsb-based memset() in arch/x86/lib/memset_64.S")
diff --git a/tools/perf/bench/mem-memset-x86-64-asm.S b/tools/perf/bench/mem-memset-x86-64-asm.S
new file mode 100644
index 0000000..cb92170
--- /dev/null
+++ b/tools/perf/bench/mem-memset-x86-64-asm.S
@@ -0,0 +1,6 @@
+#define memset MEMSET /* don't hide glibc's memset() */
+#define altinstr_replacement text
+#define globl p2align 4; .globl
+#define Lmemset_c globl memset_c; memset_c
+#define Lmemset_c_e globl memset_c_e; memset_c_e
+#include "../../../arch/x86/lib/memset_64.S"
diff --git a/tools/perf/bench/mem-memset.c b/tools/perf/bench/mem-memset.c
new file mode 100644
index 0000000..59d4933
--- /dev/null
+++ b/tools/perf/bench/mem-memset.c
@@ -0,0 +1,298 @@
+/*
+ * mem-memset.c
+ *
+ * memset: Simple memory set in various ways
+ *
+ * Trivial clone of mem-memcpy.c.
+ */
+#include <ctype.h>
+
+#include "../perf.h"
+#include "../util/util.h"
+#include "../util/parse-options.h"
+#include "../util/header.h"
+#include "bench.h"
+#include "mem-memset-arch.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <errno.h>
+
+#define K 1024
+
+static const char *length_str = "1MB";
+static const char *routine = "default";
+static int iterations = 1;
+static bool use_clock;
+static int clock_fd;
+static bool only_prefault;
+static bool no_prefault;
+
+static const struct option options[] = {
+ OPT_STRING('l', "length", &length_str, "1MB",
+ "Specify length of memory to copy. "
+ "available unit: B, MB, GB (upper and lower)"),
+ OPT_STRING('r', "routine", &routine, "default",
+ "Specify routine to copy"),
+ OPT_INTEGER('i', "iterations", &iterations,
+ "repeat memset() invocation this number of times"),
+ OPT_BOOLEAN('c', "clock", &use_clock,
+ "Use CPU clock for measuring"),
+ OPT_BOOLEAN('o', "only-prefault", &only_prefault,
+ "Show only the result with page faults before memset()"),
+ OPT_BOOLEAN('n', "no-prefault", &no_prefault,
+ "Show only the result without page faults before memset()"),
+ OPT_END()
+};
+
+typedef void *(*memset_t)(void *, int, size_t);
+
+struct routine {
+ const char *name;
+ const char *desc;
+ memset_t fn;
+};
+
+static const struct routine routines[] = {
+ { "default",
+ "Default memset() provided by glibc",
+ memset },
+#ifdef ARCH_X86_64
+
+#define MEMSET_FN(fn, name, desc) { name, desc, fn },
+#include "mem-memset-x86-64-asm-def.h"
+#undef MEMSET_FN
+
+#endif
+
+ { NULL,
+ NULL,
+ NULL }
+};
+
+static const char * const bench_mem_memset_usage[] = {
+ "perf bench mem memset <options>",
+ NULL
+};
+
+static struct perf_event_attr clock_attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES
+};
+
+static void init_clock(void)
+{
+ clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0);
+
+ if (clock_fd < 0 && errno == ENOSYS)
+ die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
+ else
+ BUG_ON(clock_fd < 0);
+}
+
+static u64 get_clock(void)
+{
+ int ret;
+ u64 clk;
+
+ ret = read(clock_fd, &clk, sizeof(u64));
+ BUG_ON(ret != sizeof(u64));
+
+ return clk;
+}
+
+static double timeval2double(struct timeval *ts)
+{
+ return (double)ts->tv_sec +
+ (double)ts->tv_usec / (double)1000000;
+}
+
+static void alloc_mem(void **dst, size_t length)
+{
+ *dst = zalloc(length);
+ if (!dst)
+ die("memory allocation failed - maybe length is too large?\n");
+}
+
+static u64 do_memset_clock(memset_t fn, size_t len, bool prefault)
+{
+ u64 clock_start = 0ULL, clock_end = 0ULL;
+ void *dst = NULL;
+ int i;
+
+ alloc_mem(&dst, len);
+
+ if (prefault)
+ fn(dst, -1, len);
+
+ clock_start = get_clock();
+ for (i = 0; i < iterations; ++i)
+ fn(dst, i, len);
+ clock_end = get_clock();
+
+ free(dst);
+ return clock_end - clock_start;
+}
+
+static double do_memset_gettimeofday(memset_t fn, size_t len, bool prefault)
+{
+ struct timeval tv_start, tv_end, tv_diff;
+ void *dst = NULL;
+ int i;
+
+ alloc_mem(&dst, len);
+
+ if (prefault)
+ fn(dst, -1, len);
+
+ BUG_ON(gettimeofday(&tv_start, NULL));
+ for (i = 0; i < iterations; ++i)
+ fn(dst, i, len);
+ BUG_ON(gettimeofday(&tv_end, NULL));
+
+ timersub(&tv_end, &tv_start, &tv_diff);
+
+ free(dst);
+ return (double)((double)len / timeval2double(&tv_diff));
+}
+
+#define pf (no_prefault ? 0 : 1)
+
+#define print_bps(x) do { \
+ if (x < K) \
+ printf(" %14lf B/Sec", x); \
+ else if (x < K * K) \
+ printf(" %14lfd KB/Sec", x / K); \
+ else if (x < K * K * K) \
+ printf(" %14lf MB/Sec", x / K / K); \
+ else \
+ printf(" %14lf GB/Sec", x / K / K / K); \
+ } while (0)
+
+int bench_mem_memset(int argc, const char **argv,
+ const char *prefix __used)
+{
+ int i;
+ size_t len;
+ double result_bps[2];
+ u64 result_clock[2];
+
+ argc = parse_options(argc, argv, options,
+ bench_mem_memset_usage, 0);
+
+ if (use_clock)
+ init_clock();
+
+ len = (size_t)perf_atoll((char *)length_str);
+
+ result_clock[0] = result_clock[1] = 0ULL;
+ result_bps[0] = result_bps[1] = 0.0;
+
+ if ((s64)len <= 0) {
+ fprintf(stderr, "Invalid length:%s\n", length_str);
+ return 1;
+ }
+
+ /* same to without specifying either of prefault and no-prefault */
+ if (only_prefault && no_prefault)
+ only_prefault = no_prefault = false;
+
+ for (i = 0; routines[i].name; i++) {
+ if (!strcmp(routines[i].name, routine))
+ break;
+ }
+ if (!routines[i].name) {
+ printf("Unknown routine:%s\n", routine);
+ printf("Available routines...\n");
+ for (i = 0; routines[i].name; i++) {
+ printf("\t%s ... %s\n",
+ routines[i].name, routines[i].desc);
+ }
+ return 1;
+ }
+
+ if (bench_format == BENCH_FORMAT_DEFAULT)
+ printf("# Copying %s Bytes ...\n\n", length_str);
+
+ if (!only_prefault && !no_prefault) {
+ /* show both of results */
+ if (use_clock) {
+ result_clock[0] =
+ do_memset_clock(routines[i].fn, len, false);
+ result_clock[1] =
+ do_memset_clock(routines[i].fn, len, true);
+ } else {
+ result_bps[0] =
+ do_memset_gettimeofday(routines[i].fn,
+ len, false);
+ result_bps[1] =
+ do_memset_gettimeofday(routines[i].fn,
+ len, true);
+ }
+ } else {
+ if (use_clock) {
+ result_clock[pf] =
+ do_memset_clock(routines[i].fn,
+ len, only_prefault);
+ } else {
+ result_bps[pf] =
+ do_memset_gettimeofday(routines[i].fn,
+ len, only_prefault);
+ }
+ }
+
+ switch (bench_format) {
+ case BENCH_FORMAT_DEFAULT:
+ if (!only_prefault && !no_prefault) {
+ if (use_clock) {
+ printf(" %14lf Clock/Byte\n",
+ (double)result_clock[0]
+ / (double)len);
+ printf(" %14lf Clock/Byte (with prefault)\n ",
+ (double)result_clock[1]
+ / (double)len);
+ } else {
+ print_bps(result_bps[0]);
+ printf("\n");
+ print_bps(result_bps[1]);
+ printf(" (with prefault)\n");
+ }
+ } else {
+ if (use_clock) {
+ printf(" %14lf Clock/Byte",
+ (double)result_clock[pf]
+ / (double)len);
+ } else
+ print_bps(result_bps[pf]);
+
+ printf("%s\n", only_prefault ? " (with prefault)" : "");
+ }
+ break;
+ case BENCH_FORMAT_SIMPLE:
+ if (!only_prefault && !no_prefault) {
+ if (use_clock) {
+ printf("%lf %lf\n",
+ (double)result_clock[0] / (double)len,
+ (double)result_clock[1] / (double)len);
+ } else {
+ printf("%lf %lf\n",
+ result_bps[0], result_bps[1]);
+ }
+ } else {
+ if (use_clock) {
+ printf("%lf\n", (double)result_clock[pf]
+ / (double)len);
+ } else
+ printf("%lf\n", result_bps[pf]);
+ }
+ break;
+ default:
+ /* reaching this means there's some disaster: */
+ die("unknown format: %d\n", bench_format);
+ break;
+ }
+
+ return 0;
+}
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index fcb9626..b0e74ab 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -52,6 +52,9 @@
{ "memcpy",
"Simple memory copy in various ways",
bench_mem_memcpy },
+ { "memset",
+ "Simple memory set in various ways",
+ bench_mem_memset },
suite_all,
{ NULL,
NULL,
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 0abfb18..32870ee 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -44,6 +44,7 @@
struct perf_evlist *evlist;
struct perf_session *session;
const char *progname;
+ const char *uid_str;
int output;
unsigned int page_size;
int realtime_prio;
@@ -727,6 +728,7 @@
OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
"monitor event in cgroup name only",
parse_cgroups),
+ OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"),
OPT_END()
};
@@ -748,7 +750,7 @@
argc = parse_options(argc, argv, record_options, record_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
if (!argc && rec->opts.target_pid == -1 && rec->opts.target_tid == -1 &&
- !rec->opts.system_wide && !rec->opts.cpu_list)
+ !rec->opts.system_wide && !rec->opts.cpu_list && !rec->uid_str)
usage_with_options(record_usage, record_options);
if (rec->force && rec->append_file) {
@@ -788,11 +790,17 @@
goto out_symbol_exit;
}
+ rec->opts.uid = parse_target_uid(rec->uid_str, rec->opts.target_tid,
+ rec->opts.target_pid);
+ if (rec->uid_str != NULL && rec->opts.uid == UINT_MAX - 1)
+ goto out_free_fd;
+
if (rec->opts.target_pid != -1)
rec->opts.target_tid = rec->opts.target_pid;
if (perf_evlist__create_maps(evsel_list, rec->opts.target_pid,
- rec->opts.target_tid, rec->opts.cpu_list) < 0)
+ rec->opts.target_tid, rec->opts.uid,
+ rec->opts.cpu_list) < 0)
usage_with_options(record_usage, record_options);
list_for_each_entry(pos, &evsel_list->entries, node) {
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index f5d2a63..459b862 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1201,7 +1201,7 @@
if (target_pid != -1)
target_tid = target_pid;
- evsel_list->threads = thread_map__new(target_pid, target_tid);
+ evsel_list->threads = thread_map__new(target_pid, target_tid, UINT_MAX);
if (evsel_list->threads == NULL) {
pr_err("Problems finding threads of monitor\n");
usage_with_options(stat_usage, options);
diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c
index 3854e86..70c4eb2 100644
--- a/tools/perf/builtin-test.c
+++ b/tools/perf/builtin-test.c
@@ -15,6 +15,8 @@
#include "util/thread_map.h"
#include "../../include/linux/hw_breakpoint.h"
+#include <sys/mman.h>
+
static int vmlinux_matches_kallsyms_filter(struct map *map __used, struct symbol *sym)
{
bool *visited = symbol__priv(sym);
@@ -276,7 +278,7 @@
return -1;
}
- threads = thread_map__new(-1, getpid());
+ threads = thread_map__new(-1, getpid(), UINT_MAX);
if (threads == NULL) {
pr_debug("thread_map__new\n");
return -1;
@@ -342,7 +344,7 @@
return -1;
}
- threads = thread_map__new(-1, getpid());
+ threads = thread_map__new(-1, getpid(), UINT_MAX);
if (threads == NULL) {
pr_debug("thread_map__new\n");
return -1;
@@ -490,7 +492,7 @@
expected_nr_events[i] = random() % 257;
}
- threads = thread_map__new(-1, getpid());
+ threads = thread_map__new(-1, getpid(), UINT_MAX);
if (threads == NULL) {
pr_debug("thread_map__new\n");
return -1;
@@ -1054,7 +1056,7 @@
* we're monitoring, the one forked there.
*/
err = perf_evlist__create_maps(evlist, opts.target_pid,
- opts.target_tid, opts.cpu_list);
+ opts.target_tid, UINT_MAX, opts.cpu_list);
if (err < 0) {
pr_debug("Not enough memory to create thread/cpu maps\n");
goto out_delete_evlist;
@@ -1296,6 +1298,173 @@
return (err < 0 || errs > 0) ? -1 : 0;
}
+
+#if defined(__x86_64__) || defined(__i386__)
+
+#define barrier() asm volatile("" ::: "memory")
+
+static u64 rdpmc(unsigned int counter)
+{
+ unsigned int low, high;
+
+ asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));
+
+ return low | ((u64)high) << 32;
+}
+
+static u64 rdtsc(void)
+{
+ unsigned int low, high;
+
+ asm volatile("rdtsc" : "=a" (low), "=d" (high));
+
+ return low | ((u64)high) << 32;
+}
+
+static u64 mmap_read_self(void *addr)
+{
+ struct perf_event_mmap_page *pc = addr;
+ u32 seq, idx, time_mult = 0, time_shift = 0;
+ u64 count, cyc = 0, time_offset = 0, enabled, running, delta;
+
+ do {
+ seq = pc->lock;
+ barrier();
+
+ enabled = pc->time_enabled;
+ running = pc->time_running;
+
+ if (enabled != running) {
+ cyc = rdtsc();
+ time_mult = pc->time_mult;
+ time_shift = pc->time_shift;
+ time_offset = pc->time_offset;
+ }
+
+ idx = pc->index;
+ count = pc->offset;
+ if (idx)
+ count += rdpmc(idx - 1);
+
+ barrier();
+ } while (pc->lock != seq);
+
+ if (enabled != running) {
+ u64 quot, rem;
+
+ quot = (cyc >> time_shift);
+ rem = cyc & ((1 << time_shift) - 1);
+ delta = time_offset + quot * time_mult +
+ ((rem * time_mult) >> time_shift);
+
+ enabled += delta;
+ if (idx)
+ running += delta;
+
+ quot = count / running;
+ rem = count % running;
+ count = quot * enabled + (rem * enabled) / running;
+ }
+
+ return count;
+}
+
+/*
+ * If the RDPMC instruction faults then signal this back to the test parent task:
+ */
+static void segfault_handler(int sig __used, siginfo_t *info __used, void *uc __used)
+{
+ exit(-1);
+}
+
+static int __test__rdpmc(void)
+{
+ long page_size = sysconf(_SC_PAGE_SIZE);
+ volatile int tmp = 0;
+ u64 i, loops = 1000;
+ int n;
+ int fd;
+ void *addr;
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_INSTRUCTIONS,
+ .exclude_kernel = 1,
+ };
+ u64 delta_sum = 0;
+ struct sigaction sa;
+
+ sigfillset(&sa.sa_mask);
+ sa.sa_sigaction = segfault_handler;
+ sigaction(SIGSEGV, &sa, NULL);
+
+ fprintf(stderr, "\n\n");
+
+ fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
+ if (fd < 0) {
+ die("Error: sys_perf_event_open() syscall returned "
+ "with %d (%s)\n", fd, strerror(errno));
+ }
+
+ addr = mmap(NULL, page_size, PROT_READ, MAP_SHARED, fd, 0);
+ if (addr == (void *)(-1)) {
+ die("Error: mmap() syscall returned "
+ "with (%s)\n", strerror(errno));
+ }
+
+ for (n = 0; n < 6; n++) {
+ u64 stamp, now, delta;
+
+ stamp = mmap_read_self(addr);
+
+ for (i = 0; i < loops; i++)
+ tmp++;
+
+ now = mmap_read_self(addr);
+ loops *= 10;
+
+ delta = now - stamp;
+ fprintf(stderr, "%14d: %14Lu\n", n, (long long)delta);
+
+ delta_sum += delta;
+ }
+
+ munmap(addr, page_size);
+ close(fd);
+
+ fprintf(stderr, " ");
+
+ if (!delta_sum)
+ return -1;
+
+ return 0;
+}
+
+static int test__rdpmc(void)
+{
+ int status = 0;
+ int wret = 0;
+ int ret;
+ int pid;
+
+ pid = fork();
+ if (pid < 0)
+ return -1;
+
+ if (!pid) {
+ ret = __test__rdpmc();
+
+ exit(ret);
+ }
+
+ wret = waitpid(pid, &status, 0);
+ if (wret < 0 || status)
+ return -1;
+
+ return 0;
+}
+
+#endif
+
static struct test {
const char *desc;
int (*func)(void);
@@ -1320,6 +1489,12 @@
.desc = "parse events tests",
.func = test__parse_events,
},
+#if defined(__x86_64__) || defined(__i386__)
+ {
+ .desc = "x86 rdpmc test",
+ .func = test__rdpmc,
+ },
+#endif
{
.desc = "Validate PERF_RECORD_* events & perf_sample fields",
.func = test__PERF_RECORD,
@@ -1412,7 +1587,5 @@
if (symbol__init() < 0)
return -1;
- setup_pager();
-
return __cmd_test(argc, argv);
}
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index dd162aa..d869b21 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -64,7 +64,6 @@
#include <linux/unistd.h>
#include <linux/types.h>
-
void get_term_dimensions(struct winsize *ws)
{
char *s = getenv("LINES");
@@ -544,10 +543,20 @@
static void *display_thread_tui(void *arg)
{
+ struct perf_evsel *pos;
struct perf_top *top = arg;
const char *help = "For a higher level overview, try: perf top --sort comm,dso";
perf_top__sort_new_samples(top);
+
+ /*
+ * Initialize the uid_filter_str, in the future the TUI will allow
+ * Zooming in/out UIDs. For now juse use whatever the user passed
+ * via --uid.
+ */
+ list_for_each_entry(pos, &top->evlist->entries, node)
+ pos->hists.uid_filter_str = top->uid_str;
+
perf_evlist__tui_browse_hists(top->evlist, help,
perf_top__sort_new_samples,
top, top->delay_secs);
@@ -956,7 +965,7 @@
if (ret)
goto out_delete;
- if (top->target_tid != -1)
+ if (top->target_tid != -1 || top->uid != UINT_MAX)
perf_event__synthesize_thread_map(&top->tool, top->evlist->threads,
perf_event__process,
&top->session->host_machine);
@@ -1096,6 +1105,7 @@
.delay_secs = 2,
.target_pid = -1,
.target_tid = -1,
+ .uid = UINT_MAX,
.freq = 1000, /* 1 KHz */
.sample_id_all_avail = true,
.mmap_pages = 128,
@@ -1169,6 +1179,7 @@
"Display raw encoding of assembly instructions (default)"),
OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
"Specify disassembler style (e.g. -M intel for intel syntax)"),
+ OPT_STRING('u', "uid", &top.uid_str, "user", "user to profile"),
OPT_END()
};
@@ -1194,6 +1205,10 @@
setup_browser(false);
+ top.uid = parse_target_uid(top.uid_str, top.target_tid, top.target_pid);
+ if (top.uid_str != NULL && top.uid == UINT_MAX - 1)
+ goto out_delete_evlist;
+
/* CPU and PID are mutually exclusive */
if (top.target_tid > 0 && top.cpu_list) {
printf("WARNING: PID switch overriding CPU\n");
@@ -1205,7 +1220,7 @@
top.target_tid = top.target_pid;
if (perf_evlist__create_maps(top.evlist, top.target_pid,
- top.target_tid, top.cpu_list) < 0)
+ top.target_tid, top.uid, top.cpu_list) < 0)
usage_with_options(top_usage, options);
if (!top.evlist->nr_entries &&
@@ -1269,6 +1284,7 @@
status = __cmd_top(&top);
+out_delete_evlist:
perf_evlist__delete(top.evlist);
return status;
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 64f8bee..92af168 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -188,6 +188,7 @@
struct perf_record_opts {
pid_t target_pid;
pid_t target_tid;
+ uid_t uid;
bool call_graph;
bool group;
bool inherit_stat;
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 6893eec..adc72f0 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -166,6 +166,17 @@
return cpus;
}
+size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp)
+{
+ int i;
+ size_t printed = fprintf(fp, "%d cpu%s: ",
+ map->nr, map->nr > 1 ? "s" : "");
+ for (i = 0; i < map->nr; ++i)
+ printed += fprintf(fp, "%s%d", i ? ", " : "", map->map[i]);
+
+ return printed + fprintf(fp, "\n");
+}
+
struct cpu_map *cpu_map__dummy_new(void)
{
struct cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(int));
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index 072c0a3..c415185 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -1,6 +1,8 @@
#ifndef __PERF_CPUMAP_H
#define __PERF_CPUMAP_H
+#include <stdio.h>
+
struct cpu_map {
int nr;
int map[];
@@ -10,4 +12,6 @@
struct cpu_map *cpu_map__dummy_new(void);
void cpu_map__delete(struct cpu_map *map);
+size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp);
+
#endif /* __PERF_CPUMAP_H */
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 3f16e08..a6d50e3 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -594,14 +594,14 @@
}
int perf_evlist__create_maps(struct perf_evlist *evlist, pid_t target_pid,
- pid_t target_tid, const char *cpu_list)
+ pid_t target_tid, uid_t uid, const char *cpu_list)
{
- evlist->threads = thread_map__new(target_pid, target_tid);
+ evlist->threads = thread_map__new(target_pid, target_tid, uid);
if (evlist->threads == NULL)
return -1;
- if (cpu_list == NULL && target_tid != -1)
+ if (uid != UINT_MAX || (cpu_list == NULL && target_tid != -1))
evlist->cpus = cpu_map__dummy_new();
else
evlist->cpus = cpu_map__new(cpu_list);
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 8922aee..9c51660 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -107,7 +107,7 @@
}
int perf_evlist__create_maps(struct perf_evlist *evlist, pid_t target_pid,
- pid_t target_tid, const char *cpu_list);
+ pid_t tid, uid_t uid, const char *cpu_list);
void perf_evlist__delete_maps(struct perf_evlist *evlist);
int perf_evlist__set_filters(struct perf_evlist *evlist);
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index f55f0a8d..0d48613 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -55,6 +55,7 @@
u64 nr_entries;
const struct thread *thread_filter;
const struct dso *dso_filter;
+ const char *uid_filter_str;
pthread_mutex_t lock;
struct events_stats stats;
u64 event_stream;
diff --git a/tools/perf/util/include/asm/dwarf2.h b/tools/perf/util/include/asm/dwarf2.h
index bb4198e..afe3819 100644
--- a/tools/perf/util/include/asm/dwarf2.h
+++ b/tools/perf/util/include/asm/dwarf2.h
@@ -2,10 +2,12 @@
#ifndef PERF_DWARF2_H
#define PERF_DWARF2_H
-/* dwarf2.h ... dummy header file for including arch/x86/lib/memcpy_64.S */
+/* dwarf2.h ... dummy header file for including arch/x86/lib/mem{cpy,set}_64.S */
#define CFI_STARTPROC
#define CFI_ENDPROC
+#define CFI_REMEMBER_STATE
+#define CFI_RESTORE_STATE
#endif /* PERF_DWARF2_H */
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 29cb654..b9bbdd2 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -1729,7 +1729,7 @@
}
ret = 0;
- printf("Add new event%s\n", (ntevs > 1) ? "s:" : ":");
+ printf("Added new event%s\n", (ntevs > 1) ? "s:" : ":");
for (i = 0; i < ntevs; i++) {
tev = &tevs[i];
if (pev->event)
@@ -1784,7 +1784,7 @@
if (ret >= 0) {
/* Show how to use the event. */
- printf("\nYou can now use it on all perf tools, such as:\n\n");
+ printf("\nYou can now use it in all perf tools, such as:\n\n");
printf("\tperf record -e %s:%s -aR sleep 1\n\n", tev->group,
tev->event);
}
@@ -1959,7 +1959,7 @@
goto error;
}
- printf("Remove event: %s\n", ent->s);
+ printf("Removed event: %s\n", ent->s);
return 0;
error:
pr_warning("Failed to delete event: %s\n", strerror(-ret));
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 9dd47a4..e03b58a 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -425,14 +425,14 @@
static int pyrf_thread_map__init(struct pyrf_thread_map *pthreads,
PyObject *args, PyObject *kwargs)
{
- static char *kwlist[] = { "pid", "tid", NULL };
- int pid = -1, tid = -1;
+ static char *kwlist[] = { "pid", "tid", "uid", NULL };
+ int pid = -1, tid = -1, uid = UINT_MAX;
- if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ii",
- kwlist, &pid, &tid))
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iii",
+ kwlist, &pid, &tid, &uid))
return -1;
- pthreads->threads = thread_map__new(pid, tid);
+ pthreads->threads = thread_map__new(pid, tid, uid);
if (pthreads->threads == NULL)
return -1;
return 0;
diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c
index a5df131..3d4b6c5 100644
--- a/tools/perf/util/thread_map.c
+++ b/tools/perf/util/thread_map.c
@@ -1,6 +1,11 @@
#include <dirent.h>
+#include <limits.h>
+#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
#include "thread_map.h"
/* Skip "." and ".." directories */
@@ -23,7 +28,7 @@
sprintf(name, "/proc/%d/task", pid);
items = scandir(name, &namelist, filter, NULL);
if (items <= 0)
- return NULL;
+ return NULL;
threads = malloc(sizeof(*threads) + sizeof(pid_t) * items);
if (threads != NULL) {
@@ -51,10 +56,99 @@
return threads;
}
-struct thread_map *thread_map__new(pid_t pid, pid_t tid)
+struct thread_map *thread_map__new_by_uid(uid_t uid)
+{
+ DIR *proc;
+ int max_threads = 32, items, i;
+ char path[256];
+ struct dirent dirent, *next, **namelist = NULL;
+ struct thread_map *threads = malloc(sizeof(*threads) +
+ max_threads * sizeof(pid_t));
+ if (threads == NULL)
+ goto out;
+
+ proc = opendir("/proc");
+ if (proc == NULL)
+ goto out_free_threads;
+
+ threads->nr = 0;
+
+ while (!readdir_r(proc, &dirent, &next) && next) {
+ char *end;
+ bool grow = false;
+ struct stat st;
+ pid_t pid = strtol(dirent.d_name, &end, 10);
+
+ if (*end) /* only interested in proper numerical dirents */
+ continue;
+
+ snprintf(path, sizeof(path), "/proc/%s", dirent.d_name);
+
+ if (stat(path, &st) != 0)
+ continue;
+
+ if (st.st_uid != uid)
+ continue;
+
+ snprintf(path, sizeof(path), "/proc/%d/task", pid);
+ items = scandir(path, &namelist, filter, NULL);
+ if (items <= 0)
+ goto out_free_closedir;
+
+ while (threads->nr + items >= max_threads) {
+ max_threads *= 2;
+ grow = true;
+ }
+
+ if (grow) {
+ struct thread_map *tmp;
+
+ tmp = realloc(threads, (sizeof(*threads) +
+ max_threads * sizeof(pid_t)));
+ if (tmp == NULL)
+ goto out_free_namelist;
+
+ threads = tmp;
+ }
+
+ for (i = 0; i < items; i++)
+ threads->map[threads->nr + i] = atoi(namelist[i]->d_name);
+
+ for (i = 0; i < items; i++)
+ free(namelist[i]);
+ free(namelist);
+
+ threads->nr += items;
+ }
+
+out_closedir:
+ closedir(proc);
+out:
+ return threads;
+
+out_free_threads:
+ free(threads);
+ return NULL;
+
+out_free_namelist:
+ for (i = 0; i < items; i++)
+ free(namelist[i]);
+ free(namelist);
+
+out_free_closedir:
+ free(threads);
+ threads = NULL;
+ goto out_closedir;
+}
+
+struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid)
{
if (pid != -1)
return thread_map__new_by_pid(pid);
+
+ if (tid == -1 && uid != UINT_MAX)
+ return thread_map__new_by_uid(uid);
+
return thread_map__new_by_tid(tid);
}
@@ -62,3 +156,14 @@
{
free(threads);
}
+
+size_t thread_map__fprintf(struct thread_map *threads, FILE *fp)
+{
+ int i;
+ size_t printed = fprintf(fp, "%d thread%s: ",
+ threads->nr, threads->nr > 1 ? "s" : "");
+ for (i = 0; i < threads->nr; ++i)
+ printed += fprintf(fp, "%s%d", i ? ", " : "", threads->map[i]);
+
+ return printed + fprintf(fp, "\n");
+}
diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h
index 3cb9073..c75ddba 100644
--- a/tools/perf/util/thread_map.h
+++ b/tools/perf/util/thread_map.h
@@ -2,6 +2,7 @@
#define __PERF_THREAD_MAP_H
#include <sys/types.h>
+#include <stdio.h>
struct thread_map {
int nr;
@@ -10,6 +11,10 @@
struct thread_map *thread_map__new_by_pid(pid_t pid);
struct thread_map *thread_map__new_by_tid(pid_t tid);
-struct thread_map *thread_map__new(pid_t pid, pid_t tid);
+struct thread_map *thread_map__new_by_uid(uid_t uid);
+struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid);
void thread_map__delete(struct thread_map *threads);
+
+size_t thread_map__fprintf(struct thread_map *threads, FILE *fp);
+
#endif /* __PERF_THREAD_MAP_H */
diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c
index 500471d..e4370ca 100644
--- a/tools/perf/util/top.c
+++ b/tools/perf/util/top.c
@@ -75,6 +75,9 @@
else if (top->target_tid != -1)
ret += SNPRINTF(bf + ret, size - ret, " (target_tid: %d",
top->target_tid);
+ else if (top->uid_str != NULL)
+ ret += SNPRINTF(bf + ret, size - ret, " (uid: %s",
+ top->uid_str);
else
ret += SNPRINTF(bf + ret, size - ret, " (all");
diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h
index a248f3c..def3e53 100644
--- a/tools/perf/util/top.h
+++ b/tools/perf/util/top.h
@@ -24,6 +24,7 @@
int print_entries, count_filter, delay_secs;
int freq;
pid_t target_pid, target_tid;
+ uid_t uid;
bool hide_kernel_symbols, hide_user_symbols, zero;
bool system_wide;
bool use_tui, use_stdio;
@@ -45,6 +46,7 @@
int realtime_prio;
int sym_pcnt_filter;
const char *sym_filter;
+ const char *uid_str;
};
size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size);
diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/util/ui/browsers/hists.c
index e81aef1..bfba049 100644
--- a/tools/perf/util/ui/browsers/hists.c
+++ b/tools/perf/util/ui/browsers/hists.c
@@ -839,6 +839,9 @@
nr_events = convert_unit(nr_events, &unit);
printed = snprintf(bf, size, "Events: %lu%c %s", nr_events, unit, ev_name);
+ if (self->uid_filter_str)
+ printed += snprintf(bf + printed, size - printed,
+ ", UID: %s", self->uid_filter_str);
if (thread)
printed += snprintf(bf + printed, size - printed,
", Thread: %s(%d)",
diff --git a/tools/perf/util/usage.c b/tools/perf/util/usage.c
index d76d1c0..d0c0139 100644
--- a/tools/perf/util/usage.c
+++ b/tools/perf/util/usage.c
@@ -7,6 +7,7 @@
* Copyright (C) Linus Torvalds, 2005
*/
#include "util.h"
+#include "debug.h"
static void report(const char *prefix, const char *err, va_list params)
{
@@ -81,3 +82,41 @@
warn_routine(warn, params);
va_end(params);
}
+
+uid_t parse_target_uid(const char *str, pid_t tid, pid_t pid)
+{
+ struct passwd pwd, *result;
+ char buf[1024];
+
+ if (str == NULL)
+ return UINT_MAX;
+
+ /* CPU and PID are mutually exclusive */
+ if (tid > 0 || pid > 0) {
+ ui__warning("PID/TID switch overriding UID\n");
+ sleep(1);
+ return UINT_MAX;
+ }
+
+ getpwnam_r(str, &pwd, buf, sizeof(buf), &result);
+
+ if (result == NULL) {
+ char *endptr;
+ int uid = strtol(str, &endptr, 10);
+
+ if (*endptr != '\0') {
+ ui__error("Invalid user %s\n", str);
+ return UINT_MAX - 1;
+ }
+
+ getpwuid_r(uid, &pwd, buf, sizeof(buf), &result);
+
+ if (result == NULL) {
+ ui__error("Problems obtaining information for user %s\n",
+ str);
+ return UINT_MAX - 1;
+ }
+ }
+
+ return result->pw_uid;
+}
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index ecf9898..232d17e 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -245,6 +245,8 @@
void event_attr_init(struct perf_event_attr *attr);
+uid_t parse_target_uid(const char *str, pid_t tid, pid_t pid);
+
#define _STR(x) #x
#define STR(x) _STR(x)