perf: Optimize the hotpath by converting the perf output buffer to local_t
Since there is now only a single writer, we can convert the buffer's
atomic_t/atomic_long_t fields to local_t and avoid all these pesky LOCK-prefixed instructions.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index f1f853a..ce76676 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -485,6 +485,7 @@
#include <linux/ftrace.h>
#include <linux/cpu.h>
#include <asm/atomic.h>
+#include <asm/local.h>
#define PERF_MAX_STACK_DEPTH 255
@@ -588,20 +589,18 @@
#ifdef CONFIG_PERF_USE_VMALLOC
struct work_struct work;
#endif
- int data_order;
+ int data_order; /* allocation order */
int nr_pages; /* nr of data pages */
int writable; /* are we writable */
int nr_locked; /* nr pages mlocked */
atomic_t poll; /* POLL_ for wakeups */
- atomic_t events; /* event_id limit */
- atomic_long_t head; /* write position */
-
- atomic_t wakeup; /* needs a wakeup */
- atomic_t lost; /* nr records lost */
-
- atomic_t nest; /* nested writers */
+ local_t head; /* write position */
+ local_t nest; /* nested writers */
+ local_t events; /* event limit */
+ local_t wakeup; /* needs a wakeup */
+ local_t lost; /* nr records lost */
long watermark; /* wakeup watermark */