/*
 * Generic ring buffer
 *
 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
 */
#include <linux/ring_buffer.h>
#include <linux/trace_clock.h>
#include <linux/ftrace_irq.h>
#include <linux/spinlock.h>
#include <linux/debugfs.h>
#include <linux/uaccess.h>
#include <linux/hardirq.h>
#include <linux/kmemcheck.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/hash.h>
#include <linux/list.h>
#include <linux/cpu.h>
#include <linux/fs.h>

#include <asm/local.h>
#include "trace.h"

/*
 * The ring buffer header is special. We must manually keep it up to date.
 */
int ring_buffer_print_entry_header(struct trace_seq *s)
{
	int ret;

	ret = trace_seq_printf(s, "# compressed entry header\n");
	ret = trace_seq_printf(s, "\ttype_len    :    5 bits\n");
	ret = trace_seq_printf(s, "\ttime_delta  :   27 bits\n");
	ret = trace_seq_printf(s, "\tarray       :   32 bits\n");
	ret = trace_seq_printf(s, "\n");
	ret = trace_seq_printf(s, "\tpadding     : type == %d\n",
			       RINGBUF_TYPE_PADDING);
	ret = trace_seq_printf(s, "\ttime_extend : type == %d\n",
			       RINGBUF_TYPE_TIME_EXTEND);
	ret = trace_seq_printf(s, "\tdata max type_len  == %d\n",
			       RINGBUF_TYPE_DATA_TYPE_LEN_MAX);

	return ret;
}

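/*
 * Example of the encoding printed above (illustrative, derived from
 * the definitions in this file): the event header is a single 32-bit
 * word holding type_len (5 bits) and time_delta (27 bits). A small
 * data event stores its payload length as type_len * RB_ALIGNMENT,
 * so type_len == 3 means 12 bytes of payload and a 16-byte event in
 * total. If the payload does not fit in the 5-bit type_len, type_len
 * is 0 and the length is stored in array[0] instead.
 */
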
/*
 * The ring buffer is made up of a list of pages. A separate list of pages is
 * allocated for each CPU. A writer may only write to a buffer that is
 * associated with the CPU it is currently executing on. A reader may read
 * from any per cpu buffer.
 *
 * The reader is special. For each per cpu buffer, the reader has its own
 * reader page. When a reader has read the entire reader page, this reader
 * page is swapped with another page in the ring buffer.
 *
 * Now, as long as the writer is off the reader page, the reader can do
 * whatever it wants with that page. The writer will never write to that
 * page again (as long as it is out of the ring buffer).
 *
 * Here's some silly ASCII art.
 *
 *   +------+
 *   |reader|          RING BUFFER
 *   |page  |
 *   +------+        +---+   +---+   +---+
 *                   |   |-->|   |-->|   |
 *                   +---+   +---+   +---+
 *                     ^               |
 *                     |               |
 *                     +---------------+
 *
 *
 *   +------+
 *   |reader|          RING BUFFER
 *   |page  |------------------v
 *   +------+        +---+   +---+   +---+
 *                   |   |-->|   |-->|   |
 *                   +---+   +---+   +---+
 *                     ^               |
 *                     |               |
 *                     +---------------+
 *
 *
 *   +------+
 *   |reader|          RING BUFFER
 *   |page  |------------------v
 *   +------+        +---+   +---+   +---+
 *      ^            |   |-->|   |-->|   |
 *      |            +---+   +---+   +---+
 *      |                              |
 *      |                              |
 *      +------------------------------+
 *
 *
 *   +------+
 *   |buffer|          RING BUFFER
 *   |page  |------------------v
 *   +------+        +---+   +---+   +---+
 *      ^            |   |   |   |-->|   |
 *      |   New      +---+   +---+   +---+
 *      |  Reader------^               |
 *      |   page                       |
 *      +------------------------------+
 *
 *
 * After we make this swap, the reader can hand this page off to the splice
 * code and be done with it. It can even allocate a new page if it needs to
 * and swap that into the ring buffer.
 *
 * We will be using cmpxchg soon to make all this lockless.
 *
 */

/*
 * A fast way to enable or disable all ring buffers is to
 * call tracing_on or tracing_off. Turning off the ring buffers
 * prevents all ring buffers from being recorded to.
 * Turning this switch on makes it OK to write to the
 * ring buffer, if the ring buffer is enabled itself.
 *
 * There are three layers that must be on in order to write
 * to the ring buffer.
 *
 * 1) This global flag must be set.
 * 2) The ring buffer must be enabled for recording.
 * 3) The per cpu buffer must be enabled for recording.
 *
 * In case of an anomaly, this global flag has a bit set that
 * will permanently disable all ring buffers.
 */

/*
 * Global flag to disable all recording to ring buffers
 * This has two bits: ON, DISABLED
 *
 *  ON   DISABLED
 * ---- ----------
 *   0      0        : ring buffers are off
 *   1      0        : ring buffers are on
 *   X      1        : ring buffers are permanently disabled
 */

enum {
	RB_BUFFERS_ON_BIT	= 0,
	RB_BUFFERS_DISABLED_BIT	= 1,
};

enum {
	RB_BUFFERS_ON		= 1 << RB_BUFFERS_ON_BIT,
	RB_BUFFERS_DISABLED	= 1 << RB_BUFFERS_DISABLED_BIT,
};

static unsigned long ring_buffer_flags __read_mostly = RB_BUFFERS_ON;

#define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data)

/**
 * tracing_on - enable all tracing buffers
 *
 * This function enables all tracing buffers that may have been
 * disabled with tracing_off.
 */
void tracing_on(void)
{
	set_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags);
}
EXPORT_SYMBOL_GPL(tracing_on);

/**
 * tracing_off - turn off all tracing buffers
 *
 * This function stops all tracing buffers from recording data.
 * It does not disable any overhead the tracers themselves may
 * be causing. This function simply causes all recording to
 * the ring buffers to fail.
 */
void tracing_off(void)
{
	clear_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags);
}
EXPORT_SYMBOL_GPL(tracing_off);

/**
 * tracing_off_permanent - permanently disable ring buffers
 *
 * This function, once called, will disable all ring buffers
 * permanently.
 */
void tracing_off_permanent(void)
{
	set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags);
}

/**
 * tracing_is_on - show state of ring buffers enabled
 */
int tracing_is_on(void)
{
	return ring_buffer_flags == RB_BUFFERS_ON;
}
EXPORT_SYMBOL_GPL(tracing_is_on);

#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
#define RB_ALIGNMENT		4U
#define RB_MAX_SMALL_DATA	(RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
#define RB_EVNT_MIN_SIZE	8U	/* two 32bit words */

#if !defined(CONFIG_64BIT) || defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
# define RB_FORCE_8BYTE_ALIGNMENT	0
# define RB_ARCH_ALIGNMENT		RB_ALIGNMENT
#else
# define RB_FORCE_8BYTE_ALIGNMENT	1
# define RB_ARCH_ALIGNMENT		8U
#endif

/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX

enum {
	RB_LEN_TIME_EXTEND = 8,
	RB_LEN_TIME_STAMP = 16,
};

static inline int rb_null_event(struct ring_buffer_event *event)
{
	return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta;
}

static void rb_event_set_padding(struct ring_buffer_event *event)
{
	/* padding has a NULL time_delta */
	event->type_len = RINGBUF_TYPE_PADDING;
	event->time_delta = 0;
}

static unsigned
rb_event_data_length(struct ring_buffer_event *event)
{
	unsigned length;

	if (event->type_len)
		length = event->type_len * RB_ALIGNMENT;
	else
		length = event->array[0];
	return length + RB_EVNT_HDR_SIZE;
}

/* inline for ring buffer fast paths */
static unsigned
rb_event_length(struct ring_buffer_event *event)
{
	switch (event->type_len) {
	case RINGBUF_TYPE_PADDING:
		if (rb_null_event(event))
			/* undefined */
			return -1;
		return event->array[0] + RB_EVNT_HDR_SIZE;

	case RINGBUF_TYPE_TIME_EXTEND:
		return RB_LEN_TIME_EXTEND;

	case RINGBUF_TYPE_TIME_STAMP:
		return RB_LEN_TIME_STAMP;

	case RINGBUF_TYPE_DATA:
		return rb_event_data_length(event);
	default:
		BUG();
	}
	/* not hit */
	return 0;
}

/**
 * ring_buffer_event_length - return the length of the event
 * @event: the event to get the length of
 */
unsigned ring_buffer_event_length(struct ring_buffer_event *event)
{
	unsigned length = rb_event_length(event);
	if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
		return length;
	length -= RB_EVNT_HDR_SIZE;
	if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0]))
		length -= sizeof(event->array[0]);
	return length;
}
EXPORT_SYMBOL_GPL(ring_buffer_event_length);

/* inline for ring buffer fast paths */
static void *
rb_event_data(struct ring_buffer_event *event)
{
	BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
	/* If length is in len field, then array[0] has the data */
	if (event->type_len)
		return (void *)&event->array[0];
	/* Otherwise length is in array[0] and array[1] has the data */
	return (void *)&event->array[1];
}

/**
 * ring_buffer_event_data - return the data of the event
 * @event: the event to get the data from
 */
void *ring_buffer_event_data(struct ring_buffer_event *event)
{
	return rb_event_data(event);
}
EXPORT_SYMBOL_GPL(ring_buffer_event_data);

#define for_each_buffer_cpu(buffer, cpu)		\
	for_each_cpu(cpu, buffer->cpumask)

#define TS_SHIFT	27
#define TS_MASK		((1ULL << TS_SHIFT) - 1)
#define TS_DELTA_TEST	(~TS_MASK)

struct buffer_data_page {
	u64		 time_stamp;	/* page time stamp */
	local_t		 commit;	/* write committed index */
	unsigned char	 data[];	/* data of buffer page */
};

/*
 * Note, the buffer_page list must be first. The buffer pages
 * are allocated in cache lines, which means that each buffer
 * page will be at the beginning of a cache line, and thus
 * the least significant bits will be zero. We use this to
 * add flags in the list struct pointers, to make the ring buffer
 * lockless.
 */
struct buffer_page {
	struct list_head list;		/* list of buffer pages */
	local_t		 write;		/* index for next write */
	unsigned	 read;		/* index for next read */
	local_t		 entries;	/* entries on this page */
	struct buffer_data_page *page;	/* Actual data page */
};

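/*
 * Layout note (illustrative summary of the allocation done later in
 * this file): each buffer_page descriptor above is kzalloc'ed
 * cache-line aligned, which is what guarantees the zero low bits
 * used for the list-pointer flags, while the data it describes
 * lives in a separate page from __get_free_page(), laid out as a
 * struct buffer_data_page (time_stamp, commit, then data[]).
 */
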
/*
 * The buffer page counters, write and entries, must be reset
 * atomically when crossing page boundaries. To synchronize this
 * update, two counters are inserted into the number. One is
 * the actual counter for the write position or count on the page.
 *
 * The other is a counter of updaters. Before an update happens
 * the update partition of the counter is incremented. This will
 * allow the updater to update the counter atomically.
 *
 * The counter is 20 bits, and the state data is 12.
 */
#define RB_WRITE_MASK		0xfffff
#define RB_WRITE_INTCNT		(1 << 20)

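/*
 * Example of the split described above (illustrative): for a raw
 * value w of the write field, (w & RB_WRITE_MASK) is the actual
 * write index on the page, and (w >> 20) counts updaters currently
 * touching it. rb_tail_page_update() below relies on this by doing
 * local_add_return(RB_WRITE_INTCNT, &page->write), which both marks
 * an update in progress and snapshots the current index.
 */
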
static void rb_init_page(struct buffer_data_page *bpage)
{
	local_set(&bpage->commit, 0);
}

/**
 * ring_buffer_page_len - the size of data on the page.
 * @page: The page to read
 *
 * Returns the amount of data on the page, including buffer page header.
 */
size_t ring_buffer_page_len(void *page)
{
	return local_read(&((struct buffer_data_page *)page)->commit)
		+ BUF_PAGE_HDR_SIZE;
}

/*
 * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing
 * this issue out.
 */
static void free_buffer_page(struct buffer_page *bpage)
{
	free_page((unsigned long)bpage->page);
	kfree(bpage);
}

/*
 * We need to fit the time_stamp delta into 27 bits.
 */
static inline int test_time_stamp(u64 delta)
{
	if (delta & TS_DELTA_TEST)
		return 1;
	return 0;
}

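/*
 * Rough arithmetic (assuming the default nanosecond trace_clock_local
 * set up in __ring_buffer_alloc): 2^27 ns is about 134 ms, so two
 * events on a CPU more than ~134 ms apart cannot express their delta
 * in the 27-bit field and need a TIME_EXTEND event instead.
 */
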
#define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE)

/* Max payload is BUF_PAGE_SIZE - header (8bytes) */
#define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2))

/* Max number of timestamps that can fit on a page */
#define RB_TIMESTAMPS_PER_PAGE (BUF_PAGE_SIZE / RB_LEN_TIME_STAMP)

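/*
 * Example sizes (assuming 4K pages and a 64-bit local_t; other
 * configurations differ): BUF_PAGE_HDR_SIZE is 16 bytes, so
 * BUF_PAGE_SIZE is 4080 bytes, BUF_MAX_DATA_SIZE is 4072 bytes and
 * RB_TIMESTAMPS_PER_PAGE is 255.
 */
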
int ring_buffer_print_page_header(struct trace_seq *s)
{
	struct buffer_data_page field;
	int ret;

	ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t"
			       "offset:0;\tsize:%u;\tsigned:%u;\n",
			       (unsigned int)sizeof(field.time_stamp),
			       (unsigned int)is_signed_type(u64));

	ret = trace_seq_printf(s, "\tfield: local_t commit;\t"
			       "offset:%u;\tsize:%u;\tsigned:%u;\n",
			       (unsigned int)offsetof(typeof(field), commit),
			       (unsigned int)sizeof(field.commit),
			       (unsigned int)is_signed_type(long));

	ret = trace_seq_printf(s, "\tfield: char data;\t"
			       "offset:%u;\tsize:%u;\tsigned:%u;\n",
			       (unsigned int)offsetof(typeof(field), data),
			       (unsigned int)BUF_PAGE_SIZE,
			       (unsigned int)is_signed_type(char));

	return ret;
}

/*
 * head_page == tail_page && head == tail then buffer is empty.
 */
struct ring_buffer_per_cpu {
	int				cpu;
	struct ring_buffer		*buffer;
	spinlock_t			reader_lock;	/* serialize readers */
	arch_spinlock_t			lock;
	struct lock_class_key		lock_key;
	struct list_head		*pages;
	struct buffer_page		*head_page;	/* read from head */
	struct buffer_page		*tail_page;	/* write to tail */
	struct buffer_page		*commit_page;	/* committed pages */
	struct buffer_page		*reader_page;
	local_t				commit_overrun;
	local_t				overrun;
	local_t				entries;
	local_t				committing;
	local_t				commits;
	unsigned long			read;
	u64				write_stamp;
	u64				read_stamp;
	atomic_t			record_disabled;
};

struct ring_buffer {
	unsigned			pages;
	unsigned			flags;
	int				cpus;
	atomic_t			record_disabled;
	cpumask_var_t			cpumask;

	struct lock_class_key		*reader_lock_key;

	struct mutex			mutex;

	struct ring_buffer_per_cpu	**buffers;

#ifdef CONFIG_HOTPLUG_CPU
	struct notifier_block		cpu_notify;
#endif
	u64				(*clock)(void);
};

struct ring_buffer_iter {
	struct ring_buffer_per_cpu	*cpu_buffer;
	unsigned long			head;
	struct buffer_page		*head_page;
	struct buffer_page		*cache_reader_page;
	unsigned long			cache_read;
	u64				read_stamp;
};

/* buffer may be either ring_buffer or ring_buffer_per_cpu */
#define RB_WARN_ON(b, cond)						\
	({								\
		int _____ret = unlikely(cond);				\
		if (_____ret) {						\
			if (__same_type(*(b), struct ring_buffer_per_cpu)) { \
				struct ring_buffer_per_cpu *__b =	\
					(void *)b;			\
				atomic_inc(&__b->buffer->record_disabled); \
			} else						\
				atomic_inc(&b->record_disabled);	\
			WARN_ON(1);					\
		}							\
		_____ret;						\
	})

/* Up this if you want to test the TIME_EXTENTS and normalization */
#define DEBUG_SHIFT 0

static inline u64 rb_time_stamp(struct ring_buffer *buffer)
{
	/* shift to debug/test normalization and TIME_EXTENTS */
	return buffer->clock() << DEBUG_SHIFT;
}

u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu)
{
	u64 time;

	preempt_disable_notrace();
	time = rb_time_stamp(buffer);
	preempt_enable_no_resched_notrace();

	return time;
}
EXPORT_SYMBOL_GPL(ring_buffer_time_stamp);

void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer,
				      int cpu, u64 *ts)
{
	/* Just stupid testing the normalize function and deltas */
	*ts >>= DEBUG_SHIFT;
}
EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp);

/*
 * Making the ring buffer lockless makes things tricky.
 * Writes only happen on the CPU that they are on, so they only
 * need to worry about interrupts. Reads, however, can happen on
 * any CPU.
 *
 * The reader page is always off the ring buffer, but when the
 * reader finishes with a page, it needs to swap its page with
 * a new one from the buffer. The reader needs to take from
 * the head (writes go to the tail). But if a writer is in overwrite
 * mode and wraps, it must push the head page forward.
 *
 * Here lies the problem.
 *
 * The reader must be careful to replace only the head page, and
 * not another one. As described at the top of the file in the
 * ASCII art, the reader sets its old page to point to the next
 * page after head. It then sets the page after head to point to
 * the old reader page. But if the writer moves the head page
 * during this operation, the reader could end up with the tail.
 *
 * We use cmpxchg to help prevent this race. We also do something
 * special with the page before head. We set the LSB to 1.
 *
 * When the writer must push the page forward, it will clear the
 * bit that points to the head page, move the head, and then set
 * the bit that points to the new head page.
 *
 * We also don't want an interrupt coming in and moving the head
 * page on another writer. Thus we use the second LSB to catch
 * that too. Thus:
 *
 * head->list->prev->next        bit 1          bit 0
 *                              -------        -------
 * Normal page                     0              0
 * Points to head page             0              1
 * New head page                   1              0
 *
 * Note we can not trust the prev pointer of the head page, because:
 *
 * +----+       +-----+        +-----+
 * |    |------>|  T  |---X--->|  N  |
 * |    |<------|     |        |     |
 * +----+       +-----+        +-----+
 *   ^                           ^ |
 *   |          +-----+          | |
 *   +----------|  R  |----------+ |
 *              |     |<-----------+
 *              +-----+
 *
 * Key:  ---X-->  HEAD flag set in pointer
 *         T      Tail page
 *         R      Reader page
 *         N      Next page
 *
 * (see __rb_reserve_next() to see where this happens)
 *
 * What the above shows is that the reader just swapped out
 * the reader page with a page in the buffer, but before it
 * could make the new header point back to the new page added
 * it was preempted by a writer. The writer moved forward onto
 * the new page added by the reader and is about to move forward
 * again.
 *
 * You can see, it is legitimate for the previous pointer of
 * the head (or any page) not to point back to itself. But only
 * temporarily.
 */

#define RB_PAGE_NORMAL		0UL
#define RB_PAGE_HEAD		1UL
#define RB_PAGE_UPDATE		2UL


#define RB_FLAG_MASK		3UL

/* PAGE_MOVED is not part of the mask */
#define RB_PAGE_MOVED		4UL

/*
 * rb_list_head - remove any bit
 */
static struct list_head *rb_list_head(struct list_head *list)
{
	unsigned long val = (unsigned long)list;

	return (struct list_head *)(val & ~RB_FLAG_MASK);
}

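/*
 * Illustration with a made-up address: if the next page's list_head
 * is at 0xffff880012345680, then a link of 0xffff880012345681 marks
 * it as the head page (RB_PAGE_HEAD), 0xffff880012345682 marks an
 * update in progress (RB_PAGE_UPDATE), and rb_list_head() masks off
 * the low two bits to recover the real 0xffff880012345680 pointer.
 */
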
/*
 * rb_is_head_page - test if the given page is the head page
 *
 * Because the reader may move the head_page pointer, we can
 * not trust what the head page is (it may be pointing to
 * the reader page). But if the next page is a header page,
 * its flags will be non zero.
 */
static inline int
rb_is_head_page(struct ring_buffer_per_cpu *cpu_buffer,
		struct buffer_page *page, struct list_head *list)
{
	unsigned long val;

	val = (unsigned long)list->next;

	if ((val & ~RB_FLAG_MASK) != (unsigned long)&page->list)
		return RB_PAGE_MOVED;

	return val & RB_FLAG_MASK;
}

/*
 * rb_is_reader_page
 *
 * The unique thing about the reader page is that, if the
 * writer is ever on it, the previous pointer never points
 * back to the reader page.
 */
static int rb_is_reader_page(struct buffer_page *page)
{
	struct list_head *list = page->list.prev;

	return rb_list_head(list->next) != &page->list;
}

/*
 * rb_set_list_to_head - set a list_head to be pointing to head.
 */
static void rb_set_list_to_head(struct ring_buffer_per_cpu *cpu_buffer,
				struct list_head *list)
{
	unsigned long *ptr;

	ptr = (unsigned long *)&list->next;
	*ptr |= RB_PAGE_HEAD;
	*ptr &= ~RB_PAGE_UPDATE;
}

/*
 * rb_head_page_activate - sets up head page
 */
static void rb_head_page_activate(struct ring_buffer_per_cpu *cpu_buffer)
{
	struct buffer_page *head;

	head = cpu_buffer->head_page;
	if (!head)
		return;

	/*
	 * Set the previous list pointer to have the HEAD flag.
	 */
	rb_set_list_to_head(cpu_buffer, head->list.prev);
}

static void rb_list_head_clear(struct list_head *list)
{
	unsigned long *ptr = (unsigned long *)&list->next;

	*ptr &= ~RB_FLAG_MASK;
}

/*
 * rb_head_page_deactivate - clears head page ptr (for free list)
 */
static void
rb_head_page_deactivate(struct ring_buffer_per_cpu *cpu_buffer)
{
	struct list_head *hd;

	/* Go through the whole list and clear any pointers found. */
	rb_list_head_clear(cpu_buffer->pages);

	list_for_each(hd, cpu_buffer->pages)
		rb_list_head_clear(hd);
}

static int rb_head_page_set(struct ring_buffer_per_cpu *cpu_buffer,
			    struct buffer_page *head,
			    struct buffer_page *prev,
			    int old_flag, int new_flag)
{
	struct list_head *list;
	unsigned long val = (unsigned long)&head->list;
	unsigned long ret;

	list = &prev->list;

	val &= ~RB_FLAG_MASK;

	ret = cmpxchg((unsigned long *)&list->next,
		      val | old_flag, val | new_flag);

	/* check if the reader took the page */
	if ((ret & ~RB_FLAG_MASK) != val)
		return RB_PAGE_MOVED;

	return ret & RB_FLAG_MASK;
}

static int rb_head_page_set_update(struct ring_buffer_per_cpu *cpu_buffer,
				   struct buffer_page *head,
				   struct buffer_page *prev,
				   int old_flag)
{
	return rb_head_page_set(cpu_buffer, head, prev,
				old_flag, RB_PAGE_UPDATE);
}

static int rb_head_page_set_head(struct ring_buffer_per_cpu *cpu_buffer,
				 struct buffer_page *head,
				 struct buffer_page *prev,
				 int old_flag)
{
	return rb_head_page_set(cpu_buffer, head, prev,
				old_flag, RB_PAGE_HEAD);
}

static int rb_head_page_set_normal(struct ring_buffer_per_cpu *cpu_buffer,
				   struct buffer_page *head,
				   struct buffer_page *prev,
				   int old_flag)
{
	return rb_head_page_set(cpu_buffer, head, prev,
				old_flag, RB_PAGE_NORMAL);
}

static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
			       struct buffer_page **bpage)
{
	struct list_head *p = rb_list_head((*bpage)->list.next);

	*bpage = list_entry(p, struct buffer_page, list);
}

static struct buffer_page *
rb_set_head_page(struct ring_buffer_per_cpu *cpu_buffer)
{
	struct buffer_page *head;
	struct buffer_page *page;
	struct list_head *list;
	int i;

	if (RB_WARN_ON(cpu_buffer, !cpu_buffer->head_page))
		return NULL;

	/* sanity check */
	list = cpu_buffer->pages;
	if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev->next) != list))
		return NULL;

	page = head = cpu_buffer->head_page;
	/*
	 * It is possible that the writer moves the header behind
	 * where we started, and we miss in one loop.
	 * A second loop should grab the header, but we'll do
	 * three loops just because I'm paranoid.
	 */
	for (i = 0; i < 3; i++) {
		do {
			if (rb_is_head_page(cpu_buffer, page, page->list.prev)) {
				cpu_buffer->head_page = page;
				return page;
			}
			rb_inc_page(cpu_buffer, &page);
		} while (page != head);
	}

	RB_WARN_ON(cpu_buffer, 1);

	return NULL;
}

static int rb_head_page_replace(struct buffer_page *old,
				struct buffer_page *new)
{
	unsigned long *ptr = (unsigned long *)&old->list.prev->next;
	unsigned long val;
	unsigned long ret;

	val = *ptr & ~RB_FLAG_MASK;
	val |= RB_PAGE_HEAD;

	ret = cmpxchg(ptr, val, (unsigned long)&new->list);

	return ret == val;
}

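/*
 * Note on the swap above (illustrative): the reader only wins the
 * cmpxchg if the previous page's next pointer still carries exactly
 * the HEAD flag and still points at the old head. If a writer has
 * meanwhile set the UPDATE flag or moved the head, the compare fails
 * and the old head is left in place, so no swap happens.
 */
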
/*
 * rb_tail_page_update - move the tail page forward
 *
 * Returns 1 if moved tail page, 0 if someone else did.
 */
static int rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer,
			       struct buffer_page *tail_page,
			       struct buffer_page *next_page)
{
	struct buffer_page *old_tail;
	unsigned long old_entries;
	unsigned long old_write;
	int ret = 0;

	/*
	 * The tail page now needs to be moved forward.
	 *
	 * We need to reset the tail page, but without messing
	 * with possible erasing of data brought in by interrupts
	 * that have moved the tail page and are currently on it.
	 *
	 * We add a counter to the write field to denote this.
	 */
	old_write = local_add_return(RB_WRITE_INTCNT, &next_page->write);
	old_entries = local_add_return(RB_WRITE_INTCNT, &next_page->entries);

	/*
	 * Just make sure we have seen our old_write and synchronize
	 * with any interrupts that come in.
	 */
	barrier();

	/*
	 * If the tail page is still the same as what we think
	 * it is, then it is up to us to update the tail
	 * pointer.
	 */
	if (tail_page == cpu_buffer->tail_page) {
		/* Zero the write counter */
		unsigned long val = old_write & ~RB_WRITE_MASK;
		unsigned long eval = old_entries & ~RB_WRITE_MASK;

		/*
		 * This will only succeed if an interrupt did
		 * not come in and change it. In which case, we
		 * do not want to modify it.
		 *
		 * We add (void) to let the compiler know that we do not care
		 * about the return value of these functions. We use the
		 * cmpxchg to only update if an interrupt did not already
		 * do it for us. If the cmpxchg fails, we don't care.
		 */
		(void)local_cmpxchg(&next_page->write, old_write, val);
		(void)local_cmpxchg(&next_page->entries, old_entries, eval);

		/*
		 * No need to worry about races with clearing out the commit.
		 * it only can increment when a commit takes place. But that
		 * only happens in the outer most nested commit.
		 */
		local_set(&next_page->page->commit, 0);

		old_tail = cmpxchg(&cpu_buffer->tail_page,
				   tail_page, next_page);

		if (old_tail == tail_page)
			ret = 1;
	}

	return ret;
}

static int rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer,
			  struct buffer_page *bpage)
{
	unsigned long val = (unsigned long)bpage;

	if (RB_WARN_ON(cpu_buffer, val & RB_FLAG_MASK))
		return 1;

	return 0;
}

/**
 * rb_check_list - make sure a pointer to a list has the last bits zero
 */
static int rb_check_list(struct ring_buffer_per_cpu *cpu_buffer,
			 struct list_head *list)
{
	if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev) != list->prev))
		return 1;
	if (RB_WARN_ON(cpu_buffer, rb_list_head(list->next) != list->next))
		return 1;
	return 0;
}

/**
 * check_pages - integrity check of buffer pages
 * @cpu_buffer: CPU buffer with pages to test
 *
 * As a safety measure we check to make sure the data pages have not
 * been corrupted.
 */
static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
{
	struct list_head *head = cpu_buffer->pages;
	struct buffer_page *bpage, *tmp;

	rb_head_page_deactivate(cpu_buffer);

	if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
		return -1;
	if (RB_WARN_ON(cpu_buffer, head->prev->next != head))
		return -1;

	if (rb_check_list(cpu_buffer, head))
		return -1;

	list_for_each_entry_safe(bpage, tmp, head, list) {
		if (RB_WARN_ON(cpu_buffer,
			       bpage->list.next->prev != &bpage->list))
			return -1;
		if (RB_WARN_ON(cpu_buffer,
			       bpage->list.prev->next != &bpage->list))
			return -1;
		if (rb_check_list(cpu_buffer, &bpage->list))
			return -1;
	}

	rb_head_page_activate(cpu_buffer);

	return 0;
}

static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
			     unsigned nr_pages)
{
	struct buffer_page *bpage, *tmp;
	unsigned long addr;
	LIST_HEAD(pages);
	unsigned i;

	WARN_ON(!nr_pages);

	for (i = 0; i < nr_pages; i++) {
		bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
				     GFP_KERNEL, cpu_to_node(cpu_buffer->cpu));
		if (!bpage)
			goto free_pages;

		rb_check_bpage(cpu_buffer, bpage);

		list_add(&bpage->list, &pages);

		addr = __get_free_page(GFP_KERNEL);
		if (!addr)
			goto free_pages;
		bpage->page = (void *)addr;
		rb_init_page(bpage->page);
	}

	/*
	 * The ring buffer page list is a circular list that does not
	 * start and end with a list head. All page list items point to
	 * other pages.
	 */
	cpu_buffer->pages = pages.next;
	list_del(&pages);

	rb_check_pages(cpu_buffer);

	return 0;

 free_pages:
	list_for_each_entry_safe(bpage, tmp, &pages, list) {
		list_del_init(&bpage->list);
		free_buffer_page(bpage);
	}
	return -ENOMEM;
}

static struct ring_buffer_per_cpu *
rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
{
	struct ring_buffer_per_cpu *cpu_buffer;
	struct buffer_page *bpage;
	unsigned long addr;
	int ret;

	cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()),
				  GFP_KERNEL, cpu_to_node(cpu));
	if (!cpu_buffer)
		return NULL;

	cpu_buffer->cpu = cpu;
	cpu_buffer->buffer = buffer;
	spin_lock_init(&cpu_buffer->reader_lock);
	lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
	cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;

	bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
			     GFP_KERNEL, cpu_to_node(cpu));
	if (!bpage)
		goto fail_free_buffer;

	rb_check_bpage(cpu_buffer, bpage);

	cpu_buffer->reader_page = bpage;
	addr = __get_free_page(GFP_KERNEL);
	if (!addr)
		goto fail_free_reader;
	bpage->page = (void *)addr;
	rb_init_page(bpage->page);

	INIT_LIST_HEAD(&cpu_buffer->reader_page->list);

	ret = rb_allocate_pages(cpu_buffer, buffer->pages);
	if (ret < 0)
		goto fail_free_reader;

	cpu_buffer->head_page
		= list_entry(cpu_buffer->pages, struct buffer_page, list);
	cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page;

	rb_head_page_activate(cpu_buffer);

	return cpu_buffer;

 fail_free_reader:
	free_buffer_page(cpu_buffer->reader_page);

 fail_free_buffer:
	kfree(cpu_buffer);
	return NULL;
}

static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
{
	struct list_head *head = cpu_buffer->pages;
	struct buffer_page *bpage, *tmp;

	free_buffer_page(cpu_buffer->reader_page);

	rb_head_page_deactivate(cpu_buffer);

	if (head) {
		list_for_each_entry_safe(bpage, tmp, head, list) {
			list_del_init(&bpage->list);
			free_buffer_page(bpage);
		}
		bpage = list_entry(head, struct buffer_page, list);
		free_buffer_page(bpage);
	}

	kfree(cpu_buffer);
}

#ifdef CONFIG_HOTPLUG_CPU
static int rb_cpu_notify(struct notifier_block *self,
			 unsigned long action, void *hcpu);
#endif

/**
 * ring_buffer_alloc - allocate a new ring_buffer
 * @size: the size in bytes per cpu that is needed.
 * @flags: attributes to set for the ring buffer.
 *
 * Currently the only flag that is available is the RB_FL_OVERWRITE
 * flag. This flag means that the buffer will overwrite old data
 * when the buffer wraps. If this flag is not set, the buffer will
 * drop data when the tail hits the head.
 */
struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
					struct lock_class_key *key)
{
	struct ring_buffer *buffer;
	int bsize;
	int cpu;

	/* keep it in its own cache line */
	buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
			 GFP_KERNEL);
	if (!buffer)
		return NULL;

	if (!alloc_cpumask_var(&buffer->cpumask, GFP_KERNEL))
		goto fail_free_buffer;

	buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
	buffer->flags = flags;
	buffer->clock = trace_clock_local;
	buffer->reader_lock_key = key;

	/* need at least two pages */
	if (buffer->pages < 2)
		buffer->pages = 2;

	/*
	 * In case of non-hotplug cpu, if the ring-buffer is allocated
	 * in early initcall, it will not be notified of secondary cpus.
	 * In that off case, we need to allocate for all possible cpus.
	 */
#ifdef CONFIG_HOTPLUG_CPU
	get_online_cpus();
	cpumask_copy(buffer->cpumask, cpu_online_mask);
#else
	cpumask_copy(buffer->cpumask, cpu_possible_mask);
#endif
	buffer->cpus = nr_cpu_ids;

	bsize = sizeof(void *) * nr_cpu_ids;
	buffer->buffers = kzalloc(ALIGN(bsize, cache_line_size()),
				  GFP_KERNEL);
	if (!buffer->buffers)
		goto fail_free_cpumask;

	for_each_buffer_cpu(buffer, cpu) {
		buffer->buffers[cpu] =
			rb_allocate_cpu_buffer(buffer, cpu);
		if (!buffer->buffers[cpu])
			goto fail_free_buffers;
	}

#ifdef CONFIG_HOTPLUG_CPU
	buffer->cpu_notify.notifier_call = rb_cpu_notify;
	buffer->cpu_notify.priority = 0;
	register_cpu_notifier(&buffer->cpu_notify);
#endif

	put_online_cpus();
	mutex_init(&buffer->mutex);

	return buffer;

 fail_free_buffers:
	for_each_buffer_cpu(buffer, cpu) {
		if (buffer->buffers[cpu])
			rb_free_cpu_buffer(buffer->buffers[cpu]);
	}
	kfree(buffer->buffers);

 fail_free_cpumask:
	free_cpumask_var(buffer->cpumask);
	put_online_cpus();

 fail_free_buffer:
	kfree(buffer);
	return NULL;
}
EXPORT_SYMBOL_GPL(__ring_buffer_alloc);

/**
 * ring_buffer_free - free a ring buffer.
 * @buffer: the buffer to free.
 */
void
ring_buffer_free(struct ring_buffer *buffer)
{
	int cpu;

	get_online_cpus();

#ifdef CONFIG_HOTPLUG_CPU
	unregister_cpu_notifier(&buffer->cpu_notify);
#endif

	for_each_buffer_cpu(buffer, cpu)
		rb_free_cpu_buffer(buffer->buffers[cpu]);

	put_online_cpus();

	kfree(buffer->buffers);
	free_cpumask_var(buffer->cpumask);

	kfree(buffer);
}
EXPORT_SYMBOL_GPL(ring_buffer_free);

void ring_buffer_set_clock(struct ring_buffer *buffer,
			   u64 (*clock)(void))
{
	buffer->clock = clock;
}

static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);

static void
rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
{
	struct buffer_page *bpage;
	struct list_head *p;
	unsigned i;

	spin_lock_irq(&cpu_buffer->reader_lock);
	rb_head_page_deactivate(cpu_buffer);

	for (i = 0; i < nr_pages; i++) {
		if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
			goto out;
		p = cpu_buffer->pages->next;
		bpage = list_entry(p, struct buffer_page, list);
		list_del_init(&bpage->list);
		free_buffer_page(bpage);
	}
	if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
		goto out;

	rb_reset_cpu(cpu_buffer);
	rb_check_pages(cpu_buffer);

out:
	spin_unlock_irq(&cpu_buffer->reader_lock);
}

static void
rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
		struct list_head *pages, unsigned nr_pages)
{
	struct buffer_page *bpage;
	struct list_head *p;
	unsigned i;

	spin_lock_irq(&cpu_buffer->reader_lock);
	rb_head_page_deactivate(cpu_buffer);

	for (i = 0; i < nr_pages; i++) {
		if (RB_WARN_ON(cpu_buffer, list_empty(pages)))
			goto out;
		p = pages->next;
		bpage = list_entry(p, struct buffer_page, list);
		list_del_init(&bpage->list);
		list_add_tail(&bpage->list, cpu_buffer->pages);
	}
	rb_reset_cpu(cpu_buffer);
	rb_check_pages(cpu_buffer);

out:
	spin_unlock_irq(&cpu_buffer->reader_lock);
}

/**
 * ring_buffer_resize - resize the ring buffer
 * @buffer: the buffer to resize.
 * @size: the new size.
 *
 * Minimum size is 2 * BUF_PAGE_SIZE.
 *
 * Returns -1 on failure.
 */
int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
{
	struct ring_buffer_per_cpu *cpu_buffer;
	unsigned nr_pages, rm_pages, new_pages;
	struct buffer_page *bpage, *tmp;
	unsigned long buffer_size;
	unsigned long addr;
	LIST_HEAD(pages);
	int i, cpu;

	/*
	 * Always succeed at resizing a non-existent buffer:
	 */
	if (!buffer)
		return size;

	size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
	size *= BUF_PAGE_SIZE;
	buffer_size = buffer->pages * BUF_PAGE_SIZE;

	/* we need a minimum of two pages */
	if (size < BUF_PAGE_SIZE * 2)
		size = BUF_PAGE_SIZE * 2;

	if (size == buffer_size)
		return size;

	atomic_inc(&buffer->record_disabled);

	/* Make sure all writers are done with this buffer. */
	synchronize_sched();

	mutex_lock(&buffer->mutex);
	get_online_cpus();

	nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);

	if (size < buffer_size) {

		/* easy case, just free pages */
		if (RB_WARN_ON(buffer, nr_pages >= buffer->pages))
			goto out_fail;

		rm_pages = buffer->pages - nr_pages;

		for_each_buffer_cpu(buffer, cpu) {
			cpu_buffer = buffer->buffers[cpu];
			rb_remove_pages(cpu_buffer, rm_pages);
		}
		goto out;
	}

	/*
	 * This is a bit more difficult. We only want to add pages
	 * when we can allocate enough for all CPUs. We do this
	 * by allocating all the pages and storing them on a local
	 * link list. If we succeed in our allocation, then we
	 * add these pages to the cpu_buffers. Otherwise we just free
	 * them all and return -ENOMEM;
	 */
	if (RB_WARN_ON(buffer, nr_pages <= buffer->pages))
		goto out_fail;

	new_pages = nr_pages - buffer->pages;

	for_each_buffer_cpu(buffer, cpu) {
		for (i = 0; i < new_pages; i++) {
			bpage = kzalloc_node(ALIGN(sizeof(*bpage),
						   cache_line_size()),
					     GFP_KERNEL, cpu_to_node(cpu));
			if (!bpage)
				goto free_pages;
			list_add(&bpage->list, &pages);
			addr = __get_free_page(GFP_KERNEL);
			if (!addr)
				goto free_pages;
			bpage->page = (void *)addr;
			rb_init_page(bpage->page);
		}
	}

	for_each_buffer_cpu(buffer, cpu) {
		cpu_buffer = buffer->buffers[cpu];
		rb_insert_pages(cpu_buffer, &pages, new_pages);
	}

	if (RB_WARN_ON(buffer, !list_empty(&pages)))
		goto out_fail;

 out:
	buffer->pages = nr_pages;
	put_online_cpus();
	mutex_unlock(&buffer->mutex);

	atomic_dec(&buffer->record_disabled);

	return size;

 free_pages:
	list_for_each_entry_safe(bpage, tmp, &pages, list) {
		list_del_init(&bpage->list);
		free_buffer_page(bpage);
	}
	put_online_cpus();
	mutex_unlock(&buffer->mutex);
	atomic_dec(&buffer->record_disabled);
	return -ENOMEM;

	/*
	 * Something went totally wrong, and we are too paranoid
	 * to even clean up the mess.
	 */
 out_fail:
	put_online_cpus();
	mutex_unlock(&buffer->mutex);
	atomic_dec(&buffer->record_disabled);
	return -1;
}
EXPORT_SYMBOL_GPL(ring_buffer_resize);

static inline void *
__rb_data_page_index(struct buffer_data_page *bpage, unsigned index)
{
	return bpage->data + index;
}

static inline void *__rb_page_index(struct buffer_page *bpage, unsigned index)
{
	return bpage->page->data + index;
}

static inline struct ring_buffer_event *
rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
{
	return __rb_page_index(cpu_buffer->reader_page,
			       cpu_buffer->reader_page->read);
}

static inline struct ring_buffer_event *
rb_iter_head_event(struct ring_buffer_iter *iter)
{
	return __rb_page_index(iter->head_page, iter->head);
}

Steven Rostedt77ae3652009-03-27 11:00:29 -04001408static inline unsigned long rb_page_write(struct buffer_page *bpage)
Steven Rostedtbf41a152008-10-04 02:00:59 -04001409{
Steven Rostedt77ae3652009-03-27 11:00:29 -04001410 return local_read(&bpage->write) & RB_WRITE_MASK;
Steven Rostedtbf41a152008-10-04 02:00:59 -04001411}
1412
1413static inline unsigned rb_page_commit(struct buffer_page *bpage)
1414{
Steven Rostedtabc9b562008-12-02 15:34:06 -05001415 return local_read(&bpage->page->commit);
Steven Rostedtbf41a152008-10-04 02:00:59 -04001416}
1417
Steven Rostedt77ae3652009-03-27 11:00:29 -04001418static inline unsigned long rb_page_entries(struct buffer_page *bpage)
1419{
1420 return local_read(&bpage->entries) & RB_WRITE_MASK;
1421}
1422
Steven Rostedtbf41a152008-10-04 02:00:59 -04001423/* Size is determined by what has been committed */
1424static inline unsigned rb_page_size(struct buffer_page *bpage)
1425{
1426 return rb_page_commit(bpage);
1427}
1428
1429static inline unsigned
1430rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer)
1431{
1432 return rb_page_commit(cpu_buffer->commit_page);
1433}
1434
Steven Rostedtbf41a152008-10-04 02:00:59 -04001435static inline unsigned
1436rb_event_index(struct ring_buffer_event *event)
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04001437{
Steven Rostedtbf41a152008-10-04 02:00:59 -04001438 unsigned long addr = (unsigned long)event;
1439
Steven Rostedt22f470f2009-06-11 09:29:58 -04001440 return (addr & ~PAGE_MASK) - BUF_PAGE_HDR_SIZE;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04001441}
1442
Steven Rostedt0f0c85f2009-05-11 16:08:00 -04001443static inline int
Steven Rostedtfa743952009-06-16 12:37:57 -04001444rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
1445 struct ring_buffer_event *event)
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04001446{
Steven Rostedtbf41a152008-10-04 02:00:59 -04001447 unsigned long addr = (unsigned long)event;
1448 unsigned long index;
1449
1450 index = rb_event_index(event);
1451 addr &= PAGE_MASK;
1452
1453 return cpu_buffer->commit_page->page == (void *)addr &&
1454 rb_commit_index(cpu_buffer) == index;
1455}
1456
Andrew Morton34a148b2009-01-09 12:27:09 -08001457static void
Steven Rostedtbf41a152008-10-04 02:00:59 -04001458rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
1459{
Steven Rostedt77ae3652009-03-27 11:00:29 -04001460 unsigned long max_count;
1461
Steven Rostedtbf41a152008-10-04 02:00:59 -04001462 /*
1463 * We only race with interrupts and NMIs on this CPU.
1464 * If we own the commit event, then we can commit
1465 * all others that interrupted us, since the interruptions
1466 * are in stack format (they finish before they come
1467 * back to us). This allows us to do a simple loop to
1468 * assign the commit to the tail.
1469 */
Steven Rostedta8ccf1d2008-12-23 11:32:24 -05001470 again:
Steven Rostedt77ae3652009-03-27 11:00:29 -04001471 max_count = cpu_buffer->buffer->pages * 100;
1472
Steven Rostedtbf41a152008-10-04 02:00:59 -04001473 while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
Steven Rostedt77ae3652009-03-27 11:00:29 -04001474 if (RB_WARN_ON(cpu_buffer, !(--max_count)))
1475 return;
1476 if (RB_WARN_ON(cpu_buffer,
1477 rb_is_reader_page(cpu_buffer->tail_page)))
1478 return;
1479 local_set(&cpu_buffer->commit_page->page->commit,
1480 rb_page_write(cpu_buffer->commit_page));
Steven Rostedtbf41a152008-10-04 02:00:59 -04001481 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
Steven Rostedtabc9b562008-12-02 15:34:06 -05001482 cpu_buffer->write_stamp =
1483 cpu_buffer->commit_page->page->time_stamp;
Steven Rostedtbf41a152008-10-04 02:00:59 -04001484 /* add barrier to keep gcc from optimizing too much */
1485 barrier();
1486 }
1487 while (rb_commit_index(cpu_buffer) !=
1488 rb_page_write(cpu_buffer->commit_page)) {
Steven Rostedt77ae3652009-03-27 11:00:29 -04001489
1490 local_set(&cpu_buffer->commit_page->page->commit,
1491 rb_page_write(cpu_buffer->commit_page));
1492 RB_WARN_ON(cpu_buffer,
1493 local_read(&cpu_buffer->commit_page->page->commit) &
1494 ~RB_WRITE_MASK);
Steven Rostedtbf41a152008-10-04 02:00:59 -04001495 barrier();
1496 }
Steven Rostedta8ccf1d2008-12-23 11:32:24 -05001497
1498 /* again, keep gcc from optimizing */
1499 barrier();
1500
1501 /*
1502 * If an interrupt came in just after the first while loop
1503 * and pushed the tail page forward, we will be left with
1504 * a dangling commit that will never go forward.
1505 */
1506 if (unlikely(cpu_buffer->commit_page != cpu_buffer->tail_page))
1507 goto again;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04001508}
1509
Steven Rostedtd7690412008-10-01 00:29:53 -04001510static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04001511{
Steven Rostedtabc9b562008-12-02 15:34:06 -05001512 cpu_buffer->read_stamp = cpu_buffer->reader_page->page->time_stamp;
Steven Rostedt6f807ac2008-10-04 02:00:58 -04001513 cpu_buffer->reader_page->read = 0;
Steven Rostedtd7690412008-10-01 00:29:53 -04001514}
1515
Andrew Morton34a148b2009-01-09 12:27:09 -08001516static void rb_inc_iter(struct ring_buffer_iter *iter)
Steven Rostedtd7690412008-10-01 00:29:53 -04001517{
1518 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1519
1520 /*
1521 * The iterator could be on the reader page (it starts there).
1522 * But the head could have moved, since the reader was
1523 * found. Check for this case and assign the iterator
1524 * to the head page instead of next.
1525 */
1526 if (iter->head_page == cpu_buffer->reader_page)
Steven Rostedt77ae3652009-03-27 11:00:29 -04001527 iter->head_page = rb_set_head_page(cpu_buffer);
Steven Rostedtd7690412008-10-01 00:29:53 -04001528 else
1529 rb_inc_page(cpu_buffer, &iter->head_page);
1530
Steven Rostedtabc9b562008-12-02 15:34:06 -05001531 iter->read_stamp = iter->head_page->page->time_stamp;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04001532 iter->head = 0;
1533}
1534
1535/**
1536 * ring_buffer_update_event - update event type and data
 1537 * @event: the event to update
1538 * @type: the type of event
1539 * @length: the size of the event field in the ring buffer
1540 *
1541 * Update the type and data fields of the event. The length
1542 * is the actual size that is written to the ring buffer,
1543 * and with this, we can determine what to place into the
1544 * data field.
1545 */
Andrew Morton34a148b2009-01-09 12:27:09 -08001546static void
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04001547rb_update_event(struct ring_buffer_event *event,
1548 unsigned type, unsigned length)
1549{
Lai Jiangshan334d4162009-04-24 11:27:05 +08001550 event->type_len = type;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04001551
1552 switch (type) {
1553
1554 case RINGBUF_TYPE_PADDING:
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04001555 case RINGBUF_TYPE_TIME_EXTEND:
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04001556 case RINGBUF_TYPE_TIME_STAMP:
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04001557 break;
1558
Lai Jiangshan334d4162009-04-24 11:27:05 +08001559 case 0:
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04001560 length -= RB_EVNT_HDR_SIZE;
Steven Rostedt22710482010-03-18 17:54:19 -04001561 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04001562 event->array[0] = length;
Lai Jiangshan334d4162009-04-24 11:27:05 +08001563 else
1564 event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04001565 break;
1566 default:
1567 BUG();
1568 }
1569}
1570
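/*
 * Worked example of the type_len encoding above (a sketch that assumes
 * the usual 4-byte event header and RB_ALIGNMENT of 4, with
 * RB_FORCE_8BYTE_ALIGNMENT not set; the real values come from the
 * macros near the top of this file):
 *
 *   total reserved length = 16 bytes
 *   payload space         = 16 - 4 (header) = 12 bytes
 *   12 <= RB_MAX_SMALL_DATA, so the size is folded into the header:
 *   type_len              = DIV_ROUND_UP(12, 4) = 3
 *
 * A reader can then recover the data size as type_len * RB_ALIGNMENT
 * without spending array[0] on a separate length word.
 */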
Steven Rostedt77ae3652009-03-27 11:00:29 -04001571/*
1572 * rb_handle_head_page - writer hit the head page
1573 *
1574 * Returns: +1 to retry page
1575 * 0 to continue
1576 * -1 on error
1577 */
1578static int
1579rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer,
1580 struct buffer_page *tail_page,
1581 struct buffer_page *next_page)
1582{
1583 struct buffer_page *new_head;
1584 int entries;
1585 int type;
1586 int ret;
1587
1588 entries = rb_page_entries(next_page);
1589
1590 /*
1591 * The hard part is here. We need to move the head
1592 * forward, and protect against both readers on
1593 * other CPUs and writers coming in via interrupts.
1594 */
1595 type = rb_head_page_set_update(cpu_buffer, next_page, tail_page,
1596 RB_PAGE_HEAD);
1597
1598 /*
1599 * type can be one of four:
1600 * NORMAL - an interrupt already moved it for us
1601 * HEAD - we are the first to get here.
1602 * UPDATE - we are the interrupt interrupting
1603 * a current move.
1604 * MOVED - a reader on another CPU moved the next
1605 * pointer to its reader page. Give up
1606 * and try again.
1607 */
1608
1609 switch (type) {
1610 case RB_PAGE_HEAD:
1611 /*
1612 * We changed the head to UPDATE, thus
1613 * it is our responsibility to update
1614 * the counters.
1615 */
1616 local_add(entries, &cpu_buffer->overrun);
1617
1618 /*
1619 * The entries will be zeroed out when we move the
1620 * tail page.
1621 */
1622
1623 /* still more to do */
1624 break;
1625
1626 case RB_PAGE_UPDATE:
1627 /*
 1628 * This is an interrupt that interrupted the
1629 * previous update. Still more to do.
1630 */
1631 break;
1632 case RB_PAGE_NORMAL:
1633 /*
1634 * An interrupt came in before the update
1635 * and processed this for us.
1636 * Nothing left to do.
1637 */
1638 return 1;
1639 case RB_PAGE_MOVED:
1640 /*
1641 * The reader is on another CPU and just did
1642 * a swap with our next_page.
1643 * Try again.
1644 */
1645 return 1;
1646 default:
1647 RB_WARN_ON(cpu_buffer, 1); /* WTF??? */
1648 return -1;
1649 }
1650
1651 /*
1652 * Now that we are here, the old head pointer is
1653 * set to UPDATE. This will keep the reader from
1654 * swapping the head page with the reader page.
1655 * The reader (on another CPU) will spin till
1656 * we are finished.
1657 *
1658 * We just need to protect against interrupts
1659 * doing the job. We will set the next pointer
1660 * to HEAD. After that, we set the old pointer
 1661 * to NORMAL, but only if it was HEAD before;
 1662 * otherwise we are an interrupt, and only
1663 * want the outer most commit to reset it.
1664 */
1665 new_head = next_page;
1666 rb_inc_page(cpu_buffer, &new_head);
1667
1668 ret = rb_head_page_set_head(cpu_buffer, new_head, next_page,
1669 RB_PAGE_NORMAL);
1670
1671 /*
1672 * Valid returns are:
1673 * HEAD - an interrupt came in and already set it.
1674 * NORMAL - One of two things:
1675 * 1) We really set it.
1676 * 2) A bunch of interrupts came in and moved
1677 * the page forward again.
1678 */
1679 switch (ret) {
1680 case RB_PAGE_HEAD:
1681 case RB_PAGE_NORMAL:
1682 /* OK */
1683 break;
1684 default:
1685 RB_WARN_ON(cpu_buffer, 1);
1686 return -1;
1687 }
1688
1689 /*
1690 * It is possible that an interrupt came in,
1691 * set the head up, then more interrupts came in
1692 * and moved it again. When we get back here,
1693 * the page would have been set to NORMAL but we
1694 * just set it back to HEAD.
1695 *
1696 * How do you detect this? Well, if that happened
1697 * the tail page would have moved.
1698 */
1699 if (ret == RB_PAGE_NORMAL) {
1700 /*
 1701 * If the tail had moved past next, then we need
1702 * to reset the pointer.
1703 */
1704 if (cpu_buffer->tail_page != tail_page &&
1705 cpu_buffer->tail_page != next_page)
1706 rb_head_page_set_normal(cpu_buffer, new_head,
1707 next_page,
1708 RB_PAGE_HEAD);
1709 }
1710
1711 /*
1712 * If this was the outer most commit (the one that
1713 * changed the original pointer from HEAD to UPDATE),
1714 * then it is up to us to reset it to NORMAL.
1715 */
1716 if (type == RB_PAGE_HEAD) {
1717 ret = rb_head_page_set_normal(cpu_buffer, next_page,
1718 tail_page,
1719 RB_PAGE_UPDATE);
1720 if (RB_WARN_ON(cpu_buffer,
1721 ret != RB_PAGE_UPDATE))
1722 return -1;
1723 }
1724
1725 return 0;
1726}
1727
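/*
 * Sketch of the caller contract for rb_handle_head_page(), mirroring
 * how rb_move_tail() below consumes the return value (illustrative
 * only; the goto labels stand in for the caller's own cleanup paths):
 *
 *	ret = rb_handle_head_page(cpu_buffer, tail_page, next_page);
 *	if (ret < 0)
 *		goto out_reset;		/* error: undo the reservation */
 *	if (ret)
 *		goto out_again;		/* someone else handled it, retry */
 *	/* ret == 0: head page moved, continue with the tail update */
 */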
Andrew Morton34a148b2009-01-09 12:27:09 -08001728static unsigned rb_calculate_event_length(unsigned length)
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04001729{
1730 struct ring_buffer_event event; /* Used only for sizeof array */
1731
1732 /* zero length can cause confusions */
1733 if (!length)
1734 length = 1;
1735
Steven Rostedt22710482010-03-18 17:54:19 -04001736 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04001737 length += sizeof(event.array[0]);
1738
1739 length += RB_EVNT_HDR_SIZE;
Steven Rostedt22710482010-03-18 17:54:19 -04001740 length = ALIGN(length, RB_ARCH_ALIGNMENT);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04001741
1742 return length;
1743}
1744
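/*
 * Worked example for rb_calculate_event_length() (a sketch assuming a
 * 4-byte event header, 4-byte RB_ARCH_ALIGNMENT and
 * RB_FORCE_8BYTE_ALIGNMENT not set):
 *
 *   requested data length = 10
 *   10 <= RB_MAX_SMALL_DATA, so no extra array[0] word is needed
 *   10 + 4 (header)       = 14
 *   ALIGN(14, 4)          = 16 bytes actually reserved on the page
 */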
Steven Rostedtc7b09302009-06-11 11:12:00 -04001745static inline void
1746rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
1747 struct buffer_page *tail_page,
1748 unsigned long tail, unsigned long length)
1749{
1750 struct ring_buffer_event *event;
1751
1752 /*
1753 * Only the event that crossed the page boundary
1754 * must fill the old tail_page with padding.
1755 */
1756 if (tail >= BUF_PAGE_SIZE) {
1757 local_sub(length, &tail_page->write);
1758 return;
1759 }
1760
1761 event = __rb_page_index(tail_page, tail);
Linus Torvaldsb0b70652009-06-20 10:56:46 -07001762 kmemcheck_annotate_bitfield(event, bitfield);
Steven Rostedtc7b09302009-06-11 11:12:00 -04001763
1764 /*
1765 * If this event is bigger than the minimum size, then
1766 * we need to be careful that we don't subtract the
1767 * write counter enough to allow another writer to slip
1768 * in on this page.
1769 * We put in a discarded commit instead, to make sure
1770 * that this space is not used again.
1771 *
1772 * If we are less than the minimum size, we don't need to
1773 * worry about it.
1774 */
1775 if (tail > (BUF_PAGE_SIZE - RB_EVNT_MIN_SIZE)) {
1776 /* No room for any events */
1777
1778 /* Mark the rest of the page with padding */
1779 rb_event_set_padding(event);
1780
1781 /* Set the write back to the previous setting */
1782 local_sub(length, &tail_page->write);
1783 return;
1784 }
1785
1786 /* Put in a discarded event */
1787 event->array[0] = (BUF_PAGE_SIZE - tail) - RB_EVNT_HDR_SIZE;
1788 event->type_len = RINGBUF_TYPE_PADDING;
1789 /* time delta must be non zero */
1790 event->time_delta = 1;
Steven Rostedtc7b09302009-06-11 11:12:00 -04001791
1792 /* Set write to end of buffer */
1793 length = (tail + length) - BUF_PAGE_SIZE;
1794 local_sub(length, &tail_page->write);
1795}
Steven Rostedt6634ff22009-05-06 15:30:07 -04001796
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04001797static struct ring_buffer_event *
Steven Rostedt6634ff22009-05-06 15:30:07 -04001798rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
1799 unsigned long length, unsigned long tail,
Steven Rostedt6634ff22009-05-06 15:30:07 -04001800 struct buffer_page *tail_page, u64 *ts)
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04001801{
Steven Rostedt5a50e332009-11-17 08:43:01 -05001802 struct buffer_page *commit_page = cpu_buffer->commit_page;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04001803 struct ring_buffer *buffer = cpu_buffer->buffer;
Steven Rostedt77ae3652009-03-27 11:00:29 -04001804 struct buffer_page *next_page;
1805 int ret;
Steven Rostedtaa20ae82009-05-05 21:16:11 -04001806
1807 next_page = tail_page;
1808
Steven Rostedtaa20ae82009-05-05 21:16:11 -04001809 rb_inc_page(cpu_buffer, &next_page);
1810
Steven Rostedtaa20ae82009-05-05 21:16:11 -04001811 /*
1812 * If for some reason, we had an interrupt storm that made
1813 * it all the way around the buffer, bail, and warn
1814 * about it.
1815 */
1816 if (unlikely(next_page == commit_page)) {
Steven Rostedt77ae3652009-03-27 11:00:29 -04001817 local_inc(&cpu_buffer->commit_overrun);
Steven Rostedtaa20ae82009-05-05 21:16:11 -04001818 goto out_reset;
1819 }
1820
Steven Rostedt77ae3652009-03-27 11:00:29 -04001821 /*
1822 * This is where the fun begins!
1823 *
1824 * We are fighting against races between a reader that
1825 * could be on another CPU trying to swap its reader
1826 * page with the buffer head.
1827 *
1828 * We are also fighting against interrupts coming in and
1829 * moving the head or tail on us as well.
1830 *
1831 * If the next page is the head page then we have filled
1832 * the buffer, unless the commit page is still on the
1833 * reader page.
1834 */
1835 if (rb_is_head_page(cpu_buffer, next_page, &tail_page->list)) {
Steven Rostedtaa20ae82009-05-05 21:16:11 -04001836
Steven Rostedt77ae3652009-03-27 11:00:29 -04001837 /*
1838 * If the commit is not on the reader page, then
1839 * move the header page.
1840 */
1841 if (!rb_is_reader_page(cpu_buffer->commit_page)) {
1842 /*
1843 * If we are not in overwrite mode,
1844 * this is easy, just stop here.
1845 */
1846 if (!(buffer->flags & RB_FL_OVERWRITE))
1847 goto out_reset;
Steven Rostedtaa20ae82009-05-05 21:16:11 -04001848
Steven Rostedt77ae3652009-03-27 11:00:29 -04001849 ret = rb_handle_head_page(cpu_buffer,
1850 tail_page,
1851 next_page);
1852 if (ret < 0)
1853 goto out_reset;
1854 if (ret)
1855 goto out_again;
1856 } else {
1857 /*
1858 * We need to be careful here too. The
1859 * commit page could still be on the reader
1860 * page. We could have a small buffer, and
1861 * have filled up the buffer with events
1862 * from interrupts and such, and wrapped.
1863 *
 1864 * Note, if the tail page is also on the
1865 * reader_page, we let it move out.
1866 */
1867 if (unlikely((cpu_buffer->commit_page !=
1868 cpu_buffer->tail_page) &&
1869 (cpu_buffer->commit_page ==
1870 cpu_buffer->reader_page))) {
1871 local_inc(&cpu_buffer->commit_overrun);
1872 goto out_reset;
1873 }
Steven Rostedtaa20ae82009-05-05 21:16:11 -04001874 }
1875 }
1876
Steven Rostedt77ae3652009-03-27 11:00:29 -04001877 ret = rb_tail_page_update(cpu_buffer, tail_page, next_page);
1878 if (ret) {
1879 /*
1880 * Nested commits always have zero deltas, so
1881 * just reread the time stamp
1882 */
Jiri Olsa6d3f1e12009-10-23 19:36:19 -04001883 *ts = rb_time_stamp(buffer);
Steven Rostedt77ae3652009-03-27 11:00:29 -04001884 next_page->page->time_stamp = *ts;
Steven Rostedtaa20ae82009-05-05 21:16:11 -04001885 }
1886
Steven Rostedt77ae3652009-03-27 11:00:29 -04001887 out_again:
Steven Rostedtaa20ae82009-05-05 21:16:11 -04001888
Steven Rostedt77ae3652009-03-27 11:00:29 -04001889 rb_reset_tail(cpu_buffer, tail_page, tail, length);
Steven Rostedtaa20ae82009-05-05 21:16:11 -04001890
1891 /* fail and let the caller try again */
1892 return ERR_PTR(-EAGAIN);
1893
Steven Rostedt45141d42009-02-12 13:19:48 -05001894 out_reset:
Lai Jiangshan6f3b3442009-01-12 11:06:18 +08001895 /* reset write */
Steven Rostedtc7b09302009-06-11 11:12:00 -04001896 rb_reset_tail(cpu_buffer, tail_page, tail, length);
Lai Jiangshan6f3b3442009-01-12 11:06:18 +08001897
Steven Rostedtbf41a152008-10-04 02:00:59 -04001898 return NULL;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04001899}
1900
Steven Rostedt6634ff22009-05-06 15:30:07 -04001901static struct ring_buffer_event *
1902__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1903 unsigned type, unsigned long length, u64 *ts)
1904{
Steven Rostedt5a50e332009-11-17 08:43:01 -05001905 struct buffer_page *tail_page;
Steven Rostedt6634ff22009-05-06 15:30:07 -04001906 struct ring_buffer_event *event;
1907 unsigned long tail, write;
1908
Steven Rostedt6634ff22009-05-06 15:30:07 -04001909 tail_page = cpu_buffer->tail_page;
1910 write = local_add_return(length, &tail_page->write);
Steven Rostedt77ae3652009-03-27 11:00:29 -04001911
1912 /* set write to only the index of the write */
1913 write &= RB_WRITE_MASK;
Steven Rostedt6634ff22009-05-06 15:30:07 -04001914 tail = write - length;
1915
 1916 /* See if we shot past the end of this buffer page */
1917 if (write > BUF_PAGE_SIZE)
1918 return rb_move_tail(cpu_buffer, length, tail,
Steven Rostedt5a50e332009-11-17 08:43:01 -05001919 tail_page, ts);
Steven Rostedt6634ff22009-05-06 15:30:07 -04001920
1921 /* We reserved something on the buffer */
1922
Steven Rostedt6634ff22009-05-06 15:30:07 -04001923 event = __rb_page_index(tail_page, tail);
Vegard Nossum1744a212009-02-28 08:29:44 +01001924 kmemcheck_annotate_bitfield(event, bitfield);
Steven Rostedt6634ff22009-05-06 15:30:07 -04001925 rb_update_event(event, type, length);
1926
1927 /* The passed in type is zero for DATA */
1928 if (likely(!type))
1929 local_inc(&tail_page->entries);
1930
1931 /*
Steven Rostedtfa743952009-06-16 12:37:57 -04001932 * If this is the first commit on the page, then update
1933 * its timestamp.
Steven Rostedt6634ff22009-05-06 15:30:07 -04001934 */
Steven Rostedtfa743952009-06-16 12:37:57 -04001935 if (!tail)
1936 tail_page->page->time_stamp = *ts;
Steven Rostedt6634ff22009-05-06 15:30:07 -04001937
1938 return event;
1939}
1940
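/*
 * Small worked example of the write/tail arithmetic above, assuming a
 * BUF_PAGE_SIZE of 4080 bytes (a 4096-byte page minus a 16-byte
 * buffer_data_page header; the real value is derived earlier in this
 * file):
 *
 *   tail page already holds 4072 bytes, the new event needs 16
 *   write = local_add_return(16, &tail_page->write) = 4088
 *   tail  = 4088 - 16 = 4072
 *   write > BUF_PAGE_SIZE, so rb_move_tail() pads the old page from
 *   offset 4072 onward and the reservation is retried on the next page.
 */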
Steven Rostedtedd813bf2009-06-02 23:00:53 -04001941static inline int
1942rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
1943 struct ring_buffer_event *event)
1944{
1945 unsigned long new_index, old_index;
1946 struct buffer_page *bpage;
1947 unsigned long index;
1948 unsigned long addr;
1949
1950 new_index = rb_event_index(event);
1951 old_index = new_index + rb_event_length(event);
1952 addr = (unsigned long)event;
1953 addr &= PAGE_MASK;
1954
1955 bpage = cpu_buffer->tail_page;
1956
1957 if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) {
Steven Rostedt77ae3652009-03-27 11:00:29 -04001958 unsigned long write_mask =
1959 local_read(&bpage->write) & ~RB_WRITE_MASK;
Steven Rostedtedd813bf2009-06-02 23:00:53 -04001960 /*
1961 * This is on the tail page. It is possible that
1962 * a write could come in and move the tail page
1963 * and write to the next page. That is fine
1964 * because we just shorten what is on this page.
1965 */
Steven Rostedt77ae3652009-03-27 11:00:29 -04001966 old_index += write_mask;
1967 new_index += write_mask;
Steven Rostedtedd813bf2009-06-02 23:00:53 -04001968 index = local_cmpxchg(&bpage->write, old_index, new_index);
1969 if (index == old_index)
1970 return 1;
1971 }
1972
1973 /* could not discard */
1974 return 0;
1975}
1976
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04001977static int
1978rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
1979 u64 *ts, u64 *delta)
1980{
1981 struct ring_buffer_event *event;
1982 static int once;
Steven Rostedtbf41a152008-10-04 02:00:59 -04001983 int ret;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04001984
1985 if (unlikely(*delta > (1ULL << 59) && !once++)) {
1986 printk(KERN_WARNING "Delta way too big! %llu"
1987 " ts=%llu write stamp = %llu\n",
Stephen Rothwelle2862c92008-10-27 17:43:28 +11001988 (unsigned long long)*delta,
1989 (unsigned long long)*ts,
1990 (unsigned long long)cpu_buffer->write_stamp);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04001991 WARN_ON(1);
1992 }
1993
1994 /*
 1995 * The delta is too big, we need to add a
1996 * new timestamp.
1997 */
1998 event = __rb_reserve_next(cpu_buffer,
1999 RINGBUF_TYPE_TIME_EXTEND,
2000 RB_LEN_TIME_EXTEND,
2001 ts);
2002 if (!event)
Steven Rostedtbf41a152008-10-04 02:00:59 -04002003 return -EBUSY;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002004
Steven Rostedtbf41a152008-10-04 02:00:59 -04002005 if (PTR_ERR(event) == -EAGAIN)
2006 return -EAGAIN;
2007
 2008 /* Only a committed time event can update the write stamp */
Steven Rostedtfa743952009-06-16 12:37:57 -04002009 if (rb_event_is_commit(cpu_buffer, event)) {
Steven Rostedtbf41a152008-10-04 02:00:59 -04002010 /*
Steven Rostedtfa743952009-06-16 12:37:57 -04002011 * If this is the first on the page, then it was
2012 * updated with the page itself. Try to discard it
2013 * and if we can't just make it zero.
Steven Rostedtbf41a152008-10-04 02:00:59 -04002014 */
2015 if (rb_event_index(event)) {
2016 event->time_delta = *delta & TS_MASK;
2017 event->array[0] = *delta >> TS_SHIFT;
2018 } else {
Steven Rostedtea05b572009-06-03 09:30:10 -04002019 /* try to discard, since we do not need this */
2020 if (!rb_try_to_discard(cpu_buffer, event)) {
2021 /* nope, just zero it */
2022 event->time_delta = 0;
2023 event->array[0] = 0;
2024 }
Steven Rostedtbf41a152008-10-04 02:00:59 -04002025 }
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002026 cpu_buffer->write_stamp = *ts;
Steven Rostedtbf41a152008-10-04 02:00:59 -04002027 /* let the caller know this was the commit */
2028 ret = 1;
2029 } else {
Steven Rostedtedd813bf2009-06-02 23:00:53 -04002030 /* Try to discard the event */
2031 if (!rb_try_to_discard(cpu_buffer, event)) {
2032 /* Darn, this is just wasted space */
2033 event->time_delta = 0;
2034 event->array[0] = 0;
Steven Rostedtedd813bf2009-06-02 23:00:53 -04002035 }
Steven Rostedtf57a8a12009-06-05 14:11:30 -04002036 ret = 0;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002037 }
2038
Steven Rostedtbf41a152008-10-04 02:00:59 -04002039 *delta = 0;
2040
2041 return ret;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002042}
2043
Steven Rostedtfa743952009-06-16 12:37:57 -04002044static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
2045{
2046 local_inc(&cpu_buffer->committing);
2047 local_inc(&cpu_buffer->commits);
2048}
2049
2050static void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
2051{
2052 unsigned long commits;
2053
2054 if (RB_WARN_ON(cpu_buffer,
2055 !local_read(&cpu_buffer->committing)))
2056 return;
2057
2058 again:
2059 commits = local_read(&cpu_buffer->commits);
2060 /* synchronize with interrupts */
2061 barrier();
2062 if (local_read(&cpu_buffer->committing) == 1)
2063 rb_set_commit_to_write(cpu_buffer);
2064
2065 local_dec(&cpu_buffer->committing);
2066
2067 /* synchronize with interrupts */
2068 barrier();
2069
2070 /*
2071 * Need to account for interrupts coming in between the
2072 * updating of the commit page and the clearing of the
2073 * committing counter.
2074 */
2075 if (unlikely(local_read(&cpu_buffer->commits) != commits) &&
2076 !local_read(&cpu_buffer->committing)) {
2077 local_inc(&cpu_buffer->committing);
2078 goto again;
2079 }
2080}
2081
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002082static struct ring_buffer_event *
Steven Rostedt62f0b3e2009-09-04 14:11:34 -04002083rb_reserve_next_event(struct ring_buffer *buffer,
2084 struct ring_buffer_per_cpu *cpu_buffer,
Steven Rostedt1cd8d732009-05-11 14:08:09 -04002085 unsigned long length)
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002086{
2087 struct ring_buffer_event *event;
Steven Rostedt168b6b12009-05-11 22:11:05 -04002088 u64 ts, delta = 0;
Steven Rostedtbf41a152008-10-04 02:00:59 -04002089 int commit = 0;
Steven Rostedt818e3dd2008-10-31 09:58:35 -04002090 int nr_loops = 0;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002091
Steven Rostedtfa743952009-06-16 12:37:57 -04002092 rb_start_commit(cpu_buffer);
2093
Steven Rostedt85bac322009-09-04 14:24:40 -04002094#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
Steven Rostedt62f0b3e2009-09-04 14:11:34 -04002095 /*
2096 * Due to the ability to swap a cpu buffer from a buffer
2097 * it is possible it was swapped before we committed.
2098 * (committing stops a swap). We check for it here and
2099 * if it happened, we have to fail the write.
2100 */
2101 barrier();
2102 if (unlikely(ACCESS_ONCE(cpu_buffer->buffer) != buffer)) {
2103 local_dec(&cpu_buffer->committing);
2104 local_dec(&cpu_buffer->commits);
2105 return NULL;
2106 }
Steven Rostedt85bac322009-09-04 14:24:40 -04002107#endif
Steven Rostedt62f0b3e2009-09-04 14:11:34 -04002108
Steven Rostedtbe957c42009-05-11 14:42:53 -04002109 length = rb_calculate_event_length(length);
Steven Rostedtbf41a152008-10-04 02:00:59 -04002110 again:
Steven Rostedt818e3dd2008-10-31 09:58:35 -04002111 /*
2112 * We allow for interrupts to reenter here and do a trace.
2113 * If one does, it will cause this original code to loop
2114 * back here. Even with heavy interrupts happening, this
2115 * should only happen a few times in a row. If this happens
2116 * 1000 times in a row, there must be either an interrupt
2117 * storm or we have something buggy.
2118 * Bail!
2119 */
Steven Rostedt3e89c7bb2008-11-11 15:28:41 -05002120 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
Steven Rostedtfa743952009-06-16 12:37:57 -04002121 goto out_fail;
Steven Rostedt818e3dd2008-10-31 09:58:35 -04002122
Jiri Olsa6d3f1e12009-10-23 19:36:19 -04002123 ts = rb_time_stamp(cpu_buffer->buffer);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002124
Steven Rostedtbf41a152008-10-04 02:00:59 -04002125 /*
2126 * Only the first commit can update the timestamp.
2127 * Yes there is a race here. If an interrupt comes in
2128 * just after the conditional and it traces too, then it
2129 * will also check the deltas. More than one timestamp may
2130 * also be made. But only the entry that did the actual
2131 * commit will be something other than zero.
2132 */
Steven Rostedt0f0c85f2009-05-11 16:08:00 -04002133 if (likely(cpu_buffer->tail_page == cpu_buffer->commit_page &&
2134 rb_page_write(cpu_buffer->tail_page) ==
2135 rb_commit_index(cpu_buffer))) {
Steven Rostedt168b6b12009-05-11 22:11:05 -04002136 u64 diff;
Steven Rostedtbf41a152008-10-04 02:00:59 -04002137
Steven Rostedt168b6b12009-05-11 22:11:05 -04002138 diff = ts - cpu_buffer->write_stamp;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002139
Steven Rostedt168b6b12009-05-11 22:11:05 -04002140 /* make sure this diff is calculated here */
Steven Rostedtbf41a152008-10-04 02:00:59 -04002141 barrier();
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002142
Steven Rostedtbf41a152008-10-04 02:00:59 -04002143 /* Did the write stamp get updated already? */
2144 if (unlikely(ts < cpu_buffer->write_stamp))
Steven Rostedt168b6b12009-05-11 22:11:05 -04002145 goto get_event;
Steven Rostedtbf41a152008-10-04 02:00:59 -04002146
Steven Rostedt168b6b12009-05-11 22:11:05 -04002147 delta = diff;
2148 if (unlikely(test_time_stamp(delta))) {
Steven Rostedtbf41a152008-10-04 02:00:59 -04002149
2150 commit = rb_add_time_stamp(cpu_buffer, &ts, &delta);
Steven Rostedtbf41a152008-10-04 02:00:59 -04002151 if (commit == -EBUSY)
Steven Rostedtfa743952009-06-16 12:37:57 -04002152 goto out_fail;
Steven Rostedtbf41a152008-10-04 02:00:59 -04002153
2154 if (commit == -EAGAIN)
2155 goto again;
2156
2157 RB_WARN_ON(cpu_buffer, commit < 0);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002158 }
Steven Rostedt168b6b12009-05-11 22:11:05 -04002159 }
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002160
Steven Rostedt168b6b12009-05-11 22:11:05 -04002161 get_event:
Steven Rostedt1cd8d732009-05-11 14:08:09 -04002162 event = __rb_reserve_next(cpu_buffer, 0, length, &ts);
Steven Rostedt168b6b12009-05-11 22:11:05 -04002163 if (unlikely(PTR_ERR(event) == -EAGAIN))
Steven Rostedtbf41a152008-10-04 02:00:59 -04002164 goto again;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002165
Steven Rostedtfa743952009-06-16 12:37:57 -04002166 if (!event)
2167 goto out_fail;
Steven Rostedtbf41a152008-10-04 02:00:59 -04002168
Steven Rostedtfa743952009-06-16 12:37:57 -04002169 if (!rb_event_is_commit(cpu_buffer, event))
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002170 delta = 0;
2171
2172 event->time_delta = delta;
2173
2174 return event;
Steven Rostedtfa743952009-06-16 12:37:57 -04002175
2176 out_fail:
2177 rb_end_commit(cpu_buffer);
2178 return NULL;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002179}
2180
Paul Mundt1155de42009-06-25 14:30:12 +09002181#ifdef CONFIG_TRACING
2182
Steven Rostedtaa18efb2009-04-20 16:16:11 -04002183#define TRACE_RECURSIVE_DEPTH 16
Steven Rostedt261842b2009-04-16 21:41:52 -04002184
2185static int trace_recursive_lock(void)
2186{
Steven Rostedtaa18efb2009-04-20 16:16:11 -04002187 current->trace_recursion++;
Steven Rostedt261842b2009-04-16 21:41:52 -04002188
Steven Rostedtaa18efb2009-04-20 16:16:11 -04002189 if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH))
2190 return 0;
Steven Rostedt261842b2009-04-16 21:41:52 -04002191
Steven Rostedtaa18efb2009-04-20 16:16:11 -04002192 /* Disable all tracing before we do anything else */
2193 tracing_off_permanent();
Frederic Weisbeckere057a5e2009-04-19 23:38:12 +02002194
Steven Rostedt7d7d2b82009-04-27 12:37:49 -04002195 printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:"
Steven Rostedtaa18efb2009-04-20 16:16:11 -04002196 "HC[%lu]:SC[%lu]:NMI[%lu]\n",
2197 current->trace_recursion,
2198 hardirq_count() >> HARDIRQ_SHIFT,
2199 softirq_count() >> SOFTIRQ_SHIFT,
2200 in_nmi());
Frederic Weisbeckere057a5e2009-04-19 23:38:12 +02002201
Steven Rostedtaa18efb2009-04-20 16:16:11 -04002202 WARN_ON_ONCE(1);
2203 return -1;
Steven Rostedt261842b2009-04-16 21:41:52 -04002204}
2205
2206static void trace_recursive_unlock(void)
2207{
Steven Rostedtaa18efb2009-04-20 16:16:11 -04002208 WARN_ON_ONCE(!current->trace_recursion);
Steven Rostedt261842b2009-04-16 21:41:52 -04002209
Steven Rostedtaa18efb2009-04-20 16:16:11 -04002210 current->trace_recursion--;
Steven Rostedt261842b2009-04-16 21:41:52 -04002211}
2212
Paul Mundt1155de42009-06-25 14:30:12 +09002213#else
2214
2215#define trace_recursive_lock() (0)
2216#define trace_recursive_unlock() do { } while (0)
2217
2218#endif
2219
Steven Rostedtbf41a152008-10-04 02:00:59 -04002220static DEFINE_PER_CPU(int, rb_need_resched);
2221
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002222/**
2223 * ring_buffer_lock_reserve - reserve a part of the buffer
2224 * @buffer: the ring buffer to reserve from
2225 * @length: the length of the data to reserve (excluding event header)
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002226 *
 2227 * Returns a reserved event on the ring buffer to copy directly to.
2228 * The user of this interface will need to get the body to write into
2229 * and can use the ring_buffer_event_data() interface.
2230 *
2231 * The length is the length of the data needed, not the event length
2232 * which also includes the event header.
2233 *
2234 * Must be paired with ring_buffer_unlock_commit, unless NULL is returned.
2235 * If NULL is returned, then nothing has been allocated or locked.
2236 */
2237struct ring_buffer_event *
Arnaldo Carvalho de Melo0a987752009-02-05 16:12:56 -02002238ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002239{
2240 struct ring_buffer_per_cpu *cpu_buffer;
2241 struct ring_buffer_event *event;
Steven Rostedtbf41a152008-10-04 02:00:59 -04002242 int cpu, resched;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002243
Steven Rostedt033601a2008-11-21 12:41:55 -05002244 if (ring_buffer_flags != RB_BUFFERS_ON)
Steven Rostedta3583242008-11-11 15:01:42 -05002245 return NULL;
2246
Steven Rostedtbf41a152008-10-04 02:00:59 -04002247 /* If we are tracing schedule, we don't want to recurse */
Steven Rostedt182e9f52008-11-03 23:15:56 -05002248 resched = ftrace_preempt_disable();
Steven Rostedtbf41a152008-10-04 02:00:59 -04002249
Lai Jiangshan52fbe9c2010-03-08 14:50:43 +08002250 if (atomic_read(&buffer->record_disabled))
2251 goto out_nocheck;
2252
Steven Rostedt261842b2009-04-16 21:41:52 -04002253 if (trace_recursive_lock())
2254 goto out_nocheck;
2255
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002256 cpu = raw_smp_processor_id();
2257
Rusty Russell9e01c1b2009-01-01 10:12:22 +10302258 if (!cpumask_test_cpu(cpu, buffer->cpumask))
Steven Rostedtd7690412008-10-01 00:29:53 -04002259 goto out;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002260
2261 cpu_buffer = buffer->buffers[cpu];
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002262
2263 if (atomic_read(&cpu_buffer->record_disabled))
Steven Rostedtd7690412008-10-01 00:29:53 -04002264 goto out;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002265
Steven Rostedtbe957c42009-05-11 14:42:53 -04002266 if (length > BUF_MAX_DATA_SIZE)
Steven Rostedtbf41a152008-10-04 02:00:59 -04002267 goto out;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002268
Steven Rostedt62f0b3e2009-09-04 14:11:34 -04002269 event = rb_reserve_next_event(buffer, cpu_buffer, length);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002270 if (!event)
Steven Rostedtd7690412008-10-01 00:29:53 -04002271 goto out;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002272
Steven Rostedtbf41a152008-10-04 02:00:59 -04002273 /*
2274 * Need to store resched state on this cpu.
2275 * Only the first needs to.
2276 */
2277
2278 if (preempt_count() == 1)
2279 per_cpu(rb_need_resched, cpu) = resched;
2280
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002281 return event;
2282
Steven Rostedtd7690412008-10-01 00:29:53 -04002283 out:
Steven Rostedt261842b2009-04-16 21:41:52 -04002284 trace_recursive_unlock();
2285
2286 out_nocheck:
Steven Rostedt182e9f52008-11-03 23:15:56 -05002287 ftrace_preempt_enable(resched);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002288 return NULL;
2289}
Robert Richterc4f50182008-12-11 16:49:22 +01002290EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002291
Steven Rostedta1863c22009-09-03 10:23:58 -04002292static void
2293rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002294 struct ring_buffer_event *event)
2295{
Steven Rostedtfa743952009-06-16 12:37:57 -04002296 /*
 2297 * The first event in the commit queue updates the
2298 * time stamp.
2299 */
2300 if (rb_event_is_commit(cpu_buffer, event))
2301 cpu_buffer->write_stamp += event->time_delta;
Steven Rostedta1863c22009-09-03 10:23:58 -04002302}
Steven Rostedtbf41a152008-10-04 02:00:59 -04002303
Steven Rostedta1863c22009-09-03 10:23:58 -04002304static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
2305 struct ring_buffer_event *event)
2306{
2307 local_inc(&cpu_buffer->entries);
2308 rb_update_write_stamp(cpu_buffer, event);
Steven Rostedtfa743952009-06-16 12:37:57 -04002309 rb_end_commit(cpu_buffer);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002310}
2311
2312/**
 2313 * ring_buffer_unlock_commit - commit a reserved event
2314 * @buffer: The buffer to commit to
2315 * @event: The event pointer to commit.
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002316 *
2317 * This commits the data to the ring buffer, and releases any locks held.
2318 *
2319 * Must be paired with ring_buffer_lock_reserve.
2320 */
2321int ring_buffer_unlock_commit(struct ring_buffer *buffer,
Arnaldo Carvalho de Melo0a987752009-02-05 16:12:56 -02002322 struct ring_buffer_event *event)
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002323{
2324 struct ring_buffer_per_cpu *cpu_buffer;
2325 int cpu = raw_smp_processor_id();
2326
2327 cpu_buffer = buffer->buffers[cpu];
2328
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002329 rb_commit(cpu_buffer, event);
2330
Steven Rostedt261842b2009-04-16 21:41:52 -04002331 trace_recursive_unlock();
2332
Steven Rostedtbf41a152008-10-04 02:00:59 -04002333 /*
2334 * Only the last preempt count needs to restore preemption.
2335 */
Steven Rostedt182e9f52008-11-03 23:15:56 -05002336 if (preempt_count() == 1)
2337 ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
2338 else
Steven Rostedtbf41a152008-10-04 02:00:59 -04002339 preempt_enable_no_resched_notrace();
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002340
2341 return 0;
2342}
Robert Richterc4f50182008-12-11 16:49:22 +01002343EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
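/*
 * Usage sketch for the reserve/commit pair above (illustrative only;
 * assumes "buffer" was created elsewhere with ring_buffer_alloc() and
 * that "my_data"/"my_len" are caller supplied):
 *
 *	struct ring_buffer_event *event;
 *	void *body;
 *
 *	event = ring_buffer_lock_reserve(buffer, my_len);
 *	if (!event)
 *		return -EBUSY;	/* recording disabled or buffer full */
 *	body = ring_buffer_event_data(event);
 *	memcpy(body, my_data, my_len);
 *	ring_buffer_unlock_commit(buffer, event);
 */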
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002344
Frederic Weisbeckerf3b9aae2009-04-19 23:39:33 +02002345static inline void rb_event_discard(struct ring_buffer_event *event)
2346{
Lai Jiangshan334d4162009-04-24 11:27:05 +08002347 /* array[0] holds the actual length for the discarded event */
2348 event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
2349 event->type_len = RINGBUF_TYPE_PADDING;
Frederic Weisbeckerf3b9aae2009-04-19 23:39:33 +02002350 /* time delta must be non zero */
2351 if (!event->time_delta)
2352 event->time_delta = 1;
2353}
2354
Steven Rostedta1863c22009-09-03 10:23:58 -04002355/*
2356 * Decrement the entries to the page that an event is on.
2357 * The event does not even need to exist, only the pointer
2358 * to the page it is on. This may only be called before the commit
2359 * takes place.
2360 */
2361static inline void
2362rb_decrement_entry(struct ring_buffer_per_cpu *cpu_buffer,
2363 struct ring_buffer_event *event)
2364{
2365 unsigned long addr = (unsigned long)event;
2366 struct buffer_page *bpage = cpu_buffer->commit_page;
2367 struct buffer_page *start;
2368
2369 addr &= PAGE_MASK;
2370
2371 /* Do the likely case first */
2372 if (likely(bpage->page == (void *)addr)) {
2373 local_dec(&bpage->entries);
2374 return;
2375 }
2376
2377 /*
2378 * Because the commit page may be on the reader page we
2379 * start with the next page and check the end loop there.
2380 */
2381 rb_inc_page(cpu_buffer, &bpage);
2382 start = bpage;
2383 do {
2384 if (bpage->page == (void *)addr) {
2385 local_dec(&bpage->entries);
2386 return;
2387 }
2388 rb_inc_page(cpu_buffer, &bpage);
2389 } while (bpage != start);
2390
2391 /* commit not part of this buffer?? */
2392 RB_WARN_ON(cpu_buffer, 1);
2393}
2394
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002395/**
Steven Rostedtfa1b47d2009-04-02 00:09:41 -04002396 * ring_buffer_commit_discard - discard an event that has not been committed
2397 * @buffer: the ring buffer
2398 * @event: non committed event to discard
2399 *
Steven Rostedtdc892f72009-09-03 15:33:41 -04002400 * Sometimes an event that is in the ring buffer needs to be ignored.
2401 * This function lets the user discard an event in the ring buffer
2402 * and then that event will not be read later.
2403 *
 2404 * This function only works if it is called before the item has been
2405 * committed. It will try to free the event from the ring buffer
Steven Rostedtfa1b47d2009-04-02 00:09:41 -04002406 * if another event has not been added behind it.
2407 *
2408 * If another event has been added behind it, it will set the event
2409 * up as discarded, and perform the commit.
2410 *
2411 * If this function is called, do not call ring_buffer_unlock_commit on
2412 * the event.
2413 */
2414void ring_buffer_discard_commit(struct ring_buffer *buffer,
2415 struct ring_buffer_event *event)
2416{
2417 struct ring_buffer_per_cpu *cpu_buffer;
Steven Rostedtfa1b47d2009-04-02 00:09:41 -04002418 int cpu;
2419
2420 /* The event is discarded regardless */
Frederic Weisbeckerf3b9aae2009-04-19 23:39:33 +02002421 rb_event_discard(event);
Steven Rostedtfa1b47d2009-04-02 00:09:41 -04002422
Steven Rostedtfa743952009-06-16 12:37:57 -04002423 cpu = smp_processor_id();
2424 cpu_buffer = buffer->buffers[cpu];
2425
Steven Rostedtfa1b47d2009-04-02 00:09:41 -04002426 /*
2427 * This must only be called if the event has not been
2428 * committed yet. Thus we can assume that preemption
2429 * is still disabled.
2430 */
Steven Rostedtfa743952009-06-16 12:37:57 -04002431 RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing));
Steven Rostedtfa1b47d2009-04-02 00:09:41 -04002432
Steven Rostedta1863c22009-09-03 10:23:58 -04002433 rb_decrement_entry(cpu_buffer, event);
Steven Rostedt0f2541d2009-08-05 12:02:48 -04002434 if (rb_try_to_discard(cpu_buffer, event))
Steven Rostedtedd813bf2009-06-02 23:00:53 -04002435 goto out;
Steven Rostedtfa1b47d2009-04-02 00:09:41 -04002436
2437 /*
2438 * The commit is still visible by the reader, so we
Steven Rostedta1863c22009-09-03 10:23:58 -04002439 * must still update the timestamp.
Steven Rostedtfa1b47d2009-04-02 00:09:41 -04002440 */
Steven Rostedta1863c22009-09-03 10:23:58 -04002441 rb_update_write_stamp(cpu_buffer, event);
Steven Rostedtfa1b47d2009-04-02 00:09:41 -04002442 out:
Steven Rostedtfa743952009-06-16 12:37:57 -04002443 rb_end_commit(cpu_buffer);
Steven Rostedtfa1b47d2009-04-02 00:09:41 -04002444
Frederic Weisbeckerf3b9aae2009-04-19 23:39:33 +02002445 trace_recursive_unlock();
2446
Steven Rostedtfa1b47d2009-04-02 00:09:41 -04002447 /*
2448 * Only the last preempt count needs to restore preemption.
2449 */
2450 if (preempt_count() == 1)
2451 ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
2452 else
2453 preempt_enable_no_resched_notrace();
2454
2455}
2456EXPORT_SYMBOL_GPL(ring_buffer_discard_commit);
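/*
 * Usage sketch for discarding a reservation (illustrative; reuses the
 * caller-supplied "buffer"/"my_len" from the sketch above, and
 * fill_event() is a hypothetical helper):
 *
 *	event = ring_buffer_lock_reserve(buffer, my_len);
 *	if (!event)
 *		return -EBUSY;
 *	if (fill_event(ring_buffer_event_data(event)))
 *		ring_buffer_unlock_commit(buffer, event);
 *	else
 *		ring_buffer_discard_commit(buffer, event);
 */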
2457
2458/**
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002459 * ring_buffer_write - write data to the buffer without reserving
2460 * @buffer: The ring buffer to write to.
2461 * @length: The length of the data being written (excluding the event header)
2462 * @data: The data to write to the buffer.
2463 *
2464 * This is like ring_buffer_lock_reserve and ring_buffer_unlock_commit as
2465 * one function. If you already have the data to write to the buffer, it
2466 * may be easier to simply call this function.
2467 *
2468 * Note, like ring_buffer_lock_reserve, the length is the length of the data
2469 * and not the length of the event which would hold the header.
2470 */
2471int ring_buffer_write(struct ring_buffer *buffer,
2472 unsigned long length,
2473 void *data)
2474{
2475 struct ring_buffer_per_cpu *cpu_buffer;
2476 struct ring_buffer_event *event;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002477 void *body;
2478 int ret = -EBUSY;
Steven Rostedtbf41a152008-10-04 02:00:59 -04002479 int cpu, resched;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002480
Steven Rostedt033601a2008-11-21 12:41:55 -05002481 if (ring_buffer_flags != RB_BUFFERS_ON)
Steven Rostedta3583242008-11-11 15:01:42 -05002482 return -EBUSY;
2483
Steven Rostedt182e9f52008-11-03 23:15:56 -05002484 resched = ftrace_preempt_disable();
Steven Rostedtbf41a152008-10-04 02:00:59 -04002485
Lai Jiangshan52fbe9c2010-03-08 14:50:43 +08002486 if (atomic_read(&buffer->record_disabled))
2487 goto out;
2488
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002489 cpu = raw_smp_processor_id();
2490
Rusty Russell9e01c1b2009-01-01 10:12:22 +10302491 if (!cpumask_test_cpu(cpu, buffer->cpumask))
Steven Rostedtd7690412008-10-01 00:29:53 -04002492 goto out;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002493
2494 cpu_buffer = buffer->buffers[cpu];
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002495
2496 if (atomic_read(&cpu_buffer->record_disabled))
2497 goto out;
2498
Steven Rostedtbe957c42009-05-11 14:42:53 -04002499 if (length > BUF_MAX_DATA_SIZE)
2500 goto out;
2501
Steven Rostedt62f0b3e2009-09-04 14:11:34 -04002502 event = rb_reserve_next_event(buffer, cpu_buffer, length);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002503 if (!event)
2504 goto out;
2505
2506 body = rb_event_data(event);
2507
2508 memcpy(body, data, length);
2509
2510 rb_commit(cpu_buffer, event);
2511
2512 ret = 0;
2513 out:
Steven Rostedt182e9f52008-11-03 23:15:56 -05002514 ftrace_preempt_enable(resched);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002515
2516 return ret;
2517}
Robert Richterc4f50182008-12-11 16:49:22 +01002518EXPORT_SYMBOL_GPL(ring_buffer_write);
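/*
 * Usage sketch for the one-shot write path (illustrative; "buffer" and
 * the sample entry layout are caller-supplied assumptions):
 *
 *	struct { unsigned long ip; unsigned long parent_ip; } entry = { 0 };
 *
 *	if (ring_buffer_write(buffer, sizeof(entry), &entry))
 *		return -EBUSY;	/* recording disabled or invalid CPU */
 */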
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002519
Andrew Morton34a148b2009-01-09 12:27:09 -08002520static int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
Steven Rostedtbf41a152008-10-04 02:00:59 -04002521{
2522 struct buffer_page *reader = cpu_buffer->reader_page;
Steven Rostedt77ae3652009-03-27 11:00:29 -04002523 struct buffer_page *head = rb_set_head_page(cpu_buffer);
Steven Rostedtbf41a152008-10-04 02:00:59 -04002524 struct buffer_page *commit = cpu_buffer->commit_page;
2525
Steven Rostedt77ae3652009-03-27 11:00:29 -04002526 /* In case of error, head will be NULL */
2527 if (unlikely(!head))
2528 return 1;
2529
Steven Rostedtbf41a152008-10-04 02:00:59 -04002530 return reader->read == rb_page_commit(reader) &&
2531 (commit == reader ||
2532 (commit == head &&
2533 head->read == rb_page_commit(commit)));
2534}
2535
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002536/**
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002537 * ring_buffer_record_disable - stop all writes into the buffer
2538 * @buffer: The ring buffer to stop writes to.
2539 *
2540 * This prevents all writes to the buffer. Any attempt to write
2541 * to the buffer after this will fail and return NULL.
2542 *
2543 * The caller should call synchronize_sched() after this.
2544 */
2545void ring_buffer_record_disable(struct ring_buffer *buffer)
2546{
2547 atomic_inc(&buffer->record_disabled);
2548}
Robert Richterc4f50182008-12-11 16:49:22 +01002549EXPORT_SYMBOL_GPL(ring_buffer_record_disable);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002550
2551/**
2552 * ring_buffer_record_enable - enable writes to the buffer
2553 * @buffer: The ring buffer to enable writes
2554 *
2555 * Note, multiple disables will need the same number of enables
Adam Buchbinderc41b20e2009-12-11 16:35:39 -05002556 * to truly enable the writing (much like preempt_disable).
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002557 */
2558void ring_buffer_record_enable(struct ring_buffer *buffer)
2559{
2560 atomic_dec(&buffer->record_disabled);
2561}
Robert Richterc4f50182008-12-11 16:49:22 +01002562EXPORT_SYMBOL_GPL(ring_buffer_record_enable);
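/*
 * Sketch of the disable/enable pairing described above (illustrative;
 * "buffer" is caller supplied):
 *
 *	ring_buffer_record_disable(buffer);
 *	synchronize_sched();	/* wait for writers already in progress */
 *	/* ... inspect or reset the buffer here ... */
 *	ring_buffer_record_enable(buffer);
 *
 * The number of enables must match the number of disables before
 * writes are accepted again.
 */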
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002563
2564/**
2565 * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer
2566 * @buffer: The ring buffer to stop writes to.
2567 * @cpu: The CPU buffer to stop
2568 *
2569 * This prevents all writes to the buffer. Any attempt to write
2570 * to the buffer after this will fail and return NULL.
2571 *
2572 * The caller should call synchronize_sched() after this.
2573 */
2574void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu)
2575{
2576 struct ring_buffer_per_cpu *cpu_buffer;
2577
Rusty Russell9e01c1b2009-01-01 10:12:22 +10302578 if (!cpumask_test_cpu(cpu, buffer->cpumask))
Steven Rostedt8aabee52009-03-12 13:13:49 -04002579 return;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002580
2581 cpu_buffer = buffer->buffers[cpu];
2582 atomic_inc(&cpu_buffer->record_disabled);
2583}
Robert Richterc4f50182008-12-11 16:49:22 +01002584EXPORT_SYMBOL_GPL(ring_buffer_record_disable_cpu);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002585
2586/**
2587 * ring_buffer_record_enable_cpu - enable writes to the buffer
2588 * @buffer: The ring buffer to enable writes
2589 * @cpu: The CPU to enable.
2590 *
2591 * Note, multiple disables will need the same number of enables
Adam Buchbinderc41b20e2009-12-11 16:35:39 -05002592 * to truly enable the writing (much like preempt_disable).
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002593 */
2594void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
2595{
2596 struct ring_buffer_per_cpu *cpu_buffer;
2597
Rusty Russell9e01c1b2009-01-01 10:12:22 +10302598 if (!cpumask_test_cpu(cpu, buffer->cpumask))
Steven Rostedt8aabee52009-03-12 13:13:49 -04002599 return;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002600
2601 cpu_buffer = buffer->buffers[cpu];
2602 atomic_dec(&cpu_buffer->record_disabled);
2603}
Robert Richterc4f50182008-12-11 16:49:22 +01002604EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002605
2606/**
2607 * ring_buffer_entries_cpu - get the number of entries in a cpu buffer
2608 * @buffer: The ring buffer
2609 * @cpu: The per CPU buffer to get the entries from.
2610 */
2611unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
2612{
2613 struct ring_buffer_per_cpu *cpu_buffer;
Steven Rostedt8aabee52009-03-12 13:13:49 -04002614 unsigned long ret;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002615
Rusty Russell9e01c1b2009-01-01 10:12:22 +10302616 if (!cpumask_test_cpu(cpu, buffer->cpumask))
Steven Rostedt8aabee52009-03-12 13:13:49 -04002617 return 0;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002618
2619 cpu_buffer = buffer->buffers[cpu];
Steven Rostedt77ae3652009-03-27 11:00:29 -04002620 ret = (local_read(&cpu_buffer->entries) - local_read(&cpu_buffer->overrun))
Steven Rostedte4906ef2009-04-30 20:49:44 -04002621 - cpu_buffer->read;
Steven Rostedt554f7862009-03-11 22:00:13 -04002622
2623 return ret;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002624}
Robert Richterc4f50182008-12-11 16:49:22 +01002625EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002626
2627/**
2628 * ring_buffer_overrun_cpu - get the number of overruns in a cpu_buffer
2629 * @buffer: The ring buffer
2630 * @cpu: The per CPU buffer to get the number of overruns from
2631 */
2632unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu)
2633{
2634 struct ring_buffer_per_cpu *cpu_buffer;
Steven Rostedt8aabee52009-03-12 13:13:49 -04002635 unsigned long ret;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002636
Rusty Russell9e01c1b2009-01-01 10:12:22 +10302637 if (!cpumask_test_cpu(cpu, buffer->cpumask))
Steven Rostedt8aabee52009-03-12 13:13:49 -04002638 return 0;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002639
2640 cpu_buffer = buffer->buffers[cpu];
Steven Rostedt77ae3652009-03-27 11:00:29 -04002641 ret = local_read(&cpu_buffer->overrun);
Steven Rostedt554f7862009-03-11 22:00:13 -04002642
2643 return ret;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002644}
Robert Richterc4f50182008-12-11 16:49:22 +01002645EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002646
2647/**
Steven Rostedtf0d2c682009-04-29 13:43:37 -04002648 * ring_buffer_commit_overrun_cpu - get the number of overruns caused by commits
2649 * @buffer: The ring buffer
2650 * @cpu: The per CPU buffer to get the number of overruns from
2651 */
2652unsigned long
2653ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu)
2654{
2655 struct ring_buffer_per_cpu *cpu_buffer;
2656 unsigned long ret;
2657
2658 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2659 return 0;
2660
2661 cpu_buffer = buffer->buffers[cpu];
Steven Rostedt77ae3652009-03-27 11:00:29 -04002662 ret = local_read(&cpu_buffer->commit_overrun);
Steven Rostedtf0d2c682009-04-29 13:43:37 -04002663
2664 return ret;
2665}
2666EXPORT_SYMBOL_GPL(ring_buffer_commit_overrun_cpu);
2667
2668/**
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002669 * ring_buffer_entries - get the number of entries in a buffer
2670 * @buffer: The ring buffer
2671 *
2672 * Returns the total number of entries in the ring buffer
2673 * (all CPU entries)
2674 */
2675unsigned long ring_buffer_entries(struct ring_buffer *buffer)
2676{
2677 struct ring_buffer_per_cpu *cpu_buffer;
2678 unsigned long entries = 0;
2679 int cpu;
2680
2681 /* if you care about this being correct, lock the buffer */
2682 for_each_buffer_cpu(buffer, cpu) {
2683 cpu_buffer = buffer->buffers[cpu];
Steven Rostedte4906ef2009-04-30 20:49:44 -04002684 entries += (local_read(&cpu_buffer->entries) -
Steven Rostedt77ae3652009-03-27 11:00:29 -04002685 local_read(&cpu_buffer->overrun)) - cpu_buffer->read;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002686 }
2687
2688 return entries;
2689}
Robert Richterc4f50182008-12-11 16:49:22 +01002690EXPORT_SYMBOL_GPL(ring_buffer_entries);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002691
2692/**
Jiri Olsa67b394f2009-10-23 19:36:18 -04002693 * ring_buffer_overruns - get the number of overruns in buffer
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002694 * @buffer: The ring buffer
2695 *
2696 * Returns the total number of overruns in the ring buffer
2697 * (all CPU entries)
2698 */
2699unsigned long ring_buffer_overruns(struct ring_buffer *buffer)
2700{
2701 struct ring_buffer_per_cpu *cpu_buffer;
2702 unsigned long overruns = 0;
2703 int cpu;
2704
2705 /* if you care about this being correct, lock the buffer */
2706 for_each_buffer_cpu(buffer, cpu) {
2707 cpu_buffer = buffer->buffers[cpu];
Steven Rostedt77ae3652009-03-27 11:00:29 -04002708 overruns += local_read(&cpu_buffer->overrun);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002709 }
2710
2711 return overruns;
2712}
Robert Richterc4f50182008-12-11 16:49:22 +01002713EXPORT_SYMBOL_GPL(ring_buffer_overruns);
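/*
 * Sketch of a per-CPU statistics readout built on the accessors above
 * (illustrative; "buffer" is caller supplied):
 *
 *	int cpu;
 *
 *	for_each_online_cpu(cpu)
 *		pr_info("cpu %d: %lu entries, %lu overruns\n", cpu,
 *			ring_buffer_entries_cpu(buffer, cpu),
 *			ring_buffer_overrun_cpu(buffer, cpu));
 */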
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002714
Steven Rostedt642edba2008-11-12 00:01:26 -05002715static void rb_iter_reset(struct ring_buffer_iter *iter)
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002716{
2717 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
2718
Steven Rostedtd7690412008-10-01 00:29:53 -04002719 /* Iterator usage is expected to have record disabled */
2720 if (list_empty(&cpu_buffer->reader_page->list)) {
Steven Rostedt77ae3652009-03-27 11:00:29 -04002721 iter->head_page = rb_set_head_page(cpu_buffer);
2722 if (unlikely(!iter->head_page))
2723 return;
2724 iter->head = iter->head_page->read;
Steven Rostedtd7690412008-10-01 00:29:53 -04002725 } else {
2726 iter->head_page = cpu_buffer->reader_page;
Steven Rostedt6f807ac2008-10-04 02:00:58 -04002727 iter->head = cpu_buffer->reader_page->read;
Steven Rostedtd7690412008-10-01 00:29:53 -04002728 }
2729 if (iter->head)
2730 iter->read_stamp = cpu_buffer->read_stamp;
2731 else
Steven Rostedtabc9b562008-12-02 15:34:06 -05002732 iter->read_stamp = iter->head_page->page->time_stamp;
Steven Rostedt492a74f2010-01-25 15:17:47 -05002733 iter->cache_reader_page = cpu_buffer->reader_page;
2734 iter->cache_read = cpu_buffer->read;
Steven Rostedt642edba2008-11-12 00:01:26 -05002735}
Steven Rostedtf83c9d02008-11-11 18:47:44 +01002736
Steven Rostedt642edba2008-11-12 00:01:26 -05002737/**
2738 * ring_buffer_iter_reset - reset an iterator
2739 * @iter: The iterator to reset
2740 *
2741 * Resets the iterator, so that it will start from the beginning
2742 * again.
2743 */
2744void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
2745{
Steven Rostedt554f7862009-03-11 22:00:13 -04002746 struct ring_buffer_per_cpu *cpu_buffer;
Steven Rostedt642edba2008-11-12 00:01:26 -05002747 unsigned long flags;
2748
Steven Rostedt554f7862009-03-11 22:00:13 -04002749 if (!iter)
2750 return;
2751
2752 cpu_buffer = iter->cpu_buffer;
2753
Steven Rostedt642edba2008-11-12 00:01:26 -05002754 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2755 rb_iter_reset(iter);
Steven Rostedtf83c9d02008-11-11 18:47:44 +01002756 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002757}
Robert Richterc4f50182008-12-11 16:49:22 +01002758EXPORT_SYMBOL_GPL(ring_buffer_iter_reset);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002759
2760/**
2761 * ring_buffer_iter_empty - check if an iterator has no more to read
2762 * @iter: The iterator to check
2763 */
2764int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
2765{
2766 struct ring_buffer_per_cpu *cpu_buffer;
2767
2768 cpu_buffer = iter->cpu_buffer;
2769
Steven Rostedtbf41a152008-10-04 02:00:59 -04002770 return iter->head_page == cpu_buffer->commit_page &&
2771 iter->head == rb_commit_index(cpu_buffer);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002772}
Robert Richterc4f50182008-12-11 16:49:22 +01002773EXPORT_SYMBOL_GPL(ring_buffer_iter_empty);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002774
2775static void
2776rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
2777 struct ring_buffer_event *event)
2778{
2779 u64 delta;
2780
Lai Jiangshan334d4162009-04-24 11:27:05 +08002781 switch (event->type_len) {
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002782 case RINGBUF_TYPE_PADDING:
2783 return;
2784
2785 case RINGBUF_TYPE_TIME_EXTEND:
2786 delta = event->array[0];
2787 delta <<= TS_SHIFT;
2788 delta += event->time_delta;
2789 cpu_buffer->read_stamp += delta;
2790 return;
2791
2792 case RINGBUF_TYPE_TIME_STAMP:
2793 /* FIXME: not implemented */
2794 return;
2795
2796 case RINGBUF_TYPE_DATA:
2797 cpu_buffer->read_stamp += event->time_delta;
2798 return;
2799
2800 default:
2801 BUG();
2802 }
2803 return;
2804}
2805
2806static void
2807rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
2808 struct ring_buffer_event *event)
2809{
2810 u64 delta;
2811
Lai Jiangshan334d4162009-04-24 11:27:05 +08002812 switch (event->type_len) {
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002813 case RINGBUF_TYPE_PADDING:
2814 return;
2815
2816 case RINGBUF_TYPE_TIME_EXTEND:
2817 delta = event->array[0];
2818 delta <<= TS_SHIFT;
2819 delta += event->time_delta;
2820 iter->read_stamp += delta;
2821 return;
2822
2823 case RINGBUF_TYPE_TIME_STAMP:
2824 /* FIXME: not implemented */
2825 return;
2826
2827 case RINGBUF_TYPE_DATA:
2828 iter->read_stamp += event->time_delta;
2829 return;
2830
2831 default:
2832 BUG();
2833 }
2834 return;
2835}
2836
Steven Rostedtd7690412008-10-01 00:29:53 -04002837static struct buffer_page *
2838rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002839{
Steven Rostedtd7690412008-10-01 00:29:53 -04002840 struct buffer_page *reader = NULL;
2841 unsigned long flags;
Steven Rostedt818e3dd2008-10-31 09:58:35 -04002842 int nr_loops = 0;
Steven Rostedt77ae3652009-03-27 11:00:29 -04002843 int ret;
Steven Rostedtd7690412008-10-01 00:29:53 -04002844
Steven Rostedt3e03fb72008-11-06 00:09:43 -05002845 local_irq_save(flags);
Thomas Gleixner0199c4e2009-12-02 20:01:25 +01002846 arch_spin_lock(&cpu_buffer->lock);
Steven Rostedtd7690412008-10-01 00:29:53 -04002847
2848 again:
Steven Rostedt818e3dd2008-10-31 09:58:35 -04002849 /*
2850 * This should normally only loop twice. But because the
2851 * start of the reader inserts an empty page, it causes
2852 * a case where we will loop three times. There should be no
2853 * reason to loop four times (that I know of).
2854 */
Steven Rostedt3e89c7bb2008-11-11 15:28:41 -05002855 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3)) {
Steven Rostedt818e3dd2008-10-31 09:58:35 -04002856 reader = NULL;
2857 goto out;
2858 }
2859
Steven Rostedtd7690412008-10-01 00:29:53 -04002860 reader = cpu_buffer->reader_page;
2861
2862 /* If there's more to read, return this page */
Steven Rostedtbf41a152008-10-04 02:00:59 -04002863 if (cpu_buffer->reader_page->read < rb_page_size(reader))
Steven Rostedtd7690412008-10-01 00:29:53 -04002864 goto out;
2865
2866 /* Never should we have an index greater than the size */
Steven Rostedt3e89c7bb2008-11-11 15:28:41 -05002867 if (RB_WARN_ON(cpu_buffer,
2868 cpu_buffer->reader_page->read > rb_page_size(reader)))
2869 goto out;
Steven Rostedtd7690412008-10-01 00:29:53 -04002870
2871 /* check if we caught up to the tail */
2872 reader = NULL;
Steven Rostedtbf41a152008-10-04 02:00:59 -04002873 if (cpu_buffer->commit_page == cpu_buffer->reader_page)
Steven Rostedtd7690412008-10-01 00:29:53 -04002874 goto out;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002875
2876 /*
Steven Rostedtd7690412008-10-01 00:29:53 -04002877 * Reset the reader page to size zero.
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002878 */
Steven Rostedt77ae3652009-03-27 11:00:29 -04002879 local_set(&cpu_buffer->reader_page->write, 0);
2880 local_set(&cpu_buffer->reader_page->entries, 0);
2881 local_set(&cpu_buffer->reader_page->page->commit, 0);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002882
Steven Rostedt77ae3652009-03-27 11:00:29 -04002883 spin:
2884 /*
2885 * Splice the empty reader page into the list around the head.
2886 */
2887 reader = rb_set_head_page(cpu_buffer);
Steven Rostedt0e1ff5d2010-01-06 20:40:44 -05002888 cpu_buffer->reader_page->list.next = rb_list_head(reader->list.next);
Steven Rostedtd7690412008-10-01 00:29:53 -04002889 cpu_buffer->reader_page->list.prev = reader->list.prev;
Steven Rostedtbf41a152008-10-04 02:00:59 -04002890
Steven Rostedt3adc54f2009-03-30 15:32:01 -04002891 /*
2892 * cpu_buffer->pages just needs to point to the buffer, it
2893	 * has no specific buffer page to point to. Let's move it out
2894	 * of our way so we don't accidentally swap it.
2895 */
2896 cpu_buffer->pages = reader->list.prev;
2897
Steven Rostedt77ae3652009-03-27 11:00:29 -04002898 /* The reader page will be pointing to the new head */
2899 rb_set_list_to_head(cpu_buffer, &cpu_buffer->reader_page->list);
Steven Rostedtd7690412008-10-01 00:29:53 -04002900
2901 /*
Steven Rostedt77ae3652009-03-27 11:00:29 -04002902 * Here's the tricky part.
2903 *
2904 * We need to move the pointer past the header page.
2905 * But we can only do that if a writer is not currently
2906 * moving it. The page before the header page has the
2907	 * flag bit '1' set if it is pointing to the page we want,
2908	 * but if the writer is in the process of moving it,
2909	 * then it will be '2', or '0' if it has already moved
	 * (see the illustrative sketch after this function).
Steven Rostedtd7690412008-10-01 00:29:53 -04002910 */
Steven Rostedtd7690412008-10-01 00:29:53 -04002911
Steven Rostedt77ae3652009-03-27 11:00:29 -04002912 ret = rb_head_page_replace(reader, cpu_buffer->reader_page);
2913
2914 /*
2915 * If we did not convert it, then we must try again.
2916 */
2917 if (!ret)
2918 goto spin;
2919
2920 /*
2921 * Yeah! We succeeded in replacing the page.
2922 *
2923 * Now make the new head point back to the reader page.
2924 */
David Sharp5ded3dc62010-01-06 17:12:07 -08002925 rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list;
Steven Rostedt77ae3652009-03-27 11:00:29 -04002926 rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
Steven Rostedtd7690412008-10-01 00:29:53 -04002927
2928 /* Finally update the reader page to the new head */
2929 cpu_buffer->reader_page = reader;
2930 rb_reset_reader_page(cpu_buffer);
2931
2932 goto again;
2933
2934 out:
Thomas Gleixner0199c4e2009-12-02 20:01:25 +01002935 arch_spin_unlock(&cpu_buffer->lock);
Steven Rostedt3e03fb72008-11-06 00:09:43 -05002936 local_irq_restore(flags);
Steven Rostedtd7690412008-10-01 00:29:53 -04002937
2938 return reader;
2939}
2940
2941static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
2942{
2943 struct ring_buffer_event *event;
2944 struct buffer_page *reader;
2945 unsigned length;
2946
2947 reader = rb_get_reader_page(cpu_buffer);
2948
2949 /* This function should not be called when buffer is empty */
Steven Rostedt3e89c7bb2008-11-11 15:28:41 -05002950 if (RB_WARN_ON(cpu_buffer, !reader))
2951 return;
Steven Rostedtd7690412008-10-01 00:29:53 -04002952
2953 event = rb_reader_event(cpu_buffer);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002954
Steven Rostedta1863c22009-09-03 10:23:58 -04002955 if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
Steven Rostedte4906ef2009-04-30 20:49:44 -04002956 cpu_buffer->read++;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002957
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002958 rb_update_read_stamp(cpu_buffer, event);
2959
Steven Rostedtd7690412008-10-01 00:29:53 -04002960 length = rb_event_length(event);
Steven Rostedt6f807ac2008-10-04 02:00:58 -04002961 cpu_buffer->reader_page->read += length;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002962}
2963
2964static void rb_advance_iter(struct ring_buffer_iter *iter)
2965{
2966 struct ring_buffer *buffer;
2967 struct ring_buffer_per_cpu *cpu_buffer;
2968 struct ring_buffer_event *event;
2969 unsigned length;
2970
2971 cpu_buffer = iter->cpu_buffer;
2972 buffer = cpu_buffer->buffer;
2973
2974 /*
2975 * Check if we are at the end of the buffer.
2976 */
Steven Rostedtbf41a152008-10-04 02:00:59 -04002977 if (iter->head >= rb_page_size(iter->head_page)) {
Steven Rostedtea05b572009-06-03 09:30:10 -04002978 /* discarded commits can make the page empty */
2979 if (iter->head_page == cpu_buffer->commit_page)
Steven Rostedt3e89c7bb2008-11-11 15:28:41 -05002980 return;
Steven Rostedtd7690412008-10-01 00:29:53 -04002981 rb_inc_iter(iter);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002982 return;
2983 }
2984
2985 event = rb_iter_head_event(iter);
2986
2987 length = rb_event_length(event);
2988
2989 /*
2990 * This should not be called to advance the header if we are
2991 * at the tail of the buffer.
2992 */
Steven Rostedt3e89c7bb2008-11-11 15:28:41 -05002993 if (RB_WARN_ON(cpu_buffer,
Steven Rostedtf536aaf2008-11-10 23:07:30 -05002994 (iter->head_page == cpu_buffer->commit_page) &&
Steven Rostedt3e89c7bb2008-11-11 15:28:41 -05002995 (iter->head + length > rb_commit_index(cpu_buffer))))
2996 return;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04002997
2998 rb_update_iter_read_stamp(iter, event);
2999
3000 iter->head += length;
3001
3002 /* check for end of page padding */
Steven Rostedtbf41a152008-10-04 02:00:59 -04003003 if ((iter->head >= rb_page_size(iter->head_page)) &&
3004 (iter->head_page != cpu_buffer->commit_page))
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003005 rb_advance_iter(iter);
3006}
3007
Steven Rostedtf83c9d02008-11-11 18:47:44 +01003008static struct ring_buffer_event *
Robert Richterd8eeb2d2009-07-31 14:58:04 +02003009rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts)
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003010{
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003011 struct ring_buffer_event *event;
Steven Rostedtd7690412008-10-01 00:29:53 -04003012 struct buffer_page *reader;
Steven Rostedt818e3dd2008-10-31 09:58:35 -04003013 int nr_loops = 0;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003014
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003015 again:
Steven Rostedt818e3dd2008-10-31 09:58:35 -04003016 /*
3017 * We repeat when a timestamp is encountered. It is possible
3018 * to get multiple timestamps from an interrupt entering just
Steven Rostedtea05b572009-06-03 09:30:10 -04003019 * as one timestamp is about to be written, or from discarded
3020 * commits. The most that we can have is the number on a single page.
Steven Rostedt818e3dd2008-10-31 09:58:35 -04003021 */
Steven Rostedtea05b572009-06-03 09:30:10 -04003022 if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE))
Steven Rostedt818e3dd2008-10-31 09:58:35 -04003023 return NULL;
Steven Rostedt818e3dd2008-10-31 09:58:35 -04003024
Steven Rostedtd7690412008-10-01 00:29:53 -04003025 reader = rb_get_reader_page(cpu_buffer);
3026 if (!reader)
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003027 return NULL;
3028
Steven Rostedtd7690412008-10-01 00:29:53 -04003029 event = rb_reader_event(cpu_buffer);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003030
Lai Jiangshan334d4162009-04-24 11:27:05 +08003031 switch (event->type_len) {
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003032 case RINGBUF_TYPE_PADDING:
Tom Zanussi2d622712009-03-22 03:30:49 -05003033 if (rb_null_event(event))
3034 RB_WARN_ON(cpu_buffer, 1);
3035 /*
3036 * Because the writer could be discarding every
3037		 * event it creates (which would probably be bad),
3038		 * if we were to go back to "again" then we may never
3039		 * catch up, and will trigger the warn on, or lock up
3040 * the box. Return the padding, and we will release
3041 * the current locks, and try again.
3042 */
Tom Zanussi2d622712009-03-22 03:30:49 -05003043 return event;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003044
3045 case RINGBUF_TYPE_TIME_EXTEND:
3046 /* Internal data, OK to advance */
Steven Rostedtd7690412008-10-01 00:29:53 -04003047 rb_advance_reader(cpu_buffer);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003048 goto again;
3049
3050 case RINGBUF_TYPE_TIME_STAMP:
3051 /* FIXME: not implemented */
Steven Rostedtd7690412008-10-01 00:29:53 -04003052 rb_advance_reader(cpu_buffer);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003053 goto again;
3054
3055 case RINGBUF_TYPE_DATA:
3056 if (ts) {
3057 *ts = cpu_buffer->read_stamp + event->time_delta;
Robert Richterd8eeb2d2009-07-31 14:58:04 +02003058 ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
Steven Rostedt37886f62009-03-17 17:22:06 -04003059 cpu_buffer->cpu, ts);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003060 }
3061 return event;
3062
3063 default:
3064 BUG();
3065 }
3066
3067 return NULL;
3068}
Robert Richterc4f50182008-12-11 16:49:22 +01003069EXPORT_SYMBOL_GPL(ring_buffer_peek);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003070
Steven Rostedtf83c9d02008-11-11 18:47:44 +01003071static struct ring_buffer_event *
3072rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003073{
3074 struct ring_buffer *buffer;
3075 struct ring_buffer_per_cpu *cpu_buffer;
3076 struct ring_buffer_event *event;
Steven Rostedt818e3dd2008-10-31 09:58:35 -04003077 int nr_loops = 0;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003078
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003079 cpu_buffer = iter->cpu_buffer;
3080 buffer = cpu_buffer->buffer;
3081
Steven Rostedt492a74f2010-01-25 15:17:47 -05003082 /*
3083 * Check if someone performed a consuming read to
3084 * the buffer. A consuming read invalidates the iterator
3085 * and we need to reset the iterator in this case.
3086 */
3087 if (unlikely(iter->cache_read != cpu_buffer->read ||
3088 iter->cache_reader_page != cpu_buffer->reader_page))
3089 rb_iter_reset(iter);
3090
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003091 again:
Steven Rostedt3c05d742010-01-26 16:14:08 -05003092 if (ring_buffer_iter_empty(iter))
3093 return NULL;
3094
Steven Rostedt818e3dd2008-10-31 09:58:35 -04003095 /*
Steven Rostedtea05b572009-06-03 09:30:10 -04003096 * We repeat when a timestamp is encountered.
3097	 * We can get multiple timestamps from nested interrupts, or
3098 * if filtering is on (discarding commits). Since discarding
3099 * commits can be frequent we can get a lot of timestamps.
3100 * But we limit them by not adding timestamps if they begin
3101 * at the start of a page.
Steven Rostedt818e3dd2008-10-31 09:58:35 -04003102 */
Steven Rostedtea05b572009-06-03 09:30:10 -04003103 if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE))
Steven Rostedt818e3dd2008-10-31 09:58:35 -04003104 return NULL;
Steven Rostedt818e3dd2008-10-31 09:58:35 -04003105
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003106 if (rb_per_cpu_empty(cpu_buffer))
3107 return NULL;
3108
Steven Rostedt3c05d742010-01-26 16:14:08 -05003109 if (iter->head >= local_read(&iter->head_page->page->commit)) {
3110 rb_inc_iter(iter);
3111 goto again;
3112 }
3113
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003114 event = rb_iter_head_event(iter);
3115
Lai Jiangshan334d4162009-04-24 11:27:05 +08003116 switch (event->type_len) {
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003117 case RINGBUF_TYPE_PADDING:
Tom Zanussi2d622712009-03-22 03:30:49 -05003118 if (rb_null_event(event)) {
3119 rb_inc_iter(iter);
3120 goto again;
3121 }
3122 rb_advance_iter(iter);
3123 return event;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003124
3125 case RINGBUF_TYPE_TIME_EXTEND:
3126 /* Internal data, OK to advance */
3127 rb_advance_iter(iter);
3128 goto again;
3129
3130 case RINGBUF_TYPE_TIME_STAMP:
3131 /* FIXME: not implemented */
3132 rb_advance_iter(iter);
3133 goto again;
3134
3135 case RINGBUF_TYPE_DATA:
3136 if (ts) {
3137 *ts = iter->read_stamp + event->time_delta;
Steven Rostedt37886f62009-03-17 17:22:06 -04003138 ring_buffer_normalize_time_stamp(buffer,
3139 cpu_buffer->cpu, ts);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003140 }
3141 return event;
3142
3143 default:
3144 BUG();
3145 }
3146
3147 return NULL;
3148}
Robert Richterc4f50182008-12-11 16:49:22 +01003149EXPORT_SYMBOL_GPL(ring_buffer_iter_peek);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003150
Steven Rostedt8d707e82009-06-16 21:22:48 -04003151static inline int rb_ok_to_lock(void)
3152{
3153 /*
3154	 * If an NMI die dumps out the content of the ring buffer,
3155	 * do not grab locks. We also permanently disable the ring
3156	 * buffer. A one-time deal is all you get from reading
3157 * the ring buffer from an NMI.
3158 */
Steven Rostedt464e85e2009-08-05 15:26:37 -04003159 if (likely(!in_nmi()))
Steven Rostedt8d707e82009-06-16 21:22:48 -04003160 return 1;
3161
3162 tracing_off_permanent();
3163 return 0;
3164}
3165
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003166/**
Steven Rostedtf83c9d02008-11-11 18:47:44 +01003167 * ring_buffer_peek - peek at the next event to be read
3168 * @buffer: The ring buffer to read
3169 * @cpu: The cpu to peek at
3170 * @ts: The timestamp counter of this event.
3171 *
3172 * This will return the event that will be read next, but does
3173 * not consume the data.
3174 */
3175struct ring_buffer_event *
3176ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
3177{
3178 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
Steven Rostedt8aabee52009-03-12 13:13:49 -04003179 struct ring_buffer_event *event;
Steven Rostedtf83c9d02008-11-11 18:47:44 +01003180 unsigned long flags;
Steven Rostedt8d707e82009-06-16 21:22:48 -04003181 int dolock;
Steven Rostedtf83c9d02008-11-11 18:47:44 +01003182
Steven Rostedt554f7862009-03-11 22:00:13 -04003183 if (!cpumask_test_cpu(cpu, buffer->cpumask))
Steven Rostedt8aabee52009-03-12 13:13:49 -04003184 return NULL;
Steven Rostedt554f7862009-03-11 22:00:13 -04003185
Steven Rostedt8d707e82009-06-16 21:22:48 -04003186 dolock = rb_ok_to_lock();
Tom Zanussi2d622712009-03-22 03:30:49 -05003187 again:
Steven Rostedt8d707e82009-06-16 21:22:48 -04003188 local_irq_save(flags);
3189 if (dolock)
3190 spin_lock(&cpu_buffer->reader_lock);
Robert Richterd8eeb2d2009-07-31 14:58:04 +02003191 event = rb_buffer_peek(cpu_buffer, ts);
Robert Richter469535a2009-07-30 19:19:18 +02003192 if (event && event->type_len == RINGBUF_TYPE_PADDING)
3193 rb_advance_reader(cpu_buffer);
Steven Rostedt8d707e82009-06-16 21:22:48 -04003194 if (dolock)
3195 spin_unlock(&cpu_buffer->reader_lock);
3196 local_irq_restore(flags);
Steven Rostedtf83c9d02008-11-11 18:47:44 +01003197
Steven Rostedt1b959e12009-09-03 10:12:13 -04003198 if (event && event->type_len == RINGBUF_TYPE_PADDING)
Tom Zanussi2d622712009-03-22 03:30:49 -05003199 goto again;
Tom Zanussi2d622712009-03-22 03:30:49 -05003200
Steven Rostedtf83c9d02008-11-11 18:47:44 +01003201 return event;
3202}
3203
3204/**
3205 * ring_buffer_iter_peek - peek at the next event to be read
3206 * @iter: The ring buffer iterator
3207 * @ts: The timestamp counter of this event.
3208 *
3209 * This will return the event that will be read next, but does
3210 * not increment the iterator.
3211 */
3212struct ring_buffer_event *
3213ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
3214{
3215 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
3216 struct ring_buffer_event *event;
3217 unsigned long flags;
3218
Tom Zanussi2d622712009-03-22 03:30:49 -05003219 again:
Steven Rostedtf83c9d02008-11-11 18:47:44 +01003220 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3221 event = rb_iter_peek(iter, ts);
3222 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3223
Steven Rostedt1b959e12009-09-03 10:12:13 -04003224 if (event && event->type_len == RINGBUF_TYPE_PADDING)
Tom Zanussi2d622712009-03-22 03:30:49 -05003225 goto again;
Tom Zanussi2d622712009-03-22 03:30:49 -05003226
Steven Rostedtf83c9d02008-11-11 18:47:44 +01003227 return event;
3228}
3229
3230/**
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003231 * ring_buffer_consume - return an event and consume it
3232 * @buffer: The ring buffer to get the next event from
 * @cpu: The per-CPU buffer to read from
 * @ts: Where to store the timestamp of the returned event (may be NULL)
3233 *
3234 * Returns the next event in the ring buffer, and that event is consumed.
3235 * Meaning that sequential reads will keep returning a different event,
3236 * and eventually empty the ring buffer if the producer is slower
 * (a usage sketch follows this function).
3237 */
3238struct ring_buffer_event *
3239ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
3240{
Steven Rostedt554f7862009-03-11 22:00:13 -04003241 struct ring_buffer_per_cpu *cpu_buffer;
3242 struct ring_buffer_event *event = NULL;
Steven Rostedtf83c9d02008-11-11 18:47:44 +01003243 unsigned long flags;
Steven Rostedt8d707e82009-06-16 21:22:48 -04003244 int dolock;
3245
3246 dolock = rb_ok_to_lock();
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003247
Tom Zanussi2d622712009-03-22 03:30:49 -05003248 again:
Steven Rostedt554f7862009-03-11 22:00:13 -04003249	/* might be called in atomic context */
3250 preempt_disable();
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003251
Steven Rostedt554f7862009-03-11 22:00:13 -04003252 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3253 goto out;
3254
3255 cpu_buffer = buffer->buffers[cpu];
Steven Rostedt8d707e82009-06-16 21:22:48 -04003256 local_irq_save(flags);
3257 if (dolock)
3258 spin_lock(&cpu_buffer->reader_lock);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003259
Robert Richterd8eeb2d2009-07-31 14:58:04 +02003260 event = rb_buffer_peek(cpu_buffer, ts);
Robert Richter469535a2009-07-30 19:19:18 +02003261 if (event)
3262 rb_advance_reader(cpu_buffer);
Steven Rostedtf83c9d02008-11-11 18:47:44 +01003263
Steven Rostedt8d707e82009-06-16 21:22:48 -04003264 if (dolock)
3265 spin_unlock(&cpu_buffer->reader_lock);
3266 local_irq_restore(flags);
Steven Rostedtf83c9d02008-11-11 18:47:44 +01003267
Steven Rostedt554f7862009-03-11 22:00:13 -04003268 out:
3269 preempt_enable();
3270
Steven Rostedt1b959e12009-09-03 10:12:13 -04003271 if (event && event->type_len == RINGBUF_TYPE_PADDING)
Tom Zanussi2d622712009-03-22 03:30:49 -05003272 goto again;
Tom Zanussi2d622712009-03-22 03:30:49 -05003273
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003274 return event;
3275}
Robert Richterc4f50182008-12-11 16:49:22 +01003276EXPORT_SYMBOL_GPL(ring_buffer_consume);
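/*
 * Usage sketch (illustrative only, not part of the original file):
 * drain every pending event on one CPU with consuming reads.  The
 * handle_event() callback is a hypothetical placeholder.
 *
 *	struct ring_buffer_event *event;
 *	u64 ts;
 *
 *	while ((event = ring_buffer_consume(buffer, cpu, &ts))) {
 *		void *data = ring_buffer_event_data(event);
 *		unsigned int len = ring_buffer_event_length(event);
 *
 *		handle_event(data, len, ts);
 *	}
 *
 * ring_buffer_peek(buffer, cpu, &ts) returns the same next event
 * without consuming it, so a following peek or consume sees it again.
 */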
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003277
3278/**
3279 * ring_buffer_read_start - start a non consuming read of the buffer
3280 * @buffer: The ring buffer to read from
3281 * @cpu: The cpu buffer to iterate over
3282 *
3283 * This starts up an iteration through the buffer. It also disables
3284 * the recording to the buffer until the reading is finished.
3285 * This prevents the reading from being corrupted. This is not
3286 * a consuming read, so a producer is not expected.
3287 *
3288 * Must be paired with ring_buffer_read_finish (see the iterator usage
 * sketch after ring_buffer_read() below).
3289 */
3290struct ring_buffer_iter *
3291ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
3292{
3293 struct ring_buffer_per_cpu *cpu_buffer;
Steven Rostedt8aabee52009-03-12 13:13:49 -04003294 struct ring_buffer_iter *iter;
Steven Rostedtd7690412008-10-01 00:29:53 -04003295 unsigned long flags;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003296
Rusty Russell9e01c1b2009-01-01 10:12:22 +10303297 if (!cpumask_test_cpu(cpu, buffer->cpumask))
Steven Rostedt8aabee52009-03-12 13:13:49 -04003298 return NULL;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003299
3300 iter = kmalloc(sizeof(*iter), GFP_KERNEL);
3301 if (!iter)
Steven Rostedt8aabee52009-03-12 13:13:49 -04003302 return NULL;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003303
3304 cpu_buffer = buffer->buffers[cpu];
3305
3306 iter->cpu_buffer = cpu_buffer;
3307
3308 atomic_inc(&cpu_buffer->record_disabled);
3309 synchronize_sched();
3310
Steven Rostedtf83c9d02008-11-11 18:47:44 +01003311 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
Thomas Gleixner0199c4e2009-12-02 20:01:25 +01003312 arch_spin_lock(&cpu_buffer->lock);
Steven Rostedt642edba2008-11-12 00:01:26 -05003313 rb_iter_reset(iter);
Thomas Gleixner0199c4e2009-12-02 20:01:25 +01003314 arch_spin_unlock(&cpu_buffer->lock);
Steven Rostedtf83c9d02008-11-11 18:47:44 +01003315 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003316
3317 return iter;
3318}
Robert Richterc4f50182008-12-11 16:49:22 +01003319EXPORT_SYMBOL_GPL(ring_buffer_read_start);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003320
3321/**
3322 * ring_buffer_read_finish - finish reading the iterator of the buffer
3323 * @iter: The iterator retrieved by ring_buffer_read_start
3324 *
3325 * This re-enables the recording to the buffer, and frees the
3326 * iterator.
3327 */
3328void
3329ring_buffer_read_finish(struct ring_buffer_iter *iter)
3330{
3331 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
3332
3333 atomic_dec(&cpu_buffer->record_disabled);
3334 kfree(iter);
3335}
Robert Richterc4f50182008-12-11 16:49:22 +01003336EXPORT_SYMBOL_GPL(ring_buffer_read_finish);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003337
3338/**
3339 * ring_buffer_read - read the next item in the ring buffer by the iterator
3340 * @iter: The ring buffer iterator
3341 * @ts: The time stamp of the event read.
3342 *
3343 * This reads the next event in the ring buffer and increments the iterator.
3344 */
3345struct ring_buffer_event *
3346ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
3347{
3348 struct ring_buffer_event *event;
Steven Rostedtf83c9d02008-11-11 18:47:44 +01003349 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
3350 unsigned long flags;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003351
Steven Rostedtf83c9d02008-11-11 18:47:44 +01003352 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
Steven Rostedt7e9391c2009-09-03 10:02:09 -04003353 again:
Steven Rostedtf83c9d02008-11-11 18:47:44 +01003354 event = rb_iter_peek(iter, ts);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003355 if (!event)
Steven Rostedtf83c9d02008-11-11 18:47:44 +01003356 goto out;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003357
Steven Rostedt7e9391c2009-09-03 10:02:09 -04003358 if (event->type_len == RINGBUF_TYPE_PADDING)
3359 goto again;
3360
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003361 rb_advance_iter(iter);
Steven Rostedtf83c9d02008-11-11 18:47:44 +01003362 out:
3363 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003364
3365 return event;
3366}
Robert Richterc4f50182008-12-11 16:49:22 +01003367EXPORT_SYMBOL_GPL(ring_buffer_read);
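/*
 * Iterator usage sketch (illustrative only, not part of the original
 * file): a non-consuming walk over one CPU buffer.  process_event()
 * is a hypothetical placeholder.
 *
 *	struct ring_buffer_iter *iter;
 *	struct ring_buffer_event *event;
 *	u64 ts;
 *
 *	iter = ring_buffer_read_start(buffer, cpu);
 *	if (!iter)
 *		return;
 *
 *	while ((event = ring_buffer_read(iter, &ts)))
 *		process_event(ring_buffer_event_data(event), ts);
 *
 *	ring_buffer_read_finish(iter);
 *
 * Recording to this CPU buffer is disabled for the whole walk, and the
 * events seen here are not consumed; they remain for later readers.
 */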
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003368
3369/**
3370 * ring_buffer_size - return the size of the ring buffer (in bytes)
3371 * @buffer: The ring buffer.
3372 */
3373unsigned long ring_buffer_size(struct ring_buffer *buffer)
3374{
3375 return BUF_PAGE_SIZE * buffer->pages;
3376}
Robert Richterc4f50182008-12-11 16:49:22 +01003377EXPORT_SYMBOL_GPL(ring_buffer_size);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003378
3379static void
3380rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
3381{
Steven Rostedt77ae3652009-03-27 11:00:29 -04003382 rb_head_page_deactivate(cpu_buffer);
3383
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003384 cpu_buffer->head_page
Steven Rostedt3adc54f2009-03-30 15:32:01 -04003385 = list_entry(cpu_buffer->pages, struct buffer_page, list);
Steven Rostedtbf41a152008-10-04 02:00:59 -04003386 local_set(&cpu_buffer->head_page->write, 0);
Steven Rostedt778c55d2009-05-01 18:44:45 -04003387 local_set(&cpu_buffer->head_page->entries, 0);
Steven Rostedtabc9b562008-12-02 15:34:06 -05003388 local_set(&cpu_buffer->head_page->page->commit, 0);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003389
Steven Rostedt6f807ac2008-10-04 02:00:58 -04003390 cpu_buffer->head_page->read = 0;
Steven Rostedtbf41a152008-10-04 02:00:59 -04003391
3392 cpu_buffer->tail_page = cpu_buffer->head_page;
3393 cpu_buffer->commit_page = cpu_buffer->head_page;
3394
3395 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
3396 local_set(&cpu_buffer->reader_page->write, 0);
Steven Rostedt778c55d2009-05-01 18:44:45 -04003397 local_set(&cpu_buffer->reader_page->entries, 0);
Steven Rostedtabc9b562008-12-02 15:34:06 -05003398 local_set(&cpu_buffer->reader_page->page->commit, 0);
Steven Rostedt6f807ac2008-10-04 02:00:58 -04003399 cpu_buffer->reader_page->read = 0;
Steven Rostedtd7690412008-10-01 00:29:53 -04003400
Steven Rostedt77ae3652009-03-27 11:00:29 -04003401 local_set(&cpu_buffer->commit_overrun, 0);
3402 local_set(&cpu_buffer->overrun, 0);
Steven Rostedte4906ef2009-04-30 20:49:44 -04003403 local_set(&cpu_buffer->entries, 0);
Steven Rostedtfa743952009-06-16 12:37:57 -04003404 local_set(&cpu_buffer->committing, 0);
3405 local_set(&cpu_buffer->commits, 0);
Steven Rostedt77ae3652009-03-27 11:00:29 -04003406 cpu_buffer->read = 0;
Steven Rostedt69507c02009-01-21 18:45:57 -05003407
3408 cpu_buffer->write_stamp = 0;
3409 cpu_buffer->read_stamp = 0;
Steven Rostedt77ae3652009-03-27 11:00:29 -04003410
3411 rb_head_page_activate(cpu_buffer);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003412}
3413
3414/**
3415 * ring_buffer_reset_cpu - reset a ring buffer per CPU buffer
3416 * @buffer: The ring buffer to reset a per cpu buffer of
3417 * @cpu: The CPU buffer to be reset
3418 */
3419void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
3420{
3421 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
3422 unsigned long flags;
3423
Rusty Russell9e01c1b2009-01-01 10:12:22 +10303424 if (!cpumask_test_cpu(cpu, buffer->cpumask))
Steven Rostedt8aabee52009-03-12 13:13:49 -04003425 return;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003426
Steven Rostedt41ede232009-05-01 20:26:54 -04003427 atomic_inc(&cpu_buffer->record_disabled);
3428
Steven Rostedtf83c9d02008-11-11 18:47:44 +01003429 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3430
Steven Rostedt41b6a952009-09-02 09:59:48 -04003431 if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
3432 goto out;
3433
Thomas Gleixner0199c4e2009-12-02 20:01:25 +01003434 arch_spin_lock(&cpu_buffer->lock);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003435
3436 rb_reset_cpu(cpu_buffer);
3437
Thomas Gleixner0199c4e2009-12-02 20:01:25 +01003438 arch_spin_unlock(&cpu_buffer->lock);
Steven Rostedtf83c9d02008-11-11 18:47:44 +01003439
Steven Rostedt41b6a952009-09-02 09:59:48 -04003440 out:
Steven Rostedtf83c9d02008-11-11 18:47:44 +01003441 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
Steven Rostedt41ede232009-05-01 20:26:54 -04003442
3443 atomic_dec(&cpu_buffer->record_disabled);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003444}
Robert Richterc4f50182008-12-11 16:49:22 +01003445EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003446
3447/**
3448 * ring_buffer_reset - reset a ring buffer
3449 * @buffer: The ring buffer to reset all cpu buffers
3450 */
3451void ring_buffer_reset(struct ring_buffer *buffer)
3452{
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003453 int cpu;
3454
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003455 for_each_buffer_cpu(buffer, cpu)
Steven Rostedtd7690412008-10-01 00:29:53 -04003456 ring_buffer_reset_cpu(buffer, cpu);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003457}
Robert Richterc4f50182008-12-11 16:49:22 +01003458EXPORT_SYMBOL_GPL(ring_buffer_reset);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003459
3460/**
3461 * ring_buffer_empty - is the ring buffer empty?
3462 * @buffer: The ring buffer to test
3463 */
3464int ring_buffer_empty(struct ring_buffer *buffer)
3465{
3466 struct ring_buffer_per_cpu *cpu_buffer;
Steven Rostedtd4788202009-06-17 00:39:43 -04003467 unsigned long flags;
Steven Rostedt8d707e82009-06-16 21:22:48 -04003468 int dolock;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003469 int cpu;
Steven Rostedtd4788202009-06-17 00:39:43 -04003470 int ret;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003471
Steven Rostedt8d707e82009-06-16 21:22:48 -04003472 dolock = rb_ok_to_lock();
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003473
3474 /* yes this is racy, but if you don't like the race, lock the buffer */
3475 for_each_buffer_cpu(buffer, cpu) {
3476 cpu_buffer = buffer->buffers[cpu];
Steven Rostedt8d707e82009-06-16 21:22:48 -04003477 local_irq_save(flags);
3478 if (dolock)
3479 spin_lock(&cpu_buffer->reader_lock);
Steven Rostedtd4788202009-06-17 00:39:43 -04003480 ret = rb_per_cpu_empty(cpu_buffer);
Steven Rostedt8d707e82009-06-16 21:22:48 -04003481 if (dolock)
3482 spin_unlock(&cpu_buffer->reader_lock);
3483 local_irq_restore(flags);
3484
Steven Rostedtd4788202009-06-17 00:39:43 -04003485 if (!ret)
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003486 return 0;
3487 }
Steven Rostedt554f7862009-03-11 22:00:13 -04003488
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003489 return 1;
3490}
Robert Richterc4f50182008-12-11 16:49:22 +01003491EXPORT_SYMBOL_GPL(ring_buffer_empty);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003492
3493/**
3494 * ring_buffer_empty_cpu - is a cpu buffer of a ring buffer empty?
3495 * @buffer: The ring buffer
3496 * @cpu: The CPU buffer to test
3497 */
3498int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
3499{
3500 struct ring_buffer_per_cpu *cpu_buffer;
Steven Rostedtd4788202009-06-17 00:39:43 -04003501 unsigned long flags;
Steven Rostedt8d707e82009-06-16 21:22:48 -04003502 int dolock;
Steven Rostedt8aabee52009-03-12 13:13:49 -04003503 int ret;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003504
Rusty Russell9e01c1b2009-01-01 10:12:22 +10303505 if (!cpumask_test_cpu(cpu, buffer->cpumask))
Steven Rostedt8aabee52009-03-12 13:13:49 -04003506 return 1;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003507
Steven Rostedt8d707e82009-06-16 21:22:48 -04003508 dolock = rb_ok_to_lock();
Steven Rostedt554f7862009-03-11 22:00:13 -04003509
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003510 cpu_buffer = buffer->buffers[cpu];
Steven Rostedt8d707e82009-06-16 21:22:48 -04003511 local_irq_save(flags);
3512 if (dolock)
3513 spin_lock(&cpu_buffer->reader_lock);
Steven Rostedt554f7862009-03-11 22:00:13 -04003514 ret = rb_per_cpu_empty(cpu_buffer);
Steven Rostedt8d707e82009-06-16 21:22:48 -04003515 if (dolock)
3516 spin_unlock(&cpu_buffer->reader_lock);
3517 local_irq_restore(flags);
Steven Rostedt554f7862009-03-11 22:00:13 -04003518
3519 return ret;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003520}
Robert Richterc4f50182008-12-11 16:49:22 +01003521EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu);
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003522
Steven Rostedt85bac322009-09-04 14:24:40 -04003523#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003524/**
3525 * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers
3526 * @buffer_a: One buffer to swap with
3527 * @buffer_b: The other buffer to swap with
 * @cpu: The CPU whose per-CPU buffers get swapped
3528 *
3529 * This function is useful for tracers that want to take a "snapshot"
3530 * of a CPU buffer and have another backup buffer lying around.
3531 * It is expected that the tracer handles the cpu buffer not being
3532 * used at the moment (a usage sketch follows this function).
3533 */
3534int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
3535 struct ring_buffer *buffer_b, int cpu)
3536{
3537 struct ring_buffer_per_cpu *cpu_buffer_a;
3538 struct ring_buffer_per_cpu *cpu_buffer_b;
Steven Rostedt554f7862009-03-11 22:00:13 -04003539 int ret = -EINVAL;
3540
Rusty Russell9e01c1b2009-01-01 10:12:22 +10303541 if (!cpumask_test_cpu(cpu, buffer_a->cpumask) ||
3542 !cpumask_test_cpu(cpu, buffer_b->cpumask))
Steven Rostedt554f7862009-03-11 22:00:13 -04003543 goto out;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003544
3545 /* At least make sure the two buffers are somewhat the same */
Lai Jiangshan6d102bc2008-12-17 17:48:23 +08003546 if (buffer_a->pages != buffer_b->pages)
Steven Rostedt554f7862009-03-11 22:00:13 -04003547 goto out;
3548
3549 ret = -EAGAIN;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003550
Steven Rostedt97b17ef2009-01-21 15:24:56 -05003551 if (ring_buffer_flags != RB_BUFFERS_ON)
Steven Rostedt554f7862009-03-11 22:00:13 -04003552 goto out;
Steven Rostedt97b17ef2009-01-21 15:24:56 -05003553
3554 if (atomic_read(&buffer_a->record_disabled))
Steven Rostedt554f7862009-03-11 22:00:13 -04003555 goto out;
Steven Rostedt97b17ef2009-01-21 15:24:56 -05003556
3557 if (atomic_read(&buffer_b->record_disabled))
Steven Rostedt554f7862009-03-11 22:00:13 -04003558 goto out;
Steven Rostedt97b17ef2009-01-21 15:24:56 -05003559
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003560 cpu_buffer_a = buffer_a->buffers[cpu];
3561 cpu_buffer_b = buffer_b->buffers[cpu];
3562
Steven Rostedt97b17ef2009-01-21 15:24:56 -05003563 if (atomic_read(&cpu_buffer_a->record_disabled))
Steven Rostedt554f7862009-03-11 22:00:13 -04003564 goto out;
Steven Rostedt97b17ef2009-01-21 15:24:56 -05003565
3566 if (atomic_read(&cpu_buffer_b->record_disabled))
Steven Rostedt554f7862009-03-11 22:00:13 -04003567 goto out;
Steven Rostedt97b17ef2009-01-21 15:24:56 -05003568
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003569 /*
3570 * We can't do a synchronize_sched here because this
3571 * function can be called in atomic context.
3572 * Normally this will be called from the same CPU as cpu.
3573 * If not it's up to the caller to protect this.
3574 */
3575 atomic_inc(&cpu_buffer_a->record_disabled);
3576 atomic_inc(&cpu_buffer_b->record_disabled);
3577
Steven Rostedt98277992009-09-02 10:56:15 -04003578 ret = -EBUSY;
3579 if (local_read(&cpu_buffer_a->committing))
3580 goto out_dec;
3581 if (local_read(&cpu_buffer_b->committing))
3582 goto out_dec;
3583
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003584 buffer_a->buffers[cpu] = cpu_buffer_b;
3585 buffer_b->buffers[cpu] = cpu_buffer_a;
3586
3587 cpu_buffer_b->buffer = buffer_a;
3588 cpu_buffer_a->buffer = buffer_b;
3589
Steven Rostedt98277992009-09-02 10:56:15 -04003590 ret = 0;
3591
3592out_dec:
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003593 atomic_dec(&cpu_buffer_a->record_disabled);
3594 atomic_dec(&cpu_buffer_b->record_disabled);
Steven Rostedt554f7862009-03-11 22:00:13 -04003595out:
Steven Rostedt554f7862009-03-11 22:00:13 -04003596 return ret;
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003597}
Robert Richterc4f50182008-12-11 16:49:22 +01003598EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
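/*
 * Usage sketch (illustrative only, not part of the original file):
 * take a "snapshot" of one CPU by swapping its live buffer with a
 * spare buffer of the same size.  The buffer names are placeholders.
 *
 *	int err;
 *
 *	err = ring_buffer_swap_cpu(snapshot_buffer, live_buffer, cpu);
 *	if (err)
 *		return err;
 *
 * On success, snapshot_buffer->buffers[cpu] holds everything recorded
 * up to the swap, while live_buffer keeps recording into the pages
 * that previously backed the snapshot.  On failure the return value is
 * -EINVAL, -EAGAIN or -EBUSY, as set up in the function above.
 */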
Steven Rostedt85bac322009-09-04 14:24:40 -04003599#endif /* CONFIG_RING_BUFFER_ALLOW_SWAP */
Steven Rostedt7a8e76a2008-09-29 23:02:38 -04003600
Steven Rostedt8789a9e2008-12-02 15:34:07 -05003601/**
3602 * ring_buffer_alloc_read_page - allocate a page to read from buffer
3603 * @buffer: the buffer to allocate for.
3604 *
3605 * This function is used in conjunction with ring_buffer_read_page.
3606 * When reading a full page from the ring buffer, these functions
3607 * can be used to speed up the process. The calling function should
3608 * allocate a few pages first with this function. Then when it
3609 * needs to get pages from the ring buffer, it passes the result
3610 * of this function into ring_buffer_read_page, which will swap
3611 * the page that was allocated with the read page of the buffer.
3612 *
3613 * Returns:
3614 * The page allocated, or NULL on error.
3615 */
3616void *ring_buffer_alloc_read_page(struct ring_buffer *buffer)
3617{
Steven Rostedt044fa782008-12-02 23:50:03 -05003618 struct buffer_data_page *bpage;
Steven Rostedtef7a4a12009-03-03 00:27:49 -05003619 unsigned long addr;
Steven Rostedt8789a9e2008-12-02 15:34:07 -05003620
3621 addr = __get_free_page(GFP_KERNEL);
3622 if (!addr)
3623 return NULL;
3624
Steven Rostedt044fa782008-12-02 23:50:03 -05003625 bpage = (void *)addr;
Steven Rostedt8789a9e2008-12-02 15:34:07 -05003626
Steven Rostedtef7a4a12009-03-03 00:27:49 -05003627 rb_init_page(bpage);
3628
Steven Rostedt044fa782008-12-02 23:50:03 -05003629 return bpage;
Steven Rostedt8789a9e2008-12-02 15:34:07 -05003630}
Steven Rostedtd6ce96d2009-05-05 01:15:24 -04003631EXPORT_SYMBOL_GPL(ring_buffer_alloc_read_page);
Steven Rostedt8789a9e2008-12-02 15:34:07 -05003632
3633/**
3634 * ring_buffer_free_read_page - free an allocated read page
3635 * @buffer: the buffer the page was allocated for
3636 * @data: the page to free
3637 *
3638 * Free a page allocated from ring_buffer_alloc_read_page.
3639 */
3640void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
3641{
3642 free_page((unsigned long)data);
3643}
Steven Rostedtd6ce96d2009-05-05 01:15:24 -04003644EXPORT_SYMBOL_GPL(ring_buffer_free_read_page);
Steven Rostedt8789a9e2008-12-02 15:34:07 -05003645
3646/**
3647 * ring_buffer_read_page - extract a page from the ring buffer
3648 * @buffer: buffer to extract from
3649 * @data_page: the page to use allocated from ring_buffer_alloc_read_page
Steven Rostedtef7a4a12009-03-03 00:27:49 -05003650 * @len: amount to extract
Steven Rostedt8789a9e2008-12-02 15:34:07 -05003651 * @cpu: the cpu of the buffer to extract
3652 * @full: should the extraction only happen when the page is full.
3653 *
3654 * This function will pull out a page from the ring buffer and consume it.
3655 * @data_page must be the address of the variable that was returned
3656 * from ring_buffer_alloc_read_page. This is because the page might be used
3657 * to swap with a page in the ring buffer.
3658 *
3659 * for example:
Lai Jiangshanb85fa012009-02-09 14:21:14 +08003660 * rpage = ring_buffer_alloc_read_page(buffer);
Steven Rostedt8789a9e2008-12-02 15:34:07 -05003661 * if (!rpage)
3662 * return error;
Steven Rostedtef7a4a12009-03-03 00:27:49 -05003663 * ret = ring_buffer_read_page(buffer, &rpage, len, cpu, 0);
Lai Jiangshan667d2412009-02-09 14:21:17 +08003664 * if (ret >= 0)
3665 * process_page(rpage, ret);
Steven Rostedt8789a9e2008-12-02 15:34:07 -05003666 *
3667 * When @full is set, the read will only succeed (return >= 0) if
3668 * the writer is off the reader page.
 * A fuller usage sketch follows this function.
3669 *
3670 * Note: it is up to the calling functions to handle sleeps and wakeups.
3671 * The ring buffer can be used anywhere in the kernel and can not
3672 * blindly call wake_up. The layer that uses the ring buffer must be
3673 * responsible for that.
3674 *
3675 * Returns:
Lai Jiangshan667d2412009-02-09 14:21:17 +08003676 * >=0 if data has been transferred, returns the offset of consumed data.
3677 * <0 if no data has been transferred.
Steven Rostedt8789a9e2008-12-02 15:34:07 -05003678 */
3679int ring_buffer_read_page(struct ring_buffer *buffer,
Steven Rostedtef7a4a12009-03-03 00:27:49 -05003680 void **data_page, size_t len, int cpu, int full)
Steven Rostedt8789a9e2008-12-02 15:34:07 -05003681{
3682 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
3683 struct ring_buffer_event *event;
Steven Rostedt044fa782008-12-02 23:50:03 -05003684 struct buffer_data_page *bpage;
Steven Rostedtef7a4a12009-03-03 00:27:49 -05003685 struct buffer_page *reader;
Steven Rostedt8789a9e2008-12-02 15:34:07 -05003686 unsigned long flags;
Steven Rostedtef7a4a12009-03-03 00:27:49 -05003687 unsigned int commit;
Lai Jiangshan667d2412009-02-09 14:21:17 +08003688 unsigned int read;
Steven Rostedt4f3640f2009-03-03 23:52:42 -05003689 u64 save_timestamp;
Lai Jiangshan667d2412009-02-09 14:21:17 +08003690 int ret = -1;
Steven Rostedt8789a9e2008-12-02 15:34:07 -05003691
Steven Rostedt554f7862009-03-11 22:00:13 -04003692 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3693 goto out;
3694
Steven Rostedt474d32b2009-03-03 19:51:40 -05003695 /*
3696 * If len is not big enough to hold the page header, then
3697 * we can not copy anything.
3698 */
3699 if (len <= BUF_PAGE_HDR_SIZE)
Steven Rostedt554f7862009-03-11 22:00:13 -04003700 goto out;
Steven Rostedt474d32b2009-03-03 19:51:40 -05003701
3702 len -= BUF_PAGE_HDR_SIZE;
3703
Steven Rostedt8789a9e2008-12-02 15:34:07 -05003704 if (!data_page)
Steven Rostedt554f7862009-03-11 22:00:13 -04003705 goto out;
Steven Rostedt8789a9e2008-12-02 15:34:07 -05003706
Steven Rostedt044fa782008-12-02 23:50:03 -05003707 bpage = *data_page;
3708 if (!bpage)
Steven Rostedt554f7862009-03-11 22:00:13 -04003709 goto out;
Steven Rostedt8789a9e2008-12-02 15:34:07 -05003710
3711 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3712
Steven Rostedtef7a4a12009-03-03 00:27:49 -05003713 reader = rb_get_reader_page(cpu_buffer);
3714 if (!reader)
Steven Rostedt554f7862009-03-11 22:00:13 -04003715 goto out_unlock;
Steven Rostedt8789a9e2008-12-02 15:34:07 -05003716
Steven Rostedtef7a4a12009-03-03 00:27:49 -05003717 event = rb_reader_event(cpu_buffer);
Lai Jiangshan667d2412009-02-09 14:21:17 +08003718
Steven Rostedtef7a4a12009-03-03 00:27:49 -05003719 read = reader->read;
3720 commit = rb_page_commit(reader);
3721
Steven Rostedt8789a9e2008-12-02 15:34:07 -05003722 /*
Steven Rostedt474d32b2009-03-03 19:51:40 -05003723 * If this page has been partially read or
3724 * if len is not big enough to read the rest of the page or
3725 * a writer is still on the page, then
3726 * we must copy the data from the page to the buffer.
3727 * Otherwise, we can simply swap the page with the one passed in.
Steven Rostedt8789a9e2008-12-02 15:34:07 -05003728 */
Steven Rostedt474d32b2009-03-03 19:51:40 -05003729 if (read || (len < (commit - read)) ||
Steven Rostedtef7a4a12009-03-03 00:27:49 -05003730 cpu_buffer->reader_page == cpu_buffer->commit_page) {
Lai Jiangshan667d2412009-02-09 14:21:17 +08003731 struct buffer_data_page *rpage = cpu_buffer->reader_page->page;
Steven Rostedt474d32b2009-03-03 19:51:40 -05003732 unsigned int rpos = read;
3733 unsigned int pos = 0;
Steven Rostedtef7a4a12009-03-03 00:27:49 -05003734 unsigned int size;
Steven Rostedt8789a9e2008-12-02 15:34:07 -05003735
3736 if (full)
Steven Rostedt554f7862009-03-11 22:00:13 -04003737 goto out_unlock;
Steven Rostedt8789a9e2008-12-02 15:34:07 -05003738
Steven Rostedtef7a4a12009-03-03 00:27:49 -05003739 if (len > (commit - read))
3740 len = (commit - read);
3741
3742 size = rb_event_length(event);
3743
3744 if (len < size)
Steven Rostedt554f7862009-03-11 22:00:13 -04003745 goto out_unlock;
Steven Rostedtef7a4a12009-03-03 00:27:49 -05003746
Steven Rostedt4f3640f2009-03-03 23:52:42 -05003747 /* save the current timestamp, since the user will need it */
3748 save_timestamp = cpu_buffer->read_stamp;
3749
Steven Rostedtef7a4a12009-03-03 00:27:49 -05003750 /* Need to copy one event at a time */
3751 do {
Steven Rostedt474d32b2009-03-03 19:51:40 -05003752 memcpy(bpage->data + pos, rpage->data + rpos, size);
Steven Rostedtef7a4a12009-03-03 00:27:49 -05003753
3754 len -= size;
3755
3756 rb_advance_reader(cpu_buffer);
Steven Rostedt474d32b2009-03-03 19:51:40 -05003757 rpos = reader->read;
3758 pos += size;
Steven Rostedtef7a4a12009-03-03 00:27:49 -05003759
3760 event = rb_reader_event(cpu_buffer);
3761 size = rb_event_length(event);
3762 } while (len > size);
Lai Jiangshan667d2412009-02-09 14:21:17 +08003763
3764 /* update bpage */
Steven Rostedtef7a4a12009-03-03 00:27:49 -05003765 local_set(&bpage->commit, pos);
Steven Rostedt4f3640f2009-03-03 23:52:42 -05003766 bpage->time_stamp = save_timestamp;
Steven Rostedtef7a4a12009-03-03 00:27:49 -05003767
Steven Rostedt474d32b2009-03-03 19:51:40 -05003768 /* we copied everything to the beginning */
3769 read = 0;
Steven Rostedt8789a9e2008-12-02 15:34:07 -05003770 } else {
Steven Rostedtafbab762009-05-01 19:40:05 -04003771 /* update the entry counter */
Steven Rostedt77ae3652009-03-27 11:00:29 -04003772 cpu_buffer->read += rb_page_entries(reader);
Steven Rostedtafbab762009-05-01 19:40:05 -04003773
Steven Rostedt8789a9e2008-12-02 15:34:07 -05003774 /* swap the pages */
Steven Rostedt044fa782008-12-02 23:50:03 -05003775 rb_init_page(bpage);
Steven Rostedtef7a4a12009-03-03 00:27:49 -05003776 bpage = reader->page;
3777 reader->page = *data_page;
3778 local_set(&reader->write, 0);
Steven Rostedt778c55d2009-05-01 18:44:45 -04003779 local_set(&reader->entries, 0);
Steven Rostedtef7a4a12009-03-03 00:27:49 -05003780 reader->read = 0;
Steven Rostedt044fa782008-12-02 23:50:03 -05003781 *data_page = bpage;
Steven Rostedt8789a9e2008-12-02 15:34:07 -05003782 }
Lai Jiangshan667d2412009-02-09 14:21:17 +08003783 ret = read;
Steven Rostedt8789a9e2008-12-02 15:34:07 -05003784
Steven Rostedt554f7862009-03-11 22:00:13 -04003785 out_unlock:
Steven Rostedt8789a9e2008-12-02 15:34:07 -05003786 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3787
Steven Rostedt554f7862009-03-11 22:00:13 -04003788 out:
Steven Rostedt8789a9e2008-12-02 15:34:07 -05003789 return ret;
3790}
Steven Rostedtd6ce96d2009-05-05 01:15:24 -04003791EXPORT_SYMBOL_GPL(ring_buffer_read_page);
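/*
 * Fuller usage sketch (illustrative only, not part of the original
 * file), expanding on the example in the kernel-doc above: read whole
 * pages from one CPU until no data is left.  consume_page() is a
 * hypothetical placeholder.
 *
 *	void *page = ring_buffer_alloc_read_page(buffer);
 *	int ret;
 *
 *	if (!page)
 *		return -ENOMEM;
 *
 *	while ((ret = ring_buffer_read_page(buffer, &page, PAGE_SIZE,
 *					    cpu, 0)) >= 0)
 *		consume_page(page, ret);
 *
 *	ring_buffer_free_read_page(buffer, page);
 *
 * Each successful call either copies events into the passed-in page or
 * swaps it with the reader page; ret is the offset within the page at
 * which the returned events begin.  Passing 1 for @full would instead
 * fail the call unless a complete page can be handed over by swapping.
 */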
Steven Rostedt8789a9e2008-12-02 15:34:07 -05003792
Paul Mundt1155de42009-06-25 14:30:12 +09003793#ifdef CONFIG_TRACING
Steven Rostedta3583242008-11-11 15:01:42 -05003794static ssize_t
3795rb_simple_read(struct file *filp, char __user *ubuf,
3796 size_t cnt, loff_t *ppos)
3797{
Hannes Eder5e398412009-02-10 19:44:34 +01003798 unsigned long *p = filp->private_data;
Steven Rostedta3583242008-11-11 15:01:42 -05003799 char buf[64];
3800 int r;
3801
Steven Rostedt033601a2008-11-21 12:41:55 -05003802 if (test_bit(RB_BUFFERS_DISABLED_BIT, p))
3803 r = sprintf(buf, "permanently disabled\n");
3804 else
3805 r = sprintf(buf, "%d\n", test_bit(RB_BUFFERS_ON_BIT, p));
Steven Rostedta3583242008-11-11 15:01:42 -05003806
3807 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3808}
3809
3810static ssize_t
3811rb_simple_write(struct file *filp, const char __user *ubuf,
3812 size_t cnt, loff_t *ppos)
3813{
Hannes Eder5e398412009-02-10 19:44:34 +01003814 unsigned long *p = filp->private_data;
Steven Rostedta3583242008-11-11 15:01:42 -05003815 char buf[64];
Hannes Eder5e398412009-02-10 19:44:34 +01003816 unsigned long val;
Steven Rostedta3583242008-11-11 15:01:42 -05003817 int ret;
3818
3819 if (cnt >= sizeof(buf))
3820 return -EINVAL;
3821
3822 if (copy_from_user(&buf, ubuf, cnt))
3823 return -EFAULT;
3824
3825 buf[cnt] = 0;
3826
3827 ret = strict_strtoul(buf, 10, &val);
3828 if (ret < 0)
3829 return ret;
3830
Steven Rostedt033601a2008-11-21 12:41:55 -05003831 if (val)
3832 set_bit(RB_BUFFERS_ON_BIT, p);
3833 else
3834 clear_bit(RB_BUFFERS_ON_BIT, p);
Steven Rostedta3583242008-11-11 15:01:42 -05003835
3836 (*ppos)++;
3837
3838 return cnt;
3839}
3840
Steven Rostedt5e2336a2009-03-05 21:44:55 -05003841static const struct file_operations rb_simple_fops = {
Steven Rostedta3583242008-11-11 15:01:42 -05003842 .open = tracing_open_generic,
3843 .read = rb_simple_read,
3844 .write = rb_simple_write,
3845};
3846
3847
3848static __init int rb_init_debugfs(void)
3849{
3850 struct dentry *d_tracer;
Steven Rostedta3583242008-11-11 15:01:42 -05003851
3852 d_tracer = tracing_init_dentry();
3853
Frederic Weisbecker5452af62009-03-27 00:25:38 +01003854 trace_create_file("tracing_on", 0644, d_tracer,
3855 &ring_buffer_flags, &rb_simple_fops);
Steven Rostedta3583242008-11-11 15:01:42 -05003856
3857 return 0;
3858}
3859
3860fs_initcall(rb_init_debugfs);
Paul Mundt1155de42009-06-25 14:30:12 +09003861#endif
Steven Rostedt554f7862009-03-11 22:00:13 -04003862
Steven Rostedt59222ef2009-03-12 11:46:03 -04003863#ifdef CONFIG_HOTPLUG_CPU
Frederic Weisbecker09c9e842009-03-21 04:33:36 +01003864static int rb_cpu_notify(struct notifier_block *self,
3865 unsigned long action, void *hcpu)
Steven Rostedt554f7862009-03-11 22:00:13 -04003866{
3867 struct ring_buffer *buffer =
3868 container_of(self, struct ring_buffer, cpu_notify);
3869 long cpu = (long)hcpu;
3870
3871 switch (action) {
3872 case CPU_UP_PREPARE:
3873 case CPU_UP_PREPARE_FROZEN:
Rusty Russell3f237a72009-06-12 21:15:30 +09303874 if (cpumask_test_cpu(cpu, buffer->cpumask))
Steven Rostedt554f7862009-03-11 22:00:13 -04003875 return NOTIFY_OK;
3876
3877 buffer->buffers[cpu] =
3878 rb_allocate_cpu_buffer(buffer, cpu);
3879 if (!buffer->buffers[cpu]) {
3880 WARN(1, "failed to allocate ring buffer on CPU %ld\n",
3881 cpu);
3882 return NOTIFY_OK;
3883 }
3884 smp_wmb();
Rusty Russell3f237a72009-06-12 21:15:30 +09303885 cpumask_set_cpu(cpu, buffer->cpumask);
Steven Rostedt554f7862009-03-11 22:00:13 -04003886 break;
3887 case CPU_DOWN_PREPARE:
3888 case CPU_DOWN_PREPARE_FROZEN:
3889 /*
3890 * Do nothing.
3891 * If we were to free the buffer, then the user would
3892 * lose any trace that was in the buffer.
3893 */
3894 break;
3895 default:
3896 break;
3897 }
3898 return NOTIFY_OK;
3899}
3900#endif