tracing: Use percpu stack trace buffer more intelligently The per cpu stack trace buffer usage pattern is odd at best. The buffer has place for 512 stack trace entries on 64-bit and 1024 on 32-bit. When interrupts or exceptions nest after the per cpu buffer was acquired the stacktrace length is hardcoded to 8 entries. 512/1024 stack trace entries in kernel stacks are unrealistic so the buffer is a complete waste. Split the buffer into 4 nest levels, which are 128/256 entries per level. This allows nesting contexts (interrupts, exceptions) to utilize the cpu buffer for stack retrieval and avoids the fixed length allocation along with the conditional execution pathes. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Andy Lutomirski <luto@kernel.org> Cc: Steven Rostedt <rostedt@goodmis.org> Cc: Alexander Potapenko <glider@google.com> Cc: Alexey Dobriyan <adobriyan@gmail.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Christoph Lameter <cl@linux.com> Cc: Pekka Enberg <penberg@kernel.org> Cc: linux-mm@kvack.org Cc: David Rientjes <rientjes@google.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Dmitry Vyukov <dvyukov@google.com> Cc: Andrey Ryabinin <aryabinin@virtuozzo.com> Cc: kasan-dev@googlegroups.com Cc: Mike Rapoport <rppt@linux.vnet.ibm.com> Cc: Akinobu Mita <akinobu.mita@gmail.com> Cc: Christoph Hellwig <hch@lst.de> Cc: iommu@lists.linux-foundation.org Cc: Robin Murphy <robin.murphy@arm.com> Cc: Marek Szyprowski <m.szyprowski@samsung.com> Cc: Johannes Thumshirn <jthumshirn@suse.de> Cc: David Sterba <dsterba@suse.com> Cc: Chris Mason <clm@fb.com> Cc: Josef Bacik <josef@toxicpanda.com> Cc: linux-btrfs@vger.kernel.org Cc: dm-devel@redhat.com Cc: Mike Snitzer <snitzer@redhat.com> Cc: Alasdair Kergon <agk@redhat.com> Cc: Daniel Vetter <daniel@ffwll.ch> Cc: intel-gfx@lists.freedesktop.org Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Cc: dri-devel@lists.freedesktop.org Cc: David Airlie <airlied@linux.ie> Cc: Jani Nikula <jani.nikula@linux.intel.com> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com> Cc: Tom Zanussi <tom.zanussi@linux.intel.com> Cc: Miroslav Benes <mbenes@suse.cz> Cc: linux-arch@vger.kernel.org Link: https://lkml.kernel.org/r/20190425094803.066064076@linutronix.de

commit: 2a820bf74918d61ea54f7c1001f4a6a2e457577c [log] [tgz]
author: Thomas Gleixner <tglx@linutronix.de> Thu Apr 25 11:45:14 2019 +0200
committer: Thomas Gleixner <tglx@linutronix.de> Mon Apr 29 12:37:55 2019 +0200
tree: 02b12357fccdcda29ea5b4fb9adf70886a3b4cc0
parent: e7d916632b528e8cccc8e9ccca81acfc591a5fde [diff] [blame]
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 21153e6..4fc9300 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c

@@ -2749,12 +2749,21 @@ trace_function(struct trace_array *tr,
 
 #ifdef CONFIG_STACKTRACE
 
-#define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
+/* Allow 4 levels of nesting: normal, softirq, irq, NMI */
+#define FTRACE_KSTACK_NESTING	4
+
+#define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
+
 struct ftrace_stack {
-	unsigned long		calls[FTRACE_STACK_MAX_ENTRIES];
+	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
 };
 
-static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
+
+struct ftrace_stacks {
+	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
+};
+
+static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
 
 static void __ftrace_trace_stack(struct ring_buffer *buffer,
@@ -2763,10 +2772,11 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer,
 {
 	struct trace_event_call *call = &event_kernel_stack;
 	struct ring_buffer_event *event;
+	struct ftrace_stack *fstack;
 	struct stack_entry *entry;
 	struct stack_trace trace;
-	int use_stack;
-	int size = FTRACE_STACK_ENTRIES;
+	int size = FTRACE_KSTACK_ENTRIES;
+	int stackidx;
 
 	trace.nr_entries	= 0;
 	trace.skip		= skip;
@@ -2788,29 +2798,32 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer,
 	 */
 	preempt_disable_notrace();
 
-	use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
+	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
+
+	/* This should never happen. If it does, yell once and skip */
+	if (WARN_ON_ONCE(stackidx > FTRACE_KSTACK_NESTING))
+		goto out;
+
 	/*
-	 * We don't need any atomic variables, just a barrier.
-	 * If an interrupt comes in, we don't care, because it would
-	 * have exited and put the counter back to what we want.
-	 * We just need a barrier to keep gcc from moving things
-	 * around.
+	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
+	 * interrupt will either see the value pre increment or post
+	 * increment. If the interrupt happens pre increment it will have
+	 * restored the counter when it returns.  We just need a barrier to
+	 * keep gcc from moving things around.
 	 */
 	barrier();
-	if (use_stack == 1) {
-		trace.entries		= this_cpu_ptr(ftrace_stack.calls);
-		trace.max_entries	= FTRACE_STACK_MAX_ENTRIES;
 
-		if (regs)
-			save_stack_trace_regs(regs, &trace);
-		else
-			save_stack_trace(&trace);
+	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
+	trace.entries		= fstack->calls;
+	trace.max_entries	= FTRACE_KSTACK_ENTRIES;
 
-		if (trace.nr_entries > size)
-			size = trace.nr_entries;
-	} else
-		/* From now on, use_stack is a boolean */
-		use_stack = 0;
+	if (regs)
+		save_stack_trace_regs(regs, &trace);
+	else
+		save_stack_trace(&trace);
+
+	if (trace.nr_entries > size)
+		size = trace.nr_entries;
 
 	size *= sizeof(unsigned long);
 
@@ -2820,19 +2833,7 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer,
 		goto out;
 	entry = ring_buffer_event_data(event);
 
-	memset(&entry->caller, 0, size);
-
-	if (use_stack)
-		memcpy(&entry->caller, trace.entries,
-		       trace.nr_entries * sizeof(unsigned long));
-	else {
-		trace.max_entries	= FTRACE_STACK_ENTRIES;
-		trace.entries		= entry->caller;
-		if (regs)
-			save_stack_trace_regs(regs, &trace);
-		else
-			save_stack_trace(&trace);
-	}
+	memcpy(&entry->caller, trace.entries, size);
 
 	entry->size = trace.nr_entries;
commit	2a820bf74918d61ea54f7c1001f4a6a2e457577c	[log] [tgz]
author	Thomas Gleixner <tglx@linutronix.de>	Thu Apr 25 11:45:14 2019 +0200
committer	Thomas Gleixner <tglx@linutronix.de>	Mon Apr 29 12:37:55 2019 +0200
tree	02b12357fccdcda29ea5b4fb9adf70886a3b4cc0
parent	e7d916632b528e8cccc8e9ccca81acfc591a5fde [diff] [blame]