printk/nmi: Prevent deadlock when accessing the main log buffer in NMI
The commit 719f6a7040f1bdaf96 ("printk: Use the main logbuf in NMI
when logbuf_lock is available") brought back the possible deadlocks
in printk() and NMI.
The check of logbuf_lock is done only in printk_nmi_enter() to prevent
mixed output. But another CPU might take the lock later, enter NMI, and:
+ Both NMIs might be serialized by yet another lock, for example,
the one in nmi_cpu_backtrace().
+ The other CPU might get stopped in NMI, see smp_send_stop()
in panic().
The only safe solution is to use trylock when storing the message
into the main log-buffer. It might cause reordering when some lines
go to the main lock buffer directly and others are delayed via
the per-CPU buffer. It means that it is not useful in general.
This patch replaces the problematic NMI deferred context with NMI
direct context. It can be used to mark a code that might produce
many messages in NMI and the risk of losing them is more critical
than problems with eventual reordering.
The context is then used when dumping trace buffers on oops. It was
the primary motivation for the original fix. Also the reordering is
even smaller issue there because some traces have their own time stamps.
Finally, nmi_cpu_backtrace() need not longer be serialized because
it will always us the per-CPU buffers again.
Fixes: 719f6a7040f1bdaf96 ("printk: Use the main logbuf in NMI when logbuf_lock is available")
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/20180627142028.11259-1-pmladek@suse.com
To: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
Cc: linux-kernel@vger.kernel.org
Cc: stable@vger.kernel.org
Acked-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Signed-off-by: Petr Mladek <pmladek@suse.com>
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 6d7e800..3ede9f46 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -148,9 +148,13 @@ void early_printk(const char *s, ...) { }
#ifdef CONFIG_PRINTK_NMI
extern void printk_nmi_enter(void);
extern void printk_nmi_exit(void);
+extern void printk_nmi_direct_enter(void);
+extern void printk_nmi_direct_exit(void);
#else
static inline void printk_nmi_enter(void) { }
static inline void printk_nmi_exit(void) { }
+static inline void printk_nmi_direct_enter(void) { }
+static inline void printk_nmi_direct_exit(void) { }
#endif /* PRINTK_NMI */
#ifdef CONFIG_PRINTK
diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h
index 2a7d040..0f18988 100644
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -19,11 +19,16 @@
#ifdef CONFIG_PRINTK
#define PRINTK_SAFE_CONTEXT_MASK 0x3fffffff
-#define PRINTK_NMI_DEFERRED_CONTEXT_MASK 0x40000000
+#define PRINTK_NMI_DIRECT_CONTEXT_MASK 0x40000000
#define PRINTK_NMI_CONTEXT_MASK 0x80000000
extern raw_spinlock_t logbuf_lock;
+__printf(5, 0)
+int vprintk_store(int facility, int level,
+ const char *dict, size_t dictlen,
+ const char *fmt, va_list args);
+
__printf(1, 0) int vprintk_default(const char *fmt, va_list args);
__printf(1, 0) int vprintk_deferred(const char *fmt, va_list args);
__printf(1, 0) int vprintk_func(const char *fmt, va_list args);
@@ -54,6 +59,8 @@ void __printk_safe_exit(void);
local_irq_enable(); \
} while (0)
+void defer_console_output(void);
+
#else
__printf(1, 0) int vprintk_func(const char *fmt, va_list args) { return 0; }
diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c
index d7d0913..a0a74c5 100644
--- a/kernel/printk/printk_safe.c
+++ b/kernel/printk/printk_safe.c
@@ -308,24 +308,33 @@ static __printf(1, 0) int vprintk_nmi(const char *fmt, va_list args)
void printk_nmi_enter(void)
{
- /*
- * The size of the extra per-CPU buffer is limited. Use it only when
- * the main one is locked. If this CPU is not in the safe context,
- * the lock must be taken on another CPU and we could wait for it.
- */
- if ((this_cpu_read(printk_context) & PRINTK_SAFE_CONTEXT_MASK) &&
- raw_spin_is_locked(&logbuf_lock)) {
- this_cpu_or(printk_context, PRINTK_NMI_CONTEXT_MASK);
- } else {
- this_cpu_or(printk_context, PRINTK_NMI_DEFERRED_CONTEXT_MASK);
- }
+ this_cpu_or(printk_context, PRINTK_NMI_CONTEXT_MASK);
}
void printk_nmi_exit(void)
{
- this_cpu_and(printk_context,
- ~(PRINTK_NMI_CONTEXT_MASK |
- PRINTK_NMI_DEFERRED_CONTEXT_MASK));
+ this_cpu_and(printk_context, ~PRINTK_NMI_CONTEXT_MASK);
+}
+
+/*
+ * Marks a code that might produce many messages in NMI context
+ * and the risk of losing them is more critical than eventual
+ * reordering.
+ *
+ * It has effect only when called in NMI context. Then printk()
+ * will try to store the messages into the main logbuf directly
+ * and use the per-CPU buffers only as a fallback when the lock
+ * is not available.
+ */
+void printk_nmi_direct_enter(void)
+{
+ if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK)
+ this_cpu_or(printk_context, PRINTK_NMI_DIRECT_CONTEXT_MASK);
+}
+
+void printk_nmi_direct_exit(void)
+{
+ this_cpu_and(printk_context, ~PRINTK_NMI_DIRECT_CONTEXT_MASK);
}
#else
@@ -363,6 +372,20 @@ void __printk_safe_exit(void)
__printf(1, 0) int vprintk_func(const char *fmt, va_list args)
{
+ /*
+ * Try to use the main logbuf even in NMI. But avoid calling console
+ * drivers that might have their own locks.
+ */
+ if ((this_cpu_read(printk_context) & PRINTK_NMI_DIRECT_CONTEXT_MASK) &&
+ raw_spin_trylock(&logbuf_lock)) {
+ int len;
+
+ len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args);
+ raw_spin_unlock(&logbuf_lock);
+ defer_console_output();
+ return len;
+ }
+
/* Use extra buffer in NMI when logbuf_lock is taken or in safe mode. */
if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK)
return vprintk_nmi(fmt, args);
@@ -371,13 +394,6 @@ __printf(1, 0) int vprintk_func(const char *fmt, va_list args)
if (this_cpu_read(printk_context) & PRINTK_SAFE_CONTEXT_MASK)
return vprintk_safe(fmt, args);
- /*
- * Use the main logbuf when logbuf_lock is available in NMI.
- * But avoid calling console drivers that might have their own locks.
- */
- if (this_cpu_read(printk_context) & PRINTK_NMI_DEFERRED_CONTEXT_MASK)
- return vprintk_deferred(fmt, args);
-
/* No obstacles. */
return vprintk_default(fmt, args);
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index bcd9303..f106ad1 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -8265,6 +8265,7 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
tracing_off();
local_irq_save(flags);
+ printk_nmi_direct_enter();
/* Simulate the iterator */
trace_init_global_iter(&iter);
@@ -8344,7 +8345,8 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
for_each_tracing_cpu(cpu) {
atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
}
- atomic_dec(&dump_running);
+ atomic_dec(&dump_running);
+ printk_nmi_direct_exit();
local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(ftrace_dump);
diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c
index 61a6b5a..15ca78e 100644
--- a/lib/nmi_backtrace.c
+++ b/lib/nmi_backtrace.c
@@ -87,11 +87,9 @@ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask,
bool nmi_cpu_backtrace(struct pt_regs *regs)
{
- static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED;
int cpu = smp_processor_id();
if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
- arch_spin_lock(&lock);
if (regs && cpu_in_idle(instruction_pointer(regs))) {
pr_warn("NMI backtrace for cpu %d skipped: idling at %pS\n",
cpu, (void *)instruction_pointer(regs));
@@ -102,7 +100,6 @@ bool nmi_cpu_backtrace(struct pt_regs *regs)
else
dump_stack();
}
- arch_spin_unlock(&lock);
cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
return true;
}