blob: d488311efab1f18e5819c6b7454afdf987ba9b0b [file] [log] [blame]
Thomas Gleixner40b0b3f2019-06-03 07:44:46 +02001// SPDX-License-Identifier: GPL-2.0-only
Michael Ellermancc532912005-12-04 18:39:43 +11002/*
3 * Architecture specific (PPC64) functions for kexec based crash dumps.
4 *
5 * Copyright (C) 2005, IBM Corp.
6 *
7 * Created by: Haren Myneni
Michael Ellermancc532912005-12-04 18:39:43 +11008 */
9
Michael Ellermancc532912005-12-04 18:39:43 +110010#include <linux/kernel.h>
11#include <linux/smp.h>
12#include <linux/reboot.h>
13#include <linux/kexec.h>
Paul Gortmaker66b15db2011-05-27 10:46:24 -040014#include <linux/export.h>
Michael Ellermancc532912005-12-04 18:39:43 +110015#include <linux/crash_dump.h>
Michael Ellermancc532912005-12-04 18:39:43 +110016#include <linux/delay.h>
Michael Ellermand6c1a902006-04-04 13:43:01 +020017#include <linux/irq.h>
Michael Ellermancc532912005-12-04 18:39:43 +110018#include <linux/types.h>
19
20#include <asm/processor.h>
21#include <asm/machdep.h>
David Wilderc0ce7d02006-06-23 15:29:34 -070022#include <asm/kexec.h>
David S. Millerd9b2b2a2008-02-13 16:56:49 -080023#include <asm/prom.h>
Haren Mynenif6cc82f2006-01-10 19:25:25 -080024#include <asm/smp.h>
Michael Neuling496b0102008-01-18 15:50:30 +110025#include <asm/setjmp.h>
David Howellsae3a1972012-03-28 18:30:02 +010026#include <asm/debug.h>
Michael Ellermancc532912005-12-04 18:39:43 +110027
Anton Blanchard549e88a2011-11-30 00:23:16 +000028/*
29 * The primary CPU waits a while for all secondary CPUs to enter. This is to
30 * avoid sending an IPI if the secondary CPUs are entering
31 * crash_kexec_secondary on their own (eg via a system reset).
32 *
33 * The secondary timeout has to be longer than the primary. Both timeouts are
34 * in milliseconds.
35 */
36#define PRIMARY_TIMEOUT 500
37#define SECONDARY_TIMEOUT 1000
38
39#define IPI_TIMEOUT 10000
40#define REAL_MODE_TIMEOUT 10000
41
Anton Blanchard2440c012011-11-30 00:23:17 +000042static int time_to_dump;
Balbir Singh4145f352017-12-15 19:14:55 +110043/*
44 * crash_wake_offline should be set to 1 by platforms that intend to wake
45 * up offline cpus prior to jumping to a kdump kernel. Currently powernv
46 * sets it to 1, since we want to avoid things from happening when an
47 * offline CPU wakes up due to something like an HMI (malfunction error),
48 * which propagates to all threads.
49 */
50int crash_wake_offline;
Michael Ellermancc532912005-12-04 18:39:43 +110051
Anton Blanchard158d5b5e2011-01-21 13:43:59 +110052#define CRASH_HANDLER_MAX 3
Suraj Jitindar Singh1d145162016-05-11 10:57:32 +100053/* List of shutdown handles */
54static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX];
Michael Neuling496b0102008-01-18 15:50:30 +110055static DEFINE_SPINLOCK(crash_handlers_lock);
56
Anton Blanchard07fe0c62011-11-30 00:23:11 +000057static unsigned long crash_shutdown_buf[JMP_BUF_LEN];
58static int crash_shutdown_cpu = -1;
59
60static int handle_fault(struct pt_regs *regs)
61{
62 if (crash_shutdown_cpu == smp_processor_id())
63 longjmp(crash_shutdown_buf, 1);
64 return 0;
65}
66
Michael Ellermancc532912005-12-04 18:39:43 +110067#ifdef CONFIG_SMP
Michael Ellermancc532912005-12-04 18:39:43 +110068
Christian Kujau897e01a2012-01-17 19:13:05 +000069static atomic_t cpus_in_crash;
/*
 * Per-CPU crash entry for secondary CPUs, reached either via the crash
 * IPI sent by the primary or via crash_kexec_secondary() after a system
 * reset.  Saves this CPU's register state (at most once), announces its
 * arrival through cpus_in_crash, then spins until the primary sets
 * time_to_dump before taking the CPU down for kexec.
 */
void crash_ipi_callback(struct pt_regs *regs)
{
	/*
	 * A CPU can re-enter here (eg via system reset after the first
	 * IPI); this mask ensures its registers are saved only on the
	 * first pass so the dump reflects the original crash state.
	 */
	static cpumask_t cpus_state_saved = CPU_MASK_NONE;

	int cpu = smp_processor_id();

	hard_irq_disable();
	if (!cpumask_test_cpu(cpu, &cpus_state_saved)) {
		crash_save_cpu(regs, cpu);
		cpumask_set_cpu(cpu, &cpus_state_saved);
	}

	/* Publish our arrival before the primary reads the count. */
	atomic_inc(&cpus_in_crash);
	smp_mb__after_atomic();

	/*
	 * Starting the kdump boot.
	 * This barrier is needed to make sure that all CPUs are stopped.
	 */
	while (!time_to_dump)
		cpu_relax();

	if (ppc_md.kexec_cpu_down)
		ppc_md.kexec_cpu_down(1, 1);

#ifdef CONFIG_PPC64
	kexec_smp_wait();
#else
	for (;;);	/* FIXME */
#endif

	/* NOTREACHED */
}
103
/*
 * Called on the crashing (primary) CPU to bring all other CPUs into the
 * crash path.  First tries an IPI; if some CPUs fail to respond within
 * IPI_TIMEOUT, falls back to asking the operator for a system reset,
 * which forces every CPU through the 0x100 vector.  One retry at most.
 *
 * @cpu: the crashing CPU's id (currently unused here; the count of
 *       expected responders is derived from the online/present masks).
 */
static void crash_kexec_prepare_cpus(int cpu)
{
	unsigned int msecs;
	unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */
	int tries = 0;
	int (*old_handler)(struct pt_regs *regs);

	printk(KERN_EMERG "Sending IPI to other CPUs\n");

	/* Platforms (eg powernv) may want offline CPUs woken too. */
	if (crash_wake_offline)
		ncpus = num_present_cpus() - 1;

	crash_send_ipi(crash_ipi_callback);
	smp_wmb();

again:
	/*
	 * FIXME: Until we will have the way to stop other CPUs reliably,
	 * the crash CPU will send an IPI and wait for other CPUs to
	 * respond.
	 */
	msecs = IPI_TIMEOUT;
	while ((atomic_read(&cpus_in_crash) < ncpus) && (--msecs > 0))
		mdelay(1);

	/* Would it be better to replace the trap vector here? */

	if (atomic_read(&cpus_in_crash) >= ncpus) {
		printk(KERN_EMERG "IPI complete\n");
		return;
	}

	printk(KERN_EMERG "ERROR: %d cpu(s) not responding\n",
		ncpus - atomic_read(&cpus_in_crash));

	/*
	 * If we have a panic timeout set then we can't wait indefinitely
	 * for someone to activate system reset. We also give up on the
	 * second time through if system reset fails to work.
	 */
	if ((panic_timeout > 0) || (tries > 0))
		return;

	/*
	 * A system reset will cause all CPUs to take an 0x100 exception.
	 * The primary CPU returns here via setjmp, and the secondary
	 * CPUs reexecute the crash_kexec_secondary path.
	 */
	old_handler = __debugger;
	__debugger = handle_fault;
	crash_shutdown_cpu = smp_processor_id();

	if (setjmp(crash_shutdown_buf) == 0) {
		printk(KERN_EMERG "Activate system reset (dumprestart) "
			"to stop other cpu(s)\n");

		/*
		 * A system reset will force all CPUs to execute the
		 * crash code again. We need to reset cpus_in_crash so we
		 * wait for everyone to do this.
		 */
		atomic_set(&cpus_in_crash, 0);
		smp_mb();

		while (atomic_read(&cpus_in_crash) < ncpus)
			cpu_relax();
	}

	/* Restore the debugger hook before (possibly) retrying. */
	crash_shutdown_cpu = -1;
	__debugger = old_handler;

	tries++;
	goto again;
}
David Wilderc0ce7d02006-06-23 15:29:34 -0700178
179/*
Anton Blanchard9b00ac02011-11-30 00:23:10 +0000180 * This function will be called by secondary cpus.
David Wilderc0ce7d02006-06-23 15:29:34 -0700181 */
182void crash_kexec_secondary(struct pt_regs *regs)
183{
David Wilderc0ce7d02006-06-23 15:29:34 -0700184 unsigned long flags;
Anton Blanchard549e88a2011-11-30 00:23:16 +0000185 int msecs = SECONDARY_TIMEOUT;
David Wilderc0ce7d02006-06-23 15:29:34 -0700186
187 local_irq_save(flags);
Anton Blanchard9b00ac02011-11-30 00:23:10 +0000188
Anton Blanchard549e88a2011-11-30 00:23:16 +0000189 /* Wait for the primary crash CPU to signal its progress */
David Wilderc0ce7d02006-06-23 15:29:34 -0700190 while (crashing_cpu < 0) {
191 if (--msecs < 0) {
Anton Blanchard9b00ac02011-11-30 00:23:10 +0000192 /* No response, kdump image may not have been loaded */
David Wilderc0ce7d02006-06-23 15:29:34 -0700193 local_irq_restore(flags);
194 return;
195 }
Anton Blanchard9b00ac02011-11-30 00:23:10 +0000196
David Wilderc0ce7d02006-06-23 15:29:34 -0700197 mdelay(1);
David Wilderc0ce7d02006-06-23 15:29:34 -0700198 }
Anton Blanchard9b00ac02011-11-30 00:23:10 +0000199
David Wilderc0ce7d02006-06-23 15:29:34 -0700200 crash_ipi_callback(regs);
201}
202
Paul Gortmaker7c7a81b2011-04-13 06:30:08 +0000203#else /* ! CONFIG_SMP */
Paul Gortmaker7c7a81b2011-04-13 06:30:08 +0000204
/* UP build: no other CPUs to stop; just release any held secondaries. */
static void crash_kexec_prepare_cpus(int cpu)
{
	/*
	 * move the secondaries to us so that we can copy
	 * the new kernel 0-0x100 safely
	 *
	 * do this if kexec in setup.c ?
	 */
#ifdef CONFIG_PPC64
	smp_release_cpus();
#else
	/* FIXME */
#endif
}
219
/* UP build: there are no secondary CPUs, so nothing to do here. */
void crash_kexec_secondary(struct pt_regs *regs)
{
}
Paul Gortmaker7c7a81b2011-04-13 06:30:08 +0000223#endif /* CONFIG_SMP */
Michael Ellermancc532912005-12-04 18:39:43 +1100224
/* wait for all the CPUs to hit real mode but timeout if they don't come in */
#if defined(CONFIG_SMP) && defined(CONFIG_PPC64)
static void __maybe_unused crash_kexec_wait_realmode(int cpu)
{
	unsigned int msecs;
	int i;

	/* Single shared time budget across all CPUs, not per-CPU. */
	msecs = REAL_MODE_TIMEOUT;
	for (i=0; i < nr_cpu_ids && msecs > 0; i++) {
		/* The crashing CPU itself is never waited on. */
		if (i == cpu)
			continue;

		/* Poll this CPU's paca until it reports real-mode entry. */
		while (paca_ptrs[i]->kexec_state < KEXEC_STATE_REAL_MODE) {
			barrier();
			if (!cpu_possible(i) || !cpu_online(i) || (msecs <= 0))
				break;
			msecs--;
			mdelay(1);
		}
	}
	mb();
}
#else
static inline void crash_kexec_wait_realmode(int cpu) {}
#endif	/* CONFIG_SMP && CONFIG_PPC64 */
Ben Hutchings7707e412011-04-24 15:04:31 +0000250
Michael Neuling496b0102008-01-18 15:50:30 +1100251/*
252 * Register a function to be called on shutdown. Only use this if you
253 * can't reset your device in the second kernel.
254 */
255int crash_shutdown_register(crash_shutdown_t handler)
256{
257 unsigned int i, rc;
258
259 spin_lock(&crash_handlers_lock);
260 for (i = 0 ; i < CRASH_HANDLER_MAX; i++)
261 if (!crash_shutdown_handles[i]) {
262 /* Insert handle at first empty entry */
263 crash_shutdown_handles[i] = handler;
264 rc = 0;
265 break;
266 }
267
268 if (i == CRASH_HANDLER_MAX) {
269 printk(KERN_ERR "Crash shutdown handles full, "
270 "not registered.\n");
271 rc = 1;
272 }
273
274 spin_unlock(&crash_handlers_lock);
275 return rc;
276}
277EXPORT_SYMBOL(crash_shutdown_register);
278
279int crash_shutdown_unregister(crash_shutdown_t handler)
280{
281 unsigned int i, rc;
282
283 spin_lock(&crash_handlers_lock);
284 for (i = 0 ; i < CRASH_HANDLER_MAX; i++)
285 if (crash_shutdown_handles[i] == handler)
286 break;
287
288 if (i == CRASH_HANDLER_MAX) {
289 printk(KERN_ERR "Crash shutdown handle not found\n");
290 rc = 1;
291 } else {
292 /* Shift handles down */
Suraj Jitindar Singh1d145162016-05-11 10:57:32 +1000293 for (; i < (CRASH_HANDLER_MAX - 1); i++)
Michael Neuling496b0102008-01-18 15:50:30 +1100294 crash_shutdown_handles[i] =
295 crash_shutdown_handles[i+1];
Suraj Jitindar Singh1d145162016-05-11 10:57:32 +1000296 /*
297 * Reset last entry to NULL now that it has been shifted down,
298 * this will allow new handles to be added here.
299 */
300 crash_shutdown_handles[i] = NULL;
Michael Neuling496b0102008-01-18 15:50:30 +1100301 rc = 0;
302 }
303
304 spin_unlock(&crash_handlers_lock);
305 return rc;
306}
307EXPORT_SYMBOL(crash_shutdown_unregister);
308
/*
 * Architecture-default crash shutdown, run on the crashing CPU just
 * before jumping to the kdump kernel: disables interrupts, herds the
 * other CPUs into the crash path, saves this CPU's registers, then runs
 * any registered shutdown handlers under a setjmp/fault-handler
 * trampoline so a faulting handler cannot take down the dump.
 */
void default_machine_crash_shutdown(struct pt_regs *regs)
{
	unsigned int i;
	int (*old_handler)(struct pt_regs *regs);

	/*
	 * This function is only called after the system
	 * has panicked or is otherwise in a critical state.
	 * The minimum amount of code to allow a kexec'd kernel
	 * to run successfully needs to happen here.
	 *
	 * In practice this means stopping other cpus in
	 * an SMP system.
	 * The kernel is broken so disable interrupts.
	 */
	hard_irq_disable();

	/*
	 * Make a note of crashing cpu. Will be used in machine_kexec
	 * such that another IPI will not be sent.
	 */
	crashing_cpu = smp_processor_id();

	/*
	 * If we came in via system reset, wait a while for the secondary
	 * CPUs to enter.
	 */
	if (TRAP(regs) == 0x100)
		mdelay(PRIMARY_TIMEOUT);

	crash_kexec_prepare_cpus(crashing_cpu);

	crash_save_cpu(regs, crashing_cpu);

	/* Release the secondaries spinning in crash_ipi_callback(). */
	time_to_dump = 1;

	crash_kexec_wait_realmode(crashing_cpu);

	machine_kexec_mask_interrupts();

	/*
	 * Call registered shutdown routines safely. Swap out
	 * __debugger_fault_handler, and replace on exit.
	 */
	old_handler = __debugger_fault_handler;
	__debugger_fault_handler = handle_fault;
	crash_shutdown_cpu = smp_processor_id();
	/* Handlers are packed from slot 0; stop at the first NULL. */
	for (i = 0; i < CRASH_HANDLER_MAX && crash_shutdown_handles[i]; i++) {
		if (setjmp(crash_shutdown_buf) == 0) {
			/*
			 * Insert syncs and delay to ensure
			 * instructions in the dangerous region don't
			 * leak away from this protected region.
			 */
			asm volatile("sync; isync");
			/* dangerous region */
			crash_shutdown_handles[i]();
			asm volatile("sync; isync");
		}
	}
	crash_shutdown_cpu = -1;
	__debugger_fault_handler = old_handler;

	if (ppc_md.kexec_cpu_down)
		ppc_md.kexec_cpu_down(1, 0);
}