// SPDX-License-Identifier: GPL-2.0-only
/*
 * kexec for arm64
 *
 * Copyright (C) Linaro.
 * Copyright (C) Huawei Futurewei Technologies.
 */

#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kernel.h>
#include <linux/kexec.h>
#include <linux/page-flags.h>
#include <linux/set_memory.h>
#include <linux/smp.h>

#include <asm/cacheflush.h>
#include <asm/cpu_ops.h>
#include <asm/daifflags.h>
#include <asm/memory.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/page.h>
#include <asm/trans_pgd.h>

#include "cpu-reset.h"

/* Global variables for the arm64_relocate_new_kernel routine. */
extern const unsigned char arm64_relocate_new_kernel[];
extern const unsigned long arm64_relocate_new_kernel_size;

/**
 * kexec_image_info - For debugging output.
 */
#define kexec_image_info(_i) _kexec_image_info(__func__, __LINE__, _i)
static void _kexec_image_info(const char *func, int line,
			      const struct kimage *kimage)
{
	unsigned long i;

	pr_debug("%s:%d:\n", func, line);
	pr_debug("  kexec kimage info:\n");
	pr_debug("    type:        %d\n", kimage->type);
	pr_debug("    start:       %lx\n", kimage->start);
	pr_debug("    head:        %lx\n", kimage->head);
	pr_debug("    nr_segments: %lu\n", kimage->nr_segments);
	pr_debug("    dtb_mem: %pa\n", &kimage->arch.dtb_mem);
	pr_debug("    kern_reloc: %pa\n", &kimage->arch.kern_reloc);
	pr_debug("    el2_vectors: %pa\n", &kimage->arch.el2_vectors);

	for (i = 0; i < kimage->nr_segments; i++) {
		pr_debug("      segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n",
			i,
			kimage->segment[i].mem,
			kimage->segment[i].mem + kimage->segment[i].memsz,
			kimage->segment[i].memsz,
			kimage->segment[i].memsz / PAGE_SIZE);
	}
}

void machine_kexec_cleanup(struct kimage *kimage)
{
	/* Empty routine needed to avoid build errors. */
}

/**
 * machine_kexec_prepare - Prepare for a kexec reboot.
 *
 * Called from the core kexec code when a kernel image is loaded.
 * Forbid loading a kexec kernel if we have no way of hotplugging cpus or cpus
 * are stuck in the kernel. This avoids a panic once we hit machine_kexec().
 */
int machine_kexec_prepare(struct kimage *kimage)
{
	if (kimage->type != KEXEC_TYPE_CRASH && cpus_are_stuck_in_kernel()) {
		pr_err("Can't kexec: CPUs are stuck in the kernel.\n");
		return -EBUSY;
	}

	return 0;
}

/**
 * kexec_list_flush - Helper to flush the kimage list and source pages to PoC.
 */
static void kexec_list_flush(struct kimage *kimage)
{
	kimage_entry_t *entry;

	dcache_clean_inval_poc((unsigned long)kimage,
			       (unsigned long)kimage + sizeof(*kimage));

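	/*
	 * Walk the kimage entry list. Each entry holds a physical address
	 * plus an IND_* flag in its low bits: IND_INDIRECTION points at the
	 * next page of the list, IND_SOURCE marks a page that will be copied
	 * during relocation, IND_DESTINATION updates the copy destination,
	 * and IND_DONE terminates the list.
	 */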
	for (entry = &kimage->head; ; entry++) {
		unsigned int flag;
		unsigned long addr;

		/* flush the list entries. */
		dcache_clean_inval_poc((unsigned long)entry,
				       (unsigned long)entry +
				       sizeof(kimage_entry_t));

		flag = *entry & IND_FLAGS;
		if (flag == IND_DONE)
			break;

		addr = (unsigned long)phys_to_virt(*entry & PAGE_MASK);

		switch (flag) {
		case IND_INDIRECTION:
			/* Set entry point just before the new list page. */
			entry = (kimage_entry_t *)addr - 1;
			break;
		case IND_SOURCE:
			/* flush the source pages. */
			dcache_clean_inval_poc(addr, addr + PAGE_SIZE);
			break;
		case IND_DESTINATION:
			break;
		default:
			BUG();
		}
	}
}

/**
 * kexec_segment_flush - Helper to flush the kimage segments to PoC.
 */
static void kexec_segment_flush(const struct kimage *kimage)
{
	unsigned long i;

	pr_debug("%s:\n", __func__);

	for (i = 0; i < kimage->nr_segments; i++) {
		pr_debug("  segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n",
			i,
			kimage->segment[i].mem,
			kimage->segment[i].mem + kimage->segment[i].memsz,
			kimage->segment[i].memsz,
			kimage->segment[i].memsz / PAGE_SIZE);

		dcache_clean_inval_poc(
			(unsigned long)phys_to_virt(kimage->segment[i].mem),
			(unsigned long)phys_to_virt(kimage->segment[i].mem) +
				kimage->segment[i].memsz);
	}
}

/* Allocates pages for kexec page table */
static void *kexec_page_alloc(void *arg)
{
	struct kimage *kimage = (struct kimage *)arg;
	struct page *page = kimage_alloc_control_pages(kimage, 0);

	if (!page)
		return NULL;

	memset(page_address(page), 0, PAGE_SIZE);

	return page_address(page);
}

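/**
 * machine_kexec_post_load - Prepare the loaded kimage for relocation.
 *
 * Called from the core kexec code once the image segments are loaded.
 * Copies arm64_relocate_new_kernel into the control code page, then cleans
 * the relocation code and the kimage entry list to the PoC so they can be
 * used with the MMU and caches off. An image loaded in place (IND_DONE set
 * in kimage->head) needs no relocation; only its segments are flushed.
 */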
int machine_kexec_post_load(struct kimage *kimage)
{
	void *reloc_code = page_to_virt(kimage->control_code_page);
	struct trans_pgd_info info = {
		.trans_alloc_page	= kexec_page_alloc,
		.trans_alloc_arg	= kimage,
	};

	/* If loading in place, relocation is not used; only flush the next kernel. */
	if (kimage->head & IND_DONE) {
		kexec_segment_flush(kimage);
		kexec_image_info(kimage);
		return 0;
	}

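	/*
	 * nVHE only: keep a copy of the EL2 vectors in a control page so
	 * they survive the relocation pass, which may overwrite the old
	 * kernel (including the original vectors).
	 */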
	kimage->arch.el2_vectors = 0;
	if (is_hyp_nvhe()) {
		int rc = trans_pgd_copy_el2_vectors(&info,
						    &kimage->arch.el2_vectors);
		if (rc)
			return rc;
	}

	memcpy(reloc_code, arm64_relocate_new_kernel,
	       arm64_relocate_new_kernel_size);
	kimage->arch.kern_reloc = __pa(reloc_code);

	/* Flush the reloc_code in preparation for its execution. */
	dcache_clean_inval_poc((unsigned long)reloc_code,
			       (unsigned long)reloc_code +
			       arm64_relocate_new_kernel_size);
	icache_inval_pou((uintptr_t)reloc_code,
			 (uintptr_t)reloc_code +
			 arm64_relocate_new_kernel_size);
	kexec_list_flush(kimage);
	kexec_image_info(kimage);

	return 0;
}

/**
 * machine_kexec - Do the kexec reboot.
 *
 * Called from the core kexec code for a sys_reboot with LINUX_REBOOT_CMD_KEXEC.
 */
void machine_kexec(struct kimage *kimage)
{
	bool in_kexec_crash = (kimage == kexec_crash_image);
	bool stuck_cpus = cpus_are_stuck_in_kernel();

	/*
	 * New cpus may have become stuck_in_kernel after we loaded the image.
	 */
	BUG_ON(!in_kexec_crash && (stuck_cpus || (num_online_cpus() > 1)));
	WARN(in_kexec_crash && (stuck_cpus || smp_crash_stop_failed()),
	     "Some CPUs may be stale, kdump will be unreliable.\n");

	pr_info("Bye!\n");

	local_daif_mask();

	/*
	 * Both restart and cpu_soft_restart will shut down the MMU and disable
	 * data caches. However, restart will start the new kernel or purgatory
	 * directly, while cpu_soft_restart will transfer control to
	 * arm64_relocate_new_kernel.
	 * In the kexec case, kimage->start points to purgatory, assuming that
	 * the kernel entry and dtb address are embedded in purgatory by
	 * userspace (kexec-tools).
	 * In the kexec_file case, the kernel starts directly without purgatory.
	 */
	if (kimage->head & IND_DONE) {
		typeof(__cpu_soft_restart) *restart;

		cpu_install_idmap();
		restart = (void *)__pa_symbol(function_nocfi(__cpu_soft_restart));
		restart(is_hyp_nvhe(), kimage->start, kimage->arch.dtb_mem,
			0, 0);
	} else {
		if (is_hyp_nvhe())
			__hyp_set_vectors(kimage->arch.el2_vectors);
		cpu_soft_restart(kimage->arch.kern_reloc, virt_to_phys(kimage),
				 0, 0);
	}

	BUG(); /* Should never get here. */
}

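/*
 * machine_kexec_mask_interrupts - Quiesce all irqchips before the crash
 * kernel starts: clear any active state (EOIing in-progress interrupts
 * where needed), then mask and disable every interrupt line.
 */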
static void machine_kexec_mask_interrupts(void)
{
	unsigned int i;
	struct irq_desc *desc;

	for_each_irq_desc(i, desc) {
		struct irq_chip *chip;
		int ret;

		chip = irq_desc_get_chip(desc);
		if (!chip)
			continue;

		/*
		 * First try to remove the active state. If this
		 * fails, try to EOI the interrupt.
		 */
		ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false);

		if (ret && irqd_irq_inprogress(&desc->irq_data) &&
		    chip->irq_eoi)
			chip->irq_eoi(&desc->irq_data);

		if (chip->irq_mask)
			chip->irq_mask(&desc->irq_data);

		if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
			chip->irq_disable(&desc->irq_data);
	}
}

/**
 * machine_crash_shutdown - shutdown non-crashing cpus and save registers
 */
void machine_crash_shutdown(struct pt_regs *regs)
{
	local_irq_disable();

	/* shutdown non-crashing cpus */
	crash_smp_send_stop();

	/* for crashing cpu */
	crash_save_cpu(regs, smp_processor_id());
	machine_kexec_mask_interrupts();

	pr_info("Starting crashdump kernel...\n");
}

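/*
 * arch_kexec_protect_crashkres - Invalidate the linear-map entries covering
 * the loaded crash kernel segments, so stray writes fault instead of
 * silently corrupting the image.
 */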
void arch_kexec_protect_crashkres(void)
{
	int i;

	for (i = 0; i < kexec_crash_image->nr_segments; i++)
		set_memory_valid(
			__phys_to_virt(kexec_crash_image->segment[i].mem),
			kexec_crash_image->segment[i].memsz >> PAGE_SHIFT, 0);
}

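/*
 * arch_kexec_unprotect_crashkres - Make the crash kernel segments valid in
 * the linear map again, e.g. around hibernation (see crash_prepare_suspend()
 * below) or when the loaded image is about to be replaced.
 */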
void arch_kexec_unprotect_crashkres(void)
{
	int i;

	for (i = 0; i < kexec_crash_image->nr_segments; i++)
		set_memory_valid(
			__phys_to_virt(kexec_crash_image->segment[i].mem),
			kexec_crash_image->segment[i].memsz >> PAGE_SHIFT, 1);
}

#ifdef CONFIG_HIBERNATION
/*
 * To preserve the crash dump kernel image, the relevant memory segments
 * should be mapped again around the hibernation.
 */
void crash_prepare_suspend(void)
{
	if (kexec_crash_image)
		arch_kexec_unprotect_crashkres();
}

void crash_post_resume(void)
{
	if (kexec_crash_image)
		arch_kexec_protect_crashkres();
}

/*
 * crash_is_nosave
 *
 * Return true only if a page is part of reserved memory for crash dump kernel,
 * but does not hold any data of loaded kernel image.
 *
 * Note that all the pages in crash dump kernel memory have been initially
 * marked as Reserved as memory was allocated via memblock_reserve().
 *
 * In hibernation, the pages which are Reserved and yet "nosave" are excluded
 * from the hibernation image. crash_is_nosave() does this check for crash
 * dump kernel and will reduce the total size of hibernation image.
 */

bool crash_is_nosave(unsigned long pfn)
{
	int i;
	phys_addr_t addr;

	if (!crashk_res.end)
		return false;

	/* in reserved memory? */
	addr = __pfn_to_phys(pfn);
	if ((addr < crashk_res.start) || (crashk_res.end < addr))
		return false;

	if (!kexec_crash_image)
		return true;

	/* not part of loaded kernel image? */
	for (i = 0; i < kexec_crash_image->nr_segments; i++)
		if (addr >= kexec_crash_image->segment[i].mem &&
		    addr < (kexec_crash_image->segment[i].mem +
			    kexec_crash_image->segment[i].memsz))
			return false;

	return true;
}

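/*
 * crash_free_reserved_phys_range - Called from the core kexec code when the
 * crashkernel reservation is shrunk; returns the freed pages to the page
 * allocator.
 */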
void crash_free_reserved_phys_range(unsigned long begin, unsigned long end)
{
	unsigned long addr;
	struct page *page;

	for (addr = begin; addr < end; addr += PAGE_SIZE) {
		page = phys_to_page(addr);
		free_reserved_page(page);
	}
}
#endif /* CONFIG_HIBERNATION */