/*
 * kexec.c - kexec system call core code.
 * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
 *
 * This source code is licensed under the GNU General Public License,
 * Version 2. See the file COPYING for more details.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/mm.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/kexec.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/highmem.h>
#include <linux/syscalls.h>
#include <linux/reboot.h>
#include <linux/ioport.h>
#include <linux/hardirq.h>
#include <linux/elf.h>
#include <linux/elfcore.h>
#include <linux/utsname.h>
#include <linux/numa.h>
#include <linux/suspend.h>
#include <linux/device.h>
#include <linux/freezer.h>
#include <linux/pm.h>
#include <linux/cpu.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/console.h>
#include <linux/vmalloc.h>
#include <linux/swap.h>
#include <linux/syscore_ops.h>
#include <linux/compiler.h>
#include <linux/hugetlb.h>

#include <asm/page.h>
#include <asm/sections.h>

#include <crypto/hash.h>
#include <crypto/sha.h>
#include "kexec_internal.h"

DEFINE_MUTEX(kexec_mutex);

/* Per cpu memory for storing cpu states in case of system crash. */
note_buf_t __percpu *crash_notes;

/* vmcoreinfo stuff */
static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
size_t vmcoreinfo_size;
size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);

/* Flag to indicate we are going to kexec a new kernel */
bool kexec_in_progress = false;


/* Location of the reserved area for the crash kernel */
struct resource crashk_res = {
	.name  = "Crash kernel",
	.start = 0,
	.end   = 0,
	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
	.desc  = IORES_DESC_CRASH_KERNEL
};
struct resource crashk_low_res = {
	.name  = "Crash kernel",
	.start = 0,
	.end   = 0,
	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
	.desc  = IORES_DESC_CRASH_KERNEL
};

int kexec_should_crash(struct task_struct *p)
{
	/*
	 * If crash_kexec_post_notifiers is enabled, don't run
	 * crash_kexec() here yet, which must be run after panic
	 * notifiers in panic().
	 */
	if (crash_kexec_post_notifiers)
		return 0;
	/*
	 * There are 4 panic() calls in the do_exit() path, one for
	 * each of these 4 conditions.
	 */
	if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops)
		return 1;
	return 0;
}

/*
 * When kexec transitions to the new kernel there is a one-to-one
 * mapping between physical and virtual addresses. On processors
 * where you can disable the MMU this is trivial. For others it is
 * still a simple, predictable page table to set up.
 *
 * In that environment kexec copies the new kernel to its final
 * resting place. This means I can only support memory whose
 * physical address can fit in an unsigned long. In particular
 * addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled.
 * If the assembly stub has more restrictive requirements,
 * KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DEST_MEMORY_LIMIT can be
 * defined more restrictively in <asm/kexec.h>.
 *
 * The code for the transition from the current kernel to the
 * new kernel is placed in the control_code_buffer, whose size
 * is given by KEXEC_CONTROL_PAGE_SIZE. In the best case only a single
 * page of memory is necessary, but some architectures require more.
 * Because this memory must be identity mapped in the transition from
 * virtual to physical addresses it must live in the range
 * 0 - TASK_SIZE, as only the user space mappings are arbitrarily
 * modifiable.
 *
 * The assembly stub in the control code buffer is passed a linked list
 * of descriptor pages detailing the source pages of the new kernel,
 * and the destination addresses of those source pages. As this data
 * structure is not used in the context of the current OS, it must
 * be self-contained.
 *
 * The code has been made to work with highmem pages and will use a
 * destination page in its final resting place (if it happens
 * to allocate it). The end product of this is that most of the
 * physical address space, and most of RAM, can be used.
 *
 * Future directions include:
 *  - allocating a page table with the control code buffer identity
 *    mapped, to simplify machine_kexec and make kexec_on_panic more
 *    reliable.
 */

/*
 * KIMAGE_NO_DEST is an impossible destination address, for
 * allocating pages whose destination address we do not care about.
 */
#define KIMAGE_NO_DEST (-1UL)

static struct page *kimage_alloc_page(struct kimage *image,
				       gfp_t gfp_mask,
				       unsigned long dest);

int sanity_check_segment_list(struct kimage *image)
{
	int result, i;
	unsigned long nr_segments = image->nr_segments;

	/*
	 * Verify we have good destination addresses. The caller is
	 * responsible for making certain we don't attempt to load
	 * the new image into invalid or reserved areas of RAM. This
	 * just verifies it is an address we can use.
	 *
	 * Since the kernel does everything in page size chunks ensure
	 * the destination addresses are page aligned. Too many
	 * special cases crop up when we don't do this. The most
	 * insidious is getting overlapping destination addresses
	 * simply because addresses are changed to page size
	 * granularity.
	 */
	result = -EADDRNOTAVAIL;
	for (i = 0; i < nr_segments; i++) {
		unsigned long mstart, mend;

		mstart = image->segment[i].mem;
		mend = mstart + image->segment[i].memsz;
		if ((mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK))
			return result;
		if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT)
			return result;
	}

	/* Verify our destination addresses do not overlap.
	 * If we allowed overlapping destination addresses
	 * through, very weird things can happen with no
	 * easy explanation as one segment stomps on another.
	 */
	result = -EINVAL;
	for (i = 0; i < nr_segments; i++) {
		unsigned long mstart, mend;
		unsigned long j;

		mstart = image->segment[i].mem;
		mend = mstart + image->segment[i].memsz;
		for (j = 0; j < i; j++) {
			unsigned long pstart, pend;

			pstart = image->segment[j].mem;
			pend = pstart + image->segment[j].memsz;
			/* Do the segments overlap ? */
			if ((mend > pstart) && (mstart < pend))
				return result;
		}
	}

	/* Ensure our buffer sizes do not exceed
	 * our memory sizes. This should always be the case,
	 * and it is easier to check up front than to be surprised
	 * later on.
	 */
	result = -EINVAL;
	for (i = 0; i < nr_segments; i++) {
		if (image->segment[i].bufsz > image->segment[i].memsz)
			return result;
	}

	/*
	 * Verify we have good destination addresses. Normally
	 * the caller is responsible for making certain we don't
	 * attempt to load the new image into invalid or reserved
	 * areas of RAM. But crash kernels are preloaded into a
	 * reserved area of RAM. We must ensure the addresses
	 * are in the reserved area, otherwise preloading the
	 * kernel could corrupt things.
	 */

	if (image->type == KEXEC_TYPE_CRASH) {
		result = -EADDRNOTAVAIL;
		for (i = 0; i < nr_segments; i++) {
			unsigned long mstart, mend;

			mstart = image->segment[i].mem;
			mend = mstart + image->segment[i].memsz - 1;
			/* Ensure we are within the crash kernel limits */
			if ((mstart < crashk_res.start) ||
			    (mend > crashk_res.end))
				return result;
		}
	}

	return 0;
}

struct kimage *do_kimage_alloc_init(void)
{
	struct kimage *image;

	/* Allocate a controlling structure */
	image = kzalloc(sizeof(*image), GFP_KERNEL);
	if (!image)
		return NULL;

	image->head = 0;
	image->entry = &image->head;
	image->last_entry = &image->head;
	image->control_page = ~0; /* By default this does not apply */
	image->type = KEXEC_TYPE_DEFAULT;

	/* Initialize the list of control pages */
	INIT_LIST_HEAD(&image->control_pages);

	/* Initialize the list of destination pages */
	INIT_LIST_HEAD(&image->dest_pages);

	/* Initialize the list of unusable pages */
	INIT_LIST_HEAD(&image->unusable_pages);

	return image;
}

int kimage_is_destination_range(struct kimage *image,
				unsigned long start,
				unsigned long end)
{
	unsigned long i;

	for (i = 0; i < image->nr_segments; i++) {
		unsigned long mstart, mend;

		mstart = image->segment[i].mem;
		mend = mstart + image->segment[i].memsz;
		if ((end > mstart) && (start < mend))
			return 1;
	}

	return 0;
}

static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
{
	struct page *pages;

	pages = alloc_pages(gfp_mask, order);
	if (pages) {
		unsigned int count, i;

		pages->mapping = NULL;
		set_page_private(pages, order);
		count = 1 << order;
		for (i = 0; i < count; i++)
			SetPageReserved(pages + i);
	}

	return pages;
}

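/*
 * Free pages obtained from kimage_alloc_pages(). The allocation
 * order was stashed in page_private() above, which tells us both how
 * many pages to un-reserve and what order to hand back to
 * __free_pages().
 */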
static void kimage_free_pages(struct page *page)
{
	unsigned int order, count, i;

	order = page_private(page);
	count = 1 << order;
	for (i = 0; i < count; i++)
		ClearPageReserved(page + i);
	__free_pages(page, order);
}

void kimage_free_page_list(struct list_head *list)
{
	struct page *page, *next;

	list_for_each_entry_safe(page, next, list, lru) {
		list_del(&page->lru);
		kimage_free_pages(page);
	}
}

static struct page *kimage_alloc_normal_control_pages(struct kimage *image,
						      unsigned int order)
{
	/* Control pages are special, they are the intermediaries
	 * that are needed while we copy the rest of the pages
	 * to their final resting place. As such they must
	 * not conflict with either the destination addresses
	 * or memory the kernel is already using.
	 *
	 * The only case where we really need more than one of
	 * these is for architectures where we cannot disable
	 * the MMU and must instead generate an identity mapped
	 * page table for all of the memory.
	 *
	 * At worst this runs in O(N) of the image size.
	 */
	struct list_head extra_pages;
	struct page *pages;
	unsigned int count;

	count = 1 << order;
	INIT_LIST_HEAD(&extra_pages);

	/* Loop while I can allocate a page and the page allocated
	 * is a destination page.
	 */
	do {
		unsigned long pfn, epfn, addr, eaddr;

		pages = kimage_alloc_pages(KEXEC_CONTROL_MEMORY_GFP, order);
		if (!pages)
			break;
		pfn = page_to_pfn(pages);
		epfn = pfn + count;
		addr = pfn << PAGE_SHIFT;
		eaddr = epfn << PAGE_SHIFT;
		if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) ||
			      kimage_is_destination_range(image, addr, eaddr)) {
			list_add(&pages->lru, &extra_pages);
			pages = NULL;
		}
	} while (!pages);

	if (pages) {
		/* Remember the allocated page... */
		list_add(&pages->lru, &image->control_pages);

		/* Because the page is already in its destination
		 * location we will never allocate another page at
		 * that address. Therefore kimage_alloc_pages
		 * will not return it (again) and we don't need
		 * to give it an entry in image->segment[].
		 */
	}
	/* Deal with the destination pages I have inadvertently allocated.
	 *
	 * Ideally I would convert multi-page allocations into single
	 * page allocations, and add everything to image->dest_pages.
	 *
	 * For now it is simpler to just free the pages.
	 */
	kimage_free_page_list(&extra_pages);

	return pages;
}

static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
						     unsigned int order)
{
	/* Control pages are special, they are the intermediaries
	 * that are needed while we copy the rest of the pages
	 * to their final resting place. As such they must
	 * not conflict with either the destination addresses
	 * or memory the kernel is already using.
	 *
	 * Control pages are also the only pages we must allocate
	 * when loading a crash kernel. All of the other pages
	 * are specified by the segments and we just memcpy
	 * into them directly.
	 *
	 * The only case where we really need more than one of
	 * these is for architectures where we cannot disable
	 * the MMU and must instead generate an identity mapped
	 * page table for all of the memory.
	 *
	 * Given the low demand this implements a very simple
	 * allocator that finds the first hole of the appropriate
	 * size in the reserved memory region, and allocates all
	 * of the memory up to and including the hole.
	 */
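	/*
	 * A worked example of the search below (illustrative, assuming
	 * PAGE_SIZE == 4K): with order == 1 the hole size is 8K, so
	 * hole_start is image->control_page rounded up to an 8K
	 * boundary. Whenever the candidate hole overlaps a segment it
	 * is bumped past that segment (again rounded up to the hole
	 * size) and rechecked, until a hole clears every segment or
	 * runs past crashk_res.end.
	 */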
	unsigned long hole_start, hole_end, size;
	struct page *pages;

	pages = NULL;
	size = (1 << order) << PAGE_SHIFT;
	hole_start = (image->control_page + (size - 1)) & ~(size - 1);
	hole_end = hole_start + size - 1;
	while (hole_end <= crashk_res.end) {
		unsigned long i;

		if (hole_end > KEXEC_CRASH_CONTROL_MEMORY_LIMIT)
			break;
		/* See if I overlap any of the segments */
		for (i = 0; i < image->nr_segments; i++) {
			unsigned long mstart, mend;

			mstart = image->segment[i].mem;
			mend = mstart + image->segment[i].memsz - 1;
			if ((hole_end >= mstart) && (hole_start <= mend)) {
				/* Advance the hole to the end of the segment */
				hole_start = (mend + (size - 1)) & ~(size - 1);
				hole_end = hole_start + size - 1;
				break;
			}
		}
		/* If I don't overlap any segments I have found my hole! */
		if (i == image->nr_segments) {
			pages = pfn_to_page(hole_start >> PAGE_SHIFT);
			image->control_page = hole_end;
			break;
		}
	}

	return pages;
}


struct page *kimage_alloc_control_pages(struct kimage *image,
					unsigned int order)
{
	struct page *pages = NULL;

	switch (image->type) {
	case KEXEC_TYPE_DEFAULT:
		pages = kimage_alloc_normal_control_pages(image, order);
		break;
	case KEXEC_TYPE_CRASH:
		pages = kimage_alloc_crash_control_pages(image, order);
		break;
	}

	return pages;
}

static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
{
	if (*image->entry != 0)
		image->entry++;

	if (image->entry == image->last_entry) {
		kimage_entry_t *ind_page;
		struct page *page;

		page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST);
		if (!page)
			return -ENOMEM;

		ind_page = page_address(page);
		*image->entry = virt_to_phys(ind_page) | IND_INDIRECTION;
		image->entry = ind_page;
		image->last_entry = ind_page +
				      ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1);
	}
	*image->entry = entry;
	image->entry++;
	*image->entry = 0;

	return 0;
}
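
/*
 * The entry list built by kimage_add_entry() is what the relocation
 * stub walks at kexec time: each kimage_entry_t is a page-aligned
 * physical address with a flag in its low bits. A sketch of a
 * typical stream (addresses illustrative):
 *
 *   0x01000000 | IND_DESTINATION   set the destination cursor
 *   0x3fa41000 | IND_SOURCE        copy this page, advance the cursor
 *   0x3fa42000 | IND_SOURCE        ...
 *   0x3fb00000 | IND_INDIRECTION   continue with the entries in this page
 *   IND_DONE                       end of the stream
 *
 * A fresh indirection page is chained in whenever the current one
 * fills up, so the stream can grow without moving existing entries.
 */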

static int kimage_set_destination(struct kimage *image,
				  unsigned long destination)
{
	int result;

	destination &= PAGE_MASK;
	result = kimage_add_entry(image, destination | IND_DESTINATION);

	return result;
}


static int kimage_add_page(struct kimage *image, unsigned long page)
{
	int result;

	page &= PAGE_MASK;
	result = kimage_add_entry(image, page | IND_SOURCE);

	return result;
}


static void kimage_free_extra_pages(struct kimage *image)
{
	/* Walk through and free any extra destination pages I may have */
	kimage_free_page_list(&image->dest_pages);

	/* Walk through and free any unusable pages I have cached */
	kimage_free_page_list(&image->unusable_pages);
}

void kimage_terminate(struct kimage *image)
{
	if (*image->entry != 0)
		image->entry++;

	*image->entry = IND_DONE;
}

#define for_each_kimage_entry(image, ptr, entry) \
	for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \
		ptr = (entry & IND_INDIRECTION) ? \
			phys_to_virt((entry & PAGE_MASK)) : ptr + 1)

static void kimage_free_entry(kimage_entry_t entry)
{
	struct page *page;

	page = pfn_to_page(entry >> PAGE_SHIFT);
	kimage_free_pages(page);
}

void kimage_free(struct kimage *image)
{
	kimage_entry_t *ptr, entry;
	kimage_entry_t ind = 0;

	if (!image)
		return;

	kimage_free_extra_pages(image);
	for_each_kimage_entry(image, ptr, entry) {
		if (entry & IND_INDIRECTION) {
			/* Free the previous indirection page */
			if (ind & IND_INDIRECTION)
				kimage_free_entry(ind);
			/* Save this indirection page until we are
			 * done with it.
			 */
			ind = entry;
		} else if (entry & IND_SOURCE)
			kimage_free_entry(entry);
	}
	/* Free the final indirection page */
	if (ind & IND_INDIRECTION)
		kimage_free_entry(ind);

	/* Handle any machine specific cleanup */
	machine_kexec_cleanup(image);

	/* Free the kexec control pages... */
	kimage_free_page_list(&image->control_pages);

	/*
	 * Free up any temporary buffers allocated. This might hit if
	 * an error occurred much later after buffer allocation.
	 */
	if (image->file_mode)
		kimage_file_post_load_cleanup(image);

	kfree(image);
}

static kimage_entry_t *kimage_dst_used(struct kimage *image,
				       unsigned long page)
{
	kimage_entry_t *ptr, entry;
	unsigned long destination = 0;

	for_each_kimage_entry(image, ptr, entry) {
		if (entry & IND_DESTINATION)
			destination = entry & PAGE_MASK;
		else if (entry & IND_SOURCE) {
			if (page == destination)
				return ptr;
			destination += PAGE_SIZE;
		}
	}

	return NULL;
}

static struct page *kimage_alloc_page(struct kimage *image,
					gfp_t gfp_mask,
					unsigned long destination)
{
	/*
	 * Here we implement safeguards to ensure that a source page
	 * is not copied to its destination page before the data on
	 * the destination page is no longer useful.
	 *
	 * To do this we maintain the invariant that a source page is
	 * either its own destination page, or it is not a
	 * destination page at all.
	 *
	 * That is slightly stronger than required, but the proof
	 * that no problems will occur is trivial, and the
	 * implementation is simple to verify.
	 *
	 * When allocating all pages normally this algorithm will run
	 * in O(N) time, but in the worst case it will run in O(N^2)
	 * time. If the runtime is a problem the data structures can
	 * be fixed.
	 */
	struct page *page;
	unsigned long addr;

	/*
	 * Walk through the list of destination pages, and see if I
	 * have a match.
	 */
	list_for_each_entry(page, &image->dest_pages, lru) {
		addr = page_to_pfn(page) << PAGE_SHIFT;
		if (addr == destination) {
			list_del(&page->lru);
			return page;
		}
	}
	page = NULL;
	while (1) {
		kimage_entry_t *old;

		/* Allocate a page, if we run out of memory give up */
		page = kimage_alloc_pages(gfp_mask, 0);
		if (!page)
			return NULL;
		/* If the page cannot be used file it away */
		if (page_to_pfn(page) >
				(KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
			list_add(&page->lru, &image->unusable_pages);
			continue;
		}
		addr = page_to_pfn(page) << PAGE_SHIFT;

		/* If it is the destination page we want, use it */
		if (addr == destination)
			break;

		/* If the page is not a destination page use it */
		if (!kimage_is_destination_range(image, addr,
						  addr + PAGE_SIZE))
			break;

		/*
		 * I know that the page is someone's destination page.
		 * See if there is already a source page for this
		 * destination page. And if so swap the source pages.
		 */
		old = kimage_dst_used(image, addr);
		if (old) {
			/* If so move it */
			unsigned long old_addr;
			struct page *old_page;

			old_addr = *old & PAGE_MASK;
			old_page = pfn_to_page(old_addr >> PAGE_SHIFT);
			copy_highpage(page, old_page);
			*old = addr | (*old & ~PAGE_MASK);

			/* The old page I have found cannot be a
			 * destination page, so return it if its
			 * gfp_flags honor the ones passed in.
			 */
			if (!(gfp_mask & __GFP_HIGHMEM) &&
			    PageHighMem(old_page)) {
				kimage_free_pages(old_page);
				continue;
			}
			addr = old_addr;
			page = old_page;
			break;
		}
		/* Place the page on the destination list, to be used later */
		list_add(&page->lru, &image->dest_pages);
	}

	return page;
}

static int kimage_load_normal_segment(struct kimage *image,
					 struct kexec_segment *segment)
{
	unsigned long maddr;
	size_t ubytes, mbytes;
	int result;
	unsigned char __user *buf = NULL;
	unsigned char *kbuf = NULL;

	result = 0;
	if (image->file_mode)
		kbuf = segment->kbuf;
	else
		buf = segment->buf;
	ubytes = segment->bufsz;
	mbytes = segment->memsz;
	maddr = segment->mem;

	result = kimage_set_destination(image, maddr);
	if (result < 0)
		goto out;

	while (mbytes) {
		struct page *page;
		char *ptr;
		size_t uchunk, mchunk;

		page = kimage_alloc_page(image, GFP_HIGHUSER, maddr);
		if (!page) {
			result = -ENOMEM;
			goto out;
		}
		result = kimage_add_page(image, page_to_pfn(page)
								<< PAGE_SHIFT);
		if (result < 0)
			goto out;

		ptr = kmap(page);
		/* Start with a clear page */
		clear_page(ptr);
		ptr += maddr & ~PAGE_MASK;
		mchunk = min_t(size_t, mbytes,
				PAGE_SIZE - (maddr & ~PAGE_MASK));
		uchunk = min(ubytes, mchunk);

		/* For file based kexec, source pages are in kernel memory */
		if (image->file_mode)
			memcpy(ptr, kbuf, uchunk);
		else
			result = copy_from_user(ptr, buf, uchunk);
		kunmap(page);
		if (result) {
			result = -EFAULT;
			goto out;
		}
		ubytes -= uchunk;
		maddr += mchunk;
		if (image->file_mode)
			kbuf += mchunk;
		else
			buf += mchunk;
		mbytes -= mchunk;
	}
out:
	return result;
}

static int kimage_load_crash_segment(struct kimage *image,
					struct kexec_segment *segment)
{
	/* For crash dump kernels we simply copy the data from
	 * user space to its destination.
	 * We do things a page at a time for the sake of kmap.
	 */
	unsigned long maddr;
	size_t ubytes, mbytes;
	int result;
	unsigned char __user *buf = NULL;
	unsigned char *kbuf = NULL;

	result = 0;
	if (image->file_mode)
		kbuf = segment->kbuf;
	else
		buf = segment->buf;
	ubytes = segment->bufsz;
	mbytes = segment->memsz;
	maddr = segment->mem;
	while (mbytes) {
		struct page *page;
		char *ptr;
		size_t uchunk, mchunk;

		page = pfn_to_page(maddr >> PAGE_SHIFT);
		if (!page) {
			result = -ENOMEM;
			goto out;
		}
		ptr = kmap(page);
		ptr += maddr & ~PAGE_MASK;
		mchunk = min_t(size_t, mbytes,
				PAGE_SIZE - (maddr & ~PAGE_MASK));
		uchunk = min(ubytes, mchunk);
		if (mchunk > uchunk) {
			/* Zero the trailing part of the page */
			memset(ptr + uchunk, 0, mchunk - uchunk);
		}

		/* For file based kexec, source pages are in kernel memory */
		if (image->file_mode)
			memcpy(ptr, kbuf, uchunk);
		else
			result = copy_from_user(ptr, buf, uchunk);
		kexec_flush_icache_page(page);
		kunmap(page);
		if (result) {
			result = -EFAULT;
			goto out;
		}
		ubytes -= uchunk;
		maddr += mchunk;
		if (image->file_mode)
			kbuf += mchunk;
		else
			buf += mchunk;
		mbytes -= mchunk;
	}
out:
	return result;
}

int kimage_load_segment(struct kimage *image,
				struct kexec_segment *segment)
{
	int result = -ENOMEM;

	switch (image->type) {
	case KEXEC_TYPE_DEFAULT:
		result = kimage_load_normal_segment(image, segment);
		break;
	case KEXEC_TYPE_CRASH:
		result = kimage_load_crash_segment(image, segment);
		break;
	}

	return result;
}

struct kimage *kexec_image;
struct kimage *kexec_crash_image;
int kexec_load_disabled;

/*
 * No panic_cpu check version of crash_kexec(). This function is called
 * only when panic_cpu holds the current CPU number; this is the only CPU
 * which processes crash_kexec routines.
 */
void __crash_kexec(struct pt_regs *regs)
{
	/* Take the kexec_mutex here to prevent sys_kexec_load
	 * running on one cpu from replacing the crash kernel
	 * we are using after a panic on a different cpu.
	 *
	 * If the crash kernel was not located in a fixed area
	 * of memory the xchg(&kexec_crash_image) would be
	 * sufficient. But since I reuse the memory...
	 */
	if (mutex_trylock(&kexec_mutex)) {
		if (kexec_crash_image) {
			struct pt_regs fixed_regs;

			crash_setup_regs(&fixed_regs, regs);
			crash_save_vmcoreinfo();
			machine_crash_shutdown(&fixed_regs);
			machine_kexec(kexec_crash_image);
		}
		mutex_unlock(&kexec_mutex);
	}
}

void crash_kexec(struct pt_regs *regs)
{
	int old_cpu, this_cpu;

	/*
	 * Only one CPU is allowed to execute the crash_kexec() code as with
	 * panic(). Otherwise parallel calls of panic() and crash_kexec()
	 * may stop each other. To exclude them, we use panic_cpu here too.
	 */
	this_cpu = raw_smp_processor_id();
	old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu);
	if (old_cpu == PANIC_CPU_INVALID) {
		/* This is the 1st CPU which comes here, so go ahead. */
		__crash_kexec(regs);

		/*
		 * Reset panic_cpu to allow another panic()/crash_kexec()
		 * call.
		 */
		atomic_set(&panic_cpu, PANIC_CPU_INVALID);
	}
}

size_t crash_get_memory_size(void)
{
	size_t size = 0;

	mutex_lock(&kexec_mutex);
	if (crashk_res.end != crashk_res.start)
		size = resource_size(&crashk_res);
	mutex_unlock(&kexec_mutex);
	return size;
}

void __weak crash_free_reserved_phys_range(unsigned long begin,
					   unsigned long end)
{
	unsigned long addr;

	for (addr = begin; addr < end; addr += PAGE_SIZE)
		free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT));
}

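/*
 * Shrink the reserved crash kernel region to new_size bytes and hand
 * the freed tail back to the "System RAM" resource. (Typically
 * reached via writes to /sys/kernel/kexec_crash_size, assuming the
 * usual ksysfs hookup.) Shrinking is refused while a crash image is
 * loaded, because the loaded image occupies the region.
 */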
int crash_shrink_memory(unsigned long new_size)
{
	int ret = 0;
	unsigned long start, end;
	unsigned long old_size;
	struct resource *ram_res;

	mutex_lock(&kexec_mutex);

	if (kexec_crash_image) {
		ret = -ENOENT;
		goto unlock;
	}
	start = crashk_res.start;
	end = crashk_res.end;
	old_size = (end == 0) ? 0 : end - start + 1;
	if (new_size >= old_size) {
		ret = (new_size == old_size) ? 0 : -EINVAL;
		goto unlock;
	}

	ram_res = kzalloc(sizeof(*ram_res), GFP_KERNEL);
	if (!ram_res) {
		ret = -ENOMEM;
		goto unlock;
	}

	start = roundup(start, KEXEC_CRASH_MEM_ALIGN);
	end = roundup(start + new_size, KEXEC_CRASH_MEM_ALIGN);

	crash_map_reserved_pages();
	crash_free_reserved_phys_range(end, crashk_res.end);

	if ((start == end) && (crashk_res.parent != NULL))
		release_resource(&crashk_res);

	ram_res->start = end;
	ram_res->end = crashk_res.end;
	ram_res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
	ram_res->name = "System RAM";

	crashk_res.end = end - 1;

	insert_resource(&iomem_resource, ram_res);
	crash_unmap_reserved_pages();

unlock:
	mutex_unlock(&kexec_mutex);
	return ret;
}

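/*
 * Append one ELF note to buf. The layout produced below is the
 * standard one (each part padded to a 4-byte multiple, while the
 * sizes recorded in the header stay unpadded):
 *
 *   Elf note header (n_namesz, n_descsz, n_type)
 *   name, n_namesz bytes including the NUL, padded to 4 bytes
 *   data, n_descsz bytes, padded to 4 bytes
 *
 * Returns a pointer just past the note, ready for the next append.
 */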
static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data,
			    size_t data_len)
{
	struct elf_note note;

	note.n_namesz = strlen(name) + 1;
	note.n_descsz = data_len;
	note.n_type = type;
	memcpy(buf, &note, sizeof(note));
	buf += (sizeof(note) + 3)/4;
	memcpy(buf, name, note.n_namesz);
	buf += (note.n_namesz + 3)/4;
	memcpy(buf, data, note.n_descsz);
	buf += (note.n_descsz + 3)/4;

	return buf;
}

static void final_note(u32 *buf)
{
	struct elf_note note;

	note.n_namesz = 0;
	note.n_descsz = 0;
	note.n_type = 0;
	memcpy(buf, &note, sizeof(note));
}

void crash_save_cpu(struct pt_regs *regs, int cpu)
{
	struct elf_prstatus prstatus;
	u32 *buf;

	if ((cpu < 0) || (cpu >= nr_cpu_ids))
		return;

	/* Using ELF notes here is opportunistic.
	 * I need a well defined structure format
	 * for the data I pass, and I need tags
	 * on the data to indicate what information I have
	 * squirrelled away. ELF notes happen to provide
	 * all of that, so there is no need to invent something new.
	 */
	buf = (u32 *)per_cpu_ptr(crash_notes, cpu);
	if (!buf)
		return;
	memset(&prstatus, 0, sizeof(prstatus));
	prstatus.pr_pid = current->pid;
	elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
	buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
			      &prstatus, sizeof(prstatus));
	final_note(buf);
}

static int __init crash_notes_memory_init(void)
{
	/* Allocate memory for saving cpu registers. */
	size_t size, align;

	/*
	 * crash_notes could be allocated across 2 vmalloc pages when percpu
	 * is vmalloc based. vmalloc doesn't guarantee that 2 contiguous
	 * vmalloc pages are also on 2 contiguous physical pages. In that
	 * case the 2nd half of crash_notes in the 2nd page could be lost,
	 * since only the starting address and size of crash_notes are
	 * exported through sysfs. Here round up the size of crash_notes
	 * to the nearest power of two and pass it to __alloc_percpu as
	 * the align value. This makes sure crash_notes is allocated
	 * inside one physical page.
	 */
	size = sizeof(note_buf_t);
	align = min(roundup_pow_of_two(sizeof(note_buf_t)), PAGE_SIZE);
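	/*
	 * Illustrative numbers (sizeof(note_buf_t) is arch-dependent):
	 * if the note buffer were 424 bytes, roundup_pow_of_two() would
	 * give an alignment of 512, and a 512-byte-aligned 424-byte
	 * object can never straddle a page boundary.
	 */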

	/*
	 * Break the build if size is bigger than PAGE_SIZE, since
	 * crash_notes would then definitely span 2 pages.
	 */
	BUILD_BUG_ON(size > PAGE_SIZE);

	crash_notes = __alloc_percpu(size, align);
	if (!crash_notes) {
		pr_warn("Memory allocation for saving cpu register states failed\n");
		return -ENOMEM;
	}
	return 0;
}
subsys_initcall(crash_notes_memory_init);


/*
 * Parsing the "crashkernel" commandline.
 *
 * This code is intended to be called from architecture specific code.
 */


/*
 * This function parses command lines in the format
 *
 *	crashkernel=ramsize-range:size[,...][@offset]
 *
 * The function returns 0 on success and -EINVAL on failure.
 */
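/*
 * For example (illustrative values):
 *
 *	crashkernel=512M-2G:64M,2G-:128M
 *
 * reserves 64M when system RAM falls in [512M, 2G) and 128M when it is
 * 2G or more; an optional trailing @offset fixes the base address.
 */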
static int __init parse_crashkernel_mem(char *cmdline,
					unsigned long long system_ram,
					unsigned long long *crash_size,
					unsigned long long *crash_base)
{
	char *cur = cmdline, *tmp;

	/* for each entry of the comma-separated list */
	do {
		unsigned long long start, end = ULLONG_MAX, size;

		/* get the start of the range */
		start = memparse(cur, &tmp);
		if (cur == tmp) {
			pr_warn("crashkernel: Memory value expected\n");
			return -EINVAL;
		}
		cur = tmp;
		if (*cur != '-') {
			pr_warn("crashkernel: '-' expected\n");
			return -EINVAL;
		}
		cur++;

		/* if no ':' is here, then we read the end */
		if (*cur != ':') {
			end = memparse(cur, &tmp);
			if (cur == tmp) {
				pr_warn("crashkernel: Memory value expected\n");
				return -EINVAL;
			}
			cur = tmp;
			if (end <= start) {
				pr_warn("crashkernel: end <= start\n");
				return -EINVAL;
			}
		}

		if (*cur != ':') {
			pr_warn("crashkernel: ':' expected\n");
			return -EINVAL;
		}
		cur++;

		size = memparse(cur, &tmp);
		if (cur == tmp) {
			pr_warn("Memory value expected\n");
			return -EINVAL;
		}
		cur = tmp;
		if (size >= system_ram) {
			pr_warn("crashkernel: invalid size\n");
			return -EINVAL;
		}

		/* match ? */
		if (system_ram >= start && system_ram < end) {
			*crash_size = size;
			break;
		}
	} while (*cur++ == ',');

	if (*crash_size > 0) {
		while (*cur && *cur != ' ' && *cur != '@')
			cur++;
		if (*cur == '@') {
			cur++;
			*crash_base = memparse(cur, &tmp);
			if (cur == tmp) {
				pr_warn("Memory value expected after '@'\n");
				return -EINVAL;
			}
		}
	}

	return 0;
}

/*
 * This function parses "simple" (old) crashkernel command lines like
 *
 *	crashkernel=size[@offset]
 *
 * It returns 0 on success and -EINVAL on failure.
 */
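/*
 * Example (illustrative): crashkernel=128M@16M reserves 128M of RAM
 * for the crash kernel, starting at physical address 16M.
 */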
static int __init parse_crashkernel_simple(char *cmdline,
					   unsigned long long *crash_size,
					   unsigned long long *crash_base)
{
	char *cur = cmdline;

	*crash_size = memparse(cmdline, &cur);
	if (cmdline == cur) {
		pr_warn("crashkernel: memory value expected\n");
		return -EINVAL;
	}

	if (*cur == '@')
		*crash_base = memparse(cur+1, &cur);
	else if (*cur != ' ' && *cur != '\0') {
		pr_warn("crashkernel: unrecognized char: %c\n", *cur);
		return -EINVAL;
	}

	return 0;
}

#define SUFFIX_HIGH 0
#define SUFFIX_LOW  1
#define SUFFIX_NULL 2
static __initdata char *suffix_tbl[] = {
	[SUFFIX_HIGH] = ",high",
	[SUFFIX_LOW]  = ",low",
	[SUFFIX_NULL] = NULL,
};

/*
 * This function parses "suffix" crashkernel command lines like
 *
 *	crashkernel=size,[high|low]
 *
 * It returns 0 on success and -EINVAL on failure.
 */
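/*
 * Example (illustrative): crashkernel=256M,high asks for 256M via the
 * ",high" path, which arch code (x86, for instance) uses to allow the
 * reservation to sit above 4G.
 */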
static int __init parse_crashkernel_suffix(char *cmdline,
					   unsigned long long *crash_size,
					   const char *suffix)
{
	char *cur = cmdline;

	*crash_size = memparse(cmdline, &cur);
	if (cmdline == cur) {
		pr_warn("crashkernel: memory value expected\n");
		return -EINVAL;
	}

	/* check with suffix */
	if (strncmp(cur, suffix, strlen(suffix))) {
		pr_warn("crashkernel: unrecognized char: %c\n", *cur);
		return -EINVAL;
	}
	cur += strlen(suffix);
	if (*cur != ' ' && *cur != '\0') {
		pr_warn("crashkernel: unrecognized char: %c\n", *cur);
		return -EINVAL;
	}

	return 0;
}

static __init char *get_last_crashkernel(char *cmdline,
					 const char *name,
					 const char *suffix)
{
	char *p = cmdline, *ck_cmdline = NULL;

	/* find crashkernel and use the last one if there are more */
	p = strstr(p, name);
	while (p) {
		char *end_p = strchr(p, ' ');
		char *q;

		if (!end_p)
			end_p = p + strlen(p);

		if (!suffix) {
			int i;

			/* skip the one with any known suffix */
			for (i = 0; suffix_tbl[i]; i++) {
				q = end_p - strlen(suffix_tbl[i]);
				if (!strncmp(q, suffix_tbl[i],
					     strlen(suffix_tbl[i])))
					goto next;
			}
			ck_cmdline = p;
		} else {
			q = end_p - strlen(suffix);
			if (!strncmp(q, suffix, strlen(suffix)))
				ck_cmdline = p;
		}
next:
		p = strstr(p+1, name);
	}

	if (!ck_cmdline)
		return NULL;

	return ck_cmdline;
}

static int __init __parse_crashkernel(char *cmdline,
				      unsigned long long system_ram,
				      unsigned long long *crash_size,
				      unsigned long long *crash_base,
				      const char *name,
				      const char *suffix)
{
	char *first_colon, *first_space;
	char *ck_cmdline;

	BUG_ON(!crash_size || !crash_base);
	*crash_size = 0;
	*crash_base = 0;

	ck_cmdline = get_last_crashkernel(cmdline, name, suffix);

	if (!ck_cmdline)
		return -EINVAL;

	ck_cmdline += strlen(name);

	if (suffix)
		return parse_crashkernel_suffix(ck_cmdline, crash_size,
				suffix);
	/*
	 * if the commandline contains a ':', then that's the extended
	 * syntax -- if not, it must be the classic syntax
	 */
	first_colon = strchr(ck_cmdline, ':');
	first_space = strchr(ck_cmdline, ' ');
	if (first_colon && (!first_space || first_colon < first_space))
		return parse_crashkernel_mem(ck_cmdline, system_ram,
				crash_size, crash_base);

	return parse_crashkernel_simple(ck_cmdline, crash_size, crash_base);
}

/*
 * This function is the entry point for command line parsing and should be
 * called from the arch-specific code.
 */
int __init parse_crashkernel(char *cmdline,
			     unsigned long long system_ram,
			     unsigned long long *crash_size,
			     unsigned long long *crash_base)
{
	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
				   "crashkernel=", NULL);
}

int __init parse_crashkernel_high(char *cmdline,
				  unsigned long long system_ram,
				  unsigned long long *crash_size,
				  unsigned long long *crash_base)
{
	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
				   "crashkernel=", suffix_tbl[SUFFIX_HIGH]);
}

int __init parse_crashkernel_low(char *cmdline,
				 unsigned long long system_ram,
				 unsigned long long *crash_size,
				 unsigned long long *crash_base)
{
	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
				   "crashkernel=", suffix_tbl[SUFFIX_LOW]);
}

static void update_vmcoreinfo_note(void)
{
	u32 *buf = vmcoreinfo_note;

	if (!vmcoreinfo_size)
		return;
	buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data,
			      vmcoreinfo_size);
	final_note(buf);
}

void crash_save_vmcoreinfo(void)
{
	vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
	update_vmcoreinfo_note();
}

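/*
 * Append a formatted string to the vmcoreinfo blob. Note that the
 * output is silently clamped both to the 0x50-byte scratch buffer
 * below and to the space remaining in vmcoreinfo_data, so overly long
 * entries are truncated rather than rejected.
 */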
void vmcoreinfo_append_str(const char *fmt, ...)
{
	va_list args;
	char buf[0x50];
	size_t r;

	va_start(args, fmt);
	r = vscnprintf(buf, sizeof(buf), fmt, args);
	va_end(args);

	r = min(r, vmcoreinfo_max_size - vmcoreinfo_size);

	memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);

	vmcoreinfo_size += r;
}

/*
 * provide an empty default implementation here -- architecture
 * code may override this
 */
void __weak arch_crash_save_vmcoreinfo(void)
{}

unsigned long __weak paddr_vmcoreinfo_note(void)
{
	return __pa((unsigned long)(char *)&vmcoreinfo_note);
}

static int __init crash_save_vmcoreinfo_init(void)
{
	VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
	VMCOREINFO_PAGESIZE(PAGE_SIZE);

	VMCOREINFO_SYMBOL(init_uts_ns);
	VMCOREINFO_SYMBOL(node_online_map);
#ifdef CONFIG_MMU
	VMCOREINFO_SYMBOL(swapper_pg_dir);
#endif
	VMCOREINFO_SYMBOL(_stext);
	VMCOREINFO_SYMBOL(vmap_area_list);

#ifndef CONFIG_NEED_MULTIPLE_NODES
	VMCOREINFO_SYMBOL(mem_map);
	VMCOREINFO_SYMBOL(contig_page_data);
#endif
#ifdef CONFIG_SPARSEMEM
	VMCOREINFO_SYMBOL(mem_section);
	VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
	VMCOREINFO_STRUCT_SIZE(mem_section);
	VMCOREINFO_OFFSET(mem_section, section_mem_map);
#endif
	VMCOREINFO_STRUCT_SIZE(page);
	VMCOREINFO_STRUCT_SIZE(pglist_data);
	VMCOREINFO_STRUCT_SIZE(zone);
	VMCOREINFO_STRUCT_SIZE(free_area);
	VMCOREINFO_STRUCT_SIZE(list_head);
	VMCOREINFO_SIZE(nodemask_t);
	VMCOREINFO_OFFSET(page, flags);
	VMCOREINFO_OFFSET(page, _refcount);
	VMCOREINFO_OFFSET(page, mapping);
	VMCOREINFO_OFFSET(page, lru);
	VMCOREINFO_OFFSET(page, _mapcount);
	VMCOREINFO_OFFSET(page, private);
	VMCOREINFO_OFFSET(page, compound_dtor);
	VMCOREINFO_OFFSET(page, compound_order);
	VMCOREINFO_OFFSET(page, compound_head);
	VMCOREINFO_OFFSET(pglist_data, node_zones);
	VMCOREINFO_OFFSET(pglist_data, nr_zones);
#ifdef CONFIG_FLAT_NODE_MEM_MAP
	VMCOREINFO_OFFSET(pglist_data, node_mem_map);
#endif
	VMCOREINFO_OFFSET(pglist_data, node_start_pfn);
	VMCOREINFO_OFFSET(pglist_data, node_spanned_pages);
	VMCOREINFO_OFFSET(pglist_data, node_id);
	VMCOREINFO_OFFSET(zone, free_area);
	VMCOREINFO_OFFSET(zone, vm_stat);
	VMCOREINFO_OFFSET(zone, spanned_pages);
	VMCOREINFO_OFFSET(free_area, free_list);
	VMCOREINFO_OFFSET(list_head, next);
	VMCOREINFO_OFFSET(list_head, prev);
	VMCOREINFO_OFFSET(vmap_area, va_start);
	VMCOREINFO_OFFSET(vmap_area, list);
	VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER);
	log_buf_kexec_setup();
	VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES);
	VMCOREINFO_NUMBER(NR_FREE_PAGES);
	VMCOREINFO_NUMBER(PG_lru);
	VMCOREINFO_NUMBER(PG_private);
	VMCOREINFO_NUMBER(PG_swapcache);
	VMCOREINFO_NUMBER(PG_slab);
#ifdef CONFIG_MEMORY_FAILURE
	VMCOREINFO_NUMBER(PG_hwpoison);
#endif
	VMCOREINFO_NUMBER(PG_head_mask);
	VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
#ifdef CONFIG_X86
	VMCOREINFO_NUMBER(KERNEL_IMAGE_SIZE);
#endif
#ifdef CONFIG_HUGETLB_PAGE
	VMCOREINFO_NUMBER(HUGETLB_PAGE_DTOR);
#endif

	arch_crash_save_vmcoreinfo();
	update_vmcoreinfo_note();

	return 0;
}

subsys_initcall(crash_save_vmcoreinfo_init);

/*
 * Move into place and start executing a preloaded standalone
 * executable. If nothing was preloaded return an error.
 */
int kernel_kexec(void)
{
	int error = 0;

	if (!mutex_trylock(&kexec_mutex))
		return -EBUSY;
	if (!kexec_image) {
		error = -EINVAL;
		goto Unlock;
	}

#ifdef CONFIG_KEXEC_JUMP
	if (kexec_image->preserve_context) {
		lock_system_sleep();
		pm_prepare_console();
		error = freeze_processes();
		if (error) {
			error = -EBUSY;
			goto Restore_console;
		}
		suspend_console();
		error = dpm_suspend_start(PMSG_FREEZE);
		if (error)
			goto Resume_console;
		/* At this point, dpm_suspend_start() has been called,
		 * but *not* dpm_suspend_end(). We *must* call
		 * dpm_suspend_end() now. Otherwise, drivers for
		 * some devices (e.g. interrupt controllers) become
		 * desynchronized with the actual state of the
		 * hardware at resume time, and evil weirdness ensues.
		 */
		error = dpm_suspend_end(PMSG_FREEZE);
		if (error)
			goto Resume_devices;
		error = disable_nonboot_cpus();
		if (error)
			goto Enable_cpus;
		local_irq_disable();
		error = syscore_suspend();
		if (error)
			goto Enable_irqs;
	} else
#endif
	{
		kexec_in_progress = true;
		kernel_restart_prepare(NULL);
		migrate_to_reboot_cpu();

		/*
		 * migrate_to_reboot_cpu() disables CPU hotplug assuming that
		 * no further code needs to use CPU hotplug (which is true in
		 * the reboot case). However, the kexec path depends on using
		 * CPU hotplug again; so re-enable it here.
		 */
		cpu_hotplug_enable();
		pr_emerg("Starting new kernel\n");
		machine_shutdown();
	}

	machine_kexec(kexec_image);

#ifdef CONFIG_KEXEC_JUMP
	if (kexec_image->preserve_context) {
		syscore_resume();
 Enable_irqs:
		local_irq_enable();
 Enable_cpus:
		enable_nonboot_cpus();
		dpm_resume_start(PMSG_RESTORE);
 Resume_devices:
		dpm_resume_end(PMSG_RESTORE);
 Resume_console:
		resume_console();
		thaw_processes();
 Restore_console:
		pm_restore_console();
		unlock_system_sleep();
	}
#endif

 Unlock:
	mutex_unlock(&kexec_mutex);
	return error;
}

/*
 * Add and remove page tables for crashkernel memory
 *
 * Provide an empty default implementation here -- architecture
 * code may override this
 */
void __weak crash_map_reserved_pages(void)
{}

void __weak crash_unmap_reserved_pages(void)
{}