Blame - kernel/kexec.c - SHIFTPHONES/mainline/linux

blob: 9a26eec9eb04b858cfb7d3244071b94f4d4872c9 [file] [log] [blame]

Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1	/*
				2	* kexec.c - kexec system call
				3	* Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
				4	*
				5	* This source code is licensed under the GNU General Public License,
				6	* Version 2. See the file COPYING for more details.
				7	*/
				8
Randy.Dunlap	c59ede7	2006-01-11 12:17:46 -0800	[diff] [blame]	9	#include <linux/capability.h>
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	10	#include <linux/mm.h>
				11	#include <linux/file.h>
				12	#include <linux/slab.h>
				13	#include <linux/fs.h>
				14	#include <linux/kexec.h>
				15	#include <linux/spinlock.h>
				16	#include <linux/list.h>
				17	#include <linux/highmem.h>
				18	#include <linux/syscalls.h>
				19	#include <linux/reboot.h>
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	20	#include <linux/ioport.h>
Alexander Nyberg	6e274d1	2005-06-25 14:58:26 -0700	[diff] [blame]	21	#include <linux/hardirq.h>
Magnus Damm	85916f8	2006-12-06 20:40:41 -0800	[diff] [blame]	22	#include <linux/elf.h>
				23	#include <linux/elfcore.h>
Ken'ichi Ohmichi	fd59d23	2007-10-16 23:27:27 -0700	[diff] [blame]	24	#include <linux/utsrelease.h>
				25	#include <linux/utsname.h>
				26	#include <linux/numa.h>
Alexander Nyberg	6e274d1	2005-06-25 14:58:26 -0700	[diff] [blame]	27
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	28	#include <asm/page.h>
				29	#include <asm/uaccess.h>
				30	#include <asm/io.h>
				31	#include <asm/system.h>
				32	#include <asm/semaphore.h>
Ken'ichi Ohmichi	fd59d23	2007-10-16 23:27:27 -0700	[diff] [blame]	33	#include <asm/sections.h>
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	34
Vivek Goyal	cc57165	2006-01-09 20:51:41 -0800	[diff] [blame]	35	/* Per cpu memory for storing cpu states in case of system crash. */
				36	note_buf_t* crash_notes;
				37
Ken'ichi Ohmichi	fd59d23	2007-10-16 23:27:27 -0700	[diff] [blame]	38	/* vmcoreinfo stuff */
				39	unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
				40	u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
Ken'ichi Ohmichi	d768281	2007-10-16 23:27:28 -0700	[diff] [blame]	41	size_t vmcoreinfo_size;
				42	size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
Ken'ichi Ohmichi	fd59d23	2007-10-16 23:27:27 -0700	[diff] [blame]	43
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	44	/* Location of the reserved area for the crash kernel */
				45	struct resource crashk_res = {
				46	.name = "Crash kernel",
				47	.start = 0,
				48	.end = 0,
				49	.flags = IORESOURCE_BUSY \| IORESOURCE_MEM
				50	};
				51
Alexander Nyberg	6e274d1	2005-06-25 14:58:26 -0700	[diff] [blame]	52	int kexec_should_crash(struct task_struct *p)
				53	{
Serge E. Hallyn	b460cbc	2007-10-18 23:39:52 -0700	[diff] [blame]	54	if (in_interrupt() \|\| !p->pid \|\| is_global_init(p) \|\| panic_on_oops)
Alexander Nyberg	6e274d1	2005-06-25 14:58:26 -0700	[diff] [blame]	55	return 1;
				56	return 0;
				57	}
				58
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	59	/*
				60	* When kexec transitions to the new kernel there is a one-to-one
				61	* mapping between physical and virtual addresses. On processors
				62	* where you can disable the MMU this is trivial, and easy. For
				63	* others it is still a simple predictable page table to setup.
				64	*
				65	* In that environment kexec copies the new kernel to its final
				66	* resting place. This means I can only support memory whose
				67	* physical address can fit in an unsigned long. In particular
				68	* addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled.
				69	* If the assembly stub has more restrictive requirements
				70	* KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DEST_MEMORY_LIMIT can be
				71	* defined more restrictively in <asm/kexec.h>.
				72	*
				73	* The code for the transition from the current kernel to the
				74	* new kernel is placed in the control_code_buffer, whose size
				75	* is given by KEXEC_CONTROL_CODE_SIZE. In the best case only a single
				76	* page of memory is necessary, but some architectures require more.
				77	* Because this memory must be identity mapped in the transition from
				78	* virtual to physical addresses it must live in the range
				79	* 0 - TASK_SIZE, as only the user space mappings are arbitrarily
				80	* modifiable.
				81	*
				82	* The assembly stub in the control code buffer is passed a linked list
				83	* of descriptor pages detailing the source pages of the new kernel,
				84	* and the destination addresses of those source pages. As this data
				85	* structure is not used in the context of the current OS, it must
				86	* be self-contained.
				87	*
				88	* The code has been made to work with highmem pages and will use a
				89	* destination page in its final resting place (if it happens
				90	* to allocate it). The end product of this is that most of the
				91	* physical address space, and most of RAM can be used.
				92	*
				93	* Future directions include:
				94	* - allocating a page table with the control code buffer identity
				95	* mapped, to simplify machine_kexec and make kexec_on_panic more
				96	* reliable.
				97	*/
				98
				99	/*
				100	* KIMAGE_NO_DEST is an impossible destination address..., for
				101	* allocating pages whose destination address we do not care about.
				102	*/
				103	#define KIMAGE_NO_DEST (-1UL)
				104
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	105	static int kimage_is_destination_range(struct kimage *image,
				106	unsigned long start, unsigned long end);
				107	static struct page kimage_alloc_page(struct kimage image,
Al Viro	9796fdd	2005-10-21 03:22:03 -0400	[diff] [blame]	108	gfp_t gfp_mask,
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	109	unsigned long dest);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	110
				111	static int do_kimage_alloc(struct kimage **rimage, unsigned long entry,
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	112	unsigned long nr_segments,
				113	struct kexec_segment __user *segments)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	114	{
				115	size_t segment_bytes;
				116	struct kimage *image;
				117	unsigned long i;
				118	int result;
				119
				120	/* Allocate a controlling structure */
				121	result = -ENOMEM;
Burman Yan	4668edc	2006-12-06 20:38:51 -0800	[diff] [blame]	122	image = kzalloc(sizeof(*image), GFP_KERNEL);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	123	if (!image)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	124	goto out;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	125
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	126	image->head = 0;
				127	image->entry = &image->head;
				128	image->last_entry = &image->head;
				129	image->control_page = ~0; /* By default this does not apply */
				130	image->start = entry;
				131	image->type = KEXEC_TYPE_DEFAULT;
				132
				133	/* Initialize the list of control pages */
				134	INIT_LIST_HEAD(&image->control_pages);
				135
				136	/* Initialize the list of destination pages */
				137	INIT_LIST_HEAD(&image->dest_pages);
				138
				139	/* Initialize the list of unuseable pages */
				140	INIT_LIST_HEAD(&image->unuseable_pages);
				141
				142	/* Read in the segments */
				143	image->nr_segments = nr_segments;
				144	segment_bytes = nr_segments * sizeof(*segments);
				145	result = copy_from_user(image->segment, segments, segment_bytes);
				146	if (result)
				147	goto out;
				148
				149	/*
				150	* Verify we have good destination addresses. The caller is
				151	* responsible for making certain we don't attempt to load
				152	* the new image into invalid or reserved areas of RAM. This
				153	* just verifies it is an address we can use.
				154	*
				155	* Since the kernel does everything in page size chunks ensure
				156	* the destination addreses are page aligned. Too many
				157	* special cases crop of when we don't do this. The most
				158	* insidious is getting overlapping destination addresses
				159	* simply because addresses are changed to page size
				160	* granularity.
				161	*/
				162	result = -EADDRNOTAVAIL;
				163	for (i = 0; i < nr_segments; i++) {
				164	unsigned long mstart, mend;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	165
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	166	mstart = image->segment[i].mem;
				167	mend = mstart + image->segment[i].memsz;
				168	if ((mstart & ~PAGE_MASK) \|\| (mend & ~PAGE_MASK))
				169	goto out;
				170	if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT)
				171	goto out;
				172	}
				173
				174	/* Verify our destination addresses do not overlap.
				175	* If we alloed overlapping destination addresses
				176	* through very weird things can happen with no
				177	* easy explanation as one segment stops on another.
				178	*/
				179	result = -EINVAL;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	180	for (i = 0; i < nr_segments; i++) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	181	unsigned long mstart, mend;
				182	unsigned long j;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	183
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	184	mstart = image->segment[i].mem;
				185	mend = mstart + image->segment[i].memsz;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	186	for (j = 0; j < i; j++) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	187	unsigned long pstart, pend;
				188	pstart = image->segment[j].mem;
				189	pend = pstart + image->segment[j].memsz;
				190	/* Do the segments overlap ? */
				191	if ((mend > pstart) && (mstart < pend))
				192	goto out;
				193	}
				194	}
				195
				196	/* Ensure our buffer sizes are strictly less than
				197	* our memory sizes. This should always be the case,
				198	* and it is easier to check up front than to be surprised
				199	* later on.
				200	*/
				201	result = -EINVAL;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	202	for (i = 0; i < nr_segments; i++) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	203	if (image->segment[i].bufsz > image->segment[i].memsz)
				204	goto out;
				205	}
				206
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	207	result = 0;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	208	out:
				209	if (result == 0)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	210	*rimage = image;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	211	else
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	212	kfree(image);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	213
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	214	return result;
				215
				216	}
				217
				218	static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry,
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	219	unsigned long nr_segments,
				220	struct kexec_segment __user *segments)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	221	{
				222	int result;
				223	struct kimage *image;
				224
				225	/* Allocate and initialize a controlling structure */
				226	image = NULL;
				227	result = do_kimage_alloc(&image, entry, nr_segments, segments);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	228	if (result)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	229	goto out;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	230
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	231	*rimage = image;
				232
				233	/*
				234	* Find a location for the control code buffer, and add it
				235	* the vector of segments so that it's pages will also be
				236	* counted as destination pages.
				237	*/
				238	result = -ENOMEM;
				239	image->control_code_page = kimage_alloc_control_pages(image,
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	240	get_order(KEXEC_CONTROL_CODE_SIZE));
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	241	if (!image->control_code_page) {
				242	printk(KERN_ERR "Could not allocate control_code_buffer\n");
				243	goto out;
				244	}
				245
				246	result = 0;
				247	out:
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	248	if (result == 0)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	249	*rimage = image;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	250	else
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	251	kfree(image);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	252
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	253	return result;
				254	}
				255
				256	static int kimage_crash_alloc(struct kimage **rimage, unsigned long entry,
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	257	unsigned long nr_segments,
Alexey Dobriyan	314b6a4	2005-06-27 22:29:33 -0700	[diff] [blame]	258	struct kexec_segment __user *segments)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	259	{
				260	int result;
				261	struct kimage *image;
				262	unsigned long i;
				263
				264	image = NULL;
				265	/* Verify we have a valid entry point */
				266	if ((entry < crashk_res.start) \|\| (entry > crashk_res.end)) {
				267	result = -EADDRNOTAVAIL;
				268	goto out;
				269	}
				270
				271	/* Allocate and initialize a controlling structure */
				272	result = do_kimage_alloc(&image, entry, nr_segments, segments);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	273	if (result)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	274	goto out;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	275
				276	/* Enable the special crash kernel control page
				277	* allocation policy.
				278	*/
				279	image->control_page = crashk_res.start;
				280	image->type = KEXEC_TYPE_CRASH;
				281
				282	/*
				283	* Verify we have good destination addresses. Normally
				284	* the caller is responsible for making certain we don't
				285	* attempt to load the new image into invalid or reserved
				286	* areas of RAM. But crash kernels are preloaded into a
				287	* reserved area of ram. We must ensure the addresses
				288	* are in the reserved area otherwise preloading the
				289	* kernel could corrupt things.
				290	*/
				291	result = -EADDRNOTAVAIL;
				292	for (i = 0; i < nr_segments; i++) {
				293	unsigned long mstart, mend;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	294
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	295	mstart = image->segment[i].mem;
Vivek Goyal	50cccc6	2005-06-25 14:57:55 -0700	[diff] [blame]	296	mend = mstart + image->segment[i].memsz - 1;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	297	/* Ensure we are within the crash kernel limits */
				298	if ((mstart < crashk_res.start) \|\| (mend > crashk_res.end))
				299	goto out;
				300	}
				301
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	302	/*
				303	* Find a location for the control code buffer, and add
				304	* the vector of segments so that it's pages will also be
				305	* counted as destination pages.
				306	*/
				307	result = -ENOMEM;
				308	image->control_code_page = kimage_alloc_control_pages(image,
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	309	get_order(KEXEC_CONTROL_CODE_SIZE));
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	310	if (!image->control_code_page) {
				311	printk(KERN_ERR "Could not allocate control_code_buffer\n");
				312	goto out;
				313	}
				314
				315	result = 0;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	316	out:
				317	if (result == 0)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	318	*rimage = image;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	319	else
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	320	kfree(image);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	321
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	322	return result;
				323	}
				324
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	325	static int kimage_is_destination_range(struct kimage *image,
				326	unsigned long start,
				327	unsigned long end)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	328	{
				329	unsigned long i;
				330
				331	for (i = 0; i < image->nr_segments; i++) {
				332	unsigned long mstart, mend;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	333
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	334	mstart = image->segment[i].mem;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	335	mend = mstart + image->segment[i].memsz;
				336	if ((end > mstart) && (start < mend))
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	337	return 1;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	338	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	339
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	340	return 0;
				341	}
				342
Al Viro	9796fdd	2005-10-21 03:22:03 -0400	[diff] [blame]	343	static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	344	{
				345	struct page *pages;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	346
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	347	pages = alloc_pages(gfp_mask, order);
				348	if (pages) {
				349	unsigned int count, i;
				350	pages->mapping = NULL;
Hugh Dickins	4c21e2f	2005-10-29 18:16:40 -0700	[diff] [blame]	351	set_page_private(pages, order);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	352	count = 1 << order;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	353	for (i = 0; i < count; i++)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	354	SetPageReserved(pages + i);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	355	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	356
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	357	return pages;
				358	}
				359
				360	static void kimage_free_pages(struct page *page)
				361	{
				362	unsigned int order, count, i;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	363
Hugh Dickins	4c21e2f	2005-10-29 18:16:40 -0700	[diff] [blame]	364	order = page_private(page);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	365	count = 1 << order;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	366	for (i = 0; i < count; i++)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	367	ClearPageReserved(page + i);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	368	__free_pages(page, order);
				369	}
				370
				371	static void kimage_free_page_list(struct list_head *list)
				372	{
				373	struct list_head pos, next;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	374
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	375	list_for_each_safe(pos, next, list) {
				376	struct page *page;
				377
				378	page = list_entry(pos, struct page, lru);
				379	list_del(&page->lru);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	380	kimage_free_pages(page);
				381	}
				382	}
				383
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	384	static struct page kimage_alloc_normal_control_pages(struct kimage image,
				385	unsigned int order)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	386	{
				387	/* Control pages are special, they are the intermediaries
				388	* that are needed while we copy the rest of the pages
				389	* to their final resting place. As such they must
				390	* not conflict with either the destination addresses
				391	* or memory the kernel is already using.
				392	*
				393	* The only case where we really need more than one of
				394	* these are for architectures where we cannot disable
				395	* the MMU and must instead generate an identity mapped
				396	* page table for all of the memory.
				397	*
				398	* At worst this runs in O(N) of the image size.
				399	*/
				400	struct list_head extra_pages;
				401	struct page *pages;
				402	unsigned int count;
				403
				404	count = 1 << order;
				405	INIT_LIST_HEAD(&extra_pages);
				406
				407	/* Loop while I can allocate a page and the page allocated
				408	* is a destination page.
				409	*/
				410	do {
				411	unsigned long pfn, epfn, addr, eaddr;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	412
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	413	pages = kimage_alloc_pages(GFP_KERNEL, order);
				414	if (!pages)
				415	break;
				416	pfn = page_to_pfn(pages);
				417	epfn = pfn + count;
				418	addr = pfn << PAGE_SHIFT;
				419	eaddr = epfn << PAGE_SHIFT;
				420	if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) \|\|
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	421	kimage_is_destination_range(image, addr, eaddr)) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	422	list_add(&pages->lru, &extra_pages);
				423	pages = NULL;
				424	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	425	} while (!pages);
				426
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	427	if (pages) {
				428	/* Remember the allocated page... */
				429	list_add(&pages->lru, &image->control_pages);
				430
				431	/* Because the page is already in it's destination
				432	* location we will never allocate another page at
				433	* that address. Therefore kimage_alloc_pages
				434	* will not return it (again) and we don't need
				435	* to give it an entry in image->segment[].
				436	*/
				437	}
				438	/* Deal with the destination pages I have inadvertently allocated.
				439	*
				440	* Ideally I would convert multi-page allocations into single
				441	* page allocations, and add everyting to image->dest_pages.
				442	*
				443	* For now it is simpler to just free the pages.
				444	*/
				445	kimage_free_page_list(&extra_pages);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	446
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	447	return pages;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	448	}
				449
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	450	static struct page kimage_alloc_crash_control_pages(struct kimage image,
				451	unsigned int order)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	452	{
				453	/* Control pages are special, they are the intermediaries
				454	* that are needed while we copy the rest of the pages
				455	* to their final resting place. As such they must
				456	* not conflict with either the destination addresses
				457	* or memory the kernel is already using.
				458	*
				459	* Control pages are also the only pags we must allocate
				460	* when loading a crash kernel. All of the other pages
				461	* are specified by the segments and we just memcpy
				462	* into them directly.
				463	*
				464	* The only case where we really need more than one of
				465	* these are for architectures where we cannot disable
				466	* the MMU and must instead generate an identity mapped
				467	* page table for all of the memory.
				468	*
				469	* Given the low demand this implements a very simple
				470	* allocator that finds the first hole of the appropriate
				471	* size in the reserved memory region, and allocates all
				472	* of the memory up to and including the hole.
				473	*/
				474	unsigned long hole_start, hole_end, size;
				475	struct page *pages;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	476
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	477	pages = NULL;
				478	size = (1 << order) << PAGE_SHIFT;
				479	hole_start = (image->control_page + (size - 1)) & ~(size - 1);
				480	hole_end = hole_start + size - 1;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	481	while (hole_end <= crashk_res.end) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	482	unsigned long i;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	483
				484	if (hole_end > KEXEC_CONTROL_MEMORY_LIMIT)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	485	break;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	486	if (hole_end > crashk_res.end)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	487	break;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	488	/* See if I overlap any of the segments */
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	489	for (i = 0; i < image->nr_segments; i++) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	490	unsigned long mstart, mend;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	491
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	492	mstart = image->segment[i].mem;
				493	mend = mstart + image->segment[i].memsz - 1;
				494	if ((hole_end >= mstart) && (hole_start <= mend)) {
				495	/* Advance the hole to the end of the segment */
				496	hole_start = (mend + (size - 1)) & ~(size - 1);
				497	hole_end = hole_start + size - 1;
				498	break;
				499	}
				500	}
				501	/* If I don't overlap any segments I have found my hole! */
				502	if (i == image->nr_segments) {
				503	pages = pfn_to_page(hole_start >> PAGE_SHIFT);
				504	break;
				505	}
				506	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	507	if (pages)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	508	image->control_page = hole_end;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	509
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	510	return pages;
				511	}
				512
				513
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	514	struct page kimage_alloc_control_pages(struct kimage image,
				515	unsigned int order)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	516	{
				517	struct page *pages = NULL;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	518
				519	switch (image->type) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	520	case KEXEC_TYPE_DEFAULT:
				521	pages = kimage_alloc_normal_control_pages(image, order);
				522	break;
				523	case KEXEC_TYPE_CRASH:
				524	pages = kimage_alloc_crash_control_pages(image, order);
				525	break;
				526	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	527
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	528	return pages;
				529	}
				530
				531	static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
				532	{
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	533	if (*image->entry != 0)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	534	image->entry++;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	535
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	536	if (image->entry == image->last_entry) {
				537	kimage_entry_t *ind_page;
				538	struct page *page;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	539
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	540	page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	541	if (!page)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	542	return -ENOMEM;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	543
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	544	ind_page = page_address(page);
				545	*image->entry = virt_to_phys(ind_page) \| IND_INDIRECTION;
				546	image->entry = ind_page;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	547	image->last_entry = ind_page +
				548	((PAGE_SIZE/sizeof(kimage_entry_t)) - 1);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	549	}
				550	*image->entry = entry;
				551	image->entry++;
				552	*image->entry = 0;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	553
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	554	return 0;
				555	}
				556
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	557	static int kimage_set_destination(struct kimage *image,
				558	unsigned long destination)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	559	{
				560	int result;
				561
				562	destination &= PAGE_MASK;
				563	result = kimage_add_entry(image, destination \| IND_DESTINATION);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	564	if (result == 0)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	565	image->destination = destination;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	566
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	567	return result;
				568	}
				569
				570
				571	static int kimage_add_page(struct kimage *image, unsigned long page)
				572	{
				573	int result;
				574
				575	page &= PAGE_MASK;
				576	result = kimage_add_entry(image, page \| IND_SOURCE);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	577	if (result == 0)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	578	image->destination += PAGE_SIZE;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	579
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	580	return result;
				581	}
				582
				583
				584	static void kimage_free_extra_pages(struct kimage *image)
				585	{
				586	/* Walk through and free any extra destination pages I may have */
				587	kimage_free_page_list(&image->dest_pages);
				588
				589	/* Walk through and free any unuseable pages I have cached */
				590	kimage_free_page_list(&image->unuseable_pages);
				591
				592	}
				593	static int kimage_terminate(struct kimage *image)
				594	{
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	595	if (*image->entry != 0)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	596	image->entry++;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	597
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	598	*image->entry = IND_DONE;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	599
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	600	return 0;
				601	}
				602
				603	#define for_each_kimage_entry(image, ptr, entry) \
				604	for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \
				605	ptr = (entry & IND_INDIRECTION)? \
				606	phys_to_virt((entry & PAGE_MASK)): ptr +1)
				607
				608	static void kimage_free_entry(kimage_entry_t entry)
				609	{
				610	struct page *page;
				611
				612	page = pfn_to_page(entry >> PAGE_SHIFT);
				613	kimage_free_pages(page);
				614	}
				615
				616	static void kimage_free(struct kimage *image)
				617	{
				618	kimage_entry_t *ptr, entry;
				619	kimage_entry_t ind = 0;
				620
				621	if (!image)
				622	return;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	623
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	624	kimage_free_extra_pages(image);
				625	for_each_kimage_entry(image, ptr, entry) {
				626	if (entry & IND_INDIRECTION) {
				627	/* Free the previous indirection page */
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	628	if (ind & IND_INDIRECTION)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	629	kimage_free_entry(ind);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	630	/* Save this indirection page until we are
				631	* done with it.
				632	*/
				633	ind = entry;
				634	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	635	else if (entry & IND_SOURCE)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	636	kimage_free_entry(entry);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	637	}
				638	/* Free the final indirection page */
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	639	if (ind & IND_INDIRECTION)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	640	kimage_free_entry(ind);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	641
				642	/* Handle any machine specific cleanup */
				643	machine_kexec_cleanup(image);
				644
				645	/* Free the kexec control pages... */
				646	kimage_free_page_list(&image->control_pages);
				647	kfree(image);
				648	}
				649
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	650	static kimage_entry_t kimage_dst_used(struct kimage image,
				651	unsigned long page)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	652	{
				653	kimage_entry_t *ptr, entry;
				654	unsigned long destination = 0;
				655
				656	for_each_kimage_entry(image, ptr, entry) {
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	657	if (entry & IND_DESTINATION)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	658	destination = entry & PAGE_MASK;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	659	else if (entry & IND_SOURCE) {
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	660	if (page == destination)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	661	return ptr;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	662	destination += PAGE_SIZE;
				663	}
				664	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	665
Alexey Dobriyan	314b6a4	2005-06-27 22:29:33 -0700	[diff] [blame]	666	return NULL;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	667	}
				668
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	669	static struct page kimage_alloc_page(struct kimage image,
Al Viro	9796fdd	2005-10-21 03:22:03 -0400	[diff] [blame]	670	gfp_t gfp_mask,
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	671	unsigned long destination)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	672	{
				673	/*
				674	* Here we implement safeguards to ensure that a source page
				675	* is not copied to its destination page before the data on
				676	* the destination page is no longer useful.
				677	*
				678	* To do this we maintain the invariant that a source page is
				679	* either its own destination page, or it is not a
				680	* destination page at all.
				681	*
				682	* That is slightly stronger than required, but the proof
				683	* that no problems will not occur is trivial, and the
				684	* implementation is simply to verify.
				685	*
				686	* When allocating all pages normally this algorithm will run
				687	* in O(N) time, but in the worst case it will run in O(N^2)
				688	* time. If the runtime is a problem the data structures can
				689	* be fixed.
				690	*/
				691	struct page *page;
				692	unsigned long addr;
				693
				694	/*
				695	* Walk through the list of destination pages, and see if I
				696	* have a match.
				697	*/
				698	list_for_each_entry(page, &image->dest_pages, lru) {
				699	addr = page_to_pfn(page) << PAGE_SHIFT;
				700	if (addr == destination) {
				701	list_del(&page->lru);
				702	return page;
				703	}
				704	}
				705	page = NULL;
				706	while (1) {
				707	kimage_entry_t *old;
				708
				709	/* Allocate a page, if we run out of memory give up */
				710	page = kimage_alloc_pages(gfp_mask, 0);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	711	if (!page)
Alexey Dobriyan	314b6a4	2005-06-27 22:29:33 -0700	[diff] [blame]	712	return NULL;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	713	/* If the page cannot be used file it away */
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	714	if (page_to_pfn(page) >
				715	(KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	716	list_add(&page->lru, &image->unuseable_pages);
				717	continue;
				718	}
				719	addr = page_to_pfn(page) << PAGE_SHIFT;
				720
				721	/* If it is the destination page we want use it */
				722	if (addr == destination)
				723	break;
				724
				725	/* If the page is not a destination page use it */
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	726	if (!kimage_is_destination_range(image, addr,
				727	addr + PAGE_SIZE))
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	728	break;
				729
				730	/*
				731	* I know that the page is someones destination page.
				732	* See if there is already a source page for this
				733	* destination page. And if so swap the source pages.
				734	*/
				735	old = kimage_dst_used(image, addr);
				736	if (old) {
				737	/* If so move it */
				738	unsigned long old_addr;
				739	struct page *old_page;
				740
				741	old_addr = *old & PAGE_MASK;
				742	old_page = pfn_to_page(old_addr >> PAGE_SHIFT);
				743	copy_highpage(page, old_page);
				744	old = addr \| (old & ~PAGE_MASK);
				745
				746	/* The old page I have found cannot be a
				747	* destination page, so return it.
				748	*/
				749	addr = old_addr;
				750	page = old_page;
				751	break;
				752	}
				753	else {
				754	/* Place the page on the destination list I
				755	* will use it later.
				756	*/
				757	list_add(&page->lru, &image->dest_pages);
				758	}
				759	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	760
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	761	return page;
				762	}
				763
				764	static int kimage_load_normal_segment(struct kimage *image,
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	765	struct kexec_segment *segment)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	766	{
				767	unsigned long maddr;
				768	unsigned long ubytes, mbytes;
				769	int result;
Alexey Dobriyan	314b6a4	2005-06-27 22:29:33 -0700	[diff] [blame]	770	unsigned char __user *buf;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	771
				772	result = 0;
				773	buf = segment->buf;
				774	ubytes = segment->bufsz;
				775	mbytes = segment->memsz;
				776	maddr = segment->mem;
				777
				778	result = kimage_set_destination(image, maddr);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	779	if (result < 0)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	780	goto out;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	781
				782	while (mbytes) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	783	struct page *page;
				784	char *ptr;
				785	size_t uchunk, mchunk;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	786
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	787	page = kimage_alloc_page(image, GFP_HIGHUSER, maddr);
Stephen Hemminger	c80544d	2007-10-18 03:07:05 -0700	[diff] [blame]	788	if (!page) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	789	result = -ENOMEM;
				790	goto out;
				791	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	792	result = kimage_add_page(image, page_to_pfn(page)
				793	<< PAGE_SHIFT);
				794	if (result < 0)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	795	goto out;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	796
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	797	ptr = kmap(page);
				798	/* Start with a clear page */
				799	memset(ptr, 0, PAGE_SIZE);
				800	ptr += maddr & ~PAGE_MASK;
				801	mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	802	if (mchunk > mbytes)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	803	mchunk = mbytes;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	804
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	805	uchunk = mchunk;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	806	if (uchunk > ubytes)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	807	uchunk = ubytes;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	808
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	809	result = copy_from_user(ptr, buf, uchunk);
				810	kunmap(page);
				811	if (result) {
				812	result = (result < 0) ? result : -EIO;
				813	goto out;
				814	}
				815	ubytes -= uchunk;
				816	maddr += mchunk;
				817	buf += mchunk;
				818	mbytes -= mchunk;
				819	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	820	out:
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	821	return result;
				822	}
				823
				824	static int kimage_load_crash_segment(struct kimage *image,
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	825	struct kexec_segment *segment)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	826	{
				827	/* For crash dumps kernels we simply copy the data from
				828	* user space to it's destination.
				829	* We do things a page at a time for the sake of kmap.
				830	*/
				831	unsigned long maddr;
				832	unsigned long ubytes, mbytes;
				833	int result;
Alexey Dobriyan	314b6a4	2005-06-27 22:29:33 -0700	[diff] [blame]	834	unsigned char __user *buf;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	835
				836	result = 0;
				837	buf = segment->buf;
				838	ubytes = segment->bufsz;
				839	mbytes = segment->memsz;
				840	maddr = segment->mem;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	841	while (mbytes) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	842	struct page *page;
				843	char *ptr;
				844	size_t uchunk, mchunk;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	845
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	846	page = pfn_to_page(maddr >> PAGE_SHIFT);
Stephen Hemminger	c80544d	2007-10-18 03:07:05 -0700	[diff] [blame]	847	if (!page) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	848	result = -ENOMEM;
				849	goto out;
				850	}
				851	ptr = kmap(page);
				852	ptr += maddr & ~PAGE_MASK;
				853	mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	854	if (mchunk > mbytes)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	855	mchunk = mbytes;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	856
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	857	uchunk = mchunk;
				858	if (uchunk > ubytes) {
				859	uchunk = ubytes;
				860	/* Zero the trailing part of the page */
				861	memset(ptr + uchunk, 0, mchunk - uchunk);
				862	}
				863	result = copy_from_user(ptr, buf, uchunk);
Zou Nan hai	a7956113	2006-12-07 09:51:35 -0800	[diff] [blame]	864	kexec_flush_icache_page(page);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	865	kunmap(page);
				866	if (result) {
				867	result = (result < 0) ? result : -EIO;
				868	goto out;
				869	}
				870	ubytes -= uchunk;
				871	maddr += mchunk;
				872	buf += mchunk;
				873	mbytes -= mchunk;
				874	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	875	out:
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	876	return result;
				877	}
				878
				879	static int kimage_load_segment(struct kimage *image,
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	880	struct kexec_segment *segment)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	881	{
				882	int result = -ENOMEM;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	883
				884	switch (image->type) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	885	case KEXEC_TYPE_DEFAULT:
				886	result = kimage_load_normal_segment(image, segment);
				887	break;
				888	case KEXEC_TYPE_CRASH:
				889	result = kimage_load_crash_segment(image, segment);
				890	break;
				891	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	892
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	893	return result;
				894	}
				895
/*
 * Exec Kernel system call: for obvious reasons only root may call it.
 *
 * This call breaks up into three pieces.
 * - A generic part which loads the new kernel from the current
 *   address space, and very carefully places the data in the
 *   allocated pages.
 *
 * - A generic part that interacts with the kernel and tells all of
 *   the devices to shut down, preventing on-going DMAs and placing
 *   the devices in a consistent state so a later kernel can
 *   reinitialize them.
 *
 * - A machine specific part that includes the syscall number
 *   and then copies the image to its final destination, and
 *   jumps into the image at entry.
 *
 * kexec does not sync or unmount filesystems, so if you need
 * that to happen you need to do that yourself.
 */
/* Image installed by sys_kexec_load() without KEXEC_ON_CRASH. */
struct kimage *kexec_image;
/* Image installed with KEXEC_ON_CRASH; booted by crash_kexec(). */
struct kimage *kexec_crash_image;
/*
 * A home grown binary mutex, taken and released with xchg().
 * Nothing can wait on it, so it is safe to use in interrupt
 * context :)
 */
static int kexec_lock;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	924
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	925	asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments,
				926	struct kexec_segment __user *segments,
				927	unsigned long flags)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	928	{
				929	struct kimage *dest_image, image;
				930	int locked;
				931	int result;
				932
				933	/* We only trust the superuser with rebooting the system. */
				934	if (!capable(CAP_SYS_BOOT))
				935	return -EPERM;
				936
				937	/*
				938	* Verify we have a legal set of flags
				939	* This leaves us room for future extensions.
				940	*/
				941	if ((flags & KEXEC_FLAGS) != (flags & ~KEXEC_ARCH_MASK))
				942	return -EINVAL;
				943
				944	/* Verify we are on the appropriate architecture */
				945	if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) &&
				946	((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT))
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	947	return -EINVAL;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	948
				949	/* Put an artificial cap on the number
				950	* of segments passed to kexec_load.
				951	*/
				952	if (nr_segments > KEXEC_SEGMENT_MAX)
				953	return -EINVAL;
				954
				955	image = NULL;
				956	result = 0;
				957
				958	/* Because we write directly to the reserved memory
				959	* region when loading crash kernels we need a mutex here to
				960	* prevent multiple crash kernels from attempting to load
				961	* simultaneously, and to prevent a crash kernel from loading
				962	* over the top of a in use crash kernel.
				963	*
				964	* KISS: always take the mutex.
				965	*/
				966	locked = xchg(&kexec_lock, 1);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	967	if (locked)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	968	return -EBUSY;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	969
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	970	dest_image = &kexec_image;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	971	if (flags & KEXEC_ON_CRASH)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	972	dest_image = &kexec_crash_image;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	973	if (nr_segments > 0) {
				974	unsigned long i;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	975
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	976	/* Loading another kernel to reboot into */
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	977	if ((flags & KEXEC_ON_CRASH) == 0)
				978	result = kimage_normal_alloc(&image, entry,
				979	nr_segments, segments);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	980	/* Loading another kernel to switch to if this one crashes */
				981	else if (flags & KEXEC_ON_CRASH) {
				982	/* Free any current crash dump kernel before
				983	* we corrupt it.
				984	*/
				985	kimage_free(xchg(&kexec_crash_image, NULL));
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	986	result = kimage_crash_alloc(&image, entry,
				987	nr_segments, segments);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	988	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	989	if (result)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	990	goto out;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	991
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	992	result = machine_kexec_prepare(image);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	993	if (result)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	994	goto out;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	995
				996	for (i = 0; i < nr_segments; i++) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	997	result = kimage_load_segment(image, &image->segment[i]);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	998	if (result)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	999	goto out;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1000	}
				1001	result = kimage_terminate(image);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	1002	if (result)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1003	goto out;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1004	}
				1005	/* Install the new kernel, and Uninstall the old */
				1006	image = xchg(dest_image, image);
				1007
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	1008	out:
Roland McGrath	0b4a8a7	2006-09-29 02:00:39 -0700	[diff] [blame]	1009	locked = xchg(&kexec_lock, 0); /* Release the mutex */
				1010	BUG_ON(!locked);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1011	kimage_free(image);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	1012
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1013	return result;
				1014	}
				1015
				1016	#ifdef CONFIG_COMPAT
				1017	asmlinkage long compat_sys_kexec_load(unsigned long entry,
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	1018	unsigned long nr_segments,
				1019	struct compat_kexec_segment __user *segments,
				1020	unsigned long flags)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1021	{
				1022	struct compat_kexec_segment in;
				1023	struct kexec_segment out, __user *ksegments;
				1024	unsigned long i, result;
				1025
				1026	/* Don't allow clients that don't understand the native
				1027	* architecture to do anything.
				1028	*/
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	1029	if ((flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_DEFAULT)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1030	return -EINVAL;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1031
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	1032	if (nr_segments > KEXEC_SEGMENT_MAX)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1033	return -EINVAL;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1034
				1035	ksegments = compat_alloc_user_space(nr_segments * sizeof(out));
				1036	for (i=0; i < nr_segments; i++) {
				1037	result = copy_from_user(&in, &segments[i], sizeof(in));
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	1038	if (result)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1039	return -EFAULT;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1040
				1041	out.buf = compat_ptr(in.buf);
				1042	out.bufsz = in.bufsz;
				1043	out.mem = in.mem;
				1044	out.memsz = in.memsz;
				1045
				1046	result = copy_to_user(&ksegments[i], &out, sizeof(out));
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	1047	if (result)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1048	return -EFAULT;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1049	}
				1050
				1051	return sys_kexec_load(entry, nr_segments, ksegments, flags);
				1052	}
				1053	#endif
				1054
Alexander Nyberg	6e274d1	2005-06-25 14:58:26 -0700	[diff] [blame]	1055	void crash_kexec(struct pt_regs *regs)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1056	{
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1057	int locked;
				1058
				1059
				1060	/* Take the kexec_lock here to prevent sys_kexec_load
				1061	* running on one cpu from replacing the crash kernel
				1062	* we are using after a panic on a different cpu.
				1063	*
				1064	* If the crash kernel was not located in a fixed area
				1065	* of memory the xchg(&kexec_crash_image) would be
				1066	* sufficient. But since I reuse the memory...
				1067	*/
				1068	locked = xchg(&kexec_lock, 1);
				1069	if (!locked) {
David Wilder	c0ce7d0	2006-06-23 15:29:34 -0700	[diff] [blame]	1070	if (kexec_crash_image) {
Vivek Goyal	e996e58	2006-01-09 20:51:44 -0800	[diff] [blame]	1071	struct pt_regs fixed_regs;
				1072	crash_setup_regs(&fixed_regs, regs);
Ken'ichi Ohmichi	fd59d23	2007-10-16 23:27:27 -0700	[diff] [blame]	1073	crash_save_vmcoreinfo();
Vivek Goyal	e996e58	2006-01-09 20:51:44 -0800	[diff] [blame]	1074	machine_crash_shutdown(&fixed_regs);
David Wilder	c0ce7d0	2006-06-23 15:29:34 -0700	[diff] [blame]	1075	machine_kexec(kexec_crash_image);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1076	}
Roland McGrath	0b4a8a7	2006-09-29 02:00:39 -0700	[diff] [blame]	1077	locked = xchg(&kexec_lock, 0);
				1078	BUG_ON(!locked);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1079	}
				1080	}
Vivek Goyal	cc57165	2006-01-09 20:51:41 -0800	[diff] [blame]	1081
Magnus Damm	85916f8	2006-12-06 20:40:41 -0800	[diff] [blame]	1082	static u32 append_elf_note(u32 buf, char name, unsigned type, void data,
				1083	size_t data_len)
				1084	{
				1085	struct elf_note note;
				1086
				1087	note.n_namesz = strlen(name) + 1;
				1088	note.n_descsz = data_len;
				1089	note.n_type = type;
				1090	memcpy(buf, &note, sizeof(note));
				1091	buf += (sizeof(note) + 3)/4;
				1092	memcpy(buf, name, note.n_namesz);
				1093	buf += (note.n_namesz + 3)/4;
				1094	memcpy(buf, data, note.n_descsz);
				1095	buf += (note.n_descsz + 3)/4;
				1096
				1097	return buf;
				1098	}
				1099
				1100	static void final_note(u32 *buf)
				1101	{
				1102	struct elf_note note;
				1103
				1104	note.n_namesz = 0;
				1105	note.n_descsz = 0;
				1106	note.n_type = 0;
				1107	memcpy(buf, &note, sizeof(note));
				1108	}
				1109
				1110	void crash_save_cpu(struct pt_regs *regs, int cpu)
				1111	{
				1112	struct elf_prstatus prstatus;
				1113	u32 *buf;
				1114
				1115	if ((cpu < 0) \|\| (cpu >= NR_CPUS))
				1116	return;
				1117
				1118	/* Using ELF notes here is opportunistic.
				1119	* I need a well defined structure format
				1120	* for the data I pass, and I need tags
				1121	* on the data to indicate what information I have
				1122	* squirrelled away. ELF notes happen to provide
				1123	* all of that, so there is no need to invent something new.
				1124	*/
				1125	buf = (u32*)per_cpu_ptr(crash_notes, cpu);
				1126	if (!buf)
				1127	return;
				1128	memset(&prstatus, 0, sizeof(prstatus));
				1129	prstatus.pr_pid = current->pid;
				1130	elf_core_copy_regs(&prstatus.pr_reg, regs);
Simon Horman	6672f76	2007-05-08 00:28:22 -0700	[diff] [blame]	1131	buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
				1132	&prstatus, sizeof(prstatus));
Magnus Damm	85916f8	2006-12-06 20:40:41 -0800	[diff] [blame]	1133	final_note(buf);
				1134	}
				1135
Vivek Goyal	cc57165	2006-01-09 20:51:41 -0800	[diff] [blame]	1136	static int __init crash_notes_memory_init(void)
				1137	{
				1138	/* Allocate memory for saving cpu registers. */
				1139	crash_notes = alloc_percpu(note_buf_t);
				1140	if (!crash_notes) {
				1141	printk("Kexec: Memory allocation for saving cpu register"
				1142	" states failed\n");
				1143	return -ENOMEM;
				1144	}
				1145	return 0;
				1146	}
				1147	module_init(crash_notes_memory_init)
Ken'ichi Ohmichi	fd59d23	2007-10-16 23:27:27 -0700	[diff] [blame]	1148
Bernhard Walle	cba63c3	2007-10-18 23:40:58 -0700	[diff] [blame]	1149
				1150	/*
				1151	* parsing the "crashkernel" commandline
				1152	*
				1153	* this code is intended to be called from architecture specific code
				1154	*/
				1155
				1156
				1157	/*
				1158	* This function parses command lines in the format
				1159	*
				1160	* crashkernel=ramsize-range:size[,...][@offset]
				1161	*
				1162	* The function returns 0 on success and -EINVAL on failure.
				1163	*/
				1164	static int __init parse_crashkernel_mem(char *cmdline,
				1165	unsigned long long system_ram,
				1166	unsigned long long *crash_size,
				1167	unsigned long long *crash_base)
				1168	{
				1169	char cur = cmdline, tmp;
				1170
				1171	/* for each entry of the comma-separated list */
				1172	do {
				1173	unsigned long long start, end = ULLONG_MAX, size;
				1174
				1175	/* get the start of the range */
				1176	start = memparse(cur, &tmp);
				1177	if (cur == tmp) {
				1178	pr_warning("crashkernel: Memory value expected\n");
				1179	return -EINVAL;
				1180	}
				1181	cur = tmp;
				1182	if (*cur != '-') {
				1183	pr_warning("crashkernel: '-' expected\n");
				1184	return -EINVAL;
				1185	}
				1186	cur++;
				1187
				1188	/* if no ':' is here, than we read the end */
				1189	if (*cur != ':') {
				1190	end = memparse(cur, &tmp);
				1191	if (cur == tmp) {
				1192	pr_warning("crashkernel: Memory "
				1193	"value expected\n");
				1194	return -EINVAL;
				1195	}
				1196	cur = tmp;
				1197	if (end <= start) {
				1198	pr_warning("crashkernel: end <= start\n");
				1199	return -EINVAL;
				1200	}
				1201	}
				1202
				1203	if (*cur != ':') {
				1204	pr_warning("crashkernel: ':' expected\n");
				1205	return -EINVAL;
				1206	}
				1207	cur++;
				1208
				1209	size = memparse(cur, &tmp);
				1210	if (cur == tmp) {
				1211	pr_warning("Memory value expected\n");
				1212	return -EINVAL;
				1213	}
				1214	cur = tmp;
				1215	if (size >= system_ram) {
				1216	pr_warning("crashkernel: invalid size\n");
				1217	return -EINVAL;
				1218	}
				1219
				1220	/* match ? */
				1221	if (system_ram >= start && system_ram <= end) {
				1222	*crash_size = size;
				1223	break;
				1224	}
				1225	} while (*cur++ == ',');
				1226
				1227	if (*crash_size > 0) {
				1228	while (cur != ' ' && cur != '@')
				1229	cur++;
				1230	if (*cur == '@') {
				1231	cur++;
				1232	*crash_base = memparse(cur, &tmp);
				1233	if (cur == tmp) {
				1234	pr_warning("Memory value expected "
				1235	"after '@'\n");
				1236	return -EINVAL;
				1237	}
				1238	}
				1239	}
				1240
				1241	return 0;
				1242	}
				1243
				1244	/*
				1245	* That function parses "simple" (old) crashkernel command lines like
				1246	*
				1247	* crashkernel=size[@offset]
				1248	*
				1249	* It returns 0 on success and -EINVAL on failure.
				1250	*/
				1251	static int __init parse_crashkernel_simple(char *cmdline,
				1252	unsigned long long *crash_size,
				1253	unsigned long long *crash_base)
				1254	{
				1255	char *cur = cmdline;
				1256
				1257	*crash_size = memparse(cmdline, &cur);
				1258	if (cmdline == cur) {
				1259	pr_warning("crashkernel: memory value expected\n");
				1260	return -EINVAL;
				1261	}
				1262
				1263	if (*cur == '@')
				1264	*crash_base = memparse(cur+1, &cur);
				1265
				1266	return 0;
				1267	}
				1268
				1269	/*
				1270	* That function is the entry point for command line parsing and should be
				1271	* called from the arch-specific code.
				1272	*/
				1273	int __init parse_crashkernel(char *cmdline,
				1274	unsigned long long system_ram,
				1275	unsigned long long *crash_size,
				1276	unsigned long long *crash_base)
				1277	{
				1278	char p = cmdline, ck_cmdline = NULL;
				1279	char first_colon, first_space;
				1280
				1281	BUG_ON(!crash_size \|\| !crash_base);
				1282	*crash_size = 0;
				1283	*crash_base = 0;
				1284
				1285	/* find crashkernel and use the last one if there are more */
				1286	p = strstr(p, "crashkernel=");
				1287	while (p) {
				1288	ck_cmdline = p;
				1289	p = strstr(p+1, "crashkernel=");
				1290	}
				1291
				1292	if (!ck_cmdline)
				1293	return -EINVAL;
				1294
				1295	ck_cmdline += 12; /* strlen("crashkernel=") */
				1296
				1297	/*
				1298	* if the commandline contains a ':', then that's the extended
				1299	* syntax -- if not, it must be the classic syntax
				1300	*/
				1301	first_colon = strchr(ck_cmdline, ':');
				1302	first_space = strchr(ck_cmdline, ' ');
				1303	if (first_colon && (!first_space \|\| first_colon < first_space))
				1304	return parse_crashkernel_mem(ck_cmdline, system_ram,
				1305	crash_size, crash_base);
				1306	else
				1307	return parse_crashkernel_simple(ck_cmdline, crash_size,
				1308	crash_base);
				1309
				1310	return 0;
				1311	}
				1312
				1313
				1314
Ken'ichi Ohmichi	fd59d23	2007-10-16 23:27:27 -0700	[diff] [blame]	1315	void crash_save_vmcoreinfo(void)
				1316	{
				1317	u32 *buf;
				1318
				1319	if (!vmcoreinfo_size)
				1320	return;
				1321
Ken'ichi Ohmichi	d768281	2007-10-16 23:27:28 -0700	[diff] [blame]	1322	vmcoreinfo_append_str("CRASHTIME=%ld", get_seconds());
Ken'ichi Ohmichi	fd59d23	2007-10-16 23:27:27 -0700	[diff] [blame]	1323
				1324	buf = (u32 *)vmcoreinfo_note;
				1325
				1326	buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data,
				1327	vmcoreinfo_size);
				1328
				1329	final_note(buf);
				1330	}
				1331
				1332	void vmcoreinfo_append_str(const char *fmt, ...)
				1333	{
				1334	va_list args;
				1335	char buf[0x50];
				1336	int r;
				1337
				1338	va_start(args, fmt);
				1339	r = vsnprintf(buf, sizeof(buf), fmt, args);
				1340	va_end(args);
				1341
				1342	if (r + vmcoreinfo_size > vmcoreinfo_max_size)
				1343	r = vmcoreinfo_max_size - vmcoreinfo_size;
				1344
				1345	memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);
				1346
				1347	vmcoreinfo_size += r;
				1348	}
				1349
				1350	/*
				1351	* provide an empty default implementation here -- architecture
				1352	* code may override this
				1353	*/
				1354	void __attribute__ ((weak)) arch_crash_save_vmcoreinfo(void)
				1355	{}
				1356
				1357	unsigned long __attribute__ ((weak)) paddr_vmcoreinfo_note(void)
				1358	{
				1359	return __pa((unsigned long)(char *)&vmcoreinfo_note);
				1360	}
				1361
/*
 * Initcall that records the static vmcoreinfo entries.
 *
 * Appends the OSRELEASE and PAGESIZE lines with vmcoreinfo_append_str(),
 * then invokes the VMCOREINFO_* macros for a fixed list of symbols,
 * structure sizes, member offsets, array lengths and numbers, and finally
 * calls arch_crash_save_vmcoreinfo() so architecture code can add more.
 *
 * NOTE(review): the VMCOREINFO_* macros are defined outside this chunk;
 * presumably each one appends a single entry to the vmcoreinfo data --
 * confirm against their definitions.  The order of the invocations below
 * determines the order in which they run, so keep it stable.
 *
 * Always returns 0.
 */
static int __init crash_save_vmcoreinfo_init(void)
{
	/* Free-form header entries: kernel release string and page size. */
	vmcoreinfo_append_str("OSRELEASE=%s\n", init_uts_ns.name.release);
	vmcoreinfo_append_str("PAGESIZE=%ld\n", PAGE_SIZE);

	/* Global symbols. */
	VMCOREINFO_SYMBOL(init_uts_ns);
	VMCOREINFO_SYMBOL(node_online_map);
	VMCOREINFO_SYMBOL(swapper_pg_dir);
	VMCOREINFO_SYMBOL(_stext);

	/* Only recorded when CONFIG_NEED_MULTIPLE_NODES is not set. */
#ifndef CONFIG_NEED_MULTIPLE_NODES
	VMCOREINFO_SYMBOL(mem_map);
	VMCOREINFO_SYMBOL(contig_page_data);
#endif
	/* mem_section details are only recorded under CONFIG_SPARSEMEM. */
#ifdef CONFIG_SPARSEMEM
	VMCOREINFO_SYMBOL(mem_section);
	VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
	VMCOREINFO_SIZE(mem_section);
	VMCOREINFO_OFFSET(mem_section, section_mem_map);
#endif
	/* Sizes of core memory-management types. */
	VMCOREINFO_SIZE(page);
	VMCOREINFO_SIZE(pglist_data);
	VMCOREINFO_SIZE(zone);
	VMCOREINFO_SIZE(free_area);
	VMCOREINFO_SIZE(list_head);
	VMCOREINFO_TYPEDEF_SIZE(nodemask_t);
	/* Member offsets within those types. */
	VMCOREINFO_OFFSET(page, flags);
	VMCOREINFO_OFFSET(page, _count);
	VMCOREINFO_OFFSET(page, mapping);
	VMCOREINFO_OFFSET(page, lru);
	VMCOREINFO_OFFSET(pglist_data, node_zones);
	VMCOREINFO_OFFSET(pglist_data, nr_zones);
	/* node_mem_map is only recorded under CONFIG_FLAT_NODE_MEM_MAP. */
#ifdef CONFIG_FLAT_NODE_MEM_MAP
	VMCOREINFO_OFFSET(pglist_data, node_mem_map);
#endif
	VMCOREINFO_OFFSET(pglist_data, node_start_pfn);
	VMCOREINFO_OFFSET(pglist_data, node_spanned_pages);
	VMCOREINFO_OFFSET(pglist_data, node_id);
	VMCOREINFO_OFFSET(zone, free_area);
	VMCOREINFO_OFFSET(zone, vm_stat);
	VMCOREINFO_OFFSET(zone, spanned_pages);
	VMCOREINFO_OFFSET(free_area, free_list);
	VMCOREINFO_OFFSET(list_head, next);
	VMCOREINFO_OFFSET(list_head, prev);
	/* Array lengths and a plain number. */
	VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER);
	VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES);
	VMCOREINFO_NUMBER(NR_FREE_PAGES);

	/* Give architecture code the chance to add its own entries. */
	arch_crash_save_vmcoreinfo();

	return 0;
}

/* Register crash_save_vmcoreinfo_init() via module_init(). */
module_init(crash_save_vmcoreinfo_init)