Blame - kernel/kexec.c - SHIFTPHONES/mainline/linux

blob: 7843548cf2d9576fcc67deeabbb1b1d41f14ddde [file] [log] [blame]

Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1	/*
				2	* kexec.c - kexec system call
				3	* Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
				4	*
				5	* This source code is licensed under the GNU General Public License,
				6	* Version 2. See the file COPYING for more details.
				7	*/
				8
				9	#include <linux/mm.h>
				10	#include <linux/file.h>
				11	#include <linux/slab.h>
				12	#include <linux/fs.h>
				13	#include <linux/kexec.h>
				14	#include <linux/spinlock.h>
				15	#include <linux/list.h>
				16	#include <linux/highmem.h>
				17	#include <linux/syscalls.h>
				18	#include <linux/reboot.h>
				19	#include <linux/syscalls.h>
				20	#include <linux/ioport.h>
Alexander Nyberg	6e274d1	2005-06-25 14:58:26 -0700	[diff] [blame]	21	#include <linux/hardirq.h>
				22
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	23	#include <asm/page.h>
				24	#include <asm/uaccess.h>
				25	#include <asm/io.h>
				26	#include <asm/system.h>
				27	#include <asm/semaphore.h>
				28
				29	/* Location of the reserved area for the crash kernel */
				30	struct resource crashk_res = {
				31	.name = "Crash kernel",
				32	.start = 0,
				33	.end = 0,
				34	.flags = IORESOURCE_BUSY \| IORESOURCE_MEM
				35	};
				36
Alexander Nyberg	6e274d1	2005-06-25 14:58:26 -0700	[diff] [blame]	37	int kexec_should_crash(struct task_struct *p)
				38	{
				39	if (in_interrupt() \|\| !p->pid \|\| p->pid == 1 \|\| panic_on_oops)
				40	return 1;
				41	return 0;
				42	}
				43
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	44	/*
				45	* When kexec transitions to the new kernel there is a one-to-one
				46	* mapping between physical and virtual addresses. On processors
				47	* where you can disable the MMU this is trivial, and easy. For
				48	* others it is still a simple predictable page table to setup.
				49	*
				50	* In that environment kexec copies the new kernel to its final
				51	* resting place. This means I can only support memory whose
				52	* physical address can fit in an unsigned long. In particular
				53	* addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled.
				54	* If the assembly stub has more restrictive requirements
				55	* KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DEST_MEMORY_LIMIT can be
				56	* defined more restrictively in <asm/kexec.h>.
				57	*
				58	* The code for the transition from the current kernel to the
				59	* new kernel is placed in the control_code_buffer, whose size
				60	* is given by KEXEC_CONTROL_CODE_SIZE. In the best case only a single
				61	* page of memory is necessary, but some architectures require more.
				62	* Because this memory must be identity mapped in the transition from
				63	* virtual to physical addresses it must live in the range
				64	* 0 - TASK_SIZE, as only the user space mappings are arbitrarily
				65	* modifiable.
				66	*
				67	* The assembly stub in the control code buffer is passed a linked list
				68	* of descriptor pages detailing the source pages of the new kernel,
				69	* and the destination addresses of those source pages. As this data
				70	* structure is not used in the context of the current OS, it must
				71	* be self-contained.
				72	*
				73	* The code has been made to work with highmem pages and will use a
				74	* destination page in its final resting place (if it happens
				75	* to allocate it). The end product of this is that most of the
				76	* physical address space, and most of RAM can be used.
				77	*
				78	* Future directions include:
				79	* - allocating a page table with the control code buffer identity
				80	* mapped, to simplify machine_kexec and make kexec_on_panic more
				81	* reliable.
				82	*/
				83
				84	/*
				85	* KIMAGE_NO_DEST is an impossible destination address..., for
				86	* allocating pages whose destination address we do not care about.
				87	*/
				88	#define KIMAGE_NO_DEST (-1UL)
				89
/* Forward declarations for helpers that are used before their definition. */
static int kimage_is_destination_range(struct kimage *image,
				       unsigned long start, unsigned long end);
static struct page *kimage_alloc_page(struct kimage *image,
				      unsigned int gfp_mask,
				      unsigned long dest);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	95
				96	static int do_kimage_alloc(struct kimage **rimage, unsigned long entry,
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	97	unsigned long nr_segments,
				98	struct kexec_segment __user *segments)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	99	{
				100	size_t segment_bytes;
				101	struct kimage *image;
				102	unsigned long i;
				103	int result;
				104
				105	/* Allocate a controlling structure */
				106	result = -ENOMEM;
				107	image = kmalloc(sizeof(*image), GFP_KERNEL);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	108	if (!image)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	109	goto out;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	110
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	111	memset(image, 0, sizeof(*image));
				112	image->head = 0;
				113	image->entry = &image->head;
				114	image->last_entry = &image->head;
				115	image->control_page = ~0; /* By default this does not apply */
				116	image->start = entry;
				117	image->type = KEXEC_TYPE_DEFAULT;
				118
				119	/* Initialize the list of control pages */
				120	INIT_LIST_HEAD(&image->control_pages);
				121
				122	/* Initialize the list of destination pages */
				123	INIT_LIST_HEAD(&image->dest_pages);
				124
				125	/* Initialize the list of unuseable pages */
				126	INIT_LIST_HEAD(&image->unuseable_pages);
				127
				128	/* Read in the segments */
				129	image->nr_segments = nr_segments;
				130	segment_bytes = nr_segments * sizeof(*segments);
				131	result = copy_from_user(image->segment, segments, segment_bytes);
				132	if (result)
				133	goto out;
				134
				135	/*
				136	* Verify we have good destination addresses. The caller is
				137	* responsible for making certain we don't attempt to load
				138	* the new image into invalid or reserved areas of RAM. This
				139	* just verifies it is an address we can use.
				140	*
				141	* Since the kernel does everything in page size chunks ensure
				142	* the destination addreses are page aligned. Too many
				143	* special cases crop of when we don't do this. The most
				144	* insidious is getting overlapping destination addresses
				145	* simply because addresses are changed to page size
				146	* granularity.
				147	*/
				148	result = -EADDRNOTAVAIL;
				149	for (i = 0; i < nr_segments; i++) {
				150	unsigned long mstart, mend;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	151
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	152	mstart = image->segment[i].mem;
				153	mend = mstart + image->segment[i].memsz;
				154	if ((mstart & ~PAGE_MASK) \|\| (mend & ~PAGE_MASK))
				155	goto out;
				156	if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT)
				157	goto out;
				158	}
				159
				160	/* Verify our destination addresses do not overlap.
				161	* If we alloed overlapping destination addresses
				162	* through very weird things can happen with no
				163	* easy explanation as one segment stops on another.
				164	*/
				165	result = -EINVAL;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	166	for (i = 0; i < nr_segments; i++) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	167	unsigned long mstart, mend;
				168	unsigned long j;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	169
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	170	mstart = image->segment[i].mem;
				171	mend = mstart + image->segment[i].memsz;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	172	for (j = 0; j < i; j++) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	173	unsigned long pstart, pend;
				174	pstart = image->segment[j].mem;
				175	pend = pstart + image->segment[j].memsz;
				176	/* Do the segments overlap ? */
				177	if ((mend > pstart) && (mstart < pend))
				178	goto out;
				179	}
				180	}
				181
				182	/* Ensure our buffer sizes are strictly less than
				183	* our memory sizes. This should always be the case,
				184	* and it is easier to check up front than to be surprised
				185	* later on.
				186	*/
				187	result = -EINVAL;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	188	for (i = 0; i < nr_segments; i++) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	189	if (image->segment[i].bufsz > image->segment[i].memsz)
				190	goto out;
				191	}
				192
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	193	result = 0;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	194	out:
				195	if (result == 0)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	196	*rimage = image;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	197	else
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	198	kfree(image);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	199
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	200	return result;
				201
				202	}
				203
				204	static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry,
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	205	unsigned long nr_segments,
				206	struct kexec_segment __user *segments)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	207	{
				208	int result;
				209	struct kimage *image;
				210
				211	/* Allocate and initialize a controlling structure */
				212	image = NULL;
				213	result = do_kimage_alloc(&image, entry, nr_segments, segments);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	214	if (result)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	215	goto out;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	216
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	217	*rimage = image;
				218
				219	/*
				220	* Find a location for the control code buffer, and add it
				221	* the vector of segments so that it's pages will also be
				222	* counted as destination pages.
				223	*/
				224	result = -ENOMEM;
				225	image->control_code_page = kimage_alloc_control_pages(image,
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	226	get_order(KEXEC_CONTROL_CODE_SIZE));
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	227	if (!image->control_code_page) {
				228	printk(KERN_ERR "Could not allocate control_code_buffer\n");
				229	goto out;
				230	}
				231
				232	result = 0;
				233	out:
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	234	if (result == 0)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	235	*rimage = image;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	236	else
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	237	kfree(image);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	238
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	239	return result;
				240	}
				241
				242	static int kimage_crash_alloc(struct kimage **rimage, unsigned long entry,
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	243	unsigned long nr_segments,
				244	struct kexec_segment *segments)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	245	{
				246	int result;
				247	struct kimage *image;
				248	unsigned long i;
				249
				250	image = NULL;
				251	/* Verify we have a valid entry point */
				252	if ((entry < crashk_res.start) \|\| (entry > crashk_res.end)) {
				253	result = -EADDRNOTAVAIL;
				254	goto out;
				255	}
				256
				257	/* Allocate and initialize a controlling structure */
				258	result = do_kimage_alloc(&image, entry, nr_segments, segments);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	259	if (result)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	260	goto out;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	261
				262	/* Enable the special crash kernel control page
				263	* allocation policy.
				264	*/
				265	image->control_page = crashk_res.start;
				266	image->type = KEXEC_TYPE_CRASH;
				267
				268	/*
				269	* Verify we have good destination addresses. Normally
				270	* the caller is responsible for making certain we don't
				271	* attempt to load the new image into invalid or reserved
				272	* areas of RAM. But crash kernels are preloaded into a
				273	* reserved area of ram. We must ensure the addresses
				274	* are in the reserved area otherwise preloading the
				275	* kernel could corrupt things.
				276	*/
				277	result = -EADDRNOTAVAIL;
				278	for (i = 0; i < nr_segments; i++) {
				279	unsigned long mstart, mend;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	280
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	281	mstart = image->segment[i].mem;
Vivek Goyal	50cccc6	2005-06-25 14:57:55 -0700	[diff] [blame]	282	mend = mstart + image->segment[i].memsz - 1;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	283	/* Ensure we are within the crash kernel limits */
				284	if ((mstart < crashk_res.start) \|\| (mend > crashk_res.end))
				285	goto out;
				286	}
				287
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	288	/*
				289	* Find a location for the control code buffer, and add
				290	* the vector of segments so that it's pages will also be
				291	* counted as destination pages.
				292	*/
				293	result = -ENOMEM;
				294	image->control_code_page = kimage_alloc_control_pages(image,
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	295	get_order(KEXEC_CONTROL_CODE_SIZE));
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	296	if (!image->control_code_page) {
				297	printk(KERN_ERR "Could not allocate control_code_buffer\n");
				298	goto out;
				299	}
				300
				301	result = 0;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	302	out:
				303	if (result == 0)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	304	*rimage = image;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	305	else
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	306	kfree(image);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	307
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	308	return result;
				309	}
				310
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	311	static int kimage_is_destination_range(struct kimage *image,
				312	unsigned long start,
				313	unsigned long end)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	314	{
				315	unsigned long i;
				316
				317	for (i = 0; i < image->nr_segments; i++) {
				318	unsigned long mstart, mend;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	319
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	320	mstart = image->segment[i].mem;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	321	mend = mstart + image->segment[i].memsz;
				322	if ((end > mstart) && (start < mend))
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	323	return 1;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	324	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	325
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	326	return 0;
				327	}
				328
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	329	static struct page *kimage_alloc_pages(unsigned int gfp_mask,
				330	unsigned int order)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	331	{
				332	struct page *pages;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	333
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	334	pages = alloc_pages(gfp_mask, order);
				335	if (pages) {
				336	unsigned int count, i;
				337	pages->mapping = NULL;
				338	pages->private = order;
				339	count = 1 << order;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	340	for (i = 0; i < count; i++)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	341	SetPageReserved(pages + i);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	342	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	343
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	344	return pages;
				345	}
				346
				347	static void kimage_free_pages(struct page *page)
				348	{
				349	unsigned int order, count, i;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	350
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	351	order = page->private;
				352	count = 1 << order;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	353	for (i = 0; i < count; i++)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	354	ClearPageReserved(page + i);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	355	__free_pages(page, order);
				356	}
				357
				358	static void kimage_free_page_list(struct list_head *list)
				359	{
				360	struct list_head pos, next;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	361
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	362	list_for_each_safe(pos, next, list) {
				363	struct page *page;
				364
				365	page = list_entry(pos, struct page, lru);
				366	list_del(&page->lru);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	367	kimage_free_pages(page);
				368	}
				369	}
				370
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	371	static struct page kimage_alloc_normal_control_pages(struct kimage image,
				372	unsigned int order)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	373	{
				374	/* Control pages are special, they are the intermediaries
				375	* that are needed while we copy the rest of the pages
				376	* to their final resting place. As such they must
				377	* not conflict with either the destination addresses
				378	* or memory the kernel is already using.
				379	*
				380	* The only case where we really need more than one of
				381	* these are for architectures where we cannot disable
				382	* the MMU and must instead generate an identity mapped
				383	* page table for all of the memory.
				384	*
				385	* At worst this runs in O(N) of the image size.
				386	*/
				387	struct list_head extra_pages;
				388	struct page *pages;
				389	unsigned int count;
				390
				391	count = 1 << order;
				392	INIT_LIST_HEAD(&extra_pages);
				393
				394	/* Loop while I can allocate a page and the page allocated
				395	* is a destination page.
				396	*/
				397	do {
				398	unsigned long pfn, epfn, addr, eaddr;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	399
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	400	pages = kimage_alloc_pages(GFP_KERNEL, order);
				401	if (!pages)
				402	break;
				403	pfn = page_to_pfn(pages);
				404	epfn = pfn + count;
				405	addr = pfn << PAGE_SHIFT;
				406	eaddr = epfn << PAGE_SHIFT;
				407	if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) \|\|
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	408	kimage_is_destination_range(image, addr, eaddr)) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	409	list_add(&pages->lru, &extra_pages);
				410	pages = NULL;
				411	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	412	} while (!pages);
				413
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	414	if (pages) {
				415	/* Remember the allocated page... */
				416	list_add(&pages->lru, &image->control_pages);
				417
				418	/* Because the page is already in it's destination
				419	* location we will never allocate another page at
				420	* that address. Therefore kimage_alloc_pages
				421	* will not return it (again) and we don't need
				422	* to give it an entry in image->segment[].
				423	*/
				424	}
				425	/* Deal with the destination pages I have inadvertently allocated.
				426	*
				427	* Ideally I would convert multi-page allocations into single
				428	* page allocations, and add everyting to image->dest_pages.
				429	*
				430	* For now it is simpler to just free the pages.
				431	*/
				432	kimage_free_page_list(&extra_pages);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	433
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	434	return pages;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	435	}
				436
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	437	static struct page kimage_alloc_crash_control_pages(struct kimage image,
				438	unsigned int order)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	439	{
				440	/* Control pages are special, they are the intermediaries
				441	* that are needed while we copy the rest of the pages
				442	* to their final resting place. As such they must
				443	* not conflict with either the destination addresses
				444	* or memory the kernel is already using.
				445	*
				446	* Control pages are also the only pags we must allocate
				447	* when loading a crash kernel. All of the other pages
				448	* are specified by the segments and we just memcpy
				449	* into them directly.
				450	*
				451	* The only case where we really need more than one of
				452	* these are for architectures where we cannot disable
				453	* the MMU and must instead generate an identity mapped
				454	* page table for all of the memory.
				455	*
				456	* Given the low demand this implements a very simple
				457	* allocator that finds the first hole of the appropriate
				458	* size in the reserved memory region, and allocates all
				459	* of the memory up to and including the hole.
				460	*/
				461	unsigned long hole_start, hole_end, size;
				462	struct page *pages;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	463
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	464	pages = NULL;
				465	size = (1 << order) << PAGE_SHIFT;
				466	hole_start = (image->control_page + (size - 1)) & ~(size - 1);
				467	hole_end = hole_start + size - 1;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	468	while (hole_end <= crashk_res.end) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	469	unsigned long i;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	470
				471	if (hole_end > KEXEC_CONTROL_MEMORY_LIMIT)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	472	break;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	473	if (hole_end > crashk_res.end)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	474	break;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	475	/* See if I overlap any of the segments */
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	476	for (i = 0; i < image->nr_segments; i++) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	477	unsigned long mstart, mend;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	478
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	479	mstart = image->segment[i].mem;
				480	mend = mstart + image->segment[i].memsz - 1;
				481	if ((hole_end >= mstart) && (hole_start <= mend)) {
				482	/* Advance the hole to the end of the segment */
				483	hole_start = (mend + (size - 1)) & ~(size - 1);
				484	hole_end = hole_start + size - 1;
				485	break;
				486	}
				487	}
				488	/* If I don't overlap any segments I have found my hole! */
				489	if (i == image->nr_segments) {
				490	pages = pfn_to_page(hole_start >> PAGE_SHIFT);
				491	break;
				492	}
				493	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	494	if (pages)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	495	image->control_page = hole_end;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	496
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	497	return pages;
				498	}
				499
				500
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	501	struct page kimage_alloc_control_pages(struct kimage image,
				502	unsigned int order)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	503	{
				504	struct page *pages = NULL;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	505
				506	switch (image->type) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	507	case KEXEC_TYPE_DEFAULT:
				508	pages = kimage_alloc_normal_control_pages(image, order);
				509	break;
				510	case KEXEC_TYPE_CRASH:
				511	pages = kimage_alloc_crash_control_pages(image, order);
				512	break;
				513	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	514
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	515	return pages;
				516	}
				517
				518	static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
				519	{
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	520	if (*image->entry != 0)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	521	image->entry++;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	522
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	523	if (image->entry == image->last_entry) {
				524	kimage_entry_t *ind_page;
				525	struct page *page;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	526
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	527	page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	528	if (!page)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	529	return -ENOMEM;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	530
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	531	ind_page = page_address(page);
				532	*image->entry = virt_to_phys(ind_page) \| IND_INDIRECTION;
				533	image->entry = ind_page;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	534	image->last_entry = ind_page +
				535	((PAGE_SIZE/sizeof(kimage_entry_t)) - 1);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	536	}
				537	*image->entry = entry;
				538	image->entry++;
				539	*image->entry = 0;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	540
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	541	return 0;
				542	}
				543
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	544	static int kimage_set_destination(struct kimage *image,
				545	unsigned long destination)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	546	{
				547	int result;
				548
				549	destination &= PAGE_MASK;
				550	result = kimage_add_entry(image, destination \| IND_DESTINATION);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	551	if (result == 0)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	552	image->destination = destination;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	553
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	554	return result;
				555	}
				556
				557
				558	static int kimage_add_page(struct kimage *image, unsigned long page)
				559	{
				560	int result;
				561
				562	page &= PAGE_MASK;
				563	result = kimage_add_entry(image, page \| IND_SOURCE);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	564	if (result == 0)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	565	image->destination += PAGE_SIZE;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	566
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	567	return result;
				568	}
				569
				570
				571	static void kimage_free_extra_pages(struct kimage *image)
				572	{
				573	/* Walk through and free any extra destination pages I may have */
				574	kimage_free_page_list(&image->dest_pages);
				575
				576	/* Walk through and free any unuseable pages I have cached */
				577	kimage_free_page_list(&image->unuseable_pages);
				578
				579	}
				580	static int kimage_terminate(struct kimage *image)
				581	{
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	582	if (*image->entry != 0)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	583	image->entry++;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	584
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	585	*image->entry = IND_DONE;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	586
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	587	return 0;
				588	}
				589
				590	#define for_each_kimage_entry(image, ptr, entry) \
				591	for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \
				592	ptr = (entry & IND_INDIRECTION)? \
				593	phys_to_virt((entry & PAGE_MASK)): ptr +1)
				594
				595	static void kimage_free_entry(kimage_entry_t entry)
				596	{
				597	struct page *page;
				598
				599	page = pfn_to_page(entry >> PAGE_SHIFT);
				600	kimage_free_pages(page);
				601	}
				602
				603	static void kimage_free(struct kimage *image)
				604	{
				605	kimage_entry_t *ptr, entry;
				606	kimage_entry_t ind = 0;
				607
				608	if (!image)
				609	return;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	610
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	611	kimage_free_extra_pages(image);
				612	for_each_kimage_entry(image, ptr, entry) {
				613	if (entry & IND_INDIRECTION) {
				614	/* Free the previous indirection page */
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	615	if (ind & IND_INDIRECTION)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	616	kimage_free_entry(ind);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	617	/* Save this indirection page until we are
				618	* done with it.
				619	*/
				620	ind = entry;
				621	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	622	else if (entry & IND_SOURCE)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	623	kimage_free_entry(entry);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	624	}
				625	/* Free the final indirection page */
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	626	if (ind & IND_INDIRECTION)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	627	kimage_free_entry(ind);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	628
				629	/* Handle any machine specific cleanup */
				630	machine_kexec_cleanup(image);
				631
				632	/* Free the kexec control pages... */
				633	kimage_free_page_list(&image->control_pages);
				634	kfree(image);
				635	}
				636
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	637	static kimage_entry_t kimage_dst_used(struct kimage image,
				638	unsigned long page)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	639	{
				640	kimage_entry_t *ptr, entry;
				641	unsigned long destination = 0;
				642
				643	for_each_kimage_entry(image, ptr, entry) {
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	644	if (entry & IND_DESTINATION)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	645	destination = entry & PAGE_MASK;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	646	else if (entry & IND_SOURCE) {
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	647	if (page == destination)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	648	return ptr;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	649	destination += PAGE_SIZE;
				650	}
				651	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	652
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	653	return 0;
				654	}
				655
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	656	static struct page kimage_alloc_page(struct kimage image,
				657	unsigned int gfp_mask,
				658	unsigned long destination)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	659	{
				660	/*
				661	* Here we implement safeguards to ensure that a source page
				662	* is not copied to its destination page before the data on
				663	* the destination page is no longer useful.
				664	*
				665	* To do this we maintain the invariant that a source page is
				666	* either its own destination page, or it is not a
				667	* destination page at all.
				668	*
				669	* That is slightly stronger than required, but the proof
				670	* that no problems will not occur is trivial, and the
				671	* implementation is simply to verify.
				672	*
				673	* When allocating all pages normally this algorithm will run
				674	* in O(N) time, but in the worst case it will run in O(N^2)
				675	* time. If the runtime is a problem the data structures can
				676	* be fixed.
				677	*/
				678	struct page *page;
				679	unsigned long addr;
				680
				681	/*
				682	* Walk through the list of destination pages, and see if I
				683	* have a match.
				684	*/
				685	list_for_each_entry(page, &image->dest_pages, lru) {
				686	addr = page_to_pfn(page) << PAGE_SHIFT;
				687	if (addr == destination) {
				688	list_del(&page->lru);
				689	return page;
				690	}
				691	}
				692	page = NULL;
				693	while (1) {
				694	kimage_entry_t *old;
				695
				696	/* Allocate a page, if we run out of memory give up */
				697	page = kimage_alloc_pages(gfp_mask, 0);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	698	if (!page)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	699	return 0;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	700	/* If the page cannot be used file it away */
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	701	if (page_to_pfn(page) >
				702	(KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	703	list_add(&page->lru, &image->unuseable_pages);
				704	continue;
				705	}
				706	addr = page_to_pfn(page) << PAGE_SHIFT;
				707
				708	/* If it is the destination page we want use it */
				709	if (addr == destination)
				710	break;
				711
				712	/* If the page is not a destination page use it */
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	713	if (!kimage_is_destination_range(image, addr,
				714	addr + PAGE_SIZE))
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	715	break;
				716
				717	/*
				718	* I know that the page is someones destination page.
				719	* See if there is already a source page for this
				720	* destination page. And if so swap the source pages.
				721	*/
				722	old = kimage_dst_used(image, addr);
				723	if (old) {
				724	/* If so move it */
				725	unsigned long old_addr;
				726	struct page *old_page;
				727
				728	old_addr = *old & PAGE_MASK;
				729	old_page = pfn_to_page(old_addr >> PAGE_SHIFT);
				730	copy_highpage(page, old_page);
				731	old = addr \| (old & ~PAGE_MASK);
				732
				733	/* The old page I have found cannot be a
				734	* destination page, so return it.
				735	*/
				736	addr = old_addr;
				737	page = old_page;
				738	break;
				739	}
				740	else {
				741	/* Place the page on the destination list I
				742	* will use it later.
				743	*/
				744	list_add(&page->lru, &image->dest_pages);
				745	}
				746	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	747
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	748	return page;
				749	}
				750
				751	static int kimage_load_normal_segment(struct kimage *image,
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	752	struct kexec_segment *segment)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	753	{
				754	unsigned long maddr;
				755	unsigned long ubytes, mbytes;
				756	int result;
				757	unsigned char *buf;
				758
				759	result = 0;
				760	buf = segment->buf;
				761	ubytes = segment->bufsz;
				762	mbytes = segment->memsz;
				763	maddr = segment->mem;
				764
				765	result = kimage_set_destination(image, maddr);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	766	if (result < 0)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	767	goto out;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	768
				769	while (mbytes) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	770	struct page *page;
				771	char *ptr;
				772	size_t uchunk, mchunk;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	773
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	774	page = kimage_alloc_page(image, GFP_HIGHUSER, maddr);
				775	if (page == 0) {
				776	result = -ENOMEM;
				777	goto out;
				778	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	779	result = kimage_add_page(image, page_to_pfn(page)
				780	<< PAGE_SHIFT);
				781	if (result < 0)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	782	goto out;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	783
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	784	ptr = kmap(page);
				785	/* Start with a clear page */
				786	memset(ptr, 0, PAGE_SIZE);
				787	ptr += maddr & ~PAGE_MASK;
				788	mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	789	if (mchunk > mbytes)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	790	mchunk = mbytes;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	791
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	792	uchunk = mchunk;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	793	if (uchunk > ubytes)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	794	uchunk = ubytes;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	795
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	796	result = copy_from_user(ptr, buf, uchunk);
				797	kunmap(page);
				798	if (result) {
				799	result = (result < 0) ? result : -EIO;
				800	goto out;
				801	}
				802	ubytes -= uchunk;
				803	maddr += mchunk;
				804	buf += mchunk;
				805	mbytes -= mchunk;
				806	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	807	out:
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	808	return result;
				809	}
				810
				811	static int kimage_load_crash_segment(struct kimage *image,
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	812	struct kexec_segment *segment)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	813	{
				814	/* For crash dumps kernels we simply copy the data from
				815	* user space to it's destination.
				816	* We do things a page at a time for the sake of kmap.
				817	*/
				818	unsigned long maddr;
				819	unsigned long ubytes, mbytes;
				820	int result;
				821	unsigned char *buf;
				822
				823	result = 0;
				824	buf = segment->buf;
				825	ubytes = segment->bufsz;
				826	mbytes = segment->memsz;
				827	maddr = segment->mem;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	828	while (mbytes) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	829	struct page *page;
				830	char *ptr;
				831	size_t uchunk, mchunk;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	832
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	833	page = pfn_to_page(maddr >> PAGE_SHIFT);
				834	if (page == 0) {
				835	result = -ENOMEM;
				836	goto out;
				837	}
				838	ptr = kmap(page);
				839	ptr += maddr & ~PAGE_MASK;
				840	mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	841	if (mchunk > mbytes)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	842	mchunk = mbytes;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	843
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	844	uchunk = mchunk;
				845	if (uchunk > ubytes) {
				846	uchunk = ubytes;
				847	/* Zero the trailing part of the page */
				848	memset(ptr + uchunk, 0, mchunk - uchunk);
				849	}
				850	result = copy_from_user(ptr, buf, uchunk);
				851	kunmap(page);
				852	if (result) {
				853	result = (result < 0) ? result : -EIO;
				854	goto out;
				855	}
				856	ubytes -= uchunk;
				857	maddr += mchunk;
				858	buf += mchunk;
				859	mbytes -= mchunk;
				860	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	861	out:
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	862	return result;
				863	}
				864
				865	static int kimage_load_segment(struct kimage *image,
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	866	struct kexec_segment *segment)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	867	{
				868	int result = -ENOMEM;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	869
				870	switch (image->type) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	871	case KEXEC_TYPE_DEFAULT:
				872	result = kimage_load_normal_segment(image, segment);
				873	break;
				874	case KEXEC_TYPE_CRASH:
				875	result = kimage_load_crash_segment(image, segment);
				876	break;
				877	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	878
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	879	return result;
				880	}
				881
/*
 * Exec Kernel system call: for obvious reasons only root may call it.
 *
 * This call breaks up into three pieces.
 * - A generic part which loads the new kernel from the current
 *   address space, and very carefully places the data in the
 *   allocated pages.
 *
 * - A generic part that interacts with the kernel and tells all of
 *   the devices to shut down.  Preventing on-going dmas, and placing
 *   the devices in a consistent state so a later kernel can
 *   reinitialize them.
 *
 * - A machine specific part that includes the syscall number
 *   and then copies the image to its final destination.  And
 *   jumps into the image at entry.
 *
 * kexec does not sync, or unmount filesystems so if you need
 * that to happen you need to do that yourself.
 */
				902	struct kimage *kexec_image = NULL;
				903	static struct kimage *kexec_crash_image = NULL;
				904	/*
				905	* A home grown binary mutex.
				906	* Nothing can wait so this mutex is safe to use
				907	* in interrupt context :)
				908	*/
				909	static int kexec_lock = 0;
				910
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	911	asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments,
				912	struct kexec_segment __user *segments,
				913	unsigned long flags)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	914	{
				915	struct kimage *dest_image, image;
				916	int locked;
				917	int result;
				918
				919	/* We only trust the superuser with rebooting the system. */
				920	if (!capable(CAP_SYS_BOOT))
				921	return -EPERM;
				922
				923	/*
				924	* Verify we have a legal set of flags
				925	* This leaves us room for future extensions.
				926	*/
				927	if ((flags & KEXEC_FLAGS) != (flags & ~KEXEC_ARCH_MASK))
				928	return -EINVAL;
				929
				930	/* Verify we are on the appropriate architecture */
				931	if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) &&
				932	((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT))
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	933	return -EINVAL;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	934
				935	/* Put an artificial cap on the number
				936	* of segments passed to kexec_load.
				937	*/
				938	if (nr_segments > KEXEC_SEGMENT_MAX)
				939	return -EINVAL;
				940
				941	image = NULL;
				942	result = 0;
				943
				944	/* Because we write directly to the reserved memory
				945	* region when loading crash kernels we need a mutex here to
				946	* prevent multiple crash kernels from attempting to load
				947	* simultaneously, and to prevent a crash kernel from loading
				948	* over the top of a in use crash kernel.
				949	*
				950	* KISS: always take the mutex.
				951	*/
				952	locked = xchg(&kexec_lock, 1);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	953	if (locked)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	954	return -EBUSY;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	955
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	956	dest_image = &kexec_image;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	957	if (flags & KEXEC_ON_CRASH)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	958	dest_image = &kexec_crash_image;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	959	if (nr_segments > 0) {
				960	unsigned long i;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	961
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	962	/* Loading another kernel to reboot into */
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	963	if ((flags & KEXEC_ON_CRASH) == 0)
				964	result = kimage_normal_alloc(&image, entry,
				965	nr_segments, segments);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	966	/* Loading another kernel to switch to if this one crashes */
				967	else if (flags & KEXEC_ON_CRASH) {
				968	/* Free any current crash dump kernel before
				969	* we corrupt it.
				970	*/
				971	kimage_free(xchg(&kexec_crash_image, NULL));
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	972	result = kimage_crash_alloc(&image, entry,
				973	nr_segments, segments);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	974	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	975	if (result)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	976	goto out;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	977
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	978	result = machine_kexec_prepare(image);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	979	if (result)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	980	goto out;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	981
				982	for (i = 0; i < nr_segments; i++) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	983	result = kimage_load_segment(image, &image->segment[i]);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	984	if (result)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	985	goto out;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	986	}
				987	result = kimage_terminate(image);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	988	if (result)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	989	goto out;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	990	}
				991	/* Install the new kernel, and Uninstall the old */
				992	image = xchg(dest_image, image);
				993
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	994	out:
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	995	xchg(&kexec_lock, 0); /* Release the mutex */
				996	kimage_free(image);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	997
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	998	return result;
				999	}
				1000
				1001	#ifdef CONFIG_COMPAT
				1002	asmlinkage long compat_sys_kexec_load(unsigned long entry,
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	1003	unsigned long nr_segments,
				1004	struct compat_kexec_segment __user *segments,
				1005	unsigned long flags)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1006	{
				1007	struct compat_kexec_segment in;
				1008	struct kexec_segment out, __user *ksegments;
				1009	unsigned long i, result;
				1010
				1011	/* Don't allow clients that don't understand the native
				1012	* architecture to do anything.
				1013	*/
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	1014	if ((flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_DEFAULT)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1015	return -EINVAL;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1016
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	1017	if (nr_segments > KEXEC_SEGMENT_MAX)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1018	return -EINVAL;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1019
				1020	ksegments = compat_alloc_user_space(nr_segments * sizeof(out));
				1021	for (i=0; i < nr_segments; i++) {
				1022	result = copy_from_user(&in, &segments[i], sizeof(in));
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	1023	if (result)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1024	return -EFAULT;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1025
				1026	out.buf = compat_ptr(in.buf);
				1027	out.bufsz = in.bufsz;
				1028	out.mem = in.mem;
				1029	out.memsz = in.memsz;
				1030
				1031	result = copy_to_user(&ksegments[i], &out, sizeof(out));
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame^]	1032	if (result)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1033	return -EFAULT;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1034	}
				1035
				1036	return sys_kexec_load(entry, nr_segments, ksegments, flags);
				1037	}
				1038	#endif
				1039
Alexander Nyberg	6e274d1	2005-06-25 14:58:26 -0700	[diff] [blame]	1040	void crash_kexec(struct pt_regs *regs)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1041	{
				1042	struct kimage *image;
				1043	int locked;
				1044
				1045
				1046	/* Take the kexec_lock here to prevent sys_kexec_load
				1047	* running on one cpu from replacing the crash kernel
				1048	* we are using after a panic on a different cpu.
				1049	*
				1050	* If the crash kernel was not located in a fixed area
				1051	* of memory the xchg(&kexec_crash_image) would be
				1052	* sufficient. But since I reuse the memory...
				1053	*/
				1054	locked = xchg(&kexec_lock, 1);
				1055	if (!locked) {
				1056	image = xchg(&kexec_crash_image, NULL);
				1057	if (image) {
Alexander Nyberg	6e274d1	2005-06-25 14:58:26 -0700	[diff] [blame]	1058	machine_crash_shutdown(regs);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1059	machine_kexec(image);
				1060	}
				1061	xchg(&kexec_lock, 0);
				1062	}
				1063	}