blob: ab62a9f4701edd22c69910ae05b71edde14bdd99 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * linux/arch/i386/kernel/setup.c
3 *
4 * Copyright (C) 1995 Linus Torvalds
5 *
6 * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
7 *
8 * Memory region support
9 * David Parsons <orc@pell.chi.il.us>, July-August 1999
10 *
11 * Added E820 sanitization routine (removes overlapping memory regions);
12 * Brian Moyle <bmoyle@mvista.com>, February 2001
13 *
14 * Moved CPU detection code to cpu/${cpu}.c
15 * Patrick Mochel <mochel@osdl.org>, March 2002
16 *
17 * Provisions for empty E820 memory regions (reported by certain BIOSes).
18 * Alex Achenbach <xela@slit.de>, December 2002.
19 *
20 */
21
22/*
23 * This file handles the architecture-dependent parts of initialization
24 */
25
Alexey Dobriyan129f6942005-06-23 00:08:33 -070026#include <linux/config.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070027#include <linux/sched.h>
28#include <linux/mm.h>
Andy Whitcroft05b79bd2005-06-23 00:07:57 -070029#include <linux/mmzone.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070030#include <linux/tty.h>
31#include <linux/ioport.h>
32#include <linux/acpi.h>
33#include <linux/apm_bios.h>
34#include <linux/initrd.h>
35#include <linux/bootmem.h>
36#include <linux/seq_file.h>
37#include <linux/console.h>
38#include <linux/mca.h>
39#include <linux/root_dev.h>
40#include <linux/highmem.h>
41#include <linux/module.h>
42#include <linux/efi.h>
43#include <linux/init.h>
44#include <linux/edd.h>
45#include <linux/nodemask.h>
Eric W. Biederman1bc3b912005-06-25 14:58:01 -070046#include <linux/kexec.h>
Vivek Goyal2030eae2005-06-25 14:58:20 -070047#include <linux/crash_dump.h>
Andi Kleene9928672006-01-11 22:43:33 +010048#include <linux/dmi.h>
Eric W. Biederman1bc3b912005-06-25 14:58:01 -070049
Linus Torvalds1da177e2005-04-16 15:20:36 -070050#include <video/edid.h>
Eric W. Biederman1bc3b912005-06-25 14:58:01 -070051
Eric W. Biederman9635b472005-06-25 14:57:41 -070052#include <asm/apic.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070053#include <asm/e820.h>
54#include <asm/mpspec.h>
55#include <asm/setup.h>
56#include <asm/arch_hooks.h>
57#include <asm/sections.h>
58#include <asm/io_apic.h>
59#include <asm/ist.h>
60#include <asm/io.h>
61#include "setup_arch_pre.h"
62#include <bios_ebda.h>
63
Vivek Goyal92aa63a2005-06-25 14:58:18 -070064/* Forward Declaration. */
65void __init find_max_pfn(void);
66
Linus Torvalds1da177e2005-04-16 15:20:36 -070067/* This value is set up by the early boot code to point to the value
68 immediately after the boot time page tables. It contains a *physical*
69 address, and must not be in the .bss segment! */
70unsigned long init_pg_tables_end __initdata = ~0UL;
71
Li Shaohua0bb31842005-06-25 14:54:55 -070072int disable_pse __devinitdata = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -070073
74/*
75 * Machine setup..
76 */
77
78#ifdef CONFIG_EFI
79int efi_enabled = 0;
80EXPORT_SYMBOL(efi_enabled);
81#endif
82
83/* cpu data as detected by the assembly code in head.S */
84struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
85/* common cpu data for all cpus */
Christoph Lameterc3d8c142005-09-06 15:16:33 -070086struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
Alexey Dobriyan129f6942005-06-23 00:08:33 -070087EXPORT_SYMBOL(boot_cpu_data);
Linus Torvalds1da177e2005-04-16 15:20:36 -070088
89unsigned long mmu_cr4_features;
90
Len Brown84663612005-08-24 12:09:07 -040091#ifdef CONFIG_ACPI
Linus Torvalds1da177e2005-04-16 15:20:36 -070092 int acpi_disabled = 0;
93#else
94 int acpi_disabled = 1;
95#endif
96EXPORT_SYMBOL(acpi_disabled);
97
Len Brown888ba6c2005-08-24 12:07:20 -040098#ifdef CONFIG_ACPI
Linus Torvalds1da177e2005-04-16 15:20:36 -070099int __initdata acpi_force = 0;
100extern acpi_interrupt_flags acpi_sci_flags;
101#endif
102
103/* for MCA, but anyone else can use it if they want */
104unsigned int machine_id;
Alexey Dobriyan129f6942005-06-23 00:08:33 -0700105#ifdef CONFIG_MCA
106EXPORT_SYMBOL(machine_id);
107#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700108unsigned int machine_submodel_id;
109unsigned int BIOS_revision;
110unsigned int mca_pentium_flag;
111
112/* For PCI or other memory-mapped resources */
113unsigned long pci_mem_start = 0x10000000;
Alexey Dobriyan129f6942005-06-23 00:08:33 -0700114#ifdef CONFIG_PCI
115EXPORT_SYMBOL(pci_mem_start);
116#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700117
118/* Boot loader ID as an integer, for the benefit of proc_dointvec */
119int bootloader_type;
120
121/* user-defined highmem size */
122static unsigned int highmem_pages = -1;
123
124/*
125 * Setup options
126 */
127struct drive_info_struct { char dummy[32]; } drive_info;
Alexey Dobriyan129f6942005-06-23 00:08:33 -0700128#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || \
129 defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE)
130EXPORT_SYMBOL(drive_info);
131#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700132struct screen_info screen_info;
Alexey Dobriyan129f6942005-06-23 00:08:33 -0700133EXPORT_SYMBOL(screen_info);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700134struct apm_info apm_info;
Alexey Dobriyan129f6942005-06-23 00:08:33 -0700135EXPORT_SYMBOL(apm_info);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700136struct sys_desc_table_struct {
137 unsigned short length;
138 unsigned char table[0];
139};
140struct edid_info edid_info;
Antonino A. Daplas5e518d72005-09-09 13:04:34 -0700141EXPORT_SYMBOL_GPL(edid_info);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700142struct ist_info ist_info;
Alexey Dobriyan129f6942005-06-23 00:08:33 -0700143#if defined(CONFIG_X86_SPEEDSTEP_SMI) || \
144 defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)
145EXPORT_SYMBOL(ist_info);
146#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700147struct e820map e820;
148
149extern void early_cpu_init(void);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700150extern void generic_apic_probe(char *);
151extern int root_mountflags;
152
153unsigned long saved_videomode;
154
155#define RAMDISK_IMAGE_START_MASK 0x07FF
156#define RAMDISK_PROMPT_FLAG 0x8000
157#define RAMDISK_LOAD_FLAG 0x4000
158
159static char command_line[COMMAND_LINE_SIZE];
160
161unsigned char __initdata boot_params[PARAM_SIZE];
162
163static struct resource data_resource = {
164 .name = "Kernel data",
165 .start = 0,
166 .end = 0,
167 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
168};
169
170static struct resource code_resource = {
171 .name = "Kernel code",
172 .start = 0,
173 .end = 0,
174 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
175};
176
177static struct resource system_rom_resource = {
178 .name = "System ROM",
179 .start = 0xf0000,
180 .end = 0xfffff,
181 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
182};
183
184static struct resource extension_rom_resource = {
185 .name = "Extension ROM",
186 .start = 0xe0000,
187 .end = 0xeffff,
188 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
189};
190
191static struct resource adapter_rom_resources[] = { {
192 .name = "Adapter ROM",
193 .start = 0xc8000,
194 .end = 0,
195 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
196}, {
197 .name = "Adapter ROM",
198 .start = 0,
199 .end = 0,
200 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
201}, {
202 .name = "Adapter ROM",
203 .start = 0,
204 .end = 0,
205 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
206}, {
207 .name = "Adapter ROM",
208 .start = 0,
209 .end = 0,
210 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
211}, {
212 .name = "Adapter ROM",
213 .start = 0,
214 .end = 0,
215 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
216}, {
217 .name = "Adapter ROM",
218 .start = 0,
219 .end = 0,
220 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
221} };
222
223#define ADAPTER_ROM_RESOURCES \
224 (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
225
226static struct resource video_rom_resource = {
227 .name = "Video ROM",
228 .start = 0xc0000,
229 .end = 0xc7fff,
230 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
231};
232
233static struct resource video_ram_resource = {
234 .name = "Video RAM area",
235 .start = 0xa0000,
236 .end = 0xbffff,
237 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
238};
239
240static struct resource standard_io_resources[] = { {
241 .name = "dma1",
242 .start = 0x0000,
243 .end = 0x001f,
244 .flags = IORESOURCE_BUSY | IORESOURCE_IO
245}, {
246 .name = "pic1",
247 .start = 0x0020,
248 .end = 0x0021,
249 .flags = IORESOURCE_BUSY | IORESOURCE_IO
250}, {
251 .name = "timer0",
252 .start = 0x0040,
253 .end = 0x0043,
254 .flags = IORESOURCE_BUSY | IORESOURCE_IO
255}, {
256 .name = "timer1",
257 .start = 0x0050,
258 .end = 0x0053,
259 .flags = IORESOURCE_BUSY | IORESOURCE_IO
260}, {
261 .name = "keyboard",
262 .start = 0x0060,
263 .end = 0x006f,
264 .flags = IORESOURCE_BUSY | IORESOURCE_IO
265}, {
266 .name = "dma page reg",
267 .start = 0x0080,
268 .end = 0x008f,
269 .flags = IORESOURCE_BUSY | IORESOURCE_IO
270}, {
271 .name = "pic2",
272 .start = 0x00a0,
273 .end = 0x00a1,
274 .flags = IORESOURCE_BUSY | IORESOURCE_IO
275}, {
276 .name = "dma2",
277 .start = 0x00c0,
278 .end = 0x00df,
279 .flags = IORESOURCE_BUSY | IORESOURCE_IO
280}, {
281 .name = "fpu",
282 .start = 0x00f0,
283 .end = 0x00ff,
284 .flags = IORESOURCE_BUSY | IORESOURCE_IO
285} };
286
287#define STANDARD_IO_RESOURCES \
288 (sizeof standard_io_resources / sizeof standard_io_resources[0])
289
290#define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
291
292static int __init romchecksum(unsigned char *rom, unsigned long length)
293{
294 unsigned char *p, sum = 0;
295
296 for (p = rom; p < rom + length; p++)
297 sum += *p;
298 return sum == 0;
299}
300
301static void __init probe_roms(void)
302{
303 unsigned long start, length, upper;
304 unsigned char *rom;
305 int i;
306
307 /* video rom */
308 upper = adapter_rom_resources[0].start;
309 for (start = video_rom_resource.start; start < upper; start += 2048) {
310 rom = isa_bus_to_virt(start);
311 if (!romsignature(rom))
312 continue;
313
314 video_rom_resource.start = start;
315
316 /* 0 < length <= 0x7f * 512, historically */
317 length = rom[2] * 512;
318
319 /* if checksum okay, trust length byte */
320 if (length && romchecksum(rom, length))
321 video_rom_resource.end = start + length - 1;
322
323 request_resource(&iomem_resource, &video_rom_resource);
324 break;
325 }
326
327 start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
328 if (start < upper)
329 start = upper;
330
331 /* system rom */
332 request_resource(&iomem_resource, &system_rom_resource);
333 upper = system_rom_resource.start;
334
335 /* check for extension rom (ignore length byte!) */
336 rom = isa_bus_to_virt(extension_rom_resource.start);
337 if (romsignature(rom)) {
338 length = extension_rom_resource.end - extension_rom_resource.start + 1;
339 if (romchecksum(rom, length)) {
340 request_resource(&iomem_resource, &extension_rom_resource);
341 upper = extension_rom_resource.start;
342 }
343 }
344
345 /* check for adapter roms on 2k boundaries */
346 for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
347 rom = isa_bus_to_virt(start);
348 if (!romsignature(rom))
349 continue;
350
351 /* 0 < length <= 0x7f * 512, historically */
352 length = rom[2] * 512;
353
354 /* but accept any length that fits if checksum okay */
355 if (!length || start + length > upper || !romchecksum(rom, length))
356 continue;
357
358 adapter_rom_resources[i].start = start;
359 adapter_rom_resources[i].end = start + length - 1;
360 request_resource(&iomem_resource, &adapter_rom_resources[i]);
361
362 start = adapter_rom_resources[i++].end & ~2047UL;
363 }
364}
365
366static void __init limit_regions(unsigned long long size)
367{
368 unsigned long long current_addr = 0;
369 int i;
370
371 if (efi_enabled) {
Matt Tolentino7ae65fd2005-09-03 15:56:27 -0700372 efi_memory_desc_t *md;
373 void *p;
374
375 for (p = memmap.map, i = 0; p < memmap.map_end;
376 p += memmap.desc_size, i++) {
377 md = p;
378 current_addr = md->phys_addr + (md->num_pages << 12);
379 if (md->type == EFI_CONVENTIONAL_MEMORY) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700380 if (current_addr >= size) {
Matt Tolentino7ae65fd2005-09-03 15:56:27 -0700381 md->num_pages -=
Linus Torvalds1da177e2005-04-16 15:20:36 -0700382 (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT);
383 memmap.nr_map = i + 1;
384 return;
385 }
386 }
387 }
388 }
389 for (i = 0; i < e820.nr_map; i++) {
Dave Hansenf014a552005-10-30 14:59:37 -0800390 current_addr = e820.map[i].addr + e820.map[i].size;
391 if (current_addr < size)
392 continue;
393
394 if (e820.map[i].type != E820_RAM)
395 continue;
396
397 if (e820.map[i].addr >= size) {
398 /*
399 * This region starts past the end of the
400 * requested size, skip it completely.
401 */
402 e820.nr_map = i;
403 } else {
404 e820.nr_map = i + 1;
405 e820.map[i].size -= current_addr - size;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700406 }
Dave Hansenf014a552005-10-30 14:59:37 -0800407 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700408 }
409}
410
411static void __init add_memory_region(unsigned long long start,
412 unsigned long long size, int type)
413{
414 int x;
415
416 if (!efi_enabled) {
417 x = e820.nr_map;
418
419 if (x == E820MAX) {
420 printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
421 return;
422 }
423
424 e820.map[x].addr = start;
425 e820.map[x].size = size;
426 e820.map[x].type = type;
427 e820.nr_map++;
428 }
429} /* add_memory_region */
430
431#define E820_DEBUG 1
432
433static void __init print_memory_map(char *who)
434{
435 int i;
436
437 for (i = 0; i < e820.nr_map; i++) {
438 printk(" %s: %016Lx - %016Lx ", who,
439 e820.map[i].addr,
440 e820.map[i].addr + e820.map[i].size);
441 switch (e820.map[i].type) {
442 case E820_RAM: printk("(usable)\n");
443 break;
444 case E820_RESERVED:
445 printk("(reserved)\n");
446 break;
447 case E820_ACPI:
448 printk("(ACPI data)\n");
449 break;
450 case E820_NVS:
451 printk("(ACPI NVS)\n");
452 break;
453 default: printk("type %lu\n", e820.map[i].type);
454 break;
455 }
456 }
457}
458
459/*
460 * Sanitize the BIOS e820 map.
461 *
462 * Some e820 responses include overlapping entries. The following
463 * replaces the original e820 map with a new one, removing overlaps.
464 *
465 */
466struct change_member {
467 struct e820entry *pbios; /* pointer to original bios entry */
468 unsigned long long addr; /* address for this change point */
469};
470static struct change_member change_point_list[2*E820MAX] __initdata;
471static struct change_member *change_point[2*E820MAX] __initdata;
472static struct e820entry *overlap_list[E820MAX] __initdata;
473static struct e820entry new_bios[E820MAX] __initdata;
474
475static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
476{
477 struct change_member *change_tmp;
478 unsigned long current_type, last_type;
479 unsigned long long last_addr;
480 int chgidx, still_changing;
481 int overlap_entries;
482 int new_bios_entry;
483 int old_nr, new_nr, chg_nr;
484 int i;
485
486 /*
487 Visually we're performing the following (1,2,3,4 = memory types)...
488
489 Sample memory map (w/overlaps):
490 ____22__________________
491 ______________________4_
492 ____1111________________
493 _44_____________________
494 11111111________________
495 ____________________33__
496 ___________44___________
497 __________33333_________
498 ______________22________
499 ___________________2222_
500 _________111111111______
501 _____________________11_
502 _________________4______
503
504 Sanitized equivalent (no overlap):
505 1_______________________
506 _44_____________________
507 ___1____________________
508 ____22__________________
509 ______11________________
510 _________1______________
511 __________3_____________
512 ___________44___________
513 _____________33_________
514 _______________2________
515 ________________1_______
516 _________________4______
517 ___________________2____
518 ____________________33__
519 ______________________4_
520 */
521
522 /* if there's only one memory region, don't bother */
523 if (*pnr_map < 2)
524 return -1;
525
526 old_nr = *pnr_map;
527
528 /* bail out if we find any unreasonable addresses in bios map */
529 for (i=0; i<old_nr; i++)
530 if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
531 return -1;
532
533 /* create pointers for initial change-point information (for sorting) */
534 for (i=0; i < 2*old_nr; i++)
535 change_point[i] = &change_point_list[i];
536
537 /* record all known change-points (starting and ending addresses),
538 omitting those that are for empty memory regions */
539 chgidx = 0;
540 for (i=0; i < old_nr; i++) {
541 if (biosmap[i].size != 0) {
542 change_point[chgidx]->addr = biosmap[i].addr;
543 change_point[chgidx++]->pbios = &biosmap[i];
544 change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
545 change_point[chgidx++]->pbios = &biosmap[i];
546 }
547 }
548 chg_nr = chgidx; /* true number of change-points */
549
550 /* sort change-point list by memory addresses (low -> high) */
551 still_changing = 1;
552 while (still_changing) {
553 still_changing = 0;
554 for (i=1; i < chg_nr; i++) {
555 /* if <current_addr> > <last_addr>, swap */
556 /* or, if current=<start_addr> & last=<end_addr>, swap */
557 if ((change_point[i]->addr < change_point[i-1]->addr) ||
558 ((change_point[i]->addr == change_point[i-1]->addr) &&
559 (change_point[i]->addr == change_point[i]->pbios->addr) &&
560 (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
561 )
562 {
563 change_tmp = change_point[i];
564 change_point[i] = change_point[i-1];
565 change_point[i-1] = change_tmp;
566 still_changing=1;
567 }
568 }
569 }
570
571 /* create a new bios memory map, removing overlaps */
572 overlap_entries=0; /* number of entries in the overlap table */
573 new_bios_entry=0; /* index for creating new bios map entries */
574 last_type = 0; /* start with undefined memory type */
575 last_addr = 0; /* start with 0 as last starting address */
576 /* loop through change-points, determining affect on the new bios map */
577 for (chgidx=0; chgidx < chg_nr; chgidx++)
578 {
579 /* keep track of all overlapping bios entries */
580 if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
581 {
582 /* add map entry to overlap list (> 1 entry implies an overlap) */
583 overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
584 }
585 else
586 {
587 /* remove entry from list (order independent, so swap with last) */
588 for (i=0; i<overlap_entries; i++)
589 {
590 if (overlap_list[i] == change_point[chgidx]->pbios)
591 overlap_list[i] = overlap_list[overlap_entries-1];
592 }
593 overlap_entries--;
594 }
595 /* if there are overlapping entries, decide which "type" to use */
596 /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
597 current_type = 0;
598 for (i=0; i<overlap_entries; i++)
599 if (overlap_list[i]->type > current_type)
600 current_type = overlap_list[i]->type;
601 /* continue building up new bios map based on this information */
602 if (current_type != last_type) {
603 if (last_type != 0) {
604 new_bios[new_bios_entry].size =
605 change_point[chgidx]->addr - last_addr;
606 /* move forward only if the new size was non-zero */
607 if (new_bios[new_bios_entry].size != 0)
608 if (++new_bios_entry >= E820MAX)
609 break; /* no more space left for new bios entries */
610 }
611 if (current_type != 0) {
612 new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
613 new_bios[new_bios_entry].type = current_type;
614 last_addr=change_point[chgidx]->addr;
615 }
616 last_type = current_type;
617 }
618 }
619 new_nr = new_bios_entry; /* retain count for new bios entries */
620
621 /* copy new bios mapping into original location */
622 memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
623 *pnr_map = new_nr;
624
625 return 0;
626}
627
628/*
629 * Copy the BIOS e820 map into a safe place.
630 *
631 * Sanity-check it while we're at it..
632 *
633 * If we're lucky and live on a modern system, the setup code
634 * will have given us a memory map that we can use to properly
635 * set up memory. If we aren't, we'll fake a memory map.
636 *
637 * We check to see that the memory map contains at least 2 elements
638 * before we'll use it, because the detection code in setup.S may
639 * not be perfect and most every PC known to man has two memory
640 * regions: one from 0 to 640k, and one from 1mb up. (The IBM
641 * thinkpad 560x, for example, does not cooperate with the memory
642 * detection code.)
643 */
644static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
645{
646 /* Only one memory region (or negative)? Ignore it */
647 if (nr_map < 2)
648 return -1;
649
650 do {
651 unsigned long long start = biosmap->addr;
652 unsigned long long size = biosmap->size;
653 unsigned long long end = start + size;
654 unsigned long type = biosmap->type;
655
656 /* Overflow in 64 bits? Ignore the memory map. */
657 if (start > end)
658 return -1;
659
660 /*
661 * Some BIOSes claim RAM in the 640k - 1M region.
662 * Not right. Fix it up.
663 */
664 if (type == E820_RAM) {
665 if (start < 0x100000ULL && end > 0xA0000ULL) {
666 if (start < 0xA0000ULL)
667 add_memory_region(start, 0xA0000ULL-start, type);
668 if (end <= 0x100000ULL)
669 continue;
670 start = 0x100000ULL;
671 size = end - start;
672 }
673 }
674 add_memory_region(start, size, type);
675 } while (biosmap++,--nr_map);
676 return 0;
677}
678
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
struct edd edd;
#ifdef CONFIG_EDD_MODULE
EXPORT_SYMBOL(edd);
#endif
#endif

/**
 * copy_edd() - Copy the BIOS EDD information
 *              from boot_params into a safe place.
 *
 * Does nothing when EDD support is not configured.
 */
static inline void copy_edd(void)
{
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
	edd.mbr_signature_nr = EDD_MBR_SIG_NR;
	edd.edd_info_nr = EDD_NR;
	memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature));
	memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info));
#endif
}
701
702/*
703 * Do NOT EVER look at the BIOS memory size location.
704 * It does not work on many machines.
705 */
706#define LOWMEMSIZE() (0x9f000)
707
708static void __init parse_cmdline_early (char ** cmdline_p)
709{
710 char c = ' ', *to = command_line, *from = saved_command_line;
711 int len = 0;
712 int userdef = 0;
713
714 /* Save unparsed command line copy for /proc/cmdline */
715 saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
716
717 for (;;) {
718 if (c != ' ')
719 goto next_char;
720 /*
721 * "mem=nopentium" disables the 4MB page tables.
722 * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
723 * to <mem>, overriding the bios size.
724 * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
725 * <start> to <start>+<mem>, overriding the bios size.
726 *
727 * HPA tells me bootloaders need to parse mem=, so no new
728 * option should be mem= [also see Documentation/i386/boot.txt]
729 */
730 if (!memcmp(from, "mem=", 4)) {
731 if (to != command_line)
732 to--;
733 if (!memcmp(from+4, "nopentium", 9)) {
734 from += 9+4;
735 clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
736 disable_pse = 1;
737 } else {
738 /* If the user specifies memory size, we
739 * limit the BIOS-provided memory map to
740 * that size. exactmap can be used to specify
741 * the exact map. mem=number can be used to
742 * trim the existing memory map.
743 */
744 unsigned long long mem_size;
745
746 mem_size = memparse(from+4, &from);
747 limit_regions(mem_size);
748 userdef=1;
749 }
750 }
751
752 else if (!memcmp(from, "memmap=", 7)) {
753 if (to != command_line)
754 to--;
755 if (!memcmp(from+7, "exactmap", 8)) {
Vivek Goyal92aa63a2005-06-25 14:58:18 -0700756#ifdef CONFIG_CRASH_DUMP
757 /* If we are doing a crash dump, we
758 * still need to know the real mem
759 * size before original memory map is
760 * reset.
761 */
762 find_max_pfn();
763 saved_max_pfn = max_pfn;
764#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700765 from += 8+7;
766 e820.nr_map = 0;
767 userdef = 1;
768 } else {
769 /* If the user specifies memory size, we
770 * limit the BIOS-provided memory map to
771 * that size. exactmap can be used to specify
772 * the exact map. mem=number can be used to
773 * trim the existing memory map.
774 */
775 unsigned long long start_at, mem_size;
776
777 mem_size = memparse(from+7, &from);
778 if (*from == '@') {
779 start_at = memparse(from+1, &from);
780 add_memory_region(start_at, mem_size, E820_RAM);
781 } else if (*from == '#') {
782 start_at = memparse(from+1, &from);
783 add_memory_region(start_at, mem_size, E820_ACPI);
784 } else if (*from == '$') {
785 start_at = memparse(from+1, &from);
786 add_memory_region(start_at, mem_size, E820_RESERVED);
787 } else {
788 limit_regions(mem_size);
789 userdef=1;
790 }
791 }
792 }
793
794 else if (!memcmp(from, "noexec=", 7))
795 noexec_setup(from + 7);
796
797
798#ifdef CONFIG_X86_SMP
799 /*
800 * If the BIOS enumerates physical processors before logical,
801 * maxcpus=N at enumeration-time can be used to disable HT.
802 */
803 else if (!memcmp(from, "maxcpus=", 8)) {
804 extern unsigned int maxcpus;
805
806 maxcpus = simple_strtoul(from + 8, NULL, 0);
807 }
808#endif
809
Len Brown888ba6c2005-08-24 12:07:20 -0400810#ifdef CONFIG_ACPI
Linus Torvalds1da177e2005-04-16 15:20:36 -0700811 /* "acpi=off" disables both ACPI table parsing and interpreter */
812 else if (!memcmp(from, "acpi=off", 8)) {
813 disable_acpi();
814 }
815
816 /* acpi=force to over-ride black-list */
817 else if (!memcmp(from, "acpi=force", 10)) {
818 acpi_force = 1;
819 acpi_ht = 1;
820 acpi_disabled = 0;
821 }
822
823 /* acpi=strict disables out-of-spec workarounds */
824 else if (!memcmp(from, "acpi=strict", 11)) {
825 acpi_strict = 1;
826 }
827
828 /* Limit ACPI just to boot-time to enable HT */
829 else if (!memcmp(from, "acpi=ht", 7)) {
830 if (!acpi_force)
831 disable_acpi();
832 acpi_ht = 1;
833 }
834
835 /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */
836 else if (!memcmp(from, "pci=noacpi", 10)) {
837 acpi_disable_pci();
838 }
839 /* "acpi=noirq" disables ACPI interrupt routing */
840 else if (!memcmp(from, "acpi=noirq", 10)) {
841 acpi_noirq_set();
842 }
843
844 else if (!memcmp(from, "acpi_sci=edge", 13))
845 acpi_sci_flags.trigger = 1;
846
847 else if (!memcmp(from, "acpi_sci=level", 14))
848 acpi_sci_flags.trigger = 3;
849
850 else if (!memcmp(from, "acpi_sci=high", 13))
851 acpi_sci_flags.polarity = 1;
852
853 else if (!memcmp(from, "acpi_sci=low", 12))
854 acpi_sci_flags.polarity = 3;
855
856#ifdef CONFIG_X86_IO_APIC
857 else if (!memcmp(from, "acpi_skip_timer_override", 24))
858 acpi_skip_timer_override = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700859
Chuck Ebbert66759a02005-09-12 18:49:25 +0200860 if (!memcmp(from, "disable_timer_pin_1", 19))
861 disable_timer_pin_1 = 1;
862 if (!memcmp(from, "enable_timer_pin_1", 18))
863 disable_timer_pin_1 = -1;
864
Linus Torvalds1da177e2005-04-16 15:20:36 -0700865 /* disable IO-APIC */
866 else if (!memcmp(from, "noapic", 6))
867 disable_ioapic_setup();
Cal Peake0a305d22005-09-13 02:28:07 -0400868#endif /* CONFIG_X86_IO_APIC */
Len Brown888ba6c2005-08-24 12:07:20 -0400869#endif /* CONFIG_ACPI */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700870
Eric W. Biederman9635b472005-06-25 14:57:41 -0700871#ifdef CONFIG_X86_LOCAL_APIC
872 /* enable local APIC */
873 else if (!memcmp(from, "lapic", 5))
874 lapic_enable();
875
876 /* disable local APIC */
877 else if (!memcmp(from, "nolapic", 6))
878 lapic_disable();
879#endif /* CONFIG_X86_LOCAL_APIC */
880
Eric W. Biederman1bc3b912005-06-25 14:58:01 -0700881#ifdef CONFIG_KEXEC
882 /* crashkernel=size@addr specifies the location to reserve for
883 * a crash kernel. By reserving this memory we guarantee
884 * that linux never set's it up as a DMA target.
885 * Useful for holding code to do something appropriate
886 * after a kernel panic.
887 */
888 else if (!memcmp(from, "crashkernel=", 12)) {
889 unsigned long size, base;
890 size = memparse(from+12, &from);
891 if (*from == '@') {
892 base = memparse(from+1, &from);
893 /* FIXME: Do I want a sanity check
894 * to validate the memory range?
895 */
896 crashk_res.start = base;
897 crashk_res.end = base + size - 1;
898 }
899 }
900#endif
Vivek Goyalaac04b32006-01-09 20:51:47 -0800901#ifdef CONFIG_PROC_VMCORE
Vivek Goyal2030eae2005-06-25 14:58:20 -0700902 /* elfcorehdr= specifies the location of elf core header
903 * stored by the crashed kernel.
904 */
905 else if (!memcmp(from, "elfcorehdr=", 11))
906 elfcorehdr_addr = memparse(from+11, &from);
907#endif
Eric W. Biederman1bc3b912005-06-25 14:58:01 -0700908
Linus Torvalds1da177e2005-04-16 15:20:36 -0700909 /*
910 * highmem=size forces highmem to be exactly 'size' bytes.
911 * This works even on boxes that have no highmem otherwise.
912 * This also works to reduce highmem size on bigger boxes.
913 */
914 else if (!memcmp(from, "highmem=", 8))
915 highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;
916
917 /*
918 * vmalloc=size forces the vmalloc area to be exactly 'size'
919 * bytes. This can be used to increase (or decrease) the
920 * vmalloc area - the default is 128m.
921 */
922 else if (!memcmp(from, "vmalloc=", 8))
923 __VMALLOC_RESERVE = memparse(from+8, &from);
924
925 next_char:
926 c = *(from++);
927 if (!c)
928 break;
929 if (COMMAND_LINE_SIZE <= ++len)
930 break;
931 *(to++) = c;
932 }
933 *to = '\0';
934 *cmdline_p = command_line;
935 if (userdef) {
936 printk(KERN_INFO "user-defined physical RAM map:\n");
937 print_memory_map("user");
938 }
939}
940
941/*
942 * Callback for efi_memory_walk.
943 */
944static int __init
945efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
946{
947 unsigned long *max_pfn = arg, pfn;
948
949 if (start < end) {
950 pfn = PFN_UP(end -1);
951 if (pfn > *max_pfn)
952 *max_pfn = pfn;
953 }
954 return 0;
955}
956
Andy Whitcroft215c3402006-01-06 00:12:06 -0800957static int __init
958efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg)
959{
960 memory_present(0, start, end);
961 return 0;
962}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700963
964/*
965 * Find the highest page frame number we have available
966 */
967void __init find_max_pfn(void)
968{
969 int i;
970
971 max_pfn = 0;
972 if (efi_enabled) {
973 efi_memmap_walk(efi_find_max_pfn, &max_pfn);
Andy Whitcroft215c3402006-01-06 00:12:06 -0800974 efi_memmap_walk(efi_memory_present_wrapper, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700975 return;
976 }
977
978 for (i = 0; i < e820.nr_map; i++) {
979 unsigned long start, end;
980 /* RAM? */
981 if (e820.map[i].type != E820_RAM)
982 continue;
983 start = PFN_UP(e820.map[i].addr);
984 end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
985 if (start >= end)
986 continue;
987 if (end > max_pfn)
988 max_pfn = end;
Andy Whitcroft215c3402006-01-06 00:12:06 -0800989 memory_present(0, start, end);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700990 }
991}
992
993/*
994 * Determine low and high memory ranges:
995 */
996unsigned long __init find_max_low_pfn(void)
997{
998 unsigned long max_low_pfn;
999
1000 max_low_pfn = max_pfn;
1001 if (max_low_pfn > MAXMEM_PFN) {
1002 if (highmem_pages == -1)
1003 highmem_pages = max_pfn - MAXMEM_PFN;
1004 if (highmem_pages + MAXMEM_PFN < max_pfn)
1005 max_pfn = MAXMEM_PFN + highmem_pages;
1006 if (highmem_pages + MAXMEM_PFN > max_pfn) {
1007 printk("only %luMB highmem pages available, ignoring highmem size of %uMB.\n", pages_to_mb(max_pfn - MAXMEM_PFN), pages_to_mb(highmem_pages));
1008 highmem_pages = 0;
1009 }
1010 max_low_pfn = MAXMEM_PFN;
1011#ifndef CONFIG_HIGHMEM
1012 /* Maximum memory usable is what is directly addressable */
1013 printk(KERN_WARNING "Warning only %ldMB will be used.\n",
1014 MAXMEM>>20);
1015 if (max_pfn > MAX_NONPAE_PFN)
1016 printk(KERN_WARNING "Use a PAE enabled kernel.\n");
1017 else
1018 printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
1019 max_pfn = MAXMEM_PFN;
1020#else /* !CONFIG_HIGHMEM */
1021#ifndef CONFIG_X86_PAE
1022 if (max_pfn > MAX_NONPAE_PFN) {
1023 max_pfn = MAX_NONPAE_PFN;
1024 printk(KERN_WARNING "Warning only 4GB will be used.\n");
1025 printk(KERN_WARNING "Use a PAE enabled kernel.\n");
1026 }
1027#endif /* !CONFIG_X86_PAE */
1028#endif /* !CONFIG_HIGHMEM */
1029 } else {
1030 if (highmem_pages == -1)
1031 highmem_pages = 0;
1032#ifdef CONFIG_HIGHMEM
1033 if (highmem_pages >= max_pfn) {
1034 printk(KERN_ERR "highmem size specified (%uMB) is bigger than pages available (%luMB)!.\n", pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
1035 highmem_pages = 0;
1036 }
1037 if (highmem_pages) {
1038 if (max_low_pfn-highmem_pages < 64*1024*1024/PAGE_SIZE){
1039 printk(KERN_ERR "highmem size %uMB results in smaller than 64MB lowmem, ignoring it.\n", pages_to_mb(highmem_pages));
1040 highmem_pages = 0;
1041 }
1042 max_low_pfn -= highmem_pages;
1043 }
1044#else
1045 if (highmem_pages)
1046 printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
1047#endif
1048 }
1049 return max_low_pfn;
1050}
1051
1052/*
1053 * Free all available memory for boot time allocation. Used
1054 * as a callback function by efi_memory_walk()
1055 */
1056
1057static int __init
1058free_available_memory(unsigned long start, unsigned long end, void *arg)
1059{
1060 /* check max_low_pfn */
1061 if (start >= ((max_low_pfn + 1) << PAGE_SHIFT))
1062 return 0;
1063 if (end >= ((max_low_pfn + 1) << PAGE_SHIFT))
1064 end = (max_low_pfn + 1) << PAGE_SHIFT;
1065 if (start < end)
1066 free_bootmem(start, end - start);
1067
1068 return 0;
1069}
1070/*
1071 * Register fully available low RAM pages with the bootmem allocator.
1072 */
1073static void __init register_bootmem_low_pages(unsigned long max_low_pfn)
1074{
1075 int i;
1076
1077 if (efi_enabled) {
1078 efi_memmap_walk(free_available_memory, NULL);
1079 return;
1080 }
1081 for (i = 0; i < e820.nr_map; i++) {
1082 unsigned long curr_pfn, last_pfn, size;
1083 /*
1084 * Reserve usable low memory
1085 */
1086 if (e820.map[i].type != E820_RAM)
1087 continue;
1088 /*
1089 * We are rounding up the start address of usable memory:
1090 */
1091 curr_pfn = PFN_UP(e820.map[i].addr);
1092 if (curr_pfn >= max_low_pfn)
1093 continue;
1094 /*
1095 * ... and at the end of the usable range downwards:
1096 */
1097 last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
1098
1099 if (last_pfn > max_low_pfn)
1100 last_pfn = max_low_pfn;
1101
1102 /*
1103 * .. finally, did all the rounding and playing
1104 * around just make the area go away?
1105 */
1106 if (last_pfn <= curr_pfn)
1107 continue;
1108
1109 size = last_pfn - curr_pfn;
1110 free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
1111 }
1112}
1113
1114/*
1115 * workaround for Dell systems that neglect to reserve EBDA
1116 */
1117static void __init reserve_ebda_region(void)
1118{
1119 unsigned int addr;
1120 addr = get_bios_ebda();
1121 if (addr)
1122 reserve_bootmem(addr, PAGE_SIZE);
1123}
1124
Andy Whitcroft05b79bd2005-06-23 00:07:57 -07001125#ifndef CONFIG_NEED_MULTIPLE_NODES
Linus Torvalds1da177e2005-04-16 15:20:36 -07001126void __init setup_bootmem_allocator(void);
1127static unsigned long __init setup_memory(void)
1128{
1129 /*
1130 * partially used pages are not usable - thus
1131 * we are rounding upwards:
1132 */
1133 min_low_pfn = PFN_UP(init_pg_tables_end);
1134
1135 find_max_pfn();
1136
1137 max_low_pfn = find_max_low_pfn();
1138
1139#ifdef CONFIG_HIGHMEM
1140 highstart_pfn = highend_pfn = max_pfn;
1141 if (max_pfn > max_low_pfn) {
1142 highstart_pfn = max_low_pfn;
1143 }
1144 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
1145 pages_to_mb(highend_pfn - highstart_pfn));
1146#endif
1147 printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
1148 pages_to_mb(max_low_pfn));
1149
1150 setup_bootmem_allocator();
1151
1152 return max_low_pfn;
1153}
1154
1155void __init zone_sizes_init(void)
1156{
1157 unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
1158 unsigned int max_dma, low;
1159
1160 max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
1161 low = max_low_pfn;
1162
1163 if (low < max_dma)
1164 zones_size[ZONE_DMA] = low;
1165 else {
1166 zones_size[ZONE_DMA] = max_dma;
1167 zones_size[ZONE_NORMAL] = low - max_dma;
1168#ifdef CONFIG_HIGHMEM
1169 zones_size[ZONE_HIGHMEM] = highend_pfn - low;
1170#endif
1171 }
1172 free_area_init(zones_size);
1173}
1174#else
Andy Whitcroft05b79bd2005-06-23 00:07:57 -07001175extern unsigned long __init setup_memory(void);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001176extern void zone_sizes_init(void);
Andy Whitcroft05b79bd2005-06-23 00:07:57 -07001177#endif /* !CONFIG_NEED_MULTIPLE_NODES */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001178
1179void __init setup_bootmem_allocator(void)
1180{
1181 unsigned long bootmap_size;
1182 /*
1183 * Initialize the boot-time allocator (with low memory only):
1184 */
1185 bootmap_size = init_bootmem(min_low_pfn, max_low_pfn);
1186
1187 register_bootmem_low_pages(max_low_pfn);
1188
1189 /*
1190 * Reserve the bootmem bitmap itself as well. We do this in two
1191 * steps (first step was init_bootmem()) because this catches
1192 * the (very unlikely) case of us accidentally initializing the
1193 * bootmem allocator with an invalid RAM area.
1194 */
Vivek Goyal8a919082005-06-25 14:57:51 -07001195 reserve_bootmem(__PHYSICAL_START, (PFN_PHYS(min_low_pfn) +
1196 bootmap_size + PAGE_SIZE-1) - (__PHYSICAL_START));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001197
1198 /*
1199 * reserve physical page 0 - it's a special BIOS page on many boxes,
1200 * enabling clean reboots, SMP operation, laptop functions.
1201 */
1202 reserve_bootmem(0, PAGE_SIZE);
1203
1204 /* reserve EBDA region, it's a 4K region */
1205 reserve_ebda_region();
1206
1207 /* could be an AMD 768MPX chipset. Reserve a page before VGA to prevent
1208 PCI prefetch into it (errata #56). Usually the page is reserved anyways,
1209 unless you have no PS/2 mouse plugged in. */
1210 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
1211 boot_cpu_data.x86 == 6)
1212 reserve_bootmem(0xa0000 - 4096, 4096);
1213
1214#ifdef CONFIG_SMP
1215 /*
1216 * But first pinch a few for the stack/trampoline stuff
1217 * FIXME: Don't need the extra page at 4K, but need to fix
1218 * trampoline before removing it. (see the GDT stuff)
1219 */
1220 reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
1221#endif
1222#ifdef CONFIG_ACPI_SLEEP
1223 /*
1224 * Reserve low memory region for sleep support.
1225 */
1226 acpi_reserve_bootmem();
1227#endif
1228#ifdef CONFIG_X86_FIND_SMP_CONFIG
1229 /*
1230 * Find and reserve possible boot-time SMP configuration:
1231 */
1232 find_smp_config();
1233#endif
1234
1235#ifdef CONFIG_BLK_DEV_INITRD
1236 if (LOADER_TYPE && INITRD_START) {
1237 if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
1238 reserve_bootmem(INITRD_START, INITRD_SIZE);
1239 initrd_start =
1240 INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
1241 initrd_end = initrd_start+INITRD_SIZE;
1242 }
1243 else {
1244 printk(KERN_ERR "initrd extends beyond end of memory "
1245 "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
1246 INITRD_START + INITRD_SIZE,
1247 max_low_pfn << PAGE_SHIFT);
1248 initrd_start = 0;
1249 }
1250 }
1251#endif
Eric W. Biederman1bc3b912005-06-25 14:58:01 -07001252#ifdef CONFIG_KEXEC
1253 if (crashk_res.start != crashk_res.end)
1254 reserve_bootmem(crashk_res.start,
1255 crashk_res.end - crashk_res.start + 1);
1256#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001257}
1258
1259/*
1260 * The node 0 pgdat is initialized before all of these because
1261 * it's needed for bootmem. node>0 pgdats have their virtual
1262 * space allocated before the pagetables are in place to access
1263 * them, so they can't be cleared then.
1264 *
1265 * This should all compile down to nothing when NUMA is off.
1266 */
1267void __init remapped_pgdat_init(void)
1268{
1269 int nid;
1270
1271 for_each_online_node(nid) {
1272 if (nid != 0)
1273 memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
1274 }
1275}
1276
1277/*
1278 * Request address space for all standard RAM and ROM resources
1279 * and also for regions reported as reserved by the e820.
1280 */
1281static void __init
1282legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource)
1283{
1284 int i;
1285
1286 probe_roms();
1287 for (i = 0; i < e820.nr_map; i++) {
1288 struct resource *res;
1289 if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
1290 continue;
1291 res = alloc_bootmem_low(sizeof(struct resource));
1292 switch (e820.map[i].type) {
1293 case E820_RAM: res->name = "System RAM"; break;
1294 case E820_ACPI: res->name = "ACPI Tables"; break;
1295 case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
1296 default: res->name = "reserved";
1297 }
1298 res->start = e820.map[i].addr;
1299 res->end = res->start + e820.map[i].size - 1;
1300 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
1301 request_resource(&iomem_resource, res);
1302 if (e820.map[i].type == E820_RAM) {
1303 /*
1304 * We don't know which RAM region contains kernel data,
1305 * so we try it repeatedly and let the resource manager
1306 * test it.
1307 */
1308 request_resource(res, code_resource);
1309 request_resource(res, data_resource);
Eric W. Biederman1bc3b912005-06-25 14:58:01 -07001310#ifdef CONFIG_KEXEC
1311 request_resource(res, &crashk_res);
1312#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001313 }
1314 }
1315}
1316
1317/*
1318 * Request address space for all standard resources
1319 */
1320static void __init register_memory(void)
1321{
Daniel Ritzf0eca962005-09-09 00:57:14 +02001322 unsigned long gapstart, gapsize, round;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001323 unsigned long long last;
1324 int i;
1325
1326 if (efi_enabled)
1327 efi_initialize_iomem_resources(&code_resource, &data_resource);
1328 else
1329 legacy_init_iomem_resources(&code_resource, &data_resource);
1330
1331 /* EFI systems may still have VGA */
1332 request_resource(&iomem_resource, &video_ram_resource);
1333
1334 /* request I/O space for devices used on all i[345]86 PCs */
1335 for (i = 0; i < STANDARD_IO_RESOURCES; i++)
1336 request_resource(&ioport_resource, &standard_io_resources[i]);
1337
1338 /*
1339 * Search for the bigest gap in the low 32 bits of the e820
1340 * memory space.
1341 */
1342 last = 0x100000000ull;
1343 gapstart = 0x10000000;
1344 gapsize = 0x400000;
1345 i = e820.nr_map;
1346 while (--i >= 0) {
1347 unsigned long long start = e820.map[i].addr;
1348 unsigned long long end = start + e820.map[i].size;
1349
1350 /*
1351 * Since "last" is at most 4GB, we know we'll
1352 * fit in 32 bits if this condition is true
1353 */
1354 if (last > end) {
1355 unsigned long gap = last - end;
1356
1357 if (gap > gapsize) {
1358 gapsize = gap;
1359 gapstart = end;
1360 }
1361 }
1362 if (start < last)
1363 last = start;
1364 }
1365
1366 /*
Daniel Ritzf0eca962005-09-09 00:57:14 +02001367 * See how much we want to round up: start off with
1368 * rounding to the next 1MB area.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001369 */
Daniel Ritzf0eca962005-09-09 00:57:14 +02001370 round = 0x100000;
1371 while ((gapsize >> 4) > round)
1372 round += round;
1373 /* Fun with two's complement */
1374 pci_mem_start = (gapstart + round) & -round;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001375
1376 printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
1377 pci_mem_start, gapstart, gapsize);
1378}
1379
1380/* Use inline assembly to define this because the nops are defined
1381 as inline assembly strings in the include files and we cannot
1382 get them easily into strings. */
1383asm("\t.data\nintelnops: "
1384 GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
1385 GENERIC_NOP7 GENERIC_NOP8);
1386asm("\t.data\nk8nops: "
1387 K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
1388 K8_NOP7 K8_NOP8);
1389asm("\t.data\nk7nops: "
1390 K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
1391 K7_NOP7 K7_NOP8);
1392
1393extern unsigned char intelnops[], k8nops[], k7nops[];
1394static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
1395 NULL,
1396 intelnops,
1397 intelnops + 1,
1398 intelnops + 1 + 2,
1399 intelnops + 1 + 2 + 3,
1400 intelnops + 1 + 2 + 3 + 4,
1401 intelnops + 1 + 2 + 3 + 4 + 5,
1402 intelnops + 1 + 2 + 3 + 4 + 5 + 6,
1403 intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1404};
1405static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
1406 NULL,
1407 k8nops,
1408 k8nops + 1,
1409 k8nops + 1 + 2,
1410 k8nops + 1 + 2 + 3,
1411 k8nops + 1 + 2 + 3 + 4,
1412 k8nops + 1 + 2 + 3 + 4 + 5,
1413 k8nops + 1 + 2 + 3 + 4 + 5 + 6,
1414 k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1415};
1416static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
1417 NULL,
1418 k7nops,
1419 k7nops + 1,
1420 k7nops + 1 + 2,
1421 k7nops + 1 + 2 + 3,
1422 k7nops + 1 + 2 + 3 + 4,
1423 k7nops + 1 + 2 + 3 + 4 + 5,
1424 k7nops + 1 + 2 + 3 + 4 + 5 + 6,
1425 k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1426};
1427static struct nop {
1428 int cpuid;
1429 unsigned char **noptable;
1430} noptypes[] = {
1431 { X86_FEATURE_K8, k8_nops },
1432 { X86_FEATURE_K7, k7_nops },
1433 { -1, NULL }
1434};
1435
1436/* Replace instructions with better alternatives for this CPU type.
1437
1438 This runs before SMP is initialized to avoid SMP problems with
1439 self modifying code. This implies that assymetric systems where
1440 APs have less capabilities than the boot processor are not handled.
Linus Torvalds72538d82005-07-22 18:29:40 -04001441 Tough. Make sure you disable such features by hand. */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001442void apply_alternatives(void *start, void *end)
1443{
1444 struct alt_instr *a;
1445 int diff, i, k;
1446 unsigned char **noptable = intel_nops;
1447 for (i = 0; noptypes[i].cpuid >= 0; i++) {
1448 if (boot_cpu_has(noptypes[i].cpuid)) {
1449 noptable = noptypes[i].noptable;
1450 break;
1451 }
1452 }
1453 for (a = start; (void *)a < end; a++) {
1454 if (!boot_cpu_has(a->cpuid))
1455 continue;
1456 BUG_ON(a->replacementlen > a->instrlen);
1457 memcpy(a->instr, a->replacement, a->replacementlen);
1458 diff = a->instrlen - a->replacementlen;
1459 /* Pad the rest with nops */
1460 for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
1461 k = diff;
1462 if (k > ASM_NOP_MAX)
1463 k = ASM_NOP_MAX;
1464 memcpy(a->instr + i, noptable[k], k);
1465 }
1466 }
1467}
1468
Linus Torvalds1da177e2005-04-16 15:20:36 -07001469void __init alternative_instructions(void)
1470{
1471 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001472 apply_alternatives(__alt_instructions, __alt_instructions_end);
1473}
1474
Linus Torvalds1da177e2005-04-16 15:20:36 -07001475static char * __init machine_specific_memory_setup(void);
1476
1477#ifdef CONFIG_MCA
1478static void set_mca_bus(int x)
1479{
1480 MCA_bus = x;
1481}
1482#else
1483static void set_mca_bus(int x) { }
1484#endif
1485
1486/*
1487 * Determine if we were loaded by an EFI loader. If so, then we have also been
1488 * passed the efi memmap, systab, etc., so we should use these data structures
1489 * for initialization. Note, the efi init code path is determined by the
1490 * global efi_enabled. This allows the same kernel image to be used on existing
1491 * systems (with a traditional BIOS) as well as on EFI systems.
1492 */
1493void __init setup_arch(char **cmdline_p)
1494{
1495 unsigned long max_low_pfn;
1496
1497 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
1498 pre_setup_arch_hook();
1499 early_cpu_init();
1500
1501 /*
1502 * FIXME: This isn't an official loader_type right
1503 * now but does currently work with elilo.
1504 * If we were configured as an EFI kernel, check to make
1505 * sure that we were loaded correctly from elilo and that
1506 * the system table is valid. If not, then initialize normally.
1507 */
1508#ifdef CONFIG_EFI
1509 if ((LOADER_TYPE == 0x50) && EFI_SYSTAB)
1510 efi_enabled = 1;
1511#endif
1512
1513 ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
1514 drive_info = DRIVE_INFO;
1515 screen_info = SCREEN_INFO;
1516 edid_info = EDID_INFO;
1517 apm_info.bios = APM_BIOS_INFO;
1518 ist_info = IST_INFO;
1519 saved_videomode = VIDEO_MODE;
1520 if( SYS_DESC_TABLE.length != 0 ) {
1521 set_mca_bus(SYS_DESC_TABLE.table[3] & 0x2);
1522 machine_id = SYS_DESC_TABLE.table[0];
1523 machine_submodel_id = SYS_DESC_TABLE.table[1];
1524 BIOS_revision = SYS_DESC_TABLE.table[2];
1525 }
1526 bootloader_type = LOADER_TYPE;
1527
1528#ifdef CONFIG_BLK_DEV_RAM
1529 rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
1530 rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
1531 rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
1532#endif
1533 ARCH_SETUP
1534 if (efi_enabled)
1535 efi_init();
1536 else {
1537 printk(KERN_INFO "BIOS-provided physical RAM map:\n");
1538 print_memory_map(machine_specific_memory_setup());
1539 }
1540
1541 copy_edd();
1542
1543 if (!MOUNT_ROOT_RDONLY)
1544 root_mountflags &= ~MS_RDONLY;
1545 init_mm.start_code = (unsigned long) _text;
1546 init_mm.end_code = (unsigned long) _etext;
1547 init_mm.end_data = (unsigned long) _edata;
1548 init_mm.brk = init_pg_tables_end + PAGE_OFFSET;
1549
1550 code_resource.start = virt_to_phys(_text);
1551 code_resource.end = virt_to_phys(_etext)-1;
1552 data_resource.start = virt_to_phys(_etext);
1553 data_resource.end = virt_to_phys(_edata)-1;
1554
1555 parse_cmdline_early(cmdline_p);
1556
1557 max_low_pfn = setup_memory();
1558
1559 /*
1560 * NOTE: before this point _nobody_ is allowed to allocate
1561 * any memory using the bootmem allocator. Although the
1562 * alloctor is now initialised only the first 8Mb of the kernel
1563 * virtual address space has been mapped. All allocations before
1564 * paging_init() has completed must use the alloc_bootmem_low_pages()
1565 * variant (which allocates DMA'able memory) and care must be taken
1566 * not to exceed the 8Mb limit.
1567 */
1568
1569#ifdef CONFIG_SMP
1570 smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
1571#endif
1572 paging_init();
1573 remapped_pgdat_init();
Andy Whitcroft05b79bd2005-06-23 00:07:57 -07001574 sparse_init();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001575 zone_sizes_init();
1576
1577 /*
1578 * NOTE: at this point the bootmem allocator is fully available.
1579 */
1580
1581#ifdef CONFIG_EARLY_PRINTK
1582 {
1583 char *s = strstr(*cmdline_p, "earlyprintk=");
1584 if (s) {
1585 extern void setup_early_printk(char *);
1586
Jan Beulich2a2d5922006-01-11 22:47:03 +01001587 setup_early_printk(strchr(s, '=') + 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001588 printk("early console enabled\n");
1589 }
1590 }
1591#endif
1592
1593
1594 dmi_scan_machine();
1595
1596#ifdef CONFIG_X86_GENERICARCH
1597 generic_apic_probe(*cmdline_p);
1598#endif
1599 if (efi_enabled)
1600 efi_map_memmap();
1601
Andi Kleenf9262c12006-03-08 17:57:25 -08001602#ifdef CONFIG_X86_IO_APIC
1603 check_acpi_pci(); /* Checks more than just ACPI actually */
1604#endif
1605
Len Brown888ba6c2005-08-24 12:07:20 -04001606#ifdef CONFIG_ACPI
Linus Torvalds1da177e2005-04-16 15:20:36 -07001607 /*
1608 * Parse the ACPI tables for possible boot-time SMP configuration.
1609 */
1610 acpi_boot_table_init();
1611 acpi_boot_init();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001612
Venkatesh Pallipadi911a62d2005-09-03 15:56:31 -07001613#if defined(CONFIG_SMP) && defined(CONFIG_X86_PC)
1614 if (def_to_bigsmp)
1615 printk(KERN_WARNING "More than 8 CPUs detected and "
1616 "CONFIG_X86_PC cannot handle it.\nUse "
1617 "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n");
1618#endif
1619#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001620#ifdef CONFIG_X86_LOCAL_APIC
1621 if (smp_found_config)
1622 get_smp_config();
1623#endif
1624
1625 register_memory();
1626
1627#ifdef CONFIG_VT
1628#if defined(CONFIG_VGA_CONSOLE)
1629 if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
1630 conswitchp = &vga_con;
1631#elif defined(CONFIG_DUMMY_CONSOLE)
1632 conswitchp = &dummy_con;
1633#endif
1634#endif
1635}
1636
1637#include "setup_arch_post.h"
1638/*
1639 * Local Variables:
1640 * mode:c
1641 * c-file-style:"k&r"
1642 * c-basic-offset:8
1643 * End:
1644 */