blob: 6917daa159ab351a4fe86da9aa6ee5276d959df6 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * linux/arch/i386/kernel/setup.c
3 *
4 * Copyright (C) 1995 Linus Torvalds
5 *
6 * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
7 *
8 * Memory region support
9 * David Parsons <orc@pell.chi.il.us>, July-August 1999
10 *
11 * Added E820 sanitization routine (removes overlapping memory regions);
12 * Brian Moyle <bmoyle@mvista.com>, February 2001
13 *
14 * Moved CPU detection code to cpu/${cpu}.c
15 * Patrick Mochel <mochel@osdl.org>, March 2002
16 *
17 * Provisions for empty E820 memory regions (reported by certain BIOSes).
18 * Alex Achenbach <xela@slit.de>, December 2002.
19 *
20 */
21
22/*
23 * This file handles the architecture-dependent parts of initialization
24 */
25
Alexey Dobriyan129f6942005-06-23 00:08:33 -070026#include <linux/config.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070027#include <linux/sched.h>
28#include <linux/mm.h>
Andy Whitcroft05b79bd2005-06-23 00:07:57 -070029#include <linux/mmzone.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070030#include <linux/tty.h>
31#include <linux/ioport.h>
32#include <linux/acpi.h>
33#include <linux/apm_bios.h>
34#include <linux/initrd.h>
35#include <linux/bootmem.h>
36#include <linux/seq_file.h>
37#include <linux/console.h>
38#include <linux/mca.h>
39#include <linux/root_dev.h>
40#include <linux/highmem.h>
41#include <linux/module.h>
42#include <linux/efi.h>
43#include <linux/init.h>
44#include <linux/edd.h>
45#include <linux/nodemask.h>
Eric W. Biederman1bc3b912005-06-25 14:58:01 -070046#include <linux/kexec.h>
Vivek Goyal2030eae2005-06-25 14:58:20 -070047#include <linux/crash_dump.h>
Andi Kleene9928672006-01-11 22:43:33 +010048#include <linux/dmi.h>
Eric W. Biederman1bc3b912005-06-25 14:58:01 -070049
Linus Torvalds1da177e2005-04-16 15:20:36 -070050#include <video/edid.h>
Eric W. Biederman1bc3b912005-06-25 14:58:01 -070051
Eric W. Biederman9635b472005-06-25 14:57:41 -070052#include <asm/apic.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070053#include <asm/e820.h>
54#include <asm/mpspec.h>
55#include <asm/setup.h>
56#include <asm/arch_hooks.h>
57#include <asm/sections.h>
58#include <asm/io_apic.h>
59#include <asm/ist.h>
60#include <asm/io.h>
61#include "setup_arch_pre.h"
62#include <bios_ebda.h>
63
Vivek Goyal92aa63a2005-06-25 14:58:18 -070064/* Forward Declaration. */
65void __init find_max_pfn(void);
66
Linus Torvalds1da177e2005-04-16 15:20:36 -070067/* This value is set up by the early boot code to point to the value
68 immediately after the boot time page tables. It contains a *physical*
69 address, and must not be in the .bss segment! */
70unsigned long init_pg_tables_end __initdata = ~0UL;
71
Li Shaohua0bb31842005-06-25 14:54:55 -070072int disable_pse __devinitdata = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -070073
74/*
75 * Machine setup..
76 */
77
78#ifdef CONFIG_EFI
79int efi_enabled = 0;
80EXPORT_SYMBOL(efi_enabled);
81#endif
82
83/* cpu data as detected by the assembly code in head.S */
84struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
85/* common cpu data for all cpus */
Christoph Lameterc3d8c142005-09-06 15:16:33 -070086struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
Alexey Dobriyan129f6942005-06-23 00:08:33 -070087EXPORT_SYMBOL(boot_cpu_data);
Linus Torvalds1da177e2005-04-16 15:20:36 -070088
89unsigned long mmu_cr4_features;
90
Len Brown84663612005-08-24 12:09:07 -040091#ifdef CONFIG_ACPI
Linus Torvalds1da177e2005-04-16 15:20:36 -070092 int acpi_disabled = 0;
93#else
94 int acpi_disabled = 1;
95#endif
96EXPORT_SYMBOL(acpi_disabled);
97
Len Brown888ba6c2005-08-24 12:07:20 -040098#ifdef CONFIG_ACPI
Linus Torvalds1da177e2005-04-16 15:20:36 -070099int __initdata acpi_force = 0;
100extern acpi_interrupt_flags acpi_sci_flags;
101#endif
102
103/* for MCA, but anyone else can use it if they want */
104unsigned int machine_id;
Alexey Dobriyan129f6942005-06-23 00:08:33 -0700105#ifdef CONFIG_MCA
106EXPORT_SYMBOL(machine_id);
107#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700108unsigned int machine_submodel_id;
109unsigned int BIOS_revision;
110unsigned int mca_pentium_flag;
111
112/* For PCI or other memory-mapped resources */
113unsigned long pci_mem_start = 0x10000000;
Alexey Dobriyan129f6942005-06-23 00:08:33 -0700114#ifdef CONFIG_PCI
115EXPORT_SYMBOL(pci_mem_start);
116#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700117
118/* Boot loader ID as an integer, for the benefit of proc_dointvec */
119int bootloader_type;
120
121/* user-defined highmem size */
122static unsigned int highmem_pages = -1;
123
124/*
125 * Setup options
126 */
127struct drive_info_struct { char dummy[32]; } drive_info;
Alexey Dobriyan129f6942005-06-23 00:08:33 -0700128#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || \
129 defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE)
130EXPORT_SYMBOL(drive_info);
131#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700132struct screen_info screen_info;
Alexey Dobriyan129f6942005-06-23 00:08:33 -0700133EXPORT_SYMBOL(screen_info);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700134struct apm_info apm_info;
Alexey Dobriyan129f6942005-06-23 00:08:33 -0700135EXPORT_SYMBOL(apm_info);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700136struct sys_desc_table_struct {
137 unsigned short length;
138 unsigned char table[0];
139};
140struct edid_info edid_info;
Antonino A. Daplas5e518d72005-09-09 13:04:34 -0700141EXPORT_SYMBOL_GPL(edid_info);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700142struct ist_info ist_info;
Alexey Dobriyan129f6942005-06-23 00:08:33 -0700143#if defined(CONFIG_X86_SPEEDSTEP_SMI) || \
144 defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)
145EXPORT_SYMBOL(ist_info);
146#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700147struct e820map e820;
148
149extern void early_cpu_init(void);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700150extern void generic_apic_probe(char *);
151extern int root_mountflags;
152
153unsigned long saved_videomode;
154
155#define RAMDISK_IMAGE_START_MASK 0x07FF
156#define RAMDISK_PROMPT_FLAG 0x8000
157#define RAMDISK_LOAD_FLAG 0x4000
158
159static char command_line[COMMAND_LINE_SIZE];
160
161unsigned char __initdata boot_params[PARAM_SIZE];
162
163static struct resource data_resource = {
164 .name = "Kernel data",
165 .start = 0,
166 .end = 0,
167 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
168};
169
170static struct resource code_resource = {
171 .name = "Kernel code",
172 .start = 0,
173 .end = 0,
174 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
175};
176
177static struct resource system_rom_resource = {
178 .name = "System ROM",
179 .start = 0xf0000,
180 .end = 0xfffff,
181 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
182};
183
184static struct resource extension_rom_resource = {
185 .name = "Extension ROM",
186 .start = 0xe0000,
187 .end = 0xeffff,
188 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
189};
190
191static struct resource adapter_rom_resources[] = { {
192 .name = "Adapter ROM",
193 .start = 0xc8000,
194 .end = 0,
195 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
196}, {
197 .name = "Adapter ROM",
198 .start = 0,
199 .end = 0,
200 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
201}, {
202 .name = "Adapter ROM",
203 .start = 0,
204 .end = 0,
205 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
206}, {
207 .name = "Adapter ROM",
208 .start = 0,
209 .end = 0,
210 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
211}, {
212 .name = "Adapter ROM",
213 .start = 0,
214 .end = 0,
215 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
216}, {
217 .name = "Adapter ROM",
218 .start = 0,
219 .end = 0,
220 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
221} };
222
223#define ADAPTER_ROM_RESOURCES \
224 (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
225
226static struct resource video_rom_resource = {
227 .name = "Video ROM",
228 .start = 0xc0000,
229 .end = 0xc7fff,
230 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
231};
232
233static struct resource video_ram_resource = {
234 .name = "Video RAM area",
235 .start = 0xa0000,
236 .end = 0xbffff,
237 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
238};
239
240static struct resource standard_io_resources[] = { {
241 .name = "dma1",
242 .start = 0x0000,
243 .end = 0x001f,
244 .flags = IORESOURCE_BUSY | IORESOURCE_IO
245}, {
246 .name = "pic1",
247 .start = 0x0020,
248 .end = 0x0021,
249 .flags = IORESOURCE_BUSY | IORESOURCE_IO
250}, {
251 .name = "timer0",
252 .start = 0x0040,
253 .end = 0x0043,
254 .flags = IORESOURCE_BUSY | IORESOURCE_IO
255}, {
256 .name = "timer1",
257 .start = 0x0050,
258 .end = 0x0053,
259 .flags = IORESOURCE_BUSY | IORESOURCE_IO
260}, {
261 .name = "keyboard",
262 .start = 0x0060,
263 .end = 0x006f,
264 .flags = IORESOURCE_BUSY | IORESOURCE_IO
265}, {
266 .name = "dma page reg",
267 .start = 0x0080,
268 .end = 0x008f,
269 .flags = IORESOURCE_BUSY | IORESOURCE_IO
270}, {
271 .name = "pic2",
272 .start = 0x00a0,
273 .end = 0x00a1,
274 .flags = IORESOURCE_BUSY | IORESOURCE_IO
275}, {
276 .name = "dma2",
277 .start = 0x00c0,
278 .end = 0x00df,
279 .flags = IORESOURCE_BUSY | IORESOURCE_IO
280}, {
281 .name = "fpu",
282 .start = 0x00f0,
283 .end = 0x00ff,
284 .flags = IORESOURCE_BUSY | IORESOURCE_IO
285} };
286
287#define STANDARD_IO_RESOURCES \
288 (sizeof standard_io_resources / sizeof standard_io_resources[0])
289
/* True when the 16-bit word at x is 0xaa55. */
#define romsignature(x) (*(unsigned short *)(x) == 0xaa55)

/*
 * Add up 'length' bytes starting at 'rom' modulo 256.
 * Returns 1 when the sum is zero (which includes length == 0), else 0.
 */
static int __init romchecksum(unsigned char *rom, unsigned long length)
{
	unsigned char total = 0;
	unsigned long off;

	for (off = 0; off < length; off++)
		total += rom[off];

	return !total;
}
300
301static void __init probe_roms(void)
302{
303 unsigned long start, length, upper;
304 unsigned char *rom;
305 int i;
306
307 /* video rom */
308 upper = adapter_rom_resources[0].start;
309 for (start = video_rom_resource.start; start < upper; start += 2048) {
310 rom = isa_bus_to_virt(start);
311 if (!romsignature(rom))
312 continue;
313
314 video_rom_resource.start = start;
315
316 /* 0 < length <= 0x7f * 512, historically */
317 length = rom[2] * 512;
318
319 /* if checksum okay, trust length byte */
320 if (length && romchecksum(rom, length))
321 video_rom_resource.end = start + length - 1;
322
323 request_resource(&iomem_resource, &video_rom_resource);
324 break;
325 }
326
327 start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
328 if (start < upper)
329 start = upper;
330
331 /* system rom */
332 request_resource(&iomem_resource, &system_rom_resource);
333 upper = system_rom_resource.start;
334
335 /* check for extension rom (ignore length byte!) */
336 rom = isa_bus_to_virt(extension_rom_resource.start);
337 if (romsignature(rom)) {
338 length = extension_rom_resource.end - extension_rom_resource.start + 1;
339 if (romchecksum(rom, length)) {
340 request_resource(&iomem_resource, &extension_rom_resource);
341 upper = extension_rom_resource.start;
342 }
343 }
344
345 /* check for adapter roms on 2k boundaries */
346 for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
347 rom = isa_bus_to_virt(start);
348 if (!romsignature(rom))
349 continue;
350
351 /* 0 < length <= 0x7f * 512, historically */
352 length = rom[2] * 512;
353
354 /* but accept any length that fits if checksum okay */
355 if (!length || start + length > upper || !romchecksum(rom, length))
356 continue;
357
358 adapter_rom_resources[i].start = start;
359 adapter_rom_resources[i].end = start + length - 1;
360 request_resource(&iomem_resource, &adapter_rom_resources[i]);
361
362 start = adapter_rom_resources[i++].end & ~2047UL;
363 }
364}
365
366static void __init limit_regions(unsigned long long size)
367{
368 unsigned long long current_addr = 0;
369 int i;
370
371 if (efi_enabled) {
Matt Tolentino7ae65fd2005-09-03 15:56:27 -0700372 efi_memory_desc_t *md;
373 void *p;
374
375 for (p = memmap.map, i = 0; p < memmap.map_end;
376 p += memmap.desc_size, i++) {
377 md = p;
378 current_addr = md->phys_addr + (md->num_pages << 12);
379 if (md->type == EFI_CONVENTIONAL_MEMORY) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700380 if (current_addr >= size) {
Matt Tolentino7ae65fd2005-09-03 15:56:27 -0700381 md->num_pages -=
Linus Torvalds1da177e2005-04-16 15:20:36 -0700382 (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT);
383 memmap.nr_map = i + 1;
384 return;
385 }
386 }
387 }
388 }
389 for (i = 0; i < e820.nr_map; i++) {
Dave Hansenf014a552005-10-30 14:59:37 -0800390 current_addr = e820.map[i].addr + e820.map[i].size;
391 if (current_addr < size)
392 continue;
393
394 if (e820.map[i].type != E820_RAM)
395 continue;
396
397 if (e820.map[i].addr >= size) {
398 /*
399 * This region starts past the end of the
400 * requested size, skip it completely.
401 */
402 e820.nr_map = i;
403 } else {
404 e820.nr_map = i + 1;
405 e820.map[i].size -= current_addr - size;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700406 }
Dave Hansenf014a552005-10-30 14:59:37 -0800407 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700408 }
409}
410
411static void __init add_memory_region(unsigned long long start,
412 unsigned long long size, int type)
413{
414 int x;
415
416 if (!efi_enabled) {
417 x = e820.nr_map;
418
419 if (x == E820MAX) {
420 printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
421 return;
422 }
423
424 e820.map[x].addr = start;
425 e820.map[x].size = size;
426 e820.map[x].type = type;
427 e820.nr_map++;
428 }
429} /* add_memory_region */
430
431#define E820_DEBUG 1
432
433static void __init print_memory_map(char *who)
434{
435 int i;
436
437 for (i = 0; i < e820.nr_map; i++) {
438 printk(" %s: %016Lx - %016Lx ", who,
439 e820.map[i].addr,
440 e820.map[i].addr + e820.map[i].size);
441 switch (e820.map[i].type) {
442 case E820_RAM: printk("(usable)\n");
443 break;
444 case E820_RESERVED:
445 printk("(reserved)\n");
446 break;
447 case E820_ACPI:
448 printk("(ACPI data)\n");
449 break;
450 case E820_NVS:
451 printk("(ACPI NVS)\n");
452 break;
453 default: printk("type %lu\n", e820.map[i].type);
454 break;
455 }
456 }
457}
458
459/*
460 * Sanitize the BIOS e820 map.
461 *
462 * Some e820 responses include overlapping entries. The following
463 * replaces the original e820 map with a new one, removing overlaps.
464 *
465 */
466struct change_member {
467 struct e820entry *pbios; /* pointer to original bios entry */
468 unsigned long long addr; /* address for this change point */
469};
470static struct change_member change_point_list[2*E820MAX] __initdata;
471static struct change_member *change_point[2*E820MAX] __initdata;
472static struct e820entry *overlap_list[E820MAX] __initdata;
473static struct e820entry new_bios[E820MAX] __initdata;
474
475static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
476{
477 struct change_member *change_tmp;
478 unsigned long current_type, last_type;
479 unsigned long long last_addr;
480 int chgidx, still_changing;
481 int overlap_entries;
482 int new_bios_entry;
483 int old_nr, new_nr, chg_nr;
484 int i;
485
486 /*
487 Visually we're performing the following (1,2,3,4 = memory types)...
488
489 Sample memory map (w/overlaps):
490 ____22__________________
491 ______________________4_
492 ____1111________________
493 _44_____________________
494 11111111________________
495 ____________________33__
496 ___________44___________
497 __________33333_________
498 ______________22________
499 ___________________2222_
500 _________111111111______
501 _____________________11_
502 _________________4______
503
504 Sanitized equivalent (no overlap):
505 1_______________________
506 _44_____________________
507 ___1____________________
508 ____22__________________
509 ______11________________
510 _________1______________
511 __________3_____________
512 ___________44___________
513 _____________33_________
514 _______________2________
515 ________________1_______
516 _________________4______
517 ___________________2____
518 ____________________33__
519 ______________________4_
520 */
521
522 /* if there's only one memory region, don't bother */
523 if (*pnr_map < 2)
524 return -1;
525
526 old_nr = *pnr_map;
527
528 /* bail out if we find any unreasonable addresses in bios map */
529 for (i=0; i<old_nr; i++)
530 if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
531 return -1;
532
533 /* create pointers for initial change-point information (for sorting) */
534 for (i=0; i < 2*old_nr; i++)
535 change_point[i] = &change_point_list[i];
536
537 /* record all known change-points (starting and ending addresses),
538 omitting those that are for empty memory regions */
539 chgidx = 0;
540 for (i=0; i < old_nr; i++) {
541 if (biosmap[i].size != 0) {
542 change_point[chgidx]->addr = biosmap[i].addr;
543 change_point[chgidx++]->pbios = &biosmap[i];
544 change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
545 change_point[chgidx++]->pbios = &biosmap[i];
546 }
547 }
548 chg_nr = chgidx; /* true number of change-points */
549
550 /* sort change-point list by memory addresses (low -> high) */
551 still_changing = 1;
552 while (still_changing) {
553 still_changing = 0;
554 for (i=1; i < chg_nr; i++) {
555 /* if <current_addr> > <last_addr>, swap */
556 /* or, if current=<start_addr> & last=<end_addr>, swap */
557 if ((change_point[i]->addr < change_point[i-1]->addr) ||
558 ((change_point[i]->addr == change_point[i-1]->addr) &&
559 (change_point[i]->addr == change_point[i]->pbios->addr) &&
560 (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
561 )
562 {
563 change_tmp = change_point[i];
564 change_point[i] = change_point[i-1];
565 change_point[i-1] = change_tmp;
566 still_changing=1;
567 }
568 }
569 }
570
571 /* create a new bios memory map, removing overlaps */
572 overlap_entries=0; /* number of entries in the overlap table */
573 new_bios_entry=0; /* index for creating new bios map entries */
574 last_type = 0; /* start with undefined memory type */
575 last_addr = 0; /* start with 0 as last starting address */
576 /* loop through change-points, determining affect on the new bios map */
577 for (chgidx=0; chgidx < chg_nr; chgidx++)
578 {
579 /* keep track of all overlapping bios entries */
580 if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
581 {
582 /* add map entry to overlap list (> 1 entry implies an overlap) */
583 overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
584 }
585 else
586 {
587 /* remove entry from list (order independent, so swap with last) */
588 for (i=0; i<overlap_entries; i++)
589 {
590 if (overlap_list[i] == change_point[chgidx]->pbios)
591 overlap_list[i] = overlap_list[overlap_entries-1];
592 }
593 overlap_entries--;
594 }
595 /* if there are overlapping entries, decide which "type" to use */
596 /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
597 current_type = 0;
598 for (i=0; i<overlap_entries; i++)
599 if (overlap_list[i]->type > current_type)
600 current_type = overlap_list[i]->type;
601 /* continue building up new bios map based on this information */
602 if (current_type != last_type) {
603 if (last_type != 0) {
604 new_bios[new_bios_entry].size =
605 change_point[chgidx]->addr - last_addr;
606 /* move forward only if the new size was non-zero */
607 if (new_bios[new_bios_entry].size != 0)
608 if (++new_bios_entry >= E820MAX)
609 break; /* no more space left for new bios entries */
610 }
611 if (current_type != 0) {
612 new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
613 new_bios[new_bios_entry].type = current_type;
614 last_addr=change_point[chgidx]->addr;
615 }
616 last_type = current_type;
617 }
618 }
619 new_nr = new_bios_entry; /* retain count for new bios entries */
620
621 /* copy new bios mapping into original location */
622 memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
623 *pnr_map = new_nr;
624
625 return 0;
626}
627
628/*
629 * Copy the BIOS e820 map into a safe place.
630 *
631 * Sanity-check it while we're at it..
632 *
633 * If we're lucky and live on a modern system, the setup code
634 * will have given us a memory map that we can use to properly
635 * set up memory. If we aren't, we'll fake a memory map.
636 *
637 * We check to see that the memory map contains at least 2 elements
638 * before we'll use it, because the detection code in setup.S may
639 * not be perfect and most every PC known to man has two memory
640 * regions: one from 0 to 640k, and one from 1mb up. (The IBM
641 * thinkpad 560x, for example, does not cooperate with the memory
642 * detection code.)
643 */
644static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
645{
646 /* Only one memory region (or negative)? Ignore it */
647 if (nr_map < 2)
648 return -1;
649
650 do {
651 unsigned long long start = biosmap->addr;
652 unsigned long long size = biosmap->size;
653 unsigned long long end = start + size;
654 unsigned long type = biosmap->type;
655
656 /* Overflow in 64 bits? Ignore the memory map. */
657 if (start > end)
658 return -1;
659
660 /*
661 * Some BIOSes claim RAM in the 640k - 1M region.
662 * Not right. Fix it up.
663 */
664 if (type == E820_RAM) {
665 if (start < 0x100000ULL && end > 0xA0000ULL) {
666 if (start < 0xA0000ULL)
667 add_memory_region(start, 0xA0000ULL-start, type);
668 if (end <= 0x100000ULL)
669 continue;
670 start = 0x100000ULL;
671 size = end - start;
672 }
673 }
674 add_memory_region(start, size, type);
675 } while (biosmap++,--nr_map);
676 return 0;
677}
678
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
struct edd edd;
#ifdef CONFIG_EDD_MODULE
EXPORT_SYMBOL(edd);
#endif
#endif

/**
 * copy_edd() - Copy the BIOS EDD information
 *              from boot_params into a safe place.
 *
 * Does nothing when EDD support is not configured.
 */
static inline void copy_edd(void)
{
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
	edd.edd_info_nr = EDD_NR;
	edd.mbr_signature_nr = EDD_MBR_SIG_NR;
	memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info));
	memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature));
#endif
}
701
702/*
703 * Do NOT EVER look at the BIOS memory size location.
704 * It does not work on many machines.
705 */
706#define LOWMEMSIZE() (0x9f000)
707
708static void __init parse_cmdline_early (char ** cmdline_p)
709{
710 char c = ' ', *to = command_line, *from = saved_command_line;
711 int len = 0;
712 int userdef = 0;
713
714 /* Save unparsed command line copy for /proc/cmdline */
715 saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
716
717 for (;;) {
718 if (c != ' ')
719 goto next_char;
720 /*
721 * "mem=nopentium" disables the 4MB page tables.
722 * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
723 * to <mem>, overriding the bios size.
724 * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
725 * <start> to <start>+<mem>, overriding the bios size.
726 *
727 * HPA tells me bootloaders need to parse mem=, so no new
728 * option should be mem= [also see Documentation/i386/boot.txt]
729 */
730 if (!memcmp(from, "mem=", 4)) {
731 if (to != command_line)
732 to--;
733 if (!memcmp(from+4, "nopentium", 9)) {
734 from += 9+4;
735 clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
736 disable_pse = 1;
737 } else {
738 /* If the user specifies memory size, we
739 * limit the BIOS-provided memory map to
740 * that size. exactmap can be used to specify
741 * the exact map. mem=number can be used to
742 * trim the existing memory map.
743 */
744 unsigned long long mem_size;
745
746 mem_size = memparse(from+4, &from);
747 limit_regions(mem_size);
748 userdef=1;
749 }
750 }
751
752 else if (!memcmp(from, "memmap=", 7)) {
753 if (to != command_line)
754 to--;
755 if (!memcmp(from+7, "exactmap", 8)) {
Vivek Goyal92aa63a2005-06-25 14:58:18 -0700756#ifdef CONFIG_CRASH_DUMP
757 /* If we are doing a crash dump, we
758 * still need to know the real mem
759 * size before original memory map is
760 * reset.
761 */
762 find_max_pfn();
763 saved_max_pfn = max_pfn;
764#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700765 from += 8+7;
766 e820.nr_map = 0;
767 userdef = 1;
768 } else {
769 /* If the user specifies memory size, we
770 * limit the BIOS-provided memory map to
771 * that size. exactmap can be used to specify
772 * the exact map. mem=number can be used to
773 * trim the existing memory map.
774 */
775 unsigned long long start_at, mem_size;
776
777 mem_size = memparse(from+7, &from);
778 if (*from == '@') {
779 start_at = memparse(from+1, &from);
780 add_memory_region(start_at, mem_size, E820_RAM);
781 } else if (*from == '#') {
782 start_at = memparse(from+1, &from);
783 add_memory_region(start_at, mem_size, E820_ACPI);
784 } else if (*from == '$') {
785 start_at = memparse(from+1, &from);
786 add_memory_region(start_at, mem_size, E820_RESERVED);
787 } else {
788 limit_regions(mem_size);
789 userdef=1;
790 }
791 }
792 }
793
794 else if (!memcmp(from, "noexec=", 7))
795 noexec_setup(from + 7);
796
797
798#ifdef CONFIG_X86_SMP
799 /*
800 * If the BIOS enumerates physical processors before logical,
801 * maxcpus=N at enumeration-time can be used to disable HT.
802 */
803 else if (!memcmp(from, "maxcpus=", 8)) {
804 extern unsigned int maxcpus;
805
806 maxcpus = simple_strtoul(from + 8, NULL, 0);
807 }
808#endif
809
Len Brown888ba6c2005-08-24 12:07:20 -0400810#ifdef CONFIG_ACPI
Linus Torvalds1da177e2005-04-16 15:20:36 -0700811 /* "acpi=off" disables both ACPI table parsing and interpreter */
812 else if (!memcmp(from, "acpi=off", 8)) {
813 disable_acpi();
814 }
815
816 /* acpi=force to over-ride black-list */
817 else if (!memcmp(from, "acpi=force", 10)) {
818 acpi_force = 1;
819 acpi_ht = 1;
820 acpi_disabled = 0;
821 }
822
823 /* acpi=strict disables out-of-spec workarounds */
824 else if (!memcmp(from, "acpi=strict", 11)) {
825 acpi_strict = 1;
826 }
827
828 /* Limit ACPI just to boot-time to enable HT */
829 else if (!memcmp(from, "acpi=ht", 7)) {
830 if (!acpi_force)
831 disable_acpi();
832 acpi_ht = 1;
833 }
834
835 /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */
836 else if (!memcmp(from, "pci=noacpi", 10)) {
837 acpi_disable_pci();
838 }
839 /* "acpi=noirq" disables ACPI interrupt routing */
840 else if (!memcmp(from, "acpi=noirq", 10)) {
841 acpi_noirq_set();
842 }
843
844 else if (!memcmp(from, "acpi_sci=edge", 13))
845 acpi_sci_flags.trigger = 1;
846
847 else if (!memcmp(from, "acpi_sci=level", 14))
848 acpi_sci_flags.trigger = 3;
849
850 else if (!memcmp(from, "acpi_sci=high", 13))
851 acpi_sci_flags.polarity = 1;
852
853 else if (!memcmp(from, "acpi_sci=low", 12))
854 acpi_sci_flags.polarity = 3;
855
856#ifdef CONFIG_X86_IO_APIC
857 else if (!memcmp(from, "acpi_skip_timer_override", 24))
858 acpi_skip_timer_override = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700859
Chuck Ebbert66759a02005-09-12 18:49:25 +0200860 if (!memcmp(from, "disable_timer_pin_1", 19))
861 disable_timer_pin_1 = 1;
862 if (!memcmp(from, "enable_timer_pin_1", 18))
863 disable_timer_pin_1 = -1;
864
Linus Torvalds1da177e2005-04-16 15:20:36 -0700865 /* disable IO-APIC */
866 else if (!memcmp(from, "noapic", 6))
867 disable_ioapic_setup();
Cal Peake0a305d22005-09-13 02:28:07 -0400868#endif /* CONFIG_X86_IO_APIC */
Len Brown888ba6c2005-08-24 12:07:20 -0400869#endif /* CONFIG_ACPI */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700870
Eric W. Biederman9635b472005-06-25 14:57:41 -0700871#ifdef CONFIG_X86_LOCAL_APIC
872 /* enable local APIC */
873 else if (!memcmp(from, "lapic", 5))
874 lapic_enable();
875
876 /* disable local APIC */
877 else if (!memcmp(from, "nolapic", 6))
878 lapic_disable();
879#endif /* CONFIG_X86_LOCAL_APIC */
880
Eric W. Biederman1bc3b912005-06-25 14:58:01 -0700881#ifdef CONFIG_KEXEC
882 /* crashkernel=size@addr specifies the location to reserve for
883 * a crash kernel. By reserving this memory we guarantee
884 * that linux never set's it up as a DMA target.
885 * Useful for holding code to do something appropriate
886 * after a kernel panic.
887 */
888 else if (!memcmp(from, "crashkernel=", 12)) {
889 unsigned long size, base;
890 size = memparse(from+12, &from);
891 if (*from == '@') {
892 base = memparse(from+1, &from);
893 /* FIXME: Do I want a sanity check
894 * to validate the memory range?
895 */
896 crashk_res.start = base;
897 crashk_res.end = base + size - 1;
898 }
899 }
900#endif
Vivek Goyalaac04b32006-01-09 20:51:47 -0800901#ifdef CONFIG_PROC_VMCORE
Vivek Goyal2030eae2005-06-25 14:58:20 -0700902 /* elfcorehdr= specifies the location of elf core header
903 * stored by the crashed kernel.
904 */
905 else if (!memcmp(from, "elfcorehdr=", 11))
906 elfcorehdr_addr = memparse(from+11, &from);
907#endif
Eric W. Biederman1bc3b912005-06-25 14:58:01 -0700908
Linus Torvalds1da177e2005-04-16 15:20:36 -0700909 /*
910 * highmem=size forces highmem to be exactly 'size' bytes.
911 * This works even on boxes that have no highmem otherwise.
912 * This also works to reduce highmem size on bigger boxes.
913 */
914 else if (!memcmp(from, "highmem=", 8))
915 highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;
916
917 /*
918 * vmalloc=size forces the vmalloc area to be exactly 'size'
919 * bytes. This can be used to increase (or decrease) the
920 * vmalloc area - the default is 128m.
921 */
922 else if (!memcmp(from, "vmalloc=", 8))
923 __VMALLOC_RESERVE = memparse(from+8, &from);
924
925 next_char:
926 c = *(from++);
927 if (!c)
928 break;
929 if (COMMAND_LINE_SIZE <= ++len)
930 break;
931 *(to++) = c;
932 }
933 *to = '\0';
934 *cmdline_p = command_line;
935 if (userdef) {
936 printk(KERN_INFO "user-defined physical RAM map:\n");
937 print_memory_map("user");
938 }
939}
940
941/*
942 * Callback for efi_memory_walk.
943 */
944static int __init
945efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
946{
947 unsigned long *max_pfn = arg, pfn;
948
949 if (start < end) {
950 pfn = PFN_UP(end -1);
951 if (pfn > *max_pfn)
952 *max_pfn = pfn;
953 }
954 return 0;
955}
956
Andy Whitcroft215c3402006-01-06 00:12:06 -0800957static int __init
958efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg)
959{
960 memory_present(0, start, end);
961 return 0;
962}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700963
964/*
965 * Find the highest page frame number we have available
966 */
967void __init find_max_pfn(void)
968{
969 int i;
970
971 max_pfn = 0;
972 if (efi_enabled) {
973 efi_memmap_walk(efi_find_max_pfn, &max_pfn);
Andy Whitcroft215c3402006-01-06 00:12:06 -0800974 efi_memmap_walk(efi_memory_present_wrapper, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700975 return;
976 }
977
978 for (i = 0; i < e820.nr_map; i++) {
979 unsigned long start, end;
980 /* RAM? */
981 if (e820.map[i].type != E820_RAM)
982 continue;
983 start = PFN_UP(e820.map[i].addr);
984 end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
985 if (start >= end)
986 continue;
987 if (end > max_pfn)
988 max_pfn = end;
Andy Whitcroft215c3402006-01-06 00:12:06 -0800989 memory_present(0, start, end);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700990 }
991}
992
993/*
994 * Determine low and high memory ranges:
995 */
996unsigned long __init find_max_low_pfn(void)
997{
998 unsigned long max_low_pfn;
999
1000 max_low_pfn = max_pfn;
1001 if (max_low_pfn > MAXMEM_PFN) {
1002 if (highmem_pages == -1)
1003 highmem_pages = max_pfn - MAXMEM_PFN;
1004 if (highmem_pages + MAXMEM_PFN < max_pfn)
1005 max_pfn = MAXMEM_PFN + highmem_pages;
1006 if (highmem_pages + MAXMEM_PFN > max_pfn) {
1007 printk("only %luMB highmem pages available, ignoring highmem size of %uMB.\n", pages_to_mb(max_pfn - MAXMEM_PFN), pages_to_mb(highmem_pages));
1008 highmem_pages = 0;
1009 }
1010 max_low_pfn = MAXMEM_PFN;
1011#ifndef CONFIG_HIGHMEM
1012 /* Maximum memory usable is what is directly addressable */
1013 printk(KERN_WARNING "Warning only %ldMB will be used.\n",
1014 MAXMEM>>20);
1015 if (max_pfn > MAX_NONPAE_PFN)
1016 printk(KERN_WARNING "Use a PAE enabled kernel.\n");
1017 else
1018 printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
1019 max_pfn = MAXMEM_PFN;
1020#else /* !CONFIG_HIGHMEM */
1021#ifndef CONFIG_X86_PAE
1022 if (max_pfn > MAX_NONPAE_PFN) {
1023 max_pfn = MAX_NONPAE_PFN;
1024 printk(KERN_WARNING "Warning only 4GB will be used.\n");
1025 printk(KERN_WARNING "Use a PAE enabled kernel.\n");
1026 }
1027#endif /* !CONFIG_X86_PAE */
1028#endif /* !CONFIG_HIGHMEM */
1029 } else {
1030 if (highmem_pages == -1)
1031 highmem_pages = 0;
1032#ifdef CONFIG_HIGHMEM
1033 if (highmem_pages >= max_pfn) {
1034 printk(KERN_ERR "highmem size specified (%uMB) is bigger than pages available (%luMB)!.\n", pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
1035 highmem_pages = 0;
1036 }
1037 if (highmem_pages) {
1038 if (max_low_pfn-highmem_pages < 64*1024*1024/PAGE_SIZE){
1039 printk(KERN_ERR "highmem size %uMB results in smaller than 64MB lowmem, ignoring it.\n", pages_to_mb(highmem_pages));
1040 highmem_pages = 0;
1041 }
1042 max_low_pfn -= highmem_pages;
1043 }
1044#else
1045 if (highmem_pages)
1046 printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
1047#endif
1048 }
1049 return max_low_pfn;
1050}
1051
1052/*
1053 * Free all available memory for boot time allocation. Used
1054 * as a callback function by efi_memory_walk()
1055 */
1056
1057static int __init
1058free_available_memory(unsigned long start, unsigned long end, void *arg)
1059{
1060 /* check max_low_pfn */
Tolentino, Matthew E23dd8422006-03-26 01:37:09 -08001061 if (start >= (max_low_pfn << PAGE_SHIFT))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001062 return 0;
Tolentino, Matthew E23dd8422006-03-26 01:37:09 -08001063 if (end >= (max_low_pfn << PAGE_SHIFT))
1064 end = max_low_pfn << PAGE_SHIFT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001065 if (start < end)
1066 free_bootmem(start, end - start);
1067
1068 return 0;
1069}
1070/*
1071 * Register fully available low RAM pages with the bootmem allocator.
1072 */
1073static void __init register_bootmem_low_pages(unsigned long max_low_pfn)
1074{
1075 int i;
1076
1077 if (efi_enabled) {
1078 efi_memmap_walk(free_available_memory, NULL);
1079 return;
1080 }
1081 for (i = 0; i < e820.nr_map; i++) {
1082 unsigned long curr_pfn, last_pfn, size;
1083 /*
1084 * Reserve usable low memory
1085 */
1086 if (e820.map[i].type != E820_RAM)
1087 continue;
1088 /*
1089 * We are rounding up the start address of usable memory:
1090 */
1091 curr_pfn = PFN_UP(e820.map[i].addr);
1092 if (curr_pfn >= max_low_pfn)
1093 continue;
1094 /*
1095 * ... and at the end of the usable range downwards:
1096 */
1097 last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
1098
1099 if (last_pfn > max_low_pfn)
1100 last_pfn = max_low_pfn;
1101
1102 /*
1103 * .. finally, did all the rounding and playing
1104 * around just make the area go away?
1105 */
1106 if (last_pfn <= curr_pfn)
1107 continue;
1108
1109 size = last_pfn - curr_pfn;
1110 free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
1111 }
1112}
1113
1114/*
1115 * workaround for Dell systems that neglect to reserve EBDA
1116 */
1117static void __init reserve_ebda_region(void)
1118{
1119 unsigned int addr;
1120 addr = get_bios_ebda();
1121 if (addr)
1122 reserve_bootmem(addr, PAGE_SIZE);
1123}
1124
Andy Whitcroft05b79bd2005-06-23 00:07:57 -07001125#ifndef CONFIG_NEED_MULTIPLE_NODES
Linus Torvalds1da177e2005-04-16 15:20:36 -07001126void __init setup_bootmem_allocator(void);
1127static unsigned long __init setup_memory(void)
1128{
1129 /*
1130 * partially used pages are not usable - thus
1131 * we are rounding upwards:
1132 */
1133 min_low_pfn = PFN_UP(init_pg_tables_end);
1134
1135 find_max_pfn();
1136
1137 max_low_pfn = find_max_low_pfn();
1138
1139#ifdef CONFIG_HIGHMEM
1140 highstart_pfn = highend_pfn = max_pfn;
1141 if (max_pfn > max_low_pfn) {
1142 highstart_pfn = max_low_pfn;
1143 }
1144 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
1145 pages_to_mb(highend_pfn - highstart_pfn));
1146#endif
1147 printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
1148 pages_to_mb(max_low_pfn));
1149
1150 setup_bootmem_allocator();
1151
1152 return max_low_pfn;
1153}
1154
1155void __init zone_sizes_init(void)
1156{
1157 unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
1158 unsigned int max_dma, low;
1159
1160 max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
1161 low = max_low_pfn;
1162
1163 if (low < max_dma)
1164 zones_size[ZONE_DMA] = low;
1165 else {
1166 zones_size[ZONE_DMA] = max_dma;
1167 zones_size[ZONE_NORMAL] = low - max_dma;
1168#ifdef CONFIG_HIGHMEM
1169 zones_size[ZONE_HIGHMEM] = highend_pfn - low;
1170#endif
1171 }
1172 free_area_init(zones_size);
1173}
1174#else
Andy Whitcroft05b79bd2005-06-23 00:07:57 -07001175extern unsigned long __init setup_memory(void);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001176extern void zone_sizes_init(void);
Andy Whitcroft05b79bd2005-06-23 00:07:57 -07001177#endif /* !CONFIG_NEED_MULTIPLE_NODES */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001178
1179void __init setup_bootmem_allocator(void)
1180{
1181 unsigned long bootmap_size;
1182 /*
1183 * Initialize the boot-time allocator (with low memory only):
1184 */
1185 bootmap_size = init_bootmem(min_low_pfn, max_low_pfn);
1186
1187 register_bootmem_low_pages(max_low_pfn);
1188
1189 /*
1190 * Reserve the bootmem bitmap itself as well. We do this in two
1191 * steps (first step was init_bootmem()) because this catches
1192 * the (very unlikely) case of us accidentally initializing the
1193 * bootmem allocator with an invalid RAM area.
1194 */
Vivek Goyal8a919082005-06-25 14:57:51 -07001195 reserve_bootmem(__PHYSICAL_START, (PFN_PHYS(min_low_pfn) +
1196 bootmap_size + PAGE_SIZE-1) - (__PHYSICAL_START));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001197
1198 /*
1199 * reserve physical page 0 - it's a special BIOS page on many boxes,
1200 * enabling clean reboots, SMP operation, laptop functions.
1201 */
1202 reserve_bootmem(0, PAGE_SIZE);
1203
1204 /* reserve EBDA region, it's a 4K region */
1205 reserve_ebda_region();
1206
1207 /* could be an AMD 768MPX chipset. Reserve a page before VGA to prevent
1208 PCI prefetch into it (errata #56). Usually the page is reserved anyways,
1209 unless you have no PS/2 mouse plugged in. */
1210 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
1211 boot_cpu_data.x86 == 6)
1212 reserve_bootmem(0xa0000 - 4096, 4096);
1213
1214#ifdef CONFIG_SMP
1215 /*
1216 * But first pinch a few for the stack/trampoline stuff
1217 * FIXME: Don't need the extra page at 4K, but need to fix
1218 * trampoline before removing it. (see the GDT stuff)
1219 */
1220 reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
1221#endif
1222#ifdef CONFIG_ACPI_SLEEP
1223 /*
1224 * Reserve low memory region for sleep support.
1225 */
1226 acpi_reserve_bootmem();
1227#endif
1228#ifdef CONFIG_X86_FIND_SMP_CONFIG
1229 /*
1230 * Find and reserve possible boot-time SMP configuration:
1231 */
1232 find_smp_config();
1233#endif
1234
1235#ifdef CONFIG_BLK_DEV_INITRD
1236 if (LOADER_TYPE && INITRD_START) {
1237 if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
1238 reserve_bootmem(INITRD_START, INITRD_SIZE);
1239 initrd_start =
1240 INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
1241 initrd_end = initrd_start+INITRD_SIZE;
1242 }
1243 else {
1244 printk(KERN_ERR "initrd extends beyond end of memory "
1245 "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
1246 INITRD_START + INITRD_SIZE,
1247 max_low_pfn << PAGE_SHIFT);
1248 initrd_start = 0;
1249 }
1250 }
1251#endif
Eric W. Biederman1bc3b912005-06-25 14:58:01 -07001252#ifdef CONFIG_KEXEC
1253 if (crashk_res.start != crashk_res.end)
1254 reserve_bootmem(crashk_res.start,
1255 crashk_res.end - crashk_res.start + 1);
1256#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001257}
1258
1259/*
1260 * The node 0 pgdat is initialized before all of these because
1261 * it's needed for bootmem. node>0 pgdats have their virtual
1262 * space allocated before the pagetables are in place to access
1263 * them, so they can't be cleared then.
1264 *
1265 * This should all compile down to nothing when NUMA is off.
1266 */
1267void __init remapped_pgdat_init(void)
1268{
1269 int nid;
1270
1271 for_each_online_node(nid) {
1272 if (nid != 0)
1273 memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
1274 }
1275}
1276
1277/*
1278 * Request address space for all standard RAM and ROM resources
1279 * and also for regions reported as reserved by the e820.
1280 */
1281static void __init
1282legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource)
1283{
1284 int i;
1285
1286 probe_roms();
1287 for (i = 0; i < e820.nr_map; i++) {
1288 struct resource *res;
Linus Torvaldsb408cbc2006-02-22 15:50:30 -08001289 res = kzalloc(sizeof(struct resource), GFP_ATOMIC);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001290 switch (e820.map[i].type) {
1291 case E820_RAM: res->name = "System RAM"; break;
1292 case E820_ACPI: res->name = "ACPI Tables"; break;
1293 case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
1294 default: res->name = "reserved";
1295 }
1296 res->start = e820.map[i].addr;
1297 res->end = res->start + e820.map[i].size - 1;
1298 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
1299 request_resource(&iomem_resource, res);
1300 if (e820.map[i].type == E820_RAM) {
1301 /*
1302 * We don't know which RAM region contains kernel data,
1303 * so we try it repeatedly and let the resource manager
1304 * test it.
1305 */
1306 request_resource(res, code_resource);
1307 request_resource(res, data_resource);
Eric W. Biederman1bc3b912005-06-25 14:58:01 -07001308#ifdef CONFIG_KEXEC
1309 request_resource(res, &crashk_res);
1310#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001311 }
1312 }
1313}
1314
1315/*
1316 * Request address space for all standard resources
Linus Torvaldsb408cbc2006-02-22 15:50:30 -08001317 *
1318 * This is called just before pcibios_assign_resources(), which is also
1319 * an fs_initcall, but is linked in later (in arch/i386/pci/i386.c).
Linus Torvalds1da177e2005-04-16 15:20:36 -07001320 */
Linus Torvaldsb408cbc2006-02-22 15:50:30 -08001321static int __init request_standard_resources(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001322{
Linus Torvaldsb408cbc2006-02-22 15:50:30 -08001323 int i;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001324
Linus Torvaldsb408cbc2006-02-22 15:50:30 -08001325 printk("Setting up standard PCI resources\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001326 if (efi_enabled)
1327 efi_initialize_iomem_resources(&code_resource, &data_resource);
1328 else
1329 legacy_init_iomem_resources(&code_resource, &data_resource);
1330
1331 /* EFI systems may still have VGA */
1332 request_resource(&iomem_resource, &video_ram_resource);
1333
1334 /* request I/O space for devices used on all i[345]86 PCs */
1335 for (i = 0; i < STANDARD_IO_RESOURCES; i++)
1336 request_resource(&ioport_resource, &standard_io_resources[i]);
Linus Torvaldsb408cbc2006-02-22 15:50:30 -08001337 return 0;
1338}
1339
1340fs_initcall(request_standard_resources);
1341
1342static void __init register_memory(void)
1343{
1344 unsigned long gapstart, gapsize, round;
1345 unsigned long long last;
1346 int i;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001347
1348 /*
1349 * Search for the bigest gap in the low 32 bits of the e820
1350 * memory space.
1351 */
1352 last = 0x100000000ull;
1353 gapstart = 0x10000000;
1354 gapsize = 0x400000;
1355 i = e820.nr_map;
1356 while (--i >= 0) {
1357 unsigned long long start = e820.map[i].addr;
1358 unsigned long long end = start + e820.map[i].size;
1359
1360 /*
1361 * Since "last" is at most 4GB, we know we'll
1362 * fit in 32 bits if this condition is true
1363 */
1364 if (last > end) {
1365 unsigned long gap = last - end;
1366
1367 if (gap > gapsize) {
1368 gapsize = gap;
1369 gapstart = end;
1370 }
1371 }
1372 if (start < last)
1373 last = start;
1374 }
1375
1376 /*
Daniel Ritzf0eca962005-09-09 00:57:14 +02001377 * See how much we want to round up: start off with
1378 * rounding to the next 1MB area.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001379 */
Daniel Ritzf0eca962005-09-09 00:57:14 +02001380 round = 0x100000;
1381 while ((gapsize >> 4) > round)
1382 round += round;
1383 /* Fun with two's complement */
1384 pci_mem_start = (gapstart + round) & -round;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001385
1386 printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
1387 pci_mem_start, gapstart, gapsize);
1388}
1389
Linus Torvalds1da177e2005-04-16 15:20:36 -07001390static char * __init machine_specific_memory_setup(void);
1391
/*
 * Record the MCA bus flag in MCA_bus.  Without CONFIG_MCA this does
 * nothing.
 */
static void set_mca_bus(int x)
{
#ifdef CONFIG_MCA
	MCA_bus = x;
#endif
}
1400
1401/*
1402 * Determine if we were loaded by an EFI loader. If so, then we have also been
1403 * passed the efi memmap, systab, etc., so we should use these data structures
1404 * for initialization. Note, the efi init code path is determined by the
1405 * global efi_enabled. This allows the same kernel image to be used on existing
1406 * systems (with a traditional BIOS) as well as on EFI systems.
1407 */
1408void __init setup_arch(char **cmdline_p)
1409{
1410 unsigned long max_low_pfn;
1411
1412 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
1413 pre_setup_arch_hook();
1414 early_cpu_init();
1415
1416 /*
1417 * FIXME: This isn't an official loader_type right
1418 * now but does currently work with elilo.
1419 * If we were configured as an EFI kernel, check to make
1420 * sure that we were loaded correctly from elilo and that
1421 * the system table is valid. If not, then initialize normally.
1422 */
1423#ifdef CONFIG_EFI
1424 if ((LOADER_TYPE == 0x50) && EFI_SYSTAB)
1425 efi_enabled = 1;
1426#endif
1427
1428 ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
1429 drive_info = DRIVE_INFO;
1430 screen_info = SCREEN_INFO;
1431 edid_info = EDID_INFO;
1432 apm_info.bios = APM_BIOS_INFO;
1433 ist_info = IST_INFO;
1434 saved_videomode = VIDEO_MODE;
1435 if( SYS_DESC_TABLE.length != 0 ) {
1436 set_mca_bus(SYS_DESC_TABLE.table[3] & 0x2);
1437 machine_id = SYS_DESC_TABLE.table[0];
1438 machine_submodel_id = SYS_DESC_TABLE.table[1];
1439 BIOS_revision = SYS_DESC_TABLE.table[2];
1440 }
1441 bootloader_type = LOADER_TYPE;
1442
1443#ifdef CONFIG_BLK_DEV_RAM
1444 rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
1445 rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
1446 rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
1447#endif
1448 ARCH_SETUP
1449 if (efi_enabled)
1450 efi_init();
1451 else {
1452 printk(KERN_INFO "BIOS-provided physical RAM map:\n");
1453 print_memory_map(machine_specific_memory_setup());
1454 }
1455
1456 copy_edd();
1457
1458 if (!MOUNT_ROOT_RDONLY)
1459 root_mountflags &= ~MS_RDONLY;
1460 init_mm.start_code = (unsigned long) _text;
1461 init_mm.end_code = (unsigned long) _etext;
1462 init_mm.end_data = (unsigned long) _edata;
1463 init_mm.brk = init_pg_tables_end + PAGE_OFFSET;
1464
1465 code_resource.start = virt_to_phys(_text);
1466 code_resource.end = virt_to_phys(_etext)-1;
1467 data_resource.start = virt_to_phys(_etext);
1468 data_resource.end = virt_to_phys(_edata)-1;
1469
1470 parse_cmdline_early(cmdline_p);
1471
Stas Sergeev99b7de32006-03-23 02:59:41 -08001472#ifdef CONFIG_EARLY_PRINTK
1473 {
1474 char *s = strstr(*cmdline_p, "earlyprintk=");
1475 if (s) {
1476 setup_early_printk(strchr(s, '=') + 1);
1477 printk("early console enabled\n");
1478 }
1479 }
1480#endif
1481
Linus Torvalds1da177e2005-04-16 15:20:36 -07001482 max_low_pfn = setup_memory();
1483
1484 /*
1485 * NOTE: before this point _nobody_ is allowed to allocate
1486 * any memory using the bootmem allocator. Although the
1487 * alloctor is now initialised only the first 8Mb of the kernel
1488 * virtual address space has been mapped. All allocations before
1489 * paging_init() has completed must use the alloc_bootmem_low_pages()
1490 * variant (which allocates DMA'able memory) and care must be taken
1491 * not to exceed the 8Mb limit.
1492 */
1493
1494#ifdef CONFIG_SMP
1495 smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
1496#endif
1497 paging_init();
1498 remapped_pgdat_init();
Andy Whitcroft05b79bd2005-06-23 00:07:57 -07001499 sparse_init();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001500 zone_sizes_init();
1501
1502 /*
1503 * NOTE: at this point the bootmem allocator is fully available.
1504 */
1505
Linus Torvalds1da177e2005-04-16 15:20:36 -07001506 dmi_scan_machine();
1507
1508#ifdef CONFIG_X86_GENERICARCH
1509 generic_apic_probe(*cmdline_p);
1510#endif
1511 if (efi_enabled)
1512 efi_map_memmap();
1513
Andi Kleenf9262c12006-03-08 17:57:25 -08001514#ifdef CONFIG_X86_IO_APIC
1515 check_acpi_pci(); /* Checks more than just ACPI actually */
1516#endif
1517
Len Brown888ba6c2005-08-24 12:07:20 -04001518#ifdef CONFIG_ACPI
Linus Torvalds1da177e2005-04-16 15:20:36 -07001519 /*
1520 * Parse the ACPI tables for possible boot-time SMP configuration.
1521 */
1522 acpi_boot_table_init();
1523 acpi_boot_init();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001524
Venkatesh Pallipadi911a62d2005-09-03 15:56:31 -07001525#if defined(CONFIG_SMP) && defined(CONFIG_X86_PC)
1526 if (def_to_bigsmp)
1527 printk(KERN_WARNING "More than 8 CPUs detected and "
1528 "CONFIG_X86_PC cannot handle it.\nUse "
1529 "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n");
1530#endif
1531#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001532#ifdef CONFIG_X86_LOCAL_APIC
1533 if (smp_found_config)
1534 get_smp_config();
1535#endif
1536
1537 register_memory();
1538
1539#ifdef CONFIG_VT
1540#if defined(CONFIG_VGA_CONSOLE)
1541 if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
1542 conswitchp = &vga_con;
1543#elif defined(CONFIG_DUMMY_CONSOLE)
1544 conswitchp = &dummy_con;
1545#endif
1546#endif
1547}
1548
1549#include "setup_arch_post.h"
1550/*
1551 * Local Variables:
1552 * mode:c
1553 * c-file-style:"k&r"
1554 * c-basic-offset:8
1555 * End:
1556 */