/*
 * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com)
 * Licensed under the GPL
 */
| 5 | |
| 6 | #include "linux/mm.h" |
| 7 | #include "linux/rbtree.h" |
| 8 | #include "linux/slab.h" |
| 9 | #include "linux/vmalloc.h" |
| 10 | #include "linux/bootmem.h" |
| 11 | #include "linux/module.h" |
Dave Hansen | 22a9835 | 2006-03-27 01:16:04 -0800 | [diff] [blame^] | 12 | #include "linux/pfn.h" |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 13 | #include "asm/types.h" |
| 14 | #include "asm/pgtable.h" |
| 15 | #include "kern_util.h" |
| 16 | #include "user_util.h" |
| 17 | #include "mode_kern.h" |
| 18 | #include "mem.h" |
| 19 | #include "mem_user.h" |
| 20 | #include "os.h" |
| 21 | #include "kern.h" |
| 22 | #include "init.h" |
| 23 | |
| 24 | struct phys_desc { |
| 25 | struct rb_node rb; |
| 26 | int fd; |
| 27 | __u64 offset; |
| 28 | void *virt; |
| 29 | unsigned long phys; |
| 30 | struct list_head list; |
| 31 | }; |
| 32 | |
| 33 | static struct rb_root phys_mappings = RB_ROOT; |
| 34 | |
| 35 | static struct rb_node **find_rb(void *virt) |
| 36 | { |
| 37 | struct rb_node **n = &phys_mappings.rb_node; |
| 38 | struct phys_desc *d; |
| 39 | |
| 40 | while(*n != NULL){ |
| 41 | d = rb_entry(*n, struct phys_desc, rb); |
| 42 | if(d->virt == virt) |
| 43 | return(n); |
| 44 | |
| 45 | if(d->virt > virt) |
| 46 | n = &(*n)->rb_left; |
| 47 | else |
| 48 | n = &(*n)->rb_right; |
| 49 | } |
| 50 | |
| 51 | return(n); |
| 52 | } |
| 53 | |
| 54 | static struct phys_desc *find_phys_mapping(void *virt) |
| 55 | { |
| 56 | struct rb_node **n = find_rb(virt); |
| 57 | |
| 58 | if(*n == NULL) |
| 59 | return(NULL); |
| 60 | |
| 61 | return(rb_entry(*n, struct phys_desc, rb)); |
| 62 | } |
| 63 | |
| 64 | static void insert_phys_mapping(struct phys_desc *desc) |
| 65 | { |
| 66 | struct rb_node **n = find_rb(desc->virt); |
| 67 | |
| 68 | if(*n != NULL) |
| 69 | panic("Physical remapping for %p already present", |
| 70 | desc->virt); |
| 71 | |
| 72 | rb_link_node(&desc->rb, (*n)->rb_parent, n); |
| 73 | rb_insert_color(&desc->rb, &phys_mappings); |
| 74 | } |
| 75 | |
| 76 | LIST_HEAD(descriptor_mappings); |
| 77 | |
| 78 | struct desc_mapping { |
| 79 | int fd; |
| 80 | struct list_head list; |
| 81 | struct list_head pages; |
| 82 | }; |
| 83 | |
| 84 | static struct desc_mapping *find_mapping(int fd) |
| 85 | { |
| 86 | struct desc_mapping *desc; |
| 87 | struct list_head *ele; |
| 88 | |
| 89 | list_for_each(ele, &descriptor_mappings){ |
| 90 | desc = list_entry(ele, struct desc_mapping, list); |
| 91 | if(desc->fd == fd) |
| 92 | return(desc); |
| 93 | } |
| 94 | |
| 95 | return(NULL); |
| 96 | } |
| 97 | |
| 98 | static struct desc_mapping *descriptor_mapping(int fd) |
| 99 | { |
| 100 | struct desc_mapping *desc; |
| 101 | |
| 102 | desc = find_mapping(fd); |
| 103 | if(desc != NULL) |
| 104 | return(desc); |
| 105 | |
| 106 | desc = kmalloc(sizeof(*desc), GFP_ATOMIC); |
| 107 | if(desc == NULL) |
| 108 | return(NULL); |
| 109 | |
| 110 | *desc = ((struct desc_mapping) |
| 111 | { .fd = fd, |
| 112 | .list = LIST_HEAD_INIT(desc->list), |
| 113 | .pages = LIST_HEAD_INIT(desc->pages) }); |
| 114 | list_add(&desc->list, &descriptor_mappings); |
| 115 | |
| 116 | return(desc); |
| 117 | } |
| 118 | |
| 119 | int physmem_subst_mapping(void *virt, int fd, __u64 offset, int w) |
| 120 | { |
| 121 | struct desc_mapping *fd_maps; |
| 122 | struct phys_desc *desc; |
| 123 | unsigned long phys; |
| 124 | int err; |
| 125 | |
| 126 | fd_maps = descriptor_mapping(fd); |
| 127 | if(fd_maps == NULL) |
| 128 | return(-ENOMEM); |
| 129 | |
| 130 | phys = __pa(virt); |
| 131 | desc = find_phys_mapping(virt); |
| 132 | if(desc != NULL) |
| 133 | panic("Address 0x%p is already substituted\n", virt); |
| 134 | |
| 135 | err = -ENOMEM; |
| 136 | desc = kmalloc(sizeof(*desc), GFP_ATOMIC); |
| 137 | if(desc == NULL) |
| 138 | goto out; |
| 139 | |
| 140 | *desc = ((struct phys_desc) |
| 141 | { .fd = fd, |
| 142 | .offset = offset, |
| 143 | .virt = virt, |
| 144 | .phys = __pa(virt), |
| 145 | .list = LIST_HEAD_INIT(desc->list) }); |
| 146 | insert_phys_mapping(desc); |
| 147 | |
| 148 | list_add(&desc->list, &fd_maps->pages); |
| 149 | |
| 150 | virt = (void *) ((unsigned long) virt & PAGE_MASK); |
| 151 | err = os_map_memory(virt, fd, offset, PAGE_SIZE, 1, w, 0); |
| 152 | if(!err) |
| 153 | goto out; |
| 154 | |
| 155 | rb_erase(&desc->rb, &phys_mappings); |
| 156 | kfree(desc); |
| 157 | out: |
| 158 | return(err); |
| 159 | } |
| 160 | |
| 161 | static int physmem_fd = -1; |
| 162 | |
| 163 | static void remove_mapping(struct phys_desc *desc) |
| 164 | { |
| 165 | void *virt = desc->virt; |
| 166 | int err; |
| 167 | |
| 168 | rb_erase(&desc->rb, &phys_mappings); |
| 169 | list_del(&desc->list); |
| 170 | kfree(desc); |
| 171 | |
| 172 | err = os_map_memory(virt, physmem_fd, __pa(virt), PAGE_SIZE, 1, 1, 0); |
| 173 | if(err) |
| 174 | panic("Failed to unmap block device page from physical memory, " |
| 175 | "errno = %d", -err); |
| 176 | } |
| 177 | |
| 178 | int physmem_remove_mapping(void *virt) |
| 179 | { |
| 180 | struct phys_desc *desc; |
| 181 | |
| 182 | virt = (void *) ((unsigned long) virt & PAGE_MASK); |
| 183 | desc = find_phys_mapping(virt); |
| 184 | if(desc == NULL) |
| 185 | return(0); |
| 186 | |
| 187 | remove_mapping(desc); |
| 188 | return(1); |
| 189 | } |
| 190 | |
| 191 | void physmem_forget_descriptor(int fd) |
| 192 | { |
| 193 | struct desc_mapping *desc; |
| 194 | struct phys_desc *page; |
| 195 | struct list_head *ele, *next; |
| 196 | __u64 offset; |
| 197 | void *addr; |
| 198 | int err; |
| 199 | |
| 200 | desc = find_mapping(fd); |
| 201 | if(desc == NULL) |
| 202 | return; |
| 203 | |
| 204 | list_for_each_safe(ele, next, &desc->pages){ |
| 205 | page = list_entry(ele, struct phys_desc, list); |
| 206 | offset = page->offset; |
| 207 | addr = page->virt; |
| 208 | remove_mapping(page); |
| 209 | err = os_seek_file(fd, offset); |
| 210 | if(err) |
| 211 | panic("physmem_forget_descriptor - failed to seek " |
| 212 | "to %lld in fd %d, error = %d\n", |
| 213 | offset, fd, -err); |
| 214 | err = os_read_file(fd, addr, PAGE_SIZE); |
| 215 | if(err < 0) |
| 216 | panic("physmem_forget_descriptor - failed to read " |
| 217 | "from fd %d to 0x%p, error = %d\n", |
| 218 | fd, addr, -err); |
| 219 | } |
| 220 | |
| 221 | list_del(&desc->list); |
| 222 | kfree(desc); |
| 223 | } |
| 224 | |
/* Page substitution interface exported for use by modules */
EXPORT_SYMBOL(physmem_forget_descriptor);
EXPORT_SYMBOL(physmem_remove_mapping);
EXPORT_SYMBOL(physmem_subst_mapping);
| 228 | |
| 229 | void arch_free_page(struct page *page, int order) |
| 230 | { |
| 231 | void *virt; |
| 232 | int i; |
| 233 | |
| 234 | for(i = 0; i < (1 << order); i++){ |
| 235 | virt = __va(page_to_phys(page + i)); |
| 236 | physmem_remove_mapping(virt); |
| 237 | } |
| 238 | } |
| 239 | |
| 240 | int is_remapped(void *virt) |
| 241 | { |
| 242 | struct phys_desc *desc = find_phys_mapping(virt); |
| 243 | |
| 244 | return(desc != NULL); |
| 245 | } |
| 246 | |
/* Changed during early boot */
unsigned long high_physmem;

/* Defined elsewhere; assigned here by the "mem=" option handler */
extern unsigned long long physmem_size;
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 251 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 252 | int init_maps(unsigned long physmem, unsigned long iomem, unsigned long highmem) |
| 253 | { |
| 254 | struct page *p, *map; |
| 255 | unsigned long phys_len, phys_pages, highmem_len, highmem_pages; |
| 256 | unsigned long iomem_len, iomem_pages, total_len, total_pages; |
| 257 | int i; |
| 258 | |
| 259 | phys_pages = physmem >> PAGE_SHIFT; |
| 260 | phys_len = phys_pages * sizeof(struct page); |
| 261 | |
| 262 | iomem_pages = iomem >> PAGE_SHIFT; |
| 263 | iomem_len = iomem_pages * sizeof(struct page); |
| 264 | |
| 265 | highmem_pages = highmem >> PAGE_SHIFT; |
| 266 | highmem_len = highmem_pages * sizeof(struct page); |
| 267 | |
| 268 | total_pages = phys_pages + iomem_pages + highmem_pages; |
Paolo 'Blaisorblade' Giarrusso | 3dfd95b | 2006-02-01 03:06:26 -0800 | [diff] [blame] | 269 | total_len = phys_len + iomem_len + highmem_len; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 270 | |
| 271 | if(kmalloc_ok){ |
| 272 | map = kmalloc(total_len, GFP_KERNEL); |
| 273 | if(map == NULL) |
| 274 | map = vmalloc(total_len); |
| 275 | } |
| 276 | else map = alloc_bootmem_low_pages(total_len); |
| 277 | |
| 278 | if(map == NULL) |
| 279 | return(-ENOMEM); |
| 280 | |
| 281 | for(i = 0; i < total_pages; i++){ |
| 282 | p = &map[i]; |
Nick Piggin | 70dc991 | 2006-03-22 00:08:35 -0800 | [diff] [blame] | 283 | memset(p, 0, sizeof(struct page)); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 284 | SetPageReserved(p); |
| 285 | INIT_LIST_HEAD(&p->lru); |
| 286 | } |
| 287 | |
| 288 | max_mapnr = total_pages; |
| 289 | return(0); |
| 290 | } |
| 291 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 292 | /* Changed during early boot */ |
| 293 | static unsigned long kmem_top = 0; |
| 294 | |
| 295 | unsigned long get_kmem_end(void) |
| 296 | { |
| 297 | if(kmem_top == 0) |
| 298 | kmem_top = CHOOSE_MODE(kmem_end_tt, kmem_end_skas); |
| 299 | return(kmem_top); |
| 300 | } |
| 301 | |
| 302 | void map_memory(unsigned long virt, unsigned long phys, unsigned long len, |
| 303 | int r, int w, int x) |
| 304 | { |
| 305 | __u64 offset; |
| 306 | int fd, err; |
| 307 | |
| 308 | fd = phys_mapping(phys, &offset); |
| 309 | err = os_map_memory((void *) virt, fd, offset, len, r, w, x); |
| 310 | if(err) { |
| 311 | if(err == -ENOMEM) |
| 312 | printk("try increasing the host's " |
| 313 | "/proc/sys/vm/max_map_count to <physical " |
| 314 | "memory size>/4096\n"); |
| 315 | panic("map_memory(0x%lx, %d, 0x%llx, %ld, %d, %d, %d) failed, " |
| 316 | "err = %d\n", virt, fd, offset, len, r, w, x, err); |
| 317 | } |
| 318 | } |
| 319 | |
Jeff Dike | d67b569 | 2005-07-07 17:56:49 -0700 | [diff] [blame] | 320 | extern int __syscall_stub_start, __binary_start; |
| 321 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 322 | void setup_physmem(unsigned long start, unsigned long reserve_end, |
Jeff Dike | ae17381 | 2005-11-07 00:58:57 -0800 | [diff] [blame] | 323 | unsigned long len, unsigned long long highmem) |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 324 | { |
| 325 | unsigned long reserve = reserve_end - start; |
| 326 | int pfn = PFN_UP(__pa(reserve_end)); |
| 327 | int delta = (len - reserve) >> PAGE_SHIFT; |
| 328 | int err, offset, bootmap_size; |
| 329 | |
| 330 | physmem_fd = create_mem_file(len + highmem); |
| 331 | |
| 332 | offset = uml_reserved - uml_physmem; |
| 333 | err = os_map_memory((void *) uml_reserved, physmem_fd, offset, |
| 334 | len - offset, 1, 1, 0); |
| 335 | if(err < 0){ |
| 336 | os_print_error(err, "Mapping memory"); |
| 337 | exit(1); |
| 338 | } |
| 339 | |
Jeff Dike | d67b569 | 2005-07-07 17:56:49 -0700 | [diff] [blame] | 340 | /* Special kludge - This page will be mapped in to userspace processes |
| 341 | * from physmem_fd, so it needs to be written out there. |
| 342 | */ |
| 343 | os_seek_file(physmem_fd, __pa(&__syscall_stub_start)); |
| 344 | os_write_file(physmem_fd, &__syscall_stub_start, PAGE_SIZE); |
| 345 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 346 | bootmap_size = init_bootmem(pfn, pfn + delta); |
| 347 | free_bootmem(__pa(reserve_end) + bootmap_size, |
| 348 | len - bootmap_size - reserve); |
| 349 | } |
| 350 | |
| 351 | int phys_mapping(unsigned long phys, __u64 *offset_out) |
| 352 | { |
| 353 | struct phys_desc *desc = find_phys_mapping(__va(phys & PAGE_MASK)); |
| 354 | int fd = -1; |
| 355 | |
| 356 | if(desc != NULL){ |
| 357 | fd = desc->fd; |
| 358 | *offset_out = desc->offset; |
| 359 | } |
| 360 | else if(phys < physmem_size){ |
| 361 | fd = physmem_fd; |
| 362 | *offset_out = phys; |
| 363 | } |
| 364 | else if(phys < __pa(end_iomem)){ |
| 365 | struct iomem_region *region = iomem_regions; |
| 366 | |
| 367 | while(region != NULL){ |
| 368 | if((phys >= region->phys) && |
| 369 | (phys < region->phys + region->size)){ |
| 370 | fd = region->fd; |
| 371 | *offset_out = phys - region->phys; |
| 372 | break; |
| 373 | } |
| 374 | region = region->next; |
| 375 | } |
| 376 | } |
| 377 | else if(phys < __pa(end_iomem) + highmem){ |
| 378 | fd = physmem_fd; |
| 379 | *offset_out = phys - iomem_size; |
| 380 | } |
| 381 | |
| 382 | return(fd); |
| 383 | } |
| 384 | |
| 385 | static int __init uml_mem_setup(char *line, int *add) |
| 386 | { |
| 387 | char *retptr; |
| 388 | physmem_size = memparse(line,&retptr); |
| 389 | return 0; |
| 390 | } |
| 391 | __uml_setup("mem=", uml_mem_setup, |
| 392 | "mem=<Amount of desired ram>\n" |
| 393 | " This controls how much \"physical\" memory the kernel allocates\n" |
| 394 | " for the system. The size is specified as a number followed by\n" |
| 395 | " one of 'k', 'K', 'm', 'M', which have the obvious meanings.\n" |
| 396 | " This is not related to the amount of memory in the host. It can\n" |
| 397 | " be more, and the excess, if it's ever used, will just be swapped out.\n" |
| 398 | " Example: mem=64M\n\n" |
| 399 | ); |
| 400 | |
| 401 | unsigned long find_iomem(char *driver, unsigned long *len_out) |
| 402 | { |
| 403 | struct iomem_region *region = iomem_regions; |
| 404 | |
| 405 | while(region != NULL){ |
| 406 | if(!strcmp(region->driver, driver)){ |
| 407 | *len_out = region->size; |
| 408 | return(region->virt); |
| 409 | } |
| 410 | } |
| 411 | |
| 412 | return(0); |
| 413 | } |
| 414 | |
| 415 | int setup_iomem(void) |
| 416 | { |
| 417 | struct iomem_region *region = iomem_regions; |
| 418 | unsigned long iomem_start = high_physmem + PAGE_SIZE; |
| 419 | int err; |
| 420 | |
| 421 | while(region != NULL){ |
| 422 | err = os_map_memory((void *) iomem_start, region->fd, 0, |
| 423 | region->size, 1, 1, 0); |
| 424 | if(err) |
| 425 | printk("Mapping iomem region for driver '%s' failed, " |
| 426 | "errno = %d\n", region->driver, -err); |
| 427 | else { |
| 428 | region->virt = iomem_start; |
| 429 | region->phys = __pa(region->virt); |
| 430 | } |
| 431 | |
| 432 | iomem_start += region->size + PAGE_SIZE; |
| 433 | region = region->next; |
| 434 | } |
| 435 | |
| 436 | return(0); |
| 437 | } |
| 438 | |
| 439 | __initcall(setup_iomem); |
| 440 | |
/*
 * Overrides for Emacs so that we follow Linus's tabbing style.
 * Emacs will notice this stuff at the end of the file and automatically
 * adjust the settings for this buffer only.  This must remain at the end
 * of the file.
 * ---------------------------------------------------------------------------
 * Local variables:
 * c-file-style: "linux"
 * End:
 */