blob: e93bd63462f01b682903af309f7b73c9efc94bbf [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * Generic hugetlb support.
3 * (C) William Irwin, April 2004
4 */
5#include <linux/gfp.h>
6#include <linux/list.h>
7#include <linux/init.h>
8#include <linux/module.h>
9#include <linux/mm.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070010#include <linux/sysctl.h>
11#include <linux/highmem.h>
12#include <linux/nodemask.h>
David Gibson63551ae2005-06-21 17:14:44 -070013#include <linux/pagemap.h>
14#include <asm/page.h>
15#include <asm/pgtable.h>
16
17#include <linux/hugetlb.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070018
19const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
20static unsigned long nr_huge_pages, free_huge_pages;
21unsigned long max_huge_pages;
22static struct list_head hugepage_freelists[MAX_NUMNODES];
23static unsigned int nr_huge_pages_node[MAX_NUMNODES];
24static unsigned int free_huge_pages_node[MAX_NUMNODES];
Eric Paris0bd0f9f2005-11-21 21:32:28 -080025
26/*
27 * Protects updates to hugepage_freelists, nr_huge_pages, and free_huge_pages
28 */
Linus Torvalds1da177e2005-04-16 15:20:36 -070029static DEFINE_SPINLOCK(hugetlb_lock);
30
31static void enqueue_huge_page(struct page *page)
32{
33 int nid = page_to_nid(page);
34 list_add(&page->lru, &hugepage_freelists[nid]);
35 free_huge_pages++;
36 free_huge_pages_node[nid]++;
37}
38
39static struct page *dequeue_huge_page(void)
40{
41 int nid = numa_node_id();
42 struct page *page = NULL;
Christoph Lameter96df9332006-01-06 00:10:45 -080043 struct zonelist *zonelist = NODE_DATA(nid)->node_zonelists;
44 struct zone **z;
Linus Torvalds1da177e2005-04-16 15:20:36 -070045
Christoph Lameter96df9332006-01-06 00:10:45 -080046 for (z = zonelist->zones; *z; z++) {
47 nid = (*z)->zone_pgdat->node_id;
48 if (!list_empty(&hugepage_freelists[nid]))
49 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -070050 }
Christoph Lameter96df9332006-01-06 00:10:45 -080051
52 if (*z) {
Linus Torvalds1da177e2005-04-16 15:20:36 -070053 page = list_entry(hugepage_freelists[nid].next,
54 struct page, lru);
55 list_del(&page->lru);
56 free_huge_pages--;
57 free_huge_pages_node[nid]--;
58 }
59 return page;
60}
61
62static struct page *alloc_fresh_huge_page(void)
63{
64 static int nid = 0;
65 struct page *page;
66 page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP|__GFP_NOWARN,
67 HUGETLB_PAGE_ORDER);
68 nid = (nid + 1) % num_online_nodes();
69 if (page) {
Eric Paris0bd0f9f2005-11-21 21:32:28 -080070 spin_lock(&hugetlb_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -070071 nr_huge_pages++;
72 nr_huge_pages_node[page_to_nid(page)]++;
Eric Paris0bd0f9f2005-11-21 21:32:28 -080073 spin_unlock(&hugetlb_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -070074 }
75 return page;
76}
77
78void free_huge_page(struct page *page)
79{
80 BUG_ON(page_count(page));
81
82 INIT_LIST_HEAD(&page->lru);
83 page[1].mapping = NULL;
84
85 spin_lock(&hugetlb_lock);
86 enqueue_huge_page(page);
87 spin_unlock(&hugetlb_lock);
88}
89
90struct page *alloc_huge_page(void)
91{
92 struct page *page;
93 int i;
94
95 spin_lock(&hugetlb_lock);
96 page = dequeue_huge_page();
97 if (!page) {
98 spin_unlock(&hugetlb_lock);
99 return NULL;
100 }
101 spin_unlock(&hugetlb_lock);
102 set_page_count(page, 1);
103 page[1].mapping = (void *)free_huge_page;
104 for (i = 0; i < (HPAGE_SIZE/PAGE_SIZE); ++i)
105 clear_highpage(&page[i]);
106 return page;
107}
108
109static int __init hugetlb_init(void)
110{
111 unsigned long i;
112 struct page *page;
113
Benjamin Herrenschmidt3c726f82005-11-07 11:06:55 +1100114 if (HPAGE_SHIFT == 0)
115 return 0;
116
Linus Torvalds1da177e2005-04-16 15:20:36 -0700117 for (i = 0; i < MAX_NUMNODES; ++i)
118 INIT_LIST_HEAD(&hugepage_freelists[i]);
119
120 for (i = 0; i < max_huge_pages; ++i) {
121 page = alloc_fresh_huge_page();
122 if (!page)
123 break;
124 spin_lock(&hugetlb_lock);
125 enqueue_huge_page(page);
126 spin_unlock(&hugetlb_lock);
127 }
128 max_huge_pages = free_huge_pages = nr_huge_pages = i;
129 printk("Total HugeTLB memory allocated, %ld\n", free_huge_pages);
130 return 0;
131}
132module_init(hugetlb_init);
133
134static int __init hugetlb_setup(char *s)
135{
136 if (sscanf(s, "%lu", &max_huge_pages) <= 0)
137 max_huge_pages = 0;
138 return 1;
139}
140__setup("hugepages=", hugetlb_setup);
141
142#ifdef CONFIG_SYSCTL
143static void update_and_free_page(struct page *page)
144{
145 int i;
146 nr_huge_pages--;
147 nr_huge_pages_node[page_zone(page)->zone_pgdat->node_id]--;
148 for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++) {
149 page[i].flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced |
150 1 << PG_dirty | 1 << PG_active | 1 << PG_reserved |
151 1 << PG_private | 1<< PG_writeback);
152 set_page_count(&page[i], 0);
153 }
154 set_page_count(page, 1);
155 __free_pages(page, HUGETLB_PAGE_ORDER);
156}
157
#ifdef CONFIG_HIGHMEM
/*
 * Shrink the pool towards @count huge pages by releasing free huge pages
 * that are NOT in high memory.  Every node's free list is scanned; the walk
 * stops as soon as nr_huge_pages has dropped to @count or below.
 *
 * The only caller in this file holds hugetlb_lock.
 */
static void try_to_free_low(unsigned long count)
{
	int i;

	for (i = 0; i < MAX_NUMNODES; ++i) {
		struct page *page, *next;

		list_for_each_entry_safe(page, next, &hugepage_freelists[i], lru) {
			int nid;

			if (PageHighMem(page))
				continue;
			/*
			 * Look up the node before the page is handed back to
			 * the page allocator; after that we no longer own it.
			 */
			nid = page_zone(page)->zone_pgdat->node_id;
			list_del(&page->lru);
			update_and_free_page(page);
			free_huge_pages--;
			free_huge_pages_node[nid]--;
			if (count >= nr_huge_pages)
				return;
		}
	}
}
#else
/* Without CONFIG_HIGHMEM there is nothing to prefer: no-op. */
static inline void try_to_free_low(unsigned long count)
{
}
#endif
182
183static unsigned long set_max_huge_pages(unsigned long count)
184{
185 while (count > nr_huge_pages) {
186 struct page *page = alloc_fresh_huge_page();
187 if (!page)
188 return nr_huge_pages;
189 spin_lock(&hugetlb_lock);
190 enqueue_huge_page(page);
191 spin_unlock(&hugetlb_lock);
192 }
193 if (count >= nr_huge_pages)
194 return nr_huge_pages;
195
196 spin_lock(&hugetlb_lock);
197 try_to_free_low(count);
198 while (count < nr_huge_pages) {
199 struct page *page = dequeue_huge_page();
200 if (!page)
201 break;
202 update_and_free_page(page);
203 }
204 spin_unlock(&hugetlb_lock);
205 return nr_huge_pages;
206}
207
208int hugetlb_sysctl_handler(struct ctl_table *table, int write,
209 struct file *file, void __user *buffer,
210 size_t *length, loff_t *ppos)
211{
212 proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
213 max_huge_pages = set_max_huge_pages(max_huge_pages);
214 return 0;
215}
216#endif /* CONFIG_SYSCTL */
217
218int hugetlb_report_meminfo(char *buf)
219{
220 return sprintf(buf,
221 "HugePages_Total: %5lu\n"
222 "HugePages_Free: %5lu\n"
223 "Hugepagesize: %5lu kB\n",
224 nr_huge_pages,
225 free_huge_pages,
226 HPAGE_SIZE/1024);
227}
228
229int hugetlb_report_node_meminfo(int nid, char *buf)
230{
231 return sprintf(buf,
232 "Node %d HugePages_Total: %5u\n"
233 "Node %d HugePages_Free: %5u\n",
234 nid, nr_huge_pages_node[nid],
235 nid, free_huge_pages_node[nid]);
236}
237
238int is_hugepage_mem_enough(size_t size)
239{
240 return (size + ~HPAGE_MASK)/HPAGE_SIZE <= free_huge_pages;
241}
242
243/* Return the number pages of memory we physically have, in PAGE_SIZE units. */
244unsigned long hugetlb_total_pages(void)
245{
246 return nr_huge_pages * (HPAGE_SIZE / PAGE_SIZE);
247}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700248
/*
 * We cannot handle pagefaults against hugetlb pages at all.  They cause
 * handle_mm_fault() to try to instantiate regular-sized pages in the
 * hugepage VMA.  do_page_fault() is supposed to trap this, so BUG if we get
 * this far.
 */
static struct page *hugetlb_nopage(struct vm_area_struct *vma,
				unsigned long address, int *unused)
{
	/* Reaching this handler at all is treated as a kernel bug. */
	BUG();
	return NULL;
}
261
/*
 * VM operations table; the only hook it sets is .nopage, which points at
 * the BUG()ing hugetlb_nopage() handler.
 */
struct vm_operations_struct hugetlb_vm_ops = {
	.nopage = hugetlb_nopage,
};
265
David Gibson1e8f8892006-01-06 00:10:44 -0800266static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page,
267 int writable)
David Gibson63551ae2005-06-21 17:14:44 -0700268{
269 pte_t entry;
270
David Gibson1e8f8892006-01-06 00:10:44 -0800271 if (writable) {
David Gibson63551ae2005-06-21 17:14:44 -0700272 entry =
273 pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
274 } else {
275 entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot));
276 }
277 entry = pte_mkyoung(entry);
278 entry = pte_mkhuge(entry);
279
280 return entry;
281}
282
David Gibson1e8f8892006-01-06 00:10:44 -0800283static void set_huge_ptep_writable(struct vm_area_struct *vma,
284 unsigned long address, pte_t *ptep)
285{
286 pte_t entry;
287
288 entry = pte_mkwrite(pte_mkdirty(*ptep));
289 ptep_set_access_flags(vma, address, ptep, entry, 1);
290 update_mmu_cache(vma, address, entry);
291 lazy_mmu_prot_update(entry);
292}
293
294
David Gibson63551ae2005-06-21 17:14:44 -0700295int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
296 struct vm_area_struct *vma)
297{
298 pte_t *src_pte, *dst_pte, entry;
299 struct page *ptepage;
Hugh Dickins1c598272005-10-19 21:23:43 -0700300 unsigned long addr;
David Gibson1e8f8892006-01-06 00:10:44 -0800301 int cow;
302
303 cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
David Gibson63551ae2005-06-21 17:14:44 -0700304
Hugh Dickins1c598272005-10-19 21:23:43 -0700305 for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
Hugh Dickinsc74df322005-10-29 18:16:23 -0700306 src_pte = huge_pte_offset(src, addr);
307 if (!src_pte)
308 continue;
David Gibson63551ae2005-06-21 17:14:44 -0700309 dst_pte = huge_pte_alloc(dst, addr);
310 if (!dst_pte)
311 goto nomem;
Hugh Dickinsc74df322005-10-29 18:16:23 -0700312 spin_lock(&dst->page_table_lock);
Hugh Dickins1c598272005-10-19 21:23:43 -0700313 spin_lock(&src->page_table_lock);
Hugh Dickinsc74df322005-10-29 18:16:23 -0700314 if (!pte_none(*src_pte)) {
David Gibson1e8f8892006-01-06 00:10:44 -0800315 if (cow)
316 ptep_set_wrprotect(src, addr, src_pte);
Hugh Dickins1c598272005-10-19 21:23:43 -0700317 entry = *src_pte;
318 ptepage = pte_page(entry);
319 get_page(ptepage);
Hugh Dickins42946212005-10-29 18:16:05 -0700320 add_mm_counter(dst, file_rss, HPAGE_SIZE / PAGE_SIZE);
Hugh Dickins1c598272005-10-19 21:23:43 -0700321 set_huge_pte_at(dst, addr, dst_pte, entry);
322 }
323 spin_unlock(&src->page_table_lock);
Hugh Dickinsc74df322005-10-29 18:16:23 -0700324 spin_unlock(&dst->page_table_lock);
David Gibson63551ae2005-06-21 17:14:44 -0700325 }
326 return 0;
327
328nomem:
329 return -ENOMEM;
330}
331
332void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
333 unsigned long end)
334{
335 struct mm_struct *mm = vma->vm_mm;
336 unsigned long address;
David Gibsonc7546f82005-08-05 11:59:35 -0700337 pte_t *ptep;
David Gibson63551ae2005-06-21 17:14:44 -0700338 pte_t pte;
339 struct page *page;
340
341 WARN_ON(!is_vm_hugetlb_page(vma));
342 BUG_ON(start & ~HPAGE_MASK);
343 BUG_ON(end & ~HPAGE_MASK);
344
Hugh Dickins508034a2005-10-29 18:16:30 -0700345 spin_lock(&mm->page_table_lock);
346
Hugh Dickins365e9c872005-10-29 18:16:18 -0700347 /* Update high watermark before we lower rss */
348 update_hiwater_rss(mm);
349
David Gibson63551ae2005-06-21 17:14:44 -0700350 for (address = start; address < end; address += HPAGE_SIZE) {
David Gibsonc7546f82005-08-05 11:59:35 -0700351 ptep = huge_pte_offset(mm, address);
Adam Litke4c887262005-10-29 18:16:46 -0700352 if (!ptep)
David Gibsonc7546f82005-08-05 11:59:35 -0700353 continue;
354
355 pte = huge_ptep_get_and_clear(mm, address, ptep);
David Gibson63551ae2005-06-21 17:14:44 -0700356 if (pte_none(pte))
357 continue;
David Gibsonc7546f82005-08-05 11:59:35 -0700358
David Gibson63551ae2005-06-21 17:14:44 -0700359 page = pte_page(pte);
360 put_page(page);
Hugh Dickins42946212005-10-29 18:16:05 -0700361 add_mm_counter(mm, file_rss, (int) -(HPAGE_SIZE / PAGE_SIZE));
David Gibson63551ae2005-06-21 17:14:44 -0700362 }
David Gibson63551ae2005-06-21 17:14:44 -0700363
Linus Torvalds1da177e2005-04-16 15:20:36 -0700364 spin_unlock(&mm->page_table_lock);
Hugh Dickins508034a2005-10-29 18:16:30 -0700365 flush_tlb_range(vma, start, end);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700366}
David Gibson63551ae2005-06-21 17:14:44 -0700367
Adam Litke85ef47f2006-01-06 00:10:42 -0800368static struct page *find_or_alloc_huge_page(struct address_space *mapping,
David Gibson1e8f8892006-01-06 00:10:44 -0800369 unsigned long idx, int shared)
David Gibson63551ae2005-06-21 17:14:44 -0700370{
Adam Litke4c887262005-10-29 18:16:46 -0700371 struct page *page;
372 int err;
David Gibson63551ae2005-06-21 17:14:44 -0700373
Adam Litke4c887262005-10-29 18:16:46 -0700374retry:
375 page = find_lock_page(mapping, idx);
376 if (page)
377 goto out;
David Gibson63551ae2005-06-21 17:14:44 -0700378
Adam Litke4c887262005-10-29 18:16:46 -0700379 if (hugetlb_get_quota(mapping))
380 goto out;
381 page = alloc_huge_page();
382 if (!page) {
383 hugetlb_put_quota(mapping);
384 goto out;
385 }
David Gibson63551ae2005-06-21 17:14:44 -0700386
David Gibson1e8f8892006-01-06 00:10:44 -0800387 if (shared) {
388 err = add_to_page_cache(page, mapping, idx, GFP_KERNEL);
389 if (err) {
390 put_page(page);
391 hugetlb_put_quota(mapping);
392 if (err == -EEXIST)
393 goto retry;
394 page = NULL;
395 }
396 } else {
397 /* Caller expects a locked page */
398 lock_page(page);
David Gibson63551ae2005-06-21 17:14:44 -0700399 }
400out:
Adam Litke4c887262005-10-29 18:16:46 -0700401 return page;
David Gibson63551ae2005-06-21 17:14:44 -0700402}
403
David Gibson1e8f8892006-01-06 00:10:44 -0800404static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
405 unsigned long address, pte_t *ptep, pte_t pte)
406{
407 struct page *old_page, *new_page;
408 int i, avoidcopy;
409
410 old_page = pte_page(pte);
411
412 /* If no-one else is actually using this page, avoid the copy
413 * and just make the page writable */
414 avoidcopy = (page_count(old_page) == 1);
415 if (avoidcopy) {
416 set_huge_ptep_writable(vma, address, ptep);
417 return VM_FAULT_MINOR;
418 }
419
420 page_cache_get(old_page);
421 new_page = alloc_huge_page();
422
423 if (!new_page) {
424 page_cache_release(old_page);
425
426 /* Logically this is OOM, not a SIGBUS, but an OOM
427 * could cause the kernel to go killing other
428 * processes which won't help the hugepage situation
429 * at all (?) */
430 return VM_FAULT_SIGBUS;
431 }
432
433 spin_unlock(&mm->page_table_lock);
434 for (i = 0; i < HPAGE_SIZE/PAGE_SIZE; i++)
435 copy_user_highpage(new_page + i, old_page + i,
436 address + i*PAGE_SIZE);
437 spin_lock(&mm->page_table_lock);
438
439 ptep = huge_pte_offset(mm, address & HPAGE_MASK);
440 if (likely(pte_same(*ptep, pte))) {
441 /* Break COW */
442 set_huge_pte_at(mm, address, ptep,
443 make_huge_pte(vma, new_page, 1));
444 /* Make the old page be freed below */
445 new_page = old_page;
446 }
447 page_cache_release(new_page);
448 page_cache_release(old_page);
449 return VM_FAULT_MINOR;
450}
451
Adam Litke86e52162006-01-06 00:10:43 -0800452int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
David Gibson1e8f8892006-01-06 00:10:44 -0800453 unsigned long address, pte_t *ptep, int write_access)
Hugh Dickinsac9b9c62005-10-20 16:24:28 +0100454{
455 int ret = VM_FAULT_SIGBUS;
Adam Litke4c887262005-10-29 18:16:46 -0700456 unsigned long idx;
457 unsigned long size;
Adam Litke4c887262005-10-29 18:16:46 -0700458 struct page *page;
459 struct address_space *mapping;
David Gibson1e8f8892006-01-06 00:10:44 -0800460 pte_t new_pte;
Adam Litke4c887262005-10-29 18:16:46 -0700461
Adam Litke4c887262005-10-29 18:16:46 -0700462 mapping = vma->vm_file->f_mapping;
463 idx = ((address - vma->vm_start) >> HPAGE_SHIFT)
464 + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
465
466 /*
467 * Use page lock to guard against racing truncation
468 * before we get page_table_lock.
469 */
David Gibson1e8f8892006-01-06 00:10:44 -0800470 page = find_or_alloc_huge_page(mapping, idx,
471 vma->vm_flags & VM_SHARED);
Adam Litke4c887262005-10-29 18:16:46 -0700472 if (!page)
473 goto out;
Hugh Dickinsac9b9c62005-10-20 16:24:28 +0100474
David Gibson1e8f8892006-01-06 00:10:44 -0800475 BUG_ON(!PageLocked(page));
476
Hugh Dickinsac9b9c62005-10-20 16:24:28 +0100477 spin_lock(&mm->page_table_lock);
Adam Litke4c887262005-10-29 18:16:46 -0700478 size = i_size_read(mapping->host) >> HPAGE_SHIFT;
479 if (idx >= size)
480 goto backout;
481
482 ret = VM_FAULT_MINOR;
Adam Litke86e52162006-01-06 00:10:43 -0800483 if (!pte_none(*ptep))
Adam Litke4c887262005-10-29 18:16:46 -0700484 goto backout;
485
486 add_mm_counter(mm, file_rss, HPAGE_SIZE / PAGE_SIZE);
David Gibson1e8f8892006-01-06 00:10:44 -0800487 new_pte = make_huge_pte(vma, page, ((vma->vm_flags & VM_WRITE)
488 && (vma->vm_flags & VM_SHARED)));
489 set_huge_pte_at(mm, address, ptep, new_pte);
490
491 if (write_access && !(vma->vm_flags & VM_SHARED)) {
492 /* Optimization, do the COW without a second fault */
493 ret = hugetlb_cow(mm, vma, address, ptep, new_pte);
494 }
495
Hugh Dickinsac9b9c62005-10-20 16:24:28 +0100496 spin_unlock(&mm->page_table_lock);
Adam Litke4c887262005-10-29 18:16:46 -0700497 unlock_page(page);
498out:
Hugh Dickinsac9b9c62005-10-20 16:24:28 +0100499 return ret;
Adam Litke4c887262005-10-29 18:16:46 -0700500
501backout:
502 spin_unlock(&mm->page_table_lock);
503 hugetlb_put_quota(mapping);
504 unlock_page(page);
505 put_page(page);
506 goto out;
Hugh Dickinsac9b9c62005-10-20 16:24:28 +0100507}
508
Adam Litke86e52162006-01-06 00:10:43 -0800509int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
510 unsigned long address, int write_access)
511{
512 pte_t *ptep;
513 pte_t entry;
David Gibson1e8f8892006-01-06 00:10:44 -0800514 int ret;
Adam Litke86e52162006-01-06 00:10:43 -0800515
516 ptep = huge_pte_alloc(mm, address);
517 if (!ptep)
518 return VM_FAULT_OOM;
519
520 entry = *ptep;
521 if (pte_none(entry))
David Gibson1e8f8892006-01-06 00:10:44 -0800522 return hugetlb_no_page(mm, vma, address, ptep, write_access);
Adam Litke86e52162006-01-06 00:10:43 -0800523
David Gibson1e8f8892006-01-06 00:10:44 -0800524 ret = VM_FAULT_MINOR;
525
526 spin_lock(&mm->page_table_lock);
527 /* Check for a racing update before calling hugetlb_cow */
528 if (likely(pte_same(entry, *ptep)))
529 if (write_access && !pte_write(entry))
530 ret = hugetlb_cow(mm, vma, address, ptep, entry);
531 spin_unlock(&mm->page_table_lock);
532
533 return ret;
Adam Litke86e52162006-01-06 00:10:43 -0800534}
535
David Gibson63551ae2005-06-21 17:14:44 -0700536int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
537 struct page **pages, struct vm_area_struct **vmas,
538 unsigned long *position, int *length, int i)
539{
540 unsigned long vpfn, vaddr = *position;
541 int remainder = *length;
542
David Gibson63551ae2005-06-21 17:14:44 -0700543 vpfn = vaddr/PAGE_SIZE;
Hugh Dickins1c598272005-10-19 21:23:43 -0700544 spin_lock(&mm->page_table_lock);
David Gibson63551ae2005-06-21 17:14:44 -0700545 while (vaddr < vma->vm_end && remainder) {
Adam Litke4c887262005-10-29 18:16:46 -0700546 pte_t *pte;
547 struct page *page;
548
549 /*
550 * Some archs (sparc64, sh*) have multiple pte_ts to
551 * each hugepage. We have to make * sure we get the
552 * first, for the page indexing below to work.
553 */
554 pte = huge_pte_offset(mm, vaddr & HPAGE_MASK);
555
556 if (!pte || pte_none(*pte)) {
557 int ret;
558
559 spin_unlock(&mm->page_table_lock);
560 ret = hugetlb_fault(mm, vma, vaddr, 0);
561 spin_lock(&mm->page_table_lock);
562 if (ret == VM_FAULT_MINOR)
563 continue;
564
565 remainder = 0;
566 if (!i)
567 i = -EFAULT;
568 break;
569 }
David Gibson63551ae2005-06-21 17:14:44 -0700570
571 if (pages) {
David Gibson63551ae2005-06-21 17:14:44 -0700572 page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];
David Gibson63551ae2005-06-21 17:14:44 -0700573 get_page(page);
574 pages[i] = page;
575 }
576
577 if (vmas)
578 vmas[i] = vma;
579
580 vaddr += PAGE_SIZE;
581 ++vpfn;
582 --remainder;
583 ++i;
584 }
Hugh Dickins1c598272005-10-19 21:23:43 -0700585 spin_unlock(&mm->page_table_lock);
David Gibson63551ae2005-06-21 17:14:44 -0700586 *length = remainder;
587 *position = vaddr;
588
589 return i;
590}