mm: introduce mm_populate() for populating new vmas
When creating new mappings using the MAP_POPULATE / MAP_LOCKED flags (or
with MCL_FUTURE in effect), we want to populate the pages within the
newly created vmas. This may take a while as we may have to read pages
from disk, so ideally we want to do this outside of the write-locked
mmap_sem region.
This change introduces mm_populate(), which is used to defer populating
such mappings until after the mmap_sem write lock has been released.
This is implemented as a generalization of the former do_mlock_pages(),
which accomplished the same task but was using during mlock() /
mlockall().
Signed-off-by: Michel Lespinasse <walken@google.com>
Reported-by: Andy Lutomirski <luto@amacapital.net>
Acked-by: Rik van Riel <riel@redhat.com>
Tested-by: Andy Lutomirski <luto@amacapital.net>
Cc: Greg Ungerer <gregungerer@westnet.com.au>
Cc: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/mm/mlock.c b/mm/mlock.c
index c9bd528..a296a49 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -416,7 +416,14 @@
return error;
}
-static int do_mlock_pages(unsigned long start, size_t len, int ignore_errors)
+/*
+ * __mm_populate - populate and/or mlock pages within a range of address space.
+ *
+ * This is used to implement mlock() and the MAP_POPULATE / MAP_LOCKED mmap
+ * flags. VMAs must be already marked with the desired vm_flags, and
+ * mmap_sem must not be held.
+ */
+int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
{
struct mm_struct *mm = current->mm;
unsigned long end, nstart, nend;
@@ -498,7 +505,7 @@
error = do_mlock(start, len, 1);
up_write(¤t->mm->mmap_sem);
if (!error)
- error = do_mlock_pages(start, len, 0);
+ error = __mm_populate(start, len, 0);
return error;
}
@@ -564,10 +571,8 @@
capable(CAP_IPC_LOCK))
ret = do_mlockall(flags);
up_write(¤t->mm->mmap_sem);
- if (!ret && (flags & MCL_CURRENT)) {
- /* Ignore errors */
- do_mlock_pages(0, TASK_SIZE, 1);
- }
+ if (!ret && (flags & MCL_CURRENT))
+ mm_populate(0, TASK_SIZE);
out:
return ret;
}
diff --git a/mm/mmap.c b/mm/mmap.c
index 09da0b2..9b12e30 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1154,12 +1154,15 @@
unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot,
- unsigned long flags, unsigned long pgoff)
+ unsigned long flags, unsigned long pgoff,
+ bool *populate)
{
struct mm_struct * mm = current->mm;
struct inode *inode;
vm_flags_t vm_flags;
+ *populate = false;
+
/*
* Does the application expect PROT_READ to imply PROT_EXEC?
*
@@ -1280,7 +1283,12 @@
}
}
- return mmap_region(file, addr, len, flags, vm_flags, pgoff);
+ addr = mmap_region(file, addr, len, flags, vm_flags, pgoff);
+ if (!IS_ERR_VALUE(addr) &&
+ ((vm_flags & VM_LOCKED) ||
+ (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))
+ *populate = true;
+ return addr;
}
SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
@@ -1531,10 +1539,12 @@
vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
if (vm_flags & VM_LOCKED) {
- if (!mlock_vma_pages_range(vma, addr, addr + len))
+ if (!((vm_flags & VM_SPECIAL) || is_vm_hugetlb_page(vma) ||
+ vma == get_gate_vma(current->mm)))
mm->locked_vm += (len >> PAGE_SHIFT);
- } else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
- make_pages_present(addr, addr + len);
+ else
+ vma->vm_flags &= ~VM_LOCKED;
+ }
if (file)
uprobe_mmap(vma);
diff --git a/mm/nommu.c b/mm/nommu.c
index b20db4e..7296a5a 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1250,7 +1250,8 @@
unsigned long len,
unsigned long prot,
unsigned long flags,
- unsigned long pgoff)
+ unsigned long pgoff,
+ bool *populate)
{
struct vm_area_struct *vma;
struct vm_region *region;
@@ -1260,6 +1261,8 @@
kenter(",%lx,%lx,%lx,%lx,%lx", addr, len, prot, flags, pgoff);
+ *populate = false;
+
/* decide whether we should attempt the mapping, and if so what sort of
* mapping */
ret = validate_mmap_request(file, addr, len, prot, flags, pgoff,
diff --git a/mm/util.c b/mm/util.c
index c55e26b..13467e0 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -355,12 +355,16 @@
{
unsigned long ret;
struct mm_struct *mm = current->mm;
+ bool populate;
ret = security_mmap_file(file, prot, flag);
if (!ret) {
down_write(&mm->mmap_sem);
- ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff);
+ ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff,
+ &populate);
up_write(&mm->mmap_sem);
+ if (!IS_ERR_VALUE(ret) && populate)
+ mm_populate(ret, len);
}
return ret;
}