[PATCH] unpaged: VM_UNPAGED

Although we tend to associate VM_RESERVED with remap_pfn_range, quite a few
drivers set VM_RESERVED on areas which are then populated by nopage.  The
PageReserved removal in 2.6.15-rc1 changed VM_RESERVED not to free pages in
zap_pte_range, without changing those drivers not to set it: so their pages
just leak away.

Let's not change miscellaneous drivers now: introduce VM_UNPAGED at the core,
to flag the special areas where the ptes may have no struct page, or if they
have then it's not to be touched.  Replace most instances of VM_RESERVED in
core mm by VM_UNPAGED.  Force it on in remap_pfn_range, and the sparc and
sparc64 io_remap_pfn_range.

Revert addition of VM_RESERVED to powerpc vdso, it's not needed there.  Is it
needed anywhere?  It still governs the mm->reserved_vm statistic, and special
vmas not to be merged, and areas not to be core dumped; but could probably be
eliminated later (the drivers are probably specifying it because in 2.4 it
kept swapout off the vma, but in 2.6 we work from the LRU, which these pages
don't get on).

Use the VM_SHM slot for VM_UNPAGED, and define VM_SHM to 0: it serves no
purpose whatsoever, and should be removed from drivers when we clean up.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Acked-by: William Irwin <wli@holomorphy.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/mm/fremap.c b/mm/fremap.c
index d862be3..94254c5 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -65,7 +65,7 @@
 	pte_t pte_val;
 	spinlock_t *ptl;
 
-	BUG_ON(vma->vm_flags & VM_RESERVED);
+	BUG_ON(vma->vm_flags & VM_UNPAGED);
 
 	pgd = pgd_offset(mm, addr);
 	pud = pud_alloc(mm, pgd, addr);
@@ -122,7 +122,7 @@
 	pte_t pte_val;
 	spinlock_t *ptl;
 
-	BUG_ON(vma->vm_flags & VM_RESERVED);
+	BUG_ON(vma->vm_flags & VM_UNPAGED);
 
 	pgd = pgd_offset(mm, addr);
 	pud = pud_alloc(mm, pgd, addr);
diff --git a/mm/madvise.c b/mm/madvise.c
index 17aaf3e..328a3bc 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -126,7 +126,7 @@
 			     unsigned long start, unsigned long end)
 {
 	*prev = vma;
-	if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_RESERVED))
+	if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_UNPAGED))
 		return -EINVAL;
 
 	if (unlikely(vma->vm_flags & VM_NONLINEAR)) {
diff --git a/mm/memory.c b/mm/memory.c
index cfce5f1..ece0496 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -334,7 +334,7 @@
 
 /*
  * This function is called to print an error when a pte in a
- * !VM_RESERVED region is found pointing to an invalid pfn (which
+ * !VM_UNPAGED region is found pointing to an invalid pfn (which
  * is an error.
  *
  * The calling function must still handle the error.
@@ -381,15 +381,15 @@
 		goto out_set_pte;
 	}
 
-	/* If the region is VM_RESERVED, the mapping is not
+	/* If the region is VM_UNPAGED, the mapping is not
 	 * mapped via rmap - duplicate the pte as is.
 	 */
-	if (vm_flags & VM_RESERVED)
+	if (vm_flags & VM_UNPAGED)
 		goto out_set_pte;
 
 	pfn = pte_pfn(pte);
 	/* If the pte points outside of valid memory but
-	 * the region is not VM_RESERVED, we have a problem.
+	 * the region is not VM_UNPAGED, we have a problem.
 	 */
 	if (unlikely(!pfn_valid(pfn))) {
 		print_bad_pte(vma, pte, addr);
@@ -528,7 +528,7 @@
 	 * readonly mappings. The tradeoff is that copy_page_range is more
 	 * efficient than faulting.
 	 */
-	if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_RESERVED))) {
+	if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_UNPAGED))) {
 		if (!vma->anon_vma)
 			return 0;
 	}
@@ -572,7 +572,7 @@
 
 			(*zap_work) -= PAGE_SIZE;
 
-			if (!(vma->vm_flags & VM_RESERVED)) {
+			if (!(vma->vm_flags & VM_UNPAGED)) {
 				unsigned long pfn = pte_pfn(ptent);
 				if (unlikely(!pfn_valid(pfn)))
 					print_bad_pte(vma, ptent, addr);
@@ -1191,10 +1191,16 @@
 	 * rest of the world about it:
 	 *   VM_IO tells people not to look at these pages
 	 *	(accesses can have side effects).
-	 *   VM_RESERVED tells the core MM not to "manage" these pages
-         *	(e.g. refcount, mapcount, try to swap them out).
+	 *   VM_RESERVED is specified all over the place, because
+	 *	in 2.4 it kept swapout's vma scan off this vma; but
+	 *	in 2.6 the LRU scan won't even find its pages, so this
+	 *	flag means no more than count its pages in reserved_vm,
+	 * 	and omit it from core dump, even when VM_IO turned off.
+	 *   VM_UNPAGED tells the core MM not to "manage" these pages
+         *	(e.g. refcount, mapcount, try to swap them out): in
+	 *	particular, zap_pte_range does not try to free them.
 	 */
-	vma->vm_flags |= VM_IO | VM_RESERVED;
+	vma->vm_flags |= VM_IO | VM_RESERVED | VM_UNPAGED;
 
 	BUG_ON(addr >= end);
 	pfn -= addr >> PAGE_SHIFT;
@@ -1276,7 +1282,7 @@
 	pte_t entry;
 	int ret = VM_FAULT_MINOR;
 
-	BUG_ON(vma->vm_flags & VM_RESERVED);
+	BUG_ON(vma->vm_flags & VM_UNPAGED);
 
 	if (unlikely(!pfn_valid(pfn))) {
 		/*
@@ -1924,7 +1930,7 @@
 			inc_mm_counter(mm, anon_rss);
 			lru_cache_add_active(new_page);
 			page_add_anon_rmap(new_page, vma, address);
-		} else if (!(vma->vm_flags & VM_RESERVED)) {
+		} else if (!(vma->vm_flags & VM_UNPAGED)) {
 			inc_mm_counter(mm, file_rss);
 			page_add_file_rmap(new_page);
 		}
@@ -2203,7 +2209,7 @@
 	gate_vma.vm_start = FIXADDR_USER_START;
 	gate_vma.vm_end = FIXADDR_USER_END;
 	gate_vma.vm_page_prot = PAGE_READONLY;
-	gate_vma.vm_flags = VM_RESERVED;
+	gate_vma.vm_flags = 0;
 	return 0;
 }
 __initcall(gate_vma_init);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 5abc57c..5609a31 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -269,7 +269,7 @@
 	first = find_vma(mm, start);
 	if (!first)
 		return ERR_PTR(-EFAULT);
-	if (first->vm_flags & VM_RESERVED)
+	if (first->vm_flags & VM_UNPAGED)
 		return ERR_PTR(-EACCES);
 	prev = NULL;
 	for (vma = first; vma && vma->vm_start < end; vma = vma->vm_next) {
diff --git a/mm/msync.c b/mm/msync.c
index 0e040e9..b3f4caf 100644
--- a/mm/msync.c
+++ b/mm/msync.c
@@ -97,9 +97,9 @@
 	/* For hugepages we can't go walking the page table normally,
 	 * but that's ok, hugetlbfs is memory based, so we don't need
 	 * to do anything more on an msync().
-	 * Can't do anything with VM_RESERVED regions either.
+	 * Can't do anything with VM_UNPAGED regions either.
 	 */
-	if (vma->vm_flags & (VM_HUGETLB|VM_RESERVED))
+	if (vma->vm_flags & (VM_HUGETLB|VM_UNPAGED))
 		return;
 
 	BUG_ON(addr >= end);