// SPDX-License-Identifier: GPL-2.0
/*
 * linux/mm/mincore.c
 *
 * Copyright (C) 1994-2006  Linus Torvalds
 */

/*
 * The mincore() system call.
 */
#include <linux/pagemap.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/syscalls.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/shmem_fs.h>
#include <linux/hugetlb.h>

#include <linux/uaccess.h>
#include <asm/pgtable.h>

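/*
 * The three helpers below are the walk_page_range() callbacks wired up
 * in do_mincore(); each fills the residency vector, one byte per page,
 * for its part of the walk: hugetlb mappings, unmapped holes, and
 * ordinary page tables, respectively.
 */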
static int mincore_hugetlb(pte_t *pte, unsigned long hmask, unsigned long addr,
			unsigned long end, struct mm_walk *walk)
{
#ifdef CONFIG_HUGETLB_PAGE
	unsigned char present;
	unsigned char *vec = walk->private;

	/*
	 * Huge pages mapped by a user process are always resident in
	 * RAM and never swapped out, but in principle the check is
	 * still needed.
	 */
	present = pte && !huge_pte_none(huge_ptep_get(pte));
	for (; addr != end; vec++, addr += PAGE_SIZE)
		*vec = present;
	walk->private = vec;
#else
	BUG();
#endif
	return 0;
}

static int mincore_unmapped_range(unsigned long addr, unsigned long end,
			struct mm_walk *walk)
{
	unsigned char *vec = walk->private;
	unsigned long nr = (end - addr) >> PAGE_SHIFT;

	memset(vec, 0, nr);
	walk->private += nr;
	return 0;
}

static int mincore_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
			struct mm_walk *walk)
{
	spinlock_t *ptl;
	struct vm_area_struct *vma = walk->vma;
	pte_t *ptep;
	unsigned char *vec = walk->private;
	int nr = (end - addr) >> PAGE_SHIFT;

	ptl = pmd_trans_huge_lock(pmd, vma);
	if (ptl) {
		memset(vec, 1, nr);
		spin_unlock(ptl);
		goto out;
	}

	/* We'll consider a THP page under construction to be there */
	if (pmd_trans_unstable(pmd)) {
		memset(vec, 1, nr);
		goto out;
	}

	ptep = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
	for (; addr != end; ptep++, addr += PAGE_SIZE) {
		pte_t pte = *ptep;

		if (pte_none(pte))
			*vec = 0;
		else if (pte_present(pte))
			*vec = 1;
		else { /* pte is a swap entry */
			swp_entry_t entry = pte_to_swp_entry(pte);

			/*
			 * migration or hwpoison entries are always
			 * uptodate
			 */
			*vec = !!non_swap_entry(entry);
		}
		vec++;
	}
	pte_unmap_unlock(ptep - 1, ptl);
out:
	walk->private += nr;
	cond_resched();
	return 0;
}

/*
 * Do a chunk of "sys_mincore()". The caller has already checked all
 * the arguments and holds the mmap semaphore; we just return the
 * amount of info we're asked for.
 */
static long do_mincore(unsigned long addr, unsigned long pages, unsigned char *vec)
{
	struct vm_area_struct *vma;
	unsigned long end;
	int err;
	struct mm_walk mincore_walk = {
		.pmd_entry = mincore_pte_range,
		.pte_hole = mincore_unmapped_range,
		.hugetlb_entry = mincore_hugetlb,
		.private = vec,
	};

	vma = find_vma(current->mm, addr);
	if (!vma || addr < vma->vm_start)
		return -ENOMEM;
	mincore_walk.mm = vma->vm_mm;
	end = min(vma->vm_end, addr + (pages << PAGE_SHIFT));
	err = walk_page_range(addr, end, &mincore_walk);
	if (err < 0)
		return err;
	return (end - addr) >> PAGE_SHIFT;
}
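
/*
 * Note that do_mincore() clamps the walk to the end of the first VMA
 * covering @addr, so a single call never spans two VMAs; the syscall
 * loop below simply calls it again with the advanced start address.
 */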

/*
 * The mincore(2) system call.
 *
 * mincore() returns the memory residency status of the pages in the
 * current process's address space specified by [addr, addr + len).
 * The status is returned in a vector of bytes.  The least significant
 * bit of each byte is 1 if the referenced page is in memory, otherwise
 * it is zero.
 *
 * Because the status of a page can change after mincore() checks it
 * but before it returns to the application, the returned vector may
 * contain stale information.  Only locked pages are guaranteed to
 * remain in memory.
 *
 * return values:
 *  zero    - success
 *  -EFAULT - vec points to an illegal address
 *  -EINVAL - addr is not a multiple of PAGE_SIZE
 *  -ENOMEM - Addresses in the range [addr, addr + len) are
 *		invalid for the address space of this process, or
 *		specify one or more pages which are not currently
 *		mapped
 *  -EAGAIN - A kernel resource was temporarily unavailable.
 */
SYSCALL_DEFINE3(mincore, unsigned long, start, size_t, len,
		unsigned char __user *, vec)
{
	long retval;
	unsigned long pages;
	unsigned char *tmp;

	/* Check the start address: needs to be page-aligned.. */
	if (start & ~PAGE_MASK)
		return -EINVAL;

	/* ..and we need to be passed a valid user-space range */
	if (!access_ok((void __user *) start, len))
		return -ENOMEM;

	/* This also avoids any overflows on PAGE_ALIGN */
	pages = len >> PAGE_SHIFT;
	pages += (offset_in_page(len)) != 0;

	if (!access_ok(vec, pages))
		return -EFAULT;

	tmp = (void *) __get_free_page(GFP_USER);
	if (!tmp)
		return -EAGAIN;

	retval = 0;
	while (pages) {
		/*
		 * Do at most PAGE_SIZE entries per iteration, due to
		 * the temporary buffer size.
		 */
		down_read(&current->mm->mmap_sem);
		retval = do_mincore(start, min(pages, PAGE_SIZE), tmp);
		up_read(&current->mm->mmap_sem);

		if (retval <= 0)
			break;
		if (copy_to_user(vec, tmp, retval)) {
			retval = -EFAULT;
			break;
		}
		pages -= retval;
		vec += retval;
		start += retval << PAGE_SHIFT;
		retval = 0;
	}
	free_page((unsigned long) tmp);
	return retval;
}
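
/*
 * Illustrative sketch (not part of this file's build): one minimal way
 * userspace might call mincore(2) on an anonymous mapping, assuming a
 * POSIX environment with the libc mincore() wrapper from <sys/mman.h>.
 * Buffer sizing and error handling are deliberately minimal.
 *
 *	#include <stdio.h>
 *	#include <stdlib.h>
 *	#include <unistd.h>
 *	#include <sys/mman.h>
 *
 *	int main(void)
 *	{
 *		long psz = sysconf(_SC_PAGESIZE);
 *		size_t len = 8 * psz;
 *		unsigned char *vec = malloc(len / psz);
 *		char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *				 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *
 *		buf[0] = 1;	(touch the first page so it is resident)
 *		if (mincore(buf, len, vec) == 0) {
 *			for (size_t i = 0; i < len / psz; i++)
 *				printf("page %zu: %s\n", i,
 *				       vec[i] & 1 ? "resident" : "not resident");
 *		}
 *		return 0;
 *	}
 */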