Jiri Kosina | cc503c1 | 2008-01-30 13:31:07 +0100 | [diff] [blame] | 1 | /* |
Harvey Harrison | 675a081 | 2008-01-30 13:31:10 +0100 | [diff] [blame] | 2 | * Flexible mmap layout support |
Jiri Kosina | cc503c1 | 2008-01-30 13:31:07 +0100 | [diff] [blame] | 3 | * |
| 4 | * Based on code by Ingo Molnar and Andi Kleen, copyrighted |
| 5 | * as follows: |
| 6 | * |
Ingo Molnar | 8f47e16 | 2009-01-31 02:03:42 +0100 | [diff] [blame] | 7 | * Copyright 2003-2009 Red Hat Inc. |
Jiri Kosina | cc503c1 | 2008-01-30 13:31:07 +0100 | [diff] [blame] | 8 | * All Rights Reserved. |
| 9 | * Copyright 2005 Andi Kleen, SUSE Labs. |
| 10 | * Copyright 2007 Jiri Kosina, SUSE Labs. |
| 11 | * |
| 12 | * This program is free software; you can redistribute it and/or modify |
| 13 | * it under the terms of the GNU General Public License as published by |
| 14 | * the Free Software Foundation; either version 2 of the License, or |
| 15 | * (at your option) any later version. |
| 16 | * |
| 17 | * This program is distributed in the hope that it will be useful, |
| 18 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 20 | * GNU General Public License for more details. |
| 21 | * |
| 22 | * You should have received a copy of the GNU General Public License |
| 23 | * along with this program; if not, write to the Free Software |
| 24 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
Andi Kleen | 8817210 | 2006-01-17 07:03:38 +0100 | [diff] [blame] | 25 | */ |
Jiri Kosina | cc503c1 | 2008-01-30 13:31:07 +0100 | [diff] [blame] | 26 | |
| 27 | #include <linux/personality.h> |
Andi Kleen | 8817210 | 2006-01-17 07:03:38 +0100 | [diff] [blame] | 28 | #include <linux/mm.h> |
Andi Kleen | 8817210 | 2006-01-17 07:03:38 +0100 | [diff] [blame] | 29 | #include <linux/random.h> |
Jiri Kosina | cc503c1 | 2008-01-30 13:31:07 +0100 | [diff] [blame] | 30 | #include <linux/limits.h> |
Ingo Molnar | 3f07c01 | 2017-02-08 18:51:30 +0100 | [diff] [blame] | 31 | #include <linux/sched/signal.h> |
Ingo Molnar | 0104260 | 2017-02-08 18:51:31 +0100 | [diff] [blame] | 32 | #include <linux/sched/mm.h> |
Dmitry Safonov | e13b73d | 2017-03-14 14:41:26 +0300 | [diff] [blame] | 33 | #include <linux/compat.h> |
Michal Hocko | 8093833 | 2009-09-08 11:01:55 +0200 | [diff] [blame] | 34 | #include <asm/elf.h> |
| 35 | |
Craig Bergstrom | be62a32 | 2017-11-15 15:29:51 -0700 | [diff] [blame] | 36 | #include "physaddr.h" |
| 37 | |
/*
 * Global va_align instance, placed in the read-mostly section.  Only
 * .flags is set explicitly (to -1); every other member is implicitly
 * zero-initialised.
 *
 * NOTE(review): -1 presumably means "alignment flags not configured yet";
 * confirm against the code that fills in va_align.
 */
struct va_alignment __read_mostly va_align = {
	.flags = -1,
};
| 41 | |
Kirill A. Shutemov | e8f01a8 | 2017-07-17 01:59:50 +0300 | [diff] [blame] | 42 | unsigned long task_size_32bit(void) |
Dmitry Safonov | 8f3e474 | 2017-03-06 17:17:18 +0300 | [diff] [blame] | 43 | { |
| 44 | return IA32_PAGE_OFFSET; |
| 45 | } |
| 46 | |
Kirill A. Shutemov | b569bab | 2017-07-17 01:59:52 +0300 | [diff] [blame] | 47 | unsigned long task_size_64bit(int full_addr_space) |
Dmitry Safonov | 1b028f7 | 2017-03-06 17:17:19 +0300 | [diff] [blame] | 48 | { |
Kirill A. Shutemov | b569bab | 2017-07-17 01:59:52 +0300 | [diff] [blame] | 49 | return full_addr_space ? TASK_SIZE_MAX : DEFAULT_MAP_WINDOW; |
Dmitry Safonov | 1b028f7 | 2017-03-06 17:17:19 +0300 | [diff] [blame] | 50 | } |
| 51 | |
Dmitry Safonov | 8f3e474 | 2017-03-06 17:17:18 +0300 | [diff] [blame] | 52 | static unsigned long stack_maxrandom_size(unsigned long task_size) |
Michal Hocko | 8093833 | 2009-09-08 11:01:55 +0200 | [diff] [blame] | 53 | { |
Hector Marco-Gisbert | 4e7c22d | 2015-02-14 09:33:50 -0800 | [diff] [blame] | 54 | unsigned long max = 0; |
Oleg Nesterov | 01578e3 | 2017-08-15 17:40:11 +0200 | [diff] [blame] | 55 | if (current->flags & PF_RANDOMIZE) { |
Kirill A. Shutemov | e8f01a8 | 2017-07-17 01:59:50 +0300 | [diff] [blame] | 56 | max = (-1UL) & __STACK_RND_MASK(task_size == task_size_32bit()); |
Dmitry Safonov | 8f3e474 | 2017-03-06 17:17:18 +0300 | [diff] [blame] | 57 | max <<= PAGE_SHIFT; |
Michal Hocko | 8093833 | 2009-09-08 11:01:55 +0200 | [diff] [blame] | 58 | } |
| 59 | |
| 60 | return max; |
| 61 | } |
| 62 | |
/*
 * Aliases for the number of mmap randomization bits per layout flavour.
 * The 64-bit alias always maps to mmap_rnd_bits; the 32-bit alias maps to
 * mmap_rnd_compat_bits when CONFIG_COMPAT is set and to mmap_rnd_bits
 * otherwise.
 */
#define mmap64_rnd_bits	mmap_rnd_bits

#ifdef CONFIG_COMPAT
# define mmap32_rnd_bits	mmap_rnd_compat_bits
#else
# define mmap32_rnd_bits	mmap_rnd_bits
#endif
| 70 | |
/* 128 MiB as an unsigned long byte count. */
#define SIZE_128M	(128UL << 20)
| 72 | |
Andrew Morton | 954683a | 2008-01-30 13:31:07 +0100 | [diff] [blame] | 73 | static int mmap_is_legacy(void) |
Jiri Kosina | cc503c1 | 2008-01-30 13:31:07 +0100 | [diff] [blame] | 74 | { |
| 75 | if (current->personality & ADDR_COMPAT_LAYOUT) |
| 76 | return 1; |
| 77 | |
Jiri Kosina | cc503c1 | 2008-01-30 13:31:07 +0100 | [diff] [blame] | 78 | return sysctl_legacy_va_layout; |
| 79 | } |
| 80 | |
Dmitry Safonov | 6a0b41d | 2017-03-06 17:17:17 +0300 | [diff] [blame] | 81 | static unsigned long arch_rnd(unsigned int rndbits) |
| 82 | { |
Oleg Nesterov | 47ac548 | 2017-08-15 17:39:52 +0200 | [diff] [blame] | 83 | if (!(current->flags & PF_RANDOMIZE)) |
| 84 | return 0; |
Dmitry Safonov | 6a0b41d | 2017-03-06 17:17:17 +0300 | [diff] [blame] | 85 | return (get_random_long() & ((1UL << rndbits) - 1)) << PAGE_SHIFT; |
| 86 | } |
| 87 | |
Kees Cook | 2b68f6c | 2015-04-14 15:48:00 -0700 | [diff] [blame] | 88 | unsigned long arch_mmap_rnd(void) |
Harvey Harrison | 675a081 | 2008-01-30 13:31:10 +0100 | [diff] [blame] | 89 | { |
Dmitry Safonov | 6a0b41d | 2017-03-06 17:17:17 +0300 | [diff] [blame] | 90 | return arch_rnd(mmap_is_ia32() ? mmap32_rnd_bits : mmap64_rnd_bits); |
Harvey Harrison | 675a081 | 2008-01-30 13:31:10 +0100 | [diff] [blame] | 91 | } |
| 92 | |
Kees Cook | 8f2af15 | 2018-04-10 16:34:53 -0700 | [diff] [blame] | 93 | static unsigned long mmap_base(unsigned long rnd, unsigned long task_size, |
| 94 | struct rlimit *rlim_stack) |
Harvey Harrison | 675a081 | 2008-01-30 13:31:10 +0100 | [diff] [blame] | 95 | { |
Kees Cook | 8f2af15 | 2018-04-10 16:34:53 -0700 | [diff] [blame] | 96 | unsigned long gap = rlim_stack->rlim_cur; |
Rik van Riel | c204d21 | 2017-07-12 14:36:33 -0700 | [diff] [blame] | 97 | unsigned long pad = stack_maxrandom_size(task_size) + stack_guard_gap; |
Dmitry Safonov | 8f3e474 | 2017-03-06 17:17:18 +0300 | [diff] [blame] | 98 | unsigned long gap_min, gap_max; |
Harvey Harrison | 675a081 | 2008-01-30 13:31:10 +0100 | [diff] [blame] | 99 | |
Rik van Riel | c204d21 | 2017-07-12 14:36:33 -0700 | [diff] [blame] | 100 | /* Values close to RLIM_INFINITY can overflow. */ |
| 101 | if (gap + pad > gap) |
| 102 | gap += pad; |
| 103 | |
Dmitry Safonov | 8f3e474 | 2017-03-06 17:17:18 +0300 | [diff] [blame] | 104 | /* |
| 105 | * Top of mmap area (just below the process stack). |
| 106 | * Leave an at least ~128 MB hole with possible stack randomization. |
| 107 | */ |
Rik van Riel | c204d21 | 2017-07-12 14:36:33 -0700 | [diff] [blame] | 108 | gap_min = SIZE_128M; |
Dmitry Safonov | 8f3e474 | 2017-03-06 17:17:18 +0300 | [diff] [blame] | 109 | gap_max = (task_size / 6) * 5; |
Harvey Harrison | 675a081 | 2008-01-30 13:31:10 +0100 | [diff] [blame] | 110 | |
Dmitry Safonov | 8f3e474 | 2017-03-06 17:17:18 +0300 | [diff] [blame] | 111 | if (gap < gap_min) |
| 112 | gap = gap_min; |
| 113 | else if (gap > gap_max) |
| 114 | gap = gap_max; |
| 115 | |
| 116 | return PAGE_ALIGN(task_size - gap - rnd); |
| 117 | } |
| 118 | |
/*
 * mmap_legacy_base - base address for the legacy mmap layout.
 * @rnd:       randomization offset added to the base.
 * @task_size: task size of the layout being computed.
 *
 * Return: __TASK_UNMAPPED_BASE(@task_size) + @rnd.
 */
static unsigned long mmap_legacy_base(unsigned long rnd,
				      unsigned long task_size)
{
	unsigned long unmapped_base = __TASK_UNMAPPED_BASE(task_size);

	return unmapped_base + rnd;
}
| 124 | |
/*
 * Helper for arch_pick_mmap_layout(), the function which is called very
 * early during the creation of a new process VM image to set up which VM
 * layout function to use.
 *
 * Computes one pair of mmap bases for the given @task_size:
 * @legacy_base always receives the legacy base, while @base receives the
 * legacy base when mmap_is_legacy() is true and the mmap_base() value
 * otherwise.
 */
static void arch_pick_mmap_base(unsigned long *base, unsigned long *legacy_base,
		unsigned long random_factor, unsigned long task_size,
		struct rlimit *rlim_stack)
{
	unsigned long legacy = mmap_legacy_base(random_factor, task_size);

	*legacy_base = legacy;
	*base = mmap_is_legacy() ?
		legacy : mmap_base(random_factor, task_size, rlim_stack);
}
| 139 | |
Kees Cook | 8f2af15 | 2018-04-10 16:34:53 -0700 | [diff] [blame] | 140 | void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) |
Jiri Kosina | cc503c1 | 2008-01-30 13:31:07 +0100 | [diff] [blame] | 141 | { |
Dmitry Safonov | 1b028f7 | 2017-03-06 17:17:19 +0300 | [diff] [blame] | 142 | if (mmap_is_legacy()) |
Jiri Kosina | cc503c1 | 2008-01-30 13:31:07 +0100 | [diff] [blame] | 143 | mm->get_unmapped_area = arch_get_unmapped_area; |
Dmitry Safonov | 1b028f7 | 2017-03-06 17:17:19 +0300 | [diff] [blame] | 144 | else |
Jiri Kosina | cc503c1 | 2008-01-30 13:31:07 +0100 | [diff] [blame] | 145 | mm->get_unmapped_area = arch_get_unmapped_area_topdown; |
Dmitry Safonov | 1b028f7 | 2017-03-06 17:17:19 +0300 | [diff] [blame] | 146 | |
| 147 | arch_pick_mmap_base(&mm->mmap_base, &mm->mmap_legacy_base, |
Kees Cook | 8f2af15 | 2018-04-10 16:34:53 -0700 | [diff] [blame] | 148 | arch_rnd(mmap64_rnd_bits), task_size_64bit(0), |
| 149 | rlim_stack); |
Dmitry Safonov | 1b028f7 | 2017-03-06 17:17:19 +0300 | [diff] [blame] | 150 | |
| 151 | #ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES |
| 152 | /* |
| 153 | * The mmap syscall mapping base decision depends solely on the |
| 154 | * syscall type (64-bit or compat). This applies for 64bit |
| 155 | * applications and 32bit applications. The 64bit syscall uses |
| 156 | * mmap_base, the compat syscall uses mmap_compat_base. |
| 157 | */ |
| 158 | arch_pick_mmap_base(&mm->mmap_compat_base, &mm->mmap_compat_legacy_base, |
Kees Cook | 8f2af15 | 2018-04-10 16:34:53 -0700 | [diff] [blame] | 159 | arch_rnd(mmap32_rnd_bits), task_size_32bit(), |
| 160 | rlim_stack); |
Dmitry Safonov | 1b028f7 | 2017-03-06 17:17:19 +0300 | [diff] [blame] | 161 | #endif |
Jiri Kosina | cc503c1 | 2008-01-30 13:31:07 +0100 | [diff] [blame] | 162 | } |
Kirill A. Shutemov | a8965276 | 2015-07-20 14:29:58 -0700 | [diff] [blame] | 163 | |
Dmitry Safonov | e13b73d | 2017-03-14 14:41:26 +0300 | [diff] [blame] | 164 | unsigned long get_mmap_base(int is_legacy) |
| 165 | { |
| 166 | struct mm_struct *mm = current->mm; |
| 167 | |
| 168 | #ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES |
| 169 | if (in_compat_syscall()) { |
| 170 | return is_legacy ? mm->mmap_compat_legacy_base |
| 171 | : mm->mmap_compat_base; |
| 172 | } |
| 173 | #endif |
| 174 | return is_legacy ? mm->mmap_legacy_base : mm->mmap_base; |
| 175 | } |
| 176 | |
Kirill A. Shutemov | a8965276 | 2015-07-20 14:29:58 -0700 | [diff] [blame] | 177 | const char *arch_vma_name(struct vm_area_struct *vma) |
| 178 | { |
| 179 | if (vma->vm_flags & VM_MPX) |
| 180 | return "[mpx]"; |
| 181 | return NULL; |
| 182 | } |
Kirill A. Shutemov | 1e0f25d | 2017-11-15 17:36:06 +0300 | [diff] [blame] | 183 | |
| 184 | /** |
| 185 | * mmap_address_hint_valid - Validate the address hint of mmap |
| 186 | * @addr: Address hint |
| 187 | * @len: Mapping length |
| 188 | * |
| 189 | * Check whether @addr and @addr + @len result in a valid mapping. |
| 190 | * |
| 191 | * On 32bit this only checks whether @addr + @len is <= TASK_SIZE. |
| 192 | * |
| 193 | * On 64bit with 5-level page tables another sanity check is required |
| 194 | * because mappings requested by mmap(@addr, 0) which cross the 47-bit |
| 195 | * virtual address boundary can cause the following theoretical issue: |
| 196 | * |
| 197 | * An application calls mmap(addr, 0), i.e. without MAP_FIXED, where @addr |
| 198 | * is below the border of the 47-bit address space and @addr + @len is |
| 199 | * above the border. |
| 200 | * |
| 201 | * With 4-level paging this request succeeds, but the resulting mapping |
| 202 | * address will always be within the 47-bit virtual address space, because |
| 203 | * the hint address does not result in a valid mapping and is |
| 204 | * ignored. Hence applications which are not prepared to handle virtual |
| 205 | * addresses above 47-bit work correctly. |
| 206 | * |
| 207 | * With 5-level paging this request would be granted and result in a |
| 208 | * mapping which crosses the border of the 47-bit virtual address |
| 209 | * space. If the application cannot handle addresses above 47-bit this |
| 210 | * will lead to misbehaviour and hard to diagnose failures. |
| 211 | * |
| 212 | * Therefore ignore address hints which would result in a mapping crossing |
| 213 | * the 47-bit virtual address boundary. |
| 214 | * |
| 215 | * Note, that in the same scenario with MAP_FIXED the behaviour is |
| 216 | * different. The request with @addr < 47-bit and @addr + @len > 47-bit |
| 217 | * fails on a 4-level paging machine but succeeds on a 5-level paging |
| 218 | * machine. It is reasonable to expect that an application does not rely on |
| 219 | * the failure of such a fixed mapping request, so the restriction is not |
| 220 | * applied. |
| 221 | */ |
| 222 | bool mmap_address_hint_valid(unsigned long addr, unsigned long len) |
| 223 | { |
| 224 | if (TASK_SIZE - len < addr) |
| 225 | return false; |
| 226 | |
| 227 | return (addr > DEFAULT_MAP_WINDOW) == (addr + len > DEFAULT_MAP_WINDOW); |
| 228 | } |
Craig Bergstrom | be62a32 | 2017-11-15 15:29:51 -0700 | [diff] [blame] | 229 | |
| 230 | /* Can we access it for direct reading/writing? Must be RAM: */ |
| 231 | int valid_phys_addr_range(phys_addr_t addr, size_t count) |
| 232 | { |
| 233 | return addr + count <= __pa(high_memory); |
| 234 | } |
| 235 | |
| 236 | /* Can we access it through mmap? Must be a valid physical address: */ |
| 237 | int valid_mmap_phys_addr_range(unsigned long pfn, size_t count) |
| 238 | { |
| 239 | phys_addr_t addr = (phys_addr_t)pfn << PAGE_SHIFT; |
| 240 | |
| 241 | return phys_addr_valid(addr + count - 1); |
| 242 | } |
Andi Kleen | 42e4089 | 2018-06-13 15:48:27 -0700 | [diff] [blame] | 243 | |
| 244 | /* |
| 245 | * Only allow root to set high MMIO mappings to PROT_NONE. |
| 246 | * This prevents an unpriv. user to set them to PROT_NONE and invert |
| 247 | * them, then pointing to valid memory for L1TF speculation. |
| 248 | * |
| 249 | * Note: for locked down kernels may want to disable the root override. |
| 250 | */ |
| 251 | bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot) |
| 252 | { |
| 253 | if (!boot_cpu_has_bug(X86_BUG_L1TF)) |
| 254 | return true; |
| 255 | if (!__pte_needs_invert(pgprot_val(prot))) |
| 256 | return true; |
| 257 | /* If it's real memory always allow */ |
| 258 | if (pfn_valid(pfn)) |
| 259 | return true; |
Vlastimil Babka | b0a182f | 2018-08-23 15:44:18 +0200 | [diff] [blame] | 260 | if (pfn >= l1tf_pfn_limit() && !capable(CAP_SYS_ADMIN)) |
Andi Kleen | 42e4089 | 2018-06-13 15:48:27 -0700 | [diff] [blame] | 261 | return false; |
| 262 | return true; |
| 263 | } |