// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp.
 *                    <benh@kernel.crashing.org>
 * Copyright (C) 2012 ARM Limited
 * Copyright (C) 2015 Regents of the University of California
 */

#include <linux/elf.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/binfmts.h>
#include <linux/err.h>
#include <asm/page.h>
#include <asm/vdso.h>
#include <linux/time_namespace.h>

#ifdef CONFIG_GENERIC_TIME_VSYSCALL
#include <vdso/datapage.h>
#else
struct vdso_data {
};
#endif

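/* Start/end markers of the vDSO image that is linked into the kernel. */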
extern char vdso_start[], vdso_end[];

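/*
 * Page offsets within the [vvar] mapping: one page of generic vdso_data,
 * plus a second page used when the task is in a time namespace (see
 * vvar_fault() below).
 */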
enum vvar_pages {
	VVAR_DATA_PAGE_OFFSET,
	VVAR_TIMENS_PAGE_OFFSET,
	VVAR_NR_PAGES,
};

#define VVAR_SIZE  (VVAR_NR_PAGES << PAGE_SHIFT)

/*
 * The vDSO data page.
 */
static union {
	struct vdso_data data;
	u8 page[PAGE_SIZE];
} vdso_data_store __page_aligned_data;
struct vdso_data *vdso_data = &vdso_data_store.data;

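/* Book-keeping for the vDSO image and its two special mappings. */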
struct __vdso_info {
	const char *name;
	const char *vdso_code_start;
	const char *vdso_code_end;
	unsigned long vdso_pages;
	/* Data Mapping */
	struct vm_special_mapping *dm;
	/* Code Mapping */
	struct vm_special_mapping *cm;
};

static struct __vdso_info vdso_info __ro_after_init = {
	.name = "vdso",
	.vdso_code_start = vdso_start,
	.vdso_code_end = vdso_end,
};

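/*
 * Keep the cached vDSO base in sync when userspace moves the [vdso]
 * mapping (e.g. mremap() during checkpoint/restore).
 */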
static int vdso_mremap(const struct vm_special_mapping *sm,
		       struct vm_area_struct *new_vma)
{
	current->mm->context.vdso = (void *)new_vma->vm_start;

	return 0;
}

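/*
 * Sanity-check the vDSO image and build the array of struct page
 * pointers backing the [vdso] code mapping. Runs once at boot.
 */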
static int __init __vdso_init(void)
{
	unsigned int i;
	struct page **vdso_pagelist;
	unsigned long pfn;

	if (memcmp(vdso_info.vdso_code_start, "\177ELF", 4)) {
		pr_err("vDSO is not a valid ELF object!\n");
		return -EINVAL;
	}

	vdso_info.vdso_pages = (vdso_info.vdso_code_end -
				vdso_info.vdso_code_start) >> PAGE_SHIFT;

	vdso_pagelist = kcalloc(vdso_info.vdso_pages,
				sizeof(struct page *),
				GFP_KERNEL);
	if (vdso_pagelist == NULL)
		return -ENOMEM;

	/* Grab the vDSO code pages. */
	pfn = sym_to_pfn(vdso_info.vdso_code_start);

	for (i = 0; i < vdso_info.vdso_pages; i++)
		vdso_pagelist[i] = pfn_to_page(pfn + i);

	vdso_info.cm->pages = vdso_pagelist;

	return 0;
}

#ifdef CONFIG_TIME_NS
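/*
 * Let the generic time namespace code find the vdso_data that backs a
 * given vvar page.
 */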
struct vdso_data *arch_get_vdso_data(void *vvar_page)
{
	return (struct vdso_data *)(vvar_page);
}

/*
 * The vvar mapping contains data for a specific time namespace, so when a task
 * changes namespace we must unmap its vvar data for the old namespace.
 * Subsequent faults will map in data for the new namespace.
 *
 * For more details see timens_setup_vdso_data().
 */
int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
{
	struct mm_struct *mm = task->mm;
	struct vm_area_struct *vma;

	mmap_read_lock(mm);

	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		unsigned long size = vma->vm_end - vma->vm_start;

		if (vma_is_special_mapping(vma, vdso_info.dm))
			zap_page_range(vma, vma->vm_start, size);
	}

	mmap_read_unlock(mm);
	return 0;
}

static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
{
	if (likely(vma->vm_mm == current->mm))
		return current->nsproxy->time_ns->vvar_page;

	/*
	 * VM_PFNMAP | VM_IO protect the .fault() handler from being called
	 * through interfaces like /proc/$pid/mem or
	 * process_vm_{readv,writev}() as long as there's no .access()
	 * in special_mapping_vmops.
	 * For more details see check_vma_flags() and __access_remote_vm().
	 */
	WARN(1, "vvar_page accessed remotely");

	return NULL;
}
#else
static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
{
	return NULL;
}
#endif

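/*
 * Fault handler for the [vvar] mapping. The pages are never populated up
 * front; they are inserted on demand so that a task which joins a time
 * namespace can have its vvar pages zapped and refaulted against the new
 * namespace's data.
 */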
static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
			     struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct page *timens_page = find_timens_vvar_page(vma);
	unsigned long pfn;

	switch (vmf->pgoff) {
	case VVAR_DATA_PAGE_OFFSET:
		if (timens_page)
			pfn = page_to_pfn(timens_page);
		else
			pfn = sym_to_pfn(vdso_data);
		break;
#ifdef CONFIG_TIME_NS
	case VVAR_TIMENS_PAGE_OFFSET:
		/*
		 * If a task belongs to a time namespace then a namespace
		 * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and
		 * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET
		 * offset.
		 * See also the comment near timens_setup_vdso_data().
		 */
		if (!timens_page)
			return VM_FAULT_SIGBUS;
		pfn = sym_to_pfn(vdso_data);
		break;
#endif /* CONFIG_TIME_NS */
	default:
		return VM_FAULT_SIGBUS;
	}

	return vmf_insert_pfn(vma, vmf->address, pfn);
}

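/*
 * The two special mappings installed into every process: the [vvar] data
 * pages and the [vdso] code pages.
 */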
enum rv_vdso_map {
	RV_VDSO_MAP_VVAR,
	RV_VDSO_MAP_VDSO,
};

static struct vm_special_mapping rv_vdso_maps[] __ro_after_init = {
	[RV_VDSO_MAP_VVAR] = {
		.name = "[vvar]",
		.fault = vvar_fault,
	},
	[RV_VDSO_MAP_VDSO] = {
		.name = "[vdso]",
		.mremap = vdso_mremap,
	},
};

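/* Wire up the data/code mappings and prime the code page array at boot. */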
static int __init vdso_init(void)
{
	vdso_info.dm = &rv_vdso_maps[RV_VDSO_MAP_VVAR];
	vdso_info.cm = &rv_vdso_maps[RV_VDSO_MAP_VDSO];

	return __vdso_init();
}
arch_initcall(vdso_init);

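/*
 * Map the VVAR pages followed immediately by the vDSO code into @mm.
 * mm->context.vdso points at the start of the code mapping, i.e. just
 * past the VVAR area.
 */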
static int __setup_additional_pages(struct mm_struct *mm,
				    struct linux_binprm *bprm,
				    int uses_interp)
{
	unsigned long vdso_base, vdso_text_len, vdso_mapping_len;
	void *ret;

	BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES);

	vdso_text_len = vdso_info.vdso_pages << PAGE_SHIFT;
	/* Be sure to map the data pages as well. */
	vdso_mapping_len = vdso_text_len + VVAR_SIZE;

	vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
	if (IS_ERR_VALUE(vdso_base)) {
		ret = ERR_PTR(vdso_base);
		goto up_fail;
	}

	ret = _install_special_mapping(mm, vdso_base, VVAR_SIZE,
				       (VM_READ | VM_MAYREAD | VM_PFNMAP),
				       vdso_info.dm);
	if (IS_ERR(ret))
		goto up_fail;

	vdso_base += VVAR_SIZE;
	mm->context.vdso = (void *)vdso_base;
	ret = _install_special_mapping(mm, vdso_base, vdso_text_len,
				       (VM_READ | VM_EXEC | VM_MAYREAD |
					VM_MAYWRITE | VM_MAYEXEC),
				       vdso_info.cm);
	if (IS_ERR(ret))
		goto up_fail;

	return 0;

up_fail:
	mm->context.vdso = NULL;
	return PTR_ERR(ret);
}

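/*
 * Called from the ELF loader at exec time: take the mmap lock and install
 * the per-process vDSO mappings.
 */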
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	struct mm_struct *mm = current->mm;
	int ret;

	if (mmap_write_lock_killable(mm))
		return -EINTR;

	ret = __setup_additional_pages(mm, bprm, uses_interp);
	mmap_write_unlock(mm);

	return ret;
}