Thomas Gleixner | 1802d0b | 2019-05-27 08:55:21 +0200 | [diff] [blame] | 1 | // SPDX-License-Identifier: GPL-2.0-only |
Steve Capper | 084bd29 | 2013-04-10 13:48:00 +0100 | [diff] [blame] | 2 | /* |
| 3 | * arch/arm64/mm/hugetlbpage.c |
| 4 | * |
| 5 | * Copyright (C) 2013 Linaro Ltd. |
| 6 | * |
| 7 | * Based on arch/x86/mm/hugetlbpage.c. |
Steve Capper | 084bd29 | 2013-04-10 13:48:00 +0100 | [diff] [blame] | 8 | */ |
| 9 | |
| 10 | #include <linux/init.h> |
| 11 | #include <linux/fs.h> |
| 12 | #include <linux/mm.h> |
| 13 | #include <linux/hugetlb.h> |
| 14 | #include <linux/pagemap.h> |
| 15 | #include <linux/err.h> |
| 16 | #include <linux/sysctl.h> |
| 17 | #include <asm/mman.h> |
| 18 | #include <asm/tlb.h> |
| 19 | #include <asm/tlbflush.h> |
Steve Capper | 084bd29 | 2013-04-10 13:48:00 +0100 | [diff] [blame] | 20 | |
Anshuman Khandual | abb7962 | 2020-07-01 10:12:01 +0530 | [diff] [blame] | 21 | /* |
| 22 | * HugeTLB Support Matrix |
| 23 | * |
| 24 | * --------------------------------------------------- |
| 25 | * | Page Size | CONT PTE | PMD | CONT PMD | PUD | |
| 26 | * --------------------------------------------------- |
| 27 | * | 4K | 64K | 2M | 32M | 1G | |
| 28 | * | 16K | 2M | 32M | 1G | | |
| 29 | * | 64K | 2M | 512M | 16G | | |
| 30 | * --------------------------------------------------- |
| 31 | */ |
| 32 | |
| 33 | /* |
| 34 | * Reserve CMA areas for the largest supported gigantic |
| 35 | * huge page when requested. Any other smaller gigantic |
| 36 | * huge pages could still be served from those areas. |
| 37 | */ |
| 38 | #ifdef CONFIG_CMA |
| 39 | void __init arm64_hugetlb_cma_reserve(void) |
| 40 | { |
| 41 | int order; |
| 42 | |
| 43 | #ifdef CONFIG_ARM64_4K_PAGES |
| 44 | order = PUD_SHIFT - PAGE_SHIFT; |
| 45 | #else |
| 46 | order = CONT_PMD_SHIFT + PMD_SHIFT - PAGE_SHIFT; |
| 47 | #endif |
| 48 | /* |
| 49 | * HugeTLB CMA reservation is required for gigantic |
| 50 | * huge pages which could not be allocated via the |
| 51 | * page allocator. Just warn if there is any change |
| 52 | * breaking this assumption. |
| 53 | */ |
| 54 | WARN_ON(order <= MAX_ORDER); |
| 55 | hugetlb_cma_reserve(order); |
| 56 | } |
| 57 | #endif /* CONFIG_CMA */ |
| 58 | |
Anshuman Khandual | 5480280 | 2019-03-05 15:43:58 -0800 | [diff] [blame] | 59 | #ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION |
| 60 | bool arch_hugetlb_migration_supported(struct hstate *h) |
| 61 | { |
| 62 | size_t pagesize = huge_page_size(h); |
| 63 | |
| 64 | switch (pagesize) { |
| 65 | #ifdef CONFIG_ARM64_4K_PAGES |
| 66 | case PUD_SIZE: |
| 67 | #endif |
| 68 | case PMD_SIZE: |
| 69 | case CONT_PMD_SIZE: |
| 70 | case CONT_PTE_SIZE: |
| 71 | return true; |
| 72 | } |
| 73 | pr_warn("%s: unrecognized huge page size 0x%lx\n", |
| 74 | __func__, pagesize); |
| 75 | return false; |
| 76 | } |
| 77 | #endif |
| 78 | |
Steve Capper | 084bd29 | 2013-04-10 13:48:00 +0100 | [diff] [blame] | 79 | int pmd_huge(pmd_t pmd) |
| 80 | { |
Christoffer Dall | fd28f5d | 2015-07-01 14:08:31 +0200 | [diff] [blame] | 81 | return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT); |
Steve Capper | 084bd29 | 2013-04-10 13:48:00 +0100 | [diff] [blame] | 82 | } |
| 83 | |
| 84 | int pud_huge(pud_t pud) |
| 85 | { |
Mark Salter | 4797ec2 | 2014-05-15 15:19:22 +0100 | [diff] [blame] | 86 | #ifndef __PAGETABLE_PMD_FOLDED |
Christoffer Dall | fd28f5d | 2015-07-01 14:08:31 +0200 | [diff] [blame] | 87 | return pud_val(pud) && !(pud_val(pud) & PUD_TABLE_BIT); |
Mark Salter | 4797ec2 | 2014-05-15 15:19:22 +0100 | [diff] [blame] | 88 | #else |
| 89 | return 0; |
| 90 | #endif |
Steve Capper | 084bd29 | 2013-04-10 13:48:00 +0100 | [diff] [blame] | 91 | } |
| 92 | |
Steve Capper | b5b0be8 | 2017-08-22 11:42:42 +0100 | [diff] [blame] | 93 | /* |
| 94 | * Select all bits except the pfn |
| 95 | */ |
| 96 | static inline pgprot_t pte_pgprot(pte_t pte) |
| 97 | { |
| 98 | unsigned long pfn = pte_pfn(pte); |
| 99 | |
| 100 | return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte)); |
| 101 | } |
| 102 | |
David Woods | 66b3923 | 2015-12-17 14:31:26 -0500 | [diff] [blame] | 103 | static int find_num_contig(struct mm_struct *mm, unsigned long addr, |
Steve Capper | bb9dd3d | 2017-07-06 15:39:29 -0700 | [diff] [blame] | 104 | pte_t *ptep, size_t *pgsize) |
David Woods | 66b3923 | 2015-12-17 14:31:26 -0500 | [diff] [blame] | 105 | { |
Will Deacon | 20a004e | 2018-02-15 11:14:56 +0000 | [diff] [blame] | 106 | pgd_t *pgdp = pgd_offset(mm, addr); |
Mike Rapoport | e9f6376 | 2020-06-04 16:46:23 -0700 | [diff] [blame] | 107 | p4d_t *p4dp; |
Will Deacon | 20a004e | 2018-02-15 11:14:56 +0000 | [diff] [blame] | 108 | pud_t *pudp; |
| 109 | pmd_t *pmdp; |
David Woods | 66b3923 | 2015-12-17 14:31:26 -0500 | [diff] [blame] | 110 | |
| 111 | *pgsize = PAGE_SIZE; |
Mike Rapoport | e9f6376 | 2020-06-04 16:46:23 -0700 | [diff] [blame] | 112 | p4dp = p4d_offset(pgdp, addr); |
| 113 | pudp = pud_offset(p4dp, addr); |
Will Deacon | 20a004e | 2018-02-15 11:14:56 +0000 | [diff] [blame] | 114 | pmdp = pmd_offset(pudp, addr); |
| 115 | if ((pte_t *)pmdp == ptep) { |
David Woods | 66b3923 | 2015-12-17 14:31:26 -0500 | [diff] [blame] | 116 | *pgsize = PMD_SIZE; |
| 117 | return CONT_PMDS; |
| 118 | } |
| 119 | return CONT_PTES; |
| 120 | } |
| 121 | |
Punit Agrawal | c3e4ed5 | 2017-08-22 11:42:46 +0100 | [diff] [blame] | 122 | static inline int num_contig_ptes(unsigned long size, size_t *pgsize) |
| 123 | { |
| 124 | int contig_ptes = 0; |
| 125 | |
| 126 | *pgsize = size; |
| 127 | |
| 128 | switch (size) { |
| 129 | #ifdef CONFIG_ARM64_4K_PAGES |
| 130 | case PUD_SIZE: |
| 131 | #endif |
| 132 | case PMD_SIZE: |
| 133 | contig_ptes = 1; |
| 134 | break; |
| 135 | case CONT_PMD_SIZE: |
| 136 | *pgsize = PMD_SIZE; |
| 137 | contig_ptes = CONT_PMDS; |
| 138 | break; |
| 139 | case CONT_PTE_SIZE: |
| 140 | *pgsize = PAGE_SIZE; |
| 141 | contig_ptes = CONT_PTES; |
| 142 | break; |
| 143 | } |
| 144 | |
| 145 | return contig_ptes; |
| 146 | } |
| 147 | |
Steve Capper | d8bdcff | 2017-08-22 11:42:44 +0100 | [diff] [blame] | 148 | /* |
| 149 | * Changing some bits of contiguous entries requires us to follow a |
| 150 | * Break-Before-Make approach, breaking the whole contiguous set |
| 151 | * before we can change any entries. See ARM DDI 0487A.k_iss10775, |
| 152 | * "Misprogramming of the Contiguous bit", page D4-1762. |
| 153 | * |
| 154 | * This helper performs the break step. |
| 155 | */ |
| 156 | static pte_t get_clear_flush(struct mm_struct *mm, |
| 157 | unsigned long addr, |
| 158 | pte_t *ptep, |
| 159 | unsigned long pgsize, |
| 160 | unsigned long ncontig) |
| 161 | { |
Steve Capper | d8bdcff | 2017-08-22 11:42:44 +0100 | [diff] [blame] | 162 | pte_t orig_pte = huge_ptep_get(ptep); |
| 163 | bool valid = pte_valid(orig_pte); |
| 164 | unsigned long i, saddr = addr; |
| 165 | |
| 166 | for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) { |
| 167 | pte_t pte = ptep_get_and_clear(mm, addr, ptep); |
| 168 | |
| 169 | /* |
| 170 | * If HW_AFDBM is enabled, then the HW could turn on |
Steve Capper | 469ed9d | 2018-09-21 16:34:04 +0100 | [diff] [blame] | 171 | * the dirty or accessed bit for any page in the set, |
| 172 | * so check them all. |
Steve Capper | d8bdcff | 2017-08-22 11:42:44 +0100 | [diff] [blame] | 173 | */ |
| 174 | if (pte_dirty(pte)) |
| 175 | orig_pte = pte_mkdirty(orig_pte); |
Steve Capper | 469ed9d | 2018-09-21 16:34:04 +0100 | [diff] [blame] | 176 | |
| 177 | if (pte_young(pte)) |
| 178 | orig_pte = pte_mkyoung(orig_pte); |
Steve Capper | d8bdcff | 2017-08-22 11:42:44 +0100 | [diff] [blame] | 179 | } |
| 180 | |
Linus Torvalds | 8b11ec1 | 2018-08-01 13:43:38 -0700 | [diff] [blame] | 181 | if (valid) { |
| 182 | struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0); |
Steve Capper | d8bdcff | 2017-08-22 11:42:44 +0100 | [diff] [blame] | 183 | flush_tlb_range(&vma, saddr, addr); |
Linus Torvalds | 8b11ec1 | 2018-08-01 13:43:38 -0700 | [diff] [blame] | 184 | } |
Steve Capper | d8bdcff | 2017-08-22 11:42:44 +0100 | [diff] [blame] | 185 | return orig_pte; |
| 186 | } |
| 187 | |
| 188 | /* |
| 189 | * Changing some bits of contiguous entries requires us to follow a |
| 190 | * Break-Before-Make approach, breaking the whole contiguous set |
| 191 | * before we can change any entries. See ARM DDI 0487A.k_iss10775, |
| 192 | * "Misprogramming of the Contiguous bit", page D4-1762. |
| 193 | * |
| 194 | * This helper performs the break step for use cases where the |
| 195 | * original pte is not needed. |
| 196 | */ |
| 197 | static void clear_flush(struct mm_struct *mm, |
| 198 | unsigned long addr, |
| 199 | pte_t *ptep, |
| 200 | unsigned long pgsize, |
| 201 | unsigned long ncontig) |
| 202 | { |
Linus Torvalds | 8b11ec1 | 2018-08-01 13:43:38 -0700 | [diff] [blame] | 203 | struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0); |
Steve Capper | d8bdcff | 2017-08-22 11:42:44 +0100 | [diff] [blame] | 204 | unsigned long i, saddr = addr; |
| 205 | |
| 206 | for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) |
| 207 | pte_clear(mm, addr, ptep); |
| 208 | |
| 209 | flush_tlb_range(&vma, saddr, addr); |
| 210 | } |
| 211 | |
David Woods | 66b3923 | 2015-12-17 14:31:26 -0500 | [diff] [blame] | 212 | void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, |
| 213 | pte_t *ptep, pte_t pte) |
| 214 | { |
| 215 | size_t pgsize; |
| 216 | int i; |
Steve Capper | bb9dd3d | 2017-07-06 15:39:29 -0700 | [diff] [blame] | 217 | int ncontig; |
Steve Capper | 29a7287 | 2017-08-22 11:42:43 +0100 | [diff] [blame] | 218 | unsigned long pfn, dpfn; |
David Woods | 66b3923 | 2015-12-17 14:31:26 -0500 | [diff] [blame] | 219 | pgprot_t hugeprot; |
| 220 | |
Steve Capper | d3ea795 | 2017-08-22 11:42:41 +0100 | [diff] [blame] | 221 | /* |
| 222 | * Code needs to be expanded to handle huge swap and migration |
| 223 | * entries. Needed for HUGETLB and MEMORY_FAILURE. |
| 224 | */ |
| 225 | WARN_ON(!pte_present(pte)); |
| 226 | |
Steve Capper | bb9dd3d | 2017-07-06 15:39:29 -0700 | [diff] [blame] | 227 | if (!pte_cont(pte)) { |
David Woods | 66b3923 | 2015-12-17 14:31:26 -0500 | [diff] [blame] | 228 | set_pte_at(mm, addr, ptep, pte); |
| 229 | return; |
| 230 | } |
| 231 | |
Steve Capper | bb9dd3d | 2017-07-06 15:39:29 -0700 | [diff] [blame] | 232 | ncontig = find_num_contig(mm, addr, ptep, &pgsize); |
David Woods | 66b3923 | 2015-12-17 14:31:26 -0500 | [diff] [blame] | 233 | pfn = pte_pfn(pte); |
Steve Capper | 29a7287 | 2017-08-22 11:42:43 +0100 | [diff] [blame] | 234 | dpfn = pgsize >> PAGE_SHIFT; |
Steve Capper | b5b0be8 | 2017-08-22 11:42:42 +0100 | [diff] [blame] | 235 | hugeprot = pte_pgprot(pte); |
Steve Capper | 29a7287 | 2017-08-22 11:42:43 +0100 | [diff] [blame] | 236 | |
Steve Capper | d8bdcff | 2017-08-22 11:42:44 +0100 | [diff] [blame] | 237 | clear_flush(mm, addr, ptep, pgsize, ncontig); |
| 238 | |
Will Deacon | 20a004e | 2018-02-15 11:14:56 +0000 | [diff] [blame] | 239 | for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) |
David Woods | 66b3923 | 2015-12-17 14:31:26 -0500 | [diff] [blame] | 240 | set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot)); |
David Woods | 66b3923 | 2015-12-17 14:31:26 -0500 | [diff] [blame] | 241 | } |
| 242 | |
Punit Agrawal | a8d623e | 2017-08-22 11:42:47 +0100 | [diff] [blame] | 243 | void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr, |
| 244 | pte_t *ptep, pte_t pte, unsigned long sz) |
| 245 | { |
| 246 | int i, ncontig; |
| 247 | size_t pgsize; |
| 248 | |
| 249 | ncontig = num_contig_ptes(sz, &pgsize); |
| 250 | |
| 251 | for (i = 0; i < ncontig; i++, ptep++) |
| 252 | set_pte(ptep, pte); |
| 253 | } |
| 254 | |
David Woods | 66b3923 | 2015-12-17 14:31:26 -0500 | [diff] [blame] | 255 | pte_t *huge_pte_alloc(struct mm_struct *mm, |
| 256 | unsigned long addr, unsigned long sz) |
| 257 | { |
Will Deacon | 20a004e | 2018-02-15 11:14:56 +0000 | [diff] [blame] | 258 | pgd_t *pgdp; |
Mike Rapoport | e9f6376 | 2020-06-04 16:46:23 -0700 | [diff] [blame] | 259 | p4d_t *p4dp; |
Will Deacon | 20a004e | 2018-02-15 11:14:56 +0000 | [diff] [blame] | 260 | pud_t *pudp; |
| 261 | pmd_t *pmdp; |
| 262 | pte_t *ptep = NULL; |
David Woods | 66b3923 | 2015-12-17 14:31:26 -0500 | [diff] [blame] | 263 | |
Will Deacon | 20a004e | 2018-02-15 11:14:56 +0000 | [diff] [blame] | 264 | pgdp = pgd_offset(mm, addr); |
Mike Rapoport | e9f6376 | 2020-06-04 16:46:23 -0700 | [diff] [blame] | 265 | p4dp = p4d_offset(pgdp, addr); |
| 266 | pudp = pud_alloc(mm, p4dp, addr); |
Will Deacon | 20a004e | 2018-02-15 11:14:56 +0000 | [diff] [blame] | 267 | if (!pudp) |
David Woods | 66b3923 | 2015-12-17 14:31:26 -0500 | [diff] [blame] | 268 | return NULL; |
| 269 | |
| 270 | if (sz == PUD_SIZE) { |
Will Deacon | 20a004e | 2018-02-15 11:14:56 +0000 | [diff] [blame] | 271 | ptep = (pte_t *)pudp; |
Anshuman Khandual | 441a627 | 2019-05-21 09:05:03 +0530 | [diff] [blame] | 272 | } else if (sz == (CONT_PTE_SIZE)) { |
Will Deacon | 20a004e | 2018-02-15 11:14:56 +0000 | [diff] [blame] | 273 | pmdp = pmd_alloc(mm, pudp, addr); |
Mark Rutland | 027d0c7 | 2020-05-05 13:59:30 +0100 | [diff] [blame] | 274 | if (!pmdp) |
| 275 | return NULL; |
David Woods | 66b3923 | 2015-12-17 14:31:26 -0500 | [diff] [blame] | 276 | |
| 277 | WARN_ON(addr & (sz - 1)); |
| 278 | /* |
| 279 | * Note that if this code were ever ported to the |
| 280 | * 32-bit arm platform then it will cause trouble in |
| 281 | * the case where CONFIG_HIGHPTE is set, since there |
| 282 | * will be no pte_unmap() to correspond with this |
| 283 | * pte_alloc_map(). |
| 284 | */ |
Will Deacon | 20a004e | 2018-02-15 11:14:56 +0000 | [diff] [blame] | 285 | ptep = pte_alloc_map(mm, pmdp, addr); |
David Woods | 66b3923 | 2015-12-17 14:31:26 -0500 | [diff] [blame] | 286 | } else if (sz == PMD_SIZE) { |
| 287 | if (IS_ENABLED(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) && |
Will Deacon | 20a004e | 2018-02-15 11:14:56 +0000 | [diff] [blame] | 288 | pud_none(READ_ONCE(*pudp))) |
| 289 | ptep = huge_pmd_share(mm, addr, pudp); |
David Woods | 66b3923 | 2015-12-17 14:31:26 -0500 | [diff] [blame] | 290 | else |
Will Deacon | 20a004e | 2018-02-15 11:14:56 +0000 | [diff] [blame] | 291 | ptep = (pte_t *)pmd_alloc(mm, pudp, addr); |
Anshuman Khandual | 441a627 | 2019-05-21 09:05:03 +0530 | [diff] [blame] | 292 | } else if (sz == (CONT_PMD_SIZE)) { |
Will Deacon | 20a004e | 2018-02-15 11:14:56 +0000 | [diff] [blame] | 293 | pmdp = pmd_alloc(mm, pudp, addr); |
David Woods | 66b3923 | 2015-12-17 14:31:26 -0500 | [diff] [blame] | 294 | WARN_ON(addr & (sz - 1)); |
Will Deacon | 20a004e | 2018-02-15 11:14:56 +0000 | [diff] [blame] | 295 | return (pte_t *)pmdp; |
David Woods | 66b3923 | 2015-12-17 14:31:26 -0500 | [diff] [blame] | 296 | } |
| 297 | |
Will Deacon | 20a004e | 2018-02-15 11:14:56 +0000 | [diff] [blame] | 298 | return ptep; |
David Woods | 66b3923 | 2015-12-17 14:31:26 -0500 | [diff] [blame] | 299 | } |
| 300 | |
Punit Agrawal | 7868a20 | 2017-07-06 15:39:42 -0700 | [diff] [blame] | 301 | pte_t *huge_pte_offset(struct mm_struct *mm, |
| 302 | unsigned long addr, unsigned long sz) |
David Woods | 66b3923 | 2015-12-17 14:31:26 -0500 | [diff] [blame] | 303 | { |
Will Deacon | 20a004e | 2018-02-15 11:14:56 +0000 | [diff] [blame] | 304 | pgd_t *pgdp; |
Mike Rapoport | e9f6376 | 2020-06-04 16:46:23 -0700 | [diff] [blame] | 305 | p4d_t *p4dp; |
Will Deacon | 20a004e | 2018-02-15 11:14:56 +0000 | [diff] [blame] | 306 | pud_t *pudp, pud; |
| 307 | pmd_t *pmdp, pmd; |
David Woods | 66b3923 | 2015-12-17 14:31:26 -0500 | [diff] [blame] | 308 | |
Will Deacon | 20a004e | 2018-02-15 11:14:56 +0000 | [diff] [blame] | 309 | pgdp = pgd_offset(mm, addr); |
| 310 | if (!pgd_present(READ_ONCE(*pgdp))) |
David Woods | 66b3923 | 2015-12-17 14:31:26 -0500 | [diff] [blame] | 311 | return NULL; |
Punit Agrawal | f02ab08 | 2017-06-08 18:25:26 +0100 | [diff] [blame] | 312 | |
Mike Rapoport | e9f6376 | 2020-06-04 16:46:23 -0700 | [diff] [blame] | 313 | p4dp = p4d_offset(pgdp, addr); |
| 314 | if (!p4d_present(READ_ONCE(*p4dp))) |
| 315 | return NULL; |
| 316 | |
| 317 | pudp = pud_offset(p4dp, addr); |
Will Deacon | 20a004e | 2018-02-15 11:14:56 +0000 | [diff] [blame] | 318 | pud = READ_ONCE(*pudp); |
| 319 | if (sz != PUD_SIZE && pud_none(pud)) |
David Woods | 66b3923 | 2015-12-17 14:31:26 -0500 | [diff] [blame] | 320 | return NULL; |
Punit Agrawal | 30f3ac0 | 2017-08-22 11:42:45 +0100 | [diff] [blame] | 321 | /* hugepage or swap? */ |
Will Deacon | 20a004e | 2018-02-15 11:14:56 +0000 | [diff] [blame] | 322 | if (pud_huge(pud) || !pud_present(pud)) |
| 323 | return (pte_t *)pudp; |
Punit Agrawal | f02ab08 | 2017-06-08 18:25:26 +0100 | [diff] [blame] | 324 | /* table; check the next level */ |
David Woods | 66b3923 | 2015-12-17 14:31:26 -0500 | [diff] [blame] | 325 | |
Punit Agrawal | 30f3ac0 | 2017-08-22 11:42:45 +0100 | [diff] [blame] | 326 | if (sz == CONT_PMD_SIZE) |
| 327 | addr &= CONT_PMD_MASK; |
| 328 | |
Will Deacon | 20a004e | 2018-02-15 11:14:56 +0000 | [diff] [blame] | 329 | pmdp = pmd_offset(pudp, addr); |
| 330 | pmd = READ_ONCE(*pmdp); |
Punit Agrawal | 30f3ac0 | 2017-08-22 11:42:45 +0100 | [diff] [blame] | 331 | if (!(sz == PMD_SIZE || sz == CONT_PMD_SIZE) && |
Will Deacon | 20a004e | 2018-02-15 11:14:56 +0000 | [diff] [blame] | 332 | pmd_none(pmd)) |
Punit Agrawal | f02ab08 | 2017-06-08 18:25:26 +0100 | [diff] [blame] | 333 | return NULL; |
Will Deacon | 20a004e | 2018-02-15 11:14:56 +0000 | [diff] [blame] | 334 | if (pmd_huge(pmd) || !pmd_present(pmd)) |
| 335 | return (pte_t *)pmdp; |
Punit Agrawal | f02ab08 | 2017-06-08 18:25:26 +0100 | [diff] [blame] | 336 | |
Will Deacon | 20a004e | 2018-02-15 11:14:56 +0000 | [diff] [blame] | 337 | if (sz == CONT_PTE_SIZE) |
| 338 | return pte_offset_kernel(pmdp, (addr & CONT_PTE_MASK)); |
Punit Agrawal | 30f3ac0 | 2017-08-22 11:42:45 +0100 | [diff] [blame] | 339 | |
David Woods | 66b3923 | 2015-12-17 14:31:26 -0500 | [diff] [blame] | 340 | return NULL; |
| 341 | } |
| 342 | |
| 343 | pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, |
| 344 | struct page *page, int writable) |
| 345 | { |
| 346 | size_t pagesize = huge_page_size(hstate_vma(vma)); |
| 347 | |
| 348 | if (pagesize == CONT_PTE_SIZE) { |
| 349 | entry = pte_mkcont(entry); |
| 350 | } else if (pagesize == CONT_PMD_SIZE) { |
| 351 | entry = pmd_pte(pmd_mkcont(pte_pmd(entry))); |
| 352 | } else if (pagesize != PUD_SIZE && pagesize != PMD_SIZE) { |
| 353 | pr_warn("%s: unrecognized huge page size 0x%lx\n", |
| 354 | __func__, pagesize); |
| 355 | } |
| 356 | return entry; |
| 357 | } |
| 358 | |
Punit Agrawal | c3e4ed5 | 2017-08-22 11:42:46 +0100 | [diff] [blame] | 359 | void huge_pte_clear(struct mm_struct *mm, unsigned long addr, |
| 360 | pte_t *ptep, unsigned long sz) |
| 361 | { |
| 362 | int i, ncontig; |
| 363 | size_t pgsize; |
| 364 | |
| 365 | ncontig = num_contig_ptes(sz, &pgsize); |
| 366 | |
| 367 | for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) |
| 368 | pte_clear(mm, addr, ptep); |
| 369 | } |
| 370 | |
David Woods | 66b3923 | 2015-12-17 14:31:26 -0500 | [diff] [blame] | 371 | pte_t huge_ptep_get_and_clear(struct mm_struct *mm, |
| 372 | unsigned long addr, pte_t *ptep) |
| 373 | { |
Steve Capper | d8bdcff | 2017-08-22 11:42:44 +0100 | [diff] [blame] | 374 | int ncontig; |
Steve Capper | 29a7287 | 2017-08-22 11:42:43 +0100 | [diff] [blame] | 375 | size_t pgsize; |
| 376 | pte_t orig_pte = huge_ptep_get(ptep); |
David Woods | 66b3923 | 2015-12-17 14:31:26 -0500 | [diff] [blame] | 377 | |
Steve Capper | 29a7287 | 2017-08-22 11:42:43 +0100 | [diff] [blame] | 378 | if (!pte_cont(orig_pte)) |
David Woods | 66b3923 | 2015-12-17 14:31:26 -0500 | [diff] [blame] | 379 | return ptep_get_and_clear(mm, addr, ptep); |
Steve Capper | 29a7287 | 2017-08-22 11:42:43 +0100 | [diff] [blame] | 380 | |
| 381 | ncontig = find_num_contig(mm, addr, ptep, &pgsize); |
Steve Capper | 29a7287 | 2017-08-22 11:42:43 +0100 | [diff] [blame] | 382 | |
Steve Capper | d8bdcff | 2017-08-22 11:42:44 +0100 | [diff] [blame] | 383 | return get_clear_flush(mm, addr, ptep, pgsize, ncontig); |
David Woods | 66b3923 | 2015-12-17 14:31:26 -0500 | [diff] [blame] | 384 | } |
| 385 | |
Steve Capper | 031e6e6 | 2018-09-21 16:34:05 +0100 | [diff] [blame] | 386 | /* |
| 387 | * huge_ptep_set_access_flags will update access flags (dirty, accesssed) |
| 388 | * and write permission. |
| 389 | * |
| 390 | * For a contiguous huge pte range we need to check whether or not write |
| 391 | * permission has to change only on the first pte in the set. Then for |
| 392 | * all the contiguous ptes we need to check whether or not there is a |
| 393 | * discrepancy between dirty or young. |
| 394 | */ |
| 395 | static int __cont_access_flags_changed(pte_t *ptep, pte_t pte, int ncontig) |
| 396 | { |
| 397 | int i; |
| 398 | |
| 399 | if (pte_write(pte) != pte_write(huge_ptep_get(ptep))) |
| 400 | return 1; |
| 401 | |
| 402 | for (i = 0; i < ncontig; i++) { |
| 403 | pte_t orig_pte = huge_ptep_get(ptep + i); |
| 404 | |
| 405 | if (pte_dirty(pte) != pte_dirty(orig_pte)) |
| 406 | return 1; |
| 407 | |
| 408 | if (pte_young(pte) != pte_young(orig_pte)) |
| 409 | return 1; |
| 410 | } |
| 411 | |
| 412 | return 0; |
| 413 | } |
| 414 | |
David Woods | 66b3923 | 2015-12-17 14:31:26 -0500 | [diff] [blame] | 415 | int huge_ptep_set_access_flags(struct vm_area_struct *vma, |
| 416 | unsigned long addr, pte_t *ptep, |
| 417 | pte_t pte, int dirty) |
| 418 | { |
Steve Capper | 031e6e6 | 2018-09-21 16:34:05 +0100 | [diff] [blame] | 419 | int ncontig, i; |
Steve Capper | 29a7287 | 2017-08-22 11:42:43 +0100 | [diff] [blame] | 420 | size_t pgsize = 0; |
| 421 | unsigned long pfn = pte_pfn(pte), dpfn; |
| 422 | pgprot_t hugeprot; |
Steve Capper | d8bdcff | 2017-08-22 11:42:44 +0100 | [diff] [blame] | 423 | pte_t orig_pte; |
David Woods | 66b3923 | 2015-12-17 14:31:26 -0500 | [diff] [blame] | 424 | |
Steve Capper | 29a7287 | 2017-08-22 11:42:43 +0100 | [diff] [blame] | 425 | if (!pte_cont(pte)) |
David Woods | 66b3923 | 2015-12-17 14:31:26 -0500 | [diff] [blame] | 426 | return ptep_set_access_flags(vma, addr, ptep, pte, dirty); |
Steve Capper | 29a7287 | 2017-08-22 11:42:43 +0100 | [diff] [blame] | 427 | |
| 428 | ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize); |
| 429 | dpfn = pgsize >> PAGE_SHIFT; |
Steve Capper | 29a7287 | 2017-08-22 11:42:43 +0100 | [diff] [blame] | 430 | |
Steve Capper | 031e6e6 | 2018-09-21 16:34:05 +0100 | [diff] [blame] | 431 | if (!__cont_access_flags_changed(ptep, pte, ncontig)) |
| 432 | return 0; |
| 433 | |
Steve Capper | d8bdcff | 2017-08-22 11:42:44 +0100 | [diff] [blame] | 434 | orig_pte = get_clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig); |
Steve Capper | d8bdcff | 2017-08-22 11:42:44 +0100 | [diff] [blame] | 435 | |
Steve Capper | 469ed9d | 2018-09-21 16:34:04 +0100 | [diff] [blame] | 436 | /* Make sure we don't lose the dirty or young state */ |
Steve Capper | d8bdcff | 2017-08-22 11:42:44 +0100 | [diff] [blame] | 437 | if (pte_dirty(orig_pte)) |
| 438 | pte = pte_mkdirty(pte); |
| 439 | |
Steve Capper | 469ed9d | 2018-09-21 16:34:04 +0100 | [diff] [blame] | 440 | if (pte_young(orig_pte)) |
| 441 | pte = pte_mkyoung(pte); |
| 442 | |
Steve Capper | d8bdcff | 2017-08-22 11:42:44 +0100 | [diff] [blame] | 443 | hugeprot = pte_pgprot(pte); |
| 444 | for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) |
| 445 | set_pte_at(vma->vm_mm, addr, ptep, pfn_pte(pfn, hugeprot)); |
Steve Capper | 29a7287 | 2017-08-22 11:42:43 +0100 | [diff] [blame] | 446 | |
Steve Capper | 031e6e6 | 2018-09-21 16:34:05 +0100 | [diff] [blame] | 447 | return 1; |
David Woods | 66b3923 | 2015-12-17 14:31:26 -0500 | [diff] [blame] | 448 | } |
| 449 | |
| 450 | void huge_ptep_set_wrprotect(struct mm_struct *mm, |
| 451 | unsigned long addr, pte_t *ptep) |
| 452 | { |
Steve Capper | d8bdcff | 2017-08-22 11:42:44 +0100 | [diff] [blame] | 453 | unsigned long pfn, dpfn; |
| 454 | pgprot_t hugeprot; |
Steve Capper | 29a7287 | 2017-08-22 11:42:43 +0100 | [diff] [blame] | 455 | int ncontig, i; |
| 456 | size_t pgsize; |
Steve Capper | d8bdcff | 2017-08-22 11:42:44 +0100 | [diff] [blame] | 457 | pte_t pte; |
David Woods | 66b3923 | 2015-12-17 14:31:26 -0500 | [diff] [blame] | 458 | |
Will Deacon | 20a004e | 2018-02-15 11:14:56 +0000 | [diff] [blame] | 459 | if (!pte_cont(READ_ONCE(*ptep))) { |
David Woods | 66b3923 | 2015-12-17 14:31:26 -0500 | [diff] [blame] | 460 | ptep_set_wrprotect(mm, addr, ptep); |
Steve Capper | 29a7287 | 2017-08-22 11:42:43 +0100 | [diff] [blame] | 461 | return; |
David Woods | 66b3923 | 2015-12-17 14:31:26 -0500 | [diff] [blame] | 462 | } |
Steve Capper | 29a7287 | 2017-08-22 11:42:43 +0100 | [diff] [blame] | 463 | |
| 464 | ncontig = find_num_contig(mm, addr, ptep, &pgsize); |
Steve Capper | d8bdcff | 2017-08-22 11:42:44 +0100 | [diff] [blame] | 465 | dpfn = pgsize >> PAGE_SHIFT; |
| 466 | |
| 467 | pte = get_clear_flush(mm, addr, ptep, pgsize, ncontig); |
| 468 | pte = pte_wrprotect(pte); |
| 469 | |
| 470 | hugeprot = pte_pgprot(pte); |
| 471 | pfn = pte_pfn(pte); |
| 472 | |
| 473 | for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) |
| 474 | set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot)); |
David Woods | 66b3923 | 2015-12-17 14:31:26 -0500 | [diff] [blame] | 475 | } |
| 476 | |
| 477 | void huge_ptep_clear_flush(struct vm_area_struct *vma, |
| 478 | unsigned long addr, pte_t *ptep) |
| 479 | { |
Steve Capper | 29a7287 | 2017-08-22 11:42:43 +0100 | [diff] [blame] | 480 | size_t pgsize; |
Steve Capper | d8bdcff | 2017-08-22 11:42:44 +0100 | [diff] [blame] | 481 | int ncontig; |
David Woods | 66b3923 | 2015-12-17 14:31:26 -0500 | [diff] [blame] | 482 | |
Will Deacon | 20a004e | 2018-02-15 11:14:56 +0000 | [diff] [blame] | 483 | if (!pte_cont(READ_ONCE(*ptep))) { |
David Woods | 66b3923 | 2015-12-17 14:31:26 -0500 | [diff] [blame] | 484 | ptep_clear_flush(vma, addr, ptep); |
Steve Capper | 29a7287 | 2017-08-22 11:42:43 +0100 | [diff] [blame] | 485 | return; |
David Woods | 66b3923 | 2015-12-17 14:31:26 -0500 | [diff] [blame] | 486 | } |
Steve Capper | 29a7287 | 2017-08-22 11:42:43 +0100 | [diff] [blame] | 487 | |
| 488 | ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize); |
Steve Capper | d8bdcff | 2017-08-22 11:42:44 +0100 | [diff] [blame] | 489 | clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig); |
David Woods | 66b3923 | 2015-12-17 14:31:26 -0500 | [diff] [blame] | 490 | } |
| 491 | |
Allen Pais | a21b0b7 | 2018-10-23 06:36:57 +0530 | [diff] [blame] | 492 | static int __init hugetlbpage_init(void) |
| 493 | { |
| 494 | #ifdef CONFIG_ARM64_4K_PAGES |
Mike Kravetz | 3823783 | 2020-06-03 16:00:42 -0700 | [diff] [blame] | 495 | hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); |
Allen Pais | a21b0b7 | 2018-10-23 06:36:57 +0530 | [diff] [blame] | 496 | #endif |
Gavin Shan | a1634a5 | 2020-06-30 16:24:28 +1000 | [diff] [blame] | 497 | hugetlb_add_hstate(CONT_PMD_SHIFT - PAGE_SHIFT); |
Mike Kravetz | 3823783 | 2020-06-03 16:00:42 -0700 | [diff] [blame] | 498 | hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); |
Gavin Shan | a1634a5 | 2020-06-30 16:24:28 +1000 | [diff] [blame] | 499 | hugetlb_add_hstate(CONT_PTE_SHIFT - PAGE_SHIFT); |
Allen Pais | a21b0b7 | 2018-10-23 06:36:57 +0530 | [diff] [blame] | 500 | |
| 501 | return 0; |
| 502 | } |
| 503 | arch_initcall(hugetlbpage_init); |
| 504 | |
Mike Kravetz | ae94da8 | 2020-06-03 16:00:34 -0700 | [diff] [blame] | 505 | bool __init arch_hugetlb_valid_size(unsigned long size) |
Steve Capper | 084bd29 | 2013-04-10 13:48:00 +0100 | [diff] [blame] | 506 | { |
Mike Kravetz | ae94da8 | 2020-06-03 16:00:34 -0700 | [diff] [blame] | 507 | switch (size) { |
Steve Capper | 828f193 | 2017-08-22 11:42:49 +0100 | [diff] [blame] | 508 | #ifdef CONFIG_ARM64_4K_PAGES |
| 509 | case PUD_SIZE: |
| 510 | #endif |
Anshuman Khandual | 441a627 | 2019-05-21 09:05:03 +0530 | [diff] [blame] | 511 | case CONT_PMD_SIZE: |
Steve Capper | 828f193 | 2017-08-22 11:42:49 +0100 | [diff] [blame] | 512 | case PMD_SIZE: |
Anshuman Khandual | 441a627 | 2019-05-21 09:05:03 +0530 | [diff] [blame] | 513 | case CONT_PTE_SIZE: |
Mike Kravetz | ae94da8 | 2020-06-03 16:00:34 -0700 | [diff] [blame] | 514 | return true; |
| 515 | } |
| 516 | |
| 517 | return false; |
| 518 | } |