// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Page table handling routines for radix page table.
 *
 * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
 */

#define pr_fmt(fmt) "radix-mmu: " fmt

#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/sched/mm.h>
#include <linux/memblock.h>
#include <linux/of_fdt.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/string_helpers.h>
#include <linux/stop_machine.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/mmu_context.h>
#include <asm/dma.h>
#include <asm/machdep.h>
#include <asm/mmu.h>
#include <asm/firmware.h>
#include <asm/powernv.h>
#include <asm/sections.h>
#include <asm/trace.h>
#include <asm/uaccess.h>
#include <asm/ultravisor.h>

#include <trace/events/thp.h>

unsigned int mmu_pid_bits;
unsigned int mmu_base_pid;

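/*
 * Early boot page table allocator, used before the slab allocator is
 * available. The nid/region_start/region_end arguments are passed to
 * memblock as placement hints; allocation failure is fatal (panic).
 */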
static __ref void *early_alloc_pgtable(unsigned long size, int nid,
			unsigned long region_start, unsigned long region_end)
{
	phys_addr_t min_addr = MEMBLOCK_LOW_LIMIT;
	phys_addr_t max_addr = MEMBLOCK_ALLOC_ANYWHERE;
	void *ptr;

	if (region_start)
		min_addr = region_start;
	if (region_end)
		max_addr = region_end;

	ptr = memblock_alloc_try_nid(size, size, min_addr, max_addr, nid);

	if (!ptr)
		panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%pa max_addr=%pa\n",
		      __func__, size, size, nid, &min_addr, &max_addr);

	return ptr;
}

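/*
 * Early variant of __map_kernel_page(): walk (and if necessary build) the
 * kernel page table for 'ea' using early_alloc_pgtable(), then install a
 * PTE of the requested size (PUD, PMD or base page) pointing at 'pa'.
 */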
static int early_map_kernel_page(unsigned long ea, unsigned long pa,
			  pgprot_t flags,
			  unsigned int map_page_size,
			  int nid,
			  unsigned long region_start, unsigned long region_end)
{
	unsigned long pfn = pa >> PAGE_SHIFT;
	pgd_t *pgdp;
	pud_t *pudp;
	pmd_t *pmdp;
	pte_t *ptep;

	pgdp = pgd_offset_k(ea);
	if (pgd_none(*pgdp)) {
		pudp = early_alloc_pgtable(PUD_TABLE_SIZE, nid,
					   region_start, region_end);
		pgd_populate(&init_mm, pgdp, pudp);
	}
	pudp = pud_offset(pgdp, ea);
	if (map_page_size == PUD_SIZE) {
		ptep = (pte_t *)pudp;
		goto set_the_pte;
	}
	if (pud_none(*pudp)) {
		pmdp = early_alloc_pgtable(PMD_TABLE_SIZE, nid,
					   region_start, region_end);
		pud_populate(&init_mm, pudp, pmdp);
	}
	pmdp = pmd_offset(pudp, ea);
	if (map_page_size == PMD_SIZE) {
		ptep = pmdp_ptep(pmdp);
		goto set_the_pte;
	}
	if (!pmd_present(*pmdp)) {
		ptep = early_alloc_pgtable(PAGE_SIZE, nid,
					   region_start, region_end);
		pmd_populate_kernel(&init_mm, pmdp, ptep);
	}
	ptep = pte_offset_kernel(pmdp, ea);

set_the_pte:
	set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
	smp_wmb();
	return 0;
}

/*
 * nid, region_start, and region_end are hints to try to place the page
 * table memory in the same node or region.
 */
static int __map_kernel_page(unsigned long ea, unsigned long pa,
			  pgprot_t flags,
			  unsigned int map_page_size,
			  int nid,
			  unsigned long region_start, unsigned long region_end)
{
	unsigned long pfn = pa >> PAGE_SHIFT;
	pgd_t *pgdp;
	pud_t *pudp;
	pmd_t *pmdp;
	pte_t *ptep;
	/*
	 * Make sure task size is correct as per the max addr
	 */
	BUILD_BUG_ON(TASK_SIZE_USER64 > RADIX_PGTABLE_RANGE);

#ifdef CONFIG_PPC_64K_PAGES
	BUILD_BUG_ON(RADIX_KERN_MAP_SIZE != (1UL << MAX_EA_BITS_PER_CONTEXT));
#endif

	if (unlikely(!slab_is_available()))
		return early_map_kernel_page(ea, pa, flags, map_page_size,
					     nid, region_start, region_end);

	/*
	 * Should make page table allocation functions be able to take a
	 * node, so we can place kernel page tables on the right nodes after
	 * boot.
	 */
	pgdp = pgd_offset_k(ea);
	pudp = pud_alloc(&init_mm, pgdp, ea);
	if (!pudp)
		return -ENOMEM;
	if (map_page_size == PUD_SIZE) {
		ptep = (pte_t *)pudp;
		goto set_the_pte;
	}
	pmdp = pmd_alloc(&init_mm, pudp, ea);
	if (!pmdp)
		return -ENOMEM;
	if (map_page_size == PMD_SIZE) {
		ptep = pmdp_ptep(pmdp);
		goto set_the_pte;
	}
	ptep = pte_alloc_kernel(pmdp, ea);
	if (!ptep)
		return -ENOMEM;

set_the_pte:
	set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
	smp_wmb();
	return 0;
}

int radix__map_kernel_page(unsigned long ea, unsigned long pa,
			  pgprot_t flags,
			  unsigned int map_page_size)
{
	return __map_kernel_page(ea, pa, flags, map_page_size, -1, 0, 0);
}

#ifdef CONFIG_STRICT_KERNEL_RWX
void radix__change_memory_range(unsigned long start, unsigned long end,
				unsigned long clear)
{
	unsigned long idx;
	pgd_t *pgdp;
	pud_t *pudp;
	pmd_t *pmdp;
	pte_t *ptep;

	start = ALIGN_DOWN(start, PAGE_SIZE);
	end = PAGE_ALIGN(end); // aligns up

	pr_debug("Changing flags on range %lx-%lx removing 0x%lx\n",
		 start, end, clear);

	for (idx = start; idx < end; idx += PAGE_SIZE) {
		pgdp = pgd_offset_k(idx);
		pudp = pud_alloc(&init_mm, pgdp, idx);
		if (!pudp)
			continue;
		if (pud_is_leaf(*pudp)) {
			ptep = (pte_t *)pudp;
			goto update_the_pte;
		}
		pmdp = pmd_alloc(&init_mm, pudp, idx);
		if (!pmdp)
			continue;
		if (pmd_is_leaf(*pmdp)) {
			ptep = pmdp_ptep(pmdp);
			goto update_the_pte;
		}
		ptep = pte_alloc_kernel(pmdp, idx);
		if (!ptep)
			continue;
update_the_pte:
		radix__pte_update(&init_mm, idx, ptep, clear, 0, 0);
	}

	radix__flush_tlb_kernel_range(start, end);
}

void radix__mark_rodata_ro(void)
{
	unsigned long start, end;

	start = (unsigned long)_stext;
	end = (unsigned long)__init_begin;

	radix__change_memory_range(start, end, _PAGE_WRITE);
}

void radix__mark_initmem_nx(void)
{
	unsigned long start = (unsigned long)__init_begin;
	unsigned long end = (unsigned long)__init_end;

	radix__change_memory_range(start, end, _PAGE_EXEC);
}
#endif /* CONFIG_STRICT_KERNEL_RWX */

static inline void __meminit
print_mapping(unsigned long start, unsigned long end, unsigned long size, bool exec)
{
	char buf[10];

	if (end <= start)
		return;

	string_get_size(size, 1, STRING_UNITS_2, buf, sizeof(buf));

	pr_info("Mapped 0x%016lx-0x%016lx with %s pages%s\n", start, end, buf,
		exec ? " (exec)" : "");
}

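/*
 * Return the next address at which the linear-mapping page size may need
 * to change. With STRICT_KERNEL_RWX, mappings must not cross __init_begin
 * so that kernel text/rodata can later be given different permissions.
 */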
static unsigned long next_boundary(unsigned long addr, unsigned long end)
{
#ifdef CONFIG_STRICT_KERNEL_RWX
	if (addr < __pa_symbol(__init_begin))
		return __pa_symbol(__init_begin);
#endif
	return end;
}

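/*
 * Map a physical range into the kernel linear mapping, using the largest
 * page size (1G, 2M or base page) that alignment, the remaining gap and
 * the supported MMU page sizes allow. Ranges overlapping kernel text or
 * the interrupt vectors are mapped executable; everything else is
 * PAGE_KERNEL.
 */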
static int __meminit create_physical_mapping(unsigned long start,
					     unsigned long end,
					     int nid)
{
	unsigned long vaddr, addr, mapping_size = 0;
	bool prev_exec, exec = false;
	pgprot_t prot;
	int psize;

	start = _ALIGN_UP(start, PAGE_SIZE);
	for (addr = start; addr < end; addr += mapping_size) {
		unsigned long gap, previous_size;
		int rc;

		gap = next_boundary(addr, end) - addr;
		previous_size = mapping_size;
		prev_exec = exec;

		if (IS_ALIGNED(addr, PUD_SIZE) && gap >= PUD_SIZE &&
		    mmu_psize_defs[MMU_PAGE_1G].shift) {
			mapping_size = PUD_SIZE;
			psize = MMU_PAGE_1G;
		} else if (IS_ALIGNED(addr, PMD_SIZE) && gap >= PMD_SIZE &&
			   mmu_psize_defs[MMU_PAGE_2M].shift) {
			mapping_size = PMD_SIZE;
			psize = MMU_PAGE_2M;
		} else {
			mapping_size = PAGE_SIZE;
			psize = mmu_virtual_psize;
		}

		vaddr = (unsigned long)__va(addr);

		if (overlaps_kernel_text(vaddr, vaddr + mapping_size) ||
		    overlaps_interrupt_vector_text(vaddr, vaddr + mapping_size)) {
			prot = PAGE_KERNEL_X;
			exec = true;
		} else {
			prot = PAGE_KERNEL;
			exec = false;
		}

		if (mapping_size != previous_size || exec != prev_exec) {
			print_mapping(start, addr, previous_size, prev_exec);
			start = addr;
		}

		rc = __map_kernel_page(vaddr, addr, prot, mapping_size, nid, start, end);
		if (rc)
			return rc;

		update_page_count(psize, 1);
	}

	print_mapping(start, addr, mapping_size, exec);
	return 0;
}

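/*
 * Boot-time setup of the radix page tables: create the linear mapping for
 * every memblock region, work out the PID allocation range, allocate the
 * process table and install a guard PID for init_mm.
 */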
static void __init radix_init_pgtable(void)
{
	unsigned long rts_field;
	struct memblock_region *reg;

	/* We don't support slb for radix */
	mmu_slb_size = 0;
	/*
	 * Create the linear mapping, using standard page size for now
	 */
	for_each_memblock(memory, reg) {
		/*
		 * The memblock allocator is up at this point, so the
		 * page tables will be allocated within the range. No
		 * need for a node (which we don't have yet).
		 */

		if ((reg->base + reg->size) >= RADIX_VMALLOC_START) {
			pr_warn("Outside the supported range\n");
			continue;
		}

		WARN_ON(create_physical_mapping(reg->base,
						reg->base + reg->size,
						-1));
	}

	/* Find out how many PID bits are supported */
	if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
		if (!mmu_pid_bits)
			mmu_pid_bits = 20;
		mmu_base_pid = 1;
	} else if (cpu_has_feature(CPU_FTR_HVMODE)) {
		if (!mmu_pid_bits)
			mmu_pid_bits = 20;
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
		/*
		 * When KVM is possible, we only use the top half of the
		 * PID space to avoid collisions between host and guest PIDs
		 * which can cause problems due to prefetch when exiting the
		 * guest with AIL=3
		 */
		mmu_base_pid = 1 << (mmu_pid_bits - 1);
#else
		mmu_base_pid = 1;
#endif
	} else {
		/* The guest uses the bottom half of the PID space */
		if (!mmu_pid_bits)
			mmu_pid_bits = 19;
		mmu_base_pid = 1;
	}

	/*
	 * Allocate Partition table and process table for the
	 * host.
	 */
	BUG_ON(PRTB_SIZE_SHIFT > 36);
	process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT, -1, 0, 0);
	/*
	 * Fill in the process table.
	 */
	rts_field = radix__get_tree_size();
	process_tb->prtb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE);

	/*
	 * The init_mm context is given the first available (non-zero) PID,
	 * which is the "guard PID" and contains no page table. PIDR should
	 * never be set to zero because that duplicates the kernel address
	 * space at the 0x0... offset (quadrant 0)!
	 *
	 * An arbitrary PID that may later be allocated by the PID allocator
	 * for userspace processes must not be used either, because that
	 * would cause stale user mappings for that PID on CPUs outside of
	 * the TLB invalidation scheme (because it won't be in mm_cpumask).
	 *
	 * So permanently carve out one PID for the purpose of a guard PID.
	 */
	init_mm.context.id = mmu_base_pid;
	mmu_base_pid++;
}

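/*
 * Set up partition table entry 0 for the host: double word 0 points at
 * the kernel radix tree (PATB_HR), double word 1 at the process table
 * (PATB_GR).
 */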
static void __init radix_init_partition_table(void)
{
	unsigned long rts_field, dw0, dw1;

	mmu_partition_table_init();
	rts_field = radix__get_tree_size();
	dw0 = rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE | PATB_HR;
	dw1 = __pa(process_tb) | (PRTB_SIZE_SHIFT - 12) | PATB_GR;
	mmu_partition_table_set_entry(0, dw0, dw1, false);

	pr_info("Initializing Radix MMU\n");
}

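/* Translate a radix page-size shift into an MMU_PAGE_* index, or -1. */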
static int __init get_idx_from_shift(unsigned int shift)
{
	int idx = -1;

	switch (shift) {
	case 0xc:
		idx = MMU_PAGE_4K;
		break;
	case 0x10:
		idx = MMU_PAGE_64K;
		break;
	case 0x15:
		idx = MMU_PAGE_2M;
		break;
	case 0x1e:
		idx = MMU_PAGE_1G;
		break;
	}
	return idx;
}

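/*
 * Flat device-tree scanner: for "cpu" nodes, pick up the PID width and
 * the radix AP encodings advertised by firmware, and fill in
 * mmu_psize_defs[] accordingly.
 */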
static int __init radix_dt_scan_page_sizes(unsigned long node,
					   const char *uname, int depth,
					   void *data)
{
	int size = 0;
	int shift, idx;
	unsigned int ap;
	const __be32 *prop;
	const char *type = of_get_flat_dt_prop(node, "device_type", NULL);

	/* We are scanning "cpu" nodes only */
	if (type == NULL || strcmp(type, "cpu") != 0)
		return 0;

	/* Find MMU PID size */
	prop = of_get_flat_dt_prop(node, "ibm,mmu-pid-bits", &size);
	if (prop && size == 4)
		mmu_pid_bits = be32_to_cpup(prop);

	/* Grab page size encodings */
	prop = of_get_flat_dt_prop(node, "ibm,processor-radix-AP-encodings", &size);
	if (!prop)
		return 0;

	pr_info("Page sizes from device-tree:\n");
	for (; size >= 4; size -= 4, ++prop) {

		struct mmu_psize_def *def;

		/* top 3 bits are the AP encoding */
		shift = be32_to_cpu(prop[0]) & ~(0xe << 28);
		ap = be32_to_cpu(prop[0]) >> 29;
		pr_info("Page size shift = %d AP=0x%x\n", shift, ap);

		idx = get_idx_from_shift(shift);
		if (idx < 0)
			continue;

		def = &mmu_psize_defs[idx];
		def->shift = shift;
		def->ap = ap;
	}

	/* needed ? */
	cur_cpu_spec->mmu_features &= ~MMU_FTR_NO_SLBIE_B;
	return 1;
}

void __init radix__early_init_devtree(void)
{
	int rc;

	/*
	 * Try to find the available page sizes in the device-tree
	 */
	rc = of_scan_flat_dt(radix_dt_scan_page_sizes, NULL);
	if (rc != 0) /* Found */
		goto found;
	/*
	 * let's assume we have page 4k and 64k support
	 */
	mmu_psize_defs[MMU_PAGE_4K].shift = 12;
	mmu_psize_defs[MMU_PAGE_4K].ap = 0x0;

	mmu_psize_defs[MMU_PAGE_64K].shift = 16;
	mmu_psize_defs[MMU_PAGE_64K].ap = 0x5;
found:
	return;
}

static void radix_init_amor(void)
{
	/*
	 * In HV mode, we init AMOR (Authority Mask Override Register) so that
	 * the hypervisor and guest can setup IAMR (Instruction Authority Mask
	 * Register), enable key 0 and set it to 1.
	 *
	 * AMOR = 0b1100 .... 0000 (Mask for key 0 is 11)
	 */
	mtspr(SPRN_AMOR, (3ul << 62));
}

#ifdef CONFIG_PPC_KUEP
void setup_kuep(bool disabled)
{
	if (disabled || !early_radix_enabled())
		return;

	if (smp_processor_id() == boot_cpuid)
		pr_info("Activating Kernel Userspace Execution Prevention\n");

	/*
	 * Radix always uses key0 of the IAMR to determine if an access is
	 * allowed. We set bit 0 (IBM bit 1) of key0, to prevent instruction
	 * fetch.
	 */
	mtspr(SPRN_IAMR, (1ul << 62));
}
#endif

#ifdef CONFIG_PPC_KUAP
void setup_kuap(bool disabled)
{
	if (disabled || !early_radix_enabled())
		return;

	if (smp_processor_id() == boot_cpuid) {
		pr_info("Activating Kernel Userspace Access Prevention\n");
		cur_cpu_spec->mmu_features |= MMU_FTR_RADIX_KUAP;
	}

	/* Make sure userspace can't change the AMR */
	mtspr(SPRN_UAMOR, 0);
	mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
	isync();
}
#endif

void __init radix__early_init_mmu(void)
{
	unsigned long lpcr;

#ifdef CONFIG_PPC_64K_PAGES
	/* PAGE_SIZE mappings */
	mmu_virtual_psize = MMU_PAGE_64K;
#else
	mmu_virtual_psize = MMU_PAGE_4K;
#endif

#ifdef CONFIG_SPARSEMEM_VMEMMAP
	/* vmemmap mapping */
	if (mmu_psize_defs[MMU_PAGE_2M].shift) {
		/*
		 * map vmemmap using 2M if available
		 */
		mmu_vmemmap_psize = MMU_PAGE_2M;
	} else
		mmu_vmemmap_psize = mmu_virtual_psize;
#endif
	/*
	 * initialize page table size
	 */
	__pte_index_size = RADIX_PTE_INDEX_SIZE;
	__pmd_index_size = RADIX_PMD_INDEX_SIZE;
	__pud_index_size = RADIX_PUD_INDEX_SIZE;
	__pgd_index_size = RADIX_PGD_INDEX_SIZE;
	__pud_cache_index = RADIX_PUD_INDEX_SIZE;
	__pte_table_size = RADIX_PTE_TABLE_SIZE;
	__pmd_table_size = RADIX_PMD_TABLE_SIZE;
	__pud_table_size = RADIX_PUD_TABLE_SIZE;
	__pgd_table_size = RADIX_PGD_TABLE_SIZE;

	__pmd_val_bits = RADIX_PMD_VAL_BITS;
	__pud_val_bits = RADIX_PUD_VAL_BITS;
	__pgd_val_bits = RADIX_PGD_VAL_BITS;

	__kernel_virt_start = RADIX_KERN_VIRT_START;
	__vmalloc_start = RADIX_VMALLOC_START;
	__vmalloc_end = RADIX_VMALLOC_END;
	__kernel_io_start = RADIX_KERN_IO_START;
	__kernel_io_end = RADIX_KERN_IO_END;
	vmemmap = (struct page *)RADIX_VMEMMAP_START;
	ioremap_bot = IOREMAP_BASE;

#ifdef CONFIG_PCI
	pci_io_base = ISA_IO_BASE;
#endif
	__pte_frag_nr = RADIX_PTE_FRAG_NR;
	__pte_frag_size_shift = RADIX_PTE_FRAG_SIZE_SHIFT;
	__pmd_frag_nr = RADIX_PMD_FRAG_NR;
	__pmd_frag_size_shift = RADIX_PMD_FRAG_SIZE_SHIFT;

	radix_init_pgtable();

	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
		lpcr = mfspr(SPRN_LPCR);
		mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
		radix_init_partition_table();
		radix_init_amor();
	} else {
		radix_init_pseries();
	}

	memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);

	/* Switch to the guard PID before turning on MMU */
	radix__switch_mmu_context(NULL, &init_mm);
	tlbiel_all();
}

void radix__early_init_mmu_secondary(void)
{
	unsigned long lpcr;
	/*
	 * update partition table control register and UPRT
	 */
	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
		lpcr = mfspr(SPRN_LPCR);
		mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);

		set_ptcr_when_no_uv(__pa(partition_tb) |
				    (PATB_SIZE_SHIFT - 12));

		radix_init_amor();
	}

	radix__switch_mmu_context(NULL, &init_mm);
	tlbiel_all();
}

void radix__mmu_cleanup_all(void)
{
	unsigned long lpcr;

	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
		lpcr = mfspr(SPRN_LPCR);
		mtspr(SPRN_LPCR, lpcr & ~LPCR_UPRT);
		set_ptcr_when_no_uv(0);
		powernv_set_nmmu_ptcr(0);
		radix__flush_tlb_all();
	}
}

void radix__setup_initial_memory_limit(phys_addr_t first_memblock_base,
				phys_addr_t first_memblock_size)
{
	/*
	 * We don't currently support the first MEMBLOCK not mapping 0
	 * physical on those processors
	 */
	BUG_ON(first_memblock_base != 0);

	/*
	 * Radix mode is not limited by RMA / VRMA addressing.
	 */
	ppc64_rma_size = ULONG_MAX;
}

#ifdef CONFIG_MEMORY_HOTPLUG
static void free_pte_table(pte_t *pte_start, pmd_t *pmd)
{
	pte_t *pte;
	int i;

	for (i = 0; i < PTRS_PER_PTE; i++) {
		pte = pte_start + i;
		if (!pte_none(*pte))
			return;
	}

	pte_free_kernel(&init_mm, pte_start);
	pmd_clear(pmd);
}

static void free_pmd_table(pmd_t *pmd_start, pud_t *pud)
{
	pmd_t *pmd;
	int i;

	for (i = 0; i < PTRS_PER_PMD; i++) {
		pmd = pmd_start + i;
		if (!pmd_none(*pmd))
			return;
	}

	pmd_free(&init_mm, pmd_start);
	pud_clear(pud);
}

struct change_mapping_params {
	pte_t *pte;
	unsigned long start;
	unsigned long end;
	unsigned long aligned_start;
	unsigned long aligned_end;
};

static int __meminit stop_machine_change_mapping(void *data)
{
	struct change_mapping_params *params =
			(struct change_mapping_params *)data;

	if (!data)
		return -1;

	spin_unlock(&init_mm.page_table_lock);
	pte_clear(&init_mm, params->aligned_start, params->pte);
	create_physical_mapping(__pa(params->aligned_start), __pa(params->start), -1);
	create_physical_mapping(__pa(params->end), __pa(params->aligned_end), -1);
	spin_lock(&init_mm.page_table_lock);
	return 0;
}

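/*
 * Clear every PTE backing [addr, end). Callers are expected to pass
 * page-aligned ranges; anything else is warned about and skipped.
 */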
static void remove_pte_table(pte_t *pte_start, unsigned long addr,
			     unsigned long end)
{
	unsigned long next;
	pte_t *pte;

	pte = pte_start + pte_index(addr);
	for (; addr < end; addr = next, pte++) {
		next = (addr + PAGE_SIZE) & PAGE_MASK;
		if (next > end)
			next = end;

		if (!pte_present(*pte))
			continue;

		if (!PAGE_ALIGNED(addr) || !PAGE_ALIGNED(next)) {
			/*
			 * The vmemmap_free() and remove_section_mapping()
			 * codepaths call us with aligned addresses.
			 */
			WARN_ONCE(1, "%s: unaligned range\n", __func__);
			continue;
		}

		pte_clear(&init_mm, addr, pte);
	}
}

/*
 * clear the pte and potentially split the mapping helper
 */
static void __meminit split_kernel_mapping(unsigned long addr, unsigned long end,
				unsigned long size, pte_t *pte)
{
	unsigned long mask = ~(size - 1);
	unsigned long aligned_start = addr & mask;
	unsigned long aligned_end = addr + size;
	struct change_mapping_params params;
	bool split_region = false;

	if ((end - addr) < size) {
		/*
		 * We're going to clear the PTE but have not yet flushed
		 * the mapping, so it is time to remap and flush. If the
		 * effects are visible outside the processor, or if we are
		 * running in code close to the mapping we cleared, we are
		 * in trouble.
		 */
		if (overlaps_kernel_text(aligned_start, addr) ||
			overlaps_kernel_text(end, aligned_end)) {
			/*
			 * Hack, just return, don't pte_clear
			 */
			WARN_ONCE(1, "Linear mapping %lx->%lx overlaps kernel "
				  "text, not splitting\n", addr, end);
			return;
		}
		split_region = true;
	}

	if (split_region) {
		params.pte = pte;
		params.start = addr;
		params.end = end;
		params.aligned_start = addr & ~(size - 1);
		params.aligned_end = min_t(unsigned long, aligned_end,
				(unsigned long)__va(memblock_end_of_DRAM()));
		stop_machine(stop_machine_change_mapping, &params, NULL);
		return;
	}

	pte_clear(&init_mm, addr, pte);
}

static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
			     unsigned long end)
{
	unsigned long next;
	pte_t *pte_base;
	pmd_t *pmd;

	pmd = pmd_start + pmd_index(addr);
	for (; addr < end; addr = next, pmd++) {
		next = pmd_addr_end(addr, end);

		if (!pmd_present(*pmd))
			continue;

		if (pmd_is_leaf(*pmd)) {
			split_kernel_mapping(addr, end, PMD_SIZE, (pte_t *)pmd);
			continue;
		}

		pte_base = (pte_t *)pmd_page_vaddr(*pmd);
		remove_pte_table(pte_base, addr, next);
		free_pte_table(pte_base, pmd);
	}
}

static void remove_pud_table(pud_t *pud_start, unsigned long addr,
			     unsigned long end)
{
	unsigned long next;
	pmd_t *pmd_base;
	pud_t *pud;

	pud = pud_start + pud_index(addr);
	for (; addr < end; addr = next, pud++) {
		next = pud_addr_end(addr, end);

		if (!pud_present(*pud))
			continue;

		if (pud_is_leaf(*pud)) {
			split_kernel_mapping(addr, end, PUD_SIZE, (pte_t *)pud);
			continue;
		}

		pmd_base = (pmd_t *)pud_page_vaddr(*pud);
		remove_pmd_table(pmd_base, addr, next);
		free_pmd_table(pmd_base, pud);
	}
}

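/*
 * Tear down the kernel mapping for [start, end): walk the page tables,
 * clearing or splitting leaf entries and freeing empty page-table pages,
 * then flush the TLB for the range.
 */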
static void __meminit remove_pagetable(unsigned long start, unsigned long end)
{
	unsigned long addr, next;
	pud_t *pud_base;
	pgd_t *pgd;

	spin_lock(&init_mm.page_table_lock);

	for (addr = start; addr < end; addr = next) {
		next = pgd_addr_end(addr, end);

		pgd = pgd_offset_k(addr);
		if (!pgd_present(*pgd))
			continue;

		if (pgd_is_leaf(*pgd)) {
			split_kernel_mapping(addr, end, PGDIR_SIZE, (pte_t *)pgd);
			continue;
		}

		pud_base = (pud_t *)pgd_page_vaddr(*pgd);
		remove_pud_table(pud_base, addr, next);
	}

	spin_unlock(&init_mm.page_table_lock);
	radix__flush_tlb_kernel_range(start, end);
}

int __meminit radix__create_section_mapping(unsigned long start, unsigned long end, int nid)
{
	if (end >= RADIX_VMALLOC_START) {
		pr_warn("Outside the supported range\n");
		return -1;
	}

	return create_physical_mapping(__pa(start), __pa(end), nid);
}

int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end)
{
	remove_pagetable(start, end);
	return 0;
}
#endif /* CONFIG_MEMORY_HOTPLUG */

#ifdef CONFIG_SPARSEMEM_VMEMMAP
static int __map_kernel_page_nid(unsigned long ea, unsigned long pa,
				 pgprot_t flags, unsigned int map_page_size,
				 int nid)
{
	return __map_kernel_page(ea, pa, flags, map_page_size, nid, 0, 0);
}

int __meminit radix__vmemmap_create_mapping(unsigned long start,
				      unsigned long page_size,
				      unsigned long phys)
{
	/* Create a PTE encoding */
	unsigned long flags = _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_KERNEL_RW;
	int nid = early_pfn_to_nid(phys >> PAGE_SHIFT);
	int ret;

	if ((start + page_size) >= RADIX_VMEMMAP_END) {
		pr_warn("Outside the supported range\n");
		return -1;
	}

	ret = __map_kernel_page_nid(start, phys, __pgprot(flags), page_size, nid);
	BUG_ON(ret);

	return 0;
}

#ifdef CONFIG_MEMORY_HOTPLUG
void __meminit radix__vmemmap_remove_mapping(unsigned long start, unsigned long page_size)
{
	remove_pagetable(start, start + page_size);
}
#endif
#endif

#ifdef CONFIG_TRANSPARENT_HUGEPAGE

unsigned long radix__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
				  pmd_t *pmdp, unsigned long clr,
				  unsigned long set)
{
	unsigned long old;

#ifdef CONFIG_DEBUG_VM
	WARN_ON(!radix__pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp));
	assert_spin_locked(pmd_lockptr(mm, pmdp));
#endif

	old = radix__pte_update(mm, addr, (pte_t *)pmdp, clr, set, 1);
	trace_hugepage_update(addr, old, clr, set);

	return old;
}

pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address,
			pmd_t *pmdp)
{
	pmd_t pmd;

	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
	VM_BUG_ON(radix__pmd_trans_huge(*pmdp));
	VM_BUG_ON(pmd_devmap(*pmdp));
	/*
	 * khugepaged calls this for normal pmd
	 */
	pmd = *pmdp;
	pmd_clear(pmdp);

	/*FIXME!! Verify whether we need this kick below */
	serialize_against_pte_lookup(vma->vm_mm);

	radix__flush_tlb_collapsed_pmd(vma->vm_mm, address);

	return pmd;
}

/*
 * For us pgtable_t is pte_t *. In order to save the deposited
 * page table, we consider the allocated page table as a list
 * head. On withdraw we need to make sure we zero out the used
 * list_head memory area.
 */
void radix__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
				 pgtable_t pgtable)
{
	struct list_head *lh = (struct list_head *) pgtable;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	if (!pmd_huge_pte(mm, pmdp))
		INIT_LIST_HEAD(lh);
	else
		list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
	pmd_huge_pte(mm, pmdp) = pgtable;
}

pgtable_t radix__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
	pte_t *ptep;
	pgtable_t pgtable;
	struct list_head *lh;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	pgtable = pmd_huge_pte(mm, pmdp);
	lh = (struct list_head *) pgtable;
	if (list_empty(lh))
		pmd_huge_pte(mm, pmdp) = NULL;
	else {
		pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
		list_del(lh);
	}
	ptep = (pte_t *) pgtable;
	*ptep = __pte(0);
	ptep++;
	*ptep = __pte(0);
	return pgtable;
}

pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm,
				     unsigned long addr, pmd_t *pmdp)
{
	pmd_t old_pmd;
	unsigned long old;

	old = radix__pmd_hugepage_update(mm, addr, pmdp, ~0UL, 0);
	old_pmd = __pmd(old);
	/*
	 * Serialize against find_current_mm_pte which does lock-less
	 * lookup in page tables with local interrupts disabled. For huge pages
	 * it casts pmd_t to pte_t. Since format of pte_t is different from
	 * pmd_t we want to prevent transit from pmd pointing to page table
	 * to pmd pointing to huge page (and back) while interrupts are disabled.
	 * We clear pmd to possibly replace it with page table pointer in
	 * different code paths. So make sure we wait for the parallel
	 * find_current_mm_pte to finish.
	 */
	serialize_against_pte_lookup(mm);
	return old_pmd;
}

#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
				  pte_t entry, unsigned long address, int psize)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED |
					      _PAGE_RW | _PAGE_EXEC);

	unsigned long change = pte_val(entry) ^ pte_val(*ptep);
	/*
	 * To avoid NMMU hang while relaxing access, we need to mark
	 * the pte invalid in between.
	 */
	if ((change & _PAGE_RW) && atomic_read(&mm->context.copros) > 0) {
		unsigned long old_pte, new_pte;

		old_pte = __radix_pte_update(ptep, _PAGE_PRESENT, _PAGE_INVALID);
		/*
		 * new value of pte
		 */
		new_pte = old_pte | set;
		radix__flush_tlb_page_psize(mm, address, psize);
		__radix_pte_update(ptep, _PAGE_INVALID, new_pte);
	} else {
		__radix_pte_update(ptep, 0, set);
		/*
		 * Book3S does not require a TLB flush when relaxing access
		 * restrictions when the address space is not attached to a
		 * NMMU, because the core MMU will reload the pte after taking
		 * an access fault, which is defined by the architecture.
		 */
	}
	/* See ptesync comment in radix__set_pte_at */
}

void radix__ptep_modify_prot_commit(struct vm_area_struct *vma,
				    unsigned long addr, pte_t *ptep,
				    pte_t old_pte, pte_t pte)
{
	struct mm_struct *mm = vma->vm_mm;

	/*
	 * To avoid NMMU hang while relaxing access we need to flush the tlb before
	 * we set the new value. We need to do this only for radix, because hash
	 * translation does flush when updating the linux pte.
	 */
	if (is_pte_rw_upgrade(pte_val(old_pte), pte_val(pte)) &&
	    (atomic_read(&mm->context.copros) > 0))
		radix__flush_tlb_page(vma, addr);

	set_pte_at(mm, addr, ptep, pte);
}

int __init arch_ioremap_pud_supported(void)
{
	/* HPT does not cope with large pages in the vmalloc area */
	return radix_enabled();
}

int __init arch_ioremap_pmd_supported(void)
{
	return radix_enabled();
}

int p4d_free_pud_page(p4d_t *p4d, unsigned long addr)
{
	return 0;
}

int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
{
	pte_t *ptep = (pte_t *)pud;
	pte_t new_pud = pfn_pte(__phys_to_pfn(addr), prot);

	if (!radix_enabled())
		return 0;

	set_pte_at(&init_mm, 0 /* radix unused */, ptep, new_pud);

	return 1;
}

int pud_clear_huge(pud_t *pud)
{
	if (pud_huge(*pud)) {
		pud_clear(pud);
		return 1;
	}

	return 0;
}

int pud_free_pmd_page(pud_t *pud, unsigned long addr)
{
	pmd_t *pmd;
	int i;

	pmd = (pmd_t *)pud_page_vaddr(*pud);
	pud_clear(pud);

	flush_tlb_kernel_range(addr, addr + PUD_SIZE);

	for (i = 0; i < PTRS_PER_PMD; i++) {
		if (!pmd_none(pmd[i])) {
			pte_t *pte;
			pte = (pte_t *)pmd_page_vaddr(pmd[i]);

			pte_free_kernel(&init_mm, pte);
		}
	}

	pmd_free(&init_mm, pmd);

	return 1;
}

int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
{
	pte_t *ptep = (pte_t *)pmd;
	pte_t new_pmd = pfn_pte(__phys_to_pfn(addr), prot);

	if (!radix_enabled())
		return 0;

	set_pte_at(&init_mm, 0 /* radix unused */, ptep, new_pmd);

	return 1;
}

int pmd_clear_huge(pmd_t *pmd)
{
	if (pmd_huge(*pmd)) {
		pmd_clear(pmd);
		return 1;
	}

	return 0;
}

int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
{
	pte_t *pte;

	pte = (pte_t *)pmd_page_vaddr(*pmd);
	pmd_clear(pmd);

	flush_tlb_kernel_range(addr, addr + PMD_SIZE);

	pte_free_kernel(&init_mm, pte);

	return 1;
}

int __init arch_ioremap_p4d_supported(void)
{
	return 0;
}