// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Page table handling routines for radix page table.
 *
 * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
 */

#define pr_fmt(fmt) "radix-mmu: " fmt

#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/sched/mm.h>
#include <linux/memblock.h>
#include <linux/of_fdt.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/string_helpers.h>
#include <linux/memory.h>

#include <asm/pgalloc.h>
#include <asm/mmu_context.h>
#include <asm/dma.h>
#include <asm/machdep.h>
#include <asm/mmu.h>
#include <asm/firmware.h>
#include <asm/powernv.h>
#include <asm/sections.h>
#include <asm/smp.h>
#include <asm/trace.h>
#include <asm/uaccess.h>
#include <asm/ultravisor.h>

#include <trace/events/thp.h>

unsigned int mmu_pid_bits;
unsigned int mmu_base_pid;
unsigned int radix_mem_block_size __ro_after_init;

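/*
 * Boot-time page table allocator: a thin wrapper around memblock that
 * honours the optional nid/region placement hints and panics if the
 * allocation fails, since there is no way to recover this early.
 */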
static __ref void *early_alloc_pgtable(unsigned long size, int nid,
			unsigned long region_start, unsigned long region_end)
{
	phys_addr_t min_addr = MEMBLOCK_LOW_LIMIT;
	phys_addr_t max_addr = MEMBLOCK_ALLOC_ANYWHERE;
	void *ptr;

	if (region_start)
		min_addr = region_start;
	if (region_end)
		max_addr = region_end;

	ptr = memblock_alloc_try_nid(size, size, min_addr, max_addr, nid);

	if (!ptr)
		panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%pa max_addr=%pa\n",
		      __func__, size, size, nid, &min_addr, &max_addr);

	return ptr;
}

/*
 * When allocating pud or pmd pointers, we allocate a complete page
 * of PAGE_SIZE rather than PUD_TABLE_SIZE or PMD_TABLE_SIZE. This
 * is to ensure that the page obtained from the memblock allocator
 * can be used completely as a page table page and can be freed
 * correctly when the page table entries are removed.
 */
static int early_map_kernel_page(unsigned long ea, unsigned long pa,
			  pgprot_t flags,
			  unsigned int map_page_size,
			  int nid,
			  unsigned long region_start, unsigned long region_end)
{
	unsigned long pfn = pa >> PAGE_SHIFT;
	pgd_t *pgdp;
	p4d_t *p4dp;
	pud_t *pudp;
	pmd_t *pmdp;
	pte_t *ptep;

	pgdp = pgd_offset_k(ea);
	p4dp = p4d_offset(pgdp, ea);
	if (p4d_none(*p4dp)) {
		pudp = early_alloc_pgtable(PAGE_SIZE, nid,
					   region_start, region_end);
		p4d_populate(&init_mm, p4dp, pudp);
	}
	pudp = pud_offset(p4dp, ea);
	if (map_page_size == PUD_SIZE) {
		ptep = (pte_t *)pudp;
		goto set_the_pte;
	}
	if (pud_none(*pudp)) {
		pmdp = early_alloc_pgtable(PAGE_SIZE, nid, region_start,
					   region_end);
		pud_populate(&init_mm, pudp, pmdp);
	}
	pmdp = pmd_offset(pudp, ea);
	if (map_page_size == PMD_SIZE) {
		ptep = pmdp_ptep(pmdp);
		goto set_the_pte;
	}
	if (!pmd_present(*pmdp)) {
		ptep = early_alloc_pgtable(PAGE_SIZE, nid,
					   region_start, region_end);
		pmd_populate_kernel(&init_mm, pmdp, ptep);
	}
	ptep = pte_offset_kernel(pmdp, ea);

set_the_pte:
	set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
	smp_wmb();
	return 0;
}

/*
 * nid, region_start, and region_end are hints to try to place the page
 * table memory in the same node or region.
 */
static int __map_kernel_page(unsigned long ea, unsigned long pa,
			  pgprot_t flags,
			  unsigned int map_page_size,
			  int nid,
			  unsigned long region_start, unsigned long region_end)
{
	unsigned long pfn = pa >> PAGE_SHIFT;
	pgd_t *pgdp;
	p4d_t *p4dp;
	pud_t *pudp;
	pmd_t *pmdp;
	pte_t *ptep;
	/*
	 * Make sure task size is correct as per the max addr
	 */
	BUILD_BUG_ON(TASK_SIZE_USER64 > RADIX_PGTABLE_RANGE);

#ifdef CONFIG_PPC_64K_PAGES
	BUILD_BUG_ON(RADIX_KERN_MAP_SIZE != (1UL << MAX_EA_BITS_PER_CONTEXT));
#endif

	if (unlikely(!slab_is_available()))
		return early_map_kernel_page(ea, pa, flags, map_page_size,
						nid, region_start, region_end);

	/*
	 * Should make page table allocation functions be able to take a
	 * node, so we can place kernel page tables on the right nodes after
	 * boot.
	 */
	pgdp = pgd_offset_k(ea);
	p4dp = p4d_offset(pgdp, ea);
	pudp = pud_alloc(&init_mm, p4dp, ea);
	if (!pudp)
		return -ENOMEM;
	if (map_page_size == PUD_SIZE) {
		ptep = (pte_t *)pudp;
		goto set_the_pte;
	}
	pmdp = pmd_alloc(&init_mm, pudp, ea);
	if (!pmdp)
		return -ENOMEM;
	if (map_page_size == PMD_SIZE) {
		ptep = pmdp_ptep(pmdp);
		goto set_the_pte;
	}
	ptep = pte_alloc_kernel(pmdp, ea);
	if (!ptep)
		return -ENOMEM;

set_the_pte:
	set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
	smp_wmb();
	return 0;
}

int radix__map_kernel_page(unsigned long ea, unsigned long pa,
			  pgprot_t flags,
			  unsigned int map_page_size)
{
	return __map_kernel_page(ea, pa, flags, map_page_size, -1, 0, 0);
}

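/*
 * radix__change_memory_range() walks the kernel mapping one PAGE_SIZE
 * step at a time, clears the requested PTE bits at whatever level the
 * address is mapped (pud/pmd leaf or pte), and then flushes the TLB
 * for the whole range. It backs radix__mark_rodata_ro() and
 * radix__mark_initmem_nx() below.
 */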
#ifdef CONFIG_STRICT_KERNEL_RWX
void radix__change_memory_range(unsigned long start, unsigned long end,
				unsigned long clear)
{
	unsigned long idx;
	pgd_t *pgdp;
	p4d_t *p4dp;
	pud_t *pudp;
	pmd_t *pmdp;
	pte_t *ptep;

	start = ALIGN_DOWN(start, PAGE_SIZE);
	end = PAGE_ALIGN(end); // aligns up

	pr_debug("Changing flags on range %lx-%lx removing 0x%lx\n",
		 start, end, clear);

	for (idx = start; idx < end; idx += PAGE_SIZE) {
		pgdp = pgd_offset_k(idx);
		p4dp = p4d_offset(pgdp, idx);
		pudp = pud_alloc(&init_mm, p4dp, idx);
		if (!pudp)
			continue;
		if (pud_is_leaf(*pudp)) {
			ptep = (pte_t *)pudp;
			goto update_the_pte;
		}
		pmdp = pmd_alloc(&init_mm, pudp, idx);
		if (!pmdp)
			continue;
		if (pmd_is_leaf(*pmdp)) {
			ptep = pmdp_ptep(pmdp);
			goto update_the_pte;
		}
		ptep = pte_alloc_kernel(pmdp, idx);
		if (!ptep)
			continue;
update_the_pte:
		radix__pte_update(&init_mm, idx, ptep, clear, 0, 0);
	}

	radix__flush_tlb_kernel_range(start, end);
}

void radix__mark_rodata_ro(void)
{
	unsigned long start, end;

	start = (unsigned long)_stext;
	end = (unsigned long)__init_begin;

	radix__change_memory_range(start, end, _PAGE_WRITE);
}

void radix__mark_initmem_nx(void)
{
	unsigned long start = (unsigned long)__init_begin;
	unsigned long end = (unsigned long)__init_end;

	radix__change_memory_range(start, end, _PAGE_EXEC);
}
#endif /* CONFIG_STRICT_KERNEL_RWX */

static inline void __meminit
print_mapping(unsigned long start, unsigned long end, unsigned long size, bool exec)
{
	char buf[10];

	if (end <= start)
		return;

	string_get_size(size, 1, STRING_UNITS_2, buf, sizeof(buf));

	pr_info("Mapped 0x%016lx-0x%016lx with %s pages%s\n", start, end, buf,
		exec ? " (exec)" : "");
}

static unsigned long next_boundary(unsigned long addr, unsigned long end)
{
#ifdef CONFIG_STRICT_KERNEL_RWX
	if (addr < __pa_symbol(__init_begin))
		return __pa_symbol(__init_begin);
#endif
	return end;
}

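/*
 * Map a physical range into the kernel linear mapping, preferring the
 * largest page size (1G, then 2M, then the base page) that the current
 * alignment, the remaining gap and the reported MMU page sizes allow.
 * Ranges overlapping kernel or interrupt vector text are mapped with
 * PAGE_KERNEL_X.
 */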
static int __meminit create_physical_mapping(unsigned long start,
					     unsigned long end,
					     unsigned long max_mapping_size,
					     int nid, pgprot_t _prot)
{
	unsigned long vaddr, addr, mapping_size = 0;
	bool prev_exec, exec = false;
	pgprot_t prot;
	int psize;

	start = ALIGN(start, PAGE_SIZE);
	for (addr = start; addr < end; addr += mapping_size) {
		unsigned long gap, previous_size;
		int rc;

		gap = next_boundary(addr, end) - addr;
		if (gap > max_mapping_size)
			gap = max_mapping_size;
		previous_size = mapping_size;
		prev_exec = exec;

		if (IS_ALIGNED(addr, PUD_SIZE) && gap >= PUD_SIZE &&
		    mmu_psize_defs[MMU_PAGE_1G].shift) {
			mapping_size = PUD_SIZE;
			psize = MMU_PAGE_1G;
		} else if (IS_ALIGNED(addr, PMD_SIZE) && gap >= PMD_SIZE &&
			   mmu_psize_defs[MMU_PAGE_2M].shift) {
			mapping_size = PMD_SIZE;
			psize = MMU_PAGE_2M;
		} else {
			mapping_size = PAGE_SIZE;
			psize = mmu_virtual_psize;
		}

		vaddr = (unsigned long)__va(addr);

		if (overlaps_kernel_text(vaddr, vaddr + mapping_size) ||
		    overlaps_interrupt_vector_text(vaddr, vaddr + mapping_size)) {
			prot = PAGE_KERNEL_X;
			exec = true;
		} else {
			prot = _prot;
			exec = false;
		}

		if (mapping_size != previous_size || exec != prev_exec) {
			print_mapping(start, addr, previous_size, prev_exec);
			start = addr;
		}

		rc = __map_kernel_page(vaddr, addr, prot, mapping_size, nid, start, end);
		if (rc)
			return rc;

		update_page_count(psize, 1);
	}

	print_mapping(start, addr, mapping_size, exec);
	return 0;
}

static void __init radix_init_pgtable(void)
{
	unsigned long rts_field;
	struct memblock_region *reg;

	/* We don't support slb for radix */
	mmu_slb_size = 0;

	/*
	 * Create the linear mapping
	 */
	for_each_memblock(memory, reg) {
		/*
		 * The memblock allocator is up at this point, so the
		 * page tables will be allocated within the range. No
		 * need for a node (which we don't have yet).
		 */

		if ((reg->base + reg->size) >= RADIX_VMALLOC_START) {
			pr_warn("Outside the supported range\n");
			continue;
		}

		WARN_ON(create_physical_mapping(reg->base,
						reg->base + reg->size,
						radix_mem_block_size,
						-1, PAGE_KERNEL));
	}

	/* Find out how many PID bits are supported */
	if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
		if (!mmu_pid_bits)
			mmu_pid_bits = 20;
		mmu_base_pid = 1;
	} else if (cpu_has_feature(CPU_FTR_HVMODE)) {
		if (!mmu_pid_bits)
			mmu_pid_bits = 20;
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
		/*
		 * When KVM is possible, we only use the top half of the
		 * PID space to avoid collisions between host and guest PIDs
		 * which can cause problems due to prefetch when exiting the
		 * guest with AIL=3
		 */
		mmu_base_pid = 1 << (mmu_pid_bits - 1);
#else
		mmu_base_pid = 1;
#endif
	} else {
		/* The guest uses the bottom half of the PID space */
		if (!mmu_pid_bits)
			mmu_pid_bits = 19;
		mmu_base_pid = 1;
	}

	/*
	 * Allocate Partition table and process table for the
	 * host.
	 */
	BUG_ON(PRTB_SIZE_SHIFT > 36);
	process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT, -1, 0, 0);
	/*
	 * Fill in the process table.
	 */
	rts_field = radix__get_tree_size();
	process_tb->prtb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE);

	/*
	 * The init_mm context is given the first available (non-zero) PID,
	 * which is the "guard PID" and contains no page table. PIDR should
	 * never be set to zero because that duplicates the kernel address
	 * space at the 0x0... offset (quadrant 0)!
	 *
	 * An arbitrary PID that may later be allocated by the PID allocator
	 * for userspace processes must not be used either, because that
	 * would cause stale user mappings for that PID on CPUs outside of
	 * the TLB invalidation scheme (because it won't be in mm_cpumask).
	 *
	 * So permanently carve out one PID for the purpose of a guard PID.
	 */
	init_mm.context.id = mmu_base_pid;
	mmu_base_pid++;
}

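/*
 * Install partition table entry 0 for bare-metal (HV) boot: dw0 points
 * at the kernel radix tree and dw1 at the process table, with PATB_HR
 * and PATB_GR marking both as radix.
 */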
static void __init radix_init_partition_table(void)
{
	unsigned long rts_field, dw0, dw1;

	mmu_partition_table_init();
	rts_field = radix__get_tree_size();
	dw0 = rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE | PATB_HR;
	dw1 = __pa(process_tb) | (PRTB_SIZE_SHIFT - 12) | PATB_GR;
	mmu_partition_table_set_entry(0, dw0, dw1, false);

	pr_info("Initializing Radix MMU\n");
}

static int __init get_idx_from_shift(unsigned int shift)
{
	int idx = -1;

	switch (shift) {
	case 0xc:
		idx = MMU_PAGE_4K;
		break;
	case 0x10:
		idx = MMU_PAGE_64K;
		break;
	case 0x15:
		idx = MMU_PAGE_2M;
		break;
	case 0x1e:
		idx = MMU_PAGE_1G;
		break;
	}
	return idx;
}

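/*
 * Flat device tree scan of the "cpu" node: picks up ibm,mmu-pid-bits
 * and decodes ibm,processor-radix-AP-encodings (page shift plus AP
 * bits) into mmu_psize_defs[].
 */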
static int __init radix_dt_scan_page_sizes(unsigned long node,
					   const char *uname, int depth,
					   void *data)
{
	int size = 0;
	int shift, idx;
	unsigned int ap;
	const __be32 *prop;
	const char *type = of_get_flat_dt_prop(node, "device_type", NULL);

	/* We are scanning "cpu" nodes only */
	if (type == NULL || strcmp(type, "cpu") != 0)
		return 0;

	/* Find MMU PID size */
	prop = of_get_flat_dt_prop(node, "ibm,mmu-pid-bits", &size);
	if (prop && size == 4)
		mmu_pid_bits = be32_to_cpup(prop);

	/* Grab page size encodings */
	prop = of_get_flat_dt_prop(node, "ibm,processor-radix-AP-encodings", &size);
	if (!prop)
		return 0;

	pr_info("Page sizes from device-tree:\n");
	for (; size >= 4; size -= 4, ++prop) {

		struct mmu_psize_def *def;

		/* top 3 bits are the AP encoding */
		shift = be32_to_cpu(prop[0]) & ~(0xe << 28);
		ap = be32_to_cpu(prop[0]) >> 29;
		pr_info("Page size shift = %d AP=0x%x\n", shift, ap);

		idx = get_idx_from_shift(shift);
		if (idx < 0)
			continue;

		def = &mmu_psize_defs[idx];
		def->shift = shift;
		def->ap = ap;
	}

	/* needed ? */
	cur_cpu_spec->mmu_features &= ~MMU_FTR_NO_SLBIE_B;
	return 1;
}

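/*
 * radix_mem_block_size caps the mapping size used for the linear
 * mapping (see create_physical_mapping()): 1GB on OPAL systems,
 * otherwise the ibm,lmb-size reported by the device tree, falling
 * back to MIN_MEMORY_BLOCK_SIZE.
 */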
#ifdef CONFIG_MEMORY_HOTPLUG
static int __init probe_memory_block_size(unsigned long node, const char *uname, int
					  depth, void *data)
{
	unsigned long *mem_block_size = (unsigned long *)data;
	const __be64 *prop;
	int len;

	if (depth != 1)
		return 0;

	if (strcmp(uname, "ibm,dynamic-reconfiguration-memory"))
		return 0;

	prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &len);
	if (!prop || len < sizeof(__be64))
		/*
		 * Nothing in the device tree
		 */
		*mem_block_size = MIN_MEMORY_BLOCK_SIZE;
	else
		*mem_block_size = be64_to_cpup(prop);
	return 1;
}

static unsigned long radix_memory_block_size(void)
{
	unsigned long mem_block_size = MIN_MEMORY_BLOCK_SIZE;

	/*
	 * The OPAL firmware features are set by now, so it is safe
	 * to test for OPAL here.
	 */
	if (firmware_has_feature(FW_FEATURE_OPAL))
		mem_block_size = 1UL * 1024 * 1024 * 1024;
	else
		of_scan_flat_dt(probe_memory_block_size, &mem_block_size);

	return mem_block_size;
}

#else   /* CONFIG_MEMORY_HOTPLUG */

static unsigned long radix_memory_block_size(void)
{
	return 1UL * 1024 * 1024 * 1024;
}

#endif /* CONFIG_MEMORY_HOTPLUG */


void __init radix__early_init_devtree(void)
{
	int rc;

	/*
	 * Try to find the available page sizes in the device-tree
	 */
	rc = of_scan_flat_dt(radix_dt_scan_page_sizes, NULL);
	if (!rc) {
		/*
		 * No page size details found in device tree.
		 * Let's assume we have page 4k and 64k support
		 */
		mmu_psize_defs[MMU_PAGE_4K].shift = 12;
		mmu_psize_defs[MMU_PAGE_4K].ap = 0x0;

		mmu_psize_defs[MMU_PAGE_64K].shift = 16;
		mmu_psize_defs[MMU_PAGE_64K].ap = 0x5;
	}

	/*
	 * Max mapping size used when mapping pages. We don't use
	 * ppc_md.memory_block_size() here because this gets called
	 * early and the machine probe hasn't run yet. Also, the
	 * pseries implementation only checks for ibm,lmb-size.
	 * All hypervisors supporting radix do expose that device
	 * tree node.
	 */
	radix_mem_block_size = radix_memory_block_size();
	return;
}

static void radix_init_amor(void)
{
	/*
	 * In HV mode, we init AMOR (Authority Mask Override Register) so that
	 * the hypervisor and guest can setup IAMR (Instruction Authority Mask
	 * Register), enable key 0 and set it to 1.
	 *
	 * AMOR = 0b1100 .... 0000 (Mask for key 0 is 11)
	 */
	mtspr(SPRN_AMOR, (3ul << 62));
}

#ifdef CONFIG_PPC_KUEP
void setup_kuep(bool disabled)
{
	if (disabled || !early_radix_enabled())
		return;

	if (smp_processor_id() == boot_cpuid)
		pr_info("Activating Kernel Userspace Execution Prevention\n");

	/*
	 * Radix always uses key0 of the IAMR to determine if an access is
	 * allowed. We set bit 0 (IBM bit 1) of key0, to prevent instruction
	 * fetch.
	 */
	mtspr(SPRN_IAMR, (1ul << 62));
}
#endif

#ifdef CONFIG_PPC_KUAP
void setup_kuap(bool disabled)
{
	if (disabled || !early_radix_enabled())
		return;

	if (smp_processor_id() == boot_cpuid) {
		pr_info("Activating Kernel Userspace Access Prevention\n");
		cur_cpu_spec->mmu_features |= MMU_FTR_RADIX_KUAP;
	}

	/* Make sure userspace can't change the AMR */
	mtspr(SPRN_UAMOR, 0);
	mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
	isync();
}
#endif

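/*
 * Boot CPU MMU bring-up: choose the base and vmemmap page sizes, set
 * the radix page table geometry and address space layout constants,
 * build the kernel page tables, set up the partition/process tables
 * on bare metal (or call radix_init_pseries() when running under a
 * hypervisor), then switch to the guard PID and flush the TLB.
 */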
void __init radix__early_init_mmu(void)
{
	unsigned long lpcr;

#ifdef CONFIG_PPC_64K_PAGES
	/* PAGE_SIZE mappings */
	mmu_virtual_psize = MMU_PAGE_64K;
#else
	mmu_virtual_psize = MMU_PAGE_4K;
#endif

#ifdef CONFIG_SPARSEMEM_VMEMMAP
	/* vmemmap mapping */
	if (mmu_psize_defs[MMU_PAGE_2M].shift) {
		/*
		 * map vmemmap using 2M if available
		 */
		mmu_vmemmap_psize = MMU_PAGE_2M;
	} else
		mmu_vmemmap_psize = mmu_virtual_psize;
#endif
	/*
	 * initialize page table size
	 */
	__pte_index_size = RADIX_PTE_INDEX_SIZE;
	__pmd_index_size = RADIX_PMD_INDEX_SIZE;
	__pud_index_size = RADIX_PUD_INDEX_SIZE;
	__pgd_index_size = RADIX_PGD_INDEX_SIZE;
	__pud_cache_index = RADIX_PUD_INDEX_SIZE;
	__pte_table_size = RADIX_PTE_TABLE_SIZE;
	__pmd_table_size = RADIX_PMD_TABLE_SIZE;
	__pud_table_size = RADIX_PUD_TABLE_SIZE;
	__pgd_table_size = RADIX_PGD_TABLE_SIZE;

	__pmd_val_bits = RADIX_PMD_VAL_BITS;
	__pud_val_bits = RADIX_PUD_VAL_BITS;
	__pgd_val_bits = RADIX_PGD_VAL_BITS;

	__kernel_virt_start = RADIX_KERN_VIRT_START;
	__vmalloc_start = RADIX_VMALLOC_START;
	__vmalloc_end = RADIX_VMALLOC_END;
	__kernel_io_start = RADIX_KERN_IO_START;
	__kernel_io_end = RADIX_KERN_IO_END;
	vmemmap = (struct page *)RADIX_VMEMMAP_START;
	ioremap_bot = IOREMAP_BASE;

#ifdef CONFIG_PCI
	pci_io_base = ISA_IO_BASE;
#endif
	__pte_frag_nr = RADIX_PTE_FRAG_NR;
	__pte_frag_size_shift = RADIX_PTE_FRAG_SIZE_SHIFT;
	__pmd_frag_nr = RADIX_PMD_FRAG_NR;
	__pmd_frag_size_shift = RADIX_PMD_FRAG_SIZE_SHIFT;

	radix_init_pgtable();

	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
		lpcr = mfspr(SPRN_LPCR);
		mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
		radix_init_partition_table();
		radix_init_amor();
	} else {
		radix_init_pseries();
	}

	memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);

	/* Switch to the guard PID before turning on MMU */
	radix__switch_mmu_context(NULL, &init_mm);
	tlbiel_all();
}

void radix__early_init_mmu_secondary(void)
{
	unsigned long lpcr;
	/*
	 * update partition table control register and UPRT
	 */
	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
		lpcr = mfspr(SPRN_LPCR);
		mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);

		set_ptcr_when_no_uv(__pa(partition_tb) |
				    (PATB_SIZE_SHIFT - 12));

		radix_init_amor();
	}

	radix__switch_mmu_context(NULL, &init_mm);
	tlbiel_all();
}

void radix__mmu_cleanup_all(void)
{
	unsigned long lpcr;

	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
		lpcr = mfspr(SPRN_LPCR);
		mtspr(SPRN_LPCR, lpcr & ~LPCR_UPRT);
		set_ptcr_when_no_uv(0);
		powernv_set_nmmu_ptcr(0);
		radix__flush_tlb_all();
	}
}

void radix__setup_initial_memory_limit(phys_addr_t first_memblock_base,
				phys_addr_t first_memblock_size)
{
	/*
	 * We don't currently support the first MEMBLOCK not mapping 0
	 * physical on those processors
	 */
	BUG_ON(first_memblock_base != 0);

	/*
	 * Radix mode is not limited by RMA / VRMA addressing.
	 */
	ppc64_rma_size = ULONG_MAX;
}

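/*
 * Memory hot-unplug support: the remove_*_table()/free_*_table()
 * helpers below walk the kernel page tables top-down, clear leaf
 * entries in the range being removed and free page-table pages that
 * end up completely empty, all under init_mm.page_table_lock.
 */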
#ifdef CONFIG_MEMORY_HOTPLUG
static void free_pte_table(pte_t *pte_start, pmd_t *pmd)
{
	pte_t *pte;
	int i;

	for (i = 0; i < PTRS_PER_PTE; i++) {
		pte = pte_start + i;
		if (!pte_none(*pte))
			return;
	}

	pte_free_kernel(&init_mm, pte_start);
	pmd_clear(pmd);
}

static void free_pmd_table(pmd_t *pmd_start, pud_t *pud)
{
	pmd_t *pmd;
	int i;

	for (i = 0; i < PTRS_PER_PMD; i++) {
		pmd = pmd_start + i;
		if (!pmd_none(*pmd))
			return;
	}

	pmd_free(&init_mm, pmd_start);
	pud_clear(pud);
}

static void free_pud_table(pud_t *pud_start, p4d_t *p4d)
{
	pud_t *pud;
	int i;

	for (i = 0; i < PTRS_PER_PUD; i++) {
		pud = pud_start + i;
		if (!pud_none(*pud))
			return;
	}

	pud_free(&init_mm, pud_start);
	p4d_clear(p4d);
}

static void remove_pte_table(pte_t *pte_start, unsigned long addr,
			     unsigned long end)
{
	unsigned long next;
	pte_t *pte;

	pte = pte_start + pte_index(addr);
	for (; addr < end; addr = next, pte++) {
		next = (addr + PAGE_SIZE) & PAGE_MASK;
		if (next > end)
			next = end;

		if (!pte_present(*pte))
			continue;

		if (!PAGE_ALIGNED(addr) || !PAGE_ALIGNED(next)) {
			/*
			 * The vmemmap_free() and remove_section_mapping()
			 * codepaths call us with aligned addresses.
			 */
			WARN_ONCE(1, "%s: unaligned range\n", __func__);
			continue;
		}

		pte_clear(&init_mm, addr, pte);
	}
}

static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
			     unsigned long end)
{
	unsigned long next;
	pte_t *pte_base;
	pmd_t *pmd;

	pmd = pmd_start + pmd_index(addr);
	for (; addr < end; addr = next, pmd++) {
		next = pmd_addr_end(addr, end);

		if (!pmd_present(*pmd))
			continue;

		if (pmd_is_leaf(*pmd)) {
			if (!IS_ALIGNED(addr, PMD_SIZE) ||
			    !IS_ALIGNED(next, PMD_SIZE)) {
				WARN_ONCE(1, "%s: unaligned range\n", __func__);
				continue;
			}
			pte_clear(&init_mm, addr, (pte_t *)pmd);
			continue;
		}

		pte_base = (pte_t *)pmd_page_vaddr(*pmd);
		remove_pte_table(pte_base, addr, next);
		free_pte_table(pte_base, pmd);
	}
}

static void remove_pud_table(pud_t *pud_start, unsigned long addr,
			     unsigned long end)
{
	unsigned long next;
	pmd_t *pmd_base;
	pud_t *pud;

	pud = pud_start + pud_index(addr);
	for (; addr < end; addr = next, pud++) {
		next = pud_addr_end(addr, end);

		if (!pud_present(*pud))
			continue;

		if (pud_is_leaf(*pud)) {
			if (!IS_ALIGNED(addr, PUD_SIZE) ||
			    !IS_ALIGNED(next, PUD_SIZE)) {
				WARN_ONCE(1, "%s: unaligned range\n", __func__);
				continue;
			}
			pte_clear(&init_mm, addr, (pte_t *)pud);
			continue;
		}

		pmd_base = (pmd_t *)pud_page_vaddr(*pud);
		remove_pmd_table(pmd_base, addr, next);
		free_pmd_table(pmd_base, pud);
	}
}

static void __meminit remove_pagetable(unsigned long start, unsigned long end)
{
	unsigned long addr, next;
	pud_t *pud_base;
	pgd_t *pgd;
	p4d_t *p4d;

	spin_lock(&init_mm.page_table_lock);

	for (addr = start; addr < end; addr = next) {
		next = pgd_addr_end(addr, end);

		pgd = pgd_offset_k(addr);
		p4d = p4d_offset(pgd, addr);
		if (!p4d_present(*p4d))
			continue;

		if (p4d_is_leaf(*p4d)) {
			if (!IS_ALIGNED(addr, P4D_SIZE) ||
			    !IS_ALIGNED(next, P4D_SIZE)) {
				WARN_ONCE(1, "%s: unaligned range\n", __func__);
				continue;
			}

			pte_clear(&init_mm, addr, (pte_t *)pgd);
			continue;
		}

		pud_base = (pud_t *)p4d_page_vaddr(*p4d);
		remove_pud_table(pud_base, addr, next);
		free_pud_table(pud_base, p4d);
	}

	spin_unlock(&init_mm.page_table_lock);
	radix__flush_tlb_kernel_range(start, end);
}

int __meminit radix__create_section_mapping(unsigned long start,
					    unsigned long end, int nid,
					    pgprot_t prot)
{
	if (end >= RADIX_VMALLOC_START) {
		pr_warn("Outside the supported range\n");
		return -1;
	}

	return create_physical_mapping(__pa(start), __pa(end),
				       radix_mem_block_size, nid, prot);
}

int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end)
{
	remove_pagetable(start, end);
	return 0;
}
#endif /* CONFIG_MEMORY_HOTPLUG */

#ifdef CONFIG_SPARSEMEM_VMEMMAP
static int __map_kernel_page_nid(unsigned long ea, unsigned long pa,
				 pgprot_t flags, unsigned int map_page_size,
				 int nid)
{
	return __map_kernel_page(ea, pa, flags, map_page_size, nid, 0, 0);
}

int __meminit radix__vmemmap_create_mapping(unsigned long start,
				      unsigned long page_size,
				      unsigned long phys)
{
	/* Create a PTE encoding */
	unsigned long flags = _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_KERNEL_RW;
	int nid = early_pfn_to_nid(phys >> PAGE_SHIFT);
	int ret;

	if ((start + page_size) >= RADIX_VMEMMAP_END) {
		pr_warn("Outside the supported range\n");
		return -1;
	}

	ret = __map_kernel_page_nid(start, phys, __pgprot(flags), page_size, nid);
	BUG_ON(ret);

	return 0;
}

#ifdef CONFIG_MEMORY_HOTPLUG
void __meminit radix__vmemmap_remove_mapping(unsigned long start, unsigned long page_size)
{
	remove_pagetable(start, start + page_size);
}
#endif
#endif

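/*
 * Transparent hugepage helpers: radix-specific PMD updates, the
 * collapse flush, and the deposit/withdraw bookkeeping for the page
 * table that backs a huge PMD.
 */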
#ifdef CONFIG_TRANSPARENT_HUGEPAGE

unsigned long radix__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
				  pmd_t *pmdp, unsigned long clr,
				  unsigned long set)
{
	unsigned long old;

#ifdef CONFIG_DEBUG_VM
	WARN_ON(!radix__pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp));
	assert_spin_locked(pmd_lockptr(mm, pmdp));
#endif

	old = radix__pte_update(mm, addr, (pte_t *)pmdp, clr, set, 1);
	trace_hugepage_update(addr, old, clr, set);

	return old;
}

pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address,
			pmd_t *pmdp)

{
	pmd_t pmd;

	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
	VM_BUG_ON(radix__pmd_trans_huge(*pmdp));
	VM_BUG_ON(pmd_devmap(*pmdp));
	/*
	 * khugepaged calls this for normal pmd
	 */
	pmd = *pmdp;
	pmd_clear(pmdp);

	/*
	 * pmdp_collapse_flush needs to ensure that there is no parallel
	 * gup walk after this call, so that we have a stable page ref
	 * count when collapsing a page. We don't allow collapsing a page
	 * that has an outstanding gup reference. We can ensure that by
	 * sending an IPI, because the gup walk happens with IRQs disabled.
	 */
	serialize_against_pte_lookup(vma->vm_mm);

	radix__flush_tlb_collapsed_pmd(vma->vm_mm, address);

	return pmd;
}

/*
 * For us pgtable_t is pte_t *. In order to save the deposited
 * page table, we consider the allocated page table as a list
 * head. On withdraw we need to make sure we zero out the used
 * list_head memory area.
 */
void radix__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
				 pgtable_t pgtable)
{
	struct list_head *lh = (struct list_head *) pgtable;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	if (!pmd_huge_pte(mm, pmdp))
		INIT_LIST_HEAD(lh);
	else
		list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
	pmd_huge_pte(mm, pmdp) = pgtable;
}

pgtable_t radix__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
	pte_t *ptep;
	pgtable_t pgtable;
	struct list_head *lh;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	pgtable = pmd_huge_pte(mm, pmdp);
	lh = (struct list_head *) pgtable;
	if (list_empty(lh))
		pmd_huge_pte(mm, pmdp) = NULL;
	else {
		pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
		list_del(lh);
	}
	ptep = (pte_t *) pgtable;
	*ptep = __pte(0);
	ptep++;
	*ptep = __pte(0);
	return pgtable;
}

pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm,
				     unsigned long addr, pmd_t *pmdp)
{
	pmd_t old_pmd;
	unsigned long old;

	old = radix__pmd_hugepage_update(mm, addr, pmdp, ~0UL, 0);
	old_pmd = __pmd(old);
	return old_pmd;
}

#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
				  pte_t entry, unsigned long address, int psize)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED |
					      _PAGE_RW | _PAGE_EXEC);

	unsigned long change = pte_val(entry) ^ pte_val(*ptep);
	/*
	 * To avoid an NMMU hang while relaxing access, we need to mark
	 * the pte invalid in between.
	 */
	if ((change & _PAGE_RW) && atomic_read(&mm->context.copros) > 0) {
		unsigned long old_pte, new_pte;

		old_pte = __radix_pte_update(ptep, _PAGE_PRESENT, _PAGE_INVALID);
		/*
		 * new value of pte
		 */
		new_pte = old_pte | set;
		radix__flush_tlb_page_psize(mm, address, psize);
		__radix_pte_update(ptep, _PAGE_INVALID, new_pte);
	} else {
		__radix_pte_update(ptep, 0, set);
		/*
		 * Book3S does not require a TLB flush when relaxing access
		 * restrictions when the address space is not attached to a
		 * NMMU, because the core MMU will reload the pte after taking
		 * an access fault, which is defined by the architecture.
		 */
	}
	/* See ptesync comment in radix__set_pte_at */
}

void radix__ptep_modify_prot_commit(struct vm_area_struct *vma,
				    unsigned long addr, pte_t *ptep,
				    pte_t old_pte, pte_t pte)
{
	struct mm_struct *mm = vma->vm_mm;

	/*
	 * To avoid NMMU hang while relaxing access we need to flush the tlb before
	 * we set the new value. We need to do this only for radix, because hash
	 * translation does flush when updating the linux pte.
	 */
	if (is_pte_rw_upgrade(pte_val(old_pte), pte_val(pte)) &&
	    (atomic_read(&mm->context.copros) > 0))
		radix__flush_tlb_page(vma, addr);

	set_pte_at(mm, addr, ptep, pte);
}

int __init arch_ioremap_pud_supported(void)
{
	/* HPT does not cope with large pages in the vmalloc area */
	return radix_enabled();
}

int __init arch_ioremap_pmd_supported(void)
{
	return radix_enabled();
}

int p4d_free_pud_page(p4d_t *p4d, unsigned long addr)
{
	return 0;
}

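/*
 * Huge vmap/ioremap helpers: install or tear down PUD- and PMD-sized
 * leaf mappings in the kernel page table. These only do anything when
 * radix is enabled; the set_huge variants simply write a leaf PTE at
 * the pud/pmd slot.
 */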
int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
{
	pte_t *ptep = (pte_t *)pud;
	pte_t new_pud = pfn_pte(__phys_to_pfn(addr), prot);

	if (!radix_enabled())
		return 0;

	set_pte_at(&init_mm, 0 /* radix unused */, ptep, new_pud);

	return 1;
}

int pud_clear_huge(pud_t *pud)
{
	if (pud_huge(*pud)) {
		pud_clear(pud);
		return 1;
	}

	return 0;
}

int pud_free_pmd_page(pud_t *pud, unsigned long addr)
{
	pmd_t *pmd;
	int i;

	pmd = (pmd_t *)pud_page_vaddr(*pud);
	pud_clear(pud);

	flush_tlb_kernel_range(addr, addr + PUD_SIZE);

	for (i = 0; i < PTRS_PER_PMD; i++) {
		if (!pmd_none(pmd[i])) {
			pte_t *pte;
			pte = (pte_t *)pmd_page_vaddr(pmd[i]);

			pte_free_kernel(&init_mm, pte);
		}
	}

	pmd_free(&init_mm, pmd);

	return 1;
}

int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
{
	pte_t *ptep = (pte_t *)pmd;
	pte_t new_pmd = pfn_pte(__phys_to_pfn(addr), prot);

	if (!radix_enabled())
		return 0;

	set_pte_at(&init_mm, 0 /* radix unused */, ptep, new_pmd);

	return 1;
}

int pmd_clear_huge(pmd_t *pmd)
{
	if (pmd_huge(*pmd)) {
		pmd_clear(pmd);
		return 1;
	}

	return 0;
}

int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
{
	pte_t *pte;

	pte = (pte_t *)pmd_page_vaddr(*pmd);
	pmd_clear(pmd);

	flush_tlb_kernel_range(addr, addr + PMD_SIZE);

	pte_free_kernel(&init_mm, pte);

	return 1;
}

int __init arch_ioremap_p4d_supported(void)
{
	return 0;
}