/*
 * Page table handling routines for radix page table.
 *
 * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#define pr_fmt(fmt) "radix-mmu: " fmt

#include <linux/kernel.h>
#include <linux/sched/mm.h>
#include <linux/memblock.h>
#include <linux/of_fdt.h>
#include <linux/mm.h>
#include <linux/string_helpers.h>
#include <linux/stop_machine.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/mmu_context.h>
#include <asm/dma.h>
#include <asm/machdep.h>
#include <asm/mmu.h>
#include <asm/firmware.h>
#include <asm/powernv.h>
#include <asm/sections.h>
#include <asm/trace.h>
#include <asm/uaccess.h>

#include <trace/events/thp.h>

unsigned int mmu_pid_bits;
unsigned int mmu_base_pid;

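/*
 * Bare-metal boot: point partition table entry 0 at the host process
 * table, keeping the existing radix root in the first doubleword.
 */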
static int native_register_process_table(unsigned long base, unsigned long pg_sz,
					  unsigned long table_size)
{
	unsigned long patb0, patb1;

	patb0 = be64_to_cpu(partition_tb[0].patb0);
	patb1 = base | table_size | PATB_GR;

	mmu_partition_table_set_entry(0, patb0, patb1);

	return 0;
}

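/*
 * Allocate page table memory from memblock, preferring the given node and
 * physical region when they are specified. Boot fails if the allocation
 * cannot be satisfied.
 */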
static __ref void *early_alloc_pgtable(unsigned long size, int nid,
			unsigned long region_start, unsigned long region_end)
{
	phys_addr_t min_addr = MEMBLOCK_LOW_LIMIT;
	phys_addr_t max_addr = MEMBLOCK_ALLOC_ANYWHERE;
	void *ptr;

	if (region_start)
		min_addr = region_start;
	if (region_end)
		max_addr = region_end;

	ptr = memblock_alloc_try_nid(size, size, min_addr, max_addr, nid);

	if (!ptr)
		panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%pa max_addr=%pa\n",
		      __func__, size, size, nid, &min_addr, &max_addr);

	return ptr;
}

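/*
 * Boot-time variant of __map_kernel_page(), used before the slab allocator
 * is available: intermediate page table levels are carved out of memblock
 * via early_alloc_pgtable().
 */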
static int early_map_kernel_page(unsigned long ea, unsigned long pa,
			  pgprot_t flags,
			  unsigned int map_page_size,
			  int nid,
			  unsigned long region_start, unsigned long region_end)
{
	unsigned long pfn = pa >> PAGE_SHIFT;
	pgd_t *pgdp;
	pud_t *pudp;
	pmd_t *pmdp;
	pte_t *ptep;

	pgdp = pgd_offset_k(ea);
	if (pgd_none(*pgdp)) {
		pudp = early_alloc_pgtable(PUD_TABLE_SIZE, nid,
						region_start, region_end);
		pgd_populate(&init_mm, pgdp, pudp);
	}
	pudp = pud_offset(pgdp, ea);
	if (map_page_size == PUD_SIZE) {
		ptep = (pte_t *)pudp;
		goto set_the_pte;
	}
	if (pud_none(*pudp)) {
		pmdp = early_alloc_pgtable(PMD_TABLE_SIZE, nid,
						region_start, region_end);
		pud_populate(&init_mm, pudp, pmdp);
	}
	pmdp = pmd_offset(pudp, ea);
	if (map_page_size == PMD_SIZE) {
		ptep = pmdp_ptep(pmdp);
		goto set_the_pte;
	}
	if (!pmd_present(*pmdp)) {
		ptep = early_alloc_pgtable(PAGE_SIZE, nid,
						region_start, region_end);
		pmd_populate_kernel(&init_mm, pmdp, ptep);
	}
	ptep = pte_offset_kernel(pmdp, ea);

set_the_pte:
	set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
	smp_wmb();
	return 0;
}

/*
 * nid, region_start, and region_end are hints to try to place the page
 * table memory in the same node or region.
 */
static int __map_kernel_page(unsigned long ea, unsigned long pa,
			  pgprot_t flags,
			  unsigned int map_page_size,
			  int nid,
			  unsigned long region_start, unsigned long region_end)
{
	unsigned long pfn = pa >> PAGE_SHIFT;
	pgd_t *pgdp;
	pud_t *pudp;
	pmd_t *pmdp;
	pte_t *ptep;
	/*
	 * Make sure the task size is correct as per the max addr
	 */
	BUILD_BUG_ON(TASK_SIZE_USER64 > RADIX_PGTABLE_RANGE);

#ifdef CONFIG_PPC_64K_PAGES
	BUILD_BUG_ON(RADIX_KERN_MAP_SIZE != (1UL << MAX_EA_BITS_PER_CONTEXT));
#endif

	if (unlikely(!slab_is_available()))
		return early_map_kernel_page(ea, pa, flags, map_page_size,
						nid, region_start, region_end);

	/*
	 * Should make page table allocation functions be able to take a
	 * node, so we can place kernel page tables on the right nodes after
	 * boot.
	 */
	pgdp = pgd_offset_k(ea);
	pudp = pud_alloc(&init_mm, pgdp, ea);
	if (!pudp)
		return -ENOMEM;
	if (map_page_size == PUD_SIZE) {
		ptep = (pte_t *)pudp;
		goto set_the_pte;
	}
	pmdp = pmd_alloc(&init_mm, pudp, ea);
	if (!pmdp)
		return -ENOMEM;
	if (map_page_size == PMD_SIZE) {
		ptep = pmdp_ptep(pmdp);
		goto set_the_pte;
	}
	ptep = pte_alloc_kernel(pmdp, ea);
	if (!ptep)
		return -ENOMEM;

set_the_pte:
	set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
	smp_wmb();
	return 0;
}

int radix__map_kernel_page(unsigned long ea, unsigned long pa,
			  pgprot_t flags,
			  unsigned int map_page_size)
{
	return __map_kernel_page(ea, pa, flags, map_page_size, -1, 0, 0);
}

#ifdef CONFIG_STRICT_KERNEL_RWX
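/*
 * Clear the given PTE bits (e.g. _PAGE_WRITE or _PAGE_EXEC) on every page
 * in [start, end) of the kernel mapping, then flush the TLB for the range.
 */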
void radix__change_memory_range(unsigned long start, unsigned long end,
				unsigned long clear)
{
	unsigned long idx;
	pgd_t *pgdp;
	pud_t *pudp;
	pmd_t *pmdp;
	pte_t *ptep;

	start = ALIGN_DOWN(start, PAGE_SIZE);
	end = PAGE_ALIGN(end); // aligns up

	pr_debug("Changing flags on range %lx-%lx removing 0x%lx\n",
		 start, end, clear);

	for (idx = start; idx < end; idx += PAGE_SIZE) {
		pgdp = pgd_offset_k(idx);
		pudp = pud_alloc(&init_mm, pgdp, idx);
		if (!pudp)
			continue;
		if (pud_huge(*pudp)) {
			ptep = (pte_t *)pudp;
			goto update_the_pte;
		}
		pmdp = pmd_alloc(&init_mm, pudp, idx);
		if (!pmdp)
			continue;
		if (pmd_huge(*pmdp)) {
			ptep = pmdp_ptep(pmdp);
			goto update_the_pte;
		}
		ptep = pte_alloc_kernel(pmdp, idx);
		if (!ptep)
			continue;
update_the_pte:
		radix__pte_update(&init_mm, idx, ptep, clear, 0, 0);
	}

	radix__flush_tlb_kernel_range(start, end);
}

void radix__mark_rodata_ro(void)
{
	unsigned long start, end;

	start = (unsigned long)_stext;
	end = (unsigned long)__init_begin;

	radix__change_memory_range(start, end, _PAGE_WRITE);
}

void radix__mark_initmem_nx(void)
{
	unsigned long start = (unsigned long)__init_begin;
	unsigned long end = (unsigned long)__init_end;

	radix__change_memory_range(start, end, _PAGE_EXEC);
}
#endif /* CONFIG_STRICT_KERNEL_RWX */

static inline void __meminit
print_mapping(unsigned long start, unsigned long end, unsigned long size, bool exec)
{
	char buf[10];

	if (end <= start)
		return;

	string_get_size(size, 1, STRING_UNITS_2, buf, sizeof(buf));

	pr_info("Mapped 0x%016lx-0x%016lx with %s pages%s\n", start, end, buf,
		exec ? " (exec)" : "");
}

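/*
 * With STRICT_KERNEL_RWX, keep the linear map from using a large page that
 * crosses __init_begin, so the text/rodata region can later have its
 * permissions changed without needing to split a mapping.
 */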
static unsigned long next_boundary(unsigned long addr, unsigned long end)
{
#ifdef CONFIG_STRICT_KERNEL_RWX
	if (addr < __pa_symbol(__init_begin))
		return __pa_symbol(__init_begin);
#endif
	return end;
}

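/*
 * Map the physical range [start, end) into the kernel linear mapping,
 * using 1G or 2M pages where alignment and the next boundary allow and
 * falling back to base pages otherwise. Ranges overlapping kernel or
 * interrupt vector text are mapped executable.
 */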
static int __meminit create_physical_mapping(unsigned long start,
					     unsigned long end,
					     int nid)
{
	unsigned long vaddr, addr, mapping_size = 0;
	bool prev_exec, exec = false;
	pgprot_t prot;
	int psize;

	start = _ALIGN_UP(start, PAGE_SIZE);
	for (addr = start; addr < end; addr += mapping_size) {
		unsigned long gap, previous_size;
		int rc;

		gap = next_boundary(addr, end) - addr;
		previous_size = mapping_size;
		prev_exec = exec;

		if (IS_ALIGNED(addr, PUD_SIZE) && gap >= PUD_SIZE &&
		    mmu_psize_defs[MMU_PAGE_1G].shift) {
			mapping_size = PUD_SIZE;
			psize = MMU_PAGE_1G;
		} else if (IS_ALIGNED(addr, PMD_SIZE) && gap >= PMD_SIZE &&
			   mmu_psize_defs[MMU_PAGE_2M].shift) {
			mapping_size = PMD_SIZE;
			psize = MMU_PAGE_2M;
		} else {
			mapping_size = PAGE_SIZE;
			psize = mmu_virtual_psize;
		}

		vaddr = (unsigned long)__va(addr);

		if (overlaps_kernel_text(vaddr, vaddr + mapping_size) ||
		    overlaps_interrupt_vector_text(vaddr, vaddr + mapping_size)) {
			prot = PAGE_KERNEL_X;
			exec = true;
		} else {
			prot = PAGE_KERNEL;
			exec = false;
		}

		if (mapping_size != previous_size || exec != prev_exec) {
			print_mapping(start, addr, previous_size, prev_exec);
			start = addr;
		}

		rc = __map_kernel_page(vaddr, addr, prot, mapping_size, nid, start, end);
		if (rc)
			return rc;

		update_page_count(psize, 1);
	}

	print_mapping(start, addr, mapping_size, exec);
	return 0;
}

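/*
 * Boot-time construction of the kernel radix page tables: the linear
 * mapping, the PID space layout, the process table and the guard PID.
 */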
static void __init radix_init_pgtable(void)
{
	unsigned long rts_field;
	struct memblock_region *reg;

	/* We don't support slb for radix */
	mmu_slb_size = 0;
	/*
	 * Create the linear mapping, using standard page size for now
	 */
	for_each_memblock(memory, reg) {
		/*
		 * The memblock allocator is up at this point, so the
		 * page tables will be allocated within the range. No
		 * need for a node (which we don't have yet).
		 */

		if ((reg->base + reg->size) >= RADIX_VMALLOC_START) {
			pr_warn("Outside the supported range\n");
			continue;
		}

		WARN_ON(create_physical_mapping(reg->base,
						reg->base + reg->size,
						-1));
	}

	/* Find out how many PID bits are supported */
	if (cpu_has_feature(CPU_FTR_HVMODE)) {
		if (!mmu_pid_bits)
			mmu_pid_bits = 20;
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
		/*
		 * When KVM is possible, we only use the top half of the
		 * PID space to avoid collisions between host and guest PIDs
		 * which can cause problems due to prefetch when exiting the
		 * guest with AIL=3
		 */
		mmu_base_pid = 1 << (mmu_pid_bits - 1);
#else
		mmu_base_pid = 1;
#endif
	} else {
		/* The guest uses the bottom half of the PID space */
		if (!mmu_pid_bits)
			mmu_pid_bits = 19;
		mmu_base_pid = 1;
	}

	/*
	 * Allocate Partition table and process table for the
	 * host.
	 */
	BUG_ON(PRTB_SIZE_SHIFT > 36);
	process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT, -1, 0, 0);
	/*
	 * Fill in the process table.
	 */
	rts_field = radix__get_tree_size();
	process_tb->prtb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE);
	/*
	 * Fill in the partition table. We are supposed to use the effective
	 * address of the process table here, but our linear mapping also
	 * enables us to use the physical address.
	 */
	register_process_table(__pa(process_tb), 0, PRTB_SIZE_SHIFT - 12);
	pr_info("Process table %p and radix root for kernel: %p\n", process_tb, init_mm.pgd);
	asm volatile("ptesync" : : : "memory");
	asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : :
		     "r" (TLBIEL_INVAL_SET_LPID), "r" (0));
	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
	trace_tlbie(0, 0, TLBIEL_INVAL_SET_LPID, 0, 2, 1, 1);

	/*
	 * The init_mm context is given the first available (non-zero) PID,
	 * which is the "guard PID" and contains no page table. PIDR should
	 * never be set to zero because that duplicates the kernel address
	 * space at the 0x0... offset (quadrant 0)!
	 *
	 * An arbitrary PID that may later be allocated by the PID allocator
	 * for userspace processes must not be used either, because that
	 * would cause stale user mappings for that PID on CPUs outside of
	 * the TLB invalidation scheme (because it won't be in mm_cpumask).
	 *
	 * So permanently carve out one PID for the purpose of a guard PID.
	 */
	init_mm.context.id = mmu_base_pid;
	mmu_base_pid++;
}

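/*
 * Set up partition table entry 0 for the host: the host radix tree root
 * plus PATB_HR to select radix translation.
 */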
static void __init radix_init_partition_table(void)
{
	unsigned long rts_field, dw0;

	mmu_partition_table_init();
	rts_field = radix__get_tree_size();
	dw0 = rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE | PATB_HR;
	mmu_partition_table_set_entry(0, dw0, 0);

	pr_info("Initializing Radix MMU\n");
	pr_info("Partition table %p\n", partition_tb);
}

void __init radix_init_native(void)
{
	register_process_table = native_register_process_table;
}

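/*
 * Translate a page size shift from the device tree into an MMU_PAGE_*
 * index; returns -1 for unsupported sizes.
 */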
static int __init get_idx_from_shift(unsigned int shift)
{
	int idx = -1;

	switch (shift) {
	case 0xc:
		idx = MMU_PAGE_4K;
		break;
	case 0x10:
		idx = MMU_PAGE_64K;
		break;
	case 0x15:
		idx = MMU_PAGE_2M;
		break;
	case 0x1e:
		idx = MMU_PAGE_1G;
		break;
	}
	return idx;
}

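/*
 * of_scan_flat_dt() callback: pick up the PID width and the supported
 * radix page size encodings from the "cpu" node.
 */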
static int __init radix_dt_scan_page_sizes(unsigned long node,
					   const char *uname, int depth,
					   void *data)
{
	int size = 0;
	int shift, idx;
	unsigned int ap;
	const __be32 *prop;
	const char *type = of_get_flat_dt_prop(node, "device_type", NULL);

	/* We are scanning "cpu" nodes only */
	if (type == NULL || strcmp(type, "cpu") != 0)
		return 0;

	/* Find MMU PID size */
	prop = of_get_flat_dt_prop(node, "ibm,mmu-pid-bits", &size);
	if (prop && size == 4)
		mmu_pid_bits = be32_to_cpup(prop);

	/* Grab page size encodings */
	prop = of_get_flat_dt_prop(node, "ibm,processor-radix-AP-encodings", &size);
	if (!prop)
		return 0;

	pr_info("Page sizes from device-tree:\n");
	for (; size >= 4; size -= 4, ++prop) {

		struct mmu_psize_def *def;

		/* top 3 bits are the AP encoding */
		shift = be32_to_cpu(prop[0]) & ~(0xe << 28);
		ap = be32_to_cpu(prop[0]) >> 29;
		pr_info("Page size shift = %d AP=0x%x\n", shift, ap);

		idx = get_idx_from_shift(shift);
		if (idx < 0)
			continue;

		def = &mmu_psize_defs[idx];
		def->shift = shift;
		def->ap = ap;
	}

	/* needed ? */
	cur_cpu_spec->mmu_features &= ~MMU_FTR_NO_SLBIE_B;
	return 1;
}

void __init radix__early_init_devtree(void)
{
	int rc;

	/*
	 * Try to find the available page sizes in the device-tree
	 */
	rc = of_scan_flat_dt(radix_dt_scan_page_sizes, NULL);
	if (rc != 0) /* Found */
		goto found;
	/*
	 * let's assume we have page 4k and 64k support
	 */
	mmu_psize_defs[MMU_PAGE_4K].shift = 12;
	mmu_psize_defs[MMU_PAGE_4K].ap = 0x0;

	mmu_psize_defs[MMU_PAGE_64K].shift = 16;
	mmu_psize_defs[MMU_PAGE_64K].ap = 0x5;
found:
#ifdef CONFIG_SPARSEMEM_VMEMMAP
	if (mmu_psize_defs[MMU_PAGE_2M].shift) {
		/*
		 * map vmemmap using 2M if available
		 */
		mmu_vmemmap_psize = MMU_PAGE_2M;
	}
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
	return;
}

static void radix_init_amor(void)
{
	/*
	 * In HV mode, we init AMOR (Authority Mask Override Register) so that
	 * the hypervisor and guest can setup IAMR (Instruction Authority Mask
	 * Register), enable key 0 and set it to 1.
	 *
	 * AMOR = 0b1100 .... 0000 (Mask for key 0 is 11)
	 */
	mtspr(SPRN_AMOR, (3ul << 62));
}

#ifdef CONFIG_PPC_KUEP
void setup_kuep(bool disabled)
{
	if (disabled || !early_radix_enabled())
		return;

	if (smp_processor_id() == boot_cpuid)
		pr_info("Activating Kernel Userspace Execution Prevention\n");

	/*
	 * Radix always uses key0 of the IAMR to determine if an access is
	 * allowed. We set bit 0 (IBM bit 1) of key0, to prevent instruction
	 * fetch.
	 */
	mtspr(SPRN_IAMR, (1ul << 62));
}
#endif

#ifdef CONFIG_PPC_KUAP
void setup_kuap(bool disabled)
{
	if (disabled || !early_radix_enabled())
		return;

	if (smp_processor_id() == boot_cpuid) {
		pr_info("Activating Kernel Userspace Access Prevention\n");
		cur_cpu_spec->mmu_features |= MMU_FTR_RADIX_KUAP;
	}

	/* Make sure userspace can't change the AMR */
	mtspr(SPRN_UAMOR, 0);
	mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
	isync();
}
#endif

void __init radix__early_init_mmu(void)
{
	unsigned long lpcr;

#ifdef CONFIG_PPC_64K_PAGES
	/* PAGE_SIZE mappings */
	mmu_virtual_psize = MMU_PAGE_64K;
#else
	mmu_virtual_psize = MMU_PAGE_4K;
#endif

#ifdef CONFIG_SPARSEMEM_VMEMMAP
	/* vmemmap mapping */
	mmu_vmemmap_psize = mmu_virtual_psize;
#endif
	/*
	 * initialize page table size
	 */
	__pte_index_size = RADIX_PTE_INDEX_SIZE;
	__pmd_index_size = RADIX_PMD_INDEX_SIZE;
	__pud_index_size = RADIX_PUD_INDEX_SIZE;
	__pgd_index_size = RADIX_PGD_INDEX_SIZE;
	__pud_cache_index = RADIX_PUD_INDEX_SIZE;
	__pte_table_size = RADIX_PTE_TABLE_SIZE;
	__pmd_table_size = RADIX_PMD_TABLE_SIZE;
	__pud_table_size = RADIX_PUD_TABLE_SIZE;
	__pgd_table_size = RADIX_PGD_TABLE_SIZE;

	__pmd_val_bits = RADIX_PMD_VAL_BITS;
	__pud_val_bits = RADIX_PUD_VAL_BITS;
	__pgd_val_bits = RADIX_PGD_VAL_BITS;

	__kernel_virt_start = RADIX_KERN_VIRT_START;
	__vmalloc_start = RADIX_VMALLOC_START;
	__vmalloc_end = RADIX_VMALLOC_END;
	__kernel_io_start = RADIX_KERN_IO_START;
	__kernel_io_end = RADIX_KERN_IO_END;
	vmemmap = (struct page *)RADIX_VMEMMAP_START;
	ioremap_bot = IOREMAP_BASE;

#ifdef CONFIG_PCI
	pci_io_base = ISA_IO_BASE;
#endif
	__pte_frag_nr = RADIX_PTE_FRAG_NR;
	__pte_frag_size_shift = RADIX_PTE_FRAG_SIZE_SHIFT;
	__pmd_frag_nr = RADIX_PMD_FRAG_NR;
	__pmd_frag_size_shift = RADIX_PMD_FRAG_SIZE_SHIFT;

	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
		radix_init_native();
		lpcr = mfspr(SPRN_LPCR);
		mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
		radix_init_partition_table();
		radix_init_amor();
	} else {
		radix_init_pseries();
	}

	memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);

	radix_init_pgtable();
	/* Switch to the guard PID before turning on MMU */
	radix__switch_mmu_context(NULL, &init_mm);
	if (cpu_has_feature(CPU_FTR_HVMODE))
		tlbiel_all();
}

void radix__early_init_mmu_secondary(void)
{
	unsigned long lpcr;
	/*
	 * update partition table control register and UPRT
	 */
	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
		lpcr = mfspr(SPRN_LPCR);
		mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);

		mtspr(SPRN_PTCR,
		      __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
		radix_init_amor();
	}

	radix__switch_mmu_context(NULL, &init_mm);
	if (cpu_has_feature(CPU_FTR_HVMODE))
		tlbiel_all();
}

void radix__mmu_cleanup_all(void)
{
	unsigned long lpcr;

	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
		lpcr = mfspr(SPRN_LPCR);
		mtspr(SPRN_LPCR, lpcr & ~LPCR_UPRT);
		mtspr(SPRN_PTCR, 0);
		powernv_set_nmmu_ptcr(0);
		radix__flush_tlb_all();
	}
}

void radix__setup_initial_memory_limit(phys_addr_t first_memblock_base,
				phys_addr_t first_memblock_size)
{
	/*
	 * We don't currently support the first MEMBLOCK not mapping 0
	 * physical on those processors
	 */
	BUG_ON(first_memblock_base != 0);

	/*
	 * Radix mode is not limited by RMA / VRMA addressing.
	 */
	ppc64_rma_size = ULONG_MAX;
}

#ifdef CONFIG_MEMORY_HOTPLUG
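/*
 * Free a PTE page and clear the PMD entry pointing to it, but only once
 * every PTE in the page is none. Same idea for free_pmd_table() below.
 */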
static void free_pte_table(pte_t *pte_start, pmd_t *pmd)
{
	pte_t *pte;
	int i;

	for (i = 0; i < PTRS_PER_PTE; i++) {
		pte = pte_start + i;
		if (!pte_none(*pte))
			return;
	}

	pte_free_kernel(&init_mm, pte_start);
	pmd_clear(pmd);
}

static void free_pmd_table(pmd_t *pmd_start, pud_t *pud)
{
	pmd_t *pmd;
	int i;

	for (i = 0; i < PTRS_PER_PMD; i++) {
		pmd = pmd_start + i;
		if (!pmd_none(*pmd))
			return;
	}

	pmd_free(&init_mm, pmd_start);
	pud_clear(pud);
}

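/*
 * Arguments for stop_machine_change_mapping(): the huge page table entry
 * to clear and the portions of its aligned range that must be remapped
 * afterwards.
 */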
struct change_mapping_params {
	pte_t *pte;
	unsigned long start;
	unsigned long end;
	unsigned long aligned_start;
	unsigned long aligned_end;
};

static int __meminit stop_machine_change_mapping(void *data)
{
	struct change_mapping_params *params =
			(struct change_mapping_params *)data;

	if (!data)
		return -1;

	spin_unlock(&init_mm.page_table_lock);
	pte_clear(&init_mm, params->aligned_start, params->pte);
	create_physical_mapping(params->aligned_start, params->start, -1);
	create_physical_mapping(params->end, params->aligned_end, -1);
	spin_lock(&init_mm.page_table_lock);
	return 0;
}

static void remove_pte_table(pte_t *pte_start, unsigned long addr,
			     unsigned long end)
{
	unsigned long next;
	pte_t *pte;

	pte = pte_start + pte_index(addr);
	for (; addr < end; addr = next, pte++) {
		next = (addr + PAGE_SIZE) & PAGE_MASK;
		if (next > end)
			next = end;

		if (!pte_present(*pte))
			continue;

		if (!PAGE_ALIGNED(addr) || !PAGE_ALIGNED(next)) {
			/*
			 * The vmemmap_free() and remove_section_mapping()
			 * codepaths call us with aligned addresses.
			 */
			WARN_ONCE(1, "%s: unaligned range\n", __func__);
			continue;
		}

		pte_clear(&init_mm, addr, pte);
	}
}

/*
 * Helper to clear the pte and potentially split the mapping.
 */
static void __meminit split_kernel_mapping(unsigned long addr, unsigned long end,
				unsigned long size, pte_t *pte)
{
	unsigned long mask = ~(size - 1);
	unsigned long aligned_start = addr & mask;
	unsigned long aligned_end = addr + size;
	struct change_mapping_params params;
	bool split_region = false;

	if ((end - addr) < size) {
		/*
		 * We're going to clear the PTE without having flushed
		 * the mapping, so we need to remap and flush. If the
		 * effects are visible outside the processor, or if we
		 * are running in code close to the mapping we cleared,
		 * we are in trouble.
		 */
		if (overlaps_kernel_text(aligned_start, addr) ||
			overlaps_kernel_text(end, aligned_end)) {
			/*
			 * Hack, just return, don't pte_clear
			 */
			WARN_ONCE(1, "Linear mapping %lx->%lx overlaps kernel "
					"text, not splitting\n", addr, end);
			return;
		}
		split_region = true;
	}

	if (split_region) {
		params.pte = pte;
		params.start = addr;
		params.end = end;
		params.aligned_start = addr & ~(size - 1);
		params.aligned_end = min_t(unsigned long, aligned_end,
				(unsigned long)__va(memblock_end_of_DRAM()));
		stop_machine(stop_machine_change_mapping, &params, NULL);
		return;
	}

	pte_clear(&init_mm, addr, pte);
}

static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
			     unsigned long end)
{
	unsigned long next;
	pte_t *pte_base;
	pmd_t *pmd;

	pmd = pmd_start + pmd_index(addr);
	for (; addr < end; addr = next, pmd++) {
		next = pmd_addr_end(addr, end);

		if (!pmd_present(*pmd))
			continue;

		if (pmd_huge(*pmd)) {
			split_kernel_mapping(addr, end, PMD_SIZE, (pte_t *)pmd);
			continue;
		}

		pte_base = (pte_t *)pmd_page_vaddr(*pmd);
		remove_pte_table(pte_base, addr, next);
		free_pte_table(pte_base, pmd);
	}
}

static void remove_pud_table(pud_t *pud_start, unsigned long addr,
			     unsigned long end)
{
	unsigned long next;
	pmd_t *pmd_base;
	pud_t *pud;

	pud = pud_start + pud_index(addr);
	for (; addr < end; addr = next, pud++) {
		next = pud_addr_end(addr, end);

		if (!pud_present(*pud))
			continue;

		if (pud_huge(*pud)) {
			split_kernel_mapping(addr, end, PUD_SIZE, (pte_t *)pud);
			continue;
		}

		pmd_base = (pmd_t *)pud_page_vaddr(*pud);
		remove_pmd_table(pmd_base, addr, next);
		free_pmd_table(pmd_base, pud);
	}
}

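/*
 * Tear down the kernel mapping for [start, end): clear PTEs, free empty
 * page table pages, split huge mappings that extend beyond the range, and
 * flush the TLB.
 */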
static void __meminit remove_pagetable(unsigned long start, unsigned long end)
{
	unsigned long addr, next;
	pud_t *pud_base;
	pgd_t *pgd;

	spin_lock(&init_mm.page_table_lock);

	for (addr = start; addr < end; addr = next) {
		next = pgd_addr_end(addr, end);

		pgd = pgd_offset_k(addr);
		if (!pgd_present(*pgd))
			continue;

		if (pgd_huge(*pgd)) {
			split_kernel_mapping(addr, end, PGDIR_SIZE, (pte_t *)pgd);
			continue;
		}

		pud_base = (pud_t *)pgd_page_vaddr(*pgd);
		remove_pud_table(pud_base, addr, next);
	}

	spin_unlock(&init_mm.page_table_lock);
	radix__flush_tlb_kernel_range(start, end);
}

int __meminit radix__create_section_mapping(unsigned long start, unsigned long end, int nid)
{
	if (end >= RADIX_VMALLOC_START) {
		pr_warn("Outside the supported range\n");
		return -1;
	}

	return create_physical_mapping(start, end, nid);
}

int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end)
{
	remove_pagetable(start, end);
	return 0;
}
#endif /* CONFIG_MEMORY_HOTPLUG */

#ifdef CONFIG_SPARSEMEM_VMEMMAP
static int __map_kernel_page_nid(unsigned long ea, unsigned long pa,
				 pgprot_t flags, unsigned int map_page_size,
				 int nid)
{
	return __map_kernel_page(ea, pa, flags, map_page_size, nid, 0, 0);
}

int __meminit radix__vmemmap_create_mapping(unsigned long start,
				      unsigned long page_size,
				      unsigned long phys)
{
	/* Create a PTE encoding */
	unsigned long flags = _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_KERNEL_RW;
	int nid = early_pfn_to_nid(phys >> PAGE_SHIFT);
	int ret;

	if ((start + page_size) >= RADIX_VMEMMAP_END) {
		pr_warn("Outside the supported range\n");
		return -1;
	}

	ret = __map_kernel_page_nid(start, phys, __pgprot(flags), page_size, nid);
	BUG_ON(ret);

	return 0;
}

#ifdef CONFIG_MEMORY_HOTPLUG
void __meminit radix__vmemmap_remove_mapping(unsigned long start, unsigned long page_size)
{
	remove_pagetable(start, start + page_size);
}
#endif
#endif

#ifdef CONFIG_TRANSPARENT_HUGEPAGE

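/*
 * Clear and/or set bits in a huge PMD under the PMD lock; returns the old
 * PMD value and emits a hugepage_update trace event.
 */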
unsigned long radix__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
				  pmd_t *pmdp, unsigned long clr,
				  unsigned long set)
{
	unsigned long old;

#ifdef CONFIG_DEBUG_VM
	WARN_ON(!radix__pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp));
	assert_spin_locked(pmd_lockptr(mm, pmdp));
#endif

	old = radix__pte_update(mm, addr, (pte_t *)pmdp, clr, set, 1);
	trace_hugepage_update(addr, old, clr, set);

	return old;
}

pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address,
			pmd_t *pmdp)

{
	pmd_t pmd;

	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
	VM_BUG_ON(radix__pmd_trans_huge(*pmdp));
	VM_BUG_ON(pmd_devmap(*pmdp));
	/*
	 * khugepaged calls this for normal pmd
	 */
	pmd = *pmdp;
	pmd_clear(pmdp);

	/*FIXME!! Verify whether we need this kick below */
	serialize_against_pte_lookup(vma->vm_mm);

	radix__flush_tlb_collapsed_pmd(vma->vm_mm, address);

	return pmd;
}

/*
 * For us pgtable_t is pte_t *. In order to save the deposited
 * page table, we consider the allocated page table as a list
 * head. On withdraw we need to make sure we zero out the used
 * list_head memory area.
 */
void radix__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
				 pgtable_t pgtable)
{
	struct list_head *lh = (struct list_head *) pgtable;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	if (!pmd_huge_pte(mm, pmdp))
		INIT_LIST_HEAD(lh);
	else
		list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
	pmd_huge_pte(mm, pmdp) = pgtable;
}

pgtable_t radix__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
	pte_t *ptep;
	pgtable_t pgtable;
	struct list_head *lh;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	pgtable = pmd_huge_pte(mm, pmdp);
	lh = (struct list_head *) pgtable;
	if (list_empty(lh))
		pmd_huge_pte(mm, pmdp) = NULL;
	else {
		pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
		list_del(lh);
	}
	ptep = (pte_t *) pgtable;
	*ptep = __pte(0);
	ptep++;
	*ptep = __pte(0);
	return pgtable;
}

pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm,
				     unsigned long addr, pmd_t *pmdp)
{
	pmd_t old_pmd;
	unsigned long old;

	old = radix__pmd_hugepage_update(mm, addr, pmdp, ~0UL, 0);
	old_pmd = __pmd(old);
	/*
	 * Serialize against find_current_mm_pte which does lock-less
	 * lookup in page tables with local interrupts disabled. For huge pages
	 * it casts pmd_t to pte_t. Since format of pte_t is different from
	 * pmd_t we want to prevent transit from pmd pointing to page table
	 * to pmd pointing to huge page (and back) while interrupts are disabled.
	 * We clear pmd to possibly replace it with page table pointer in
	 * different code paths. So make sure we wait for the parallel
	 * find_current_mm_pte to finish.
	 */
	serialize_against_pte_lookup(mm);
	return old_pmd;
}

int radix__has_transparent_hugepage(void)
{
	/* For radix 2M at PMD level means thp */
	if (mmu_psize_defs[MMU_PAGE_2M].shift == PMD_SHIFT)
		return 1;
	return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
				  pte_t entry, unsigned long address, int psize)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED |
					      _PAGE_RW | _PAGE_EXEC);

	unsigned long change = pte_val(entry) ^ pte_val(*ptep);
	/*
	 * To avoid NMMU hang while relaxing access, we need to mark
	 * the pte invalid in between.
	 */
	if ((change & _PAGE_RW) && atomic_read(&mm->context.copros) > 0) {
		unsigned long old_pte, new_pte;

		old_pte = __radix_pte_update(ptep, _PAGE_PRESENT, _PAGE_INVALID);
		/*
		 * new value of pte
		 */
		new_pte = old_pte | set;
		radix__flush_tlb_page_psize(mm, address, psize);
		__radix_pte_update(ptep, _PAGE_INVALID, new_pte);
	} else {
		__radix_pte_update(ptep, 0, set);
		/*
		 * Book3S does not require a TLB flush when relaxing access
		 * restrictions if the address space is not attached to a
		 * NMMU, because the core MMU will reload the pte after taking
		 * an access fault, which is defined by the architecture.
		 */
	}
	/* See ptesync comment in radix__set_pte_at */
}

void radix__ptep_modify_prot_commit(struct vm_area_struct *vma,
				    unsigned long addr, pte_t *ptep,
				    pte_t old_pte, pte_t pte)
{
	struct mm_struct *mm = vma->vm_mm;

	/*
	 * To avoid NMMU hang while relaxing access we need to flush the tlb before
	 * we set the new value. We need to do this only for radix, because hash
	 * translation does flush when updating the linux pte.
	 */
	if (is_pte_rw_upgrade(pte_val(old_pte), pte_val(pte)) &&
	    (atomic_read(&mm->context.copros) > 0))
		radix__flush_tlb_page(vma, addr);

	set_pte_at(mm, addr, ptep, pte);
}