// SPDX-License-Identifier: GPL-2.0-only
/*
 * Based on arch/arm/mm/mmu.c
 *
 * Copyright (C) 1995-2005 Russell King
 * Copyright (C) 2012 ARM Ltd.
 */

#include <linux/cache.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/kexec.h>
#include <linux/libfdt.h>
#include <linux/mman.h>
#include <linux/nodemask.h>
#include <linux/memblock.h>
#include <linux/memory.h>
#include <linux/fs.h>
#include <linux/io.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>

#include <asm/barrier.h>
#include <asm/cputype.h>
#include <asm/fixmap.h>
#include <asm/kasan.h>
#include <asm/kernel-pgtable.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <linux/sizes.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>
#include <asm/ptdump.h>
#include <asm/tlbflush.h>

#define NO_BLOCK_MAPPINGS	BIT(0)
#define NO_CONT_MAPPINGS	BIT(1)

u64 idmap_t0sz = TCR_T0SZ(VA_BITS);
u64 idmap_ptrs_per_pgd = PTRS_PER_PGD;

u64 __section(".mmuoff.data.write") vabits_actual;
EXPORT_SYMBOL(vabits_actual);

u64 kimage_voffset __ro_after_init;
EXPORT_SYMBOL(kimage_voffset);

/*
 * Empty_zero_page is a special page that is used for zero-initialized data
 * and COW.
 */
unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
EXPORT_SYMBOL(empty_zero_page);

static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused;
static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused;

static DEFINE_SPINLOCK(swapper_pgdir_lock);

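/*
 * Update an entry in swapper_pg_dir. The swapper page tables are not
 * necessarily writable through their normal kernel mapping, so the write
 * is performed through a fixmap slot, serialised by swapper_pgdir_lock.
 */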
void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd)
{
	pgd_t *fixmap_pgdp;

	spin_lock(&swapper_pgdir_lock);
	fixmap_pgdp = pgd_set_fixmap(__pa_symbol(pgdp));
	WRITE_ONCE(*fixmap_pgdp, pgd);
	/*
	 * We need dsb(ishst) here to ensure the page-table-walker sees
	 * our new entry before set_p?d() returns. The fixmap's
	 * flush_tlb_kernel_range() via clear_fixmap() does this for us.
	 */
	pgd_clear_fixmap();
	spin_unlock(&swapper_pgdir_lock);
}

pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
			      unsigned long size, pgprot_t vma_prot)
{
	if (!pfn_valid(pfn))
		return pgprot_noncached(vma_prot);
	else if (file->f_flags & O_SYNC)
		return pgprot_writecombine(vma_prot);
	return vma_prot;
}
EXPORT_SYMBOL(phys_mem_access_prot);

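/*
 * Allocate a zeroed page-table page from memblock during early boot and
 * return its physical address. The page is zeroed through the FIX_PTE
 * fixmap slot, since the new page may not be reachable through the linear
 * map at this point.
 */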
static phys_addr_t __init early_pgtable_alloc(int shift)
{
	phys_addr_t phys;
	void *ptr;

	phys = memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
	if (!phys)
		panic("Failed to allocate page table page\n");

	/*
	 * The FIX_{PGD,PUD,PMD} slots may be in active use, but the FIX_PTE
	 * slot will be free, so we can (ab)use the FIX_PTE slot to initialise
	 * any level of table.
	 */
	ptr = pte_set_fixmap(phys);

	memset(ptr, 0, PAGE_SIZE);

	/*
	 * Implicit barriers also ensure the zeroed page is visible to the page
	 * table walker
	 */
	pte_clear_fixmap();

	return phys;
}

static bool pgattr_change_is_safe(u64 old, u64 new)
{
	/*
	 * The following mapping attributes may be updated in live
	 * kernel mappings without the need for break-before-make.
	 */
	static const pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE | PTE_NG;

	/* creating or taking down mappings is always safe */
	if (old == 0 || new == 0)
		return true;

	/* live contiguous mappings may not be manipulated at all */
	if ((old | new) & PTE_CONT)
		return false;

	/* Transitioning from Non-Global to Global is unsafe */
	if (old & ~new & PTE_NG)
		return false;

	return ((old ^ new) & ~mask) == 0;
}

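/*
 * Populate the PTEs covering [addr, end) under *pmdp with a linearly
 * increasing physical range, accessing the PTE table through the fixmap
 * while it is being written.
 */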
static void init_pte(pmd_t *pmdp, unsigned long addr, unsigned long end,
		     phys_addr_t phys, pgprot_t prot)
{
	pte_t *ptep;

	ptep = pte_set_fixmap_offset(pmdp, addr);
	do {
		pte_t old_pte = READ_ONCE(*ptep);

		set_pte(ptep, pfn_pte(__phys_to_pfn(phys), prot));

		/*
		 * After the PTE entry has been populated once, we
		 * only allow updates to the permission attributes.
		 */
		BUG_ON(!pgattr_change_is_safe(pte_val(old_pte),
					      READ_ONCE(pte_val(*ptep))));

		phys += PAGE_SIZE;
	} while (ptep++, addr += PAGE_SIZE, addr != end);

	pte_clear_fixmap();
}

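/*
 * Allocate a PTE table for *pmdp if necessary, then fill it one
 * contiguous-range chunk at a time, setting PTE_CONT whenever a whole
 * chunk is suitably aligned and contiguous mappings are not disallowed.
 */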
static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
				unsigned long end, phys_addr_t phys,
				pgprot_t prot,
				phys_addr_t (*pgtable_alloc)(int),
				int flags)
{
	unsigned long next;
	pmd_t pmd = READ_ONCE(*pmdp);

	BUG_ON(pmd_sect(pmd));
	if (pmd_none(pmd)) {
		phys_addr_t pte_phys;
		BUG_ON(!pgtable_alloc);
		pte_phys = pgtable_alloc(PAGE_SHIFT);
		__pmd_populate(pmdp, pte_phys, PMD_TYPE_TABLE);
		pmd = READ_ONCE(*pmdp);
	}
	BUG_ON(pmd_bad(pmd));

	do {
		pgprot_t __prot = prot;

		next = pte_cont_addr_end(addr, end);

		/* use a contiguous mapping if the range is suitably aligned */
		if ((((addr | next | phys) & ~CONT_PTE_MASK) == 0) &&
		    (flags & NO_CONT_MAPPINGS) == 0)
			__prot = __pgprot(pgprot_val(prot) | PTE_CONT);

		init_pte(pmdp, addr, next, phys, __prot);

		phys += next - addr;
	} while (addr = next, addr != end);
}

static void init_pmd(pud_t *pudp, unsigned long addr, unsigned long end,
		     phys_addr_t phys, pgprot_t prot,
		     phys_addr_t (*pgtable_alloc)(int), int flags)
{
	unsigned long next;
	pmd_t *pmdp;

	pmdp = pmd_set_fixmap_offset(pudp, addr);
	do {
		pmd_t old_pmd = READ_ONCE(*pmdp);

		next = pmd_addr_end(addr, end);

		/* try section mapping first */
		if (((addr | next | phys) & ~SECTION_MASK) == 0 &&
		    (flags & NO_BLOCK_MAPPINGS) == 0) {
			pmd_set_huge(pmdp, phys, prot);

			/*
			 * After the PMD entry has been populated once, we
			 * only allow updates to the permission attributes.
			 */
			BUG_ON(!pgattr_change_is_safe(pmd_val(old_pmd),
						      READ_ONCE(pmd_val(*pmdp))));
		} else {
			alloc_init_cont_pte(pmdp, addr, next, phys, prot,
					    pgtable_alloc, flags);

			BUG_ON(pmd_val(old_pmd) != 0 &&
			       pmd_val(old_pmd) != READ_ONCE(pmd_val(*pmdp)));
		}
		phys += next - addr;
	} while (pmdp++, addr = next, addr != end);

	pmd_clear_fixmap();
}

static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
				unsigned long end, phys_addr_t phys,
				pgprot_t prot,
				phys_addr_t (*pgtable_alloc)(int), int flags)
{
	unsigned long next;
	pud_t pud = READ_ONCE(*pudp);

	/*
	 * Check for initial section mappings in the pgd/pud.
	 */
	BUG_ON(pud_sect(pud));
	if (pud_none(pud)) {
		phys_addr_t pmd_phys;
		BUG_ON(!pgtable_alloc);
		pmd_phys = pgtable_alloc(PMD_SHIFT);
		__pud_populate(pudp, pmd_phys, PUD_TYPE_TABLE);
		pud = READ_ONCE(*pudp);
	}
	BUG_ON(pud_bad(pud));

	do {
		pgprot_t __prot = prot;

		next = pmd_cont_addr_end(addr, end);

		/* use a contiguous mapping if the range is suitably aligned */
		if ((((addr | next | phys) & ~CONT_PMD_MASK) == 0) &&
		    (flags & NO_CONT_MAPPINGS) == 0)
			__prot = __pgprot(pgprot_val(prot) | PTE_CONT);

		init_pmd(pudp, addr, next, phys, __prot, pgtable_alloc, flags);

		phys += next - addr;
	} while (addr = next, addr != end);
}

static inline bool use_1G_block(unsigned long addr, unsigned long next,
				unsigned long phys)
{
	if (PAGE_SHIFT != 12)
		return false;

	if (((addr | next | phys) & ~PUD_MASK) != 0)
		return false;

	return true;
}

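/*
 * Allocate a PUD table for *pgdp if necessary and map [addr, end),
 * using 1GB block entries where the range and configuration allow it
 * and falling back to alloc_init_cont_pmd() otherwise.
 */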
static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
			   phys_addr_t phys, pgprot_t prot,
			   phys_addr_t (*pgtable_alloc)(int),
			   int flags)
{
	unsigned long next;
	pud_t *pudp;
	pgd_t pgd = READ_ONCE(*pgdp);

	if (pgd_none(pgd)) {
		phys_addr_t pud_phys;
		BUG_ON(!pgtable_alloc);
		pud_phys = pgtable_alloc(PUD_SHIFT);
		__pgd_populate(pgdp, pud_phys, PUD_TYPE_TABLE);
		pgd = READ_ONCE(*pgdp);
	}
	BUG_ON(pgd_bad(pgd));

	pudp = pud_set_fixmap_offset(pgdp, addr);
	do {
		pud_t old_pud = READ_ONCE(*pudp);

		next = pud_addr_end(addr, end);

		/*
		 * For 4K granule only, attempt to put down a 1GB block
		 */
		if (use_1G_block(addr, next, phys) &&
		    (flags & NO_BLOCK_MAPPINGS) == 0) {
			pud_set_huge(pudp, phys, prot);

			/*
			 * After the PUD entry has been populated once, we
			 * only allow updates to the permission attributes.
			 */
			BUG_ON(!pgattr_change_is_safe(pud_val(old_pud),
						      READ_ONCE(pud_val(*pudp))));
		} else {
			alloc_init_cont_pmd(pudp, addr, next, phys, prot,
					    pgtable_alloc, flags);

			BUG_ON(pud_val(old_pud) != 0 &&
			       pud_val(old_pud) != READ_ONCE(pud_val(*pudp)));
		}
		phys += next - addr;
	} while (pudp++, addr = next, addr != end);

	pud_clear_fixmap();
}

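/*
 * Common helper for all mapping creation: walk pgdir and map the physical
 * range starting at phys to [virt, virt + size) with prot, allocating any
 * intermediate tables with pgtable_alloc (which may be NULL for callers
 * that must not allocate).
 */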
static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
				 unsigned long virt, phys_addr_t size,
				 pgprot_t prot,
				 phys_addr_t (*pgtable_alloc)(int),
				 int flags)
{
	unsigned long addr, end, next;
	pgd_t *pgdp = pgd_offset_raw(pgdir, virt);

	/*
	 * If the virtual and physical address don't have the same offset
	 * within a page, we cannot map the region as the caller expects.
	 */
	if (WARN_ON((phys ^ virt) & ~PAGE_MASK))
		return;

	phys &= PAGE_MASK;
	addr = virt & PAGE_MASK;
	end = PAGE_ALIGN(virt + size);

	do {
		next = pgd_addr_end(addr, end);
		alloc_init_pud(pgdp, addr, next, phys, prot, pgtable_alloc,
			       flags);
		phys += next - addr;
	} while (pgdp++, addr = next, addr != end);
}

static phys_addr_t __pgd_pgtable_alloc(int shift)
{
	void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL);
	BUG_ON(!ptr);

	/* Ensure the zeroed page is visible to the page table walker */
	dsb(ishst);
	return __pa(ptr);
}

static phys_addr_t pgd_pgtable_alloc(int shift)
{
	phys_addr_t pa = __pgd_pgtable_alloc(shift);

	/*
	 * Call proper page table ctor in case later we need to
	 * call core mm functions like apply_to_page_range() on
	 * this pre-allocated page table.
	 *
	 * We don't select ARCH_ENABLE_SPLIT_PMD_PTLOCK if pmd is
	 * folded, and if so pgtable_pmd_page_ctor() becomes nop.
	 */
	if (shift == PAGE_SHIFT)
		BUG_ON(!pgtable_pte_page_ctor(phys_to_page(pa)));
	else if (shift == PMD_SHIFT)
		BUG_ON(!pgtable_pmd_page_ctor(phys_to_page(pa)));

	return pa;
}

/*
 * This function can only be used to modify existing table entries,
 * without allocating new levels of table. Note that this permits the
 * creation of new section or page entries.
 */
static void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt,
					  phys_addr_t size, pgprot_t prot)
{
	if ((virt >= PAGE_END) && (virt < VMALLOC_START)) {
		pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n",
			&phys, virt);
		return;
	}
	__create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL,
			     NO_CONT_MAPPINGS);
}

void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
			       unsigned long virt, phys_addr_t size,
			       pgprot_t prot, bool page_mappings_only)
{
	int flags = 0;

	BUG_ON(mm == &init_mm);

	if (page_mappings_only)
		flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;

	__create_pgd_mapping(mm->pgd, phys, virt, size, prot,
			     pgd_pgtable_alloc, flags);
}

static void update_mapping_prot(phys_addr_t phys, unsigned long virt,
				phys_addr_t size, pgprot_t prot)
{
	if ((virt >= PAGE_END) && (virt < VMALLOC_START)) {
		pr_warn("BUG: not updating mapping for %pa at 0x%016lx - outside kernel range\n",
			&phys, virt);
		return;
	}

	__create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL,
			     NO_CONT_MAPPINGS);

	/* flush the TLBs after updating live kernel mappings */
	flush_tlb_kernel_range(virt, virt + size);
}

static void __init __map_memblock(pgd_t *pgdp, phys_addr_t start,
				  phys_addr_t end, pgprot_t prot, int flags)
{
	__create_pgd_mapping(pgdp, start, __phys_to_virt(start), end - start,
			     prot, early_pgtable_alloc, flags);
}

void __init mark_linear_text_alias_ro(void)
{
	/*
	 * Remove the write permissions from the linear alias of .text/.rodata
	 */
	update_mapping_prot(__pa_symbol(_text), (unsigned long)lm_alias(_text),
			    (unsigned long)__init_begin - (unsigned long)_text,
			    PAGE_KERNEL_RO);
}

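/*
 * Map all memblock memory regions into the linear map. The kernel image
 * (and the crash kernel region, if any) are temporarily marked NOMAP so
 * that they can be mapped separately with the special handling described
 * in the comments below.
 */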
static void __init map_mem(pgd_t *pgdp)
{
	phys_addr_t kernel_start = __pa_symbol(_text);
	phys_addr_t kernel_end = __pa_symbol(__init_begin);
	struct memblock_region *reg;
	int flags = 0;

	if (rodata_full || debug_pagealloc_enabled())
		flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;

	/*
	 * Take care not to create a writable alias for the
	 * read-only text and rodata sections of the kernel image.
	 * So temporarily mark them as NOMAP to skip mappings in
	 * the following for-loop
	 */
	memblock_mark_nomap(kernel_start, kernel_end - kernel_start);
#ifdef CONFIG_KEXEC_CORE
	if (crashk_res.end)
		memblock_mark_nomap(crashk_res.start,
				    resource_size(&crashk_res));
#endif

	/* map all the memory banks */
	for_each_memblock(memory, reg) {
		phys_addr_t start = reg->base;
		phys_addr_t end = start + reg->size;

		if (start >= end)
			break;
		if (memblock_is_nomap(reg))
			continue;

		__map_memblock(pgdp, start, end, PAGE_KERNEL, flags);
	}

	/*
	 * Map the linear alias of the [_text, __init_begin) interval
	 * as non-executable now, and remove the write permission in
	 * mark_linear_text_alias_ro() below (which will be called after
	 * alternative patching has completed). This makes the contents
	 * of the region accessible to subsystems such as hibernate,
	 * but protects it from inadvertent modification or execution.
	 * Note that contiguous mappings cannot be remapped in this way,
	 * so we should avoid them here.
	 */
	__map_memblock(pgdp, kernel_start, kernel_end,
		       PAGE_KERNEL, NO_CONT_MAPPINGS);
	memblock_clear_nomap(kernel_start, kernel_end - kernel_start);

#ifdef CONFIG_KEXEC_CORE
	/*
	 * Use page-level mappings here so that we can shrink the region
	 * in page granularity and put back unused memory to buddy system
	 * through /sys/kernel/kexec_crash_size interface.
	 */
	if (crashk_res.end) {
		__map_memblock(pgdp, crashk_res.start, crashk_res.end + 1,
			       PAGE_KERNEL,
			       NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
		memblock_clear_nomap(crashk_res.start,
				     resource_size(&crashk_res));
	}
#endif
}

void mark_rodata_ro(void)
{
	unsigned long section_size;

	/*
	 * mark .rodata as read only. Use __init_begin rather than __end_rodata
	 * to cover NOTES and EXCEPTION_TABLE.
	 */
	section_size = (unsigned long)__init_begin - (unsigned long)__start_rodata;
	update_mapping_prot(__pa_symbol(__start_rodata), (unsigned long)__start_rodata,
			    section_size, PAGE_KERNEL_RO);

	debug_checkwx();
}

static void __init map_kernel_segment(pgd_t *pgdp, void *va_start, void *va_end,
				      pgprot_t prot, struct vm_struct *vma,
				      int flags, unsigned long vm_flags)
{
	phys_addr_t pa_start = __pa_symbol(va_start);
	unsigned long size = va_end - va_start;

	BUG_ON(!PAGE_ALIGNED(pa_start));
	BUG_ON(!PAGE_ALIGNED(size));

	__create_pgd_mapping(pgdp, pa_start, (unsigned long)va_start, size, prot,
			     early_pgtable_alloc, flags);

	if (!(vm_flags & VM_NO_GUARD))
		size += PAGE_SIZE;

	vma->addr	= va_start;
	vma->phys_addr	= pa_start;
	vma->size	= size;
	vma->flags	= VM_MAP | vm_flags;
	vma->caller	= __builtin_return_address(0);

	vm_area_add_early(vma);
}

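/*
 * Parse the "rodata=" command line parameter: the usual boolean values
 * control rodata_enabled, while "rodata=full" additionally sets
 * rodata_full to force page-granular (non-block, non-contiguous) linear
 * mappings so that permissions can be changed at page granularity.
 */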
static int __init parse_rodata(char *arg)
{
	int ret = strtobool(arg, &rodata_enabled);
	if (!ret) {
		rodata_full = false;
		return 0;
	}

	/* permit 'full' in addition to boolean options */
	if (strcmp(arg, "full"))
		return -EINVAL;

	rodata_enabled = true;
	rodata_full = true;
	return 0;
}
early_param("rodata", parse_rodata);

#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
static int __init map_entry_trampoline(void)
{
	pgprot_t prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC;
	phys_addr_t pa_start = __pa_symbol(__entry_tramp_text_start);

	/* The trampoline is always mapped and can therefore be global */
	pgprot_val(prot) &= ~PTE_NG;

	/* Map only the text into the trampoline page table */
	memset(tramp_pg_dir, 0, PGD_SIZE);
	__create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, PAGE_SIZE,
			     prot, __pgd_pgtable_alloc, 0);

	/* Map both the text and data into the kernel page table */
	__set_fixmap(FIX_ENTRY_TRAMP_TEXT, pa_start, prot);
	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
		extern char __entry_tramp_data_start[];

		__set_fixmap(FIX_ENTRY_TRAMP_DATA,
			     __pa_symbol(__entry_tramp_data_start),
			     PAGE_KERNEL_RO);
	}

	return 0;
}
core_initcall(map_entry_trampoline);
#endif

/*
 * Create fine-grained mappings for the kernel.
 */
static void __init map_kernel(pgd_t *pgdp)
{
	static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_inittext,
				vmlinux_initdata, vmlinux_data;

	/*
	 * External debuggers may need to write directly to the text
	 * mapping to install SW breakpoints. Allow this (only) when
	 * explicitly requested with rodata=off.
	 */
	pgprot_t text_prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC;

	/*
	 * Only rodata will be remapped with different permissions later on,
	 * all other segments are allowed to use contiguous mappings.
	 */
	map_kernel_segment(pgdp, _text, _etext, text_prot, &vmlinux_text, 0,
			   VM_NO_GUARD);
	map_kernel_segment(pgdp, __start_rodata, __inittext_begin, PAGE_KERNEL,
			   &vmlinux_rodata, NO_CONT_MAPPINGS, VM_NO_GUARD);
	map_kernel_segment(pgdp, __inittext_begin, __inittext_end, text_prot,
			   &vmlinux_inittext, 0, VM_NO_GUARD);
	map_kernel_segment(pgdp, __initdata_begin, __initdata_end, PAGE_KERNEL,
			   &vmlinux_initdata, 0, VM_NO_GUARD);
	map_kernel_segment(pgdp, _data, _end, PAGE_KERNEL, &vmlinux_data, 0, 0);

	if (!READ_ONCE(pgd_val(*pgd_offset_raw(pgdp, FIXADDR_START)))) {
		/*
		 * The fixmap falls in a separate pgd to the kernel, and doesn't
		 * live in the carveout for the swapper_pg_dir. We can simply
		 * re-use the existing dir for the fixmap.
		 */
		set_pgd(pgd_offset_raw(pgdp, FIXADDR_START),
			READ_ONCE(*pgd_offset_k(FIXADDR_START)));
	} else if (CONFIG_PGTABLE_LEVELS > 3) {
		pgd_t *bm_pgdp;
		pud_t *bm_pudp;
		/*
		 * The fixmap shares its top level pgd entry with the kernel
		 * mapping. This can really only occur when we are running
		 * with 16k/4 levels, so we can simply reuse the pud level
		 * entry instead.
		 */
		BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
		bm_pgdp = pgd_offset_raw(pgdp, FIXADDR_START);
		bm_pudp = pud_set_fixmap_offset(bm_pgdp, FIXADDR_START);
		pud_populate(&init_mm, bm_pudp, lm_alias(bm_pmd));
		pud_clear_fixmap();
	} else {
		BUG();
	}

	kasan_copy_shadow(pgdp);
}

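/*
 * Set up the final page tables in swapper_pg_dir, switch TTBR1 over to
 * them and free the initial page tables that were used during boot.
 */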
void __init paging_init(void)
{
	pgd_t *pgdp = pgd_set_fixmap(__pa_symbol(swapper_pg_dir));

	map_kernel(pgdp);
	map_mem(pgdp);

	pgd_clear_fixmap();

	cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
	init_mm.pgd = swapper_pg_dir;

	memblock_free(__pa_symbol(init_pg_dir),
		      __pa_symbol(init_pg_end) - __pa_symbol(init_pg_dir));

	memblock_allow_resize();
}

/*
 * Check whether a kernel address is valid (derived from arch/x86/).
 */
int kern_addr_valid(unsigned long addr)
{
	pgd_t *pgdp;
	pud_t *pudp, pud;
	pmd_t *pmdp, pmd;
	pte_t *ptep, pte;

	if ((((long)addr) >> VA_BITS) != -1UL)
		return 0;

	pgdp = pgd_offset_k(addr);
	if (pgd_none(READ_ONCE(*pgdp)))
		return 0;

	pudp = pud_offset(pgdp, addr);
	pud = READ_ONCE(*pudp);
	if (pud_none(pud))
		return 0;

	if (pud_sect(pud))
		return pfn_valid(pud_pfn(pud));

	pmdp = pmd_offset(pudp, addr);
	pmd = READ_ONCE(*pmdp);
	if (pmd_none(pmd))
		return 0;

	if (pmd_sect(pmd))
		return pfn_valid(pmd_pfn(pmd));

	ptep = pte_offset_kernel(pmdp, addr);
	pte = READ_ONCE(*ptep);
	if (pte_none(pte))
		return 0;

	return pfn_valid(pte_pfn(pte));
}

#ifdef CONFIG_MEMORY_HOTPLUG
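/*
 * Helpers for tearing down kernel mappings (and, optionally, freeing the
 * mapped pages) when memory is hot-removed. The unmap_hotplug_*()
 * functions clear the entries; the free_empty_*() functions further down
 * free page-table pages that have become entirely empty.
 */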
static void free_hotplug_page_range(struct page *page, size_t size)
{
	WARN_ON(PageReserved(page));
	free_pages((unsigned long)page_address(page), get_order(size));
}

static void free_hotplug_pgtable_page(struct page *page)
{
	free_hotplug_page_range(page, PAGE_SIZE);
}

static bool pgtable_range_aligned(unsigned long start, unsigned long end,
				  unsigned long floor, unsigned long ceiling,
				  unsigned long mask)
{
	start &= mask;
	if (start < floor)
		return false;

	if (ceiling) {
		ceiling &= mask;
		if (!ceiling)
			return false;
	}

	if (end - 1 > ceiling - 1)
		return false;
	return true;
}

static void unmap_hotplug_pte_range(pmd_t *pmdp, unsigned long addr,
				    unsigned long end, bool free_mapped)
{
	pte_t *ptep, pte;

	do {
		ptep = pte_offset_kernel(pmdp, addr);
		pte = READ_ONCE(*ptep);
		if (pte_none(pte))
			continue;

		WARN_ON(!pte_present(pte));
		pte_clear(&init_mm, addr, ptep);
		flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
		if (free_mapped)
			free_hotplug_page_range(pte_page(pte), PAGE_SIZE);
	} while (addr += PAGE_SIZE, addr < end);
}

static void unmap_hotplug_pmd_range(pud_t *pudp, unsigned long addr,
				    unsigned long end, bool free_mapped)
{
	unsigned long next;
	pmd_t *pmdp, pmd;

	do {
		next = pmd_addr_end(addr, end);
		pmdp = pmd_offset(pudp, addr);
		pmd = READ_ONCE(*pmdp);
		if (pmd_none(pmd))
			continue;

		WARN_ON(!pmd_present(pmd));
		if (pmd_sect(pmd)) {
			pmd_clear(pmdp);

			/*
			 * One TLBI should be sufficient here as the PMD_SIZE
			 * range is mapped with a single block entry.
			 */
			flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
			if (free_mapped)
				free_hotplug_page_range(pmd_page(pmd),
							PMD_SIZE);
			continue;
		}
		WARN_ON(!pmd_table(pmd));
		unmap_hotplug_pte_range(pmdp, addr, next, free_mapped);
	} while (addr = next, addr < end);
}

static void unmap_hotplug_pud_range(p4d_t *p4dp, unsigned long addr,
				    unsigned long end, bool free_mapped)
{
	unsigned long next;
	pud_t *pudp, pud;

	do {
		next = pud_addr_end(addr, end);
		pudp = pud_offset(p4dp, addr);
		pud = READ_ONCE(*pudp);
		if (pud_none(pud))
			continue;

		WARN_ON(!pud_present(pud));
		if (pud_sect(pud)) {
			pud_clear(pudp);

			/*
			 * One TLBI should be sufficient here as the PUD_SIZE
			 * range is mapped with a single block entry.
			 */
			flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
			if (free_mapped)
				free_hotplug_page_range(pud_page(pud),
							PUD_SIZE);
			continue;
		}
		WARN_ON(!pud_table(pud));
		unmap_hotplug_pmd_range(pudp, addr, next, free_mapped);
	} while (addr = next, addr < end);
}

static void unmap_hotplug_p4d_range(pgd_t *pgdp, unsigned long addr,
				    unsigned long end, bool free_mapped)
{
	unsigned long next;
	p4d_t *p4dp, p4d;

	do {
		next = p4d_addr_end(addr, end);
		p4dp = p4d_offset(pgdp, addr);
		p4d = READ_ONCE(*p4dp);
		if (p4d_none(p4d))
			continue;

		WARN_ON(!p4d_present(p4d));
		unmap_hotplug_pud_range(p4dp, addr, next, free_mapped);
	} while (addr = next, addr < end);
}

static void unmap_hotplug_range(unsigned long addr, unsigned long end,
				bool free_mapped)
{
	unsigned long next;
	pgd_t *pgdp, pgd;

	do {
		next = pgd_addr_end(addr, end);
		pgdp = pgd_offset_k(addr);
		pgd = READ_ONCE(*pgdp);
		if (pgd_none(pgd))
			continue;

		WARN_ON(!pgd_present(pgd));
		unmap_hotplug_p4d_range(pgdp, addr, next, free_mapped);
	} while (addr = next, addr < end);
}

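/*
 * Free a PTE table once all of its entries have been cleared by the unmap
 * loops above, provided the [floor, ceiling) bounds show that no
 * neighbouring region still relies on it.
 */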
static void free_empty_pte_table(pmd_t *pmdp, unsigned long addr,
				 unsigned long end, unsigned long floor,
				 unsigned long ceiling)
{
	pte_t *ptep, pte;
	unsigned long i, start = addr;

	do {
		ptep = pte_offset_kernel(pmdp, addr);
		pte = READ_ONCE(*ptep);

		/*
		 * This is just a sanity check here which verifies that
		 * pte clearing has been done by earlier unmap loops.
		 */
		WARN_ON(!pte_none(pte));
	} while (addr += PAGE_SIZE, addr < end);

	if (!pgtable_range_aligned(start, end, floor, ceiling, PMD_MASK))
		return;

	/*
	 * Check whether we can free the pte page if the rest of the
	 * entries are empty. Overlap with other regions have been
	 * handled by the floor/ceiling check.
	 */
	ptep = pte_offset_kernel(pmdp, 0UL);
	for (i = 0; i < PTRS_PER_PTE; i++) {
		if (!pte_none(READ_ONCE(ptep[i])))
			return;
	}

	pmd_clear(pmdp);
	__flush_tlb_kernel_pgtable(start);
	free_hotplug_pgtable_page(virt_to_page(ptep));
}

static void free_empty_pmd_table(pud_t *pudp, unsigned long addr,
				 unsigned long end, unsigned long floor,
				 unsigned long ceiling)
{
	pmd_t *pmdp, pmd;
	unsigned long i, next, start = addr;

	do {
		next = pmd_addr_end(addr, end);
		pmdp = pmd_offset(pudp, addr);
		pmd = READ_ONCE(*pmdp);
		if (pmd_none(pmd))
			continue;

		WARN_ON(!pmd_present(pmd) || !pmd_table(pmd) || pmd_sect(pmd));
		free_empty_pte_table(pmdp, addr, next, floor, ceiling);
	} while (addr = next, addr < end);

	if (CONFIG_PGTABLE_LEVELS <= 2)
		return;

	if (!pgtable_range_aligned(start, end, floor, ceiling, PUD_MASK))
		return;

	/*
	 * Check whether we can free the pmd page if the rest of the
	 * entries are empty. Overlap with other regions have been
	 * handled by the floor/ceiling check.
	 */
	pmdp = pmd_offset(pudp, 0UL);
	for (i = 0; i < PTRS_PER_PMD; i++) {
		if (!pmd_none(READ_ONCE(pmdp[i])))
			return;
	}

	pud_clear(pudp);
	__flush_tlb_kernel_pgtable(start);
	free_hotplug_pgtable_page(virt_to_page(pmdp));
}

static void free_empty_pud_table(p4d_t *p4dp, unsigned long addr,
				 unsigned long end, unsigned long floor,
				 unsigned long ceiling)
{
	pud_t *pudp, pud;
	unsigned long i, next, start = addr;

	do {
		next = pud_addr_end(addr, end);
		pudp = pud_offset(p4dp, addr);
		pud = READ_ONCE(*pudp);
		if (pud_none(pud))
			continue;

		WARN_ON(!pud_present(pud) || !pud_table(pud) || pud_sect(pud));
		free_empty_pmd_table(pudp, addr, next, floor, ceiling);
	} while (addr = next, addr < end);

	if (CONFIG_PGTABLE_LEVELS <= 3)
		return;

	if (!pgtable_range_aligned(start, end, floor, ceiling, PGDIR_MASK))
		return;

	/*
	 * Check whether we can free the pud page if the rest of the
	 * entries are empty. Overlap with other regions have been
	 * handled by the floor/ceiling check.
	 */
	pudp = pud_offset(p4dp, 0UL);
	for (i = 0; i < PTRS_PER_PUD; i++) {
		if (!pud_none(READ_ONCE(pudp[i])))
			return;
	}

	p4d_clear(p4dp);
	__flush_tlb_kernel_pgtable(start);
	free_hotplug_pgtable_page(virt_to_page(pudp));
}

static void free_empty_p4d_table(pgd_t *pgdp, unsigned long addr,
				 unsigned long end, unsigned long floor,
				 unsigned long ceiling)
{
	unsigned long next;
	p4d_t *p4dp, p4d;

	do {
		next = p4d_addr_end(addr, end);
		p4dp = p4d_offset(pgdp, addr);
		p4d = READ_ONCE(*p4dp);
		if (p4d_none(p4d))
			continue;

		WARN_ON(!p4d_present(p4d));
		free_empty_pud_table(p4dp, addr, next, floor, ceiling);
	} while (addr = next, addr < end);
}

static void free_empty_tables(unsigned long addr, unsigned long end,
			      unsigned long floor, unsigned long ceiling)
{
	unsigned long next;
	pgd_t *pgdp, pgd;

	do {
		next = pgd_addr_end(addr, end);
		pgdp = pgd_offset_k(addr);
		pgd = READ_ONCE(*pgdp);
		if (pgd_none(pgd))
			continue;

		WARN_ON(!pgd_present(pgd));
		free_empty_p4d_table(pgdp, addr, next, floor, ceiling);
	} while (addr = next, addr < end);
}
#endif

#ifdef CONFIG_SPARSEMEM_VMEMMAP
#if !ARM64_SWAPPER_USES_SECTION_MAPS
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
		struct vmem_altmap *altmap)
{
	return vmemmap_populate_basepages(start, end, node);
}
#else	/* !ARM64_SWAPPER_USES_SECTION_MAPS */
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
		struct vmem_altmap *altmap)
{
	unsigned long addr = start;
	unsigned long next;
	pgd_t *pgdp;
	pud_t *pudp;
	pmd_t *pmdp;

	do {
		next = pmd_addr_end(addr, end);

		pgdp = vmemmap_pgd_populate(addr, node);
		if (!pgdp)
			return -ENOMEM;

		pudp = vmemmap_pud_populate(pgdp, addr, node);
		if (!pudp)
			return -ENOMEM;

		pmdp = pmd_offset(pudp, addr);
		if (pmd_none(READ_ONCE(*pmdp))) {
			void *p = NULL;

			p = vmemmap_alloc_block_buf(PMD_SIZE, node);
			if (!p)
				return -ENOMEM;

			pmd_set_huge(pmdp, __pa(p), __pgprot(PROT_SECT_NORMAL));
		} else
			vmemmap_verify((pte_t *)pmdp, node, addr, next);
	} while (addr = next, addr != end);

	return 0;
}
#endif	/* !ARM64_SWAPPER_USES_SECTION_MAPS */
void vmemmap_free(unsigned long start, unsigned long end,
		struct vmem_altmap *altmap)
{
#ifdef CONFIG_MEMORY_HOTPLUG
	WARN_ON((start < VMEMMAP_START) || (end > VMEMMAP_END));

	unmap_hotplug_range(start, end, true);
	free_empty_tables(start, end, VMEMMAP_START, VMEMMAP_END);
#endif
}
#endif	/* CONFIG_SPARSEMEM_VMEMMAP */

static inline pud_t *fixmap_pud(unsigned long addr)
{
	pgd_t *pgdp = pgd_offset_k(addr);
	pgd_t pgd = READ_ONCE(*pgdp);

	BUG_ON(pgd_none(pgd) || pgd_bad(pgd));

	return pud_offset_kimg(pgdp, addr);
}

static inline pmd_t *fixmap_pmd(unsigned long addr)
{
	pud_t *pudp = fixmap_pud(addr);
	pud_t pud = READ_ONCE(*pudp);

	BUG_ON(pud_none(pud) || pud_bad(pud));

	return pmd_offset_kimg(pudp, addr);
}

static inline pte_t *fixmap_pte(unsigned long addr)
{
	return &bm_pte[pte_index(addr)];
}

/*
 * The p*d_populate functions call virt_to_phys implicitly so they can't be used
 * directly on kernel symbols (bm_p*d). This function is called too early to use
 * lm_alias so __p*d_populate functions must be used to populate with the
 * physical address from __pa_symbol.
 */
void __init early_fixmap_init(void)
{
	pgd_t *pgdp, pgd;
	pud_t *pudp;
	pmd_t *pmdp;
	unsigned long addr = FIXADDR_START;

	pgdp = pgd_offset_k(addr);
	pgd = READ_ONCE(*pgdp);
	if (CONFIG_PGTABLE_LEVELS > 3 &&
	    !(pgd_none(pgd) || pgd_page_paddr(pgd) == __pa_symbol(bm_pud))) {
		/*
		 * We only end up here if the kernel mapping and the fixmap
		 * share the top level pgd entry, which should only happen on
		 * 16k/4 levels configurations.
		 */
		BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
		pudp = pud_offset_kimg(pgdp, addr);
	} else {
		if (pgd_none(pgd))
			__pgd_populate(pgdp, __pa_symbol(bm_pud), PUD_TYPE_TABLE);
		pudp = fixmap_pud(addr);
	}
	if (pud_none(READ_ONCE(*pudp)))
		__pud_populate(pudp, __pa_symbol(bm_pmd), PMD_TYPE_TABLE);
	pmdp = fixmap_pmd(addr);
	__pmd_populate(pmdp, __pa_symbol(bm_pte), PMD_TYPE_TABLE);

	/*
	 * The boot-ioremap range spans multiple pmds, for which
	 * we are not prepared:
	 */
	BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
		     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));

	if ((pmdp != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)))
	     || pmdp != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) {
		WARN_ON(1);
		pr_warn("pmdp %p != %p, %p\n",
			pmdp, fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)),
			fixmap_pmd(fix_to_virt(FIX_BTMAP_END)));
		pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
			fix_to_virt(FIX_BTMAP_BEGIN));
		pr_warn("fix_to_virt(FIX_BTMAP_END): %08lx\n",
			fix_to_virt(FIX_BTMAP_END));

		pr_warn("FIX_BTMAP_END: %d\n", FIX_BTMAP_END);
		pr_warn("FIX_BTMAP_BEGIN: %d\n", FIX_BTMAP_BEGIN);
	}
}

/*
 * Unusually, this is also called in IRQ context (ghes_iounmap_irq) so if we
 * ever need to use IPIs for TLB broadcasting, then we're in trouble here.
 */
void __set_fixmap(enum fixed_addresses idx,
		  phys_addr_t phys, pgprot_t flags)
{
	unsigned long addr = __fix_to_virt(idx);
	pte_t *ptep;

	BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);

	ptep = fixmap_pte(addr);

	if (pgprot_val(flags)) {
		set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, flags));
	} else {
		pte_clear(&init_mm, addr, ptep);
		flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
	}
}

void *__init fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot)
{
	const u64 dt_virt_base = __fix_to_virt(FIX_FDT);
	int offset;
	void *dt_virt;

	/*
	 * Check whether the physical FDT address is set and meets the minimum
	 * alignment requirement. Since we are relying on MIN_FDT_ALIGN to be
	 * at least 8 bytes so that we can always access the magic and size
	 * fields of the FDT header after mapping the first chunk, double check
	 * here if that is indeed the case.
	 */
	BUILD_BUG_ON(MIN_FDT_ALIGN < 8);
	if (!dt_phys || dt_phys % MIN_FDT_ALIGN)
		return NULL;

	/*
	 * Make sure that the FDT region can be mapped without the need to
	 * allocate additional translation table pages, so that it is safe
	 * to call create_mapping_noalloc() this early.
	 *
	 * On 64k pages, the FDT will be mapped using PTEs, so we need to
	 * be in the same PMD as the rest of the fixmap.
	 * On 4k pages, we'll use section mappings for the FDT so we only
	 * have to be in the same PUD.
	 */
	BUILD_BUG_ON(dt_virt_base % SZ_2M);

	BUILD_BUG_ON(__fix_to_virt(FIX_FDT_END) >> SWAPPER_TABLE_SHIFT !=
		     __fix_to_virt(FIX_BTMAP_BEGIN) >> SWAPPER_TABLE_SHIFT);

	offset = dt_phys % SWAPPER_BLOCK_SIZE;
	dt_virt = (void *)dt_virt_base + offset;

	/* map the first chunk so we can read the size from the header */
	create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE),
			       dt_virt_base, SWAPPER_BLOCK_SIZE, prot);

	if (fdt_magic(dt_virt) != FDT_MAGIC)
		return NULL;

	*size = fdt_totalsize(dt_virt);
	if (*size > MAX_FDT_SIZE)
		return NULL;

	if (offset + *size > SWAPPER_BLOCK_SIZE)
		create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base,
				       round_up(offset + *size, SWAPPER_BLOCK_SIZE), prot);

	return dt_virt;
}

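/*
 * Support for huge ioremap()/vmalloc mappings: report which page-table
 * levels may use block entries, and install or remove such entries on
 * request from the generic code.
 */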
Anshuman Khandual0f472d02019-07-16 16:27:33 -07001247int __init arch_ioremap_p4d_supported(void)
1248{
1249 return 0;
1250}
1251
Ard Biesheuvel324420b2016-02-16 13:52:35 +01001252int __init arch_ioremap_pud_supported(void)
1253{
Mark Rutland7ba36ec2019-05-14 14:30:06 +05301254 /*
1255 * Only 4k granule supports level 1 block mappings.
1256 * SW table walks can't handle removal of intermediate entries.
1257 */
1258 return IS_ENABLED(CONFIG_ARM64_4K_PAGES) &&
Steven Price102f45f2020-02-03 17:36:29 -08001259 !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS);
Ard Biesheuvel324420b2016-02-16 13:52:35 +01001260}
1261
1262int __init arch_ioremap_pmd_supported(void)
1263{
Mark Rutland7ba36ec2019-05-14 14:30:06 +05301264 /* See arch_ioremap_pud_supported() */
Steven Price102f45f2020-02-03 17:36:29 -08001265 return !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS);
Ard Biesheuvel324420b2016-02-16 13:52:35 +01001266}
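
/*
 * These predicates tell the generic ioremap_page_range() machinery which
 * block sizes it may use for huge I/O mappings. With a 4K granule a PUD
 * block covers 1GiB and a PMD block 2MiB; with 16K and 64K granules PMD
 * blocks cover 32MiB and 512MiB respectively and PUD blocks are not used.
 * Both are disabled under PTDUMP_DEBUGFS because its software page-table
 * walker cannot cope with intermediate tables disappearing underneath it
 * (see pud_free_pmd_page() and pmd_free_pte_page() below).
 */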
1267
Will Deacon20a004e2018-02-15 11:14:56 +00001268int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot)
Ard Biesheuvel324420b2016-02-16 13:52:35 +01001269{
Anshuman Khandualf7f00972019-05-27 09:28:15 +05301270 pud_t new_pud = pfn_pud(__phys_to_pfn(phys), mk_pud_sect_prot(prot));
Will Deacon15122ee2018-02-21 12:59:27 +00001271
Laura Abbott82034c22018-05-23 11:43:46 -07001272 /* Only allow permission changes for now */
1273 if (!pgattr_change_is_safe(READ_ONCE(pud_val(*pudp)),
1274 pud_val(new_pud)))
Will Deacon15122ee2018-02-21 12:59:27 +00001275 return 0;
1276
Anshuman Khandual87dedf72019-05-27 12:33:29 +05301277 VM_BUG_ON(phys & ~PUD_MASK);
Laura Abbott82034c22018-05-23 11:43:46 -07001278 set_pud(pudp, new_pud);
Ard Biesheuvel324420b2016-02-16 13:52:35 +01001279 return 1;
1280}
1281
Will Deacon20a004e2018-02-15 11:14:56 +00001282int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot)
Ard Biesheuvel324420b2016-02-16 13:52:35 +01001283{
Anshuman Khandualf7f00972019-05-27 09:28:15 +05301284 pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), mk_pmd_sect_prot(prot));
Will Deacon15122ee2018-02-21 12:59:27 +00001285
Laura Abbott82034c22018-05-23 11:43:46 -07001286 /* Only allow permission changes for now */
1287 if (!pgattr_change_is_safe(READ_ONCE(pmd_val(*pmdp)),
1288 pmd_val(new_pmd)))
Will Deacon15122ee2018-02-21 12:59:27 +00001289 return 0;
1290
Anshuman Khandual87dedf72019-05-27 12:33:29 +05301291 VM_BUG_ON(phys & ~PMD_MASK);
Laura Abbott82034c22018-05-23 11:43:46 -07001292 set_pmd(pmdp, new_pmd);
Ard Biesheuvel324420b2016-02-16 13:52:35 +01001293 return 1;
1294}
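
/*
 * pgattr_change_is_safe() only tolerates changes to the permission bits of
 * a live entry. Changing the memory type or shareability of a mapping that
 * may be in active use would require a full break-before-make sequence to
 * avoid TLB conflict aborts, which pud_set_huge()/pmd_set_huge()
 * deliberately do not attempt - hence the "permission changes only"
 * restriction above.
 */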
1295
Will Deacon20a004e2018-02-15 11:14:56 +00001296int pud_clear_huge(pud_t *pudp)
Ard Biesheuvel324420b2016-02-16 13:52:35 +01001297{
Will Deacon20a004e2018-02-15 11:14:56 +00001298 if (!pud_sect(READ_ONCE(*pudp)))
Ard Biesheuvel324420b2016-02-16 13:52:35 +01001299 return 0;
Will Deacon20a004e2018-02-15 11:14:56 +00001300 pud_clear(pudp);
Ard Biesheuvel324420b2016-02-16 13:52:35 +01001301 return 1;
1302}
1303
Will Deacon20a004e2018-02-15 11:14:56 +00001304int pmd_clear_huge(pmd_t *pmdp)
Ard Biesheuvel324420b2016-02-16 13:52:35 +01001305{
Will Deacon20a004e2018-02-15 11:14:56 +00001306 if (!pmd_sect(READ_ONCE(*pmdp)))
Ard Biesheuvel324420b2016-02-16 13:52:35 +01001307 return 0;
Will Deacon20a004e2018-02-15 11:14:56 +00001308 pmd_clear(pmdp);
Ard Biesheuvel324420b2016-02-16 13:52:35 +01001309 return 1;
1310}
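
/*
 * The *_clear_huge() helpers report whether a section (block) entry was
 * actually removed. The generic vunmap path uses the return value to
 * decide whether it still has to descend to the next table level, and it
 * remains responsible for the TLB invalidation of the unmapped range.
 */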
Toshi Kanib6bdb752018-03-22 16:17:20 -07001311
Chintan Pandyaec28bb92018-06-06 12:31:21 +05301312int pmd_free_pte_page(pmd_t *pmdp, unsigned long addr)
Toshi Kanib6bdb752018-03-22 16:17:20 -07001313{
Chintan Pandyaec28bb92018-06-06 12:31:21 +05301314 pte_t *table;
1315 pmd_t pmd;
1316
1317 pmd = READ_ONCE(*pmdp);
1318
Mark Rutlandfac880c2018-09-05 17:38:57 +01001319 if (!pmd_table(pmd)) {
Will Deacon9c006972018-12-28 00:37:42 -08001320 VM_WARN_ON(1);
Chintan Pandyaec28bb92018-06-06 12:31:21 +05301321 return 1;
1322 }
1323
1324 table = pte_offset_kernel(pmdp, addr);
1325 pmd_clear(pmdp);
1326 __flush_tlb_kernel_pgtable(addr);
1327 pte_free_kernel(NULL, table);
1328 return 1;
Toshi Kanib6bdb752018-03-22 16:17:20 -07001329}
1330
Chintan Pandyaec28bb92018-06-06 12:31:21 +05301331int pud_free_pmd_page(pud_t *pudp, unsigned long addr)
Toshi Kanib6bdb752018-03-22 16:17:20 -07001332{
Chintan Pandyaec28bb92018-06-06 12:31:21 +05301333 pmd_t *table;
1334 pmd_t *pmdp;
1335 pud_t pud;
1336 unsigned long next, end;
1337
1338 pud = READ_ONCE(*pudp);
1339
Mark Rutlandfac880c2018-09-05 17:38:57 +01001340 if (!pud_table(pud)) {
Will Deacon9c006972018-12-28 00:37:42 -08001341 VM_WARN_ON(1);
Chintan Pandyaec28bb92018-06-06 12:31:21 +05301342 return 1;
1343 }
1344
1345 table = pmd_offset(pudp, addr);
1346 pmdp = table;
1347 next = addr;
1348 end = addr + PUD_SIZE;
1349 do {
1350 pmd_free_pte_page(pmdp, next);
1351 } while (pmdp++, next += PMD_SIZE, next != end);
1352
1353 pud_clear(pudp);
1354 __flush_tlb_kernel_pgtable(addr);
1355 pmd_free(NULL, table);
1356 return 1;
Toshi Kanib6bdb752018-03-22 16:17:20 -07001357}
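
/*
 * The two helpers above allow the generic huge-ioremap code to replace a
 * table entry with a block mapping: any stale sub-table left behind by an
 * earlier mapping must be freed first. pud_free_pmd_page() therefore walks
 * all PUD_SIZE / PMD_SIZE entries of the PMD table (512 with a 4K granule),
 * frees the PTE tables hanging off them via pmd_free_pte_page(), clears
 * the PUD, invalidates the cached table walk for the address and only then
 * frees the PMD table itself.
 */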
Robin Murphy4ab21502018-12-11 18:48:48 +00001358
Will Deacon8e2d4342018-12-28 00:37:53 -08001359int p4d_free_pud_page(p4d_t *p4d, unsigned long addr)
1360{
1361 return 0; /* Don't attempt a block mapping */
1362}
1363
Robin Murphy4ab21502018-12-11 18:48:48 +00001364#ifdef CONFIG_MEMORY_HOTPLUG
Anshuman Khandualbbd6ec62020-03-04 09:58:43 +05301365static void __remove_pgd_mapping(pgd_t *pgdir, unsigned long start, u64 size)
1366{
1367 unsigned long end = start + size;
1368
1369 WARN_ON(pgdir != init_mm.pgd);
1370 WARN_ON((start < PAGE_OFFSET) || (end > PAGE_END));
1371
1372 unmap_hotplug_range(start, end, false);
1373 free_empty_tables(start, end, PAGE_OFFSET, PAGE_END);
1374}
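
/*
 * __remove_pgd_mapping() only ever operates on linear-map addresses (hence
 * the PAGE_OFFSET/PAGE_END sanity checks). It relies on
 * unmap_hotplug_range() and free_empty_tables(), defined earlier in this
 * file, to clear the entries and then free any page-table pages that are
 * left completely empty.
 */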
1375
Michal Hocko940519f2019-05-13 17:21:26 -07001376int arch_add_memory(int nid, u64 start, u64 size,
1377 struct mhp_restrictions *restrictions)
Robin Murphy4ab21502018-12-11 18:48:48 +00001378{
Anshuman Khandualbbd6ec62020-03-04 09:58:43 +05301379 int ret, flags = 0;
Robin Murphy4ab21502018-12-11 18:48:48 +00001380
1381 if (rodata_full || debug_pagealloc_enabled())
1382 flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
1383
1384 __create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start),
Will Deacon475ba3f2019-04-08 11:23:48 +01001385 size, PAGE_KERNEL, __pgd_pgtable_alloc, flags);
Robin Murphy4ab21502018-12-11 18:48:48 +00001386
Dan Williams16993c02019-11-06 17:43:21 -08001387 memblock_clear_nomap(start, size);
1388
Anshuman Khandualbbd6ec62020-03-04 09:58:43 +05301389 ret = __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT,
Michal Hocko940519f2019-05-13 17:21:26 -07001390 restrictions);
Anshuman Khandualbbd6ec62020-03-04 09:58:43 +05301391 if (ret)
1392 __remove_pgd_mapping(swapper_pg_dir,
1393 __phys_to_virt(start), size);
1394 return ret;
Robin Murphy4ab21502018-12-11 18:48:48 +00001395}
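
/*
 * arch_add_memory() is reached via the core hotplug path (e.g.
 * add_memory_resource()). Hot-adding proceeds in three steps: map the new
 * range in the linear map, dropping to page granularity when rodata_full
 * or debug_pagealloc need per-page permission changes; clear
 * MEMBLOCK_NOMAP in case the range was marked nomap at boot; then hand the
 * range to the core with __add_pages(). If the core fails, the freshly
 * created linear mapping is torn down again.
 */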
Anshuman Khandualbbd6ec62020-03-04 09:58:43 +05301396
David Hildenbrand22eb6342019-07-18 15:56:41 -07001397void arch_remove_memory(int nid, u64 start, u64 size,
1398 struct vmem_altmap *altmap)
1399{
1400 unsigned long start_pfn = start >> PAGE_SHIFT;
1401 unsigned long nr_pages = size >> PAGE_SHIFT;
David Hildenbrand22eb6342019-07-18 15:56:41 -07001402
David Hildenbrandfeee6b22020-01-04 12:59:33 -08001403 __remove_pages(start_pfn, nr_pages, altmap);
Anshuman Khandualbbd6ec62020-03-04 09:58:43 +05301404 __remove_pgd_mapping(swapper_pg_dir, __phys_to_virt(start), size);
David Hildenbrand22eb6342019-07-18 15:56:41 -07001405}
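
/*
 * Removal works in the reverse order of arch_add_memory(): the core tears
 * down the sections and their struct pages via __remove_pages() first, and
 * only then is the linear mapping of the range unmapped and its now empty
 * page tables freed.
 */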
Anshuman Khandualbbd6ec62020-03-04 09:58:43 +05301406
 1407/*
 1408 * This memory hotplug notifier helps prevent boot memory from being
 1409 * inadvertently removed: it blocks the pfn range offlining process in
 1410 * __offline_pages(), and hence prevents both offlining and removal of
 1411 * boot memory, which is always online to begin with.
 1412 * If and when boot memory can be removed in the future, this notifier
 1413 * should be dropped and free_hotplug_page_range() should handle any
 1414 * reserved pages allocated during boot.
 1415 */
1416static int prevent_bootmem_remove_notifier(struct notifier_block *nb,
1417 unsigned long action, void *data)
1418{
1419 struct mem_section *ms;
1420 struct memory_notify *arg = data;
1421 unsigned long end_pfn = arg->start_pfn + arg->nr_pages;
1422 unsigned long pfn = arg->start_pfn;
1423
1424 if (action != MEM_GOING_OFFLINE)
1425 return NOTIFY_OK;
1426
1427 for (; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
1428 ms = __pfn_to_section(pfn);
1429 if (early_section(ms))
1430 return NOTIFY_BAD;
1431 }
1432 return NOTIFY_OK;
1433}
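
/*
 * The notifier walks the affected range one memory section at a time;
 * early_section() is true for sections that were already present at boot,
 * so any offline request overlapping boot memory is vetoed with NOTIFY_BAD
 * and the offlining attempt fails. Purely hot-added ranges pass through
 * with NOTIFY_OK.
 */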
1434
1435static struct notifier_block prevent_bootmem_remove_nb = {
1436 .notifier_call = prevent_bootmem_remove_notifier,
1437};
1438
1439static int __init prevent_bootmem_remove_init(void)
1440{
1441 return register_memory_notifier(&prevent_bootmem_remove_nb);
1442}
1443device_initcall(prevent_bootmem_remove_init);
David Hildenbrand22eb6342019-07-18 15:56:41 -07001444#endif