// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  PowerPC version
 *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
 *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
 *    Copyright (C) 1996 Paul Mackerras
 *
 *  Derived from "arch/i386/mm/init.c"
 *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *
 *  Dave Engebretsen <engebret@us.ibm.com>
 *      Rework for PPC64 port.
 */

#undef DEBUG

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/stddef.h>
#include <linux/vmalloc.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/highmem.h>
#include <linux/idr.h>
#include <linux/nodemask.h>
#include <linux/module.h>
#include <linux/poison.h>
#include <linux/memblock.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
#include <linux/of_fdt.h>
#include <linux/libfdt.h>
#include <linux/memremap.h>

#include <asm/pgalloc.h>
#include <asm/page.h>
#include <asm/prom.h>
#include <asm/rtas.h>
#include <asm/io.h>
#include <asm/mmu_context.h>
#include <asm/mmu.h>
#include <linux/uaccess.h>
#include <asm/smp.h>
#include <asm/machdep.h>
#include <asm/tlb.h>
#include <asm/eeh.h>
#include <asm/processor.h>
#include <asm/mmzone.h>
#include <asm/cputable.h>
#include <asm/sections.h>
#include <asm/iommu.h>
#include <asm/vdso.h>

#include <mm/mmu_decl.h>

#ifdef CONFIG_SPARSEMEM_VMEMMAP
/*
 * Given an address within the vmemmap, determine the page that
 * represents the start of the subsection it is within.  Note that we have to
 * do this by hand as the proffered address may not be correctly aligned.
 * Subtraction of non-aligned pointers produces undefined results.
 */
static struct page * __meminit vmemmap_subsection_start(unsigned long vmemmap_addr)
{
	unsigned long start_pfn;
	unsigned long offset = vmemmap_addr - ((unsigned long)(vmemmap));

	/* Return the page for the pfn at the start of the subsection. */
	start_pfn = (offset / sizeof(struct page)) & PAGE_SUBSECTION_MASK;
	return pfn_to_page(start_pfn);
}

/*
 * Since memory is added in sub-section chunks, before creating a new vmemmap
 * mapping, the kernel should check whether there is an existing memmap mapping
 * covering the new subsection added. This is needed because the kernel can map
 * the vmemmap area using 16MB pages, each of which covers a memory range of
 * 16GB. Such a range spans multiple 2MB subsections.
 *
 * If any subsection in the 16GB range mapped by vmemmap is valid, we consider
 * the vmemmap populated (a page table entry is already present). We can't do
 * a page table lookup here because with hash translation we don't keep
 * vmemmap details in the Linux page table.
 */
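/*
 * A rough worked example of the sizes involved (assuming the common book3s64
 * configuration of a 64KB base page size and a 64-byte struct page): a single
 * 16MB vmemmap mapping holds 16MB / 64B = 256K struct pages, which describe
 * 256K * 64KB = 16GB of memory, i.e. 8192 subsections of 2MB each.
 */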
static int __meminit vmemmap_populated(unsigned long vmemmap_addr, int vmemmap_map_size)
{
	struct page *start;
	unsigned long vmemmap_end = vmemmap_addr + vmemmap_map_size;

	start = vmemmap_subsection_start(vmemmap_addr);

	for (; (unsigned long)start < vmemmap_end; start += PAGES_PER_SUBSECTION)
		/*
		 * pfn valid check here is intended to really check
		 * whether we have any subsection already initialized
		 * in this range.
		 */
		if (pfn_valid(page_to_pfn(start)))
			return 1;

	return 0;
}

/*
 * vmemmap virtual address space management does not have a traditional page
 * table to track which virtual struct pages are backed by a physical mapping.
 * The virtual to physical mappings are tracked in a simple linked list
 * format. 'vmemmap_list' maintains the entire vmemmap physical mapping at
 * all times, whereas the 'next' list maintains the available
 * vmemmap_backing structures which have been deleted from the
 * 'vmemmap_list' during system runtime (memory hotplug remove
 * operation). The freed 'vmemmap_backing' structures are reused later when
 * new requests come in without allocating fresh memory. This pointer also
 * tracks the allocated 'vmemmap_backing' structures as we allocate one
 * full page of memory at a time when we don't have any.
 */
struct vmemmap_backing *vmemmap_list;
static struct vmemmap_backing *next;

/*
 * The same pointer 'next' tracks individual chunks inside the allocated
 * full page during boot and again tracks the freed nodes during
 * runtime. This is racy, but the two uses never overlap because they are
 * separated by the boot process. It would be a problem if a memory hotplug
 * operation somehow happened during boot!
 */
static int num_left;
static int num_freed;

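/*
 * Hand out a vmemmap_backing entry: reuse a freed entry if one is available,
 * otherwise carve the next chunk out of a page-sized pool (allocating a new
 * pool page when it runs out).
 */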
static __meminit struct vmemmap_backing *vmemmap_list_alloc(int node)
{
	struct vmemmap_backing *vmem_back;

	/* get from freed entries first */
	if (num_freed) {
		num_freed--;
		vmem_back = next;
		next = next->list;

		return vmem_back;
	}

	/* allocate a page when required and hand out chunks */
	if (!num_left) {
		next = vmemmap_alloc_block(PAGE_SIZE, node);
		if (unlikely(!next)) {
			WARN_ON(1);
			return NULL;
		}
		num_left = PAGE_SIZE / sizeof(struct vmemmap_backing);
	}

	num_left--;

	return next++;
}

static __meminit int vmemmap_list_populate(unsigned long phys,
					   unsigned long start,
					   int node)
{
	struct vmemmap_backing *vmem_back;

	vmem_back = vmemmap_list_alloc(node);
	if (unlikely(!vmem_back)) {
		pr_debug("vmemmap list allocation failed\n");
		return -ENOMEM;
	}

	vmem_back->phys = phys;
	vmem_back->virt_addr = start;
	vmem_back->list = vmemmap_list;

	vmemmap_list = vmem_back;
	return 0;
}

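/*
 * Return true if the struct pages in this vmemmap chunk describe pfns that
 * fall (partly or wholly) outside the pfn range covered by the altmap, in
 * which case the chunk must not be allocated from the altmap.
 */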
static bool altmap_cross_boundary(struct vmem_altmap *altmap, unsigned long start,
				  unsigned long page_size)
{
	unsigned long nr_pfn = page_size / sizeof(struct page);
	unsigned long start_pfn = page_to_pfn((struct page *)start);

	if ((start_pfn + nr_pfn) > altmap->end_pfn)
		return true;

	if (start_pfn < altmap->base_pfn)
		return true;

	return false;
}

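/*
 * Populate the vmemmap for the virtual range [start, end): walk it in
 * linear-mapping-page-sized chunks, skip chunks that are already backed,
 * back each remaining chunk with memory (preferably from the altmap) and
 * record the backing in vmemmap_list so it can be freed again later.
 */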
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
		struct vmem_altmap *altmap)
{
	bool altmap_alloc;
	unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;

	/* Align to the page size of the linear mapping. */
	start = ALIGN_DOWN(start, page_size);

	pr_debug("vmemmap_populate %lx..%lx, node %d\n", start, end, node);

	for (; start < end; start += page_size) {
		void *p = NULL;
		int rc;

		/*
		 * This vmemmap range is backing different subsections. If any
		 * of those subsections is marked valid, that means we already
		 * have initialized a page table covering this range and hence
		 * the vmemmap range is populated.
		 */
		if (vmemmap_populated(start, page_size))
			continue;

		/*
		 * Allocate from the altmap first if we have one. This may
		 * fail due to alignment issues when using 16MB hugepages, so
		 * fall back to system memory if the altmap allocation fails.
		 */
		if (altmap && !altmap_cross_boundary(altmap, start, page_size)) {
			p = vmemmap_alloc_block_buf(page_size, node, altmap);
			if (!p)
				pr_debug("altmap block allocation failed, falling back to system memory");
			else
				altmap_alloc = true;
		}
		if (!p) {
			p = vmemmap_alloc_block_buf(page_size, node, NULL);
			altmap_alloc = false;
		}
		if (!p)
			return -ENOMEM;

		if (vmemmap_list_populate(__pa(p), start, node)) {
			/*
			 * If we don't populate the vmemmap list, we don't have
			 * the ability to free the allocated vmemmap
			 * pages in section_deactivate. Hence free them
			 * here.
			 */
			int nr_pfns = page_size >> PAGE_SHIFT;
			unsigned long page_order = get_order(page_size);

			if (altmap_alloc)
				vmem_altmap_free(altmap, nr_pfns);
			else
				free_pages((unsigned long)p, page_order);
			return -ENOMEM;
		}

		pr_debug(" * %016lx..%016lx allocated at %p\n",
			 start, start + page_size, p);

		rc = vmemmap_create_mapping(start, page_size, __pa(p));
		if (rc < 0) {
			pr_warn("%s: Unable to create vmemmap mapping: %d\n",
				__func__, rc);
			return -EFAULT;
		}
	}

	return 0;
}

#ifdef CONFIG_MEMORY_HOTPLUG
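/*
 * Unlink the vmemmap_backing entry for @start from vmemmap_list and return
 * the physical address it recorded (or 0 if no entry is found). The entry
 * itself is pushed onto the 'next' free list so it can be reused.
 */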
static unsigned long vmemmap_list_free(unsigned long start)
{
	struct vmemmap_backing *vmem_back, *vmem_back_prev;

	vmem_back_prev = vmem_back = vmemmap_list;

	/* look for it with prev pointer recorded */
	for (; vmem_back; vmem_back = vmem_back->list) {
		if (vmem_back->virt_addr == start)
			break;
		vmem_back_prev = vmem_back;
	}

	if (unlikely(!vmem_back))
		return 0;

	/* remove it from vmemmap_list */
	if (vmem_back == vmemmap_list) /* remove head */
		vmemmap_list = vmem_back->list;
	else
		vmem_back_prev->list = vmem_back->list;

	/* make 'next' point to this freed entry */
	vmem_back->list = next;
	next = vmem_back;
	num_freed++;

	return vmem_back->phys;
}

void __ref vmemmap_free(unsigned long start, unsigned long end,
		struct vmem_altmap *altmap)
{
	unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
	unsigned long page_order = get_order(page_size);
	unsigned long alt_start = ~0, alt_end = ~0;
	unsigned long base_pfn;

	start = ALIGN_DOWN(start, page_size);
	if (altmap) {
		alt_start = altmap->base_pfn;
		alt_end = altmap->base_pfn + altmap->reserve +
			  altmap->free + altmap->alloc + altmap->align;
	}

	pr_debug("vmemmap_free %lx...%lx\n", start, end);

	for (; start < end; start += page_size) {
		unsigned long nr_pages, addr;
		struct page *page;

		/*
		 * We have already marked the subsection we are trying to remove
		 * invalid. So if we want to remove the vmemmap range, we
		 * need to make sure there is no subsection marked valid
		 * in this range.
		 */
		if (vmemmap_populated(start, page_size))
			continue;

		addr = vmemmap_list_free(start);
		if (!addr)
			continue;

		page = pfn_to_page(addr >> PAGE_SHIFT);
		nr_pages = 1 << page_order;
		base_pfn = PHYS_PFN(addr);

		if (base_pfn >= alt_start && base_pfn < alt_end) {
			vmem_altmap_free(altmap, nr_pages);
		} else if (PageReserved(page)) {
			/* allocated from bootmem */
			if (page_size < PAGE_SIZE) {
				/*
				 * this shouldn't happen, but if it is
				 * the case, leave the memory there
				 */
				WARN_ON_ONCE(1);
			} else {
				while (nr_pages--)
					free_reserved_page(page++);
			}
		} else {
			free_pages((unsigned long)(__va(addr)), page_order);
		}

		vmemmap_remove_mapping(start, page_size);
	}
}
#endif
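
/*
 * Empty stub: powerpc does not track bootmem info for vmemmap pages, but the
 * hook is expected by the generic sparse/vmemmap memory hotplug code.
 */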
void register_page_bootmem_memmap(unsigned long section_nr,
				  struct page *start_page, unsigned long size)
{
}

#endif /* CONFIG_SPARSEMEM_VMEMMAP */

#ifdef CONFIG_PPC_BOOK3S_64
static bool disable_radix = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT);

static int __init parse_disable_radix(char *p)
{
	bool val;

	if (!p)
		val = true;
	else if (kstrtobool(p, &val))
		return -EINVAL;

	disable_radix = val;

	return 0;
}
early_param("disable_radix", parse_disable_radix);

/*
 * If we're running under a hypervisor, we need to check the contents of
 * /chosen/ibm,architecture-vec-5 to see if the hypervisor is willing to do
 * radix.  If not, we clear the radix feature bit so we fall back to hash.
 */
static void __init early_check_vec5(void)
{
	unsigned long root, chosen;
	int size;
	const u8 *vec5;
	u8 mmu_supported;

	root = of_get_flat_dt_root();
	chosen = of_get_flat_dt_subnode_by_name(root, "chosen");
	if (chosen == -FDT_ERR_NOTFOUND) {
		cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
		return;
	}
	vec5 = of_get_flat_dt_prop(chosen, "ibm,architecture-vec-5", &size);
	if (!vec5) {
		cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
		return;
	}
	if (size <= OV5_INDX(OV5_MMU_SUPPORT)) {
		cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
		return;
	}

	/* Check for supported configuration */
	mmu_supported = vec5[OV5_INDX(OV5_MMU_SUPPORT)] &
			OV5_FEAT(OV5_MMU_SUPPORT);
	if (mmu_supported == OV5_FEAT(OV5_MMU_RADIX)) {
		/* Hypervisor only supports radix - check enabled && GTSE */
		if (!early_radix_enabled())
			pr_warn("WARNING: Ignoring cmdline option disable_radix\n");

		if (!(vec5[OV5_INDX(OV5_RADIX_GTSE)] &
		      OV5_FEAT(OV5_RADIX_GTSE)))
			cur_cpu_spec->mmu_features &= ~MMU_FTR_GTSE;
		else
			cur_cpu_spec->mmu_features |= MMU_FTR_GTSE;

		/* Do radix anyway - the hypervisor said we had to */
		cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX;
	} else if (mmu_supported == OV5_FEAT(OV5_MMU_HASH)) {
		/* Hypervisor only supports hash - disable radix */
		cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
		cur_cpu_spec->mmu_features &= ~MMU_FTR_GTSE;
	}
}

void __init mmu_early_init_devtree(void)
{
	/* Disable radix mode based on kernel command line. */
	if (disable_radix)
		cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;

	/*
	 * Check /chosen/ibm,architecture-vec-5 if running as a guest.
	 * When running bare-metal, we can use radix if we like
	 * even though the ibm,architecture-vec-5 property created by
	 * skiboot doesn't have the necessary bits set.
	 */
	if (!(mfmsr() & MSR_HV))
		early_check_vec5();

	if (early_radix_enabled()) {
		radix__early_init_devtree();

		/*
		 * We have finalized the translation we are going to use by now.
		 * Radix mode is not limited by RMA / VRMA addressing.
		 * Hence don't limit memblock allocations.
		 */
		ppc64_rma_size = ULONG_MAX;
		memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
	} else {
		hash__early_init_devtree();
	}
}
#endif /* CONFIG_PPC_BOOK3S_64 */