// SPDX-License-Identifier: GPL-2.0-only
/*
 * mm/percpu-vm.c - vmalloc area based chunk allocation
 *
 * Copyright (C) 2010		SUSE Linux Products GmbH
 * Copyright (C) 2010		Tejun Heo <tj@kernel.org>
 *
 * Chunks are mapped into vmalloc areas and populated page by page.
 * This is the default chunk allocator.
 */
#include "internal.h"

static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk,
				    unsigned int cpu, int page_idx)
{
	/* must not be used on pre-mapped chunk */
	WARN_ON(chunk->immutable);

	return vmalloc_to_page((void *)pcpu_chunk_addr(chunk, cpu, page_idx));
}

/**
 * pcpu_get_pages - get temp pages array
 *
 * Returns pointer to array of pointers to struct page which can be indexed
 * with pcpu_page_idx().  Note that there is only one array and accesses
 * should be serialized by pcpu_alloc_mutex.
 *
 * RETURNS:
 * Pointer to temp pages array on success.
 */
static struct page **pcpu_get_pages(void)
{
	static struct page **pages;
	size_t pages_size = pcpu_nr_units * pcpu_unit_pages * sizeof(pages[0]);

	lockdep_assert_held(&pcpu_alloc_mutex);

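	/* allocated once on first use and intentionally never freed */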
	if (!pages)
		pages = pcpu_mem_zalloc(pages_size, GFP_KERNEL);
	return pages;
}

/**
 * pcpu_free_pages - free pages which were allocated for @chunk
 * @chunk: chunk pages were allocated for
 * @pages: array of pages to be freed, indexed by pcpu_page_idx()
 * @page_start: page index of the first page to be freed
 * @page_end: page index of the last page to be freed + 1
 *
 * Free pages [@page_start, @page_end) in @pages for all units.
 * The pages were allocated for @chunk.
 */
static void pcpu_free_pages(struct pcpu_chunk *chunk,
			    struct page **pages, int page_start, int page_end)
{
	unsigned int cpu;
	int i;

	for_each_possible_cpu(cpu) {
		for (i = page_start; i < page_end; i++) {
			struct page *page = pages[pcpu_page_idx(cpu, i)];

			if (page)
				__free_page(page);
		}
	}
}

/**
 * pcpu_alloc_pages - allocates pages for @chunk
 * @chunk: target chunk
 * @pages: array to put the allocated pages into, indexed by pcpu_page_idx()
 * @page_start: page index of the first page to be allocated
 * @page_end: page index of the last page to be allocated + 1
 * @gfp: allocation flags passed to the underlying allocator
 *
 * Allocate pages [@page_start,@page_end) into @pages for all units.
 * The allocation is for @chunk.  Percpu core doesn't care about the
 * content of @pages and will pass it verbatim to pcpu_map_pages().
 */
static int pcpu_alloc_pages(struct pcpu_chunk *chunk,
			    struct page **pages, int page_start, int page_end,
			    gfp_t gfp)
{
	unsigned int cpu, tcpu;
	int i;

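	/* the pages are mapped through vmalloc space, so highmem is usable */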
	gfp |= __GFP_HIGHMEM;

	for_each_possible_cpu(cpu) {
		for (i = page_start; i < page_end; i++) {
			struct page **pagep = &pages[pcpu_page_idx(cpu, i)];

			*pagep = alloc_pages_node(cpu_to_node(cpu), gfp, 0);
			if (!*pagep)
				goto err;
		}
	}
	return 0;

err:
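	/* roll back the partially populated cpu, then the fully populated ones */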
	while (--i >= page_start)
		__free_page(pages[pcpu_page_idx(cpu, i)]);

	for_each_possible_cpu(tcpu) {
		if (tcpu == cpu)
			break;
		for (i = page_start; i < page_end; i++)
			__free_page(pages[pcpu_page_idx(tcpu, i)]);
	}
	return -ENOMEM;
}

/**
 * pcpu_pre_unmap_flush - flush cache prior to unmapping
 * @chunk: chunk the regions to be flushed belong to
 * @page_start: page index of the first page to be flushed
 * @page_end: page index of the last page to be flushed + 1
 *
 * Pages in [@page_start,@page_end) of @chunk are about to be
 * unmapped.  Flush cache.  As each flush can be very expensive, issue
 * the flush on the whole region at once rather than doing it for each
 * cpu.  This may be overkill but is more scalable.
 */
static void pcpu_pre_unmap_flush(struct pcpu_chunk *chunk,
				 int page_start, int page_end)
{
	flush_cache_vunmap(
		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
}

static void __pcpu_unmap_pages(unsigned long addr, int nr_pages)
{
	vunmap_range_noflush(addr, addr + (nr_pages << PAGE_SHIFT));
}

/**
 * pcpu_unmap_pages - unmap pages out of a pcpu_chunk
 * @chunk: chunk of interest
 * @pages: pages array which can be used to pass information to free
 * @page_start: page index of the first page to unmap
 * @page_end: page index of the last page to unmap + 1
 *
 * For each cpu, unmap pages [@page_start,@page_end) out of @chunk.
 * Corresponding elements in @pages were cleared by the caller and can
 * be used to carry information to pcpu_free_pages() which will be
 * called after all unmaps are finished.  The caller should call
 * proper pre/post flush functions.
 */
static void pcpu_unmap_pages(struct pcpu_chunk *chunk,
			     struct page **pages, int page_start, int page_end)
{
	unsigned int cpu;
	int i;

	for_each_possible_cpu(cpu) {
		for (i = page_start; i < page_end; i++) {
			struct page *page;

			page = pcpu_chunk_page(chunk, cpu, i);
			WARN_ON(!page);
			pages[pcpu_page_idx(cpu, i)] = page;
		}
		__pcpu_unmap_pages(pcpu_chunk_addr(chunk, cpu, page_start),
				   page_end - page_start);
	}
}

/**
 * pcpu_post_unmap_tlb_flush - flush TLB after unmapping
 * @chunk: pcpu_chunk the regions to be flushed belong to
 * @page_start: page index of the first page to be flushed
 * @page_end: page index of the last page to be flushed + 1
 *
 * Pages [@page_start,@page_end) of @chunk have been unmapped.  Flush
 * TLB for the regions.  This can be skipped if the area is to be
 * returned to vmalloc as vmalloc will handle TLB flushing lazily.
 *
 * As with pcpu_pre_unmap_flush(), TLB flushing is also done at once
 * for the whole region.
 */
static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk,
				      int page_start, int page_end)
{
	flush_tlb_kernel_range(
		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
}

static int __pcpu_map_pages(unsigned long addr, struct page **pages,
			    int nr_pages)
{
	return vmap_pages_range_noflush(addr, addr + (nr_pages << PAGE_SHIFT),
					PAGE_KERNEL, pages, PAGE_SHIFT);
}

/**
 * pcpu_map_pages - map pages into a pcpu_chunk
 * @chunk: chunk of interest
 * @pages: pages array containing pages to be mapped
 * @page_start: page index of the first page to map
 * @page_end: page index of the last page to map + 1
 *
 * For each cpu, map pages [@page_start,@page_end) into @chunk.  The
 * caller is responsible for calling pcpu_post_map_flush() after all
 * mappings are complete.
 *
 * This function is responsible for setting up whatever is necessary for
 * reverse lookup (addr -> chunk).
 */
static int pcpu_map_pages(struct pcpu_chunk *chunk,
			  struct page **pages, int page_start, int page_end)
{
	unsigned int cpu, tcpu;
	int i, err;

	for_each_possible_cpu(cpu) {
		err = __pcpu_map_pages(pcpu_chunk_addr(chunk, cpu, page_start),
				       &pages[pcpu_page_idx(cpu, page_start)],
				       page_end - page_start);
		if (err < 0)
			goto err;

		for (i = page_start; i < page_end; i++)
			pcpu_set_page_chunk(pages[pcpu_page_idx(cpu, i)],
					    chunk);
	}
	return 0;
err:
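	/* unmap everything mapped on the cpus processed before the failure */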
	for_each_possible_cpu(tcpu) {
		if (tcpu == cpu)
			break;
		__pcpu_unmap_pages(pcpu_chunk_addr(chunk, tcpu, page_start),
				   page_end - page_start);
	}
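	/* the mappings were torn down with the noflush variant, flush the TLB */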
	pcpu_post_unmap_tlb_flush(chunk, page_start, page_end);
	return err;
}

/**
 * pcpu_post_map_flush - flush cache after mapping
 * @chunk: pcpu_chunk the regions to be flushed belong to
 * @page_start: page index of the first page to be flushed
 * @page_end: page index of the last page to be flushed + 1
 *
 * Pages [@page_start,@page_end) of @chunk have been mapped.  Flush
 * cache.
 *
 * As with pcpu_pre_unmap_flush(), cache flushing is also done at once
 * for the whole region.
 */
static void pcpu_post_map_flush(struct pcpu_chunk *chunk,
				int page_start, int page_end)
{
	flush_cache_vmap(
		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
}

/**
 * pcpu_populate_chunk - populate and map an area of a pcpu_chunk
 * @chunk: chunk of interest
 * @page_start: the start page
 * @page_end: the end page
 * @gfp: allocation flags passed to the underlying memory allocator
 *
 * For each cpu, populate and map pages [@page_start,@page_end) into
 * @chunk.
 *
 * CONTEXT:
 * pcpu_alloc_mutex, does GFP_KERNEL allocation.
 */
static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
			       int page_start, int page_end, gfp_t gfp)
{
	struct page **pages;

	pages = pcpu_get_pages();
	if (!pages)
		return -ENOMEM;

	if (pcpu_alloc_pages(chunk, pages, page_start, page_end, gfp))
		return -ENOMEM;

	if (pcpu_map_pages(chunk, pages, page_start, page_end)) {
		pcpu_free_pages(chunk, pages, page_start, page_end);
		return -ENOMEM;
	}
	pcpu_post_map_flush(chunk, page_start, page_end);

	return 0;
}

/**
 * pcpu_depopulate_chunk - depopulate and unmap an area of a pcpu_chunk
 * @chunk: chunk to depopulate
 * @page_start: the start page
 * @page_end: the end page
 *
 * For each cpu, depopulate and unmap pages [@page_start,@page_end)
 * from @chunk.
 *
 * The caller is required to call pcpu_post_unmap_tlb_flush() unless the
 * region is being returned to vmalloc(), which will flush the TLB lazily.
 *
 * CONTEXT:
 * pcpu_alloc_mutex.
 */
static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
				  int page_start, int page_end)
{
	struct page **pages;

	/*
	 * If control reaches here, there must have been at least one
	 * successful population attempt so the temp pages array must
	 * be available now.
	 */
	pages = pcpu_get_pages();
	BUG_ON(!pages);

	/* unmap and free */
	pcpu_pre_unmap_flush(chunk, page_start, page_end);

	pcpu_unmap_pages(chunk, pages, page_start, page_end);

	pcpu_free_pages(chunk, pages, page_start, page_end);
}

static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp)
{
	struct pcpu_chunk *chunk;
	struct vm_struct **vms;

	chunk = pcpu_alloc_chunk(gfp);
	if (!chunk)
		return NULL;

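	/* allocate vmalloc areas matching the group offsets and sizes */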
	vms = pcpu_get_vm_areas(pcpu_group_offsets, pcpu_group_sizes,
				pcpu_nr_groups, pcpu_atom_size);
	if (!vms) {
		pcpu_free_chunk(chunk);
		return NULL;
	}

	chunk->data = vms;
	chunk->base_addr = vms[0]->addr - pcpu_group_offsets[0];

	pcpu_stats_chunk_alloc();
	trace_percpu_create_chunk(chunk->base_addr);

	return chunk;
}

static void pcpu_destroy_chunk(struct pcpu_chunk *chunk)
{
	if (!chunk)
		return;

	pcpu_stats_chunk_dealloc();
	trace_percpu_destroy_chunk(chunk->base_addr);

	if (chunk->data)
		pcpu_free_vm_areas(chunk->data, pcpu_nr_groups);
	pcpu_free_chunk(chunk);
}

static struct page *pcpu_addr_to_page(void *addr)
{
	return vmalloc_to_page(addr);
}

static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai)
{
	/* no extra restriction */
	return 0;
}

/**
 * pcpu_should_reclaim_chunk - determine if a chunk should go into reclaim
 * @chunk: chunk of interest
 *
 * This is the entry point for percpu reclaim. If a chunk qualifies, it is then
 * isolated and managed in separate lists at the back of pcpu_slot: sidelined
 * and to_depopulate respectively. The to_depopulate list holds chunks slated
 * for depopulation. They no longer contribute to pcpu_nr_empty_pop_pages once
 * they are on this list. Once depopulated, they are moved onto the sidelined
 * list, which enables them to be pulled back in for allocation if no other
 * chunk can satisfy the allocation.
 */
static bool pcpu_should_reclaim_chunk(struct pcpu_chunk *chunk)
{
	/* do not reclaim either the first chunk or reserved chunk */
	if (chunk == pcpu_first_chunk || chunk == pcpu_reserved_chunk)
		return false;

	/*
	 * If it is isolated, it may be on the sidelined list so move it back
	 * to the to_depopulate list.  Otherwise, if at least 1/4 of its pages
	 * are empty AND there is no system-wide shortage of empty pages aside
	 * from this chunk, move it to the to_depopulate list.
	 */
	return ((chunk->isolated && chunk->nr_empty_pop_pages) ||
		(pcpu_nr_empty_pop_pages >
		 (PCPU_EMPTY_POP_PAGES_HIGH + chunk->nr_empty_pop_pages) &&
		 chunk->nr_empty_pop_pages >= chunk->nr_pages / 4));
}