Michael Holzheu | c29a7ba | 2014-03-06 18:47:21 +0100 | [diff] [blame] | 1 | /* |
| 2 | * NUMA support for s390 |
| 3 | * |
| 4 | * NUMA emulation (aka fake NUMA) distributes the available memory to nodes |
| 5 | * without using real topology information about the physical memory of the |
| 6 | * machine. |
| 7 | * |
 * It distributes the available CPUs to nodes while respecting the original
 * machine topology information. This is done by trying to avoid separating
 * CPUs which reside on the same book or even on the same MC.
| 11 | * |
| 12 | * Because the current Linux scheduler code requires a stable cpu to node |
| 13 | * mapping, cores are pinned to nodes when the first CPU thread is set online. |
| 14 | * |
| 15 | * Copyright IBM Corp. 2015 |
| 16 | */ |
| 17 | |
| 18 | #define KMSG_COMPONENT "numa_emu" |
| 19 | #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt |
| 20 | |
| 21 | #include <linux/kernel.h> |
| 22 | #include <linux/cpumask.h> |
| 23 | #include <linux/memblock.h> |
Heiko Carstens | 8c910580 | 2016-12-03 09:50:21 +0100 | [diff] [blame] | 24 | #include <linux/bootmem.h> |
Michael Holzheu | c29a7ba | 2014-03-06 18:47:21 +0100 | [diff] [blame] | 25 | #include <linux/node.h> |
| 26 | #include <linux/memory.h> |
Michael Holzheu | 7cde491 | 2015-08-05 11:23:53 +0200 | [diff] [blame] | 27 | #include <linux/slab.h> |
Michael Holzheu | c29a7ba | 2014-03-06 18:47:21 +0100 | [diff] [blame] | 28 | #include <asm/smp.h> |
| 29 | #include <asm/topology.h> |
| 30 | #include "numa_mode.h" |
| 31 | #include "toptree.h" |
| 32 | |
/*
 * Distances between the different system components.
 *
 * DIST_MAX is larger than any real distance and serves as the start value
 * when searching for a minimum. DIST_EMPTY is reported for a node that does
 * not contain any core yet.
 */
#define DIST_EMPTY	0
#define DIST_CORE	1
#define DIST_MC		2
#define DIST_BOOK	3
#define DIST_DRAWER	4
#define DIST_MAX	5

/* Node distance reported to common code */
#define EMU_NODE_DIST	10

/*
 * Node ID for free (not yet pinned) cores.
 * Parenthesized so that the negative constant expands safely inside any
 * surrounding expression.
 */
#define NODE_ID_FREE	(-1)

/* Different levels of toptree, ordered from the leaves up to the root */
enum toptree_level {CORE, MC, BOOK, DRAWER, NODE, TOPOLOGY};

/* The two toptree IDs */
enum {TOPTREE_ID_PHYS, TOPTREE_ID_NUMA};

/* Number of NUMA nodes (kernel parameter "emu_nodes") */
static int emu_nodes = 1;
/* NUMA stripe size in bytes (kernel parameter "emu_size") */
static unsigned long emu_size;
Michael Holzheu | 3a3814c | 2015-08-01 18:12:41 +0200 | [diff] [blame] | 57 | |
| 58 | /* |
| 59 | * Node to core pinning information updates are protected by |
| 60 | * "sched_domains_mutex". |
| 61 | */ |
Michael Holzheu | 7cde491 | 2015-08-05 11:23:53 +0200 | [diff] [blame] | 62 | static struct { |
| 63 | s32 to_node_id[CONFIG_NR_CPUS]; /* Pinned core to node mapping */ |
| 64 | int total; /* Total number of pinned cores */ |
| 65 | int per_node_target; /* Cores per node without extra cores */ |
| 66 | int per_node[MAX_NUMNODES]; /* Number of cores pinned to node */ |
| 67 | } *emu_cores; |
Michael Holzheu | c29a7ba | 2014-03-06 18:47:21 +0100 | [diff] [blame] | 68 | |
| 69 | /* |
| 70 | * Pin a core to a node |
| 71 | */ |
| 72 | static void pin_core_to_node(int core_id, int node_id) |
| 73 | { |
Michael Holzheu | 7cde491 | 2015-08-05 11:23:53 +0200 | [diff] [blame] | 74 | if (emu_cores->to_node_id[core_id] == NODE_ID_FREE) { |
| 75 | emu_cores->per_node[node_id]++; |
| 76 | emu_cores->to_node_id[core_id] = node_id; |
| 77 | emu_cores->total++; |
Michael Holzheu | c29a7ba | 2014-03-06 18:47:21 +0100 | [diff] [blame] | 78 | } else { |
Michael Holzheu | 7cde491 | 2015-08-05 11:23:53 +0200 | [diff] [blame] | 79 | WARN_ON(emu_cores->to_node_id[core_id] != node_id); |
Michael Holzheu | c29a7ba | 2014-03-06 18:47:21 +0100 | [diff] [blame] | 80 | } |
| 81 | } |
| 82 | |
| 83 | /* |
| 84 | * Number of pinned cores of a node |
| 85 | */ |
| 86 | static int cores_pinned(struct toptree *node) |
| 87 | { |
Michael Holzheu | 7cde491 | 2015-08-05 11:23:53 +0200 | [diff] [blame] | 88 | return emu_cores->per_node[node->id]; |
Michael Holzheu | c29a7ba | 2014-03-06 18:47:21 +0100 | [diff] [blame] | 89 | } |
| 90 | |
| 91 | /* |
| 92 | * ID of the node where the core is pinned (or NODE_ID_FREE) |
| 93 | */ |
| 94 | static int core_pinned_to_node_id(struct toptree *core) |
| 95 | { |
Michael Holzheu | 7cde491 | 2015-08-05 11:23:53 +0200 | [diff] [blame] | 96 | return emu_cores->to_node_id[core->id]; |
Michael Holzheu | c29a7ba | 2014-03-06 18:47:21 +0100 | [diff] [blame] | 97 | } |
| 98 | |
| 99 | /* |
| 100 | * Number of cores in the tree that are not yet pinned |
| 101 | */ |
| 102 | static int cores_free(struct toptree *tree) |
| 103 | { |
| 104 | struct toptree *core; |
| 105 | int count = 0; |
| 106 | |
| 107 | toptree_for_each(core, tree, CORE) { |
| 108 | if (core_pinned_to_node_id(core) == NODE_ID_FREE) |
| 109 | count++; |
| 110 | } |
| 111 | return count; |
| 112 | } |
| 113 | |
| 114 | /* |
| 115 | * Return node of core |
| 116 | */ |
| 117 | static struct toptree *core_node(struct toptree *core) |
| 118 | { |
Heiko Carstens | adac0f1 | 2016-05-25 10:25:50 +0200 | [diff] [blame] | 119 | return core->parent->parent->parent->parent; |
| 120 | } |
| 121 | |
| 122 | /* |
| 123 | * Return drawer of core |
| 124 | */ |
| 125 | static struct toptree *core_drawer(struct toptree *core) |
| 126 | { |
Michael Holzheu | c29a7ba | 2014-03-06 18:47:21 +0100 | [diff] [blame] | 127 | return core->parent->parent->parent; |
| 128 | } |
| 129 | |
| 130 | /* |
| 131 | * Return book of core |
| 132 | */ |
| 133 | static struct toptree *core_book(struct toptree *core) |
| 134 | { |
| 135 | return core->parent->parent; |
| 136 | } |
| 137 | |
| 138 | /* |
| 139 | * Return mc of core |
| 140 | */ |
| 141 | static struct toptree *core_mc(struct toptree *core) |
| 142 | { |
| 143 | return core->parent; |
| 144 | } |
| 145 | |
| 146 | /* |
| 147 | * Distance between two cores |
| 148 | */ |
| 149 | static int dist_core_to_core(struct toptree *core1, struct toptree *core2) |
| 150 | { |
Heiko Carstens | adac0f1 | 2016-05-25 10:25:50 +0200 | [diff] [blame] | 151 | if (core_drawer(core1)->id != core_drawer(core2)->id) |
| 152 | return DIST_DRAWER; |
Michael Holzheu | c29a7ba | 2014-03-06 18:47:21 +0100 | [diff] [blame] | 153 | if (core_book(core1)->id != core_book(core2)->id) |
| 154 | return DIST_BOOK; |
| 155 | if (core_mc(core1)->id != core_mc(core2)->id) |
| 156 | return DIST_MC; |
| 157 | /* Same core or sibling on same MC */ |
| 158 | return DIST_CORE; |
| 159 | } |
| 160 | |
| 161 | /* |
| 162 | * Distance of a node to a core |
| 163 | */ |
| 164 | static int dist_node_to_core(struct toptree *node, struct toptree *core) |
| 165 | { |
| 166 | struct toptree *core_node; |
| 167 | int dist_min = DIST_MAX; |
| 168 | |
| 169 | toptree_for_each(core_node, node, CORE) |
| 170 | dist_min = min(dist_min, dist_core_to_core(core_node, core)); |
| 171 | return dist_min == DIST_MAX ? DIST_EMPTY : dist_min; |
| 172 | } |
| 173 | |
| 174 | /* |
| 175 | * Unify will delete empty nodes, therefore recreate nodes. |
| 176 | */ |
| 177 | static void toptree_unify_tree(struct toptree *tree) |
| 178 | { |
| 179 | int nid; |
| 180 | |
| 181 | toptree_unify(tree); |
| 182 | for (nid = 0; nid < emu_nodes; nid++) |
| 183 | toptree_get_child(tree, nid); |
| 184 | } |
| 185 | |
| 186 | /* |
| 187 | * Find the best/nearest node for a given core and ensure that no node |
Michael Holzheu | 7cde491 | 2015-08-05 11:23:53 +0200 | [diff] [blame] | 188 | * gets more than "emu_cores->per_node_target + extra" cores. |
Michael Holzheu | c29a7ba | 2014-03-06 18:47:21 +0100 | [diff] [blame] | 189 | */ |
| 190 | static struct toptree *node_for_core(struct toptree *numa, struct toptree *core, |
| 191 | int extra) |
| 192 | { |
| 193 | struct toptree *node, *node_best = NULL; |
Michael Holzheu | 7cde491 | 2015-08-05 11:23:53 +0200 | [diff] [blame] | 194 | int dist_cur, dist_best, cores_target; |
Michael Holzheu | c29a7ba | 2014-03-06 18:47:21 +0100 | [diff] [blame] | 195 | |
Michael Holzheu | 7cde491 | 2015-08-05 11:23:53 +0200 | [diff] [blame] | 196 | cores_target = emu_cores->per_node_target + extra; |
Michael Holzheu | c29a7ba | 2014-03-06 18:47:21 +0100 | [diff] [blame] | 197 | dist_best = DIST_MAX; |
| 198 | node_best = NULL; |
| 199 | toptree_for_each(node, numa, NODE) { |
| 200 | /* Already pinned cores must use their nodes */ |
| 201 | if (core_pinned_to_node_id(core) == node->id) { |
| 202 | node_best = node; |
| 203 | break; |
| 204 | } |
| 205 | /* Skip nodes that already have enough cores */ |
Michael Holzheu | 7cde491 | 2015-08-05 11:23:53 +0200 | [diff] [blame] | 206 | if (cores_pinned(node) >= cores_target) |
Michael Holzheu | c29a7ba | 2014-03-06 18:47:21 +0100 | [diff] [blame] | 207 | continue; |
| 208 | dist_cur = dist_node_to_core(node, core); |
| 209 | if (dist_cur < dist_best) { |
| 210 | dist_best = dist_cur; |
| 211 | node_best = node; |
| 212 | } |
| 213 | } |
| 214 | return node_best; |
| 215 | } |
| 216 | |
| 217 | /* |
| 218 | * Find the best node for each core with respect to "extra" core count |
| 219 | */ |
| 220 | static void toptree_to_numa_single(struct toptree *numa, struct toptree *phys, |
| 221 | int extra) |
| 222 | { |
| 223 | struct toptree *node, *core, *tmp; |
| 224 | |
| 225 | toptree_for_each_safe(core, tmp, phys, CORE) { |
| 226 | node = node_for_core(numa, core, extra); |
| 227 | if (!node) |
| 228 | return; |
| 229 | toptree_move(core, node); |
| 230 | pin_core_to_node(core->id, node->id); |
| 231 | } |
| 232 | } |
| 233 | |
| 234 | /* |
| 235 | * Move structures of given level to specified NUMA node |
| 236 | */ |
| 237 | static void move_level_to_numa_node(struct toptree *node, struct toptree *phys, |
| 238 | enum toptree_level level, bool perfect) |
| 239 | { |
Michael Holzheu | 7cde491 | 2015-08-05 11:23:53 +0200 | [diff] [blame] | 240 | int cores_free, cores_target = emu_cores->per_node_target; |
Michael Holzheu | c29a7ba | 2014-03-06 18:47:21 +0100 | [diff] [blame] | 241 | struct toptree *cur, *tmp; |
Michael Holzheu | c29a7ba | 2014-03-06 18:47:21 +0100 | [diff] [blame] | 242 | |
| 243 | toptree_for_each_safe(cur, tmp, phys, level) { |
Michael Holzheu | 7cde491 | 2015-08-05 11:23:53 +0200 | [diff] [blame] | 244 | cores_free = cores_target - toptree_count(node, CORE); |
Michael Holzheu | c29a7ba | 2014-03-06 18:47:21 +0100 | [diff] [blame] | 245 | if (perfect) { |
| 246 | if (cores_free == toptree_count(cur, CORE)) |
| 247 | toptree_move(cur, node); |
| 248 | } else { |
| 249 | if (cores_free >= toptree_count(cur, CORE)) |
| 250 | toptree_move(cur, node); |
| 251 | } |
| 252 | } |
| 253 | } |
| 254 | |
| 255 | /* |
| 256 | * Move structures of a given level to NUMA nodes. If "perfect" is specified |
| 257 | * move only perfectly fitting structures. Otherwise move also smaller |
| 258 | * than needed structures. |
| 259 | */ |
| 260 | static void move_level_to_numa(struct toptree *numa, struct toptree *phys, |
| 261 | enum toptree_level level, bool perfect) |
| 262 | { |
| 263 | struct toptree *node; |
| 264 | |
| 265 | toptree_for_each(node, numa, NODE) |
| 266 | move_level_to_numa_node(node, phys, level, perfect); |
| 267 | } |
| 268 | |
| 269 | /* |
| 270 | * For the first run try to move the big structures |
| 271 | */ |
| 272 | static void toptree_to_numa_first(struct toptree *numa, struct toptree *phys) |
| 273 | { |
| 274 | struct toptree *core; |
| 275 | |
| 276 | /* Always try to move perfectly fitting structures first */ |
Heiko Carstens | adac0f1 | 2016-05-25 10:25:50 +0200 | [diff] [blame] | 277 | move_level_to_numa(numa, phys, DRAWER, true); |
| 278 | move_level_to_numa(numa, phys, DRAWER, false); |
Michael Holzheu | c29a7ba | 2014-03-06 18:47:21 +0100 | [diff] [blame] | 279 | move_level_to_numa(numa, phys, BOOK, true); |
| 280 | move_level_to_numa(numa, phys, BOOK, false); |
| 281 | move_level_to_numa(numa, phys, MC, true); |
| 282 | move_level_to_numa(numa, phys, MC, false); |
| 283 | /* Now pin all the moved cores */ |
| 284 | toptree_for_each(core, numa, CORE) |
| 285 | pin_core_to_node(core->id, core_node(core)->id); |
| 286 | } |
| 287 | |
| 288 | /* |
| 289 | * Allocate new topology and create required nodes |
| 290 | */ |
| 291 | static struct toptree *toptree_new(int id, int nodes) |
| 292 | { |
| 293 | struct toptree *tree; |
| 294 | int nid; |
| 295 | |
| 296 | tree = toptree_alloc(TOPOLOGY, id); |
| 297 | if (!tree) |
| 298 | goto fail; |
| 299 | for (nid = 0; nid < nodes; nid++) { |
| 300 | if (!toptree_get_child(tree, nid)) |
| 301 | goto fail; |
| 302 | } |
| 303 | return tree; |
| 304 | fail: |
| 305 | panic("NUMA emulation could not allocate topology"); |
| 306 | } |
| 307 | |
| 308 | /* |
Michael Holzheu | 7cde491 | 2015-08-05 11:23:53 +0200 | [diff] [blame] | 309 | * Allocate and initialize core to node mapping |
| 310 | */ |
Heiko Carstens | 8c910580 | 2016-12-03 09:50:21 +0100 | [diff] [blame] | 311 | static void __ref create_core_to_node_map(void) |
Michael Holzheu | 7cde491 | 2015-08-05 11:23:53 +0200 | [diff] [blame] | 312 | { |
| 313 | int i; |
| 314 | |
Heiko Carstens | 8c910580 | 2016-12-03 09:50:21 +0100 | [diff] [blame] | 315 | emu_cores = memblock_virt_alloc(sizeof(*emu_cores), 8); |
Michael Holzheu | 7cde491 | 2015-08-05 11:23:53 +0200 | [diff] [blame] | 316 | for (i = 0; i < ARRAY_SIZE(emu_cores->to_node_id); i++) |
| 317 | emu_cores->to_node_id[i] = NODE_ID_FREE; |
| 318 | } |
| 319 | |
| 320 | /* |
Michael Holzheu | c29a7ba | 2014-03-06 18:47:21 +0100 | [diff] [blame] | 321 | * Move cores from physical topology into NUMA target topology |
| 322 | * and try to keep as much of the physical topology as possible. |
| 323 | */ |
| 324 | static struct toptree *toptree_to_numa(struct toptree *phys) |
| 325 | { |
| 326 | static int first = 1; |
| 327 | struct toptree *numa; |
Michael Holzheu | 7cde491 | 2015-08-05 11:23:53 +0200 | [diff] [blame] | 328 | int cores_total; |
Michael Holzheu | c29a7ba | 2014-03-06 18:47:21 +0100 | [diff] [blame] | 329 | |
Michael Holzheu | 7cde491 | 2015-08-05 11:23:53 +0200 | [diff] [blame] | 330 | cores_total = emu_cores->total + cores_free(phys); |
| 331 | emu_cores->per_node_target = cores_total / emu_nodes; |
Michael Holzheu | c29a7ba | 2014-03-06 18:47:21 +0100 | [diff] [blame] | 332 | numa = toptree_new(TOPTREE_ID_NUMA, emu_nodes); |
| 333 | if (first) { |
| 334 | toptree_to_numa_first(numa, phys); |
| 335 | first = 0; |
| 336 | } |
| 337 | toptree_to_numa_single(numa, phys, 0); |
| 338 | toptree_to_numa_single(numa, phys, 1); |
| 339 | toptree_unify_tree(numa); |
| 340 | |
| 341 | WARN_ON(cpumask_weight(&phys->mask)); |
| 342 | return numa; |
| 343 | } |
| 344 | |
| 345 | /* |
| 346 | * Create a toptree out of the physical topology that we got from the hypervisor |
| 347 | */ |
| 348 | static struct toptree *toptree_from_topology(void) |
| 349 | { |
Heiko Carstens | adac0f1 | 2016-05-25 10:25:50 +0200 | [diff] [blame] | 350 | struct toptree *phys, *node, *drawer, *book, *mc, *core; |
Michael Holzheu | c29a7ba | 2014-03-06 18:47:21 +0100 | [diff] [blame] | 351 | struct cpu_topology_s390 *top; |
| 352 | int cpu; |
| 353 | |
| 354 | phys = toptree_new(TOPTREE_ID_PHYS, 1); |
| 355 | |
Heiko Carstens | 8c910580 | 2016-12-03 09:50:21 +0100 | [diff] [blame] | 356 | for_each_cpu(cpu, &cpus_with_topology) { |
Heiko Carstens | 30fc4ca | 2016-12-02 10:38:37 +0100 | [diff] [blame] | 357 | top = &cpu_topology[cpu]; |
Michael Holzheu | c29a7ba | 2014-03-06 18:47:21 +0100 | [diff] [blame] | 358 | node = toptree_get_child(phys, 0); |
Heiko Carstens | adac0f1 | 2016-05-25 10:25:50 +0200 | [diff] [blame] | 359 | drawer = toptree_get_child(node, top->drawer_id); |
| 360 | book = toptree_get_child(drawer, top->book_id); |
Michael Holzheu | c29a7ba | 2014-03-06 18:47:21 +0100 | [diff] [blame] | 361 | mc = toptree_get_child(book, top->socket_id); |
Heiko Carstens | 307b311 | 2016-12-02 13:16:02 +0100 | [diff] [blame] | 362 | core = toptree_get_child(mc, smp_get_base_cpu(cpu)); |
Heiko Carstens | adac0f1 | 2016-05-25 10:25:50 +0200 | [diff] [blame] | 363 | if (!drawer || !book || !mc || !core) |
Michael Holzheu | c29a7ba | 2014-03-06 18:47:21 +0100 | [diff] [blame] | 364 | panic("NUMA emulation could not allocate memory"); |
| 365 | cpumask_set_cpu(cpu, &core->mask); |
| 366 | toptree_update_mask(mc); |
| 367 | } |
| 368 | return phys; |
| 369 | } |
| 370 | |
| 371 | /* |
| 372 | * Add toptree core to topology and create correct CPU masks |
| 373 | */ |
| 374 | static void topology_add_core(struct toptree *core) |
| 375 | { |
| 376 | struct cpu_topology_s390 *top; |
| 377 | int cpu; |
| 378 | |
| 379 | for_each_cpu(cpu, &core->mask) { |
Heiko Carstens | 30fc4ca | 2016-12-02 10:38:37 +0100 | [diff] [blame] | 380 | top = &cpu_topology[cpu]; |
Michael Holzheu | c29a7ba | 2014-03-06 18:47:21 +0100 | [diff] [blame] | 381 | cpumask_copy(&top->thread_mask, &core->mask); |
| 382 | cpumask_copy(&top->core_mask, &core_mc(core)->mask); |
| 383 | cpumask_copy(&top->book_mask, &core_book(core)->mask); |
Heiko Carstens | adac0f1 | 2016-05-25 10:25:50 +0200 | [diff] [blame] | 384 | cpumask_copy(&top->drawer_mask, &core_drawer(core)->mask); |
Martin Schwidefsky | 22be9cd | 2015-09-22 14:21:16 +0200 | [diff] [blame] | 385 | cpumask_set_cpu(cpu, &node_to_cpumask_map[core_node(core)->id]); |
Michael Holzheu | c29a7ba | 2014-03-06 18:47:21 +0100 | [diff] [blame] | 386 | top->node_id = core_node(core)->id; |
| 387 | } |
| 388 | } |
| 389 | |
| 390 | /* |
| 391 | * Apply toptree to topology and create CPU masks |
| 392 | */ |
| 393 | static void toptree_to_topology(struct toptree *numa) |
| 394 | { |
| 395 | struct toptree *core; |
| 396 | int i; |
| 397 | |
| 398 | /* Clear all node masks */ |
| 399 | for (i = 0; i < MAX_NUMNODES; i++) |
Martin Schwidefsky | 22be9cd | 2015-09-22 14:21:16 +0200 | [diff] [blame] | 400 | cpumask_clear(&node_to_cpumask_map[i]); |
Michael Holzheu | c29a7ba | 2014-03-06 18:47:21 +0100 | [diff] [blame] | 401 | |
| 402 | /* Rebuild all masks */ |
| 403 | toptree_for_each(core, numa, CORE) |
| 404 | topology_add_core(core); |
| 405 | } |
| 406 | |
| 407 | /* |
| 408 | * Show the node to core mapping |
| 409 | */ |
| 410 | static void print_node_to_core_map(void) |
| 411 | { |
| 412 | int nid, cid; |
| 413 | |
| 414 | if (!numa_debug_enabled) |
| 415 | return; |
| 416 | printk(KERN_DEBUG "NUMA node to core mapping\n"); |
| 417 | for (nid = 0; nid < emu_nodes; nid++) { |
| 418 | printk(KERN_DEBUG " node %3d: ", nid); |
Michael Holzheu | 7cde491 | 2015-08-05 11:23:53 +0200 | [diff] [blame] | 419 | for (cid = 0; cid < ARRAY_SIZE(emu_cores->to_node_id); cid++) { |
| 420 | if (emu_cores->to_node_id[cid] == nid) |
Michael Holzheu | c29a7ba | 2014-03-06 18:47:21 +0100 | [diff] [blame] | 421 | printk(KERN_CONT "%d ", cid); |
| 422 | } |
| 423 | printk(KERN_CONT "\n"); |
| 424 | } |
| 425 | } |
| 426 | |
Heiko Carstens | e6d4a63 | 2016-12-02 11:12:01 +0100 | [diff] [blame^] | 427 | static void pin_all_possible_cpus(void) |
| 428 | { |
| 429 | int core_id, node_id, cpu; |
| 430 | static int initialized; |
| 431 | |
| 432 | if (initialized) |
| 433 | return; |
| 434 | print_node_to_core_map(); |
| 435 | node_id = 0; |
| 436 | for_each_possible_cpu(cpu) { |
| 437 | core_id = smp_get_base_cpu(cpu); |
| 438 | if (emu_cores->to_node_id[core_id] != NODE_ID_FREE) |
| 439 | continue; |
| 440 | pin_core_to_node(core_id, node_id); |
| 441 | cpu_topology[cpu].node_id = node_id; |
| 442 | node_id = (node_id + 1) % emu_nodes; |
| 443 | } |
| 444 | print_node_to_core_map(); |
| 445 | initialized = 1; |
| 446 | } |
| 447 | |
Michael Holzheu | c29a7ba | 2014-03-06 18:47:21 +0100 | [diff] [blame] | 448 | /* |
| 449 | * Transfer physical topology into a NUMA topology and modify CPU masks |
| 450 | * according to the NUMA topology. |
| 451 | * |
Michael Holzheu | 3a3814c | 2015-08-01 18:12:41 +0200 | [diff] [blame] | 452 | * Must be called with "sched_domains_mutex" lock held. |
Michael Holzheu | c29a7ba | 2014-03-06 18:47:21 +0100 | [diff] [blame] | 453 | */ |
| 454 | static void emu_update_cpu_topology(void) |
| 455 | { |
| 456 | struct toptree *phys, *numa; |
| 457 | |
Michael Holzheu | 7cde491 | 2015-08-05 11:23:53 +0200 | [diff] [blame] | 458 | if (emu_cores == NULL) |
| 459 | create_core_to_node_map(); |
Michael Holzheu | c29a7ba | 2014-03-06 18:47:21 +0100 | [diff] [blame] | 460 | phys = toptree_from_topology(); |
| 461 | numa = toptree_to_numa(phys); |
| 462 | toptree_free(phys); |
| 463 | toptree_to_topology(numa); |
| 464 | toptree_free(numa); |
Heiko Carstens | e6d4a63 | 2016-12-02 11:12:01 +0100 | [diff] [blame^] | 465 | pin_all_possible_cpus(); |
Michael Holzheu | c29a7ba | 2014-03-06 18:47:21 +0100 | [diff] [blame] | 466 | } |
| 467 | |
| 468 | /* |
| 469 | * If emu_size is not set, use CONFIG_EMU_SIZE. Then round to minimum |
| 470 | * alignment (needed for memory hotplug). |
| 471 | */ |
| 472 | static unsigned long emu_setup_size_adjust(unsigned long size) |
| 473 | { |
Michael Holzheu | b02064a | 2015-09-03 11:57:56 +0200 | [diff] [blame] | 474 | unsigned long size_new; |
| 475 | |
Michael Holzheu | c29a7ba | 2014-03-06 18:47:21 +0100 | [diff] [blame] | 476 | size = size ? : CONFIG_EMU_SIZE; |
Michael Holzheu | b02064a | 2015-09-03 11:57:56 +0200 | [diff] [blame] | 477 | size_new = roundup(size, memory_block_size_bytes()); |
| 478 | if (size_new == size) |
| 479 | return size; |
| 480 | pr_warn("Increasing memory stripe size from %ld MB to %ld MB\n", |
| 481 | size >> 20, size_new >> 20); |
| 482 | return size_new; |
Michael Holzheu | c29a7ba | 2014-03-06 18:47:21 +0100 | [diff] [blame] | 483 | } |
| 484 | |
| 485 | /* |
| 486 | * If we have not enough memory for the specified nodes, reduce the node count. |
| 487 | */ |
| 488 | static int emu_setup_nodes_adjust(int nodes) |
| 489 | { |
| 490 | int nodes_max; |
| 491 | |
| 492 | nodes_max = memblock.memory.total_size / emu_size; |
| 493 | nodes_max = max(nodes_max, 1); |
| 494 | if (nodes_max >= nodes) |
| 495 | return nodes; |
| 496 | pr_warn("Not enough memory for %d nodes, reducing node count\n", nodes); |
| 497 | return nodes_max; |
| 498 | } |
| 499 | |
| 500 | /* |
| 501 | * Early emu setup |
| 502 | */ |
| 503 | static void emu_setup(void) |
| 504 | { |
Heiko Carstens | ef4423ce | 2016-07-28 18:14:29 +0200 | [diff] [blame] | 505 | int nid; |
| 506 | |
Michael Holzheu | c29a7ba | 2014-03-06 18:47:21 +0100 | [diff] [blame] | 507 | emu_size = emu_setup_size_adjust(emu_size); |
| 508 | emu_nodes = emu_setup_nodes_adjust(emu_nodes); |
Heiko Carstens | ef4423ce | 2016-07-28 18:14:29 +0200 | [diff] [blame] | 509 | for (nid = 0; nid < emu_nodes; nid++) |
| 510 | node_set(nid, node_possible_map); |
Michael Holzheu | c29a7ba | 2014-03-06 18:47:21 +0100 | [diff] [blame] | 511 | pr_info("Creating %d nodes with memory stripe size %ld MB\n", |
| 512 | emu_nodes, emu_size >> 20); |
| 513 | } |
| 514 | |
| 515 | /* |
| 516 | * Return node id for given page number |
| 517 | */ |
| 518 | static int emu_pfn_to_nid(unsigned long pfn) |
| 519 | { |
| 520 | return (pfn / (emu_size >> PAGE_SHIFT)) % emu_nodes; |
| 521 | } |
| 522 | |
| 523 | /* |
| 524 | * Return stripe size |
| 525 | */ |
| 526 | static unsigned long emu_align(void) |
| 527 | { |
| 528 | return emu_size; |
| 529 | } |
| 530 | |
| 531 | /* |
| 532 | * Return distance between two nodes |
| 533 | */ |
| 534 | static int emu_distance(int node1, int node2) |
| 535 | { |
| 536 | return (node1 != node2) * EMU_NODE_DIST; |
| 537 | } |
| 538 | |
| 539 | /* |
| 540 | * Define callbacks for generic s390 NUMA infrastructure |
| 541 | */ |
| 542 | const struct numa_mode numa_mode_emu = { |
| 543 | .name = "emu", |
| 544 | .setup = emu_setup, |
| 545 | .update_cpu_topology = emu_update_cpu_topology, |
| 546 | .__pfn_to_nid = emu_pfn_to_nid, |
| 547 | .align = emu_align, |
| 548 | .distance = emu_distance, |
| 549 | }; |
| 550 | |
| 551 | /* |
| 552 | * Kernel parameter: emu_nodes=<n> |
| 553 | */ |
| 554 | static int __init early_parse_emu_nodes(char *p) |
| 555 | { |
| 556 | int count; |
| 557 | |
| 558 | if (kstrtoint(p, 0, &count) != 0 || count <= 0) |
| 559 | return 0; |
| 560 | if (count <= 0) |
| 561 | return 0; |
| 562 | emu_nodes = min(count, MAX_NUMNODES); |
| 563 | return 0; |
| 564 | } |
| 565 | early_param("emu_nodes", early_parse_emu_nodes); |
| 566 | |
| 567 | /* |
| 568 | * Kernel parameter: emu_size=[<n>[k|M|G|T]] |
| 569 | */ |
| 570 | static int __init early_parse_emu_size(char *p) |
| 571 | { |
| 572 | emu_size = memparse(p, NULL); |
| 573 | return 0; |
| 574 | } |
| 575 | early_param("emu_size", early_parse_emu_size); |