Greg Kroah-Hartman | b244131 | 2017-11-01 15:07:57 +0100 | [diff] [blame] | 1 | // SPDX-License-Identifier: GPL-2.0 |
Christoph Hellwig | 9a0ef98 | 2017-06-20 01:37:55 +0200 | [diff] [blame] | 2 | /* |
| 3 | * Copyright (C) 2016 Thomas Gleixner. |
| 4 | * Copyright (C) 2016-2017 Christoph Hellwig. |
| 5 | */ |
Christoph Hellwig | 5e385a6 | 2016-07-04 17:39:27 +0900 | [diff] [blame] | 6 | #include <linux/interrupt.h> |
| 7 | #include <linux/kernel.h> |
| 8 | #include <linux/slab.h> |
| 9 | #include <linux/cpu.h> |
Ming Lei | b1a5a73 | 2019-08-16 10:28:49 +0800 | [diff] [blame] | 10 | #include <linux/sort.h> |
Christoph Hellwig | 5e385a6 | 2016-07-04 17:39:27 +0900 | [diff] [blame] | 11 | |
Thomas Gleixner | 34c3d98 | 2016-09-14 16:18:48 +0200 | [diff] [blame] | 12 | static void irq_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk, |
Thomas Gleixner | 0145c30 | 2019-02-16 18:13:07 +0100 | [diff] [blame] | 13 | unsigned int cpus_per_vec) |
Thomas Gleixner | 34c3d98 | 2016-09-14 16:18:48 +0200 | [diff] [blame] | 14 | { |
| 15 | const struct cpumask *siblmsk; |
| 16 | int cpu, sibl; |
| 17 | |
| 18 | for ( ; cpus_per_vec > 0; ) { |
| 19 | cpu = cpumask_first(nmsk); |
| 20 | |
| 21 | /* Should not happen, but I'm too lazy to think about it */ |
| 22 | if (cpu >= nr_cpu_ids) |
| 23 | return; |
| 24 | |
| 25 | cpumask_clear_cpu(cpu, nmsk); |
| 26 | cpumask_set_cpu(cpu, irqmsk); |
| 27 | cpus_per_vec--; |
| 28 | |
| 29 | /* If the cpu has siblings, use them first */ |
| 30 | siblmsk = topology_sibling_cpumask(cpu); |
| 31 | for (sibl = -1; cpus_per_vec > 0; ) { |
| 32 | sibl = cpumask_next(sibl, siblmsk); |
| 33 | if (sibl >= nr_cpu_ids) |
| 34 | break; |
| 35 | if (!cpumask_test_and_clear_cpu(sibl, nmsk)) |
| 36 | continue; |
| 37 | cpumask_set_cpu(sibl, irqmsk); |
| 38 | cpus_per_vec--; |
| 39 | } |
| 40 | } |
| 41 | } |
| 42 | |
Ming Lei | 47778f33 | 2018-03-08 18:53:55 +0800 | [diff] [blame] | 43 | static cpumask_var_t *alloc_node_to_cpumask(void) |
Christoph Hellwig | 9a0ef98 | 2017-06-20 01:37:55 +0200 | [diff] [blame] | 44 | { |
| 45 | cpumask_var_t *masks; |
| 46 | int node; |
| 47 | |
| 48 | masks = kcalloc(nr_node_ids, sizeof(cpumask_var_t), GFP_KERNEL); |
| 49 | if (!masks) |
| 50 | return NULL; |
| 51 | |
| 52 | for (node = 0; node < nr_node_ids; node++) { |
| 53 | if (!zalloc_cpumask_var(&masks[node], GFP_KERNEL)) |
| 54 | goto out_unwind; |
| 55 | } |
| 56 | |
| 57 | return masks; |
| 58 | |
| 59 | out_unwind: |
| 60 | while (--node >= 0) |
| 61 | free_cpumask_var(masks[node]); |
| 62 | kfree(masks); |
| 63 | return NULL; |
| 64 | } |
| 65 | |
Ming Lei | 47778f33 | 2018-03-08 18:53:55 +0800 | [diff] [blame] | 66 | static void free_node_to_cpumask(cpumask_var_t *masks) |
Christoph Hellwig | 9a0ef98 | 2017-06-20 01:37:55 +0200 | [diff] [blame] | 67 | { |
| 68 | int node; |
| 69 | |
| 70 | for (node = 0; node < nr_node_ids; node++) |
| 71 | free_cpumask_var(masks[node]); |
| 72 | kfree(masks); |
| 73 | } |
| 74 | |
Ming Lei | 47778f33 | 2018-03-08 18:53:55 +0800 | [diff] [blame] | 75 | static void build_node_to_cpumask(cpumask_var_t *masks) |
Christoph Hellwig | 9a0ef98 | 2017-06-20 01:37:55 +0200 | [diff] [blame] | 76 | { |
| 77 | int cpu; |
| 78 | |
Christoph Hellwig | 84676c1 | 2018-01-12 10:53:05 +0800 | [diff] [blame] | 79 | for_each_possible_cpu(cpu) |
Christoph Hellwig | 9a0ef98 | 2017-06-20 01:37:55 +0200 | [diff] [blame] | 80 | cpumask_set_cpu(cpu, masks[cpu_to_node(cpu)]); |
| 81 | } |
| 82 | |
Ming Lei | 47778f33 | 2018-03-08 18:53:55 +0800 | [diff] [blame] | 83 | static int get_nodes_in_cpumask(cpumask_var_t *node_to_cpumask, |
Christoph Hellwig | 9a0ef98 | 2017-06-20 01:37:55 +0200 | [diff] [blame] | 84 | const struct cpumask *mask, nodemask_t *nodemsk) |
Thomas Gleixner | 34c3d98 | 2016-09-14 16:18:48 +0200 | [diff] [blame] | 85 | { |
Guilherme G. Piccoli | c0af524 | 2016-12-14 16:01:12 -0200 | [diff] [blame] | 86 | int n, nodes = 0; |
Thomas Gleixner | 34c3d98 | 2016-09-14 16:18:48 +0200 | [diff] [blame] | 87 | |
| 88 | /* Calculate the number of nodes in the supplied affinity mask */ |
Christoph Hellwig | 9a0ef98 | 2017-06-20 01:37:55 +0200 | [diff] [blame] | 89 | for_each_node(n) { |
Ming Lei | 47778f33 | 2018-03-08 18:53:55 +0800 | [diff] [blame] | 90 | if (cpumask_intersects(mask, node_to_cpumask[n])) { |
Thomas Gleixner | 34c3d98 | 2016-09-14 16:18:48 +0200 | [diff] [blame] | 91 | node_set(n, *nodemsk); |
| 92 | nodes++; |
| 93 | } |
| 94 | } |
| 95 | return nodes; |
| 96 | } |
| 97 | |
Ming Lei | b1a5a73 | 2019-08-16 10:28:49 +0800 | [diff] [blame] | 98 | struct node_vectors { |
| 99 | unsigned id; |
| 100 | |
| 101 | union { |
| 102 | unsigned nvectors; |
| 103 | unsigned ncpus; |
| 104 | }; |
| 105 | }; |
| 106 | |
| 107 | static int ncpus_cmp_func(const void *l, const void *r) |
| 108 | { |
| 109 | const struct node_vectors *ln = l; |
| 110 | const struct node_vectors *rn = r; |
| 111 | |
| 112 | return ln->ncpus - rn->ncpus; |
| 113 | } |
| 114 | |
| 115 | /* |
| 116 | * Allocate vector number for each node, so that for each node: |
| 117 | * |
| 118 | * 1) the allocated number is >= 1 |
| 119 | * |
| 120 | * 2) the allocated numbver is <= active CPU number of this node |
| 121 | * |
| 122 | * The actual allocated total vectors may be less than @numvecs when |
| 123 | * active total CPU number is less than @numvecs. |
| 124 | * |
| 125 | * Active CPUs means the CPUs in '@cpu_mask AND @node_to_cpumask[]' |
| 126 | * for each node. |
| 127 | */ |
| 128 | static void alloc_nodes_vectors(unsigned int numvecs, |
Ming Lei | 101f85b | 2019-08-28 16:58:15 +0800 | [diff] [blame] | 129 | cpumask_var_t *node_to_cpumask, |
Ming Lei | b1a5a73 | 2019-08-16 10:28:49 +0800 | [diff] [blame] | 130 | const struct cpumask *cpu_mask, |
| 131 | const nodemask_t nodemsk, |
| 132 | struct cpumask *nmsk, |
| 133 | struct node_vectors *node_vectors) |
| 134 | { |
| 135 | unsigned n, remaining_ncpus = 0; |
| 136 | |
| 137 | for (n = 0; n < nr_node_ids; n++) { |
| 138 | node_vectors[n].id = n; |
| 139 | node_vectors[n].ncpus = UINT_MAX; |
| 140 | } |
| 141 | |
| 142 | for_each_node_mask(n, nodemsk) { |
| 143 | unsigned ncpus; |
| 144 | |
| 145 | cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]); |
| 146 | ncpus = cpumask_weight(nmsk); |
| 147 | |
| 148 | if (!ncpus) |
| 149 | continue; |
| 150 | remaining_ncpus += ncpus; |
| 151 | node_vectors[n].ncpus = ncpus; |
| 152 | } |
| 153 | |
| 154 | numvecs = min_t(unsigned, remaining_ncpus, numvecs); |
| 155 | |
| 156 | sort(node_vectors, nr_node_ids, sizeof(node_vectors[0]), |
| 157 | ncpus_cmp_func, NULL); |
| 158 | |
| 159 | /* |
| 160 | * Allocate vectors for each node according to the ratio of this |
| 161 | * node's nr_cpus to remaining un-assigned ncpus. 'numvecs' is |
| 162 | * bigger than number of active numa nodes. Always start the |
| 163 | * allocation from the node with minimized nr_cpus. |
| 164 | * |
| 165 | * This way guarantees that each active node gets allocated at |
| 166 | * least one vector, and the theory is simple: over-allocation |
| 167 | * is only done when this node is assigned by one vector, so |
| 168 | * other nodes will be allocated >= 1 vector, since 'numvecs' is |
| 169 | * bigger than number of numa nodes. |
| 170 | * |
| 171 | * One perfect invariant is that number of allocated vectors for |
| 172 | * each node is <= CPU count of this node: |
| 173 | * |
| 174 | * 1) suppose there are two nodes: A and B |
| 175 | * ncpu(X) is CPU count of node X |
| 176 | * vecs(X) is the vector count allocated to node X via this |
| 177 | * algorithm |
| 178 | * |
| 179 | * ncpu(A) <= ncpu(B) |
| 180 | * ncpu(A) + ncpu(B) = N |
| 181 | * vecs(A) + vecs(B) = V |
| 182 | * |
| 183 | * vecs(A) = max(1, round_down(V * ncpu(A) / N)) |
| 184 | * vecs(B) = V - vecs(A) |
| 185 | * |
| 186 | * both N and V are integer, and 2 <= V <= N, suppose |
| 187 | * V = N - delta, and 0 <= delta <= N - 2 |
| 188 | * |
| 189 | * 2) obviously vecs(A) <= ncpu(A) because: |
| 190 | * |
| 191 | * if vecs(A) is 1, then vecs(A) <= ncpu(A) given |
| 192 | * ncpu(A) >= 1 |
| 193 | * |
| 194 | * otherwise, |
| 195 | * vecs(A) <= V * ncpu(A) / N <= ncpu(A), given V <= N |
| 196 | * |
| 197 | * 3) prove how vecs(B) <= ncpu(B): |
| 198 | * |
| 199 | * if round_down(V * ncpu(A) / N) == 0, vecs(B) won't be |
| 200 | * over-allocated, so vecs(B) <= ncpu(B), |
| 201 | * |
| 202 | * otherwise: |
| 203 | * |
| 204 | * vecs(A) = |
| 205 | * round_down(V * ncpu(A) / N) = |
| 206 | * round_down((N - delta) * ncpu(A) / N) = |
| 207 | * round_down((N * ncpu(A) - delta * ncpu(A)) / N) >= |
| 208 | * round_down((N * ncpu(A) - delta * N) / N) = |
| 209 | * cpu(A) - delta |
| 210 | * |
| 211 | * then: |
| 212 | * |
| 213 | * vecs(A) - V >= ncpu(A) - delta - V |
| 214 | * => |
| 215 | * V - vecs(A) <= V + delta - ncpu(A) |
| 216 | * => |
| 217 | * vecs(B) <= N - ncpu(A) |
| 218 | * => |
| 219 | * vecs(B) <= cpu(B) |
| 220 | * |
| 221 | * For nodes >= 3, it can be thought as one node and another big |
| 222 | * node given that is exactly what this algorithm is implemented, |
| 223 | * and we always re-calculate 'remaining_ncpus' & 'numvecs', and |
| 224 | * finally for each node X: vecs(X) <= ncpu(X). |
| 225 | * |
| 226 | */ |
| 227 | for (n = 0; n < nr_node_ids; n++) { |
| 228 | unsigned nvectors, ncpus; |
| 229 | |
| 230 | if (node_vectors[n].ncpus == UINT_MAX) |
| 231 | continue; |
| 232 | |
| 233 | WARN_ON_ONCE(numvecs == 0); |
| 234 | |
| 235 | ncpus = node_vectors[n].ncpus; |
| 236 | nvectors = max_t(unsigned, 1, |
| 237 | numvecs * ncpus / remaining_ncpus); |
| 238 | WARN_ON_ONCE(nvectors > ncpus); |
| 239 | |
| 240 | node_vectors[n].nvectors = nvectors; |
| 241 | |
| 242 | remaining_ncpus -= ncpus; |
| 243 | numvecs -= nvectors; |
| 244 | } |
| 245 | } |
| 246 | |
Minwoo Im | 0e51833 | 2019-06-02 20:21:17 +0900 | [diff] [blame] | 247 | static int __irq_build_affinity_masks(unsigned int startvec, |
Thomas Gleixner | 0145c30 | 2019-02-16 18:13:07 +0100 | [diff] [blame] | 248 | unsigned int numvecs, |
| 249 | unsigned int firstvec, |
Thomas Gleixner | c2899c3 | 2018-12-18 16:06:53 +0100 | [diff] [blame] | 250 | cpumask_var_t *node_to_cpumask, |
| 251 | const struct cpumask *cpu_mask, |
| 252 | struct cpumask *nmsk, |
Dou Liyang | bec0403 | 2018-12-04 23:51:20 +0800 | [diff] [blame] | 253 | struct irq_affinity_desc *masks) |
Thomas Gleixner | 34c3d98 | 2016-09-14 16:18:48 +0200 | [diff] [blame] | 254 | { |
Ming Lei | b1a5a73 | 2019-08-16 10:28:49 +0800 | [diff] [blame] | 255 | unsigned int i, n, nodes, cpus_per_vec, extra_vecs, done = 0; |
Thomas Gleixner | 0145c30 | 2019-02-16 18:13:07 +0100 | [diff] [blame] | 256 | unsigned int last_affv = firstvec + numvecs; |
| 257 | unsigned int curvec = startvec; |
Thomas Gleixner | 34c3d98 | 2016-09-14 16:18:48 +0200 | [diff] [blame] | 258 | nodemask_t nodemsk = NODE_MASK_NONE; |
Ming Lei | b1a5a73 | 2019-08-16 10:28:49 +0800 | [diff] [blame] | 259 | struct node_vectors *node_vectors; |
Thomas Gleixner | 34c3d98 | 2016-09-14 16:18:48 +0200 | [diff] [blame] | 260 | |
Ming Lei | d305681 | 2018-03-08 18:53:58 +0800 | [diff] [blame] | 261 | if (!cpumask_weight(cpu_mask)) |
| 262 | return 0; |
| 263 | |
Ming Lei | b3e6aaa | 2018-03-08 18:53:56 +0800 | [diff] [blame] | 264 | nodes = get_nodes_in_cpumask(node_to_cpumask, cpu_mask, &nodemsk); |
Thomas Gleixner | 34c3d98 | 2016-09-14 16:18:48 +0200 | [diff] [blame] | 265 | |
| 266 | /* |
Guilherme G. Piccoli | c0af524 | 2016-12-14 16:01:12 -0200 | [diff] [blame] | 267 | * If the number of nodes in the mask is greater than or equal the |
Thomas Gleixner | 34c3d98 | 2016-09-14 16:18:48 +0200 | [diff] [blame] | 268 | * number of vectors we just spread the vectors across the nodes. |
| 269 | */ |
Ming Lei | 1a2d091 | 2018-03-08 18:53:57 +0800 | [diff] [blame] | 270 | if (numvecs <= nodes) { |
Thomas Gleixner | 34c3d98 | 2016-09-14 16:18:48 +0200 | [diff] [blame] | 271 | for_each_node_mask(n, nodemsk) { |
Thomas Gleixner | 0145c30 | 2019-02-16 18:13:07 +0100 | [diff] [blame] | 272 | cpumask_or(&masks[curvec].mask, &masks[curvec].mask, |
| 273 | node_to_cpumask[n]); |
Ming Lei | 1a2d091 | 2018-03-08 18:53:57 +0800 | [diff] [blame] | 274 | if (++curvec == last_affv) |
Ming Lei | 060746d | 2018-11-02 22:59:50 +0800 | [diff] [blame] | 275 | curvec = firstvec; |
Thomas Gleixner | 34c3d98 | 2016-09-14 16:18:48 +0200 | [diff] [blame] | 276 | } |
Thomas Gleixner | 0145c30 | 2019-02-16 18:13:07 +0100 | [diff] [blame] | 277 | return numvecs; |
Thomas Gleixner | 34c3d98 | 2016-09-14 16:18:48 +0200 | [diff] [blame] | 278 | } |
| 279 | |
Ming Lei | b1a5a73 | 2019-08-16 10:28:49 +0800 | [diff] [blame] | 280 | node_vectors = kcalloc(nr_node_ids, |
| 281 | sizeof(struct node_vectors), |
| 282 | GFP_KERNEL); |
| 283 | if (!node_vectors) |
| 284 | return -ENOMEM; |
| 285 | |
| 286 | /* allocate vector number for each node */ |
| 287 | alloc_nodes_vectors(numvecs, node_to_cpumask, cpu_mask, |
| 288 | nodemsk, nmsk, node_vectors); |
| 289 | |
| 290 | for (i = 0; i < nr_node_ids; i++) { |
| 291 | unsigned int ncpus, v; |
| 292 | struct node_vectors *nv = &node_vectors[i]; |
| 293 | |
| 294 | if (nv->nvectors == UINT_MAX) |
| 295 | continue; |
Keith Busch | 7bf8222 | 2017-04-03 15:25:53 -0400 | [diff] [blame] | 296 | |
Thomas Gleixner | 34c3d98 | 2016-09-14 16:18:48 +0200 | [diff] [blame] | 297 | /* Get the cpus on this node which are in the mask */ |
Ming Lei | b1a5a73 | 2019-08-16 10:28:49 +0800 | [diff] [blame] | 298 | cpumask_and(nmsk, cpu_mask, node_to_cpumask[nv->id]); |
Thomas Gleixner | 34c3d98 | 2016-09-14 16:18:48 +0200 | [diff] [blame] | 299 | ncpus = cpumask_weight(nmsk); |
Ming Lei | 53c1788 | 2019-08-16 10:28:48 +0800 | [diff] [blame] | 300 | if (!ncpus) |
| 301 | continue; |
| 302 | |
Ming Lei | b1a5a73 | 2019-08-16 10:28:49 +0800 | [diff] [blame] | 303 | WARN_ON_ONCE(nv->nvectors > ncpus); |
Keith Busch | 7bf8222 | 2017-04-03 15:25:53 -0400 | [diff] [blame] | 304 | |
| 305 | /* Account for rounding errors */ |
Ming Lei | b1a5a73 | 2019-08-16 10:28:49 +0800 | [diff] [blame] | 306 | extra_vecs = ncpus - nv->nvectors * (ncpus / nv->nvectors); |
Thomas Gleixner | 34c3d98 | 2016-09-14 16:18:48 +0200 | [diff] [blame] | 307 | |
Ming Lei | b1a5a73 | 2019-08-16 10:28:49 +0800 | [diff] [blame] | 308 | /* Spread allocated vectors on CPUs of the current node */ |
| 309 | for (v = 0; v < nv->nvectors; v++, curvec++) { |
| 310 | cpus_per_vec = ncpus / nv->nvectors; |
Thomas Gleixner | 34c3d98 | 2016-09-14 16:18:48 +0200 | [diff] [blame] | 311 | |
| 312 | /* Account for extra vectors to compensate rounding errors */ |
| 313 | if (extra_vecs) { |
| 314 | cpus_per_vec++; |
Keith Busch | 7bf8222 | 2017-04-03 15:25:53 -0400 | [diff] [blame] | 315 | --extra_vecs; |
Thomas Gleixner | 34c3d98 | 2016-09-14 16:18:48 +0200 | [diff] [blame] | 316 | } |
Ming Lei | b1a5a73 | 2019-08-16 10:28:49 +0800 | [diff] [blame] | 317 | |
| 318 | /* |
| 319 | * wrapping has to be considered given 'startvec' |
| 320 | * may start anywhere |
| 321 | */ |
| 322 | if (curvec >= last_affv) |
| 323 | curvec = firstvec; |
Dou Liyang | bec0403 | 2018-12-04 23:51:20 +0800 | [diff] [blame] | 324 | irq_spread_init_one(&masks[curvec].mask, nmsk, |
| 325 | cpus_per_vec); |
Thomas Gleixner | 34c3d98 | 2016-09-14 16:18:48 +0200 | [diff] [blame] | 326 | } |
Ming Lei | b1a5a73 | 2019-08-16 10:28:49 +0800 | [diff] [blame] | 327 | done += nv->nvectors; |
Thomas Gleixner | 34c3d98 | 2016-09-14 16:18:48 +0200 | [diff] [blame] | 328 | } |
Ming Lei | b1a5a73 | 2019-08-16 10:28:49 +0800 | [diff] [blame] | 329 | kfree(node_vectors); |
| 330 | return done; |
Ming Lei | b3e6aaa | 2018-03-08 18:53:56 +0800 | [diff] [blame] | 331 | } |
| 332 | |
Ming Lei | 5c903e10 | 2018-11-02 22:59:49 +0800 | [diff] [blame] | 333 | /* |
| 334 | * build affinity in two stages: |
| 335 | * 1) spread present CPU on these vectors |
| 336 | * 2) spread other possible CPUs on these vectors |
| 337 | */ |
Minwoo Im | 0e51833 | 2019-06-02 20:21:17 +0900 | [diff] [blame] | 338 | static int irq_build_affinity_masks(unsigned int startvec, unsigned int numvecs, |
Thomas Gleixner | 0145c30 | 2019-02-16 18:13:07 +0100 | [diff] [blame] | 339 | unsigned int firstvec, |
Dou Liyang | bec0403 | 2018-12-04 23:51:20 +0800 | [diff] [blame] | 340 | struct irq_affinity_desc *masks) |
Ming Lei | 5c903e10 | 2018-11-02 22:59:49 +0800 | [diff] [blame] | 341 | { |
Ming Lei | b1a5a73 | 2019-08-16 10:28:49 +0800 | [diff] [blame] | 342 | unsigned int curvec = startvec, nr_present = 0, nr_others = 0; |
Ming Lei | 347253c | 2019-01-25 17:53:43 +0800 | [diff] [blame] | 343 | cpumask_var_t *node_to_cpumask; |
Thomas Gleixner | 0145c30 | 2019-02-16 18:13:07 +0100 | [diff] [blame] | 344 | cpumask_var_t nmsk, npresmsk; |
| 345 | int ret = -ENOMEM; |
Ming Lei | 5c903e10 | 2018-11-02 22:59:49 +0800 | [diff] [blame] | 346 | |
| 347 | if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL)) |
Thomas Gleixner | c2899c3 | 2018-12-18 16:06:53 +0100 | [diff] [blame] | 348 | return ret; |
Ming Lei | 5c903e10 | 2018-11-02 22:59:49 +0800 | [diff] [blame] | 349 | |
| 350 | if (!zalloc_cpumask_var(&npresmsk, GFP_KERNEL)) |
Ming Lei | 347253c | 2019-01-25 17:53:43 +0800 | [diff] [blame] | 351 | goto fail_nmsk; |
| 352 | |
| 353 | node_to_cpumask = alloc_node_to_cpumask(); |
| 354 | if (!node_to_cpumask) |
| 355 | goto fail_npresmsk; |
Ming Lei | 5c903e10 | 2018-11-02 22:59:49 +0800 | [diff] [blame] | 356 | |
| 357 | /* Stabilize the cpumasks */ |
| 358 | get_online_cpus(); |
| 359 | build_node_to_cpumask(node_to_cpumask); |
| 360 | |
| 361 | /* Spread on present CPUs starting from affd->pre_vectors */ |
Ming Lei | b1a5a73 | 2019-08-16 10:28:49 +0800 | [diff] [blame] | 362 | ret = __irq_build_affinity_masks(curvec, numvecs, firstvec, |
| 363 | node_to_cpumask, cpu_present_mask, |
| 364 | nmsk, masks); |
| 365 | if (ret < 0) |
| 366 | goto fail_build_affinity; |
| 367 | nr_present = ret; |
Ming Lei | 5c903e10 | 2018-11-02 22:59:49 +0800 | [diff] [blame] | 368 | |
| 369 | /* |
| 370 | * Spread on non present CPUs starting from the next vector to be |
| 371 | * handled. If the spreading of present CPUs already exhausted the |
| 372 | * vector space, assign the non present CPUs to the already spread |
| 373 | * out vectors. |
| 374 | */ |
Jens Axboe | 6da4b3a | 2018-11-02 22:59:51 +0800 | [diff] [blame] | 375 | if (nr_present >= numvecs) |
| 376 | curvec = firstvec; |
Ming Lei | 5c903e10 | 2018-11-02 22:59:49 +0800 | [diff] [blame] | 377 | else |
Jens Axboe | 6da4b3a | 2018-11-02 22:59:51 +0800 | [diff] [blame] | 378 | curvec = firstvec + nr_present; |
Ming Lei | 5c903e10 | 2018-11-02 22:59:49 +0800 | [diff] [blame] | 379 | cpumask_andnot(npresmsk, cpu_possible_mask, cpu_present_mask); |
Ming Lei | b1a5a73 | 2019-08-16 10:28:49 +0800 | [diff] [blame] | 380 | ret = __irq_build_affinity_masks(curvec, numvecs, firstvec, |
| 381 | node_to_cpumask, npresmsk, nmsk, |
| 382 | masks); |
| 383 | if (ret >= 0) |
| 384 | nr_others = ret; |
| 385 | |
| 386 | fail_build_affinity: |
Ming Lei | 5c903e10 | 2018-11-02 22:59:49 +0800 | [diff] [blame] | 387 | put_online_cpus(); |
| 388 | |
Ming Lei | b1a5a73 | 2019-08-16 10:28:49 +0800 | [diff] [blame] | 389 | if (ret >= 0) |
Thomas Gleixner | c2899c3 | 2018-12-18 16:06:53 +0100 | [diff] [blame] | 390 | WARN_ON(nr_present + nr_others < numvecs); |
Jens Axboe | 6da4b3a | 2018-11-02 22:59:51 +0800 | [diff] [blame] | 391 | |
Ming Lei | 347253c | 2019-01-25 17:53:43 +0800 | [diff] [blame] | 392 | free_node_to_cpumask(node_to_cpumask); |
| 393 | |
| 394 | fail_npresmsk: |
Ming Lei | 5c903e10 | 2018-11-02 22:59:49 +0800 | [diff] [blame] | 395 | free_cpumask_var(npresmsk); |
| 396 | |
Ming Lei | 347253c | 2019-01-25 17:53:43 +0800 | [diff] [blame] | 397 | fail_nmsk: |
Ming Lei | 5c903e10 | 2018-11-02 22:59:49 +0800 | [diff] [blame] | 398 | free_cpumask_var(nmsk); |
Ming Lei | b1a5a73 | 2019-08-16 10:28:49 +0800 | [diff] [blame] | 399 | return ret < 0 ? ret : 0; |
Ming Lei | 5c903e10 | 2018-11-02 22:59:49 +0800 | [diff] [blame] | 400 | } |
| 401 | |
Ming Lei | c66d4bd | 2019-02-16 18:13:09 +0100 | [diff] [blame] | 402 | static void default_calc_sets(struct irq_affinity *affd, unsigned int affvecs) |
| 403 | { |
| 404 | affd->nr_sets = 1; |
| 405 | affd->set_size[0] = affvecs; |
| 406 | } |
| 407 | |
Ming Lei | b3e6aaa | 2018-03-08 18:53:56 +0800 | [diff] [blame] | 408 | /** |
| 409 | * irq_create_affinity_masks - Create affinity masks for multiqueue spreading |
| 410 | * @nvecs: The total number of vectors |
| 411 | * @affd: Description of the affinity requirements |
| 412 | * |
Dou Liyang | bec0403 | 2018-12-04 23:51:20 +0800 | [diff] [blame] | 413 | * Returns the irq_affinity_desc pointer or NULL if allocation failed. |
Ming Lei | b3e6aaa | 2018-03-08 18:53:56 +0800 | [diff] [blame] | 414 | */ |
Dou Liyang | bec0403 | 2018-12-04 23:51:20 +0800 | [diff] [blame] | 415 | struct irq_affinity_desc * |
Ming Lei | 9cfef55 | 2019-02-16 18:13:08 +0100 | [diff] [blame] | 416 | irq_create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd) |
Ming Lei | b3e6aaa | 2018-03-08 18:53:56 +0800 | [diff] [blame] | 417 | { |
Ming Lei | c66d4bd | 2019-02-16 18:13:09 +0100 | [diff] [blame] | 418 | unsigned int affvecs, curvec, usedvecs, i; |
Dou Liyang | bec0403 | 2018-12-04 23:51:20 +0800 | [diff] [blame] | 419 | struct irq_affinity_desc *masks = NULL; |
Ming Lei | b3e6aaa | 2018-03-08 18:53:56 +0800 | [diff] [blame] | 420 | |
| 421 | /* |
Ming Lei | c66d4bd | 2019-02-16 18:13:09 +0100 | [diff] [blame] | 422 | * Determine the number of vectors which need interrupt affinities |
| 423 | * assigned. If the pre/post request exhausts the available vectors |
| 424 | * then nothing to do here except for invoking the calc_sets() |
Ming Lei | 491beed | 2019-08-05 09:19:06 +0800 | [diff] [blame] | 425 | * callback so the device driver can adjust to the situation. |
Ming Lei | b3e6aaa | 2018-03-08 18:53:56 +0800 | [diff] [blame] | 426 | */ |
Ming Lei | 491beed | 2019-08-05 09:19:06 +0800 | [diff] [blame] | 427 | if (nvecs > affd->pre_vectors + affd->post_vectors) |
Ming Lei | c66d4bd | 2019-02-16 18:13:09 +0100 | [diff] [blame] | 428 | affvecs = nvecs - affd->pre_vectors - affd->post_vectors; |
| 429 | else |
| 430 | affvecs = 0; |
| 431 | |
| 432 | /* |
| 433 | * Simple invocations do not provide a calc_sets() callback. Install |
Thomas Gleixner | a6a309e | 2019-02-16 18:13:11 +0100 | [diff] [blame] | 434 | * the generic one. |
Ming Lei | c66d4bd | 2019-02-16 18:13:09 +0100 | [diff] [blame] | 435 | */ |
Thomas Gleixner | a6a309e | 2019-02-16 18:13:11 +0100 | [diff] [blame] | 436 | if (!affd->calc_sets) |
Ming Lei | c66d4bd | 2019-02-16 18:13:09 +0100 | [diff] [blame] | 437 | affd->calc_sets = default_calc_sets; |
| 438 | |
Thomas Gleixner | a6a309e | 2019-02-16 18:13:11 +0100 | [diff] [blame] | 439 | /* Recalculate the sets */ |
| 440 | affd->calc_sets(affd, affvecs); |
Ming Lei | b3e6aaa | 2018-03-08 18:53:56 +0800 | [diff] [blame] | 441 | |
Ming Lei | 9cfef55 | 2019-02-16 18:13:08 +0100 | [diff] [blame] | 442 | if (WARN_ON_ONCE(affd->nr_sets > IRQ_AFFINITY_MAX_SETS)) |
| 443 | return NULL; |
| 444 | |
Ming Lei | c66d4bd | 2019-02-16 18:13:09 +0100 | [diff] [blame] | 445 | /* Nothing to assign? */ |
| 446 | if (!affvecs) |
| 447 | return NULL; |
| 448 | |
Ming Lei | b3e6aaa | 2018-03-08 18:53:56 +0800 | [diff] [blame] | 449 | masks = kcalloc(nvecs, sizeof(*masks), GFP_KERNEL); |
| 450 | if (!masks) |
Ming Lei | 347253c | 2019-01-25 17:53:43 +0800 | [diff] [blame] | 451 | return NULL; |
Ming Lei | b3e6aaa | 2018-03-08 18:53:56 +0800 | [diff] [blame] | 452 | |
| 453 | /* Fill out vectors at the beginning that don't need affinity */ |
| 454 | for (curvec = 0; curvec < affd->pre_vectors; curvec++) |
Dou Liyang | bec0403 | 2018-12-04 23:51:20 +0800 | [diff] [blame] | 455 | cpumask_copy(&masks[curvec].mask, irq_default_affinity); |
Ming Lei | c66d4bd | 2019-02-16 18:13:09 +0100 | [diff] [blame] | 456 | |
Jens Axboe | 6da4b3a | 2018-11-02 22:59:51 +0800 | [diff] [blame] | 457 | /* |
| 458 | * Spread on present CPUs starting from affd->pre_vectors. If we |
| 459 | * have multiple sets, build each sets affinity mask separately. |
| 460 | */ |
Ming Lei | c66d4bd | 2019-02-16 18:13:09 +0100 | [diff] [blame] | 461 | for (i = 0, usedvecs = 0; i < affd->nr_sets; i++) { |
| 462 | unsigned int this_vecs = affd->set_size[i]; |
Jens Axboe | 6da4b3a | 2018-11-02 22:59:51 +0800 | [diff] [blame] | 463 | int ret; |
| 464 | |
Minwoo Im | 0e51833 | 2019-06-02 20:21:17 +0900 | [diff] [blame] | 465 | ret = irq_build_affinity_masks(curvec, this_vecs, |
Thomas Gleixner | 0145c30 | 2019-02-16 18:13:07 +0100 | [diff] [blame] | 466 | curvec, masks); |
Jens Axboe | 6da4b3a | 2018-11-02 22:59:51 +0800 | [diff] [blame] | 467 | if (ret) { |
Thomas Gleixner | c2899c3 | 2018-12-18 16:06:53 +0100 | [diff] [blame] | 468 | kfree(masks); |
Ming Lei | 347253c | 2019-01-25 17:53:43 +0800 | [diff] [blame] | 469 | return NULL; |
Jens Axboe | 6da4b3a | 2018-11-02 22:59:51 +0800 | [diff] [blame] | 470 | } |
| 471 | curvec += this_vecs; |
| 472 | usedvecs += this_vecs; |
| 473 | } |
Christoph Hellwig | 67c93c2 | 2016-11-08 17:15:03 -0800 | [diff] [blame] | 474 | |
| 475 | /* Fill out vectors at the end that don't need affinity */ |
Ming Lei | d305681 | 2018-03-08 18:53:58 +0800 | [diff] [blame] | 476 | if (usedvecs >= affvecs) |
| 477 | curvec = affd->pre_vectors + affvecs; |
| 478 | else |
| 479 | curvec = affd->pre_vectors + usedvecs; |
Christoph Hellwig | 67c93c2 | 2016-11-08 17:15:03 -0800 | [diff] [blame] | 480 | for (; curvec < nvecs; curvec++) |
Dou Liyang | bec0403 | 2018-12-04 23:51:20 +0800 | [diff] [blame] | 481 | cpumask_copy(&masks[curvec].mask, irq_default_affinity); |
Ming Lei | d305681 | 2018-03-08 18:53:58 +0800 | [diff] [blame] | 482 | |
Dou Liyang | c410abbb | 2018-12-04 23:51:21 +0800 | [diff] [blame] | 483 | /* Mark the managed interrupts */ |
| 484 | for (i = affd->pre_vectors; i < nvecs - affd->post_vectors; i++) |
| 485 | masks[i].is_managed = 1; |
| 486 | |
Thomas Gleixner | 34c3d98 | 2016-09-14 16:18:48 +0200 | [diff] [blame] | 487 | return masks; |
| 488 | } |
| 489 | |
| 490 | /** |
Christoph Hellwig | 212bd84 | 2016-11-08 17:15:02 -0800 | [diff] [blame] | 491 | * irq_calc_affinity_vectors - Calculate the optimal number of vectors |
Michael Hernandez | 6f9a22b | 2017-05-18 10:47:47 -0700 | [diff] [blame] | 492 | * @minvec: The minimum number of vectors available |
Christoph Hellwig | 212bd84 | 2016-11-08 17:15:02 -0800 | [diff] [blame] | 493 | * @maxvec: The maximum number of vectors available |
| 494 | * @affd: Description of the affinity requirements |
Thomas Gleixner | 34c3d98 | 2016-09-14 16:18:48 +0200 | [diff] [blame] | 495 | */ |
Thomas Gleixner | 0145c30 | 2019-02-16 18:13:07 +0100 | [diff] [blame] | 496 | unsigned int irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec, |
| 497 | const struct irq_affinity *affd) |
Thomas Gleixner | 34c3d98 | 2016-09-14 16:18:48 +0200 | [diff] [blame] | 498 | { |
Thomas Gleixner | 0145c30 | 2019-02-16 18:13:07 +0100 | [diff] [blame] | 499 | unsigned int resv = affd->pre_vectors + affd->post_vectors; |
| 500 | unsigned int set_vecs; |
Thomas Gleixner | 34c3d98 | 2016-09-14 16:18:48 +0200 | [diff] [blame] | 501 | |
Michael Hernandez | 6f9a22b | 2017-05-18 10:47:47 -0700 | [diff] [blame] | 502 | if (resv > minvec) |
| 503 | return 0; |
| 504 | |
Ming Lei | c66d4bd | 2019-02-16 18:13:09 +0100 | [diff] [blame] | 505 | if (affd->calc_sets) { |
| 506 | set_vecs = maxvec - resv; |
Jens Axboe | 6da4b3a | 2018-11-02 22:59:51 +0800 | [diff] [blame] | 507 | } else { |
| 508 | get_online_cpus(); |
| 509 | set_vecs = cpumask_weight(cpu_possible_mask); |
| 510 | put_online_cpus(); |
| 511 | } |
| 512 | |
Thomas Gleixner | 0145c30 | 2019-02-16 18:13:07 +0100 | [diff] [blame] | 513 | return resv + min(set_vecs, maxvec - resv); |
Thomas Gleixner | 34c3d98 | 2016-09-14 16:18:48 +0200 | [diff] [blame] | 514 | } |