// SPDX-License-Identifier: GPL-2.0-only
/*
 * cpu_rmap.c: CPU affinity reverse-map support
 * Copyright 2011 Solarflare Communications Inc.
 */

#include <linux/cpu_rmap.h>
#include <linux/interrupt.h>
#include <linux/export.h>

/*
 * These functions maintain a mapping from CPUs to some ordered set of
 * objects with CPU affinities. This can be seen as a reverse-map of
 * CPU affinity. However, we do not assume that the object affinities
 * cover all CPUs in the system. For those CPUs not directly covered
 * by object affinities, we attempt to find a nearest object based on
 * CPU topology.
 */
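
/*
 * For example (an illustrative sketch, not part of the interface):
 * with object 0 affine to CPU 0 and object 1 affine to CPU 4, the map
 * records for every CPU the index of its nearest object, so a lookup
 * such as rmap->near[2].index is O(1); nearness is precomputed by
 * cpu_rmap_update() below, which walks outward through SMT siblings,
 * cores and NUMA nodes.
 */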

/**
 * alloc_cpu_rmap - allocate CPU affinity reverse-map
 * @size: Number of objects to be mapped
 * @flags: Allocation flags e.g. %GFP_KERNEL
 */
struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags)
{
	struct cpu_rmap *rmap;
	unsigned int cpu;
	size_t obj_offset;

	/* This is a silly number of objects, and we use u16 indices. */
	if (size > 0xffff)
		return NULL;

	/* Offset of object pointer array from base structure */
	obj_offset = ALIGN(offsetof(struct cpu_rmap, near[nr_cpu_ids]),
			   sizeof(void *));

	rmap = kzalloc(obj_offset + size * sizeof(rmap->obj[0]), flags);
	if (!rmap)
		return NULL;

	kref_init(&rmap->refcount);
	rmap->obj = (void **)((char *)rmap + obj_offset);

	/* Initially assign CPUs to objects on a rota, since we have
	 * no idea where the objects are. Use infinite distance, so
	 * any object with known distance is preferable. Include the
	 * CPUs that are not present/online, since we definitely want
	 * any newly-hotplugged CPUs to have some object assigned.
	 */
	for_each_possible_cpu(cpu) {
		rmap->near[cpu].index = cpu % size;
		rmap->near[cpu].dist = CPU_RMAP_DIST_INF;
	}

	rmap->size = size;
	return rmap;
}
EXPORT_SYMBOL(alloc_cpu_rmap);
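
/*
 * Illustrative usage sketch (not compiled here; error handling
 * abbreviated). A caller tracking four objects might do:
 *
 *	struct cpu_rmap *rmap = alloc_cpu_rmap(4, GFP_KERNEL);
 *
 *	if (!rmap)
 *		return -ENOMEM;
 *	...
 *	cpu_rmap_put(rmap);	releases the initial reference taken
 *				by kref_init() above
 */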

/**
 * cpu_rmap_release - internal reclaiming helper called from kref_put
 * @ref: kref to struct cpu_rmap
 */
static void cpu_rmap_release(struct kref *ref)
{
	struct cpu_rmap *rmap = container_of(ref, struct cpu_rmap, refcount);

	kfree(rmap);
}

/**
 * cpu_rmap_get - internal helper to get new ref on a cpu_rmap
 * @rmap: reverse-map allocated with alloc_cpu_rmap()
 */
static inline void cpu_rmap_get(struct cpu_rmap *rmap)
{
	kref_get(&rmap->refcount);
}

/**
 * cpu_rmap_put - release ref on a cpu_rmap
 * @rmap: reverse-map allocated with alloc_cpu_rmap()
 */
int cpu_rmap_put(struct cpu_rmap *rmap)
{
	return kref_put(&rmap->refcount, cpu_rmap_release);
}
EXPORT_SYMBOL(cpu_rmap_put);
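
/*
 * Sketch of the intended refcounting pattern (illustrative, drawn
 * from the irq glue code below): each additional user takes its own
 * reference and drops it when done, so the map stays valid until the
 * last reference goes away:
 *
 *	cpu_rmap_get(rmap);	take a reference for a new user
 *	...
 *	cpu_rmap_put(rmap);	frees the map if this was the last
 *				reference
 */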

/* Reevaluate nearest object for given CPU, comparing with the given
 * neighbours at the given distance.
 */
static bool cpu_rmap_copy_neigh(struct cpu_rmap *rmap, unsigned int cpu,
				const struct cpumask *mask, u16 dist)
{
	int neigh;

	for_each_cpu(neigh, mask) {
		if (rmap->near[cpu].dist > dist &&
		    rmap->near[neigh].dist <= dist) {
			rmap->near[cpu].index = rmap->near[neigh].index;
			rmap->near[cpu].dist = dist;
			return true;
		}
	}
	return false;
}
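
/*
 * Worked example (illustrative): suppose object 5 is affine to CPU 0,
 * so rmap->near[0] is { .index = 5, .dist = 0 }. When CPU 1 is then
 * re-evaluated against topology_sibling_cpumask(1) with dist == 1,
 * CPU 0 qualifies (recorded distance 0 <= 1) while CPU 1's own
 * distance is still CPU_RMAP_DIST_INF, so CPU 1 inherits index 5 at
 * distance 1.
 */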

#ifdef DEBUG
static void debug_print_rmap(const struct cpu_rmap *rmap, const char *prefix)
{
	unsigned int index;
	unsigned int cpu;

	pr_info("cpu_rmap %p, %s:\n", rmap, prefix);

	for_each_possible_cpu(cpu) {
		index = rmap->near[cpu].index;
		pr_info("cpu %d -> obj %u (distance %u)\n",
			cpu, index, rmap->near[cpu].dist);
	}
}
#else
static inline void
debug_print_rmap(const struct cpu_rmap *rmap, const char *prefix)
{
}
#endif

/**
 * cpu_rmap_add - add object to a rmap
 * @rmap: CPU rmap allocated with alloc_cpu_rmap()
 * @obj: Object to add to rmap
 *
 * Return index of object.
 */
int cpu_rmap_add(struct cpu_rmap *rmap, void *obj)
{
	u16 index;

	BUG_ON(rmap->used >= rmap->size);
	index = rmap->used++;
	rmap->obj[index] = obj;
	return index;
}
EXPORT_SYMBOL(cpu_rmap_add);

/**
 * cpu_rmap_update - update CPU rmap following a change of object affinity
 * @rmap: CPU rmap to update
 * @index: Index of object whose affinity changed
 * @affinity: New CPU affinity of object
 */
int cpu_rmap_update(struct cpu_rmap *rmap, u16 index,
		    const struct cpumask *affinity)
{
	cpumask_var_t update_mask;
	unsigned int cpu;

	if (unlikely(!zalloc_cpumask_var(&update_mask, GFP_KERNEL)))
		return -ENOMEM;

	/* Invalidate distance for all CPUs for which this used to be
	 * the nearest object. Mark those CPUs for update.
	 */
	for_each_online_cpu(cpu) {
		if (rmap->near[cpu].index == index) {
			rmap->near[cpu].dist = CPU_RMAP_DIST_INF;
			cpumask_set_cpu(cpu, update_mask);
		}
	}

	debug_print_rmap(rmap, "after invalidating old distances");

	/* Set distance to 0 for all CPUs in the new affinity mask.
	 * Mark all CPUs within their NUMA nodes for update.
	 */
	for_each_cpu(cpu, affinity) {
		rmap->near[cpu].index = index;
		rmap->near[cpu].dist = 0;
		cpumask_or(update_mask, update_mask,
			   cpumask_of_node(cpu_to_node(cpu)));
	}

	debug_print_rmap(rmap, "after updating neighbours");

	/* Update distances based on topology */
	for_each_cpu(cpu, update_mask) {
		if (cpu_rmap_copy_neigh(rmap, cpu,
					topology_sibling_cpumask(cpu), 1))
			continue;
		if (cpu_rmap_copy_neigh(rmap, cpu,
					topology_core_cpumask(cpu), 2))
			continue;
		if (cpu_rmap_copy_neigh(rmap, cpu,
					cpumask_of_node(cpu_to_node(cpu)), 3))
			continue;
		/* We could continue into NUMA node distances, but for now
		 * we give up.
		 */
	}

	debug_print_rmap(rmap, "after copying neighbours");

	free_cpumask_var(update_mask);
	return 0;
}
EXPORT_SYMBOL(cpu_rmap_update);
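
/*
 * Illustrative call (a sketch, not compiled here): if the object
 * registered at index @i becomes affine to CPU 3 only, its owner
 * could run:
 *
 *	rc = cpu_rmap_update(rmap, i, cpumask_of(3));
 *
 * after which near[3].dist == 0 and CPU 3's SMT siblings, core mates
 * and NUMA-node neighbours are re-evaluated as above.
 */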

/* Glue between IRQ affinity notifiers and CPU rmaps */

struct irq_glue {
	struct irq_affinity_notify notify;
	struct cpu_rmap *rmap;
	u16 index;
};

/**
 * free_irq_cpu_rmap - free a CPU affinity reverse-map used for IRQs
 * @rmap: Reverse-map allocated with alloc_irq_cpu_rmap(), or %NULL
 *
 * Must be called in process context, before freeing the IRQs.
 */
void free_irq_cpu_rmap(struct cpu_rmap *rmap)
{
	struct irq_glue *glue;
	u16 index;

	if (!rmap)
		return;

	for (index = 0; index < rmap->used; index++) {
		glue = rmap->obj[index];
		irq_set_affinity_notifier(glue->notify.irq, NULL);
	}

	cpu_rmap_put(rmap);
}
EXPORT_SYMBOL(free_irq_cpu_rmap);

/**
 * irq_cpu_rmap_notify - callback for IRQ subsystem when IRQ affinity updated
 * @notify: struct irq_affinity_notify passed by irq/manage.c
 * @mask: cpu mask for new SMP affinity
 *
 * This is executed in workqueue context.
 */
static void
irq_cpu_rmap_notify(struct irq_affinity_notify *notify, const cpumask_t *mask)
{
	struct irq_glue *glue =
		container_of(notify, struct irq_glue, notify);
	int rc;

	rc = cpu_rmap_update(glue->rmap, glue->index, mask);
	if (rc)
		pr_warn("irq_cpu_rmap_notify: update failed: %d\n", rc);
}

/**
 * irq_cpu_rmap_release - reclaiming callback for IRQ subsystem
 * @ref: kref to struct irq_affinity_notify passed by irq/manage.c
 */
static void irq_cpu_rmap_release(struct kref *ref)
{
	struct irq_glue *glue =
		container_of(ref, struct irq_glue, notify.kref);

	cpu_rmap_put(glue->rmap);
	kfree(glue);
}

/**
 * irq_cpu_rmap_add - add an IRQ to a CPU affinity reverse-map
 * @rmap: The reverse-map
 * @irq: The IRQ number
 *
 * This adds an IRQ affinity notifier that will update the reverse-map
 * automatically.
 *
 * Must be called in process context, after the IRQ is allocated but
 * before it is bound with request_irq().
 */
int irq_cpu_rmap_add(struct cpu_rmap *rmap, int irq)
{
	struct irq_glue *glue = kzalloc(sizeof(*glue), GFP_KERNEL);
	int rc;

	if (!glue)
		return -ENOMEM;
	glue->notify.notify = irq_cpu_rmap_notify;
	glue->notify.release = irq_cpu_rmap_release;
	glue->rmap = rmap;
	cpu_rmap_get(rmap);
	glue->index = cpu_rmap_add(rmap, glue);
	rc = irq_set_affinity_notifier(irq, &glue->notify);
	if (rc) {
		cpu_rmap_put(glue->rmap);
		kfree(glue);
	}
	return rc;
}
EXPORT_SYMBOL(irq_cpu_rmap_add);
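
/*
 * Illustrative driver-side sketch (not compiled here; n_vecs and
 * vec[] are hypothetical names). A typical user allocates one map
 * for all of its IRQ vectors, registers each one, and tears down with
 * free_irq_cpu_rmap() before freeing the IRQs:
 *
 *	struct cpu_rmap *rmap = alloc_irq_cpu_rmap(n_vecs);
 *	int i, rc;
 *
 *	if (!rmap)
 *		return -ENOMEM;
 *	for (i = 0; i < n_vecs; i++) {
 *		rc = irq_cpu_rmap_add(rmap, vec[i]);
 *		if (rc)
 *			goto fail;
 *	}
 */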