Glauber Costa | 68a1c3f | 2008-03-03 14:12:42 -0300 | [diff] [blame] | 1 | #include <linux/init.h> |
| 2 | #include <linux/smp.h> |
Glauber Costa | a355352 | 2008-03-03 14:12:58 -0300 | [diff] [blame] | 3 | #include <linux/module.h> |
Glauber Costa | 70708a1 | 2008-03-03 14:13:03 -0300 | [diff] [blame] | 4 | #include <linux/sched.h> |
Glauber Costa | 69c18c1 | 2008-03-03 14:13:07 -0300 | [diff] [blame] | 5 | #include <linux/percpu.h> |
Glauber Costa | 91718e8 | 2008-03-03 14:13:12 -0300 | [diff] [blame] | 6 | #include <linux/bootmem.h> |
Glauber Costa | 69c18c1 | 2008-03-03 14:13:07 -0300 | [diff] [blame] | 7 | |
| 8 | #include <asm/nmi.h> |
| 9 | #include <asm/irq.h> |
| 10 | #include <asm/smp.h> |
| 11 | #include <asm/cpu.h> |
| 12 | #include <asm/numa.h> |
Glauber Costa | 68a1c3f | 2008-03-03 14:12:42 -0300 | [diff] [blame] | 13 | |
Glauber Costa | a355352 | 2008-03-03 14:12:58 -0300 | [diff] [blame] | 14 | /* Number of siblings per CPU package; set_cpu_sibling_map() only pairs up threads when this is > 1 */ |
| 15 | int smp_num_siblings = 1; |
| 16 | EXPORT_SYMBOL(smp_num_siblings); |
| 17 | |
| 18 | /* Last level cache ID of each logical CPU; BAD_APICID entries are never matched against other CPUs */ |
| 19 | DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID; |
| 20 | |
| 21 | /* bitmap of online cpus */ |
| 22 | cpumask_t cpu_online_map __read_mostly; |
| 23 | EXPORT_SYMBOL(cpu_online_map); |
| 24 | |
/*
 * Bring-up bookkeeping masks.  cpu_possible_map is filled in by
 * prefill_possible_map(); cpu_callout_map selects the CPUs whose
 * loops_per_jiffy is summed in impress_friends().
 * NOTE(review): the callin/callout handshake itself lives outside this
 * file section -- confirm exact semantics there.
 */
| 25 | cpumask_t cpu_callin_map; |
| 26 | cpumask_t cpu_callout_map; |
| 27 | cpumask_t cpu_possible_map; |
| 28 | EXPORT_SYMBOL(cpu_possible_map); |
| 29 | |
| 30 | /* representing HT siblings of each logical CPU */ |
| 31 | DEFINE_PER_CPU(cpumask_t, cpu_sibling_map); |
| 32 | EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); |
| 33 | |
| 34 | /* representing HT and core siblings of each logical CPU */ |
| 35 | DEFINE_PER_CPU(cpumask_t, cpu_core_map); |
| 36 | EXPORT_PER_CPU_SYMBOL(cpu_core_map); |
| 37 | |
| 38 | /* Per CPU bogomips and other parameters */ |
| 39 | DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); |
| 40 | EXPORT_PER_CPU_SYMBOL(cpu_info); |
Glauber Costa | 768d950 | 2008-03-03 14:13:02 -0300 | [diff] [blame] | 41 | |
Glauber Costa | 91718e8 | 2008-03-03 14:13:12 -0300 | [diff] [blame] | 42 | /* Default is ready for x86_64; no harm for 32-bit x86, where smp_alloc_memory() overwrites it after allocating a low page */ |
| 43 | unsigned char *trampoline_base = __va(SMP_TRAMPOLINE_BASE); |
| 44 | |
Glauber Costa | 768d950 | 2008-03-03 14:13:02 -0300 | [diff] [blame] | 45 | /* representing cpus for which sibling maps can be computed */ |
| 46 | static cpumask_t cpu_sibling_setup_map; |
| 47 | |
Glauber de Oliveira Costa | 1d89a7f | 2008-03-19 14:25:05 -0300 | [diff] [blame] | 48 | /* Set by smp_apply_quirks() if we find a B stepping CPU; reported by smp_checks() */ |
| 49 | int __cpuinitdata smp_b_stepping; |
Glauber de Oliveira Costa | 1d89a7f | 2008-03-19 14:25:05 -0300 | [diff] [blame] | 50 | |
| 51 | static void __cpuinit smp_apply_quirks(struct cpuinfo_x86 *c) |
| 52 | { |
| 53 | #ifdef CONFIG_X86_32 |
| 54 | /* |
| 55 | * Mask B, Pentium, but not Pentium MMX |
| 56 | */ |
| 57 | if (c->x86_vendor == X86_VENDOR_INTEL && |
| 58 | c->x86 == 5 && |
| 59 | c->x86_mask >= 1 && c->x86_mask <= 4 && |
| 60 | c->x86_model <= 3) |
| 61 | /* |
| 62 | * Remember we have B step Pentia with bugs |
| 63 | */ |
| 64 | smp_b_stepping = 1; |
| 65 | |
| 66 | /* |
| 67 | * Certain Athlons might work (for various values of 'work') in SMP |
| 68 | * but they are not certified as MP capable. |
| 69 | */ |
| 70 | if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) { |
| 71 | |
| 72 | if (num_possible_cpus() == 1) |
| 73 | goto valid_k7; |
| 74 | |
| 75 | /* Athlon 660/661 is valid. */ |
| 76 | if ((c->x86_model == 6) && ((c->x86_mask == 0) || |
| 77 | (c->x86_mask == 1))) |
| 78 | goto valid_k7; |
| 79 | |
| 80 | /* Duron 670 is valid */ |
| 81 | if ((c->x86_model == 7) && (c->x86_mask == 0)) |
| 82 | goto valid_k7; |
| 83 | |
| 84 | /* |
| 85 | * Athlon 662, Duron 671, and Athlon >model 7 have capability |
| 86 | * bit. It's worth noting that the A5 stepping (662) of some |
| 87 | * Athlon XP's have the MP bit set. |
| 88 | * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for |
| 89 | * more. |
| 90 | */ |
| 91 | if (((c->x86_model == 6) && (c->x86_mask >= 2)) || |
| 92 | ((c->x86_model == 7) && (c->x86_mask >= 1)) || |
| 93 | (c->x86_model > 7)) |
| 94 | if (cpu_has_mp) |
| 95 | goto valid_k7; |
| 96 | |
| 97 | /* If we get here, not a certified SMP capable AMD system. */ |
| 98 | add_taint(TAINT_UNSAFE_SMP); |
| 99 | } |
| 100 | |
| 101 | valid_k7: |
| 102 | ; |
| 103 | #endif |
| 104 | } |
| 105 | |
Glauber de Oliveira Costa | 693d4b8 | 2008-03-19 14:25:28 -0300 | [diff] [blame^] | 106 | void smp_checks(void) |
| 107 | { |
| 108 | if (smp_b_stepping) |
| 109 | printk(KERN_WARNING "WARNING: SMP operation may be unreliable" |
| 110 | "with B stepping processors.\n"); |
| 111 | |
| 112 | /* |
| 113 | * Don't taint if we are running SMP kernel on a single non-MP |
| 114 | * approved Athlon |
| 115 | */ |
| 116 | if (tainted & TAINT_UNSAFE_SMP) { |
| 117 | if (cpus_weight(cpu_present_map)) |
| 118 | printk(KERN_INFO "WARNING: This combination of AMD" |
| 119 | "processors is not suitable for SMP.\n"); |
| 120 | else |
| 121 | tainted &= ~TAINT_UNSAFE_SMP; |
| 122 | } |
| 123 | } |
| 124 | |
Glauber de Oliveira Costa | 1d89a7f | 2008-03-19 14:25:05 -0300 | [diff] [blame] | 125 | /* |
| 126 | * The bootstrap kernel entry code has set these up. Save them for |
| 127 | * a given CPU |
| 128 | */ |
| 129 | |
| 130 | void __cpuinit smp_store_cpu_info(int id) |
| 131 | { |
| 132 | struct cpuinfo_x86 *c = &cpu_data(id); |
| 133 | |
| 134 | *c = boot_cpu_data; |
| 135 | c->cpu_index = id; |
| 136 | if (id != 0) |
| 137 | identify_secondary_cpu(c); |
| 138 | smp_apply_quirks(c); |
| 139 | } |
| 140 | |
| 141 | |
/*
 * Record the topology relations of @cpu against every CPU already
 * registered in cpu_sibling_setup_map: thread siblings (cpu_sibling_map),
 * package mates (cpu_core_map), last-level-cache sharers (llc_shared_map)
 * and the per-CPU booted_cores count.  All relations are entered
 * symmetrically, i.e. in the maps of both CPUs involved.
 */
Glauber Costa | 768d950 | 2008-03-03 14:13:02 -0300 | [diff] [blame] | 142 | void __cpuinit set_cpu_sibling_map(int cpu) |
| 143 | { |
| 144 | int i; |
| 145 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
| 146 | |
/* Register first, so the loops below also visit @cpu itself. */
| 147 | cpu_set(cpu, cpu_sibling_setup_map); |
| 148 | |
| 149 | if (smp_num_siblings > 1) { |
| 150 | for_each_cpu_mask(i, cpu_sibling_setup_map) { |
/* Same package and same core id: i is a thread sibling of cpu. */
| 151 | if (c->phys_proc_id == cpu_data(i).phys_proc_id && |
| 152 | c->cpu_core_id == cpu_data(i).cpu_core_id) { |
| 153 | cpu_set(i, per_cpu(cpu_sibling_map, cpu)); |
| 154 | cpu_set(cpu, per_cpu(cpu_sibling_map, i)); |
| 155 | cpu_set(i, per_cpu(cpu_core_map, cpu)); |
| 156 | cpu_set(cpu, per_cpu(cpu_core_map, i)); |
| 157 | cpu_set(i, c->llc_shared_map); |
| 158 | cpu_set(cpu, cpu_data(i).llc_shared_map); |
| 159 | } |
| 160 | } |
| 161 | } else { |
/* No multi-threading: the CPU is its own only sibling. */
| 162 | cpu_set(cpu, per_cpu(cpu_sibling_map, cpu)); |
| 163 | } |
| 164 | |
| 165 | cpu_set(cpu, c->llc_shared_map); |
| 166 | |
/* One core per package: the core map equals the sibling map, done. */
| 167 | if (current_cpu_data.x86_max_cores == 1) { |
| 168 | per_cpu(cpu_core_map, cpu) = per_cpu(cpu_sibling_map, cpu); |
| 169 | c->booted_cores = 1; |
| 170 | return; |
| 171 | } |
| 172 | |
| 173 | for_each_cpu_mask(i, cpu_sibling_setup_map) { |
/* Share an LLC only when this CPU's cache id is known and equal to i's. */
| 174 | if (per_cpu(cpu_llc_id, cpu) != BAD_APICID && |
| 175 | per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) { |
| 176 | cpu_set(i, c->llc_shared_map); |
| 177 | cpu_set(cpu, cpu_data(i).llc_shared_map); |
| 178 | } |
| 179 | if (c->phys_proc_id == cpu_data(i).phys_proc_id) { |
| 180 | cpu_set(i, per_cpu(cpu_core_map, cpu)); |
| 181 | cpu_set(cpu, per_cpu(cpu_core_map, i)); |
| 182 | /* |
| 183 | * Does this new cpu bring up a new core?  (It does when it |
 * is so far the only member of its own sibling map.) |
| 184 | */ |
| 185 | if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) { |
| 186 | /* |
| 187 | * for each core in package (counted once, via the first |
| 188 | * cpu of its sibling map), increment the booted_cores |
 * for this new cpu |
| 189 | */ |
| 190 | if (first_cpu(per_cpu(cpu_sibling_map, i)) == i) |
| 191 | c->booted_cores++; |
| 192 | /* |
| 193 | * increment the core count for all |
| 194 | * the other cpus in this package |
| 195 | */ |
| 196 | if (i != cpu) |
| 197 | cpu_data(i).booted_cores++; |
/* Not a new core: inherit the count from a package mate, once. */
| 198 | } else if (i != cpu && !c->booted_cores) |
| 199 | c->booted_cores = cpu_data(i).booted_cores; |
| 200 | } |
| 201 | } |
| 202 | } |
| 203 | |
Glauber Costa | 70708a1 | 2008-03-03 14:13:03 -0300 | [diff] [blame] | 204 | /* maps the cpu to the sched domain representing multi-core */ |
| 205 | cpumask_t cpu_coregroup_map(int cpu) |
| 206 | { |
| 207 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
| 208 | /* |
| 209 | * For perf, we return last level cache shared map. |
| 210 | * And for power savings, we return cpu_core_map |
| 211 | */ |
| 212 | if (sched_mc_power_savings || sched_smt_power_savings) |
| 213 | return per_cpu(cpu_core_map, cpu); |
| 214 | else |
| 215 | return c->llc_shared_map; |
| 216 | } |
| 217 | |
Glauber Costa | 91718e8 | 2008-03-03 14:13:12 -0300 | [diff] [blame] | 218 | /* |
| 219 | * Currently trivial. Write the real->protected mode |
| 220 | * bootstrap into the page concerned. The caller |
| 221 | * has made sure it's suitably aligned. |
| 222 | */ |
| 223 | |
| 224 | unsigned long __cpuinit setup_trampoline(void) |
| 225 | { |
| 226 | memcpy(trampoline_base, trampoline_data, |
| 227 | trampoline_end - trampoline_data); |
| 228 | return virt_to_phys(trampoline_base); |
| 229 | } |
| 230 | |
#ifdef CONFIG_X86_32
/*
 * Called very early to grab one low-memory page for the SMP bootup
 * trampoline and remember it in trampoline_base.
 */
void __init smp_alloc_memory(void)
{
	unsigned char *page = alloc_bootmem_low_pages(PAGE_SIZE);

	trampoline_base = page;

	/*
	 * The page has to be in very low memory so we can execute
	 * real-mode AP code from it.
	 */
	if (__pa(page) < 0x9F000)
		return;

	BUG();
}
#endif
Glauber Costa | 70708a1 | 2008-03-03 14:13:03 -0300 | [diff] [blame] | 247 | |
Glauber de Oliveira Costa | 904541e | 2008-03-19 14:25:27 -0300 | [diff] [blame] | 248 | void impress_friends(void) |
| 249 | { |
| 250 | int cpu; |
| 251 | unsigned long bogosum = 0; |
| 252 | /* |
| 253 | * Allow the user to impress friends. |
| 254 | */ |
| 255 | Dprintk("Before bogomips.\n"); |
| 256 | for_each_possible_cpu(cpu) |
| 257 | if (cpu_isset(cpu, cpu_callout_map)) |
| 258 | bogosum += cpu_data(cpu).loops_per_jiffy; |
| 259 | printk(KERN_INFO |
| 260 | "Total of %d processors activated (%lu.%02lu BogoMIPS).\n", |
| 261 | cpus_weight(cpu_present_map), |
| 262 | bogosum/(500000/HZ), |
| 263 | (bogosum/(5000/HZ))%100); |
| 264 | |
| 265 | Dprintk("Before bogocount - setting activated=1.\n"); |
| 266 | } |
| 267 | |
Glauber Costa | 68a1c3f | 2008-03-03 14:12:42 -0300 | [diff] [blame] | 268 | #ifdef CONFIG_HOTPLUG_CPU |
Glauber Costa | 768d950 | 2008-03-03 14:13:02 -0300 | [diff] [blame] | 269 | void remove_siblinginfo(int cpu) |
| 270 | { |
| 271 | int sibling; |
| 272 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
| 273 | |
| 274 | for_each_cpu_mask(sibling, per_cpu(cpu_core_map, cpu)) { |
| 275 | cpu_clear(cpu, per_cpu(cpu_core_map, sibling)); |
| 276 | /*/ |
| 277 | * last thread sibling in this cpu core going down |
| 278 | */ |
| 279 | if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) |
| 280 | cpu_data(sibling).booted_cores--; |
| 281 | } |
| 282 | |
| 283 | for_each_cpu_mask(sibling, per_cpu(cpu_sibling_map, cpu)) |
| 284 | cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling)); |
| 285 | cpus_clear(per_cpu(cpu_sibling_map, cpu)); |
| 286 | cpus_clear(per_cpu(cpu_core_map, cpu)); |
| 287 | c->phys_proc_id = 0; |
| 288 | c->cpu_core_id = 0; |
| 289 | cpu_clear(cpu, cpu_sibling_setup_map); |
| 290 | } |
Glauber Costa | 68a1c3f | 2008-03-03 14:12:42 -0300 | [diff] [blame] | 291 | |
| 292 | int additional_cpus __initdata = -1; |
| 293 | |
| 294 | static __init int setup_additional_cpus(char *s) |
| 295 | { |
| 296 | return s && get_option(&s, &additional_cpus) ? 0 : -EINVAL; |
| 297 | } |
| 298 | early_param("additional_cpus", setup_additional_cpus); |
| 299 | |
| 300 | /* |
| 301 | * cpu_possible_map should be static, it cannot change as cpu's |
| 302 | * are onlined, or offlined. The reason is per-cpu data-structures |
| 303 | * are allocated by some modules at init time, and dont expect to |
| 304 | * do this dynamically on cpu arrival/departure. |
| 305 | * cpu_present_map on the other hand can change dynamically. |
| 306 | * In case when cpu_hotplug is not compiled, then we resort to current |
| 307 | * behaviour, which is cpu_possible == cpu_present. |
| 308 | * - Ashok Raj |
| 309 | * |
| 310 | * Three ways to find out the number of additional hotplug CPUs: |
| 311 | * - If the BIOS specified disabled CPUs in ACPI/mptables use that. |
| 312 | * - The user can overwrite it with additional_cpus=NUM |
| 313 | * - Otherwise don't reserve additional CPUs. |
| 314 | * We do this because additional CPUs waste a lot of memory. |
| 315 | * -AK |
| 316 | */ |
| 317 | __init void prefill_possible_map(void) |
| 318 | { |
| 319 | int i; |
| 320 | int possible; |
| 321 | |
| 322 | if (additional_cpus == -1) { |
| 323 | if (disabled_cpus > 0) |
| 324 | additional_cpus = disabled_cpus; |
| 325 | else |
| 326 | additional_cpus = 0; |
| 327 | } |
| 328 | possible = num_processors + additional_cpus; |
| 329 | if (possible > NR_CPUS) |
| 330 | possible = NR_CPUS; |
| 331 | |
| 332 | printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", |
| 333 | possible, max_t(int, possible - num_processors, 0)); |
| 334 | |
| 335 | for (i = 0; i < possible; i++) |
| 336 | cpu_set(i, cpu_possible_map); |
| 337 | } |
Glauber Costa | 69c18c1 | 2008-03-03 14:13:07 -0300 | [diff] [blame] | 338 | |
| 339 | static void __ref remove_cpu_from_maps(int cpu) |
| 340 | { |
| 341 | cpu_clear(cpu, cpu_online_map); |
| 342 | #ifdef CONFIG_X86_64 |
| 343 | cpu_clear(cpu, cpu_callout_map); |
| 344 | cpu_clear(cpu, cpu_callin_map); |
| 345 | /* was set by cpu_init() */ |
| 346 | clear_bit(cpu, (unsigned long *)&cpu_initialized); |
| 347 | clear_node_cpumask(cpu); |
| 348 | #endif |
| 349 | } |
| 350 | |
| 351 | int __cpu_disable(void) |
| 352 | { |
| 353 | int cpu = smp_processor_id(); |
| 354 | |
| 355 | /* |
| 356 | * Perhaps use cpufreq to drop frequency, but that could go |
| 357 | * into generic code. |
| 358 | * |
| 359 | * We won't take down the boot processor on i386 due to some |
| 360 | * interrupts only being able to be serviced by the BSP. |
| 361 | * Especially so if we're not using an IOAPIC -zwane |
| 362 | */ |
| 363 | if (cpu == 0) |
| 364 | return -EBUSY; |
| 365 | |
| 366 | if (nmi_watchdog == NMI_LOCAL_APIC) |
| 367 | stop_apic_nmi_watchdog(NULL); |
| 368 | clear_local_APIC(); |
| 369 | |
| 370 | /* |
| 371 | * HACK: |
| 372 | * Allow any queued timer interrupts to get serviced |
| 373 | * This is only a temporary solution until we cleanup |
| 374 | * fixup_irqs as we do for IA64. |
| 375 | */ |
| 376 | local_irq_enable(); |
| 377 | mdelay(1); |
| 378 | |
| 379 | local_irq_disable(); |
| 380 | remove_siblinginfo(cpu); |
| 381 | |
| 382 | /* It's now safe to remove this processor from the online map */ |
| 383 | remove_cpu_from_maps(cpu); |
| 384 | fixup_irqs(cpu_online_map); |
| 385 | return 0; |
| 386 | } |
| 387 | |
| 388 | void __cpu_die(unsigned int cpu) |
| 389 | { |
| 390 | /* We don't do anything here: idle task is faking death itself. */ |
| 391 | unsigned int i; |
| 392 | |
| 393 | for (i = 0; i < 10; i++) { |
| 394 | /* They ack this in play_dead by setting CPU_DEAD */ |
| 395 | if (per_cpu(cpu_state, cpu) == CPU_DEAD) { |
| 396 | printk(KERN_INFO "CPU %d is now offline\n", cpu); |
| 397 | if (1 == num_online_cpus()) |
| 398 | alternatives_smp_switch(0); |
| 399 | return; |
| 400 | } |
| 401 | msleep(100); |
| 402 | } |
| 403 | printk(KERN_ERR "CPU %u didn't die...\n", cpu); |
| 404 | } |
| 405 | #else /* ... !CONFIG_HOTPLUG_CPU */ |
/* Without CPU hotplug support no CPU can be disabled: always -ENOSYS. */
int __cpu_disable(void)
{
	const int unsupported = -ENOSYS;

	return unsupported;
}
| 410 | |
/*
 * Must never be reached without CPU hotplug support: __cpu_disable()
 * already said "no", so getting here is a bug.
 */
void __cpu_die(unsigned int cpu)
{
	BUG();
}
Glauber Costa | 68a1c3f | 2008-03-03 14:12:42 -0300 | [diff] [blame] | 416 | #endif |
| 417 | |
Glauber Costa | 89b0820 | 2008-03-03 14:13:08 -0300 | [diff] [blame] | 418 | /* |
| 419 | * If the BIOS enumerates physical processors before logical, |
| 420 | * maxcpus=N at enumeration-time can be used to disable HT. |
| 421 | */ |
| 422 | static int __init parse_maxcpus(char *arg) |
| 423 | { |
| 424 | extern unsigned int maxcpus; |
| 425 | |
| 426 | maxcpus = simple_strtoul(arg, NULL, 0); |
| 427 | return 0; |
| 428 | } |
| 429 | early_param("maxcpus", parse_maxcpus); |