Glauber Costa | 68a1c3f | 2008-03-03 14:12:42 -0300 | [diff] [blame] | 1 | #include <linux/init.h> |
| 2 | #include <linux/smp.h> |
Glauber Costa | a355352 | 2008-03-03 14:12:58 -0300 | [diff] [blame] | 3 | #include <linux/module.h> |
Glauber Costa | 70708a1 | 2008-03-03 14:13:03 -0300 | [diff] [blame] | 4 | #include <linux/sched.h> |
Glauber Costa | 69c18c1 | 2008-03-03 14:13:07 -0300 | [diff] [blame] | 5 | #include <linux/percpu.h> |
Glauber Costa | 91718e8 | 2008-03-03 14:13:12 -0300 | [diff] [blame] | 6 | #include <linux/bootmem.h> |
Glauber Costa | 69c18c1 | 2008-03-03 14:13:07 -0300 | [diff] [blame] | 7 | |
| 8 | #include <asm/nmi.h> |
| 9 | #include <asm/irq.h> |
| 10 | #include <asm/smp.h> |
| 11 | #include <asm/cpu.h> |
| 12 | #include <asm/numa.h> |
Glauber Costa | 68a1c3f | 2008-03-03 14:12:42 -0300 | [diff] [blame] | 13 | |
Glauber de Oliveira Costa | f6bc402 | 2008-03-19 14:25:53 -0300 | [diff] [blame^] | 14 | #include <mach_apic.h> |
| 15 | |
/* Number of siblings per CPU package */
int smp_num_siblings = 1;
EXPORT_SYMBOL(smp_num_siblings);

/*
 * Last level cache ID of each logical CPU.  Starts out as BAD_APICID,
 * which set_cpu_sibling_map() treats as "no usable ID".
 */
DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID;

/* bitmap of online cpus */
cpumask_t cpu_online_map __read_mostly;
EXPORT_SYMBOL(cpu_online_map);

/*
 * CPU bring-up bookkeeping masks.
 * NOTE(review): presumably these track the AP call-in/call-out handshake;
 * in this file they are only read by impress_friends() and cleared by
 * remove_cpu_from_maps() -- confirm against the boot code.
 */
cpumask_t cpu_callin_map;
cpumask_t cpu_callout_map;
/* CPUs that may ever be brought up; see prefill_possible_map() */
cpumask_t cpu_possible_map;
EXPORT_SYMBOL(cpu_possible_map);

/* representing HT siblings of each logical CPU */
DEFINE_PER_CPU(cpumask_t, cpu_sibling_map);
EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);

/* representing HT and core siblings of each logical CPU */
DEFINE_PER_CPU(cpumask_t, cpu_core_map);
EXPORT_PER_CPU_SYMBOL(cpu_core_map);

/* Per CPU bogomips and other parameters */
DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
EXPORT_PER_CPU_SYMBOL(cpu_info);

/*
 * Default trampoline location, used as-is on x86_64.  Harmless on 32-bit
 * x86, where smp_alloc_memory() overwrites it with a bootmem allocation.
 */
unsigned char *trampoline_base = __va(SMP_TRAMPOLINE_BASE);

/*
 * CPUs for which sibling maps can be computed: a CPU is added by
 * set_cpu_sibling_map() and removed again by remove_siblinginfo().
 */
static cpumask_t cpu_sibling_setup_map;

/*
 * Set if we find a B stepping CPU (see smp_apply_quirks()); reported by
 * smp_checks().
 */
int __cpuinitdata smp_b_stepping;
Glauber de Oliveira Costa | 1d89a7f | 2008-03-19 14:25:05 -0300 | [diff] [blame] | 52 | |
| 53 | static void __cpuinit smp_apply_quirks(struct cpuinfo_x86 *c) |
| 54 | { |
| 55 | #ifdef CONFIG_X86_32 |
| 56 | /* |
| 57 | * Mask B, Pentium, but not Pentium MMX |
| 58 | */ |
| 59 | if (c->x86_vendor == X86_VENDOR_INTEL && |
| 60 | c->x86 == 5 && |
| 61 | c->x86_mask >= 1 && c->x86_mask <= 4 && |
| 62 | c->x86_model <= 3) |
| 63 | /* |
| 64 | * Remember we have B step Pentia with bugs |
| 65 | */ |
| 66 | smp_b_stepping = 1; |
| 67 | |
| 68 | /* |
| 69 | * Certain Athlons might work (for various values of 'work') in SMP |
| 70 | * but they are not certified as MP capable. |
| 71 | */ |
| 72 | if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) { |
| 73 | |
| 74 | if (num_possible_cpus() == 1) |
| 75 | goto valid_k7; |
| 76 | |
| 77 | /* Athlon 660/661 is valid. */ |
| 78 | if ((c->x86_model == 6) && ((c->x86_mask == 0) || |
| 79 | (c->x86_mask == 1))) |
| 80 | goto valid_k7; |
| 81 | |
| 82 | /* Duron 670 is valid */ |
| 83 | if ((c->x86_model == 7) && (c->x86_mask == 0)) |
| 84 | goto valid_k7; |
| 85 | |
| 86 | /* |
| 87 | * Athlon 662, Duron 671, and Athlon >model 7 have capability |
| 88 | * bit. It's worth noting that the A5 stepping (662) of some |
| 89 | * Athlon XP's have the MP bit set. |
| 90 | * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for |
| 91 | * more. |
| 92 | */ |
| 93 | if (((c->x86_model == 6) && (c->x86_mask >= 2)) || |
| 94 | ((c->x86_model == 7) && (c->x86_mask >= 1)) || |
| 95 | (c->x86_model > 7)) |
| 96 | if (cpu_has_mp) |
| 97 | goto valid_k7; |
| 98 | |
| 99 | /* If we get here, not a certified SMP capable AMD system. */ |
| 100 | add_taint(TAINT_UNSAFE_SMP); |
| 101 | } |
| 102 | |
| 103 | valid_k7: |
| 104 | ; |
| 105 | #endif |
| 106 | } |
| 107 | |
Glauber de Oliveira Costa | 693d4b8 | 2008-03-19 14:25:28 -0300 | [diff] [blame] | 108 | void smp_checks(void) |
| 109 | { |
| 110 | if (smp_b_stepping) |
| 111 | printk(KERN_WARNING "WARNING: SMP operation may be unreliable" |
| 112 | "with B stepping processors.\n"); |
| 113 | |
| 114 | /* |
| 115 | * Don't taint if we are running SMP kernel on a single non-MP |
| 116 | * approved Athlon |
| 117 | */ |
| 118 | if (tainted & TAINT_UNSAFE_SMP) { |
Glauber de Oliveira Costa | f68e00a | 2008-03-19 14:25:29 -0300 | [diff] [blame] | 119 | if (num_online_cpus()) |
Glauber de Oliveira Costa | 693d4b8 | 2008-03-19 14:25:28 -0300 | [diff] [blame] | 120 | printk(KERN_INFO "WARNING: This combination of AMD" |
| 121 | "processors is not suitable for SMP.\n"); |
| 122 | else |
| 123 | tainted &= ~TAINT_UNSAFE_SMP; |
| 124 | } |
| 125 | } |
| 126 | |
Glauber de Oliveira Costa | 1d89a7f | 2008-03-19 14:25:05 -0300 | [diff] [blame] | 127 | /* |
| 128 | * The bootstrap kernel entry code has set these up. Save them for |
| 129 | * a given CPU |
| 130 | */ |
| 131 | |
| 132 | void __cpuinit smp_store_cpu_info(int id) |
| 133 | { |
| 134 | struct cpuinfo_x86 *c = &cpu_data(id); |
| 135 | |
| 136 | *c = boot_cpu_data; |
| 137 | c->cpu_index = id; |
| 138 | if (id != 0) |
| 139 | identify_secondary_cpu(c); |
| 140 | smp_apply_quirks(c); |
| 141 | } |
| 142 | |
| 143 | |
Glauber Costa | 768d950 | 2008-03-03 14:13:02 -0300 | [diff] [blame] | 144 | void __cpuinit set_cpu_sibling_map(int cpu) |
| 145 | { |
| 146 | int i; |
| 147 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
| 148 | |
| 149 | cpu_set(cpu, cpu_sibling_setup_map); |
| 150 | |
| 151 | if (smp_num_siblings > 1) { |
| 152 | for_each_cpu_mask(i, cpu_sibling_setup_map) { |
| 153 | if (c->phys_proc_id == cpu_data(i).phys_proc_id && |
| 154 | c->cpu_core_id == cpu_data(i).cpu_core_id) { |
| 155 | cpu_set(i, per_cpu(cpu_sibling_map, cpu)); |
| 156 | cpu_set(cpu, per_cpu(cpu_sibling_map, i)); |
| 157 | cpu_set(i, per_cpu(cpu_core_map, cpu)); |
| 158 | cpu_set(cpu, per_cpu(cpu_core_map, i)); |
| 159 | cpu_set(i, c->llc_shared_map); |
| 160 | cpu_set(cpu, cpu_data(i).llc_shared_map); |
| 161 | } |
| 162 | } |
| 163 | } else { |
| 164 | cpu_set(cpu, per_cpu(cpu_sibling_map, cpu)); |
| 165 | } |
| 166 | |
| 167 | cpu_set(cpu, c->llc_shared_map); |
| 168 | |
| 169 | if (current_cpu_data.x86_max_cores == 1) { |
| 170 | per_cpu(cpu_core_map, cpu) = per_cpu(cpu_sibling_map, cpu); |
| 171 | c->booted_cores = 1; |
| 172 | return; |
| 173 | } |
| 174 | |
| 175 | for_each_cpu_mask(i, cpu_sibling_setup_map) { |
| 176 | if (per_cpu(cpu_llc_id, cpu) != BAD_APICID && |
| 177 | per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) { |
| 178 | cpu_set(i, c->llc_shared_map); |
| 179 | cpu_set(cpu, cpu_data(i).llc_shared_map); |
| 180 | } |
| 181 | if (c->phys_proc_id == cpu_data(i).phys_proc_id) { |
| 182 | cpu_set(i, per_cpu(cpu_core_map, cpu)); |
| 183 | cpu_set(cpu, per_cpu(cpu_core_map, i)); |
| 184 | /* |
| 185 | * Does this new cpu bringup a new core? |
| 186 | */ |
| 187 | if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) { |
| 188 | /* |
| 189 | * for each core in package, increment |
| 190 | * the booted_cores for this new cpu |
| 191 | */ |
| 192 | if (first_cpu(per_cpu(cpu_sibling_map, i)) == i) |
| 193 | c->booted_cores++; |
| 194 | /* |
| 195 | * increment the core count for all |
| 196 | * the other cpus in this package |
| 197 | */ |
| 198 | if (i != cpu) |
| 199 | cpu_data(i).booted_cores++; |
| 200 | } else if (i != cpu && !c->booted_cores) |
| 201 | c->booted_cores = cpu_data(i).booted_cores; |
| 202 | } |
| 203 | } |
| 204 | } |
| 205 | |
Glauber Costa | 70708a1 | 2008-03-03 14:13:03 -0300 | [diff] [blame] | 206 | /* maps the cpu to the sched domain representing multi-core */ |
| 207 | cpumask_t cpu_coregroup_map(int cpu) |
| 208 | { |
| 209 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
| 210 | /* |
| 211 | * For perf, we return last level cache shared map. |
| 212 | * And for power savings, we return cpu_core_map |
| 213 | */ |
| 214 | if (sched_mc_power_savings || sched_smt_power_savings) |
| 215 | return per_cpu(cpu_core_map, cpu); |
| 216 | else |
| 217 | return c->llc_shared_map; |
| 218 | } |
| 219 | |
Glauber Costa | 91718e8 | 2008-03-03 14:13:12 -0300 | [diff] [blame] | 220 | /* |
| 221 | * Currently trivial. Write the real->protected mode |
| 222 | * bootstrap into the page concerned. The caller |
| 223 | * has made sure it's suitably aligned. |
| 224 | */ |
| 225 | |
| 226 | unsigned long __cpuinit setup_trampoline(void) |
| 227 | { |
| 228 | memcpy(trampoline_base, trampoline_data, |
| 229 | trampoline_end - trampoline_data); |
| 230 | return virt_to_phys(trampoline_base); |
| 231 | } |
| 232 | |
#ifdef CONFIG_X86_32
/*
 * Called very early to grab one low-memory page for the SMP bootup
 * trampoline and remember it in trampoline_base.
 */
void __init smp_alloc_memory(void)
{
	trampoline_base = alloc_bootmem_low_pages(PAGE_SIZE);

	/*
	 * The page has to be in very low memory so we can execute
	 * real-mode AP code from it.
	 */
	BUG_ON(__pa(trampoline_base) >= 0x9F000);
}
#endif
Glauber Costa | 70708a1 | 2008-03-03 14:13:03 -0300 | [diff] [blame] | 249 | |
Glauber de Oliveira Costa | 904541e | 2008-03-19 14:25:27 -0300 | [diff] [blame] | 250 | void impress_friends(void) |
| 251 | { |
| 252 | int cpu; |
| 253 | unsigned long bogosum = 0; |
| 254 | /* |
| 255 | * Allow the user to impress friends. |
| 256 | */ |
| 257 | Dprintk("Before bogomips.\n"); |
| 258 | for_each_possible_cpu(cpu) |
| 259 | if (cpu_isset(cpu, cpu_callout_map)) |
| 260 | bogosum += cpu_data(cpu).loops_per_jiffy; |
| 261 | printk(KERN_INFO |
| 262 | "Total of %d processors activated (%lu.%02lu BogoMIPS).\n", |
Glauber de Oliveira Costa | f68e00a | 2008-03-19 14:25:29 -0300 | [diff] [blame] | 263 | num_online_cpus(), |
Glauber de Oliveira Costa | 904541e | 2008-03-19 14:25:27 -0300 | [diff] [blame] | 264 | bogosum/(500000/HZ), |
| 265 | (bogosum/(5000/HZ))%100); |
| 266 | |
| 267 | Dprintk("Before bogocount - setting activated=1.\n"); |
| 268 | } |
| 269 | |
Glauber Costa | 68a1c3f | 2008-03-03 14:12:42 -0300 | [diff] [blame] | 270 | #ifdef CONFIG_HOTPLUG_CPU |
Glauber Costa | 768d950 | 2008-03-03 14:13:02 -0300 | [diff] [blame] | 271 | void remove_siblinginfo(int cpu) |
| 272 | { |
| 273 | int sibling; |
| 274 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
| 275 | |
| 276 | for_each_cpu_mask(sibling, per_cpu(cpu_core_map, cpu)) { |
| 277 | cpu_clear(cpu, per_cpu(cpu_core_map, sibling)); |
| 278 | /*/ |
| 279 | * last thread sibling in this cpu core going down |
| 280 | */ |
| 281 | if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) |
| 282 | cpu_data(sibling).booted_cores--; |
| 283 | } |
| 284 | |
| 285 | for_each_cpu_mask(sibling, per_cpu(cpu_sibling_map, cpu)) |
| 286 | cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling)); |
| 287 | cpus_clear(per_cpu(cpu_sibling_map, cpu)); |
| 288 | cpus_clear(per_cpu(cpu_core_map, cpu)); |
| 289 | c->phys_proc_id = 0; |
| 290 | c->cpu_core_id = 0; |
| 291 | cpu_clear(cpu, cpu_sibling_setup_map); |
| 292 | } |
Glauber Costa | 68a1c3f | 2008-03-03 14:12:42 -0300 | [diff] [blame] | 293 | |
| 294 | int additional_cpus __initdata = -1; |
| 295 | |
| 296 | static __init int setup_additional_cpus(char *s) |
| 297 | { |
| 298 | return s && get_option(&s, &additional_cpus) ? 0 : -EINVAL; |
| 299 | } |
| 300 | early_param("additional_cpus", setup_additional_cpus); |
| 301 | |
| 302 | /* |
| 303 | * cpu_possible_map should be static, it cannot change as cpu's |
| 304 | * are onlined, or offlined. The reason is per-cpu data-structures |
| 305 | * are allocated by some modules at init time, and dont expect to |
| 306 | * do this dynamically on cpu arrival/departure. |
| 307 | * cpu_present_map on the other hand can change dynamically. |
| 308 | * In case when cpu_hotplug is not compiled, then we resort to current |
| 309 | * behaviour, which is cpu_possible == cpu_present. |
| 310 | * - Ashok Raj |
| 311 | * |
| 312 | * Three ways to find out the number of additional hotplug CPUs: |
| 313 | * - If the BIOS specified disabled CPUs in ACPI/mptables use that. |
| 314 | * - The user can overwrite it with additional_cpus=NUM |
| 315 | * - Otherwise don't reserve additional CPUs. |
| 316 | * We do this because additional CPUs waste a lot of memory. |
| 317 | * -AK |
| 318 | */ |
| 319 | __init void prefill_possible_map(void) |
| 320 | { |
| 321 | int i; |
| 322 | int possible; |
| 323 | |
| 324 | if (additional_cpus == -1) { |
| 325 | if (disabled_cpus > 0) |
| 326 | additional_cpus = disabled_cpus; |
| 327 | else |
| 328 | additional_cpus = 0; |
| 329 | } |
| 330 | possible = num_processors + additional_cpus; |
| 331 | if (possible > NR_CPUS) |
| 332 | possible = NR_CPUS; |
| 333 | |
| 334 | printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", |
| 335 | possible, max_t(int, possible - num_processors, 0)); |
| 336 | |
| 337 | for (i = 0; i < possible; i++) |
| 338 | cpu_set(i, cpu_possible_map); |
| 339 | } |
Glauber Costa | 69c18c1 | 2008-03-03 14:13:07 -0300 | [diff] [blame] | 340 | |
| 341 | static void __ref remove_cpu_from_maps(int cpu) |
| 342 | { |
| 343 | cpu_clear(cpu, cpu_online_map); |
| 344 | #ifdef CONFIG_X86_64 |
| 345 | cpu_clear(cpu, cpu_callout_map); |
| 346 | cpu_clear(cpu, cpu_callin_map); |
| 347 | /* was set by cpu_init() */ |
| 348 | clear_bit(cpu, (unsigned long *)&cpu_initialized); |
| 349 | clear_node_cpumask(cpu); |
| 350 | #endif |
| 351 | } |
| 352 | |
| 353 | int __cpu_disable(void) |
| 354 | { |
| 355 | int cpu = smp_processor_id(); |
| 356 | |
| 357 | /* |
| 358 | * Perhaps use cpufreq to drop frequency, but that could go |
| 359 | * into generic code. |
| 360 | * |
| 361 | * We won't take down the boot processor on i386 due to some |
| 362 | * interrupts only being able to be serviced by the BSP. |
| 363 | * Especially so if we're not using an IOAPIC -zwane |
| 364 | */ |
| 365 | if (cpu == 0) |
| 366 | return -EBUSY; |
| 367 | |
| 368 | if (nmi_watchdog == NMI_LOCAL_APIC) |
| 369 | stop_apic_nmi_watchdog(NULL); |
| 370 | clear_local_APIC(); |
| 371 | |
| 372 | /* |
| 373 | * HACK: |
| 374 | * Allow any queued timer interrupts to get serviced |
| 375 | * This is only a temporary solution until we cleanup |
| 376 | * fixup_irqs as we do for IA64. |
| 377 | */ |
| 378 | local_irq_enable(); |
| 379 | mdelay(1); |
| 380 | |
| 381 | local_irq_disable(); |
| 382 | remove_siblinginfo(cpu); |
| 383 | |
| 384 | /* It's now safe to remove this processor from the online map */ |
| 385 | remove_cpu_from_maps(cpu); |
| 386 | fixup_irqs(cpu_online_map); |
| 387 | return 0; |
| 388 | } |
| 389 | |
| 390 | void __cpu_die(unsigned int cpu) |
| 391 | { |
| 392 | /* We don't do anything here: idle task is faking death itself. */ |
| 393 | unsigned int i; |
| 394 | |
| 395 | for (i = 0; i < 10; i++) { |
| 396 | /* They ack this in play_dead by setting CPU_DEAD */ |
| 397 | if (per_cpu(cpu_state, cpu) == CPU_DEAD) { |
| 398 | printk(KERN_INFO "CPU %d is now offline\n", cpu); |
| 399 | if (1 == num_online_cpus()) |
| 400 | alternatives_smp_switch(0); |
| 401 | return; |
| 402 | } |
| 403 | msleep(100); |
| 404 | } |
| 405 | printk(KERN_ERR "CPU %u didn't die...\n", cpu); |
| 406 | } |
| 407 | #else /* ... !CONFIG_HOTPLUG_CPU */ |
| 408 | int __cpu_disable(void) |
| 409 | { |
| 410 | return -ENOSYS; |
| 411 | } |
| 412 | |
/*
 * Built without CONFIG_HOTPLUG_CPU: __cpu_disable() always says "no",
 * so getting here is a bug.
 */
void __cpu_die(unsigned int cpu)
{
	BUG();
}
Glauber Costa | 68a1c3f | 2008-03-03 14:12:42 -0300 | [diff] [blame] | 418 | #endif |
| 419 | |
Glauber Costa | 89b0820 | 2008-03-03 14:13:08 -0300 | [diff] [blame] | 420 | /* |
| 421 | * If the BIOS enumerates physical processors before logical, |
| 422 | * maxcpus=N at enumeration-time can be used to disable HT. |
| 423 | */ |
| 424 | static int __init parse_maxcpus(char *arg) |
| 425 | { |
| 426 | extern unsigned int maxcpus; |
| 427 | |
| 428 | maxcpus = simple_strtoul(arg, NULL, 0); |
| 429 | return 0; |
| 430 | } |
| 431 | early_param("maxcpus", parse_maxcpus); |