blob: 8256309deaad6102b894ba209ddcea01d755bc77 [file] [log] [blame]
Len Brown26717172010-03-08 14:07:30 -05001/*
2 * intel_idle.c - native hardware idle loop for modern Intel processors
3 *
4 * Copyright (c) 2010, Intel Corporation.
5 * Len Brown <len.brown@intel.com>
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms and conditions of the GNU General Public License,
9 * version 2, as published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14 * more details.
15 *
16 * You should have received a copy of the GNU General Public License along with
17 * this program; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
19 */
20
21/*
22 * intel_idle is a cpuidle driver that loads on specific Intel processors
23 * in lieu of the legacy ACPI processor_idle driver. The intent is to
24 * make Linux more efficient on these processors, as intel_idle knows
25 * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
26 */
27
28/*
29 * Design Assumptions
30 *
31 * All CPUs have same idle states as boot CPU
32 *
33 * Chipset BM_STS (bus master status) bit is a NOP
34 * for preventing entry into deep C-states
35 */
36
37/*
38 * Known limitations
39 *
40 * The driver currently initializes for_each_online_cpu() upon modprobe.
41 * It is unaware of subsequent processors hot-added to the system.
42 * This means that if you boot with maxcpus=n and later online
43 * processors above n, those processors will use C1 only.
44 *
45 * ACPI has a .suspend hack to turn off deep c-states during suspend
46 * to avoid complications with the lapic timer workaround.
47 * Have not seen issues with suspend, but may need same workaround here.
48 *
49 * There is currently no kernel-based automatic probing/loading mechanism
50 * if the driver is built as a module.
51 */
52
53/* un-comment DEBUG to enable pr_debug() statements */
54#define DEBUG
55
56#include <linux/kernel.h>
57#include <linux/cpuidle.h>
58#include <linux/clockchips.h>
59#include <linux/hrtimer.h> /* ktime_get_real() */
60#include <trace/events/power.h>
61#include <linux/sched.h>
H. Peter Anvinbc83ccc2010-09-17 15:36:40 -070062#include <asm/mwait.h>
Len Brown26717172010-03-08 14:07:30 -050063
64#define INTEL_IDLE_VERSION "0.4"
65#define PREFIX "intel_idle: "
66
Len Brown26717172010-03-08 14:07:30 -050067static struct cpuidle_driver intel_idle_driver = {
68 .name = "intel_idle",
69 .owner = THIS_MODULE,
70};
71/* intel_idle.max_cstate=0 disables driver */
72static int max_cstate = MWAIT_MAX_NUM_CSTATES - 1;
Len Brown26717172010-03-08 14:07:30 -050073
Len Brownc4236282010-05-28 02:22:03 -040074static unsigned int mwait_substates;
Len Brown26717172010-03-08 14:07:30 -050075
76/* Reliable LAPIC Timer States, bit 1 for C1 etc. */
Len Brownd13780d2010-07-07 00:12:03 -040077static unsigned int lapic_timer_reliable_states = (1 << 1); /* Default to only C1 */
Len Brown26717172010-03-08 14:07:30 -050078
Namhyung Kim3265eba2010-08-08 03:10:03 +090079static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
Len Brown26717172010-03-08 14:07:30 -050080static int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state);
81
82static struct cpuidle_state *cpuidle_state_table;
83
/*
 * Mark a state with this flag when the hardware flushes the TLB on entry,
 * so no cross-calls are needed to keep the TLB consistent.
 * Software flushes the TLB whenever the flag is set, which makes the flag
 * safe to use even on hardware that does not do the flush itself.
 */
#define CPUIDLE_FLAG_TLB_FLUSHED	0x10000
91
92/*
Len Brown26717172010-03-08 14:07:30 -050093 * States are indexed by the cstate number,
94 * which is also the index into the MWAIT hint array.
95 * Thus C0 is a dummy.
96 */
97static struct cpuidle_state nehalem_cstates[MWAIT_MAX_NUM_CSTATES] = {
98 { /* MWAIT C0 */ },
99 { /* MWAIT C1 */
100 .name = "NHM-C1",
101 .desc = "MWAIT 0x00",
102 .driver_data = (void *) 0x00,
103 .flags = CPUIDLE_FLAG_TIME_VALID,
104 .exit_latency = 3,
Len Brown26717172010-03-08 14:07:30 -0500105 .target_residency = 6,
106 .enter = &intel_idle },
107 { /* MWAIT C2 */
108 .name = "NHM-C3",
109 .desc = "MWAIT 0x10",
110 .driver_data = (void *) 0x10,
Suresh Siddha6110a1f2010-09-30 21:19:07 -0400111 .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
Len Brown26717172010-03-08 14:07:30 -0500112 .exit_latency = 20,
Len Brown26717172010-03-08 14:07:30 -0500113 .target_residency = 80,
114 .enter = &intel_idle },
115 { /* MWAIT C3 */
116 .name = "NHM-C6",
117 .desc = "MWAIT 0x20",
118 .driver_data = (void *) 0x20,
Suresh Siddha6110a1f2010-09-30 21:19:07 -0400119 .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
Len Brown26717172010-03-08 14:07:30 -0500120 .exit_latency = 200,
Len Brown26717172010-03-08 14:07:30 -0500121 .target_residency = 800,
122 .enter = &intel_idle },
123};
124
Len Brownd13780d2010-07-07 00:12:03 -0400125static struct cpuidle_state snb_cstates[MWAIT_MAX_NUM_CSTATES] = {
126 { /* MWAIT C0 */ },
127 { /* MWAIT C1 */
128 .name = "SNB-C1",
129 .desc = "MWAIT 0x00",
130 .driver_data = (void *) 0x00,
131 .flags = CPUIDLE_FLAG_TIME_VALID,
132 .exit_latency = 1,
Len Brownddbd5502010-12-13 18:28:22 -0500133 .target_residency = 1,
Len Brownd13780d2010-07-07 00:12:03 -0400134 .enter = &intel_idle },
135 { /* MWAIT C2 */
136 .name = "SNB-C3",
137 .desc = "MWAIT 0x10",
138 .driver_data = (void *) 0x10,
Len Brown00527cc2010-10-23 02:33:50 -0400139 .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
Len Brownd13780d2010-07-07 00:12:03 -0400140 .exit_latency = 80,
Len Brownddbd5502010-12-13 18:28:22 -0500141 .target_residency = 211,
Len Brownd13780d2010-07-07 00:12:03 -0400142 .enter = &intel_idle },
143 { /* MWAIT C3 */
144 .name = "SNB-C6",
145 .desc = "MWAIT 0x20",
146 .driver_data = (void *) 0x20,
Len Brown00527cc2010-10-23 02:33:50 -0400147 .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
Len Brownd13780d2010-07-07 00:12:03 -0400148 .exit_latency = 104,
Len Brownddbd5502010-12-13 18:28:22 -0500149 .target_residency = 345,
Len Brownd13780d2010-07-07 00:12:03 -0400150 .enter = &intel_idle },
151 { /* MWAIT C4 */
152 .name = "SNB-C7",
153 .desc = "MWAIT 0x30",
154 .driver_data = (void *) 0x30,
Len Brown00527cc2010-10-23 02:33:50 -0400155 .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
Len Brownd13780d2010-07-07 00:12:03 -0400156 .exit_latency = 109,
Len Brownddbd5502010-12-13 18:28:22 -0500157 .target_residency = 345,
Len Brownd13780d2010-07-07 00:12:03 -0400158 .enter = &intel_idle },
159};
160
Len Brown26717172010-03-08 14:07:30 -0500161static struct cpuidle_state atom_cstates[MWAIT_MAX_NUM_CSTATES] = {
162 { /* MWAIT C0 */ },
163 { /* MWAIT C1 */
164 .name = "ATM-C1",
165 .desc = "MWAIT 0x00",
166 .driver_data = (void *) 0x00,
167 .flags = CPUIDLE_FLAG_TIME_VALID,
168 .exit_latency = 1,
Len Brown26717172010-03-08 14:07:30 -0500169 .target_residency = 4,
170 .enter = &intel_idle },
171 { /* MWAIT C2 */
172 .name = "ATM-C2",
173 .desc = "MWAIT 0x10",
174 .driver_data = (void *) 0x10,
175 .flags = CPUIDLE_FLAG_TIME_VALID,
176 .exit_latency = 20,
Len Brown26717172010-03-08 14:07:30 -0500177 .target_residency = 80,
178 .enter = &intel_idle },
179 { /* MWAIT C3 */ },
180 { /* MWAIT C4 */
181 .name = "ATM-C4",
182 .desc = "MWAIT 0x30",
183 .driver_data = (void *) 0x30,
Suresh Siddha6110a1f2010-09-30 21:19:07 -0400184 .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
Len Brown26717172010-03-08 14:07:30 -0500185 .exit_latency = 100,
Len Brown26717172010-03-08 14:07:30 -0500186 .target_residency = 400,
187 .enter = &intel_idle },
188 { /* MWAIT C5 */ },
189 { /* MWAIT C6 */
190 .name = "ATM-C6",
Len Brown7fcca7d2010-10-05 13:43:14 -0400191 .desc = "MWAIT 0x52",
192 .driver_data = (void *) 0x52,
Suresh Siddha6110a1f2010-09-30 21:19:07 -0400193 .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
Len Brown7fcca7d2010-10-05 13:43:14 -0400194 .exit_latency = 140,
Len Brown7fcca7d2010-10-05 13:43:14 -0400195 .target_residency = 560,
196 .enter = &intel_idle },
Len Brown26717172010-03-08 14:07:30 -0500197};
198
Len Brown26717172010-03-08 14:07:30 -0500199/**
200 * intel_idle
201 * @dev: cpuidle_device
202 * @state: cpuidle state
203 *
204 */
205static int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state)
206{
207 unsigned long ecx = 1; /* break on interrupt flag */
208 unsigned long eax = (unsigned long)cpuidle_get_statedata(state);
209 unsigned int cstate;
210 ktime_t kt_before, kt_after;
211 s64 usec_delta;
212 int cpu = smp_processor_id();
213
214 cstate = (((eax) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) + 1;
215
Len Brown26717172010-03-08 14:07:30 -0500216 local_irq_disable();
217
Suresh Siddha6110a1f2010-09-30 21:19:07 -0400218 /*
Len Brownc8381cc2010-10-15 20:43:06 -0400219 * leave_mm() to avoid costly and often unnecessary wakeups
220 * for flushing the user TLB's associated with the active mm.
Suresh Siddha6110a1f2010-09-30 21:19:07 -0400221 */
Len Brownc8381cc2010-10-15 20:43:06 -0400222 if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED)
Suresh Siddha6110a1f2010-09-30 21:19:07 -0400223 leave_mm(cpu);
224
Len Brown26717172010-03-08 14:07:30 -0500225 if (!(lapic_timer_reliable_states & (1 << (cstate))))
226 clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
227
228 kt_before = ktime_get_real();
229
230 stop_critical_timings();
231#ifndef MODULE
Linus Torvalds8d915302010-08-04 11:13:36 -0700232 trace_power_start(POWER_CSTATE, (eax >> 4) + 1, cpu);
Len Brown26717172010-03-08 14:07:30 -0500233#endif
234 if (!need_resched()) {
235
236 __monitor((void *)&current_thread_info()->flags, 0, 0);
237 smp_mb();
238 if (!need_resched())
239 __mwait(eax, ecx);
240 }
241
242 start_critical_timings();
243
244 kt_after = ktime_get_real();
245 usec_delta = ktime_to_us(ktime_sub(kt_after, kt_before));
246
247 local_irq_enable();
248
249 if (!(lapic_timer_reliable_states & (1 << (cstate))))
250 clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
251
252 return usec_delta;
253}
254
255/*
256 * intel_idle_probe()
257 */
258static int intel_idle_probe(void)
259{
Len Brownc4236282010-05-28 02:22:03 -0400260 unsigned int eax, ebx, ecx;
Len Brown26717172010-03-08 14:07:30 -0500261
262 if (max_cstate == 0) {
263 pr_debug(PREFIX "disabled\n");
264 return -EPERM;
265 }
266
267 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
268 return -ENODEV;
269
270 if (!boot_cpu_has(X86_FEATURE_MWAIT))
271 return -ENODEV;
272
273 if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
274 return -ENODEV;
275
Len Brownc4236282010-05-28 02:22:03 -0400276 cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);
Len Brown26717172010-03-08 14:07:30 -0500277
278 if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
279 !(ecx & CPUID5_ECX_INTERRUPT_BREAK))
280 return -ENODEV;
Len Brown26717172010-03-08 14:07:30 -0500281
Len Brownc4236282010-05-28 02:22:03 -0400282 pr_debug(PREFIX "MWAIT substates: 0x%x\n", mwait_substates);
Len Brown26717172010-03-08 14:07:30 -0500283
Len Brown26717172010-03-08 14:07:30 -0500284
285 if (boot_cpu_data.x86 != 6) /* family 6 */
286 return -ENODEV;
287
288 switch (boot_cpu_data.x86_model) {
289
290 case 0x1A: /* Core i7, Xeon 5500 series */
291 case 0x1E: /* Core i7 and i5 Processor - Lynnfield Jasper Forest */
292 case 0x1F: /* Core i7 and i5 Processor - Nehalem */
293 case 0x2E: /* Nehalem-EX Xeon */
Len Brownec67a2b2010-07-26 23:40:19 -0400294 case 0x2F: /* Westmere-EX Xeon */
Len Brown26717172010-03-08 14:07:30 -0500295 case 0x25: /* Westmere */
296 case 0x2C: /* Westmere */
297 cpuidle_state_table = nehalem_cstates;
Len Brown26717172010-03-08 14:07:30 -0500298 break;
299
300 case 0x1C: /* 28 - Atom Processor */
Arjan van de Ven4725fd32010-07-21 23:42:25 -0400301 case 0x26: /* 38 - Lincroft Atom Processor */
Len Brown26717172010-03-08 14:07:30 -0500302 cpuidle_state_table = atom_cstates;
Len Brown26717172010-03-08 14:07:30 -0500303 break;
Len Brownd13780d2010-07-07 00:12:03 -0400304
305 case 0x2A: /* SNB */
306 case 0x2D: /* SNB Xeon */
307 cpuidle_state_table = snb_cstates;
Len Brownd13780d2010-07-07 00:12:03 -0400308 break;
Len Brown26717172010-03-08 14:07:30 -0500309
310 default:
311 pr_debug(PREFIX "does not run on family %d model %d\n",
312 boot_cpu_data.x86, boot_cpu_data.x86_model);
313 return -ENODEV;
314 }
315
Len Brown56b9aea2010-12-02 01:19:32 -0500316 if (boot_cpu_has(X86_FEATURE_ARAT)) /* Always Reliable APIC Timer */
317 lapic_timer_reliable_states = 0xFFFFFFFF;
318
Len Brown26717172010-03-08 14:07:30 -0500319 pr_debug(PREFIX "v" INTEL_IDLE_VERSION
320 " model 0x%X\n", boot_cpu_data.x86_model);
321
322 pr_debug(PREFIX "lapic_timer_reliable_states 0x%x\n",
323 lapic_timer_reliable_states);
324 return 0;
325}
326
327/*
328 * intel_idle_cpuidle_devices_uninit()
329 * unregister, free cpuidle_devices
330 */
331static void intel_idle_cpuidle_devices_uninit(void)
332{
333 int i;
334 struct cpuidle_device *dev;
335
336 for_each_online_cpu(i) {
337 dev = per_cpu_ptr(intel_idle_cpuidle_devices, i);
338 cpuidle_unregister_device(dev);
339 }
340
341 free_percpu(intel_idle_cpuidle_devices);
342 return;
343}
344/*
345 * intel_idle_cpuidle_devices_init()
346 * allocate, initialize, register cpuidle_devices
347 */
348static int intel_idle_cpuidle_devices_init(void)
349{
350 int i, cstate;
351 struct cpuidle_device *dev;
352
353 intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
354 if (intel_idle_cpuidle_devices == NULL)
355 return -ENOMEM;
356
357 for_each_online_cpu(i) {
358 dev = per_cpu_ptr(intel_idle_cpuidle_devices, i);
359
360 dev->state_count = 1;
361
362 for (cstate = 1; cstate < MWAIT_MAX_NUM_CSTATES; ++cstate) {
363 int num_substates;
364
365 if (cstate > max_cstate) {
366 printk(PREFIX "max_cstate %d reached\n",
367 max_cstate);
368 break;
369 }
370
371 /* does the state exist in CPUID.MWAIT? */
Len Brownc4236282010-05-28 02:22:03 -0400372 num_substates = (mwait_substates >> ((cstate) * 4))
Len Brown26717172010-03-08 14:07:30 -0500373 & MWAIT_SUBSTATE_MASK;
374 if (num_substates == 0)
375 continue;
376 /* is the state not enabled? */
377 if (cpuidle_state_table[cstate].enter == NULL) {
378 /* does the driver not know about the state? */
379 if (*cpuidle_state_table[cstate].name == '\0')
380 pr_debug(PREFIX "unaware of model 0x%x"
381 " MWAIT %d please"
382 " contact lenb@kernel.org",
383 boot_cpu_data.x86_model, cstate);
384 continue;
385 }
386
387 if ((cstate > 2) &&
388 !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
389 mark_tsc_unstable("TSC halts in idle"
390 " states deeper than C2");
391
392 dev->states[dev->state_count] = /* structure copy */
393 cpuidle_state_table[cstate];
394
395 dev->state_count += 1;
396 }
397
398 dev->cpu = i;
399 if (cpuidle_register_device(dev)) {
400 pr_debug(PREFIX "cpuidle_register_device %d failed!\n",
401 i);
402 intel_idle_cpuidle_devices_uninit();
403 return -EIO;
404 }
405 }
406
407 return 0;
408}
409
410
411static int __init intel_idle_init(void)
412{
413 int retval;
414
Thomas Renningerd1896042010-11-03 17:06:14 +0100415 /* Do not load intel_idle at all for now if idle= is passed */
416 if (boot_option_idle_override != IDLE_NO_OVERRIDE)
417 return -ENODEV;
418
Len Brown26717172010-03-08 14:07:30 -0500419 retval = intel_idle_probe();
420 if (retval)
421 return retval;
422
423 retval = cpuidle_register_driver(&intel_idle_driver);
424 if (retval) {
425 printk(KERN_DEBUG PREFIX "intel_idle yielding to %s",
426 cpuidle_get_driver()->name);
427 return retval;
428 }
429
430 retval = intel_idle_cpuidle_devices_init();
431 if (retval) {
432 cpuidle_unregister_driver(&intel_idle_driver);
433 return retval;
434 }
435
436 return 0;
437}
438
439static void __exit intel_idle_exit(void)
440{
441 intel_idle_cpuidle_devices_uninit();
442 cpuidle_unregister_driver(&intel_idle_driver);
443
444 return;
445}
446
447module_init(intel_idle_init);
448module_exit(intel_idle_exit);
449
Len Brown26717172010-03-08 14:07:30 -0500450module_param(max_cstate, int, 0444);
Len Brown26717172010-03-08 14:07:30 -0500451
452MODULE_AUTHOR("Len Brown <len.brown@intel.com>");
453MODULE_DESCRIPTION("Cpuidle driver for Intel Hardware v" INTEL_IDLE_VERSION);
454MODULE_LICENSE("GPL");