/*
 * x86 FPU boot time init code:
 */
#include <asm/fpu/internal.h>
#include <asm/tlbflush.h>
#include <asm/setup.h>
#include <asm/cmdline.h>

#include <linux/sched.h>
#include <linux/init.h>

/*
 * Initialize the TS bit in CR0 according to the style of context-switches
 * we are using:
 */
static void fpu__init_cpu_ctx_switch(void)
{
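	/*
	 * stts() sets CR0::TS, so in 'lazy' mode the first FPU
	 * instruction a task executes raises a device-not-available
	 * (#NM) fault, from which the FPU state is restored on demand;
	 * clts() clears TS for 'eager' mode. See the 'FPU context
	 * switching strategies' comment further down for details.
	 */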
	if (!boot_cpu_has(X86_FEATURE_EAGER_FPU))
		stts();
	else
		clts();
}

/*
 * Initialize the registers found in all CPUs, CR0 and CR4:
 */
static void fpu__init_cpu_generic(void)
{
	unsigned long cr0;
	unsigned long cr4_mask = 0;

	if (boot_cpu_has(X86_FEATURE_FXSR))
		cr4_mask |= X86_CR4_OSFXSR;
	if (boot_cpu_has(X86_FEATURE_XMM))
		cr4_mask |= X86_CR4_OSXMMEXCPT;
	if (cr4_mask)
		cr4_set_bits(cr4_mask);

	cr0 = read_cr0();
	cr0 &= ~(X86_CR0_TS | X86_CR0_EM);	/* clear TS and EM */
	if (!boot_cpu_has(X86_FEATURE_FPU))
		cr0 |= X86_CR0_EM;
	write_cr0(cr0);

	/* Flush out any pending x87 state: */
#ifdef CONFIG_MATH_EMULATION
	if (!boot_cpu_has(X86_FEATURE_FPU))
		fpstate_init_soft(&current->thread.fpu.state.soft);
	else
#endif
		asm volatile ("fninit");
}

/*
 * Enable all supported FPU features. Called when a CPU is brought online:
 */
void fpu__init_cpu(void)
{
	fpu__init_cpu_generic();
	fpu__init_cpu_xstate();
	fpu__init_cpu_ctx_switch();
}

/*
 * The earliest FPU detection code.
 *
 * Set the X86_FEATURE_FPU CPU-capability bit based on
 * trying to execute an actual sequence of FPU instructions:
 */
static void fpu__init_system_early_generic(struct cpuinfo_x86 *c)
{
	unsigned long cr0;
	u16 fsw, fcw;

	fsw = fcw = 0xffff;

	cr0 = read_cr0();
	cr0 &= ~(X86_CR0_TS | X86_CR0_EM);
	write_cr0(cr0);

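	/*
	 * Probe for an x87 unit: fsw/fcw were poisoned with 0xffff
	 * above, and if no FPU is present the fnstsw/fnstcw below are
	 * expected to leave them untouched. After a successful FNINIT
	 * the status word reads back as 0 and the control word has its
	 * power-on default of 0x037f, so the exception-mask bits
	 * tested below ((fcw & 0x103f) == 0x003f) are all set.
	 */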
	if (!test_bit(X86_FEATURE_FPU, (unsigned long *)cpu_caps_cleared)) {
		asm volatile("fninit ; fnstsw %0 ; fnstcw %1"
			     : "+m" (fsw), "+m" (fcw));

		if (fsw == 0 && (fcw & 0x103f) == 0x003f)
			set_cpu_cap(c, X86_FEATURE_FPU);
		else
			clear_cpu_cap(c, X86_FEATURE_FPU);
	}

#ifndef CONFIG_MATH_EMULATION
	if (!boot_cpu_has(X86_FEATURE_FPU)) {
		pr_emerg("x86/fpu: Giving up, no FPU found and no math emulation present\n");
		for (;;)
			asm volatile("hlt");
	}
#endif
}

/*
 * Boot time FPU feature detection code:
 */
unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;

static void __init fpu__init_system_mxcsr(void)
{
	unsigned int mask = 0;

	if (boot_cpu_has(X86_FEATURE_FXSR)) {
		/* Static because GCC does not get 16-byte stack alignment right: */
		static struct fxregs_state fxregs __initdata;

		asm volatile("fxsave %0" : "+m" (fxregs));

		mask = fxregs.mxcsr_mask;

		/*
		 * If zero then use the default features mask,
		 * which has all features set, except the
		 * denormals-are-zero feature bit:
		 */
		if (mask == 0)
			mask = 0x0000ffbf;
	}
	mxcsr_feature_mask &= mask;
}

/*
 * Once-per-bootup FPU initialization sequences that will run on most x86 CPUs:
 */
static void __init fpu__init_system_generic(void)
{
	/*
	 * Set up the legacy init FPU context. (xstate init might overwrite this
	 * with a more modern format, if the CPU supports it.)
	 */
	fpstate_init(&init_fpstate);

	fpu__init_system_mxcsr();
}

/*
 * Size of the FPU context state. All tasks in the system use the
 * same context size, regardless of what portion they use.
 * This is inherent to the XSAVE architecture which puts all state
 * components into a single, contiguous memory block:
 */
unsigned int fpu_kernel_xstate_size;
EXPORT_SYMBOL_GPL(fpu_kernel_xstate_size);

/* Get the alignment of 'TYPE': */
#define TYPE_ALIGN(TYPE) offsetof(struct { char x; TYPE test; }, test)

/*
 * Enforce that 'MEMBER' is the last field of 'TYPE'.
 *
 * Align the computed size with the alignment of 'TYPE',
 * because that's how C aligns structs.
 */
#define CHECK_MEMBER_AT_END_OF(TYPE, MEMBER) \
	BUILD_BUG_ON(sizeof(TYPE) != ALIGN(offsetofend(TYPE, MEMBER), \
					   TYPE_ALIGN(TYPE)))
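
/*
 * Worked example with a hypothetical (not actually defined) struct:
 * for 'struct foo { u32 a; u64 b; };', TYPE_ALIGN(struct foo) is 8 and
 * offsetofend(struct foo, b) is 16, which matches sizeof(struct foo),
 * so CHECK_MEMBER_AT_END_OF(struct foo, b) passes. If a 'u32 c' were
 * added after 'b', sizeof() would grow to 24 while the aligned
 * offsetofend() of 'b' stays 16, and the BUILD_BUG_ON() would fire.
 */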

/*
 * We append the 'struct fpu' to the task_struct:
 */
static void __init fpu__init_task_struct_size(void)
{
	int task_size = sizeof(struct task_struct);

	/*
	 * Subtract off the static size of the register state.
	 * It potentially has a bunch of padding.
	 */
	task_size -= sizeof(((struct task_struct *)0)->thread.fpu.state);

	/*
	 * Add back the dynamically-calculated register state
	 * size.
	 */
	task_size += fpu_kernel_xstate_size;

	/*
	 * We dynamically size 'struct fpu', so we require that
	 * it be at the end of 'thread_struct' and that
	 * 'thread_struct' be at the end of 'task_struct'. If
	 * you hit a compile error here, check the structure to
	 * see if something got added to the end.
	 */
	CHECK_MEMBER_AT_END_OF(struct fpu, state);
	CHECK_MEMBER_AT_END_OF(struct thread_struct, fpu);
	CHECK_MEMBER_AT_END_OF(struct task_struct, thread);

	arch_task_struct_size = task_size;
}

/*
 * Set up the user and kernel xstate sizes based on the legacy FPU context size.
 *
 * We set this up first, and later it will be overwritten by
 * fpu__init_system_xstate() if the CPU knows about xstates.
 */
static void __init fpu__init_system_xstate_size_legacy(void)
{
	static int on_boot_cpu __initdata = 1;

	WARN_ON_FPU(!on_boot_cpu);
	on_boot_cpu = 0;

	/*
	 * Note that xstate sizes might be overwritten later during
	 * fpu__init_system_xstate().
	 */

	if (!boot_cpu_has(X86_FEATURE_FPU)) {
		/*
		 * Disable xsave as we do not support it if i387
		 * emulation is enabled.
		 */
		setup_clear_cpu_cap(X86_FEATURE_XSAVE);
		setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
		fpu_kernel_xstate_size = sizeof(struct swregs_state);
	} else {
		if (boot_cpu_has(X86_FEATURE_FXSR))
			fpu_kernel_xstate_size =
				sizeof(struct fxregs_state);
		else
			fpu_kernel_xstate_size =
				sizeof(struct fregs_state);
	}

	fpu_user_xstate_size = fpu_kernel_xstate_size;
}

/*
 * FPU context switching strategies:
 *
 * Contrary to popular belief, we don't do lazy FPU saves, due to the
 * task migration complications it brings on SMP - we only do
 * lazy FPU restores.
 *
 * 'lazy' is the traditional strategy, which is based on setting
 * CR0::TS to 1 during context-switch (instead of doing a full
 * restore of the FPU state), which causes the first FPU instruction
 * after the context switch (whenever it is executed) to raise a
 * device-not-available (#NM) fault - at which point we lazily
 * restore the FPU state into the FPU registers.
 *
 * Tasks are of course under no obligation to execute FPU instructions,
 * so it can easily happen that another context-switch occurs without
 * a single FPU instruction being executed. If we eventually switch
 * back to the original task (that still owns the FPU) then we have
 * not only skipped the intermediate restores along the way, but we
 * also have the FPU ready to be used by the original task.
 *
 * 'lazy' is deprecated because it's almost never a performance win
 * and it's much more complicated than 'eager'.
 *
 * 'eager' switching is the default on all CPUs: we switch the FPU
 * state during every context switch, regardless of whether the task
 * has used FPU instructions in that time slice or not. This is done
 * because modern FPU context saving instructions are able to optimize
 * state saving and restoration in hardware: they can detect both
 * unused and untouched FPU state and optimize accordingly.
 *
 * [ Note that even in 'lazy' mode we might optimize context switches
 *   to use 'eager' restores, if we detect that a task is using the FPU
 *   frequently. See the fpu->counter logic in fpu/internal.h for that. ]
 */
static enum { ENABLE, DISABLE } eagerfpu = ENABLE;

/*
 * Find supported xfeatures based on cpu features and command-line input.
 * This must be called after fpu__init_parse_early_param() is called and
 * xfeatures_mask is enumerated.
 */
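/*
 * For example (assuming, as in this kernel's fpu/xstate.h, that
 * XFEATURE_MASK_EAGER covers the MPX bndregs/bndcsr states): booting
 * with "eagerfpu=off" on an MPX-capable CPU masks those states out of
 * the returned mask, so MPX is unavailable under lazy switching.
 */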
u64 __init fpu__get_supported_xfeatures_mask(void)
{
	/* Support all xfeatures known to us */
	if (eagerfpu != DISABLE)
		return XCNTXT_MASK;

	/* Warn about xfeatures that get disabled when eagerfpu is off: */
	if (xfeatures_mask & XFEATURE_MASK_EAGER) {
		pr_err("x86/fpu: eagerfpu switching disabled, disabling the following xstate features: 0x%llx.\n",
			xfeatures_mask & XFEATURE_MASK_EAGER);
	}

	/* Return a mask that masks out all features requiring eagerfpu mode */
	return ~XFEATURE_MASK_EAGER;
}

/*
 * Disable features dependent on eagerfpu.
 */
static void __init fpu__clear_eager_fpu_features(void)
{
	setup_clear_cpu_cap(X86_FEATURE_MPX);
}

/*
 * Pick the FPU context switching strategy:
 *
 * Unless eagerfpu switching was disabled on the command line, we
 * ensure it is ENABLE if either of the following is true:
 *
 * (1) the cpu has xsaveopt, as it has the optimization and doing eager
 *     FPU switching has a relatively low cost compared to a plain xsave;
 * (2) the cpu has xsave features (e.g. MPX) that depend on eager FPU
 *     switching. Should the kernel boot with noxsaveopt, we support MPX
 *     with eager FPU switching at a higher cost.
 */
static void __init fpu__init_system_ctx_switch(void)
{
	static bool on_boot_cpu __initdata = true;

	WARN_ON_FPU(!on_boot_cpu);
	on_boot_cpu = false;

	WARN_ON_FPU(current->thread.fpu.fpstate_active);

	if (boot_cpu_has(X86_FEATURE_XSAVEOPT) && eagerfpu != DISABLE)
		eagerfpu = ENABLE;

	if (xfeatures_mask & XFEATURE_MASK_EAGER)
		eagerfpu = ENABLE;

	if (eagerfpu == ENABLE)
		setup_force_cpu_cap(X86_FEATURE_EAGER_FPU);

	pr_info("x86/fpu: Using '%s' FPU context switches.\n", eagerfpu == ENABLE ? "eager" : "lazy");
}

/*
 * We parse fpu parameters early because fpu__init_system() is executed
 * before parse_early_param().
 */
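/*
 * For example, booting with "eagerfpu=off noxsaveopt" selects lazy FPU
 * context switching (which also clears the MPX capability, as MPX
 * depends on eager switching) and disables use of the XSAVEOPT
 * instruction. These are the options recognized below.
 */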
static void __init fpu__init_parse_early_param(void)
{
	if (cmdline_find_option_bool(boot_command_line, "eagerfpu=off")) {
		eagerfpu = DISABLE;
		fpu__clear_eager_fpu_features();
	}

	if (cmdline_find_option_bool(boot_command_line, "no387"))
		setup_clear_cpu_cap(X86_FEATURE_FPU);

	if (cmdline_find_option_bool(boot_command_line, "nofxsr")) {
		setup_clear_cpu_cap(X86_FEATURE_FXSR);
		setup_clear_cpu_cap(X86_FEATURE_FXSR_OPT);
		setup_clear_cpu_cap(X86_FEATURE_XMM);
	}

	if (cmdline_find_option_bool(boot_command_line, "noxsave"))
		fpu__xstate_clear_all_cpu_caps();

	if (cmdline_find_option_bool(boot_command_line, "noxsaveopt"))
		setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);

	if (cmdline_find_option_bool(boot_command_line, "noxsaves"))
		setup_clear_cpu_cap(X86_FEATURE_XSAVES);
}

/*
 * Called on the boot CPU once per system bootup, to set up the initial
 * FPU state that is later cloned into all processes:
 */
void __init fpu__init_system(struct cpuinfo_x86 *c)
{
	fpu__init_parse_early_param();
	fpu__init_system_early_generic(c);

	/*
	 * The FPU has to be operational for some of the
	 * later FPU init activities:
	 */
	fpu__init_cpu();

	/*
	 * But don't leave CR0::TS set yet, as some of the FPU setup
	 * methods depend on being able to execute FPU instructions
	 * that will fault on a set TS, such as the FXSAVE in
	 * fpu__init_system_mxcsr().
	 */
	clts();

	fpu__init_system_generic();
	fpu__init_system_xstate_size_legacy();
	fpu__init_system_xstate();
	fpu__init_task_struct_size();

	fpu__init_system_ctx_switch();
}