/* SPDX-License-Identifier: GPL-2.0 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/reg.h>
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/export.h>
#include <asm/asm-compat.h>

/*
 * Load state from memory into VMX registers, including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 */
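/*
 * In C terms, a rough sketch of the routine below (illustrative only;
 * it assumes r3 points at a struct thread_vr_state laid out as the
 * 32 vector registers followed by the VSCR image, which is what the
 * VRSTATE_VSCR offset encodes):
 *
 *	mtvscr(vrstate->vscr);
 *	reload v0..v31 from vrstate->vr[0..31];		(REST_32VRS)
 */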
_GLOBAL(load_vr_state)
	li	r4,VRSTATE_VSCR
	lvx	v0,r4,r3
	mtvscr	v0
	REST_32VRS(0,r4,r3)
	blr
EXPORT_SYMBOL(load_vr_state)
_ASM_NOKPROBE_SYMBOL(load_vr_state); /* used by restore_math */

/*
 * Store VMX state into memory, including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 */
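/*
 * Mirror image of load_vr_state, under the same layout assumption:
 *
 *	store v0..v31 to vrstate->vr[0..31];		(SAVE_32VRS)
 *	vrstate->vscr = mfvscr();
 */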
_GLOBAL(store_vr_state)
	SAVE_32VRS(0, r4, r3)
	mfvscr	v0
	li	r4, VRSTATE_VSCR
	stvx	v0, r4, r3
	blr
EXPORT_SYMBOL(store_vr_state)

/*
 * Disable VMX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Enables VMX for use in the kernel on return.
 * On SMP we know the VMX is free, since we give it up every
 * switch (i.e. no lazy save of the vector registers).
 *
 * Note that on 32-bit this can only use registers that will be
 * restored by fast_exception_return, i.e. r3 - r6, r10 and r11.
 */
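/*
 * In outline, the code below:
 *  1. sets MSR_VEC in the current MSR so VMX can be used here;
 *  2. makes VRSAVE non-zero if it was zero (see the note inside);
 *  3. sets MSR_VEC in the saved MSR so the task gets VMX on return;
 *  4. bumps THREAD_LOAD_VEC and marks the thread as having used VMX;
 *  5. reloads VSCR and v0-v31 from the thread's vr_state.
 */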
_GLOBAL(load_up_altivec)
	mfmsr	r5			/* grab the current MSR */
	oris	r5,r5,MSR_VEC@h
	MTMSRD(r5)			/* enable use of AltiVec now */
	isync

	/*
	 * While userspace in general ignores VRSAVE, glibc uses it as a
	 * boolean to optimise userspace context save/restore. Whenever we
	 * take an AltiVec unavailable exception we must set VRSAVE to
	 * something non-zero. Set it to all 1s. See also the programming
	 * note in the ISA.
	 */
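	/*
	 * i.e. roughly:
	 *	if (mfspr(SPRN_VRSAVE) == 0)
	 *		mtspr(SPRN_VRSAVE, -1);
	 */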
	mfspr	r4,SPRN_VRSAVE
	cmpwi	0,r4,0
	bne+	1f
	li	r4,-1
	mtspr	SPRN_VRSAVE,r4
1:
	/* enable use of VMX after return */
#ifdef CONFIG_PPC32
	mfspr	r5,SPRN_SPRG_THREAD	/* current task's THREAD (phys) */
	oris	r9,r9,MSR_VEC@h
#ifdef CONFIG_VMAP_STACK
	tovirt(r5, r5)
#endif
#else
	ld	r4,PACACURRENT(r13)
	addi	r5,r4,THREAD		/* Get THREAD */
	oris	r12,r12,MSR_VEC@h
	std	r12,_MSR(r1)
#endif
	/* Don't care if r4 overflows, this is desired behaviour */
	lbz	r4,THREAD_LOAD_VEC(r5)
	addi	r4,r4,1
	stb	r4,THREAD_LOAD_VEC(r5)
	addi	r6,r5,THREAD_VRSTATE
	li	r4,1
	li	r10,VRSTATE_VSCR
	stw	r4,THREAD_USED_VR(r5)
	lvx	v0,r10,r6
	mtvscr	v0
	REST_32VRS(0,r4,r6)
	/* restore registers and return */
	blr

/*
 * save_altivec(tsk)
 * Save the vector registers to the task's thread_struct.
 */
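/*
 * Rough C-level sketch (vr_save_area is the alternate save area that
 * the THREAD_VRSAVEAREA offset selects when it is non-NULL):
 *
 *	st = tsk->thread.vr_save_area ? tsk->thread.vr_save_area
 *				      : &tsk->thread.vr_state;
 *	save v0..v31 and VSCR into *st;
 */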
_GLOBAL(save_altivec)
	addi	r3,r3,THREAD		/* want THREAD of task */
	PPC_LL	r7,THREAD_VRSAVEAREA(r3)
	PPC_LL	r5,PT_REGS(r3)
	PPC_LCMPI	0,r7,0
	bne	2f
	addi	r7,r3,THREAD_VRSTATE
2:	SAVE_32VRS(0,r4,r7)
	mfvscr	v0
	li	r4,VRSTATE_VSCR
	stvx	v0,r4,r7
	blr

#ifdef CONFIG_VSX

#ifdef CONFIG_PPC32
#error This asm code isn't ready for 32-bit kernels
#endif

/*
 * load_up_vsx(unused, unused, tsk)
 * Disable VSX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Reuse the fp and vsx saves, but first check to see if they have
 * been saved already.
 */
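/*
 * r12 holds the saved MSR here, so the two conditional calls below
 * amount to roughly:
 *
 *	if (!(msr & MSR_FP))
 *		load_up_fpu();
 *	if (!(msr & MSR_VEC))
 *		load_up_altivec();
 *	then mark the thread as a VSX user and set MSR_VSX in the
 *	saved MSR before returning through fast_exception_return.
 */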
_GLOBAL(load_up_vsx)
/* Load FP and VSX registers if they haven't been loaded yet */
	andi.	r5,r12,MSR_FP
	beql+	load_up_fpu		/* skip if already loaded */
	andis.	r5,r12,MSR_VEC@h
	beql+	load_up_altivec		/* skip if already loaded */

	ld	r4,PACACURRENT(r13)
	addi	r4,r4,THREAD		/* Get THREAD */
	li	r6,1
	stw	r6,THREAD_USED_VSR(r4)	/* ... also set thread used vsr */
	/* enable use of VSX after return */
	oris	r12,r12,MSR_VSX@h
	std	r12,_MSR(r1)
	b	fast_exception_return

#endif /* CONFIG_VSX */


/*
 * The routines below are in assembler so we can closely control the
 * usage of floating-point registers. These routines must be called
 * with preempt disabled.
 */
#ifdef CONFIG_PPC32
	.data
fpzero:
	.long	0
fpone:
	.long	0x3f800000	/* 1.0 in single-precision FP */
fphalf:
	.long	0x3f000000	/* 0.5 in single-precision FP */

#define LDCONST(fr, name)	\
	lis	r11,name@ha;	\
	lfs	fr,name@l(r11)
#else

	.section ".toc","aw"
fpzero:
	.tc	FD_0_0[TC],0
fpone:
	.tc	FD_3ff00000_0[TC],0x3ff0000000000000	/* 1.0 */
fphalf:
	.tc	FD_3fe00000_0[TC],0x3fe0000000000000	/* 0.5 */

#define LDCONST(fr, name)	\
	lfd	fr,name@toc(r2)
#endif
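/*
 * Usage sketch: LDCONST(fr1, fpone) loads 1.0 into fr1, either via a
 * lis/lfs pair from .data on 32-bit or a single lfd from the TOC on
 * 64-bit.
 */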

	.text
/*
 * Internal routine to enable floating point and set FPSCR to 0.
 * Don't call it from C; it doesn't use the normal calling convention.
 */
fpenable:
#ifdef CONFIG_PPC32
	stwu	r1,-64(r1)
#else
	stdu	r1,-64(r1)
#endif
	mfmsr	r10
	ori	r11,r10,MSR_FP
	mtmsr	r11
	isync
	stfd	fr0,24(r1)
	stfd	fr1,16(r1)
	stfd	fr31,8(r1)
	LDCONST(fr1, fpzero)
	mffs	fr31
	MTFSF_L(fr1)
	blr

fpdisable:
	mtlr	r12
	MTFSF_L(fr31)
	lfd	fr31,8(r1)
	lfd	fr1,16(r1)
	lfd	fr0,24(r1)
	mtmsr	r10
	isync
	addi	r1,r1,64
	blr
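/*
 * Convention for the two helpers above, followed by every routine
 * below: save LR in r12, "bl fpenable", do the FP work (saving any
 * extra FPRs beyond fr0/fr1/fr31 manually), then branch (not call)
 * to fpdisable, which returns through the LR value saved in r12.
 */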

/*
 * Vector add, floating point.
 */
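/*
 * i.e. (sketch, assuming the usual four 32-bit elements per vector;
 * r3 = dst, r4 = a, r5 = b):
 *
 *	for (i = 0; i < 4; i++)
 *		dst[i] = a[i] + b[i];
 */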
_GLOBAL(vaddfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fadds	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector subtract, floating point.
 */
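/*
 * Same loop shape as vaddfp above, computing dst[i] = a[i] - b[i].
 */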
_GLOBAL(vsubfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fsubs	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector multiply and add, floating point.
 */
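/*
 * fmadds computes fr0*fr2 + fr1, so with r3 = dst, r4 = a, r5 = b and
 * r6 = c the loop below is dst[i] = a[i]*c[i] + b[i], matching the
 * vmaddfp semantics vD = vA*vC + vB.
 */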
_GLOBAL(vmaddfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fmadds	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector negative multiply and subtract, floating point.
 */
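/*
 * fnmsubs computes -(fr0*fr2 - fr1), so the loop below is
 * dst[i] = -(a[i]*c[i] - b[i]) = b[i] - a[i]*c[i], matching the
 * vnmsubfp semantics vD = -(vA*vC - vB).
 */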
_GLOBAL(vnmsubfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fnmsubs	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector reciprocal estimate. We just compute 1.0/x.
 * r3 -> destination, r4 -> source.
 */
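/*
 * Note: the hardware vrefp instruction only promises an estimate with
 * a bounded relative error, so computing the exact quotient here is a
 * valid, if slower, substitute.
 */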
_GLOBAL(vrefp)
	mflr	r12
	bl	fpenable
	li	r0,4
	LDCONST(fr1, fpone)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	fdivs	fr0,fr1,fr0
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector reciprocal square-root estimate, floating point.
 * We use the frsqrte instruction for the initial estimate followed
 * by 2 iterations of Newton-Raphson to get sufficient accuracy.
 * r3 -> destination, r4 -> source.
 */
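/*
 * The iteration below is Newton-Raphson on f(r) = 1/r^2 - s, whose
 * positive root is r = 1/sqrt(s):
 *
 *	r' = r - f(r)/f'(r) = r + (r/2) * (1 - s*r*r)
 *
 * Convergence is quadratic, roughly doubling the number of good bits
 * per step, so two steps on frsqrte's initial estimate give the
 * accuracy the header comment above refers to.
 */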
_GLOBAL(vrsqrtefp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	stfd	fr3,40(r1)
	stfd	fr4,48(r1)
	stfd	fr5,56(r1)
	li	r0,4
	LDCONST(fr4, fpone)
	LDCONST(fr5, fphalf)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	frsqrte	fr1,fr0			/* r = frsqrte(s) */
	fmuls	fr3,fr1,fr0		/* r * s */
	fmuls	fr2,fr1,fr5		/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4		/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1		/* r = r + 0.5 * r * (1 - s * r * r) */
	fmuls	fr3,fr1,fr0		/* r * s */
	fmuls	fr2,fr1,fr5		/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4		/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1		/* r = r + 0.5 * r * (1 - s * r * r) */
	stfsx	fr1,r3,r6
	addi	r6,r6,4
	bdnz	1b
	lfd	fr5,56(r1)
	lfd	fr4,48(r1)
	lfd	fr3,40(r1)
	lfd	fr2,32(r1)
	b	fpdisable