blob: dc72f2b970b7b3328b0ccc1566c2d64951669abf [file] [log] [blame]
Greg Kroah-Hartmanb2441312017-11-01 15:07:57 +01001/* SPDX-License-Identifier: GPL-2.0 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002/* copy_user.S: Sparc optimized copy_from_user and copy_to_user code.
3 *
4 * Copyright(C) 1995 Linus Torvalds
5 * Copyright(C) 1996 David S. Miller
6 * Copyright(C) 1996 Eddie C. Dost
7 * Copyright(C) 1996,1998 Jakub Jelinek
8 *
9 * derived from:
10 * e-mail between David and Eddie.
11 *
12 * Returns 0 if successful, otherwise count of bytes not copied yet
13 */
14
15#include <asm/ptrace.h>
16#include <asm/asmmacro.h>
17#include <asm/page.h>
David S. Miller3a1d5c82006-09-27 19:43:02 -070018#include <asm/thread_info.h>
Al Virod3867f042016-01-16 21:39:30 -050019#include <asm/export.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070020
21/* Work around cpp -rob */
/* ALLOC and EXECINSTR expand to the #alloc / #execinstr section flags that
 * the .section directives in the macros below are written with.
 */
22#define ALLOC #alloc
23#define EXECINSTR #execinstr
/* EX(x,y,a,b): emit the memory access "x,y" at local label 98 and an
 * exception table entry (98b, 99b) for it.  The stub at 99 in .fixup
 * branches to fixupretl; the instruction built from a and b with %g3 as
 * destination sits in the branch delay slot, so it runs before fixupretl
 * and produces the value fixupretl returns in %o0.
 * Example from this file: EX(ld [%o1], %g2, and %g1, 7) returns %g1 & 7
 * if the load faults.
 */
24#define EX(x,y,a,b) \
2598: x,y; \
26 .section .fixup,ALLOC,EXECINSTR; \
27 .align 4; \
2899: ba fixupretl; \
29 a, b, %g3; \
30 .section __ex_table,ALLOC; \
31 .align 4; \
32 .word 98b, 99b; \
33 .text; \
34 .align 4
35
/* EX2(x,y,c,d,e,a,b): like EX, but the fixup stub at 99 first executes the
 * extra instruction "c, d, e" and only then branches to fixupretl with
 * "a, b, %g3" in the delay slot.  This allows a two-step computation of the
 * return value (the one user in this file masks %g1 and then subtracts 4).
 */
36#define EX2(x,y,c,d,e,a,b) \
3798: x,y; \
38 .section .fixup,ALLOC,EXECINSTR; \
39 .align 4; \
4099: c, d, e; \
41 ba fixupretl; \
42 a, b, %g3; \
43 .section __ex_table,ALLOC; \
44 .align 4; \
45 .word 98b, 99b; \
46 .text; \
47 .align 4
48
/* EXO2(x,y): emit the memory access "x, y" at local label 98 and an
 * exception table entry (98b, 97f).  No per-site stub is generated: the
 * shared handler at label 97 in .fixup copies %o2 to %g3 and falls into
 * fixupretl, so a fault here returns the current value of %o2.
 * Only usable where %o2 still holds the number of bytes left to copy.
 */
49#define EXO2(x,y) \
5098: x, y; \
51 .section __ex_table,ALLOC; \
52 .align 4; \
53 .word 98b, 97f; \
54 .text; \
55 .align 4
56
/* EXT(start,end,handler): emit four words into __ex_table:
 *   start, 0, end, handler
 * No instruction is emitted in .text.
 * NOTE(review): presumably the zero in the second word marks this as a
 * range entry covering the instructions from start up to end, with
 * handler as the fixup address - confirm against the exception table
 * lookup code, which is not part of this file.  The comment in the .fixup
 * section states that the handler receives the index of the faulting
 * instruction within the range in %g2.
 */
57#define EXT(start,end,handler) \
58 .section __ex_table,ALLOC; \
59 .align 4; \
60 .word start, 0, end, handler; \
61 .text; \
62 .align 4
63
64/* Please do not change following macros unless you change logic used
65 * in .fixup at the end of this file as well
66 */
67
68/* Both these macros have to start with exactly the same insn */
/* (The reason is visible in __copy_user: "be ldd_std + 4" has the first
 * instruction of MOVE_BIGCHUNK in its delay slot and then enters
 * MOVE_BIGALIGNCHUNK one instruction late, so the two first instructions
 * must be interchangeable.)
 *
 * MOVE_BIGCHUNK: copy 32 bytes from %src + offset to %dst + offset.
 * Four ldd loads fill the register pairs t0/t1, t2/t3, t4/t5, t6/t7; the
 * data is written back with eight word-sized st stores.
 * Exactly 12 instructions: 4 loads followed by 8 stores.  Fixup handler 50
 * decodes the faulting instruction index with that layout.
 */
69#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
70 ldd [%src + (offset) + 0x00], %t0; \
71 ldd [%src + (offset) + 0x08], %t2; \
72 ldd [%src + (offset) + 0x10], %t4; \
73 ldd [%src + (offset) + 0x18], %t6; \
74 st %t0, [%dst + (offset) + 0x00]; \
75 st %t1, [%dst + (offset) + 0x04]; \
76 st %t2, [%dst + (offset) + 0x08]; \
77 st %t3, [%dst + (offset) + 0x0c]; \
78 st %t4, [%dst + (offset) + 0x10]; \
79 st %t5, [%dst + (offset) + 0x14]; \
80 st %t6, [%dst + (offset) + 0x18]; \
81 st %t7, [%dst + (offset) + 0x1c];
82
/* MOVE_BIGALIGNCHUNK: copy 32 bytes from %src + offset to %dst + offset
 * using doubleword accesses in both directions (four ldd, four std).
 * t0, t2, t4 and t6 name the first register of each pair; t1, t3, t5 and
 * t7 are not referenced by the macro body.
 * Exactly 8 instructions: 4 loads followed by 4 stores.  Fixup handler 52
 * decodes the faulting instruction index with that layout.
 * Its first instruction is identical to MOVE_BIGCHUNK's first instruction;
 * __copy_user depends on that when it branches to ldd_std + 4.
 */
83#define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
84 ldd [%src + (offset) + 0x00], %t0; \
85 ldd [%src + (offset) + 0x08], %t2; \
86 ldd [%src + (offset) + 0x10], %t4; \
87 ldd [%src + (offset) + 0x18], %t6; \
88 std %t0, [%dst + (offset) + 0x00]; \
89 std %t2, [%dst + (offset) + 0x08]; \
90 std %t4, [%dst + (offset) + 0x10]; \
91 std %t6, [%dst + (offset) + 0x18];
92
/* MOVE_LASTCHUNK: copy the 16 bytes that end `offset` bytes before %src /
 * %dst, i.e. addresses (ptr - offset - 0x10) .. (ptr - offset - 1).
 * Both pointers are expected to have been advanced past the data already.
 * Exactly 6 instructions: 2 ldd loads followed by 4 st stores.  The
 * computed jump into copy_user_table (1.5 bytes of code per byte of data)
 * and fixup handler 51 both depend on that count.
 */
93#define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
94 ldd [%src - (offset) - 0x10], %t0; \
95 ldd [%src - (offset) - 0x08], %t2; \
96 st %t0, [%dst - (offset) - 0x10]; \
97 st %t1, [%dst - (offset) - 0x0c]; \
98 st %t2, [%dst - (offset) - 0x08]; \
99 st %t3, [%dst - (offset) - 0x04];
100
/* MOVE_HALFCHUNK: copy 8 bytes from %src + offset to %dst + offset as four
 * halfwords (lduh / sth).
 * Exactly 8 instructions: 4 loads followed by 4 stores.  Fixup handler 53
 * decodes the faulting instruction index with that layout.
 */
101#define MOVE_HALFCHUNK(src, dst, offset, t0, t1, t2, t3) \
102 lduh [%src + (offset) + 0x00], %t0; \
103 lduh [%src + (offset) + 0x02], %t1; \
104 lduh [%src + (offset) + 0x04], %t2; \
105 lduh [%src + (offset) + 0x06], %t3; \
106 sth %t0, [%dst + (offset) + 0x00]; \
107 sth %t1, [%dst + (offset) + 0x02]; \
108 sth %t2, [%dst + (offset) + 0x04]; \
109 sth %t3, [%dst + (offset) + 0x06];
110
/* MOVE_SHORTCHUNK: copy the 2 bytes at (ptr - offset - 2) and
 * (ptr - offset - 1) from %src to %dst with byte loads and stores.
 * With a negative offset (as used in byte_chunk) the addresses end up at
 * or after the pointer: offset -0x02 copies bytes 0 and 1, and so on.
 * Exactly 4 instructions: 2 loads followed by 2 stores.  The computed jump
 * in short_end (8 bytes of code per byte of data) and fixup handlers 54
 * and 55 depend on that count.
 */
111#define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
112 ldub [%src - (offset) - 0x02], %t0; \
113 ldub [%src - (offset) - 0x01], %t1; \
114 stb %t0, [%dst - (offset) - 0x02]; \
115 stb %t1, [%dst - (offset) - 0x01];
116
117 .text
118 .align 4
119
/* __copy_user_begin marks the start of the copy code; the matching
 * __copy_user_end label is at the very end of the file.
 */
120 .globl __copy_user_begin
121__copy_user_begin:
122
123 .globl __copy_user
Al Virod3867f042016-01-16 21:39:30 -0500124 EXPORT_SYMBOL(__copy_user)
/* dword_align: out-of-line helper for __copy_user.
 * Reached from __copy_user when %o1 (src) is not word aligned; at that
 * point dst and src are already known to share their low two bits and
 * len is known to be greater than 15.
 * Copies one byte and/or one halfword so that %o1 becomes 4-byte aligned,
 * advancing %o0/%o1 and decrementing %o2, then rejoins __copy_user at its
 * label 3.  Every access uses EXO2, so a fault returns the current %o2.
 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700125dword_align:
126 andcc %o1, 1, %g0
127 be 4f
/* delay slot: test bit 1 of the original src; consumed by "bne 3f" below */
128 andcc %o1, 2, %g0
129
/* src is odd: copy one byte */
130 EXO2(ldub [%o1], %g2)
131 add %o1, 1, %o1
132 EXO2(stb %g2, [%o0])
133 sub %o2, 1, %o2
/* bit 1 was set, so src was 3 mod 4 and is word aligned now */
134 bne 3f
135 add %o0, 1, %o0
136
/* src was 1 mod 4: one more halfword gets it word aligned */
137 EXO2(lduh [%o1], %g2)
138 add %o1, 2, %o1
139 EXO2(sth %g2, [%o0])
140 sub %o2, 2, %o2
141 b 3f
142 add %o0, 2, %o0
1434:
/* src is even but not word aligned (2 mod 4): copy one halfword */
144 EXO2(lduh [%o1], %g2)
145 add %o1, 2, %o1
146 EXO2(sth %g2, [%o0])
147 sub %o2, 2, %o2
148 b 3f
149 add %o0, 2, %o0
150
/* __copy_user: %o0 = dst, %o1 = src, %o2 = len.
 * Returns 0 in %o0 on success; if an access faults, the .fixup code
 * returns the number of bytes not copied instead.
 *
 * This first part chooses a strategy and runs the 128-byte block loop:
 *   - dst and src differ in their low two bits -> cannot_optimize
 *   - len <= 15                                 -> short_aligned_end
 *   - src not word aligned                      -> dword_align, which
 *                                                  comes back to label 3
 * Most branches carry a useful instruction in their delay slot; very often
 * it sets the condition codes tested by the following branch.
 */
151__copy_user: /* %o0=dst %o1=src %o2=len */
152 xor %o0, %o1, %o4
1531:
/* %o5 = (dst ^ src) & 3; cannot_optimize looks at this value again */
154 andcc %o4, 3, %o5
1552:
156 bne cannot_optimize
/* delay slot: compare len with 15 for the next branch (and for the first
 * branch in cannot_optimize)
 */
157 cmp %o2, 15
158
159 bleu short_aligned_end
/* delay slot: is src word aligned? */
160 andcc %o1, 3, %g0
161
162 bne dword_align
1633:
/* Delay slot of the branch above and re-entry point for dword_align:
 * test bit 2 of src, i.e. whether src is 8-byte aligned as well.
 */
164 andcc %o1, 4, %g0
165
166 be 2f
/* delay slot: from here on %g1 holds the remaining length, because the
 * block macros below use %o2-%o5 as data registers
 */
167 mov %o2, %g1
168
/* src is 4 mod 8: copy one word so that the ldd loads below are aligned.
 * %o2 is not modified here, so EXO2 still returns the correct count.
 */
169 EXO2(ld [%o1], %o4)
170 sub %g1, 4, %g1
171 EXO2(st %o4, [%o0])
172 add %o1, 4, %o1
173 add %o0, 4, %o0
1742:
/* %g7 = number of bytes that can be copied in whole 128-byte blocks */
175 andcc %g1, 0xffffff80, %g7
176 be 3f
/* delay slot: test bit 2 of dst */
177 andcc %o0, 4, %g0
178
/* dst is 8-byte aligned too: use the ldd/std loop.  The delay slot below
 * is the first ldd of MOVE_BIGCHUNK, identical to the first instruction of
 * ldd_std, hence the "+ 4" (one instruction) on the branch target.
 */
179 be ldd_std + 4
1805:
/* 128 bytes per iteration: doubleword loads, word stores */
181 MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
182 MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
183 MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
184 MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
18580:
/* faults in the 48 instructions between 5 and 80 go to fixup handler 50 */
186 EXT(5b, 80b, 50f)
187 subcc %g7, 128, %g7
188 add %o1, 128, %o1
189 bne 5b
190 add %o0, 128, %o0
1913:
/* %g7 = bytes to copy in 16-byte chunks (0x00 .. 0x70) */
192 andcc %g1, 0x70, %g7
193 be copy_user_table_end
/* delay slot: test bit 3 of the length.  Nothing up to
 * copy_user_table_end changes the condition codes again, so the "be"
 * there acts on this test.
 */
194 andcc %g1, 8, %g0
195
/* Computed jump into copy_user_table.  Each MOVE_LASTCHUNK is 6
 * instructions (24 bytes of code) per 16 bytes of data, so the entry
 * point is copy_user_table_end - 1.5 * %g7.  src and dst are advanced by
 * %g7 first; the table addresses the data with negative offsets.
 */
196 sethi %hi(copy_user_table_end), %o5
197 srl %g7, 1, %o4
198 add %g7, %o4, %o4
199 add %o1, %g7, %o1
200 sub %o5, %o4, %o5
201 jmpl %o5 + %lo(copy_user_table_end), %g0
202 add %o0, %g7, %o0
203
/* copy_user_table: seven 16-byte copies, entered by a computed jump so
 * that only the last (%g1 & 0x70) / 16 of them execute.  %o1 / %o0 already
 * point past the data; the offsets select 16-byte slots counted backwards
 * from the pointers, and the slots are copied in ascending address order.
 */
204copy_user_table:
205 MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
206 MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
207 MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
208 MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
209 MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
210 MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
211 MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
212copy_user_table_end:
/* faults in the 42 table instructions go to fixup handler 51 */
213 EXT(copy_user_table, copy_user_table_end, 51f)
/* condition codes still reflect "andcc %g1, 8" from the code that jumped
 * here: skip the 8-byte copy when bit 3 of the length is clear
 */
214 be copy_user_last7
/* delay slot: test bit 2 of the length for copy_user_last7 */
215 andcc %g1, 4, %g0
216
/* Copy 8 bytes: one doubleword load, two word stores.  On a fault the
 * first two accesses return %g1 & 0xf; the second store returns
 * (%g1 & 0xf) - 4 because the first word has been written by then.
 */
217 EX(ldd [%o1], %g2, and %g1, 0xf)
218 add %o0, 8, %o0
219 add %o1, 8, %o1
220 EX(st %g2, [%o0 - 0x08], and %g1, 0xf)
221 EX2(st %g3, [%o0 - 0x04], and %g1, 0xf, %g1, sub %g1, 4)
/* copy_user_last7: copy the final 0..7 bytes described by the low three
 * bits of %g1, as an optional word, halfword and byte, then return 0.
 * On entry the condition codes must hold the result of testing bit 2 of
 * %g1 (both callers do that in a delay slot).  Each step tests the next
 * lower bit in the delay slot of its branch.  On a fault the EX stubs
 * return the length bits that were still outstanding at that step.
 */
222copy_user_last7:
223 be 1f
224 andcc %g1, 2, %g0
225
/* bit 2 set: copy one word */
226 EX(ld [%o1], %g2, and %g1, 7)
227 add %o1, 4, %o1
228 EX(st %g2, [%o0], and %g1, 7)
229 add %o0, 4, %o0
2301:
231 be 1f
232 andcc %g1, 1, %g0
233
/* bit 1 set: copy one halfword */
234 EX(lduh [%o1], %g2, and %g1, 3)
235 add %o1, 2, %o1
236 EX(sth %g2, [%o0], and %g1, 3)
237 add %o0, 2, %o0
2381:
239 be 1f
240 nop
241
/* bit 0 set: copy the last byte; a fault here returns 1 */
242 EX(ldub [%o1], %g2, add %g0, 1)
243 EX(stb %g2, [%o0], add %g0, 1)
2441:
/* success: return 0 (clr executes in the delay slot of retl) */
245 retl
246 clr %o0
247
/* ldd_std: 128-byte block loop for the case where both src and dst are
 * 8-byte aligned; uses doubleword loads and doubleword stores.
 * %g7 = bytes left to copy in whole 128-byte blocks, %g1 = total remaining
 * length.  __copy_user enters at ldd_std + 4 the first time, having
 * executed an identical first ldd in a branch delay slot.
 */
248ldd_std:
249 MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
250 MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
251 MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
252 MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
25381:
/* faults in the 32 instructions between ldd_std and 81 go to handler 52 */
254 EXT(ldd_std, 81b, 52f)
255 subcc %g7, 128, %g7
256 add %o1, 128, %o1
257 bne ldd_std
258 add %o0, 128, %o0
259
/* Same tail dispatch as after the st-based loop in __copy_user:
 * %g7 = bytes to copy in 16-byte chunks, then a computed jump into
 * copy_user_table (1.5 bytes of code per byte of data).  The andcc in the
 * delay slot of "be" leaves the bit 3 test in the condition codes for
 * copy_user_table_end.
 */
260 andcc %g1, 0x70, %g7
261 be copy_user_table_end
262 andcc %g1, 8, %g0
263
264 sethi %hi(copy_user_table_end), %o5
265 srl %g7, 1, %o4
266 add %g7, %o4, %o4
267 add %o1, %g7, %o1
268 sub %o5, %o4, %o5
269 jmpl %o5 + %lo(copy_user_table_end), %g0
270 add %o0, %g7, %o0
271
/* cannot_optimize: dst and src differ in their low two bits, so word
 * accesses cannot be aligned on both sides.
 * On entry: %o5 = (dst ^ src) & 3 (non-zero) and the condition codes hold
 * the result of "cmp %o2, 15" from the delay slot of the branch that came
 * here.
 *   - len <= 15            -> short_end
 *   - %o5 != 2             -> byte_chunk (byte copies, 16 per iteration)
 *   - %o5 == 2             -> halfword loop below
 * %o3 = number of bytes to copy in whole 16-byte iterations.
 */
272cannot_optimize:
273 bleu short_end
274 cmp %o5, 2
275
276 bne byte_chunk
/* delay slot: executed on both paths */
277 and %o2, 0xfffffff0, %o3
278
/* dst and src differ by 2 mod 4: halfword copies work once src is even */
279 andcc %o1, 1, %g0
280 be 10f
281 nop
282
/* src is odd: copy one byte first and recompute %o3 from the new length */
283 EXO2(ldub [%o1], %g2)
284 add %o1, 1, %o1
285 EXO2(stb %g2, [%o0])
286 sub %o2, 1, %o2
287 andcc %o2, 0xfffffff0, %o3
/* fewer than 16 bytes left after the byte: finish in short_end */
288 be short_end
289 add %o0, 1, %o0
29010:
/* 16 bytes per iteration as eight halfwords */
291 MOVE_HALFCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
292 MOVE_HALFCHUNK(o1, o0, 0x08, g2, g3, g4, g5)
29382:
/* faults in the 16 instructions between 10 and 82 go to fixup handler 53 */
294 EXT(10b, 82b, 53f)
295 subcc %o3, 0x10, %o3
296 add %o1, 0x10, %o1
297 bne 10b
298 add %o0, 0x10, %o0
/* continue at label 2 inside short_end with %o3 = even part of the tail */
299 b 2f
300 and %o2, 0xe, %o3
301
/* byte_chunk: byte-wise copy loop, 16 bytes per iteration.
 * %o3 = number of bytes left to copy in whole 16-byte iterations (set up
 * by cannot_optimize).  The negative offsets make MOVE_SHORTCHUNK address
 * bytes 0..15 ahead of the pointers, which are advanced after each pass.
 * Falls through into short_end when %o3 reaches zero.
 */
302byte_chunk:
303 MOVE_SHORTCHUNK(o1, o0, -0x02, g2, g3)
304 MOVE_SHORTCHUNK(o1, o0, -0x04, g2, g3)
305 MOVE_SHORTCHUNK(o1, o0, -0x06, g2, g3)
306 MOVE_SHORTCHUNK(o1, o0, -0x08, g2, g3)
307 MOVE_SHORTCHUNK(o1, o0, -0x0a, g2, g3)
308 MOVE_SHORTCHUNK(o1, o0, -0x0c, g2, g3)
309 MOVE_SHORTCHUNK(o1, o0, -0x0e, g2, g3)
310 MOVE_SHORTCHUNK(o1, o0, -0x10, g2, g3)
31183:
/* faults in the 32 instructions between byte_chunk and 83 go to handler 54 */
312 EXT(byte_chunk, 83b, 54f)
313 subcc %o3, 0x10, %o3
314 add %o1, 0x10, %o1
315 bne byte_chunk
316 add %o0, 0x10, %o0
317
/* short_end: copy the last 0..15 bytes of %o2 with byte accesses and
 * return 0.
 * %o3 = %o2 & 0xe is the even part; label 2 is an alternative entry for
 * code that has computed %o3 itself.  A computed jump enters the table of
 * seven MOVE_SHORTCHUNKs so that only the last %o3 / 2 of them run: each
 * is 4 instructions (16 bytes of code) per 2 bytes of data, hence the
 * shift by 3.  src and dst are advanced by %o3 first; the table addresses
 * the data with negative offsets.
 */
318short_end:
319 and %o2, 0xe, %o3
3202:
321 sethi %hi(short_table_end), %o5
322 sll %o3, 3, %o4
323 add %o0, %o3, %o0
324 sub %o5, %o4, %o5
325 add %o1, %o3, %o1
326 jmpl %o5 + %lo(short_table_end), %g0
/* delay slot: test bit 0 of the length; nothing in the table changes the
 * condition codes, so the "be" after short_table_end acts on this test
 */
327 andcc %o2, 1, %g0
32884:
329 MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
330 MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
331 MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
332 MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
333 MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
334 MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
335 MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)
336short_table_end:
/* faults in the 28 table instructions go to fixup handler 55 */
337 EXT(84b, short_table_end, 55f)
338 be 1f
339 nop
/* odd length: copy the final byte; a fault here returns 1 */
340 EX(ldub [%o1], %g2, add %g0, 1)
341 EX(stb %g2, [%o0], add %g0, 1)
3421:
/* success: return 0 */
343 retl
344 clr %o0
345
/* short_aligned_end: len <= 15 and dst/src share their low two bits.
 * On entry the condition codes hold the result of "andcc %o1, 3" from the
 * delay slot of the branch that came here.
 *   - src not word aligned -> short_end (byte copies)
 *   - otherwise copy 8 bytes if bit 3 of len is set and let
 *     copy_user_last7 handle the low three bits.
 */
346short_aligned_end:
347 bne short_end
348 andcc %o2, 8, %g0
349
350 be 1f
/* delay slot: test bit 2 of len; the loads, stores and adds below leave
 * the condition codes alone, so copy_user_last7 sees this result
 */
351 andcc %o2, 4, %g0
352
/* Copy 8 bytes as two words.  The first three accesses return %o2 on a
 * fault; the second store returns %o2 - 4 because the first word has been
 * written by then.
 */
353 EXO2(ld [%o1 + 0x00], %g2)
354 EXO2(ld [%o1 + 0x04], %g3)
355 add %o1, 8, %o1
356 EXO2(st %g2, [%o0 + 0x00])
357 EX(st %g3, [%o0 + 0x04], sub %o2, 4)
358 add %o0, 8, %o0
3591:
360 b copy_user_last7
/* delay slot: copy_user_last7 takes the length in %g1 */
361 mov %o2, %g1
362
/* Fault handling code.  Everything from here to the end of the file is
 * placed in the .fixup section.
 */
363 .section .fixup,#alloc,#execinstr
364 .align 4
/* 97: shared handler for all EXO2() sites - the result is the current
 * value of %o2.  Falls through into fixupretl.
 */
36597:
366 mov %o2, %g3
/* fixupretl: common exit of every fixup path.  Returns to the caller of
 * __copy_user with %o0 = %g3; the mov sits in the delay slot of retl.
 * Whatever a handler left in %o0 is overwritten here.
 */
367fixupretl:
Al Viro31af2f32017-03-21 17:04:45 -0400368 retl
Linus Torvalds1da177e2005-04-16 15:20:36 -0700369 mov %g3, %o0
370
371/* exception routine sets %g2 to (broken_insn - first_insn)>>2 */
/* The handlers 50-55 below are the targets named in the EXT() entries.
 * Each one turns the instruction index in %g2 into the number of bytes
 * that were not copied and leaves it in %g3 for fixupretl.
 */
37250:
373/* This magic counts how many bytes are left when crash in MOVE_BIGCHUNK
374 * happens. This is derived from the amount ldd reads, st stores, etc.
375 * x = g2 % 12;
376 * g3 = g1 + g7 - ((g2 / 12) * 32 + (x < 4) ? 0 : (x - 4) * 4);
377 * o0 += (g2 / 12) * 32;
378 */
/* What the instructions below compute, with explicit grouping:
 *   chunk  = g2 / 12;                 completed MOVE_BIGCHUNKs (0..3)
 *   x      = g2 % 12;                 instruction within the faulting one
 *   stored = (x < 4) ? 0 : (x - 4) * 4;   x < 4 means one of the four ldd
 *   g3     = (g1 & 0x7f) + g7 - chunk * 32 - stored;
 * g7 is the byte count still owed to the 128-byte loop, including the
 * iteration that faulted.  The division is done by the compare/branch
 * ladder, which subtracts 12 from g2 and 32 from g7 once per completed
 * chunk.
 * NOTE(review): the %o0 adjustments here have no visible effect in this
 * file, since fixupretl overwrites %o0 with %g3.
 */
379 cmp %g2, 12
380 add %o0, %g7, %o0
381 bcs 1f
382 cmp %g2, 24
383 bcs 2f
384 cmp %g2, 36
385 bcs 3f
386 nop
387 sub %g2, 12, %g2
388 sub %g7, 32, %g7
3893: sub %g2, 12, %g2
390 sub %g7, 32, %g7
3912: sub %g2, 12, %g2
392 sub %g7, 32, %g7
/* annulled delay slot: clr runs only when the branch is taken (x < 4) */
3931: cmp %g2, 4
394 bcs,a 60f
395 clr %g2
396 sub %g2, 4, %g2
397 sll %g2, 2, %g2
/* 60: shared tail, also used by handler 52.
 * In: %g2 = bytes already stored in the faulting chunk, %g7 = loop bytes
 * left after subtracting completed chunks.
 */
39860: and %g1, 0x7f, %g3
399 sub %o0, %g7, %o0
400 add %g3, %g7, %g3
401 ba fixupretl
402 sub %g3, %g2, %g3
/* 51: handler for faults inside copy_user_table (7 MOVE_LASTCHUNKs of 6
 * instructions each, indices 0..41).
 */
40351:
404/* i = 41 - g2; j = i % 6;
405 * g3 = (g1 & 15) + (i / 6) * 16 + (j < 4) ? (j + 1) * 4 : 16;
406 * o0 -= (i / 6) * 16 + 16;
407 */
/* What the instructions below compute, with explicit grouping:
 *   i    = 41 - g2;          table instructions after the faulting one
 *   j    = i % 6;            5 and 4 are the two ldd, 3..0 the four st
 *   tail = (j < 4) ? (j + 1) * 4 : 16;   bytes of this chunk not stored
 *   g3   = (g1 & 15) + (i / 6) * 16 + tail;
 * The loop at label 1 performs the division by 6, adding 16 to g1 for
 * every chunk that had not started yet.
 * NOTE(review): the %o0 adjustments here have no visible effect in this
 * file, since fixupretl overwrites %o0 with %g3.
 */
408 neg %g2
409 and %g1, 0xf, %g1
410 add %g2, 41, %g2
411 add %o0, %g1, %o0
/* annulled delay slot: "cmp %g2, 4" runs only when leaving the loop and
 * feeds the bcc,a at label 2
 */
4121: cmp %g2, 6
413 bcs,a 2f
414 cmp %g2, 4
415 add %g1, 16, %g1
416 b 1b
417 sub %g2, 6, %g2
/* j >= 4: tail = 16 (mov runs only when the branch is taken);
 * otherwise tail = (j + 1) * 4
 */
4182: bcc,a 2f
419 mov 16, %g2
420 inc %g2
421 sll %g2, 2, %g2
4222: add %g1, %g2, %g3
423 ba fixupretl
424 sub %o0, %g3, %o0
/* 52: handler for faults inside the ldd_std loop (4 MOVE_BIGALIGNCHUNKs of
 * 8 instructions each: 4 ldd then 4 std).
 */
42552:
426/* g3 = g1 + g7 - (g2 / 8) * 32 + (g2 & 4) ? (g2 & 3) * 8 : 0;
427 o0 += (g2 / 8) * 32 */
/* What the instructions below compute, with explicit grouping:
 *   g7    -= (g2 / 8) * 32;                  completed chunks
 *   stored = (g2 & 4) ? (g2 & 3) * 8 : 0;    bit 2 set means an std faulted
 * and then the shared tail at label 60 (in handler 50) produces
 *   g3 = (g1 & 0x7f) + g7 - stored.
 * The condition codes set by andcc survive the and/sll instructions.  The
 * "sub %g7" sits in the delay slot of a non-annulled branch and therefore
 * runs on both paths.
 * NOTE(review): the %o0 adjustment here has no visible effect in this
 * file, since fixupretl overwrites %o0 with %g3.
 */
428 andn %g2, 7, %g4
429 add %o0, %g7, %o0
430 andcc %g2, 4, %g0
431 and %g2, 3, %g2
432 sll %g4, 2, %g4
433 sll %g2, 3, %g2
434 bne 60b
435 sub %g7, %g4, %g7
/* fault was in one of the loads: nothing of this chunk was stored */
436 ba 60b
437 clr %g2
/* 53: handler for faults inside the halfword loop in cannot_optimize
 * (2 MOVE_HALFCHUNKs of 8 instructions each: 4 lduh then 4 sth).
 */
43853:
439/* g3 = o3 + (o2 & 15) - (g2 & 8) - (g2 & 4) ? (g2 & 3) * 2 : 0;
440 o0 += (g2 & 8) */
/* What the instructions below compute, with explicit grouping:
 *   done   = g2 & 8;          8 bytes when the fault is in the second
 *                             chunk (8 instructions copy 8 bytes)
 *   stored = (g2 & 4) ? (g2 & 3) * 2 : 0;   bit 2 set means an sth faulted
 *   g3     = (o2 & 15) + o3 - done - stored;
 * o3 is the byte count still owed to the 16-byte loop, including the
 * iteration that faulted.
 * NOTE(review): the %o0 adjustment here has no visible effect in this
 * file, since fixupretl overwrites %o0 with %g3.
 */
441 and %g2, 3, %g4
442 andcc %g2, 4, %g0
443 and %g2, 8, %g2
444 sll %g4, 1, %g4
445 be 1f
446 add %o0, %g2, %o0
447 add %g2, %g4, %g2
4481: and %o2, 0xf, %g3
449 add %g3, %o3, %g3
450 ba fixupretl
451 sub %g3, %g2, %g3
/* 54: handler for faults inside the byte_chunk loop (8 MOVE_SHORTCHUNKs of
 * 4 instructions each: 2 ldub then 2 stb).
 */
45254:
453/* g3 = o3 + (o2 & 15) - (g2 / 4) * 2 - (g2 & 2) ? (g2 & 1) : 0;
454 o0 += (g2 / 4) * 2 */
/* What the instructions below compute, with explicit grouping:
 *   done   = (g2 / 4) * 2;             bytes copied by completed chunks
 *   stored = (g2 & 1) & (g2 >> 1);     1 only when bits 0 and 1 of g2 are
 *                                      both set, i.e. the second stb of a
 *                                      chunk faulted after the first byte
 *                                      was written
 *   g3     = (o2 & 15) - done + o3 - stored;
 * o3 is the byte count still owed to the 16-byte loop, including the
 * iteration that faulted.  %o2, %o3, %o4 and %o5 are used as scratch here.
 * NOTE(review): the %o0 adjustment here has no visible effect in this
 * file, since fixupretl overwrites %o0 with %g3.
 */
455 srl %g2, 2, %o4
456 and %g2, 1, %o5
457 srl %g2, 1, %g2
458 add %o4, %o4, %o4
459 and %o5, %g2, %o5
460 and %o2, 0xf, %o2
461 add %o0, %o4, %o0
462 sub %o3, %o5, %o3
463 sub %o2, %o4, %o2
464 ba fixupretl
465 add %o2, %o3, %g3
/* 55: handler for faults inside the table in short_end (7 MOVE_SHORTCHUNKs
 * of 4 instructions each, indices 0..27).
 */
46655:
467/* i = 27 - g2;
468 g3 = (o2 & 1) + i / 4 * 2 + !(i & 3);
469 o0 -= i / 4 * 2 + 1 */
/* What the instructions below compute:
 *   i  = 27 - g2;             table instructions after the faulting one
 *   o5 = (i / 4) * 2;         bytes belonging to chunks not started yet
 *   g2 = ((i & 3) != 0);      "mov 1" first, then the annulled delay slot
 *                             clears it only when the branch is taken,
 *                             i.e. when (i & 3) == 0
 *   g3 = g2 + o5 + (o2 & 1);
 * NOTE(review): as coded, the last term is ((i & 3) != 0), whereas the
 * formula in the comment above is written with !(i & 3).  The two
 * disagree; confirm which one is intended and whether the result matches
 * the number of bytes the table actually left uncopied (the faulting
 * chunk itself still has 1 or 2 bytes outstanding).
 * NOTE(review): the %o0 adjustment here has no visible effect in this
 * file, since fixupretl overwrites %o0 with %g3.
 */
470 neg %g2
471 and %o2, 1, %o2
472 add %g2, 27, %g2
473 srl %g2, 2, %o5
474 andcc %g2, 3, %g0
475 mov 1, %g2
476 add %o5, %o5, %o5
477 be,a 1f
478 clr %g2
4791: add %g2, %o5, %g3
480 sub %o0, %g3, %o0
481 ba fixupretl
482 add %g3, %o2, %g3
483
/* end marker matching __copy_user_begin at the top of the code */
484 .globl __copy_user_end
485__copy_user_end: