/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Memory copy functions for 32-bit PowerPC.
 *
 * Copyright (C) 1996-2005 Paul Mackerras.
 */
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>
#include <asm/code-patching-asm.h>
#include <asm/kasan.h>

#define COPY_16_BYTES		\
	lwz	r7,4(r4);	\
	lwz	r8,8(r4);	\
	lwz	r9,12(r4);	\
	lwzu	r10,16(r4);	\
	stw	r7,4(r6);	\
	stw	r8,8(r6);	\
	stw	r9,12(r6);	\
	stwu	r10,16(r6)

#define COPY_16_BYTES_WITHEX(n)	\
8 ## n ## 0:			\
	lwz	r7,4(r4);	\
8 ## n ## 1:			\
	lwz	r8,8(r4);	\
8 ## n ## 2:			\
	lwz	r9,12(r4);	\
8 ## n ## 3:			\
	lwzu	r10,16(r4);	\
8 ## n ## 4:			\
	stw	r7,4(r6);	\
8 ## n ## 5:			\
	stw	r8,8(r6);	\
8 ## n ## 6:			\
	stw	r9,12(r6);	\
8 ## n ## 7:			\
	stwu	r10,16(r6)

#define COPY_16_BYTES_EXCODE(n)			\
9 ## n ## 0:					\
	addi	r5,r5,-(16 * n);		\
	b	104f;				\
9 ## n ## 1:					\
	addi	r5,r5,-(16 * n);		\
	b	105f;				\
	EX_TABLE(8 ## n ## 0b,9 ## n ## 0b);	\
	EX_TABLE(8 ## n ## 1b,9 ## n ## 0b);	\
	EX_TABLE(8 ## n ## 2b,9 ## n ## 0b);	\
	EX_TABLE(8 ## n ## 3b,9 ## n ## 0b);	\
	EX_TABLE(8 ## n ## 4b,9 ## n ## 1b);	\
	EX_TABLE(8 ## n ## 5b,9 ## n ## 1b);	\
	EX_TABLE(8 ## n ## 6b,9 ## n ## 1b);	\
	EX_TABLE(8 ## n ## 7b,9 ## n ## 1b)
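/*
 * COPY_16_BYTES_WITHEX(n) and COPY_16_BYTES_EXCODE(n) are used as a pair:
 * the loads and stores carry the local labels 8n0-8n7, and the EX_TABLE
 * entries route a fault on any of them to 9n0 (load side) or 9n1 (store
 * side), which adjust r5 for the part of the cache line already copied
 * before branching to the common read/write fixup code at labels 104/105
 * further down.
 */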

	.text
	.stabs	"arch/powerpc/lib/",N_SO,0,0,0f
	.stabs	"copy_32.S",N_SO,0,0,0f
0:

CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)

#ifndef CONFIG_KASAN
_GLOBAL(memset16)
	rlwinm.	r0, r5, 31, 1, 31
	addi	r6, r3, -4
	beq-	2f
	rlwimi	r4, r4, 16, 0, 15
	mtctr	r0
1:	stwu	r4, 4(r6)
	bdnz	1b
2:	andi.	r0, r5, 1
	beqlr
	sth	r4, 4(r6)
	blr
EXPORT_SYMBOL(memset16)
#endif

/*
 * Use dcbz on the complete cache lines in the destination
 * to set them to zero. This requires that the destination
 * area is cacheable. -- paulus
 *
 * During early init, the cache might not be active yet, so dcbz cannot be
 * used. We therefore skip the optimised block that uses dcbz. This jump is
 * replaced by a nop once the cache is active. This is done in machine_init()
 */
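/*
 * The overall shape, as a rough C sketch (an illustration only, not the
 * code assembled below):
 *
 *	void *memset(void *s, int c, size_t n)
 *	{
 *		unsigned char *p = s;
 *		unsigned int w = (unsigned char)c * 0x01010101u;
 *
 *		while (n >= 4) {
 *			*(unsigned int *)p = w;
 *			p += 4;
 *			n -= 4;
 *		}
 *		while (n--)
 *			*p++ = c;
 *		return s;
 *	}
 *
 * The assembly additionally word-aligns the destination first, and when
 * c is zero and the cache is enabled it clears whole cache lines with
 * dcbz instead of storing them word by word.
 */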
_GLOBAL_KASAN(memset)
	cmplwi	0,r5,4
	blt	7f

	rlwimi	r4,r4,8,16,23
	rlwimi	r4,r4,16,0,15

	stw	r4,0(r3)
	beqlr
	andi.	r0,r3,3
	add	r5,r0,r5
	subf	r6,r0,r3
	cmplwi	0,r4,0
	/*
	 * Skip the optimised block until the cache is enabled. Replaced by
	 * a 'bne' during boot so the normal path is used when r4 is not zero.
	 */
5:	b	2f
	patch_site	5b, patch__memset_nocache

	clrlwi	r7,r6,32-LG_CACHELINE_BYTES
	add	r8,r7,r5
	srwi	r9,r8,LG_CACHELINE_BYTES
	addic.	r9,r9,-1	/* total number of complete cachelines */
	ble	2f
	xori	r0,r7,CACHELINE_MASK & ~3
	srwi.	r0,r0,2
	beq	3f
	mtctr	r0
4:	stwu	r4,4(r6)
	bdnz	4b
3:	mtctr	r9
	li	r7,4
10:	dcbz	r7,r6
	addi	r6,r6,CACHELINE_BYTES
	bdnz	10b
	clrlwi	r5,r8,32-LG_CACHELINE_BYTES
	addi	r5,r5,4

2:	srwi	r0,r5,2
	mtctr	r0
	bdz	6f
1:	stwu	r4,4(r6)
	bdnz	1b
6:	andi.	r5,r5,3
	beqlr
	mtctr	r5
	addi	r6,r6,3
8:	stbu	r4,1(r6)
	bdnz	8b
	blr

7:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r6,r3,-1
9:	stbu	r4,1(r6)
	bdnz	9b
	blr
EXPORT_SYMBOL(memset)
EXPORT_SYMBOL_KASAN(memset)

/*
 * This version uses dcbz on the complete cache lines in the
 * destination area to reduce memory traffic. This requires that
 * the destination area is cacheable.
 * We only use this version if the source and dest don't overlap.
 * -- paulus.
 *
 * During early init, the cache might not be active yet, so dcbz cannot be
 * used. We therefore jump to generic_memcpy which doesn't use dcbz. This
 * jump is replaced by a nop once the cache is active. This is done in
 * machine_init()
 */
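/*
 * Arguments arrive per the 32-bit PowerPC ABI: r3 = dest, r4 = src,
 * r5 = len; memcpy/memmove return dest, which is simply left in r3.
 * The fast path copies bytes until the destination is word aligned,
 * then words up to the first cache-line boundary, then whole cache
 * lines (dcbz followed by COPY_16_BYTES), and finally the trailing
 * words and bytes.
 */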
_GLOBAL_KASAN(memmove)
	cmplw	0,r3,r4
	bgt	backwards_memcpy
	/* fall through */

_GLOBAL_KASAN(memcpy)
1:	b	generic_memcpy
	patch_site	1b, patch__memcpy_nocache

	add	r7,r3,r5	/* test if the src & dst overlap */
	add	r8,r4,r5
	cmplw	0,r4,r7
	cmplw	1,r3,r8
	crand	0,0,4		/* cr0.lt &= cr1.lt */
	blt	generic_memcpy	/* if regions overlap */

	addi	r4,r4,-4
	addi	r6,r3,-4
	neg	r0,r3
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	beq	58f

	cmplw	0,r5,r0		/* is this more than total to do? */
	blt	63f		/* if not much to do */
	andi.	r8,r0,3		/* get it word-aligned first */
	subf	r5,r0,r5
	mtctr	r8
	beq+	61f
70:	lbz	r9,4(r4)	/* do some bytes */
	addi	r4,r4,1
	addi	r6,r6,1
	stb	r9,3(r6)
	bdnz	70b
61:	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)	/* do some words */
	stwu	r9,4(r6)
	bdnz	72b

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	mtctr	r0
	beq	63f
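	/*
	 * Copy one complete cache line per iteration: clear the
	 * destination line with dcbz, then move it 16 bytes at a time.
	 */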
53:
	dcbz	r11,r6
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
#endif
#endif
#endif
	bdnz	53b

63:	srwi.	r0,r5,2
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,3
	mtctr	r0
	beq+	65f
	addi	r4,r4,3
	addi	r6,r6,3
40:	lbzu	r0,1(r4)
	stbu	r0,1(r6)
	bdnz	40b
65:	blr
EXPORT_SYMBOL(memcpy)
EXPORT_SYMBOL(memmove)
EXPORT_SYMBOL_KASAN(memcpy)
EXPORT_SYMBOL_KASAN(memmove)

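/*
 * Plain forward copy with no dcbz: used before the cache is enabled
 * (via the patch_site branch in memcpy above) and whenever the source
 * and destination regions overlap.
 */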
generic_memcpy:
	srwi.	r7,r5,3
	addi	r6,r3,-4
	addi	r4,r4,-4
	beq	2f	/* if less than 8 bytes to do */
	andi.	r0,r6,3	/* get dest word aligned */
	mtctr	r7
	bne	5f
1:	lwz	r7,4(r4)
	lwzu	r8,8(r4)
	stw	r7,4(r6)
	stwu	r8,8(r6)
	bdnz	1b
	andi.	r5,r5,7
2:	cmplwi	0,r5,4
	blt	3f
	lwzu	r0,4(r4)
	addi	r5,r5,-4
	stwu	r0,4(r6)
3:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r4,r4,3
	addi	r6,r6,3
4:	lbzu	r0,1(r4)
	stbu	r0,1(r6)
	bdnz	4b
	blr
5:	subfic	r0,r0,4
	mtctr	r0
6:	lbz	r7,4(r4)
	addi	r4,r4,1
	stb	r7,4(r6)
	addi	r6,r6,1
	bdnz	6b
	subf	r5,r0,r5
	rlwinm.	r7,r5,32-3,3,31
	beq	2b
	mtctr	r7
	b	1b

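/*
 * Copy from high addresses down so that memmove still works when the
 * destination overlaps the source at a higher address (memmove branches
 * here when dest > src).
 */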
_GLOBAL(backwards_memcpy)
	rlwinm.	r7,r5,32-3,3,31		/* r7 = r5 >> 3 */
	add	r6,r3,r5
	add	r4,r4,r5
	beq	2f
	andi.	r0,r6,3
	mtctr	r7
	bne	5f
1:	lwz	r7,-4(r4)
	lwzu	r8,-8(r4)
	stw	r7,-4(r6)
	stwu	r8,-8(r6)
	bdnz	1b
	andi.	r5,r5,7
2:	cmplwi	0,r5,4
	blt	3f
	lwzu	r0,-4(r4)
	subi	r5,r5,4
	stwu	r0,-4(r6)
3:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
4:	lbzu	r0,-1(r4)
	stbu	r0,-1(r6)
	bdnz	4b
	blr
5:	mtctr	r0
6:	lbzu	r7,-1(r4)
	stbu	r7,-1(r6)
	bdnz	6b
	subf	r5,r0,r5
	rlwinm.	r7,r5,32-3,3,31
	beq	2b
	mtctr	r7
	b	1b

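/*
 * __copy_tofrom_user: the same cache-line based copy as memcpy, but every
 * access that may touch user memory has an exception table entry so that
 * a fault lands in the fixup code below.  Returns 0 on success, otherwise
 * the number of bytes not copied.
 */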
_GLOBAL(__copy_tofrom_user)
	addi	r4,r4,-4
	addi	r6,r3,-4
	neg	r0,r3
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	beq	58f

	cmplw	0,r5,r0		/* is this more than total to do? */
	blt	63f		/* if not much to do */
	andi.	r8,r0,3		/* get it word-aligned first */
	mtctr	r8
	beq+	61f
70:	lbz	r9,4(r4)	/* do some bytes */
71:	stb	r9,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	70b
61:	subf	r5,r0,r5
	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)	/* do some words */
73:	stwu	r9,4(r6)
	bdnz	72b

	EX_TABLE(70b,100f)
	EX_TABLE(71b,101f)
	EX_TABLE(72b,102f)
	EX_TABLE(73b,103f)

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	beq	63f

	/* Here we decide how far ahead to prefetch the source */
	li	r3,4
	cmpwi	r0,1
	li	r7,0
	ble	114f
	li	r7,1
#if MAX_COPY_PREFETCH > 1
	/* Heuristically, for large transfers we prefetch
	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
	   we prefetch 1 cacheline ahead. */
	cmpwi	r0,MAX_COPY_PREFETCH
	ble	112f
	li	r7,MAX_COPY_PREFETCH
112:	mtctr	r7
111:	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
	bdnz	111b
#else
	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
#endif /* MAX_COPY_PREFETCH > 1 */

114:	subf	r8,r7,r0
	mr	r0,r7
	mtctr	r8

53:	dcbt	r3,r4
54:	dcbz	r11,r6
	EX_TABLE(54b,105f)
/* the main body of the cacheline loop */
	COPY_16_BYTES_WITHEX(0)
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES_WITHEX(1)
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES_WITHEX(2)
	COPY_16_BYTES_WITHEX(3)
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES_WITHEX(4)
	COPY_16_BYTES_WITHEX(5)
	COPY_16_BYTES_WITHEX(6)
	COPY_16_BYTES_WITHEX(7)
#endif
#endif
#endif
	bdnz	53b
	cmpwi	r0,0
	li	r3,4
	li	r7,0
	bne	114b

63:	srwi.	r0,r5,2
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
31:	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,3
	mtctr	r0
	beq+	65f
40:	lbz	r0,4(r4)
41:	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	40b
65:	li	r3,0
	blr

/* read fault, initial single-byte copy */
100:	li	r9,0
	b	90f
/* write fault, initial single-byte copy */
101:	li	r9,1
90:	subf	r5,r8,r5
	li	r3,0
	b	99f
/* read fault, initial word copy */
102:	li	r9,0
	b	91f
/* write fault, initial word copy */
103:	li	r9,1
91:	li	r3,2
	b	99f

/*
 * this stuff handles faults in the cacheline loop and branches to either
 * 104f (if in read part) or 105f (if in write part), after updating r5
 */
	COPY_16_BYTES_EXCODE(0)
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES_EXCODE(1)
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES_EXCODE(2)
	COPY_16_BYTES_EXCODE(3)
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES_EXCODE(4)
	COPY_16_BYTES_EXCODE(5)
	COPY_16_BYTES_EXCODE(6)
	COPY_16_BYTES_EXCODE(7)
#endif
#endif
#endif

/* read fault in cacheline loop */
104:	li	r9,0
	b	92f
/* fault on dcbz (effectively a write fault) */
/* or write fault in cacheline loop */
105:	li	r9,1
92:	li	r3,LG_CACHELINE_BYTES
	mfctr	r8
	add	r0,r0,r8
	b	106f
/* read fault in final word loop */
108:	li	r9,0
	b	93f
/* write fault in final word loop */
109:	li	r9,1
93:	andi.	r5,r5,3
	li	r3,2
	b	99f
/* read fault in final byte loop */
110:	li	r9,0
	b	94f
/* write fault in final byte loop */
111:	li	r9,1
94:	li	r5,0
	li	r3,0
/*
 * At this stage the number of bytes not copied is
 * r5 + (ctr << r3), and r9 is 0 for read or 1 for write.
 */
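/*
 * Example (an illustration, not taken from a real trace): a fault in the
 * trailing word loop sets r3 = 2; if ctr = 3 words were still to go and
 * r5 = 1 byte was left over, the caller is told that
 * 1 + (3 << 2) = 13 bytes were not copied.
 */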
99:	mfctr	r0
106:	slw	r3,r0,r3
	add.	r3,r3,r5
	beq	120f		/* shouldn't happen */
	cmpwi	0,r9,0
	bne	120f
/* for a read fault, first try to continue the copy one byte at a time */
	mtctr	r3
130:	lbz	r0,4(r4)
131:	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	130b
/* then return the number of bytes not copied; 132 is also the fixup
   target if the byte-at-a-time copy itself faults */
132:	mfctr	r3
120:	blr

	EX_TABLE(30b,108b)
	EX_TABLE(31b,109b)
	EX_TABLE(40b,110b)
	EX_TABLE(41b,111b)
	EX_TABLE(130b,132b)
	EX_TABLE(131b,120b)

EXPORT_SYMBOL(__copy_tofrom_user)