/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>
#include <asm/asm-compat.h>
#include <asm/feature-fixups.h>

#ifndef SELFTEST_CASE
/* 0 == most CPUs, 1 == POWER6, 2 == Cell */
#define SELFTEST_CASE	0
#endif

#ifdef __BIG_ENDIAN__
#define sLd sld		/* Shift towards low-numbered address. */
#define sHd srd		/* Shift towards high-numbered address. */
#else
#define sLd srd		/* Shift towards low-numbered address. */
#define sHd sld		/* Shift towards high-numbered address. */
#endif
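/*
 * These are used by the unaligned-source path (.Lsrc_unaligned below):
 * the source pointer is rounded down to an 8-byte boundary, r10 is set
 * to 8 * (source misalignment) and r11 to 64 - r10, and pairs such as
 *	sLd	r6,r9,r10
 *	sHd	r7,r0,r11
 *	or	r7,r7,r6
 * splice the useful bytes of two adjacent source doublewords into one
 * aligned destination doubleword, independent of endianness.
 */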

/*
 * These macros are used to generate exception table entries.
 * The exception handlers below use the original arguments
 * (stored on the stack) and the point where we're up to in
 * the destination buffer, i.e. the address of the first
 * unmodified byte.  Generally r3 points into the destination
 * buffer, but the first unmodified byte is at a variable
 * offset from r3.  In the code below, the symbol r3_offset
 * is set to indicate the current offset at each point in
 * the code.  This offset is then used as a negative offset
 * from the exception handler code, and those instructions
 * before the exception handlers are addi instructions that
 * adjust r3 to point to the correct place.
 */
	.macro lex		/* exception handler for load */
100:	EX_TABLE(100b, .Lld_exc - r3_offset)
	.endm

	.macro stex		/* exception handler for store */
100:	EX_TABLE(100b, .Lst_exc - r3_offset)
	.endm
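/*
 * For example, the first protected load on the aligned path,
 *	lex;	ld	r7,0(r4)
 * is emitted at a point where r3_offset = 16 (r3 has just been
 * decremented by 16), so its fixup address is .Lld_exc - 16.  The
 * 16 bytes of code immediately before .Lld_exc are two
 * "addi r3,r3,8; nop" pairs, so the fixup adds 16 back to r3 and then
 * falls into the handler with r3 pointing at the first unmodified
 * destination byte.
 */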
45
Paul Mackerras14cf11a2005-09-26 16:04:21 +100046 .align 7
Anton Blanchard169c7ce2014-04-03 16:01:11 +110047_GLOBAL_TOC(__copy_tofrom_user)
Nicholas Piggin15a32042018-02-21 05:08:26 +100048#ifdef CONFIG_PPC_BOOK3S_64
Anton Blancharda66086b2011-12-07 20:11:45 +000049BEGIN_FTR_SECTION
50 nop
51FTR_SECTION_ELSE
52 b __copy_tofrom_user_power7
53ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
Nicholas Piggin15a32042018-02-21 05:08:26 +100054#endif
Anton Blancharda66086b2011-12-07 20:11:45 +000055_GLOBAL(__copy_tofrom_user_base)
Paul Mackerrasa7c81ce2018-08-03 20:13:03 +100056 /* first check for a 4kB copy on a 4kB boundary */
Paul Mackerras14cf11a2005-09-26 16:04:21 +100057 cmpldi cr1,r5,16
58 cmpdi cr6,r5,4096
59 or r0,r3,r4
60 neg r6,r3 /* LS 3 bits = # bytes to 8-byte dest bdry */
61 andi. r0,r0,4095
62 std r3,-24(r1)
63 crand cr0*4+2,cr0*4+2,cr6*4+2
64 std r4,-16(r1)
65 std r5,-8(r1)
66 dcbt 0,r4
Benjamin Herrenschmidt3c726f82005-11-07 11:06:55 +110067 beq .Lcopy_page_4K
Paul Mackerras14cf11a2005-09-26 16:04:21 +100068 andi. r6,r6,7
Anton Blanchard694caf02012-04-18 02:21:52 +000069 PPC_MTOCRF(0x01,r5)
Paul Mackerras14cf11a2005-09-26 16:04:21 +100070 blt cr1,.Lshort_copy
Mark Nelsona4e22f02008-11-11 00:53:34 +000071/* Below we want to nop out the bne if we're on a CPU that has the
72 * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
73 * cleared.
74 * At the time of writing the only CPU that has this combination of bits
75 * set is Power6.
76 */
Paul Mackerras98c45f52018-08-03 20:13:04 +100077test_feature = (SELFTEST_CASE == 1)
Mark Nelsona4e22f02008-11-11 00:53:34 +000078BEGIN_FTR_SECTION
79 nop
80FTR_SECTION_ELSE
Paul Mackerras14cf11a2005-09-26 16:04:21 +100081 bne .Ldst_unaligned
Mark Nelsona4e22f02008-11-11 00:53:34 +000082ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
83 CPU_FTR_UNALIGNED_LD_STD)
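/*
 * The BEGIN_FTR_SECTION/FTR_SECTION_ELSE/ALT_FTR_SECTION_END blocks
 * assemble both alternatives; at boot the feature-fixup code patches in
 * either the nop or the branch according to the CPU feature bits named
 * in the END macro.  The test_feature assignments are not consumed by
 * the kernel's fixup machinery; they are there so the user-space
 * copy-loop selftests, which provide their own versions of these
 * macros, can use SELFTEST_CASE to force each path to be exercised.
 */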
.Ldst_aligned:
	addi	r3,r3,-16
r3_offset = 16
test_feature = (SELFTEST_CASE == 0)
BEGIN_FTR_SECTION
	andi.	r0,r4,7
	bne	.Lsrc_unaligned
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
	blt	cr1,.Ldo_tail		/* if < 16 bytes to copy */
	srdi	r0,r5,5
	cmpdi	cr1,r0,0
lex;	ld	r7,0(r4)
lex;	ld	r6,8(r4)
	addi	r4,r4,16
	mtctr	r0
	andi.	r0,r5,0x10
	beq	22f
	addi	r3,r3,16
r3_offset = 0
	addi	r4,r4,-16
	mr	r9,r7
	mr	r8,r6
	beq	cr1,72f
21:
lex;	ld	r7,16(r4)
lex;	ld	r6,24(r4)
	addi	r4,r4,32
stex;	std	r9,0(r3)
r3_offset = 8
stex;	std	r8,8(r3)
r3_offset = 16
22:
lex;	ld	r9,0(r4)
lex;	ld	r8,8(r4)
stex;	std	r7,16(r3)
r3_offset = 24
stex;	std	r6,24(r3)
	addi	r3,r3,32
r3_offset = 0
	bdnz	21b
72:
stex;	std	r9,0(r3)
r3_offset = 8
stex;	std	r8,8(r3)
r3_offset = 16
	andi.	r5,r5,0xf
	beq+	3f
	addi	r4,r4,16
.Ldo_tail:
	addi	r3,r3,16
r3_offset = 0
	bf	cr7*4+0,246f
lex;	ld	r9,0(r4)
	addi	r4,r4,8
stex;	std	r9,0(r3)
	addi	r3,r3,8
246:	bf	cr7*4+1,1f
lex;	lwz	r9,0(r4)
	addi	r4,r4,4
stex;	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
lex;	lhz	r9,0(r4)
	addi	r4,r4,2
stex;	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
lex;	lbz	r9,0(r4)
stex;	stb	r9,0(r3)
3:	li	r3,0
	blr

.Lsrc_unaligned:
r3_offset = 16
	srdi	r6,r5,3
	addi	r5,r5,-16
	subf	r4,r0,r4
	srdi	r7,r5,4
	sldi	r10,r0,3
	cmpldi	cr6,r6,3
	andi.	r5,r5,7
	mtctr	r7
	subfic	r11,r10,64
	add	r5,r5,r0
	bt	cr7*4+0,28f

lex;	ld	r9,0(r4)	/* 3+2n loads, 2+2n stores */
lex;	ld	r0,8(r4)
	sLd	r6,r9,r10
lex;	ldu	r9,16(r4)
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	or	r7,r7,r6
	blt	cr6,79f
lex;	ld	r0,8(r4)
	b	2f

28:
lex;	ld	r0,0(r4)	/* 4+2n loads, 3+2n stores */
lex;	ldu	r9,8(r4)
	sLd	r8,r0,r10
	addi	r3,r3,-8
r3_offset = 24
	blt	cr6,5f
lex;	ld	r0,8(r4)
	sHd	r12,r9,r11
	sLd	r6,r9,r10
lex;	ldu	r9,16(r4)
	or	r12,r8,r12
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	addi	r3,r3,16
r3_offset = 8
	beq	cr6,78f

1:	or	r7,r7,r6
lex;	ld	r0,8(r4)
stex;	std	r12,8(r3)
r3_offset = 16
2:	sHd	r12,r9,r11
	sLd	r6,r9,r10
lex;	ldu	r9,16(r4)
	or	r12,r8,r12
stex;	stdu	r7,16(r3)
r3_offset = 8
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	bdnz	1b

78:
stex;	std	r12,8(r3)
r3_offset = 16
	or	r7,r7,r6
79:
stex;	std	r7,16(r3)
r3_offset = 24
5:	sHd	r12,r9,r11
	or	r12,r8,r12
stex;	std	r12,24(r3)
r3_offset = 32
	bne	6f
	li	r3,0
	blr
6:	cmpwi	cr1,r5,8
	addi	r3,r3,32
r3_offset = 0
	sLd	r9,r9,r10
	ble	cr1,7f
lex;	ld	r0,8(r4)
	sHd	r7,r0,r11
	or	r9,r7,r9
7:
	bf	cr7*4+1,1f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,32
#endif
stex;	stw	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,32
#endif
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,16
#endif
stex;	sth	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,16
#endif
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,8
#endif
stex;	stb	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,8
#endif
3:	li	r3,0
	blr

.Ldst_unaligned:
r3_offset = 0
	PPC_MTOCRF(0x01,r6)		/* put #bytes to 8B bdry into cr7 */
	subf	r5,r6,r5
	li	r7,0
	cmpldi	cr1,r5,16
	bf	cr7*4+3,1f
100:	EX_TABLE(100b, .Lld_exc_r7)
	lbz	r0,0(r4)
100:	EX_TABLE(100b, .Lst_exc_r7)
	stb	r0,0(r3)
	addi	r7,r7,1
1:	bf	cr7*4+2,2f
100:	EX_TABLE(100b, .Lld_exc_r7)
	lhzx	r0,r7,r4
100:	EX_TABLE(100b, .Lst_exc_r7)
	sthx	r0,r7,r3
	addi	r7,r7,2
2:	bf	cr7*4+1,3f
100:	EX_TABLE(100b, .Lld_exc_r7)
	lwzx	r0,r7,r4
100:	EX_TABLE(100b, .Lst_exc_r7)
	stwx	r0,r7,r3
3:	PPC_MTOCRF(0x01,r5)
	add	r4,r6,r4
	add	r3,r6,r3
	b	.Ldst_aligned

.Lshort_copy:
r3_offset = 0
	bf	cr7*4+0,1f
lex;	lwz	r0,0(r4)
lex;	lwz	r9,4(r4)
	addi	r4,r4,8
stex;	stw	r0,0(r3)
stex;	stw	r9,4(r3)
	addi	r3,r3,8
1:	bf	cr7*4+1,2f
lex;	lwz	r0,0(r4)
	addi	r4,r4,4
stex;	stw	r0,0(r3)
	addi	r3,r3,4
2:	bf	cr7*4+2,3f
lex;	lhz	r0,0(r4)
	addi	r4,r4,2
stex;	sth	r0,0(r3)
	addi	r3,r3,2
3:	bf	cr7*4+3,4f
lex;	lbz	r0,0(r4)
stex;	stb	r0,0(r3)
4:	li	r3,0
	blr

/*
 * exception handlers follow
 * we have to return the number of bytes not copied
 * for an exception on a load, we keep copying byte by byte (see
 * .Lld_exc below) to transfer as much as possible before returning
 * Note that the number of bytes of instructions for adjusting r3 needs
 * to equal the amount of the adjustment, due to the trick of using
 * .Lld_exc - r3_offset as the handler address.
 */

.Lld_exc_r7:
	add	r3,r3,r7
	b	.Lld_exc

	/* adjust by 24 */
	addi	r3,r3,8
	nop
	/* adjust by 16 */
	addi	r3,r3,8
	nop
	/* adjust by 8 */
	addi	r3,r3,8
	nop

/*
 * Here we have had a fault on a load and r3 points to the first
 * unmodified byte of the destination.  We use the original arguments
 * and r3 to work out how much wasn't copied.  Since we load some
 * distance ahead of the stores, we continue copying byte-by-byte until
 * we hit the load fault again in order to copy as much as possible.
 */
.Lld_exc:
	ld	r6,-24(r1)
	ld	r4,-16(r1)
	ld	r5,-8(r1)
	subf	r6,r6,r3
	add	r4,r4,r6
	subf	r5,r6,r5	/* #bytes left to go */

/*
 * first see if we can copy any more bytes before hitting another exception
 */
	mtctr	r5
r3_offset = 0
100:	EX_TABLE(100b, .Ldone)
43:	lbz	r0,0(r4)
	addi	r4,r4,1
stex;	stb	r0,0(r3)
	addi	r3,r3,1
	bdnz	43b
	li	r3,0		/* huh? all copied successfully this time? */
	blr

/*
 * here we have trapped again, amount remaining is in ctr.
 */
.Ldone:
	mfctr	r3
	blr

/*
 * exception handlers for stores: we need to work out how many bytes
 * weren't copied, and we may need to copy some more.
 * Note that the number of bytes of instructions for adjusting r3 needs
 * to equal the amount of the adjustment, due to the trick of using
 * .Lst_exc - r3_offset as the handler address.
 */
.Lst_exc_r7:
	add	r3,r3,r7
	b	.Lst_exc

	/* adjust by 24 */
	addi	r3,r3,8
	nop
	/* adjust by 16 */
	addi	r3,r3,8
	nop
	/* adjust by 8 */
	addi	r3,r3,4
	/* adjust by 4 */
	addi	r3,r3,4
.Lst_exc:
	ld	r6,-24(r1)	/* original destination pointer */
	ld	r4,-16(r1)	/* original source pointer */
	ld	r5,-8(r1)	/* original number of bytes */
	add	r7,r6,r5
	/*
	 * If the destination pointer isn't 8-byte aligned,
	 * we may have got the exception as a result of a
	 * store that overlapped a page boundary, so we may be
	 * able to copy a few more bytes.
	 */
17:	andi.	r0,r3,7
	beq	19f
	subf	r8,r6,r3	/* #bytes copied */
100:	EX_TABLE(100b,19f)
	lbzx	r0,r8,r4
100:	EX_TABLE(100b,19f)
	stb	r0,0(r3)
	addi	r3,r3,1
	cmpld	r3,r7
	blt	17b
19:	subf	r3,r3,r7	/* #bytes not copied in r3 */
	blr

/*
 * Routine to copy a whole page of data, optimized for POWER4.
 * On POWER4 it is more than 50% faster than the simple loop
 * above (following the .Ldst_aligned label).
 */
	.macro exc
100:	EX_TABLE(100b, .Labort)
	.endm
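/*
 * The page-copy loop below keeps six load streams in flight, reading
 * from source offsets 128 bytes (one POWER4 cache line) apart and
 * issuing the loads well ahead of the matching stores, using r20-r31
 * (saved below the stack pointer at entry) as extra copy registers.
 * Any faulting access branches to .Labort, which restores those
 * registers and redoes the whole page through the ordinary copy path
 * above.
 */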
.Lcopy_page_4K:
	std	r31,-32(1)
	std	r30,-40(1)
	std	r29,-48(1)
	std	r28,-56(1)
	std	r27,-64(1)
	std	r26,-72(1)
	std	r25,-80(1)
	std	r24,-88(1)
	std	r23,-96(1)
	std	r22,-104(1)
	std	r21,-112(1)
	std	r20,-120(1)
	li	r5,4096/32 - 1
	addi	r3,r3,-8
	li	r0,5
0:	addi	r5,r5,-24
	mtctr	r0
exc;	ld	r22,640(4)
exc;	ld	r21,512(4)
exc;	ld	r20,384(4)
exc;	ld	r11,256(4)
exc;	ld	r9,128(4)
exc;	ld	r7,0(4)
exc;	ld	r25,648(4)
exc;	ld	r24,520(4)
exc;	ld	r23,392(4)
exc;	ld	r10,264(4)
exc;	ld	r8,136(4)
exc;	ldu	r6,8(4)
	cmpwi	r5,24
1:
exc;	std	r22,648(3)
exc;	std	r21,520(3)
exc;	std	r20,392(3)
exc;	std	r11,264(3)
exc;	std	r9,136(3)
exc;	std	r7,8(3)
exc;	ld	r28,648(4)
exc;	ld	r27,520(4)
exc;	ld	r26,392(4)
exc;	ld	r31,264(4)
exc;	ld	r30,136(4)
exc;	ld	r29,8(4)
exc;	std	r25,656(3)
exc;	std	r24,528(3)
exc;	std	r23,400(3)
exc;	std	r10,272(3)
exc;	std	r8,144(3)
exc;	std	r6,16(3)
exc;	ld	r22,656(4)
exc;	ld	r21,528(4)
exc;	ld	r20,400(4)
exc;	ld	r11,272(4)
exc;	ld	r9,144(4)
exc;	ld	r7,16(4)
exc;	std	r28,664(3)
exc;	std	r27,536(3)
exc;	std	r26,408(3)
exc;	std	r31,280(3)
exc;	std	r30,152(3)
exc;	stdu	r29,24(3)
exc;	ld	r25,664(4)
exc;	ld	r24,536(4)
exc;	ld	r23,408(4)
exc;	ld	r10,280(4)
exc;	ld	r8,152(4)
exc;	ldu	r6,24(4)
	bdnz	1b
exc;	std	r22,648(3)
exc;	std	r21,520(3)
exc;	std	r20,392(3)
exc;	std	r11,264(3)
exc;	std	r9,136(3)
exc;	std	r7,8(3)
	addi	r4,r4,640
	addi	r3,r3,648
	bge	0b
	mtctr	r5
exc;	ld	r7,0(4)
exc;	ld	r8,8(4)
exc;	ldu	r9,16(4)
3:
exc;	ld	r10,8(4)
exc;	std	r7,8(3)
exc;	ld	r7,16(4)
exc;	std	r8,16(3)
exc;	ld	r8,24(4)
exc;	std	r9,24(3)
exc;	ldu	r9,32(4)
exc;	stdu	r10,32(3)
	bdnz	3b
4:
exc;	ld	r10,8(4)
exc;	std	r7,8(3)
exc;	std	r8,16(3)
exc;	std	r9,24(3)
exc;	std	r10,32(3)
9:	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	li	r3,0
	blr

/*
 * on an exception, reset to the beginning and jump back into the
 * standard __copy_tofrom_user
 */
.Labort:
	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	ld	r3,-24(r1)
	ld	r4,-16(r1)
	li	r5,4096
	b	.Ldst_aligned
EXPORT_SYMBOL(__copy_tofrom_user)