blob: 0fd5c10e90a7de480c693b4a1b969947d11f017b [file] [log] [blame]
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 *  linux/arch/arm/lib/csumpartialcopygeneric.S
 *
 *  Copyright (C) 1995-2001 Russell King
 */
Russell King6ebbf2c2014-06-30 16:29:12 +01007#include <asm/assembler.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -07008
/*
 * unsigned int
 * csum_partial_copy_xxx(const char *src, char *dst, int len)
 *  r0 = src, r1 = dst, r2 = len
 *  r3 = running sum.  The entry code sets it to -1 before any data is
 *       summed, so whatever the caller left in r3 is not used.
 *  Returns : r0 = checksum
 *
 * Note that 'tst' and 'teq' preserve the carry flag.
 */

/*
 * Symbolic names for the registers every routine in this file works on:
 *   src (r0) - pointer the load macros read from
 *   dst (r1) - pointer the copied bytes are stored through
 *   len (r2) - number of bytes still to copy
 *   sum (r3) - running checksum accumulator
 */
src	.req	r0
dst	.req	r1
len	.req	r2
sum	.req	r3

/*
 * Zero-length exit, branched to from .Lless8 when len == 0: the running
 * sum is handed back in r0 without having had any data added to it.
 * load_regs is a macro supplied outside this file; it is presumably the
 * epilogue matching save_regs and returns to the caller - confirm
 * against the file that includes this one.
 */
.Lzero:		mov	r0, sum
		load_regs

/*
 * Align an unaligned destination pointer.  We know that
 * we have >= 8 bytes here, so we don't need to check
 * the length.  Note that the source pointer hasn't been
 * aligned yet.
 *
 * Entered with 'blne' from the main entry path when dst & 3 != 0, and
 * returns through lr.  One byte is copied if dst is odd; if dst is then
 * still only 16-bit aligned, two more follow, so at most three bytes
 * are consumed.  Each byte is added to the checksum through put_byte_0
 * or put_byte_1 (operand macros defined outside this file) according to
 * the parity of the address it is stored to.
 *
 * 'sub', 'strb' and 'tst' do not touch C, so the carry chain runs
 * unbroken through the adcs instructions and back into the caller.
 * Scratch registers: ip, r8.  load1b/load2b are macros defined outside
 * this file; presumably they fetch one/two bytes from src and advance
 * it.
 */
.Ldst_unaligned:
		tst	dst, #1
		beq	.Ldst_16bit		@ dst even: halfword step only

		load1b	ip
		sub	len, len, #1
		adcs	sum, sum, ip, put_byte_1	@ update checksum
		strb	ip, [dst], #1
		tst	dst, #2
		reteq	lr			@ dst is now 32bit aligned

.Ldst_16bit:	load2b	r8, ip
		sub	len, len, #2
		adcs	sum, sum, r8, put_byte_0
		strb	r8, [dst], #1
		adcs	sum, sum, ip, put_byte_1
		strb	ip, [dst], #1
		ret	lr			@ dst is now 32bit aligned

/*
 * Handle 0 to 7 bytes, with any alignment of source and
 * destination pointers.  Note that when we get here, C = 0
 *
 * Reached from the entry code through 'blo' after 'cmp len, #8'.
 * Everything is copied with byte loads and byte stores:
 *   - len == 0 leaves through .Lzero;
 *   - an odd dst first takes a single byte (added via put_byte_1);
 *   - then byte pairs are copied for as long as len & 6 is non-zero;
 *   - a final odd byte, if any, is added via put_byte_0.
 * The result is finished off in .Ldone.  Scratch registers: ip, r8.
 */
.Lless8:	teq	len, #0			@ check for zero count
		beq	.Lzero

		/* we must have at least one byte. */
		tst	dst, #1			@ dst 16-bit aligned
		beq	.Lless8_aligned

		/* Align dst */
		load1b	ip
		sub	len, len, #1
		adcs	sum, sum, ip, put_byte_1	@ update checksum
		strb	ip, [dst], #1
		tst	len, #6
		beq	.Lless8_byteonly

		/* Copy one byte pair per pass; the loop test is at .Lless8_aligned. */
1:		load2b	r8, ip
		sub	len, len, #2
		adcs	sum, sum, r8, put_byte_0
		strb	r8, [dst], #1
		adcs	sum, sum, ip, put_byte_1
		strb	ip, [dst], #1
.Lless8_aligned:
		tst	len, #6
		bne	1b
.Lless8_byteonly:
		tst	len, #1
		beq	.Ldone
		load1b	r8
		adcs	sum, sum, r8, put_byte_0	@ update checksum
		strb	r8, [dst], #1
		b	.Ldone

/*
 * Function entry and the fast path for a word-aligned source.
 *
 * FN_ENTRY, save_regs and the loadN{b,l} macros are supplied by the
 * file that includes this one.  save_regs presumably pushes dst and the
 * callee-saved registers used below (.Ldone reads dst back from
 * [sp, #0]) - confirm against the includer.
 *
 * Flow:
 *   - sum is seeded with -1;
 *   - fewer than 8 bytes: handled entirely by .Lless8;
 *   - otherwise dst is brought to a 32-bit boundary by .Ldst_unaligned;
 *   - a misaligned src continues in .Lsrc_not_aligned;
 *   - an aligned src is copied here in 16-, 8- and 4-byte pieces,
 *     followed by a 1-3 byte tail.
 * .Lexit is the common "maybe one last byte" tail shared with the
 * misaligned-source paths; it falls through into .Ldone.
 *
 * The checksum is accumulated with adcs, so C is live between the
 * additions.  The instructions placed between them here (tst, teq,
 * sub, mov, stores, and bics/ands with these small immediates) leave C
 * unchanged.
 */
FN_ENTRY
		save_regs
		mov	sum, #-1

		cmp	len, #8			@ Ensure that we have at least
		blo	.Lless8			@ 8 bytes to copy.

		adds	sum, sum, #0		@ C = 0
		tst	dst, #3			@ Test destination alignment
		blne	.Ldst_unaligned		@ align destination, return here

		/*
		 * Ok, the dst pointer is now 32bit aligned, and we know
		 * that we must have more than 4 bytes to copy.  Note
		 * that C contains the carry from the dst alignment above.
		 */

		tst	src, #3			@ Test source alignment
		bne	.Lsrc_not_aligned

		/* Routine for src & dst aligned */

		bics	ip, len, #15		@ ip = bytes in whole 16-byte chunks
		beq	2f

1:		load4l	r4, r5, r6, r7
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		sub	ip, ip, #16
		teq	ip, #0
		bne	1b

2:		ands	ip, len, #12		@ remaining whole words: 8 and/or 4 bytes
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r4, r5
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		tst	ip, #4
		beq	4f

3:		load1l	r4
		str	r4, [dst], #4
		adcs	sum, sum, r4

		/*
		 * Tail of 1-3 bytes.  load1l presumably fetches a full
		 * word, of which only the leading len bytes are stored
		 * and summed.
		 */
4:		ands	len, len, #3
		beq	.Ldone
		load1l	r4
		tst	len, #2
		mov	r5, r4, get_byte_0
		beq	.Lexit
		adcs	sum, sum, r4, lspush #16
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_1
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_2
		/*
		 * r5 = candidate last byte, possibly with other bytes
		 * above it; it is stored and summed only if len is odd.
		 */
.Lexit:		tst	len, #1
		strbne	r5, [dst], #1
		andne	r5, r5, #255
		adcsne	sum, sum, r5, put_byte_0

/*
 * Common exit: fold the outstanding carry into the result and undo the
 * byte-lane skew caused by an odd destination address.
 *
 * If the dst pointer was not 16-bit aligned, we
 * need to rotate the checksum here to get around
 * the inefficient byte manipulations in the
 * architecture independent code.
 *
 * The original dst is read back from the stack slot at [sp, #0]; that
 * slot is presumably filled by save_regs - confirm against the
 * includer.  'sum' is reused as a scratch register once r0 holds the
 * result.
 */
.Ldone:		adc	r0, sum, #0
		ldr	sum, [sp, #0]		@ dst
		tst	sum, #1
		movne	r0, r0, ror #8
		load_regs

/*
 * Source is not word aligned (dst already is).  src is rounded down to
 * a word boundary and the data is fetched with whole-word loads; the
 * output words are assembled by shifting and OR-ing neighbouring source
 * words.  lspull/lspush are shift macros defined outside this file
 * (presumably the endian-neutral pair from asm/assembler.h).
 *
 * Dispatch on src & 3:
 *   1 -> handled below
 *   2 -> .Lsrc2_aligned
 *   3 -> .Lsrc3_aligned
 * 'cmp ip, #2' leaves C = 0 only for offset 1, which is why this path
 * can start its adcs chain directly while the other two clear C first.
 *
 * Offset 1: r4 always carries the three not-yet-stored bytes left over
 * from the previous source word; one byte of the next word completes
 * each output word.
 */
.Lsrc_not_aligned:
		adc	sum, sum, #0		@ include C from dst alignment
		and	ip, src, #3
		bic	src, src, #3
		load1l	r5
		cmp	ip, #2
		beq	.Lsrc2_aligned
		bhi	.Lsrc3_aligned
		mov	r4, r5, lspull #8		@ C = 0
		bics	ip, len, #15
		beq	2f
1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, lspush #24
		mov	r5, r5, lspull #8
		orr	r5, r5, r6, lspush #24
		mov	r6, r6, lspull #8
		orr	r6, r6, r7, lspush #24
		mov	r7, r7, lspull #8
		orr	r7, r7, r8, lspush #24
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		mov	r4, r8, lspull #8
		sub	ip, ip, #16
		teq	ip, #0
		bne	1b
2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r5, r6
		orr	r4, r4, r5, lspush #24
		mov	r5, r5, lspull #8
		orr	r5, r5, r6, lspush #24
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		mov	r4, r6, lspull #8
		tst	ip, #4
		beq	4f
3:		load1l	r5
		orr	r4, r4, r5, lspush #24
		str	r4, [dst], #4
		adcs	sum, sum, r4
		mov	r4, r5, lspull #8
		/*
		 * Tail of 1-3 bytes: all of them are already pending in
		 * r4, so no further load is needed.
		 */
4:		ands	len, len, #3
		beq	.Ldone
		mov	r5, r4, get_byte_0
		tst	len, #2
		beq	.Lexit
		adcs	sum, sum, r4, lspush #16
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_1
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_2
		b	.Lexit

/*
 * Source offset 2 within its word.  Entered from .Lsrc_not_aligned with
 * r5 = first aligned source word and src already rounded down.  r4
 * carries the two not-yet-stored bytes of the previous source word;
 * two bytes of the next word complete each output word.
 *
 * The dispatching 'cmp ip, #2' left C set, so 'adds sum, sum, #0'
 * clears it before the adcs chain starts.
 */
.Lsrc2_aligned:	mov	r4, r5, lspull #16
		adds	sum, sum, #0
		bics	ip, len, #15
		beq	2f
1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, lspush #16
		mov	r5, r5, lspull #16
		orr	r5, r5, r6, lspush #16
		mov	r6, r6, lspull #16
		orr	r6, r6, r7, lspush #16
		mov	r7, r7, lspull #16
		orr	r7, r7, r8, lspush #16
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		mov	r4, r8, lspull #16
		sub	ip, ip, #16
		teq	ip, #0
		bne	1b
2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r5, r6
		orr	r4, r4, r5, lspush #16
		mov	r5, r5, lspull #16
		orr	r5, r5, r6, lspush #16
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		mov	r4, r6, lspull #16
		tst	ip, #4
		beq	4f
3:		load1l	r5
		orr	r4, r4, r5, lspush #16
		str	r4, [dst], #4
		adcs	sum, sum, r4
		mov	r4, r5, lspull #16
		/*
		 * Tail of 1-3 bytes: two are pending in r4; a third, if
		 * required, is fetched with load1b and finished in .Lexit.
		 */
4:		ands	len, len, #3
		beq	.Ldone
		mov	r5, r4, get_byte_0
		tst	len, #2
		beq	.Lexit
		adcs	sum, sum, r4
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_1
		strb	r5, [dst], #1
		tst	len, #1
		beq	.Ldone
		load1b	r5
		b	.Lexit

/*
 * Source offset 3 within its word.  Entered from .Lsrc_not_aligned with
 * r5 = first aligned source word and src already rounded down.  r4
 * carries the single not-yet-stored byte of the previous source word;
 * three bytes of the next word complete each output word.
 *
 * The dispatching 'cmp ip, #2' left C set, so 'adds sum, sum, #0'
 * clears it before the adcs chain starts.
 */
.Lsrc3_aligned:	mov	r4, r5, lspull #24
		adds	sum, sum, #0
		bics	ip, len, #15
		beq	2f
1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, lspush #8
		mov	r5, r5, lspull #24
		orr	r5, r5, r6, lspush #8
		mov	r6, r6, lspull #24
		orr	r6, r6, r7, lspush #8
		mov	r7, r7, lspull #24
		orr	r7, r7, r8, lspush #8
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		mov	r4, r8, lspull #24
		sub	ip, ip, #16
		teq	ip, #0
		bne	1b
2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r5, r6
		orr	r4, r4, r5, lspush #8
		mov	r5, r5, lspull #24
		orr	r5, r5, r6, lspush #8
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		mov	r4, r6, lspull #24
		tst	ip, #4
		beq	4f
3:		load1l	r5
		orr	r4, r4, r5, lspush #8
		str	r4, [dst], #4
		adcs	sum, sum, r4
		mov	r4, r5, lspull #24
		/*
		 * Tail of 1-3 bytes: only one is pending in r4, so when
		 * two or more are needed another word is loaded to supply
		 * the second and (via .Lexit) the third.
		 */
4:		ands	len, len, #3
		beq	.Ldone
		mov	r5, r4, get_byte_0
		tst	len, #2
		beq	.Lexit
		strb	r5, [dst], #1
		adcs	sum, sum, r4
		load1l	r4
		mov	r5, r4, get_byte_0
		strb	r5, [dst], #1
		adcs	sum, sum, r4, lspush #24
		mov	r5, r4, get_byte_1
		b	.Lexit
FN_EXIT