blob: 8a2faa42b57e0860e4c84629d2c03c48c56c7031 [file] [log] [blame]
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

/* included by aes-ce.S and aes-neon.S */

/*
 * Everything below is assembled into .text.  On AArch64 the argument of
 * .align is a power of two, so this requests 16 byte alignment.
 */
	.text
	.align		4

/*
 * MAX_STRIDE is the number of blocks handled per iteration of the
 * interleaved (Nx) loops.  It defaults to 4 unless it was already defined
 * before this point.
 */
#ifndef MAX_STRIDE
#define MAX_STRIDE	4
#endif

/*
 * ST4(...) emits its argument only when MAX_STRIDE is 4, ST5(...) only
 * when it is not, so that stride specific instructions can be interleaved
 * in a single loop body.
 */
#if MAX_STRIDE == 4
#define ST4(x...) x
#define ST5(x...)
#else
#define ST4(x...)
#define ST5(x...) x
#endif

/*
 * Out-of-line instance of the encrypt_block4x macro, so that the mode
 * routines in this file can reach it with bl instead of expanding the
 * macro at every call site.
 *
 * Works on the four blocks held in v0-v3.  w3 (rounds) and x2 (round key
 * pointer) are passed through from the callers; x8 and w7 are handed to
 * the macro as well (presumably as scratch - the macro is defined by the
 * including file, confirm there).
 */
SYM_FUNC_START_LOCAL(aes_encrypt_block4x)
	encrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
	ret
SYM_FUNC_END(aes_encrypt_block4x)

/*
 * Out-of-line instance of the decrypt_block4x macro, reached with bl from
 * the mode routines in this file.
 *
 * Works on the four blocks held in v0-v3.  w3 (rounds) and x2 (round key
 * pointer) are passed through from the callers; x8 and w7 are handed to
 * the macro as well (presumably as scratch - the macro is defined by the
 * including file, confirm there).
 */
SYM_FUNC_START_LOCAL(aes_decrypt_block4x)
	decrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
	ret
SYM_FUNC_END(aes_decrypt_block4x)

/*
 * Five-block variants of the out-of-line helpers.  They are only built
 * when MAX_STRIDE is 5 and operate on the blocks held in v0-v4; the
 * remaining operands are the same as for the 4x helpers.
 */
#if MAX_STRIDE == 5
SYM_FUNC_START_LOCAL(aes_encrypt_block5x)
	encrypt_block5x	v0, v1, v2, v3, v4, w3, x2, x8, w7
	ret
SYM_FUNC_END(aes_encrypt_block5x)

SYM_FUNC_START_LOCAL(aes_decrypt_block5x)
	decrypt_block5x	v0, v1, v2, v3, v4, w3, x2, x8, w7
	ret
SYM_FUNC_END(aes_decrypt_block5x)
#endif

	/*
	 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks)
	 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks)
	 */

/*
 * aes_ecb_encrypt - encrypt whole 16 byte blocks in ECB mode.
 *
 * Register usage, following the C prototype:
 *   x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks
 *
 * As long as at least MAX_STRIDE blocks remain they are processed
 * MAX_STRIDE at a time through aes_encrypt_block{4,5}x; whatever is left
 * is then handled one block per iteration.  x29/x30 are saved because the
 * helpers are called with bl.  x5 and w6 are passed to the
 * enc_prepare/encrypt_block macros (presumably scratch; the macros are
 * defined by the including file).
 */
AES_ENTRY(aes_ecb_encrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	enc_prepare	w3, x2, x5

.LecbencloopNx:
	subs		w4, w4, #MAX_STRIDE		/* enough blocks for a full stride? */
	bmi		.Lecbenc1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
ST4(	bl		aes_encrypt_block4x		)
ST5(	ld1		{v4.16b}, [x1], #16		)
ST5(	bl		aes_encrypt_block5x		)
	st1		{v0.16b-v3.16b}, [x0], #64
ST5(	st1		{v4.16b}, [x0], #16		)
	b		.LecbencloopNx
.Lecbenc1x:
	adds		w4, w4, #MAX_STRIDE		/* undo the subtraction above */
	beq		.Lecbencout			/* nothing left */
.Lecbencloop:
	ld1		{v0.16b}, [x1], #16		/* get next pt block */
	encrypt_block	v0, w3, x2, x5, w6
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lecbencloop
.Lecbencout:
	ldp		x29, x30, [sp], #16
	ret
AES_ENDPROC(aes_ecb_encrypt)


/*
 * aes_ecb_decrypt - decrypt whole 16 byte blocks in ECB mode.
 *
 * Register usage, following the C prototype:
 *   x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks
 *
 * As long as at least MAX_STRIDE blocks remain they are processed
 * MAX_STRIDE at a time through aes_decrypt_block{4,5}x; whatever is left
 * is then handled one block per iteration.  x29/x30 are saved because the
 * helpers are called with bl.  x5 and w6 are passed to the
 * dec_prepare/decrypt_block macros (presumably scratch; the macros are
 * defined by the including file).
 */
AES_ENTRY(aes_ecb_decrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	dec_prepare	w3, x2, x5

.LecbdecloopNx:
	subs		w4, w4, #MAX_STRIDE		/* enough blocks for a full stride? */
	bmi		.Lecbdec1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
ST4(	bl		aes_decrypt_block4x		)
ST5(	ld1		{v4.16b}, [x1], #16		)
ST5(	bl		aes_decrypt_block5x		)
	st1		{v0.16b-v3.16b}, [x0], #64
ST5(	st1		{v4.16b}, [x0], #16		)
	b		.LecbdecloopNx
.Lecbdec1x:
	adds		w4, w4, #MAX_STRIDE		/* undo the subtraction above */
	beq		.Lecbdecout			/* nothing left */
.Lecbdecloop:
	ld1		{v0.16b}, [x1], #16		/* get next ct block */
	decrypt_block	v0, w3, x2, x5, w6
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lecbdecloop
.Lecbdecout:
	ldp		x29, x30, [sp], #16
	ret
AES_ENDPROC(aes_ecb_decrypt)


	/*
	 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 * aes_essiv_cbc_encrypt(u8 out[], u8 const in[], u32 const rk1[],
	 *			 int rounds, int blocks, u8 iv[],
	 *			 u32 const rk2[]);
	 * aes_essiv_cbc_decrypt(u8 out[], u8 const in[], u32 const rk1[],
	 *			 int rounds, int blocks, u8 iv[],
	 *			 u32 const rk2[]);
	 */

/*
 * aes_essiv_cbc_encrypt / aes_cbc_encrypt - CBC encryption.
 *
 * Register usage, following the C prototypes:
 *   x0 = out, x1 = in, x2 = rk (rk1), w3 = rounds, w4 = blocks, x5 = iv,
 *   x6 = rk2 (ESSIV entry point only)
 *
 * The ESSIV entry point first encrypts the IV with the key at x6, using a
 * fixed 14 rounds, then switches to the data key with enc_switch_key and
 * joins the common CBC loop.  The plain entry point uses x6 only as the
 * third operand of enc_prepare/encrypt_block.
 *
 * v4 carries the chaining value.  CBC encryption is serial, so the 4x loop
 * only batches the loads and stores; each block is still encrypted on its
 * own.  The final chaining value is written back to iv[].
 * This routine makes no calls, hence no stack frame.
 */
AES_ENTRY(aes_essiv_cbc_encrypt)
	ld1		{v4.16b}, [x5]			/* get iv */

	mov		w8, #14				/* AES-256: 14 rounds */
	enc_prepare	w8, x6, x7
	encrypt_block	v4, w8, x6, x7, w9
	enc_switch_key	w3, x2, x6
	b		.Lcbcencloop4x

AES_ENTRY(aes_cbc_encrypt)
	ld1		{v4.16b}, [x5]			/* get iv */
	enc_prepare	w3, x2, x6

.Lcbcencloop4x:
	subs		w4, w4, #4
	bmi		.Lcbcenc1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
	eor		v0.16b, v0.16b, v4.16b		/* ..and xor with iv */
	encrypt_block	v0, w3, x2, x6, w7
	eor		v1.16b, v1.16b, v0.16b		/* chain on previous ct */
	encrypt_block	v1, w3, x2, x6, w7
	eor		v2.16b, v2.16b, v1.16b
	encrypt_block	v2, w3, x2, x6, w7
	eor		v3.16b, v3.16b, v2.16b
	encrypt_block	v3, w3, x2, x6, w7
	st1		{v0.16b-v3.16b}, [x0], #64
	mov		v4.16b, v3.16b			/* last ct is the next iv */
	b		.Lcbcencloop4x
.Lcbcenc1x:
	adds		w4, w4, #4			/* undo the subtraction above */
	beq		.Lcbcencout
.Lcbcencloop:
	ld1		{v0.16b}, [x1], #16		/* get next pt block */
	eor		v4.16b, v4.16b, v0.16b		/* ..and xor with iv */
	encrypt_block	v4, w3, x2, x6, w7
	st1		{v4.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lcbcencloop
.Lcbcencout:
	st1		{v4.16b}, [x5]			/* return iv */
	ret
AES_ENDPROC(aes_cbc_encrypt)
AES_ENDPROC(aes_essiv_cbc_encrypt)

/*
 * aes_essiv_cbc_decrypt / aes_cbc_decrypt - CBC decryption.
 *
 * Register usage, following the C prototypes:
 *   x0 = out, x1 = in, x2 = rk (rk1), w3 = rounds, w4 = blocks, x5 = iv,
 *   x6 = rk2 (ESSIV entry point only)
 *
 * cbciv names the vector register that carries the chaining value; it is
 * not defined in this file (presumably a register alias set up by the
 * including file).
 *
 * The ESSIV entry point first encrypts the IV with the key at x6, using a
 * fixed 14 rounds, and then joins the common path at .Lessivcbcdecstart.
 *
 * In the Nx loop the ciphertext blocks needed for the final xor are copied
 * to spare registers before the batch is decrypted in place; the ones for
 * which no spare register is left are reloaded from memory afterwards by
 * stepping x1 back.  The last ciphertext block of the batch becomes the
 * next chaining value.
 */
AES_ENTRY(aes_essiv_cbc_decrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	ld1		{cbciv.16b}, [x5]		/* get iv */

	mov		w8, #14				/* AES-256: 14 rounds */
	enc_prepare	w8, x6, x7
	encrypt_block	cbciv, w8, x6, x7, w9
	b		.Lessivcbcdecstart

AES_ENTRY(aes_cbc_decrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	ld1		{cbciv.16b}, [x5]		/* get iv */
.Lessivcbcdecstart:
	dec_prepare	w3, x2, x6

.LcbcdecloopNx:
	subs		w4, w4, #MAX_STRIDE
	bmi		.Lcbcdec1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
#if MAX_STRIDE == 5
	ld1		{v4.16b}, [x1], #16		/* get 1 ct block */
	mov		v5.16b, v0.16b			/* keep ct 0-2 for chaining */
	mov		v6.16b, v1.16b
	mov		v7.16b, v2.16b
	bl		aes_decrypt_block5x
	sub		x1, x1, #32			/* step back to ct 3 */
	eor		v0.16b, v0.16b, cbciv.16b
	eor		v1.16b, v1.16b, v5.16b
	ld1		{v5.16b}, [x1], #16		/* reload 1 ct block */
	ld1		{cbciv.16b}, [x1], #16		/* reload 1 ct block */
	eor		v2.16b, v2.16b, v6.16b
	eor		v3.16b, v3.16b, v7.16b
	eor		v4.16b, v4.16b, v5.16b
#else
	mov		v4.16b, v0.16b			/* keep ct 0-2 for chaining */
	mov		v5.16b, v1.16b
	mov		v6.16b, v2.16b
	bl		aes_decrypt_block4x
	sub		x1, x1, #16			/* step back to ct 3 */
	eor		v0.16b, v0.16b, cbciv.16b
	eor		v1.16b, v1.16b, v4.16b
	ld1		{cbciv.16b}, [x1], #16		/* reload 1 ct block */
	eor		v2.16b, v2.16b, v5.16b
	eor		v3.16b, v3.16b, v6.16b
#endif
	st1		{v0.16b-v3.16b}, [x0], #64
ST5(	st1		{v4.16b}, [x0], #16		)
	b		.LcbcdecloopNx
.Lcbcdec1x:
	adds		w4, w4, #MAX_STRIDE		/* undo the subtraction above */
	beq		.Lcbcdecout
.Lcbcdecloop:
	ld1		{v1.16b}, [x1], #16		/* get next ct block */
	mov		v0.16b, v1.16b			/* ...and copy to v0 */
	decrypt_block	v0, w3, x2, x6, w7
	eor		v0.16b, v0.16b, cbciv.16b	/* xor with iv => pt */
	mov		cbciv.16b, v1.16b		/* ct is next iv */
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lcbcdecloop
.Lcbcdecout:
	st1		{cbciv.16b}, [x5]		/* return iv */
	ldp		x29, x30, [sp], #16
	ret
AES_ENDPROC(aes_cbc_decrypt)
AES_ENDPROC(aes_essiv_cbc_decrypt)


	/*
	 * aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
	 *		       int rounds, int bytes, u8 const iv[])
	 * aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
	 *		       int rounds, int bytes, u8 const iv[])
	 */

/*
 * aes_cbc_cts_encrypt - CBC encrypt the last two blocks with ciphertext
 * stealing.
 *
 * Register usage, following the C prototype:
 *   x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = bytes, x5 = iv
 *
 * With n = bytes - 16, two overlapping 16 byte loads at in and in + n
 * fetch the full block and the final (possibly partial) block, and two
 * overlapping 16 byte stores at out + n and out write the result.  The
 * 16 byte windows taken from .Lcts_permute_table at offsets n and 32 - n
 * drive the tbl instructions; index bytes of 0xff are out of range and
 * make tbl produce a zero byte.
 * NOTE(review): the code only makes sense for 16 <= bytes <= 32 - confirm
 * that the C caller guarantees this.
 *
 * iv[] is read but not written back.
 */
AES_ENTRY(aes_cbc_cts_encrypt)
	adr_l		x8, .Lcts_permute_table
	/*
	 * bytes is an int: compute n in the w register so that the write
	 * zero extends into x4.  The upper half of x4 is not guaranteed by
	 * the procedure call standard and must not leak into the 64 bit
	 * address arithmetic below.
	 */
	sub		w4, w4, #16			/* n = bytes - 16 */
	add		x9, x8, #32
	add		x8, x8, x4			/* table + n */
	sub		x9, x9, x4			/* table + 32 - n */
	ld1		{v3.16b}, [x8]
	ld1		{v4.16b}, [x9]

	ld1		{v0.16b}, [x1], x4		/* overlapping loads */
	ld1		{v1.16b}, [x1]

	ld1		{v5.16b}, [x5]			/* get iv */
	enc_prepare	w3, x2, x6

	eor		v0.16b, v0.16b, v5.16b		/* xor with iv */
	tbl		v1.16b, {v1.16b}, v4.16b	/* align and zero pad final block */
	encrypt_block	v0, w3, x2, x6, w7

	eor		v1.16b, v1.16b, v0.16b
	tbl		v0.16b, {v0.16b}, v3.16b
	encrypt_block	v1, w3, x2, x6, w7

	add		x4, x0, x4			/* out + n */
	st1		{v0.16b}, [x4]			/* overlapping stores */
	st1		{v1.16b}, [x0]
	ret
AES_ENDPROC(aes_cbc_cts_encrypt)

/*
 * aes_cbc_cts_decrypt - CBC decrypt the last two blocks with ciphertext
 * stealing.
 *
 * Register usage, following the C prototype:
 *   x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = bytes, x5 = iv
 *
 * With n = bytes - 16, two overlapping 16 byte loads at in and in + n
 * fetch the full block and the final (possibly partial) block, and two
 * overlapping 16 byte stores at out + n and out write the result.  The
 * 16 byte windows taken from .Lcts_permute_table at offsets n and 32 - n
 * drive tbl/tbx; index bytes of 0xff are out of range, which makes tbl
 * produce a zero byte and makes tbx leave the destination byte as it is.
 * NOTE(review): the code only makes sense for 16 <= bytes <= 32 - confirm
 * that the C caller guarantees this.
 *
 * iv[] is read but not written back.
 */
AES_ENTRY(aes_cbc_cts_decrypt)
	adr_l		x8, .Lcts_permute_table
	/*
	 * bytes is an int: compute n in the w register so that the write
	 * zero extends into x4.  The upper half of x4 is not guaranteed by
	 * the procedure call standard and must not leak into the 64 bit
	 * address arithmetic below.
	 */
	sub		w4, w4, #16			/* n = bytes - 16 */
	add		x9, x8, #32
	add		x8, x8, x4			/* table + n */
	sub		x9, x9, x4			/* table + 32 - n */
	ld1		{v3.16b}, [x8]
	ld1		{v4.16b}, [x9]

	ld1		{v0.16b}, [x1], x4		/* overlapping loads */
	ld1		{v1.16b}, [x1]

	ld1		{v5.16b}, [x5]			/* get iv */
	dec_prepare	w3, x2, x6

	decrypt_block	v0, w3, x2, x6, w7
	tbl		v2.16b, {v0.16b}, v3.16b
	eor		v2.16b, v2.16b, v1.16b

	tbx		v0.16b, {v1.16b}, v4.16b	/* merge in the final block bytes */
	decrypt_block	v0, w3, x2, x6, w7
	eor		v0.16b, v0.16b, v5.16b		/* xor with iv */

	add		x4, x0, x4			/* out + n */
	st1		{v2.16b}, [x4]			/* overlapping stores */
	st1		{v0.16b}, [x0]
	ret
AES_ENDPROC(aes_cbc_cts_decrypt)

/*
 * Index table for the ciphertext stealing code in this file, used as the
 * index operand of tbl/tbx.  It is placed in .rodata, 64 byte aligned,
 * and laid out as
 *
 *   bytes  0 .. 15 : 0xff
 *   bytes 16 .. 31 : 0x0 .. 0xf
 *   bytes 32 .. 47 : 0xff
 *
 * A 16 byte load at offset n (0 <= n <= 16) therefore yields 16 - n bytes
 * of 0xff followed by the indexes 0 .. n-1, and a load at offset 32 - n
 * yields the indexes 16-n .. 15 followed by 16 - n bytes of 0xff.
 * 0xff is an out of range index: tbl writes zero for it, tbx leaves the
 * destination byte untouched.
 */
	.section	".rodata", "a"
	.align		6
.Lcts_permute_table:
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.previous


	/*
	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 ctr[])
	 */

/*
 * aes_ctr_encrypt - CTR mode en/decryption.
 *
 * Register usage, following the C prototype:
 *   x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks, x5 = ctr
 *
 * vctr names the vector register holding the counter block; it is not
 * defined in this file (presumably a register alias set up by the
 * including file).  x6 holds the byte swapped low 64 bits of the big
 * endian counter for the whole routine.
 *
 * Fast path (.LctrloopNx): MAX_STRIDE counter blocks are built by patching
 * only the last 32 bit word of copies of vctr, so it is taken only when
 * adding the block count to the low 32 bits of the counter does not carry
 * (cmn/bcs check below).  Otherwise every block goes through .Lctrloop,
 * which increments the 64 bit value in x6 and propagates a carry into the
 * upper 64 bits at .Lctrcarry.
 *
 * If the block count goes negative in .Lctrloop the encrypted counter
 * block itself is stored to out without reading in ("tail block").
 * The updated counter is written back to ctr[].
 */
AES_ENTRY(aes_ctr_encrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	enc_prepare	w3, x2, x6
	ld1		{vctr.16b}, [x5]

	umov		x6, vctr.d[1]		/* keep swabbed ctr in reg */
	rev		x6, x6
	cmn		w6, w4			/* 32 bit overflow? */
	bcs		.Lctrloop
.LctrloopNx:
	subs		w4, w4, #MAX_STRIDE
	bmi		.Lctr1x
	/* scalar counter arithmetic is interleaved with the vector copies */
	add		w7, w6, #1
	mov		v0.16b, vctr.16b
	add		w8, w6, #2
	mov		v1.16b, vctr.16b
	add		w9, w6, #3
	mov		v2.16b, vctr.16b
	rev		w7, w7
	mov		v3.16b, vctr.16b
	rev		w8, w8
ST5(	mov		v4.16b, vctr.16b		)
	mov		v1.s[3], w7
	rev		w9, w9
ST5(	add		w10, w6, #4			)
	mov		v2.s[3], w8
ST5(	rev		w10, w10			)
	mov		v3.s[3], w9
ST5(	mov		v4.s[3], w10			)
	ld1		{v5.16b-v7.16b}, [x1], #48	/* get 3 input blocks */
ST4(	bl		aes_encrypt_block4x		)
ST5(	bl		aes_encrypt_block5x		)
	eor		v0.16b, v5.16b, v0.16b
ST4(	ld1		{v5.16b}, [x1], #16		)
	eor		v1.16b, v6.16b, v1.16b
ST5(	ld1		{v5.16b-v6.16b}, [x1], #32	)
	eor		v2.16b, v7.16b, v2.16b
	eor		v3.16b, v5.16b, v3.16b
ST5(	eor		v4.16b, v6.16b, v4.16b		)
	st1		{v0.16b-v3.16b}, [x0], #64
ST5(	st1		{v4.16b}, [x0], #16		)
	add		x6, x6, #MAX_STRIDE	/* advance the counter */
	rev		x7, x6
	ins		vctr.d[1], x7
	cbz		w4, .Lctrout
	b		.LctrloopNx
.Lctr1x:
	adds		w4, w4, #MAX_STRIDE	/* undo the subtraction above */
	beq		.Lctrout
.Lctrloop:
	mov		v0.16b, vctr.16b
	encrypt_block	v0, w3, x2, x8, w7

	adds		x6, x6, #1		/* increment BE ctr */
	rev		x7, x6
	ins		vctr.d[1], x7
	bcs		.Lctrcarry		/* overflow? */

.Lctrcarrydone:
	subs		w4, w4, #1
	bmi		.Lctrtailblock		/* blocks <0 means tail block */
	ld1		{v3.16b}, [x1], #16
	eor		v3.16b, v0.16b, v3.16b
	st1		{v3.16b}, [x0], #16
	bne		.Lctrloop		/* flags still from the subs above */

.Lctrout:
	st1		{vctr.16b}, [x5]	/* return next CTR value */
	ldp		x29, x30, [sp], #16
	ret

.Lctrtailblock:
	st1		{v0.16b}, [x0]		/* hand back the raw keystream block */
	b		.Lctrout

.Lctrcarry:
	umov		x7, vctr.d[0]		/* load upper word of ctr */
	rev		x7, x7			/* ... to handle the carry */
	add		x7, x7, #1
	rev		x7, x7
	ins		vctr.d[0], x7
	b		.Lctrcarrydone
AES_ENDPROC(aes_ctr_encrypt)


	/*
	 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int bytes, u8 const rk2[], u8 iv[], int first)
	 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int bytes, u8 const rk2[], u8 iv[], int first)
	 */

/*
 * next_tweak out, in, tmp - derive the next XTS tweak from the current one.
 *
 * Each 64 bit half of \in is doubled; the top bit that falls out of a half
 * is turned into a correction word (taken from xtsmask) and xored into the
 * other half after the ext by 8 bytes swaps the two halves of \tmp.
 * With the mask built by xts_load_mask this carries the top bit of the low
 * half into bit 0 of the high half and folds the top bit of the high half
 * back into the low half as 0x87.
 *
 * \out may be the same register as \in, since \tmp is derived from \in
 * before \out is written.  \tmp is clobbered.  No flags are changed.
 * xtsmask is not defined in this file (presumably a register alias set up
 * by the including file).
 */
	.macro		next_tweak, out, in, tmp
	sshr		\tmp\().2d,  \in\().2d,   #63
	and		\tmp\().16b, \tmp\().16b, xtsmask.16b
	add		\out\().2d,  \in\().2d,   \in\().2d
	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8
	eor		\out\().16b, \out\().16b, \tmp\().16b
	.endm

/*
 * xts_load_mask tmp - set up the constant used by next_tweak.
 *
 * The two movi instructions fill the low halves of xtsmask and \tmp with
 * the 32 bit words 0x1 and 0x87 (the upper halves become zero).  uzp1 then
 * picks the even numbered words of both, leaving xtsmask with the 32 bit
 * words { 0x1, 0x0, 0x87, 0x0 }, i.e. 0x1 in the low and 0x87 in the high
 * 64 bit half.  \tmp is clobbered.
 */
	.macro		xts_load_mask, tmp
	movi		xtsmask.2s, #0x1
	movi		\tmp\().2s, #0x87
	uzp1		xtsmask.4s, xtsmask.4s, \tmp\().4s
	.endm

/*
 * aes_xts_encrypt - XTS encryption with ciphertext stealing.
 *
 * Register usage, following the C prototype:
 *   x0 = out, x1 = in, x2 = rk1, w3 = rounds, w4 = bytes, x5 = rk2,
 *   x6 = iv, w7 = first
 *
 * v4 holds the current tweak and v8 is the temporary handed to the tweak
 * macros.  When first is non-zero the IV is encrypted with rk2 to form the
 * initial tweak before the round keys are switched to rk1.
 * xts_cts_skip_tw and xts_reload_mask are defined by the including file;
 * what they do cannot be seen from here.
 *
 * Data is processed 64 bytes at a time while possible, then 16 bytes at a
 * time.  A trailing partial block is handled by ciphertext stealing: the
 * last full ciphertext block is split with .Lcts_permute_table, its head
 * is stored as the short final block, and the remainder merged with the
 * final plaintext bytes is encrypted in its place.
 *
 * The tweak in v4 is written back to iv[] on exit.  A call with bytes == 0
 * writes nothing to out.
 */
AES_ENTRY(aes_xts_encrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	ld1		{v4.16b}, [x6]
	xts_load_mask	v8
	cbz		w7, .Lxtsencnotfirst

	enc_prepare	w3, x5, x8
	xts_cts_skip_tw	w7, .LxtsencNx
	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
	enc_switch_key	w3, x2, x8
	b		.LxtsencNx

.Lxtsencnotfirst:
	enc_prepare	w3, x2, x8
.LxtsencloopNx:
	next_tweak	v4, v4, v8
.LxtsencNx:
	subs		w4, w4, #64
	bmi		.Lxtsenc1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
	next_tweak	v5, v4, v8
	eor		v0.16b, v0.16b, v4.16b
	next_tweak	v6, v5, v8
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v8
	eor		v3.16b, v3.16b, v7.16b
	bl		aes_encrypt_block4x
	eor		v3.16b, v3.16b, v7.16b
	eor		v0.16b, v0.16b, v4.16b
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	st1		{v0.16b-v3.16b}, [x0], #64
	mov		v4.16b, v7.16b
	cbz		w4, .Lxtsencret
	xts_reload_mask	v8
	b		.LxtsencloopNx
.Lxtsenc1x:
	adds		w4, w4, #64		/* undo the subtraction above */
	/*
	 * Zero here means the call came in with bytes == 0: v0 holds no
	 * ciphertext, so leave without storing it (as aes_xts_decrypt does).
	 */
	beq		.Lxtsencret
	subs		w4, w4, #16
	bmi		.LxtsencctsNx		/* fewer than 16 bytes left */
.Lxtsencloop:
	ld1		{v0.16b}, [x1], #16
.Lxtsencctsout:
	eor		v0.16b, v0.16b, v4.16b
	encrypt_block	v0, w3, x2, x8, w7
	eor		v0.16b, v0.16b, v4.16b
	cbz		w4, .Lxtsencout
	subs		w4, w4, #16
	next_tweak	v4, v4, v8		/* leaves the flags alone */
	bmi		.Lxtsenccts		/* tests the subs above */
	st1		{v0.16b}, [x0], #16
	b		.Lxtsencloop
.Lxtsencout:
	st1		{v0.16b}, [x0]
.Lxtsencret:
	st1		{v4.16b}, [x6]
	ldp		x29, x30, [sp], #16
	ret

.LxtsencctsNx:
	/* steal from the last block produced by the 4x loop */
	mov		v0.16b, v3.16b
	sub		x0, x0, #16
.Lxtsenccts:
	adr_l		x8, .Lcts_permute_table

	add		x1, x1, w4, sxtw	/* rewind input pointer */
	add		w4, w4, #16		/* # bytes in final block */
	add		x9, x8, #32
	add		x8, x8, x4
	sub		x9, x9, x4
	add		x4, x0, x4		/* output address of final block */

	ld1		{v1.16b}, [x1]		/* load final block */
	ld1		{v2.16b}, [x8]
	ld1		{v3.16b}, [x9]

	tbl		v2.16b, {v0.16b}, v2.16b
	tbx		v0.16b, {v1.16b}, v3.16b
	st1		{v2.16b}, [x4]		/* overlapping stores */
	mov		w4, wzr			/* nothing left after this block */
	b		.Lxtsencctsout
AES_ENDPROC(aes_xts_encrypt)

/*
 * aes_xts_decrypt - XTS decryption with ciphertext stealing.
 *
 * Register usage, following the C prototype:
 *   x0 = out, x1 = in, x2 = rk1, w3 = rounds, w4 = bytes, x5 = rk2,
 *   x6 = iv, w7 = first
 *
 * v4 holds the current tweak and v8 is the temporary handed to the tweak
 * macros.  When first is non-zero the IV is encrypted with rk2 to form the
 * initial tweak; the data itself is decrypted with rk1.
 * xts_cts_skip_tw and xts_reload_mask are defined by the including file;
 * what they do cannot be seen from here.
 *
 * If bytes is not a multiple of 16, the byte count is reduced by 16 up
 * front so that the last full block is left for the ciphertext stealing
 * code at .Lxtsdeccts.  That code decrypts the last full block with the
 * tweak following the current one (v5), splits the result with
 * .Lcts_permute_table and then sends the merged block through the regular
 * single block path, which uses the current tweak (v4).
 *
 * The tweak in v4 is written back to iv[] on exit.
 */
AES_ENTRY(aes_xts_decrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	/* subtract 16 bytes if we are doing CTS */
	sub		w8, w4, #0x10
	tst		w4, #0xf
	csel		w4, w4, w8, eq

	ld1		{v4.16b}, [x6]
	xts_load_mask	v8
	xts_cts_skip_tw	w7, .Lxtsdecskiptw
	cbz		w7, .Lxtsdecnotfirst

	enc_prepare	w3, x5, x8
	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
.Lxtsdecskiptw:
	dec_prepare	w3, x2, x8
	b		.LxtsdecNx

.Lxtsdecnotfirst:
	dec_prepare	w3, x2, x8
.LxtsdecloopNx:
	next_tweak	v4, v4, v8
.LxtsdecNx:
	subs		w4, w4, #64
	bmi		.Lxtsdec1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
	next_tweak	v5, v4, v8
	eor		v0.16b, v0.16b, v4.16b
	next_tweak	v6, v5, v8
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v8
	eor		v3.16b, v3.16b, v7.16b
	bl		aes_decrypt_block4x
	eor		v3.16b, v3.16b, v7.16b
	eor		v0.16b, v0.16b, v4.16b
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	st1		{v0.16b-v3.16b}, [x0], #64
	mov		v4.16b, v7.16b
	cbz		w4, .Lxtsdecout
	xts_reload_mask	v8
	b		.LxtsdecloopNx
.Lxtsdec1x:
	adds		w4, w4, #64		/* undo the subtraction above */
	beq		.Lxtsdecout
	subs		w4, w4, #16
.Lxtsdecloop:
	ld1		{v0.16b}, [x1], #16
	bmi		.Lxtsdeccts		/* tests the most recent subs */
.Lxtsdecctsout:
	eor		v0.16b, v0.16b, v4.16b
	decrypt_block	v0, w3, x2, x8, w7
	eor		v0.16b, v0.16b, v4.16b
	st1		{v0.16b}, [x0], #16
	cbz		w4, .Lxtsdecout
	subs		w4, w4, #16
	next_tweak	v4, v4, v8		/* leaves the flags alone */
	b		.Lxtsdecloop
.Lxtsdecout:
	st1		{v4.16b}, [x6]
	ldp		x29, x30, [sp], #16
	ret

.Lxtsdeccts:
	adr_l		x8, .Lcts_permute_table

	add		x1, x1, w4, sxtw	/* rewind input pointer */
	add		w4, w4, #16		/* # bytes in final block */
	add		x9, x8, #32
	add		x8, x8, x4
	sub		x9, x9, x4
	add		x4, x0, x4		/* output address of final block */

	next_tweak	v5, v4, v8

	ld1		{v1.16b}, [x1]		/* load final block */
	ld1		{v2.16b}, [x8]
	ld1		{v3.16b}, [x9]

	eor		v0.16b, v0.16b, v5.16b
	decrypt_block	v0, w3, x2, x8, w7
	eor		v0.16b, v0.16b, v5.16b

	tbl		v2.16b, {v0.16b}, v2.16b
	tbx		v0.16b, {v1.16b}, v3.16b

	st1		{v2.16b}, [x4]		/* overlapping stores */
	mov		w4, wzr			/* nothing left after this block */
	b		.Lxtsdecctsout
AES_ENDPROC(aes_xts_decrypt)

	/*
	 * aes_mac_update(u8 const in[], u32 const rk[], int rounds,
	 *		  int blocks, u8 dg[], int enc_before, int enc_after)
	 */
/*
 * aes_mac_update - fold input blocks into a running MAC digest.
 *
 * Arguments on entry, following the C prototype:
 *   x0 = in, x1 = rk, w2 = rounds, w3 = blocks, x4 = dg,
 *   w5 = enc_before, w6 = enc_after
 *
 * Right after frame_push the arguments that are needed later are copied
 * to x19-x24, and the loops use those copies:
 *   x19 = in, x20 = rk, w21 = rounds, w22 = blocks, x23 = dg,
 *   x24 = enc_after
 * frame_push/frame_pop and cond_yield_neon are defined outside this file;
 * presumably frame_push 6 preserves x19-x24 - confirm against the macro.
 *
 * v0 holds the digest.  It is encrypted once up front when enc_before is
 * non-zero.  Each input block is xored into v0, and v0 is encrypted after
 * every block except that the encryption after the very last block is
 * only done when enc_after is non-zero.  dg[] receives the result.
 */
AES_ENTRY(aes_mac_update)
	frame_push	6

	mov		x19, x0
	mov		x20, x1
	mov		x21, x2
	mov		x22, x3
	mov		x23, x4
	mov		x24, x6

	ld1		{v0.16b}, [x23]			/* get dg */
	enc_prepare	w2, x1, x7
	cbz		w5, .Lmacloop4x			/* no enc_before requested */

	encrypt_block	v0, w2, x1, x7, w8

.Lmacloop4x:
	subs		w22, w22, #4
	bmi		.Lmac1x
	ld1		{v1.16b-v4.16b}, [x19], #64	/* get next 4 pt blocks */
	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with dg */
	encrypt_block	v0, w21, x20, x7, w8
	eor		v0.16b, v0.16b, v2.16b
	encrypt_block	v0, w21, x20, x7, w8
	eor		v0.16b, v0.16b, v3.16b
	encrypt_block	v0, w21, x20, x7, w8
	eor		v0.16b, v0.16b, v4.16b
	/* x5 = enc_after if these were the last blocks, all ones otherwise */
	cmp		w22, wzr
	csinv		x5, x24, xzr, eq
	cbz		w5, .Lmacout
	encrypt_block	v0, w21, x20, x7, w8
	st1		{v0.16b}, [x23]			/* return dg */
	cond_yield_neon	.Lmacrestart
	b		.Lmacloop4x
.Lmac1x:
	add		w22, w22, #4			/* undo the subtraction above */
.Lmacloop:
	cbz		w22, .Lmacout
	ld1		{v1.16b}, [x19], #16		/* get next pt block */
	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with dg */

	/* x5 = enc_after if this was the last block, all ones otherwise */
	subs		w22, w22, #1
	csinv		x5, x24, xzr, eq
	cbz		w5, .Lmacout

.Lmacenc:
	encrypt_block	v0, w21, x20, x7, w8
	b		.Lmacloop

.Lmacout:
	st1		{v0.16b}, [x23]			/* return dg */
	frame_pop
	ret

.Lmacrestart:
	/* target of cond_yield_neon: reload the digest and the round keys */
	ld1		{v0.16b}, [x23]			/* get dg */
	enc_prepare	w21, x20, x0
	b		.Lmacloop4x
AES_ENDPROC(aes_mac_update)