blob: 38cd5a2091a8c276e186481e5a6335175d09171d [file] [log] [blame]
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

/* included by aes-ce.S and aes-neon.S */
9
/* Everything below is code; .align 4 is a power of two on arm64 (16 bytes). */
	.text
	.align		4

/*
 * MAX_STRIDE is the number of blocks processed per iteration by the
 * interleaved (Nx) loops below. It defaults to 4 unless the including
 * file has already defined it.
 */
#ifndef MAX_STRIDE
#define MAX_STRIDE		4
#endif

/*
 * ST4(x) expands to x only when MAX_STRIDE == 4, ST5(x) only otherwise.
 * They let a single instruction stream carry both the 4-way and the
 * 5-way variants of a loop body.
 */
#if MAX_STRIDE == 4
#define ST4(x...) x
#define ST5(x...)
#else
#define ST4(x...)
#define ST5(x...) x
#endif
24
/*
 * Local helper, reached with 'bl': runs the encrypt_block4x macro (defined
 * by the including file) on v0-v3.
 * The macro is handed w3 and x2, which hold 'rounds' and the round key
 * pointer in the callers below, plus x8 and w7 (presumably scratch
 * registers - confirm against the macro definition).
 */
aes_encrypt_block4x:
	encrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
	ret
ENDPROC(aes_encrypt_block4x)
29
/*
 * Local helper, reached with 'bl': runs the decrypt_block4x macro (defined
 * by the including file) on v0-v3.
 * The macro is handed w3 and x2, which hold 'rounds' and the round key
 * pointer in the callers below, plus x8 and w7 (presumably scratch
 * registers - confirm against the macro definition).
 */
aes_decrypt_block4x:
	decrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
	ret
ENDPROC(aes_decrypt_block4x)
34
/*
 * 5-way counterparts of the 4x helpers, only built when the including
 * file selects MAX_STRIDE == 5. They operate on v0-v4 through the
 * encrypt_block5x / decrypt_block5x macros of the including file and
 * take the same w3, x2, x8, w7 operands as the 4x helpers.
 */
#if MAX_STRIDE == 5
aes_encrypt_block5x:
	encrypt_block5x	v0, v1, v2, v3, v4, w3, x2, x8, w7
	ret
ENDPROC(aes_encrypt_block5x)

aes_decrypt_block5x:
	decrypt_block5x	v0, v1, v2, v3, v4, w3, x2, x8, w7
	ret
ENDPROC(aes_decrypt_block5x)
#endif
46
/*
 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *		   int blocks)
 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *		   int blocks)
 */
53
/*
 * ECB encryption.
 * In:  x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks
 * Processes MAX_STRIDE blocks per iteration while enough remain, then
 * finishes the remainder one block at a time.
 */
AES_ENTRY(aes_ecb_encrypt)
	stp		x29, x30, [sp, #-16]!	/* bl below overwrites x30 */
	mov		x29, sp

	enc_prepare	w3, x2, x5

.LecbencloopNx:
	subs		w4, w4, #MAX_STRIDE
	bmi		.Lecbenc1x		/* fewer than MAX_STRIDE left */
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
ST4(	bl		aes_encrypt_block4x		)
ST5(	ld1		{v4.16b}, [x1], #16		)
ST5(	bl		aes_encrypt_block5x		)
	st1		{v0.16b-v3.16b}, [x0], #64
ST5(	st1		{v4.16b}, [x0], #16		)
	b		.LecbencloopNx
.Lecbenc1x:
	adds		w4, w4, #MAX_STRIDE	/* undo the subs; w4 = remainder */
	beq		.Lecbencout
.Lecbencloop:
	ld1		{v0.16b}, [x1], #16		/* get next pt block */
	encrypt_block	v0, w3, x2, x5, w6
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lecbencloop
.Lecbencout:
	ldp		x29, x30, [sp], #16
	ret
AES_ENDPROC(aes_ecb_encrypt)
83
84
/*
 * ECB decryption.
 * In:  x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks
 * Processes MAX_STRIDE blocks per iteration while enough remain, then
 * finishes the remainder one block at a time.
 */
AES_ENTRY(aes_ecb_decrypt)
	stp		x29, x30, [sp, #-16]!	/* bl below overwrites x30 */
	mov		x29, sp

	dec_prepare	w3, x2, x5

.LecbdecloopNx:
	subs		w4, w4, #MAX_STRIDE
	bmi		.Lecbdec1x		/* fewer than MAX_STRIDE left */
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
ST4(	bl		aes_decrypt_block4x		)
ST5(	ld1		{v4.16b}, [x1], #16		)
ST5(	bl		aes_decrypt_block5x		)
	st1		{v0.16b-v3.16b}, [x0], #64
ST5(	st1		{v4.16b}, [x0], #16		)
	b		.LecbdecloopNx
.Lecbdec1x:
	adds		w4, w4, #MAX_STRIDE	/* undo the subs; w4 = remainder */
	beq		.Lecbdecout
.Lecbdecloop:
	ld1		{v0.16b}, [x1], #16		/* get next ct block */
	decrypt_block	v0, w3, x2, x5, w6
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lecbdecloop
.Lecbdecout:
	ldp		x29, x30, [sp], #16
	ret
AES_ENDPROC(aes_ecb_decrypt)
114
115
/*
 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *		   int blocks, u8 iv[])
 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *		   int blocks, u8 iv[])
 * aes_essiv_cbc_encrypt(u8 out[], u8 const in[], u32 const rk1[],
 *			 int rounds, int blocks, u8 iv[],
 *			 u32 const rk2[]);
 * aes_essiv_cbc_decrypt(u8 out[], u8 const in[], u32 const rk1[],
 *			 int rounds, int blocks, u8 iv[],
 *			 u32 const rk2[]);
 */
128
/*
 * CBC encryption, with an ESSIV front end sharing the same loop.
 * In:  x0 = out, x1 = in, x2 = rk (rk1), w3 = rounds, w4 = blocks,
 *      x5 = iv, x6 = rk2 (ESSIV entry only)
 * v4 carries the chaining value throughout and is written back to [x5]
 * on exit.
 *
 * The ESSIV entry first encrypts the IV with the key schedule at x6 using
 * a fixed 14 rounds, then switches to the data key and joins the CBC loop.
 */
AES_ENTRY(aes_essiv_cbc_encrypt)
	ld1		{v4.16b}, [x5]			/* get iv */

	mov		w8, #14				/* AES-256: 14 rounds */
	enc_prepare	w8, x6, x7
	encrypt_block	v4, w8, x6, x7, w9
	enc_switch_key	w3, x2, x6
	b		.Lcbcencloop4x

AES_ENTRY(aes_cbc_encrypt)
	ld1		{v4.16b}, [x5]			/* get iv */
	enc_prepare	w3, x2, x6

.Lcbcencloop4x:
	subs		w4, w4, #4
	bmi		.Lcbcenc1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
	eor		v0.16b, v0.16b, v4.16b		/* ..and xor with iv */
	encrypt_block	v0, w3, x2, x6, w7
	eor		v1.16b, v1.16b, v0.16b		/* chain: ct[n-1] ^ pt[n] */
	encrypt_block	v1, w3, x2, x6, w7
	eor		v2.16b, v2.16b, v1.16b
	encrypt_block	v2, w3, x2, x6, w7
	eor		v3.16b, v3.16b, v2.16b
	encrypt_block	v3, w3, x2, x6, w7
	st1		{v0.16b-v3.16b}, [x0], #64
	mov		v4.16b, v3.16b			/* last ct is next iv */
	b		.Lcbcencloop4x
.Lcbcenc1x:
	adds		w4, w4, #4			/* undo the subs */
	beq		.Lcbcencout
.Lcbcencloop:
	ld1		{v0.16b}, [x1], #16		/* get next pt block */
	eor		v4.16b, v4.16b, v0.16b		/* ..and xor with iv */
	encrypt_block	v4, w3, x2, x6, w7
	st1		{v4.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lcbcencloop
.Lcbcencout:
	st1		{v4.16b}, [x5]			/* return iv */
	ret
AES_ENDPROC(aes_cbc_encrypt)
AES_ENDPROC(aes_essiv_cbc_encrypt)
Ard Biesheuvel49788fe2014-03-21 10:19:17 +0100172
/*
 * CBC decryption, with an ESSIV front end sharing the same loop.
 * In:  x0 = out, x1 = in, x2 = rk (rk1), w3 = rounds, w4 = blocks,
 *      x5 = iv, x6 = rk2 (ESSIV entry only)
 * 'cbciv' is a vector register alias provided by the including file; it
 * carries the chaining value and is written back to [x5] on exit.
 *
 * The ESSIV entry encrypts the IV with the key schedule at x6 using a
 * fixed 14 rounds before joining the common path at .Lessivcbcdecstart.
 */
AES_ENTRY(aes_essiv_cbc_decrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	ld1		{cbciv.16b}, [x5]		/* get iv */

	mov		w8, #14				/* AES-256: 14 rounds */
	enc_prepare	w8, x6, x7
	encrypt_block	cbciv, w8, x6, x7, w9
	b		.Lessivcbcdecstart

AES_ENTRY(aes_cbc_decrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	ld1		{cbciv.16b}, [x5]		/* get iv */
.Lessivcbcdecstart:
	dec_prepare	w3, x2, x6

.LcbcdecloopNx:
	subs		w4, w4, #MAX_STRIDE
	bmi		.Lcbcdec1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
#if MAX_STRIDE == 5
	ld1		{v4.16b}, [x1], #16		/* get 1 ct block */
	mov		v5.16b, v0.16b			/* keep ct 0..2 for chaining */
	mov		v6.16b, v1.16b
	mov		v7.16b, v2.16b
	bl		aes_decrypt_block5x
	sub		x1, x1, #32			/* step back to ct 3 */
	eor		v0.16b, v0.16b, cbciv.16b
	eor		v1.16b, v1.16b, v5.16b
	ld1		{v5.16b}, [x1], #16		/* reload ct 3 */
	ld1		{cbciv.16b}, [x1], #16		/* reload ct 4 as next iv */
	eor		v2.16b, v2.16b, v6.16b
	eor		v3.16b, v3.16b, v7.16b
	eor		v4.16b, v4.16b, v5.16b
#else
	mov		v4.16b, v0.16b			/* keep ct 0..2 for chaining */
	mov		v5.16b, v1.16b
	mov		v6.16b, v2.16b
	bl		aes_decrypt_block4x
	sub		x1, x1, #16			/* step back to ct 3 */
	eor		v0.16b, v0.16b, cbciv.16b
	eor		v1.16b, v1.16b, v4.16b
	ld1		{cbciv.16b}, [x1], #16		/* reload ct 3 as next iv */
	eor		v2.16b, v2.16b, v5.16b
	eor		v3.16b, v3.16b, v6.16b
#endif
	st1		{v0.16b-v3.16b}, [x0], #64
ST5(	st1		{v4.16b}, [x0], #16		)
	b		.LcbcdecloopNx
.Lcbcdec1x:
	adds		w4, w4, #MAX_STRIDE		/* undo the subs */
	beq		.Lcbcdecout
.Lcbcdecloop:
	ld1		{v1.16b}, [x1], #16		/* get next ct block */
	mov		v0.16b, v1.16b			/* ...and copy to v0 */
	decrypt_block	v0, w3, x2, x6, w7
	eor		v0.16b, v0.16b, cbciv.16b	/* xor with iv => pt */
	mov		cbciv.16b, v1.16b		/* ct is next iv */
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lcbcdecloop
.Lcbcdecout:
	st1		{cbciv.16b}, [x5]		/* return iv */
	ldp		x29, x30, [sp], #16
	ret
AES_ENDPROC(aes_cbc_decrypt)
AES_ENDPROC(aes_essiv_cbc_decrypt)
Ard Biesheuvel49788fe2014-03-21 10:19:17 +0100243
244
/*
 * aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
 *		       int rounds, int bytes, u8 const iv[])
 * aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
 *		       int rounds, int bytes, u8 const iv[])
 */
251
/*
 * CBC ciphertext stealing, encrypt side: handles the final two blocks,
 * the last of which may be partial.
 * In:  x0 = out, x1 = in, x2 = rk, w3 = rounds, x4 = bytes, x5 = iv
 * After the initial subtraction x4 is the length of the final block and
 * is used to index .Lcts_permute_table from both ends, giving the TBL
 * index vectors v3 and v4.
 * NOTE(review): the prototype declares 'int bytes' but the code uses the
 * full 64-bit x4 - presumably callers pass a small positive value; confirm.
 */
AES_ENTRY(aes_cbc_cts_encrypt)
	adr_l		x8, .Lcts_permute_table
	sub		x4, x4, #16			/* x4 = bytes - 16 */
	add		x9, x8, #32
	add		x8, x8, x4			/* x8 = table + x4 */
	sub		x9, x9, x4			/* x9 = table + 32 - x4 */
	ld1		{v3.16b}, [x8]
	ld1		{v4.16b}, [x9]

	ld1		{v0.16b}, [x1], x4		/* overlapping loads */
	ld1		{v1.16b}, [x1]

	ld1		{v5.16b}, [x5]			/* get iv */
	enc_prepare	w3, x2, x6

	eor		v0.16b, v0.16b, v5.16b		/* xor with iv */
	tbl		v1.16b, {v1.16b}, v4.16b
	encrypt_block	v0, w3, x2, x6, w7

	eor		v1.16b, v1.16b, v0.16b
	tbl		v0.16b, {v0.16b}, v3.16b
	encrypt_block	v1, w3, x2, x6, w7

	add		x4, x0, x4			/* x4 = out + final length */
	st1		{v0.16b}, [x4]			/* overlapping stores */
	st1		{v1.16b}, [x0]
	ret
AES_ENDPROC(aes_cbc_cts_encrypt)
280
/*
 * CBC ciphertext stealing, decrypt side: handles the final two blocks,
 * the last of which may be partial.
 * In:  x0 = out, x1 = in, x2 = rk, w3 = rounds, x4 = bytes, x5 = iv
 * After the initial subtraction x4 is the length of the final block and
 * is used to index .Lcts_permute_table from both ends, giving the
 * TBL/TBX index vectors v3 and v4.
 * NOTE(review): the prototype declares 'int bytes' but the code uses the
 * full 64-bit x4 - presumably callers pass a small positive value; confirm.
 */
AES_ENTRY(aes_cbc_cts_decrypt)
	adr_l		x8, .Lcts_permute_table
	sub		x4, x4, #16			/* x4 = bytes - 16 */
	add		x9, x8, #32
	add		x8, x8, x4			/* x8 = table + x4 */
	sub		x9, x9, x4			/* x9 = table + 32 - x4 */
	ld1		{v3.16b}, [x8]
	ld1		{v4.16b}, [x9]

	ld1		{v0.16b}, [x1], x4		/* overlapping loads */
	ld1		{v1.16b}, [x1]

	ld1		{v5.16b}, [x5]			/* get iv */
	dec_prepare	w3, x2, x6

	decrypt_block	v0, w3, x2, x6, w7
	tbl		v2.16b, {v0.16b}, v3.16b
	eor		v2.16b, v2.16b, v1.16b

	/* tbx leaves the bytes of v0 whose index is out of range untouched */
	tbx		v0.16b, {v1.16b}, v4.16b
	decrypt_block	v0, w3, x2, x6, w7
	eor		v0.16b, v0.16b, v5.16b		/* xor with iv */

	add		x4, x0, x4			/* x4 = out + final length */
	st1		{v2.16b}, [x4]			/* overlapping stores */
	st1		{v0.16b}, [x0]
	ret
AES_ENDPROC(aes_cbc_cts_decrypt)
309
/*
 * Index table for the TBL/TBX instructions in the CTS routines: 16 bytes
 * of 0xff, the identity permutation 0x0..0xf, then 16 more bytes of 0xff.
 * A 16-byte load at a variable offset into it yields a shifted identity
 * permutation padded with 0xff, which is an out-of-range index for a
 * single-register table lookup.
 * Kept in .rodata with 64-byte alignment (.align 6); .previous switches
 * back to the section that was active before.
 */
	.section	".rodata", "a"
	.align		6
.Lcts_permute_table:
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.previous
320
321
/*
 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *		   int blocks, u8 ctr[])
 */
326
/*
 * CTR mode encryption.
 * In:  x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks, x5 = ctr
 * 'vctr' is a vector register alias provided by the including file and
 * holds the big-endian counter block; x6 holds a byte-swapped copy of its
 * second 64-bit lane so the counter can be advanced with integer adds.
 *
 * The interleaved loop only rewrites the last 32-bit word of each counter
 * block, so it is skipped (in favour of the one-block loop, which handles
 * the carry) whenever adding 'blocks' to the low 32 bits of the counter
 * would carry out.
 */
AES_ENTRY(aes_ctr_encrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	enc_prepare	w3, x2, x6
	ld1		{vctr.16b}, [x5]

	umov		x6, vctr.d[1]		/* keep swabbed ctr in reg */
	rev		x6, x6
	cmn		w6, w4			/* 32 bit overflow? */
	bcs		.Lctrloop
.LctrloopNx:
	subs		w4, w4, #MAX_STRIDE
	bmi		.Lctr1x
	/* v0..v3 (and v4) = counter + 0..3 (4); integer work is interleaved */
	add		w7, w6, #1
	mov		v0.16b, vctr.16b
	add		w8, w6, #2
	mov		v1.16b, vctr.16b
	add		w9, w6, #3
	mov		v2.16b, vctr.16b
	rev		w7, w7
	mov		v3.16b, vctr.16b
	rev		w8, w8
ST5(	mov		v4.16b, vctr.16b		)
	mov		v1.s[3], w7
	rev		w9, w9
ST5(	add		w10, w6, #4			)
	mov		v2.s[3], w8
ST5(	rev		w10, w10			)
	mov		v3.s[3], w9
ST5(	mov		v4.s[3], w10			)
	ld1		{v5.16b-v7.16b}, [x1], #48	/* get 3 input blocks */
ST4(	bl		aes_encrypt_block4x		)
ST5(	bl		aes_encrypt_block5x		)
	eor		v0.16b, v5.16b, v0.16b
ST4(	ld1		{v5.16b}, [x1], #16		)
	eor		v1.16b, v6.16b, v1.16b
ST5(	ld1		{v5.16b-v6.16b}, [x1], #32	)
	eor		v2.16b, v7.16b, v2.16b
	eor		v3.16b, v5.16b, v3.16b
ST5(	eor		v4.16b, v6.16b, v4.16b		)
	st1		{v0.16b-v3.16b}, [x0], #64
ST5(	st1		{v4.16b}, [x0], #16		)
	add		x6, x6, #MAX_STRIDE
	rev		x7, x6
	ins		vctr.d[1], x7
	cbz		w4, .Lctrout
	b		.LctrloopNx
.Lctr1x:
	adds		w4, w4, #MAX_STRIDE	/* undo the subs */
	beq		.Lctrout
.Lctrloop:
	mov		v0.16b, vctr.16b
	encrypt_block	v0, w3, x2, x8, w7

	adds		x6, x6, #1		/* increment BE ctr */
	rev		x7, x6
	ins		vctr.d[1], x7
	bcs		.Lctrcarry		/* overflow? */

.Lctrcarrydone:
	subs		w4, w4, #1
	bmi		.Lctrtailblock		/* blocks <0 means tail block */
	ld1		{v3.16b}, [x1], #16
	eor		v3.16b, v0.16b, v3.16b
	st1		{v3.16b}, [x0], #16
	bne		.Lctrloop		/* flags still from the subs */

.Lctrout:
	st1		{vctr.16b}, [x5]	/* return next CTR value */
	ldp		x29, x30, [sp], #16
	ret

.Lctrtailblock:
	st1		{v0.16b}, [x0]		/* store the raw keystream block */
	b		.Lctrout

.Lctrcarry:
	umov		x7, vctr.d[0]		/* load upper word of ctr */
	rev		x7, x7			/* ... to handle the carry */
	add		x7, x7, #1
	rev		x7, x7
	ins		vctr.d[0], x7
	b		.Lctrcarrydone
AES_ENDPROC(aes_ctr_encrypt)
Ard Biesheuvel49788fe2014-03-21 10:19:17 +0100413
414
/*
 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
 *		   int blocks, u8 const rk2[], u8 iv[], int first)
 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
 *		   int blocks, u8 const rk2[], u8 iv[], int first)
 */
421
/*
 * next_tweak out, in, tmp
 *
 * out = in doubled as a 128-bit value, with the per-lane carries
 * patched up via xtsmask (a vector register alias provided by the
 * including file, loaded by xts_load_mask):
 *  - sshr #63 turns the top bit of each 64-bit lane into an all-ones or
 *    all-zeroes lane, which the 'and' reduces to that lane's mask word;
 *  - add in, in shifts each lane left by one bit;
 *  - ext #8 swaps the two masked lanes so each correction lands in the
 *    other lane before being xor'ed in.
 * Clobbers \tmp. \out may be the same register as \in.
 */
	.macro		next_tweak, out, in, tmp
	sshr		\tmp\().2d,  \in\().2d,   #63
	and		\tmp\().16b, \tmp\().16b, xtsmask.16b
	add		\out\().2d,  \in\().2d,   \in\().2d
	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8
	eor		\out\().16b, \out\().16b, \tmp\().16b
	.endm
429
/*
 * xts_load_mask tmp
 *
 * Builds the constant used by next_tweak in xtsmask: the two movi's set
 * the low two 32-bit lanes to 0x1 and 0x87 respectively (clearing the
 * upper halves), and uzp1 picks the even lanes of both, leaving
 * xtsmask.4s = { 0x1, 0x0, 0x87, 0x0 }, i.e. xtsmask.2d = { 0x1, 0x87 }.
 * Clobbers \tmp.
 */
	.macro		xts_load_mask, tmp
	movi		xtsmask.2s, #0x1
	movi		\tmp\().2s, #0x87
	uzp1		xtsmask.4s, xtsmask.4s, \tmp\().4s
	.endm
Ard Biesheuvel49788fe2014-03-21 10:19:17 +0100435
/*
 * XTS encryption.
 * In:  x0 = out, x1 = in, x2 = rk1, w3 = rounds, w4 = blocks,
 *      x5 = rk2, x6 = iv, w7 = first
 * v4 holds the current tweak; v5-v7 hold the following three tweaks in
 * the 4-way loop. v8 is the temporary handed to the tweak macros.
 * When 'first' is non-zero the IV is encrypted with rk2 to form the
 * initial tweak; otherwise the value loaded from [x6] is advanced once
 * before use. The tweak left in v4 is stored back to [x6] on exit.
 * xts_reload_mask is supplied by the including file (presumably it
 * restores xtsmask if the block macros clobber it - confirm there).
 */
AES_ENTRY(aes_xts_encrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	ld1		{v4.16b}, [x6]
	xts_load_mask	v8
	cbz		w7, .Lxtsencnotfirst

	enc_prepare	w3, x5, x8
	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
	enc_switch_key	w3, x2, x8
	b		.LxtsencNx

.Lxtsencnotfirst:
	enc_prepare	w3, x2, x8
.LxtsencloopNx:
	next_tweak	v4, v4, v8
.LxtsencNx:
	subs		w4, w4, #4
	bmi		.Lxtsenc1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
	next_tweak	v5, v4, v8
	eor		v0.16b, v0.16b, v4.16b
	next_tweak	v6, v5, v8
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v8
	eor		v3.16b, v3.16b, v7.16b
	bl		aes_encrypt_block4x
	eor		v3.16b, v3.16b, v7.16b		/* xor tweaks back in */
	eor		v0.16b, v0.16b, v4.16b
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	st1		{v0.16b-v3.16b}, [x0], #64
	mov		v4.16b, v7.16b			/* last tweak used */
	cbz		w4, .Lxtsencout
	xts_reload_mask	v8
	b		.LxtsencloopNx
.Lxtsenc1x:
	adds		w4, w4, #4			/* undo the subs */
	beq		.Lxtsencout
.Lxtsencloop:
	ld1		{v1.16b}, [x1], #16
	eor		v0.16b, v1.16b, v4.16b
	encrypt_block	v0, w3, x2, x8, w7
	eor		v0.16b, v0.16b, v4.16b
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	beq		.Lxtsencout
	next_tweak	v4, v4, v8
	b		.Lxtsencloop
.Lxtsencout:
	st1		{v4.16b}, [x6]
	ldp		x29, x30, [sp], #16
	ret
AES_ENDPROC(aes_xts_encrypt)
492
493
/*
 * XTS decryption.
 * In:  x0 = out, x1 = in, x2 = rk1, w3 = rounds, w4 = blocks,
 *      x5 = rk2, x6 = iv, w7 = first
 * v4 holds the current tweak; v5-v7 hold the following three tweaks in
 * the 4-way loop. v8 is the temporary handed to the tweak macros.
 * When 'first' is non-zero the IV is *encrypted* with rk2 to form the
 * initial tweak, after which the decryption key schedule for rk1 is
 * prepared; otherwise the value loaded from [x6] is advanced once before
 * use. The tweak left in v4 is stored back to [x6] on exit.
 * xts_reload_mask is supplied by the including file (presumably it
 * restores xtsmask if the block macros clobber it - confirm there).
 */
AES_ENTRY(aes_xts_decrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	ld1		{v4.16b}, [x6]
	xts_load_mask	v8
	cbz		w7, .Lxtsdecnotfirst

	enc_prepare	w3, x5, x8
	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
	dec_prepare	w3, x2, x8
	b		.LxtsdecNx

.Lxtsdecnotfirst:
	dec_prepare	w3, x2, x8
.LxtsdecloopNx:
	next_tweak	v4, v4, v8
.LxtsdecNx:
	subs		w4, w4, #4
	bmi		.Lxtsdec1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
	next_tweak	v5, v4, v8
	eor		v0.16b, v0.16b, v4.16b
	next_tweak	v6, v5, v8
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v8
	eor		v3.16b, v3.16b, v7.16b
	bl		aes_decrypt_block4x
	eor		v3.16b, v3.16b, v7.16b		/* xor tweaks back in */
	eor		v0.16b, v0.16b, v4.16b
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	st1		{v0.16b-v3.16b}, [x0], #64
	mov		v4.16b, v7.16b			/* last tweak used */
	cbz		w4, .Lxtsdecout
	xts_reload_mask	v8
	b		.LxtsdecloopNx
.Lxtsdec1x:
	adds		w4, w4, #4			/* undo the subs */
	beq		.Lxtsdecout
.Lxtsdecloop:
	ld1		{v1.16b}, [x1], #16
	eor		v0.16b, v1.16b, v4.16b
	decrypt_block	v0, w3, x2, x8, w7
	eor		v0.16b, v0.16b, v4.16b
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	beq		.Lxtsdecout
	next_tweak	v4, v4, v8
	b		.Lxtsdecloop
.Lxtsdecout:
	st1		{v4.16b}, [x6]
	ldp		x29, x30, [sp], #16
	ret
AES_ENDPROC(aes_xts_decrypt)
Ard Biesheuvel48606202017-02-03 14:49:37 +0000550
/*
 * aes_mac_update(u8 const in[], u32 const rk[], int rounds,
 *		  int blocks, u8 dg[], int enc_before, int enc_after)
 */
/*
 * CBC-MAC style digest update.
 * In:  x0 = in, x1 = rk, w2 = rounds, w3 = blocks, x4 = dg,
 *      w5 = enc_before, w6 = enc_after
 * The arguments are copied to x19-x24 right after frame_push (a macro
 * defined outside this file, presumably saving those registers) so they
 * survive the cond_yield_neon point:
 *      x19 = in, x20 = rk, w21 = rounds, w22 = blocks left,
 *      x23 = dg, x24 = enc_after
 * v0 holds the running digest and is written back to [x23].
 *
 * After each block is xor'ed in, csinv sets x5 to enc_after when no
 * blocks remain and to all-ones otherwise, so the following cbz skips
 * the encryption only for the final block with enc_after == 0.
 */
AES_ENTRY(aes_mac_update)
	frame_push	6

	mov		x19, x0
	mov		x20, x1
	mov		x21, x2
	mov		x22, x3
	mov		x23, x4
	mov		x24, x6

	ld1		{v0.16b}, [x23]			/* get dg */
	enc_prepare	w2, x1, x7
	cbz		w5, .Lmacloop4x			/* enc_before == 0 */

	encrypt_block	v0, w2, x1, x7, w8

.Lmacloop4x:
	subs		w22, w22, #4
	bmi		.Lmac1x
	ld1		{v1.16b-v4.16b}, [x19], #64	/* get next 4 pt blocks */
	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with dg */
	encrypt_block	v0, w21, x20, x7, w8
	eor		v0.16b, v0.16b, v2.16b
	encrypt_block	v0, w21, x20, x7, w8
	eor		v0.16b, v0.16b, v3.16b
	encrypt_block	v0, w21, x20, x7, w8
	eor		v0.16b, v0.16b, v4.16b
	cmp		w22, wzr
	csinv		x5, x24, xzr, eq		/* last block ? enc_after : ~0 */
	cbz		w5, .Lmacout
	encrypt_block	v0, w21, x20, x7, w8
	st1		{v0.16b}, [x23]			/* return dg */
	cond_yield_neon	.Lmacrestart
	b		.Lmacloop4x
.Lmac1x:
	add		w22, w22, #4			/* undo the subs */
.Lmacloop:
	cbz		w22, .Lmacout
	ld1		{v1.16b}, [x19], #16		/* get next pt block */
	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with dg */

	subs		w22, w22, #1
	csinv		x5, x24, xzr, eq		/* last block ? enc_after : ~0 */
	cbz		w5, .Lmacout

.Lmacenc:
	encrypt_block	v0, w21, x20, x7, w8
	b		.Lmacloop

.Lmacout:
	st1		{v0.16b}, [x23]			/* return dg */
	frame_pop
	ret

	/* Branch target given to cond_yield_neon: reload dg and the keys */
.Lmacrestart:
	ld1		{v0.16b}, [x23]			/* get dg */
	enc_prepare	w21, x20, x0
	b		.Lmacloop4x
AES_ENDPROC(aes_mac_update)