blob: 039738ae23f662f7ad656815e0a922485bc54428 [file] [log] [blame]
/*
 * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

/* included by aes-ce.S and aes-neon.S */
12
/* all routines below are emitted into .text, starting on a 16-byte boundary */
	.text
	.p2align	4
15
/*
 * File-local helper: encrypt the four blocks held in v0-v3 in place by
 * expanding the encrypt_block4x macro (provided by the including file).
 * w3 = number of rounds, x2 = round key pointer; x8 and w7 are forwarded
 * as the macro's trailing operands (presumably temporaries - see the macro
 * definition).  Kept out of line so the mode routines below can reach it
 * with 'bl'; callers therefore have to preserve x30 themselves.
 */
aes_encrypt_block4x:
	encrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
	ret
ENDPROC(aes_encrypt_block4x)
20
/*
 * File-local helper: decrypt the four blocks held in v0-v3 in place by
 * expanding the decrypt_block4x macro (provided by the including file).
 * w3 = number of rounds, x2 = round key pointer; x8 and w7 are forwarded
 * as the macro's trailing operands (presumably temporaries - see the macro
 * definition).  Reached with 'bl', so callers must preserve x30.
 */
aes_decrypt_block4x:
	decrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
	ret
ENDPROC(aes_decrypt_block4x)
25
	/*
	 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks)
	 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks)
	 */
32
AES_ENTRY(aes_ecb_encrypt)
	/*
	 * Arguments (AAPCS64 order):
	 *   x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks
	 * w4 is used as the running count of blocks still to process.
	 */
	stp		x29, x30, [sp, #-16]!		/* 'bl' below overwrites x30 */
	mov		x29, sp

	enc_prepare	w3, x2, x5

	/* bulk path: four blocks per iteration while at least four remain */
.Lecbenc_quad:
	subs		w4, w4, #4
	bmi		.Lecbenc_tail			/* fewer than 4 left */
	ld1		{v0.16b-v3.16b}, [x1], #64	/* fetch 4 plaintext blocks */
	bl		aes_encrypt_block4x
	st1		{v0.16b-v3.16b}, [x0], #64
	b		.Lecbenc_quad

	/* tail path: undo the over-subtraction, then go block by block */
.Lecbenc_tail:
	adds		w4, w4, #4
	beq		.Lecbenc_done			/* nothing left */
.Lecbenc_single:
	ld1		{v0.16b}, [x1], #16		/* fetch 1 plaintext block */
	encrypt_block	v0, w3, x2, x5, w6
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lecbenc_single

.Lecbenc_done:
	ldp		x29, x30, [sp], #16
	ret
AES_ENDPROC(aes_ecb_encrypt)
59
60
AES_ENTRY(aes_ecb_decrypt)
	/*
	 * Arguments (AAPCS64 order):
	 *   x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks
	 * w4 is used as the running count of blocks still to process.
	 */
	stp		x29, x30, [sp, #-16]!		/* 'bl' below overwrites x30 */
	mov		x29, sp

	dec_prepare	w3, x2, x5

	/* bulk path: four blocks per iteration while at least four remain */
.Lecbdec_quad:
	subs		w4, w4, #4
	bmi		.Lecbdec_tail			/* fewer than 4 left */
	ld1		{v0.16b-v3.16b}, [x1], #64	/* fetch 4 ciphertext blocks */
	bl		aes_decrypt_block4x
	st1		{v0.16b-v3.16b}, [x0], #64
	b		.Lecbdec_quad

	/* tail path: undo the over-subtraction, then go block by block */
.Lecbdec_tail:
	adds		w4, w4, #4
	beq		.Lecbdec_done			/* nothing left */
.Lecbdec_single:
	ld1		{v0.16b}, [x1], #16		/* fetch 1 ciphertext block */
	decrypt_block	v0, w3, x2, x5, w6
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lecbdec_single

.Lecbdec_done:
	ldp		x29, x30, [sp], #16
	ret
AES_ENDPROC(aes_ecb_decrypt)
87
88
	/*
	 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 */
95
AES_ENTRY(aes_cbc_encrypt)
	/*
	 * Arguments (AAPCS64 order):
	 *   x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks, x5 = iv
	 * v4 holds the chaining value throughout.  There is no 'bl' in this
	 * routine, so no frame record is set up.
	 */
	ld1		{v4.16b}, [x5]			/* chaining value := iv */
	enc_prepare	w3, x2, x6

	/*
	 * Four blocks per iteration.  Each block depends on the previous
	 * ciphertext, so the four encryptions are done one after another.
	 */
.Lcbcenc_quad:
	subs		w4, w4, #4
	bmi		.Lcbcenc_tail			/* fewer than 4 left */
	ld1		{v0.16b-v3.16b}, [x1], #64	/* fetch 4 plaintext blocks */
	eor		v0.16b, v0.16b, v4.16b		/* pt0 ^ chaining value */
	encrypt_block	v0, w3, x2, x6, w7
	eor		v1.16b, v1.16b, v0.16b		/* pt1 ^ ct0 */
	encrypt_block	v1, w3, x2, x6, w7
	eor		v2.16b, v2.16b, v1.16b		/* pt2 ^ ct1 */
	encrypt_block	v2, w3, x2, x6, w7
	eor		v3.16b, v3.16b, v2.16b		/* pt3 ^ ct2 */
	encrypt_block	v3, w3, x2, x6, w7
	mov		v4.16b, v3.16b			/* ct3 chains into next round */
	st1		{v0.16b-v3.16b}, [x0], #64
	b		.Lcbcenc_quad

	/* tail path: undo the over-subtraction, then go block by block */
.Lcbcenc_tail:
	adds		w4, w4, #4
	beq		.Lcbcenc_done
.Lcbcenc_single:
	ld1		{v0.16b}, [x1], #16		/* fetch 1 plaintext block */
	eor		v4.16b, v4.16b, v0.16b		/* fold into chaining value */
	encrypt_block	v4, w3, x2, x6, w7		/* v4 is now the ciphertext */
	st1		{v4.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lcbcenc_single

.Lcbcenc_done:
	st1		{v4.16b}, [x5]			/* hand chaining value back */
	ret
AES_ENDPROC(aes_cbc_encrypt)
129
130
AES_ENTRY(aes_cbc_decrypt)
	/*
	 * Arguments (AAPCS64 order):
	 *   x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks, x5 = iv
	 * v7 holds the chaining value (iv or previous ciphertext block).
	 */
	stp		x29, x30, [sp, #-16]!		/* 'bl' below overwrites x30 */
	mov		x29, sp

	ld1		{v7.16b}, [x5]			/* chaining value := iv */
	dec_prepare	w3, x2, x6

.Lcbcdec_quad:
	subs		w4, w4, #4
	bmi		.Lcbcdec_tail			/* fewer than 4 left */
	ld1		{v0.16b-v3.16b}, [x1], #64	/* fetch 4 ciphertext blocks */
	mov		v4.16b, v0.16b			/* keep ct0..ct2: they are */
	mov		v5.16b, v1.16b			/* the xor inputs for the  */
	mov		v6.16b, v2.16b			/* following three blocks  */
	bl		aes_decrypt_block4x
	sub		x1, x1, #16			/* step back onto ct3 */
	eor		v0.16b, v0.16b, v7.16b		/* pt0 = D(ct0) ^ chain */
	eor		v1.16b, v1.16b, v4.16b		/* pt1 = D(ct1) ^ ct0 */
	/*
	 * ct3 was not copied before decryption, so read it again from the
	 * input to become the next chaining value.  This happens after the
	 * last use of the old v7 and before the store below (which matters
	 * if out aliases in).
	 */
	ld1		{v7.16b}, [x1], #16
	eor		v2.16b, v2.16b, v5.16b		/* pt2 = D(ct2) ^ ct1 */
	eor		v3.16b, v3.16b, v6.16b		/* pt3 = D(ct3) ^ ct2 */
	st1		{v0.16b-v3.16b}, [x0], #64
	b		.Lcbcdec_quad

	/* tail path: undo the over-subtraction, then go block by block */
.Lcbcdec_tail:
	adds		w4, w4, #4
	beq		.Lcbcdec_done
.Lcbcdec_single:
	ld1		{v1.16b}, [x1], #16		/* fetch 1 ciphertext block */
	mov		v0.16b, v1.16b			/* work on a copy in v0 */
	decrypt_block	v0, w3, x2, x6, w7
	eor		v0.16b, v0.16b, v7.16b		/* pt = D(ct) ^ chain */
	mov		v7.16b, v1.16b			/* this ct chains onward */
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lcbcdec_single

.Lcbcdec_done:
	st1		{v7.16b}, [x5]			/* hand chaining value back */
	ldp		x29, x30, [sp], #16
	ret
AES_ENDPROC(aes_cbc_decrypt)
171
172
	/*
	 * aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
	 *		       int rounds, int bytes, u8 const iv[])
	 * aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
	 *		       int rounds, int bytes, u8 const iv[])
	 */
179
AES_ENTRY(aes_cbc_cts_encrypt)
	/*
	 * Arguments (AAPCS64 order):
	 *   x0 = out, x1 = in, x2 = rk, w3 = rounds, x4 = bytes, x5 = iv
	 *
	 * With n = bytes - 16, the input is treated as one full block
	 * followed by an n-byte tail.
	 * NOTE(review): the code presumes 16 < bytes <= 32, and uses all 64
	 * bits of x4 although 'bytes' is declared int - confirm callers.
	 */
	adr_l		x8, .Lcts_permute_table
	sub		x4, x4, #16			/* x4 = n */
	add		x9, x8, #32
	sub		x9, x9, x4			/* x9 = table + 32 - n */
	add		x8, x8, x4			/* x8 = table + n */
	ld1		{v4.16b}, [x9]			/* tbl: top n bytes -> bottom, rest 0 */
	ld1		{v3.16b}, [x8]			/* tbl: bottom n bytes -> top, rest 0 */

	/* two 16-byte loads that overlap whenever n < 16 */
	ld1		{v0.16b}, [x1], x4		/* first 16 bytes; x1 += n */
	ld1		{v1.16b}, [x1]			/* last 16 bytes */

	ld1		{v5.16b}, [x5]			/* fetch iv */
	enc_prepare	w3, x2, x6

	eor		v0.16b, v0.16b, v5.16b		/* first block ^ iv */
	tbl		v1.16b, {v1.16b}, v4.16b	/* tail, zero padded */
	encrypt_block	v0, w3, x2, x6, w7

	eor		v1.16b, v1.16b, v0.16b		/* padded tail ^ previous ct */
	tbl		v0.16b, {v0.16b}, v3.16b	/* first n ct bytes -> top */
	encrypt_block	v1, w3, x2, x6, w7

	/*
	 * Overlapping stores: the store at out + n also writes the zeroed
	 * low bytes of v0 into out[n..15]; the store at out must come
	 * second so that it overwrites them.
	 */
	add		x4, x0, x4			/* x4 = out + n */
	st1		{v0.16b}, [x4]
	st1		{v1.16b}, [x0]
	ret
AES_ENDPROC(aes_cbc_cts_encrypt)
208
AES_ENTRY(aes_cbc_cts_decrypt)
	/*
	 * Arguments (AAPCS64 order):
	 *   x0 = out, x1 = in, x2 = rk, w3 = rounds, x4 = bytes, x5 = iv
	 *
	 * With n = bytes - 16, the input is treated as one full block
	 * followed by an n-byte tail.
	 * NOTE(review): the code presumes 16 < bytes <= 32, and uses all 64
	 * bits of x4 although 'bytes' is declared int - confirm callers.
	 */
	adr_l		x8, .Lcts_permute_table
	sub		x4, x4, #16			/* x4 = n */
	add		x9, x8, #32
	sub		x9, x9, x4			/* x9 = table + 32 - n */
	add		x8, x8, x4			/* x8 = table + n */
	ld1		{v4.16b}, [x9]			/* tbl: top n bytes -> bottom */
	ld1		{v3.16b}, [x8]			/* tbl: bottom n bytes -> top */

	/* two 16-byte loads that overlap whenever n < 16 */
	ld1		{v0.16b}, [x1], x4		/* first 16 bytes; x1 += n */
	ld1		{v1.16b}, [x1]			/* last 16 bytes */

	ld1		{v5.16b}, [x5]			/* fetch iv */
	dec_prepare	w3, x2, x6

	tbl		v2.16b, {v1.16b}, v4.16b	/* ct tail, zero padded */
	decrypt_block	v0, w3, x2, x6, w7
	eor		v2.16b, v2.16b, v0.16b		/* low n bytes = pt tail */

	/*
	 * tbx leaves destination bytes untouched where the index is out of
	 * range (0xff), so this overlays the n ct tail bytes on the low end
	 * of the decrypted block and keeps its remaining 16 - n bytes.
	 */
	tbx		v0.16b, {v1.16b}, v4.16b
	tbl		v2.16b, {v2.16b}, v3.16b	/* pt tail -> top n bytes */
	decrypt_block	v0, w3, x2, x6, w7
	eor		v0.16b, v0.16b, v5.16b		/* ^ iv => first pt block */

	/*
	 * Overlapping stores: the store at out + n also writes the zeroed
	 * low bytes of v2 into out[n..15]; the store at out must come
	 * second so that it overwrites them.
	 */
	add		x4, x0, x4			/* x4 = out + n */
	st1		{v2.16b}, [x4]
	st1		{v0.16b}, [x0]
	ret
AES_ENDPROC(aes_cbc_cts_decrypt)
238
/*
 * 48-byte index table for the tbl/tbx shuffles in the CTS routines:
 * 16 x 0xff, the identity indices 0..15, then 16 x 0xff again.  Reading
 * 16 bytes at offset n (or 32 - n) yields a vector that moves n bytes
 * between the two ends of a register; 0xff is an out-of-range index,
 * which tbl turns into zero and tbx leaves untouched.
 */
	.section	".rodata", "a"
	.p2align	6
.Lcts_permute_table:
	.fill		16, 1, 0xff
	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
	.fill		16, 1, 0xff
	.previous
249
250
	/*
	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 ctr[])
	 */
255
AES_ENTRY(aes_ctr_encrypt)
	/*
	 * Arguments (AAPCS64 order):
	 *   x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks, x5 = ctr
	 *
	 * v4 = counter block in memory byte order
	 * x6 = byte-reversed copy of v4.d[1] (the counter's low 64 bits as
	 *      a native integer), kept in step with v4
	 */
	stp		x29, x30, [sp, #-16]!		/* 'bl' below overwrites x30 */
	mov		x29, sp

	enc_prepare	w3, x2, x6			/* x6 is reloaded just below */
	ld1		{v4.16b}, [x5]

	umov		x6, v4.d[1]
	rev		x6, x6
	/*
	 * The 4-way path only patches the last 32-bit lane of each counter
	 * block, so it cannot be used if the low 32 bits would wrap during
	 * this call; in that case do everything one block at a time.
	 */
	cmn		w6, w4
	bcs		.Lctr_single

.Lctr_quad:
	subs		w4, w4, #4
	bmi		.Lctr_tail			/* fewer than 4 left */
	/* v0..v3 = counter blocks ctr+0 .. ctr+3 */
	add		w7, w6, #1
	mov		v0.16b, v4.16b
	add		w8, w6, #2
	mov		v1.16b, v4.16b
	add		w9, w6, #3
	mov		v2.16b, v4.16b
	rev		w7, w7
	mov		v3.16b, v4.16b
	rev		w8, w8
	mov		v1.s[3], w7
	rev		w9, w9
	mov		v2.s[3], w8
	mov		v3.s[3], w9
	ld1		{v5.16b-v7.16b}, [x1], #48	/* input blocks 0..2 */
	bl		aes_encrypt_block4x		/* v0..v3 = keystream */
	eor		v0.16b, v5.16b, v0.16b
	ld1		{v5.16b}, [x1], #16		/* input block 3, reusing v5 */
	eor		v1.16b, v6.16b, v1.16b
	eor		v2.16b, v7.16b, v2.16b
	eor		v3.16b, v5.16b, v3.16b
	st1		{v0.16b-v3.16b}, [x0], #64
	add		x6, x6, #4			/* advance counter by 4 ... */
	rev		x7, x6
	ins		v4.d[1], x7			/* ... and mirror it into v4 */
	cbnz		w4, .Lctr_quad
	b		.Lctr_done

.Lctr_tail:
	adds		w4, w4, #4			/* undo the over-subtraction */
	beq		.Lctr_done

.Lctr_single:
	mov		v0.16b, v4.16b
	encrypt_block	v0, w3, x2, x8, w7		/* v0 = keystream block */

	adds		x6, x6, #1			/* bump low 64 bits of ctr */
	rev		x7, x6
	ins		v4.d[1], x7
	bcs		.Lctr_carry			/* wrapped: fix upper half */

.Lctr_carry_done:
	subs		w4, w4, #1
	bmi		.Lctr_tailblock			/* blocks < 0 means tail block */
	ld1		{v3.16b}, [x1], #16
	eor		v3.16b, v0.16b, v3.16b
	st1		{v3.16b}, [x0], #16
	bne		.Lctr_single			/* flags still from the subs */

.Lctr_done:
	st1		{v4.16b}, [x5]			/* hand back next counter value */
	ldp		x29, x30, [sp], #16
	ret

	/*
	 * Tail block: the raw keystream block is written to out and the
	 * routine returns without reading input or storing the counter.
	 */
.Lctr_tailblock:
	st1		{v0.16b}, [x0]
	ldp		x29, x30, [sp], #16
	ret

	/* propagate the carry into the upper 64 bits of the counter */
.Lctr_carry:
	umov		x7, v4.d[0]
	rev		x7, x7
	add		x7, x7, #1
	rev		x7, x7
	ins		v4.d[0], x7
	b		.Lctr_carry_done
AES_ENDPROC(aes_ctr_encrypt)
Ard Biesheuvel49788fe2014-03-21 10:19:17 +0100334
335
	/*
	 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int blocks, u8 const rk2[], u8 iv[], int first)
	 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int blocks, u8 const rk2[], u8 iv[], int first)
	 */
342
/*
 * next_tweak out, in, tmp
 *
 * Derive the following XTS tweak: each 64-bit lane of \in is doubled,
 * and a lane whose top bit was set contributes the matching xtsmask
 * constant to the *other* lane (the ext swaps the two halves).  With
 * xtsmask.2d = { 0x1, 0x87 } that carries bit 63 into bit 64 and folds
 * bit 127 back in as 0x87.
 *
 * \out may be the same register as \in.  \tmp is clobbered.  xtsmask is a
 * register alias defined outside this file and must hold the mask.
 */
	.macro		next_tweak, out, in, tmp
	sshr		\tmp\().2d,  \in\().2d,   #63		/* all ones where top bit set */
	and		\tmp\().16b, \tmp\().16b, xtsmask.16b	/* pick per-lane constant */
	add		\out\().2d,  \in\().2d,   \in\().2d	/* lane-wise shift left by 1 */
	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8 /* swap 64-bit halves */
	eor		\out\().16b, \out\().16b, \tmp\().16b
	.endm
350
/*
 * xts_load_mask tmp
 *
 * Build the constant used by next_tweak without touching memory:
 *   xtsmask.4s = { 1, 1, 0, 0 }, \tmp.4s = { 0x87, 0x87, 0, 0 }
 *   uzp1 keeps the even-numbered lanes of both => { 1, 0, 0x87, 0 }
 * i.e. xtsmask.2d = { 0x1, 0x87 }.  \tmp is clobbered.
 */
	.macro		xts_load_mask, tmp
	movi		xtsmask.2s, #0x1
	movi		\tmp\().2s, #0x87
	uzp1		xtsmask.4s, xtsmask.4s, \tmp\().4s
	.endm
Ard Biesheuvel49788fe2014-03-21 10:19:17 +0100356
AES_ENTRY(aes_xts_encrypt)
	/*
	 * Arguments (AAPCS64 order):
	 *   x0 = out, x1 = in, x2 = rk1, w3 = rounds, w4 = blocks,
	 *   x5 = rk2, x6 = iv, w7 = first
	 *
	 * v4 = current tweak, v5-v7 = the next three tweaks in the 4-way
	 * loop, v8 = scratch for the tweak macros.  w7 is reused as a
	 * temporary once 'first' has been tested.
	 */
	stp		x29, x30, [sp, #-16]!		/* 'bl' below overwrites x30 */
	mov		x29, sp

	ld1		{v4.16b}, [x6]
	cbz		w7, .Lxtsencnotfirst

	/* first call: turn the iv into the initial tweak using rk2 */
	enc_prepare	w3, x5, x8
	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
	enc_switch_key	w3, x2, x8
	xts_load_mask	v8
	b		.LxtsencNx

.Lxtsencnotfirst:
	enc_prepare	w3, x2, x8
	/*
	 * The mask has to be set up on this entry path as well: the next
	 * instruction executed is next_tweak, and xts_reload_mask (defined
	 * by the including file) is not guaranteed to perform a load.
	 */
	xts_load_mask	v8
.LxtsencloopNx:
	next_tweak	v4, v4, v8
.LxtsencNx:
	subs		w4, w4, #4
	bmi		.Lxtsenc1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
	next_tweak	v5, v4, v8
	eor		v0.16b, v0.16b, v4.16b
	next_tweak	v6, v5, v8
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v8
	eor		v3.16b, v3.16b, v7.16b
	bl		aes_encrypt_block4x
	eor		v3.16b, v3.16b, v7.16b
	eor		v0.16b, v0.16b, v4.16b
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	st1		{v0.16b-v3.16b}, [x0], #64
	mov		v4.16b, v7.16b			/* last tweak used so far */
	cbz		w4, .Lxtsencout
	/* restore the mask only when another tweak is about to be derived */
	xts_reload_mask	v8
	b		.LxtsencloopNx
.Lxtsenc1x:
	adds		w4, w4, #4
	beq		.Lxtsencout
.Lxtsencloop:
	ld1		{v1.16b}, [x1], #16
	eor		v0.16b, v1.16b, v4.16b
	encrypt_block	v0, w3, x2, x8, w7
	eor		v0.16b, v0.16b, v4.16b
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	beq		.Lxtsencout
	next_tweak	v4, v4, v8
	b		.Lxtsencloop
.Lxtsencout:
	/*
	 * Store the tweak of the last block processed; a continuation call
	 * (first == 0) advances it before use.
	 */
	st1		{v4.16b}, [x6]
	ldp		x29, x30, [sp], #16
	ret
AES_ENDPROC(aes_xts_encrypt)
413
414
AES_ENTRY(aes_xts_decrypt)
	/*
	 * Arguments (AAPCS64 order):
	 *   x0 = out, x1 = in, x2 = rk1, w3 = rounds, w4 = blocks,
	 *   x5 = rk2, x6 = iv, w7 = first
	 *
	 * v4 = current tweak, v5-v7 = the next three tweaks in the 4-way
	 * loop, v8 = scratch for the tweak macros.  w7 is reused as a
	 * temporary once 'first' has been tested.
	 */
	stp		x29, x30, [sp, #-16]!		/* 'bl' below overwrites x30 */
	mov		x29, sp

	ld1		{v4.16b}, [x6]
	cbz		w7, .Lxtsdecnotfirst

	/*
	 * first call: turn the iv into the initial tweak using rk2 (the
	 * tweak is encrypted even when decrypting data), then set up the
	 * decryption key.
	 */
	enc_prepare	w3, x5, x8
	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
	dec_prepare	w3, x2, x8
	xts_load_mask	v8
	b		.LxtsdecNx

.Lxtsdecnotfirst:
	dec_prepare	w3, x2, x8
	/*
	 * The mask has to be set up on this entry path as well: the next
	 * instruction executed is next_tweak, and xts_reload_mask (defined
	 * by the including file) is not guaranteed to perform a load.
	 */
	xts_load_mask	v8
.LxtsdecloopNx:
	next_tweak	v4, v4, v8
.LxtsdecNx:
	subs		w4, w4, #4
	bmi		.Lxtsdec1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
	next_tweak	v5, v4, v8
	eor		v0.16b, v0.16b, v4.16b
	next_tweak	v6, v5, v8
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v8
	eor		v3.16b, v3.16b, v7.16b
	bl		aes_decrypt_block4x
	eor		v3.16b, v3.16b, v7.16b
	eor		v0.16b, v0.16b, v4.16b
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	st1		{v0.16b-v3.16b}, [x0], #64
	mov		v4.16b, v7.16b			/* last tweak used so far */
	cbz		w4, .Lxtsdecout
	/* restore the mask only when another tweak is about to be derived */
	xts_reload_mask	v8
	b		.LxtsdecloopNx
.Lxtsdec1x:
	adds		w4, w4, #4
	beq		.Lxtsdecout
.Lxtsdecloop:
	ld1		{v1.16b}, [x1], #16
	eor		v0.16b, v1.16b, v4.16b
	decrypt_block	v0, w3, x2, x8, w7
	eor		v0.16b, v0.16b, v4.16b
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	beq		.Lxtsdecout
	next_tweak	v4, v4, v8
	b		.Lxtsdecloop
.Lxtsdecout:
	/*
	 * Store the tweak of the last block processed; a continuation call
	 * (first == 0) advances it before use.
	 */
	st1		{v4.16b}, [x6]
	ldp		x29, x30, [sp], #16
	ret
AES_ENDPROC(aes_xts_decrypt)
Ard Biesheuvel48606202017-02-03 14:49:37 +0000471
	/*
	 * aes_mac_update(u8 const in[], u32 const rk[], int rounds,
	 *		  int blocks, u8 dg[], int enc_before, int enc_after)
	 */
AES_ENTRY(aes_mac_update)
	/*
	 * Arguments (AAPCS64 order):
	 *   x0 = in, x1 = rk, w2 = rounds, w3 = blocks, x4 = dg,
	 *   w5 = enc_before, w6 = enc_after
	 *
	 * Everything needed later is copied into x19-x24 (frame_push 6
	 * presumably saves exactly those - see the macro definition).
	 * v0 carries the running digest.
	 */
	frame_push	6

	mov		x19, x0				/* in */
	mov		x20, x1				/* rk */
	mov		x21, x2				/* rounds */
	mov		x22, x3				/* blocks remaining */
	mov		x23, x4				/* dg */
	mov		x24, x6				/* enc_after */

	ld1		{v0.16b}, [x23]			/* fetch digest */
	enc_prepare	w2, x1, x7
	cbz		w5, .Lmac_quad			/* enc_before == 0: skip */

	encrypt_block	v0, w2, x1, x7, w8

.Lmac_quad:
	subs		w22, w22, #4
	bmi		.Lmac_tail			/* fewer than 4 left */
	ld1		{v1.16b-v4.16b}, [x19], #64	/* fetch 4 input blocks */
	eor		v0.16b, v0.16b, v1.16b		/* fold block into digest */
	encrypt_block	v0, w21, x20, x7, w8
	eor		v0.16b, v0.16b, v2.16b
	encrypt_block	v0, w21, x20, x7, w8
	eor		v0.16b, v0.16b, v3.16b
	encrypt_block	v0, w21, x20, x7, w8
	eor		v0.16b, v0.16b, v4.16b
	/*
	 * x5 = enc_after if that was the final block, all ones otherwise,
	 * so the encryption is skipped only for a final block with
	 * enc_after == 0.
	 */
	cmp		w22, wzr
	csinv		x5, x24, xzr, eq
	cbz		w5, .Lmac_done
	encrypt_block	v0, w21, x20, x7, w8
	st1		{v0.16b}, [x23]			/* publish digest */
	cond_yield_neon	.Lmac_resume
	b		.Lmac_quad

.Lmac_tail:
	add		w22, w22, #4			/* undo the over-subtraction */
.Lmac_single:
	cbz		w22, .Lmac_done
	ld1		{v1.16b}, [x19], #16		/* fetch 1 input block */
	eor		v0.16b, v0.16b, v1.16b		/* fold block into digest */

	subs		w22, w22, #1
	csinv		x5, x24, xzr, eq		/* same selection as above */
	cbz		w5, .Lmac_done

	encrypt_block	v0, w21, x20, x7, w8
	b		.Lmac_single

.Lmac_done:
	st1		{v0.16b}, [x23]			/* publish digest */
	frame_pop
	ret

	/*
	 * Target handed to cond_yield_neon (defined elsewhere; presumably
	 * resumed here after a yield): fetch the digest stored just before
	 * the yield point again and redo the key setup from the saved
	 * arguments.
	 */
.Lmac_resume:
	ld1		{v0.16b}, [x23]			/* fetch digest */
	enc_prepare	w21, x20, x0
	b		.Lmac_quad
AES_ENDPROC(aes_mac_update)