blob: 1372e64088507a45a50fb67d72362164a99c3a92 [file] [log] [blame]
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Camellia Cipher Algorithm (x86_64)
 *
 * Copyright (C) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
 */
7
Jussi Kivilinna59990682013-01-19 13:39:05 +02008#include <linux/linkage.h>
9
.file "camellia-x86_64-asm_64.S"
.text

/*
 * Lookup tables used by the round macros.  They are defined outside this
 * file.  xor2ror16 indexes each one with a byte value scaled by 8.  The short
 * spXXXXXXXX aliases keep the macro invocations readable.
 */
.extern camellia_sp10011110
#define sp10011110	camellia_sp10011110

.extern camellia_sp22000222
#define sp22000222	camellia_sp22000222

.extern camellia_sp03303033
#define sp03303033	camellia_sp03303033

.extern camellia_sp00444404
#define sp00444404	camellia_sp00444404

.extern camellia_sp02220222
#define sp02220222	camellia_sp02220222

.extern camellia_sp30333033
#define sp30333033	camellia_sp30333033

.extern camellia_sp44044404
#define sp44044404	camellia_sp44044404

.extern camellia_sp11101110
#define sp11101110	camellia_sp11101110
30
/*
 * Named as the byte length of the subkey table.  It doubles as the offset of
 * key_length, which is addressed right behind key_table.
 */
#define CAMELLIA_TABLE_BYTE_LEN	272

/* struct camellia_ctx: byte offsets relative to CTX */
#define key_table		0
#define key_length		CAMELLIA_TABLE_BYTE_LEN
36
/*
 * register macros
 *
 * CTX keeps the context pointer.  RIO and RT0 are both %rsi: the I/O pointer
 * is only dereferenced by the in/out pack macros, and the register is used
 * as a scratch temporary in between (the entry points keep the destination
 * pointer in RDST and copy it back to RIO before unpacking).
 */
#define CTX	%rdi
#define RIO	%rsi
#define RIOd	%esi

/* Block halves for way 0, with their 32-bit and byte views. */
#define RAB0	%rax
#define RAB0d	%eax
#define RAB0bl	%al
#define RAB0bh	%ah

#define RCD0	%rcx
#define RCD0d	%ecx
#define RCD0bl	%cl
#define RCD0bh	%ch

/* Block halves for way 1 (2-way routines only). */
#define RAB1	%rbx
#define RAB1d	%ebx
#define RAB1bl	%bl
#define RAB1bh	%bh

#define RCD1	%rdx
#define RCD1d	%edx
#define RCD1bl	%dl
#define RCD1bh	%dh

/* Scratch temporaries. */
#define RT0	%rsi
#define RT0d	%esi

#define RT1	%r12
#define RT1d	%r12d

#define RT2	%r8
#define RT2d	%r8d
#define RT2bl	%r8b

/*
 * RXOR: the "xor" flag in the encrypt entry points; the decrypt entry points
 *       use it as a spare register instead.
 * RR12: holds the caller's %r12 while RT1 is in use.
 * RDST: destination pointer, kept until RIO is reloaded for the output.
 */
#define RXOR	%r9
#define RXORd	%r9d
#define RXORbl	%r9b

#define RR12	%r10
#define RDST	%r11
78
/*
 * xor2ror16(T0, T1, tmp1, tmp2, ab, dst)
 *
 * XORs two 8-byte table entries into dst: T0[lowest byte of ab] and
 * T1[second-lowest byte of ab].  ab is left rotated right by 16 bits, so the
 * next invocation consumes the following two bytes.
 *
 * The table bases are formed with RIP-relative leaq instead of being used as
 * absolute displacements, so the code does not rely on the tables being
 * reachable through a 32-bit absolute address.  RIP-relative addressing
 * cannot be combined with an index register, hence the separate leaq before
 * each indexed access.
 *
 * Clobbers: tmp1, tmp2, flags.  ab must be a register that has both a low
 * byte and a high byte view (ab ## bl, ab ## bh).
 */
#define xor2ror16(T0, T1, tmp1, tmp2, ab, dst) \
	leaq T0(%rip),			tmp1; \
	movzbl ab ## bl,		tmp2 ## d; \
	xorq (tmp1, tmp2, 8),		dst; \
	leaq T1(%rip),			tmp2; \
	movzbl ab ## bh,		tmp1 ## d; \
	rorq $16,			ab; \
	xorq (tmp2, tmp1, 8),		dst;
85
/**********************************************************************
  1-way camellia
 **********************************************************************/

/*
 * roundsm(ab, subkey, cd): one round on way 0.
 *
 * Loads the 64-bit subkey with index `subkey` into RT2, then folds the eight
 * table lookups selected by the bytes of ab ## 0 alternately into cd ## 0
 * and RT2, and finally XORs RT2 into cd ## 0.  The four xor2ror16 steps
 * rotate ab ## 0 by 4 * 16 = 64 bits, so it holds its original value on exit.
 *
 * Clobbers: RT0, RT1, RT2, flags.
 */
#define roundsm(ab, subkey, cd) \
	movq (key_table + (subkey) * 8)(CTX),		RT2; \
	\
	xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \
	xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \
	xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \
	xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \
	\
	xorq RT2,					cd ## 0;
98
/*
 * fls(l, r, kl, kr): key-dependent mixing of l ## 0 and r ## 0 with the
 * 64-bit subkeys at indices kl and kr.  The steps, in execution order
 * (hi32/lo32 are the upper/lower 32 bits of the 64-bit value):
 *
 *	l.hi32 ^= rol32(l.lo32 & kl.lo32, 1)
 *	r.lo32 ^= r.hi32 | kr.hi32
 *	l.lo32 ^= l.hi32 | kl.hi32	(uses the l.hi32 updated above)
 *	r.hi32 ^= rol32(r.lo32 & kr.lo32, 1)	(uses the r.lo32 updated above)
 *
 * Clobbers: RT0, RT1, RT2, flags.
 */
#define fls(l, r, kl, kr) \
	movl (key_table + (kl) * 8)(CTX),		RT0d; \
	andl l ## 0d,					RT0d; \
	roll $1,					RT0d; \
	shlq $32,					RT0; \
	xorq RT0,					l ## 0; \
	movq (key_table + (kr) * 8)(CTX),		RT1; \
	orq r ## 0,					RT1; \
	shrq $32,					RT1; \
	xorq RT1,					r ## 0; \
	\
	movq (key_table + (kl) * 8)(CTX),		RT2; \
	orq l ## 0,					RT2; \
	shrq $32,					RT2; \
	xorq RT2,					l ## 0; \
	movl (key_table + (kr) * 8)(CTX),		RT0d; \
	andl r ## 0d,					RT0d; \
	roll $1,					RT0d; \
	shlq $32,					RT0; \
	xorq RT0,					r ## 0;
119
/*
 * enc_rounds(i): six rounds using subkeys i+2 .. i+7 in ascending order,
 * alternating which half is the round input.
 */
#define enc_rounds(i) \
	roundsm(RAB, (i) + 2, RCD); \
	roundsm(RCD, (i) + 3, RAB); \
	roundsm(RAB, (i) + 4, RCD); \
	roundsm(RCD, (i) + 5, RAB); \
	roundsm(RAB, (i) + 6, RCD); \
	roundsm(RCD, (i) + 7, RAB);

/* enc_fls(i): fls step with kl = subkey i and kr = subkey i+1. */
#define enc_fls(i) \
	fls(RAB, RCD, (i) + 0, (i) + 1);

/*
 * enc_inpack(): load one 16-byte block from RIO.  Each 8-byte half is
 * byte-swapped and rotated by 32 bits, then subkey 0 is XORed into RAB0.
 */
#define enc_inpack() \
	movq (RIO),			RAB0; \
	bswapq				RAB0; \
	rolq $32,			RAB0; \
	movq 8(RIO),			RCD0; \
	bswapq				RCD0; \
	rorq $32,			RCD0; \
	xorq key_table(CTX),		RAB0;

/*
 * enc_outunpack(op, max): XOR the subkey with index `max` (a register) into
 * RCD0, undo the rotate/byte-swap on both halves and write them to RIO with
 * `op` (mov or xor): RCD0 at offset 0, RAB0 at offset 8.
 */
#define enc_outunpack(op, max) \
	xorq key_table(CTX, max, 8),	RCD0; \
	rorq $32,			RCD0; \
	bswapq				RCD0; \
	op ## q RCD0,			(RIO); \
	rolq $32,			RAB0; \
	bswapq				RAB0; \
	op ## q RAB0,			8(RIO);
148
/*
 * dec_rounds(i): six rounds using subkeys i+7 .. i+2 in descending order,
 * alternating which half is the round input.
 */
#define dec_rounds(i) \
	roundsm(RAB, (i) + 7, RCD); \
	roundsm(RCD, (i) + 6, RAB); \
	roundsm(RAB, (i) + 5, RCD); \
	roundsm(RCD, (i) + 4, RAB); \
	roundsm(RAB, (i) + 3, RCD); \
	roundsm(RCD, (i) + 2, RAB);

/* dec_fls(i): fls step with kl = subkey i+1 and kr = subkey i. */
#define dec_fls(i) \
	fls(RAB, RCD, (i) + 1, (i) + 0);

/*
 * dec_inpack(max): load one 16-byte block from RIO.  Each 8-byte half is
 * byte-swapped and rotated by 32 bits, then the subkey with index `max`
 * (a register) is XORed into RAB0.
 */
#define dec_inpack(max) \
	movq (RIO),			RAB0; \
	bswapq				RAB0; \
	rolq $32,			RAB0; \
	movq 8(RIO),			RCD0; \
	bswapq				RCD0; \
	rorq $32,			RCD0; \
	xorq key_table(CTX, max, 8),	RAB0;

/*
 * dec_outunpack(): XOR subkey 0 into RCD0, undo the rotate/byte-swap on both
 * halves and store them to RIO: RCD0 at offset 0, RAB0 at offset 8.
 */
#define dec_outunpack() \
	xorq key_table(CTX),		RCD0; \
	rorq $32,			RCD0; \
	bswapq				RCD0; \
	movq RCD0,			(RIO); \
	rolq $32,			RAB0; \
	bswapq				RAB0; \
	movq RAB0,			8(RIO);
177
SYM_FUNC_START(__camellia_enc_blk)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 *	%rcx: bool xor
	 *
	 * Processes one 16-byte block from src.  The result is stored to dst
	 * when xor is zero and XORed into dst otherwise.  When key_length is
	 * 16 the last fls/rounds group is skipped and the final subkey index
	 * is 24 instead of 32.
	 */
	movq %r12, RR12;	/* RT1 is %r12: park the caller's value */

	movq %rcx, RXOR;	/* %rcx becomes RCD0 below */
	movq %rsi, RDST;	/* must precede the RIO write: RIO is %rsi */
	movq %rdx, RIO;

	enc_inpack();

	enc_rounds(0);
	enc_fls(8);
	enc_rounds(8);
	enc_fls(16);
	enc_rounds(16);
	movl $24, RT1d; /* max */

	/* 32-bit compare, matching how camellia_dec_blk reads key_length */
	cmpl $16, key_length(CTX);
	je .L__enc_done;

	enc_fls(24);
	enc_rounds(24);
	movl $32, RT1d; /* max */

.L__enc_done:
	testb RXORbl, RXORbl;
	movq RDST, RIO;		/* mov leaves the flags from testb intact */

	jnz .L__enc_xor;

	enc_outunpack(mov, RT1);

	movq RR12, %r12;
	ret;

.L__enc_xor:
	enc_outunpack(xor, RT1);

	movq RR12, %r12;
	ret;
SYM_FUNC_END(__camellia_enc_blk)
Jussi Kivilinna0b95ec52012-03-05 20:26:47 +0200224
SYM_FUNC_START(camellia_dec_blk)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 *
	 * Processes one 16-byte block from src and stores the result to dst.
	 * The starting subkey index ("max") is 24 when key_length is 16 and
	 * 32 otherwise; in the former case the first rounds/fls group is
	 * skipped.
	 */
	movq %r12, RR12;	/* RT1 is %r12: park the caller's value */
	movq %rsi, RDST;	/* must precede the RIO write: RIO is %rsi */
	movq %rdx, RIO;

	/* RT2 = (key_length == 16) ? 24 : 32; RXORd is only a temporary here */
	movl $24, RXORd;
	movl $32, RT2d;
	cmpl $16, key_length(CTX);
	cmovel RXORd, RT2d; /* max */

	dec_inpack(RT2);

	/* RT2 is overwritten by the first round, so test it before that */
	cmpb $24, RT2bl;
	je .L__dec_rounds16;

	dec_rounds(24);
	dec_fls(24);

.L__dec_rounds16:
	dec_rounds(16);
	dec_fls(16);
	dec_rounds(8);
	dec_fls(8);
	dec_rounds(0);

	movq RDST, RIO;

	dec_outunpack();

	movq RR12, %r12;
	ret;
SYM_FUNC_END(camellia_dec_blk)
Jussi Kivilinna0b95ec52012-03-05 20:26:47 +0200262
/**********************************************************************
  2-way camellia
 **********************************************************************/

/*
 * roundsm2(ab, subkey, cd): one round on way 0 and way 1 together.
 *
 * The subkey is loaded into RT2 and XORed into cd ## 1 immediately.  Way 0
 * then proceeds as in the 1-way round (lookups alternate between cd ## 0 and
 * RT2, RT2 is XORed into cd ## 0 afterwards), while all way-1 lookups go
 * straight into cd ## 1.  Each of ab ## 0 and ab ## 1 is rotated by
 * 4 * 16 = 64 bits in total and therefore holds its original value on exit.
 *
 * Clobbers: RT0, RT1, RT2, flags.
 */
#define roundsm2(ab, subkey, cd) \
	movq (key_table + (subkey) * 8)(CTX),		RT2; \
	xorq RT2,					cd ## 1; \
	\
	xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \
	xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \
	xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \
	xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \
	\
		xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 1, cd ## 1); \
		xorq RT2,					cd ## 0; \
		xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 1, cd ## 1); \
		xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 1, cd ## 1); \
		xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 1, cd ## 1);
280
/*
 * fls2(l, r, kl, kr): the fls mixing applied to both ways, with the work for
 * way 0 and way 1 (indented) interleaved.  Per way x, in order:
 *
 *	lx.hi32 ^= rol32(lx.lo32 & kl.lo32, 1)
 *	rx.lo32 ^= rx.hi32 | kr.hi32
 *	lx.lo32 ^= lx.hi32 | kl.hi32
 *	rx.hi32 ^= rol32(rx.lo32 & kr.lo32, 1)
 *
 * Clobbers: RT0, RT1, RT2, flags.
 */
#define fls2(l, r, kl, kr) \
	movl (key_table + (kl) * 8)(CTX),		RT0d; \
	andl l ## 0d,					RT0d; \
	roll $1,					RT0d; \
	shlq $32,					RT0; \
	xorq RT0,					l ## 0; \
	movq (key_table + (kr) * 8)(CTX),		RT1; \
	orq r ## 0,					RT1; \
	shrq $32,					RT1; \
	xorq RT1,					r ## 0; \
	\
		movl (key_table + (kl) * 8)(CTX),		RT2d; \
		andl l ## 1d,					RT2d; \
		roll $1,					RT2d; \
		shlq $32,					RT2; \
		xorq RT2,					l ## 1; \
		movq (key_table + (kr) * 8)(CTX),		RT0; \
		orq r ## 1,					RT0; \
		shrq $32,					RT0; \
		xorq RT0,					r ## 1; \
	\
	movq (key_table + (kl) * 8)(CTX),		RT1; \
	orq l ## 0,					RT1; \
	shrq $32,					RT1; \
	xorq RT1,					l ## 0; \
	movl (key_table + (kr) * 8)(CTX),		RT2d; \
	andl r ## 0d,					RT2d; \
	roll $1,					RT2d; \
	shlq $32,					RT2; \
	xorq RT2,					r ## 0; \
	\
		movq (key_table + (kl) * 8)(CTX),		RT0; \
		orq l ## 1,					RT0; \
		shrq $32,					RT0; \
		xorq RT0,					l ## 1; \
		movl (key_table + (kr) * 8)(CTX),		RT1d; \
		andl r ## 1d,					RT1d; \
		roll $1,					RT1d; \
		shlq $32,					RT1; \
		xorq RT1,					r ## 1;
321
/*
 * enc_rounds2(i): six 2-way rounds using subkeys i+2 .. i+7 in ascending
 * order, alternating which half is the round input.
 */
#define enc_rounds2(i) \
	roundsm2(RAB, (i) + 2, RCD); \
	roundsm2(RCD, (i) + 3, RAB); \
	roundsm2(RAB, (i) + 4, RCD); \
	roundsm2(RCD, (i) + 5, RAB); \
	roundsm2(RAB, (i) + 6, RCD); \
	roundsm2(RCD, (i) + 7, RAB);

/* enc_fls2(i): 2-way fls step with kl = subkey i and kr = subkey i+1. */
#define enc_fls2(i) \
	fls2(RAB, RCD, (i) + 0, (i) + 1);

/*
 * enc_inpack2(): load two consecutive 16-byte blocks from RIO (way 0 at
 * offsets 0/8, way 1 at offsets 16/24).  Each 8-byte half is byte-swapped and
 * rotated by 32 bits, then subkey 0 is XORed into RAB0 and RAB1.
 */
#define enc_inpack2() \
	movq (RIO),			RAB0; \
	bswapq				RAB0; \
	rorq $32,			RAB0; \
	movq 8(RIO),			RCD0; \
	bswapq				RCD0; \
	rolq $32,			RCD0; \
	xorq key_table(CTX),		RAB0; \
	\
		movq 16(RIO),			RAB1; \
		bswapq				RAB1; \
		rorq $32,			RAB1; \
		movq 24(RIO),			RCD1; \
		bswapq				RCD1; \
		rolq $32,			RCD1; \
		xorq key_table(CTX),		RAB1;

/*
 * enc_outunpack2(op, max): for each way, XOR the subkey with index `max`
 * (a register) into RCDx, undo the rotate/byte-swap on both halves and write
 * them to RIO with `op` (mov or xor).  Way 0 goes to offsets 0 (RCD0) and 8
 * (RAB0), way 1 to offsets 16 (RCD1) and 24 (RAB1).
 */
#define enc_outunpack2(op, max) \
	xorq key_table(CTX, max, 8),	RCD0; \
	rolq $32,			RCD0; \
	bswapq				RCD0; \
	op ## q RCD0,			(RIO); \
	rorq $32,			RAB0; \
	bswapq				RAB0; \
	op ## q RAB0,			8(RIO); \
	\
		xorq key_table(CTX, max, 8),	RCD1; \
		rolq $32,			RCD1; \
		bswapq				RCD1; \
		op ## q RCD1,			16(RIO); \
		rorq $32,			RAB1; \
		bswapq				RAB1; \
		op ## q RAB1,			24(RIO);
366
/*
 * dec_rounds2(i): six 2-way rounds using subkeys i+7 .. i+2 in descending
 * order, alternating which half is the round input.
 */
#define dec_rounds2(i) \
	roundsm2(RAB, (i) + 7, RCD); \
	roundsm2(RCD, (i) + 6, RAB); \
	roundsm2(RAB, (i) + 5, RCD); \
	roundsm2(RCD, (i) + 4, RAB); \
	roundsm2(RAB, (i) + 3, RCD); \
	roundsm2(RCD, (i) + 2, RAB);

/* dec_fls2(i): 2-way fls step with kl = subkey i+1 and kr = subkey i. */
#define dec_fls2(i) \
	fls2(RAB, RCD, (i) + 1, (i) + 0);

/*
 * dec_inpack2(max): load two consecutive 16-byte blocks from RIO (way 0 at
 * offsets 0/8, way 1 at offsets 16/24).  Each 8-byte half is byte-swapped and
 * rotated by 32 bits, then the subkey with index `max` (a register) is XORed
 * into RAB0 and RAB1.
 */
#define dec_inpack2(max) \
	movq (RIO),			RAB0; \
	bswapq				RAB0; \
	rorq $32,			RAB0; \
	movq 8(RIO),			RCD0; \
	bswapq				RCD0; \
	rolq $32,			RCD0; \
	xorq key_table(CTX, max, 8),	RAB0; \
	\
		movq 16(RIO),			RAB1; \
		bswapq				RAB1; \
		rorq $32,			RAB1; \
		movq 24(RIO),			RCD1; \
		bswapq				RCD1; \
		rolq $32,			RCD1; \
		xorq key_table(CTX, max, 8),	RAB1;

/*
 * dec_outunpack2(): for each way, XOR subkey 0 into RCDx, undo the
 * rotate/byte-swap on both halves and store them to RIO.  Way 0 goes to
 * offsets 0 (RCD0) and 8 (RAB0), way 1 to offsets 16 (RCD1) and 24 (RAB1).
 */
#define dec_outunpack2() \
	xorq key_table(CTX),		RCD0; \
	rolq $32,			RCD0; \
	bswapq				RCD0; \
	movq RCD0,			(RIO); \
	rorq $32,			RAB0; \
	bswapq				RAB0; \
	movq RAB0,			8(RIO); \
	\
		xorq key_table(CTX),		RCD1; \
		rolq $32,			RCD1; \
		bswapq				RCD1; \
		movq RCD1,			16(RIO); \
		rorq $32,			RAB1; \
		bswapq				RAB1; \
		movq RAB1,			24(RIO);
411
SYM_FUNC_START(__camellia_enc_blk_2way)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 *	%rcx: bool xor
	 *
	 * Processes two consecutive 16-byte blocks (32 bytes) from src.  The
	 * result is stored to dst when xor is zero and XORed into dst
	 * otherwise.  When key_length is 16 the last fls/rounds group is
	 * skipped and the final subkey index is 24 instead of 32.
	 */
	pushq %rbx;		/* RAB1 is %rbx */

	movq %r12, RR12;	/* RT1 is %r12: park the caller's value */
	movq %rcx, RXOR;	/* %rcx becomes RCD0 below */
	movq %rsi, RDST;	/* must precede the RIO write: RIO is %rsi */
	movq %rdx, RIO;

	enc_inpack2();

	enc_rounds2(0);
	enc_fls2(8);
	enc_rounds2(8);
	enc_fls2(16);
	enc_rounds2(16);
	movl $24, RT2d; /* max */

	/* 32-bit compare, matching how the decrypt routines read key_length */
	cmpl $16, key_length(CTX);
	je .L__enc2_done;

	enc_fls2(24);
	enc_rounds2(24);
	movl $32, RT2d; /* max */

.L__enc2_done:
	/* explicit byte suffix, as in __camellia_enc_blk */
	testb RXORbl, RXORbl;
	movq RDST, RIO;		/* mov leaves the flags from testb intact */
	jnz .L__enc2_xor;

	enc_outunpack2(mov, RT2);

	movq RR12, %r12;
	popq %rbx;
	ret;

.L__enc2_xor:
	enc_outunpack2(xor, RT2);

	movq RR12, %r12;
	popq %rbx;
	ret;
SYM_FUNC_END(__camellia_enc_blk_2way)
Jussi Kivilinna0b95ec52012-03-05 20:26:47 +0200460
SYM_FUNC_START(camellia_dec_blk_2way)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 *
	 * Processes two consecutive 16-byte blocks (32 bytes) from src and
	 * stores the result to dst.  The starting subkey index ("max") is 24
	 * when key_length is 16 and 32 otherwise; in the former case the
	 * first rounds/fls group is skipped.
	 */

	/*
	 * RT2 = (key_length == 16) ? 24 : 32.  RXORd is borrowed as a
	 * temporary, so this has to finish before %rbx is parked in RXOR.
	 */
	movl $24, RXORd;
	movl $32, RT2d;
	cmpl $16, key_length(CTX);
	cmovel RXORd, RT2d; /* max */

	movq %r12, RR12;	/* RT1 is %r12: park the caller's value */
	movq %rbx, RXOR;	/* RAB1 is %rbx: park the caller's value */
	movq %rsi, RDST;	/* must precede the RIO write: RIO is %rsi */
	movq %rdx, RIO;

	dec_inpack2(RT2);

	/* RT2 is overwritten by the first round, so test it before that */
	cmpb $24, RT2bl;
	je .L__dec2_rounds16;

	dec_rounds2(24);
	dec_fls2(24);

.L__dec2_rounds16:
	dec_rounds2(16);
	dec_fls2(16);
	dec_rounds2(8);
	dec_fls2(8);
	dec_rounds2(0);

	movq RDST, RIO;

	dec_outunpack2();

	movq RXOR, %rbx;
	movq RR12, %r12;
	ret;
SYM_FUNC_END(camellia_dec_blk_2way)