/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Camellia Cipher Algorithm (x86_64)
 *
 * Copyright (C) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
 */
| 7 | |
Jussi Kivilinna | 5999068 | 2013-01-19 13:39:05 +0200 | [diff] [blame] | 8 | #include <linux/linkage.h> |
| 9 | |
.file "camellia-x86_64-asm_64.S"
.text

/*
 * Lookup tables used by the round macros.  They are defined outside this
 * file; each one gets a short alias right next to its declaration.
 */
.extern camellia_sp10011110;
#define sp10011110	camellia_sp10011110

.extern camellia_sp22000222;
#define sp22000222	camellia_sp22000222

.extern camellia_sp03303033;
#define sp03303033	camellia_sp03303033

.extern camellia_sp00444404;
#define sp00444404	camellia_sp00444404

.extern camellia_sp02220222;
#define sp02220222	camellia_sp02220222

.extern camellia_sp30333033;
#define sp30333033	camellia_sp30333033

.extern camellia_sp44044404;
#define sp44044404	camellia_sp44044404

.extern camellia_sp11101110;
#define sp11101110	camellia_sp11101110

/* Size in bytes of the subkey table at the start of the context. */
#define CAMELLIA_TABLE_BYTE_LEN	272

/* struct camellia_ctx: byte offsets of the members accessed through CTX */
#define key_table	0
#define key_length	CAMELLIA_TABLE_BYTE_LEN
| 36 | |
/* register macros (grouped per physical register) */

/* Context pointer and the pointer of the buffer currently read/written. */
#define CTX	%rdi
#define RIO	%rsi
#define RIOd	%esi

/*
 * Block state.  RAB/RCD are the two 64-bit halves of a block; the trailing
 * digit selects the first (0) or second (1) block, the suffixes d/bl/bh the
 * 32-bit, low-byte and second-byte views of the same register.
 */
#define RAB0	%rax
#define RAB0d	%eax
#define RAB0bl	%al
#define RAB0bh	%ah

#define RCD0	%rcx
#define RCD0d	%ecx
#define RCD0bl	%cl
#define RCD0bh	%ch

#define RAB1	%rbx
#define RAB1d	%ebx
#define RAB1bl	%bl
#define RAB1bh	%bh

#define RCD1	%rdx
#define RCD1d	%edx
#define RCD1bl	%dl
#define RCD1bh	%dh

/*
 * Temporaries.  RT0 is the same register as RIO (%rsi), so any macro that
 * writes RT0 destroys the I/O pointer.
 */
#define RT0	%rsi
#define RT0d	%esi

#define RT1	%r12
#define RT1d	%r12d

#define RT2	%r8
#define RT2d	%r8d
#define RT2bl	%r8b

/* Holding registers for values that must outlive the round macros. */
#define RXOR	%r9
#define RXORd	%r9d
#define RXORbl	%r9b

#define RR12	%r10
#define RDST	%r11
| 78 | |
/*
 * xor2ror16 - XOR two table entries into dst and rotate ab right by 16 bits.
 *
 *	dst ^= T0[lowest byte of ab];
 *	dst ^= T1[second-lowest byte of ab];
 *	ab   = ror64(ab, 16);
 *
 * T0, T1:	symbols of tables holding 64-bit entries
 * tmp1, tmp2:	names of 64-bit scratch register macros (a "d" variant must
 *		exist for each); both are clobbered.  tmp1 receives the bh
 *		byte register (%ah, %bh, ...), which cannot be encoded in an
 *		instruction carrying a REX prefix, so tmp1 must not be one of
 *		%r8-%r15; tmp2 has no such restriction.
 * ab:		name of a register macro that has "bl"/"bh" byte variants
 * dst:		64-bit destination; the callers pass a register other than
 *		tmp1, tmp2 and ab
 *
 * The table bases are loaded with RIP-relative leaq rather than being used as
 * absolute 32-bit displacements, which keeps the code position independent.
 * RIP-relative addressing cannot be combined with an index register, hence
 * the separate base load in front of each indexed access.
 */
#define xor2ror16(T0, T1, tmp1, tmp2, ab, dst) \
	leaq T0(%rip),			tmp1; \
	movzbl ab ## bl,		tmp2 ## d; \
	xorq (tmp1, tmp2, 8),		dst; \
	leaq T1(%rip),			tmp2; \
	movzbl ab ## bh,		tmp1 ## d; \
	rorq $16,			ab; \
	xorq (tmp2, tmp1, 8),		dst;
| 85 | |
/**********************************************************************
  1-way camellia
 **********************************************************************/
/*
 * roundsm - one round on block 0.
 *
 * Net effect:
 *	cd0 ^= subkey[subkey] ^ (XOR of eight table entries, one selected by
 *	       each byte of ab0)
 *
 * Each xor2ror16() consumes the two lowest bytes of ab0 and rotates it by
 * 16 bits, so after four of them ab0 has gone full circle and holds its
 * original value again.  The lookups alternate between cd0 and RT2 (which
 * starts out as the subkey); RT2 is merged into cd0 at the end.
 *
 * ab, cd:	register-name stems (RAB / RCD)
 * Clobbers RT0 (and with it RIO), RT1 and RT2.
 */
#define roundsm(ab, subkey, cd) \
	movq (key_table + ((subkey) * 2) * 4)(CTX),	RT2; \
	\
	/* bytes 0-1 -> cd0, bytes 2-3 -> RT2 */ \
	xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \
	xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \
	/* bytes 4-5 -> cd0, bytes 6-7 -> RT2 */ \
	xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \
	xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \
	\
	xorq RT2,					cd ## 0;
| 98 | |
/*
 * fls - key-dependent mixing of the two halves of block 0, applied between
 * groups of rounds (presumably Camellia's FL / FL^-1 layer -- confirm
 * against the cipher specification).
 *
 * With lo(x) / hi(x) the low / high 32 bits of a 64-bit value and KL / KR
 * the 64-bit subkeys number kl / kr, the four steps are, in order:
 *
 *	hi(l) ^= rol32(lo(l) & lo(KL), 1)
 *	lo(r) ^= hi(r | KR)
 *	lo(l) ^= hi(l | KL)
 *	hi(r) ^= rol32(lo(r) & lo(KR), 1)
 *
 * l, r:	register-name stems (RAB / RCD)
 * Clobbers RT0 (and with it RIO), RT1 and RT2.
 */
#define fls(l, r, kl, kr) \
	/* hi(l) ^= rol32(lo(l) & lo(KL), 1) */ \
	movl (key_table + ((kl) * 2) * 4)(CTX),		RT0d; \
	andl l ## 0d,					RT0d; \
	roll $1,					RT0d; \
	shlq $32,					RT0; \
	xorq RT0,					l ## 0; \
	/* lo(r) ^= hi(r | KR) */ \
	movq (key_table + ((kr) * 2) * 4)(CTX),		RT1; \
	orq r ## 0,					RT1; \
	shrq $32,					RT1; \
	xorq RT1,					r ## 0; \
	\
	/* lo(l) ^= hi(l | KL) */ \
	movq (key_table + ((kl) * 2) * 4)(CTX),		RT2; \
	orq l ## 0,					RT2; \
	shrq $32,					RT2; \
	xorq RT2,					l ## 0; \
	/* hi(r) ^= rol32(lo(r) & lo(KR), 1) */ \
	movl (key_table + ((kr) * 2) * 4)(CTX),		RT0d; \
	andl r ## 0d,					RT0d; \
	roll $1,					RT0d; \
	shlq $32,					RT0; \
	xorq RT0,					r ## 0;
| 119 | |
/*
 * enc_rounds - six rounds on block 0 with subkeys i+2 ... i+7 in ascending
 * order; RAB and RCD swap roles after every round.
 */
#define enc_rounds(i) \
	roundsm(RAB, i + 2, RCD); \
	roundsm(RCD, i + 3, RAB); \
	roundsm(RAB, i + 4, RCD); \
	roundsm(RCD, i + 5, RAB); \
	roundsm(RAB, i + 6, RCD); \
	roundsm(RCD, i + 7, RAB);

/* enc_fls - fls() using subkey i for RAB and subkey i+1 for RCD. */
#define enc_fls(i) \
	fls(RAB, RCD, i + 0, i + 1);
| 130 | |
/*
 * enc_inpack - load the 16-byte block at (RIO) into RAB0 / RCD0.
 *
 * Each 8-byte half is byte-swapped and then has its two 32-bit words
 * exchanged (a rotation by 32 in either direction does the same thing).
 * Subkey 0 is XORed into RAB0.
 */
#define enc_inpack() \
	/* first 8 bytes -> RAB0 */ \
	movq (RIO),					RAB0; \
	bswapq						RAB0; \
	rolq $32,					RAB0; \
	/* second 8 bytes -> RCD0 */ \
	movq 4*2(RIO),					RCD0; \
	bswapq						RCD0; \
	rorq $32,					RCD0; \
	/* subkey 0 */ \
	xorq key_table(CTX),				RAB0;
| 139 | |
/*
 * enc_outunpack - write block 0 to the 16 bytes at (RIO).
 *
 * op:	instruction stem, "mov" to store or "xor" to XOR into the buffer
 *	(the "q" suffix is appended here)
 * max:	register holding the index of the 64-bit subkey XORed into RCD0
 *
 * The halves leave in swapped order: RCD0 goes to the first 8 bytes and
 * RAB0 to the second 8 bytes, each after undoing the word exchange and the
 * byte swap done on input.
 */
#define enc_outunpack(op, max) \
	/* RCD0 -> first 8 bytes */ \
	xorq key_table(CTX, max, 8),			RCD0; \
	rorq $32,					RCD0; \
	bswapq						RCD0; \
	op ## q RCD0,					(RIO); \
	/* RAB0 -> second 8 bytes */ \
	rolq $32,					RAB0; \
	bswapq						RAB0; \
	op ## q RAB0,					4*2(RIO);
| 148 | |
/*
 * dec_rounds - six rounds on block 0 with subkeys i+7 ... i+2 in descending
 * order, i.e. the subkey order of enc_rounds() reversed.
 */
#define dec_rounds(i) \
	roundsm(RAB, i + 7, RCD); \
	roundsm(RCD, i + 6, RAB); \
	roundsm(RAB, i + 5, RCD); \
	roundsm(RCD, i + 4, RAB); \
	roundsm(RAB, i + 3, RCD); \
	roundsm(RCD, i + 2, RAB);

/*
 * dec_fls - fls() using subkey i+1 for RAB and subkey i for RCD (the two
 * subkeys are swapped relative to enc_fls()).
 */
#define dec_fls(i) \
	fls(RAB, RCD, i + 1, i + 0);
| 159 | |
/*
 * dec_inpack - load the 16-byte block at (RIO) into RAB0 / RCD0.
 *
 * Same unpacking as enc_inpack(), except that the 64-bit subkey with index
 * 'max' (a register) is XORed into RAB0 instead of subkey 0.
 */
#define dec_inpack(max) \
	/* first 8 bytes -> RAB0 */ \
	movq (RIO),					RAB0; \
	bswapq						RAB0; \
	rolq $32,					RAB0; \
	/* second 8 bytes -> RCD0 */ \
	movq 4*2(RIO),					RCD0; \
	bswapq						RCD0; \
	rorq $32,					RCD0; \
	/* subkey 'max' */ \
	xorq key_table(CTX, max, 8),			RAB0;
| 168 | |
/*
 * dec_outunpack - store block 0 to the 16 bytes at (RIO).
 *
 * Subkey 0 is XORed into RCD0, which is written to the first 8 bytes; RAB0
 * is written to the second 8 bytes.  Both halves get their 32-bit words
 * exchanged and their bytes swapped on the way out.
 */
#define dec_outunpack() \
	/* RCD0 -> first 8 bytes */ \
	xorq key_table(CTX),				RCD0; \
	rorq $32,					RCD0; \
	bswapq						RCD0; \
	movq RCD0,					(RIO); \
	/* RAB0 -> second 8 bytes */ \
	rolq $32,					RAB0; \
	bswapq						RAB0; \
	movq RAB0,					4*2(RIO);
| 177 | |
SYM_FUNC_START(__camellia_enc_blk)
	/*
	 * Encrypt one 16-byte block.
	 *
	 * input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 *	%rcx: bool xor (low byte non-zero: XOR the result into dst
	 *	      instead of storing it)
	 */

	/* RT1 is %r12; keep the caller's value in RR12 instead of the stack. */
	movq	%r12, RR12;

	/* %rcx doubles as RCD0 and %rsi as RIO/RT0: move the arguments away. */
	movq	%rcx, RXOR;
	movq	%rsi, RDST;
	movq	%rdx, RIO;		/* the block is read from src */

	enc_inpack();

	/* Three groups of six rounds are run for every key length. */
	enc_rounds(0);
	enc_fls(8);
	enc_rounds(8);
	enc_fls(16);
	enc_rounds(16);
	movl	$24, RT1d;		/* max */

	/* key_length == 16 (low byte compared): no fourth group. */
	cmpb	$16, key_length(CTX);
	je	.Lenc1_finish;

	enc_fls(24);
	enc_rounds(24);
	movl	$32, RT1d;		/* max */

.Lenc1_finish:
	/* movq leaves the flags alone, so jnz still sees the testb result. */
	testb	RXORbl, RXORbl;
	movq	RDST, RIO;		/* output goes to dst */

	jnz	.Lenc1_xor_out;

	enc_outunpack(mov, RT1);

	movq	RR12, %r12;
	ret;

.Lenc1_xor_out:
	enc_outunpack(xor, RT1);

	movq	RR12, %r12;
	ret;
SYM_FUNC_END(__camellia_enc_blk)
Jussi Kivilinna | 0b95ec5 | 2012-03-05 20:26:47 +0200 | [diff] [blame] | 224 | |
SYM_FUNC_START(camellia_dec_blk)
	/*
	 * Decrypt one 16-byte block.
	 *
	 * input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 */

	/*
	 * max = (key_length == 16) ? 24 : 32.  cmov has no immediate form,
	 * so the constant 24 is staged in RXOR, which is otherwise unused in
	 * this function.
	 */
	cmpl	$16, key_length(CTX);
	movl	$32, RT2d;
	movl	$24, RXORd;
	cmovel	RXORd, RT2d;		/* max */

	/* RT1 is %r12; %rsi doubles as RIO/RT0. */
	movq	%r12, RR12;
	movq	%rsi, RDST;
	movq	%rdx, RIO;		/* the block is read from src */

	dec_inpack(RT2);

	/* max == 24: skip the group that only longer keys use. */
	cmpb	$24, RT2bl;
	je	.Ldec1_common;

	dec_rounds(24);
	dec_fls(24);

.Ldec1_common:
	dec_rounds(16);
	dec_fls(16);
	dec_rounds(8);
	dec_fls(8);
	dec_rounds(0);

	movq	RDST, RIO;		/* output goes to dst */

	dec_outunpack();

	movq	RR12, %r12;
	ret;
SYM_FUNC_END(camellia_dec_blk)
Jussi Kivilinna | 0b95ec5 | 2012-03-05 20:26:47 +0200 | [diff] [blame] | 262 | |
/**********************************************************************
  2-way camellia
 **********************************************************************/
/*
 * roundsm2 - one round on two blocks at once.
 *
 * Net effect, for n = 0 and n = 1:
 *	cd<n> ^= subkey[subkey] ^ (XOR of eight table entries, one selected
 *	         by each byte of ab<n>)
 *
 * The subkey is XORed into cd1 up front.  For block 0 the lookups alternate
 * between cd0 and RT2 (the subkey copy), and RT2 is merged into cd0 while
 * the block 1 lookups are already under way.  All block 1 lookups go
 * straight into cd1.  ab0 and ab1 are each rotated by 4 x 16 bits and thus
 * end up unchanged.
 *
 * ab, cd:	register-name stems (RAB / RCD)
 * Clobbers RT0 (and with it RIO), RT1 and RT2.
 */
#define roundsm2(ab, subkey, cd) \
	movq (key_table + ((subkey) * 2) * 4)(CTX),	RT2; \
	xorq RT2,					cd ## 1; \
	\
	/* block 0 */ \
	xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \
	xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \
	xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \
	xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \
	\
	/* block 1, with the final merge for block 0 slotted in */ \
		xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 1, cd ## 1); \
		xorq RT2,					cd ## 0; \
		xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 1, cd ## 1); \
		xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 1, cd ## 1); \
		xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 1, cd ## 1);
| 280 | |
/*
 * fls2 - the key-dependent half mixing of fls(), applied to two blocks.
 *
 * With lo(x) / hi(x) the low / high 32 bits of a 64-bit value and KL / KR
 * the 64-bit subkeys number kl / kr, each block n goes through:
 *
 *	(a) hi(l<n>) ^= rol32(lo(l<n>) & lo(KL), 1)
 *	(b) lo(r<n>) ^= hi(r<n> | KR)
 *	(c) lo(l<n>) ^= hi(l<n> | KL)
 *	(d) hi(r<n>) ^= rol32(lo(r<n>) & lo(KR), 1)
 *
 * The two blocks are interleaved: (a)(b) for block 0, (a)(b) for block 1,
 * (c)(d) for block 0, (c)(d) for block 1.
 *
 * l, r:	register-name stems (RAB / RCD)
 * Clobbers RT0 (and with it RIO), RT1 and RT2.
 */
#define fls2(l, r, kl, kr) \
	/* block 0: (a), (b) */ \
	movl (key_table + ((kl) * 2) * 4)(CTX),		RT0d; \
	andl l ## 0d,					RT0d; \
	roll $1,					RT0d; \
	shlq $32,					RT0; \
	xorq RT0,					l ## 0; \
	movq (key_table + ((kr) * 2) * 4)(CTX),		RT1; \
	orq r ## 0,					RT1; \
	shrq $32,					RT1; \
	xorq RT1,					r ## 0; \
	\
	/* block 1: (a), (b) */ \
		movl (key_table + ((kl) * 2) * 4)(CTX),		RT2d; \
		andl l ## 1d,					RT2d; \
		roll $1,					RT2d; \
		shlq $32,					RT2; \
		xorq RT2,					l ## 1; \
		movq (key_table + ((kr) * 2) * 4)(CTX),		RT0; \
		orq r ## 1,					RT0; \
		shrq $32,					RT0; \
		xorq RT0,					r ## 1; \
	\
	/* block 0: (c), (d) */ \
	movq (key_table + ((kl) * 2) * 4)(CTX),		RT1; \
	orq l ## 0,					RT1; \
	shrq $32,					RT1; \
	xorq RT1,					l ## 0; \
	movl (key_table + ((kr) * 2) * 4)(CTX),		RT2d; \
	andl r ## 0d,					RT2d; \
	roll $1,					RT2d; \
	shlq $32,					RT2; \
	xorq RT2,					r ## 0; \
	\
	/* block 1: (c), (d) */ \
		movq (key_table + ((kl) * 2) * 4)(CTX),		RT0; \
		orq l ## 1,					RT0; \
		shrq $32,					RT0; \
		xorq RT0,					l ## 1; \
		movl (key_table + ((kr) * 2) * 4)(CTX),		RT1d; \
		andl r ## 1d,					RT1d; \
		roll $1,					RT1d; \
		shlq $32,					RT1; \
		xorq RT1,					r ## 1;
| 321 | |
/*
 * enc_rounds2 - six two-block rounds with subkeys i+2 ... i+7 in ascending
 * order; RAB and RCD swap roles after every round.
 */
#define enc_rounds2(i) \
	roundsm2(RAB, i + 2, RCD); \
	roundsm2(RCD, i + 3, RAB); \
	roundsm2(RAB, i + 4, RCD); \
	roundsm2(RCD, i + 5, RAB); \
	roundsm2(RAB, i + 6, RCD); \
	roundsm2(RCD, i + 7, RAB);

/* enc_fls2 - fls2() using subkey i for RAB and subkey i+1 for RCD. */
#define enc_fls2(i) \
	fls2(RAB, RCD, i + 0, i + 1);
| 332 | |
/*
 * enc_inpack2 - load two consecutive 16-byte blocks from (RIO).
 *
 * Bytes 0-15 go to RAB0 / RCD0 and bytes 16-31 to RAB1 / RCD1.  Every 8-byte
 * half is byte-swapped and has its two 32-bit words exchanged; subkey 0 is
 * XORed into RAB0 and RAB1.
 */
#define enc_inpack2() \
	/* block 0: bytes 0-15 */ \
	movq (RIO),					RAB0; \
	bswapq						RAB0; \
	rorq $32,					RAB0; \
	movq 4*2(RIO),					RCD0; \
	bswapq						RCD0; \
	rolq $32,					RCD0; \
	xorq key_table(CTX),				RAB0; \
	\
	/* block 1: bytes 16-31 */ \
		movq 8*2(RIO),					RAB1; \
		bswapq						RAB1; \
		rorq $32,					RAB1; \
		movq 12*2(RIO),					RCD1; \
		bswapq						RCD1; \
		rolq $32,					RCD1; \
		xorq key_table(CTX),				RAB1;
| 349 | |
/*
 * enc_outunpack2 - write both blocks to the 32 bytes at (RIO).
 *
 * op:	instruction stem, "mov" to store or "xor" to XOR into the buffer
 *	(the "q" suffix is appended here)
 * max:	register holding the index of the 64-bit subkey XORed into RCD0
 *	and RCD1
 *
 * For each block the RCD half is written first and the RAB half second,
 * each after exchanging its 32-bit words and swapping its bytes.
 */
#define enc_outunpack2(op, max) \
	/* block 0 -> bytes 0-15 */ \
	xorq key_table(CTX, max, 8),			RCD0; \
	rolq $32,					RCD0; \
	bswapq						RCD0; \
	op ## q RCD0,					(RIO); \
	rorq $32,					RAB0; \
	bswapq						RAB0; \
	op ## q RAB0,					4*2(RIO); \
	\
	/* block 1 -> bytes 16-31 */ \
		xorq key_table(CTX, max, 8),			RCD1; \
		rolq $32,					RCD1; \
		bswapq						RCD1; \
		op ## q RCD1,					8*2(RIO); \
		rorq $32,					RAB1; \
		bswapq						RAB1; \
		op ## q RAB1,					12*2(RIO);
| 366 | |
/*
 * dec_rounds2 - six two-block rounds with subkeys i+7 ... i+2 in descending
 * order, i.e. the subkey order of enc_rounds2() reversed.
 */
#define dec_rounds2(i) \
	roundsm2(RAB, i + 7, RCD); \
	roundsm2(RCD, i + 6, RAB); \
	roundsm2(RAB, i + 5, RCD); \
	roundsm2(RCD, i + 4, RAB); \
	roundsm2(RAB, i + 3, RCD); \
	roundsm2(RCD, i + 2, RAB);

/*
 * dec_fls2 - fls2() using subkey i+1 for RAB and subkey i for RCD (the two
 * subkeys are swapped relative to enc_fls2()).
 */
#define dec_fls2(i) \
	fls2(RAB, RCD, i + 1, i + 0);
| 377 | |
/*
 * dec_inpack2 - load two consecutive 16-byte blocks from (RIO).
 *
 * Same unpacking as enc_inpack2(), except that the 64-bit subkey with index
 * 'max' (a register) is XORed into RAB0 and RAB1 instead of subkey 0.
 */
#define dec_inpack2(max) \
	/* block 0: bytes 0-15 */ \
	movq (RIO),					RAB0; \
	bswapq						RAB0; \
	rorq $32,					RAB0; \
	movq 4*2(RIO),					RCD0; \
	bswapq						RCD0; \
	rolq $32,					RCD0; \
	xorq key_table(CTX, max, 8),			RAB0; \
	\
	/* block 1: bytes 16-31 */ \
		movq 8*2(RIO),					RAB1; \
		bswapq						RAB1; \
		rorq $32,					RAB1; \
		movq 12*2(RIO),					RCD1; \
		bswapq						RCD1; \
		rolq $32,					RCD1; \
		xorq key_table(CTX, max, 8),			RAB1;
| 394 | |
/*
 * dec_outunpack2 - store both blocks to the 32 bytes at (RIO).
 *
 * Subkey 0 is XORed into RCD0 and RCD1.  For each block the RCD half is
 * written first and the RAB half second, each after exchanging its 32-bit
 * words and swapping its bytes.
 */
#define dec_outunpack2() \
	/* block 0 -> bytes 0-15 */ \
	xorq key_table(CTX),				RCD0; \
	rolq $32,					RCD0; \
	bswapq						RCD0; \
	movq RCD0,					(RIO); \
	rorq $32,					RAB0; \
	bswapq						RAB0; \
	movq RAB0,					4*2(RIO); \
	\
	/* block 1 -> bytes 16-31 */ \
		xorq key_table(CTX),				RCD1; \
		rolq $32,					RCD1; \
		bswapq						RCD1; \
		movq RCD1,					8*2(RIO); \
		rorq $32,					RAB1; \
		bswapq						RAB1; \
		movq RAB1,					12*2(RIO);
| 411 | |
SYM_FUNC_START(__camellia_enc_blk_2way)
	/*
	 * Encrypt two consecutive 16-byte blocks.
	 *
	 * input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 *	%rcx: bool xor (low byte non-zero: XOR the result into dst
	 *	      instead of storing it)
	 */

	/* RAB1 is %rbx: saved on the stack, restored on both exit paths. */
	pushq	%rbx;

	/* RT1 is %r12; %rcx doubles as RCD0 and %rsi as RIO/RT0. */
	movq	%r12, RR12;
	movq	%rcx, RXOR;
	movq	%rsi, RDST;
	movq	%rdx, RIO;		/* the blocks are read from src */

	enc_inpack2();

	/* Three groups of six rounds are run for every key length. */
	enc_rounds2(0);
	enc_fls2(8);
	enc_rounds2(8);
	enc_fls2(16);
	enc_rounds2(16);
	movl	$24, RT2d;		/* max */

	/* key_length == 16 (low byte compared): no fourth group. */
	cmpb	$16, key_length(CTX);
	je	.Lenc2_finish;

	enc_fls2(24);
	enc_rounds2(24);
	movl	$32, RT2d;		/* max */

.Lenc2_finish:
	/* movq leaves the flags alone, so jnz still sees the testb result. */
	testb	RXORbl, RXORbl;
	movq	RDST, RIO;		/* output goes to dst */
	jnz	.Lenc2_xor_out;

	enc_outunpack2(mov, RT2);

	movq	RR12, %r12;
	popq	%rbx;
	ret;

.Lenc2_xor_out:
	enc_outunpack2(xor, RT2);

	movq	RR12, %r12;
	popq	%rbx;
	ret;
SYM_FUNC_END(__camellia_enc_blk_2way)
Jussi Kivilinna | 0b95ec5 | 2012-03-05 20:26:47 +0200 | [diff] [blame] | 460 | |
SYM_FUNC_START(camellia_dec_blk_2way)
	/*
	 * Decrypt two consecutive 16-byte blocks.
	 *
	 * input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 */

	/*
	 * max = (key_length == 16) ? 24 : 32.  cmov has no immediate form,
	 * so the constant 24 is staged in RXOR.
	 */
	cmpl	$16, key_length(CTX);
	movl	$32, RT2d;
	movl	$24, RXORd;
	cmovel	RXORd, RT2d;		/* max */

	/*
	 * RXOR is free again from here on and keeps the caller's %rbx (RAB1);
	 * RR12 keeps the caller's %r12 (RT1).
	 */
	movq	%rbx, RXOR;
	movq	%r12, RR12;
	movq	%rsi, RDST;
	movq	%rdx, RIO;		/* the blocks are read from src */

	dec_inpack2(RT2);

	/* max == 24: skip the group that only longer keys use. */
	cmpb	$24, RT2bl;
	je	.Ldec2_common;

	dec_rounds2(24);
	dec_fls2(24);

.Ldec2_common:
	dec_rounds2(16);
	dec_fls2(16);
	dec_rounds2(8);
	dec_fls2(8);
	dec_rounds2(0);

	movq	RDST, RIO;		/* output goes to dst */

	dec_outunpack2();

	movq	RR12, %r12;
	movq	RXOR, %rbx;
	ret;
SYM_FUNC_END(camellia_dec_blk_2way)