#! /usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0

# This code is taken from CRYPTOGAMs[1] and is included here using the option
# in the license to distribute the code under the GPL. Therefore this program
# is free software; you can redistribute it and/or modify it under the terms of
# the GNU General Public License version 2 as published by the Free Software
# Foundation.
#
# [1] https://www.openssl.org/~appro/cryptogams/

# Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
#       * Redistributions of source code must retain copyright notices,
#         this list of conditions and the following disclaimer.
#
#       * Redistributions in binary form must reproduce the above
#         copyright notice, this list of conditions and the following
#         disclaimer in the documentation and/or other materials
#         provided with the distribution.
#
#       * Neither the name of the CRYPTOGAMS nor the names of its
#         copyright holder and contributors may be used to endorse or
#         promote products derived from this software without specific
#         prior written permission.
#
# ALTERNATIVELY, provided that this notice is retained in full, this
# product may be distributed under the terms of the GNU General Public
# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
# those given above.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see https://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for AES instructions as per PowerISA
# specification version 2.07, first implemented by POWER8 processor.
# The module is endian-agnostic in the sense that it supports both big-
# and little-endian cases. Data alignment in parallelizable modes is
# handled with VSX loads and stores, which implies MSR.VSX flag being
# set. It should also be noted that ISA specification doesn't prohibit
# alignment exceptions for these instructions on page boundaries.
# Initially alignment was handled in pure AltiVec/VMX way [when data
# is aligned programmatically, which in turn guarantees exception-
# free execution], but it turned out to hamper performance when vcipher
# instructions are interleaved. It's reckoned that eventual
# misalignment penalties at page boundaries are on average lower
# than additional overhead in pure AltiVec approach.
#
# May 2016
#
# Add XTS subroutine, 9x on little- and 12x improvement on big-endian
# systems were measured.
#
######################################################################
# Current large-block performance in cycles per byte processed with
# 128-bit key (less is better).
#
#               CBC en-/decrypt CTR     XTS
# POWER8[le]    3.96/0.72       0.74    1.1
# POWER8[be]    3.75/0.65       0.66    1.0

# Command line: <flavour> [<output>].
# The flavour string selects the word size (it must contain "64" or "32")
# and, when it ends in "le", little-endian code generation.  Everything
# this script prints to STDOUT is piped through ppc-xlate.pl, which is
# started with the flavour and the optional output argument.
$flavour = shift;

# Word-size dependent constants and mnemonics used by the templates below.
if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
	$UCMP	="cmpld";
	$SHL	="sldi";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
	$UCMP	="cmplw";
	$SHL	="slwi";
} else { die "nonsense $flavour"; }

# Non-zero (the word size) for little-endian flavours, 0 otherwise.
$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

# Directory part of the script path, including the trailing separator.
# When the script is invoked without a directory component the match fails,
# so fall back to an empty prefix explicitly instead of reading a stale $1.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";

# Look for the translator next to this script first, then in ../../perlasm.
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# The output argument is optional; an absent one becomes an empty string,
# exactly as the original string concatenation with shift produced.
my $output = shift;
$output = "" unless defined $output;

# "or" (not "||") so that the die is attached to open() itself: with "||"
# the test applied to the always-true command string and a failed open
# went unnoticed.
open STDOUT, "|-", "$^X $xlate $flavour $output"
	or die "can't call $xlate: $!";

$FRAME=8*$SIZE_T;	# base stack frame size used by the routines below
$prefix="aes_p8";	# prefix of every exported symbol

$sp="r1";
$vrsave="r12";
117
#########################################################################
{{{	# Key setup procedures						#
# Appends to $code the assembly template for:
#   rcon / Lconsts              - constant table and a helper that leaves the
#                                 address of rcon in $ptr (clobbers r0),
#   .${prefix}_set_encrypt_key  - expands a 128/192/256-bit user key,
#   .${prefix}_set_decrypt_key  - calls the above, then swaps the round keys
#                                 end-for-end in place (Ldeckey loop).
#
# set_encrypt_key arguments: $inp (r3) user key, $bits (r4) key length in
# bits, $out (r5) key schedule.  Value returned in r3:
#   -1  if $inp or $out is zero,
#   -2  if $bits is below 128, above 256, or not a multiple of 64,
#    0  on success.
# On success the round count (10, 12 or 14) is stored with
# "stw $rounds,0($out)" after the last round key.
#
# Lines starting with "le?" or "?" are annotations for ppc-xlate.pl and
# must be kept verbatim.
#
# General-purpose registers (r8 also serves as a scratch index early on).
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
# Vector registers used by the key expansion proper.
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
# Vector registers used to store the schedule to a possibly unaligned $out.
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));

$code.=<<___;
.machine	"any"

.text

.align	7
rcon:
.long	0x01000000, 0x01000000, 0x01000000, 0x01000000	?rev
.long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
.long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
.long	0,0,0,0	?asis
Lconsts:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$ptr	#vvvvv "distance between . and rcon
	addi	$ptr,$ptr,-0x48
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
.asciz	"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"

.globl	.${prefix}_set_encrypt_key
Lset_encrypt_key:
	mflr	r11
	$PUSH	r11,$LRSAVE($sp)

	li	$ptr,-1
	${UCMP}i	$inp,0
	beq-	Lenc_key_abort	# if ($inp==0) return -1;
	${UCMP}i	$out,0
	beq-	Lenc_key_abort	# if ($out==0) return -1;
	li	$ptr,-2
	cmpwi	$bits,128
	blt-	Lenc_key_abort
	cmpwi	$bits,256
	bgt-	Lenc_key_abort
	andi.	r0,$bits,0x3f
	bne-	Lenc_key_abort

	lis	r0,0xfff0
	mfspr	$vrsave,256
	mtspr	256,r0

	bl	Lconsts
	mtlr	r11

	neg	r9,$inp
	lvx	$in0,0,$inp
	addi	$inp,$inp,15	# 15 is not typo
	lvsr	$key,0,r9	# borrow $key
	li	r8,0x20
	cmpwi	$bits,192
	lvx	$in1,0,$inp
	le?vspltisb	$mask,0x0f	# borrow $mask
	lvx	$rcon,0,$ptr
	le?vxor	$key,$key,$mask	# adjust for byte swap
	lvx	$mask,r8,$ptr
	addi	$ptr,$ptr,0x10
	vperm	$in0,$in0,$in1,$key	# align [and byte swap in LE]
	li	$cnt,8
	vxor	$zero,$zero,$zero
	mtctr	$cnt

	?lvsr	$outperm,0,$out
	vspltisb	$outmask,-1
	lvx	$outhead,0,$out
	?vperm	$outmask,$zero,$outmask,$outperm

	blt	Loop128
	addi	$inp,$inp,8
	beq	L192
	addi	$inp,$inp,8
	b	L256

.align	4
Loop128:
	vperm	$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in0,$in0,$key
	bdnz	Loop128

	lvx	$rcon,0,$ptr	# last two round keys

	vperm	$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in0,$in0,$key

	vperm	$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vxor	$in0,$in0,$key
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out

	addi	$inp,$out,15	# 15 is not typo
	addi	$out,$out,0x50

	li	$rounds,10
	b	Ldone

.align	4
L192:
	lvx	$tmp,0,$inp
	li	$cnt,4
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$out,$out,16
	vperm	$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	vspltisb	$key,8	# borrow $key
	mtctr	$cnt
	vsububm	$mask,$mask,$key	# adjust the mask

Loop192:
	vperm	$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vcipherlast	$key,$key,$rcon

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp

	vsldoi	$stage,$zero,$in1,8
	vspltw	$tmp,$in0,3
	vxor	$tmp,$tmp,$in1
	vsldoi	$in1,$zero,$in1,12	# >>32
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in1,$in1,$tmp
	vxor	$in0,$in0,$key
	vxor	$in1,$in1,$key
	vsldoi	$stage,$stage,$in0,8

	vperm	$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$stage,$stage,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vsldoi	$stage,$in0,$in1,8
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vperm	$outtail,$stage,$stage,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	stvx	$stage,0,$out
	addi	$out,$out,16

	vspltw	$tmp,$in0,3
	vxor	$tmp,$tmp,$in1
	vsldoi	$in1,$zero,$in1,12	# >>32
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in1,$in1,$tmp
	vxor	$in0,$in0,$key
	vxor	$in1,$in1,$key
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$inp,$out,15	# 15 is not typo
	addi	$out,$out,16
	bdnz	Loop192

	li	$rounds,12
	addi	$out,$out,0x20
	b	Ldone

.align	4
L256:
	lvx	$tmp,0,$inp
	li	$cnt,7
	li	$rounds,14
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$out,$out,16
	vperm	$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	mtctr	$cnt

Loop256:
	vperm	$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in1,$in1,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in0,$in0,$key
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$inp,$out,15	# 15 is not typo
	addi	$out,$out,16
	bdz	Ldone

	vspltw	$key,$in0,3	# just splat
	vsldoi	$tmp,$zero,$in1,12	# >>32
	vsbox	$key,$key

	vxor	$in1,$in1,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in1,$in1,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in1,$in1,$tmp

	vxor	$in1,$in1,$key
	b	Loop256

.align	4
Ldone:
	lvx	$in1,0,$inp	# redundant in aligned case
	vsel	$in1,$outhead,$in1,$outmask
	stvx	$in1,0,$inp
	li	$ptr,0
	mtspr	256,$vrsave
	stw	$rounds,0($out)

Lenc_key_abort:
	mr	r3,$ptr
	blr
	.long	0
	.byte	0,12,0x14,1,0,0,3,0
	.long	0
.size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

.globl	.${prefix}_set_decrypt_key
	$STU	$sp,-$FRAME($sp)
	mflr	r10
	$PUSH	r10,$FRAME+$LRSAVE($sp)
	bl	Lset_encrypt_key
	mtlr	r10

	cmpwi	r3,0
	bne-	Ldec_key_abort

	slwi	$cnt,$rounds,4
	subi	$inp,$out,240	# first round key
	srwi	$rounds,$rounds,1
	add	$out,$inp,$cnt	# last round key
	mtctr	$rounds

Ldeckey:
	lwz	r0, 0($inp)
	lwz	r6, 4($inp)
	lwz	r7, 8($inp)
	lwz	r8, 12($inp)
	addi	$inp,$inp,16
	lwz	r9, 0($out)
	lwz	r10,4($out)
	lwz	r11,8($out)
	lwz	r12,12($out)
	stw	r0, 0($out)
	stw	r6, 4($out)
	stw	r7, 8($out)
	stw	r8, 12($out)
	subi	$out,$out,16
	stw	r9, -16($inp)
	stw	r10,-12($inp)
	stw	r11,-8($inp)
	stw	r12,-4($inp)
	bdnz	Ldeckey

	xor	r3,r3,r3	# return value
Ldec_key_abort:
	addi	$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,4,1,0x80,0,3,0
	.long	0
.size	.${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}
#########################################################################
{{{	# Single block en- and decrypt procedures			#
# gen_block($dir) appends to $code the assembly template of the single-block
# routine .${prefix}_${dir}crypt.
#
#   $dir  "en" or "de".  "de" inserts an "n" into the cipher mnemonics, so
#         the routine is built from vncipher/vncipherlast instead of
#         vcipher/vcipherlast; the label Loop_${dir}c is named after it too.
#
# The generated routine takes $inp (r3), $out (r4) and $key (r5), and reads
# the round count from offset 240 of the key schedule.  Input, output and
# round keys may be unaligned: each is re-aligned with lvsl/vperm, and the
# result is merged into the two quadwords overlapping $out with vsel.
# Nothing is returned; the only effect is the text appended to $code.
#
# The sub is declared without a prototype: it takes one argument, so the
# former empty prototype "()" was wrong and only worked because the calls
# used the prototype-bypassing "&" form.
sub gen_block {
my $dir = shift;
my $n   = $dir eq "de" ? "n" : "";
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));

$code.=<<___;
.globl	.${prefix}_${dir}crypt
	lwz	$rounds,240($key)
	lis	r0,0xfc00
	mfspr	$vrsave,256
	li	$idx,15	# 15 is not typo
	mtspr	256,r0

	lvx	v0,0,$inp
	neg	r11,$out
	lvx	v1,$idx,$inp
	lvsl	v2,0,$inp	# inpperm
	le?vspltisb	v4,0x0f
	?lvsl	v3,0,r11	# outperm
	le?vxor	v2,v2,v4
	li	$idx,16
	vperm	v0,v0,v1,v2	# align [and byte swap in LE]
	lvx	v1,0,$key
	?lvsl	v5,0,$key	# keyperm
	srwi	$rounds,$rounds,1
	lvx	v2,$idx,$key
	addi	$idx,$idx,16
	subi	$rounds,$rounds,1
	?vperm	v1,v1,v2,v5	# align round key

	vxor	v0,v0,v1
	lvx	v1,$idx,$key
	addi	$idx,$idx,16
	mtctr	$rounds

Loop_${dir}c:
	?vperm	v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx	v2,$idx,$key
	addi	$idx,$idx,16
	?vperm	v1,v1,v2,v5
	v${n}cipher	v0,v0,v1
	lvx	v1,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_${dir}c

	?vperm	v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx	v2,$idx,$key
	?vperm	v1,v1,v2,v5
	v${n}cipherlast	v0,v0,v1

	vspltisb	v2,-1
	vxor	v1,v1,v1
	li	$idx,15	# 15 is not typo
	?vperm	v2,v1,v2,v3	# outmask
	le?vxor	v3,v3,v4
	lvx	v1,0,$out	# outhead
	vperm	v0,v0,v0,v3	# rotate [and byte swap in LE]
	vsel	v1,v1,v0,v2
	lvx	v4,$idx,$out
	stvx	v1,0,$out
	vsel	v0,v0,v4,v2
	stvx	v0,$idx,$out

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,3,0
	.long	0
.size	.${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
}
gen_block("en");
gen_block("de");
}}}
532#########################################################################
533{{{ # CBC en- and decrypt procedures #
# Appends to $code the assembly template for .${prefix}_cbc_encrypt.
#
# Arguments of the generated routine: $inp (r3), $out (r4), $len (r5),
# $key (r6), $ivp (r7), $enc (r8).  $rounds (r9) and $idx (r10) are
# scratch; $enc is reused as scratch once the direction has been tested.
#
# Behaviour visible in the template:
#   - returns immediately when $len is below 16 ("bltlr-");
#   - $enc == 0 selects the decrypt path (Lcbc_dec), anything else encrypts;
#   - on the decrypt path, $len >= 128 branches to _aesp8_cbc_decrypt8x,
#     which is emitted by a later part of this file;
#   - both paths process one 16-byte block per iteration while $len >= 16
#     and finish at Lcbc_done, which flushes the last partial output
#     quadword and writes the updated iv back to $ivp (possibly unaligned).
#
# Lines starting with "le?" or "?" are annotations for ppc-xlate.pl and
# must be kept verbatim.
my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
						map("v$_",(4..10));
$code.=<<___;
.globl	.${prefix}_cbc_encrypt
	${UCMP}i	$len,16
	bltlr-

	cmpwi	$enc,0	# test direction
	lis	r0,0xffe0
	mfspr	$vrsave,256
	mtspr	256,r0

	li	$idx,15
	vxor	$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx	$ivec,0,$ivp	# load [unaligned] iv
	lvsl	$inpperm,0,$ivp
	lvx	$inptail,$idx,$ivp
	le?vxor	$inpperm,$inpperm,$tmp
	vperm	$ivec,$ivec,$inptail,$inpperm

	neg	r11,$inp
	?lvsl	$keyperm,0,$key	# prepare for unaligned key
	lwz	$rounds,240($key)

	lvsr	$inpperm,0,r11	# prepare for unaligned load
	lvx	$inptail,0,$inp
	addi	$inp,$inp,15	# 15 is not typo
	le?vxor	$inpperm,$inpperm,$tmp

	?lvsr	$outperm,0,$out	# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx	$outhead,0,$out
	?vperm	$outmask,$rndkey0,$outmask,$outperm
	le?vxor	$outperm,$outperm,$tmp

	srwi	$rounds,$rounds,1
	li	$idx,16
	subi	$rounds,$rounds,1
	beq	Lcbc_dec

Lcbc_enc:
	vmr	$inout,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	mtctr	$rounds
	subi	$len,$len,16	# len-=16

	lvx	$rndkey0,0,$key
	vperm	$inout,$inout,$inptail,$inpperm
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	vxor	$inout,$inout,$ivec

Loop_cbc_enc:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_cbc_enc

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$ivec,$inout,$rndkey0
	${UCMP}i	$len,16

	vperm	$tmp,$ivec,$ivec,$outperm
	vsel	$inout,$outhead,$tmp,$outmask
	vmr	$outhead,$tmp
	stvx	$inout,0,$out
	addi	$out,$out,16
	bge	Lcbc_enc

	b	Lcbc_done

.align	4
Lcbc_dec:
	${UCMP}i	$len,128
	bge	_aesp8_cbc_decrypt8x
	vmr	$tmp,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	mtctr	$rounds
	subi	$len,$len,16	# len-=16

	lvx	$rndkey0,0,$key
	vperm	$tmp,$tmp,$inptail,$inpperm
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$tmp,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16

Loop_cbc_dec:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_cbc_dec

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipherlast	$inout,$inout,$rndkey0
	${UCMP}i	$len,16

	vxor	$inout,$inout,$ivec
	vmr	$ivec,$tmp
	vperm	$tmp,$inout,$inout,$outperm
	vsel	$inout,$outhead,$tmp,$outmask
	vmr	$outhead,$tmp
	stvx	$inout,0,$out
	addi	$out,$out,16
	bge	Lcbc_dec

Lcbc_done:
	addi	$out,$out,-1
	lvx	$inout,0,$out	# redundant in aligned case
	vsel	$inout,$outhead,$inout,$outmask
	stvx	$inout,0,$out

	neg	$enc,$ivp	# write [unaligned] iv
	li	$idx,15	# 15 is not typo
	vxor	$rndkey0,$rndkey0,$rndkey0
	vspltisb	$outmask,-1
	le?vspltisb	$tmp,0x0f
	?lvsl	$outperm,0,$enc
	?vperm	$outmask,$rndkey0,$outmask,$outperm
	le?vxor	$outperm,$outperm,$tmp
	lvx	$outhead,0,$ivp
	vperm	$ivec,$ivec,$ivec,$outperm
	vsel	$inout,$outhead,$ivec,$outmask
	lvx	$inptail,$idx,$ivp
	stvx	$inout,0,$ivp
	vsel	$inout,$ivec,$inptail,$outmask
	stvx	$inout,$idx,$ivp

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,6,0
	.long	0
___
698#########################################################################
699{{ # Optimized CBC decrypt procedure #
700my $key_="r11";
701my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
702my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
703my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
704my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
705 # v26-v31 last 6 round keys
706my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
707
708$code.=<<___;
709.align 5
710_aesp8_cbc_decrypt8x:
711 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
712 li r10,`$FRAME+8*16+15`
713 li r11,`$FRAME+8*16+31`
714 stvx v20,r10,$sp # ABI says so
715 addi r10,r10,32
716 stvx v21,r11,$sp
717 addi r11,r11,32
718 stvx v22,r10,$sp
719 addi r10,r10,32
720 stvx v23,r11,$sp
721 addi r11,r11,32
722 stvx v24,r10,$sp
723 addi r10,r10,32
724 stvx v25,r11,$sp
725 addi r11,r11,32
726 stvx v26,r10,$sp
727 addi r10,r10,32
728 stvx v27,r11,$sp
729 addi r11,r11,32
730 stvx v28,r10,$sp
731 addi r10,r10,32
732 stvx v29,r11,$sp
733 addi r11,r11,32
734 stvx v30,r10,$sp
735 stvx v31,r11,$sp
736 li r0,-1
737 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
738 li $x10,0x10
739 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
740 li $x20,0x20
741 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
742 li $x30,0x30
743 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
744 li $x40,0x40
745 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
746 li $x50,0x50
747 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
748 li $x60,0x60
749 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
750 li $x70,0x70
751 mtspr 256,r0
752
753 subi $rounds,$rounds,3 # -4 in total
754 subi $len,$len,128 # bias
755
756 lvx $rndkey0,$x00,$key # load key schedule
757 lvx v30,$x10,$key
758 addi $key,$key,0x20
759 lvx v31,$x00,$key
760 ?vperm $rndkey0,$rndkey0,v30,$keyperm
761 addi $key_,$sp,$FRAME+15
762 mtctr $rounds
763
764Load_cbc_dec_key:
765 ?vperm v24,v30,v31,$keyperm
766 lvx v30,$x10,$key
767 addi $key,$key,0x20
768 stvx v24,$x00,$key_ # off-load round[1]
769 ?vperm v25,v31,v30,$keyperm
770 lvx v31,$x00,$key
771 stvx v25,$x10,$key_ # off-load round[2]
772 addi $key_,$key_,0x20
773 bdnz Load_cbc_dec_key
774
775 lvx v26,$x10,$key
776 ?vperm v24,v30,v31,$keyperm
777 lvx v27,$x20,$key
778 stvx v24,$x00,$key_ # off-load round[3]
779 ?vperm v25,v31,v26,$keyperm
780 lvx v28,$x30,$key
781 stvx v25,$x10,$key_ # off-load round[4]
782 addi $key_,$sp,$FRAME+15 # rewind $key_
783 ?vperm v26,v26,v27,$keyperm
784 lvx v29,$x40,$key
785 ?vperm v27,v27,v28,$keyperm
786 lvx v30,$x50,$key
787 ?vperm v28,v28,v29,$keyperm
788 lvx v31,$x60,$key
789 ?vperm v29,v29,v30,$keyperm
790 lvx $out0,$x70,$key # borrow $out0
791 ?vperm v30,v30,v31,$keyperm
792 lvx v24,$x00,$key_ # pre-load round[1]
793 ?vperm v31,v31,$out0,$keyperm
794 lvx v25,$x10,$key_ # pre-load round[2]
795
796 #lvx $inptail,0,$inp # "caller" already did this
797 #addi $inp,$inp,15 # 15 is not typo
798 subi $inp,$inp,15 # undo "caller"
799
800 le?li $idx,8
801 lvx_u $in0,$x00,$inp # load first 8 "words"
802 le?lvsl $inpperm,0,$idx
803 le?vspltisb $tmp,0x0f
804 lvx_u $in1,$x10,$inp
805 le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
806 lvx_u $in2,$x20,$inp
807 le?vperm $in0,$in0,$in0,$inpperm
808 lvx_u $in3,$x30,$inp
809 le?vperm $in1,$in1,$in1,$inpperm
810 lvx_u $in4,$x40,$inp
811 le?vperm $in2,$in2,$in2,$inpperm
812 vxor $out0,$in0,$rndkey0
813 lvx_u $in5,$x50,$inp
814 le?vperm $in3,$in3,$in3,$inpperm
815 vxor $out1,$in1,$rndkey0
816 lvx_u $in6,$x60,$inp
817 le?vperm $in4,$in4,$in4,$inpperm
818 vxor $out2,$in2,$rndkey0
819 lvx_u $in7,$x70,$inp
820 addi $inp,$inp,0x80
821 le?vperm $in5,$in5,$in5,$inpperm
822 vxor $out3,$in3,$rndkey0
823 le?vperm $in6,$in6,$in6,$inpperm
824 vxor $out4,$in4,$rndkey0
825 le?vperm $in7,$in7,$in7,$inpperm
826 vxor $out5,$in5,$rndkey0
827 vxor $out6,$in6,$rndkey0
828 vxor $out7,$in7,$rndkey0
829
830 mtctr $rounds
831 b Loop_cbc_dec8x
832.align 5
833Loop_cbc_dec8x:
834 vncipher $out0,$out0,v24
835 vncipher $out1,$out1,v24
836 vncipher $out2,$out2,v24
837 vncipher $out3,$out3,v24
838 vncipher $out4,$out4,v24
839 vncipher $out5,$out5,v24
840 vncipher $out6,$out6,v24
841 vncipher $out7,$out7,v24
842 lvx v24,$x20,$key_ # round[3]
843 addi $key_,$key_,0x20
844
845 vncipher $out0,$out0,v25
846 vncipher $out1,$out1,v25
847 vncipher $out2,$out2,v25
848 vncipher $out3,$out3,v25
849 vncipher $out4,$out4,v25
850 vncipher $out5,$out5,v25
851 vncipher $out6,$out6,v25
852 vncipher $out7,$out7,v25
853 lvx v25,$x10,$key_ # round[4]
854 bdnz Loop_cbc_dec8x
855
856 subic $len,$len,128 # $len-=128
857 vncipher $out0,$out0,v24
858 vncipher $out1,$out1,v24
859 vncipher $out2,$out2,v24
860 vncipher $out3,$out3,v24
861 vncipher $out4,$out4,v24
862 vncipher $out5,$out5,v24
863 vncipher $out6,$out6,v24
864 vncipher $out7,$out7,v24
865
866 subfe. r0,r0,r0 # borrow?-1:0
867 vncipher $out0,$out0,v25
868 vncipher $out1,$out1,v25
869 vncipher $out2,$out2,v25
870 vncipher $out3,$out3,v25
871 vncipher $out4,$out4,v25
872 vncipher $out5,$out5,v25
873 vncipher $out6,$out6,v25
874 vncipher $out7,$out7,v25
875
876 and r0,r0,$len
877 vncipher $out0,$out0,v26
878 vncipher $out1,$out1,v26
879 vncipher $out2,$out2,v26
880 vncipher $out3,$out3,v26
881 vncipher $out4,$out4,v26
882 vncipher $out5,$out5,v26
883 vncipher $out6,$out6,v26
884 vncipher $out7,$out7,v26
885
886 add $inp,$inp,r0 # $inp is adjusted in such
887 # way that at exit from the
888 # loop inX-in7 are loaded
889 # with last "words"
890 vncipher $out0,$out0,v27
891 vncipher $out1,$out1,v27
892 vncipher $out2,$out2,v27
893 vncipher $out3,$out3,v27
894 vncipher $out4,$out4,v27
895 vncipher $out5,$out5,v27
896 vncipher $out6,$out6,v27
897 vncipher $out7,$out7,v27
898
899 addi $key_,$sp,$FRAME+15 # rewind $key_
900 vncipher $out0,$out0,v28
901 vncipher $out1,$out1,v28
902 vncipher $out2,$out2,v28
903 vncipher $out3,$out3,v28
904 vncipher $out4,$out4,v28
905 vncipher $out5,$out5,v28
906 vncipher $out6,$out6,v28
907 vncipher $out7,$out7,v28
908 lvx v24,$x00,$key_ # re-pre-load round[1]
909
910 vncipher $out0,$out0,v29
911 vncipher $out1,$out1,v29
912 vncipher $out2,$out2,v29
913 vncipher $out3,$out3,v29
914 vncipher $out4,$out4,v29
915 vncipher $out5,$out5,v29
916 vncipher $out6,$out6,v29
917 vncipher $out7,$out7,v29
918 lvx v25,$x10,$key_ # re-pre-load round[2]
919
920 vncipher $out0,$out0,v30
921 vxor $ivec,$ivec,v31 # xor with last round key
922 vncipher $out1,$out1,v30
923 vxor $in0,$in0,v31
924 vncipher $out2,$out2,v30
925 vxor $in1,$in1,v31
926 vncipher $out3,$out3,v30
927 vxor $in2,$in2,v31
928 vncipher $out4,$out4,v30
929 vxor $in3,$in3,v31
930 vncipher $out5,$out5,v30
931 vxor $in4,$in4,v31
932 vncipher $out6,$out6,v30
933 vxor $in5,$in5,v31
934 vncipher $out7,$out7,v30
935 vxor $in6,$in6,v31
936
937 vncipherlast $out0,$out0,$ivec
938 vncipherlast $out1,$out1,$in0
939 lvx_u $in0,$x00,$inp # load next input block
940 vncipherlast $out2,$out2,$in1
941 lvx_u $in1,$x10,$inp
942 vncipherlast $out3,$out3,$in2
943 le?vperm $in0,$in0,$in0,$inpperm
944 lvx_u $in2,$x20,$inp
945 vncipherlast $out4,$out4,$in3
946 le?vperm $in1,$in1,$in1,$inpperm
947 lvx_u $in3,$x30,$inp
948 vncipherlast $out5,$out5,$in4
949 le?vperm $in2,$in2,$in2,$inpperm
950 lvx_u $in4,$x40,$inp
951 vncipherlast $out6,$out6,$in5
952 le?vperm $in3,$in3,$in3,$inpperm
953 lvx_u $in5,$x50,$inp
954 vncipherlast $out7,$out7,$in6
955 le?vperm $in4,$in4,$in4,$inpperm
956 lvx_u $in6,$x60,$inp
957 vmr $ivec,$in7
958 le?vperm $in5,$in5,$in5,$inpperm
959 lvx_u $in7,$x70,$inp
960 addi $inp,$inp,0x80
961
962 le?vperm $out0,$out0,$out0,$inpperm
963 le?vperm $out1,$out1,$out1,$inpperm
964 stvx_u $out0,$x00,$out
965 le?vperm $in6,$in6,$in6,$inpperm
966 vxor $out0,$in0,$rndkey0
967 le?vperm $out2,$out2,$out2,$inpperm
968 stvx_u $out1,$x10,$out
969 le?vperm $in7,$in7,$in7,$inpperm
970 vxor $out1,$in1,$rndkey0
971 le?vperm $out3,$out3,$out3,$inpperm
972 stvx_u $out2,$x20,$out
973 vxor $out2,$in2,$rndkey0
974 le?vperm $out4,$out4,$out4,$inpperm
975 stvx_u $out3,$x30,$out
976 vxor $out3,$in3,$rndkey0
977 le?vperm $out5,$out5,$out5,$inpperm
978 stvx_u $out4,$x40,$out
979 vxor $out4,$in4,$rndkey0
980 le?vperm $out6,$out6,$out6,$inpperm
981 stvx_u $out5,$x50,$out
982 vxor $out5,$in5,$rndkey0
983 le?vperm $out7,$out7,$out7,$inpperm
984 stvx_u $out6,$x60,$out
985 vxor $out6,$in6,$rndkey0
986 stvx_u $out7,$x70,$out
987 addi $out,$out,0x80
988 vxor $out7,$in7,$rndkey0
989
990 mtctr $rounds
991 beq Loop_cbc_dec8x # did $len-=128 borrow?
992
993 addic. $len,$len,128
994 beq Lcbc_dec8x_done
995 nop
996 nop
997
998Loop_cbc_dec8x_tail: # up to 7 "words" tail...
999 vncipher $out1,$out1,v24
1000 vncipher $out2,$out2,v24
1001 vncipher $out3,$out3,v24
1002 vncipher $out4,$out4,v24
1003 vncipher $out5,$out5,v24
1004 vncipher $out6,$out6,v24
1005 vncipher $out7,$out7,v24
1006 lvx v24,$x20,$key_ # round[3]
1007 addi $key_,$key_,0x20
1008
1009 vncipher $out1,$out1,v25
1010 vncipher $out2,$out2,v25
1011 vncipher $out3,$out3,v25
1012 vncipher $out4,$out4,v25
1013 vncipher $out5,$out5,v25
1014 vncipher $out6,$out6,v25
1015 vncipher $out7,$out7,v25
1016 lvx v25,$x10,$key_ # round[4]
1017 bdnz Loop_cbc_dec8x_tail
1018
1019 vncipher $out1,$out1,v24
1020 vncipher $out2,$out2,v24
1021 vncipher $out3,$out3,v24
1022 vncipher $out4,$out4,v24
1023 vncipher $out5,$out5,v24
1024 vncipher $out6,$out6,v24
1025 vncipher $out7,$out7,v24
1026
1027 vncipher $out1,$out1,v25
1028 vncipher $out2,$out2,v25
1029 vncipher $out3,$out3,v25
1030 vncipher $out4,$out4,v25
1031 vncipher $out5,$out5,v25
1032 vncipher $out6,$out6,v25
1033 vncipher $out7,$out7,v25
1034
1035 vncipher $out1,$out1,v26
1036 vncipher $out2,$out2,v26
1037 vncipher $out3,$out3,v26
1038 vncipher $out4,$out4,v26
1039 vncipher $out5,$out5,v26
1040 vncipher $out6,$out6,v26
1041 vncipher $out7,$out7,v26
1042
1043 vncipher $out1,$out1,v27
1044 vncipher $out2,$out2,v27
1045 vncipher $out3,$out3,v27
1046 vncipher $out4,$out4,v27
1047 vncipher $out5,$out5,v27
1048 vncipher $out6,$out6,v27
1049 vncipher $out7,$out7,v27
1050
1051 vncipher $out1,$out1,v28
1052 vncipher $out2,$out2,v28
1053 vncipher $out3,$out3,v28
1054 vncipher $out4,$out4,v28
1055 vncipher $out5,$out5,v28
1056 vncipher $out6,$out6,v28
1057 vncipher $out7,$out7,v28
1058
1059 vncipher $out1,$out1,v29
1060 vncipher $out2,$out2,v29
1061 vncipher $out3,$out3,v29
1062 vncipher $out4,$out4,v29
1063 vncipher $out5,$out5,v29
1064 vncipher $out6,$out6,v29
1065 vncipher $out7,$out7,v29
1066
1067 vncipher $out1,$out1,v30
1068 vxor $ivec,$ivec,v31 # last round key
1069 vncipher $out2,$out2,v30
1070 vxor $in1,$in1,v31
1071 vncipher $out3,$out3,v30
1072 vxor $in2,$in2,v31
1073 vncipher $out4,$out4,v30
1074 vxor $in3,$in3,v31
1075 vncipher $out5,$out5,v30
1076 vxor $in4,$in4,v31
1077 vncipher $out6,$out6,v30
1078 vxor $in5,$in5,v31
1079 vncipher $out7,$out7,v30
1080 vxor $in6,$in6,v31
1081
1082 cmplwi $len,32 # switch($len)
1083 blt Lcbc_dec8x_one
1084 nop
1085 beq Lcbc_dec8x_two
1086 cmplwi $len,64
1087 blt Lcbc_dec8x_three
1088 nop
1089 beq Lcbc_dec8x_four
1090 cmplwi $len,96
1091 blt Lcbc_dec8x_five
1092 nop
1093 beq Lcbc_dec8x_six
1094
1095Lcbc_dec8x_seven:
1096 vncipherlast $out1,$out1,$ivec
1097 vncipherlast $out2,$out2,$in1
1098 vncipherlast $out3,$out3,$in2
1099 vncipherlast $out4,$out4,$in3
1100 vncipherlast $out5,$out5,$in4
1101 vncipherlast $out6,$out6,$in5
1102 vncipherlast $out7,$out7,$in6
1103 vmr $ivec,$in7
1104
1105 le?vperm $out1,$out1,$out1,$inpperm
1106 le?vperm $out2,$out2,$out2,$inpperm
1107 stvx_u $out1,$x00,$out
1108 le?vperm $out3,$out3,$out3,$inpperm
1109 stvx_u $out2,$x10,$out
1110 le?vperm $out4,$out4,$out4,$inpperm
1111 stvx_u $out3,$x20,$out
1112 le?vperm $out5,$out5,$out5,$inpperm
1113 stvx_u $out4,$x30,$out
1114 le?vperm $out6,$out6,$out6,$inpperm
1115 stvx_u $out5,$x40,$out
1116 le?vperm $out7,$out7,$out7,$inpperm
1117 stvx_u $out6,$x50,$out
1118 stvx_u $out7,$x60,$out
1119 addi $out,$out,0x70
1120 b Lcbc_dec8x_done
1121
1122.align 5
1123Lcbc_dec8x_six:
1124 vncipherlast $out2,$out2,$ivec
1125 vncipherlast $out3,$out3,$in2
1126 vncipherlast $out4,$out4,$in3
1127 vncipherlast $out5,$out5,$in4
1128 vncipherlast $out6,$out6,$in5
1129 vncipherlast $out7,$out7,$in6
1130 vmr $ivec,$in7
1131
1132 le?vperm $out2,$out2,$out2,$inpperm
1133 le?vperm $out3,$out3,$out3,$inpperm
1134 stvx_u $out2,$x00,$out
1135 le?vperm $out4,$out4,$out4,$inpperm
1136 stvx_u $out3,$x10,$out
1137 le?vperm $out5,$out5,$out5,$inpperm
1138 stvx_u $out4,$x20,$out
1139 le?vperm $out6,$out6,$out6,$inpperm
1140 stvx_u $out5,$x30,$out
1141 le?vperm $out7,$out7,$out7,$inpperm
1142 stvx_u $out6,$x40,$out
1143 stvx_u $out7,$x50,$out
1144 addi $out,$out,0x60
1145 b Lcbc_dec8x_done
1146
1147.align 5
1148Lcbc_dec8x_five:
1149 vncipherlast $out3,$out3,$ivec
1150 vncipherlast $out4,$out4,$in3
1151 vncipherlast $out5,$out5,$in4
1152 vncipherlast $out6,$out6,$in5
1153 vncipherlast $out7,$out7,$in6
1154 vmr $ivec,$in7
1155
1156 le?vperm $out3,$out3,$out3,$inpperm
1157 le?vperm $out4,$out4,$out4,$inpperm
1158 stvx_u $out3,$x00,$out
1159 le?vperm $out5,$out5,$out5,$inpperm
1160 stvx_u $out4,$x10,$out
1161 le?vperm $out6,$out6,$out6,$inpperm
1162 stvx_u $out5,$x20,$out
1163 le?vperm $out7,$out7,$out7,$inpperm
1164 stvx_u $out6,$x30,$out
1165 stvx_u $out7,$x40,$out
1166 addi $out,$out,0x50
1167 b Lcbc_dec8x_done
1168
1169.align 5
1170Lcbc_dec8x_four:
1171 vncipherlast $out4,$out4,$ivec
1172 vncipherlast $out5,$out5,$in4
1173 vncipherlast $out6,$out6,$in5
1174 vncipherlast $out7,$out7,$in6
1175 vmr $ivec,$in7
1176
1177 le?vperm $out4,$out4,$out4,$inpperm
1178 le?vperm $out5,$out5,$out5,$inpperm
1179 stvx_u $out4,$x00,$out
1180 le?vperm $out6,$out6,$out6,$inpperm
1181 stvx_u $out5,$x10,$out
1182 le?vperm $out7,$out7,$out7,$inpperm
1183 stvx_u $out6,$x20,$out
1184 stvx_u $out7,$x30,$out
1185 addi $out,$out,0x40
1186 b Lcbc_dec8x_done
1187
1188.align 5
1189Lcbc_dec8x_three:
1190 vncipherlast $out5,$out5,$ivec
1191 vncipherlast $out6,$out6,$in5
1192 vncipherlast $out7,$out7,$in6
1193 vmr $ivec,$in7
1194
1195 le?vperm $out5,$out5,$out5,$inpperm
1196 le?vperm $out6,$out6,$out6,$inpperm
1197 stvx_u $out5,$x00,$out
1198 le?vperm $out7,$out7,$out7,$inpperm
1199 stvx_u $out6,$x10,$out
1200 stvx_u $out7,$x20,$out
1201 addi $out,$out,0x30
1202 b Lcbc_dec8x_done
1203
1204.align 5
1205Lcbc_dec8x_two:
1206 vncipherlast $out6,$out6,$ivec
1207 vncipherlast $out7,$out7,$in6
1208 vmr $ivec,$in7
1209
1210 le?vperm $out6,$out6,$out6,$inpperm
1211 le?vperm $out7,$out7,$out7,$inpperm
1212 stvx_u $out6,$x00,$out
1213 stvx_u $out7,$x10,$out
1214 addi $out,$out,0x20
1215 b Lcbc_dec8x_done
1216
1217.align 5
1218Lcbc_dec8x_one:
1219 vncipherlast $out7,$out7,$ivec
1220 vmr $ivec,$in7
1221
1222 le?vperm $out7,$out7,$out7,$inpperm
1223 stvx_u $out7,0,$out
1224 addi $out,$out,0x10
1225
1226Lcbc_dec8x_done:
1227 le?vperm $ivec,$ivec,$ivec,$inpperm
1228 stvx_u $ivec,0,$ivp # write [unaligned] iv
1229
1230 li r10,`$FRAME+15`
1231 li r11,`$FRAME+31`
1232 stvx $inpperm,r10,$sp # wipe copies of round keys
1233 addi r10,r10,32
1234 stvx $inpperm,r11,$sp
1235 addi r11,r11,32
1236 stvx $inpperm,r10,$sp
1237 addi r10,r10,32
1238 stvx $inpperm,r11,$sp
1239 addi r11,r11,32
1240 stvx $inpperm,r10,$sp
1241 addi r10,r10,32
1242 stvx $inpperm,r11,$sp
1243 addi r11,r11,32
1244 stvx $inpperm,r10,$sp
1245 addi r10,r10,32
1246 stvx $inpperm,r11,$sp
1247 addi r11,r11,32
1248
1249 mtspr 256,$vrsave
1250 lvx v20,r10,$sp # ABI says so
1251 addi r10,r10,32
1252 lvx v21,r11,$sp
1253 addi r11,r11,32
1254 lvx v22,r10,$sp
1255 addi r10,r10,32
1256 lvx v23,r11,$sp
1257 addi r11,r11,32
1258 lvx v24,r10,$sp
1259 addi r10,r10,32
1260 lvx v25,r11,$sp
1261 addi r11,r11,32
1262 lvx v26,r10,$sp
1263 addi r10,r10,32
1264 lvx v27,r11,$sp
1265 addi r11,r11,32
1266 lvx v28,r10,$sp
1267 addi r10,r10,32
1268 lvx v29,r11,$sp
1269 addi r11,r11,32
1270 lvx v30,r10,$sp
1271 lvx v31,r11,$sp
1272 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1273 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1274 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1275 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1276 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1277 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1278 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1279 blr
1280 .long 0
1281 .byte 0,12,0x14,0,0x80,6,6,0
1282 .long 0
1283.size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
1284___
1285}} }}}
1286
#########################################################################
{{{	# CTR procedure[s]						#

####################### WARNING: Here be dragons! #######################
#
# This code is written as 'ctr32', based on a 32-bit counter used
# upstream. The kernel does *not* use a 32-bit counter. The kernel uses
# a 128-bit counter.
#
# This leads to subtle changes from the upstream code: the counter
# is incremented with vaddu_q_m rather than vaddu_w_m. This occurs in
# both the bulk (8 blocks at a time) path, and in the individual block
# path. Be aware of this when doing updates.
#
# See:
# 1d4aa0b4c181 ("crypto: vmx - Fixing AES-CTR counter bug")
# 009b30ac7444 ("crypto: vmx - CTR: always increment IV as quadword")
# https://github.com/openssl/openssl/pull/8942
#
#########################################################################

# ${prefix}_ctr32_encrypt_blocks
#
# Register assignment established by the declarations below:
#   r3 = $inp	input pointer (may be unaligned)
#   r4 = $out	output pointer (may be unaligned)
#   r5 = $len	number of 16-byte blocks ("blocks--" in the scalar loop)
#   r6 = $key	key schedule; the round count is read from offset 240
#   r7 = $ivp	pointer to the 16-byte counter block (may be unaligned)
#   r8-r10	scratch: $x10, $rounds, $idx
#
# Flow of the code emitted by the heredoc that follows:
#   * returns at once (bltlr-) when $len compares below 1;
#   * branches to _aesp8_ctr32_encrypt8x when $len compares >= 8;
#   * otherwise runs Loop_ctr32_enc, which encrypts the counter, XORs the
#     result with one input block per pass and bumps the counter with
#     vadduqm (128-bit add of $one).
# $one is built as vspltisb 1 followed by vsldoi with a zero vector, i.e.
# a vector whose only non-zero byte is the last one.
# Neither path in this section stores the advanced counter back through
# $ivp.
my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
						map("v$_",(4..11));
my $dat=$tmp;	# $dat shares v3 with $tmp

$code.=<<___;
.globl	.${prefix}_ctr32_encrypt_blocks
	${UCMP}i	$len,1
	bltlr-

	lis	r0,0xfff0
	mfspr	$vrsave,256
	mtspr	256,r0

	li	$idx,15
	vxor	$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx	$ivec,0,$ivp		# load [unaligned] iv
	lvsl	$inpperm,0,$ivp
	lvx	$inptail,$idx,$ivp
	vspltisb	$one,1
	le?vxor	$inpperm,$inpperm,$tmp
	vperm	$ivec,$ivec,$inptail,$inpperm
	vsldoi	$one,$rndkey0,$one,1

	neg	r11,$inp
	?lvsl	$keyperm,0,$key		# prepare for unaligned key
	lwz	$rounds,240($key)

	lvsr	$inpperm,0,r11		# prepare for unaligned load
	lvx	$inptail,0,$inp
	addi	$inp,$inp,15		# 15 is not typo
	le?vxor	$inpperm,$inpperm,$tmp

	srwi	$rounds,$rounds,1
	li	$idx,16
	subi	$rounds,$rounds,1

	${UCMP}i	$len,8
	bge	_aesp8_ctr32_encrypt8x

	?lvsr	$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx	$outhead,0,$out
	?vperm	$outmask,$rndkey0,$outmask,$outperm
	le?vxor	$outperm,$outperm,$tmp

	lvx	$rndkey0,0,$key
	mtctr	$rounds
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$ivec,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	b	Loop_ctr32_enc

.align	5
Loop_ctr32_enc:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_ctr32_enc

	vadduqm	$ivec,$ivec,$one	# Kernel change for 128-bit
	vmr	$dat,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	subic.	$len,$len,1		# blocks--

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	vperm	$dat,$dat,$inptail,$inpperm
	li	$idx,16
	?vperm	$rndkey1,$rndkey0,$rndkey1,$keyperm
	lvx	$rndkey0,0,$key
	vxor	$dat,$dat,$rndkey1	# last round key
	vcipherlast	$inout,$inout,$dat

	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	vperm	$inout,$inout,$inout,$outperm
	vsel	$dat,$outhead,$inout,$outmask
	mtctr	$rounds
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vmr	$outhead,$inout
	vxor	$inout,$ivec,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	stvx	$dat,0,$out
	addi	$out,$out,16
	bne	Loop_ctr32_enc

	addi	$out,$out,-1
	lvx	$inout,0,$out		# redundant in aligned case
	vsel	$inout,$outhead,$inout,$outmask
	stvx	$inout,0,$out

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,6,0
	.long	0
___
#########################################################################
{{	# Optimized CTR procedure					#
# _aesp8_ctr32_encrypt8x: bulk path, reached only through the "bge" in
# ${prefix}_ctr32_encrypt_blocks (i.e. when $len compared >= 8 blocks).
#
# What the emitted code does, in order:
#   * opens a stack frame and saves v20-v31, vrsave and r26-r31;
#   * loads the key schedule: the leading round keys are realigned and
#     off-loaded to a stack buffer addressed through $key_, while v26-v31
#     keep the last six round keys (v31 is XORed into the input ahead of
#     vcipherlast);
#   * rescales $len with "$SHL $len,$len,4" (blocks -> bytes, assuming
#     $SHL is the shift-left mnemonic chosen elsewhere in this file);
#   * processes eight counter blocks per iteration of Loop_ctr32_enc8x,
#     generating counters with vadduqm (128-bit adds of $one/$two);
#   * on the final pass dispatches through Lctr32_enc8x_break to the
#     Lctr32_enc8x_{one..eight} tails according to the remaining length;
#   * overwrites the stack copy of the round keys with $inpperm, restores
#     the saved registers and returns.
#
# $x00 is r0, which reads as zero when used as the index base of the
# lvx/stvx forms below; $x10..$x70 hold the constants 0x10..0x70.
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
my $rndkey0="v23";	# v24-v25 rotating buffer for first round keys
			# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment
my ($two,$three,$four)=($outhead,$outperm,$outmask);

$code.=<<___;
.align	5
_aesp8_ctr32_encrypt8x:
	$STU	$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	li	r10,`$FRAME+8*16+15`
	li	r11,`$FRAME+8*16+31`
	stvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	stvx	v21,r11,$sp
	addi	r11,r11,32
	stvx	v22,r10,$sp
	addi	r10,r10,32
	stvx	v23,r11,$sp
	addi	r11,r11,32
	stvx	v24,r10,$sp
	addi	r10,r10,32
	stvx	v25,r11,$sp
	addi	r11,r11,32
	stvx	v26,r10,$sp
	addi	r10,r10,32
	stvx	v27,r11,$sp
	addi	r11,r11,32
	stvx	v28,r10,$sp
	addi	r10,r10,32
	stvx	v29,r11,$sp
	addi	r11,r11,32
	stvx	v30,r10,$sp
	stvx	v31,r11,$sp
	li	r0,-1
	stw	$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li	$x10,0x10
	$PUSH	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li	$x20,0x20
	$PUSH	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li	$x30,0x30
	$PUSH	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li	$x40,0x40
	$PUSH	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li	$x50,0x50
	$PUSH	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li	$x60,0x60
	$PUSH	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li	$x70,0x70
	mtspr	256,r0

	subi	$rounds,$rounds,3	# -4 in total

	lvx	$rndkey0,$x00,$key	# load key schedule
	lvx	v30,$x10,$key
	addi	$key,$key,0x20
	lvx	v31,$x00,$key
	?vperm	$rndkey0,$rndkey0,v30,$keyperm
	addi	$key_,$sp,$FRAME+15
	mtctr	$rounds

Load_ctr32_enc_key:
	?vperm	v24,v30,v31,$keyperm
	lvx	v30,$x10,$key
	addi	$key,$key,0x20
	stvx	v24,$x00,$key_		# off-load round[1]
	?vperm	v25,v31,v30,$keyperm
	lvx	v31,$x00,$key
	stvx	v25,$x10,$key_		# off-load round[2]
	addi	$key_,$key_,0x20
	bdnz	Load_ctr32_enc_key

	lvx	v26,$x10,$key
	?vperm	v24,v30,v31,$keyperm
	lvx	v27,$x20,$key
	stvx	v24,$x00,$key_		# off-load round[3]
	?vperm	v25,v31,v26,$keyperm
	lvx	v28,$x30,$key
	stvx	v25,$x10,$key_		# off-load round[4]
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	?vperm	v26,v26,v27,$keyperm
	lvx	v29,$x40,$key
	?vperm	v27,v27,v28,$keyperm
	lvx	v30,$x50,$key
	?vperm	v28,v28,v29,$keyperm
	lvx	v31,$x60,$key
	?vperm	v29,v29,v30,$keyperm
	lvx	$out0,$x70,$key		# borrow $out0
	?vperm	v30,v30,v31,$keyperm
	lvx	v24,$x00,$key_		# pre-load round[1]
	?vperm	v31,v31,$out0,$keyperm
	lvx	v25,$x10,$key_		# pre-load round[2]

	vadduqm	$two,$one,$one
	subi	$inp,$inp,15		# undo "caller"
	$SHL	$len,$len,4

	vadduqm	$out1,$ivec,$one	# counter values ...
	vadduqm	$out2,$ivec,$two	# (do all ctr adds as 128-bit)
	vxor	$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
	le?li	$idx,8
	vadduqm	$out3,$out1,$two
	vxor	$out1,$out1,$rndkey0
	le?lvsl	$inpperm,0,$idx
	vadduqm	$out4,$out2,$two
	vxor	$out2,$out2,$rndkey0
	le?vspltisb	$tmp,0x0f
	vadduqm	$out5,$out3,$two
	vxor	$out3,$out3,$rndkey0
	le?vxor	$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
	vadduqm	$out6,$out4,$two
	vxor	$out4,$out4,$rndkey0
	vadduqm	$out7,$out5,$two
	vxor	$out5,$out5,$rndkey0
	vadduqm	$ivec,$out6,$two	# next counter value
	vxor	$out6,$out6,$rndkey0
	vxor	$out7,$out7,$rndkey0

	mtctr	$rounds
	b	Loop_ctr32_enc8x
.align	5
Loop_ctr32_enc8x:
	vcipher	$out0,$out0,v24
	vcipher	$out1,$out1,v24
	vcipher	$out2,$out2,v24
	vcipher	$out3,$out3,v24
	vcipher	$out4,$out4,v24
	vcipher	$out5,$out5,v24
	vcipher	$out6,$out6,v24
	vcipher	$out7,$out7,v24
Loop_ctr32_enc8x_middle:
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vcipher	$out0,$out0,v25
	vcipher	$out1,$out1,v25
	vcipher	$out2,$out2,v25
	vcipher	$out3,$out3,v25
	vcipher	$out4,$out4,v25
	vcipher	$out5,$out5,v25
	vcipher	$out6,$out6,v25
	vcipher	$out7,$out7,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_ctr32_enc8x

	subic	r11,$len,256		# $len-256, borrow $key_
	vcipher	$out0,$out0,v24
	vcipher	$out1,$out1,v24
	vcipher	$out2,$out2,v24
	vcipher	$out3,$out3,v24
	vcipher	$out4,$out4,v24
	vcipher	$out5,$out5,v24
	vcipher	$out6,$out6,v24
	vcipher	$out7,$out7,v24

	subfe	r0,r0,r0		# borrow?-1:0
	vcipher	$out0,$out0,v25
	vcipher	$out1,$out1,v25
	vcipher	$out2,$out2,v25
	vcipher	$out3,$out3,v25
	vcipher	$out4,$out4,v25
	vcipher	$out5,$out5,v25
	vcipher	$out6,$out6,v25
	vcipher	$out7,$out7,v25

	and	r0,r0,r11
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vcipher	$out0,$out0,v26
	vcipher	$out1,$out1,v26
	vcipher	$out2,$out2,v26
	vcipher	$out3,$out3,v26
	vcipher	$out4,$out4,v26
	vcipher	$out5,$out5,v26
	vcipher	$out6,$out6,v26
	vcipher	$out7,$out7,v26
	lvx	v24,$x00,$key_		# re-pre-load round[1]

	subic	$len,$len,129		# $len-=129
	vcipher	$out0,$out0,v27
	addi	$len,$len,1		# $len-=128 really
	vcipher	$out1,$out1,v27
	vcipher	$out2,$out2,v27
	vcipher	$out3,$out3,v27
	vcipher	$out4,$out4,v27
	vcipher	$out5,$out5,v27
	vcipher	$out6,$out6,v27
	vcipher	$out7,$out7,v27
	lvx	v25,$x10,$key_		# re-pre-load round[2]

	vcipher	$out0,$out0,v28
	lvx_u	$in0,$x00,$inp		# load input
	vcipher	$out1,$out1,v28
	lvx_u	$in1,$x10,$inp
	vcipher	$out2,$out2,v28
	lvx_u	$in2,$x20,$inp
	vcipher	$out3,$out3,v28
	lvx_u	$in3,$x30,$inp
	vcipher	$out4,$out4,v28
	lvx_u	$in4,$x40,$inp
	vcipher	$out5,$out5,v28
	lvx_u	$in5,$x50,$inp
	vcipher	$out6,$out6,v28
	lvx_u	$in6,$x60,$inp
	vcipher	$out7,$out7,v28
	lvx_u	$in7,$x70,$inp
	addi	$inp,$inp,0x80

	vcipher	$out0,$out0,v29
	le?vperm	$in0,$in0,$in0,$inpperm
	vcipher	$out1,$out1,v29
	le?vperm	$in1,$in1,$in1,$inpperm
	vcipher	$out2,$out2,v29
	le?vperm	$in2,$in2,$in2,$inpperm
	vcipher	$out3,$out3,v29
	le?vperm	$in3,$in3,$in3,$inpperm
	vcipher	$out4,$out4,v29
	le?vperm	$in4,$in4,$in4,$inpperm
	vcipher	$out5,$out5,v29
	le?vperm	$in5,$in5,$in5,$inpperm
	vcipher	$out6,$out6,v29
	le?vperm	$in6,$in6,$in6,$inpperm
	vcipher	$out7,$out7,v29
	le?vperm	$in7,$in7,$in7,$inpperm

	add	$inp,$inp,r0		# $inp is adjusted in such
					# way that at exit from the
					# loop inX-in7 are loaded
					# with last "words"
	subfe.	r0,r0,r0		# borrow?-1:0
	vcipher	$out0,$out0,v30
	vxor	$in0,$in0,v31		# xor with last round key
	vcipher	$out1,$out1,v30
	vxor	$in1,$in1,v31
	vcipher	$out2,$out2,v30
	vxor	$in2,$in2,v31
	vcipher	$out3,$out3,v30
	vxor	$in3,$in3,v31
	vcipher	$out4,$out4,v30
	vxor	$in4,$in4,v31
	vcipher	$out5,$out5,v30
	vxor	$in5,$in5,v31
	vcipher	$out6,$out6,v30
	vxor	$in6,$in6,v31
	vcipher	$out7,$out7,v30
	vxor	$in7,$in7,v31

	bne	Lctr32_enc8x_break	# did $len-129 borrow?

	vcipherlast	$in0,$out0,$in0
	vcipherlast	$in1,$out1,$in1
	vadduqm	$out1,$ivec,$one	# counter values ...
	vcipherlast	$in2,$out2,$in2
	vadduqm	$out2,$ivec,$two
	vxor	$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
	vcipherlast	$in3,$out3,$in3
	vadduqm	$out3,$out1,$two
	vxor	$out1,$out1,$rndkey0
	vcipherlast	$in4,$out4,$in4
	vadduqm	$out4,$out2,$two
	vxor	$out2,$out2,$rndkey0
	vcipherlast	$in5,$out5,$in5
	vadduqm	$out5,$out3,$two
	vxor	$out3,$out3,$rndkey0
	vcipherlast	$in6,$out6,$in6
	vadduqm	$out6,$out4,$two
	vxor	$out4,$out4,$rndkey0
	vcipherlast	$in7,$out7,$in7
	vadduqm	$out7,$out5,$two
	vxor	$out5,$out5,$rndkey0
	le?vperm	$in0,$in0,$in0,$inpperm
	vadduqm	$ivec,$out6,$two	# next counter value
	vxor	$out6,$out6,$rndkey0
	le?vperm	$in1,$in1,$in1,$inpperm
	vxor	$out7,$out7,$rndkey0
	mtctr	$rounds

	vcipher	$out0,$out0,v24
	stvx_u	$in0,$x00,$out
	le?vperm	$in2,$in2,$in2,$inpperm
	vcipher	$out1,$out1,v24
	stvx_u	$in1,$x10,$out
	le?vperm	$in3,$in3,$in3,$inpperm
	vcipher	$out2,$out2,v24
	stvx_u	$in2,$x20,$out
	le?vperm	$in4,$in4,$in4,$inpperm
	vcipher	$out3,$out3,v24
	stvx_u	$in3,$x30,$out
	le?vperm	$in5,$in5,$in5,$inpperm
	vcipher	$out4,$out4,v24
	stvx_u	$in4,$x40,$out
	le?vperm	$in6,$in6,$in6,$inpperm
	vcipher	$out5,$out5,v24
	stvx_u	$in5,$x50,$out
	le?vperm	$in7,$in7,$in7,$inpperm
	vcipher	$out6,$out6,v24
	stvx_u	$in6,$x60,$out
	vcipher	$out7,$out7,v24
	stvx_u	$in7,$x70,$out
	addi	$out,$out,0x80

	b	Loop_ctr32_enc8x_middle

.align	5
Lctr32_enc8x_break:
	cmpwi	$len,-0x60
	blt	Lctr32_enc8x_one
	nop
	beq	Lctr32_enc8x_two
	cmpwi	$len,-0x40
	blt	Lctr32_enc8x_three
	nop
	beq	Lctr32_enc8x_four
	cmpwi	$len,-0x20
	blt	Lctr32_enc8x_five
	nop
	beq	Lctr32_enc8x_six
	cmpwi	$len,0x00
	blt	Lctr32_enc8x_seven

Lctr32_enc8x_eight:
	vcipherlast	$out0,$out0,$in0
	vcipherlast	$out1,$out1,$in1
	vcipherlast	$out2,$out2,$in2
	vcipherlast	$out3,$out3,$in3
	vcipherlast	$out4,$out4,$in4
	vcipherlast	$out5,$out5,$in5
	vcipherlast	$out6,$out6,$in6
	vcipherlast	$out7,$out7,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x50,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x60,$out
	stvx_u	$out7,$x70,$out
	addi	$out,$out,0x80
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_seven:
	vcipherlast	$out0,$out0,$in1
	vcipherlast	$out1,$out1,$in2
	vcipherlast	$out2,$out2,$in3
	vcipherlast	$out3,$out3,$in4
	vcipherlast	$out4,$out4,$in5
	vcipherlast	$out5,$out5,$in6
	vcipherlast	$out6,$out6,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x50,$out
	stvx_u	$out6,$x60,$out
	addi	$out,$out,0x70
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_six:
	vcipherlast	$out0,$out0,$in2
	vcipherlast	$out1,$out1,$in3
	vcipherlast	$out2,$out2,$in4
	vcipherlast	$out3,$out3,$in5
	vcipherlast	$out4,$out4,$in6
	vcipherlast	$out5,$out5,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	stvx_u	$out5,$x50,$out
	addi	$out,$out,0x60
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_five:
	vcipherlast	$out0,$out0,$in3
	vcipherlast	$out1,$out1,$in4
	vcipherlast	$out2,$out2,$in5
	vcipherlast	$out3,$out3,$in6
	vcipherlast	$out4,$out4,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	stvx_u	$out4,$x40,$out
	addi	$out,$out,0x50
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_four:
	vcipherlast	$out0,$out0,$in4
	vcipherlast	$out1,$out1,$in5
	vcipherlast	$out2,$out2,$in6
	vcipherlast	$out3,$out3,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	stvx_u	$out3,$x30,$out
	addi	$out,$out,0x40
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_three:
	vcipherlast	$out0,$out0,$in5
	vcipherlast	$out1,$out1,$in6
	vcipherlast	$out2,$out2,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	stvx_u	$out2,$x20,$out
	addi	$out,$out,0x30
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_two:
	vcipherlast	$out0,$out0,$in6
	vcipherlast	$out1,$out1,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	stvx_u	$out1,$x10,$out
	addi	$out,$out,0x20
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_one:
	vcipherlast	$out0,$out0,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	stvx_u	$out0,0,$out
	addi	$out,$out,0x10

Lctr32_enc8x_done:
	li	r10,`$FRAME+15`
	li	r11,`$FRAME+31`
	stvx	$inpperm,r10,$sp	# wipe copies of round keys
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32

	mtspr	256,$vrsave
	lvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	lvx	v21,r11,$sp
	addi	r11,r11,32
	lvx	v22,r10,$sp
	addi	r10,r10,32
	lvx	v23,r11,$sp
	addi	r11,r11,32
	lvx	v24,r10,$sp
	addi	r10,r10,32
	lvx	v25,r11,$sp
	addi	r11,r11,32
	lvx	v26,r10,$sp
	addi	r10,r10,32
	lvx	v27,r11,$sp
	addi	r11,r11,32
	lvx	v28,r10,$sp
	addi	r10,r10,32
	lvx	v29,r11,$sp
	addi	r11,r11,32
	lvx	v30,r10,$sp
	lvx	v31,r11,$sp
	$POP	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi	$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long	0
	.byte	0,12,0x14,0,0x80,6,6,0
	.long	0
.size	.${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
___
}}	}}}
1954
Paulo Flabiano Smorigo11c6e162016-07-18 12:26:25 -03001955#########################################################################
1956{{{ # XTS procedures #
1957# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, #
1958# const AES_KEY *key1, const AES_KEY *key2, #
1959# [const] unsigned char iv[16]); #
1960# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which #
1961# input tweak value is assumed to be encrypted already, and last tweak #
1962# value, one suitable for consecutive call on same chunk of data, is #
1963# written back to original buffer. In addition, in "tweak chaining" #
1964# mode only complete input blocks are processed. #
1965
1966my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10));
1967my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2));
1968my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7));
1969my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12));
1970my $taillen = $key2;
1971
1972 ($inp,$idx) = ($idx,$inp); # reassign
1973
1974$code.=<<___;
1975.globl .${prefix}_xts_encrypt
1976 mr $inp,r3 # reassign
1977 li r3,-1
1978 ${UCMP}i $len,16
1979 bltlr-
1980
1981 lis r0,0xfff0
1982 mfspr r12,256 # save vrsave
1983 li r11,0
1984 mtspr 256,r0
1985
1986 vspltisb $seven,0x07 # 0x070707..07
1987 le?lvsl $leperm,r11,r11
1988 le?vspltisb $tmp,0x0f
1989 le?vxor $leperm,$leperm,$seven
1990
1991 li $idx,15
1992 lvx $tweak,0,$ivp # load [unaligned] iv
1993 lvsl $inpperm,0,$ivp
1994 lvx $inptail,$idx,$ivp
1995 le?vxor $inpperm,$inpperm,$tmp
1996 vperm $tweak,$tweak,$inptail,$inpperm
1997
1998 neg r11,$inp
1999 lvsr $inpperm,0,r11 # prepare for unaligned load
2000 lvx $inout,0,$inp
2001 addi $inp,$inp,15 # 15 is not typo
2002 le?vxor $inpperm,$inpperm,$tmp
2003
2004 ${UCMP}i $key2,0 # key2==NULL?
2005 beq Lxts_enc_no_key2
2006
2007 ?lvsl $keyperm,0,$key2 # prepare for unaligned key
2008 lwz $rounds,240($key2)
2009 srwi $rounds,$rounds,1
2010 subi $rounds,$rounds,1
2011 li $idx,16
2012
2013 lvx $rndkey0,0,$key2
2014 lvx $rndkey1,$idx,$key2
2015 addi $idx,$idx,16
2016 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2017 vxor $tweak,$tweak,$rndkey0
2018 lvx $rndkey0,$idx,$key2
2019 addi $idx,$idx,16
2020 mtctr $rounds
2021
2022Ltweak_xts_enc:
2023 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2024 vcipher $tweak,$tweak,$rndkey1
2025 lvx $rndkey1,$idx,$key2
2026 addi $idx,$idx,16
2027 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2028 vcipher $tweak,$tweak,$rndkey0
2029 lvx $rndkey0,$idx,$key2
2030 addi $idx,$idx,16
2031 bdnz Ltweak_xts_enc
2032
2033 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2034 vcipher $tweak,$tweak,$rndkey1
2035 lvx $rndkey1,$idx,$key2
2036 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2037 vcipherlast $tweak,$tweak,$rndkey0
2038
2039 li $ivp,0 # don't chain the tweak
2040 b Lxts_enc
2041
2042Lxts_enc_no_key2:
2043 li $idx,-16
2044 and $len,$len,$idx # in "tweak chaining"
2045 # mode only complete
2046 # blocks are processed
2047Lxts_enc:
2048 lvx $inptail,0,$inp
2049 addi $inp,$inp,16
2050
2051 ?lvsl $keyperm,0,$key1 # prepare for unaligned key
2052 lwz $rounds,240($key1)
2053 srwi $rounds,$rounds,1
2054 subi $rounds,$rounds,1
2055 li $idx,16
2056
2057 vslb $eighty7,$seven,$seven # 0x808080..80
2058 vor $eighty7,$eighty7,$seven # 0x878787..87
2059 vspltisb $tmp,1 # 0x010101..01
2060 vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
2061
2062 ${UCMP}i $len,96
2063 bge _aesp8_xts_encrypt6x
2064
2065 andi. $taillen,$len,15
2066 subic r0,$len,32
2067 subi $taillen,$taillen,16
2068 subfe r0,r0,r0
2069 and r0,r0,$taillen
2070 add $inp,$inp,r0
2071
2072 lvx $rndkey0,0,$key1
2073 lvx $rndkey1,$idx,$key1
2074 addi $idx,$idx,16
2075 vperm $inout,$inout,$inptail,$inpperm
2076 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2077 vxor $inout,$inout,$tweak
2078 vxor $inout,$inout,$rndkey0
2079 lvx $rndkey0,$idx,$key1
2080 addi $idx,$idx,16
2081 mtctr $rounds
2082 b Loop_xts_enc
2083
2084.align 5
2085Loop_xts_enc:
2086 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2087 vcipher $inout,$inout,$rndkey1
2088 lvx $rndkey1,$idx,$key1
2089 addi $idx,$idx,16
2090 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2091 vcipher $inout,$inout,$rndkey0
2092 lvx $rndkey0,$idx,$key1
2093 addi $idx,$idx,16
2094 bdnz Loop_xts_enc
2095
2096 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2097 vcipher $inout,$inout,$rndkey1
2098 lvx $rndkey1,$idx,$key1
2099 li $idx,16
2100 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2101 vxor $rndkey0,$rndkey0,$tweak
2102 vcipherlast $output,$inout,$rndkey0
2103
2104 le?vperm $tmp,$output,$output,$leperm
2105 be?nop
2106 le?stvx_u $tmp,0,$out
2107 be?stvx_u $output,0,$out
2108 addi $out,$out,16
2109
2110 subic. $len,$len,16
2111 beq Lxts_enc_done
2112
2113 vmr $inout,$inptail
2114 lvx $inptail,0,$inp
2115 addi $inp,$inp,16
2116 lvx $rndkey0,0,$key1
2117 lvx $rndkey1,$idx,$key1
2118 addi $idx,$idx,16
2119
2120 subic r0,$len,32
2121 subfe r0,r0,r0
2122 and r0,r0,$taillen
2123 add $inp,$inp,r0
2124
2125 vsrab $tmp,$tweak,$seven # next tweak value
2126 vaddubm $tweak,$tweak,$tweak
2127 vsldoi $tmp,$tmp,$tmp,15
2128 vand $tmp,$tmp,$eighty7
2129 vxor $tweak,$tweak,$tmp
2130
2131 vperm $inout,$inout,$inptail,$inpperm
2132 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2133 vxor $inout,$inout,$tweak
2134 vxor $output,$output,$rndkey0 # just in case $len<16
2135 vxor $inout,$inout,$rndkey0
2136 lvx $rndkey0,$idx,$key1
2137 addi $idx,$idx,16
2138
2139 mtctr $rounds
2140 ${UCMP}i $len,16
2141 bge Loop_xts_enc
2142
2143 vxor $output,$output,$tweak
2144 lvsr $inpperm,0,$len # $inpperm is no longer needed
2145 vxor $inptail,$inptail,$inptail # $inptail is no longer needed
2146 vspltisb $tmp,-1
2147 vperm $inptail,$inptail,$tmp,$inpperm
2148 vsel $inout,$inout,$output,$inptail
2149
2150 subi r11,$out,17
2151 subi $out,$out,16
2152 mtctr $len
2153 li $len,16
2154Loop_xts_enc_steal:
2155 lbzu r0,1(r11)
2156 stb r0,16(r11)
2157 bdnz Loop_xts_enc_steal
2158
2159 mtctr $rounds
2160 b Loop_xts_enc # one more time...
2161
2162Lxts_enc_done:
2163 ${UCMP}i $ivp,0
2164 beq Lxts_enc_ret
2165
2166 vsrab $tmp,$tweak,$seven # next tweak value
2167 vaddubm $tweak,$tweak,$tweak
2168 vsldoi $tmp,$tmp,$tmp,15
2169 vand $tmp,$tmp,$eighty7
2170 vxor $tweak,$tweak,$tmp
2171
2172 le?vperm $tweak,$tweak,$tweak,$leperm
2173 stvx_u $tweak,0,$ivp
2174
2175Lxts_enc_ret:
2176 mtspr 256,r12 # restore vrsave
2177 li r3,0
2178 blr
2179 .long 0
2180 .byte 0,12,0x04,0,0x80,6,6,0
2181 .long 0
2182.size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
2183
# XTS decrypt entry.  r3 carries the input pointer on entry and is copied to
# the inp register.  r3 is then preset to -1 and the routine returns at once
# when len is below 16.
2184.globl .${prefix}_xts_decrypt
Paulo Flabiano Smorigo11c6e162016-07-18 12:26:25 -03002185 mr $inp,r3 # reassign
2186 li r3,-1
2187 ${UCMP}i $len,16
2188 bltlr-
2189
# r12 keeps the caller's vrsave while 0xfff80000 is installed in its place.
2190 lis r0,0xfff8
2191 mfspr r12,256 # save vrsave
2192 li r11,0
2193 mtspr 256,r0
2194
# r0 becomes 16 when len is not a multiple of 16 and 0 otherwise, so len is
# reduced by one block whenever a partial trailing block exists.
2195 andi. r0,$len,15
2196 neg r0,r0
2197 andi. r0,r0,16
2198 sub $len,$len,r0
2199
2200 vspltisb $seven,0x07 # 0x070707..07
2201 le?lvsl $leperm,r11,r11
2202 le?vspltisb $tmp,0x0f
2203 le?vxor $leperm,$leperm,$seven
2204
# The 16-byte tweak is assembled from two loads (offsets 0 and 15 from ivp)
# merged through a permute derived from the ivp address.
2205 li $idx,15
2206 lvx $tweak,0,$ivp # load [unaligned] iv
2207 lvsl $inpperm,0,$ivp
2208 lvx $inptail,$idx,$ivp
2209 le?vxor $inpperm,$inpperm,$tmp
2210 vperm $tweak,$tweak,$inptail,$inpperm
2211
# First input quadword goes to inout; inp is left 15 bytes ahead for the
# follow-up load that supplies the other half of an unaligned block.
2212 neg r11,$inp
2213 lvsr $inpperm,0,r11 # prepare for unaligned load
2214 lvx $inout,0,$inp
2215 addi $inp,$inp,15 # 15 is not typo
2216 le?vxor $inpperm,$inpperm,$tmp
2217
2218 ${UCMP}i $key2,0 # key2==NULL?
2219 beq Lxts_dec_no_key2
2220
# key2 is present: the tweak is run through vcipher rounds with the key2
# schedule.  The loop count is (word at key2+240)/2 - 1, two rounds per pass.
2221 ?lvsl $keyperm,0,$key2 # prepare for unaligned key
2222 lwz $rounds,240($key2)
2223 srwi $rounds,$rounds,1
2224 subi $rounds,$rounds,1
2225 li $idx,16
2226
2227 lvx $rndkey0,0,$key2
2228 lvx $rndkey1,$idx,$key2
2229 addi $idx,$idx,16
2230 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2231 vxor $tweak,$tweak,$rndkey0
2232 lvx $rndkey0,$idx,$key2
2233 addi $idx,$idx,16
2234 mtctr $rounds
2235
2236Ltweak_xts_dec:
2237 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2238 vcipher $tweak,$tweak,$rndkey1
2239 lvx $rndkey1,$idx,$key2
2240 addi $idx,$idx,16
2241 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2242 vcipher $tweak,$tweak,$rndkey0
2243 lvx $rndkey0,$idx,$key2
2244 addi $idx,$idx,16
2245 bdnz Ltweak_xts_dec
2246
2247 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2248 vcipher $tweak,$tweak,$rndkey1
2249 lvx $rndkey1,$idx,$key2
2250 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2251 vcipherlast $tweak,$tweak,$rndkey0
2252
# Clearing ivp makes the exit path skip the store of the next tweak.
2253 li $ivp,0 # don't chain the tweak
2254 b Lxts_dec
2255
# No key2: the tweak is used exactly as loaded and len is rounded up to the
# next multiple of 16.
2256Lxts_dec_no_key2:
2257 neg $idx,$len
2258 andi. $idx,$idx,15
2259 add $len,$len,$idx # in "tweak chaining"
2260 # mode only complete
2261 # blocks are processed
# Common single-block decrypt path using the key1 schedule.  The loop count
# is (word at key1+240)/2 - 1, and each loop pass performs two rounds.
2262Lxts_dec:
2263 lvx $inptail,0,$inp
2264 addi $inp,$inp,16
2265
2266 ?lvsl $keyperm,0,$key1 # prepare for unaligned key
2267 lwz $rounds,240($key1)
2268 srwi $rounds,$rounds,1
2269 subi $rounds,$rounds,1
2270 li $idx,16
2271
# Constant used to mask the carry bytes when the tweak is doubled.
2272 vslb $eighty7,$seven,$seven # 0x808080..80
2273 vor $eighty7,$eighty7,$seven # 0x878787..87
2274 vspltisb $tmp,1 # 0x010101..01
2275 vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
2276
# Inputs of 96 bytes or more branch (not call) to the 6x interleaved routine.
2277 ${UCMP}i $len,96
2278 bge _aesp8_xts_decrypt6x
2279
# First block: merge the two input quadwords, then xor in the tweak and the
# first round key.
2280 lvx $rndkey0,0,$key1
2281 lvx $rndkey1,$idx,$key1
2282 addi $idx,$idx,16
2283 vperm $inout,$inout,$inptail,$inpperm
2284 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2285 vxor $inout,$inout,$tweak
2286 vxor $inout,$inout,$rndkey0
2287 lvx $rndkey0,$idx,$key1
2288 addi $idx,$idx,16
2289 mtctr $rounds
2290
# Fewer than 16 bytes of whole-block input left: go straight to the tail.
2291 ${UCMP}i $len,16
2292 blt Ltail_xts_dec
2293 be?b Loop_xts_dec
2294
2295.align 5
# Two vncipher rounds per pass, alternating rndkey1 and rndkey0 while the
# following round key is being fetched.
2296Loop_xts_dec:
2297 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2298 vncipher $inout,$inout,$rndkey1
2299 lvx $rndkey1,$idx,$key1
2300 addi $idx,$idx,16
2301 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2302 vncipher $inout,$inout,$rndkey0
2303 lvx $rndkey0,$idx,$key1
2304 addi $idx,$idx,16
2305 bdnz Loop_xts_dec
2306
# The tweak is xored into the last round key, so vncipherlast applies the
# final round and the tweak in one step.  idx is reset to 16 for the next
# block.
2307 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2308 vncipher $inout,$inout,$rndkey1
2309 lvx $rndkey1,$idx,$key1
2310 li $idx,16
2311 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2312 vxor $rndkey0,$rndkey0,$tweak
2313 vncipherlast $output,$inout,$rndkey0
2314
# Store one output block (routed through leperm on little-endian builds) and
# advance out.
2315 le?vperm $tmp,$output,$output,$leperm
2316 be?nop
2317 le?stvx_u $tmp,0,$out
2318 be?stvx_u $output,0,$out
2319 addi $out,$out,16
2320
# len -= 16; finished when it reaches zero.
2321 subic. $len,$len,16
2322 beq Lxts_dec_done
2323
# Set up the next block: shift the input window and restart the key schedule.
2324 vmr $inout,$inptail
2325 lvx $inptail,0,$inp
2326 addi $inp,$inp,16
2327 lvx $rndkey0,0,$key1
2328 lvx $rndkey1,$idx,$key1
2329 addi $idx,$idx,16
2330
2331 vsrab $tmp,$tweak,$seven # next tweak value
2332 vaddubm $tweak,$tweak,$tweak
2333 vsldoi $tmp,$tmp,$tmp,15
2334 vand $tmp,$tmp,$eighty7
2335 vxor $tweak,$tweak,$tmp
2336
2337 vperm $inout,$inout,$inptail,$inpperm
2338 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2339 vxor $inout,$inout,$tweak
2340 vxor $inout,$inout,$rndkey0
2341 lvx $rndkey0,$idx,$key1
2342 addi $idx,$idx,16
2343
# Another whole block remains: loop.  Otherwise fall through to the tail.
2344 mtctr $rounds
2345 ${UCMP}i $len,16
2346 bge Loop_xts_dec
2347
# Tail handling when fewer than 16 bytes remain after the current block.
# tweak1 is the value that follows tweak; tweak itself is left unchanged
# and is used for the final pass.
2348Ltail_xts_dec:
2349 vsrab $tmp,$tweak,$seven # next tweak value
2350 vaddubm $tweak1,$tweak,$tweak
2351 vsldoi $tmp,$tmp,$tmp,15
2352 vand $tmp,$tmp,$eighty7
2353 vxor $tweak1,$tweak1,$tmp
2354
# inp = inp - 16 + len
2355 subi $inp,$inp,16
2356 add $inp,$inp,$len
2357
# The block was already xored with tweak; the first xor removes it and the
# second applies tweak1 instead.
2358 vxor $inout,$inout,$tweak # :-(
2359 vxor $inout,$inout,$tweak1 # :-)
2360
# Same round structure as Loop_xts_dec, finishing with tweak1 folded into
# the last round key.
2361Loop_xts_dec_short:
2362 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2363 vncipher $inout,$inout,$rndkey1
2364 lvx $rndkey1,$idx,$key1
2365 addi $idx,$idx,16
2366 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2367 vncipher $inout,$inout,$rndkey0
2368 lvx $rndkey0,$idx,$key1
2369 addi $idx,$idx,16
2370 bdnz Loop_xts_dec_short
2371
2372 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2373 vncipher $inout,$inout,$rndkey1
2374 lvx $rndkey1,$idx,$key1
2375 li $idx,16
2376 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2377 vxor $rndkey0,$rndkey0,$tweak1
2378 vncipherlast $output,$inout,$rndkey0
2379
# The block is stored at out, but out is not advanced: the final pass below
# writes to the same address again.
2380 le?vperm $tmp,$output,$output,$leperm
2381 be?nop
2382 le?stvx_u $tmp,0,$out
2383 be?stvx_u $output,0,$out
2384
2385 vmr $inout,$inptail
2386 lvx $inptail,0,$inp
2387 #addi $inp,$inp,16
2388 lvx $rndkey0,0,$key1
2389 lvx $rndkey1,$idx,$key1
2390 addi $idx,$idx,16
2391 vperm $inout,$inout,$inptail,$inpperm
2392 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2393
# A byte mask derived from len selects, per byte, between the freshly loaded
# input and the block just produced.
2394 lvsr $inpperm,0,$len # $inpperm is no longer needed
2395 vxor $inptail,$inptail,$inptail # $inptail is no longer needed
2396 vspltisb $tmp,-1
2397 vperm $inptail,$inptail,$tmp,$inpperm
2398 vsel $inout,$inout,$output,$inptail
2399
# The merged block is processed with the original tweak.
2400 vxor $rndkey0,$rndkey0,$tweak
2401 vxor $inout,$inout,$rndkey0
2402 lvx $rndkey0,$idx,$key1
2403 addi $idx,$idx,16
2404
# Copy len bytes from out to out+16, one byte per pass.  len is then set to
# 16 so that the subic. in Loop_xts_dec reaches zero after the final pass.
2405 subi r11,$out,1
2406 mtctr $len
2407 li $len,16
2408Loop_xts_dec_steal:
2409 lbzu r0,1(r11)
2410 stb r0,16(r11)
2411 bdnz Loop_xts_dec_steal
2412
2413 mtctr $rounds
2414 b Loop_xts_dec # one more time...
2415
# Exit path.  When ivp is non-zero the tweak is advanced once more and
# written to ivp; ivp was zeroed earlier when key2 was supplied.
2416Lxts_dec_done:
2417 ${UCMP}i $ivp,0
2418 beq Lxts_dec_ret
2419
2420 vsrab $tmp,$tweak,$seven # next tweak value
2421 vaddubm $tweak,$tweak,$tweak
2422 vsldoi $tmp,$tmp,$tmp,15
2423 vand $tmp,$tmp,$eighty7
2424 vxor $tweak,$tweak,$tmp
2425
2426 le?vperm $tweak,$tweak,$tweak,$leperm
2427 stvx_u $tweak,0,$ivp
2428
# Restore vrsave from r12 and return 0 in r3.
2429Lxts_dec_ret:
2430 mtspr 256,r12 # restore vrsave
2431 li r3,0
2432 blr
# NOTE(review): the three data words below look like a traceback table;
# they are not interpreted anywhere in this routine - confirm.
2433 .long 0
2434 .byte 0,12,0x04,0,0x80,6,6,0
2435 .long 0
2436.size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
2437___
2438#########################################################################
2439{{ # Optimized XTS procedures #
# Register aliases for the 6x XTS routines that follow.
# $key_ reuses the register held by $key2.
2440my $key_=$key2;
# General-purpose registers r0, r3, r26..r31 serve as the index constants
# 0x00..0x70 (the routines load the values with li).
2441my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
# On osx flavours the zero index is spelled as the literal 0 rather than r0.
2442 $x00=0 if ($flavour =~ /osx/);
# Vector registers: six input blocks, six working/output blocks, six tweaks.
2443my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5));
2444my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
2445my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
2446my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
2447 # v26-v31 last 6 round keys
# $keyperm shares v7 with $out0; $taillen shares r31 with $x70.
2448my ($keyperm)=($out0); # aliases with "caller", redundant assignment
2449my $taillen=$x70;
2450
2451$code.=<<___;
# _aesp8_xts_encrypt6x: XTS encryption processing six blocks per loop pass.
# NOTE(review): presumably branched to from the xts_encrypt routine for long
# inputs, the way xts_decrypt branches to its 6x variant - confirm.
#
# Prologue: allocate a frame of FRAME+21*16+6*SIZE_T bytes, keep LR in r11
# and store it at frame size + LRSAVE, then save v20-v31, vrsave and r26-r31.
# r7 and r3 alternate as store offsets, each stepping by 32.
2452.align 5
2453_aesp8_xts_encrypt6x:
2454 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
2455 mflr r11
2456 li r7,`$FRAME+8*16+15`
2457 li r3,`$FRAME+8*16+31`
2458 $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
2459 stvx v20,r7,$sp # ABI says so
2460 addi r7,r7,32
2461 stvx v21,r3,$sp
2462 addi r3,r3,32
2463 stvx v22,r7,$sp
2464 addi r7,r7,32
2465 stvx v23,r3,$sp
2466 addi r3,r3,32
2467 stvx v24,r7,$sp
2468 addi r7,r7,32
2469 stvx v25,r3,$sp
2470 addi r3,r3,32
2471 stvx v26,r7,$sp
2472 addi r7,r7,32
2473 stvx v27,r3,$sp
2474 addi r3,r3,32
2475 stvx v28,r7,$sp
2476 addi r7,r7,32
2477 stvx v29,r3,$sp
2478 addi r3,r3,32
2479 stvx v30,r7,$sp
2480 stvx v31,r3,$sp
# The index constants 0x10..0x70 are loaded between the register saves, and
# vrsave is set to all ones.
2481 li r0,-1
2482 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
2483 li $x10,0x10
2484 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
2485 li $x20,0x20
2486 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
2487 li $x30,0x30
2488 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
2489 li $x40,0x40
2490 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
2491 li $x50,0x50
2492 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
2493 li $x60,0x60
2494 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
2495 li $x70,0x70
2496 mtspr 256,r0
2497
2498 subi $rounds,$rounds,3 # -4 in total
2499
# Key schedule: each round key is aligned through keyperm.  The leading
# round keys are written to the stack area starting at FRAME+15; the last
# six stay resident in v26-v31.
2500 lvx $rndkey0,$x00,$key1 # load key schedule
2501 lvx v30,$x10,$key1
2502 addi $key1,$key1,0x20
2503 lvx v31,$x00,$key1
2504 ?vperm $rndkey0,$rndkey0,v30,$keyperm
2505 addi $key_,$sp,$FRAME+15
2506 mtctr $rounds
2507
2508Load_xts_enc_key:
2509 ?vperm v24,v30,v31,$keyperm
2510 lvx v30,$x10,$key1
2511 addi $key1,$key1,0x20
2512 stvx v24,$x00,$key_ # off-load round[1]
2513 ?vperm v25,v31,v30,$keyperm
2514 lvx v31,$x00,$key1
2515 stvx v25,$x10,$key_ # off-load round[2]
2516 addi $key_,$key_,0x20
2517 bdnz Load_xts_enc_key
2518
2519 lvx v26,$x10,$key1
2520 ?vperm v24,v30,v31,$keyperm
2521 lvx v27,$x20,$key1
2522 stvx v24,$x00,$key_ # off-load round[3]
2523 ?vperm v25,v31,v26,$keyperm
2524 lvx v28,$x30,$key1
2525 stvx v25,$x10,$key_ # off-load round[4]
2526 addi $key_,$sp,$FRAME+15 # rewind $key_
2527 ?vperm v26,v26,v27,$keyperm
2528 lvx v29,$x40,$key1
2529 ?vperm v27,v27,v28,$keyperm
2530 lvx v30,$x50,$key1
2531 ?vperm v28,v28,v29,$keyperm
2532 lvx v31,$x60,$key1
2533 ?vperm v29,v29,v30,$keyperm
2534 lvx $twk5,$x70,$key1 # borrow $twk5
2535 ?vperm v30,v30,v31,$keyperm
2536 lvx v24,$x00,$key_ # pre-load round[1]
2537 ?vperm v31,v31,$twk5,$keyperm
2538 lvx v25,$x10,$key_ # pre-load round[2]
2539
# First six blocks.  For each block N: twkN = tweak xor rndkey0 (round key 0
# is folded into the stored tweak), outN = inN xor twkN, then tweak advances.
# Block 0 comes from the quadwords already loaded before entry; blocks 1-5
# are loaded here.
2540 vperm $in0,$inout,$inptail,$inpperm
2541 subi $inp,$inp,31 # undo "caller"
2542 vxor $twk0,$tweak,$rndkey0
2543 vsrab $tmp,$tweak,$seven # next tweak value
2544 vaddubm $tweak,$tweak,$tweak
2545 vsldoi $tmp,$tmp,$tmp,15
2546 vand $tmp,$tmp,$eighty7
2547 vxor $out0,$in0,$twk0
2548 vxor $tweak,$tweak,$tmp
2549
2550 lvx_u $in1,$x10,$inp
2551 vxor $twk1,$tweak,$rndkey0
2552 vsrab $tmp,$tweak,$seven # next tweak value
2553 vaddubm $tweak,$tweak,$tweak
2554 vsldoi $tmp,$tmp,$tmp,15
2555 le?vperm $in1,$in1,$in1,$leperm
2556 vand $tmp,$tmp,$eighty7
2557 vxor $out1,$in1,$twk1
2558 vxor $tweak,$tweak,$tmp
2559
# taillen = len mod 16 (this overwrites the 0x70 constant held in the same
# register).
2560 lvx_u $in2,$x20,$inp
2561 andi. $taillen,$len,15
2562 vxor $twk2,$tweak,$rndkey0
2563 vsrab $tmp,$tweak,$seven # next tweak value
2564 vaddubm $tweak,$tweak,$tweak
2565 vsldoi $tmp,$tmp,$tmp,15
2566 le?vperm $in2,$in2,$in2,$leperm
2567 vand $tmp,$tmp,$eighty7
2568 vxor $out2,$in2,$twk2
2569 vxor $tweak,$tweak,$tmp
2570
# len now counts whole blocks only.
2571 lvx_u $in3,$x30,$inp
2572 sub $len,$len,$taillen
2573 vxor $twk3,$tweak,$rndkey0
2574 vsrab $tmp,$tweak,$seven # next tweak value
2575 vaddubm $tweak,$tweak,$tweak
2576 vsldoi $tmp,$tmp,$tmp,15
2577 le?vperm $in3,$in3,$in3,$leperm
2578 vand $tmp,$tmp,$eighty7
2579 vxor $out3,$in3,$twk3
2580 vxor $tweak,$tweak,$tmp
2581
# Account for the six blocks set up here.
2582 lvx_u $in4,$x40,$inp
2583 subi $len,$len,0x60
2584 vxor $twk4,$tweak,$rndkey0
2585 vsrab $tmp,$tweak,$seven # next tweak value
2586 vaddubm $tweak,$tweak,$tweak
2587 vsldoi $tmp,$tmp,$tmp,15
2588 le?vperm $in4,$in4,$in4,$leperm
2589 vand $tmp,$tmp,$eighty7
2590 vxor $out4,$in4,$twk4
2591 vxor $tweak,$tweak,$tmp
2592
2593 lvx_u $in5,$x50,$inp
2594 addi $inp,$inp,0x60
2595 vxor $twk5,$tweak,$rndkey0
2596 vsrab $tmp,$tweak,$seven # next tweak value
2597 vaddubm $tweak,$tweak,$tweak
2598 vsldoi $tmp,$tmp,$tmp,15
2599 le?vperm $in5,$in5,$in5,$leperm
2600 vand $tmp,$tmp,$eighty7
2601 vxor $out5,$in5,$twk5
2602 vxor $tweak,$tweak,$tmp
2603
# v31 becomes (last round key xor rndkey0), so a later twkN xor v31 cancels
# the rndkey0 folded into twkN and leaves tweak xor last round key.
2604 vxor v31,v31,$rndkey0
2605 mtctr $rounds
2606 b Loop_xts_enc6x
2607
# Main 6x loop.  The counted part applies two vcipher rounds per pass to all
# six blocks, reloading v24/v25 from the stack copy of the key schedule.
2608.align 5
2609Loop_xts_enc6x:
2610 vcipher $out0,$out0,v24
2611 vcipher $out1,$out1,v24
2612 vcipher $out2,$out2,v24
2613 vcipher $out3,$out3,v24
2614 vcipher $out4,$out4,v24
2615 vcipher $out5,$out5,v24
2616 lvx v24,$x20,$key_ # round[3]
2617 addi $key_,$key_,0x20
2618
2619 vcipher $out0,$out0,v25
2620 vcipher $out1,$out1,v25
2621 vcipher $out2,$out2,v25
2622 vcipher $out3,$out3,v25
2623 vcipher $out4,$out4,v25
2624 vcipher $out5,$out5,v25
2625 lvx v25,$x10,$key_ # round[4]
2626 bdnz Loop_xts_enc6x
2627
# The remaining rounds are interleaved with three other jobs: computing the
# next six tweaks, len/inp bookkeeping, and loading the next six input blocks.
# inN temporarily holds twkN xor v31 for use as the vcipherlast operand.
2628 subic $len,$len,96 # $len-=96
2629 vxor $in0,$twk0,v31 # xor with last round key
2630 vcipher $out0,$out0,v24
2631 vcipher $out1,$out1,v24
2632 vsrab $tmp,$tweak,$seven # next tweak value
2633 vxor $twk0,$tweak,$rndkey0
2634 vaddubm $tweak,$tweak,$tweak
2635 vcipher $out2,$out2,v24
2636 vcipher $out3,$out3,v24
2637 vsldoi $tmp,$tmp,$tmp,15
2638 vcipher $out4,$out4,v24
2639 vcipher $out5,$out5,v24
2640
# The record form sets CR0 here; the beq at the bottom of the loop tests it.
2641 subfe. r0,r0,r0 # borrow?-1:0
2642 vand $tmp,$tmp,$eighty7
2643 vcipher $out0,$out0,v25
2644 vcipher $out1,$out1,v25
2645 vxor $tweak,$tweak,$tmp
2646 vcipher $out2,$out2,v25
2647 vcipher $out3,$out3,v25
2648 vxor $in1,$twk1,v31
2649 vsrab $tmp,$tweak,$seven # next tweak value
2650 vxor $twk1,$tweak,$rndkey0
2651 vcipher $out4,$out4,v25
2652 vcipher $out5,$out5,v25
2653
# r0 = len (now negative) when the subtraction borrowed, else 0.
2654 and r0,r0,$len
2655 vaddubm $tweak,$tweak,$tweak
2656 vsldoi $tmp,$tmp,$tmp,15
2657 vcipher $out0,$out0,v26
2658 vcipher $out1,$out1,v26
2659 vand $tmp,$tmp,$eighty7
2660 vcipher $out2,$out2,v26
2661 vcipher $out3,$out3,v26
2662 vxor $tweak,$tweak,$tmp
2663 vcipher $out4,$out4,v26
2664 vcipher $out5,$out5,v26
2665
2666 add $inp,$inp,r0 # $inp is adjusted in such
2667 # way that at exit from the
2668 # loop inX-in5 are loaded
2669 # with last "words"
2670 vxor $in2,$twk2,v31
2671 vsrab $tmp,$tweak,$seven # next tweak value
2672 vxor $twk2,$tweak,$rndkey0
2673 vaddubm $tweak,$tweak,$tweak
2674 vcipher $out0,$out0,v27
2675 vcipher $out1,$out1,v27
2676 vsldoi $tmp,$tmp,$tmp,15
2677 vcipher $out2,$out2,v27
2678 vcipher $out3,$out3,v27
2679 vand $tmp,$tmp,$eighty7
2680 vcipher $out4,$out4,v27
2681 vcipher $out5,$out5,v27
2682
2683 addi $key_,$sp,$FRAME+15 # rewind $key_
2684 vxor $tweak,$tweak,$tmp
2685 vcipher $out0,$out0,v28
2686 vcipher $out1,$out1,v28
2687 vxor $in3,$twk3,v31
2688 vsrab $tmp,$tweak,$seven # next tweak value
2689 vxor $twk3,$tweak,$rndkey0
2690 vcipher $out2,$out2,v28
2691 vcipher $out3,$out3,v28
2692 vaddubm $tweak,$tweak,$tweak
2693 vsldoi $tmp,$tmp,$tmp,15
2694 vcipher $out4,$out4,v28
2695 vcipher $out5,$out5,v28
2696 lvx v24,$x00,$key_ # re-pre-load round[1]
2697 vand $tmp,$tmp,$eighty7
2698
2699 vcipher $out0,$out0,v29
2700 vcipher $out1,$out1,v29
2701 vxor $tweak,$tweak,$tmp
2702 vcipher $out2,$out2,v29
2703 vcipher $out3,$out3,v29
2704 vxor $in4,$twk4,v31
2705 vsrab $tmp,$tweak,$seven # next tweak value
2706 vxor $twk4,$tweak,$rndkey0
2707 vcipher $out4,$out4,v29
2708 vcipher $out5,$out5,v29
2709 lvx v25,$x10,$key_ # re-pre-load round[2]
2710 vaddubm $tweak,$tweak,$tweak
2711 vsldoi $tmp,$tmp,$tmp,15
2712
2713 vcipher $out0,$out0,v30
2714 vcipher $out1,$out1,v30
2715 vand $tmp,$tmp,$eighty7
2716 vcipher $out2,$out2,v30
2717 vcipher $out3,$out3,v30
2718 vxor $tweak,$tweak,$tmp
2719 vcipher $out4,$out4,v30
2720 vcipher $out5,$out5,v30
2721 vxor $in5,$twk5,v31
2722 vsrab $tmp,$tweak,$seven # next tweak value
2723 vxor $twk5,$tweak,$rndkey0
2724
# Final round for each block; as soon as inN has been consumed it is reloaded
# with the next input block.
2725 vcipherlast $out0,$out0,$in0
2726 lvx_u $in0,$x00,$inp # load next input block
2727 vaddubm $tweak,$tweak,$tweak
2728 vsldoi $tmp,$tmp,$tmp,15
2729 vcipherlast $out1,$out1,$in1
2730 lvx_u $in1,$x10,$inp
2731 vcipherlast $out2,$out2,$in2
2732 le?vperm $in0,$in0,$in0,$leperm
2733 lvx_u $in2,$x20,$inp
2734 vand $tmp,$tmp,$eighty7
2735 vcipherlast $out3,$out3,$in3
2736 le?vperm $in1,$in1,$in1,$leperm
2737 lvx_u $in3,$x30,$inp
2738 vcipherlast $out4,$out4,$in4
2739 le?vperm $in2,$in2,$in2,$leperm
2740 lvx_u $in4,$x40,$inp
2741 vxor $tweak,$tweak,$tmp
2742 vcipherlast $tmp,$out5,$in5 # last block might be needed
2743 # in stealing mode
2744 le?vperm $in3,$in3,$in3,$leperm
2745 lvx_u $in5,$x50,$inp
2746 addi $inp,$inp,0x60
2747 le?vperm $in4,$in4,$in4,$leperm
2748 le?vperm $in5,$in5,$in5,$leperm
2749
# Store the six results and start the next batch (outN = inN xor twkN).
2750 le?vperm $out0,$out0,$out0,$leperm
2751 le?vperm $out1,$out1,$out1,$leperm
2752 stvx_u $out0,$x00,$out # store output
2753 vxor $out0,$in0,$twk0
2754 le?vperm $out2,$out2,$out2,$leperm
2755 stvx_u $out1,$x10,$out
2756 vxor $out1,$in1,$twk1
2757 le?vperm $out3,$out3,$out3,$leperm
2758 stvx_u $out2,$x20,$out
2759 vxor $out2,$in2,$twk2
2760 le?vperm $out4,$out4,$out4,$leperm
2761 stvx_u $out3,$x30,$out
2762 vxor $out3,$in3,$twk3
2763 le?vperm $out5,$tmp,$tmp,$leperm
2764 stvx_u $out4,$x40,$out
2765 vxor $out4,$in4,$twk4
2766 le?stvx_u $out5,$x50,$out
2767 be?stvx_u $tmp, $x50,$out
2768 vxor $out5,$in5,$twk5
2769 addi $out,$out,0x60
2770
2771 mtctr $rounds
2772 beq Loop_xts_enc6x # did $len-=96 borrow?
2773
# Fewer than six whole blocks remain.  Adding 0x60 back gives the remaining
# whole-block byte count, a multiple of 16 below 0x60; dispatch on it:
# 0 -> zero, 0x10 -> one, 0x20 -> two, 0x30 -> three, 0x40 -> four, else five.
2774 addic. $len,$len,0x60
2775 beq Lxts_enc6x_zero
2776 cmpwi $len,0x20
2777 blt Lxts_enc6x_one
2778 nop
2779 beq Lxts_enc6x_two
2780 cmpwi $len,0x40
2781 blt Lxts_enc6x_three
2782 nop
2783 beq Lxts_enc6x_four
2784
# Tail handlers for five down to one remaining whole blocks.  The remaining
# blocks sit in the highest-numbered in registers (ending at in5) and are
# paired with twk0 upward.  Unused out lanes are zeroed before the call to
# _aesp8_xts_enc5x.  After the call, twk0 receives the first unused tweak
# and tmp = last output block xor that tweak.  The bne after each store
# sequence tests CR0 as left by the taillen compare inside _aesp8_xts_enc5x.
2785Lxts_enc6x_five:
2786 vxor $out0,$in1,$twk0
2787 vxor $out1,$in2,$twk1
2788 vxor $out2,$in3,$twk2
2789 vxor $out3,$in4,$twk3
2790 vxor $out4,$in5,$twk4
2791
2792 bl _aesp8_xts_enc5x
2793
2794 le?vperm $out0,$out0,$out0,$leperm
2795 vmr $twk0,$twk5 # unused tweak
2796 le?vperm $out1,$out1,$out1,$leperm
2797 stvx_u $out0,$x00,$out # store output
2798 le?vperm $out2,$out2,$out2,$leperm
2799 stvx_u $out1,$x10,$out
2800 le?vperm $out3,$out3,$out3,$leperm
2801 stvx_u $out2,$x20,$out
2802 vxor $tmp,$out4,$twk5 # last block prep for stealing
2803 le?vperm $out4,$out4,$out4,$leperm
2804 stvx_u $out3,$x30,$out
2805 stvx_u $out4,$x40,$out
2806 addi $out,$out,0x50
2807 bne Lxts_enc6x_steal
2808 b Lxts_enc6x_done
2809
2810.align 4
# Four blocks: in2..in5 with twk0..twk3.
2811Lxts_enc6x_four:
2812 vxor $out0,$in2,$twk0
2813 vxor $out1,$in3,$twk1
2814 vxor $out2,$in4,$twk2
2815 vxor $out3,$in5,$twk3
2816 vxor $out4,$out4,$out4
2817
2818 bl _aesp8_xts_enc5x
2819
2820 le?vperm $out0,$out0,$out0,$leperm
2821 vmr $twk0,$twk4 # unused tweak
2822 le?vperm $out1,$out1,$out1,$leperm
2823 stvx_u $out0,$x00,$out # store output
2824 le?vperm $out2,$out2,$out2,$leperm
2825 stvx_u $out1,$x10,$out
2826 vxor $tmp,$out3,$twk4 # last block prep for stealing
2827 le?vperm $out3,$out3,$out3,$leperm
2828 stvx_u $out2,$x20,$out
2829 stvx_u $out3,$x30,$out
2830 addi $out,$out,0x40
2831 bne Lxts_enc6x_steal
2832 b Lxts_enc6x_done
2833
2834.align 4
# Three blocks: in3..in5 with twk0..twk2.
2835Lxts_enc6x_three:
2836 vxor $out0,$in3,$twk0
2837 vxor $out1,$in4,$twk1
2838 vxor $out2,$in5,$twk2
2839 vxor $out3,$out3,$out3
2840 vxor $out4,$out4,$out4
2841
2842 bl _aesp8_xts_enc5x
2843
2844 le?vperm $out0,$out0,$out0,$leperm
2845 vmr $twk0,$twk3 # unused tweak
2846 le?vperm $out1,$out1,$out1,$leperm
2847 stvx_u $out0,$x00,$out # store output
2848 vxor $tmp,$out2,$twk3 # last block prep for stealing
2849 le?vperm $out2,$out2,$out2,$leperm
2850 stvx_u $out1,$x10,$out
2851 stvx_u $out2,$x20,$out
2852 addi $out,$out,0x30
2853 bne Lxts_enc6x_steal
2854 b Lxts_enc6x_done
2855
2856.align 4
# Two blocks: in4 and in5 with twk0 and twk1.
2857Lxts_enc6x_two:
2858 vxor $out0,$in4,$twk0
2859 vxor $out1,$in5,$twk1
2860 vxor $out2,$out2,$out2
2861 vxor $out3,$out3,$out3
2862 vxor $out4,$out4,$out4
2863
2864 bl _aesp8_xts_enc5x
2865
2866 le?vperm $out0,$out0,$out0,$leperm
2867 vmr $twk0,$twk2 # unused tweak
2868 vxor $tmp,$out1,$twk2 # last block prep for stealing
2869 le?vperm $out1,$out1,$out1,$leperm
2870 stvx_u $out0,$x00,$out # store output
2871 stvx_u $out1,$x10,$out
2872 addi $out,$out,0x20
2873 bne Lxts_enc6x_steal
2874 b Lxts_enc6x_done
2875
2876.align 4
# One block: in5 with twk0, processed inline rather than through the 5x
# helper.  Loop_xts_enc1x is also the re-entry point used by the stealing
# code, which arrives with out0 prepared and taillen cleared.
2877Lxts_enc6x_one:
2878 vxor $out0,$in5,$twk0
2879 nop
2880Loop_xts_enc1x:
2881 vcipher $out0,$out0,v24
2882 lvx v24,$x20,$key_ # round[3]
2883 addi $key_,$key_,0x20
2884
2885 vcipher $out0,$out0,v25
2886 lvx v25,$x10,$key_ # round[4]
2887 bdnz Loop_xts_enc1x
2888
# inp += taillen - 16, then the last 16 input bytes are loaded into in0 and
# rotated by a permute derived from taillen.  The compare sets CR0 for the
# bne further down.
2889 add $inp,$inp,$taillen
2890 cmpwi $taillen,0
2891 vcipher $out0,$out0,v24
2892
2893 subi $inp,$inp,16
2894 vcipher $out0,$out0,v25
2895
2896 lvsr $inpperm,0,$taillen
2897 vcipher $out0,$out0,v26
2898
2899 lvx_u $in0,0,$inp
2900 vcipher $out0,$out0,v27
2901
2902 addi $key_,$sp,$FRAME+15 # rewind $key_
2903 vcipher $out0,$out0,v28
2904 lvx v24,$x00,$key_ # re-pre-load round[1]
2905
2906 vcipher $out0,$out0,v29
2907 lvx v25,$x10,$key_ # re-pre-load round[2]
2908 vxor $twk0,$twk0,v31
2909
2910 le?vperm $in0,$in0,$in0,$leperm
2911 vcipher $out0,$out0,v30
2912
2913 vperm $in0,$in0,$in0,$inpperm
2914 vcipherlast $out0,$out0,$twk0
2915
2916 vmr $twk0,$twk1 # unused tweak
2917 vxor $tmp,$out0,$twk1 # last block prep for stealing
2918 le?vperm $out0,$out0,$out0,$leperm
2919 stvx_u $out0,$x00,$out # store output
2920 addi $out,$out,0x10
2921 bne Lxts_enc6x_steal
2922 b Lxts_enc6x_done
2923
2924.align 4
# No whole blocks remain after the 6x loop.  With taillen zero there is
# nothing more to do; otherwise the last 16 input bytes are loaded, rotated
# by a permute derived from taillen, and tmp (the last block produced by the
# loop) is xored with twk0 before falling into the stealing code.
2925Lxts_enc6x_zero:
2926 cmpwi $taillen,0
2927 beq Lxts_enc6x_done
2928
2929 add $inp,$inp,$taillen
2930 subi $inp,$inp,16
2931 lvx_u $in0,0,$inp
2932 lvsr $inpperm,0,$taillen # $in5 is no more
2933 le?vperm $in0,$in0,$in0,$leperm
2934 vperm $in0,$in0,$in0,$inpperm
2935 vxor $tmp,$tmp,$twk0
# Ciphertext stealing: out0 is assembled from in0 xor twk0 and tmp under a
# byte mask built from inpperm.
2936Lxts_enc6x_steal:
2937 vxor $in0,$in0,$twk0
2938 vxor $out0,$out0,$out0
2939 vspltisb $out1,-1
2940 vperm $out0,$out0,$out1,$inpperm
2941 vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember?
2942
# out steps back one block, and taillen bytes of the previously stored block
# are copied to the position 16 bytes after it, one byte per pass.
2943 subi r30,$out,17
2944 subi $out,$out,16
2945 mtctr $taillen
2946Loop_xts_enc6x_steal:
2947 lbzu r0,1(r30)
2948 stb r0,16(r30)
2949 bdnz Loop_xts_enc6x_steal
2950
# taillen is cleared so the compare in the 1x path selects the done exit.
2951 li $taillen,0
2952 mtctr $rounds
2953 b Loop_xts_enc1x # one more time...
2954
2955.align 4
# Exit path.  When ivp is non-zero the next tweak is written to it; twk0
# holds that tweak with rndkey0 folded in, so the xor removes rndkey0 first.
2956Lxts_enc6x_done:
2957 ${UCMP}i $ivp,0
2958 beq Lxts_enc6x_ret
2959
2960 vxor $tweak,$twk0,$rndkey0
2961 le?vperm $tweak,$tweak,$tweak,$leperm
2962 stvx_u $tweak,0,$ivp
2963
# LR is restored from r11 first because r11 is reused as an offset below.
# Eight stores overwrite the stack copies of the round keys.  After them r10
# and r11 equal FRAME+8*16+15 and FRAME+8*16+31, the offsets at which v20
# and v21 were saved, so the vector restores continue with the same pair.
2964Lxts_enc6x_ret:
2965 mtlr r11
2966 li r10,`$FRAME+15`
2967 li r11,`$FRAME+31`
2968 stvx $seven,r10,$sp # wipe copies of round keys
2969 addi r10,r10,32
2970 stvx $seven,r11,$sp
2971 addi r11,r11,32
2972 stvx $seven,r10,$sp
2973 addi r10,r10,32
2974 stvx $seven,r11,$sp
2975 addi r11,r11,32
2976 stvx $seven,r10,$sp
2977 addi r10,r10,32
2978 stvx $seven,r11,$sp
2979 addi r11,r11,32
2980 stvx $seven,r10,$sp
2981 addi r10,r10,32
2982 stvx $seven,r11,$sp
2983 addi r11,r11,32
2984
2985 mtspr 256,$vrsave
2986 lvx v20,r10,$sp # ABI says so
2987 addi r10,r10,32
2988 lvx v21,r11,$sp
2989 addi r11,r11,32
2990 lvx v22,r10,$sp
2991 addi r10,r10,32
2992 lvx v23,r11,$sp
2993 addi r11,r11,32
2994 lvx v24,r10,$sp
2995 addi r10,r10,32
2996 lvx v25,r11,$sp
2997 addi r11,r11,32
2998 lvx v26,r10,$sp
2999 addi r10,r10,32
3000 lvx v27,r11,$sp
3001 addi r11,r11,32
3002 lvx v28,r10,$sp
3003 addi r10,r10,32
3004 lvx v29,r11,$sp
3005 addi r11,r11,32
3006 lvx v30,r10,$sp
3007 lvx v31,r11,$sp
# Restore r26-r31 and release the frame.
3008 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3009 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3010 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3011 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3012 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3013 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3014 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
3015 blr
# NOTE(review): the three data words below look like a traceback table;
# they are not interpreted anywhere in this routine - confirm.
3016 .long 0
3017 .byte 0,12,0x04,1,0x80,6,6,0
3018 .long 0
3019
# _aesp8_xts_enc5x: helper for the 6x tail handlers.  Encrypts the five lanes
# out0..out4 in place.
#
# Expected on entry (all set by the callers in this file):
#   out0..out4  blocks already xored with their twk value
#   ctr         loop count for the counted rounds (two rounds per pass)
#   v24, v25    first two stacked round keys, with key_ at the stack copy
#   v26..v31    last six round keys, v31 already xored with rndkey0
#   twk0..twk4  tweaks with rndkey0 folded in
#
# Side effects the callers rely on:
#   CR0         result of comparing taillen with 0 (tested by bne on return)
#   inp         advanced by taillen - 16
#   in0         last 16 input bytes, rotated by a permute built from taillen
#   inpperm     that permute
#   twk0        xored with v31; in1..in4 hold twk1..twk4 xor v31
#   key_, v24, v25  rewound and reloaded for the next use
3020.align 5
3021_aesp8_xts_enc5x:
3022 vcipher $out0,$out0,v24
3023 vcipher $out1,$out1,v24
3024 vcipher $out2,$out2,v24
3025 vcipher $out3,$out3,v24
3026 vcipher $out4,$out4,v24
3027 lvx v24,$x20,$key_ # round[3]
3028 addi $key_,$key_,0x20
3029
3030 vcipher $out0,$out0,v25
3031 vcipher $out1,$out1,v25
3032 vcipher $out2,$out2,v25
3033 vcipher $out3,$out3,v25
3034 vcipher $out4,$out4,v25
3035 lvx v25,$x10,$key_ # round[4]
3036 bdnz _aesp8_xts_enc5x
3037
# CR0 is set here and must survive until the caller's bne; nothing below
# writes a condition register field.
3038 add $inp,$inp,$taillen
3039 cmpwi $taillen,0
3040 vcipher $out0,$out0,v24
3041 vcipher $out1,$out1,v24
3042 vcipher $out2,$out2,v24
3043 vcipher $out3,$out3,v24
3044 vcipher $out4,$out4,v24
3045
3046 subi $inp,$inp,16
3047 vcipher $out0,$out0,v25
3048 vcipher $out1,$out1,v25
3049 vcipher $out2,$out2,v25
3050 vcipher $out3,$out3,v25
3051 vcipher $out4,$out4,v25
3052 vxor $twk0,$twk0,v31
3053
# The base operand of lvsr is written as the literal 0, matching the other
# lvsr uses that take taillen (it was spelled r0 here; the encoding is the
# same because a zero in that field selects the constant zero, not r0).
3054 vcipher $out0,$out0,v26
3055 lvsr $inpperm,0,$taillen # $in5 is no more
3056 vcipher $out1,$out1,v26
3057 vcipher $out2,$out2,v26
3058 vcipher $out3,$out3,v26
3059 vcipher $out4,$out4,v26
3060 vxor $in1,$twk1,v31
3061
3062 vcipher $out0,$out0,v27
3063 lvx_u $in0,0,$inp
3064 vcipher $out1,$out1,v27
3065 vcipher $out2,$out2,v27
3066 vcipher $out3,$out3,v27
3067 vcipher $out4,$out4,v27
3068 vxor $in2,$twk2,v31
3069
3070 addi $key_,$sp,$FRAME+15 # rewind $key_
3071 vcipher $out0,$out0,v28
3072 vcipher $out1,$out1,v28
3073 vcipher $out2,$out2,v28
3074 vcipher $out3,$out3,v28
3075 vcipher $out4,$out4,v28
3076 lvx v24,$x00,$key_ # re-pre-load round[1]
3077 vxor $in3,$twk3,v31
3078
3079 vcipher $out0,$out0,v29
3080 le?vperm $in0,$in0,$in0,$leperm
3081 vcipher $out1,$out1,v29
3082 vcipher $out2,$out2,v29
3083 vcipher $out3,$out3,v29
3084 vcipher $out4,$out4,v29
3085 lvx v25,$x10,$key_ # re-pre-load round[2]
3086 vxor $in4,$twk4,v31
3087
3088 vcipher $out0,$out0,v30
3089 vperm $in0,$in0,$in0,$inpperm
3090 vcipher $out1,$out1,v30
3091 vcipher $out2,$out2,v30
3092 vcipher $out3,$out3,v30
3093 vcipher $out4,$out4,v30
3094
# Final round: each lane uses its own (tweak xor last round key) operand.
3095 vcipherlast $out0,$out0,$twk0
3096 vcipherlast $out1,$out1,$in1
3097 vcipherlast $out2,$out2,$in2
3098 vcipherlast $out3,$out3,$in3
3099 vcipherlast $out4,$out4,$in4
3100 blr
3101 .long 0
3102 .byte 0,12,0x14,0,0,0,0,0
3103
3104.align 5
3105_aesp8_xts_decrypt6x:
3106 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
3107 mflr r11
3108 li r7,`$FRAME+8*16+15`
3109 li r3,`$FRAME+8*16+31`
3110 $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
3111 stvx v20,r7,$sp # ABI says so
3112 addi r7,r7,32
3113 stvx v21,r3,$sp
3114 addi r3,r3,32
3115 stvx v22,r7,$sp
3116 addi r7,r7,32
3117 stvx v23,r3,$sp
3118 addi r3,r3,32
3119 stvx v24,r7,$sp
3120 addi r7,r7,32
3121 stvx v25,r3,$sp
3122 addi r3,r3,32
3123 stvx v26,r7,$sp
3124 addi r7,r7,32
3125 stvx v27,r3,$sp
3126 addi r3,r3,32
3127 stvx v28,r7,$sp
3128 addi r7,r7,32
3129 stvx v29,r3,$sp
3130 addi r3,r3,32
3131 stvx v30,r7,$sp
3132 stvx v31,r3,$sp
3133 li r0,-1
3134 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
3135 li $x10,0x10
3136 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3137 li $x20,0x20
3138 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3139 li $x30,0x30
3140 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3141 li $x40,0x40
3142 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3143 li $x50,0x50
3144 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3145 li $x60,0x60
3146 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3147 li $x70,0x70
3148 mtspr 256,r0
3149
3150 subi $rounds,$rounds,3 # -4 in total
3151
3152 lvx $rndkey0,$x00,$key1 # load key schedule
3153 lvx v30,$x10,$key1
3154 addi $key1,$key1,0x20
3155 lvx v31,$x00,$key1
3156 ?vperm $rndkey0,$rndkey0,v30,$keyperm
3157 addi $key_,$sp,$FRAME+15
3158 mtctr $rounds
3159
3160Load_xts_dec_key:
3161 ?vperm v24,v30,v31,$keyperm
3162 lvx v30,$x10,$key1
3163 addi $key1,$key1,0x20
3164 stvx v24,$x00,$key_ # off-load round[1]
3165 ?vperm v25,v31,v30,$keyperm
3166 lvx v31,$x00,$key1
3167 stvx v25,$x10,$key_ # off-load round[2]
3168 addi $key_,$key_,0x20
3169 bdnz Load_xts_dec_key
3170
3171 lvx v26,$x10,$key1
3172 ?vperm v24,v30,v31,$keyperm
3173 lvx v27,$x20,$key1
3174 stvx v24,$x00,$key_ # off-load round[3]
3175 ?vperm v25,v31,v26,$keyperm
3176 lvx v28,$x30,$key1
3177 stvx v25,$x10,$key_ # off-load round[4]
3178 addi $key_,$sp,$FRAME+15 # rewind $key_
3179 ?vperm v26,v26,v27,$keyperm
3180 lvx v29,$x40,$key1
3181 ?vperm v27,v27,v28,$keyperm
3182 lvx v30,$x50,$key1
3183 ?vperm v28,v28,v29,$keyperm
3184 lvx v31,$x60,$key1
3185 ?vperm v29,v29,v30,$keyperm
3186 lvx $twk5,$x70,$key1 # borrow $twk5
3187 ?vperm v30,v30,v31,$keyperm
3188 lvx v24,$x00,$key_ # pre-load round[1]
3189 ?vperm v31,v31,$twk5,$keyperm
3190 lvx v25,$x10,$key_ # pre-load round[2]
3191
3192 vperm $in0,$inout,$inptail,$inpperm
3193 subi $inp,$inp,31 # undo "caller"
3194 vxor $twk0,$tweak,$rndkey0
3195 vsrab $tmp,$tweak,$seven # next tweak value
3196 vaddubm $tweak,$tweak,$tweak
3197 vsldoi $tmp,$tmp,$tmp,15
3198 vand $tmp,$tmp,$eighty7
3199 vxor $out0,$in0,$twk0
3200 vxor $tweak,$tweak,$tmp
3201
3202 lvx_u $in1,$x10,$inp
3203 vxor $twk1,$tweak,$rndkey0
3204 vsrab $tmp,$tweak,$seven # next tweak value
3205 vaddubm $tweak,$tweak,$tweak
3206 vsldoi $tmp,$tmp,$tmp,15
3207 le?vperm $in1,$in1,$in1,$leperm
3208 vand $tmp,$tmp,$eighty7
3209 vxor $out1,$in1,$twk1
3210 vxor $tweak,$tweak,$tmp
3211
3212 lvx_u $in2,$x20,$inp
3213 andi. $taillen,$len,15
3214 vxor $twk2,$tweak,$rndkey0
3215 vsrab $tmp,$tweak,$seven # next tweak value
3216 vaddubm $tweak,$tweak,$tweak
3217 vsldoi $tmp,$tmp,$tmp,15
3218 le?vperm $in2,$in2,$in2,$leperm
3219 vand $tmp,$tmp,$eighty7
3220 vxor $out2,$in2,$twk2
3221 vxor $tweak,$tweak,$tmp
3222
3223 lvx_u $in3,$x30,$inp
3224 sub $len,$len,$taillen
3225 vxor $twk3,$tweak,$rndkey0
3226 vsrab $tmp,$tweak,$seven # next tweak value
3227 vaddubm $tweak,$tweak,$tweak
3228 vsldoi $tmp,$tmp,$tmp,15
3229 le?vperm $in3,$in3,$in3,$leperm
3230 vand $tmp,$tmp,$eighty7
3231 vxor $out3,$in3,$twk3
3232 vxor $tweak,$tweak,$tmp
3233
3234 lvx_u $in4,$x40,$inp
3235 subi $len,$len,0x60
3236 vxor $twk4,$tweak,$rndkey0
3237 vsrab $tmp,$tweak,$seven # next tweak value
3238 vaddubm $tweak,$tweak,$tweak
3239 vsldoi $tmp,$tmp,$tmp,15
3240 le?vperm $in4,$in4,$in4,$leperm
3241 vand $tmp,$tmp,$eighty7
3242 vxor $out4,$in4,$twk4
3243 vxor $tweak,$tweak,$tmp
3244
3245 lvx_u $in5,$x50,$inp
3246 addi $inp,$inp,0x60
3247 vxor $twk5,$tweak,$rndkey0
3248 vsrab $tmp,$tweak,$seven # next tweak value
3249 vaddubm $tweak,$tweak,$tweak
3250 vsldoi $tmp,$tmp,$tmp,15
3251 le?vperm $in5,$in5,$in5,$leperm
3252 vand $tmp,$tmp,$eighty7
3253 vxor $out5,$in5,$twk5
3254 vxor $tweak,$tweak,$tmp
3255
3256 vxor v31,v31,$rndkey0
3257 mtctr $rounds
3258 b Loop_xts_dec6x
3259
3260.align 5
3261Loop_xts_dec6x:
3262 vncipher $out0,$out0,v24
3263 vncipher $out1,$out1,v24
3264 vncipher $out2,$out2,v24
3265 vncipher $out3,$out3,v24
3266 vncipher $out4,$out4,v24
3267 vncipher $out5,$out5,v24
3268 lvx v24,$x20,$key_ # round[3]
3269 addi $key_,$key_,0x20
3270
3271 vncipher $out0,$out0,v25
3272 vncipher $out1,$out1,v25
3273 vncipher $out2,$out2,v25
3274 vncipher $out3,$out3,v25
3275 vncipher $out4,$out4,v25
3276 vncipher $out5,$out5,v25
3277 lvx v25,$x10,$key_ # round[4]
3278 bdnz Loop_xts_dec6x
3279
3280 subic $len,$len,96 # $len-=96
3281 vxor $in0,$twk0,v31 # xor with last round key
3282 vncipher $out0,$out0,v24
3283 vncipher $out1,$out1,v24
3284 vsrab $tmp,$tweak,$seven # next tweak value
3285 vxor $twk0,$tweak,$rndkey0
3286 vaddubm $tweak,$tweak,$tweak
3287 vncipher $out2,$out2,v24
3288 vncipher $out3,$out3,v24
3289 vsldoi $tmp,$tmp,$tmp,15
3290 vncipher $out4,$out4,v24
3291 vncipher $out5,$out5,v24
3292
3293 subfe. r0,r0,r0 # borrow?-1:0
3294 vand $tmp,$tmp,$eighty7
3295 vncipher $out0,$out0,v25
3296 vncipher $out1,$out1,v25
3297 vxor $tweak,$tweak,$tmp
3298 vncipher $out2,$out2,v25
3299 vncipher $out3,$out3,v25
3300 vxor $in1,$twk1,v31
3301 vsrab $tmp,$tweak,$seven # next tweak value
3302 vxor $twk1,$tweak,$rndkey0
3303 vncipher $out4,$out4,v25
3304 vncipher $out5,$out5,v25
3305
3306 and r0,r0,$len
3307 vaddubm $tweak,$tweak,$tweak
3308 vsldoi $tmp,$tmp,$tmp,15
3309 vncipher $out0,$out0,v26
3310 vncipher $out1,$out1,v26
3311 vand $tmp,$tmp,$eighty7
3312 vncipher $out2,$out2,v26
3313 vncipher $out3,$out3,v26
3314 vxor $tweak,$tweak,$tmp
3315 vncipher $out4,$out4,v26
3316 vncipher $out5,$out5,v26
3317
3318 add $inp,$inp,r0 # $inp is adjusted in such
3319 # way that at exit from the
3320 # loop inX-in5 are loaded
3321 # with last "words"
3322 vxor $in2,$twk2,v31
3323 vsrab $tmp,$tweak,$seven # next tweak value
3324 vxor $twk2,$tweak,$rndkey0
3325 vaddubm $tweak,$tweak,$tweak
3326 vncipher $out0,$out0,v27
3327 vncipher $out1,$out1,v27
3328 vsldoi $tmp,$tmp,$tmp,15
3329 vncipher $out2,$out2,v27
3330 vncipher $out3,$out3,v27
3331 vand $tmp,$tmp,$eighty7
3332 vncipher $out4,$out4,v27
3333 vncipher $out5,$out5,v27
3334
3335 addi $key_,$sp,$FRAME+15 # rewind $key_
3336 vxor $tweak,$tweak,$tmp
3337 vncipher $out0,$out0,v28
3338 vncipher $out1,$out1,v28
3339 vxor $in3,$twk3,v31
3340 vsrab $tmp,$tweak,$seven # next tweak value
3341 vxor $twk3,$tweak,$rndkey0
3342 vncipher $out2,$out2,v28
3343 vncipher $out3,$out3,v28
3344 vaddubm $tweak,$tweak,$tweak
3345 vsldoi $tmp,$tmp,$tmp,15
3346 vncipher $out4,$out4,v28
3347 vncipher $out5,$out5,v28
3348 lvx v24,$x00,$key_ # re-pre-load round[1]
3349 vand $tmp,$tmp,$eighty7
3350
3351 vncipher $out0,$out0,v29
3352 vncipher $out1,$out1,v29
3353 vxor $tweak,$tweak,$tmp
3354 vncipher $out2,$out2,v29
3355 vncipher $out3,$out3,v29
3356 vxor $in4,$twk4,v31
3357 vsrab $tmp,$tweak,$seven # next tweak value
3358 vxor $twk4,$tweak,$rndkey0
3359 vncipher $out4,$out4,v29
3360 vncipher $out5,$out5,v29
3361 lvx v25,$x10,$key_ # re-pre-load round[2]
3362 vaddubm $tweak,$tweak,$tweak
3363 vsldoi $tmp,$tmp,$tmp,15
3364
3365 vncipher $out0,$out0,v30
3366 vncipher $out1,$out1,v30
3367 vand $tmp,$tmp,$eighty7
3368 vncipher $out2,$out2,v30
3369 vncipher $out3,$out3,v30
3370 vxor $tweak,$tweak,$tmp
3371 vncipher $out4,$out4,v30
3372 vncipher $out5,$out5,v30
3373 vxor $in5,$twk5,v31
3374 vsrab $tmp,$tweak,$seven # next tweak value
3375 vxor $twk5,$tweak,$rndkey0
3376
3377 vncipherlast $out0,$out0,$in0
3378 lvx_u $in0,$x00,$inp # load next input block
3379 vaddubm $tweak,$tweak,$tweak
3380 vsldoi $tmp,$tmp,$tmp,15
3381 vncipherlast $out1,$out1,$in1
3382 lvx_u $in1,$x10,$inp
3383 vncipherlast $out2,$out2,$in2
3384 le?vperm $in0,$in0,$in0,$leperm
3385 lvx_u $in2,$x20,$inp
3386 vand $tmp,$tmp,$eighty7
3387 vncipherlast $out3,$out3,$in3
3388 le?vperm $in1,$in1,$in1,$leperm
3389 lvx_u $in3,$x30,$inp
3390 vncipherlast $out4,$out4,$in4
3391 le?vperm $in2,$in2,$in2,$leperm
3392 lvx_u $in4,$x40,$inp
3393 vxor $tweak,$tweak,$tmp
3394 vncipherlast $out5,$out5,$in5
3395 le?vperm $in3,$in3,$in3,$leperm
3396 lvx_u $in5,$x50,$inp
3397 addi $inp,$inp,0x60
3398 le?vperm $in4,$in4,$in4,$leperm
3399 le?vperm $in5,$in5,$in5,$leperm
3400
3401 le?vperm $out0,$out0,$out0,$leperm
3402 le?vperm $out1,$out1,$out1,$leperm
3403 stvx_u $out0,$x00,$out # store output
3404 vxor $out0,$in0,$twk0
3405 le?vperm $out2,$out2,$out2,$leperm
3406 stvx_u $out1,$x10,$out
3407 vxor $out1,$in1,$twk1
3408 le?vperm $out3,$out3,$out3,$leperm
3409 stvx_u $out2,$x20,$out
3410 vxor $out2,$in2,$twk2
3411 le?vperm $out4,$out4,$out4,$leperm
3412 stvx_u $out3,$x30,$out
3413 vxor $out3,$in3,$twk3
3414 le?vperm $out5,$out5,$out5,$leperm
3415 stvx_u $out4,$x40,$out
3416 vxor $out4,$in4,$twk4
3417 stvx_u $out5,$x50,$out
3418 vxor $out5,$in5,$twk5
3419 addi $out,$out,0x60
3420
3421 mtctr $rounds
3422 beq Loop_xts_dec6x # did $len-=96 borrow?
3423
3424 addic. $len,$len,0x60
3425 beq Lxts_dec6x_zero
3426 cmpwi $len,0x20
3427 blt Lxts_dec6x_one
3428 nop
3429 beq Lxts_dec6x_two
3430 cmpwi $len,0x40
3431 blt Lxts_dec6x_three
3432 nop
3433 beq Lxts_dec6x_four
3434
3435Lxts_dec6x_five:
3436 vxor $out0,$in1,$twk0
3437 vxor $out1,$in2,$twk1
3438 vxor $out2,$in3,$twk2
3439 vxor $out3,$in4,$twk3
3440 vxor $out4,$in5,$twk4
3441
3442 bl _aesp8_xts_dec5x
3443
3444 le?vperm $out0,$out0,$out0,$leperm
3445 vmr $twk0,$twk5 # unused tweak
3446 vxor $twk1,$tweak,$rndkey0
3447 le?vperm $out1,$out1,$out1,$leperm
3448 stvx_u $out0,$x00,$out # store output
3449 vxor $out0,$in0,$twk1
3450 le?vperm $out2,$out2,$out2,$leperm
3451 stvx_u $out1,$x10,$out
3452 le?vperm $out3,$out3,$out3,$leperm
3453 stvx_u $out2,$x20,$out
3454 le?vperm $out4,$out4,$out4,$leperm
3455 stvx_u $out3,$x30,$out
3456 stvx_u $out4,$x40,$out
3457 addi $out,$out,0x50
3458 bne Lxts_dec6x_steal
3459 b Lxts_dec6x_done
3460
3461.align 4
3462Lxts_dec6x_four:
3463 vxor $out0,$in2,$twk0
3464 vxor $out1,$in3,$twk1
3465 vxor $out2,$in4,$twk2
3466 vxor $out3,$in5,$twk3
3467 vxor $out4,$out4,$out4
3468
3469 bl _aesp8_xts_dec5x
3470
3471 le?vperm $out0,$out0,$out0,$leperm
3472 vmr $twk0,$twk4 # unused tweak
3473 vmr $twk1,$twk5
3474 le?vperm $out1,$out1,$out1,$leperm
3475 stvx_u $out0,$x00,$out # store output
3476 vxor $out0,$in0,$twk5
3477 le?vperm $out2,$out2,$out2,$leperm
3478 stvx_u $out1,$x10,$out
3479 le?vperm $out3,$out3,$out3,$leperm
3480 stvx_u $out2,$x20,$out
3481 stvx_u $out3,$x30,$out
3482 addi $out,$out,0x40
3483 bne Lxts_dec6x_steal
3484 b Lxts_dec6x_done
3485
3486.align 4
3487Lxts_dec6x_three:
3488 vxor $out0,$in3,$twk0
3489 vxor $out1,$in4,$twk1
3490 vxor $out2,$in5,$twk2
3491 vxor $out3,$out3,$out3
3492 vxor $out4,$out4,$out4
3493
3494 bl _aesp8_xts_dec5x
3495
3496 le?vperm $out0,$out0,$out0,$leperm
3497 vmr $twk0,$twk3 # unused tweak
3498 vmr $twk1,$twk4
3499 le?vperm $out1,$out1,$out1,$leperm
3500 stvx_u $out0,$x00,$out # store output
3501 vxor $out0,$in0,$twk4
3502 le?vperm $out2,$out2,$out2,$leperm
3503 stvx_u $out1,$x10,$out
3504 stvx_u $out2,$x20,$out
3505 addi $out,$out,0x30
3506 bne Lxts_dec6x_steal
3507 b Lxts_dec6x_done
3508
3509.align 4
3510Lxts_dec6x_two:
3511 vxor $out0,$in4,$twk0
3512 vxor $out1,$in5,$twk1
3513 vxor $out2,$out2,$out2
3514 vxor $out3,$out3,$out3
3515 vxor $out4,$out4,$out4
3516
3517 bl _aesp8_xts_dec5x
3518
3519 le?vperm $out0,$out0,$out0,$leperm
3520 vmr $twk0,$twk2 # unused tweak
3521 vmr $twk1,$twk3
3522 le?vperm $out1,$out1,$out1,$leperm
3523 stvx_u $out0,$x00,$out # store output
3524 vxor $out0,$in0,$twk3
3525 stvx_u $out1,$x10,$out
3526 addi $out,$out,0x20
3527 bne Lxts_dec6x_steal
3528 b Lxts_dec6x_done
3529
3530.align 4
3531Lxts_dec6x_one:
3532 vxor $out0,$in5,$twk0
3533 nop
3534Loop_xts_dec1x:
3535 vncipher $out0,$out0,v24
3536 lvx v24,$x20,$key_ # round[3]
3537 addi $key_,$key_,0x20
3538
3539 vncipher $out0,$out0,v25
3540 lvx v25,$x10,$key_ # round[4]
3541 bdnz Loop_xts_dec1x
3542
3543 subi r0,$taillen,1
3544 vncipher $out0,$out0,v24
3545
3546 andi. r0,r0,16
3547 cmpwi $taillen,0
3548 vncipher $out0,$out0,v25
3549
3550 sub $inp,$inp,r0
3551 vncipher $out0,$out0,v26
3552
3553 lvx_u $in0,0,$inp
3554 vncipher $out0,$out0,v27
3555
3556 addi $key_,$sp,$FRAME+15 # rewind $key_
3557 vncipher $out0,$out0,v28
3558 lvx v24,$x00,$key_ # re-pre-load round[1]
3559
3560 vncipher $out0,$out0,v29
3561 lvx v25,$x10,$key_ # re-pre-load round[2]
3562 vxor $twk0,$twk0,v31
3563
3564 le?vperm $in0,$in0,$in0,$leperm
3565 vncipher $out0,$out0,v30
3566
3567 mtctr $rounds
3568 vncipherlast $out0,$out0,$twk0
3569
3570 vmr $twk0,$twk1 # unused tweak
3571 vmr $twk1,$twk2
3572 le?vperm $out0,$out0,$out0,$leperm
3573 stvx_u $out0,$x00,$out # store output
3574 addi $out,$out,0x10
3575 vxor $out0,$in0,$twk2
3576 bne Lxts_dec6x_steal
3577 b Lxts_dec6x_done
3578
3579.align 4
3580Lxts_dec6x_zero:
3581 cmpwi $taillen,0
3582 beq Lxts_dec6x_done
3583
3584 lvx_u $in0,0,$inp
3585 le?vperm $in0,$in0,$in0,$leperm
3586 vxor $out0,$in0,$twk1
3587Lxts_dec6x_steal:
3588 vncipher $out0,$out0,v24
3589 lvx v24,$x20,$key_ # round[3]
3590 addi $key_,$key_,0x20
3591
3592 vncipher $out0,$out0,v25
3593 lvx v25,$x10,$key_ # round[4]
3594 bdnz Lxts_dec6x_steal
3595
3596 add $inp,$inp,$taillen
3597 vncipher $out0,$out0,v24
3598
3599 cmpwi $taillen,0
3600 vncipher $out0,$out0,v25
3601
3602 lvx_u $in0,0,$inp
3603 vncipher $out0,$out0,v26
3604
3605 lvsr $inpperm,0,$taillen # $in5 is no more
3606 vncipher $out0,$out0,v27
3607
3608 addi $key_,$sp,$FRAME+15 # rewind $key_
3609 vncipher $out0,$out0,v28
3610 lvx v24,$x00,$key_ # re-pre-load round[1]
3611
3612 vncipher $out0,$out0,v29
3613 lvx v25,$x10,$key_ # re-pre-load round[2]
3614 vxor $twk1,$twk1,v31
3615
3616 le?vperm $in0,$in0,$in0,$leperm
3617 vncipher $out0,$out0,v30
3618
3619 vperm $in0,$in0,$in0,$inpperm
3620 vncipherlast $tmp,$out0,$twk1
3621
3622 le?vperm $out0,$tmp,$tmp,$leperm
3623 le?stvx_u $out0,0,$out
3624 be?stvx_u $tmp,0,$out
3625
3626 vxor $out0,$out0,$out0
3627 vspltisb $out1,-1
3628 vperm $out0,$out0,$out1,$inpperm
3629 vsel $out0,$in0,$tmp,$out0
3630 vxor $out0,$out0,$twk0
3631
3632 subi r30,$out,1
3633 mtctr $taillen
3634Loop_xts_dec6x_steal:
3635 lbzu r0,1(r30)
3636 stb r0,16(r30)
3637 bdnz Loop_xts_dec6x_steal
3638
3639 li $taillen,0
3640 mtctr $rounds
3641 b Loop_xts_dec1x # one more time...
3642
3643.align 4
3644Lxts_dec6x_done:
3645 ${UCMP}i $ivp,0
3646 beq Lxts_dec6x_ret
3647
3648 vxor $tweak,$twk0,$rndkey0
3649 le?vperm $tweak,$tweak,$tweak,$leperm
3650 stvx_u $tweak,0,$ivp
3651
3652Lxts_dec6x_ret:
3653 mtlr r11
3654 li r10,`$FRAME+15`
3655 li r11,`$FRAME+31`
3656 stvx $seven,r10,$sp # wipe copies of round keys
3657 addi r10,r10,32
3658 stvx $seven,r11,$sp
3659 addi r11,r11,32
3660 stvx $seven,r10,$sp
3661 addi r10,r10,32
3662 stvx $seven,r11,$sp
3663 addi r11,r11,32
3664 stvx $seven,r10,$sp
3665 addi r10,r10,32
3666 stvx $seven,r11,$sp
3667 addi r11,r11,32
3668 stvx $seven,r10,$sp
3669 addi r10,r10,32
3670 stvx $seven,r11,$sp
3671 addi r11,r11,32
3672
3673 mtspr 256,$vrsave
3674 lvx v20,r10,$sp # ABI says so
3675 addi r10,r10,32
3676 lvx v21,r11,$sp
3677 addi r11,r11,32
3678 lvx v22,r10,$sp
3679 addi r10,r10,32
3680 lvx v23,r11,$sp
3681 addi r11,r11,32
3682 lvx v24,r10,$sp
3683 addi r10,r10,32
3684 lvx v25,r11,$sp
3685 addi r11,r11,32
3686 lvx v26,r10,$sp
3687 addi r10,r10,32
3688 lvx v27,r11,$sp
3689 addi r11,r11,32
3690 lvx v28,r10,$sp
3691 addi r10,r10,32
3692 lvx v29,r11,$sp
3693 addi r11,r11,32
3694 lvx v30,r10,$sp
3695 lvx v31,r11,$sp
3696 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3697 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3698 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3699 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3700 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3701 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3702 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
3703 blr
3704 .long 0
3705 .byte 0,12,0x04,1,0x80,6,6,0
3706 .long 0
3707
3708.align 5
3709_aesp8_xts_dec5x:
3710 vncipher $out0,$out0,v24
3711 vncipher $out1,$out1,v24
3712 vncipher $out2,$out2,v24
3713 vncipher $out3,$out3,v24
3714 vncipher $out4,$out4,v24
3715 lvx v24,$x20,$key_ # round[3]
3716 addi $key_,$key_,0x20
3717
3718 vncipher $out0,$out0,v25
3719 vncipher $out1,$out1,v25
3720 vncipher $out2,$out2,v25
3721 vncipher $out3,$out3,v25
3722 vncipher $out4,$out4,v25
3723 lvx v25,$x10,$key_ # round[4]
3724 bdnz _aesp8_xts_dec5x
3725
3726 subi r0,$taillen,1
3727 vncipher $out0,$out0,v24
3728 vncipher $out1,$out1,v24
3729 vncipher $out2,$out2,v24
3730 vncipher $out3,$out3,v24
3731 vncipher $out4,$out4,v24
3732
3733 andi. r0,r0,16
3734 cmpwi $taillen,0
3735 vncipher $out0,$out0,v25
3736 vncipher $out1,$out1,v25
3737 vncipher $out2,$out2,v25
3738 vncipher $out3,$out3,v25
3739 vncipher $out4,$out4,v25
3740 vxor $twk0,$twk0,v31
3741
3742 sub $inp,$inp,r0
3743 vncipher $out0,$out0,v26
3744 vncipher $out1,$out1,v26
3745 vncipher $out2,$out2,v26
3746 vncipher $out3,$out3,v26
3747 vncipher $out4,$out4,v26
3748 vxor $in1,$twk1,v31
3749
3750 vncipher $out0,$out0,v27
3751 lvx_u $in0,0,$inp
3752 vncipher $out1,$out1,v27
3753 vncipher $out2,$out2,v27
3754 vncipher $out3,$out3,v27
3755 vncipher $out4,$out4,v27
3756 vxor $in2,$twk2,v31
3757
3758 addi $key_,$sp,$FRAME+15 # rewind $key_
3759 vncipher $out0,$out0,v28
3760 vncipher $out1,$out1,v28
3761 vncipher $out2,$out2,v28
3762 vncipher $out3,$out3,v28
3763 vncipher $out4,$out4,v28
3764 lvx v24,$x00,$key_ # re-pre-load round[1]
3765 vxor $in3,$twk3,v31
3766
3767 vncipher $out0,$out0,v29
3768 le?vperm $in0,$in0,$in0,$leperm
3769 vncipher $out1,$out1,v29
3770 vncipher $out2,$out2,v29
3771 vncipher $out3,$out3,v29
3772 vncipher $out4,$out4,v29
3773 lvx v25,$x10,$key_ # re-pre-load round[2]
3774 vxor $in4,$twk4,v31
3775
3776 vncipher $out0,$out0,v30
3777 vncipher $out1,$out1,v30
3778 vncipher $out2,$out2,v30
3779 vncipher $out3,$out3,v30
3780 vncipher $out4,$out4,v30
3781
3782 vncipherlast $out0,$out0,$twk0
3783 vncipherlast $out1,$out1,$in1
3784 vncipherlast $out2,$out2,$in2
3785 vncipherlast $out3,$out3,$in3
3786 vncipherlast $out4,$out4,$in4
3787 mtctr $rounds
3788 blr
3789 .long 0
3790 .byte 0,12,0x14,0,0,0,0,0
3791___
3792}} }}}
3793
# Final pass: walk the perlasm text accumulated in $code one line at a time
# and write the translated result to STDOUT.  For every line this
#   - replaces each `...` span with the value of the enclosed Perl expression,
#   - while the constants table is still open, rewrites ".long"/".byte" lines
#     that end in a "?tag" as endian-neutral ".byte" lines,
#   - resolves the "le?", "be?" and "?" instruction prefixes.
# A $flavour ending in "le" selects the little-endian variants.
my $consts=1;			# cleared once a line containing "Lconsts:" is seen
foreach(split("\n",$code)) {
	# String eval of the back-quoted expression is intentional.
	# NOTE(review): assumes $code is assembled only by this script and
	# never from external input - confirm before reusing this loop.
	s/\`([^\`]*)\`/eval($1)/geo;

	# constants table endian-specific conversion
	if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
	    # Copy the captures immediately; later matches must not be able
	    # to disturb them.
	    my ($directive,$operands,$conv)=($1,$2,$3);
	    my @bytes=();

	    # convert to endian-agnostic format: a leading "0" means the
	    # literal is parsed by oct() (octal/hex/binary), otherwise int()
	    if ($directive eq "long") {
		foreach my $item (split(/,\s*/,$operands)) {
		    my $word = ($item =~ /^0/) ? oct($item) : int($item);
		    # split each 32-bit word into bytes, most significant first
		    push @bytes,($word>>24)&0xff,($word>>16)&0xff,
				($word>>8)&0xff,$word&0xff;
		}
	    } else {
		@bytes = map(/^0/?oct($_):int($_),split(/,\s*/,$operands));
	    }

	    # little-endian conversion: "?inv" XORs every byte with 0xf,
	    # "?rev" reverses the byte order; any other tag leaves the
	    # bytes untouched
	    if ($flavour =~ /le$/o) {
		if ($conv =~ /\?inv/) {
		    @bytes=map($_^0xf,@bytes);
		} elsif ($conv =~ /\?rev/) {
		    @bytes=reverse(@bytes);
		}
	    }

	    #emit
	    print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
	    next;
	}
	$consts=0 if (m/Lconsts:/o);	# end of table

	# instructions prefixed with '?' are endian-specific and need
	# to be adjusted accordingly...  Only the first substitution that
	# matches is applied to a given line.
	if ($flavour =~ /le$/o) {	# little-endian
	    s/le\?//o		or
	    s/be\?/#be#/o	or
	    s/\?lvsr/lvsl/o	or
	    s/\?lvsl/lvsr/o	or
	    s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
	    s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
	    s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
	} else {			# big-endian
	    s/le\?/#le#/o	or
	    s/be\?//o		or
	    s/\?([a-z]+)/$1/o;
	}

	print $_,"\n";
}

# Buffered write errors (and, for a piped STDOUT, a failing consumer) are
# only reported by close, so its result must not be ignored: otherwise a
# truncated assembly file could be produced with a successful exit status.
close STDOUT or die "error closing STDOUT: $! (status $?)";