Michael Ellerman | 323a6bf | 2012-09-13 23:00:49 +0000 | [diff] [blame^] | 1 | /* |
| 2 | * SHA-1 implementation for PowerPC. |
| 3 | * |
| 4 | * Copyright (C) 2005 Paul Mackerras <paulus@samba.org> |
| 5 | */ |
| 6 | |
| 7 | #include <asm/ppc_asm.h> |
| 8 | #include <asm/asm-offsets.h> |
| 9 | |
| 10 | /* |
| 11 | * We roll the registers for T, A, B, C, D, E around on each |
| 12 | * iteration; T on iteration t is A on iteration t+1, and so on. |
| 13 | * We use registers 7 - 12 for this. |
| 14 | */ |
/*
 * Each macro expands to a bare register number in the range 7..12, chosen
 * by the round number t modulo 6.  The offsets are staggered so that
 * RT(t) == RA(t+1), RA(t) == RB(t+1), RB(t) == RC(t+1), RC(t) == RD(t+1)
 * and RD(t) == RE(t+1); RT(t) is the register that was RE on the previous
 * round.  Only the names rotate between rounds, no values are copied.
 */
| 15 | #define RT(t) ((((t)+5)%6)+7) |
| 16 | #define RA(t) ((((t)+4)%6)+7) |
| 17 | #define RB(t) ((((t)+3)%6)+7) |
| 18 | #define RC(t) ((((t)+2)%6)+7) |
| 19 | #define RD(t) ((((t)+1)%6)+7) |
| 20 | #define RE(t) ((((t)+0)%6)+7) |
| 21 | |
| 22 | /* We use registers 16 - 31 for the W values */ |
/*
 * Register number for message-schedule word t.  The index is taken modulo
 * 16, so W(t) and W(t-16) name the same register and the sixteen registers
 * behave as a circular buffer of the most recent sixteen words.
 */
| 23 | #define W(t) (((t)%16)+16) |
| 24 | |
/*
 * Load the 32-bit word at byte offset 4*t from the address in r4 into W(t).
 * NOTE(review): lwz does no byte reversal, so the word is read in the CPU's
 * native byte order.  That looks like an assumption of big-endian operation;
 * confirm before building this for a little-endian configuration.
 */
| 25 | #define LOADW(t) \ |
| 26 | lwz W(t),(t)*4(r4) |
| 27 | |
/*
 * One round using the "choose" function, interleaved with the load of the
 * input word that will be consumed four rounds later.  With A..E in
 * RA(t)..RE(t) and the round constant in r15:
 *
 *   RT(t)  = rotl(A, 5) + ((B & C) | (D & ~B)) + E + r15 + W(t)
 *   RB(t)  = rotl(B, 30)        (read as C by round t+1)
 *   W(t+4) = word at byte offset 4*(t+4) from r4
 *
 * Scratch registers: r0, r6, r14.
 */
| 28 | #define STEPD0_LOAD(t) \ |
| 29 | andc r0,RD(t),RB(t); \ |
| 30 | and r6,RB(t),RC(t); \ |
| 31 | rotlwi RT(t),RA(t),5; \ |
| 32 | or r6,r6,r0; \ |
| 33 | add r0,RE(t),r15; \ |
| 34 | add RT(t),RT(t),r6; \ |
| 35 | add r14,r0,W(t); \ |
| 36 | lwz W((t)+4),((t)+4)*4(r4); \ |
| 37 | rotlwi RB(t),RB(t),30; \ |
| 38 | add RT(t),RT(t),r14 |
| 39 | |
/*
 * One round using the "choose" function, interleaved with computing the
 * schedule word that will be consumed four rounds later.  With A..E in
 * RA(t)..RE(t) and the round constant in r15:
 *
 *   RT(t)  = rotl(A, 5) + ((B & C) | (D & ~B)) + E + r15 + W(t)
 *   RB(t)  = rotl(B, 30)        (read as C by round t+1)
 *   W(t+4) = rotl(W(t+1) ^ W(t-4) ^ W(t-12) ^ W(t-10), 1)
 *
 * W(t-12) and W(t+4) are the same register (indices differ by 16), so the
 * oldest word in the circular buffer is overwritten in place.
 * Scratch registers: r0, r5, r6.
 */
| 40 | #define STEPD0_UPDATE(t) \ |
| 41 | and r6,RB(t),RC(t); \ |
| 42 | andc r0,RD(t),RB(t); \ |
| 43 | rotlwi RT(t),RA(t),5; \ |
| 44 | rotlwi RB(t),RB(t),30; \ |
| 45 | or r6,r6,r0; \ |
| 46 | add r0,RE(t),r15; \ |
| 47 | xor r5,W((t)+4-3),W((t)+4-8); \ |
| 48 | add RT(t),RT(t),r6; \ |
| 49 | xor W((t)+4),W((t)+4-16),W((t)+4-14); \ |
| 50 | add r0,r0,W(t); \ |
| 51 | xor W((t)+4),W((t)+4),r5; \ |
| 52 | add RT(t),RT(t),r0; \ |
| 53 | rotlwi W((t)+4),W((t)+4),1 |
| 54 | |
/*
 * One round using the "parity" function, with no schedule update.  With
 * A..E in RA(t)..RE(t) and the round constant in r15:
 *
 *   RT(t) = rotl(A, 5) + (B ^ C ^ D) + E + r15 + W(t)
 *   RB(t) = rotl(B, 30)         (read as C by round t+1)
 *
 * Scratch registers: r0, r6.
 */
| 55 | #define STEPD1(t) \ |
| 56 | xor r6,RB(t),RC(t); \ |
| 57 | rotlwi RT(t),RA(t),5; \ |
| 58 | rotlwi RB(t),RB(t),30; \ |
| 59 | xor r6,r6,RD(t); \ |
| 60 | add r0,RE(t),r15; \ |
| 61 | add RT(t),RT(t),r6; \ |
| 62 | add r0,r0,W(t); \ |
| 63 | add RT(t),RT(t),r0 |
| 64 | |
/*
 * One round using the "parity" function, interleaved with computing the
 * schedule word that will be consumed four rounds later.  With A..E in
 * RA(t)..RE(t) and the round constant in r15:
 *
 *   RT(t)  = rotl(A, 5) + (B ^ C ^ D) + E + r15 + W(t)
 *   RB(t)  = rotl(B, 30)        (read as C by round t+1)
 *   W(t+4) = rotl(W(t+1) ^ W(t-4) ^ W(t-12) ^ W(t-10), 1)
 *
 * W(t-12) and W(t+4) are the same register, so the update is in place.
 * Scratch registers: r0, r5, r6.
 */
| 65 | #define STEPD1_UPDATE(t) \ |
| 66 | xor r6,RB(t),RC(t); \ |
| 67 | rotlwi RT(t),RA(t),5; \ |
| 68 | rotlwi RB(t),RB(t),30; \ |
| 69 | xor r6,r6,RD(t); \ |
| 70 | add r0,RE(t),r15; \ |
| 71 | xor r5,W((t)+4-3),W((t)+4-8); \ |
| 72 | add RT(t),RT(t),r6; \ |
| 73 | xor W((t)+4),W((t)+4-16),W((t)+4-14); \ |
| 74 | add r0,r0,W(t); \ |
| 75 | xor W((t)+4),W((t)+4),r5; \ |
| 76 | add RT(t),RT(t),r0; \ |
| 77 | rotlwi W((t)+4),W((t)+4),1 |
| 78 | |
/*
 * One round using the "majority" function, interleaved with computing the
 * schedule word that will be consumed four rounds later.  With A..E in
 * RA(t)..RE(t) and the round constant in r15:
 *
 *   RT(t)  = rotl(A, 5) + ((B & C) | (B & D) | (C & D)) + E + r15 + W(t)
 *   RB(t)  = rotl(B, 30)        (read as C by round t+1)
 *   W(t+4) = rotl(W(t+1) ^ W(t-4) ^ W(t-12) ^ W(t-10), 1)
 *
 * Both terms that read B are formed before RB(t) is rotated; the remaining
 * term (C & D) does not involve B, so it may follow the rotate.
 * Scratch registers: r0, r5, r6.
 */
| 79 | #define STEPD2_UPDATE(t) \ |
| 80 | and r6,RB(t),RC(t); \ |
| 81 | and r0,RB(t),RD(t); \ |
| 82 | rotlwi RT(t),RA(t),5; \ |
| 83 | or r6,r6,r0; \ |
| 84 | rotlwi RB(t),RB(t),30; \ |
| 85 | and r0,RC(t),RD(t); \ |
| 86 | xor r5,W((t)+4-3),W((t)+4-8); \ |
| 87 | or r6,r6,r0; \ |
| 88 | xor W((t)+4),W((t)+4-16),W((t)+4-14); \ |
| 89 | add r0,RE(t),r15; \ |
| 90 | add RT(t),RT(t),r6; \ |
| 91 | add r0,r0,W(t); \ |
| 92 | xor W((t)+4),W((t)+4),r5; \ |
| 93 | add RT(t),RT(t),r0; \ |
| 94 | rotlwi W((t)+4),W((t)+4),1 |
| 95 | |
/*
 * Four consecutive STEPD0_LOAD rounds, t through t+3, which also load input
 * words t+4 through t+7.
 */
| 96 | #define STEP0LD4(t) \ |
| 97 | STEPD0_LOAD(t); \ |
| 98 | STEPD0_LOAD((t)+1); \ |
| 99 | STEPD0_LOAD((t)+2); \ |
| 100 | STEPD0_LOAD((t)+3) |
| 101 | |
/*
 * Four consecutive schedule-updating rounds, t through t+3.  fn is pasted
 * between STEP and _UPDATE to pick the round macro, so it must be one of
 * D0, D1 or D2 (STEPD0_UPDATE, STEPD1_UPDATE, STEPD2_UPDATE).
 */
| 102 | #define STEPUP4(t, fn) \ |
| 103 | STEP##fn##_UPDATE(t); \ |
| 104 | STEP##fn##_UPDATE((t)+1); \ |
| 105 | STEP##fn##_UPDATE((t)+2); \ |
| 106 | STEP##fn##_UPDATE((t)+3) |
| 107 | |
/*
 * Twenty consecutive schedule-updating rounds, t through t+19, built from
 * five STEPUP4 groups.  fn selects the round macro exactly as in STEPUP4.
 */
| 108 | #define STEPUP20(t, fn) \ |
| 109 | STEPUP4(t, fn); \ |
| 110 | STEPUP4((t)+4, fn); \ |
| 111 | STEPUP4((t)+8, fn); \ |
| 112 | STEPUP4((t)+12, fn); \ |
| 113 | STEPUP4((t)+16, fn) |
| 114 | |
/*
 * powerpc_sha_transform: run all 80 SHA-1 rounds over one 16-word block and
 * add the outcome into the five-word state.
 *
 *   r3 - address of the state; the words at byte offsets 0..16 are read
 *        and then rewritten
 *   r4 - address of the input; the words at byte offsets 0..60 are read
 *   (presumably the first two C arguments; the prototype is not visible
 *   in this file, so confirm against the caller)
 *
 * Register use: r7-r12 hold the rotating T/A/B/C/D/E values, r16-r31 the
 * message schedule, r15 the current round constant, and r0, r5, r6, r14
 * are scratch.
 */
| 115 | _GLOBAL(powerpc_sha_transform) |
/*
 * Open a stack frame and save the non-scratch registers used below.  The
 * macro names suggest r14-r21 and r22-r31 are saved; their definitions are
 * not in this file, so confirm in the included headers.
 */
| 116 | PPC_STLU r1,-STACKFRAMESIZE(r1) |
| 117 | SAVE_8GPRS(14, r1) |
| 118 | SAVE_10GPRS(22, r1) |
| 119 | |
| 120 | /* Load up A - E */ |
| 121 | lwz RA(0),0(r3) /* A */ |
| 122 | lwz RB(0),4(r3) /* B */ |
| 123 | lwz RC(0),8(r3) /* C */ |
| 124 | lwz RD(0),12(r3) /* D */ |
| 125 | lwz RE(0),16(r3) /* E */ |
| 126 | |
/* Prime W(0)..W(3); every later word is fetched or computed four rounds ahead of its use. */
| 127 | LOADW(0) |
| 128 | LOADW(1) |
| 129 | LOADW(2) |
| 130 | LOADW(3) |
| 131 | |
/* Rounds 0-11 also load input words 4-15; rounds 12-19 start computing words 16-23. */
| 132 | lis r15,0x5a82 /* K0-19 */ |
| 133 | ori r15,r15,0x7999 |
| 134 | STEP0LD4(0) |
| 135 | STEP0LD4(4) |
| 136 | STEP0LD4(8) |
| 137 | STEPUP4(12, D0) |
| 138 | STEPUP4(16, D0) |
| 139 | |
| 140 | lis r15,0x6ed9 /* K20-39 */ |
| 141 | ori r15,r15,0xeba1 |
| 142 | STEPUP20(20, D1) |
| 143 | |
| 144 | lis r15,0x8f1b /* K40-59 */ |
| 145 | ori r15,r15,0xbcdc |
| 146 | STEPUP20(40, D2) |
| 147 | |
| 148 | lis r15,0xca62 /* K60-79 */ |
| 149 | ori r15,r15,0xc1d6 |
| 150 | STEPUP4(60, D1) |
| 151 | STEPUP4(64, D1) |
| 152 | STEPUP4(68, D1) |
| 153 | STEPUP4(72, D1) |
/*
 * Rounds 76-79 use the non-updating STEPD1, since no schedule word past 79
 * is needed.  They read only W(76)..W(79), which are r28-r31, so r16-r20
 * are free and the previous state words are reloaded into them in between
 * the rounds.
 */
| 154 | lwz r20,16(r3) |
| 155 | STEPD1(76) |
| 156 | lwz r19,12(r3) |
| 157 | STEPD1(77) |
| 158 | lwz r18,8(r3) |
| 159 | STEPD1(78) |
| 160 | lwz r17,4(r3) |
| 161 | STEPD1(79) |
| 162 | |
/*
 * Add the previous state to the round-80 values, landing each sum in the
 * round-0 register of the same name.  The order matters because the two
 * name sets overlap: RE(80) is the same register as RC(0), so E is summed
 * into r20 before RC(0) is written, and it is moved into RE(0) only after
 * RA(80), which shares that register, has been consumed.
 */
| 163 | lwz r16,0(r3) |
| 164 | add r20,RE(80),r20 |
| 165 | add RD(0),RD(80),r19 |
| 166 | add RC(0),RC(80),r18 |
| 167 | add RB(0),RB(80),r17 |
| 168 | add RA(0),RA(80),r16 |
| 169 | mr RE(0),r20 |
/* Write the updated five-word state back through r3. */
| 170 | stw RA(0),0(r3) |
| 171 | stw RB(0),4(r3) |
| 172 | stw RC(0),8(r3) |
| 173 | stw RD(0),12(r3) |
| 174 | stw RE(0),16(r3) |
| 175 | |
/* Restore the saved registers, drop the stack frame and return. */
| 176 | REST_8GPRS(14, r1) |
| 177 | REST_10GPRS(22, r1) |
| 178 | addi r1,r1,STACKFRAMESIZE |
| 179 | blr |