sha.cpp

00001 // sha.cpp - modified by Wei Dai from Steve Reid's public domain sha1.c
00002 
00003 // Steve Reid implemented SHA-1. Wei Dai implemented SHA-2.
00004 // Both are in the public domain.
00005 
00006 #include "pch.h"
00007 
00008 #ifndef CRYPTOPP_IMPORTS
00009 
00010 #include "sha.h"
00011 #include "misc.h"
00012 #include "cpu.h"
00013 
00014 NAMESPACE_BEGIN(CryptoPP)
00015 
00016 // start of Steve Reid's code
00017 
// Message-schedule helpers. Both expect a 16-word array named W and the input
// block pointer named data to be in scope where they are expanded.
//   blk0(i): copies input word i into W[i] and yields the stored value
//            (also used by the SHA-2 R(i) macro further down in this file).
//   blk1(i): replaces W[i mod 16] with the XOR of four schedule words, rotated
//            left by one bit, and yields the new value.
// The index parameter is parenthesized so that an expression argument (for
// example 16|1) is reduced modulo 16 as a whole rather than mis-binding with &.
#define blk0(i) (W[(i)] = data[(i)])
#define blk1(i) (W[(i)&15] = rotlFixed(W[((i)+13)&15]^W[((i)+8)&15]^W[((i)+2)&15]^W[(i)&15],1))
00020 
00021 void SHA1::InitState(HashWordType *state)
00022 {
00023         state[0] = 0x67452301L;
00024         state[1] = 0xEFCDAB89L;
00025         state[2] = 0x98BADCFEL;
00026         state[3] = 0x10325476L;
00027         state[4] = 0xC3D2E1F0L;
00028 }
00029 
// Bitwise mixing functions used by the SHA-1 round macros below. Every
// parameter is parenthesized so expression arguments keep their grouping.
#define f1(x,y,z) ((z)^((x)&((y)^(z))))
#define f2(x,y,z) ((x)^(y)^(z))
#define f3(x,y,z) (((x)&(y))|((z)&((x)|(y))))
#define f4(x,y,z) ((x)^(y)^(z))

/* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */
// Each macro adds the mixing function, a schedule word (blk0 or blk1), a round
// constant and rotlFixed(v,5) into z, then rotates w left by 30 bits.
// They are written as a single comma expression so that "Rn(...);" is exactly
// one statement and stays correct inside an unbraced if/else.
#define R0(v,w,x,y,z,i) ((z)+=f1(w,x,y)+blk0(i)+0x5A827999+rotlFixed(v,5),(w)=rotlFixed(w,30))
#define R1(v,w,x,y,z,i) ((z)+=f1(w,x,y)+blk1(i)+0x5A827999+rotlFixed(v,5),(w)=rotlFixed(w,30))
#define R2(v,w,x,y,z,i) ((z)+=f2(w,x,y)+blk1(i)+0x6ED9EBA1+rotlFixed(v,5),(w)=rotlFixed(w,30))
#define R3(v,w,x,y,z,i) ((z)+=f3(w,x,y)+blk1(i)+0x8F1BBCDC+rotlFixed(v,5),(w)=rotlFixed(w,30))
#define R4(v,w,x,y,z,i) ((z)+=f4(w,x,y)+blk1(i)+0xCA62C1D6+rotlFixed(v,5),(w)=rotlFixed(w,30))
00041 
// Mixes one input block into the five-word SHA-1 state.
//   state: five word32 values, read at entry and updated in place at exit.
//   data:  input words; blk0 reads data[0..15] during the first 16 rounds.
//          The words are used exactly as passed in.
// The 80 rounds are fully unrolled: 16 x R0, 4 x R1, 20 x R2, 20 x R3 and
// 20 x R4. Instead of moving values between variables, each round call rotates
// the order of the arguments a..e by one position, so the statement order and
// the argument order below must be kept exactly as written.
00042 void SHA1::Transform(word32 *state, const word32 *data)
00043 {
        // 16-word schedule buffer: filled by blk0, then updated in place by blk1
00044         word32 W[16];
00045     /* Copy context->state[] to working vars */
00046     word32 a = state[0];
00047     word32 b = state[1];
00048     word32 c = state[2];
00049     word32 d = state[3];
00050     word32 e = state[4];
00051     /* 4 rounds of 20 operations each. Loop unrolled. */
00052     R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3);
00053     R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7);
00054     R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11);
00055     R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15);
    // from round 16 on, the schedule word comes from blk1 instead of the input
00056     R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19);
00057     R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23);
00058     R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27);
00059     R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31);
00060     R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35);
00061     R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39);
00062     R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43);
00063     R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47);
00064     R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51);
00065     R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55);
00066     R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59);
00067     R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63);
00068     R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67);
00069     R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);
00070     R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);
00071     R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);
00072     /* Add the working vars back into context.state[] */
00073     state[0] += a;
00074     state[1] += b;
00075     state[2] += c;
00076     state[3] += d;
00077     state[4] += e;
00078 }
00079 
00080 // end of Steve Reid's code
00081 
00082 // *************************************************************
00083 
00084 void SHA224::InitState(HashWordType *state)
00085 {
00086         static const word32 s[8] = {0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939, 0xffc00b31, 0x68581511, 0x64f98fa7, 0xbefa4fa4};
00087         memcpy(state, s, sizeof(s));
00088 }
00089 
00090 void SHA256::InitState(HashWordType *state)
00091 {
00092         static const word32 s[8] = {0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19};
00093         memcpy(state, s, sizeof(s));
00094 }
00095 
// Per-round additive constants for SHA256::Transform. The R(i) macro reads
// SHA256_K[i+j] with i = 0..15 and j = 0, 16, 32, 48, so each of the 64 entries
// is used exactly once per block and the order of the entries matters.
00096 static const word32 SHA256_K[64] = {
00097         0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
00098         0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
00099         0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
00100         0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
00101         0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
00102         0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
00103         0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
00104         0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
00105         0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
00106         0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
00107         0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
00108         0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
00109         0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
00110         0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
00111         0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
00112         0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
00113 };
00114 
// Schedule update used by R(i) on every pass after the first (j != 0):
// adds s1(W[i-2]) + W[i-7] + s0(W[i-15]) into W[i], with all indices taken
// modulo 16, and yields the updated word. s0/s1 are looked up when the macro
// is expanded, so the 32-bit or 64-bit definitions in effect at that point apply.
#define blk2(i) (W[(i)&15]+=s1(W[((i)-2)&15])+W[((i)-7)&15]+s0(W[((i)-15)&15]))

// Bitwise selection and majority functions; parameters are parenthesized so
// expression arguments keep their grouping.
#define Ch(x,y,z) ((z)^((x)&((y)^(z))))
#define Maj(x,y,z) (((x)&(y))|((z)&((x)|(y))))

// The eight working variables live in T[0..7]. Instead of shifting values
// between variables after each round, the name -> slot mapping is rotated by
// the round index i (modulo 8).
#define a(i) T[(0-(i))&7]
#define b(i) T[(1-(i))&7]
#define c(i) T[(2-(i))&7]
#define d(i) T[(3-(i))&7]
#define e(i) T[(4-(i))&7]
#define f(i) T[(5-(i))&7]
#define g(i) T[(6-(i))&7]
#define h(i) T[(7-(i))&7]

// One SHA-256 round. Expects T, W, data and the pass offset j (0, 16, 32, 48)
// to be in scope. On the first pass (j == 0) the schedule word is loaded from
// the input via blk0; afterwards it is derived with blk2.
// Written as a single comma expression so that "R(i);" is exactly one statement.
#define R(i) (h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+SHA256_K[(i)+j]+(j?blk2(i):blk0(i)),\
        d(i)+=h(i),h(i)+=S0(a(i))+Maj(a(i),b(i),c(i)))

// for SHA256
#define S0(x) (rotrFixed(x,2)^rotrFixed(x,13)^rotrFixed(x,22))
#define S1(x) (rotrFixed(x,6)^rotrFixed(x,11)^rotrFixed(x,25))
#define s0(x) (rotrFixed(x,7)^rotrFixed(x,18)^((x)>>3))
#define s1(x) (rotrFixed(x,17)^rotrFixed(x,19)^((x)>>10))
00137 
00138 void SHA256::Transform(word32 *state, const word32 *data)
00139 {
00140         word32 W[16];
00141         word32 T[8];
00142     /* Copy context->state[] to working vars */
00143         memcpy(T, state, sizeof(T));
00144     /* 64 operations, partially loop unrolled */
00145         for (unsigned int j=0; j<64; j+=16)
00146         {
00147                 R( 0); R( 1); R( 2); R( 3);
00148                 R( 4); R( 5); R( 6); R( 7);
00149                 R( 8); R( 9); R(10); R(11);
00150                 R(12); R(13); R(14); R(15);
00151         }
00152     /* Add the working vars back into context.state[] */
00153     state[0] += a(0);
00154     state[1] += b(0);
00155     state[2] += c(0);
00156     state[3] += d(0);
00157     state[4] += e(0);
00158     state[5] += f(0);
00159     state[6] += g(0);
00160     state[7] += h(0);
00161 }
00162 
00163 /* 
00164 // smaller but slower
00165 void SHA256_Transform(word32 *state, const word32 *data)
00166 {
00167         word32 T[20];
00168         word32 W[32];
00169         unsigned int i = 0, j = 0;
00170         word32 *t = T+8;
00171 
00172         memcpy(t, state, 8*4);
00173         word32 e = t[4], a = t[0];
00174 
00175         do 
00176         {
00177                 word32 w = data[j];
00178                 W[j] = w;
00179                 w += K[j];
00180                 w += t[7];
00181                 w += S1(e);
00182                 w += Ch(e, t[5], t[6]);
00183                 e = t[3] + w;
00184                 t[3] = t[3+8] = e;
00185                 w += S0(t[0]);
00186                 a = w + Maj(a, t[1], t[2]);
00187                 t[-1] = t[7] = a;
00188                 --t;
00189                 ++j;
00190                 if (j%8 == 0)
00191                         t += 8;
00192         } while (j<16);
00193 
00194         do
00195         {
00196                 i = j&0xf;
00197                 word32 w = s1(W[i+16-2]) + s0(W[i+16-15]) + W[i] + W[i+16-7];
00198                 W[i+16] = W[i] = w;
00199                 w += K[j];
00200                 w += t[7];
00201                 w += S1(e);
00202                 w += Ch(e, t[5], t[6]);
00203                 e = t[3] + w;
00204                 t[3] = t[3+8] = e;
00205                 w += S0(t[0]);
00206                 a = w + Maj(a, t[1], t[2]);
00207                 t[-1] = t[7] = a;
00208 
00209                 w = s1(W[(i+1)+16-2]) + s0(W[(i+1)+16-15]) + W[(i+1)] + W[(i+1)+16-7];
00210                 W[(i+1)+16] = W[(i+1)] = w;
00211                 w += K[j+1];
00212                 w += (t-1)[7];
00213                 w += S1(e);
00214                 w += Ch(e, (t-1)[5], (t-1)[6]);
00215                 e = (t-1)[3] + w;
00216                 (t-1)[3] = (t-1)[3+8] = e;
00217                 w += S0((t-1)[0]);
00218                 a = w + Maj(a, (t-1)[1], (t-1)[2]);
00219                 (t-1)[-1] = (t-1)[7] = a;
00220 
00221                 t-=2;
00222                 j+=2;
00223                 if (j%8 == 0)
00224                         t += 8;
00225         } while (j<64);
00226 
00227     state[0] += a;
00228     state[1] += t[1];
00229     state[2] += t[2];
00230     state[3] += t[3];
00231     state[4] += e;
00232     state[5] += t[5];
00233     state[6] += t[6];
00234     state[7] += t[7];
00235 }
00236 */
00237 
00238 #undef S0
00239 #undef S1
00240 #undef s0
00241 #undef s1
00242 #undef R
00243 
00244 // *************************************************************
00245 
00246 #ifdef WORD64_AVAILABLE
00247 
00248 void SHA384::InitState(HashWordType *state)
00249 {
00250         static const word64 s[8] = {
00251                 W64LIT(0xcbbb9d5dc1059ed8), W64LIT(0x629a292a367cd507),
00252                 W64LIT(0x9159015a3070dd17), W64LIT(0x152fecd8f70e5939),
00253                 W64LIT(0x67332667ffc00b31), W64LIT(0x8eb44a8768581511),
00254                 W64LIT(0xdb0c2e0d64f98fa7), W64LIT(0x47b5481dbefa4fa4)};
00255         memcpy(state, s, sizeof(s));
00256 }
00257 
00258 void SHA512::InitState(HashWordType *state)
00259 {
00260         static const word64 s[8] = {
00261                 W64LIT(0x6a09e667f3bcc908), W64LIT(0xbb67ae8584caa73b),
00262                 W64LIT(0x3c6ef372fe94f82b), W64LIT(0xa54ff53a5f1d36f1),
00263                 W64LIT(0x510e527fade682d1), W64LIT(0x9b05688c2b3e6c1f),
00264                 W64LIT(0x1f83d9abfb41bd6b), W64LIT(0x5be0cd19137e2179)};
00265         memcpy(state, s, sizeof(s));
00266 }
00267 
// Per-round additive constants for SHA-512 (80 entries of 64 bits each).
// Read as SHA512_K[i+j] by the R(i) macro in SHA512::Transform, and through
// the ebx register ([ebx+eax*8]) by SHA512_SSE2_Transform. The declaration is
// wrapped in the project's 16-byte alignment macros.
00268 CRYPTOPP_ALIGN_DATA(16) static const word64 SHA512_K[80] CRYPTOPP_SECTION_ALIGN16 = {
00269         W64LIT(0x428a2f98d728ae22), W64LIT(0x7137449123ef65cd),
00270         W64LIT(0xb5c0fbcfec4d3b2f), W64LIT(0xe9b5dba58189dbbc),
00271         W64LIT(0x3956c25bf348b538), W64LIT(0x59f111f1b605d019),
00272         W64LIT(0x923f82a4af194f9b), W64LIT(0xab1c5ed5da6d8118),
00273         W64LIT(0xd807aa98a3030242), W64LIT(0x12835b0145706fbe),
00274         W64LIT(0x243185be4ee4b28c), W64LIT(0x550c7dc3d5ffb4e2),
00275         W64LIT(0x72be5d74f27b896f), W64LIT(0x80deb1fe3b1696b1),
00276         W64LIT(0x9bdc06a725c71235), W64LIT(0xc19bf174cf692694),
00277         W64LIT(0xe49b69c19ef14ad2), W64LIT(0xefbe4786384f25e3),
00278         W64LIT(0x0fc19dc68b8cd5b5), W64LIT(0x240ca1cc77ac9c65),
00279         W64LIT(0x2de92c6f592b0275), W64LIT(0x4a7484aa6ea6e483),
00280         W64LIT(0x5cb0a9dcbd41fbd4), W64LIT(0x76f988da831153b5),
00281         W64LIT(0x983e5152ee66dfab), W64LIT(0xa831c66d2db43210),
00282         W64LIT(0xb00327c898fb213f), W64LIT(0xbf597fc7beef0ee4),
00283         W64LIT(0xc6e00bf33da88fc2), W64LIT(0xd5a79147930aa725),
00284         W64LIT(0x06ca6351e003826f), W64LIT(0x142929670a0e6e70),
00285         W64LIT(0x27b70a8546d22ffc), W64LIT(0x2e1b21385c26c926),
00286         W64LIT(0x4d2c6dfc5ac42aed), W64LIT(0x53380d139d95b3df),
00287         W64LIT(0x650a73548baf63de), W64LIT(0x766a0abb3c77b2a8),
00288         W64LIT(0x81c2c92e47edaee6), W64LIT(0x92722c851482353b),
00289         W64LIT(0xa2bfe8a14cf10364), W64LIT(0xa81a664bbc423001),
00290         W64LIT(0xc24b8b70d0f89791), W64LIT(0xc76c51a30654be30),
00291         W64LIT(0xd192e819d6ef5218), W64LIT(0xd69906245565a910),
00292         W64LIT(0xf40e35855771202a), W64LIT(0x106aa07032bbd1b8),
00293         W64LIT(0x19a4c116b8d2d0c8), W64LIT(0x1e376c085141ab53),
00294         W64LIT(0x2748774cdf8eeb99), W64LIT(0x34b0bcb5e19b48a8),
00295         W64LIT(0x391c0cb3c5c95a63), W64LIT(0x4ed8aa4ae3418acb),
00296         W64LIT(0x5b9cca4f7763e373), W64LIT(0x682e6ff3d6b2b8a3),
00297         W64LIT(0x748f82ee5defb2fc), W64LIT(0x78a5636f43172f60),
00298         W64LIT(0x84c87814a1f0ab72), W64LIT(0x8cc702081a6439ec),
00299         W64LIT(0x90befffa23631e28), W64LIT(0xa4506cebde82bde9),
00300         W64LIT(0xbef9a3f7b2c67915), W64LIT(0xc67178f2e372532b),
00301         W64LIT(0xca273eceea26619c), W64LIT(0xd186b8c721c0c207),
00302         W64LIT(0xeada7dd6cde0eb1e), W64LIT(0xf57d4f7fee6ed178),
00303         W64LIT(0x06f067aa72176fba), W64LIT(0x0a637dc5a2c898a6),
00304         W64LIT(0x113f9804bef90dae), W64LIT(0x1b710b35131c471b),
00305         W64LIT(0x28db77f523047d84), W64LIT(0x32caab7b40c72493),
00306         W64LIT(0x3c9ebe0a15c9bebc), W64LIT(0x431d67c49c100d4c),
00307         W64LIT(0x4cc5d4becb3e42b6), W64LIT(0x597f299cfc657e2a),
00308         W64LIT(0x5fcb6fab3ad6faec), W64LIT(0x6c44198c4a475817)
00309 };
00310 
00311 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86
00312 // put assembly version in separate function, otherwise MSVC 2005 SP1 doesn't generate correct code for the non-assembly version
// Hand-written x86 MMX/SSE2 version of the SHA-512 block transform.
//   state: eight word64 values addressed through ecx; read at entry and
//          updated in place at the end (SSE2_CombineState).
//   data:  input words addressed through edx; 16 words are read in the
//          "first 16 rounds" loop.
// Register roles visible in the code below:
//   ebx = address of SHA512_K, eax = round counter (0..80),
//   edi = pointer into the on-stack working-variable buffer,
//   esi = pointer into the on-stack expanded-message buffer,
//   mm4 = a, mm5 = e, mm0 = k + w on entry to SHA512_Round.
// GCC build: a single inline-asm statement receives SHA512_K in eax, state in
// ecx and data in edx through its input constraints.
// Other compilers: the function has no compiler-generated prologue, so it
// saves/restores ebx, esi, edi and returns by itself; state/data are presumably
// delivered in ecx/edx by CRYPTOPP_FASTCALL - confirm against its definition.
00313 CRYPTOPP_NAKED static void CRYPTOPP_FASTCALL SHA512_SSE2_Transform(word64 *state, const word64 *data)
00314 {
00315 #ifdef __GNUC__
00316         __asm__ __volatile__
00317         (
00318                 ".intel_syntax noprefix;"
00319         AS1(    push    ebx)
00320         AS2(    mov             ebx, eax)
00321 #else
00322         AS1(    push    ebx)
00323         AS1(    push    esi)
00324         AS1(    push    edi)
00325         AS2(    lea             ebx, SHA512_K)
00326 #endif
00327
        // Build the scratch frame: remember esp in eax, round esp down to a
        // 16-byte boundary, reserve 27*16 bytes, then push the remembered esp
        // so that "pop esp" at the end restores the caller's stack pointer.
00328         AS2(    mov             eax, esp)
00329         AS2(    and             esp, 0xfffffff0)
00330         AS2(    sub             esp, 27*16)                             // 17*16 for expanded data, 20*8 for state
00331         AS1(    push    eax)
00332         AS2(    xor             eax, eax)
00333         AS2(    lea             edi, [esp+4+8*8])               // start at middle of state buffer. will decrement pointer each round to avoid copying
00334         AS2(    lea             esi, [esp+4+20*8+8])    // 16-byte alignment, then add 8
00335
        // Copy the eight state words from [ecx] to the buffer at [edi];
        // word 0 is kept in mm4 and word 4 in mm5 for the round code.
00336         AS2(    movq    mm4, [ecx+0*8])
00337         AS2(    movq    [edi+0*8], mm4)
00338         AS2(    movq    mm0, [ecx+1*8])
00339         AS2(    movq    [edi+1*8], mm0)
00340         AS2(    movq    mm0, [ecx+2*8])
00341         AS2(    movq    [edi+2*8], mm0)
00342         AS2(    movq    mm0, [ecx+3*8])
00343         AS2(    movq    [edi+3*8], mm0)
00344         AS2(    movq    mm5, [ecx+4*8])
00345         AS2(    movq    [edi+4*8], mm5)
00346         AS2(    movq    mm0, [ecx+5*8])
00347         AS2(    movq    [edi+5*8], mm0)
00348         AS2(    movq    mm0, [ecx+6*8])
00349         AS2(    movq    [edi+6*8], mm0)
00350         AS2(    movq    mm0, [ecx+7*8])
00351         AS2(    movq    [edi+7*8], mm0)
        // presumably a forward jump to label 0 below, skipping the helper
        // macros and the SHA512_Round subroutine - confirm against ASJ
00352         ASJ(    jmp,    0, f)
00353
        // Helper macros: each one combines shifted copies of r (shift counts
        // derived from a, b, c) with pxor, using mm6/mm7 or xmm6/xmm7 as
        // scratch registers, and leaves the result in r.
00354 #define SSE2_S0_S1(r, a, b, c)  \
00355         AS2(    movq    mm6, r)\
00356         AS2(    psrlq   r, a)\
00357         AS2(    movq    mm7, r)\
00358         AS2(    psllq   mm6, 64-c)\
00359         AS2(    pxor    mm7, mm6)\
00360         AS2(    psrlq   r, b-a)\
00361         AS2(    pxor    mm7, r)\
00362         AS2(    psllq   mm6, c-b)\
00363         AS2(    pxor    mm7, mm6)\
00364         AS2(    psrlq   r, c-b)\
00365         AS2(    pxor    r, mm7)\
00366         AS2(    psllq   mm6, b-a)\
00367         AS2(    pxor    r, mm6)
00368
00369 #define SSE2_s0(r, a, b, c)     \
00370         AS2(    movdqa  xmm6, r)\
00371         AS2(    psrlq   r, a)\
00372         AS2(    movdqa  xmm7, r)\
00373         AS2(    psllq   xmm6, 64-c)\
00374         AS2(    pxor    xmm7, xmm6)\
00375         AS2(    psrlq   r, b-a)\
00376         AS2(    pxor    xmm7, r)\
00377         AS2(    psrlq   r, c-b)\
00378         AS2(    pxor    r, xmm7)\
00379         AS2(    psllq   xmm6, c-a)\
00380         AS2(    pxor    r, xmm6)
00381
00382 #define SSE2_s1(r, a, b, c)     \
00383         AS2(    movdqa  xmm6, r)\
00384         AS2(    psrlq   r, a)\
00385         AS2(    movdqa  xmm7, r)\
00386         AS2(    psllq   xmm6, 64-c)\
00387         AS2(    pxor    xmm7, xmm6)\
00388         AS2(    psrlq   r, b-a)\
00389         AS2(    pxor    xmm7, r)\
00390         AS2(    psllq   xmm6, c-b)\
00391         AS2(    pxor    xmm7, xmm6)\
00392         AS2(    psrlq   r, c-b)\
00393         AS2(    pxor    r, xmm7)
00394
        // Subroutine: performs one round and returns with "ret"; it is
        // entered only through the "call" instructions further down.
00395         ASL(SHA512_Round)
00396         // k + w is in mm0, a is in mm4, e is in mm5
00397         AS2(    paddq   mm0, [edi+7*8])         // h
00398         AS2(    movq    mm2, [edi+5*8])         // f
00399         AS2(    movq    mm3, [edi+6*8])         // g
00400         AS2(    pxor    mm2, mm3)
00401         AS2(    pand    mm2, mm5)
00402         SSE2_S0_S1(mm5,14,18,41)
00403         AS2(    pxor    mm2, mm3)
00404         AS2(    paddq   mm0, mm2)                       // h += Ch(e,f,g)
00405         AS2(    paddq   mm5, mm0)                       // h += S1(e)
00406         AS2(    movq    mm2, [edi+1*8])         // b
00407         AS2(    movq    mm1, mm2)
00408         AS2(    por             mm2, mm4)
00409         AS2(    pand    mm2, [edi+2*8])         // c
00410         AS2(    pand    mm1, mm4)
00411         AS2(    por             mm1, mm2)
00412         AS2(    paddq   mm1, mm5)                       // temp = h + Maj(a,b,c)
00413         AS2(    paddq   mm5, [edi+3*8])         // e = d + h
00414         AS2(    movq    [edi+3*8], mm5)
00415         AS2(    movq    [edi+11*8], mm5)
00416         SSE2_S0_S1(mm4,28,34,39)                        // S0(a)
00417         AS2(    paddq   mm4, mm1)                       // a = temp + S0(a)
00418         AS2(    movq    [edi-8], mm4)
00419         AS2(    movq    [edi+7*8], mm4)
00420         AS1(    ret)
00421
00422         // first 16 rounds
00423         ASL(0)
00424         AS2(    movq    mm0, [edx+eax*8])
00425         AS2(    movq    [esi+eax*8], mm0)
00426         AS2(    movq    [esi+eax*8+16*8], mm0)
00427         AS2(    paddq   mm0, [ebx+eax*8])
00428         ASC(    call,   SHA512_Round)
00429         AS1(    inc             eax)
00430         AS2(    sub             edi, 8)
00431         AS2(    test    eax, 7)
00432         ASJ(    jnz,    0, b)
00433         AS2(    add             edi, 8*8)
00434         AS2(    cmp             eax, 16)
00435         ASJ(    jne,    0, b)
00436
00437         // rest of the rounds
00438         AS2(    movdqu  xmm0, [esi+(16-2)*8])
00439         ASL(1)
00440         // data expansion, W[i-2] already in xmm0
00441         AS2(    movdqu  xmm3, [esi])
00442         AS2(    paddq   xmm3, [esi+(16-7)*8])
00443         AS2(    movdqa  xmm2, [esi+(16-15)*8])
00444         SSE2_s1(xmm0, 6, 19, 61)
00445         AS2(    paddq   xmm0, xmm3)
00446         SSE2_s0(xmm2, 1, 7, 8)
00447         AS2(    paddq   xmm0, xmm2)
00448         AS2(    movdq2q mm0, xmm0)
00449         AS2(    movhlps xmm1, xmm0)
00450         AS2(    paddq   mm0, [ebx+eax*8])
00451         AS2(    movlps  [esi], xmm0)
00452         AS2(    movlps  [esi+8], xmm1)
00453         AS2(    movlps  [esi+8*16], xmm0)
00454         AS2(    movlps  [esi+8*17], xmm1)
00455         // 2 rounds
00456         ASC(    call,   SHA512_Round)
00457         AS2(    sub             edi, 8)
00458         AS2(    movdq2q mm0, xmm1)
00459         AS2(    paddq   mm0, [ebx+eax*8+8])
00460         ASC(    call,   SHA512_Round)
00461         // update indices and loop
00462         AS2(    add             esi, 16)
00463         AS2(    add             eax, 2)
00464         AS2(    sub             edi, 8)
00465         AS2(    test    eax, 7)
00466         ASJ(    jnz,    1, b)
00467         // do housekeeping every 8 rounds
00468         AS2(    mov             esi, 0xf)
00469         AS2(    and             esi, eax)
00470         AS2(    lea             esi, [esp+4+20*8+8+esi*8])
00471         AS2(    add             edi, 8*8)
00472         AS2(    cmp             eax, 80)
00473         ASJ(    jne,    1, b)
00474
        // state word i at [ecx+i*8] += working word i at [edi+i*8]
00475 #define SSE2_CombineState(i)    \
00476         AS2(    movq    mm0, [edi+i*8])\
00477         AS2(    paddq   mm0, [ecx+i*8])\
00478         AS2(    movq    [ecx+i*8], mm0)
00479
00480         SSE2_CombineState(0)
00481         SSE2_CombineState(1)
00482         SSE2_CombineState(2)
00483         SSE2_CombineState(3)
00484         SSE2_CombineState(4)
00485         SSE2_CombineState(5)
00486         SSE2_CombineState(6)
00487         SSE2_CombineState(7)
00488
        // reload the stack pointer that was pushed after the frame was
        // reserved, then leave MMX mode before returning to C++ code
00489         AS1(    pop             esp)
00490         AS1(    emms)
00491
00492 #if defined(__GNUC__)
00493         AS1(    pop             ebx)
00494         ".att_syntax prefix;"
00495                 :
00496                 : "a" (SHA512_K), "c" (state), "d" (data)
00497                 : "%esi", "%edi", "memory", "cc"
00498         );
00499 #else
00500         AS1(    pop             edi)
00501         AS1(    pop             esi)
00502         AS1(    pop             ebx)
00503         AS1(    ret)
00504 #endif
00505 }
00506 #endif  // #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
00507 
00508 void SHA512::Transform(word64 *state, const word64 *data)
00509 {
00510 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86
00511         if (HasSSE2())
00512         {
00513                 SHA512_SSE2_Transform(state, data);
00514                 return;
00515         }
00516 #endif
00517 
00518 #define S0(x) (rotrFixed(x,28)^rotrFixed(x,34)^rotrFixed(x,39))
00519 #define S1(x) (rotrFixed(x,14)^rotrFixed(x,18)^rotrFixed(x,41))
00520 #define s0(x) (rotrFixed(x,1)^rotrFixed(x,8)^(x>>7))
00521 #define s1(x) (rotrFixed(x,19)^rotrFixed(x,61)^(x>>6))
00522 
00523 #define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+SHA512_K[i+j]+(j?blk2(i):blk0(i));\
00524         d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i))
00525 
00526         word64 W[16];
00527         word64 T[8];
00528     /* Copy context->state[] to working vars */
00529         memcpy(T, state, sizeof(T));
00530     /* 80 operations, partially loop unrolled */
00531         for (unsigned int j=0; j<80; j+=16)
00532         {
00533                 R( 0); R( 1); R( 2); R( 3);
00534                 R( 4); R( 5); R( 6); R( 7);
00535                 R( 8); R( 9); R(10); R(11);
00536                 R(12); R(13); R(14); R(15);
00537         }
00538     /* Add the working vars back into context.state[] */
00539     state[0] += a(0);
00540     state[1] += b(0);
00541     state[2] += c(0);
00542     state[3] += d(0);
00543     state[4] += e(0);
00544     state[5] += f(0);
00545     state[6] += g(0);
00546     state[7] += h(0);
00547 }
00548 
00549 #endif
00550 
00551 NAMESPACE_END
00552 
00553 #endif  // #ifndef CRYPTOPP_IMPORTS

Generated on Fri Jun 1 11:11:24 2007 for Crypto++ by  doxygen 1.5.2