tiger.cpp

00001 // tiger.cpp - written and placed in the public domain by Wei Dai
00002 
00003 #include "pch.h"
00004 #include "tiger.h"
00005 #include "misc.h"
00006 #include "cpu.h"
00007 
00008 #ifdef WORD64_AVAILABLE
00009 
00010 NAMESPACE_BEGIN(CryptoPP)
00011 
00012 void Tiger::InitState(HashWordType *state)
00013 {
00014         state[0] = W64LIT(0x0123456789ABCDEF);
00015         state[1] = W64LIT(0xFEDCBA9876543210);
00016         state[2] = W64LIT(0xF096A5B4C3B2E187);
00017 }
00018 
00019 void Tiger::TruncatedFinal(byte *hash, size_t size)
00020 {
00021         ThrowIfInvalidTruncatedSize(size);
00022 
00023         PadLastBlock(56, 0x01);
00024         CorrectEndianess(m_data, m_data, 56);
00025 
00026         m_data[7] = GetBitCountLo();
00027 
00028         Transform(m_state, m_data);
00029         CorrectEndianess(m_state, m_state, DigestSize());
00030         memcpy(hash, m_state, size);
00031 
00032         Restart();              // reinit for next use
00033 }
00034 
// Compression function: mixes the eight 64-bit words at X into the three
// 64-bit chaining words at digest, updating digest in place.
//
// Two implementations are provided. When CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE and
// CRYPTOPP_BOOL_X86 are both set and HasSSE2() returns true, an inline-assembly
// version working in the MMX registers mm0-mm7 is used; otherwise the portable
// C++ version at the bottom runs. Both perform three passes of eight rounds
// (multipliers 5, 7 and 9) with a key schedule between passes, and both index
// `table` (defined outside this function) as consecutive sub-tables of 256
// 64-bit entries.
00035 void Tiger::Transform (word64 *digest, const word64 *X)
00036 {
00037 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86
00038         if (HasSSE2())
00039         {
00040 #ifdef __GNUC__
                      // GNU inline asm: digest, X and table arrive in eax, esi and edx
                      // through the input operands listed at the end of the statement.
00041                 __asm__ __volatile__
00042                 (
00043                 ".intel_syntax noprefix;"
                      // ebx is used as the loop offset below and is not in the clobber
                      // list, so it is saved here and restored before the asm ends.
00044                 AS1(    push    ebx)
00045 #else
                      // Other compilers: load edx = table, eax = digest, esi = X explicitly.
                      // For _MSC_VER < 1300 the table address goes through a local pointer.
00046         #if _MSC_VER < 1300
00047                 const word64 *t = table;
00048                 AS2(    mov             edx, t)
00049         #else
00050                 AS2(    lea             edx, [table])
00051         #endif
00052                 AS2(    mov             eax, digest)
00053                 AS2(    mov             esi, X)
00054 #endif
                      // mm0/mm1/mm2 = the three chaining words; mm5 keeps a copy of
                      // digest[1] for the final subtraction.
00055                 AS2(    movq    mm0, [eax])
00056                 AS2(    movq    mm1, [eax+1*8])
00057                 AS2(    movq    mm5, mm1)
00058                 AS2(    movq    mm2, [eax+2*8])
                      // mm7 and mm6 are loaded from the first two 64-bit entries stored
                      // right after the four 2048-byte sub-tables.
                      // NOTE(review): presumably all-ones (mm7) and 0xA5A5A5A5A5A5A5A5
                      // (mm6), matching the C++ key_schedule -- confirm against the
                      // definition of table.
00059                 AS2(    movq    mm7, [edx+4*2048+0*8])
00060                 AS2(    movq    mm6, [edx+4*2048+1*8])
                      // Reserve a 16-byte-aligned, 64-byte scratch area on the stack and
                      // push the original esp below it. The scratch area therefore starts
                      // at esp+4, and the later `pop esp` restores the stack pointer.
00061                 AS2(    mov             ecx, esp)
00062                 AS2(    and             esp, 0xfffffff0)
00063                 AS2(    sub             esp, 8*8)
00064                 AS1(    push    ecx)
00065
// One round. c ^= [x]; each of the eight bytes of c then selects an entry
// from one of the four sub-tables. mm3 collects the XOR of the lookups for
// bytes 0,2,4,6 and is subtracted from a; mm4 collects the XOR of the lookups
// for bytes 1,3,5,7 and is added to b; finally b is multiplied by mul.
// ecx and edi are scratch registers.
00066 #define SSE2_round(a,b,c,x,mul) \
00067                 AS2(    pxor    c, [x])\
00068                 AS2(    movd    ecx, c)\
00069                 AS2(    movzx   edi, cl)\
00070                 AS2(    movq    mm3, [edx+0*2048+edi*8])\
00071                 AS2(    movzx   edi, ch)\
00072                 AS2(    movq    mm4, [edx+3*2048+edi*8])\
00073                 AS2(    shr             ecx, 16)\
00074                 AS2(    movzx   edi, cl)\
00075                 AS2(    pxor    mm3, [edx+1*2048+edi*8])\
00076                 AS2(    movzx   edi, ch)\
00077                 AS2(    pxor    mm4, [edx+2*2048+edi*8])\
00078                 AS3(    pextrw  ecx, c, 2)\
00079                 AS2(    movzx   edi, cl)\
00080                 AS2(    pxor    mm3, [edx+2*2048+edi*8])\
00081                 AS2(    movzx   edi, ch)\
00082                 AS2(    pxor    mm4, [edx+1*2048+edi*8])\
00083                 AS3(    pextrw  ecx, c, 3)\
00084                 AS2(    movzx   edi, cl)\
00085                 AS2(    pxor    mm3, [edx+3*2048+edi*8])\
00086                 AS2(    psubq   a, mm3)\
00087                 AS2(    movzx   edi, ch)\
00088                 AS2(    pxor    mm4, [edx+0*2048+edi*8])\
00089                 AS2(    paddq   b, mm4)\
00090                 SSE2_mul_##mul(b)
00091
// b *= 5, computed as (b << 2) + b.
00092 #define SSE2_mul_5(b)   \
00093                 AS2(    movq    mm3, b)\
00094                 AS2(    psllq   b, 2)\
00095                 AS2(    paddq   b, mm3)
00096
// b *= 7, computed as (b << 3) - b.
00097 #define SSE2_mul_7(b)   \
00098                 AS2(    movq    mm3, b)\
00099                 AS2(    psllq   b, 3)\
00100                 AS2(    psubq   b, mm3)
00101
// b *= 9, computed as (b << 3) + b.
00102 #define SSE2_mul_9(b)   \
00103                 AS2(    movq    mm3, b)\
00104                 AS2(    psllq   b, 3)\
00105                 AS2(    paddq   b, mm3)
00106
// Numeric labels used as the loop-exit target of each pass; the loop head of a
// pass is labelled with the multiplier itself (5, 7 or 9).
00107 #define label2_5 1
00108 #define label2_7 2
00109 #define label2_9 3
00110
// One pass of eight rounds. ebx is the byte offset into X: it starts at 0 and
// grows by 3*8 per iteration. Every iteration runs two rounds, leaves the loop
// once ebx == 6*8, and otherwise runs a third round (3 + 3 + 2 rounds), with
// the roles of A, B and C rotating from round to round.
// ASL/ASJ are defined outside this file; presumably they emit a label and a
// jump to a label -- confirm against their definitions.
00111 #define SSE2_pass(A,B,C,mul,X)  \
00112                 AS2(    xor             ebx, ebx)\
00113                 ASL(mul)\
00114                 SSE2_round(A,B,C,X+0*8+ebx,mul)\
00115                 SSE2_round(B,C,A,X+1*8+ebx,mul)\
00116                 AS2(    cmp             ebx, 6*8)\
00117                 ASJ(    je,             label2_##mul, f)\
00118                 SSE2_round(C,A,B,X+2*8+ebx,mul)\
00119                 AS2(    add             ebx, 3*8)\
00120                 ASJ(    jmp,    mul, b)\
00121                 ASL(label2_##mul)
00122
// Writes eight scheduled words to Y, derived from the eight words at X (it is
// also invoked with Y == X), using mm3/mm4 as temporaries. The sequence
// follows the C++ key_schedule macro further down step for step: where that
// macro complements a value this code XORs it with mm7, and where that macro
// uses its two literal constants this code uses mm6 and the third entry after
// the sub-tables ([edx+4*2048+2*8]).
00123 #define SSE2_key_schedule(Y,X) \
00124                 AS2(    movq    mm3, [X+7*8])\
00125                 AS2(    pxor    mm3, mm6)\
00126                 AS2(    movq    mm4, [X+0*8])\
00127                 AS2(    psubq   mm4, mm3)\
00128                 AS2(    movq    [Y+0*8], mm4)\
00129                 AS2(    pxor    mm4, [X+1*8])\
00130                 AS2(    movq    mm3, mm4)\
00131                 AS2(    movq    [Y+1*8], mm4)\
00132                 AS2(    paddq   mm4, [X+2*8])\
00133                 AS2(    pxor    mm3, mm7)\
00134                 AS2(    psllq   mm3, 19)\
00135                 AS2(    movq    [Y+2*8], mm4)\
00136                 AS2(    pxor    mm3, mm4)\
00137                 AS2(    movq    mm4, [X+3*8])\
00138                 AS2(    psubq   mm4, mm3)\
00139                 AS2(    movq    [Y+3*8], mm4)\
00140                 AS2(    pxor    mm4, [X+4*8])\
00141                 AS2(    movq    mm3, mm4)\
00142                 AS2(    movq    [Y+4*8], mm4)\
00143                 AS2(    paddq   mm4, [X+5*8])\
00144                 AS2(    pxor    mm3, mm7)\
00145                 AS2(    psrlq   mm3, 23)\
00146                 AS2(    movq    [Y+5*8], mm4)\
00147                 AS2(    pxor    mm3, mm4)\
00148                 AS2(    movq    mm4, [X+6*8])\
00149                 AS2(    psubq   mm4, mm3)\
00150                 AS2(    movq    [Y+6*8], mm4)\
00151                 AS2(    pxor    mm4, [X+7*8])\
00152                 AS2(    movq    mm3, mm4)\
00153                 AS2(    movq    [Y+7*8], mm4)\
00154                 AS2(    paddq   mm4, [Y+0*8])\
00155                 AS2(    pxor    mm3, mm7)\
00156                 AS2(    psllq   mm3, 19)\
00157                 AS2(    movq    [Y+0*8], mm4)\
00158                 AS2(    pxor    mm3, mm4)\
00159                 AS2(    movq    mm4, [Y+1*8])\
00160                 AS2(    psubq   mm4, mm3)\
00161                 AS2(    movq    [Y+1*8], mm4)\
00162                 AS2(    pxor    mm4, [Y+2*8])\
00163                 AS2(    movq    mm3, mm4)\
00164                 AS2(    movq    [Y+2*8], mm4)\
00165                 AS2(    paddq   mm4, [Y+3*8])\
00166                 AS2(    pxor    mm3, mm7)\
00167                 AS2(    psrlq   mm3, 23)\
00168                 AS2(    movq    [Y+3*8], mm4)\
00169                 AS2(    pxor    mm3, mm4)\
00170                 AS2(    movq    mm4, [Y+4*8])\
00171                 AS2(    psubq   mm4, mm3)\
00172                 AS2(    movq    [Y+4*8], mm4)\
00173                 AS2(    pxor    mm4, [Y+5*8])\
00174                 AS2(    movq    [Y+5*8], mm4)\
00175                 AS2(    paddq   mm4, [Y+6*8])\
00176                 AS2(    movq    [Y+6*8], mm4)\
00177                 AS2(    pxor    mm4, [edx+4*2048+2*8])\
00178                 AS2(    movq    mm3, [Y+7*8])\
00179                 AS2(    psubq   mm3, mm4)\
00180                 AS2(    movq    [Y+7*8], mm3)
00181
                      // Three passes. The first reads the input words directly from X
                      // (esi); the other two read the scheduled words from the stack
                      // scratch area at esp+4, which the key schedule updates in place
                      // the second time.
00182                 SSE2_pass(mm0, mm1, mm2, 5, esi)
00183                 SSE2_key_schedule(esp+4, esi)
00184                 SSE2_pass(mm2, mm0, mm1, 7, esp+4)
00185                 SSE2_key_schedule(esp+4, esp+4)
00186                 SSE2_pass(mm1, mm2, mm0, 9, esp+4)
00187
                      // Feed-forward into the chaining value: digest[0] is XORed in,
                      // the old digest[1] (saved in mm5) is subtracted, digest[2] is added.
00188                 AS2(    pxor    mm0, [eax+0*8])
00189                 AS2(    movq    [eax+0*8], mm0)
00190                 AS2(    psubq   mm1, mm5)
00191                 AS2(    movq    [eax+1*8], mm1)
00192                 AS2(    paddq   mm2, [eax+2*8])
00193                 AS2(    movq    [eax+2*8], mm2)
00194
                      // Restore the stack pointer saved by the earlier push, then issue
                      // emms to empty the MMX state before returning to ordinary code.
00195                 AS1(    pop             esp)
00196                 AS1(    emms)
00197 #ifdef __GNUC__
                      // Restore ebx, switch back to AT&T syntax, and declare the operands:
                      // no outputs; inputs eax = digest, esi = X, edx = table; ecx, edi,
                      // memory and the condition codes are clobbered.
00198                 AS1(    pop             ebx)
00199                 ".att_syntax prefix;"
00200                         :
00201                         : "a" (digest), "S" (X), "d" (table)
00202                         : "%ecx", "%edi", "memory", "cc"
00203                 );
00204 #endif
00205         }
00206         else
00207 #endif
00208         {
              // Portable implementation. a, b, c are working copies of the chaining
              // words; Y receives the scheduled message words.
00209                 word64 a = digest[0];
00210                 word64 b = digest[1];
00211                 word64 c = digest[2];
00212                 word64 Y[8];
00213
// The four consecutive 256-entry sub-tables of table.
// NOTE(review): these macros, and round/pass/key_schedule below, are not
// #undef'd anywhere in the visible part of this file.
00214 #define t1 (table)
00215 #define t2 (table+256)
00216 #define t3 (table+256*2)
00217 #define t4 (table+256*3)
00218
// One round: c ^= x, a -= XOR of four lookups selected by GETBYTE(c,0/2/4/6),
// b += XOR of four lookups selected by GETBYTE(c,1/3/5/7), then b *= mul.
// GETBYTE is defined elsewhere; presumably GETBYTE(v,n) extracts byte n of v
// with n = 0 being the least significant byte -- confirm.
00219 #define round(a,b,c,x,mul) \
00220         c ^= x; \
00221         a -= t1[GETBYTE(c,0)] ^ t2[GETBYTE(c,2)] ^ t3[GETBYTE(c,4)] ^ t4[GETBYTE(c,6)]; \
00222         b += t4[GETBYTE(c,1)] ^ t3[GETBYTE(c,3)] ^ t2[GETBYTE(c,5)] ^ t1[GETBYTE(c,7)]; \
00223         b *= mul
00224
// One pass of eight rounds over X[0..7]: i takes the values 0, 3, 6; each
// iteration runs two rounds, stops when i == 6, and otherwise runs a third
// round, with the roles of a, b and c rotating from round to round.
00225 #define pass(a,b,c,mul,X) {\
00226         int i=0;\
00227         while (true)\
00228         {\
00229                 round(a,b,c,X[i+0],mul); \
00230                 round(b,c,a,X[i+1],mul); \
00231                 if (i==6)\
00232                         break;\
00233                 round(c,a,b,X[i+2],mul); \
00234                 i+=3;\
00235         }}
00236
// Computes the eight scheduled words Y from the eight words X. It is also
// invoked with Y and X naming the same array, which works because each X[k]
// is read no later than the statement that first writes Y[k], except X[7],
// which is also read in the first statement before any write.
00237 #define key_schedule(Y,X) \
00238         Y[0] = X[0] - (X[7]^W64LIT(0xA5A5A5A5A5A5A5A5)); \
00239         Y[1] = X[1] ^ Y[0]; \
00240         Y[2] = X[2] + Y[1]; \
00241         Y[3] = X[3] - (Y[2] ^ ((~Y[1])<<19)); \
00242         Y[4] = X[4] ^ Y[3]; \
00243         Y[5] = X[5] + Y[4]; \
00244         Y[6] = X[6] - (Y[5] ^ ((~Y[4])>>23)); \
00245         Y[7] = X[7] ^ Y[6]; \
00246         Y[0] += Y[7]; \
00247         Y[1] -= Y[0] ^ ((~Y[7])<<19); \
00248         Y[2] ^= Y[1]; \
00249         Y[3] += Y[2]; \
00250         Y[4] -= Y[3] ^ ((~Y[2])>>23); \
00251         Y[5] ^= Y[4]; \
00252         Y[6] += Y[5]; \
00253         Y[7] -= Y[6] ^ W64LIT(0x0123456789ABCDEF)
00254
                      // Three passes with multipliers 5, 7 and 9; the message words are
                      // re-scheduled between passes and the a/b/c roles rotate per pass.
00255                 pass(a,b,c,5,X);
00256                 key_schedule(Y,X);
00257                 pass(c,a,b,7,Y);
00258                 key_schedule(Y,Y);
00259                 pass(b,c,a,9,Y);
00260
                      // Feed-forward: combine the pass output with the incoming chaining
                      // value using XOR, subtraction and addition respectively.
00261                 digest[0] = a ^ digest[0];
00262                 digest[1] = b - digest[1];
00263                 digest[2] = c + digest[2];
00264         }
00265 }
00266 
00267 NAMESPACE_END
00268 
00269 #endif  // WORD64_AVAILABLE

Generated on Fri Jun 1 11:11:25 2007 for Crypto++ by  doxygen 1.5.2