00001
00002
00003 #include "pch.h"
00004 #include "tiger.h"
00005 #include "misc.h"
00006 #include "cpu.h"
00007
00008 #ifdef WORD64_AVAILABLE
00009
00010 NAMESPACE_BEGIN(CryptoPP)
00011
// Writes the three 64-bit starting values of the chaining state into
// state[0], state[1] and state[2]. The caller must supply room for at
// least three HashWordType elements; nothing beyond index 2 is touched.
00012 void Tiger::InitState(HashWordType *state)
00013 {
00014 static const HashWordType initialValues[3] = {W64LIT(0x0123456789ABCDEF), W64LIT(0xFEDCBA9876543210), W64LIT(0xF096A5B4C3B2E187)};
00015 for (unsigned int i = 0; i < 3; i++)
00016 state[i] = initialValues[i];
00017 }
00018
// Completes the current message and writes the first `size` bytes of the
// resulting digest to `hash`.
//
// Steps, in order: validate `size`, pad the buffered block up to byte
// offset 56 (pad byte 0x01), byte-order-correct those 56 bytes, store the
// low word of the bit count in word 7 (byte offset 56 of the block), run
// the compression function, byte-order-correct the state, copy it out, and
// finally call Restart() (presumably so the object can hash a new message).
//
// hash - destination buffer, at least `size` bytes.
// size - number of digest bytes wanted; checked by
//        ThrowIfInvalidTruncatedSize before anything else happens.
00019 void Tiger::TruncatedFinal(byte *hash, size_t size)
00020 {
00021 this->ThrowIfInvalidTruncatedSize(size);
00022 const unsigned int lengthFieldOffset = 56;
00023 this->PadLastBlock(lengthFieldOffset, 0x01);
00024 this->CorrectEndianess(this->m_data, this->m_data, lengthFieldOffset);
00025
00026 this->m_data[7] = this->GetBitCountLo();
00027
00028 Transform(this->m_state, this->m_data);
00029 this->CorrectEndianess(this->m_state, this->m_state, this->DigestSize());
00030 memcpy(hash, this->m_state, size);
00031
00032 this->Restart();
00033 }
00034
// Tiger compression function.
//
// Mixes one message block X (eight 64-bit words, X[0]..X[7]) into the
// three-word chaining value digest[0..2], updating digest in place.
//
// The work is three passes of eight rounds each, with multipliers 5, 7 and
// 9, separated by two key-schedule steps that derive a new eight-word block
// from the previous one. The roles of the three state words rotate between
// passes. Afterwards the old chaining value is folded back in:
//   digest[0] = a ^ digest[0];  digest[1] = b - digest[1];  digest[2] = c + digest[2]
//
// Two implementations are provided: an MMX/SSE2 inline-assembly version for
// 32-bit x86 (selected at run time by HasSSE2()) and a portable C++ version.
// Both read the lookup data through `table`, which is defined outside this
// function.
00035 void Tiger::Transform (word64 *digest, const word64 *X)
00036 {
00037 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86
00038 if (HasSSE2())
00039 {
// Register plan for the assembly path:
//   eax = digest, esi = X, edx = table
//   mm0/mm1/mm2 = working state, mm3/mm4 = scratch, mm5 = saved digest[1]
//   ecx/edi = scratch for byte extraction, ebx = byte offset inside a pass
// AS1/AS2/AS3/ASL/ASJ are assembler-wrapper macros defined outside this file.
00040 #ifdef __GNUC__
// GCC: the operands arrive through the constraint list at the end of the
// statement. ebx is saved by hand here and restored before the statement ends.
00041 __asm__ __volatile__
00042 (
00043 ".intel_syntax noprefix;"
00044 AS1( push ebx)
00045 #else
// Non-GCC (MSVC-style inline asm): load the pointer registers explicitly.
// Compilers with _MSC_VER < 1300 go through a local pointer copy of table.
00046 #if _MSC_VER < 1300
00047 const word64 *t = table;
00048 AS2( mov edx, t)
00049 #else
00050 AS2( lea edx, [table])
00051 #endif
00052 AS2( mov eax, digest)
00053 AS2( mov esi, X)
00054 #endif
// Load the chaining value; mm5 keeps the original digest[1] for the final
// subtraction in the feed-forward step.
00055 AS2( movq mm0, [eax])
00056 AS2( movq mm1, [eax+1*8])
00057 AS2( movq mm5, mm1)
00058 AS2( movq mm2, [eax+2*8])
// Two 64-bit constants stored just past the four 2048-byte lookup tables.
// mm7 is XORed in where the portable key_schedule applies '~', and mm6 where
// it XORs X[7] with 0xA5A5A5A5A5A5A5A5 - presumably all-ones and that
// constant respectively; confirm against the table definition.
00059 AS2( movq mm7, [edx+4*2048+0*8])
00060 AS2( movq mm6, [edx+4*2048+1*8])
// Build a 16-byte-aligned, 64-byte scratch area on the stack for the
// scheduled block. The old esp is pushed last, so it sits at [esp] and the
// scratch area starts at esp+4.
00061 AS2( mov ecx, esp)
00062 AS2( and esp, 0xfffffff0)
00063 AS2( sub esp, 8*8)
00064 AS1( push ecx)
00065
// One round: c ^= [x]; then the eight bytes of c index the four tables.
// Bytes 0/1 come from the low 16 bits of ecx, bytes 2/3 after shifting ecx
// right by 16, bytes 4/5 and 6/7 from pextrw words 2 and 3.
//   mm3 = T0[byte0] ^ T1[byte2] ^ T2[byte4] ^ T3[byte6];  a -= mm3
//   mm4 = T3[byte1] ^ T2[byte3] ^ T1[byte5] ^ T0[byte7];  b += mm4
// (Tn is the table at edx + n*2048), followed by b *= mul via SSE2_mul_<mul>.
00066 #define SSE2_round(a,b,c,x,mul) \
00067 AS2( pxor c, [x])\
00068 AS2( movd ecx, c)\
00069 AS2( movzx edi, cl)\
00070 AS2( movq mm3, [edx+0*2048+edi*8])\
00071 AS2( movzx edi, ch)\
00072 AS2( movq mm4, [edx+3*2048+edi*8])\
00073 AS2( shr ecx, 16)\
00074 AS2( movzx edi, cl)\
00075 AS2( pxor mm3, [edx+1*2048+edi*8])\
00076 AS2( movzx edi, ch)\
00077 AS2( pxor mm4, [edx+2*2048+edi*8])\
00078 AS3( pextrw ecx, c, 2)\
00079 AS2( movzx edi, cl)\
00080 AS2( pxor mm3, [edx+2*2048+edi*8])\
00081 AS2( movzx edi, ch)\
00082 AS2( pxor mm4, [edx+1*2048+edi*8])\
00083 AS3( pextrw ecx, c, 3)\
00084 AS2( movzx edi, cl)\
00085 AS2( pxor mm3, [edx+3*2048+edi*8])\
00086 AS2( psubq a, mm3)\
00087 AS2( movzx edi, ch)\
00088 AS2( pxor mm4, [edx+0*2048+edi*8])\
00089 AS2( paddq b, mm4)\
00090 SSE2_mul_##mul(b)
00091
// b *= 5, computed as (b << 2) + b. Uses mm3 as scratch.
00092 #define SSE2_mul_5(b) \
00093 AS2( movq mm3, b)\
00094 AS2( psllq b, 2)\
00095 AS2( paddq b, mm3)
00096
// b *= 7, computed as (b << 3) - b. Uses mm3 as scratch.
00097 #define SSE2_mul_7(b) \
00098 AS2( movq mm3, b)\
00099 AS2( psllq b, 3)\
00100 AS2( psubq b, mm3)
00101
// b *= 9, computed as (b << 3) + b. Uses mm3 as scratch.
00102 #define SSE2_mul_9(b) \
00103 AS2( movq mm3, b)\
00104 AS2( psllq b, 3)\
00105 AS2( paddq b, mm3)
00106
// Label numbers for the exit point of each pass. The loop head of a pass
// uses the multiplier itself (5, 7 or 9) as its label, so these must stay
// distinct from those values.
00107 #define label2_5 1
00108 #define label2_7 2
00109 #define label2_9 3
00110
// One pass of eight rounds over the block at X. ebx is the byte offset of
// the current group of three words. Each loop iteration runs three rounds
// with rotating register roles; on the last iteration (ebx == 6*8) only the
// rounds for words 6 and 7 run before jumping to the exit label.
00111 #define SSE2_pass(A,B,C,mul,X) \
00112 AS2( xor ebx, ebx)\
00113 ASL(mul)\
00114 SSE2_round(A,B,C,X+0*8+ebx,mul)\
00115 SSE2_round(B,C,A,X+1*8+ebx,mul)\
00116 AS2( cmp ebx, 6*8)\
00117 ASJ( je, label2_##mul, f)\
00118 SSE2_round(C,A,B,X+2*8+ebx,mul)\
00119 AS2( add ebx, 3*8)\
00120 ASJ( jmp, mul, b)\
00121 ASL(label2_##mul)
00122
// Key schedule: derives the eight-word block at Y from the block at X,
// following the same sequence of operations as the portable key_schedule
// macro further down. Y may be the same address as X (in-place update).
// Uses mm3/mm4 as scratch, mm6/mm7 as the preloaded constants, and one
// further constant read from [edx+4*2048+2*8] for the last step.
00123 #define SSE2_key_schedule(Y,X) \
00124 AS2( movq mm3, [X+7*8])\
00125 AS2( pxor mm3, mm6)\
00126 AS2( movq mm4, [X+0*8])\
00127 AS2( psubq mm4, mm3)\
00128 AS2( movq [Y+0*8], mm4)\
00129 AS2( pxor mm4, [X+1*8])\
00130 AS2( movq mm3, mm4)\
00131 AS2( movq [Y+1*8], mm4)\
00132 AS2( paddq mm4, [X+2*8])\
00133 AS2( pxor mm3, mm7)\
00134 AS2( psllq mm3, 19)\
00135 AS2( movq [Y+2*8], mm4)\
00136 AS2( pxor mm3, mm4)\
00137 AS2( movq mm4, [X+3*8])\
00138 AS2( psubq mm4, mm3)\
00139 AS2( movq [Y+3*8], mm4)\
00140 AS2( pxor mm4, [X+4*8])\
00141 AS2( movq mm3, mm4)\
00142 AS2( movq [Y+4*8], mm4)\
00143 AS2( paddq mm4, [X+5*8])\
00144 AS2( pxor mm3, mm7)\
00145 AS2( psrlq mm3, 23)\
00146 AS2( movq [Y+5*8], mm4)\
00147 AS2( pxor mm3, mm4)\
00148 AS2( movq mm4, [X+6*8])\
00149 AS2( psubq mm4, mm3)\
00150 AS2( movq [Y+6*8], mm4)\
00151 AS2( pxor mm4, [X+7*8])\
00152 AS2( movq mm3, mm4)\
00153 AS2( movq [Y+7*8], mm4)\
00154 AS2( paddq mm4, [Y+0*8])\
00155 AS2( pxor mm3, mm7)\
00156 AS2( psllq mm3, 19)\
00157 AS2( movq [Y+0*8], mm4)\
00158 AS2( pxor mm3, mm4)\
00159 AS2( movq mm4, [Y+1*8])\
00160 AS2( psubq mm4, mm3)\
00161 AS2( movq [Y+1*8], mm4)\
00162 AS2( pxor mm4, [Y+2*8])\
00163 AS2( movq mm3, mm4)\
00164 AS2( movq [Y+2*8], mm4)\
00165 AS2( paddq mm4, [Y+3*8])\
00166 AS2( pxor mm3, mm7)\
00167 AS2( psrlq mm3, 23)\
00168 AS2( movq [Y+3*8], mm4)\
00169 AS2( pxor mm3, mm4)\
00170 AS2( movq mm4, [Y+4*8])\
00171 AS2( psubq mm4, mm3)\
00172 AS2( movq [Y+4*8], mm4)\
00173 AS2( pxor mm4, [Y+5*8])\
00174 AS2( movq [Y+5*8], mm4)\
00175 AS2( paddq mm4, [Y+6*8])\
00176 AS2( movq [Y+6*8], mm4)\
00177 AS2( pxor mm4, [edx+4*2048+2*8])\
00178 AS2( movq mm3, [Y+7*8])\
00179 AS2( psubq mm3, mm4)\
00180 AS2( movq [Y+7*8], mm3)
00181
// Pass 1 reads the caller's block directly; passes 2 and 3 read the
// scheduled block kept in the stack scratch area at esp+4.
00182 SSE2_pass(mm0, mm1, mm2, 5, esi)
00183 SSE2_key_schedule(esp+4, esi)
00184 SSE2_pass(mm2, mm0, mm1, 7, esp+4)
00185 SSE2_key_schedule(esp+4, esp+4)
00186 SSE2_pass(mm1, mm2, mm0, 9, esp+4)
00187
// Feed-forward: combine the result with the chaining value that was in
// digest on entry (xor / subtract / add) and store it back.
00188 AS2( pxor mm0, [eax+0*8])
00189 AS2( movq [eax+0*8], mm0)
00190 AS2( psubq mm1, mm5)
00191 AS2( movq [eax+1*8], mm1)
00192 AS2( paddq mm2, [eax+2*8])
00193 AS2( movq [eax+2*8], mm2)
00194
// [esp] holds the stack pointer saved before alignment, so this undoes the
// scratch-area setup. emms clears MMX state before returning to C++ code.
00195 AS1( pop esp)
00196 AS1( emms)
00197 #ifdef __GNUC__
00198 AS1( pop ebx)
00199 ".att_syntax prefix;"
// No outputs; inputs pinned to eax/esi/edx; ecx, edi, memory and flags are
// declared clobbered.
00200 :
00201 : "a" (digest), "S" (X), "d" (table)
00202 : "%ecx", "%edi", "memory", "cc"
00203 );
00204 #endif
00205 }
00206 else
00207 #endif
00208 {
// Portable implementation. a, b, c are the working copy of the chaining
// value; Y receives the scheduled block for passes 2 and 3.
00209 word64 a = digest[0];
00210 word64 b = digest[1];
00211 word64 c = digest[2];
00212 word64 Y[8];
00213
// The four lookup tables are consecutive 256-entry slices of `table`.
00214 #define t1 (table)
00215 #define t2 (table+256)
00216 #define t3 (table+256*2)
00217 #define t4 (table+256*3)
00218
// One round: xor the message word into c, then use the even-numbered bytes
// of c to look up a value subtracted from a and the odd-numbered bytes to
// look up a value added to b; finally multiply b by the pass constant.
00219 #define round(a,b,c,x,mul) \
00220 c ^= x; \
00221 a -= t1[GETBYTE(c,0)] ^ t2[GETBYTE(c,2)] ^ t3[GETBYTE(c,4)] ^ t4[GETBYTE(c,6)]; \
00222 b += t4[GETBYTE(c,1)] ^ t3[GETBYTE(c,3)] ^ t2[GETBYTE(c,5)] ^ t1[GETBYTE(c,7)]; \
00223 b *= mul
00224
// One pass: eight rounds over X[0..7], three per loop iteration with the
// roles of a, b, c rotating. The loop exits after the round on X[7], i.e.
// after the second round of the iteration where i == 6.
00225 #define pass(a,b,c,mul,X) {\
00226 int i=0;\
00227 while (true)\
00228 {\
00229 round(a,b,c,X[i+0],mul); \
00230 round(b,c,a,X[i+1],mul); \
00231 if (i==6)\
00232 break;\
00233 round(c,a,b,X[i+2],mul); \
00234 i+=3;\
00235 }}
00236
// Key schedule: computes Y[0..7] from X[0..7]. Each X[k] is read before the
// matching Y[k] is written, which is what lets key_schedule(Y,Y) below
// update the block in place.
00237 #define key_schedule(Y,X) \
00238 Y[0] = X[0] - (X[7]^W64LIT(0xA5A5A5A5A5A5A5A5)); \
00239 Y[1] = X[1] ^ Y[0]; \
00240 Y[2] = X[2] + Y[1]; \
00241 Y[3] = X[3] - (Y[2] ^ ((~Y[1])<<19)); \
00242 Y[4] = X[4] ^ Y[3]; \
00243 Y[5] = X[5] + Y[4]; \
00244 Y[6] = X[6] - (Y[5] ^ ((~Y[4])>>23)); \
00245 Y[7] = X[7] ^ Y[6]; \
00246 Y[0] += Y[7]; \
00247 Y[1] -= Y[0] ^ ((~Y[7])<<19); \
00248 Y[2] ^= Y[1]; \
00249 Y[3] += Y[2]; \
00250 Y[4] -= Y[3] ^ ((~Y[2])>>23); \
00251 Y[5] ^= Y[4]; \
00252 Y[6] += Y[5]; \
00253 Y[7] -= Y[6] ^ W64LIT(0x0123456789ABCDEF)
00254
// Three passes with multipliers 5, 7, 9; the state words rotate roles
// between passes and the block is re-scheduled before passes 2 and 3.
00255 pass(a,b,c,5,X);
00256 key_schedule(Y,X);
00257 pass(c,a,b,7,Y);
00258 key_schedule(Y,Y);
00259 pass(b,c,a,9,Y);
00260
// Feed-forward with the chaining value that was in digest on entry.
00261 digest[0] = a ^ digest[0];
00262 digest[1] = b - digest[1];
00263 digest[2] = c + digest[2];
00264 }
00265 }
00266
00267 NAMESPACE_END
00268
00269 #endif // WORD64_AVAILABLE