00001
00002
00003 #include "pch.h"
00004 #include "panama.h"
00005 #include "misc.h"
00006 #include "cpu.h"
00007
00008 NAMESPACE_BEGIN(CryptoPP)
00009
00010 template <class B>
00011 void Panama<B>::Reset()
00012 {
00013 memset(m_state, 0, m_state.SizeInBytes());
00014 #if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
00015 m_state[17] = HasSSSE3();
00016 #endif
00017 }
00018
00019 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
00020
00021 #pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code
00022
00023 void Panama_SSE2_Pull(size_t count, word32 *state, word32 *z, const word32 *y)
00024 {
00025 #ifdef __GNUC__
00026 __asm__ __volatile__
00027 (
00028 ".intel_syntax noprefix;"
00029 AS_PUSH( bx)
00030 #else
00031 AS2( mov WORD_REG(cx), count)
00032 AS2( mov WORD_REG(si), state)
00033 AS2( mov WORD_REG(di), z)
00034 AS2( mov WORD_REG(dx), y)
00035 #endif
00036 AS2( shl WORD_REG(cx), 5)
00037 ASJ( jz, 5, f)
00038 AS2( mov ebx, [WORD_REG(si)+4*17])
00039 AS2( add WORD_REG(cx), WORD_REG(bx))
00040
00041 AS_PUSH( bp)
00042 AS_PUSH( cx)
00043
00044 AS2( movdqa xmm0, [WORD_REG(si)+0*16])
00045 AS2( movdqa xmm1, [WORD_REG(si)+1*16])
00046 AS2( movdqa xmm2, [WORD_REG(si)+2*16])
00047 AS2( movdqa xmm3, [WORD_REG(si)+3*16])
00048 AS2( mov eax, [WORD_REG(si)+4*16])
00049
00050 ASL(4)
00051
00052 #if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
00053 AS2( test WORD_REG(bx), 1)
00054 ASJ( jnz, 6, f)
00055 #endif
00056 AS2( movdqa xmm6, xmm2)
00057 AS2( movss xmm6, xmm3)
00058 ASS( pshufd xmm5, xmm6, 0, 3, 2, 1)
00059 AS2( movd xmm6, eax)
00060 AS2( movdqa xmm7, xmm3)
00061 AS2( movss xmm7, xmm6)
00062 ASS( pshufd xmm6, xmm7, 0, 3, 2, 1)
00063 #if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
00064 ASJ( jmp, 7, f)
00065 ASL(6)
00066 AS2( movdqa xmm5, xmm3)
00067 AS3( palignr xmm5, xmm2, 4)
00068 AS2( movd xmm6, eax)
00069 AS3( palignr xmm6, xmm3, 4)
00070 ASL(7)
00071 #endif
00072
00073 AS2( movd ecx, xmm2)
00074 AS1( not ecx)
00075 AS2( movd ebp, xmm3)
00076 AS2( or ecx, ebp)
00077 AS2( xor eax, ecx)
00078
00079 #define SSE2_Index(i) ASM_MOD(((i)*13+16), 17)
00080
00081 #define pi(i) \
00082 AS2( movd ecx, xmm7)\
00083 AS2( rol ecx, ASM_MOD((ASM_MOD(5*i,17)*(ASM_MOD(5*i,17)+1)/2), 32))\
00084 AS2( mov [WORD_REG(si)+SSE2_Index(ASM_MOD(5*(i), 17))*4], ecx)
00085
00086 #define pi4(x, y, z, a, b, c, d) \
00087 AS2( pcmpeqb xmm7, xmm7)\
00088 AS2( pxor xmm7, x)\
00089 AS2( por xmm7, y)\
00090 AS2( pxor xmm7, z)\
00091 pi(a)\
00092 ASS( pshuflw xmm7, xmm7, 1, 0, 3, 2)\
00093 pi(b)\
00094 AS2( punpckhqdq xmm7, xmm7)\
00095 pi(c)\
00096 ASS( pshuflw xmm7, xmm7, 1, 0, 3, 2)\
00097 pi(d)
00098
00099 pi4(xmm1, xmm2, xmm3, 1, 5, 9, 13)
00100 pi4(xmm0, xmm1, xmm2, 2, 6, 10, 14)
00101 pi4(xmm6, xmm0, xmm1, 3, 7, 11, 15)
00102 pi4(xmm5, xmm6, xmm0, 4, 8, 12, 16)
00103
00104
00105 AS2( movdqa xmm4, xmm3)
00106 AS2( punpcklqdq xmm3, xmm2)
00107 AS2( punpckhdq xmm4, xmm2)
00108 AS2( movdqa xmm2, xmm1)
00109 AS2( punpcklqdq xmm1, xmm0)
00110 AS2( punpckhdq xmm2, xmm0)
00111
00112
00113 AS2( test WORD_REG(di), WORD_REG(di))
00114 ASJ( jz, 0, f)
00115 AS2( movdqa xmm6, xmm4)
00116 AS2( punpcklqdq xmm4, xmm2)
00117 AS2( punpckhqdq xmm6, xmm2)
00118 AS2( test WORD_REG(dx), 0xf)
00119 ASJ( jnz, 2, f)
00120 AS2( test WORD_REG(dx), WORD_REG(dx))
00121 ASJ( jz, 1, f)
00122 AS2( pxor xmm4, [WORD_REG(dx)])
00123 AS2( pxor xmm6, [WORD_REG(dx)+16])
00124 AS2( add WORD_REG(dx), 32)
00125 ASJ( jmp, 1, f)
00126 ASL(2)
00127 AS2( movdqu xmm0, [WORD_REG(dx)])
00128 AS2( movdqu xmm2, [WORD_REG(dx)+16])
00129 AS2( pxor xmm4, xmm0)
00130 AS2( pxor xmm6, xmm2)
00131 AS2( add WORD_REG(dx), 32)
00132 ASL(1)
00133 AS2( test WORD_REG(di), 0xf)
00134 ASJ( jnz, 3, f)
00135 AS2( movdqa [WORD_REG(di)], xmm4)
00136 AS2( movdqa [WORD_REG(di)+16], xmm6)
00137 AS2( add WORD_REG(di), 32)
00138 ASJ( jmp, 0, f)
00139 ASL(3)
00140 AS2( movdqu [WORD_REG(di)], xmm4)
00141 AS2( movdqu [WORD_REG(di)+16], xmm6)
00142 AS2( add WORD_REG(di), 32)
00143 ASL(0)
00144
00145
00146 AS2( lea WORD_REG(cx), [WORD_REG(bx) + 32])
00147 AS2( and WORD_REG(cx), 31*32)
00148 AS2( lea WORD_REG(bp), [WORD_REG(bx) + (32-24)*32])
00149 AS2( and WORD_REG(bp), 31*32)
00150
00151 AS2( movdqa xmm0, [WORD_REG(si)+20*4+WORD_REG(cx)+0*8])
00152 AS2( pxor xmm3, xmm0)
00153 ASS( pshufd xmm0, xmm0, 2, 3, 0, 1)
00154 AS2( movdqa [WORD_REG(si)+20*4+WORD_REG(cx)+0*8], xmm3)
00155 AS2( pxor xmm0, [WORD_REG(si)+20*4+WORD_REG(bp)+2*8])
00156 AS2( movdqa [WORD_REG(si)+20*4+WORD_REG(bp)+2*8], xmm0)
00157
00158 AS2( movdqa xmm4, [WORD_REG(si)+20*4+WORD_REG(cx)+2*8])
00159 AS2( pxor xmm1, xmm4)
00160 AS2( movdqa [WORD_REG(si)+20*4+WORD_REG(cx)+2*8], xmm1)
00161 AS2( pxor xmm4, [WORD_REG(si)+20*4+WORD_REG(bp)+0*8])
00162 AS2( movdqa [WORD_REG(si)+20*4+WORD_REG(bp)+0*8], xmm4)
00163
00164
00165 AS2( movdqa xmm3, [WORD_REG(si)+3*16])
00166 AS2( movdqa xmm2, [WORD_REG(si)+2*16])
00167 AS2( movdqa xmm1, [WORD_REG(si)+1*16])
00168 AS2( movdqa xmm0, [WORD_REG(si)+0*16])
00169
00170 #if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
00171 AS2( test WORD_REG(bx), 1)
00172 ASJ( jnz, 8, f)
00173 #endif
00174 AS2( movd xmm6, eax)
00175 AS2( movdqa xmm7, xmm3)
00176 AS2( movss xmm7, xmm6)
00177 AS2( movdqa xmm6, xmm2)
00178 AS2( movss xmm6, xmm3)
00179 AS2( movdqa xmm5, xmm1)
00180 AS2( movss xmm5, xmm2)
00181 AS2( movdqa xmm4, xmm0)
00182 AS2( movss xmm4, xmm1)
00183 ASS( pshufd xmm7, xmm7, 0, 3, 2, 1)
00184 ASS( pshufd xmm6, xmm6, 0, 3, 2, 1)
00185 ASS( pshufd xmm5, xmm5, 0, 3, 2, 1)
00186 ASS( pshufd xmm4, xmm4, 0, 3, 2, 1)
00187 #if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
00188 ASJ( jmp, 9, f)
00189 ASL(8)
00190 AS2( movd xmm7, eax)
00191 AS3( palignr xmm7, xmm3, 4)
00192 AS2( movq xmm6, xmm3)
00193 AS3( palignr xmm6, xmm2, 4)
00194 AS2( movq xmm5, xmm2)
00195 AS3( palignr xmm5, xmm1, 4)
00196 AS2( movq xmm4, xmm1)
00197 AS3( palignr xmm4, xmm0, 4)
00198 ASL(9)
00199 #endif
00200
00201 AS2( xor eax, 1)
00202 AS2( movd ecx, xmm0)
00203 AS2( xor eax, ecx)
00204 AS2( movd ecx, xmm3)
00205 AS2( xor eax, ecx)
00206
00207 AS2( pxor xmm3, xmm2)
00208 AS2( pxor xmm2, xmm1)
00209 AS2( pxor xmm1, xmm0)
00210 AS2( pxor xmm0, xmm7)
00211 AS2( pxor xmm3, xmm7)
00212 AS2( pxor xmm2, xmm6)
00213 AS2( pxor xmm1, xmm5)
00214 AS2( pxor xmm0, xmm4)
00215
00216
00217 AS2( lea WORD_REG(cx), [WORD_REG(bx) + (32-4)*32])
00218 AS2( and WORD_REG(cx), 31*32)
00219 AS2( lea WORD_REG(bp), [WORD_REG(bx) + 16*32])
00220 AS2( and WORD_REG(bp), 31*32)
00221
00222 AS2( movdqa xmm4, [WORD_REG(si)+20*4+WORD_REG(cx)+0*16])
00223 AS2( movdqa xmm5, [WORD_REG(si)+20*4+WORD_REG(bp)+0*16])
00224 AS2( movdqa xmm6, xmm4)
00225 AS2( punpcklqdq xmm4, xmm5)
00226 AS2( punpckhqdq xmm6, xmm5)
00227 AS2( pxor xmm3, xmm4)
00228 AS2( pxor xmm2, xmm6)
00229
00230 AS2( movdqa xmm4, [WORD_REG(si)+20*4+WORD_REG(cx)+1*16])
00231 AS2( movdqa xmm5, [WORD_REG(si)+20*4+WORD_REG(bp)+1*16])
00232 AS2( movdqa xmm6, xmm4)
00233 AS2( punpcklqdq xmm4, xmm5)
00234 AS2( punpckhqdq xmm6, xmm5)
00235 AS2( pxor xmm1, xmm4)
00236 AS2( pxor xmm0, xmm6)
00237
00238
00239 AS2( add WORD_REG(bx), 32)
00240 AS2( cmp WORD_REG(bx), [WORD_REG(sp)])
00241 ASJ( jne, 4, b)
00242
00243
00244 AS2( add WORD_REG(sp), WORD_SZ)
00245 AS_POP( bp)
00246 AS2( mov [WORD_REG(si)+4*16], eax)
00247 AS2( movdqa [WORD_REG(si)+3*16], xmm3)
00248 AS2( movdqa [WORD_REG(si)+2*16], xmm2)
00249 AS2( movdqa [WORD_REG(si)+1*16], xmm1)
00250 AS2( movdqa [WORD_REG(si)+0*16], xmm0)
00251 ASL(5)
00252
00253 #ifdef __GNUC__
00254 AS_POP( bx)
00255 ".att_syntax prefix;"
00256 :
00257 : "c" (count), "S" (state), "D" (z), "d" (y)
00258 : "%eax", "memory", "cc"
00259 );
00260 #endif
00261 }
00262
00263 #endif
00264
00265 template <class B>
00266 void Panama<B>::Iterate(size_t count, const word32 *p, word32 *z, const word32 *y)
00267 {
00268 word32 bstart = m_state[17];
00269 word32 *const aPtr = m_state;
00270 word32 cPtr[17];
00271
00272 #define bPtr ((byte *)(aPtr+20))
00273
00274
00275
00276
00277 #define a(i) aPtr[((i)*13+16) % 17] // 13 is inverse of 4 mod 17
00278 #define c(i) cPtr[((i)*13+16) % 17]
00279
00280 #define b(i, j) b##i[(j)*2%8 + (j)/4]
00281
00282
00283 #define OA(i) z[i] = ConditionalByteReverse(B::ToEnum(), a(i+9))
00284 #define OX(i) z[i] = y[i] ^ ConditionalByteReverse(B::ToEnum(), a(i+9))
00285
00286 #define US(i) {word32 t=b(0,i); b(0,i)=ConditionalByteReverse(B::ToEnum(), p[i])^t; b(25,(i+6)%8)^=t;}
00287 #define UL(i) {word32 t=b(0,i); b(0,i)=a(i+1)^t; b(25,(i+6)%8)^=t;}
00288
00289 #define GP(i) c(5*i%17) = rotlFixed(a(i) ^ (a((i+1)%17) | ~a((i+2)%17)), ((5*i%17)*((5*i%17)+1)/2)%32)
00290
00291 #define T(i,x) a(i) = c(i) ^ c((i+1)%17) ^ c((i+4)%17) ^ x
00292 #define TS1S(i) T(i+1, ConditionalByteReverse(B::ToEnum(), p[i]))
00293 #define TS1L(i) T(i+1, b(4,i))
00294 #define TS2(i) T(i+9, b(16,i))
00295
00296 while (count--)
00297 {
00298 if (z)
00299 {
00300 if (y)
00301 {
00302 OX(0); OX(1); OX(2); OX(3); OX(4); OX(5); OX(6); OX(7);
00303 y += 8;
00304 }
00305 else
00306 {
00307 OA(0); OA(1); OA(2); OA(3); OA(4); OA(5); OA(6); OA(7);
00308 }
00309 z += 8;
00310 }
00311
00312 word32 *const b16 = (word32 *)(bPtr+((bstart+16*32) & 31*32));
00313 word32 *const b4 = (word32 *)(bPtr+((bstart+(32-4)*32) & 31*32));
00314 bstart += 32;
00315 word32 *const b0 = (word32 *)(bPtr+((bstart) & 31*32));
00316 word32 *const b25 = (word32 *)(bPtr+((bstart+(32-25)*32) & 31*32));
00317
00318 if (p)
00319 {
00320 US(0); US(1); US(2); US(3); US(4); US(5); US(6); US(7);
00321 }
00322 else
00323 {
00324 UL(0); UL(1); UL(2); UL(3); UL(4); UL(5); UL(6); UL(7);
00325 }
00326
00327 GP(0);
00328 GP(1);
00329 GP(2);
00330 GP(3);
00331 GP(4);
00332 GP(5);
00333 GP(6);
00334 GP(7);
00335 GP(8);
00336 GP(9);
00337 GP(10);
00338 GP(11);
00339 GP(12);
00340 GP(13);
00341 GP(14);
00342 GP(15);
00343 GP(16);
00344
00345 T(0,1);
00346
00347 if (p)
00348 {
00349 TS1S(0); TS1S(1); TS1S(2); TS1S(3); TS1S(4); TS1S(5); TS1S(6); TS1S(7);
00350 p += 8;
00351 }
00352 else
00353 {
00354 TS1L(0); TS1L(1); TS1L(2); TS1L(3); TS1L(4); TS1L(5); TS1L(6); TS1L(7);
00355 }
00356
00357 TS2(0); TS2(1); TS2(2); TS2(3); TS2(4); TS2(5); TS2(6); TS2(7);
00358 }
00359 m_state[17] = bstart;
00360 }
00361
00362 namespace Weak {
00363 template <class B>
00364 size_t PanamaHash<B>::HashMultipleBlocks(const word32 *input, size_t length)
00365 {
00366 this->Iterate(length / this->BLOCKSIZE, input);
00367 return length % this->BLOCKSIZE;
00368 }
00369
00370 template <class B>
00371 void PanamaHash<B>::TruncatedFinal(byte *hash, size_t size)
00372 {
00373 this->ThrowIfInvalidTruncatedSize(size);
00374
00375 PadLastBlock(this->BLOCKSIZE, 0x01);
00376
00377 HashEndianCorrectedBlock(this->m_data);
00378
00379 this->Iterate(32);
00380
00381 FixedSizeSecBlock<word32, 8> buf;
00382 this->Iterate(1, NULL, buf, NULL);
00383
00384 memcpy(hash, buf, size);
00385
00386 this->Restart();
00387 }
00388 }
00389
00390 template <class B>
00391 void PanamaCipherPolicy<B>::CipherSetKey(const NameValuePairs ¶ms, const byte *key, size_t length)
00392 {
00393 assert(length==32);
00394 memcpy(m_key, key, 32);
00395 }
00396
00397 template <class B>
00398 void PanamaCipherPolicy<B>::CipherResynchronize(byte *keystreamBuffer, const byte *iv)
00399 {
00400 this->Reset();
00401 this->Iterate(1, m_key);
00402 if (iv && IsAligned<word32>(iv))
00403 this->Iterate(1, (const word32 *)iv);
00404 else
00405 {
00406 FixedSizeSecBlock<word32, 8> buf;
00407 if (iv)
00408 memcpy(buf, iv, 32);
00409 else
00410 memset(buf, 0, 32);
00411 this->Iterate(1, buf);
00412 }
00413
00414 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
00415 if (B::ToEnum() == LITTLE_ENDIAN_ORDER && HasSSE2())
00416 Panama_SSE2_Pull(32, this->m_state, NULL, NULL);
00417 else
00418 #endif
00419 this->Iterate(32);
00420 }
00421
00422 #if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X64
00423 template <class B>
00424 unsigned int PanamaCipherPolicy<B>::GetAlignment() const
00425 {
00426 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
00427 if (B::ToEnum() == LITTLE_ENDIAN_ORDER && HasSSE2())
00428 return 16;
00429 else
00430 #endif
00431 return 1;
00432 }
00433 #endif
00434
00435 template <class B>
00436 void PanamaCipherPolicy<B>::OperateKeystream(KeystreamOperation operation, byte *output, const byte *input, size_t iterationCount)
00437 {
00438 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
00439 if (B::ToEnum() == LITTLE_ENDIAN_ORDER && HasSSE2())
00440 Panama_SSE2_Pull(iterationCount, this->m_state, (word32 *)output, (const word32 *)input);
00441 else
00442 #endif
00443 this->Iterate(iterationCount, NULL, (word32 *)output, (const word32 *)input);
00444 }
00445
00446 template class Panama<BigEndian>;
00447 template class Panama<LittleEndian>;
00448
00449 template class Weak::PanamaHash<BigEndian>;
00450 template class Weak::PanamaHash<LittleEndian>;
00451
00452 template class PanamaCipherPolicy<BigEndian>;
00453 template class PanamaCipherPolicy<LittleEndian>;
00454
00455 NAMESPACE_END