00001
00002
00003
00004 #include "pch.h"
00005 #include "vmac.h"
00006 #include "argnames.h"
00007 #include "cpu.h"
00008
00009 NAMESPACE_BEGIN(CryptoPP)
00010
00011 #if defined(_MSC_VER) && !defined(CRYPTOPP_SLOW_WORD64)
00012 #include <intrin.h>
00013 #endif
00014
00015 #define VMAC_BOOL_WORD128 (defined(CRYPTOPP_WORD128_AVAILABLE) && !defined(CRYPTOPP_X64_ASM_AVAILABLE))
00016 #ifdef __BORLANDC__
00017 #define const // Turbo C++ 2006 workaround
00018 #endif
00019 static const word64 p64 = W64LIT(0xfffffffffffffeff);
00020 static const word64 m62 = W64LIT(0x3fffffffffffffff);
00021 static const word64 m63 = W64LIT(0x7fffffffffffffff);
00022 static const word64 m64 = W64LIT(0xffffffffffffffff);
00023 static const word64 mpoly = W64LIT(0x1fffffff1fffffff);
00024 #ifdef __BORLANDC__
00025 #undef const
00026 #endif
00027 #if VMAC_BOOL_WORD128
00028 static const word128 m126 = (word128(m62)<<64)|m64;
00029 #endif
00030
00031 void VMAC_Base::UncheckedSetKey(const byte *userKey, unsigned int keylength, const NameValuePairs ¶ms)
00032 {
00033 int digestLength = params.GetIntValueWithDefault(Name::DigestSize(), DefaultDigestSize());
00034 if (digestLength != 8 && digestLength != 16)
00035 throw InvalidArgument("VMAC: DigestSize must be 8 or 16");
00036 m_is128 = digestLength == 16;
00037
00038 m_L1KeyLength = params.GetIntValueWithDefault(Name::L1KeyLength(), 128);
00039 if (m_L1KeyLength <= 0 || m_L1KeyLength % 128 != 0)
00040 throw InvalidArgument("VMAC: L1KeyLength must be a positive multiple of 128");
00041
00042 AllocateBlocks();
00043
00044 BlockCipher &cipher = AccessCipher();
00045 cipher.SetKey(userKey, keylength, params);
00046 unsigned int blockSize = cipher.BlockSize();
00047 unsigned int blockSizeInWords = blockSize / sizeof(word64);
00048 SecBlock<word64> out(blockSizeInWords);
00049 SecByteBlock in;
00050 in.CleanNew(blockSize);
00051 size_t i;
00052
00053
00054 in[0] = 0x80;
00055 for (i = 0; i < m_nhKeySize()*sizeof(word64); i += blockSize)
00056 {
00057 cipher.ProcessBlock(in, out.BytePtr());
00058 ConditionalByteReverse(BIG_ENDIAN_ORDER, m_nhKey()+i/sizeof(word64), out.begin(), blockSize);
00059 in[15]++;
00060 }
00061
00062
00063 in[0] = 0xC0;
00064 in[15] = 0;
00065 for (i = 0; i <= (size_t)m_is128; i++)
00066 {
00067 cipher.ProcessBlock(in, out.BytePtr());
00068 m_polyState()[i*4+2] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr()) & mpoly;
00069 m_polyState()[i*4+3] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr()+8) & mpoly;
00070 in[15]++;
00071 }
00072
00073
00074 in[0] = 0xE0;
00075 in[15] = 0;
00076 word64 *l3Key = m_l3Key();
00077 for (i = 0; i <= (size_t)m_is128; i++)
00078 do
00079 {
00080 cipher.ProcessBlock(in, out.BytePtr());
00081 l3Key[i*2+0] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr());
00082 l3Key[i*2+1] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr()+8);
00083 in[15]++;
00084 } while ((l3Key[i*2+0] >= p64) || (l3Key[i*2+1] >= p64));
00085
00086 m_padCached = false;
00087 Resynchronize(GetIVAndThrowIfInvalid(params));
00088 }
00089
00090 void VMAC_Base::GetNextIV(RandomNumberGenerator &rng, byte *IV)
00091 {
00092 SimpleKeyingInterface::GetNextIV(rng, IV);
00093 IV[0] &= 0x7f;
00094 }
00095
00096 void VMAC_Base::Resynchronize(const byte *IV)
00097 {
00098 int s = IVSize();
00099 if (m_is128)
00100 {
00101 memcpy(m_nonce(), IV, s);
00102 AccessCipher().ProcessBlock(m_nonce(), m_pad());
00103 }
00104 else
00105 {
00106 m_padCached = m_padCached && (m_nonce()[s-1] | 1) == (IV[s-1] | 1) && memcmp(m_nonce(), IV, s-1) == 0;
00107 if (!m_padCached)
00108 {
00109 memcpy(m_nonce(), IV, s);
00110 m_nonce()[s-1] &= 0xfe;
00111 AccessCipher().ProcessBlock(m_nonce(), m_pad());
00112 m_padCached = true;
00113 }
00114 m_nonce()[s-1] = IV[s-1];
00115 }
00116 m_isFirstBlock = true;
00117 Restart();
00118 }
00119
00120 void VMAC_Base::HashEndianCorrectedBlock(const word64 *data)
00121 {
00122 assert(false);
00123 }
00124
00125 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86
00126 #pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code
// Inline-assembly (MMX registers, SSE2 integer instructions) VHASH update for
// 32-bit x86.
//   data                    - message words to absorb
//   blocksRemainingInWord64 - number of 64-bit words available at `data`
//   tagPart                 - which 64-bit half of the tag to update; VHASH_Update()
//                             calls this with 0 and, for 128-bit tags, again with 1
// The NH key pointer is offset by tagPart*2 words and the polynomial state pointer
// by tagPart*4 words.  m_isFirstBlock is only read here; the caller clears it.
00127 void
00128 #ifdef __GNUC__
00129 __attribute__ ((noinline))
00130 #endif
00131 VMAC_Base::VHASH_Update_SSE2(const word64 *data, size_t blocksRemainingInWord64, int tagPart)
00132 {
00133 const word64 *nhK = m_nhKey();
00134 word64 *polyS = m_polyState();
00135
00136 #ifdef __GNUC__
// GCC: ebx is saved in `temp` and loaded with m_L1KeyLength (operand %1); the other
// registers are preloaded through the constraints at the end of the statement.
00137 word32 temp;
00138 __asm__ __volatile__
00139 (
00140 AS2( mov %%ebx, %0)
00141 AS2( mov %1, %%ebx)
00142 ".intel_syntax noprefix;"
00143 #else
// MSVC: load the same registers by hand.
//   ebx = m_L1KeyLength, dl = m_isFirstBlock
//   edi = nhK + tagPart*16 bytes, eax = polyS + tagPart*32 bytes
//   esi = data, ecx = blocksRemainingInWord64
00144 #if _MSC_VER < 1300
00145 word32 L1KeyLength = m_L1KeyLength;
00146 char isFirstBlock = m_isFirstBlock;
00147 AS2( mov ebx, [L1KeyLength])
00148 AS2( mov dl, [isFirstBlock])
00149 #else
00150 AS2( mov ecx, this)
00151 AS2( mov ebx, [ecx+m_L1KeyLength])
00152 AS2( mov dl, [ecx+m_isFirstBlock])
00153 #endif
00154 AS2( mov eax, tagPart)
00155 AS2( shl eax, 4)
00156 AS2( mov edi, nhK)
00157 AS2( add edi, eax)
00158 AS2( add eax, eax)
00159 AS2( add eax, polyS)
00160
00161 AS2( mov esi, data)
00162 AS2( mov ecx, blocksRemainingInWord64)
00163 #endif
00164
// ebx becomes the L1 key length in 64-bit words; ebp is saved because it is used as
// a scratch register below, and 12 bytes of stack are reserved as spill space.
00165 AS2( shr ebx, 3)
00166 AS1( push ebp)
00167 AS2( sub esp, 12)
// Label 4: start of one L1 block.  ebp = min(ecx, ebx) words to process (signed
// compare), ecx is reduced by that amount, then ebp is turned into the key address
// at which the NH loop stops.
00168 ASL(4)
00169 AS2( mov ebp, ebx)
00170 AS2( cmp ecx, ebx)
00171 AS2( cmovl ebp, ecx)
00172 AS2( sub ecx, ebp)
00173 AS2( lea ebp, [edi+8*ebp])
// First NH step: add 16 bytes of message to 16 bytes of key (two 64-bit sums), then
// form the four 32x32-bit partial products of the two sums with pmuludq.
// (pshufw with 1,0,3,2 presumably swaps the 32-bit halves - see the ASS macro.)
00174 AS2( movq mm6, [esi])
00175 AS2( paddq mm6, [edi])
00176 AS2( movq mm5, [esi+8])
00177 AS2( paddq mm5, [edi+8])
00178 AS2( add esi, 16)
00179 AS2( add edi, 16)
00180 AS2( movq mm4, mm6)
00181 ASS( pshufw mm2, mm6, 1, 0, 3, 2)
00182 AS2( pmuludq mm6, mm5)
00183 ASS( pshufw mm3, mm5, 1, 0, 3, 2)
00184 AS2( pmuludq mm5, mm2)
00185 AS2( pmuludq mm2, mm3)
00186 AS2( pmuludq mm3, mm4)
00187 AS2( pxor mm7, mm7)
00188 AS2( movd [esp], mm6)
00189 AS2( psrlq mm6, 32)
00190 AS2( movd [esp+4], mm5)
00191 AS2( psrlq mm5, 32)
00192 AS2( cmp edi, ebp)
00193 ASJ( je, 1, f)
// Label 0: NH loop body for every further 16-byte step.  The partial products of the
// previous step are folded into the column accumulators mm5/mm6/mm7 while the
// products of the current step are computed.
00194 ASL(0)
00195 AS2( movq mm0, [esi])
00196 AS2( paddq mm0, [edi])
00197 AS2( movq mm1, [esi+8])
00198 AS2( paddq mm1, [edi+8])
00199 AS2( add esi, 16)
00200 AS2( add edi, 16)
00201 AS2( movq mm4, mm0)
00202 AS2( paddq mm5, mm2)
00203 ASS( pshufw mm2, mm0, 1, 0, 3, 2)
00204 AS2( pmuludq mm0, mm1)
00205 AS2( movd [esp+8], mm3)
00206 AS2( psrlq mm3, 32)
00207 AS2( paddq mm5, mm3)
00208 ASS( pshufw mm3, mm1, 1, 0, 3, 2)
00209 AS2( pmuludq mm1, mm2)
00210 AS2( pmuludq mm2, mm3)
00211 AS2( pmuludq mm3, mm4)
00212 AS2( movd mm4, [esp])
00213 AS2( paddq mm7, mm4)
00214 AS2( movd mm4, [esp+4])
00215 AS2( paddq mm6, mm4)
00216 AS2( movd mm4, [esp+8])
00217 AS2( paddq mm6, mm4)
00218 AS2( movd [esp], mm0)
00219 AS2( psrlq mm0, 32)
00220 AS2( paddq mm6, mm0)
00221 AS2( movd [esp+4], mm1)
00222 AS2( psrlq mm1, 32)
00223 AS2( paddq mm5, mm1)
00224 AS2( cmp edi, ebp)
00225 ASJ( jne, 0, b)
// Label 1: fold in the partial products of the last step, then move the key pointer
// back by one full L1 key length (8*ebx bytes) ready for the next block.
00226 ASL(1)
00227 AS2( paddq mm5, mm2)
00228 AS2( movd [esp+8], mm3)
00229 AS2( psrlq mm3, 32)
00230 AS2( paddq mm5, mm3)
00231 AS2( movd mm4, [esp])
00232 AS2( paddq mm7, mm4)
00233 AS2( movd mm4, [esp+4])
00234 AS2( paddq mm6, mm4)
00235 AS2( movd mm4, [esp+8])
00236 AS2( paddq mm6, mm4)
00237 AS2( lea ebp, [8*ebx])
00238 AS2( sub edi, ebp)
00239
// Propagate carries between the 32-bit columns: the low two 32-bit words of the NH
// result end up in [esp] and [esp+4], the rest in mm5 with its top two bits cleared
// (shift left then right by 2).
00240 AS2( movd [esp], mm7)
00241 AS2( psrlq mm7, 32)
00242 AS2( paddq mm6, mm7)
00243 AS2( movd [esp+4], mm6)
00244 AS2( psrlq mm6, 32)
00245 AS2( paddq mm5, mm6)
00246 AS2( psllq mm5, 2)
00247 AS2( psrlq mm5, 2)
00248
// a0..a3 name the four 32-bit words of the polynomial accumulator at [eax];
// k0..k3 name the four 32-bit words of the polynomial key stored 16 bytes after it.
00249 #define a0 [eax+2*4]
00250 #define a1 [eax+3*4]
00251 #define a2 [eax+0*4]
00252 #define a3 [eax+1*4]
00253 #define k0 [eax+2*8+2*4]
00254 #define k1 [eax+2*8+3*4]
00255 #define k2 [eax+2*8+0*4]
00256 #define k3 [eax+2*8+1*4]
// First block (dl != 0): the accumulator is simply set to NH result + key, and the
// flag in edx is cleared so later blocks of this call take the other branch.
00257 AS2( test dl, dl)
00258 ASJ( jz, 2, f)
00259 AS2( movd mm1, k0)
00260 AS2( movd mm0, [esp])
00261 AS2( paddq mm0, mm1)
00262 AS2( movd a0, mm0)
00263 AS2( psrlq mm0, 32)
00264 AS2( movd mm1, k1)
00265 AS2( movd mm2, [esp+4])
00266 AS2( paddq mm1, mm2)
00267 AS2( paddq mm0, mm1)
00268 AS2( movd a1, mm0)
00269 AS2( psrlq mm0, 32)
00270 AS2( paddq mm5, k2)
00271 AS2( paddq mm0, mm5)
00272 AS2( movq a2, mm0)
00273 AS2( xor edx, edx)
00274 ASJ( jmp, 3, f)
// Label 2: polynomial step for every later block.  The accumulator words are
// multiplied by the key words with 32x32-bit products and the NH result is added;
// the new accumulator is written back to a0, a1 and a2/a3.
// NOTE(review): this appears to mirror the portable 32-bit code in
// VHASH_Update_Template (same product pattern and 31/33-bit shifts) - confirm.
00275 ASL(2)
00276 AS2( movd mm0, a3)
00277 AS2( movq mm4, mm0)
00278 AS2( pmuludq mm0, k3)
00279 AS2( movd mm1, a0)
00280 AS2( pmuludq mm1, k2)
00281 AS2( movd mm2, a1)
00282 AS2( movd mm6, k1)
00283 AS2( pmuludq mm2, mm6)
00284 AS2( movd mm3, a2)
00285 AS2( psllq mm0, 1)
00286 AS2( paddq mm0, mm5)
00287 AS2( movq mm5, mm3)
00288 AS2( movd mm7, k0)
00289 AS2( pmuludq mm3, mm7)
00290 AS2( pmuludq mm4, mm7)
00291 AS2( pmuludq mm5, mm6)
00292 AS2( paddq mm0, mm1)
00293 AS2( movd mm1, a1)
00294 AS2( paddq mm4, mm5)
00295 AS2( movq mm5, mm1)
00296 AS2( pmuludq mm1, k2)
00297 AS2( paddq mm0, mm2)
00298 AS2( movd mm2, a0)
00299 AS2( paddq mm0, mm3)
00300 AS2( movq mm3, mm2)
00301 AS2( pmuludq mm2, k3)
00302 AS2( pmuludq mm3, mm7)
00303 AS2( movd [esp+8], mm0)
00304 AS2( psrlq mm0, 32)
00305 AS2( pmuludq mm7, mm5)
00306 AS2( pmuludq mm5, k3)
00307 AS2( paddq mm0, mm1)
00308 AS2( movd mm1, a2)
00309 AS2( pmuludq mm1, k2)
00310 AS2( paddq mm0, mm2)
00311 AS2( paddq mm0, mm4)
00312 AS2( movq mm4, mm0)
00313 AS2( movd mm2, a3)
00314 AS2( pmuludq mm2, mm6)
00315 AS2( pmuludq mm6, a0)
00316 AS2( psrlq mm0, 31)
00317 AS2( paddq mm0, mm3)
00318 AS2( movd mm3, [esp])
00319 AS2( paddq mm0, mm3)
00320 AS2( movd mm3, a2)
00321 AS2( pmuludq mm3, k3)
00322 AS2( paddq mm5, mm1)
00323 AS2( movd mm1, a3)
00324 AS2( pmuludq mm1, k2)
00325 AS2( paddq mm5, mm2)
00326 AS2( movd mm2, [esp+4])
00327 AS2( psllq mm5, 1)
00328 AS2( paddq mm0, mm5)
00329 AS2( psllq mm4, 33)
00330 AS2( movd a0, mm0)
00331 AS2( psrlq mm0, 32)
00332 AS2( paddq mm6, mm7)
00333 AS2( movd mm7, [esp+8])
00334 AS2( paddq mm0, mm6)
00335 AS2( paddq mm0, mm2)
00336 AS2( paddq mm3, mm1)
00337 AS2( psllq mm3, 1)
00338 AS2( paddq mm0, mm3)
00339 AS2( psrlq mm4, 1)
00340 AS2( movd a1, mm0)
00341 AS2( psrlq mm0, 32)
00342 AS2( por mm4, mm7)
00343 AS2( paddq mm0, mm4)
00344 AS2( movq a2, mm0)
00345 #undef a0
00346 #undef a1
00347 #undef a2
00348 #undef a3
00349 #undef k0
00350 #undef k1
00351 #undef k2
00352 #undef k3
00353
// Label 3: process another L1 block while words remain in ecx.
00354 ASL(3)
00355 AS2( test ecx, ecx)
00356 ASJ( jnz, 4, b)
00357
// Release the spill area, restore ebp and leave MMX state with emms.
00358 AS2( add esp, 12)
00359 AS1( pop ebp)
00360 AS1( emms)
00361 #ifdef __GNUC__
// GCC: switch back to AT&T syntax, restore ebx from `temp`, then list the operands:
// %0 = temp, %1 = m_L1KeyLength, ecx = word count, esi = data,
// edi = nhK+tagPart*2, edx = m_isFirstBlock, eax = polyS+tagPart*4.
00362 ".att_syntax prefix;"
00363 AS2( mov %0, %%ebx)
00364 : "=m" (temp)
00365 : "m" (m_L1KeyLength), "c" (blocksRemainingInWord64), "S" (data), "D" (nhK+tagPart*2), "d" (m_isFirstBlock), "a" (polyS+tagPart*4)
00366 : "memory", "cc"
00367 );
00368 #endif
00369 }
00370 #endif
00371
00372 #if VMAC_BOOL_WORD128
// Native 128-bit integer variant.
//   DeclareNH(a)          declares a zeroed word128 accumulator `a`
//   MUL64(rh,rl,i1,i2)    (rh,rl) = high and low 64 bits of i1*i2
//   AccumulateNH(a,b,c)   a += b*c computed in 128 bits
//   Multiply128(r,i1,i2)  r = product of the low 64 bits of i1 and of i2
00373 #define DeclareNH(a) word128 a=0
00374 #define MUL64(rh,rl,i1,i2) {word128 p = word128(i1)*(i2); rh = word64(p>>64); rl = word64(p);}
00375 #define AccumulateNH(a, b, c) a += word128(b)*(c)
00376 #define Multiply128(r, i1, i2) r = word128(word64(i1)) * word64(i2)
00377 #else
// MUL32(a,b): 64-bit product of the low 32 bits of a and b (the __emulu intrinsic on
// MSVC 2005 or later when not using the Intel compiler).
00378 #if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER)
00379 #define MUL32(a, b) __emulu(word32(a), word32(b))
00380 #else
00381 #define MUL32(a, b) ((word64)((word32)(a)) * (word32)(b))
00382 #endif
00383 #if defined(CRYPTOPP_X64_ASM_AVAILABLE)
// x64 inline-assembly variant: an NH accumulator is the pair a##0 (low) / a##1 (high);
// products come from mulq and carries from addq/adcq.
00384 #define DeclareNH(a) word64 a##0=0, a##1=0
00385 #define MUL64(rh,rl,i1,i2) asm ("mulq %3" : "=a"(rl), "=d"(rh) : "a"(i1), "g"(i2) : "cc");
00386 #define AccumulateNH(a, b, c) asm ("mulq %3; addq %%rax, %0; adcq %%rdx, %1" : "+r"(a##0), "+r"(a##1) : "a"(b), "g"(c) : "%rdx", "cc");
00387 #define ADD128(rh,rl,ih,il) asm ("addq %3, %1; adcq %2, %0" : "+r"(rh),"+r"(rl) : "r"(ih),"r"(il) : "cc");
00388 #elif defined(_MSC_VER) && !defined(CRYPTOPP_SLOW_WORD64)
// MSVC variant using the _umul128 intrinsic; the carry out of the low word is
// detected by comparing the updated low word with the addend.
00389 #define DeclareNH(a) word64 a##0=0, a##1=0
00390 #define MUL64(rh,rl,i1,i2) (rl) = _umul128(i1,i2,&(rh));
00391 #define AccumulateNH(a, b, c) {\
00392 word64 ph, pl;\
00393 pl = _umul128(b,c,&ph);\
00394 a##0 += pl;\
00395 a##1 += ph + (a##0 < pl);}
00396 #else
// Generic variant built from 32x32-bit multiplies.  VMAC_BOOL_32BIT selects the
// matching code path in VHASH_Update_Template.  An NH accumulator is three word64
// values: a##0 collects low 32-bit pieces, a##1 the middle pieces and a##2 the rest;
// carries between them are resolved after the NH loop.
00397 #define VMAC_BOOL_32BIT 1
00398 #define DeclareNH(a) word64 a##0=0, a##1=0, a##2=0
// MUL64: 64x64 -> 128-bit product assembled from four MUL32 partial products.
00399 #define MUL64(rh,rl,i1,i2) \
00400 { word64 _i1 = (i1), _i2 = (i2); \
00401 word64 m1= MUL32(_i1,_i2>>32); \
00402 word64 m2= MUL32(_i1>>32,_i2); \
00403 rh = MUL32(_i1>>32,_i2>>32); \
00404 rl = MUL32(_i1,_i2); \
00405 ADD128(rh,rl,(m1 >> 32),(m1 << 32)); \
00406 ADD128(rh,rl,(m2 >> 32),(m2 << 32)); \
00407 }
// AccumulateNH: adds the four 32x32-bit partial products of b*c into the three
// column accumulators without propagating carries.
00408 #define AccumulateNH(a, b, c) {\
00409 word64 p = MUL32(b, c);\
00410 a##1 += word32((p)>>32);\
00411 a##0 += word32(p);\
00412 p = MUL32((b)>>32, c);\
00413 a##2 += word32((p)>>32);\
00414 a##1 += word32(p);\
00415 p = MUL32((b)>>32, (c)>>32);\
00416 a##2 += p;\
00417 p = MUL32(b, (c)>>32);\
00418 a##1 += word32(p);\
00419 a##2 += word32(p>>32);}
00420 #endif
00421 #endif
// Every variant that did not set VMAC_BOOL_32BIT gets it defined as 0.
00422 #ifndef VMAC_BOOL_32BIT
00423 #define VMAC_BOOL_32BIT 0
00424 #endif
// Portable ADD128 for the variants that did not supply one:
// (rh,rl) += (ih,il), with the carry out of the low word detected by rl < il.
00425 #ifndef ADD128
00426 #define ADD128(rh,rl,ih,il) \
00427 { word64 _il = (il); \
00428 (rl) += (_il); \
00429 (rh) += (ih) + ((rl) < (_il)); \
00430 }
00431 #endif
00432
00433 #if !(defined(_MSC_VER) && _MSC_VER < 1300)
00434 template <bool T_128BitTag>
00435 #endif
00436 void VMAC_Base::VHASH_Update_Template(const word64 *data, size_t blocksRemainingInWord64)
00437 {
00438 #define INNER_LOOP_ITERATION(j) {\
00439 word64 d0 = ConditionalByteReverse(LITTLE_ENDIAN_ORDER, data[i+2*j+0]);\
00440 word64 d1 = ConditionalByteReverse(LITTLE_ENDIAN_ORDER, data[i+2*j+1]);\
00441 AccumulateNH(nhA, d0+nhK[i+2*j+0], d1+nhK[i+2*j+1]);\
00442 if (T_128BitTag)\
00443 AccumulateNH(nhB, d0+nhK[i+2*j+2], d1+nhK[i+2*j+3]);\
00444 }
00445
00446 #if (defined(_MSC_VER) && _MSC_VER < 1300)
00447 bool T_128BitTag = m_is128;
00448 #endif
00449 size_t L1KeyLengthInWord64 = m_L1KeyLength / 8;
00450 size_t innerLoopEnd = L1KeyLengthInWord64;
00451 const word64 *nhK = m_nhKey();
00452 word64 *polyS = m_polyState();
00453 bool isFirstBlock = true;
00454 size_t i;
00455
00456 #if !VMAC_BOOL_32BIT
00457 #if VMAC_BOOL_WORD128
00458 word128 a1, a2;
00459 #else
00460 word64 ah1, al1, ah2, al2;
00461 #endif
00462 word64 kh1, kl1, kh2, kl2;
00463 kh1=(polyS+0*4+2)[0]; kl1=(polyS+0*4+2)[1];
00464 if (T_128BitTag)
00465 {
00466 kh2=(polyS+1*4+2)[0]; kl2=(polyS+1*4+2)[1];
00467 }
00468 #endif
00469
00470 do
00471 {
00472 DeclareNH(nhA);
00473 DeclareNH(nhB);
00474
00475 if (blocksRemainingInWord64 < L1KeyLengthInWord64)
00476 {
00477 if (blocksRemainingInWord64 % 8)
00478 {
00479 innerLoopEnd = blocksRemainingInWord64 % 8;
00480 for (i=0; i<innerLoopEnd; i+=2)
00481 INNER_LOOP_ITERATION(0);
00482 blocksRemainingInWord64 -= innerLoopEnd;
00483 data += innerLoopEnd;
00484 }
00485 innerLoopEnd = blocksRemainingInWord64;
00486 }
00487
00488 for (i=0; i<innerLoopEnd; i+=8)
00489 {
00490 INNER_LOOP_ITERATION(0);
00491 INNER_LOOP_ITERATION(1);
00492 INNER_LOOP_ITERATION(2);
00493 INNER_LOOP_ITERATION(3);
00494 }
00495 blocksRemainingInWord64 -= innerLoopEnd;
00496 data += innerLoopEnd;
00497
00498 #if VMAC_BOOL_32BIT
00499 word32 nh0[2], nh1[2];
00500 word64 nh2[2];
00501
00502 nh0[0] = word32(nhA0);
00503 nhA1 += (nhA0 >> 32);
00504 nh1[0] = word32(nhA1);
00505 nh2[0] = (nhA2 + (nhA1 >> 32)) & m62;
00506
00507 if (T_128BitTag)
00508 {
00509 nh0[1] = word32(nhB0);
00510 nhB1 += (nhB0 >> 32);
00511 nh1[1] = word32(nhB1);
00512 nh2[1] = (nhB2 + (nhB1 >> 32)) & m62;
00513 }
00514
00515 #define a0 (((word32 *)(polyS+i*4))[2+NativeByteOrder::ToEnum()])
00516 #define a1 (((word32 *)(polyS+i*4))[3-NativeByteOrder::ToEnum()])
00517 #define a2 (((word32 *)(polyS+i*4))[0+NativeByteOrder::ToEnum()])
00518 #define a3 (((word32 *)(polyS+i*4))[1-NativeByteOrder::ToEnum()])
00519 #define aHi ((polyS+i*4)[0])
00520 #define k0 (((word32 *)(polyS+i*4+2))[2+NativeByteOrder::ToEnum()])
00521 #define k1 (((word32 *)(polyS+i*4+2))[3-NativeByteOrder::ToEnum()])
00522 #define k2 (((word32 *)(polyS+i*4+2))[0+NativeByteOrder::ToEnum()])
00523 #define k3 (((word32 *)(polyS+i*4+2))[1-NativeByteOrder::ToEnum()])
00524 #define kHi ((polyS+i*4+2)[0])
00525
00526 if (isFirstBlock)
00527 {
00528 isFirstBlock = false;
00529 if (m_isFirstBlock)
00530 {
00531 m_isFirstBlock = false;
00532 for (i=0; i<=(size_t)T_128BitTag; i++)
00533 {
00534 word64 t = (word64)nh0[i] + k0;
00535 a0 = (word32)t;
00536 t = (t >> 32) + nh1[i] + k1;
00537 a1 = (word32)t;
00538 aHi = (t >> 32) + nh2[i] + kHi;
00539 }
00540 continue;
00541 }
00542 }
00543 for (i=0; i<=(size_t)T_128BitTag; i++)
00544 {
00545 word64 p, t;
00546 word32 t2;
00547
00548 p = MUL32(a3, 2*k3);
00549 p += nh2[i];
00550 p += MUL32(a0, k2);
00551 p += MUL32(a1, k1);
00552 p += MUL32(a2, k0);
00553 t2 = (word32)p;
00554 p >>= 32;
00555 p += MUL32(a0, k3);
00556 p += MUL32(a1, k2);
00557 p += MUL32(a2, k1);
00558 p += MUL32(a3, k0);
00559 t = (word64(word32(p) & 0x7fffffff) << 32) | t2;
00560 p >>= 31;
00561 p += nh0[i];
00562 p += MUL32(a0, k0);
00563 p += MUL32(a1, 2*k3);
00564 p += MUL32(a2, 2*k2);
00565 p += MUL32(a3, 2*k1);
00566 t2 = (word32)p;
00567 p >>= 32;
00568 p += nh1[i];
00569 p += MUL32(a0, k1);
00570 p += MUL32(a1, k0);
00571 p += MUL32(a2, 2*k3);
00572 p += MUL32(a3, 2*k2);
00573 a0 = t2;
00574 a1 = (word32)p;
00575 aHi = (p >> 32) + t;
00576 }
00577
00578 #undef a0
00579 #undef a1
00580 #undef a2
00581 #undef a3
00582 #undef aHi
00583 #undef k0
00584 #undef k1
00585 #undef k2
00586 #undef k3
00587 #undef kHi
00588 #else // #if VMAC_BOOL_32BIT
00589 if (isFirstBlock)
00590 {
00591 isFirstBlock = false;
00592 if (m_isFirstBlock)
00593 {
00594 m_isFirstBlock = false;
00595 #if VMAC_BOOL_WORD128
00596 #define first_poly_step(a, kh, kl, m) a = (m & m126) + ((word128(kh) << 64) | kl)
00597
00598 first_poly_step(a1, kh1, kl1, nhA);
00599 if (T_128BitTag)
00600 first_poly_step(a2, kh2, kl2, nhB);
00601 #else
00602 #define first_poly_step(ah, al, kh, kl, mh, ml) {\
00603 mh &= m62;\
00604 ADD128(mh, ml, kh, kl); \
00605 ah = mh; al = ml;}
00606
00607 first_poly_step(ah1, al1, kh1, kl1, nhA1, nhA0);
00608 if (T_128BitTag)
00609 first_poly_step(ah2, al2, kh2, kl2, nhB1, nhB0);
00610 #endif
00611 continue;
00612 }
00613 else
00614 {
00615 #if VMAC_BOOL_WORD128
00616 a1 = (word128((polyS+0*4)[0]) << 64) | (polyS+0*4)[1];
00617 #else
00618 ah1=(polyS+0*4)[0]; al1=(polyS+0*4)[1];
00619 #endif
00620 if (T_128BitTag)
00621 {
00622 #if VMAC_BOOL_WORD128
00623 a2 = (word128((polyS+1*4)[0]) << 64) | (polyS+1*4)[1];
00624 #else
00625 ah2=(polyS+1*4)[0]; al2=(polyS+1*4)[1];
00626 #endif
00627 }
00628 }
00629 }
00630
00631 #if VMAC_BOOL_WORD128
00632 #define poly_step(a, kh, kl, m) \
00633 { word128 t1, t2, t3, t4;\
00634 Multiply128(t2, a>>64, kl);\
00635 Multiply128(t3, a, kh);\
00636 Multiply128(t1, a, kl);\
00637 Multiply128(t4, a>>64, 2*kh);\
00638 t2 += t3;\
00639 t4 += t1;\
00640 t2 += t4>>64;\
00641 a = (word128(word64(t2)&m63) << 64) | word64(t4);\
00642 t2 *= 2;\
00643 a += m & m126;\
00644 a += t2>>64;}
00645
00646 poly_step(a1, kh1, kl1, nhA);
00647 if (T_128BitTag)
00648 poly_step(a2, kh2, kl2, nhB);
00649 #else
00650 #define poly_step(ah, al, kh, kl, mh, ml) \
00651 { word64 t1h, t1l, t2h, t2l, t3h, t3l, z=0; \
00652 \
00653 MUL64(t2h,t2l,ah,kl); \
00654 MUL64(t3h,t3l,al,kh); \
00655 MUL64(t1h,t1l,ah,2*kh); \
00656 MUL64(ah,al,al,kl); \
00657 \
00658 ADD128(t2h,t2l,t3h,t3l); \
00659 \
00660 ADD128(ah,al,t1h,t1l); \
00661 \
00662 \
00663 ADD128(t2h,ah,z,t2l); \
00664 \
00665 t2h += t2h + (ah >> 63); \
00666 ah &= m63; \
00667 \
00668 mh &= m62; \
00669 ADD128(ah,al,mh,ml); \
00670 ADD128(ah,al,z,t2h); \
00671 }
00672
00673 poly_step(ah1, al1, kh1, kl1, nhA1, nhA0);
00674 if (T_128BitTag)
00675 poly_step(ah2, al2, kh2, kl2, nhB1, nhB0);
00676 #endif
00677 #endif // #if VMAC_BOOL_32BIT
00678 } while (blocksRemainingInWord64);
00679
00680 #if VMAC_BOOL_WORD128
00681 (polyS+0*4)[0]=word64(a1>>64); (polyS+0*4)[1]=word64(a1);
00682 if (T_128BitTag)
00683 {
00684 (polyS+1*4)[0]=word64(a2>>64); (polyS+1*4)[1]=word64(a2);
00685 }
00686 #elif !VMAC_BOOL_32BIT
00687 (polyS+0*4)[0]=ah1; (polyS+0*4)[1]=al1;
00688 if (T_128BitTag)
00689 {
00690 (polyS+1*4)[0]=ah2; (polyS+1*4)[1]=al2;
00691 }
00692 #endif
00693 }
00694
00695 inline void VMAC_Base::VHASH_Update(const word64 *data, size_t blocksRemainingInWord64)
00696 {
00697 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86
00698 if (HasSSE2())
00699 {
00700 VHASH_Update_SSE2(data, blocksRemainingInWord64, 0);
00701 if (m_is128)
00702 VHASH_Update_SSE2(data, blocksRemainingInWord64, 1);
00703 m_isFirstBlock = false;
00704 }
00705 else
00706 #endif
00707 {
00708 #if defined(_MSC_VER) && _MSC_VER < 1300
00709 VHASH_Update_Template(data, blocksRemainingInWord64);
00710 #else
00711 if (m_is128)
00712 VHASH_Update_Template<true>(data, blocksRemainingInWord64);
00713 else
00714 VHASH_Update_Template<false>(data, blocksRemainingInWord64);
00715 #endif
00716 }
00717 }
00718
00719 size_t VMAC_Base::HashMultipleBlocks(const word64 *data, size_t length)
00720 {
00721 size_t remaining = ModPowerOf2(length, m_L1KeyLength);
00722 VHASH_Update(data, (length-remaining)/8);
00723 return remaining;
00724 }
00725
00726 static word64 L3Hash(const word64 *input, const word64 *l3Key, size_t len)
00727 {
00728 word64 rh, rl, t, z=0;
00729 word64 p1 = input[0], p2 = input[1];
00730 word64 k1 = l3Key[0], k2 = l3Key[1];
00731
00732
00733 t = p1 >> 63;
00734 p1 &= m63;
00735 ADD128(p1, p2, len, t);
00736
00737 t = (p1 > m63) + ((p1 == m63) & (p2 == m64));
00738 ADD128(p1, p2, z, t);
00739 p1 &= m63;
00740
00741
00742 t = p1 + (p2 >> 32);
00743 t += (t >> 32);
00744 t += (word32)t > 0xfffffffeU;
00745 p1 += (t >> 32);
00746 p2 += (p1 << 32);
00747
00748
00749 p1 += k1;
00750 p1 += (0 - (p1 < k1)) & 257;
00751 p2 += k2;
00752 p2 += (0 - (p2 < k2)) & 257;
00753
00754
00755 MUL64(rh, rl, p1, p2);
00756 t = rh >> 56;
00757 ADD128(t, rl, z, rh);
00758 rh <<= 8;
00759 ADD128(t, rl, z, rh);
00760 t += t << 8;
00761 rl += t;
00762 rl += (0 - (rl < t)) & 257;
00763 rl += (0 - (rl > p64-1)) & 257;
00764 return rl;
00765 }
00766
00767 void VMAC_Base::TruncatedFinal(byte *mac, size_t size)
00768 {
00769 size_t len = ModPowerOf2(GetBitCountLo()/8, m_L1KeyLength);
00770
00771 if (len)
00772 {
00773 memset(m_data()+len, 0, (0-len)%16);
00774 VHASH_Update(DataBuf(), ((len+15)/16)*2);
00775 len *= 8;
00776 }
00777 else if (m_isFirstBlock)
00778 {
00779
00780 m_polyState()[0] = m_polyState()[2];
00781 m_polyState()[1] = m_polyState()[3];
00782 if (m_is128)
00783 {
00784 m_polyState()[4] = m_polyState()[6];
00785 m_polyState()[5] = m_polyState()[7];
00786 }
00787 }
00788
00789 if (m_is128)
00790 {
00791 word64 t[2];
00792 t[0] = L3Hash(m_polyState(), m_l3Key(), len) + GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad());
00793 t[1] = L3Hash(m_polyState()+4, m_l3Key()+2, len) + GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad()+8);
00794 if (size == 16)
00795 {
00796 PutWord(false, BIG_ENDIAN_ORDER, mac, t[0]);
00797 PutWord(false, BIG_ENDIAN_ORDER, mac+8, t[1]);
00798 }
00799 else
00800 {
00801 t[0] = ConditionalByteReverse(BIG_ENDIAN_ORDER, t[0]);
00802 t[1] = ConditionalByteReverse(BIG_ENDIAN_ORDER, t[1]);
00803 memcpy(mac, t, size);
00804 }
00805 }
00806 else
00807 {
00808 word64 t = L3Hash(m_polyState(), m_l3Key(), len);
00809 t += GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad() + (m_nonce()[IVSize()-1]&1) * 8);
00810 if (size == 8)
00811 PutWord(false, BIG_ENDIAN_ORDER, mac, t);
00812 else
00813 {
00814 t = ConditionalByteReverse(BIG_ENDIAN_ORDER, t);
00815 memcpy(mac, &t, size);
00816 }
00817 }
00818 }
00819
00820 NAMESPACE_END