• Main Page
  • Namespaces
  • Classes
  • Files
  • File List
  • File Members

vmac.cpp

00001 // vmac.cpp - written and placed in the public domain by Wei Dai
00002 // based on Ted Krovetz's public domain vmac.c and draft-krovetz-vmac-01.txt
00003 
00004 #include "pch.h"
00005 #include "vmac.h"
00006 #include "argnames.h"
00007 #include "cpu.h"
00008 
00009 NAMESPACE_BEGIN(CryptoPP)
00010 
00011 #if defined(_MSC_VER) && !CRYPTOPP_BOOL_SLOW_WORD64
00012 #include <intrin.h>
00013 #endif
00014 
00015 #define VMAC_BOOL_WORD128 (defined(CRYPTOPP_WORD128_AVAILABLE) && !defined(CRYPTOPP_X64_ASM_AVAILABLE))
00016 #ifdef __BORLANDC__
00017 #define const   // Turbo C++ 2006 workaround
00018 #endif
00019 static const word64 p64   = W64LIT(0xfffffffffffffeff);  /* 2^64 - 257 prime  */
00020 static const word64 m62   = W64LIT(0x3fffffffffffffff);  /* 62-bit mask       */
00021 static const word64 m63   = W64LIT(0x7fffffffffffffff);  /* 63-bit mask       */
00022 static const word64 m64   = W64LIT(0xffffffffffffffff);  /* 64-bit mask       */
00023 static const word64 mpoly = W64LIT(0x1fffffff1fffffff);  /* Poly key mask     */
00024 #ifdef __BORLANDC__
00025 #undef const
00026 #endif
00027 #if VMAC_BOOL_WORD128
00028 #ifdef __powerpc__
00029 // workaround GCC Bug 31690: ICE with const __uint128_t and C++ front-end
00030 #define m126                            ((word128(m62)<<64)|m64)
00031 #else
00032 static const word128 m126 = (word128(m62)<<64)|m64;              /* 126-bit mask      */
00033 #endif
00034 #endif
00035 
// Keys the MAC: reads the tag size and L1 key length from `params`, keys the
// underlying block cipher with `userKey`, derives the NH key, the polynomial
// key(s) and the L3 ("ip") key(s) by encrypting counter-style blocks, and
// finally installs the nonce supplied in `params`.
//
//   userKey   - key bytes handed to the block cipher
//   keylength - length of userKey in bytes
//   params    - may carry DigestSize (8 or 16), L1KeyLength (positive multiple
//               of 128) and the IV/nonce
//
// Throws InvalidArgument when DigestSize or L1KeyLength is out of range.
// NOTE(review): m_is128 and m_L1KeyLength are assigned before/without rollback,
// so a throwing call leaves them modified.
// NOTE(review): the code indexes in[15] and reads out.BytePtr()+8, so it assumes
// a cipher block of at least 16 bytes -- confirm this is enforced elsewhere.
00036 void VMAC_Base::UncheckedSetKey(const byte *userKey, unsigned int keylength, const NameValuePairs &params)
00037 {
00038         int digestLength = params.GetIntValueWithDefault(Name::DigestSize(), DefaultDigestSize());
00039         if (digestLength != 8 && digestLength != 16)
00040                 throw InvalidArgument("VMAC: DigestSize must be 8 or 16");
00041         m_is128 = digestLength == 16;
00042 
00043         m_L1KeyLength = params.GetIntValueWithDefault(Name::L1KeyLength(), 128);
00044         if (m_L1KeyLength <= 0 || m_L1KeyLength % 128 != 0)
00045                 throw InvalidArgument("VMAC: L1KeyLength must be a positive multiple of 128");
00046 
              // presumably sizes the storage behind m_nhKey()/m_polyState()/m_l3Key() -- TODO confirm
00047         AllocateBlocks();
00048 
00049         BlockCipher &cipher = AccessCipher();
00050         cipher.SetKey(userKey, keylength, params);
00051         unsigned int blockSize = cipher.BlockSize();
00052         unsigned int blockSizeInWords = blockSize / sizeof(word64);
00053         SecBlock<word64> out(blockSizeInWords);
00054         SecByteBlock in;
              // `in` is the zero-initialised cipher input; byte 0 selects the key being
              // derived (0x80 / 0xC0 / 0xE0) and byte 15 is used as a counter below
00055         in.CleanNew(blockSize);
00056         size_t i;
00057 
00058         /* Fill nh key */
00059         in[0] = 0x80; 
              // encrypt successive counter values of `in` straight into the NH key buffer,
              // then (presumably) convert the words from big-endian to native order
00060         cipher.AdvancedProcessBlocks(in, NULL, (byte *)m_nhKey(), m_nhKeySize()*sizeof(word64), cipher.BT_InBlockIsCounter);
00061         ConditionalByteReverse<word64>(BIG_ENDIAN_ORDER, m_nhKey(), m_nhKey(), m_nhKeySize()*sizeof(word64));
00062 
00063         /* Fill poly key */
00064         in[0] = 0xC0;
00065         in[15] = 0;
              // one cipher block per 64-bit tag half: 1 iteration for 8-byte tags, 2 for 16-byte tags;
              // the two masked key words go into slots 2 and 3 of each 4-word polyState group
00066         for (i = 0; i <= (size_t)m_is128; i++)
00067         {
00068                 cipher.ProcessBlock(in, out.BytePtr());
00069                 m_polyState()[i*4+2] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr()) & mpoly;
00070                 m_polyState()[i*4+3]  = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr()+8) & mpoly;
00071                 in[15]++;
00072         }
00073 
00074         /* Fill ip key */
00075         in[0] = 0xE0;
00076         in[15] = 0;
00077         word64 *l3Key = m_l3Key();
              // for each tag half, keep encrypting the next counter value until both
              // 64-bit words of the result are below p64
00078         for (i = 0; i <= (size_t)m_is128; i++)
00079                 do
00080                 {
00081                         cipher.ProcessBlock(in, out.BytePtr());
00082                         l3Key[i*2+0] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr());
00083                         l3Key[i*2+1] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr()+8);
00084                         in[15]++;
00085                 } while ((l3Key[i*2+0] >= p64) || (l3Key[i*2+1] >= p64));
00086 
              // the cipher key changed, so any pad cached by Resynchronize() is no longer valid
00087         m_padCached = false;
00088         size_t nonceLength;
00089         const byte *nonce = GetIVAndThrowIfInvalid(params, nonceLength);
00090         Resynchronize(nonce, (int)nonceLength);
00091 }
00092 
// Obtains the next IV from the SimpleKeyingInterface implementation and then
// clears the most significant bit of its first byte.
//   rng - random number generator forwarded to the base implementation
//   IV  - output buffer for the IV; its first byte is adjusted in place
// NOTE(review): presumably VMAC requires the leading nonce bit to be zero --
// confirm against the VMAC draft.
00093 void VMAC_Base::GetNextIV(RandomNumberGenerator &rng, byte *IV)
00094 {
00095         SimpleKeyingInterface::GetNextIV(rng, IV);
00096         *IV = static_cast<byte>(*IV & 0x7f);
00097 }
00098 
// Installs a new nonce and restarts the MAC computation.
//
//   nonce - nonce bytes; stored right-aligned in an IVSize()-byte block that is
//           zero-padded on the left
//   len   - nonce length, validated by ThrowIfInvalidIVLength()
//
// 128-bit tags: the padded nonce is always encrypted into m_pad().
// 64-bit tags: the pad is the encryption of the padded nonce with its lowest bit
// cleared, so two nonces that differ only in that bit share one cipher call.
// The pad from the previous call is reused only when the new nonce matches the
// stored one in every bit except that lowest bit; any other difference forces
// the pad to be recomputed. (Previously a mismatch in bits 1..7 of the last byte
// left m_padCached set and the stale pad was kept.)
00099 void VMAC_Base::Resynchronize(const byte *nonce, int len)
00100 {
00101         size_t length = ThrowIfInvalidIVLength(len);
00102         size_t s = IVSize();
00103         byte *storedNonce = m_nonce();
00104 
00105         if (m_is128)
00106         {
00107                 memset(storedNonce, 0, s-length);
00108                 memcpy(storedNonce+s-length, nonce, length);
00109                 AccessCipher().ProcessBlock(storedNonce, m_pad());
00110         }
00111         else
00112         {
00113                 if (m_padCached)
00114                 {
                              // cache stays valid only if the last byte matches ignoring bit 0,
                              // the preceding nonce bytes match, and the left padding is all zero
00115                         m_padCached = (storedNonce[s-1] | 1) == (nonce[length-1] | 1) && VerifyBufsEqual(storedNonce+s-length, nonce, length-1);
00116                         for (size_t i=0; m_padCached && i<s-length; i++)
00117                                 m_padCached = (storedNonce[i] == 0);
00118                 }
00119                 if (!m_padCached)
00120                 {
00121                         memset(storedNonce, 0, s-length);
00122                         memcpy(storedNonce+s-length, nonce, length-1);
00123                         storedNonce[s-1] = nonce[length-1] & 0xfe;      // encrypt with the lowest bit cleared
00124                         AccessCipher().ProcessBlock(storedNonce, m_pad());
00125                         m_padCached = true;
00126                 }
                      // keep the real last byte (including bit 0) in the stored nonce
00127                 storedNonce[s-1] = nonce[length-1];
00128         }
              // the next hashed block must take the first-block path again
00129         m_isFirstBlock = true;
00130         Restart();
00131 }
00132 
// This entry point is not supported by VMAC_Base and must never be reached:
// it asserts in debug builds and otherwise throws.
//   data - ignored
// Throws NotImplemented (a library exception carrying a message) instead of a
// bare int, so handlers for the library's exception types can report the failure.
00133 void VMAC_Base::HashEndianCorrectedBlock(const word64 *data)
00134 {
00135         assert(false);
00136         throw NotImplemented("VMAC: HashEndianCorrectedBlock is not implemented");
00137 }
00138 
// 32-bit x86 inline-assembly version of the VHASH update, built only when
// CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE and CRYPTOPP_BOOL_X86 are set. It works on the
// MMX registers mm0-mm7 with 64-bit add / 32x32->64 multiply instructions.
//
//   data                    - message words to absorb
//   blocksRemainingInWord64 - number of 64-bit words available at `data`
//   tagPart                 - selects the key/state slice: the NH key pointer is
//                             advanced by tagPart*2 words and the polynomial state
//                             pointer by tagPart*4 words
//
// Register use inside the assembly: esi = data, edi = NH key, eax = polynomial
// state slice, ecx = words remaining, ebx = L1 key length in 64-bit words,
// dl = first-block flag, ebp = end of the NH key range for the current chunk.
// 12 bytes of stack are reserved as scratch for 32-bit partial sums.
00139 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86
00140 #pragma warning(disable: 4731)  // frame pointer register 'ebp' modified by inline assembly code
00141 void
00142 #ifdef __GNUC__
00143 __attribute__ ((noinline))              // Intel Compiler 9.1 workaround
00144 #endif
00145 VMAC_Base::VHASH_Update_SSE2(const word64 *data, size_t blocksRemainingInWord64, int tagPart)
00146 {
00147         const word64 *nhK = m_nhKey();
00148         word64 *polyS = m_polyState();
00149         word32 L1KeyLength = m_L1KeyLength;
00150 
00151 #ifdef __GNUC__
              // GCC path: ebx is saved in `temp` and restored at the end because it is
              // loaded with L1KeyLength; the other registers arrive via asm constraints
00152         word32 temp;
00153         __asm__ __volatile__
00154         (
00155         AS2(    mov             %%ebx, %0)
00156         AS2(    mov             %1, %%ebx)
00157         ".intel_syntax noprefix;"
00158 #else
              // non-GCC path: load the same registers explicitly
00159         #if _MSC_VER < 1300 || defined(__INTEL_COMPILER)
00160         char isFirstBlock = m_isFirstBlock;
00161         AS2(    mov             ebx, [L1KeyLength])
00162         AS2(    mov             dl, [isFirstBlock])
00163         #else
00164         AS2(    mov             ecx, this)
00165         AS2(    mov             ebx, [ecx+m_L1KeyLength])
00166         AS2(    mov             dl, [ecx+m_isFirstBlock])
00167         #endif
              // edi = nhK + tagPart*16 bytes, eax = polyS + tagPart*32 bytes
00168         AS2(    mov             eax, tagPart)
00169         AS2(    shl             eax, 4)
00170         AS2(    mov             edi, nhK)
00171         AS2(    add             edi, eax)
00172         AS2(    add             eax, eax)
00173         AS2(    add             eax, polyS)
00174 
00175         AS2(    mov             esi, data)
00176         AS2(    mov             ecx, blocksRemainingInWord64)
00177 #endif
00178 
              // ebx = L1KeyLength / 8; save ebp and reserve the 12-byte scratch area
00179         AS2(    shr             ebx, 3)
00180         AS1(    push    ebp)
00181         AS2(    sub             esp, 12)
              // label 4: start of one chunk; chunk size = min(ebx, ecx) words
00182         ASL(4)
00183         AS2(    mov             ebp, ebx)
00184         AS2(    cmp             ecx, ebx)
00185         AS2(    cmovl   ebp, ecx)
00186         AS2(    sub             ecx, ebp)
00187         AS2(    lea             ebp, [edi+8*ebp])       // end of nhK
              // first pair of words: add key to data, then form the 32-bit partial products
00188         AS2(    movq    mm6, [esi])
00189         AS2(    paddq   mm6, [edi])
00190         AS2(    movq    mm5, [esi+8])
00191         AS2(    paddq   mm5, [edi+8])
00192         AS2(    add             esi, 16)
00193         AS2(    add             edi, 16)
00194         AS2(    movq    mm4, mm6)
00195         ASS(    pshufw  mm2, mm6, 1, 0, 3, 2)
00196         AS2(    pmuludq mm6, mm5)
00197         ASS(    pshufw  mm3, mm5, 1, 0, 3, 2)
00198         AS2(    pmuludq mm5, mm2)
00199         AS2(    pmuludq mm2, mm3)
00200         AS2(    pmuludq mm3, mm4)
00201         AS2(    pxor    mm7, mm7)
00202         AS2(    movd    [esp], mm6)
00203         AS2(    psrlq   mm6, 32)
00204         AS2(    movd    [esp+4], mm5)
00205         AS2(    psrlq   mm5, 32)
00206         AS2(    cmp             edi, ebp)
00207         ASJ(    je,             1, f)
              // label 0: inner loop, one pair of 64-bit words per iteration; partial sums
              // are accumulated in mm5/mm6/mm7 with spills to the stack scratch area
00208         ASL(0)
00209         AS2(    movq    mm0, [esi])
00210         AS2(    paddq   mm0, [edi])
00211         AS2(    movq    mm1, [esi+8])
00212         AS2(    paddq   mm1, [edi+8])
00213         AS2(    add             esi, 16)
00214         AS2(    add             edi, 16)
00215         AS2(    movq    mm4, mm0)
00216         AS2(    paddq   mm5, mm2)
00217         ASS(    pshufw  mm2, mm0, 1, 0, 3, 2)
00218         AS2(    pmuludq mm0, mm1)
00219         AS2(    movd    [esp+8], mm3)
00220         AS2(    psrlq   mm3, 32)
00221         AS2(    paddq   mm5, mm3)
00222         ASS(    pshufw  mm3, mm1, 1, 0, 3, 2)
00223         AS2(    pmuludq mm1, mm2)
00224         AS2(    pmuludq mm2, mm3)
00225         AS2(    pmuludq mm3, mm4)
00226         AS2(    movd    mm4, [esp])
00227         AS2(    paddq   mm7, mm4)
00228         AS2(    movd    mm4, [esp+4])
00229         AS2(    paddq   mm6, mm4)
00230         AS2(    movd    mm4, [esp+8])
00231         AS2(    paddq   mm6, mm4)
00232         AS2(    movd    [esp], mm0)
00233         AS2(    psrlq   mm0, 32)
00234         AS2(    paddq   mm6, mm0)
00235         AS2(    movd    [esp+4], mm1)
00236         AS2(    psrlq   mm1, 32)
00237         AS2(    paddq   mm5, mm1)
00238         AS2(    cmp             edi, ebp)
00239         ASJ(    jne,    0, b)
              // label 1: fold in the products still pending from the last pair
00240         ASL(1)
00241         AS2(    paddq   mm5, mm2)
00242         AS2(    movd    [esp+8], mm3)
00243         AS2(    psrlq   mm3, 32)
00244         AS2(    paddq   mm5, mm3)
00245         AS2(    movd    mm4, [esp])
00246         AS2(    paddq   mm7, mm4)
00247         AS2(    movd    mm4, [esp+4])
00248         AS2(    paddq   mm6, mm4)
00249         AS2(    movd    mm4, [esp+8])
00250         AS2(    paddq   mm6, mm4)
00251         AS2(    lea             ebp, [8*ebx])
00252         AS2(    sub             edi, ebp)               // reset edi to start of nhK
00253 
              // propagate carries: low 32 bits -> [esp], next 32 bits -> [esp+4], rest in mm5;
              // the shift pair clears the top two bits of mm5
00254         AS2(    movd    [esp], mm7)
00255         AS2(    psrlq   mm7, 32)
00256         AS2(    paddq   mm6, mm7)
00257         AS2(    movd    [esp+4], mm6)
00258         AS2(    psrlq   mm6, 32)
00259         AS2(    paddq   mm5, mm6)
00260         AS2(    psllq   mm5, 2)
00261         AS2(    psrlq   mm5, 2)
00262 
      // a0..a3: 32-bit pieces of the accumulator at [eax .. eax+15];
      // k0..k3: 32-bit pieces of the polynomial key at [eax+16 .. eax+31]
00263 #define a0 [eax+2*4]
00264 #define a1 [eax+3*4]
00265 #define a2 [eax+0*4]
00266 #define a3 [eax+1*4]
00267 #define k0 [eax+2*8+2*4]
00268 #define k1 [eax+2*8+3*4]
00269 #define k2 [eax+2*8+0*4]
00270 #define k3 [eax+2*8+1*4]
              // dl != 0: first block -- accumulator = chunk result + key, then clear the flag
              // in edx and skip the polynomial step
00271         AS2(    test    dl, dl)
00272         ASJ(    jz,             2, f)
00273         AS2(    movd    mm1, k0)
00274         AS2(    movd    mm0, [esp])
00275         AS2(    paddq   mm0, mm1)
00276         AS2(    movd    a0, mm0)
00277         AS2(    psrlq   mm0, 32)
00278         AS2(    movd    mm1, k1)
00279         AS2(    movd    mm2, [esp+4])
00280         AS2(    paddq   mm1, mm2)
00281         AS2(    paddq   mm0, mm1)
00282         AS2(    movd    a1, mm0)
00283         AS2(    psrlq   mm0, 32)
00284         AS2(    paddq   mm5, k2)
00285         AS2(    paddq   mm0, mm5)
00286         AS2(    movq    a2, mm0)
00287         AS2(    xor             edx, edx)
00288         ASJ(    jmp,    3, f)
              // label 2: polynomial step -- multiply the accumulator by the key using
              // 32x32 partial products (named in the trailing comments) and add the chunk result
00289         ASL(2)
00290         AS2(    movd    mm0, a3)
00291         AS2(    movq    mm4, mm0)
00292         AS2(    pmuludq mm0, k3)                // a3*k3
00293         AS2(    movd    mm1, a0)
00294         AS2(    pmuludq mm1, k2)                // a0*k2
00295         AS2(    movd    mm2, a1)
00296         AS2(    movd    mm6, k1)
00297         AS2(    pmuludq mm2, mm6)               // a1*k1
00298         AS2(    movd    mm3, a2)
00299         AS2(    psllq   mm0, 1)
00300         AS2(    paddq   mm0, mm5)
00301         AS2(    movq    mm5, mm3)
00302         AS2(    movd    mm7, k0)
00303         AS2(    pmuludq mm3, mm7)               // a2*k0
00304         AS2(    pmuludq mm4, mm7)               // a3*k0
00305         AS2(    pmuludq mm5, mm6)               // a2*k1
00306         AS2(    paddq   mm0, mm1)
00307         AS2(    movd    mm1, a1)
00308         AS2(    paddq   mm4, mm5)
00309         AS2(    movq    mm5, mm1)
00310         AS2(    pmuludq mm1, k2)                // a1*k2
00311         AS2(    paddq   mm0, mm2)
00312         AS2(    movd    mm2, a0)
00313         AS2(    paddq   mm0, mm3)
00314         AS2(    movq    mm3, mm2)
00315         AS2(    pmuludq mm2, k3)                // a0*k3
00316         AS2(    pmuludq mm3, mm7)               // a0*k0
00317         AS2(    movd    [esp+8], mm0)
00318         AS2(    psrlq   mm0, 32)
00319         AS2(    pmuludq mm7, mm5)               // a1*k0
00320         AS2(    pmuludq mm5, k3)                // a1*k3
00321         AS2(    paddq   mm0, mm1)
00322         AS2(    movd    mm1, a2)
00323         AS2(    pmuludq mm1, k2)                // a2*k2
00324         AS2(    paddq   mm0, mm2)
00325         AS2(    paddq   mm0, mm4)
00326         AS2(    movq    mm4, mm0)
00327         AS2(    movd    mm2, a3)
00328         AS2(    pmuludq mm2, mm6)               // a3*k1
00329         AS2(    pmuludq mm6, a0)                // a0*k1
00330         AS2(    psrlq   mm0, 31)
00331         AS2(    paddq   mm0, mm3)
00332         AS2(    movd    mm3, [esp])
00333         AS2(    paddq   mm0, mm3)
00334         AS2(    movd    mm3, a2)
00335         AS2(    pmuludq mm3, k3)                // a2*k3
00336         AS2(    paddq   mm5, mm1)
00337         AS2(    movd    mm1, a3)
00338         AS2(    pmuludq mm1, k2)                // a3*k2
00339         AS2(    paddq   mm5, mm2)
00340         AS2(    movd    mm2, [esp+4])
00341         AS2(    psllq   mm5, 1)
00342         AS2(    paddq   mm0, mm5)
00343         AS2(    psllq   mm4, 33)
00344         AS2(    movd    a0, mm0)
00345         AS2(    psrlq   mm0, 32)
00346         AS2(    paddq   mm6, mm7)
00347         AS2(    movd    mm7, [esp+8])
00348         AS2(    paddq   mm0, mm6)
00349         AS2(    paddq   mm0, mm2)
00350         AS2(    paddq   mm3, mm1)
00351         AS2(    psllq   mm3, 1)
00352         AS2(    paddq   mm0, mm3)
00353         AS2(    psrlq   mm4, 1)
00354         AS2(    movd    a1, mm0)
00355         AS2(    psrlq   mm0, 32)
00356         AS2(    por             mm4, mm7)
00357         AS2(    paddq   mm0, mm4)
00358         AS2(    movq    a2, mm0)
00359 #undef a0
00360 #undef a1
00361 #undef a2
00362 #undef a3
00363 #undef k0
00364 #undef k1
00365 #undef k2
00366 #undef k3
00367 
              // label 3: loop back to label 4 while words remain in ecx
00368         ASL(3)
00369         AS2(    test    ecx, ecx)
00370         ASJ(    jnz,    4, b)
00371 
              // release the scratch area, restore ebp and leave MMX state
00372         AS2(    add             esp, 12)
00373         AS1(    pop             ebp)
00374         AS1(    emms)
00375 #ifdef __GNUC__
00376         ".att_syntax prefix;"
00377         AS2(    mov     %0, %%ebx)
00378                 : "=m" (temp)
00379                 : "m" (L1KeyLength), "c" (blocksRemainingInWord64), "S" (data), "D" (nhK+tagPart*2), "d" (m_isFirstBlock), "a" (polyS+tagPart*4)
00380                 : "memory", "cc"
00381         );
00382 #endif
00383 }
00384 #endif
00385 
// Arithmetic building blocks for the portable VHASH code, selected per platform:
//   DeclareNH(a)            - declares a zero-initialised NH accumulator named `a`
//                             (one word128, or word64 pieces a0, a1[, a2])
//   AccumulateNH(a, b, c)   - adds the product b*c into accumulator `a`
//   MUL64(rh, rl, i1, i2)   - 64x64 -> 128-bit multiply, result in (rh, rl)
//   Multiply128(r, i1, i2)  - word128 product of the low 64 bits of i1 and i2
//   MUL32(a, b)             - 32x32 -> 64-bit multiply
//   ADD128(rh, rl, ih, il)  - 128-bit add of (ih, il) into (rh, rl) with carry
//   VMAC_BOOL_32BIT         - 1 only for the generic fallback that uses 32-bit multiplies
00386 #if VMAC_BOOL_WORD128
              // native 128-bit integer type available
00387         #define DeclareNH(a) word128 a=0
00388         #define MUL64(rh,rl,i1,i2) {word128 p = word128(i1)*(i2); rh = word64(p>>64); rl = word64(p);}
00389         #define AccumulateNH(a, b, c) a += word128(b)*(c)
00390         #define Multiply128(r, i1, i2) r = word128(word64(i1)) * word64(i2)
00391 #else
00392         #if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER)
00393                 #define MUL32(a, b) __emulu(word32(a), word32(b))
00394         #else
00395                 #define MUL32(a, b) ((word64)((word32)(a)) * (word32)(b))
00396         #endif
00397         #if defined(CRYPTOPP_X64_ASM_AVAILABLE)
                      // x64 GCC-style inline assembly
                      // NOTE(review): in AccumulateNH, mulq writes rax, but rax is bound only as
                      // an input ("a"(b)) and is not listed as an output or clobber -- confirm
                      // this is safe under the compiler's extended-asm rules
00398                 #define DeclareNH(a)                    word64 a##0=0, a##1=0
00399                 #define MUL64(rh,rl,i1,i2)              asm ("mulq %3" : "=a"(rl), "=d"(rh) : "a"(i1), "g"(i2) : "cc");
00400                 #define AccumulateNH(a, b, c)   asm ("mulq %3; addq %%rax, %0; adcq %%rdx, %1" : "+r"(a##0), "+r"(a##1) : "a"(b), "g"(c) : "%rdx", "cc");
00401                 #define ADD128(rh,rl,ih,il)     asm ("addq %3, %1; adcq %2, %0" : "+r"(rh),"+r"(rl) : "r"(ih),"r"(il) : "cc");
00402         #elif defined(_MSC_VER) && !CRYPTOPP_BOOL_SLOW_WORD64
                      // MSVC with the _umul128 intrinsic
00403                 #define DeclareNH(a) word64 a##0=0, a##1=0
00404                 #define MUL64(rh,rl,i1,i2)   (rl) = _umul128(i1,i2,&(rh));
00405                 #define AccumulateNH(a, b, c)   {\
00406                         word64 ph, pl;\
00407                         pl = _umul128(b,c,&ph);\
00408                         a##0 += pl;\
00409                         a##1 += ph + (a##0 < pl);}
00410         #else
                      // generic fallback: everything is built from 32x32 multiplies; the NH
                      // accumulator is kept in three word64 pieces whose carries are resolved
                      // later by the code that consumes a0/a1/a2
00411                 #define VMAC_BOOL_32BIT 1
00412                 #define DeclareNH(a) word64 a##0=0, a##1=0, a##2=0
00413                 #define MUL64(rh,rl,i1,i2)                                               \
00414                         {   word64 _i1 = (i1), _i2 = (i2);                                 \
00415                                 word64 m1= MUL32(_i1,_i2>>32);                                 \
00416                                 word64 m2= MUL32(_i1>>32,_i2);                                 \
00417                                 rh         = MUL32(_i1>>32,_i2>>32);                             \
00418                                 rl         = MUL32(_i1,_i2);                                     \
00419                                 ADD128(rh,rl,(m1 >> 32),(m1 << 32));                             \
00420                                 ADD128(rh,rl,(m2 >> 32),(m2 << 32));                             \
00421                         }
00422                 #define AccumulateNH(a, b, c)   {\
00423                         word64 p = MUL32(b, c);\
00424                         a##1 += word32((p)>>32);\
00425                         a##0 += word32(p);\
00426                         p = MUL32((b)>>32, c);\
00427                         a##2 += word32((p)>>32);\
00428                         a##1 += word32(p);\
00429                         p = MUL32((b)>>32, (c)>>32);\
00430                         a##2 += p;\
00431                         p = MUL32(b, (c)>>32);\
00432                         a##1 += word32(p);\
00433                         a##2 += word32(p>>32);}
00434         #endif
00435 #endif
      // default: not the 32-bit fallback
00436 #ifndef VMAC_BOOL_32BIT
00437         #define VMAC_BOOL_32BIT 0
00438 #endif
      // portable ADD128 for every configuration that did not supply its own above;
      // the carry out of the low word is detected by (rl) < (_il) after the addition
00439 #ifndef ADD128
00440         #define ADD128(rh,rl,ih,il)                                          \
00441                 {   word64 _il = (il);                                         \
00442                         (rl) += (_il);                                               \
00443                         (rh) += (ih) + ((rl) < (_il));                               \
00444                 }
00445 #endif
00446 
00447 #if !(defined(_MSC_VER) && _MSC_VER < 1300)
00448 template <bool T_128BitTag>
00449 #endif
00450 void VMAC_Base::VHASH_Update_Template(const word64 *data, size_t blocksRemainingInWord64)
00451 {
00452         #define INNER_LOOP_ITERATION(j) {\
00453                 word64 d0 = ConditionalByteReverse(LITTLE_ENDIAN_ORDER, data[i+2*j+0]);\
00454                 word64 d1 = ConditionalByteReverse(LITTLE_ENDIAN_ORDER, data[i+2*j+1]);\
00455                 AccumulateNH(nhA, d0+nhK[i+2*j+0], d1+nhK[i+2*j+1]);\
00456                 if (T_128BitTag)\
00457                         AccumulateNH(nhB, d0+nhK[i+2*j+2], d1+nhK[i+2*j+3]);\
00458                 }
00459 
00460 #if (defined(_MSC_VER) && _MSC_VER < 1300)
00461         bool T_128BitTag = m_is128;
00462 #endif
00463         size_t L1KeyLengthInWord64 = m_L1KeyLength / 8;
00464         size_t innerLoopEnd = L1KeyLengthInWord64;
00465         const word64 *nhK = m_nhKey();
00466         word64 *polyS = m_polyState();
00467         bool isFirstBlock = true;
00468         size_t i;
00469 
00470         #if !VMAC_BOOL_32BIT
00471                 #if VMAC_BOOL_WORD128
00472                         word128 a1, a2;
00473                 #else
00474                         word64 ah1, al1, ah2, al2;
00475                 #endif
00476                 word64 kh1, kl1, kh2, kl2;
00477                 kh1=(polyS+0*4+2)[0]; kl1=(polyS+0*4+2)[1];
00478                 if (T_128BitTag)
00479                 {
00480                         kh2=(polyS+1*4+2)[0]; kl2=(polyS+1*4+2)[1];
00481                 }
00482         #endif
00483 
00484         do
00485         {
00486                 DeclareNH(nhA);
00487                 DeclareNH(nhB);
00488 
00489                 i = 0;
00490                 if (blocksRemainingInWord64 < L1KeyLengthInWord64)
00491                 {
00492                         if (blocksRemainingInWord64 % 8)
00493                         {
00494                                 innerLoopEnd = blocksRemainingInWord64 % 8;
00495                                 for (; i<innerLoopEnd; i+=2)
00496                                         INNER_LOOP_ITERATION(0);
00497                         }
00498                         innerLoopEnd = blocksRemainingInWord64;
00499                 }
00500                 for (; i<innerLoopEnd; i+=8)
00501                 {
00502                         INNER_LOOP_ITERATION(0);
00503                         INNER_LOOP_ITERATION(1);
00504                         INNER_LOOP_ITERATION(2);
00505                         INNER_LOOP_ITERATION(3);
00506                 }
00507                 blocksRemainingInWord64 -= innerLoopEnd;
00508                 data += innerLoopEnd;
00509 
00510                 #if VMAC_BOOL_32BIT
00511                         word32 nh0[2],  nh1[2];
00512                         word64 nh2[2];
00513 
00514                         nh0[0] = word32(nhA0);
00515                         nhA1 += (nhA0 >> 32);
00516                         nh1[0] = word32(nhA1);
00517                         nh2[0] = (nhA2 + (nhA1 >> 32)) & m62;
00518 
00519                         if (T_128BitTag)
00520                         {
00521                                 nh0[1] = word32(nhB0);
00522                                 nhB1 += (nhB0 >> 32);
00523                                 nh1[1] = word32(nhB1);
00524                                 nh2[1] = (nhB2 + (nhB1 >> 32)) & m62;
00525                         }
00526 
00527                         #define a0 (((word32 *)(polyS+i*4))[2+NativeByteOrder::ToEnum()])
00528                         #define a1 (*(((word32 *)(polyS+i*4))+3-NativeByteOrder::ToEnum()))             // workaround for GCC 3.2
00529                         #define a2 (((word32 *)(polyS+i*4))[0+NativeByteOrder::ToEnum()])
00530                         #define a3 (*(((word32 *)(polyS+i*4))+1-NativeByteOrder::ToEnum()))
00531                         #define aHi ((polyS+i*4)[0])
00532                         #define k0 (((word32 *)(polyS+i*4+2))[2+NativeByteOrder::ToEnum()])
00533                         #define k1 (*(((word32 *)(polyS+i*4+2))+3-NativeByteOrder::ToEnum()))
00534                         #define k2 (((word32 *)(polyS+i*4+2))[0+NativeByteOrder::ToEnum()])
00535                         #define k3 (*(((word32 *)(polyS+i*4+2))+1-NativeByteOrder::ToEnum()))
00536                         #define kHi ((polyS+i*4+2)[0])
00537 
00538                         if (isFirstBlock)
00539                         {
00540                                 isFirstBlock = false;
00541                                 if (m_isFirstBlock)
00542                                 {
00543                                         m_isFirstBlock = false;
00544                                         for (i=0; i<=(size_t)T_128BitTag; i++)
00545                                         {
00546                                                 word64 t = (word64)nh0[i] + k0;
00547                                                 a0 = (word32)t;
00548                                                 t = (t >> 32) + nh1[i] + k1;
00549                                                 a1 = (word32)t;
00550                                                 aHi = (t >> 32) + nh2[i] + kHi;
00551                                         }
00552                                         continue;
00553                                 }
00554                         }
00555                         for (i=0; i<=(size_t)T_128BitTag; i++)
00556                         {
00557                                 word64 p, t;
00558                                 word32 t2;
00559 
00560                                 p = MUL32(a3, 2*k3);
00561                                 p += nh2[i];
00562                                 p += MUL32(a0, k2);
00563                                 p += MUL32(a1, k1);
00564                                 p += MUL32(a2, k0);
00565                                 t2 = (word32)p;
00566                                 p >>= 32;
00567                                 p += MUL32(a0, k3);
00568                                 p += MUL32(a1, k2);
00569                                 p += MUL32(a2, k1);
00570                                 p += MUL32(a3, k0);
00571                                 t = (word64(word32(p) & 0x7fffffff) << 32) | t2;
00572                                 p >>= 31;
00573                                 p += nh0[i];
00574                                 p += MUL32(a0, k0);
00575                                 p += MUL32(a1, 2*k3);
00576                                 p += MUL32(a2, 2*k2);
00577                                 p += MUL32(a3, 2*k1);
00578                                 t2 = (word32)p;
00579                                 p >>= 32;
00580                                 p += nh1[i];
00581                                 p += MUL32(a0, k1);
00582                                 p += MUL32(a1, k0);
00583                                 p += MUL32(a2, 2*k3);
00584                                 p += MUL32(a3, 2*k2);
00585                                 a0 = t2;
00586                                 a1 = (word32)p;
00587                                 aHi = (p >> 32) + t;
00588                         }
00589 
00590                         #undef a0
00591                         #undef a1
00592                         #undef a2
00593                         #undef a3
00594                         #undef aHi
00595                         #undef k0
00596                         #undef k1
00597                         #undef k2
00598                         #undef k3               
00599                         #undef kHi
00600                 #else           // #if VMAC_BOOL_32BIT
00601                         if (isFirstBlock)
00602                         {
00603                                 isFirstBlock = false;
00604                                 if (m_isFirstBlock)
00605                                 {
00606                                         m_isFirstBlock = false;
00607                                         #if VMAC_BOOL_WORD128
00608                                                 #define first_poly_step(a, kh, kl, m)   a = (m & m126) + ((word128(kh) << 64) | kl)
00609 
00610                                                 first_poly_step(a1, kh1, kl1, nhA);
00611                                                 if (T_128BitTag)
00612                                                         first_poly_step(a2, kh2, kl2, nhB);
00613                                         #else
00614                                                 #define first_poly_step(ah, al, kh, kl, mh, ml)         {\
00615                                                         mh &= m62;\
00616                                                         ADD128(mh, ml, kh, kl); \
00617                                                         ah = mh; al = ml;}
00618 
00619                                                 first_poly_step(ah1, al1, kh1, kl1, nhA1, nhA0);
00620                                                 if (T_128BitTag)
00621                                                         first_poly_step(ah2, al2, kh2, kl2, nhB1, nhB0);
00622                                         #endif
00623                                         continue;
00624                                 }
00625                                 else
00626                                 {
00627                                         #if VMAC_BOOL_WORD128
00628                                                 a1 = (word128((polyS+0*4)[0]) << 64) | (polyS+0*4)[1];
00629                                         #else
00630                                                 ah1=(polyS+0*4)[0]; al1=(polyS+0*4)[1];
00631                                         #endif
00632                                         if (T_128BitTag)
00633                                         {
00634                                                 #if VMAC_BOOL_WORD128
00635                                                         a2 = (word128((polyS+1*4)[0]) << 64) | (polyS+1*4)[1];
00636                                                 #else
00637                                                         ah2=(polyS+1*4)[0]; al2=(polyS+1*4)[1];
00638                                                 #endif
00639                                         }
00640                                 }
00641                         }
00642 
00643                         #if VMAC_BOOL_WORD128
00644                                 #define poly_step(a, kh, kl, m) \
00645                                 {   word128 t1, t2, t3, t4;\
00646                                         Multiply128(t2, a>>64, kl);\
00647                                         Multiply128(t3, a, kh);\
00648                                         Multiply128(t1, a, kl);\
00649                                         Multiply128(t4, a>>64, 2*kh);\
00650                                         t2 += t3;\
00651                                         t4 += t1;\
00652                                         t2 += t4>>64;\
00653                                         a = (word128(word64(t2)&m63) << 64) | word64(t4);\
00654                                         t2 *= 2;\
00655                                         a += m & m126;\
00656                                         a += t2>>64;}
00657 
00658                                 poly_step(a1, kh1, kl1, nhA);
00659                                 if (T_128BitTag)
00660                                         poly_step(a2, kh2, kl2, nhB);
00661                         #else
00662                                 #define poly_step(ah, al, kh, kl, mh, ml)                                       \
00663                                 {   word64 t1h, t1l, t2h, t2l, t3h, t3l, z=0;                           \
00664                                         /* compute ab*cd, put bd into result registers */       \
00665                                         MUL64(t2h,t2l,ah,kl);                                   \
00666                                         MUL64(t3h,t3l,al,kh);                                   \
00667                                         MUL64(t1h,t1l,ah,2*kh);                                 \
00668                                         MUL64(ah,al,al,kl);                                     \
00669                                         /* add together ad + bc */                              \
00670                                         ADD128(t2h,t2l,t3h,t3l);                                \
00671                                         /* add 2 * ac to result */                              \
00672                                         ADD128(ah,al,t1h,t1l);                                  \
00673                                         /* now (ah,al), (t2l,2*t2h) need summing */             \
00674                                         /* first add the high registers, carrying into t2h */   \
00675                                         ADD128(t2h,ah,z,t2l);                                   \
00676                                         /* double t2h and add top bit of ah */                  \
00677                                         t2h += t2h + (ah >> 63);                                \
00678                                         ah &= m63;                                              \
00679                                         /* now add the low registers */                         \
00680                                         mh &= m62;                                                                                              \
00681                                         ADD128(ah,al,mh,ml);                                    \
00682                                         ADD128(ah,al,z,t2h);                                    \
00683                                 }
00684 
00685                                 poly_step(ah1, al1, kh1, kl1, nhA1, nhA0);
00686                                 if (T_128BitTag)
00687                                         poly_step(ah2, al2, kh2, kl2, nhB1, nhB0);
00688                         #endif
00689                 #endif          // #if VMAC_BOOL_32BIT
00690         } while (blocksRemainingInWord64);
00691 
00692         #if VMAC_BOOL_WORD128
00693                 (polyS+0*4)[0]=word64(a1>>64); (polyS+0*4)[1]=word64(a1);
00694                 if (T_128BitTag)
00695                 {
00696                         (polyS+1*4)[0]=word64(a2>>64); (polyS+1*4)[1]=word64(a2);
00697                 }
00698         #elif !VMAC_BOOL_32BIT
00699                 (polyS+0*4)[0]=ah1; (polyS+0*4)[1]=al1;
00700                 if (T_128BitTag)
00701                 {
00702                         (polyS+1*4)[0]=ah2; (polyS+1*4)[1]=al2;
00703                 }
00704         #endif
00705 }
00706 
00707 inline void VMAC_Base::VHASH_Update(const word64 *data, size_t blocksRemainingInWord64)
00708 {
00709 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86
00710         if (HasSSE2())
00711         {
00712                 VHASH_Update_SSE2(data, blocksRemainingInWord64, 0);
00713                 if (m_is128)
00714                         VHASH_Update_SSE2(data, blocksRemainingInWord64, 1);
00715                 m_isFirstBlock = false;
00716         }
00717         else
00718 #endif
00719         {
00720 #if defined(_MSC_VER) && _MSC_VER < 1300
00721                 VHASH_Update_Template(data, blocksRemainingInWord64);
00722 #else
00723                 if (m_is128)
00724                         VHASH_Update_Template<true>(data, blocksRemainingInWord64);
00725                 else
00726                         VHASH_Update_Template<false>(data, blocksRemainingInWord64);
00727 #endif
00728         }
00729 }
00730 
00731 size_t VMAC_Base::HashMultipleBlocks(const word64 *data, size_t length)
00732 {
00733         size_t remaining = ModPowerOf2(length, m_L1KeyLength);
00734         VHASH_Update(data, (length-remaining)/8);
00735         return remaining;
00736 }
00737 
00738 static word64 L3Hash(const word64 *input, const word64 *l3Key, size_t len)
00739 {
00740     word64 rh, rl, t, z=0;
00741         word64 p1 = input[0], p2 = input[1];
00742         word64 k1 = l3Key[0], k2 = l3Key[1];
00743 
00744     /* fully reduce (p1,p2)+(len,0) mod p127 */
00745     t = p1 >> 63;
00746     p1 &= m63;
00747     ADD128(p1, p2, len, t);
00748     /* At this point, (p1,p2) is at most 2^127+(len<<64) */
00749     t = (p1 > m63) + ((p1 == m63) & (p2 == m64));
00750     ADD128(p1, p2, z, t);
00751     p1 &= m63;
00752 
00753     /* compute (p1,p2)/(2^64-2^32) and (p1,p2)%(2^64-2^32) */
00754     t = p1 + (p2 >> 32);
00755     t += (t >> 32);
00756     t += (word32)t > 0xfffffffeU;
00757     p1 += (t >> 32);
00758     p2 += (p1 << 32);
00759 
00760     /* compute (p1+k1)%p64 and (p2+k2)%p64 */
00761     p1 += k1;
00762     p1 += (0 - (p1 < k1)) & 257;
00763     p2 += k2;
00764     p2 += (0 - (p2 < k2)) & 257;
00765 
00766     /* compute (p1+k1)*(p2+k2)%p64 */
00767     MUL64(rh, rl, p1, p2);
00768     t = rh >> 56;
00769     ADD128(t, rl, z, rh);
00770     rh <<= 8;
00771     ADD128(t, rl, z, rh);
00772     t += t << 8;
00773     rl += t;
00774     rl += (0 - (rl < t)) & 257;
00775     rl += (0 - (rl > p64-1)) & 257;
00776     return rl;
00777 }
00778 
00779 void VMAC_Base::TruncatedFinal(byte *mac, size_t size)
00780 {
00781         size_t len = ModPowerOf2(GetBitCountLo()/8, m_L1KeyLength);
00782 
00783         if (len)
00784         {
00785                 memset(m_data()+len, 0, (0-len)%16);
00786                 VHASH_Update(DataBuf(), ((len+15)/16)*2);
00787                 len *= 8;       // convert to bits
00788         }
00789         else if (m_isFirstBlock)
00790         {
00791                 // special case for empty string
00792                 m_polyState()[0] = m_polyState()[2];
00793                 m_polyState()[1] = m_polyState()[3];
00794                 if (m_is128)
00795                 {
00796                         m_polyState()[4] = m_polyState()[6];
00797                         m_polyState()[5] = m_polyState()[7];
00798                 }
00799         }
00800 
00801         if (m_is128)
00802         {
00803                 word64 t[2];
00804                 t[0] = L3Hash(m_polyState(), m_l3Key(), len) + GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad());
00805                 t[1] = L3Hash(m_polyState()+4, m_l3Key()+2, len) + GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad()+8);
00806                 if (size == 16)
00807                 {
00808                         PutWord(false, BIG_ENDIAN_ORDER, mac, t[0]);
00809                         PutWord(false, BIG_ENDIAN_ORDER, mac+8, t[1]);
00810                 }
00811                 else
00812                 {
00813                         t[0] = ConditionalByteReverse(BIG_ENDIAN_ORDER, t[0]);
00814                         t[1] = ConditionalByteReverse(BIG_ENDIAN_ORDER, t[1]);
00815                         memcpy(mac, t, size);
00816                 }
00817         }
00818         else
00819         {
00820                 word64 t = L3Hash(m_polyState(), m_l3Key(), len);
00821                 t += GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad() + (m_nonce()[IVSize()-1]&1) * 8);
00822                 if (size == 8)
00823                         PutWord(false, BIG_ENDIAN_ORDER, mac, t);
00824                 else
00825                 {
00826                         t = ConditionalByteReverse(BIG_ENDIAN_ORDER, t);
00827                         memcpy(mac, &t, size);
00828                 }
00829         }
00830 }
00831 
00832 NAMESPACE_END

Generated on Mon Aug 9 2010 15:56:38 for Crypto++ by  doxygen 1.7.1