vmac.cpp

00001 // vmac.cpp - written and placed in the public domain by Wei Dai
00002 // based on Ted Krovetz's public domain vmac.c and draft-krovetz-vmac-01.txt
00003 
00004 #include "pch.h"
00005 #include "vmac.h"
00006 #include "argnames.h"
00007 #include "cpu.h"
00008 
00009 NAMESPACE_BEGIN(CryptoPP)
00010 
00011 #if defined(_MSC_VER) && !defined(CRYPTOPP_SLOW_WORD64)
00012 #include <intrin.h>
00013 #endif
00014 
// VMAC_BOOL_WORD128 is 1 when a native 128-bit integer type is available and the
// x64 inline-assembly path is not in use, otherwise 0.
// It is evaluated with an explicit #if rather than by expanding to an expression
// containing `defined(...)`: a `defined` operator produced by macro expansion inside
// a later #if has undefined behaviour and is handled differently across compilers.
#if defined(CRYPTOPP_WORD128_AVAILABLE) && !defined(CRYPTOPP_X64_ASM_AVAILABLE)
#define VMAC_BOOL_WORD128 1
#else
#define VMAC_BOOL_WORD128 0
#endif
00016 #ifdef __BORLANDC__
// Under Borland the keyword `const` is temporarily defined away, so the five
// declarations below become plain `static word64` objects; it is restored by the
// matching #undef further down.
00017 #define const   // Turbo C++ 2006 workaround
00018 #endif
// File-local 64-bit constants used by the VMAC key setup and hashing code.
00019 static const word64 p64   = W64LIT(0xfffffffffffffeff);  /* 2^64 - 257 prime  */
00020 static const word64 m62   = W64LIT(0x3fffffffffffffff);  /* 62-bit mask       */
00021 static const word64 m63   = W64LIT(0x7fffffffffffffff);  /* 63-bit mask       */
00022 static const word64 m64   = W64LIT(0xffffffffffffffff);  /* 64-bit mask       */
00023 static const word64 mpoly = W64LIT(0x1fffffff1fffffff);  /* Poly key mask     */
00024 #ifdef __BORLANDC__
00025 #undef const
00026 #endif
// m126 is only needed by the word128 code path: m62 in the upper 64 bits and
// m64 in the lower 64 bits.
00027 #if VMAC_BOOL_WORD128
00028 static const word128 m126 = (word128(m62)<<64)|m64;              /* 126-bit mask      */
00029 #endif
00030 
00031 void VMAC_Base::UncheckedSetKey(const byte *userKey, unsigned int keylength, const NameValuePairs &params)
00032 {
00033         int digestLength = params.GetIntValueWithDefault(Name::DigestSize(), DefaultDigestSize());
00034         if (digestLength != 8 && digestLength != 16)
00035                 throw InvalidArgument("VMAC: DigestSize must be 8 or 16");
00036         m_is128 = digestLength == 16;
00037 
00038         m_L1KeyLength = params.GetIntValueWithDefault(Name::L1KeyLength(), 128);
00039         if (m_L1KeyLength <= 0 || m_L1KeyLength % 128 != 0)
00040                 throw InvalidArgument("VMAC: L1KeyLength must be a positive multiple of 128");
00041 
00042         AllocateBlocks();
00043 
00044         BlockCipher &cipher = AccessCipher();
00045         cipher.SetKey(userKey, keylength, params);
00046         unsigned int blockSize = cipher.BlockSize();
00047         unsigned int blockSizeInWords = blockSize / sizeof(word64);
00048         SecBlock<word64> out(blockSizeInWords);
00049         SecByteBlock in;
00050         in.CleanNew(blockSize);
00051         size_t i;
00052 
00053         /* Fill nh key */
00054         in[0] = 0x80; 
00055         for (i = 0; i < m_nhKeySize()*sizeof(word64); i += blockSize)
00056         {
00057                 cipher.ProcessBlock(in, out.BytePtr());
00058                 ConditionalByteReverse(BIG_ENDIAN_ORDER, m_nhKey()+i/sizeof(word64), out.begin(), blockSize);
00059                 in[15]++;
00060         }
00061 
00062         /* Fill poly key */
00063         in[0] = 0xC0;
00064         in[15] = 0;
00065         for (i = 0; i <= (size_t)m_is128; i++)
00066         {
00067                 cipher.ProcessBlock(in, out.BytePtr());
00068                 m_polyState()[i*4+2] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr()) & mpoly;
00069                 m_polyState()[i*4+3]  = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr()+8) & mpoly;
00070                 in[15]++;
00071         }
00072 
00073         /* Fill ip key */
00074         in[0] = 0xE0;
00075         in[15] = 0;
00076         word64 *l3Key = m_l3Key();
00077         for (i = 0; i <= (size_t)m_is128; i++)
00078                 do
00079                 {
00080                         cipher.ProcessBlock(in, out.BytePtr());
00081                         l3Key[i*2+0] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr());
00082                         l3Key[i*2+1] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr()+8);
00083                         in[15]++;
00084                 } while ((l3Key[i*2+0] >= p64) || (l3Key[i*2+1] >= p64));
00085 
00086         m_padCached = false;
00087         Resynchronize(GetIVAndThrowIfInvalid(params));
00088 }
00089 
00090 void VMAC_Base::GetNextIV(RandomNumberGenerator &rng, byte *IV)
00091 {
00092         SimpleKeyingInterface::GetNextIV(rng, IV);
00093         IV[0] &= 0x7f;
00094 }
00095 
00096 void VMAC_Base::Resynchronize(const byte *IV)
00097 {
00098         int s = IVSize();
00099         if (m_is128)
00100         {
00101                 memcpy(m_nonce(), IV, s);
00102                 AccessCipher().ProcessBlock(m_nonce(), m_pad());
00103         }
00104         else
00105         {
00106                 m_padCached = m_padCached && (m_nonce()[s-1] | 1) == (IV[s-1] | 1) && memcmp(m_nonce(), IV, s-1) == 0;
00107                 if (!m_padCached)
00108                 {
00109                         memcpy(m_nonce(), IV, s);
00110                         m_nonce()[s-1] &= 0xfe;
00111                         AccessCipher().ProcessBlock(m_nonce(), m_pad());
00112                         m_padCached = true;
00113                 }
00114                 m_nonce()[s-1] = IV[s-1];
00115         }
00116         m_isFirstBlock = true;
00117         Restart();
00118 }
00119 
00120 void VMAC_Base::HashEndianCorrectedBlock(const word64 *data)
00121 {
00122         assert(false);
00123 }
00124 
00125 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86
00126 #pragma warning(disable: 4731)  // frame pointer register 'ebp' modified by inline assembly code
// VHASH_Update_SSE2: hand-written 32-bit x86 inline-assembly version of the VHASH
// update for a single tag part, built on the MMX registers with paddq/pmuludq.
//   data                    - message words to absorb (walked through esi)
//   blocksRemainingInWord64 - number of 64-bit words to process (counted down in ecx)
//   tagPart                 - selects the key/state slice: the NH key starts at
//                             nhK + tagPart*2 words, the polynomial state at
//                             polyS + tagPart*4 words
// Register roles: ebx = L1 key length in 64-bit words, edi = NH key cursor,
// eax = base of the polynomial state slice, dl = first-block flag.
// The first-block flag is only cleared in edx; m_isFirstBlock itself is not written
// here - presumably the caller updates it (TODO confirm).
// NOTE(review): in the GCC variant ecx, esi, edi and edx are declared as inputs only
// but are modified by the assembly body - confirm this is safe with the compilers in use.
00127 void
00128 #ifdef __GNUC__
00129 __attribute__ ((noinline))              // Intel Compiler 9.1 workaround
00130 #endif
00131 VMAC_Base::VHASH_Update_SSE2(const word64 *data, size_t blocksRemainingInWord64, int tagPart)
00132 {
00133         const word64 *nhK = m_nhKey();
00134         word64 *polyS = m_polyState();
00135 
00136 #ifdef __GNUC__
        // GCC: ebx is saved in temp and reloaded with m_L1KeyLength; all other registers
        // are preloaded through the operand constraints at the end of the statement.
00137         word32 temp;
00138         __asm__ __volatile__
00139         (
00140         AS2(    mov             %%ebx, %0)
00141         AS2(    mov             %1, %%ebx)
00142         ".intel_syntax noprefix;"
00143 #else
        // MSVC: load the same registers explicitly.
00144         #if _MSC_VER < 1300
00145         word32 L1KeyLength = m_L1KeyLength;
00146         char isFirstBlock = m_isFirstBlock;
00147         AS2(    mov             ebx, [L1KeyLength])
00148         AS2(    mov             dl, [isFirstBlock])
00149         #else
00150         AS2(    mov             ecx, this)
00151         AS2(    mov             ebx, [ecx+m_L1KeyLength])
00152         AS2(    mov             dl, [ecx+m_isFirstBlock])
00153         #endif
        // eax = tagPart*16 -> edi = nhK + tagPart*16 bytes; eax doubled = tagPart*32 -> eax = polyS + tagPart*32 bytes
00154         AS2(    mov             eax, tagPart)
00155         AS2(    shl             eax, 4)
00156         AS2(    mov             edi, nhK)
00157         AS2(    add             edi, eax)
00158         AS2(    add             eax, eax)
00159         AS2(    add             eax, polyS)
00160 
00161         AS2(    mov             esi, data)
00162         AS2(    mov             ecx, blocksRemainingInWord64)
00163 #endif
00164 
        // ebx = m_L1KeyLength / 8, i.e. the L1 key length in 64-bit words.
        // ebp is saved because it is used as scratch below; 12 bytes of stack provide
        // three 32-bit spill slots ([esp], [esp+4], [esp+8]).
00165         AS2(    shr             ebx, 3)
00166         AS1(    push    ebp)
00167         AS2(    sub             esp, 12)
        // label 4: start of one NH pass; min(ecx, ebx) words are consumed in this pass
00168         ASL(4)
00169         AS2(    mov             ebp, ebx)
00170         AS2(    cmp             ecx, ebx)
00171         AS2(    cmovl   ebp, ecx)
00172         AS2(    sub             ecx, ebp)
00173         AS2(    lea             ebp, [edi+8*ebp])       // end of nhK
        // first word pair: (data[0]+key[0]) and (data[1]+key[1]), multiplied via 32x32-bit partial products
00174         AS2(    movq    mm6, [esi])
00175         AS2(    paddq   mm6, [edi])
00176         AS2(    movq    mm5, [esi+8])
00177         AS2(    paddq   mm5, [edi+8])
00178         AS2(    add             esi, 16)
00179         AS2(    add             edi, 16)
00180         AS2(    movq    mm4, mm6)
00181         ASS(    pshufw  mm2, mm6, 1, 0, 3, 2)
00182         AS2(    pmuludq mm6, mm5)
00183         ASS(    pshufw  mm3, mm5, 1, 0, 3, 2)
00184         AS2(    pmuludq mm5, mm2)
00185         AS2(    pmuludq mm2, mm3)
00186         AS2(    pmuludq mm3, mm4)
00187         AS2(    pxor    mm7, mm7)
00188         AS2(    movd    [esp], mm6)
00189         AS2(    psrlq   mm6, 32)
00190         AS2(    movd    [esp+4], mm5)
00191         AS2(    psrlq   mm5, 32)
00192         AS2(    cmp             edi, ebp)
00193         ASJ(    je,             1, f)
        // label 0: NH inner loop, one pair of message words (16 bytes) per iteration,
        // repeated until the key cursor edi reaches ebp
00194         ASL(0)
00195         AS2(    movq    mm0, [esi])
00196         AS2(    paddq   mm0, [edi])
00197         AS2(    movq    mm1, [esi+8])
00198         AS2(    paddq   mm1, [edi+8])
00199         AS2(    add             esi, 16)
00200         AS2(    add             edi, 16)
00201         AS2(    movq    mm4, mm0)
00202         AS2(    paddq   mm5, mm2)
00203         ASS(    pshufw  mm2, mm0, 1, 0, 3, 2)
00204         AS2(    pmuludq mm0, mm1)
00205         AS2(    movd    [esp+8], mm3)
00206         AS2(    psrlq   mm3, 32)
00207         AS2(    paddq   mm5, mm3)
00208         ASS(    pshufw  mm3, mm1, 1, 0, 3, 2)
00209         AS2(    pmuludq mm1, mm2)
00210         AS2(    pmuludq mm2, mm3)
00211         AS2(    pmuludq mm3, mm4)
00212         AS2(    movd    mm4, [esp])
00213         AS2(    paddq   mm7, mm4)
00214         AS2(    movd    mm4, [esp+4])
00215         AS2(    paddq   mm6, mm4)
00216         AS2(    movd    mm4, [esp+8])
00217         AS2(    paddq   mm6, mm4)
00218         AS2(    movd    [esp], mm0)
00219         AS2(    psrlq   mm0, 32)
00220         AS2(    paddq   mm6, mm0)
00221         AS2(    movd    [esp+4], mm1)
00222         AS2(    psrlq   mm1, 32)
00223         AS2(    paddq   mm5, mm1)
00224         AS2(    cmp             edi, ebp)
00225         ASJ(    jne,    0, b)
        // label 1: add in the partial products still pending from the last word pair
00226         ASL(1)
00227         AS2(    paddq   mm5, mm2)
00228         AS2(    movd    [esp+8], mm3)
00229         AS2(    psrlq   mm3, 32)
00230         AS2(    paddq   mm5, mm3)
00231         AS2(    movd    mm4, [esp])
00232         AS2(    paddq   mm7, mm4)
00233         AS2(    movd    mm4, [esp+4])
00234         AS2(    paddq   mm6, mm4)
00235         AS2(    movd    mm4, [esp+8])
00236         AS2(    paddq   mm6, mm4)
00237         AS2(    lea             ebp, [8*ebx])
00238         AS2(    sub             edi, ebp)               // reset edi to start of nhK
00239 
        // propagate the 32-bit carries: the two low 32-bit limbs of the NH result are
        // spilled to [esp] and [esp+4], the remainder is kept in mm5
00240         AS2(    movd    [esp], mm7)
00241         AS2(    psrlq   mm7, 32)
00242         AS2(    paddq   mm6, mm7)
00243         AS2(    movd    [esp+4], mm6)
00244         AS2(    psrlq   mm6, 32)
00245         AS2(    paddq   mm5, mm6)
        // shifting left then right by 2 clears the two most significant bits of mm5
00246         AS2(    psllq   mm5, 2)
00247         AS2(    psrlq   mm5, 2)
00248 
        // a0..a3: the four 32-bit limbs of the 128-bit accumulator stored at [eax];
        // k0..k3: the four 32-bit limbs of the polynomial key stored 16 bytes further on.
        // In both cases limb 0 sits at byte offset 8, i.e. the high 64-bit word is stored first.
00249 #define a0 [eax+2*4]
00250 #define a1 [eax+3*4]
00251 #define a2 [eax+0*4]
00252 #define a3 [eax+1*4]
00253 #define k0 [eax+2*8+2*4]
00254 #define k1 [eax+2*8+3*4]
00255 #define k2 [eax+2*8+0*4]
00256 #define k3 [eax+2*8+1*4]
        // dl != 0 (first block): the accumulator is simply set to NH result + key,
        // without any multiplication; dl == 0: jump to the general step at label 2
00257         AS2(    test    dl, dl)
00258         ASJ(    jz,             2, f)
00259         AS2(    movd    mm1, k0)
00260         AS2(    movd    mm0, [esp])
00261         AS2(    paddq   mm0, mm1)
00262         AS2(    movd    a0, mm0)
00263         AS2(    psrlq   mm0, 32)
00264         AS2(    movd    mm1, k1)
00265         AS2(    movd    mm2, [esp+4])
00266         AS2(    paddq   mm1, mm2)
00267         AS2(    paddq   mm0, mm1)
00268         AS2(    movd    a1, mm0)
00269         AS2(    psrlq   mm0, 32)
00270         AS2(    paddq   mm5, k2)
00271         AS2(    paddq   mm0, mm5)
00272         AS2(    movq    a2, mm0)
        // clear the first-block flag (dl) so later passes take the general path
00273         AS2(    xor             edx, edx)
00274         ASJ(    jmp,    3, f)
        // label 2: general polynomial step - multiply the accumulator limbs a0..a3 by the
        // key limbs k0..k3 using 32x32-bit partial products and add in the NH result
00275         ASL(2)
00276         AS2(    movd    mm0, a3)
00277         AS2(    movq    mm4, mm0)
00278         AS2(    pmuludq mm0, k3)                // a3*k3
00279         AS2(    movd    mm1, a0)
00280         AS2(    pmuludq mm1, k2)                // a0*k2
00281         AS2(    movd    mm2, a1)
00282         AS2(    movd    mm6, k1)
00283         AS2(    pmuludq mm2, mm6)               // a1*k1
00284         AS2(    movd    mm3, a2)
00285         AS2(    psllq   mm0, 1)
00286         AS2(    paddq   mm0, mm5)
00287         AS2(    movq    mm5, mm3)
00288         AS2(    movd    mm7, k0)
00289         AS2(    pmuludq mm3, mm7)               // a2*k0
00290         AS2(    pmuludq mm4, mm7)               // a3*k0
00291         AS2(    pmuludq mm5, mm6)               // a2*k1
00292         AS2(    paddq   mm0, mm1)
00293         AS2(    movd    mm1, a1)
00294         AS2(    paddq   mm4, mm5)
00295         AS2(    movq    mm5, mm1)
00296         AS2(    pmuludq mm1, k2)                // a1*k2
00297         AS2(    paddq   mm0, mm2)
00298         AS2(    movd    mm2, a0)
00299         AS2(    paddq   mm0, mm3)
00300         AS2(    movq    mm3, mm2)
00301         AS2(    pmuludq mm2, k3)                // a0*k3
00302         AS2(    pmuludq mm3, mm7)               // a0*k0
00303         AS2(    movd    [esp+8], mm0)
00304         AS2(    psrlq   mm0, 32)
00305         AS2(    pmuludq mm7, mm5)               // a1*k0
00306         AS2(    pmuludq mm5, k3)                // a1*k3
00307         AS2(    paddq   mm0, mm1)
00308         AS2(    movd    mm1, a2)
00309         AS2(    pmuludq mm1, k2)                // a2*k2
00310         AS2(    paddq   mm0, mm2)
00311         AS2(    paddq   mm0, mm4)
00312         AS2(    movq    mm4, mm0)
00313         AS2(    movd    mm2, a3)
00314         AS2(    pmuludq mm2, mm6)               // a3*k1
00315         AS2(    pmuludq mm6, a0)                // a0*k1
00316         AS2(    psrlq   mm0, 31)
00317         AS2(    paddq   mm0, mm3)
00318         AS2(    movd    mm3, [esp])
00319         AS2(    paddq   mm0, mm3)
00320         AS2(    movd    mm3, a2)
00321         AS2(    pmuludq mm3, k3)                // a2*k3
00322         AS2(    paddq   mm5, mm1)
00323         AS2(    movd    mm1, a3)
00324         AS2(    pmuludq mm1, k2)                // a3*k2
00325         AS2(    paddq   mm5, mm2)
00326         AS2(    movd    mm2, [esp+4])
00327         AS2(    psllq   mm5, 1)
00328         AS2(    paddq   mm0, mm5)
00329         AS2(    psllq   mm4, 33)
00330         AS2(    movd    a0, mm0)
00331         AS2(    psrlq   mm0, 32)
00332         AS2(    paddq   mm6, mm7)
00333         AS2(    movd    mm7, [esp+8])
00334         AS2(    paddq   mm0, mm6)
00335         AS2(    paddq   mm0, mm2)
00336         AS2(    paddq   mm3, mm1)
00337         AS2(    psllq   mm3, 1)
00338         AS2(    paddq   mm0, mm3)
00339         AS2(    psrlq   mm4, 1)
00340         AS2(    movd    a1, mm0)
00341         AS2(    psrlq   mm0, 32)
00342         AS2(    por             mm4, mm7)
00343         AS2(    paddq   mm0, mm4)
00344         AS2(    movq    a2, mm0)
00345 #undef a0
00346 #undef a1
00347 #undef a2
00348 #undef a3
00349 #undef k0
00350 #undef k1
00351 #undef k2
00352 #undef k3
00353 
        // label 3: go back to label 4 for another pass while words remain (ecx != 0)
00354         ASL(3)
00355         AS2(    test    ecx, ecx)
00356         ASJ(    jnz,    4, b)
00357 
        // release the spill slots, restore ebp and leave MMX state
00358         AS2(    add             esp, 12)
00359         AS1(    pop             ebp)
00360         AS1(    emms)
00361 #ifdef __GNUC__
00362         ".att_syntax prefix;"
00363         AS2(    mov     %0, %%ebx)
00364                 : "=m" (temp)
00365                 : "m" (m_L1KeyLength), "c" (blocksRemainingInWord64), "S" (data), "D" (nhK+tagPart*2), "d" (m_isFirstBlock), "a" (polyS+tagPart*4)
00366                 : "memory", "cc"
00367         );
00368 #endif
00369 }
00370 #endif
00371 
00372 #if VMAC_BOOL_WORD128
        // Variant 1: a native 128-bit integer type (word128) is available.
        //   DeclareNH(a)          - declares a zero-initialised 128-bit NH accumulator `a`
        //   MUL64(rh,rl,i1,i2)    - 64x64 -> 128-bit product, high half in rh, low half in rl
        //   AccumulateNH(a,b,c)   - a += b*c, computed as a 128-bit product
        //   Multiply128(r,i1,i2)  - r = product of the low 64 bits of i1 and of i2
00373         #define DeclareNH(a) word128 a=0
00374         #define MUL64(rh,rl,i1,i2) {word128 p = word128(i1)*(i2); rh = word64(p>>64); rl = word64(p);}
00375         #define AccumulateNH(a, b, c) a += word128(b)*(c)
00376         #define Multiply128(r, i1, i2) r = word128(word64(i1)) * word64(i2)
00377 #else
        // MUL32(a,b): unsigned 32x32 -> 64-bit product of the low 32 bits of a and b
        // (uses the __emulu intrinsic on MSVC 2005 and later, excluding the Intel compiler).
00378         #if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER)
00379                 #define MUL32(a, b) __emulu(word32(a), word32(b))
00380         #else
00381                 #define MUL32(a, b) ((word64)((word32)(a)) * (word32)(b))
00382         #endif
00383         #if defined(CRYPTOPP_X64_ASM_AVAILABLE)
                // Variant 2: x64 GCC-style inline assembly. The NH accumulator is a pair of
                // 64-bit words (a0 = low, a1 = high); mulq/addq/adcq do the 128-bit arithmetic.
                // NOTE(review): MUL64/AccumulateNH use a "g" constraint for the mulq operand and
                // list rax only as an input although mulq overwrites it - confirm with the
                // compilers in use.
00384                 #define DeclareNH(a)                    word64 a##0=0, a##1=0
00385                 #define MUL64(rh,rl,i1,i2)              asm ("mulq %3" : "=a"(rl), "=d"(rh) : "a"(i1), "g"(i2) : "cc");
00386                 #define AccumulateNH(a, b, c)   asm ("mulq %3; addq %%rax, %0; adcq %%rdx, %1" : "+r"(a##0), "+r"(a##1) : "a"(b), "g"(c) : "%rdx", "cc");
00387                 #define ADD128(rh,rl,ih,il)     asm ("addq %3, %1; adcq %2, %0" : "+r"(rh),"+r"(rl) : "r"(ih),"r"(il) : "cc");
00388         #elif defined(_MSC_VER) && !defined(CRYPTOPP_SLOW_WORD64)
                // Variant 3: MSVC with the _umul128 intrinsic. Same two-word accumulator;
                // the carry out of the low word is detected with (a0 < pl) after the addition.
00389                 #define DeclareNH(a) word64 a##0=0, a##1=0
00390                 #define MUL64(rh,rl,i1,i2)   (rl) = _umul128(i1,i2,&(rh));
00391                 #define AccumulateNH(a, b, c)   {\
00392                         word64 ph, pl;\
00393                         pl = _umul128(b,c,&ph);\
00394                         a##0 += pl;\
00395                         a##1 += ph + (a##0 < pl);}
00396         #else
                // Variant 4: portable fallback built from 32x32-bit multiplies (VMAC_BOOL_32BIT).
                // The NH accumulator is three 64-bit words: a0 collects bits 0-31, a1 bits 32-63
                // and a2 everything from bit 64 upwards. AccumulateNH does not propagate carries
                // between them; the code that consumes the accumulator folds them together.
00397                 #define VMAC_BOOL_32BIT 1
00398                 #define DeclareNH(a) word64 a##0=0, a##1=0, a##2=0
00399                 #define MUL64(rh,rl,i1,i2)                                               \
00400                         {   word64 _i1 = (i1), _i2 = (i2);                                 \
00401                                 word64 m1= MUL32(_i1,_i2>>32);                                 \
00402                                 word64 m2= MUL32(_i1>>32,_i2);                                 \
00403                                 rh         = MUL32(_i1>>32,_i2>>32);                             \
00404                                 rl         = MUL32(_i1,_i2);                                     \
00405                                 ADD128(rh,rl,(m1 >> 32),(m1 << 32));                             \
00406                                 ADD128(rh,rl,(m2 >> 32),(m2 << 32));                             \
00407                         }
00408                 #define AccumulateNH(a, b, c)   {\
00409                         word64 p = MUL32(b, c);\
00410                         a##1 += word32((p)>>32);\
00411                         a##0 += word32(p);\
00412                         p = MUL32((b)>>32, c);\
00413                         a##2 += word32((p)>>32);\
00414                         a##1 += word32(p);\
00415                         p = MUL32((b)>>32, (c)>>32);\
00416                         a##2 += p;\
00417                         p = MUL32(b, (c)>>32);\
00418                         a##1 += word32(p);\
00419                         a##2 += word32(p>>32);}
00420         #endif
00421 #endif
// VMAC_BOOL_32BIT defaults to 0 for every variant except the portable fallback.
00422 #ifndef VMAC_BOOL_32BIT
00423         #define VMAC_BOOL_32BIT 0
00424 #endif
// Generic ADD128(rh,rl,ih,il): (rh:rl) += (ih:il) as a 128-bit addition; the carry out of
// the low word is detected by comparing the updated rl with the low addend.
// Only defined here when no variant above supplied its own version.
00425 #ifndef ADD128
00426         #define ADD128(rh,rl,ih,il)                                          \
00427                 {   word64 _il = (il);                                         \
00428                         (rl) += (_il);                                               \
00429                         (rh) += (ih) + ((rl) < (_il));                               \
00430                 }
00431 #endif
00432 
00433 #if !(defined(_MSC_VER) && _MSC_VER < 1300)
00434 template <bool T_128BitTag>
00435 #endif
00436 void VMAC_Base::VHASH_Update_Template(const word64 *data, size_t blocksRemainingInWord64)
00437 {
00438         #define INNER_LOOP_ITERATION(j) {\
00439                 word64 d0 = ConditionalByteReverse(LITTLE_ENDIAN_ORDER, data[i+2*j+0]);\
00440                 word64 d1 = ConditionalByteReverse(LITTLE_ENDIAN_ORDER, data[i+2*j+1]);\
00441                 AccumulateNH(nhA, d0+nhK[i+2*j+0], d1+nhK[i+2*j+1]);\
00442                 if (T_128BitTag)\
00443                         AccumulateNH(nhB, d0+nhK[i+2*j+2], d1+nhK[i+2*j+3]);\
00444                 }
00445 
00446 #if (defined(_MSC_VER) && _MSC_VER < 1300)
00447         bool T_128BitTag = m_is128;
00448 #endif
00449         size_t L1KeyLengthInWord64 = m_L1KeyLength / 8;
00450         size_t innerLoopEnd = L1KeyLengthInWord64;
00451         const word64 *nhK = m_nhKey();
00452         word64 *polyS = m_polyState();
00453         bool isFirstBlock = true;
00454         size_t i;
00455 
00456         #if !VMAC_BOOL_32BIT
00457                 #if VMAC_BOOL_WORD128
00458                         word128 a1, a2;
00459                 #else
00460                         word64 ah1, al1, ah2, al2;
00461                 #endif
00462                 word64 kh1, kl1, kh2, kl2;
00463                 kh1=(polyS+0*4+2)[0]; kl1=(polyS+0*4+2)[1];
00464                 if (T_128BitTag)
00465                 {
00466                         kh2=(polyS+1*4+2)[0]; kl2=(polyS+1*4+2)[1];
00467                 }
00468         #endif
00469 
00470         do
00471         {
00472                 DeclareNH(nhA);
00473                 DeclareNH(nhB);
00474 
00475                 if (blocksRemainingInWord64 < L1KeyLengthInWord64)
00476                 {
00477                         if (blocksRemainingInWord64 % 8)
00478                         {
00479                                 innerLoopEnd = blocksRemainingInWord64 % 8;
00480                                 for (i=0; i<innerLoopEnd; i+=2)
00481                                         INNER_LOOP_ITERATION(0);
00482                                 blocksRemainingInWord64 -= innerLoopEnd;
00483                                 data += innerLoopEnd;
00484                         }
00485                         innerLoopEnd = blocksRemainingInWord64;
00486                 }
00487 
00488                 for (i=0; i<innerLoopEnd; i+=8)
00489                 {
00490                         INNER_LOOP_ITERATION(0);
00491                         INNER_LOOP_ITERATION(1);
00492                         INNER_LOOP_ITERATION(2);
00493                         INNER_LOOP_ITERATION(3);
00494                 }
00495                 blocksRemainingInWord64 -= innerLoopEnd;
00496                 data += innerLoopEnd;
00497 
00498                 #if VMAC_BOOL_32BIT
00499                         word32 nh0[2],  nh1[2];
00500                         word64 nh2[2];
00501 
00502                         nh0[0] = word32(nhA0);
00503                         nhA1 += (nhA0 >> 32);
00504                         nh1[0] = word32(nhA1);
00505                         nh2[0] = (nhA2 + (nhA1 >> 32)) & m62;
00506 
00507                         if (T_128BitTag)
00508                         {
00509                                 nh0[1] = word32(nhB0);
00510                                 nhB1 += (nhB0 >> 32);
00511                                 nh1[1] = word32(nhB1);
00512                                 nh2[1] = (nhB2 + (nhB1 >> 32)) & m62;
00513                         }
00514 
00515                         #define a0 (((word32 *)(polyS+i*4))[2+NativeByteOrder::ToEnum()])
00516                         #define a1 (((word32 *)(polyS+i*4))[3-NativeByteOrder::ToEnum()])
00517                         #define a2 (((word32 *)(polyS+i*4))[0+NativeByteOrder::ToEnum()])
00518                         #define a3 (((word32 *)(polyS+i*4))[1-NativeByteOrder::ToEnum()])
00519                         #define aHi ((polyS+i*4)[0])
00520                         #define k0 (((word32 *)(polyS+i*4+2))[2+NativeByteOrder::ToEnum()])
00521                         #define k1 (((word32 *)(polyS+i*4+2))[3-NativeByteOrder::ToEnum()])
00522                         #define k2 (((word32 *)(polyS+i*4+2))[0+NativeByteOrder::ToEnum()])
00523                         #define k3 (((word32 *)(polyS+i*4+2))[1-NativeByteOrder::ToEnum()])
00524                         #define kHi ((polyS+i*4+2)[0])
00525 
00526                         if (isFirstBlock)
00527                         {
00528                                 isFirstBlock = false;
00529                                 if (m_isFirstBlock)
00530                                 {
00531                                         m_isFirstBlock = false;
00532                                         for (i=0; i<=(size_t)T_128BitTag; i++)
00533                                         {
00534                                                 word64 t = (word64)nh0[i] + k0;
00535                                                 a0 = (word32)t;
00536                                                 t = (t >> 32) + nh1[i] + k1;
00537                                                 a1 = (word32)t;
00538                                                 aHi = (t >> 32) + nh2[i] + kHi;
00539                                         }
00540                                         continue;
00541                                 }
00542                         }
00543                         for (i=0; i<=(size_t)T_128BitTag; i++)
00544                         {
00545                                 word64 p, t;
00546                                 word32 t2;
00547 
00548                                 p = MUL32(a3, 2*k3);
00549                                 p += nh2[i];
00550                                 p += MUL32(a0, k2);
00551                                 p += MUL32(a1, k1);
00552                                 p += MUL32(a2, k0);
00553                                 t2 = (word32)p;
00554                                 p >>= 32;
00555                                 p += MUL32(a0, k3);
00556                                 p += MUL32(a1, k2);
00557                                 p += MUL32(a2, k1);
00558                                 p += MUL32(a3, k0);
00559                                 t = (word64(word32(p) & 0x7fffffff) << 32) | t2;
00560                                 p >>= 31;
00561                                 p += nh0[i];
00562                                 p += MUL32(a0, k0);
00563                                 p += MUL32(a1, 2*k3);
00564                                 p += MUL32(a2, 2*k2);
00565                                 p += MUL32(a3, 2*k1);
00566                                 t2 = (word32)p;
00567                                 p >>= 32;
00568                                 p += nh1[i];
00569                                 p += MUL32(a0, k1);
00570                                 p += MUL32(a1, k0);
00571                                 p += MUL32(a2, 2*k3);
00572                                 p += MUL32(a3, 2*k2);
00573                                 a0 = t2;
00574                                 a1 = (word32)p;
00575                                 aHi = (p >> 32) + t;
00576                         }
00577 
00578                         #undef a0
00579                         #undef a1
00580                         #undef a2
00581                         #undef a3
00582                         #undef aHi
00583                         #undef k0
00584                         #undef k1
00585                         #undef k2
00586                         #undef k3               
00587                         #undef kHi
00588                 #else           // #if VMAC_BOOL_32BIT
00589                         if (isFirstBlock)
00590                         {
00591                                 isFirstBlock = false;
00592                                 if (m_isFirstBlock)
00593                                 {
00594                                         m_isFirstBlock = false;
00595                                         #if VMAC_BOOL_WORD128
00596                                                 #define first_poly_step(a, kh, kl, m)   a = (m & m126) + ((word128(kh) << 64) | kl)
00597 
00598                                                 first_poly_step(a1, kh1, kl1, nhA);
00599                                                 if (T_128BitTag)
00600                                                         first_poly_step(a2, kh2, kl2, nhB);
00601                                         #else
00602                                                 #define first_poly_step(ah, al, kh, kl, mh, ml)         {\
00603                                                         mh &= m62;\
00604                                                         ADD128(mh, ml, kh, kl); \
00605                                                         ah = mh; al = ml;}
00606 
00607                                                 first_poly_step(ah1, al1, kh1, kl1, nhA1, nhA0);
00608                                                 if (T_128BitTag)
00609                                                         first_poly_step(ah2, al2, kh2, kl2, nhB1, nhB0);
00610                                         #endif
00611                                         continue;
00612                                 }
00613                                 else
00614                                 {
00615                                         #if VMAC_BOOL_WORD128
00616                                                 a1 = (word128((polyS+0*4)[0]) << 64) | (polyS+0*4)[1];
00617                                         #else
00618                                                 ah1=(polyS+0*4)[0]; al1=(polyS+0*4)[1];
00619                                         #endif
00620                                         if (T_128BitTag)
00621                                         {
00622                                                 #if VMAC_BOOL_WORD128
00623                                                         a2 = (word128((polyS+1*4)[0]) << 64) | (polyS+1*4)[1];
00624                                                 #else
00625                                                         ah2=(polyS+1*4)[0]; al2=(polyS+1*4)[1];
00626                                                 #endif
00627                                         }
00628                                 }
00629                         }
00630 
00631                         #if VMAC_BOOL_WORD128
00632                                 #define poly_step(a, kh, kl, m) \
00633                                 {   word128 t1, t2, t3, t4;\
00634                                         Multiply128(t2, a>>64, kl);\
00635                                         Multiply128(t3, a, kh);\
00636                                         Multiply128(t1, a, kl);\
00637                                         Multiply128(t4, a>>64, 2*kh);\
00638                                         t2 += t3;\
00639                                         t4 += t1;\
00640                                         t2 += t4>>64;\
00641                                         a = (word128(word64(t2)&m63) << 64) | word64(t4);\
00642                                         t2 *= 2;\
00643                                         a += m & m126;\
00644                                         a += t2>>64;}
00645 
00646                                 poly_step(a1, kh1, kl1, nhA);
00647                                 if (T_128BitTag)
00648                                         poly_step(a2, kh2, kl2, nhB);
00649                         #else
00650                                 #define poly_step(ah, al, kh, kl, mh, ml)                                       \
00651                                 {   word64 t1h, t1l, t2h, t2l, t3h, t3l, z=0;                           \
00652                                         /* compute ab*cd, put bd into result registers */       \
00653                                         MUL64(t2h,t2l,ah,kl);                                   \
00654                                         MUL64(t3h,t3l,al,kh);                                   \
00655                                         MUL64(t1h,t1l,ah,2*kh);                                 \
00656                                         MUL64(ah,al,al,kl);                                     \
00657                                         /* add together ad + bc */                              \
00658                                         ADD128(t2h,t2l,t3h,t3l);                                \
00659                                         /* add 2 * ac to result */                              \
00660                                         ADD128(ah,al,t1h,t1l);                                  \
00661                                         /* now (ah,al), (t2l,2*t2h) need summing */             \
00662                                         /* first add the high registers, carrying into t2h */   \
00663                                         ADD128(t2h,ah,z,t2l);                                   \
00664                                         /* double t2h and add top bit of ah */                  \
00665                                         t2h += t2h + (ah >> 63);                                \
00666                                         ah &= m63;                                              \
00667                                         /* now add the low registers */                         \
00668                                         mh &= m62;                                                                                              \
00669                                         ADD128(ah,al,mh,ml);                                    \
00670                                         ADD128(ah,al,z,t2h);                                    \
00671                                 }
00672 
00673                                 poly_step(ah1, al1, kh1, kl1, nhA1, nhA0);
00674                                 if (T_128BitTag)
00675                                         poly_step(ah2, al2, kh2, kl2, nhB1, nhB0);
00676                         #endif
00677                 #endif          // #if VMAC_BOOL_32BIT
00678         } while (blocksRemainingInWord64);
00679 
00680         #if VMAC_BOOL_WORD128
00681                 (polyS+0*4)[0]=word64(a1>>64); (polyS+0*4)[1]=word64(a1);
00682                 if (T_128BitTag)
00683                 {
00684                         (polyS+1*4)[0]=word64(a2>>64); (polyS+1*4)[1]=word64(a2);
00685                 }
00686         #elif !VMAC_BOOL_32BIT
00687                 (polyS+0*4)[0]=ah1; (polyS+0*4)[1]=al1;
00688                 if (T_128BitTag)
00689                 {
00690                         (polyS+1*4)[0]=ah2; (polyS+1*4)[1]=al2;
00691                 }
00692         #endif
00693 }
00694 
00695 inline void VMAC_Base::VHASH_Update(const word64 *data, size_t blocksRemainingInWord64)
00696 {
00697 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86
00698         if (HasSSE2())
00699         {
00700                 VHASH_Update_SSE2(data, blocksRemainingInWord64, 0);
00701                 if (m_is128)
00702                         VHASH_Update_SSE2(data, blocksRemainingInWord64, 1);
00703                 m_isFirstBlock = false;
00704         }
00705         else
00706 #endif
00707         {
00708 #if defined(_MSC_VER) && _MSC_VER < 1300
00709                 VHASH_Update_Template(data, blocksRemainingInWord64);
00710 #else
00711                 if (m_is128)
00712                         VHASH_Update_Template<true>(data, blocksRemainingInWord64);
00713                 else
00714                         VHASH_Update_Template<false>(data, blocksRemainingInWord64);
00715 #endif
00716         }
00717 }
00718 
00719 size_t VMAC_Base::HashMultipleBlocks(const word64 *data, size_t length)
00720 {
00721         size_t remaining = ModPowerOf2(length, m_L1KeyLength);
00722         VHASH_Update(data, (length-remaining)/8);
00723         return remaining;
00724 }
00725 
/* L3Hash: final hashing stage that maps a 128-bit polynomial accumulator to one 64-bit word.
 *
 *   input - two words read as a 128-bit value: input[0] is the high half, input[1] the low half
 *   l3Key - two 64-bit key words, l3Key[0] and l3Key[1]
 *   len   - length value added into the high half before reduction
 *           (TruncatedFinal passes the residual message length in bits)
 *
 * Returns a value below p64 (2^64 - 257).
 *
 * All conditional corrections are written as mask arithmetic, (0 - cond) & 257, not as branches.
 * ADD128 and MUL64 are macros defined earlier in the file; they are used here as
 * "add the (hi,lo) pair in args 3,4 into the (hi,lo) pair in args 1,2" and
 * "64x64 -> 128-bit multiply into (hi,lo)" - NOTE(review): confirm against their definitions.
 */
00726 static word64 L3Hash(const word64 *input, const word64 *l3Key, size_t len)
00727 {
00728     word64 rh, rl, t, z=0;
00729         word64 p1 = input[0], p2 = input[1];
00730         word64 k1 = l3Key[0], k2 = l3Key[1];
00731 
00732     /* fully reduce (p1,p2)+(len,0) mod p127, i.e. modulo 2^127 - 1 */
00733     t = p1 >> 63;                       /* bit 127 is worth 1 modulo 2^127 - 1 ... */
00734     p1 &= m63;                          /* ... so strip it and add it back into the low word */
00735     ADD128(p1, p2, len, t);
00736     /* At this point, (p1,p2) is at most 2^127+(len<<64) */
00737     t = (p1 > m63) + ((p1 == m63) & (p2 == m64));   /* 1 if the value is still >= 2^127 - 1 */
00738     ADD128(p1, p2, z, t);               /* adding 1 and masking bit 127 subtracts 2^127 - 1 */
00739     p1 &= m63;
00740 
00741     /* compute (p1,p2)/(2^64-2^32) and (p1,p2)%(2^64-2^32); quotient ends up in p1, remainder in p2 */
00742     t = p1 + (p2 >> 32);
00743     t += (t >> 32);
00744     t += (word32)t > 0xfffffffeU;       /* +1 when the low 32 bits of t are all ones */
00745     p1 += (t >> 32);
00746     p2 += (p1 << 32);
00747 
00748     /* compute (p1+k1)%p64 and (p2+k2)%p64 */
00749     p1 += k1;
00750     p1 += (0 - (p1 < k1)) & 257;        /* sum wrapped past 2^64: add 257, since 2^64 = 257 (mod p64) */
00751     p2 += k2;
00752     p2 += (0 - (p2 < k2)) & 257;        /* same wrap-around correction for the second word */
00753 
00754     /* compute (p1+k1)*(p2+k2)%p64: fold the high product word using 2^64 = 257 = 2^8 + 1 (mod p64) */
00755     MUL64(rh, rl, p1, p2);
00756     t = rh >> 56;                       /* bits of rh that the shift by 8 below pushes out */
00757     ADD128(t, rl, z, rh);               /* rl += rh, carry collected in t */
00758     rh <<= 8;
00759     ADD128(t, rl, z, rh);               /* rl += rh * 2^8, carry collected in t */
00760     t += t << 8;                        /* t *= 257: the collected overflow is in units of 2^64 */
00761     rl += t;
00762     rl += (0 - (rl < t)) & 257;         /* correct for a wrap in the addition above */
00763     rl += (0 - (rl > p64-1)) & 257;     /* final reduction: rl >= p64 becomes rl - p64 */
00764     return rl;
00765 }
00766 
00767 void VMAC_Base::TruncatedFinal(byte *mac, size_t size)
00768 {
00769         size_t len = ModPowerOf2(GetBitCountLo()/8, m_L1KeyLength);
00770 
00771         if (len)
00772         {
00773                 memset(m_data()+len, 0, (0-len)%16);
00774                 VHASH_Update(DataBuf(), ((len+15)/16)*2);
00775                 len *= 8;       // convert to bits
00776         }
00777         else if (m_isFirstBlock)
00778         {
00779                 // special case for empty string
00780                 m_polyState()[0] = m_polyState()[2];
00781                 m_polyState()[1] = m_polyState()[3];
00782                 if (m_is128)
00783                 {
00784                         m_polyState()[4] = m_polyState()[6];
00785                         m_polyState()[5] = m_polyState()[7];
00786                 }
00787         }
00788 
00789         if (m_is128)
00790         {
00791                 word64 t[2];
00792                 t[0] = L3Hash(m_polyState(), m_l3Key(), len) + GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad());
00793                 t[1] = L3Hash(m_polyState()+4, m_l3Key()+2, len) + GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad()+8);
00794                 if (size == 16)
00795                 {
00796                         PutWord(false, BIG_ENDIAN_ORDER, mac, t[0]);
00797                         PutWord(false, BIG_ENDIAN_ORDER, mac+8, t[1]);
00798                 }
00799                 else
00800                 {
00801                         t[0] = ConditionalByteReverse(BIG_ENDIAN_ORDER, t[0]);
00802                         t[1] = ConditionalByteReverse(BIG_ENDIAN_ORDER, t[1]);
00803                         memcpy(mac, t, size);
00804                 }
00805         }
00806         else
00807         {
00808                 word64 t = L3Hash(m_polyState(), m_l3Key(), len);
00809                 t += GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad() + (m_nonce()[IVSize()-1]&1) * 8);
00810                 if (size == 8)
00811                         PutWord(false, BIG_ENDIAN_ORDER, mac, t);
00812                 else
00813                 {
00814                         t = ConditionalByteReverse(BIG_ENDIAN_ORDER, t);
00815                         memcpy(mac, &t, size);
00816                 }
00817         }
00818 }
00819 
00820 NAMESPACE_END

Generated on Fri Jun 1 11:11:25 2007 for Crypto++ by  doxygen 1.5.2