Crypto++ 8.2
Free C++ class library of cryptographic schemes
vmac.cpp
// vmac.cpp - originally written and placed in the public domain by Wei Dai
// based on Ted Krovetz's public domain vmac.c and draft-krovetz-vmac-01.txt

#include "pch.h"
#include "config.h"

#include "vmac.h"
#include "cpu.h"
#include "argnames.h"
#include "secblock.h"

#if defined(_MSC_VER) && !CRYPTOPP_BOOL_SLOW_WORD64
#include <intrin.h>
#endif

#if defined(CRYPTOPP_DISABLE_VMAC_ASM)
# undef CRYPTOPP_X86_ASM_AVAILABLE
# undef CRYPTOPP_X32_ASM_AVAILABLE
# undef CRYPTOPP_X64_ASM_AVAILABLE
# undef CRYPTOPP_SSE2_ASM_AVAILABLE
#endif

#if CRYPTOPP_MSC_VERSION
# pragma warning(disable: 4731)
#endif

ANONYMOUS_NAMESPACE_BEGIN

#if defined(CRYPTOPP_WORD128_AVAILABLE) && !defined(CRYPTOPP_X64_ASM_AVAILABLE)
using CryptoPP::word128;
using CryptoPP::word64;
# define VMAC_BOOL_WORD128 1
#else
using CryptoPP::word64;
# define VMAC_BOOL_WORD128 0
#endif

#ifdef __BORLANDC__
#define const // Turbo C++ 2006 workaround
#endif
const word64 p64 = W64LIT(0xfffffffffffffeff); /* 2^64 - 257 prime */
const word64 m62 = W64LIT(0x3fffffffffffffff); /* 62-bit mask */
const word64 m63 = W64LIT(0x7fffffffffffffff); /* 63-bit mask */
const word64 m64 = W64LIT(0xffffffffffffffff); /* 64-bit mask */
const word64 mpoly = W64LIT(0x1fffffff1fffffff); /* Poly key mask */
#ifdef __BORLANDC__
#undef const
#endif

#if VMAC_BOOL_WORD128
// workaround GCC Bug 31690: ICE with const __uint128_t and C++ front-end
# if defined(__powerpc__) && defined (CRYPTOPP_GCC_VERSION) && (CRYPTOPP_GCC_VERSION < 50300)
#  define m126 ((word128(m62)<<64)|m64)
# else
const word128 m126 = (word128(m62)<<64)|m64; /* 126-bit mask */
# endif
#endif

ANONYMOUS_NAMESPACE_END

NAMESPACE_BEGIN(CryptoPP)

void VMAC_Base::UncheckedSetKey(const byte *userKey, unsigned int keylength, const NameValuePairs &params)
{
    int digestLength = params.GetIntValueWithDefault(Name::DigestSize(), DefaultDigestSize());
    if (digestLength != 8 && digestLength != 16)
        throw InvalidArgument("VMAC: DigestSize must be 8 or 16");
    m_is128 = digestLength == 16;

    m_L1KeyLength = params.GetIntValueWithDefault(Name::L1KeyLength(), 128);
    if (m_L1KeyLength <= 0 || m_L1KeyLength % 128 != 0)
        throw InvalidArgument("VMAC: L1KeyLength must be a positive multiple of 128");

    AllocateBlocks();

    BlockCipher &cipher = AccessCipher();
    cipher.SetKey(userKey, keylength, params);
    const unsigned int blockSize = cipher.BlockSize();
    const unsigned int blockSizeInWords = blockSize / sizeof(word64);
    SecBlock<word64, AllocatorWithCleanup<word64, true> > out(blockSizeInWords);
    AlignedSecByteBlock in;
    in.CleanNew(blockSize);
    size_t i;

    /* Fill nh key */
    in[0] = 0x80;
    cipher.AdvancedProcessBlocks(in, NULLPTR, (byte *)m_nhKey(), m_nhKeySize()*sizeof(word64), cipher.BT_InBlockIsCounter);
    ConditionalByteReverse<word64>(BIG_ENDIAN_ORDER, m_nhKey(), m_nhKey(), m_nhKeySize()*sizeof(word64));

    /* Fill poly key */
    in[0] = 0xC0;
    in[15] = 0;
    for (i = 0; i <= (size_t)m_is128; i++)
    {
        cipher.ProcessBlock(in, out.BytePtr());
        m_polyState()[i*4+2] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr()) & mpoly;
        m_polyState()[i*4+3] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr()+8) & mpoly;
        in[15]++;
    }

    /* Fill ip key */
    in[0] = 0xE0;
    in[15] = 0;
    word64 *l3Key = m_l3Key();
    CRYPTOPP_ASSERT(IsAlignedOn(l3Key,GetAlignmentOf<word64>()));

    for (i = 0; i <= (size_t)m_is128; i++)
        do
        {
            cipher.ProcessBlock(in, out.BytePtr());
            l3Key[i*2+0] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr());
            l3Key[i*2+1] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr()+8);
            in[15]++;
        } while ((l3Key[i*2+0] >= p64) || (l3Key[i*2+1] >= p64));

    m_padCached = false;
    size_t nonceLength;
    const byte *nonce = GetIVAndThrowIfInvalid(params, nonceLength);
    Resynchronize(nonce, (int)nonceLength);
}
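
// Note (not part of the original source): the three key fills above follow
// draft-krovetz-vmac-01. Each subkey is derived by running the block cipher
// in counter mode with a distinct tag byte: 0x80 for the NH key, 0xC0 for
// the polynomial key (masked by mpoly so the top three bits of each 32-bit
// half are clear), and 0xE0 for the L3 "ip" key, which is rejection-sampled
// until both words are below p64 = 2^64 - 257.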

void VMAC_Base::GetNextIV(RandomNumberGenerator &rng, byte *IV)
{
    SimpleKeyingInterface::GetNextIV(rng, IV);
    IV[0] &= 0x7f;
}

void VMAC_Base::Resynchronize(const byte *nonce, int len)
{
    size_t length = ThrowIfInvalidIVLength(len);
    size_t s = IVSize();
    byte *storedNonce = m_nonce();

    if (m_is128)
    {
        memset(storedNonce, 0, s-length);
        memcpy(storedNonce+s-length, nonce, length);
        AccessCipher().ProcessBlock(storedNonce, m_pad());
    }
    else
    {
        if (m_padCached && (storedNonce[s-1] | 1) == (nonce[length-1] | 1))
        {
            m_padCached = VerifyBufsEqual(storedNonce+s-length, nonce, length-1);
            for (size_t i=0; m_padCached && i<s-length; i++)
                m_padCached = (storedNonce[i] == 0);
        }
        if (!m_padCached)
        {
            memset(storedNonce, 0, s-length);
            memcpy(storedNonce+s-length, nonce, length-1);
            storedNonce[s-1] = nonce[length-1] & 0xfe;
            AccessCipher().ProcessBlock(storedNonce, m_pad());
            m_padCached = true;
        }
        storedNonce[s-1] = nonce[length-1];
    }
    m_isFirstBlock = true;
    Restart();
}
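
// Note (not part of the original source): in the 64-bit tag case above, the
// low bit of the last nonce byte is masked off before encryption because that
// bit only selects which half of the 128-bit pad TruncatedFinal() uses. Two
// nonces differing only in that bit can therefore share one cached pad, which
// is what the m_padCached comparison checks for.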

void VMAC_Base::HashEndianCorrectedBlock(const word64 *data)
{
    CRYPTOPP_UNUSED(data);
    CRYPTOPP_ASSERT(false);
    throw NotImplemented("VMAC: HashEndianCorrectedBlock is not implemented");
}

unsigned int VMAC_Base::OptimalDataAlignment() const
{
    return
#if CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
        HasSSE2() ? 16 :
#endif
        GetCipher().OptimalDataAlignment();
}

#if CRYPTOPP_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86
#if CRYPTOPP_MSC_VERSION
# pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code
#endif

CRYPTOPP_NOINLINE
void VMAC_Base::VHASH_Update_SSE2(const word64 *data, size_t blocksRemainingInWord64, int tagPart)
{
    const word64 *nhK = m_nhKey();
    word64 *polyS = (word64*)(void*)m_polyState();
    word32 L1KeyLength = m_L1KeyLength;

    // These are used in the ASM, but some analysis services miss it.
    CRYPTOPP_UNUSED(data); CRYPTOPP_UNUSED(tagPart);
    CRYPTOPP_UNUSED(L1KeyLength);
    CRYPTOPP_UNUSED(blocksRemainingInWord64);

    // This inline ASM is tricky, and downright difficult when PIC is
    // in effect. The ASM uses all the general purpose registers. When
    // PIC is in effect, GCC uses EBX as a base register. Saving EBX with
    // 'mov %%ebx, %0' and restoring EBX with 'mov %0, %%ebx' causes GCC
    // to generate 'mov -0x40(%ebx), %ebx' for the restore. That obviously
    // won't work. We can push and pop EBX, but then we have to be careful
    // because GCC references %1 (L1KeyLength) relative to ESP, which is
    // also used in the function and no longer accurate. Attempting to
    // sidestep the issues with clobber lists results in "error: ‘asm’
    // operand has impossible constraints", though we were able to tell
    // GCC that ESP is dirty. The problems with GCC are the reason for the
    // pushes and pops rather than the original moves.
#ifdef __GNUC__
    __asm__ __volatile__
    (
    AS1( push %%ebx)
    AS1( push %0) // L1KeyLength
    AS1( pop %%ebx)
    INTEL_NOPREFIX
#else
    #if defined(__INTEL_COMPILER)
    char isFirstBlock = m_isFirstBlock;
    AS2( mov ebx, [L1KeyLength])
    AS2( mov dl, [isFirstBlock])
    #else
    AS2( mov ecx, this)
    AS2( mov ebx, [ecx+m_L1KeyLength])
    AS2( mov dl, [ecx+m_isFirstBlock])
    #endif
    AS2( mov eax, tagPart)
    AS2( shl eax, 4)
    AS2( mov edi, nhK)
    AS2( add edi, eax)
    AS2( add eax, eax)
    AS2( add eax, polyS)

    AS2( mov esi, data)
    AS2( mov ecx, blocksRemainingInWord64)
#endif

    AS2( shr ebx, 3)
    AS_PUSH_IF86( bp)
    AS2( sub esp, 12)
    ASL(4)
    AS2( mov ebp, ebx)
    AS2( cmp ecx, ebx)
    AS2( cmovl ebp, ecx)
    AS2( sub ecx, ebp)
    AS2( lea ebp, [edi+8*ebp]) // end of nhK
    AS2( movq mm6, [esi])
    AS2( paddq mm6, [edi])
    AS2( movq mm5, [esi+8])
    AS2( paddq mm5, [edi+8])
    AS2( add esi, 16)
    AS2( add edi, 16)
    AS2( movq mm4, mm6)
    ASS( pshufw mm2, mm6, 1, 0, 3, 2)
    AS2( pmuludq mm6, mm5)
    ASS( pshufw mm3, mm5, 1, 0, 3, 2)
    AS2( pmuludq mm5, mm2)
    AS2( pmuludq mm2, mm3)
    AS2( pmuludq mm3, mm4)
    AS2( pxor mm7, mm7)
    AS2( movd [esp], mm6)
    AS2( psrlq mm6, 32)
    AS2( movd [esp+4], mm5)
    AS2( psrlq mm5, 32)
    AS2( cmp edi, ebp)
    ASJ( je, 1, f)
    ASL(0)
    AS2( movq mm0, [esi])
    AS2( paddq mm0, [edi])
    AS2( movq mm1, [esi+8])
    AS2( paddq mm1, [edi+8])
    AS2( add esi, 16)
    AS2( add edi, 16)
    AS2( movq mm4, mm0)
    AS2( paddq mm5, mm2)
    ASS( pshufw mm2, mm0, 1, 0, 3, 2)
    AS2( pmuludq mm0, mm1)
    AS2( movd [esp+8], mm3)
    AS2( psrlq mm3, 32)
    AS2( paddq mm5, mm3)
    ASS( pshufw mm3, mm1, 1, 0, 3, 2)
    AS2( pmuludq mm1, mm2)
    AS2( pmuludq mm2, mm3)
    AS2( pmuludq mm3, mm4)
    AS2( movd mm4, [esp])
    AS2( paddq mm7, mm4)
    AS2( movd mm4, [esp+4])
    AS2( paddq mm6, mm4)
    AS2( movd mm4, [esp+8])
    AS2( paddq mm6, mm4)
    AS2( movd [esp], mm0)
    AS2( psrlq mm0, 32)
    AS2( paddq mm6, mm0)
    AS2( movd [esp+4], mm1)
    AS2( psrlq mm1, 32)
    AS2( paddq mm5, mm1)
    AS2( cmp edi, ebp)
    ASJ( jne, 0, b)
    ASL(1)
    AS2( paddq mm5, mm2)
    AS2( movd [esp+8], mm3)
    AS2( psrlq mm3, 32)
    AS2( paddq mm5, mm3)
    AS2( movd mm4, [esp])
    AS2( paddq mm7, mm4)
    AS2( movd mm4, [esp+4])
    AS2( paddq mm6, mm4)
    AS2( movd mm4, [esp+8])
    AS2( paddq mm6, mm4)
    AS2( lea ebp, [8*ebx])
    AS2( sub edi, ebp) // reset edi to start of nhK

    AS2( movd [esp], mm7)
    AS2( psrlq mm7, 32)
    AS2( paddq mm6, mm7)
    AS2( movd [esp+4], mm6)
    AS2( psrlq mm6, 32)
    AS2( paddq mm5, mm6)
    AS2( psllq mm5, 2)
    AS2( psrlq mm5, 2)

#define a0 [eax+2*4]
#define a1 [eax+3*4]
#define a2 [eax+0*4]
#define a3 [eax+1*4]
#define k0 [eax+2*8+2*4]
#define k1 [eax+2*8+3*4]
#define k2 [eax+2*8+0*4]
#define k3 [eax+2*8+1*4]
    AS2( test dl, dl)
    ASJ( jz, 2, f)
    AS2( movd mm1, k0)
    AS2( movd mm0, [esp])
    AS2( paddq mm0, mm1)
    AS2( movd a0, mm0)
    AS2( psrlq mm0, 32)
    AS2( movd mm1, k1)
    AS2( movd mm2, [esp+4])
    AS2( paddq mm1, mm2)
    AS2( paddq mm0, mm1)
    AS2( movd a1, mm0)
    AS2( psrlq mm0, 32)
    AS2( paddq mm5, k2)
    AS2( paddq mm0, mm5)
    AS2( movq a2, mm0)
    AS2( xor edx, edx)
    ASJ( jmp, 3, f)
    ASL(2)
    AS2( movd mm0, a3)
    AS2( movq mm4, mm0)
    AS2( pmuludq mm0, k3) // a3*k3
    AS2( movd mm1, a0)
    AS2( pmuludq mm1, k2) // a0*k2
    AS2( movd mm2, a1)
    AS2( movd mm6, k1)
    AS2( pmuludq mm2, mm6) // a1*k1
    AS2( movd mm3, a2)
    AS2( psllq mm0, 1)
    AS2( paddq mm0, mm5)
    AS2( movq mm5, mm3)
    AS2( movd mm7, k0)
    AS2( pmuludq mm3, mm7) // a2*k0
    AS2( pmuludq mm4, mm7) // a3*k0
    AS2( pmuludq mm5, mm6) // a2*k1
    AS2( paddq mm0, mm1)
    AS2( movd mm1, a1)
    AS2( paddq mm4, mm5)
    AS2( movq mm5, mm1)
    AS2( pmuludq mm1, k2) // a1*k2
    AS2( paddq mm0, mm2)
    AS2( movd mm2, a0)
    AS2( paddq mm0, mm3)
    AS2( movq mm3, mm2)
    AS2( pmuludq mm2, k3) // a0*k3
    AS2( pmuludq mm3, mm7) // a0*k0
    AS2( movd [esp+8], mm0)
    AS2( psrlq mm0, 32)
    AS2( pmuludq mm7, mm5) // a1*k0
    AS2( pmuludq mm5, k3) // a1*k3
    AS2( paddq mm0, mm1)
    AS2( movd mm1, a2)
    AS2( pmuludq mm1, k2) // a2*k2
    AS2( paddq mm0, mm2)
    AS2( paddq mm0, mm4)
    AS2( movq mm4, mm0)
    AS2( movd mm2, a3)
    AS2( pmuludq mm2, mm6) // a3*k1
    AS2( pmuludq mm6, a0) // a0*k1
    AS2( psrlq mm0, 31)
    AS2( paddq mm0, mm3)
    AS2( movd mm3, [esp])
    AS2( paddq mm0, mm3)
    AS2( movd mm3, a2)
    AS2( pmuludq mm3, k3) // a2*k3
    AS2( paddq mm5, mm1)
    AS2( movd mm1, a3)
    AS2( pmuludq mm1, k2) // a3*k2
    AS2( paddq mm5, mm2)
    AS2( movd mm2, [esp+4])
    AS2( psllq mm5, 1)
    AS2( paddq mm0, mm5)
    AS2( psllq mm4, 33)
    AS2( movd a0, mm0)
    AS2( psrlq mm0, 32)
    AS2( paddq mm6, mm7)
    AS2( movd mm7, [esp+8])
    AS2( paddq mm0, mm6)
    AS2( paddq mm0, mm2)
    AS2( paddq mm3, mm1)
    AS2( psllq mm3, 1)
    AS2( paddq mm0, mm3)
    AS2( psrlq mm4, 1)
    AS2( movd a1, mm0)
    AS2( psrlq mm0, 32)
    AS2( por mm4, mm7)
    AS2( paddq mm0, mm4)
    AS2( movq a2, mm0)
#undef a0
#undef a1
#undef a2
#undef a3
#undef k0
#undef k1
#undef k2
#undef k3

    ASL(3)
    AS2( test ecx, ecx)
    ASJ( jnz, 4, b)
    AS2( add esp, 12)
    AS_POP_IF86( bp)
    AS1( emms)
#ifdef __GNUC__
    ATT_PREFIX
    AS1( pop %%ebx)
    :
    : "m" (L1KeyLength), "c" (blocksRemainingInWord64), "S" (data),
      "D" (nhK+tagPart*2), "d" (m_isFirstBlock), "a" (polyS+tagPart*4)
    : "esp", "memory", "cc"
    );
#endif
}
#endif

#if VMAC_BOOL_WORD128
    #define DeclareNH(a) word128 a=0
    #define MUL64(rh,rl,i1,i2) {word128 p = word128(i1)*(i2); rh = word64(p>>64); rl = word64(p);}
    #define AccumulateNH(a, b, c) a += word128(b)*(c)
    #define Multiply128(r, i1, i2) r = word128(word64(i1)) * word64(i2)
#else
    #if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER) && (defined(_M_IX86) || defined(_M_X64) || defined(_M_IA64))
        #define MUL32(a, b) __emulu(word32(a), word32(b))
    #else
        #define MUL32(a, b) ((word64)((word32)(a)) * (word32)(b))
    #endif
    #if defined(CRYPTOPP_X64_ASM_AVAILABLE)
        #define DeclareNH(a) word64 a##0=0, a##1=0
        #define MUL64(rh,rl,i1,i2) asm ("mulq %3" : "=a"(rl), "=d"(rh) : "a"(i1), "g"(i2) : "cc");
        #define AccumulateNH(a, b, c) asm ("mulq %3; addq %%rax, %0; adcq %%rdx, %1" : "+r"(a##0), "+r"(a##1) : "a"(b), "g"(c) : "%rdx", "cc");
        #define ADD128(rh,rl,ih,il) asm ("addq %3, %1; adcq %2, %0" : "+r"(rh),"+r"(rl) : "r"(ih),"r"(il) : "cc");
    #elif defined(_MSC_VER) && !CRYPTOPP_BOOL_SLOW_WORD64
        #define DeclareNH(a) word64 a##0=0, a##1=0
        #define MUL64(rh,rl,i1,i2) (rl) = _umul128(i1,i2,&(rh));
        #define AccumulateNH(a, b, c) {\
            word64 ph, pl;\
            pl = _umul128(b,c,&ph);\
            a##0 += pl;\
            a##1 += ph + (a##0 < pl);}
    #else
        #define VMAC_BOOL_32BIT 1
        #define DeclareNH(a) word64 a##0=0, a##1=0, a##2=0
        #define MUL64(rh,rl,i1,i2) \
            { word64 _i1 = (i1), _i2 = (i2); \
              word64 m1= MUL32(_i1,_i2>>32); \
              word64 m2= MUL32(_i1>>32,_i2); \
              rh = MUL32(_i1>>32,_i2>>32); \
              rl = MUL32(_i1,_i2); \
              ADD128(rh,rl,(m1 >> 32),(m1 << 32)); \
              ADD128(rh,rl,(m2 >> 32),(m2 << 32)); \
            }
        #define AccumulateNH(a, b, c) {\
            word64 p = MUL32(b, c);\
            a##1 += word32((p)>>32);\
            a##0 += word32(p);\
            p = MUL32((b)>>32, c);\
            a##2 += word32((p)>>32);\
            a##1 += word32(p);\
            p = MUL32((b)>>32, (c)>>32);\
            a##2 += p;\
            p = MUL32(b, (c)>>32);\
            a##1 += word32(p);\
            a##2 += word32(p>>32);}
    #endif
#endif
#ifndef VMAC_BOOL_32BIT
    #define VMAC_BOOL_32BIT 0
#endif
#ifndef ADD128
    #define ADD128(rh,rl,ih,il) \
        { word64 _il = (il); \
          (rl) += (_il); \
          (rh) += (ih) + ((rl) < (_il)); \
        }
#endif
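
// Illustration (not part of the original source): the portable ADD128
// fallback above emulates a 128-bit add using only 64-bit words. After the
// low-word addition, ((rl) < (_il)) is 1 exactly when that addition wrapped
// around 2^64, so adding the comparison result into the high word propagates
// the carry:
//
//   word64 rh = 1, rl = W64LIT(0xffffffffffffffff);
//   ADD128(rh, rl, 0, 1);  // low word wraps to 0, carry in: rh == 2, rl == 0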

template <bool T_128BitTag>
void VMAC_Base::VHASH_Update_Template(const word64 *data, size_t blocksRemainingInWord64)
{
    CRYPTOPP_ASSERT(IsAlignedOn(m_polyState(),GetAlignmentOf<word64>()));
    CRYPTOPP_ASSERT(IsAlignedOn(m_nhKey(),GetAlignmentOf<word64>()));

    #define INNER_LOOP_ITERATION(j) {\
        word64 d0 = ConditionalByteReverse(LITTLE_ENDIAN_ORDER, data[i+2*j+0]);\
        word64 d1 = ConditionalByteReverse(LITTLE_ENDIAN_ORDER, data[i+2*j+1]);\
        AccumulateNH(nhA, d0+nhK[i+2*j+0], d1+nhK[i+2*j+1]);\
        if (T_128BitTag)\
            AccumulateNH(nhB, d0+nhK[i+2*j+2], d1+nhK[i+2*j+3]);\
        }
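
    // Illustration (not part of the original source): each iteration
    // accumulates one NH pair, i.e. the full 64x64-bit product
    // (m0 + k0)*(m1 + k1) of key-offset message words, matching the NH
    // stage of VHASH in draft-krovetz-vmac-01. A scalar sketch of the same
    // computation, assuming a native 128-bit type and hypothetical message
    // and key arrays m and k of even length n:
    //
    //   word128 nh = 0;
    //   for (size_t j = 0; j < n; j += 2)
    //       nh += word128(m[j] + k[j]) * (m[j+1] + k[j+1]);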

    size_t L1KeyLengthInWord64 = m_L1KeyLength / 8;
    size_t innerLoopEnd = L1KeyLengthInWord64;
    const word64 *nhK = m_nhKey();
    word64 *polyS = (word64*)(void*)m_polyState();
    bool isFirstBlock = true;
    size_t i;

    #if !VMAC_BOOL_32BIT
        #if VMAC_BOOL_WORD128
            word128 a1=0, a2=0;
        #else
            word64 ah1=0, al1=0, ah2=0, al2=0;
        #endif
        word64 kh1, kl1, kh2, kl2;
        kh1=(polyS+0*4+2)[0]; kl1=(polyS+0*4+2)[1];
        if (T_128BitTag)
        {
            kh2=(polyS+1*4+2)[0]; kl2=(polyS+1*4+2)[1];
        }
    #endif

    do
    {
        DeclareNH(nhA);
        DeclareNH(nhB);

        i = 0;
        if (blocksRemainingInWord64 < L1KeyLengthInWord64)
        {
            if (blocksRemainingInWord64 % 8)
            {
                innerLoopEnd = blocksRemainingInWord64 % 8;
                for (; i<innerLoopEnd; i+=2)
                    INNER_LOOP_ITERATION(0);
            }
            innerLoopEnd = blocksRemainingInWord64;
        }
        for (; i<innerLoopEnd; i+=8)
        {
            INNER_LOOP_ITERATION(0);
            INNER_LOOP_ITERATION(1);
            INNER_LOOP_ITERATION(2);
            INNER_LOOP_ITERATION(3);
        }
        blocksRemainingInWord64 -= innerLoopEnd;
        data += innerLoopEnd;

        #if VMAC_BOOL_32BIT
        word32 nh0[2], nh1[2];
        word64 nh2[2];

        nh0[0] = word32(nhA0);
        nhA1 += (nhA0 >> 32);
        nh1[0] = word32(nhA1);
        nh2[0] = (nhA2 + (nhA1 >> 32)) & m62;

        if (T_128BitTag)
        {
            nh0[1] = word32(nhB0);
            nhB1 += (nhB0 >> 32);
            nh1[1] = word32(nhB1);
            nh2[1] = (nhB2 + (nhB1 >> 32)) & m62;
        }

        #define a0 (((word32 *)(polyS+i*4))[2+NativeByteOrder::ToEnum()])
        #define a1 (*(((word32 *)(polyS+i*4))+3-NativeByteOrder::ToEnum())) // workaround for GCC 3.2
        #define a2 (((word32 *)(polyS+i*4))[0+NativeByteOrder::ToEnum()])
        #define a3 (*(((word32 *)(polyS+i*4))+1-NativeByteOrder::ToEnum()))
        #define aHi ((polyS+i*4)[0])
        #define k0 (((word32 *)(polyS+i*4+2))[2+NativeByteOrder::ToEnum()])
        #define k1 (*(((word32 *)(polyS+i*4+2))+3-NativeByteOrder::ToEnum()))
        #define k2 (((word32 *)(polyS+i*4+2))[0+NativeByteOrder::ToEnum()])
        #define k3 (*(((word32 *)(polyS+i*4+2))+1-NativeByteOrder::ToEnum()))
        #define kHi ((polyS+i*4+2)[0])

        if (isFirstBlock)
        {
            isFirstBlock = false;
            if (m_isFirstBlock)
            {
                m_isFirstBlock = false;
                for (i=0; i<=(size_t)T_128BitTag; i++)
                {
                    word64 t = (word64)nh0[i] + k0;
                    a0 = (word32)t;
                    t = (t >> 32) + nh1[i] + k1;
                    a1 = (word32)t;
                    aHi = (t >> 32) + nh2[i] + kHi;
                }
                continue;
            }
        }
        for (i=0; i<=(size_t)T_128BitTag; i++)
        {
            word64 p, t;
            word32 t2;

            p = MUL32(a3, 2*k3);
            p += nh2[i];
            p += MUL32(a0, k2);
            p += MUL32(a1, k1);
            p += MUL32(a2, k0);
            t2 = (word32)p;
            p >>= 32;
            p += MUL32(a0, k3);
            p += MUL32(a1, k2);
            p += MUL32(a2, k1);
            p += MUL32(a3, k0);
            t = (word64(word32(p) & 0x7fffffff) << 32) | t2;
            p >>= 31;
            p += nh0[i];
            p += MUL32(a0, k0);
            p += MUL32(a1, 2*k3);
            p += MUL32(a2, 2*k2);
            p += MUL32(a3, 2*k1);
            t2 = (word32)p;
            p >>= 32;
            p += nh1[i];
            p += MUL32(a0, k1);
            p += MUL32(a1, k0);
            p += MUL32(a2, 2*k3);
            p += MUL32(a3, 2*k2);
            a0 = t2;
            a1 = (word32)p;
            aHi = (p >> 32) + t;
        }

        #undef a0
        #undef a1
        #undef a2
        #undef a3
        #undef aHi
        #undef k0
        #undef k1
        #undef k2
        #undef k3
        #undef kHi
        #else // #if VMAC_BOOL_32BIT
        if (isFirstBlock)
        {
            isFirstBlock = false;
            if (m_isFirstBlock)
            {
                m_isFirstBlock = false;
                #if VMAC_BOOL_WORD128
                    #define first_poly_step(a, kh, kl, m) a = (m & m126) + ((word128(kh) << 64) | kl)

                    first_poly_step(a1, kh1, kl1, nhA);
                    if (T_128BitTag)
                        first_poly_step(a2, kh2, kl2, nhB);
                #else
                    #define first_poly_step(ah, al, kh, kl, mh, ml) {\
                        mh &= m62;\
                        ADD128(mh, ml, kh, kl); \
                        ah = mh; al = ml;}

                    first_poly_step(ah1, al1, kh1, kl1, nhA1, nhA0);
                    if (T_128BitTag)
                        first_poly_step(ah2, al2, kh2, kl2, nhB1, nhB0);
                #endif
                continue;
            }
            else
            {
                #if VMAC_BOOL_WORD128
                    a1 = (word128((polyS+0*4)[0]) << 64) | (polyS+0*4)[1];
                #else
                    ah1=(polyS+0*4)[0]; al1=(polyS+0*4)[1];
                #endif
                if (T_128BitTag)
                {
                    #if VMAC_BOOL_WORD128
                        a2 = (word128((polyS+1*4)[0]) << 64) | (polyS+1*4)[1];
                    #else
                        ah2=(polyS+1*4)[0]; al2=(polyS+1*4)[1];
                    #endif
                }
            }
        }

        #if VMAC_BOOL_WORD128
            #define poly_step(a, kh, kl, m) \
            {   word128 t1, t2, t3, t4;\
                Multiply128(t2, a>>64, kl);\
                Multiply128(t3, a, kh);\
                Multiply128(t1, a, kl);\
                Multiply128(t4, a>>64, 2*kh);\
                t2 += t3;\
                t4 += t1;\
                t2 += t4>>64;\
                a = (word128(word64(t2)&m63) << 64) | word64(t4);\
                t2 *= 2;\
                a += m & m126;\
                a += t2>>64;}

            poly_step(a1, kh1, kl1, nhA);
            if (T_128BitTag)
                poly_step(a2, kh2, kl2, nhB);
        #else
            #define poly_step(ah, al, kh, kl, mh, ml) \
            {   word64 t1h, t1l, t2h, t2l, t3h, t3l, z=0; \
                /* compute ab*cd, put bd into result registers */ \
                MUL64(t2h,t2l,ah,kl); \
                MUL64(t3h,t3l,al,kh); \
                MUL64(t1h,t1l,ah,2*kh); \
                MUL64(ah,al,al,kl); \
                /* add together ad + bc */ \
                ADD128(t2h,t2l,t3h,t3l); \
                /* add 2 * ac to result */ \
                ADD128(ah,al,t1h,t1l); \
                /* now (ah,al), (t2l,2*t2h) need summing */ \
                /* first add the high registers, carrying into t2h */ \
                ADD128(t2h,ah,z,t2l); \
                /* double t2h and add top bit of ah */ \
                t2h += t2h + (ah >> 63); \
                ah &= m63; \
                /* now add the low registers */ \
                mh &= m62; \
                ADD128(ah,al,mh,ml); \
                ADD128(ah,al,z,t2h); \
            }

            poly_step(ah1, al1, kh1, kl1, nhA1, nhA0);
            if (T_128BitTag)
                poly_step(ah2, al2, kh2, kl2, nhB1, nhB0);
        #endif
        #endif // #if VMAC_BOOL_32BIT
    } while (blocksRemainingInWord64);

    #if VMAC_BOOL_WORD128
        (polyS+0*4)[0]=word64(a1>>64); (polyS+0*4)[1]=word64(a1);
        if (T_128BitTag)
        {
            (polyS+1*4)[0]=word64(a2>>64); (polyS+1*4)[1]=word64(a2);
        }
    #elif !VMAC_BOOL_32BIT
        (polyS+0*4)[0]=ah1; (polyS+0*4)[1]=al1;
        if (T_128BitTag)
        {
            (polyS+1*4)[0]=ah2; (polyS+1*4)[1]=al2;
        }
    #endif
}

inline void VMAC_Base::VHASH_Update(const word64 *data, size_t blocksRemainingInWord64)
{
#if CRYPTOPP_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86
    if (HasSSE2())
    {
        VHASH_Update_SSE2(data, blocksRemainingInWord64, 0);
        if (m_is128)
            VHASH_Update_SSE2(data, blocksRemainingInWord64, 1);
        m_isFirstBlock = false;
    }
    else
#endif
    {
        if (m_is128)
            VHASH_Update_Template<true>(data, blocksRemainingInWord64);
        else
            VHASH_Update_Template<false>(data, blocksRemainingInWord64);
    }
}

size_t VMAC_Base::HashMultipleBlocks(const word64 *data, size_t length)
{
    size_t remaining = ModPowerOf2(length, m_L1KeyLength);
    VHASH_Update(data, (length-remaining)/8);
    return remaining;
}

word64 L3Hash(const word64 *input, const word64 *l3Key, size_t len)
{
    word64 rh, rl, t, z=0;
    word64 p1 = input[0], p2 = input[1];
    word64 k1 = l3Key[0], k2 = l3Key[1];

    /* fully reduce (p1,p2)+(len,0) mod p127 */
    t = p1 >> 63;
    p1 &= m63;
    ADD128(p1, p2, len, t);
    /* At this point, (p1,p2) is at most 2^127+(len<<64) */
    t = (p1 > m63) + ((p1 == m63) & (p2 == m64));
    ADD128(p1, p2, z, t);
    p1 &= m63;

    /* compute (p1,p2)/(2^64-2^32) and (p1,p2)%(2^64-2^32) */
    t = p1 + (p2 >> 32);
    t += (t >> 32);
    t += (word32)t > 0xfffffffeU;
    p1 += (t >> 32);
    p2 += (p1 << 32);

    /* compute (p1+k1)%p64 and (p2+k2)%p64 */
    p1 += k1;
    p1 += (0 - (p1 < k1)) & 257;
    p2 += k2;
    p2 += (0 - (p2 < k2)) & 257;

    /* compute (p1+k1)*(p2+k2)%p64 */
    MUL64(rh, rl, p1, p2);
    t = rh >> 56;
    ADD128(t, rl, z, rh);
    rh <<= 8;
    ADD128(t, rl, z, rh);
    t += t << 8;
    rl += t;
    rl += (0 - (rl < t)) & 257;
    rl += (0 - (rl > p64-1)) & 257;
    return rl;
}
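
// Worked note (not part of the original source): the final reduction above
// works in Z_p64 with p64 = 2^64 - 257, so 2^64 = 257 (mod p64). A high word
// rh therefore contributes rh*257 = (rh << 8) + rh, which is exactly what the
// 't = rh >> 56; ... rh <<= 8;' sequence adds into rl, collecting the
// shifted-out top bits and carries in t and folding them back the same way.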

void VMAC_Base::TruncatedFinal(byte *mac, size_t size)
{
    CRYPTOPP_ASSERT(IsAlignedOn(DataBuf(),GetAlignmentOf<word64>()));
    CRYPTOPP_ASSERT(IsAlignedOn(m_polyState(),GetAlignmentOf<word64>()));
    size_t len = ModPowerOf2(GetBitCountLo()/8, m_L1KeyLength);

    if (len)
    {
        memset(m_data()+len, 0, (0-len)%16);
        VHASH_Update(DataBuf(), ((len+15)/16)*2);
        len *= 8; // convert to bits
    }
    else if (m_isFirstBlock)
    {
        // special case for empty string
        m_polyState()[0] = m_polyState()[2];
        m_polyState()[1] = m_polyState()[3];
        if (m_is128)
        {
            m_polyState()[4] = m_polyState()[6];
            m_polyState()[5] = m_polyState()[7];
        }
    }

    if (m_is128)
    {
        word64 t[2];
        t[0] = L3Hash(m_polyState(), m_l3Key(), len) + GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad());
        t[1] = L3Hash(m_polyState()+4, m_l3Key()+2, len) + GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad()+8);
        if (size == 16)
        {
            PutWord(false, BIG_ENDIAN_ORDER, mac, t[0]);
            PutWord(false, BIG_ENDIAN_ORDER, mac+8, t[1]);
        }
        else
        {
            t[0] = ConditionalByteReverse(BIG_ENDIAN_ORDER, t[0]);
            t[1] = ConditionalByteReverse(BIG_ENDIAN_ORDER, t[1]);
            memcpy(mac, t, size);
        }
    }
    else
    {
        word64 t = L3Hash(m_polyState(), m_l3Key(), len);
        t += GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad() + (m_nonce()[IVSize()-1]&1) * 8);
        if (size == 8)
            PutWord(false, BIG_ENDIAN_ORDER, mac, t);
        else
        {
            t = ConditionalByteReverse(BIG_ENDIAN_ORDER, t);
            memcpy(mac, &t, size);
        }
    }
}

NAMESPACE_END
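
For orientation, here is a minimal sketch of driving this class through the
public API. It is an illustration, not part of vmac.cpp, and assumes the
standard Crypto++ headers aes.h and osrng.h along with the VMAC template
declared in vmac.h:

#include "vmac.h"
#include "aes.h"
#include "osrng.h"
#include "secblock.h"

int main()
{
    using namespace CryptoPP;
    AutoSeededRandomPool prng;

    // VMAC uses the underlying cipher's key length; the nonce is at most one block.
    SecByteBlock key(AES::DEFAULT_KEYLENGTH), iv(AES::BLOCKSIZE);
    prng.GenerateBlock(key, key.size());
    prng.GenerateBlock(iv, iv.size());
    iv[0] &= 0x7f;  // clear the top nonce bit, as GetNextIV() above does

    VMAC<AES, 64> vmac;  // 64-bit tag, so DigestSize() == 8
    vmac.SetKeyWithIV(key, key.size(), iv, iv.size());

    const char msg[] = "attack at dawn";
    byte tag[8];
    vmac.CalculateDigest(tag, reinterpret_cast<const byte*>(msg), sizeof(msg) - 1);
    return 0;
}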