Crypto++ 8.2
Free C++ class library of cryptographic schemes
vmac.cpp
1 // vmac.cpp - originally written and placed in the public domain by Wei Dai
2 // based on Ted Krovetz's public domain vmac.c and draft-krovetz-vmac-01.txt
3 
4 #include "pch.h"
5 #include "config.h"
6 
7 #include "vmac.h"
8 #include "cpu.h"
9 #include "argnames.h"
10 #include "secblock.h"
11 
12 #if defined(_MSC_VER) && !CRYPTOPP_BOOL_SLOW_WORD64
13 #include <intrin.h>
14 #endif
15 
16 #if defined(CRYPTOPP_DISABLE_VMAC_ASM)
17 # undef CRYPTOPP_X86_ASM_AVAILABLE
18 # undef CRYPTOPP_X32_ASM_AVAILABLE
19 # undef CRYPTOPP_X64_ASM_AVAILABLE
20 # undef CRYPTOPP_SSE2_ASM_AVAILABLE
21 #endif
22 
23 #if CRYPTOPP_MSC_VERSION
24 # pragma warning(disable: 4731)
25 #endif
26 
ANONYMOUS_NAMESPACE_BEGIN

// Use the native 128-bit integer accumulator only when the compiler provides
// one AND the dedicated x64 inline-asm path is not already selected.
#if defined(CRYPTOPP_WORD128_AVAILABLE) && !defined(CRYPTOPP_X64_ASM_AVAILABLE)
using CryptoPP::word128;
using CryptoPP::word64;
# define VMAC_BOOL_WORD128 1
#else
using CryptoPP::word64;
# define VMAC_BOOL_WORD128 0
#endif

// File-local constants used by the VHASH/L3 arithmetic below.
#ifdef __BORLANDC__
#define const // Turbo C++ 2006 workaround
#endif
const word64 p64 = W64LIT(0xfffffffffffffeff); /* 2^64 - 257 prime */
const word64 m62 = W64LIT(0x3fffffffffffffff); /* 62-bit mask */
const word64 m63 = W64LIT(0x7fffffffffffffff); /* 63-bit mask */
const word64 m64 = W64LIT(0xffffffffffffffff); /* 64-bit mask */
const word64 mpoly = W64LIT(0x1fffffff1fffffff); /* Poly key mask */
#ifdef __BORLANDC__
#undef const
#endif

#if VMAC_BOOL_WORD128
// workaround GCC Bug 31690: ICE with const __uint128_t and C++ front-end
# if defined(__powerpc__) && defined (CRYPTOPP_GCC_VERSION) && (CRYPTOPP_GCC_VERSION < 50300)
# define m126 ((word128(m62)<<64)|m64)
# else
const word128 m126 = (word128(m62)<<64)|m64; /* 126-bit mask */
# endif
#endif

ANONYMOUS_NAMESPACE_END
60 
61 NAMESPACE_BEGIN(CryptoPP)
62 
63 void VMAC_Base::UncheckedSetKey(const byte *userKey, unsigned int keylength, const NameValuePairs &params)
64 {
65  int digestLength = params.GetIntValueWithDefault(Name::DigestSize(), DefaultDigestSize());
66  if (digestLength != 8 && digestLength != 16)
67  throw InvalidArgument("VMAC: DigestSize must be 8 or 16");
68  m_is128 = digestLength == 16;
69 
70  m_L1KeyLength = params.GetIntValueWithDefault(Name::L1KeyLength(), 128);
71  if (m_L1KeyLength <= 0 || m_L1KeyLength % 128 != 0)
72  throw InvalidArgument("VMAC: L1KeyLength must be a positive multiple of 128");
73 
74  AllocateBlocks();
75 
76  BlockCipher &cipher = AccessCipher();
77  cipher.SetKey(userKey, keylength, params);
78  const unsigned int blockSize = cipher.BlockSize();
79  const unsigned int blockSizeInWords = blockSize / sizeof(word64);
82  in.CleanNew(blockSize);
83  size_t i;
84 
85  /* Fill nh key */
86  in[0] = 0x80;
87  cipher.AdvancedProcessBlocks(in, NULLPTR, (byte *)m_nhKey(), m_nhKeySize()*sizeof(word64), cipher.BT_InBlockIsCounter);
88  ConditionalByteReverse<word64>(BIG_ENDIAN_ORDER, m_nhKey(), m_nhKey(), m_nhKeySize()*sizeof(word64));
89 
90  /* Fill poly key */
91  in[0] = 0xC0;
92  in[15] = 0;
93  for (i = 0; i <= (size_t)m_is128; i++)
94  {
95  cipher.ProcessBlock(in, out.BytePtr());
96  m_polyState()[i*4+2] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr()) & mpoly;
97  m_polyState()[i*4+3] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr()+8) & mpoly;
98  in[15]++;
99  }
100 
101  /* Fill ip key */
102  in[0] = 0xE0;
103  in[15] = 0;
104  word64 *l3Key = m_l3Key();
105  CRYPTOPP_ASSERT(IsAlignedOn(l3Key,GetAlignmentOf<word64>()));
106 
107  for (i = 0; i <= (size_t)m_is128; i++)
108  do
109  {
110  cipher.ProcessBlock(in, out.BytePtr());
111  l3Key[i*2+0] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr());
112  l3Key[i*2+1] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr()+8);
113  in[15]++;
114  } while ((l3Key[i*2+0] >= p64) || (l3Key[i*2+1] >= p64));
115 
116  m_padCached = false;
117  size_t nonceLength;
118  const byte *nonce = GetIVAndThrowIfInvalid(params, nonceLength);
119  Resynchronize(nonce, (int)nonceLength);
120 }
121 
123 {
125  IV[0] &= 0x7f;
126 }
127 
// Installs a new nonce. For 128-bit tags the pad is always recomputed by
// encrypting the zero-padded nonce. For 64-bit tags, two nonces that differ
// only in the last bit share the same cipher output, so the pad from the
// previous nonce can be reused when only that bit changed; the bit itself is
// consumed later in TruncatedFinal() to pick a pad half.
void VMAC_Base::Resynchronize(const byte *nonce, int len)
{
	size_t length = ThrowIfInvalidIVLength(len);
	size_t s = IVSize();
	byte *storedNonce = m_nonce();

	if (m_is128)
	{
		// Left-pad the nonce with zeros to the cipher block size and encrypt.
		memset(storedNonce, 0, s-length);
		memcpy(storedNonce+s-length, nonce, length);
		AccessCipher().ProcessBlock(storedNonce, m_pad());
	}
	else
	{
		// Cache check: same nonce bytes (ignoring the last bit) and the
		// stored copy correctly zero-padded?
		if (m_padCached && (storedNonce[s-1] | 1) == (nonce[length-1] | 1))
		{
			m_padCached = VerifyBufsEqual(storedNonce+s-length, nonce, length-1);
			for (size_t i=0; m_padCached && i<s-length; i++)
				m_padCached = (storedNonce[i] == 0);
		}
		if (!m_padCached)
		{
			// Encrypt the nonce with its low bit forced to zero; both
			// low-bit variants will then reuse this pad.
			memset(storedNonce, 0, s-length);
			memcpy(storedNonce+s-length, nonce, length-1);
			storedNonce[s-1] = nonce[length-1] & 0xfe;
			AccessCipher().ProcessBlock(storedNonce, m_pad());
			m_padCached = true;
		}
		// Keep the真 last byte (with its low bit) for TruncatedFinal's
		// pad-half selection.
		storedNonce[s-1] = nonce[length-1];
	}
	m_isFirstBlock = true;
	Restart();
}
161 
// IteratedHash hook that VMAC never uses — all hashing happens in the
// VHASH_Update* routines. Reaching this function indicates a logic error,
// hence the assert plus an unconditional throw.
void VMAC_Base::HashEndianCorrectedBlock(const word64 *data)
{
	CRYPTOPP_UNUSED(data);
	CRYPTOPP_ASSERT(false);
	throw NotImplemented("VMAC: HashEndianCorrectedBlock is not implemented");
}
168 
170 {
171  return
172 #if CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
173  HasSSE2() ? 16 :
174 #endif
175  GetCipher().OptimalDataAlignment();
176 }
177 
178 #if CRYPTOPP_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86
179 #if CRYPTOPP_MSC_VERSION
180 # pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code
181 #endif
182 
// x86/MMX implementation of one VHASH pass (NH compression followed by the
// 127-bit polynomial step) for a single tag part (0 or 1). The entire body
// is hand-written inline assembly; the C++ prologue only gathers pointers.
// Register usage (Intel syntax): ebx = L1 key length in word64s after shr 3,
// esi = data, edi = nhK cursor, ecx = remaining word64s, eax = polyS base,
// dl = m_isFirstBlock flag. mm0-mm7 carry the 32x32->64 partial products.
CRYPTOPP_NOINLINE
void VMAC_Base::VHASH_Update_SSE2(const word64 *data, size_t blocksRemainingInWord64, int tagPart)
{
	const word64 *nhK = m_nhKey();
	word64 *polyS = (word64*)(void*)m_polyState();
	word32 L1KeyLength = m_L1KeyLength;

	// These are used in the ASM, but some analysis services miss it.
	CRYPTOPP_UNUSED(data); CRYPTOPP_UNUSED(tagPart);
	CRYPTOPP_UNUSED(L1KeyLength);
	CRYPTOPP_UNUSED(blocksRemainingInWord64);

	// This inline ASM is tricky, and down right difficult when PIC is
	// in effect. The ASM uses all the general purpose registers. When
	// PIC is in effect, GCC uses EBX as a base register. Saving EBX with
	// 'mov %%ebx, %0' and restoring EBX with 'mov %0, %%ebx' causes GCC
	// to generate 'mov -0x40(%ebx), %ebx' for the restore. That obviously
	// won't work. We can push and pop EBX, but then we have to be careful
	// because GCC references %1 (L1KeyLength) relative to ESP, which is
	// also used in the function and no longer accurate. Attempting to
	// sidestep the issues with clobber lists results in "error: ‘asm’
	// operand has impossible constraints", though we were able to tell
	// GCC that ESP is dirty. The problems with GCC are the reason for the
	// pushes and pops rather than the original moves.
#ifdef __GNUC__
	__asm__ __volatile__
	(
	AS1( push %0) // L1KeyLength
	AS1( pop %%ebx)
	INTEL_NOPREFIX
#else
	#if defined(__INTEL_COMPILER)
	char isFirstBlock = m_isFirstBlock;
	AS2( mov ebx, [L1KeyLength])
	AS2( mov dl, [isFirstBlock])
	#else
	AS2( mov ecx, this)
	AS2( mov ebx, [ecx+m_L1KeyLength])
	AS2( mov dl, [ecx+m_isFirstBlock])
	#endif
	AS2( mov eax, tagPart)
	AS2( shl eax, 4)
	AS2( mov edi, nhK)
	AS2( add edi, eax)
	AS2( add eax, eax)
	AS2( add eax, polyS)

	AS2( mov esi, data)
	AS2( mov ecx, blocksRemainingInWord64)
#endif

	// Outer loop (label 4): one NH pass over min(remaining, L1-key) words.
	AS2( shr ebx, 3)
	AS_PUSH_IF86( bp)
	AS2( sub esp, 12)
	ASL(4)
	AS2( mov ebp, ebx)
	AS2( cmp ecx, ebx)
	AS2( cmovl ebp, ecx)
	AS2( sub ecx, ebp)
	AS2( lea ebp, [edi+8*ebp]) // end of nhK
	AS2( movq mm6, [esi])
	AS2( paddq mm6, [edi])
	AS2( movq mm5, [esi+8])
	AS2( paddq mm5, [edi+8])
	AS2( add esi, 16)
	AS2( add edi, 16)
	AS2( movq mm4, mm6)
	ASS( pshufw mm2, mm6, 1, 0, 3, 2)
	AS2( pmuludq mm6, mm5)
	ASS( pshufw mm3, mm5, 1, 0, 3, 2)
	AS2( pmuludq mm5, mm2)
	AS2( pmuludq mm2, mm3)
	AS2( pmuludq mm3, mm4)
	AS2( pxor mm7, mm7)
	AS2( movd [esp], mm6)
	AS2( psrlq mm6, 32)
	AS2( movd [esp+4], mm5)
	AS2( psrlq mm5, 32)
	AS2( cmp edi, ebp)
	ASJ( je, 1, f)
	// NH inner loop (label 0): accumulate (data+key) 64x64 products via
	// four 32x32 pmuludq partial products per pair of words.
	ASL(0)
	AS2( movq mm0, [esi])
	AS2( paddq mm0, [edi])
	AS2( movq mm1, [esi+8])
	AS2( paddq mm1, [edi+8])
	AS2( add esi, 16)
	AS2( add edi, 16)
	AS2( movq mm4, mm0)
	AS2( paddq mm5, mm2)
	ASS( pshufw mm2, mm0, 1, 0, 3, 2)
	AS2( pmuludq mm0, mm1)
	AS2( movd [esp+8], mm3)
	AS2( psrlq mm3, 32)
	AS2( paddq mm5, mm3)
	ASS( pshufw mm3, mm1, 1, 0, 3, 2)
	AS2( pmuludq mm1, mm2)
	AS2( pmuludq mm2, mm3)
	AS2( pmuludq mm3, mm4)
	AS2( movd mm4, [esp])
	AS2( paddq mm7, mm4)
	AS2( movd mm4, [esp+4])
	AS2( paddq mm6, mm4)
	AS2( movd mm4, [esp+8])
	AS2( paddq mm6, mm4)
	AS2( movd [esp], mm0)
	AS2( psrlq mm0, 32)
	AS2( paddq mm6, mm0)
	AS2( movd [esp+4], mm1)
	AS2( psrlq mm1, 32)
	AS2( paddq mm5, mm1)
	AS2( cmp edi, ebp)
	ASJ( jne, 0, b)
	// Loop tail (label 1): flush the pending partial products and fold the
	// NH result down to a 126-bit value in (mm5:mm6:[esp]).
	ASL(1)
	AS2( paddq mm5, mm2)
	AS2( movd [esp+8], mm3)
	AS2( psrlq mm3, 32)
	AS2( paddq mm5, mm3)
	AS2( movd mm4, [esp])
	AS2( paddq mm7, mm4)
	AS2( movd mm4, [esp+4])
	AS2( paddq mm6, mm4)
	AS2( movd mm4, [esp+8])
	AS2( paddq mm6, mm4)
	AS2( lea ebp, [8*ebx])
	AS2( sub edi, ebp) // reset edi to start of nhK

	AS2( movd [esp], mm7)
	AS2( psrlq mm7, 32)
	AS2( paddq mm6, mm7)
	AS2( movd [esp+4], mm6)
	AS2( psrlq mm6, 32)
	AS2( paddq mm5, mm6)
	AS2( psllq mm5, 2)
	AS2( psrlq mm5, 2)

// Polynomial state layout at [eax]: a = accumulator, k = poly key, each
// split into 32-bit quarters.
#define a0 [eax+2*4]
#define a1 [eax+3*4]
#define a2 [eax+0*4]
#define a3 [eax+1*4]
#define k0 [eax+2*8+2*4]
#define k1 [eax+2*8+3*4]
#define k2 [eax+2*8+0*4]
#define k3 [eax+2*8+1*4]
	// First block: initialize the accumulator to NH + key, no multiply.
	AS2( test dl, dl)
	ASJ( jz, 2, f)
	AS2( movd mm1, k0)
	AS2( movd mm0, [esp])
	AS2( paddq mm0, mm1)
	AS2( movd a0, mm0)
	AS2( psrlq mm0, 32)
	AS2( movd mm1, k1)
	AS2( movd mm2, [esp+4])
	AS2( paddq mm1, mm2)
	AS2( paddq mm0, mm1)
	AS2( movd a1, mm0)
	AS2( psrlq mm0, 32)
	AS2( paddq mm5, k2)
	AS2( paddq mm0, mm5)
	AS2( movq a2, mm0)
	AS2( xor edx, edx)
	ASJ( jmp, 3, f)
	// Subsequent blocks (label 2): a = a*k + NH mod 2^127-1, computed from
	// 32x32 partial products (comments name each product).
	ASL(2)
	AS2( movd mm0, a3)
	AS2( movq mm4, mm0)
	AS2( pmuludq mm0, k3) // a3*k3
	AS2( movd mm1, a0)
	AS2( pmuludq mm1, k2) // a0*k2
	AS2( movd mm2, a1)
	AS2( movd mm6, k1)
	AS2( pmuludq mm2, mm6) // a1*k1
	AS2( movd mm3, a2)
	AS2( psllq mm0, 1)
	AS2( paddq mm0, mm5)
	AS2( movq mm5, mm3)
	AS2( movd mm7, k0)
	AS2( pmuludq mm3, mm7) // a2*k0
	AS2( pmuludq mm4, mm7) // a3*k0
	AS2( pmuludq mm5, mm6) // a2*k1
	AS2( paddq mm0, mm1)
	AS2( movd mm1, a1)
	AS2( paddq mm4, mm5)
	AS2( movq mm5, mm1)
	AS2( pmuludq mm1, k2) // a1*k2
	AS2( paddq mm0, mm2)
	AS2( movd mm2, a0)
	AS2( paddq mm0, mm3)
	AS2( movq mm3, mm2)
	AS2( pmuludq mm2, k3) // a0*k3
	AS2( pmuludq mm3, mm7) // a0*k0
	AS2( movd [esp+8], mm0)
	AS2( psrlq mm0, 32)
	AS2( pmuludq mm7, mm5) // a1*k0
	AS2( pmuludq mm5, k3) // a1*k3
	AS2( paddq mm0, mm1)
	AS2( movd mm1, a2)
	AS2( pmuludq mm1, k2) // a2*k2
	AS2( paddq mm0, mm2)
	AS2( paddq mm0, mm4)
	AS2( movq mm4, mm0)
	AS2( movd mm2, a3)
	AS2( pmuludq mm2, mm6) // a3*k1
	AS2( pmuludq mm6, a0) // a0*k1
	AS2( psrlq mm0, 31)
	AS2( paddq mm0, mm3)
	AS2( movd mm3, [esp])
	AS2( paddq mm0, mm3)
	AS2( movd mm3, a2)
	AS2( pmuludq mm3, k3) // a2*k3
	AS2( paddq mm5, mm1)
	AS2( movd mm1, a3)
	AS2( pmuludq mm1, k2) // a3*k2
	AS2( paddq mm5, mm2)
	AS2( movd mm2, [esp+4])
	AS2( psllq mm5, 1)
	AS2( paddq mm0, mm5)
	AS2( psllq mm4, 33)
	AS2( movd a0, mm0)
	AS2( psrlq mm0, 32)
	AS2( paddq mm6, mm7)
	AS2( movd mm7, [esp+8])
	AS2( paddq mm0, mm6)
	AS2( paddq mm0, mm2)
	AS2( paddq mm3, mm1)
	AS2( psllq mm3, 1)
	AS2( paddq mm0, mm3)
	AS2( psrlq mm4, 1)
	AS2( movd a1, mm0)
	AS2( psrlq mm0, 32)
	AS2( por mm4, mm7)
	AS2( paddq mm0, mm4)
	AS2( movq a2, mm0)
#undef a0
#undef a1
#undef a2
#undef a3
#undef k0
#undef k1
#undef k2
#undef k3

	// Label 3: loop back for the next L1 chunk if any data remains, then
	// restore the stack/frame and clear MMX state.
	ASL(3)
	AS2( test ecx, ecx)
	ASJ( jnz, 4, b)
	AS2( add esp, 12)
	AS_POP_IF86( bp)
	AS1( emms)
#ifdef __GNUC__
	ATT_PREFIX
	:
	: "m" (L1KeyLength), "c" (blocksRemainingInWord64), "S" (data),
	"D" (nhK+tagPart*2), "d" (m_isFirstBlock), "a" (polyS+tagPart*4)
	: "ebx", "memory", "cc"
	);
#endif
}
438 #endif
439 
// Arithmetic back-ends for the portable VHASH implementation. Exactly one
// set of DeclareNH/MUL64/AccumulateNH (and possibly ADD128/Multiply128) is
// selected per platform:
//  - word128 path: compiler-native 128-bit integers.
//  - x64 GCC asm path: MULQ/ADCQ inline assembly.
//  - MSVC 64-bit path: the _umul128 intrinsic.
//  - 32-bit fallback: three-limb carry-save accumulation via MUL32.
#if VMAC_BOOL_WORD128
	#define DeclareNH(a) word128 a=0
	#define MUL64(rh,rl,i1,i2) {word128 p = word128(i1)*(i2); rh = word64(p>>64); rl = word64(p);}
	#define AccumulateNH(a, b, c) a += word128(b)*(c)
	#define Multiply128(r, i1, i2) r = word128(word64(i1)) * word64(i2)
#else
	#if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER) && (defined(_M_IX86) || defined(_M_X64) || defined(_M_IA64))
		#define MUL32(a, b) __emulu(word32(a), word32(b))
	#else
		#define MUL32(a, b) ((word64)((word32)(a)) * (word32)(b))
	#endif
	#if defined(CRYPTOPP_X64_ASM_AVAILABLE)
		#define DeclareNH(a) word64 a##0=0, a##1=0
		#define MUL64(rh,rl,i1,i2) asm ("mulq %3" : "=a"(rl), "=d"(rh) : "a"(i1), "g"(i2) : "cc");
		#define AccumulateNH(a, b, c) asm ("mulq %3; addq %%rax, %0; adcq %%rdx, %1" : "+r"(a##0), "+r"(a##1) : "a"(b), "g"(c) : "%rdx", "cc");
		#define ADD128(rh,rl,ih,il) asm ("addq %3, %1; adcq %2, %0" : "+r"(rh),"+r"(rl) : "r"(ih),"r"(il) : "cc");
	#elif defined(_MSC_VER) && !CRYPTOPP_BOOL_SLOW_WORD64
		#define DeclareNH(a) word64 a##0=0, a##1=0
		#define MUL64(rh,rl,i1,i2) (rl) = _umul128(i1,i2,&(rh));
		#define AccumulateNH(a, b, c) {\
			word64 ph, pl;\
			pl = _umul128(b,c,&ph);\
			a##0 += pl;\
			a##1 += ph + (a##0 < pl);}
	#else
		#define VMAC_BOOL_32BIT 1
		#define DeclareNH(a) word64 a##0=0, a##1=0, a##2=0
		#define MUL64(rh,rl,i1,i2) \
			{ word64 _i1 = (i1), _i2 = (i2); \
			word64 m1= MUL32(_i1,_i2>>32); \
			word64 m2= MUL32(_i1>>32,_i2); \
			rh = MUL32(_i1>>32,_i2>>32); \
			rl = MUL32(_i1,_i2); \
			ADD128(rh,rl,(m1 >> 32),(m1 << 32)); \
			ADD128(rh,rl,(m2 >> 32),(m2 << 32)); \
			}
		#define AccumulateNH(a, b, c) {\
			word64 p = MUL32(b, c);\
			a##1 += word32((p)>>32);\
			a##0 += word32(p);\
			p = MUL32((b)>>32, c);\
			a##2 += word32((p)>>32);\
			a##1 += word32(p);\
			p = MUL32((b)>>32, (c)>>32);\
			a##2 += p;\
			p = MUL32(b, (c)>>32);\
			a##1 += word32(p);\
			a##2 += word32(p>>32);}
	#endif
#endif
#ifndef VMAC_BOOL_32BIT
	#define VMAC_BOOL_32BIT 0
#endif
#ifndef ADD128
	// Portable 128-bit add: carry out of the low word via unsigned compare.
	#define ADD128(rh,rl,ih,il) \
		{ word64 _il = (il); \
		(rl) += (_il); \
		(rh) += (ih) + ((rl) < (_il)); \
		}
#endif
500 
// Portable VHASH: for each L1-key-sized chunk, compress the data with NH
// (dot product of data+key word pairs), then fold the NH output into the
// running polynomial accumulator modulo 2^127-1. T_128BitTag runs a second
// independent NH/poly lane (nhB / a2) for 128-bit tags. The three arithmetic
// back-ends selected above (word128 / 64-bit pairs / 32-bit limbs) each get
// their own state-handling code below.
template <bool T_128BitTag>
void VMAC_Base::VHASH_Update_Template(const word64 *data, size_t blocksRemainingInWord64)
{
	CRYPTOPP_ASSERT(IsAlignedOn(m_polyState(),GetAlignmentOf<word64>()));
	CRYPTOPP_ASSERT(IsAlignedOn(m_nhKey(),GetAlignmentOf<word64>()));

	// One NH step: two little-endian data words, accumulated against the NH
	// key at offset i (and the key shifted by 2 for the second lane).
	#define INNER_LOOP_ITERATION(j) {\
		word64 d0 = ConditionalByteReverse(LITTLE_ENDIAN_ORDER, data[i+2*j+0]);\
		word64 d1 = ConditionalByteReverse(LITTLE_ENDIAN_ORDER, data[i+2*j+1]);\
		AccumulateNH(nhA, d0+nhK[i+2*j+0], d1+nhK[i+2*j+1]);\
		if (T_128BitTag)\
			AccumulateNH(nhB, d0+nhK[i+2*j+2], d1+nhK[i+2*j+3]);\
		}

	size_t L1KeyLengthInWord64 = m_L1KeyLength / 8;
	size_t innerLoopEnd = L1KeyLengthInWord64;
	const word64 *nhK = m_nhKey();
	word64 *polyS = (word64*)(void*)m_polyState();
	bool isFirstBlock = true;
	size_t i;

	#if !VMAC_BOOL_32BIT
		// Load poly keys (and accumulators lazily, below) into locals.
		#if VMAC_BOOL_WORD128
			word128 a1=0, a2=0;
		#else
			word64 ah1=0, al1=0, ah2=0, al2=0;
		#endif
		word64 kh1, kl1, kh2, kl2;
		kh1=(polyS+0*4+2)[0]; kl1=(polyS+0*4+2)[1];
		if (T_128BitTag)
		{
			kh2=(polyS+1*4+2)[0]; kl2=(polyS+1*4+2)[1];
		}
	#endif

	do
	{
		DeclareNH(nhA);
		DeclareNH(nhB);

		// NH pass: handle a possibly-short final chunk (first the ragged
		// remainder mod 8, then full 8-word strides).
		i = 0;
		if (blocksRemainingInWord64 < L1KeyLengthInWord64)
		{
			if (blocksRemainingInWord64 % 8)
			{
				innerLoopEnd = blocksRemainingInWord64 % 8;
				for (; i<innerLoopEnd; i+=2)
					INNER_LOOP_ITERATION(0);
			}
			innerLoopEnd = blocksRemainingInWord64;
		}
		for (; i<innerLoopEnd; i+=8)
		{
			INNER_LOOP_ITERATION(0);
			INNER_LOOP_ITERATION(1);
			INNER_LOOP_ITERATION(2);
			INNER_LOOP_ITERATION(3);
		}
		blocksRemainingInWord64 -= innerLoopEnd;
		data += innerLoopEnd;

	#if VMAC_BOOL_32BIT
		// 32-bit back-end: normalize the three carry-save limbs of each NH
		// lane into (nh0, nh1, nh2) with nh2 masked to 62 bits.
		word32 nh0[2], nh1[2];
		word64 nh2[2];

		nh0[0] = word32(nhA0);
		nhA1 += (nhA0 >> 32);
		nh1[0] = word32(nhA1);
		nh2[0] = (nhA2 + (nhA1 >> 32)) & m62;

		if (T_128BitTag)
		{
			nh0[1] = word32(nhB0);
			nhB1 += (nhB0 >> 32);
			nh1[1] = word32(nhB1);
			nh2[1] = (nhB2 + (nhB1 >> 32)) & m62;
		}

	// 32-bit views of the in-memory poly accumulator (a*) and key (k*),
	// with NativeByteOrder adjusting for endianness.
	#define a0 (((word32 *)(polyS+i*4))[2+NativeByteOrder::ToEnum()])
	#define a1 (*(((word32 *)(polyS+i*4))+3-NativeByteOrder::ToEnum())) // workaround for GCC 3.2
	#define a2 (((word32 *)(polyS+i*4))[0+NativeByteOrder::ToEnum()])
	#define a3 (*(((word32 *)(polyS+i*4))+1-NativeByteOrder::ToEnum()))
	#define aHi ((polyS+i*4)[0])
	#define k0 (((word32 *)(polyS+i*4+2))[2+NativeByteOrder::ToEnum()])
	#define k1 (*(((word32 *)(polyS+i*4+2))+3-NativeByteOrder::ToEnum()))
	#define k2 (((word32 *)(polyS+i*4+2))[0+NativeByteOrder::ToEnum()])
	#define k3 (*(((word32 *)(polyS+i*4+2))+1-NativeByteOrder::ToEnum()))
	#define kHi ((polyS+i*4+2)[0])

		// Very first chunk of the message: accumulator starts as NH + key.
		if (isFirstBlock)
		{
			isFirstBlock = false;
			if (m_isFirstBlock)
			{
				m_isFirstBlock = false;
				for (i=0; i<=(size_t)T_128BitTag; i++)
				{
					word64 t = (word64)nh0[i] + k0;
					a0 = (word32)t;
					t = (t >> 32) + nh1[i] + k1;
					a1 = (word32)t;
					aHi = (t >> 32) + nh2[i] + kHi;
				}
				continue;
			}
		}
		// Poly step per lane: a = a*k + nh mod 2^127-1, built from 32x32
		// products; doubled (2*kX) terms implement the mod-2^127 wraparound.
		for (i=0; i<=(size_t)T_128BitTag; i++)
		{
			word64 p, t;
			word32 t2;

			p = MUL32(a3, 2*k3);
			p += nh2[i];
			p += MUL32(a0, k2);
			p += MUL32(a1, k1);
			p += MUL32(a2, k0);
			t2 = (word32)p;
			p >>= 32;
			p += MUL32(a0, k3);
			p += MUL32(a1, k2);
			p += MUL32(a2, k1);
			p += MUL32(a3, k0);
			t = (word64(word32(p) & 0x7fffffff) << 32) | t2;
			p >>= 31;
			p += nh0[i];
			p += MUL32(a0, k0);
			p += MUL32(a1, 2*k3);
			p += MUL32(a2, 2*k2);
			p += MUL32(a3, 2*k1);
			t2 = (word32)p;
			p >>= 32;
			p += nh1[i];
			p += MUL32(a0, k1);
			p += MUL32(a1, k0);
			p += MUL32(a2, 2*k3);
			p += MUL32(a3, 2*k2);
			a0 = t2;
			a1 = (word32)p;
			aHi = (p >> 32) + t;
		}

	#undef a0
	#undef a1
	#undef a2
	#undef a3
	#undef aHi
	#undef k0
	#undef k1
	#undef k2
	#undef k3
	#undef kHi
	#else // #if VMAC_BOOL_32BIT
		// 64/128-bit back-ends: first chunk either seeds the accumulator
		// from NH + key, or reloads the saved accumulator from polyS.
		if (isFirstBlock)
		{
			isFirstBlock = false;
			if (m_isFirstBlock)
			{
				m_isFirstBlock = false;
				#if VMAC_BOOL_WORD128
					#define first_poly_step(a, kh, kl, m) a = (m & m126) + ((word128(kh) << 64) | kl)

					first_poly_step(a1, kh1, kl1, nhA);
					if (T_128BitTag)
						first_poly_step(a2, kh2, kl2, nhB);
				#else
					#define first_poly_step(ah, al, kh, kl, mh, ml) {\
						mh &= m62;\
						ADD128(mh, ml, kh, kl); \
						ah = mh; al = ml;}

					first_poly_step(ah1, al1, kh1, kl1, nhA1, nhA0);
					if (T_128BitTag)
						first_poly_step(ah2, al2, kh2, kl2, nhB1, nhB0);
				#endif
				continue;
			}
			else
			{
				#if VMAC_BOOL_WORD128
					a1 = (word128((polyS+0*4)[0]) << 64) | (polyS+0*4)[1];
				#else
					ah1=(polyS+0*4)[0]; al1=(polyS+0*4)[1];
				#endif
				if (T_128BitTag)
				{
					#if VMAC_BOOL_WORD128
						a2 = (word128((polyS+1*4)[0]) << 64) | (polyS+1*4)[1];
					#else
						ah2=(polyS+1*4)[0]; al2=(polyS+1*4)[1];
					#endif
				}
			}
		}

		#if VMAC_BOOL_WORD128
			// a = a*k + nh mod 2^127-1 using four 64x64 partial products.
			#define poly_step(a, kh, kl, m) \
			{ word128 t1, t2, t3, t4;\
				Multiply128(t2, a>>64, kl);\
				Multiply128(t3, a, kh);\
				Multiply128(t1, a, kl);\
				Multiply128(t4, a>>64, 2*kh);\
				t2 += t3;\
				t4 += t1;\
				t2 += t4>>64;\
				a = (word128(word64(t2)&m63) << 64) | word64(t4);\
				t2 *= 2;\
				a += m & m126;\
				a += t2>>64;}

			poly_step(a1, kh1, kl1, nhA);
			if (T_128BitTag)
				poly_step(a2, kh2, kl2, nhB);
		#else
			// Same poly step built from MUL64/ADD128; original upstream
			// comments retained — they track the carry bookkeeping.
			#define poly_step(ah, al, kh, kl, mh, ml) \
			{ word64 t1h, t1l, t2h, t2l, t3h, t3l, z=0; \
				/* compute ab*cd, put bd into result registers */ \
				MUL64(t2h,t2l,ah,kl); \
				MUL64(t3h,t3l,al,kh); \
				MUL64(t1h,t1l,ah,2*kh); \
				MUL64(ah,al,al,kl); \
				/* add together ad + bc */ \
				ADD128(t2h,t2l,t3h,t3l); \
				/* add 2 * ac to result */ \
				ADD128(ah,al,t1h,t1l); \
				/* now (ah,al), (t2l,2*t2h) need summing */ \
				/* first add the high registers, carrying into t2h */ \
				ADD128(t2h,ah,z,t2l); \
				/* double t2h and add top bit of ah */ \
				t2h += t2h + (ah >> 63); \
				ah &= m63; \
				/* now add the low registers */ \
				mh &= m62; \
				ADD128(ah,al,mh,ml); \
				ADD128(ah,al,z,t2h); \
			}

			poly_step(ah1, al1, kh1, kl1, nhA1, nhA0);
			if (T_128BitTag)
				poly_step(ah2, al2, kh2, kl2, nhB1, nhB0);
		#endif
	#endif // #if VMAC_BOOL_32BIT
	} while (blocksRemainingInWord64);

	// Persist the accumulator(s) back into polyS for the next call.
	#if VMAC_BOOL_WORD128
		(polyS+0*4)[0]=word64(a1>>64); (polyS+0*4)[1]=word64(a1);
		if (T_128BitTag)
		{
			(polyS+1*4)[0]=word64(a2>>64); (polyS+1*4)[1]=word64(a2);
		}
	#elif !VMAC_BOOL_32BIT
		(polyS+0*4)[0]=ah1; (polyS+0*4)[1]=al1;
		if (T_128BitTag)
		{
			(polyS+1*4)[0]=ah2; (polyS+1*4)[1]=al2;
		}
	#endif
}
758 
759 inline void VMAC_Base::VHASH_Update(const word64 *data, size_t blocksRemainingInWord64)
760 {
761 #if CRYPTOPP_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86
762  if (HasSSE2())
763  {
764  VHASH_Update_SSE2(data, blocksRemainingInWord64, 0);
765  if (m_is128)
766  VHASH_Update_SSE2(data, blocksRemainingInWord64, 1);
767  m_isFirstBlock = false;
768  }
769  else
770 #endif
771  {
772  if (m_is128)
773  VHASH_Update_Template<true>(data, blocksRemainingInWord64);
774  else
775  VHASH_Update_Template<false>(data, blocksRemainingInWord64);
776  }
777 }
778 
779 size_t VMAC_Base::HashMultipleBlocks(const word64 *data, size_t length)
780 {
781  size_t remaining = ModPowerOf2(length, m_L1KeyLength);
782  VHASH_Update(data, (length-remaining)/8);
783  return remaining;
784 }
785 
// L3 hash: compresses the 127-bit VHASH polynomial state (input[0]=high,
// input[1]=low) and the residual bit length 'len' to a single word64 using
// the two L3 keys (each < p64 = 2^64-257, enforced at key setup).
word64 L3Hash(const word64 *input, const word64 *l3Key, size_t len)
{
	word64 rh, rl, t, z=0;
	word64 p1 = input[0], p2 = input[1];
	word64 k1 = l3Key[0], k2 = l3Key[1];

	/* fully reduce (p1,p2)+(len,0) mod p127 */
	t = p1 >> 63;
	p1 &= m63;
	ADD128(p1, p2, len, t);
	/* At this point, (p1,p2) is at most 2^127+(len<<64) */
	t = (p1 > m63) + ((p1 == m63) & (p2 == m64));
	ADD128(p1, p2, z, t);
	p1 &= m63;

	/* compute (p1,p2)/(2^64-2^32) and (p1,p2)%(2^64-2^32) */
	t = p1 + (p2 >> 32);
	t += (t >> 32);
	t += (word32)t > 0xfffffffeU;
	p1 += (t >> 32);
	p2 += (p1 << 32);

	/* compute (p1+k1)%p64 and (p2+k2)%p64 */
	/* the "& 257" terms add back 2^64 mod p64 when the sum wrapped */
	p1 += k1;
	p1 += (0 - (p1 < k1)) & 257;
	p2 += k2;
	p2 += (0 - (p2 < k2)) & 257;

	/* compute (p1+k1)*(p2+k2)%p64 */
	MUL64(rh, rl, p1, p2);
	t = rh >> 56;
	ADD128(t, rl, z, rh);
	rh <<= 8;
	ADD128(t, rl, z, rh);
	t += t << 8;
	rl += t;
	rl += (0 - (rl < t)) & 257;
	rl += (0 - (rl > p64-1)) & 257;
	return rl;
}
826 
827 void VMAC_Base::TruncatedFinal(byte *mac, size_t size)
828 {
829  CRYPTOPP_ASSERT(IsAlignedOn(DataBuf(),GetAlignmentOf<word64>()));
830  CRYPTOPP_ASSERT(IsAlignedOn(m_polyState(),GetAlignmentOf<word64>()));
831  size_t len = ModPowerOf2(GetBitCountLo()/8, m_L1KeyLength);
832 
833  if (len)
834  {
835  memset(m_data()+len, 0, (0-len)%16);
836  VHASH_Update(DataBuf(), ((len+15)/16)*2);
837  len *= 8; // convert to bits
838  }
839  else if (m_isFirstBlock)
840  {
841  // special case for empty string
842  m_polyState()[0] = m_polyState()[2];
843  m_polyState()[1] = m_polyState()[3];
844  if (m_is128)
845  {
846  m_polyState()[4] = m_polyState()[6];
847  m_polyState()[5] = m_polyState()[7];
848  }
849  }
850 
851  if (m_is128)
852  {
853  word64 t[2];
854  t[0] = L3Hash(m_polyState(), m_l3Key(), len) + GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad());
855  t[1] = L3Hash(m_polyState()+4, m_l3Key()+2, len) + GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad()+8);
856  if (size == 16)
857  {
858  PutWord(false, BIG_ENDIAN_ORDER, mac, t[0]);
859  PutWord(false, BIG_ENDIAN_ORDER, mac+8, t[1]);
860  }
861  else
862  {
865  memcpy(mac, t, size);
866  }
867  }
868  else
869  {
870  word64 t = L3Hash(m_polyState(), m_l3Key(), len);
871  t += GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad() + (m_nonce()[IVSize()-1]&1) * 8);
872  if (size == 8)
873  PutWord(false, BIG_ENDIAN_ORDER, mac, t);
874  else
875  {
877  memcpy(mac, &t, size);
878  }
879  }
880 }
881 
882 NAMESPACE_END
VMAC_Base
VMAC message authentication code base class.
Definition: vmac.h:24
NotImplemented
A method was called which was not implemented.
Definition: cryptlib.h:232
BlockTransformation::ProcessBlock
void ProcessBlock(const byte *inBlock, byte *outBlock) const
Encrypt or decrypt a block.
Definition: cryptlib.h:879
IsAlignedOn
bool IsAlignedOn(const void *ptr, unsigned int alignment)
Determines whether ptr is aligned to a minimum value.
Definition: misc.h:1209
BIG_ENDIAN_ORDER
@ BIG_ENDIAN_ORDER
byte order is big-endian
Definition: cryptlib.h:147
secblock.h
Classes and functions for secure memory allocations.
BlockTransformation::AdvancedProcessBlocks
virtual size_t AdvancedProcessBlocks(const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) const
Encrypt and xor multiple blocks using additional flags.
Definition: cryptlib.cpp:141
CRYPTOPP_ASSERT
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.
Definition: trap.h:69
BlockTransformation::BT_InBlockIsCounter
@ BT_InBlockIsCounter
inBlock is a counter
Definition: cryptlib.h:917
BlockTransformation::BlockSize
virtual unsigned int BlockSize() const =0
Provides the block size of the cipher.
pch.h
Precompiled header file.
SecBlock::BytePtr
byte * BytePtr()
Provides a byte pointer to the first element in the memory block.
Definition: secblock.h:836
RandomNumberGenerator
Interface for random number generators.
Definition: cryptlib.h:1413
argnames.h
Standard names for retrieving values by name when working with NameValuePairs.
VMAC_Base::TruncatedFinal
void TruncatedFinal(byte *mac, size_t size)
Computes the hash of the current message.
Definition: vmac.cpp:827
SecBlock::CleanNew
void CleanNew(size_type newSize)
Change size without preserving contents.
Definition: secblock.h:1013
VMAC_Base::IVSize
unsigned int IVSize() const
Returns length of the IV accepted by this object.
Definition: vmac.h:29
BlockTransformation::OptimalDataAlignment
virtual unsigned int OptimalDataAlignment() const
Provides input and output data alignment for optimal performance.
Definition: cryptlib.cpp:190
cpu.h
Functions for CPU features and intrinsics.
VMAC_Base::Resynchronize
void Resynchronize(const byte *nonce, int length=-1)
Resynchronize with an IV.
Definition: vmac.cpp:128
VMAC_Base::OptimalDataAlignment
unsigned int OptimalDataAlignment() const
Provides input and output data alignment for optimal performance.
Definition: vmac.cpp:169
ConditionalByteReverse
T ConditionalByteReverse(ByteOrder order, T value)
Reverses bytes in a value depending upon endianness.
Definition: misc.h:2170
VerifyBufsEqual
CRYPTOPP_DLL bool CRYPTOPP_API VerifyBufsEqual(const byte *buf1, const byte *buf2, size_t count)
Performs a near constant-time comparison of two equally sized buffers.
Definition: misc.cpp:114
SimpleKeyingInterface::GetNextIV
virtual void GetNextIV(RandomNumberGenerator &rng, byte *iv)
Retrieves a secure IV for the next message.
Definition: cryptlib.cpp:136
InvalidArgument
An invalid argument was detected.
Definition: cryptlib.h:202
vmac.h
Classes for the VMAC message authentication code.
CryptoPP
Crypto++ library namespace.
VMAC_Base::GetNextIV
void GetNextIV(RandomNumberGenerator &rng, byte *IV)
Retrieves a secure IV for the next message.
Definition: vmac.cpp:122
config.h
Library configuration file.
SimpleKeyingInterface::SetKey
virtual void SetKey(const byte *key, size_t length, const NameValuePairs &params=g_nullNameValuePairs)
Sets or reset the key of this object.
Definition: cryptlib.cpp:58
BlockCipher
Interface for one direction (encryption or decryption) of a block cipher.
Definition: cryptlib.h:1278
ModPowerOf2
T2 ModPowerOf2(const T1 &a, const T2 &b)
Reduces a value to a power of 2.
Definition: misc.h:1102
SecBlock
Secure memory block with allocator and cleanup.
Definition: secblock.h:707
AlignedSecByteBlock
SecBlock using AllocatorWithCleanup<byte, true> typedef.
Definition: secblock.h:1095
PutWord
void PutWord(bool assumeAligned, ByteOrder order, byte *block, T value, const byte *xorBlock=NULL)
Access a block of memory.
Definition: misc.h:2485
NameValuePairs
Interface for retrieving values given their names.
Definition: cryptlib.h:321
IteratedHashBase< word64, MessageAuthenticationCode >::Restart
void Restart()
Restart the hash.
Definition: iterhash.cpp:159
HasSSE2
bool HasSSE2()
Determine SSE2 availability.
Definition: cpu.h:118