Crypto++  5.6.4
Free C++ class library of cryptographic schemes
vmac.cpp
1 // vmac.cpp - written and placed in the public domain by Wei Dai
2 // based on Ted Krovetz's public domain vmac.c and draft-krovetz-vmac-01.txt
3 
4 #include "pch.h"
5 #include "config.h"
6 
7 #include "vmac.h"
8 #include "cpu.h"
9 #include "argnames.h"
10 #include "secblock.h"
11 
12 #if defined(CRYPTOPP_DISABLE_VMAC_ASM)
13 # undef CRYPTOPP_X86_ASM_AVAILABLE
14 # undef CRYPTOPP_X32_ASM_AVAILABLE
15 # undef CRYPTOPP_X64_ASM_AVAILABLE
16 # undef CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
17 #endif
18 
19 #if CRYPTOPP_MSC_VERSION
20 # pragma warning(disable: 4731)
21 #endif
22 
23 NAMESPACE_BEGIN(CryptoPP)
24 
25 #if defined(_MSC_VER) && !CRYPTOPP_BOOL_SLOW_WORD64
26 #include <intrin.h>
27 #endif
28 
29 #if defined(CRYPTOPP_WORD128_AVAILABLE) && !defined(CRYPTOPP_X64_ASM_AVAILABLE)
30 # define VMAC_BOOL_WORD128 1
31 #else
32 # define VMAC_BOOL_WORD128 0
33 #endif
34 
35 #ifdef __BORLANDC__
36 #define const // Turbo C++ 2006 workaround
37 #endif
38 static const word64 p64 = W64LIT(0xfffffffffffffeff); /* 2^64 - 257 prime */
39 static const word64 m62 = W64LIT(0x3fffffffffffffff); /* 62-bit mask */
40 static const word64 m63 = W64LIT(0x7fffffffffffffff); /* 63-bit mask */
41 static const word64 m64 = W64LIT(0xffffffffffffffff); /* 64-bit mask */
42 static const word64 mpoly = W64LIT(0x1fffffff1fffffff); /* Poly key mask */
43 #ifdef __BORLANDC__
44 #undef const
45 #endif
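// The three hash stages work in different rings: NH accumulates modulo
// 2^128, the polynomial stage works modulo 2^127 - 1, and L3 works modulo
// p64 = 2^64 - 257. The masks above carve values down to those ranges;
// mpoly keeps each 32-bit half of a poly key below 2^29, which the spec
// requires so the multi-word multiplications stay carry-safe.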
46 #if VMAC_BOOL_WORD128
47 #ifdef __powerpc__
48 // workaround GCC Bug 31690: ICE with const __uint128_t and C++ front-end
49 #define m126 ((word128(m62)<<64)|m64)
50 #else
51 static const word128 m126 = (word128(m62)<<64)|m64; /* 126-bit mask */
52 #endif
53 #endif
54 
55 void VMAC_Base::UncheckedSetKey(const byte *userKey, unsigned int keylength, const NameValuePairs &params)
56 {
57  int digestLength = params.GetIntValueWithDefault(Name::DigestSize(), DefaultDigestSize());
58  if (digestLength != 8 && digestLength != 16)
59  throw InvalidArgument("VMAC: DigestSize must be 8 or 16");
60  m_is128 = digestLength == 16;
61 
62  m_L1KeyLength = params.GetIntValueWithDefault(Name::L1KeyLength(), 128);
63  if (m_L1KeyLength <= 0 || m_L1KeyLength % 128 != 0)
64  throw InvalidArgument("VMAC: L1KeyLength must be a positive multiple of 128");
65 
66  AllocateBlocks();
67 
68  BlockCipher &cipher = AccessCipher();
69  cipher.SetKey(userKey, keylength, params);
70  const unsigned int blockSize = cipher.BlockSize();
71  const unsigned int blockSizeInWords = blockSize / sizeof(word64);
72  SecBlock<word64> out(blockSizeInWords);
73  SecByteBlock in;
74  in.CleanNew(blockSize);
75  size_t i;
76 
77  /* Fill nh key */
78  in[0] = 0x80;
79  cipher.AdvancedProcessBlocks(in, NULL, (byte *)m_nhKey(), m_nhKeySize()*sizeof(word64), cipher.BT_InBlockIsCounter);
80  ConditionalByteReverse<word64>(BIG_ENDIAN_ORDER, m_nhKey(), m_nhKey(), m_nhKeySize()*sizeof(word64));
81 
82  /* Fill poly key */
83  in[0] = 0xC0;
84  in[15] = 0;
85  for (i = 0; i <= (size_t)m_is128; i++)
86  {
87  cipher.ProcessBlock(in, out.BytePtr());
88  m_polyState()[i*4+2] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr()) & mpoly;
89  m_polyState()[i*4+3] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr()+8) & mpoly;
90  in[15]++;
91  }
92 
93  /* Fill ip key */
94  in[0] = 0xE0;
95  in[15] = 0;
96  word64 *l3Key = m_l3Key();
97  CRYPTOPP_ASSERT(IsAlignedOn(l3Key,GetAlignmentOf<word64>()));
98 
99  for (i = 0; i <= (size_t)m_is128; i++)
100  do
101  {
102  cipher.ProcessBlock(in, out.BytePtr());
103  l3Key[i*2+0] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr());
104  l3Key[i*2+1] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr()+8);
105  in[15]++;
106  } while ((l3Key[i*2+0] >= p64) || (l3Key[i*2+1] >= p64));
107 
108  m_padCached = false;
109  size_t nonceLength;
110  const byte *nonce = GetIVAndThrowIfInvalid(params, nonceLength);
111  Resynchronize(nonce, (int)nonceLength);
112 }
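// A minimal keying/use sketch from application code (illustrative only;
// "message"/"messageLen" and the 16-byte sizes are assumptions, not part
// of this file):
//
//   VMAC<AES> mac;                          // 128-bit tag by default
//   SecByteBlock key(16), nonce(16), tag(16);
//   OS_GenerateRandomBlock(false, key, key.size());
//   memset(nonce, 0, nonce.size());         // must be unique per message
//   mac.SetKeyWithIV(key, key.size(), nonce, nonce.size());
//   mac.CalculateDigest(tag, message, messageLen);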
113 
114 void VMAC_Base::GetNextIV(RandomNumberGenerator &rng, byte *IV)
115 {
116  SimpleKeyingInterface::GetNextIV(rng, IV);
117  IV[0] &= 0x7f;
118 }
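// Clearing the top bit keeps generated nonces within the 127-bit limit
// that draft-krovetz-vmac-01 places on VMAC nonces.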
119 
120 void VMAC_Base::Resynchronize(const byte *nonce, int len)
121 {
122  size_t length = ThrowIfInvalidIVLength(len);
123  size_t s = IVSize();
124  byte *storedNonce = m_nonce();
125 
126  if (m_is128)
127  {
128  memset(storedNonce, 0, s-length);
129  memcpy(storedNonce+s-length, nonce, length);
130  AccessCipher().ProcessBlock(storedNonce, m_pad());
131  }
132  else
133  {
134  if (m_padCached && (storedNonce[s-1] | 1) == (nonce[length-1] | 1))
135  {
136  m_padCached = VerifyBufsEqual(storedNonce+s-length, nonce, length-1);
137  for (size_t i=0; m_padCached && i<s-length; i++)
138  m_padCached = (storedNonce[i] == 0);
139  }
140  if (!m_padCached)
141  {
142  memset(storedNonce, 0, s-length);
143  memcpy(storedNonce+s-length, nonce, length-1);
144  storedNonce[s-1] = nonce[length-1] & 0xfe;
145  AccessCipher().ProcessBlock(storedNonce, m_pad());
146  m_padCached = true;
147  }
148  storedNonce[s-1] = nonce[length-1];
149  }
150  m_isFirstBlock = true;
151  Restart();
152 }
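// For 64-bit tags only the low bit of the last nonce byte selects which
// half of the encrypted pad TruncatedFinal uses, so the block cipher call
// is skipped when consecutive nonces differ only in that bit (m_padCached).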
153 
154 void VMAC_Base::HashEndianCorrectedBlock(const word64 *data)
155 {
156  CRYPTOPP_UNUSED(data);
157  CRYPTOPP_ASSERT(false);
158  throw NotImplemented("VMAC: HashEndianCorrectedBlock is not implemented");
159 }
160 
161 unsigned int VMAC_Base::OptimalDataAlignment() const
162 {
163  return
164 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
165  HasSSE2() ? 16 :
166 #endif
167  GetCipher().OptimalDataAlignment();
168 }
169 
170 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32)
171 #if CRYPTOPP_MSC_VERSION
172 # pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code
173 #endif
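// SSE2/MMX implementation of one VHASH pass. tagPart selects which half of
// the 128-bit tag state is updated (nhK is offset by tagPart*2 words and
// polyS by tagPart*4), mirroring the C++ template version further below.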
174 void
175 #ifdef __GNUC__
176 __attribute__ ((noinline)) // Intel Compiler 9.1 workaround
177 #endif
178 VMAC_Base::VHASH_Update_SSE2(const word64 *data, size_t blocksRemainingInWord64, int tagPart)
179 {
180  CRYPTOPP_ASSERT(IsAlignedOn(m_polyState(),GetAlignmentOf<word64>()));
181  CRYPTOPP_ASSERT(IsAlignedOn(m_nhKey(),GetAlignmentOf<word64>()));
182 
183  const word64 *nhK = m_nhKey();
184  word64 *polyS = (word64*)(void*)m_polyState();
185  word32 L1KeyLength = m_L1KeyLength;
186 
187  // These are used in the ASM, but some analysis engines cannot determine it.
188  CRYPTOPP_UNUSED(data); CRYPTOPP_UNUSED(tagPart); CRYPTOPP_UNUSED(L1KeyLength);
189  CRYPTOPP_UNUSED(blocksRemainingInWord64);
190 
191 #ifdef __GNUC__
192  word32 temp;
193  __asm__ __volatile__
194  (
195  AS2( mov %%ebx, %0)
196  AS2( mov %1, %%ebx)
197  INTEL_NOPREFIX
198 #else
199  #if _MSC_VER < 1300 || defined(__INTEL_COMPILER)
200  char isFirstBlock = m_isFirstBlock;
201  AS2( mov ebx, [L1KeyLength])
202  AS2( mov dl, [isFirstBlock])
203  #else
204  AS2( mov ecx, this)
205  AS2( mov ebx, [ecx+m_L1KeyLength])
206  AS2( mov dl, [ecx+m_isFirstBlock])
207  #endif
208  AS2( mov eax, tagPart)
209  AS2( shl eax, 4)
210  AS2( mov edi, nhK)
211  AS2( add edi, eax)
212  AS2( add eax, eax)
213  AS2( add eax, polyS)
214 
215  AS2( mov esi, data)
216  AS2( mov ecx, blocksRemainingInWord64)
217 #endif
218 
219  AS2( shr ebx, 3)
220 #if CRYPTOPP_BOOL_X32
221  AS_PUSH_IF86( bp)
222  AS2( sub esp, 24)
223 #else
224  AS_PUSH_IF86( bp)
225  AS2( sub esp, 12)
226 #endif
227  ASL(4)
228  AS2( mov ebp, ebx)
229  AS2( cmp ecx, ebx)
230  AS2( cmovl ebp, ecx)
231  AS2( sub ecx, ebp)
232  AS2( lea ebp, [edi+8*ebp]) // end of nhK
233  AS2( movq mm6, [esi])
234  AS2( paddq mm6, [edi])
235  AS2( movq mm5, [esi+8])
236  AS2( paddq mm5, [edi+8])
237  AS2( add esi, 16)
238  AS2( add edi, 16)
239  AS2( movq mm4, mm6)
240  ASS( pshufw mm2, mm6, 1, 0, 3, 2)
241  AS2( pmuludq mm6, mm5)
242  ASS( pshufw mm3, mm5, 1, 0, 3, 2)
243  AS2( pmuludq mm5, mm2)
244  AS2( pmuludq mm2, mm3)
245  AS2( pmuludq mm3, mm4)
246  AS2( pxor mm7, mm7)
247  AS2( movd [esp], mm6)
248  AS2( psrlq mm6, 32)
249 #if CRYPTOPP_BOOL_X32
250  AS2( movd [esp+8], mm5)
251 #else
252  AS2( movd [esp+4], mm5)
253 #endif
254  AS2( psrlq mm5, 32)
255  AS2( cmp edi, ebp)
256  ASJ( je, 1, f)
257  ASL(0)
258  AS2( movq mm0, [esi])
259  AS2( paddq mm0, [edi])
260  AS2( movq mm1, [esi+8])
261  AS2( paddq mm1, [edi+8])
262  AS2( add esi, 16)
263  AS2( add edi, 16)
264  AS2( movq mm4, mm0)
265  AS2( paddq mm5, mm2)
266  ASS( pshufw mm2, mm0, 1, 0, 3, 2)
267  AS2( pmuludq mm0, mm1)
268 #if CRYPTOPP_BOOL_X32
269  AS2( movd [esp+16], mm3)
270 #else
271  AS2( movd [esp+8], mm3)
272 #endif
273  AS2( psrlq mm3, 32)
274  AS2( paddq mm5, mm3)
275  ASS( pshufw mm3, mm1, 1, 0, 3, 2)
276  AS2( pmuludq mm1, mm2)
277  AS2( pmuludq mm2, mm3)
278  AS2( pmuludq mm3, mm4)
279  AS2( movd mm4, [esp])
280  AS2( paddq mm7, mm4)
281 #if CRYPTOPP_BOOL_X32
282  AS2( movd mm4, [esp+8])
283  AS2( paddq mm6, mm4)
284  AS2( movd mm4, [esp+16])
285 #else
286  AS2( movd mm4, [esp+4])
287  AS2( paddq mm6, mm4)
288  AS2( movd mm4, [esp+8])
289 #endif
290  AS2( paddq mm6, mm4)
291  AS2( movd [esp], mm0)
292  AS2( psrlq mm0, 32)
293  AS2( paddq mm6, mm0)
294 #if CRYPTOPP_BOOL_X32
295  AS2( movd [esp+8], mm1)
296 #else
297  AS2( movd [esp+4], mm1)
298 #endif
299  AS2( psrlq mm1, 32)
300  AS2( paddq mm5, mm1)
301  AS2( cmp edi, ebp)
302  ASJ( jne, 0, b)
303  ASL(1)
304  AS2( paddq mm5, mm2)
305 #if CRYPTOPP_BOOL_X32
306  AS2( movd [esp+16], mm3)
307 #else
308  AS2( movd [esp+8], mm3)
309 #endif
310  AS2( psrlq mm3, 32)
311  AS2( paddq mm5, mm3)
312  AS2( movd mm4, [esp])
313  AS2( paddq mm7, mm4)
314 #if CRYPTOPP_BOOL_X32
315  AS2( movd mm4, [esp+8])
316  AS2( paddq mm6, mm4)
317  AS2( movd mm4, [esp+16])
318 #else
319  AS2( movd mm4, [esp+4])
320  AS2( paddq mm6, mm4)
321  AS2( movd mm4, [esp+8])
322 #endif
323  AS2( paddq mm6, mm4)
324  AS2( lea ebp, [8*ebx])
325  AS2( sub edi, ebp) // reset edi to start of nhK
326 
327  AS2( movd [esp], mm7)
328  AS2( psrlq mm7, 32)
329  AS2( paddq mm6, mm7)
330 #if CRYPTOPP_BOOL_X32
331  AS2( movd [esp+8], mm6)
332 #else
333  AS2( movd [esp+4], mm6)
334 #endif
335  AS2( psrlq mm6, 32)
336  AS2( paddq mm5, mm6)
337  AS2( psllq mm5, 2)
338  AS2( psrlq mm5, 2)
339 
340 #define a0 [eax+2*4]
341 #define a1 [eax+3*4]
342 #define a2 [eax+0*4]
343 #define a3 [eax+1*4]
344 #define k0 [eax+2*8+2*4]
345 #define k1 [eax+2*8+3*4]
346 #define k2 [eax+2*8+0*4]
347 #define k3 [eax+2*8+1*4]
348  AS2( test dl, dl)
349  ASJ( jz, 2, f)
350  AS2( movd mm1, k0)
351  AS2( movd mm0, [esp])
352  AS2( paddq mm0, mm1)
353  AS2( movd a0, mm0)
354  AS2( psrlq mm0, 32)
355  AS2( movd mm1, k1)
356 #if CRYPTOPP_BOOL_X32
357  AS2( movd mm2, [esp+8])
358 #else
359  AS2( movd mm2, [esp+4])
360 #endif
361  AS2( paddq mm1, mm2)
362  AS2( paddq mm0, mm1)
363  AS2( movd a1, mm0)
364  AS2( psrlq mm0, 32)
365  AS2( paddq mm5, k2)
366  AS2( paddq mm0, mm5)
367  AS2( movq a2, mm0)
368  AS2( xor edx, edx)
369  ASJ( jmp, 3, f)
370  ASL(2)
371  AS2( movd mm0, a3)
372  AS2( movq mm4, mm0)
373  AS2( pmuludq mm0, k3) // a3*k3
374  AS2( movd mm1, a0)
375  AS2( pmuludq mm1, k2) // a0*k2
376  AS2( movd mm2, a1)
377  AS2( movd mm6, k1)
378  AS2( pmuludq mm2, mm6) // a1*k1
379  AS2( movd mm3, a2)
380  AS2( psllq mm0, 1)
381  AS2( paddq mm0, mm5)
382  AS2( movq mm5, mm3)
383  AS2( movd mm7, k0)
384  AS2( pmuludq mm3, mm7) // a2*k0
385  AS2( pmuludq mm4, mm7) // a3*k0
386  AS2( pmuludq mm5, mm6) // a2*k1
387  AS2( paddq mm0, mm1)
388  AS2( movd mm1, a1)
389  AS2( paddq mm4, mm5)
390  AS2( movq mm5, mm1)
391  AS2( pmuludq mm1, k2) // a1*k2
392  AS2( paddq mm0, mm2)
393  AS2( movd mm2, a0)
394  AS2( paddq mm0, mm3)
395  AS2( movq mm3, mm2)
396  AS2( pmuludq mm2, k3) // a0*k3
397  AS2( pmuludq mm3, mm7) // a0*k0
398 #if CRYPTOPP_BOOL_X32
399  AS2( movd [esp+16], mm0)
400 #else
401  AS2( movd [esp+8], mm0)
402 #endif
403  AS2( psrlq mm0, 32)
404  AS2( pmuludq mm7, mm5) // a1*k0
405  AS2( pmuludq mm5, k3) // a1*k3
406  AS2( paddq mm0, mm1)
407  AS2( movd mm1, a2)
408  AS2( pmuludq mm1, k2) // a2*k2
409  AS2( paddq mm0, mm2)
410  AS2( paddq mm0, mm4)
411  AS2( movq mm4, mm0)
412  AS2( movd mm2, a3)
413  AS2( pmuludq mm2, mm6) // a3*k1
414  AS2( pmuludq mm6, a0) // a0*k1
415  AS2( psrlq mm0, 31)
416  AS2( paddq mm0, mm3)
417  AS2( movd mm3, [esp])
418  AS2( paddq mm0, mm3)
419  AS2( movd mm3, a2)
420  AS2( pmuludq mm3, k3) // a2*k3
421  AS2( paddq mm5, mm1)
422  AS2( movd mm1, a3)
423  AS2( pmuludq mm1, k2) // a3*k2
424  AS2( paddq mm5, mm2)
425 #if CRYPTOPP_BOOL_X32
426  AS2( movd mm2, [esp+8])
427 #else
428  AS2( movd mm2, [esp+4])
429 #endif
430  AS2( psllq mm5, 1)
431  AS2( paddq mm0, mm5)
432  AS2( psllq mm4, 33)
433  AS2( movd a0, mm0)
434  AS2( psrlq mm0, 32)
435  AS2( paddq mm6, mm7)
436 #if CRYPTOPP_BOOL_X32
437  AS2( movd mm7, [esp+16])
438 #else
439  AS2( movd mm7, [esp+8])
440 #endif
441  AS2( paddq mm0, mm6)
442  AS2( paddq mm0, mm2)
443  AS2( paddq mm3, mm1)
444  AS2( psllq mm3, 1)
445  AS2( paddq mm0, mm3)
446  AS2( psrlq mm4, 1)
447  AS2( movd a1, mm0)
448  AS2( psrlq mm0, 32)
449  AS2( por mm4, mm7)
450  AS2( paddq mm0, mm4)
451  AS2( movq a2, mm0)
452 #undef a0
453 #undef a1
454 #undef a2
455 #undef a3
456 #undef k0
457 #undef k1
458 #undef k2
459 #undef k3
460 
461  ASL(3)
462  AS2( test ecx, ecx)
463  ASJ( jnz, 4, b)
464 #if CRYPTOPP_BOOL_X32
465  AS2( add esp, 24)
466 #else
467  AS2( add esp, 12)
468 #endif
469  AS_POP_IF86( bp)
470  AS1( emms)
471 #ifdef __GNUC__
472  ATT_PREFIX
473  AS2( mov %0, %%ebx)
474  : "=m" (temp)
475  : "m" (L1KeyLength), "c" (blocksRemainingInWord64), "S" (data), "D" (nhK+tagPart*2), "d" (m_isFirstBlock), "a" (polyS+tagPart*4)
476  : "memory", "cc"
477  );
478 #endif
479 }
480 #endif
481 
482 #if VMAC_BOOL_WORD128
483  #define DeclareNH(a) word128 a=0
484  #define MUL64(rh,rl,i1,i2) {word128 p = word128(i1)*(i2); rh = word64(p>>64); rl = word64(p);}
485  #define AccumulateNH(a, b, c) a += word128(b)*(c)
486  #define Multiply128(r, i1, i2) r = word128(word64(i1)) * word64(i2)
487 #else
488  #if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER) && !defined(_M_ARM)
489  #define MUL32(a, b) __emulu(word32(a), word32(b))
490  #else
491  #define MUL32(a, b) ((word64)((word32)(a)) * (word32)(b))
492  #endif
493  #if defined(CRYPTOPP_X64_ASM_AVAILABLE)
494  #define DeclareNH(a) word64 a##0=0, a##1=0
495  #define MUL64(rh,rl,i1,i2) asm ("mulq %3" : "=a"(rl), "=d"(rh) : "a"(i1), "g"(i2) : "cc");
496  #define AccumulateNH(a, b, c) asm ("mulq %3; addq %%rax, %0; adcq %%rdx, %1" : "+r"(a##0), "+r"(a##1) : "a"(b), "g"(c) : "%rdx", "cc");
497  #define ADD128(rh,rl,ih,il) asm ("addq %3, %1; adcq %2, %0" : "+r"(rh),"+r"(rl) : "r"(ih),"r"(il) : "cc");
498  #elif defined(_MSC_VER) && !CRYPTOPP_BOOL_SLOW_WORD64
499  #define DeclareNH(a) word64 a##0=0, a##1=0
500  #define MUL64(rh,rl,i1,i2) (rl) = _umul128(i1,i2,&(rh));
501  #define AccumulateNH(a, b, c) {\
502  word64 ph, pl;\
503  pl = _umul128(b,c,&ph);\
504  a##0 += pl;\
505  a##1 += ph + (a##0 < pl);}
506  #else
507  #define VMAC_BOOL_32BIT 1
508  #define DeclareNH(a) word64 a##0=0, a##1=0, a##2=0
509  #define MUL64(rh,rl,i1,i2) \
510  { word64 _i1 = (i1), _i2 = (i2); \
511  word64 m1= MUL32(_i1,_i2>>32); \
512  word64 m2= MUL32(_i1>>32,_i2); \
513  rh = MUL32(_i1>>32,_i2>>32); \
514  rl = MUL32(_i1,_i2); \
515  ADD128(rh,rl,(m1 >> 32),(m1 << 32)); \
516  ADD128(rh,rl,(m2 >> 32),(m2 << 32)); \
517  }
518  #define AccumulateNH(a, b, c) {\
519  word64 p = MUL32(b, c);\
520  a##1 += word32((p)>>32);\
521  a##0 += word32(p);\
522  p = MUL32((b)>>32, c);\
523  a##2 += word32((p)>>32);\
524  a##1 += word32(p);\
525  p = MUL32((b)>>32, (c)>>32);\
526  a##2 += p;\
527  p = MUL32(b, (c)>>32);\
528  a##1 += word32(p);\
529  a##2 += word32(p>>32);}
530  #endif
531 #endif
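// Whichever variant was selected, AccumulateNH(a, b, c) adds the full
// 128-bit product b*c into accumulator a (held as two or three words,
// depending on the variant); VHASH uses it to form
// sum_i (m[2i]+k[2i]) * (m[2i+1]+k[2i+1]) over each L1 block.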
532 #ifndef VMAC_BOOL_32BIT
533  #define VMAC_BOOL_32BIT 0
534 #endif
535 #ifndef ADD128
536  #define ADD128(rh,rl,ih,il) \
537  { word64 _il = (il); \
538  (rl) += (_il); \
539  (rh) += (ih) + ((rl) < (_il)); \
540  }
541 #endif
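// Portable two-word add with carry detection: after (rl) += (il) the sum
// wrapped iff the new (rl) is smaller than (il). For example,
// ADD128(rh, rl, 0, 1) with rl == 0xffffffffffffffff leaves rl == 0 and
// increments rh by the carry.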
542 
543 #if !(defined(_MSC_VER) && _MSC_VER < 1300)
544 template <bool T_128BitTag>
545 #endif
546 void VMAC_Base::VHASH_Update_Template(const word64 *data, size_t blocksRemainingInWord64)
547 {
548  CRYPTOPP_ASSERT(IsAlignedOn(m_polyState(),GetAlignmentOf<word64>()));
549  CRYPTOPP_ASSERT(IsAlignedOn(m_nhKey(),GetAlignmentOf<word64>()));
550 
551  #define INNER_LOOP_ITERATION(j) {\
552  word64 d0 = ConditionalByteReverse(LITTLE_ENDIAN_ORDER, data[i+2*j+0]);\
553  word64 d1 = ConditionalByteReverse(LITTLE_ENDIAN_ORDER, data[i+2*j+1]);\
554  AccumulateNH(nhA, d0+nhK[i+2*j+0], d1+nhK[i+2*j+1]);\
555  if (T_128BitTag)\
556  AccumulateNH(nhB, d0+nhK[i+2*j+2], d1+nhK[i+2*j+3]);\
557  }
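 // Each INNER_LOOP_ITERATION folds one 16-byte message pair into the NH
 // sums; for 128-bit tags the same pair is also hashed against the key
 // shifted by two words (nhK+2), producing an independent second tag half.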
558 
559 #if (defined(_MSC_VER) && _MSC_VER < 1300)
560  bool T_128BitTag = m_is128;
561 #endif
562  size_t L1KeyLengthInWord64 = m_L1KeyLength / 8;
563  size_t innerLoopEnd = L1KeyLengthInWord64;
564  const word64 *nhK = m_nhKey();
565  word64 *polyS = (word64*)(void*)m_polyState();
566  bool isFirstBlock = true;
567  size_t i;
568 
569  #if !VMAC_BOOL_32BIT
570  #if VMAC_BOOL_WORD128
571  word128 a1=0, a2=0;
572  #else
573  word64 ah1=0, al1=0, ah2=0, al2=0;
574  #endif
575  word64 kh1, kl1, kh2, kl2;
576  kh1=(polyS+0*4+2)[0]; kl1=(polyS+0*4+2)[1];
577  if (T_128BitTag)
578  {
579  kh2=(polyS+1*4+2)[0]; kl2=(polyS+1*4+2)[1];
580  }
581  #endif
582 
583  do
584  {
585  DeclareNH(nhA);
586  DeclareNH(nhB);
587 
588  i = 0;
589  if (blocksRemainingInWord64 < L1KeyLengthInWord64)
590  {
591  if (blocksRemainingInWord64 % 8)
592  {
593  innerLoopEnd = blocksRemainingInWord64 % 8;
594  for (; i<innerLoopEnd; i+=2)
595  INNER_LOOP_ITERATION(0);
596  }
597  innerLoopEnd = blocksRemainingInWord64;
598  }
599  for (; i<innerLoopEnd; i+=8)
600  {
601  INNER_LOOP_ITERATION(0);
602  INNER_LOOP_ITERATION(1);
603  INNER_LOOP_ITERATION(2);
604  INNER_LOOP_ITERATION(3);
605  }
606  blocksRemainingInWord64 -= innerLoopEnd;
607  data += innerLoopEnd;
608 
609  #if VMAC_BOOL_32BIT
610  word32 nh0[2], nh1[2];
611  word64 nh2[2];
612 
613  nh0[0] = word32(nhA0);
614  nhA1 += (nhA0 >> 32);
615  nh1[0] = word32(nhA1);
616  nh2[0] = (nhA2 + (nhA1 >> 32)) & m62;
617 
618  if (T_128BitTag)
619  {
620  nh0[1] = word32(nhB0);
621  nhB1 += (nhB0 >> 32);
622  nh1[1] = word32(nhB1);
623  nh2[1] = (nhB2 + (nhB1 >> 32)) & m62;
624  }
625 
626  #define a0 (((word32 *)(polyS+i*4))[2+NativeByteOrder::ToEnum()])
627  #define a1 (*(((word32 *)(polyS+i*4))+3-NativeByteOrder::ToEnum())) // workaround for GCC 3.2
628  #define a2 (((word32 *)(polyS+i*4))[0+NativeByteOrder::ToEnum()])
629  #define a3 (*(((word32 *)(polyS+i*4))+1-NativeByteOrder::ToEnum()))
630  #define aHi ((polyS+i*4)[0])
631  #define k0 (((word32 *)(polyS+i*4+2))[2+NativeByteOrder::ToEnum()])
632  #define k1 (*(((word32 *)(polyS+i*4+2))+3-NativeByteOrder::ToEnum()))
633  #define k2 (((word32 *)(polyS+i*4+2))[0+NativeByteOrder::ToEnum()])
634  #define k3 (*(((word32 *)(polyS+i*4+2))+1-NativeByteOrder::ToEnum()))
635  #define kHi ((polyS+i*4+2)[0])
636 
637  if (isFirstBlock)
638  {
639  isFirstBlock = false;
640  if (m_isFirstBlock)
641  {
642  m_isFirstBlock = false;
643  for (i=0; i<=(size_t)T_128BitTag; i++)
644  {
645  word64 t = (word64)nh0[i] + k0;
646  a0 = (word32)t;
647  t = (t >> 32) + nh1[i] + k1;
648  a1 = (word32)t;
649  aHi = (t >> 32) + nh2[i] + kHi;
650  }
651  continue;
652  }
653  }
654  for (i=0; i<=(size_t)T_128BitTag; i++)
655  {
656  word64 p, t;
657  word32 t2;
658 
659  p = MUL32(a3, 2*k3);
660  p += nh2[i];
661  p += MUL32(a0, k2);
662  p += MUL32(a1, k1);
663  p += MUL32(a2, k0);
664  t2 = (word32)p;
665  p >>= 32;
666  p += MUL32(a0, k3);
667  p += MUL32(a1, k2);
668  p += MUL32(a2, k1);
669  p += MUL32(a3, k0);
670  t = (word64(word32(p) & 0x7fffffff) << 32) | t2;
671  p >>= 31;
672  p += nh0[i];
673  p += MUL32(a0, k0);
674  p += MUL32(a1, 2*k3);
675  p += MUL32(a2, 2*k2);
676  p += MUL32(a3, 2*k1);
677  t2 = (word32)p;
678  p >>= 32;
679  p += nh1[i];
680  p += MUL32(a0, k1);
681  p += MUL32(a1, k0);
682  p += MUL32(a2, 2*k3);
683  p += MUL32(a3, 2*k2);
684  a0 = t2;
685  a1 = (word32)p;
686  aHi = (p >> 32) + t;
687  }
688 
689  #undef a0
690  #undef a1
691  #undef a2
692  #undef a3
693  #undef aHi
694  #undef k0
695  #undef k1
696  #undef k2
697  #undef k3
698  #undef kHi
699  #else // #if VMAC_BOOL_32BIT
700  if (isFirstBlock)
701  {
702  isFirstBlock = false;
703  if (m_isFirstBlock)
704  {
705  m_isFirstBlock = false;
706  #if VMAC_BOOL_WORD128
707  #define first_poly_step(a, kh, kl, m) a = (m & m126) + ((word128(kh) << 64) | kl)
708 
709  first_poly_step(a1, kh1, kl1, nhA);
710  if (T_128BitTag)
711  first_poly_step(a2, kh2, kl2, nhB);
712  #else
713  #define first_poly_step(ah, al, kh, kl, mh, ml) {\
714  mh &= m62;\
715  ADD128(mh, ml, kh, kl); \
716  ah = mh; al = ml;}
717 
718  first_poly_step(ah1, al1, kh1, kl1, nhA1, nhA0);
719  if (T_128BitTag)
720  first_poly_step(ah2, al2, kh2, kl2, nhB1, nhB0);
721  #endif
722  continue;
723  }
724  else
725  {
726  #if VMAC_BOOL_WORD128
727  a1 = (word128((polyS+0*4)[0]) << 64) | (polyS+0*4)[1];
728  #else
729  ah1=(polyS+0*4)[0]; al1=(polyS+0*4)[1];
730  #endif
731  if (T_128BitTag)
732  {
733  #if VMAC_BOOL_WORD128
734  a2 = (word128((polyS+1*4)[0]) << 64) | (polyS+1*4)[1];
735  #else
736  ah2=(polyS+1*4)[0]; al2=(polyS+1*4)[1];
737  #endif
738  }
739  }
740  }
741 
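 // Both poly_step variants below evaluate one Horner step a = a*k + m over
 // GF(2^127 - 1); because 2^128 = 2 (mod 2^127 - 1), product words at or
 // above bit 127 fold back in doubled (the t2 *= 2 and t2h += t2h steps).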
742  #if VMAC_BOOL_WORD128
743  #define poly_step(a, kh, kl, m) \
744  { word128 t1, t2, t3, t4;\
745  Multiply128(t2, a>>64, kl);\
746  Multiply128(t3, a, kh);\
747  Multiply128(t1, a, kl);\
748  Multiply128(t4, a>>64, 2*kh);\
749  t2 += t3;\
750  t4 += t1;\
751  t2 += t4>>64;\
752  a = (word128(word64(t2)&m63) << 64) | word64(t4);\
753  t2 *= 2;\
754  a += m & m126;\
755  a += t2>>64;}
756 
757  poly_step(a1, kh1, kl1, nhA);
758  if (T_128BitTag)
759  poly_step(a2, kh2, kl2, nhB);
760  #else
761  #define poly_step(ah, al, kh, kl, mh, ml) \
762  { word64 t1h, t1l, t2h, t2l, t3h, t3l, z=0; \
763  /* compute ab*cd, put bd into result registers */ \
764  MUL64(t2h,t2l,ah,kl); \
765  MUL64(t3h,t3l,al,kh); \
766  MUL64(t1h,t1l,ah,2*kh); \
767  MUL64(ah,al,al,kl); \
768  /* add together ad + bc */ \
769  ADD128(t2h,t2l,t3h,t3l); \
770  /* add 2 * ac to result */ \
771  ADD128(ah,al,t1h,t1l); \
772  /* now (ah,al), (t2l,2*t2h) need summing */ \
773  /* first add the high registers, carrying into t2h */ \
774  ADD128(t2h,ah,z,t2l); \
775  /* double t2h and add top bit of ah */ \
776  t2h += t2h + (ah >> 63); \
777  ah &= m63; \
778  /* now add the low registers */ \
779  mh &= m62; \
780  ADD128(ah,al,mh,ml); \
781  ADD128(ah,al,z,t2h); \
782  }
783 
784  poly_step(ah1, al1, kh1, kl1, nhA1, nhA0);
785  if (T_128BitTag)
786  poly_step(ah2, al2, kh2, kl2, nhB1, nhB0);
787  #endif
788  #endif // #if VMAC_BOOL_32BIT
789  } while (blocksRemainingInWord64);
790 
791  #if VMAC_BOOL_WORD128
792  (polyS+0*4)[0]=word64(a1>>64); (polyS+0*4)[1]=word64(a1);
793  if (T_128BitTag)
794  {
795  (polyS+1*4)[0]=word64(a2>>64); (polyS+1*4)[1]=word64(a2);
796  }
797  #elif !VMAC_BOOL_32BIT
798  (polyS+0*4)[0]=ah1; (polyS+0*4)[1]=al1;
799  if (T_128BitTag)
800  {
801  (polyS+1*4)[0]=ah2; (polyS+1*4)[1]=al2;
802  }
803  #endif
804 }
805 
806 inline void VMAC_Base::VHASH_Update(const word64 *data, size_t blocksRemainingInWord64)
807 {
808 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32)
809  if (HasSSE2())
810  {
811  VHASH_Update_SSE2(data, blocksRemainingInWord64, 0);
812  if (m_is128)
813  VHASH_Update_SSE2(data, blocksRemainingInWord64, 1);
814  m_isFirstBlock = false;
815  }
816  else
817 #endif
818  {
819 #if defined(_MSC_VER) && _MSC_VER < 1300
820  VHASH_Update_Template(data, blocksRemainingInWord64);
821 #else
822  if (m_is128)
823  VHASH_Update_Template<true>(data, blocksRemainingInWord64);
824  else
825  VHASH_Update_Template<false>(data, blocksRemainingInWord64);
826 #endif
827  }
828 }
829 
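// HashMultipleBlocks takes its length in bytes, hashes as many whole
// L1-key-length chunks as fit (VHASH_Update counts 64-bit words, hence the
// division by 8), and returns the leftover byte count for the base class
// to buffer.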
830 size_t VMAC_Base::HashMultipleBlocks(const word64 *data, size_t length)
831 {
832  size_t remaining = ModPowerOf2(length, m_L1KeyLength);
833  VHASH_Update(data, (length-remaining)/8);
834  return remaining;
835 }
836 
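// L3Hash is the spec's third stage: fold the 128-bit poly state plus the
// residual length modulo 2^127 - 1, squeeze the pair below p64 via the
// 2^64 - 2^32 division step, then return ((p1+k1) * (p2+k2)) mod p64 with
// p64 = 2^64 - 257. The "& 257" adjustments use 2^64 = 257 (mod p64).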
837 static word64 L3Hash(const word64 *input, const word64 *l3Key, size_t len)
838 {
839  word64 rh, rl, t, z=0;
840  word64 p1 = input[0], p2 = input[1];
841  word64 k1 = l3Key[0], k2 = l3Key[1];
842 
843  /* fully reduce (p1,p2)+(len,0) mod p127 */
844  t = p1 >> 63;
845  p1 &= m63;
846  ADD128(p1, p2, len, t);
847  /* At this point, (p1,p2) is at most 2^127+(len<<64) */
848  t = (p1 > m63) + ((p1 == m63) & (p2 == m64));
849  ADD128(p1, p2, z, t);
850  p1 &= m63;
851 
852  /* compute (p1,p2)/(2^64-2^32) and (p1,p2)%(2^64-2^32) */
853  t = p1 + (p2 >> 32);
854  t += (t >> 32);
855  t += (word32)t > 0xfffffffeU;
856  p1 += (t >> 32);
857  p2 += (p1 << 32);
858 
859  /* compute (p1+k1)%p64 and (p2+k2)%p64 */
860  p1 += k1;
861  p1 += (0 - (p1 < k1)) & 257;
862  p2 += k2;
863  p2 += (0 - (p2 < k2)) & 257;
864 
865  /* compute (p1+k1)*(p2+k2)%p64 */
866  MUL64(rh, rl, p1, p2);
867  t = rh >> 56;
868  ADD128(t, rl, z, rh);
869  rh <<= 8;
870  ADD128(t, rl, z, rh);
871  t += t << 8;
872  rl += t;
873  rl += (0 - (rl < t)) & 257;
874  rl += (0 - (rl > p64-1)) & 257;
875  return rl;
876 }
877 
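// TruncatedFinal assembles Tag = (L3Hash(poly state) + pad) mod 2^64 for
// each 64-bit tag half, where pad is the block cipher output cached by
// Resynchronize; the addition is ordinary word64 wraparound.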
878 void VMAC_Base::TruncatedFinal(byte *mac, size_t size)
879 {
880  CRYPTOPP_ASSERT(IsAlignedOn(DataBuf(),GetAlignmentOf<word64>()));
881  CRYPTOPP_ASSERT(IsAlignedOn(m_polyState(),GetAlignmentOf<word64>()));
882  size_t len = ModPowerOf2(GetBitCountLo()/8, m_L1KeyLength);
883 
884  if (len)
885  {
886  memset(m_data()+len, 0, (0-len)%16);
887  VHASH_Update(DataBuf(), ((len+15)/16)*2);
888  len *= 8; // convert to bits
889  }
890  else if (m_isFirstBlock)
891  {
892  // special case for empty string
893  m_polyState()[0] = m_polyState()[2];
894  m_polyState()[1] = m_polyState()[3];
895  if (m_is128)
896  {
897  m_polyState()[4] = m_polyState()[6];
898  m_polyState()[5] = m_polyState()[7];
899  }
900  }
901 
902  if (m_is128)
903  {
904  word64 t[2];
905  t[0] = L3Hash(m_polyState(), m_l3Key(), len) + GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad());
906  t[1] = L3Hash(m_polyState()+4, m_l3Key()+2, len) + GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad()+8);
907  if (size == 16)
908  {
909  PutWord(false, BIG_ENDIAN_ORDER, mac, t[0]);
910  PutWord(false, BIG_ENDIAN_ORDER, mac+8, t[1]);
911  }
912  else
913  {
914  t[0] = ConditionalByteReverse(BIG_ENDIAN_ORDER, t[0]);
915  t[1] = ConditionalByteReverse(BIG_ENDIAN_ORDER, t[1]);
916  memcpy(mac, t, size);
917  }
918  }
919  else
920  {
921  word64 t = L3Hash(m_polyState(), m_l3Key(), len);
922  t += GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad() + (m_nonce()[IVSize()-1]&1) * 8);
923  if (size == 8)
924  PutWord(false, BIG_ENDIAN_ORDER, mac, t);
925  else
926  {
927  t = ConditionalByteReverse(BIG_ENDIAN_ORDER, t);
928  memcpy(mac, &t, size);
929  }
930  }
931 }
932 
933 NAMESPACE_END