Crypto++  5.6.5
Free C++ class library of cryptographic schemes
vmac.cpp
1 // vmac.cpp - written and placed in the public domain by Wei Dai
2 // based on Ted Krovetz's public domain vmac.c and draft-krovetz-vmac-01.txt
3 
4 #include "pch.h"
5 #include "config.h"
6 
7 #include "vmac.h"
8 #include "cpu.h"
9 #include "argnames.h"
10 #include "secblock.h"
11 
12 #if defined(CRYPTOPP_DISABLE_VMAC_ASM)
13 # undef CRYPTOPP_X86_ASM_AVAILABLE
14 # undef CRYPTOPP_X32_ASM_AVAILABLE
15 # undef CRYPTOPP_X64_ASM_AVAILABLE
16 # undef CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
17 #endif
18 
19 #if CRYPTOPP_MSC_VERSION
20 # pragma warning(disable: 4731)
21 #endif
22 
23 NAMESPACE_BEGIN(CryptoPP)
24 
25 #if defined(_MSC_VER) && !CRYPTOPP_BOOL_SLOW_WORD64
26 #include <intrin.h>
27 #endif
28 
29 #if defined(CRYPTOPP_WORD128_AVAILABLE) && !defined(CRYPTOPP_X64_ASM_AVAILABLE)
30 # define VMAC_BOOL_WORD128 1
31 #else
32 # define VMAC_BOOL_WORD128 0
33 #endif
34 
#ifdef __BORLANDC__
#define const // Turbo C++ 2006 workaround
#endif
// Constants from the VMAC specification (draft-krovetz-vmac-01).
static const word64 p64 = W64LIT(0xfffffffffffffeff); /* 2^64 - 257 prime */
static const word64 m62 = W64LIT(0x3fffffffffffffff); /* 62-bit mask */
static const word64 m63 = W64LIT(0x7fffffffffffffff); /* 63-bit mask */
static const word64 m64 = W64LIT(0xffffffffffffffff); /* 64-bit mask */
static const word64 mpoly = W64LIT(0x1fffffff1fffffff); /* Poly key mask */
#ifdef __BORLANDC__
#undef const
#endif
#if VMAC_BOOL_WORD128
#ifdef __powerpc__
// workaround GCC Bug 31690: ICE with const __uint128_t and C++ front-end
#define m126 ((word128(m62)<<64)|m64)
#else
static const word128 m126 = (word128(m62)<<64)|m64; /* 126-bit mask */
#endif
#endif
54 
// Derives all VMAC subkeys (NH key, polynomial key, L3 "inner product" key)
// by encrypting counter blocks with the underlying cipher keyed with userKey,
// then captures and applies the IV supplied in params.
// Throws InvalidArgument for a DigestSize other than 8 or 16, or an
// L1KeyLength that is not a positive multiple of 128.
void VMAC_Base::UncheckedSetKey(const byte *userKey, unsigned int keylength, const NameValuePairs &params)
{
	// Tag size selects VMAC-64 (8 bytes) or VMAC-128 (16 bytes).
	int digestLength = params.GetIntValueWithDefault(Name::DigestSize(), DefaultDigestSize());
	if (digestLength != 8 && digestLength != 16)
		throw InvalidArgument("VMAC: DigestSize must be 8 or 16");
	m_is128 = digestLength == 16;

	// NH (L1) key length in bytes; 128 is the draft's default.
	m_L1KeyLength = params.GetIntValueWithDefault(Name::L1KeyLength(), 128);
	if (m_L1KeyLength <= 0 || m_L1KeyLength % 128 != 0)
		throw InvalidArgument("VMAC: L1KeyLength must be a positive multiple of 128");

	AllocateBlocks();

	BlockCipher &cipher = AccessCipher();
	cipher.SetKey(userKey, keylength, params);
	const unsigned int blockSize = cipher.BlockSize();
	const unsigned int blockSizeInWords = blockSize / sizeof(word64);
	SecBlock<word64> out(blockSizeInWords);
	SecByteBlock in;
	in.CleanNew(blockSize);
	size_t i;

	/* Fill nh key */
	// Key-derivation tweak 0x80 in the first byte; remaining bytes act as a
	// counter (BT_InBlockIsCounter) to generate the whole NH key stream.
	in[0] = 0x80;
	cipher.AdvancedProcessBlocks(in, NULL, (byte *)m_nhKey(), m_nhKeySize()*sizeof(word64), cipher.BT_InBlockIsCounter);
	ConditionalByteReverse<word64>(BIG_ENDIAN_ORDER, m_nhKey(), m_nhKey(), m_nhKeySize()*sizeof(word64));

	/* Fill poly key */
	// Tweak 0xC0; one key pair per tag half (two halves when m_is128).
	// mpoly masks the key words as required by the polynomial hash.
	in[0] = 0xC0;
	in[15] = 0;
	for (i = 0; i <= (size_t)m_is128; i++)
	{
		cipher.ProcessBlock(in, out.BytePtr());
		m_polyState()[i*4+2] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr()) & mpoly;
		m_polyState()[i*4+3] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr()+8) & mpoly;
		in[15]++;
	}

	/* Fill ip key */
	// Tweak 0xE0. Rejection sampling: candidate words >= p64 (= 2^64 - 257)
	// are discarded so each L3 key word is uniform modulo p64.
	in[0] = 0xE0;
	in[15] = 0;
	word64 *l3Key = m_l3Key();
	CRYPTOPP_ASSERT(IsAlignedOn(l3Key,GetAlignmentOf<word64>()));

	// Note: the do/while IS the for-loop body — each tag half retries until
	// both of its key words are below p64.
	for (i = 0; i <= (size_t)m_is128; i++)
		do
		{
			cipher.ProcessBlock(in, out.BytePtr());
			l3Key[i*2+0] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr());
			l3Key[i*2+1] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr()+8);
			in[15]++;
		} while ((l3Key[i*2+0] >= p64) || (l3Key[i*2+1] >= p64));

	m_padCached = false;
	size_t nonceLength;
	const byte *nonce = GetIVAndThrowIfInvalid(params, nonceLength);
	Resynchronize(nonce, (int)nonceLength);
}
113 
115 {
117  IV[0] &= 0x7f;
118 }
119 
// Installs a new nonce and (re)computes the per-message pad m_pad by
// encrypting the zero-extended nonce. For 64-bit tags, the pad depends only
// on the nonce with its last bit cleared, so two nonces differing only in
// that bit can reuse a cached pad; the low bit is kept in m_nonce and later
// selects which half of the pad is consumed (see TruncatedFinal).
void VMAC_Base::Resynchronize(const byte *nonce, int len)
{
	size_t length = ThrowIfInvalidIVLength(len);
	size_t s = IVSize();
	byte *storedNonce = m_nonce();

	if (m_is128)
	{
		// 128-bit tag: pad is the encryption of the zero-padded nonce itself.
		memset(storedNonce, 0, s-length);
		memcpy(storedNonce+s-length, nonce, length);
		AccessCipher().ProcessBlock(storedNonce, m_pad());
	}
	else
	{
		// 64-bit tag: check whether the cached pad is still valid, i.e. the
		// new nonce matches the stored one except possibly in the final bit.
		if (m_padCached && (storedNonce[s-1] | 1) == (nonce[length-1] | 1))
		{
			m_padCached = VerifyBufsEqual(storedNonce+s-length, nonce, length-1);
			// Leading zero-padding bytes must also still be zero.
			for (size_t i=0; m_padCached && i<s-length; i++)
				m_padCached = (storedNonce[i] == 0);
		}
		if (!m_padCached)
		{
			// Recompute the pad from the nonce with the last bit cleared.
			memset(storedNonce, 0, s-length);
			memcpy(storedNonce+s-length, nonce, length-1);
			storedNonce[s-1] = nonce[length-1] & 0xfe;
			AccessCipher().ProcessBlock(storedNonce, m_pad());
			m_padCached = true;
		}
		// Restore the true last byte (including the selector bit).
		storedNonce[s-1] = nonce[length-1];
	}
	m_isFirstBlock = true;
	Restart();
}
153 
// VMAC does not hash through the generic block interface; all hashing goes
// through VHASH_Update. This stub exists only to satisfy the base class and
// always throws.
void VMAC_Base::HashEndianCorrectedBlock(const word64 *data)
{
	CRYPTOPP_UNUSED(data);
	CRYPTOPP_ASSERT(false);
	throw NotImplemented("VMAC: HashEndianCorrectedBlock is not implemented");
}
160 
162 {
163  return
164 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
165  HasSSE2() ? 16 :
166 #endif
167  GetCipher().OptimalDataAlignment();
168 }
169 
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32)
#if CRYPTOPP_MSC_VERSION
# pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code
#endif
// MMX/SSE2 implementation of one 64-bit half of VHASH: the NH compression of
// each L1KeyLength-byte chunk followed by the 127-bit polynomial update.
// tagPart (0 or 1) selects which half of the tag state is updated; the caller
// invokes it twice when producing a 128-bit tag.
// Register use (Intel syntax): esi=data, edi=NH key, ecx=words remaining,
// ebx=L1 key length in word64s, eax=polynomial state, dl=isFirstBlock flag.
void
#ifdef __GNUC__
__attribute__ ((noinline)) // Intel Compiler 9.1 workaround
#endif
VMAC_Base::VHASH_Update_SSE2(const word64 *data, size_t blocksRemainingInWord64, int tagPart)
{
	CRYPTOPP_ASSERT(IsAlignedOn(m_polyState(),GetAlignmentOf<word64>()));
	CRYPTOPP_ASSERT(IsAlignedOn(m_nhKey(),GetAlignmentOf<word64>()));

	const word64 *nhK = m_nhKey();
	word64 *polyS = (word64*)(void*)m_polyState();
	word32 L1KeyLength = m_L1KeyLength;

	// These are used in the ASM, but some analysis engines cannot determine it.
	CRYPTOPP_UNUSED(data); CRYPTOPP_UNUSED(tagPart); CRYPTOPP_UNUSED(L1KeyLength);
	CRYPTOPP_UNUSED(blocksRemainingInWord64);

#ifdef __GNUC__
	word32 temp;
	__asm__ __volatile__
	(
	// Save ebx (may be the PIC register) and load L1KeyLength into it.
	AS2(	mov		%%ebx, %0)
	AS2(	mov		%1, %%ebx)
	INTEL_NOPREFIX
#else
	#if defined(__INTEL_COMPILER)
	char isFirstBlock = m_isFirstBlock;
	AS2(	mov		ebx, [L1KeyLength])
	AS2(	mov		dl, [isFirstBlock])
	#else
	AS2(	mov		ecx, this)
	AS2(	mov		ebx, [ecx+m_L1KeyLength])
	AS2(	mov		dl, [ecx+m_isFirstBlock])
	#endif
	// edi = nhK + tagPart*16 bytes, eax = polyS + tagPart*32 bytes.
	AS2(	mov		eax, tagPart)
	AS2(	shl		eax, 4)
	AS2(	mov		edi, nhK)
	AS2(	add		edi, eax)
	AS2(	add		eax, eax)
	AS2(	add		eax, polyS)

	AS2(	mov		esi, data)
	AS2(	mov		ecx, blocksRemainingInWord64)
#endif

	// ebx = L1KeyLength/8 = NH chunk length in word64s.
	AS2(	shr		ebx, 3)
#if CRYPTOPP_BOOL_X32
	AS_PUSH_IF86(	bp)
	AS2(	sub		esp, 24)
#else
	AS_PUSH_IF86(	bp)
	AS2(	sub		esp, 12)
#endif
	// Outer loop (label 4): one NH chunk + one polynomial step per iteration.
	ASL(4)
	// ebp = min(words remaining, chunk length); ecx -= ebp.
	AS2(	mov		ebp, ebx)
	AS2(	cmp		ecx, ebx)
	AS2(	cmovl	ebp, ecx)
	AS2(	sub		ecx, ebp)
	AS2(	lea		ebp, [edi+8*ebp]) // end of nhK
	// First NH pair: mm6/mm5 = (data+key) words; partial products in mm2-mm6.
	AS2(	movq	mm6, [esi])
	AS2(	paddq	mm6, [edi])
	AS2(	movq	mm5, [esi+8])
	AS2(	paddq	mm5, [edi+8])
	AS2(	add		esi, 16)
	AS2(	add		edi, 16)
	AS2(	movq	mm4, mm6)
	ASS(	pshufw	mm2, mm6, 1, 0, 3, 2)
	AS2(	pmuludq	mm6, mm5)
	ASS(	pshufw	mm3, mm5, 1, 0, 3, 2)
	AS2(	pmuludq	mm5, mm2)
	AS2(	pmuludq	mm2, mm3)
	AS2(	pmuludq	mm3, mm4)
	AS2(	pxor	mm7, mm7)
	AS2(	movd	[esp], mm6)
	AS2(	psrlq	mm6, 32)
#if CRYPTOPP_BOOL_X32
	AS2(	movd	[esp+8], mm5)
#else
	AS2(	movd	[esp+4], mm5)
#endif
	AS2(	psrlq	mm5, 32)
	AS2(	cmp		edi, ebp)
	ASJ(	je,		1, f)
	// Inner NH loop (label 0): accumulate 32x32->64 partial products of
	// (data+key) pairs into the mm5:mm6:mm7 running sums.
	ASL(0)
	AS2(	movq	mm0, [esi])
	AS2(	paddq	mm0, [edi])
	AS2(	movq	mm1, [esi+8])
	AS2(	paddq	mm1, [edi+8])
	AS2(	add		esi, 16)
	AS2(	add		edi, 16)
	AS2(	movq	mm4, mm0)
	AS2(	paddq	mm5, mm2)
	ASS(	pshufw	mm2, mm0, 1, 0, 3, 2)
	AS2(	pmuludq	mm0, mm1)
#if CRYPTOPP_BOOL_X32
	AS2(	movd	[esp+16], mm3)
#else
	AS2(	movd	[esp+8], mm3)
#endif
	AS2(	psrlq	mm3, 32)
	AS2(	paddq	mm5, mm3)
	ASS(	pshufw	mm3, mm1, 1, 0, 3, 2)
	AS2(	pmuludq	mm1, mm2)
	AS2(	pmuludq	mm2, mm3)
	AS2(	pmuludq	mm3, mm4)
	AS2(	movd	mm4, [esp])
	AS2(	paddq	mm7, mm4)
#if CRYPTOPP_BOOL_X32
	AS2(	movd	mm4, [esp+8])
	AS2(	paddq	mm6, mm4)
	AS2(	movd	mm4, [esp+16])
#else
	AS2(	movd	mm4, [esp+4])
	AS2(	paddq	mm6, mm4)
	AS2(	movd	mm4, [esp+8])
#endif
	AS2(	paddq	mm6, mm4)
	AS2(	movd	[esp], mm0)
	AS2(	psrlq	mm0, 32)
	AS2(	paddq	mm6, mm0)
#if CRYPTOPP_BOOL_X32
	AS2(	movd	[esp+8], mm1)
#else
	AS2(	movd	[esp+4], mm1)
#endif
	AS2(	psrlq	mm1, 32)
	AS2(	paddq	mm5, mm1)
	AS2(	cmp		edi, ebp)
	ASJ(	jne,	0, b)
	// Label 1: drain the last pending partial products.
	ASL(1)
	AS2(	paddq	mm5, mm2)
#if CRYPTOPP_BOOL_X32
	AS2(	movd	[esp+16], mm3)
#else
	AS2(	movd	[esp+8], mm3)
#endif
	AS2(	psrlq	mm3, 32)
	AS2(	paddq	mm5, mm3)
	AS2(	movd	mm4, [esp])
	AS2(	paddq	mm7, mm4)
#if CRYPTOPP_BOOL_X32
	AS2(	movd	mm4, [esp+8])
	AS2(	paddq	mm6, mm4)
	AS2(	movd	mm4, [esp+16])
#else
	AS2(	movd	mm4, [esp+4])
	AS2(	paddq	mm6, mm4)
	AS2(	movd	mm4, [esp+8])
#endif
	AS2(	paddq	mm6, mm4)
	AS2(	lea		ebp, [8*ebx])
	AS2(	sub		edi, ebp) // reset edi to start of nhK

	// Fold the carries: [esp] gets the low 32 bits of the NH result, mm5 the
	// high part masked to 62 bits (psllq/psrlq by 2).
	AS2(	movd	[esp], mm7)
	AS2(	psrlq	mm7, 32)
	AS2(	paddq	mm6, mm7)
#if CRYPTOPP_BOOL_X32
	AS2(	movd	[esp+8], mm6)
#else
	AS2(	movd	[esp+4], mm6)
#endif
	AS2(	psrlq	mm6, 32)
	AS2(	paddq	mm5, mm6)
	AS2(	psllq	mm5, 2)
	AS2(	psrlq	mm5, 2)

// Polynomial state layout at [eax]: a0..a3 = 32-bit limbs of the running
// accumulator, k0..k3 = 32-bit limbs of the per-tag polynomial key.
#define a0 [eax+2*4]
#define a1 [eax+3*4]
#define a2 [eax+0*4]
#define a3 [eax+1*4]
#define k0 [eax+2*8+2*4]
#define k1 [eax+2*8+3*4]
#define k2 [eax+2*8+0*4]
#define k3 [eax+2*8+1*4]
	// First block: accumulator = NH result + key (no multiply yet).
	AS2(	test	dl, dl)
	ASJ(	jz,		2, f)
	AS2(	movd	mm1, k0)
	AS2(	movd	mm0, [esp])
	AS2(	paddq	mm0, mm1)
	AS2(	movd	a0, mm0)
	AS2(	psrlq	mm0, 32)
	AS2(	movd	mm1, k1)
#if CRYPTOPP_BOOL_X32
	AS2(	movd	mm2, [esp+8])
#else
	AS2(	movd	mm2, [esp+4])
#endif
	AS2(	paddq	mm1, mm2)
	AS2(	paddq	mm0, mm1)
	AS2(	movd	a1, mm0)
	AS2(	psrlq	mm0, 32)
	AS2(	paddq	mm5, k2)
	AS2(	paddq	mm0, mm5)
	AS2(	movq	a2, mm0)
	AS2(	xor		edx, edx)
	ASJ(	jmp,	3, f)
	// Label 2: full polynomial step, a = a*k + NH (mod 2^127-1), computed as
	// a schoolbook product of 32-bit limbs with the doubled cross terms.
	ASL(2)
	AS2(	movd	mm0, a3)
	AS2(	movq	mm4, mm0)
	AS2(	pmuludq	mm0, k3)		// a3*k3
	AS2(	movd	mm1, a0)
	AS2(	pmuludq	mm1, k2)		// a0*k2
	AS2(	movd	mm2, a1)
	AS2(	movd	mm6, k1)
	AS2(	pmuludq	mm2, mm6)		// a1*k1
	AS2(	movd	mm3, a2)
	AS2(	psllq	mm0, 1)
	AS2(	paddq	mm0, mm5)
	AS2(	movq	mm5, mm3)
	AS2(	movd	mm7, k0)
	AS2(	pmuludq	mm3, mm7)		// a2*k0
	AS2(	pmuludq	mm4, mm7)		// a3*k0
	AS2(	pmuludq	mm5, mm6)		// a2*k1
	AS2(	paddq	mm0, mm1)
	AS2(	movd	mm1, a1)
	AS2(	paddq	mm4, mm5)
	AS2(	movq	mm5, mm1)
	AS2(	pmuludq	mm1, k2)		// a1*k2
	AS2(	paddq	mm0, mm2)
	AS2(	movd	mm2, a0)
	AS2(	paddq	mm0, mm3)
	AS2(	movq	mm3, mm2)
	AS2(	pmuludq	mm2, k3)		// a0*k3
	AS2(	pmuludq	mm3, mm7)		// a0*k0
#if CRYPTOPP_BOOL_X32
	AS2(	movd	[esp+16], mm0)
#else
	AS2(	movd	[esp+8], mm0)
#endif
	AS2(	psrlq	mm0, 32)
	AS2(	pmuludq	mm7, mm5)		// a1*k0
	AS2(	pmuludq	mm5, k3)		// a1*k3
	AS2(	paddq	mm0, mm1)
	AS2(	movd	mm1, a2)
	AS2(	pmuludq	mm1, k2)		// a2*k2
	AS2(	paddq	mm0, mm2)
	AS2(	paddq	mm0, mm4)
	AS2(	movq	mm4, mm0)
	AS2(	movd	mm2, a3)
	AS2(	pmuludq	mm2, mm6)		// a3*k1
	AS2(	pmuludq	mm6, a0)		// a0*k1
	AS2(	psrlq	mm0, 31)
	AS2(	paddq	mm0, mm3)
	AS2(	movd	mm3, [esp])
	AS2(	paddq	mm0, mm3)
	AS2(	movd	mm3, a2)
	AS2(	pmuludq	mm3, k3)		// a2*k3
	AS2(	paddq	mm5, mm1)
	AS2(	movd	mm1, a3)
	AS2(	pmuludq	mm1, k2)		// a3*k2
	AS2(	paddq	mm5, mm2)
#if CRYPTOPP_BOOL_X32
	AS2(	movd	mm2, [esp+8])
#else
	AS2(	movd	mm2, [esp+4])
#endif
	AS2(	psllq	mm5, 1)
	AS2(	paddq	mm0, mm5)
	AS2(	psllq	mm4, 33)
	AS2(	movd	a0, mm0)
	AS2(	psrlq	mm0, 32)
	AS2(	paddq	mm6, mm7)
#if CRYPTOPP_BOOL_X32
	AS2(	movd	mm7, [esp+16])
#else
	AS2(	movd	mm7, [esp+8])
#endif
	AS2(	paddq	mm0, mm6)
	AS2(	paddq	mm0, mm2)
	AS2(	paddq	mm3, mm1)
	AS2(	psllq	mm3, 1)
	AS2(	paddq	mm0, mm3)
	AS2(	psrlq	mm4, 1)
	AS2(	movd	a1, mm0)
	AS2(	psrlq	mm0, 32)
	AS2(	por		mm4, mm7)
	AS2(	paddq	mm0, mm4)
	AS2(	movq	a2, mm0)
#undef a0
#undef a1
#undef a2
#undef a3
#undef k0
#undef k1
#undef k2
#undef k3

	// Label 3: more input left? Loop back to the outer chunk loop.
	ASL(3)
	AS2(	test	ecx, ecx)
	ASJ(	jnz,	4, b)
#if CRYPTOPP_BOOL_X32
	AS2(	add		esp, 24)
#else
	AS2(	add		esp, 12)
#endif
	AS_POP_IF86(	bp)
	AS1(	emms)
#ifdef __GNUC__
	ATT_PREFIX
	// Restore the saved ebx.
	AS2(	mov	%0, %%ebx)
	: "=m" (temp)
	: "m" (L1KeyLength), "c" (blocksRemainingInWord64), "S" (data), "D" (nhK+tagPart*2), "d" (m_isFirstBlock), "a" (polyS+tagPart*4)
	: "memory", "cc"
	);
#endif
}
#endif
481 
// Platform dispatch for the VHASH arithmetic primitives:
//   DeclareNH(a)        - declare/zero an NH accumulator
//   AccumulateNH(a,b,c) - a += b*c (64x64->128 accumulate)
//   MUL64(rh,rl,i1,i2)  - full 64x64->128 multiply
//   ADD128(rh,rl,ih,il) - 128-bit add with carry propagation
#if VMAC_BOOL_WORD128
	// Native 128-bit integer type available: use it directly.
	#define DeclareNH(a) word128 a=0
	#define MUL64(rh,rl,i1,i2) {word128 p = word128(i1)*(i2); rh = word64(p>>64); rl = word64(p);}
	#define AccumulateNH(a, b, c) a += word128(b)*(c)
	#define Multiply128(r, i1, i2) r = word128(word64(i1)) * word64(i2)
#else
	#if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER) && !defined(_M_ARM)
		// MSVC intrinsic for an unsigned 32x32->64 multiply.
		#define MUL32(a, b) __emulu(word32(a), word32(b))
	#else
		#define MUL32(a, b) ((word64)((word32)(a)) * (word32)(b))
	#endif
	#if defined(CRYPTOPP_X64_ASM_AVAILABLE)
		// x86-64 GCC-style inline asm: mulq gives the full 128-bit product.
		#define DeclareNH(a) word64 a##0=0, a##1=0
		#define MUL64(rh,rl,i1,i2) asm ("mulq %3" : "=a"(rl), "=d"(rh) : "a"(i1), "g"(i2) : "cc");
		#define AccumulateNH(a, b, c) asm ("mulq %3; addq %%rax, %0; adcq %%rdx, %1" : "+r"(a##0), "+r"(a##1) : "a"(b), "g"(c) : "%rdx", "cc");
		#define ADD128(rh,rl,ih,il) asm ("addq %3, %1; adcq %2, %0" : "+r"(rh),"+r"(rl) : "r"(ih),"r"(il) : "cc");
	#elif defined(_MSC_VER) && !CRYPTOPP_BOOL_SLOW_WORD64
		// MSVC x64: _umul128 intrinsic.
		#define DeclareNH(a) word64 a##0=0, a##1=0
		#define MUL64(rh,rl,i1,i2) (rl) = _umul128(i1,i2,&(rh));
		#define AccumulateNH(a, b, c) {\
			word64 ph, pl;\
			pl = _umul128(b,c,&ph);\
			a##0 += pl;\
			a##1 += ph + (a##0 < pl);}
	#else
		// Pure 32-bit fallback: three-limb NH accumulator and schoolbook
		// 64x64 multiply built from MUL32 partial products.
		#define VMAC_BOOL_32BIT 1
		#define DeclareNH(a) word64 a##0=0, a##1=0, a##2=0
		#define MUL64(rh,rl,i1,i2) \
			{ word64 _i1 = (i1), _i2 = (i2); \
			word64 m1= MUL32(_i1,_i2>>32); \
			word64 m2= MUL32(_i1>>32,_i2); \
			rh = MUL32(_i1>>32,_i2>>32); \
			rl = MUL32(_i1,_i2); \
			ADD128(rh,rl,(m1 >> 32),(m1 << 32)); \
			ADD128(rh,rl,(m2 >> 32),(m2 << 32)); \
			}
		#define AccumulateNH(a, b, c) {\
			word64 p = MUL32(b, c);\
			a##1 += word32((p)>>32);\
			a##0 += word32(p);\
			p = MUL32((b)>>32, c);\
			a##2 += word32((p)>>32);\
			a##1 += word32(p);\
			p = MUL32((b)>>32, (c)>>32);\
			a##2 += p;\
			p = MUL32(b, (c)>>32);\
			a##1 += word32(p);\
			a##2 += word32(p>>32);}
	#endif
#endif
#ifndef VMAC_BOOL_32BIT
	#define VMAC_BOOL_32BIT 0
#endif
#ifndef ADD128
	// Portable 128-bit add: carry out of the low word detected by comparison.
	#define ADD128(rh,rl,ih,il) \
		{ word64 _il = (il); \
		(rl) += (_il); \
		(rh) += (ih) + ((rl) < (_il)); \
		}
#endif
542 
// Portable VHASH: NH-compresses the input in L1KeyLength-byte chunks and
// folds each chunk's result into a running 127-bit polynomial accumulator.
// T_128BitTag selects whether a second, independent accumulator/key pair is
// maintained for the high half of a 128-bit tag. The first hashed chunk of a
// message is absorbed additively (no multiply), per the VMAC draft; the
// statement order below is carry-exact and must not be reordered.
template <bool T_128BitTag>
void VMAC_Base::VHASH_Update_Template(const word64 *data, size_t blocksRemainingInWord64)
{
	CRYPTOPP_ASSERT(IsAlignedOn(m_polyState(),GetAlignmentOf<word64>()));
	CRYPTOPP_ASSERT(IsAlignedOn(m_nhKey(),GetAlignmentOf<word64>()));

	// One NH step: accumulate (data+key)*(data+key) pairs; the second tag
	// half uses the key shifted by two words (the "key toeplitz" trick).
	#define INNER_LOOP_ITERATION(j) {\
		word64 d0 = ConditionalByteReverse(LITTLE_ENDIAN_ORDER, data[i+2*j+0]);\
		word64 d1 = ConditionalByteReverse(LITTLE_ENDIAN_ORDER, data[i+2*j+1]);\
		AccumulateNH(nhA, d0+nhK[i+2*j+0], d1+nhK[i+2*j+1]);\
		if (T_128BitTag)\
			AccumulateNH(nhB, d0+nhK[i+2*j+2], d1+nhK[i+2*j+3]);\
		}

	size_t L1KeyLengthInWord64 = m_L1KeyLength / 8;
	size_t innerLoopEnd = L1KeyLengthInWord64;
	const word64 *nhK = m_nhKey();
	word64 *polyS = (word64*)(void*)m_polyState();
	bool isFirstBlock = true;
	size_t i;

	#if !VMAC_BOOL_32BIT
		#if VMAC_BOOL_WORD128
			word128 a1=0, a2=0;
		#else
			word64 ah1=0, al1=0, ah2=0, al2=0;
		#endif
		// Load the polynomial keys for each tag half from the state block.
		word64 kh1, kl1, kh2, kl2;
		kh1=(polyS+0*4+2)[0]; kl1=(polyS+0*4+2)[1];
		if (T_128BitTag)
		{
			kh2=(polyS+1*4+2)[0]; kl2=(polyS+1*4+2)[1];
		}
	#endif

	do
	{
		DeclareNH(nhA);
		DeclareNH(nhB);

		i = 0;
		if (blocksRemainingInWord64 < L1KeyLengthInWord64)
		{
			// Short final chunk: peel off the non-multiple-of-8 words first
			// so the main loop below can stay 4x unrolled.
			if (blocksRemainingInWord64 % 8)
			{
				innerLoopEnd = blocksRemainingInWord64 % 8;
				for (; i<innerLoopEnd; i+=2)
					INNER_LOOP_ITERATION(0);
			}
			innerLoopEnd = blocksRemainingInWord64;
		}
		// Main NH loop, unrolled 4 iterations (8 word64s) at a time.
		for (; i<innerLoopEnd; i+=8)
		{
			INNER_LOOP_ITERATION(0);
			INNER_LOOP_ITERATION(1);
			INNER_LOOP_ITERATION(2);
			INNER_LOOP_ITERATION(3);
		}
		blocksRemainingInWord64 -= innerLoopEnd;
		data += innerLoopEnd;

		#if VMAC_BOOL_32BIT
			// Normalize the three-limb NH accumulators into 32/32/62-bit
			// limbs (nh0/nh1/nh2) before the polynomial step.
			word32 nh0[2], nh1[2];
			word64 nh2[2];

			nh0[0] = word32(nhA0);
			nhA1 += (nhA0 >> 32);
			nh1[0] = word32(nhA1);
			nh2[0] = (nhA2 + (nhA1 >> 32)) & m62;

			if (T_128BitTag)
			{
				nh0[1] = word32(nhB0);
				nhB1 += (nhB0 >> 32);
				nh1[1] = word32(nhB1);
				nh2[1] = (nhB2 + (nhB1 >> 32)) & m62;
			}

			// 32-bit views of the accumulator (a*) and key (k*) limbs inside
			// polyS; the NativeByteOrder arithmetic selects the correct half
			// of each word64 regardless of endianness.
			#define a0 (((word32 *)(polyS+i*4))[2+NativeByteOrder::ToEnum()])
			#define a1 (*(((word32 *)(polyS+i*4))+3-NativeByteOrder::ToEnum())) // workaround for GCC 3.2
			#define a2 (((word32 *)(polyS+i*4))[0+NativeByteOrder::ToEnum()])
			#define a3 (*(((word32 *)(polyS+i*4))+1-NativeByteOrder::ToEnum()))
			#define aHi ((polyS+i*4)[0])
			#define k0 (((word32 *)(polyS+i*4+2))[2+NativeByteOrder::ToEnum()])
			#define k1 (*(((word32 *)(polyS+i*4+2))+3-NativeByteOrder::ToEnum()))
			#define k2 (((word32 *)(polyS+i*4+2))[0+NativeByteOrder::ToEnum()])
			#define k3 (*(((word32 *)(polyS+i*4+2))+1-NativeByteOrder::ToEnum()))
			#define kHi ((polyS+i*4+2)[0])

			if (isFirstBlock)
			{
				isFirstBlock = false;
				if (m_isFirstBlock)
				{
					m_isFirstBlock = false;
					// First chunk of the message: accumulator = NH + key.
					for (i=0; i<=(size_t)T_128BitTag; i++)
					{
						word64 t = (word64)nh0[i] + k0;
						a0 = (word32)t;
						t = (t >> 32) + nh1[i] + k1;
						a1 = (word32)t;
						aHi = (t >> 32) + nh2[i] + kHi;
					}
					continue;
				}
			}
			// Polynomial step per tag half: a = a*k + nh (mod 2^127-1),
			// schoolbook over 32-bit limbs with doubled wrap-around terms.
			for (i=0; i<=(size_t)T_128BitTag; i++)
			{
				word64 p, t;
				word32 t2;

				p = MUL32(a3, 2*k3);
				p += nh2[i];
				p += MUL32(a0, k2);
				p += MUL32(a1, k1);
				p += MUL32(a2, k0);
				t2 = (word32)p;
				p >>= 32;
				p += MUL32(a0, k3);
				p += MUL32(a1, k2);
				p += MUL32(a2, k1);
				p += MUL32(a3, k0);
				t = (word64(word32(p) & 0x7fffffff) << 32) | t2;
				p >>= 31;
				p += nh0[i];
				p += MUL32(a0, k0);
				p += MUL32(a1, 2*k3);
				p += MUL32(a2, 2*k2);
				p += MUL32(a3, 2*k1);
				t2 = (word32)p;
				p >>= 32;
				p += nh1[i];
				p += MUL32(a0, k1);
				p += MUL32(a1, k0);
				p += MUL32(a2, 2*k3);
				p += MUL32(a3, 2*k2);
				a0 = t2;
				a1 = (word32)p;
				aHi = (p >> 32) + t;
			}

			#undef a0
			#undef a1
			#undef a2
			#undef a3
			#undef aHi
			#undef k0
			#undef k1
			#undef k2
			#undef k3
			#undef kHi
		#else // #if VMAC_BOOL_32BIT
			if (isFirstBlock)
			{
				isFirstBlock = false;
				if (m_isFirstBlock)
				{
					m_isFirstBlock = false;
					// First chunk of the message: accumulator = (NH masked
					// to 126 bits) + key, no multiplication.
					#if VMAC_BOOL_WORD128
						#define first_poly_step(a, kh, kl, m)	a = (m & m126) + ((word128(kh) << 64) | kl)

						first_poly_step(a1, kh1, kl1, nhA);
						if (T_128BitTag)
							first_poly_step(a2, kh2, kl2, nhB);
					#else
						#define first_poly_step(ah, al, kh, kl, mh, ml)		{\
							mh &= m62;\
							ADD128(mh, ml, kh, kl);	\
							ah = mh; al = ml;}

						first_poly_step(ah1, al1, kh1, kl1, nhA1, nhA0);
						if (T_128BitTag)
							first_poly_step(ah2, al2, kh2, kl2, nhB1, nhB0);
					#endif
					continue;
				}
				else
				{
					// Resuming mid-message: reload the accumulators that the
					// previous call stored back into polyS.
					#if VMAC_BOOL_WORD128
						a1 = (word128((polyS+0*4)[0]) << 64) | (polyS+0*4)[1];
					#else
						ah1=(polyS+0*4)[0]; al1=(polyS+0*4)[1];
					#endif
					if (T_128BitTag)
					{
						#if VMAC_BOOL_WORD128
							a2 = (word128((polyS+1*4)[0]) << 64) | (polyS+1*4)[1];
						#else
							ah2=(polyS+1*4)[0]; al2=(polyS+1*4)[1];
						#endif
					}
				}
			}

			// Polynomial step: a = a*k + (m mod 2^126) reduced mod 2^127-1.
			#if VMAC_BOOL_WORD128
				#define poly_step(a, kh, kl, m)	\
				{   word128 t1, t2, t3, t4;\
					Multiply128(t2, a>>64, kl);\
					Multiply128(t3, a, kh);\
					Multiply128(t1, a, kl);\
					Multiply128(t4, a>>64, 2*kh);\
					t2 += t3;\
					t4 += t1;\
					t2 += t4>>64;\
					a = (word128(word64(t2)&m63) << 64) | word64(t4);\
					t2 *= 2;\
					a += m & m126;\
					a += t2>>64;}

				poly_step(a1, kh1, kl1, nhA);
				if (T_128BitTag)
					poly_step(a2, kh2, kl2, nhB);
			#else
				#define poly_step(ah, al, kh, kl, mh, ml)				\
				{   word64 t1h, t1l, t2h, t2l, t3h, t3l, z=0;			\
					/* compute ab*cd, put bd into result registers */	\
					MUL64(t2h,t2l,ah,kl);								\
					MUL64(t3h,t3l,al,kh);								\
					MUL64(t1h,t1l,ah,2*kh);								\
					MUL64(ah,al,al,kl);									\
					/* add together ad + bc */							\
					ADD128(t2h,t2l,t3h,t3l);							\
					/* add 2 * ac to result */							\
					ADD128(ah,al,t1h,t1l);								\
					/* now (ah,al), (t2l,2*t2h) need summing */			\
					/* first add the high registers, carrying into t2h */	\
					ADD128(t2h,ah,z,t2l);								\
					/* double t2h and add top bit of ah */				\
					t2h += t2h + (ah >> 63);							\
					ah &= m63;											\
					/* now add the low registers */						\
					mh &= m62;											\
					ADD128(ah,al,mh,ml);								\
					ADD128(ah,al,z,t2h);								\
				}

				poly_step(ah1, al1, kh1, kl1, nhA1, nhA0);
				if (T_128BitTag)
					poly_step(ah2, al2, kh2, kl2, nhB1, nhB0);
			#endif
		#endif // #if VMAC_BOOL_32BIT
	} while (blocksRemainingInWord64);

	// Persist the accumulators so a later call (or TruncatedFinal) can
	// continue the polynomial. The 32-bit path updates polyS in place above.
	#if VMAC_BOOL_WORD128
		(polyS+0*4)[0]=word64(a1>>64); (polyS+0*4)[1]=word64(a1);
		if (T_128BitTag)
		{
			(polyS+1*4)[0]=word64(a2>>64); (polyS+1*4)[1]=word64(a2);
		}
	#elif !VMAC_BOOL_32BIT
		(polyS+0*4)[0]=ah1; (polyS+0*4)[1]=al1;
		if (T_128BitTag)
		{
			(polyS+1*4)[0]=ah2; (polyS+1*4)[1]=al2;
		}
	#endif
}
800 
801 inline void VMAC_Base::VHASH_Update(const word64 *data, size_t blocksRemainingInWord64)
802 {
803 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32)
804  if (HasSSE2())
805  {
806  VHASH_Update_SSE2(data, blocksRemainingInWord64, 0);
807  if (m_is128)
808  VHASH_Update_SSE2(data, blocksRemainingInWord64, 1);
809  m_isFirstBlock = false;
810  }
811  else
812 #endif
813  {
814  if (m_is128)
815  VHASH_Update_Template<true>(data, blocksRemainingInWord64);
816  else
817  VHASH_Update_Template<false>(data, blocksRemainingInWord64);
818  }
819 }
820 
821 size_t VMAC_Base::HashMultipleBlocks(const word64 *data, size_t length)
822 {
823  size_t remaining = ModPowerOf2(length, m_L1KeyLength);
824  VHASH_Update(data, (length-remaining)/8);
825  return remaining;
826 }
827 
// L3 hash from the VMAC draft: maps one 128-bit polynomial accumulator
// (input[0] high, input[1] low) plus the residual message length to a single
// word64, using the two l3Key words. All arithmetic is modulo the primes
// p127 = 2^127 - 1 and p64 = 2^64 - 257; statement order is carry-exact.
static word64 L3Hash(const word64 *input, const word64 *l3Key, size_t len)
{
	word64 rh, rl, t, z=0;
	word64 p1 = input[0], p2 = input[1];
	word64 k1 = l3Key[0], k2 = l3Key[1];

	/* fully reduce (p1,p2)+(len,0) mod p127 */
	t = p1 >> 63;
	p1 &= m63;
	ADD128(p1, p2, len, t);
	/* At this point, (p1,p2) is at most 2^127+(len<<64) */
	t = (p1 > m63) + ((p1 == m63) & (p2 == m64));
	ADD128(p1, p2, z, t);
	p1 &= m63;

	/* compute (p1,p2)/(2^64-2^32) and (p1,p2)%(2^64-2^32) */
	t = p1 + (p2 >> 32);
	t += (t >> 32);
	t += (word32)t > 0xfffffffeU;
	p1 += (t >> 32);
	p2 += (p1 << 32);

	/* compute (p1+k1)%p64 and (p2+k2)%p64 */
	// If the addition wrapped past 2^64 it wrapped past p64 too; the masked
	// +257 corrects for the difference 2^64 - p64 = 257.
	p1 += k1;
	p1 += (0 - (p1 < k1)) & 257;
	p2 += k2;
	p2 += (0 - (p2 < k2)) & 257;

	/* compute (p1+k1)*(p2+k2)%p64 */
	MUL64(rh, rl, p1, p2);
	t = rh >> 56;
	ADD128(t, rl, z, rh);
	rh <<= 8;
	ADD128(t, rl, z, rh);
	t += t << 8;
	rl += t;
	// Two conditional corrections bring rl into [0, p64).
	rl += (0 - (rl < t)) & 257;
	rl += (0 - (rl > p64-1)) & 257;
	return rl;
}
868 
// Finalizes the MAC: hashes any buffered partial chunk (zero-padded to a
// 16-byte boundary), runs the L3 hash over each polynomial accumulator, adds
// the nonce-derived pad, and writes the first `size` bytes of the tag.
void VMAC_Base::TruncatedFinal(byte *mac, size_t size)
{
	CRYPTOPP_ASSERT(IsAlignedOn(DataBuf(),GetAlignmentOf<word64>()));
	CRYPTOPP_ASSERT(IsAlignedOn(m_polyState(),GetAlignmentOf<word64>()));
	// len = leftover message bytes that did not fill a whole L1 chunk.
	size_t len = ModPowerOf2(GetBitCountLo()/8, m_L1KeyLength);

	if (len)
	{
		// Zero-pad the tail up to the next 16-byte boundary and hash it.
		memset(m_data()+len, 0, (0-len)%16);
		VHASH_Update(DataBuf(), ((len+15)/16)*2);
		len *= 8; // convert to bits
	}
	else if (m_isFirstBlock)
	{
		// special case for empty string: seed each accumulator with its
		// polynomial key words (stored at offsets 2,3 within each state row).
		m_polyState()[0] = m_polyState()[2];
		m_polyState()[1] = m_polyState()[3];
		if (m_is128)
		{
			m_polyState()[4] = m_polyState()[6];
			m_polyState()[5] = m_polyState()[7];
		}
	}

	if (m_is128)
	{
		// 128-bit tag: L3-hash each half and add the corresponding 8 bytes
		// of the pad (big-endian), then emit in big-endian order.
		word64 t[2];
		t[0] = L3Hash(m_polyState(), m_l3Key(), len) + GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad());
		t[1] = L3Hash(m_polyState()+4, m_l3Key()+2, len) + GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad()+8);
		if (size == 16)
		{
			PutWord(false, BIG_ENDIAN_ORDER, mac, t[0]);
			PutWord(false, BIG_ENDIAN_ORDER, mac+8, t[1]);
		}
		else
		{
			// Truncated output: byte-swap into big-endian then copy a prefix.
			t[0] = ConditionalByteReverse(BIG_ENDIAN_ORDER, t[0]);
			t[1] = ConditionalByteReverse(BIG_ENDIAN_ORDER, t[1]);
			memcpy(mac, t, size);
		}
	}
	else
	{
		// 64-bit tag: the low bit of the stored nonce's last byte selects
		// which half of the 16-byte pad is added (see Resynchronize).
		word64 t = L3Hash(m_polyState(), m_l3Key(), len);
		t += GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad() + (m_nonce()[IVSize()-1]&1) * 8);
		if (size == 8)
			PutWord(false, BIG_ENDIAN_ORDER, mac, t);
		else
		{
			t = ConditionalByteReverse(BIG_ENDIAN_ORDER, t);
			memcpy(mac, &t, size);
		}
	}
}
923 
924 NAMESPACE_END
Standard names for retrieving values by name when working with NameValuePairs.
const char * DigestSize()
int, in bytes
Definition: argnames.h:79
An invalid argument was detected.
Definition: cryptlib.h:184
virtual void SetKey(const byte *key, size_t length, const NameValuePairs &params=g_nullNameValuePairs)
Sets or resets the key of this object.
Definition: cryptlib.cpp:97
T2 ModPowerOf2(const T1 &a, const T2 &b)
Reduces a value to a power of 2.
Definition: misc.h:875
void CleanNew(size_type newSize)
Change size without preserving contents.
Definition: secblock.h:660
void PutWord(bool assumeAligned, ByteOrder order, byte *block, T value, const byte *xorBlock=NULL)
Access a block of memory.
Definition: misc.h:2123
virtual unsigned int BlockSize() const =0
Provides the block size of the cipher.
Library configuration file.
Interface for random number generators.
Definition: cryptlib.h:1188
virtual size_t AdvancedProcessBlocks(const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) const
Encrypt and xor multiple blocks using additional flags.
Definition: cryptlib.cpp:178
SecBlock typedef.
Definition: secblock.h:731
Interface for one direction (encryption or decryption) of a block cipher.
Definition: cryptlib.h:1095
Classes and functions for secure memory allocations.
bool IsAlignedOn(const void *ptr, unsigned int alignment)
Determines whether ptr is aligned to a minimum value.
Definition: misc.h:954
Classes for the VMAC message authentication code.
int GetIntValueWithDefault(const char *name, int defaultValue) const
Get a named value with type int, with default.
Definition: cryptlib.h:382
A method was called which was not implemented.
Definition: cryptlib.h:205
void ProcessBlock(const byte *inBlock, byte *outBlock) const
Encrypt or decrypt a block.
Definition: cryptlib.h:758
T ConditionalByteReverse(ByteOrder order, T value)
Reverses bytes in a value depending upon endianness.
Definition: misc.h:1807
void UncheckedSetKey(const byte *userKey, unsigned int keylength, const NameValuePairs &params)
Sets the key for this object without performing parameter validation.
Definition: vmac.cpp:55
byte order is big-endian
Definition: cryptlib.h:128
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.
Definition: trap.h:62
Functions for CPU features and intrinsics.
void TruncatedFinal(byte *mac, size_t size)
Computes the hash of the current message.
Definition: vmac.cpp:869
const char * IV()
ConstByteArrayParameter, also accepts const byte * for backwards compatibility.
Definition: argnames.h:21
unsigned int IVSize() const
Returns length of the IV accepted by this object.
Definition: vmac.h:29
virtual unsigned int OptimalDataAlignment() const
Provides input and output data alignment for optimal performance.
Definition: cryptlib.cpp:229
bool HasSSE2()
Determines SSE2 availability.
Definition: cpu.h:165
bool VerifyBufsEqual(const byte *buf1, const byte *buf2, size_t count)
Performs a near constant-time comparison of two equally sized buffers.
Definition: misc.cpp:96
unsigned int OptimalDataAlignment() const
Provides input and output data alignment for optimal performance.
Definition: vmac.cpp:161
const char * L1KeyLength()
int, in bytes
Definition: argnames.h:80
Crypto++ library namespace.
void Resynchronize(const byte *nonce, int length=-1)
Resynchronize with an IV.
Definition: vmac.cpp:120
virtual void GetNextIV(RandomNumberGenerator &rng, byte *iv)
Retrieves a secure IV for the next message.
Definition: cryptlib.cpp:173
void GetNextIV(RandomNumberGenerator &rng, byte *IV)
Retrieves a secure IV for the next message.
Definition: vmac.cpp:114
Interface for retrieving values given their names.
Definition: cryptlib.h:279
byte * BytePtr()
Provides a byte pointer to the first element in the memory block.
Definition: secblock.h:531