Crypto++  5.6.3
Free C++ class library of cryptographic schemes
vmac.cpp
1 // vmac.cpp - written and placed in the public domain by Wei Dai
2 // based on Ted Krovetz's public domain vmac.c and draft-krovetz-vmac-01.txt
3 
4 #include "pch.h"
5 #include "config.h"
6 
7 #include "vmac.h"
8 #include "cpu.h"
9 #include "argnames.h"
10 #include "secblock.h"
11 
12 #if CRYPTOPP_MSC_VERSION
13 # pragma warning(disable: 4731)
14 #endif
15 
16 NAMESPACE_BEGIN(CryptoPP)
17 
18 #if defined(_MSC_VER) && !CRYPTOPP_BOOL_SLOW_WORD64
19 #include <intrin.h>
20 #endif
21 
22 #if defined(CRYPTOPP_WORD128_AVAILABLE) && !defined(CRYPTOPP_X64_ASM_AVAILABLE)
23 # define VMAC_BOOL_WORD128 1
24 #else
25 # define VMAC_BOOL_WORD128 0
26 #endif
27 
28 #ifdef __BORLANDC__
29 #define const // Turbo C++ 2006 workaround
30 #endif
31 static const word64 p64 = W64LIT(0xfffffffffffffeff); /* 2^64 - 257 prime */
32 static const word64 m62 = W64LIT(0x3fffffffffffffff); /* 62-bit mask */
33 static const word64 m63 = W64LIT(0x7fffffffffffffff); /* 63-bit mask */
34 static const word64 m64 = W64LIT(0xffffffffffffffff); /* 64-bit mask */
35 static const word64 mpoly = W64LIT(0x1fffffff1fffffff); /* Poly key mask */
36 #ifdef __BORLANDC__
37 #undef const
38 #endif
39 #if VMAC_BOOL_WORD128
40 #ifdef __powerpc__
41 // workaround GCC Bug 31690: ICE with const __uint128_t and C++ front-end
42 #define m126 ((word128(m62)<<64)|m64)
43 #else
44 static const word128 m126 = (word128(m62)<<64)|m64; /* 126-bit mask */
45 #endif
46 #endif
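// These constants drive the three VHASH layers (a reading of the code below,
// consistent with draft-krovetz-vmac-01): m126/m62 cap the NH accumulators
// below 2^126, m63 appears in the polynomial-hash reduction mod 2^127-1,
// mpoly clamps the per-tag polynomial keys, and p64 = 2^64-257 is the prime
// modulus of the inner-product (L3) stage.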
47 
48 void VMAC_Base::UncheckedSetKey(const byte *userKey, unsigned int keylength, const NameValuePairs &params)
49 {
50  int digestLength = params.GetIntValueWithDefault(Name::DigestSize(), DefaultDigestSize());
51  if (digestLength != 8 && digestLength != 16)
52  throw InvalidArgument("VMAC: DigestSize must be 8 or 16");
53  m_is128 = digestLength == 16;
54 
55  m_L1KeyLength = params.GetIntValueWithDefault(Name::L1KeyLength(), 128);
56  if (m_L1KeyLength <= 0 || m_L1KeyLength % 128 != 0)
57  throw InvalidArgument("VMAC: L1KeyLength must be a positive multiple of 128");
58 
59  AllocateBlocks();
60 
61  BlockCipher &cipher = AccessCipher();
62  cipher.SetKey(userKey, keylength, params);
63  const unsigned int blockSize = cipher.BlockSize();
64  const unsigned int blockSizeInWords = blockSize / sizeof(word64);
65  SecBlock<word64> out(blockSizeInWords);
66  SecByteBlock in;
67  in.CleanNew(blockSize);
68  size_t i;
69 
70  /* Fill nh key */
71  in[0] = 0x80;
72  cipher.AdvancedProcessBlocks(in, NULL, (byte *)m_nhKey(), m_nhKeySize()*sizeof(word64), cipher.BT_InBlockIsCounter);
73  ConditionalByteReverse<word64>(BIG_ENDIAN_ORDER, m_nhKey(), m_nhKey(), m_nhKeySize()*sizeof(word64));
74 
75  /* Fill poly key */
76  in[0] = 0xC0;
77  in[15] = 0;
78  for (i = 0; i <= (size_t)m_is128; i++)
79  {
80  cipher.ProcessBlock(in, out.BytePtr());
81  m_polyState()[i*4+2] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr()) & mpoly;
82  m_polyState()[i*4+3] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr()+8) & mpoly;
83  in[15]++;
84  }
85 
86  /* Fill ip key */
87  in[0] = 0xE0;
88  in[15] = 0;
89  word64 *l3Key = m_l3Key();
90  assert(IsAlignedOn(l3Key,GetAlignmentOf<word64>()));
91 
92  for (i = 0; i <= (size_t)m_is128; i++)
93  do
94  {
95  cipher.ProcessBlock(in, out.BytePtr());
96  l3Key[i*2+0] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr());
97  l3Key[i*2+1] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr()+8);
98  in[15]++;
99  } while ((l3Key[i*2+0] >= p64) || (l3Key[i*2+1] >= p64));
100 
101  m_padCached = false;
102  size_t nonceLength;
103  const byte *nonce = GetIVAndThrowIfInvalid(params, nonceLength);
104  Resynchronize(nonce, (int)nonceLength);
105 }
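// Usage sketch: this keying path is normally reached through the public
// VMAC<> template declared in vmac.h. A minimal sketch (assumes AES from
// aes.h; key and tag are 16 bytes here, and the 8-byte nonce is zero-padded
// on the left by Resynchronize below):
//
//   CryptoPP::VMAC<CryptoPP::AES> mac;
//   mac.SetKeyWithIV(key, 16, nonce, 8);            // ends up in UncheckedSetKey above
//   mac.CalculateDigest(tag, message, messageLen);  // 128-bit tag by default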
106 
107 void VMAC_Base::GetNextIV(RandomNumberGenerator &rng, byte *IV)
108 {
109  SimpleKeyingInterface::GetNextIV(rng, IV);
110  IV[0] &= 0x7f;
111 }
112 
113 void VMAC_Base::Resynchronize(const byte *nonce, int len)
114 {
115  size_t length = ThrowIfInvalidIVLength(len);
116  size_t s = IVSize();
117  byte *storedNonce = m_nonce();
118 
119  if (m_is128)
120  {
121  memset(storedNonce, 0, s-length);
122  memcpy(storedNonce+s-length, nonce, length);
123  AccessCipher().ProcessBlock(storedNonce, m_pad());
124  }
125  else
126  {
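// For 64-bit tags the pad depends only on the nonce with its last bit
// cleared (that bit merely selects one half of the encrypted block in
// TruncatedFinal), so a nonce differing from the stored one only in that
// bit can reuse the cached block-cipher output.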
127  if (m_padCached && (storedNonce[s-1] | 1) == (nonce[length-1] | 1))
128  {
129  m_padCached = VerifyBufsEqual(storedNonce+s-length, nonce, length-1);
130  for (size_t i=0; m_padCached && i<s-length; i++)
131  m_padCached = (storedNonce[i] == 0);
132  }
133  if (!m_padCached)
134  {
135  memset(storedNonce, 0, s-length);
136  memcpy(storedNonce+s-length, nonce, length-1);
137  storedNonce[s-1] = nonce[length-1] & 0xfe;
138  AccessCipher().ProcessBlock(storedNonce, m_pad());
139  m_padCached = true;
140  }
141  storedNonce[s-1] = nonce[length-1];
142  }
143  m_isFirstBlock = true;
144  Restart();
145 }
146 
147 void VMAC_Base::HashEndianCorrectedBlock(const word64 *data)
148 {
149  CRYPTOPP_UNUSED(data);
150  assert(false);
151  throw NotImplemented("VMAC: HashEndianCorrectedBlock is not implemented");
152 }
153 
154 unsigned int VMAC_Base::OptimalDataAlignment() const
155 {
156  return
157 #if (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_VMAC_ASM)
158  HasSSE2() ? 16 :
159 #endif
160  GetCipher().OptimalDataAlignment();
161 }
162 
163 #if (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86 || (CRYPTOPP_BOOL_X32 && !defined(CRYPTOPP_DISABLE_VMAC_ASM))))
164 #if CRYPTOPP_MSC_VERSION
165 # pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code
166 #endif
167 void
168 #ifdef __GNUC__
169 __attribute__ ((noinline)) // Intel Compiler 9.1 workaround
170 #endif
171 VMAC_Base::VHASH_Update_SSE2(const word64 *data, size_t blocksRemainingInWord64, int tagPart)
172 {
173  assert(IsAlignedOn(m_polyState(),GetAlignmentOf<word64>()));
174  assert(IsAlignedOn(m_nhKey(),GetAlignmentOf<word64>()));
175 
176  const word64 *nhK = m_nhKey();
177  word64 *polyS = (word64*)(void*)m_polyState();
178  word32 L1KeyLength = m_L1KeyLength;
179 
180  // These are used in the ASM, but some analysis engines cannot determine it.
181  CRYPTOPP_UNUSED(data); CRYPTOPP_UNUSED(tagPart); CRYPTOPP_UNUSED(L1KeyLength);
182  CRYPTOPP_UNUSED(blocksRemainingInWord64);
183 
184 #ifdef __GNUC__
185  word32 temp;
186  __asm__ __volatile__
187  (
188  AS2( mov %%ebx, %0)
189  AS2( mov %1, %%ebx)
190  INTEL_NOPREFIX
191 #else
192  #if _MSC_VER < 1300 || defined(__INTEL_COMPILER)
193  char isFirstBlock = m_isFirstBlock;
194  AS2( mov ebx, [L1KeyLength])
195  AS2( mov dl, [isFirstBlock])
196  #else
197  AS2( mov ecx, this)
198  AS2( mov ebx, [ecx+m_L1KeyLength])
199  AS2( mov dl, [ecx+m_isFirstBlock])
200  #endif
201  AS2( mov eax, tagPart)
202  AS2( shl eax, 4)
203  AS2( mov edi, nhK)
204  AS2( add edi, eax)
205  AS2( add eax, eax)
206  AS2( add eax, polyS)
207 
208  AS2( mov esi, data)
209  AS2( mov ecx, blocksRemainingInWord64)
210 #endif
211 
212  AS2( shr ebx, 3)
213 #if CRYPTOPP_BOOL_X32
214  AS_PUSH_IF86( bp)
215  AS2( sub esp, 24)
216 #else
217  AS_PUSH_IF86( bp)
218  AS2( sub esp, 12)
219 #endif
220  ASL(4)
221  AS2( mov ebp, ebx)
222  AS2( cmp ecx, ebx)
223  AS2( cmovl ebp, ecx)
224  AS2( sub ecx, ebp)
225  AS2( lea ebp, [edi+8*ebp]) // end of nhK
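// The block below computes one NH step, (d0+k0)*(d1+k1) on 64-bit lanes,
// out of four 32x32->64 pmuludq partial products: pshufw swaps the 32-bit
// halves so lo*lo, lo*hi, hi*lo and hi*hi all get formed, and the partial
// sums are spilled to [esp] and recombined with 32-bit shifts.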
226  AS2( movq mm6, [esi])
227  AS2( paddq mm6, [edi])
228  AS2( movq mm5, [esi+8])
229  AS2( paddq mm5, [edi+8])
230  AS2( add esi, 16)
231  AS2( add edi, 16)
232  AS2( movq mm4, mm6)
233  ASS( pshufw mm2, mm6, 1, 0, 3, 2)
234  AS2( pmuludq mm6, mm5)
235  ASS( pshufw mm3, mm5, 1, 0, 3, 2)
236  AS2( pmuludq mm5, mm2)
237  AS2( pmuludq mm2, mm3)
238  AS2( pmuludq mm3, mm4)
239  AS2( pxor mm7, mm7)
240  AS2( movd [esp], mm6)
241  AS2( psrlq mm6, 32)
242 #if CRYPTOPP_BOOL_X32
243  AS2( movd [esp+8], mm5)
244 #else
245  AS2( movd [esp+4], mm5)
246 #endif
247  AS2( psrlq mm5, 32)
248  AS2( cmp edi, ebp)
249  ASJ( je, 1, f)
250  ASL(0)
251  AS2( movq mm0, [esi])
252  AS2( paddq mm0, [edi])
253  AS2( movq mm1, [esi+8])
254  AS2( paddq mm1, [edi+8])
255  AS2( add esi, 16)
256  AS2( add edi, 16)
257  AS2( movq mm4, mm0)
258  AS2( paddq mm5, mm2)
259  ASS( pshufw mm2, mm0, 1, 0, 3, 2)
260  AS2( pmuludq mm0, mm1)
261 #if CRYPTOPP_BOOL_X32
262  AS2( movd [esp+16], mm3)
263 #else
264  AS2( movd [esp+8], mm3)
265 #endif
266  AS2( psrlq mm3, 32)
267  AS2( paddq mm5, mm3)
268  ASS( pshufw mm3, mm1, 1, 0, 3, 2)
269  AS2( pmuludq mm1, mm2)
270  AS2( pmuludq mm2, mm3)
271  AS2( pmuludq mm3, mm4)
272  AS2( movd mm4, [esp])
273  AS2( paddq mm7, mm4)
274 #if CRYPTOPP_BOOL_X32
275  AS2( movd mm4, [esp+8])
276  AS2( paddq mm6, mm4)
277  AS2( movd mm4, [esp+16])
278 #else
279  AS2( movd mm4, [esp+4])
280  AS2( paddq mm6, mm4)
281  AS2( movd mm4, [esp+8])
282 #endif
283  AS2( paddq mm6, mm4)
284  AS2( movd [esp], mm0)
285  AS2( psrlq mm0, 32)
286  AS2( paddq mm6, mm0)
287 #if CRYPTOPP_BOOL_X32
288  AS2( movd [esp+8], mm1)
289 #else
290  AS2( movd [esp+4], mm1)
291 #endif
292  AS2( psrlq mm1, 32)
293  AS2( paddq mm5, mm1)
294  AS2( cmp edi, ebp)
295  ASJ( jne, 0, b)
296  ASL(1)
297  AS2( paddq mm5, mm2)
298 #if CRYPTOPP_BOOL_X32
299  AS2( movd [esp+16], mm3)
300 #else
301  AS2( movd [esp+8], mm3)
302 #endif
303  AS2( psrlq mm3, 32)
304  AS2( paddq mm5, mm3)
305  AS2( movd mm4, [esp])
306  AS2( paddq mm7, mm4)
307 #if CRYPTOPP_BOOL_X32
308  AS2( movd mm4, [esp+8])
309  AS2( paddq mm6, mm4)
310  AS2( movd mm4, [esp+16])
311 #else
312  AS2( movd mm4, [esp+4])
313  AS2( paddq mm6, mm4)
314  AS2( movd mm4, [esp+8])
315 #endif
316  AS2( paddq mm6, mm4)
317  AS2( lea ebp, [8*ebx])
318  AS2( sub edi, ebp) // reset edi to start of nhK
319 
320  AS2( movd [esp], mm7)
321  AS2( psrlq mm7, 32)
322  AS2( paddq mm6, mm7)
323 #if CRYPTOPP_BOOL_X32
324  AS2( movd [esp+8], mm6)
325 #else
326  AS2( movd [esp+4], mm6)
327 #endif
328  AS2( psrlq mm6, 32)
329  AS2( paddq mm5, mm6)
330  AS2( psllq mm5, 2)
331  AS2( psrlq mm5, 2)
332 
333 #define a0 [eax+2*4]
334 #define a1 [eax+3*4]
335 #define a2 [eax+0*4]
336 #define a3 [eax+1*4]
337 #define k0 [eax+2*8+2*4]
338 #define k1 [eax+2*8+3*4]
339 #define k2 [eax+2*8+0*4]
340 #define k3 [eax+2*8+1*4]
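// a0..a3 and k0..k3 address the 127-bit polynomial accumulator and key as
// 32-bit limbs inside polyS. On the first block the accumulator is simply
// seeded with NH+key; otherwise the code below evaluates
// a = a*k + nh (mod 2^127-1), doubling the partial products that overflow
// 2^127 (hence the psllq-by-1 steps), since 2^128 = 2 in this field.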
341  AS2( test dl, dl)
342  ASJ( jz, 2, f)
343  AS2( movd mm1, k0)
344  AS2( movd mm0, [esp])
345  AS2( paddq mm0, mm1)
346  AS2( movd a0, mm0)
347  AS2( psrlq mm0, 32)
348  AS2( movd mm1, k1)
349 #if CRYPTOPP_BOOL_X32
350  AS2( movd mm2, [esp+8])
351 #else
352  AS2( movd mm2, [esp+4])
353 #endif
354  AS2( paddq mm1, mm2)
355  AS2( paddq mm0, mm1)
356  AS2( movd a1, mm0)
357  AS2( psrlq mm0, 32)
358  AS2( paddq mm5, k2)
359  AS2( paddq mm0, mm5)
360  AS2( movq a2, mm0)
361  AS2( xor edx, edx)
362  ASJ( jmp, 3, f)
363  ASL(2)
364  AS2( movd mm0, a3)
365  AS2( movq mm4, mm0)
366  AS2( pmuludq mm0, k3) // a3*k3
367  AS2( movd mm1, a0)
368  AS2( pmuludq mm1, k2) // a0*k2
369  AS2( movd mm2, a1)
370  AS2( movd mm6, k1)
371  AS2( pmuludq mm2, mm6) // a1*k1
372  AS2( movd mm3, a2)
373  AS2( psllq mm0, 1)
374  AS2( paddq mm0, mm5)
375  AS2( movq mm5, mm3)
376  AS2( movd mm7, k0)
377  AS2( pmuludq mm3, mm7) // a2*k0
378  AS2( pmuludq mm4, mm7) // a3*k0
379  AS2( pmuludq mm5, mm6) // a2*k1
380  AS2( paddq mm0, mm1)
381  AS2( movd mm1, a1)
382  AS2( paddq mm4, mm5)
383  AS2( movq mm5, mm1)
384  AS2( pmuludq mm1, k2) // a1*k2
385  AS2( paddq mm0, mm2)
386  AS2( movd mm2, a0)
387  AS2( paddq mm0, mm3)
388  AS2( movq mm3, mm2)
389  AS2( pmuludq mm2, k3) // a0*k3
390  AS2( pmuludq mm3, mm7) // a0*k0
391 #if CRYPTOPP_BOOL_X32
392  AS2( movd [esp+16], mm0)
393 #else
394  AS2( movd [esp+8], mm0)
395 #endif
396  AS2( psrlq mm0, 32)
397  AS2( pmuludq mm7, mm5) // a1*k0
398  AS2( pmuludq mm5, k3) // a1*k3
399  AS2( paddq mm0, mm1)
400  AS2( movd mm1, a2)
401  AS2( pmuludq mm1, k2) // a2*k2
402  AS2( paddq mm0, mm2)
403  AS2( paddq mm0, mm4)
404  AS2( movq mm4, mm0)
405  AS2( movd mm2, a3)
406  AS2( pmuludq mm2, mm6) // a3*k1
407  AS2( pmuludq mm6, a0) // a0*k1
408  AS2( psrlq mm0, 31)
409  AS2( paddq mm0, mm3)
410  AS2( movd mm3, [esp])
411  AS2( paddq mm0, mm3)
412  AS2( movd mm3, a2)
413  AS2( pmuludq mm3, k3) // a2*k3
414  AS2( paddq mm5, mm1)
415  AS2( movd mm1, a3)
416  AS2( pmuludq mm1, k2) // a3*k2
417  AS2( paddq mm5, mm2)
418 #if CRYPTOPP_BOOL_X32
419  AS2( movd mm2, [esp+8])
420 #else
421  AS2( movd mm2, [esp+4])
422 #endif
423  AS2( psllq mm5, 1)
424  AS2( paddq mm0, mm5)
425  AS2( psllq mm4, 33)
426  AS2( movd a0, mm0)
427  AS2( psrlq mm0, 32)
428  AS2( paddq mm6, mm7)
429 #if CRYPTOPP_BOOL_X32
430  AS2( movd mm7, [esp+16])
431 #else
432  AS2( movd mm7, [esp+8])
433 #endif
434  AS2( paddq mm0, mm6)
435  AS2( paddq mm0, mm2)
436  AS2( paddq mm3, mm1)
437  AS2( psllq mm3, 1)
438  AS2( paddq mm0, mm3)
439  AS2( psrlq mm4, 1)
440  AS2( movd a1, mm0)
441  AS2( psrlq mm0, 32)
442  AS2( por mm4, mm7)
443  AS2( paddq mm0, mm4)
444  AS2( movq a2, mm0)
445 #undef a0
446 #undef a1
447 #undef a2
448 #undef a3
449 #undef k0
450 #undef k1
451 #undef k2
452 #undef k3
453 
454  ASL(3)
455  AS2( test ecx, ecx)
456  ASJ( jnz, 4, b)
457 #if CRYPTOPP_BOOL_X32
458  AS2( add esp, 24)
459 #else
460  AS2( add esp, 12)
461 #endif
462  AS_POP_IF86( bp)
463  AS1( emms)
464 #ifdef __GNUC__
465  ATT_PREFIX
466  AS2( mov %0, %%ebx)
467  : "=m" (temp)
468  : "m" (L1KeyLength), "c" (blocksRemainingInWord64), "S" (data), "D" (nhK+tagPart*2), "d" (m_isFirstBlock), "a" (polyS+tagPart*4)
469  : "memory", "cc"
470  );
471 #endif
472 }
473 #endif
474 
475 #if VMAC_BOOL_WORD128
476  #define DeclareNH(a) word128 a=0
477  #define MUL64(rh,rl,i1,i2) {word128 p = word128(i1)*(i2); rh = word64(p>>64); rl = word64(p);}
478  #define AccumulateNH(a, b, c) a += word128(b)*(c)
479  #define Multiply128(r, i1, i2) r = word128(word64(i1)) * word64(i2)
480 #else
481  #if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER) && !defined(_M_ARM)
482  #define MUL32(a, b) __emulu(word32(a), word32(b))
483  #else
484  #define MUL32(a, b) ((word64)((word32)(a)) * (word32)(b))
485  #endif
486  #if defined(CRYPTOPP_X64_ASM_AVAILABLE)
487  #define DeclareNH(a) word64 a##0=0, a##1=0
488  #define MUL64(rh,rl,i1,i2) asm ("mulq %3" : "=a"(rl), "=d"(rh) : "a"(i1), "g"(i2) : "cc");
489  #define AccumulateNH(a, b, c) asm ("mulq %3; addq %%rax, %0; adcq %%rdx, %1" : "+r"(a##0), "+r"(a##1) : "a"(b), "g"(c) : "%rdx", "cc");
490  #define ADD128(rh,rl,ih,il) asm ("addq %3, %1; adcq %2, %0" : "+r"(rh),"+r"(rl) : "r"(ih),"r"(il) : "cc");
491  #elif defined(_MSC_VER) && !CRYPTOPP_BOOL_SLOW_WORD64
492  #define DeclareNH(a) word64 a##0=0, a##1=0
493  #define MUL64(rh,rl,i1,i2) (rl) = _umul128(i1,i2,&(rh));
494  #define AccumulateNH(a, b, c) {\
495  word64 ph, pl;\
496  pl = _umul128(b,c,&ph);\
497  a##0 += pl;\
498  a##1 += ph + (a##0 < pl);}
499  #else
500  #define VMAC_BOOL_32BIT 1
501  #define DeclareNH(a) word64 a##0=0, a##1=0, a##2=0
502  #define MUL64(rh,rl,i1,i2) \
503  { word64 _i1 = (i1), _i2 = (i2); \
504  word64 m1= MUL32(_i1,_i2>>32); \
505  word64 m2= MUL32(_i1>>32,_i2); \
506  rh = MUL32(_i1>>32,_i2>>32); \
507  rl = MUL32(_i1,_i2); \
508  ADD128(rh,rl,(m1 >> 32),(m1 << 32)); \
509  ADD128(rh,rl,(m2 >> 32),(m2 << 32)); \
510  }
511  #define AccumulateNH(a, b, c) {\
512  word64 p = MUL32(b, c);\
513  a##1 += word32((p)>>32);\
514  a##0 += word32(p);\
515  p = MUL32((b)>>32, c);\
516  a##2 += word32((p)>>32);\
517  a##1 += word32(p);\
518  p = MUL32((b)>>32, (c)>>32);\
519  a##2 += p;\
520  p = MUL32(b, (c)>>32);\
521  a##1 += word32(p);\
522  a##2 += word32(p>>32);}
523  #endif
524 #endif
525 #ifndef VMAC_BOOL_32BIT
526  #define VMAC_BOOL_32BIT 0
527 #endif
528 #ifndef ADD128
529  #define ADD128(rh,rl,ih,il) \
530  { word64 _il = (il); \
531  (rl) += (_il); \
532  (rh) += (ih) + ((rl) < (_il)); \
533  }
534 #endif
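// Portable fallback: a 128-bit add in two word64 halves, with the carry out
// of the low half recovered branch-free from unsigned wraparound. For
// example, with rh=0 and rl=0xffffffffffffffff, ADD128(rh,rl,0,1) leaves
// rh=1, rl=0.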
535 
536 #if !(defined(_MSC_VER) && _MSC_VER < 1300)
537 template <bool T_128BitTag>
538 #endif
539 void VMAC_Base::VHASH_Update_Template(const word64 *data, size_t blocksRemainingInWord64)
540 {
541  assert(IsAlignedOn(m_polyState(),GetAlignmentOf<word64>()));
542  assert(IsAlignedOn(m_nhKey(),GetAlignmentOf<word64>()));
543 
544  #define INNER_LOOP_ITERATION(j) {\
545  word64 d0 = ConditionalByteReverse(LITTLE_ENDIAN_ORDER, data[i+2*j+0]);\
546  word64 d1 = ConditionalByteReverse(LITTLE_ENDIAN_ORDER, data[i+2*j+1]);\
547  AccumulateNH(nhA, d0+nhK[i+2*j+0], d1+nhK[i+2*j+1]);\
548  if (T_128BitTag)\
549  AccumulateNH(nhB, d0+nhK[i+2*j+2], d1+nhK[i+2*j+3]);\
550  }
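// One NH word-pair step: accumulate (m[2j]+k[2j]) * (m[2j+1]+k[2j+1]) as a
// 64x64->128 product. For 128-bit tags a second accumulator runs the same
// data against the key shifted by two words (the Toeplitz construction that
// makes the two tag halves independent).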
551 
552 #if (defined(_MSC_VER) && _MSC_VER < 1300)
553  bool T_128BitTag = m_is128;
554 #endif
555  size_t L1KeyLengthInWord64 = m_L1KeyLength / 8;
556  size_t innerLoopEnd = L1KeyLengthInWord64;
557  const word64 *nhK = m_nhKey();
558  word64 *polyS = (word64*)(void*)m_polyState();
559  bool isFirstBlock = true;
560  size_t i;
561 
562  #if !VMAC_BOOL_32BIT
563  #if VMAC_BOOL_WORD128
564  word128 a1=0, a2=0;
565  #else
566  word64 ah1=0, al1=0, ah2=0, al2=0;
567  #endif
568  word64 kh1, kl1, kh2, kl2;
569  kh1=(polyS+0*4+2)[0]; kl1=(polyS+0*4+2)[1];
570  if (T_128BitTag)
571  {
572  kh2=(polyS+1*4+2)[0]; kl2=(polyS+1*4+2)[1];
573  }
574  #endif
575 
576  do
577  {
578  DeclareNH(nhA);
579  DeclareNH(nhB);
580 
581  i = 0;
582  if (blocksRemainingInWord64 < L1KeyLengthInWord64)
583  {
584  if (blocksRemainingInWord64 % 8)
585  {
586  innerLoopEnd = blocksRemainingInWord64 % 8;
587  for (; i<innerLoopEnd; i+=2)
588  INNER_LOOP_ITERATION(0);
589  }
590  innerLoopEnd = blocksRemainingInWord64;
591  }
592  for (; i<innerLoopEnd; i+=8)
593  {
594  INNER_LOOP_ITERATION(0);
595  INNER_LOOP_ITERATION(1);
596  INNER_LOOP_ITERATION(2);
597  INNER_LOOP_ITERATION(3);
598  }
599  blocksRemainingInWord64 -= innerLoopEnd;
600  data += innerLoopEnd;
601 
602  #if VMAC_BOOL_32BIT
603  word32 nh0[2], nh1[2];
604  word64 nh2[2];
605 
606  nh0[0] = word32(nhA0);
607  nhA1 += (nhA0 >> 32);
608  nh1[0] = word32(nhA1);
609  nh2[0] = (nhA2 + (nhA1 >> 32)) & m62;
610 
611  if (T_128BitTag)
612  {
613  nh0[1] = word32(nhB0);
614  nhB1 += (nhB0 >> 32);
615  nh1[1] = word32(nhB1);
616  nh2[1] = (nhB2 + (nhB1 >> 32)) & m62;
617  }
618 
619  #define a0 (((word32 *)(polyS+i*4))[2+NativeByteOrder::ToEnum()])
620  #define a1 (*(((word32 *)(polyS+i*4))+3-NativeByteOrder::ToEnum())) // workaround for GCC 3.2
621  #define a2 (((word32 *)(polyS+i*4))[0+NativeByteOrder::ToEnum()])
622  #define a3 (*(((word32 *)(polyS+i*4))+1-NativeByteOrder::ToEnum()))
623  #define aHi ((polyS+i*4)[0])
624  #define k0 (((word32 *)(polyS+i*4+2))[2+NativeByteOrder::ToEnum()])
625  #define k1 (*(((word32 *)(polyS+i*4+2))+3-NativeByteOrder::ToEnum()))
626  #define k2 (((word32 *)(polyS+i*4+2))[0+NativeByteOrder::ToEnum()])
627  #define k3 (*(((word32 *)(polyS+i*4+2))+1-NativeByteOrder::ToEnum()))
628  #define kHi ((polyS+i*4+2)[0])
629 
630  if (isFirstBlock)
631  {
632  isFirstBlock = false;
633  if (m_isFirstBlock)
634  {
635  m_isFirstBlock = false;
636  for (i=0; i<=(size_t)T_128BitTag; i++)
637  {
638  word64 t = (word64)nh0[i] + k0;
639  a0 = (word32)t;
640  t = (t >> 32) + nh1[i] + k1;
641  a1 = (word32)t;
642  aHi = (t >> 32) + nh2[i] + kHi;
643  }
644  continue;
645  }
646  }
647  for (i=0; i<=(size_t)T_128BitTag; i++)
648  {
649  word64 p, t;
650  word32 t2;
651 
652  p = MUL32(a3, 2*k3);
653  p += nh2[i];
654  p += MUL32(a0, k2);
655  p += MUL32(a1, k1);
656  p += MUL32(a2, k0);
657  t2 = (word32)p;
658  p >>= 32;
659  p += MUL32(a0, k3);
660  p += MUL32(a1, k2);
661  p += MUL32(a2, k1);
662  p += MUL32(a3, k0);
663  t = (word64(word32(p) & 0x7fffffff) << 32) | t2;
664  p >>= 31;
665  p += nh0[i];
666  p += MUL32(a0, k0);
667  p += MUL32(a1, 2*k3);
668  p += MUL32(a2, 2*k2);
669  p += MUL32(a3, 2*k1);
670  t2 = (word32)p;
671  p >>= 32;
672  p += nh1[i];
673  p += MUL32(a0, k1);
674  p += MUL32(a1, k0);
675  p += MUL32(a2, 2*k3);
676  p += MUL32(a3, 2*k2);
677  a0 = t2;
678  a1 = (word32)p;
679  aHi = (p >> 32) + t;
680  }
681 
682  #undef a0
683  #undef a1
684  #undef a2
685  #undef a3
686  #undef aHi
687  #undef k0
688  #undef k1
689  #undef k2
690  #undef k3
691  #undef kHi
692  #else // #if VMAC_BOOL_32BIT
693  if (isFirstBlock)
694  {
695  isFirstBlock = false;
696  if (m_isFirstBlock)
697  {
698  m_isFirstBlock = false;
699  #if VMAC_BOOL_WORD128
700  #define first_poly_step(a, kh, kl, m) a = (m & m126) + ((word128(kh) << 64) | kl)
701 
702  first_poly_step(a1, kh1, kl1, nhA);
703  if (T_128BitTag)
704  first_poly_step(a2, kh2, kl2, nhB);
705  #else
706  #define first_poly_step(ah, al, kh, kl, mh, ml) {\
707  mh &= m62;\
708  ADD128(mh, ml, kh, kl); \
709  ah = mh; al = ml;}
710 
711  first_poly_step(ah1, al1, kh1, kl1, nhA1, nhA0);
712  if (T_128BitTag)
713  first_poly_step(ah2, al2, kh2, kl2, nhB1, nhB0);
714  #endif
715  continue;
716  }
717  else
718  {
719  #if VMAC_BOOL_WORD128
720  a1 = (word128((polyS+0*4)[0]) << 64) | (polyS+0*4)[1];
721  #else
722  ah1=(polyS+0*4)[0]; al1=(polyS+0*4)[1];
723  #endif
724  if (T_128BitTag)
725  {
726  #if VMAC_BOOL_WORD128
727  a2 = (word128((polyS+1*4)[0]) << 64) | (polyS+1*4)[1];
728  #else
729  ah2=(polyS+1*4)[0]; al2=(polyS+1*4)[1];
730  #endif
731  }
732  }
733  }
734 
735  #if VMAC_BOOL_WORD128
736  #define poly_step(a, kh, kl, m) \
737  { word128 t1, t2, t3, t4;\
738  Multiply128(t2, a>>64, kl);\
739  Multiply128(t3, a, kh);\
740  Multiply128(t1, a, kl);\
741  Multiply128(t4, a>>64, 2*kh);\
742  t2 += t3;\
743  t4 += t1;\
744  t2 += t4>>64;\
745  a = (word128(word64(t2)&m63) << 64) | word64(t4);\
746  t2 *= 2;\
747  a += m & m126;\
748  a += t2>>64;}
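// poly_step evaluates a = a*k + (m mod 2^126) (mod 2^127-1): the four 64x64
// partial products are recombined using 2^128 = 2 (mod 2^127-1), which is
// why the high product enters as 2*kh and t2 is doubled before being folded
// back into the low bits.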
749 
750  poly_step(a1, kh1, kl1, nhA);
751  if (T_128BitTag)
752  poly_step(a2, kh2, kl2, nhB);
753  #else
754  #define poly_step(ah, al, kh, kl, mh, ml) \
755  { word64 t1h, t1l, t2h, t2l, t3h, t3l, z=0; \
756  /* compute ab*cd, put bd into result registers */ \
757  MUL64(t2h,t2l,ah,kl); \
758  MUL64(t3h,t3l,al,kh); \
759  MUL64(t1h,t1l,ah,2*kh); \
760  MUL64(ah,al,al,kl); \
761  /* add together ad + bc */ \
762  ADD128(t2h,t2l,t3h,t3l); \
763  /* add 2 * ac to result */ \
764  ADD128(ah,al,t1h,t1l); \
765  /* now (ah,al), (t2l,2*t2h) need summing */ \
766  /* first add the high registers, carrying into t2h */ \
767  ADD128(t2h,ah,z,t2l); \
768  /* double t2h and add top bit of ah */ \
769  t2h += t2h + (ah >> 63); \
770  ah &= m63; \
771  /* now add the low registers */ \
772  mh &= m62; \
773  ADD128(ah,al,mh,ml); \
774  ADD128(ah,al,z,t2h); \
775  }
776 
777  poly_step(ah1, al1, kh1, kl1, nhA1, nhA0);
778  if (T_128BitTag)
779  poly_step(ah2, al2, kh2, kl2, nhB1, nhB0);
780  #endif
781  #endif // #if VMAC_BOOL_32BIT
782  } while (blocksRemainingInWord64);
783 
784  #if VMAC_BOOL_WORD128
785  (polyS+0*4)[0]=word64(a1>>64); (polyS+0*4)[1]=word64(a1);
786  if (T_128BitTag)
787  {
788  (polyS+1*4)[0]=word64(a2>>64); (polyS+1*4)[1]=word64(a2);
789  }
790  #elif !VMAC_BOOL_32BIT
791  (polyS+0*4)[0]=ah1; (polyS+0*4)[1]=al1;
792  if (T_128BitTag)
793  {
794  (polyS+1*4)[0]=ah2; (polyS+1*4)[1]=al2;
795  }
796  #endif
797 }
798 
799 inline void VMAC_Base::VHASH_Update(const word64 *data, size_t blocksRemainingInWord64)
800 {
801 #if (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86 || (CRYPTOPP_BOOL_X32 && !defined(CRYPTOPP_DISABLE_VMAC_ASM))))
802  if (HasSSE2())
803  {
804  VHASH_Update_SSE2(data, blocksRemainingInWord64, 0);
805  if (m_is128)
806  VHASH_Update_SSE2(data, blocksRemainingInWord64, 1);
807  m_isFirstBlock = false;
808  }
809  else
810 #endif
811  {
812 #if defined(_MSC_VER) && _MSC_VER < 1300
813  VHASH_Update_Template(data, blocksRemainingInWord64);
814 #else
815  if (m_is128)
816  VHASH_Update_Template<true>(data, blocksRemainingInWord64);
817  else
818  VHASH_Update_Template<false>(data, blocksRemainingInWord64);
819 #endif
820  }
821 }
822 
823 size_t VMAC_Base::HashMultipleBlocks(const word64 *data, size_t length)
824 {
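// length is in bytes (per the IteratedHashBase convention); only whole
// L1-key-length chunks are hashed here, and the remainder is returned so
// the base class buffers it for the final partial block.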
825  size_t remaining = ModPowerOf2(length, m_L1KeyLength);
826  VHASH_Update(data, (length-remaining)/8);
827  return remaining;
828 }
829 
830 static word64 L3Hash(const word64 *input, const word64 *l3Key, size_t len)
831 {
832  word64 rh, rl, t, z=0;
833  word64 p1 = input[0], p2 = input[1];
834  word64 k1 = l3Key[0], k2 = l3Key[1];
835 
836  /* fully reduce (p1,p2)+(len,0) mod p127 */
837  t = p1 >> 63;
838  p1 &= m63;
839  ADD128(p1, p2, len, t);
840  /* At this point, (p1,p2) is at most 2^127+(len<<64) */
841  t = (p1 > m63) + ((p1 == m63) & (p2 == m64));
842  ADD128(p1, p2, z, t);
843  p1 &= m63;
844 
845  /* compute (p1,p2)/(2^64-2^32) and (p1,p2)%(2^64-2^32) */
846  t = p1 + (p2 >> 32);
847  t += (t >> 32);
848  t += (word32)t > 0xfffffffeU;
849  p1 += (t >> 32);
850  p2 += (p1 << 32);
851 
852  /* compute (p1+k1)%p64 and (p2+k2)%p64 */
853  p1 += k1;
854  p1 += (0 - (p1 < k1)) & 257;
855  p2 += k2;
856  p2 += (0 - (p2 < k2)) & 257;
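// If p1+k1 (or p2+k2) wraps past 2^64, add 257, because 2^64 = 257
// (mod p64); this keeps both sums reduced mod p64 = 2^64-257.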
857 
858  /* compute (p1+k1)*(p2+k2)%p64 */
859  MUL64(rh, rl, p1, p2);
860  t = rh >> 56;
861  ADD128(t, rl, z, rh);
862  rh <<= 8;
863  ADD128(t, rl, z, rh);
864  t += t << 8;
865  rl += t;
866  rl += (0 - (rl < t)) & 257;
867  rl += (0 - (rl > p64-1)) & 257;
868  return rl;
869 }
870 
871 void VMAC_Base::TruncatedFinal(byte *mac, size_t size)
872 {
873  assert(IsAlignedOn(DataBuf(),GetAlignmentOf<word64>()));
874  assert(IsAlignedOn(m_polyState(),GetAlignmentOf<word64>()));
875  size_t len = ModPowerOf2(GetBitCountLo()/8, m_L1KeyLength);
876 
877  if (len)
878  {
879  memset(m_data()+len, 0, (0-len)%16);
880  VHASH_Update(DataBuf(), ((len+15)/16)*2);
881  len *= 8; // convert to bits
882  }
883  else if (m_isFirstBlock)
884  {
885  // special case for empty string
886  m_polyState()[0] = m_polyState()[2];
887  m_polyState()[1] = m_polyState()[3];
888  if (m_is128)
889  {
890  m_polyState()[4] = m_polyState()[6];
891  m_polyState()[5] = m_polyState()[7];
892  }
893  }
894 
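// Tag assembly: each 64-bit tag half is L3Hash(poly accumulator) plus the
// corresponding half of the pad (the encrypted nonce), added mod 2^64; for
// 64-bit tags the nonce's last bit selects which pad half is used.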
895  if (m_is128)
896  {
897  word64 t[2];
898  t[0] = L3Hash(m_polyState(), m_l3Key(), len) + GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad());
899  t[1] = L3Hash(m_polyState()+4, m_l3Key()+2, len) + GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad()+8);
900  if (size == 16)
901  {
902  PutWord(false, BIG_ENDIAN_ORDER, mac, t[0]);
903  PutWord(false, BIG_ENDIAN_ORDER, mac+8, t[1]);
904  }
905  else
906  {
907  t[0] = ConditionalByteReverse(BIG_ENDIAN_ORDER, t[0]);
908  t[1] = ConditionalByteReverse(BIG_ENDIAN_ORDER, t[1]);
909  memcpy(mac, t, size);
910  }
911  }
912  else
913  {
914  word64 t = L3Hash(m_polyState(), m_l3Key(), len);
915  t += GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad() + (m_nonce()[IVSize()-1]&1) * 8);
916  if (size == 8)
917  PutWord(false, BIG_ENDIAN_ORDER, mac, t);
918  else
919  {
920  t = ConditionalByteReverse(BIG_ENDIAN_ORDER, t);
921  memcpy(mac, &t, size);
922  }
923  }
924 }
925 
926 NAMESPACE_END