Crypto++ 8.8
Free C++ class library of cryptographic schemes
gcm.cpp
1 // gcm.cpp - originally written and placed in the public domain by Wei Dai.
2 // ARM and Aarch64 added by Jeffrey Walton. The ARM carryless
3 // multiply routines are less efficient because they shadow x86.
4 // The precomputed key table integration makes it tricky to use the
5 // more efficient ARMv8 implementation of the multiply and reduce.
6 
7 // use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM gcm.cpp" to generate MASM code
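// A usage sketch, not part of this translation unit: the mode implemented
// here is normally driven through the public API declared in gcm.h. The
// classes named below (GCM<AES>, AuthenticatedEncryptionFilter, StringSource,
// AutoSeededRandomPool) are the library's documented interfaces; a 12-byte
// IV is the size GCM recommends.
//
//   #include "aes.h"
//   #include "gcm.h"
//   #include "osrng.h"
//   #include "filters.h"
//
//   CryptoPP::AutoSeededRandomPool prng;
//   CryptoPP::SecByteBlock key(CryptoPP::AES::DEFAULT_KEYLENGTH);
//   CryptoPP::byte iv[12];
//   prng.GenerateBlock(key, key.size());
//   prng.GenerateBlock(iv, sizeof(iv));
//
//   CryptoPP::GCM<CryptoPP::AES>::Encryption enc;
//   enc.SetKeyWithIV(key, key.size(), iv, sizeof(iv));
//
//   std::string cipher;
//   CryptoPP::StringSource ss("attack at dawn", true,
//       new CryptoPP::AuthenticatedEncryptionFilter(enc,
//           new CryptoPP::StringSink(cipher)));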
8 
9 #include "pch.h"
10 #include "config.h"
11 
12 #ifndef CRYPTOPP_IMPORTS
13 #ifndef CRYPTOPP_GENERATE_X64_MASM
14 
15 // Visual Studio .Net 2003 compiler crash
16 #if defined(CRYPTOPP_MSC_VERSION) && (CRYPTOPP_MSC_VERSION < 1400)
17 # pragma optimize("", off)
18 #endif
19 
20 #include "gcm.h"
21 #include "cpu.h"
22 
23 #if defined(CRYPTOPP_DISABLE_GCM_ASM)
24 # undef CRYPTOPP_X86_ASM_AVAILABLE
25 # undef CRYPTOPP_X32_ASM_AVAILABLE
26 # undef CRYPTOPP_X64_ASM_AVAILABLE
27 # undef CRYPTOPP_SSE2_ASM_AVAILABLE
28 #endif
29 
30 NAMESPACE_BEGIN(CryptoPP)
31 
32 #if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64)
33 // Different assemblers accept different mnemonics: 'movd eax, xmm0' vs
34 // 'movd rax, xmm0' vs 'mov eax, xmm0' vs 'mov rax, xmm0'
35 #if defined(CRYPTOPP_DISABLE_MIXED_ASM)
36 // 'movd eax, xmm0' only. REG_WORD() macro not used. Clang path.
37 # define USE_MOVD_REG32 1
38 #elif defined(__GNUC__) || defined(CRYPTOPP_MSC_VERSION)
39 // 'movd eax, xmm0' or 'movd rax, xmm0'. REG_WORD() macro supplies REG32 or REG64.
40 # define USE_MOVD_REG32_OR_REG64 1
41 #else
42 // 'mov eax, xmm0' or 'mov rax, xmm0'. REG_WORD() macro supplies REG32 or REG64.
43 # define USE_MOV_REG32_OR_REG64 1
44 #endif
45 #endif // CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64
46 
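// Lookup table for the byte-wise GHASH reduction used by the 2K-table
// software path and the 2K SSE2 assembly below. It is filled on first key
// setup from the GCM reduction polynomial (the 0x01c2 constant used there
// is 0xE1, the polynomial's high byte, shifted left by one bit).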
47 word16 GCM_Base::s_reductionTable[256];
48 volatile bool GCM_Base::s_reductionTableInitialized = false;
49 
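// Add 256 to the big-endian block counter by incrementing the three bytes
// above the low-order counter byte.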
50 void GCM_Base::GCTR::IncrementCounterBy256()
51 {
52  IncrementCounterByOne(m_counterArray+BlockSize()-4, 3);
53 }
54 
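// a = b ^ c over a 16-byte block, two word64 lanes at a time. The SSE2,
// NEON and POWER8 variants declared below live in separate translation
// units so they can be built with the matching architecture flags.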
55 static inline void Xor16(byte *a, const byte *b, const byte *c)
56 {
57  CRYPTOPP_ASSERT(IsAlignedOn(a,GetAlignmentOf<word64>()));
58  CRYPTOPP_ASSERT(IsAlignedOn(b,GetAlignmentOf<word64>()));
59  CRYPTOPP_ASSERT(IsAlignedOn(c,GetAlignmentOf<word64>()));
60  ((word64 *)(void *)a)[0] = ((word64 *)(void *)b)[0] ^ ((word64 *)(void *)c)[0];
61  ((word64 *)(void *)a)[1] = ((word64 *)(void *)b)[1] ^ ((word64 *)(void *)c)[1];
62 }
63 
64 #if CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE
65 // SunCC 5.10-5.11 compiler crash. Move GCM_Xor16_SSE2 out-of-line, and place in
66 // a source file with an SSE architecture switch. Also see GH #226 and GH #284.
67 extern void GCM_Xor16_SSE2(byte *a, const byte *b, const byte *c);
68 #endif // SSE2
69 
70 #if CRYPTOPP_ARM_NEON_AVAILABLE
71 extern void GCM_Xor16_NEON(byte *a, const byte *b, const byte *c);
72 #endif
73 
74 #if CRYPTOPP_POWER8_AVAILABLE
75 extern void GCM_Xor16_POWER8(byte *a, const byte *b, const byte *c);
76 #endif
77 
78 #if CRYPTOPP_CLMUL_AVAILABLE
79 extern void GCM_SetKeyWithoutResync_CLMUL(const byte *hashKey, byte *mulTable, unsigned int tableSize);
80 extern size_t GCM_AuthenticateBlocks_CLMUL(const byte *data, size_t len, const byte *mtable, byte *hbuffer);
81 const unsigned int s_cltableSizeInBlocks = 8;
82 extern void GCM_ReverseHashBufferIfNeeded_CLMUL(byte *hashBuffer);
83 #endif // CRYPTOPP_CLMUL_AVAILABLE
84 
85 #if CRYPTOPP_ARM_PMULL_AVAILABLE
86 extern void GCM_SetKeyWithoutResync_PMULL(const byte *hashKey, byte *mulTable, unsigned int tableSize);
87 extern size_t GCM_AuthenticateBlocks_PMULL(const byte *data, size_t len, const byte *mtable, byte *hbuffer);
88 const unsigned int s_cltableSizeInBlocks = 8;
89 extern void GCM_ReverseHashBufferIfNeeded_PMULL(byte *hashBuffer);
90 #endif // CRYPTOPP_ARM_PMULL_AVAILABLE
91 
92 #if CRYPTOPP_POWER8_VMULL_AVAILABLE
93 extern void GCM_SetKeyWithoutResync_VMULL(const byte *hashKey, byte *mulTable, unsigned int tableSize);
94 extern size_t GCM_AuthenticateBlocks_VMULL(const byte *data, size_t len, const byte *mtable, byte *hbuffer);
95 const unsigned int s_cltableSizeInBlocks = 8;
96 extern void GCM_ReverseHashBufferIfNeeded_VMULL(byte *hashBuffer);
97 #endif // CRYPTOPP_POWER8_VMULL_AVAILABLE
98 
99 void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const NameValuePairs &params)
100 {
101  BlockCipher &blockCipher = AccessBlockCipher();
102  blockCipher.SetKey(userKey, keylength, params);
103 
104  // GCM is only defined for 16-byte block ciphers at the moment.
105  // However, variable blocksize support means we have to defer
106  // blocksize checks to runtime after the key is set. Also see
107  // https://github.com/weidai11/cryptopp/issues/408.
108  const unsigned int blockSize = blockCipher.BlockSize();
109  CRYPTOPP_ASSERT(blockSize == REQUIRED_BLOCKSIZE);
110  if (blockCipher.BlockSize() != REQUIRED_BLOCKSIZE)
111  throw InvalidArgument(AlgorithmName() + ": block size of underlying block cipher is not 16");
112 
113  int tableSize, i, j, k;
114 
115 #if CRYPTOPP_CLMUL_AVAILABLE
116  if (HasCLMUL())
117  {
118  // Avoid "parameter not used" error and suppress Coverity finding
119  (void)params.GetIntValue(Name::TableSize(), tableSize);
120  tableSize = s_cltableSizeInBlocks * blockSize;
121  CRYPTOPP_ASSERT(tableSize > static_cast<int>(blockSize));
122  }
123  else
124 #elif CRYPTOPP_ARM_PMULL_AVAILABLE
125  if (HasPMULL())
126  {
127  // Avoid "parameter not used" error and suppress Coverity finding
128  (void)params.GetIntValue(Name::TableSize(), tableSize);
129  tableSize = s_cltableSizeInBlocks * blockSize;
130  CRYPTOPP_ASSERT(tableSize > static_cast<int>(blockSize));
131  }
132  else
133 #elif CRYPTOPP_POWER8_VMULL_AVAILABLE
134  if (HasPMULL())
135  {
136  // Avoid "parameter not used" error and suppress Coverity finding
137  (void)params.GetIntValue(Name::TableSize(), tableSize);
138  tableSize = s_cltableSizeInBlocks * blockSize;
139  CRYPTOPP_ASSERT(tableSize > static_cast<int>(blockSize));
140  }
141  else
142 #endif
143  {
144  if (params.GetIntValue(Name::TableSize(), tableSize))
145  tableSize = (tableSize >= 64*1024) ? 64*1024 : 2*1024;
146  else
147  tableSize = (GetTablesOption() == GCM_64K_Tables) ? 64*1024 : 2*1024;
148 
149  //#if defined(CRYPTOPP_MSC_VERSION) && (CRYPTOPP_MSC_VERSION < 1400)
150  // VC 2003 workaround: compiler generates bad code for 64K tables
151  //tableSize = 2*1024;
152  //#endif
153  }
154 
155  m_buffer.resize(3*blockSize + tableSize);
156  byte *mulTable = MulTable();
157  byte *hashKey = HashKey();
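// The GHASH subkey H is E_K(0^128): encrypt an all-zero block under the
// just-set key. The table built below holds precomputed multiples of H.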
158  std::memset(hashKey, 0, REQUIRED_BLOCKSIZE);
159  blockCipher.ProcessBlock(hashKey);
160 
161 #if CRYPTOPP_CLMUL_AVAILABLE
162  if (HasCLMUL())
163  {
164  GCM_SetKeyWithoutResync_CLMUL(hashKey, mulTable, tableSize);
165  return;
166  }
167 #elif CRYPTOPP_ARM_PMULL_AVAILABLE
168  if (HasPMULL())
169  {
170  GCM_SetKeyWithoutResync_PMULL(hashKey, mulTable, tableSize);
171  return;
172  }
173 #elif CRYPTOPP_POWER8_VMULL_AVAILABLE
174  if (HasPMULL())
175  {
176  GCM_SetKeyWithoutResync_VMULL(hashKey, mulTable, tableSize);
177  return;
178  }
179 #endif
180 
181  word64 V0, V1;
182  typedef BlockGetAndPut<word64, BigEndian> Block;
183  Block::Get(hashKey)(V0)(V1);
184 
185  if (tableSize == 64*1024)
186  {
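// 64K table layout: 16 sub-tables of 256 entries x 16 bytes, one sub-table
// per byte position of the hashed value. The first loop stores the
// single-bit entries (H*x^i for i = 0..127); the second fills the remaining
// entries of each sub-table by XORing already-computed ones, using the
// fastest available Xor16 variant.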
187  for (i=0; i<128; i++)
188  {
189  k = i%8;
190  Block::Put(NULLPTR, mulTable+(i/8)*256*16+(size_t(1)<<(11-k)))(V0)(V1);
191 
192  int x = (int)V1 & 1;
193  V1 = (V1>>1) | (V0<<63);
194  V0 = (V0>>1) ^ (x ? W64LIT(0xe1) << 56 : 0);
195  }
196 
197  for (i=0; i<16; i++)
198  {
199  std::memset(mulTable+i*256*16, 0, 16);
200 #if CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE
201  if (HasSSE2())
202  for (j=2; j<=0x80; j*=2)
203  for (k=1; k<j; k++)
204  GCM_Xor16_SSE2(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
205  else
206 #elif CRYPTOPP_ARM_NEON_AVAILABLE
207  if (HasNEON())
208  for (j=2; j<=0x80; j*=2)
209  for (k=1; k<j; k++)
210  GCM_Xor16_NEON(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
211  else
212 #elif CRYPTOPP_POWER8_AVAILABLE
213  if (HasPower8())
214  for (j=2; j<=0x80; j*=2)
215  for (k=1; k<j; k++)
216  GCM_Xor16_POWER8(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
217  else
218 #endif
219  for (j=2; j<=0x80; j*=2)
220  for (k=1; k<j; k++)
221  Xor16(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
222  }
223  }
224  else
225  {
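// 2K table layout: multiples of H indexed one 4-bit nibble at a time,
// split across two 1KB halves. GHASH then accumulates 32 bits of input per
// GF_MUL_32BY128 step (see AuthenticateBlocks) and folds the bytes that
// shift off the end back in with s_reductionTable, built here on first use.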
226  if (!s_reductionTableInitialized)
227  {
228  s_reductionTable[0] = 0;
229  word16 x = 0x01c2;
230  s_reductionTable[1] = ByteReverse(x);
231  for (unsigned int ii=2; ii<=0x80; ii*=2)
232  {
233  x <<= 1;
234  s_reductionTable[ii] = ByteReverse(x);
235  for (unsigned int jj=1; jj<ii; jj++)
236  s_reductionTable[ii+jj] = s_reductionTable[ii] ^ s_reductionTable[jj];
237  }
238  s_reductionTableInitialized = true;
239  }
240 
241  for (i=0; i<128-24; i++)
242  {
243  k = i%32;
244  if (k < 4)
245  Block::Put(NULLPTR, mulTable+1024+(i/32)*256+(size_t(1)<<(7-k)))(V0)(V1);
246  else if (k < 8)
247  Block::Put(NULLPTR, mulTable+(i/32)*256+(size_t(1)<<(11-k)))(V0)(V1);
248 
249  int x = (int)V1 & 1;
250  V1 = (V1>>1) | (V0<<63);
251  V0 = (V0>>1) ^ (x ? W64LIT(0xe1) << 56 : 0);
252  }
253 
254  for (i=0; i<4; i++)
255  {
256  std::memset(mulTable+i*256, 0, 16);
257  std::memset(mulTable+1024+i*256, 0, 16);
258 #if CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE
259  if (HasSSE2())
260  for (j=2; j<=8; j*=2)
261  for (k=1; k<j; k++)
262  {
263  GCM_Xor16_SSE2(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
264  GCM_Xor16_SSE2(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
265  }
266  else
267 #elif CRYPTOPP_ARM_NEON_AVAILABLE
268  if (HasNEON())
269  for (j=2; j<=8; j*=2)
270  for (k=1; k<j; k++)
271  {
272  GCM_Xor16_NEON(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
273  GCM_Xor16_NEON(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
274  }
275  else
276 #elif CRYPTOPP_POWER8_AVAILABLE
277  if (HasPower8())
278  for (j=2; j<=8; j*=2)
279  for (k=1; k<j; k++)
280  {
281  GCM_Xor16_POWER8(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
282  GCM_Xor16_POWER8(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
283  }
284  else
285 #endif
286  for (j=2; j<=8; j*=2)
287  for (k=1; k<j; k++)
288  {
289  Xor16(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
290  Xor16(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
291  }
292  }
293  }
294 }
295 
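// The CLMUL/PMULL/VMULL implementations keep the GHASH accumulator in the
// byte order most convenient for the carryless-multiply instructions, so
// the buffer may need to be byte-reversed before it is consumed by the
// portable code (IV hashing and final tag generation).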
296 inline void GCM_Base::ReverseHashBufferIfNeeded()
297 {
298 #if CRYPTOPP_CLMUL_AVAILABLE
299  if (HasCLMUL())
300  {
301  GCM_ReverseHashBufferIfNeeded_CLMUL(HashBuffer());
302  }
303 #elif CRYPTOPP_ARM_PMULL_AVAILABLE
304  if (HasPMULL())
305  {
306  GCM_ReverseHashBufferIfNeeded_PMULL(HashBuffer());
307  }
308 #elif CRYPTOPP_POWER8_VMULL_AVAILABLE
309  if (HasPMULL())
310  {
311  GCM_ReverseHashBufferIfNeeded_VMULL(HashBuffer());
312  }
313 #endif
314 }
315 
316 void GCM_Base::Resync(const byte *iv, size_t len)
317 {
318  BlockCipher &cipher = AccessBlockCipher();
319  byte *hashBuffer = HashBuffer();
320 
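// Per SP 800-38D: a 96-bit IV is used directly as the pre-counter block
// J0 = IV || 0^31 || 1; any other IV length is run through GHASH together
// with a final block carrying its bit length.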
321  if (len == 12)
322  {
323  std::memcpy(hashBuffer, iv, len);
324  std::memset(hashBuffer+len, 0, 3);
325  hashBuffer[len+3] = 1;
326  }
327  else
328  {
329  size_t origLen = len;
330  std::memset(hashBuffer, 0, HASH_BLOCKSIZE);
331 
332  if (len >= HASH_BLOCKSIZE)
333  {
334  len = GCM_Base::AuthenticateBlocks(iv, len);
335  iv += (origLen - len);
336  }
337 
338  if (len > 0)
339  {
340  std::memcpy(m_buffer, iv, len);
341  std::memset(m_buffer+len, 0, HASH_BLOCKSIZE-len);
342  GCM_Base::AuthenticateBlocks(m_buffer, HASH_BLOCKSIZE);
343  }
344 
345  PutBlock<word64, BigEndian, true>(NULLPTR, m_buffer)(0)(origLen*8);
346  GCM_Base::AuthenticateBlocks(m_buffer, HASH_BLOCKSIZE);
347 
348  ReverseHashBufferIfNeeded();
349  }
350 
351  if (m_state >= State_IVSet)
352  m_ctr.Resynchronize(hashBuffer, REQUIRED_BLOCKSIZE);
353  else
354  m_ctr.SetCipherWithIV(cipher, hashBuffer);
355 
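// Skip the first keystream block E_K(J0): it is reserved for encrypting
// the authentication tag (see AuthenticateLastFooterBlock, which seeks
// back to 0).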
356  m_ctr.Seek(HASH_BLOCKSIZE);
357 
358  std::memset(hashBuffer, 0, HASH_BLOCKSIZE);
359 }
360 
361 unsigned int GCM_Base::OptimalDataAlignment() const
362 {
363  return
364 #if CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
365  HasSSE2() ? 16 :
366 #elif CRYPTOPP_ARM_NEON_AVAILABLE
367  HasNEON() ? 4 :
368 #elif CRYPTOPP_POWER8_AVAILABLE
369  HasPower8() ? 16 :
370 #endif
371  GetBlockCipher().OptimalDataAlignment();
372 }
373 
374 #if CRYPTOPP_MSC_VERSION
375 # pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code
376 #endif
377 
378 #endif // Not CRYPTOPP_GENERATE_X64_MASM
379 
380 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
381 extern "C" {
382 void GCM_AuthenticateBlocks_2K_SSE2(const byte *data, size_t blocks, word64 *hashBuffer, const word16 *reductionTable);
383 void GCM_AuthenticateBlocks_64K_SSE2(const byte *data, size_t blocks, word64 *hashBuffer);
384 }
385 #endif
386 
387 #ifndef CRYPTOPP_GENERATE_X64_MASM
388 
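// Feed whole 16-byte blocks into GHASH and return the number of leftover
// bytes. Dispatches to the carryless-multiply code when available,
// otherwise to one of four table-driven paths selected by table size
// (2K vs 64K) and SSE2 availability.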
389 size_t GCM_Base::AuthenticateBlocks(const byte *data, size_t len)
390 {
391 #if CRYPTOPP_CLMUL_AVAILABLE
392  if (HasCLMUL())
393  {
394  return GCM_AuthenticateBlocks_CLMUL(data, len, MulTable(), HashBuffer());
395  }
396 #elif CRYPTOPP_ARM_PMULL_AVAILABLE
397  if (HasPMULL())
398  {
399  return GCM_AuthenticateBlocks_PMULL(data, len, MulTable(), HashBuffer());
400  }
401 #elif CRYPTOPP_POWER8_VMULL_AVAILABLE
402  if (HasPMULL())
403  {
404  return GCM_AuthenticateBlocks_VMULL(data, len, MulTable(), HashBuffer());
405  }
406 #endif
407 
408  typedef BlockGetAndPut<word64, NativeByteOrder> Block;
409  word64 *hashBuffer = (word64 *)(void *)HashBuffer();
410  CRYPTOPP_ASSERT(IsAlignedOn(hashBuffer,GetAlignmentOf<word64>()));
411 
412  switch (2*(m_buffer.size()>=64*1024)
413 #if CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
414  + HasSSE2()
415 //#elif CRYPTOPP_ARM_NEON_AVAILABLE
416 // + HasNEON()
417 #endif
418  )
419  {
420  case 0: // non-SSE2 and 2K tables
421  {
422  byte *mulTable = MulTable();
423  word64 x0 = hashBuffer[0], x1 = hashBuffer[1];
424 
425  do
426  {
427  word64 y0, y1, a0, a1, b0, b1, c0, c1, d0, d1;
428  Block::Get(data)(y0)(y1);
429  x0 ^= y0;
430  x1 ^= y1;
431 
432  data += HASH_BLOCKSIZE;
433  len -= HASH_BLOCKSIZE;
434 
435  #define READ_TABLE_WORD64_COMMON(a, b, c, d) *(word64 *)(void *)(mulTable+(a*1024)+(b*256)+c+d*8)
436 
437  #if (CRYPTOPP_LITTLE_ENDIAN)
438  #if CRYPTOPP_BOOL_SLOW_WORD64
439  word32 z0 = (word32)x0;
440  word32 z1 = (word32)(x0>>32);
441  word32 z2 = (word32)x1;
442  word32 z3 = (word32)(x1>>32);
443  #define READ_TABLE_WORD64(a, b, c, d, e) READ_TABLE_WORD64_COMMON((d%2), c, (d?(z##c>>((d?d-1:0)*4))&0xf0:(z##c&0xf)<<4), e)
444  #else
445  #define READ_TABLE_WORD64(a, b, c, d, e) READ_TABLE_WORD64_COMMON((d%2), c, ((d+8*b)?(x##a>>(((d+8*b)?(d+8*b)-1:1)*4))&0xf0:(x##a&0xf)<<4), e)
446  #endif
447  #define GF_MOST_SIG_8BITS(a) (a##1 >> 7*8)
448  #define GF_SHIFT_8(a) a##1 = (a##1 << 8) ^ (a##0 >> 7*8); a##0 <<= 8;
449  #else
450  #define READ_TABLE_WORD64(a, b, c, d, e) READ_TABLE_WORD64_COMMON((1-d%2), c, ((15-d-8*b)?(x##a>>(((15-d-8*b)?(15-d-8*b)-1:0)*4))&0xf0:(x##a&0xf)<<4), e)
451  #define GF_MOST_SIG_8BITS(a) (a##1 & 0xff)
452  #define GF_SHIFT_8(a) a##1 = (a##1 >> 8) ^ (a##0 << 7*8); a##0 >>= 8;
453  #endif
454 
455  #define GF_MUL_32BY128(op, a, b, c) \
456  a0 op READ_TABLE_WORD64(a, b, c, 0, 0) ^ READ_TABLE_WORD64(a, b, c, 1, 0); \
457  a1 op READ_TABLE_WORD64(a, b, c, 0, 1) ^ READ_TABLE_WORD64(a, b, c, 1, 1); \
458  b0 op READ_TABLE_WORD64(a, b, c, 2, 0) ^ READ_TABLE_WORD64(a, b, c, 3, 0); \
459  b1 op READ_TABLE_WORD64(a, b, c, 2, 1) ^ READ_TABLE_WORD64(a, b, c, 3, 1); \
460  c0 op READ_TABLE_WORD64(a, b, c, 4, 0) ^ READ_TABLE_WORD64(a, b, c, 5, 0); \
461  c1 op READ_TABLE_WORD64(a, b, c, 4, 1) ^ READ_TABLE_WORD64(a, b, c, 5, 1); \
462  d0 op READ_TABLE_WORD64(a, b, c, 6, 0) ^ READ_TABLE_WORD64(a, b, c, 7, 0); \
463  d1 op READ_TABLE_WORD64(a, b, c, 6, 1) ^ READ_TABLE_WORD64(a, b, c, 7, 1); \
464 
465  GF_MUL_32BY128(=, 0, 0, 0)
466  GF_MUL_32BY128(^=, 0, 1, 1)
467  GF_MUL_32BY128(^=, 1, 0, 2)
468  GF_MUL_32BY128(^=, 1, 1, 3)
469 
470  word32 r = (word32)s_reductionTable[GF_MOST_SIG_8BITS(d)] << 16;
471  GF_SHIFT_8(d)
472  c0 ^= d0; c1 ^= d1;
473  r ^= (word32)s_reductionTable[GF_MOST_SIG_8BITS(c)] << 8;
474  GF_SHIFT_8(c)
475  b0 ^= c0; b1 ^= c1;
476  r ^= s_reductionTable[GF_MOST_SIG_8BITS(b)];
477  GF_SHIFT_8(b)
478  a0 ^= b0; a1 ^= b1;
479  a0 ^= ConditionalByteReverse<word64>(LITTLE_ENDIAN_ORDER, r);
480  x0 = a0; x1 = a1;
481  }
482  while (len >= HASH_BLOCKSIZE);
483 
484  hashBuffer[0] = x0; hashBuffer[1] = x1;
485  return len;
486  }
487 
488  case 2: // non-SSE2 and 64K tables
489  {
490  byte *mulTable = MulTable();
491  word64 x0 = hashBuffer[0], x1 = hashBuffer[1];
492 
493  do
494  {
495  word64 y0, y1, a0, a1;
496  Block::Get(data)(y0)(y1);
497  x0 ^= y0;
498  x1 ^= y1;
499 
500  data += HASH_BLOCKSIZE;
501  len -= HASH_BLOCKSIZE;
502 
503  #undef READ_TABLE_WORD64_COMMON
504  #undef READ_TABLE_WORD64
505 
506  #define READ_TABLE_WORD64_COMMON(a, c, d) *(word64 *)(void *)(mulTable+(a)*256*16+(c)+(d)*8)
507 
508  #if (CRYPTOPP_LITTLE_ENDIAN)
509  #if CRYPTOPP_BOOL_SLOW_WORD64
510  word32 z0 = (word32)x0;
511  word32 z1 = (word32)(x0>>32);
512  word32 z2 = (word32)x1;
513  word32 z3 = (word32)(x1>>32);
514  #define READ_TABLE_WORD64(b, c, d, e) READ_TABLE_WORD64_COMMON(c*4+d, (d?(z##c>>((d?d:1)*8-4))&0xff0:(z##c&0xff)<<4), e)
515  #else
516  #define READ_TABLE_WORD64(b, c, d, e) READ_TABLE_WORD64_COMMON(c*4+d, ((d+4*(c%2))?(x##b>>(((d+4*(c%2))?(d+4*(c%2)):1)*8-4))&0xff0:(x##b&0xff)<<4), e)
517  #endif
518  #else
519  #define READ_TABLE_WORD64(b, c, d, e) READ_TABLE_WORD64_COMMON(c*4+d, ((7-d-4*(c%2))?(x##b>>(((7-d-4*(c%2))?(7-d-4*(c%2)):1)*8-4))&0xff0:(x##b&0xff)<<4), e)
520  #endif
521 
522  #define GF_MUL_8BY128(op, b, c, d) \
523  a0 op READ_TABLE_WORD64(b, c, d, 0);\
524  a1 op READ_TABLE_WORD64(b, c, d, 1);\
525 
526  GF_MUL_8BY128(=, 0, 0, 0)
527  GF_MUL_8BY128(^=, 0, 0, 1)
528  GF_MUL_8BY128(^=, 0, 0, 2)
529  GF_MUL_8BY128(^=, 0, 0, 3)
530  GF_MUL_8BY128(^=, 0, 1, 0)
531  GF_MUL_8BY128(^=, 0, 1, 1)
532  GF_MUL_8BY128(^=, 0, 1, 2)
533  GF_MUL_8BY128(^=, 0, 1, 3)
534  GF_MUL_8BY128(^=, 1, 2, 0)
535  GF_MUL_8BY128(^=, 1, 2, 1)
536  GF_MUL_8BY128(^=, 1, 2, 2)
537  GF_MUL_8BY128(^=, 1, 2, 3)
538  GF_MUL_8BY128(^=, 1, 3, 0)
539  GF_MUL_8BY128(^=, 1, 3, 1)
540  GF_MUL_8BY128(^=, 1, 3, 2)
541  GF_MUL_8BY128(^=, 1, 3, 3)
542 
543  x0 = a0; x1 = a1;
544  }
545  while (len >= HASH_BLOCKSIZE);
546 
547  hashBuffer[0] = x0; hashBuffer[1] = x1;
548  return len;
549  }
550 #endif // #ifndef CRYPTOPP_GENERATE_X64_MASM
551 
552 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
553  case 1: // SSE2 and 2K tables
554  GCM_AuthenticateBlocks_2K_SSE2(data, len/16, hashBuffer, s_reductionTable);
555  return len % 16;
556  case 3: // SSE2 and 64K tables
557  GCM_AuthenticateBlocks_64K_SSE2(data, len/16, hashBuffer);
558  return len % 16;
559 #endif
560 
561 #if CRYPTOPP_SSE2_ASM_AVAILABLE
562 
563  case 1: // SSE2 and 2K tables
564  {
565  #ifdef __GNUC__
566  __asm__ __volatile__
567  (
568  INTEL_NOPREFIX
569  #elif defined(CRYPTOPP_GENERATE_X64_MASM)
570  ALIGN 8
571  GCM_AuthenticateBlocks_2K_SSE2 PROC FRAME
572  rex_push_reg rsi
573  push_reg rdi
574  push_reg rbx
575  .endprolog
576  mov rsi, r8
577  mov r11, r9
578  #else
579  AS2( mov WORD_REG(cx), data )
580  AS2( mov WORD_REG(dx), len )
581  AS2( mov WORD_REG(si), hashBuffer )
582  AS2( shr WORD_REG(dx), 4 )
583  #endif
584 
586  AS1(push rbx)
587  AS1(push rbp)
588  #else
589  AS_PUSH_IF86( bx)
590  AS_PUSH_IF86( bp)
591  #endif
592 
593  #ifdef __GNUC__
594  AS2( mov AS_REG_7, WORD_REG(di))
595  #elif CRYPTOPP_BOOL_X86
596  AS2( lea AS_REG_7, s_reductionTable)
597  #endif
598 
599  AS2( movdqa xmm0, [WORD_REG(si)] )
600 
601  #define MUL_TABLE_0 WORD_REG(si) + 32
602  #define MUL_TABLE_1 WORD_REG(si) + 32 + 1024
603  #define RED_TABLE AS_REG_7
604 
605  ASL(0)
606  AS2( movdqu xmm4, [WORD_REG(cx)] )
607  AS2( pxor xmm0, xmm4 )
608 
609  AS2( movd ebx, xmm0 )
610  AS2( mov eax, AS_HEX(f0f0f0f0) )
611  AS2( and eax, ebx )
612  AS2( shl ebx, 4 )
613  AS2( and ebx, AS_HEX(f0f0f0f0) )
614  AS2( movzx edi, ah )
615  AS2( movdqa xmm5, XMMWORD_PTR [MUL_TABLE_1 + WORD_REG(di)] )
616  AS2( movzx edi, al )
617  AS2( movdqa xmm4, XMMWORD_PTR [MUL_TABLE_1 + WORD_REG(di)] )
618  AS2( shr eax, 16 )
619  AS2( movzx edi, ah )
620  AS2( movdqa xmm3, XMMWORD_PTR [MUL_TABLE_1 + WORD_REG(di)] )
621  AS2( movzx edi, al )
622  AS2( movdqa xmm2, XMMWORD_PTR [MUL_TABLE_1 + WORD_REG(di)] )
623 
624  #define SSE2_MUL_32BITS(i) \
625  AS2( psrldq xmm0, 4 )\
626  AS2( movd eax, xmm0 )\
627  AS2( and eax, AS_HEX(f0f0f0f0) )\
628  AS2( movzx edi, bh )\
629  AS2( pxor xmm5, XMMWORD_PTR [MUL_TABLE_0 + (i-1)*256 + WORD_REG(di)] )\
630  AS2( movzx edi, bl )\
631  AS2( pxor xmm4, XMMWORD_PTR [MUL_TABLE_0 + (i-1)*256 + WORD_REG(di)] )\
632  AS2( shr ebx, 16 )\
633  AS2( movzx edi, bh )\
634  AS2( pxor xmm3, XMMWORD_PTR [MUL_TABLE_0 + (i-1)*256 + WORD_REG(di)] )\
635  AS2( movzx edi, bl )\
636  AS2( pxor xmm2, XMMWORD_PTR [MUL_TABLE_0 + (i-1)*256 + WORD_REG(di)] )\
637  AS2( movd ebx, xmm0 )\
638  AS2( shl ebx, 4 )\
639  AS2( and ebx, AS_HEX(f0f0f0f0) )\
640  AS2( movzx edi, ah )\
641  AS2( pxor xmm5, XMMWORD_PTR [MUL_TABLE_1 + i*256 + WORD_REG(di)] )\
642  AS2( movzx edi, al )\
643  AS2( pxor xmm4, XMMWORD_PTR [MUL_TABLE_1 + i*256 + WORD_REG(di)] )\
644  AS2( shr eax, 16 )\
645  AS2( movzx edi, ah )\
646  AS2( pxor xmm3, XMMWORD_PTR [MUL_TABLE_1 + i*256 + WORD_REG(di)] )\
647  AS2( movzx edi, al )\
648  AS2( pxor xmm2, XMMWORD_PTR [MUL_TABLE_1 + i*256 + WORD_REG(di)] )\
649 
650  SSE2_MUL_32BITS(1)
651  SSE2_MUL_32BITS(2)
652  SSE2_MUL_32BITS(3)
653 
654  AS2( movzx edi, bh )
655  AS2( pxor xmm5, XMMWORD_PTR [MUL_TABLE_0 + 3*256 + WORD_REG(di)] )
656  AS2( movzx edi, bl )
657  AS2( pxor xmm4, XMMWORD_PTR [MUL_TABLE_0 + 3*256 + WORD_REG(di)] )
658  AS2( shr ebx, 16 )
659  AS2( movzx edi, bh )
660  AS2( pxor xmm3, XMMWORD_PTR [MUL_TABLE_0 + 3*256 + WORD_REG(di)] )
661  AS2( movzx edi, bl )
662  AS2( pxor xmm2, XMMWORD_PTR [MUL_TABLE_0 + 3*256 + WORD_REG(di)] )
663 
664  AS2( movdqa xmm0, xmm3 )
665  AS2( pslldq xmm3, 1 )
666  AS2( pxor xmm2, xmm3 )
667  AS2( movdqa xmm1, xmm2 )
668  AS2( pslldq xmm2, 1 )
669  AS2( pxor xmm5, xmm2 )
670 
671  AS2( psrldq xmm0, 15 )
672 #if USE_MOVD_REG32
673  AS2( movd edi, xmm0 )
674 #elif USE_MOV_REG32_OR_REG64
675  AS2( mov WORD_REG(di), xmm0 )
676 #else // GNU Assembler
677  AS2( movd WORD_REG(di), xmm0 )
678 #endif
679  AS2( movzx eax, WORD PTR [RED_TABLE + WORD_REG(di)*2] )
680  AS2( shl eax, 8 )
681 
682  AS2( movdqa xmm0, xmm5 )
683  AS2( pslldq xmm5, 1 )
684  AS2( pxor xmm4, xmm5 )
685 
686  AS2( psrldq xmm1, 15 )
687 #if USE_MOVD_REG32
688  AS2( movd edi, xmm1 )
689 #elif USE_MOV_REG32_OR_REG64
690  AS2( mov WORD_REG(di), xmm1 )
691 #else
692  AS2( movd WORD_REG(di), xmm1 )
693 #endif
694  AS2( xor ax, WORD PTR [RED_TABLE + WORD_REG(di)*2] )
695  AS2( shl eax, 8 )
696 
697  AS2( psrldq xmm0, 15 )
698 #if USE_MOVD_REG32
699  AS2( movd edi, xmm0 )
700 #elif USE_MOV_REG32_OR_REG64
701  AS2( mov WORD_REG(di), xmm0 )
702 #else
703  AS2( movd WORD_REG(di), xmm0 )
704 #endif
705  AS2( xor ax, WORD PTR [RED_TABLE + WORD_REG(di)*2] )
706 
707  AS2( movd xmm0, eax )
708  AS2( pxor xmm0, xmm4 )
709 
710  AS2( add WORD_REG(cx), 16 )
711  AS2( sub WORD_REG(dx), 1 )
712  // ATT_NOPREFIX
713  ASJ( jnz, 0, b )
714  INTEL_NOPREFIX
715  AS2( movdqa [WORD_REG(si)], xmm0 )
716 
718  AS1(pop rbp)
719  AS1(pop rbx)
720  #else
721  AS_POP_IF86( bp)
722  AS_POP_IF86( bx)
723  #endif
724 
725  #ifdef __GNUC__
726  ATT_PREFIX
727  :
728  : "c" (data), "d" (len/16), "S" (hashBuffer), "D" (s_reductionTable)
729  : "memory", "cc", "%eax", "%ebx"
731  , PERCENT_REG(AS_REG_7), "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5"
732 #endif
733  );
734  #elif defined(CRYPTOPP_GENERATE_X64_MASM)
735  pop rbx
736  pop rdi
737  pop rsi
738  ret
739  GCM_AuthenticateBlocks_2K_SSE2 ENDP
740  #endif
741 
742  return len%16;
743  }
744  case 3: // SSE2 and 64K tables
745  {
746  #ifdef __GNUC__
747  __asm__ __volatile__
748  (
749  INTEL_NOPREFIX
750  #elif defined(CRYPTOPP_GENERATE_X64_MASM)
751  ALIGN 8
752  GCM_AuthenticateBlocks_64K_SSE2 PROC FRAME
753  rex_push_reg rsi
754  push_reg rdi
755  .endprolog
756  mov rsi, r8
757  #else
758  AS2( mov WORD_REG(cx), data )
759  AS2( mov WORD_REG(dx), len )
760  AS2( mov WORD_REG(si), hashBuffer )
761  AS2( shr WORD_REG(dx), 4 )
762  #endif
763 
764  AS2( movdqa xmm0, [WORD_REG(si)] )
765 
766  #undef MUL_TABLE
767  #define MUL_TABLE(i,j) WORD_REG(si) + 32 + (i*4+j)*256*16
768 
769  ASL(1)
770  AS2( movdqu xmm1, [WORD_REG(cx)] )
771  AS2( pxor xmm1, xmm0 )
772  AS2( pxor xmm0, xmm0 )
773 
774  #undef SSE2_MUL_32BITS
775  #define SSE2_MUL_32BITS(i) \
776  AS2( movd eax, xmm1 )\
777  AS2( psrldq xmm1, 4 )\
778  AS2( movzx edi, al )\
779  AS2( add WORD_REG(di), WORD_REG(di) )\
780  AS2( pxor xmm0, [MUL_TABLE(i,0) + WORD_REG(di)*8] )\
781  AS2( movzx edi, ah )\
782  AS2( add WORD_REG(di), WORD_REG(di) )\
783  AS2( pxor xmm0, [MUL_TABLE(i,1) + WORD_REG(di)*8] )\
784  AS2( shr eax, 16 )\
785  AS2( movzx edi, al )\
786  AS2( add WORD_REG(di), WORD_REG(di) )\
787  AS2( pxor xmm0, [MUL_TABLE(i,2) + WORD_REG(di)*8] )\
788  AS2( movzx edi, ah )\
789  AS2( add WORD_REG(di), WORD_REG(di) )\
790  AS2( pxor xmm0, [MUL_TABLE(i,3) + WORD_REG(di)*8] )\
791 
792  SSE2_MUL_32BITS(0)
793  SSE2_MUL_32BITS(1)
794  SSE2_MUL_32BITS(2)
795  SSE2_MUL_32BITS(3)
796 
797  AS2( add WORD_REG(cx), 16 )
798  AS2( sub WORD_REG(dx), 1 )
799  // ATT_NOPREFIX
800  ASJ( jnz, 1, b )
801  INTEL_NOPREFIX
802  AS2( movdqa [WORD_REG(si)], xmm0 )
803 
804  #ifdef __GNUC__
805  ATT_PREFIX
806  :
807  : "c" (data), "d" (len/16), "S" (hashBuffer)
808  : "memory", "cc", "%edi", "%eax"
810  , "%xmm0", "%xmm1"
811 #endif
812  );
813  #elif defined(CRYPTOPP_GENERATE_X64_MASM)
814  pop rdi
815  pop rsi
816  ret
817  GCM_AuthenticateBlocks_64K_SSE2 ENDP
818  #endif
819 
820  return len%16;
821  }
822 #endif
823 #ifndef CRYPTOPP_GENERATE_X64_MASM
824  }
825 
826  return len%16;
827 }
828 
829 void GCM_Base::AuthenticateLastHeaderBlock()
830 {
831  if (m_bufferedDataLength > 0)
832  {
833  std::memset(m_buffer+m_bufferedDataLength, 0, HASH_BLOCKSIZE-m_bufferedDataLength);
834  m_bufferedDataLength = 0;
835  GCM_Base::AuthenticateBlocks(m_buffer, HASH_BLOCKSIZE);
836  }
837 }
838 
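// Close out GHASH with the length block: the bit lengths of the AAD and of
// the ciphertext, each as a 64-bit big-endian value.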
839 void GCM_Base::AuthenticateLastConfidentialBlock()
840 {
841  GCM_Base::AuthenticateLastHeaderBlock();
842  PutBlock<word64, BigEndian, true>(NULLPTR, m_buffer)(m_totalHeaderLength*8)(m_totalMessageLength*8);
843  GCM_Base::AuthenticateBlocks(m_buffer, HASH_BLOCKSIZE);
844 }
845 
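// Tag = E_K(J0) XOR GHASH: rewind the CTR stream to the block reserved in
// Resync() and encrypt the (possibly byte-reversed) hash buffer into mac.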
846 void GCM_Base::AuthenticateLastFooterBlock(byte *mac, size_t macSize)
847 {
848  m_ctr.Seek(0);
849  ReverseHashBufferIfNeeded();
850  m_ctr.ProcessData(mac, HashBuffer(), macSize);
851 }
852 
853 NAMESPACE_END
854 
855 #endif // Not CRYPTOPP_GENERATE_X64_MASM
856 #endif