Crypto++ 6.0 — a free C++ class library of cryptographic schemes
Source file: gcm.cpp
1 // gcm.cpp - originally written and placed in the public domain by Wei Dai.
2 // ARM and Aarch64 added by Jeffrey Walton. The ARM carryless
3 // multiply routines are less efficient because they shadow x86.
4 // The precomputed key table integration makes it tricky to use the
5 // more efficient ARMv8 implementation of the multiply and reduce.
6 
7 // use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM gcm.cpp" to generate MASM code
8 
9 #include "pch.h"
10 #include "config.h"
11 
12 #ifndef CRYPTOPP_IMPORTS
13 #ifndef CRYPTOPP_GENERATE_X64_MASM
14 
15 // Clang 3.3 integrated assembler crash on Linux. Other versions produce incorrect results.
16 // Clang has never handled Intel ASM very well. I wish LLVM would fix it.
17 #if defined(__clang__)
18 # undef CRYPTOPP_X86_ASM_AVAILABLE
19 # undef CRYPTOPP_X32_ASM_AVAILABLE
20 # undef CRYPTOPP_X64_ASM_AVAILABLE
21 # undef CRYPTOPP_SSE2_ASM_AVAILABLE
22 #endif
23 
24 // SunCC 12.3 - 12.5 crash in GCM_Reduce_CLMUL
25 // http://github.com/weidai11/cryptopp/issues/226
26 #if defined(__SUNPRO_CC) && (__SUNPRO_CC <= 0x5140)
27 # undef CRYPTOPP_CLMUL_AVAILABLE
28 #endif
29 
30 #if (CRYPTOPP_SSE2_INTRIN_AVAILABLE)
31 # include <emmintrin.h>
32 #endif
33 
34 #include "gcm.h"
35 #include "cpu.h"
36 
37 NAMESPACE_BEGIN(CryptoPP)
38 
39 #if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64)
40 // Different assemblers accept different mnemonics: 'movd eax, xmm0' vs
41 // 'movd rax, xmm0' vs 'mov eax, xmm0' vs 'mov rax, xmm0'
42 #if (CRYPTOPP_LLVM_CLANG_VERSION >= 30600) || (CRYPTOPP_APPLE_CLANG_VERSION >= 70000) || defined(CRYPTOPP_CLANG_INTEGRATED_ASSEMBLER)
43 // 'movd eax, xmm0' only. REG_WORD() macro not used.
44 # define USE_MOVD_REG32 1
45 #elif defined(__GNUC__) || defined(_MSC_VER)
46 // 'movd eax, xmm0' or 'movd rax, xmm0'. REG_WORD() macro supplies REG32 or REG64.
47 # define USE_MOVD_REG32_OR_REG64 1
48 #else
49 // 'mov eax, xmm0' or 'mov rax, xmm0'. REG_WORD() macro supplies REG32 or REG64.
50 # define USE_MOV_REG32_OR_REG64 1
51 #endif
52 #endif // CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64
53 
54 // Clang __m128i casts, http://bugs.llvm.org/show_bug.cgi?id=20670
55 #define M128_CAST(x) ((__m128i *)(void *)(x))
56 #define CONST_M128_CAST(x) ((const __m128i *)(const void *)(x))
57 
58 #if CRYPTOPP_ARM_NEON_AVAILABLE
59 extern void GCM_Xor16_NEON(byte *a, const byte *b, const byte *c);
60 #endif
61 
62 word16 GCM_Base::s_reductionTable[256];
63 volatile bool GCM_Base::s_reductionTableInitialized = false;
64 
65 void GCM_Base::GCTR::IncrementCounterBy256()
66 {
67  IncrementCounterByOne(m_counterArray+BlockSize()-4, 3);
68 }
69 
70 static inline void Xor16(byte *a, const byte *b, const byte *c)
71 {
72  CRYPTOPP_ASSERT(IsAlignedOn(a,GetAlignmentOf<word64>()));
73  CRYPTOPP_ASSERT(IsAlignedOn(b,GetAlignmentOf<word64>()));
74  CRYPTOPP_ASSERT(IsAlignedOn(c,GetAlignmentOf<word64>()));
75  ((word64 *)(void *)a)[0] = ((word64 *)(void *)b)[0] ^ ((word64 *)(void *)c)[0];
76  ((word64 *)(void *)a)[1] = ((word64 *)(void *)b)[1] ^ ((word64 *)(void *)c)[1];
77 }
78 
79 #if CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE
80 // SunCC 5.10-5.11 compiler crash. Move GCM_Xor16_SSE2 out-of-line, and place in
81 // a source file with a SSE architecture switch. Also see GH #226 and GH #284.
82 extern void GCM_Xor16_SSE2(byte *a, const byte *b, const byte *c);
83 #endif // SSE2
84 
85 #if CRYPTOPP_CLMUL_AVAILABLE
86 extern void GCM_SetKeyWithoutResync_CLMUL(const byte *hashKey, byte *mulTable, unsigned int tableSize);
87 extern size_t GCM_AuthenticateBlocks_CLMUL(const byte *data, size_t len, const byte *mtable, byte *hbuffer);
88 const unsigned int s_cltableSizeInBlocks = 8;
89 extern void GCM_ReverseHashBufferIfNeeded_CLMUL(byte *hashBuffer);
90 #endif // CRYPTOPP_CLMUL_AVAILABLE
91 
92 #if CRYPTOPP_ARM_PMULL_AVAILABLE
93 extern void GCM_SetKeyWithoutResync_PMULL(const byte *hashKey, byte *mulTable, unsigned int tableSize);
94 extern size_t GCM_AuthenticateBlocks_PMULL(const byte *data, size_t len, const byte *mtable, byte *hbuffer);
95 const unsigned int s_cltableSizeInBlocks = 8;
96 extern void GCM_ReverseHashBufferIfNeeded_PMULL(byte *hashBuffer);
97 #endif // CRYPTOPP_ARM_PMULL_AVAILABLE
98 
// Keys the underlying block cipher and precomputes the GHASH key tables.
// The hash key H is the encryption of the all-zero block; the multiplication
// tables (2K or 64K bytes, or 128 bytes for CLMUL/PMULL) accelerate the
// software GF(2^128) multiply used by AuthenticateBlocks.
void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const NameValuePairs &params)
{
    BlockCipher &blockCipher = AccessBlockCipher();
    blockCipher.SetKey(userKey, keylength, params);

    // GCM is only defined for 16-byte block ciphers at the moment.
    // However, variable blocksize support means we have to defer
    // blocksize checks to runtime after the key is set. Also see
    // https://github.com/weidai11/cryptopp/issues/408.
    const unsigned int blockSize = blockCipher.BlockSize();
    CRYPTOPP_ASSERT(blockSize == REQUIRED_BLOCKSIZE);
    if (blockCipher.BlockSize() != REQUIRED_BLOCKSIZE)
        throw InvalidArgument(AlgorithmName() + ": block size of underlying block cipher is not 16");

    int tableSize, i, j, k;

#if CRYPTOPP_CLMUL_AVAILABLE
    if (HasCLMUL())
    {
        // Avoid "parameter not used" error and suppress Coverity finding
        (void)params.GetIntValue(Name::TableSize(), tableSize);
        // Carryless multiply needs only a small table (8 blocks = 128 bytes);
        // any user-requested size is overridden.
        tableSize = s_cltableSizeInBlocks * blockSize;
        CRYPTOPP_ASSERT(tableSize > static_cast<int>(blockSize));
    }
    else
#elif CRYPTOPP_ARM_PMULL_AVAILABLE
    if (HasPMULL())
    {
        // Avoid "parameter not used" error and suppress Coverity finding
        (void)params.GetIntValue(Name::TableSize(), tableSize);
        // ARM polynomial multiply also needs only the small 8-block table.
        tableSize = s_cltableSizeInBlocks * blockSize;
        CRYPTOPP_ASSERT(tableSize > static_cast<int>(blockSize));
    }
    else
#endif
    {
        // Software path: the table is either 64K (fast, memory hungry)
        // or 2K, selected by the TableSize parameter or GetTablesOption().
        if (params.GetIntValue(Name::TableSize(), tableSize))
            tableSize = (tableSize >= 64*1024) ? 64*1024 : 2*1024;
        else
            tableSize = (GetTablesOption() == GCM_64K_Tables) ? 64*1024 : 2*1024;

        //#if defined(_MSC_VER) && (_MSC_VER < 1400)
        // VC 2003 workaround: compiler generates bad code for 64K tables
        //tableSize = 2*1024;
        //#endif
    }

    // m_buffer holds 3 scratch blocks (hash key, hash buffer, partial-block
    // buffer) followed by the multiplication table.
    m_buffer.resize(3*blockSize + tableSize);
    byte *mulTable = MulTable();
    byte *hashKey = HashKey();
    // H = E_K(0^128): encrypt the all-zero block in place.
    memset(hashKey, 0, REQUIRED_BLOCKSIZE);
    blockCipher.ProcessBlock(hashKey);

#if CRYPTOPP_CLMUL_AVAILABLE
    if (HasCLMUL())
    {
        GCM_SetKeyWithoutResync_CLMUL(hashKey, mulTable, tableSize);
        return;
    }
#elif CRYPTOPP_ARM_PMULL_AVAILABLE
    if (HasPMULL())
    {
        GCM_SetKeyWithoutResync_PMULL(hashKey, mulTable, tableSize);
        return;
    }
#endif

    word64 V0, V1;
    Block::Get(hashKey)(V0)(V1);

    if (tableSize == 64*1024)
    {
        // 64K tables: 16 sub-tables of 256 entries x 16 bytes, one per key
        // byte position. Store H*x^i for each bit, then fill the remaining
        // entries by XOR-combining previously stored rows.
        for (i=0; i<128; i++)
        {
            k = i%8;
            Block::Put(NULLPTR, mulTable+(i/8)*256*16+(size_t(1)<<(11-k)))(V0)(V1);

            // Multiply (V0,V1) by x in GF(2^128): shift right one bit and,
            // if a bit fell off, XOR in the reduction constant 0xE1<<56
            // (the GCM polynomial per NIST SP 800-38D).
            int x = (int)V1 & 1;
            V1 = (V1>>1) | (V0<<63);
            V0 = (V0>>1) ^ (x ? W64LIT(0xe1) << 56 : 0);
        }

        for (i=0; i<16; i++)
        {
            memset(mulTable+i*256*16, 0, 16);
#if CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE
            if (HasSSE2())
                for (j=2; j<=0x80; j*=2)
                    for (k=1; k<j; k++)
                        GCM_Xor16_SSE2(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
            else
#elif CRYPTOPP_ARM_NEON_AVAILABLE
            if (HasNEON())
                for (j=2; j<=0x80; j*=2)
                    for (k=1; k<j; k++)
                        GCM_Xor16_NEON(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
            else
#endif
            for (j=2; j<=0x80; j*=2)
                for (k=1; k<j; k++)
                    Xor16(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
        }
    }
    else
    {
        // 2K tables: nibble-at-a-time multiply plus a 256-entry reduction
        // table shared by all instances (built once, guarded by the
        // volatile flag; benign race — every writer stores the same values).
        if (!s_reductionTableInitialized)
        {
            s_reductionTable[0] = 0;
            word16 x = 0x01c2;
            s_reductionTable[1] = ByteReverse(x);
            for (unsigned int ii=2; ii<=0x80; ii*=2)
            {
                x <<= 1;
                s_reductionTable[ii] = ByteReverse(x);
                for (unsigned int jj=1; jj<ii; jj++)
                    s_reductionTable[ii+jj] = s_reductionTable[ii] ^ s_reductionTable[jj];
            }
            s_reductionTableInitialized = true;
        }

        // Store H*x^i for the bit positions the 2K layout uses; other
        // powers are reconstructed at hash time via the reduction table.
        for (i=0; i<128-24; i++)
        {
            k = i%32;
            if (k < 4)
                Block::Put(NULLPTR, mulTable+1024+(i/32)*256+(size_t(1)<<(7-k)))(V0)(V1);
            else if (k < 8)
                Block::Put(NULLPTR, mulTable+(i/32)*256+(size_t(1)<<(11-k)))(V0)(V1);

            // Same multiply-by-x step as the 64K path.
            int x = (int)V1 & 1;
            V1 = (V1>>1) | (V0<<63);
            V0 = (V0>>1) ^ (x ? W64LIT(0xe1) << 56 : 0);
        }

        for (i=0; i<4; i++)
        {
            memset(mulTable+i*256, 0, 16);
            memset(mulTable+1024+i*256, 0, 16);
#if CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE
            if (HasSSE2())
                for (j=2; j<=8; j*=2)
                    for (k=1; k<j; k++)
                    {
                        GCM_Xor16_SSE2(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
                        GCM_Xor16_SSE2(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
                    }
            else
#elif CRYPTOPP_ARM_NEON_AVAILABLE
            if (HasNEON())
                for (j=2; j<=8; j*=2)
                    for (k=1; k<j; k++)
                    {
                        GCM_Xor16_NEON(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
                        GCM_Xor16_NEON(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
                    }
            else
#endif
            for (j=2; j<=8; j*=2)
                for (k=1; k<j; k++)
                {
                    Xor16(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
                    Xor16(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
                }
        }
    }
}
265 
// The CLMUL/PMULL code paths keep the hash buffer in a different byte
// order than the portable code; this converts it when one of those
// accelerated paths is active. On other builds this is a no-op.
inline void GCM_Base::ReverseHashBufferIfNeeded()
{
#if CRYPTOPP_CLMUL_AVAILABLE
    if (HasCLMUL())
    {
        GCM_ReverseHashBufferIfNeeded_CLMUL(HashBuffer());
    }
#elif CRYPTOPP_ARM_PMULL_AVAILABLE
    if (HasPMULL())
    {
        GCM_ReverseHashBufferIfNeeded_PMULL(HashBuffer());
    }
#endif
}
280 
// Derives the pre-counter block J0 from the IV and (re)starts CTR mode.
// A 96-bit IV uses the fast path J0 = IV || 0^31 || 1; any other length
// is GHASHed together with its bit length, per the GCM specification.
void GCM_Base::Resync(const byte *iv, size_t len)
{
    BlockCipher &cipher = AccessBlockCipher();
    byte *hashBuffer = HashBuffer();

    if (len == 12)
    {
        // Fast path: hashBuffer = IV || 0x00 00 00 01
        memcpy(hashBuffer, iv, len);
        memset(hashBuffer+len, 0, 3);
        hashBuffer[len+3] = 1;
    }
    else
    {
        size_t origLen = len;
        memset(hashBuffer, 0, HASH_BLOCKSIZE);

        // Hash all whole blocks of the IV directly from the caller's buffer;
        // AuthenticateBlocks returns the unprocessed remainder length.
        if (len >= HASH_BLOCKSIZE)
        {
            len = GCM_Base::AuthenticateBlocks(iv, len);
            iv += (origLen - len);
        }

        // Zero-pad and hash the final partial block, if any.
        if (len > 0)
        {
            memcpy(m_buffer, iv, len);
            memset(m_buffer+len, 0, HASH_BLOCKSIZE-len);
            GCM_Base::AuthenticateBlocks(m_buffer, HASH_BLOCKSIZE);
        }

        // Hash the length block: 64-bit zero || IV length in bits.
        PutBlock<word64, BigEndian, true>(NULLPTR, m_buffer)(0)(origLen*8);
        GCM_Base::AuthenticateBlocks(m_buffer, HASH_BLOCKSIZE);

        // Accelerated paths keep the hash in a different byte order;
        // normalize before using it as the counter block.
        ReverseHashBufferIfNeeded();
    }

    // First Resynchronize() must key the CTR cipher; later ones only reset
    // the IV.
    if (m_state >= State_IVSet)
        m_ctr.Resynchronize(hashBuffer, REQUIRED_BLOCKSIZE);
    else
        m_ctr.SetCipherWithIV(cipher, hashBuffer);

    // Skip the first keystream block; it is reserved for the final MAC
    // encryption (see AuthenticateLastFooterBlock, which seeks back to 0).
    m_ctr.Seek(HASH_BLOCKSIZE);

    // Restart GHASH for the new message.
    memset(hashBuffer, 0, HASH_BLOCKSIZE);
}
325 
// Preferred input alignment: 16 bytes when the SSE2 assembly paths may run
// (movdqa on the hash buffer), 4 bytes for NEON builds, otherwise defer to
// the underlying block cipher.
unsigned int GCM_Base::OptimalDataAlignment() const
{
    return
#if CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
        HasSSE2() ? 16 :
#elif CRYPTOPP_ARM_NEON_AVAILABLE
        HasNEON() ? 4 :
#endif
        GetBlockCipher().OptimalDataAlignment();
}
336 
337 #if CRYPTOPP_MSC_VERSION
338 # pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code
339 #endif
340 
341 #endif // #ifndef CRYPTOPP_GENERATE_X64_MASM
342 
343 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
344 extern "C" {
345 void GCM_AuthenticateBlocks_2K(const byte *data, size_t blocks, word64 *hashBuffer, const word16 *reductionTable);
346 void GCM_AuthenticateBlocks_64K(const byte *data, size_t blocks, word64 *hashBuffer);
347 }
348 #endif
349 
350 #ifndef CRYPTOPP_GENERATE_X64_MASM
351 
// GHASH core: folds len bytes of data (in whole 16-byte blocks) into the
// hash buffer, returning the number of unprocessed trailing bytes.
// Dispatches to carryless-multiply intrinsics when available, otherwise to
// one of four software paths selected by table size and SSE2 availability.
// NOTE: this function is also the template for the MASM output produced by
// "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM gcm.cpp" (see top of file).
size_t GCM_Base::AuthenticateBlocks(const byte *data, size_t len)
{
#if CRYPTOPP_CLMUL_AVAILABLE
    if (HasCLMUL())
    {
        return GCM_AuthenticateBlocks_CLMUL(data, len, MulTable(), HashBuffer());
    }
#elif CRYPTOPP_ARM_PMULL_AVAILABLE
    if (HasPMULL())
    {
        return GCM_AuthenticateBlocks_PMULL(data, len, MulTable(), HashBuffer());
    }
#endif

    word64 *hashBuffer = (word64 *)(void *)HashBuffer();
    CRYPTOPP_ASSERT(IsAlignedOn(hashBuffer,GetAlignmentOf<word64>()));

    // Selector: bit 1 set when the 64K tables are in use (m_buffer holds
    // them plus 3 scratch blocks), bit 0 set when SSE2 asm is usable.
    switch (2*(m_buffer.size()>=64*1024)
#if CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
        + HasSSE2()
//#elif CRYPTOPP_ARM_NEON_AVAILABLE
//      + HasNEON()
#endif
        )
    {
    case 0:     // non-SSE2 and 2K tables
        {
        byte *mulTable = MulTable();
        word64 x0 = hashBuffer[0], x1 = hashBuffer[1];

        do
        {
            // x ^= next data block, then multiply by H a nibble at a time
            // via the precomputed tables, with byte-wise reduction through
            // s_reductionTable.
            word64 y0, y1, a0, a1, b0, b1, c0, c1, d0, d1;
            Block::Get(data)(y0)(y1);
            x0 ^= y0;
            x1 ^= y1;

            data += HASH_BLOCKSIZE;
            len -= HASH_BLOCKSIZE;

        #define READ_TABLE_WORD64_COMMON(a, b, c, d)    *(word64 *)(void *)(mulTable+(a*1024)+(b*256)+c+d*8)

        #ifdef CRYPTOPP_LITTLE_ENDIAN
            #if CRYPTOPP_BOOL_SLOW_WORD64
                // 32-bit friendly variant: index the tables through word32
                // halves instead of shifting 64-bit words.
                word32 z0 = (word32)x0;
                word32 z1 = (word32)(x0>>32);
                word32 z2 = (word32)x1;
                word32 z3 = (word32)(x1>>32);
                #define READ_TABLE_WORD64(a, b, c, d, e)    READ_TABLE_WORD64_COMMON((d%2), c, (d?(z##c>>((d?d-1:0)*4))&0xf0:(z##c&0xf)<<4), e)
            #else
                #define READ_TABLE_WORD64(a, b, c, d, e)    READ_TABLE_WORD64_COMMON((d%2), c, ((d+8*b)?(x##a>>(((d+8*b)?(d+8*b)-1:1)*4))&0xf0:(x##a&0xf)<<4), e)
            #endif
            #define GF_MOST_SIG_8BITS(a) (a##1 >> 7*8)
            #define GF_SHIFT_8(a) a##1 = (a##1 << 8) ^ (a##0 >> 7*8); a##0 <<= 8;
        #else
            #define READ_TABLE_WORD64(a, b, c, d, e)    READ_TABLE_WORD64_COMMON((1-d%2), c, ((15-d-8*b)?(x##a>>(((15-d-8*b)?(15-d-8*b)-1:0)*4))&0xf0:(x##a&0xf)<<4), e)
            #define GF_MOST_SIG_8BITS(a) (a##1 & 0xff)
            #define GF_SHIFT_8(a) a##1 = (a##1 >> 8) ^ (a##0 << 7*8); a##0 >>= 8;
        #endif

        #define GF_MUL_32BY128(op, a, b, c) \
            a0 op READ_TABLE_WORD64(a, b, c, 0, 0) ^ READ_TABLE_WORD64(a, b, c, 1, 0); \
            a1 op READ_TABLE_WORD64(a, b, c, 0, 1) ^ READ_TABLE_WORD64(a, b, c, 1, 1); \
            b0 op READ_TABLE_WORD64(a, b, c, 2, 0) ^ READ_TABLE_WORD64(a, b, c, 3, 0); \
            b1 op READ_TABLE_WORD64(a, b, c, 2, 1) ^ READ_TABLE_WORD64(a, b, c, 3, 1); \
            c0 op READ_TABLE_WORD64(a, b, c, 4, 0) ^ READ_TABLE_WORD64(a, b, c, 5, 0); \
            c1 op READ_TABLE_WORD64(a, b, c, 4, 1) ^ READ_TABLE_WORD64(a, b, c, 5, 1); \
            d0 op READ_TABLE_WORD64(a, b, c, 6, 0) ^ READ_TABLE_WORD64(a, b, c, 7, 0); \
            d1 op READ_TABLE_WORD64(a, b, c, 6, 1) ^ READ_TABLE_WORD64(a, b, c, 7, 1); \

            GF_MUL_32BY128(=, 0, 0, 0)
            GF_MUL_32BY128(^=, 0, 1, 1)
            GF_MUL_32BY128(^=, 1, 0, 2)
            GF_MUL_32BY128(^=, 1, 1, 3)

            // Fold the (a,b,c,d) partial products together, reducing one
            // byte per step via the shared reduction table.
            word32 r = (word32)s_reductionTable[GF_MOST_SIG_8BITS(d)] << 16;
            GF_SHIFT_8(d)
            c0 ^= d0; c1 ^= d1;
            r ^= (word32)s_reductionTable[GF_MOST_SIG_8BITS(c)] << 8;
            GF_SHIFT_8(c)
            b0 ^= c0; b1 ^= c1;
            r ^= s_reductionTable[GF_MOST_SIG_8BITS(b)];
            GF_SHIFT_8(b)
            a0 ^= b0; a1 ^= b1;
            a0 ^= ConditionalByteReverse<word64>(LITTLE_ENDIAN_ORDER, r);
            x0 = a0; x1 = a1;
        }
        while (len >= HASH_BLOCKSIZE);

        hashBuffer[0] = x0; hashBuffer[1] = x1;
        return len;
        }

    case 2:     // non-SSE2 and 64K tables
        {
        byte *mulTable = MulTable();
        word64 x0 = hashBuffer[0], x1 = hashBuffer[1];

        do
        {
            // Byte-at-a-time multiply: one 256-entry sub-table per byte
            // position, so no reduction step is needed in the loop.
            word64 y0, y1, a0, a1;
            Block::Get(data)(y0)(y1);
            x0 ^= y0;
            x1 ^= y1;

            data += HASH_BLOCKSIZE;
            len -= HASH_BLOCKSIZE;

        #undef READ_TABLE_WORD64_COMMON
        #undef READ_TABLE_WORD64

        #define READ_TABLE_WORD64_COMMON(a, c, d)   *(word64 *)(void *)(mulTable+(a)*256*16+(c)+(d)*8)

        #ifdef CRYPTOPP_LITTLE_ENDIAN
            #if CRYPTOPP_BOOL_SLOW_WORD64
                word32 z0 = (word32)x0;
                word32 z1 = (word32)(x0>>32);
                word32 z2 = (word32)x1;
                word32 z3 = (word32)(x1>>32);
                #define READ_TABLE_WORD64(b, c, d, e)   READ_TABLE_WORD64_COMMON(c*4+d, (d?(z##c>>((d?d:1)*8-4))&0xff0:(z##c&0xff)<<4), e)
            #else
                #define READ_TABLE_WORD64(b, c, d, e)   READ_TABLE_WORD64_COMMON(c*4+d, ((d+4*(c%2))?(x##b>>(((d+4*(c%2))?(d+4*(c%2)):1)*8-4))&0xff0:(x##b&0xff)<<4), e)
            #endif
        #else
            #define READ_TABLE_WORD64(b, c, d, e)   READ_TABLE_WORD64_COMMON(c*4+d, ((7-d-4*(c%2))?(x##b>>(((7-d-4*(c%2))?(7-d-4*(c%2)):1)*8-4))&0xff0:(x##b&0xff)<<4), e)
        #endif

        #define GF_MUL_8BY128(op, b, c, d) \
            a0 op READ_TABLE_WORD64(b, c, d, 0);\
            a1 op READ_TABLE_WORD64(b, c, d, 1);\

            GF_MUL_8BY128(=, 0, 0, 0)
            GF_MUL_8BY128(^=, 0, 0, 1)
            GF_MUL_8BY128(^=, 0, 0, 2)
            GF_MUL_8BY128(^=, 0, 0, 3)
            GF_MUL_8BY128(^=, 0, 1, 0)
            GF_MUL_8BY128(^=, 0, 1, 1)
            GF_MUL_8BY128(^=, 0, 1, 2)
            GF_MUL_8BY128(^=, 0, 1, 3)
            GF_MUL_8BY128(^=, 1, 2, 0)
            GF_MUL_8BY128(^=, 1, 2, 1)
            GF_MUL_8BY128(^=, 1, 2, 2)
            GF_MUL_8BY128(^=, 1, 2, 3)
            GF_MUL_8BY128(^=, 1, 3, 0)
            GF_MUL_8BY128(^=, 1, 3, 1)
            GF_MUL_8BY128(^=, 1, 3, 2)
            GF_MUL_8BY128(^=, 1, 3, 3)

            x0 = a0; x1 = a1;
        }
        while (len >= HASH_BLOCKSIZE);

        hashBuffer[0] = x0; hashBuffer[1] = x1;
        return len;
        }
#endif  // #ifndef CRYPTOPP_GENERATE_X64_MASM

#ifdef CRYPTOPP_X64_MASM_AVAILABLE
    // x64 MSVC build: call the pre-generated MASM routines (see externs above).
    case 1:     // SSE2 and 2K tables
        GCM_AuthenticateBlocks_2K(data, len/16, hashBuffer, s_reductionTable);
        return len % 16;
    case 3:     // SSE2 and 64K tables
        GCM_AuthenticateBlocks_64K(data, len/16, hashBuffer);
        return len % 16;
#endif

#if CRYPTOPP_SSE2_ASM_AVAILABLE
    case 1:     // SSE2 and 2K tables
        {
        #ifdef __GNUC__
            __asm__ __volatile__
            (
            INTEL_NOPREFIX
        #elif defined(CRYPTOPP_GENERATE_X64_MASM)
            ALIGN 8
            GCM_AuthenticateBlocks_2K PROC FRAME
            rex_push_reg rsi
            push_reg rdi
            push_reg rbx
            .endprolog
            mov rsi, r8
            mov r11, r9
        #else
            AS2(    mov     WORD_REG(cx), data          )
            AS2(    mov     WORD_REG(dx), len           )
            AS2(    mov     WORD_REG(si), hashBuffer    )
            AS2(    shr     WORD_REG(dx), 4             )
        #endif

        #if CRYPTOPP_BOOL_X32
            AS1(push    rbx)
            AS1(push    rbp)
        #else
            AS_PUSH_IF86(   bx)
            AS_PUSH_IF86(   bp)
        #endif

        #ifdef __GNUC__
            AS2(    mov     AS_REG_7, WORD_REG(di))
        #elif CRYPTOPP_BOOL_X86
            AS2(    lea     AS_REG_7, s_reductionTable)
        #endif

            AS2(    movdqa  xmm0, [WORD_REG(si)]            )

        #define MUL_TABLE_0 WORD_REG(si) + 32
        #define MUL_TABLE_1 WORD_REG(si) + 32 + 1024
        #define RED_TABLE AS_REG_7

            ASL(0)
            AS2(    movdqu  xmm4, [WORD_REG(cx)]            )
            AS2(    pxor    xmm0, xmm4                      )

            AS2(    movd    ebx, xmm0                       )
            AS2(    mov     eax, AS_HEX(f0f0f0f0)           )
            AS2(    and     eax, ebx                        )
            AS2(    shl     ebx, 4                          )
            AS2(    and     ebx, AS_HEX(f0f0f0f0)           )
            AS2(    movzx   edi, ah                         )
            AS2(    movdqa  xmm5, XMMWORD_PTR [MUL_TABLE_1 + WORD_REG(di)]  )
            AS2(    movzx   edi, al                         )
            AS2(    movdqa  xmm4, XMMWORD_PTR [MUL_TABLE_1 + WORD_REG(di)]  )
            AS2(    shr     eax, 16                         )
            AS2(    movzx   edi, ah                         )
            AS2(    movdqa  xmm3, XMMWORD_PTR [MUL_TABLE_1 + WORD_REG(di)]  )
            AS2(    movzx   edi, al                         )
            AS2(    movdqa  xmm2, XMMWORD_PTR [MUL_TABLE_1 + WORD_REG(di)]  )

        #define SSE2_MUL_32BITS(i)                                                      \
            AS2(    psrldq  xmm0, 4                                                     )\
            AS2(    movd    eax, xmm0                                                   )\
            AS2(    and     eax, AS_HEX(f0f0f0f0)                                       )\
            AS2(    movzx   edi, bh                                                     )\
            AS2(    pxor    xmm5, XMMWORD_PTR [MUL_TABLE_0 + (i-1)*256 + WORD_REG(di)]  )\
            AS2(    movzx   edi, bl                                                     )\
            AS2(    pxor    xmm4, XMMWORD_PTR [MUL_TABLE_0 + (i-1)*256 + WORD_REG(di)]  )\
            AS2(    shr     ebx, 16                                                     )\
            AS2(    movzx   edi, bh                                                     )\
            AS2(    pxor    xmm3, XMMWORD_PTR [MUL_TABLE_0 + (i-1)*256 + WORD_REG(di)]  )\
            AS2(    movzx   edi, bl                                                     )\
            AS2(    pxor    xmm2, XMMWORD_PTR [MUL_TABLE_0 + (i-1)*256 + WORD_REG(di)]  )\
            AS2(    movd    ebx, xmm0                                                   )\
            AS2(    shl     ebx, 4                                                      )\
            AS2(    and     ebx, AS_HEX(f0f0f0f0)                                       )\
            AS2(    movzx   edi, ah                                                     )\
            AS2(    pxor    xmm5, XMMWORD_PTR [MUL_TABLE_1 + i*256 + WORD_REG(di)]      )\
            AS2(    movzx   edi, al                                                     )\
            AS2(    pxor    xmm4, XMMWORD_PTR [MUL_TABLE_1 + i*256 + WORD_REG(di)]      )\
            AS2(    shr     eax, 16                                                     )\
            AS2(    movzx   edi, ah                                                     )\
            AS2(    pxor    xmm3, XMMWORD_PTR [MUL_TABLE_1 + i*256 + WORD_REG(di)]      )\
            AS2(    movzx   edi, al                                                     )\
            AS2(    pxor    xmm2, XMMWORD_PTR [MUL_TABLE_1 + i*256 + WORD_REG(di)]      )\

            SSE2_MUL_32BITS(1)
            SSE2_MUL_32BITS(2)
            SSE2_MUL_32BITS(3)

            AS2(    movzx   edi, bh                         )
            AS2(    pxor    xmm5, XMMWORD_PTR [MUL_TABLE_0 + 3*256 + WORD_REG(di)]  )
            AS2(    movzx   edi, bl                         )
            AS2(    pxor    xmm4, XMMWORD_PTR [MUL_TABLE_0 + 3*256 + WORD_REG(di)]  )
            AS2(    shr     ebx, 16                         )
            AS2(    movzx   edi, bh                         )
            AS2(    pxor    xmm3, XMMWORD_PTR [MUL_TABLE_0 + 3*256 + WORD_REG(di)]  )
            AS2(    movzx   edi, bl                         )
            AS2(    pxor    xmm2, XMMWORD_PTR [MUL_TABLE_0 + 3*256 + WORD_REG(di)]  )

            AS2(    movdqa  xmm0, xmm3                      )
            AS2(    pslldq  xmm3, 1                         )
            AS2(    pxor    xmm2, xmm3                      )
            AS2(    movdqa  xmm1, xmm2                      )
            AS2(    pslldq  xmm2, 1                         )
            AS2(    pxor    xmm5, xmm2                      )

            AS2(    psrldq  xmm0, 15                        )
#if USE_MOVD_REG32
            AS2(    movd    edi, xmm0                       )
#elif USE_MOV_REG32_OR_REG64
            AS2(    mov     WORD_REG(di), xmm0              )
#else   // GNU Assembler
            AS2(    movd    WORD_REG(di), xmm0              )
#endif
            AS2(    movzx   eax, WORD PTR [RED_TABLE + WORD_REG(di)*2]  )
            AS2(    shl     eax, 8                          )

            AS2(    movdqa  xmm0, xmm5                      )
            AS2(    pslldq  xmm5, 1                         )
            AS2(    pxor    xmm4, xmm5                      )

            AS2(    psrldq  xmm1, 15                        )
#if USE_MOVD_REG32
            AS2(    movd    edi, xmm1                       )
#elif USE_MOV_REG32_OR_REG64
            AS2(    mov     WORD_REG(di), xmm1              )
#else
            AS2(    movd    WORD_REG(di), xmm1              )
#endif
            AS2(    xor     ax, WORD PTR [RED_TABLE + WORD_REG(di)*2]   )
            AS2(    shl     eax, 8                          )

            AS2(    psrldq  xmm0, 15                        )
#if USE_MOVD_REG32
            AS2(    movd    edi, xmm0                       )
#elif USE_MOV_REG32_OR_REG64
            AS2(    mov     WORD_REG(di), xmm0              )
#else
            AS2(    movd    WORD_REG(di), xmm0              )
#endif
            AS2(    xor     ax, WORD PTR [RED_TABLE + WORD_REG(di)*2]   )

            AS2(    movd    xmm0, eax                       )
            AS2(    pxor    xmm0, xmm4                      )

            AS2(    add     WORD_REG(cx), 16                )
            AS2(    sub     WORD_REG(dx), 1                 )
            ATT_NOPREFIX
            ASJ(    jnz,    0, b                            )
            INTEL_NOPREFIX
            AS2(    movdqa  [WORD_REG(si)], xmm0            )

        #if CRYPTOPP_BOOL_X32
            AS1(pop rbp)
            AS1(pop rbx)
        #else
            AS_POP_IF86(    bp)
            AS_POP_IF86(    bx)
        #endif

        #ifdef __GNUC__
                ATT_PREFIX
                    :
                    : "c" (data), "d" (len/16), "S" (hashBuffer), "D" (s_reductionTable)
                    : "memory", "cc", "%eax"
            #if CRYPTOPP_BOOL_X64
                    , "%ebx", "%r11"
            #endif
                );
        #elif defined(CRYPTOPP_GENERATE_X64_MASM)
            pop rbx
            pop rdi
            pop rsi
            ret
            GCM_AuthenticateBlocks_2K ENDP
        #endif

        return len%16;
        }
    case 3:     // SSE2 and 64K tables
        {
        #ifdef __GNUC__
            __asm__ __volatile__
            (
            INTEL_NOPREFIX
        #elif defined(CRYPTOPP_GENERATE_X64_MASM)
            ALIGN 8
            GCM_AuthenticateBlocks_64K PROC FRAME
            rex_push_reg rsi
            push_reg rdi
            .endprolog
            mov rsi, r8
        #else
            AS2(    mov     WORD_REG(cx), data          )
            AS2(    mov     WORD_REG(dx), len           )
            AS2(    mov     WORD_REG(si), hashBuffer    )
            AS2(    shr     WORD_REG(dx), 4             )
        #endif

            AS2(    movdqa  xmm0, [WORD_REG(si)]        )

        #undef MUL_TABLE
        #define MUL_TABLE(i,j) WORD_REG(si) + 32 + (i*4+j)*256*16

            ASL(1)
            AS2(    movdqu  xmm1, [WORD_REG(cx)]        )
            AS2(    pxor    xmm1, xmm0                  )
            AS2(    pxor    xmm0, xmm0                  )

        #undef SSE2_MUL_32BITS
        #define SSE2_MUL_32BITS(i)                                          \
            AS2(    movd    eax, xmm1                                       )\
            AS2(    psrldq  xmm1, 4                                         )\
            AS2(    movzx   edi, al                                         )\
            AS2(    add     WORD_REG(di), WORD_REG(di)                      )\
            AS2(    pxor    xmm0, [MUL_TABLE(i,0) + WORD_REG(di)*8]         )\
            AS2(    movzx   edi, ah                                         )\
            AS2(    add     WORD_REG(di), WORD_REG(di)                      )\
            AS2(    pxor    xmm0, [MUL_TABLE(i,1) + WORD_REG(di)*8]         )\
            AS2(    shr     eax, 16                                         )\
            AS2(    movzx   edi, al                                         )\
            AS2(    add     WORD_REG(di), WORD_REG(di)                      )\
            AS2(    pxor    xmm0, [MUL_TABLE(i,2) + WORD_REG(di)*8]         )\
            AS2(    movzx   edi, ah                                         )\
            AS2(    add     WORD_REG(di), WORD_REG(di)                      )\
            AS2(    pxor    xmm0, [MUL_TABLE(i,3) + WORD_REG(di)*8]         )\

            SSE2_MUL_32BITS(0)
            SSE2_MUL_32BITS(1)
            SSE2_MUL_32BITS(2)
            SSE2_MUL_32BITS(3)

            AS2(    add     WORD_REG(cx), 16            )
            AS2(    sub     WORD_REG(dx), 1             )
            ATT_NOPREFIX
            ASJ(    jnz,    1, b                        )
            INTEL_NOPREFIX
            AS2(    movdqa  [WORD_REG(si)], xmm0        )

        #ifdef __GNUC__
                ATT_PREFIX
                    :
                    : "c" (data), "d" (len/16), "S" (hashBuffer)
                    : "memory", "cc", "%edi", "%eax"
                );
        #elif defined(CRYPTOPP_GENERATE_X64_MASM)
            pop rdi
            pop rsi
            ret
            GCM_AuthenticateBlocks_64K ENDP
        #endif

        return len%16;
        }
#endif
#ifndef CRYPTOPP_GENERATE_X64_MASM
    }

    return len%16;
}
782 
783 void GCM_Base::AuthenticateLastHeaderBlock()
784 {
785  if (m_bufferedDataLength > 0)
786  {
787  memset(m_buffer+m_bufferedDataLength, 0, HASH_BLOCKSIZE-m_bufferedDataLength);
788  m_bufferedDataLength = 0;
789  GCM_Base::AuthenticateBlocks(m_buffer, HASH_BLOCKSIZE);
790  }
791 }
792 
// Finishes the ciphertext portion of GHASH: flushes any buffered partial
// block, then hashes the length block (AAD bits || message bits), both
// encoded big-endian per the GCM specification.
void GCM_Base::AuthenticateLastConfidentialBlock()
{
    GCM_Base::AuthenticateLastHeaderBlock();
    PutBlock<word64, BigEndian, true>(NULLPTR, m_buffer)(m_totalHeaderLength*8)(m_totalMessageLength*8);
    GCM_Base::AuthenticateBlocks(m_buffer, HASH_BLOCKSIZE);
}
799 
// Produces the authentication tag: encrypts the final GHASH value with the
// first keystream block E_K(J0), which Resync reserved by seeking past it.
// Writes macSize bytes of the tag to mac.
void GCM_Base::AuthenticateLastFooterBlock(byte *mac, size_t macSize)
{
    // Rewind CTR to block 0 -- the keystream block reserved for the tag.
    m_ctr.Seek(0);
    // Accelerated paths store the hash in a different byte order; normalize.
    ReverseHashBufferIfNeeded();
    m_ctr.ProcessData(mac, HashBuffer(), macSize);
}
806 
807 NAMESPACE_END
808 
809 #endif // #ifndef CRYPTOPP_GENERATE_X64_MASM
810 #endif
An invalid argument was detected.
Definition: cryptlib.h:200
virtual void SetKey(const byte *key, size_t length, const NameValuePairs &params=g_nullNameValuePairs)
Sets or reset the key of this object.
Definition: cryptlib.cpp:64
unsigned int OptimalDataAlignment() const
Provides input and output data alignment for optimal performance.
Definition: gcm.cpp:326
void IncrementCounterByOne(byte *inout, unsigned int size)
Performs an addition with carry on a block of bytes.
Definition: misc.h:1104
Library configuration file.
Access a block of memory.
Definition: misc.h:2398
virtual unsigned int OptimalDataAlignment() const
Provides input and output data alignment for optimal performance.
Definition: cryptlib.cpp:194
byte order is little-endian
Definition: cryptlib.h:143
Interface for one direction (encryption or decryption) of a block cipher.
Definition: cryptlib.h:1228
Use a table with 64K entries.
Definition: gcm.h:21
virtual unsigned int BlockSize() const =0
Provides the block size of the cipher.
bool IsAlignedOn(const void *ptr, unsigned int alignment)
Determines whether ptr is aligned to a minimum value.
Definition: misc.h:1026
const char * TableSize()
int, in bytes
Definition: argnames.h:81
bool HasCLMUL()
Determines Carryless Multiply availability.
Definition: cpu.h:173
Precompiled header file.
void ProcessBlock(const byte *inBlock, byte *outBlock) const
Encrypt or decrypt a block.
Definition: cryptlib.h:835
std::string AlgorithmName() const
Provides the name of this algorithm.
Definition: gcm.h:30
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.
Definition: trap.h:60
const char * BlockSize()
int, in bytes
Definition: argnames.h:27
Functions for CPU features and intrinsics.
bool HasSSE2()
Determines SSE2 availability.
Definition: cpu.h:114
GCM block cipher mode of operation.
Access a block of memory.
Definition: misc.h:2361
Crypto++ library namespace.
bool GetIntValue(const char *name, int &value) const
Get a named value with type int.
Definition: cryptlib.h:384
byte ByteReverse(byte value)
Reverses bytes in a 8-bit value.
Definition: misc.h:1834
bool HasPMULL()
Determine if an ARM processor provides Polynomial Multiplication.
Definition: cpu.h:348
bool HasNEON()
Determine if an ARM processor has Advanced SIMD available.
Definition: cpu.h:329
Interface for retrieving values given their names.
Definition: cryptlib.h:291