Crypto++  5.6.4
Free C++ class library of cryptographic schemes
cpu.cpp
1 // cpu.cpp - written and placed in the public domain by Wei Dai
2 
3 #include "pch.h"
4 #include "config.h"
5 
6 #ifndef EXCEPTION_EXECUTE_HANDLER
7 # define EXCEPTION_EXECUTE_HANDLER 1
8 #endif
9 
10 #ifndef CRYPTOPP_IMPORTS
11 
12 #include "cpu.h"
13 #include "misc.h"
14 #include <algorithm>
15 
16 #ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
17 #include <signal.h>
18 #include <setjmp.h>
19 #endif
20 
21 #if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
22 #include <emmintrin.h>
23 #endif
24 
25 NAMESPACE_BEGIN(CryptoPP)
26 
#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
// Pointer to a signal handler taking the signal number, declared with C language
// linkage. Used in this file to hold the value returned by signal().
extern "C" typedef void (*SigHandler)(int);
#endif  // Not CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
32 
33 #ifdef CRYPTOPP_CPUID_AVAILABLE
34 
35 #if _MSC_VER >= 1400 && CRYPTOPP_BOOL_X64
36 
37 bool CpuId(word32 input, word32 output[4])
38 {
39  __cpuid((int *)output, input);
40  return true;
41 }
42 
43 #else
44 
#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
extern "C"
{
    // Recovery point for a SIGILL raised while probing the CPUID instruction.
    static jmp_buf s_jmpNoCPUID;

    // SIGILL handler for the CPUID probe. Jumps back to the setjmp() call made on
    // s_jmpNoCPUID, which then observes a return value of 1.
    static void SigIllHandlerCPUID(int /*signum*/)
    {
        longjmp(s_jmpNoCPUID, 1);
    }

    // Recovery point for a SIGILL raised while probing an SSE2 instruction.
    static jmp_buf s_jmpNoSSE2;

    // SIGILL handler for the SSE2 probe. Jumps back to the setjmp() call made on
    // s_jmpNoSSE2, which then observes a return value of 1.
    static void SigIllHandlerSSE2(int /*signum*/)
    {
        longjmp(s_jmpNoSSE2, 1);
    }
}
#endif  // Not CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
61 
62 bool CpuId(word32 input, word32 output[4])
63 {
64 #if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
65  __try
66  {
67  __asm
68  {
69  mov eax, input
70  mov ecx, 0
71  cpuid
72  mov edi, output
73  mov [edi], eax
74  mov [edi+4], ebx
75  mov [edi+8], ecx
76  mov [edi+12], edx
77  }
78  }
79  // GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION
80  __except (EXCEPTION_EXECUTE_HANDLER)
81  {
82  return false;
83  }
84 
85  // function 0 returns the highest basic function understood in EAX
86  if(input == 0)
87  return !!output[0];
88 
89  return true;
90 #else
91  // longjmp and clobber warnings. Volatile is required.
92  // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
93  volatile bool result = true;
94 
95  volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerCPUID);
96  if (oldHandler == SIG_ERR)
97  return false;
98 
99 # ifndef __MINGW32__
100  volatile sigset_t oldMask;
101  if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
102  return false;
103 # endif
104 
105  if (setjmp(s_jmpNoCPUID))
106  result = false;
107  else
108  {
109  asm volatile
110  (
111  // save ebx in case -fPIC is being used
112  // TODO: this might need an early clobber on EDI.
113 # if CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64
114  "pushq %%rbx; cpuid; mov %%ebx, %%edi; popq %%rbx"
115 # else
116  "push %%ebx; cpuid; mov %%ebx, %%edi; pop %%ebx"
117 # endif
118  : "=a" (output[0]), "=D" (output[1]), "=c" (output[2]), "=d" (output[3])
119  : "a" (input), "c" (0)
120  );
121  }
122 
123 # ifndef __MINGW32__
124  sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
125 # endif
126 
127  signal(SIGILL, oldHandler);
128  return result;
129 #endif
130 }
131 
132 #endif
133 
134 static bool TrySSE2()
135 {
136 #if CRYPTOPP_BOOL_X64
137  return true;
138 #elif defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
139  __try
140  {
141 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
142  AS2(por xmm0, xmm0) // executing SSE2 instruction
143 #elif CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
144  __m128i x = _mm_setzero_si128();
145  return _mm_cvtsi128_si32(x) == 0;
146 #endif
147  }
148  // GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION
149  __except (EXCEPTION_EXECUTE_HANDLER)
150  {
151  return false;
152  }
153  return true;
154 #else
155  // longjmp and clobber warnings. Volatile is required.
156  // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
157  volatile bool result = true;
158 
159  volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSSE2);
160  if (oldHandler == SIG_ERR)
161  return false;
162 
163 # ifndef __MINGW32__
164  volatile sigset_t oldMask;
165  if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
166  return false;
167 # endif
168 
169  if (setjmp(s_jmpNoSSE2))
170  result = false;
171  else
172  {
173 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
174  __asm __volatile ("por %xmm0, %xmm0");
175 #elif CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
176  __m128i x = _mm_setzero_si128();
177  result = _mm_cvtsi128_si32(x) == 0;
178 #endif
179  }
180 
181 # ifndef __MINGW32__
182  sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
183 # endif
184 
185  signal(SIGILL, oldHandler);
186  return result;
187 #endif
188 }
189 
190 bool CRYPTOPP_SECTION_INIT g_x86DetectionDone = false;
191 bool CRYPTOPP_SECTION_INIT g_hasMMX = false, CRYPTOPP_SECTION_INIT g_hasISSE = false, CRYPTOPP_SECTION_INIT g_hasSSE2 = false, CRYPTOPP_SECTION_INIT g_hasSSSE3 = false;
192 bool CRYPTOPP_SECTION_INIT g_hasSSE4 = false, CRYPTOPP_SECTION_INIT g_hasAESNI = false, CRYPTOPP_SECTION_INIT g_hasCLMUL = false, CRYPTOPP_SECTION_INIT g_isP4 = false;
193 bool CRYPTOPP_SECTION_INIT g_hasRDRAND = false, CRYPTOPP_SECTION_INIT g_hasRDSEED = false;
194 bool CRYPTOPP_SECTION_INIT g_hasPadlockRNG = false, CRYPTOPP_SECTION_INIT g_hasPadlockACE = false, CRYPTOPP_SECTION_INIT g_hasPadlockACE2 = false;
195 bool CRYPTOPP_SECTION_INIT g_hasPadlockPHE = false, CRYPTOPP_SECTION_INIT g_hasPadlockPMM = false;
196 word32 CRYPTOPP_SECTION_INIT g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;
197 
198 static inline bool IsIntel(const word32 output[4])
199 {
200  // This is the "GenuineIntel" string
201  return (output[1] /*EBX*/ == 0x756e6547) &&
202  (output[2] /*ECX*/ == 0x6c65746e) &&
203  (output[3] /*EDX*/ == 0x49656e69);
204 }
205 
206 static inline bool IsAMD(const word32 output[4])
207 {
208  // This is the "AuthenticAMD" string. Some early K5's can return "AMDisbetter!"
209  return (output[1] /*EBX*/ == 0x68747541) &&
210  (output[2] /*ECX*/ == 0x444D4163) &&
211  (output[3] /*EDX*/ == 0x69746E65);
212 }
213 
214 static inline bool IsVIA(const word32 output[4])
215 {
216  // This is the "CentaurHauls" string. Some non-PadLock's can return "VIA VIA VIA "
217  return (output[1] /*EBX*/ == 0x746e6543) &&
218  (output[2] /*ECX*/ == 0x736c7561) &&
219  (output[3] /*EDX*/ == 0x48727561);
220 }
221 
222 #if HAVE_GCC_CONSTRUCTOR1
223 void __attribute__ ((constructor (CRYPTOPP_INIT_PRIORITY + 50))) DetectX86Features()
224 #elif HAVE_GCC_CONSTRUCTOR0
225 void __attribute__ ((constructor)) DetectX86Features()
226 #else
227 void DetectX86Features()
228 #endif
229 {
230  word32 cpuid[4], cpuid1[4];
231  if (!CpuId(0, cpuid))
232  return;
233  if (!CpuId(1, cpuid1))
234  return;
235 
236  g_hasMMX = (cpuid1[3] & (1 << 23)) != 0;
237  if ((cpuid1[3] & (1 << 26)) != 0)
238  g_hasSSE2 = TrySSE2();
239  g_hasSSSE3 = g_hasSSE2 && (cpuid1[2] & (1<<9));
240  g_hasSSE4 = g_hasSSE2 && ((cpuid1[2] & (1<<19)) && (cpuid1[2] & (1<<20)));
241  g_hasAESNI = g_hasSSE2 && (cpuid1[2] & (1<<25));
242  g_hasCLMUL = g_hasSSE2 && (cpuid1[2] & (1<<1));
243 
244  if ((cpuid1[3] & (1 << 25)) != 0)
245  g_hasISSE = true;
246  else
247  {
248  word32 cpuid2[4];
249  CpuId(0x080000000, cpuid2);
250  if (cpuid2[0] >= 0x080000001)
251  {
252  CpuId(0x080000001, cpuid2);
253  g_hasISSE = (cpuid2[3] & (1 << 22)) != 0;
254  }
255  }
256 
257  if (IsIntel(cpuid))
258  {
259  static const unsigned int RDRAND_FLAG = (1 << 30);
260  static const unsigned int RDSEED_FLAG = (1 << 18);
261 
262  g_isP4 = ((cpuid1[0] >> 8) & 0xf) == 0xf;
263  g_cacheLineSize = 8 * GETBYTE(cpuid1[1], 1);
264  g_hasRDRAND = !!(cpuid1[2] /*ECX*/ & RDRAND_FLAG);
265 
266  if (cpuid[0] /*EAX*/ >= 7)
267  {
268  word32 cpuid3[4];
269  if (CpuId(7, cpuid3))
270  g_hasRDSEED = !!(cpuid3[1] /*EBX*/ & RDSEED_FLAG);
271  }
272  }
273  else if (IsAMD(cpuid))
274  {
275  static const unsigned int RDRAND_FLAG = (1 << 30);
276 
277  CpuId(0x01, cpuid);
278  g_hasRDRAND = !!(cpuid[2] /*ECX*/ & RDRAND_FLAG);
279 
280  CpuId(0x80000005, cpuid);
281  g_cacheLineSize = GETBYTE(cpuid[2], 0);
282  }
283  else if (IsVIA(cpuid))
284  {
285  static const unsigned int RNG_FLAGS = (0x3 << 2);
286  static const unsigned int ACE_FLAGS = (0x3 << 6);
287  static const unsigned int ACE2_FLAGS = (0x3 << 8);
288  static const unsigned int PHE_FLAGS = (0x3 << 10);
289  static const unsigned int PMM_FLAGS = (0x3 << 12);
290 
291  CpuId(0xC0000000, cpuid);
292  if (cpuid[0] >= 0xC0000001)
293  {
294  // Extended features available
295  CpuId(0xC0000001, cpuid);
296  g_hasPadlockRNG = !!(cpuid[3] /*EDX*/ & RNG_FLAGS);
297  g_hasPadlockACE = !!(cpuid[3] /*EDX*/ & ACE_FLAGS);
298  g_hasPadlockACE2 = !!(cpuid[3] /*EDX*/ & ACE2_FLAGS);
299  g_hasPadlockPHE = !!(cpuid[3] /*EDX*/ & PHE_FLAGS);
300  g_hasPadlockPMM = !!(cpuid[3] /*EDX*/ & PMM_FLAGS);
301  }
302  }
303 
304  if (!g_cacheLineSize)
305  g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;
306 
307  *((volatile bool*)&g_x86DetectionDone) = true;
308 }
309 
310 #elif (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64)
311 
312 // The ARM equivalent of CPUID probing is reading an MSR. The code requires Exception Level 1 (EL1) and above, but user space runs at EL0.
313 // Attempting to run the code results in a SIGILL and termination.
314 //
315 // #if defined(__arm64__) || defined(__aarch64__)
316 // word64 caps = 0; // Read ID_AA64ISAR0_EL1
317 // __asm __volatile("mrs %0, " "id_aa64isar0_el1" : "=r" (caps));
318 // #elif defined(__arm__) || defined(__aarch32__)
319 // word32 caps = 0; // Read ID_ISAR5_EL1
320 // __asm __volatile("mrs %0, " "id_isar5_el1" : "=r" (caps));
321 // #endif
322 //
323 // The following does not work well either. It appears to be missing constants, and it does not detect Aarch32 execution environments on Aarch64
324 // http://community.arm.com/groups/android-community/blog/2014/10/10/runtime-detection-of-cpu-features-on-an-armv8-a-cpu
325 //
326 bool CRYPTOPP_SECTION_INIT g_ArmDetectionDone = false;
327 bool CRYPTOPP_SECTION_INIT g_hasNEON = false, CRYPTOPP_SECTION_INIT g_hasPMULL = false, CRYPTOPP_SECTION_INIT g_hasCRC32 = false;
328 bool CRYPTOPP_SECTION_INIT g_hasAES = false, CRYPTOPP_SECTION_INIT g_hasSHA1 = false, CRYPTOPP_SECTION_INIT g_hasSHA2 = false;
329 word32 CRYPTOPP_SECTION_INIT g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;
330 
#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
// One jump buffer and one SIGILL handler per probed ARM feature. Each handler
// jumps back to the setjmp() call made on its own buffer, which then observes
// a return value of 1.
extern "C"
{
    static jmp_buf s_jmpNoNEON;
    static void SigIllHandlerNEON(int /*signum*/)
    {
        longjmp(s_jmpNoNEON, 1);
    }

    static jmp_buf s_jmpNoPMULL;
    static void SigIllHandlerPMULL(int /*signum*/)
    {
        longjmp(s_jmpNoPMULL, 1);
    }

    static jmp_buf s_jmpNoCRC32;
    static void SigIllHandlerCRC32(int /*signum*/)
    {
        longjmp(s_jmpNoCRC32, 1);
    }

    static jmp_buf s_jmpNoAES;
    static void SigIllHandlerAES(int /*signum*/)
    {
        longjmp(s_jmpNoAES, 1);
    }

    static jmp_buf s_jmpNoSHA1;
    static void SigIllHandlerSHA1(int /*signum*/)
    {
        longjmp(s_jmpNoSHA1, 1);
    }

    static jmp_buf s_jmpNoSHA2;
    static void SigIllHandlerSHA2(int /*signum*/)
    {
        longjmp(s_jmpNoSHA2, 1);
    }
}
#endif  // Not CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
371 
// Checks whether NEON instructions can be executed by running a few NEON
// intrinsics under a fault guard (structured exception handling on MSVC, a
// temporary SIGILL handler elsewhere).
//
// Returns false when NEON intrinsics are not compiled in, when execution
// faults, or when the SIGILL handler or signal mask snapshot cannot be set up.
// Otherwise returns the value computed from the intrinsic results.
static bool TryNEON()
{
#if (CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
    volatile bool result = true;
    __try
    {
        uint32_t v1[4] = {1,1,1,1};
        uint32x4_t x1 = vld1q_u32(v1);
        uint64_t v2[2] = {1,1};
        uint64x2_t x2 = vld1q_u64(v2);

        uint32x4_t x3 = vdupq_n_u32(2);
        x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0);
        x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3);
        uint64x2_t x4 = vdupq_n_u64(2);
        x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0);
        x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1);

        result = !!(vgetq_lane_u32(x3,0) | vgetq_lane_u64(x4,1));
    }
    __except (EXCEPTION_EXECUTE_HANDLER)
    {
        return false;
    }
    return result;
# else
    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile bool result = true;

    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerNEON);
    if (oldHandler == SIG_ERR)
        return false;

    volatile sigset_t oldMask;
    if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
    {
        // Restore the previous SIGILL handler before bailing out so our
        // handler (and its unarmed jmp_buf) does not stay installed.
        signal(SIGILL, oldHandler);
        return false;
    }

    if (setjmp(s_jmpNoNEON))
        result = false;    // reached via longjmp from SigIllHandlerNEON
    else
    {
        uint32_t v1[4] = {1,1,1,1};
        uint32x4_t x1 = vld1q_u32(v1);
        uint64_t v2[2] = {1,1};
        uint64x2_t x2 = vld1q_u64(v2);

        uint32x4_t x3 = {0,0,0,0};
        x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0);
        x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3);
        uint64x2_t x4 = {0,0};
        x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0);
        x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1);

        // Hack... GCC optimizes away the code and returns true
        result = !!(vgetq_lane_u32(x3,0) | vgetq_lane_u64(x4,1));
    }

    // The handler left via longjmp, so put back the signal mask saved above.
    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
    signal(SIGILL, oldHandler);
    return result;
# endif
#else
    return false;
#endif  // CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE
}
439 
// Checks whether the polynomial multiply (PMULL) instructions can be executed
// by running vmull_p64 / vmull_high_p64 under a fault guard and comparing the
// products against known values.
//
// Returns false when the ARM crypto intrinsics are not compiled in, when
// execution faults, when a product does not match, or when the SIGILL handler
// or signal mask snapshot cannot be set up.
static bool TryPMULL()
{
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
    volatile bool result = true;
    __try
    {
        const poly64_t   a1={2}, b1={3};
        const poly64x2_t a2={4,5}, b2={6,7};
        const poly64x2_t a3={0x8080808080808080,0xa0a0a0a0a0a0a0a0}, b3={0xc0c0c0c0c0c0c0c0, 0xe0e0e0e0e0e0e0e0};

        const poly128_t r1 = vmull_p64(a1, b1);
        const poly128_t r2 = vmull_high_p64(a2, b2);
        const poly128_t r3 = vmull_high_p64(a3, b3);

        // Also see https://github.com/weidai11/cryptopp/issues/233.
        const uint64x2_t& t1 = vreinterpretq_u64_p128(r1);  // {6,0}
        const uint64x2_t& t2 = vreinterpretq_u64_p128(r2);  // {0x1b,0}: carry-less product of the high lanes 5 and 7
        const uint64x2_t& t3 = vreinterpretq_u64_p128(r3);  // {bignum,bignum}

        result = !!(vgetq_lane_u64(t1,0) == 0x06 && vgetq_lane_u64(t1,1) == 0x00 && vgetq_lane_u64(t2,0) == 0x1b &&
            vgetq_lane_u64(t2,1) == 0x00 && vgetq_lane_u64(t3,0) == 0x6c006c006c006c00 && vgetq_lane_u64(t3,1) == 0x6c006c006c006c00);
    }
    __except (EXCEPTION_EXECUTE_HANDLER)
    {
        return false;
    }
    return result;
# else
    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile bool result = true;

    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerPMULL);
    if (oldHandler == SIG_ERR)
        return false;

    volatile sigset_t oldMask;
    if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
    {
        // Restore the previous SIGILL handler before bailing out so our
        // handler (and its unarmed jmp_buf) does not stay installed.
        signal(SIGILL, oldHandler);
        return false;
    }

    if (setjmp(s_jmpNoPMULL))
        result = false;    // reached via longjmp from SigIllHandlerPMULL
    else
    {
        const poly64_t   a1={2}, b1={3};
        const poly64x2_t a2={4,5}, b2={6,7};
        const poly64x2_t a3={0x8080808080808080,0xa0a0a0a0a0a0a0a0}, b3={0xc0c0c0c0c0c0c0c0, 0xe0e0e0e0e0e0e0e0};

        const poly128_t r1 = vmull_p64(a1, b1);
        const poly128_t r2 = vmull_high_p64(a2, b2);
        const poly128_t r3 = vmull_high_p64(a3, b3);

        // Linaro is missing vreinterpretq_u64_p128. Also see https://github.com/weidai11/cryptopp/issues/233.
        const uint64x2_t& t1 = (uint64x2_t)(r1);  // {6,0}
        const uint64x2_t& t2 = (uint64x2_t)(r2);  // {0x1b,0}: carry-less product of the high lanes 5 and 7
        const uint64x2_t& t3 = (uint64x2_t)(r3);  // {bignum,bignum}

        result = !!(vgetq_lane_u64(t1,0) == 0x06 && vgetq_lane_u64(t1,1) == 0x00 && vgetq_lane_u64(t2,0) == 0x1b &&
            vgetq_lane_u64(t2,1) == 0x00 && vgetq_lane_u64(t3,0) == 0x6c006c006c006c00 && vgetq_lane_u64(t3,1) == 0x6c006c006c006c00);
    }

    // The handler left via longjmp, so put back the signal mask saved above.
    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
    signal(SIGILL, oldHandler);
    return result;
# endif
#else
    return false;
#endif  // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
}
510 
// Checks whether the ARM CRC32 instructions can be executed by running the
// __crc32cw / __crc32ch / __crc32cb intrinsics under a fault guard.
//
// Returns false when the CRC32 intrinsics are not compiled in, when execution
// faults, or when the SIGILL handler or signal mask snapshot cannot be set up.
// Otherwise returns whether the accumulated CRC value is non-zero.
static bool TryCRC32()
{
#if (CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
    volatile bool result = true;
    __try
    {
        word32 w=0, x=1; word16 y=2; byte z=3;
        w = __crc32cw(w,x);
        w = __crc32ch(w,y);
        w = __crc32cb(w,z);

        result = !!w;
    }
    __except (EXCEPTION_EXECUTE_HANDLER)
    {
        return false;
    }
    return result;
# else
    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile bool result = true;

    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerCRC32);
    if (oldHandler == SIG_ERR)
        return false;

    volatile sigset_t oldMask;
    if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
    {
        // Restore the previous SIGILL handler before bailing out so our
        // handler (and its unarmed jmp_buf) does not stay installed.
        signal(SIGILL, oldHandler);
        return false;
    }

    if (setjmp(s_jmpNoCRC32))
        result = false;    // reached via longjmp from SigIllHandlerCRC32
    else
    {
        word32 w=0, x=1; word16 y=2; byte z=3;
        w = __crc32cw(w,x);
        w = __crc32ch(w,y);
        w = __crc32cb(w,z);

        // Hack... GCC optimizes away the code and returns true
        result = !!w;
    }

    // The handler left via longjmp, so put back the signal mask saved above.
    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
    signal(SIGILL, oldHandler);
    return result;
# endif
#else
    return false;
#endif  // CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE
}
564 
// Checks whether the ARM AES instructions can be executed by running
// vaeseq_u8 and vaesdq_u8 on all-zero inputs under a fault guard.
//
// Returns false when the ARM crypto intrinsics are not compiled in, when
// execution faults, or when the SIGILL handler or signal mask snapshot cannot
// be set up. Otherwise returns the value computed from the two results.
static bool TryAES()
{
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
    volatile bool result = true;
    __try
    {
        // AES encrypt and decrypt
        uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0);
        uint8x16_t r1 = vaeseq_u8(data, key);
        uint8x16_t r2 = vaesdq_u8(data, key);

        result = !!(vgetq_lane_u8(r1,0) | vgetq_lane_u8(r2,7));
    }
    __except (EXCEPTION_EXECUTE_HANDLER)
    {
        return false;
    }
    return result;
# else
    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile bool result = true;

    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerAES);
    if (oldHandler == SIG_ERR)
        return false;

    volatile sigset_t oldMask;
    if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
    {
        // Restore the previous SIGILL handler before bailing out so our
        // handler (and its unarmed jmp_buf) does not stay installed.
        signal(SIGILL, oldHandler);
        return false;
    }

    if (setjmp(s_jmpNoAES))
        result = false;    // reached via longjmp from SigIllHandlerAES
    else
    {
        // AES encrypt and decrypt
        uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0);
        uint8x16_t r1 = vaeseq_u8(data, key);
        uint8x16_t r2 = vaesdq_u8(data, key);

        // Hack... GCC optimizes away the code and returns true
        result = !!(vgetq_lane_u8(r1,0) | vgetq_lane_u8(r2,7));
    }

    // The handler left via longjmp, so put back the signal mask saved above.
    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
    signal(SIGILL, oldHandler);
    return result;
# endif
#else
    return false;
#endif  // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
}
617 
// Checks whether the ARM SHA-1 instructions can be executed by running the
// vsha1* intrinsics on fixed inputs under a fault guard.
//
// Returns false when the ARM crypto intrinsics are not compiled in, when
// execution faults, or when the SIGILL handler or signal mask snapshot cannot
// be set up. Otherwise returns the value computed from the intrinsic results.
static bool TrySHA1()
{
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
    volatile bool result = true;
    __try
    {
        uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};

        uint32x4_t r1 = vsha1cq_u32 (data1, 0, data2);
        uint32x4_t r2 = vsha1mq_u32 (data1, 0, data2);
        uint32x4_t r3 = vsha1pq_u32 (data1, 0, data2);
        uint32x4_t r4 = vsha1su0q_u32 (data1, data2, data3);
        uint32x4_t r5 = vsha1su1q_u32 (data1, data2);

        result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3) | vgetq_lane_u32(r5,0));
    }
    __except (EXCEPTION_EXECUTE_HANDLER)
    {
        return false;
    }
    return result;
# else
    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile bool result = true;

    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA1);
    if (oldHandler == SIG_ERR)
        return false;

    volatile sigset_t oldMask;
    if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
    {
        // Restore the previous SIGILL handler before bailing out so our
        // handler (and its unarmed jmp_buf) does not stay installed.
        signal(SIGILL, oldHandler);
        return false;
    }

    if (setjmp(s_jmpNoSHA1))
        result = false;    // reached via longjmp from SigIllHandlerSHA1
    else
    {
        uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};

        uint32x4_t r1 = vsha1cq_u32 (data1, 0, data2);
        uint32x4_t r2 = vsha1mq_u32 (data1, 0, data2);
        uint32x4_t r3 = vsha1pq_u32 (data1, 0, data2);
        uint32x4_t r4 = vsha1su0q_u32 (data1, data2, data3);
        uint32x4_t r5 = vsha1su1q_u32 (data1, data2);

        // Hack... GCC optimizes away the code and returns true
        result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3) | vgetq_lane_u32(r5,0));
    }

    // The handler left via longjmp, so put back the signal mask saved above.
    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
    signal(SIGILL, oldHandler);
    return result;
# endif
#else
    return false;
#endif  // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
}
677 
// Checks whether the ARM SHA-256 instructions can be executed by running the
// vsha256* intrinsics on fixed inputs under a fault guard.
//
// Returns false when the ARM crypto intrinsics are not compiled in, when
// execution faults, or when the SIGILL handler or signal mask snapshot cannot
// be set up. Otherwise returns the value computed from the intrinsic results.
static bool TrySHA2()
{
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
    volatile bool result = true;
    __try
    {
        uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};

        uint32x4_t r1 = vsha256hq_u32 (data1, data2, data3);
        uint32x4_t r2 = vsha256h2q_u32 (data1, data2, data3);
        uint32x4_t r3 = vsha256su0q_u32 (data1, data2);
        uint32x4_t r4 = vsha256su1q_u32 (data1, data2, data3);

        result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3));
    }
    __except (EXCEPTION_EXECUTE_HANDLER)
    {
        return false;
    }
    return result;
# else
    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile bool result = true;

    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA2);
    if (oldHandler == SIG_ERR)
        return false;

    volatile sigset_t oldMask;
    if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
    {
        // Restore the previous SIGILL handler before bailing out so our
        // handler (and its unarmed jmp_buf) does not stay installed.
        signal(SIGILL, oldHandler);
        return false;
    }

    if (setjmp(s_jmpNoSHA2))
        result = false;    // reached via longjmp from SigIllHandlerSHA2
    else
    {
        uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};

        uint32x4_t r1 = vsha256hq_u32 (data1, data2, data3);
        uint32x4_t r2 = vsha256h2q_u32 (data1, data2, data3);
        uint32x4_t r3 = vsha256su0q_u32 (data1, data2);
        uint32x4_t r4 = vsha256su1q_u32 (data1, data2, data3);

        // Hack... GCC optimizes away the code and returns true
        result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3));
    }

    // The handler left via longjmp, so put back the signal mask saved above.
    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
    signal(SIGILL, oldHandler);
    return result;
# endif
#else
    return false;
#endif  // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
}
735 
736 #if HAVE_GCC_CONSTRUCTOR1
737 void __attribute__ ((constructor (CRYPTOPP_INIT_PRIORITY + 50))) DetectArmFeatures()
738 #elif HAVE_GCC_CONSTRUCTOR0
739 void __attribute__ ((constructor)) DetectArmFeatures()
740 #else
741 void DetectArmFeatures()
742 #endif
743 {
744  g_hasNEON = TryNEON();
745  g_hasPMULL = TryPMULL();
746  g_hasCRC32 = TryCRC32();
747  g_hasAES = TryAES();
748  g_hasSHA1 = TrySHA1();
749  g_hasSHA2 = TrySHA2();
750 
751  *((volatile bool*)&g_ArmDetectionDone) = true;
752 }
753 
754 #endif
755 
756 NAMESPACE_END
757 
758 #endif
Utility functions for the Crypto++ library.
Library configuration file.
Functions for CPU features and intrinsics.
Crypto++ library namespace.