Crypto++  5.6.3
Free C++ class library of cryptographic schemes
cpu.cpp
1 // cpu.cpp - written and placed in the public domain by Wei Dai
2 
3 #include "pch.h"
4 #include "config.h"
5 
6 #ifndef EXCEPTION_EXECUTE_HANDLER
7 # define EXCEPTION_EXECUTE_HANDLER 1
8 #endif
9 
10 #ifndef CRYPTOPP_IMPORTS
11 
12 #include "cpu.h"
13 #include "misc.h"
14 #include <algorithm>
15 
16 #ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
17 #include <signal.h>
18 #include <setjmp.h>
19 #endif
20 
21 #if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
22 #include <emmintrin.h>
23 #endif
24 
25 NAMESPACE_BEGIN(CryptoPP)
26 
27 #ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
28 
29 // MacPorts/GCC does not provide constructor(priority). Apple/GCC and Fink/GCC do provide it.
30 #define HAVE_GCC_CONSTRUCTOR1 (__GNUC__ && (CRYPTOPP_INIT_PRIORITY > 0) && ((CRYPTOPP_GCC_VERSION >= 40300) || (CRYPTOPP_LLVM_CLANG_VERSION >= 20900) || (_INTEL_COMPILER >= 300)) && !(MACPORTS_GCC_COMPILER > 0))
31 #define HAVE_GCC_CONSTRUCTOR0 (__GNUC__ && (CRYPTOPP_INIT_PRIORITY > 0) && !(MACPORTS_GCC_COMPILER > 0))
32 
// Pointer to a signal handler taking the signal number, declared with C
// language linkage so it matches what signal() accepts and returns.
extern "C" typedef void (*SigHandler)(int);
36 #endif // Not CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
37 
38 #ifdef CRYPTOPP_CPUID_AVAILABLE
39 
40 #if _MSC_VER >= 1400 && CRYPTOPP_BOOL_X64
41 
42 bool CpuId(word32 input, word32 output[4])
43 {
44  __cpuid((int *)output, input);
45  return true;
46 }
47 
48 #else
49 
50 #ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
extern "C"
{
	// Jump buffers armed by setjmp() in CpuId() and TrySSE2() respectively.
	static jmp_buf s_jmpNoCPUID;
	static jmp_buf s_jmpNoSSE2;

	// SIGILL handler used while CpuId() probes the CPUID instruction:
	// unwinds to the setjmp() site, making setjmp() return 1.
	static void SigIllHandlerCPUID(int /*signum*/)
	{
		longjmp(s_jmpNoCPUID, 1);
	}

	// SIGILL handler used while TrySSE2() probes an SSE2 instruction:
	// unwinds to the setjmp() site, making setjmp() return 1.
	static void SigIllHandlerSSE2(int /*signum*/)
	{
		longjmp(s_jmpNoSSE2, 1);
	}
}
65 #endif
66 
67 bool CpuId(word32 input, word32 output[4])
68 {
69 #if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
70  __try
71  {
72  __asm
73  {
74  mov eax, input
75  mov ecx, 0
76  cpuid
77  mov edi, output
78  mov [edi], eax
79  mov [edi+4], ebx
80  mov [edi+8], ecx
81  mov [edi+12], edx
82  }
83  }
84  // GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION
85  __except (EXCEPTION_EXECUTE_HANDLER)
86  {
87  return false;
88  }
89 
90  // function 0 returns the highest basic function understood in EAX
91  if(input == 0)
92  return !!output[0];
93 
94  return true;
95 #else
96  // longjmp and clobber warnings. Volatile is required.
97  // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
98  volatile bool result = true;
99 
100  volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerCPUID);
101  if (oldHandler == SIG_ERR)
102  return false;
103 
104 # ifndef __MINGW32__
105  volatile sigset_t oldMask;
106  if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
107  return false;
108 # endif
109 
110  if (setjmp(s_jmpNoCPUID))
111  result = false;
112  else
113  {
114  asm volatile
115  (
116  // save ebx in case -fPIC is being used
117  // TODO: this might need an early clobber on EDI.
118 # if CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64
119  "pushq %%rbx; cpuid; mov %%ebx, %%edi; popq %%rbx"
120 # else
121  "push %%ebx; cpuid; mov %%ebx, %%edi; pop %%ebx"
122 # endif
123  : "=a" (output[0]), "=D" (output[1]), "=c" (output[2]), "=d" (output[3])
124  : "a" (input), "c" (0)
125  );
126  }
127 
128 # ifndef __MINGW32__
129  sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
130 # endif
131 
132  signal(SIGILL, oldHandler);
133  return result;
134 #endif
135 }
136 
137 #endif
138 
139 static bool TrySSE2()
140 {
141 #if CRYPTOPP_BOOL_X64
142  return true;
143 #elif defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
144  __try
145  {
146 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
147  AS2(por xmm0, xmm0) // executing SSE2 instruction
148 #elif CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
149  __m128i x = _mm_setzero_si128();
150  return _mm_cvtsi128_si32(x) == 0;
151 #endif
152  }
153  // GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION
154  __except (EXCEPTION_EXECUTE_HANDLER)
155  {
156  return false;
157  }
158  return true;
159 #else
160  // longjmp and clobber warnings. Volatile is required.
161  // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
162  volatile bool result = true;
163 
164  volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSSE2);
165  if (oldHandler == SIG_ERR)
166  return false;
167 
168 # ifndef __MINGW32__
169  volatile sigset_t oldMask;
170  if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
171  return false;
172 # endif
173 
174  if (setjmp(s_jmpNoSSE2))
175  result = false;
176  else
177  {
178 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
179  __asm __volatile ("por %xmm0, %xmm0");
180 #elif CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
181  __m128i x = _mm_setzero_si128();
182  result = _mm_cvtsi128_si32(x) == 0;
183 #endif
184  }
185 
186 # ifndef __MINGW32__
187  sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
188 # endif
189 
190  signal(SIGILL, oldHandler);
191  return result;
192 #endif
193 }
194 
195 bool CRYPTOPP_SECTION_INIT g_x86DetectionDone = false;
196 bool CRYPTOPP_SECTION_INIT g_hasMMX = false, CRYPTOPP_SECTION_INIT g_hasISSE = false, CRYPTOPP_SECTION_INIT g_hasSSE2 = false, CRYPTOPP_SECTION_INIT g_hasSSSE3 = false;
197 bool CRYPTOPP_SECTION_INIT g_hasSSE4 = false, CRYPTOPP_SECTION_INIT g_hasAESNI = false, CRYPTOPP_SECTION_INIT g_hasCLMUL = false, CRYPTOPP_SECTION_INIT g_isP4 = false;
198 bool CRYPTOPP_SECTION_INIT g_hasRDRAND = false, CRYPTOPP_SECTION_INIT g_hasRDSEED = false;
199 bool CRYPTOPP_SECTION_INIT g_hasPadlockRNG = false, CRYPTOPP_SECTION_INIT g_hasPadlockACE = false, CRYPTOPP_SECTION_INIT g_hasPadlockACE2 = false;
200 bool CRYPTOPP_SECTION_INIT g_hasPadlockPHE = false, CRYPTOPP_SECTION_INIT g_hasPadlockPMM = false;
201 word32 CRYPTOPP_SECTION_INIT g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;
202 
203 static inline bool IsIntel(const word32 output[4])
204 {
205  // This is the "GenuineIntel" string
206  return (output[1] /*EBX*/ == 0x756e6547) &&
207  (output[2] /*ECX*/ == 0x6c65746e) &&
208  (output[3] /*EDX*/ == 0x49656e69);
209 }
210 
211 static inline bool IsAMD(const word32 output[4])
212 {
213  // This is the "AuthenticAMD" string. Some early K5's can return "AMDisbetter!"
214  return (output[1] /*EBX*/ == 0x68747541) &&
215  (output[2] /*ECX*/ == 0x444D4163) &&
216  (output[3] /*EDX*/ == 0x69746E65);
217 }
218 
219 static inline bool IsVIA(const word32 output[4])
220 {
221  // This is the "CentaurHauls" string. Some non-PadLock's can return "VIA VIA VIA "
222  return (output[1] /*EBX*/ == 0x746e6543) &&
223  (output[2] /*ECX*/ == 0x736c7561) &&
224  (output[3] /*EDX*/ == 0x48727561);
225 }
226 
227 #if HAVE_GCC_CONSTRUCTOR1
228 void __attribute__ ((constructor (CRYPTOPP_INIT_PRIORITY + 50))) DetectX86Features()
229 #elif HAVE_GCC_CONSTRUCTOR0
230 void __attribute__ ((constructor)) DetectX86Features()
231 #else
232 void DetectX86Features()
233 #endif
234 {
235  word32 cpuid[4], cpuid1[4];
236  if (!CpuId(0, cpuid))
237  return;
238  if (!CpuId(1, cpuid1))
239  return;
240 
241  g_hasMMX = (cpuid1[3] & (1 << 23)) != 0;
242  if ((cpuid1[3] & (1 << 26)) != 0)
243  g_hasSSE2 = TrySSE2();
244  g_hasSSSE3 = g_hasSSE2 && (cpuid1[2] & (1<<9));
245  g_hasSSE4 = g_hasSSE2 && ((cpuid1[2] & (1<<19)) && (cpuid1[2] & (1<<20)));
246  g_hasAESNI = g_hasSSE2 && (cpuid1[2] & (1<<25));
247  g_hasCLMUL = g_hasSSE2 && (cpuid1[2] & (1<<1));
248 
249  if ((cpuid1[3] & (1 << 25)) != 0)
250  g_hasISSE = true;
251  else
252  {
253  word32 cpuid2[4];
254  CpuId(0x080000000, cpuid2);
255  if (cpuid2[0] >= 0x080000001)
256  {
257  CpuId(0x080000001, cpuid2);
258  g_hasISSE = (cpuid2[3] & (1 << 22)) != 0;
259  }
260  }
261 
262  if (IsIntel(cpuid))
263  {
264  static const unsigned int RDRAND_FLAG = (1 << 30);
265  static const unsigned int RDSEED_FLAG = (1 << 18);
266 
267  g_isP4 = ((cpuid1[0] >> 8) & 0xf) == 0xf;
268  g_cacheLineSize = 8 * GETBYTE(cpuid1[1], 1);
269  g_hasRDRAND = !!(cpuid1[2] /*ECX*/ & RDRAND_FLAG);
270 
271  if (cpuid[0] /*EAX*/ >= 7)
272  {
273  word32 cpuid3[4];
274  if (CpuId(7, cpuid3))
275  g_hasRDSEED = !!(cpuid3[1] /*EBX*/ & RDSEED_FLAG);
276  }
277  }
278  else if (IsAMD(cpuid))
279  {
280  static const unsigned int RDRAND_FLAG = (1 << 30);
281 
282  CpuId(0x01, cpuid);
283  g_hasRDRAND = !!(cpuid[2] /*ECX*/ & RDRAND_FLAG);
284 
285  CpuId(0x80000005, cpuid);
286  g_cacheLineSize = GETBYTE(cpuid[2], 0);
287  }
288  else if (IsVIA(cpuid))
289  {
290  static const unsigned int RNG_FLAGS = (0x3 << 2);
291  static const unsigned int ACE_FLAGS = (0x3 << 6);
292  static const unsigned int ACE2_FLAGS = (0x3 << 8);
293  static const unsigned int PHE_FLAGS = (0x3 << 10);
294  static const unsigned int PMM_FLAGS = (0x3 << 12);
295 
296  CpuId(0xC0000000, cpuid);
297  if (cpuid[0] >= 0xC0000001)
298  {
299  // Extended features available
300  CpuId(0xC0000001, cpuid);
301  g_hasPadlockRNG = !!(cpuid[3] /*EDX*/ & RNG_FLAGS);
302  g_hasPadlockACE = !!(cpuid[3] /*EDX*/ & ACE_FLAGS);
303  g_hasPadlockACE2 = !!(cpuid[3] /*EDX*/ & ACE2_FLAGS);
304  g_hasPadlockPHE = !!(cpuid[3] /*EDX*/ & PHE_FLAGS);
305  g_hasPadlockPMM = !!(cpuid[3] /*EDX*/ & PMM_FLAGS);
306  }
307  }
308 
309  if (!g_cacheLineSize)
310  g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;
311 
312  *((volatile bool*)&g_x86DetectionDone) = true;
313 }
314 
315 #elif (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64)
316 
317 // The ARM equivalent of CPUID probing is reading a MSR. The code requires Exception Level 1 (EL1) and above, but user space runs at EL0.
318 // Attempting to run the code results in a SIGILL and termination.
319 //
320 // #if defined(__arm64__) || defined(__aarch64__)
321 // word64 caps = 0; // Read ID_AA64ISAR0_EL1
322 // __asm __volatile("mrs %0, " "id_aa64isar0_el1" : "=r" (caps));
323 // #elif defined(__arm__) || defined(__aarch32__)
324 // word32 caps = 0; // Read ID_ISAR5_EL1
325 // __asm __volatile("mrs %0, " "id_isar5_el1" : "=r" (caps));
326 // #endif
327 //
// The following does not work well either. It appears to be missing constants, and it does not detect Aarch32 execution environments on Aarch64
329 // http://community.arm.com/groups/android-community/blog/2014/10/10/runtime-detection-of-cpu-features-on-an-armv8-a-cpu
330 //
331 bool CRYPTOPP_SECTION_INIT g_ArmDetectionDone = false;
332 bool CRYPTOPP_SECTION_INIT g_hasNEON = false, CRYPTOPP_SECTION_INIT g_hasPMULL = false, CRYPTOPP_SECTION_INIT g_hasCRC32 = false;
333 bool CRYPTOPP_SECTION_INIT g_hasAES = false, CRYPTOPP_SECTION_INIT g_hasSHA1 = false, CRYPTOPP_SECTION_INIT g_hasSHA2 = false;
334 word32 CRYPTOPP_SECTION_INIT g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;
335 
336 #ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
extern "C"
{
	// One jump buffer per probe; each is armed by setjmp() in the matching
	// Try*() function.
	static jmp_buf s_jmpNoNEON;
	static jmp_buf s_jmpNoPMULL;
	static jmp_buf s_jmpNoCRC32;
	static jmp_buf s_jmpNoAES;
	static jmp_buf s_jmpNoSHA1;
	static jmp_buf s_jmpNoSHA2;

	// SIGILL handlers installed while a probe runs. Each one unwinds to the
	// setjmp() site of its probe, making setjmp() return 1.
	static void SigIllHandlerNEON(int /*signum*/)
	{
		longjmp(s_jmpNoNEON, 1);
	}

	static void SigIllHandlerPMULL(int /*signum*/)
	{
		longjmp(s_jmpNoPMULL, 1);
	}

	static void SigIllHandlerCRC32(int /*signum*/)
	{
		longjmp(s_jmpNoCRC32, 1);
	}

	static void SigIllHandlerAES(int /*signum*/)
	{
		longjmp(s_jmpNoAES, 1);
	}

	static void SigIllHandlerSHA1(int /*signum*/)
	{
		longjmp(s_jmpNoSHA1, 1);
	}

	static void SigIllHandlerSHA2(int /*signum*/)
	{
		longjmp(s_jmpNoSHA2, 1);
	}
}
375 #endif // Not CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
376 
// Checks whether NEON instructions can actually be executed.
// Returns false when NEON intrinsics are not compiled in, when the probe
// faults (structured exception or SIGILL), or when the signal machinery
// cannot be set up. Otherwise returns the result computed from the lanes
// the probe reads back.
static bool TryNEON()
{
#if (CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
	volatile bool result = true;
	__try
	{
		uint32_t v1[4] = {1,1,1,1};
		uint32x4_t x1 = vld1q_u32(v1);
		uint64_t v2[2] = {1,1};
		uint64x2_t x2 = vld1q_u64(v2);

		uint32x4_t x3 = vdupq_n_u32(2);
		x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0);
		x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3);
		uint64x2_t x4 = vdupq_n_u64(2);
		x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0);
		x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1);

		result = !!(vgetq_lane_u32(x3,0) | vgetq_lane_u64(x4,1));
	}
	__except (EXCEPTION_EXECUTE_HANDLER)
	{
		return false;
	}
	return result;
# else
	// longjmp and clobber warnings. Volatile is required.
	// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
	volatile bool result = true;

	volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerNEON);
	if (oldHandler == SIG_ERR)
		return false;

	volatile sigset_t oldMask;
	if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
	{
		// The probe handler is already installed at this point; put the
		// caller's handler back before bailing out so it is not leaked.
		signal(SIGILL, oldHandler);
		return false;
	}

	if (setjmp(s_jmpNoNEON))
		result = false;	// reached via longjmp from SigIllHandlerNEON
	else
	{
		uint32_t v1[4] = {1,1,1,1};
		uint32x4_t x1 = vld1q_u32(v1);
		uint64_t v2[2] = {1,1};
		uint64x2_t x2 = vld1q_u64(v2);

		uint32x4_t x3 = {0,0,0,0};
		x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0);
		x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3);
		uint64x2_t x4 = {0,0};
		x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0);
		x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1);

		// Hack... GCC optimizes away the code and returns true
		result = !!(vgetq_lane_u32(x3,0) | vgetq_lane_u64(x4,1));
	}

	// Put back the signal mask and handler captured before the probe.
	sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
	signal(SIGILL, oldHandler);
	return result;
# endif
#else
	return false;
#endif  // CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE
}
444 
// Checks whether the polynomial multiply instructions (vmull_p64 and
// vmull_high_p64) can be executed and produce the expected products.
// Returns false when the ARM crypto intrinsics are not compiled in, when
// the probe faults (structured exception or SIGILL), when the signal
// machinery cannot be set up, or when a product differs from the expected
// constant.
static bool TryPMULL()
{
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
	volatile bool result = true;
	__try
	{
		const poly64_t   a1={2}, b1={3};
		const poly64x2_t a2={4,5}, b2={6,7};
		const poly64x2_t a3={0x8080808080808080,0xa0a0a0a0a0a0a0a0}, b3={0xc0c0c0c0c0c0c0c0, 0xe0e0e0e0e0e0e0e0};

		const poly128_t r1 = vmull_p64(a1, b1);
		const poly128_t r2 = vmull_high_p64(a2, b2);
		const poly128_t r3 = vmull_high_p64(a3, b3);

		// Also see https://github.com/weidai11/cryptopp/issues/233.
		const uint64x2_t& t1 = vreinterpretq_u64_p128(r1);  // {6,0}
		const uint64x2_t& t2 = vreinterpretq_u64_p128(r2);  // {24,0}
		const uint64x2_t& t3 = vreinterpretq_u64_p128(r3);  // {bignum,bignum}

		result = !!(vgetq_lane_u64(t1,0) == 0x06 && vgetq_lane_u64(t1,1) == 0x00 && vgetq_lane_u64(t2,0) == 0x1b &&
			vgetq_lane_u64(t2,1) == 0x00 && vgetq_lane_u64(t3,0) == 0x6c006c006c006c00 && vgetq_lane_u64(t3,1) == 0x6c006c006c006c00);
	}
	__except (EXCEPTION_EXECUTE_HANDLER)
	{
		return false;
	}
	return result;
# else
	// longjmp and clobber warnings. Volatile is required.
	// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
	volatile bool result = true;

	volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerPMULL);
	if (oldHandler == SIG_ERR)
		return false;

	volatile sigset_t oldMask;
	if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
	{
		// The probe handler is already installed at this point; put the
		// caller's handler back before bailing out so it is not leaked.
		signal(SIGILL, oldHandler);
		return false;
	}

	if (setjmp(s_jmpNoPMULL))
		result = false;	// reached via longjmp from SigIllHandlerPMULL
	else
	{
		const poly64_t   a1={2}, b1={3};
		const poly64x2_t a2={4,5}, b2={6,7};
		const poly64x2_t a3={0x8080808080808080,0xa0a0a0a0a0a0a0a0}, b3={0xc0c0c0c0c0c0c0c0, 0xe0e0e0e0e0e0e0e0};

		const poly128_t r1 = vmull_p64(a1, b1);
		const poly128_t r2 = vmull_high_p64(a2, b2);
		const poly128_t r3 = vmull_high_p64(a3, b3);

		// Linaro is missing vreinterpretq_u64_p128. Also see https://github.com/weidai11/cryptopp/issues/233.
		const uint64x2_t& t1 = (uint64x2_t)(r1);  // {6,0}
		const uint64x2_t& t2 = (uint64x2_t)(r2);  // {24,0}
		const uint64x2_t& t3 = (uint64x2_t)(r3);  // {bignum,bignum}

		result = !!(vgetq_lane_u64(t1,0) == 0x06 && vgetq_lane_u64(t1,1) == 0x00 && vgetq_lane_u64(t2,0) == 0x1b &&
			vgetq_lane_u64(t2,1) == 0x00 && vgetq_lane_u64(t3,0) == 0x6c006c006c006c00 && vgetq_lane_u64(t3,1) == 0x6c006c006c006c00);
	}

	// Put back the signal mask and handler captured before the probe.
	sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
	signal(SIGILL, oldHandler);
	return result;
# endif
#else
	return false;
#endif  // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
}
515 
// Checks whether the CRC32C instructions (__crc32cw, __crc32ch, __crc32cb)
// can be executed.
// Returns false when the ARM CRC32 intrinsics are not compiled in, when the
// probe faults (structured exception or SIGILL), or when the signal
// machinery cannot be set up. Otherwise returns whether the accumulated
// checksum is non-zero.
static bool TryCRC32()
{
#if (CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
	volatile bool result = true;
	__try
	{
		word32 w=0, x=1; word16 y=2; byte z=3;
		w = __crc32cw(w,x);
		w = __crc32ch(w,y);
		w = __crc32cb(w,z);

		result = !!w;
	}
	__except (EXCEPTION_EXECUTE_HANDLER)
	{
		return false;
	}
	return result;
# else
	// longjmp and clobber warnings. Volatile is required.
	// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
	volatile bool result = true;

	volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerCRC32);
	if (oldHandler == SIG_ERR)
		return false;

	volatile sigset_t oldMask;
	if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
	{
		// The probe handler is already installed at this point; put the
		// caller's handler back before bailing out so it is not leaked.
		signal(SIGILL, oldHandler);
		return false;
	}

	if (setjmp(s_jmpNoCRC32))
		result = false;	// reached via longjmp from SigIllHandlerCRC32
	else
	{
		word32 w=0, x=1; word16 y=2; byte z=3;
		w = __crc32cw(w,x);
		w = __crc32ch(w,y);
		w = __crc32cb(w,z);

		// Hack... GCC optimizes away the code and returns true
		result = !!w;
	}

	// Put back the signal mask and handler captured before the probe.
	sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
	signal(SIGILL, oldHandler);
	return result;
# endif
#else
	return false;
#endif  // CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE
}
569 
// Checks whether the AES instructions (vaeseq_u8 and vaesdq_u8) can be
// executed.
// Returns false when the ARM crypto intrinsics are not compiled in, when
// the probe faults (structured exception or SIGILL), or when the signal
// machinery cannot be set up. Otherwise returns whether the lanes read
// back from the two results are non-zero.
static bool TryAES()
{
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
	volatile bool result = true;
	__try
	{
		// AES encrypt and decrypt
		uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0);
		uint8x16_t r1 = vaeseq_u8(data, key);
		uint8x16_t r2 = vaesdq_u8(data, key);

		result = !!(vgetq_lane_u8(r1,0) | vgetq_lane_u8(r2,7));
	}
	__except (EXCEPTION_EXECUTE_HANDLER)
	{
		return false;
	}
	return result;
# else
	// longjmp and clobber warnings. Volatile is required.
	// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
	volatile bool result = true;

	volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerAES);
	if (oldHandler == SIG_ERR)
		return false;

	volatile sigset_t oldMask;
	if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
	{
		// The probe handler is already installed at this point; put the
		// caller's handler back before bailing out so it is not leaked.
		signal(SIGILL, oldHandler);
		return false;
	}

	if (setjmp(s_jmpNoAES))
		result = false;	// reached via longjmp from SigIllHandlerAES
	else
	{
		// AES encrypt and decrypt
		uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0);
		uint8x16_t r1 = vaeseq_u8(data, key);
		uint8x16_t r2 = vaesdq_u8(data, key);

		// Hack... GCC optimizes away the code and returns true
		result = !!(vgetq_lane_u8(r1,0) | vgetq_lane_u8(r2,7));
	}

	// Put back the signal mask and handler captured before the probe.
	sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
	signal(SIGILL, oldHandler);
	return result;
# endif
#else
	return false;
#endif  // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
}
622 
// Checks whether the SHA-1 instructions (vsha1cq_u32, vsha1mq_u32,
// vsha1pq_u32, vsha1su0q_u32, vsha1su1q_u32) can be executed.
// Returns false when the ARM crypto intrinsics are not compiled in, when
// the probe faults (structured exception or SIGILL), or when the signal
// machinery cannot be set up. Otherwise returns whether the lanes read
// back from the five results are non-zero.
static bool TrySHA1()
{
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
	volatile bool result = true;
	__try
	{
		uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};

		uint32x4_t r1 = vsha1cq_u32 (data1, 0, data2);
		uint32x4_t r2 = vsha1mq_u32 (data1, 0, data2);
		uint32x4_t r3 = vsha1pq_u32 (data1, 0, data2);
		uint32x4_t r4 = vsha1su0q_u32 (data1, data2, data3);
		uint32x4_t r5 = vsha1su1q_u32 (data1, data2);

		result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3) | vgetq_lane_u32(r5,0));
	}
	__except (EXCEPTION_EXECUTE_HANDLER)
	{
		return false;
	}
	return result;
# else
	// longjmp and clobber warnings. Volatile is required.
	// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
	volatile bool result = true;

	volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA1);
	if (oldHandler == SIG_ERR)
		return false;

	volatile sigset_t oldMask;
	if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
	{
		// The probe handler is already installed at this point; put the
		// caller's handler back before bailing out so it is not leaked.
		signal(SIGILL, oldHandler);
		return false;
	}

	if (setjmp(s_jmpNoSHA1))
		result = false;	// reached via longjmp from SigIllHandlerSHA1
	else
	{
		uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};

		uint32x4_t r1 = vsha1cq_u32 (data1, 0, data2);
		uint32x4_t r2 = vsha1mq_u32 (data1, 0, data2);
		uint32x4_t r3 = vsha1pq_u32 (data1, 0, data2);
		uint32x4_t r4 = vsha1su0q_u32 (data1, data2, data3);
		uint32x4_t r5 = vsha1su1q_u32 (data1, data2);

		// Hack... GCC optimizes away the code and returns true
		result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3) | vgetq_lane_u32(r5,0));
	}

	// Put back the signal mask and handler captured before the probe.
	sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
	signal(SIGILL, oldHandler);
	return result;
# endif
#else
	return false;
#endif  // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
}
682 
// Checks whether the SHA-256 instructions (vsha256hq_u32, vsha256h2q_u32,
// vsha256su0q_u32, vsha256su1q_u32) can be executed.
// Returns false when the ARM crypto intrinsics are not compiled in, when
// the probe faults (structured exception or SIGILL), or when the signal
// machinery cannot be set up. Otherwise returns whether the lanes read
// back from the four results are non-zero.
static bool TrySHA2()
{
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
	volatile bool result = true;
	__try
	{
		uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};

		uint32x4_t r1 = vsha256hq_u32 (data1, data2, data3);
		uint32x4_t r2 = vsha256h2q_u32 (data1, data2, data3);
		uint32x4_t r3 = vsha256su0q_u32 (data1, data2);
		uint32x4_t r4 = vsha256su1q_u32 (data1, data2, data3);

		result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3));
	}
	__except (EXCEPTION_EXECUTE_HANDLER)
	{
		return false;
	}
	return result;
# else
	// longjmp and clobber warnings. Volatile is required.
	// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
	volatile bool result = true;

	volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA2);
	if (oldHandler == SIG_ERR)
		return false;

	volatile sigset_t oldMask;
	if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
	{
		// The probe handler is already installed at this point; put the
		// caller's handler back before bailing out so it is not leaked.
		signal(SIGILL, oldHandler);
		return false;
	}

	if (setjmp(s_jmpNoSHA2))
		result = false;	// reached via longjmp from SigIllHandlerSHA2
	else
	{
		uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};

		uint32x4_t r1 = vsha256hq_u32 (data1, data2, data3);
		uint32x4_t r2 = vsha256h2q_u32 (data1, data2, data3);
		uint32x4_t r3 = vsha256su0q_u32 (data1, data2);
		uint32x4_t r4 = vsha256su1q_u32 (data1, data2, data3);

		// Hack... GCC optimizes away the code and returns true
		result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3));
	}

	// Put back the signal mask and handler captured before the probe.
	sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
	signal(SIGILL, oldHandler);
	return result;
# endif
#else
	return false;
#endif  // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
}
740 
741 #if HAVE_GCC_CONSTRUCTOR1
742 void __attribute__ ((constructor (CRYPTOPP_INIT_PRIORITY + 50))) DetectArmFeatures()
743 #elif HAVE_GCC_CONSTRUCTOR0
744 void __attribute__ ((constructor)) DetectArmFeatures()
745 #else
746 void DetectArmFeatures()
747 #endif
748 {
749  g_hasNEON = TryNEON();
750  g_hasPMULL = TryPMULL();
751  g_hasCRC32 = TryCRC32();
752  g_hasAES = TryAES();
753  g_hasSHA1 = TrySHA1();
754  g_hasSHA2 = TrySHA2();
755 
756  *((volatile bool*)&g_ArmDetectionDone) = true;
757 }
758 
759 #endif
760 
761 NAMESPACE_END
762 
763 #endif
Utility functions for the Crypto++ library.
Library configuration file.
Functions for CPU features and intrinsics.
Crypto++ library namespace.