Crypto++  5.6.5
Free C++ class library of cryptographic schemes
cpu.cpp
1 // cpu.cpp - originally written and placed in the public domain by Wei Dai
2 
3 #include "pch.h"
4 #include "config.h"
5 
6 #ifndef EXCEPTION_EXECUTE_HANDLER
7 # define EXCEPTION_EXECUTE_HANDLER 1
8 #endif
9 
10 #ifndef CRYPTOPP_IMPORTS
11 
12 #include "cpu.h"
13 #include "misc.h"
14 #include <algorithm>
15 
16 #ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
17 #include <signal.h>
18 #include <setjmp.h>
19 #endif
20 
21 NAMESPACE_BEGIN(CryptoPP)
22 
23 #ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
extern "C"
{
    // C-linkage pointer type for a signal handler. The probes in this file
    // use it to remember the handler value returned by signal().
    typedef void (*SigHandler)(int);
}
27 #endif // Not CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
28 
29 #ifdef CRYPTOPP_CPUID_AVAILABLE
30 
31 #if _MSC_VER >= 1400 && CRYPTOPP_BOOL_X64
32 
33 bool CpuId(word32 input, word32 output[4])
34 {
35  __cpuid((int *)output, input);
36  return true;
37 }
38 
39 #else
40 
41 #ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
extern "C"
{
    // Jump targets armed with setjmp() by CpuId() and TrySSE2() respectively.
    static jmp_buf s_jmpNoCPUID;
    static jmp_buf s_jmpNoSSE2;

    // SIGILL handler used while probing CPUID: jumps back to s_jmpNoCPUID
    // so that the matching setjmp() returns 1.
    static void SigIllHandlerCPUID(int /*signo*/)
    {
        longjmp(s_jmpNoCPUID, 1);
    }

    // SIGILL handler used while probing SSE2: jumps back to s_jmpNoSSE2
    // so that the matching setjmp() returns 1.
    static void SigIllHandlerSSE2(int /*signo*/)
    {
        longjmp(s_jmpNoSSE2, 1);
    }
}
56 #endif
57 
58 bool CpuId(word32 input, word32 output[4])
59 {
60 #if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
61  __try
62  {
63  __asm
64  {
65  mov eax, input
66  mov ecx, 0
67  cpuid
68  mov edi, output
69  mov [edi], eax
70  mov [edi+4], ebx
71  mov [edi+8], ecx
72  mov [edi+12], edx
73  }
74  }
75  // GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION
76  __except (EXCEPTION_EXECUTE_HANDLER)
77  {
78  return false;
79  }
80 
81  // function 0 returns the highest basic function understood in EAX
82  if(input == 0)
83  return !!output[0];
84 
85  return true;
86 #else
87  // longjmp and clobber warnings. Volatile is required.
88  // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
89  volatile bool result = true;
90 
91  volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerCPUID);
92  if (oldHandler == SIG_ERR)
93  return false;
94 
95 # ifndef __MINGW32__
96  volatile sigset_t oldMask;
97  if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
98  return false;
99 # endif
100 
101  if (setjmp(s_jmpNoCPUID))
102  result = false;
103  else
104  {
105  asm volatile
106  (
107  // save ebx in case -fPIC is being used
108  // TODO: this might need an early clobber on EDI.
109 # if CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64
110  "pushq %%rbx; cpuid; mov %%ebx, %%edi; popq %%rbx"
111 # else
112  "push %%ebx; cpuid; mov %%ebx, %%edi; pop %%ebx"
113 # endif
114  : "=a" (output[0]), "=D" (output[1]), "=c" (output[2]), "=d" (output[3])
115  : "a" (input), "c" (0)
116  : "cc"
117  );
118  }
119 
120 # ifndef __MINGW32__
121  sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
122 # endif
123 
124  signal(SIGILL, oldHandler);
125  return result;
126 #endif
127 }
128 
129 #endif
130 
131 static bool TrySSE2()
132 {
133 #if CRYPTOPP_BOOL_X64
134  return true;
135 #elif defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
136  __try
137  {
138 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
139  AS2(por xmm0, xmm0) // executing SSE2 instruction
140 #elif CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
141  __m128i x = _mm_setzero_si128();
142  return _mm_cvtsi128_si32(x) == 0;
143 #endif
144  }
145  // GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION
146  __except (EXCEPTION_EXECUTE_HANDLER)
147  {
148  return false;
149  }
150  return true;
151 #else
152  // longjmp and clobber warnings. Volatile is required.
153  // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
154  volatile bool result = true;
155 
156  volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSSE2);
157  if (oldHandler == SIG_ERR)
158  return false;
159 
160 # ifndef __MINGW32__
161  volatile sigset_t oldMask;
162  if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
163  return false;
164 # endif
165 
166  if (setjmp(s_jmpNoSSE2))
167  result = false;
168  else
169  {
170 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
171  __asm __volatile ("por %xmm0, %xmm0");
172 #elif CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
173  __m128i x = _mm_setzero_si128();
174  result = _mm_cvtsi128_si32(x) == 0;
175 #endif
176  }
177 
178 # ifndef __MINGW32__
179  sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
180 # endif
181 
182  signal(SIGILL, oldHandler);
183  return result;
184 #endif
185 }
186 
187 bool CRYPTOPP_SECTION_INIT g_x86DetectionDone = false;
188 bool CRYPTOPP_SECTION_INIT g_hasMMX = false, CRYPTOPP_SECTION_INIT g_hasISSE = false, CRYPTOPP_SECTION_INIT g_hasSSE2 = false, CRYPTOPP_SECTION_INIT g_hasSSSE3 = false;
189 bool CRYPTOPP_SECTION_INIT g_hasSSE4 = false, CRYPTOPP_SECTION_INIT g_hasAESNI = false, CRYPTOPP_SECTION_INIT g_hasCLMUL = false, CRYPTOPP_SECTION_INIT g_hasSHA = false;
190 bool CRYPTOPP_SECTION_INIT g_hasRDRAND = false, CRYPTOPP_SECTION_INIT g_hasRDSEED = false, CRYPTOPP_SECTION_INIT g_isP4 = false;
191 bool CRYPTOPP_SECTION_INIT g_hasPadlockRNG = false, CRYPTOPP_SECTION_INIT g_hasPadlockACE = false, CRYPTOPP_SECTION_INIT g_hasPadlockACE2 = false;
192 bool CRYPTOPP_SECTION_INIT g_hasPadlockPHE = false, CRYPTOPP_SECTION_INIT g_hasPadlockPMM = false;
193 word32 CRYPTOPP_SECTION_INIT g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;
194 
195 static inline bool IsIntel(const word32 output[4])
196 {
197  // This is the "GenuineIntel" string
198  return (output[1] /*EBX*/ == 0x756e6547) &&
199  (output[2] /*ECX*/ == 0x6c65746e) &&
200  (output[3] /*EDX*/ == 0x49656e69);
201 }
202 
203 static inline bool IsAMD(const word32 output[4])
204 {
205  // This is the "AuthenticAMD" string. Some early K5's can return "AMDisbetter!"
206  return (output[1] /*EBX*/ == 0x68747541) &&
207  (output[2] /*ECX*/ == 0x444D4163) &&
208  (output[3] /*EDX*/ == 0x69746E65);
209 }
210 
211 static inline bool IsVIA(const word32 output[4])
212 {
213  // This is the "CentaurHauls" string. Some non-PadLock's can return "VIA VIA VIA "
214  return (output[1] /*EBX*/ == 0x746e6543) &&
215  (output[2] /*ECX*/ == 0x736c7561) &&
216  (output[3] /*EDX*/ == 0x48727561);
217 }
218 
219 void DetectX86Features()
220 {
221  // Coverity finding CID 171239...
222  word32 cpuid1[4]={0}, cpuid2[4]={0}, cpuid3[4]={0};
223  if (!CpuId(0, cpuid1))
224  return;
225  if (!CpuId(1, cpuid2))
226  return;
227 
228  g_hasMMX = (cpuid2[3] & (1 << 23)) != 0;
229  if ((cpuid2[3] & (1 << 26)) != 0)
230  g_hasSSE2 = TrySSE2();
231  g_hasSSSE3 = g_hasSSE2 && (cpuid2[2] & (1<<9));
232  g_hasSSE4 = g_hasSSE2 && ((cpuid2[2] & (1<<19)) && (cpuid2[2] & (1<<20)));
233  g_hasAESNI = g_hasSSE2 && (cpuid2[2] & (1<<25));
234  g_hasCLMUL = g_hasSSE2 && (cpuid2[2] & (1<<1));
235 
236  if ((cpuid2[3] & (1 << 25)) != 0)
237  g_hasISSE = true;
238  else
239  {
240  CpuId(0x080000000, cpuid3);
241  if (cpuid3[0] >= 0x080000001)
242  {
243  CpuId(0x080000001, cpuid3);
244  g_hasISSE = (cpuid3[3] & (1 << 22)) != 0;
245  }
246  }
247 
248  if (IsIntel(cpuid1))
249  {
250  static const unsigned int RDRAND_FLAG = (1 << 30);
251  static const unsigned int RDSEED_FLAG = (1 << 18);
252  static const unsigned int SHA_FLAG = (1 << 29);
253 
254  g_isP4 = ((cpuid2[0] >> 8) & 0xf) == 0xf;
255  g_cacheLineSize = 8 * GETBYTE(cpuid2[1], 1);
256  g_hasRDRAND = !!(cpuid2[2] /*ECX*/ & RDRAND_FLAG);
257 
258  if (cpuid1[0] /*EAX*/ >= 7)
259  {
260  if (CpuId(7, cpuid3))
261  {
262  g_hasRDSEED = !!(cpuid3[1] /*EBX*/ & RDSEED_FLAG);
263  g_hasSHA = !!(cpuid3[1] /*EBX*/ & SHA_FLAG);
264  }
265  }
266  }
267  else if (IsAMD(cpuid1))
268  {
269  static const unsigned int RDRAND_FLAG = (1 << 30);
270  static const unsigned int RDSEED_FLAG = (1 << 18);
271  static const unsigned int SHA_FLAG = (1 << 29);
272 
273  CpuId(0x01, cpuid1);
274  g_hasRDRAND = !!(cpuid1[2] /*ECX*/ & RDRAND_FLAG);
275 
276  if (cpuid1[0] /*EAX*/ >= 7)
277  {
278  if (CpuId(7, cpuid3))
279  {
280  g_hasRDSEED = !!(cpuid3[1] /*EBX*/ & RDSEED_FLAG);
281  g_hasSHA = !!(cpuid3[1] /*EBX*/ & SHA_FLAG);
282  }
283  }
284 
285  CpuId(0x80000005, cpuid1);
286  g_cacheLineSize = GETBYTE(cpuid1[2], 0);
287  }
288  else if (IsVIA(cpuid1))
289  {
290  static const unsigned int RNG_FLAGS = (0x3 << 2);
291  static const unsigned int ACE_FLAGS = (0x3 << 6);
292  static const unsigned int ACE2_FLAGS = (0x3 << 8);
293  static const unsigned int PHE_FLAGS = (0x3 << 10);
294  static const unsigned int PMM_FLAGS = (0x3 << 12);
295 
296  CpuId(0xC0000000, cpuid1);
297  if (cpuid1[0] >= 0xC0000001)
298  {
299  // Extended features available
300  CpuId(0xC0000001, cpuid1);
301  g_hasPadlockRNG = !!(cpuid1[3] /*EDX*/ & RNG_FLAGS);
302  g_hasPadlockACE = !!(cpuid1[3] /*EDX*/ & ACE_FLAGS);
303  g_hasPadlockACE2 = !!(cpuid1[3] /*EDX*/ & ACE2_FLAGS);
304  g_hasPadlockPHE = !!(cpuid1[3] /*EDX*/ & PHE_FLAGS);
305  g_hasPadlockPMM = !!(cpuid1[3] /*EDX*/ & PMM_FLAGS);
306  }
307  }
308 
309  if (!g_cacheLineSize)
310  g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;
311 
312  g_x86DetectionDone = true;
313 }
314 
315 #elif (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64)
316 
317 // The ARM equivalent of CPUID probing is reading a MSR. The code requires Exception Level 1 (EL1) and above, but user space runs at EL0.
318 // Attempting to run the code results in a SIGILL and termination.
319 //
320 // #if defined(__arm64__) || defined(__aarch64__)
321 // word64 caps = 0; // Read ID_AA64ISAR0_EL1
322 // __asm __volatile("mrs %0, " "id_aa64isar0_el1" : "=r" (caps));
323 // #elif defined(__arm__) || defined(__aarch32__)
324 // word32 caps = 0; // Read ID_ISAR5_EL1
325 // __asm __volatile("mrs %0, " "id_isar5_el1" : "=r" (caps));
326 // #endif
327 //
328 // The following does not work well either. It appears to be missing constants, and it does not detect Aarch32 execution environments on Aarch64
329 // http://community.arm.com/groups/android-community/blog/2014/10/10/runtime-detection-of-cpu-features-on-an-armv8-a-cpu
330 //
331 bool CRYPTOPP_SECTION_INIT g_ArmDetectionDone = false;
332 bool CRYPTOPP_SECTION_INIT g_hasNEON = false, CRYPTOPP_SECTION_INIT g_hasPMULL = false, CRYPTOPP_SECTION_INIT g_hasCRC32 = false;
333 bool CRYPTOPP_SECTION_INIT g_hasAES = false, CRYPTOPP_SECTION_INIT g_hasSHA1 = false, CRYPTOPP_SECTION_INIT g_hasSHA2 = false;
334 word32 CRYPTOPP_SECTION_INIT g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;
335 
336 #ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
extern "C"
{
    // One jump buffer per probe; each is armed with setjmp() by the Try*
    // function named in the matching handler below.
    static jmp_buf s_jmpNoNEON;
    static jmp_buf s_jmpNoPMULL;
    static jmp_buf s_jmpNoCRC32;
    static jmp_buf s_jmpNoAES;
    static jmp_buf s_jmpNoSHA1;
    static jmp_buf s_jmpNoSHA2;

    // Each SIGILL handler jumps back to its probe's setjmp() point, which
    // then returns 1.
    static void SigIllHandlerNEON(int /*signo*/)
    {
        longjmp(s_jmpNoNEON, 1);
    }

    static void SigIllHandlerPMULL(int /*signo*/)
    {
        longjmp(s_jmpNoPMULL, 1);
    }

    static void SigIllHandlerCRC32(int /*signo*/)
    {
        longjmp(s_jmpNoCRC32, 1);
    }

    static void SigIllHandlerAES(int /*signo*/)
    {
        longjmp(s_jmpNoAES, 1);
    }

    static void SigIllHandlerSHA1(int /*signo*/)
    {
        longjmp(s_jmpNoSHA1, 1);
    }

    static void SigIllHandlerSHA2(int /*signo*/)
    {
        longjmp(s_jmpNoSHA2, 1);
    }
}
375 #endif // Not CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
376 
// Probes for NEON by executing a handful of NEON intrinsics under a fault
// guard (structured exception handling on the MS-style path, a temporary
// SIGILL handler otherwise).
//
// Returns false when the intrinsics are compiled out, when execution faults,
// or when the signal handler or signal mask cannot be obtained. Otherwise
// returns whether the value computed by the intrinsics is nonzero.
static bool TryNEON()
{
#if (CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
    volatile bool result = true;
    __try
    {
        uint32_t v1[4] = {1,1,1,1};
        uint32x4_t x1 = vld1q_u32(v1);
        uint64_t v2[2] = {1,1};
        uint64x2_t x2 = vld1q_u64(v2);

        uint32x4_t x3 = vdupq_n_u32(2);
        x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0);
        x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3);
        uint64x2_t x4 = vdupq_n_u64(2);
        x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0);
        x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1);

        result = !!(vgetq_lane_u32(x3,0) | vgetq_lane_u64(x4,1));
    }
    __except (EXCEPTION_EXECUTE_HANDLER)
    {
        return false;
    }
    return result;
# else
    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile bool result = true;

    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerNEON);
    if (oldHandler == SIG_ERR)
        return false;

    volatile sigset_t oldMask;
    if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
    {
        // Put the caller's SIGILL handler back before bailing out; otherwise
        // the probe handler would stay installed after this function returns.
        signal(SIGILL, oldHandler);
        return false;
    }

    // setjmp() returns nonzero only when SigIllHandlerNEON jumped back here.
    if (setjmp(s_jmpNoNEON))
        result = false;
    else
    {
        uint32_t v1[4] = {1,1,1,1};
        uint32x4_t x1 = vld1q_u32(v1);
        uint64_t v2[2] = {1,1};
        uint64x2_t x2 = vld1q_u64(v2);

        uint32x4_t x3 = {0,0,0,0};
        x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0);
        x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3);
        uint64x2_t x4 = {0,0};
        x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0);
        x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1);

        // Hack... GCC optimizes away the code and returns true
        result = !!(vgetq_lane_u32(x3,0) | vgetq_lane_u64(x4,1));
    }

    // Restore the signal mask and handler saved above.
    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
    signal(SIGILL, oldHandler);
    return result;
# endif
#else
    return false;
#endif // CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE
}
444 
// Probes for polynomial multiply (PMULL) by running vmull_p64 and
// vmull_high_p64 under a fault guard (structured exception handling on the
// MS-style path, a temporary SIGILL handler otherwise).
//
// Returns false when the intrinsics are compiled out, when execution faults,
// or when the signal handler or signal mask cannot be obtained. Otherwise
// returns whether both products equal the expected constants.
static bool TryPMULL()
{
#if (CRYPTOPP_BOOL_ARM_PMULL_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
    volatile bool result = true;
    __try
    {
        const poly64_t a1={0x9090909090909090}, b1={0xb0b0b0b0b0b0b0b0};
        const poly8x16_t a2={0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0},
                         b2={0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0};

        const poly128_t r1 = vmull_p64(a1, b1);
        const poly128_t r2 = vmull_high_p64((poly64x2_t)(a2), (poly64x2_t)(b2));

        // Linaro is missing vreinterpretq_u64_p128. Also see http://github.com/weidai11/cryptopp/issues/233.
        const uint64x2_t& t1 = (uint64x2_t)(r1); // {bignum,bignum}
        const uint64x2_t& t2 = (uint64x2_t)(r2); // {bignum,bignum}

        result = !!(vgetq_lane_u64(t1,0) == 0x5300530053005300 && vgetq_lane_u64(t1,1) == 0x5300530053005300 &&
                    vgetq_lane_u64(t2,0) == 0x6c006c006c006c00 && vgetq_lane_u64(t2,1) == 0x6c006c006c006c00);
    }
    __except (EXCEPTION_EXECUTE_HANDLER)
    {
        return false;
    }
    return result;
# else
    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile bool result = true;

    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerPMULL);
    if (oldHandler == SIG_ERR)
        return false;

    volatile sigset_t oldMask;
    if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
    {
        // Put the caller's SIGILL handler back before bailing out; otherwise
        // the probe handler would stay installed after this function returns.
        signal(SIGILL, oldHandler);
        return false;
    }

    // setjmp() returns nonzero only when SigIllHandlerPMULL jumped back here.
    if (setjmp(s_jmpNoPMULL))
        result = false;
    else
    {
        const poly64_t a1={0x9090909090909090}, b1={0xb0b0b0b0b0b0b0b0};
        const poly8x16_t a2={0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0},
                         b2={0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0};

        const poly128_t r1 = vmull_p64(a1, b1);
        const poly128_t r2 = vmull_high_p64((poly64x2_t)(a2), (poly64x2_t)(b2));

        // Linaro is missing vreinterpretq_u64_p128. Also see http://github.com/weidai11/cryptopp/issues/233.
        const uint64x2_t& t1 = (uint64x2_t)(r1); // {bignum,bignum}
        const uint64x2_t& t2 = (uint64x2_t)(r2); // {bignum,bignum}

        result = !!(vgetq_lane_u64(t1,0) == 0x5300530053005300 && vgetq_lane_u64(t1,1) == 0x5300530053005300 &&
                    vgetq_lane_u64(t2,0) == 0x6c006c006c006c00 && vgetq_lane_u64(t2,1) == 0x6c006c006c006c00);
    }

    // Restore the signal mask and handler saved above.
    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
    signal(SIGILL, oldHandler);
    return result;
# endif
#else
    return false;
#endif // CRYPTOPP_BOOL_ARM_PMULL_AVAILABLE
}
511 
// Probes for the CRC32 instructions by running __crc32cw, __crc32ch and
// __crc32cb under a fault guard (structured exception handling on the
// MS-style path, a temporary SIGILL handler otherwise).
//
// Returns false when the intrinsics are compiled out, when execution faults,
// or when the signal handler or signal mask cannot be obtained. Otherwise
// returns whether the computed checksum is nonzero.
static bool TryCRC32()
{
#if (CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
    volatile bool result = true;
    __try
    {
        word32 w=0, x=1; word16 y=2; byte z=3;
        w = __crc32cw(w,x);
        w = __crc32ch(w,y);
        w = __crc32cb(w,z);

        result = !!w;
    }
    __except (EXCEPTION_EXECUTE_HANDLER)
    {
        return false;
    }
    return result;
# else
    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile bool result = true;

    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerCRC32);
    if (oldHandler == SIG_ERR)
        return false;

    volatile sigset_t oldMask;
    if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
    {
        // Put the caller's SIGILL handler back before bailing out; otherwise
        // the probe handler would stay installed after this function returns.
        signal(SIGILL, oldHandler);
        return false;
    }

    // setjmp() returns nonzero only when SigIllHandlerCRC32 jumped back here.
    if (setjmp(s_jmpNoCRC32))
        result = false;
    else
    {
        word32 w=0, x=1; word16 y=2; byte z=3;
        w = __crc32cw(w,x);
        w = __crc32ch(w,y);
        w = __crc32cb(w,z);

        // Hack... GCC optimizes away the code and returns true
        result = !!w;
    }

    // Restore the signal mask and handler saved above.
    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
    signal(SIGILL, oldHandler);
    return result;
# endif
#else
    return false;
#endif // CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE
}
565 
// Probes for the AES instructions by running vaeseq_u8 and vaesdq_u8 under a
// fault guard (structured exception handling on the MS-style path, a
// temporary SIGILL handler otherwise).
//
// Returns false when the intrinsics are compiled out, when execution faults,
// or when the signal handler or signal mask cannot be obtained. Otherwise
// returns whether the value computed by the intrinsics is nonzero.
static bool TryAES()
{
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
    volatile bool result = true;
    __try
    {
        // AES encrypt and decrypt
        uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0);
        uint8x16_t r1 = vaeseq_u8(data, key);
        uint8x16_t r2 = vaesdq_u8(data, key);

        result = !!(vgetq_lane_u8(r1,0) | vgetq_lane_u8(r2,7));
    }
    __except (EXCEPTION_EXECUTE_HANDLER)
    {
        return false;
    }
    return result;
# else
    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile bool result = true;

    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerAES);
    if (oldHandler == SIG_ERR)
        return false;

    volatile sigset_t oldMask;
    if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
    {
        // Put the caller's SIGILL handler back before bailing out; otherwise
        // the probe handler would stay installed after this function returns.
        signal(SIGILL, oldHandler);
        return false;
    }

    // setjmp() returns nonzero only when SigIllHandlerAES jumped back here.
    if (setjmp(s_jmpNoAES))
        result = false;
    else
    {
        // AES encrypt and decrypt
        uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0);
        uint8x16_t r1 = vaeseq_u8(data, key);
        uint8x16_t r2 = vaesdq_u8(data, key);

        // Hack... GCC optimizes away the code and returns true
        result = !!(vgetq_lane_u8(r1,0) | vgetq_lane_u8(r2,7));
    }

    // Restore the signal mask and handler saved above.
    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
    signal(SIGILL, oldHandler);
    return result;
# endif
#else
    return false;
#endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
}
618 
// Probes for the SHA-1 instructions by running the vsha1* intrinsics under a
// fault guard (structured exception handling on the MS-style path, a
// temporary SIGILL handler otherwise).
//
// Returns false when the intrinsics are compiled out, when execution faults,
// or when the signal handler or signal mask cannot be obtained. Otherwise
// returns whether the value computed by the intrinsics is nonzero.
static bool TrySHA1()
{
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
    volatile bool result = true;
    __try
    {
        uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};

        uint32x4_t r1 = vsha1cq_u32 (data1, 0, data2);
        uint32x4_t r2 = vsha1mq_u32 (data1, 0, data2);
        uint32x4_t r3 = vsha1pq_u32 (data1, 0, data2);
        uint32x4_t r4 = vsha1su0q_u32 (data1, data2, data3);
        uint32x4_t r5 = vsha1su1q_u32 (data1, data2);

        result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3) | vgetq_lane_u32(r5,0));
    }
    __except (EXCEPTION_EXECUTE_HANDLER)
    {
        return false;
    }
    return result;
# else
    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile bool result = true;

    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA1);
    if (oldHandler == SIG_ERR)
        return false;

    volatile sigset_t oldMask;
    if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
    {
        // Put the caller's SIGILL handler back before bailing out; otherwise
        // the probe handler would stay installed after this function returns.
        signal(SIGILL, oldHandler);
        return false;
    }

    // setjmp() returns nonzero only when SigIllHandlerSHA1 jumped back here.
    if (setjmp(s_jmpNoSHA1))
        result = false;
    else
    {
        uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};

        uint32x4_t r1 = vsha1cq_u32 (data1, 0, data2);
        uint32x4_t r2 = vsha1mq_u32 (data1, 0, data2);
        uint32x4_t r3 = vsha1pq_u32 (data1, 0, data2);
        uint32x4_t r4 = vsha1su0q_u32 (data1, data2, data3);
        uint32x4_t r5 = vsha1su1q_u32 (data1, data2);

        // Hack... GCC optimizes away the code and returns true
        result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3) | vgetq_lane_u32(r5,0));
    }

    // Restore the signal mask and handler saved above.
    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
    signal(SIGILL, oldHandler);
    return result;
# endif
#else
    return false;
#endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
}
678 
// Probes for the SHA-256 instructions by running the vsha256* intrinsics
// under a fault guard (structured exception handling on the MS-style path, a
// temporary SIGILL handler otherwise).
//
// Returns false when the intrinsics are compiled out, when execution faults,
// or when the signal handler or signal mask cannot be obtained. Otherwise
// returns whether the value computed by the intrinsics is nonzero.
static bool TrySHA2()
{
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
    volatile bool result = true;
    __try
    {
        uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};

        uint32x4_t r1 = vsha256hq_u32 (data1, data2, data3);
        uint32x4_t r2 = vsha256h2q_u32 (data1, data2, data3);
        uint32x4_t r3 = vsha256su0q_u32 (data1, data2);
        uint32x4_t r4 = vsha256su1q_u32 (data1, data2, data3);

        result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3));
    }
    __except (EXCEPTION_EXECUTE_HANDLER)
    {
        return false;
    }
    return result;
# else
    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile bool result = true;

    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA2);
    if (oldHandler == SIG_ERR)
        return false;

    volatile sigset_t oldMask;
    if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
    {
        // Put the caller's SIGILL handler back before bailing out; otherwise
        // the probe handler would stay installed after this function returns.
        signal(SIGILL, oldHandler);
        return false;
    }

    // setjmp() returns nonzero only when SigIllHandlerSHA2 jumped back here.
    if (setjmp(s_jmpNoSHA2))
        result = false;
    else
    {
        uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};

        uint32x4_t r1 = vsha256hq_u32 (data1, data2, data3);
        uint32x4_t r2 = vsha256h2q_u32 (data1, data2, data3);
        uint32x4_t r3 = vsha256su0q_u32 (data1, data2);
        uint32x4_t r4 = vsha256su1q_u32 (data1, data2, data3);

        // Hack... GCC optimizes away the code and returns true
        result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3));
    }

    // Restore the signal mask and handler saved above.
    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
    signal(SIGILL, oldHandler);
    return result;
# endif
#else
    return false;
#endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
}
736 
737 void DetectArmFeatures()
738 {
739  g_hasNEON = TryNEON();
740  g_hasPMULL = TryPMULL();
741  g_hasCRC32 = TryCRC32();
742  g_hasAES = TryAES();
743  g_hasSHA1 = TrySHA1();
744  g_hasSHA2 = TrySHA2();
745 
746  g_ArmDetectionDone = true;
747 }
748 
749 #endif
750 NAMESPACE_END
751 
752 // ***************** C++ Static Initialization ********************
753 
754 ANONYMOUS_NAMESPACE_BEGIN
755 struct InitializeCpu
756 {
757  InitializeCpu()
758  {
759 #if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64
760  CryptoPP::DetectX86Features();
761 #elif CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64
762  CryptoPP::DetectArmFeatures();
763 #endif
764  }
765 };
766 
767 #if HAVE_GCC_INIT_PRIORITY
768 const InitializeCpu s_init __attribute__ ((init_priority (CRYPTOPP_INIT_PRIORITY + 20))) = InitializeCpu();
769 #elif HAVE_MSC_INIT_PRIORITY
770 #pragma warning(disable: 4075)
771 #pragma init_seg(".CRT$XCU-020")
772 const InitializeCpu s_init;
773 #pragma warning(default: 4075)
774 #else
775 const InitializeCpu& s_init = CryptoPP::Singleton<InitializeCpu>().Ref();
776 #endif
777 ANONYMOUS_NAMESPACE_END
778 
779 #endif // CRYPTOPP_IMPORTS
Utility functions for the Crypto++ library.
Library configuration file.
Functions for CPU features and intrinsics.
Crypto++ library namespace.