Crypto++  5.6.5
Free C++ class library of cryptographic schemes
cpu.cpp
1 // cpu.cpp - written and placed in the public domain by Wei Dai
2 
3 #include "pch.h"
4 #include "config.h"
5 
6 #ifndef EXCEPTION_EXECUTE_HANDLER
7 # define EXCEPTION_EXECUTE_HANDLER 1
8 #endif
9 
10 #ifndef CRYPTOPP_IMPORTS
11 
12 #include "cpu.h"
13 #include "misc.h"
14 #include <algorithm>
15 
16 #ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
17 #include <signal.h>
18 #include <setjmp.h>
19 #endif
20 
21 NAMESPACE_BEGIN(CryptoPP)
22 
23 #ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
// Pointer-to-signal-handler type with C language linkage. It is used to hold
// the value returned by signal() so the previous handler can be restored.
extern "C" typedef void (*SigHandler)(int);
27 #endif // Not CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
28 
29 #ifdef CRYPTOPP_CPUID_AVAILABLE
30 
31 #if _MSC_VER >= 1400 && CRYPTOPP_BOOL_X64
32 
33 bool CpuId(word32 input, word32 output[4])
34 {
35  __cpuid((int *)output, input);
36  return true;
37 }
38 
39 #else
40 
41 #ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
extern "C"
{
    // Jump targets armed with setjmp() by the probing functions before they
    // execute an instruction that may raise SIGILL.
    static jmp_buf s_jmpNoCPUID;
    static jmp_buf s_jmpNoSSE2;

    // SIGILL handler for the CPUID probe: unwinds to the setjmp() point,
    // making setjmp() return 1.
    static void SigIllHandlerCPUID(int)
    {
        longjmp(s_jmpNoCPUID, 1);
    }

    // SIGILL handler for the SSE2 probe: unwinds to the setjmp() point,
    // making setjmp() return 1.
    static void SigIllHandlerSSE2(int)
    {
        longjmp(s_jmpNoSSE2, 1);
    }
}
56 #endif
57 
58 bool CpuId(word32 input, word32 output[4])
59 {
60 #if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
61  __try
62  {
63  __asm
64  {
65  mov eax, input
66  mov ecx, 0
67  cpuid
68  mov edi, output
69  mov [edi], eax
70  mov [edi+4], ebx
71  mov [edi+8], ecx
72  mov [edi+12], edx
73  }
74  }
75  // GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION
76  __except (EXCEPTION_EXECUTE_HANDLER)
77  {
78  return false;
79  }
80 
81  // function 0 returns the highest basic function understood in EAX
82  if(input == 0)
83  return !!output[0];
84 
85  return true;
86 #else
87  // longjmp and clobber warnings. Volatile is required.
88  // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
89  volatile bool result = true;
90 
91  volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerCPUID);
92  if (oldHandler == SIG_ERR)
93  return false;
94 
95 # ifndef __MINGW32__
96  volatile sigset_t oldMask;
97  if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
98  return false;
99 # endif
100 
101  if (setjmp(s_jmpNoCPUID))
102  result = false;
103  else
104  {
105  asm volatile
106  (
107  // save ebx in case -fPIC is being used
108  // TODO: this might need an early clobber on EDI.
109 # if CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64
110  "pushq %%rbx; cpuid; mov %%ebx, %%edi; popq %%rbx"
111 # else
112  "push %%ebx; cpuid; mov %%ebx, %%edi; pop %%ebx"
113 # endif
114  : "=a" (output[0]), "=D" (output[1]), "=c" (output[2]), "=d" (output[3])
115  : "a" (input), "c" (0)
116  : "cc"
117  );
118  }
119 
120 # ifndef __MINGW32__
121  sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
122 # endif
123 
124  signal(SIGILL, oldHandler);
125  return result;
126 #endif
127 }
128 
129 #endif
130 
131 static bool TrySSE2()
132 {
133 #if CRYPTOPP_BOOL_X64
134  return true;
135 #elif defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
136  __try
137  {
138 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
139  AS2(por xmm0, xmm0) // executing SSE2 instruction
140 #elif CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
141  __m128i x = _mm_setzero_si128();
142  return _mm_cvtsi128_si32(x) == 0;
143 #endif
144  }
145  // GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION
146  __except (EXCEPTION_EXECUTE_HANDLER)
147  {
148  return false;
149  }
150  return true;
151 #else
152  // longjmp and clobber warnings. Volatile is required.
153  // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
154  volatile bool result = true;
155 
156  volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSSE2);
157  if (oldHandler == SIG_ERR)
158  return false;
159 
160 # ifndef __MINGW32__
161  volatile sigset_t oldMask;
162  if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
163  return false;
164 # endif
165 
166  if (setjmp(s_jmpNoSSE2))
167  result = false;
168  else
169  {
170 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
171  __asm __volatile ("por %xmm0, %xmm0");
172 #elif CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
173  __m128i x = _mm_setzero_si128();
174  result = _mm_cvtsi128_si32(x) == 0;
175 #endif
176  }
177 
178 # ifndef __MINGW32__
179  sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
180 # endif
181 
182  signal(SIGILL, oldHandler);
183  return result;
184 #endif
185 }
186 
187 bool CRYPTOPP_SECTION_INIT g_x86DetectionDone = false;
188 bool CRYPTOPP_SECTION_INIT g_hasMMX = false, CRYPTOPP_SECTION_INIT g_hasISSE = false, CRYPTOPP_SECTION_INIT g_hasSSE2 = false, CRYPTOPP_SECTION_INIT g_hasSSSE3 = false;
189 bool CRYPTOPP_SECTION_INIT g_hasSSE4 = false, CRYPTOPP_SECTION_INIT g_hasAESNI = false, CRYPTOPP_SECTION_INIT g_hasCLMUL = false, CRYPTOPP_SECTION_INIT g_hasSHA = false;
190 bool CRYPTOPP_SECTION_INIT g_hasRDRAND = false, CRYPTOPP_SECTION_INIT g_hasRDSEED = false, CRYPTOPP_SECTION_INIT g_isP4 = false;
191 bool CRYPTOPP_SECTION_INIT g_hasPadlockRNG = false, CRYPTOPP_SECTION_INIT g_hasPadlockACE = false, CRYPTOPP_SECTION_INIT g_hasPadlockACE2 = false;
192 bool CRYPTOPP_SECTION_INIT g_hasPadlockPHE = false, CRYPTOPP_SECTION_INIT g_hasPadlockPMM = false;
193 word32 CRYPTOPP_SECTION_INIT g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;
194 
195 static inline bool IsIntel(const word32 output[4])
196 {
197  // This is the "GenuineIntel" string
198  return (output[1] /*EBX*/ == 0x756e6547) &&
199  (output[2] /*ECX*/ == 0x6c65746e) &&
200  (output[3] /*EDX*/ == 0x49656e69);
201 }
202 
203 static inline bool IsAMD(const word32 output[4])
204 {
205  // This is the "AuthenticAMD" string. Some early K5's can return "AMDisbetter!"
206  return (output[1] /*EBX*/ == 0x68747541) &&
207  (output[2] /*ECX*/ == 0x444D4163) &&
208  (output[3] /*EDX*/ == 0x69746E65);
209 }
210 
211 static inline bool IsVIA(const word32 output[4])
212 {
213  // This is the "CentaurHauls" string. Some non-PadLock's can return "VIA VIA VIA "
214  return (output[1] /*EBX*/ == 0x746e6543) &&
215  (output[2] /*ECX*/ == 0x736c7561) &&
216  (output[3] /*EDX*/ == 0x48727561);
217 }
218 
219 #if HAVE_GCC_CONSTRUCTOR1
220 void __attribute__ ((constructor (CRYPTOPP_INIT_PRIORITY + 50))) DetectX86Features()
221 #elif HAVE_GCC_CONSTRUCTOR0
222 void __attribute__ ((constructor)) DetectX86Features()
223 #else
224 void DetectX86Features()
225 #endif
226 {
227  word32 cpuid[4], cpuid1[4];
228  if (!CpuId(0, cpuid))
229  return;
230  if (!CpuId(1, cpuid1))
231  return;
232 
233  g_hasMMX = (cpuid1[3] & (1 << 23)) != 0;
234  if ((cpuid1[3] & (1 << 26)) != 0)
235  g_hasSSE2 = TrySSE2();
236  g_hasSSSE3 = g_hasSSE2 && (cpuid1[2] & (1<<9));
237  g_hasSSE4 = g_hasSSE2 && ((cpuid1[2] & (1<<19)) && (cpuid1[2] & (1<<20)));
238  g_hasAESNI = g_hasSSE2 && (cpuid1[2] & (1<<25));
239  g_hasCLMUL = g_hasSSE2 && (cpuid1[2] & (1<<1));
240 
241  if ((cpuid1[3] & (1 << 25)) != 0)
242  g_hasISSE = true;
243  else
244  {
245  word32 cpuid2[4];
246  CpuId(0x080000000, cpuid2);
247  if (cpuid2[0] >= 0x080000001)
248  {
249  CpuId(0x080000001, cpuid2);
250  g_hasISSE = (cpuid2[3] & (1 << 22)) != 0;
251  }
252  }
253 
254  if (IsIntel(cpuid))
255  {
256  static const unsigned int RDRAND_FLAG = (1 << 30);
257  static const unsigned int RDSEED_FLAG = (1 << 18);
258  static const unsigned int SHA_FLAG = (1 << 29);
259 
260  g_isP4 = ((cpuid1[0] >> 8) & 0xf) == 0xf;
261  g_cacheLineSize = 8 * GETBYTE(cpuid1[1], 1);
262  g_hasRDRAND = !!(cpuid1[2] /*ECX*/ & RDRAND_FLAG);
263 
264  if (cpuid[0] /*EAX*/ >= 7)
265  {
266  word32 cpuid3[4];
267  if (CpuId(7, cpuid3))
268  {
269  g_hasRDSEED = !!(cpuid3[1] /*EBX*/ & RDSEED_FLAG);
270  g_hasSHA = !!(cpuid3[1] /*EBX*/ & SHA_FLAG);
271  }
272  }
273  }
274  else if (IsAMD(cpuid))
275  {
276  static const unsigned int RDRAND_FLAG = (1 << 30);
277 
278  CpuId(0x01, cpuid);
279  g_hasRDRAND = !!(cpuid[2] /*ECX*/ & RDRAND_FLAG);
280 
281  CpuId(0x80000005, cpuid);
282  g_cacheLineSize = GETBYTE(cpuid[2], 0);
283  }
284  else if (IsVIA(cpuid))
285  {
286  static const unsigned int RNG_FLAGS = (0x3 << 2);
287  static const unsigned int ACE_FLAGS = (0x3 << 6);
288  static const unsigned int ACE2_FLAGS = (0x3 << 8);
289  static const unsigned int PHE_FLAGS = (0x3 << 10);
290  static const unsigned int PMM_FLAGS = (0x3 << 12);
291 
292  CpuId(0xC0000000, cpuid);
293  if (cpuid[0] >= 0xC0000001)
294  {
295  // Extended features available
296  CpuId(0xC0000001, cpuid);
297  g_hasPadlockRNG = !!(cpuid[3] /*EDX*/ & RNG_FLAGS);
298  g_hasPadlockACE = !!(cpuid[3] /*EDX*/ & ACE_FLAGS);
299  g_hasPadlockACE2 = !!(cpuid[3] /*EDX*/ & ACE2_FLAGS);
300  g_hasPadlockPHE = !!(cpuid[3] /*EDX*/ & PHE_FLAGS);
301  g_hasPadlockPMM = !!(cpuid[3] /*EDX*/ & PMM_FLAGS);
302  }
303  }
304 
305  if (!g_cacheLineSize)
306  g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;
307 
308  *((volatile bool*)&g_x86DetectionDone) = true;
309 }
310 
311 #elif (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64)
312 
313 // The ARM equivalent of CPUID probing is reading a MSR. The code requires Exception Level 1 (EL1) and above, but user space runs at EL0.
314 // Attempting to run the code results in a SIGILL and termination.
315 //
316 // #if defined(__arm64__) || defined(__aarch64__)
317 // word64 caps = 0; // Read ID_AA64ISAR0_EL1
318 // __asm __volatile("mrs %0, " "id_aa64isar0_el1" : "=r" (caps));
319 // #elif defined(__arm__) || defined(__aarch32__)
320 // word32 caps = 0; // Read ID_ISAR5_EL1
321 // __asm __volatile("mrs %0, " "id_isar5_el1" : "=r" (caps));
322 // #endif
323 //
324 // The following does not work well either. It appears to be missing constants, and it does not detect Aarch32 execution environments on Aarch64
325 // http://community.arm.com/groups/android-community/blog/2014/10/10/runtime-detection-of-cpu-features-on-an-armv8-a-cpu
326 //
327 bool CRYPTOPP_SECTION_INIT g_ArmDetectionDone = false;
328 bool CRYPTOPP_SECTION_INIT g_hasNEON = false, CRYPTOPP_SECTION_INIT g_hasPMULL = false, CRYPTOPP_SECTION_INIT g_hasCRC32 = false;
329 bool CRYPTOPP_SECTION_INIT g_hasAES = false, CRYPTOPP_SECTION_INIT g_hasSHA1 = false, CRYPTOPP_SECTION_INIT g_hasSHA2 = false;
330 word32 CRYPTOPP_SECTION_INIT g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;
331 
332 #ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
extern "C"
{
    // Jump targets armed with setjmp() by the matching Try*() probe before
    // it executes instructions that may raise SIGILL.
    static jmp_buf s_jmpNoNEON;
    static jmp_buf s_jmpNoPMULL;
    static jmp_buf s_jmpNoCRC32;
    static jmp_buf s_jmpNoAES;
    static jmp_buf s_jmpNoSHA1;
    static jmp_buf s_jmpNoSHA2;

    // Each handler unwinds to its probe's setjmp() point, making setjmp()
    // return 1.
    static void SigIllHandlerNEON(int)
    {
        longjmp(s_jmpNoNEON, 1);
    }

    static void SigIllHandlerPMULL(int)
    {
        longjmp(s_jmpNoPMULL, 1);
    }

    static void SigIllHandlerCRC32(int)
    {
        longjmp(s_jmpNoCRC32, 1);
    }

    static void SigIllHandlerAES(int)
    {
        longjmp(s_jmpNoAES, 1);
    }

    static void SigIllHandlerSHA1(int)
    {
        longjmp(s_jmpNoSHA1, 1);
    }

    static void SigIllHandlerSHA2(int)
    {
        longjmp(s_jmpNoSHA2, 1);
    }
}
371 #endif // Not CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
372 
// Probes for NEON by executing a few vector load and lane operations.
//
// Returns true when the instructions ran and the combined lane value is
// non-zero; false when they raised a structured exception / SIGILL, when the
// signal machinery could not be set up, or when NEON intrinsics were not
// enabled at compile time.
//
// The signal path temporarily replaces the process-wide SIGILL handler; the
// previous handler is restored on every exit path.
static bool TryNEON()
{
#if (CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
    volatile bool result = true;
    __try
    {
        uint32_t v1[4] = {1,1,1,1};
        uint32x4_t x1 = vld1q_u32(v1);
        uint64_t v2[2] = {1,1};
        uint64x2_t x2 = vld1q_u64(v2);

        uint32x4_t x3 = vdupq_n_u32(2);
        x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0);
        x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3);
        uint64x2_t x4 = vdupq_n_u64(2);
        x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0);
        x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1);

        result = !!(vgetq_lane_u32(x3,0) | vgetq_lane_u64(x4,1));
    }
    __except (EXCEPTION_EXECUTE_HANDLER)
    {
        return false;
    }
    return result;
# else
    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile bool result = true;

    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerNEON);
    if (oldHandler == SIG_ERR)
        return false;

    volatile sigset_t oldMask;
    if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
    {
        // Do not leave the probe handler installed when bailing out early.
        signal(SIGILL, oldHandler);
        return false;
    }

    if (setjmp(s_jmpNoNEON))
        result = false;   // reached via longjmp() from SigIllHandlerNEON
    else
    {
        uint32_t v1[4] = {1,1,1,1};
        uint32x4_t x1 = vld1q_u32(v1);
        uint64_t v2[2] = {1,1};
        uint64x2_t x2 = vld1q_u64(v2);

        uint32x4_t x3 = {0,0,0,0};
        x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0);
        x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3);
        uint64x2_t x4 = {0,0};
        x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0);
        x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1);

        // Hack... GCC optimizes away the code and returns true
        result = !!(vgetq_lane_u32(x3,0) | vgetq_lane_u64(x4,1));
    }

    // Put back the signal mask and handler saved before the probe.
    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
    signal(SIGILL, oldHandler);
    return result;
# endif
#else
    return false;
#endif  // CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE
}
440 
// Probes for the polynomial multiply instructions by running vmull_p64 and
// vmull_high_p64 on fixed operands and comparing against the expected
// carry-less products.
//
// Returns true when the instructions ran and every product matched; false
// when they raised a structured exception / SIGILL, when the signal
// machinery could not be set up, when a product was wrong, or when the ARM
// crypto intrinsics were not enabled at compile time.
//
// The signal path temporarily replaces the process-wide SIGILL handler; the
// previous handler is restored on every exit path.
static bool TryPMULL()
{
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
    volatile bool result = true;
    __try
    {
        const poly64_t   a1={2}, b1={3};
        const poly64x2_t a2={4,5}, b2={6,7};
        const poly64x2_t a3={0x8080808080808080,0xa0a0a0a0a0a0a0a0}, b3={0xc0c0c0c0c0c0c0c0, 0xe0e0e0e0e0e0e0e0};

        const poly128_t r1 = vmull_p64(a1, b1);
        const poly128_t r2 = vmull_high_p64(a2, b2);
        const poly128_t r3 = vmull_high_p64(a3, b3);

        // Also see https://github.com/weidai11/cryptopp/issues/233.
        const uint64x2_t& t1 = vreinterpretq_u64_p128(r1);  // {6,0}
        const uint64x2_t& t2 = vreinterpretq_u64_p128(r2);  // {0x1b,0}: carry-less 5*7 of the high lanes
        const uint64x2_t& t3 = vreinterpretq_u64_p128(r3);  // {bignum,bignum}

        result = !!(vgetq_lane_u64(t1,0) == 0x06 && vgetq_lane_u64(t1,1) == 0x00 && vgetq_lane_u64(t2,0) == 0x1b &&
            vgetq_lane_u64(t2,1) == 0x00 && vgetq_lane_u64(t3,0) == 0x6c006c006c006c00 && vgetq_lane_u64(t3,1) == 0x6c006c006c006c00);
    }
    __except (EXCEPTION_EXECUTE_HANDLER)
    {
        return false;
    }
    return result;
# else
    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile bool result = true;

    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerPMULL);
    if (oldHandler == SIG_ERR)
        return false;

    volatile sigset_t oldMask;
    if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
    {
        // Do not leave the probe handler installed when bailing out early.
        signal(SIGILL, oldHandler);
        return false;
    }

    if (setjmp(s_jmpNoPMULL))
        result = false;   // reached via longjmp() from SigIllHandlerPMULL
    else
    {
        const poly64_t   a1={2}, b1={3};
        const poly64x2_t a2={4,5}, b2={6,7};
        const poly64x2_t a3={0x8080808080808080,0xa0a0a0a0a0a0a0a0}, b3={0xc0c0c0c0c0c0c0c0, 0xe0e0e0e0e0e0e0e0};

        const poly128_t r1 = vmull_p64(a1, b1);
        const poly128_t r2 = vmull_high_p64(a2, b2);
        const poly128_t r3 = vmull_high_p64(a3, b3);

        // Linaro is missing vreinterpretq_u64_p128. Also see https://github.com/weidai11/cryptopp/issues/233.
        const uint64x2_t& t1 = (uint64x2_t)(r1);  // {6,0}
        const uint64x2_t& t2 = (uint64x2_t)(r2);  // {0x1b,0}: carry-less 5*7 of the high lanes
        const uint64x2_t& t3 = (uint64x2_t)(r3);  // {bignum,bignum}

        result = !!(vgetq_lane_u64(t1,0) == 0x06 && vgetq_lane_u64(t1,1) == 0x00 && vgetq_lane_u64(t2,0) == 0x1b &&
            vgetq_lane_u64(t2,1) == 0x00 && vgetq_lane_u64(t3,0) == 0x6c006c006c006c00 && vgetq_lane_u64(t3,1) == 0x6c006c006c006c00);
    }

    // Put back the signal mask and handler saved before the probe.
    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
    signal(SIGILL, oldHandler);
    return result;
# endif
#else
    return false;
#endif  // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
}
511 
// Probes for the CRC32 instructions by chaining __crc32cw, __crc32ch and
// __crc32cb over fixed inputs.
//
// Returns true when the instructions ran and the final checksum is non-zero;
// false when they raised a structured exception / SIGILL, when the signal
// machinery could not be set up, or when the ARM CRC32 intrinsics were not
// enabled at compile time.
//
// The signal path temporarily replaces the process-wide SIGILL handler; the
// previous handler is restored on every exit path.
static bool TryCRC32()
{
#if (CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
    volatile bool result = true;
    __try
    {
        word32 w=0, x=1; word16 y=2; byte z=3;
        w = __crc32cw(w,x);
        w = __crc32ch(w,y);
        w = __crc32cb(w,z);

        result = !!w;
    }
    __except (EXCEPTION_EXECUTE_HANDLER)
    {
        return false;
    }
    return result;
# else
    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile bool result = true;

    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerCRC32);
    if (oldHandler == SIG_ERR)
        return false;

    volatile sigset_t oldMask;
    if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
    {
        // Do not leave the probe handler installed when bailing out early.
        signal(SIGILL, oldHandler);
        return false;
    }

    if (setjmp(s_jmpNoCRC32))
        result = false;   // reached via longjmp() from SigIllHandlerCRC32
    else
    {
        word32 w=0, x=1; word16 y=2; byte z=3;
        w = __crc32cw(w,x);
        w = __crc32ch(w,y);
        w = __crc32cb(w,z);

        // Hack... GCC optimizes away the code and returns true
        result = !!w;
    }

    // Put back the signal mask and handler saved before the probe.
    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
    signal(SIGILL, oldHandler);
    return result;
# endif
#else
    return false;
#endif  // CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE
}
565 
// Probes for the AES instructions by running one vaeseq_u8 and one vaesdq_u8
// step on all-zero data and key.
//
// Returns true when the instructions ran and the combined result lanes are
// non-zero; false when they raised a structured exception / SIGILL, when the
// signal machinery could not be set up, or when the ARM crypto intrinsics
// were not enabled at compile time.
//
// The signal path temporarily replaces the process-wide SIGILL handler; the
// previous handler is restored on every exit path.
static bool TryAES()
{
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
    volatile bool result = true;
    __try
    {
        // AES encrypt and decrypt
        uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0);
        uint8x16_t r1 = vaeseq_u8(data, key);
        uint8x16_t r2 = vaesdq_u8(data, key);

        result = !!(vgetq_lane_u8(r1,0) | vgetq_lane_u8(r2,7));
    }
    __except (EXCEPTION_EXECUTE_HANDLER)
    {
        return false;
    }
    return result;
# else
    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile bool result = true;

    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerAES);
    if (oldHandler == SIG_ERR)
        return false;

    volatile sigset_t oldMask;
    if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
    {
        // Do not leave the probe handler installed when bailing out early.
        signal(SIGILL, oldHandler);
        return false;
    }

    if (setjmp(s_jmpNoAES))
        result = false;   // reached via longjmp() from SigIllHandlerAES
    else
    {
        // AES encrypt and decrypt
        uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0);
        uint8x16_t r1 = vaeseq_u8(data, key);
        uint8x16_t r2 = vaesdq_u8(data, key);

        // Hack... GCC optimizes away the code and returns true
        result = !!(vgetq_lane_u8(r1,0) | vgetq_lane_u8(r2,7));
    }

    // Put back the signal mask and handler saved before the probe.
    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
    signal(SIGILL, oldHandler);
    return result;
# endif
#else
    return false;
#endif  // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
}
618 
// Probes for the SHA-1 instructions by running vsha1cq_u32, vsha1mq_u32,
// vsha1pq_u32, vsha1su0q_u32 and vsha1su1q_u32 on fixed inputs.
//
// Returns true when the instructions ran and the combined result lanes are
// non-zero; false when they raised a structured exception / SIGILL, when the
// signal machinery could not be set up, or when the ARM crypto intrinsics
// were not enabled at compile time.
//
// The signal path temporarily replaces the process-wide SIGILL handler; the
// previous handler is restored on every exit path.
static bool TrySHA1()
{
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
    volatile bool result = true;
    __try
    {
        uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};

        uint32x4_t r1 = vsha1cq_u32 (data1, 0, data2);
        uint32x4_t r2 = vsha1mq_u32 (data1, 0, data2);
        uint32x4_t r3 = vsha1pq_u32 (data1, 0, data2);
        uint32x4_t r4 = vsha1su0q_u32 (data1, data2, data3);
        uint32x4_t r5 = vsha1su1q_u32 (data1, data2);

        result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3) | vgetq_lane_u32(r5,0));
    }
    __except (EXCEPTION_EXECUTE_HANDLER)
    {
        return false;
    }
    return result;
# else
    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile bool result = true;

    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA1);
    if (oldHandler == SIG_ERR)
        return false;

    volatile sigset_t oldMask;
    if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
    {
        // Do not leave the probe handler installed when bailing out early.
        signal(SIGILL, oldHandler);
        return false;
    }

    if (setjmp(s_jmpNoSHA1))
        result = false;   // reached via longjmp() from SigIllHandlerSHA1
    else
    {
        uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};

        uint32x4_t r1 = vsha1cq_u32 (data1, 0, data2);
        uint32x4_t r2 = vsha1mq_u32 (data1, 0, data2);
        uint32x4_t r3 = vsha1pq_u32 (data1, 0, data2);
        uint32x4_t r4 = vsha1su0q_u32 (data1, data2, data3);
        uint32x4_t r5 = vsha1su1q_u32 (data1, data2);

        // Hack... GCC optimizes away the code and returns true
        result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3) | vgetq_lane_u32(r5,0));
    }

    // Put back the signal mask and handler saved before the probe.
    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
    signal(SIGILL, oldHandler);
    return result;
# endif
#else
    return false;
#endif  // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
}
678 
// Probes for the SHA-256 instructions by running vsha256hq_u32,
// vsha256h2q_u32, vsha256su0q_u32 and vsha256su1q_u32 on fixed inputs.
//
// Returns true when the instructions ran and the combined result lanes are
// non-zero; false when they raised a structured exception / SIGILL, when the
// signal machinery could not be set up, or when the ARM crypto intrinsics
// were not enabled at compile time.
//
// The signal path temporarily replaces the process-wide SIGILL handler; the
// previous handler is restored on every exit path.
static bool TrySHA2()
{
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
    volatile bool result = true;
    __try
    {
        uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};

        uint32x4_t r1 = vsha256hq_u32 (data1, data2, data3);
        uint32x4_t r2 = vsha256h2q_u32 (data1, data2, data3);
        uint32x4_t r3 = vsha256su0q_u32 (data1, data2);
        uint32x4_t r4 = vsha256su1q_u32 (data1, data2, data3);

        result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3));
    }
    __except (EXCEPTION_EXECUTE_HANDLER)
    {
        return false;
    }
    return result;
# else
    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile bool result = true;

    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA2);
    if (oldHandler == SIG_ERR)
        return false;

    volatile sigset_t oldMask;
    if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
    {
        // Do not leave the probe handler installed when bailing out early.
        signal(SIGILL, oldHandler);
        return false;
    }

    if (setjmp(s_jmpNoSHA2))
        result = false;   // reached via longjmp() from SigIllHandlerSHA2
    else
    {
        uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};

        uint32x4_t r1 = vsha256hq_u32 (data1, data2, data3);
        uint32x4_t r2 = vsha256h2q_u32 (data1, data2, data3);
        uint32x4_t r3 = vsha256su0q_u32 (data1, data2);
        uint32x4_t r4 = vsha256su1q_u32 (data1, data2, data3);

        // Hack... GCC optimizes away the code and returns true
        result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3));
    }

    // Put back the signal mask and handler saved before the probe.
    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
    signal(SIGILL, oldHandler);
    return result;
# endif
#else
    return false;
#endif  // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
}
736 
737 #if HAVE_GCC_CONSTRUCTOR1
738 void __attribute__ ((constructor (CRYPTOPP_INIT_PRIORITY + 50))) DetectArmFeatures()
739 #elif HAVE_GCC_CONSTRUCTOR0
740 void __attribute__ ((constructor)) DetectArmFeatures()
741 #else
742 void DetectArmFeatures()
743 #endif
744 {
745  g_hasNEON = TryNEON();
746  g_hasPMULL = TryPMULL();
747  g_hasCRC32 = TryCRC32();
748  g_hasAES = TryAES();
749  g_hasSHA1 = TrySHA1();
750  g_hasSHA2 = TrySHA2();
751 
752  *((volatile bool*)&g_ArmDetectionDone) = true;
753 }
754 
755 #endif
756 
757 NAMESPACE_END
758 
759 #endif
Utility functions for the Crypto++ library.
Library configuration file.
Functions for CPU features and intrinsics.
Crypto++ library namespace.