Crypto++  5.6.5
Free C++ class library of cryptographic schemes
cpu.h
Go to the documentation of this file.
1 // cpu.h - originally written and placed in the public domain by Wei Dai
2 // updated for ARM by Jeffrey Walton
3 
4 //! \file cpu.h
5 //! \brief Functions for CPU features and intrinsics
6 //! \details The functions are used in X86/X32/X64 and ARM code paths
7 
8 #ifndef CRYPTOPP_CPU_H
9 #define CRYPTOPP_CPU_H
10 
11 #include "config.h"
12 #include <stdio.h>
13 
14 // Issue 340
15 #if CRYPTOPP_GCC_DIAGNOSTIC_AVAILABLE
16 # pragma GCC diagnostic push
17 # pragma GCC diagnostic ignored "-Wconversion"
18 # pragma GCC diagnostic ignored "-Wsign-conversion"
19 #endif
20 
21 // ARM32 and ARM64 Headers
22 #if (CRYPTOPP_ARM32 || CRYPTOPP_ARM64)
23 //# if defined(__GNUC__)
24 //# include <stdint.h>
25 //# endif
26 //# if defined(__ARM_NEON) || defined(_MSC_VER)
27 //# include <arm_neon.h>
28 //# endif
29 //# if defined(__GNUC__) && !defined(__apple_build_version__)
30 //# if defined(__ARM_ACLE) || defined(__ARM_FEATURE_CRC32) || defined(__ARM_FEATURE_CRYPTO)
31 //# include <arm_acle.h>
32 //# endif
33 //# endif
34 
35 #if defined(CRYPTOPP_NEON_AVAILABLE)
36 # include <stdint.h>
37 # include <arm_neon.h>
38 # if defined(__GNUC__) && !defined(__apple_build_version__)
39 # if defined(__ARM_ACLE) || defined(__ARM_FEATURE_CRC32) || defined(__ARM_FEATURE_CRYPTO)
40 # include <arm_acle.h>
41 # endif
42 # endif
43 #endif
44 
45 #endif // ARM32 and ARM64 Headers
46 
47 // X86/X64/X32 Headers
48 #if CRYPTOPP_X86 || CRYPTOPP_X32 || CRYPTOPP_X64
49 
50 // GCC X86 super-include
51 #if (CRYPTOPP_GCC_VERSION >= 40800)
52 # include <x86intrin.h>
53 #endif
54 #if (CRYPTOPP_MSC_VERSION >= 1400)
55 # include <intrin.h>
56 #endif
57 
58 // Baseline include
59 #if CRYPTOPP_SSE2_INTRINSICS_AVAILABLE
60 # include <emmintrin.h> // __m64, const __m128i, _mm_set_epi64x
61 #endif
62 
63 #if !defined(__clang__)
64 #if CRYPTOPP_SSSE3_INTRINSICS_AVAILABLE
65 # include <tmmintrin.h> // _mm_shuffle_pi8, _mm_shuffle_epi8
66 #endif // tmmintrin.h
67 #if CRYPTOPP_SSE4_AVAILABLE
68 # include <smmintrin.h> // _mm_blend_epi16
69 # include <nmmintrin.h> // _mm_crc32_u{8|16|32}
70 #endif // smmintrin.h
71 #if CRYPTOPP_AESNI_AVAILABLE
72 # include <wmmintrin.h> // aesenc, aesdec, etc
73 #endif // wmmintrin.h
74 #if CRYPTOPP_SSE_SHA_AVAILABLE
75 # include <immintrin.h> // rdrand, rdseed, avx, sha
76 #endif // immintrin.h
#endif // !defined(__clang__)
#endif // X86/X64/X32 Headers
79 
// Applies to both X86/X32/X64 and ARM32/ARM64. And we've got MIPS devices on the way.
// Selects the inline assembly dialect: Microsoft and Borland compilers get
// CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY, every other compiler gets
// CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY. Exactly one of the two is defined here.
#if defined(_MSC_VER) || defined(__BORLANDC__)
# define CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
#else
# define CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
#endif
86 
// Applies to both X86/X32/X64 and ARM32/ARM64
// String fragments used when composing GNU-style inline assembly:
//   NEW_LINE        - statement terminator appended by the GNU_AS* macros
//   INTEL_*/ATT_*   - assembler directives that switch the syntax mode
// Three configurations are distinguished:
//   - Clang (LLVM, Apple, or integrated assembler): NEW_LINE is "\n" and the
//     syntax directives carry no prefix/noprefix qualifier, so the PREFIX and
//     NOPREFIX variants are identical.
//   - Other compilers defining __GNUC__: NEW_LINE is empty and the directives
//     carry an explicit prefix/noprefix qualifier.
//   - Anything else: every macro expands to nothing.
#if defined(CRYPTOPP_LLVM_CLANG_VERSION) || defined(CRYPTOPP_APPLE_CLANG_VERSION) || defined(CRYPTOPP_CLANG_INTEGRATED_ASSEMBLER)
 #define NEW_LINE "\n"
 #define INTEL_PREFIX ".intel_syntax;"
 #define INTEL_NOPREFIX ".intel_syntax;"
 #define ATT_PREFIX ".att_syntax;"
 #define ATT_NOPREFIX ".att_syntax;"
#elif defined(__GNUC__)
 #define NEW_LINE
 #define INTEL_PREFIX ".intel_syntax prefix;"
 #define INTEL_NOPREFIX ".intel_syntax noprefix;"
 #define ATT_PREFIX ".att_syntax prefix;"
 #define ATT_NOPREFIX ".att_syntax noprefix;"
#else
 #define NEW_LINE
 #define INTEL_PREFIX
 #define INTEL_NOPREFIX
 #define ATT_PREFIX
 #define ATT_NOPREFIX
#endif
107 
108 #ifdef CRYPTOPP_GENERATE_X64_MASM
109 
110 #define CRYPTOPP_X86_ASM_AVAILABLE
111 #define CRYPTOPP_X64 1
112 #define CRYPTOPP_SSE2_ASM_AVAILABLE 1
113 #define NAMESPACE_END
114 
115 #else
116 
117 NAMESPACE_BEGIN(CryptoPP)
118 
119 #if CRYPTOPP_X86 || CRYPTOPP_X32 || CRYPTOPP_X64 || CRYPTOPP_DOXYGEN_PROCESSING
120 
// Defined whenever the x86 feature queries in this section are compiled in.
#define CRYPTOPP_CPUID_AVAILABLE

// Hide from Doxygen
#ifndef CRYPTOPP_DOXYGEN_PROCESSING
// These should not be used directly. Each flag is read through the accessor
// of the same name further down (HasMMX(), HasSSE2(), IsP4(), ...), and the
// accessors call DetectX86Features() while g_x86DetectionDone is false.
extern CRYPTOPP_DLL bool g_x86DetectionDone;
extern CRYPTOPP_DLL bool g_hasMMX;
extern CRYPTOPP_DLL bool g_hasISSE;
extern CRYPTOPP_DLL bool g_hasSSE2;
extern CRYPTOPP_DLL bool g_hasSSSE3;
extern CRYPTOPP_DLL bool g_hasSSE4;
extern CRYPTOPP_DLL bool g_hasAESNI;
extern CRYPTOPP_DLL bool g_hasCLMUL;
extern CRYPTOPP_DLL bool g_hasSHA;
extern CRYPTOPP_DLL bool g_isP4;
extern CRYPTOPP_DLL bool g_hasRDRAND;
extern CRYPTOPP_DLL bool g_hasRDSEED;
extern CRYPTOPP_DLL bool g_hasPadlockRNG;
extern CRYPTOPP_DLL bool g_hasPadlockACE;
extern CRYPTOPP_DLL bool g_hasPadlockACE2;
extern CRYPTOPP_DLL bool g_hasPadlockPHE;
extern CRYPTOPP_DLL bool g_hasPadlockPMM;
// Returned by GetCacheLineSize() on X86/X32/X64 builds.
extern CRYPTOPP_DLL word32 g_cacheLineSize;

// Defined outside this header; invoked lazily by the accessors below.
CRYPTOPP_DLL void CRYPTOPP_API DetectX86Features();
// Defined outside this header.
// NOTE(review): presumably runs CPUID for leaf 'input', stores EAX/EBX/ECX/EDX
// in 'output' and reports success -- confirm against the implementation.
CRYPTOPP_DLL bool CRYPTOPP_API CpuId(word32 input, word32 output[4]);
#endif // CRYPTOPP_DOXYGEN_PROCESSING
148 
149 //! \brief Determines MMX availability
150 //! \returns true if MMX is determined to be available, false otherwise
151 //! \details MMX, SSE and SSE2 are core processor features for x86_64, and
152 //! the function always returns true for the platform.
153 inline bool HasMMX()
154 {
155 #if CRYPTOPP_X32 || CRYPTOPP_X64
156  return true;
157 #else
158  if (!g_x86DetectionDone)
159  DetectX86Features();
160  return g_hasMMX;
161 #endif
162 }
163 
164 //! \brief Determines SSE availability
165 //! \returns true if SSE is determined to be available, false otherwise
166 //! \details MMX, SSE and SSE2 are core processor features for x86_64, and
167 //! the function always returns true for the platform.
168 inline bool HasISSE()
169 {
170 #if CRYPTOPP_X32 || CRYPTOPP_X64
171  return true;
172 #else
173  if (!g_x86DetectionDone)
174  DetectX86Features();
175  return g_hasISSE;
176 #endif
177 }
178 
179 //! \brief Determines SSE2 availability
180 //! \returns true if SSE2 is determined to be available, false otherwise
181 //! \details MMX, SSE and SSE2 are core processor features for x86_64, and
182 //! the function always returns true for the platform.
183 inline bool HasSSE2()
184 {
185 #if CRYPTOPP_X32 || CRYPTOPP_X64
186  return true;
187 #else
188  if (!g_x86DetectionDone)
189  DetectX86Features();
190  return g_hasSSE2;
191 #endif
192 }
193 
194 //! \brief Determines SSSE3 availability
195 //! \returns true if SSSE3 is determined to be available, false otherwise
196 //! \details HasSSSE3() is a runtime check performed using CPUID
197 //! \note Some Clang compilers incorrectly omit SSSE3 even though its native to the processor.
198 inline bool HasSSSE3()
199 {
200  if (!g_x86DetectionDone)
201  DetectX86Features();
202  return g_hasSSSE3;
203 }
204 
205 //! \brief Determines SSE4 availability
206 //! \returns true if SSE4.1 and SSE4.2 are determined to be available, false otherwise
207 //! \details HasSSE4() is a runtime check performed using CPUID which requires both SSE4.1 and SSE4.2
208 inline bool HasSSE4()
209 {
210  if (!g_x86DetectionDone)
211  DetectX86Features();
212  return g_hasSSE4;
213 }
214 
215 //! \brief Determines AES-NI availability
216 //! \returns true if AES-NI is determined to be available, false otherwise
217 //! \details HasAESNI() is a runtime check performed using CPUID
218 inline bool HasAESNI()
219 {
220  if (!g_x86DetectionDone)
221  DetectX86Features();
222  return g_hasAESNI;
223 }
224 
225 //! \brief Determines Carryless Multiply availability
226 //! \returns true if pclmulqdq is determined to be available, false otherwise
227 //! \details HasCLMUL() is a runtime check performed using CPUID
228 inline bool HasCLMUL()
229 {
230  if (!g_x86DetectionDone)
231  DetectX86Features();
232  return g_hasCLMUL;
233 }
234 
235 //! \brief Determines SHA availability
236 //! \returns true if SHA is determined to be available, false otherwise
237 //! \details HasSHA() is a runtime check performed using CPUID
238 inline bool HasSHA()
239 {
240  if (!g_x86DetectionDone)
241  DetectX86Features();
242  return g_hasSHA;
243 }
244 
245 //! \brief Determines if the CPU is an Intel P4
246 //! \returns true if the CPU is a P4, false otherwise
247 //! \details IsP4() is a runtime check performed using CPUID
248 inline bool IsP4()
249 {
250  if (!g_x86DetectionDone)
251  DetectX86Features();
252  return g_isP4;
253 }
254 
255 //! \brief Determines RDRAND availability
256 //! \returns true if RDRAND is determined to be available, false otherwise
257 //! \details HasRDRAND() is a runtime check performed using CPUID
258 inline bool HasRDRAND()
259 {
260  if (!g_x86DetectionDone)
261  DetectX86Features();
262  return g_hasRDRAND;
263 }
264 
265 //! \brief Determines RDSEED availability
266 //! \returns true if RDSEED is determined to be available, false otherwise
267 //! \details HasRDSEED() is a runtime check performed using CPUID
268 inline bool HasRDSEED()
269 {
270  if (!g_x86DetectionDone)
271  DetectX86Features();
272  return g_hasRDSEED;
273 }
274 
275 //! \brief Determines Padlock RNG availability
276 //! \returns true if VIA Padlock RNG is determined to be available, false otherwise
277 //! \details HasPadlockRNG() is a runtime check performed using CPUID
278 inline bool HasPadlockRNG()
279 {
280  if (!g_x86DetectionDone)
281  DetectX86Features();
282  return g_hasPadlockRNG;
283 }
284 
285 //! \brief Determines Padlock ACE availability
286 //! \returns true if VIA Padlock ACE is determined to be available, false otherwise
287 //! \details HasPadlockACE() is a runtime check performed using CPUID
288 inline bool HasPadlockACE()
289 {
290  if (!g_x86DetectionDone)
291  DetectX86Features();
292  return g_hasPadlockACE;
293 }
294 
295 //! \brief Determines Padlock ACE2 availability
296 //! \returns true if VIA Padlock ACE2 is determined to be available, false otherwise
297 //! \details HasPadlockACE2() is a runtime check performed using CPUID
298 inline bool HasPadlockACE2()
299 {
300  if (!g_x86DetectionDone)
301  DetectX86Features();
302  return g_hasPadlockACE2;
303 }
304 
305 //! \brief Determines Padlock PHE availability
306 //! \returns true if VIA Padlock PHE is determined to be available, false otherwise
307 //! \details HasPadlockPHE() is a runtime check performed using CPUID
308 inline bool HasPadlockPHE()
309 {
310  if (!g_x86DetectionDone)
311  DetectX86Features();
312  return g_hasPadlockPHE;
313 }
314 
315 //! \brief Determines Padlock PMM availability
316 //! \returns true if VIA Padlock PMM is determined to be available, false otherwise
317 //! \details HasPadlockPMM() is a runtime check performed using CPUID
318 inline bool HasPadlockPMM()
319 {
320  if (!g_x86DetectionDone)
321  DetectX86Features();
322  return g_hasPadlockPMM;
323 }
324 
325 #endif // X86 X32 and X64
326 
327 #if CRYPTOPP_ARM32 || CRYPTOPP_ARM64 || CRYPTOPP_DOXYGEN_PROCESSING
328 
// Detection state for ARM. These should be read through the Has*() accessors
// below, which call DetectArmFeatures() while g_ArmDetectionDone is false.
// NOTE(review): unlike the x86 flags these are declared without CRYPTOPP_DLL --
// confirm whether that is intentional for DLL builds.
extern bool g_ArmDetectionDone;
extern bool g_hasNEON, g_hasPMULL, g_hasCRC32, g_hasAES, g_hasSHA1, g_hasSHA2;
// Defined outside this header; invoked lazily by the accessors below.
void CRYPTOPP_API DetectArmFeatures();
332 
333 //! \brief Determine if an ARM processor has Advanced SIMD available
334 //! \returns true if the hardware is capable of Advanced SIMD at runtime, false otherwise.
335 //! \details Advanced SIMD instructions are available under Aarch64 (ARM-64) and Aarch32 (ARM-32).
336 //! \details Runtime support requires compile time support. When compiling with GCC, you may
337 //! need to compile with <tt>-mfpu=neon</tt> (32-bit) or <tt>-march=armv8-a</tt>
338 //! (64-bit). Also see ARM's <tt>__ARM_NEON</tt> preprocessor macro.
339 inline bool HasNEON()
340 {
341  if (!g_ArmDetectionDone)
342  DetectArmFeatures();
343  return g_hasNEON;
344 }
345 
346 //! \brief Determine if an ARM processor provides Polynomial Multiplication (long)
347 //! \returns true if the hardware is capable of polynomial multiplications at runtime, false otherwise.
348 //! \details The multiplication instructions are available under Aarch64 (ARM-64) and Aarch32 (ARM-32).
349 //! \details Runtime support requires compile time support. When compiling with GCC, you may
350 //! need to compile with <tt>-march=armv8-a+crypto</tt>; while Apple requires
351 //! <tt>-arch arm64</tt>. Also see ARM's <tt>__ARM_FEATURE_CRYPTO</tt> preprocessor macro.
352 inline bool HasPMULL()
353 {
354  if (!g_ArmDetectionDone)
355  DetectArmFeatures();
356  return g_hasPMULL;
357 }
358 
359 //! \brief Determine if an ARM processor has CRC32 available
360 //! \returns true if the hardware is capable of CRC32 at runtime, false otherwise.
361 //! \details CRC32 instructions provide access to the processor's CRC32 and CRC32-C instructions.
362 //! They are provided by ARM C Language Extensions 2.0 (ACLE 2.0) and available under Aarch64
363 //! (ARM-64) and Aarch32 (ARM-32) running on Aarch64 (i.e., an AArch32 execution environment).
364 //! \details Runtime support requires compile time support. When compiling with GCC, you may
365 //! need to compile with <tt>-march=armv8-a+crc</tt>; while Apple requires
366 //! <tt>-arch arm64</tt>. Also see ARM's <tt>__ARM_FEATURE_CRC32</tt> preprocessor macro.
367 inline bool HasCRC32()
368 {
369  if (!g_ArmDetectionDone)
370  DetectArmFeatures();
371  return g_hasCRC32;
372 }
373 
374 //! \brief Determine if an ARM processor has AES available
375 //! \returns true if the hardware is capable of AES at runtime, false otherwise.
376 //! \details AES is part of the Crypto extensions from ARM C Language Extensions 2.0 (ACLE 2.0)
377 //! and available under Aarch64 (ARM-64) and Aarch32 (ARM-32) running on Aarch64 (i.e., an
378 //! AArch32 execution environment).
379 //! \details Runtime support requires compile time support. When compiling with GCC, you may
380 //! need to compile with <tt>-march=armv8-a+crypto</tt>; while Apple requires
381 //! <tt>-arch arm64</tt>. Also see ARM's <tt>__ARM_FEATURE_CRYPTO</tt> preprocessor macro.
382 inline bool HasAES()
383 {
384  if (!g_ArmDetectionDone)
385  DetectArmFeatures();
386  return g_hasAES;
387 }
388 
389 //! \brief Determine if an ARM processor has SHA1 available
390 //! \returns true if the hardware is capable of SHA1 at runtime, false otherwise.
391 //! \details SHA1 is part of the Crypto extensions from ARM C Language Extensions 2.0 (ACLE 2.0)
392 //! and available under Aarch64 (ARM-64) and Aarch32 (ARM-32) running on Aarch64 (i.e., an
393 //! AArch32 execution environment).
394 //! \details Runtime support requires compile time support. When compiling with GCC, you may
395 //! need to compile with <tt>-march=armv8-a+crypto</tt>; while Apple requires
396 //! <tt>-arch arm64</tt>. Also see ARM's <tt>__ARM_FEATURE_CRYPTO</tt> preprocessor macro.
397 inline bool HasSHA1()
398 {
399  if (!g_ArmDetectionDone)
400  DetectArmFeatures();
401  return g_hasSHA1;
402 }
403 
404 //! \brief Determine if an ARM processor has SHA2 available
405 //! \returns true if the hardware is capable of SHA2 at runtime, false otherwise.
406 //! \details SHA2 is part of the Crypto extensions from ARM C Language Extensions 2.0 (ACLE 2.0)
407 //! and available under Aarch64 (ARM-64) and Aarch32 (ARM-32) running on Aarch64 (i.e., an
408 //! AArch32 execution environment).
409 //! \details Runtime support requires compile time support. When compiling with GCC, you may
410 //! need to compile with <tt>-march=armv8-a+crypto</tt>; while Apple requires
411 //! <tt>-arch arm64</tt>. Also see ARM's <tt>__ARM_FEATURE_CRYPTO</tt> preprocessor macro.
412 inline bool HasSHA2()
413 {
414  if (!g_ArmDetectionDone)
415  DetectArmFeatures();
416  return g_hasSHA2;
417 }
418 #endif // ARM32 and ARM64
419 
420 //! \brief Provides the L1 cache line size
421 //! \returns the size of the L1 cache.
422 //! \details GetCacheLineSize() provides the size of the L1 cache on X86, X32 and X64 machines
423 //! by calling CPUID. On non-Intel based hardware, the compile time constant
424 //! <tt>CRYPTOPP_L1_CACHE_LINE_SIZE</tt> is returned. <tt>CRYPTOPP_L1_CACHE_LINE_SIZE</tt>
425 //! should be a lower bound on the L1 cache line size. 32-bit machines usually return 32 bytes,
426 //! while 64-bit machines usually return 64 bytes.
427 inline int GetCacheLineSize()
428 {
429 #if CRYPTOPP_X86 || CRYPTOPP_X32 || CRYPTOPP_X64
430  if (!g_x86DetectionDone)
431  DetectX86Features();
432  return g_cacheLineSize;
433 #else
434  return CRYPTOPP_L1_CACHE_LINE_SIZE;
435 #endif
436 }
437 
438 #if CRYPTOPP_X86 || CRYPTOPP_X32 || CRYPTOPP_X64
439 
// Assembly statement builders, one family per assembler dialect:
//   AS1/AS2/AS3 - an instruction passed as 1, 2 or 3 comma-separated macro arguments
//   ASS         - like AS2, followed by an immediate composed as a*64+b*16+c*4+d
//   ASL         - defines a label
//   ASJ/ASC     - an instruction (x) that targets a label (y)
//   AS_HEX      - spells a hexadecimal constant in the dialect's notation
// MASM output terminates statements with the *newline* marker; MS-style wraps each
// statement in __asm {}; GNU-style stringizes the arguments and appends ";".
// NOTE(review): by #if nesting this section appears to sit inside the #else of the
// earlier "#ifdef CRYPTOPP_GENERATE_X64_MASM", which would make the first branch
// below unreachable -- confirm against the upstream file.
#ifdef CRYPTOPP_GENERATE_X64_MASM
 #define AS1(x) x*newline*
 #define AS2(x, y) x, y*newline*
 #define AS3(x, y, z) x, y, z*newline*
 #define ASS(x, y, a, b, c, d) x, y, a*64+b*16+c*4+d*newline*
 #define ASL(x) label##x:*newline*
 #define ASJ(x, y, z) x label##y*newline*
 #define ASC(x, y) x label##y*newline*
 #define AS_HEX(y) 0##y##h
#elif defined(_MSC_VER) || defined(__BORLANDC__)
 #define CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
 #define AS1(x) __asm {x}
 #define AS2(x, y) __asm {x, y}
 #define AS3(x, y, z) __asm {x, y, z}
 #define ASS(x, y, a, b, c, d) __asm {x, y, (a)*64+(b)*16+(c)*4+(d)}
 #define ASL(x) __asm {label##x:}
 #define ASJ(x, y, z) __asm {x label##y}
 #define ASC(x, y) __asm {x label##y}
 #define CRYPTOPP_NAKED __declspec(naked)
 #define AS_HEX(y) 0x##y
#else
 #define CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY

 // define these in two steps to allow arguments to be expanded
 #define GNU_AS1(x) #x ";" NEW_LINE
 #define GNU_AS2(x, y) #x ", " #y ";" NEW_LINE
 #define GNU_AS3(x, y, z) #x ", " #y ", " #z ";" NEW_LINE
 #define GNU_ASL(x) "\n" #x ":" NEW_LINE
 // Only the GNU form uses z; it is appended directly after the label y.
 #define GNU_ASJ(x, y, z) #x " " #y #z ";" NEW_LINE
 #define AS1(x) GNU_AS1(x)
 #define AS2(x, y) GNU_AS2(x, y)
 #define AS3(x, y, z) GNU_AS3(x, y, z)
 // ASS and ASC stringize directly (no second expansion step, no NEW_LINE).
 #define ASS(x, y, a, b, c, d) #x ", " #y ", " #a "*64+" #b "*16+" #c "*4+" #d ";"
 #define ASL(x) GNU_ASL(x)
 #define ASJ(x, y, z) GNU_ASJ(x, y, z)
 #define ASC(x, y) #x " " #y ";"
 #define CRYPTOPP_NAKED
 #define AS_HEX(y) 0x##y
#endif
479 
// IF0 discards its argument and IF1 keeps it, so a 0/1 suffix pasted onto "IF"
// conditionally includes a fragment.
#define IF0(y)
#define IF1(y) y

// ASM_MOD(x, y): x modulo y, written for the assembler's expression evaluator.
// XMMWORD_PTR: operand-size qualifier, only spelled out for MASM.
#ifdef CRYPTOPP_GENERATE_X64_MASM
#define ASM_MOD(x, y) ((x) MOD (y))
#define XMMWORD_PTR XMMWORD PTR
#else
// GNU assembler doesn't seem to have mod operator
#define ASM_MOD(x, y) ((x)-((x)/(y))*(y))
// GAS 2.15 doesn't support XMMWORD PTR. it seems necessary only for MASM
#define XMMWORD_PTR
#endif
492 
// Register aliases and word-size helpers for the assembly macros.
//   AS_REG_n      - n-th working register at native width
//   AS_REG_nd     - the same register's 32-bit name
//   WORD_SZ       - native word size in bytes
//   WORD_REG(x)   - prefixes a register stem (e.g. ax) for the native width
//   WORD_PTR      - operand-size qualifier matching WORD_SZ
//   AS_PUSH_IF86 / AS_POP_IF86 - push/pop emitted on X86 and X32, empty on X64
//   AS_JCXZ       - jump-if-count-register-zero mnemonic for the native width
#if CRYPTOPP_X86
 // 32-bit x86: native and 32-bit names coincide.
 #define AS_REG_1 ecx
 #define AS_REG_2 edx
 #define AS_REG_3 esi
 #define AS_REG_4 edi
 #define AS_REG_5 eax
 #define AS_REG_6 ebx
 #define AS_REG_7 ebp
 #define AS_REG_1d ecx
 #define AS_REG_2d edx
 #define AS_REG_3d esi
 #define AS_REG_4d edi
 #define AS_REG_5d eax
 #define AS_REG_6d ebx
 #define AS_REG_7d ebp
 #define WORD_SZ 4
 #define WORD_REG(x) e##x
 #define WORD_PTR DWORD PTR
 #define AS_PUSH_IF86(x) AS1(push e##x)
 #define AS_POP_IF86(x) AS1(pop e##x)
 #define AS_JCXZ jecxz
#elif CRYPTOPP_X32
 // x32: 4-byte words, but push/pop use the r-prefixed (64-bit) register names.
 #define AS_REG_1 ecx
 #define AS_REG_2 edx
 #define AS_REG_3 r8d
 #define AS_REG_4 r9d
 #define AS_REG_5 eax
 #define AS_REG_6 r10d
 #define AS_REG_7 r11d
 #define AS_REG_1d ecx
 #define AS_REG_2d edx
 #define AS_REG_3d r8d
 #define AS_REG_4d r9d
 #define AS_REG_5d eax
 #define AS_REG_6d r10d
 #define AS_REG_7d r11d
 #define WORD_SZ 4
 #define WORD_REG(x) e##x
 #define WORD_PTR DWORD PTR
 #define AS_PUSH_IF86(x) AS1(push r##x)
 #define AS_POP_IF86(x) AS1(pop r##x)
 #define AS_JCXZ jecxz
#elif CRYPTOPP_X64
 // x64: the register assignment differs between MASM output and other assemblers.
 // NOTE(review): the two orderings look like the Microsoft x64 and System V
 // argument-register orders respectively -- confirm.
 #ifdef CRYPTOPP_GENERATE_X64_MASM
 #define AS_REG_1 rcx
 #define AS_REG_2 rdx
 #define AS_REG_3 r8
 #define AS_REG_4 r9
 #define AS_REG_5 rax
 #define AS_REG_6 r10
 #define AS_REG_7 r11
 #define AS_REG_1d ecx
 #define AS_REG_2d edx
 #define AS_REG_3d r8d
 #define AS_REG_4d r9d
 #define AS_REG_5d eax
 #define AS_REG_6d r10d
 #define AS_REG_7d r11d
 #else
 #define AS_REG_1 rdi
 #define AS_REG_2 rsi
 #define AS_REG_3 rdx
 #define AS_REG_4 rcx
 #define AS_REG_5 r8
 #define AS_REG_6 r9
 #define AS_REG_7 r10
 #define AS_REG_1d edi
 #define AS_REG_2d esi
 #define AS_REG_3d edx
 #define AS_REG_4d ecx
 #define AS_REG_5d r8d
 #define AS_REG_6d r9d
 #define AS_REG_7d r10d
 #endif
 #define WORD_SZ 8
 #define WORD_REG(x) r##x
 #define WORD_PTR QWORD PTR
 #define AS_PUSH_IF86(x)
 #define AS_POP_IF86(x)
 #define AS_JCXZ jrcxz
#endif
574 
// helper macro for stream cipher output
// Emits assembly that combines four XMM registers (x0..x3) with an optional input
// buffer and stores them to the output buffer:
//   1. If inputPtr is zero, the XOR step is skipped (jump to label <prefix>3).
//   2. Otherwise the four registers are XORed with the 16-byte blocks at
//      inputPtr + p0..p3 * 16. When inputPtr has any of its low four bits set the
//      blocks are first loaded into xmm<t> with movdqu (label <prefix>7); otherwise
//      pxor reads them from memory directly. inputPtr then advances by increment*16.
//   3. The registers are written to outputPtr + p0..p3 * 16 using movdqa when
//      outputPtr has none of its low four bits set, else movdqu (label <prefix>8).
//   4. outputPtr advances by increment*16 (label <prefix>9).
// labelPrefix must be unique per expansion because the labels 3, 7, 8 and 9 are
// derived from it. x0..x3 and t are XMM register numbers pasted after "xmm".
#define AS_XMM_OUTPUT4(labelPrefix, inputPtr, outputPtr, x0, x1, x2, x3, t, p0, p1, p2, p3, increment)\
 AS2( test inputPtr, inputPtr)\
 ASC( jz, labelPrefix##3)\
 AS2( test inputPtr, 15)\
 ASC( jnz, labelPrefix##7)\
 AS2( pxor xmm##x0, [inputPtr+p0*16])\
 AS2( pxor xmm##x1, [inputPtr+p1*16])\
 AS2( pxor xmm##x2, [inputPtr+p2*16])\
 AS2( pxor xmm##x3, [inputPtr+p3*16])\
 AS2( add inputPtr, increment*16)\
 ASC( jmp, labelPrefix##3)\
 ASL(labelPrefix##7)\
 AS2( movdqu xmm##t, [inputPtr+p0*16])\
 AS2( pxor xmm##x0, xmm##t)\
 AS2( movdqu xmm##t, [inputPtr+p1*16])\
 AS2( pxor xmm##x1, xmm##t)\
 AS2( movdqu xmm##t, [inputPtr+p2*16])\
 AS2( pxor xmm##x2, xmm##t)\
 AS2( movdqu xmm##t, [inputPtr+p3*16])\
 AS2( pxor xmm##x3, xmm##t)\
 AS2( add inputPtr, increment*16)\
 ASL(labelPrefix##3)\
 AS2( test outputPtr, 15)\
 ASC( jnz, labelPrefix##8)\
 AS2( movdqa [outputPtr+p0*16], xmm##x0)\
 AS2( movdqa [outputPtr+p1*16], xmm##x1)\
 AS2( movdqa [outputPtr+p2*16], xmm##x2)\
 AS2( movdqa [outputPtr+p3*16], xmm##x3)\
 ASC( jmp, labelPrefix##9)\
 ASL(labelPrefix##8)\
 AS2( movdqu [outputPtr+p0*16], xmm##x0)\
 AS2( movdqu [outputPtr+p1*16], xmm##x1)\
 AS2( movdqu [outputPtr+p2*16], xmm##x2)\
 AS2( movdqu [outputPtr+p3*16], xmm##x3)\
 ASL(labelPrefix##9)\
 AS2( add outputPtr, increment*16)
612 
#endif // X86/X32/X64
#endif // CRYPTOPP_GENERATE_X64_MASM
615 
616 NAMESPACE_END
617 
618 NAMESPACE_BEGIN(CryptoPP)
619 
// Below are inline assembly functions to make intrinsics appear to be available
// when a feature is not available to the compiler. We cannot use intrinsics
// because they require the architecture option, like -march=native or -msha.
// Also see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=57202.
// Crypto++ macros, like CRYPTOPP_SSE4_AVAILABLE, are enabled
// or disabled depending on compiler capability. However, compiling with
// -march=i686 or -march=x86_64 effectively disables the cpu feature and the
// intrinsics. Hence we fall back to the inline assembler, which is almost
// always available.

// Wei's original code was very similar to the code below. WD used inline
// functions that mirrored Intel and AMD functions. However, under a C++
// compiler, some of the constants needed to be constexpr. The code failed
// to compile with -O0 and versions of Clang and GCC 4.x. The inline function
// names also caused problems under Clang because the identifiers were
// reserved (the leading underscore). The code now uses macros to avoid
// identifier violations and provide constexpr-ness. The features have also
// been extended to ARM 32-bit and 64-bit platforms.
638 
// Declaration pieces for the inline-assembly wrapper functions below. Each wrapper
// is written as "GCC_INLINE <return type> GCC_INLINE_ATTRIB <name>(...)".
//   Clang:  static inline, forced inlining with GNU inline semantics
//   GCC:    __inline, same attributes plus __artificial__
//   other:  static inline with no attributes
#if (CRYPTOPP_X86 || CRYPTOPP_X32 || CRYPTOPP_X64 || CRYPTOPP_ARM32 || CRYPTOPP_ARM64)
# if defined(__clang__)
# define GCC_INLINE static inline
# define GCC_INLINE_ATTRIB __attribute__((__gnu_inline__, __always_inline__))
# elif defined(__GNUC__)
# define GCC_INLINE __inline
# define GCC_INLINE_ATTRIB __attribute__((__gnu_inline__, __always_inline__, __artificial__))
# else
# define GCC_INLINE static inline
# define GCC_INLINE_ATTRIB
# endif
#endif
651 
652 #if CRYPTOPP_X86 || CRYPTOPP_X32 || CRYPTOPP_X64
653 
654 #if CRYPTOPP_SSSE3_INTRINSICS_AVAILABLE && (defined(__SSSE3__) || defined(_MSC_VER))
655 # define MM_SHUFFLE_EPI8(a,b) _mm_shuffle_epi8(a,b)
656 # define MM_ALIGNR_EPI8(a,b,c) _mm_alignr_epi8(a,b,c)
657 #else
658 GCC_INLINE __m128i GCC_INLINE_ATTRIB
659 MM_SHUFFLE_EPI8(__m128i a, const __m128i b)
660 {
661  asm ("pshufb %1, %0" : "+x"(a) : "xm"(b));
662  return a;
663 }
664 # define MM_SHUFFLE_EPI32(a,b) MM_SHUFFLE_EPI32_TEMPLATE<b>(a)
665 template <unsigned int b>
666 GCC_INLINE __m128i GCC_INLINE_ATTRIB
667 MM_SHUFFLE_EPI32_TEMPLATE(__m128i a)
668 {
669  // pshufd uses imm8
670  asm ("pshufd %2, %1, %0" : "+x"(a) : "x"(a), "N"(b));
671  return a;
672 }
673 # define MM_ALIGNR_EPI8(a,b,c) MM_ALIGNR_EPI8_TEMPLATE<c>(a,b)
674 template <int c>
675 GCC_INLINE __m128i GCC_INLINE_ATTRIB
676 MM_ALIGNR_EPI8_TEMPLATE(__m128i a, const __m128i b)
677 {
678  // palignr uses imm
679  asm ("palignr %2, %1, %0" : "+x"(a) : "xm"(b), "i"(c));
680  return a;
681 }
682 #endif // __SSSE3__
683 
684 #if CRYPTOPP_SSE4_AVAILABLE && (defined(__SSE4_1__) || defined(_MSC_VER))
685 # define MM_BLEND_EPI16(a,b,c) _mm_blend_epi16(a,b,c)
686 # define MM_EXTRACT_EPI32(a,b) _mm_extract_epi32(a,b)
687 # define MM_INSERT_EPI32(a,b,c) _mm_insert_epi32(a,b,c)
688 # define MM_SHUFFLE_EPI32(a,b) _mm_shuffle_epi32(a,b)
689 #else
690 # define MM_BLEND_EPI16(a,b,c) MM_BLEND_EPI16_TEMPLATE<c>(a,b)
691 template <unsigned int c>
692 GCC_INLINE __m128i GCC_INLINE_ATTRIB
693 MM_BLEND_EPI16_TEMPLATE(__m128i a, const __m128i b)
694 {
695  // pblendw uses imm8
696  asm ("pblendw %2, %1, %0" : "+x"(a) : "xm"(b), "N"(c));
697  return a;
698 }
699 # define MM_EXTRACT_EPI32(a,b) MM_EXTRACT_EPI32_TEMPLATE<b>(a)
700 template <unsigned int b>
701 GCC_INLINE int GCC_INLINE_ATTRIB
702 MM_EXTRACT_EPI32_TEMPLATE(__m128i a)
703 {
704  int r; // pextrd uses imm8
705  asm ("pextrd %2, %1, %0" : "=rm"(r) : "x"(a), "N"(b));
706  return r;
707 }
708 # define MM_INSERT_EPI32(a,b,c) MM_INSERT_EPI32_TEMPLATE<c>(a,b)
709 template <unsigned int c>
710 GCC_INLINE __m128i GCC_INLINE_ATTRIB
711 MM_INSERT_EPI32_TEMPLATE(__m128i a, int b)
712 {
713  // pinsrd uses imm8
714  asm ("pinsrd %2, %1, %0" : "+x"(a) : "rm"(b), "N"(c));
715  return a;
716 }
717 #endif // __SSE4_1__
718 
// CRC32 wrappers. With SSE4.2 enabled in the compiler (or under MSVC) the
// MM_CRC32_* names map directly onto the _mm_crc32_* intrinsics. Otherwise they
// are inline functions that issue the crc32 instruction through GNU-style inline
// assembly: 'crc' is both input and output, 'val' is the data being folded in.
// The operand width follows the C type of 'val'.
#if CRYPTOPP_SSE4_AVAILABLE && (defined(__SSE4_2__) || defined(_MSC_VER))
# define MM_CRC32_U8(a,b) _mm_crc32_u8(a,b)
# define MM_CRC32_U16(a,b) _mm_crc32_u16(a,b)
# define MM_CRC32_U32(a,b) _mm_crc32_u32(a,b)
#else
// Folds one byte into crc.
GCC_INLINE unsigned int GCC_INLINE_ATTRIB
MM_CRC32_U8(unsigned int crc, unsigned char val)
{
 asm ("crc32 %1, %0" : "+r"(crc) : "r"(val));
 return crc;
}
// Folds one 16-bit value into crc.
GCC_INLINE unsigned int GCC_INLINE_ATTRIB
MM_CRC32_U16(unsigned int crc, unsigned short val)
{
 asm ("crc32 %1, %0" : "+r"(crc) : "r"(val));
 return crc;
}
// Folds one 32-bit value into crc.
GCC_INLINE unsigned int GCC_INLINE_ATTRIB
MM_CRC32_U32(unsigned int crc, unsigned int val)
{
 asm ("crc32 %1, %0" : "+r"(crc) : "r"(val));
 return crc;
}
#endif // __SSE4_2__
743 
// Carryless multiply wrapper. Uses the _mm_clmulepi64_si128 intrinsic when the
// compiler has PCLMUL enabled (or under MSVC); otherwise a template emits
// pclmulqdq through inline assembly, with the selector c passed as a template
// argument so it can be encoded as an immediate.
#if CRYPTOPP_AESNI_AVAILABLE && (defined(__PCLMUL__) || defined(_MSC_VER))
# define MM_CLMULEPI64_SI128(a,b,c) _mm_clmulepi64_si128(a,b,c)
#else
# define MM_CLMULEPI64_SI128(a,b,c) MM_CLMULEPI64_SI128_TEMPLATE<c>(a,b)
template <unsigned int c>
GCC_INLINE __m128i GCC_INLINE_ATTRIB
MM_CLMULEPI64_SI128_TEMPLATE(__m128i a, const __m128i b)
{
 // pclmulqdq uses imm8
 asm ("pclmulqdq %2, %1, %0" : "+x"(a) : "xm"(b), "N"(c));
 return a;
}
#endif // __PCLMUL__
757 
// AES-NI wrappers. With AES enabled in the compiler (or under MSVC) the MM_AES*
// names map directly onto the _mm_aes* intrinsics. Otherwise each is an inline
// function (or, for the immediate-taking key generation helper, a template) that
// issues the corresponding instruction through GNU-style inline assembly.
#if CRYPTOPP_AESNI_AVAILABLE && (defined(__AES__) || defined(_MSC_VER))
# define MM_AESIMC_SI128(a) _mm_aesimc_si128(a)
# define MM_AESKEYGENASSIST_SI128(a,b) _mm_aeskeygenassist_si128(a,b)
# define MM_AESENC_SI128(a,b) _mm_aesenc_si128(a,b)
# define MM_AESENCLAST_SI128(a,b) _mm_aesenclast_si128(a,b)
# define MM_AESDEC_SI128(a,b) _mm_aesdec_si128(a,b)
# define MM_AESDECLAST_SI128(a,b) _mm_aesdeclast_si128(a,b)
#else
# define MM_AESKEYGENASSIST_SI128(a,b) MM_AESKEYGENASSIST_SI128_TEMPLATE<b>(a)
// aeskeygenassist: b is a template argument so it can be encoded as an immediate.
template <unsigned int b>
GCC_INLINE __m128i GCC_INLINE_ATTRIB
MM_AESKEYGENASSIST_SI128_TEMPLATE(const __m128i a)
{
 __m128i r; // aeskeygenassist uses imm8
 asm ("aeskeygenassist %2, %1, %0" : "=x"(r) : "xm"(a), "N"(b));
 return r;
}
// aesimc: result is written to a fresh register; a is only read.
GCC_INLINE __m128i GCC_INLINE_ATTRIB
MM_AESIMC_SI128 (const __m128i a)
{
 __m128i r;
 asm ("aesimc %1, %0" : "=x"(r) : "xm"(a));
 return r;
}
// aesenc: a is updated in place using b and returned.
GCC_INLINE __m128i GCC_INLINE_ATTRIB
MM_AESENC_SI128 (__m128i a, const __m128i b)
{
 asm ("aesenc %1, %0" : "+x"(a) : "xm"(b));
 return a;
}
// aesenclast: a is updated in place using b and returned.
GCC_INLINE __m128i GCC_INLINE_ATTRIB
MM_AESENCLAST_SI128 (__m128i a, const __m128i b)
{
 asm ("aesenclast %1, %0" : "+x"(a) : "xm"(b));
 return a;
}
// aesdec: a is updated in place using b and returned.
GCC_INLINE __m128i GCC_INLINE_ATTRIB
MM_AESDEC_SI128 (__m128i a, const __m128i b)
{
 asm ("aesdec %1, %0" : "+x"(a) : "xm"(b));
 return a;
}
// aesdeclast: a is updated in place using b and returned.
GCC_INLINE __m128i GCC_INLINE_ATTRIB
MM_AESDECLAST_SI128 (__m128i a, const __m128i b)
{
 asm ("aesdeclast %1, %0" : "+x"(a) : "xm"(b));
 return a;
}
#endif // __AES__
807 
808 #if CRYPTOPP_SSE_SHA_AVAILABLE && (defined(__SHA__) || defined(_MSC_VER))
809 # define MM_SHA256RNDS2_EPU32(a,b,c) _mm_sha256rnds2_epu32(a,b,c)
810 # define MM_SHA256MSG1_EPU32(a,b) _mm_sha256msg1_epu32(a,b)
811 # define MM_SHA256MSG2_EPU32(a,b) _mm_sha256msg2_epu32(a,b)
812 # define MM_SHA1RNDS4_EPU32(a,b,c) _mm_sha1rnds4_epu32(a,b,c)
813 # define MM_SHA1NEXTE_EPU32(a,b) _mm_sha1nexte_epu32(a,b)
814 # define MM_SHA256MSG2_EPU32(a,b) _mm_sha256msg2_epu32(a,b)
815 # define MM_SHA1MSG1_EPU32(a,b) _mm_sha1msg1_epu32(a,b)
816 # define MM_SHA1MSG2_EPU32(a,b) _mm_sha1msg2_epu32(a,b)
817 #else
818 GCC_INLINE __m128i GCC_INLINE_ATTRIB
819 MM_SHA256RNDS2_EPU32(__m128i a, const __m128i b, const __m128i c)
820 {
821  asm ("sha256rnds2 %2, %1, %0" : "+x"(a) : "xm"(b), "Yz" (c));
822  return a;
823 }
824 GCC_INLINE __m128i GCC_INLINE_ATTRIB
825 MM_SHA256MSG1_EPU32(__m128i a, const __m128i b)
826 {
827  asm ("sha256msg1 %1, %0" : "+x"(a) : "xm"(b));
828  return a;
829 }
830 GCC_INLINE __m128i GCC_INLINE_ATTRIB
831 MM_SHA256MSG2_EPU32(__m128i a, const __m128i b)
832 {
833  asm ("sha256msg2 %1, %0" : "+x"(a) : "xm"(b));
834  return a;
835 }
836 # define MM_SHA1RNDS4_EPU32(a,b,c) MM_SHA1RNDS4_EPU32_TEMPLATE<c>(a,b)
837 template<int c>
838 GCC_INLINE __m128i GCC_INLINE_ATTRIB
839 MM_SHA1RNDS4_EPU32_TEMPLATE(__m128i a, const __m128i b)
840 {
841  // sha1rnds4 uses imm
842  asm ("sha1rnds4 %2, %1, %0" : "+x"(a) : "xm"(b), "i"(c));
843  return a;
844 }
845 GCC_INLINE __m128i GCC_INLINE_ATTRIB
846 MM_SHA1NEXTE_EPU32(__m128i a, const __m128i b)
847 {
848  asm ("sha1nexte %1, %0" : "+x"(a) : "xm"(b));
849  return a;
850 }
851 GCC_INLINE __m128i GCC_INLINE_ATTRIB
852 MM_SHA1MSG1_EPU32(__m128i a, const __m128i b)
853 {
854  asm ("sha1msg1 %1, %0" : "+x"(a) : "xm"(b));
855  return a;
856 }
857 GCC_INLINE __m128i GCC_INLINE_ATTRIB
858 MM_SHA1MSG2_EPU32(__m128i a, const __m128i b)
859 {
860  asm ("sha1msg2 %1, %0" : "+x"(a) : "xm"(b));
861  return a;
862 }
863 #endif // __SHA__
864 
865 #endif // CRYPTOPP_X86 || CRYPTOPP_X32 || CRYPTOPP_X64
866 
867 #if (CRYPTOPP_ARM32 || CRYPTOPP_ARM64)
868 
// Aarch32 and Aarch64 are tricky. GCC 4.8/Binutils 2.24 cannot handle ASM or intrinsics.
// GCC 4.9/Binutils 2.25 can handle ASM (but not intrinsics) as long as cpu options
// include +crc. GCC 5 and GCC 6 with Binutils 2.26 get better. GCC 6 provides '#pragma
// target' on Aarch64 to add +crc to cpu, but it requires Binutils 2.26. We use the technique
// below to bypass compiler and assembler architectural requirements.
// Also see https://sourceware.org/ml/binutils/2017-04/msg00171.html.
875 
// Inline-assembly polynomial multiply, compiled only for GCC-compatible compilers
// when CRYPTOPP_NEON_AVAILABLE is set but __ARM_NEON__ is not defined. Both inputs
// and the result are bound to SIMD registers ("w" constraint).
// NOTE(review): the pmull operands carry no arrangement specifiers (e.g. .1q/.1d);
// confirm the targeted assemblers accept this form.
#if CRYPTOPP_NEON_AVAILABLE && defined(__GNUC__) && !defined(__ARM_NEON__)
GCC_INLINE uint8x16_t GCC_INLINE_ATTRIB
PMULL_LOW(const uint8x16_t a, const uint8x16_t b)
{
 uint8x16_t r;
 asm ("pmull %0, %1, %2" : "=w"(r) : "w"(a), "w"(b));
 return r;
}
#endif
885 
// When the CRC32 code path is enabled and the compiler either advertises the
// extension (__ARM_FEATURE_CRC32) or is MSVC, the CRC32* names are plain aliases
// for the __crc32* intrinsics. In every other case the #else branch supplies
// inline-assembly functions with the same names.
// NOTE(review): the #else branch is also selected when CRYPTOPP_ARM_CRC32_AVAILABLE
// is zero or undefined - confirm that this is intended.
886 #if CRYPTOPP_ARM_CRC32_AVAILABLE && (defined(__ARM_FEATURE_CRC32) || defined(_MSC_VER))
887 # define CRC32B(a,b) __crc32b(a,b)
888 # define CRC32H(a,b) __crc32h(a,b)
889 # define CRC32W(a,b) __crc32w(a,b)
890 # define CRC32CB(a,b) __crc32cb(a,b)
891 # define CRC32CH(a,b) __crc32ch(a,b)
892 # define CRC32CW(a,b) __crc32cw(a,b)
893 #else
//! \brief Issues the A64 \c crc32b instruction without needing +crc support from the assembler
//! \param crc value supplied as the first source register (Rn)
//! \param val byte supplied as the second source register (Rm)
//! \returns the contents of the destination register (Rd)
//! \details Clang is given the plain mnemonic. Other compilers emit the instruction as a raw
//!   <tt>.inst</tt> word: the assembler symbols <tt>reg_x0</tt>..<tt>reg_x30</tt> translate the
//!   register name printed for each operand into its number, which is OR-ed into the
//!   Rd (bits 0-4), Rn (bits 5-9) and Rm (bits 16-20) fields of the base opcode. The table
//!   covers every general-purpose register, so the statement assembles whichever
//!   registers the compiler allocates.
894 GCC_INLINE unsigned int GCC_INLINE_ATTRIB
895 CRC32B(unsigned int crc, unsigned char val)
896 {
897 #if defined(__clang__)
898  unsigned int res;
899  asm ("crc32b %w0, %w1, %w2" : "=r"(res) : "r"(crc), "r"(val));
900  return res;
901 #else
902  unsigned int res;
903  asm ("\n"
 // Define reg_x<N> = N for N in 0..30; "\\num" reaches the assembler as "\num".
904  "\t" ".irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30" "\n"
905  "\t" ".set reg_x\\num, \\num" "\n"
906  "\t" ".endr" "\n"
912  "\t" "#crc32b %w0, %w1, %w2" "\n"
913  "\t" ".inst 0x1ac04000 | (reg_%0) | (reg_%1 << 5) | (reg_%2 << 16)" "\n"
914  : "=r"(res) : "r"(crc), "r"(val)
915  );
916  return res;
917 #endif
918 }
//! \brief Issues the A64 \c crc32h instruction without needing +crc support from the assembler
//! \param crc value supplied as the first source register (Rn)
//! \param val halfword supplied as the second source register (Rm)
//! \returns the contents of the destination register (Rd)
//! \details Clang is given the plain mnemonic. Other compilers emit the instruction as a raw
//!   <tt>.inst</tt> word: the assembler symbols <tt>reg_x0</tt>..<tt>reg_x30</tt> translate the
//!   register name printed for each operand into its number, which is OR-ed into the
//!   Rd (bits 0-4), Rn (bits 5-9) and Rm (bits 16-20) fields of the base opcode. The table
//!   covers every general-purpose register, so the statement assembles whichever
//!   registers the compiler allocates.
919 GCC_INLINE unsigned int GCC_INLINE_ATTRIB
920 CRC32H(unsigned int crc, unsigned short val)
921 {
922 #if defined(__clang__)
923  unsigned int res;
924  asm ("crc32h %w0, %w1, %w2" : "=r"(res) : "r"(crc), "r"(val));
925  return res;
926 #else
927  unsigned int res;
928  asm ("\n"
 // Define reg_x<N> = N for N in 0..30; "\\num" reaches the assembler as "\num".
929  "\t" ".irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30" "\n"
930  "\t" ".set reg_x\\num, \\num" "\n"
931  "\t" ".endr" "\n"
937  "\t" "#crc32h %w0, %w1, %w2" "\n"
938  "\t" ".inst 0x1ac04400 | (reg_%0) | (reg_%1 << 5) | (reg_%2 << 16)" "\n"
939  : "=r"(res) : "r"(crc), "r"(val)
940  );
941  return res;
942 #endif
943 }
//! \brief Issues the A64 \c crc32w instruction without needing +crc support from the assembler
//! \param crc value supplied as the first source register (Rn)
//! \param val word supplied as the second source register (Rm)
//! \returns the contents of the destination register (Rd)
//! \details Clang is given the plain mnemonic. Other compilers emit the instruction as a raw
//!   <tt>.inst</tt> word: the assembler symbols <tt>reg_x0</tt>..<tt>reg_x30</tt> translate the
//!   register name printed for each operand into its number, which is OR-ed into the
//!   Rd (bits 0-4), Rn (bits 5-9) and Rm (bits 16-20) fields of the base opcode. The table
//!   covers every general-purpose register, so the statement assembles whichever
//!   registers the compiler allocates.
944 GCC_INLINE unsigned int GCC_INLINE_ATTRIB
945 CRC32W(unsigned int crc, unsigned int val)
946 {
947 #if defined(__clang__)
948  unsigned int res;
949  asm ("crc32w %w0, %w1, %w2" : "=r"(res) : "r"(crc), "r"(val));
950  return res;
951 #else
952  unsigned int res;
953  asm ("\n"
 // Define reg_x<N> = N for N in 0..30; "\\num" reaches the assembler as "\num".
954  "\t" ".irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30" "\n"
955  "\t" ".set reg_x\\num, \\num" "\n"
956  "\t" ".endr" "\n"
962  "\t" "#crc32w %w0, %w1, %w2" "\n"
963  "\t" ".inst 0x1ac04800 | (reg_%0) | (reg_%1 << 5) | (reg_%2 << 16)" "\n"
964  : "=r"(res) : "r"(crc), "r"(val)
965  );
966  return res;
967 #endif
968 }
//! \brief Issues the A64 \c crc32cb instruction without needing +crc support from the assembler
//! \param crc value supplied as the first source register (Rn)
//! \param val byte supplied as the second source register (Rm)
//! \returns the contents of the destination register (Rd)
//! \details Clang is given the plain mnemonic. Other compilers emit the instruction as a raw
//!   <tt>.inst</tt> word: the assembler symbols <tt>reg_x0</tt>..<tt>reg_x30</tt> translate the
//!   register name printed for each operand into its number, which is OR-ed into the
//!   Rd (bits 0-4), Rn (bits 5-9) and Rm (bits 16-20) fields of the base opcode. The table
//!   covers every general-purpose register, so the statement assembles whichever
//!   registers the compiler allocates.
969 GCC_INLINE unsigned int GCC_INLINE_ATTRIB
970 CRC32CB(unsigned int crc, unsigned char val)
971 {
972 #if defined(__clang__)
973  unsigned int res;
974  asm ("crc32cb %w0, %w1, %w2" : "=r"(res) : "r"(crc), "r"(val));
975  return res;
976 #else
977  unsigned int res;
978  asm ("\n"
 // Define reg_x<N> = N for N in 0..30; "\\num" reaches the assembler as "\num".
979  "\t" ".irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30" "\n"
980  "\t" ".set reg_x\\num, \\num" "\n"
981  "\t" ".endr" "\n"
987  "\t" "#crc32cb %w0, %w1, %w2" "\n"
988  "\t" ".inst 0x1ac05000 | (reg_%0) | (reg_%1 << 5) | (reg_%2 << 16)" "\n"
989  : "=r"(res) : "r"(crc), "r"(val)
990  );
991  return res;
992 #endif
993 }
//! \brief Issues the A64 \c crc32ch instruction without needing +crc support from the assembler
//! \param crc value supplied as the first source register (Rn)
//! \param val halfword supplied as the second source register (Rm)
//! \returns the contents of the destination register (Rd)
//! \details Clang is given the plain mnemonic. Other compilers emit the instruction as a raw
//!   <tt>.inst</tt> word: the assembler symbols <tt>reg_x0</tt>..<tt>reg_x30</tt> translate the
//!   register name printed for each operand into its number, which is OR-ed into the
//!   Rd (bits 0-4), Rn (bits 5-9) and Rm (bits 16-20) fields of the base opcode. The table
//!   covers every general-purpose register, so the statement assembles whichever
//!   registers the compiler allocates.
994 GCC_INLINE unsigned int GCC_INLINE_ATTRIB
995 CRC32CH(unsigned int crc, unsigned short val)
996 {
997 #if defined(__clang__)
998  unsigned int res;
999  asm ("crc32ch %w0, %w1, %w2" : "=r"(res) : "r"(crc), "r"(val));
1000  return res;
1001 #else
1002  unsigned int res;
1003  asm ("\n"
 // Define reg_x<N> = N for N in 0..30; "\\num" reaches the assembler as "\num".
1004  "\t" ".irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30" "\n"
1005  "\t" ".set reg_x\\num, \\num" "\n"
1006  "\t" ".endr" "\n"
1012  "\t" "#crc32ch %w0, %w1, %w2" "\n"
1013  "\t" ".inst 0x1ac05400 | (reg_%0) | (reg_%1 << 5) | (reg_%2 << 16)" "\n"
1014  : "=r"(res) : "r"(crc), "r"(val)
1015  );
1016  return res;
1017 #endif
1018 }
//! \brief Issues the A64 \c crc32cw instruction without needing +crc support from the assembler
//! \param crc value supplied as the first source register (Rn)
//! \param val word supplied as the second source register (Rm)
//! \returns the contents of the destination register (Rd)
//! \details Clang is given the plain mnemonic. Other compilers emit the instruction as a raw
//!   <tt>.inst</tt> word: the assembler symbols <tt>reg_x0</tt>..<tt>reg_x30</tt> translate the
//!   register name printed for each operand into its number, which is OR-ed into the
//!   Rd (bits 0-4), Rn (bits 5-9) and Rm (bits 16-20) fields of the base opcode. The table
//!   covers every general-purpose register, so the statement assembles whichever
//!   registers the compiler allocates.
1019 GCC_INLINE unsigned int GCC_INLINE_ATTRIB
1020 CRC32CW(unsigned int crc, unsigned int val)
1021 {
1022 #if defined(__clang__)
1023  unsigned int res;
1024  asm ("crc32cw %w0, %w1, %w2" : "=r"(res) : "r"(crc), "r"(val));
1025  return res;
1026 #else
1027  unsigned int res;
1028  asm ("\n"
 // Define reg_x<N> = N for N in 0..30; "\\num" reaches the assembler as "\num".
1029  "\t" ".irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30" "\n"
1030  "\t" ".set reg_x\\num, \\num" "\n"
1031  "\t" ".endr" "\n"
1037  "\t" "#crc32cw %w0, %w1, %w2" "\n"
1038  "\t" ".inst 0x1ac05800 | (reg_%0) | (reg_%1 << 5) | (reg_%2 << 16)" "\n"
1039  : "=r"(res) : "r"(crc), "r"(val)
1040  );
1041  return res;
1042 #endif
1043 }
1044 #endif // CRYPTOPP_ARM_CRC32_AVAILABLE
1045 
1046 #if CRYPTOPP_ARM_PMULL_AVAILABLE && defined(__GNUC__) && !defined(__ARM_NEON__)
//! \brief Polynomial (carry-less) multiply issued through GCC-style inline assembly
//! \param a first operand, placed in a SIMD register (<tt>"w"</tt>); the low 64-bit lane is multiplied
//! \param b second operand, placed in a SIMD register (<tt>"w"</tt>); the low 64-bit lane is multiplied
//! \returns the SIMD register holding the 128-bit product
//! \details The operands are printed as bare <tt>vN</tt> register names, so the arrangement
//!   suffixes are spelled out in the template; the A64 assembler does not accept \c pmull
//!   without them. The <tt>1q/1d</tt> form is the one gated by the PMULL feature that the
//!   surrounding guard names, and it needs an assembler with that extension enabled.
//!   NOTE(review): confirm the 64-bit form is what callers expect from a uint8x16_t interface.
1047 GCC_INLINE uint8x16_t GCC_INLINE_ATTRIB
1048 ARM_PMULL(const uint8x16_t a, const uint8x16_t b)
1049 {
1050  uint8x16_t r;
1051  asm ("pmull %0.1q, %1.1d, %2.1d" : "=w"(r) : "w"(a), "w"(b));
1052  return r;
1053 }
1054 #endif
1055 
1056 #endif // CRYPTOPP_ARM32 || CRYPTOPP_ARM64
1057 
1058 NAMESPACE_END // CryptoPP
1059 
1060 // Issue 340
1061 #if CRYPTOPP_GCC_DIAGNOSTIC_AVAILABLE
1062 # pragma GCC diagnostic pop
1063 #endif
1064 
1065 #endif // CRYPTOPP_CPU_H
bool HasSHA()
Determines SHA availability.
Definition: cpu.h:238
bool HasISSE()
Determines SSE availability.
Definition: cpu.h:168
bool HasCRC32()
Determine if an ARM processor has CRC32 available.
Definition: cpu.h:367
bool HasSSE4()
Determines SSE4 availability.
Definition: cpu.h:208
bool HasSSSE3()
Determines SSSE3 availability.
Definition: cpu.h:198
bool HasPadlockRNG()
Determines Padlock RNG availability.
Definition: cpu.h:278
bool HasAES()
Determine if an ARM processor has AES available.
Definition: cpu.h:382
bool IsP4()
Determines if the CPU is an Intel P4.
Definition: cpu.h:248
Library configuration file.
int GetCacheLineSize()
Provides the L1 cache line size.
Definition: cpu.h:427
bool HasRDRAND()
Determines RDRAND availability.
Definition: cpu.h:258
bool HasRDSEED()
Determines RDSEED availability.
Definition: cpu.h:268
bool HasCLMUL()
Determines Carryless Multiply availability.
Definition: cpu.h:228
bool HasSHA1()
Determine if an ARM processor has SHA1 available.
Definition: cpu.h:397
bool HasPadlockACE2()
Determines Padlock ACE2 availability.
Definition: cpu.h:298
bool HasPadlockPHE()
Determines Padlock PHE availability.
Definition: cpu.h:308
bool HasPadlockPMM()
Determines Padlock PMM availability.
Definition: cpu.h:318
bool HasAESNI()
Determines AES-NI availability.
Definition: cpu.h:218
bool HasSSE2()
Determines SSE2 availability.
Definition: cpu.h:183
bool HasSHA2()
Determine if an ARM processor has SHA2 available.
Definition: cpu.h:412
bool HasMMX()
Determines MMX availability.
Definition: cpu.h:153
Crypto++ library namespace.
bool HasPadlockACE()
Determines Padlock ACE availability.
Definition: cpu.h:288
bool HasPMULL()
Determine if an ARM processor provides Polynomial Multiplication (long)
Definition: cpu.h:352
bool HasNEON()
Determine if an ARM processor has Advanced SIMD available.
Definition: cpu.h:339