Crypto++  5.6.3
Free C++ class library of cryptographic schemes
cpu.h
Go to the documentation of this file.
1 // cpu.h - written and placed in the public domain by Wei Dai
2 
3 //! \file
4 //! \headerfile cpu.h
5 //! \brief Classes, functions, intrinsics and features for X86, X32 and X64 assembly
6 
7 #ifndef CRYPTOPP_CPU_H
8 #define CRYPTOPP_CPU_H
9 
10 #include "config.h"
11 
12 #ifdef CRYPTOPP_GENERATE_X64_MASM
13 
// CRYPTOPP_GENERATE_X64_MASM mode: the assembly feature macros are defined directly here.
// NOTE(review): presumably this mode preprocesses the header to produce the X64 MASM
// source rather than to compile C++ - confirm against the build scripts.
14 #define CRYPTOPP_X86_ASM_AVAILABLE
15 #define CRYPTOPP_BOOL_X64 1
16 #define CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 1
// Defined empty: the NAMESPACE_END at the end of this header sits outside the #else
// branch and is therefore still expanded in this mode.
17 #define NAMESPACE_END
18 
19 #else
20 
21 # if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
22 # include <emmintrin.h>
23 # endif
24 
25 #if CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE
26 #if !defined(__GNUC__) || defined(__SSSE3__) || defined(__INTEL_COMPILER)
27 #include <tmmintrin.h>
28 #else
29 NAMESPACE_BEGIN(CryptoPP)
//! \brief Inline-assembly stand-in for the _mm_shuffle_epi8 intrinsic
//! \details Only compiled when <tmmintrin.h> is not included, i.e. a GNU-compatible
//!  compiler (not Intel) without __SSSE3__ defined. Issues pshufb with b as the source
//!  operand (SSE register or memory) and a as the read-write SSE register operand.
//! \returns a after the instruction has updated it
30 __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
31 _mm_shuffle_epi8 (__m128i a, __m128i b)
32 {
33  asm ("pshufb %1, %0" : "+x"(a) : "xm"(b));
34  return a;
35 }
36 NAMESPACE_END
37 #endif // tmmintrin.h
38 #if !defined(__GNUC__) || defined(__SSE4_1__) || defined(__INTEL_COMPILER)
39 #include <smmintrin.h>
40 #else
41 NAMESPACE_BEGIN(CryptoPP)
//! \brief Inline-assembly stand-in for the _mm_extract_epi32 intrinsic
//! \details Only compiled when <smmintrin.h> is not included, i.e. a GNU-compatible
//!  compiler (not Intel) without __SSE4_1__ defined. Issues pextrd with a as the SSE
//!  register source and i as the selector. i is bound with the "i" constraint, so it
//!  must be an immediate (compile-time constant) at the point of expansion.
//! \returns the int written by pextrd (output may be a general register or memory)
42 __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
43 _mm_extract_epi32 (__m128i a, const int i)
44 {
45  int r;
46  asm ("pextrd %2, %1, %0" : "=rm"(r) : "x"(a), "i"(i));
47  return r;
48 }
//! \brief Inline-assembly stand-in for the _mm_insert_epi32 intrinsic
//! \details Part of the same fallback group used when <smmintrin.h> is not included.
//!  Issues pinsrd with b as the source (general register or memory), i as the
//!  immediate selector and a as the read-write SSE register operand. i must be an
//!  immediate because it is bound with the "i" constraint.
//! \returns a after the instruction has updated it
49 __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
50 _mm_insert_epi32 (__m128i a, int b, const int i)
51 {
52  asm ("pinsrd %2, %1, %0" : "+x"(a) : "rm"(b), "i"(i));
53  return a;
54 }
55 NAMESPACE_END
56 #endif // smmintrin.h
57 #if !defined(__GNUC__) || (defined(__AES__) && defined(__PCLMUL__)) || defined(__INTEL_COMPILER)
58 #include <wmmintrin.h>
59 #else
60 NAMESPACE_BEGIN(CryptoPP)
//! \brief Inline-assembly stand-in for the _mm_clmulepi64_si128 intrinsic
//! \details Only compiled when <wmmintrin.h> is not included, i.e. a GNU-compatible
//!  compiler (not Intel) without both __AES__ and __PCLMUL__ defined. Issues pclmulqdq
//!  with b as the source operand (SSE register or memory), i as the immediate selector
//!  and a as the read-write SSE register operand. i must be an immediate.
//! \returns a after the instruction has updated it
61 __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
62 _mm_clmulepi64_si128 (__m128i a, __m128i b, const int i)
63 {
64  asm ("pclmulqdq %2, %1, %0" : "+x"(a) : "xm"(b), "i"(i));
65  return a;
66 }
//! \brief Inline-assembly stand-in for the _mm_aeskeygenassist_si128 intrinsic
//! \details Part of the fallback group used when <wmmintrin.h> is not included.
//!  Issues aeskeygenassist with a as the source operand (SSE register or memory) and
//!  i as the immediate; the result goes to a separate write-only SSE register, so a
//!  itself is not modified. i must be an immediate.
//! \returns the value produced by the instruction
67 __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
68 _mm_aeskeygenassist_si128 (__m128i a, const int i)
69 {
70  __m128i r;
71  asm ("aeskeygenassist %2, %1, %0" : "=x"(r) : "xm"(a), "i"(i));
72  return r;
73 }
//! \brief Inline-assembly stand-in for the _mm_aesimc_si128 intrinsic
//! \details Part of the fallback group used when <wmmintrin.h> is not included.
//!  Issues aesimc with a as the source operand (SSE register or memory) and writes
//!  the result to a separate write-only SSE register.
//! \returns the value produced by the instruction
74 __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
75 _mm_aesimc_si128 (__m128i a)
76 {
77  __m128i r;
78  asm ("aesimc %1, %0" : "=x"(r) : "xm"(a));
79  return r;
80 }
//! \brief Inline-assembly stand-in for the _mm_aesenc_si128 intrinsic
//! \details Part of the fallback group used when <wmmintrin.h> is not included.
//!  Issues aesenc with b as the source operand (SSE register or memory) and a as the
//!  read-write SSE register operand.
//! \returns a after the instruction has updated it
81 __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
82 _mm_aesenc_si128 (__m128i a, __m128i b)
83 {
84  asm ("aesenc %1, %0" : "+x"(a) : "xm"(b));
85  return a;
86 }
//! \brief Inline-assembly stand-in for the _mm_aesenclast_si128 intrinsic
//! \details Part of the fallback group used when <wmmintrin.h> is not included.
//!  Issues aesenclast with b as the source operand (SSE register or memory) and a as
//!  the read-write SSE register operand.
//! \returns a after the instruction has updated it
87 __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
88 _mm_aesenclast_si128 (__m128i a, __m128i b)
89 {
90  asm ("aesenclast %1, %0" : "+x"(a) : "xm"(b));
91  return a;
92 }
//! \brief Inline-assembly stand-in for the _mm_aesdec_si128 intrinsic
//! \details Part of the fallback group used when <wmmintrin.h> is not included.
//!  Issues aesdec with b as the source operand (SSE register or memory) and a as the
//!  read-write SSE register operand.
//! \returns a after the instruction has updated it
93 __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
94 _mm_aesdec_si128 (__m128i a, __m128i b)
95 {
96  asm ("aesdec %1, %0" : "+x"(a) : "xm"(b));
97  return a;
98 }
//! \brief Inline-assembly stand-in for the _mm_aesdeclast_si128 intrinsic
//! \details Part of the fallback group used when <wmmintrin.h> is not included.
//!  Issues aesdeclast with b as the source operand (SSE register or memory) and a as
//!  the read-write SSE register operand.
//! \returns a after the instruction has updated it
99 __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
100 _mm_aesdeclast_si128 (__m128i a, __m128i b)
101 {
102  asm ("aesdeclast %1, %0" : "+x"(a) : "xm"(b));
103  return a;
104 }
105 NAMESPACE_END
106 #endif // wmmintrin.h
107 #endif // CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE
108 
109 NAMESPACE_BEGIN(CryptoPP)
110 
111 #if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64
112 
113 #define CRYPTOPP_CPUID_AVAILABLE
114 
115 // these should not be used directly
// The inline accessors that follow in this header (HasMMX(), HasSSE2(),
// GetCacheLineSize(), ...) read these variables and call DetectX86Features()
// first whenever g_x86DetectionDone is false.
116 extern CRYPTOPP_DLL bool g_x86DetectionDone;
117 extern CRYPTOPP_DLL bool g_hasMMX;
118 extern CRYPTOPP_DLL bool g_hasISSE;
119 extern CRYPTOPP_DLL bool g_hasSSE2;
120 extern CRYPTOPP_DLL bool g_hasSSSE3;
121 extern CRYPTOPP_DLL bool g_hasAESNI;
122 extern CRYPTOPP_DLL bool g_hasCLMUL;
123 extern CRYPTOPP_DLL bool g_isP4;
124 extern CRYPTOPP_DLL bool g_hasRDRAND;
125 extern CRYPTOPP_DLL bool g_hasRDSEED;
126 extern CRYPTOPP_DLL word32 g_cacheLineSize;
127 
// Defined outside this header. NOTE(review): presumably fills in the g_* variables
// above and sets g_x86DetectionDone - confirm in the implementation file.
128 CRYPTOPP_DLL void CRYPTOPP_API DetectX86Features();
// Defined outside this header. NOTE(review): presumably runs the CPUID instruction
// for function `input` and stores the four result registers in `output`; the meaning
// of the bool result is not visible here - confirm in the implementation file.
129 CRYPTOPP_DLL bool CRYPTOPP_API CpuId(word32 input, word32 output[4]);
130 
131 inline bool HasMMX()
132 {
133 #if CRYPTOPP_BOOL_X64
134  return true;
135 #else
136  if (!g_x86DetectionDone)
137  DetectX86Features();
138  return g_hasMMX;
139 #endif
140 }
141 
142 inline bool HasISSE()
143 {
144 #if CRYPTOPP_BOOL_X64
145  return true;
146 #else
147  if (!g_x86DetectionDone)
148  DetectX86Features();
149  return g_hasISSE;
150 #endif
151 }
152 
153 inline bool HasSSE2()
154 {
155 #if CRYPTOPP_BOOL_X64
156  return true;
157 #else
158  if (!g_x86DetectionDone)
159  DetectX86Features();
160  return g_hasSSE2;
161 #endif
162 }
163 
164 inline bool HasSSSE3()
165 {
166  if (!g_x86DetectionDone)
167  DetectX86Features();
168  return g_hasSSSE3;
169 }
170 
171 inline bool HasAESNI()
172 {
173  if (!g_x86DetectionDone)
174  DetectX86Features();
175  return g_hasAESNI;
176 }
177 
178 inline bool HasCLMUL()
179 {
180  if (!g_x86DetectionDone)
181  DetectX86Features();
182  return g_hasCLMUL;
183 }
184 
185 inline bool IsP4()
186 {
187  if (!g_x86DetectionDone)
188  DetectX86Features();
189  return g_isP4;
190 }
191 
192 inline bool HasRDRAND()
193 {
194  if (!g_x86DetectionDone)
195  DetectX86Features();
196  return g_hasRDRAND;
197 }
198 
199 inline bool HasRDSEED()
200 {
201  if (!g_x86DetectionDone)
202  DetectX86Features();
203  return g_hasRDSEED;
204 }
205 
206 inline int GetCacheLineSize()
207 {
208  if (!g_x86DetectionDone)
209  DetectX86Features();
210  return g_cacheLineSize;
211 }
212 
213 #else
214 
215 inline int GetCacheLineSize()
216 {
217  return CRYPTOPP_L1_CACHE_LINE_SIZE;
218 }
219 
220 #endif
221 
222 #endif
223 
// Assembly-emission macros. The same AS1/AS2/AS3/ASS/ASL/ASJ/ASC/AS_HEX names are
// defined for three targets so one assembly listing can be written once:
//   AS1(x)       - emits x
//   AS2(x, y)    - emits "x, y" (e.g. AS2(test reg, 15))
//   AS3(x, y, z) - emits "x, y, z"
//   ASS(x, y, a, b, c, d) - emits "x, y, <a*64+b*16+c*4+d>"
//   ASL(x)       - emits a label definition
//   ASJ(x, y, z) - emits x followed by a label reference built from y (and z, GNU only)
//   ASC(x, y)    - emits x followed by a label reference built from y
//   AS_HEX(y)    - spells a hexadecimal literal in the target's notation
224 #ifdef CRYPTOPP_GENERATE_X64_MASM
 // MASM text generation: macros expand to raw tokens terminated by a *newline* marker.
 // NOTE(review): presumably the marker is turned into real line breaks by a later
 // step of the MASM generation - confirm against the build scripts.
225  #define AS1(x) x*newline*
226  #define AS2(x, y) x, y*newline*
227  #define AS3(x, y, z) x, y, z*newline*
228  #define ASS(x, y, a, b, c, d) x, y, a*64+b*16+c*4+d*newline*
229  #define ASL(x) label##x:*newline*
230  #define ASJ(x, y, z) x label##y*newline*
231  #define ASC(x, y) x label##y*newline*
232  #define AS_HEX(y) 0##y##h
233 #elif defined(_MSC_VER) || defined(__BORLANDC__)
 // MS-style inline assembly: each macro wraps its text in an __asm { } block.
 // Labels are spelled label<x>; the third ASJ argument is ignored.
234  #define CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
235  #define AS1(x) __asm {x}
236  #define AS2(x, y) __asm {x, y}
237  #define AS3(x, y, z) __asm {x, y, z}
238  #define ASS(x, y, a, b, c, d) __asm {x, y, (a)*64+(b)*16+(c)*4+(d)}
239  #define ASL(x) __asm {label##x:}
240  #define ASJ(x, y, z) __asm {x label##y}
241  #define ASC(x, y) __asm {x label##y}
242  #define CRYPTOPP_NAKED __declspec(naked)
243  #define AS_HEX(y) 0x##y
244 #else
 // GNU-style inline assembly: each macro expands to string literals that the
 // compiler concatenates into the asm template.
245  #define CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
246 
 // Assembler syntax-switch directives. The Clang variants omit the prefix/noprefix
 // argument and make NEW_LINE a real "\n"; the other variant spells the argument
 // out and leaves NEW_LINE empty.
247 #if defined(CRYPTOPP_CLANG_VERSION) || defined(CRYPTOPP_APPLE_CLANG_VERSION)
248  #define NEW_LINE "\n"
249  #define INTEL_PREFIX ".intel_syntax;"
250  #define INTEL_NOPREFIX ".intel_syntax;"
251  #define ATT_PREFIX ".att_syntax;"
252  #define ATT_NOPREFIX ".att_syntax;"
253 #else
254  #define NEW_LINE
255  #define INTEL_PREFIX ".intel_syntax prefix;"
256  #define INTEL_NOPREFIX ".intel_syntax noprefix;"
257  #define ATT_PREFIX ".att_syntax prefix;"
258  #define ATT_NOPREFIX ".att_syntax noprefix;"
259 #endif
260 
261  // define these in two steps to allow arguments to be expanded
 // (the GNU_* helpers stringize with #, which would otherwise suppress expansion
 // of macro arguments such as AS_REG_1)
262  #define GNU_AS1(x) #x ";" NEW_LINE
263  #define GNU_AS2(x, y) #x ", " #y ";" NEW_LINE
264  #define GNU_AS3(x, y, z) #x ", " #y ", " #z ";" NEW_LINE
265  #define GNU_ASL(x) "\n" #x ":" NEW_LINE
266  #define GNU_ASJ(x, y, z) #x " " #y #z ";" NEW_LINE
267  #define AS1(x) GNU_AS1(x)
268  #define AS2(x, y) GNU_AS2(x, y)
269  #define AS3(x, y, z) GNU_AS3(x, y, z)
 // ASS and ASC stringize directly (no two-step expansion) and do not append NEW_LINE.
270  #define ASS(x, y, a, b, c, d) #x ", " #y ", " #a "*64+" #b "*16+" #c "*4+" #d ";"
271  #define ASL(x) GNU_ASL(x)
272  #define ASJ(x, y, z) GNU_ASJ(x, y, z)
273  #define ASC(x, y) #x " " #y ";"
 // CRYPTOPP_NAKED expands to nothing for this target.
274  #define CRYPTOPP_NAKED
275  #define AS_HEX(y) 0x##y
276 #endif
277 
// IF0 discards its argument; IF1 passes its argument through unchanged.
278 #define IF0(y)
279 #define IF1(y) y
280 
281 // Should be confined to GCC, but it's used to help manage Clang 3.4 compiler error.
282 // Also see LLVM Bug 24232, http://llvm.org/bugs/show_bug.cgi?id=24232 .
// Fallbacks: only the GNU-style branch above defines the syntax-switch macros, so
// for the other targets they are defined empty here and can be used unconditionally.
283 #ifndef INTEL_PREFIX
284  #define INTEL_PREFIX
285 #endif
286 #ifndef INTEL_NOPREFIX
287  #define INTEL_NOPREFIX
288 #endif
289 #ifndef ATT_PREFIX
290  #define ATT_PREFIX
291 #endif
292 #ifndef ATT_NOPREFIX
293  #define ATT_NOPREFIX
294 #endif
295 
// ASM_MOD(x, y): remainder of x divided by y, written as an assembler expression.
// XMMWORD_PTR: operand-size qualifier for 128-bit memory operands, emitted only for MASM.
296 #ifdef CRYPTOPP_GENERATE_X64_MASM
297 #define ASM_MOD(x, y) ((x) MOD (y))
298 #define XMMWORD_PTR XMMWORD PTR
299 #else
300 // GNU assembler doesn't seem to have mod operator
// so the remainder is computed as x - (x / y) * y
301 #define ASM_MOD(x, y) ((x)-((x)/(y))*(y))
302 // GAS 2.15 doesn't support XMMWORD PTR. it seems necessary only for MASM
303 #define XMMWORD_PTR
304 #endif
305 
// Per-architecture register and word-size macros used by the assembly listings.
//   AS_REG_n        - n-th working register at native word width
//   AS_REG_nd       - the same register at 32-bit width
//   WORD_SZ         - native word size in bytes
//   WORD_REG(x)     - prefixes a register suffix (e.g. ax) with e or r
//   WORD_PTR        - memory operand size qualifier for a native word
//   AS_PUSH_IF86 / AS_POP_IF86 - push/pop a register; expand to nothing on X64
//   AS_JCXZ         - jump-if-count-register-zero mnemonic for the word size
306 #if CRYPTOPP_BOOL_X86
 // X86: 32-bit registers, so AS_REG_n and AS_REG_nd are identical.
307  #define AS_REG_1 ecx
308  #define AS_REG_2 edx
309  #define AS_REG_3 esi
310  #define AS_REG_4 edi
311  #define AS_REG_5 eax
312  #define AS_REG_6 ebx
313  #define AS_REG_7 ebp
314  #define AS_REG_1d ecx
315  #define AS_REG_2d edx
316  #define AS_REG_3d esi
317  #define AS_REG_4d edi
318  #define AS_REG_5d eax
319  #define AS_REG_6d ebx
320  #define AS_REG_7d ebp
321  #define WORD_SZ 4
322  #define WORD_REG(x) e##x
323  #define WORD_PTR DWORD PTR
324  #define AS_PUSH_IF86(x) AS1(push e##x)
325  #define AS_POP_IF86(x) AS1(pop e##x)
326  #define AS_JCXZ jecxz
327 #elif CRYPTOPP_BOOL_X32
 // X32: 4-byte words and 32-bit register names, but push/pop use the r-prefixed
 // (64-bit) register names.
328  #define AS_REG_1 ecx
329  #define AS_REG_2 edx
330  #define AS_REG_3 r8d
331  #define AS_REG_4 r9d
332  #define AS_REG_5 eax
333  #define AS_REG_6 r10d
334  #define AS_REG_7 r11d
335  #define AS_REG_1d ecx
336  #define AS_REG_2d edx
337  #define AS_REG_3d r8d
338  #define AS_REG_4d r9d
339  #define AS_REG_5d eax
340  #define AS_REG_6d r10d
341  #define AS_REG_7d r11d
342  #define WORD_SZ 4
343  #define WORD_REG(x) e##x
344  #define WORD_PTR DWORD PTR
345  #define AS_PUSH_IF86(x) AS1(push r##x)
346  #define AS_POP_IF86(x) AS1(pop r##x)
347  #define AS_JCXZ jecxz
348 #elif CRYPTOPP_BOOL_X64
 // X64: the register assignment differs between the MASM build and other builds.
 // NOTE(review): the first registers in each list match the integer argument
 // registers of the Microsoft x64 (rcx, rdx, r8, r9) and System V AMD64
 // (rdi, rsi, rdx, rcx, r8, r9) calling conventions - presumably intentional.
349  #ifdef CRYPTOPP_GENERATE_X64_MASM
350  #define AS_REG_1 rcx
351  #define AS_REG_2 rdx
352  #define AS_REG_3 r8
353  #define AS_REG_4 r9
354  #define AS_REG_5 rax
355  #define AS_REG_6 r10
356  #define AS_REG_7 r11
357  #define AS_REG_1d ecx
358  #define AS_REG_2d edx
359  #define AS_REG_3d r8d
360  #define AS_REG_4d r9d
361  #define AS_REG_5d eax
362  #define AS_REG_6d r10d
363  #define AS_REG_7d r11d
364  #else
365  #define AS_REG_1 rdi
366  #define AS_REG_2 rsi
367  #define AS_REG_3 rdx
368  #define AS_REG_4 rcx
369  #define AS_REG_5 r8
370  #define AS_REG_6 r9
371  #define AS_REG_7 r10
372  #define AS_REG_1d edi
373  #define AS_REG_2d esi
374  #define AS_REG_3d edx
375  #define AS_REG_4d ecx
376  #define AS_REG_5d r8d
377  #define AS_REG_6d r9d
378  #define AS_REG_7d r10d
379  #endif
380  #define WORD_SZ 8
381  #define WORD_REG(x) r##x
382  #define WORD_PTR QWORD PTR
 // No push/pop is emitted on X64.
383  #define AS_PUSH_IF86(x)
384  #define AS_POP_IF86(x)
385  #define AS_JCXZ jrcxz
386 #endif
387 
388 // helper macro for stream cipher output
// Emits assembly that optionally XORs four XMM registers with input data and then
// stores them to the output buffer.
//   labelPrefix      - prefix for the labels this macro defines (suffixes 3, 7, 8, 9)
//   inputPtr         - register holding the input pointer; zero skips the XOR step
//   outputPtr        - register holding the output pointer
//   x0..x3           - numbers of the XMM registers holding the four 16-byte blocks
//   t                - number of a scratch XMM register for unaligned input loads
//   p0..p3           - block index (scaled by 16) each register is read from / written to
//   increment        - number of 16-byte blocks both pointers are advanced by
// Flow: if inputPtr is zero jump to label 3. Otherwise, if inputPtr is 16-byte
// aligned, pxor directly from memory; if not (label 7), load each block with movdqu
// into xmm<t> and pxor from there. Either way inputPtr is then advanced. At label 3
// the output is written with movdqa when outputPtr is 16-byte aligned, or with
// movdqu (label 8) when it is not; label 9 joins both paths and advances outputPtr.
389 #define AS_XMM_OUTPUT4(labelPrefix, inputPtr, outputPtr, x0, x1, x2, x3, t, p0, p1, p2, p3, increment)\
390  AS2( test inputPtr, inputPtr)\
391  ASC( jz, labelPrefix##3)\
392  AS2( test inputPtr, 15)\
393  ASC( jnz, labelPrefix##7)\
394  AS2( pxor xmm##x0, [inputPtr+p0*16])\
395  AS2( pxor xmm##x1, [inputPtr+p1*16])\
396  AS2( pxor xmm##x2, [inputPtr+p2*16])\
397  AS2( pxor xmm##x3, [inputPtr+p3*16])\
398  AS2( add inputPtr, increment*16)\
399  ASC( jmp, labelPrefix##3)\
400  ASL(labelPrefix##7)\
401  AS2( movdqu xmm##t, [inputPtr+p0*16])\
402  AS2( pxor xmm##x0, xmm##t)\
403  AS2( movdqu xmm##t, [inputPtr+p1*16])\
404  AS2( pxor xmm##x1, xmm##t)\
405  AS2( movdqu xmm##t, [inputPtr+p2*16])\
406  AS2( pxor xmm##x2, xmm##t)\
407  AS2( movdqu xmm##t, [inputPtr+p3*16])\
408  AS2( pxor xmm##x3, xmm##t)\
409  AS2( add inputPtr, increment*16)\
410  ASL(labelPrefix##3)\
411  AS2( test outputPtr, 15)\
412  ASC( jnz, labelPrefix##8)\
413  AS2( movdqa [outputPtr+p0*16], xmm##x0)\
414  AS2( movdqa [outputPtr+p1*16], xmm##x1)\
415  AS2( movdqa [outputPtr+p2*16], xmm##x2)\
416  AS2( movdqa [outputPtr+p3*16], xmm##x3)\
417  ASC( jmp, labelPrefix##9)\
418  ASL(labelPrefix##8)\
419  AS2( movdqu [outputPtr+p0*16], xmm##x0)\
420  AS2( movdqu [outputPtr+p1*16], xmm##x1)\
421  AS2( movdqu [outputPtr+p2*16], xmm##x2)\
422  AS2( movdqu [outputPtr+p3*16], xmm##x3)\
423  ASL(labelPrefix##9)\
424  AS2( add outputPtr, increment*16)
425 
426 NAMESPACE_END
427 
428 #endif
Library configuration file.
Crypto++ library namespace.