Crypto++  5.6.3
Free C++ class library of cryptographic schemes
cpu.h
Go to the documentation of this file.
1 // cpu.h - written and placed in the public domain by Wei Dai
2 
3 //! \file
4 //! \headerfile cpu.h
5 //! \brief Classes, functions, intrinsics and features for X86, X32 and X64 assembly
6 
7 #ifndef CRYPTOPP_CPU_H
8 #define CRYPTOPP_CPU_H
9 
10 #include "config.h"
11 
12 #ifdef CRYPTOPP_GENERATE_X64_MASM
13 // MASM generation mode: switch the x64/SSE2 assembly feature macros on for this pass
14 #define CRYPTOPP_X86_ASM_AVAILABLE
15 #define CRYPTOPP_BOOL_X64 1
16 #define CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 1
17 #define NAMESPACE_END // expands to nothing, so the NAMESPACE_END that closes this header leaves no text behind
18 
19 #else
20 
21 # if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
22 # include <emmintrin.h>
23 # endif
24 
25 #if CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE
26 #if !defined(__GNUC__) || defined(__SSSE3__) || defined(__INTEL_COMPILER)
27 #include <tmmintrin.h>
28 #else
29 NAMESPACE_BEGIN(CryptoPP)
30 __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
31 _mm_shuffle_epi8 (__m128i a, __m128i b) // stand-in for the <tmmintrin.h> intrinsic, used when that header is not included (GCC-compatible, non-Intel compiler without __SSSE3__)
32 { // issues pshufb directly: a is the read-write XMM operand ("+x"), b may be an XMM register or memory ("xm")
33  asm ("pshufb %1, %0" : "+x"(a) : "xm"(b));
34  return a; // a was updated in place by the asm statement
35 }
36 NAMESPACE_END
37 #endif // tmmintrin.h
38 #if !defined(__GNUC__) || defined(__SSE4_1__) || defined(__INTEL_COMPILER)
39 #include <smmintrin.h>
40 #else
41 NAMESPACE_BEGIN(CryptoPP)
42 __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
43 _mm_extract_epi32 (__m128i a, const int i) // stand-in for the <smmintrin.h> intrinsic, used when that header is not included (GCC-compatible, non-Intel compiler without __SSE4_1__)
44 {
45  int r; // written by the asm statement below
46  asm ("pextrd %2, %1, %0" : "=rm"(r) : "x"(a), "i"(i)); // "i" constraint: i must be a compile-time constant; r may be placed in a register or memory
47  return r;
48 }
49 __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
50 _mm_insert_epi32 (__m128i a, int b, const int i) // stand-in for the <smmintrin.h> intrinsic, used when that header is not included (GCC-compatible, non-Intel compiler without __SSE4_1__)
51 {
52  asm ("pinsrd %2, %1, %0" : "+x"(a) : "rm"(b), "i"(i)); // a is read and written in place; b comes from a register or memory; i must be a compile-time constant
53  return a;
54 }
55 NAMESPACE_END
56 #endif // smmintrin.h
57 #if !defined(__GNUC__) || (defined(__AES__) && defined(__PCLMUL__)) || defined(__INTEL_COMPILER)
58 #include <wmmintrin.h>
59 #else
60 NAMESPACE_BEGIN(CryptoPP)
61 __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
62 _mm_clmulepi64_si128 (__m128i a, __m128i b, const int i) // stand-in for the <wmmintrin.h> intrinsic, used when that header is not included (GCC-compatible, non-Intel compiler lacking __AES__ or __PCLMUL__)
63 {
64  asm ("pclmulqdq %2, %1, %0" : "+x"(a) : "xm"(b), "i"(i)); // a is read and written in place; b is an XMM register or memory; i must be a compile-time constant
65  return a;
66 }
67 __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
68 _mm_aeskeygenassist_si128 (__m128i a, const int i) // stand-in for the <wmmintrin.h> intrinsic, used when that header is not included (GCC-compatible, non-Intel compiler lacking __AES__ or __PCLMUL__)
69 {
70  __m128i r; // written by the asm statement below
71  asm ("aeskeygenassist %2, %1, %0" : "=x"(r) : "xm"(a), "i"(i)); // result goes to a separate XMM output; a is only read; i must be a compile-time constant
72  return r;
73 }
74 __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
75 _mm_aesimc_si128 (__m128i a) // stand-in for the <wmmintrin.h> intrinsic, used when that header is not included (GCC-compatible, non-Intel compiler lacking __AES__ or __PCLMUL__)
76 {
77  __m128i r; // written by the asm statement below
78  asm ("aesimc %1, %0" : "=x"(r) : "xm"(a)); // result goes to a separate XMM output; a (register or memory) is only read
79  return r;
80 }
81 __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
82 _mm_aesenc_si128 (__m128i a, __m128i b) // stand-in for the <wmmintrin.h> intrinsic, used when that header is not included (GCC-compatible, non-Intel compiler lacking __AES__ or __PCLMUL__)
83 {
84  asm ("aesenc %1, %0" : "+x"(a) : "xm"(b)); // a is read and written in place; b is an XMM register or memory
85  return a;
86 }
87 __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
88 _mm_aesenclast_si128 (__m128i a, __m128i b) // stand-in for the <wmmintrin.h> intrinsic, used when that header is not included (GCC-compatible, non-Intel compiler lacking __AES__ or __PCLMUL__)
89 {
90  asm ("aesenclast %1, %0" : "+x"(a) : "xm"(b)); // a is read and written in place; b is an XMM register or memory
91  return a;
92 }
93 __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
94 _mm_aesdec_si128 (__m128i a, __m128i b) // stand-in for the <wmmintrin.h> intrinsic, used when that header is not included (GCC-compatible, non-Intel compiler lacking __AES__ or __PCLMUL__)
95 {
96  asm ("aesdec %1, %0" : "+x"(a) : "xm"(b)); // a is read and written in place; b is an XMM register or memory
97  return a;
98 }
99 __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
100 _mm_aesdeclast_si128 (__m128i a, __m128i b) // stand-in for the <wmmintrin.h> intrinsic, used when that header is not included (GCC-compatible, non-Intel compiler lacking __AES__ or __PCLMUL__)
101 {
102  asm ("aesdeclast %1, %0" : "+x"(a) : "xm"(b)); // a is read and written in place; b is an XMM register or memory
103  return a;
104 }
105 NAMESPACE_END
106 #endif // wmmintrin.h
107 #endif // CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE
108 
109 NAMESPACE_BEGIN(CryptoPP)
110 
111 #if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64
112 
113 #define CRYPTOPP_CPUID_AVAILABLE
114 
115 // Detection state; these should not be used directly - go through the Has*()/IsP4()/GetCacheLineSize() accessors that follow, which call DetectX86Features() first when g_x86DetectionDone is false
116 extern CRYPTOPP_DLL bool g_x86DetectionDone;
117 extern CRYPTOPP_DLL bool g_hasMMX;
118 extern CRYPTOPP_DLL bool g_hasISSE;
119 extern CRYPTOPP_DLL bool g_hasSSE2;
120 extern CRYPTOPP_DLL bool g_hasSSSE3;
121 extern CRYPTOPP_DLL bool g_hasSSE4;
122 extern CRYPTOPP_DLL bool g_hasAESNI;
123 extern CRYPTOPP_DLL bool g_hasCLMUL;
124 extern CRYPTOPP_DLL bool g_isP4;
125 extern CRYPTOPP_DLL bool g_hasRDRAND;
126 extern CRYPTOPP_DLL bool g_hasRDSEED;
127 extern CRYPTOPP_DLL word32 g_cacheLineSize;
128 // Declarations only; both functions are defined outside this header
129 CRYPTOPP_DLL void CRYPTOPP_API DetectX86Features(); // NOTE(review): presumably fills in the g_* values above and sets g_x86DetectionDone - confirm in the implementation
130 CRYPTOPP_DLL bool CRYPTOPP_API CpuId(word32 input, word32 output[4]); // NOTE(review): presumably runs CPUID for leaf `input` into output[0..3] and returns success - confirm in the implementation
131 //! \brief Reports MMX support: always true on X64 builds, otherwise the cached g_hasMMX flag
132 inline bool HasMMX()
133 {
134 #if !CRYPTOPP_BOOL_X64
135     // Make sure detection has been run before reading the flag
136     if (g_x86DetectionDone == false) { DetectX86Features(); }
137     return g_hasMMX;
138 #else
139     return true;
140 #endif
141 }
142 //! \brief Reports ISSE support: always true on X64 builds, otherwise the cached g_hasISSE flag
143 inline bool HasISSE()
144 {
145 #if !CRYPTOPP_BOOL_X64
146     // Make sure detection has been run before reading the flag
147     if (g_x86DetectionDone == false) { DetectX86Features(); }
148     return g_hasISSE;
149 #else
150     return true;
151 #endif
152 }
153 //! \brief Reports SSE2 support: always true on X64 builds, otherwise the cached g_hasSSE2 flag
154 inline bool HasSSE2()
155 {
156 #if !CRYPTOPP_BOOL_X64
157     // Make sure detection has been run before reading the flag
158     if (g_x86DetectionDone == false) { DetectX86Features(); }
159     return g_hasSSE2;
160 #else
161     return true;
162 #endif
163 }
164 //! \brief Returns the cached g_hasSSSE3 flag, calling DetectX86Features() first if detection has not been done
165 inline bool HasSSSE3()
166 {
167     // Make sure detection has been run before reading the flag
168     if (g_x86DetectionDone == false) { DetectX86Features(); }
169     return g_hasSSSE3;
170 }
171 //! \brief Returns the cached g_hasSSE4 flag, calling DetectX86Features() first if detection has not been done
172 inline bool HasSSE4()
173 {
174     // Make sure detection has been run before reading the flag
175     if (g_x86DetectionDone == false) { DetectX86Features(); }
176     return g_hasSSE4;
177 }
178 //! \brief Returns the cached g_hasAESNI flag, calling DetectX86Features() first if detection has not been done
179 inline bool HasAESNI()
180 {
181     // Make sure detection has been run before reading the flag
182     if (g_x86DetectionDone == false) { DetectX86Features(); }
183     return g_hasAESNI;
184 }
185 //! \brief Returns the cached g_hasCLMUL flag, calling DetectX86Features() first if detection has not been done
186 inline bool HasCLMUL()
187 {
188     // Make sure detection has been run before reading the flag
189     if (g_x86DetectionDone == false) { DetectX86Features(); }
190     return g_hasCLMUL;
191 }
192 //! \brief Returns the cached g_isP4 flag, calling DetectX86Features() first if detection has not been done
193 inline bool IsP4()
194 {
195     // Make sure detection has been run before reading the flag
196     if (g_x86DetectionDone == false) { DetectX86Features(); }
197     return g_isP4;
198 }
199 //! \brief Returns the cached g_hasRDRAND flag, calling DetectX86Features() first if detection has not been done
200 inline bool HasRDRAND()
201 {
202     // Make sure detection has been run before reading the flag
203     if (g_x86DetectionDone == false) { DetectX86Features(); }
204     return g_hasRDRAND;
205 }
206 //! \brief Returns the cached g_hasRDSEED flag, calling DetectX86Features() first if detection has not been done
207 inline bool HasRDSEED()
208 {
209     // Make sure detection has been run before reading the flag
210     if (g_x86DetectionDone == false) { DetectX86Features(); }
211     return g_hasRDSEED;
212 }
213 //! \brief Returns g_cacheLineSize as an int, calling DetectX86Features() first if detection has not been done
214 inline int GetCacheLineSize()
215 {
216     // Make sure detection has been run before reading the cached value
217     if (g_x86DetectionDone == false) { DetectX86Features(); }
218     return g_cacheLineSize;
219 }
220 
221 #else
222 //! \brief Variant for builds that are not X86, X32 or X64: returns the compile-time CRYPTOPP_L1_CACHE_LINE_SIZE
223 inline int GetCacheLineSize() {
224     const int lineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;
225     return lineSize;
226 }
227 
228 #endif
229 
230 #endif
231 
232 #ifdef CRYPTOPP_GENERATE_X64_MASM
233  #define AS1(x) x*newline* // AS1/AS2/AS3 re-join an assembly line passed as 1, 2 or 3 macro arguments; *newline* is emitted literally (NOTE(review): presumably replaced by a line break in a later MASM generation step - confirm)
234  #define AS2(x, y) x, y*newline*
235  #define AS3(x, y, z) x, y, z*newline*
236  #define ASS(x, y, a, b, c, d) x, y, a*64+b*16+c*4+d*newline* // a..d combined as base-4 digits into one immediate; arguments are not parenthesized in this variant
237  #define ASL(x) label##x:*newline* // defines label<x>
238  #define ASJ(x, y, z) x label##y*newline* // instruction x targeting label<y>; z is unused in this variant
239  #define ASC(x, y) x label##y*newline* // instruction x targeting label<y>
240  #define AS_HEX(y) 0##y##h // hex literal spelled 0<y>h
241 #elif defined(_MSC_VER) || defined(__BORLANDC__)
242  #define CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY // set when the __asm {...} flavour of the AS* macros is selected (_MSC_VER or __BORLANDC__)
243  #define AS1(x) __asm {x} // AS1/AS2/AS3 wrap one assembly line, passed as 1, 2 or 3 macro arguments, in its own __asm block
244  #define AS2(x, y) __asm {x, y}
245  #define AS3(x, y, z) __asm {x, y, z}
246  #define ASS(x, y, a, b, c, d) __asm {x, y, (a)*64+(b)*16+(c)*4+(d)} // a..d combined as base-4 digits into one immediate
247  #define ASL(x) __asm {label##x:} // defines label<x>
248  #define ASJ(x, y, z) __asm {x label##y} // instruction x targeting label<y>; z is unused in this variant
249  #define ASC(x, y) __asm {x label##y} // instruction x targeting label<y>
250  #define CRYPTOPP_NAKED __declspec(naked)
251  #define AS_HEX(y) 0x##y // hex literal spelled 0x<y>
252 #else
253  #define CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
254 // Syntax-switch directives as string pieces; the Clang variants drop the prefix/noprefix argument and make NEW_LINE a real "\n"
255 #if defined(CRYPTOPP_CLANG_VERSION) || defined(CRYPTOPP_APPLE_CLANG_VERSION)
256  #define NEW_LINE "\n"
257  #define INTEL_PREFIX ".intel_syntax;"
258  #define INTEL_NOPREFIX ".intel_syntax;"
259  #define ATT_PREFIX ".att_syntax;"
260  #define ATT_NOPREFIX ".att_syntax;"
261 #else
262  #define NEW_LINE
263  #define INTEL_PREFIX ".intel_syntax prefix;"
264  #define INTEL_NOPREFIX ".intel_syntax noprefix;"
265  #define ATT_PREFIX ".att_syntax prefix;"
266  #define ATT_NOPREFIX ".att_syntax noprefix;"
267 #endif
268 // The AS* macros in this variant stringize their arguments, so adjacent uses concatenate into one string literal
269  // define these in two steps so that macro arguments are expanded before # stringizes them
270  #define GNU_AS1(x) #x ";" NEW_LINE
271  #define GNU_AS2(x, y) #x ", " #y ";" NEW_LINE
272  #define GNU_AS3(x, y, z) #x ", " #y ", " #z ";" NEW_LINE
273  #define GNU_ASL(x) "\n" #x ":" NEW_LINE // label definition, started on a fresh line
274  #define GNU_ASJ(x, y, z) #x " " #y #z ";" NEW_LINE // instruction x targeting label y, with z appended directly to the label text
275  #define AS1(x) GNU_AS1(x)
276  #define AS2(x, y) GNU_AS2(x, y)
277  #define AS3(x, y, z) GNU_AS3(x, y, z)
278  #define ASS(x, y, a, b, c, d) #x ", " #y ", " #a "*64+" #b "*16+" #c "*4+" #d ";" // a..d combined as base-4 digits; stringizes directly (no two-step expansion) and appends no NEW_LINE
279  #define ASL(x) GNU_ASL(x)
280  #define ASJ(x, y, z) GNU_ASJ(x, y, z)
281  #define ASC(x, y) #x " " #y ";" // stringizes directly (no two-step expansion) and appends no NEW_LINE
282  #define CRYPTOPP_NAKED // expands to nothing in this variant
283  #define AS_HEX(y) 0x##y // hex literal spelled 0x<y>
284 #endif
285 // IF0 discards its argument and IF1 passes it through unchanged (NOTE(review): presumably selected by pasting a 0/1 flag onto "IF" - confirm at the call sites)
286 #define IF0(y)
287 #define IF1(y) y
288 // Fallback: give each syntax-prefix macro that is still undefined at this point an empty definition.
289 // Should be confined to GCC, but it's used to help manage a Clang 3.4 compiler error.
290 // Also see LLVM Bug 24232, http://llvm.org/bugs/show_bug.cgi?id=24232 .
291 #ifndef INTEL_PREFIX
292  #define INTEL_PREFIX
293 #endif
294 #ifndef INTEL_NOPREFIX
295  #define INTEL_NOPREFIX
296 #endif
297 #ifndef ATT_PREFIX
298  #define ATT_PREFIX
299 #endif
300 #ifndef ATT_NOPREFIX
301  #define ATT_NOPREFIX
302 #endif
303 // Assembler-expression helpers whose spelling depends on whether MASM source is being generated
304 #ifdef CRYPTOPP_GENERATE_X64_MASM
305 #define ASM_MOD(x, y) ((x) MOD (y))
306 #define XMMWORD_PTR XMMWORD PTR
307 #else
308 // GNU assembler doesn't seem to have a mod operator, so the remainder is spelled as x - (x/y)*y (each argument appears twice in the expansion)
309 #define ASM_MOD(x, y) ((x)-((x)/(y))*(y))
310 // GAS 2.15 doesn't support XMMWORD PTR; it seems necessary only for MASM, so it expands to nothing here
311 #define XMMWORD_PTR
312 #endif
313 // Register, word-size and push/pop aliases defined per target (X86, X32, X64) under the same macro names
314 #if CRYPTOPP_BOOL_X86
315  #define AS_REG_1 ecx
316  #define AS_REG_2 edx
317  #define AS_REG_3 esi
318  #define AS_REG_4 edi
319  #define AS_REG_5 eax
320  #define AS_REG_6 ebx
321  #define AS_REG_7 ebp
322  #define AS_REG_1d ecx // on X86 the "d" aliases name the same registers as the plain ones
323  #define AS_REG_2d edx
324  #define AS_REG_3d esi
325  #define AS_REG_4d edi
326  #define AS_REG_5d eax
327  #define AS_REG_6d ebx
328  #define AS_REG_7d ebp
329  #define WORD_SZ 4
330  #define WORD_REG(x) e##x // WORD_REG(ax) -> eax
331  #define WORD_PTR DWORD PTR
332  #define AS_PUSH_IF86(x) AS1(push e##x)
333  #define AS_POP_IF86(x) AS1(pop e##x)
334  #define AS_JCXZ jecxz
335 #elif CRYPTOPP_BOOL_X32
336  #define AS_REG_1 ecx
337  #define AS_REG_2 edx
338  #define AS_REG_3 r8d
339  #define AS_REG_4 r9d
340  #define AS_REG_5 eax
341  #define AS_REG_6 r10d
342  #define AS_REG_7 r11d
343  #define AS_REG_1d ecx // on X32 the "d" aliases name the same registers as the plain ones
344  #define AS_REG_2d edx
345  #define AS_REG_3d r8d
346  #define AS_REG_4d r9d
347  #define AS_REG_5d eax
348  #define AS_REG_6d r10d
349  #define AS_REG_7d r11d
350  #define WORD_SZ 4
351  #define WORD_REG(x) e##x
352  #define WORD_PTR DWORD PTR
353  #define AS_PUSH_IF86(x) AS1(push r##x) // pushes the r-prefixed register name here, although WORD_REG yields the e-prefixed one
354  #define AS_POP_IF86(x) AS1(pop r##x)
355  #define AS_JCXZ jecxz
356 #elif CRYPTOPP_BOOL_X64
357  #ifdef CRYPTOPP_GENERATE_X64_MASM
358  #define AS_REG_1 rcx // MASM build: rcx, rdx, r8, r9, rax, r10, r11 (NOTE(review): the first four look like the Microsoft x64 argument registers - confirm)
359  #define AS_REG_2 rdx
360  #define AS_REG_3 r8
361  #define AS_REG_4 r9
362  #define AS_REG_5 rax
363  #define AS_REG_6 r10
364  #define AS_REG_7 r11
365  #define AS_REG_1d ecx // on X64 the "d" aliases are the 32-bit names of the registers above
366  #define AS_REG_2d edx
367  #define AS_REG_3d r8d
368  #define AS_REG_4d r9d
369  #define AS_REG_5d eax
370  #define AS_REG_6d r10d
371  #define AS_REG_7d r11d
372  #else
373  #define AS_REG_1 rdi // non-MASM build: rdi, rsi, rdx, rcx, r8, r9, r10 (NOTE(review): the first six look like the System V AMD64 argument registers - confirm)
374  #define AS_REG_2 rsi
375  #define AS_REG_3 rdx
376  #define AS_REG_4 rcx
377  #define AS_REG_5 r8
378  #define AS_REG_6 r9
379  #define AS_REG_7 r10
380  #define AS_REG_1d edi
381  #define AS_REG_2d esi
382  #define AS_REG_3d edx
383  #define AS_REG_4d ecx
384  #define AS_REG_5d r8d
385  #define AS_REG_6d r9d
386  #define AS_REG_7d r10d
387  #endif
388  #define WORD_SZ 8
389  #define WORD_REG(x) r##x // WORD_REG(ax) -> rax
390  #define WORD_PTR QWORD PTR
391  #define AS_PUSH_IF86(x) // expands to nothing on X64
392  #define AS_POP_IF86(x) // expands to nothing on X64
393  #define AS_JCXZ jrcxz
394 #endif
395 // Helper macro for stream cipher output. If inputPtr is non-zero, XORs xmm x0..x3 with the 16-byte blocks at inputPtr+p0..p3*16 (going through movdqu into xmm t when inputPtr is not 16-byte aligned) and adds increment*16 to inputPtr.
396 // It then stores xmm x0..x3 to outputPtr+p0..p3*16 (movdqa when outputPtr is 16-byte aligned, movdqu otherwise) and adds increment*16 to outputPtr. Defines labels labelPrefix##3, ##7, ##8 and ##9.
397 #define AS_XMM_OUTPUT4(labelPrefix, inputPtr, outputPtr, x0, x1, x2, x3, t, p0, p1, p2, p3, increment)\
398  AS2( test inputPtr, inputPtr)\
399  ASC( jz, labelPrefix##3)\
400  AS2( test inputPtr, 15)\
401  ASC( jnz, labelPrefix##7)\
402  AS2( pxor xmm##x0, [inputPtr+p0*16])\
403  AS2( pxor xmm##x1, [inputPtr+p1*16])\
404  AS2( pxor xmm##x2, [inputPtr+p2*16])\
405  AS2( pxor xmm##x3, [inputPtr+p3*16])\
406  AS2( add inputPtr, increment*16)\
407  ASC( jmp, labelPrefix##3)\
408  ASL(labelPrefix##7)\
409  AS2( movdqu xmm##t, [inputPtr+p0*16])\
410  AS2( pxor xmm##x0, xmm##t)\
411  AS2( movdqu xmm##t, [inputPtr+p1*16])\
412  AS2( pxor xmm##x1, xmm##t)\
413  AS2( movdqu xmm##t, [inputPtr+p2*16])\
414  AS2( pxor xmm##x2, xmm##t)\
415  AS2( movdqu xmm##t, [inputPtr+p3*16])\
416  AS2( pxor xmm##x3, xmm##t)\
417  AS2( add inputPtr, increment*16)\
418  ASL(labelPrefix##3)\
419  AS2( test outputPtr, 15)\
420  ASC( jnz, labelPrefix##8)\
421  AS2( movdqa [outputPtr+p0*16], xmm##x0)\
422  AS2( movdqa [outputPtr+p1*16], xmm##x1)\
423  AS2( movdqa [outputPtr+p2*16], xmm##x2)\
424  AS2( movdqa [outputPtr+p3*16], xmm##x3)\
425  ASC( jmp, labelPrefix##9)\
426  ASL(labelPrefix##8)\
427  AS2( movdqu [outputPtr+p0*16], xmm##x0)\
428  AS2( movdqu [outputPtr+p1*16], xmm##x1)\
429  AS2( movdqu [outputPtr+p2*16], xmm##x2)\
430  AS2( movdqu [outputPtr+p3*16], xmm##x3)\
431  ASL(labelPrefix##9)\
432  AS2( add outputPtr, increment*16)
433 
434 NAMESPACE_END
435 
436 #endif
Library configuration file.
Crypto++ library namespace.