Crypto++
cpu.h
1 #ifndef CRYPTOPP_CPU_H
2 #define CRYPTOPP_CPU_H
3 
4 #ifdef CRYPTOPP_GENERATE_X64_MASM
5 
// MASM-generation mode: "config.h" is only included on the other branch of
// this conditional, so the configuration macros needed here are defined by hand.
// CRYPTOPP_BOOL_X64 selects the 64-bit register map further down in this header.
// The two *_AVAILABLE macros are not read in this header; presumably they are
// consumed by the sources that include it -- TODO confirm.
6 #define CRYPTOPP_X86_ASM_AVAILABLE
7 #define CRYPTOPP_BOOL_X64 1
8 #define CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 1
// NAMESPACE_BEGIN(CryptoPP) is skipped in this mode, so the NAMESPACE_END at
// the bottom of the header has to expand to nothing.
9 #define NAMESPACE_END
10 
11 #else
12 
13 #include "config.h"
14 
15 #if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
16 #include <emmintrin.h>
17 #endif
18 
19 #if CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE
20 #if !defined(__GNUC__) || defined(__SSSE3__) || defined(__INTEL_COMPILER)
21 #include <tmmintrin.h>
22 #else
23 __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
24 _mm_shuffle_epi8 (__m128i a, __m128i b)
25 {
26  asm ("pshufb %1, %0" : "+x"(a) : "xm"(b));
27  return a;
28 }
29 #endif
30 #if !defined(__GNUC__) || defined(__SSE4_1__) || defined(__INTEL_COMPILER)
31 #include <smmintrin.h>
32 #else
33 __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
34 _mm_extract_epi32 (__m128i a, const int i)
35 {
36  int r;
37  asm ("pextrd %2, %1, %0" : "=rm"(r) : "x"(a), "i"(i));
38  return r;
39 }
40 __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
41 _mm_insert_epi32 (__m128i a, int b, const int i)
42 {
43  asm ("pinsrd %2, %1, %0" : "+x"(a) : "rm"(b), "i"(i));
44  return a;
45 }
46 #endif
47 #if !defined(__GNUC__) || (defined(__AES__) && defined(__PCLMUL__)) || defined(__INTEL_COMPILER)
48 #include <wmmintrin.h>
49 #else
50 __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
51 _mm_clmulepi64_si128 (__m128i a, __m128i b, const int i)
52 {
53  asm ("pclmulqdq %2, %1, %0" : "+x"(a) : "xm"(b), "i"(i));
54  return a;
55 }
56 __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
57 _mm_aeskeygenassist_si128 (__m128i a, const int i)
58 {
59  __m128i r;
60  asm ("aeskeygenassist %2, %1, %0" : "=x"(r) : "xm"(a), "i"(i));
61  return r;
62 }
63 __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
64 _mm_aesimc_si128 (__m128i a)
65 {
66  __m128i r;
67  asm ("aesimc %1, %0" : "=x"(r) : "xm"(a));
68  return r;
69 }
70 __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
71 _mm_aesenc_si128 (__m128i a, __m128i b)
72 {
73  asm ("aesenc %1, %0" : "+x"(a) : "xm"(b));
74  return a;
75 }
76 __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
77 _mm_aesenclast_si128 (__m128i a, __m128i b)
78 {
79  asm ("aesenclast %1, %0" : "+x"(a) : "xm"(b));
80  return a;
81 }
82 __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
83 _mm_aesdec_si128 (__m128i a, __m128i b)
84 {
85  asm ("aesdec %1, %0" : "+x"(a) : "xm"(b));
86  return a;
87 }
88 __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
89 _mm_aesdeclast_si128 (__m128i a, __m128i b)
90 {
91  asm ("aesdeclast %1, %0" : "+x"(a) : "xm"(b));
92  return a;
93 }
94 #endif
95 #endif
96 
97 NAMESPACE_BEGIN(CryptoPP)
98 
99 #if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X64
100 
101 #define CRYPTOPP_CPUID_AVAILABLE
102 
103 // these should not be used directly
// Detection state shared with the inline accessors in this header. The
// definitions live outside this header.
//
// g_x86DetectionDone is tested by every accessor that reads a cached value;
// while it is false the accessor calls DetectX86Features() before reading.
// Presumably DetectX86Features() sets it -- TODO confirm against its definition.
104 extern CRYPTOPP_DLL bool g_x86DetectionDone;
// Cached results returned by HasSSSE3(), HasAESNI(), HasCLMUL() and IsP4().
105 extern CRYPTOPP_DLL bool g_hasSSSE3;
106 extern CRYPTOPP_DLL bool g_hasAESNI;
107 extern CRYPTOPP_DLL bool g_hasCLMUL;
108 extern CRYPTOPP_DLL bool g_isP4;
// Cached value returned by GetCacheLineSize(); note that the accessor returns
// it as int although it is stored as word32.
109 extern CRYPTOPP_DLL word32 g_cacheLineSize;
// Called by the accessors when g_x86DetectionDone is false.
110 CRYPTOPP_DLL void CRYPTOPP_API DetectX86Features();
// NOTE(review): presumably executes CPUID for function `input` and stores the
// resulting registers through `output`, returning whether it succeeded --
// nothing in this header confirms the array size or the meaning of the result.
111 CRYPTOPP_DLL bool CRYPTOPP_API CpuId(word32 input, word32 *output);
112 
113 #if CRYPTOPP_BOOL_X64
// x64 builds: these three queries answer true unconditionally and never
// consult or trigger the runtime detection state.
inline bool HasSSE2()
{
	return true;
}

inline bool HasISSE()
{
	return true;
}

inline bool HasMMX()
{
	return true;
}
117 #else
118 
// Cached results used only on 32-bit x86 builds, where HasSSE2(), HasISSE()
// and HasMMX() read them at run time (the x64 versions return true instead).
119 extern CRYPTOPP_DLL bool g_hasSSE2;
120 extern CRYPTOPP_DLL bool g_hasISSE;
121 extern CRYPTOPP_DLL bool g_hasMMX;
122 
123 inline bool HasSSE2()
124 {
125  if (!g_x86DetectionDone)
126  DetectX86Features();
127  return g_hasSSE2;
128 }
129 
130 inline bool HasISSE()
131 {
132  if (!g_x86DetectionDone)
133  DetectX86Features();
134  return g_hasISSE;
135 }
136 
137 inline bool HasMMX()
138 {
139  if (!g_x86DetectionDone)
140  DetectX86Features();
141  return g_hasMMX;
142 }
143 
144 #endif
145 
146 inline bool HasSSSE3()
147 {
148  if (!g_x86DetectionDone)
149  DetectX86Features();
150  return g_hasSSSE3;
151 }
152 
153 inline bool HasAESNI()
154 {
155  if (!g_x86DetectionDone)
156  DetectX86Features();
157  return g_hasAESNI;
158 }
159 
160 inline bool HasCLMUL()
161 {
162  if (!g_x86DetectionDone)
163  DetectX86Features();
164  return g_hasCLMUL;
165 }
166 
167 inline bool IsP4()
168 {
169  if (!g_x86DetectionDone)
170  DetectX86Features();
171  return g_isP4;
172 }
173 
174 inline int GetCacheLineSize()
175 {
176  if (!g_x86DetectionDone)
177  DetectX86Features();
178  return g_cacheLineSize;
179 }
180 
181 #else
182 
183 inline int GetCacheLineSize()
184 {
185  return CRYPTOPP_L1_CACHE_LINE_SIZE;
186 }
187 
188 #endif
189 
190 #endif
191 
// Assembler-abstraction macros. The same instruction sequence is written once
// with these macros and rendered in one of three forms:
//   - MASM generation: raw instruction text followed by the token *newline*
//   - MSVC / Borland:  one __asm { ... } statement per instruction
//   - otherwise:       string-literal fragments terminated by ";"
//
//   AS1/AS2/AS3  instruction with one, two or three comma-separated parts
//   ASS          two-operand instruction plus an immediate built from four
//                fields as a*64 + b*16 + c*4 + d
//   ASL          defines a label
//   ASJ / ASC    instruction x targeting label y
//   AS_HEX       spells a hexadecimal constant (0..h for MASM, 0x.. otherwise)
//
// MASM-generation form. Presumably the *newline* marker is turned into a real
// line break by a later processing step -- TODO confirm.
// NOTE(review): ASS leaves a, b, c and d unparenthesized here and in the
// string form below, while the MSVC/Borland form parenthesizes them;
// expression arguments would therefore group differently. Confirm whether
// that is intentional.
192 #ifdef CRYPTOPP_GENERATE_X64_MASM
193  #define AS1(x) x*newline*
194  #define AS2(x, y) x, y*newline*
195  #define AS3(x, y, z) x, y, z*newline*
196  #define ASS(x, y, a, b, c, d) x, y, a*64+b*16+c*4+d*newline*
// Labels are spelled label<x> in this form; ASJ ignores its third argument.
197  #define ASL(x) label##x:*newline*
198  #define ASJ(x, y, z) x label##y*newline*
199  #define ASC(x, y) x label##y*newline*
200  #define AS_HEX(y) 0##y##h
// MSVC / Borland inline-assembly form. CRYPTOPP_NAKED is not defined in the
// MASM branch above.
201 #elif defined(_MSC_VER) || defined(__BORLANDC__)
202  #define CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
203  #define AS1(x) __asm {x}
204  #define AS2(x, y) __asm {x, y}
205  #define AS3(x, y, z) __asm {x, y, z}
206  #define ASS(x, y, a, b, c, d) __asm {x, y, (a)*64+(b)*16+(c)*4+(d)}
// Labels are spelled label<x> in this form; ASJ ignores its third argument.
207  #define ASL(x) __asm {label##x:}
208  #define ASJ(x, y, z) __asm {x label##y}
209  #define ASC(x, y) __asm {x label##y}
210  #define CRYPTOPP_NAKED __declspec(naked)
211  #define AS_HEX(y) 0x##y
// String form for all remaining compilers: every macro yields adjacent string
// literals, so a sequence of them concatenates into one assembly string.
212 #else
213  #define CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
214  // define these in two steps to allow arguments to be expanded
215  #define GNU_AS1(x) #x ";"
216  #define GNU_AS2(x, y) #x ", " #y ";"
217  #define GNU_AS3(x, y, z) #x ", " #y ", " #z ";"
// Labels here are the bare argument (no "label" prefix) on a fresh line, and
// ASJ appends its third argument directly after the label name. Presumably
// that is the f/b direction suffix of a numeric local label -- TODO confirm
// at the call sites.
218  #define GNU_ASL(x) "\n" #x ":"
219  #define GNU_ASJ(x, y, z) #x " " #y #z ";"
220  #define AS1(x) GNU_AS1(x)
221  #define AS2(x, y) GNU_AS2(x, y)
222  #define AS3(x, y, z) GNU_AS3(x, y, z)
// ASS and ASC stringify directly, so macro arguments passed to them are not
// expanded before being turned into text.
223  #define ASS(x, y, a, b, c, d) #x ", " #y ", " #a "*64+" #b "*16+" #c "*4+" #d ";"
224  #define ASL(x) GNU_ASL(x)
225  #define ASJ(x, y, z) GNU_ASJ(x, y, z)
226  #define ASC(x, y) #x " " #y ";"
// No naked-function attribute is applied in this form.
227  #define CRYPTOPP_NAKED
228  #define AS_HEX(y) 0x##y
229 #endif
230 
// Conditional-emission helpers: IF0 discards its argument entirely, IF1
// passes its argument through unchanged.
#define IF0(code)
#define IF1(code) code
233 
// Helpers whose spelling depends on the assembler that will read the output.
#if defined(CRYPTOPP_GENERATE_X64_MASM)
	// MASM form: use its MOD operator and spell out the operand-size keyword.
	#define ASM_MOD(a, b) ((a) MOD (b))
	#define XMMWORD_PTR XMMWORD PTR
#else
	// The GNU assembler does not seem to have a mod operator, so the
	// remainder is derived from integer division: a - (a/b)*b.
	#define ASM_MOD(a, b) ((a)-((a)/(b))*(b))
	// GAS 2.15 does not support "XMMWORD PTR"; it seems to be necessary only
	// for MASM, so it expands to nothing here.
	#define XMMWORD_PTR
#endif
243 
// Register map for the portable assembly macros.
//   AS_REG_n        seven word-sized registers, numbered 1..7
//   AS_REG_nd       the 32-bit name of the same register
//   WORD_SZ         native word size in bytes (4 or 8)
//   WORD_REG(x)     prefixes a register stem with e or r (e.g. ax -> eax / rax)
//   WORD_PTR        memory-operand size keyword for one native word
//   AS_PUSH_IF86 /
//   AS_POP_IF86     push/pop of e<x> on 32-bit x86, nothing on x64
//   AS_JCXZ         jecxz or jrcxz, matching the native counter register
// None of these are defined when neither CRYPTOPP_BOOL_X86 nor
// CRYPTOPP_BOOL_X64 is true.
//
// 32-bit x86: the word-sized and 32-bit names are the same registers.
244 #if CRYPTOPP_BOOL_X86
245  #define AS_REG_1 ecx
246  #define AS_REG_2 edx
247  #define AS_REG_3 esi
248  #define AS_REG_4 edi
249  #define AS_REG_5 eax
250  #define AS_REG_6 ebx
251  #define AS_REG_7 ebp
252  #define AS_REG_1d ecx
253  #define AS_REG_2d edx
254  #define AS_REG_3d esi
255  #define AS_REG_4d edi
256  #define AS_REG_5d eax
257  #define AS_REG_6d ebx
258  #define AS_REG_7d ebp
259  #define WORD_SZ 4
260  #define WORD_REG(x) e##x
261  #define WORD_PTR DWORD PTR
262  #define AS_PUSH_IF86(x) AS1(push e##x)
263  #define AS_POP_IF86(x) AS1(pop e##x)
264  #define AS_JCXZ jecxz
265 #elif CRYPTOPP_BOOL_X64
// x64, MASM output: registers 1-4 are rcx, rdx, r8, r9, which matches the
// integer argument order of the Microsoft x64 calling convention.
266  #ifdef CRYPTOPP_GENERATE_X64_MASM
267  #define AS_REG_1 rcx
268  #define AS_REG_2 rdx
269  #define AS_REG_3 r8
270  #define AS_REG_4 r9
271  #define AS_REG_5 rax
272  #define AS_REG_6 r10
273  #define AS_REG_7 r11
274  #define AS_REG_1d ecx
275  #define AS_REG_2d edx
276  #define AS_REG_3d r8d
277  #define AS_REG_4d r9d
278  #define AS_REG_5d eax
279  #define AS_REG_6d r10d
280  #define AS_REG_7d r11d
// x64, inline assembly: registers 1-6 are rdi, rsi, rdx, rcx, r8, r9, which
// matches the integer argument order of the System V AMD64 ABI.
281  #else
282  #define AS_REG_1 rdi
283  #define AS_REG_2 rsi
284  #define AS_REG_3 rdx
285  #define AS_REG_4 rcx
286  #define AS_REG_5 r8
287  #define AS_REG_6 r9
288  #define AS_REG_7 r10
289  #define AS_REG_1d edi
290  #define AS_REG_2d esi
291  #define AS_REG_3d edx
292  #define AS_REG_4d ecx
293  #define AS_REG_5d r8d
294  #define AS_REG_6d r9d
295  #define AS_REG_7d r10d
296  #endif
// Shared by both x64 variants. The IF86 push/pop helpers expand to nothing.
297  #define WORD_SZ 8
298  #define WORD_REG(x) r##x
299  #define WORD_PTR QWORD PTR
300  #define AS_PUSH_IF86(x)
301  #define AS_POP_IF86(x)
302  #define AS_JCXZ jrcxz
303 #endif
304 
305 // helper macro for stream cipher output
// AS_XMM_OUTPUT4 emits the assembly that optionally XORs four XMM registers
// with 64 bytes of input and then stores them to the output buffer.
//
//   labelPrefix  prefix for the four labels the macro defines by pasting the
//                digits 3, 7, 8 and 9 onto it; presumably it must be unique
//                per expansion within one assembly block
//   inputPtr     register holding the input address, or zero for "no input"
//   outputPtr    register holding the output address
//   x0..x3       numbers of the XMM registers holding the four blocks
//   t            number of a scratch XMM register (used on the unaligned
//                input path only)
//   p0..p3       16-byte slot index of each block; scaled by 16 to form the
//                byte offset from inputPtr / outputPtr
//   increment    number of 16-byte slots to advance each pointer by
//
// Flow:
//   1. inputPtr == 0: skip the XOR step entirely (inputPtr is not advanced).
//   2. inputPtr 16-byte aligned: pxor directly from memory.
//      Otherwise (label 7): movdqu each block into xmm<t>, then pxor.
//      Either way inputPtr is advanced by increment*16.
//   3. Label 3: outputPtr 16-byte aligned -> movdqa stores,
//      otherwise (label 8) -> movdqu stores.
//   4. Label 9: outputPtr is advanced by increment*16.
//
// No comments can be placed between the continuation lines below without
// breaking the macro, so the description is kept up here.
306 #define AS_XMM_OUTPUT4(labelPrefix, inputPtr, outputPtr, x0, x1, x2, x3, t, p0, p1, p2, p3, increment)\
307  AS2( test inputPtr, inputPtr)\
308  ASC( jz, labelPrefix##3)\
309  AS2( test inputPtr, 15)\
310  ASC( jnz, labelPrefix##7)\
311  AS2( pxor xmm##x0, [inputPtr+p0*16])\
312  AS2( pxor xmm##x1, [inputPtr+p1*16])\
313  AS2( pxor xmm##x2, [inputPtr+p2*16])\
314  AS2( pxor xmm##x3, [inputPtr+p3*16])\
315  AS2( add inputPtr, increment*16)\
316  ASC( jmp, labelPrefix##3)\
317  ASL(labelPrefix##7)\
318  AS2( movdqu xmm##t, [inputPtr+p0*16])\
319  AS2( pxor xmm##x0, xmm##t)\
320  AS2( movdqu xmm##t, [inputPtr+p1*16])\
321  AS2( pxor xmm##x1, xmm##t)\
322  AS2( movdqu xmm##t, [inputPtr+p2*16])\
323  AS2( pxor xmm##x2, xmm##t)\
324  AS2( movdqu xmm##t, [inputPtr+p3*16])\
325  AS2( pxor xmm##x3, xmm##t)\
326  AS2( add inputPtr, increment*16)\
327  ASL(labelPrefix##3)\
328  AS2( test outputPtr, 15)\
329  ASC( jnz, labelPrefix##8)\
330  AS2( movdqa [outputPtr+p0*16], xmm##x0)\
331  AS2( movdqa [outputPtr+p1*16], xmm##x1)\
332  AS2( movdqa [outputPtr+p2*16], xmm##x2)\
333  AS2( movdqa [outputPtr+p3*16], xmm##x3)\
334  ASC( jmp, labelPrefix##9)\
335  ASL(labelPrefix##8)\
336  AS2( movdqu [outputPtr+p0*16], xmm##x0)\
337  AS2( movdqu [outputPtr+p1*16], xmm##x1)\
338  AS2( movdqu [outputPtr+p2*16], xmm##x2)\
339  AS2( movdqu [outputPtr+p3*16], xmm##x3)\
340  ASL(labelPrefix##9)\
341  AS2( add outputPtr, increment*16)
342 
343 NAMESPACE_END
344 
345 #endif