// When this header is processed with CRYPTOPP_GENERATE_X64_MASM defined, the
// x86 assembly, x64 and SSE2-assembly feature macros below are defined
// directly in this file.
// NOTE(review): the matching #else/#endif lines are not visible in this excerpt.
4 #ifdef CRYPTOPP_GENERATE_X64_MASM
6 #define CRYPTOPP_X86_ASM_AVAILABLE
7 #define CRYPTOPP_BOOL_X64 1
8 #define CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 1
15 #if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
16 #include <emmintrin.h>
19 #if CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE
20 #if !defined(__GNUC__) || defined(__SSSE3__) || defined(__INTEL_COMPILER)
21 #include <tmmintrin.h>
// Local definition of _mm_shuffle_epi8 built on inline assembly.
// NOTE(review): this appears to be the alternative to including <tmmintrin.h>
// (the #else line is not visible in this excerpt) -- confirm.
// The asm issues `pshufb` with `a` as a read-write SSE-register operand ("+x")
// and `b` as an SSE-register-or-memory operand ("xm"). The enclosing braces
// and the return statement are not visible here; presumably `a` is returned.
23 __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
24 _mm_shuffle_epi8 (__m128i a, __m128i b)
26 asm (
"pshufb %1, %0" :
"+x"(a) :
"xm"(b));
30 #if !defined(__GNUC__) || defined(__SSE4_1__) || defined(__INTEL_COMPILER)
31 #include <smmintrin.h>
// Local definition of _mm_extract_epi32 built on inline assembly.
// NOTE(review): appears to be the alternative to including <smmintrin.h>
// (the #else line is not visible in this excerpt) -- confirm.
// The asm issues `pextrd` with:
//   %0 = r  -- write-only output, general register or memory ("=rm")
//   %1 = a  -- SSE-register input ("x")
//   %2 = i  -- immediate operand ("i"), so `i` must be a compile-time constant
// The declaration of `r` and the return statement are not visible here.
33 __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
34 _mm_extract_epi32 (__m128i a, const
int i)
37 asm (
"pextrd %2, %1, %0" :
"=rm"(r) :
"x"(a),
"i"(i));
// Local definition of _mm_insert_epi32 built on inline assembly.
// The asm issues `pinsrd` with:
//   %0 = a  -- read-write SSE-register operand ("+x")
//   %1 = b  -- general register or memory input ("rm")
//   %2 = i  -- immediate operand ("i"), so `i` must be a compile-time constant
// The enclosing braces and the return statement are not visible here;
// presumably the updated `a` is returned.
40 __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
41 _mm_insert_epi32 (__m128i a,
int b, const
int i)
43 asm (
"pinsrd %2, %1, %0" :
"+x"(a) :
"rm"(b),
"i"(i));
47 #if !defined(__GNUC__) || (defined(__AES__) && defined(__PCLMUL__)) || defined(__INTEL_COMPILER)
48 #include <wmmintrin.h>
// Local definition of _mm_clmulepi64_si128 built on inline assembly.
// NOTE(review): appears to be the alternative to including <wmmintrin.h>
// (the #else line is not visible in this excerpt) -- confirm.
// The asm issues `pclmulqdq` with:
//   %0 = a  -- read-write SSE-register operand ("+x")
//   %1 = b  -- SSE register or memory input ("xm")
//   %2 = i  -- immediate operand ("i"), so `i` must be a compile-time constant
// The return statement is not visible here; presumably `a` is returned.
50 __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
51 _mm_clmulepi64_si128 (__m128i a, __m128i b, const
int i)
53 asm (
"pclmulqdq %2, %1, %0" :
"+x"(a) :
"xm"(b),
"i"(i));
// Local definition of _mm_aeskeygenassist_si128 built on inline assembly.
// The asm issues `aeskeygenassist` with:
//   %0 = r  -- write-only SSE-register output ("=x")
//   %1 = a  -- SSE register or memory input ("xm")
//   %2 = i  -- immediate operand ("i"), so `i` must be a compile-time constant
// The declaration of `r` and the return statement are not visible here.
56 __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
57 _mm_aeskeygenassist_si128 (__m128i a, const
int i)
60 asm (
"aeskeygenassist %2, %1, %0" :
"=x"(r) :
"xm"(a),
"i"(i));
// Local definition of _mm_aesimc_si128 built on inline assembly.
// The asm issues `aesimc` with `r` as a write-only SSE-register output ("=x")
// and `a` as an SSE-register-or-memory input ("xm").
// The declaration of `r` and the return statement are not visible here.
63 __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
64 _mm_aesimc_si128 (__m128i a)
67 asm (
"aesimc %1, %0" :
"=x"(r) :
"xm"(a));
// Local definition of _mm_aesenc_si128 built on inline assembly.
// The asm issues `aesenc` with `a` as a read-write SSE-register operand ("+x")
// and `b` as an SSE-register-or-memory input ("xm").
// The return statement is not visible here; presumably `a` is returned.
70 __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
71 _mm_aesenc_si128 (__m128i a, __m128i b)
73 asm (
"aesenc %1, %0" :
"+x"(a) :
"xm"(b));
// Local definition of _mm_aesenclast_si128 built on inline assembly.
// The asm issues `aesenclast` with `a` as a read-write SSE-register operand
// ("+x") and `b` as an SSE-register-or-memory input ("xm").
// The return statement is not visible here; presumably `a` is returned.
76 __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
77 _mm_aesenclast_si128 (__m128i a, __m128i b)
79 asm (
"aesenclast %1, %0" :
"+x"(a) :
"xm"(b));
// Local definition of _mm_aesdec_si128 built on inline assembly.
// The asm issues `aesdec` with `a` as a read-write SSE-register operand ("+x")
// and `b` as an SSE-register-or-memory input ("xm").
// The return statement is not visible here; presumably `a` is returned.
82 __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
83 _mm_aesdec_si128 (__m128i a, __m128i b)
85 asm (
"aesdec %1, %0" :
"+x"(a) :
"xm"(b));
// Local definition of _mm_aesdeclast_si128 built on inline assembly.
// The asm issues `aesdeclast` with `a` as a read-write SSE-register operand
// ("+x") and `b` as an SSE-register-or-memory input ("xm").
// The return statement is not visible here; presumably `a` is returned.
88 __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
89 _mm_aesdeclast_si128 (__m128i a, __m128i b)
91 asm (
"aesdeclast %1, %0" :
"+x"(a) :
"xm"(b));
97 NAMESPACE_BEGIN(CryptoPP)
99 #if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X64
101 #define CRYPTOPP_CPUID_AVAILABLE
// Externally defined state used by the feature accessors further down.
// g_x86DetectionDone is the flag those accessors test before answering.
// NOTE(review): the g_has*/g_isP4 flags and g_cacheLineSize are presumably
// filled in by DetectX86Features() -- the definitions are not in this excerpt.
104 extern CRYPTOPP_DLL
bool g_x86DetectionDone;
105 extern CRYPTOPP_DLL
bool g_hasSSSE3;
106 extern CRYPTOPP_DLL
bool g_hasAESNI;
107 extern CRYPTOPP_DLL
bool g_hasCLMUL;
108 extern CRYPTOPP_DLL
bool g_isP4;
109 extern CRYPTOPP_DLL word32 g_cacheLineSize;
// Prototype only; the implementation is not in this excerpt.
110 CRYPTOPP_DLL
void CRYPTOPP_API DetectX86Features();
// Prototype only. Takes a word32 `input` and writes through `output`;
// NOTE(review): the number of word32 values written to `output` is not
// visible here -- check the definition before sizing the buffer.
111 CRYPTOPP_DLL
bool CRYPTOPP_API CpuId(word32 input, word32 *output);
113 #if CRYPTOPP_BOOL_X64
// Under CRYPTOPP_BOOL_X64 these three queries are compile-time constants:
// each returns true unconditionally, without consulting any detection flag.
114 inline bool HasSSE2() {
return true;}
115 inline bool HasISSE() {
return true;}
116 inline bool HasMMX() {
return true;}
// Externally defined feature flags for SSE2, ISSE and MMX.
// NOTE(review): these appear to belong to the non-x64 branch of the
// surrounding #if (the #else line is not visible in this excerpt).
119 extern CRYPTOPP_DLL
bool g_hasSSE2;
120 extern CRYPTOPP_DLL
bool g_hasISSE;
121 extern CRYPTOPP_DLL
bool g_hasMMX;
// Runtime feature accessors. Only the signature and the
// `if (!g_x86DetectionDone)` test of each accessor are visible in this
// excerpt; braces, the guarded statement and the return are missing.
// NOTE(review): presumably each one calls DetectX86Features() when detection
// has not run yet and then returns the matching g_has* flag -- confirm
// against the full file.
123 inline bool HasSSE2()
125 if (!g_x86DetectionDone)
130 inline bool HasISSE()
132 if (!g_x86DetectionDone)
// NOTE(review): the signature for this check is not visible (likely HasMMX).
139 if (!g_x86DetectionDone)
146 inline bool HasSSSE3()
148 if (!g_x86DetectionDone)
153 inline bool HasAESNI()
155 if (!g_x86DetectionDone)
160 inline bool HasCLMUL()
162 if (!g_x86DetectionDone)
// NOTE(review): the signature for this check is not visible (likely IsP4).
169 if (!g_x86DetectionDone)
// Returns g_cacheLineSize (declared word32, converted to int on return).
// The statement guarded by the g_x86DetectionDone test is not visible here;
// presumably it runs DetectX86Features() first -- confirm.
174 inline int GetCacheLineSize()
176 if (!g_x86DetectionDone)
178 return g_cacheLineSize;
// Variant that returns the compile-time constant CRYPTOPP_L1_CACHE_LINE_SIZE.
// NOTE(review): presumably the branch used when neither CRYPTOPP_BOOL_X86 nor
// CRYPTOPP_BOOL_X64 is set (the #else line is not visible in this excerpt).
183 inline int GetCacheLineSize()
185 return CRYPTOPP_L1_CACHE_LINE_SIZE;
// Assembly-emitting macros, MASM-generation flavour.
// Each macro expands to plain assembler text followed by the literal marker
// `*newline*`. NOTE(review): the marker is presumably replaced by a real line
// break in a later build step -- that step is not visible in this excerpt.
192 #ifdef CRYPTOPP_GENERATE_X64_MASM
// One-, two- and three-operand instruction forms.
193 #define AS1(x) x*newline*
194 #define AS2(x, y) x, y*newline*
195 #define AS3(x, y, z) x, y, z*newline*
// Instruction whose third operand is a*64+b*16+c*4+d (four 2-bit fields packed
// into one byte). Arguments are not parenthesized in this flavour.
196 #define ASS(x, y, a, b, c, d) x, y, a*64+b*16+c*4+d*newline*
// Label definition and jumps/calls to `label<y>`; ASJ's third argument is unused here.
197 #define ASL(x) label##x:*newline*
198 #define ASJ(x, y, z) x label##y*newline*
199 #define ASC(x, y) x label##y*newline*
// Hex literal in MASM syntax: 0<y>h.
200 #define AS_HEX(y) 0##y##h
// Assembly-emitting macros, Microsoft/Borland inline-assembly flavour.
// Each macro wraps its operands in an `__asm { ... }` block.
201 #elif defined(_MSC_VER) || defined(__BORLANDC__)
202 #define CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
// One-, two- and three-operand instruction forms.
203 #define AS1(x) __asm {x}
204 #define AS2(x, y) __asm {x, y}
205 #define AS3(x, y, z) __asm {x, y, z}
// Instruction whose third operand is (a)*64+(b)*16+(c)*4+(d): four 2-bit
// fields packed into one byte.
206 #define ASS(x, y, a, b, c, d) __asm {x, y, (a)*64+(b)*16+(c)*4+(d)}
// Label definition and jumps/calls to `label<y>`; ASJ's third argument is unused here.
207 #define ASL(x) __asm {label##x:}
208 #define ASJ(x, y, z) __asm {x label##y}
209 #define ASC(x, y) __asm {x label##y}
210 #define CRYPTOPP_NAKED __declspec(naked)
// Hex literal in C syntax: 0x<y>.
211 #define AS_HEX(y) 0x##y
// Assembly-emitting macros, GNU inline-assembly flavour.
// Each macro expands to adjacent string literals that form one assembler
// statement terminated by ";".
// NOTE(review): presumably the final #else branch of the flavour selection
// (the #else line is not visible in this excerpt).
213 #define CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
// Stringifying helpers. They are reached through AS1/AS2/AS3/ASL/ASJ below so
// that macro arguments are expanded before `#` turns them into strings.
215 #define GNU_AS1(x) #x ";"
216 #define GNU_AS2(x, y) #x ", " #y ";"
217 #define GNU_AS3(x, y, z) #x ", " #y ", " #z ";"
// Label definition: newline, label text, colon.
218 #define GNU_ASL(x) "\n" #x ":"
// Jump: instruction, space, then `y` immediately followed by `z`.
// NOTE(review): `z` looks like a local-label direction suffix (f/b) -- confirm at call sites.
219 #define GNU_ASJ(x, y, z) #x " " #y #z ";"
220 #define AS1(x) GNU_AS1(x)
221 #define AS2(x, y) GNU_AS2(x, y)
222 #define AS3(x, y, z) GNU_AS3(x, y, z)
// ASS and ASC stringify their arguments directly, so arguments that are
// themselves macros are NOT expanded first.
223 #define ASS(x, y, a, b, c, d) #x ", " #y ", " #a "*64+" #b "*16+" #c "*4+" #d ";"
224 #define ASL(x) GNU_ASL(x)
225 #define ASJ(x, y, z) GNU_ASJ(x, y, z)
226 #define ASC(x, y) #x " " #y ";"
// Expands to nothing in this flavour.
227 #define CRYPTOPP_NAKED
// Hex literal in C syntax: 0x<y>.
228 #define AS_HEX(y) 0x##y
// ASM_MOD(x, y): remainder of x divided by y, spelled for the active assembler.
// The MASM flavour uses the MOD operator; the other definition derives the
// remainder from truncating division as x - (x/y)*y.
// NOTE(review): the #else separating the two ASM_MOD definitions and the
// non-MASM XMMWORD_PTR definition are not visible in this excerpt.
234 #ifdef CRYPTOPP_GENERATE_X64_MASM
235 #define ASM_MOD(x, y) ((x) MOD (y))
236 #define XMMWORD_PTR XMMWORD PTR
239 #define ASM_MOD(x, y) ((x)-((x)/(y))*(y))
244 #if CRYPTOPP_BOOL_X86
// 32-bit x86 register aliases: AS_REG_1d..AS_REG_7d name the 32-bit registers
// used by the assembly macros.
// NOTE(review): the non-`d` AS_REG_n aliases are not visible in this excerpt.
252 #define AS_REG_1d ecx
253 #define AS_REG_2d edx
254 #define AS_REG_3d esi
255 #define AS_REG_4d edi
256 #define AS_REG_5d eax
257 #define AS_REG_6d ebx
258 #define AS_REG_7d ebp
// WORD_REG(x) forms the native-word register name by prefixing `e` (ax -> eax).
260 #define WORD_REG(x) e##x
261 #define WORD_PTR DWORD PTR
// On x86-32 these emit a real push/pop of e<x>.
262 #define AS_PUSH_IF86(x) AS1(push e##x)
263 #define AS_POP_IF86(x) AS1(pop e##x)
// Jump-if-count-register-zero mnemonic for the 32-bit counter register.
264 #define AS_JCXZ jecxz
265 #elif CRYPTOPP_BOOL_X64
// x64 register aliases used when generating MASM source.
// NOTE(review): the first four (ecx, edx, r8d, r9d) are the 32-bit views of
// the Microsoft x64 integer argument registers -- presumably intentional.
// The non-`d` AS_REG_n aliases are not visible in this excerpt.
266 #ifdef CRYPTOPP_GENERATE_X64_MASM
274 #define AS_REG_1d ecx
275 #define AS_REG_2d edx
276 #define AS_REG_3d r8d
277 #define AS_REG_4d r9d
278 #define AS_REG_5d eax
279 #define AS_REG_6d r10d
280 #define AS_REG_7d r11d
// x64 register aliases for the non-MASM build.
// NOTE(review): the first six (edi, esi, edx, ecx, r8d, r9d) are the 32-bit
// views of the System V AMD64 integer argument registers -- presumably
// intentional. The #else introducing this branch and the non-`d` aliases are
// not visible in this excerpt.
289 #define AS_REG_1d edi
290 #define AS_REG_2d esi
291 #define AS_REG_3d edx
292 #define AS_REG_4d ecx
293 #define AS_REG_5d r8d
294 #define AS_REG_6d r9d
295 #define AS_REG_7d r10d
// x64 word-size helpers.
// WORD_REG(x) forms the native-word register name by prefixing `r` (ax -> rax).
298 #define WORD_REG(x) r##x
299 #define WORD_PTR QWORD PTR
// Expand to nothing on x64: the push/pop is only emitted by the x86-32 definitions.
300 #define AS_PUSH_IF86(x)
301 #define AS_POP_IF86(x)
// Jump-if-count-register-zero mnemonic for the 64-bit counter register.
302 #define AS_JCXZ jrcxz
// AS_XMM_OUTPUT4: emit assembly that optionally XORs four XMM registers with
// input data and then stores them to the output buffer.
//
//   labelPrefix  prefix for the local labels this macro jumps to
//                (labelPrefix##3, ##7, ##8, ##9)
//   inputPtr     register holding the input pointer
//   outputPtr    register holding the output pointer
//   x0..x3       XMM register numbers holding the four blocks (pasted as xmm##xN)
//   t            XMM register number used as scratch on the unaligned-input path
//   p0..p3       block indices; each is scaled by 16 to form a byte offset
//   increment    number of 16-byte blocks to advance inputPtr/outputPtr by
//
// Visible instruction flow:
//   1. `test inputPtr, inputPtr` / jz labelPrefix##3 -- skip the XOR stage
//      when inputPtr is zero.
//   2. `test inputPtr, 15` / jnz labelPrefix##7 -- choose between pxor directly
//      from memory (16-byte-aligned input) and movdqu-into-xmm##t followed by
//      pxor (unaligned input). Both paths then add increment*16 to inputPtr.
//   3. `test outputPtr, 15` / jnz labelPrefix##8 -- choose between movdqa
//      (aligned) and movdqu (unaligned) stores.
//   4. Add increment*16 to outputPtr.
//
// NOTE(review): the ASL(...) lines that place labels 3, 7, 8 and 9 are not
// visible in this excerpt; the flow above assumes they sit at the gaps in the
// original numbering (317, 327, 335, 340) -- confirm against the full file.
// Do not insert comments between the continuation lines below: every line but
// the last ends in a backslash.
306 #define AS_XMM_OUTPUT4(labelPrefix, inputPtr, outputPtr, x0, x1, x2, x3, t, p0, p1, p2, p3, increment)\
307 AS2( test inputPtr, inputPtr)\
308 ASC( jz, labelPrefix##3)\
309 AS2( test inputPtr, 15)\
310 ASC( jnz, labelPrefix##7)\
311 AS2( pxor xmm##x0, [inputPtr+p0*16])\
312 AS2( pxor xmm##x1, [inputPtr+p1*16])\
313 AS2( pxor xmm##x2, [inputPtr+p2*16])\
314 AS2( pxor xmm##x3, [inputPtr+p3*16])\
315 AS2( add inputPtr, increment*16)\
316 ASC( jmp, labelPrefix##3)\
318 AS2( movdqu xmm##t, [inputPtr+p0*16])\
319 AS2( pxor xmm##x0, xmm##t)\
320 AS2( movdqu xmm##t, [inputPtr+p1*16])\
321 AS2( pxor xmm##x1, xmm##t)\
322 AS2( movdqu xmm##t, [inputPtr+p2*16])\
323 AS2( pxor xmm##x2, xmm##t)\
324 AS2( movdqu xmm##t, [inputPtr+p3*16])\
325 AS2( pxor xmm##x3, xmm##t)\
326 AS2( add inputPtr, increment*16)\
328 AS2( test outputPtr, 15)\
329 ASC( jnz, labelPrefix##8)\
330 AS2( movdqa [outputPtr+p0*16], xmm##x0)\
331 AS2( movdqa [outputPtr+p1*16], xmm##x1)\
332 AS2( movdqa [outputPtr+p2*16], xmm##x2)\
333 AS2( movdqa [outputPtr+p3*16], xmm##x3)\
334 ASC( jmp, labelPrefix##9)\
336 AS2( movdqu [outputPtr+p0*16], xmm##x0)\
337 AS2( movdqu [outputPtr+p1*16], xmm##x1)\
338 AS2( movdqu [outputPtr+p2*16], xmm##x2)\
339 AS2( movdqu [outputPtr+p3*16], xmm##x3)\
341 AS2( add outputPtr, increment*16)