#ifndef CRYPTOPP_GENERATE_X64_MASM
#if CRYPTOPP_MSC_VERSION
# pragma warning(disable: 4731)
#ifndef CRYPTOPP_DISABLE_PANAMA_ASM
# if CRYPTOPP_SSSE3_ASM_AVAILABLE
# elif CRYPTOPP_SSE2_ASM_AVAILABLE
	memset(m_state, 0, m_state.SizeInBytes());
#if CRYPTOPP_SSSE3_ASM_AVAILABLE && !defined(CRYPTOPP_DISABLE_PANAMA_ASM)
	m_state[17] = HasSSSE3();
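	// m_state layout: words 0..16 hold the 17-word mixing state a[], word 17
	// holds the byte offset of the current buffer stage (seeded here with the
	// HasSSSE3() flag for the assembly path), and the 32-stage x 8-word
	// circular buffer begins at word 20.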
#ifdef CRYPTOPP_X64_MASM_AVAILABLE
#elif CRYPTOPP_SSE2_ASM_AVAILABLE && !defined(CRYPTOPP_DISABLE_PANAMA_ASM)
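// Panama_SSE2_Pull runs `count` pull iterations on `state`. When z is not
// NULLPTR it receives 32 bytes of keystream per iteration, and when y is not
// NULLPTR its contents are XORed into that output; see the call sites in the
// cipher code near the end of this file.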
#ifdef CRYPTOPP_GENERATE_X64_MASM
	Panama_SSE2_Pull PROC FRAME
void CRYPTOPP_NOINLINE Panama_SSE2_Pull(size_t count, word32 *state, word32 *z, const word32 *y)
#if defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY)
	AS2( mov AS_REG_1, count)
	AS2( mov AS_REG_2, state)
	#define REG_loopEnd [esp]
#elif defined(CRYPTOPP_GENERATE_X64_MASM)
	#define REG_loopEnd rdi
	#define REG_loopEnd r8
	AS2( mov AS_REG_6d, [AS_REG_2+4*17])
	AS2( add AS_REG_1, AS_REG_6)
	AS2( mov REG_loopEnd, AS_REG_1)
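	// AS_REG_6 tracks the buffer-start byte offset (loaded from state word 17
	// above); it advances by 32 bytes per pull iteration, so the loop runs
	// until it reaches the end value saved in REG_loopEnd.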
	AS2( movdqa xmm0, XMMWORD_PTR [AS_REG_2+0*16])
	AS2( movdqa xmm1, XMMWORD_PTR [AS_REG_2+1*16])
	AS2( movdqa xmm2, XMMWORD_PTR [AS_REG_2+2*16])
	AS2( movdqa xmm3, XMMWORD_PTR [AS_REG_2+3*16])
	AS2( mov eax, dword ptr [AS_REG_2+4*16])
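	// xmm0..xmm3 now hold the first 16 words of the state array and eax the
	// 17th; together they are the Panama mixing state a[].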
#if CRYPTOPP_SSSE3_ASM_AVAILABLE
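	// Bit 0 of the buffer offset in AS_REG_6 carries the HasSSSE3() flag
	// stored into state word 17 above; stage offsets advance in multiples of
	// 32, so the bit is never disturbed. When set, the SSSE3 palignr path
	// builds the word-shifted copies of the state, otherwise the SSE2
	// movss/pshufd sequence below is used.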
	AS2( test AS_REG_6, 1)
	AS2( movdqa xmm6, xmm2)
	AS2( movss xmm6, xmm3)
	ASS( pshufd xmm5, xmm6, 0, 3, 2, 1)
	AS2( movdqa xmm7, xmm3)
	AS2( movss xmm7, xmm6)
	ASS( pshufd xmm6, xmm7, 0, 3, 2, 1)
#if CRYPTOPP_SSSE3_ASM_AVAILABLE
	AS2( movdqa xmm5, xmm3)
	AS3( palignr xmm5, xmm2, 4)
	AS3( palignr xmm6, xmm3, 4)
	AS2( movd AS_REG_1d, xmm2)
	AS2( movd AS_REG_7d, xmm3)
	AS2( or AS_REG_1d, AS_REG_7d)
	AS2( xor eax, AS_REG_1d)

#define SSE2_Index(i) ASM_MOD(((i)*13+16), 17)
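// The state array keeps a[] in a permuted order (see the a(i) macro below:
// logical index i lives at slot (i*13+16) mod 17); SSE2_Index maps a logical
// index to its slot. The rol count and the 5*i mod 17 destination in the
// lines that follow implement Panama's pi step, matching the GP macro in the
// C path.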
	AS2( movd AS_REG_1d, xmm7)\
	AS2( rol AS_REG_1d, ASM_MOD((ASM_MOD(5*i,17)*(ASM_MOD(5*i,17)+1)/2), 32))\
	AS2( mov [AS_REG_2+SSE2_Index(ASM_MOD(5*(i), 17))*4], AS_REG_1d)

#define pi4(x, y, z, a, b, c, d) \
	AS2( pcmpeqb xmm7, xmm7)\
	ASS( pshuflw xmm7, xmm7, 1, 0, 3, 2)\
	AS2( punpckhqdq xmm7, xmm7)\
	ASS( pshuflw xmm7, xmm7, 1, 0, 3, 2)\
	pi4(xmm1, xmm2, xmm3, 1, 5, 9, 13)
	pi4(xmm0, xmm1, xmm2, 2, 6, 10, 14)
	pi4(xmm6, xmm0, xmm1, 3, 7, 11, 15)
	pi4(xmm5, xmm6, xmm0, 4, 8, 12, 16)
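	// Each pi4 invocation above applies gamma, a(i) ^ (a(i+1) | ~a(i+2)), to
	// four state words and feeds the results through the pi rotate/permute
	// step; the GP macro further below is the C equivalent.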
	AS2( movdqa xmm4, xmm3)
	AS2( punpcklqdq xmm3, xmm2)
	AS2( punpckhdq xmm4, xmm2)
	AS2( movdqa xmm2, xmm1)
	AS2( punpcklqdq xmm1, xmm0)
	AS2( punpckhdq xmm2, xmm0)
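	// Keystream output: skipped entirely when z (AS_REG_3) is NULL; otherwise
	// the optional input y (AS_REG_4) is XORed in, with separate aligned and
	// unaligned load paths, and 32 bytes are stored to z with movdqa or movdqu
	// depending on the destination's alignment.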
	AS2( test AS_REG_3, AS_REG_3)
	AS2( movdqa xmm6, xmm4)
	AS2( punpcklqdq xmm4, xmm2)
	AS2( punpckhqdq xmm6, xmm2)
	AS2( test AS_REG_4, 15)
	AS2( test AS_REG_4, AS_REG_4)
	AS2( pxor xmm4, [AS_REG_4])
	AS2( pxor xmm6, [AS_REG_4+16])
	AS2( add AS_REG_4, 32)
	AS2( movdqu xmm0, [AS_REG_4])
	AS2( movdqu xmm2, [AS_REG_4+16])
	AS2( pxor xmm4, xmm0)
	AS2( pxor xmm6, xmm2)
	AS2( add AS_REG_4, 32)
	AS2( test AS_REG_3, 15)
	AS2( movdqa XMMWORD_PTR [AS_REG_3], xmm4)
	AS2( movdqa XMMWORD_PTR [AS_REG_3+16], xmm6)
	AS2( add AS_REG_3, 32)
	AS2( movdqu XMMWORD_PTR [AS_REG_3], xmm4)
	AS2( movdqu XMMWORD_PTR [AS_REG_3+16], xmm6)
	AS2( add AS_REG_3, 32)
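	// Update the circular buffer: the lea/and pairs compute byte offsets of
	// the two stages touched this iteration, and "and ..., 31*32" keeps the
	// offsets inside the 32-stage, 32-bytes-per-stage ring.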
	AS2( lea AS_REG_1, [AS_REG_6 + 32])
	AS2( and AS_REG_1, 31*32)
	AS2( lea AS_REG_7, [AS_REG_6 + (32-24)*32])
	AS2( and AS_REG_7, 31*32)
	AS2( movdqa xmm0, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+0*8])
	AS2( pxor xmm3, xmm0)
	ASS( pshufd xmm0, xmm0, 2, 3, 0, 1)
	AS2( movdqa XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+0*8], xmm3)
	AS2( pxor xmm0, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+2*8])
	AS2( movdqa XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+2*8], xmm0)
	AS2( movdqa xmm4, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+2*8])
	AS2( pxor xmm1, xmm4)
	AS2( movdqa XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+2*8], xmm1)
	AS2( pxor xmm4, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+0*8])
	AS2( movdqa XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+0*8], xmm4)
	AS2( movdqa xmm3, XMMWORD_PTR [AS_REG_2+3*16])
	AS2( movdqa xmm2, XMMWORD_PTR [AS_REG_2+2*16])
	AS2( movdqa xmm1, XMMWORD_PTR [AS_REG_2+1*16])
	AS2( movdqa xmm0, XMMWORD_PTR [AS_REG_2+0*16])
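	// Reload the state words (they now hold the gamma/pi results written back
	// by the rotate-and-store step) and apply theta, a(i) ^= a(i+1) ^ a(i+4),
	// using the word-shifted copies built by the movss/pshufd or palignr block
	// that follows; the T macro below is the C equivalent.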
#if CRYPTOPP_SSSE3_ASM_AVAILABLE
	AS2( test AS_REG_6, 1)
	AS2( movdqa xmm7, xmm3)
	AS2( movss xmm7, xmm6)
	AS2( movdqa xmm6, xmm2)
	AS2( movss xmm6, xmm3)
	AS2( movdqa xmm5, xmm1)
	AS2( movss xmm5, xmm2)
	AS2( movdqa xmm4, xmm0)
	AS2( movss xmm4, xmm1)
	ASS( pshufd xmm7, xmm7, 0, 3, 2, 1)
	ASS( pshufd xmm6, xmm6, 0, 3, 2, 1)
	ASS( pshufd xmm5, xmm5, 0, 3, 2, 1)
	ASS( pshufd xmm4, xmm4, 0, 3, 2, 1)
#if CRYPTOPP_SSSE3_ASM_AVAILABLE
	AS3( palignr xmm7, xmm3, 4)
	AS2( movq xmm6, xmm3)
	AS3( palignr xmm6, xmm2, 4)
	AS2( movq xmm5, xmm2)
	AS3( palignr xmm5, xmm1, 4)
	AS2( movq xmm4, xmm1)
	AS3( palignr xmm4, xmm0, 4)
	AS2( movd AS_REG_1d, xmm0)
	AS2( xor eax, AS_REG_1d)
	AS2( movd AS_REG_1d, xmm3)
	AS2( xor eax, AS_REG_1d)
	AS2( pxor xmm3, xmm2)
	AS2( pxor xmm2, xmm1)
	AS2( pxor xmm1, xmm0)
	AS2( pxor xmm0, xmm7)
	AS2( pxor xmm3, xmm7)
	AS2( pxor xmm2, xmm6)
	AS2( pxor xmm1, xmm5)
	AS2( pxor xmm0, xmm4)
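	// Fold in the two buffer taps that complete theta: the stages the C path
	// reads through b(4,i) and b(16,i) (cf. TS1L and TS2 below).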
	AS2( lea AS_REG_1, [AS_REG_6 + (32-4)*32])
	AS2( and AS_REG_1, 31*32)
	AS2( lea AS_REG_7, [AS_REG_6 + 16*32])
	AS2( and AS_REG_7, 31*32)
	AS2( movdqa xmm4, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+0*16])
	AS2( movdqa xmm5, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+0*16])
	AS2( movdqa xmm6, xmm4)
	AS2( punpcklqdq xmm4, xmm5)
	AS2( punpckhqdq xmm6, xmm5)
	AS2( pxor xmm3, xmm4)
	AS2( pxor xmm2, xmm6)
	AS2( movdqa xmm4, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+1*16])
	AS2( movdqa xmm5, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+1*16])
	AS2( movdqa xmm6, xmm4)
	AS2( punpcklqdq xmm4, xmm5)
	AS2( punpckhqdq xmm6, xmm5)
	AS2( pxor xmm1, xmm4)
	AS2( pxor xmm0, xmm6)
	AS2( add AS_REG_6, 32)
	AS2( cmp AS_REG_6, REG_loopEnd)
	AS2( mov [AS_REG_2+4*16], eax)
	AS2( movdqa XMMWORD_PTR [AS_REG_2+3*16], xmm3)
	AS2( movdqa XMMWORD_PTR [AS_REG_2+2*16], xmm2)
	AS2( movdqa XMMWORD_PTR [AS_REG_2+1*16], xmm1)
	AS2( movdqa XMMWORD_PTR [AS_REG_2+0*16], xmm0)
	: "D" (count), "S" (state), "d" (z), "c" (y)
	: "%r8", "%r9", "r10", "%eax", "memory", "cc", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
	: "c" (count), "d" (state), "S" (z), "D" (y)
	: "%eax", "memory", "cc"
#ifdef CRYPTOPP_GENERATE_X64_MASM
	movdqa xmm6, [rsp + 0h]
	movdqa xmm7, [rsp + 10h]
	Panama_SSE2_Pull ENDP

#ifndef CRYPTOPP_GENERATE_X64_MASM

	word32 bstart = m_state[17];
	word32 *const aPtr = m_state;
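	// Reference (non-assembly) implementation of one iteration. aPtr is the
	// 17-word mixing state, bPtr the 32-stage x 8-word circular buffer that
	// begins at state word 20, and cPtr the array receiving the gamma/pi
	// output. The macros below name the pieces of the round:
	//   a(i), c(i)        - state access in the permuted storage order
	//   b(i, j)           - word j of buffer stage i, relative to bstart
	//   US(i), UL(i)      - buffer update from the input block / from the state
	//   GP(i)             - gamma, a(i) ^ (a(i+1) | ~a(i+2)), followed by pi
	//   T, TS1S/TS1L/TS2  - theta plus the input word or buffer tap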
#define bPtr ((byte *)(aPtr+20))

#define a(i) aPtr[((i)*13+16) % 17]
#define c(i) cPtr[((i)*13+16) % 17]
#define b(i, j) b##i[(j)*2%8 + (j)/4]

#define US(i) {word32 t=b(0,i); b(0,i)=ConditionalByteReverse(B::ToEnum(), p[i])^t; b(25,(i+6)%8)^=t;}
#define UL(i) {word32 t=b(0,i); b(0,i)=a(i+1)^t; b(25,(i+6)%8)^=t;}
#define GP(i) c(5*i%17) = rotlFixed(a(i) ^ (a((i+1)%17) | ~a((i+2)%17)), ((5*i%17)*((5*i%17)+1)/2)%32)
#define T(i,x) a(i) = c(i) ^ c((i+1)%17) ^ c((i+4)%17) ^ x
#define TS1S(i) T(i+1, ConditionalByteReverse(B::ToEnum(), p[i]))
#define TS1L(i) T(i+1, b(4,i))
#define TS2(i) T(i+9, b(16,i))

#define PANAMA_OUTPUT(x) \
	CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 0, a(0+9));\
	CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 1, a(1+9));\
	CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 2, a(2+9));\
	CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 3, a(3+9));\
	CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 4, a(4+9));\
	CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 5, a(5+9));\
	CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 6, a(6+9));\
	CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 7, a(7+9));
	word32 *const b16 = (word32 *)(void *)(bPtr+((bstart+16*32) & 31*32));
	word32 *const b4 = (word32 *)(void *)(bPtr+((bstart+(32-4)*32) & 31*32));
	word32 *const b0 = (word32 *)(void *)(bPtr+((bstart) & 31*32));
	word32 *const b25 = (word32 *)(void *)(bPtr+((bstart+(32-25)*32) & 31*32));
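	// b0, b4, b16 and b25 point at the buffer stages addressed as b(0,.),
	// b(4,.), b(16,.) and b(25,.) this iteration; "& 31*32" wraps the byte
	// offsets around the 32-stage ring.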
	US(0); US(1); US(2); US(3); US(4); US(5); US(6); US(7);
	UL(0); UL(1); UL(2); UL(3); UL(4); UL(5); UL(6); UL(7);
	TS1S(0); TS1S(1); TS1S(2); TS1S(3); TS1S(4); TS1S(5); TS1S(6); TS1S(7);
	TS1L(0); TS1L(1); TS1L(2); TS1L(3); TS1L(4); TS1L(5); TS1L(6); TS1L(7);
	TS2(0); TS2(1); TS2(2); TS2(3); TS2(4); TS2(5); TS2(6); TS2(7);
	m_state[17] = bstart;

size_t PanamaHash<B>::HashMultipleBlocks(const word32 *input, size_t length)
	this->Iterate(length / this->BLOCKSIZE, input);
	return length % this->BLOCKSIZE;

	this->ThrowIfInvalidTruncatedSize(size);
	this->PadLastBlock(this->BLOCKSIZE, 0x01);
	this->HashEndianCorrectedBlock(this->m_data);
	this->Iterate(1, NULLPTR, m_buf.BytePtr(), NULLPTR);
	memcpy(hash, m_buf, size);

	CRYPTOPP_UNUSED(params); CRYPTOPP_UNUSED(length);
	memcpy(m_key, key, 32);
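	// The key is only stored here; it is injected into the state, together
	// with the IV, during resynchronization below.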
	CRYPTOPP_UNUSED(keystreamBuffer); CRYPTOPP_UNUSED(iv);

	this->Iterate(1, m_key);
	if (iv && IsAligned<word32>(iv))
		this->Iterate(1, reinterpret_cast<const word32 *>(iv));
		memcpy(m_buf, iv, 32);
		memset(m_buf, 0, 32);
	this->Iterate(1, m_buf);

#if (CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_PANAMA_ASM)
	Panama_SSE2_Pull(32, this->m_state, NULLPTR, NULLPTR);
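	// The 32 pulls above are blank iterations (no output): the key and IV are
	// mixed through the state and buffer before any keystream is emitted.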
#if (CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_PANAMA_ASM)

#if (CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_PANAMA_ASM)
		Panama_SSE2_Pull(iterationCount, this->m_state,
			reinterpret_cast<word32 *>(output), reinterpret_cast<const word32 *>(input));
		this->Iterate(iterationCount, NULLPTR, output, input, operation);