8 #ifndef CRYPTOPP_GENERATE_X64_MASM 15 #if CRYPTOPP_MSC_VERSION 16 # pragma warning(disable: 4702 4740) 21 #if defined(CRYPTOPP_DISABLE_SALSA_ASM) 22 # undef CRYPTOPP_X86_ASM_AVAILABLE 23 # undef CRYPTOPP_X32_ASM_AVAILABLE 24 # undef CRYPTOPP_X64_ASM_AVAILABLE 25 # undef CRYPTOPP_SSE2_ASM_AVAILABLE 26 # undef CRYPTOPP_SSSE3_ASM_AVAILABLE 31 #if defined(CRYPTOPP_DEBUG) && !defined(CRYPTOPP_DOXYGEN_PROCESSING) 32 void Salsa20_TestInstantiations()
44 CRYPTOPP_ALIGN_DATA(16) word32 x[16];
46 for (
size_t i = 0; i < 16; ++i)
50 for (
size_t i = 0; i < rounds; i += 2)
52 x[ 4] ^= rotlConstant< 7>(x[ 0]+x[12]);
53 x[ 8] ^= rotlConstant< 9>(x[ 4]+x[ 0]);
54 x[12] ^= rotlConstant<13>(x[ 8]+x[ 4]);
55 x[ 0] ^= rotlConstant<18>(x[12]+x[ 8]);
57 x[ 9] ^= rotlConstant< 7>(x[ 5]+x[ 1]);
58 x[13] ^= rotlConstant< 9>(x[ 9]+x[ 5]);
59 x[ 1] ^= rotlConstant<13>(x[13]+x[ 9]);
60 x[ 5] ^= rotlConstant<18>(x[ 1]+x[13]);
62 x[14] ^= rotlConstant< 7>(x[10]+x[ 6]);
63 x[ 2] ^= rotlConstant< 9>(x[14]+x[10]);
64 x[ 6] ^= rotlConstant<13>(x[ 2]+x[14]);
65 x[10] ^= rotlConstant<18>(x[ 6]+x[ 2]);
67 x[ 3] ^= rotlConstant< 7>(x[15]+x[11]);
68 x[ 7] ^= rotlConstant< 9>(x[ 3]+x[15]);
69 x[11] ^= rotlConstant<13>(x[ 7]+x[ 3]);
70 x[15] ^= rotlConstant<18>(x[11]+x[ 7]);
72 x[ 1] ^= rotlConstant< 7>(x[ 0]+x[ 3]);
73 x[ 2] ^= rotlConstant< 9>(x[ 1]+x[ 0]);
74 x[ 3] ^= rotlConstant<13>(x[ 2]+x[ 1]);
75 x[ 0] ^= rotlConstant<18>(x[ 3]+x[ 2]);
77 x[ 6] ^= rotlConstant< 7>(x[ 5]+x[ 4]);
78 x[ 7] ^= rotlConstant< 9>(x[ 6]+x[ 5]);
79 x[ 4] ^= rotlConstant<13>(x[ 7]+x[ 6]);
80 x[ 5] ^= rotlConstant<18>(x[ 4]+x[ 7]);
82 x[11] ^= rotlConstant< 7>(x[10]+x[ 9]);
83 x[ 8] ^= rotlConstant< 9>(x[11]+x[10]);
84 x[ 9] ^= rotlConstant<13>(x[ 8]+x[11]);
85 x[10] ^= rotlConstant<18>(x[ 9]+x[ 8]);
87 x[12] ^= rotlConstant< 7>(x[15]+x[14]);
88 x[13] ^= rotlConstant< 9>(x[12]+x[15]);
89 x[14] ^= rotlConstant<13>(x[13]+x[12]);
90 x[15] ^= rotlConstant<18>(x[14]+x[13]);
94 for (
size_t i = 0; i < 16; ++i)
98 std::string Salsa20_Policy::AlgorithmProvider()
const 100 #if CRYPTOPP_SSE2_ASM_AVAILABLE && !defined(CRYPTOPP_DISABLE_SALSA_ASM) 107 void Salsa20_Policy::CipherSetKey(
const NameValuePairs ¶ms,
const byte *key,
size_t length)
111 if (!(m_rounds == 8 || m_rounds == 12 || m_rounds == 20))
112 throw InvalidRounds(Salsa20::StaticAlgorithmName(), m_rounds);
116 get1(m_state[13])(m_state[10])(m_state[7])(m_state[4]);
118 get2(m_state[15])(m_state[12])(m_state[9])(m_state[6]);
121 m_state[0] = 0x61707865;
122 m_state[1] = (length == 16) ? 0x3120646e : 0x3320646e;
123 m_state[2] = (length == 16) ? 0x79622d36 : 0x79622d32;
124 m_state[3] = 0x6b206574;
127 void Salsa20_Policy::CipherResynchronize(byte *keystreamBuffer,
const byte *
IV,
size_t length)
129 CRYPTOPP_UNUSED(keystreamBuffer), CRYPTOPP_UNUSED(length);
133 get(m_state[14])(m_state[11]);
134 m_state[8] = m_state[5] = 0;
137 void Salsa20_Policy::SeekToIteration(lword iterationCount)
139 m_state[8] = (word32)iterationCount;
140 m_state[5] = (word32)SafeRightShift<32>(iterationCount);
#if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64)

// Required buffer alignment: 16 bytes when the SSE2 path (aligned movdqa
// loads/stores) will run, otherwise the natural alignment of word32.
unsigned int Salsa20_Policy::GetAlignment() const
{
#if CRYPTOPP_SSE2_ASM_AVAILABLE
    if (HasSSE2())
        return 16;
    else
#endif
        return GetAlignmentOf<word32>();
}

// Ideal processing granularity: the SSE2 kernel produces four 64-byte
// blocks per pass, the portable code one block per pass.
unsigned int Salsa20_Policy::GetOptimalBlockSize() const
{
#if CRYPTOPP_SSE2_ASM_AVAILABLE
    if (HasSSE2())
        return 4*BYTES_PER_ITERATION;
    else
#endif
        return BYTES_PER_ITERATION;
}
#endif
// Salsa20_Policy::OperateKeystream -- SSE2/MASM accelerated keystream paths.
// NOTE(review): this region is extraction-garbled (original line numbers are
// fused into the text and some lines are missing); code tokens are preserved
// byte-for-byte below, only comments are added.
165 #ifdef CRYPTOPP_X64_MASM_AVAILABLE 167 void Salsa20_OperateKeystream(byte *output,
const byte *input,
size_t iterationCount,
int rounds,
void *state);
// The prototype above declares the external x64 MASM routine; the member
// function below dispatches to it when CRYPTOPP_X64_MASM_AVAILABLE is set.
171 #if CRYPTOPP_MSC_VERSION 172 # pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code 175 void Salsa20_Policy::OperateKeystream(
KeystreamOperation operation, byte *output,
const byte *input,
size_t iterationCount)
177 #endif // #ifdef CRYPTOPP_GENERATE_X64_MASM 179 #ifdef CRYPTOPP_X64_MASM_AVAILABLE 180 Salsa20_OperateKeystream(output, input, iterationCount, m_rounds, m_state.
data());
// Win64 MASM prologue: reserve stack workspace and spill xmm6-xmm15 (the
// save_xmm128 directives below show they are treated as callee-saved).
184 #if CRYPTOPP_SSE2_ASM_AVAILABLE 185 #ifdef CRYPTOPP_GENERATE_X64_MASM 187 Salsa20_OperateKeystream PROC FRAME
188 mov r10, [rsp + 5*8] ; state
189 alloc_stack(10*16 + 32*16 + 8)
190 save_xmm128 xmm6, 0200h
191 save_xmm128 xmm7, 0210h
192 save_xmm128 xmm8, 0220h
193 save_xmm128 xmm9, 0230h
194 save_xmm128 xmm10, 0240h
195 save_xmm128 xmm11, 0250h
196 save_xmm128 xmm12, 0260h
197 save_xmm128 xmm13, 0270h
198 save_xmm128 xmm14, 0280h
199 save_xmm128 xmm15, 0290h
202 #define REG_output rcx 203 #define REG_input rdx 204 #define REG_iterationCount r8 205 #define REG_state r10 206 #define REG_rounds e9d 207 #define REG_roundsLeft eax 208 #define REG_temp32 r11d 210 #define SSE2_WORKSPACE rsp 214 #if CRYPTOPP_BOOL_X64 215 #define REG_output %1 217 #define REG_iterationCount %2 219 #define REG_rounds %3 220 #define REG_roundsLeft eax 221 #define REG_temp32 edx 223 #define SSE2_WORKSPACE %5 225 CRYPTOPP_ALIGN_DATA(16) byte workspace[16*32];
// NOTE(review): "e9d" in the MASM register aliases above looks like a
// garbled "r9d" -- confirm against upstream salsa.cpp.
// x86 register aliases follow; ebp is consumed too (hence pragma 4731 above).
227 #define REG_output edi 228 #define REG_input eax 229 #define REG_iterationCount ecx 230 #define REG_state esi 231 #define REG_rounds edx 232 #define REG_roundsLeft ebx 233 #define REG_temp32 ebp 235 #define SSE2_WORKSPACE esp + WORD_SZ 244 void *s = m_state.
data();
247 AS2( mov REG_iterationCount, iterationCount)
248 AS2( mov REG_input, input)
249 AS2( mov REG_output, output)
250 AS2( mov REG_state, s)
251 AS2( mov REG_rounds, r)
253 #endif // #ifndef CRYPTOPP_GENERATE_X64_MASM 256 AS2( cmp REG_iterationCount, 4)
259 #if CRYPTOPP_BOOL_X86 266 #define SSE2_EXPAND_S(i, j) \ 267 ASS( pshufd xmm4, xmm##i, j, j, j, j) \ 268 AS2( movdqa [SSE2_WORKSPACE + (i*4+j)*16 + 256], xmm4) 270 AS2( movdqa xmm0, [REG_state + 0*16])
271 AS2( movdqa xmm1, [REG_state + 1*16])
272 AS2( movdqa xmm2, [REG_state + 2*16])
273 AS2( movdqa xmm3, [REG_state + 3*16])
289 #define SSE2_EXPAND_S85(i) \ 290 AS2( mov dword ptr [SSE2_WORKSPACE + 8*16 + i*4 + 256], REG_roundsLeft) \ 291 AS2( mov dword ptr [SSE2_WORKSPACE + 5*16 + i*4 + 256], REG_temp32) \ 292 AS2( add REG_roundsLeft, 1) \ 293 AS2( adc REG_temp32, 0) 296 AS2( mov REG_roundsLeft, dword ptr [REG_state + 8*4])
297 AS2( mov REG_temp32, dword ptr [REG_state + 5*4])
302 AS2( mov dword ptr [REG_state + 8*4], REG_roundsLeft)
303 AS2( mov dword ptr [REG_state + 5*4], REG_temp32)
306 #define SSE2_QUARTER_ROUND(a, b, d, i) \ 307 AS2( movdqa xmm4, xmm##d) \ 308 AS2( paddd xmm4, xmm##a) \ 309 AS3( vprotd xmm4, xmm4, i) \ 310 AS2( pxor xmm##b, xmm4) 312 #define SSE2_QUARTER_ROUND(a, b, d, i) \ 313 AS2( movdqa xmm4, xmm##d) \ 314 AS2( paddd xmm4, xmm##a) \ 315 AS2( movdqa xmm5, xmm4) \ 316 AS2( pslld xmm4, i) \ 317 AS2( psrld xmm5, 32-i) \ 318 AS2( pxor xmm##b, xmm4) \ 319 AS2( pxor xmm##b, xmm5) 322 #define L01(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##A, [SSE2_WORKSPACE + d*16 + i*256]) 323 #define L02(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##C, [SSE2_WORKSPACE + a*16 + i*256]) 324 #define L03(A,B,C,D,a,b,c,d,i) AS2( paddd xmm##A, xmm##C) 327 #define L04(A,B,C,D,a,b,c,d,i) 328 #define L05(A,B,C,D,a,b,c,d,i) AS3( vprotd xmm##A, xmm##A, 7) 329 #define L06(A,B,C,D,a,b,c,d,i) 330 #define L07(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, [SSE2_WORKSPACE + b*16 + i*256]) 331 #define L08(A,B,C,D,a,b,c,d,i) 333 #define L04(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##B, xmm##A) 334 #define L05(A,B,C,D,a,b,c,d,i) AS2( pslld xmm##A, 7) 335 #define L06(A,B,C,D,a,b,c,d,i) AS2( psrld xmm##B, 32-7) 336 #define L07(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, [SSE2_WORKSPACE + b*16 + i*256]) 337 #define L08(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##B) 340 #define L09(A,B,C,D,a,b,c,d,i) AS2( movdqa [SSE2_WORKSPACE + b*16], xmm##A) 341 #define L10(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##B, xmm##A) 342 #define L11(A,B,C,D,a,b,c,d,i) AS2( paddd xmm##A, xmm##C) 345 #define L12(A,B,C,D,a,b,c,d,i) 346 #define L13(A,B,C,D,a,b,c,d,i) AS3( vprotd xmm##A, xmm##A, 9) 347 #define L14(A,B,C,D,a,b,c,d,i) 348 #define L15(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, [SSE2_WORKSPACE + c*16 + i*256]) 349 #define L16(A,B,C,D,a,b,c,d,i) 351 #define L12(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##D, xmm##A) 352 #define L13(A,B,C,D,a,b,c,d,i) AS2( pslld xmm##A, 9) 353 #define L14(A,B,C,D,a,b,c,d,i) AS2( psrld xmm##D, 32-9) 354 #define L15(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, [SSE2_WORKSPACE + c*16 + i*256]) 355 #define 
L16(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##D) 358 #define L17(A,B,C,D,a,b,c,d,i) AS2( movdqa [SSE2_WORKSPACE + c*16], xmm##A) 359 #define L18(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##D, xmm##A) 360 #define L19(A,B,C,D,a,b,c,d,i) AS2( paddd xmm##A, xmm##B) 363 #define L20(A,B,C,D,a,b,c,d,i) 364 #define L21(A,B,C,D,a,b,c,d,i) AS3( vprotd xmm##A, xmm##A, 13) 365 #define L22(A,B,C,D,a,b,c,d,i) 366 #define L23(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, [SSE2_WORKSPACE + d*16 + i*256]) 367 #define L24(A,B,C,D,a,b,c,d,i) 369 #define L20(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##B, xmm##A) 370 #define L21(A,B,C,D,a,b,c,d,i) AS2( pslld xmm##A, 13) 371 #define L22(A,B,C,D,a,b,c,d,i) AS2( psrld xmm##B, 32-13) 372 #define L23(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, [SSE2_WORKSPACE + d*16 + i*256]) 373 #define L24(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##B) 376 #define L25(A,B,C,D,a,b,c,d,i) AS2( movdqa [SSE2_WORKSPACE + d*16], xmm##A) 377 #define L26(A,B,C,D,a,b,c,d,i) AS2( paddd xmm##A, xmm##D) 380 #define L27(A,B,C,D,a,b,c,d,i) 381 #define L28(A,B,C,D,a,b,c,d,i) AS3( vprotd xmm##A, xmm##A, 18) 382 #define L29(A,B,C,D,a,b,c,d,i) 383 #define L30(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##C) 384 #define L31(A,B,C,D,a,b,c,d,i) 386 #define L27(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##D, xmm##A) 387 #define L28(A,B,C,D,a,b,c,d,i) AS2( pslld xmm##A, 18) 388 #define L29(A,B,C,D,a,b,c,d,i) AS2( psrld xmm##D, 32-18) 389 #define L30(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##C) 390 #define L31(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##D) 393 #define L32(A,B,C,D,a,b,c,d,i) AS2( movdqa [SSE2_WORKSPACE + a*16], xmm##A) 395 #define SSE2_QUARTER_ROUND_X8(i, a, b, c, d, e, f, g, h) \ 396 L01(0,1,2,3, a,b,c,d, i) L01(4,5,6,7, e,f,g,h, i) \ 397 L02(0,1,2,3, a,b,c,d, i) L02(4,5,6,7, e,f,g,h, i) \ 398 L03(0,1,2,3, a,b,c,d, i) L03(4,5,6,7, e,f,g,h, i) \ 399 L04(0,1,2,3, a,b,c,d, i) L04(4,5,6,7, e,f,g,h, i) \ 400 L05(0,1,2,3, a,b,c,d, i) L05(4,5,6,7, e,f,g,h, i) \ 401 L06(0,1,2,3, a,b,c,d, i) L06(4,5,6,7, e,f,g,h, i) \ 
402 L07(0,1,2,3, a,b,c,d, i) L07(4,5,6,7, e,f,g,h, i) \ 403 L08(0,1,2,3, a,b,c,d, i) L08(4,5,6,7, e,f,g,h, i) \ 404 L09(0,1,2,3, a,b,c,d, i) L09(4,5,6,7, e,f,g,h, i) \ 405 L10(0,1,2,3, a,b,c,d, i) L10(4,5,6,7, e,f,g,h, i) \ 406 L11(0,1,2,3, a,b,c,d, i) L11(4,5,6,7, e,f,g,h, i) \ 407 L12(0,1,2,3, a,b,c,d, i) L12(4,5,6,7, e,f,g,h, i) \ 408 L13(0,1,2,3, a,b,c,d, i) L13(4,5,6,7, e,f,g,h, i) \ 409 L14(0,1,2,3, a,b,c,d, i) L14(4,5,6,7, e,f,g,h, i) \ 410 L15(0,1,2,3, a,b,c,d, i) L15(4,5,6,7, e,f,g,h, i) \ 411 L16(0,1,2,3, a,b,c,d, i) L16(4,5,6,7, e,f,g,h, i) \ 412 L17(0,1,2,3, a,b,c,d, i) L17(4,5,6,7, e,f,g,h, i) \ 413 L18(0,1,2,3, a,b,c,d, i) L18(4,5,6,7, e,f,g,h, i) \ 414 L19(0,1,2,3, a,b,c,d, i) L19(4,5,6,7, e,f,g,h, i) \ 415 L20(0,1,2,3, a,b,c,d, i) L20(4,5,6,7, e,f,g,h, i) \ 416 L21(0,1,2,3, a,b,c,d, i) L21(4,5,6,7, e,f,g,h, i) \ 417 L22(0,1,2,3, a,b,c,d, i) L22(4,5,6,7, e,f,g,h, i) \ 418 L23(0,1,2,3, a,b,c,d, i) L23(4,5,6,7, e,f,g,h, i) \ 419 L24(0,1,2,3, a,b,c,d, i) L24(4,5,6,7, e,f,g,h, i) \ 420 L25(0,1,2,3, a,b,c,d, i) L25(4,5,6,7, e,f,g,h, i) \ 421 L26(0,1,2,3, a,b,c,d, i) L26(4,5,6,7, e,f,g,h, i) \ 422 L27(0,1,2,3, a,b,c,d, i) L27(4,5,6,7, e,f,g,h, i) \ 423 L28(0,1,2,3, a,b,c,d, i) L28(4,5,6,7, e,f,g,h, i) \ 424 L29(0,1,2,3, a,b,c,d, i) L29(4,5,6,7, e,f,g,h, i) \ 425 L30(0,1,2,3, a,b,c,d, i) L30(4,5,6,7, e,f,g,h, i) \ 426 L31(0,1,2,3, a,b,c,d, i) L31(4,5,6,7, e,f,g,h, i) \ 427 L32(0,1,2,3, a,b,c,d, i) L32(4,5,6,7, e,f,g,h, i) 429 #define SSE2_QUARTER_ROUND_X16(i, a, b, c, d, e, f, g, h, A, B, C, D, E, F, G, H) \ 430 L01(0,1,2,3, a,b,c,d, i) L01(4,5,6,7, e,f,g,h, i) L01(8,9,10,11, A,B,C,D, i) L01(12,13,14,15, E,F,G,H, i) \ 431 L02(0,1,2,3, a,b,c,d, i) L02(4,5,6,7, e,f,g,h, i) L02(8,9,10,11, A,B,C,D, i) L02(12,13,14,15, E,F,G,H, i) \ 432 L03(0,1,2,3, a,b,c,d, i) L03(4,5,6,7, e,f,g,h, i) L03(8,9,10,11, A,B,C,D, i) L03(12,13,14,15, E,F,G,H, i) \ 433 L04(0,1,2,3, a,b,c,d, i) L04(4,5,6,7, e,f,g,h, i) L04(8,9,10,11, A,B,C,D, i) L04(12,13,14,15, E,F,G,H, i) \ 434 
L05(0,1,2,3, a,b,c,d, i) L05(4,5,6,7, e,f,g,h, i) L05(8,9,10,11, A,B,C,D, i) L05(12,13,14,15, E,F,G,H, i) \ 435 L06(0,1,2,3, a,b,c,d, i) L06(4,5,6,7, e,f,g,h, i) L06(8,9,10,11, A,B,C,D, i) L06(12,13,14,15, E,F,G,H, i) \ 436 L07(0,1,2,3, a,b,c,d, i) L07(4,5,6,7, e,f,g,h, i) L07(8,9,10,11, A,B,C,D, i) L07(12,13,14,15, E,F,G,H, i) \ 437 L08(0,1,2,3, a,b,c,d, i) L08(4,5,6,7, e,f,g,h, i) L08(8,9,10,11, A,B,C,D, i) L08(12,13,14,15, E,F,G,H, i) \ 438 L09(0,1,2,3, a,b,c,d, i) L09(4,5,6,7, e,f,g,h, i) L09(8,9,10,11, A,B,C,D, i) L09(12,13,14,15, E,F,G,H, i) \ 439 L10(0,1,2,3, a,b,c,d, i) L10(4,5,6,7, e,f,g,h, i) L10(8,9,10,11, A,B,C,D, i) L10(12,13,14,15, E,F,G,H, i) \ 440 L11(0,1,2,3, a,b,c,d, i) L11(4,5,6,7, e,f,g,h, i) L11(8,9,10,11, A,B,C,D, i) L11(12,13,14,15, E,F,G,H, i) \ 441 L12(0,1,2,3, a,b,c,d, i) L12(4,5,6,7, e,f,g,h, i) L12(8,9,10,11, A,B,C,D, i) L12(12,13,14,15, E,F,G,H, i) \ 442 L13(0,1,2,3, a,b,c,d, i) L13(4,5,6,7, e,f,g,h, i) L13(8,9,10,11, A,B,C,D, i) L13(12,13,14,15, E,F,G,H, i) \ 443 L14(0,1,2,3, a,b,c,d, i) L14(4,5,6,7, e,f,g,h, i) L14(8,9,10,11, A,B,C,D, i) L14(12,13,14,15, E,F,G,H, i) \ 444 L15(0,1,2,3, a,b,c,d, i) L15(4,5,6,7, e,f,g,h, i) L15(8,9,10,11, A,B,C,D, i) L15(12,13,14,15, E,F,G,H, i) \ 445 L16(0,1,2,3, a,b,c,d, i) L16(4,5,6,7, e,f,g,h, i) L16(8,9,10,11, A,B,C,D, i) L16(12,13,14,15, E,F,G,H, i) \ 446 L17(0,1,2,3, a,b,c,d, i) L17(4,5,6,7, e,f,g,h, i) L17(8,9,10,11, A,B,C,D, i) L17(12,13,14,15, E,F,G,H, i) \ 447 L18(0,1,2,3, a,b,c,d, i) L18(4,5,6,7, e,f,g,h, i) L18(8,9,10,11, A,B,C,D, i) L18(12,13,14,15, E,F,G,H, i) \ 448 L19(0,1,2,3, a,b,c,d, i) L19(4,5,6,7, e,f,g,h, i) L19(8,9,10,11, A,B,C,D, i) L19(12,13,14,15, E,F,G,H, i) \ 449 L20(0,1,2,3, a,b,c,d, i) L20(4,5,6,7, e,f,g,h, i) L20(8,9,10,11, A,B,C,D, i) L20(12,13,14,15, E,F,G,H, i) \ 450 L21(0,1,2,3, a,b,c,d, i) L21(4,5,6,7, e,f,g,h, i) L21(8,9,10,11, A,B,C,D, i) L21(12,13,14,15, E,F,G,H, i) \ 451 L22(0,1,2,3, a,b,c,d, i) L22(4,5,6,7, e,f,g,h, i) L22(8,9,10,11, A,B,C,D, i) L22(12,13,14,15, 
E,F,G,H, i) \ 452 L23(0,1,2,3, a,b,c,d, i) L23(4,5,6,7, e,f,g,h, i) L23(8,9,10,11, A,B,C,D, i) L23(12,13,14,15, E,F,G,H, i) \ 453 L24(0,1,2,3, a,b,c,d, i) L24(4,5,6,7, e,f,g,h, i) L24(8,9,10,11, A,B,C,D, i) L24(12,13,14,15, E,F,G,H, i) \ 454 L25(0,1,2,3, a,b,c,d, i) L25(4,5,6,7, e,f,g,h, i) L25(8,9,10,11, A,B,C,D, i) L25(12,13,14,15, E,F,G,H, i) \ 455 L26(0,1,2,3, a,b,c,d, i) L26(4,5,6,7, e,f,g,h, i) L26(8,9,10,11, A,B,C,D, i) L26(12,13,14,15, E,F,G,H, i) \ 456 L27(0,1,2,3, a,b,c,d, i) L27(4,5,6,7, e,f,g,h, i) L27(8,9,10,11, A,B,C,D, i) L27(12,13,14,15, E,F,G,H, i) \ 457 L28(0,1,2,3, a,b,c,d, i) L28(4,5,6,7, e,f,g,h, i) L28(8,9,10,11, A,B,C,D, i) L28(12,13,14,15, E,F,G,H, i) \ 458 L29(0,1,2,3, a,b,c,d, i) L29(4,5,6,7, e,f,g,h, i) L29(8,9,10,11, A,B,C,D, i) L29(12,13,14,15, E,F,G,H, i) \ 459 L30(0,1,2,3, a,b,c,d, i) L30(4,5,6,7, e,f,g,h, i) L30(8,9,10,11, A,B,C,D, i) L30(12,13,14,15, E,F,G,H, i) \ 460 L31(0,1,2,3, a,b,c,d, i) L31(4,5,6,7, e,f,g,h, i) L31(8,9,10,11, A,B,C,D, i) L31(12,13,14,15, E,F,G,H, i) \ 461 L32(0,1,2,3, a,b,c,d, i) L32(4,5,6,7, e,f,g,h, i) L32(8,9,10,11, A,B,C,D, i) L32(12,13,14,15, E,F,G,H, i) 463 #if CRYPTOPP_BOOL_X64 464 SSE2_QUARTER_ROUND_X16(1, 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15)
466 SSE2_QUARTER_ROUND_X8(1, 2, 6, 10, 14, 3, 7, 11, 15)
467 SSE2_QUARTER_ROUND_X8(1, 0, 4, 8, 12, 1, 5, 9, 13)
469 AS2( mov REG_roundsLeft, REG_rounds)
472 ASL(SSE2_Salsa_Output)
473 AS2( movdqa xmm0, xmm4)
474 AS2( punpckldq xmm4, xmm5)
475 AS2( movdqa xmm1, xmm6)
476 AS2( punpckldq xmm6, xmm7)
477 AS2( movdqa xmm2, xmm4)
478 AS2( punpcklqdq xmm4, xmm6)
479 AS2( punpckhqdq xmm2, xmm6)
480 AS2( punpckhdq xmm0, xmm5)
481 AS2( punpckhdq xmm1, xmm7)
482 AS2( movdqa xmm6, xmm0)
483 AS2( punpcklqdq xmm0, xmm1)
484 AS2( punpckhqdq xmm6, xmm1)
485 AS_XMM_OUTPUT4(SSE2_Salsa_Output_A, REG_input, REG_output, 4, 2, 0, 6, 1, 0, 4, 8, 12, 1)
489 #if CRYPTOPP_BOOL_X64 490 SSE2_QUARTER_ROUND_X16(0, 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15)
492 SSE2_QUARTER_ROUND_X16(0, 0, 13, 10, 7, 1, 14, 11, 4, 2, 15, 8, 5, 3, 12, 9, 6)
494 SSE2_QUARTER_ROUND_X8(0, 2, 6, 10, 14, 3, 7, 11, 15)
495 SSE2_QUARTER_ROUND_X8(0, 0, 4, 8, 12, 1, 5, 9, 13)
497 SSE2_QUARTER_ROUND_X8(0, 2, 15, 8, 5, 3, 12, 9, 6)
498 SSE2_QUARTER_ROUND_X8(0, 0, 13, 10, 7, 1, 14, 11, 4)
500 AS2( sub REG_roundsLeft, 2)
// SSE2_OUTPUT_4: add the saved input block back in (Salsa20 feed-forward),
// then transpose and emit via the SSE2_Salsa_Output subroutine.
503 #define SSE2_OUTPUT_4(a, b, c, d) \ 504 AS2( movdqa xmm4, [SSE2_WORKSPACE + a*16 + 256])\ 505 AS2( paddd xmm4, [SSE2_WORKSPACE + a*16])\ 506 AS2( movdqa xmm5, [SSE2_WORKSPACE + b*16 + 256])\ 507 AS2( paddd xmm5, [SSE2_WORKSPACE + b*16])\ 508 AS2( movdqa xmm6, [SSE2_WORKSPACE + c*16 + 256])\ 509 AS2( paddd xmm6, [SSE2_WORKSPACE + c*16])\ 510 AS2( movdqa xmm7, [SSE2_WORKSPACE + d*16 + 256])\ 511 AS2( paddd xmm7, [SSE2_WORKSPACE + d*16])\ 512 ASC( call, SSE2_Salsa_Output) 514 SSE2_OUTPUT_4(0, 13, 10, 7)
515 SSE2_OUTPUT_4(4, 1, 14, 11)
516 SSE2_OUTPUT_4(8, 5, 2, 15)
517 SSE2_OUTPUT_4(12, 9, 6, 3)
518 AS2( test REG_input, REG_input)
520 AS2( add REG_input, 12*16)
522 AS2( add REG_output, 12*16)
523 AS2( sub REG_iterationCount, 4)
524 AS2( cmp REG_iterationCount, 4)
529 AS2( sub REG_iterationCount, 1)
531 AS2( movdqa xmm0, [REG_state + 0*16])
532 AS2( movdqa xmm1, [REG_state + 1*16])
533 AS2( movdqa xmm2, [REG_state + 2*16])
534 AS2( movdqa xmm3, [REG_state + 3*16])
535 AS2( mov REG_roundsLeft, REG_rounds)
538 SSE2_QUARTER_ROUND(0, 1, 3, 7)
539 SSE2_QUARTER_ROUND(1, 2, 0, 9)
540 SSE2_QUARTER_ROUND(2, 3, 1, 13)
541 SSE2_QUARTER_ROUND(3, 0, 2, 18)
542 ASS( pshufd xmm1, xmm1, 2, 1, 0, 3)
543 ASS( pshufd xmm2, xmm2, 1, 0, 3, 2)
544 ASS( pshufd xmm3, xmm3, 0, 3, 2, 1)
545 SSE2_QUARTER_ROUND(0, 3, 1, 7)
546 SSE2_QUARTER_ROUND(3, 2, 0, 9)
547 SSE2_QUARTER_ROUND(2, 1, 3, 13)
548 SSE2_QUARTER_ROUND(1, 0, 2, 18)
549 ASS( pshufd xmm1, xmm1, 0, 3, 2, 1)
550 ASS( pshufd xmm2, xmm2, 1, 0, 3, 2)
551 ASS( pshufd xmm3, xmm3, 2, 1, 0, 3)
552 AS2( sub REG_roundsLeft, 2)
555 AS2( paddd xmm0, [REG_state + 0*16])
556 AS2( paddd xmm1, [REG_state + 1*16])
557 AS2( paddd xmm2, [REG_state + 2*16])
558 AS2( paddd xmm3, [REG_state + 3*16])
560 AS2( add dword ptr [REG_state + 8*4], 1)
561 AS2( adc dword ptr [REG_state + 5*4], 0)
563 AS2( pcmpeqb xmm6, xmm6)
565 ASS( pshufd xmm7, xmm6, 0, 1, 2, 3)
566 AS2( movdqa xmm4, xmm0)
567 AS2( movdqa xmm5, xmm3)
568 AS2( pand xmm0, xmm7)
569 AS2( pand xmm4, xmm6)
570 AS2( pand xmm3, xmm6)
571 AS2( pand xmm5, xmm7)
573 AS2( movdqa xmm5, xmm1)
574 AS2( pand xmm1, xmm7)
575 AS2( pand xmm5, xmm6)
577 AS2( pand xmm6, xmm2)
578 AS2( pand xmm2, xmm7)
582 AS2( movdqa xmm5, xmm4)
583 AS2( movdqa xmm6, xmm0)
584 AS3( shufpd xmm4, xmm1, 2)
585 AS3( shufpd xmm0, xmm2, 2)
586 AS3( shufpd xmm1, xmm5, 2)
587 AS3( shufpd xmm2, xmm6, 2)
590 AS_XMM_OUTPUT4(SSE2_Salsa_Output_B, REG_input, REG_output, 4, 0, 1, 2, 3, 0, 1, 2, 3, 4)
// GNU inline-asm operand and clobber lists; the x64 variant also lists
// xmm8-xmm15 as clobbered.
598 #if CRYPTOPP_BOOL_X64 599 :
"+r" (input),
"+r" (output),
"+r" (iterationCount)
600 :
"r" (m_rounds),
"r" (m_state.
begin()),
"r" (workspace)
601 :
"%eax",
"%rdx",
"memory",
"cc",
"%xmm0",
"%xmm1",
"%xmm2",
"%xmm3",
"%xmm4",
"%xmm5",
"%xmm6",
"%xmm7",
"%xmm8",
"%xmm9",
"%xmm10",
"%xmm11",
"%xmm12",
"%xmm13",
"%xmm14",
"%xmm15" 603 :
"+a" (input),
"+D" (output),
"+c" (iterationCount)
604 :
"d" (m_rounds),
"S" (m_state.
begin())
// MASM epilogue: restore xmm6-xmm15 from the save area and release the frame.
609 #ifdef CRYPTOPP_GENERATE_X64_MASM 610 movdqa xmm6, [rsp + 0200h]
611 movdqa xmm7, [rsp + 0210h]
612 movdqa xmm8, [rsp + 0220h]
613 movdqa xmm9, [rsp + 0230h]
614 movdqa xmm10, [rsp + 0240h]
615 movdqa xmm11, [rsp + 0250h]
616 movdqa xmm12, [rsp + 0260h]
617 movdqa xmm13, [rsp + 0270h]
618 movdqa xmm14, [rsp + 0280h]
619 movdqa xmm15, [rsp + 0290h]
620 add rsp, 10*16 + 32*16 + 8
622 Salsa20_OperateKeystream ENDP
// Portable C++ keystream path (reference implementation, used when no
// assembly path is available). One 64-byte block per loop iteration.
628 #ifndef CRYPTOPP_GENERATE_X64_MASM 630 word32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
632 while (iterationCount--)
634 x0 = m_state[0]; x1 = m_state[1]; x2 = m_state[2]; x3 = m_state[3];
635 x4 = m_state[4]; x5 = m_state[5]; x6 = m_state[6]; x7 = m_state[7];
636 x8 = m_state[8]; x9 = m_state[9]; x10 = m_state[10]; x11 = m_state[11];
637 x12 = m_state[12]; x13 = m_state[13]; x14 = m_state[14]; x15 = m_state[15];
// m_rounds is 8, 12 or 20 (enforced in CipherSetKey), so two rounds --
// one column round then one row round -- are applied per loop pass.
639 for (
int i=m_rounds; i>0; i-=2)
641 #define QUARTER_ROUND(a, b, c, d) \ 642 b = b ^ rotlConstant<7>(a + d); \ 643 c = c ^ rotlConstant<9>(b + a); \ 644 d = d ^ rotlConstant<13>(c + b); \ 645 a = a ^ rotlConstant<18>(d + c); 647 QUARTER_ROUND(x0, x4, x8, x12)
648 QUARTER_ROUND(x1, x5, x9, x13)
649 QUARTER_ROUND(x2, x6, x10, x14)
650 QUARTER_ROUND(x3, x7, x11, x15)
652 QUARTER_ROUND(x0, x13, x10, x7)
653 QUARTER_ROUND(x1, x14, x11, x4)
654 QUARTER_ROUND(x2, x15, x8, x5)
655 QUARTER_ROUND(x3, x12, x9, x6)
// SALSA_OUTPUT adds the feed-forward and emits words in the permuted
// m_state order; afterwards the 64-bit block counter is incremented
// (low word m_state[8], carry into high word m_state[5]).
658 #ifndef CRYPTOPP_DOXYGEN_PROCESSING 659 #define SALSA_OUTPUT(x) {\ 660 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 0, x0 + m_state[0]);\ 661 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 1, x13 + m_state[13]);\ 662 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 2, x10 + m_state[10]);\ 663 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 3, x7 + m_state[7]);\ 664 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 4, x4 + m_state[4]);\ 665 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 5, x1 + m_state[1]);\ 666 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 6, x14 + m_state[14]);\ 667 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 7, x11 + m_state[11]);\ 668 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 8, x8 + m_state[8]);\ 669 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 9, x5 + m_state[5]);\ 670 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 10, x2 + m_state[2]);\ 671 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 11, x15 + m_state[15]);\ 672 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 12, x12 + m_state[12]);\ 673 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 13, x9 + m_state[9]);\ 674 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 14, x6 + m_state[6]);\ 675 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 15, x3 + m_state[3]);} 681 if (++m_state[8] == 0)
691 if (!(m_rounds == 8 || m_rounds == 12 || m_rounds == 20))
692 throw InvalidRounds(XSalsa20::StaticAlgorithmName(), m_rounds);
699 m_state[0] = 0x61707865;
700 m_state[1] = 0x3320646e;
701 m_state[2] = 0x79622d32;
702 m_state[3] = 0x6b206574;
707 CRYPTOPP_UNUSED(keystreamBuffer), CRYPTOPP_UNUSED(length);
710 word32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
713 get(x14)(x11)(x8)(x5)(m_state[14])(m_state[11]);
715 x13 = m_key[0]; x10 = m_key[1]; x7 = m_key[2]; x4 = m_key[3];
716 x15 = m_key[4]; x12 = m_key[5]; x9 = m_key[6]; x6 = m_key[7];
717 x0 = m_state[0]; x1 = m_state[1]; x2 = m_state[2]; x3 = m_state[3];
719 for (
int i=m_rounds; i>0; i-=2)
721 QUARTER_ROUND(x0, x4, x8, x12)
722 QUARTER_ROUND(x1, x5, x9, x13)
723 QUARTER_ROUND(x2, x6, x10, x14)
724 QUARTER_ROUND(x3, x7, x11, x15)
726 QUARTER_ROUND(x0, x13, x10, x7)
727 QUARTER_ROUND(x1, x14, x11, x4)
728 QUARTER_ROUND(x2, x15, x8, x5)
729 QUARTER_ROUND(x3, x12, x9, x6)
732 m_state[13] = x0; m_state[10] = x1; m_state[7] = x2; m_state[4] = x3;
733 m_state[15] = x14; m_state[12] = x11; m_state[9] = x8; m_state[6] = x5;
734 m_state[8] = m_state[5] = 0;
739 #endif // #ifndef CRYPTOPP_GENERATE_X64_MASM int GetIntValueWithDefault(const char *name, int defaultValue) const
Get a named value with type int, with default.
Standard names for retrieving values by name when working with NameValuePairs.
Utility functions for the Crypto++ library.
Library configuration file.
virtual unsigned int GetOptimalBlockSize() const
Provides number of ideal bytes to process.
unsigned int GetAlignment() const
Provides data alignment requirements.
#define CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH(x, y)
Helper macro to implement OperateKeystream.
byte order is little-endian
void CipherSetKey(const NameValuePairs &params, const byte *key, size_t length)
Key the cipher.
Exception thrown when an invalid number of rounds is encountered.
void Salsa20_Core(word32 *data, unsigned int rounds)
Salsa20 core transform.
A::pointer data()
Provides a pointer to the first element in the memory block.
void CipherResynchronize(byte *keystreamBuffer, const byte *IV, size_t length)
Resynchronize the cipher.
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.
Functions for CPU features and intrinsics.
Classes for Salsa and Salsa20 stream ciphers.
iterator begin()
Provides an iterator pointing to the first element in the memory block.
const char * IV()
ConstByteArrayParameter, also accepts const byte * for backwards compatibility.
bool HasSSE2()
Determines SSE2 availability.
Access a block of memory.
KeystreamOperation
Keystream operation flags.
Crypto++ library namespace.
SymmetricCipher implementation.
size_type size() const
Provides the count of elements in the SecBlock.
Interface for retrieving values given their names.