Crypto++  6.0
Free C++ class library of cryptographic schemes
sha.cpp
1 // sha.cpp - modified by Wei Dai from Steve Reid's public domain sha1.c
2 
3 // Steve Reid implemented SHA-1. Wei Dai implemented SHA-2. Jeffrey Walton
4 // implemented Intel SHA extensions based on Intel articles and code by
5 // Sean Gulley. Jeffrey Walton implemented ARM SHA based on ARM code and
6 // code from Johannes Schneiders, Skip Hovsmith and Barry O'Rourke.
7 // All code is in the public domain.
8 
9 // In August 2017 Walton reworked the internals to align all the implementations.
10 // Formerly all hashes were software based, IterHashBase handled endian conversions,
11 // and IterHashBase dispatched a single block to SHA{N}::Transform. SHA{N}::Transform
12 // then performed the single block hashing. It was repeated for multiple blocks.
13 //
14 // The rework added SHA{N}::HashMultipleBlocks (class) and SHA{N}_HashMultipleBlocks
15 // (free standing). There are also hardware accelerated variations. Callers enter
16 // SHA{N}::HashMultipleBlocks (class), and the function calls SHA{N}_HashMultipleBlocks
17 // (free standing) or SHA{N}_HashBlock (free standing) as a fallback.
18 //
19 // An added wrinkle is that the hardware is little endian, the C++ implementation is big endian,
20 // and callers use big endian, so SHA{N}_HashMultipleBlocks accepts a ByteOrder for the incoming
21 // data arrangement. Hardware based SHA{N}_HashMultipleBlocks can often perform the endian swap
22 // much more easily by setting an EPI mask. The endian swap incurs no penalty on Intel SHA, and a
23 // 4-instruction penalty on ARM SHA. Under C++ the full software based swap penalty is incurred through ReverseBytes().
24 //
25 // The rework also removed the hacked-in pointers to implementations.
26 
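// A minimal caller-level sketch (illustrative, not part of this file), assuming only the
// public API declared in sha.h and cryptlib.h. Per the notes above, IterHashBase buffers
// input and hands whole blocks to SHA{N}::HashMultipleBlocks, while Final() pads and
// processes the trailing partial block.
//
//   #include "sha.h"
//
//   void DigestExample(const CryptoPP::byte* msg, size_t len,
//                      CryptoPP::byte digest[CryptoPP::SHA256::DIGESTSIZE])
//   {
//       CryptoPP::SHA256 hash;
//       hash.Update(msg, len);   // multi-block input reaches SHA256::HashMultipleBlocks
//       hash.Final(digest);      // pad, hash the tail block, write the 32-byte digest
//   }
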
27 // use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM sha.cpp" to generate MASM code
28 
29 #include "pch.h"
30 #include "config.h"
31 
32 #if CRYPTOPP_MSC_VERSION
33 # pragma warning(disable: 4100 4731)
34 #endif
35 
36 #ifndef CRYPTOPP_IMPORTS
37 #ifndef CRYPTOPP_GENERATE_X64_MASM
38 
39 #include "secblock.h"
40 #include "sha.h"
41 #include "misc.h"
42 #include "cpu.h"
43 
44 // The Clang 3.3 integrated assembler crashes on Linux.
45 // http://github.com/weidai11/cryptopp/issues/264
46 // Clang 3.4.1 (x86) crashes on FreeBSD 10.3. Clang 3.4.1 (x64) works fine.
47 #if defined(CRYPTOPP_LLVM_CLANG_VERSION) && (CRYPTOPP_LLVM_CLANG_VERSION < 30500)
48 # define CRYPTOPP_DISABLE_SHA_ASM
49 #endif
50 
51 #if defined(CRYPTOPP_DISABLE_SHA_ASM)
52 # undef CRYPTOPP_X86_ASM_AVAILABLE
53 # undef CRYPTOPP_X32_ASM_AVAILABLE
54 # undef CRYPTOPP_X64_ASM_AVAILABLE
55 # undef CRYPTOPP_SSE2_ASM_AVAILABLE
56 #endif
57 
58 NAMESPACE_BEGIN(CryptoPP)
59 
60 #if CRYPTOPP_SHANI_AVAILABLE
61 extern void SHA1_HashMultipleBlocks_SHANI(word32 *state, const word32 *data, size_t length, ByteOrder order);
62 extern void SHA256_HashMultipleBlocks_SHANI(word32 *state, const word32 *data, size_t length, ByteOrder order);
63 #endif
64 
65 #if CRYPTOPP_ARM_SHA_AVAILABLE
66 extern void SHA1_HashMultipleBlocks_ARMV8(word32 *state, const word32 *data, size_t length, ByteOrder order);
67 extern void SHA256_HashMultipleBlocks_ARMV8(word32 *state, const word32 *data, size_t length, ByteOrder order);
68 #endif
69 
70 #if CRYPTOPP_POWER8_SHA_AVAILABLE
71 extern void SHA256_HashMultipleBlocks_POWER8(word32 *state, const word32 *data, size_t length, ByteOrder order);
72 extern void SHA512_HashMultipleBlocks_POWER8(word64 *state, const word64 *data, size_t length, ByteOrder order);
73 #endif
74 
75 ////////////////////////////////
76 // start of Steve Reid's code //
77 ////////////////////////////////
78 
79 ANONYMOUS_NAMESPACE_BEGIN
80 
81 #define blk0(i) (W[i] = data[i])
82 #define blk1(i) (W[i&15] = rotlConstant<1>(W[(i+13)&15]^W[(i+8)&15]^W[(i+2)&15]^W[i&15]))
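// blk1 is the FIPS 180-4 SHA-1 message schedule W[t] = ROTL1(W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16]),
// kept in a 16-word circular buffer: the indices (i+13), (i+8), (i+2) and i taken mod 16 are
// exactly t-3, t-8, t-14 and t-16.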
83 
84 #define f1(x,y,z) (z^(x&(y^z)))
85 #define f2(x,y,z) (x^y^z)
86 #define f3(x,y,z) ((x&y)|(z&(x|y)))
87 #define f4(x,y,z) (x^y^z)
88 
89 /* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */
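// Each macro is one round step with the variable rotation folded into the argument order:
// z += f(w,x,y) + W[t] + K + ROTL5(v), then w = ROTL30(w). Successive calls pass a,b,c,d,e
// rotated one position instead of shifting the working variables themselves.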
90 #define R0(v,w,x,y,z,i) z+=f1(w,x,y)+blk0(i)+0x5A827999+rotlConstant<5>(v);w=rotlConstant<30>(w);
91 #define R1(v,w,x,y,z,i) z+=f1(w,x,y)+blk1(i)+0x5A827999+rotlConstant<5>(v);w=rotlConstant<30>(w);
92 #define R2(v,w,x,y,z,i) z+=f2(w,x,y)+blk1(i)+0x6ED9EBA1+rotlConstant<5>(v);w=rotlConstant<30>(w);
93 #define R3(v,w,x,y,z,i) z+=f3(w,x,y)+blk1(i)+0x8F1BBCDC+rotlConstant<5>(v);w=rotlConstant<30>(w);
94 #define R4(v,w,x,y,z,i) z+=f4(w,x,y)+blk1(i)+0xCA62C1D6+rotlConstant<5>(v);w=rotlConstant<30>(w);
95 
96 void SHA1_HashBlock_CXX(word32 *state, const word32 *data)
97 {
98  CRYPTOPP_ASSERT(state);
99  CRYPTOPP_ASSERT(data);
100 
101  word32 W[16];
102  /* Copy context->state[] to working vars */
103  word32 a = state[0];
104  word32 b = state[1];
105  word32 c = state[2];
106  word32 d = state[3];
107  word32 e = state[4];
108  /* 4 rounds of 20 operations each. Loop unrolled. */
109  R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3);
110  R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7);
111  R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11);
112  R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15);
113  R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19);
114  R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23);
115  R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27);
116  R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31);
117  R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35);
118  R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39);
119  R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43);
120  R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47);
121  R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51);
122  R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55);
123  R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59);
124  R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63);
125  R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67);
126  R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);
127  R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);
128  R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);
129  /* Add the working vars back into context.state[] */
130  state[0] += a;
131  state[1] += b;
132  state[2] += c;
133  state[3] += d;
134  state[4] += e;
135 }
136 
137 #undef blk0
138 #undef blk1
139 #undef f1
140 #undef f2
141 #undef f3
142 #undef f4
143 #undef R1
144 #undef R2
145 #undef R3
146 #undef R4
147 
148 ANONYMOUS_NAMESPACE_END
149 
150 //////////////////////////////
151 // end of Steve Reid's code //
152 //////////////////////////////
153 
154 void SHA1::InitState(HashWordType *state)
155 {
156  state[0] = 0x67452301;
157  state[1] = 0xEFCDAB89;
158  state[2] = 0x98BADCFE;
159  state[3] = 0x10325476;
160  state[4] = 0xC3D2E1F0;
161 }
162 
163 void SHA1::Transform(word32 *state, const word32 *data)
164 {
165  CRYPTOPP_ASSERT(state);
166  CRYPTOPP_ASSERT(data);
167 
168 #if CRYPTOPP_SHANI_AVAILABLE
169  if (HasSHA())
170  {
171  SHA1_HashMultipleBlocks_SHANI(state, data, SHA1::BLOCKSIZE, LITTLE_ENDIAN_ORDER);
172  return;
173  }
174 #endif
175 #if CRYPTOPP_ARM_SHA_AVAILABLE
176  if (HasSHA1())
177  {
178  SHA1_HashMultipleBlocks_ARMV8(state, data, SHA1::BLOCKSIZE, LITTLE_ENDIAN_ORDER);
179  return;
180  }
181 #endif
182 
183  SHA1_HashBlock_CXX(state, data);
184 }
185 
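// Hashes as many whole 64-byte blocks as possible and returns the number of trailing
// input bytes (less than SHA1::BLOCKSIZE) left unconsumed for the caller to buffer.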
186 size_t SHA1::HashMultipleBlocks(const word32 *input, size_t length)
187 {
188  CRYPTOPP_ASSERT(input);
189  CRYPTOPP_ASSERT(length >= SHA1::BLOCKSIZE);
190 
191 #if CRYPTOPP_SHANI_AVAILABLE
192  if (HasSHA())
193  {
194  SHA1_HashMultipleBlocks_SHANI(m_state, input, length, BIG_ENDIAN_ORDER);
195  return length & (SHA1::BLOCKSIZE - 1);
196  }
197 #endif
198 #if CRYPTOPP_ARM_SHA_AVAILABLE
199  if (HasSHA1())
200  {
201  SHA1_HashMultipleBlocks_ARMV8(m_state, input, length, BIG_ENDIAN_ORDER);
202  return length & (SHA1::BLOCKSIZE - 1);
203  }
204 #endif
205 
206  const bool noReverse = NativeByteOrderIs(this->GetByteOrder());
207  word32 *dataBuf = this->DataBuf();
208  do
209  {
210  if (noReverse)
211  {
212  SHA1_HashBlock_CXX(m_state, input);
213  }
214  else
215  {
216  ByteReverse(dataBuf, input, SHA1::BLOCKSIZE);
217  SHA1_HashBlock_CXX(m_state, dataBuf);
218  }
219 
220  input += SHA1::BLOCKSIZE/sizeof(word32);
221  length -= SHA1::BLOCKSIZE;
222  }
223  while (length >= SHA1::BLOCKSIZE);
224  return length;
225 }
226 
227 // *************************************************************
228 
229 CRYPTOPP_ALIGN_DATA(16)
230 CRYPTOPP_TABLE
231 const word32 SHA256_K[64] = {
232 
233  0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
234  0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
235  0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
236  0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
237  0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
238  0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
239  0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
240  0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
241  0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
242  0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
243  0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
244  0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
245  0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
246  0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
247  0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
248  0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
249 };
250 
251 ANONYMOUS_NAMESPACE_BEGIN
252 
253 #define a(i) T[(0-i)&7]
254 #define b(i) T[(1-i)&7]
255 #define c(i) T[(2-i)&7]
256 #define d(i) T[(3-i)&7]
257 #define e(i) T[(4-i)&7]
258 #define f(i) T[(5-i)&7]
259 #define g(i) T[(6-i)&7]
260 #define h(i) T[(7-i)&7]
261 
262 #define blk0(i) (W[i] = data[i])
263 #define blk2(i) (W[i&15]+=s1(W[(i-2)&15])+W[(i-7)&15]+s0(W[(i-15)&15]))
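// blk2 is the FIPS 180-4 SHA-256 message schedule W[t] = s1(W[t-2]) + W[t-7] + s0(W[t-15]) + W[t-16],
// accumulated in place in a 16-word circular buffer.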
264 
265 #define Ch(x,y,z) (z^(x&(y^z)))
266 #define Maj(x,y,z) (y^((x^y)&(y^z)))
267 
268 #define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+SHA256_K[i+j]+(j?blk2(i):blk0(i));\
269  d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i))
270 
271 // for SHA256
272 #define s0(x) (rotrConstant<7>(x)^rotrConstant<18>(x)^(x>>3))
273 #define s1(x) (rotrConstant<17>(x)^rotrConstant<19>(x)^(x>>10))
274 #define S0(x) (rotrConstant<2>(x)^rotrConstant<13>(x)^rotrConstant<22>(x))
275 #define S1(x) (rotrConstant<6>(x)^rotrConstant<11>(x)^rotrConstant<25>(x))
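// In FIPS 180-4 notation s0,s1 are the message schedule functions sigma0,sigma1 and S0,S1
// are the round functions Sigma0,Sigma1. R is the usual compression step with T1/T2 folded in:
// h += Sigma1(e) + Ch(e,f,g) + K[t] + W[t]; d += h; h += Sigma0(a) + Maj(a,b,c).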
276 
277 void SHA256_HashBlock_CXX(word32 *state, const word32 *data)
278 {
279  word32 W[16]={0}, T[8];
280  /* Copy context->state[] to working vars */
281  memcpy(T, state, sizeof(T));
282  /* 64 operations, partially loop unrolled */
283  for (unsigned int j=0; j<64; j+=16)
284  {
285  R( 0); R( 1); R( 2); R( 3);
286  R( 4); R( 5); R( 6); R( 7);
287  R( 8); R( 9); R(10); R(11);
288  R(12); R(13); R(14); R(15);
289  }
290  /* Add the working vars back into context.state[] */
291  state[0] += a(0);
292  state[1] += b(0);
293  state[2] += c(0);
294  state[3] += d(0);
295  state[4] += e(0);
296  state[5] += f(0);
297  state[6] += g(0);
298  state[7] += h(0);
299 }
300 
301 #undef Ch
302 #undef Maj
303 #undef s0
304 #undef s1
305 #undef S0
306 #undef S1
307 #undef blk0
308 #undef blk1
309 #undef blk2
310 #undef R
311 
312 #undef a
313 #undef b
314 #undef c
315 #undef d
316 #undef e
317 #undef f
318 #undef g
319 #undef h
320 
321 ANONYMOUS_NAMESPACE_END
322 
323 void SHA224::InitState(HashWordType *state)
324 {
325  static const word32 s[8] = {0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939, 0xffc00b31, 0x68581511, 0x64f98fa7, 0xbefa4fa4};
326  memcpy(state, s, sizeof(s));
327 }
328 
329 void SHA256::InitState(HashWordType *state)
330 {
331  static const word32 s[8] = {0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19};
332  memcpy(state, s, sizeof(s));
333 }
334 #endif // Not CRYPTOPP_GENERATE_X64_MASM
335 
336 #if (defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_X32_ASM_AVAILABLE) || defined(CRYPTOPP_GENERATE_X64_MASM))
337 
338 ANONYMOUS_NAMESPACE_BEGIN
339 
340 void CRYPTOPP_FASTCALL SHA256_HashMultipleBlocks_SSE2(word32 *state, const word32 *data, size_t len)
341 {
342  #define LOCALS_SIZE 8*4 + 16*4 + 4*WORD_SZ
343  #define H(i) [BASE+ASM_MOD(1024+7-(i),8)*4]
344  #define G(i) H(i+1)
345  #define F(i) H(i+2)
346  #define E(i) H(i+3)
347  #define D(i) H(i+4)
348  #define C(i) H(i+5)
349  #define B(i) H(i+6)
350  #define A(i) H(i+7)
351  #define Wt(i) BASE+8*4+ASM_MOD(1024+15-(i),16)*4
352  #define Wt_2(i) Wt((i)-2)
353  #define Wt_15(i) Wt((i)-15)
354  #define Wt_7(i) Wt((i)-7)
355  #define K_END [BASE+8*4+16*4+0*WORD_SZ]
356  #define STATE_SAVE [BASE+8*4+16*4+1*WORD_SZ]
357  #define DATA_SAVE [BASE+8*4+16*4+2*WORD_SZ]
358  #define DATA_END [BASE+8*4+16*4+3*WORD_SZ]
359  #define Kt(i) WORD_REG(si)+(i)*4
360 #if CRYPTOPP_BOOL_X32
361  #define BASE esp+8
362 #elif CRYPTOPP_BOOL_X86
363  #define BASE esp+4
364 #elif defined(__GNUC__)
365  #define BASE r8
366 #else
367  #define BASE rsp
368 #endif
369 
370 #define RA0(i, edx, edi) \
371  AS2( add edx, [Kt(i)] )\
372  AS2( add edx, [Wt(i)] )\
373  AS2( add edx, H(i) )\
374 
375 #define RA1(i, edx, edi)
376 
377 #define RB0(i, edx, edi)
378 
379 #define RB1(i, edx, edi) \
380  AS2( mov AS_REG_7d, [Wt_2(i)] )\
381  AS2( mov edi, [Wt_15(i)])\
382  AS2( mov ebx, AS_REG_7d )\
383  AS2( shr AS_REG_7d, 10 )\
384  AS2( ror ebx, 17 )\
385  AS2( xor AS_REG_7d, ebx )\
386  AS2( ror ebx, 2 )\
387  AS2( xor ebx, AS_REG_7d )/* s1(W_t-2) */\
388  AS2( add ebx, [Wt_7(i)])\
389  AS2( mov AS_REG_7d, edi )\
390  AS2( shr AS_REG_7d, 3 )\
391  AS2( ror edi, 7 )\
392  AS2( add ebx, [Wt(i)])/* s1(W_t-2) + W_t-7 + W_t-16 */\
393  AS2( xor AS_REG_7d, edi )\
394  AS2( add edx, [Kt(i)])\
395  AS2( ror edi, 11 )\
396  AS2( add edx, H(i) )\
397  AS2( xor AS_REG_7d, edi )/* s0(W_t-15) */\
398  AS2( add AS_REG_7d, ebx )/* W_t = s1(W_t-2) + W_t-7 + s0(W_t-15) + W_t-16 */\
399  AS2( mov [Wt(i)], AS_REG_7d)\
400  AS2( add edx, AS_REG_7d )\
401 
402 #define ROUND(i, r, eax, ecx, edi, edx)\
403  /* in: edi = E */\
404  /* unused: eax, ecx, temp: ebx, AS_REG_7d, out: edx = T1 */\
405  AS2( mov edx, F(i) )\
406  AS2( xor edx, G(i) )\
407  AS2( and edx, edi )\
408  AS2( xor edx, G(i) )/* Ch(E,F,G) = (G^(E&(F^G))) */\
409  AS2( mov AS_REG_7d, edi )\
410  AS2( ror edi, 6 )\
411  AS2( ror AS_REG_7d, 25 )\
412  RA##r(i, edx, edi )/* H + Wt + Kt + Ch(E,F,G) */\
413  AS2( xor AS_REG_7d, edi )\
414  AS2( ror edi, 5 )\
415  AS2( xor AS_REG_7d, edi )/* S1(E) */\
416  AS2( add edx, AS_REG_7d )/* T1 = S1(E) + Ch(E,F,G) + H + Wt + Kt */\
417  RB##r(i, edx, edi )/* H + Wt + Kt + Ch(E,F,G) */\
418  /* in: ecx = A, eax = B^C, edx = T1 */\
419  /* unused: edx, temp: ebx, AS_REG_7d, out: eax = A, ecx = B^C, edx = E */\
420  AS2( mov ebx, ecx )\
421  AS2( xor ecx, B(i) )/* A^B */\
422  AS2( and eax, ecx )\
423  AS2( xor eax, B(i) )/* Maj(A,B,C) = B^((A^B)&(B^C)) */\
424  AS2( mov AS_REG_7d, ebx )\
425  AS2( ror ebx, 2 )\
426  AS2( add eax, edx )/* T1 + Maj(A,B,C) */\
427  AS2( add edx, D(i) )\
428  AS2( mov D(i), edx )\
429  AS2( ror AS_REG_7d, 22 )\
430  AS2( xor AS_REG_7d, ebx )\
431  AS2( ror ebx, 11 )\
432  AS2( xor AS_REG_7d, ebx )\
433  AS2( add eax, AS_REG_7d )/* T1 + S0(A) + Maj(A,B,C) */\
434  AS2( mov H(i), eax )\
435 
436 // Unroll the use of CRYPTOPP_BOOL_X64 in assembler math. The GAS assembler on X32 (version 2.25)
437 // complains "Error: invalid operands (*ABS* and *UND* sections) for `*` and `-`"
438 #if CRYPTOPP_BOOL_X64
439 #define SWAP_COPY(i) \
440  AS2( mov WORD_REG(bx), [WORD_REG(dx)+i*WORD_SZ])\
441  AS1( bswap WORD_REG(bx))\
442  AS2( mov [Wt(i*2+1)], WORD_REG(bx))
443 #else // X86 and X32
444 #define SWAP_COPY(i) \
445  AS2( mov WORD_REG(bx), [WORD_REG(dx)+i*WORD_SZ])\
446  AS1( bswap WORD_REG(bx))\
447  AS2( mov [Wt(i)], WORD_REG(bx))
448 #endif
449 
450 #if defined(__GNUC__)
451  #if CRYPTOPP_BOOL_X64
452  FixedSizeAlignedSecBlock<byte, LOCALS_SIZE> workspace;
453  #endif
454  __asm__ __volatile__
455  (
456  #if CRYPTOPP_BOOL_X64
457  "lea %4, %%r8;"
458  #endif
459  INTEL_NOPREFIX
460 #elif defined(CRYPTOPP_GENERATE_X64_MASM)
461  ALIGN 8
462  SHA256_HashMultipleBlocks_SSE2 PROC FRAME
463  rex_push_reg rsi
464  push_reg rdi
465  push_reg rbx
466  push_reg rbp
467  alloc_stack(LOCALS_SIZE+8)
468  .endprolog
469  mov rdi, r8
470  lea rsi, [?SHA256_K@CryptoPP@@3QBIB + 48*4]
471 #endif
472 
473 #if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
474  #ifndef __GNUC__
475  AS2( mov edi, [len])
476  AS2( lea WORD_REG(si), [SHA256_K+48*4])
477  #endif
478  #if !defined(_MSC_VER) || (_MSC_VER < 1400)
479  AS_PUSH_IF86(bx)
480  #endif
481 
482  AS_PUSH_IF86(bp)
483  AS2( mov ebx, esp)
484  AS2( and esp, -16)
485  AS2( sub WORD_REG(sp), LOCALS_SIZE)
486  AS_PUSH_IF86(bx)
487 #endif
488  AS2( mov STATE_SAVE, WORD_REG(cx))
489  AS2( mov DATA_SAVE, WORD_REG(dx))
490  AS2( lea WORD_REG(ax), [WORD_REG(di) + WORD_REG(dx)])
491  AS2( mov DATA_END, WORD_REG(ax))
492  AS2( mov K_END, WORD_REG(si))
493 
494 #if CRYPTOPP_SSE2_ASM_AVAILABLE
495 #if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
496  AS2( test edi, 1)
497  ASJ( jnz, 2, f)
498  AS1( dec DWORD PTR K_END)
499 #endif
500  AS2( movdqu xmm0, XMMWORD_PTR [WORD_REG(cx)+0*16])
501  AS2( movdqu xmm1, XMMWORD_PTR [WORD_REG(cx)+1*16])
502 #endif
503 
504 #if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
505 #if CRYPTOPP_SSE2_ASM_AVAILABLE
506  ASJ( jmp, 0, f)
507 #endif
508  ASL(2) // non-SSE2
509  AS2( mov esi, ecx)
510  AS2( lea edi, A(0))
511  AS2( mov ecx, 8)
512 ATT_NOPREFIX
513  AS1( rep movsd)
514 INTEL_NOPREFIX
515  AS2( mov esi, K_END)
516  ASJ( jmp, 3, f)
517 #endif
518 
519 #if CRYPTOPP_SSE2_ASM_AVAILABLE
520  ASL(0)
521  AS2( movdqu E(0), xmm1)
522  AS2( movdqu A(0), xmm0)
523 #endif
524 #if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
525  ASL(3)
526 #endif
527  AS2( sub WORD_REG(si), 48*4)
528  SWAP_COPY(0) SWAP_COPY(1) SWAP_COPY(2) SWAP_COPY(3)
529  SWAP_COPY(4) SWAP_COPY(5) SWAP_COPY(6) SWAP_COPY(7)
530 #if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
531  SWAP_COPY(8) SWAP_COPY(9) SWAP_COPY(10) SWAP_COPY(11)
532  SWAP_COPY(12) SWAP_COPY(13) SWAP_COPY(14) SWAP_COPY(15)
533 #endif
534  AS2( mov edi, E(0)) // E
535  AS2( mov eax, B(0)) // B
536  AS2( xor eax, C(0)) // B^C
537  AS2( mov ecx, A(0)) // A
538 
539  ROUND(0, 0, eax, ecx, edi, edx)
540  ROUND(1, 0, ecx, eax, edx, edi)
541  ROUND(2, 0, eax, ecx, edi, edx)
542  ROUND(3, 0, ecx, eax, edx, edi)
543  ROUND(4, 0, eax, ecx, edi, edx)
544  ROUND(5, 0, ecx, eax, edx, edi)
545  ROUND(6, 0, eax, ecx, edi, edx)
546  ROUND(7, 0, ecx, eax, edx, edi)
547  ROUND(8, 0, eax, ecx, edi, edx)
548  ROUND(9, 0, ecx, eax, edx, edi)
549  ROUND(10, 0, eax, ecx, edi, edx)
550  ROUND(11, 0, ecx, eax, edx, edi)
551  ROUND(12, 0, eax, ecx, edi, edx)
552  ROUND(13, 0, ecx, eax, edx, edi)
553  ROUND(14, 0, eax, ecx, edi, edx)
554  ROUND(15, 0, ecx, eax, edx, edi)
555 
556  ASL(1)
557  AS2(add WORD_REG(si), 4*16)
558  ROUND(0, 1, eax, ecx, edi, edx)
559  ROUND(1, 1, ecx, eax, edx, edi)
560  ROUND(2, 1, eax, ecx, edi, edx)
561  ROUND(3, 1, ecx, eax, edx, edi)
562  ROUND(4, 1, eax, ecx, edi, edx)
563  ROUND(5, 1, ecx, eax, edx, edi)
564  ROUND(6, 1, eax, ecx, edi, edx)
565  ROUND(7, 1, ecx, eax, edx, edi)
566  ROUND(8, 1, eax, ecx, edi, edx)
567  ROUND(9, 1, ecx, eax, edx, edi)
568  ROUND(10, 1, eax, ecx, edi, edx)
569  ROUND(11, 1, ecx, eax, edx, edi)
570  ROUND(12, 1, eax, ecx, edi, edx)
571  ROUND(13, 1, ecx, eax, edx, edi)
572  ROUND(14, 1, eax, ecx, edi, edx)
573  ROUND(15, 1, ecx, eax, edx, edi)
574  AS2( cmp WORD_REG(si), K_END)
575  ATT_NOPREFIX
576  ASJ( jb, 1, b)
577  INTEL_NOPREFIX
578 
579  AS2( mov WORD_REG(dx), DATA_SAVE)
580  AS2( add WORD_REG(dx), 64)
581  AS2( mov AS_REG_7, STATE_SAVE)
582  AS2( mov DATA_SAVE, WORD_REG(dx))
583 
584 #if CRYPTOPP_SSE2_ASM_AVAILABLE
585 #if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
586  AS2( test DWORD PTR K_END, 1)
587  ASJ( jz, 4, f)
588 #endif
589  AS2( movdqu xmm1, XMMWORD_PTR [AS_REG_7+1*16])
590  AS2( movdqu xmm0, XMMWORD_PTR [AS_REG_7+0*16])
591  AS2( paddd xmm1, E(0))
592  AS2( paddd xmm0, A(0))
593  AS2( movdqu [AS_REG_7+1*16], xmm1)
594  AS2( movdqu [AS_REG_7+0*16], xmm0)
595  AS2( cmp WORD_REG(dx), DATA_END)
596  ATT_NOPREFIX
597  ASJ( jb, 0, b)
598  INTEL_NOPREFIX
599 #endif
600 
601 #if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
602 #if CRYPTOPP_SSE2_ASM_AVAILABLE
603  ASJ( jmp, 5, f)
604  ASL(4) // non-SSE2
605 #endif
606  AS2( add [AS_REG_7+0*4], ecx) // A
607  AS2( add [AS_REG_7+4*4], edi) // E
608  AS2( mov eax, B(0))
609  AS2( mov ebx, C(0))
610  AS2( mov ecx, D(0))
611  AS2( add [AS_REG_7+1*4], eax)
612  AS2( add [AS_REG_7+2*4], ebx)
613  AS2( add [AS_REG_7+3*4], ecx)
614  AS2( mov eax, F(0))
615  AS2( mov ebx, G(0))
616  AS2( mov ecx, H(0))
617  AS2( add [AS_REG_7+5*4], eax)
618  AS2( add [AS_REG_7+6*4], ebx)
619  AS2( add [AS_REG_7+7*4], ecx)
620  AS2( mov ecx, AS_REG_7d)
621  AS2( cmp WORD_REG(dx), DATA_END)
622  ASJ( jb, 2, b)
623 #if CRYPTOPP_SSE2_ASM_AVAILABLE
624  ASL(5)
625 #endif
626 #endif
627 
628  AS_POP_IF86(sp)
629  AS_POP_IF86(bp)
630  #if !defined(_MSC_VER) || (_MSC_VER < 1400)
631  AS_POP_IF86(bx)
632  #endif
633 
634 #ifdef CRYPTOPP_GENERATE_X64_MASM
635  add rsp, LOCALS_SIZE+8
636  pop rbp
637  pop rbx
638  pop rdi
639  pop rsi
640  ret
641  SHA256_HashMultipleBlocks_SSE2 ENDP
642 #endif
643 
644 #ifdef __GNUC__
645  ATT_PREFIX
646  :
647  : "c" (state), "d" (data), "S" (SHA256_K+48), "D" (len)
648  #if CRYPTOPP_BOOL_X64
649  , "m" (workspace[0])
650  #endif
651  : "memory", "cc", "%eax"
652  #if CRYPTOPP_BOOL_X64
653  , "%rbx", "%r8", "%r10"
654  #endif
655  );
656 #endif
657 }
658 
659 ANONYMOUS_NAMESPACE_END
660 
661 #endif // CRYPTOPP_X86_ASM_AVAILABLE or CRYPTOPP_GENERATE_X64_MASM
662 
663 #ifndef CRYPTOPP_GENERATE_X64_MASM
664 
665 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
666 extern "C" {
667 void CRYPTOPP_FASTCALL SHA256_HashMultipleBlocks_SSE2(word32 *state, const word32 *data, size_t len);
668 }
669 #endif
670 
671 void SHA256::Transform(word32 *state, const word32 *data)
672 {
673  CRYPTOPP_ASSERT(state);
674  CRYPTOPP_ASSERT(data);
675 
676 #if CRYPTOPP_SHANI_AVAILABLE
677  if (HasSHA())
678  {
679  SHA256_HashMultipleBlocks_SHANI(state, data, SHA256::BLOCKSIZE, LITTLE_ENDIAN_ORDER);
680  return;
681  }
682 #endif
683 #if CRYPTOPP_ARM_SHA_AVAILABLE
684  if (HasSHA2())
685  {
686  SHA256_HashMultipleBlocks_ARMV8(state, data, SHA256::BLOCKSIZE, LITTLE_ENDIAN_ORDER);
687  return;
688  }
689 #endif
690 #if CRYPTOPP_POWER8_SHA_AVAILABLE
691  if (HasSHA256())
692  {
693  SHA256_HashMultipleBlocks_POWER8(state, data, SHA256::BLOCKSIZE, LITTLE_ENDIAN_ORDER);
694  return;
695  }
696 #endif
697 
698  SHA256_HashBlock_CXX(state, data);
699 }
700 
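// As with SHA1::HashMultipleBlocks: whole 64-byte blocks are hashed and the count of
// unconsumed trailing bytes (less than SHA256::BLOCKSIZE) is returned.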
701 size_t SHA256::HashMultipleBlocks(const word32 *input, size_t length)
702 {
703  CRYPTOPP_ASSERT(input);
704  CRYPTOPP_ASSERT(length >= SHA256::BLOCKSIZE);
705 
706 #if CRYPTOPP_SHANI_AVAILABLE
707  if (HasSHA())
708  {
709  SHA256_HashMultipleBlocks_SHANI(m_state, input, length, BIG_ENDIAN_ORDER);
710  return length & (SHA256::BLOCKSIZE - 1);
711  }
712 #endif
713 #if CRYPTOPP_SSE2_ASM_AVAILABLE
714  if (HasSSE2())
715  {
716  const size_t res = length & (SHA256::BLOCKSIZE - 1);
717  SHA256_HashMultipleBlocks_SSE2(m_state, input, length-res);
718  return res;
719  }
720 #endif
721 #if CRYPTOPP_ARM_SHA_AVAILABLE
722  if (HasSHA2())
723  {
724  SHA256_HashMultipleBlocks_ARMV8(m_state, input, length, BIG_ENDIAN_ORDER);
725  return length & (SHA256::BLOCKSIZE - 1);
726  }
727 #endif
728 #if CRYPTOPP_POWER8_SHA_AVAILABLE
729  if (HasSHA256())
730  {
731  SHA256_HashMultipleBlocks_POWER8(m_state, input, length, BIG_ENDIAN_ORDER);
732  return length & (SHA256::BLOCKSIZE - 1);
733  }
734 #endif
735 
736  const bool noReverse = NativeByteOrderIs(this->GetByteOrder());
737  word32 *dataBuf = this->DataBuf();
738  do
739  {
740  if (noReverse)
741  {
742  SHA256_HashBlock_CXX(m_state, input);
743  }
744  else
745  {
746  ByteReverse(dataBuf, input, SHA256::BLOCKSIZE);
747  SHA256_HashBlock_CXX(m_state, dataBuf);
748  }
749 
750  input += SHA256::BLOCKSIZE/sizeof(word32);
751  length -= SHA256::BLOCKSIZE;
752  }
753  while (length >= SHA256::BLOCKSIZE);
754  return length;
755 }
756 
757 size_t SHA224::HashMultipleBlocks(const word32 *input, size_t length)
758 {
759  CRYPTOPP_ASSERT(input);
760  CRYPTOPP_ASSERT(length >= SHA256::BLOCKSIZE);
761 
762 #if CRYPTOPP_SHANI_AVAILABLE
763  if (HasSHA())
764  {
765  SHA256_HashMultipleBlocks_SHANI(m_state, input, length, BIG_ENDIAN_ORDER);
766  return length & (SHA256::BLOCKSIZE - 1);
767  }
768 #endif
769 #if CRYPTOPP_SSE2_ASM_AVAILABLE
770  if (HasSSE2())
771  {
772  const size_t res = length & (SHA256::BLOCKSIZE - 1);
773  SHA256_HashMultipleBlocks_SSE2(m_state, input, length-res);
774  return res;
775  }
776 #endif
777 #if CRYPTOPP_ARM_SHA_AVAILABLE
778  if (HasSHA2())
779  {
780  SHA256_HashMultipleBlocks_ARMV8(m_state, input, length, BIG_ENDIAN_ORDER);
781  return length & (SHA256::BLOCKSIZE - 1);
782  }
783 #endif
784 #if CRYPTOPP_POWER8_SHA_AVAILABLE
785  if (HasSHA256())
786  {
787  SHA256_HashMultipleBlocks_POWER8(m_state, input, length, BIG_ENDIAN_ORDER);
788  return length & (SHA256::BLOCKSIZE - 1);
789  }
790 #endif
791 
792  const bool noReverse = NativeByteOrderIs(this->GetByteOrder());
793  word32 *dataBuf = this->DataBuf();
794  do
795  {
796  if (noReverse)
797  {
798  SHA256_HashBlock_CXX(m_state, input);
799  }
800  else
801  {
802  ByteReverse(dataBuf, input, SHA256::BLOCKSIZE);
803  SHA256_HashBlock_CXX(m_state, dataBuf);
804  }
805 
806  input += SHA256::BLOCKSIZE/sizeof(word32);
807  length -= SHA256::BLOCKSIZE;
808  }
809  while (length >= SHA256::BLOCKSIZE);
810  return length;
811 }
812 
813 // *************************************************************
814 
815 void SHA384::InitState(HashWordType *state)
816 {
817  const word64 s[8] = {
818  W64LIT(0xcbbb9d5dc1059ed8), W64LIT(0x629a292a367cd507),
819  W64LIT(0x9159015a3070dd17), W64LIT(0x152fecd8f70e5939),
820  W64LIT(0x67332667ffc00b31), W64LIT(0x8eb44a8768581511),
821  W64LIT(0xdb0c2e0d64f98fa7), W64LIT(0x47b5481dbefa4fa4)};
822  memcpy(state, s, sizeof(s));
823 }
824 
825 void SHA512::InitState(HashWordType *state)
826 {
827  const word64 s[8] = {
828  W64LIT(0x6a09e667f3bcc908), W64LIT(0xbb67ae8584caa73b),
829  W64LIT(0x3c6ef372fe94f82b), W64LIT(0xa54ff53a5f1d36f1),
830  W64LIT(0x510e527fade682d1), W64LIT(0x9b05688c2b3e6c1f),
831  W64LIT(0x1f83d9abfb41bd6b), W64LIT(0x5be0cd19137e2179)};
832  memcpy(state, s, sizeof(s));
833 }
834 
835 // We add extern to export the table to sha-simd.cpp. It also
836 // cleared http://github.com/weidai11/cryptopp/issues/502
837 CRYPTOPP_ALIGN_DATA(16)
838 CRYPTOPP_TABLE
839 const word64 SHA512_K[80] = {
840  W64LIT(0x428a2f98d728ae22), W64LIT(0x7137449123ef65cd),
841  W64LIT(0xb5c0fbcfec4d3b2f), W64LIT(0xe9b5dba58189dbbc),
842  W64LIT(0x3956c25bf348b538), W64LIT(0x59f111f1b605d019),
843  W64LIT(0x923f82a4af194f9b), W64LIT(0xab1c5ed5da6d8118),
844  W64LIT(0xd807aa98a3030242), W64LIT(0x12835b0145706fbe),
845  W64LIT(0x243185be4ee4b28c), W64LIT(0x550c7dc3d5ffb4e2),
846  W64LIT(0x72be5d74f27b896f), W64LIT(0x80deb1fe3b1696b1),
847  W64LIT(0x9bdc06a725c71235), W64LIT(0xc19bf174cf692694),
848  W64LIT(0xe49b69c19ef14ad2), W64LIT(0xefbe4786384f25e3),
849  W64LIT(0x0fc19dc68b8cd5b5), W64LIT(0x240ca1cc77ac9c65),
850  W64LIT(0x2de92c6f592b0275), W64LIT(0x4a7484aa6ea6e483),
851  W64LIT(0x5cb0a9dcbd41fbd4), W64LIT(0x76f988da831153b5),
852  W64LIT(0x983e5152ee66dfab), W64LIT(0xa831c66d2db43210),
853  W64LIT(0xb00327c898fb213f), W64LIT(0xbf597fc7beef0ee4),
854  W64LIT(0xc6e00bf33da88fc2), W64LIT(0xd5a79147930aa725),
855  W64LIT(0x06ca6351e003826f), W64LIT(0x142929670a0e6e70),
856  W64LIT(0x27b70a8546d22ffc), W64LIT(0x2e1b21385c26c926),
857  W64LIT(0x4d2c6dfc5ac42aed), W64LIT(0x53380d139d95b3df),
858  W64LIT(0x650a73548baf63de), W64LIT(0x766a0abb3c77b2a8),
859  W64LIT(0x81c2c92e47edaee6), W64LIT(0x92722c851482353b),
860  W64LIT(0xa2bfe8a14cf10364), W64LIT(0xa81a664bbc423001),
861  W64LIT(0xc24b8b70d0f89791), W64LIT(0xc76c51a30654be30),
862  W64LIT(0xd192e819d6ef5218), W64LIT(0xd69906245565a910),
863  W64LIT(0xf40e35855771202a), W64LIT(0x106aa07032bbd1b8),
864  W64LIT(0x19a4c116b8d2d0c8), W64LIT(0x1e376c085141ab53),
865  W64LIT(0x2748774cdf8eeb99), W64LIT(0x34b0bcb5e19b48a8),
866  W64LIT(0x391c0cb3c5c95a63), W64LIT(0x4ed8aa4ae3418acb),
867  W64LIT(0x5b9cca4f7763e373), W64LIT(0x682e6ff3d6b2b8a3),
868  W64LIT(0x748f82ee5defb2fc), W64LIT(0x78a5636f43172f60),
869  W64LIT(0x84c87814a1f0ab72), W64LIT(0x8cc702081a6439ec),
870  W64LIT(0x90befffa23631e28), W64LIT(0xa4506cebde82bde9),
871  W64LIT(0xbef9a3f7b2c67915), W64LIT(0xc67178f2e372532b),
872  W64LIT(0xca273eceea26619c), W64LIT(0xd186b8c721c0c207),
873  W64LIT(0xeada7dd6cde0eb1e), W64LIT(0xf57d4f7fee6ed178),
874  W64LIT(0x06f067aa72176fba), W64LIT(0x0a637dc5a2c898a6),
875  W64LIT(0x113f9804bef90dae), W64LIT(0x1b710b35131c471b),
876  W64LIT(0x28db77f523047d84), W64LIT(0x32caab7b40c72493),
877  W64LIT(0x3c9ebe0a15c9bebc), W64LIT(0x431d67c49c100d4c),
878  W64LIT(0x4cc5d4becb3e42b6), W64LIT(0x597f299cfc657e2a),
879  W64LIT(0x5fcb6fab3ad6faec), W64LIT(0x6c44198c4a475817)
880 };
881 
882 #if CRYPTOPP_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32)
883 
884 ANONYMOUS_NAMESPACE_BEGIN
885 
886 CRYPTOPP_NAKED void CRYPTOPP_FASTCALL SHA512_HashBlock_SSE2(word64 *state, const word64 *data)
887 {
888 #ifdef __GNUC__
889  __asm__ __volatile__
890  (
891  INTEL_NOPREFIX
892  AS_PUSH_IF86( bx)
893  AS2( mov ebx, eax)
894 #else
895  AS1( push ebx)
896  AS1( push esi)
897  AS1( push edi)
898  AS2( lea ebx, SHA512_K)
899 #endif
900 
901  AS2( mov eax, esp)
902  AS2( and esp, 0xfffffff0)
903  AS2( sub esp, 27*16) // 17*16 for expanded data, 20*8 for state
904  AS_PUSH_IF86( ax)
905  AS2( xor eax, eax)
906 
907 #if CRYPTOPP_BOOL_X32
908  AS2( lea edi, [esp+8+8*8]) // start at middle of state buffer. will decrement pointer each round to avoid copying
909  AS2( lea esi, [esp+8+20*8+8]) // 16-byte alignment, then add 8
910 #else
911  AS2( lea edi, [esp+4+8*8]) // start at middle of state buffer. will decrement pointer each round to avoid copying
912  AS2( lea esi, [esp+4+20*8+8]) // 16-byte alignment, then add 8
913 #endif
914 
915  AS2( movdqu xmm0, [ecx+0*16])
916  AS2( movdq2q mm4, xmm0)
917  AS2( movdqu [edi+0*16], xmm0)
918  AS2( movdqu xmm0, [ecx+1*16])
919  AS2( movdqu [edi+1*16], xmm0)
920  AS2( movdqu xmm0, [ecx+2*16])
921  AS2( movdq2q mm5, xmm0)
922  AS2( movdqu [edi+2*16], xmm0)
923  AS2( movdqu xmm0, [ecx+3*16])
924  AS2( movdqu [edi+3*16], xmm0)
925  ASJ( jmp, 0, f)
926 
927 #define SSE2_S0_S1(r, a, b, c) \
928  AS2( movq mm6, r)\
929  AS2( psrlq r, a)\
930  AS2( movq mm7, r)\
931  AS2( psllq mm6, 64-c)\
932  AS2( pxor mm7, mm6)\
933  AS2( psrlq r, b-a)\
934  AS2( pxor mm7, r)\
935  AS2( psllq mm6, c-b)\
936  AS2( pxor mm7, mm6)\
937  AS2( psrlq r, c-b)\
938  AS2( pxor r, mm7)\
939  AS2( psllq mm6, b-a)\
940  AS2( pxor r, mm6)
941 
942 #define SSE2_s0(r, a, b, c) \
943  AS2( movdqu xmm6, r)\
944  AS2( psrlq r, a)\
945  AS2( movdqu xmm7, r)\
946  AS2( psllq xmm6, 64-c)\
947  AS2( pxor xmm7, xmm6)\
948  AS2( psrlq r, b-a)\
949  AS2( pxor xmm7, r)\
950  AS2( psrlq r, c-b)\
951  AS2( pxor r, xmm7)\
952  AS2( psllq xmm6, c-a)\
953  AS2( pxor r, xmm6)
954 
955 #define SSE2_s1(r, a, b, c) \
956  AS2( movdqu xmm6, r)\
957  AS2( psrlq r, a)\
958  AS2( movdqu xmm7, r)\
959  AS2( psllq xmm6, 64-c)\
960  AS2( pxor xmm7, xmm6)\
961  AS2( psrlq r, b-a)\
962  AS2( pxor xmm7, r)\
963  AS2( psllq xmm6, c-b)\
964  AS2( pxor xmm7, xmm6)\
965  AS2( psrlq r, c-b)\
966  AS2( pxor r, xmm7)
967 
968  ASL(SHA512_Round)
969  // k + w is in mm0, a is in mm4, e is in mm5
970  AS2( paddq mm0, [edi+7*8]) // h
971  AS2( movq mm2, [edi+5*8]) // f
972  AS2( movq mm3, [edi+6*8]) // g
973  AS2( pxor mm2, mm3)
974  AS2( pand mm2, mm5)
975  SSE2_S0_S1(mm5,14,18,41)
976  AS2( pxor mm2, mm3)
977  AS2( paddq mm0, mm2) // h += Ch(e,f,g)
978  AS2( paddq mm5, mm0) // h += S1(e)
979  AS2( movq mm2, [edi+1*8]) // b
980  AS2( movq mm1, mm2)
981  AS2( por mm2, mm4)
982  AS2( pand mm2, [edi+2*8]) // c
983  AS2( pand mm1, mm4)
984  AS2( por mm1, mm2)
985  AS2( paddq mm1, mm5) // temp = h + Maj(a,b,c)
986  AS2( paddq mm5, [edi+3*8]) // e = d + h
987  AS2( movq [edi+3*8], mm5)
988  AS2( movq [edi+11*8], mm5)
989  SSE2_S0_S1(mm4,28,34,39) // S0(a)
990  AS2( paddq mm4, mm1) // a = temp + S0(a)
991  AS2( movq [edi-8], mm4)
992  AS2( movq [edi+7*8], mm4)
993  AS1( ret)
994 
995  // first 16 rounds
996  ASL(0)
997  AS2( movq mm0, [edx+eax*8])
998  AS2( movq [esi+eax*8], mm0)
999  AS2( movq [esi+eax*8+16*8], mm0)
1000  AS2( paddq mm0, [ebx+eax*8])
1001  ASC( call, SHA512_Round)
1002  AS1( inc eax)
1003  AS2( sub edi, 8)
1004  AS2( test eax, 7)
1005  ASJ( jnz, 0, b)
1006  AS2( add edi, 8*8)
1007  AS2( cmp eax, 16)
1008  ASJ( jne, 0, b)
1009 
1010  // rest of the rounds
1011  AS2( movdqu xmm0, [esi+(16-2)*8])
1012  ASL(1)
1013  // data expansion, W[i-2] already in xmm0
1014  AS2( movdqu xmm3, [esi])
1015  AS2( paddq xmm3, [esi+(16-7)*8])
1016  AS2( movdqu xmm2, [esi+(16-15)*8])
1017  SSE2_s1(xmm0, 6, 19, 61)
1018  AS2( paddq xmm0, xmm3)
1019  SSE2_s0(xmm2, 1, 7, 8)
1020  AS2( paddq xmm0, xmm2)
1021  AS2( movdq2q mm0, xmm0)
1022  AS2( movhlps xmm1, xmm0)
1023  AS2( paddq mm0, [ebx+eax*8])
1024  AS2( movlps [esi], xmm0)
1025  AS2( movlps [esi+8], xmm1)
1026  AS2( movlps [esi+8*16], xmm0)
1027  AS2( movlps [esi+8*17], xmm1)
1028  // 2 rounds
1029  ASC( call, SHA512_Round)
1030  AS2( sub edi, 8)
1031  AS2( movdq2q mm0, xmm1)
1032  AS2( paddq mm0, [ebx+eax*8+8])
1033  ASC( call, SHA512_Round)
1034  // update indices and loop
1035  AS2( add esi, 16)
1036  AS2( add eax, 2)
1037  AS2( sub edi, 8)
1038  AS2( test eax, 7)
1039  ASJ( jnz, 1, b)
1040  // do housekeeping every 8 rounds
1041  AS2( mov esi, 0xf)
1042  AS2( and esi, eax)
1043 #if CRYPTOPP_BOOL_X32
1044  AS2( lea esi, [esp+8+20*8+8+esi*8])
1045 #else
1046  AS2( lea esi, [esp+4+20*8+8+esi*8])
1047 #endif
1048  AS2( add edi, 8*8)
1049  AS2( cmp eax, 80)
1050  ASJ( jne, 1, b)
1051 
1052 #define SSE2_CombineState(i) \
1053  AS2( movdqu xmm0, [edi+i*16])\
1054  AS2( paddq xmm0, [ecx+i*16])\
1055  AS2( movdqu [ecx+i*16], xmm0)
1056 
1057  SSE2_CombineState(0)
1058  SSE2_CombineState(1)
1059  SSE2_CombineState(2)
1060  SSE2_CombineState(3)
1061 
1062  AS_POP_IF86( sp)
1063  AS1( emms)
1064 
1065 #if defined(__GNUC__)
1066  AS_POP_IF86( bx)
1067  ATT_PREFIX
1068  :
1069  : "a" (SHA512_K), "c" (state), "d" (data)
1070  : "%esi", "%edi", "memory", "cc"
1071  );
1072 #else
1073  AS1( pop edi)
1074  AS1( pop esi)
1075  AS1( pop ebx)
1076  AS1( ret)
1077 #endif
1078 }
1079 
1080 ANONYMOUS_NAMESPACE_END
1081 
1082 #endif // CRYPTOPP_SSE2_ASM_AVAILABLE
1083 
1084 ANONYMOUS_NAMESPACE_BEGIN
1085 
1086 #define a(i) T[(0-i)&7]
1087 #define b(i) T[(1-i)&7]
1088 #define c(i) T[(2-i)&7]
1089 #define d(i) T[(3-i)&7]
1090 #define e(i) T[(4-i)&7]
1091 #define f(i) T[(5-i)&7]
1092 #define g(i) T[(6-i)&7]
1093 #define h(i) T[(7-i)&7]
1094 
1095 #define blk0(i) (W[i]=data[i])
1096 #define blk2(i) (W[i&15]+=s1(W[(i-2)&15])+W[(i-7)&15]+s0(W[(i-15)&15]))
1097 
1098 #define Ch(x,y,z) (z^(x&(y^z)))
1099 #define Maj(x,y,z) (y^((x^y)&(y^z)))
1100 
1101 #define s0(x) (rotrConstant<1>(x)^rotrConstant<8>(x)^(x>>7))
1102 #define s1(x) (rotrConstant<19>(x)^rotrConstant<61>(x)^(x>>6))
1103 #define S0(x) (rotrConstant<28>(x)^rotrConstant<34>(x)^rotrConstant<39>(x))
1104 #define S1(x) (rotrConstant<14>(x)^rotrConstant<18>(x)^rotrConstant<41>(x))
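// Same structure as the SHA-256 macros above, with the SHA-512 amounts from FIPS 180-4:
// sigma0 = ROTR1^ROTR8^SHR7, sigma1 = ROTR19^ROTR61^SHR6, Sigma0 = ROTR28^ROTR34^ROTR39,
// Sigma1 = ROTR14^ROTR18^ROTR41, over 80 rounds of 64-bit words.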
1105 
1106 #define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+SHA512_K[i+j]+\
1107  (j?blk2(i):blk0(i));d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i));
1108 
1109 void SHA512_HashBlock_CXX(word64 *state, const word64 *data)
1110 {
1111  CRYPTOPP_ASSERT(state);
1112  CRYPTOPP_ASSERT(data);
1113 
1114  word64 W[16]={0}, T[8];
1115  /* Copy context->state[] to working vars */
1116  memcpy(T, state, sizeof(T));
1117  /* 80 operations, partially loop unrolled */
1118  for (unsigned int j=0; j<80; j+=16)
1119  {
1120  R( 0); R( 1); R( 2); R( 3);
1121  R( 4); R( 5); R( 6); R( 7);
1122  R( 8); R( 9); R(10); R(11);
1123  R(12); R(13); R(14); R(15);
1124  }
1125  /* Add the working vars back into context.state[] */
1126  state[0] += a(0);
1127  state[1] += b(0);
1128  state[2] += c(0);
1129  state[3] += d(0);
1130  state[4] += e(0);
1131  state[5] += f(0);
1132  state[6] += g(0);
1133  state[7] += h(0);
1134 }
1135 
1136 #undef Ch
1137 #undef Maj
1138 
1139 #undef s0
1140 #undef s1
1141 #undef S0
1142 #undef S1
1143 
1144 #undef blk0
1145 #undef blk1
1146 #undef blk2
1147 
1148 #undef R
1149 
1150 #undef a
1151 #undef b
1152 #undef c
1153 #undef d
1154 #undef e
1155 #undef f
1156 #undef g
1157 #undef h
1158 
1159 ANONYMOUS_NAMESPACE_END
1160 
1161 void SHA512::Transform(word64 *state, const word64 *data)
1162 {
1163  CRYPTOPP_ASSERT(state);
1164  CRYPTOPP_ASSERT(data);
1165 
1166 #if CRYPTOPP_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32)
1167  if (HasSSE2())
1168  {
1169  SHA512_HashBlock_SSE2(state, data);
1170  return;
1171  }
1172 #endif
1173 #if CRYPTOPP_POWER8_SHA_AVAILABLE
1174  if (HasSHA512())
1175  {
1176  SHA512_HashMultipleBlocks_POWER8(state, data, SHA512::BLOCKSIZE, BIG_ENDIAN_ORDER);
1177  return;
1178  }
1179 #endif
1180 
1181  SHA512_HashBlock_CXX(state, data);
1182 }
1183 
1184 NAMESPACE_END
1185 
1186 #endif // Not CRYPTOPP_GENERATE_X64_MASM
1187 #endif // Not CRYPTOPP_IMPORTS