Crypto++ 5.6.3
Free C++ class library of cryptographic schemes
sha.cpp
// sha.cpp - modified by Wei Dai from Steve Reid's public domain sha1.c

// Steve Reid implemented SHA-1. Wei Dai implemented SHA-2.
// Both are in the public domain.

// use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM sha.cpp" to generate MASM code

#include "pch.h"
#include "config.h"

#if CRYPTOPP_MSC_VERSION
# pragma warning(disable: 4100 4731)
#endif

#ifndef CRYPTOPP_IMPORTS
#ifndef CRYPTOPP_GENERATE_X64_MASM

#include "secblock.h"
#include "sha.h"
#include "misc.h"
#include "cpu.h"

NAMESPACE_BEGIN(CryptoPP)

// start of Steve Reid's code

#define blk0(i) (W[i] = data[i])
#define blk1(i) (W[i&15] = rotlFixed(W[(i+13)&15]^W[(i+8)&15]^W[(i+2)&15]^W[i&15],1))
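// blk0 and blk1 are SHA-1's message schedule: blk0 reads the sixteen
// input words, and blk1 folds the FIPS 180 recurrence
// W[t] = ROTL1(W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16]) into a 16-word
// ring buffer via the "& 15" indexing. For reference, a standalone
// sketch of the same recurrence in its plain 80-word form (rotl32 and
// expand_schedule are illustrative names, not Crypto++ API):
#if 0
#include <cstdint>

static inline uint32_t rotl32(uint32_t x, unsigned r) // 1 <= r <= 31
{
    return (x << r) | (x >> (32 - r));
}

void expand_schedule(uint32_t W[80]) // W[0..15] already loaded as big-endian words
{
    for (int t = 16; t < 80; ++t)
        W[t] = rotl32(W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16], 1);
}
#endif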

void SHA1::InitState(HashWordType *state)
{
 state[0] = 0x67452301L;
 state[1] = 0xEFCDAB89L;
 state[2] = 0x98BADCFEL;
 state[3] = 0x10325476L;
 state[4] = 0xC3D2E1F0L;
}

#define f1(x,y,z) (z^(x&(y^z)))
#define f2(x,y,z) (x^y^z)
#define f3(x,y,z) ((x&y)|(z&(x|y)))
#define f4(x,y,z) (x^y^z)

/* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */
#define R0(v,w,x,y,z,i) z+=f1(w,x,y)+blk0(i)+0x5A827999+rotlFixed(v,5);w=rotlFixed(w,30);
#define R1(v,w,x,y,z,i) z+=f1(w,x,y)+blk1(i)+0x5A827999+rotlFixed(v,5);w=rotlFixed(w,30);
#define R2(v,w,x,y,z,i) z+=f2(w,x,y)+blk1(i)+0x6ED9EBA1+rotlFixed(v,5);w=rotlFixed(w,30);
#define R3(v,w,x,y,z,i) z+=f3(w,x,y)+blk1(i)+0x8F1BBCDC+rotlFixed(v,5);w=rotlFixed(w,30);
#define R4(v,w,x,y,z,i) z+=f4(w,x,y)+blk1(i)+0xCA62C1D6+rotlFixed(v,5);w=rotlFixed(w,30);

void SHA1::Transform(word32 *state, const word32 *data)
{
 word32 W[16];
 /* Copy context->state[] to working vars */
 word32 a = state[0];
 word32 b = state[1];
 word32 c = state[2];
 word32 d = state[3];
 word32 e = state[4];
 /* 4 rounds of 20 operations each. Loop unrolled. */
 R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3);
 R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7);
 R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11);
 R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15);
 R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19);
 R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23);
 R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27);
 R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31);
 R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35);
 R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39);
 R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43);
 R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47);
 R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51);
 R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55);
 R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59);
 R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63);
 R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67);
 R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);
 R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);
 R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);
 /* Add the working vars back into context.state[] */
 state[0] += a;
 state[1] += b;
 state[2] += c;
 state[3] += d;
 state[4] += e;
}

// end of Steve Reid's code
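
// A minimal caller's-eye sketch (not part of this file): Transform is
// an internal block-level entry point, so applications hash through
// the public HashTransformation interface instead. SHA-1("abc") is
// a9993e364706816aba3e25717850c26c9cd0d89d.
#if 0
#include "sha.h"
#include "hex.h"
#include "filters.h"
#include <iostream>
#include <string>

int main()
{
    const std::string msg = "abc";
    unsigned char digest[CryptoPP::SHA1::DIGESTSIZE]; // 20 bytes
    CryptoPP::SHA1().CalculateDigest(digest,
        reinterpret_cast<const unsigned char*>(msg.data()), msg.size());

    std::string hex;
    CryptoPP::StringSource(digest, sizeof(digest), true,
        new CryptoPP::HexEncoder(new CryptoPP::StringSink(hex)));
    std::cout << hex << std::endl;
    return 0;
}
#endif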

// *************************************************************

void SHA224::InitState(HashWordType *state)
{
 static const word32 s[8] = {0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939, 0xffc00b31, 0x68581511, 0x64f98fa7, 0xbefa4fa4};
 memcpy(state, s, sizeof(s));
}

void SHA256::InitState(HashWordType *state)
{
 static const word32 s[8] = {0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19};
 memcpy(state, s, sizeof(s));
}

#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && !defined(CRYPTOPP_DISABLE_SHA_ASM)
CRYPTOPP_ALIGN_DATA(16) extern const word32 SHA256_K[64] CRYPTOPP_SECTION_ALIGN16 = {
#else
extern const word32 SHA256_K[64] = {
#endif
 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};
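
// The constants above are not arbitrary: SHA256_K holds the first 32
// bits of the fractional parts of the cube roots of the first 64
// primes, and the InitState values are derived the same way from the
// square roots of the first 8 primes (FIPS 180-4). A standalone
// sketch that regenerates them; it assumes double precision carries
// enough fraction bits at these magnitudes, which holds for the
// published constants:
#if 0
#include <cmath>
#include <cstdint>
#include <cstdio>

static uint32_t frac_bits(double x) // top 32 bits of frac(x)
{
    return static_cast<uint32_t>((x - std::floor(x)) * 4294967296.0);
}

int main()
{
    int count = 0;
    for (int n = 2; count < 64; ++n)
    {
        bool prime = true;
        for (int d = 2; d * d <= n; ++d)
            if (n % d == 0) { prime = false; break; }
        if (!prime) continue;
        if (count < 8) // SHA256::InitState values
            std::printf("H%d = 0x%08lx\n", count,
                        (unsigned long)frac_bits(std::sqrt((double)n)));
        std::printf("K%02d = 0x%08lx\n", count,
                    (unsigned long)frac_bits(std::cbrt((double)n)));
        ++count;
    }
    return 0;
}
#endif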

#endif // #ifndef CRYPTOPP_GENERATE_X64_MASM

#if (defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_X32_ASM_AVAILABLE) || defined(CRYPTOPP_GENERATE_X64_MASM)) && !defined(CRYPTOPP_DISABLE_SHA_ASM)

static void CRYPTOPP_FASTCALL X86_SHA256_HashBlocks(word32 *state, const word32 *data, size_t len
#if defined(_MSC_VER) && (_MSC_VER == 1200)
 , ... // VC60 workaround: prevent VC 6 from inlining this function
#endif
 )
{
#if defined(_MSC_VER) && (_MSC_VER == 1200)
 AS2(mov ecx, [state])
 AS2(mov edx, [data])
#endif

 #define LOCALS_SIZE 8*4 + 16*4 + 4*WORD_SZ
 #define H(i) [BASE+ASM_MOD(1024+7-(i),8)*4]
 #define G(i) H(i+1)
 #define F(i) H(i+2)
 #define E(i) H(i+3)
 #define D(i) H(i+4)
 #define C(i) H(i+5)
 #define B(i) H(i+6)
 #define A(i) H(i+7)
 #define Wt(i) BASE+8*4+ASM_MOD(1024+15-(i),16)*4
 #define Wt_2(i) Wt((i)-2)
 #define Wt_15(i) Wt((i)-15)
 #define Wt_7(i) Wt((i)-7)
 #define K_END [BASE+8*4+16*4+0*WORD_SZ]
 #define STATE_SAVE [BASE+8*4+16*4+1*WORD_SZ]
 #define DATA_SAVE [BASE+8*4+16*4+2*WORD_SZ]
 #define DATA_END [BASE+8*4+16*4+3*WORD_SZ]
 #define Kt(i) WORD_REG(si)+(i)*4
#if CRYPTOPP_BOOL_X32
 #define BASE esp+8
#elif CRYPTOPP_BOOL_X86
 #define BASE esp+4
#elif defined(__GNUC__)
 #define BASE r8
#else
 #define BASE rsp
#endif

#define RA0(i, edx, edi) \
 AS2( add edx, [Kt(i)] )\
 AS2( add edx, [Wt(i)] )\
 AS2( add edx, H(i) )\

#define RA1(i, edx, edi)

#define RB0(i, edx, edi)

#define RB1(i, edx, edi) \
 AS2( mov AS_REG_7d, [Wt_2(i)] )\
 AS2( mov edi, [Wt_15(i)])\
 AS2( mov ebx, AS_REG_7d )\
 AS2( shr AS_REG_7d, 10 )\
 AS2( ror ebx, 17 )\
 AS2( xor AS_REG_7d, ebx )\
 AS2( ror ebx, 2 )\
 AS2( xor ebx, AS_REG_7d )/* s1(W_t-2) */\
 AS2( add ebx, [Wt_7(i)])\
 AS2( mov AS_REG_7d, edi )\
 AS2( shr AS_REG_7d, 3 )\
 AS2( ror edi, 7 )\
 AS2( add ebx, [Wt(i)])/* s1(W_t-2) + W_t-7 + W_t-16 */\
 AS2( xor AS_REG_7d, edi )\
 AS2( add edx, [Kt(i)])\
 AS2( ror edi, 11 )\
 AS2( add edx, H(i) )\
 AS2( xor AS_REG_7d, edi )/* s0(W_t-15) */\
 AS2( add AS_REG_7d, ebx )/* W_t = s1(W_t-2) + W_t-7 + s0(W_t-15) + W_t-16 */\
 AS2( mov [Wt(i)], AS_REG_7d)\
 AS2( add edx, AS_REG_7d )\

#define ROUND(i, r, eax, ecx, edi, edx)\
 /* in: edi = E */\
 /* unused: eax, ecx, temp: ebx, AS_REG_7d, out: edx = T1 */\
 AS2( mov edx, F(i) )\
 AS2( xor edx, G(i) )\
 AS2( and edx, edi )\
 AS2( xor edx, G(i) )/* Ch(E,F,G) = (G^(E&(F^G))) */\
 AS2( mov AS_REG_7d, edi )\
 AS2( ror edi, 6 )\
 AS2( ror AS_REG_7d, 25 )\
 RA##r(i, edx, edi )/* H + Wt + Kt + Ch(E,F,G) */\
 AS2( xor AS_REG_7d, edi )\
 AS2( ror edi, 5 )\
 AS2( xor AS_REG_7d, edi )/* S1(E) */\
 AS2( add edx, AS_REG_7d )/* T1 = S1(E) + Ch(E,F,G) + H + Wt + Kt */\
 RB##r(i, edx, edi )/* H + Wt + Kt + Ch(E,F,G) */\
 /* in: ecx = A, eax = B^C, edx = T1 */\
 /* unused: edx, temp: ebx, AS_REG_7d, out: eax = A, ecx = B^C, edx = E */\
 AS2( mov ebx, ecx )\
 AS2( xor ecx, B(i) )/* A^B */\
 AS2( and eax, ecx )\
 AS2( xor eax, B(i) )/* Maj(A,B,C) = B^((A^B)&(B^C)) */\
 AS2( mov AS_REG_7d, ebx )\
 AS2( ror ebx, 2 )\
 AS2( add eax, edx )/* T1 + Maj(A,B,C) */\
 AS2( add edx, D(i) )\
 AS2( mov D(i), edx )\
 AS2( ror AS_REG_7d, 22 )\
 AS2( xor AS_REG_7d, ebx )\
 AS2( ror ebx, 11 )\
 AS2( xor AS_REG_7d, ebx )\
 AS2( add eax, AS_REG_7d )/* T1 + S0(A) + Maj(A,B,C) */\
 AS2( mov H(i), eax )\

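
// The ROUND macro above interleaves two tricks: it never rotates the
// eight state words through registers (the H(i)..A(i) macros remap
// memory slots per round instead), and it keeps B^C live across
// rounds so Maj(A,B,C) = B^((A^B)&(B^C)) costs one AND and one XOR.
// A plain C++ rendition of the same compression, trick included, as
// a standalone sketch (names are illustrative, not Crypto++ API):
#if 0
#include <cstdint>

static inline uint32_t rotr32(uint32_t x, unsigned r) // 1 <= r <= 31
{ return (x >> r) | (x << (32 - r)); }

void sha256_compress(uint32_t state[8], const uint32_t block[16],
                     const uint32_t K[64]) // K = SHA256_K
{
    uint32_t W[64];
    for (int t = 0; t < 16; ++t) W[t] = block[t]; // words already byte-swapped
    for (int t = 16; t < 64; ++t)
    {
        uint32_t s0 = rotr32(W[t-15],7) ^ rotr32(W[t-15],18) ^ (W[t-15] >> 3);
        uint32_t s1 = rotr32(W[t-2],17) ^ rotr32(W[t-2],19) ^ (W[t-2] >> 10);
        W[t] = W[t-16] + s0 + W[t-7] + s1;
    }
    uint32_t a=state[0], b=state[1], c=state[2], d=state[3];
    uint32_t e=state[4], f=state[5], g=state[6], h=state[7];
    uint32_t bc = b ^ c; // the invariant the asm keeps in eax
    for (int t = 0; t < 64; ++t)
    {
        uint32_t S1 = rotr32(e,6) ^ rotr32(e,11) ^ rotr32(e,25);
        uint32_t ch = g ^ (e & (f ^ g)); // Ch(E,F,G)
        uint32_t t1 = h + S1 + ch + K[t] + W[t];
        uint32_t S0 = rotr32(a,2) ^ rotr32(a,13) ^ rotr32(a,22);
        uint32_t ab = a ^ b;
        uint32_t t2 = S0 + (b ^ (ab & bc)); // S0(A) + Maj(A,B,C)
        h = g; g = f; f = e; e = d + t1;
        d = c; c = b; b = a; a = t1 + t2;
        bc = ab; // old A^B is the next round's B^C
    }
    state[0]+=a; state[1]+=b; state[2]+=c; state[3]+=d;
    state[4]+=e; state[5]+=f; state[6]+=g; state[7]+=h;
}
#endif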
// Unroll the use of CRYPTOPP_BOOL_X64 in assembler math. The GAS assembler on X32 (version 2.25)
// complains "Error: invalid operands (*ABS* and *UND* sections) for `*` and `-`"
#if CRYPTOPP_BOOL_X64
#define SWAP_COPY(i) \
 AS2( mov WORD_REG(bx), [WORD_REG(dx)+i*WORD_SZ])\
 AS1( bswap WORD_REG(bx))\
 AS2( mov [Wt(i*2+1)], WORD_REG(bx))
#else // X86 and X32
#define SWAP_COPY(i) \
 AS2( mov WORD_REG(bx), [WORD_REG(dx)+i*WORD_SZ])\
 AS1( bswap WORD_REG(bx))\
 AS2( mov [Wt(i)], WORD_REG(bx))
#endif

#if defined(__GNUC__)
 #if CRYPTOPP_BOOL_X64
 FixedSizeAlignedSecBlock<byte, LOCALS_SIZE> workspace;
 #endif
 __asm__ __volatile__
 (
 #if CRYPTOPP_BOOL_X64
 "lea %4, %%r8;"
 #endif
 INTEL_NOPREFIX
#elif defined(CRYPTOPP_GENERATE_X64_MASM)
 ALIGN 8
 X86_SHA256_HashBlocks PROC FRAME
 rex_push_reg rsi
 push_reg rdi
 push_reg rbx
 push_reg rbp
 alloc_stack(LOCALS_SIZE+8)
 .endprolog
 mov rdi, r8
 lea rsi, [?SHA256_K@CryptoPP@@3QBIB + 48*4]
#endif

#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
 #ifndef __GNUC__
 AS2( mov edi, [len])
 AS2( lea WORD_REG(si), [SHA256_K+48*4])
 #endif
 #if !defined(_MSC_VER) || (_MSC_VER < 1400)
 AS_PUSH_IF86(bx)
 #endif

 AS_PUSH_IF86(bp)
 AS2( mov ebx, esp)
 AS2( and esp, -16)
 AS2( sub WORD_REG(sp), LOCALS_SIZE)
 AS_PUSH_IF86(bx)
#endif
 AS2( mov STATE_SAVE, WORD_REG(cx))
 AS2( mov DATA_SAVE, WORD_REG(dx))
 AS2( lea WORD_REG(ax), [WORD_REG(di) + WORD_REG(dx)])
 AS2( mov DATA_END, WORD_REG(ax))
 AS2( mov K_END, WORD_REG(si))

#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
 AS2( test edi, 1)
 ASJ( jnz, 2, f)
 AS1( dec DWORD PTR K_END)
#endif
 AS2( movdqa xmm0, XMMWORD_PTR [WORD_REG(cx)+0*16])
 AS2( movdqa xmm1, XMMWORD_PTR [WORD_REG(cx)+1*16])
#endif

#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
 ASJ( jmp, 0, f)
#endif
 ASL(2) // non-SSE2
 AS2( mov esi, ecx)
 AS2( lea edi, A(0))
 AS2( mov ecx, 8)
ATT_NOPREFIX
 AS1( rep movsd)
INTEL_NOPREFIX
 AS2( mov esi, K_END)
 ASJ( jmp, 3, f)
#endif

#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
 ASL(0)
 AS2( movdqa E(0), xmm1)
 AS2( movdqa A(0), xmm0)
#endif
#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
 ASL(3)
#endif
 AS2( sub WORD_REG(si), 48*4)
 SWAP_COPY(0) SWAP_COPY(1) SWAP_COPY(2) SWAP_COPY(3)
 SWAP_COPY(4) SWAP_COPY(5) SWAP_COPY(6) SWAP_COPY(7)
#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
 SWAP_COPY(8) SWAP_COPY(9) SWAP_COPY(10) SWAP_COPY(11)
 SWAP_COPY(12) SWAP_COPY(13) SWAP_COPY(14) SWAP_COPY(15)
#endif
 AS2( mov edi, E(0)) // E
 AS2( mov eax, B(0)) // B
 AS2( xor eax, C(0)) // B^C
 AS2( mov ecx, A(0)) // A

 ROUND(0, 0, eax, ecx, edi, edx)
 ROUND(1, 0, ecx, eax, edx, edi)
 ROUND(2, 0, eax, ecx, edi, edx)
 ROUND(3, 0, ecx, eax, edx, edi)
 ROUND(4, 0, eax, ecx, edi, edx)
 ROUND(5, 0, ecx, eax, edx, edi)
 ROUND(6, 0, eax, ecx, edi, edx)
 ROUND(7, 0, ecx, eax, edx, edi)
 ROUND(8, 0, eax, ecx, edi, edx)
 ROUND(9, 0, ecx, eax, edx, edi)
 ROUND(10, 0, eax, ecx, edi, edx)
 ROUND(11, 0, ecx, eax, edx, edi)
 ROUND(12, 0, eax, ecx, edi, edx)
 ROUND(13, 0, ecx, eax, edx, edi)
 ROUND(14, 0, eax, ecx, edi, edx)
 ROUND(15, 0, ecx, eax, edx, edi)

 ASL(1)
 AS2(add WORD_REG(si), 4*16)
 ROUND(0, 1, eax, ecx, edi, edx)
 ROUND(1, 1, ecx, eax, edx, edi)
 ROUND(2, 1, eax, ecx, edi, edx)
 ROUND(3, 1, ecx, eax, edx, edi)
 ROUND(4, 1, eax, ecx, edi, edx)
 ROUND(5, 1, ecx, eax, edx, edi)
 ROUND(6, 1, eax, ecx, edi, edx)
 ROUND(7, 1, ecx, eax, edx, edi)
 ROUND(8, 1, eax, ecx, edi, edx)
 ROUND(9, 1, ecx, eax, edx, edi)
 ROUND(10, 1, eax, ecx, edi, edx)
 ROUND(11, 1, ecx, eax, edx, edi)
 ROUND(12, 1, eax, ecx, edi, edx)
 ROUND(13, 1, ecx, eax, edx, edi)
 ROUND(14, 1, eax, ecx, edi, edx)
 ROUND(15, 1, ecx, eax, edx, edi)
 AS2( cmp WORD_REG(si), K_END)
 ATT_NOPREFIX
 ASJ( jb, 1, b)
 INTEL_NOPREFIX

 AS2( mov WORD_REG(dx), DATA_SAVE)
 AS2( add WORD_REG(dx), 64)
 AS2( mov AS_REG_7, STATE_SAVE)
 AS2( mov DATA_SAVE, WORD_REG(dx))

#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
 AS2( test DWORD PTR K_END, 1)
 ASJ( jz, 4, f)
#endif
 AS2( movdqa xmm1, XMMWORD_PTR [AS_REG_7+1*16])
 AS2( movdqa xmm0, XMMWORD_PTR [AS_REG_7+0*16])
 AS2( paddd xmm1, E(0))
 AS2( paddd xmm0, A(0))
 AS2( movdqa [AS_REG_7+1*16], xmm1)
 AS2( movdqa [AS_REG_7+0*16], xmm0)
 AS2( cmp WORD_REG(dx), DATA_END)
 ATT_NOPREFIX
 ASJ( jb, 0, b)
 INTEL_NOPREFIX
#endif

#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
 ASJ( jmp, 5, f)
 ASL(4) // non-SSE2
#endif
 AS2( add [AS_REG_7+0*4], ecx) // A
 AS2( add [AS_REG_7+4*4], edi) // E
 AS2( mov eax, B(0))
 AS2( mov ebx, C(0))
 AS2( mov ecx, D(0))
 AS2( add [AS_REG_7+1*4], eax)
 AS2( add [AS_REG_7+2*4], ebx)
 AS2( add [AS_REG_7+3*4], ecx)
 AS2( mov eax, F(0))
 AS2( mov ebx, G(0))
 AS2( mov ecx, H(0))
 AS2( add [AS_REG_7+5*4], eax)
 AS2( add [AS_REG_7+6*4], ebx)
 AS2( add [AS_REG_7+7*4], ecx)
 AS2( mov ecx, AS_REG_7d)
 AS2( cmp WORD_REG(dx), DATA_END)
 ASJ( jb, 2, b)
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
 ASL(5)
#endif
#endif

 AS_POP_IF86(sp)
 AS_POP_IF86(bp)
 #if !defined(_MSC_VER) || (_MSC_VER < 1400)
 AS_POP_IF86(bx)
 #endif

#ifdef CRYPTOPP_GENERATE_X64_MASM
 add rsp, LOCALS_SIZE+8
 pop rbp
 pop rbx
 pop rdi
 pop rsi
 ret
 X86_SHA256_HashBlocks ENDP
#endif

#ifdef __GNUC__
 ATT_PREFIX
 :
 : "c" (state), "d" (data), "S" (SHA256_K+48), "D" (len)
 #if CRYPTOPP_BOOL_X64
 , "m" (workspace[0])
 #endif
 : "memory", "cc", "%eax"
 #if CRYPTOPP_BOOL_X64
 , "%rbx", "%r8", "%r10"
 #endif
 );
#endif
}

#endif // (defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_GENERATE_X64_MASM))

#ifndef CRYPTOPP_GENERATE_X64_MASM

#ifdef CRYPTOPP_X64_MASM_AVAILABLE
extern "C" {
void CRYPTOPP_FASTCALL X86_SHA256_HashBlocks(word32 *state, const word32 *data, size_t len);
}
#endif

#if (defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_X32_ASM_AVAILABLE) || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_SHA_ASM)

size_t SHA256::HashMultipleBlocks(const word32 *input, size_t length)
{
 X86_SHA256_HashBlocks(m_state, input, (length&(size_t(0)-BLOCKSIZE)) - !HasSSE2());
 return length % BLOCKSIZE;
}

size_t SHA224::HashMultipleBlocks(const word32 *input, size_t length)
{
 X86_SHA256_HashBlocks(m_state, input, (length&(size_t(0)-BLOCKSIZE)) - !HasSSE2());
 return length % BLOCKSIZE;
}
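
// The len argument above packs two things: length & (size_t(0)-BLOCKSIZE)
// rounds down to a whole number of 64-byte blocks (size_t(0)-BLOCKSIZE is
// the two's-complement mask ~size_t(63)), and subtracting !HasSSE2()
// borrows bit 0 as a "no SSE2" flag, which the assembly recovers with
// "test edi, 1". The block loop compares data pointers with strict
// less-than, so the stolen byte does not drop a block. A standalone
// sketch of the arithmetic:
#if 0
#include <cassert>
#include <cstddef>

int main()
{
    const size_t BLOCKSIZE = 64;
    size_t length = 200;                     // 3 full blocks + 8 spare bytes
    size_t mask = size_t(0) - BLOCKSIZE;     // ...1111000000 in binary
    assert((length & mask) == 192);          // rounded down to 3 blocks

    bool hasSSE2 = false;                    // example assumption
    size_t len = (length & mask) - !hasSSE2; // 191: bit 0 now carries the flag
    assert((len & 1) == 1);
    return 0;
}
#endif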

#endif

#define blk2(i) (W[i&15]+=s1(W[(i-2)&15])+W[(i-7)&15]+s0(W[(i-15)&15]))

#define Ch(x,y,z) (z^(x&(y^z)))
#define Maj(x,y,z) (y^((x^y)&(y^z)))
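
// Ch and Maj above are algebraic rewrites of the FIPS 180 choose and
// majority functions that need fewer boolean operations. A standalone
// exhaustive check over single bits (bitwise operators make the 1-bit
// case carry to all 32 lanes):
#if 0
#include <cassert>
#include <cstdint>

int main()
{
    for (uint32_t x = 0; x <= 1; ++x)
    for (uint32_t y = 0; y <= 1; ++y)
    for (uint32_t z = 0; z <= 1; ++z)
    {
        uint32_t ch_ref  = (x & y) ^ (~x & z);          // textbook Ch
        uint32_t maj_ref = (x & y) ^ (x & z) ^ (y & z); // textbook Maj
        assert((z ^ (x & (y ^ z))) == ch_ref);
        assert((y ^ ((x ^ y) & (y ^ z))) == maj_ref);
    }
    return 0;
}
#endif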

#define a(i) T[(0-i)&7]
#define b(i) T[(1-i)&7]
#define c(i) T[(2-i)&7]
#define d(i) T[(3-i)&7]
#define e(i) T[(4-i)&7]
#define f(i) T[(5-i)&7]
#define g(i) T[(6-i)&7]
#define h(i) T[(7-i)&7]

#define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+SHA256_K[i+j]+(j?blk2(i):blk0(i));\
 d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i))

// for SHA256
#define S0(x) (rotrFixed(x,2)^rotrFixed(x,13)^rotrFixed(x,22))
#define S1(x) (rotrFixed(x,6)^rotrFixed(x,11)^rotrFixed(x,25))
#define s0(x) (rotrFixed(x,7)^rotrFixed(x,18)^(x>>3))
#define s1(x) (rotrFixed(x,17)^rotrFixed(x,19)^(x>>10))
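
// rotrFixed is the library's constant-distance rotate from misc.h
// (C++20 spells the same operation std::rotr). A sketch of the usual
// shape of such a helper, not the verbatim library definition; the
// zero-distance branch avoids the undefined shift by the word width:
#if 0
#include <cstdint>

template <class T>
inline T rotr_fixed(T x, unsigned int y)
{
    return y ? T((x >> y) | (x << (sizeof(T)*8 - y))) : x;
}
#endif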

void SHA256::Transform(word32 *state, const word32 *data)
{
 word32 W[16];
#if (defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_X32_ASM_AVAILABLE) || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_SHA_ASM)
 // this byte reverse is a waste of time, but this function is only called by MDC
 ByteReverse(W, data, BLOCKSIZE);
 X86_SHA256_HashBlocks(state, W, BLOCKSIZE - !HasSSE2());
#else
 word32 T[8];
 /* Copy context->state[] to working vars */
 memcpy(T, state, sizeof(T));
 /* 64 operations, partially loop unrolled */
 for (unsigned int j=0; j<64; j+=16)
 {
 R( 0); R( 1); R( 2); R( 3);
 R( 4); R( 5); R( 6); R( 7);
 R( 8); R( 9); R(10); R(11);
 R(12); R(13); R(14); R(15);
 }
 /* Add the working vars back into context.state[] */
 state[0] += a(0);
 state[1] += b(0);
 state[2] += c(0);
 state[3] += d(0);
 state[4] += e(0);
 state[5] += f(0);
 state[6] += g(0);
 state[7] += h(0);
#endif
}
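
// Sketch of exercising SHA-256 through the library's pipeline API
// (not part of this file): SHA-256("abc") is
// ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad.
#if 0
#include "sha.h"
#include "hex.h"
#include "filters.h"
#include <iostream>
#include <string>

int main()
{
    std::string digest;
    CryptoPP::SHA256 sha;
    CryptoPP::StringSource("abc", true,
        new CryptoPP::HashFilter(sha,
            new CryptoPP::HexEncoder(
                new CryptoPP::StringSink(digest))));
    std::cout << digest << std::endl;
    return 0;
}
#endif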

/*
// smaller but slower
void SHA256::Transform(word32 *state, const word32 *data)
{
 word32 T[20];
 word32 W[32];
 unsigned int i = 0, j = 0;
 word32 *t = T+8;

 memcpy(t, state, 8*4);
 word32 e = t[4], a = t[0];

 do
 {
 word32 w = data[j];
 W[j] = w;
 w += SHA256_K[j];
 w += t[7];
 w += S1(e);
 w += Ch(e, t[5], t[6]);
 e = t[3] + w;
 t[3] = t[3+8] = e;
 w += S0(t[0]);
 a = w + Maj(a, t[1], t[2]);
 t[-1] = t[7] = a;
 --t;
 ++j;
 if (j%8 == 0)
 t += 8;
 } while (j<16);

 do
 {
 i = j&0xf;
 word32 w = s1(W[i+16-2]) + s0(W[i+16-15]) + W[i] + W[i+16-7];
 W[i+16] = W[i] = w;
 w += SHA256_K[j];
 w += t[7];
 w += S1(e);
 w += Ch(e, t[5], t[6]);
 e = t[3] + w;
 t[3] = t[3+8] = e;
 w += S0(t[0]);
 a = w + Maj(a, t[1], t[2]);
 t[-1] = t[7] = a;

 w = s1(W[(i+1)+16-2]) + s0(W[(i+1)+16-15]) + W[(i+1)] + W[(i+1)+16-7];
 W[(i+1)+16] = W[(i+1)] = w;
 w += SHA256_K[j+1];
 w += (t-1)[7];
 w += S1(e);
 w += Ch(e, (t-1)[5], (t-1)[6]);
 e = (t-1)[3] + w;
 (t-1)[3] = (t-1)[3+8] = e;
 w += S0((t-1)[0]);
 a = w + Maj(a, (t-1)[1], (t-1)[2]);
 (t-1)[-1] = (t-1)[7] = a;

 t-=2;
 j+=2;
 if (j%8 == 0)
 t += 8;
 } while (j<64);

 state[0] += a;
 state[1] += t[1];
 state[2] += t[2];
 state[3] += t[3];
 state[4] += e;
 state[5] += t[5];
 state[6] += t[6];
 state[7] += t[7];
}
*/

#undef S0
#undef S1
#undef s0
#undef s1
#undef R

// *************************************************************

void SHA384::InitState(HashWordType *state)
{
 static const word64 s[8] = {
 W64LIT(0xcbbb9d5dc1059ed8), W64LIT(0x629a292a367cd507),
 W64LIT(0x9159015a3070dd17), W64LIT(0x152fecd8f70e5939),
 W64LIT(0x67332667ffc00b31), W64LIT(0x8eb44a8768581511),
 W64LIT(0xdb0c2e0d64f98fa7), W64LIT(0x47b5481dbefa4fa4)};
 memcpy(state, s, sizeof(s));
}

void SHA512::InitState(HashWordType *state)
{
 static const word64 s[8] = {
 W64LIT(0x6a09e667f3bcc908), W64LIT(0xbb67ae8584caa73b),
 W64LIT(0x3c6ef372fe94f82b), W64LIT(0xa54ff53a5f1d36f1),
 W64LIT(0x510e527fade682d1), W64LIT(0x9b05688c2b3e6c1f),
 W64LIT(0x1f83d9abfb41bd6b), W64LIT(0x5be0cd19137e2179)};
 memcpy(state, s, sizeof(s));
}

#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32)
CRYPTOPP_ALIGN_DATA(16) static const word64 SHA512_K[80] CRYPTOPP_SECTION_ALIGN16 = {
#else
static const word64 SHA512_K[80] = {
#endif
 W64LIT(0x428a2f98d728ae22), W64LIT(0x7137449123ef65cd),
 W64LIT(0xb5c0fbcfec4d3b2f), W64LIT(0xe9b5dba58189dbbc),
 W64LIT(0x3956c25bf348b538), W64LIT(0x59f111f1b605d019),
 W64LIT(0x923f82a4af194f9b), W64LIT(0xab1c5ed5da6d8118),
 W64LIT(0xd807aa98a3030242), W64LIT(0x12835b0145706fbe),
 W64LIT(0x243185be4ee4b28c), W64LIT(0x550c7dc3d5ffb4e2),
 W64LIT(0x72be5d74f27b896f), W64LIT(0x80deb1fe3b1696b1),
 W64LIT(0x9bdc06a725c71235), W64LIT(0xc19bf174cf692694),
 W64LIT(0xe49b69c19ef14ad2), W64LIT(0xefbe4786384f25e3),
 W64LIT(0x0fc19dc68b8cd5b5), W64LIT(0x240ca1cc77ac9c65),
 W64LIT(0x2de92c6f592b0275), W64LIT(0x4a7484aa6ea6e483),
 W64LIT(0x5cb0a9dcbd41fbd4), W64LIT(0x76f988da831153b5),
 W64LIT(0x983e5152ee66dfab), W64LIT(0xa831c66d2db43210),
 W64LIT(0xb00327c898fb213f), W64LIT(0xbf597fc7beef0ee4),
 W64LIT(0xc6e00bf33da88fc2), W64LIT(0xd5a79147930aa725),
 W64LIT(0x06ca6351e003826f), W64LIT(0x142929670a0e6e70),
 W64LIT(0x27b70a8546d22ffc), W64LIT(0x2e1b21385c26c926),
 W64LIT(0x4d2c6dfc5ac42aed), W64LIT(0x53380d139d95b3df),
 W64LIT(0x650a73548baf63de), W64LIT(0x766a0abb3c77b2a8),
 W64LIT(0x81c2c92e47edaee6), W64LIT(0x92722c851482353b),
 W64LIT(0xa2bfe8a14cf10364), W64LIT(0xa81a664bbc423001),
 W64LIT(0xc24b8b70d0f89791), W64LIT(0xc76c51a30654be30),
 W64LIT(0xd192e819d6ef5218), W64LIT(0xd69906245565a910),
 W64LIT(0xf40e35855771202a), W64LIT(0x106aa07032bbd1b8),
 W64LIT(0x19a4c116b8d2d0c8), W64LIT(0x1e376c085141ab53),
 W64LIT(0x2748774cdf8eeb99), W64LIT(0x34b0bcb5e19b48a8),
 W64LIT(0x391c0cb3c5c95a63), W64LIT(0x4ed8aa4ae3418acb),
 W64LIT(0x5b9cca4f7763e373), W64LIT(0x682e6ff3d6b2b8a3),
 W64LIT(0x748f82ee5defb2fc), W64LIT(0x78a5636f43172f60),
 W64LIT(0x84c87814a1f0ab72), W64LIT(0x8cc702081a6439ec),
 W64LIT(0x90befffa23631e28), W64LIT(0xa4506cebde82bde9),
 W64LIT(0xbef9a3f7b2c67915), W64LIT(0xc67178f2e372532b),
 W64LIT(0xca273eceea26619c), W64LIT(0xd186b8c721c0c207),
 W64LIT(0xeada7dd6cde0eb1e), W64LIT(0xf57d4f7fee6ed178),
 W64LIT(0x06f067aa72176fba), W64LIT(0x0a637dc5a2c898a6),
 W64LIT(0x113f9804bef90dae), W64LIT(0x1b710b35131c471b),
 W64LIT(0x28db77f523047d84), W64LIT(0x32caab7b40c72493),
 W64LIT(0x3c9ebe0a15c9bebc), W64LIT(0x431d67c49c100d4c),
 W64LIT(0x4cc5d4becb3e42b6), W64LIT(0x597f299cfc657e2a),
 W64LIT(0x5fcb6fab3ad6faec), W64LIT(0x6c44198c4a475817)
};

#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32)
// put assembly version in separate function, otherwise MSVC 2005 SP1 doesn't generate correct code for the non-assembly version
CRYPTOPP_NAKED static void CRYPTOPP_FASTCALL SHA512_SSE2_Transform(word64 *state, const word64 *data)
{
#ifdef __GNUC__
 __asm__ __volatile__
 (
 INTEL_NOPREFIX
 AS_PUSH_IF86( bx)
 AS2( mov ebx, eax)
#else
 AS1( push ebx)
 AS1( push esi)
 AS1( push edi)
 AS2( lea ebx, SHA512_K)
#endif

 AS2( mov eax, esp)
 AS2( and esp, 0xfffffff0)
 AS2( sub esp, 27*16) // 17*16 for expanded data, 20*8 for state
 AS_PUSH_IF86( ax)
 AS2( xor eax, eax)

#if CRYPTOPP_BOOL_X32
 AS2( lea edi, [esp+8+8*8]) // start at middle of state buffer. will decrement pointer each round to avoid copying
 AS2( lea esi, [esp+8+20*8+8]) // 16-byte alignment, then add 8
#else
 AS2( lea edi, [esp+4+8*8]) // start at middle of state buffer. will decrement pointer each round to avoid copying
 AS2( lea esi, [esp+4+20*8+8]) // 16-byte alignment, then add 8
#endif

 AS2( movdqa xmm0, [ecx+0*16])
 AS2( movdq2q mm4, xmm0)
 AS2( movdqa [edi+0*16], xmm0)
 AS2( movdqa xmm0, [ecx+1*16])
 AS2( movdqa [edi+1*16], xmm0)
 AS2( movdqa xmm0, [ecx+2*16])
 AS2( movdq2q mm5, xmm0)
 AS2( movdqa [edi+2*16], xmm0)
 AS2( movdqa xmm0, [ecx+3*16])
 AS2( movdqa [edi+3*16], xmm0)
 ASJ( jmp, 0, f)

#define SSE2_S0_S1(r, a, b, c) \
 AS2( movq mm6, r)\
 AS2( psrlq r, a)\
 AS2( movq mm7, r)\
 AS2( psllq mm6, 64-c)\
 AS2( pxor mm7, mm6)\
 AS2( psrlq r, b-a)\
 AS2( pxor mm7, r)\
 AS2( psllq mm6, c-b)\
 AS2( pxor mm7, mm6)\
 AS2( psrlq r, c-b)\
 AS2( pxor r, mm7)\
 AS2( psllq mm6, b-a)\
 AS2( pxor r, mm6)

#define SSE2_s0(r, a, b, c) \
 AS2( movdqa xmm6, r)\
 AS2( psrlq r, a)\
 AS2( movdqa xmm7, r)\
 AS2( psllq xmm6, 64-c)\
 AS2( pxor xmm7, xmm6)\
 AS2( psrlq r, b-a)\
 AS2( pxor xmm7, r)\
 AS2( psrlq r, c-b)\
 AS2( pxor r, xmm7)\
 AS2( psllq xmm6, c-a)\
 AS2( pxor r, xmm6)

#define SSE2_s1(r, a, b, c) \
 AS2( movdqa xmm6, r)\
 AS2( psrlq r, a)\
 AS2( movdqa xmm7, r)\
 AS2( psllq xmm6, 64-c)\
 AS2( pxor xmm7, xmm6)\
 AS2( psrlq r, b-a)\
 AS2( pxor xmm7, r)\
 AS2( psllq xmm6, c-b)\
 AS2( pxor xmm7, xmm6)\
 AS2( psrlq r, c-b)\
 AS2( pxor r, xmm7)

 ASL(SHA512_Round)
 // k + w is in mm0, a is in mm4, e is in mm5
 AS2( paddq mm0, [edi+7*8]) // h
 AS2( movq mm2, [edi+5*8]) // f
 AS2( movq mm3, [edi+6*8]) // g
 AS2( pxor mm2, mm3)
 AS2( pand mm2, mm5)
 SSE2_S0_S1(mm5,14,18,41)
 AS2( pxor mm2, mm3)
 AS2( paddq mm0, mm2) // h += Ch(e,f,g)
 AS2( paddq mm5, mm0) // h += S1(e)
 AS2( movq mm2, [edi+1*8]) // b
 AS2( movq mm1, mm2)
 AS2( por mm2, mm4)
 AS2( pand mm2, [edi+2*8]) // c
 AS2( pand mm1, mm4)
 AS2( por mm1, mm2)
 AS2( paddq mm1, mm5) // temp = h + Maj(a,b,c)
 AS2( paddq mm5, [edi+3*8]) // e = d + h
 AS2( movq [edi+3*8], mm5)
 AS2( movq [edi+11*8], mm5)
 SSE2_S0_S1(mm4,28,34,39) // S0(a)
 AS2( paddq mm4, mm1) // a = temp + S0(a)
 AS2( movq [edi-8], mm4)
 AS2( movq [edi+7*8], mm4)
 AS1( ret)

 // first 16 rounds
 ASL(0)
 AS2( movq mm0, [edx+eax*8])
 AS2( movq [esi+eax*8], mm0)
 AS2( movq [esi+eax*8+16*8], mm0)
 AS2( paddq mm0, [ebx+eax*8])
 ASC( call, SHA512_Round)
 AS1( inc eax)
 AS2( sub edi, 8)
 AS2( test eax, 7)
 ASJ( jnz, 0, b)
 AS2( add edi, 8*8)
 AS2( cmp eax, 16)
 ASJ( jne, 0, b)

 // rest of the rounds
 AS2( movdqu xmm0, [esi+(16-2)*8])
 ASL(1)
 // data expansion, W[i-2] already in xmm0
 AS2( movdqu xmm3, [esi])
 AS2( paddq xmm3, [esi+(16-7)*8])
 AS2( movdqa xmm2, [esi+(16-15)*8])
 SSE2_s1(xmm0, 6, 19, 61)
 AS2( paddq xmm0, xmm3)
 SSE2_s0(xmm2, 1, 7, 8)
 AS2( paddq xmm0, xmm2)
 AS2( movdq2q mm0, xmm0)
 AS2( movhlps xmm1, xmm0)
 AS2( paddq mm0, [ebx+eax*8])
 AS2( movlps [esi], xmm0)
 AS2( movlps [esi+8], xmm1)
 AS2( movlps [esi+8*16], xmm0)
 AS2( movlps [esi+8*17], xmm1)
 // 2 rounds
 ASC( call, SHA512_Round)
 AS2( sub edi, 8)
 AS2( movdq2q mm0, xmm1)
 AS2( paddq mm0, [ebx+eax*8+8])
 ASC( call, SHA512_Round)
 // update indices and loop
 AS2( add esi, 16)
 AS2( add eax, 2)
 AS2( sub edi, 8)
 AS2( test eax, 7)
 ASJ( jnz, 1, b)
 // do housekeeping every 8 rounds
 AS2( mov esi, 0xf)
 AS2( and esi, eax)
#if CRYPTOPP_BOOL_X32
 AS2( lea esi, [esp+8+20*8+8+esi*8])
#else
 AS2( lea esi, [esp+4+20*8+8+esi*8])
#endif
 AS2( add edi, 8*8)
 AS2( cmp eax, 80)
 ASJ( jne, 1, b)

#define SSE2_CombineState(i) \
 AS2( movdqa xmm0, [edi+i*16])\
 AS2( paddq xmm0, [ecx+i*16])\
 AS2( movdqa [ecx+i*16], xmm0)

 SSE2_CombineState(0)
 SSE2_CombineState(1)
 SSE2_CombineState(2)
 SSE2_CombineState(3)

 AS_POP_IF86( sp)
 AS1( emms)

#if defined(__GNUC__)
 AS_POP_IF86( bx)
 ATT_PREFIX
 :
 : "a" (SHA512_K), "c" (state), "d" (data)
 : "%esi", "%edi", "memory", "cc"
 );
#else
 AS1( pop edi)
 AS1( pop esi)
 AS1( pop ebx)
 AS1( ret)
#endif
}
#endif // #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE

void SHA512::Transform(word64 *state, const word64 *data)
{
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32)
 if (HasSSE2())
 {
 SHA512_SSE2_Transform(state, data);
 return;
 }
#endif

#define S0(x) (rotrFixed(x,28)^rotrFixed(x,34)^rotrFixed(x,39))
#define S1(x) (rotrFixed(x,14)^rotrFixed(x,18)^rotrFixed(x,41))
#define s0(x) (rotrFixed(x,1)^rotrFixed(x,8)^(x>>7))
#define s1(x) (rotrFixed(x,19)^rotrFixed(x,61)^(x>>6))

#define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+SHA512_K[i+j]+(j?blk2(i):blk0(i));\
 d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i))

 word64 W[16];
 word64 T[8];
 /* Copy context->state[] to working vars */
 memcpy(T, state, sizeof(T));
 /* 80 operations, partially loop unrolled */
 for (unsigned int j=0; j<80; j+=16)
 {
 R( 0); R( 1); R( 2); R( 3);
 R( 4); R( 5); R( 6); R( 7);
 R( 8); R( 9); R(10); R(11);
 R(12); R(13); R(14); R(15);
 }
 /* Add the working vars back into context.state[] */
 state[0] += a(0);
 state[1] += b(0);
 state[2] += c(0);
 state[3] += d(0);
 state[4] += e(0);
 state[5] += f(0);
 state[6] += g(0);
 state[7] += h(0);
}
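
// SHA-384 shares this Transform: per FIPS 180-4 it differs from
// SHA-512 only in its initial state (SHA384::InitState above) and in
// truncating the digest to 48 bytes. A caller's-eye sketch using the
// one-shot digest helpers from the public interface:
#if 0
#include "sha.h"
#include <cstring>
#include <iostream>

int main()
{
    const char *msg = "abc";
    unsigned char digest[CryptoPP::SHA384::DIGESTSIZE]; // 48 bytes
    CryptoPP::SHA384 sha;
    sha.CalculateDigest(digest,
        reinterpret_cast<const unsigned char*>(msg), std::strlen(msg));

    // VerifyDigest re-hashes the input and compares in one call
    bool ok = sha.VerifyDigest(digest,
        reinterpret_cast<const unsigned char*>(msg), std::strlen(msg));
    std::cout << (ok ? "verified" : "mismatch") << std::endl;
    return 0;
}
#endif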

NAMESPACE_END

#endif // #ifndef CRYPTOPP_GENERATE_X64_MASM
#endif // #ifndef CRYPTOPP_IMPORTS