sha.cpp
1 // sha.cpp - modified by Wei Dai from Steve Reid's public domain sha1.c
2 
3 // Steve Reid implemented SHA-1. Wei Dai implemented SHA-2. Jeffrey Walton
4 // implemented Intel SHA extensions based on Intel articles and code by
5 // Sean Gulley. Jeffrey Walton implemented ARM SHA based on ARM code and
6 // code from Johannes Schneiders, Skip Hovsmith and Barry O'Rourke.
7 // All code is in the public domain.
8 
9 // use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM sha.cpp" to generate MASM code
10 
11 #include "pch.h"
12 #include "config.h"
13 
14 #if CRYPTOPP_MSC_VERSION
15 # pragma warning(disable: 4100 4731)
16 #endif
17 
18 #ifndef CRYPTOPP_IMPORTS
19 #ifndef CRYPTOPP_GENERATE_X64_MASM
20 
21 #include "secblock.h"
22 #include "sha.h"
23 #include "misc.h"
24 #include "cpu.h"
25 
26 #if defined(CRYPTOPP_DISABLE_SHA_ASM)
27 # undef CRYPTOPP_X86_ASM_AVAILABLE
28 # undef CRYPTOPP_X32_ASM_AVAILABLE
29 # undef CRYPTOPP_X64_ASM_AVAILABLE
30 # undef CRYPTOPP_SSE2_ASM_AVAILABLE
31 #endif
32 
33 NAMESPACE_BEGIN(CryptoPP)
34 
35 // Function pointer for specific SHA1 or SHA256 Transform function
36 typedef void (*pfnSHATransform)(word32 *state, const word32 *data);
37 typedef void (CRYPTOPP_FASTCALL *pfnSHAHashBlocks)(word32 *state, const word32 *data, size_t length);
38 
39 ////////////////////////////////
40 // start of Steve Reid's code //
41 ////////////////////////////////
42 
43 #define blk0(i) (W[i] = data[i])
44 #define blk1(i) (W[i&15] = rotlFixed(W[(i+13)&15]^W[(i+8)&15]^W[(i+2)&15]^W[i&15],1))
45 
46 #define f1(x,y,z) (z^(x&(y^z)))
47 #define f2(x,y,z) (x^y^z)
48 #define f3(x,y,z) ((x&y)|(z&(x|y)))
49 #define f4(x,y,z) (x^y^z)
50 
51 /* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */
52 #define R0(v,w,x,y,z,i) z+=f1(w,x,y)+blk0(i)+0x5A827999+rotlFixed(v,5);w=rotlFixed(w,30);
53 #define R1(v,w,x,y,z,i) z+=f1(w,x,y)+blk1(i)+0x5A827999+rotlFixed(v,5);w=rotlFixed(w,30);
54 #define R2(v,w,x,y,z,i) z+=f2(w,x,y)+blk1(i)+0x6ED9EBA1+rotlFixed(v,5);w=rotlFixed(w,30);
55 #define R3(v,w,x,y,z,i) z+=f3(w,x,y)+blk1(i)+0x8F1BBCDC+rotlFixed(v,5);w=rotlFixed(w,30);
56 #define R4(v,w,x,y,z,i) z+=f4(w,x,y)+blk1(i)+0xCA62C1D6+rotlFixed(v,5);w=rotlFixed(w,30);
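
// Illustrative only: what one R0(v,w,x,y,z,i) round expands to when written as a plain
// function instead of a macro. This sketch is not part of the library; it simply makes
// the data flow of the macros above easier to follow.
static inline void SHA1_IllustrativeRound0(word32 v, word32 &w, word32 x, word32 y, word32 &z, word32 msgWord)
{
	z += (y ^ (w & (x ^ y)))      // f1(w,x,y), the "choose" function used in rounds 0-19
	     + msgWord + 0x5A827999   // W[i] plus the round constant for rounds 0-19
	     + rotlFixed(v, 5);
	w = rotlFixed(w, 30);
}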
57 
58 static void SHA1_CXX_Transform(word32 *state, const word32 *data)
59 {
60  word32 W[16];
61  /* Copy context->state[] to working vars */
62  word32 a = state[0];
63  word32 b = state[1];
64  word32 c = state[2];
65  word32 d = state[3];
66  word32 e = state[4];
67  /* 4 rounds of 20 operations each. Loop unrolled. */
68  R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3);
69  R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7);
70  R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11);
71  R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15);
72  R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19);
73  R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23);
74  R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27);
75  R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31);
76  R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35);
77  R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39);
78  R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43);
79  R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47);
80  R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51);
81  R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55);
82  R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59);
83  R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63);
84  R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67);
85  R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);
86  R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);
87  R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);
88  /* Add the working vars back into context.state[] */
89  state[0] += a;
90  state[1] += b;
91  state[2] += c;
92  state[3] += d;
93  state[4] += e;
94 }
95 
96 //////////////////////////////
97 // end of Steve Reid's code //
98 //////////////////////////////
99 
100 ///////////////////////////////////
101 // start of Walton/Gulley's code //
102 ///////////////////////////////////
103 
104 #if CRYPTOPP_SSE_SHA_AVAILABLE
105 // Based on http://software.intel.com/en-us/articles/intel-sha-extensions and code by Sean Gulley.
106 static void SHA1_SSE_SHA_Transform(word32 *state, const word32 *data)
107 {
108  __m128i ABCD, ABCD_SAVE, E0, E0_SAVE, E1;
109  __m128i MASK, MSG0, MSG1, MSG2, MSG3;
110 
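 // Note on the MM_* wrappers used below, which map directly onto the Intel SHA intrinsics
 // of the same names: MM_SHA1RNDS4_EPU32 performs four SHA-1 rounds, with the round
 // function and constant selected by its immediate (0 for rounds 0-19 up to 3 for rounds
 // 60-79); MM_SHA1NEXTE_EPU32 derives the rotated E addend for the next four rounds and
 // adds it to the incoming message words; MM_SHA1MSG1_EPU32 and MM_SHA1MSG2_EPU32 carry
 // out the two halves of the message-schedule expansion.
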
 111  // IteratedHashBase<T> has code to perform this step before HashEndianCorrectedBlock()
 112  // is called, but the design does not lend itself to optional hardware components,
 113  // where SHA1 needs the byte reversal but SHA256 does not.
114  word32* dataBuf = const_cast<word32*>(data);
115  ByteReverse(dataBuf, dataBuf, 64);
116 
117  // Load initial values
118  ABCD = _mm_loadu_si128((__m128i*) state);
119  E0 = _mm_set_epi32(state[4], 0, 0, 0);
120  ABCD = MM_SHUFFLE_EPI32(ABCD, 0x1B);
121  MASK = _mm_set_epi64x(W64LIT(0x0001020304050607), W64LIT(0x08090a0b0c0d0e0f));
122 
123  // Save current hash
124  ABCD_SAVE = ABCD;
125  E0_SAVE = E0;
126 
127  // Rounds 0-3
128  MSG0 = _mm_loadu_si128((__m128i*) data+0);
129  MSG0 = MM_SHUFFLE_EPI8(MSG0, MASK);
130  E0 = _mm_add_epi32(E0, MSG0);
131  E1 = ABCD;
132  ABCD = MM_SHA1RNDS4_EPU32(ABCD, E0, 0);
133 
134  // Rounds 4-7
135  MSG1 = _mm_loadu_si128((__m128i*) (data+4));
136  MSG1 = MM_SHUFFLE_EPI8(MSG1, MASK);
137  E1 = MM_SHA1NEXTE_EPU32(E1, MSG1);
138  E0 = ABCD;
139  ABCD = MM_SHA1RNDS4_EPU32(ABCD, E1, 0);
140  MSG0 = MM_SHA1MSG1_EPU32(MSG0, MSG1);
141 
142  // Rounds 8-11
143  MSG2 = _mm_loadu_si128((__m128i*) (data+8));
144  MSG2 = MM_SHUFFLE_EPI8(MSG2, MASK);
145  E0 = MM_SHA1NEXTE_EPU32(E0, MSG2);
146  E1 = ABCD;
147  ABCD = MM_SHA1RNDS4_EPU32(ABCD, E0, 0);
148  MSG1 = MM_SHA1MSG1_EPU32(MSG1, MSG2);
149  MSG0 = _mm_xor_si128(MSG0, MSG2);
150 
151  // Rounds 12-15
152  MSG3 = _mm_loadu_si128((__m128i*) (data+12));
153  MSG3 = MM_SHUFFLE_EPI8(MSG3, MASK);
154  E1 = MM_SHA1NEXTE_EPU32(E1, MSG3);
155  E0 = ABCD;
156  MSG0 = MM_SHA1MSG2_EPU32(MSG0, MSG3);
157  ABCD = MM_SHA1RNDS4_EPU32(ABCD, E1, 0);
158  MSG2 = MM_SHA1MSG1_EPU32(MSG2, MSG3);
159  MSG1 = _mm_xor_si128(MSG1, MSG3);
160 
161  // Rounds 16-19
162  E0 = MM_SHA1NEXTE_EPU32(E0, MSG0);
163  E1 = ABCD;
164  MSG1 = MM_SHA1MSG2_EPU32(MSG1, MSG0);
165  ABCD = MM_SHA1RNDS4_EPU32(ABCD, E0, 0);
166  MSG3 = MM_SHA1MSG1_EPU32(MSG3, MSG0);
167  MSG2 = _mm_xor_si128(MSG2, MSG0);
168 
169  // Rounds 20-23
170  E1 = MM_SHA1NEXTE_EPU32(E1, MSG1);
171  E0 = ABCD;
172  MSG2 = MM_SHA1MSG2_EPU32(MSG2, MSG1);
173  ABCD = MM_SHA1RNDS4_EPU32(ABCD, E1, 1);
174  MSG0 = MM_SHA1MSG1_EPU32(MSG0, MSG1);
175  MSG3 = _mm_xor_si128(MSG3, MSG1);
176 
177  // Rounds 24-27
178  E0 = MM_SHA1NEXTE_EPU32(E0, MSG2);
179  E1 = ABCD;
180  MSG3 = MM_SHA1MSG2_EPU32(MSG3, MSG2);
181  ABCD = MM_SHA1RNDS4_EPU32(ABCD, E0, 1);
182  MSG1 = MM_SHA1MSG1_EPU32(MSG1, MSG2);
183  MSG0 = _mm_xor_si128(MSG0, MSG2);
184 
185  // Rounds 28-31
186  E1 = MM_SHA1NEXTE_EPU32(E1, MSG3);
187  E0 = ABCD;
188  MSG0 = MM_SHA1MSG2_EPU32(MSG0, MSG3);
189  ABCD = MM_SHA1RNDS4_EPU32(ABCD, E1, 1);
190  MSG2 = MM_SHA1MSG1_EPU32(MSG2, MSG3);
191  MSG1 = _mm_xor_si128(MSG1, MSG3);
192 
193  // Rounds 32-35
194  E0 = MM_SHA1NEXTE_EPU32(E0, MSG0);
195  E1 = ABCD;
196  MSG1 = MM_SHA1MSG2_EPU32(MSG1, MSG0);
197  ABCD = MM_SHA1RNDS4_EPU32(ABCD, E0, 1);
198  MSG3 = MM_SHA1MSG1_EPU32(MSG3, MSG0);
199  MSG2 = _mm_xor_si128(MSG2, MSG0);
200 
201  // Rounds 36-39
202  E1 = MM_SHA1NEXTE_EPU32(E1, MSG1);
203  E0 = ABCD;
204  MSG2 = MM_SHA1MSG2_EPU32(MSG2, MSG1);
205  ABCD = MM_SHA1RNDS4_EPU32(ABCD, E1, 1);
206  MSG0 = MM_SHA1MSG1_EPU32(MSG0, MSG1);
207  MSG3 = _mm_xor_si128(MSG3, MSG1);
208 
209  // Rounds 40-43
210  E0 = MM_SHA1NEXTE_EPU32(E0, MSG2);
211  E1 = ABCD;
212  MSG3 = MM_SHA1MSG2_EPU32(MSG3, MSG2);
213  ABCD = MM_SHA1RNDS4_EPU32(ABCD, E0, 2);
214  MSG1 = MM_SHA1MSG1_EPU32(MSG1, MSG2);
215  MSG0 = _mm_xor_si128(MSG0, MSG2);
216 
217  // Rounds 44-47
218  E1 = MM_SHA1NEXTE_EPU32(E1, MSG3);
219  E0 = ABCD;
220  MSG0 = MM_SHA1MSG2_EPU32(MSG0, MSG3);
221  ABCD = MM_SHA1RNDS4_EPU32(ABCD, E1, 2);
222  MSG2 = MM_SHA1MSG1_EPU32(MSG2, MSG3);
223  MSG1 = _mm_xor_si128(MSG1, MSG3);
224 
225  // Rounds 48-51
226  E0 = MM_SHA1NEXTE_EPU32(E0, MSG0);
227  E1 = ABCD;
228  MSG1 = MM_SHA1MSG2_EPU32(MSG1, MSG0);
229  ABCD = MM_SHA1RNDS4_EPU32(ABCD, E0, 2);
230  MSG3 = MM_SHA1MSG1_EPU32(MSG3, MSG0);
231  MSG2 = _mm_xor_si128(MSG2, MSG0);
232 
233  // Rounds 52-55
234  E1 = MM_SHA1NEXTE_EPU32(E1, MSG1);
235  E0 = ABCD;
236  MSG2 = MM_SHA1MSG2_EPU32(MSG2, MSG1);
237  ABCD = MM_SHA1RNDS4_EPU32(ABCD, E1, 2);
238  MSG0 = MM_SHA1MSG1_EPU32(MSG0, MSG1);
239  MSG3 = _mm_xor_si128(MSG3, MSG1);
240 
241  // Rounds 56-59
242  E0 = MM_SHA1NEXTE_EPU32(E0, MSG2);
243  E1 = ABCD;
244  MSG3 = MM_SHA1MSG2_EPU32(MSG3, MSG2);
245  ABCD = MM_SHA1RNDS4_EPU32(ABCD, E0, 2);
246  MSG1 = MM_SHA1MSG1_EPU32(MSG1, MSG2);
247  MSG0 = _mm_xor_si128(MSG0, MSG2);
248 
249  // Rounds 60-63
250  E1 = MM_SHA1NEXTE_EPU32(E1, MSG3);
251  E0 = ABCD;
252  MSG0 = MM_SHA1MSG2_EPU32(MSG0, MSG3);
253  ABCD = MM_SHA1RNDS4_EPU32(ABCD, E1, 3);
254  MSG2 = MM_SHA1MSG1_EPU32(MSG2, MSG3);
255  MSG1 = _mm_xor_si128(MSG1, MSG3);
256 
257  // Rounds 64-67
258  E0 = MM_SHA1NEXTE_EPU32(E0, MSG0);
259  E1 = ABCD;
260  MSG1 = MM_SHA1MSG2_EPU32(MSG1, MSG0);
261  ABCD = MM_SHA1RNDS4_EPU32(ABCD, E0, 3);
262  MSG3 = MM_SHA1MSG1_EPU32(MSG3, MSG0);
263  MSG2 = _mm_xor_si128(MSG2, MSG0);
264 
265  // Rounds 68-71
266  E1 = MM_SHA1NEXTE_EPU32(E1, MSG1);
267  E0 = ABCD;
268  MSG2 = MM_SHA1MSG2_EPU32(MSG2, MSG1);
269  ABCD = MM_SHA1RNDS4_EPU32(ABCD, E1, 3);
270  MSG3 = _mm_xor_si128(MSG3, MSG1);
271 
272  // Rounds 72-75
273  E0 = MM_SHA1NEXTE_EPU32(E0, MSG2);
274  E1 = ABCD;
275  MSG3 = MM_SHA1MSG2_EPU32(MSG3, MSG2);
276  ABCD = MM_SHA1RNDS4_EPU32(ABCD, E0, 3);
277 
278  // Rounds 76-79
279  E1 = MM_SHA1NEXTE_EPU32(E1, MSG3);
280  E0 = ABCD;
281  ABCD = MM_SHA1RNDS4_EPU32(ABCD, E1, 3);
282 
283  // Add values back to state
284  E0 = MM_SHA1NEXTE_EPU32(E0, E0_SAVE);
285  ABCD = _mm_add_epi32(ABCD, ABCD_SAVE);
286 
287  // Save state
288  ABCD = MM_SHUFFLE_EPI32(ABCD, 0x1B);
289  _mm_storeu_si128((__m128i*) state, ABCD);
290  state[4] = MM_EXTRACT_EPI32(E0, 3);
291 }
292 #endif
293 
294 /////////////////////////////////
295 // end of Walton/Gulley's code //
296 /////////////////////////////////
297 
298 //////////////////////////////////////////////////////////////
 299 // start of Walton/Schneiders/O'Rourke/Hovsmith's code //
300 //////////////////////////////////////////////////////////////
301 
302 #if CRYPTOPP_ARM_CRYPTO_AVAILABLE
303 static void SHA1_ARM_SHA_Transform(word32 *state, const word32 *data)
304 {
305  uint32x4_t C0, C1, C2, C3;
306  uint32x4_t ABCD, ABCD_SAVED;
307  uint32x4_t MSG0, MSG1, MSG2, MSG3;
308  uint32x4_t TMP0, TMP1;
309  uint32_t E0, E0_SAVED, E1;
310 
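 // Note on the ARMv8 Crypto extension intrinsics used below: vsha1cq_u32, vsha1pq_u32 and
 // vsha1mq_u32 each perform four SHA-1 rounds using the choose, parity and majority
 // functions respectively; vsha1h_u32 produces the rotated E value feeding the next group
 // of rounds; and vsha1su0q_u32/vsha1su1q_u32 expand the message schedule four words at
 // a time.
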
311  // Load initial values
312  C0 = vdupq_n_u32(0x5A827999);
313  C1 = vdupq_n_u32(0x6ED9EBA1);
314  C2 = vdupq_n_u32(0x8F1BBCDC);
315  C3 = vdupq_n_u32(0xCA62C1D6);
316 
317  ABCD = vld1q_u32(&state[0]);
318  E0 = state[4];
319 
320  // Save current hash
321  ABCD_SAVED = ABCD;
322  E0_SAVED = E0;
323 
324  MSG0 = vld1q_u32(data + 0);
325  MSG1 = vld1q_u32(data + 4);
326  MSG2 = vld1q_u32(data + 8);
327  MSG3 = vld1q_u32(data + 12);
328 
329  TMP0 = vaddq_u32(MSG0, C0);
330  TMP1 = vaddq_u32(MSG1, C0);
331 
332  // Rounds 0-3
333  E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
334  ABCD = vsha1cq_u32(ABCD, E0, TMP0);
335  TMP0 = vaddq_u32(MSG2, C0);
336  MSG0 = vsha1su0q_u32(MSG0, MSG1, MSG2);
337 
338  // Rounds 4-7
339  E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
340  ABCD = vsha1cq_u32(ABCD, E1, TMP1);
341  TMP1 = vaddq_u32(MSG3, C0);
342  MSG0 = vsha1su1q_u32(MSG0, MSG3);
343  MSG1 = vsha1su0q_u32(MSG1, MSG2, MSG3);
344 
345  // Rounds 8-11
346  E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
347  ABCD = vsha1cq_u32(ABCD, E0, TMP0);
348  TMP0 = vaddq_u32(MSG0, C0);
349  MSG1 = vsha1su1q_u32(MSG1, MSG0);
350  MSG2 = vsha1su0q_u32(MSG2, MSG3, MSG0);
351 
352  // Rounds 12-15
353  E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
354  ABCD = vsha1cq_u32(ABCD, E1, TMP1);
355  TMP1 = vaddq_u32(MSG1, C1);
356  MSG2 = vsha1su1q_u32(MSG2, MSG1);
357  MSG3 = vsha1su0q_u32(MSG3, MSG0, MSG1);
358 
359  // Rounds 16-19
360  E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
361  ABCD = vsha1cq_u32(ABCD, E0, TMP0);
362  TMP0 = vaddq_u32(MSG2, C1);
363  MSG3 = vsha1su1q_u32(MSG3, MSG2);
364  MSG0 = vsha1su0q_u32(MSG0, MSG1, MSG2);
365 
366  // Rounds 20-23
367  E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
368  ABCD = vsha1pq_u32(ABCD, E1, TMP1);
369  TMP1 = vaddq_u32(MSG3, C1);
370  MSG0 = vsha1su1q_u32(MSG0, MSG3);
371  MSG1 = vsha1su0q_u32(MSG1, MSG2, MSG3);
372 
373  // Rounds 24-27
374  E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
375  ABCD = vsha1pq_u32(ABCD, E0, TMP0);
376  TMP0 = vaddq_u32(MSG0, C1);
377  MSG1 = vsha1su1q_u32(MSG1, MSG0);
378  MSG2 = vsha1su0q_u32(MSG2, MSG3, MSG0);
379 
380  // Rounds 28-31
381  E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
382  ABCD = vsha1pq_u32(ABCD, E1, TMP1);
383  TMP1 = vaddq_u32(MSG1, C1);
384  MSG2 = vsha1su1q_u32(MSG2, MSG1);
385  MSG3 = vsha1su0q_u32(MSG3, MSG0, MSG1);
386 
387  // Rounds 32-35
388  E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
389  ABCD = vsha1pq_u32(ABCD, E0, TMP0);
390  TMP0 = vaddq_u32(MSG2, C2);
391  MSG3 = vsha1su1q_u32(MSG3, MSG2);
392  MSG0 = vsha1su0q_u32(MSG0, MSG1, MSG2);
393 
394  // Rounds 36-39
395  E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
396  ABCD = vsha1pq_u32(ABCD, E1, TMP1);
397  TMP1 = vaddq_u32(MSG3, C2);
398  MSG0 = vsha1su1q_u32(MSG0, MSG3);
399  MSG1 = vsha1su0q_u32(MSG1, MSG2, MSG3);
400 
401  // Rounds 40-43
402  E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
403  ABCD = vsha1mq_u32(ABCD, E0, TMP0);
404  TMP0 = vaddq_u32(MSG0, C2);
405  MSG1 = vsha1su1q_u32(MSG1, MSG0);
406  MSG2 = vsha1su0q_u32(MSG2, MSG3, MSG0);
407 
408  // Rounds 44-47
409  E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
410  ABCD = vsha1mq_u32(ABCD, E1, TMP1);
411  TMP1 = vaddq_u32(MSG1, C2);
412  MSG2 = vsha1su1q_u32(MSG2, MSG1);
413  MSG3 = vsha1su0q_u32(MSG3, MSG0, MSG1);
414 
415  // Rounds 48-51
416  E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
417  ABCD = vsha1mq_u32(ABCD, E0, TMP0);
418  TMP0 = vaddq_u32(MSG2, C2);
419  MSG3 = vsha1su1q_u32(MSG3, MSG2);
420  MSG0 = vsha1su0q_u32(MSG0, MSG1, MSG2);
421 
422  // Rounds 52-55
423  E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
424  ABCD = vsha1mq_u32(ABCD, E1, TMP1);
425  TMP1 = vaddq_u32(MSG3, C3);
426  MSG0 = vsha1su1q_u32(MSG0, MSG3);
427  MSG1 = vsha1su0q_u32(MSG1, MSG2, MSG3);
428 
429  // Rounds 56-59
430  E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
431  ABCD = vsha1mq_u32(ABCD, E0, TMP0);
432  TMP0 = vaddq_u32(MSG0, C3);
433  MSG1 = vsha1su1q_u32(MSG1, MSG0);
434  MSG2 = vsha1su0q_u32(MSG2, MSG3, MSG0);
435 
436  // Rounds 60-63
437  E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
438  ABCD = vsha1pq_u32(ABCD, E1, TMP1);
439  TMP1 = vaddq_u32(MSG1, C3);
440  MSG2 = vsha1su1q_u32(MSG2, MSG1);
441  MSG3 = vsha1su0q_u32(MSG3, MSG0, MSG1);
442 
443  // Rounds 64-67
444  E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
445  ABCD = vsha1pq_u32(ABCD, E0, TMP0);
446  TMP0 = vaddq_u32(MSG2, C3);
447  MSG3 = vsha1su1q_u32(MSG3, MSG2);
448  MSG0 = vsha1su0q_u32(MSG0, MSG1, MSG2);
449 
450  // Rounds 68-71
451  E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
452  ABCD = vsha1pq_u32(ABCD, E1, TMP1);
453  TMP1 = vaddq_u32(MSG3, C3);
454  MSG0 = vsha1su1q_u32(MSG0, MSG3);
455 
456  // Rounds 72-75
457  E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
458  ABCD = vsha1pq_u32(ABCD, E0, TMP0);
459 
460  // Rounds 76-79
461  E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
462  ABCD = vsha1pq_u32(ABCD, E1, TMP1);
463 
464  E0 += E0_SAVED;
465  ABCD = vaddq_u32(ABCD_SAVED, ABCD);
466 
467  // Save state
468  vst1q_u32(&state[0], ABCD);
469  state[4] = E0;
470 }
471 #endif // CRYPTOPP_ARM_CRYPTO_AVAILABLE
472 
473 ///////////////////////////////////////////////////////
474 // end of Walton/Schneiders/O'Rourke/Hovsmith's code //
475 ///////////////////////////////////////////////////////
476 
477 pfnSHATransform InitializeSHA1Transform()
478 {
479 #if CRYPTOPP_SSE_SHA_AVAILABLE
480  if (HasSHA())
481  return &SHA1_SSE_SHA_Transform;
482  else
483 #endif
484 #if CRYPTOPP_ARM_CRYPTO_AVAILABLE
485  if (HasSHA1())
486  return &SHA1_ARM_SHA_Transform;
487  else
488 #endif
489  return &SHA1_CXX_Transform;
490 }
491 
492 void SHA1::InitState(HashWordType *state)
493 {
494  state[0] = 0x67452301L;
495  state[1] = 0xEFCDAB89L;
496  state[2] = 0x98BADCFEL;
497  state[3] = 0x10325476L;
498  state[4] = 0xC3D2E1F0L;
499 }
500 
501 void SHA1::Transform(word32 *state, const word32 *data)
502 {
503  static const pfnSHATransform s_pfn = InitializeSHA1Transform();
504  s_pfn(state, data);
505 }
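
// Usage note: callers never see the dispatch above; hashing goes through the ordinary
// HashTransformation interface. A minimal sketch (message and messageLen are
// placeholders supplied by the caller):
//
//   SHA1 sha;
//   byte digest[SHA1::DIGESTSIZE];
//   sha.CalculateDigest(digest, reinterpret_cast<const byte*>(message), messageLen);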
506 
507 // *************************************************************
508 
509 void SHA224::InitState(HashWordType *state)
510 {
511  static const word32 s[8] = {0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939, 0xffc00b31, 0x68581511, 0x64f98fa7, 0xbefa4fa4};
512  memcpy(state, s, sizeof(s));
513 }
514 
515 void SHA256::InitState(HashWordType *state)
516 {
517  static const word32 s[8] = {0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19};
518  memcpy(state, s, sizeof(s));
519 }
520 
521 #if CRYPTOPP_SSE2_ASM_AVAILABLE || CRYPTOPP_ARM_CRYPTO_AVAILABLE
522 CRYPTOPP_ALIGN_DATA(16) extern const word32 SHA256_K[64] CRYPTOPP_SECTION_ALIGN16 = {
523 #else
524 extern const word32 SHA256_K[64] = {
525 #endif
526  0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
527  0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
528  0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
529  0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
530  0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
531  0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
532  0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
533  0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
534  0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
535  0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
536  0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
537  0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
538  0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
539  0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
540  0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
541  0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
542 };
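// The SHA256_K constants are the first 32 bits of the fractional parts of the cube roots
// of the first 64 primes, as specified in FIPS 180-4.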
543 
544 #endif // #ifndef CRYPTOPP_GENERATE_X64_MASM
545 
546 #if (defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_X32_ASM_AVAILABLE) || defined(CRYPTOPP_GENERATE_X64_MASM))
547 
548 static void CRYPTOPP_FASTCALL X86_SHA256_HashBlocks(word32 *state, const word32 *data, size_t len)
549 {
550  #define LOCALS_SIZE 8*4 + 16*4 + 4*WORD_SZ
551  #define H(i) [BASE+ASM_MOD(1024+7-(i),8)*4]
552  #define G(i) H(i+1)
553  #define F(i) H(i+2)
554  #define E(i) H(i+3)
555  #define D(i) H(i+4)
556  #define C(i) H(i+5)
557  #define B(i) H(i+6)
558  #define A(i) H(i+7)
559  #define Wt(i) BASE+8*4+ASM_MOD(1024+15-(i),16)*4
560  #define Wt_2(i) Wt((i)-2)
561  #define Wt_15(i) Wt((i)-15)
562  #define Wt_7(i) Wt((i)-7)
563  #define K_END [BASE+8*4+16*4+0*WORD_SZ]
564  #define STATE_SAVE [BASE+8*4+16*4+1*WORD_SZ]
565  #define DATA_SAVE [BASE+8*4+16*4+2*WORD_SZ]
566  #define DATA_END [BASE+8*4+16*4+3*WORD_SZ]
567  #define Kt(i) WORD_REG(si)+(i)*4
568 #if CRYPTOPP_X32
569  #define BASE esp+8
570 #elif CRYPTOPP_X86
571  #define BASE esp+4
572 #elif defined(__GNUC__)
573  #define BASE r8
574 #else
575  #define BASE rsp
576 #endif
577 
578 #define RA0(i, edx, edi) \
579  AS2( add edx, [Kt(i)] )\
580  AS2( add edx, [Wt(i)] )\
581  AS2( add edx, H(i) )\
582 
583 #define RA1(i, edx, edi)
584 
585 #define RB0(i, edx, edi)
586 
587 #define RB1(i, edx, edi) \
588  AS2( mov AS_REG_7d, [Wt_2(i)] )\
589  AS2( mov edi, [Wt_15(i)])\
590  AS2( mov ebx, AS_REG_7d )\
591  AS2( shr AS_REG_7d, 10 )\
592  AS2( ror ebx, 17 )\
593  AS2( xor AS_REG_7d, ebx )\
594  AS2( ror ebx, 2 )\
595  AS2( xor ebx, AS_REG_7d )/* s1(W_t-2) */\
596  AS2( add ebx, [Wt_7(i)])\
597  AS2( mov AS_REG_7d, edi )\
598  AS2( shr AS_REG_7d, 3 )\
599  AS2( ror edi, 7 )\
600  AS2( add ebx, [Wt(i)])/* s1(W_t-2) + W_t-7 + W_t-16 */\
601  AS2( xor AS_REG_7d, edi )\
602  AS2( add edx, [Kt(i)])\
603  AS2( ror edi, 11 )\
604  AS2( add edx, H(i) )\
605  AS2( xor AS_REG_7d, edi )/* s0(W_t-15) */\
 606  AS2( add AS_REG_7d, ebx )/* W_t = s1(W_t-2) + W_t-7 + s0(W_t-15) + W_t-16*/\
607  AS2( mov [Wt(i)], AS_REG_7d)\
608  AS2( add edx, AS_REG_7d )\
609 
610 #define ROUND(i, r, eax, ecx, edi, edx)\
611  /* in: edi = E */\
612  /* unused: eax, ecx, temp: ebx, AS_REG_7d, out: edx = T1 */\
613  AS2( mov edx, F(i) )\
614  AS2( xor edx, G(i) )\
615  AS2( and edx, edi )\
616  AS2( xor edx, G(i) )/* Ch(E,F,G) = (G^(E&(F^G))) */\
617  AS2( mov AS_REG_7d, edi )\
618  AS2( ror edi, 6 )\
619  AS2( ror AS_REG_7d, 25 )\
620  RA##r(i, edx, edi )/* H + Wt + Kt + Ch(E,F,G) */\
621  AS2( xor AS_REG_7d, edi )\
622  AS2( ror edi, 5 )\
623  AS2( xor AS_REG_7d, edi )/* S1(E) */\
624  AS2( add edx, AS_REG_7d )/* T1 = S1(E) + Ch(E,F,G) + H + Wt + Kt */\
625  RB##r(i, edx, edi )/* H + Wt + Kt + Ch(E,F,G) */\
626  /* in: ecx = A, eax = B^C, edx = T1 */\
627  /* unused: edx, temp: ebx, AS_REG_7d, out: eax = A, ecx = B^C, edx = E */\
628  AS2( mov ebx, ecx )\
629  AS2( xor ecx, B(i) )/* A^B */\
630  AS2( and eax, ecx )\
631  AS2( xor eax, B(i) )/* Maj(A,B,C) = B^((A^B)&(B^C) */\
632  AS2( mov AS_REG_7d, ebx )\
633  AS2( ror ebx, 2 )\
634  AS2( add eax, edx )/* T1 + Maj(A,B,C) */\
635  AS2( add edx, D(i) )\
636  AS2( mov D(i), edx )\
637  AS2( ror AS_REG_7d, 22 )\
638  AS2( xor AS_REG_7d, ebx )\
639  AS2( ror ebx, 11 )\
640  AS2( xor AS_REG_7d, ebx )\
641  AS2( add eax, AS_REG_7d )/* T1 + S0(A) + Maj(A,B,C) */\
642  AS2( mov H(i), eax )\
643 
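// For readers not fluent in the AS1/AS2/ASJ macro assembler: the ROUND macro above carries
// out one SHA-256 round. In plain C++ it would read roughly as follows (a sketch, not
// library code; rotr stands in for rotrFixed):
//
//   T1 = h + (rotr(e,6) ^ rotr(e,11) ^ rotr(e,25))   // S1(e)
//          + (g ^ (e & (f ^ g)))                     // Ch(e,f,g)
//          + K[t] + W[t];
//   T2 = (rotr(a,2) ^ rotr(a,13) ^ rotr(a,22))       // S0(a)
//          + ((a & b) | (c & (a | b)));              // Maj(a,b,c)
//   d += T1;  h = T1 + T2;
//
// RA0/RB0 cover the first 16 rounds, where W[t] comes straight from the byte-swapped
// message; RA1/RB1 cover the remaining rounds and additionally compute the next scheduled
// word W[t] = s1(W[t-2]) + W[t-7] + s0(W[t-15]) + W[t-16] on the fly.
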
644 // Unroll the use of CRYPTOPP_X64 in assembler math. The GAS assembler on X32 (version 2.25)
645 // complains "Error: invalid operands (*ABS* and *UND* sections) for `*` and `-`"
646 #if CRYPTOPP_X64
647 #define SWAP_COPY(i) \
648  AS2( mov WORD_REG(bx), [WORD_REG(dx)+i*WORD_SZ])\
649  AS1( bswap WORD_REG(bx))\
650  AS2( mov [Wt(i*2+1)], WORD_REG(bx))
651 #else // X86 and X32
652 #define SWAP_COPY(i) \
653  AS2( mov WORD_REG(bx), [WORD_REG(dx)+i*WORD_SZ])\
654  AS1( bswap WORD_REG(bx))\
655  AS2( mov [Wt(i)], WORD_REG(bx))
656 #endif
657 
658 #if defined(__GNUC__)
 659  #if CRYPTOPP_X64
 660  FixedSizeAlignedSecBlock<byte, LOCALS_SIZE> workspace;
 661  #endif
662  __asm__ __volatile__
663  (
664  #if CRYPTOPP_X64
665  "lea %4, %%r8;"
666  #endif
667  INTEL_NOPREFIX
668 #elif defined(CRYPTOPP_GENERATE_X64_MASM)
669  ALIGN 8
670  X86_SHA256_HashBlocks PROC FRAME
671  rex_push_reg rsi
672  push_reg rdi
673  push_reg rbx
674  push_reg rbp
675  alloc_stack(LOCALS_SIZE+8)
676  .endprolog
677  mov rdi, r8
678  lea rsi, [?SHA256_K@CryptoPP@@3QBIB + 48*4]
679 #endif
680 
681 #if CRYPTOPP_X86 || CRYPTOPP_X32
682  #ifndef __GNUC__
683  AS2( mov edi, [len])
684  AS2( lea WORD_REG(si), [SHA256_K+48*4])
685  #endif
686  #if !defined(_MSC_VER) || (_MSC_VER < 1400)
687  AS_PUSH_IF86(bx)
688  #endif
689 
690  AS_PUSH_IF86(bp)
691  AS2( mov ebx, esp)
692  AS2( and esp, -16)
693  AS2( sub WORD_REG(sp), LOCALS_SIZE)
694  AS_PUSH_IF86(bx)
695 #endif
696  AS2( mov STATE_SAVE, WORD_REG(cx))
697  AS2( mov DATA_SAVE, WORD_REG(dx))
698  AS2( lea WORD_REG(ax), [WORD_REG(di) + WORD_REG(dx)])
699  AS2( mov DATA_END, WORD_REG(ax))
700  AS2( mov K_END, WORD_REG(si))
701 
702 #if CRYPTOPP_SSE2_ASM_AVAILABLE
703 #if CRYPTOPP_X86 || CRYPTOPP_X32
704  AS2( test edi, 1)
705  ASJ( jnz, 2, f)
706  AS1( dec DWORD PTR K_END)
707 #endif
708  AS2( movdqa xmm0, XMMWORD_PTR [WORD_REG(cx)+0*16])
709  AS2( movdqa xmm1, XMMWORD_PTR [WORD_REG(cx)+1*16])
710 #endif
711 
712 #if CRYPTOPP_X86 || CRYPTOPP_X32
713 #if CRYPTOPP_SSE2_ASM_AVAILABLE
714  ASJ( jmp, 0, f)
715 #endif
716  ASL(2) // non-SSE2
717  AS2( mov esi, ecx)
718  AS2( lea edi, A(0))
719  AS2( mov ecx, 8)
720 ATT_NOPREFIX
721  AS1( rep movsd)
722 INTEL_NOPREFIX
723  AS2( mov esi, K_END)
724  ASJ( jmp, 3, f)
725 #endif
726 
727 #if CRYPTOPP_SSE2_ASM_AVAILABLE
728  ASL(0)
729  AS2( movdqa E(0), xmm1)
730  AS2( movdqa A(0), xmm0)
731 #endif
732 #if CRYPTOPP_X86 || CRYPTOPP_X32
733  ASL(3)
734 #endif
735  AS2( sub WORD_REG(si), 48*4)
736  SWAP_COPY(0) SWAP_COPY(1) SWAP_COPY(2) SWAP_COPY(3)
737  SWAP_COPY(4) SWAP_COPY(5) SWAP_COPY(6) SWAP_COPY(7)
738 #if CRYPTOPP_X86 || CRYPTOPP_X32
739  SWAP_COPY(8) SWAP_COPY(9) SWAP_COPY(10) SWAP_COPY(11)
740  SWAP_COPY(12) SWAP_COPY(13) SWAP_COPY(14) SWAP_COPY(15)
741 #endif
742  AS2( mov edi, E(0)) // E
743  AS2( mov eax, B(0)) // B
744  AS2( xor eax, C(0)) // B^C
745  AS2( mov ecx, A(0)) // A
746 
747  ROUND(0, 0, eax, ecx, edi, edx)
748  ROUND(1, 0, ecx, eax, edx, edi)
749  ROUND(2, 0, eax, ecx, edi, edx)
750  ROUND(3, 0, ecx, eax, edx, edi)
751  ROUND(4, 0, eax, ecx, edi, edx)
752  ROUND(5, 0, ecx, eax, edx, edi)
753  ROUND(6, 0, eax, ecx, edi, edx)
754  ROUND(7, 0, ecx, eax, edx, edi)
755  ROUND(8, 0, eax, ecx, edi, edx)
756  ROUND(9, 0, ecx, eax, edx, edi)
757  ROUND(10, 0, eax, ecx, edi, edx)
758  ROUND(11, 0, ecx, eax, edx, edi)
759  ROUND(12, 0, eax, ecx, edi, edx)
760  ROUND(13, 0, ecx, eax, edx, edi)
761  ROUND(14, 0, eax, ecx, edi, edx)
762  ROUND(15, 0, ecx, eax, edx, edi)
763 
764  ASL(1)
765  AS2(add WORD_REG(si), 4*16)
766  ROUND(0, 1, eax, ecx, edi, edx)
767  ROUND(1, 1, ecx, eax, edx, edi)
768  ROUND(2, 1, eax, ecx, edi, edx)
769  ROUND(3, 1, ecx, eax, edx, edi)
770  ROUND(4, 1, eax, ecx, edi, edx)
771  ROUND(5, 1, ecx, eax, edx, edi)
772  ROUND(6, 1, eax, ecx, edi, edx)
773  ROUND(7, 1, ecx, eax, edx, edi)
774  ROUND(8, 1, eax, ecx, edi, edx)
775  ROUND(9, 1, ecx, eax, edx, edi)
776  ROUND(10, 1, eax, ecx, edi, edx)
777  ROUND(11, 1, ecx, eax, edx, edi)
778  ROUND(12, 1, eax, ecx, edi, edx)
779  ROUND(13, 1, ecx, eax, edx, edi)
780  ROUND(14, 1, eax, ecx, edi, edx)
781  ROUND(15, 1, ecx, eax, edx, edi)
782  AS2( cmp WORD_REG(si), K_END)
783  ATT_NOPREFIX
784  ASJ( jb, 1, b)
785  INTEL_NOPREFIX
786 
787  AS2( mov WORD_REG(dx), DATA_SAVE)
788  AS2( add WORD_REG(dx), 64)
789  AS2( mov AS_REG_7, STATE_SAVE)
790  AS2( mov DATA_SAVE, WORD_REG(dx))
791 
792 #if CRYPTOPP_SSE2_ASM_AVAILABLE
793 #if CRYPTOPP_X86 || CRYPTOPP_X32
794  AS2( test DWORD PTR K_END, 1)
795  ASJ( jz, 4, f)
796 #endif
797  AS2( movdqa xmm1, XMMWORD_PTR [AS_REG_7+1*16])
798  AS2( movdqa xmm0, XMMWORD_PTR [AS_REG_7+0*16])
799  AS2( paddd xmm1, E(0))
800  AS2( paddd xmm0, A(0))
801  AS2( movdqa [AS_REG_7+1*16], xmm1)
802  AS2( movdqa [AS_REG_7+0*16], xmm0)
803  AS2( cmp WORD_REG(dx), DATA_END)
804  ATT_NOPREFIX
805  ASJ( jb, 0, b)
806  INTEL_NOPREFIX
807 #endif
808 
809 #if CRYPTOPP_X86 || CRYPTOPP_X32
810 #if CRYPTOPP_SSE2_ASM_AVAILABLE
811  ASJ( jmp, 5, f)
812  ASL(4) // non-SSE2
813 #endif
814  AS2( add [AS_REG_7+0*4], ecx) // A
815  AS2( add [AS_REG_7+4*4], edi) // E
816  AS2( mov eax, B(0))
817  AS2( mov ebx, C(0))
818  AS2( mov ecx, D(0))
819  AS2( add [AS_REG_7+1*4], eax)
820  AS2( add [AS_REG_7+2*4], ebx)
821  AS2( add [AS_REG_7+3*4], ecx)
822  AS2( mov eax, F(0))
823  AS2( mov ebx, G(0))
824  AS2( mov ecx, H(0))
825  AS2( add [AS_REG_7+5*4], eax)
826  AS2( add [AS_REG_7+6*4], ebx)
827  AS2( add [AS_REG_7+7*4], ecx)
828  AS2( mov ecx, AS_REG_7d)
829  AS2( cmp WORD_REG(dx), DATA_END)
830  ASJ( jb, 2, b)
831 #if CRYPTOPP_SSE2_ASM_AVAILABLE
832  ASL(5)
833 #endif
834 #endif
835 
836  AS_POP_IF86(sp)
837  AS_POP_IF86(bp)
838  #if !defined(_MSC_VER) || (_MSC_VER < 1400)
839  AS_POP_IF86(bx)
840  #endif
841 
842 #ifdef CRYPTOPP_GENERATE_X64_MASM
843  add rsp, LOCALS_SIZE+8
844  pop rbp
845  pop rbx
846  pop rdi
847  pop rsi
848  ret
849  X86_SHA256_HashBlocks ENDP
850 #endif
851 
852 #ifdef __GNUC__
853  ATT_PREFIX
854  :
855  : "c" (state), "d" (data), "S" (SHA256_K+48), "D" (len)
856  #if CRYPTOPP_X64
857  , "m" (workspace[0])
858  #endif
859  : "memory", "cc", "%eax"
860  #if CRYPTOPP_X64
861  , "%rbx", "%r8", "%r10"
862  #endif
863  );
864 #endif
865 }
866 
 867 #endif // (defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_X32_ASM_AVAILABLE) || defined(CRYPTOPP_GENERATE_X64_MASM))
868 
869 #ifndef CRYPTOPP_GENERATE_X64_MASM
870 
871 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
872 extern "C" {
873 void CRYPTOPP_FASTCALL X86_SHA256_HashBlocks(word32 *state, const word32 *data, size_t len);
874 }
875 #endif
876 
877 #if CRYPTOPP_SSE_SHA_AVAILABLE
878 static void CRYPTOPP_FASTCALL SHA256_SSE_SHA_HashBlocks(word32 *state, const word32 *data, size_t length);
879 #elif CRYPTOPP_ARM_CRYPTO_AVAILABLE
880 static void CRYPTOPP_FASTCALL SHA256_ARM_SHA_HashBlocks(word32 *state, const word32 *data, size_t length);
881 #endif
882 
883 #if (defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_X32_ASM_AVAILABLE) || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_SHA_ASM)
884 
885 pfnSHAHashBlocks InitializeSHA256HashBlocks()
886 {
887 #if CRYPTOPP_SSE_SHA_AVAILABLE
888  if (HasSHA())
889  return &SHA256_SSE_SHA_HashBlocks;
890  else
891 #endif
892 #if CRYPTOPP_ARM_CRYPTO_AVAILABLE
893  if (HasSHA2())
894  return &SHA256_ARM_SHA_HashBlocks;
895  else
896 #endif
897 
898  return &X86_SHA256_HashBlocks;
899 }
900 
901 size_t SHA256::HashMultipleBlocks(const word32 *input, size_t length)
902 {
903  static const pfnSHAHashBlocks s_pfn = InitializeSHA256HashBlocks();
904  s_pfn(m_state, input, (length&(size_t(0)-BLOCKSIZE)) - !HasSSE2());
905  return length % BLOCKSIZE;
906 }
907 
908 size_t SHA224::HashMultipleBlocks(const word32 *input, size_t length)
909 {
910  static const pfnSHAHashBlocks s_pfn = InitializeSHA256HashBlocks();
911  s_pfn(m_state, input, (length&(size_t(0)-BLOCKSIZE)) - !HasSSE2());
912  return length % BLOCKSIZE;
913 }
914 #endif
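
// A note on the length argument above: BLOCKSIZE is a power of two, so
// (length & (size_t(0)-BLOCKSIZE)) equals (length & ~(BLOCKSIZE-1)), i.e. length rounded
// down to a whole number of 64-byte blocks. Subtracting !HasSSE2() then borrows the low
// bit of the otherwise block-aligned length as a flag, which X86_SHA256_HashBlocks checks
// ("test edi, 1") to select its non-SSE2 path. An illustrative scalar sketch of the
// encoding (hypothetical helper, not library code):
//
//   size_t EncodeHashBlocksLength(size_t length, bool hasSSE2)
//   {
//       const size_t wholeBlocks = length & (size_t(0) - SHA256::BLOCKSIZE); // round down
//       return wholeBlocks - (hasSSE2 ? 0 : 1);                              // low bit = !SSE2
//   }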
915 
916 #define blk2(i) (W[i&15]+=s1(W[(i-2)&15])+W[(i-7)&15]+s0(W[(i-15)&15]))
917 
918 #define Ch(x,y,z) (z^(x&(y^z)))
919 #define Maj(x,y,z) (y^((x^y)&(y^z)))
920 
921 #define a(i) T[(0-i)&7]
922 #define b(i) T[(1-i)&7]
923 #define c(i) T[(2-i)&7]
924 #define d(i) T[(3-i)&7]
925 #define e(i) T[(4-i)&7]
926 #define f(i) T[(5-i)&7]
927 #define g(i) T[(6-i)&7]
928 #define h(i) T[(7-i)&7]
929 
930 #define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+SHA256_K[i+j]+(j?blk2(i):blk0(i));\
931  d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i))
932 
933 // for SHA256
934 #define S0(x) (rotrFixed(x,2)^rotrFixed(x,13)^rotrFixed(x,22))
935 #define S1(x) (rotrFixed(x,6)^rotrFixed(x,11)^rotrFixed(x,25))
936 #define s0(x) (rotrFixed(x,7)^rotrFixed(x,18)^(x>>3))
937 #define s1(x) (rotrFixed(x,17)^rotrFixed(x,19)^(x>>10))
938 
939 #if defined(__OPTIMIZE_SIZE__)
940 // Smaller but slower
941 void SHA256_CXX_Transform(word32 *state, const word32 *data)
942 {
943  word32 W[32], T[20];
944  unsigned int i = 0, j = 0;
945  word32 *t = T+8;
946 
947  memcpy(t, state, 8*4);
948  word32 e = t[4], a = t[0];
949 
950  do
951  {
952  word32 w = data[j];
953  W[j] = w;
954  w += SHA256_K[j];
955  w += t[7];
956  w += S1(e);
957  w += Ch(e, t[5], t[6]);
958  e = t[3] + w;
959  t[3] = t[3+8] = e;
960  w += S0(t[0]);
961  a = w + Maj(a, t[1], t[2]);
962  t[-1] = t[7] = a;
963  --t;
964  ++j;
965  if (j%8 == 0)
966  t += 8;
967  } while (j<16);
968 
969  do
970  {
971  i = j&0xf;
972  word32 w = s1(W[i+16-2]) + s0(W[i+16-15]) + W[i] + W[i+16-7];
973  W[i+16] = W[i] = w;
974  w += SHA256_K[j];
975  w += t[7];
976  w += S1(e);
977  w += Ch(e, t[5], t[6]);
978  e = t[3] + w;
979  t[3] = t[3+8] = e;
980  w += S0(t[0]);
981  a = w + Maj(a, t[1], t[2]);
982  t[-1] = t[7] = a;
983 
984  w = s1(W[(i+1)+16-2]) + s0(W[(i+1)+16-15]) + W[(i+1)] + W[(i+1)+16-7];
985  W[(i+1)+16] = W[(i+1)] = w;
986  w += SHA256_K[j+1];
987  w += (t-1)[7];
988  w += S1(e);
989  w += Ch(e, (t-1)[5], (t-1)[6]);
990  e = (t-1)[3] + w;
991  (t-1)[3] = (t-1)[3+8] = e;
992  w += S0((t-1)[0]);
993  a = w + Maj(a, (t-1)[1], (t-1)[2]);
994  (t-1)[-1] = (t-1)[7] = a;
995 
996  t-=2;
997  j+=2;
998  if (j%8 == 0)
999  t += 8;
1000  } while (j<64);
1001 
1002  state[0] += a;
1003  state[1] += t[1];
1004  state[2] += t[2];
1005  state[3] += t[3];
1006  state[4] += e;
1007  state[5] += t[5];
1008  state[6] += t[6];
1009  state[7] += t[7];
1010 }
1011 #else
1012 // Bigger but faster
1013 void SHA256_CXX_Transform(word32 *state, const word32 *data)
1014 {
1015  word32 W[16], T[8];
1016  /* Copy context->state[] to working vars */
1017  memcpy(T, state, sizeof(T));
1018  /* 64 operations, partially loop unrolled */
1019  for (unsigned int j=0; j<64; j+=16)
1020  {
1021  R( 0); R( 1); R( 2); R( 3);
1022  R( 4); R( 5); R( 6); R( 7);
1023  R( 8); R( 9); R(10); R(11);
1024  R(12); R(13); R(14); R(15);
1025  }
1026  /* Add the working vars back into context.state[] */
1027  state[0] += a(0);
1028  state[1] += b(0);
1029  state[2] += c(0);
1030  state[3] += d(0);
1031  state[4] += e(0);
1032  state[5] += f(0);
1033  state[6] += g(0);
1034  state[7] += h(0);
1035 }
1036 #endif // __OPTIMIZE_SIZE__
1037 
1038 #undef S0
1039 #undef S1
1040 #undef s0
1041 #undef s1
1042 #undef R
1043 
1044 #if CRYPTOPP_SSE2_ASM_AVAILABLE
1045 static void SHA256_SSE2_Transform(word32 *state, const word32 *data)
1046 {
1047  // this byte reverse is a waste of time, but this function is only called by MDC
1048  word32 W[16];
1049  ByteReverse(W, data, SHA256::BLOCKSIZE);
1050  X86_SHA256_HashBlocks(state, W, SHA256::BLOCKSIZE - !HasSSE2());
1051 }
1052 #endif // CRYPTOPP_SSE2_ASM_AVAILABLE
1053 
1054 #if CRYPTOPP_SSE_SHA_AVAILABLE
1055 static void SHA256_SSE_SHA_Transform(word32 *state, const word32 *data)
1056 {
1057  return SHA256_SSE_SHA_HashBlocks(state, data, SHA256::BLOCKSIZE);
1058 }
1059 #endif // CRYPTOPP_SSE_SHA_AVAILABLE
1060 
1061 #if CRYPTOPP_ARM_CRYPTO_AVAILABLE
1062 static void SHA256_ARM_SHA_Transform(word32 *state, const word32 *data)
1063 {
1064  return SHA256_ARM_SHA_HashBlocks(state, data, SHA256::BLOCKSIZE);
1065 }
1066 #endif // CRYPTOPP_ARM_CRYPTO_AVAILABLE
1067 
1068 ///////////////////////////////////
1069 // start of Walton/Gulley's code //
1070 ///////////////////////////////////
1071 
1072 #if CRYPTOPP_SSE_SHA_AVAILABLE
1073 // Based on http://software.intel.com/en-us/articles/intel-sha-extensions and code by Sean Gulley.
1074 static void CRYPTOPP_FASTCALL SHA256_SSE_SHA_HashBlocks(word32 *state, const word32 *data, size_t length)
1075 {
1076  CRYPTOPP_ASSERT(state); CRYPTOPP_ASSERT(data);
1077  CRYPTOPP_ASSERT(length % SHA256::BLOCKSIZE == 0);
1078 
1079  __m128i STATE0, STATE1;
1080  __m128i MSG, TMP, MASK;
1081  __m128i TMSG0, TMSG1, TMSG2, TMSG3;
1082  __m128i ABEF_SAVE, CDGH_SAVE;
1083 
1084  // Load initial values
1085  TMP = _mm_loadu_si128((__m128i*) &state[0]);
1086  STATE1 = _mm_loadu_si128((__m128i*) &state[4]);
1087  MASK = _mm_set_epi64x(W64LIT(0x0c0d0e0f08090a0b), W64LIT(0x0405060700010203));
1088 
1089  TMP = MM_SHUFFLE_EPI32(TMP, 0xB1); // CDAB
1090  STATE1 = MM_SHUFFLE_EPI32(STATE1, 0x1B); // EFGH
1091  STATE0 = MM_ALIGNR_EPI8(TMP, STATE1, 8); // ABEF
1092  STATE1 = MM_BLEND_EPI16(STATE1, TMP, 0xF0); // CDGH
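 // The SHA-NI sha256rnds2 instruction expects the eight state words split across two
 // registers as {A,B,E,F} and {C,D,G,H}; the shuffle/alignr/blend sequence above repacks
 // the linear state[0..7] into that layout, and the inverse sequence after the loop
 // restores it. Each MM_SHA256RNDS2_EPU32 call performs two rounds, so every
 // "Rounds N..N+3" block below issues it twice.
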
1093 
1094  while (length >= SHA256::BLOCKSIZE)
1095  {
1096  // Save current hash
1097  ABEF_SAVE = STATE0;
1098  CDGH_SAVE = STATE1;
1099 
1100  // Rounds 0-3
1101  MSG = _mm_loadu_si128((__m128i*) data+0);
1102  TMSG0 = MM_SHUFFLE_EPI8(MSG, MASK);
1103  MSG = _mm_add_epi32(TMSG0, _mm_set_epi64x(W64LIT(0xE9B5DBA5B5C0FBCF), W64LIT(0x71374491428A2F98)));
1104  STATE1 = MM_SHA256RNDS2_EPU32(STATE1, STATE0, MSG);
1105  MSG = MM_SHUFFLE_EPI32(MSG, 0x0E);
1106  STATE0 = MM_SHA256RNDS2_EPU32(STATE0, STATE1, MSG);
1107 
1108  // Rounds 4-7
1109  TMSG1 = _mm_loadu_si128((__m128i*) (data+4));
1110  TMSG1 = MM_SHUFFLE_EPI8(TMSG1, MASK);
1111  MSG = _mm_add_epi32(TMSG1, _mm_set_epi64x(W64LIT(0xAB1C5ED5923F82A4), W64LIT(0x59F111F13956C25B)));
1112  STATE1 = MM_SHA256RNDS2_EPU32(STATE1, STATE0, MSG);
1113  MSG = MM_SHUFFLE_EPI32(MSG, 0x0E);
1114  STATE0 = MM_SHA256RNDS2_EPU32(STATE0, STATE1, MSG);
1115  TMSG0 = MM_SHA256MSG1_EPU32(TMSG0, TMSG1);
1116 
1117  // Rounds 8-11
1118  TMSG2 = _mm_loadu_si128((__m128i*) (data+8));
1119  TMSG2 = MM_SHUFFLE_EPI8(TMSG2, MASK);
1120  MSG = _mm_add_epi32(TMSG2, _mm_set_epi64x(W64LIT(0x550C7DC3243185BE), W64LIT(0x12835B01D807AA98)));
1121  STATE1 = MM_SHA256RNDS2_EPU32(STATE1, STATE0, MSG);
1122  MSG = MM_SHUFFLE_EPI32(MSG, 0x0E);
1123  STATE0 = MM_SHA256RNDS2_EPU32(STATE0, STATE1, MSG);
1124  TMSG1 = MM_SHA256MSG1_EPU32(TMSG1, TMSG2);
1125 
1126  // Rounds 12-15
1127  TMSG3 = _mm_loadu_si128((__m128i*) (data+12));
1128  TMSG3 = MM_SHUFFLE_EPI8(TMSG3, MASK);
1129  MSG = _mm_add_epi32(TMSG3, _mm_set_epi64x(W64LIT(0xC19BF1749BDC06A7), W64LIT(0x80DEB1FE72BE5D74)));
1130  STATE1 = MM_SHA256RNDS2_EPU32(STATE1, STATE0, MSG);
1131  TMP = MM_ALIGNR_EPI8(TMSG3, TMSG2, 4);
1132  TMSG0 = _mm_add_epi32(TMSG0, TMP);
1133  TMSG0 = MM_SHA256MSG2_EPU32(TMSG0, TMSG3);
1134  MSG = MM_SHUFFLE_EPI32(MSG, 0x0E);
1135  STATE0 = MM_SHA256RNDS2_EPU32(STATE0, STATE1, MSG);
1136  TMSG2 = MM_SHA256MSG1_EPU32(TMSG2, TMSG3);
1137 
1138  // Rounds 16-19
1139  MSG = _mm_add_epi32(TMSG0, _mm_set_epi64x(W64LIT(0x240CA1CC0FC19DC6), W64LIT(0xEFBE4786E49B69C1)));
1140  STATE1 = MM_SHA256RNDS2_EPU32(STATE1, STATE0, MSG);
1141  TMP = MM_ALIGNR_EPI8(TMSG0, TMSG3, 4);
1142  TMSG1 = _mm_add_epi32(TMSG1, TMP);
1143  TMSG1 = MM_SHA256MSG2_EPU32(TMSG1, TMSG0);
1144  MSG = MM_SHUFFLE_EPI32(MSG, 0x0E);
1145  STATE0 = MM_SHA256RNDS2_EPU32(STATE0, STATE1, MSG);
1146  TMSG3 = MM_SHA256MSG1_EPU32(TMSG3, TMSG0);
1147 
1148  // Rounds 20-23
1149  MSG = _mm_add_epi32(TMSG1, _mm_set_epi64x(W64LIT(0x76F988DA5CB0A9DC), W64LIT(0x4A7484AA2DE92C6F)));
1150  STATE1 = MM_SHA256RNDS2_EPU32(STATE1, STATE0, MSG);
1151  TMP = MM_ALIGNR_EPI8(TMSG1, TMSG0, 4);
1152  TMSG2 = _mm_add_epi32(TMSG2, TMP);
1153  TMSG2 = MM_SHA256MSG2_EPU32(TMSG2, TMSG1);
1154  MSG = MM_SHUFFLE_EPI32(MSG, 0x0E);
1155  STATE0 = MM_SHA256RNDS2_EPU32(STATE0, STATE1, MSG);
1156  TMSG0 = MM_SHA256MSG1_EPU32(TMSG0, TMSG1);
1157 
1158  // Rounds 24-27
1159  MSG = _mm_add_epi32(TMSG2, _mm_set_epi64x(W64LIT(0xBF597FC7B00327C8), W64LIT(0xA831C66D983E5152)));
1160  STATE1 = MM_SHA256RNDS2_EPU32(STATE1, STATE0, MSG);
1161  TMP = MM_ALIGNR_EPI8(TMSG2, TMSG1, 4);
1162  TMSG3 = _mm_add_epi32(TMSG3, TMP);
1163  TMSG3 = MM_SHA256MSG2_EPU32(TMSG3, TMSG2);
1164  MSG = MM_SHUFFLE_EPI32(MSG, 0x0E);
1165  STATE0 = MM_SHA256RNDS2_EPU32(STATE0, STATE1, MSG);
1166  TMSG1 = MM_SHA256MSG1_EPU32(TMSG1, TMSG2);
1167 
1168  // Rounds 28-31
1169  MSG = _mm_add_epi32(TMSG3, _mm_set_epi64x(W64LIT(0x1429296706CA6351), W64LIT(0xD5A79147C6E00BF3)));
1170  STATE1 = MM_SHA256RNDS2_EPU32(STATE1, STATE0, MSG);
1171  TMP = MM_ALIGNR_EPI8(TMSG3, TMSG2, 4);
1172  TMSG0 = _mm_add_epi32(TMSG0, TMP);
1173  TMSG0 = MM_SHA256MSG2_EPU32(TMSG0, TMSG3);
1174  MSG = MM_SHUFFLE_EPI32(MSG, 0x0E);
1175  STATE0 = MM_SHA256RNDS2_EPU32(STATE0, STATE1, MSG);
1176  TMSG2 = MM_SHA256MSG1_EPU32(TMSG2, TMSG3);
1177 
1178  // Rounds 32-35
1179  MSG = _mm_add_epi32(TMSG0, _mm_set_epi64x(W64LIT(0x53380D134D2C6DFC), W64LIT(0x2E1B213827B70A85)));
1180  STATE1 = MM_SHA256RNDS2_EPU32(STATE1, STATE0, MSG);
1181  TMP = MM_ALIGNR_EPI8(TMSG0, TMSG3, 4);
1182  TMSG1 = _mm_add_epi32(TMSG1, TMP);
1183  TMSG1 = MM_SHA256MSG2_EPU32(TMSG1, TMSG0);
1184  MSG = MM_SHUFFLE_EPI32(MSG, 0x0E);
1185  STATE0 = MM_SHA256RNDS2_EPU32(STATE0, STATE1, MSG);
1186  TMSG3 = MM_SHA256MSG1_EPU32(TMSG3, TMSG0);
1187 
1188  // Rounds 36-39
1189  MSG = _mm_add_epi32(TMSG1, _mm_set_epi64x(W64LIT(0x92722C8581C2C92E), W64LIT(0x766A0ABB650A7354)));
1190  STATE1 = MM_SHA256RNDS2_EPU32(STATE1, STATE0, MSG);
1191  TMP = MM_ALIGNR_EPI8(TMSG1, TMSG0, 4);
1192  TMSG2 = _mm_add_epi32(TMSG2, TMP);
1193  TMSG2 = MM_SHA256MSG2_EPU32(TMSG2, TMSG1);
1194  MSG = MM_SHUFFLE_EPI32(MSG, 0x0E);
1195  STATE0 = MM_SHA256RNDS2_EPU32(STATE0, STATE1, MSG);
1196  TMSG0 = MM_SHA256MSG1_EPU32(TMSG0, TMSG1);
1197 
1198  // Rounds 40-43
1199  MSG = _mm_add_epi32(TMSG2, _mm_set_epi64x(W64LIT(0xC76C51A3C24B8B70), W64LIT(0xA81A664BA2BFE8A1)));
1200  STATE1 = MM_SHA256RNDS2_EPU32(STATE1, STATE0, MSG);
1201  TMP = MM_ALIGNR_EPI8(TMSG2, TMSG1, 4);
1202  TMSG3 = _mm_add_epi32(TMSG3, TMP);
1203  TMSG3 = MM_SHA256MSG2_EPU32(TMSG3, TMSG2);
1204  MSG = MM_SHUFFLE_EPI32(MSG, 0x0E);
1205  STATE0 = MM_SHA256RNDS2_EPU32(STATE0, STATE1, MSG);
1206  TMSG1 = MM_SHA256MSG1_EPU32(TMSG1, TMSG2);
1207 
1208  // Rounds 44-47
1209  MSG = _mm_add_epi32(TMSG3, _mm_set_epi64x(W64LIT(0x106AA070F40E3585), W64LIT(0xD6990624D192E819)));
1210  STATE1 = MM_SHA256RNDS2_EPU32(STATE1, STATE0, MSG);
1211  TMP = MM_ALIGNR_EPI8(TMSG3, TMSG2, 4);
1212  TMSG0 = _mm_add_epi32(TMSG0, TMP);
1213  TMSG0 = MM_SHA256MSG2_EPU32(TMSG0, TMSG3);
1214  MSG = MM_SHUFFLE_EPI32(MSG, 0x0E);
1215  STATE0 = MM_SHA256RNDS2_EPU32(STATE0, STATE1, MSG);
1216  TMSG2 = MM_SHA256MSG1_EPU32(TMSG2, TMSG3);
1217 
1218  // Rounds 48-51
1219  MSG = _mm_add_epi32(TMSG0, _mm_set_epi64x(W64LIT(0x34B0BCB52748774C), W64LIT(0x1E376C0819A4C116)));
1220  STATE1 = MM_SHA256RNDS2_EPU32(STATE1, STATE0, MSG);
1221  TMP = MM_ALIGNR_EPI8(TMSG0, TMSG3, 4);
1222  TMSG1 = _mm_add_epi32(TMSG1, TMP);
1223  TMSG1 = MM_SHA256MSG2_EPU32(TMSG1, TMSG0);
1224  MSG = MM_SHUFFLE_EPI32(MSG, 0x0E);
1225  STATE0 = MM_SHA256RNDS2_EPU32(STATE0, STATE1, MSG);
1226  TMSG3 = MM_SHA256MSG1_EPU32(TMSG3, TMSG0);
1227 
1228  // Rounds 52-55
1229  MSG = _mm_add_epi32(TMSG1, _mm_set_epi64x(W64LIT(0x682E6FF35B9CCA4F), W64LIT(0x4ED8AA4A391C0CB3)));
1230  STATE1 = MM_SHA256RNDS2_EPU32(STATE1, STATE0, MSG);
1231  TMP = MM_ALIGNR_EPI8(TMSG1, TMSG0, 4);
1232  TMSG2 = _mm_add_epi32(TMSG2, TMP);
1233  TMSG2 = MM_SHA256MSG2_EPU32(TMSG2, TMSG1);
1234  MSG = MM_SHUFFLE_EPI32(MSG, 0x0E);
1235  STATE0 = MM_SHA256RNDS2_EPU32(STATE0, STATE1, MSG);
1236 
1237  // Rounds 56-59
1238  MSG = _mm_add_epi32(TMSG2, _mm_set_epi64x(W64LIT(0x8CC7020884C87814), W64LIT(0x78A5636F748F82EE)));
1239  STATE1 = MM_SHA256RNDS2_EPU32(STATE1, STATE0, MSG);
1240  TMP = MM_ALIGNR_EPI8(TMSG2, TMSG1, 4);
1241  TMSG3 = _mm_add_epi32(TMSG3, TMP);
1242  TMSG3 = MM_SHA256MSG2_EPU32(TMSG3, TMSG2);
1243  MSG = MM_SHUFFLE_EPI32(MSG, 0x0E);
1244  STATE0 = MM_SHA256RNDS2_EPU32(STATE0, STATE1, MSG);
1245 
1246  // Rounds 60-63
1247  MSG = _mm_add_epi32(TMSG3, _mm_set_epi64x(W64LIT(0xC67178F2BEF9A3F7), W64LIT(0xA4506CEB90BEFFFA)));
1248  STATE1 = MM_SHA256RNDS2_EPU32(STATE1, STATE0, MSG);
1249  MSG = MM_SHUFFLE_EPI32(MSG, 0x0E);
1250  STATE0 = MM_SHA256RNDS2_EPU32(STATE0, STATE1, MSG);
1251 
1252  // Add values back to state
1253  STATE0 = _mm_add_epi32(STATE0, ABEF_SAVE);
1254  STATE1 = _mm_add_epi32(STATE1, CDGH_SAVE);
1255 
1256  data += SHA256::BLOCKSIZE/sizeof(word32);
1257  length -= SHA256::BLOCKSIZE;
1258  }
1259 
1260  TMP = MM_SHUFFLE_EPI32(STATE0, 0x1B); // FEBA
1261  STATE1 = MM_SHUFFLE_EPI32(STATE1, 0xB1); // DCHG
1262  STATE0 = MM_BLEND_EPI16(TMP, STATE1, 0xF0); // DCBA
 1263  STATE1 = MM_ALIGNR_EPI8(STATE1, TMP, 0x08); // HGFE
1264 
1265  // Save state
1266  _mm_storeu_si128((__m128i*) &state[0], STATE0);
1267  _mm_storeu_si128((__m128i*) &state[4], STATE1);
1268 }
1269 #endif // CRYPTOPP_SSE_SHA_AVAILABLE
1270 
1271 /////////////////////////////////
1272 // end of Walton/Gulley's code //
1273 /////////////////////////////////
1274 
1275 /////////////////////////////////////////////////////////
1276 // start of Walton/Schneiders/O'Rourke/Hovsmith's code //
1277 /////////////////////////////////////////////////////////
1278 
1279 #if CRYPTOPP_ARM_CRYPTO_AVAILABLE
1280 static void CRYPTOPP_FASTCALL SHA256_ARM_SHA_HashBlocks(word32 *state, const word32 *data, size_t length)
1281 {
1282  uint32x4_t STATE0, STATE1, ABEF_SAVE, CDGH_SAVE;
1283  uint32x4_t MSG0, MSG1, MSG2, MSG3;
1284  uint32x4_t TMP0, TMP1, TMP2;
1285 
1286  // Load initial values
1287  STATE0 = vld1q_u32(&state[0]);
1288  STATE1 = vld1q_u32(&state[4]);
1289 
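 // Note on the ARMv8 Crypto extension intrinsics used below: vsha256hq_u32 and
 // vsha256h2q_u32 each perform four rounds, updating the {A,B,C,D} and {E,F,G,H} halves
 // of the state respectively, while vsha256su0q_u32/vsha256su1q_u32 expand the message
 // schedule four words at a time.
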
1290  while (length >= SHA256::BLOCKSIZE)
1291  {
1292  // Save current hash
1293  ABEF_SAVE = STATE0;
1294  CDGH_SAVE = STATE1;
1295 
1296  // Load message
1297  MSG0 = vld1q_u32(data + 0);
1298  MSG1 = vld1q_u32(data + 4);
1299  MSG2 = vld1q_u32(data + 8);
1300  MSG3 = vld1q_u32(data + 12);
1301 
1302  TMP0 = vaddq_u32(MSG0, vld1q_u32(&SHA256_K[0x00]));
1303 
1304  // Rounds 0-3
1305  MSG0 = vsha256su0q_u32(MSG0, MSG1);
1306  TMP2 = STATE0;
1307  TMP1 = vaddq_u32(MSG1, vld1q_u32(&SHA256_K[0x04]));
1308  STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
1309  STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);
 1310  MSG0 = vsha256su1q_u32(MSG0, MSG2, MSG3);
1311 
1312  // Rounds 4-7
1313  MSG1 = vsha256su0q_u32(MSG1, MSG2);
1314  TMP2 = STATE0;
1315  TMP0 = vaddq_u32(MSG2, vld1q_u32(&SHA256_K[0x08]));
1316  STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1);
1317  STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1);
 1318  MSG1 = vsha256su1q_u32(MSG1, MSG3, MSG0);
1319 
1320  // Rounds 8-11
1321  MSG2 = vsha256su0q_u32(MSG2, MSG3);
1322  TMP2 = STATE0;
1323  TMP1 = vaddq_u32(MSG3, vld1q_u32(&SHA256_K[0x0c]));
1324  STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
1325  STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);
 1326  MSG2 = vsha256su1q_u32(MSG2, MSG0, MSG1);
1327 
1328  // Rounds 12-15
1329  MSG3 = vsha256su0q_u32(MSG3, MSG0);
1330  TMP2 = STATE0;
1331  TMP0 = vaddq_u32(MSG0, vld1q_u32(&SHA256_K[0x10]));
1332  STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1);
1333  STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1);
 1334  MSG3 = vsha256su1q_u32(MSG3, MSG1, MSG2);
1335 
1336  // Rounds 16-19
1337  MSG0 = vsha256su0q_u32(MSG0, MSG1);
1338  TMP2 = STATE0;
1339  TMP1 = vaddq_u32(MSG1, vld1q_u32(&SHA256_K[0x14]));
1340  STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
1341  STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);
 1342  MSG0 = vsha256su1q_u32(MSG0, MSG2, MSG3);
1343 
1344  // Rounds 20-23
1345  MSG1 = vsha256su0q_u32(MSG1, MSG2);
1346  TMP2 = STATE0;
1347  TMP0 = vaddq_u32(MSG2, vld1q_u32(&SHA256_K[0x18]));
1348  STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1);
1349  STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1);
 1350  MSG1 = vsha256su1q_u32(MSG1, MSG3, MSG0);
1351 
1352  // Rounds 24-27
1353  MSG2 = vsha256su0q_u32(MSG2, MSG3);
1354  TMP2 = STATE0;
1355  TMP1 = vaddq_u32(MSG3, vld1q_u32(&SHA256_K[0x1c]));
1356  STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
1357  STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);
 1358  MSG2 = vsha256su1q_u32(MSG2, MSG0, MSG1);
1359 
1360  // Rounds 28-31
1361  MSG3 = vsha256su0q_u32(MSG3, MSG0);
1362  TMP2 = STATE0;
1363  TMP0 = vaddq_u32(MSG0, vld1q_u32(&SHA256_K[0x20]));
1364  STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1);
1365  STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1);
 1366  MSG3 = vsha256su1q_u32(MSG3, MSG1, MSG2);
1367 
1368  // Rounds 32-35
1369  MSG0 = vsha256su0q_u32(MSG0, MSG1);
1370  TMP2 = STATE0;
1371  TMP1 = vaddq_u32(MSG1, vld1q_u32(&SHA256_K[0x24]));
1372  STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
1373  STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);
 1374  MSG0 = vsha256su1q_u32(MSG0, MSG2, MSG3);
1375 
1376  // Rounds 36-39
1377  MSG1 = vsha256su0q_u32(MSG1, MSG2);
1378  TMP2 = STATE0;
1379  TMP0 = vaddq_u32(MSG2, vld1q_u32(&SHA256_K[0x28]));
1380  STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1);
1381  STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1);
 1382  MSG1 = vsha256su1q_u32(MSG1, MSG3, MSG0);
1383 
1384  // Rounds 40-43
1385  MSG2 = vsha256su0q_u32(MSG2, MSG3);
1386  TMP2 = STATE0;
1387  TMP1 = vaddq_u32(MSG3, vld1q_u32(&SHA256_K[0x2c]));
1388  STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
1389  STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);
 1390  MSG2 = vsha256su1q_u32(MSG2, MSG0, MSG1);
1391 
1392  // Rounds 44-47
1393  MSG3 = vsha256su0q_u32(MSG3, MSG0);
1394  TMP2 = STATE0;
1395  TMP0 = vaddq_u32(MSG0, vld1q_u32(&SHA256_K[0x30]));
1396  STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1);
1397  STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1);
 1398  MSG3 = vsha256su1q_u32(MSG3, MSG1, MSG2);
1399 
1400  // Rounds 48-51
1401  TMP2 = STATE0;
1402  TMP1 = vaddq_u32(MSG1, vld1q_u32(&SHA256_K[0x34]));
1403  STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
 1404  STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);
1405 
1406  // Rounds 52-55
1407  TMP2 = STATE0;
1408  TMP0 = vaddq_u32(MSG2, vld1q_u32(&SHA256_K[0x38]));
1409  STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1);
 1410  STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1);
1411 
1412  // Rounds 56-59
1413  TMP2 = STATE0;
1414  TMP1 = vaddq_u32(MSG3, vld1q_u32(&SHA256_K[0x3c]));
1415  STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
 1416  STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);
1417 
1418  // Rounds 60-63
1419  TMP2 = STATE0;
1420  STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1);
 1421  STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1);
1422 
1423  // Add back to state
1424  STATE0 = vaddq_u32(STATE0, ABEF_SAVE);
1425  STATE1 = vaddq_u32(STATE1, CDGH_SAVE);
1426 
1427  data += SHA256::BLOCKSIZE/sizeof(word32);
1428  length -= SHA256::BLOCKSIZE;
1429  }
1430 
1431  // Save state
1432  vst1q_u32(&state[0], STATE0);
1433  vst1q_u32(&state[4], STATE1);
1434 }
1435 #endif
1436 
1437 ///////////////////////////////////////////////////////
1438 // end of Walton/Schneiders/O'Rourke/Hovsmith's code //
1439 ///////////////////////////////////////////////////////
1440 
1441 pfnSHATransform InitializeSHA256Transform()
1442 {
1443 #if CRYPTOPP_SSE_SHA_AVAILABLE
1444  if (HasSHA())
1445  return &SHA256_SSE_SHA_Transform;
1446  else
1447 #endif
1448 #if CRYPTOPP_SSE2_ASM_AVAILABLE
1449  if (HasSSE2())
1450  return &SHA256_SSE2_Transform;
1451  else
1452 #endif
1453 #if CRYPTOPP_ARM_CRYPTO_AVAILABLE
1454  if (HasSHA2())
1455  return &SHA256_ARM_SHA_Transform;
1456  else
1457 #endif
1458 
1459  return &SHA256_CXX_Transform;
1460 }
1461 
1462 void SHA256::Transform(word32 *state, const word32 *data)
1463 {
1464  static const pfnSHATransform s_pfn = InitializeSHA256Transform();
1465  s_pfn(state, data);
1466 }
1467 
1468 // *************************************************************
1469 
1470 void SHA384::InitState(HashWordType *state)
1471 {
1472  static const word64 s[8] = {
1473  W64LIT(0xcbbb9d5dc1059ed8), W64LIT(0x629a292a367cd507),
1474  W64LIT(0x9159015a3070dd17), W64LIT(0x152fecd8f70e5939),
1475  W64LIT(0x67332667ffc00b31), W64LIT(0x8eb44a8768581511),
1476  W64LIT(0xdb0c2e0d64f98fa7), W64LIT(0x47b5481dbefa4fa4)};
1477  memcpy(state, s, sizeof(s));
1478 }
1479 
1480 void SHA512::InitState(HashWordType *state)
1481 {
1482  static const word64 s[8] = {
1483  W64LIT(0x6a09e667f3bcc908), W64LIT(0xbb67ae8584caa73b),
1484  W64LIT(0x3c6ef372fe94f82b), W64LIT(0xa54ff53a5f1d36f1),
1485  W64LIT(0x510e527fade682d1), W64LIT(0x9b05688c2b3e6c1f),
1486  W64LIT(0x1f83d9abfb41bd6b), W64LIT(0x5be0cd19137e2179)};
1487  memcpy(state, s, sizeof(s));
1488 }
1489 
1490 #if CRYPTOPP_SSE2_ASM_AVAILABLE && (CRYPTOPP_X86 || CRYPTOPP_X32)
1491 CRYPTOPP_ALIGN_DATA(16) static const word64 SHA512_K[80] CRYPTOPP_SECTION_ALIGN16 = {
1492 #else
1493 CRYPTOPP_ALIGN_DATA(16) static const word64 SHA512_K[80] CRYPTOPP_SECTION_ALIGN16 = {
1494 #endif
1495  W64LIT(0x428a2f98d728ae22), W64LIT(0x7137449123ef65cd),
1496  W64LIT(0xb5c0fbcfec4d3b2f), W64LIT(0xe9b5dba58189dbbc),
1497  W64LIT(0x3956c25bf348b538), W64LIT(0x59f111f1b605d019),
1498  W64LIT(0x923f82a4af194f9b), W64LIT(0xab1c5ed5da6d8118),
1499  W64LIT(0xd807aa98a3030242), W64LIT(0x12835b0145706fbe),
1500  W64LIT(0x243185be4ee4b28c), W64LIT(0x550c7dc3d5ffb4e2),
1501  W64LIT(0x72be5d74f27b896f), W64LIT(0x80deb1fe3b1696b1),
1502  W64LIT(0x9bdc06a725c71235), W64LIT(0xc19bf174cf692694),
1503  W64LIT(0xe49b69c19ef14ad2), W64LIT(0xefbe4786384f25e3),
1504  W64LIT(0x0fc19dc68b8cd5b5), W64LIT(0x240ca1cc77ac9c65),
1505  W64LIT(0x2de92c6f592b0275), W64LIT(0x4a7484aa6ea6e483),
1506  W64LIT(0x5cb0a9dcbd41fbd4), W64LIT(0x76f988da831153b5),
1507  W64LIT(0x983e5152ee66dfab), W64LIT(0xa831c66d2db43210),
1508  W64LIT(0xb00327c898fb213f), W64LIT(0xbf597fc7beef0ee4),
1509  W64LIT(0xc6e00bf33da88fc2), W64LIT(0xd5a79147930aa725),
1510  W64LIT(0x06ca6351e003826f), W64LIT(0x142929670a0e6e70),
1511  W64LIT(0x27b70a8546d22ffc), W64LIT(0x2e1b21385c26c926),
1512  W64LIT(0x4d2c6dfc5ac42aed), W64LIT(0x53380d139d95b3df),
1513  W64LIT(0x650a73548baf63de), W64LIT(0x766a0abb3c77b2a8),
1514  W64LIT(0x81c2c92e47edaee6), W64LIT(0x92722c851482353b),
1515  W64LIT(0xa2bfe8a14cf10364), W64LIT(0xa81a664bbc423001),
1516  W64LIT(0xc24b8b70d0f89791), W64LIT(0xc76c51a30654be30),
1517  W64LIT(0xd192e819d6ef5218), W64LIT(0xd69906245565a910),
1518  W64LIT(0xf40e35855771202a), W64LIT(0x106aa07032bbd1b8),
1519  W64LIT(0x19a4c116b8d2d0c8), W64LIT(0x1e376c085141ab53),
1520  W64LIT(0x2748774cdf8eeb99), W64LIT(0x34b0bcb5e19b48a8),
1521  W64LIT(0x391c0cb3c5c95a63), W64LIT(0x4ed8aa4ae3418acb),
1522  W64LIT(0x5b9cca4f7763e373), W64LIT(0x682e6ff3d6b2b8a3),
1523  W64LIT(0x748f82ee5defb2fc), W64LIT(0x78a5636f43172f60),
1524  W64LIT(0x84c87814a1f0ab72), W64LIT(0x8cc702081a6439ec),
1525  W64LIT(0x90befffa23631e28), W64LIT(0xa4506cebde82bde9),
1526  W64LIT(0xbef9a3f7b2c67915), W64LIT(0xc67178f2e372532b),
1527  W64LIT(0xca273eceea26619c), W64LIT(0xd186b8c721c0c207),
1528  W64LIT(0xeada7dd6cde0eb1e), W64LIT(0xf57d4f7fee6ed178),
1529  W64LIT(0x06f067aa72176fba), W64LIT(0x0a637dc5a2c898a6),
1530  W64LIT(0x113f9804bef90dae), W64LIT(0x1b710b35131c471b),
1531  W64LIT(0x28db77f523047d84), W64LIT(0x32caab7b40c72493),
1532  W64LIT(0x3c9ebe0a15c9bebc), W64LIT(0x431d67c49c100d4c),
1533  W64LIT(0x4cc5d4becb3e42b6), W64LIT(0x597f299cfc657e2a),
1534  W64LIT(0x5fcb6fab3ad6faec), W64LIT(0x6c44198c4a475817)
1535 };
1536 
1537 #if CRYPTOPP_SSE2_ASM_AVAILABLE && (CRYPTOPP_X86 || CRYPTOPP_X32)
1538 // put assembly version in separate function, otherwise MSVC 2005 SP1 doesn't generate correct code for the non-assembly version
1539 CRYPTOPP_NAKED static void CRYPTOPP_FASTCALL SHA512_SSE2_Transform(word64 *state, const word64 *data)
1540 {
1541 #ifdef __GNUC__
1542  __asm__ __volatile__
1543  (
1544  INTEL_NOPREFIX
1545  AS_PUSH_IF86( bx)
1546  AS2( mov ebx, eax)
1547 #else
1548  AS1( push ebx)
1549  AS1( push esi)
1550  AS1( push edi)
1551  AS2( lea ebx, SHA512_K)
1552 #endif
1553 
1554  AS2( mov eax, esp)
1555  AS2( and esp, 0xfffffff0)
1556  AS2( sub esp, 27*16) // 17*16 for expanded data, 20*8 for state
1557  AS_PUSH_IF86( ax)
1558  AS2( xor eax, eax)
1559 
1560 #if CRYPTOPP_X32
1561  AS2( lea edi, [esp+8+8*8]) // start at middle of state buffer. will decrement pointer each round to avoid copying
1562  AS2( lea esi, [esp+8+20*8+8]) // 16-byte alignment, then add 8
1563 #else
1564  AS2( lea edi, [esp+4+8*8]) // start at middle of state buffer. will decrement pointer each round to avoid copying
1565  AS2( lea esi, [esp+4+20*8+8]) // 16-byte alignment, then add 8
1566 #endif
1567 
1568  AS2( movdqa xmm0, [ecx+0*16])
1569  AS2( movdq2q mm4, xmm0)
1570  AS2( movdqa [edi+0*16], xmm0)
1571  AS2( movdqa xmm0, [ecx+1*16])
1572  AS2( movdqa [edi+1*16], xmm0)
1573  AS2( movdqa xmm0, [ecx+2*16])
1574  AS2( movdq2q mm5, xmm0)
1575  AS2( movdqa [edi+2*16], xmm0)
1576  AS2( movdqa xmm0, [ecx+3*16])
1577  AS2( movdqa [edi+3*16], xmm0)
1578  ASJ( jmp, 0, f)
1579 
1580 #define SSE2_S0_S1(r, a, b, c) \
1581  AS2( movq mm6, r)\
1582  AS2( psrlq r, a)\
1583  AS2( movq mm7, r)\
1584  AS2( psllq mm6, 64-c)\
1585  AS2( pxor mm7, mm6)\
1586  AS2( psrlq r, b-a)\
1587  AS2( pxor mm7, r)\
1588  AS2( psllq mm6, c-b)\
1589  AS2( pxor mm7, mm6)\
1590  AS2( psrlq r, c-b)\
1591  AS2( pxor r, mm7)\
1592  AS2( psllq mm6, b-a)\
1593  AS2( pxor r, mm6)
1594 
1595 #define SSE2_s0(r, a, b, c) \
1596  AS2( movdqa xmm6, r)\
1597  AS2( psrlq r, a)\
1598  AS2( movdqa xmm7, r)\
1599  AS2( psllq xmm6, 64-c)\
1600  AS2( pxor xmm7, xmm6)\
1601  AS2( psrlq r, b-a)\
1602  AS2( pxor xmm7, r)\
1603  AS2( psrlq r, c-b)\
1604  AS2( pxor r, xmm7)\
1605  AS2( psllq xmm6, c-a)\
1606  AS2( pxor r, xmm6)
1607 
1608 #define SSE2_s1(r, a, b, c) \
1609  AS2( movdqa xmm6, r)\
1610  AS2( psrlq r, a)\
1611  AS2( movdqa xmm7, r)\
1612  AS2( psllq xmm6, 64-c)\
1613  AS2( pxor xmm7, xmm6)\
1614  AS2( psrlq r, b-a)\
1615  AS2( pxor xmm7, r)\
1616  AS2( psllq xmm6, c-b)\
1617  AS2( pxor xmm7, xmm6)\
1618  AS2( psrlq r, c-b)\
1619  AS2( pxor r, xmm7)
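
// In scalar terms, SSE2_S0_S1(r,a,b,c) computes r = rotr64(r,a) ^ rotr64(r,b) ^ rotr64(r,c),
// i.e. the SHA-512 S0/S1 functions, on one 64-bit word in an MMX register. SSE2_s0 and
// SSE2_s1 compute the message-schedule functions s0(x) = rotr64(x,1) ^ rotr64(x,8) ^ (x>>7)
// and s1(x) = rotr64(x,19) ^ rotr64(x,61) ^ (x>>6) on two message words at a time in an
// XMM register, composed from shifts because neither MMX nor SSE2 has a 64-bit rotate.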
1620 
1621  ASL(SHA512_Round)
1622  // k + w is in mm0, a is in mm4, e is in mm5
1623  AS2( paddq mm0, [edi+7*8]) // h
1624  AS2( movq mm2, [edi+5*8]) // f
1625  AS2( movq mm3, [edi+6*8]) // g
1626  AS2( pxor mm2, mm3)
1627  AS2( pand mm2, mm5)
1628  SSE2_S0_S1(mm5,14,18,41)
1629  AS2( pxor mm2, mm3)
1630  AS2( paddq mm0, mm2) // h += Ch(e,f,g)
1631  AS2( paddq mm5, mm0) // h += S1(e)
1632  AS2( movq mm2, [edi+1*8]) // b
1633  AS2( movq mm1, mm2)
1634  AS2( por mm2, mm4)
1635  AS2( pand mm2, [edi+2*8]) // c
1636  AS2( pand mm1, mm4)
1637  AS2( por mm1, mm2)
1638  AS2( paddq mm1, mm5) // temp = h + Maj(a,b,c)
1639  AS2( paddq mm5, [edi+3*8]) // e = d + h
1640  AS2( movq [edi+3*8], mm5)
1641  AS2( movq [edi+11*8], mm5)
1642  SSE2_S0_S1(mm4,28,34,39) // S0(a)
1643  AS2( paddq mm4, mm1) // a = temp + S0(a)
1644  AS2( movq [edi-8], mm4)
1645  AS2( movq [edi+7*8], mm4)
1646  AS1( ret)
1647 
1648  // first 16 rounds
1649  ASL(0)
1650  AS2( movq mm0, [edx+eax*8])
1651  AS2( movq [esi+eax*8], mm0)
1652  AS2( movq [esi+eax*8+16*8], mm0)
1653  AS2( paddq mm0, [ebx+eax*8])
1654  ASC( call, SHA512_Round)
1655  AS1( inc eax)
1656  AS2( sub edi, 8)
1657  AS2( test eax, 7)
1658  ASJ( jnz, 0, b)
1659  AS2( add edi, 8*8)
1660  AS2( cmp eax, 16)
1661  ASJ( jne, 0, b)
1662 
1663  // rest of the rounds
1664  AS2( movdqu xmm0, [esi+(16-2)*8])
1665  ASL(1)
1666  // data expansion, W[i-2] already in xmm0
1667  AS2( movdqu xmm3, [esi])
1668  AS2( paddq xmm3, [esi+(16-7)*8])
1669  AS2( movdqa xmm2, [esi+(16-15)*8])
1670  SSE2_s1(xmm0, 6, 19, 61)
1671  AS2( paddq xmm0, xmm3)
1672  SSE2_s0(xmm2, 1, 7, 8)
1673  AS2( paddq xmm0, xmm2)
1674  AS2( movdq2q mm0, xmm0)
1675  AS2( movhlps xmm1, xmm0)
1676  AS2( paddq mm0, [ebx+eax*8])
1677  AS2( movlps [esi], xmm0)
1678  AS2( movlps [esi+8], xmm1)
1679  AS2( movlps [esi+8*16], xmm0)
1680  AS2( movlps [esi+8*17], xmm1)
1681  // 2 rounds
1682  ASC( call, SHA512_Round)
1683  AS2( sub edi, 8)
1684  AS2( movdq2q mm0, xmm1)
1685  AS2( paddq mm0, [ebx+eax*8+8])
1686  ASC( call, SHA512_Round)
1687  // update indices and loop
1688  AS2( add esi, 16)
1689  AS2( add eax, 2)
1690  AS2( sub edi, 8)
1691  AS2( test eax, 7)
1692  ASJ( jnz, 1, b)
1693  // do housekeeping every 8 rounds
1694  AS2( mov esi, 0xf)
1695  AS2( and esi, eax)
1696 #if CRYPTOPP_X32
1697  AS2( lea esi, [esp+8+20*8+8+esi*8])
1698 #else
1699  AS2( lea esi, [esp+4+20*8+8+esi*8])
1700 #endif
1701  AS2( add edi, 8*8)
1702  AS2( cmp eax, 80)
1703  ASJ( jne, 1, b)
1704 
1705 #define SSE2_CombineState(i) \
1706  AS2( movdqa xmm0, [edi+i*16])\
1707  AS2( paddq xmm0, [ecx+i*16])\
1708  AS2( movdqa [ecx+i*16], xmm0)
1709 
1710  SSE2_CombineState(0)
1711  SSE2_CombineState(1)
1712  SSE2_CombineState(2)
1713  SSE2_CombineState(3)
1714 
1715  AS_POP_IF86( sp)
1716  AS1( emms)
1717 
1718 #if defined(__GNUC__)
1719  AS_POP_IF86( bx)
1720  ATT_PREFIX
1721  :
1722  : "a" (SHA512_K), "c" (state), "d" (data)
1723  : "%esi", "%edi", "memory", "cc"
1724  );
1725 #else
1726  AS1( pop edi)
1727  AS1( pop esi)
1728  AS1( pop ebx)
1729  AS1( ret)
1730 #endif
1731 }
1732 #endif // #if CRYPTOPP_SSE2_ASM_AVAILABLE
1733 
1734 void SHA512::Transform(word64 *state, const word64 *data)
1735 {
1736  CRYPTOPP_ASSERT(IsAlignedOn(state, GetAlignmentOf<word64>()));
1737  CRYPTOPP_ASSERT(IsAlignedOn(data, GetAlignmentOf<word64>()));
1738 
1739 #if CRYPTOPP_SSE2_ASM_AVAILABLE && (CRYPTOPP_X86 || CRYPTOPP_X32)
1740  if (HasSSE2())
1741  {
1742  SHA512_SSE2_Transform(state, data);
1743  return;
1744  }
1745 #endif
1746 
1747 #define S0(x) (rotrFixed(x,28)^rotrFixed(x,34)^rotrFixed(x,39))
1748 #define S1(x) (rotrFixed(x,14)^rotrFixed(x,18)^rotrFixed(x,41))
1749 #define s0(x) (rotrFixed(x,1)^rotrFixed(x,8)^(x>>7))
1750 #define s1(x) (rotrFixed(x,19)^rotrFixed(x,61)^(x>>6))
1751 
1752 #define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+SHA512_K[i+j]+(j?blk2(i):blk0(i));\
1753  d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i))
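
 // These are the SHA-512 counterparts of the SHA-256 functions defined earlier: the
 // capital-S macros act on the working variables, the small-s macros on the message
 // schedule, using 64-bit words and the rotation/shift counts from FIPS 180-4. R(i)
 // then performs one round exactly as in SHA256_CXX_Transform.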
1754 
1755  word64 W[16];
1756  word64 T[8];
1757  /* Copy context->state[] to working vars */
1758  memcpy(T, state, sizeof(T));
1759  /* 80 operations, partially loop unrolled */
1760  for (unsigned int j=0; j<80; j+=16)
1761  {
1762  R( 0); R( 1); R( 2); R( 3);
1763  R( 4); R( 5); R( 6); R( 7);
1764  R( 8); R( 9); R(10); R(11);
1765  R(12); R(13); R(14); R(15);
1766  }
1767  /* Add the working vars back into context.state[] */
1768  state[0] += a(0);
1769  state[1] += b(0);
1770  state[2] += c(0);
1771  state[3] += d(0);
1772  state[4] += e(0);
1773  state[5] += f(0);
1774  state[6] += g(0);
1775  state[7] += h(0);
1776 }
1777 
1778 NAMESPACE_END
1779 
1780 #endif // #ifndef CRYPTOPP_GENERATE_X64_MASM
1781 #endif // #ifndef CRYPTOPP_IMPORTS