Crypto++ 5.6.5
Free C++ class library of cryptographic schemes
sha.cpp
1 // sha.cpp - modified by Wei Dai from Steve Reid's public domain sha1.c
2 
3 // Steve Reid implemented SHA-1. Wei Dai implemented SHA-2. Jeffrey Walton
4 // implemented Intel SHA extensions based on Intel articles and code by
5 // Sean Gulley. Jeffrey Walton implemented ARM SHA based on ARM code and
6 // code from Johannes Schneiders, Skip Hovsmith and Barry O'Rourke.
7 // All code is in the public domain.
8 
9 // use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM sha.cpp" to generate MASM code
10 
11 #include "pch.h"
12 #include "config.h"
13 
14 #if CRYPTOPP_MSC_VERSION
15 # pragma warning(disable: 4100 4731)
16 #endif
17 
18 #ifndef CRYPTOPP_IMPORTS
19 #ifndef CRYPTOPP_GENERATE_X64_MASM
20 
21 #include "secblock.h"
22 #include "sha.h"
23 #include "misc.h"
24 #include "cpu.h"
25 
26 #if defined(CRYPTOPP_DISABLE_SHA_ASM)
27 # undef CRYPTOPP_X86_ASM_AVAILABLE
28 # undef CRYPTOPP_X32_ASM_AVAILABLE
29 # undef CRYPTOPP_X64_ASM_AVAILABLE
30 # undef CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
31 #endif
32 
33 NAMESPACE_BEGIN(CryptoPP)
34 
35 // Function pointer for specific SHA1 or SHA256 Transform function
36 typedef void (*pfnSHATransform)(word32 *state, const word32 *data);
37 typedef void (CRYPTOPP_FASTCALL *pfnSHAHashBlocks)(word32 *state, const word32 *data, size_t length);
38 
39 ////////////////////////////////
40 // start of Steve Reid's code //
41 ////////////////////////////////
42 
43 #define blk0(i) (W[i] = data[i])
44 #define blk1(i) (W[i&15] = rotlFixed(W[(i+13)&15]^W[(i+8)&15]^W[(i+2)&15]^W[i&15],1))
45 
46 #define f1(x,y,z) (z^(x&(y^z)))
47 #define f2(x,y,z) (x^y^z)
48 #define f3(x,y,z) ((x&y)|(z&(x|y)))
49 #define f4(x,y,z) (x^y^z)
50 
51 /* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */
52 #define R0(v,w,x,y,z,i) z+=f1(w,x,y)+blk0(i)+0x5A827999+rotlFixed(v,5);w=rotlFixed(w,30);
53 #define R1(v,w,x,y,z,i) z+=f1(w,x,y)+blk1(i)+0x5A827999+rotlFixed(v,5);w=rotlFixed(w,30);
54 #define R2(v,w,x,y,z,i) z+=f2(w,x,y)+blk1(i)+0x6ED9EBA1+rotlFixed(v,5);w=rotlFixed(w,30);
55 #define R3(v,w,x,y,z,i) z+=f3(w,x,y)+blk1(i)+0x8F1BBCDC+rotlFixed(v,5);w=rotlFixed(w,30);
56 #define R4(v,w,x,y,z,i) z+=f4(w,x,y)+blk1(i)+0xCA62C1D6+rotlFixed(v,5);w=rotlFixed(w,30);
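// To make the macro style concrete: R0(a,b,c,d,e,0) expands to
//
//   e += (d ^ (b & (c ^ d)))      /* f1(b,c,d), the "Ch" function */
//      + (W[0] = data[0])         /* blk0(0), message schedule load */
//      + 0x5A827999               /* round constant for rounds 0-19 */
//      + rotlFixed(a,5);
//   b = rotlFixed(b,30);
//
// Rotating the argument list (a,b,c,d,e) -> (e,a,b,c,d) between calls
// replaces the usual a=b; b=c; ... variable shuffle.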
57 
58 static void SHA1_CXX_Transform(word32 *state, const word32 *data)
59 {
60  word32 W[16];
61  /* Copy context->state[] to working vars */
62  word32 a = state[0];
63  word32 b = state[1];
64  word32 c = state[2];
65  word32 d = state[3];
66  word32 e = state[4];
67  /* 4 rounds of 20 operations each. Loop unrolled. */
68  R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3);
69  R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7);
70  R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11);
71  R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15);
72  R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19);
73  R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23);
74  R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27);
75  R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31);
76  R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35);
77  R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39);
78  R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43);
79  R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47);
80  R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51);
81  R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55);
82  R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59);
83  R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63);
84  R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67);
85  R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);
86  R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);
87  R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);
88  /* Add the working vars back into context.state[] */
89  state[0] += a;
90  state[1] += b;
91  state[2] += c;
92  state[3] += d;
93  state[4] += e;
94 }
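// An illustrative known-answer sketch (not part of the original file): it
// feeds the padded one-block message "abc" to the transform above and checks
// the FIPS 180 test vector. memcmp is assumed to be reachable through the
// existing includes, as memcpy already is elsewhere in this file.
#if 0
static bool SHA1_TransformKnownAnswerTest()
{
	word32 state[5];
	SHA1::InitState(state);           // 0x67452301, 0xEFCDAB89, ...
	word32 block[16] = {0x61626380};  // "abc" + 0x80 pad, big-endian words
	block[15] = 24;                   // message length in bits
	SHA1_CXX_Transform(state, block);
	const word32 expected[5] = {0xA9993E36, 0x4706816A, 0xBA3E2571, 0x7850C26C, 0x9CD0D89D};
	return memcmp(state, expected, sizeof(expected)) == 0;
}
#endif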
95 
96 //////////////////////////////
97 // end of Steve Reid's code //
98 //////////////////////////////
99 
100 ///////////////////////////////////
101 // start of Walton/Gulley's code //
102 ///////////////////////////////////
103 
104 #if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
105 // Based on http://software.intel.com/en-us/articles/intel-sha-extensions and code by Sean Gulley.
106 static void SHA1_SSE_SHA_Transform(word32 *state, const word32 *data)
107 {
108  __m128i ABCD, ABCD_SAVE, E0, E0_SAVE, E1;
109  __m128i MASK, MSG0, MSG1, MSG2, MSG3;
110 
111  // Load initial values
112  ABCD = _mm_loadu_si128((__m128i*) state);
113  E0 = _mm_set_epi32(state[4], 0, 0, 0);
114  ABCD = _mm_shuffle_epi32(ABCD, 0x1B);
115  MASK = _mm_set_epi8(0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15);
116 
117  // Save current hash
118  ABCD_SAVE = ABCD;
119  E0_SAVE = E0;
120 
121  // Rounds 0-3
122  MSG0 = _mm_loadu_si128((__m128i*) data+0);
123  MSG0 = _mm_shuffle_epi8(MSG0, MASK);
124  E0 = _mm_add_epi32(E0, MSG0);
125  E1 = ABCD;
126  ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 0);
127 
128  // Rounds 4-7
129  MSG1 = _mm_loadu_si128((__m128i*) (data+4));
130  MSG1 = _mm_shuffle_epi8(MSG1, MASK);
131  E1 = _mm_sha1nexte_epu32(E1, MSG1);
132  E0 = ABCD;
133  ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 0);
134  MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1);
135 
136  // Rounds 8-11
137  MSG2 = _mm_loadu_si128((__m128i*) (data+8));
138  MSG2 = _mm_shuffle_epi8(MSG2, MASK);
139  E0 = _mm_sha1nexte_epu32(E0, MSG2);
140  E1 = ABCD;
141  ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 0);
142  MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2);
143  MSG0 = _mm_xor_si128(MSG0, MSG2);
144 
145  // Rounds 12-15
146  MSG3 = _mm_loadu_si128((__m128i*) (data+12));
147  MSG3 = _mm_shuffle_epi8(MSG3, MASK);
148  E1 = _mm_sha1nexte_epu32(E1, MSG3);
149  E0 = ABCD;
150  MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3);
151  ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 0);
152  MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3);
153  MSG1 = _mm_xor_si128(MSG1, MSG3);
154 
155  // Rounds 16-19
156  E0 = _mm_sha1nexte_epu32(E0, MSG0);
157  E1 = ABCD;
158  MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0);
159  ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 0);
160  MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0);
161  MSG2 = _mm_xor_si128(MSG2, MSG0);
162 
163  // Rounds 20-23
164  E1 = _mm_sha1nexte_epu32(E1, MSG1);
165  E0 = ABCD;
166  MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1);
167  ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 1);
168  MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1);
169  MSG3 = _mm_xor_si128(MSG3, MSG1);
170 
171  // Rounds 24-27
172  E0 = _mm_sha1nexte_epu32(E0, MSG2);
173  E1 = ABCD;
174  MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2);
175  ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 1);
176  MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2);
177  MSG0 = _mm_xor_si128(MSG0, MSG2);
178 
179  // Rounds 28-31
180  E1 = _mm_sha1nexte_epu32(E1, MSG3);
181  E0 = ABCD;
182  MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3);
183  ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 1);
184  MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3);
185  MSG1 = _mm_xor_si128(MSG1, MSG3);
186 
187  // Rounds 32-35
188  E0 = _mm_sha1nexte_epu32(E0, MSG0);
189  E1 = ABCD;
190  MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0);
191  ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 1);
192  MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0);
193  MSG2 = _mm_xor_si128(MSG2, MSG0);
194 
195  // Rounds 36-39
196  E1 = _mm_sha1nexte_epu32(E1, MSG1);
197  E0 = ABCD;
198  MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1);
199  ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 1);
200  MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1);
201  MSG3 = _mm_xor_si128(MSG3, MSG1);
202 
203  // Rounds 40-43
204  E0 = _mm_sha1nexte_epu32(E0, MSG2);
205  E1 = ABCD;
206  MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2);
207  ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 2);
208  MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2);
209  MSG0 = _mm_xor_si128(MSG0, MSG2);
210 
211  // Rounds 44-47
212  E1 = _mm_sha1nexte_epu32(E1, MSG3);
213  E0 = ABCD;
214  MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3);
215  ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 2);
216  MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3);
217  MSG1 = _mm_xor_si128(MSG1, MSG3);
218 
219  // Rounds 48-51
220  E0 = _mm_sha1nexte_epu32(E0, MSG0);
221  E1 = ABCD;
222  MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0);
223  ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 2);
224  MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0);
225  MSG2 = _mm_xor_si128(MSG2, MSG0);
226 
227  // Rounds 52-55
228  E1 = _mm_sha1nexte_epu32(E1, MSG1);
229  E0 = ABCD;
230  MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1);
231  ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 2);
232  MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1);
233  MSG3 = _mm_xor_si128(MSG3, MSG1);
234 
235  // Rounds 56-59
236  E0 = _mm_sha1nexte_epu32(E0, MSG2);
237  E1 = ABCD;
238  MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2);
239  ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 2);
240  MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2);
241  MSG0 = _mm_xor_si128(MSG0, MSG2);
242 
243  // Rounds 60-63
244  E1 = _mm_sha1nexte_epu32(E1, MSG3);
245  E0 = ABCD;
246  MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3);
247  ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 3);
248  MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3);
249  MSG1 = _mm_xor_si128(MSG1, MSG3);
250 
251  // Rounds 64-67
252  E0 = _mm_sha1nexte_epu32(E0, MSG0);
253  E1 = ABCD;
254  MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0);
255  ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 3);
256  MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0);
257  MSG2 = _mm_xor_si128(MSG2, MSG0);
258 
259  // Rounds 68-71
260  E1 = _mm_sha1nexte_epu32(E1, MSG1);
261  E0 = ABCD;
262  MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1);
263  ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 3);
264  MSG3 = _mm_xor_si128(MSG3, MSG1);
265 
266  // Rounds 72-75
267  E0 = _mm_sha1nexte_epu32(E0, MSG2);
268  E1 = ABCD;
269  MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2);
270  ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 3);
271 
272  // Rounds 76-79
273  E1 = _mm_sha1nexte_epu32(E1, MSG3);
274  E0 = ABCD;
275  ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 3);
276 
277  // Add values back to state
278  E0 = _mm_sha1nexte_epu32(E0, E0_SAVE);
279  ABCD = _mm_add_epi32(ABCD, ABCD_SAVE);
280 
281  // Save state
282  ABCD = _mm_shuffle_epi32(ABCD, 0x1B);
283  _mm_storeu_si128((__m128i*) state, ABCD);
284  state[4] = _mm_extract_epi32(E0, 3);
285 }
286 #endif
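// Note on the intrinsics above: the immediate argument of
// _mm_sha1rnds4_epu32 selects the round function and constant pair
// (0 = Ch/0x5A827999, 1 = Parity/0x6ED9EBA1, 2 = Maj/0x8F1BBCDC,
// 3 = Parity/0xCA62C1D6), so each call advances the hash by four rounds.
// _mm_sha1nexte_epu32 folds the rotated E value into the next message
// quadword, which is why no explicit rotate of E appears in the code.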
287 
288 /////////////////////////////////
289 // end of Walton/Gulley's code //
290 /////////////////////////////////
291 
292 //////////////////////////////////////////////////////////////
293 // start of Walton/Schneiders/O'Rourke/Hovsmith's code //
294 //////////////////////////////////////////////////////////////
295 
296 #if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
297 static void SHA1_ARM_SHA_Transform(word32 *state, const word32 *data)
298 {
299  uint32x4_t C0, C1, C2, C3;
300  uint32x4_t ABCD, ABCD_SAVED;
301  uint32x4_t MSG0, MSG1, MSG2, MSG3;
302  uint32x4_t TMP0, TMP1;
303  uint32_t E0, E0_SAVED, E1;
304 
305  // Load initial values
306  C0 = vdupq_n_u32(0x5A827999);
307  C1 = vdupq_n_u32(0x6ED9EBA1);
308  C2 = vdupq_n_u32(0x8F1BBCDC);
309  C3 = vdupq_n_u32(0xCA62C1D6);
310 
311  ABCD = vld1q_u32(&state[0]);
312  E0 = state[4];
313 
314  // Save current hash
315  ABCD_SAVED = ABCD;
316  E0_SAVED = E0;
317 
318  MSG0 = vld1q_u32(data + 0);
319  MSG1 = vld1q_u32(data + 4);
320  MSG2 = vld1q_u32(data + 8);
321  MSG3 = vld1q_u32(data + 12);
322 
323  TMP0 = vaddq_u32(MSG0, C0);
324  TMP1 = vaddq_u32(MSG1, C0);
325 
326  // Rounds 0-3
327  E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
328  ABCD = vsha1cq_u32(ABCD, E0, TMP0);
329  TMP0 = vaddq_u32(MSG2, C0);
330  MSG0 = vsha1su0q_u32(MSG0, MSG1, MSG2);
331 
332  // Rounds 4-7
333  E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
334  ABCD = vsha1cq_u32(ABCD, E1, TMP1);
335  TMP1 = vaddq_u32(MSG3, C0);
336  MSG0 = vsha1su1q_u32(MSG0, MSG3);
337  MSG1 = vsha1su0q_u32(MSG1, MSG2, MSG3);
338 
339  // Rounds 8-11
340  E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
341  ABCD = vsha1cq_u32(ABCD, E0, TMP0);
342  TMP0 = vaddq_u32(MSG0, C0);
343  MSG1 = vsha1su1q_u32(MSG1, MSG0);
344  MSG2 = vsha1su0q_u32(MSG2, MSG3, MSG0);
345 
346  // Rounds 12-15
347  E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
348  ABCD = vsha1cq_u32(ABCD, E1, TMP1);
349  TMP1 = vaddq_u32(MSG1, C1);
350  MSG2 = vsha1su1q_u32(MSG2, MSG1);
351  MSG3 = vsha1su0q_u32(MSG3, MSG0, MSG1);
352 
353  // Rounds 16-19
354  E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
355  ABCD = vsha1cq_u32(ABCD, E0, TMP0);
356  TMP0 = vaddq_u32(MSG2, C1);
357  MSG3 = vsha1su1q_u32(MSG3, MSG2);
358  MSG0 = vsha1su0q_u32(MSG0, MSG1, MSG2);
359 
360  // Rounds 20-23
361  E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
362  ABCD = vsha1pq_u32(ABCD, E1, TMP1);
363  TMP1 = vaddq_u32(MSG3, C1);
364  MSG0 = vsha1su1q_u32(MSG0, MSG3);
365  MSG1 = vsha1su0q_u32(MSG1, MSG2, MSG3);
366 
367  // Rounds 24-27
368  E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
369  ABCD = vsha1pq_u32(ABCD, E0, TMP0);
370  TMP0 = vaddq_u32(MSG0, C1);
371  MSG1 = vsha1su1q_u32(MSG1, MSG0);
372  MSG2 = vsha1su0q_u32(MSG2, MSG3, MSG0);
373 
374  // Rounds 28-31
375  E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
376  ABCD = vsha1pq_u32(ABCD, E1, TMP1);
377  TMP1 = vaddq_u32(MSG1, C1);
378  MSG2 = vsha1su1q_u32(MSG2, MSG1);
379  MSG3 = vsha1su0q_u32(MSG3, MSG0, MSG1);
380 
381  // Rounds 32-35
382  E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
383  ABCD = vsha1pq_u32(ABCD, E0, TMP0);
384  TMP0 = vaddq_u32(MSG2, C2);
385  MSG3 = vsha1su1q_u32(MSG3, MSG2);
386  MSG0 = vsha1su0q_u32(MSG0, MSG1, MSG2);
387 
388  // Rounds 36-39
389  E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
390  ABCD = vsha1pq_u32(ABCD, E1, TMP1);
391  TMP1 = vaddq_u32(MSG3, C2);
392  MSG0 = vsha1su1q_u32(MSG0, MSG3);
393  MSG1 = vsha1su0q_u32(MSG1, MSG2, MSG3);
394 
395  // Rounds 40-43
396  E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
397  ABCD = vsha1mq_u32(ABCD, E0, TMP0);
398  TMP0 = vaddq_u32(MSG0, C2);
399  MSG1 = vsha1su1q_u32(MSG1, MSG0);
400  MSG2 = vsha1su0q_u32(MSG2, MSG3, MSG0);
401 
402  // Rounds 44-47
403  E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
404  ABCD = vsha1mq_u32(ABCD, E1, TMP1);
405  TMP1 = vaddq_u32(MSG1, C2);
406  MSG2 = vsha1su1q_u32(MSG2, MSG1);
407  MSG3 = vsha1su0q_u32(MSG3, MSG0, MSG1);
408 
409  // Rounds 48-51
410  E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
411  ABCD = vsha1mq_u32(ABCD, E0, TMP0);
412  TMP0 = vaddq_u32(MSG2, C2);
413  MSG3 = vsha1su1q_u32(MSG3, MSG2);
414  MSG0 = vsha1su0q_u32(MSG0, MSG1, MSG2);
415 
416  // Rounds 52-55
417  E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
418  ABCD = vsha1mq_u32(ABCD, E1, TMP1);
419  TMP1 = vaddq_u32(MSG3, C3);
420  MSG0 = vsha1su1q_u32(MSG0, MSG3);
421  MSG1 = vsha1su0q_u32(MSG1, MSG2, MSG3);
422 
423  // Rounds 56-59
424  E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
425  ABCD = vsha1mq_u32(ABCD, E0, TMP0);
426  TMP0 = vaddq_u32(MSG0, C3);
427  MSG1 = vsha1su1q_u32(MSG1, MSG0);
428  MSG2 = vsha1su0q_u32(MSG2, MSG3, MSG0);
429 
430  // Rounds 60-63
431  E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
432  ABCD = vsha1pq_u32(ABCD, E1, TMP1);
433  TMP1 = vaddq_u32(MSG1, C3);
434  MSG2 = vsha1su1q_u32(MSG2, MSG1);
435  MSG3 = vsha1su0q_u32(MSG3, MSG0, MSG1);
436 
437  // Rounds 64-67
438  E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
439  ABCD = vsha1pq_u32(ABCD, E0, TMP0);
440  TMP0 = vaddq_u32(MSG2, C3);
441  MSG3 = vsha1su1q_u32(MSG3, MSG2);
442  MSG0 = vsha1su0q_u32(MSG0, MSG1, MSG2);
443 
444  // Rounds 68-71
445  E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
446  ABCD = vsha1pq_u32(ABCD, E1, TMP1);
447  TMP1 = vaddq_u32(MSG3, C3);
448  MSG0 = vsha1su1q_u32(MSG0, MSG3);
449 
450  // Rounds 72-75
451  E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
452  ABCD = vsha1pq_u32(ABCD, E0, TMP0);
453 
454  // Rounds 76-79
455  E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
456  ABCD = vsha1pq_u32(ABCD, E1, TMP1);
457 
458  E0 += E0_SAVED;
459  ABCD = vaddq_u32(ABCD_SAVED, ABCD);
460 
461  // Save state
462  vst1q_u32(&state[0], ABCD);
463  state[4] = E0;
464 }
465 #endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
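// Note on the ARMv8 intrinsics above: vsha1cq_u32, vsha1pq_u32 and
// vsha1mq_u32 each perform four rounds using the Choose, Parity and
// Majority functions, matching the 0-19, 20-39/60-79 and 40-59 round
// ranges of FIPS 180. vsha1h_u32 is the fixed rotate-left-by-30 used in
// the round schedule, and the vsha1su0q/vsha1su1q pair computes
// W[t] = rotl(W[t-3]^W[t-8]^W[t-14]^W[t-16], 1) four lanes at a time.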
466 
467 ///////////////////////////////////////////////////////
468 // end of Walton/Schneiders/O'Rourke/Hovsmith's code //
469 ///////////////////////////////////////////////////////
470 
471 pfnSHATransform InitializeSHA1Transform()
472 {
473 #if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
474  if (HasSHA())
475  return &SHA1_SSE_SHA_Transform;
476  else
477 #endif
478 #if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
479  if (HasSHA1())
480  return &SHA1_ARM_SHA_Transform;
481  else
482 #endif
483  return &SHA1_CXX_Transform;
484 }
485 
486 void SHA1::InitState(HashWordType *state)
487 {
488  state[0] = 0x67452301L;
489  state[1] = 0xEFCDAB89L;
490  state[2] = 0x98BADCFEL;
491  state[3] = 0x10325476L;
492  state[4] = 0xC3D2E1F0L;
493 }
494 
495 void SHA1::Transform(word32 *state, const word32 *data)
496 {
497  static const pfnSHATransform s_pfn = InitializeSHA1Transform();
498  s_pfn(state, data);
499 }
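// The function-local static above is the usual one-shot dispatch idiom:
// InitializeSHA1Transform() probes the CPU once and the chosen pointer is
// reused for every later call. With C++11 "magic statics" the
// initialization is thread-safe; on older compilers the probe may run more
// than once, which is harmless because every thread resolves to the same
// function.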
500 
501 #if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
502 size_t SHA1::HashMultipleBlocks(const word32 *input, size_t length)
503 {
504  static const bool noReverse = HasSHA() || NativeByteOrderIs(this->GetByteOrder());
505  const unsigned int blockSize = this->BlockSize();
506  word32* dataBuf = this->DataBuf();
507  do
508  {
509  if (noReverse)
510  this->HashEndianCorrectedBlock(input);
511  else
512  {
513  ByteReverse(dataBuf, input, this->BlockSize());
514  this->HashEndianCorrectedBlock(dataBuf);
515  }
516 
517  input += blockSize/sizeof(word32);
518  length -= blockSize;
519  }
520  while (length >= blockSize);
521  return length;
522 }
523 #endif
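// noReverse above is true either when the SHA-extension transform is in use
// (it byte-swaps internally with _mm_shuffle_epi8) or when the machine's
// byte order already matches the hash's big-endian word convention; only
// the remaining case pays for an explicit ByteReverse into dataBuf.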
524 
525 // *************************************************************
526 
527 void SHA224::InitState(HashWordType *state)
528 {
529  static const word32 s[8] = {0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939, 0xffc00b31, 0x68581511, 0x64f98fa7, 0xbefa4fa4};
530  memcpy(state, s, sizeof(s));
531 }
532 
533 void SHA256::InitState(HashWordType *state)
534 {
535  static const word32 s[8] = {0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19};
536  memcpy(state, s, sizeof(s));
537 }
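// Per FIPS 180-4, the SHA-256 initial values are the first 32 bits of the
// fractional parts of the square roots of the first eight primes (2..19),
// while the SHA-224 values are the second 32 bits of the square roots of
// the ninth through sixteenth primes (23..53).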
538 
539 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
540 CRYPTOPP_ALIGN_DATA(16) extern const word32 SHA256_K[64] CRYPTOPP_SECTION_ALIGN16 = {
541 #else
542 extern const word32 SHA256_K[64] = {
543 #endif
544  0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
545  0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
546  0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
547  0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
548  0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
549  0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
550  0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
551  0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
552  0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
553  0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
554  0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
555  0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
556  0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
557  0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
558  0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
559  0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
560 };
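// SHA256_K holds the first 32 bits of the fractional parts of the cube
// roots of the first 64 primes (FIPS 180-4, sec. 4.2.2). As a spot check,
// cbrt(2) = 1.2599210499..., and 0.2599210499 * 2^32 = 1116352408.8, whose
// integer part is 0x428a2f98 -- the first entry above.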
561 
562 #endif // #ifndef CRYPTOPP_GENERATE_X64_MASM
563 
564 #if (defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_X32_ASM_AVAILABLE) || defined(CRYPTOPP_GENERATE_X64_MASM))
565 
566 static void CRYPTOPP_FASTCALL X86_SHA256_HashBlocks(word32 *state, const word32 *data, size_t len)
567 {
568  #define LOCALS_SIZE 8*4 + 16*4 + 4*WORD_SZ
569  #define H(i) [BASE+ASM_MOD(1024+7-(i),8)*4]
570  #define G(i) H(i+1)
571  #define F(i) H(i+2)
572  #define E(i) H(i+3)
573  #define D(i) H(i+4)
574  #define C(i) H(i+5)
575  #define B(i) H(i+6)
576  #define A(i) H(i+7)
577  #define Wt(i) BASE+8*4+ASM_MOD(1024+15-(i),16)*4
578  #define Wt_2(i) Wt((i)-2)
579  #define Wt_15(i) Wt((i)-15)
580  #define Wt_7(i) Wt((i)-7)
581  #define K_END [BASE+8*4+16*4+0*WORD_SZ]
582  #define STATE_SAVE [BASE+8*4+16*4+1*WORD_SZ]
583  #define DATA_SAVE [BASE+8*4+16*4+2*WORD_SZ]
584  #define DATA_END [BASE+8*4+16*4+3*WORD_SZ]
585  #define Kt(i) WORD_REG(si)+(i)*4
586 #if CRYPTOPP_BOOL_X32
587  #define BASE esp+8
588 #elif CRYPTOPP_BOOL_X86
589  #define BASE esp+4
590 #elif defined(__GNUC__)
591  #define BASE r8
592 #else
593  #define BASE rsp
594 #endif
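// The A(i)..H(i) macros address the eight working variables through a
// rotating window: ASM_MOD(1024+7-(i),8) renumbers the stack slots as i
// advances, so each round "renames" A->B->...->H without moving any data.
// The same trick, modulo 16, makes Wt(i) a circular 16-word message
// schedule.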
595 
596 #define RA0(i, edx, edi) \
597  AS2( add edx, [Kt(i)] )\
598  AS2( add edx, [Wt(i)] )\
599  AS2( add edx, H(i) )\
600 
601 #define RA1(i, edx, edi)
602 
603 #define RB0(i, edx, edi)
604 
605 #define RB1(i, edx, edi) \
606  AS2( mov AS_REG_7d, [Wt_2(i)] )\
607  AS2( mov edi, [Wt_15(i)])\
608  AS2( mov ebx, AS_REG_7d )\
609  AS2( shr AS_REG_7d, 10 )\
610  AS2( ror ebx, 17 )\
611  AS2( xor AS_REG_7d, ebx )\
612  AS2( ror ebx, 2 )\
613  AS2( xor ebx, AS_REG_7d )/* s1(W_t-2) */\
614  AS2( add ebx, [Wt_7(i)])\
615  AS2( mov AS_REG_7d, edi )\
616  AS2( shr AS_REG_7d, 3 )\
617  AS2( ror edi, 7 )\
618  AS2( add ebx, [Wt(i)])/* s1(W_t-2) + W_t-7 + W_t-16 */\
619  AS2( xor AS_REG_7d, edi )\
620  AS2( add edx, [Kt(i)])\
621  AS2( ror edi, 11 )\
622  AS2( add edx, H(i) )\
623  AS2( xor AS_REG_7d, edi )/* s0(W_t-15) */\
624 	AS2(	add AS_REG_7d, ebx )/* W_t = s1(W_t-2) + W_t-7 + s0(W_t-15) + W_t-16 */\
625  AS2( mov [Wt(i)], AS_REG_7d)\
626  AS2( add edx, AS_REG_7d )\
627 
628 #define ROUND(i, r, eax, ecx, edi, edx)\
629  /* in: edi = E */\
630  /* unused: eax, ecx, temp: ebx, AS_REG_7d, out: edx = T1 */\
631  AS2( mov edx, F(i) )\
632  AS2( xor edx, G(i) )\
633  AS2( and edx, edi )\
634  AS2( xor edx, G(i) )/* Ch(E,F,G) = (G^(E&(F^G))) */\
635  AS2( mov AS_REG_7d, edi )\
636  AS2( ror edi, 6 )\
637  AS2( ror AS_REG_7d, 25 )\
638  RA##r(i, edx, edi )/* H + Wt + Kt + Ch(E,F,G) */\
639  AS2( xor AS_REG_7d, edi )\
640  AS2( ror edi, 5 )\
641  AS2( xor AS_REG_7d, edi )/* S1(E) */\
642  AS2( add edx, AS_REG_7d )/* T1 = S1(E) + Ch(E,F,G) + H + Wt + Kt */\
643  RB##r(i, edx, edi )/* H + Wt + Kt + Ch(E,F,G) */\
644  /* in: ecx = A, eax = B^C, edx = T1 */\
645  /* unused: edx, temp: ebx, AS_REG_7d, out: eax = A, ecx = B^C, edx = E */\
646  AS2( mov ebx, ecx )\
647  AS2( xor ecx, B(i) )/* A^B */\
648  AS2( and eax, ecx )\
649 	AS2(	xor eax, B(i) )/* Maj(A,B,C) = B^((A^B)&(B^C)) */\
650  AS2( mov AS_REG_7d, ebx )\
651  AS2( ror ebx, 2 )\
652  AS2( add eax, edx )/* T1 + Maj(A,B,C) */\
653  AS2( add edx, D(i) )\
654  AS2( mov D(i), edx )\
655  AS2( ror AS_REG_7d, 22 )\
656  AS2( xor AS_REG_7d, ebx )\
657  AS2( ror ebx, 11 )\
658  AS2( xor AS_REG_7d, ebx )\
659  AS2( add eax, AS_REG_7d )/* T1 + S0(A) + Maj(A,B,C) */\
660  AS2( mov H(i), eax )\
661 
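// ROUND is written in two halves. The first computes
// T1 = h + S1(e) + Ch(e,f,g) + K[t] + W[t] into edx; the second folds in
// Maj(a,b,c) and S0(a) and stores the new A and E through the rotating
// window above. The r parameter selects RA0/RB0 for the first sixteen
// rounds, where W[t] is the byte-swapped message word already in the
// buffer, and RA1/RB1 afterwards, where RB1 computes
// W[t] = s1(W[t-2]) + W[t-7] + s0(W[t-15]) + W[t-16] on the fly.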
662 // Unroll the use of CRYPTOPP_BOOL_X64 in assembler math. The GAS assembler on X32 (version 2.25)
663 // complains "Error: invalid operands (*ABS* and *UND* sections) for `*` and `-`"
664 #if CRYPTOPP_BOOL_X64
665 #define SWAP_COPY(i) \
666  AS2( mov WORD_REG(bx), [WORD_REG(dx)+i*WORD_SZ])\
667  AS1( bswap WORD_REG(bx))\
668  AS2( mov [Wt(i*2+1)], WORD_REG(bx))
669 #else // X86 and X32
670 #define SWAP_COPY(i) \
671  AS2( mov WORD_REG(bx), [WORD_REG(dx)+i*WORD_SZ])\
672  AS1( bswap WORD_REG(bx))\
673  AS2( mov [Wt(i)], WORD_REG(bx))
674 #endif
675 
676 #if defined(__GNUC__)
677 	#if CRYPTOPP_BOOL_X64
678 		FixedSizeAlignedSecBlock<byte, LOCALS_SIZE> workspace;
679 	#endif
680  __asm__ __volatile__
681  (
682  #if CRYPTOPP_BOOL_X64
683  "lea %4, %%r8;"
684  #endif
685  INTEL_NOPREFIX
686 #elif defined(CRYPTOPP_GENERATE_X64_MASM)
687  ALIGN 8
688  X86_SHA256_HashBlocks PROC FRAME
689  rex_push_reg rsi
690  push_reg rdi
691  push_reg rbx
692  push_reg rbp
693  alloc_stack(LOCALS_SIZE+8)
694  .endprolog
695  mov rdi, r8
696  lea rsi, [?SHA256_K@CryptoPP@@3QBIB + 48*4]
697 #endif
698 
699 #if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
700  #ifndef __GNUC__
701  AS2( mov edi, [len])
702  AS2( lea WORD_REG(si), [SHA256_K+48*4])
703  #endif
704  #if !defined(_MSC_VER) || (_MSC_VER < 1400)
705  AS_PUSH_IF86(bx)
706  #endif
707 
708  AS_PUSH_IF86(bp)
709  AS2( mov ebx, esp)
710  AS2( and esp, -16)
711  AS2( sub WORD_REG(sp), LOCALS_SIZE)
712  AS_PUSH_IF86(bx)
713 #endif
714  AS2( mov STATE_SAVE, WORD_REG(cx))
715  AS2( mov DATA_SAVE, WORD_REG(dx))
716  AS2( lea WORD_REG(ax), [WORD_REG(di) + WORD_REG(dx)])
717  AS2( mov DATA_END, WORD_REG(ax))
718  AS2( mov K_END, WORD_REG(si))
719 
720 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
721 #if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
722  AS2( test edi, 1)
723  ASJ( jnz, 2, f)
724  AS1( dec DWORD PTR K_END)
725 #endif
726  AS2( movdqa xmm0, XMMWORD_PTR [WORD_REG(cx)+0*16])
727  AS2( movdqa xmm1, XMMWORD_PTR [WORD_REG(cx)+1*16])
728 #endif
729 
730 #if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
731 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
732  ASJ( jmp, 0, f)
733 #endif
734  ASL(2) // non-SSE2
735  AS2( mov esi, ecx)
736  AS2( lea edi, A(0))
737  AS2( mov ecx, 8)
738 ATT_NOPREFIX
739  AS1( rep movsd)
740 INTEL_NOPREFIX
741  AS2( mov esi, K_END)
742  ASJ( jmp, 3, f)
743 #endif
744 
745 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
746  ASL(0)
747  AS2( movdqa E(0), xmm1)
748  AS2( movdqa A(0), xmm0)
749 #endif
750 #if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
751  ASL(3)
752 #endif
753  AS2( sub WORD_REG(si), 48*4)
754  SWAP_COPY(0) SWAP_COPY(1) SWAP_COPY(2) SWAP_COPY(3)
755  SWAP_COPY(4) SWAP_COPY(5) SWAP_COPY(6) SWAP_COPY(7)
756 #if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
757  SWAP_COPY(8) SWAP_COPY(9) SWAP_COPY(10) SWAP_COPY(11)
758  SWAP_COPY(12) SWAP_COPY(13) SWAP_COPY(14) SWAP_COPY(15)
759 #endif
760  AS2( mov edi, E(0)) // E
761  AS2( mov eax, B(0)) // B
762  AS2( xor eax, C(0)) // B^C
763  AS2( mov ecx, A(0)) // A
764 
765  ROUND(0, 0, eax, ecx, edi, edx)
766  ROUND(1, 0, ecx, eax, edx, edi)
767  ROUND(2, 0, eax, ecx, edi, edx)
768  ROUND(3, 0, ecx, eax, edx, edi)
769  ROUND(4, 0, eax, ecx, edi, edx)
770  ROUND(5, 0, ecx, eax, edx, edi)
771  ROUND(6, 0, eax, ecx, edi, edx)
772  ROUND(7, 0, ecx, eax, edx, edi)
773  ROUND(8, 0, eax, ecx, edi, edx)
774  ROUND(9, 0, ecx, eax, edx, edi)
775  ROUND(10, 0, eax, ecx, edi, edx)
776  ROUND(11, 0, ecx, eax, edx, edi)
777  ROUND(12, 0, eax, ecx, edi, edx)
778  ROUND(13, 0, ecx, eax, edx, edi)
779  ROUND(14, 0, eax, ecx, edi, edx)
780  ROUND(15, 0, ecx, eax, edx, edi)
781 
782  ASL(1)
783  AS2(add WORD_REG(si), 4*16)
784  ROUND(0, 1, eax, ecx, edi, edx)
785  ROUND(1, 1, ecx, eax, edx, edi)
786  ROUND(2, 1, eax, ecx, edi, edx)
787  ROUND(3, 1, ecx, eax, edx, edi)
788  ROUND(4, 1, eax, ecx, edi, edx)
789  ROUND(5, 1, ecx, eax, edx, edi)
790  ROUND(6, 1, eax, ecx, edi, edx)
791  ROUND(7, 1, ecx, eax, edx, edi)
792  ROUND(8, 1, eax, ecx, edi, edx)
793  ROUND(9, 1, ecx, eax, edx, edi)
794  ROUND(10, 1, eax, ecx, edi, edx)
795  ROUND(11, 1, ecx, eax, edx, edi)
796  ROUND(12, 1, eax, ecx, edi, edx)
797  ROUND(13, 1, ecx, eax, edx, edi)
798  ROUND(14, 1, eax, ecx, edi, edx)
799  ROUND(15, 1, ecx, eax, edx, edi)
800  AS2( cmp WORD_REG(si), K_END)
801  ATT_NOPREFIX
802  ASJ( jb, 1, b)
803  INTEL_NOPREFIX
804 
805  AS2( mov WORD_REG(dx), DATA_SAVE)
806  AS2( add WORD_REG(dx), 64)
807  AS2( mov AS_REG_7, STATE_SAVE)
808  AS2( mov DATA_SAVE, WORD_REG(dx))
809 
810 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
811 #if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
812  AS2( test DWORD PTR K_END, 1)
813  ASJ( jz, 4, f)
814 #endif
815  AS2( movdqa xmm1, XMMWORD_PTR [AS_REG_7+1*16])
816  AS2( movdqa xmm0, XMMWORD_PTR [AS_REG_7+0*16])
817  AS2( paddd xmm1, E(0))
818  AS2( paddd xmm0, A(0))
819  AS2( movdqa [AS_REG_7+1*16], xmm1)
820  AS2( movdqa [AS_REG_7+0*16], xmm0)
821  AS2( cmp WORD_REG(dx), DATA_END)
822  ATT_NOPREFIX
823  ASJ( jb, 0, b)
824  INTEL_NOPREFIX
825 #endif
826 
827 #if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
828 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
829  ASJ( jmp, 5, f)
830  ASL(4) // non-SSE2
831 #endif
832  AS2( add [AS_REG_7+0*4], ecx) // A
833  AS2( add [AS_REG_7+4*4], edi) // E
834  AS2( mov eax, B(0))
835  AS2( mov ebx, C(0))
836  AS2( mov ecx, D(0))
837  AS2( add [AS_REG_7+1*4], eax)
838  AS2( add [AS_REG_7+2*4], ebx)
839  AS2( add [AS_REG_7+3*4], ecx)
840  AS2( mov eax, F(0))
841  AS2( mov ebx, G(0))
842  AS2( mov ecx, H(0))
843  AS2( add [AS_REG_7+5*4], eax)
844  AS2( add [AS_REG_7+6*4], ebx)
845  AS2( add [AS_REG_7+7*4], ecx)
846  AS2( mov ecx, AS_REG_7d)
847  AS2( cmp WORD_REG(dx), DATA_END)
848  ASJ( jb, 2, b)
849 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
850  ASL(5)
851 #endif
852 #endif
853 
854  AS_POP_IF86(sp)
855  AS_POP_IF86(bp)
856  #if !defined(_MSC_VER) || (_MSC_VER < 1400)
857  AS_POP_IF86(bx)
858  #endif
859 
860 #ifdef CRYPTOPP_GENERATE_X64_MASM
861  add rsp, LOCALS_SIZE+8
862  pop rbp
863  pop rbx
864  pop rdi
865  pop rsi
866  ret
867  X86_SHA256_HashBlocks ENDP
868 #endif
869 
870 #ifdef __GNUC__
871  ATT_PREFIX
872  :
873  : "c" (state), "d" (data), "S" (SHA256_K+48), "D" (len)
874  #if CRYPTOPP_BOOL_X64
875  , "m" (workspace[0])
876  #endif
877  : "memory", "cc", "%eax"
878  #if CRYPTOPP_BOOL_X64
879  , "%rbx", "%r8", "%r10"
880  #endif
881  );
882 #endif
883 }
884 
885 #endif // (defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_X32_ASM_AVAILABLE) || defined(CRYPTOPP_GENERATE_X64_MASM))
886 
887 #ifndef CRYPTOPP_GENERATE_X64_MASM
888 
889 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
890 extern "C" {
891 void CRYPTOPP_FASTCALL X86_SHA256_HashBlocks(word32 *state, const word32 *data, size_t len);
892 }
893 #endif
894 
895 #if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
896 static void CRYPTOPP_FASTCALL SHA256_SSE_SHA_HashBlocks(word32 *state, const word32 *data, size_t length);
897 #elif CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
898 static void CRYPTOPP_FASTCALL SHA256_ARM_SHA_HashBlocks(word32 *state, const word32 *data, size_t length);
899 #endif
900 
901 #if (defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_X32_ASM_AVAILABLE) || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_SHA_ASM)
902 
903 pfnSHAHashBlocks InitializeSHA256HashBlocks()
904 {
905 #if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
906  if (HasSHA())
907  return &SHA256_SSE_SHA_HashBlocks;
908  else
909 #endif
910 #if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
911  if (HasSHA2())
912  return &SHA256_ARM_SHA_HashBlocks;
913  else
914 #endif
915 
916  return &X86_SHA256_HashBlocks;
917 }
918 
919 size_t SHA256::HashMultipleBlocks(const word32 *input, size_t length)
920 {
921  static const pfnSHAHashBlocks s_pfn = InitializeSHA256HashBlocks();
922  s_pfn(m_state, input, (length&(size_t(0)-BLOCKSIZE)) - !HasSSE2());
923  return length % BLOCKSIZE;
924 }
925 
926 size_t SHA224::HashMultipleBlocks(const word32 *input, size_t length)
927 {
928  static const pfnSHAHashBlocks s_pfn = InitializeSHA256HashBlocks();
929  s_pfn(m_state, input, (length&(size_t(0)-BLOCKSIZE)) - !HasSSE2());
930  return length % BLOCKSIZE;
931 }
932 #endif
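// In the two HashMultipleBlocks overloads above,
// length & (size_t(0)-BLOCKSIZE) rounds the byte count down to a whole
// number of 64-byte blocks (BLOCKSIZE is a power of two, so the expression
// is its two's-complement mask). Subtracting !HasSSE2() then sets the low
// bit when SSE2 is absent; the assembly routine tests that bit to choose
// between its SSE2 and integer-only state handling.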
933 
934 #define blk2(i) (W[i&15]+=s1(W[(i-2)&15])+W[(i-7)&15]+s0(W[(i-15)&15]))
935 
936 #define Ch(x,y,z) (z^(x&(y^z)))
937 #define Maj(x,y,z) (y^((x^y)&(y^z)))
938 
939 #define a(i) T[(0-i)&7]
940 #define b(i) T[(1-i)&7]
941 #define c(i) T[(2-i)&7]
942 #define d(i) T[(3-i)&7]
943 #define e(i) T[(4-i)&7]
944 #define f(i) T[(5-i)&7]
945 #define g(i) T[(6-i)&7]
946 #define h(i) T[(7-i)&7]
947 
948 #define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+SHA256_K[i+j]+(j?blk2(i):blk0(i));\
949  d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i))
950 
951 // for SHA256
952 #define S0(x) (rotrFixed(x,2)^rotrFixed(x,13)^rotrFixed(x,22))
953 #define S1(x) (rotrFixed(x,6)^rotrFixed(x,11)^rotrFixed(x,25))
954 #define s0(x) (rotrFixed(x,7)^rotrFixed(x,18)^(x>>3))
955 #define s1(x) (rotrFixed(x,17)^rotrFixed(x,19)^(x>>10))
956 
957 #if defined(__OPTIMIZE_SIZE__)
958 // Smaller but slower
959 void SHA256_CXX_Transform(word32 *state, const word32 *data)
960 {
961  word32 W[32], T[20];
962  unsigned int i = 0, j = 0;
963  word32 *t = T+8;
964 
965  memcpy(t, state, 8*4);
966  word32 e = t[4], a = t[0];
967 
968  do
969  {
970  word32 w = data[j];
971  W[j] = w;
972  w += SHA256_K[j];
973  w += t[7];
974  w += S1(e);
975  w += Ch(e, t[5], t[6]);
976  e = t[3] + w;
977  t[3] = t[3+8] = e;
978  w += S0(t[0]);
979  a = w + Maj(a, t[1], t[2]);
980  t[-1] = t[7] = a;
981  --t;
982  ++j;
983  if (j%8 == 0)
984  t += 8;
985  } while (j<16);
986 
987  do
988  {
989  i = j&0xf;
990  word32 w = s1(W[i+16-2]) + s0(W[i+16-15]) + W[i] + W[i+16-7];
991  W[i+16] = W[i] = w;
992  w += SHA256_K[j];
993  w += t[7];
994  w += S1(e);
995  w += Ch(e, t[5], t[6]);
996  e = t[3] + w;
997  t[3] = t[3+8] = e;
998  w += S0(t[0]);
999  a = w + Maj(a, t[1], t[2]);
1000  t[-1] = t[7] = a;
1001 
1002  w = s1(W[(i+1)+16-2]) + s0(W[(i+1)+16-15]) + W[(i+1)] + W[(i+1)+16-7];
1003  W[(i+1)+16] = W[(i+1)] = w;
1004  w += SHA256_K[j+1];
1005  w += (t-1)[7];
1006  w += S1(e);
1007  w += Ch(e, (t-1)[5], (t-1)[6]);
1008  e = (t-1)[3] + w;
1009  (t-1)[3] = (t-1)[3+8] = e;
1010  w += S0((t-1)[0]);
1011  a = w + Maj(a, (t-1)[1], (t-1)[2]);
1012  (t-1)[-1] = (t-1)[7] = a;
1013 
1014  t-=2;
1015  j+=2;
1016  if (j%8 == 0)
1017  t += 8;
1018  } while (j<64);
1019 
1020  state[0] += a;
1021  state[1] += t[1];
1022  state[2] += t[2];
1023  state[3] += t[3];
1024  state[4] += e;
1025  state[5] += t[5];
1026  state[6] += t[6];
1027  state[7] += t[7];
1028 }
1029 #else
1030 // Bigger but faster
1031 void SHA256_CXX_Transform(word32 *state, const word32 *data)
1032 {
1033  word32 W[16], T[8];
1034  /* Copy context->state[] to working vars */
1035  memcpy(T, state, sizeof(T));
1036  /* 64 operations, partially loop unrolled */
1037  for (unsigned int j=0; j<64; j+=16)
1038  {
1039  R( 0); R( 1); R( 2); R( 3);
1040  R( 4); R( 5); R( 6); R( 7);
1041  R( 8); R( 9); R(10); R(11);
1042  R(12); R(13); R(14); R(15);
1043  }
1044  /* Add the working vars back into context.state[] */
1045  state[0] += a(0);
1046  state[1] += b(0);
1047  state[2] += c(0);
1048  state[3] += d(0);
1049  state[4] += e(0);
1050  state[5] += f(0);
1051  state[6] += g(0);
1052  state[7] += h(0);
1053 }
1054 #endif // __OPTIMIZE_SIZE__
1055 
1056 #undef S0
1057 #undef S1
1058 #undef s0
1059 #undef s1
1060 #undef R
1061 
1062 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
1063 static void SHA256_SSE2_Transform(word32 *state, const word32 *data)
1064 {
1065  // this byte reverse is a waste of time, but this function is only called by MDC
1066  word32 W[16];
1067  ByteReverse(W, data, SHA256::BLOCKSIZE);
1068  X86_SHA256_HashBlocks(state, W, SHA256::BLOCKSIZE - !HasSSE2());
1069 }
1070 #endif // CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
1071 
1072 #if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
1073 static void SHA256_SSE_SHA_Transform(word32 *state, const word32 *data)
1074 {
1075  return SHA256_SSE_SHA_HashBlocks(state, data, SHA256::BLOCKSIZE);
1076 }
1077 #endif // CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
1078 
1079 #if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
1080 static void SHA256_ARM_SHA_Transform(word32 *state, const word32 *data)
1081 {
1082  return SHA256_ARM_SHA_HashBlocks(state, data, SHA256::BLOCKSIZE);
1083 }
1084 #endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
1085 
1086 ///////////////////////////////////
1087 // start of Walton/Gulley's code //
1088 ///////////////////////////////////
1089 
1090 #if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
1091 // Based on http://software.intel.com/en-us/articles/intel-sha-extensions and code by Sean Gulley.
1092 static void CRYPTOPP_FASTCALL SHA256_SSE_SHA_HashBlocks(word32 *state, const word32 *data, size_t length)
1093 {
1094  CRYPTOPP_ASSERT(state); CRYPTOPP_ASSERT(data);
1095  CRYPTOPP_ASSERT(length % SHA256::BLOCKSIZE == 0);
1096 
1097  __m128i STATE0, STATE1;
1098  __m128i MSG, TMP, MASK;
1099  __m128i TMSG0, TMSG1, TMSG2, TMSG3;
1100  __m128i ABEF_SAVE, CDGH_SAVE;
1101 
1102  // Load initial values
1103  TMP = _mm_loadu_si128((__m128i*) &state[0]);
1104  STATE1 = _mm_loadu_si128((__m128i*) &state[4]);
1105  MASK = _mm_set_epi64x(W64LIT(0x0c0d0e0f08090a0b), W64LIT(0x0405060700010203));
1106 
1107  TMP = _mm_shuffle_epi32(TMP, 0xB1); // CDAB
1108  STATE1 = _mm_shuffle_epi32(STATE1, 0x1B); // EFGH
1109  STATE0 = _mm_alignr_epi8(TMP, STATE1, 8); // ABEF
1110  STATE1 = _mm_blend_epi16(STATE1, TMP, 0xF0); // CDGH
1111 
1112  while (length >= SHA256::BLOCKSIZE)
1113  {
1114  // Save current hash
1115  ABEF_SAVE = STATE0;
1116  CDGH_SAVE = STATE1;
1117 
1118  // Rounds 0-3
1119  MSG = _mm_loadu_si128((__m128i*) data+0);
1120  TMSG0 = _mm_shuffle_epi8(MSG, MASK);
1121  MSG = _mm_add_epi32(TMSG0, _mm_set_epi64x(W64LIT(0xE9B5DBA5B5C0FBCF), W64LIT(0x71374491428A2F98)));
1122  STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
1123  MSG = _mm_shuffle_epi32(MSG, 0x0E);
1124  STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
1125 
1126  // Rounds 4-7
1127  TMSG1 = _mm_loadu_si128((__m128i*) (data+4));
1128  TMSG1 = _mm_shuffle_epi8(TMSG1, MASK);
1129  MSG = _mm_add_epi32(TMSG1, _mm_set_epi64x(W64LIT(0xAB1C5ED5923F82A4), W64LIT(0x59F111F13956C25B)));
1130  STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
1131  MSG = _mm_shuffle_epi32(MSG, 0x0E);
1132  STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
1133  TMSG0 = _mm_sha256msg1_epu32(TMSG0, TMSG1);
1134 
1135  // Rounds 8-11
1136  TMSG2 = _mm_loadu_si128((__m128i*) (data+8));
1137  TMSG2 = _mm_shuffle_epi8(TMSG2, MASK);
1138  MSG = _mm_add_epi32(TMSG2, _mm_set_epi64x(W64LIT(0x550C7DC3243185BE), W64LIT(0x12835B01D807AA98)));
1139  STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
1140  MSG = _mm_shuffle_epi32(MSG, 0x0E);
1141  STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
1142  TMSG1 = _mm_sha256msg1_epu32(TMSG1, TMSG2);
1143 
1144  // Rounds 12-15
1145  TMSG3 = _mm_loadu_si128((__m128i*) (data+12));
1146  TMSG3 = _mm_shuffle_epi8(TMSG3, MASK);
1147  MSG = _mm_add_epi32(TMSG3, _mm_set_epi64x(W64LIT(0xC19BF1749BDC06A7), W64LIT(0x80DEB1FE72BE5D74)));
1148  STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
1149  TMP = _mm_alignr_epi8(TMSG3, TMSG2, 4);
1150  TMSG0 = _mm_add_epi32(TMSG0, TMP);
1151  TMSG0 = _mm_sha256msg2_epu32(TMSG0, TMSG3);
1152  MSG = _mm_shuffle_epi32(MSG, 0x0E);
1153  STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
1154  TMSG2 = _mm_sha256msg1_epu32(TMSG2, TMSG3);
1155 
1156  // Rounds 16-19
1157  MSG = _mm_add_epi32(TMSG0, _mm_set_epi64x(W64LIT(0x240CA1CC0FC19DC6), W64LIT(0xEFBE4786E49B69C1)));
1158  STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
1159  TMP = _mm_alignr_epi8(TMSG0, TMSG3, 4);
1160  TMSG1 = _mm_add_epi32(TMSG1, TMP);
1161  TMSG1 = _mm_sha256msg2_epu32(TMSG1, TMSG0);
1162  MSG = _mm_shuffle_epi32(MSG, 0x0E);
1163  STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
1164  TMSG3 = _mm_sha256msg1_epu32(TMSG3, TMSG0);
1165 
1166  // Rounds 20-23
1167  MSG = _mm_add_epi32(TMSG1, _mm_set_epi64x(W64LIT(0x76F988DA5CB0A9DC), W64LIT(0x4A7484AA2DE92C6F)));
1168  STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
1169  TMP = _mm_alignr_epi8(TMSG1, TMSG0, 4);
1170  TMSG2 = _mm_add_epi32(TMSG2, TMP);
1171  TMSG2 = _mm_sha256msg2_epu32(TMSG2, TMSG1);
1172  MSG = _mm_shuffle_epi32(MSG, 0x0E);
1173  STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
1174  TMSG0 = _mm_sha256msg1_epu32(TMSG0, TMSG1);
1175 
1176  // Rounds 24-27
1177  MSG = _mm_add_epi32(TMSG2, _mm_set_epi64x(W64LIT(0xBF597FC7B00327C8), W64LIT(0xA831C66D983E5152)));
1178  STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
1179  TMP = _mm_alignr_epi8(TMSG2, TMSG1, 4);
1180  TMSG3 = _mm_add_epi32(TMSG3, TMP);
1181  TMSG3 = _mm_sha256msg2_epu32(TMSG3, TMSG2);
1182  MSG = _mm_shuffle_epi32(MSG, 0x0E);
1183  STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
1184  TMSG1 = _mm_sha256msg1_epu32(TMSG1, TMSG2);
1185 
1186  // Rounds 28-31
1187  MSG = _mm_add_epi32(TMSG3, _mm_set_epi64x(W64LIT(0x1429296706CA6351), W64LIT(0xD5A79147C6E00BF3)));
1188  STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
1189  TMP = _mm_alignr_epi8(TMSG3, TMSG2, 4);
1190  TMSG0 = _mm_add_epi32(TMSG0, TMP);
1191  TMSG0 = _mm_sha256msg2_epu32(TMSG0, TMSG3);
1192  MSG = _mm_shuffle_epi32(MSG, 0x0E);
1193  STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
1194  TMSG2 = _mm_sha256msg1_epu32(TMSG2, TMSG3);
1195 
1196  // Rounds 32-35
1197  MSG = _mm_add_epi32(TMSG0, _mm_set_epi64x(W64LIT(0x53380D134D2C6DFC), W64LIT(0x2E1B213827B70A85)));
1198  STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
1199  TMP = _mm_alignr_epi8(TMSG0, TMSG3, 4);
1200  TMSG1 = _mm_add_epi32(TMSG1, TMP);
1201  TMSG1 = _mm_sha256msg2_epu32(TMSG1, TMSG0);
1202  MSG = _mm_shuffle_epi32(MSG, 0x0E);
1203  STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
1204  TMSG3 = _mm_sha256msg1_epu32(TMSG3, TMSG0);
1205 
1206  // Rounds 36-39
1207  MSG = _mm_add_epi32(TMSG1, _mm_set_epi64x(W64LIT(0x92722C8581C2C92E), W64LIT(0x766A0ABB650A7354)));
1208  STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
1209  TMP = _mm_alignr_epi8(TMSG1, TMSG0, 4);
1210  TMSG2 = _mm_add_epi32(TMSG2, TMP);
1211  TMSG2 = _mm_sha256msg2_epu32(TMSG2, TMSG1);
1212  MSG = _mm_shuffle_epi32(MSG, 0x0E);
1213  STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
1214  TMSG0 = _mm_sha256msg1_epu32(TMSG0, TMSG1);
1215 
1216  // Rounds 40-43
1217  MSG = _mm_add_epi32(TMSG2, _mm_set_epi64x(W64LIT(0xC76C51A3C24B8B70), W64LIT(0xA81A664BA2BFE8A1)));
1218  STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
1219  TMP = _mm_alignr_epi8(TMSG2, TMSG1, 4);
1220  TMSG3 = _mm_add_epi32(TMSG3, TMP);
1221  TMSG3 = _mm_sha256msg2_epu32(TMSG3, TMSG2);
1222  MSG = _mm_shuffle_epi32(MSG, 0x0E);
1223  STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
1224  TMSG1 = _mm_sha256msg1_epu32(TMSG1, TMSG2);
1225 
1226  // Rounds 44-47
1227  MSG = _mm_add_epi32(TMSG3, _mm_set_epi64x(W64LIT(0x106AA070F40E3585), W64LIT(0xD6990624D192E819)));
1228  STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
1229  TMP = _mm_alignr_epi8(TMSG3, TMSG2, 4);
1230  TMSG0 = _mm_add_epi32(TMSG0, TMP);
1231  TMSG0 = _mm_sha256msg2_epu32(TMSG0, TMSG3);
1232  MSG = _mm_shuffle_epi32(MSG, 0x0E);
1233  STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
1234  TMSG2 = _mm_sha256msg1_epu32(TMSG2, TMSG3);
1235 
1236  // Rounds 48-51
1237  MSG = _mm_add_epi32(TMSG0, _mm_set_epi64x(W64LIT(0x34B0BCB52748774C), W64LIT(0x1E376C0819A4C116)));
1238  STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
1239  TMP = _mm_alignr_epi8(TMSG0, TMSG3, 4);
1240  TMSG1 = _mm_add_epi32(TMSG1, TMP);
1241  TMSG1 = _mm_sha256msg2_epu32(TMSG1, TMSG0);
1242  MSG = _mm_shuffle_epi32(MSG, 0x0E);
1243  STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
1244  TMSG3 = _mm_sha256msg1_epu32(TMSG3, TMSG0);
1245 
1246  // Rounds 52-55
1247  MSG = _mm_add_epi32(TMSG1, _mm_set_epi64x(W64LIT(0x682E6FF35B9CCA4F), W64LIT(0x4ED8AA4A391C0CB3)));
1248  STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
1249  TMP = _mm_alignr_epi8(TMSG1, TMSG0, 4);
1250  TMSG2 = _mm_add_epi32(TMSG2, TMP);
1251  TMSG2 = _mm_sha256msg2_epu32(TMSG2, TMSG1);
1252  MSG = _mm_shuffle_epi32(MSG, 0x0E);
1253  STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
1254 
1255  // Rounds 56-59
1256  MSG = _mm_add_epi32(TMSG2, _mm_set_epi64x(W64LIT(0x8CC7020884C87814), W64LIT(0x78A5636F748F82EE)));
1257  STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
1258  TMP = _mm_alignr_epi8(TMSG2, TMSG1, 4);
1259  TMSG3 = _mm_add_epi32(TMSG3, TMP);
1260  TMSG3 = _mm_sha256msg2_epu32(TMSG3, TMSG2);
1261  MSG = _mm_shuffle_epi32(MSG, 0x0E);
1262  STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
1263 
1264  // Rounds 60-63
1265  MSG = _mm_add_epi32(TMSG3, _mm_set_epi64x(W64LIT(0xC67178F2BEF9A3F7), W64LIT(0xA4506CEB90BEFFFA)));
1266  STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
1267  MSG = _mm_shuffle_epi32(MSG, 0x0E);
1268  STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
1269 
1270  // Add values back to state
1271  STATE0 = _mm_add_epi32(STATE0, ABEF_SAVE);
1272  STATE1 = _mm_add_epi32(STATE1, CDGH_SAVE);
1273 
1274  data += SHA256::BLOCKSIZE/sizeof(word32);
1275  length -= SHA256::BLOCKSIZE;
1276  }
1277 
1278  TMP = _mm_shuffle_epi32(STATE0, 0x1B); // FEBA
1279  STATE1 = _mm_shuffle_epi32(STATE1, 0xB1); // DCHG
1280  STATE0 = _mm_blend_epi16(TMP, STATE1, 0xF0); // DCBA
1281  STATE1 = _mm_alignr_epi8(STATE1, TMP, 8); // ABEF
1282 
1283  // Save state
1284  _mm_storeu_si128((__m128i*) &state[0], STATE0);
1285  _mm_storeu_si128((__m128i*) &state[4], STATE1);
1286 }
1287 #endif // CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
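// Note on the layout shuffles above: _mm_sha256rnds2_epu32 expects the
// state split as {A,B,E,F} and {C,D,G,H} rather than {A,B,C,D}/{E,F,G,H},
// so the prologue permutes the loaded words into that order and the
// epilogue reverses it. Each rnds2 call performs two rounds;
// _mm_shuffle_epi32(MSG, 0x0E) moves the next two K+W words into the low
// lanes for the second call of each four-round group.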
1288 
1289 /////////////////////////////////
1290 // end of Walton/Gulley's code //
1291 /////////////////////////////////
1292 
1293 /////////////////////////////////////////////////////////
1294 // start of Walton/Schneiders/O'Rourke/Hovsmith's code //
1295 /////////////////////////////////////////////////////////
1296 
1297 #if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
1298 static void CRYPTOPP_FASTCALL SHA256_ARM_SHA_HashBlocks(word32 *state, const word32 *data, size_t length)
1299 {
1300  uint32x4_t STATE0, STATE1, ABEF_SAVE, CDGH_SAVE;
1301  uint32x4_t MSG0, MSG1, MSG2, MSG3;
1302  uint32x4_t TMP0, TMP1, TMP2;
1303 
1304  // Load initial values
1305  STATE0 = vld1q_u32(&state[0]);
1306  STATE1 = vld1q_u32(&state[4]);
1307 
1308  while (length >= SHA256::BLOCKSIZE)
1309  {
1310  // Save current hash
1311  ABEF_SAVE = STATE0;
1312  CDGH_SAVE = STATE1;
1313 
1314  // Load message
1315  MSG0 = vld1q_u32(data + 0);
1316  MSG1 = vld1q_u32(data + 4);
1317  MSG2 = vld1q_u32(data + 8);
1318  MSG3 = vld1q_u32(data + 12);
1319 
1320  TMP0 = vaddq_u32(MSG0, vld1q_u32(&SHA256_K[0x00]));
1321 
1322  // Rounds 0-3
1323  MSG0 = vsha256su0q_u32(MSG0, MSG1);
1324  TMP2 = STATE0;
1325  TMP1 = vaddq_u32(MSG1, vld1q_u32(&SHA256_K[0x04]));
1326  STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
1327  STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);
1328 	MSG0 = vsha256su1q_u32(MSG0, MSG2, MSG3);
1329 
1330  // Rounds 4-7
1331  MSG1 = vsha256su0q_u32(MSG1, MSG2);
1332  TMP2 = STATE0;
1333  TMP0 = vaddq_u32(MSG2, vld1q_u32(&SHA256_K[0x08]));
1334  STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1);
1335  STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1);
1336 	MSG1 = vsha256su1q_u32(MSG1, MSG3, MSG0);
1337 
1338  // Rounds 8-11
1339  MSG2 = vsha256su0q_u32(MSG2, MSG3);
1340  TMP2 = STATE0;
1341  TMP1 = vaddq_u32(MSG3, vld1q_u32(&SHA256_K[0x0c]));
1342  STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
1343  STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);
1344 	MSG2 = vsha256su1q_u32(MSG2, MSG0, MSG1);
1345 
1346  // Rounds 12-15
1347  MSG3 = vsha256su0q_u32(MSG3, MSG0);
1348  TMP2 = STATE0;
1349  TMP0 = vaddq_u32(MSG0, vld1q_u32(&SHA256_K[0x10]));
1350  STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1);
1351  STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1);
1352 	MSG3 = vsha256su1q_u32(MSG3, MSG1, MSG2);
1353 
1354  // Rounds 16-19
1355  MSG0 = vsha256su0q_u32(MSG0, MSG1);
1356  TMP2 = STATE0;
1357  TMP1 = vaddq_u32(MSG1, vld1q_u32(&SHA256_K[0x14]));
1358  STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
1359  STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);
1360 	MSG0 = vsha256su1q_u32(MSG0, MSG2, MSG3);
1361 
1362  // Rounds 20-23
1363  MSG1 = vsha256su0q_u32(MSG1, MSG2);
1364  TMP2 = STATE0;
1365  TMP0 = vaddq_u32(MSG2, vld1q_u32(&SHA256_K[0x18]));
1366  STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1);
1367  STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1);
1368 	MSG1 = vsha256su1q_u32(MSG1, MSG3, MSG0);
1369 
1370  // Rounds 24-27
1371  MSG2 = vsha256su0q_u32(MSG2, MSG3);
1372  TMP2 = STATE0;
1373  TMP1 = vaddq_u32(MSG3, vld1q_u32(&SHA256_K[0x1c]));
1374  STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
1375  STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);
1376 	MSG2 = vsha256su1q_u32(MSG2, MSG0, MSG1);
1377 
1378  // Rounds 28-31
1379  MSG3 = vsha256su0q_u32(MSG3, MSG0);
1380  TMP2 = STATE0;
1381  TMP0 = vaddq_u32(MSG0, vld1q_u32(&SHA256_K[0x20]));
1382  STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1);
1383  STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1);
1384 	MSG3 = vsha256su1q_u32(MSG3, MSG1, MSG2);
1385 
1386  // Rounds 32-35
1387  MSG0 = vsha256su0q_u32(MSG0, MSG1);
1388  TMP2 = STATE0;
1389  TMP1 = vaddq_u32(MSG1, vld1q_u32(&SHA256_K[0x24]));
1390  STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
1391  STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);
1392 	MSG0 = vsha256su1q_u32(MSG0, MSG2, MSG3);
1393 
1394  // Rounds 36-39
1395  MSG1 = vsha256su0q_u32(MSG1, MSG2);
1396  TMP2 = STATE0;
1397  TMP0 = vaddq_u32(MSG2, vld1q_u32(&SHA256_K[0x28]));
1398  STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1);
1399  STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1);
1400 	MSG1 = vsha256su1q_u32(MSG1, MSG3, MSG0);
1401 
1402  // Rounds 40-43
1403  MSG2 = vsha256su0q_u32(MSG2, MSG3);
1404  TMP2 = STATE0;
1405  TMP1 = vaddq_u32(MSG3, vld1q_u32(&SHA256_K[0x2c]));
1406  STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
1407  STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);
1408 	MSG2 = vsha256su1q_u32(MSG2, MSG0, MSG1);
1409 
1410  // Rounds 44-47
1411  MSG3 = vsha256su0q_u32(MSG3, MSG0);
1412  TMP2 = STATE0;
1413  TMP0 = vaddq_u32(MSG0, vld1q_u32(&SHA256_K[0x30]));
1414  STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1);
1415  STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1);
1416 	MSG3 = vsha256su1q_u32(MSG3, MSG1, MSG2);
1417 
1418  // Rounds 48-51
1419  TMP2 = STATE0;
1420  TMP1 = vaddq_u32(MSG1, vld1q_u32(&SHA256_K[0x34]));
1421  STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
1422 	STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);
1423 
1424  // Rounds 52-55
1425  TMP2 = STATE0;
1426  TMP0 = vaddq_u32(MSG2, vld1q_u32(&SHA256_K[0x38]));
1427  STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1);
1428 	STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1);
1429 
1430  // Rounds 56-59
1431  TMP2 = STATE0;
1432  TMP1 = vaddq_u32(MSG3, vld1q_u32(&SHA256_K[0x3c]));
1433  STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
1434 	STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);
1435 
1436  // Rounds 60-63
1437  TMP2 = STATE0;
1438  STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1);
1439 	STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1);
1440 
1441  // Add back to state
1442  STATE0 = vaddq_u32(STATE0, ABEF_SAVE);
1443  STATE1 = vaddq_u32(STATE1, CDGH_SAVE);
1444 
1445  data += SHA256::BLOCKSIZE/sizeof(word32);
1446  length -= SHA256::BLOCKSIZE;
1447  }
1448 
1449  // Save state
1450  vst1q_u32(&state[0], STATE0);
1451  vst1q_u32(&state[4], STATE1);
1452 }
1453 #endif
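// Note on the ARMv8 intrinsics above: each vsha256hq_u32/vsha256h2q_u32
// pair performs four rounds, updating the {A,B,C,D} and {E,F,G,H} halves
// of the state, while vsha256su0q/vsha256su1q compute the schedule
// W[t] = s1(W[t-2]) + W[t-7] + s0(W[t-15]) + W[t-16] four words at a time.
// The round constants are taken from SHA256_K four at a time with
// vld1q_u32.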
1454 
1455 ///////////////////////////////////////////////////////
1456 // end of Walton/Schneiders/O'Rourke/Hovsmith's code //
1457 ///////////////////////////////////////////////////////
1458 
1459 pfnSHATransform InitializeSHA256Transform()
1460 {
1461 #if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
1462  if (HasSHA())
1463  return &SHA256_SSE_SHA_Transform;
1464  else
1465 #endif
1466 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
1467  if (HasSSE2())
1468  return &SHA256_SSE2_Transform;
1469  else
1470 #endif
1471 #if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
1472  if (HasSHA2())
1473  return &SHA256_ARM_SHA_Transform;
1474  else
1475 #endif
1476 
1477  return &SHA256_CXX_Transform;
1478 }
1479 
1480 void SHA256::Transform(word32 *state, const word32 *data)
1481 {
1482  static const pfnSHATransform s_pfn = InitializeSHA256Transform();
1483  s_pfn(state, data);
1484 }
1485 
1486 // *************************************************************
1487 
1488 void SHA384::InitState(HashWordType *state)
1489 {
1490  static const word64 s[8] = {
1491  W64LIT(0xcbbb9d5dc1059ed8), W64LIT(0x629a292a367cd507),
1492  W64LIT(0x9159015a3070dd17), W64LIT(0x152fecd8f70e5939),
1493  W64LIT(0x67332667ffc00b31), W64LIT(0x8eb44a8768581511),
1494  W64LIT(0xdb0c2e0d64f98fa7), W64LIT(0x47b5481dbefa4fa4)};
1495  memcpy(state, s, sizeof(s));
1496 }
1497 
1498 void SHA512::InitState(HashWordType *state)
1499 {
1500  static const word64 s[8] = {
1501  W64LIT(0x6a09e667f3bcc908), W64LIT(0xbb67ae8584caa73b),
1502  W64LIT(0x3c6ef372fe94f82b), W64LIT(0xa54ff53a5f1d36f1),
1503  W64LIT(0x510e527fade682d1), W64LIT(0x9b05688c2b3e6c1f),
1504  W64LIT(0x1f83d9abfb41bd6b), W64LIT(0x5be0cd19137e2179)};
1505  memcpy(state, s, sizeof(s));
1506 }
1507 
1508 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32)
1509 CRYPTOPP_ALIGN_DATA(16) static const word64 SHA512_K[80] CRYPTOPP_SECTION_ALIGN16 = {
1510 #else
1511 static const word64 SHA512_K[80] = {
1512 #endif
1513  W64LIT(0x428a2f98d728ae22), W64LIT(0x7137449123ef65cd),
1514  W64LIT(0xb5c0fbcfec4d3b2f), W64LIT(0xe9b5dba58189dbbc),
1515  W64LIT(0x3956c25bf348b538), W64LIT(0x59f111f1b605d019),
1516  W64LIT(0x923f82a4af194f9b), W64LIT(0xab1c5ed5da6d8118),
1517  W64LIT(0xd807aa98a3030242), W64LIT(0x12835b0145706fbe),
1518  W64LIT(0x243185be4ee4b28c), W64LIT(0x550c7dc3d5ffb4e2),
1519  W64LIT(0x72be5d74f27b896f), W64LIT(0x80deb1fe3b1696b1),
1520  W64LIT(0x9bdc06a725c71235), W64LIT(0xc19bf174cf692694),
1521  W64LIT(0xe49b69c19ef14ad2), W64LIT(0xefbe4786384f25e3),
1522  W64LIT(0x0fc19dc68b8cd5b5), W64LIT(0x240ca1cc77ac9c65),
1523  W64LIT(0x2de92c6f592b0275), W64LIT(0x4a7484aa6ea6e483),
1524  W64LIT(0x5cb0a9dcbd41fbd4), W64LIT(0x76f988da831153b5),
1525  W64LIT(0x983e5152ee66dfab), W64LIT(0xa831c66d2db43210),
1526  W64LIT(0xb00327c898fb213f), W64LIT(0xbf597fc7beef0ee4),
1527  W64LIT(0xc6e00bf33da88fc2), W64LIT(0xd5a79147930aa725),
1528  W64LIT(0x06ca6351e003826f), W64LIT(0x142929670a0e6e70),
1529  W64LIT(0x27b70a8546d22ffc), W64LIT(0x2e1b21385c26c926),
1530  W64LIT(0x4d2c6dfc5ac42aed), W64LIT(0x53380d139d95b3df),
1531  W64LIT(0x650a73548baf63de), W64LIT(0x766a0abb3c77b2a8),
1532  W64LIT(0x81c2c92e47edaee6), W64LIT(0x92722c851482353b),
1533  W64LIT(0xa2bfe8a14cf10364), W64LIT(0xa81a664bbc423001),
1534  W64LIT(0xc24b8b70d0f89791), W64LIT(0xc76c51a30654be30),
1535  W64LIT(0xd192e819d6ef5218), W64LIT(0xd69906245565a910),
1536  W64LIT(0xf40e35855771202a), W64LIT(0x106aa07032bbd1b8),
1537  W64LIT(0x19a4c116b8d2d0c8), W64LIT(0x1e376c085141ab53),
1538  W64LIT(0x2748774cdf8eeb99), W64LIT(0x34b0bcb5e19b48a8),
1539  W64LIT(0x391c0cb3c5c95a63), W64LIT(0x4ed8aa4ae3418acb),
1540  W64LIT(0x5b9cca4f7763e373), W64LIT(0x682e6ff3d6b2b8a3),
1541  W64LIT(0x748f82ee5defb2fc), W64LIT(0x78a5636f43172f60),
1542  W64LIT(0x84c87814a1f0ab72), W64LIT(0x8cc702081a6439ec),
1543  W64LIT(0x90befffa23631e28), W64LIT(0xa4506cebde82bde9),
1544  W64LIT(0xbef9a3f7b2c67915), W64LIT(0xc67178f2e372532b),
1545  W64LIT(0xca273eceea26619c), W64LIT(0xd186b8c721c0c207),
1546  W64LIT(0xeada7dd6cde0eb1e), W64LIT(0xf57d4f7fee6ed178),
1547  W64LIT(0x06f067aa72176fba), W64LIT(0x0a637dc5a2c898a6),
1548  W64LIT(0x113f9804bef90dae), W64LIT(0x1b710b35131c471b),
1549  W64LIT(0x28db77f523047d84), W64LIT(0x32caab7b40c72493),
1550  W64LIT(0x3c9ebe0a15c9bebc), W64LIT(0x431d67c49c100d4c),
1551  W64LIT(0x4cc5d4becb3e42b6), W64LIT(0x597f299cfc657e2a),
1552  W64LIT(0x5fcb6fab3ad6faec), W64LIT(0x6c44198c4a475817)
1553 };
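// SHA512_K holds the first 64 bits of the fractional parts of the cube
// roots of the first eighty primes (FIPS 180-4, sec. 4.2.3), extending the
// SHA256_K pattern above to 64-bit words and 80 rounds.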
1554 
1555 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32)
1556 // put assembly version in separate function, otherwise MSVC 2005 SP1 doesn't generate correct code for the non-assembly version
1557 CRYPTOPP_NAKED static void CRYPTOPP_FASTCALL SHA512_SSE2_Transform(word64 *state, const word64 *data)
1558 {
1559 #ifdef __GNUC__
1560  __asm__ __volatile__
1561  (
1562  INTEL_NOPREFIX
1563  AS_PUSH_IF86( bx)
1564  AS2( mov ebx, eax)
1565 #else
1566  AS1( push ebx)
1567  AS1( push esi)
1568  AS1( push edi)
1569  AS2( lea ebx, SHA512_K)
1570 #endif
1571 
1572  AS2( mov eax, esp)
1573  AS2( and esp, 0xfffffff0)
1574  AS2( sub esp, 27*16) // 17*16 for expanded data, 20*8 for state
1575  AS_PUSH_IF86( ax)
1576  AS2( xor eax, eax)
1577 
1578 #if CRYPTOPP_BOOL_X32
1579  AS2( lea edi, [esp+8+8*8]) // start at middle of state buffer. will decrement pointer each round to avoid copying
1580  AS2( lea esi, [esp+8+20*8+8]) // 16-byte alignment, then add 8
1581 #else
1582  AS2( lea edi, [esp+4+8*8]) // start at middle of state buffer. will decrement pointer each round to avoid copying
1583  AS2( lea esi, [esp+4+20*8+8]) // 16-byte alignment, then add 8
1584 #endif
1585 
1586  AS2( movdqa xmm0, [ecx+0*16])
1587  AS2( movdq2q mm4, xmm0)
1588  AS2( movdqa [edi+0*16], xmm0)
1589  AS2( movdqa xmm0, [ecx+1*16])
1590  AS2( movdqa [edi+1*16], xmm0)
1591  AS2( movdqa xmm0, [ecx+2*16])
1592  AS2( movdq2q mm5, xmm0)
1593  AS2( movdqa [edi+2*16], xmm0)
1594  AS2( movdqa xmm0, [ecx+3*16])
1595  AS2( movdqa [edi+3*16], xmm0)
1596  ASJ( jmp, 0, f)
1597 
1598 #define SSE2_S0_S1(r, a, b, c) \
1599  AS2( movq mm6, r)\
1600  AS2( psrlq r, a)\
1601  AS2( movq mm7, r)\
1602  AS2( psllq mm6, 64-c)\
1603  AS2( pxor mm7, mm6)\
1604  AS2( psrlq r, b-a)\
1605  AS2( pxor mm7, r)\
1606  AS2( psllq mm6, c-b)\
1607  AS2( pxor mm7, mm6)\
1608  AS2( psrlq r, c-b)\
1609  AS2( pxor r, mm7)\
1610  AS2( psllq mm6, b-a)\
1611  AS2( pxor r, mm6)
1612 
1613 #define SSE2_s0(r, a, b, c) \
1614  AS2( movdqa xmm6, r)\
1615  AS2( psrlq r, a)\
1616  AS2( movdqa xmm7, r)\
1617  AS2( psllq xmm6, 64-c)\
1618  AS2( pxor xmm7, xmm6)\
1619  AS2( psrlq r, b-a)\
1620  AS2( pxor xmm7, r)\
1621  AS2( psrlq r, c-b)\
1622  AS2( pxor r, xmm7)\
1623  AS2( psllq xmm6, c-a)\
1624  AS2( pxor r, xmm6)
1625 
1626 #define SSE2_s1(r, a, b, c) \
1627  AS2( movdqa xmm6, r)\
1628  AS2( psrlq r, a)\
1629  AS2( movdqa xmm7, r)\
1630  AS2( psllq xmm6, 64-c)\
1631  AS2( pxor xmm7, xmm6)\
1632  AS2( psrlq r, b-a)\
1633  AS2( pxor xmm7, r)\
1634  AS2( psllq xmm6, c-b)\
1635  AS2( pxor xmm7, xmm6)\
1636  AS2( psrlq r, c-b)\
1637  AS2( pxor r, xmm7)
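// MMX/SSE2 have no 64-bit rotate instruction, so SSE2_S0_S1, SSE2_s0 and
// SSE2_s1 build each sigma function from paired right shifts (psrlq) and
// left shifts (psllq) of the same operand, XORed together:
// rotr(x,n) = (x >> n) | (x << (64-n)), and sharing the intermediate
// shifts lets the three rotation amounts a < b < c be produced
// incrementally.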
1638 
1639  ASL(SHA512_Round)
1640  // k + w is in mm0, a is in mm4, e is in mm5
1641  AS2( paddq mm0, [edi+7*8]) // h
1642  AS2( movq mm2, [edi+5*8]) // f
1643  AS2( movq mm3, [edi+6*8]) // g
1644  AS2( pxor mm2, mm3)
1645  AS2( pand mm2, mm5)
1646  SSE2_S0_S1(mm5,14,18,41)
1647  AS2( pxor mm2, mm3)
1648  AS2( paddq mm0, mm2) // h += Ch(e,f,g)
1649  AS2( paddq mm5, mm0) // h += S1(e)
1650  AS2( movq mm2, [edi+1*8]) // b
1651  AS2( movq mm1, mm2)
1652  AS2( por mm2, mm4)
1653  AS2( pand mm2, [edi+2*8]) // c
1654  AS2( pand mm1, mm4)
1655  AS2( por mm1, mm2)
1656  AS2( paddq mm1, mm5) // temp = h + Maj(a,b,c)
1657  AS2( paddq mm5, [edi+3*8]) // e = d + h
1658  AS2( movq [edi+3*8], mm5)
1659  AS2( movq [edi+11*8], mm5)
1660  SSE2_S0_S1(mm4,28,34,39) // S0(a)
1661  AS2( paddq mm4, mm1) // a = temp + S0(a)
1662  AS2( movq [edi-8], mm4)
1663  AS2( movq [edi+7*8], mm4)
1664  AS1( ret)
1665 
1666  // first 16 rounds
1667  ASL(0)
1668  AS2( movq mm0, [edx+eax*8])
1669  AS2( movq [esi+eax*8], mm0)
1670  AS2( movq [esi+eax*8+16*8], mm0)
1671  AS2( paddq mm0, [ebx+eax*8])
1672  ASC( call, SHA512_Round)
1673  AS1( inc eax)
1674  AS2( sub edi, 8)
1675  AS2( test eax, 7)
1676  ASJ( jnz, 0, b)
1677  AS2( add edi, 8*8)
1678  AS2( cmp eax, 16)
1679  ASJ( jne, 0, b)
1680 
1681  // rest of the rounds
1682  AS2( movdqu xmm0, [esi+(16-2)*8])
1683  ASL(1)
1684  // data expansion, W[i-2] already in xmm0
1685  AS2( movdqu xmm3, [esi])
1686  AS2( paddq xmm3, [esi+(16-7)*8])
1687  AS2( movdqa xmm2, [esi+(16-15)*8])
1688  SSE2_s1(xmm0, 6, 19, 61)
1689  AS2( paddq xmm0, xmm3)
1690  SSE2_s0(xmm2, 1, 7, 8)
1691  AS2( paddq xmm0, xmm2)
1692  AS2( movdq2q mm0, xmm0)
1693  AS2( movhlps xmm1, xmm0)
1694  AS2( paddq mm0, [ebx+eax*8])
1695  AS2( movlps [esi], xmm0)
1696  AS2( movlps [esi+8], xmm1)
1697  AS2( movlps [esi+8*16], xmm0)
1698  AS2( movlps [esi+8*17], xmm1)
1699  // 2 rounds
1700  ASC( call, SHA512_Round)
1701  AS2( sub edi, 8)
1702  AS2( movdq2q mm0, xmm1)
1703  AS2( paddq mm0, [ebx+eax*8+8])
1704  ASC( call, SHA512_Round)
1705  // update indices and loop
1706  AS2( add esi, 16)
1707  AS2( add eax, 2)
1708  AS2( sub edi, 8)
1709  AS2( test eax, 7)
1710  ASJ( jnz, 1, b)
1711  // do housekeeping every 8 rounds
1712  AS2( mov esi, 0xf)
1713  AS2( and esi, eax)
1714 #if CRYPTOPP_BOOL_X32
1715  AS2( lea esi, [esp+8+20*8+8+esi*8])
1716 #else
1717  AS2( lea esi, [esp+4+20*8+8+esi*8])
1718 #endif
1719  AS2( add edi, 8*8)
1720  AS2( cmp eax, 80)
1721  ASJ( jne, 1, b)
1722 
1723 #define SSE2_CombineState(i) \
1724  AS2( movdqa xmm0, [edi+i*16])\
1725  AS2( paddq xmm0, [ecx+i*16])\
1726  AS2( movdqa [ecx+i*16], xmm0)
1727 
1728  SSE2_CombineState(0)
1729  SSE2_CombineState(1)
1730  SSE2_CombineState(2)
1731  SSE2_CombineState(3)
1732 
1733  AS_POP_IF86( sp)
1734  AS1( emms)
1735 
1736 #if defined(__GNUC__)
1737  AS_POP_IF86( bx)
1738  ATT_PREFIX
1739  :
1740  : "a" (SHA512_K), "c" (state), "d" (data)
1741  : "%esi", "%edi", "memory", "cc"
1742  );
1743 #else
1744  AS1( pop edi)
1745  AS1( pop esi)
1746  AS1( pop ebx)
1747  AS1( ret)
1748 #endif
1749 }
1750 #endif // #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
1751 
1752 void SHA512::Transform(word64 *state, const word64 *data)
1753 {
1754  CRYPTOPP_ASSERT(IsAlignedOn(state, GetAlignmentOf<word64>()));
1755  CRYPTOPP_ASSERT(IsAlignedOn(data, GetAlignmentOf<word64>()));
1756 
1757 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32)
1758  if (HasSSE2())
1759  {
1760  SHA512_SSE2_Transform(state, data);
1761  return;
1762  }
1763 #endif
1764 
1765 #define S0(x) (rotrFixed(x,28)^rotrFixed(x,34)^rotrFixed(x,39))
1766 #define S1(x) (rotrFixed(x,14)^rotrFixed(x,18)^rotrFixed(x,41))
1767 #define s0(x) (rotrFixed(x,1)^rotrFixed(x,8)^(x>>7))
1768 #define s1(x) (rotrFixed(x,19)^rotrFixed(x,61)^(x>>6))
1769 
1770 #define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+SHA512_K[i+j]+(j?blk2(i):blk0(i));\
1771  d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i))
1772 
1773  word64 W[16];
1774  word64 T[8];
1775  /* Copy context->state[] to working vars */
1776  memcpy(T, state, sizeof(T));
1777  /* 80 operations, partially loop unrolled */
1778  for (unsigned int j=0; j<80; j+=16)
1779  {
1780  R( 0); R( 1); R( 2); R( 3);
1781  R( 4); R( 5); R( 6); R( 7);
1782  R( 8); R( 9); R(10); R(11);
1783  R(12); R(13); R(14); R(15);
1784  }
1785  /* Add the working vars back into context.state[] */
1786  state[0] += a(0);
1787  state[1] += b(0);
1788  state[2] += c(0);
1789  state[3] += d(0);
1790  state[4] += e(0);
1791  state[5] += f(0);
1792  state[6] += g(0);
1793  state[7] += h(0);
1794 }
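// The scalar SHA-512 rounds reuse the a(i)..h(i) index macros defined for
// SHA-256: T[(0-i)&7] and friends rotate the roles of the eight working
// variables as i advances, so R(0)..R(15) inside the j-loop cover rounds
// j..j+15 without any explicit variable shuffling.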
1795 
1796 NAMESPACE_END
1797 
1798 #endif // #ifndef CRYPTOPP_GENERATE_X64_MASM
1799 #endif // #ifndef CRYPTOPP_IMPORTS