Crypto++  5.6.3
Free C++ class library of cryptographic schemes
sosemanuk.cpp
1 // sosemanuk.cpp - written and placed in the public domain by Wei Dai
2 
3 // use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM sosemanuk.cpp" to generate MASM code
4 
5 #include "pch.h"
6 #include "config.h"
7 
8 #if CRYPTOPP_MSC_VERSION
9 # pragma warning(disable: 4702 4731)
10 #endif
11 
12 #ifndef CRYPTOPP_GENERATE_X64_MASM
13 
14 #include "sosemanuk.h"
15 #include "serpentp.h"
16 #include "secblock.h"
17 #include "misc.h"
18 #include "cpu.h"
19 
20 NAMESPACE_BEGIN(CryptoPP)
21 
22 void SosemanukPolicy::CipherSetKey(const NameValuePairs &params, const byte *userKey, size_t keylen)
23 {
24  CRYPTOPP_UNUSED(params);
25  Serpent_KeySchedule(m_key, 24, userKey, keylen);
26 }
27 
28 void SosemanukPolicy::CipherResynchronize(byte *keystreamBuffer, const byte *iv, size_t length)
29 {
30  CRYPTOPP_UNUSED(keystreamBuffer), CRYPTOPP_UNUSED(iv), CRYPTOPP_UNUSED(length);
31  assert(length==16);
32 
33  word32 a, b, c, d, e;
34 
36  Block::Get(iv)(a)(b)(c)(d);
37 
38  const word32 *k = m_key;
39  unsigned int i=1;
40 
41  do
42  {
43  beforeS0(KX); beforeS0(S0); afterS0(LT);
44  afterS0(KX); afterS0(S1); afterS1(LT);
45  if (i == 3) // after 18th round
46  {
47  m_state[4] = b;
48  m_state[5] = e;
49  m_state[10] = c;
50  m_state[11] = a;
51  }
52  afterS1(KX); afterS1(S2); afterS2(LT);
53  afterS2(KX); afterS2(S3); afterS3(LT);
54  if (i == 2) // after 12th round
55  {
56  m_state[6] = c;
57  m_state[7] = d;
58  m_state[8] = b;
59  m_state[9] = e;
60  }
61  afterS3(KX); afterS3(S4); afterS4(LT);
62  afterS4(KX); afterS4(S5); afterS5(LT);
63  afterS5(KX); afterS5(S6); afterS6(LT);
64  afterS6(KX); afterS6(S7); afterS7(LT);
65 
66  if (i == 3)
67  break;
68 
69  ++i;
70  c = b;
71  b = e;
72  e = d;
73  d = a;
74  a = e;
75  k += 32;
76  }
77  while (true);
78 
79  afterS7(KX);
80 
81  m_state[0] = a;
82  m_state[1] = b;
83  m_state[2] = e;
84  m_state[3] = d;
85 
86 #define XMUX(c, x, y) (x ^ (y & (0 - (c & 1))))
87  m_state[11] += XMUX(m_state[10], m_state[1], m_state[8]);
88  m_state[10] = rotlFixed(m_state[10] * 0x54655307, 7);
89 }
90 
// Lookup tables for the two table-driven word transforms of the keystream
// generator, stored back to back in one 512-entry array:
//   entries [0, 256)   - indexed by MUL_A (and by the assembly path at
//                        [table + index*4]). Two layouts of this half exist;
//                        the preprocessor condition below is the same one
//                        that selects the matching MUL_A definition
//                        (rotate-then-index-by-low-byte vs.
//                        shift-and-index-by-high-byte).
//   entries [256, 512) - indexed by DIV_A (and by the assembly path at
//                        byte offset 1024 from the table start).
// The array has C linkage and is not const; the MSVC inline-assembly path
// loads its address by symbol name.
91 extern "C" {
92 word32 s_sosemanukMulTables[512] = {
// MUL_A half, layout paired with the rotlFixed-based MUL_A.
93 #if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64) && !defined(CRYPTOPP_DISABLE_SOSEMANUK_ASM)
94  0x00000000, 0xE19FCF12, 0x6B973724, 0x8A08F836,
95  0xD6876E48, 0x3718A15A, 0xBD10596C, 0x5C8F967E,
96  0x05A7DC90, 0xE4381382, 0x6E30EBB4, 0x8FAF24A6,
97  0xD320B2D8, 0x32BF7DCA, 0xB8B785FC, 0x59284AEE,
98  0x0AE71189, 0xEB78DE9B, 0x617026AD, 0x80EFE9BF,
99  0xDC607FC1, 0x3DFFB0D3, 0xB7F748E5, 0x566887F7,
100  0x0F40CD19, 0xEEDF020B, 0x64D7FA3D, 0x8548352F,
101  0xD9C7A351, 0x38586C43, 0xB2509475, 0x53CF5B67,
102  0x146722BB, 0xF5F8EDA9, 0x7FF0159F, 0x9E6FDA8D,
103  0xC2E04CF3, 0x237F83E1, 0xA9777BD7, 0x48E8B4C5,
104  0x11C0FE2B, 0xF05F3139, 0x7A57C90F, 0x9BC8061D,
105  0xC7479063, 0x26D85F71, 0xACD0A747, 0x4D4F6855,
106  0x1E803332, 0xFF1FFC20, 0x75170416, 0x9488CB04,
107  0xC8075D7A, 0x29989268, 0xA3906A5E, 0x420FA54C,
108  0x1B27EFA2, 0xFAB820B0, 0x70B0D886, 0x912F1794,
109  0xCDA081EA, 0x2C3F4EF8, 0xA637B6CE, 0x47A879DC,
110  0x28CE44DF, 0xC9518BCD, 0x435973FB, 0xA2C6BCE9,
111  0xFE492A97, 0x1FD6E585, 0x95DE1DB3, 0x7441D2A1,
112  0x2D69984F, 0xCCF6575D, 0x46FEAF6B, 0xA7616079,
113  0xFBEEF607, 0x1A713915, 0x9079C123, 0x71E60E31,
114  0x22295556, 0xC3B69A44, 0x49BE6272, 0xA821AD60,
115  0xF4AE3B1E, 0x1531F40C, 0x9F390C3A, 0x7EA6C328,
116  0x278E89C6, 0xC61146D4, 0x4C19BEE2, 0xAD8671F0,
117  0xF109E78E, 0x1096289C, 0x9A9ED0AA, 0x7B011FB8,
118  0x3CA96664, 0xDD36A976, 0x573E5140, 0xB6A19E52,
119  0xEA2E082C, 0x0BB1C73E, 0x81B93F08, 0x6026F01A,
120  0x390EBAF4, 0xD89175E6, 0x52998DD0, 0xB30642C2,
121  0xEF89D4BC, 0x0E161BAE, 0x841EE398, 0x65812C8A,
122  0x364E77ED, 0xD7D1B8FF, 0x5DD940C9, 0xBC468FDB,
123  0xE0C919A5, 0x0156D6B7, 0x8B5E2E81, 0x6AC1E193,
124  0x33E9AB7D, 0xD276646F, 0x587E9C59, 0xB9E1534B,
125  0xE56EC535, 0x04F10A27, 0x8EF9F211, 0x6F663D03,
126  0x50358817, 0xB1AA4705, 0x3BA2BF33, 0xDA3D7021,
127  0x86B2E65F, 0x672D294D, 0xED25D17B, 0x0CBA1E69,
128  0x55925487, 0xB40D9B95, 0x3E0563A3, 0xDF9AACB1,
129  0x83153ACF, 0x628AF5DD, 0xE8820DEB, 0x091DC2F9,
130  0x5AD2999E, 0xBB4D568C, 0x3145AEBA, 0xD0DA61A8,
131  0x8C55F7D6, 0x6DCA38C4, 0xE7C2C0F2, 0x065D0FE0,
132  0x5F75450E, 0xBEEA8A1C, 0x34E2722A, 0xD57DBD38,
133  0x89F22B46, 0x686DE454, 0xE2651C62, 0x03FAD370,
134  0x4452AAAC, 0xA5CD65BE, 0x2FC59D88, 0xCE5A529A,
135  0x92D5C4E4, 0x734A0BF6, 0xF942F3C0, 0x18DD3CD2,
136  0x41F5763C, 0xA06AB92E, 0x2A624118, 0xCBFD8E0A,
137  0x97721874, 0x76EDD766, 0xFCE52F50, 0x1D7AE042,
138  0x4EB5BB25, 0xAF2A7437, 0x25228C01, 0xC4BD4313,
139  0x9832D56D, 0x79AD1A7F, 0xF3A5E249, 0x123A2D5B,
140  0x4B1267B5, 0xAA8DA8A7, 0x20855091, 0xC11A9F83,
141  0x9D9509FD, 0x7C0AC6EF, 0xF6023ED9, 0x179DF1CB,
142  0x78FBCCC8, 0x996403DA, 0x136CFBEC, 0xF2F334FE,
143  0xAE7CA280, 0x4FE36D92, 0xC5EB95A4, 0x24745AB6,
144  0x7D5C1058, 0x9CC3DF4A, 0x16CB277C, 0xF754E86E,
145  0xABDB7E10, 0x4A44B102, 0xC04C4934, 0x21D38626,
146  0x721CDD41, 0x93831253, 0x198BEA65, 0xF8142577,
147  0xA49BB309, 0x45047C1B, 0xCF0C842D, 0x2E934B3F,
148  0x77BB01D1, 0x9624CEC3, 0x1C2C36F5, 0xFDB3F9E7,
149  0xA13C6F99, 0x40A3A08B, 0xCAAB58BD, 0x2B3497AF,
150  0x6C9CEE73, 0x8D032161, 0x070BD957, 0xE6941645,
151  0xBA1B803B, 0x5B844F29, 0xD18CB71F, 0x3013780D,
152  0x693B32E3, 0x88A4FDF1, 0x02AC05C7, 0xE333CAD5,
153  0xBFBC5CAB, 0x5E2393B9, 0xD42B6B8F, 0x35B4A49D,
154  0x667BFFFA, 0x87E430E8, 0x0DECC8DE, 0xEC7307CC,
155  0xB0FC91B2, 0x51635EA0, 0xDB6BA696, 0x3AF46984,
156  0x63DC236A, 0x8243EC78, 0x084B144E, 0xE9D4DB5C,
157  0xB55B4D22, 0x54C48230, 0xDECC7A06, 0x3F53B514,
// MUL_A half, layout paired with the shift-based MUL_A.
158 #else
159  0x00000000, 0xE19FCF13, 0x6B973726, 0x8A08F835,
160  0xD6876E4C, 0x3718A15F, 0xBD10596A, 0x5C8F9679,
161  0x05A7DC98, 0xE438138B, 0x6E30EBBE, 0x8FAF24AD,
162  0xD320B2D4, 0x32BF7DC7, 0xB8B785F2, 0x59284AE1,
163  0x0AE71199, 0xEB78DE8A, 0x617026BF, 0x80EFE9AC,
164  0xDC607FD5, 0x3DFFB0C6, 0xB7F748F3, 0x566887E0,
165  0x0F40CD01, 0xEEDF0212, 0x64D7FA27, 0x85483534,
166  0xD9C7A34D, 0x38586C5E, 0xB250946B, 0x53CF5B78,
167  0x1467229B, 0xF5F8ED88, 0x7FF015BD, 0x9E6FDAAE,
168  0xC2E04CD7, 0x237F83C4, 0xA9777BF1, 0x48E8B4E2,
169  0x11C0FE03, 0xF05F3110, 0x7A57C925, 0x9BC80636,
170  0xC747904F, 0x26D85F5C, 0xACD0A769, 0x4D4F687A,
171  0x1E803302, 0xFF1FFC11, 0x75170424, 0x9488CB37,
172  0xC8075D4E, 0x2998925D, 0xA3906A68, 0x420FA57B,
173  0x1B27EF9A, 0xFAB82089, 0x70B0D8BC, 0x912F17AF,
174  0xCDA081D6, 0x2C3F4EC5, 0xA637B6F0, 0x47A879E3,
175  0x28CE449F, 0xC9518B8C, 0x435973B9, 0xA2C6BCAA,
176  0xFE492AD3, 0x1FD6E5C0, 0x95DE1DF5, 0x7441D2E6,
177  0x2D699807, 0xCCF65714, 0x46FEAF21, 0xA7616032,
178  0xFBEEF64B, 0x1A713958, 0x9079C16D, 0x71E60E7E,
179  0x22295506, 0xC3B69A15, 0x49BE6220, 0xA821AD33,
180  0xF4AE3B4A, 0x1531F459, 0x9F390C6C, 0x7EA6C37F,
181  0x278E899E, 0xC611468D, 0x4C19BEB8, 0xAD8671AB,
182  0xF109E7D2, 0x109628C1, 0x9A9ED0F4, 0x7B011FE7,
183  0x3CA96604, 0xDD36A917, 0x573E5122, 0xB6A19E31,
184  0xEA2E0848, 0x0BB1C75B, 0x81B93F6E, 0x6026F07D,
185  0x390EBA9C, 0xD891758F, 0x52998DBA, 0xB30642A9,
186  0xEF89D4D0, 0x0E161BC3, 0x841EE3F6, 0x65812CE5,
187  0x364E779D, 0xD7D1B88E, 0x5DD940BB, 0xBC468FA8,
188  0xE0C919D1, 0x0156D6C2, 0x8B5E2EF7, 0x6AC1E1E4,
189  0x33E9AB05, 0xD2766416, 0x587E9C23, 0xB9E15330,
190  0xE56EC549, 0x04F10A5A, 0x8EF9F26F, 0x6F663D7C,
191  0x50358897, 0xB1AA4784, 0x3BA2BFB1, 0xDA3D70A2,
192  0x86B2E6DB, 0x672D29C8, 0xED25D1FD, 0x0CBA1EEE,
193  0x5592540F, 0xB40D9B1C, 0x3E056329, 0xDF9AAC3A,
194  0x83153A43, 0x628AF550, 0xE8820D65, 0x091DC276,
195  0x5AD2990E, 0xBB4D561D, 0x3145AE28, 0xD0DA613B,
196  0x8C55F742, 0x6DCA3851, 0xE7C2C064, 0x065D0F77,
197  0x5F754596, 0xBEEA8A85, 0x34E272B0, 0xD57DBDA3,
198  0x89F22BDA, 0x686DE4C9, 0xE2651CFC, 0x03FAD3EF,
199  0x4452AA0C, 0xA5CD651F, 0x2FC59D2A, 0xCE5A5239,
200  0x92D5C440, 0x734A0B53, 0xF942F366, 0x18DD3C75,
201  0x41F57694, 0xA06AB987, 0x2A6241B2, 0xCBFD8EA1,
202  0x977218D8, 0x76EDD7CB, 0xFCE52FFE, 0x1D7AE0ED,
203  0x4EB5BB95, 0xAF2A7486, 0x25228CB3, 0xC4BD43A0,
204  0x9832D5D9, 0x79AD1ACA, 0xF3A5E2FF, 0x123A2DEC,
205  0x4B12670D, 0xAA8DA81E, 0x2085502B, 0xC11A9F38,
206  0x9D950941, 0x7C0AC652, 0xF6023E67, 0x179DF174,
207  0x78FBCC08, 0x9964031B, 0x136CFB2E, 0xF2F3343D,
208  0xAE7CA244, 0x4FE36D57, 0xC5EB9562, 0x24745A71,
209  0x7D5C1090, 0x9CC3DF83, 0x16CB27B6, 0xF754E8A5,
210  0xABDB7EDC, 0x4A44B1CF, 0xC04C49FA, 0x21D386E9,
211  0x721CDD91, 0x93831282, 0x198BEAB7, 0xF81425A4,
212  0xA49BB3DD, 0x45047CCE, 0xCF0C84FB, 0x2E934BE8,
213  0x77BB0109, 0x9624CE1A, 0x1C2C362F, 0xFDB3F93C,
214  0xA13C6F45, 0x40A3A056, 0xCAAB5863, 0x2B349770,
215  0x6C9CEE93, 0x8D032180, 0x070BD9B5, 0xE69416A6,
216  0xBA1B80DF, 0x5B844FCC, 0xD18CB7F9, 0x301378EA,
217  0x693B320B, 0x88A4FD18, 0x02AC052D, 0xE333CA3E,
218  0xBFBC5C47, 0x5E239354, 0xD42B6B61, 0x35B4A472,
219  0x667BFF0A, 0x87E43019, 0x0DECC82C, 0xEC73073F,
220  0xB0FC9146, 0x51635E55, 0xDB6BA660, 0x3AF46973,
221  0x63DC2392, 0x8243EC81, 0x084B14B4, 0xE9D4DBA7,
222  0xB55B4DDE, 0x54C482CD, 0xDECC7AF8, 0x3F53B5EB,
223 #endif
// DIV_A half: entries 256..511, common to both configurations.
224  0x00000000, 0x180F40CD, 0x301E8033, 0x2811C0FE,
225  0x603CA966, 0x7833E9AB, 0x50222955, 0x482D6998,
226  0xC078FBCC, 0xD877BB01, 0xF0667BFF, 0xE8693B32,
227  0xA04452AA, 0xB84B1267, 0x905AD299, 0x88559254,
228  0x29F05F31, 0x31FF1FFC, 0x19EEDF02, 0x01E19FCF,
229  0x49CCF657, 0x51C3B69A, 0x79D27664, 0x61DD36A9,
230  0xE988A4FD, 0xF187E430, 0xD99624CE, 0xC1996403,
231  0x89B40D9B, 0x91BB4D56, 0xB9AA8DA8, 0xA1A5CD65,
232  0x5249BE62, 0x4A46FEAF, 0x62573E51, 0x7A587E9C,
233  0x32751704, 0x2A7A57C9, 0x026B9737, 0x1A64D7FA,
234  0x923145AE, 0x8A3E0563, 0xA22FC59D, 0xBA208550,
235  0xF20DECC8, 0xEA02AC05, 0xC2136CFB, 0xDA1C2C36,
236  0x7BB9E153, 0x63B6A19E, 0x4BA76160, 0x53A821AD,
237  0x1B854835, 0x038A08F8, 0x2B9BC806, 0x339488CB,
238  0xBBC11A9F, 0xA3CE5A52, 0x8BDF9AAC, 0x93D0DA61,
239  0xDBFDB3F9, 0xC3F2F334, 0xEBE333CA, 0xF3EC7307,
240  0xA492D5C4, 0xBC9D9509, 0x948C55F7, 0x8C83153A,
241  0xC4AE7CA2, 0xDCA13C6F, 0xF4B0FC91, 0xECBFBC5C,
242  0x64EA2E08, 0x7CE56EC5, 0x54F4AE3B, 0x4CFBEEF6,
243  0x04D6876E, 0x1CD9C7A3, 0x34C8075D, 0x2CC74790,
244  0x8D628AF5, 0x956DCA38, 0xBD7C0AC6, 0xA5734A0B,
245  0xED5E2393, 0xF551635E, 0xDD40A3A0, 0xC54FE36D,
246  0x4D1A7139, 0x551531F4, 0x7D04F10A, 0x650BB1C7,
247  0x2D26D85F, 0x35299892, 0x1D38586C, 0x053718A1,
248  0xF6DB6BA6, 0xEED42B6B, 0xC6C5EB95, 0xDECAAB58,
249  0x96E7C2C0, 0x8EE8820D, 0xA6F942F3, 0xBEF6023E,
250  0x36A3906A, 0x2EACD0A7, 0x06BD1059, 0x1EB25094,
251  0x569F390C, 0x4E9079C1, 0x6681B93F, 0x7E8EF9F2,
252  0xDF2B3497, 0xC724745A, 0xEF35B4A4, 0xF73AF469,
253  0xBF179DF1, 0xA718DD3C, 0x8F091DC2, 0x97065D0F,
254  0x1F53CF5B, 0x075C8F96, 0x2F4D4F68, 0x37420FA5,
255  0x7F6F663D, 0x676026F0, 0x4F71E60E, 0x577EA6C3,
256  0xE18D0321, 0xF98243EC, 0xD1938312, 0xC99CC3DF,
257  0x81B1AA47, 0x99BEEA8A, 0xB1AF2A74, 0xA9A06AB9,
258  0x21F5F8ED, 0x39FAB820, 0x11EB78DE, 0x09E43813,
259  0x41C9518B, 0x59C61146, 0x71D7D1B8, 0x69D89175,
260  0xC87D5C10, 0xD0721CDD, 0xF863DC23, 0xE06C9CEE,
261  0xA841F576, 0xB04EB5BB, 0x985F7545, 0x80503588,
262  0x0805A7DC, 0x100AE711, 0x381B27EF, 0x20146722,
263  0x68390EBA, 0x70364E77, 0x58278E89, 0x4028CE44,
264  0xB3C4BD43, 0xABCBFD8E, 0x83DA3D70, 0x9BD57DBD,
265  0xD3F81425, 0xCBF754E8, 0xE3E69416, 0xFBE9D4DB,
266  0x73BC468F, 0x6BB30642, 0x43A2C6BC, 0x5BAD8671,
267  0x1380EFE9, 0x0B8FAF24, 0x239E6FDA, 0x3B912F17,
268  0x9A34E272, 0x823BA2BF, 0xAA2A6241, 0xB225228C,
269  0xFA084B14, 0xE2070BD9, 0xCA16CB27, 0xD2198BEA,
270  0x5A4C19BE, 0x42435973, 0x6A52998D, 0x725DD940,
271  0x3A70B0D8, 0x227FF015, 0x0A6E30EB, 0x12617026,
272  0x451FD6E5, 0x5D109628, 0x750156D6, 0x6D0E161B,
273  0x25237F83, 0x3D2C3F4E, 0x153DFFB0, 0x0D32BF7D,
274  0x85672D29, 0x9D686DE4, 0xB579AD1A, 0xAD76EDD7,
275  0xE55B844F, 0xFD54C482, 0xD545047C, 0xCD4A44B1,
276  0x6CEF89D4, 0x74E0C919, 0x5CF109E7, 0x44FE492A,
277  0x0CD320B2, 0x14DC607F, 0x3CCDA081, 0x24C2E04C,
278  0xAC977218, 0xB49832D5, 0x9C89F22B, 0x8486B2E6,
279  0xCCABDB7E, 0xD4A49BB3, 0xFCB55B4D, 0xE4BA1B80,
280  0x17566887, 0x0F59284A, 0x2748E8B4, 0x3F47A879,
281  0x776AC1E1, 0x6F65812C, 0x477441D2, 0x5F7B011F,
282  0xD72E934B, 0xCF21D386, 0xE7301378, 0xFF3F53B5,
283  0xB7123A2D, 0xAF1D7AE0, 0x870CBA1E, 0x9F03FAD3,
284  0x3EA637B6, 0x26A9777B, 0x0EB8B785, 0x16B7F748,
285  0x5E9A9ED0, 0x4695DE1D, 0x6E841EE3, 0x768B5E2E,
286  0xFEDECC7A, 0xE6D18CB7, 0xCEC04C49, 0xD6CF0C84,
287  0x9EE2651C, 0x86ED25D1, 0xAEFCE52F, 0xB6F3A5E2
288 };
289 }
290 
291 #if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64) && !defined(CRYPTOPP_DISABLE_SOSEMANUK_ASM)
292 unsigned int SosemanukPolicy::GetAlignment() const
293 {
294 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && !defined(CRYPTOPP_DISABLE_SOSEMANUK_ASM)
295 #ifdef __INTEL_COMPILER
296  if (HasSSE2() && !IsP4()) // Intel compiler produces faster code for this algorithm on the P4
297 #else
298  if (HasSSE2())
299 #endif
300  return 16;
301  else
302 #endif
303  return GetAlignmentOf<word32>();
304 }
305 
306 unsigned int SosemanukPolicy::GetOptimalBlockSize() const
307 {
308 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && !defined(CRYPTOPP_DISABLE_SOSEMANUK_ASM)
309 #ifdef __INTEL_COMPILER
310  if (HasSSE2() && !IsP4()) // Intel compiler produces faster code for this algorithm on the P4
311 #else
312  if (HasSSE2())
313 #endif
314  return 4*BYTES_PER_ITERATION;
315  else
316 #endif
317  return BYTES_PER_ITERATION;
318 }
319 #endif
320 
321 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
322 extern "C" {
323 void Sosemanuk_OperateKeystream(size_t iterationCount, const byte *input, byte *output, word32 *state);
324 }
325 #endif
326 
327 void SosemanukPolicy::OperateKeystream(KeystreamOperation operation, byte *output, const byte *input, size_t iterationCount)
328 {
329 #endif // #ifdef CRYPTOPP_GENERATE_X64_MASM
330 
331 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
332  Sosemanuk_OperateKeystream(iterationCount, input, output, m_state.data());
333  return;
334 #endif
335 
336 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && !defined(CRYPTOPP_DISABLE_SOSEMANUK_ASM)
337 #ifdef CRYPTOPP_GENERATE_X64_MASM
338  ALIGN 8
339  Sosemanuk_OperateKeystream PROC FRAME
340  rex_push_reg rsi
341  push_reg rdi
342  alloc_stack(80*4*2+12*4+8*WORD_SZ + 2*16+8)
343  save_xmm128 xmm6, 02f0h
344  save_xmm128 xmm7, 0300h
345  .endprolog
346  mov rdi, r8
347  mov rax, r9
348 #else
349 #ifdef __INTEL_COMPILER
350  if (HasSSE2() && !IsP4()) // Intel compiler produces faster code for this algorithm on the P4
351 #else
352  if (HasSSE2())
353 #endif
354  {
355 #ifdef __GNUC__
356  #if CRYPTOPP_BOOL_X64
358  #endif
359  __asm__ __volatile__
360  (
361  INTEL_NOPREFIX
362  AS_PUSH_IF86( bx)
363 #else
364  word32 *state = m_state;
365  AS2( mov WORD_REG(ax), state)
366  AS2( mov WORD_REG(di), output)
367  AS2( mov WORD_REG(dx), input)
368  AS2( mov WORD_REG(cx), iterationCount)
369 #endif
370 #endif // #ifdef CRYPTOPP_GENERATE_X64_MASM
371 
372 #if defined(__GNUC__) && CRYPTOPP_BOOL_X64
373  #define SSE2_workspace %5
374 #else
375  #define SSE2_workspace WORD_REG(sp)
376 #endif
377 
378 #define SSE2_output WORD_PTR [SSE2_workspace+1*WORD_SZ]
379 #define SSE2_input WORD_PTR [SSE2_workspace+2*WORD_SZ]
380 #define SSE2_wordsLeft WORD_PTR [SSE2_workspace+3*WORD_SZ]
381 #define SSE2_diEnd WORD_PTR [SSE2_workspace+4*WORD_SZ]
382 #define SSE2_pMulTables WORD_PTR [SSE2_workspace+5*WORD_SZ]
383 #define SSE2_state WORD_PTR [SSE2_workspace+6*WORD_SZ]
384 #define SSE2_wordsLeft2 WORD_PTR [SSE2_workspace+7*WORD_SZ]
385 #define SSE2_stateCopy SSE2_workspace + 8*WORD_SZ
386 #define SSE2_uvStart SSE2_stateCopy + 12*4
387 
388 #if (CRYPTOPP_BOOL_X86) && !defined(CRYPTOPP_DISABLE_SOSEMANUK_ASM)
389  AS_PUSH_IF86( bp)
390  AS2( mov AS_REG_6, esp)
391  AS2( and esp, -16)
392  AS2( sub esp, 80*4*2+12*4+8*WORD_SZ) // 80 v's, 80 u's, 12 state, 8 locals
393  AS2( mov [esp], AS_REG_6)
394 #endif
395  AS2( mov SSE2_output, WORD_REG(di))
396  AS2( mov SSE2_input, WORD_REG(dx))
397  AS2( mov SSE2_state, WORD_REG(ax))
398 #ifndef _MSC_VER
399  AS2( mov SSE2_pMulTables, WORD_REG(si))
400 #endif
401  AS2( lea WORD_REG(cx), [4*WORD_REG(cx)+WORD_REG(cx)])
402  AS2( lea WORD_REG(si), [4*WORD_REG(cx)])
403  AS2( mov SSE2_wordsLeft, WORD_REG(si))
404  AS2( movdqa xmm0, [WORD_REG(ax)+0*16]) // copy state to stack to save a register
405  AS2( movdqa [SSE2_stateCopy+0*16], xmm0)
406  AS2( movdqa xmm0, [WORD_REG(ax)+1*16])
407  AS2( movdqa [SSE2_stateCopy+1*16], xmm0)
408  AS2( movq xmm0, QWORD PTR [WORD_REG(ax)+2*16])
409  AS2( movq QWORD PTR [SSE2_stateCopy+2*16], xmm0)
410  AS2( psrlq xmm0, 32)
411  AS2( movd AS_REG_6d, xmm0) // s(9)
412  AS2( mov ecx, [WORD_REG(ax)+10*4])
413  AS2( mov edx, [WORD_REG(ax)+11*4])
414  AS2( pcmpeqb xmm7, xmm7) // all ones
415 
416 #define s(i) SSE2_stateCopy + ASM_MOD(i,10)*4
417 #define u(j) WORD_REG(di) + (ASM_MOD(j,4)*20 + (j/4)) * 4
418 #define v(j) WORD_REG(di) + (ASM_MOD(j,4)*20 + (j/4)) * 4 + 80*4
419 
420 #define R10 ecx
421 #define R11 edx
422 #define R20 edx
423 #define R21 ecx
424 // workaround bug in GAS 2.15
425 #define R20r WORD_REG(dx)
426 #define R21r WORD_REG(cx)
427 
428 #define SSE2_STEP(i, j) \
429  AS2( mov eax, [s(i+0)])\
430  AS2( mov [v(i)], eax)\
431  AS2( rol eax, 8)\
432  AS2( lea AS_REG_7, [AS_REG_6 + R2##j##r])\
433  AS2( xor AS_REG_7d, R1##j)\
434  AS2( mov [u(i)], AS_REG_7d)\
435  AS2( mov AS_REG_7d, 1)\
436  AS2( and AS_REG_7d, R2##j)\
437  AS1( neg AS_REG_7d)\
438  AS2( and AS_REG_7d, AS_REG_6d)\
439  AS2( xor AS_REG_6d, eax)\
440  AS2( movzx eax, al)\
441  AS2( xor AS_REG_6d, [WORD_REG(si)+WORD_REG(ax)*4])\
442  AS2( mov eax, [s(i+3)])\
443  AS2( xor AS_REG_7d, [s(i+2)])\
444  AS2( add R1##j, AS_REG_7d)\
445  AS2( movzx AS_REG_7d, al)\
446  AS2( shr eax, 8)\
447  AS2( xor AS_REG_6d, [WORD_REG(si)+1024+AS_REG_7*4])\
448  AS2( xor AS_REG_6d, eax)\
449  AS2( imul R2##j, AS_HEX(54655307))\
450  AS2( rol R2##j, 7)\
451  AS2( mov [s(i+0)], AS_REG_6d)\
452 
453  ASL(2) // outer loop, each iteration of this processes 80 words
454  AS2( lea WORD_REG(di), [SSE2_uvStart]) // start of v and u
455  AS2( mov WORD_REG(ax), 80)
456  AS2( cmp WORD_REG(si), 80)
457  AS2( cmovg WORD_REG(si), WORD_REG(ax))
458  AS2( mov SSE2_wordsLeft2, WORD_REG(si))
459  AS2( lea WORD_REG(si), [WORD_REG(di)+WORD_REG(si)]) // use to end first inner loop
460  AS2( mov SSE2_diEnd, WORD_REG(si))
461 #ifdef _MSC_VER
462  AS2( lea WORD_REG(si), s_sosemanukMulTables)
463 #else
464  AS2( mov WORD_REG(si), SSE2_pMulTables)
465 #endif
466 
467  ASL(0) // first inner loop, 20 words each, 4 iterations
468  SSE2_STEP(0, 0)
469  SSE2_STEP(1, 1)
470  SSE2_STEP(2, 0)
471  SSE2_STEP(3, 1)
472  SSE2_STEP(4, 0)
473  SSE2_STEP(5, 1)
474  SSE2_STEP(6, 0)
475  SSE2_STEP(7, 1)
476  SSE2_STEP(8, 0)
477  SSE2_STEP(9, 1)
478  SSE2_STEP(10, 0)
479  SSE2_STEP(11, 1)
480  SSE2_STEP(12, 0)
481  SSE2_STEP(13, 1)
482  SSE2_STEP(14, 0)
483  SSE2_STEP(15, 1)
484  SSE2_STEP(16, 0)
485  SSE2_STEP(17, 1)
486  SSE2_STEP(18, 0)
487  SSE2_STEP(19, 1)
488  // loop
489  AS2( add WORD_REG(di), 5*4)
490  AS2( cmp WORD_REG(di), SSE2_diEnd)
491  ASJ( jne, 0, b)
492 
493  AS2( mov WORD_REG(ax), SSE2_input)
494  AS2( mov AS_REG_7, SSE2_output)
495  AS2( lea WORD_REG(di), [SSE2_uvStart]) // start of v and u
496  AS2( mov WORD_REG(si), SSE2_wordsLeft2)
497 
498  ASL(1) // second inner loop, 16 words each, 5 iterations
499  AS2( movdqa xmm0, [WORD_REG(di)+0*20*4])
500  AS2( movdqa xmm2, [WORD_REG(di)+2*20*4])
501  AS2( movdqa xmm3, [WORD_REG(di)+3*20*4])
502  AS2( movdqa xmm1, [WORD_REG(di)+1*20*4])
503  // S2
504  AS2( movdqa xmm4, xmm0)
505  AS2( pand xmm0, xmm2)
506  AS2( pxor xmm0, xmm3)
507  AS2( pxor xmm2, xmm1)
508  AS2( pxor xmm2, xmm0)
509  AS2( por xmm3, xmm4)
510  AS2( pxor xmm3, xmm1)
511  AS2( pxor xmm4, xmm2)
512  AS2( movdqa xmm1, xmm3)
513  AS2( por xmm3, xmm4)
514  AS2( pxor xmm3, xmm0)
515  AS2( pand xmm0, xmm1)
516  AS2( pxor xmm4, xmm0)
517  AS2( pxor xmm1, xmm3)
518  AS2( pxor xmm1, xmm4)
519  AS2( pxor xmm4, xmm7)
520  // xor with v
521  AS2( pxor xmm2, [WORD_REG(di)+80*4])
522  AS2( pxor xmm3, [WORD_REG(di)+80*5])
523  AS2( pxor xmm1, [WORD_REG(di)+80*6])
524  AS2( pxor xmm4, [WORD_REG(di)+80*7])
525  // exit loop early if less than 16 words left to output
526  // this is necessary because block size is 20 words, and we output 16 words in each iteration of this loop
527  AS2( cmp WORD_REG(si), 16)
528  ASJ( jl, 4, f)
529  // unpack
530  AS2( movdqa xmm6, xmm2)
531  AS2( punpckldq xmm2, xmm3)
532  AS2( movdqa xmm5, xmm1)
533  AS2( punpckldq xmm1, xmm4)
534  AS2( movdqa xmm0, xmm2)
535  AS2( punpcklqdq xmm2, xmm1)
536  AS2( punpckhqdq xmm0, xmm1)
537  AS2( punpckhdq xmm6, xmm3)
538  AS2( punpckhdq xmm5, xmm4)
539  AS2( movdqa xmm3, xmm6)
540  AS2( punpcklqdq xmm6, xmm5)
541  AS2( punpckhqdq xmm3, xmm5)
542 
543  // output keystream
544  AS_XMM_OUTPUT4(SSE2_Sosemanuk_Output, WORD_REG(ax), AS_REG_7, 2,0,6,3, 1, 0,1,2,3, 4)
545 
546  // loop
547  AS2( add WORD_REG(di), 4*4)
548  AS2( sub WORD_REG(si), 16)
549  ASJ( jnz, 1, b)
550 
551  // outer loop
552  AS2( mov WORD_REG(si), SSE2_wordsLeft)
553  AS2( sub WORD_REG(si), 80)
554  ASJ( jz, 6, f)
555  AS2( mov SSE2_wordsLeft, WORD_REG(si))
556  AS2( mov SSE2_input, WORD_REG(ax))
557  AS2( mov SSE2_output, AS_REG_7)
558  ASJ( jmp, 2, b)
559 
560  ASL(4) // final output of less than 16 words
561  AS2( test WORD_REG(ax), WORD_REG(ax))
562  ASJ( jz, 5, f)
563  AS2( movd xmm0, dword ptr [WORD_REG(ax)+0*4])
564  AS2( pxor xmm2, xmm0)
565  AS2( movd xmm0, dword ptr [WORD_REG(ax)+1*4])
566  AS2( pxor xmm3, xmm0)
567  AS2( movd xmm0, dword ptr [WORD_REG(ax)+2*4])
568  AS2( pxor xmm1, xmm0)
569  AS2( movd xmm0, dword ptr [WORD_REG(ax)+3*4])
570  AS2( pxor xmm4, xmm0)
571  AS2( add WORD_REG(ax), 16)
572  ASL(5)
573  AS2( movd dword ptr [AS_REG_7+0*4], xmm2)
574  AS2( movd dword ptr [AS_REG_7+1*4], xmm3)
575  AS2( movd dword ptr [AS_REG_7+2*4], xmm1)
576  AS2( movd dword ptr [AS_REG_7+3*4], xmm4)
577  AS2( sub WORD_REG(si), 4)
578  ASJ( jz, 6, f)
579  AS2( add AS_REG_7, 16)
580  AS2( psrldq xmm2, 4)
581  AS2( psrldq xmm3, 4)
582  AS2( psrldq xmm1, 4)
583  AS2( psrldq xmm4, 4)
584  ASJ( jmp, 4, b)
585 
586  ASL(6) // save state
587  AS2( mov AS_REG_6, SSE2_state)
588  AS2( movdqa xmm0, [SSE2_stateCopy+0*16])
589  AS2( movdqa [AS_REG_6+0*16], xmm0)
590  AS2( movdqa xmm0, [SSE2_stateCopy+1*16])
591  AS2( movdqa [AS_REG_6+1*16], xmm0)
592  AS2( movq xmm0, QWORD PTR [SSE2_stateCopy+2*16])
593  AS2( movq QWORD PTR [AS_REG_6+2*16], xmm0)
594  AS2( mov [AS_REG_6+10*4], ecx)
595  AS2( mov [AS_REG_6+11*4], edx)
596 
597  AS_POP_IF86( sp)
598  AS_POP_IF86( bp)
599 
600 #ifdef __GNUC__
601  AS_POP_IF86( bx)
602  ATT_PREFIX
603  :
604  : "a" (m_state.m_ptr), "c" (iterationCount), "S" (s_sosemanukMulTables), "D" (output), "d" (input)
605  #if CRYPTOPP_BOOL_X64
606  , "r" (workspace.m_ptr)
607  : "memory", "cc", "%r9", "%r10", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
608  #else
609  : "memory", "cc"
610  #endif
611  );
612 #endif
613 #ifdef CRYPTOPP_GENERATE_X64_MASM
614  movdqa xmm6, [rsp + 02f0h]
615  movdqa xmm7, [rsp + 0300h]
616  add rsp, 80*4*2+12*4+8*WORD_SZ + 2*16+8
617  pop rdi
618  pop rsi
619  ret
620  Sosemanuk_OperateKeystream ENDP
621 #else
622  }
623  else
624 #endif
625 #endif
626 #ifndef CRYPTOPP_GENERATE_X64_MASM
627  {
628 #if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64) && !defined(CRYPTOPP_DISABLE_SOSEMANUK_ASM)
629 #define MUL_A(x) (x = rotlFixed(x, 8), x ^ s_sosemanukMulTables[byte(x)])
630 #else
631 #define MUL_A(x) (((x) << 8) ^ s_sosemanukMulTables[(x) >> 24])
632 #endif
633 
634 #define DIV_A(x) (((x) >> 8) ^ s_sosemanukMulTables[256 + byte(x)])
635 
636 #define r1(i) ((i%2) ? reg2 : reg1)
637 #define r2(i) ((i%2) ? reg1 : reg2)
638 
639 #define STEP(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, v, u) \
640  u = (s##x9 + r2(x0)) ^ r1(x0);\
641  v = s##x0;\
642  s##x0 = MUL_A(s##x0) ^ DIV_A(s##x3) ^ s##x9;\
643  r1(x0) += XMUX(r2(x0), s##x2, s##x9);\
644  r2(x0) = rotlFixed(r2(x0) * 0x54655307, 7);\
645 
646 #define SOSEMANUK_OUTPUT(x) \
647  CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 0, u2 ^ v0);\
648  CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 1, u3 ^ v1);\
649  CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 2, u1 ^ v2);\
650  CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 3, u4 ^ v3);
651 
652 #define OUTPUT4 \
653  S2(0, u0, u1, u2, u3, u4);\
654  CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH(SOSEMANUK_OUTPUT, 4*4);
655 
656  word32 s0 = m_state[0];
657  word32 s1 = m_state[1];
658  word32 s2 = m_state[2];
659  word32 s3 = m_state[3];
660  word32 s4 = m_state[4];
661  word32 s5 = m_state[5];
662  word32 s6 = m_state[6];
663  word32 s7 = m_state[7];
664  word32 s8 = m_state[8];
665  word32 s9 = m_state[9];
666  word32 reg1 = m_state[10];
667  word32 reg2 = m_state[11];
668  word32 u0, u1, u2, u3, u4, v0, v1, v2, v3;
669 
670  do
671  {
672  STEP(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, v0, u0)
673  STEP(1, 2, 3, 4, 5, 6, 7, 8, 9, 0, v1, u1)
674  STEP(2, 3, 4, 5, 6, 7, 8, 9, 0, 1, v2, u2)
675  STEP(3, 4, 5, 6, 7, 8, 9, 0, 1, 2, v3, u3)
676  OUTPUT4
677  STEP(4, 5, 6, 7, 8, 9, 0, 1, 2, 3, v0, u0)
678  STEP(5, 6, 7, 8, 9, 0, 1, 2, 3, 4, v1, u1)
679  STEP(6, 7, 8, 9, 0, 1, 2, 3, 4, 5, v2, u2)
680  STEP(7, 8, 9, 0, 1, 2, 3, 4, 5, 6, v3, u3)
681  OUTPUT4
682  STEP(8, 9, 0, 1, 2, 3, 4, 5, 6, 7, v0, u0)
683  STEP(9, 0, 1, 2, 3, 4, 5, 6, 7, 8, v1, u1)
684  STEP(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, v2, u2)
685  STEP(1, 2, 3, 4, 5, 6, 7, 8, 9, 0, v3, u3)
686  OUTPUT4
687  STEP(2, 3, 4, 5, 6, 7, 8, 9, 0, 1, v0, u0)
688  STEP(3, 4, 5, 6, 7, 8, 9, 0, 1, 2, v1, u1)
689  STEP(4, 5, 6, 7, 8, 9, 0, 1, 2, 3, v2, u2)
690  STEP(5, 6, 7, 8, 9, 0, 1, 2, 3, 4, v3, u3)
691  OUTPUT4
692  STEP(6, 7, 8, 9, 0, 1, 2, 3, 4, 5, v0, u0)
693  STEP(7, 8, 9, 0, 1, 2, 3, 4, 5, 6, v1, u1)
694  STEP(8, 9, 0, 1, 2, 3, 4, 5, 6, 7, v2, u2)
695  STEP(9, 0, 1, 2, 3, 4, 5, 6, 7, 8, v3, u3)
696  OUTPUT4
697  }
698  while (--iterationCount);
699 
700  m_state[0] = s0;
701  m_state[1] = s1;
702  m_state[2] = s2;
703  m_state[3] = s3;
704  m_state[4] = s4;
705  m_state[5] = s5;
706  m_state[6] = s6;
707  m_state[7] = s7;
708  m_state[8] = s8;
709  m_state[9] = s9;
710  m_state[10] = reg1;
711  m_state[11] = reg2;
712  }
713 }
714 
715 NAMESPACE_END
716 
717 #endif // #ifndef CRYPTOPP_GENERATE_X64_MASM
Utility functions for the Crypto++ library.
T rotlFixed(T x, unsigned int y)
Performs a left rotate.
Definition: misc.h:1178
Library configuration file.
Classes for Sosemanuk stream cipher.
Classes and functions for secure memory allocations.
A::pointer data()
Provides a pointer to the first element in the memory block.
Definition: secblock.h:508
Fixed size stack-based SecBlock with 16-byte alignment.
Definition: secblock.h:758
Classes, functions, intrinsics and features for X86, X32 and X64 assembly.
Crypto++ library namespace.
Interface for retrieving values given their names.
Definition: cryptlib.h:277