00001
00002
00003 #include "pch.h"
00004 #include "sosemanuk.h"
00005 #include "misc.h"
00006 #include "cpu.h"
00007
00008 #include "serpentp.h"
00009
00010 NAMESPACE_BEGIN(CryptoPP)
00011
00012 void SosemanukPolicy::CipherSetKey(const NameValuePairs ¶ms, const byte *userKey, size_t keylen)
00013 {
00014 Serpent_KeySchedule(m_key, 24, userKey, keylen);
00015 }
00016
// Re-initialises the twelve words of m_state from a fresh IV.
//
// Four little-endian word32 values are read from iv into a, b, c, d. They are
// then pushed through 24 S-box steps (three passes over the eight-step loop
// body below) built from the macros in serpentp.h, using the subkeys in m_key.
// Intermediate values are captured into m_state after the 12th, 18th and 24th
// step.
//
// keystreamBuffer - not referenced by this function.
// iv              - source of the four word32 values read by Block::Get.
00017 void SosemanukPolicy::CipherResynchronize(byte *keystreamBuffer, const byte *iv)
00018 {
// Working words. The serpentp.h macros used below take no variable arguments,
// so they presumably refer to a..e and k by name — do not rename these.
00019 word32 a, b, c, d, e;
00020
00021 typedef BlockGetAndPut<word32, LittleEndian> Block;
00022 Block::Get(iv)(a)(b)(c)(d);
00023
// k walks the expanded key; i counts passes of the loop body (1..3).
00024 const word32 *k = m_key;
00025 unsigned int i=1;
00026
00027 do
00028 {
// Each "KX; Sn; LT" triple is one step (presumably key-xor, S-box n, linear
// transform — the macro bodies are not visible in this file).
00029 beforeS0(KX); beforeS0(S0); afterS0(LT);
00030 afterS0(KX); afterS0(S1); afterS1(LT);
// Third pass, two steps in: 18 of the 24 steps are done.
00031 if (i == 3)
00032 {
00033 m_state[4] = b;
00034 m_state[5] = e;
00035 m_state[10] = c;
00036 m_state[11] = a;
00037 }
00038 afterS1(KX); afterS1(S2); afterS2(LT);
00039 afterS2(KX); afterS2(S3); afterS3(LT);
// Second pass, four steps in: 12 of the 24 steps are done.
00040 if (i == 2)
00041 {
00042 m_state[6] = c;
00043 m_state[7] = d;
00044 m_state[8] = b;
00045 m_state[9] = e;
00046 }
00047 afterS3(KX); afterS3(S4); afterS4(LT);
00048 afterS4(KX); afterS4(S5); afterS5(LT);
00049 afterS5(KX); afterS5(S6); afterS6(LT);
00050 afterS6(KX); afterS6(S7); afterS7(LT);
00051
// Leave after the third pass without permuting the words or advancing k.
00052 if (i == 3)
00053 break;
00054
// Prepare the next pass: the new (a, b, c, d) become the old (d, e, b, a),
// and k moves forward by 32 words.
00055 ++i;
00056 c = b;
00057 b = e;
00058 e = d;
00059 d = a;
00060 a = e;
00061 k += 32;
00062 }
00063 while (true);
00064
// One more KX after the last step (presumably the final key mixing).
00065 afterS7(KX);
00066
00067 m_state[0] = a;
00068 m_state[1] = b;
00069 m_state[2] = e;
00070 m_state[3] = d;
00071
// XMUX(c, x, y) yields x when the low bit of c is 0 and x ^ y when it is 1.
// The macro stays defined after this function; STEP in OperateKeystream uses it.
00072 #define XMUX(c, x, y) (x ^ (y & (0 - (c & 1))))
// Apply one update to the two words in m_state[10] / m_state[11]; these are the
// same two expressions that the STEP macro in OperateKeystream applies.
00073 m_state[11] += XMUX(m_state[10], m_state[1], m_state[8]);
00074 m_state[10] = rotlFixed(m_state[10] * 0x54655307, 7);
00075 }
00076
// Lookup tables for the MUL_A / DIV_A macros in OperateKeystream. The SSE2
// assembly in that function reads the same array through WORD_REG(si).
//   entries   0..255 : indexed by MUL_A
//   entries 256..511 : indexed by DIV_A (256 words = byte offset 1024)
// NOTE(review): presumably multiplication and division by the LFSR constant
// alpha from the Sosemanuk specification — confirm against the spec.
00077 static word32 s_mulTables[512] = {
// First half, x86/x64 variant. On these targets MUL_A rotates x left by 8 and
// indexes with the low byte, so each entry here is the matching entry of the
// #else variant XORed with its own index (e.g. 0xE19FCF12 vs 0xE19FCF13 at 1).
00078 #if CRYPTOPP_BOOL_X86 | CRYPTOPP_BOOL_X64
00079 0x00000000, 0xE19FCF12, 0x6B973724, 0x8A08F836,
00080 0xD6876E48, 0x3718A15A, 0xBD10596C, 0x5C8F967E,
00081 0x05A7DC90, 0xE4381382, 0x6E30EBB4, 0x8FAF24A6,
00082 0xD320B2D8, 0x32BF7DCA, 0xB8B785FC, 0x59284AEE,
00083 0x0AE71189, 0xEB78DE9B, 0x617026AD, 0x80EFE9BF,
00084 0xDC607FC1, 0x3DFFB0D3, 0xB7F748E5, 0x566887F7,
00085 0x0F40CD19, 0xEEDF020B, 0x64D7FA3D, 0x8548352F,
00086 0xD9C7A351, 0x38586C43, 0xB2509475, 0x53CF5B67,
00087 0x146722BB, 0xF5F8EDA9, 0x7FF0159F, 0x9E6FDA8D,
00088 0xC2E04CF3, 0x237F83E1, 0xA9777BD7, 0x48E8B4C5,
00089 0x11C0FE2B, 0xF05F3139, 0x7A57C90F, 0x9BC8061D,
00090 0xC7479063, 0x26D85F71, 0xACD0A747, 0x4D4F6855,
00091 0x1E803332, 0xFF1FFC20, 0x75170416, 0x9488CB04,
00092 0xC8075D7A, 0x29989268, 0xA3906A5E, 0x420FA54C,
00093 0x1B27EFA2, 0xFAB820B0, 0x70B0D886, 0x912F1794,
00094 0xCDA081EA, 0x2C3F4EF8, 0xA637B6CE, 0x47A879DC,
00095 0x28CE44DF, 0xC9518BCD, 0x435973FB, 0xA2C6BCE9,
00096 0xFE492A97, 0x1FD6E585, 0x95DE1DB3, 0x7441D2A1,
00097 0x2D69984F, 0xCCF6575D, 0x46FEAF6B, 0xA7616079,
00098 0xFBEEF607, 0x1A713915, 0x9079C123, 0x71E60E31,
00099 0x22295556, 0xC3B69A44, 0x49BE6272, 0xA821AD60,
00100 0xF4AE3B1E, 0x1531F40C, 0x9F390C3A, 0x7EA6C328,
00101 0x278E89C6, 0xC61146D4, 0x4C19BEE2, 0xAD8671F0,
00102 0xF109E78E, 0x1096289C, 0x9A9ED0AA, 0x7B011FB8,
00103 0x3CA96664, 0xDD36A976, 0x573E5140, 0xB6A19E52,
00104 0xEA2E082C, 0x0BB1C73E, 0x81B93F08, 0x6026F01A,
00105 0x390EBAF4, 0xD89175E6, 0x52998DD0, 0xB30642C2,
00106 0xEF89D4BC, 0x0E161BAE, 0x841EE398, 0x65812C8A,
00107 0x364E77ED, 0xD7D1B8FF, 0x5DD940C9, 0xBC468FDB,
00108 0xE0C919A5, 0x0156D6B7, 0x8B5E2E81, 0x6AC1E193,
00109 0x33E9AB7D, 0xD276646F, 0x587E9C59, 0xB9E1534B,
00110 0xE56EC535, 0x04F10A27, 0x8EF9F211, 0x6F663D03,
00111 0x50358817, 0xB1AA4705, 0x3BA2BF33, 0xDA3D7021,
00112 0x86B2E65F, 0x672D294D, 0xED25D17B, 0x0CBA1E69,
00113 0x55925487, 0xB40D9B95, 0x3E0563A3, 0xDF9AACB1,
00114 0x83153ACF, 0x628AF5DD, 0xE8820DEB, 0x091DC2F9,
00115 0x5AD2999E, 0xBB4D568C, 0x3145AEBA, 0xD0DA61A8,
00116 0x8C55F7D6, 0x6DCA38C4, 0xE7C2C0F2, 0x065D0FE0,
00117 0x5F75450E, 0xBEEA8A1C, 0x34E2722A, 0xD57DBD38,
00118 0x89F22B46, 0x686DE454, 0xE2651C62, 0x03FAD370,
00119 0x4452AAAC, 0xA5CD65BE, 0x2FC59D88, 0xCE5A529A,
00120 0x92D5C4E4, 0x734A0BF6, 0xF942F3C0, 0x18DD3CD2,
00121 0x41F5763C, 0xA06AB92E, 0x2A624118, 0xCBFD8E0A,
00122 0x97721874, 0x76EDD766, 0xFCE52F50, 0x1D7AE042,
00123 0x4EB5BB25, 0xAF2A7437, 0x25228C01, 0xC4BD4313,
00124 0x9832D56D, 0x79AD1A7F, 0xF3A5E249, 0x123A2D5B,
00125 0x4B1267B5, 0xAA8DA8A7, 0x20855091, 0xC11A9F83,
00126 0x9D9509FD, 0x7C0AC6EF, 0xF6023ED9, 0x179DF1CB,
00127 0x78FBCCC8, 0x996403DA, 0x136CFBEC, 0xF2F334FE,
00128 0xAE7CA280, 0x4FE36D92, 0xC5EB95A4, 0x24745AB6,
00129 0x7D5C1058, 0x9CC3DF4A, 0x16CB277C, 0xF754E86E,
00130 0xABDB7E10, 0x4A44B102, 0xC04C4934, 0x21D38626,
00131 0x721CDD41, 0x93831253, 0x198BEA65, 0xF8142577,
00132 0xA49BB309, 0x45047C1B, 0xCF0C842D, 0x2E934B3F,
00133 0x77BB01D1, 0x9624CEC3, 0x1C2C36F5, 0xFDB3F9E7,
00134 0xA13C6F99, 0x40A3A08B, 0xCAAB58BD, 0x2B3497AF,
00135 0x6C9CEE73, 0x8D032161, 0x070BD957, 0xE6941645,
00136 0xBA1B803B, 0x5B844F29, 0xD18CB71F, 0x3013780D,
00137 0x693B32E3, 0x88A4FDF1, 0x02AC05C7, 0xE333CAD5,
00138 0xBFBC5CAB, 0x5E2393B9, 0xD42B6B8F, 0x35B4A49D,
00139 0x667BFFFA, 0x87E430E8, 0x0DECC8DE, 0xEC7307CC,
00140 0xB0FC91B2, 0x51635EA0, 0xDB6BA696, 0x3AF46984,
00141 0x63DC236A, 0x8243EC78, 0x084B144E, 0xE9D4DB5C,
00142 0xB55B4D22, 0x54C48230, 0xDECC7A06, 0x3F53B514,
// First half, generic variant: MUL_A computes (x << 8) ^ table[x >> 24].
00143 #else
00144 0x00000000, 0xE19FCF13, 0x6B973726, 0x8A08F835,
00145 0xD6876E4C, 0x3718A15F, 0xBD10596A, 0x5C8F9679,
00146 0x05A7DC98, 0xE438138B, 0x6E30EBBE, 0x8FAF24AD,
00147 0xD320B2D4, 0x32BF7DC7, 0xB8B785F2, 0x59284AE1,
00148 0x0AE71199, 0xEB78DE8A, 0x617026BF, 0x80EFE9AC,
00149 0xDC607FD5, 0x3DFFB0C6, 0xB7F748F3, 0x566887E0,
00150 0x0F40CD01, 0xEEDF0212, 0x64D7FA27, 0x85483534,
00151 0xD9C7A34D, 0x38586C5E, 0xB250946B, 0x53CF5B78,
00152 0x1467229B, 0xF5F8ED88, 0x7FF015BD, 0x9E6FDAAE,
00153 0xC2E04CD7, 0x237F83C4, 0xA9777BF1, 0x48E8B4E2,
00154 0x11C0FE03, 0xF05F3110, 0x7A57C925, 0x9BC80636,
00155 0xC747904F, 0x26D85F5C, 0xACD0A769, 0x4D4F687A,
00156 0x1E803302, 0xFF1FFC11, 0x75170424, 0x9488CB37,
00157 0xC8075D4E, 0x2998925D, 0xA3906A68, 0x420FA57B,
00158 0x1B27EF9A, 0xFAB82089, 0x70B0D8BC, 0x912F17AF,
00159 0xCDA081D6, 0x2C3F4EC5, 0xA637B6F0, 0x47A879E3,
00160 0x28CE449F, 0xC9518B8C, 0x435973B9, 0xA2C6BCAA,
00161 0xFE492AD3, 0x1FD6E5C0, 0x95DE1DF5, 0x7441D2E6,
00162 0x2D699807, 0xCCF65714, 0x46FEAF21, 0xA7616032,
00163 0xFBEEF64B, 0x1A713958, 0x9079C16D, 0x71E60E7E,
00164 0x22295506, 0xC3B69A15, 0x49BE6220, 0xA821AD33,
00165 0xF4AE3B4A, 0x1531F459, 0x9F390C6C, 0x7EA6C37F,
00166 0x278E899E, 0xC611468D, 0x4C19BEB8, 0xAD8671AB,
00167 0xF109E7D2, 0x109628C1, 0x9A9ED0F4, 0x7B011FE7,
00168 0x3CA96604, 0xDD36A917, 0x573E5122, 0xB6A19E31,
00169 0xEA2E0848, 0x0BB1C75B, 0x81B93F6E, 0x6026F07D,
00170 0x390EBA9C, 0xD891758F, 0x52998DBA, 0xB30642A9,
00171 0xEF89D4D0, 0x0E161BC3, 0x841EE3F6, 0x65812CE5,
00172 0x364E779D, 0xD7D1B88E, 0x5DD940BB, 0xBC468FA8,
00173 0xE0C919D1, 0x0156D6C2, 0x8B5E2EF7, 0x6AC1E1E4,
00174 0x33E9AB05, 0xD2766416, 0x587E9C23, 0xB9E15330,
00175 0xE56EC549, 0x04F10A5A, 0x8EF9F26F, 0x6F663D7C,
00176 0x50358897, 0xB1AA4784, 0x3BA2BFB1, 0xDA3D70A2,
00177 0x86B2E6DB, 0x672D29C8, 0xED25D1FD, 0x0CBA1EEE,
00178 0x5592540F, 0xB40D9B1C, 0x3E056329, 0xDF9AAC3A,
00179 0x83153A43, 0x628AF550, 0xE8820D65, 0x091DC276,
00180 0x5AD2990E, 0xBB4D561D, 0x3145AE28, 0xD0DA613B,
00181 0x8C55F742, 0x6DCA3851, 0xE7C2C064, 0x065D0F77,
00182 0x5F754596, 0xBEEA8A85, 0x34E272B0, 0xD57DBDA3,
00183 0x89F22BDA, 0x686DE4C9, 0xE2651CFC, 0x03FAD3EF,
00184 0x4452AA0C, 0xA5CD651F, 0x2FC59D2A, 0xCE5A5239,
00185 0x92D5C440, 0x734A0B53, 0xF942F366, 0x18DD3C75,
00186 0x41F57694, 0xA06AB987, 0x2A6241B2, 0xCBFD8EA1,
00187 0x977218D8, 0x76EDD7CB, 0xFCE52FFE, 0x1D7AE0ED,
00188 0x4EB5BB95, 0xAF2A7486, 0x25228CB3, 0xC4BD43A0,
00189 0x9832D5D9, 0x79AD1ACA, 0xF3A5E2FF, 0x123A2DEC,
00190 0x4B12670D, 0xAA8DA81E, 0x2085502B, 0xC11A9F38,
00191 0x9D950941, 0x7C0AC652, 0xF6023E67, 0x179DF174,
00192 0x78FBCC08, 0x9964031B, 0x136CFB2E, 0xF2F3343D,
00193 0xAE7CA244, 0x4FE36D57, 0xC5EB9562, 0x24745A71,
00194 0x7D5C1090, 0x9CC3DF83, 0x16CB27B6, 0xF754E8A5,
00195 0xABDB7EDC, 0x4A44B1CF, 0xC04C49FA, 0x21D386E9,
00196 0x721CDD91, 0x93831282, 0x198BEAB7, 0xF81425A4,
00197 0xA49BB3DD, 0x45047CCE, 0xCF0C84FB, 0x2E934BE8,
00198 0x77BB0109, 0x9624CE1A, 0x1C2C362F, 0xFDB3F93C,
00199 0xA13C6F45, 0x40A3A056, 0xCAAB5863, 0x2B349770,
00200 0x6C9CEE93, 0x8D032180, 0x070BD9B5, 0xE69416A6,
00201 0xBA1B80DF, 0x5B844FCC, 0xD18CB7F9, 0x301378EA,
00202 0x693B320B, 0x88A4FD18, 0x02AC052D, 0xE333CA3E,
00203 0xBFBC5C47, 0x5E239354, 0xD42B6B61, 0x35B4A472,
00204 0x667BFF0A, 0x87E43019, 0x0DECC82C, 0xEC73073F,
00205 0xB0FC9146, 0x51635E55, 0xDB6BA660, 0x3AF46973,
00206 0x63DC2392, 0x8243EC81, 0x084B14B4, 0xE9D4DBA7,
00207 0xB55B4DDE, 0x54C482CD, 0xDECC7AF8, 0x3F53B5EB,
00208 #endif
// Second half (entries 256..511), shared by all targets: DIV_A computes
// (x >> 8) ^ table[256 + low byte of x].
00209 0x00000000, 0x180F40CD, 0x301E8033, 0x2811C0FE,
00210 0x603CA966, 0x7833E9AB, 0x50222955, 0x482D6998,
00211 0xC078FBCC, 0xD877BB01, 0xF0667BFF, 0xE8693B32,
00212 0xA04452AA, 0xB84B1267, 0x905AD299, 0x88559254,
00213 0x29F05F31, 0x31FF1FFC, 0x19EEDF02, 0x01E19FCF,
00214 0x49CCF657, 0x51C3B69A, 0x79D27664, 0x61DD36A9,
00215 0xE988A4FD, 0xF187E430, 0xD99624CE, 0xC1996403,
00216 0x89B40D9B, 0x91BB4D56, 0xB9AA8DA8, 0xA1A5CD65,
00217 0x5249BE62, 0x4A46FEAF, 0x62573E51, 0x7A587E9C,
00218 0x32751704, 0x2A7A57C9, 0x026B9737, 0x1A64D7FA,
00219 0x923145AE, 0x8A3E0563, 0xA22FC59D, 0xBA208550,
00220 0xF20DECC8, 0xEA02AC05, 0xC2136CFB, 0xDA1C2C36,
00221 0x7BB9E153, 0x63B6A19E, 0x4BA76160, 0x53A821AD,
00222 0x1B854835, 0x038A08F8, 0x2B9BC806, 0x339488CB,
00223 0xBBC11A9F, 0xA3CE5A52, 0x8BDF9AAC, 0x93D0DA61,
00224 0xDBFDB3F9, 0xC3F2F334, 0xEBE333CA, 0xF3EC7307,
00225 0xA492D5C4, 0xBC9D9509, 0x948C55F7, 0x8C83153A,
00226 0xC4AE7CA2, 0xDCA13C6F, 0xF4B0FC91, 0xECBFBC5C,
00227 0x64EA2E08, 0x7CE56EC5, 0x54F4AE3B, 0x4CFBEEF6,
00228 0x04D6876E, 0x1CD9C7A3, 0x34C8075D, 0x2CC74790,
00229 0x8D628AF5, 0x956DCA38, 0xBD7C0AC6, 0xA5734A0B,
00230 0xED5E2393, 0xF551635E, 0xDD40A3A0, 0xC54FE36D,
00231 0x4D1A7139, 0x551531F4, 0x7D04F10A, 0x650BB1C7,
00232 0x2D26D85F, 0x35299892, 0x1D38586C, 0x053718A1,
00233 0xF6DB6BA6, 0xEED42B6B, 0xC6C5EB95, 0xDECAAB58,
00234 0x96E7C2C0, 0x8EE8820D, 0xA6F942F3, 0xBEF6023E,
00235 0x36A3906A, 0x2EACD0A7, 0x06BD1059, 0x1EB25094,
00236 0x569F390C, 0x4E9079C1, 0x6681B93F, 0x7E8EF9F2,
00237 0xDF2B3497, 0xC724745A, 0xEF35B4A4, 0xF73AF469,
00238 0xBF179DF1, 0xA718DD3C, 0x8F091DC2, 0x97065D0F,
00239 0x1F53CF5B, 0x075C8F96, 0x2F4D4F68, 0x37420FA5,
00240 0x7F6F663D, 0x676026F0, 0x4F71E60E, 0x577EA6C3,
00241 0xE18D0321, 0xF98243EC, 0xD1938312, 0xC99CC3DF,
00242 0x81B1AA47, 0x99BEEA8A, 0xB1AF2A74, 0xA9A06AB9,
00243 0x21F5F8ED, 0x39FAB820, 0x11EB78DE, 0x09E43813,
00244 0x41C9518B, 0x59C61146, 0x71D7D1B8, 0x69D89175,
00245 0xC87D5C10, 0xD0721CDD, 0xF863DC23, 0xE06C9CEE,
00246 0xA841F576, 0xB04EB5BB, 0x985F7545, 0x80503588,
00247 0x0805A7DC, 0x100AE711, 0x381B27EF, 0x20146722,
00248 0x68390EBA, 0x70364E77, 0x58278E89, 0x4028CE44,
00249 0xB3C4BD43, 0xABCBFD8E, 0x83DA3D70, 0x9BD57DBD,
00250 0xD3F81425, 0xCBF754E8, 0xE3E69416, 0xFBE9D4DB,
00251 0x73BC468F, 0x6BB30642, 0x43A2C6BC, 0x5BAD8671,
00252 0x1380EFE9, 0x0B8FAF24, 0x239E6FDA, 0x3B912F17,
00253 0x9A34E272, 0x823BA2BF, 0xAA2A6241, 0xB225228C,
00254 0xFA084B14, 0xE2070BD9, 0xCA16CB27, 0xD2198BEA,
00255 0x5A4C19BE, 0x42435973, 0x6A52998D, 0x725DD940,
00256 0x3A70B0D8, 0x227FF015, 0x0A6E30EB, 0x12617026,
00257 0x451FD6E5, 0x5D109628, 0x750156D6, 0x6D0E161B,
00258 0x25237F83, 0x3D2C3F4E, 0x153DFFB0, 0x0D32BF7D,
00259 0x85672D29, 0x9D686DE4, 0xB579AD1A, 0xAD76EDD7,
00260 0xE55B844F, 0xFD54C482, 0xD545047C, 0xCD4A44B1,
00261 0x6CEF89D4, 0x74E0C919, 0x5CF109E7, 0x44FE492A,
00262 0x0CD320B2, 0x14DC607F, 0x3CCDA081, 0x24C2E04C,
00263 0xAC977218, 0xB49832D5, 0x9C89F22B, 0x8486B2E6,
00264 0xCCABDB7E, 0xD4A49BB3, 0xFCB55B4D, 0xE4BA1B80,
00265 0x17566887, 0x0F59284A, 0x2748E8B4, 0x3F47A879,
00266 0x776AC1E1, 0x6F65812C, 0x477441D2, 0x5F7B011F,
00267 0xD72E934B, 0xCF21D386, 0xE7301378, 0xFF3F53B5,
00268 0xB7123A2D, 0xAF1D7AE0, 0x870CBA1E, 0x9F03FAD3,
00269 0x3EA637B6, 0x26A9777B, 0x0EB8B785, 0x16B7F748,
00270 0x5E9A9ED0, 0x4695DE1D, 0x6E841EE3, 0x768B5E2E,
00271 0xFEDECC7A, 0xE6D18CB7, 0xCEC04C49, 0xD6CF0C84,
00272 0x9EE2651C, 0x86ED25D1, 0xAEFCE52F, 0xB6F3A5E2
00273 };
00274
00275
00276 #if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X64
00277 unsigned int SosemanukPolicy::GetAlignment() const
00278 {
00279 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
00280 #ifdef __INTEL_COMPILER
00281 if (HasSSE2() && !IsP4())
00282 #else
00283 if (HasSSE2())
00284 #endif
00285 return 16;
00286 else
00287 #endif
00288 return 1;
00289 }
00290
00291 unsigned int SosemanukPolicy::GetOptimalBlockSize() const
00292 {
00293 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
00294 #ifdef __INTEL_COMPILER
00295 if (HasSSE2() && !IsP4())
00296 #else
00297 if (HasSSE2())
00298 #endif
00299 return 4*BYTES_PER_ITERATION;
00300 else
00301 #endif
00302 return BYTES_PER_ITERATION;
00303 }
00304 #endif
00305
// C4731 is an MSVC diagnostic; guard the pragma so other compilers do not
// report an unknown-pragma warning for it.
#ifdef _MSC_VER
#pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code
#endif
00307
// Produces keystream and writes it to output, XORing with input when given.
//
// One iteration advances the cipher by 20 steps and emits 20 32-bit words
// (80 bytes). Two implementations exist:
//   * an SSE2 inline-assembly path, used when CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
//     is set and HasSSE2() is true (and, with the Intel compiler, !IsP4());
//   * a portable C++ path built from the STEP / OUTPUT4 macros.
// Both read the twelve words of m_state on entry and write them back on exit.
//
// operation      - not referenced directly in this body; presumably consumed
//                  by CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH in the C++ path — TODO confirm.
// output         - destination buffer.
// input          - data to XOR with the keystream; the assembly path skips the
//                  XOR when this pointer is null.
// iterationCount - number of 20-word iterations to run.
//                  NOTE(review): the C++ loop is do { } while (--iterationCount),
//                  so a count of 0 would wrap around; callers presumably never
//                  pass 0 — confirm.
00308 void SosemanukPolicy::OperateKeystream(KeystreamOperation operation, byte *output, const byte *input, size_t iterationCount)
00309 {
00310 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
00311 #ifdef __INTEL_COMPILER
00312 if (HasSSE2() && !IsP4())
00313 #else
00314 if (HasSSE2())
00315 #endif
00316 {
// GCC: arguments arrive in registers through the operand constraints at the
// end of the asm statement. Otherwise they are loaded explicitly here.
00317 #ifdef __GNUC__
00318 __asm__ __volatile__
00319 (
00320 ".intel_syntax noprefix;"
00321 AS_PUSH( bx)
00322 #else
00323 word32 *state = m_state;
00324 AS2( mov WORD_REG(ax), state)
00325 AS2( mov WORD_REG(di), output)
00326 AS2( mov WORD_REG(dx), input)
00327 AS2( mov WORD_REG(cx), iterationCount)
00328 #endif
00329
// Scratch-frame layout relative to the aligned stack pointer. Slot 0 holds the
// saved original stack pointer; slots 1..7 are the named locals below; the
// 12-word state copy follows, then the u/v word buffers.
00330 #define SSE2_output WORD_PTR [WORD_REG(sp)+1*WORD_SZ]
00331 #define SSE2_input WORD_PTR [WORD_REG(sp)+2*WORD_SZ]
00332 #define SSE2_wordsLeft WORD_PTR [WORD_REG(sp)+3*WORD_SZ]
00333 #define SSE2_diEnd WORD_PTR [WORD_REG(sp)+4*WORD_SZ]
00334 #define SSE2_pMulTables WORD_PTR [WORD_REG(sp)+5*WORD_SZ]
00335 #define SSE2_state WORD_PTR [WORD_REG(sp)+6*WORD_SZ]
00336 #define SSE2_wordsLeft2 WORD_PTR [WORD_REG(sp)+7*WORD_SZ]
00337 #define SSE2_stateCopy WORD_REG(sp) + 8*WORD_SZ
00338 #define SSE2_uvStart SSE2_stateCopy + 12*4
00339
// Save bp, remember the incoming sp in bx, align sp down to 16 bytes and
// reserve the frame: 80 u words + 80 v words + 12 state words + 8 locals.
// The incoming sp is stored at [sp] so AS_POP(sp) can restore it at the end.
00340 AS_PUSH( bp)
00341 AS2( mov WORD_REG(bx), WORD_REG(sp))
00342 AS2( and WORD_REG(sp), -16)
00343 AS2( sub WORD_REG(sp), 80*4*2+12*4+8*WORD_SZ)
00344 AS2( mov [WORD_REG(sp)], WORD_REG(bx))
00345 AS2( mov SSE2_output, WORD_REG(di))
00346 AS2( mov SSE2_input, WORD_REG(dx))
00347 AS2( mov SSE2_state, WORD_REG(ax))
00348 #ifndef _MSC_VER
00349 AS2( mov SSE2_pMulTables, WORD_REG(si))
00350 #endif
// cx = 5*iterationCount, si = 4*cx: total words to produce (20 per iteration).
00351 AS2( lea WORD_REG(cx), [4*WORD_REG(cx)+WORD_REG(cx)])
00352 AS2( lea WORD_REG(si), [4*WORD_REG(cx)])
00353 AS2( mov SSE2_wordsLeft, WORD_REG(si))
// Copy state words 0..9 into the frame; ebx = state word 9, ecx = word 10,
// edx = word 11.
00354 AS2( movdqa xmm0, [WORD_REG(ax)+0*16])
00355 AS2( movdqa [SSE2_stateCopy+0*16], xmm0)
00356 AS2( movdqa xmm0, [WORD_REG(ax)+1*16])
00357 AS2( movdqa [SSE2_stateCopy+1*16], xmm0)
00358 AS2( movq xmm0, QWORD PTR [WORD_REG(ax)+2*16])
00359 AS2( movq QWORD PTR [SSE2_stateCopy+2*16], xmm0)
00360 AS2( psrlq xmm0, 32)
00361 AS2( movd ebx, xmm0)
00362 AS2( mov ecx, [WORD_REG(ax)+10*4])
00363 AS2( mov edx, [WORD_REG(ax)+11*4])
// xmm7 = all ones; used further down to complement xmm4.
00364 AS2( pcmpeqb xmm7, xmm7)
00365
// s(i): word (i mod 10) of the state copy.
// u(j), v(j): word slots relative to di, laid out as (j mod 4)*20 + j/4 so
// that four consecutive steps land in four separate 20-word rows; the v area
// starts 80 words after the u area.
00366 #define s(i) SSE2_stateCopy + ASM_MOD(i,10)*4
00367 #define u(j) WORD_REG(di) + (ASM_MOD(j,4)*20 + (j/4)) * 4
00368 #define v(j) WORD_REG(di) + (ASM_MOD(j,4)*20 + (j/4)) * 4 + 80*4
00369
// Register aliases for SSE2_STEP: with j == 0, r1 is ecx and r2 is edx; with
// j == 1 the two are swapped (mirrors r1(i)/r2(i) in the C++ path).
00370 #define r10 ecx
00371 #define r11 edx
00372 #define r20 edx
00373 #define r21 ecx
00374
// One cipher step in assembly; it stores one u word and one v word and
// updates s(i), r1 and r2, following the same formulae as the C++ STEP macro.
00375 #define SSE2_STEP(i, j) \
00376 AS2( mov eax, [s(i+0)])\
00377 AS2( mov [v(i)], eax)\
00378 AS2( rol eax, 8)\
00379 AS2( lea ebp, [ebx + r2##j])\
00380 AS2( xor ebp, r1##j)\
00381 AS2( mov [u(i)], ebp)\
00382 AS2( mov ebp, 1)\
00383 AS2( and ebp, r2##j)\
00384 AS1( neg ebp)\
00385 AS2( and ebp, ebx)\
00386 AS2( xor ebx, eax)\
00387 AS2( movzx eax, al)\
00388 AS2( xor ebx, [WORD_REG(si)+WORD_REG(ax)*4])\
00389 AS2( mov eax, [s(i+3)])\
00390 AS2( xor ebp, [s(i+2)])\
00391 AS2( add r1##j, ebp)\
00392 AS2( movzx ebp, al)\
00393 AS2( shr eax, 8)\
00394 AS2( xor ebx, [WORD_REG(si)+1024+WORD_REG(bp)*4])\
00395 AS2( xor ebx, eax)\
00396 AS2( imul r2##j, 0x54655307)\
00397 AS2( rol r2##j, 7)\
00398 AS2( mov [s(i+0)], ebx)\
00399
// Label 2: outer loop. Each pass handles min(words left, 80) words.
00400 ASL(2)
00401 AS2( lea WORD_REG(di), [SSE2_uvStart])
00402 AS2( mov WORD_REG(ax), 80)
00403 AS2( cmp WORD_REG(si), 80)
00404 AS2( cmovg WORD_REG(si), WORD_REG(ax))
00405 AS2( mov SSE2_wordsLeft2, WORD_REG(si))
// di advances 20 bytes per 20 generated words in the loop at label 0, so
// di + (word count) is the end marker for that loop.
00406 AS2( lea WORD_REG(si), [WORD_REG(di)+WORD_REG(si)])
00407 AS2( mov SSE2_diEnd, WORD_REG(si))
// si now becomes the base address of s_mulTables for SSE2_STEP.
00408 #ifdef _MSC_VER
00409 AS2( lea WORD_REG(si), s_mulTables)
00410 #else
00411 AS2( mov WORD_REG(si), SSE2_pMulTables)
00412 #endif
00413
// Label 0: first inner loop. Each pass runs 20 steps, filling 20 u words and
// 20 v words.
00414 ASL(0)
00415 SSE2_STEP(0, 0)
00416 SSE2_STEP(1, 1)
00417 SSE2_STEP(2, 0)
00418 SSE2_STEP(3, 1)
00419 SSE2_STEP(4, 0)
00420 SSE2_STEP(5, 1)
00421 SSE2_STEP(6, 0)
00422 SSE2_STEP(7, 1)
00423 SSE2_STEP(8, 0)
00424 SSE2_STEP(9, 1)
00425 SSE2_STEP(10, 0)
00426 SSE2_STEP(11, 1)
00427 SSE2_STEP(12, 0)
00428 SSE2_STEP(13, 1)
00429 SSE2_STEP(14, 0)
00430 SSE2_STEP(15, 1)
00431 SSE2_STEP(16, 0)
00432 SSE2_STEP(17, 1)
00433 SSE2_STEP(18, 0)
00434 SSE2_STEP(19, 1)
00435
00436 AS2( add WORD_REG(di), 5*4)
00437 AS2( cmp WORD_REG(di), SSE2_diEnd)
00438 ASJ( jne, 0, b)
00439
// Set up the output phase: ax = input, bp = output, di = start of the u/v
// buffers, si = words to emit in this outer pass.
00440 AS2( mov WORD_REG(ax), SSE2_input)
00441 AS2( mov WORD_REG(bp), SSE2_output)
00442 AS2( lea WORD_REG(di), [SSE2_uvStart])
00443 AS2( mov WORD_REG(si), SSE2_wordsLeft2)
00444
// Label 1: second inner loop. Each pass loads four u rows (4 words each),
// combines them, and emits 16 words.
00445 ASL(1)
00446 AS2( movdqa xmm0, [WORD_REG(di)+0*20*4])
00447 AS2( movdqa xmm2, [WORD_REG(di)+2*20*4])
00448 AS2( movdqa xmm3, [WORD_REG(di)+3*20*4])
00449 AS2( movdqa xmm1, [WORD_REG(di)+1*20*4])
00450
// Bitwise combination of the four u rows (presumably the same S-box that
// OUTPUT4 applies through S2 in the C++ path — confirm against serpentp.h).
00451 AS2( movdqa xmm4, xmm0)
00452 AS2( pand xmm0, xmm2)
00453 AS2( pxor xmm0, xmm3)
00454 AS2( pxor xmm2, xmm1)
00455 AS2( pxor xmm2, xmm0)
00456 AS2( por xmm3, xmm4)
00457 AS2( pxor xmm3, xmm1)
00458 AS2( pxor xmm4, xmm2)
00459 AS2( movdqa xmm1, xmm3)
00460 AS2( por xmm3, xmm4)
00461 AS2( pxor xmm3, xmm0)
00462 AS2( pand xmm0, xmm1)
00463 AS2( pxor xmm4, xmm0)
00464 AS2( pxor xmm1, xmm3)
00465 AS2( pxor xmm1, xmm4)
00466 AS2( pxor xmm4, xmm7)
00467
// XOR the result with the four v rows.
00468 AS2( pxor xmm2, [WORD_REG(di)+80*4])
00469 AS2( pxor xmm3, [WORD_REG(di)+80*5])
00470 AS2( pxor xmm1, [WORD_REG(di)+80*6])
00471 AS2( pxor xmm4, [WORD_REG(di)+80*7])
00472
00473
// Fewer than 16 words left: finish through the 4-words-at-a-time tail at label 4.
00474 AS2( cmp WORD_REG(si), 16)
00475 ASJ( jl, 4, f)
00476
// Interleave the four rows so that xmm2, xmm0, xmm6, xmm3 each hold four
// consecutive output words.
00477 AS2( movdqa xmm6, xmm2)
00478 AS2( punpckldq xmm2, xmm3)
00479 AS2( movdqa xmm5, xmm1)
00480 AS2( punpckldq xmm1, xmm4)
00481 AS2( movdqa xmm0, xmm2)
00482 AS2( punpcklqdq xmm2, xmm1)
00483 AS2( punpckhqdq xmm0, xmm1)
00484 AS2( punpckhdq xmm6, xmm3)
00485 AS2( punpckhdq xmm5, xmm4)
00486 AS2( movdqa xmm3, xmm6)
00487 AS2( punpcklqdq xmm6, xmm5)
00488 AS2( punpckhqdq xmm3, xmm5)
00489
// XOR with input unless the input pointer is null. Memory-operand pxor is used
// when the pointer is 16-byte aligned; label 7 is the unaligned variant.
00490 AS2( test WORD_REG(ax), WORD_REG(ax))
00491 ASJ( jz, 3, f)
00492 AS2( test eax, 0xf)
00493 ASJ( jnz, 7, f)
00494 AS2( pxor xmm2, [WORD_REG(ax)+0*16])
00495 AS2( pxor xmm0, [WORD_REG(ax)+1*16])
00496 AS2( pxor xmm6, [WORD_REG(ax)+2*16])
00497 AS2( pxor xmm3, [WORD_REG(ax)+3*16])
00498 AS2( add WORD_REG(ax), 4*16)
00499 ASJ( jmp, 3, f)
00500 ASL(7)
00501 AS2( movdqu xmm1, [WORD_REG(ax)+0*16])
00502 AS2( pxor xmm2, xmm1)
00503 AS2( movdqu xmm1, [WORD_REG(ax)+1*16])
00504 AS2( pxor xmm0, xmm1)
00505 AS2( movdqu xmm1, [WORD_REG(ax)+2*16])
00506 AS2( pxor xmm6, xmm1)
00507 AS2( movdqu xmm1, [WORD_REG(ax)+3*16])
00508 AS2( pxor xmm3, xmm1)
00509 AS2( add WORD_REG(ax), 4*16)
// Label 3: store 64 bytes to output, aligned (movdqa) or unaligned (label 8).
00510 ASL(3)
00511 AS2( test ebp, 0xf)
00512 ASJ( jnz, 8, f)
00513 AS2( movdqa [WORD_REG(bp)+0*16], xmm2)
00514 AS2( movdqa [WORD_REG(bp)+1*16], xmm0)
00515 AS2( movdqa [WORD_REG(bp)+2*16], xmm6)
00516 AS2( movdqa [WORD_REG(bp)+3*16], xmm3)
00517 ASJ( jmp, 9, f)
00518 ASL(8)
00519 AS2( movdqu [WORD_REG(bp)+0*16], xmm2)
00520 AS2( movdqu [WORD_REG(bp)+1*16], xmm0)
00521 AS2( movdqu [WORD_REG(bp)+2*16], xmm6)
00522 AS2( movdqu [WORD_REG(bp)+3*16], xmm3)
00523 ASL(9)
00524
// Advance to the next 4 columns / 64 output bytes and loop while words remain.
00525 AS2( add WORD_REG(di), 4*4)
00526 AS2( add WORD_REG(bp), 4*16)
00527 AS2( sub WORD_REG(si), 16)
00528 ASJ( jnz, 1, b)
00529
00530
// Outer-loop bookkeeping: 80 words done; finish if none remain, otherwise save
// the advanced input/output pointers and go round again.
00531 AS2( mov WORD_REG(si), SSE2_wordsLeft)
00532 AS2( sub WORD_REG(si), 80)
00533 ASJ( jz, 6, f)
00534 AS2( mov SSE2_wordsLeft, WORD_REG(si))
00535 AS2( mov SSE2_input, WORD_REG(ax))
00536 AS2( mov SSE2_output, WORD_REG(bp))
00537 ASJ( jmp, 2, b)
00538
// Label 4: tail for fewer than 16 remaining words. Emits 4 words per pass (the
// low dword of each of xmm2, xmm3, xmm1, xmm4), then shifts each register right
// by one dword for the next pass.
00539 ASL(4)
00540 AS2( test WORD_REG(ax), WORD_REG(ax))
00541 ASJ( jz, 5, f)
00542 AS2( movd xmm0, [WORD_REG(ax)+0*4])
00543 AS2( pxor xmm2, xmm0)
00544 AS2( movd xmm0, [WORD_REG(ax)+1*4])
00545 AS2( pxor xmm3, xmm0)
00546 AS2( movd xmm0, [WORD_REG(ax)+2*4])
00547 AS2( pxor xmm1, xmm0)
00548 AS2( movd xmm0, [WORD_REG(ax)+3*4])
00549 AS2( pxor xmm4, xmm0)
00550 AS2( add WORD_REG(ax), 16)
00551 ASL(5)
00552 AS2( movd [WORD_REG(bp)+0*4], xmm2)
00553 AS2( movd [WORD_REG(bp)+1*4], xmm3)
00554 AS2( movd [WORD_REG(bp)+2*4], xmm1)
00555 AS2( movd [WORD_REG(bp)+3*4], xmm4)
00556 AS2( sub WORD_REG(si), 4)
00557 ASJ( jz, 6, f)
00558 AS2( add WORD_REG(bp), 16)
00559 AS2( psrldq xmm2, 4)
00560 AS2( psrldq xmm3, 4)
00561 AS2( psrldq xmm1, 4)
00562 AS2( psrldq xmm4, 4)
00563 ASJ( jmp, 4, b)
00564
// Label 6: write the state copy (words 0..9) and ecx/edx (words 10, 11) back
// through the saved state pointer.
00565 ASL(6)
00566 AS2( mov WORD_REG(bx), SSE2_state)
00567 AS2( movdqa xmm0, [SSE2_stateCopy+0*16])
00568 AS2( movdqa [WORD_REG(bx)+0*16], xmm0)
00569 AS2( movdqa xmm0, [SSE2_stateCopy+1*16])
00570 AS2( movdqa [WORD_REG(bx)+1*16], xmm0)
00571 AS2( movq xmm0, QWORD PTR [SSE2_stateCopy+2*16])
00572 AS2( movq QWORD PTR [WORD_REG(bx)+2*16], xmm0)
00573 AS2( mov [WORD_REG(bx)+10*4], ecx)
00574 AS2( mov [WORD_REG(bx)+11*4], edx)
00575
// Restore the stack pointer saved at [sp], then bp.
00576 AS_POP( sp)
00577 AS_POP( bp)
00578
00579 #ifdef __GNUC__
00580 AS_POP( bx)
00581 ".att_syntax prefix;"
00582 :
00583 : "a" (m_state.m_ptr), "c" (iterationCount), "S" (s_mulTables), "D" (output), "d" (input)
00584 : "memory", "cc"
00585 );
00586 #endif
00587 }
00588 else
00589 #endif
00590 {
// MUL_A / DIV_A: table-driven transforms of an LFSR word (see s_mulTables).
// The x86/x64 MUL_A rotates x in place (it modifies its argument) and uses the
// x86 variant of the table's first half; the generic form leaves x unchanged.
00591 #if CRYPTOPP_BOOL_X86 | CRYPTOPP_BOOL_X64
00592 #define MUL_A(x) (x = rotlFixed(x, 8), x ^ s_mulTables[byte(x)])
00593 #else
00594 #define MUL_A(x) (((x) << 8) ^ s_mulTables[(x) >> 24])
00595 #endif
00596
00597 #define DIV_A(x) (((x) >> 8) ^ s_mulTables[256 + byte(x)])
00598
// r1(i) / r2(i): reg1 and reg2 swap roles on every step, chosen by the parity
// of the step's first index, so the two values never need to be moved.
00599 #define r1(i) ((i%2) ? reg2 : reg1)
00600 #define r2(i) ((i%2) ? reg1 : reg2)
00601
// STEP: one cipher step. It records u and v for the output stage, replaces
// s<x0> with the next LFSR word, and updates the two register words. Only x0,
// x2, x3 and x9 are referenced in the body.
00602 #define STEP(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, v, u) \
00603 u = (s##x9 + r2(x0)) ^ r1(x0);\
00604 v = s##x0;\
00605 s##x0 = MUL_A(s##x0) ^ DIV_A(s##x3) ^ s##x9;\
00606 r1(x0) += XMUX(r2(x0), s##x2, s##x9);\
00607 r2(x0) = rotlFixed(r2(x0) * 0x54655307, 7);\
00608
// SOSEMANUK_OUTPUT: emits four words, each a post-S2 u value XORed with a v.
00609 #define SOSEMANUK_OUTPUT(x) \
00610 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 0, u2 ^ v0);\
00611 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 1, u3 ^ v1);\
00612 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 2, u1 ^ v2);\
00613 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 3, u4 ^ v3);
00614
// OUTPUT4: apply S2 (from serpentp.h) to u0..u3 with u4 as the extra word,
// then write 16 bytes of keystream.
00615 #define OUTPUT4 \
00616 S2(0, u0, u1, u2, u3, u4);\
00617 CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH(SOSEMANUK_OUTPUT, 4*4);
00618
// Work on local copies of the state; they are written back after the loop.
00619 word32 s0 = m_state[0];
00620 word32 s1 = m_state[1];
00621 word32 s2 = m_state[2];
00622 word32 s3 = m_state[3];
00623 word32 s4 = m_state[4];
00624 word32 s5 = m_state[5];
00625 word32 s6 = m_state[6];
00626 word32 s7 = m_state[7];
00627 word32 s8 = m_state[8];
00628 word32 s9 = m_state[9];
00629 word32 reg1 = m_state[10];
00630 word32 reg2 = m_state[11];
00631 word32 u0, u1, u2, u3, u4, v0, v1, v2, v3;
00632
// Each pass of this loop is one iteration: 20 STEPs in groups of four, each
// group followed by OUTPUT4 (5 x 16 = 80 bytes). The argument lists rotate by
// one position per step, so the ten s-variables act as a shift register
// without any data being moved.
00633 do
00634 {
00635 STEP(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, v0, u0)
00636 STEP(1, 2, 3, 4, 5, 6, 7, 8, 9, 0, v1, u1)
00637 STEP(2, 3, 4, 5, 6, 7, 8, 9, 0, 1, v2, u2)
00638 STEP(3, 4, 5, 6, 7, 8, 9, 0, 1, 2, v3, u3)
00639 OUTPUT4
00640 STEP(4, 5, 6, 7, 8, 9, 0, 1, 2, 3, v0, u0)
00641 STEP(5, 6, 7, 8, 9, 0, 1, 2, 3, 4, v1, u1)
00642 STEP(6, 7, 8, 9, 0, 1, 2, 3, 4, 5, v2, u2)
00643 STEP(7, 8, 9, 0, 1, 2, 3, 4, 5, 6, v3, u3)
00644 OUTPUT4
00645 STEP(8, 9, 0, 1, 2, 3, 4, 5, 6, 7, v0, u0)
00646 STEP(9, 0, 1, 2, 3, 4, 5, 6, 7, 8, v1, u1)
00647 STEP(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, v2, u2)
00648 STEP(1, 2, 3, 4, 5, 6, 7, 8, 9, 0, v3, u3)
00649 OUTPUT4
00650 STEP(2, 3, 4, 5, 6, 7, 8, 9, 0, 1, v0, u0)
00651 STEP(3, 4, 5, 6, 7, 8, 9, 0, 1, 2, v1, u1)
00652 STEP(4, 5, 6, 7, 8, 9, 0, 1, 2, 3, v2, u2)
00653 STEP(5, 6, 7, 8, 9, 0, 1, 2, 3, 4, v3, u3)
00654 OUTPUT4
00655 STEP(6, 7, 8, 9, 0, 1, 2, 3, 4, 5, v0, u0)
00656 STEP(7, 8, 9, 0, 1, 2, 3, 4, 5, 6, v1, u1)
00657 STEP(8, 9, 0, 1, 2, 3, 4, 5, 6, 7, v2, u2)
00658 STEP(9, 0, 1, 2, 3, 4, 5, 6, 7, 8, v3, u3)
00659 OUTPUT4
00660 }
00661 while (--iterationCount);
00662
// Persist the advanced state for the next call.
00663 m_state[0] = s0;
00664 m_state[1] = s1;
00665 m_state[2] = s2;
00666 m_state[3] = s3;
00667 m_state[4] = s4;
00668 m_state[5] = s5;
00669 m_state[6] = s6;
00670 m_state[7] = s7;
00671 m_state[8] = s8;
00672 m_state[9] = s9;
00673 m_state[10] = reg1;
00674 m_state[11] = reg2;
00675 }
00676 }
00677
00678 NAMESPACE_END