• Main Page
  • Namespaces
  • Classes
  • Files
  • File List
  • File Members

sosemanuk.cpp

00001 // sosemanuk.cpp - written and placed in the public domain by Wei Dai
00002 
00003 // use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM sosemanuk.cpp" to generate MASM code
00004 
00005 #include "pch.h"
00006 
00007 #ifndef CRYPTOPP_GENERATE_X64_MASM
00008 
00009 #include "sosemanuk.h"
00010 #include "misc.h"
00011 #include "cpu.h"
00012 
00013 #include "serpentp.h"
00014 
00015 NAMESPACE_BEGIN(CryptoPP)
00016 
// Key setup: expands userKey (keylen bytes) into the m_key schedule by calling
// Serpent_KeySchedule with a count argument of 24. params is not read here.
// NOTE(review): 24 is presumably the number of Serpent rounds to schedule
// (CipherResynchronize runs 3 passes of 8 S-box rounds) -- confirm in serpentp.h.
00017 void SosemanukPolicy::CipherSetKey(const NameValuePairs &params, const byte *userKey, size_t keylen)
00018 {
00019         Serpent_KeySchedule(m_key, 24, userKey, keylen);
00020 }
00021 
// IV setup: reads the IV as four little-endian 32-bit words, runs them through
// three passes of eight KX / S-box / LT rounds keyed from m_key, and fills
// m_state[0..11] from values captured during and after those rounds.
// keystreamBuffer is not used; length is only checked by the assert (expected 16).
// NOTE(review): the beforeS*/afterS* macros and KX/S*/LT presumably come from
// serpentp.h and rename a..e between rounds -- confirm there before reordering anything.
00022 void SosemanukPolicy::CipherResynchronize(byte *keystreamBuffer, const byte *iv, size_t length)
00023 {
00024         assert(length==16);
00025 
00026         word32 a, b, c, d, e;
00027         
00028         typedef BlockGetAndPut<word32, LittleEndian> Block;
              // load the IV into a, b, c, d (e is a scratch variable for the round macros)
00029         Block::Get(iv)(a)(b)(c)(d);
00030 
00031         const word32 *k = m_key;
              // i counts passes 1..3; each pass of the loop body is 8 rounds (S0..S7)
00032         unsigned int i=1;
00033 
00034         do
00035         {
00036                 beforeS0(KX); beforeS0(S0); afterS0(LT);
00037                 afterS0(KX); afterS0(S1); afterS1(LT);
00038                 if (i == 3)     // after 18th round
00039                 {
00040                         m_state[4] = b;
00041                         m_state[5] = e;
00042                         m_state[10] = c;
00043                         m_state[11] = a;
00044                 }
00045                 afterS1(KX); afterS1(S2); afterS2(LT);
00046                 afterS2(KX); afterS2(S3); afterS3(LT);
00047                 if (i == 2)     // after 12th round
00048                 {
00049                         m_state[6] = c;
00050                         m_state[7] = d;
00051                         m_state[8] = b;
00052                         m_state[9] = e;
00053                 }
00054                 afterS3(KX); afterS3(S4); afterS4(LT);
00055                 afterS4(KX); afterS4(S5); afterS5(LT);
00056                 afterS5(KX); afterS5(S6); afterS6(LT);
00057                 afterS6(KX); afterS6(S7); afterS7(LT);
00058 
                      // the third pass ends the loop; the final KX is applied after it
00059                 if (i == 3)
00060                         break;
00061 
00062                 ++i;
                      // Permute the variables for the next pass. Net effect, in terms of the
                      // values on entry to this sequence: a<-d, b<-e, c<-b, d<-a
                      // (e is overwritten with d first, so "a = e" copies the old d).
00063                 c = b;
00064                 b = e;
00065                 e = d;
00066                 d = a;
00067                 a = e;
                      // step to the next 32 words of the key schedule
00068                 k += 32;
00069         }
00070         while (true);
00071 
00072         afterS7(KX);
00073 
00074         m_state[0] = a;
00075         m_state[1] = b;
00076         m_state[2] = e;
00077         m_state[3] = d;
00078 
      // XMUX(c, x, y): x when the low bit of c is 0, x ^ y when it is 1
      // (0 - (c & 1) is an all-zero or all-one mask). Also used by STEP in OperateKeystream.
00079 #define XMUX(c, x, y)   (x ^ (y & (0 - (c & 1))))
              // one update of m_state[11] / m_state[10], using the same XMUX and
              // multiply-rotate expressions as the STEP macro in OperateKeystream
00080         m_state[11] += XMUX(m_state[10], m_state[1], m_state[8]);
00081         m_state[10] = rotlFixed(m_state[10] * 0x54655307, 7);
00082 }
00083 
// Lookup tables for the keystream generator: 512 word32 entries in two halves.
//   [0, 256)   - read by MUL_A and by the assembly at [si + index*4]
//   [256, 512) - read by DIV_A (index 256 + byte(x)) and by the assembly at [si + 1024 + index*4]
// The first half has two variants. On x86/x64 MUL_A rotates x left by 8 and XORs in the
// entry selected by the resulting low byte; elsewhere MUL_A shifts x left by 8 and indexes
// with the top byte. The two variants are therefore not interchangeable.
// NOTE(review): extern "C" presumably keeps the symbol name unmangled for the assembly
// code that references s_sosemanukMulTables -- confirm.
00084 extern "C" {
00085 word32 s_sosemanukMulTables[512] = {
00086 #if CRYPTOPP_BOOL_X86 | CRYPTOPP_BOOL_X64
00087         0x00000000, 0xE19FCF12, 0x6B973724, 0x8A08F836, 
00088         0xD6876E48, 0x3718A15A, 0xBD10596C, 0x5C8F967E, 
00089         0x05A7DC90, 0xE4381382, 0x6E30EBB4, 0x8FAF24A6, 
00090         0xD320B2D8, 0x32BF7DCA, 0xB8B785FC, 0x59284AEE, 
00091         0x0AE71189, 0xEB78DE9B, 0x617026AD, 0x80EFE9BF, 
00092         0xDC607FC1, 0x3DFFB0D3, 0xB7F748E5, 0x566887F7, 
00093         0x0F40CD19, 0xEEDF020B, 0x64D7FA3D, 0x8548352F, 
00094         0xD9C7A351, 0x38586C43, 0xB2509475, 0x53CF5B67, 
00095         0x146722BB, 0xF5F8EDA9, 0x7FF0159F, 0x9E6FDA8D, 
00096         0xC2E04CF3, 0x237F83E1, 0xA9777BD7, 0x48E8B4C5, 
00097         0x11C0FE2B, 0xF05F3139, 0x7A57C90F, 0x9BC8061D, 
00098         0xC7479063, 0x26D85F71, 0xACD0A747, 0x4D4F6855, 
00099         0x1E803332, 0xFF1FFC20, 0x75170416, 0x9488CB04, 
00100         0xC8075D7A, 0x29989268, 0xA3906A5E, 0x420FA54C, 
00101         0x1B27EFA2, 0xFAB820B0, 0x70B0D886, 0x912F1794, 
00102         0xCDA081EA, 0x2C3F4EF8, 0xA637B6CE, 0x47A879DC, 
00103         0x28CE44DF, 0xC9518BCD, 0x435973FB, 0xA2C6BCE9, 
00104         0xFE492A97, 0x1FD6E585, 0x95DE1DB3, 0x7441D2A1, 
00105         0x2D69984F, 0xCCF6575D, 0x46FEAF6B, 0xA7616079, 
00106         0xFBEEF607, 0x1A713915, 0x9079C123, 0x71E60E31, 
00107         0x22295556, 0xC3B69A44, 0x49BE6272, 0xA821AD60, 
00108         0xF4AE3B1E, 0x1531F40C, 0x9F390C3A, 0x7EA6C328, 
00109         0x278E89C6, 0xC61146D4, 0x4C19BEE2, 0xAD8671F0, 
00110         0xF109E78E, 0x1096289C, 0x9A9ED0AA, 0x7B011FB8, 
00111         0x3CA96664, 0xDD36A976, 0x573E5140, 0xB6A19E52, 
00112         0xEA2E082C, 0x0BB1C73E, 0x81B93F08, 0x6026F01A, 
00113         0x390EBAF4, 0xD89175E6, 0x52998DD0, 0xB30642C2, 
00114         0xEF89D4BC, 0x0E161BAE, 0x841EE398, 0x65812C8A, 
00115         0x364E77ED, 0xD7D1B8FF, 0x5DD940C9, 0xBC468FDB, 
00116         0xE0C919A5, 0x0156D6B7, 0x8B5E2E81, 0x6AC1E193, 
00117         0x33E9AB7D, 0xD276646F, 0x587E9C59, 0xB9E1534B, 
00118         0xE56EC535, 0x04F10A27, 0x8EF9F211, 0x6F663D03, 
00119         0x50358817, 0xB1AA4705, 0x3BA2BF33, 0xDA3D7021, 
00120         0x86B2E65F, 0x672D294D, 0xED25D17B, 0x0CBA1E69, 
00121         0x55925487, 0xB40D9B95, 0x3E0563A3, 0xDF9AACB1, 
00122         0x83153ACF, 0x628AF5DD, 0xE8820DEB, 0x091DC2F9, 
00123         0x5AD2999E, 0xBB4D568C, 0x3145AEBA, 0xD0DA61A8, 
00124         0x8C55F7D6, 0x6DCA38C4, 0xE7C2C0F2, 0x065D0FE0, 
00125         0x5F75450E, 0xBEEA8A1C, 0x34E2722A, 0xD57DBD38, 
00126         0x89F22B46, 0x686DE454, 0xE2651C62, 0x03FAD370, 
00127         0x4452AAAC, 0xA5CD65BE, 0x2FC59D88, 0xCE5A529A, 
00128         0x92D5C4E4, 0x734A0BF6, 0xF942F3C0, 0x18DD3CD2, 
00129         0x41F5763C, 0xA06AB92E, 0x2A624118, 0xCBFD8E0A, 
00130         0x97721874, 0x76EDD766, 0xFCE52F50, 0x1D7AE042, 
00131         0x4EB5BB25, 0xAF2A7437, 0x25228C01, 0xC4BD4313, 
00132         0x9832D56D, 0x79AD1A7F, 0xF3A5E249, 0x123A2D5B, 
00133         0x4B1267B5, 0xAA8DA8A7, 0x20855091, 0xC11A9F83, 
00134         0x9D9509FD, 0x7C0AC6EF, 0xF6023ED9, 0x179DF1CB, 
00135         0x78FBCCC8, 0x996403DA, 0x136CFBEC, 0xF2F334FE, 
00136         0xAE7CA280, 0x4FE36D92, 0xC5EB95A4, 0x24745AB6, 
00137         0x7D5C1058, 0x9CC3DF4A, 0x16CB277C, 0xF754E86E, 
00138         0xABDB7E10, 0x4A44B102, 0xC04C4934, 0x21D38626, 
00139         0x721CDD41, 0x93831253, 0x198BEA65, 0xF8142577, 
00140         0xA49BB309, 0x45047C1B, 0xCF0C842D, 0x2E934B3F, 
00141         0x77BB01D1, 0x9624CEC3, 0x1C2C36F5, 0xFDB3F9E7, 
00142         0xA13C6F99, 0x40A3A08B, 0xCAAB58BD, 0x2B3497AF, 
00143         0x6C9CEE73, 0x8D032161, 0x070BD957, 0xE6941645, 
00144         0xBA1B803B, 0x5B844F29, 0xD18CB71F, 0x3013780D, 
00145         0x693B32E3, 0x88A4FDF1, 0x02AC05C7, 0xE333CAD5, 
00146         0xBFBC5CAB, 0x5E2393B9, 0xD42B6B8F, 0x35B4A49D, 
00147         0x667BFFFA, 0x87E430E8, 0x0DECC8DE, 0xEC7307CC, 
00148         0xB0FC91B2, 0x51635EA0, 0xDB6BA696, 0x3AF46984, 
00149         0x63DC236A, 0x8243EC78, 0x084B144E, 0xE9D4DB5C, 
00150         0xB55B4D22, 0x54C48230, 0xDECC7A06, 0x3F53B514,
00151 #else
00152         0x00000000, 0xE19FCF13, 0x6B973726, 0x8A08F835,
00153         0xD6876E4C, 0x3718A15F, 0xBD10596A, 0x5C8F9679,
00154         0x05A7DC98, 0xE438138B, 0x6E30EBBE, 0x8FAF24AD,
00155         0xD320B2D4, 0x32BF7DC7, 0xB8B785F2, 0x59284AE1,
00156         0x0AE71199, 0xEB78DE8A, 0x617026BF, 0x80EFE9AC,
00157         0xDC607FD5, 0x3DFFB0C6, 0xB7F748F3, 0x566887E0,
00158         0x0F40CD01, 0xEEDF0212, 0x64D7FA27, 0x85483534,
00159         0xD9C7A34D, 0x38586C5E, 0xB250946B, 0x53CF5B78,
00160         0x1467229B, 0xF5F8ED88, 0x7FF015BD, 0x9E6FDAAE,
00161         0xC2E04CD7, 0x237F83C4, 0xA9777BF1, 0x48E8B4E2,
00162         0x11C0FE03, 0xF05F3110, 0x7A57C925, 0x9BC80636,
00163         0xC747904F, 0x26D85F5C, 0xACD0A769, 0x4D4F687A,
00164         0x1E803302, 0xFF1FFC11, 0x75170424, 0x9488CB37,
00165         0xC8075D4E, 0x2998925D, 0xA3906A68, 0x420FA57B,
00166         0x1B27EF9A, 0xFAB82089, 0x70B0D8BC, 0x912F17AF,
00167         0xCDA081D6, 0x2C3F4EC5, 0xA637B6F0, 0x47A879E3,
00168         0x28CE449F, 0xC9518B8C, 0x435973B9, 0xA2C6BCAA,
00169         0xFE492AD3, 0x1FD6E5C0, 0x95DE1DF5, 0x7441D2E6,
00170         0x2D699807, 0xCCF65714, 0x46FEAF21, 0xA7616032,
00171         0xFBEEF64B, 0x1A713958, 0x9079C16D, 0x71E60E7E,
00172         0x22295506, 0xC3B69A15, 0x49BE6220, 0xA821AD33,
00173         0xF4AE3B4A, 0x1531F459, 0x9F390C6C, 0x7EA6C37F,
00174         0x278E899E, 0xC611468D, 0x4C19BEB8, 0xAD8671AB,
00175         0xF109E7D2, 0x109628C1, 0x9A9ED0F4, 0x7B011FE7,
00176         0x3CA96604, 0xDD36A917, 0x573E5122, 0xB6A19E31,
00177         0xEA2E0848, 0x0BB1C75B, 0x81B93F6E, 0x6026F07D,
00178         0x390EBA9C, 0xD891758F, 0x52998DBA, 0xB30642A9,
00179         0xEF89D4D0, 0x0E161BC3, 0x841EE3F6, 0x65812CE5,
00180         0x364E779D, 0xD7D1B88E, 0x5DD940BB, 0xBC468FA8,
00181         0xE0C919D1, 0x0156D6C2, 0x8B5E2EF7, 0x6AC1E1E4,
00182         0x33E9AB05, 0xD2766416, 0x587E9C23, 0xB9E15330,
00183         0xE56EC549, 0x04F10A5A, 0x8EF9F26F, 0x6F663D7C,
00184         0x50358897, 0xB1AA4784, 0x3BA2BFB1, 0xDA3D70A2,
00185         0x86B2E6DB, 0x672D29C8, 0xED25D1FD, 0x0CBA1EEE,
00186         0x5592540F, 0xB40D9B1C, 0x3E056329, 0xDF9AAC3A,
00187         0x83153A43, 0x628AF550, 0xE8820D65, 0x091DC276,
00188         0x5AD2990E, 0xBB4D561D, 0x3145AE28, 0xD0DA613B,
00189         0x8C55F742, 0x6DCA3851, 0xE7C2C064, 0x065D0F77,
00190         0x5F754596, 0xBEEA8A85, 0x34E272B0, 0xD57DBDA3,
00191         0x89F22BDA, 0x686DE4C9, 0xE2651CFC, 0x03FAD3EF,
00192         0x4452AA0C, 0xA5CD651F, 0x2FC59D2A, 0xCE5A5239,
00193         0x92D5C440, 0x734A0B53, 0xF942F366, 0x18DD3C75,
00194         0x41F57694, 0xA06AB987, 0x2A6241B2, 0xCBFD8EA1,
00195         0x977218D8, 0x76EDD7CB, 0xFCE52FFE, 0x1D7AE0ED,
00196         0x4EB5BB95, 0xAF2A7486, 0x25228CB3, 0xC4BD43A0,
00197         0x9832D5D9, 0x79AD1ACA, 0xF3A5E2FF, 0x123A2DEC,
00198         0x4B12670D, 0xAA8DA81E, 0x2085502B, 0xC11A9F38,
00199         0x9D950941, 0x7C0AC652, 0xF6023E67, 0x179DF174,
00200         0x78FBCC08, 0x9964031B, 0x136CFB2E, 0xF2F3343D,
00201         0xAE7CA244, 0x4FE36D57, 0xC5EB9562, 0x24745A71,
00202         0x7D5C1090, 0x9CC3DF83, 0x16CB27B6, 0xF754E8A5,
00203         0xABDB7EDC, 0x4A44B1CF, 0xC04C49FA, 0x21D386E9,
00204         0x721CDD91, 0x93831282, 0x198BEAB7, 0xF81425A4,
00205         0xA49BB3DD, 0x45047CCE, 0xCF0C84FB, 0x2E934BE8,
00206         0x77BB0109, 0x9624CE1A, 0x1C2C362F, 0xFDB3F93C,
00207         0xA13C6F45, 0x40A3A056, 0xCAAB5863, 0x2B349770,
00208         0x6C9CEE93, 0x8D032180, 0x070BD9B5, 0xE69416A6,
00209         0xBA1B80DF, 0x5B844FCC, 0xD18CB7F9, 0x301378EA,
00210         0x693B320B, 0x88A4FD18, 0x02AC052D, 0xE333CA3E,
00211         0xBFBC5C47, 0x5E239354, 0xD42B6B61, 0x35B4A472,
00212         0x667BFF0A, 0x87E43019, 0x0DECC82C, 0xEC73073F,
00213         0xB0FC9146, 0x51635E55, 0xDB6BA660, 0x3AF46973,
00214         0x63DC2392, 0x8243EC81, 0x084B14B4, 0xE9D4DBA7,
00215         0xB55B4DDE, 0x54C482CD, 0xDECC7AF8, 0x3F53B5EB,
00216 #endif
              // second half, entries [256, 512): shared by all platforms, read by DIV_A
00217         0x00000000, 0x180F40CD, 0x301E8033, 0x2811C0FE,
00218         0x603CA966, 0x7833E9AB, 0x50222955, 0x482D6998,
00219         0xC078FBCC, 0xD877BB01, 0xF0667BFF, 0xE8693B32,
00220         0xA04452AA, 0xB84B1267, 0x905AD299, 0x88559254,
00221         0x29F05F31, 0x31FF1FFC, 0x19EEDF02, 0x01E19FCF,
00222         0x49CCF657, 0x51C3B69A, 0x79D27664, 0x61DD36A9,
00223         0xE988A4FD, 0xF187E430, 0xD99624CE, 0xC1996403,
00224         0x89B40D9B, 0x91BB4D56, 0xB9AA8DA8, 0xA1A5CD65,
00225         0x5249BE62, 0x4A46FEAF, 0x62573E51, 0x7A587E9C,
00226         0x32751704, 0x2A7A57C9, 0x026B9737, 0x1A64D7FA,
00227         0x923145AE, 0x8A3E0563, 0xA22FC59D, 0xBA208550,
00228         0xF20DECC8, 0xEA02AC05, 0xC2136CFB, 0xDA1C2C36,
00229         0x7BB9E153, 0x63B6A19E, 0x4BA76160, 0x53A821AD,
00230         0x1B854835, 0x038A08F8, 0x2B9BC806, 0x339488CB,
00231         0xBBC11A9F, 0xA3CE5A52, 0x8BDF9AAC, 0x93D0DA61,
00232         0xDBFDB3F9, 0xC3F2F334, 0xEBE333CA, 0xF3EC7307,
00233         0xA492D5C4, 0xBC9D9509, 0x948C55F7, 0x8C83153A,
00234         0xC4AE7CA2, 0xDCA13C6F, 0xF4B0FC91, 0xECBFBC5C,
00235         0x64EA2E08, 0x7CE56EC5, 0x54F4AE3B, 0x4CFBEEF6,
00236         0x04D6876E, 0x1CD9C7A3, 0x34C8075D, 0x2CC74790,
00237         0x8D628AF5, 0x956DCA38, 0xBD7C0AC6, 0xA5734A0B,
00238         0xED5E2393, 0xF551635E, 0xDD40A3A0, 0xC54FE36D,
00239         0x4D1A7139, 0x551531F4, 0x7D04F10A, 0x650BB1C7,
00240         0x2D26D85F, 0x35299892, 0x1D38586C, 0x053718A1,
00241         0xF6DB6BA6, 0xEED42B6B, 0xC6C5EB95, 0xDECAAB58,
00242         0x96E7C2C0, 0x8EE8820D, 0xA6F942F3, 0xBEF6023E,
00243         0x36A3906A, 0x2EACD0A7, 0x06BD1059, 0x1EB25094,
00244         0x569F390C, 0x4E9079C1, 0x6681B93F, 0x7E8EF9F2,
00245         0xDF2B3497, 0xC724745A, 0xEF35B4A4, 0xF73AF469,
00246         0xBF179DF1, 0xA718DD3C, 0x8F091DC2, 0x97065D0F,
00247         0x1F53CF5B, 0x075C8F96, 0x2F4D4F68, 0x37420FA5,
00248         0x7F6F663D, 0x676026F0, 0x4F71E60E, 0x577EA6C3,
00249         0xE18D0321, 0xF98243EC, 0xD1938312, 0xC99CC3DF,
00250         0x81B1AA47, 0x99BEEA8A, 0xB1AF2A74, 0xA9A06AB9,
00251         0x21F5F8ED, 0x39FAB820, 0x11EB78DE, 0x09E43813,
00252         0x41C9518B, 0x59C61146, 0x71D7D1B8, 0x69D89175,
00253         0xC87D5C10, 0xD0721CDD, 0xF863DC23, 0xE06C9CEE,
00254         0xA841F576, 0xB04EB5BB, 0x985F7545, 0x80503588,
00255         0x0805A7DC, 0x100AE711, 0x381B27EF, 0x20146722,
00256         0x68390EBA, 0x70364E77, 0x58278E89, 0x4028CE44,
00257         0xB3C4BD43, 0xABCBFD8E, 0x83DA3D70, 0x9BD57DBD,
00258         0xD3F81425, 0xCBF754E8, 0xE3E69416, 0xFBE9D4DB,
00259         0x73BC468F, 0x6BB30642, 0x43A2C6BC, 0x5BAD8671,
00260         0x1380EFE9, 0x0B8FAF24, 0x239E6FDA, 0x3B912F17,
00261         0x9A34E272, 0x823BA2BF, 0xAA2A6241, 0xB225228C,
00262         0xFA084B14, 0xE2070BD9, 0xCA16CB27, 0xD2198BEA,
00263         0x5A4C19BE, 0x42435973, 0x6A52998D, 0x725DD940,
00264         0x3A70B0D8, 0x227FF015, 0x0A6E30EB, 0x12617026,
00265         0x451FD6E5, 0x5D109628, 0x750156D6, 0x6D0E161B,
00266         0x25237F83, 0x3D2C3F4E, 0x153DFFB0, 0x0D32BF7D,
00267         0x85672D29, 0x9D686DE4, 0xB579AD1A, 0xAD76EDD7,
00268         0xE55B844F, 0xFD54C482, 0xD545047C, 0xCD4A44B1,
00269         0x6CEF89D4, 0x74E0C919, 0x5CF109E7, 0x44FE492A,
00270         0x0CD320B2, 0x14DC607F, 0x3CCDA081, 0x24C2E04C,
00271         0xAC977218, 0xB49832D5, 0x9C89F22B, 0x8486B2E6,
00272         0xCCABDB7E, 0xD4A49BB3, 0xFCB55B4D, 0xE4BA1B80,
00273         0x17566887, 0x0F59284A, 0x2748E8B4, 0x3F47A879,
00274         0x776AC1E1, 0x6F65812C, 0x477441D2, 0x5F7B011F,
00275         0xD72E934B, 0xCF21D386, 0xE7301378, 0xFF3F53B5,
00276         0xB7123A2D, 0xAF1D7AE0, 0x870CBA1E, 0x9F03FAD3,
00277         0x3EA637B6, 0x26A9777B, 0x0EB8B785, 0x16B7F748,
00278         0x5E9A9ED0, 0x4695DE1D, 0x6E841EE3, 0x768B5E2E,
00279         0xFEDECC7A, 0xE6D18CB7, 0xCEC04C49, 0xD6CF0C84,
00280         0x9EE2651C, 0x86ED25D1, 0xAEFCE52F, 0xB6F3A5E2
00281 };
00282 }
00283 
00284 #if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X64
// Returns 16 when the SSE2 assembly path is compiled in and selected at run time
// (HasSSE2(), and additionally !IsP4() under the Intel compiler); otherwise
// returns GetAlignmentOf<word32>(). The condition matches the one used in
// OperateKeystream to choose the SSE2 path.
00285 unsigned int SosemanukPolicy::GetAlignment() const
00286 {
00287 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
00288 #ifdef __INTEL_COMPILER
00289         if (HasSSE2() && !IsP4())       // Intel compiler produces faster code for this algorithm on the P4
00290 #else
00291         if (HasSSE2())
00292 #endif
00293                 return 16;
00294         else
00295 #endif
              // reached directly when SSE2 assembly is not compiled in, or as the else branch above
00296                 return GetAlignmentOf<word32>();
00297 }
00298 
// Returns 4*BYTES_PER_ITERATION when the SSE2 assembly path is compiled in and
// selected at run time (same condition as GetAlignment); otherwise returns
// BYTES_PER_ITERATION.
// NOTE(review): the factor 4 presumably corresponds to the 80-word (four
// 20-word iterations) batches of the assembly's outer loop -- confirm.
00299 unsigned int SosemanukPolicy::GetOptimalBlockSize() const
00300 {
00301 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
00302 #ifdef __INTEL_COMPILER
00303         if (HasSSE2() && !IsP4())       // Intel compiler produces faster code for this algorithm on the P4
00304 #else
00305         if (HasSSE2())
00306 #endif
00307                 return 4*BYTES_PER_ITERATION;
00308         else
00309 #endif
              // reached directly when SSE2 assembly is not compiled in, or as the else branch above
00310                 return BYTES_PER_ITERATION;
00311 }
00312 #endif
00313 
00314 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
00315 extern "C" {
00316 void Sosemanuk_OperateKeystream(size_t iterationCount, const byte *input, byte *output, word32 *state);
00317 }
00318 #endif
00319 
00320 #pragma warning(disable: 4731)  // frame pointer register 'ebp' modified by inline assembly code
00321 
// Produces keystream for iterationCount iterations and writes the advanced
// state back to m_state. One of three implementations is used:
//   1. CRYPTOPP_X64_MASM_AVAILABLE: forward to the external routine
//      Sosemanuk_OperateKeystream and return.
//   2. CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE and HasSSE2() at run time (plus !IsP4()
//      under the Intel compiler): the inline assembly below.
//   3. Otherwise: the portable C++ loop at the end of the function.
// When this file is preprocessed with CRYPTOPP_GENERATE_X64_MASM defined (see
// the note at the top of the file), the same assembly text is emitted as the
// MASM procedure Sosemanuk_OperateKeystream instead of as a C++ function body.
// How operation, input and output are combined is decided by the
// CRYPTOPP_KEYSTREAM_OUTPUT_* and AS_XMM_OUTPUT4 macros, which are defined elsewhere.
00322 void SosemanukPolicy::OperateKeystream(KeystreamOperation operation, byte *output, const byte *input, size_t iterationCount)
00323 {
00324 #endif  // #ifdef CRYPTOPP_GENERATE_X64_MASM
00325 
00326 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
00327         Sosemanuk_OperateKeystream(iterationCount, input, output, m_state.data());
00328         return;
00329 #endif
00330 
00331 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
00332 #ifdef CRYPTOPP_GENERATE_X64_MASM
00333                 ALIGN   8
00334         Sosemanuk_OperateKeystream      PROC FRAME
00335                 rex_push_reg rsi
00336                 push_reg rdi
00337                 alloc_stack(80*4*2+12*4+8*WORD_SZ + 2*16+8)
00338                 save_xmm128 xmm6, 02f0h
00339                 save_xmm128 xmm7, 0300h
00340                 .endprolog
00341                 mov             rdi, r8
00342                 mov             rax, r9
00343 #else
00344 #ifdef __INTEL_COMPILER
00345         if (HasSSE2() && !IsP4())       // Intel compiler produces faster code for this algorithm on the P4
00346 #else
00347         if (HasSSE2())
00348 #endif
00349         {
00350 #ifdef __GNUC__
00351         #if CRYPTOPP_BOOL_X64
00352                 FixedSizeAlignedSecBlock<byte, 80*4*2+12*4+8*WORD_SZ> workspace;
00353         #endif
00354                 __asm__ __volatile__
00355                 (
00356                 ".intel_syntax noprefix;"
00357                 AS_PUSH_IF86(   bx)
00358 #else
00359                 word32 *state = m_state;
00360                 AS2(    mov             WORD_REG(ax), state)
00361                 AS2(    mov             WORD_REG(di), output)
00362                 AS2(    mov             WORD_REG(dx), input)
00363                 AS2(    mov             WORD_REG(cx), iterationCount)
00364 #endif
00365 #endif  // #ifdef CRYPTOPP_GENERATE_X64_MASM
00366 
      // Base of the scratch area: asm operand %5 (workspace.m_ptr) for GCC on x64,
      // otherwise the stack pointer.
00367 #if defined(__GNUC__) && CRYPTOPP_BOOL_X64
00368         #define SSE2_workspace %5
00369 #else
00370         #define SSE2_workspace WORD_REG(sp)
00371 #endif
00372 
      // Scratch layout: slots 1..7 (WORD_SZ each) are named locals, followed by the
      // 12-word state copy at 8*WORD_SZ and the u/v arrays after it. On x86, slot 0
      // holds the saved esp (see the "mov [esp], AS_REG_6" below).
00373 #define SSE2_output                     WORD_PTR [SSE2_workspace+1*WORD_SZ]
00374 #define SSE2_input                      WORD_PTR [SSE2_workspace+2*WORD_SZ]
00375 #define SSE2_wordsLeft          WORD_PTR [SSE2_workspace+3*WORD_SZ]
00376 #define SSE2_diEnd                      WORD_PTR [SSE2_workspace+4*WORD_SZ]
00377 #define SSE2_pMulTables         WORD_PTR [SSE2_workspace+5*WORD_SZ]
00378 #define SSE2_state                      WORD_PTR [SSE2_workspace+6*WORD_SZ]
00379 #define SSE2_wordsLeft2         WORD_PTR [SSE2_workspace+7*WORD_SZ]
00380 #define SSE2_stateCopy          SSE2_workspace + 8*WORD_SZ
00381 #define SSE2_uvStart            SSE2_stateCopy + 12*4
00382 
00383 #if CRYPTOPP_BOOL_X86
                      // x86: keep the old esp in AS_REG_6, align esp down to 16 bytes,
                      // reserve the scratch area and store the old esp at its base
00384                 AS_PUSH_IF86(   bp)
00385                 AS2(    mov             AS_REG_6, esp)
00386                 AS2(    and             esp, -16)
00387                 AS2(    sub             esp, 80*4*2+12*4+8*WORD_SZ)     // 80 v's, 80 u's, 12 state, 8 locals
00388                 AS2(    mov             [esp], AS_REG_6)
00389 #endif
00390                 AS2(    mov             SSE2_output, WORD_REG(di))
00391                 AS2(    mov             SSE2_input, WORD_REG(dx))
00392                 AS2(    mov             SSE2_state, WORD_REG(ax))
00393 #ifndef _MSC_VER
00394                 AS2(    mov             SSE2_pMulTables, WORD_REG(si))
00395 #endif
                      // wordsLeft = iterationCount * 20 (cx = 5*cx, then si = 4*cx)
00396                 AS2(    lea             WORD_REG(cx), [4*WORD_REG(cx)+WORD_REG(cx)])
00397                 AS2(    lea             WORD_REG(si), [4*WORD_REG(cx)])
00398                 AS2(    mov             SSE2_wordsLeft, WORD_REG(si))
00399                 AS2(    movdqa  xmm0, [WORD_REG(ax)+0*16])              // copy state to stack to save a register
00400                 AS2(    movdqa  [SSE2_stateCopy+0*16], xmm0)
00401                 AS2(    movdqa  xmm0, [WORD_REG(ax)+1*16])
00402                 AS2(    movdqa  [SSE2_stateCopy+1*16], xmm0)
00403                 AS2(    movq    xmm0, QWORD PTR [WORD_REG(ax)+2*16])
00404                 AS2(    movq    QWORD PTR [SSE2_stateCopy+2*16], xmm0)
00405                 AS2(    psrlq   xmm0, 32)
00406                 AS2(    movd    AS_REG_6d, xmm0)                                // s(9)
                      // state words 10 and 11 stay in ecx / edx for the whole routine
00407                 AS2(    mov             ecx, [WORD_REG(ax)+10*4])
00408                 AS2(    mov             edx, [WORD_REG(ax)+11*4])
00409                 AS2(    pcmpeqb xmm7, xmm7)                             // all ones
00410 
      // s(i): word (i mod 10) of the state copy.
      // u(j) / v(j): element j of the u / v arrays, stored at row j%4, column j/4
      // of a 4 x 20 word block relative to di; v follows u by 80 words.
00411 #define s(i)    SSE2_stateCopy + ASM_MOD(i,10)*4
00412 #define u(j)    WORD_REG(di) + (ASM_MOD(j,4)*20 + (j/4)) * 4
00413 #define v(j)    WORD_REG(di) + (ASM_MOD(j,4)*20 + (j/4)) * 4 + 80*4
00414 
      // R1j / R2j: which of ecx / edx acts as the first / second register in a step
      // of parity j; the roles swap on odd steps, like r1(i) / r2(i) in the C++ path.
00415 #define R10 ecx
00416 #define R11 edx
00417 #define R20 edx
00418 #define R21 ecx
00419 // workaround bug in GAS 2.15
00420 #define R20r WORD_REG(dx)
00421 #define R21r WORD_REG(cx)
00422 
      // SSE2_STEP(i, j): one state-update step in assembly, with i the step index and
      // j its parity. It stores v(i) and u(i) and writes the new s(i).
      // NOTE(review): appears to mirror the STEP macro of the C++ path, with
      // AS_REG_6d carrying the most recently computed state word -- confirm.
00423 #define SSE2_STEP(i, j) \
00424         AS2(    mov             eax, [s(i+0)])\
00425         AS2(    mov             [v(i)], eax)\
00426         AS2(    rol             eax, 8)\
00427         AS2(    lea             AS_REG_7, [AS_REG_6 + R2##j##r])\
00428         AS2(    xor             AS_REG_7d, R1##j)\
00429         AS2(    mov             [u(i)], AS_REG_7d)\
00430         AS2(    mov             AS_REG_7d, 1)\
00431         AS2(    and             AS_REG_7d, R2##j)\
00432         AS1(    neg             AS_REG_7d)\
00433         AS2(    and             AS_REG_7d, AS_REG_6d)\
00434         AS2(    xor             AS_REG_6d, eax)\
00435         AS2(    movzx   eax, al)\
00436         AS2(    xor             AS_REG_6d, [WORD_REG(si)+WORD_REG(ax)*4])\
00437         AS2(    mov             eax, [s(i+3)])\
00438         AS2(    xor             AS_REG_7d, [s(i+2)])\
00439         AS2(    add             R1##j, AS_REG_7d)\
00440         AS2(    movzx   AS_REG_7d, al)\
00441         AS2(    shr             eax, 8)\
00442         AS2(    xor             AS_REG_6d, [WORD_REG(si)+1024+AS_REG_7*4])\
00443         AS2(    xor             AS_REG_6d, eax)\
00444         AS2(    imul    R2##j, AS_HEX(54655307))\
00445         AS2(    rol             R2##j, 7)\
00446         AS2(    mov             [s(i+0)], AS_REG_6d)\
00447 
00448                 ASL(2)  // outer loop, each iteration of this processes 80 words
00449                 AS2(    lea             WORD_REG(di), [SSE2_uvStart])   // start of v and u
                      // wordsLeft2 = min(wordsLeft, 80): the words handled by this outer iteration
00450                 AS2(    mov             WORD_REG(ax), 80)
00451                 AS2(    cmp             WORD_REG(si), 80)
00452                 AS2(    cmovg   WORD_REG(si), WORD_REG(ax))
00453                 AS2(    mov             SSE2_wordsLeft2, WORD_REG(si))
00454                 AS2(    lea             WORD_REG(si), [WORD_REG(di)+WORD_REG(si)])              // use to end first inner loop
00455                 AS2(    mov             SSE2_diEnd, WORD_REG(si))
00456 #ifdef _MSC_VER
00457                 AS2(    lea             WORD_REG(si), s_sosemanukMulTables)
00458 #else
00459                 AS2(    mov             WORD_REG(si), SSE2_pMulTables)
00460 #endif
00461 
00462                 ASL(0)  // first inner loop, 20 words each, 4 iterations
00463                 SSE2_STEP(0, 0)
00464                 SSE2_STEP(1, 1)
00465                 SSE2_STEP(2, 0)
00466                 SSE2_STEP(3, 1)
00467                 SSE2_STEP(4, 0)
00468                 SSE2_STEP(5, 1)
00469                 SSE2_STEP(6, 0)
00470                 SSE2_STEP(7, 1)
00471                 SSE2_STEP(8, 0)
00472                 SSE2_STEP(9, 1)
00473                 SSE2_STEP(10, 0)
00474                 SSE2_STEP(11, 1)
00475                 SSE2_STEP(12, 0)
00476                 SSE2_STEP(13, 1)
00477                 SSE2_STEP(14, 0)
00478                 SSE2_STEP(15, 1)
00479                 SSE2_STEP(16, 0)
00480                 SSE2_STEP(17, 1)
00481                 SSE2_STEP(18, 0)
00482                 SSE2_STEP(19, 1)
00483                 // loop
00484                 AS2(    add             WORD_REG(di), 5*4)
00485                 AS2(    cmp             WORD_REG(di), SSE2_diEnd)
00486                 ASJ(    jne,    0, b)
00487 
00488                 AS2(    mov             WORD_REG(ax), SSE2_input)
00489                 AS2(    mov             AS_REG_7, SSE2_output)
00490                 AS2(    lea             WORD_REG(di), [SSE2_uvStart])           // start of v and u
00491                 AS2(    mov             WORD_REG(si), SSE2_wordsLeft2)
00492 
00493                 ASL(1)  // second inner loop, 16 words each, 5 iterations
                      // load four consecutive words from each of the four u rows
00494                 AS2(    movdqa  xmm0, [WORD_REG(di)+0*20*4])
00495                 AS2(    movdqa  xmm2, [WORD_REG(di)+2*20*4])
00496                 AS2(    movdqa  xmm3, [WORD_REG(di)+3*20*4])
00497                 AS2(    movdqa  xmm1, [WORD_REG(di)+1*20*4])
00498                 // S2
00499                 AS2(    movdqa  xmm4, xmm0)
00500                 AS2(    pand    xmm0, xmm2)
00501                 AS2(    pxor    xmm0, xmm3)
00502                 AS2(    pxor    xmm2, xmm1)
00503                 AS2(    pxor    xmm2, xmm0)
00504                 AS2(    por             xmm3, xmm4)
00505                 AS2(    pxor    xmm3, xmm1)
00506                 AS2(    pxor    xmm4, xmm2)
00507                 AS2(    movdqa  xmm1, xmm3)
00508                 AS2(    por             xmm3, xmm4)
00509                 AS2(    pxor    xmm3, xmm0)
00510                 AS2(    pand    xmm0, xmm1)
00511                 AS2(    pxor    xmm4, xmm0)
00512                 AS2(    pxor    xmm1, xmm3)
00513                 AS2(    pxor    xmm1, xmm4)
                      // xmm7 is all ones (set before the loops), so this is a bitwise NOT of xmm4
00514                 AS2(    pxor    xmm4, xmm7)
00515                 // xor with v
00516                 AS2(    pxor    xmm2, [WORD_REG(di)+80*4])
00517                 AS2(    pxor    xmm3, [WORD_REG(di)+80*5])
00518                 AS2(    pxor    xmm1, [WORD_REG(di)+80*6])
00519                 AS2(    pxor    xmm4, [WORD_REG(di)+80*7])
00520                 // exit loop early if less than 16 words left to output
00521                 // this is necessary because block size is 20 words, and we output 16 words in each iteration of this loop
00522                 AS2(    cmp             WORD_REG(si), 16)
00523                 ASJ(    jl,             4, f)
00524                 // unpack
00525                 AS2(    movdqa          xmm6, xmm2)
00526                 AS2(    punpckldq       xmm2, xmm3)
00527                 AS2(    movdqa          xmm5, xmm1)
00528                 AS2(    punpckldq       xmm1, xmm4)
00529                 AS2(    movdqa          xmm0, xmm2)
00530                 AS2(    punpcklqdq      xmm2, xmm1)
00531                 AS2(    punpckhqdq      xmm0, xmm1)
00532                 AS2(    punpckhdq       xmm6, xmm3)
00533                 AS2(    punpckhdq       xmm5, xmm4)
00534                 AS2(    movdqa          xmm3, xmm6)
00535                 AS2(    punpcklqdq      xmm6, xmm5)
00536                 AS2(    punpckhqdq      xmm3, xmm5)
00537                 // output keystream
00538                 AS_XMM_OUTPUT4(SSE2_Sosemanuk_Output, WORD_REG(ax), AS_REG_7, 2,0,6,3, 1, 0,1,2,3, 4)
00539 
00540                 // loop
00541                 AS2(    add             WORD_REG(di), 4*4)
00542                 AS2(    sub             WORD_REG(si), 16)
00543                 ASJ(    jnz,    1, b)
00544 
00545                 // outer loop
00546                 AS2(    mov             WORD_REG(si), SSE2_wordsLeft)
00547                 AS2(    sub             WORD_REG(si), 80)
00548                 ASJ(    jz,             6, f)
00549                 AS2(    mov             SSE2_wordsLeft, WORD_REG(si))
00550                 AS2(    mov             SSE2_input, WORD_REG(ax))
00551                 AS2(    mov             SSE2_output, AS_REG_7)
00552                 ASJ(    jmp,    2, b)
00553 
00554                 ASL(4)  // final output of less than 16 words
                      // a null input pointer skips the XOR with input; 4 words are written per pass
00555                 AS2(    test    WORD_REG(ax), WORD_REG(ax))
00556                 ASJ(    jz,             5, f)
00557                 AS2(    movd    xmm0, dword ptr [WORD_REG(ax)+0*4])
00558                 AS2(    pxor    xmm2, xmm0)
00559                 AS2(    movd    xmm0, dword ptr [WORD_REG(ax)+1*4])
00560                 AS2(    pxor    xmm3, xmm0)
00561                 AS2(    movd    xmm0, dword ptr [WORD_REG(ax)+2*4])
00562                 AS2(    pxor    xmm1, xmm0)
00563                 AS2(    movd    xmm0, dword ptr [WORD_REG(ax)+3*4])
00564                 AS2(    pxor    xmm4, xmm0)
00565                 AS2(    add             WORD_REG(ax), 16)
00566                 ASL(5)
00567                 AS2(    movd    dword ptr [AS_REG_7+0*4], xmm2)
00568                 AS2(    movd    dword ptr [AS_REG_7+1*4], xmm3)
00569                 AS2(    movd    dword ptr [AS_REG_7+2*4], xmm1)
00570                 AS2(    movd    dword ptr [AS_REG_7+3*4], xmm4)
00571                 AS2(    sub             WORD_REG(si), 4)
00572                 ASJ(    jz,             6, f)
00573                 AS2(    add             AS_REG_7, 16)
00574                 AS2(    psrldq  xmm2, 4)
00575                 AS2(    psrldq  xmm3, 4)
00576                 AS2(    psrldq  xmm1, 4)
00577                 AS2(    psrldq  xmm4, 4)
00578                 ASJ(    jmp,    4, b)
00579 
00580                 ASL(6)  // save state
00581                 AS2(    mov             AS_REG_6, SSE2_state)
00582                 AS2(    movdqa  xmm0, [SSE2_stateCopy+0*16])
00583                 AS2(    movdqa  [AS_REG_6+0*16], xmm0)
00584                 AS2(    movdqa  xmm0, [SSE2_stateCopy+1*16])
00585                 AS2(    movdqa  [AS_REG_6+1*16], xmm0)
00586                 AS2(    movq    xmm0, QWORD PTR [SSE2_stateCopy+2*16])
00587                 AS2(    movq    QWORD PTR [AS_REG_6+2*16], xmm0)
00588                 AS2(    mov             [AS_REG_6+10*4], ecx)
00589                 AS2(    mov             [AS_REG_6+11*4], edx)
00590 
                      // x86: reload the esp saved at the base of the scratch area, then restore bp
00591                 AS_POP_IF86(    sp)
00592                 AS_POP_IF86(    bp)
00593 
00594 #ifdef __GNUC__
00595                 AS_POP_IF86(    bx)
00596                 ".att_syntax prefix;"
00597                         :
00598                         : "a" (m_state.m_ptr), "c" (iterationCount), "S" (s_sosemanukMulTables), "D" (output), "d" (input)
00599         #if CRYPTOPP_BOOL_X64
00600                         , "r" (workspace.m_ptr)
00601                         : "memory", "cc", "%r9", "%r10", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
00602         #else
00603                         : "memory", "cc"
00604         #endif
00605                 );
00606 #endif
00607 #ifdef CRYPTOPP_GENERATE_X64_MASM
00608         movdqa  xmm6, [rsp + 02f0h]
00609         movdqa  xmm7, [rsp + 0300h]
00610         add             rsp, 80*4*2+12*4+8*WORD_SZ + 2*16+8
00611         pop             rdi
00612         pop             rsi
00613         ret
00614         Sosemanuk_OperateKeystream ENDP
00615 #else
00616         }
00617         else
00618 #endif
00619 #endif
00620 #ifndef CRYPTOPP_GENERATE_X64_MASM
00621         {
              // Portable C++ path. It is the else branch of the HasSSE2() test when
              // SSE2 assembly is compiled in, and a plain block otherwise.
      // MUL_A / DIV_A: table-driven transforms of a state word using the first /
      // second half of s_sosemanukMulTables. The x86/x64 MUL_A also rotates x in
      // place, so it has a side effect on its argument.
      // NOTE(review): presumably multiplication and division by the field element
      // alpha of the SOSEMANUK specification -- confirm.
00622 #if CRYPTOPP_BOOL_X86 | CRYPTOPP_BOOL_X64
00623 #define MUL_A(x)    (x = rotlFixed(x, 8), x ^ s_sosemanukMulTables[byte(x)])
00624 #else
00625 #define MUL_A(x)    (((x) << 8) ^ s_sosemanukMulTables[(x) >> 24])
00626 #endif
00627 
00628 #define DIV_A(x)    (((x) >> 8) ^ s_sosemanukMulTables[256 + byte(x)])
00629 
      // r1(i) / r2(i): reg1 / reg2 on even i, swapped on odd i
00630 #define r1(i) ((i%2) ? reg2 : reg1)
00631 #define r2(i) ((i%2) ? reg1 : reg2)
00632 
      // STEP: one state-update step. Stores the output pair (u, v), replaces state
      // word s<x0>, and updates the two registers. Only x0, x2, x3 and x9 are used
      // in the body; the other index parameters are unused.
00633 #define STEP(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, v, u)      \
00634                 u = (s##x9 + r2(x0)) ^ r1(x0);\
00635                 v = s##x0;\
00636                 s##x0 = MUL_A(s##x0) ^ DIV_A(s##x3) ^ s##x9;\
00637                 r1(x0) += XMUX(r2(x0), s##x2, s##x9);\
00638                 r2(x0) = rotlFixed(r2(x0) * 0x54655307, 7);\
00639 
      // SOSEMANUK_OUTPUT: emits four little-endian words, pairing u2,u3,u1,u4 with v0..v3
00640 #define SOSEMANUK_OUTPUT(x)     \
00641         CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 0, u2 ^ v0);\
00642         CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 1, u3 ^ v1);\
00643         CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 2, u1 ^ v2);\
00644         CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 3, u4 ^ v3);
00645 
      // OUTPUT4: applies S2 to u0..u4, then emits the four words above
00646 #define OUTPUT4 \
00647         S2(0, u0, u1, u2, u3, u4);\
00648         CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH(SOSEMANUK_OUTPUT, 4*4);
00649 
              // work on local copies of the state; written back after the loop
00650         word32 s0 = m_state[0];
00651         word32 s1 = m_state[1];
00652         word32 s2 = m_state[2];
00653         word32 s3 = m_state[3];
00654         word32 s4 = m_state[4];
00655         word32 s5 = m_state[5];
00656         word32 s6 = m_state[6];
00657         word32 s7 = m_state[7];
00658         word32 s8 = m_state[8];
00659         word32 s9 = m_state[9];
00660         word32 reg1 = m_state[10];
00661         word32 reg2 = m_state[11];
00662         word32 u0, u1, u2, u3, u4, v0, v1, v2, v3;
00663 
              // Each pass runs 20 STEPs in groups of four, each group followed by OUTPUT4.
              // The STEP index arguments rotate by one per step and return to the starting
              // arrangement after 20 steps.
00664         do
00665         {
00666                 STEP(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, v0, u0)
00667                 STEP(1, 2, 3, 4, 5, 6, 7, 8, 9, 0, v1, u1)
00668                 STEP(2, 3, 4, 5, 6, 7, 8, 9, 0, 1, v2, u2)
00669                 STEP(3, 4, 5, 6, 7, 8, 9, 0, 1, 2, v3, u3)
00670                 OUTPUT4
00671                 STEP(4, 5, 6, 7, 8, 9, 0, 1, 2, 3, v0, u0)
00672                 STEP(5, 6, 7, 8, 9, 0, 1, 2, 3, 4, v1, u1)
00673                 STEP(6, 7, 8, 9, 0, 1, 2, 3, 4, 5, v2, u2)
00674                 STEP(7, 8, 9, 0, 1, 2, 3, 4, 5, 6, v3, u3)
00675                 OUTPUT4
00676                 STEP(8, 9, 0, 1, 2, 3, 4, 5, 6, 7, v0, u0)
00677                 STEP(9, 0, 1, 2, 3, 4, 5, 6, 7, 8, v1, u1)
00678                 STEP(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, v2, u2)
00679                 STEP(1, 2, 3, 4, 5, 6, 7, 8, 9, 0, v3, u3)
00680                 OUTPUT4
00681                 STEP(2, 3, 4, 5, 6, 7, 8, 9, 0, 1, v0, u0)
00682                 STEP(3, 4, 5, 6, 7, 8, 9, 0, 1, 2, v1, u1)
00683                 STEP(4, 5, 6, 7, 8, 9, 0, 1, 2, 3, v2, u2)
00684                 STEP(5, 6, 7, 8, 9, 0, 1, 2, 3, 4, v3, u3)
00685                 OUTPUT4
00686                 STEP(6, 7, 8, 9, 0, 1, 2, 3, 4, 5, v0, u0)
00687                 STEP(7, 8, 9, 0, 1, 2, 3, 4, 5, 6, v1, u1)
00688                 STEP(8, 9, 0, 1, 2, 3, 4, 5, 6, 7, v2, u2)
00689                 STEP(9, 0, 1, 2, 3, 4, 5, 6, 7, 8, v3, u3)
00690                 OUTPUT4
00691         }
              // NOTE: do/while with pre-decrement -- the body runs at least once, so
              // iterationCount must be >= 1 on entry (0 would wrap around).
00692         while (--iterationCount);
00693 
00694         m_state[0] = s0;
00695         m_state[1] = s1;
00696         m_state[2] = s2;
00697         m_state[3] = s3;
00698         m_state[4] = s4;
00699         m_state[5] = s5;
00700         m_state[6] = s6;
00701         m_state[7] = s7;
00702         m_state[8] = s8;
00703         m_state[9] = s9;
00704         m_state[10] = reg1;
00705         m_state[11] = reg2;
00706         }
00707 }
00708 
00709 NAMESPACE_END
00710 
00711 #endif // #ifndef CRYPTOPP_GENERATE_X64_MASM

Generated on Mon Aug 9 2010 15:56:38 for Crypto++ by  doxygen 1.7.1