00001
00002
00003
00004
00005 #include "pch.h"
00006
00007 #ifndef CRYPTOPP_GENERATE_X64_MASM
00008
00009 #include "sosemanuk.h"
00010 #include "misc.h"
00011 #include "cpu.h"
00012
00013 #include "serpentp.h"
00014
00015 #if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
00016 #include <emmintrin.h>
00017 #endif
00018
00019 NAMESPACE_BEGIN(CryptoPP)
00020
00021 void SosemanukPolicy::CipherSetKey(const NameValuePairs ¶ms, const byte *userKey, size_t keylen)
00022 {
00023 Serpent_KeySchedule(m_key, 24, userKey, keylen);
00024 }
00025
// Re-initialises the cipher state m_state[0..11] from a new IV.
//
// Four little-endian 32-bit words are read from iv and run through three passes of the
// eight-step "KX / Sn / LT" sequence below (24 steps in total). The subkey pointer k
// advances by 32 words per pass. Values are captured into m_state part-way through the
// second and third passes and again after the final key mixing.
//
// keystreamBuffer is not used by this function.
//
// NOTE(review): KX, S0..S7, LT and the beforeS*/afterS* wrappers are macros from
// serpentp.h; presumably key-XOR, S-box and linear transform of a Serpent round,
// operating on the locals a..e and the subkey pointer k. Confirm against that header.
00026 void SosemanukPolicy::CipherResynchronize(byte *keystreamBuffer, const byte *iv)
00027 {
00028 word32 a, b, c, d, e; // working variables referenced by name inside the serpentp.h macros
00029
00030 typedef BlockGetAndPut<word32, LittleEndian> Block;
00031 Block::Get(iv)(a)(b)(c)(d); // load the IV as four little-endian words
00032
00033 const word32 *k = m_key; // subkeys produced by CipherSetKey
00034 unsigned int i=1; // pass counter: 1, 2, 3
00035
00036 do
00037 {
00038 beforeS0(KX); beforeS0(S0); afterS0(LT);
00039 afterS0(KX); afterS0(S1); afterS1(LT);
00040 if (i == 3) // third pass, two steps in: after the 18th step overall
00041 {
00042 m_state[4] = b;
00043 m_state[5] = e;
00044 m_state[10] = c;
00045 m_state[11] = a;
00046 }
00047 afterS1(KX); afterS1(S2); afterS2(LT);
00048 afterS2(KX); afterS2(S3); afterS3(LT);
00049 if (i == 2) // second pass, four steps in: after the 12th step overall
00050 {
00051 m_state[6] = c;
00052 m_state[7] = d;
00053 m_state[8] = b;
00054 m_state[9] = e;
00055 }
00056 afterS3(KX); afterS3(S4); afterS4(LT);
00057 afterS4(KX); afterS4(S5); afterS5(LT);
00058 afterS5(KX); afterS5(S6); afterS6(LT);
00059 afterS6(KX); afterS6(S7); afterS7(LT);
00060
00061 if (i == 3) // 24 steps done; leave before the variable shuffle below
00062 break;
00063
00064 ++i;
// Move the values back into the variables that beforeS0 reads on the next pass.
// NOTE(review): the old value of c is dropped and a ends up equal to e; presumably one
// of the five variables is scratch at this point - confirm against serpentp.h.
00065 c = b;
00066 b = e;
00067 e = d;
00068 d = a;
00069 a = e;
00070 k += 32; // advance to the subkeys for the next eight steps
00071 }
00072 while (true);
00073
00074 afterS7(KX); // final key mixing after the 24th step
00075
00076 m_state[0] = a;
00077 m_state[1] = b;
00078 m_state[2] = e;
00079 m_state[3] = d;
00080
// XMUX(c, x, y): yields x when the low bit of c is 0, and x ^ y when it is 1.
// The macro stays defined and is reused by STEP in OperateKeystream.
00081 #define XMUX(c, x, y) (x ^ (y & (0 - (c & 1))))
// One update of m_state[11] / m_state[10], of the same form as the r1/r2 update in STEP.
00082 m_state[11] += XMUX(m_state[10], m_state[1], m_state[8]);
00083 m_state[10] = rotlFixed(m_state[10] * 0x54655307, 7);
00084 }
00085
// Lookup tables (512 words) used by the MUL_A / DIV_A macros and by the SSE2 assembly in
// OperateKeystream:
//   entries [0, 256)   - indexed by MUL_A (assembly: [si + index*4])
//   entries [256, 512) - indexed by DIV_A as 256 + byte(x) (assembly: [si + 1024 + index*4])
// NOTE(review): declared extern "C" presumably so that the assembly code can refer to the
// symbol by its unmangled name - confirm.
00086 extern "C" {
00087 word32 s_sosemanukMulTables[512] = {
// First 256 entries, x86/x64 variant: paired with the rotate-based MUL_A
// (rotlFixed(x, 8) ^ table[byte(x)]). Spot-checking against the #else variant shows each
// entry differs only in its low byte, which is XORed with the entry's index.
00088 #if CRYPTOPP_BOOL_X86 | CRYPTOPP_BOOL_X64
00089 0x00000000, 0xE19FCF12, 0x6B973724, 0x8A08F836,
00090 0xD6876E48, 0x3718A15A, 0xBD10596C, 0x5C8F967E,
00091 0x05A7DC90, 0xE4381382, 0x6E30EBB4, 0x8FAF24A6,
00092 0xD320B2D8, 0x32BF7DCA, 0xB8B785FC, 0x59284AEE,
00093 0x0AE71189, 0xEB78DE9B, 0x617026AD, 0x80EFE9BF,
00094 0xDC607FC1, 0x3DFFB0D3, 0xB7F748E5, 0x566887F7,
00095 0x0F40CD19, 0xEEDF020B, 0x64D7FA3D, 0x8548352F,
00096 0xD9C7A351, 0x38586C43, 0xB2509475, 0x53CF5B67,
00097 0x146722BB, 0xF5F8EDA9, 0x7FF0159F, 0x9E6FDA8D,
00098 0xC2E04CF3, 0x237F83E1, 0xA9777BD7, 0x48E8B4C5,
00099 0x11C0FE2B, 0xF05F3139, 0x7A57C90F, 0x9BC8061D,
00100 0xC7479063, 0x26D85F71, 0xACD0A747, 0x4D4F6855,
00101 0x1E803332, 0xFF1FFC20, 0x75170416, 0x9488CB04,
00102 0xC8075D7A, 0x29989268, 0xA3906A5E, 0x420FA54C,
00103 0x1B27EFA2, 0xFAB820B0, 0x70B0D886, 0x912F1794,
00104 0xCDA081EA, 0x2C3F4EF8, 0xA637B6CE, 0x47A879DC,
00105 0x28CE44DF, 0xC9518BCD, 0x435973FB, 0xA2C6BCE9,
00106 0xFE492A97, 0x1FD6E585, 0x95DE1DB3, 0x7441D2A1,
00107 0x2D69984F, 0xCCF6575D, 0x46FEAF6B, 0xA7616079,
00108 0xFBEEF607, 0x1A713915, 0x9079C123, 0x71E60E31,
00109 0x22295556, 0xC3B69A44, 0x49BE6272, 0xA821AD60,
00110 0xF4AE3B1E, 0x1531F40C, 0x9F390C3A, 0x7EA6C328,
00111 0x278E89C6, 0xC61146D4, 0x4C19BEE2, 0xAD8671F0,
00112 0xF109E78E, 0x1096289C, 0x9A9ED0AA, 0x7B011FB8,
00113 0x3CA96664, 0xDD36A976, 0x573E5140, 0xB6A19E52,
00114 0xEA2E082C, 0x0BB1C73E, 0x81B93F08, 0x6026F01A,
00115 0x390EBAF4, 0xD89175E6, 0x52998DD0, 0xB30642C2,
00116 0xEF89D4BC, 0x0E161BAE, 0x841EE398, 0x65812C8A,
00117 0x364E77ED, 0xD7D1B8FF, 0x5DD940C9, 0xBC468FDB,
00118 0xE0C919A5, 0x0156D6B7, 0x8B5E2E81, 0x6AC1E193,
00119 0x33E9AB7D, 0xD276646F, 0x587E9C59, 0xB9E1534B,
00120 0xE56EC535, 0x04F10A27, 0x8EF9F211, 0x6F663D03,
00121 0x50358817, 0xB1AA4705, 0x3BA2BF33, 0xDA3D7021,
00122 0x86B2E65F, 0x672D294D, 0xED25D17B, 0x0CBA1E69,
00123 0x55925487, 0xB40D9B95, 0x3E0563A3, 0xDF9AACB1,
00124 0x83153ACF, 0x628AF5DD, 0xE8820DEB, 0x091DC2F9,
00125 0x5AD2999E, 0xBB4D568C, 0x3145AEBA, 0xD0DA61A8,
00126 0x8C55F7D6, 0x6DCA38C4, 0xE7C2C0F2, 0x065D0FE0,
00127 0x5F75450E, 0xBEEA8A1C, 0x34E2722A, 0xD57DBD38,
00128 0x89F22B46, 0x686DE454, 0xE2651C62, 0x03FAD370,
00129 0x4452AAAC, 0xA5CD65BE, 0x2FC59D88, 0xCE5A529A,
00130 0x92D5C4E4, 0x734A0BF6, 0xF942F3C0, 0x18DD3CD2,
00131 0x41F5763C, 0xA06AB92E, 0x2A624118, 0xCBFD8E0A,
00132 0x97721874, 0x76EDD766, 0xFCE52F50, 0x1D7AE042,
00133 0x4EB5BB25, 0xAF2A7437, 0x25228C01, 0xC4BD4313,
00134 0x9832D56D, 0x79AD1A7F, 0xF3A5E249, 0x123A2D5B,
00135 0x4B1267B5, 0xAA8DA8A7, 0x20855091, 0xC11A9F83,
00136 0x9D9509FD, 0x7C0AC6EF, 0xF6023ED9, 0x179DF1CB,
00137 0x78FBCCC8, 0x996403DA, 0x136CFBEC, 0xF2F334FE,
00138 0xAE7CA280, 0x4FE36D92, 0xC5EB95A4, 0x24745AB6,
00139 0x7D5C1058, 0x9CC3DF4A, 0x16CB277C, 0xF754E86E,
00140 0xABDB7E10, 0x4A44B102, 0xC04C4934, 0x21D38626,
00141 0x721CDD41, 0x93831253, 0x198BEA65, 0xF8142577,
00142 0xA49BB309, 0x45047C1B, 0xCF0C842D, 0x2E934B3F,
00143 0x77BB01D1, 0x9624CEC3, 0x1C2C36F5, 0xFDB3F9E7,
00144 0xA13C6F99, 0x40A3A08B, 0xCAAB58BD, 0x2B3497AF,
00145 0x6C9CEE73, 0x8D032161, 0x070BD957, 0xE6941645,
00146 0xBA1B803B, 0x5B844F29, 0xD18CB71F, 0x3013780D,
00147 0x693B32E3, 0x88A4FDF1, 0x02AC05C7, 0xE333CAD5,
00148 0xBFBC5CAB, 0x5E2393B9, 0xD42B6B8F, 0x35B4A49D,
00149 0x667BFFFA, 0x87E430E8, 0x0DECC8DE, 0xEC7307CC,
00150 0xB0FC91B2, 0x51635EA0, 0xDB6BA696, 0x3AF46984,
00151 0x63DC236A, 0x8243EC78, 0x084B144E, 0xE9D4DB5C,
00152 0xB55B4D22, 0x54C48230, 0xDECC7A06, 0x3F53B514,
// First 256 entries, generic variant: paired with the shift-based MUL_A
// (((x) << 8) ^ table[(x) >> 24]).
00153 #else
00154 0x00000000, 0xE19FCF13, 0x6B973726, 0x8A08F835,
00155 0xD6876E4C, 0x3718A15F, 0xBD10596A, 0x5C8F9679,
00156 0x05A7DC98, 0xE438138B, 0x6E30EBBE, 0x8FAF24AD,
00157 0xD320B2D4, 0x32BF7DC7, 0xB8B785F2, 0x59284AE1,
00158 0x0AE71199, 0xEB78DE8A, 0x617026BF, 0x80EFE9AC,
00159 0xDC607FD5, 0x3DFFB0C6, 0xB7F748F3, 0x566887E0,
00160 0x0F40CD01, 0xEEDF0212, 0x64D7FA27, 0x85483534,
00161 0xD9C7A34D, 0x38586C5E, 0xB250946B, 0x53CF5B78,
00162 0x1467229B, 0xF5F8ED88, 0x7FF015BD, 0x9E6FDAAE,
00163 0xC2E04CD7, 0x237F83C4, 0xA9777BF1, 0x48E8B4E2,
00164 0x11C0FE03, 0xF05F3110, 0x7A57C925, 0x9BC80636,
00165 0xC747904F, 0x26D85F5C, 0xACD0A769, 0x4D4F687A,
00166 0x1E803302, 0xFF1FFC11, 0x75170424, 0x9488CB37,
00167 0xC8075D4E, 0x2998925D, 0xA3906A68, 0x420FA57B,
00168 0x1B27EF9A, 0xFAB82089, 0x70B0D8BC, 0x912F17AF,
00169 0xCDA081D6, 0x2C3F4EC5, 0xA637B6F0, 0x47A879E3,
00170 0x28CE449F, 0xC9518B8C, 0x435973B9, 0xA2C6BCAA,
00171 0xFE492AD3, 0x1FD6E5C0, 0x95DE1DF5, 0x7441D2E6,
00172 0x2D699807, 0xCCF65714, 0x46FEAF21, 0xA7616032,
00173 0xFBEEF64B, 0x1A713958, 0x9079C16D, 0x71E60E7E,
00174 0x22295506, 0xC3B69A15, 0x49BE6220, 0xA821AD33,
00175 0xF4AE3B4A, 0x1531F459, 0x9F390C6C, 0x7EA6C37F,
00176 0x278E899E, 0xC611468D, 0x4C19BEB8, 0xAD8671AB,
00177 0xF109E7D2, 0x109628C1, 0x9A9ED0F4, 0x7B011FE7,
00178 0x3CA96604, 0xDD36A917, 0x573E5122, 0xB6A19E31,
00179 0xEA2E0848, 0x0BB1C75B, 0x81B93F6E, 0x6026F07D,
00180 0x390EBA9C, 0xD891758F, 0x52998DBA, 0xB30642A9,
00181 0xEF89D4D0, 0x0E161BC3, 0x841EE3F6, 0x65812CE5,
00182 0x364E779D, 0xD7D1B88E, 0x5DD940BB, 0xBC468FA8,
00183 0xE0C919D1, 0x0156D6C2, 0x8B5E2EF7, 0x6AC1E1E4,
00184 0x33E9AB05, 0xD2766416, 0x587E9C23, 0xB9E15330,
00185 0xE56EC549, 0x04F10A5A, 0x8EF9F26F, 0x6F663D7C,
00186 0x50358897, 0xB1AA4784, 0x3BA2BFB1, 0xDA3D70A2,
00187 0x86B2E6DB, 0x672D29C8, 0xED25D1FD, 0x0CBA1EEE,
00188 0x5592540F, 0xB40D9B1C, 0x3E056329, 0xDF9AAC3A,
00189 0x83153A43, 0x628AF550, 0xE8820D65, 0x091DC276,
00190 0x5AD2990E, 0xBB4D561D, 0x3145AE28, 0xD0DA613B,
00191 0x8C55F742, 0x6DCA3851, 0xE7C2C064, 0x065D0F77,
00192 0x5F754596, 0xBEEA8A85, 0x34E272B0, 0xD57DBDA3,
00193 0x89F22BDA, 0x686DE4C9, 0xE2651CFC, 0x03FAD3EF,
00194 0x4452AA0C, 0xA5CD651F, 0x2FC59D2A, 0xCE5A5239,
00195 0x92D5C440, 0x734A0B53, 0xF942F366, 0x18DD3C75,
00196 0x41F57694, 0xA06AB987, 0x2A6241B2, 0xCBFD8EA1,
00197 0x977218D8, 0x76EDD7CB, 0xFCE52FFE, 0x1D7AE0ED,
00198 0x4EB5BB95, 0xAF2A7486, 0x25228CB3, 0xC4BD43A0,
00199 0x9832D5D9, 0x79AD1ACA, 0xF3A5E2FF, 0x123A2DEC,
00200 0x4B12670D, 0xAA8DA81E, 0x2085502B, 0xC11A9F38,
00201 0x9D950941, 0x7C0AC652, 0xF6023E67, 0x179DF174,
00202 0x78FBCC08, 0x9964031B, 0x136CFB2E, 0xF2F3343D,
00203 0xAE7CA244, 0x4FE36D57, 0xC5EB9562, 0x24745A71,
00204 0x7D5C1090, 0x9CC3DF83, 0x16CB27B6, 0xF754E8A5,
00205 0xABDB7EDC, 0x4A44B1CF, 0xC04C49FA, 0x21D386E9,
00206 0x721CDD91, 0x93831282, 0x198BEAB7, 0xF81425A4,
00207 0xA49BB3DD, 0x45047CCE, 0xCF0C84FB, 0x2E934BE8,
00208 0x77BB0109, 0x9624CE1A, 0x1C2C362F, 0xFDB3F93C,
00209 0xA13C6F45, 0x40A3A056, 0xCAAB5863, 0x2B349770,
00210 0x6C9CEE93, 0x8D032180, 0x070BD9B5, 0xE69416A6,
00211 0xBA1B80DF, 0x5B844FCC, 0xD18CB7F9, 0x301378EA,
00212 0x693B320B, 0x88A4FD18, 0x02AC052D, 0xE333CA3E,
00213 0xBFBC5C47, 0x5E239354, 0xD42B6B61, 0x35B4A472,
00214 0x667BFF0A, 0x87E43019, 0x0DECC82C, 0xEC73073F,
00215 0xB0FC9146, 0x51635E55, 0xDB6BA660, 0x3AF46973,
00216 0x63DC2392, 0x8243EC81, 0x084B14B4, 0xE9D4DBA7,
00217 0xB55B4DDE, 0x54C482CD, 0xDECC7AF8, 0x3F53B5EB,
00218 #endif
// Entries [256, 512): shared by all targets; indexed by DIV_A as 256 + byte(x).
00219 0x00000000, 0x180F40CD, 0x301E8033, 0x2811C0FE,
00220 0x603CA966, 0x7833E9AB, 0x50222955, 0x482D6998,
00221 0xC078FBCC, 0xD877BB01, 0xF0667BFF, 0xE8693B32,
00222 0xA04452AA, 0xB84B1267, 0x905AD299, 0x88559254,
00223 0x29F05F31, 0x31FF1FFC, 0x19EEDF02, 0x01E19FCF,
00224 0x49CCF657, 0x51C3B69A, 0x79D27664, 0x61DD36A9,
00225 0xE988A4FD, 0xF187E430, 0xD99624CE, 0xC1996403,
00226 0x89B40D9B, 0x91BB4D56, 0xB9AA8DA8, 0xA1A5CD65,
00227 0x5249BE62, 0x4A46FEAF, 0x62573E51, 0x7A587E9C,
00228 0x32751704, 0x2A7A57C9, 0x026B9737, 0x1A64D7FA,
00229 0x923145AE, 0x8A3E0563, 0xA22FC59D, 0xBA208550,
00230 0xF20DECC8, 0xEA02AC05, 0xC2136CFB, 0xDA1C2C36,
00231 0x7BB9E153, 0x63B6A19E, 0x4BA76160, 0x53A821AD,
00232 0x1B854835, 0x038A08F8, 0x2B9BC806, 0x339488CB,
00233 0xBBC11A9F, 0xA3CE5A52, 0x8BDF9AAC, 0x93D0DA61,
00234 0xDBFDB3F9, 0xC3F2F334, 0xEBE333CA, 0xF3EC7307,
00235 0xA492D5C4, 0xBC9D9509, 0x948C55F7, 0x8C83153A,
00236 0xC4AE7CA2, 0xDCA13C6F, 0xF4B0FC91, 0xECBFBC5C,
00237 0x64EA2E08, 0x7CE56EC5, 0x54F4AE3B, 0x4CFBEEF6,
00238 0x04D6876E, 0x1CD9C7A3, 0x34C8075D, 0x2CC74790,
00239 0x8D628AF5, 0x956DCA38, 0xBD7C0AC6, 0xA5734A0B,
00240 0xED5E2393, 0xF551635E, 0xDD40A3A0, 0xC54FE36D,
00241 0x4D1A7139, 0x551531F4, 0x7D04F10A, 0x650BB1C7,
00242 0x2D26D85F, 0x35299892, 0x1D38586C, 0x053718A1,
00243 0xF6DB6BA6, 0xEED42B6B, 0xC6C5EB95, 0xDECAAB58,
00244 0x96E7C2C0, 0x8EE8820D, 0xA6F942F3, 0xBEF6023E,
00245 0x36A3906A, 0x2EACD0A7, 0x06BD1059, 0x1EB25094,
00246 0x569F390C, 0x4E9079C1, 0x6681B93F, 0x7E8EF9F2,
00247 0xDF2B3497, 0xC724745A, 0xEF35B4A4, 0xF73AF469,
00248 0xBF179DF1, 0xA718DD3C, 0x8F091DC2, 0x97065D0F,
00249 0x1F53CF5B, 0x075C8F96, 0x2F4D4F68, 0x37420FA5,
00250 0x7F6F663D, 0x676026F0, 0x4F71E60E, 0x577EA6C3,
00251 0xE18D0321, 0xF98243EC, 0xD1938312, 0xC99CC3DF,
00252 0x81B1AA47, 0x99BEEA8A, 0xB1AF2A74, 0xA9A06AB9,
00253 0x21F5F8ED, 0x39FAB820, 0x11EB78DE, 0x09E43813,
00254 0x41C9518B, 0x59C61146, 0x71D7D1B8, 0x69D89175,
00255 0xC87D5C10, 0xD0721CDD, 0xF863DC23, 0xE06C9CEE,
00256 0xA841F576, 0xB04EB5BB, 0x985F7545, 0x80503588,
00257 0x0805A7DC, 0x100AE711, 0x381B27EF, 0x20146722,
00258 0x68390EBA, 0x70364E77, 0x58278E89, 0x4028CE44,
00259 0xB3C4BD43, 0xABCBFD8E, 0x83DA3D70, 0x9BD57DBD,
00260 0xD3F81425, 0xCBF754E8, 0xE3E69416, 0xFBE9D4DB,
00261 0x73BC468F, 0x6BB30642, 0x43A2C6BC, 0x5BAD8671,
00262 0x1380EFE9, 0x0B8FAF24, 0x239E6FDA, 0x3B912F17,
00263 0x9A34E272, 0x823BA2BF, 0xAA2A6241, 0xB225228C,
00264 0xFA084B14, 0xE2070BD9, 0xCA16CB27, 0xD2198BEA,
00265 0x5A4C19BE, 0x42435973, 0x6A52998D, 0x725DD940,
00266 0x3A70B0D8, 0x227FF015, 0x0A6E30EB, 0x12617026,
00267 0x451FD6E5, 0x5D109628, 0x750156D6, 0x6D0E161B,
00268 0x25237F83, 0x3D2C3F4E, 0x153DFFB0, 0x0D32BF7D,
00269 0x85672D29, 0x9D686DE4, 0xB579AD1A, 0xAD76EDD7,
00270 0xE55B844F, 0xFD54C482, 0xD545047C, 0xCD4A44B1,
00271 0x6CEF89D4, 0x74E0C919, 0x5CF109E7, 0x44FE492A,
00272 0x0CD320B2, 0x14DC607F, 0x3CCDA081, 0x24C2E04C,
00273 0xAC977218, 0xB49832D5, 0x9C89F22B, 0x8486B2E6,
00274 0xCCABDB7E, 0xD4A49BB3, 0xFCB55B4D, 0xE4BA1B80,
00275 0x17566887, 0x0F59284A, 0x2748E8B4, 0x3F47A879,
00276 0x776AC1E1, 0x6F65812C, 0x477441D2, 0x5F7B011F,
00277 0xD72E934B, 0xCF21D386, 0xE7301378, 0xFF3F53B5,
00278 0xB7123A2D, 0xAF1D7AE0, 0x870CBA1E, 0x9F03FAD3,
00279 0x3EA637B6, 0x26A9777B, 0x0EB8B785, 0x16B7F748,
00280 0x5E9A9ED0, 0x4695DE1D, 0x6E841EE3, 0x768B5E2E,
00281 0xFEDECC7A, 0xE6D18CB7, 0xCEC04C49, 0xD6CF0C84,
00282 0x9EE2651C, 0x86ED25D1, 0xAEFCE52F, 0xB6F3A5E2
00283 };
00284 }
00285
00286 #if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X64
00287 unsigned int SosemanukPolicy::GetAlignment() const
00288 {
00289 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
00290 #ifdef __INTEL_COMPILER
00291 if (HasSSE2() && !IsP4())
00292 #else
00293 if (HasSSE2())
00294 #endif
00295 return 16;
00296 else
00297 #endif
00298 return 1;
00299 }
00300
00301 unsigned int SosemanukPolicy::GetOptimalBlockSize() const
00302 {
00303 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
00304 #ifdef __INTEL_COMPILER
00305 if (HasSSE2() && !IsP4())
00306 #else
00307 if (HasSSE2())
00308 #endif
00309 return 4*BYTES_PER_ITERATION;
00310 else
00311 #endif
00312 return BYTES_PER_ITERATION;
00313 }
00314 #endif
00315
00316 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
00317 extern "C" {
00318 void Sosemanuk_OperateKeystream(size_t iterationCount, const byte *input, byte *output, word32 *state);
00319 }
00320 #endif
00321
00322 #pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code
00323
// Produces iterationCount iterations of keystream and writes the advanced state back to m_state.
//
// The preprocessor selects one of three implementations:
//   - CRYPTOPP_X64_MASM_AVAILABLE: forward to the external Sosemanuk_OperateKeystream and return.
//   - CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE: SSE2 assembly, either as inline assembly guarded by a
//     run-time HasSSE2() check, or emitted as a MASM PROC when CRYPTOPP_GENERATE_X64_MASM is defined.
//   - otherwise (or when the run-time check fails): the portable C++ loop at the end.
//
// One iteration covers 20 words: the C++ loop runs twenty STEPs and five OUTPUT4 groups of four
// words, and the assembly multiplies iterationCount by 20 to obtain its word count.
//
// The assembly passes a null input pointer check (label 4) to decide whether to XOR input into
// the generated words. NOTE(review): how `operation` maps onto that and onto
// CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH is defined outside this file - confirm there.
00324 void SosemanukPolicy::OperateKeystream(KeystreamOperation operation, byte *output, const byte *input, size_t iterationCount)
00325 {
00326 #endif // #ifndef CRYPTOPP_GENERATE_X64_MASM (opened near the top of the file); code below is also seen by the MASM pass
00327
00328 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
00329 Sosemanuk_OperateKeystream(iterationCount, input, output, m_state.data()); // hand off to the separately assembled routine
00330 return;
00331 #endif
00332
00333 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
00334 #ifdef CRYPTOPP_GENERATE_X64_MASM
// MASM prologue: save rsi/rdi, reserve the workspace plus 2*16 bytes for xmm6/xmm7 (and 8 more
// bytes, presumably for stack alignment), then save xmm6/xmm7 just past the workspace
// (0x2f0 = 80*4*2 + 12*4 + 8*8).
00335 ALIGN 8
00336 Sosemanuk_OperateKeystream PROC FRAME
00337 rex_push_reg rsi
00338 push_reg rdi
00339 alloc_stack(80*4*2+12*4+8*WORD_SZ + 2*16+8)
00340 save_xmm128 xmm6, 02f0h
00341 save_xmm128 xmm7, 0300h
00342 .endprolog
// NOTE(review): assuming the Microsoft x64 convention, r8/r9 carry the 3rd/4th arguments
// (output, state), so this matches the register set-up of the inline-assembly path below.
00343 mov rdi, r8
00344 mov rax, r9
00345 #else
00346 #ifdef __INTEL_COMPILER
00347 if (HasSSE2() && !IsP4())
00348 #else
00349 if (HasSSE2())
00350 #endif
00351 {
00352 #ifdef __GNUC__
00353 #if CRYPTOPP_BOOL_X64
00354 __m128i workspace[(80*4*2+12*4+8*WORD_SZ)/16]; // scratch area handed to the asm as operand %5
00355 #endif
00356 __asm__ __volatile__
00357 (
00358 ".intel_syntax noprefix;"
00359 AS_PUSH_IF86( bx)
00360 #else
// Non-GNU inline assembly: load the arguments into the registers the shared code expects
// (ax = state, di = output, dx = input, cx = iterationCount).
00361 word32 *state = m_state;
00362 AS2( mov WORD_REG(ax), state)
00363 AS2( mov WORD_REG(di), output)
00364 AS2( mov WORD_REG(dx), input)
00365 AS2( mov WORD_REG(cx), iterationCount)
00366 #endif
00367 #endif // #ifdef CRYPTOPP_GENERATE_X64_MASM
00368
// Base of the scratch area: asm operand %5 for GCC on x64, otherwise the stack pointer.
00369 #if defined(__GNUC__) && CRYPTOPP_BOOL_X64
00370 #define SSE2_workspace %5
00371 #else
00372 #define SSE2_workspace WORD_REG(sp)
00373 #endif
00374
// Scratch layout: slots 1..7 are pointer-sized locals (slot 0 holds the saved esp on x86),
// followed by a 12-word state copy, then the u area (80 words) and the v area (80 words,
// reached via the +80*4 offset in v(j)).
00375 #define SSE2_output WORD_PTR [SSE2_workspace+1*WORD_SZ]
00376 #define SSE2_input WORD_PTR [SSE2_workspace+2*WORD_SZ]
00377 #define SSE2_wordsLeft WORD_PTR [SSE2_workspace+3*WORD_SZ]
00378 #define SSE2_diEnd WORD_PTR [SSE2_workspace+4*WORD_SZ]
00379 #define SSE2_pMulTables WORD_PTR [SSE2_workspace+5*WORD_SZ]
00380 #define SSE2_state WORD_PTR [SSE2_workspace+6*WORD_SZ]
00381 #define SSE2_wordsLeft2 WORD_PTR [SSE2_workspace+7*WORD_SZ]
00382 #define SSE2_stateCopy SSE2_workspace + 8*WORD_SZ
00383 #define SSE2_uvStart SSE2_stateCopy + 12*4
00384
00385 #if CRYPTOPP_BOOL_X86
// x86: carve a 16-byte-aligned scratch area out of the stack and keep the old esp at [esp].
00386 AS_PUSH_IF86( bp)
00387 AS2( mov AS_REG_6, esp)
00388 AS2( and esp, -16)
00389 AS2( sub esp, 80*4*2+12*4+8*WORD_SZ) // 80 u words + 80 v words + 12 state words + 8 locals
00390 AS2( mov [esp], AS_REG_6)
00391 #endif
00392 AS2( mov SSE2_output, WORD_REG(di))
00393 AS2( mov SSE2_input, WORD_REG(dx))
00394 AS2( mov SSE2_state, WORD_REG(ax))
00395 #ifndef _MSC_VER
00396 AS2( mov SSE2_pMulTables, WORD_REG(si)) // si carries the table address (GCC "S" operand)
00397 #endif
00398 AS2( lea WORD_REG(cx), [4*WORD_REG(cx)+WORD_REG(cx)]) // cx = 5 * iterationCount
00399 AS2( lea WORD_REG(si), [4*WORD_REG(cx)]) // si = 20 * iterationCount = total words
00400 AS2( mov SSE2_wordsLeft, WORD_REG(si))
00401 AS2( movdqa xmm0, [WORD_REG(ax)+0*16]) // copy state words 0..9 into the scratch area
00402 AS2( movdqa [SSE2_stateCopy+0*16], xmm0)
00403 AS2( movdqa xmm0, [WORD_REG(ax)+1*16])
00404 AS2( movdqa [SSE2_stateCopy+1*16], xmm0)
00405 AS2( movq xmm0, QWORD PTR [WORD_REG(ax)+2*16])
00406 AS2( movq QWORD PTR [SSE2_stateCopy+2*16], xmm0)
00407 AS2( psrlq xmm0, 32)
00408 AS2( movd AS_REG_6d, xmm0) // AS_REG_6d = state word 9
00409 AS2( mov ecx, [WORD_REG(ax)+10*4]) // ecx = state word 10
00410 AS2( mov edx, [WORD_REG(ax)+11*4]) // edx = state word 11
00411 AS2( pcmpeqb xmm7, xmm7) // xmm7 = all ones (used below as a NOT mask)
00412
// s(i): state-copy word i mod 10.  u(j)/v(j): slots in the u and v areas relative to di,
// laid out as four rows of 20 words (row = j mod 4, column = j / 4).
00413 #define s(i) SSE2_stateCopy + ASM_MOD(i,10)*4
00414 #define u(j) WORD_REG(di) + (ASM_MOD(j,4)*20 + (j/4)) * 4
00415 #define v(j) WORD_REG(di) + (ASM_MOD(j,4)*20 + (j/4)) * 4 + 80*4
00416
// R1<j>/R2<j>: ecx and edx swap roles between even (j = 0) and odd (j = 1) steps.
00417 #define R10 ecx
00418 #define R11 edx
00419 #define R20 edx
00420 #define R21 ecx
00421
// SSE2_STEP(i, j): one step in scalar registers; stores one v word and one u word, updates
// the R1/R2 registers and writes the new state word back to s(i). Its table lookups use
// [si + index*4] and [si + 1024 + index*4], i.e. the two halves of s_sosemanukMulTables.
00422 #define SSE2_STEP(i, j) \
00423 AS2( mov eax, [s(i+0)])\
00424 AS2( mov [v(i)], eax)\
00425 AS2( rol eax, 8)\
00426 AS2( lea AS_REG_7d, [AS_REG_6d + R2##j])\
00427 AS2( xor AS_REG_7d, R1##j)\
00428 AS2( mov [u(i)], AS_REG_7d)\
00429 AS2( mov AS_REG_7d, 1)\
00430 AS2( and AS_REG_7d, R2##j)\
00431 AS1( neg AS_REG_7d)\
00432 AS2( and AS_REG_7d, AS_REG_6d)\
00433 AS2( xor AS_REG_6d, eax)\
00434 AS2( movzx eax, al)\
00435 AS2( xor AS_REG_6d, [WORD_REG(si)+WORD_REG(ax)*4])\
00436 AS2( mov eax, [s(i+3)])\
00437 AS2( xor AS_REG_7d, [s(i+2)])\
00438 AS2( add R1##j, AS_REG_7d)\
00439 AS2( movzx AS_REG_7d, al)\
00440 AS2( shr eax, 8)\
00441 AS2( xor AS_REG_6d, [WORD_REG(si)+1024+AS_REG_7*4])\
00442 AS2( xor AS_REG_6d, eax)\
00443 AS2( imul R2##j, AS_HEX(54655307))\
00444 AS2( rol R2##j, 7)\
00445 AS2( mov [s(i+0)], AS_REG_6d)\
00446
00447 ASL(2) // outer loop: each pass handles at most 80 words
00448 AS2( lea WORD_REG(di), [SSE2_uvStart]) // di = start of the u/v areas
00449 AS2( mov WORD_REG(ax), 80)
00450 AS2( cmp WORD_REG(si), 80)
00451 AS2( cmovg WORD_REG(si), WORD_REG(ax)) // si = min(words left, 80)
00452 AS2( mov SSE2_wordsLeft2, WORD_REG(si))
00453 AS2( lea WORD_REG(si), [WORD_REG(di)+WORD_REG(si)]) // end marker for the first inner loop
00454 AS2( mov SSE2_diEnd, WORD_REG(si))
00455 #ifdef _MSC_VER
00456 AS2( lea WORD_REG(si), s_sosemanukMulTables)
00457 #else
00458 AS2( mov WORD_REG(si), SSE2_pMulTables)
00459 #endif
00460
00461 ASL(0) // first inner loop: 20 steps per pass, di advancing 20 bytes until it reaches diEnd
00462 SSE2_STEP(0, 0)
00463 SSE2_STEP(1, 1)
00464 SSE2_STEP(2, 0)
00465 SSE2_STEP(3, 1)
00466 SSE2_STEP(4, 0)
00467 SSE2_STEP(5, 1)
00468 SSE2_STEP(6, 0)
00469 SSE2_STEP(7, 1)
00470 SSE2_STEP(8, 0)
00471 SSE2_STEP(9, 1)
00472 SSE2_STEP(10, 0)
00473 SSE2_STEP(11, 1)
00474 SSE2_STEP(12, 0)
00475 SSE2_STEP(13, 1)
00476 SSE2_STEP(14, 0)
00477 SSE2_STEP(15, 1)
00478 SSE2_STEP(16, 0)
00479 SSE2_STEP(17, 1)
00480 SSE2_STEP(18, 0)
00481 SSE2_STEP(19, 1)
00482
00483 AS2( add WORD_REG(di), 5*4)
00484 AS2( cmp WORD_REG(di), SSE2_diEnd)
00485 ASJ( jne, 0, b)
00486
00487 AS2( mov WORD_REG(ax), SSE2_input) // ax = input pointer
00488 AS2( mov AS_REG_7, SSE2_output) // AS_REG_7 = output pointer
00489 AS2( lea WORD_REG(di), [SSE2_uvStart]) // rewind di to the start of the u/v areas
00490 AS2( mov WORD_REG(si), SSE2_wordsLeft2) // si = words to emit in this outer pass
00491
00492 ASL(1) // second inner loop: 16 words per pass, four u rows processed in parallel
00493 AS2( movdqa xmm0, [WORD_REG(di)+0*20*4])
00494 AS2( movdqa xmm2, [WORD_REG(di)+2*20*4])
00495 AS2( movdqa xmm3, [WORD_REG(di)+3*20*4])
00496 AS2( movdqa xmm1, [WORD_REG(di)+1*20*4])
00497
// Bitwise (bitsliced) transform of the four u rows.
// NOTE(review): presumably the SSE2 counterpart of the S2(...) call in OUTPUT4 below - confirm.
00498 AS2( movdqa xmm4, xmm0)
00499 AS2( pand xmm0, xmm2)
00500 AS2( pxor xmm0, xmm3)
00501 AS2( pxor xmm2, xmm1)
00502 AS2( pxor xmm2, xmm0)
00503 AS2( por xmm3, xmm4)
00504 AS2( pxor xmm3, xmm1)
00505 AS2( pxor xmm4, xmm2)
00506 AS2( movdqa xmm1, xmm3)
00507 AS2( por xmm3, xmm4)
00508 AS2( pxor xmm3, xmm0)
00509 AS2( pand xmm0, xmm1)
00510 AS2( pxor xmm4, xmm0)
00511 AS2( pxor xmm1, xmm3)
00512 AS2( pxor xmm1, xmm4)
00513 AS2( pxor xmm4, xmm7) // bitwise NOT via the all-ones register
00514
00515 AS2( pxor xmm2, [WORD_REG(di)+80*4]) // XOR with the four v rows
00516 AS2( pxor xmm3, [WORD_REG(di)+80*5])
00517 AS2( pxor xmm1, [WORD_REG(di)+80*6])
00518 AS2( pxor xmm4, [WORD_REG(di)+80*7])
00519
00520
00521 AS2( cmp WORD_REG(si), 16) // fewer than 16 words left: finish through the tail at label 4
00522 ASJ( jl, 4, f)
00523
// Transpose the 4x4 block of dwords so each register holds four consecutive output words.
00524 AS2( movdqa xmm6, xmm2)
00525 AS2( punpckldq xmm2, xmm3)
00526 AS2( movdqa xmm5, xmm1)
00527 AS2( punpckldq xmm1, xmm4)
00528 AS2( movdqa xmm0, xmm2)
00529 AS2( punpcklqdq xmm2, xmm1)
00530 AS2( punpckhqdq xmm0, xmm1)
00531 AS2( punpckhdq xmm6, xmm3)
00532 AS2( punpckhdq xmm5, xmm4)
00533 AS2( movdqa xmm3, xmm6)
00534 AS2( punpcklqdq xmm6, xmm5)
00535 AS2( punpckhqdq xmm3, xmm5)
00536
// NOTE(review): AS_XMM_OUTPUT4 is defined elsewhere; presumably it combines xmm2/0/6/3 with the
// input at ax (if any), stores to AS_REG_7 and advances both pointers - confirm.
00537 AS_XMM_OUTPUT4(SSE2_Sosemanuk_Output, WORD_REG(ax), AS_REG_7, 2,0,6,3, 1, 0,1,2,3, 4)
00538
00539
00540 AS2( add WORD_REG(di), 4*4) // next four columns of the u/v rows
00541 AS2( sub WORD_REG(si), 16)
00542 ASJ( jnz, 1, b)
00543
00544
// End of an outer pass: subtract 80 from the remaining word count and either finish or start over.
00545 AS2( mov WORD_REG(si), SSE2_wordsLeft)
00546 AS2( sub WORD_REG(si), 80)
00547 ASJ( jz, 6, f)
00548 AS2( mov SSE2_wordsLeft, WORD_REG(si))
00549 AS2( mov SSE2_input, WORD_REG(ax))
00550 AS2( mov SSE2_output, AS_REG_7)
00551 ASJ( jmp, 2, b)
00552
00553 ASL(4) // tail: emit the remaining (< 16) words four at a time, straight from the untransposed registers
00554 AS2( test WORD_REG(ax), WORD_REG(ax)) // null input pointer: skip the XOR with input
00555 ASJ( jz, 5, f)
00556 AS2( movd xmm0, dword ptr [WORD_REG(ax)+0*4])
00557 AS2( pxor xmm2, xmm0)
00558 AS2( movd xmm0, dword ptr [WORD_REG(ax)+1*4])
00559 AS2( pxor xmm3, xmm0)
00560 AS2( movd xmm0, dword ptr [WORD_REG(ax)+2*4])
00561 AS2( pxor xmm1, xmm0)
00562 AS2( movd xmm0, dword ptr [WORD_REG(ax)+3*4])
00563 AS2( pxor xmm4, xmm0)
00564 AS2( add WORD_REG(ax), 16)
00565 ASL(5) // store the low dword of each register as the next four output words
00566 AS2( movd dword ptr [AS_REG_7+0*4], xmm2)
00567 AS2( movd dword ptr [AS_REG_7+1*4], xmm3)
00568 AS2( movd dword ptr [AS_REG_7+2*4], xmm1)
00569 AS2( movd dword ptr [AS_REG_7+3*4], xmm4)
00570 AS2( sub WORD_REG(si), 4)
00571 ASJ( jz, 6, f)
00572 AS2( add AS_REG_7, 16)
00573 AS2( psrldq xmm2, 4) // shift the next dword of each register into the low position
00574 AS2( psrldq xmm3, 4)
00575 AS2( psrldq xmm1, 4)
00576 AS2( psrldq xmm4, 4)
00577 ASJ( jmp, 4, b)
00578
00579 ASL(6) // done: write the state copy and the ecx/edx registers back to the state array
00580 AS2( mov AS_REG_6, SSE2_state)
00581 AS2( movdqa xmm0, [SSE2_stateCopy+0*16])
00582 AS2( movdqa [AS_REG_6+0*16], xmm0)
00583 AS2( movdqa xmm0, [SSE2_stateCopy+1*16])
00584 AS2( movdqa [AS_REG_6+1*16], xmm0)
00585 AS2( movq xmm0, QWORD PTR [SSE2_stateCopy+2*16])
00586 AS2( movq QWORD PTR [AS_REG_6+2*16], xmm0)
00587 AS2( mov [AS_REG_6+10*4], ecx)
00588 AS2( mov [AS_REG_6+11*4], edx)
00589
00590 AS_POP_IF86( sp) // x86: reload the esp value saved at [esp] during set-up
00591 AS_POP_IF86( bp)
00592
00593 #ifdef __GNUC__
00594 AS_POP_IF86( bx)
00595 ".att_syntax prefix;"
00596 :
00597 : "a" (m_state.m_ptr), "c" (iterationCount), "S" (s_sosemanukMulTables), "D" (output), "d" (input)
00598 #if CRYPTOPP_BOOL_X64
00599 , "r" (workspace)
00600 : "memory", "cc", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
00601 #else
00602 : "memory", "cc"
00603 #endif
00604 );
00605 #endif
00606 #ifdef CRYPTOPP_GENERATE_X64_MASM
// MASM epilogue: restore xmm6/xmm7, release the stack area, restore rdi/rsi.
00607 movdqa xmm6, [rsp + 02f0h]
00608 movdqa xmm7, [rsp + 0300h]
00609 add rsp, 80*4*2+12*4+8*WORD_SZ + 2*16+8
00610 pop rdi
00611 pop rsi
00612 ret
00613 Sosemanuk_OperateKeystream ENDP
00614 #else
00615 }
00616 else
00617 #endif
00618 #endif
00619 #ifndef CRYPTOPP_GENERATE_X64_MASM
// Portable C++ implementation (also the run-time fallback when the SSE2 check above fails).
00620 {
// MUL_A: table-assisted update using the first half of s_sosemanukMulTables. The x86/x64 form
// rotates x in place (it modifies its argument); the generic form shifts and indexes by the top byte.
00621 #if CRYPTOPP_BOOL_X86 | CRYPTOPP_BOOL_X64
00622 #define MUL_A(x) (x = rotlFixed(x, 8), x ^ s_sosemanukMulTables[byte(x)])
00623 #else
00624 #define MUL_A(x) (((x) << 8) ^ s_sosemanukMulTables[(x) >> 24])
00625 #endif
00626
// DIV_A: counterpart using the second half of the table (offset 256), indexed by the low byte.
00627 #define DIV_A(x) (((x) >> 8) ^ s_sosemanukMulTables[256 + byte(x)])
00628
// r1(i)/r2(i): reg1 and reg2 swap roles on odd-numbered steps.
00629 #define r1(i) ((i%2) ? reg2 : reg1)
00630 #define r2(i) ((i%2) ? reg1 : reg2)
00631
// STEP: one step. Produces u from s<x9> and the two registers, saves the outgoing s<x0> in v,
// replaces s<x0>, then updates r1 and r2. Only x0, x2, x3 and x9 are referenced in the body.
00632 #define STEP(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, v, u) \
00633 u = (s##x9 + r2(x0)) ^ r1(x0);\
00634 v = s##x0;\
00635 s##x0 = MUL_A(s##x0) ^ DIV_A(s##x3) ^ s##x9;\
00636 r1(x0) += XMUX(r2(x0), s##x2, s##x9);\
00637 r2(x0) = rotlFixed(r2(x0) * 0x54655307, 7);\
00638
// SOSEMANUK_OUTPUT: emits four little-endian words, pairing u2,u3,u1,u4 with v0..v3.
00639 #define SOSEMANUK_OUTPUT(x) \
00640 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 0, u2 ^ v0);\
00641 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 1, u3 ^ v1);\
00642 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 2, u1 ^ v2);\
00643 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 3, u4 ^ v3);
00644
// OUTPUT4: apply S2 (from serpentp.h) to u0..u3 with u4 as the extra variable, then emit
// 4*4 bytes through the keystream output switch.
00645 #define OUTPUT4 \
00646 S2(0, u0, u1, u2, u3, u4);\
00647 CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH(SOSEMANUK_OUTPUT, 4*4);
00648
// Work on local copies of the state; written back after the loop.
00649 word32 s0 = m_state[0];
00650 word32 s1 = m_state[1];
00651 word32 s2 = m_state[2];
00652 word32 s3 = m_state[3];
00653 word32 s4 = m_state[4];
00654 word32 s5 = m_state[5];
00655 word32 s6 = m_state[6];
00656 word32 s7 = m_state[7];
00657 word32 s8 = m_state[8];
00658 word32 s9 = m_state[9];
00659 word32 reg1 = m_state[10];
00660 word32 reg2 = m_state[11];
00661 word32 u0, u1, u2, u3, u4, v0, v1, v2, v3;
00662
// Each pass: 20 STEPs in groups of four, each group followed by OUTPUT4. The STEP arguments
// rotate through s0..s9, so the pattern returns to its starting position after 20 steps.
// The do/while form runs the body at least once; callers presumably pass iterationCount >= 1.
00663 do
00664 {
00665 STEP(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, v0, u0)
00666 STEP(1, 2, 3, 4, 5, 6, 7, 8, 9, 0, v1, u1)
00667 STEP(2, 3, 4, 5, 6, 7, 8, 9, 0, 1, v2, u2)
00668 STEP(3, 4, 5, 6, 7, 8, 9, 0, 1, 2, v3, u3)
00669 OUTPUT4
00670 STEP(4, 5, 6, 7, 8, 9, 0, 1, 2, 3, v0, u0)
00671 STEP(5, 6, 7, 8, 9, 0, 1, 2, 3, 4, v1, u1)
00672 STEP(6, 7, 8, 9, 0, 1, 2, 3, 4, 5, v2, u2)
00673 STEP(7, 8, 9, 0, 1, 2, 3, 4, 5, 6, v3, u3)
00674 OUTPUT4
00675 STEP(8, 9, 0, 1, 2, 3, 4, 5, 6, 7, v0, u0)
00676 STEP(9, 0, 1, 2, 3, 4, 5, 6, 7, 8, v1, u1)
00677 STEP(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, v2, u2)
00678 STEP(1, 2, 3, 4, 5, 6, 7, 8, 9, 0, v3, u3)
00679 OUTPUT4
00680 STEP(2, 3, 4, 5, 6, 7, 8, 9, 0, 1, v0, u0)
00681 STEP(3, 4, 5, 6, 7, 8, 9, 0, 1, 2, v1, u1)
00682 STEP(4, 5, 6, 7, 8, 9, 0, 1, 2, 3, v2, u2)
00683 STEP(5, 6, 7, 8, 9, 0, 1, 2, 3, 4, v3, u3)
00684 OUTPUT4
00685 STEP(6, 7, 8, 9, 0, 1, 2, 3, 4, 5, v0, u0)
00686 STEP(7, 8, 9, 0, 1, 2, 3, 4, 5, 6, v1, u1)
00687 STEP(8, 9, 0, 1, 2, 3, 4, 5, 6, 7, v2, u2)
00688 STEP(9, 0, 1, 2, 3, 4, 5, 6, 7, 8, v3, u3)
00689 OUTPUT4
00690 }
00691 while (--iterationCount);
00692
// Persist the advanced state.
00693 m_state[0] = s0;
00694 m_state[1] = s1;
00695 m_state[2] = s2;
00696 m_state[3] = s3;
00697 m_state[4] = s4;
00698 m_state[5] = s5;
00699 m_state[6] = s6;
00700 m_state[7] = s7;
00701 m_state[8] = s8;
00702 m_state[9] = s9;
00703 m_state[10] = reg1;
00704 m_state[11] = reg2;
00705 }
00706 }
00707
00708 NAMESPACE_END
00709
00710 #endif // #ifndef CRYPTOPP_GENERATE_X64_MASM