00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048 #include "pch.h"
00049
00050 #ifndef CRYPTOPP_IMPORTS
00051
00052 #include "rijndael.h"
00053 #include "misc.h"
00054 #include "cpu.h"
00055
00056 NAMESPACE_BEGIN(CryptoPP)
00057
void Rijndael::Base::UncheckedSetKey(const byte *userKey, unsigned int keylen, const NameValuePairs &)
{
	AssertValidKeyLength(keylen);

	// Nr = Nk + 6 rounds: 10/12/14 rounds for 128/192/256-bit keys.
	m_rounds = keylen/4 + 6;
	// Four round-key words per round, plus the initial whitening key.
	m_key.New(4*(m_rounds+1));

	word32 temp, *rk = m_key;
	const word32 *rc = rcon;	// key-schedule round constants

	// Copy the user key into the first keylen/4 schedule words, big-endian.
	GetUserKey(BIG_ENDIAN_ORDER, rk, keylen/4, userKey, keylen);

	// AES key expansion: each pass generates keylen/4 more words.
	while (true)
	{
		temp = rk[keylen/4-1];
		// Key-schedule core: RotWord + SubWord (via the Se S-box) + Rcon.
		rk[keylen/4] = rk[0] ^
			(word32(Se[GETBYTE(temp, 2)]) << 24) ^
			(word32(Se[GETBYTE(temp, 1)]) << 16) ^
			(word32(Se[GETBYTE(temp, 0)]) << 8) ^
			Se[GETBYTE(temp, 3)] ^
			*(rc++);
		rk[keylen/4+1] = rk[1] ^ rk[keylen/4];
		rk[keylen/4+2] = rk[2] ^ rk[keylen/4+1];
		rk[keylen/4+3] = rk[3] ^ rk[keylen/4+2];

		// Stop once the last scheduled word has just been written.
		if (rk + keylen/4 + 4 == m_key.end())
			break;

		if (keylen == 24)
		{
			// 192-bit keys: two additional plain-XOR words complete
			// each 6-word expansion block.
			rk[10] = rk[ 4] ^ rk[ 9];
			rk[11] = rk[ 5] ^ rk[10];
		}
		else if (keylen == 32)
		{
			// 256-bit keys: an extra SubWord (no rotation, no Rcon)
			// in the middle of each 8-word expansion block.
			temp = rk[11];
			rk[12] = rk[ 4] ^
				(word32(Se[GETBYTE(temp, 3)]) << 24) ^
				(word32(Se[GETBYTE(temp, 2)]) << 16) ^
				(word32(Se[GETBYTE(temp, 1)]) << 8) ^
				Se[GETBYTE(temp, 0)];
			rk[13] = rk[ 5] ^ rk[12];
			rk[14] = rk[ 6] ^ rk[13];
			rk[15] = rk[ 7] ^ rk[14];
		}
		rk += keylen/4;
	}

	if (!IsForwardTransformation())
	{
		// Decryption: convert the schedule for the "equivalent inverse
		// cipher" form so the same round structure can be used.
		unsigned int i, j;
		rk = m_key;

		// Reverse the order of the round keys, swapping from both ends.
		for (i = 0, j = 4*m_rounds; i < j; i += 4, j -= 4) {
			temp = rk[i ]; rk[i ] = rk[j ]; rk[j ] = temp;
			temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
			temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
			temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
		}

		// Transform every round key except the first and last by running
		// each byte through Se and then the Td table.  Assuming the usual
		// Td/Se table construction this applies InvMixColumns to the key
		// words (the equivalent-inverse-cipher trick of FIPS 197) —
		// NOTE(review): verify against the Td table definition.
		for (i = 1; i < m_rounds; i++) {
			rk += 4;
			rk[0] =
				Td[0*256+Se[GETBYTE(rk[0], 3)]] ^
				Td[1*256+Se[GETBYTE(rk[0], 2)]] ^
				Td[2*256+Se[GETBYTE(rk[0], 1)]] ^
				Td[3*256+Se[GETBYTE(rk[0], 0)]];
			rk[1] =
				Td[0*256+Se[GETBYTE(rk[1], 3)]] ^
				Td[1*256+Se[GETBYTE(rk[1], 2)]] ^
				Td[2*256+Se[GETBYTE(rk[1], 1)]] ^
				Td[3*256+Se[GETBYTE(rk[1], 0)]];
			rk[2] =
				Td[0*256+Se[GETBYTE(rk[2], 3)]] ^
				Td[1*256+Se[GETBYTE(rk[2], 2)]] ^
				Td[2*256+Se[GETBYTE(rk[2], 1)]] ^
				Td[3*256+Se[GETBYTE(rk[2], 0)]];
			rk[3] =
				Td[0*256+Se[GETBYTE(rk[3], 3)]] ^
				Td[1*256+Se[GETBYTE(rk[3], 2)]] ^
				Td[2*256+Se[GETBYTE(rk[3], 1)]] ^
				Td[3*256+Se[GETBYTE(rk[3], 0)]];
		}
	}

	// Byte-swap the first and the last round key on little-endian machines.
	// ProcessAndXorBlock XORs exactly these two keys directly against raw
	// block bytes (word loads/stores of in/out blocks), while the middle
	// keys are consumed in word form by the table lookups.
	ConditionalByteReverse(BIG_ENDIAN_ORDER, m_key.begin(), m_key.begin(), 16);
	ConditionalByteReverse(BIG_ENDIAN_ORDER, m_key + m_rounds*4, m_key + m_rounds*4, 16);
}
00147
00148 #pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code
00149
void Rijndael::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
{
#if defined(CRYPTOPP_X86_ASM_AVAILABLE)
	if (HasMMX())
	{
		const word32 *k = m_key;
		const word32 *kLoopEnd = k + m_rounds*4;	// address of the last round key
		// Register assignments for the assembly below.  x64 has enough
		// general-purpose registers for everything; on x86 the key pointer,
		// the loop bound and three saved state words are parked in MMX
		// registers (mm0-mm2, mm4, mm5).
#if CRYPTOPP_BOOL_X64
#define K_REG			r8
#define K_END_REG		r9
#define SAVE_K
#define RESTORE_K
#define RESTORE_K_END
#define SAVE_0(x)		AS2(mov r10d, x)
#define SAVE_1(x)		AS2(mov r11d, x)
#define SAVE_2(x)		AS2(mov r12d, x)
#define RESTORE_0(x)	AS2(mov x, r10d)
#define RESTORE_1(x)	AS2(mov x, r11d)
#define RESTORE_2(x)	AS2(mov x, r12d)
#else
#define K_REG			esi
#define K_END_REG		edi
#define SAVE_K			AS2(movd mm4, esi)
#define RESTORE_K		AS2(movd esi, mm4)
#define RESTORE_K_END	AS2(movd edi, mm5)
#define SAVE_0(x)		AS2(movd mm0, x)
#define SAVE_1(x)		AS2(movd mm1, x)
#define SAVE_2(x)		AS2(movd mm2, x)
#define RESTORE_0(x)	AS2(movd x, mm0)
#define RESTORE_1(x)	AS2(movd x, mm1)
#define RESTORE_2(x)	AS2(movd x, mm2)
#endif
#ifdef __GNUC__
		word32 t0, t1, t2, t3;
		__asm__ __volatile__
		(
		".intel_syntax noprefix;"
		AS_PUSH(	bx)
		AS_PUSH(	bp)
		AS2(	mov		WORD_REG(bp), WORD_REG(ax))	// bp = Te table (input constraint "a")
#if CRYPTOPP_BOOL_X64
		// Preserve the callee-saved registers used as scratch on x64.
		AS1(	pushq	K_REG)
		AS1(	pushq	K_END_REG)
		AS1(	pushq	r10)
		AS1(	pushq	r11)
		AS1(	pushq	r12)
		AS2(	mov		K_REG, rsi)		// key pointer (constraint "S")
		AS2(	mov		K_END_REG, rcx)	// kLoopEnd (constraint "c")
#else
		AS2(	movd	mm5, ecx)		// stash kLoopEnd in mm5
#endif
#else
		// MSVC path: load table pointer, input and key pointers from the
		// C variables; pre-VC7 cannot "lea ebp, Te" so go through eax.
#if _MSC_VER < 1300
		const word32 *t = Te;
		AS2(	mov		eax, t)
#endif
		AS2(	mov		edx, g_cacheLineSize)
		AS2(	mov		WORD_REG(di), inBlock)
		AS2(	mov		K_REG, k)
		AS2(	movd	mm5, kLoopEnd)
#if _MSC_VER < 1300
		AS_PUSH(	bx)
		AS_PUSH(	bp)
		AS2(	mov		ebp, eax)
#else
		AS_PUSH(	bp)
		AS2(	lea		ebp, Te)
#endif
#endif
		// Initial AddRoundKey: XOR the four input words with round key 0.
		// ebx accumulates an AND of all four state words for use below.
		AS2(	mov		eax, [K_REG+0*4])			// s0
		AS2(	xor		eax, [WORD_REG(di)+0*4])
		SAVE_0(eax)
		AS2(	mov		ebx, [K_REG+1*4])			// s1
		AS2(	xor		ebx, [WORD_REG(di)+1*4])
		SAVE_1(ebx)
		AS2(	and		ebx, eax)
		AS2(	mov		eax, [K_REG+2*4])			// s2
		AS2(	xor		eax, [WORD_REG(di)+2*4])
		SAVE_2(eax)
		AS2(	and		ebx, eax)
		AS2(	mov		ecx, [K_REG+3*4])			// s3 stays live in ecx
		AS2(	xor		ecx, [WORD_REG(di)+3*4])
		AS2(	and		ebx, ecx)

		// Cache-timing countermeasure: touch one word in every cache line
		// of the 1KB table before any data-dependent lookup.  ebx is forced
		// to zero; the mov from ebx presumably keeps the index dependent on
		// the preceding loads so these touches are not reordered before them.
		AS2(	and		ebx, 0)
		AS2(	mov		edi, ebx)
		ASL(2)
		AS2(	and		ebx, [WORD_REG(bp)+WORD_REG(di)])
		AS2(	add		edi, edx)		// edx = cache line size
		AS2(	and		ebx, [WORD_REG(bp)+WORD_REG(di)])
		AS2(	add		edi, edx)
		AS2(	and		ebx, [WORD_REG(bp)+WORD_REG(di)])
		AS2(	add		edi, edx)
		AS2(	and		ebx, [WORD_REG(bp)+WORD_REG(di)])
		AS2(	add		edi, edx)
		AS2(	cmp		edi, 1024)
		ASJ(	jl,		2, b)
		AS2(	and		ebx, [WORD_REG(bp)+1020])
		// XOR the (always zero) accumulator into the state so the loads
		// above cannot be optimized away / reordered past this point.
#if CRYPTOPP_BOOL_X64
		AS2(	xor		r10d, ebx)
		AS2(	xor		r11d, ebx)
		AS2(	xor		r12d, ebx)
#else
		AS2(	movd	mm6, ebx)
		AS2(	pxor	mm2, mm6)
		AS2(	pxor	mm1, mm6)
		AS2(	pxor	mm0, mm6)
#endif
		AS2(	xor		ecx, ebx)

		// Load round key 1 into t0..t3 and advance the key pointer by
		// two rounds' worth of words.
		AS2(	mov		edi, [K_REG+4*4])	// t0
		AS2(	mov		eax, [K_REG+5*4])	// t1
		AS2(	mov		ebx, [K_REG+6*4])	// t2
		AS2(	mov		edx, [K_REG+7*4])	// t3
		AS2(	add		K_REG, 8*4)
		SAVE_K

		// Quarter round: four Te subtable lookups driven by the successive
		// bytes of state word t, folded into a/b/c/d (t is consumed in place).
#define QUARTER_ROUND(t, a, b, c, d)	\
	AS2(movzx esi, t##l)\
	AS2(d, [WORD_REG(bp)+0*1024+4*WORD_REG(si)])\
	AS2(movzx esi, t##h)\
	AS2(c, [WORD_REG(bp)+1*1024+4*WORD_REG(si)])\
	AS2(shr e##t##x, 16)\
	AS2(movzx esi, t##l)\
	AS2(b, [WORD_REG(bp)+2*1024+4*WORD_REG(si)])\
	AS2(movzx esi, t##h)\
	AS2(a, [WORD_REG(bp)+3*1024+4*WORD_REG(si)])

		// s0..s3 / t0..t3 expand to "xor <register holding that word>".
#define s0		xor edi
#define s1		xor eax
#define s2		xor ebx
#define s3		xor ecx
#define t0		xor edi
#define t1		xor eax
#define t2		xor ebx
#define t3		xor edx

		// First full round (s -> t); saved s words are restored into ecx
		// one at a time as each is consumed.
		QUARTER_ROUND(c, t0, t1, t2, t3)
		RESTORE_2(ecx)
		QUARTER_ROUND(c, t3, t0, t1, t2)
		RESTORE_1(ecx)
		QUARTER_ROUND(c, t2, t3, t0, t1)
		RESTORE_0(ecx)
		QUARTER_ROUND(c, t1, t2, t3, t0)
		SAVE_2(ebx)
		SAVE_1(eax)
		SAVE_0(edi)
#undef QUARTER_ROUND

		RESTORE_K

		// Main loop: two rounds per iteration until K_REG reaches kLoopEnd.
		ASL(0)
		AS2(	mov		edi, [K_REG+0*4])
		AS2(	mov		eax, [K_REG+1*4])
		AS2(	mov		ebx, [K_REG+2*4])
		AS2(	mov		ecx, [K_REG+3*4])

		// Same quarter round with the subtable lookups issued in the
		// opposite order (a first), matching the even-round register use.
#define QUARTER_ROUND(t, a, b, c, d)	\
	AS2(movzx esi, t##l)\
	AS2(a, [WORD_REG(bp)+3*1024+4*WORD_REG(si)])\
	AS2(movzx esi, t##h)\
	AS2(b, [WORD_REG(bp)+2*1024+4*WORD_REG(si)])\
	AS2(shr e##t##x, 16)\
	AS2(movzx esi, t##l)\
	AS2(c, [WORD_REG(bp)+1*1024+4*WORD_REG(si)])\
	AS2(movzx esi, t##h)\
	AS2(d, [WORD_REG(bp)+0*1024+4*WORD_REG(si)])

		// Even round (t -> s).
		QUARTER_ROUND(d, s0, s1, s2, s3)
		RESTORE_2(edx)
		QUARTER_ROUND(d, s3, s0, s1, s2)
		RESTORE_1(edx)
		QUARTER_ROUND(d, s2, s3, s0, s1)
		RESTORE_0(edx)
		QUARTER_ROUND(d, s1, s2, s3, s0)
		RESTORE_K
		SAVE_2(ebx)
		SAVE_1(eax)
		SAVE_0(edi)

		// Odd round (s -> t).
		AS2(	mov		edi, [K_REG+4*4])
		AS2(	mov		eax, [K_REG+5*4])
		AS2(	mov		ebx, [K_REG+6*4])
		AS2(	mov		edx, [K_REG+7*4])

		QUARTER_ROUND(c, t0, t1, t2, t3)
		RESTORE_2(ecx)
		QUARTER_ROUND(c, t3, t0, t1, t2)
		RESTORE_1(ecx)
		QUARTER_ROUND(c, t2, t3, t0, t1)
		RESTORE_0(ecx)
		QUARTER_ROUND(c, t1, t2, t3, t0)
		SAVE_2(ebx)
		SAVE_1(eax)
		SAVE_0(edi)

		RESTORE_K
		RESTORE_K_END
		AS2(	add		K_REG, 8*4)
		SAVE_K
		AS2(	cmp		K_END_REG, K_REG)
		ASJ(	jne,	0, b)

#undef QUARTER_ROUND
#undef s0
#undef s1
#undef s2
#undef s3
#undef t0
#undef t1
#undef t2
#undef t3

		// Final round: load the last round key, then substitute each state
		// byte via byte lookups at [bp+1+4*i] — presumably the S-box bytes
		// embedded within the 4-byte Te entries (TODO confirm Te layout).
		AS2(	mov		eax, [K_END_REG+0*4])
		AS2(	mov		ecx, [K_END_REG+1*4])
		AS2(	mov		esi, [K_END_REG+2*4])
		AS2(	mov		edi, [K_END_REG+3*4])

#define QUARTER_ROUND(a, b, c, d)	\
	AS2(	movzx	ebx, dl)\
	AS2(	movzx	ebx, BYTE PTR [WORD_REG(bp)+1+4*WORD_REG(bx)])\
	AS2(	shl		ebx, 3*8)\
	AS2(	xor		a, ebx)\
	AS2(	movzx	ebx, dh)\
	AS2(	movzx	ebx, BYTE PTR [WORD_REG(bp)+1+4*WORD_REG(bx)])\
	AS2(	shl		ebx, 2*8)\
	AS2(	xor		b, ebx)\
	AS2(	shr		edx, 16)\
	AS2(	movzx	ebx, dl)\
	AS2(	shr		edx, 8)\
	AS2(	movzx	ebx, BYTE PTR [WORD_REG(bp)+1+4*WORD_REG(bx)])\
	AS2(	shl		ebx, 1*8)\
	AS2(	xor		c, ebx)\
	AS2(	movzx	ebx, BYTE PTR [WORD_REG(bp)+1+4*WORD_REG(dx)])\
	AS2(	xor		d, ebx)

		QUARTER_ROUND(eax, ecx, esi, edi)
		RESTORE_2(edx)
		QUARTER_ROUND(edi, eax, ecx, esi)
		RESTORE_1(edx)
		QUARTER_ROUND(esi, edi, eax, ecx)
		RESTORE_0(edx)
		QUARTER_ROUND(ecx, esi, edi, eax)

#undef QUARTER_ROUND

#if CRYPTOPP_BOOL_X64
		AS1(popq	r12)
		AS1(popq	r11)
		AS1(popq	r10)
		AS1(popq	K_END_REG)
		AS1(popq	K_REG)
#else
		AS1(emms)		// x86 used MMX registers; restore FPU state
#endif
		AS_POP(		bp)

#if defined(__GNUC__) || (defined(_MSC_VER) && _MSC_VER < 1300)
		AS_POP(		bx)
#endif
#ifdef __GNUC__
		".att_syntax prefix;"
			: "=a" (t0), "=c" (t1), "=S" (t2), "=D" (t3)
			: "a" (Te), "D" (inBlock), "S" (k), "c" (kLoopEnd), "d" (g_cacheLineSize)
			: "memory", "cc"
		);

		// GCC path: the asm left the four output words in registers
		// (outputs t0..t3); do the optional XOR and the stores in C.
		if (xorBlock)
		{
			t0 ^= ((const word32 *)xorBlock)[0];
			t1 ^= ((const word32 *)xorBlock)[1];
			t2 ^= ((const word32 *)xorBlock)[2];
			t3 ^= ((const word32 *)xorBlock)[3];
		}
		((word32 *)outBlock)[0] = t0;
		((word32 *)outBlock)[1] = t1;
		((word32 *)outBlock)[2] = t2;
		((word32 *)outBlock)[3] = t3;
#else
		// MSVC path: optional XOR with xorBlock and the stores to outBlock
		// are done in assembly before leaving.
		AS2(	mov		WORD_REG(bx), xorBlock)
		AS2(	test	WORD_REG(bx), WORD_REG(bx))
		ASJ(	jz,		1, f)
		AS2(	xor		eax, [WORD_REG(bx)+0*4])
		AS2(	xor		ecx, [WORD_REG(bx)+1*4])
		AS2(	xor		esi, [WORD_REG(bx)+2*4])
		AS2(	xor		edi, [WORD_REG(bx)+3*4])
		ASL(1)
		AS2(	mov		WORD_REG(bx), outBlock)
		AS2(	mov		[WORD_REG(bx)+0*4], eax)
		AS2(	mov		[WORD_REG(bx)+1*4], ecx)
		AS2(	mov		[WORD_REG(bx)+2*4], esi)
		AS2(	mov		[WORD_REG(bx)+3*4], edi)
#endif
	}
	else
#endif	// #ifdef CRYPTOPP_X86_ASM_AVAILABLE
	{
		// Portable C++ path (also taken when MMX is unavailable).
		word32 s0, s1, s2, s3, t0, t1, t2, t3;
		const word32 *rk = m_key;

		// Initial AddRoundKey; preload round key 1 into t0..t3.
		s0 = ((const word32 *)inBlock)[0] ^ rk[0];
		s1 = ((const word32 *)inBlock)[1] ^ rk[1];
		s2 = ((const word32 *)inBlock)[2] ^ rk[2];
		s3 = ((const word32 *)inBlock)[3] ^ rk[3];
		t0 = rk[4];
		t1 = rk[5];
		t2 = rk[6];
		t3 = rk[7];
		rk += 8;

		// Cache-timing countermeasure: u is always 0 (it starts at 0 and is
		// only ANDed), but these reads touch every cache line of Te so later
		// data-dependent lookups hit a uniformly warm cache.
		const int cacheLineSize = GetCacheLineSize();
		unsigned int i;
		word32 u = 0;
		for (i=0; i<1024; i+=cacheLineSize)
			u &= *(const word32 *)(((const byte *)Te)+i);
		u &= Te[255];
		s0 |= u; s1 |= u; s2 |= u; s3 |= u;	// no-op; keeps the loads live

		// First round: single Te subtable, rotated per byte position.
#ifdef IS_BIG_ENDIAN
#define QUARTER_ROUND(t, a, b, c, d)	\
	a ^= rotrFixed(Te[byte(t)], 24); t >>= 8;\
	b ^= rotrFixed(Te[byte(t)], 16); t >>= 8;\
	c ^= rotrFixed(Te[byte(t)], 8); t >>= 8;\
	d ^= Te[t];
#else
#define QUARTER_ROUND(t, a, b, c, d)	\
	d ^= Te[byte(t)]; t >>= 8;\
	c ^= rotrFixed(Te[byte(t)], 8); t >>= 8;\
	b ^= rotrFixed(Te[byte(t)], 16); t >>= 8;\
	a ^= rotrFixed(Te[t], 24);
#endif

		QUARTER_ROUND(s3, t0, t1, t2, t3)
		QUARTER_ROUND(s2, t3, t0, t1, t2)
		QUARTER_ROUND(s1, t2, t3, t0, t1)
		QUARTER_ROUND(s0, t1, t2, t3, t0)
#undef QUARTER_ROUND

		// Middle rounds, two per iteration, using the four 256-entry
		// Te subtables directly.
		unsigned int r = m_rounds/2 - 1;
		do
		{
#define QUARTER_ROUND(t, a, b, c, d)	\
	a ^= Te[3*256+byte(t)]; t >>= 8;\
	b ^= Te[2*256+byte(t)]; t >>= 8;\
	c ^= Te[1*256+byte(t)]; t >>= 8;\
	d ^= Te[t];

			s0 = rk[0]; s1 = rk[1]; s2 = rk[2]; s3 = rk[3];

			QUARTER_ROUND(t3, s0, s1, s2, s3)
			QUARTER_ROUND(t2, s3, s0, s1, s2)
			QUARTER_ROUND(t1, s2, s3, s0, s1)
			QUARTER_ROUND(t0, s1, s2, s3, s0)

			t0 = rk[4]; t1 = rk[5]; t2 = rk[6]; t3 = rk[7];

			QUARTER_ROUND(s3, t0, t1, t2, t3)
			QUARTER_ROUND(s2, t3, t0, t1, t2)
			QUARTER_ROUND(s1, t2, t3, t0, t1)
			QUARTER_ROUND(s0, t1, t2, t3, t0)
#undef QUARTER_ROUND

			rk += 8;
		} while (--r);

		// Same countermeasure for the byte S-box Se before the final round.
		u = 0;
		for (i=0; i<256; i+=cacheLineSize)
			u &= *(const word32 *)(Se+i);
		u &= *(const word32 *)(Se+252);
		t0 |= u; t1 |= u; t2 |= u; t3 |= u;

		word32 tbw[4];
		byte *const tempBlock = (byte *)tbw;
		word32 *const obw = (word32 *)outBlock;
		const word32 *const xbw = (const word32 *)xorBlock;

		// Final round: plain SubBytes via Se; the destination byte indices
		// scatter the results so that ShiftRows is performed by placement.
#define QUARTER_ROUND(t, a, b, c, d)	\
	tempBlock[a] = Se[byte(t)]; t >>= 8;\
	tempBlock[b] = Se[byte(t)]; t >>= 8;\
	tempBlock[c] = Se[byte(t)]; t >>= 8;\
	tempBlock[d] = Se[t];

		QUARTER_ROUND(t2, 15, 2, 5, 8)
		QUARTER_ROUND(t1, 11, 14, 1, 4)
		QUARTER_ROUND(t0, 7, 10, 13, 0)
		QUARTER_ROUND(t3, 3, 6, 9, 12)
#undef QUARTER_ROUND

		// Last AddRoundKey, with the optional XOR against xorBlock
		// (used by chaining modes).
		if (xbw)
		{
			obw[0] = tbw[0] ^ xbw[0] ^ rk[0];
			obw[1] = tbw[1] ^ xbw[1] ^ rk[1];
			obw[2] = tbw[2] ^ xbw[2] ^ rk[2];
			obw[3] = tbw[3] ^ xbw[3] ^ rk[3];
		}
		else
		{
			obw[0] = tbw[0] ^ rk[0];
			obw[1] = tbw[1] ^ rk[1];
			obw[2] = tbw[2] ^ rk[2];
			obw[3] = tbw[3] ^ rk[3];
		}
	}
}
00560
void Rijndael::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
{
	word32 s0, s1, s2, s3, t0, t1, t2, t3;
	const word32 *rk = m_key;

	// Initial AddRoundKey.  For decryption, UncheckedSetKey has already
	// reversed the round-key order and transformed the middle keys, so the
	// same forward-style round structure works here with the Td/Sd tables.
	// Round key 1 is preloaded into t0..t3.
	s0 = ((const word32 *)inBlock)[0] ^ rk[0];
	s1 = ((const word32 *)inBlock)[1] ^ rk[1];
	s2 = ((const word32 *)inBlock)[2] ^ rk[2];
	s3 = ((const word32 *)inBlock)[3] ^ rk[3];
	t0 = rk[4];
	t1 = rk[5];
	t2 = rk[6];
	t3 = rk[7];
	rk += 8;

	// Cache-timing countermeasure: u is always 0 (it starts at 0 and is
	// only ANDed), but the reads touch every cache line of Td so later
	// data-dependent lookups hit a uniformly warm cache.
	const int cacheLineSize = GetCacheLineSize();
	unsigned int i;
	word32 u = 0;
	for (i=0; i<1024; i+=cacheLineSize)
		u &= *(const word32 *)(((const byte *)Td)+i);
	u &= Td[255];
	s0 |= u; s1 |= u; s2 |= u; s3 |= u;	// no-op; keeps the loads live

	// First round: single Td subtable, rotated per byte position.
#ifdef IS_BIG_ENDIAN
#define QUARTER_ROUND(t, a, b, c, d)	\
	a ^= rotrFixed(Td[byte(t)], 24); t >>= 8;\
	b ^= rotrFixed(Td[byte(t)], 16); t >>= 8;\
	c ^= rotrFixed(Td[byte(t)], 8); t >>= 8;\
	d ^= Td[t];
#else
#define QUARTER_ROUND(t, a, b, c, d)	\
	d ^= Td[byte(t)]; t >>= 8;\
	c ^= rotrFixed(Td[byte(t)], 8); t >>= 8;\
	b ^= rotrFixed(Td[byte(t)], 16); t >>= 8;\
	a ^= rotrFixed(Td[t], 24);
#endif

	// Note the a/b/c/d rotation differs from encryption: inverse ShiftRows
	// moves rows in the opposite direction.
	QUARTER_ROUND(s3, t2, t1, t0, t3)
	QUARTER_ROUND(s2, t1, t0, t3, t2)
	QUARTER_ROUND(s1, t0, t3, t2, t1)
	QUARTER_ROUND(s0, t3, t2, t1, t0)
#undef QUARTER_ROUND

	// Middle rounds, two per iteration, using the four 256-entry Td subtables.
	unsigned int r = m_rounds/2 - 1;
	do
	{
#define QUARTER_ROUND(t, a, b, c, d)	\
	a ^= Td[3*256+byte(t)]; t >>= 8;\
	b ^= Td[2*256+byte(t)]; t >>= 8;\
	c ^= Td[1*256+byte(t)]; t >>= 8;\
	d ^= Td[t];

		s0 = rk[0]; s1 = rk[1]; s2 = rk[2]; s3 = rk[3];

		QUARTER_ROUND(t3, s2, s1, s0, s3)
		QUARTER_ROUND(t2, s1, s0, s3, s2)
		QUARTER_ROUND(t1, s0, s3, s2, s1)
		QUARTER_ROUND(t0, s3, s2, s1, s0)

		t0 = rk[4]; t1 = rk[5]; t2 = rk[6]; t3 = rk[7];

		QUARTER_ROUND(s3, t2, t1, t0, t3)
		QUARTER_ROUND(s2, t1, t0, t3, t2)
		QUARTER_ROUND(s1, t0, t3, t2, t1)
		QUARTER_ROUND(s0, t3, t2, t1, t0)
#undef QUARTER_ROUND

		rk += 8;
	} while (--r);

	// Same countermeasure for the inverse S-box Sd before the final round.
	u = 0;
	for (i=0; i<256; i+=cacheLineSize)
		u &= *(const word32 *)(Sd+i);
	u &= *(const word32 *)(Sd+252);
	t0 |= u; t1 |= u; t2 |= u; t3 |= u;

	word32 tbw[4];
	byte *const tempBlock = (byte *)tbw;
	word32 *const obw = (word32 *)outBlock;
	const word32 *const xbw = (const word32 *)xorBlock;

	// Final round: InvSubBytes via Sd; the destination byte indices scatter
	// the results so inverse ShiftRows is performed by placement (note the
	// index pattern is the mirror of the encryption final round).
#define QUARTER_ROUND(t, a, b, c, d)	\
	tempBlock[a] = Sd[byte(t)]; t >>= 8;\
	tempBlock[b] = Sd[byte(t)]; t >>= 8;\
	tempBlock[c] = Sd[byte(t)]; t >>= 8;\
	tempBlock[d] = Sd[t];

	QUARTER_ROUND(t2, 7, 2, 13, 8)
	QUARTER_ROUND(t1, 3, 14, 9, 4)
	QUARTER_ROUND(t0, 15, 10, 5, 0)
	QUARTER_ROUND(t3, 11, 6, 1, 12)
#undef QUARTER_ROUND

	// Last AddRoundKey, with the optional XOR against xorBlock
	// (used by chaining modes).
	if (xbw)
	{
		obw[0] = tbw[0] ^ xbw[0] ^ rk[0];
		obw[1] = tbw[1] ^ xbw[1] ^ rk[1];
		obw[2] = tbw[2] ^ xbw[2] ^ rk[2];
		obw[3] = tbw[3] ^ xbw[3] ^ rk[3];
	}
	else
	{
		obw[0] = tbw[0] ^ rk[0];
		obw[1] = tbw[1] ^ rk[1];
		obw[2] = tbw[2] ^ rk[2];
		obw[3] = tbw[3] ^ rk[3];
	}
}
00673
00674 NAMESPACE_END
00675
00676 #endif