rijndael.cpp

// rijndael.cpp - modified by Chris Morgan <cmorgan@wpi.edu>
// and Wei Dai from Paulo Barreto's Rijndael implementation
// The original code and all modifications are in the public domain.

/*
Defense against timing attacks was added in July 2006 by Wei Dai.

The code now uses smaller tables in the first and last rounds,
and preloads them into L1 cache before usage (by loading at least
one element in each cache line).

We try to delay subsequent accesses to each table (used in the first
and last rounds) until all of the table has been preloaded. Hopefully
the compiler isn't smart enough to optimize that code away.

After preloading the table, we also try not to access any memory location
other than the table and the stack, in order to prevent table entries from
being evicted from L1 cache before that round is finished.
(Some popular CPUs have 2-way associative caches.)
*/
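
// The preload pattern described above, as a stand-alone illustrative sketch
// (commented out; T, its size, and cacheLineSize stand in for the real table
// and parameters). Touching one word per cache line pulls the whole table
// into L1, and accumulating the loads into u creates a data dependency the
// compiler cannot discard. Since u starts at 0 and is only ANDed, it stays 0,
// so OR-ing it into the state words is a functional no-op that still forces
// the loads to complete before the round begins:
//
//	word32 u = 0;
//	for (unsigned int i = 0; i < sizeof(T); i += cacheLineSize)
//		u &= *(const word32 *)((const byte *)T + i);
//	u &= *(const word32 *)((const byte *)T + sizeof(T) - 4);	// last line
//	s0 |= u; s1 |= u; s2 |= u; s3 |= u;	// state now depends on every load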

// This is the original introductory comment:

/**
 * version 3.0 (December 2000)
 *
 * Optimised ANSI C code for the Rijndael cipher (now AES)
 *
 * author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
 * author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
 * author Paulo Barreto <paulo.barreto@terra.com.br>
 *
 * This code is hereby placed in the public domain.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "pch.h"

#ifndef CRYPTOPP_IMPORTS

#include "rijndael.h"
#include "misc.h"
#include "cpu.h"

NAMESPACE_BEGIN(CryptoPP)

void Rijndael::Base::UncheckedSetKey(const byte *userKey, unsigned int keylen, const NameValuePairs &)
{
	AssertValidKeyLength(keylen);

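	// Nk = keylen/4 is the key length in 32-bit words; AES pairs it with
	// Nr = Nk + 6 rounds (10, 12, or 14 for 128-, 192-, or 256-bit keys),
	// and the expanded schedule holds 4*(Nr+1) round-key words.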
	m_rounds = keylen/4 + 6;
	m_key.New(4*(m_rounds+1));

	word32 temp, *rk = m_key;
	const word32 *rc = rcon;

	GetUserKey(BIG_ENDIAN_ORDER, rk, keylen/4, userKey, keylen);

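	// Key expansion: each pass derives the next keylen/4 words of the
	// schedule. The first new word combines RotWord, SubWord (via the Se
	// S-box) and the next round constant; the rest are simple XOR chains.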
	while (true)
	{
		temp  = rk[keylen/4-1];
		rk[keylen/4] = rk[0] ^
			(word32(Se[GETBYTE(temp, 2)]) << 24) ^
			(word32(Se[GETBYTE(temp, 1)]) << 16) ^
			(word32(Se[GETBYTE(temp, 0)]) << 8) ^
			Se[GETBYTE(temp, 3)] ^
			*(rc++);
		rk[keylen/4+1] = rk[1] ^ rk[keylen/4];
		rk[keylen/4+2] = rk[2] ^ rk[keylen/4+1];
		rk[keylen/4+3] = rk[3] ^ rk[keylen/4+2];

		if (rk + keylen/4 + 4 == m_key.end())
			break;

		if (keylen == 24)
		{
			rk[10] = rk[ 4] ^ rk[ 9];
			rk[11] = rk[ 5] ^ rk[10];
		}
		else if (keylen == 32)
		{
			temp = rk[11];
			rk[12] = rk[ 4] ^
				(word32(Se[GETBYTE(temp, 3)]) << 24) ^
				(word32(Se[GETBYTE(temp, 2)]) << 16) ^
				(word32(Se[GETBYTE(temp, 1)]) << 8) ^
				Se[GETBYTE(temp, 0)];
			rk[13] = rk[ 5] ^ rk[12];
			rk[14] = rk[ 6] ^ rk[13];
			rk[15] = rk[ 7] ^ rk[14];
		}
		rk += keylen/4;
	}

	if (!IsForwardTransformation())
	{
		unsigned int i, j;
		rk = m_key;

		/* invert the order of the round keys: */
		for (i = 0, j = 4*m_rounds; i < j; i += 4, j -= 4) {
			temp = rk[i    ]; rk[i    ] = rk[j    ]; rk[j    ] = temp;
			temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
			temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
			temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
		}
		/* apply the inverse MixColumn transform to all round keys but the first and the last: */
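		/* Td composes InvMixColumns with the inverse S-box, so feeding it the
		   forward S-box output cancels the substitution: Td[i*256 + Se[b]] is
		   column i of InvMixColumns applied to the single byte b. */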
		for (i = 1; i < m_rounds; i++) {
			rk += 4;
			rk[0] =
				Td[0*256+Se[GETBYTE(rk[0], 3)]] ^
				Td[1*256+Se[GETBYTE(rk[0], 2)]] ^
				Td[2*256+Se[GETBYTE(rk[0], 1)]] ^
				Td[3*256+Se[GETBYTE(rk[0], 0)]];
			rk[1] =
				Td[0*256+Se[GETBYTE(rk[1], 3)]] ^
				Td[1*256+Se[GETBYTE(rk[1], 2)]] ^
				Td[2*256+Se[GETBYTE(rk[1], 1)]] ^
				Td[3*256+Se[GETBYTE(rk[1], 0)]];
			rk[2] =
				Td[0*256+Se[GETBYTE(rk[2], 3)]] ^
				Td[1*256+Se[GETBYTE(rk[2], 2)]] ^
				Td[2*256+Se[GETBYTE(rk[2], 1)]] ^
				Td[3*256+Se[GETBYTE(rk[2], 0)]];
			rk[3] =
				Td[0*256+Se[GETBYTE(rk[3], 3)]] ^
				Td[1*256+Se[GETBYTE(rk[3], 2)]] ^
				Td[2*256+Se[GETBYTE(rk[3], 1)]] ^
				Td[3*256+Se[GETBYTE(rk[3], 0)]];
		}
	}

	ConditionalByteReverse(BIG_ENDIAN_ORDER, m_key.begin(), m_key.begin(), 16);
	ConditionalByteReverse(BIG_ENDIAN_ORDER, m_key + m_rounds*4, m_key + m_rounds*4, 16);
}
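
// Callers normally reach this code through the high-level Crypto++ API rather
// than by invoking UncheckedSetKey/ProcessAndXorBlock directly. A minimal
// usage sketch, assuming the usual "aes.h" and "modes.h" headers and a
// caller-supplied 16-byte key and block (illustrative only, not part of
// this file):
//
//	CryptoPP::ECB_Mode<CryptoPP::AES>::Encryption enc;
//	enc.SetKey(key, 16);				// runs the key schedule above
//	enc.ProcessData(ciphertext, plaintext, 16);	// one 16-byte block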

#pragma warning(disable: 4731)	// frame pointer register 'ebp' modified by inline assembly code

void Rijndael::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
{
#if defined(CRYPTOPP_X86_ASM_AVAILABLE)
	if (HasMMX())
	{
		const word32 *k = m_key;
		const word32 *kLoopEnd = k + m_rounds*4;
		#if CRYPTOPP_BOOL_X64
			#define K_REG			r8
			#define K_END_REG		r9
			#define SAVE_K
			#define RESTORE_K
			#define RESTORE_K_END
			#define SAVE_0(x)		AS2(mov r10d, x)
			#define SAVE_1(x)		AS2(mov r11d, x)
			#define SAVE_2(x)		AS2(mov r12d, x)
			#define RESTORE_0(x)	AS2(mov x, r10d)
			#define RESTORE_1(x)	AS2(mov x, r11d)
			#define RESTORE_2(x)	AS2(mov x, r12d)
		#else
			#define K_REG			esi
			#define K_END_REG		edi
			#define SAVE_K			AS2(movd	mm4, esi)
			#define RESTORE_K		AS2(movd	esi, mm4)
			#define RESTORE_K_END	AS2(movd	edi, mm5)
			#define SAVE_0(x)		AS2(movd	mm0, x)
			#define SAVE_1(x)		AS2(movd	mm1, x)
			#define SAVE_2(x)		AS2(movd	mm2, x)
			#define RESTORE_0(x)	AS2(movd	x, mm0)
			#define RESTORE_1(x)	AS2(movd	x, mm1)
			#define RESTORE_2(x)	AS2(movd	x, mm2)
		#endif
#ifdef __GNUC__
		word32 t0, t1, t2, t3;
		__asm__ __volatile__
		(
		".intel_syntax noprefix;"
		AS_PUSH(		bx)
		AS_PUSH(		bp)
		AS2(	mov		WORD_REG(bp), WORD_REG(ax))
	#if CRYPTOPP_BOOL_X64
		// save these manually. clobber list doesn't seem to work as of GCC 4.1.0
		AS1(	pushq	K_REG)
		AS1(	pushq	K_END_REG)
		AS1(	pushq	r10)
		AS1(	pushq	r11)
		AS1(	pushq	r12)
		AS2(	mov		K_REG, rsi)
		AS2(	mov		K_END_REG, rcx)
	#else
		AS2(	movd	mm5, ecx)
	#endif
#else
	#if _MSC_VER < 1300
		const word32 *t = Te;
		AS2(	mov		eax, t)
	#endif
		AS2(	mov		edx, g_cacheLineSize)
		AS2(	mov		WORD_REG(di), inBlock)
		AS2(	mov		K_REG, k)
		AS2(	movd	mm5, kLoopEnd)
	#if _MSC_VER < 1300
		AS_PUSH(		bx)
		AS_PUSH(		bp)
		AS2(	mov		ebp, eax)
	#else
		AS_PUSH(		bp)
		AS2(	lea		ebp, Te)
	#endif
#endif
		AS2(	mov		eax, [K_REG+0*4])	// s0
		AS2(	xor		eax, [WORD_REG(di)+0*4])
		SAVE_0(eax)
		AS2(	mov		ebx, [K_REG+1*4])
		AS2(	xor		ebx, [WORD_REG(di)+1*4])
		SAVE_1(ebx)
		AS2(	and		ebx, eax)
		AS2(	mov		eax, [K_REG+2*4])
		AS2(	xor		eax, [WORD_REG(di)+2*4])
		SAVE_2(eax)
		AS2(	and		ebx, eax)
		AS2(	mov		ecx, [K_REG+3*4])
		AS2(	xor		ecx, [WORD_REG(di)+3*4])
		AS2(	and		ebx, ecx)

		// read Te0 into L1 cache. this code could be simplified by using lfence, but that is an SSE2 instruction
		AS2(	and		ebx, 0)
		AS2(	mov		edi, ebx)	// make index depend on previous loads to simulate lfence
		ASL(2)
		AS2(	and		ebx, [WORD_REG(bp)+WORD_REG(di)])
		AS2(	add		edi, edx)
		AS2(	and		ebx, [WORD_REG(bp)+WORD_REG(di)])
		AS2(	add		edi, edx)
		AS2(	and		ebx, [WORD_REG(bp)+WORD_REG(di)])
		AS2(	add		edi, edx)
		AS2(	and		ebx, [WORD_REG(bp)+WORD_REG(di)])
		AS2(	add		edi, edx)
		AS2(	cmp		edi, 1024)
		ASJ(	jl,		2, b)
		AS2(	and		ebx, [WORD_REG(bp)+1020])
#if CRYPTOPP_BOOL_X64
		AS2(	xor		r10d, ebx)
		AS2(	xor		r11d, ebx)
		AS2(	xor		r12d, ebx)
#else
		AS2(	movd	mm6, ebx)
		AS2(	pxor	mm2, mm6)
		AS2(	pxor	mm1, mm6)
		AS2(	pxor	mm0, mm6)
#endif
		AS2(	xor		ecx, ebx)

		AS2(	mov		edi, [K_REG+4*4])	// t0
		AS2(	mov		eax, [K_REG+5*4])
		AS2(	mov		ebx, [K_REG+6*4])
		AS2(	mov		edx, [K_REG+7*4])
		AS2(	add		K_REG, 8*4)
		SAVE_K

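// One quarter of a round: the four bytes of register t index the four 1KB Te
// sub-tables, XOR-accumulating into a..d. The token pasting (e##t##x, t##l,
// t##h) turns an argument like c into ecx/cl/ch, so each invocation works on
// a different 32-bit register.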
#define QUARTER_ROUND(t, a, b, c, d)	\
	AS2(movzx esi, t##l)\
	AS2(d, [WORD_REG(bp)+0*1024+4*WORD_REG(si)])\
	AS2(movzx esi, t##h)\
	AS2(c, [WORD_REG(bp)+1*1024+4*WORD_REG(si)])\
	AS2(shr e##t##x, 16)\
	AS2(movzx esi, t##l)\
	AS2(b, [WORD_REG(bp)+2*1024+4*WORD_REG(si)])\
	AS2(movzx esi, t##h)\
	AS2(a, [WORD_REG(bp)+3*1024+4*WORD_REG(si)])

#define s0		xor edi
#define s1		xor eax
#define s2		xor ebx
#define s3		xor ecx
#define t0		xor edi
#define t1		xor eax
#define t2		xor ebx
#define t3		xor edx

		QUARTER_ROUND(c, t0, t1, t2, t3)
		RESTORE_2(ecx)
		QUARTER_ROUND(c, t3, t0, t1, t2)
		RESTORE_1(ecx)
		QUARTER_ROUND(c, t2, t3, t0, t1)
		RESTORE_0(ecx)
		QUARTER_ROUND(c, t1, t2, t3, t0)
		SAVE_2(ebx)
		SAVE_1(eax)
		SAVE_0(edi)
#undef QUARTER_ROUND

		RESTORE_K

		ASL(0)
		AS2(	mov		edi, [K_REG+0*4])
		AS2(	mov		eax, [K_REG+1*4])
		AS2(	mov		ebx, [K_REG+2*4])
		AS2(	mov		ecx, [K_REG+3*4])

#define QUARTER_ROUND(t, a, b, c, d)	\
	AS2(movzx esi, t##l)\
	AS2(a, [WORD_REG(bp)+3*1024+4*WORD_REG(si)])\
	AS2(movzx esi, t##h)\
	AS2(b, [WORD_REG(bp)+2*1024+4*WORD_REG(si)])\
	AS2(shr e##t##x, 16)\
	AS2(movzx esi, t##l)\
	AS2(c, [WORD_REG(bp)+1*1024+4*WORD_REG(si)])\
	AS2(movzx esi, t##h)\
	AS2(d, [WORD_REG(bp)+0*1024+4*WORD_REG(si)])

		QUARTER_ROUND(d, s0, s1, s2, s3)
		RESTORE_2(edx)
		QUARTER_ROUND(d, s3, s0, s1, s2)
		RESTORE_1(edx)
		QUARTER_ROUND(d, s2, s3, s0, s1)
		RESTORE_0(edx)
		QUARTER_ROUND(d, s1, s2, s3, s0)
		RESTORE_K
		SAVE_2(ebx)
		SAVE_1(eax)
		SAVE_0(edi)

		AS2(	mov		edi, [K_REG+4*4])
		AS2(	mov		eax, [K_REG+5*4])
		AS2(	mov		ebx, [K_REG+6*4])
		AS2(	mov		edx, [K_REG+7*4])

		QUARTER_ROUND(c, t0, t1, t2, t3)
		RESTORE_2(ecx)
		QUARTER_ROUND(c, t3, t0, t1, t2)
		RESTORE_1(ecx)
		QUARTER_ROUND(c, t2, t3, t0, t1)
		RESTORE_0(ecx)
		QUARTER_ROUND(c, t1, t2, t3, t0)
		SAVE_2(ebx)
		SAVE_1(eax)
		SAVE_0(edi)

		RESTORE_K
		RESTORE_K_END
		AS2(	add		K_REG, 8*4)
		SAVE_K
		AS2(	cmp		K_END_REG, K_REG)
		ASJ(	jne,	0, b)

#undef QUARTER_ROUND
#undef s0
#undef s1
#undef s2
#undef s3
#undef t0
#undef t1
#undef t2
#undef t3

		AS2(	mov		eax, [K_END_REG+0*4])
		AS2(	mov		ecx, [K_END_REG+1*4])
		AS2(	mov		esi, [K_END_REG+2*4])
		AS2(	mov		edi, [K_END_REG+3*4])

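// Final round: no MixColumns, so bare S-box bytes are needed rather than
// whole Te entries. The BYTE PTR [bp+1+4*bx] addressing picks a single byte
// out of each 4-byte Te entry (the table is laid out so that this byte is the
// plain S-box value), avoiding a second table that would also need preloading.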
#define QUARTER_ROUND(a, b, c, d)	\
	AS2(	movzx	ebx, dl)\
	AS2(	movzx	ebx, BYTE PTR [WORD_REG(bp)+1+4*WORD_REG(bx)])\
	AS2(	shl		ebx, 3*8)\
	AS2(	xor		a, ebx)\
	AS2(	movzx	ebx, dh)\
	AS2(	movzx	ebx, BYTE PTR [WORD_REG(bp)+1+4*WORD_REG(bx)])\
	AS2(	shl		ebx, 2*8)\
	AS2(	xor		b, ebx)\
	AS2(	shr		edx, 16)\
	AS2(	movzx	ebx, dl)\
	AS2(	shr		edx, 8)\
	AS2(	movzx	ebx, BYTE PTR [WORD_REG(bp)+1+4*WORD_REG(bx)])\
	AS2(	shl		ebx, 1*8)\
	AS2(	xor		c, ebx)\
	AS2(	movzx	ebx, BYTE PTR [WORD_REG(bp)+1+4*WORD_REG(dx)])\
	AS2(	xor		d, ebx)

		QUARTER_ROUND(eax, ecx, esi, edi)
		RESTORE_2(edx)
		QUARTER_ROUND(edi, eax, ecx, esi)
		RESTORE_1(edx)
		QUARTER_ROUND(esi, edi, eax, ecx)
		RESTORE_0(edx)
		QUARTER_ROUND(ecx, esi, edi, eax)

#undef QUARTER_ROUND

#if CRYPTOPP_BOOL_X64
		AS1(popq	r12)
		AS1(popq	r11)
		AS1(popq	r10)
		AS1(popq	K_END_REG)
		AS1(popq	K_REG)
#else
		AS1(emms)
#endif
		AS_POP(		bp)

#if defined(__GNUC__) || (defined(_MSC_VER) && _MSC_VER < 1300)
		AS_POP(		bx)
#endif
#ifdef __GNUC__
		".att_syntax prefix;"
			: "=a" (t0), "=c" (t1), "=S" (t2), "=D" (t3)
			: "a" (Te), "D" (inBlock), "S" (k), "c" (kLoopEnd), "d" (g_cacheLineSize)
			: "memory", "cc"
		);

		if (xorBlock)
		{
			t0 ^= ((const word32 *)xorBlock)[0];
			t1 ^= ((const word32 *)xorBlock)[1];
			t2 ^= ((const word32 *)xorBlock)[2];
			t3 ^= ((const word32 *)xorBlock)[3];
		}
		((word32 *)outBlock)[0] = t0;
		((word32 *)outBlock)[1] = t1;
		((word32 *)outBlock)[2] = t2;
		((word32 *)outBlock)[3] = t3;
#else
		AS2(	mov		WORD_REG(bx), xorBlock)
		AS2(	test	WORD_REG(bx), WORD_REG(bx))
		ASJ(	jz,		1, f)
		AS2(	xor		eax, [WORD_REG(bx)+0*4])
		AS2(	xor		ecx, [WORD_REG(bx)+1*4])
		AS2(	xor		esi, [WORD_REG(bx)+2*4])
		AS2(	xor		edi, [WORD_REG(bx)+3*4])
		ASL(1)
		AS2(	mov		WORD_REG(bx), outBlock)
		AS2(	mov		[WORD_REG(bx)+0*4], eax)
		AS2(	mov		[WORD_REG(bx)+1*4], ecx)
		AS2(	mov		[WORD_REG(bx)+2*4], esi)
		AS2(	mov		[WORD_REG(bx)+3*4], edi)
#endif
	}
	else
#endif	// #ifdef CRYPTOPP_X86_ASM_AVAILABLE
	{
	word32 s0, s1, s2, s3, t0, t1, t2, t3;
	const word32 *rk = m_key;

	s0 = ((const word32 *)inBlock)[0] ^ rk[0];
	s1 = ((const word32 *)inBlock)[1] ^ rk[1];
	s2 = ((const word32 *)inBlock)[2] ^ rk[2];
	s3 = ((const word32 *)inBlock)[3] ^ rk[3];
	t0 = rk[4];
	t1 = rk[5];
	t2 = rk[6];
	t3 = rk[7];
	rk += 8;

	// timing attack countermeasure. see comments at top for more details
	const int cacheLineSize = GetCacheLineSize();
	unsigned int i;
	word32 u = 0;
	for (i=0; i<1024; i+=cacheLineSize)
		u &= *(const word32 *)(((const byte *)Te)+i);
	u &= Te[255];
	s0 |= u; s1 |= u; s2 |= u; s3 |= u;

	// first round
#ifdef IS_BIG_ENDIAN
#define QUARTER_ROUND(t, a, b, c, d)	\
		a ^= rotrFixed(Te[byte(t)], 24);	t >>= 8;\
		b ^= rotrFixed(Te[byte(t)], 16);	t >>= 8;\
		c ^= rotrFixed(Te[byte(t)], 8);		t >>= 8;\
		d ^= Te[t];
#else
#define QUARTER_ROUND(t, a, b, c, d)	\
		d ^= Te[byte(t)];			t >>= 8;\
		c ^= rotrFixed(Te[byte(t)], 8);		t >>= 8;\
		b ^= rotrFixed(Te[byte(t)], 16);	t >>= 8;\
		a ^= rotrFixed(Te[t], 24);
#endif
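
	// The first round uses only the first 1KB sub-table of Te (rotrFixed
	// produces the other three rotated columns on the fly), which is why the
	// preload loop above only had to walk 1024 bytes; the full rounds below
	// use all four 1KB sub-tables.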

	QUARTER_ROUND(s3, t0, t1, t2, t3)
	QUARTER_ROUND(s2, t3, t0, t1, t2)
	QUARTER_ROUND(s1, t2, t3, t0, t1)
	QUARTER_ROUND(s0, t1, t2, t3, t0)
#undef QUARTER_ROUND

	// Nr - 2 full rounds:
	unsigned int r = m_rounds/2 - 1;
	do
	{
#define QUARTER_ROUND(t, a, b, c, d)	\
		a ^= Te[3*256+byte(t)]; t >>= 8;\
		b ^= Te[2*256+byte(t)]; t >>= 8;\
		c ^= Te[1*256+byte(t)]; t >>= 8;\
		d ^= Te[t];

		s0 = rk[0]; s1 = rk[1]; s2 = rk[2]; s3 = rk[3];

		QUARTER_ROUND(t3, s0, s1, s2, s3)
		QUARTER_ROUND(t2, s3, s0, s1, s2)
		QUARTER_ROUND(t1, s2, s3, s0, s1)
		QUARTER_ROUND(t0, s1, s2, s3, s0)

		t0 = rk[4]; t1 = rk[5]; t2 = rk[6]; t3 = rk[7];

		QUARTER_ROUND(s3, t0, t1, t2, t3)
		QUARTER_ROUND(s2, t3, t0, t1, t2)
		QUARTER_ROUND(s1, t2, t3, t0, t1)
		QUARTER_ROUND(s0, t1, t2, t3, t0)
#undef QUARTER_ROUND

		rk += 8;
	} while (--r);

	// timing attack countermeasure. see comments at top for more details
	u = 0;
	for (i=0; i<256; i+=cacheLineSize)
		u &= *(const word32 *)(Se+i);
	u &= *(const word32 *)(Se+252);
	t0 |= u; t1 |= u; t2 |= u; t3 |= u;

	word32 tbw[4];
	byte *const tempBlock = (byte *)tbw;
	word32 *const obw = (word32 *)outBlock;
	const word32 *const xbw = (const word32 *)xorBlock;

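	// The stores below bake the final ShiftRows (and byte order) into the
	// index pattern: each QUARTER_ROUND scatters the four S-box bytes of one
	// state word to the positions those bytes occupy in the output block
	// after the row shift.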
#define QUARTER_ROUND(t, a, b, c, d)	\
	tempBlock[a] = Se[byte(t)]; t >>= 8;\
	tempBlock[b] = Se[byte(t)]; t >>= 8;\
	tempBlock[c] = Se[byte(t)]; t >>= 8;\
	tempBlock[d] = Se[t];

	QUARTER_ROUND(t2, 15, 2, 5, 8)
	QUARTER_ROUND(t1, 11, 14, 1, 4)
	QUARTER_ROUND(t0, 7, 10, 13, 0)
	QUARTER_ROUND(t3, 3, 6, 9, 12)
#undef QUARTER_ROUND

	if (xbw)
	{
		obw[0] = tbw[0] ^ xbw[0] ^ rk[0];
		obw[1] = tbw[1] ^ xbw[1] ^ rk[1];
		obw[2] = tbw[2] ^ xbw[2] ^ rk[2];
		obw[3] = tbw[3] ^ xbw[3] ^ rk[3];
	}
	else
	{
		obw[0] = tbw[0] ^ rk[0];
		obw[1] = tbw[1] ^ rk[1];
		obw[2] = tbw[2] ^ rk[2];
		obw[3] = tbw[3] ^ rk[3];
	}
	}
}

void Rijndael::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
{
	word32 s0, s1, s2, s3, t0, t1, t2, t3;
	const word32 *rk = m_key;

	s0 = ((const word32 *)inBlock)[0] ^ rk[0];
	s1 = ((const word32 *)inBlock)[1] ^ rk[1];
	s2 = ((const word32 *)inBlock)[2] ^ rk[2];
	s3 = ((const word32 *)inBlock)[3] ^ rk[3];
	t0 = rk[4];
	t1 = rk[5];
	t2 = rk[6];
	t3 = rk[7];
	rk += 8;

	// timing attack countermeasure. see comments at top for more details
	const int cacheLineSize = GetCacheLineSize();
	unsigned int i;
	word32 u = 0;
	for (i=0; i<1024; i+=cacheLineSize)
		u &= *(const word32 *)(((const byte *)Td)+i);
	u &= Td[255];
	s0 |= u; s1 |= u; s2 |= u; s3 |= u;

	// first round
#ifdef IS_BIG_ENDIAN
#define QUARTER_ROUND(t, a, b, c, d)	\
		a ^= rotrFixed(Td[byte(t)], 24);	t >>= 8;\
		b ^= rotrFixed(Td[byte(t)], 16);	t >>= 8;\
		c ^= rotrFixed(Td[byte(t)], 8);		t >>= 8;\
		d ^= Td[t];
#else
#define QUARTER_ROUND(t, a, b, c, d)	\
		d ^= Td[byte(t)];			t >>= 8;\
		c ^= rotrFixed(Td[byte(t)], 8);		t >>= 8;\
		b ^= rotrFixed(Td[byte(t)], 16);	t >>= 8;\
		a ^= rotrFixed(Td[t], 24);
#endif
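
	// Same structure as encryption, but with the Td/Sd tables and with the
	// QUARTER_ROUND destination arguments permuted differently, since
	// InvShiftRows rotates rows in the opposite direction to ShiftRows.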

	QUARTER_ROUND(s3, t2, t1, t0, t3)
	QUARTER_ROUND(s2, t1, t0, t3, t2)
	QUARTER_ROUND(s1, t0, t3, t2, t1)
	QUARTER_ROUND(s0, t3, t2, t1, t0)
#undef QUARTER_ROUND

	// Nr - 2 full rounds:
	unsigned int r = m_rounds/2 - 1;
	do
	{
#define QUARTER_ROUND(t, a, b, c, d)	\
		a ^= Td[3*256+byte(t)]; t >>= 8;\
		b ^= Td[2*256+byte(t)]; t >>= 8;\
		c ^= Td[1*256+byte(t)]; t >>= 8;\
		d ^= Td[t];

		s0 = rk[0]; s1 = rk[1]; s2 = rk[2]; s3 = rk[3];

		QUARTER_ROUND(t3, s2, s1, s0, s3)
		QUARTER_ROUND(t2, s1, s0, s3, s2)
		QUARTER_ROUND(t1, s0, s3, s2, s1)
		QUARTER_ROUND(t0, s3, s2, s1, s0)

		t0 = rk[4]; t1 = rk[5]; t2 = rk[6]; t3 = rk[7];

		QUARTER_ROUND(s3, t2, t1, t0, t3)
		QUARTER_ROUND(s2, t1, t0, t3, t2)
		QUARTER_ROUND(s1, t0, t3, t2, t1)
		QUARTER_ROUND(s0, t3, t2, t1, t0)
#undef QUARTER_ROUND

		rk += 8;
	} while (--r);

	// timing attack countermeasure. see comments at top for more details
	u = 0;
	for (i=0; i<256; i+=cacheLineSize)
		u &= *(const word32 *)(Sd+i);
	u &= *(const word32 *)(Sd+252);
	t0 |= u; t1 |= u; t2 |= u; t3 |= u;

	word32 tbw[4];
	byte *const tempBlock = (byte *)tbw;
	word32 *const obw = (word32 *)outBlock;
	const word32 *const xbw = (const word32 *)xorBlock;

#define QUARTER_ROUND(t, a, b, c, d)	\
	tempBlock[a] = Sd[byte(t)]; t >>= 8;\
	tempBlock[b] = Sd[byte(t)]; t >>= 8;\
	tempBlock[c] = Sd[byte(t)]; t >>= 8;\
	tempBlock[d] = Sd[t];

	QUARTER_ROUND(t2, 7, 2, 13, 8)
	QUARTER_ROUND(t1, 3, 14, 9, 4)
	QUARTER_ROUND(t0, 15, 10, 5, 0)
	QUARTER_ROUND(t3, 11, 6, 1, 12)
#undef QUARTER_ROUND

	if (xbw)
	{
		obw[0] = tbw[0] ^ xbw[0] ^ rk[0];
		obw[1] = tbw[1] ^ xbw[1] ^ rk[1];
		obw[2] = tbw[2] ^ xbw[2] ^ rk[2];
		obw[3] = tbw[3] ^ xbw[3] ^ rk[3];
	}
	else
	{
		obw[0] = tbw[0] ^ rk[0];
		obw[1] = tbw[1] ^ rk[1];
		obw[2] = tbw[2] ^ rk[2];
		obw[3] = tbw[3] ^ rk[3];
	}
}

NAMESPACE_END

#endif
