rijndael.cpp

00001 // rijndael.cpp - modified by Chris Morgan <cmorgan@wpi.edu>
00002 // and Wei Dai from Paulo Baretto's Rijndael implementation
00003 // The original code and all modifications are in the public domain.
00004 
00005 /*
00006 Defense against timing attacks was added in July 2006 by Wei Dai.
00007 
00008 The code now uses smaller tables in the first and last rounds,
00009 and preloads them into L1 cache before usage (by loading at least 
00010 one element in each cache line). 
00011 
00012 We try to delay subsequent accesses to each table (used in the first 
00013 and last rounds) until all of the table has been preloaded. Hopefully
00014 the compiler isn't smart enough to optimize that code away.
00015 
00016 After preloading the table, we also try not to access any memory location
00017 other than the table and the stack, in order to prevent table entries from 
00018 being unloaded from L1 cache, until that round is finished.
00019 (Some popular CPUs have 2-way associative caches.)
00020 */
00021 
00022 // This is the original introductory comment:
00023 
00024 /**
00025  * version 3.0 (December 2000)
00026  *
00027  * Optimised ANSI C code for the Rijndael cipher (now AES)
00028  *
00029  * author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
00030  * author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
00031  * author Paulo Barreto <paulo.barreto@terra.com.br>
00032  *
00033  * This code is hereby placed in the public domain.
00034  *
00035  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
00036  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
00037  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00038  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
00039  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
00040  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
00041  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
00042  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
00043  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
00044  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
00045  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00046  */
00047 
00048 #include "pch.h"
00049 
00050 #ifndef CRYPTOPP_IMPORTS
00051 
00052 #include "rijndael.h"
00053 #include "misc.h"
00054 
00055 NAMESPACE_BEGIN(CryptoPP)
00056 
00057 void Rijndael::Base::UncheckedSetKey(const byte *userKey, unsigned int keylen, const NameValuePairs &)
00058 {
00059         AssertValidKeyLength(keylen);
00060 
00061         m_rounds = keylen/4 + 6;
00062         m_key.New(4*(m_rounds+1));
00063 
00064         word32 temp, *rk = m_key;
00065         const word32 *rc = rcon;
00066 
00067         GetUserKey(BIG_ENDIAN_ORDER, rk, keylen/4, userKey, keylen);
00068 
00069         while (true)
00070         {
00071                 temp  = rk[keylen/4-1];
00072                 rk[keylen/4] = rk[0] ^
00073                         (word32(Se[GETBYTE(temp, 2)]) << 24) ^
00074                         (word32(Se[GETBYTE(temp, 1)]) << 16) ^
00075                         (word32(Se[GETBYTE(temp, 0)]) << 8) ^
00076                         Se[GETBYTE(temp, 3)] ^
00077                         *(rc++);
00078                 rk[keylen/4+1] = rk[1] ^ rk[keylen/4];
00079                 rk[keylen/4+2] = rk[2] ^ rk[keylen/4+1];
00080                 rk[keylen/4+3] = rk[3] ^ rk[keylen/4+2];
00081 
00082                 if (rk + keylen/4 + 4 == m_key.end())
00083                         break;
00084 
00085                 if (keylen == 24)
00086                 {
00087                         rk[10] = rk[ 4] ^ rk[ 9];
00088                         rk[11] = rk[ 5] ^ rk[10];
00089                 }
00090                 else if (keylen == 32)
00091                 {
00092                 temp = rk[11];
00093                 rk[12] = rk[ 4] ^
00094                                 (word32(Se[GETBYTE(temp, 3)]) << 24) ^
00095                                 (word32(Se[GETBYTE(temp, 2)]) << 16) ^
00096                                 (word32(Se[GETBYTE(temp, 1)]) << 8) ^
00097                                 Se[GETBYTE(temp, 0)];
00098                 rk[13] = rk[ 5] ^ rk[12];
00099                 rk[14] = rk[ 6] ^ rk[13];
00100                 rk[15] = rk[ 7] ^ rk[14];
00101                 }
00102                 rk += keylen/4;
00103         }
00104 
00105         if (!IsForwardTransformation())
00106         {
00107                 unsigned int i, j;
00108                 rk = m_key;
00109 
00110                 /* invert the order of the round keys: */
00111                 for (i = 0, j = 4*m_rounds; i < j; i += 4, j -= 4) {
00112                         temp = rk[i    ]; rk[i    ] = rk[j    ]; rk[j    ] = temp;
00113                         temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
00114                         temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
00115                         temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
00116                 }
00117                 /* apply the inverse MixColumn transform to all round keys but the first and the last: */
00118                 for (i = 1; i < m_rounds; i++) {
00119                         rk += 4;
00120                         rk[0] =
00121                                 Td0[Se[GETBYTE(rk[0], 3)]] ^
00122                                 Td1[Se[GETBYTE(rk[0], 2)]] ^
00123                                 Td2[Se[GETBYTE(rk[0], 1)]] ^
00124                                 Td3[Se[GETBYTE(rk[0], 0)]];
00125                         rk[1] =
00126                                 Td0[Se[GETBYTE(rk[1], 3)]] ^
00127                                 Td1[Se[GETBYTE(rk[1], 2)]] ^
00128                                 Td2[Se[GETBYTE(rk[1], 1)]] ^
00129                                 Td3[Se[GETBYTE(rk[1], 0)]];
00130                         rk[2] =
00131                                 Td0[Se[GETBYTE(rk[2], 3)]] ^
00132                                 Td1[Se[GETBYTE(rk[2], 2)]] ^
00133                                 Td2[Se[GETBYTE(rk[2], 1)]] ^
00134                                 Td3[Se[GETBYTE(rk[2], 0)]];
00135                         rk[3] =
00136                                 Td0[Se[GETBYTE(rk[3], 3)]] ^
00137                                 Td1[Se[GETBYTE(rk[3], 2)]] ^
00138                                 Td2[Se[GETBYTE(rk[3], 1)]] ^
00139                                 Td3[Se[GETBYTE(rk[3], 0)]];
00140                 }
00141         }
00142 
00143         ConditionalByteReverse(BIG_ENDIAN_ORDER, m_key.begin(), m_key.begin(), 16);
00144         ConditionalByteReverse(BIG_ENDIAN_ORDER, m_key + m_rounds*4, m_key + m_rounds*4, 16);
00145 }
00146 
00147 const static unsigned int s_lineSizeDiv4 = CRYPTOPP_L1_CACHE_LINE_SIZE/4;
00148 #ifdef IS_BIG_ENDIAN
00149 const static unsigned int s_i3=3, s_i2=2, s_i1=1, s_i0=0;
00150 #else
00151 const static unsigned int s_i3=0, s_i2=1, s_i1=2, s_i0=3;
00152 #endif
00153 
00154 void Rijndael::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
00155 {
00156         word32 s0, s1, s2, s3, t0, t1, t2, t3;
00157         const word32 *rk = m_key;
00158 
00159         s0 = ((const word32 *)inBlock)[0] ^ rk[0];
00160         s1 = ((const word32 *)inBlock)[1] ^ rk[1];
00161         s2 = ((const word32 *)inBlock)[2] ^ rk[2];
00162         s3 = ((const word32 *)inBlock)[3] ^ rk[3];
00163         t0 = rk[4];
00164         t1 = rk[5];
00165         t2 = rk[6];
00166         t3 = rk[7];
00167         rk += 8;
00168 
00169         // timing attack countermeasure. see comments at top for more details
00170         unsigned int i;
00171         word32 u = 0;
00172         for (i=0; i<sizeof(Te0)/4; i+=CRYPTOPP_L1_CACHE_LINE_SIZE)
00173                 u &= (Te0[i+0*s_lineSizeDiv4] & Te0[i+2*s_lineSizeDiv4]) & (Te0[i+1*s_lineSizeDiv4] & Te0[i+3*s_lineSizeDiv4]);
00174         s0 |= u; s1 |= u; s2 |= u; s3 |= u;
00175 
00176         // first round
00177     t0 ^=
00178         Te0[GETBYTE(s0, s_i3)] ^
00179         rotrFixed(Te0[GETBYTE(s1, s_i2)], 8) ^
00180         rotrFixed(Te0[GETBYTE(s2, s_i1)], 16) ^
00181         rotrFixed(Te0[GETBYTE(s3, s_i0)], 24);
00182     t1 ^=
00183         Te0[GETBYTE(s1, s_i3)] ^
00184         rotrFixed(Te0[GETBYTE(s2, s_i2)], 8) ^
00185         rotrFixed(Te0[GETBYTE(s3, s_i1)], 16) ^
00186         rotrFixed(Te0[GETBYTE(s0, s_i0)], 24);
00187     t2 ^=
00188         Te0[GETBYTE(s2, s_i3)] ^
00189         rotrFixed(Te0[GETBYTE(s3, s_i2)], 8) ^
00190         rotrFixed(Te0[GETBYTE(s0, s_i1)], 16) ^
00191         rotrFixed(Te0[GETBYTE(s1, s_i0)], 24);
00192     t3 ^=
00193         Te0[GETBYTE(s3, s_i3)] ^
00194         rotrFixed(Te0[GETBYTE(s0, s_i2)], 8) ^
00195         rotrFixed(Te0[GETBYTE(s1, s_i1)], 16) ^
00196         rotrFixed(Te0[GETBYTE(s2, s_i0)], 24);
00197 
00198         // Nr - 2 full rounds:
00199     unsigned int r = m_rounds/2 - 1;
00200     do
00201         {
00202         s0 =
00203             Te0[GETBYTE(t0, 3)] ^
00204             Te1[GETBYTE(t1, 2)] ^
00205             Te2[GETBYTE(t2, 1)] ^
00206             Te3[GETBYTE(t3, 0)] ^
00207             rk[0];
00208         s1 =
00209             Te0[GETBYTE(t1, 3)] ^
00210             Te1[GETBYTE(t2, 2)] ^
00211             Te2[GETBYTE(t3, 1)] ^
00212             Te3[GETBYTE(t0, 0)] ^
00213             rk[1];
00214         s2 =
00215             Te0[GETBYTE(t2, 3)] ^
00216             Te1[GETBYTE(t3, 2)] ^
00217             Te2[GETBYTE(t0, 1)] ^
00218             Te3[GETBYTE(t1, 0)] ^
00219             rk[2];
00220         s3 =
00221             Te0[GETBYTE(t3, 3)] ^
00222             Te1[GETBYTE(t0, 2)] ^
00223             Te2[GETBYTE(t1, 1)] ^
00224             Te3[GETBYTE(t2, 0)] ^
00225             rk[3];
00226 
00227         t0 =
00228             Te0[GETBYTE(s0, 3)] ^
00229             Te1[GETBYTE(s1, 2)] ^
00230             Te2[GETBYTE(s2, 1)] ^
00231             Te3[GETBYTE(s3, 0)] ^
00232             rk[4];
00233         t1 =
00234             Te0[GETBYTE(s1, 3)] ^
00235             Te1[GETBYTE(s2, 2)] ^
00236             Te2[GETBYTE(s3, 1)] ^
00237             Te3[GETBYTE(s0, 0)] ^
00238             rk[5];
00239         t2 =
00240             Te0[GETBYTE(s2, 3)] ^
00241             Te1[GETBYTE(s3, 2)] ^
00242             Te2[GETBYTE(s0, 1)] ^
00243             Te3[GETBYTE(s1, 0)] ^
00244             rk[6];
00245         t3 =
00246             Te0[GETBYTE(s3, 3)] ^
00247             Te1[GETBYTE(s0, 2)] ^
00248             Te2[GETBYTE(s1, 1)] ^
00249             Te3[GETBYTE(s2, 0)] ^
00250             rk[7];
00251 
00252         rk += 8;
00253     } while (--r);
00254 
00255         // timing attack countermeasure. see comments at top for more details
00256         u = 0;
00257         for (i=0; i<sizeof(Se)/4; i+=CRYPTOPP_L1_CACHE_LINE_SIZE)
00258                 u &= (((word32*)Se)[i+0*s_lineSizeDiv4] & ((word32*)Se)[i+2*s_lineSizeDiv4]) & (((word32*)Se)[i+1*s_lineSizeDiv4] & ((word32*)Se)[i+3*s_lineSizeDiv4]);
00259         t0 |= u; t1 |= u; t2 |= u; t3 |= u;
00260 
00261         word32 tbw[4];
00262         byte *const tempBlock = (byte *)tbw;
00263         word32 *const obw = (word32 *)outBlock;
00264         const word32 *const xbw = (const word32 *)xorBlock;
00265 
00266         // last round
00267         tempBlock[0] = Se[GETBYTE(t0, 3)];
00268         tempBlock[1] = Se[GETBYTE(t1, 2)];
00269         tempBlock[2] = Se[GETBYTE(t2, 1)];
00270         tempBlock[3] = Se[GETBYTE(t3, 0)];
00271         tempBlock[4] = Se[GETBYTE(t1, 3)];
00272         tempBlock[5] = Se[GETBYTE(t2, 2)];
00273         tempBlock[6] = Se[GETBYTE(t3, 1)];
00274         tempBlock[7] = Se[GETBYTE(t0, 0)];
00275         tempBlock[8] = Se[GETBYTE(t2, 3)];
00276         tempBlock[9] = Se[GETBYTE(t3, 2)];
00277         tempBlock[10] = Se[GETBYTE(t0, 1)];
00278         tempBlock[11] = Se[GETBYTE(t1, 0)];
00279         tempBlock[12] = Se[GETBYTE(t3, 3)];
00280         tempBlock[13] = Se[GETBYTE(t0, 2)];
00281         tempBlock[14] = Se[GETBYTE(t1, 1)];
00282         tempBlock[15] = Se[GETBYTE(t2, 0)];
00283 
00284         if (xbw)
00285         {
00286                 obw[0] = tbw[0] ^ xbw[0] ^ rk[0];
00287                 obw[1] = tbw[1] ^ xbw[1] ^ rk[1];
00288                 obw[2] = tbw[2] ^ xbw[2] ^ rk[2];
00289                 obw[3] = tbw[3] ^ xbw[3] ^ rk[3];
00290         }
00291         else
00292         {
00293                 obw[0] = tbw[0] ^ rk[0];
00294                 obw[1] = tbw[1] ^ rk[1];
00295                 obw[2] = tbw[2] ^ rk[2];
00296                 obw[3] = tbw[3] ^ rk[3];
00297         }
00298 }
00299 
00300 void Rijndael::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
00301 {
00302         word32 s0, s1, s2, s3, t0, t1, t2, t3;
00303     const word32 *rk = m_key;
00304 
00305         s0 = ((const word32 *)inBlock)[0] ^ rk[0];
00306         s1 = ((const word32 *)inBlock)[1] ^ rk[1];
00307         s2 = ((const word32 *)inBlock)[2] ^ rk[2];
00308         s3 = ((const word32 *)inBlock)[3] ^ rk[3];
00309         t0 = rk[4];
00310         t1 = rk[5];
00311         t2 = rk[6];
00312         t3 = rk[7];
00313         rk += 8;
00314 
00315         // timing attack countermeasure. see comments at top for more details
00316         unsigned int i;
00317         word32 u = 0;
00318         for (i=0; i<sizeof(Td0)/4; i+=CRYPTOPP_L1_CACHE_LINE_SIZE)
00319                 u &= (Td0[i+0*s_lineSizeDiv4] & Td0[i+2*s_lineSizeDiv4]) & (Td0[i+1*s_lineSizeDiv4] & Td0[i+3*s_lineSizeDiv4]);
00320         s0 |= u; s1 |= u; s2 |= u; s3 |= u;
00321 
00322         // first round
00323     t0 ^=
00324         Td0[GETBYTE(s0, s_i3)] ^
00325         rotrFixed(Td0[GETBYTE(s3, s_i2)], 8) ^
00326         rotrFixed(Td0[GETBYTE(s2, s_i1)], 16) ^
00327         rotrFixed(Td0[GETBYTE(s1, s_i0)], 24);
00328     t1 ^=
00329         Td0[GETBYTE(s1, s_i3)] ^
00330         rotrFixed(Td0[GETBYTE(s0, s_i2)], 8) ^
00331         rotrFixed(Td0[GETBYTE(s3, s_i1)], 16) ^
00332         rotrFixed(Td0[GETBYTE(s2, s_i0)], 24);
00333     t2 ^=
00334         Td0[GETBYTE(s2, s_i3)] ^
00335         rotrFixed(Td0[GETBYTE(s1, s_i2)], 8) ^
00336         rotrFixed(Td0[GETBYTE(s0, s_i1)], 16) ^
00337         rotrFixed(Td0[GETBYTE(s3, s_i0)], 24);
00338     t3 ^=
00339         Td0[GETBYTE(s3, s_i3)] ^
00340         rotrFixed(Td0[GETBYTE(s2, s_i2)], 8) ^
00341         rotrFixed(Td0[GETBYTE(s1, s_i1)], 16) ^
00342         rotrFixed(Td0[GETBYTE(s0, s_i0)], 24);
00343 
00344         // Nr - 2 full rounds:
00345     unsigned int r = m_rounds/2 - 1;
00346     do
00347         {
00348         s0 =
00349             Td0[GETBYTE(t0, 3)] ^
00350             Td1[GETBYTE(t3, 2)] ^
00351             Td2[GETBYTE(t2, 1)] ^
00352             Td3[GETBYTE(t1, 0)] ^
00353             rk[0];
00354         s1 =
00355             Td0[GETBYTE(t1, 3)] ^
00356             Td1[GETBYTE(t0, 2)] ^
00357             Td2[GETBYTE(t3, 1)] ^
00358             Td3[GETBYTE(t2, 0)] ^
00359             rk[1];
00360         s2 =
00361             Td0[GETBYTE(t2, 3)] ^
00362             Td1[GETBYTE(t1, 2)] ^
00363             Td2[GETBYTE(t0, 1)] ^
00364             Td3[GETBYTE(t3, 0)] ^
00365             rk[2];
00366         s3 =
00367             Td0[GETBYTE(t3, 3)] ^
00368             Td1[GETBYTE(t2, 2)] ^
00369             Td2[GETBYTE(t1, 1)] ^
00370             Td3[GETBYTE(t0, 0)] ^
00371             rk[3];
00372 
00373         t0 =
00374             Td0[GETBYTE(s0, 3)] ^
00375             Td1[GETBYTE(s3, 2)] ^
00376             Td2[GETBYTE(s2, 1)] ^
00377             Td3[GETBYTE(s1, 0)] ^
00378             rk[4];
00379         t1 =
00380             Td0[GETBYTE(s1, 3)] ^
00381             Td1[GETBYTE(s0, 2)] ^
00382             Td2[GETBYTE(s3, 1)] ^
00383             Td3[GETBYTE(s2, 0)] ^
00384             rk[5];
00385         t2 =
00386             Td0[GETBYTE(s2, 3)] ^
00387             Td1[GETBYTE(s1, 2)] ^
00388             Td2[GETBYTE(s0, 1)] ^
00389             Td3[GETBYTE(s3, 0)] ^
00390             rk[6];
00391         t3 =
00392             Td0[GETBYTE(s3, 3)] ^
00393             Td1[GETBYTE(s2, 2)] ^
00394             Td2[GETBYTE(s1, 1)] ^
00395             Td3[GETBYTE(s0, 0)] ^
00396             rk[7];
00397 
00398         rk += 8;
00399     } while (--r);
00400 
00401         // timing attack countermeasure. see comments at top for more details
00402         u = 0;
00403         for (i=0; i<sizeof(Sd)/4; i+=CRYPTOPP_L1_CACHE_LINE_SIZE)
00404                 u &= (((word32*)Sd)[i+0*s_lineSizeDiv4] & ((word32*)Sd)[i+2*s_lineSizeDiv4]) & (((word32*)Sd)[i+1*s_lineSizeDiv4] & ((word32*)Sd)[i+3*s_lineSizeDiv4]);
00405         t0 |= u; t1 |= u; t2 |= u; t3 |= u;
00406 
00407         word32 tbw[4];
00408         byte *const tempBlock = (byte *)tbw;
00409         word32 *const obw = (word32 *)outBlock;
00410         const word32 *const xbw = (const word32 *)xorBlock;
00411 
00412         // last round
00413         tempBlock[0] = Sd[GETBYTE(t0, 3)];
00414         tempBlock[1] = Sd[GETBYTE(t3, 2)];
00415         tempBlock[2] = Sd[GETBYTE(t2, 1)];
00416         tempBlock[3] = Sd[GETBYTE(t1, 0)];
00417         tempBlock[4] = Sd[GETBYTE(t1, 3)];
00418         tempBlock[5] = Sd[GETBYTE(t0, 2)];
00419         tempBlock[6] = Sd[GETBYTE(t3, 1)];
00420         tempBlock[7] = Sd[GETBYTE(t2, 0)];
00421         tempBlock[8] = Sd[GETBYTE(t2, 3)];
00422         tempBlock[9] = Sd[GETBYTE(t1, 2)];
00423         tempBlock[10] = Sd[GETBYTE(t0, 1)];
00424         tempBlock[11] = Sd[GETBYTE(t3, 0)];
00425         tempBlock[12] = Sd[GETBYTE(t3, 3)];
00426         tempBlock[13] = Sd[GETBYTE(t2, 2)];
00427         tempBlock[14] = Sd[GETBYTE(t1, 1)];
00428         tempBlock[15] = Sd[GETBYTE(t0, 0)];
00429 
00430         if (xbw)
00431         {
00432                 obw[0] = tbw[0] ^ xbw[0] ^ rk[0];
00433                 obw[1] = tbw[1] ^ xbw[1] ^ rk[1];
00434                 obw[2] = tbw[2] ^ xbw[2] ^ rk[2];
00435                 obw[3] = tbw[3] ^ xbw[3] ^ rk[3];
00436         }
00437         else
00438         {
00439                 obw[0] = tbw[0] ^ rk[0];
00440                 obw[1] = tbw[1] ^ rk[1];
00441                 obw[2] = tbw[2] ^ rk[2];
00442                 obw[3] = tbw[3] ^ rk[3];
00443         }
00444 }
00445 
00446 NAMESPACE_END
00447 
00448 #endif

Generated on Sat Dec 23 02:07:09 2006 for Crypto++ by  doxygen 1.5.1-p1