rijndael.cpp

00001 // rijndael.cpp - modified by Chris Morgan <cmorgan@wpi.edu>
00002 // and Wei Dai from Paulo Barreto's Rijndael implementation
00003 // The original code and all modifications are in the public domain.
00004 
00005 // use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM rijndael.cpp" to generate MASM code
00006 
00007 /*
00008 Defense against timing attacks was added in July 2006 by Wei Dai.
00009 
00010 The code now uses smaller tables in the first and last rounds,
00011 and preloads them into L1 cache before usage (by loading at least 
00012 one element in each cache line). 
00013 
00014 We try to delay subsequent accesses to each table (used in the first 
00015 and last rounds) until all of the table has been preloaded. Hopefully
00016 the compiler isn't smart enough to optimize that code away.
00017 
00018 After preloading the table, we also try not to access any memory location
00019 other than the table and the stack, in order to prevent table entries from 
00020 being unloaded from L1 cache, until that round is finished.
00021 (Some popular CPUs have 2-way associative caches.)
00022 */
00023 
00024 // This is the original introductory comment:
00025 
00026 /**
00027  * version 3.0 (December 2000)
00028  *
00029  * Optimised ANSI C code for the Rijndael cipher (now AES)
00030  *
00031  * author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
00032  * author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
00033  * author Paulo Barreto <paulo.barreto@terra.com.br>
00034  *
00035  * This code is hereby placed in the public domain.
00036  *
00037  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
00038  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
00039  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00040  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
00041  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
00042  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
00043  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
00044  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
00045  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
00046  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
00047  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00048  */
00049 
00050 #include "pch.h"
00051 
00052 #ifndef CRYPTOPP_IMPORTS
00053 #ifndef CRYPTOPP_GENERATE_X64_MASM
00054 
00055 #include "rijndael.h"
00056 #include "misc.h"
00057 #include "cpu.h"
00058 
00059 NAMESPACE_BEGIN(CryptoPP)
00060 
// Expand the user key into the round-key schedule (AES key expansion,
// FIPS 197 section 5.2).  For decryption, the schedule is additionally
// converted to the "equivalent inverse cipher" form (FIPS 197 section
// 5.3.5): round keys reversed and InvMixColumns applied to the middle ones.
void Rijndael::Base::UncheckedSetKey(const byte *userKey, unsigned int keylen, const NameValuePairs &)
{
        AssertValidKeyLength(keylen);

        // Nr = Nk + 6, where Nk = keylen/4 is the key length in 32-bit words
        // (so 10/12/14 rounds for 128/192/256-bit keys).  The schedule holds
        // 4*(Nr+1) words: one 4-word round key per round plus the final one.
        m_rounds = keylen/4 + 6;
        m_key.New(4*(m_rounds+1));

        word32 temp, *rk = m_key;
        const word32 *rc = rcon;        // round constants, one consumed per core step

        // Load the raw key as big-endian words into the first Nk schedule slots.
        GetUserKey(BIG_ENDIAN_ORDER, rk, keylen/4, userKey, keylen);

        while (true)
        {
                // Core step: the previous word is run through RotWord+SubWord
                // (the shifted Se[] lookups below) and XORed with the next
                // round constant; the following three words are plain XOR
                // chains.  rk still points at the start of the current
                // Nk-word window, so rk[keylen/4+i] writes the new words.
                temp  = rk[keylen/4-1];
                rk[keylen/4] = rk[0] ^
                        (word32(Se[GETBYTE(temp, 2)]) << 24) ^
                        (word32(Se[GETBYTE(temp, 1)]) << 16) ^
                        (word32(Se[GETBYTE(temp, 0)]) << 8) ^
                        Se[GETBYTE(temp, 3)] ^
                        *(rc++);
                rk[keylen/4+1] = rk[1] ^ rk[keylen/4];
                rk[keylen/4+2] = rk[2] ^ rk[keylen/4+1];
                rk[keylen/4+3] = rk[3] ^ rk[keylen/4+2];

                // Done once the last four schedule words have been produced.
                if (rk + keylen/4 + 4 == m_key.end())
                        break;

                if (keylen == 24)
                {
                        // 192-bit keys expand six words per window: two more
                        // plain XOR steps beyond the four above.
                        rk[10] = rk[ 4] ^ rk[ 9];
                        rk[11] = rk[ 5] ^ rk[10];
                }
                else if (keylen == 32)
                {
                // 256-bit keys expand eight words per window; per FIPS 197
                // the fifth word gets an extra SubWord (no rotation).
                temp = rk[11];
                rk[12] = rk[ 4] ^
                                (word32(Se[GETBYTE(temp, 3)]) << 24) ^
                                (word32(Se[GETBYTE(temp, 2)]) << 16) ^
                                (word32(Se[GETBYTE(temp, 1)]) << 8) ^
                                Se[GETBYTE(temp, 0)];
                rk[13] = rk[ 5] ^ rk[12];
                rk[14] = rk[ 6] ^ rk[13];
                rk[15] = rk[ 7] ^ rk[14];
                }
                rk += keylen/4;         // slide the window forward by Nk words
        }

        if (!IsForwardTransformation())
        {
                unsigned int i, j;
                rk = m_key;

                /* invert the order of the round keys: */
                for (i = 0, j = 4*m_rounds; i < j; i += 4, j -= 4) {
                        temp = rk[i    ]; rk[i    ] = rk[j    ]; rk[j    ] = temp;
                        temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
                        temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
                        temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
                }
                /* apply the inverse MixColumn transform to all round keys but the first and the last: */
                // Td[Se[x]] composes SubBytes with the inverse-cipher table;
                // on a round-key word the net effect is InvMixColumns alone,
                // which is what the equivalent inverse cipher requires.
                for (i = 1; i < m_rounds; i++) {
                        rk += 4;
                        rk[0] =
                                Td[0*256+Se[GETBYTE(rk[0], 3)]] ^
                                Td[1*256+Se[GETBYTE(rk[0], 2)]] ^
                                Td[2*256+Se[GETBYTE(rk[0], 1)]] ^
                                Td[3*256+Se[GETBYTE(rk[0], 0)]];
                        rk[1] =
                                Td[0*256+Se[GETBYTE(rk[1], 3)]] ^
                                Td[1*256+Se[GETBYTE(rk[1], 2)]] ^
                                Td[2*256+Se[GETBYTE(rk[1], 1)]] ^
                                Td[3*256+Se[GETBYTE(rk[1], 0)]];
                        rk[2] =
                                Td[0*256+Se[GETBYTE(rk[2], 3)]] ^
                                Td[1*256+Se[GETBYTE(rk[2], 2)]] ^
                                Td[2*256+Se[GETBYTE(rk[2], 1)]] ^
                                Td[3*256+Se[GETBYTE(rk[2], 0)]];
                        rk[3] =
                                Td[0*256+Se[GETBYTE(rk[3], 3)]] ^
                                Td[1*256+Se[GETBYTE(rk[3], 2)]] ^
                                Td[2*256+Se[GETBYTE(rk[3], 1)]] ^
                                Td[3*256+Se[GETBYTE(rk[3], 0)]];
                }
        }

        // Byte-reverse the first and last round keys (a no-op on big-endian
        // machines) so ProcessAndXorBlock can XOR them directly against the
        // raw input/output words without per-block byte swapping.
        ConditionalByteReverse(BIG_ENDIAN_ORDER, m_key.begin(), m_key.begin(), 16);
        ConditionalByteReverse(BIG_ENDIAN_ORDER, m_key + m_rounds*4, m_key + m_rounds*4, 16);
}
00150 
00151 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
00152 extern "C" {
00153 void Rijndael_Enc_ProcessAndXorBlock(const word32 *table, word32 cacheLineSize, const word32 *k, const word32 *kLoopEnd, const byte *inBlock, const byte *xorBlock, byte *outBlock);
00154 }
00155 #endif
00156 
00157 #pragma warning(disable: 4731)  // frame pointer register 'ebp' modified by inline assembly code
00158 
// Encrypt one 16-byte block: outBlock = E_k(inBlock), XORed with xorBlock
// first if xorBlock is non-NULL.  Three implementation paths share this
// text: (1) a pre-assembled x64 MASM routine, (2) inline assembly for
// x86/x64 (MMX-based register spilling on x86), and (3) a portable C++
// fallback.  NOTE the unusual preprocessor structure: the #endif just below
// closes the #ifndef CRYPTOPP_GENERATE_X64_MASM opened near the top of the
// file, so when this file is preprocessed with CRYPTOPP_GENERATE_X64_MASM
// defined, only the assembly text survives and a standalone MASM procedure
// is emitted (see the "cl /EP /P" note at the top of the file).
void Rijndael::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
{
#endif  // #ifdef CRYPTOPP_GENERATE_X64_MASM

#ifdef CRYPTOPP_X64_MASM_AVAILABLE
        // Path (1): delegate to the separately assembled MASM implementation.
        Rijndael_Enc_ProcessAndXorBlock(Te, g_cacheLineSize, m_key, m_key + m_rounds*4, inBlock, xorBlock, outBlock);
        return;
#endif

#if defined(CRYPTOPP_X86_ASM_AVAILABLE)
        #ifdef CRYPTOPP_GENERATE_X64_MASM
                ALIGN   8
        Rijndael_Enc_ProcessAndXorBlock PROC FRAME
                rex_push_reg rbx
                push_reg rsi
                push_reg rdi
                push_reg r12
                push_reg r13
                push_reg r14
                push_reg r15
                .endprolog
                mov             AS_REG_7, rcx
                mov             rdi, [rsp + 5*8 + 7*8]                  ; inBlock
        #else
        if (HasMMX())
        {
                const word32 *k = m_key;
                const word32 *kLoopEnd = k + m_rounds*4;
        #endif

                // Register-allocation shims: x64 has enough GPRs to keep the
                // key pointer and spilled state words in r8/r9/r13-r15; x86
                // parks them in MMX registers instead (hence movd).
                #if CRYPTOPP_BOOL_X64
                        #define K_REG                   r8
                        #define K_END_REG               r9
                        #define SAVE_K
                        #define RESTORE_K
                        #define RESTORE_K_END
                        #define SAVE_0(x)               AS2(mov r13d, x)
                        #define SAVE_1(x)               AS2(mov r14d, x)
                        #define SAVE_2(x)               AS2(mov r15d, x)
                        #define RESTORE_0(x)    AS2(mov x, r13d)
                        #define RESTORE_1(x)    AS2(mov x, r14d)
                        #define RESTORE_2(x)    AS2(mov x, r15d)
                #else
                        #define K_REG                   esi
                        #define K_END_REG               edi
                        #define SAVE_K                  AS2(movd        mm4, esi)
                        #define RESTORE_K               AS2(movd        esi, mm4)
                        #define RESTORE_K_END   AS2(movd        edi, mm5)
                        #define SAVE_0(x)               AS2(movd        mm0, x)
                        #define SAVE_1(x)               AS2(movd        mm1, x)
                        #define SAVE_2(x)               AS2(movd        mm2, x)
                        #define RESTORE_0(x)    AS2(movd        x, mm0)
                        #define RESTORE_1(x)    AS2(movd        x, mm1)
                        #define RESTORE_2(x)    AS2(movd        x, mm2)
                #endif
#ifdef __GNUC__
                word32 t0, t1, t2, t3;
                __asm__ __volatile__
                (
                ".intel_syntax noprefix;"
        #if CRYPTOPP_BOOL_X64
                AS2(    mov             K_REG, rsi)
                AS2(    mov             K_END_REG, rcx)
        #else
                AS1(    push    ebx)
                AS1(    push    ebp)
                AS2(    movd    mm5, ecx)
        #endif
                AS2(    mov             AS_REG_7, WORD_REG(ax))
#elif CRYPTOPP_BOOL_X86
        #if _MSC_VER < 1300
                const word32 *t = Te;
                AS2(    mov             eax, t)
        #endif
                AS2(    mov             edx, g_cacheLineSize)
                AS2(    mov             WORD_REG(di), inBlock)
                AS2(    mov             K_REG, k)
                AS2(    movd    mm5, kLoopEnd)
        #if _MSC_VER < 1300
                AS1(    push    ebx)
                AS1(    push    ebp)
                AS2(    mov             AS_REG_7, eax)
        #else
                AS1(    push    ebp)
                AS2(    lea             AS_REG_7, Te)
        #endif
#endif
                // Initial AddRoundKey: XOR the first round key into the four
                // input words; ebx accumulates an AND of all of them purely
                // to create a data dependency for the cache preload below.
                AS2(    mov             eax, [K_REG+0*4])       // s0
                AS2(    xor             eax, [WORD_REG(di)+0*4])
                SAVE_0(eax)
                AS2(    mov             ebx, [K_REG+1*4])
                AS2(    xor             ebx, [WORD_REG(di)+1*4])
                SAVE_1(ebx)
                AS2(    and             ebx, eax)
                AS2(    mov             eax, [K_REG+2*4])
                AS2(    xor             eax, [WORD_REG(di)+2*4])
                SAVE_2(eax)
                AS2(    and             ebx, eax)
                AS2(    mov             ecx, [K_REG+3*4])
                AS2(    xor             ecx, [WORD_REG(di)+3*4])
                AS2(    and             ebx, ecx)

                // read Te0 into L1 cache. this code could be simplified by using lfence, but that is an SSE2 instruction
                // Timing-attack countermeasure: touch one word per cache line
                // over the first 1024 bytes of the table (edx holds the cache
                // line size).  ebx is forced to 0 first, so the ANDs leave it
                // 0 while still issuing the loads.
                AS2(    and             ebx, 0)
                AS2(    mov             edi, ebx)       // make index depend on previous loads to simulate lfence
                ASL(2)
                AS2(    and             ebx, [AS_REG_7+WORD_REG(di)])
                AS2(    add             edi, edx)
                AS2(    and             ebx, [AS_REG_7+WORD_REG(di)])
                AS2(    add             edi, edx)
                AS2(    and             ebx, [AS_REG_7+WORD_REG(di)])
                AS2(    add             edi, edx)
                AS2(    and             ebx, [AS_REG_7+WORD_REG(di)])
                AS2(    add             edi, edx)
                AS2(    cmp             edi, 1024)
                ASJ(    jl,             2, b)
                AS2(    and             ebx, [AS_REG_7+1020])
                // XOR the (always-zero) ebx into the saved state so the state
                // now depends on the preload loads and cannot be reordered
                // ahead of them.
#if CRYPTOPP_BOOL_X64
                AS2(    xor             r13d, ebx)
                AS2(    xor             r14d, ebx)
                AS2(    xor             r15d, ebx)
#else
                AS2(    movd    mm6, ebx)
                AS2(    pxor    mm2, mm6)
                AS2(    pxor    mm1, mm6)
                AS2(    pxor    mm0, mm6)
#endif
                AS2(    xor             ecx, ebx)

                // Load the second round key and advance K_REG past the two
                // round keys consumed before the main loop.
                AS2(    mov             edi, [K_REG+4*4])       // t0
                AS2(    mov             eax, [K_REG+5*4])
                AS2(    mov             ebx, [K_REG+6*4])
                AS2(    mov             edx, [K_REG+7*4])
                AS2(    add             K_REG, 8*4)
                SAVE_K

// One quarter of a round: split the 32-bit word in register t into its four
// bytes (low byte, high byte, then the upper half after a 16-bit shift) and
// XOR the corresponding entries of the four 1KB Te sub-tables into a/b/c/d.
#define QUARTER_ROUND(t, a, b, c, d)    \
        AS2(movzx esi, t##l)\
        AS2(d, [AS_REG_7+0*1024+4*WORD_REG(si)])\
        AS2(movzx esi, t##h)\
        AS2(c, [AS_REG_7+1*1024+4*WORD_REG(si)])\
        AS2(shr e##t##x, 16)\
        AS2(movzx esi, t##l)\
        AS2(b, [AS_REG_7+2*1024+4*WORD_REG(si)])\
        AS2(movzx esi, t##h)\
        AS2(a, [AS_REG_7+3*1024+4*WORD_REG(si)])

// Map the s/t state names onto "xor <register>" operands for QUARTER_ROUND.
#define s0              xor edi
#define s1              xor eax
#define s2              xor ebx
#define s3              xor ecx
#define t0              xor edi
#define t1              xor eax
#define t2              xor ebx
#define t3              xor edx

                // First full round (s -> t), rotating the spilled state words
                // through ecx one at a time since x86 is register-starved.
                QUARTER_ROUND(c, t0, t1, t2, t3)
                RESTORE_2(ecx)
                QUARTER_ROUND(c, t3, t0, t1, t2)
                RESTORE_1(ecx)
                QUARTER_ROUND(c, t2, t3, t0, t1)
                RESTORE_0(ecx)
                QUARTER_ROUND(c, t1, t2, t3, t0)
                SAVE_2(ebx)
                SAVE_1(eax)
                SAVE_0(edi)
#undef QUARTER_ROUND

                RESTORE_K

                // Main loop: two rounds per iteration (t -> s, then s -> t),
                // consuming two round keys, until K_REG reaches kLoopEnd.
                ASL(0)
                AS2(    mov             edi, [K_REG+0*4])
                AS2(    mov             eax, [K_REG+1*4])
                AS2(    mov             ebx, [K_REG+2*4])
                AS2(    mov             ecx, [K_REG+3*4])

// Same quarter round but with the byte-to-table pairing reversed, matching
// the alternating direction of the two rounds in the loop body.
#define QUARTER_ROUND(t, a, b, c, d)    \
        AS2(movzx esi, t##l)\
        AS2(a, [AS_REG_7+3*1024+4*WORD_REG(si)])\
        AS2(movzx esi, t##h)\
        AS2(b, [AS_REG_7+2*1024+4*WORD_REG(si)])\
        AS2(shr e##t##x, 16)\
        AS2(movzx esi, t##l)\
        AS2(c, [AS_REG_7+1*1024+4*WORD_REG(si)])\
        AS2(movzx esi, t##h)\
        AS2(d, [AS_REG_7+0*1024+4*WORD_REG(si)])

                QUARTER_ROUND(d, s0, s1, s2, s3)
                RESTORE_2(edx)
                QUARTER_ROUND(d, s3, s0, s1, s2)
                RESTORE_1(edx)
                QUARTER_ROUND(d, s2, s3, s0, s1)
                RESTORE_0(edx)
                QUARTER_ROUND(d, s1, s2, s3, s0)
                RESTORE_K
                SAVE_2(ebx)
                SAVE_1(eax)
                SAVE_0(edi)

                AS2(    mov             edi, [K_REG+4*4])
                AS2(    mov             eax, [K_REG+5*4])
                AS2(    mov             ebx, [K_REG+6*4])
                AS2(    mov             edx, [K_REG+7*4])

                QUARTER_ROUND(c, t0, t1, t2, t3)
                RESTORE_2(ecx)
                QUARTER_ROUND(c, t3, t0, t1, t2)
                RESTORE_1(ecx)
                QUARTER_ROUND(c, t2, t3, t0, t1)
                RESTORE_0(ecx)
                QUARTER_ROUND(c, t1, t2, t3, t0)
                SAVE_2(ebx)
                SAVE_1(eax)
                SAVE_0(edi)

                RESTORE_K
                RESTORE_K_END
                AS2(    add             K_REG, 8*4)
                SAVE_K
                AS2(    cmp             K_END_REG, K_REG)
                ASJ(    jne,    0, b)

#undef QUARTER_ROUND
#undef s0
#undef s1
#undef s2
#undef s3
#undef t0
#undef t1
#undef t2
#undef t3

                // Final round: XOR in the last round key, then substitute
                // bytes only (no MixColumns).
                AS2(    mov             eax, [K_END_REG+0*4])
                AS2(    mov             ecx, [K_END_REG+1*4])
                AS2(    mov             esi, [K_END_REG+2*4])
                AS2(    mov             edi, [K_END_REG+3*4])

// Final-round quarter: extract each byte of edx and look up a single byte at
// offset 1 within the corresponding 4-byte Te entry (presumably Te is laid
// out so that byte equals the S-box value Se[x] — this avoids needing a
// separate S-box table in cache), then shift it into position and XOR it in.
#define QUARTER_ROUND(a, b, c, d)       \
        AS2(    movzx   ebx, dl)\
        AS2(    movzx   ebx, BYTE PTR [AS_REG_7+1+4*WORD_REG(bx)])\
        AS2(    shl             ebx, 3*8)\
        AS2(    xor             a, ebx)\
        AS2(    movzx   ebx, dh)\
        AS2(    movzx   ebx, BYTE PTR [AS_REG_7+1+4*WORD_REG(bx)])\
        AS2(    shl             ebx, 2*8)\
        AS2(    xor             b, ebx)\
        AS2(    shr             edx, 16)\
        AS2(    movzx   ebx, dl)\
        AS2(    shr             edx, 8)\
        AS2(    movzx   ebx, BYTE PTR [AS_REG_7+1+4*WORD_REG(bx)])\
        AS2(    shl             ebx, 1*8)\
        AS2(    xor             c, ebx)\
        AS2(    movzx   ebx, BYTE PTR [AS_REG_7+1+4*WORD_REG(dx)])\
        AS2(    xor             d, ebx)

                QUARTER_ROUND(eax, ecx, esi, edi)
                RESTORE_2(edx)
                QUARTER_ROUND(edi, eax, ecx, esi)
                RESTORE_1(edx)
                QUARTER_ROUND(esi, edi, eax, ecx)
                RESTORE_0(edx)
                QUARTER_ROUND(ecx, esi, edi, eax)

#undef QUARTER_ROUND

#if CRYPTOPP_BOOL_X86
                AS1(emms)               // leave MMX state clean for the FPU
                AS1(pop         ebp)
        #if defined(__GNUC__) || (defined(_MSC_VER) && _MSC_VER < 1300)
                AS1(pop         ebx)
        #endif
#endif

#ifdef __GNUC__
                ".att_syntax prefix;"
                        : "=a" (t0), "=c" (t1), "=S" (t2), "=D" (t3)
                        : "a" (Te), "D" (inBlock), "S" (k), "c" (kLoopEnd), "d" (g_cacheLineSize)
                        : "memory", "cc"
        #if CRYPTOPP_BOOL_X64
                        , "%ebx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15"
        #endif
                );

                // GCC path: the asm leaves the ciphertext words in t0..t3;
                // apply the optional XOR and store from C++.
                if (xorBlock)
                {
                        t0 ^= ((const word32 *)xorBlock)[0];
                        t1 ^= ((const word32 *)xorBlock)[1];
                        t2 ^= ((const word32 *)xorBlock)[2];
                        t3 ^= ((const word32 *)xorBlock)[3];
                }
                ((word32 *)outBlock)[0] = t0;
                ((word32 *)outBlock)[1] = t1;
                ((word32 *)outBlock)[2] = t2;
                ((word32 *)outBlock)[3] = t3;
#else
                // MSVC/MASM path: the optional XOR and the final store are
                // done in assembly as well.
        #if CRYPTOPP_BOOL_X64
                mov             rbx, [rsp + 6*8 + 7*8]                  ; xorBlock
        #else
                AS2(    mov             ebx, xorBlock)
        #endif
                AS2(    test    WORD_REG(bx), WORD_REG(bx))
                ASJ(    jz,             1, f)
                AS2(    xor             eax, [WORD_REG(bx)+0*4])
                AS2(    xor             ecx, [WORD_REG(bx)+1*4])
                AS2(    xor             esi, [WORD_REG(bx)+2*4])
                AS2(    xor             edi, [WORD_REG(bx)+3*4])
                ASL(1)
        #if CRYPTOPP_BOOL_X64
                mov             rbx, [rsp + 7*8 + 7*8]                  ; outBlock
        #else
                AS2(    mov             ebx, outBlock)
        #endif
                AS2(    mov             [WORD_REG(bx)+0*4], eax)
                AS2(    mov             [WORD_REG(bx)+1*4], ecx)
                AS2(    mov             [WORD_REG(bx)+2*4], esi)
                AS2(    mov             [WORD_REG(bx)+3*4], edi)
#endif

#if CRYPTOPP_GENERATE_X64_MASM
                pop r15
                pop r14
                pop r13
                pop r12
                pop rdi
                pop rsi
                pop rbx
                ret
        Rijndael_Enc_ProcessAndXorBlock ENDP
#else
        }
        else
#endif
#endif  // #ifdef CRYPTOPP_X86_ASM_AVAILABLE
#ifndef CRYPTOPP_GENERATE_X64_MASM
        // Path (3): portable C++ fallback (also the no-MMX branch above).
        {
        word32 s0, s1, s2, s3, t0, t1, t2, t3;
        const word32 *rk = m_key;

        // Initial AddRoundKey.  The first round key was stored byte-reversed
        // by UncheckedSetKey so raw input words can be XORed directly.
        s0 = ((const word32 *)inBlock)[0] ^ rk[0];
        s1 = ((const word32 *)inBlock)[1] ^ rk[1];
        s2 = ((const word32 *)inBlock)[2] ^ rk[2];
        s3 = ((const word32 *)inBlock)[3] ^ rk[3];
        t0 = rk[4];
        t1 = rk[5];
        t2 = rk[6];
        t3 = rk[7];
        rk += 8;

        // timing attack countermeasure. see comments at top for more details
        // u starts at 0 and stays 0 (AND with anything is 0); the loads exist
        // only to pull one word per cache line of the first 1KB of Te into
        // L1, and the s |= u below makes the state depend on those loads.
        const int cacheLineSize = GetCacheLineSize();
        unsigned int i;
        word32 u = 0;
        for (i=0; i<1024; i+=cacheLineSize)
                u &= *(const word32 *)(((const byte *)Te)+i);
        u &= Te[255];
        s0 |= u; s1 |= u; s2 |= u; s3 |= u;

        // first round
        // Uses only the first 256-entry Te table plus rotations (instead of
        // four tables) so that only the preloaded 1KB is touched.
#ifdef IS_BIG_ENDIAN
#define QUARTER_ROUND(t, a, b, c, d)    \
                a ^= rotrFixed(Te[byte(t)], 24);        t >>= 8;\
                b ^= rotrFixed(Te[byte(t)], 16);        t >>= 8;\
                c ^= rotrFixed(Te[byte(t)], 8); t >>= 8;\
                d ^= Te[t];
#else
#define QUARTER_ROUND(t, a, b, c, d)    \
                d ^= Te[byte(t)];                                       t >>= 8;\
                c ^= rotrFixed(Te[byte(t)], 8); t >>= 8;\
                b ^= rotrFixed(Te[byte(t)], 16);        t >>= 8;\
                a ^= rotrFixed(Te[t], 24);
#endif

        QUARTER_ROUND(s3, t0, t1, t2, t3)
        QUARTER_ROUND(s2, t3, t0, t1, t2)
        QUARTER_ROUND(s1, t2, t3, t0, t1)
        QUARTER_ROUND(s0, t1, t2, t3, t0)
#undef QUARTER_ROUND

        // Nr - 2 full rounds:
        // Two rounds per iteration (t -> s, then s -> t), using all four Te
        // sub-tables; m_rounds is always even (10/12/14) so this divides out.
    unsigned int r = m_rounds/2 - 1;
    do
        {
#define QUARTER_ROUND(t, a, b, c, d)    \
                a ^= Te[3*256+byte(t)]; t >>= 8;\
                b ^= Te[2*256+byte(t)]; t >>= 8;\
                c ^= Te[1*256+byte(t)]; t >>= 8;\
                d ^= Te[t];

                s0 = rk[0]; s1 = rk[1]; s2 = rk[2]; s3 = rk[3];

                QUARTER_ROUND(t3, s0, s1, s2, s3)
                QUARTER_ROUND(t2, s3, s0, s1, s2)
                QUARTER_ROUND(t1, s2, s3, s0, s1)
                QUARTER_ROUND(t0, s1, s2, s3, s0)

                t0 = rk[4]; t1 = rk[5]; t2 = rk[6]; t3 = rk[7];

                QUARTER_ROUND(s3, t0, t1, t2, t3)
                QUARTER_ROUND(s2, t3, t0, t1, t2)
                QUARTER_ROUND(s1, t2, t3, t0, t1)
                QUARTER_ROUND(s0, t1, t2, t3, t0)
#undef QUARTER_ROUND

        rk += 8;
    } while (--r);

        // timing attack countermeasure. see comments at top for more details
        // Same preload trick for the 256-byte S-box (Se) used in the final
        // round; u again remains 0.
        u = 0;
        for (i=0; i<256; i+=cacheLineSize)
                u &= *(const word32 *)(Se+i);
        u &= *(const word32 *)(Se+252);
        t0 |= u; t1 |= u; t2 |= u; t3 |= u;

        word32 tbw[4];
        byte *const tempBlock = (byte *)tbw;
        word32 *const obw = (word32 *)outBlock;
        const word32 *const xbw = (const word32 *)xorBlock;

        // Final round: SubBytes + ShiftRows done byte-wise into tempBlock
        // (the index constants implement ShiftRows); AddRoundKey follows.
#define QUARTER_ROUND(t, a, b, c, d)    \
        tempBlock[a] = Se[byte(t)]; t >>= 8;\
        tempBlock[b] = Se[byte(t)]; t >>= 8;\
        tempBlock[c] = Se[byte(t)]; t >>= 8;\
        tempBlock[d] = Se[t];

        QUARTER_ROUND(t2, 15, 2, 5, 8)
        QUARTER_ROUND(t1, 11, 14, 1, 4)
        QUARTER_ROUND(t0, 7, 10, 13, 0)
        QUARTER_ROUND(t3, 3, 6, 9, 12)
#undef QUARTER_ROUND

        // Final AddRoundKey (rk now points at the last round key, which was
        // stored byte-reversed), plus the optional xorBlock mix-in.
        if (xbw)
        {
                obw[0] = tbw[0] ^ xbw[0] ^ rk[0];
                obw[1] = tbw[1] ^ xbw[1] ^ rk[1];
                obw[2] = tbw[2] ^ xbw[2] ^ rk[2];
                obw[3] = tbw[3] ^ xbw[3] ^ rk[3];
        }
        else
        {
                obw[0] = tbw[0] ^ rk[0];
                obw[1] = tbw[1] ^ rk[1];
                obw[2] = tbw[2] ^ rk[2];
                obw[3] = tbw[3] ^ rk[3];
        }
        }
}
00605 
// Decrypt one 16-byte block: outBlock = D_k(inBlock), XORed with xorBlock
// first if xorBlock is non-NULL.  This is the portable C++ "equivalent
// inverse cipher": it mirrors the Enc fallback path exactly, substituting
// the inverse tables Td/Sd and the decryption key schedule produced by
// UncheckedSetKey.  The order and pattern of table accesses implement the
// timing-attack countermeasure described at the top of the file — do not
// reorder them.
void Rijndael::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
{
        word32 s0, s1, s2, s3, t0, t1, t2, t3;
        const word32 *rk = m_key;

        // Initial AddRoundKey.  The first round key of the (reversed)
        // decryption schedule was stored byte-reversed by UncheckedSetKey so
        // raw input words can be XORed directly.
        s0 = ((const word32 *)inBlock)[0] ^ rk[0];
        s1 = ((const word32 *)inBlock)[1] ^ rk[1];
        s2 = ((const word32 *)inBlock)[2] ^ rk[2];
        s3 = ((const word32 *)inBlock)[3] ^ rk[3];
        t0 = rk[4];
        t1 = rk[5];
        t2 = rk[6];
        t3 = rk[7];
        rk += 8;

        // timing attack countermeasure. see comments at top for more details
        // u starts at 0 and remains 0 (AND can never set bits); the loads
        // exist only to pull one word per cache line of the first 1KB of Td
        // into L1, and the s |= u below makes the state depend on them so
        // the preload cannot be reordered past the first round.
        const int cacheLineSize = GetCacheLineSize();
        unsigned int i;
        word32 u = 0;
        for (i=0; i<1024; i+=cacheLineSize)
                u &= *(const word32 *)(((const byte *)Td)+i);
        u &= Td[255];
        s0 |= u; s1 |= u; s2 |= u; s3 |= u;

        // first round
        // Uses only the first 256-entry Td table plus rotations (instead of
        // all four sub-tables) so that only the preloaded 1KB is touched.
#ifdef IS_BIG_ENDIAN
#define QUARTER_ROUND(t, a, b, c, d)    \
                a ^= rotrFixed(Td[byte(t)], 24);        t >>= 8;\
                b ^= rotrFixed(Td[byte(t)], 16);        t >>= 8;\
                c ^= rotrFixed(Td[byte(t)], 8);         t >>= 8;\
                d ^= Td[t];
#else
#define QUARTER_ROUND(t, a, b, c, d)    \
                d ^= Td[byte(t)];                                       t >>= 8;\
                c ^= rotrFixed(Td[byte(t)], 8);         t >>= 8;\
                b ^= rotrFixed(Td[byte(t)], 16);        t >>= 8;\
                a ^= rotrFixed(Td[t], 24);
#endif

        // Note the destination ordering differs from Enc: decryption's
        // InvShiftRows rotates the other way.
        QUARTER_ROUND(s3, t2, t1, t0, t3)
        QUARTER_ROUND(s2, t1, t0, t3, t2)
        QUARTER_ROUND(s1, t0, t3, t2, t1)
        QUARTER_ROUND(s0, t3, t2, t1, t0)
#undef QUARTER_ROUND

        // Nr - 2 full rounds:
        // Two rounds per iteration (t -> s, then s -> t), consuming two
        // round keys each pass; m_rounds is always even (10/12/14).
    unsigned int r = m_rounds/2 - 1;
    do
        {
#define QUARTER_ROUND(t, a, b, c, d)    \
                a ^= Td[3*256+byte(t)]; t >>= 8;\
                b ^= Td[2*256+byte(t)]; t >>= 8;\
                c ^= Td[1*256+byte(t)]; t >>= 8;\
                d ^= Td[t];

                s0 = rk[0]; s1 = rk[1]; s2 = rk[2]; s3 = rk[3];

                QUARTER_ROUND(t3, s2, s1, s0, s3)
                QUARTER_ROUND(t2, s1, s0, s3, s2)
                QUARTER_ROUND(t1, s0, s3, s2, s1)
                QUARTER_ROUND(t0, s3, s2, s1, s0)

                t0 = rk[4]; t1 = rk[5]; t2 = rk[6]; t3 = rk[7];

                QUARTER_ROUND(s3, t2, t1, t0, t3)
                QUARTER_ROUND(s2, t1, t0, t3, t2)
                QUARTER_ROUND(s1, t0, t3, t2, t1)
                QUARTER_ROUND(s0, t3, t2, t1, t0)
#undef QUARTER_ROUND

        rk += 8;
    } while (--r);

        // timing attack countermeasure. see comments at top for more details
        // Same preload trick for the 256-byte inverse S-box (Sd) used in the
        // final round; u again remains 0.
        u = 0;
        for (i=0; i<256; i+=cacheLineSize)
                u &= *(const word32 *)(Sd+i);
        u &= *(const word32 *)(Sd+252);
        t0 |= u; t1 |= u; t2 |= u; t3 |= u;

        word32 tbw[4];
        byte *const tempBlock = (byte *)tbw;
        word32 *const obw = (word32 *)outBlock;
        const word32 *const xbw = (const word32 *)xorBlock;

        // Final round: InvSubBytes + InvShiftRows done byte-wise into
        // tempBlock (the index constants implement InvShiftRows), then the
        // last AddRoundKey below.
#define QUARTER_ROUND(t, a, b, c, d)    \
        tempBlock[a] = Sd[byte(t)]; t >>= 8;\
        tempBlock[b] = Sd[byte(t)]; t >>= 8;\
        tempBlock[c] = Sd[byte(t)]; t >>= 8;\
        tempBlock[d] = Sd[t];

        QUARTER_ROUND(t2, 7, 2, 13, 8)
        QUARTER_ROUND(t1, 3, 14, 9, 4)
        QUARTER_ROUND(t0, 15, 10, 5, 0)
        QUARTER_ROUND(t3, 11, 6, 1, 12)
#undef QUARTER_ROUND

        // Final AddRoundKey (rk now points at the last round key, stored
        // byte-reversed), plus the optional xorBlock mix-in.
        if (xbw)
        {
                obw[0] = tbw[0] ^ xbw[0] ^ rk[0];
                obw[1] = tbw[1] ^ xbw[1] ^ rk[1];
                obw[2] = tbw[2] ^ xbw[2] ^ rk[2];
                obw[3] = tbw[3] ^ xbw[3] ^ rk[3];
        }
        else
        {
                obw[0] = tbw[0] ^ rk[0];
                obw[1] = tbw[1] ^ rk[1];
                obw[2] = tbw[2] ^ rk[2];
                obw[3] = tbw[3] ^ rk[3];
        }
}
00718 
00719 NAMESPACE_END
00720 
00721 #endif
00722 #endif

Generated on Fri Feb 6 00:56:25 2009 for Crypto++ by  doxygen 1.4.7