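// rijndael.cpp - Rijndael (AES) block cipher implementation for Crypto++.
//
// Example use, as a sketch (SetKey and ProcessBlock are provided by the
// generic Crypto++ BlockCipher base classes, not defined in this file):
//
//     CryptoPP::Rijndael::Encryption enc;
//     enc.SetKey(key, 16);                  // 16-byte key -> AES-128
//     enc.ProcessBlock(inBlock, outBlock);  // encrypt one 16-byte block
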
#include "pch.h"

#ifndef CRYPTOPP_IMPORTS
#ifndef CRYPTOPP_GENERATE_X64_MASM

#include "rijndael.h"
#include "misc.h"
#include "cpu.h"

NAMESPACE_BEGIN(CryptoPP)

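// Key schedule (FIPS 197): expand an Nk-word user key (Nk = keylen/4) into
// 4*(Nr+1) round-key words, where Nr = Nk + 6.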
void Rijndael::Base::UncheckedSetKey(const byte *userKey, unsigned int keylen, const NameValuePairs &)
{
    AssertValidKeyLength(keylen);

    m_rounds = keylen/4 + 6;
    m_key.New(4*(m_rounds+1));

    word32 temp, *rk = m_key;
    const word32 *rc = rcon;

    GetUserKey(BIG_ENDIAN_ORDER, rk, keylen/4, userKey, keylen);

    while (true)
    {
        temp = rk[keylen/4-1];
        rk[keylen/4] = rk[0] ^
            (word32(Se[GETBYTE(temp, 2)]) << 24) ^
            (word32(Se[GETBYTE(temp, 1)]) << 16) ^
            (word32(Se[GETBYTE(temp, 0)]) << 8) ^
            Se[GETBYTE(temp, 3)] ^
            *(rc++);
        rk[keylen/4+1] = rk[1] ^ rk[keylen/4];
        rk[keylen/4+2] = rk[2] ^ rk[keylen/4+1];
        rk[keylen/4+3] = rk[3] ^ rk[keylen/4+2];

        if (rk + keylen/4 + 4 == m_key.end())
            break;

        if (keylen == 24)
        {
            rk[10] = rk[ 4] ^ rk[ 9];
            rk[11] = rk[ 5] ^ rk[10];
        }
        else if (keylen == 32)
        {
            temp = rk[11];
            rk[12] = rk[ 4] ^
                (word32(Se[GETBYTE(temp, 3)]) << 24) ^
                (word32(Se[GETBYTE(temp, 2)]) << 16) ^
                (word32(Se[GETBYTE(temp, 1)]) << 8) ^
                Se[GETBYTE(temp, 0)];
            rk[13] = rk[ 5] ^ rk[12];
            rk[14] = rk[ 6] ^ rk[13];
            rk[15] = rk[ 7] ^ rk[14];
        }
        rk += keylen/4;
    }

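    // Decryption uses the equivalent inverse cipher: the same round structure
    // with inverse tables, which needs the round keys in reverse order and
    // InvMixColumns applied to all round keys except the first and last.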
    if (!IsForwardTransformation())
    {
        unsigned int i, j;
        rk = m_key;

        for (i = 0, j = 4*m_rounds; i < j; i += 4, j -= 4) {
            temp = rk[i  ]; rk[i  ] = rk[j  ]; rk[j  ] = temp;
            temp = rk[i+1]; rk[i+1] = rk[j+1]; rk[j+1] = temp;
            temp = rk[i+2]; rk[i+2] = rk[j+2]; rk[j+2] = temp;
            temp = rk[i+3]; rk[i+3] = rk[j+3]; rk[j+3] = temp;
        }

        // Td[Se[x]] computes InvMixColumns: Td bakes in InvSubBytes, which
        // the Se lookup cancels.
        for (i = 1; i < m_rounds; i++) {
            rk += 4;
            rk[0] =
                Td[0*256+Se[GETBYTE(rk[0], 3)]] ^
                Td[1*256+Se[GETBYTE(rk[0], 2)]] ^
                Td[2*256+Se[GETBYTE(rk[0], 1)]] ^
                Td[3*256+Se[GETBYTE(rk[0], 0)]];
            rk[1] =
                Td[0*256+Se[GETBYTE(rk[1], 3)]] ^
                Td[1*256+Se[GETBYTE(rk[1], 2)]] ^
                Td[2*256+Se[GETBYTE(rk[1], 1)]] ^
                Td[3*256+Se[GETBYTE(rk[1], 0)]];
            rk[2] =
                Td[0*256+Se[GETBYTE(rk[2], 3)]] ^
                Td[1*256+Se[GETBYTE(rk[2], 2)]] ^
                Td[2*256+Se[GETBYTE(rk[2], 1)]] ^
                Td[3*256+Se[GETBYTE(rk[2], 0)]];
            rk[3] =
                Td[0*256+Se[GETBYTE(rk[3], 3)]] ^
                Td[1*256+Se[GETBYTE(rk[3], 2)]] ^
                Td[2*256+Se[GETBYTE(rk[3], 1)]] ^
                Td[3*256+Se[GETBYTE(rk[3], 0)]];
        }
    }

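    // Store the first and last round keys in big-endian byte order (a no-op
    // on big-endian machines) so they can be XORed directly against the raw
    // input/output block bytes; the inner keys are consumed word-wise through
    // the lookup tables.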
    ConditionalByteReverse(BIG_ENDIAN_ORDER, m_key.begin(), m_key.begin(), 16);
    ConditionalByteReverse(BIG_ENDIAN_ORDER, m_key + m_rounds*4, m_key + m_rounds*4, 16);
}

#ifdef CRYPTOPP_X64_MASM_AVAILABLE
extern "C" {
void Rijndael_Enc_ProcessAndXorBlock(const word32 *table, word32 cacheLineSize, const word32 *k, const word32 *kLoopEnd, const byte *inBlock, const byte *xorBlock, byte *outBlock);
}
#endif

#pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code

void Rijndael::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
{
#endif // #ifndef CRYPTOPP_GENERATE_X64_MASM

#ifdef CRYPTOPP_X64_MASM_AVAILABLE
    Rijndael_Enc_ProcessAndXorBlock(Te, g_cacheLineSize, m_key, m_key + m_rounds*4, inBlock, xorBlock, outBlock);
    return;
#endif

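// Inline x86/x64 assembly path. The same source text is also compiled with
// CRYPTOPP_GENERATE_X64_MASM defined to emit the standalone MASM routine
// declared above.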
#if defined(CRYPTOPP_X86_ASM_AVAILABLE)
#ifdef CRYPTOPP_GENERATE_X64_MASM
    ALIGN 8
Rijndael_Enc_ProcessAndXorBlock PROC FRAME
    rex_push_reg rbx
    push_reg rsi
    push_reg rdi
    push_reg r12
    push_reg r13
    push_reg r14
    push_reg r15
    .endprolog
    mov AS_REG_7, rcx
    mov rdi, [rsp + 5*8 + 7*8] ; inBlock
#else
    if (HasMMX())
    {
        const word32 *k = m_key;
        const word32 *kLoopEnd = k + m_rounds*4;
#endif

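// Register allocation: x64 has enough GPRs to keep the key pointers in r8/r9
// and the three saved state words in r13-r15; 32-bit x86 is register-starved,
// so the same values are spilled to MMX registers mm0-mm5 instead.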
#if CRYPTOPP_BOOL_X64
#define K_REG            r8
#define K_END_REG        r9
#define SAVE_K
#define RESTORE_K
#define RESTORE_K_END
#define SAVE_0(x)        AS2(mov r13d, x)
#define SAVE_1(x)        AS2(mov r14d, x)
#define SAVE_2(x)        AS2(mov r15d, x)
#define RESTORE_0(x)     AS2(mov x, r13d)
#define RESTORE_1(x)     AS2(mov x, r14d)
#define RESTORE_2(x)     AS2(mov x, r15d)
#else
#define K_REG            esi
#define K_END_REG        edi
#define SAVE_K           AS2(movd mm4, esi)
#define RESTORE_K        AS2(movd esi, mm4)
#define RESTORE_K_END    AS2(movd edi, mm5)
#define SAVE_0(x)        AS2(movd mm0, x)
#define SAVE_1(x)        AS2(movd mm1, x)
#define SAVE_2(x)        AS2(movd mm2, x)
#define RESTORE_0(x)     AS2(movd x, mm0)
#define RESTORE_1(x)     AS2(movd x, mm1)
#define RESTORE_2(x)     AS2(movd x, mm2)
#endif
#ifdef __GNUC__
        word32 t0, t1, t2, t3;
        __asm__ __volatile__
        (
        ".intel_syntax noprefix;"
#if CRYPTOPP_BOOL_X64
        AS2(    mov     K_REG, rsi)
        AS2(    mov     K_END_REG, rcx)
#else
        AS1(    push    ebx)
        AS1(    push    ebp)
        AS2(    movd    mm5, ecx)
#endif
        AS2(    mov     AS_REG_7, WORD_REG(ax))
#elif CRYPTOPP_BOOL_X86
#if _MSC_VER < 1300
        const word32 *t = Te;
        AS2(    mov     eax, t)
#endif
        AS2(    mov     edx, g_cacheLineSize)
        AS2(    mov     WORD_REG(di), inBlock)
        AS2(    mov     K_REG, k)
        AS2(    movd    mm5, kLoopEnd)
#if _MSC_VER < 1300
        AS1(    push    ebx)
        AS1(    push    ebp)
        AS2(    mov     AS_REG_7, eax)
#else
        AS1(    push    ebp)
        AS2(    lea     AS_REG_7, Te)
#endif
#endif
        // initial whitening: XOR the input block with the first round key;
        // ebx also accumulates an AND of all four state words to create a
        // data dependency for the cache-priming loop below
        AS2(    mov     eax, [K_REG+0*4])
        AS2(    xor     eax, [WORD_REG(di)+0*4])
        SAVE_0(eax)
        AS2(    mov     ebx, [K_REG+1*4])
        AS2(    xor     ebx, [WORD_REG(di)+1*4])
        SAVE_1(ebx)
        AS2(    and     ebx, eax)
        AS2(    mov     eax, [K_REG+2*4])
        AS2(    xor     eax, [WORD_REG(di)+2*4])
        SAVE_2(eax)
        AS2(    and     ebx, eax)
        AS2(    mov     ecx, [K_REG+3*4])
        AS2(    xor     ecx, [WORD_REG(di)+3*4])
        AS2(    and     ebx, ecx)

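        // Timing-attack countermeasure: force ebx to 0 (an AND with 0 keeps
        // the data dependency, unlike an XOR), then AND in one word from
        // every cache line of the Te table so that the table lookups below
        // hit warm cache lines regardless of the key and data.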
        AS2(    and     ebx, 0)
        AS2(    mov     edi, ebx)
        ASL(2)
        AS2(    and     ebx, [AS_REG_7+WORD_REG(di)])
        AS2(    add     edi, edx)
        AS2(    and     ebx, [AS_REG_7+WORD_REG(di)])
        AS2(    add     edi, edx)
        AS2(    and     ebx, [AS_REG_7+WORD_REG(di)])
        AS2(    add     edi, edx)
        AS2(    and     ebx, [AS_REG_7+WORD_REG(di)])
        AS2(    add     edi, edx)
        AS2(    cmp     edi, 1024)
        ASJ(    jl,     2, b)
        AS2(    and     ebx, [AS_REG_7+1020])
#if CRYPTOPP_BOOL_X64
        AS2(    xor     r13d, ebx)
        AS2(    xor     r14d, ebx)
        AS2(    xor     r15d, ebx)
#else
        AS2(    movd    mm6, ebx)
        AS2(    pxor    mm2, mm6)
        AS2(    pxor    mm1, mm6)
        AS2(    pxor    mm0, mm6)
#endif
        AS2(    xor     ecx, ebx)

        AS2(    mov     edi, [K_REG+4*4])
        AS2(    mov     eax, [K_REG+5*4])
        AS2(    mov     ebx, [K_REG+6*4])
        AS2(    mov     edx, [K_REG+7*4])
        AS2(    add     K_REG, 8*4)
        SAVE_K

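// QUARTER_ROUND consumes one state word t byte by byte: each byte indexes one
// of the four 1KB Te sub-tables, and the result is XORed (via the s*/t*
// operand macros below) into the next half-round's state words.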
#define QUARTER_ROUND(t, a, b, c, d) \
    AS2(movzx esi, t##l)\
    AS2(d, [AS_REG_7+0*1024+4*WORD_REG(si)])\
    AS2(movzx esi, t##h)\
    AS2(c, [AS_REG_7+1*1024+4*WORD_REG(si)])\
    AS2(shr e##t##x, 16)\
    AS2(movzx esi, t##l)\
    AS2(b, [AS_REG_7+2*1024+4*WORD_REG(si)])\
    AS2(movzx esi, t##h)\
    AS2(a, [AS_REG_7+3*1024+4*WORD_REG(si)])

#define s0      xor edi
#define s1      xor eax
#define s2      xor ebx
#define s3      xor ecx
#define t0      xor edi
#define t1      xor eax
#define t2      xor ebx
#define t3      xor edx

        QUARTER_ROUND(c, t0, t1, t2, t3)
        RESTORE_2(ecx)
        QUARTER_ROUND(c, t3, t0, t1, t2)
        RESTORE_1(ecx)
        QUARTER_ROUND(c, t2, t3, t0, t1)
        RESTORE_0(ecx)
        QUARTER_ROUND(c, t1, t2, t3, t0)
        SAVE_2(ebx)
        SAVE_1(eax)
        SAVE_0(edi)
#undef QUARTER_ROUND

        RESTORE_K

        ASL(0)
        AS2(    mov     edi, [K_REG+0*4])
        AS2(    mov     eax, [K_REG+1*4])
        AS2(    mov     ebx, [K_REG+2*4])
        AS2(    mov     ecx, [K_REG+3*4])

#define QUARTER_ROUND(t, a, b, c, d) \
    AS2(movzx esi, t##l)\
    AS2(a, [AS_REG_7+3*1024+4*WORD_REG(si)])\
    AS2(movzx esi, t##h)\
    AS2(b, [AS_REG_7+2*1024+4*WORD_REG(si)])\
    AS2(shr e##t##x, 16)\
    AS2(movzx esi, t##l)\
    AS2(c, [AS_REG_7+1*1024+4*WORD_REG(si)])\
    AS2(movzx esi, t##h)\
    AS2(d, [AS_REG_7+0*1024+4*WORD_REG(si)])

        QUARTER_ROUND(d, s0, s1, s2, s3)
        RESTORE_2(edx)
        QUARTER_ROUND(d, s3, s0, s1, s2)
        RESTORE_1(edx)
        QUARTER_ROUND(d, s2, s3, s0, s1)
        RESTORE_0(edx)
        QUARTER_ROUND(d, s1, s2, s3, s0)
        RESTORE_K
        SAVE_2(ebx)
        SAVE_1(eax)
        SAVE_0(edi)

        AS2(    mov     edi, [K_REG+4*4])
        AS2(    mov     eax, [K_REG+5*4])
        AS2(    mov     ebx, [K_REG+6*4])
        AS2(    mov     edx, [K_REG+7*4])

        QUARTER_ROUND(c, t0, t1, t2, t3)
        RESTORE_2(ecx)
        QUARTER_ROUND(c, t3, t0, t1, t2)
        RESTORE_1(ecx)
        QUARTER_ROUND(c, t2, t3, t0, t1)
        RESTORE_0(ecx)
        QUARTER_ROUND(c, t1, t2, t3, t0)
        SAVE_2(ebx)
        SAVE_1(eax)
        SAVE_0(edi)

        RESTORE_K
        RESTORE_K_END
        AS2(    add     K_REG, 8*4)
        SAVE_K
        AS2(    cmp     K_END_REG, K_REG)
        ASJ(    jne,    0, b)

#undef QUARTER_ROUND
#undef s0
#undef s1
#undef s2
#undef s3
#undef t0
#undef t1
#undef t2
#undef t3

        AS2(    mov     eax, [K_END_REG+0*4])
        AS2(    mov     ecx, [K_END_REG+1*4])
        AS2(    mov     esi, [K_END_REG+2*4])
        AS2(    mov     edi, [K_END_REG+3*4])

#define QUARTER_ROUND(a, b, c, d) \
    AS2(    movzx   ebx, dl)\
    AS2(    movzx   ebx, BYTE PTR [AS_REG_7+1+4*WORD_REG(bx)])\
    AS2(    shl     ebx, 3*8)\
    AS2(    xor     a, ebx)\
    AS2(    movzx   ebx, dh)\
    AS2(    movzx   ebx, BYTE PTR [AS_REG_7+1+4*WORD_REG(bx)])\
    AS2(    shl     ebx, 2*8)\
    AS2(    xor     b, ebx)\
    AS2(    shr     edx, 16)\
    AS2(    movzx   ebx, dl)\
    AS2(    shr     edx, 8)\
    AS2(    movzx   ebx, BYTE PTR [AS_REG_7+1+4*WORD_REG(bx)])\
    AS2(    shl     ebx, 1*8)\
    AS2(    xor     c, ebx)\
    AS2(    movzx   ebx, BYTE PTR [AS_REG_7+1+4*WORD_REG(dx)])\
    AS2(    xor     d, ebx)

        QUARTER_ROUND(eax, ecx, esi, edi)
        RESTORE_2(edx)
        QUARTER_ROUND(edi, eax, ecx, esi)
        RESTORE_1(edx)
        QUARTER_ROUND(esi, edi, eax, ecx)
        RESTORE_0(edx)
        QUARTER_ROUND(ecx, esi, edi, eax)

#undef QUARTER_ROUND

#if CRYPTOPP_BOOL_X86
        AS1(emms)
        AS1(pop ebp)
#if defined(__GNUC__) || (defined(_MSC_VER) && _MSC_VER < 1300)
        AS1(pop ebx)
#endif
#endif

#ifdef __GNUC__
        ".att_syntax prefix;"
        : "=a" (t0), "=c" (t1), "=S" (t2), "=D" (t3)
        : "a" (Te), "D" (inBlock), "S" (k), "c" (kLoopEnd), "d" (g_cacheLineSize)
        : "memory", "cc"
#if CRYPTOPP_BOOL_X64
        , "%ebx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15"
#endif
        );

        if (xorBlock)
        {
            t0 ^= ((const word32 *)xorBlock)[0];
            t1 ^= ((const word32 *)xorBlock)[1];
            t2 ^= ((const word32 *)xorBlock)[2];
            t3 ^= ((const word32 *)xorBlock)[3];
        }
        ((word32 *)outBlock)[0] = t0;
        ((word32 *)outBlock)[1] = t1;
        ((word32 *)outBlock)[2] = t2;
        ((word32 *)outBlock)[3] = t3;
#else
#if CRYPTOPP_BOOL_X64
        mov rbx, [rsp + 6*8 + 7*8] ; xorBlock
#else
        AS2(    mov     ebx, xorBlock)
#endif
        AS2(    test    WORD_REG(bx), WORD_REG(bx))
        ASJ(    jz,     1, f)
        AS2(    xor     eax, [WORD_REG(bx)+0*4])
        AS2(    xor     ecx, [WORD_REG(bx)+1*4])
        AS2(    xor     esi, [WORD_REG(bx)+2*4])
        AS2(    xor     edi, [WORD_REG(bx)+3*4])
        ASL(1)
#if CRYPTOPP_BOOL_X64
        mov rbx, [rsp + 7*8 + 7*8] ; outBlock
#else
        AS2(    mov     ebx, outBlock)
#endif
        AS2(    mov     [WORD_REG(bx)+0*4], eax)
        AS2(    mov     [WORD_REG(bx)+1*4], ecx)
        AS2(    mov     [WORD_REG(bx)+2*4], esi)
        AS2(    mov     [WORD_REG(bx)+3*4], edi)
#endif

#if CRYPTOPP_GENERATE_X64_MASM
    pop r15
    pop r14
    pop r13
    pop r12
    pop rdi
    pop rsi
    pop rbx
    ret
Rijndael_Enc_ProcessAndXorBlock ENDP
#else
    }
    else
#endif
#endif // #ifdef CRYPTOPP_X86_ASM_AVAILABLE
#ifndef CRYPTOPP_GENERATE_X64_MASM
    // portable C++ path
    {
        word32 s0, s1, s2, s3, t0, t1, t2, t3;
        const word32 *rk = m_key;

        s0 = ((const word32 *)inBlock)[0] ^ rk[0];
        s1 = ((const word32 *)inBlock)[1] ^ rk[1];
        s2 = ((const word32 *)inBlock)[2] ^ rk[2];
        s3 = ((const word32 *)inBlock)[3] ^ rk[3];
        t0 = rk[4];
        t1 = rk[5];
        t2 = rk[6];
        t3 = rk[7];
        rk += 8;

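        // Timing-attack countermeasure: read one word from every cache line
        // of Te. u is always 0, so the ORs below leave the state unchanged,
        // but the loads prime the cache.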
        const int cacheLineSize = GetCacheLineSize();
        unsigned int i;
        word32 u = 0;
        for (i=0; i<1024; i+=cacheLineSize)
            u &= *(const word32 *)(((const byte *)Te)+i);
        u &= Te[255];
        s0 |= u; s1 |= u; s2 |= u; s3 |= u;

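        // First round: QUARTER_ROUND peels one byte at a time off a state
        // word; the two variants extract bytes in opposite order to match
        // the host byte order.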
#ifdef IS_BIG_ENDIAN
#define QUARTER_ROUND(t, a, b, c, d) \
    a ^= rotrFixed(Te[byte(t)], 24); t >>= 8;\
    b ^= rotrFixed(Te[byte(t)], 16); t >>= 8;\
    c ^= rotrFixed(Te[byte(t)], 8);  t >>= 8;\
    d ^= Te[t];
#else
#define QUARTER_ROUND(t, a, b, c, d) \
    d ^= Te[byte(t)]; t >>= 8;\
    c ^= rotrFixed(Te[byte(t)], 8);  t >>= 8;\
    b ^= rotrFixed(Te[byte(t)], 16); t >>= 8;\
    a ^= rotrFixed(Te[t], 24);
#endif

        QUARTER_ROUND(s3, t0, t1, t2, t3)
        QUARTER_ROUND(s2, t3, t0, t1, t2)
        QUARTER_ROUND(s1, t2, t3, t0, t1)
        QUARTER_ROUND(s0, t1, t2, t3, t0)
#undef QUARTER_ROUND

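        // Middle rounds, two per iteration: the state ping-pongs between
        // s0..s3 and t0..t3, picking up a fresh round key each half. With
        // the first round above and the final round below, this covers all
        // m_rounds rounds.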
        unsigned int r = m_rounds/2 - 1;
        do
        {
#define QUARTER_ROUND(t, a, b, c, d) \
    a ^= Te[3*256+byte(t)]; t >>= 8;\
    b ^= Te[2*256+byte(t)]; t >>= 8;\
    c ^= Te[1*256+byte(t)]; t >>= 8;\
    d ^= Te[t];

            s0 = rk[0]; s1 = rk[1]; s2 = rk[2]; s3 = rk[3];

            QUARTER_ROUND(t3, s0, s1, s2, s3)
            QUARTER_ROUND(t2, s3, s0, s1, s2)
            QUARTER_ROUND(t1, s2, s3, s0, s1)
            QUARTER_ROUND(t0, s1, s2, s3, s0)

            t0 = rk[4]; t1 = rk[5]; t2 = rk[6]; t3 = rk[7];

            QUARTER_ROUND(s3, t0, t1, t2, t3)
            QUARTER_ROUND(s2, t3, t0, t1, t2)
            QUARTER_ROUND(s1, t2, t3, t0, t1)
            QUARTER_ROUND(s0, t1, t2, t3, t0)
#undef QUARTER_ROUND

            rk += 8;
        } while (--r);

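        // Prime the cache lines of the byte S-box (Se) before the final
        // round; u is again always 0.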
        u = 0;
        for (i=0; i<256; i+=cacheLineSize)
            u &= *(const word32 *)(Se+i);
        u &= *(const word32 *)(Se+252);
        t0 |= u; t1 |= u; t2 |= u; t3 |= u;

        word32 tbw[4];
        byte *const tempBlock = (byte *)tbw;
        word32 *const obw = (word32 *)outBlock;
        const word32 *const xbw = (const word32 *)xorBlock;

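        // Final round: SubBytes and ShiftRows only (no MixColumns); each
        // output byte is written to its ShiftRows position in tempBlock.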
#define QUARTER_ROUND(t, a, b, c, d) \
    tempBlock[a] = Se[byte(t)]; t >>= 8;\
    tempBlock[b] = Se[byte(t)]; t >>= 8;\
    tempBlock[c] = Se[byte(t)]; t >>= 8;\
    tempBlock[d] = Se[t];

        QUARTER_ROUND(t2, 15, 2, 5, 8)
        QUARTER_ROUND(t1, 11, 14, 1, 4)
        QUARTER_ROUND(t0, 7, 10, 13, 0)
        QUARTER_ROUND(t3, 3, 6, 9, 12)
#undef QUARTER_ROUND

        if (xbw)
        {
            obw[0] = tbw[0] ^ xbw[0] ^ rk[0];
            obw[1] = tbw[1] ^ xbw[1] ^ rk[1];
            obw[2] = tbw[2] ^ xbw[2] ^ rk[2];
            obw[3] = tbw[3] ^ xbw[3] ^ rk[3];
        }
        else
        {
            obw[0] = tbw[0] ^ rk[0];
            obw[1] = tbw[1] ^ rk[1];
            obw[2] = tbw[2] ^ rk[2];
            obw[3] = tbw[3] ^ rk[3];
        }
    }
}

void Rijndael::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
{
    word32 s0, s1, s2, s3, t0, t1, t2, t3;
    const word32 *rk = m_key;

    s0 = ((const word32 *)inBlock)[0] ^ rk[0];
    s1 = ((const word32 *)inBlock)[1] ^ rk[1];
    s2 = ((const word32 *)inBlock)[2] ^ rk[2];
    s3 = ((const word32 *)inBlock)[3] ^ rk[3];
    t0 = rk[4];
    t1 = rk[5];
    t2 = rk[6];
    t3 = rk[7];
    rk += 8;

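    // Decryption mirrors the encryptor: Td/Sd replace Te/Se, the rotation
    // of the state words runs the other way, and the same always-zero
    // cache-priming reads guard against timing attacks.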
    const int cacheLineSize = GetCacheLineSize();
    unsigned int i;
    word32 u = 0;
    for (i=0; i<1024; i+=cacheLineSize)
        u &= *(const word32 *)(((const byte *)Td)+i);
    u &= Td[255];
    s0 |= u; s1 |= u; s2 |= u; s3 |= u;

#ifdef IS_BIG_ENDIAN
#define QUARTER_ROUND(t, a, b, c, d) \
    a ^= rotrFixed(Td[byte(t)], 24); t >>= 8;\
    b ^= rotrFixed(Td[byte(t)], 16); t >>= 8;\
    c ^= rotrFixed(Td[byte(t)], 8);  t >>= 8;\
    d ^= Td[t];
#else
#define QUARTER_ROUND(t, a, b, c, d) \
    d ^= Td[byte(t)]; t >>= 8;\
    c ^= rotrFixed(Td[byte(t)], 8);  t >>= 8;\
    b ^= rotrFixed(Td[byte(t)], 16); t >>= 8;\
    a ^= rotrFixed(Td[t], 24);
#endif

    QUARTER_ROUND(s3, t2, t1, t0, t3)
    QUARTER_ROUND(s2, t1, t0, t3, t2)
    QUARTER_ROUND(s1, t0, t3, t2, t1)
    QUARTER_ROUND(s0, t3, t2, t1, t0)
#undef QUARTER_ROUND

    unsigned int r = m_rounds/2 - 1;
    do
    {
#define QUARTER_ROUND(t, a, b, c, d) \
    a ^= Td[3*256+byte(t)]; t >>= 8;\
    b ^= Td[2*256+byte(t)]; t >>= 8;\
    c ^= Td[1*256+byte(t)]; t >>= 8;\
    d ^= Td[t];

        s0 = rk[0]; s1 = rk[1]; s2 = rk[2]; s3 = rk[3];

        QUARTER_ROUND(t3, s2, s1, s0, s3)
        QUARTER_ROUND(t2, s1, s0, s3, s2)
        QUARTER_ROUND(t1, s0, s3, s2, s1)
        QUARTER_ROUND(t0, s3, s2, s1, s0)

        t0 = rk[4]; t1 = rk[5]; t2 = rk[6]; t3 = rk[7];

        QUARTER_ROUND(s3, t2, t1, t0, t3)
        QUARTER_ROUND(s2, t1, t0, t3, t2)
        QUARTER_ROUND(s1, t0, t3, t2, t1)
        QUARTER_ROUND(s0, t3, t2, t1, t0)
#undef QUARTER_ROUND

        rk += 8;
    } while (--r);

    u = 0;
    for (i=0; i<256; i+=cacheLineSize)
        u &= *(const word32 *)(Sd+i);
    u &= *(const word32 *)(Sd+252);
    t0 |= u; t1 |= u; t2 |= u; t3 |= u;

    word32 tbw[4];
    byte *const tempBlock = (byte *)tbw;
    word32 *const obw = (word32 *)outBlock;
    const word32 *const xbw = (const word32 *)xorBlock;

#define QUARTER_ROUND(t, a, b, c, d) \
    tempBlock[a] = Sd[byte(t)]; t >>= 8;\
    tempBlock[b] = Sd[byte(t)]; t >>= 8;\
    tempBlock[c] = Sd[byte(t)]; t >>= 8;\
    tempBlock[d] = Sd[t];

    QUARTER_ROUND(t2, 7, 2, 13, 8)
    QUARTER_ROUND(t1, 3, 14, 9, 4)
    QUARTER_ROUND(t0, 15, 10, 5, 0)
    QUARTER_ROUND(t3, 11, 6, 1, 12)
#undef QUARTER_ROUND

    if (xbw)
    {
        obw[0] = tbw[0] ^ xbw[0] ^ rk[0];
        obw[1] = tbw[1] ^ xbw[1] ^ rk[1];
        obw[2] = tbw[2] ^ xbw[2] ^ rk[2];
        obw[3] = tbw[3] ^ xbw[3] ^ rk[3];
    }
    else
    {
        obw[0] = tbw[0] ^ rk[0];
        obw[1] = tbw[1] ^ rk[1];
        obw[2] = tbw[2] ^ rk[2];
        obw[3] = tbw[3] ^ rk[3];
    }
}

NAMESPACE_END

#endif // #ifndef CRYPTOPP_GENERATE_X64_MASM
#endif // #ifndef CRYPTOPP_IMPORTS