integer.cpp

00001 // integer.cpp - written and placed in the public domain by Wei Dai
00002 // contains public domain code contributed by Alister Lee and Leonard Janke
00003 
00004 #include "pch.h"
00005 
00006 #ifndef CRYPTOPP_IMPORTS
00007 
00008 #include "integer.h"
00009 #include "modarith.h"
00010 #include "nbtheory.h"
00011 #include "asn.h"
00012 #include "oids.h"
00013 #include "words.h"
00014 #include "algparam.h"
00015 #include "pubkey.h"             // for P1363_KDF2
00016 #include "sha.h"
00017 #include "cpu.h"
00018 
00019 #include <iostream>
00020 
00021 #if _MSC_VER >= 1400
00022         #include <intrin.h>
00023 #endif
00024 
00025 #ifdef __DECCXX
00026         #include <c_asm.h>
00027 #endif
00028 
00029 #ifdef CRYPTOPP_MSVC6_NO_PP
00030         #pragma message("You do not seem to have the Visual C++ Processor Pack installed, so use of SSE2 instructions will be disabled.")
00031 #endif
00032 
00033 #define CRYPTOPP_INTEGER_SSE2 (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86)
00034 
00035 NAMESPACE_BEGIN(CryptoPP)
00036 
00037 bool AssignIntToInteger(const std::type_info &valueType, void *pInteger, const void *pInt)
00038 {
00039         if (valueType != typeid(Integer))
00040                 return false;
00041         *reinterpret_cast<Integer *>(pInteger) = *reinterpret_cast<const int *>(pInt);
00042         return true;
00043 }
00044 
00045 inline static int Compare(const word *A, const word *B, size_t N)
00046 {
00047         while (N--)
00048                 if (A[N] > B[N])
00049                         return 1;
00050                 else if (A[N] < B[N])
00051                         return -1;
00052 
00053         return 0;
00054 }
00055 
00056 inline static int Increment(word *A, size_t N, word B=1)
00057 {
00058         assert(N);
00059         word t = A[0];
00060         A[0] = t+B;
00061         if (A[0] >= t)
00062                 return 0;
00063         for (unsigned i=1; i<N; i++)
00064                 if (++A[i])
00065                         return 0;
00066         return 1;
00067 }
00068 
00069 inline static int Decrement(word *A, size_t N, word B=1)
00070 {
00071         assert(N);
00072         word t = A[0];
00073         A[0] = t-B;
00074         if (A[0] <= t)
00075                 return 0;
00076         for (unsigned i=1; i<N; i++)
00077                 if (A[i]--)
00078                         return 0;
00079         return 1;
00080 }
00081 
00082 static void TwosComplement(word *A, size_t N)
00083 {
00084         Decrement(A, N);
00085         for (unsigned i=0; i<N; i++)
00086                 A[i] = ~A[i];
00087 }
00088 
00089 static word AtomicInverseModPower2(word A)
00090 {
00091         assert(A%2==1);
00092 
00093         word R=A%8;
00094 
00095         for (unsigned i=3; i<WORD_BITS; i*=2)
00096                 R = R*(2-R*A);
00097 
00098         assert(R*A==1);
00099         return R;
00100 }
00101 
00102 // ********************************************************
00103 
00104 #if !defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE) || defined(__x86_64__)
00105         #define Declare2Words(x)                        word x##0, x##1;
00106         #define AssignWord(a, b)                        a##0 = b; a##1 = 0;
00107         #define Add2WordsBy1(a, b, c)           a##0 = b##0 + c; a##1 = b##1 + (a##0 < c);
00108         #define LowWord(a)                                      a##0
00109         #define HighWord(a)                                     a##1
00110         #ifdef _MSC_VER
00111                 #define MultiplyWords(p, a, b)  p##0 = _umul128(a, b, &p##1);
00112                 #ifndef __INTEL_COMPILER
00113                         #define Double3Words(c, d)              d##1 = __shiftleft128(d##0, d##1, 1); d##0 = __shiftleft128(c, d##0, 1); c *= 2;
00114                 #endif
00115         #elif defined(__DECCXX)
00116                 #define MultiplyWords(p, a, b)  p##0 = a*b; p##1 = asm("umulh %a0, %a1, %v0", a, b);
00117         #elif defined(__x86_64__)
00118                 #define MultiplyWords(p, a, b)  asm ("mulq %3" : "=a"(p##0), "=d"(p##1) : "a"(a), "g"(b) : "cc");
00119                 #define MulAcc(c, d, a, b)              asm ("mulq %6; addq %3, %0; adcq %4, %1; adcq $0, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1), "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
00120                 #define Double3Words(c, d)              asm ("addq %0, %0; adcq %1, %1; adcq %2, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1) : : "cc");
00121                 #define Acc2WordsBy1(a, b)              asm ("addq %2, %0; adcq $0, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b) : "cc");
00122                 #define Acc2WordsBy2(a, b)              asm ("addq %2, %0; adcq %3, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b##0), "r"(b##1) : "cc");
00123                 #define Acc3WordsBy2(c, d, e)   asm ("addq %5, %0; adcq %6, %1; adcq $0, %2;" : "+r"(c), "=r"(e##0), "=r"(e##1) : "1"(d##0), "2"(d##1), "r"(e##0), "r"(e##1) : "cc");
00124         #endif
00125         #ifndef Double3Words
00126                 #define Double3Words(c, d)              d##1 = 2*d##1 + (d##0>>(WORD_BITS-1)); d##0 = 2*d##0 + (c>>(WORD_BITS-1)); c *= 2;
00127         #endif
00128         #ifndef Acc2WordsBy2
00129                 #define Acc2WordsBy2(a, b)              a##0 += b##0; a##1 += a##0 < b##0; a##1 += b##1;
00130         #endif
00131         #define AddWithCarry(u, a, b)           {word t = a+b; u##0 = t + u##1; u##1 = (t<a) + (u##0<t);}
00132         #define SubtractWithBorrow(u, a, b)     {word t = a-b; u##0 = t - u##1; u##1 = (t>a) + (u##0>t);}
00133         #define GetCarry(u)                                     u##1
00134         #define GetBorrow(u)                            u##1
00135 #else
00136         #define Declare2Words(x)                        dword x;
00137         #if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER)
00138                 #define MultiplyWords(p, a, b)          p = __emulu(a, b);
00139         #else
00140                 #define MultiplyWords(p, a, b)          p = (dword)a*b;
00141         #endif
00142         #define AssignWord(a, b)                        a = b;
00143         #define Add2WordsBy1(a, b, c)           a = b + c;
00144         #define Acc2WordsBy2(a, b)                      a += b;
00145         #define LowWord(a)                                      word(a)
00146         #define HighWord(a)                                     word(a>>WORD_BITS)
00147         #define Double3Words(c, d)                      d = 2*d + (c>>(WORD_BITS-1)); c *= 2;
00148         #define AddWithCarry(u, a, b)           u = dword(a) + b + GetCarry(u);
00149         #define SubtractWithBorrow(u, a, b)     u = dword(a) - b - GetBorrow(u);
00150         #define GetCarry(u)                                     HighWord(u)
00151         #define GetBorrow(u)                            word(u>>(WORD_BITS*2-1))
00152 #endif
00153 #ifndef MulAcc
00154         #define MulAcc(c, d, a, b)                      MultiplyWords(p, a, b); Acc2WordsBy1(p, c); c = LowWord(p); Acc2WordsBy1(d, HighWord(p));
00155 #endif
00156 #ifndef Acc2WordsBy1
00157         #define Acc2WordsBy1(a, b)                      Add2WordsBy1(a, a, b)
00158 #endif
00159 #ifndef Acc3WordsBy2
00160         #define Acc3WordsBy2(c, d, e)           Acc2WordsBy1(e, c); c = LowWord(e); Add2WordsBy1(e, d, HighWord(e));
00161 #endif
00162 
00163 class DWord
00164 {
00165 public:
00166         DWord() {}
00167 
00168 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00169         explicit DWord(word low)
00170         {
00171                 m_whole = low;
00172         }
00173 #else
00174         explicit DWord(word low)
00175         {
00176                 m_halfs.low = low;
00177                 m_halfs.high = 0;
00178         }
00179 #endif
00180 
00181         DWord(word low, word high)
00182         {
00183                 m_halfs.low = low;
00184                 m_halfs.high = high;
00185         }
00186 
00187         static DWord Multiply(word a, word b)
00188         {
00189                 DWord r;
00190                 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00191                         r.m_whole = (dword)a * b;
00192                 #elif defined(__x86_64__)
00193                         asm ("mulq %3" : "=a"(r.m_halfs.low), "=d"(r.m_halfs.high) : "a"(a), "g"(b) : "cc");
00194                 #else
00195                         r.m_halfs.low = _umul128(a, b, &r.m_halfs.high);
00196                 #endif
00197                 return r;
00198         }
00199 
00200         static DWord MultiplyAndAdd(word a, word b, word c)
00201         {
00202                 DWord r = Multiply(a, b);
00203                 return r += c;
00204         }
00205 
00206         DWord & operator+=(word a)
00207         {
00208                 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00209                         m_whole = m_whole + a;
00210                 #else
00211                         m_halfs.low += a;
00212                         m_halfs.high += (m_halfs.low < a);
00213                 #endif
00214                 return *this;
00215         }
00216 
00217         DWord operator+(word a)
00218         {
00219                 DWord r;
00220                 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00221                         r.m_whole = m_whole + a;
00222                 #else
00223                         r.m_halfs.low = m_halfs.low + a;
00224                         r.m_halfs.high = m_halfs.high + (r.m_halfs.low < a);
00225                 #endif
00226                 return r;
00227         }
00228 
00229         DWord operator-(DWord a)
00230         {
00231                 DWord r;
00232                 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00233                         r.m_whole = m_whole - a.m_whole;
00234                 #else
00235                         r.m_halfs.low = m_halfs.low - a.m_halfs.low;
00236                         r.m_halfs.high = m_halfs.high - a.m_halfs.high - (r.m_halfs.low > m_halfs.low);
00237                 #endif
00238                 return r;
00239         }
00240 
00241         DWord operator-(word a)
00242         {
00243                 DWord r;
00244                 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00245                         r.m_whole = m_whole - a;
00246                 #else
00247                         r.m_halfs.low = m_halfs.low - a;
00248                         r.m_halfs.high = m_halfs.high - (r.m_halfs.low > m_halfs.low);
00249                 #endif
00250                 return r;
00251         }
00252 
00253         // returns quotient, which must fit in a word
00254         word operator/(word divisor);
00255 
00256         word operator%(word a);
00257 
00258         bool operator!() const
00259         {
00260         #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00261                 return !m_whole;
00262         #else
00263                 return !m_halfs.high && !m_halfs.low;
00264         #endif
00265         }
00266 
00267         word GetLowHalf() const {return m_halfs.low;}
00268         word GetHighHalf() const {return m_halfs.high;}
00269         word GetHighHalfAsBorrow() const {return 0-m_halfs.high;}
00270 
00271 private:
00272         union
00273         {
00274         #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00275                 dword m_whole;
00276         #endif
00277                 struct
00278                 {
00279                 #ifdef IS_LITTLE_ENDIAN
00280                         word low;
00281                         word high;
00282                 #else
00283                         word high;
00284                         word low;
00285                 #endif
00286                 } m_halfs;
00287         };
00288 };
00289 
00290 class Word
00291 {
00292 public:
00293         Word() {}
00294 
00295         Word(word value)
00296         {
00297                 m_whole = value;
00298         }
00299 
00300         Word(hword low, hword high)
00301         {
00302                 m_whole = low | (word(high) << (WORD_BITS/2));
00303         }
00304 
00305         static Word Multiply(hword a, hword b)
00306         {
00307                 Word r;
00308                 r.m_whole = (word)a * b;
00309                 return r;
00310         }
00311 
00312         Word operator-(Word a)
00313         {
00314                 Word r;
00315                 r.m_whole = m_whole - a.m_whole;
00316                 return r;
00317         }
00318 
00319         Word operator-(hword a)
00320         {
00321                 Word r;
00322                 r.m_whole = m_whole - a;
00323                 return r;
00324         }
00325 
00326         // returns quotient, which must fit in a word
00327         hword operator/(hword divisor)
00328         {
00329                 return hword(m_whole / divisor);
00330         }
00331 
00332         bool operator!() const
00333         {
00334                 return !m_whole;
00335         }
00336 
00337         word GetWhole() const {return m_whole;}
00338         hword GetLowHalf() const {return hword(m_whole);}
00339         hword GetHighHalf() const {return hword(m_whole>>(WORD_BITS/2));}
00340         hword GetHighHalfAsBorrow() const {return 0-hword(m_whole>>(WORD_BITS/2));}
00341         
00342 private:
00343         word m_whole;
00344 };
00345 
00346 // do a 3 word by 2 word divide, returns quotient and leaves remainder in A
00347 template <class S, class D>
00348 S DivideThreeWordsByTwo(S *A, S B0, S B1, D *dummy=NULL)
00349 {
00350         // assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a S
00351         assert(A[2] < B1 || (A[2]==B1 && A[1] < B0));
00352 
00353         // estimate the quotient: do a 2 S by 1 S divide
00354         S Q;
00355         if (S(B1+1) == 0)
00356                 Q = A[2];
00357         else
00358                 Q = D(A[1], A[2]) / S(B1+1);
00359 
00360         // now subtract Q*B from A
00361         D p = D::Multiply(B0, Q);
00362         D u = (D) A[0] - p.GetLowHalf();
00363         A[0] = u.GetLowHalf();
00364         u = (D) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - D::Multiply(B1, Q);
00365         A[1] = u.GetLowHalf();
00366         A[2] += u.GetHighHalf();
00367 
00368         // Q <= actual quotient, so fix it
00369         while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
00370         {
00371                 u = (D) A[0] - B0;
00372                 A[0] = u.GetLowHalf();
00373                 u = (D) A[1] - B1 - u.GetHighHalfAsBorrow();
00374                 A[1] = u.GetLowHalf();
00375                 A[2] += u.GetHighHalf();
00376                 Q++;
00377                 assert(Q);      // shouldn't overflow
00378         }
00379 
00380         return Q;
00381 }
00382 
00383 // do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1
00384 template <class S, class D>
00385 inline D DivideFourWordsByTwo(S *T, const D &Al, const D &Ah, const D &B)
00386 {
00387         if (!B) // if divisor is 0, we assume divisor==2**(2*WORD_BITS)
00388                 return D(Ah.GetLowHalf(), Ah.GetHighHalf());
00389         else
00390         {
00391                 S Q[2];
00392                 T[0] = Al.GetLowHalf();
00393                 T[1] = Al.GetHighHalf(); 
00394                 T[2] = Ah.GetLowHalf();
00395                 T[3] = Ah.GetHighHalf();
00396                 Q[1] = DivideThreeWordsByTwo<S, D>(T+1, B.GetLowHalf(), B.GetHighHalf());
00397                 Q[0] = DivideThreeWordsByTwo<S, D>(T, B.GetLowHalf(), B.GetHighHalf());
00398                 return D(Q[0], Q[1]);
00399         }
00400 }
00401 
00402 // returns quotient, which must fit in a word
00403 inline word DWord::operator/(word a)
00404 {
00405         #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00406                 return word(m_whole / a);
00407         #else
00408                 hword r[4];
00409                 return DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a).GetWhole();
00410         #endif
00411 }
00412 
00413 inline word DWord::operator%(word a)
00414 {
00415         #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00416                 return word(m_whole % a);
00417         #else
00418                 if (a < (word(1) << (WORD_BITS/2)))
00419                 {
00420                         hword h = hword(a);
00421                         word r = m_halfs.high % h;
00422                         r = ((m_halfs.low >> (WORD_BITS/2)) + (r << (WORD_BITS/2))) % h;
00423                         return hword((hword(m_halfs.low) + (r << (WORD_BITS/2))) % h);
00424                 }
00425                 else
00426                 {
00427                         hword r[4];
00428                         DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a);
00429                         return Word(r[0], r[1]).GetWhole();
00430                 }
00431         #endif
00432 }
00433 
00434 // ********************************************************
00435 
00436 // use some tricks to share assembly code between MSVC and GCC
00437 #if defined(__GNUC__)
00438         #define AddPrologue \
00439                 int result;     \
00440                 __asm__ __volatile__ \
00441                 ( \
00442                         ".intel_syntax noprefix;"
00443         #define AddEpilogue \
00444                         ".att_syntax prefix;" \
00445                                         : "=a" (result)\
00446                                         : "d" (C), "a" (A), "D" (B), "c" (N) \
00447                                         : "%esi", "memory", "cc" \
00448                 );\
00449                 return result;
00450         #define MulPrologue \
00451                 __asm__ __volatile__ \
00452                 ( \
00453                         ".intel_syntax noprefix;" \
00454                         AS1(    push    ebx) \
00455                         AS2(    mov             ebx, edx)
00456         #define MulEpilogue \
00457                         AS1(    pop             ebx) \
00458                         ".att_syntax prefix;" \
00459                         : \
00460                         : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B) \
00461                         : "%esi", "memory", "cc" \
00462                 );
00463         #define SquPrologue             MulPrologue
00464         #define SquEpilogue     \
00465                         AS1(    pop             ebx) \
00466                         ".att_syntax prefix;" \
00467                         : \
00468                         : "d" (s_maskLow16), "c" (C), "a" (A) \
00469                         : "%esi", "%edi", "memory", "cc" \
00470                 );
00471         #define TopPrologue             MulPrologue
00472         #define TopEpilogue     \
00473                         AS1(    pop             ebx) \
00474                         ".att_syntax prefix;" \
00475                         : \
00476                         : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B), "S" (L) \
00477                         : "memory", "cc" \
00478                 );
00479 #else
00480         #define AddPrologue \
00481                 __asm   push edi \
00482                 __asm   push esi \
00483                 __asm   mov             eax, [esp+12] \
00484                 __asm   mov             edi, [esp+16]
00485         #define AddEpilogue \
00486                 __asm   pop esi \
00487                 __asm   pop edi \
00488                 __asm   ret 8
00489 #if _MSC_VER < 1300
00490         #define SaveEBX         __asm push ebx
00491         #define RestoreEBX      __asm pop ebx
00492 #else
00493         #define SaveEBX
00494         #define RestoreEBX
00495 #endif
00496         #define SquPrologue                                     \
00497                 AS2(    mov             eax, A)                 \
00498                 AS2(    mov             ecx, C)                 \
00499                 SaveEBX                                                 \
00500                 AS2(    lea             ebx, s_maskLow16)
00501         #define MulPrologue                                     \
00502                 AS2(    mov             eax, A)                 \
00503                 AS2(    mov             edi, B)                 \
00504                 AS2(    mov             ecx, C)                 \
00505                 SaveEBX                                                 \
00506                 AS2(    lea             ebx, s_maskLow16)
00507         #define TopPrologue                                     \
00508                 AS2(    mov             eax, A)                 \
00509                 AS2(    mov             edi, B)                 \
00510                 AS2(    mov             ecx, C)                 \
00511                 AS2(    mov             esi, L)                 \
00512                 SaveEBX                                                 \
00513                 AS2(    lea             ebx, s_maskLow16)
00514         #define SquEpilogue             RestoreEBX
00515         #define MulEpilogue             RestoreEBX
00516         #define TopEpilogue             RestoreEBX
00517 #endif
00518 
00519 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
00520 extern "C" {
00521 int Baseline_Add(size_t N, word *C, const word *A, const word *B);
00522 int Baseline_Sub(size_t N, word *C, const word *A, const word *B);
00523 }
00524 #elif defined(CRYPTOPP_X64_ASM_AVAILABLE) && defined(__GNUC__)
00525 int Baseline_Add(size_t N, word *C, const word *A, const word *B)
00526 {
00527         word result;
00528         __asm__ __volatile__
00529         (
00530         ".intel_syntax;"
00531         AS1(    neg             %1)
00532         ASJ(    jz,             1, f)
00533         AS2(    mov             %0,[%3+8*%1])
00534         AS2(    add             %0,[%4+8*%1])
00535         AS2(    mov             [%2+8*%1],%0)
00536         ASL(0)
00537         AS2(    mov             %0,[%3+8*%1+8])
00538         AS2(    adc             %0,[%4+8*%1+8])
00539         AS2(    mov             [%2+8*%1+8],%0)
00540         AS2(    lea             %1,[%1+2])
00541         ASJ(    jrcxz,  1, f)
00542         AS2(    mov             %0,[%3+8*%1])
00543         AS2(    adc             %0,[%4+8*%1])
00544         AS2(    mov             [%2+8*%1],%0)
00545         ASJ(    jmp,    0, b)
00546         ASL(1)
00547         AS2(    mov             %0, 0)
00548         AS2(    adc             %0, %0)
00549         ".att_syntax;"
00550         : "=&r" (result)
00551         : "c" (N), "r" (C+N), "r" (A+N), "r" (B+N)
00552         : "memory", "cc"
00553         );
00554         return (int)result;
00555 }
00556 
00557 int Baseline_Sub(size_t N, word *C, const word *A, const word *B)
00558 {
00559         word result;
00560         __asm__ __volatile__
00561         (
00562         ".intel_syntax;"
00563         AS1(    neg             %1)
00564         ASJ(    jz,             1, f)
00565         AS2(    mov             %0,[%3+8*%1])
00566         AS2(    sub             %0,[%4+8*%1])
00567         AS2(    mov             [%2+8*%1],%0)
00568         ASL(0)
00569         AS2(    mov             %0,[%3+8*%1+8])
00570         AS2(    sbb             %0,[%4+8*%1+8])
00571         AS2(    mov             [%2+8*%1+8],%0)
00572         AS2(    lea             %1,[%1+2])
00573         ASJ(    jrcxz,  1, f)
00574         AS2(    mov             %0,[%3+8*%1])
00575         AS2(    sbb             %0,[%4+8*%1])
00576         AS2(    mov             [%2+8*%1],%0)
00577         ASJ(    jmp,    0, b)
00578         ASL(1)
00579         AS2(    mov             %0, 0)
00580         AS2(    adc             %0, %0)
00581         ".att_syntax;"
00582         : "=&r" (result)
00583         : "c" (N), "r" (C+N), "r" (A+N), "r" (B+N)
00584         : "memory", "cc"
00585         );
00586         return (int)result;
00587 }
00588 #elif defined(CRYPTOPP_X86_ASM_AVAILABLE) && CRYPTOPP_BOOL_X86
00589 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
00590 {
00591         AddPrologue
00592 
00593         // now: eax = A, edi = B, edx = C, ecx = N
00594         AS2(    lea             eax, [eax+4*ecx])
00595         AS2(    lea             edi, [edi+4*ecx])
00596         AS2(    lea             edx, [edx+4*ecx])
00597 
00598         AS1(    neg             ecx)                            // ecx is negative index
00599         AS2(    test    ecx, 2)                         // this clears carry flag
00600         ASJ(    jz,             0, f)
00601         AS2(    sub             ecx, 2)
00602         ASJ(    jmp,    1, f)
00603 
00604         ASL(0)
00605         ASJ(    jecxz,  2, f)                           // loop until ecx overflows and becomes zero
00606         AS2(    mov             esi,[eax+4*ecx])
00607         AS2(    adc             esi,[edi+4*ecx])
00608         AS2(    mov             [edx+4*ecx],esi)
00609         AS2(    mov             esi,[eax+4*ecx+4])
00610         AS2(    adc             esi,[edi+4*ecx+4])
00611         AS2(    mov             [edx+4*ecx+4],esi)
00612         ASL(1)
00613         AS2(    mov             esi,[eax+4*ecx+8])
00614         AS2(    adc             esi,[edi+4*ecx+8])
00615         AS2(    mov             [edx+4*ecx+8],esi)
00616         AS2(    mov             esi,[eax+4*ecx+12])
00617         AS2(    adc             esi,[edi+4*ecx+12])
00618         AS2(    mov             [edx+4*ecx+12],esi)
00619 
00620         AS2(    lea             ecx,[ecx+4])            // advance index, avoid inc which causes slowdown on Intel Core 2
00621         ASJ(    jmp,    0, b)
00622 
00623         ASL(2)
00624         AS2(    mov             eax, 0)
00625         AS1(    setc    al)                                     // store carry into eax (return result register)
00626 
00627         AddEpilogue
00628 }
00629 
00630 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B)
00631 {
00632         AddPrologue
00633 
00634         // now: eax = A, edi = B, edx = C, ecx = N
00635         AS2(    lea             eax, [eax+4*ecx])
00636         AS2(    lea             edi, [edi+4*ecx])
00637         AS2(    lea             edx, [edx+4*ecx])
00638 
00639         AS1(    neg             ecx)                            // ecx is negative index
00640         AS2(    test    ecx, 2)                         // this clears carry flag
00641         ASJ(    jz,             0, f)
00642         AS2(    sub             ecx, 2)
00643         ASJ(    jmp,    1, f)
00644 
00645         ASL(0)
00646         ASJ(    jecxz,  2, f)                           // loop until ecx overflows and becomes zero
00647         AS2(    mov             esi,[eax+4*ecx])
00648         AS2(    sbb             esi,[edi+4*ecx])
00649         AS2(    mov             [edx+4*ecx],esi)
00650         AS2(    mov             esi,[eax+4*ecx+4])
00651         AS2(    sbb             esi,[edi+4*ecx+4])
00652         AS2(    mov             [edx+4*ecx+4],esi)
00653         ASL(1)
00654         AS2(    mov             esi,[eax+4*ecx+8])
00655         AS2(    sbb             esi,[edi+4*ecx+8])
00656         AS2(    mov             [edx+4*ecx+8],esi)
00657         AS2(    mov             esi,[eax+4*ecx+12])
00658         AS2(    sbb             esi,[edi+4*ecx+12])
00659         AS2(    mov             [edx+4*ecx+12],esi)
00660 
00661         AS2(    lea             ecx,[ecx+4])            // advance index, avoid inc which causes slowdown on Intel Core 2
00662         ASJ(    jmp,    0, b)
00663 
00664         ASL(2)
00665         AS2(    mov             eax, 0)
00666         AS1(    setc    al)                                     // store carry into eax (return result register)
00667 
00668         AddEpilogue
00669 }
00670 
00671 #if CRYPTOPP_INTEGER_SSE2
00672 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Add(size_t N, word *C, const word *A, const word *B)
00673 {
00674         AddPrologue
00675 
00676         // now: eax = A, edi = B, edx = C, ecx = N
00677         AS2(    lea             eax, [eax+4*ecx])
00678         AS2(    lea             edi, [edi+4*ecx])
00679         AS2(    lea             edx, [edx+4*ecx])
00680 
00681         AS1(    neg             ecx)                            // ecx is negative index
00682         AS2(    pxor    mm2, mm2)
00683         ASJ(    jz,             2, f)
00684         AS2(    test    ecx, 2)                         // this clears carry flag
00685         ASJ(    jz,             0, f)
00686         AS2(    sub             ecx, 2)
00687         ASJ(    jmp,    1, f)
00688 
00689         ASL(0)
00690         AS2(    movd     mm0, DWORD PTR [eax+4*ecx])
00691         AS2(    movd     mm1, DWORD PTR [edi+4*ecx])
00692         AS2(    paddq    mm0, mm1)
00693         AS2(    paddq    mm2, mm0)
00694         AS2(    movd     DWORD PTR [edx+4*ecx], mm2)
00695         AS2(    psrlq    mm2, 32)
00696 
00697         AS2(    movd     mm0, DWORD PTR [eax+4*ecx+4])
00698         AS2(    movd     mm1, DWORD PTR [edi+4*ecx+4])
00699         AS2(    paddq    mm0, mm1)
00700         AS2(    paddq    mm2, mm0)
00701         AS2(    movd     DWORD PTR [edx+4*ecx+4], mm2)
00702         AS2(    psrlq    mm2, 32)
00703 
00704         ASL(1)
00705         AS2(    movd     mm0, DWORD PTR [eax+4*ecx+8])
00706         AS2(    movd     mm1, DWORD PTR [edi+4*ecx+8])
00707         AS2(    paddq    mm0, mm1)
00708         AS2(    paddq    mm2, mm0)
00709         AS2(    movd     DWORD PTR [edx+4*ecx+8], mm2)
00710         AS2(    psrlq    mm2, 32)
00711 
00712         AS2(    movd     mm0, DWORD PTR [eax+4*ecx+12])
00713         AS2(    movd     mm1, DWORD PTR [edi+4*ecx+12])
00714         AS2(    paddq    mm0, mm1)
00715         AS2(    paddq    mm2, mm0)
00716         AS2(    movd     DWORD PTR [edx+4*ecx+12], mm2)
00717         AS2(    psrlq    mm2, 32)
00718 
00719         AS2(    add             ecx, 4)
00720         ASJ(    jnz,    0, b)
00721 
00722         ASL(2)
00723         AS2(    movd    eax, mm2)
00724         AS1(    emms)
00725 
00726         AddEpilogue
00727 }
00728 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Sub(size_t N, word *C, const word *A, const word *B)
00729 {
00730         AddPrologue
00731 
00732         // now: eax = A, edi = B, edx = C, ecx = N
00733         AS2(    lea             eax, [eax+4*ecx])
00734         AS2(    lea             edi, [edi+4*ecx])
00735         AS2(    lea             edx, [edx+4*ecx])
00736 
00737         AS1(    neg             ecx)                            // ecx is negative index
00738         AS2(    pxor    mm2, mm2)
00739         ASJ(    jz,             2, f)
00740         AS2(    test    ecx, 2)                         // this clears carry flag
00741         ASJ(    jz,             0, f)
00742         AS2(    sub             ecx, 2)
00743         ASJ(    jmp,    1, f)
00744 
00745         ASL(0)
00746         AS2(    movd     mm0, DWORD PTR [eax+4*ecx])
00747         AS2(    movd     mm1, DWORD PTR [edi+4*ecx])
00748         AS2(    psubq    mm0, mm1)
00749         AS2(    psubq    mm0, mm2)
00750         AS2(    movd     DWORD PTR [edx+4*ecx], mm0)
00751         AS2(    psrlq    mm0, 63)
00752 
00753         AS2(    movd     mm2, DWORD PTR [eax+4*ecx+4])
00754         AS2(    movd     mm1, DWORD PTR [edi+4*ecx+4])
00755         AS2(    psubq    mm2, mm1)
00756         AS2(    psubq    mm2, mm0)
00757         AS2(    movd     DWORD PTR [edx+4*ecx+4], mm2)
00758         AS2(    psrlq    mm2, 63)
00759 
00760         ASL(1)
00761         AS2(    movd     mm0, DWORD PTR [eax+4*ecx+8])
00762         AS2(    movd     mm1, DWORD PTR [edi+4*ecx+8])
00763         AS2(    psubq    mm0, mm1)
00764         AS2(    psubq    mm0, mm2)
00765         AS2(    movd     DWORD PTR [edx+4*ecx+8], mm0)
00766         AS2(    psrlq    mm0, 63)
00767 
00768         AS2(    movd     mm2, DWORD PTR [eax+4*ecx+12])
00769         AS2(    movd     mm1, DWORD PTR [edi+4*ecx+12])
00770         AS2(    psubq    mm2, mm1)
00771         AS2(    psubq    mm2, mm0)
00772         AS2(    movd     DWORD PTR [edx+4*ecx+12], mm2)
00773         AS2(    psrlq    mm2, 63)
00774 
00775         AS2(    add             ecx, 4)
00776         ASJ(    jnz,    0, b)
00777 
00778         ASL(2)
00779         AS2(    movd    eax, mm2)
00780         AS1(    emms)
00781 
00782         AddEpilogue
00783 }
00784 #endif  // #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
00785 #else
00786 int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
00787 {
00788         assert (N%2 == 0);
00789 
00790         Declare2Words(u);
00791         AssignWord(u, 0);
00792         for (size_t i=0; i<N; i+=2)
00793         {
00794                 AddWithCarry(u, A[i], B[i]);
00795                 C[i] = LowWord(u);
00796                 AddWithCarry(u, A[i+1], B[i+1]);
00797                 C[i+1] = LowWord(u);
00798         }
00799         return int(GetCarry(u));
00800 }
00801 
00802 int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B)
00803 {
00804         assert (N%2 == 0);
00805 
00806         Declare2Words(u);
00807         AssignWord(u, 0);
00808         for (size_t i=0; i<N; i+=2)
00809         {
00810                 SubtractWithBorrow(u, A[i], B[i]);
00811                 C[i] = LowWord(u);
00812                 SubtractWithBorrow(u, A[i+1], B[i+1]);
00813                 C[i+1] = LowWord(u);
00814         }
00815         return int(GetBorrow(u));
00816 }
00817 #endif
00818 
00819 static word LinearMultiply(word *C, const word *A, word B, size_t N)
00820 {
00821         word carry=0;
00822         for(unsigned i=0; i<N; i++)
00823         {
00824                 Declare2Words(p);
00825                 MultiplyWords(p, A[i], B);
00826                 Acc2WordsBy1(p, carry);
00827                 C[i] = LowWord(p);
00828                 carry = HighWord(p);
00829         }
00830         return carry;
00831 }
00832 
00833 #ifndef CRYPTOPP_DOXYGEN_PROCESSING
00834 
00835 #define Mul_2 \
00836         Mul_Begin(2) \
00837         Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00838         Mul_End(1, 1)
00839 
00840 #define Mul_4 \
00841         Mul_Begin(4) \
00842         Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00843         Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0)  \
00844         Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0)  \
00845         Mul_SaveAcc(3, 1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1)  \
00846         Mul_SaveAcc(4, 2, 3) Mul_Acc(3, 2) \
00847         Mul_End(5, 3)
00848 
00849 #define Mul_8 \
00850         Mul_Begin(8) \
00851         Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00852         Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0)  \
00853         Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0)  \
00854         Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
00855         Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
00856         Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
00857         Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
00858         Mul_SaveAcc(7, 1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
00859         Mul_SaveAcc(8, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
00860         Mul_SaveAcc(9, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
00861         Mul_SaveAcc(10, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
00862         Mul_SaveAcc(11, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
00863         Mul_SaveAcc(12, 6, 7) Mul_Acc(7, 6) \
00864         Mul_End(13, 7)
00865 
00866 #define Mul_16 \
00867         Mul_Begin(16) \
00868         Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00869         Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
00870         Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
00871         Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
00872         Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
00873         Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
00874         Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
00875         Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
00876         Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
00877         Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
00878         Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
00879         Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
00880         Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
00881         Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
00882         Mul_SaveAcc(14, 0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
00883         Mul_SaveAcc(15, 1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
00884         Mul_SaveAcc(16, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
00885         Mul_SaveAcc(17, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
00886         Mul_SaveAcc(18, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
00887         Mul_SaveAcc(19, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
00888         Mul_SaveAcc(20, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
00889         Mul_SaveAcc(21, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
00890         Mul_SaveAcc(22, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
00891         Mul_SaveAcc(23, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
00892         Mul_SaveAcc(24, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
00893         Mul_SaveAcc(25, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
00894         Mul_SaveAcc(26, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
00895         Mul_SaveAcc(27, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
00896         Mul_SaveAcc(28, 14, 15) Mul_Acc(15, 14) \
00897         Mul_End(29, 15)
00898 
00899 #define Squ_2 \
00900         Squ_Begin(2) \
00901         Squ_End(2)
00902 
00903 #define Squ_4 \
00904         Squ_Begin(4) \
00905         Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
00906         Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
00907         Squ_SaveAcc(3, 1, 3) Squ_Diag(2) \
00908         Squ_SaveAcc(4, 2, 3) Squ_NonDiag \
00909         Squ_End(4)
00910 
00911 #define Squ_8 \
00912         Squ_Begin(8) \
00913         Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
00914         Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
00915         Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
00916         Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
00917         Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
00918         Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
00919         Squ_SaveAcc(7, 1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
00920         Squ_SaveAcc(8, 2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5)  Squ_NonDiag \
00921         Squ_SaveAcc(9, 3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
00922         Squ_SaveAcc(10, 4, 7) Squ_Acc(5, 6) Squ_NonDiag \
00923         Squ_SaveAcc(11, 5, 7) Squ_Diag(6) \
00924         Squ_SaveAcc(12, 6, 7) Squ_NonDiag \
00925         Squ_End(8)
00926 
00927 #define Squ_16 \
00928         Squ_Begin(16) \
00929         Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
00930         Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
00931         Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
00932         Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
00933         Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
00934         Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
00935         Squ_SaveAcc(7, 0, 8) Squ_Acc(1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
00936         Squ_SaveAcc(8, 0, 9) Squ_Acc(1, 8) Squ_Acc(2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
00937         Squ_SaveAcc(9, 0, 10) Squ_Acc(1, 9) Squ_Acc(2, 8) Squ_Acc(3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
00938         Squ_SaveAcc(10, 0, 11) Squ_Acc(1, 10) Squ_Acc(2, 9) Squ_Acc(3, 8) Squ_Acc(4, 7) Squ_Acc(5, 6) Squ_NonDiag \
00939         Squ_SaveAcc(11, 0, 12) Squ_Acc(1, 11) Squ_Acc(2, 10) Squ_Acc(3, 9) Squ_Acc(4, 8) Squ_Acc(5, 7) Squ_Diag(6) \
00940         Squ_SaveAcc(12, 0, 13) Squ_Acc(1, 12) Squ_Acc(2, 11) Squ_Acc(3, 10) Squ_Acc(4, 9) Squ_Acc(5, 8) Squ_Acc(6, 7) Squ_NonDiag \
00941         Squ_SaveAcc(13, 0, 14) Squ_Acc(1, 13) Squ_Acc(2, 12) Squ_Acc(3, 11) Squ_Acc(4, 10) Squ_Acc(5, 9) Squ_Acc(6, 8) Squ_Diag(7) \
00942         Squ_SaveAcc(14, 0, 15) Squ_Acc(1, 14) Squ_Acc(2, 13) Squ_Acc(3, 12) Squ_Acc(4, 11) Squ_Acc(5, 10) Squ_Acc(6, 9) Squ_Acc(7, 8) Squ_NonDiag \
00943         Squ_SaveAcc(15, 1, 15) Squ_Acc(2, 14) Squ_Acc(3, 13) Squ_Acc(4, 12) Squ_Acc(5, 11) Squ_Acc(6, 10) Squ_Acc(7, 9) Squ_Diag(8) \
00944         Squ_SaveAcc(16, 2, 15) Squ_Acc(3, 14) Squ_Acc(4, 13) Squ_Acc(5, 12) Squ_Acc(6, 11) Squ_Acc(7, 10) Squ_Acc(8, 9) Squ_NonDiag \
00945         Squ_SaveAcc(17, 3, 15) Squ_Acc(4, 14) Squ_Acc(5, 13) Squ_Acc(6, 12) Squ_Acc(7, 11) Squ_Acc(8, 10) Squ_Diag(9) \
00946         Squ_SaveAcc(18, 4, 15) Squ_Acc(5, 14) Squ_Acc(6, 13) Squ_Acc(7, 12) Squ_Acc(8, 11) Squ_Acc(9, 10) Squ_NonDiag \
00947         Squ_SaveAcc(19, 5, 15) Squ_Acc(6, 14) Squ_Acc(7, 13) Squ_Acc(8, 12) Squ_Acc(9, 11) Squ_Diag(10) \
00948         Squ_SaveAcc(20, 6, 15) Squ_Acc(7, 14) Squ_Acc(8, 13) Squ_Acc(9, 12) Squ_Acc(10, 11) Squ_NonDiag \
00949         Squ_SaveAcc(21, 7, 15) Squ_Acc(8, 14) Squ_Acc(9, 13) Squ_Acc(10, 12) Squ_Diag(11) \
00950         Squ_SaveAcc(22, 8, 15) Squ_Acc(9, 14) Squ_Acc(10, 13) Squ_Acc(11, 12) Squ_NonDiag \
00951         Squ_SaveAcc(23, 9, 15) Squ_Acc(10, 14) Squ_Acc(11, 13) Squ_Diag(12) \
00952         Squ_SaveAcc(24, 10, 15) Squ_Acc(11, 14) Squ_Acc(12, 13) Squ_NonDiag \
00953         Squ_SaveAcc(25, 11, 15) Squ_Acc(12, 14) Squ_Diag(13) \
00954         Squ_SaveAcc(26, 12, 15) Squ_Acc(13, 14) Squ_NonDiag \
00955         Squ_SaveAcc(27, 13, 15) Squ_Diag(14) \
00956         Squ_SaveAcc(28, 14, 15) Squ_NonDiag \
00957         Squ_End(16)
00958 
00959 #define Bot_2 \
00960         Mul_Begin(2) \
00961         Bot_SaveAcc(0, 0, 1) Bot_Acc(1, 0) \
00962         Bot_End(2)
00963 
00964 #define Bot_4 \
00965         Mul_Begin(4) \
00966         Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00967         Mul_SaveAcc(1, 2, 0) Mul_Acc(1, 1) Mul_Acc(0, 2)  \
00968         Bot_SaveAcc(2, 0, 3) Bot_Acc(1, 2) Bot_Acc(2, 1) Bot_Acc(3, 0)  \
00969         Bot_End(4)
00970 
00971 #define Bot_8 \
00972         Mul_Begin(8) \
00973         Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00974         Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0)  \
00975         Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0)  \
00976         Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
00977         Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
00978         Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
00979         Bot_SaveAcc(6, 0, 7) Bot_Acc(1, 6) Bot_Acc(2, 5) Bot_Acc(3, 4) Bot_Acc(4, 3) Bot_Acc(5, 2) Bot_Acc(6, 1) Bot_Acc(7, 0) \
00980         Bot_End(8)
00981 
00982 #define Bot_16 \
00983         Mul_Begin(16) \
00984         Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00985         Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
00986         Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
00987         Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
00988         Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
00989         Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
00990         Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
00991         Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
00992         Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
00993         Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
00994         Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
00995         Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
00996         Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
00997         Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
00998         Bot_SaveAcc(14, 0, 15) Bot_Acc(1, 14) Bot_Acc(2, 13) Bot_Acc(3, 12) Bot_Acc(4, 11) Bot_Acc(5, 10) Bot_Acc(6, 9) Bot_Acc(7, 8) Bot_Acc(8, 7) Bot_Acc(9, 6) Bot_Acc(10, 5) Bot_Acc(11, 4) Bot_Acc(12, 3) Bot_Acc(13, 2) Bot_Acc(14, 1) Bot_Acc(15, 0) \
00999         Bot_End(16)
01000         
01001 #endif
01002 
01003 #if 0
01004 #define Mul_Begin(n)                            \
01005         Declare2Words(p)                                \
01006         Declare2Words(c)                                \
01007         Declare2Words(d)                                \
01008         MultiplyWords(p, A[0], B[0])    \
01009         AssignWord(c, LowWord(p))               \
01010         AssignWord(d, HighWord(p))
01011 
01012 #define Mul_Acc(i, j)                           \
01013         MultiplyWords(p, A[i], B[j])    \
01014         Acc2WordsBy1(c, LowWord(p))             \
01015         Acc2WordsBy1(d, HighWord(p))
01016 
01017 #define Mul_SaveAcc(k, i, j)            \
01018         R[k] = LowWord(c);                              \
01019         Add2WordsBy1(c, d, HighWord(c)) \
01020         MultiplyWords(p, A[i], B[j])    \
01021         AssignWord(d, HighWord(p))              \
01022         Acc2WordsBy1(c, LowWord(p))
01023 
01024 #define Mul_End(n)                                      \
01025         R[2*n-3] = LowWord(c);                  \
01026         Acc2WordsBy1(d, HighWord(c))    \
01027         MultiplyWords(p, A[n-1], B[n-1])\
01028         Acc2WordsBy2(d, p)                              \
01029         R[2*n-2] = LowWord(d);                  \
01030         R[2*n-1] = HighWord(d);
01031 
01032 #define Bot_SaveAcc(k, i, j)            \
01033         R[k] = LowWord(c);                              \
01034         word e = LowWord(d) + HighWord(c);      \
01035         e += A[i] * B[j];
01036 
01037 #define Bot_Acc(i, j)   \
01038         e += A[i] * B[j];
01039 
01040 #define Bot_End(n)              \
01041         R[n-1] = e;
01042 #else
01043 #define Mul_Begin(n)                            \
01044         Declare2Words(p)                                \
01045         word c; \
01046         Declare2Words(d)                                \
01047         MultiplyWords(p, A[0], B[0])    \
01048         c = LowWord(p);         \
01049         AssignWord(d, HighWord(p))
01050 
01051 #define Mul_Acc(i, j)                           \
01052         MulAcc(c, d, A[i], B[j])
01053 
01054 #define Mul_SaveAcc(k, i, j)            \
01055         R[k] = c;                               \
01056         c = LowWord(d); \
01057         AssignWord(d, HighWord(d))      \
01058         MulAcc(c, d, A[i], B[j])
01059 
01060 #define Mul_End(k, i)                                   \
01061         R[k] = c;                       \
01062         MultiplyWords(p, A[i], B[i])    \
01063         Acc2WordsBy2(p, d)                              \
01064         R[k+1] = LowWord(p);                    \
01065         R[k+2] = HighWord(p);
01066 
01067 #define Bot_SaveAcc(k, i, j)            \
01068         R[k] = c;                               \
01069         c = LowWord(d); \
01070         c += A[i] * B[j];
01071 
01072 #define Bot_Acc(i, j)   \
01073         c += A[i] * B[j];
01074 
01075 #define Bot_End(n)              \
01076         R[n-1] = c;
01077 #endif
01078 
01079 #define Squ_Begin(n)                            \
01080         Declare2Words(p)                                \
01081         word c;                         \
01082         Declare2Words(d)                                \
01083         Declare2Words(e)                                \
01084         MultiplyWords(p, A[0], A[0])    \
01085         R[0] = LowWord(p);                              \
01086         AssignWord(e, HighWord(p))              \
01087         MultiplyWords(p, A[0], A[1])    \
01088         c = LowWord(p);         \
01089         AssignWord(d, HighWord(p))              \
01090         Squ_NonDiag                                             \
01091 
01092 #define Squ_NonDiag                             \
01093         Double3Words(c, d)
01094 
01095 #define Squ_SaveAcc(k, i, j)            \
01096         Acc3WordsBy2(c, d, e)                   \
01097         R[k] = c;                               \
01098         MultiplyWords(p, A[i], A[j])    \
01099         c = LowWord(p);         \
01100         AssignWord(d, HighWord(p))              \
01101 
01102 #define Squ_Acc(i, j)                           \
01103         MulAcc(c, d, A[i], A[j])
01104 
01105 #define Squ_Diag(i)                                     \
01106         Squ_NonDiag                                             \
01107         MulAcc(c, d, A[i], A[i])
01108 
01109 #define Squ_End(n)                                      \
01110         Acc3WordsBy2(c, d, e)                   \
01111         R[2*n-3] = c;                   \
01112         MultiplyWords(p, A[n-1], A[n-1])\
01113         Acc2WordsBy2(p, e)                              \
01114         R[2*n-2] = LowWord(p);                  \
01115         R[2*n-1] = HighWord(p);
01116 
01117 void Baseline_Multiply2(word *R, const word *A, const word *B)
01118 {
01119         Mul_2
01120 }
01121 
01122 void Baseline_Multiply4(word *R, const word *A, const word *B)
01123 {
01124         Mul_4
01125 }
01126 
01127 void Baseline_Multiply8(word *R, const word *A, const word *B)
01128 {
01129         Mul_8
01130 }
01131 
01132 void Baseline_Square2(word *R, const word *A)
01133 {
01134         Squ_2
01135 }
01136 
01137 void Baseline_Square4(word *R, const word *A)
01138 {
01139         Squ_4
01140 }
01141 
01142 void Baseline_Square8(word *R, const word *A)
01143 {
01144         Squ_8
01145 }
01146 
01147 void Baseline_MultiplyBottom2(word *R, const word *A, const word *B)
01148 {
01149         Bot_2
01150 }
01151 
01152 void Baseline_MultiplyBottom4(word *R, const word *A, const word *B)
01153 {
01154         Bot_4
01155 }
01156 
01157 void Baseline_MultiplyBottom8(word *R, const word *A, const word *B)
01158 {
01159         Bot_8
01160 }
01161 
01162 #define Top_Begin(n)                            \
01163         Declare2Words(p)                                \
01164         word c; \
01165         Declare2Words(d)                                \
01166         MultiplyWords(p, A[0], B[n-2]);\
01167         AssignWord(d, HighWord(p));
01168 
01169 #define Top_Acc(i, j)   \
01170         MultiplyWords(p, A[i], B[j]);\
01171         Acc2WordsBy1(d, HighWord(p));
01172 
01173 #define Top_SaveAcc0(i, j)              \
01174         c = LowWord(d); \
01175         AssignWord(d, HighWord(d))      \
01176         MulAcc(c, d, A[i], B[j])
01177 
01178 #define Top_SaveAcc1(i, j)              \
01179         c = L<c; \
01180         Acc2WordsBy1(d, c);     \
01181         c = LowWord(d); \
01182         AssignWord(d, HighWord(d))      \
01183         MulAcc(c, d, A[i], B[j])
01184 
01185 void Baseline_MultiplyTop2(word *R, const word *A, const word *B, word L)
01186 {
01187         word T[4];
01188         Baseline_Multiply2(T, A, B);
01189         R[0] = T[2];
01190         R[1] = T[3];
01191 }
01192 
01193 void Baseline_MultiplyTop4(word *R, const word *A, const word *B, word L)
01194 {
01195         Top_Begin(4)
01196         Top_Acc(1, 1) Top_Acc(2, 0)  \
01197         Top_SaveAcc0(0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0)  \
01198         Top_SaveAcc1(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1)  \
01199         Mul_SaveAcc(0, 2, 3) Mul_Acc(3, 2) \
01200         Mul_End(1, 3)
01201 }
01202 
01203 void Baseline_MultiplyTop8(word *R, const word *A, const word *B, word L)
01204 {
01205         Top_Begin(8)
01206         Top_Acc(1, 5) Top_Acc(2, 4) Top_Acc(3, 3) Top_Acc(4, 2) Top_Acc(5, 1) Top_Acc(6, 0) \
01207         Top_SaveAcc0(0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
01208         Top_SaveAcc1(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
01209         Mul_SaveAcc(0, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
01210         Mul_SaveAcc(1, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
01211         Mul_SaveAcc(2, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
01212         Mul_SaveAcc(3, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
01213         Mul_SaveAcc(4, 6, 7) Mul_Acc(7, 6) \
01214         Mul_End(5, 7)
01215 }
01216 
01217 #if !CRYPTOPP_INTEGER_SSE2      // save memory by not compiling these functions when SSE2 is available
01218 void Baseline_Multiply16(word *R, const word *A, const word *B)
01219 {
01220         Mul_16
01221 }
01222 
01223 void Baseline_Square16(word *R, const word *A)
01224 {
01225         Squ_16
01226 }
01227 
01228 void Baseline_MultiplyBottom16(word *R, const word *A, const word *B)
01229 {
01230         Bot_16
01231 }
01232 
01233 void Baseline_MultiplyTop16(word *R, const word *A, const word *B, word L)
01234 {
01235         Top_Begin(16)
01236         Top_Acc(1, 13) Top_Acc(2, 12) Top_Acc(3, 11) Top_Acc(4, 10) Top_Acc(5, 9) Top_Acc(6, 8) Top_Acc(7, 7) Top_Acc(8, 6) Top_Acc(9, 5) Top_Acc(10, 4) Top_Acc(11, 3) Top_Acc(12, 2) Top_Acc(13, 1) Top_Acc(14, 0) \
01237         Top_SaveAcc0(0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
01238         Top_SaveAcc1(1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
01239         Mul_SaveAcc(0, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
01240         Mul_SaveAcc(1, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
01241         Mul_SaveAcc(2, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
01242         Mul_SaveAcc(3, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
01243         Mul_SaveAcc(4, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
01244         Mul_SaveAcc(5, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
01245         Mul_SaveAcc(6, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
01246         Mul_SaveAcc(7, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
01247         Mul_SaveAcc(8, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
01248         Mul_SaveAcc(9, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
01249         Mul_SaveAcc(10, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
01250         Mul_SaveAcc(11, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
01251         Mul_SaveAcc(12, 14, 15) Mul_Acc(15, 14) \
01252         Mul_End(13, 15)
01253 }
01254 #endif
01255 
01256 // ********************************************************
01257 
01258 #if CRYPTOPP_INTEGER_SSE2
01259 
01260 CRYPTOPP_ALIGN_DATA(16) static const word32 s_maskLow16[4] CRYPTOPP_SECTION_ALIGN16 = {0xffff,0xffff,0xffff,0xffff};
01261 
01262 #undef Mul_Begin
01263 #undef Mul_Acc
01264 #undef Top_Begin
01265 #undef Top_Acc
01266 #undef Squ_Acc
01267 #undef Squ_NonDiag
01268 #undef Squ_Diag
01269 #undef Squ_SaveAcc
01270 #undef Squ_Begin
01271 #undef Mul_SaveAcc
01272 #undef Bot_Acc
01273 #undef Bot_SaveAcc
01274 #undef Bot_End
01275 #undef Squ_End
01276 #undef Mul_End
01277 
01278 #define SSE2_FinalSave(k)                       \
01279         AS2(    psllq           xmm5, 16)       \
01280         AS2(    paddq           xmm4, xmm5)     \
01281         AS2(    movq            QWORD PTR [ecx+8*(k)], xmm4)
01282 
01283 #define SSE2_SaveShift(k)                       \
01284         AS2(    movq            xmm0, xmm6)     \
01285         AS2(    punpckhqdq      xmm6, xmm0)     \
01286         AS2(    movq            xmm1, xmm7)     \
01287         AS2(    punpckhqdq      xmm7, xmm1)     \
01288         AS2(    paddd           xmm6, xmm0)     \
01289         AS2(    pslldq          xmm6, 4)        \
01290         AS2(    paddd           xmm7, xmm1)     \
01291         AS2(    paddd           xmm4, xmm6)     \
01292         AS2(    pslldq          xmm7, 4)        \
01293         AS2(    movq            xmm6, xmm4)     \
01294         AS2(    paddd           xmm5, xmm7)     \
01295         AS2(    movq            xmm7, xmm5)     \
01296         AS2(    movd            DWORD PTR [ecx+8*(k)], xmm4)    \
01297         AS2(    psrlq           xmm6, 16)       \
01298         AS2(    paddq           xmm6, xmm7)     \
01299         AS2(    punpckhqdq      xmm4, xmm0)     \
01300         AS2(    punpckhqdq      xmm5, xmm0)     \
01301         AS2(    movq            QWORD PTR [ecx+8*(k)+2], xmm6)  \
01302         AS2(    psrlq           xmm6, 3*16)     \
01303         AS2(    paddd           xmm4, xmm6)     \
01304 
01305 #define Squ_SSE2_SaveShift(k)                   \
01306         AS2(    movq            xmm0, xmm6)     \
01307         AS2(    punpckhqdq      xmm6, xmm0)     \
01308         AS2(    movq            xmm1, xmm7)     \
01309         AS2(    punpckhqdq      xmm7, xmm1)     \
01310         AS2(    paddd           xmm6, xmm0)     \
01311         AS2(    pslldq          xmm6, 4)        \
01312         AS2(    paddd           xmm7, xmm1)     \
01313         AS2(    paddd           xmm4, xmm6)     \
01314         AS2(    pslldq          xmm7, 4)        \
01315         AS2(    movhlps         xmm6, xmm4)     \
01316         AS2(    movd            DWORD PTR [ecx+8*(k)], xmm4)    \
01317         AS2(    paddd           xmm5, xmm7)     \
01318         AS2(    movhps          QWORD PTR [esp+12], xmm5)\
01319         AS2(    psrlq           xmm4, 16)       \
01320         AS2(    paddq           xmm4, xmm5)     \
01321         AS2(    movq            QWORD PTR [ecx+8*(k)+2], xmm4)  \
01322         AS2(    psrlq           xmm4, 3*16)     \
01323         AS2(    paddd           xmm4, xmm6)     \
01324         AS2(    movq            QWORD PTR [esp+4], xmm4)\
01325 
01326 #define SSE2_FirstMultiply(i)                           \
01327         AS2(    movdqa          xmm7, [esi+(i)*16])\
01328         AS2(    movdqa          xmm5, [edi-(i)*16])\
01329         AS2(    pmuludq         xmm5, xmm7)             \
01330         AS2(    movdqa          xmm4, [ebx])\
01331         AS2(    movdqa          xmm6, xmm4)             \
01332         AS2(    pand            xmm4, xmm5)             \
01333         AS2(    psrld           xmm5, 16)               \
01334         AS2(    pmuludq         xmm7, [edx-(i)*16])\
01335         AS2(    pand            xmm6, xmm7)             \
01336         AS2(    psrld           xmm7, 16)
01337 
01338 #define Squ_Begin(n)                                                    \
01339         SquPrologue                                                                     \
01340         AS2(    mov             esi, esp)\
01341         AS2(    and             esp, 0xfffffff0)\
01342         AS2(    lea             edi, [esp-32*n])\
01343         AS2(    sub             esp, 32*n+16)\
01344         AS1(    push    esi)\
01345         AS2(    mov             esi, edi)                                       \
01346         AS2(    xor             edx, edx)                                       \
01347         ASL(1)                                                                          \
01348         ASS(    pshufd  xmm0, [eax+edx], 3,1,2,0)       \
01349         ASS(    pshufd  xmm1, [eax+edx], 2,0,3,1)       \
01350         AS2(    movdqa  [edi+2*edx], xmm0)              \
01351         AS2(    psrlq   xmm0, 32)                                       \
01352         AS2(    movdqa  [edi+2*edx+16], xmm0)   \
01353         AS2(    movdqa  [edi+16*n+2*edx], xmm1)         \
01354         AS2(    psrlq   xmm1, 32)                                       \
01355         AS2(    movdqa  [edi+16*n+2*edx+16], xmm1)      \
01356         AS2(    add             edx, 16)                                        \
01357         AS2(    cmp             edx, 8*(n))                                     \
01358         ASJ(    jne,    1, b)                                           \
01359         AS2(    lea             edx, [edi+16*n])\
01360         SSE2_FirstMultiply(0)                                                   \
01361 
01362 #define Squ_Acc(i)                                                              \
01363         ASL(LSqu##i)                                                            \
01364         AS2(    movdqa          xmm1, [esi+(i)*16])     \
01365         AS2(    movdqa          xmm0, [edi-(i)*16])     \
01366         AS2(    movdqa          xmm2, [ebx])    \
01367         AS2(    pmuludq         xmm0, xmm1)                             \
01368         AS2(    pmuludq         xmm1, [edx-(i)*16])     \
01369         AS2(    movdqa          xmm3, xmm2)                     \
01370         AS2(    pand            xmm2, xmm0)                     \
01371         AS2(    psrld           xmm0, 16)                       \
01372         AS2(    paddd           xmm4, xmm2)                     \
01373         AS2(    paddd           xmm5, xmm0)                     \
01374         AS2(    pand            xmm3, xmm1)                     \
01375         AS2(    psrld           xmm1, 16)                       \
01376         AS2(    paddd           xmm6, xmm3)                     \
01377         AS2(    paddd           xmm7, xmm1)             \
01378 
01379 #define Squ_Acc1(i)             
01380 #define Squ_Acc2(i)             ASC(call, LSqu##i)
01381 #define Squ_Acc3(i)             Squ_Acc2(i)
01382 #define Squ_Acc4(i)             Squ_Acc2(i)
01383 #define Squ_Acc5(i)             Squ_Acc2(i)
01384 #define Squ_Acc6(i)             Squ_Acc2(i)
01385 #define Squ_Acc7(i)             Squ_Acc2(i)
01386 #define Squ_Acc8(i)             Squ_Acc2(i)
01387 
01388 #define SSE2_End(E, n)                                  \
01389         SSE2_SaveShift(2*(n)-3)                 \
01390         AS2(    movdqa          xmm7, [esi+16]) \
01391         AS2(    movdqa          xmm0, [edi])    \
01392         AS2(    pmuludq         xmm0, xmm7)                             \
01393         AS2(    movdqa          xmm2, [ebx])            \
01394         AS2(    pmuludq         xmm7, [edx])    \
01395         AS2(    movdqa          xmm6, xmm2)                             \
01396         AS2(    pand            xmm2, xmm0)                             \
01397         AS2(    psrld           xmm0, 16)                               \
01398         AS2(    paddd           xmm4, xmm2)                             \
01399         AS2(    paddd           xmm5, xmm0)                             \
01400         AS2(    pand            xmm6, xmm7)                             \
01401         AS2(    psrld           xmm7, 16)       \
01402         SSE2_SaveShift(2*(n)-2)                 \
01403         SSE2_FinalSave(2*(n)-1)                 \
01404         AS1(    pop             esp)\
01405         E
01406 
01407 #define Squ_End(n)              SSE2_End(SquEpilogue, n)
01408 #define Mul_End(n)              SSE2_End(MulEpilogue, n)
01409 #define Top_End(n)              SSE2_End(TopEpilogue, n)
01410 
01411 #define Squ_Column1(k, i)       \
01412         Squ_SSE2_SaveShift(k)                                   \
01413         AS2(    add                     esi, 16)        \
01414         SSE2_FirstMultiply(1)\
01415         Squ_Acc##i(i)   \
01416         AS2(    paddd           xmm4, xmm4)             \
01417         AS2(    paddd           xmm5, xmm5)             \
01418         AS2(    movdqa          xmm3, [esi])                            \
01419         AS2(    movq            xmm1, QWORD PTR [esi+8])        \
01420         AS2(    pmuludq         xmm1, xmm3)             \
01421         AS2(    pmuludq         xmm3, xmm3)             \
01422         AS2(    movdqa          xmm0, [ebx])\
01423         AS2(    movdqa          xmm2, xmm0)             \
01424         AS2(    pand            xmm0, xmm1)             \
01425         AS2(    psrld           xmm1, 16)               \
01426         AS2(    paddd           xmm6, xmm0)             \
01427         AS2(    paddd           xmm7, xmm1)             \
01428         AS2(    pand            xmm2, xmm3)             \
01429         AS2(    psrld           xmm3, 16)               \
01430         AS2(    paddd           xmm6, xmm6)             \
01431         AS2(    paddd           xmm7, xmm7)             \
01432         AS2(    paddd           xmm4, xmm2)             \
01433         AS2(    paddd           xmm5, xmm3)             \
01434         AS2(    movq            xmm0, QWORD PTR [esp+4])\
01435         AS2(    movq            xmm1, QWORD PTR [esp+12])\
01436         AS2(    paddd           xmm4, xmm0)\
01437         AS2(    paddd           xmm5, xmm1)\
01438 
01439 #define Squ_Column0(k, i)       \
01440         Squ_SSE2_SaveShift(k)                                   \
01441         AS2(    add                     edi, 16)        \
01442         AS2(    add                     edx, 16)        \
01443         SSE2_FirstMultiply(1)\
01444         Squ_Acc##i(i)   \
01445         AS2(    paddd           xmm6, xmm6)             \
01446         AS2(    paddd           xmm7, xmm7)             \
01447         AS2(    paddd           xmm4, xmm4)             \
01448         AS2(    paddd           xmm5, xmm5)             \
01449         AS2(    movq            xmm0, QWORD PTR [esp+4])\
01450         AS2(    movq            xmm1, QWORD PTR [esp+12])\
01451         AS2(    paddd           xmm4, xmm0)\
01452         AS2(    paddd           xmm5, xmm1)\
01453 
01454 #define SSE2_MulAdd45                                           \
01455         AS2(    movdqa          xmm7, [esi])    \
01456         AS2(    movdqa          xmm0, [edi])    \
01457         AS2(    pmuludq         xmm0, xmm7)                             \
01458         AS2(    movdqa          xmm2, [ebx])            \
01459         AS2(    pmuludq         xmm7, [edx])    \
01460         AS2(    movdqa          xmm6, xmm2)                             \
01461         AS2(    pand            xmm2, xmm0)                             \
01462         AS2(    psrld           xmm0, 16)                               \
01463         AS2(    paddd           xmm4, xmm2)                             \
01464         AS2(    paddd           xmm5, xmm0)                             \
01465         AS2(    pand            xmm6, xmm7)                             \
01466         AS2(    psrld           xmm7, 16)
01467 
01468 #define Mul_Begin(n)                                                    \
01469         MulPrologue                                                                     \
01470         AS2(    mov             esi, esp)\
01471         AS2(    and             esp, 0xfffffff0)\
01472         AS2(    sub             esp, 48*n+16)\
01473         AS1(    push    esi)\
01474         AS2(    xor             edx, edx)                                       \
01475         ASL(1)                                                                          \
01476         ASS(    pshufd  xmm0, [eax+edx], 3,1,2,0)       \
01477         ASS(    pshufd  xmm1, [eax+edx], 2,0,3,1)       \
01478         ASS(    pshufd  xmm2, [edi+edx], 3,1,2,0)       \
01479         AS2(    movdqa  [esp+20+2*edx], xmm0)           \
01480         AS2(    psrlq   xmm0, 32)                                       \
01481         AS2(    movdqa  [esp+20+2*edx+16], xmm0)        \
01482         AS2(    movdqa  [esp+20+16*n+2*edx], xmm1)              \
01483         AS2(    psrlq   xmm1, 32)                                       \
01484         AS2(    movdqa  [esp+20+16*n+2*edx+16], xmm1)   \
01485         AS2(    movdqa  [esp+20+32*n+2*edx], xmm2)              \
01486         AS2(    psrlq   xmm2, 32)                                       \
01487         AS2(    movdqa  [esp+20+32*n+2*edx+16], xmm2)   \
01488         AS2(    add             edx, 16)                                        \
01489         AS2(    cmp             edx, 8*(n))                                     \
01490         ASJ(    jne,    1, b)                                           \
01491         AS2(    lea             edi, [esp+20])\
01492         AS2(    lea             edx, [esp+20+16*n])\
01493         AS2(    lea             esi, [esp+20+32*n])\
01494         SSE2_FirstMultiply(0)                                                   \
01495 
01496 #define Mul_Acc(i)                                                              \
01497         ASL(LMul##i)                                                                            \
01498         AS2(    movdqa          xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16])   \
01499         AS2(    movdqa          xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16])   \
01500         AS2(    movdqa          xmm2, [ebx])    \
01501         AS2(    pmuludq         xmm0, xmm1)                             \
01502         AS2(    pmuludq         xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16])   \
01503         AS2(    movdqa          xmm3, xmm2)                     \
01504         AS2(    pand            xmm2, xmm0)                     \
01505         AS2(    psrld           xmm0, 16)                       \
01506         AS2(    paddd           xmm4, xmm2)                     \
01507         AS2(    paddd           xmm5, xmm0)                     \
01508         AS2(    pand            xmm3, xmm1)                     \
01509         AS2(    psrld           xmm1, 16)                       \
01510         AS2(    paddd           xmm6, xmm3)                     \
01511         AS2(    paddd           xmm7, xmm1)             \
01512 
01513 #define Mul_Acc1(i)             
01514 #define Mul_Acc2(i)             ASC(call, LMul##i)
01515 #define Mul_Acc3(i)             Mul_Acc2(i)
01516 #define Mul_Acc4(i)             Mul_Acc2(i)
01517 #define Mul_Acc5(i)             Mul_Acc2(i)
01518 #define Mul_Acc6(i)             Mul_Acc2(i)
01519 #define Mul_Acc7(i)             Mul_Acc2(i)
01520 #define Mul_Acc8(i)             Mul_Acc2(i)
01521 #define Mul_Acc9(i)             Mul_Acc2(i)
01522 #define Mul_Acc10(i)    Mul_Acc2(i)
01523 #define Mul_Acc11(i)    Mul_Acc2(i)
01524 #define Mul_Acc12(i)    Mul_Acc2(i)
01525 #define Mul_Acc13(i)    Mul_Acc2(i)
01526 #define Mul_Acc14(i)    Mul_Acc2(i)
01527 #define Mul_Acc15(i)    Mul_Acc2(i)
01528 #define Mul_Acc16(i)    Mul_Acc2(i)
01529 
01530 #define Mul_Column1(k, i)       \
01531         SSE2_SaveShift(k)                                       \
01532         AS2(    add                     esi, 16)        \
01533         SSE2_MulAdd45\
01534         Mul_Acc##i(i)   \
01535 
01536 #define Mul_Column0(k, i)       \
01537         SSE2_SaveShift(k)                                       \
01538         AS2(    add                     edi, 16)        \
01539         AS2(    add                     edx, 16)        \
01540         SSE2_MulAdd45\
01541         Mul_Acc##i(i)   \
01542 
01543 #define Bot_Acc(i)                                                      \
01544         AS2(    movdqa          xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16])   \
01545         AS2(    movdqa          xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16])   \
01546         AS2(    pmuludq         xmm0, xmm1)                             \
01547         AS2(    pmuludq         xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16])           \
01548         AS2(    paddq           xmm4, xmm0)                             \
01549         AS2(    paddd           xmm6, xmm1)
01550 
01551 #define Bot_SaveAcc(k)                                  \
01552         SSE2_SaveShift(k)                                                       \
01553         AS2(    add                     edi, 16)        \
01554         AS2(    add                     edx, 16)        \
01555         AS2(    movdqa          xmm6, [esi])    \
01556         AS2(    movdqa          xmm0, [edi])    \
01557         AS2(    pmuludq         xmm0, xmm6)                             \
01558         AS2(    paddq           xmm4, xmm0)                             \
01559         AS2(    psllq           xmm5, 16)                               \
01560         AS2(    paddq           xmm4, xmm5)                             \
01561         AS2(    pmuludq         xmm6, [edx])
01562 
01563 #define Bot_End(n)                                                      \
01564         AS2(    movhlps         xmm7, xmm6)                     \
01565         AS2(    paddd           xmm6, xmm7)                     \
01566         AS2(    psllq           xmm6, 32)                       \
01567         AS2(    paddd           xmm4, xmm6)                     \
01568         AS2(    movq            QWORD PTR [ecx+8*((n)-1)], xmm4)        \
01569         AS1(    pop             esp)\
01570         MulEpilogue
01571 
01572 #define Top_Begin(n)                                                    \
01573         TopPrologue                                                                     \
01574         AS2(    mov             edx, esp)\
01575         AS2(    and             esp, 0xfffffff0)\
01576         AS2(    sub             esp, 48*n+16)\
01577         AS1(    push    edx)\
01578         AS2(    xor             edx, edx)                                       \
01579         ASL(1)                                                                          \
01580         ASS(    pshufd  xmm0, [eax+edx], 3,1,2,0)       \
01581         ASS(    pshufd  xmm1, [eax+edx], 2,0,3,1)       \
01582         ASS(    pshufd  xmm2, [edi+edx], 3,1,2,0)       \
01583         AS2(    movdqa  [esp+20+2*edx], xmm0)           \
01584         AS2(    psrlq   xmm0, 32)                                       \
01585         AS2(    movdqa  [esp+20+2*edx+16], xmm0)        \
01586         AS2(    movdqa  [esp+20+16*n+2*edx], xmm1)              \
01587         AS2(    psrlq   xmm1, 32)                                       \
01588         AS2(    movdqa  [esp+20+16*n+2*edx+16], xmm1)   \
01589         AS2(    movdqa  [esp+20+32*n+2*edx], xmm2)              \
01590         AS2(    psrlq   xmm2, 32)                                       \
01591         AS2(    movdqa  [esp+20+32*n+2*edx+16], xmm2)   \
01592         AS2(    add             edx, 16)                                        \
01593         AS2(    cmp             edx, 8*(n))                                     \
01594         ASJ(    jne,    1, b)                                           \
01595         AS2(    mov             eax, esi)                                       \
01596         AS2(    lea             edi, [esp+20+00*n+16*(n/2-1)])\
01597         AS2(    lea             edx, [esp+20+16*n+16*(n/2-1)])\
01598         AS2(    lea             esi, [esp+20+32*n+16*(n/2-1)])\
01599         AS2(    pxor    xmm4, xmm4)\
01600         AS2(    pxor    xmm5, xmm5)
01601 
01602 #define Top_Acc(i)                                                      \
01603         AS2(    movq            xmm0, QWORD PTR [esi+i/2*(1-(i-2*(i/2))*2)*16+8])       \
01604         AS2(    pmuludq         xmm0, [edx-i/2*(1-(i-2*(i/2))*2)*16])   \
01605         AS2(    psrlq           xmm0, 48)                               \
01606         AS2(    paddd           xmm5, xmm0)\
01607 
01608 #define Top_Column0(i)  \
01609         AS2(    psllq           xmm5, 32)                               \
01610         AS2(    add                     edi, 16)        \
01611         AS2(    add                     edx, 16)        \
01612         SSE2_MulAdd45\
01613         Mul_Acc##i(i)   \
01614 
01615 #define Top_Column1(i)  \
01616         SSE2_SaveShift(0)                                       \
01617         AS2(    add                     esi, 16)        \
01618         SSE2_MulAdd45\
01619         Mul_Acc##i(i)   \
01620         AS2(    shr                     eax, 16)        \
01621         AS2(    movd            xmm0, eax)\
01622         AS2(    movd            xmm1, [ecx+4])\
01623         AS2(    psrld           xmm1, 16)\
01624         AS2(    pcmpgtd         xmm1, xmm0)\
01625         AS2(    psrld           xmm1, 31)\
01626         AS2(    paddd           xmm4, xmm1)\
01627 
01628 void SSE2_Square4(word *C, const word *A)
01629 {
01630         Squ_Begin(2)
01631         Squ_Column0(0, 1)
01632         Squ_End(2)
01633 }
01634 
01635 void SSE2_Square8(word *C, const word *A)
01636 {
01637         Squ_Begin(4)
01638 #ifndef __GNUC__
01639         ASJ(    jmp,    0, f)
01640         Squ_Acc(2)
01641         AS1(    ret) ASL(0)
01642 #endif
01643         Squ_Column0(0, 1)
01644         Squ_Column1(1, 1)
01645         Squ_Column0(2, 2)
01646         Squ_Column1(3, 1)
01647         Squ_Column0(4, 1)
01648         Squ_End(4)
01649 }
01650 
01651 void SSE2_Square16(word *C, const word *A)
01652 {
01653         Squ_Begin(8)
01654 #ifndef __GNUC__
01655         ASJ(    jmp,    0, f)
01656         Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
01657         AS1(    ret) ASL(0)
01658 #endif
01659         Squ_Column0(0, 1)
01660         Squ_Column1(1, 1)
01661         Squ_Column0(2, 2)
01662         Squ_Column1(3, 2)
01663         Squ_Column0(4, 3)
01664         Squ_Column1(5, 3)
01665         Squ_Column0(6, 4)
01666         Squ_Column1(7, 3)
01667         Squ_Column0(8, 3)
01668         Squ_Column1(9, 2)
01669         Squ_Column0(10, 2)
01670         Squ_Column1(11, 1)
01671         Squ_Column0(12, 1)
01672         Squ_End(8)
01673 }
01674 
01675 void SSE2_Square32(word *C, const word *A)
01676 {
01677         Squ_Begin(16)
01678         ASJ(    jmp,    0, f)
01679         Squ_Acc(8) Squ_Acc(7) Squ_Acc(6) Squ_Acc(5) Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
01680         AS1(    ret) ASL(0)
01681         Squ_Column0(0, 1)
01682         Squ_Column1(1, 1)
01683         Squ_Column0(2, 2)
01684         Squ_Column1(3, 2)
01685         Squ_Column0(4, 3)
01686         Squ_Column1(5, 3)
01687         Squ_Column0(6, 4)
01688         Squ_Column1(7, 4)
01689         Squ_Column0(8, 5)
01690         Squ_Column1(9, 5)
01691         Squ_Column0(10, 6)
01692         Squ_Column1(11, 6)
01693         Squ_Column0(12, 7)
01694         Squ_Column1(13, 7)
01695         Squ_Column0(14, 8)
01696         Squ_Column1(15, 7)
01697         Squ_Column0(16, 7)
01698         Squ_Column1(17, 6)
01699         Squ_Column0(18, 6)
01700         Squ_Column1(19, 5)
01701         Squ_Column0(20, 5)
01702         Squ_Column1(21, 4)
01703         Squ_Column0(22, 4)
01704         Squ_Column1(23, 3)
01705         Squ_Column0(24, 3)
01706         Squ_Column1(25, 2)
01707         Squ_Column0(26, 2)
01708         Squ_Column1(27, 1)
01709         Squ_Column0(28, 1)
01710         Squ_End(16)
01711 }
01712 
01713 void SSE2_Multiply4(word *C, const word *A, const word *B)
01714 {
01715         Mul_Begin(2)
01716 #ifndef __GNUC__
01717         ASJ(    jmp,    0, f)
01718         Mul_Acc(2)
01719         AS1(    ret) ASL(0)
01720 #endif
01721         Mul_Column0(0, 2)
01722         Mul_End(2)
01723 }
01724 
01725 void SSE2_Multiply8(word *C, const word *A, const word *B)
01726 {
01727         Mul_Begin(4)
01728 #ifndef __GNUC__
01729         ASJ(    jmp,    0, f)
01730         Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01731         AS1(    ret) ASL(0)
01732 #endif
01733         Mul_Column0(0, 2)
01734         Mul_Column1(1, 3)
01735         Mul_Column0(2, 4)
01736         Mul_Column1(3, 3)
01737         Mul_Column0(4, 2)
01738         Mul_End(4)
01739 }
01740 
01741 void SSE2_Multiply16(word *C, const word *A, const word *B)
01742 {
01743         Mul_Begin(8)
01744 #ifndef __GNUC__
01745         ASJ(    jmp,    0, f)
01746         Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01747         AS1(    ret) ASL(0)
01748 #endif
01749         Mul_Column0(0, 2)
01750         Mul_Column1(1, 3)
01751         Mul_Column0(2, 4)
01752         Mul_Column1(3, 5)
01753         Mul_Column0(4, 6)
01754         Mul_Column1(5, 7)
01755         Mul_Column0(6, 8)
01756         Mul_Column1(7, 7)
01757         Mul_Column0(8, 6)
01758         Mul_Column1(9, 5)
01759         Mul_Column0(10, 4)
01760         Mul_Column1(11, 3)
01761         Mul_Column0(12, 2)
01762         Mul_End(8)
01763 }
01764 
01765 void SSE2_Multiply32(word *C, const word *A, const word *B)
01766 {
01767         Mul_Begin(16)
01768         ASJ(    jmp,    0, f)
01769         Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01770         AS1(    ret) ASL(0)
01771         Mul_Column0(0, 2)
01772         Mul_Column1(1, 3)
01773         Mul_Column0(2, 4)
01774         Mul_Column1(3, 5)
01775         Mul_Column0(4, 6)
01776         Mul_Column1(5, 7)
01777         Mul_Column0(6, 8)
01778         Mul_Column1(7, 9)
01779         Mul_Column0(8, 10)
01780         Mul_Column1(9, 11)
01781         Mul_Column0(10, 12)
01782         Mul_Column1(11, 13)
01783         Mul_Column0(12, 14)
01784         Mul_Column1(13, 15)
01785         Mul_Column0(14, 16)
01786         Mul_Column1(15, 15)
01787         Mul_Column0(16, 14)
01788         Mul_Column1(17, 13)
01789         Mul_Column0(18, 12)
01790         Mul_Column1(19, 11)
01791         Mul_Column0(20, 10)
01792         Mul_Column1(21, 9)
01793         Mul_Column0(22, 8)
01794         Mul_Column1(23, 7)
01795         Mul_Column0(24, 6)
01796         Mul_Column1(25, 5)
01797         Mul_Column0(26, 4)
01798         Mul_Column1(27, 3)
01799         Mul_Column0(28, 2)
01800         Mul_End(16)
01801 }
01802 
01803 void SSE2_MultiplyBottom4(word *C, const word *A, const word *B)
01804 {
01805         Mul_Begin(2)
01806         Bot_SaveAcc(0) Bot_Acc(2)
01807         Bot_End(2)
01808 }
01809 
01810 void SSE2_MultiplyBottom8(word *C, const word *A, const word *B)
01811 {
01812         Mul_Begin(4)
01813 #ifndef __GNUC__
01814         ASJ(    jmp,    0, f)
01815         Mul_Acc(3) Mul_Acc(2)
01816         AS1(    ret) ASL(0)
01817 #endif
01818         Mul_Column0(0, 2)
01819         Mul_Column1(1, 3)
01820         Bot_SaveAcc(2) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
01821         Bot_End(4)
01822 }
01823 
01824 void SSE2_MultiplyBottom16(word *C, const word *A, const word *B)
01825 {
01826         Mul_Begin(8)
01827 #ifndef __GNUC__
01828         ASJ(    jmp,    0, f)
01829         Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01830         AS1(    ret) ASL(0)
01831 #endif
01832         Mul_Column0(0, 2)
01833         Mul_Column1(1, 3)
01834         Mul_Column0(2, 4)
01835         Mul_Column1(3, 5)
01836         Mul_Column0(4, 6)
01837         Mul_Column1(5, 7)
01838         Bot_SaveAcc(6) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
01839         Bot_End(8)
01840 }
01841 
01842 void SSE2_MultiplyBottom32(word *C, const word *A, const word *B)
01843 {
01844         Mul_Begin(16)
01845 #ifndef __GNUC__
01846         ASJ(    jmp,    0, f)
01847         Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01848         AS1(    ret) ASL(0)
01849 #endif
01850         Mul_Column0(0, 2)
01851         Mul_Column1(1, 3)
01852         Mul_Column0(2, 4)
01853         Mul_Column1(3, 5)
01854         Mul_Column0(4, 6)
01855         Mul_Column1(5, 7)
01856         Mul_Column0(6, 8)
01857         Mul_Column1(7, 9)
01858         Mul_Column0(8, 10)
01859         Mul_Column1(9, 11)
01860         Mul_Column0(10, 12)
01861         Mul_Column1(11, 13)
01862         Mul_Column0(12, 14)
01863         Mul_Column1(13, 15)
01864         Bot_SaveAcc(14) Bot_Acc(16) Bot_Acc(15) Bot_Acc(14) Bot_Acc(13) Bot_Acc(12) Bot_Acc(11) Bot_Acc(10) Bot_Acc(9) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
01865         Bot_End(16)
01866 }
01867 
01868 void SSE2_MultiplyTop8(word *C, const word *A, const word *B, word L)
01869 {
01870         Top_Begin(4)
01871         Top_Acc(3) Top_Acc(2) Top_Acc(1)
01872 #ifndef __GNUC__
01873         ASJ(    jmp,    0, f)
01874         Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01875         AS1(    ret) ASL(0)
01876 #endif
01877         Top_Column0(4)
01878         Top_Column1(3)
01879         Mul_Column0(0, 2)
01880         Top_End(2)
01881 }
01882 
01883 void SSE2_MultiplyTop16(word *C, const word *A, const word *B, word L)
01884 {
01885         Top_Begin(8)
01886         Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
01887 #ifndef __GNUC__
01888         ASJ(    jmp,    0, f)
01889         Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01890         AS1(    ret) ASL(0)
01891 #endif
01892         Top_Column0(8)
01893         Top_Column1(7)
01894         Mul_Column0(0, 6)
01895         Mul_Column1(1, 5)
01896         Mul_Column0(2, 4)
01897         Mul_Column1(3, 3)
01898         Mul_Column0(4, 2)
01899         Top_End(4)
01900 }
01901 
01902 void SSE2_MultiplyTop32(word *C, const word *A, const word *B, word L)
01903 {
01904         Top_Begin(16)
01905         Top_Acc(15) Top_Acc(14) Top_Acc(13) Top_Acc(12) Top_Acc(11) Top_Acc(10) Top_Acc(9) Top_Acc(8) Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
01906 #ifndef __GNUC__
01907         ASJ(    jmp,    0, f)
01908         Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01909         AS1(    ret) ASL(0)
01910 #endif
01911         Top_Column0(16)
01912         Top_Column1(15)
01913         Mul_Column0(0, 14)
01914         Mul_Column1(1, 13)
01915         Mul_Column0(2, 12)
01916         Mul_Column1(3, 11)
01917         Mul_Column0(4, 10)
01918         Mul_Column1(5, 9)
01919         Mul_Column0(6, 8)
01920         Mul_Column1(7, 7)
01921         Mul_Column0(8, 6)
01922         Mul_Column1(9, 5)
01923         Mul_Column0(10, 4)
01924         Mul_Column1(11, 3)
01925         Mul_Column0(12, 2)
01926         Top_End(8)
01927 }
01928 
01929 #endif  // #if CRYPTOPP_INTEGER_SSE2
01930 
01931 // ********************************************************
01932 
01933 typedef int (CRYPTOPP_FASTCALL * PAdd)(size_t N, word *C, const word *A, const word *B);
01934 typedef void (* PMul)(word *C, const word *A, const word *B);
01935 typedef void (* PSqu)(word *C, const word *A);
01936 typedef void (* PMulTop)(word *C, const word *A, const word *B, word L);
01937 
01938 #if CRYPTOPP_INTEGER_SSE2
01939 static PAdd s_pAdd = &Baseline_Add, s_pSub = &Baseline_Sub;
01940 static size_t s_recursionLimit = 8;
01941 #else
01942 static const size_t s_recursionLimit = 16;
01943 #endif
01944 
01945 static PMul s_pMul[9], s_pBot[9];
01946 static PSqu s_pSqu[9];
01947 static PMulTop s_pTop[9];
01948 
01949 static void SetFunctionPointers()
01950 {
01951         s_pMul[0] = &Baseline_Multiply2;
01952         s_pBot[0] = &Baseline_MultiplyBottom2;
01953         s_pSqu[0] = &Baseline_Square2;
01954         s_pTop[0] = &Baseline_MultiplyTop2;
01955         s_pTop[1] = &Baseline_MultiplyTop4;
01956 
01957 #if CRYPTOPP_INTEGER_SSE2
01958         if (HasSSE2())
01959         {
01960 #if _MSC_VER != 1200 || defined(NDEBUG)
01961                 if (IsP4())
01962                 {
01963                         s_pAdd = &SSE2_Add;
01964                         s_pSub = &SSE2_Sub;
01965                 }
01966 #endif
01967 
01968                 s_recursionLimit = 32;
01969 
01970                 s_pMul[1] = &SSE2_Multiply4;
01971                 s_pMul[2] = &SSE2_Multiply8;
01972                 s_pMul[4] = &SSE2_Multiply16;
01973                 s_pMul[8] = &SSE2_Multiply32;
01974 
01975                 s_pBot[1] = &SSE2_MultiplyBottom4;
01976                 s_pBot[2] = &SSE2_MultiplyBottom8;
01977                 s_pBot[4] = &SSE2_MultiplyBottom16;
01978                 s_pBot[8] = &SSE2_MultiplyBottom32;
01979 
01980                 s_pSqu[1] = &SSE2_Square4;
01981                 s_pSqu[2] = &SSE2_Square8;
01982                 s_pSqu[4] = &SSE2_Square16;
01983                 s_pSqu[8] = &SSE2_Square32;
01984 
01985                 s_pTop[2] = &SSE2_MultiplyTop8;
01986                 s_pTop[4] = &SSE2_MultiplyTop16;
01987                 s_pTop[8] = &SSE2_MultiplyTop32;
01988         }
01989         else
01990 #endif
01991         {
01992                 s_pMul[1] = &Baseline_Multiply4;
01993                 s_pMul[2] = &Baseline_Multiply8;
01994 
01995                 s_pBot[1] = &Baseline_MultiplyBottom4;
01996                 s_pBot[2] = &Baseline_MultiplyBottom8;
01997 
01998                 s_pSqu[1] = &Baseline_Square4;
01999                 s_pSqu[2] = &Baseline_Square8;
02000 
02001                 s_pTop[2] = &Baseline_MultiplyTop8;
02002 
02003 #if     !CRYPTOPP_INTEGER_SSE2
02004                 s_pMul[4] = &Baseline_Multiply16;
02005                 s_pBot[4] = &Baseline_MultiplyBottom16;
02006                 s_pSqu[4] = &Baseline_Square16;
02007                 s_pTop[4] = &Baseline_MultiplyTop16;
02008 #endif
02009         }
02010 }
02011 
02012 inline int Add(word *C, const word *A, const word *B, size_t N)
02013 {
02014 #if CRYPTOPP_INTEGER_SSE2
02015         return s_pAdd(N, C, A, B);
02016 #else
02017         return Baseline_Add(N, C, A, B);
02018 #endif
02019 }
02020 
02021 inline int Subtract(word *C, const word *A, const word *B, size_t N)
02022 {
02023 #if CRYPTOPP_INTEGER_SSE2
02024         return s_pSub(N, C, A, B);
02025 #else
02026         return Baseline_Sub(N, C, A, B);
02027 #endif
02028 }
02029 
02030 // ********************************************************
02031 
02032 
02033 #define A0              A
02034 #define A1              (A+N2)
02035 #define B0              B
02036 #define B1              (B+N2)
02037 
02038 #define T0              T
02039 #define T1              (T+N2)
02040 #define T2              (T+N)
02041 #define T3              (T+N+N2)
02042 
02043 #define R0              R
02044 #define R1              (R+N2)
02045 #define R2              (R+N)
02046 #define R3              (R+N+N2)
02047 
02048 // R[2*N] - result = A*B
02049 // T[2*N] - temporary work space
02050 // A[N] --- multiplier
02051 // B[N] --- multiplicant
02052 
02053 void RecursiveMultiply(word *R, word *T, const word *A, const word *B, size_t N)
02054 {
02055         assert(N>=2 && N%2==0);
02056 
02057         if (N <= s_recursionLimit)
02058                 s_pMul[N/4](R, A, B);
02059         else
02060         {
02061                 const size_t N2 = N/2;
02062 
02063                 size_t AN2 = Compare(A0, A1, N2) > 0 ?  0 : N2;
02064                 Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
02065 
02066                 size_t BN2 = Compare(B0, B1, N2) > 0 ?  0 : N2;
02067                 Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
02068 
02069                 RecursiveMultiply(R2, T2, A1, B1, N2);
02070                 RecursiveMultiply(T0, T2, R0, R1, N2);
02071                 RecursiveMultiply(R0, T2, A0, B0, N2);
02072 
02073                 // now T[01] holds (A1-A0)*(B0-B1), R[01] holds A0*B0, R[23] holds A1*B1
02074 
02075                 int c2 = Add(R2, R2, R1, N2);
02076                 int c3 = c2;
02077                 c2 += Add(R1, R2, R0, N2);
02078                 c3 += Add(R2, R2, R3, N2);
02079 
02080                 if (AN2 == BN2)
02081                         c3 -= Subtract(R1, R1, T0, N);
02082                 else
02083                         c3 += Add(R1, R1, T0, N);
02084 
02085                 c3 += Increment(R2, N2, c2);
02086                 assert (c3 >= 0 && c3 <= 2);
02087                 Increment(R3, N2, c3);
02088         }
02089 }
02090 
02091 // R[2*N] - result = A*A
02092 // T[2*N] - temporary work space
02093 // A[N] --- number to be squared
02094 
02095 void RecursiveSquare(word *R, word *T, const word *A, size_t N)
02096 {
02097         assert(N && N%2==0);
02098 
02099         if (N <= s_recursionLimit)
02100                 s_pSqu[N/4](R, A);
02101         else
02102         {
02103                 const size_t N2 = N/2;
02104 
02105                 RecursiveSquare(R0, T2, A0, N2);
02106                 RecursiveSquare(R2, T2, A1, N2);
02107                 RecursiveMultiply(T0, T2, A0, A1, N2);
02108 
02109                 int carry = Add(R1, R1, T0, N);
02110                 carry += Add(R1, R1, T0, N);
02111                 Increment(R3, N2, carry);
02112         }
02113 }
02114 
02115 // R[N] - bottom half of A*B
02116 // T[3*N/2] - temporary work space
02117 // A[N] - multiplier
02118 // B[N] - multiplicant
02119 
02120 void RecursiveMultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
02121 {
02122         assert(N>=2 && N%2==0);
02123 
02124         if (N <= s_recursionLimit)
02125                 s_pBot[N/4](R, A, B);
02126         else
02127         {
02128                 const size_t N2 = N/2;
02129 
02130                 RecursiveMultiply(R, T, A0, B0, N2);
02131                 RecursiveMultiplyBottom(T0, T1, A1, B0, N2);
02132                 Add(R1, R1, T0, N2);
02133                 RecursiveMultiplyBottom(T0, T1, A0, B1, N2);
02134                 Add(R1, R1, T0, N2);
02135         }
02136 }
02137 
02138 // R[N] --- upper half of A*B
02139 // T[2*N] - temporary work space
02140 // L[N] --- lower half of A*B
02141 // A[N] --- multiplier
02142 // B[N] --- multiplicant
02143 
02144 void MultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, size_t N)
02145 {
02146         assert(N>=2 && N%2==0);
02147 
02148         if (N <= s_recursionLimit)
02149                 s_pTop[N/4](R, A, B, L[N-1]);
02150         else
02151         {
02152                 const size_t N2 = N/2;
02153 
02154                 size_t AN2 = Compare(A0, A1, N2) > 0 ?  0 : N2;
02155                 Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
02156 
02157                 size_t BN2 = Compare(B0, B1, N2) > 0 ?  0 : N2;
02158                 Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
02159 
02160                 RecursiveMultiply(T0, T2, R0, R1, N2);
02161                 RecursiveMultiply(R0, T2, A1, B1, N2);
02162 
02163                 // now T[01] holds (A1-A0)*(B0-B1) = A1*B0+A0*B1-A1*B1-A0*B0, R[01] holds A1*B1
02164 
02165                 int t, c3;
02166                 int c2 = Subtract(T2, L+N2, L, N2);
02167 
02168                 if (AN2 == BN2)
02169                 {
02170                         c2 -= Add(T2, T2, T0, N2);
02171                         t = (Compare(T2, R0, N2) == -1);
02172                         c3 = t - Subtract(T2, T2, T1, N2);
02173                 }
02174                 else
02175                 {
02176                         c2 += Subtract(T2, T2, T0, N2);
02177                         t = (Compare(T2, R0, N2) == -1);
02178                         c3 = t + Add(T2, T2, T1, N2);
02179                 }
02180 
02181                 c2 += t;
02182                 if (c2 >= 0)
02183                         c3 += Increment(T2, N2, c2);
02184                 else
02185                         c3 -= Decrement(T2, N2, -c2);
02186                 c3 += Add(R0, T2, R1, N2);
02187 
02188                 assert (c3 >= 0 && c3 <= 2);
02189                 Increment(R1, N2, c3);
02190         }
02191 }
02192 
02193 inline void Multiply(word *R, word *T, const word *A, const word *B, size_t N)
02194 {
02195         RecursiveMultiply(R, T, A, B, N);
02196 }
02197 
02198 inline void Square(word *R, word *T, const word *A, size_t N)
02199 {
02200         RecursiveSquare(R, T, A, N);
02201 }
02202 
02203 inline void MultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
02204 {
02205         RecursiveMultiplyBottom(R, T, A, B, N);
02206 }
02207 
02208 // R[NA+NB] - result = A*B
02209 // T[NA+NB] - temporary work space
02210 // A[NA] ---- multiplier
02211 // B[NB] ---- multiplicant
02212 
02213 void AsymmetricMultiply(word *R, word *T, const word *A, size_t NA, const word *B, size_t NB)
02214 {
02215         if (NA == NB)
02216         {
02217                 if (A == B)
02218                         Square(R, T, A, NA);
02219                 else
02220                         Multiply(R, T, A, B, NA);
02221 
02222                 return;
02223         }
02224 
02225         if (NA > NB)
02226         {
02227                 std::swap(A, B);
02228                 std::swap(NA, NB);
02229         }
02230 
02231         assert(NB % NA == 0);
02232 
02233         if (NA==2 && !A[1])
02234         {
02235                 switch (A[0])
02236                 {
02237                 case 0:
02238                         SetWords(R, 0, NB+2);
02239                         return;
02240                 case 1:
02241                         CopyWords(R, B, NB);
02242                         R[NB] = R[NB+1] = 0;
02243                         return;
02244                 default:
02245                         R[NB] = LinearMultiply(R, B, A[0], NB);
02246                         R[NB+1] = 0;
02247                         return;
02248                 }
02249         }
02250 
02251         size_t i;
02252         if ((NB/NA)%2 == 0)
02253         {
02254                 Multiply(R, T, A, B, NA);
02255                 CopyWords(T+2*NA, R+NA, NA);
02256 
02257                 for (i=2*NA; i<NB; i+=2*NA)
02258                         Multiply(T+NA+i, T, A, B+i, NA);
02259                 for (i=NA; i<NB; i+=2*NA)
02260                         Multiply(R+i, T, A, B+i, NA);
02261         }
02262         else
02263         {
02264                 for (i=0; i<NB; i+=2*NA)
02265                         Multiply(R+i, T, A, B+i, NA);
02266                 for (i=NA; i<NB; i+=2*NA)
02267                         Multiply(T+NA+i, T, A, B+i, NA);
02268         }
02269 
02270         if (Add(R+NA, R+NA, T+2*NA, NB-NA))
02271                 Increment(R+NB, NA);
02272 }
02273 
02274 // R[N] ----- result = A inverse mod 2**(WORD_BITS*N)
02275 // T[3*N/2] - temporary work space
02276 // A[N] ----- an odd number as input
02277 
02278 void RecursiveInverseModPower2(word *R, word *T, const word *A, size_t N)
02279 {
02280         if (N==2)
02281         {
02282                 T[0] = AtomicInverseModPower2(A[0]);
02283                 T[1] = 0;
02284                 s_pBot[0](T+2, T, A);
02285                 TwosComplement(T+2, 2);
02286                 Increment(T+2, 2, 2);
02287                 s_pBot[0](R, T, T+2);
02288         }
02289         else
02290         {
02291                 const size_t N2 = N/2;
02292                 RecursiveInverseModPower2(R0, T0, A0, N2);
02293                 T0[0] = 1;
02294                 SetWords(T0+1, 0, N2-1);
02295                 MultiplyTop(R1, T1, T0, R0, A0, N2);
02296                 MultiplyBottom(T0, T1, R0, A1, N2);
02297                 Add(T0, R1, T0, N2);
02298                 TwosComplement(T0, N2);
02299                 MultiplyBottom(R1, T1, R0, T0, N2);
02300         }
02301 }
02302 
02303 // R[N] --- result = X/(2**(WORD_BITS*N)) mod M
02304 // T[3*N] - temporary work space
02305 // X[2*N] - number to be reduced
02306 // M[N] --- modulus
02307 // U[N] --- multiplicative inverse of M mod 2**(WORD_BITS*N)
02308 
02309 void MontgomeryReduce(word *R, word *T, word *X, const word *M, const word *U, size_t N)
02310 {
02311 #if 1
02312         MultiplyBottom(R, T, X, U, N);
02313         MultiplyTop(T, T+N, X, R, M, N);
02314         word borrow = Subtract(T, X+N, T, N);
02315         // defend against timing attack by doing this Add even when not needed
02316         word carry = Add(T+N, T, M, N);
02317         assert(carry | !borrow);
02318         CopyWords(R, T + ((0-borrow) & N), N);
02319 #elif 0
02320         const word u = 0-U[0];
02321         Declare2Words(p)
02322         for (size_t i=0; i<N; i++)
02323         {
02324                 const word t = u * X[i];
02325                 word c = 0;
02326                 for (size_t j=0; j<N; j+=2)
02327                 {
02328                         MultiplyWords(p, t, M[j]);
02329                         Acc2WordsBy1(p, X[i+j]);
02330                         Acc2WordsBy1(p, c);
02331                         X[i+j] = LowWord(p);
02332                         c = HighWord(p);
02333                         MultiplyWords(p, t, M[j+1]);
02334                         Acc2WordsBy1(p, X[i+j+1]);
02335                         Acc2WordsBy1(p, c);
02336                         X[i+j+1] = LowWord(p);
02337                         c = HighWord(p);
02338                 }
02339 
02340                 if (Increment(X+N+i, N-i, c))
02341                         while (!Subtract(X+N, X+N, M, N)) {}
02342         }
02343 
02344         memcpy(R, X+N, N*WORD_SIZE);
02345 #else
02346         __m64 u = _mm_cvtsi32_si64(0-U[0]), p;
02347         for (size_t i=0; i<N; i++)
02348         {
02349                 __m64 t = _mm_cvtsi32_si64(X[i]);
02350                 t = _mm_mul_su32(t, u);
02351                 __m64 c = _mm_setzero_si64();
02352                 for (size_t j=0; j<N; j+=2)
02353                 {
02354                         p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j]));
02355                         p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j]));
02356                         c = _mm_add_si64(c, p);
02357                         X[i+j] = _mm_cvtsi64_si32(c);
02358                         c = _mm_srli_si64(c, 32);
02359                         p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j+1]));
02360                         p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j+1]));
02361                         c = _mm_add_si64(c, p);
02362                         X[i+j+1] = _mm_cvtsi64_si32(c);
02363                         c = _mm_srli_si64(c, 32);
02364                 }
02365 
02366                 if (Increment(X+N+i, N-i, _mm_cvtsi64_si32(c)))
02367                         while (!Subtract(X+N, X+N, M, N)) {}
02368         }
02369 
02370         memcpy(R, X+N, N*WORD_SIZE);
02371         _mm_empty();
02372 #endif
02373 }
02374 
02375 // R[N] --- result = X/(2**(WORD_BITS*N/2)) mod M
02376 // T[2*N] - temporary work space
02377 // X[2*N] - number to be reduced
02378 // M[N] --- modulus
02379 // U[N/2] - multiplicative inverse of M mod 2**(WORD_BITS*N/2)
02380 // V[N] --- 2**(WORD_BITS*3*N/2) mod M
02381 
02382 void HalfMontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, const word *V, size_t N)
02383 {
02384         assert(N%2==0 && N>=4);
02385 
02386 #define M0              M
02387 #define M1              (M+N2)
02388 #define V0              V
02389 #define V1              (V+N2)
02390 
02391 #define X0              X
02392 #define X1              (X+N2)
02393 #define X2              (X+N)
02394 #define X3              (X+N+N2)
02395 
02396         const size_t N2 = N/2;
02397         Multiply(T0, T2, V0, X3, N2);
02398         int c2 = Add(T0, T0, X0, N);
02399         MultiplyBottom(T3, T2, T0, U, N2);
02400         MultiplyTop(T2, R, T0, T3, M0, N2);
02401         c2 -= Subtract(T2, T1, T2, N2);
02402         Multiply(T0, R, T3, M1, N2);
02403         c2 -= Subtract(T0, T2, T0, N2);
02404         int c3 = -(int)Subtract(T1, X2, T1, N2);
02405         Multiply(R0, T2, V1, X3, N2);
02406         c3 += Add(R, R, T, N);
02407 
02408         if (c2>0)
02409                 c3 += Increment(R1, N2);
02410         else if (c2<0)
02411                 c3 -= Decrement(R1, N2, -c2);
02412 
02413         assert(c3>=-1 && c3<=1);
02414         if (c3>0)
02415                 Subtract(R, R, M, N);
02416         else if (c3<0)
02417                 Add(R, R, M, N);
02418 
02419 #undef M0
02420 #undef M1
02421 #undef V0
02422 #undef V1
02423 
02424 #undef X0
02425 #undef X1
02426 #undef X2
02427 #undef X3
02428 }
02429 
02430 #undef A0
02431 #undef A1
02432 #undef B0
02433 #undef B1
02434 
02435 #undef T0
02436 #undef T1
02437 #undef T2
02438 #undef T3
02439 
02440 #undef R0
02441 #undef R1
02442 #undef R2
02443 #undef R3
02444 
02445 /*
02446 // do a 3 word by 2 word divide, returns quotient and leaves remainder in A
02447 static word SubatomicDivide(word *A, word B0, word B1)
02448 {
02449         // assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a word
02450         assert(A[2] < B1 || (A[2]==B1 && A[1] < B0));
02451 
02452         // estimate the quotient: do a 2 word by 1 word divide
02453         word Q;
02454         if (B1+1 == 0)
02455                 Q = A[2];
02456         else
02457                 Q = DWord(A[1], A[2]).DividedBy(B1+1);
02458 
02459         // now subtract Q*B from A
02460         DWord p = DWord::Multiply(B0, Q);
02461         DWord u = (DWord) A[0] - p.GetLowHalf();
02462         A[0] = u.GetLowHalf();
02463         u = (DWord) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - DWord::Multiply(B1, Q);
02464         A[1] = u.GetLowHalf();
02465         A[2] += u.GetHighHalf();
02466 
02467         // Q <= actual quotient, so fix it
02468         while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
02469         {
02470                 u = (DWord) A[0] - B0;
02471                 A[0] = u.GetLowHalf();
02472                 u = (DWord) A[1] - B1 - u.GetHighHalfAsBorrow();
02473                 A[1] = u.GetLowHalf();
02474                 A[2] += u.GetHighHalf();
02475                 Q++;
02476                 assert(Q);      // shouldn't overflow
02477         }
02478 
02479         return Q;
02480 }
02481 
02482 // do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1
02483 static inline void AtomicDivide(word *Q, const word *A, const word *B)
02484 {
02485         if (!B[0] && !B[1]) // if divisor is 0, we assume divisor==2**(2*WORD_BITS)
02486         {
02487                 Q[0] = A[2];
02488                 Q[1] = A[3];
02489         }
02490         else
02491         {
02492                 word T[4];
02493                 T[0] = A[0]; T[1] = A[1]; T[2] = A[2]; T[3] = A[3];
02494                 Q[1] = SubatomicDivide(T+1, B[0], B[1]);
02495                 Q[0] = SubatomicDivide(T, B[0], B[1]);
02496 
02497 #ifndef NDEBUG
02498                 // multiply quotient and divisor and add remainder, make sure it equals dividend
02499                 assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
02500                 word P[4];
02501                 LowLevel::Multiply2(P, Q, B);
02502                 Add(P, P, T, 4);
02503                 assert(memcmp(P, A, 4*WORD_SIZE)==0);
02504 #endif
02505         }
02506 }
02507 */
02508 
02509 static inline void AtomicDivide(word *Q, const word *A, const word *B)
02510 {
02511         word T[4];
02512         DWord q = DivideFourWordsByTwo<word, DWord>(T, DWord(A[0], A[1]), DWord(A[2], A[3]), DWord(B[0], B[1]));
02513         Q[0] = q.GetLowHalf();
02514         Q[1] = q.GetHighHalf();
02515 
02516 #ifndef NDEBUG
02517         if (B[0] || B[1])
02518         {
02519                 // multiply quotient and divisor and add remainder, make sure it equals dividend
02520                 assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
02521                 word P[4];
02522                 s_pMul[0](P, Q, B);
02523                 Add(P, P, T, 4);
02524                 assert(memcmp(P, A, 4*WORD_SIZE)==0);
02525         }
02526 #endif
02527 }
02528 
02529 // for use by Divide(), corrects the underestimated quotient {Q1,Q0}
02530 static void CorrectQuotientEstimate(word *R, word *T, word *Q, const word *B, size_t N)
02531 {
02532         assert(N && N%2==0);
02533 
02534         AsymmetricMultiply(T, T+N+2, Q, 2, B, N);
02535 
02536         word borrow = Subtract(R, R, T, N+2);
02537         assert(!borrow && !R[N+1]);
02538 
02539         while (R[N] || Compare(R, B, N) >= 0)
02540         {
02541                 R[N] -= Subtract(R, R, B, N);
02542                 Q[1] += (++Q[0]==0);
02543                 assert(Q[0] || Q[1]); // no overflow
02544         }
02545 }
02546 
02547 // R[NB] -------- remainder = A%B
02548 // Q[NA-NB+2] --- quotient      = A/B
02549 // T[NA+3*(NB+2)] - temp work space
02550 // A[NA] -------- dividend
02551 // B[NB] -------- divisor
02552 
02553 void Divide(word *R, word *Q, word *T, const word *A, size_t NA, const word *B, size_t NB)
02554 {
02555         assert(NA && NB && NA%2==0 && NB%2==0);
02556         assert(B[NB-1] || B[NB-2]);
02557         assert(NB <= NA);
02558 
02559         // set up temporary work space
02560         word *const TA=T;
02561         word *const TB=T+NA+2;
02562         word *const TP=T+NA+2+NB;
02563 
02564         // copy B into TB and normalize it so that TB has highest bit set to 1
02565         unsigned shiftWords = (B[NB-1]==0);
02566         TB[0] = TB[NB-1] = 0;
02567         CopyWords(TB+shiftWords, B, NB-shiftWords);
02568         unsigned shiftBits = WORD_BITS - BitPrecision(TB[NB-1]);
02569         assert(shiftBits < WORD_BITS);
02570         ShiftWordsLeftByBits(TB, NB, shiftBits);
02571 
02572         // copy A into TA and normalize it
02573         TA[0] = TA[NA] = TA[NA+1] = 0;
02574         CopyWords(TA+shiftWords, A, NA);
02575         ShiftWordsLeftByBits(TA, NA+2, shiftBits);
02576 
02577         if (TA[NA+1]==0 && TA[NA] <= 1)
02578         {
02579                 Q[NA-NB+1] = Q[NA-NB] = 0;
02580                 while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0)
02581                 {
02582                         TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB);
02583                         ++Q[NA-NB];
02584                 }
02585         }
02586         else
02587         {
02588                 NA+=2;
02589                 assert(Compare(TA+NA-NB, TB, NB) < 0);
02590         }
02591 
02592         word BT[2];
02593         BT[0] = TB[NB-2] + 1;
02594         BT[1] = TB[NB-1] + (BT[0]==0);
02595 
02596         // start reducing TA mod TB, 2 words at a time
02597         for (size_t i=NA-2; i>=NB; i-=2)
02598         {
02599                 AtomicDivide(Q+i-NB, TA+i-2, BT);
02600                 CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB);
02601         }
02602 
02603         // copy TA into R, and denormalize it
02604         CopyWords(R, TA+shiftWords, NB);
02605         ShiftWordsRightByBits(R, NB, shiftBits);
02606 }
02607 
02608 static inline size_t EvenWordCount(const word *X, size_t N)
02609 {
02610         while (N && X[N-2]==0 && X[N-1]==0)
02611                 N-=2;
02612         return N;
02613 }
02614 
02615 // return k
02616 // R[N] --- result = A^(-1) * 2^k mod M
02617 // T[4*N] - temporary work space
02618 // A[NA] -- number to take inverse of
02619 // M[N] --- modulus
02620 
02621 unsigned int AlmostInverse(word *R, word *T, const word *A, size_t NA, const word *M, size_t N)
02622 {
02623         assert(NA<=N && N && N%2==0);
02624 
02625         word *b = T;
02626         word *c = T+N;
02627         word *f = T+2*N;
02628         word *g = T+3*N;
02629         size_t bcLen=2, fgLen=EvenWordCount(M, N);
02630         unsigned int k=0, s=0;
02631 
02632         SetWords(T, 0, 3*N);
02633         b[0]=1;
02634         CopyWords(f, A, NA);
02635         CopyWords(g, M, N);
02636 
02637         while (1)
02638         {
02639                 word t=f[0];
02640                 while (!t)
02641                 {
02642                         if (EvenWordCount(f, fgLen)==0)
02643                         {
02644                                 SetWords(R, 0, N);
02645                                 return 0;
02646                         }
02647 
02648                         ShiftWordsRightByWords(f, fgLen, 1);
02649                         if (c[bcLen-1]) bcLen+=2;
02650                         assert(bcLen <= N);
02651                         ShiftWordsLeftByWords(c, bcLen, 1);
02652                         k+=WORD_BITS;
02653                         t=f[0];
02654                 }
02655 
02656                 unsigned int i=0;
02657                 while (t%2 == 0)
02658                 {
02659                         t>>=1;
02660                         i++;
02661                 }
02662                 k+=i;
02663 
02664                 if (t==1 && f[1]==0 && EvenWordCount(f, fgLen)==2)
02665                 {
02666                         if (s%2==0)
02667                                 CopyWords(R, b, N);
02668                         else
02669                                 Subtract(R, M, b, N);
02670                         return k;
02671                 }
02672 
02673                 ShiftWordsRightByBits(f, fgLen, i);
02674                 t=ShiftWordsLeftByBits(c, bcLen, i);
02675                 if (t)
02676                 {
02677                         c[bcLen] = t;
02678                         bcLen+=2;
02679                         assert(bcLen <= N);
02680                 }
02681 
02682                 if (f[fgLen-2]==0 && g[fgLen-2]==0 && f[fgLen-1]==0 && g[fgLen-1]==0)
02683                         fgLen-=2;
02684 
02685                 if (Compare(f, g, fgLen)==-1)
02686                 {
02687                         std::swap(f, g);
02688                         std::swap(b, c);
02689                         s++;
02690                 }
02691 
02692                 Subtract(f, f, g, fgLen);
02693 
02694                 if (Add(b, b, c, bcLen))
02695                 {
02696                         b[bcLen] = 1;
02697                         bcLen+=2;
02698                         assert(bcLen <= N);
02699                 }
02700         }
02701 }
02702 
02703 // R[N] - result = A/(2^k) mod M
02704 // A[N] - input
02705 // M[N] - modulus
02706 
02707 void DivideByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
02708 {
02709         CopyWords(R, A, N);
02710 
02711         while (k--)
02712         {
02713                 if (R[0]%2==0)
02714                         ShiftWordsRightByBits(R, N, 1);
02715                 else
02716                 {
02717                         word carry = Add(R, R, M, N);
02718                         ShiftWordsRightByBits(R, N, 1);
02719                         R[N-1] += carry<<(WORD_BITS-1);
02720                 }
02721         }
02722 }
02723 
02724 // R[N] - result = A*(2^k) mod M
02725 // A[N] - input
02726 // M[N] - modulus
02727 
02728 void MultiplyByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
02729 {
02730         CopyWords(R, A, N);
02731 
02732         while (k--)
02733                 if (ShiftWordsLeftByBits(R, N, 1) || Compare(R, M, N)>=0)
02734                         Subtract(R, R, M, N);
02735 }
02736 
02737 // ******************************************************************
02738 
02739 InitializeInteger::InitializeInteger()
02740 {
02741         if (!g_pAssignIntToInteger)
02742         {
02743                 SetFunctionPointers();
02744                 g_pAssignIntToInteger = AssignIntToInteger;
02745         }
02746 }
02747 
02748 static const unsigned int RoundupSizeTable[] = {2, 2, 2, 4, 4, 8, 8, 8, 8};
02749 
02750 static inline size_t RoundupSize(size_t n)
02751 {
02752         if (n<=8)
02753                 return RoundupSizeTable[n];
02754         else if (n<=16)
02755                 return 16;
02756         else if (n<=32)
02757                 return 32;
02758         else if (n<=64)
02759                 return 64;
02760         else return size_t(1) << BitPrecision(n-1);
02761 }
02762 
02763 Integer::Integer()
02764         : reg(2), sign(POSITIVE)
02765 {
02766         reg[0] = reg[1] = 0;
02767 }
02768 
02769 Integer::Integer(const Integer& t)
02770         : reg(RoundupSize(t.WordCount())), sign(t.sign)
02771 {
02772         CopyWords(reg, t.reg, reg.size());
02773 }
02774 
02775 Integer::Integer(Sign s, lword value)
02776         : reg(2), sign(s)
02777 {
02778         reg[0] = word(value);
02779         reg[1] = word(SafeRightShift<WORD_BITS>(value));
02780 }
02781 
02782 Integer::Integer(signed long value)
02783         : reg(2)
02784 {
02785         if (value >= 0)
02786                 sign = POSITIVE;
02787         else
02788         {
02789                 sign = NEGATIVE;
02790                 value = -value;
02791         }
02792         reg[0] = word(value);
02793         reg[1] = word(SafeRightShift<WORD_BITS>((unsigned long)value));
02794 }
02795 
02796 Integer::Integer(Sign s, word high, word low)
02797         : reg(2), sign(s)
02798 {
02799         reg[0] = low;
02800         reg[1] = high;
02801 }
02802 
02803 bool Integer::IsConvertableToLong() const
02804 {
02805         if (ByteCount() > sizeof(long))
02806                 return false;
02807 
02808         unsigned long value = (unsigned long)reg[0];
02809         value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
02810 
02811         if (sign==POSITIVE)
02812                 return (signed long)value >= 0;
02813         else
02814                 return -(signed long)value < 0;
02815 }
02816 
02817 signed long Integer::ConvertToLong() const
02818 {
02819         assert(IsConvertableToLong());
02820 
02821         unsigned long value = (unsigned long)reg[0];
02822         value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
02823         return sign==POSITIVE ? value : -(signed long)value;
02824 }
02825 
02826 Integer::Integer(BufferedTransformation &encodedInteger, size_t byteCount, Signedness s)
02827 {
02828         Decode(encodedInteger, byteCount, s);
02829 }
02830 
02831 Integer::Integer(const byte *encodedInteger, size_t byteCount, Signedness s)
02832 {
02833         Decode(encodedInteger, byteCount, s);
02834 }
02835 
02836 Integer::Integer(BufferedTransformation &bt)
02837 {
02838         BERDecode(bt);
02839 }
02840 
02841 Integer::Integer(RandomNumberGenerator &rng, size_t bitcount)
02842 {
02843         Randomize(rng, bitcount);
02844 }
02845 
02846 Integer::Integer(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
02847 {
02848         if (!Randomize(rng, min, max, rnType, equiv, mod))
02849                 throw Integer::RandomNumberNotFound();
02850 }
02851 
02852 Integer Integer::Power2(size_t e)
02853 {
02854         Integer r((word)0, BitsToWords(e+1));
02855         r.SetBit(e);
02856         return r;
02857 }
02858 
02859 template <long i>
02860 struct NewInteger
02861 {
02862         Integer * operator()() const
02863         {
02864                 return new Integer(i);
02865         }
02866 };
02867 
02868 const Integer &Integer::Zero()
02869 {
02870         return Singleton<Integer>().Ref();
02871 }
02872 
02873 const Integer &Integer::One()
02874 {
02875         return Singleton<Integer, NewInteger<1> >().Ref();
02876 }
02877 
02878 const Integer &Integer::Two()
02879 {
02880         return Singleton<Integer, NewInteger<2> >().Ref();
02881 }
02882 
02883 bool Integer::operator!() const
02884 {
02885         return IsNegative() ? false : (reg[0]==0 && WordCount()==0);
02886 }
02887 
02888 Integer& Integer::operator=(const Integer& t)
02889 {
02890         if (this != &t)
02891         {
02892                 if (reg.size() != t.reg.size() || t.reg[t.reg.size()/2] == 0)
02893                         reg.New(RoundupSize(t.WordCount()));
02894                 CopyWords(reg, t.reg, reg.size());
02895                 sign = t.sign;
02896         }
02897         return *this;
02898 }
02899 
02900 bool Integer::GetBit(size_t n) const
02901 {
02902         if (n/WORD_BITS >= reg.size())
02903                 return 0;
02904         else
02905                 return bool((reg[n/WORD_BITS] >> (n % WORD_BITS)) & 1);
02906 }
02907 
02908 void Integer::SetBit(size_t n, bool value)
02909 {
02910         if (value)
02911         {
02912                 reg.CleanGrow(RoundupSize(BitsToWords(n+1)));
02913                 reg[n/WORD_BITS] |= (word(1) << (n%WORD_BITS));
02914         }
02915         else
02916         {
02917                 if (n/WORD_BITS < reg.size())
02918                         reg[n/WORD_BITS] &= ~(word(1) << (n%WORD_BITS));
02919         }
02920 }
02921 
02922 byte Integer::GetByte(size_t n) const
02923 {
02924         if (n/WORD_SIZE >= reg.size())
02925                 return 0;
02926         else
02927                 return byte(reg[n/WORD_SIZE] >> ((n%WORD_SIZE)*8));
02928 }
02929 
02930 void Integer::SetByte(size_t n, byte value)
02931 {
02932         reg.CleanGrow(RoundupSize(BytesToWords(n+1)));
02933         reg[n/WORD_SIZE] &= ~(word(0xff) << 8*(n%WORD_SIZE));
02934         reg[n/WORD_SIZE] |= (word(value) << 8*(n%WORD_SIZE));
02935 }
02936 
02937 lword Integer::GetBits(size_t i, size_t n) const
02938 {
02939         lword v = 0;
02940         assert(n <= sizeof(v)*8);
02941         for (unsigned int j=0; j<n; j++)
02942                 v |= lword(GetBit(i+j)) << j;
02943         return v;
02944 }
02945 
02946 Integer Integer::operator-() const
02947 {
02948         Integer result(*this);
02949         result.Negate();
02950         return result;
02951 }
02952 
02953 Integer Integer::AbsoluteValue() const
02954 {
02955         Integer result(*this);
02956         result.sign = POSITIVE;
02957         return result;
02958 }
02959 
02960 void Integer::swap(Integer &a)
02961 {
02962         reg.swap(a.reg);
02963         std::swap(sign, a.sign);
02964 }
02965 
02966 Integer::Integer(word value, size_t length)
02967         : reg(RoundupSize(length)), sign(POSITIVE)
02968 {
02969         reg[0] = value;
02970         SetWords(reg+1, 0, reg.size()-1);
02971 }
02972 
02973 template <class T>
02974 static Integer StringToInteger(const T *str)
02975 {
02976         int radix;
02977         // GCC workaround
02978         // std::char_traits<wchar_t>::length() not defined in GCC 3.2 and STLport 4.5.3
02979         unsigned int length;
02980         for (length = 0; str[length] != 0; length++) {}
02981 
02982         Integer v;
02983 
02984         if (length == 0)
02985                 return v;
02986 
02987         switch (str[length-1])
02988         {
02989         case 'h':
02990         case 'H':
02991                 radix=16;
02992                 break;
02993         case 'o':
02994         case 'O':
02995                 radix=8;
02996                 break;
02997         case 'b':
02998         case 'B':
02999                 radix=2;
03000                 break;
03001         default:
03002                 radix=10;
03003         }
03004 
03005         if (length > 2 && str[0] == '0' && str[1] == 'x')
03006                 radix = 16;
03007 
03008         for (unsigned i=0; i<length; i++)
03009         {
03010                 int digit;
03011 
03012                 if (str[i] >= '0' && str[i] <= '9')
03013                         digit = str[i] - '0';
03014                 else if (str[i] >= 'A' && str[i] <= 'F')
03015                         digit = str[i] - 'A' + 10;
03016                 else if (str[i] >= 'a' && str[i] <= 'f')
03017                         digit = str[i] - 'a' + 10;
03018                 else
03019                         digit = radix;
03020 
03021                 if (digit < radix)
03022                 {
03023                         v *= radix;
03024                         v += digit;
03025                 }
03026         }
03027 
03028         if (str[0] == '-')
03029                 v.Negate();
03030 
03031         return v;
03032 }
03033 
03034 Integer::Integer(const char *str)
03035         : reg(2), sign(POSITIVE)
03036 {
03037         *this = StringToInteger(str);
03038 }
03039 
03040 Integer::Integer(const wchar_t *str)
03041         : reg(2), sign(POSITIVE)
03042 {
03043         *this = StringToInteger(str);
03044 }
03045 
03046 unsigned int Integer::WordCount() const
03047 {
03048         return (unsigned int)CountWords(reg, reg.size());
03049 }
03050 
03051 unsigned int Integer::ByteCount() const
03052 {
03053         unsigned wordCount = WordCount();
03054         if (wordCount)
03055                 return (wordCount-1)*WORD_SIZE + BytePrecision(reg[wordCount-1]);
03056         else
03057                 return 0;
03058 }
03059 
03060 unsigned int Integer::BitCount() const
03061 {
03062         unsigned wordCount = WordCount();
03063         if (wordCount)
03064                 return (wordCount-1)*WORD_BITS + BitPrecision(reg[wordCount-1]);
03065         else
03066                 return 0;
03067 }
03068 
03069 void Integer::Decode(const byte *input, size_t inputLen, Signedness s)
03070 {
03071         StringStore store(input, inputLen);
03072         Decode(store, inputLen, s);
03073 }
03074 
03075 void Integer::Decode(BufferedTransformation &bt, size_t inputLen, Signedness s)
03076 {
03077         assert(bt.MaxRetrievable() >= inputLen);
03078 
03079         byte b;
03080         bt.Peek(b);
03081         sign = ((s==SIGNED) && (b & 0x80)) ? NEGATIVE : POSITIVE;
03082 
03083         while (inputLen>0 && (sign==POSITIVE ? b==0 : b==0xff))
03084         {
03085                 bt.Skip(1);
03086                 inputLen--;
03087                 bt.Peek(b);
03088         }
03089 
03090         reg.CleanNew(RoundupSize(BytesToWords(inputLen)));
03091 
03092         for (size_t i=inputLen; i > 0; i--)
03093         {
03094                 bt.Get(b);
03095                 reg[(i-1)/WORD_SIZE] |= word(b) << ((i-1)%WORD_SIZE)*8;
03096         }
03097 
03098         if (sign == NEGATIVE)
03099         {
03100                 for (size_t i=inputLen; i<reg.size()*WORD_SIZE; i++)
03101                         reg[i/WORD_SIZE] |= word(0xff) << (i%WORD_SIZE)*8;
03102                 TwosComplement(reg, reg.size());
03103         }
03104 }
03105 
03106 size_t Integer::MinEncodedSize(Signedness signedness) const
03107 {
03108         unsigned int outputLen = STDMAX(1U, ByteCount());
03109         if (signedness == UNSIGNED)
03110                 return outputLen;
03111         if (NotNegative() && (GetByte(outputLen-1) & 0x80))
03112                 outputLen++;
03113         if (IsNegative() && *this < -Power2(outputLen*8-1))
03114                 outputLen++;
03115         return outputLen;
03116 }
03117 
03118 void Integer::Encode(byte *output, size_t outputLen, Signedness signedness) const
03119 {
03120         ArraySink sink(output, outputLen);
03121         Encode(sink, outputLen, signedness);
03122 }
03123 
03124 void Integer::Encode(BufferedTransformation &bt, size_t outputLen, Signedness signedness) const
03125 {
03126         if (signedness == UNSIGNED || NotNegative())
03127         {
03128                 for (size_t i=outputLen; i > 0; i--)
03129                         bt.Put(GetByte(i-1));
03130         }
03131         else
03132         {
03133                 // take two's complement of *this
03134                 Integer temp = Integer::Power2(8*STDMAX((size_t)ByteCount(), outputLen)) + *this;
03135                 temp.Encode(bt, outputLen, UNSIGNED);
03136         }
03137 }
03138 
03139 void Integer::DEREncode(BufferedTransformation &bt) const
03140 {
03141         DERGeneralEncoder enc(bt, INTEGER);
03142         Encode(enc, MinEncodedSize(SIGNED), SIGNED);
03143         enc.MessageEnd();
03144 }
03145 
03146 void Integer::BERDecode(const byte *input, size_t len)
03147 {
03148         StringStore store(input, len);
03149         BERDecode(store);
03150 }
03151 
03152 void Integer::BERDecode(BufferedTransformation &bt)
03153 {
03154         BERGeneralDecoder dec(bt, INTEGER);
03155         if (!dec.IsDefiniteLength() || dec.MaxRetrievable() < dec.RemainingLength())
03156                 BERDecodeError();
03157         Decode(dec, (size_t)dec.RemainingLength(), SIGNED);
03158         dec.MessageEnd();
03159 }
03160 
03161 void Integer::DEREncodeAsOctetString(BufferedTransformation &bt, size_t length) const
03162 {
03163         DERGeneralEncoder enc(bt, OCTET_STRING);
03164         Encode(enc, length);
03165         enc.MessageEnd();
03166 }
03167 
03168 void Integer::BERDecodeAsOctetString(BufferedTransformation &bt, size_t length)
03169 {
03170         BERGeneralDecoder dec(bt, OCTET_STRING);
03171         if (!dec.IsDefiniteLength() || dec.RemainingLength() != length)
03172                 BERDecodeError();
03173         Decode(dec, length);
03174         dec.MessageEnd();
03175 }
03176 
03177 size_t Integer::OpenPGPEncode(byte *output, size_t len) const
03178 {
03179         ArraySink sink(output, len);
03180         return OpenPGPEncode(sink);
03181 }
03182 
03183 size_t Integer::OpenPGPEncode(BufferedTransformation &bt) const
03184 {
03185         word16 bitCount = BitCount();
03186         bt.PutWord16(bitCount);
03187         size_t byteCount = BitsToBytes(bitCount);
03188         Encode(bt, byteCount);
03189         return 2 + byteCount;
03190 }
03191 
03192 void Integer::OpenPGPDecode(const byte *input, size_t len)
03193 {
03194         StringStore store(input, len);
03195         OpenPGPDecode(store);
03196 }
03197 
03198 void Integer::OpenPGPDecode(BufferedTransformation &bt)
03199 {
03200         word16 bitCount;
03201         if (bt.GetWord16(bitCount) != 2 || bt.MaxRetrievable() < BitsToBytes(bitCount))
03202                 throw OpenPGPDecodeErr();
03203         Decode(bt, BitsToBytes(bitCount));
03204 }
03205 
03206 void Integer::Randomize(RandomNumberGenerator &rng, size_t nbits)
03207 {
03208         const size_t nbytes = nbits/8 + 1;
03209         SecByteBlock buf(nbytes);
03210         rng.GenerateBlock(buf, nbytes);
03211         if (nbytes)
03212                 buf[0] = (byte)Crop(buf[0], nbits % 8);
03213         Decode(buf, nbytes, UNSIGNED);
03214 }
03215 
03216 void Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max)
03217 {
03218         if (min > max)
03219                 throw InvalidArgument("Integer: Min must be no greater than Max");
03220 
03221         Integer range = max - min;
03222         const unsigned int nbits = range.BitCount();
03223 
03224         do
03225         {
03226                 Randomize(rng, nbits);
03227         }
03228         while (*this > range);
03229 
03230         *this += min;
03231 }
03232 
03233 bool Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
03234 {
03235         return GenerateRandomNoThrow(rng, MakeParameters("Min", min)("Max", max)("RandomNumberType", rnType)("EquivalentTo", equiv)("Mod", mod));
03236 }
03237 
03238 class KDF2_RNG : public RandomNumberGenerator
03239 {
03240 public:
03241         KDF2_RNG(const byte *seed, size_t seedSize)
03242                 : m_counter(0), m_counterAndSeed(seedSize + 4)
03243         {
03244                 memcpy(m_counterAndSeed + 4, seed, seedSize);
03245         }
03246 
03247         void GenerateBlock(byte *output, size_t size)
03248         {
03249                 PutWord(false, BIG_ENDIAN_ORDER, m_counterAndSeed, m_counter);
03250                 ++m_counter;
03251                 P1363_KDF2<SHA1>::DeriveKey(output, size, m_counterAndSeed, m_counterAndSeed.size(), NULL, 0);
03252         }
03253 
03254 private:
03255         word32 m_counter;
03256         SecByteBlock m_counterAndSeed;
03257 };
03258 
03259 bool Integer::GenerateRandomNoThrow(RandomNumberGenerator &i_rng, const NameValuePairs &params)
03260 {
03261         Integer min = params.GetValueWithDefault("Min", Integer::Zero());
03262         Integer max;
03263         if (!params.GetValue("Max", max))
03264         {
03265                 int bitLength;
03266                 if (params.GetIntValue("BitLength", bitLength))
03267                         max = Integer::Power2(bitLength);
03268                 else
03269                         throw InvalidArgument("Integer: missing Max argument");
03270         }
03271         if (min > max)
03272                 throw InvalidArgument("Integer: Min must be no greater than Max");
03273 
03274         Integer equiv = params.GetValueWithDefault("EquivalentTo", Integer::Zero());
03275         Integer mod = params.GetValueWithDefault("Mod", Integer::One());
03276 
03277         if (equiv.IsNegative() || equiv >= mod)
03278                 throw InvalidArgument("Integer: invalid EquivalentTo and/or Mod argument");
03279 
03280         Integer::RandomNumberType rnType = params.GetValueWithDefault("RandomNumberType", Integer::ANY);
03281 
03282         member_ptr<KDF2_RNG> kdf2Rng;
03283         ConstByteArrayParameter seed;
03284         if (params.GetValue("Seed", seed))
03285         {
03286                 ByteQueue bq;
03287                 DERSequenceEncoder seq(bq);
03288                 min.DEREncode(seq);
03289                 max.DEREncode(seq);
03290                 equiv.DEREncode(seq);
03291                 mod.DEREncode(seq);
03292                 DEREncodeUnsigned(seq, rnType);
03293                 DEREncodeOctetString(seq, seed.begin(), seed.size());
03294                 seq.MessageEnd();
03295 
03296                 SecByteBlock finalSeed((size_t)bq.MaxRetrievable());
03297                 bq.Get(finalSeed, finalSeed.size());
03298                 kdf2Rng.reset(new KDF2_RNG(finalSeed.begin(), finalSeed.size()));
03299         }
03300         RandomNumberGenerator &rng = kdf2Rng.get() ? (RandomNumberGenerator &)*kdf2Rng : i_rng;
03301 
03302         switch (rnType)
03303         {
03304                 case ANY:
03305                         if (mod == One())
03306                                 Randomize(rng, min, max);
03307                         else
03308                         {
03309                                 Integer min1 = min + (equiv-min)%mod;
03310                                 if (max < min1)
03311                                         return false;
03312                                 Randomize(rng, Zero(), (max - min1) / mod);
03313                                 *this *= mod;
03314                                 *this += min1;
03315                         }
03316                         return true;
03317 
03318                 case PRIME:
03319                 {
03320                         const PrimeSelector *pSelector = params.GetValueWithDefault(Name::PointerToPrimeSelector(), (const PrimeSelector *)NULL);
03321 
03322                         int i;
03323                         i = 0;
03324                         while (1)
03325                         {
03326                                 if (++i==16)
03327                                 {
03328                                         // check if there are any suitable primes in [min, max]
03329                                         Integer first = min;
03330                                         if (FirstPrime(first, max, equiv, mod, pSelector))
03331                                         {
03332                                                 // if there is only one suitable prime, we're done
03333                                                 *this = first;
03334                                                 if (!FirstPrime(first, max, equiv, mod, pSelector))
03335                                                         return true;
03336                                         }
03337                                         else
03338                                                 return false;
03339                                 }
03340 
03341                                 Randomize(rng, min, max);
03342                                 if (FirstPrime(*this, STDMIN(*this+mod*PrimeSearchInterval(max), max), equiv, mod, pSelector))
03343                                         return true;
03344                         }
03345                 }
03346 
03347                 default:
03348                         throw InvalidArgument("Integer: invalid RandomNumberType argument");
03349         }
03350 }
03351 
03352 std::istream& operator>>(std::istream& in, Integer &a)
03353 {
03354         char c;
03355         unsigned int length = 0;
03356         SecBlock<char> str(length + 16);
03357 
03358         std::ws(in);
03359 
03360         do
03361         {
03362                 in.read(&c, 1);
03363                 str[length++] = c;
03364                 if (length >= str.size())
03365                         str.Grow(length + 16);
03366         }
03367         while (in && (c=='-' || c=='x' || (c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F') || c=='h' || c=='H' || c=='o' || c=='O' || c==',' || c=='.'));
03368 
03369         if (in.gcount())
03370                 in.putback(c);
03371         str[length-1] = '\0';
03372         a = Integer(str);
03373 
03374         return in;
03375 }
03376 
03377 std::ostream& operator<<(std::ostream& out, const Integer &a)
03378 {
03379         // Get relevant conversion specifications from ostream.
03380         long f = out.flags() & std::ios::basefield; // Get base digits.
03381         int base, block;
03382         char suffix;
03383         switch(f)
03384         {
03385         case std::ios::oct :
03386                 base = 8;
03387                 block = 8;
03388                 suffix = 'o';
03389                 break;
03390         case std::ios::hex :
03391                 base = 16;
03392                 block = 4;
03393                 suffix = 'h';
03394                 break;
03395         default :
03396                 base = 10;
03397                 block = 3;
03398                 suffix = '.';
03399         }
03400 
03401         SecBlock<char> s(a.BitCount() / (BitPrecision(base)-1) + 1);
03402         Integer temp1=a, temp2;
03403         unsigned i=0;
03404         const char vec[]="0123456789ABCDEF";
03405 
03406         if (a.IsNegative())
03407         {
03408                 out << '-';
03409                 temp1.Negate();
03410         }
03411 
03412         if (!a)
03413                 out << '0';
03414 
03415         while (!!temp1)
03416         {
03417                 word digit;
03418                 Integer::Divide(digit, temp2, temp1, base);
03419                 s[i++]=vec[digit];
03420                 temp1=temp2;
03421         }
03422 
03423         while (i--)
03424         {
03425                 out << s[i];
03426 //              if (i && !(i%block))
03427 //                      out << ",";
03428         }
03429         return out << suffix;
03430 }
03431 
03432 Integer& Integer::operator++()
03433 {
03434         if (NotNegative())
03435         {
03436                 if (Increment(reg, reg.size()))
03437                 {
03438                         reg.CleanGrow(2*reg.size());
03439                         reg[reg.size()/2]=1;
03440                 }
03441         }
03442         else
03443         {
03444                 word borrow = Decrement(reg, reg.size());
03445                 assert(!borrow);
03446                 if (WordCount()==0)
03447                         *this = Zero();
03448         }
03449         return *this;
03450 }
03451 
03452 Integer& Integer::operator--()
03453 {
03454         if (IsNegative())
03455         {
03456                 if (Increment(reg, reg.size()))
03457                 {
03458                         reg.CleanGrow(2*reg.size());
03459                         reg[reg.size()/2]=1;
03460                 }
03461         }
03462         else
03463         {
03464                 if (Decrement(reg, reg.size()))
03465                         *this = -One();
03466         }
03467         return *this;
03468 }
03469 
03470 void PositiveAdd(Integer &sum, const Integer &a, const Integer& b)
03471 {
03472         int carry;
03473         if (a.reg.size() == b.reg.size())
03474                 carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
03475         else if (a.reg.size() > b.reg.size())
03476         {
03477                 carry = Add(sum.reg, a.reg, b.reg, b.reg.size());
03478                 CopyWords(sum.reg+b.reg.size(), a.reg+b.reg.size(), a.reg.size()-b.reg.size());
03479                 carry = Increment(sum.reg+b.reg.size(), a.reg.size()-b.reg.size(), carry);
03480         }
03481         else
03482         {
03483                 carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
03484                 CopyWords(sum.reg+a.reg.size(), b.reg+a.reg.size(), b.reg.size()-a.reg.size());
03485                 carry = Increment(sum.reg+a.reg.size(), b.reg.size()-a.reg.size(), carry);
03486         }
03487 
03488         if (carry)
03489         {
03490                 sum.reg.CleanGrow(2*sum.reg.size());
03491                 sum.reg[sum.reg.size()/2] = 1;
03492         }
03493         sum.sign = Integer::POSITIVE;
03494 }
03495 
03496 void PositiveSubtract(Integer &diff, const Integer &a, const Integer& b)
03497 {
03498         unsigned aSize = a.WordCount();
03499         aSize += aSize%2;
03500         unsigned bSize = b.WordCount();
03501         bSize += bSize%2;
03502 
03503         if (aSize == bSize)
03504         {
03505                 if (Compare(a.reg, b.reg, aSize) >= 0)
03506                 {
03507                         Subtract(diff.reg, a.reg, b.reg, aSize);
03508                         diff.sign = Integer::POSITIVE;
03509                 }
03510                 else
03511                 {
03512                         Subtract(diff.reg, b.reg, a.reg, aSize);
03513                         diff.sign = Integer::NEGATIVE;
03514                 }
03515         }
03516         else if (aSize > bSize)
03517         {
03518                 word borrow = Subtract(diff.reg, a.reg, b.reg, bSize);
03519                 CopyWords(diff.reg+bSize, a.reg+bSize, aSize-bSize);
03520                 borrow = Decrement(diff.reg+bSize, aSize-bSize, borrow);
03521                 assert(!borrow);
03522                 diff.sign = Integer::POSITIVE;
03523         }
03524         else
03525         {
03526                 word borrow = Subtract(diff.reg, b.reg, a.reg, aSize);
03527                 CopyWords(diff.reg+aSize, b.reg+aSize, bSize-aSize);
03528                 borrow = Decrement(diff.reg+aSize, bSize-aSize, borrow);
03529                 assert(!borrow);
03530                 diff.sign = Integer::NEGATIVE;
03531         }
03532 }
03533 
03534 // MSVC .NET 2003 workaround
03535 template <class T> inline const T& STDMAX2(const T& a, const T& b)
03536 {
03537         return a < b ? b : a;
03538 }
03539 
03540 Integer Integer::Plus(const Integer& b) const
03541 {
03542         Integer sum((word)0, STDMAX2(reg.size(), b.reg.size()));
03543         if (NotNegative())
03544         {
03545                 if (b.NotNegative())
03546                         PositiveAdd(sum, *this, b);
03547                 else
03548                         PositiveSubtract(sum, *this, b);
03549         }
03550         else
03551         {
03552                 if (b.NotNegative())
03553                         PositiveSubtract(sum, b, *this);
03554                 else
03555                 {
03556                         PositiveAdd(sum, *this, b);
03557                         sum.sign = Integer::NEGATIVE;
03558                 }
03559         }
03560         return sum;
03561 }
03562 
03563 Integer& Integer::operator+=(const Integer& t)
03564 {
03565         reg.CleanGrow(t.reg.size());
03566         if (NotNegative())
03567         {
03568                 if (t.NotNegative())
03569                         PositiveAdd(*this, *this, t);
03570                 else
03571                         PositiveSubtract(*this, *this, t);
03572         }
03573         else
03574         {
03575                 if (t.NotNegative())
03576                         PositiveSubtract(*this, t, *this);
03577                 else
03578                 {
03579                         PositiveAdd(*this, *this, t);
03580                         sign = Integer::NEGATIVE;
03581                 }
03582         }
03583         return *this;
03584 }
03585 
03586 Integer Integer::Minus(const Integer& b) const
03587 {
03588         Integer diff((word)0, STDMAX2(reg.size(), b.reg.size()));
03589         if (NotNegative())
03590         {
03591                 if (b.NotNegative())
03592                         PositiveSubtract(diff, *this, b);
03593                 else
03594                         PositiveAdd(diff, *this, b);
03595         }
03596         else
03597         {
03598                 if (b.NotNegative())
03599                 {
03600                         PositiveAdd(diff, *this, b);
03601                         diff.sign = Integer::NEGATIVE;
03602                 }
03603                 else
03604                         PositiveSubtract(diff, b, *this);
03605         }
03606         return diff;
03607 }
03608 
03609 Integer& Integer::operator-=(const Integer& t)
03610 {
03611         reg.CleanGrow(t.reg.size());
03612         if (NotNegative())
03613         {
03614                 if (t.NotNegative())
03615                         PositiveSubtract(*this, *this, t);
03616                 else
03617                         PositiveAdd(*this, *this, t);
03618         }
03619         else
03620         {
03621                 if (t.NotNegative())
03622                 {
03623                         PositiveAdd(*this, *this, t);
03624                         sign = Integer::NEGATIVE;
03625                 }
03626                 else
03627                         PositiveSubtract(*this, t, *this);
03628         }
03629         return *this;
03630 }
03631 
03632 Integer& Integer::operator<<=(size_t n)
03633 {
03634         const size_t wordCount = WordCount();
03635         const size_t shiftWords = n / WORD_BITS;
03636         const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
03637 
03638         reg.CleanGrow(RoundupSize(wordCount+BitsToWords(n)));
03639         ShiftWordsLeftByWords(reg, wordCount + shiftWords, shiftWords);
03640         ShiftWordsLeftByBits(reg+shiftWords, wordCount+BitsToWords(shiftBits), shiftBits);
03641         return *this;
03642 }
03643 
03644 Integer& Integer::operator>>=(size_t n)
03645 {
03646         const size_t wordCount = WordCount();
03647         const size_t shiftWords = n / WORD_BITS;
03648         const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
03649 
03650         ShiftWordsRightByWords(reg, wordCount, shiftWords);
03651         if (wordCount > shiftWords)
03652                 ShiftWordsRightByBits(reg, wordCount-shiftWords, shiftBits);
03653         if (IsNegative() && WordCount()==0)   // avoid -0
03654                 *this = Zero();
03655         return *this;
03656 }
03657 
03658 void PositiveMultiply(Integer &product, const Integer &a, const Integer &b)
03659 {
03660         size_t aSize = RoundupSize(a.WordCount());
03661         size_t bSize = RoundupSize(b.WordCount());
03662 
03663         product.reg.CleanNew(RoundupSize(aSize+bSize));
03664         product.sign = Integer::POSITIVE;
03665 
03666         IntegerSecBlock workspace(aSize + bSize);
03667         AsymmetricMultiply(product.reg, workspace, a.reg, aSize, b.reg, bSize);
03668 }
03669 
03670 void Multiply(Integer &product, const Integer &a, const Integer &b)
03671 {
03672         PositiveMultiply(product, a, b);
03673 
03674         if (a.NotNegative() != b.NotNegative())
03675                 product.Negate();
03676 }
03677 
03678 Integer Integer::Times(const Integer &b) const
03679 {
03680         Integer product;
03681         Multiply(product, *this, b);
03682         return product;
03683 }
03684 
03685 /*
03686 void PositiveDivide(Integer &remainder, Integer &quotient,
03687                                    const Integer &dividend, const Integer &divisor)
03688 {
03689         remainder.reg.CleanNew(divisor.reg.size());
03690         remainder.sign = Integer::POSITIVE;
03691         quotient.reg.New(0);
03692         quotient.sign = Integer::POSITIVE;
03693         unsigned i=dividend.BitCount();
03694         while (i--)
03695         {
03696                 word overflow = ShiftWordsLeftByBits(remainder.reg, remainder.reg.size(), 1);
03697                 remainder.reg[0] |= dividend[i];
03698                 if (overflow || remainder >= divisor)
03699                 {
03700                         Subtract(remainder.reg, remainder.reg, divisor.reg, remainder.reg.size());
03701                         quotient.SetBit(i);
03702                 }
03703         }
03704 }
03705 */
03706 
03707 void PositiveDivide(Integer &remainder, Integer &quotient,
03708                                    const Integer &a, const Integer &b)
03709 {
03710         unsigned aSize = a.WordCount();
03711         unsigned bSize = b.WordCount();
03712 
03713         if (!bSize)
03714                 throw Integer::DivideByZero();
03715 
03716         if (a.PositiveCompare(b) == -1)
03717         {
03718                 remainder = a;
03719                 remainder.sign = Integer::POSITIVE;
03720                 quotient = Integer::Zero();
03721                 return;
03722         }
03723 
03724         aSize += aSize%2;       // round up to next even number
03725         bSize += bSize%2;
03726 
03727         remainder.reg.CleanNew(RoundupSize(bSize));
03728         remainder.sign = Integer::POSITIVE;
03729         quotient.reg.CleanNew(RoundupSize(aSize-bSize+2));
03730         quotient.sign = Integer::POSITIVE;
03731 
03732         IntegerSecBlock T(aSize+3*(bSize+2));
03733         Divide(remainder.reg, quotient.reg, T, a.reg, aSize, b.reg, bSize);
03734 }
03735 
03736 void Integer::Divide(Integer &remainder, Integer &quotient, const Integer &dividend, const Integer &divisor)
03737 {
03738         PositiveDivide(remainder, quotient, dividend, divisor);
03739 
03740         if (dividend.IsNegative())
03741         {
03742                 quotient.Negate();
03743                 if (remainder.NotZero())
03744                 {
03745                         --quotient;
03746                         remainder = divisor.AbsoluteValue() - remainder;
03747                 }
03748         }
03749 
03750         if (divisor.IsNegative())
03751                 quotient.Negate();
03752 }
03753 
03754 void Integer::DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n)
03755 {
03756         q = a;
03757         q >>= n;
03758 
03759         const size_t wordCount = BitsToWords(n);
03760         if (wordCount <= a.WordCount())
03761         {
03762                 r.reg.resize(RoundupSize(wordCount));
03763                 CopyWords(r.reg, a.reg, wordCount);
03764                 SetWords(r.reg+wordCount, 0, r.reg.size()-wordCount);
03765                 if (n % WORD_BITS != 0)
03766                         r.reg[wordCount-1] %= (word(1) << (n % WORD_BITS));
03767         }
03768         else
03769         {
03770                 r.reg.resize(RoundupSize(a.WordCount()));
03771                 CopyWords(r.reg, a.reg, r.reg.size());
03772         }
03773         r.sign = POSITIVE;
03774 
03775         if (a.IsNegative() && r.NotZero())
03776         {
03777                 --q;
03778                 r = Power2(n) - r;
03779         }
03780 }
03781 
03782 Integer Integer::DividedBy(const Integer &b) const
03783 {
03784         Integer remainder, quotient;
03785         Integer::Divide(remainder, quotient, *this, b);
03786         return quotient;
03787 }
03788 
03789 Integer Integer::Modulo(const Integer &b) const
03790 {
03791         Integer remainder, quotient;
03792         Integer::Divide(remainder, quotient, *this, b);
03793         return remainder;
03794 }
03795 
03796 void Integer::Divide(word &remainder, Integer &quotient, const Integer &dividend, word divisor)
03797 {
03798         if (!divisor)
03799                 throw Integer::DivideByZero();
03800 
03801         assert(divisor);
03802 
03803         if ((divisor & (divisor-1)) == 0)       // divisor is a power of 2
03804         {
03805                 quotient = dividend >> (BitPrecision(divisor)-1);
03806                 remainder = dividend.reg[0] & (divisor-1);
03807                 return;
03808         }
03809 
03810         unsigned int i = dividend.WordCount();
03811         quotient.reg.CleanNew(RoundupSize(i));
03812         remainder = 0;
03813         while (i--)
03814         {
03815                 quotient.reg[i] = DWord(dividend.reg[i], remainder) / divisor;
03816                 remainder = DWord(dividend.reg[i], remainder) % divisor;
03817         }
03818 
03819         if (dividend.NotNegative())
03820                 quotient.sign = POSITIVE;
03821         else
03822         {
03823                 quotient.sign = NEGATIVE;
03824                 if (remainder)
03825                 {
03826                         --quotient;
03827                         remainder = divisor - remainder;
03828                 }
03829         }
03830 }
03831 
03832 Integer Integer::DividedBy(word b) const
03833 {
03834         word remainder;
03835         Integer quotient;
03836         Integer::Divide(remainder, quotient, *this, b);
03837         return quotient;
03838 }
03839 
03840 word Integer::Modulo(word divisor) const
03841 {
03842         if (!divisor)
03843                 throw Integer::DivideByZero();
03844 
03845         assert(divisor);
03846 
03847         word remainder;
03848 
03849         if ((divisor & (divisor-1)) == 0)       // divisor is a power of 2
03850                 remainder = reg[0] & (divisor-1);
03851         else
03852         {
03853                 unsigned int i = WordCount();
03854 
03855                 if (divisor <= 5)
03856                 {
03857                         DWord sum(0, 0);
03858                         while (i--)
03859                                 sum += reg[i];
03860                         remainder = sum % divisor;
03861                 }
03862                 else
03863                 {
03864                         remainder = 0;
03865                         while (i--)
03866                                 remainder = DWord(reg[i], remainder) % divisor;
03867                 }
03868         }
03869 
03870         if (IsNegative() && remainder)
03871                 remainder = divisor - remainder;
03872 
03873         return remainder;
03874 }
03875 
03876 void Integer::Negate()
03877 {
03878         if (!!(*this))  // don't flip sign if *this==0
03879                 sign = Sign(1-sign);
03880 }
03881 
03882 int Integer::PositiveCompare(const Integer& t) const
03883 {
03884         unsigned size = WordCount(), tSize = t.WordCount();
03885 
03886         if (size == tSize)
03887                 return CryptoPP::Compare(reg, t.reg, size);
03888         else
03889                 return size > tSize ? 1 : -1;
03890 }
03891 
03892 int Integer::Compare(const Integer& t) const
03893 {
03894         if (NotNegative())
03895         {
03896                 if (t.NotNegative())
03897                         return PositiveCompare(t);
03898                 else
03899                         return 1;
03900         }
03901         else
03902         {
03903                 if (t.NotNegative())
03904                         return -1;
03905                 else
03906                         return -PositiveCompare(t);
03907         }
03908 }
03909 
03910 Integer Integer::SquareRoot() const
03911 {
03912         if (!IsPositive())
03913                 return Zero();
03914 
03915         // overestimate square root
03916         Integer x, y = Power2((BitCount()+1)/2);
03917         assert(y*y >= *this);
03918 
03919         do
03920         {
03921                 x = y;
03922                 y = (x + *this/x) >> 1;
03923         } while (y<x);
03924 
03925         return x;
03926 }
03927 
03928 bool Integer::IsSquare() const
03929 {
03930         Integer r = SquareRoot();
03931         return *this == r.Squared();
03932 }
03933 
03934 bool Integer::IsUnit() const
03935 {
03936         return (WordCount() == 1) && (reg[0] == 1);
03937 }
03938 
03939 Integer Integer::MultiplicativeInverse() const
03940 {
03941         return IsUnit() ? *this : Zero();
03942 }
03943 
03944 Integer a_times_b_mod_c(const Integer &x, const Integer& y, const Integer& m)
03945 {
03946         return x*y%m;
03947 }
03948 
03949 Integer a_exp_b_mod_c(const Integer &x, const Integer& e, const Integer& m)
03950 {
03951         ModularArithmetic mr(m);
03952         return mr.Exponentiate(x, e);
03953 }
03954 
03955 Integer Integer::Gcd(const Integer &a, const Integer &b)
03956 {
03957         return EuclideanDomainOf<Integer>().Gcd(a, b);
03958 }
03959 
03960 Integer Integer::InverseMod(const Integer &m) const
03961 {
03962         assert(m.NotNegative());
03963 
03964         if (IsNegative() || *this>=m)
03965                 return (*this%m).InverseMod(m);
03966 
03967         if (m.IsEven())
03968         {
03969                 if (!m || IsEven())
03970                         return Zero();  // no inverse
03971                 if (*this == One())
03972                         return One();
03973 
03974                 Integer u = m.InverseMod(*this);
03975                 return !u ? Zero() : (m*(*this-u)+1)/(*this);
03976         }
03977 
03978         SecBlock<word> T(m.reg.size() * 4);
03979         Integer r((word)0, m.reg.size());
03980         unsigned k = AlmostInverse(r.reg, T, reg, reg.size(), m.reg, m.reg.size());
03981         DivideByPower2Mod(r.reg, r.reg, k, m.reg, m.reg.size());
03982         return r;
03983 }
03984 
03985 word Integer::InverseMod(word mod) const
03986 {
03987         word g0 = mod, g1 = *this % mod;
03988         word v0 = 0, v1 = 1;
03989         word y;
03990 
03991         while (g1)
03992         {
03993                 if (g1 == 1)
03994                         return v1;
03995                 y = g0 / g1;
03996                 g0 = g0 % g1;
03997                 v0 += y * v1;
03998 
03999                 if (!g0)
04000                         break;
04001                 if (g0 == 1)
04002                         return mod-v0;
04003                 y = g1 / g0;
04004                 g1 = g1 % g0;
04005                 v1 += y * v0;
04006         }
04007         return 0;
04008 }
04009 
04010 // ********************************************************
04011 
04012 ModularArithmetic::ModularArithmetic(BufferedTransformation &bt)
04013 {
04014         BERSequenceDecoder seq(bt);
04015         OID oid(seq);
04016         if (oid != ASN1::prime_field())
04017                 BERDecodeError();
04018         m_modulus.BERDecode(seq);
04019         seq.MessageEnd();
04020         m_result.reg.resize(m_modulus.reg.size());
04021 }
04022 
04023 void ModularArithmetic::DEREncode(BufferedTransformation &bt) const
04024 {
04025         DERSequenceEncoder seq(bt);
04026         ASN1::prime_field().DEREncode(seq);
04027         m_modulus.DEREncode(seq);
04028         seq.MessageEnd();
04029 }
04030 
04031 void ModularArithmetic::DEREncodeElement(BufferedTransformation &out, const Element &a) const
04032 {
04033         a.DEREncodeAsOctetString(out, MaxElementByteLength());
04034 }
04035 
04036 void ModularArithmetic::BERDecodeElement(BufferedTransformation &in, Element &a) const
04037 {
04038         a.BERDecodeAsOctetString(in, MaxElementByteLength());
04039 }
04040 
04041 const Integer& ModularArithmetic::Half(const Integer &a) const
04042 {
04043         if (a.reg.size()==m_modulus.reg.size())
04044         {
04045                 CryptoPP::DivideByPower2Mod(m_result.reg.begin(), a.reg, 1, m_modulus.reg, a.reg.size());
04046                 return m_result;
04047         }
04048         else
04049                 return m_result1 = (a.IsEven() ? (a >> 1) : ((a+m_modulus) >> 1));
04050 }
04051 
04052 const Integer& ModularArithmetic::Add(const Integer &a, const Integer &b) const
04053 {
04054         if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04055         {
04056                 if (CryptoPP::Add(m_result.reg.begin(), a.reg, b.reg, a.reg.size())
04057                         || Compare(m_result.reg, m_modulus.reg, a.reg.size()) >= 0)
04058                 {
04059                         CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
04060                 }
04061                 return m_result;
04062         }
04063         else
04064         {
04065                 m_result1 = a+b;
04066                 if (m_result1 >= m_modulus)
04067                         m_result1 -= m_modulus;
04068                 return m_result1;
04069         }
04070 }
04071 
04072 Integer& ModularArithmetic::Accumulate(Integer &a, const Integer &b) const
04073 {
04074         if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04075         {
04076                 if (CryptoPP::Add(a.reg, a.reg, b.reg, a.reg.size())
04077                         || Compare(a.reg, m_modulus.reg, a.reg.size()) >= 0)
04078                 {
04079                         CryptoPP::Subtract(a.reg, a.reg, m_modulus.reg, a.reg.size());
04080                 }
04081         }
04082         else
04083         {
04084                 a+=b;
04085                 if (a>=m_modulus)
04086                         a-=m_modulus;
04087         }
04088 
04089         return a;
04090 }
04091 
04092 const Integer& ModularArithmetic::Subtract(const Integer &a, const Integer &b) const
04093 {
04094         if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04095         {
04096                 if (CryptoPP::Subtract(m_result.reg.begin(), a.reg, b.reg, a.reg.size()))
04097                         CryptoPP::Add(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
04098                 return m_result;
04099         }
04100         else
04101         {
04102                 m_result1 = a-b;
04103                 if (m_result1.IsNegative())
04104                         m_result1 += m_modulus;
04105                 return m_result1;
04106         }
04107 }
04108 
04109 Integer& ModularArithmetic::Reduce(Integer &a, const Integer &b) const
04110 {
04111         if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04112         {
04113                 if (CryptoPP::Subtract(a.reg, a.reg, b.reg, a.reg.size()))
04114                         CryptoPP::Add(a.reg, a.reg, m_modulus.reg, a.reg.size());
04115         }
04116         else
04117         {
04118                 a-=b;
04119                 if (a.IsNegative())
04120                         a+=m_modulus;
04121         }
04122 
04123         return a;
04124 }
04125 
04126 const Integer& ModularArithmetic::Inverse(const Integer &a) const
04127 {
04128         if (!a)
04129                 return a;
04130 
04131         CopyWords(m_result.reg.begin(), m_modulus.reg, m_modulus.reg.size());
04132         if (CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, a.reg, a.reg.size()))
04133                 Decrement(m_result.reg.begin()+a.reg.size(), m_modulus.reg.size()-a.reg.size());
04134 
04135         return m_result;
04136 }
04137 
04138 Integer ModularArithmetic::CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
04139 {
04140         if (m_modulus.IsOdd())
04141         {
04142                 MontgomeryRepresentation dr(m_modulus);
04143                 return dr.ConvertOut(dr.CascadeExponentiate(dr.ConvertIn(x), e1, dr.ConvertIn(y), e2));
04144         }
04145         else
04146                 return AbstractRing<Integer>::CascadeExponentiate(x, e1, y, e2);
04147 }
04148 
04149 void ModularArithmetic::SimultaneousExponentiate(Integer *results, const Integer &base, const Integer *exponents, unsigned int exponentsCount) const
04150 {
04151         if (m_modulus.IsOdd())
04152         {
04153                 MontgomeryRepresentation dr(m_modulus);
04154                 dr.SimultaneousExponentiate(results, dr.ConvertIn(base), exponents, exponentsCount);
04155                 for (unsigned int i=0; i<exponentsCount; i++)
04156                         results[i] = dr.ConvertOut(results[i]);
04157         }
04158         else
04159                 AbstractRing<Integer>::SimultaneousExponentiate(results, base, exponents, exponentsCount);
04160 }
04161 
04162 MontgomeryRepresentation::MontgomeryRepresentation(const Integer &m)    // modulus must be odd
04163         : ModularArithmetic(m),
04164           m_u((word)0, m_modulus.reg.size()),
04165           m_workspace(5*m_modulus.reg.size())
04166 {
04167         if (!m_modulus.IsOdd())
04168                 throw InvalidArgument("MontgomeryRepresentation: Montgomery representation requires an odd modulus");
04169 
04170         RecursiveInverseModPower2(m_u.reg, m_workspace, m_modulus.reg, m_modulus.reg.size());
04171 }
04172 
04173 const Integer& MontgomeryRepresentation::Multiply(const Integer &a, const Integer &b) const
04174 {
04175         word *const T = m_workspace.begin();
04176         word *const R = m_result.reg.begin();
04177         const size_t N = m_modulus.reg.size();
04178         assert(a.reg.size()<=N && b.reg.size()<=N);
04179 
04180         AsymmetricMultiply(T, T+2*N, a.reg, a.reg.size(), b.reg, b.reg.size());
04181         SetWords(T+a.reg.size()+b.reg.size(), 0, 2*N-a.reg.size()-b.reg.size());
04182         MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04183         return m_result;
04184 }
04185 
04186 const Integer& MontgomeryRepresentation::Square(const Integer &a) const
04187 {
04188         word *const T = m_workspace.begin();
04189         word *const R = m_result.reg.begin();
04190         const size_t N = m_modulus.reg.size();
04191         assert(a.reg.size()<=N);
04192 
04193         CryptoPP::Square(T, T+2*N, a.reg, a.reg.size());
04194         SetWords(T+2*a.reg.size(), 0, 2*N-2*a.reg.size());
04195         MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04196         return m_result;
04197 }
04198 
04199 Integer MontgomeryRepresentation::ConvertOut(const Integer &a) const
04200 {
04201         word *const T = m_workspace.begin();
04202         word *const R = m_result.reg.begin();
04203         const size_t N = m_modulus.reg.size();
04204         assert(a.reg.size()<=N);
04205 
04206         CopyWords(T, a.reg, a.reg.size());
04207         SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
04208         MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04209         return m_result;
04210 }
04211 
04212 const Integer& MontgomeryRepresentation::MultiplicativeInverse(const Integer &a) const
04213 {
04214 //        return (EuclideanMultiplicativeInverse(a, modulus)<<(2*WORD_BITS*modulus.reg.size()))%modulus;
04215         word *const T = m_workspace.begin();
04216         word *const R = m_result.reg.begin();
04217         const size_t N = m_modulus.reg.size();
04218         assert(a.reg.size()<=N);
04219 
04220         CopyWords(T, a.reg, a.reg.size());
04221         SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
04222         MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04223         unsigned k = AlmostInverse(R, T, R, N, m_modulus.reg, N);
04224 
04225 //      cout << "k=" << k << " N*32=" << 32*N << endl;
04226 
04227         if (k>N*WORD_BITS)
04228                 DivideByPower2Mod(R, R, k-N*WORD_BITS, m_modulus.reg, N);
04229         else
04230                 MultiplyByPower2Mod(R, R, N*WORD_BITS-k, m_modulus.reg, N);
04231 
04232         return m_result;
04233 }
04234 
04235 NAMESPACE_END
04236 
04237 #endif

Generated on Fri Feb 6 00:56:24 2009 for Crypto++ by  doxygen 1.4.7