Crypto++: integer.cpp Source File

00001 // integer.cpp - written and placed in the public domain by Wei Dai
00002 // contains public domain code contributed by Alister Lee and Leonard Janke
00003 
00004 #include "pch.h"
00005 
00006 #ifndef CRYPTOPP_IMPORTS
00007 
00008 #include "integer.h"
00009 #include "modarith.h"
00010 #include "nbtheory.h"
00011 #include "asn.h"
00012 #include "oids.h"
00013 #include "words.h"
00014 #include "algparam.h"
00015 #include "pubkey.h"             // for P1363_KDF2
00016 #include "sha.h"
00017 #include "cpu.h"
00018 
00019 #include <iostream>
00020 
00021 #if _MSC_VER >= 1400
00022         #include <intrin.h>
00023 #endif
00024 
00025 #ifdef __DECCXX
00026         #include <c_asm.h>
00027 #endif
00028 
00029 #ifdef CRYPTOPP_MSVC6_NO_PP
00030         #pragma message("You do not seem to have the Visual C++ Processor Pack installed, so use of SSE2 instructions will be disabled.")
00031 #endif
00032 
00033 #define CRYPTOPP_INTEGER_SSE2 (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86)
00034 
00035 NAMESPACE_BEGIN(CryptoPP)
00036 
00037 bool AssignIntToInteger(const std::type_info &valueType, void *pInteger, const void *pInt)
00038 {
00039         if (valueType != typeid(Integer))
00040                 return false;
00041         *reinterpret_cast<Integer *>(pInteger) = *reinterpret_cast<const int *>(pInt);
00042         return true;
00043 }
00044 
00045 inline static int Compare(const word *A, const word *B, size_t N)
00046 {
00047         while (N--)
00048                 if (A[N] > B[N])
00049                         return 1;
00050                 else if (A[N] < B[N])
00051                         return -1;
00052 
00053         return 0;
00054 }
00055 
00056 inline static int Increment(word *A, size_t N, word B=1)
00057 {
00058         assert(N);
00059         word t = A[0];
00060         A[0] = t+B;
00061         if (A[0] >= t)
00062                 return 0;
00063         for (unsigned i=1; i<N; i++)
00064                 if (++A[i])
00065                         return 0;
00066         return 1;
00067 }
00068 
00069 inline static int Decrement(word *A, size_t N, word B=1)
00070 {
00071         assert(N);
00072         word t = A[0];
00073         A[0] = t-B;
00074         if (A[0] <= t)
00075                 return 0;
00076         for (unsigned i=1; i<N; i++)
00077                 if (A[i]--)
00078                         return 0;
00079         return 1;
00080 }
00081 
00082 static void TwosComplement(word *A, size_t N)
00083 {
00084         Decrement(A, N);
00085         for (unsigned i=0; i<N; i++)
00086                 A[i] = ~A[i];
00087 }
00088 
00089 static word AtomicInverseModPower2(word A)
00090 {
00091         assert(A%2==1);
00092 
00093         word R=A%8;
00094 
00095         for (unsigned i=3; i<WORD_BITS; i*=2)
00096                 R = R*(2-R*A);
00097 
00098         assert(R*A==1);
00099         return R;
00100 }
00101 
00102 // ********************************************************
00103 
00104 #if !defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE) || (defined(__x86_64__) && defined(CRYPTOPP_WORD128_AVAILABLE))
00105         #define Declare2Words(x)                        word x##0, x##1;
00106         #define AssignWord(a, b)                        a##0 = b; a##1 = 0;
00107         #define Add2WordsBy1(a, b, c)           a##0 = b##0 + c; a##1 = b##1 + (a##0 < c);
00108         #define LowWord(a)                                      a##0
00109         #define HighWord(a)                                     a##1
00110         #ifdef _MSC_VER
00111                 #define MultiplyWordsLoHi(p0, p1, a, b)         p0 = _umul128(a, b, &p1);
00112                 #ifndef __INTEL_COMPILER
00113                         #define Double3Words(c, d)              d##1 = __shiftleft128(d##0, d##1, 1); d##0 = __shiftleft128(c, d##0, 1); c *= 2;
00114                 #endif
00115         #elif defined(__DECCXX)
00116                 #define MultiplyWordsLoHi(p0, p1, a, b)         p0 = a*b; p1 = asm("umulh %a0, %a1, %v0", a, b);
00117         #elif defined(__x86_64__)
00118                 #if defined(__SUNPRO_CC) && __SUNPRO_CC < 0x5100
00119                         // Sun Studio's gcc-style inline assembly is heavily bugged as of version 5.9 Patch 124864-09 2008/12/16, but this one works
00120                         #define MultiplyWordsLoHi(p0, p1, a, b)         asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "r"(b) : "cc");
00121                 #else
00122                         #define MultiplyWordsLoHi(p0, p1, a, b)         asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
00123                         #define MulAcc(c, d, a, b)              asm ("mulq %6; addq %3, %0; adcq %4, %1; adcq $0, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1), "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
00124                         #define Double3Words(c, d)              asm ("addq %0, %0; adcq %1, %1; adcq %2, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1) : : "cc");
00125                         #define Acc2WordsBy1(a, b)              asm ("addq %2, %0; adcq $0, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b) : "cc");
00126                         #define Acc2WordsBy2(a, b)              asm ("addq %2, %0; adcq %3, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b##0), "r"(b##1) : "cc");
00127                         #define Acc3WordsBy2(c, d, e)   asm ("addq %5, %0; adcq %6, %1; adcq $0, %2;" : "+r"(c), "=r"(e##0), "=r"(e##1) : "1"(d##0), "2"(d##1), "r"(e##0), "r"(e##1) : "cc");
00128                 #endif
00129         #endif
00130         #define MultiplyWords(p, a, b)          MultiplyWordsLoHi(p##0, p##1, a, b)
00131         #ifndef Double3Words
00132                 #define Double3Words(c, d)              d##1 = 2*d##1 + (d##0>>(WORD_BITS-1)); d##0 = 2*d##0 + (c>>(WORD_BITS-1)); c *= 2;
00133         #endif
00134         #ifndef Acc2WordsBy2
00135                 #define Acc2WordsBy2(a, b)              a##0 += b##0; a##1 += a##0 < b##0; a##1 += b##1;
00136         #endif
00137         #define AddWithCarry(u, a, b)           {word t = a+b; u##0 = t + u##1; u##1 = (t<a) + (u##0<t);}
00138         #define SubtractWithBorrow(u, a, b)     {word t = a-b; u##0 = t - u##1; u##1 = (t>a) + (u##0>t);}
00139         #define GetCarry(u)                                     u##1
00140         #define GetBorrow(u)                            u##1
00141 #else
00142         #define Declare2Words(x)                        dword x;
00143         #if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER)
00144                 #define MultiplyWords(p, a, b)          p = __emulu(a, b);
00145         #else
00146                 #define MultiplyWords(p, a, b)          p = (dword)a*b;
00147         #endif
00148         #define AssignWord(a, b)                        a = b;
00149         #define Add2WordsBy1(a, b, c)           a = b + c;
00150         #define Acc2WordsBy2(a, b)                      a += b;
00151         #define LowWord(a)                                      word(a)
00152         #define HighWord(a)                                     word(a>>WORD_BITS)
00153         #define Double3Words(c, d)                      d = 2*d + (c>>(WORD_BITS-1)); c *= 2;
00154         #define AddWithCarry(u, a, b)           u = dword(a) + b + GetCarry(u);
00155         #define SubtractWithBorrow(u, a, b)     u = dword(a) - b - GetBorrow(u);
00156         #define GetCarry(u)                                     HighWord(u)
00157         #define GetBorrow(u)                            word(u>>(WORD_BITS*2-1))
00158 #endif
00159 #ifndef MulAcc
00160         #define MulAcc(c, d, a, b)                      MultiplyWords(p, a, b); Acc2WordsBy1(p, c); c = LowWord(p); Acc2WordsBy1(d, HighWord(p));
00161 #endif
00162 #ifndef Acc2WordsBy1
00163         #define Acc2WordsBy1(a, b)                      Add2WordsBy1(a, a, b)
00164 #endif
00165 #ifndef Acc3WordsBy2
00166         #define Acc3WordsBy2(c, d, e)           Acc2WordsBy1(e, c); c = LowWord(e); Add2WordsBy1(e, d, HighWord(e));
00167 #endif
00168 
00169 class DWord
00170 {
00171 public:
00172         DWord() {}
00173 
00174 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00175         explicit DWord(word low)
00176         {
00177                 m_whole = low;
00178         }
00179 #else
00180         explicit DWord(word low)
00181         {
00182                 m_halfs.low = low;
00183                 m_halfs.high = 0;
00184         }
00185 #endif
00186 
00187         DWord(word low, word high)
00188         {
00189                 m_halfs.low = low;
00190                 m_halfs.high = high;
00191         }
00192 
00193         static DWord Multiply(word a, word b)
00194         {
00195                 DWord r;
00196                 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00197                         r.m_whole = (dword)a * b;
00198                 #elif defined(MultiplyWordsLoHi)
00199                         MultiplyWordsLoHi(r.m_halfs.low, r.m_halfs.high, a, b);
00200                 #endif
00201                 return r;
00202         }
00203 
00204         static DWord MultiplyAndAdd(word a, word b, word c)
00205         {
00206                 DWord r = Multiply(a, b);
00207                 return r += c;
00208         }
00209 
00210         DWord & operator+=(word a)
00211         {
00212                 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00213                         m_whole = m_whole + a;
00214                 #else
00215                         m_halfs.low += a;
00216                         m_halfs.high += (m_halfs.low < a);
00217                 #endif
00218                 return *this;
00219         }
00220 
00221         DWord operator+(word a)
00222         {
00223                 DWord r;
00224                 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00225                         r.m_whole = m_whole + a;
00226                 #else
00227                         r.m_halfs.low = m_halfs.low + a;
00228                         r.m_halfs.high = m_halfs.high + (r.m_halfs.low < a);
00229                 #endif
00230                 return r;
00231         }
00232 
00233         DWord operator-(DWord a)
00234         {
00235                 DWord r;
00236                 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00237                         r.m_whole = m_whole - a.m_whole;
00238                 #else
00239                         r.m_halfs.low = m_halfs.low - a.m_halfs.low;
00240                         r.m_halfs.high = m_halfs.high - a.m_halfs.high - (r.m_halfs.low > m_halfs.low);
00241                 #endif
00242                 return r;
00243         }
00244 
00245         DWord operator-(word a)
00246         {
00247                 DWord r;
00248                 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00249                         r.m_whole = m_whole - a;
00250                 #else
00251                         r.m_halfs.low = m_halfs.low - a;
00252                         r.m_halfs.high = m_halfs.high - (r.m_halfs.low > m_halfs.low);
00253                 #endif
00254                 return r;
00255         }
00256 
00257         // returns quotient, which must fit in a word
00258         word operator/(word divisor);
00259 
00260         word operator%(word a);
00261 
00262         bool operator!() const
00263         {
00264         #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00265                 return !m_whole;
00266         #else
00267                 return !m_halfs.high && !m_halfs.low;
00268         #endif
00269         }
00270 
00271         word GetLowHalf() const {return m_halfs.low;}
00272         word GetHighHalf() const {return m_halfs.high;}
00273         word GetHighHalfAsBorrow() const {return 0-m_halfs.high;}
00274 
00275 private:
00276         union
00277         {
00278         #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00279                 dword m_whole;
00280         #endif
00281                 struct
00282                 {
00283                 #ifdef IS_LITTLE_ENDIAN
00284                         word low;
00285                         word high;
00286                 #else
00287                         word high;
00288                         word low;
00289                 #endif
00290                 } m_halfs;
00291         };
00292 };
00293 
00294 class Word
00295 {
00296 public:
00297         Word() {}
00298 
00299         Word(word value)
00300         {
00301                 m_whole = value;
00302         }
00303 
00304         Word(hword low, hword high)
00305         {
00306                 m_whole = low | (word(high) << (WORD_BITS/2));
00307         }
00308 
00309         static Word Multiply(hword a, hword b)
00310         {
00311                 Word r;
00312                 r.m_whole = (word)a * b;
00313                 return r;
00314         }
00315 
00316         Word operator-(Word a)
00317         {
00318                 Word r;
00319                 r.m_whole = m_whole - a.m_whole;
00320                 return r;
00321         }
00322 
00323         Word operator-(hword a)
00324         {
00325                 Word r;
00326                 r.m_whole = m_whole - a;
00327                 return r;
00328         }
00329 
00330         // returns quotient, which must fit in a word
00331         hword operator/(hword divisor)
00332         {
00333                 return hword(m_whole / divisor);
00334         }
00335 
00336         bool operator!() const
00337         {
00338                 return !m_whole;
00339         }
00340 
00341         word GetWhole() const {return m_whole;}
00342         hword GetLowHalf() const {return hword(m_whole);}
00343         hword GetHighHalf() const {return hword(m_whole>>(WORD_BITS/2));}
00344         hword GetHighHalfAsBorrow() const {return 0-hword(m_whole>>(WORD_BITS/2));}
00345         
00346 private:
00347         word m_whole;
00348 };
00349 
00350 // do a 3 word by 2 word divide, returns quotient and leaves remainder in A
00351 template <class S, class D>
00352 S DivideThreeWordsByTwo(S *A, S B0, S B1, D *dummy=NULL)
00353 {
00354         // assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a S
00355         assert(A[2] < B1 || (A[2]==B1 && A[1] < B0));
00356 
00357         // estimate the quotient: do a 2 S by 1 S divide
00358         S Q;
00359         if (S(B1+1) == 0)
00360                 Q = A[2];
00361         else if (B1 > 0)
00362                 Q = D(A[1], A[2]) / S(B1+1);
00363         else
00364                 Q = D(A[0], A[1]) / B0;
00365 
00366         // now subtract Q*B from A
00367         D p = D::Multiply(B0, Q);
00368         D u = (D) A[0] - p.GetLowHalf();
00369         A[0] = u.GetLowHalf();
00370         u = (D) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - D::Multiply(B1, Q);
00371         A[1] = u.GetLowHalf();
00372         A[2] += u.GetHighHalf();
00373 
00374         // Q <= actual quotient, so fix it
00375         while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
00376         {
00377                 u = (D) A[0] - B0;
00378                 A[0] = u.GetLowHalf();
00379                 u = (D) A[1] - B1 - u.GetHighHalfAsBorrow();
00380                 A[1] = u.GetLowHalf();
00381                 A[2] += u.GetHighHalf();
00382                 Q++;
00383                 assert(Q);      // shouldn't overflow
00384         }
00385 
00386         return Q;
00387 }
00388 
00389 // do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1
00390 template <class S, class D>
00391 inline D DivideFourWordsByTwo(S *T, const D &Al, const D &Ah, const D &B)
00392 {
00393         if (!B) // if divisor is 0, we assume divisor==2**(2*WORD_BITS)
00394                 return D(Ah.GetLowHalf(), Ah.GetHighHalf());
00395         else
00396         {
00397                 S Q[2];
00398                 T[0] = Al.GetLowHalf();
00399                 T[1] = Al.GetHighHalf(); 
00400                 T[2] = Ah.GetLowHalf();
00401                 T[3] = Ah.GetHighHalf();
00402                 Q[1] = DivideThreeWordsByTwo<S, D>(T+1, B.GetLowHalf(), B.GetHighHalf());
00403                 Q[0] = DivideThreeWordsByTwo<S, D>(T, B.GetLowHalf(), B.GetHighHalf());
00404                 return D(Q[0], Q[1]);
00405         }
00406 }
00407 
00408 // returns quotient, which must fit in a word
00409 inline word DWord::operator/(word a)
00410 {
00411         #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00412                 return word(m_whole / a);
00413         #else
00414                 hword r[4];
00415                 return DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a).GetWhole();
00416         #endif
00417 }
00418 
00419 inline word DWord::operator%(word a)
00420 {
00421         #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00422                 return word(m_whole % a);
00423         #else
00424                 if (a < (word(1) << (WORD_BITS/2)))
00425                 {
00426                         hword h = hword(a);
00427                         word r = m_halfs.high % h;
00428                         r = ((m_halfs.low >> (WORD_BITS/2)) + (r << (WORD_BITS/2))) % h;
00429                         return hword((hword(m_halfs.low) + (r << (WORD_BITS/2))) % h);
00430                 }
00431                 else
00432                 {
00433                         hword r[4];
00434                         DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a);
00435                         return Word(r[0], r[1]).GetWhole();
00436                 }
00437         #endif
00438 }
00439 
00440 // ********************************************************
00441 
00442 // use some tricks to share assembly code between MSVC and GCC
00443 #if defined(__GNUC__)
00444         #define AddPrologue \
00445                 int result;     \
00446                 __asm__ __volatile__ \
00447                 ( \
00448                         ".intel_syntax noprefix;"
00449         #define AddEpilogue \
00450                         ".att_syntax prefix;" \
00451                                         : "=a" (result)\
00452                                         : "d" (C), "a" (A), "D" (B), "c" (N) \
00453                                         : "%esi", "memory", "cc" \
00454                 );\
00455                 return result;
00456         #define MulPrologue \
00457                 __asm__ __volatile__ \
00458                 ( \
00459                         ".intel_syntax noprefix;" \
00460                         AS1(    push    ebx) \
00461                         AS2(    mov             ebx, edx)
00462         #define MulEpilogue \
00463                         AS1(    pop             ebx) \
00464                         ".att_syntax prefix;" \
00465                         : \
00466                         : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B) \
00467                         : "%esi", "memory", "cc" \
00468                 );
00469         #define SquPrologue             MulPrologue
00470         #define SquEpilogue     \
00471                         AS1(    pop             ebx) \
00472                         ".att_syntax prefix;" \
00473                         : \
00474                         : "d" (s_maskLow16), "c" (C), "a" (A) \
00475                         : "%esi", "%edi", "memory", "cc" \
00476                 );
00477         #define TopPrologue             MulPrologue
00478         #define TopEpilogue     \
00479                         AS1(    pop             ebx) \
00480                         ".att_syntax prefix;" \
00481                         : \
00482                         : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B), "S" (L) \
00483                         : "memory", "cc" \
00484                 );
00485 #else
00486         #define AddPrologue \
00487                 __asm   push edi \
00488                 __asm   push esi \
00489                 __asm   mov             eax, [esp+12] \
00490                 __asm   mov             edi, [esp+16]
00491         #define AddEpilogue \
00492                 __asm   pop esi \
00493                 __asm   pop edi \
00494                 __asm   ret 8
00495 #if _MSC_VER < 1300
00496         #define SaveEBX         __asm push ebx
00497         #define RestoreEBX      __asm pop ebx
00498 #else
00499         #define SaveEBX
00500         #define RestoreEBX
00501 #endif
00502         #define SquPrologue                                     \
00503                 AS2(    mov             eax, A)                 \
00504                 AS2(    mov             ecx, C)                 \
00505                 SaveEBX                                                 \
00506                 AS2(    lea             ebx, s_maskLow16)
00507         #define MulPrologue                                     \
00508                 AS2(    mov             eax, A)                 \
00509                 AS2(    mov             edi, B)                 \
00510                 AS2(    mov             ecx, C)                 \
00511                 SaveEBX                                                 \
00512                 AS2(    lea             ebx, s_maskLow16)
00513         #define TopPrologue                                     \
00514                 AS2(    mov             eax, A)                 \
00515                 AS2(    mov             edi, B)                 \
00516                 AS2(    mov             ecx, C)                 \
00517                 AS2(    mov             esi, L)                 \
00518                 SaveEBX                                                 \
00519                 AS2(    lea             ebx, s_maskLow16)
00520         #define SquEpilogue             RestoreEBX
00521         #define MulEpilogue             RestoreEBX
00522         #define TopEpilogue             RestoreEBX
00523 #endif
00524 
00525 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
00526 extern "C" {
00527 int Baseline_Add(size_t N, word *C, const word *A, const word *B);
00528 int Baseline_Sub(size_t N, word *C, const word *A, const word *B);
00529 }
00530 #elif defined(CRYPTOPP_X64_ASM_AVAILABLE) && defined(__GNUC__) && defined(CRYPTOPP_WORD128_AVAILABLE)
00531 int Baseline_Add(size_t N, word *C, const word *A, const word *B)
00532 {
00533         word result;
00534         __asm__ __volatile__
00535         (
00536         ".intel_syntax;"
00537         AS1(    neg             %1)
00538         ASJ(    jz,             1, f)
00539         AS2(    mov             %0,[%3+8*%1])
00540         AS2(    add             %0,[%4+8*%1])
00541         AS2(    mov             [%2+8*%1],%0)
00542         ASL(0)
00543         AS2(    mov             %0,[%3+8*%1+8])
00544         AS2(    adc             %0,[%4+8*%1+8])
00545         AS2(    mov             [%2+8*%1+8],%0)
00546         AS2(    lea             %1,[%1+2])
00547         ASJ(    jrcxz,  1, f)
00548         AS2(    mov             %0,[%3+8*%1])
00549         AS2(    adc             %0,[%4+8*%1])
00550         AS2(    mov             [%2+8*%1],%0)
00551         ASJ(    jmp,    0, b)
00552         ASL(1)
00553         AS2(    mov             %0, 0)
00554         AS2(    adc             %0, %0)
00555         ".att_syntax;"
00556         : "=&r" (result), "+c" (N)
00557         : "r" (C+N), "r" (A+N), "r" (B+N)
00558         : "memory", "cc"
00559         );
00560         return (int)result;
00561 }
00562 
00563 int Baseline_Sub(size_t N, word *C, const word *A, const word *B)
00564 {
00565         word result;
00566         __asm__ __volatile__
00567         (
00568         ".intel_syntax;"
00569         AS1(    neg             %1)
00570         ASJ(    jz,             1, f)
00571         AS2(    mov             %0,[%3+8*%1])
00572         AS2(    sub             %0,[%4+8*%1])
00573         AS2(    mov             [%2+8*%1],%0)
00574         ASL(0)
00575         AS2(    mov             %0,[%3+8*%1+8])
00576         AS2(    sbb             %0,[%4+8*%1+8])
00577         AS2(    mov             [%2+8*%1+8],%0)
00578         AS2(    lea             %1,[%1+2])
00579         ASJ(    jrcxz,  1, f)
00580         AS2(    mov             %0,[%3+8*%1])
00581         AS2(    sbb             %0,[%4+8*%1])
00582         AS2(    mov             [%2+8*%1],%0)
00583         ASJ(    jmp,    0, b)
00584         ASL(1)
00585         AS2(    mov             %0, 0)
00586         AS2(    adc             %0, %0)
00587         ".att_syntax;"
00588         : "=&r" (result), "+c" (N)
00589         : "r" (C+N), "r" (A+N), "r" (B+N)
00590         : "memory", "cc"
00591         );
00592         return (int)result;
00593 }
00594 #elif defined(CRYPTOPP_X86_ASM_AVAILABLE) && CRYPTOPP_BOOL_X86
00595 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
00596 {
00597         AddPrologue
00598 
00599         // now: eax = A, edi = B, edx = C, ecx = N
00600         AS2(    lea             eax, [eax+4*ecx])
00601         AS2(    lea             edi, [edi+4*ecx])
00602         AS2(    lea             edx, [edx+4*ecx])
00603 
00604         AS1(    neg             ecx)                            // ecx is negative index
00605         AS2(    test    ecx, 2)                         // this clears carry flag
00606         ASJ(    jz,             0, f)
00607         AS2(    sub             ecx, 2)
00608         ASJ(    jmp,    1, f)
00609 
00610         ASL(0)
00611         ASJ(    jecxz,  2, f)                           // loop until ecx overflows and becomes zero
00612         AS2(    mov             esi,[eax+4*ecx])
00613         AS2(    adc             esi,[edi+4*ecx])
00614         AS2(    mov             [edx+4*ecx],esi)
00615         AS2(    mov             esi,[eax+4*ecx+4])
00616         AS2(    adc             esi,[edi+4*ecx+4])
00617         AS2(    mov             [edx+4*ecx+4],esi)
00618         ASL(1)
00619         AS2(    mov             esi,[eax+4*ecx+8])
00620         AS2(    adc             esi,[edi+4*ecx+8])
00621         AS2(    mov             [edx+4*ecx+8],esi)
00622         AS2(    mov             esi,[eax+4*ecx+12])
00623         AS2(    adc             esi,[edi+4*ecx+12])
00624         AS2(    mov             [edx+4*ecx+12],esi)
00625 
00626         AS2(    lea             ecx,[ecx+4])            // advance index, avoid inc which causes slowdown on Intel Core 2
00627         ASJ(    jmp,    0, b)
00628 
00629         ASL(2)
00630         AS2(    mov             eax, 0)
00631         AS1(    setc    al)                                     // store carry into eax (return result register)
00632 
00633         AddEpilogue
00634 }
00635 
00636 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B)
00637 {
00638         AddPrologue
00639 
00640         // now: eax = A, edi = B, edx = C, ecx = N
00641         AS2(    lea             eax, [eax+4*ecx])
00642         AS2(    lea             edi, [edi+4*ecx])
00643         AS2(    lea             edx, [edx+4*ecx])
00644 
00645         AS1(    neg             ecx)                            // ecx is negative index
00646         AS2(    test    ecx, 2)                         // this clears carry flag
00647         ASJ(    jz,             0, f)
00648         AS2(    sub             ecx, 2)
00649         ASJ(    jmp,    1, f)
00650 
00651         ASL(0)
00652         ASJ(    jecxz,  2, f)                           // loop until ecx overflows and becomes zero
00653         AS2(    mov             esi,[eax+4*ecx])
00654         AS2(    sbb             esi,[edi+4*ecx])
00655         AS2(    mov             [edx+4*ecx],esi)
00656         AS2(    mov             esi,[eax+4*ecx+4])
00657         AS2(    sbb             esi,[edi+4*ecx+4])
00658         AS2(    mov             [edx+4*ecx+4],esi)
00659         ASL(1)
00660         AS2(    mov             esi,[eax+4*ecx+8])
00661         AS2(    sbb             esi,[edi+4*ecx+8])
00662         AS2(    mov             [edx+4*ecx+8],esi)
00663         AS2(    mov             esi,[eax+4*ecx+12])
00664         AS2(    sbb             esi,[edi+4*ecx+12])
00665         AS2(    mov             [edx+4*ecx+12],esi)
00666 
00667         AS2(    lea             ecx,[ecx+4])            // advance index, avoid inc which causes slowdown on Intel Core 2
00668         ASJ(    jmp,    0, b)
00669 
00670         ASL(2)
00671         AS2(    mov             eax, 0)
00672         AS1(    setc    al)                                     // store carry into eax (return result register)
00673 
00674         AddEpilogue
00675 }
00676 
00677 #if CRYPTOPP_INTEGER_SSE2
00678 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Add(size_t N, word *C, const word *A, const word *B)
00679 {
00680         AddPrologue
00681 
00682         // now: eax = A, edi = B, edx = C, ecx = N
00683         AS2(    lea             eax, [eax+4*ecx])
00684         AS2(    lea             edi, [edi+4*ecx])
00685         AS2(    lea             edx, [edx+4*ecx])
00686 
00687         AS1(    neg             ecx)                            // ecx is negative index
00688         AS2(    pxor    mm2, mm2)
00689         ASJ(    jz,             2, f)
00690         AS2(    test    ecx, 2)                         // this clears carry flag
00691         ASJ(    jz,             0, f)
00692         AS2(    sub             ecx, 2)
00693         ASJ(    jmp,    1, f)
00694 
00695         ASL(0)
00696         AS2(    movd     mm0, DWORD PTR [eax+4*ecx])
00697         AS2(    movd     mm1, DWORD PTR [edi+4*ecx])
00698         AS2(    paddq    mm0, mm1)
00699         AS2(    paddq    mm2, mm0)
00700         AS2(    movd     DWORD PTR [edx+4*ecx], mm2)
00701         AS2(    psrlq    mm2, 32)
00702 
00703         AS2(    movd     mm0, DWORD PTR [eax+4*ecx+4])
00704         AS2(    movd     mm1, DWORD PTR [edi+4*ecx+4])
00705         AS2(    paddq    mm0, mm1)
00706         AS2(    paddq    mm2, mm0)
00707         AS2(    movd     DWORD PTR [edx+4*ecx+4], mm2)
00708         AS2(    psrlq    mm2, 32)
00709 
00710         ASL(1)
00711         AS2(    movd     mm0, DWORD PTR [eax+4*ecx+8])
00712         AS2(    movd     mm1, DWORD PTR [edi+4*ecx+8])
00713         AS2(    paddq    mm0, mm1)
00714         AS2(    paddq    mm2, mm0)
00715         AS2(    movd     DWORD PTR [edx+4*ecx+8], mm2)
00716         AS2(    psrlq    mm2, 32)
00717 
00718         AS2(    movd     mm0, DWORD PTR [eax+4*ecx+12])
00719         AS2(    movd     mm1, DWORD PTR [edi+4*ecx+12])
00720         AS2(    paddq    mm0, mm1)
00721         AS2(    paddq    mm2, mm0)
00722         AS2(    movd     DWORD PTR [edx+4*ecx+12], mm2)
00723         AS2(    psrlq    mm2, 32)
00724 
00725         AS2(    add             ecx, 4)
00726         ASJ(    jnz,    0, b)
00727 
00728         ASL(2)
00729         AS2(    movd    eax, mm2)
00730         AS1(    emms)
00731 
00732         AddEpilogue
00733 }
00734 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Sub(size_t N, word *C, const word *A, const word *B)
00735 {
00736         AddPrologue
00737 
00738         // now: eax = A, edi = B, edx = C, ecx = N
00739         AS2(    lea             eax, [eax+4*ecx])
00740         AS2(    lea             edi, [edi+4*ecx])
00741         AS2(    lea             edx, [edx+4*ecx])
00742 
00743         AS1(    neg             ecx)                            // ecx is negative index
00744         AS2(    pxor    mm2, mm2)
00745         ASJ(    jz,             2, f)
00746         AS2(    test    ecx, 2)                         // this clears carry flag
00747         ASJ(    jz,             0, f)
00748         AS2(    sub             ecx, 2)
00749         ASJ(    jmp,    1, f)
00750 
00751         ASL(0)
00752         AS2(    movd     mm0, DWORD PTR [eax+4*ecx])
00753         AS2(    movd     mm1, DWORD PTR [edi+4*ecx])
00754         AS2(    psubq    mm0, mm1)
00755         AS2(    psubq    mm0, mm2)
00756         AS2(    movd     DWORD PTR [edx+4*ecx], mm0)
00757         AS2(    psrlq    mm0, 63)
00758 
00759         AS2(    movd     mm2, DWORD PTR [eax+4*ecx+4])
00760         AS2(    movd     mm1, DWORD PTR [edi+4*ecx+4])
00761         AS2(    psubq    mm2, mm1)
00762         AS2(    psubq    mm2, mm0)
00763         AS2(    movd     DWORD PTR [edx+4*ecx+4], mm2)
00764         AS2(    psrlq    mm2, 63)
00765 
00766         ASL(1)
00767         AS2(    movd     mm0, DWORD PTR [eax+4*ecx+8])
00768         AS2(    movd     mm1, DWORD PTR [edi+4*ecx+8])
00769         AS2(    psubq    mm0, mm1)
00770         AS2(    psubq    mm0, mm2)
00771         AS2(    movd     DWORD PTR [edx+4*ecx+8], mm0)
00772         AS2(    psrlq    mm0, 63)
00773 
00774         AS2(    movd     mm2, DWORD PTR [eax+4*ecx+12])
00775         AS2(    movd     mm1, DWORD PTR [edi+4*ecx+12])
00776         AS2(    psubq    mm2, mm1)
00777         AS2(    psubq    mm2, mm0)
00778         AS2(    movd     DWORD PTR [edx+4*ecx+12], mm2)
00779         AS2(    psrlq    mm2, 63)
00780 
00781         AS2(    add             ecx, 4)
00782         ASJ(    jnz,    0, b)
00783 
00784         ASL(2)
00785         AS2(    movd    eax, mm2)
00786         AS1(    emms)
00787 
00788         AddEpilogue
00789 }
00790 #endif  // #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
00791 #else
00792 int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
00793 {
00794         assert (N%2 == 0);
00795 
00796         Declare2Words(u);
00797         AssignWord(u, 0);
00798         for (size_t i=0; i<N; i+=2)
00799         {
00800                 AddWithCarry(u, A[i], B[i]);
00801                 C[i] = LowWord(u);
00802                 AddWithCarry(u, A[i+1], B[i+1]);
00803                 C[i+1] = LowWord(u);
00804         }
00805         return int(GetCarry(u));
00806 }
00807 
00808 int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B)
00809 {
00810         assert (N%2 == 0);
00811 
00812         Declare2Words(u);
00813         AssignWord(u, 0);
00814         for (size_t i=0; i<N; i+=2)
00815         {
00816                 SubtractWithBorrow(u, A[i], B[i]);
00817                 C[i] = LowWord(u);
00818                 SubtractWithBorrow(u, A[i+1], B[i+1]);
00819                 C[i+1] = LowWord(u);
00820         }
00821         return int(GetBorrow(u));
00822 }
00823 #endif
00824 
00825 static word LinearMultiply(word *C, const word *A, word B, size_t N)
00826 {
00827         word carry=0;
00828         for(unsigned i=0; i<N; i++)
00829         {
00830                 Declare2Words(p);
00831                 MultiplyWords(p, A[i], B);
00832                 Acc2WordsBy1(p, carry);
00833                 C[i] = LowWord(p);
00834                 carry = HighWord(p);
00835         }
00836         return carry;
00837 }
00838 
00839 #ifndef CRYPTOPP_DOXYGEN_PROCESSING
00840 
00841 #define Mul_2 \
00842         Mul_Begin(2) \
00843         Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00844         Mul_End(1, 1)
00845 
00846 #define Mul_4 \
00847         Mul_Begin(4) \
00848         Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00849         Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0)  \
00850         Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0)  \
00851         Mul_SaveAcc(3, 1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1)  \
00852         Mul_SaveAcc(4, 2, 3) Mul_Acc(3, 2) \
00853         Mul_End(5, 3)
00854 
00855 #define Mul_8 \
00856         Mul_Begin(8) \
00857         Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00858         Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0)  \
00859         Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0)  \
00860         Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
00861         Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
00862         Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
00863         Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
00864         Mul_SaveAcc(7, 1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
00865         Mul_SaveAcc(8, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
00866         Mul_SaveAcc(9, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
00867         Mul_SaveAcc(10, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
00868         Mul_SaveAcc(11, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
00869         Mul_SaveAcc(12, 6, 7) Mul_Acc(7, 6) \
00870         Mul_End(13, 7)
00871 
00872 #define Mul_16 \
00873         Mul_Begin(16) \
00874         Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00875         Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
00876         Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
00877         Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
00878         Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
00879         Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
00880         Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
00881         Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
00882         Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
00883         Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
00884         Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
00885         Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
00886         Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
00887         Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
00888         Mul_SaveAcc(14, 0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
00889         Mul_SaveAcc(15, 1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
00890         Mul_SaveAcc(16, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
00891         Mul_SaveAcc(17, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
00892         Mul_SaveAcc(18, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
00893         Mul_SaveAcc(19, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
00894         Mul_SaveAcc(20, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
00895         Mul_SaveAcc(21, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
00896         Mul_SaveAcc(22, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
00897         Mul_SaveAcc(23, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
00898         Mul_SaveAcc(24, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
00899         Mul_SaveAcc(25, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
00900         Mul_SaveAcc(26, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
00901         Mul_SaveAcc(27, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
00902         Mul_SaveAcc(28, 14, 15) Mul_Acc(15, 14) \
00903         Mul_End(29, 15)
00904 
00905 #define Squ_2 \
00906         Squ_Begin(2) \
00907         Squ_End(2)
00908 
00909 #define Squ_4 \
00910         Squ_Begin(4) \
00911         Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
00912         Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
00913         Squ_SaveAcc(3, 1, 3) Squ_Diag(2) \
00914         Squ_SaveAcc(4, 2, 3) Squ_NonDiag \
00915         Squ_End(4)
00916 
00917 #define Squ_8 \
00918         Squ_Begin(8) \
00919         Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
00920         Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
00921         Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
00922         Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
00923         Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
00924         Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
00925         Squ_SaveAcc(7, 1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
00926         Squ_SaveAcc(8, 2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5)  Squ_NonDiag \
00927         Squ_SaveAcc(9, 3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
00928         Squ_SaveAcc(10, 4, 7) Squ_Acc(5, 6) Squ_NonDiag \
00929         Squ_SaveAcc(11, 5, 7) Squ_Diag(6) \
00930         Squ_SaveAcc(12, 6, 7) Squ_NonDiag \
00931         Squ_End(8)
00932 
00933 #define Squ_16 \
00934         Squ_Begin(16) \
00935         Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
00936         Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
00937         Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
00938         Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
00939         Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
00940         Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
00941         Squ_SaveAcc(7, 0, 8) Squ_Acc(1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
00942         Squ_SaveAcc(8, 0, 9) Squ_Acc(1, 8) Squ_Acc(2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
00943         Squ_SaveAcc(9, 0, 10) Squ_Acc(1, 9) Squ_Acc(2, 8) Squ_Acc(3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
00944         Squ_SaveAcc(10, 0, 11) Squ_Acc(1, 10) Squ_Acc(2, 9) Squ_Acc(3, 8) Squ_Acc(4, 7) Squ_Acc(5, 6) Squ_NonDiag \
00945         Squ_SaveAcc(11, 0, 12) Squ_Acc(1, 11) Squ_Acc(2, 10) Squ_Acc(3, 9) Squ_Acc(4, 8) Squ_Acc(5, 7) Squ_Diag(6) \
00946         Squ_SaveAcc(12, 0, 13) Squ_Acc(1, 12) Squ_Acc(2, 11) Squ_Acc(3, 10) Squ_Acc(4, 9) Squ_Acc(5, 8) Squ_Acc(6, 7) Squ_NonDiag \
00947         Squ_SaveAcc(13, 0, 14) Squ_Acc(1, 13) Squ_Acc(2, 12) Squ_Acc(3, 11) Squ_Acc(4, 10) Squ_Acc(5, 9) Squ_Acc(6, 8) Squ_Diag(7) \
00948         Squ_SaveAcc(14, 0, 15) Squ_Acc(1, 14) Squ_Acc(2, 13) Squ_Acc(3, 12) Squ_Acc(4, 11) Squ_Acc(5, 10) Squ_Acc(6, 9) Squ_Acc(7, 8) Squ_NonDiag \
00949         Squ_SaveAcc(15, 1, 15) Squ_Acc(2, 14) Squ_Acc(3, 13) Squ_Acc(4, 12) Squ_Acc(5, 11) Squ_Acc(6, 10) Squ_Acc(7, 9) Squ_Diag(8) \
00950         Squ_SaveAcc(16, 2, 15) Squ_Acc(3, 14) Squ_Acc(4, 13) Squ_Acc(5, 12) Squ_Acc(6, 11) Squ_Acc(7, 10) Squ_Acc(8, 9) Squ_NonDiag \
00951         Squ_SaveAcc(17, 3, 15) Squ_Acc(4, 14) Squ_Acc(5, 13) Squ_Acc(6, 12) Squ_Acc(7, 11) Squ_Acc(8, 10) Squ_Diag(9) \
00952         Squ_SaveAcc(18, 4, 15) Squ_Acc(5, 14) Squ_Acc(6, 13) Squ_Acc(7, 12) Squ_Acc(8, 11) Squ_Acc(9, 10) Squ_NonDiag \
00953         Squ_SaveAcc(19, 5, 15) Squ_Acc(6, 14) Squ_Acc(7, 13) Squ_Acc(8, 12) Squ_Acc(9, 11) Squ_Diag(10) \
00954         Squ_SaveAcc(20, 6, 15) Squ_Acc(7, 14) Squ_Acc(8, 13) Squ_Acc(9, 12) Squ_Acc(10, 11) Squ_NonDiag \
00955         Squ_SaveAcc(21, 7, 15) Squ_Acc(8, 14) Squ_Acc(9, 13) Squ_Acc(10, 12) Squ_Diag(11) \
00956         Squ_SaveAcc(22, 8, 15) Squ_Acc(9, 14) Squ_Acc(10, 13) Squ_Acc(11, 12) Squ_NonDiag \
00957         Squ_SaveAcc(23, 9, 15) Squ_Acc(10, 14) Squ_Acc(11, 13) Squ_Diag(12) \
00958         Squ_SaveAcc(24, 10, 15) Squ_Acc(11, 14) Squ_Acc(12, 13) Squ_NonDiag \
00959         Squ_SaveAcc(25, 11, 15) Squ_Acc(12, 14) Squ_Diag(13) \
00960         Squ_SaveAcc(26, 12, 15) Squ_Acc(13, 14) Squ_NonDiag \
00961         Squ_SaveAcc(27, 13, 15) Squ_Diag(14) \
00962         Squ_SaveAcc(28, 14, 15) Squ_NonDiag \
00963         Squ_End(16)
00964 
00965 #define Bot_2 \
00966         Mul_Begin(2) \
00967         Bot_SaveAcc(0, 0, 1) Bot_Acc(1, 0) \
00968         Bot_End(2)
00969 
00970 #define Bot_4 \
00971         Mul_Begin(4) \
00972         Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00973         Mul_SaveAcc(1, 2, 0) Mul_Acc(1, 1) Mul_Acc(0, 2)  \
00974         Bot_SaveAcc(2, 0, 3) Bot_Acc(1, 2) Bot_Acc(2, 1) Bot_Acc(3, 0)  \
00975         Bot_End(4)
00976 
00977 #define Bot_8 \
00978         Mul_Begin(8) \
00979         Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00980         Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0)  \
00981         Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0)  \
00982         Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
00983         Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
00984         Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
00985         Bot_SaveAcc(6, 0, 7) Bot_Acc(1, 6) Bot_Acc(2, 5) Bot_Acc(3, 4) Bot_Acc(4, 3) Bot_Acc(5, 2) Bot_Acc(6, 1) Bot_Acc(7, 0) \
00986         Bot_End(8)
00987 
00988 #define Bot_16 \
00989         Mul_Begin(16) \
00990         Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00991         Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
00992         Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
00993         Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
00994         Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
00995         Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
00996         Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
00997         Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
00998         Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
00999         Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
01000         Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
01001         Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
01002         Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
01003         Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
01004         Bot_SaveAcc(14, 0, 15) Bot_Acc(1, 14) Bot_Acc(2, 13) Bot_Acc(3, 12) Bot_Acc(4, 11) Bot_Acc(5, 10) Bot_Acc(6, 9) Bot_Acc(7, 8) Bot_Acc(8, 7) Bot_Acc(9, 6) Bot_Acc(10, 5) Bot_Acc(11, 4) Bot_Acc(12, 3) Bot_Acc(13, 2) Bot_Acc(14, 1) Bot_Acc(15, 0) \
01005         Bot_End(16)
01006         
01007 #endif
01008 
01009 #if 0
01010 #define Mul_Begin(n)                            \
01011         Declare2Words(p)                                \
01012         Declare2Words(c)                                \
01013         Declare2Words(d)                                \
01014         MultiplyWords(p, A[0], B[0])    \
01015         AssignWord(c, LowWord(p))               \
01016         AssignWord(d, HighWord(p))
01017 
01018 #define Mul_Acc(i, j)                           \
01019         MultiplyWords(p, A[i], B[j])    \
01020         Acc2WordsBy1(c, LowWord(p))             \
01021         Acc2WordsBy1(d, HighWord(p))
01022 
01023 #define Mul_SaveAcc(k, i, j)            \
01024         R[k] = LowWord(c);                              \
01025         Add2WordsBy1(c, d, HighWord(c)) \
01026         MultiplyWords(p, A[i], B[j])    \
01027         AssignWord(d, HighWord(p))              \
01028         Acc2WordsBy1(c, LowWord(p))
01029 
01030 #define Mul_End(n)                                      \
01031         R[2*n-3] = LowWord(c);                  \
01032         Acc2WordsBy1(d, HighWord(c))    \
01033         MultiplyWords(p, A[n-1], B[n-1])\
01034         Acc2WordsBy2(d, p)                              \
01035         R[2*n-2] = LowWord(d);                  \
01036         R[2*n-1] = HighWord(d);
01037 
01038 #define Bot_SaveAcc(k, i, j)            \
01039         R[k] = LowWord(c);                              \
01040         word e = LowWord(d) + HighWord(c);      \
01041         e += A[i] * B[j];
01042 
01043 #define Bot_Acc(i, j)   \
01044         e += A[i] * B[j];
01045 
01046 #define Bot_End(n)              \
01047         R[n-1] = e;
01048 #else
01049 #define Mul_Begin(n)                            \
01050         Declare2Words(p)                                \
01051         word c; \
01052         Declare2Words(d)                                \
01053         MultiplyWords(p, A[0], B[0])    \
01054         c = LowWord(p);         \
01055         AssignWord(d, HighWord(p))
01056 
01057 #define Mul_Acc(i, j)                           \
01058         MulAcc(c, d, A[i], B[j])
01059 
01060 #define Mul_SaveAcc(k, i, j)            \
01061         R[k] = c;                               \
01062         c = LowWord(d); \
01063         AssignWord(d, HighWord(d))      \
01064         MulAcc(c, d, A[i], B[j])
01065 
01066 #define Mul_End(k, i)                                   \
01067         R[k] = c;                       \
01068         MultiplyWords(p, A[i], B[i])    \
01069         Acc2WordsBy2(p, d)                              \
01070         R[k+1] = LowWord(p);                    \
01071         R[k+2] = HighWord(p);
01072 
01073 #define Bot_SaveAcc(k, i, j)            \
01074         R[k] = c;                               \
01075         c = LowWord(d); \
01076         c += A[i] * B[j];
01077 
01078 #define Bot_Acc(i, j)   \
01079         c += A[i] * B[j];
01080 
01081 #define Bot_End(n)              \
01082         R[n-1] = c;
01083 #endif
01084 
01085 #define Squ_Begin(n)                            \
01086         Declare2Words(p)                                \
01087         word c;                         \
01088         Declare2Words(d)                                \
01089         Declare2Words(e)                                \
01090         MultiplyWords(p, A[0], A[0])    \
01091         R[0] = LowWord(p);                              \
01092         AssignWord(e, HighWord(p))              \
01093         MultiplyWords(p, A[0], A[1])    \
01094         c = LowWord(p);         \
01095         AssignWord(d, HighWord(p))              \
01096         Squ_NonDiag                                             \
01097 
01098 #define Squ_NonDiag                             \
01099         Double3Words(c, d)
01100 
01101 #define Squ_SaveAcc(k, i, j)            \
01102         Acc3WordsBy2(c, d, e)                   \
01103         R[k] = c;                               \
01104         MultiplyWords(p, A[i], A[j])    \
01105         c = LowWord(p);         \
01106         AssignWord(d, HighWord(p))              \
01107 
01108 #define Squ_Acc(i, j)                           \
01109         MulAcc(c, d, A[i], A[j])
01110 
01111 #define Squ_Diag(i)                                     \
01112         Squ_NonDiag                                             \
01113         MulAcc(c, d, A[i], A[i])
01114 
01115 #define Squ_End(n)                                      \
01116         Acc3WordsBy2(c, d, e)                   \
01117         R[2*n-3] = c;                   \
01118         MultiplyWords(p, A[n-1], A[n-1])\
01119         Acc2WordsBy2(p, e)                              \
01120         R[2*n-2] = LowWord(p);                  \
01121         R[2*n-1] = HighWord(p);
01122 
01123 void Baseline_Multiply2(word *R, const word *A, const word *B)
01124 {
01125         Mul_2
01126 }
01127 
01128 void Baseline_Multiply4(word *R, const word *A, const word *B)
01129 {
01130         Mul_4
01131 }
01132 
01133 void Baseline_Multiply8(word *R, const word *A, const word *B)
01134 {
01135         Mul_8
01136 }
01137 
01138 void Baseline_Square2(word *R, const word *A)
01139 {
01140         Squ_2
01141 }
01142 
01143 void Baseline_Square4(word *R, const word *A)
01144 {
01145         Squ_4
01146 }
01147 
01148 void Baseline_Square8(word *R, const word *A)
01149 {
01150         Squ_8
01151 }
01152 
01153 void Baseline_MultiplyBottom2(word *R, const word *A, const word *B)
01154 {
01155         Bot_2
01156 }
01157 
01158 void Baseline_MultiplyBottom4(word *R, const word *A, const word *B)
01159 {
01160         Bot_4
01161 }
01162 
01163 void Baseline_MultiplyBottom8(word *R, const word *A, const word *B)
01164 {
01165         Bot_8
01166 }
01167 
01168 #define Top_Begin(n)                            \
01169         Declare2Words(p)                                \
01170         word c; \
01171         Declare2Words(d)                                \
01172         MultiplyWords(p, A[0], B[n-2]);\
01173         AssignWord(d, HighWord(p));
01174 
01175 #define Top_Acc(i, j)   \
01176         MultiplyWords(p, A[i], B[j]);\
01177         Acc2WordsBy1(d, HighWord(p));
01178 
01179 #define Top_SaveAcc0(i, j)              \
01180         c = LowWord(d); \
01181         AssignWord(d, HighWord(d))      \
01182         MulAcc(c, d, A[i], B[j])
01183 
01184 #define Top_SaveAcc1(i, j)              \
01185         c = L<c; \
01186         Acc2WordsBy1(d, c);     \
01187         c = LowWord(d); \
01188         AssignWord(d, HighWord(d))      \
01189         MulAcc(c, d, A[i], B[j])
01190 
01191 void Baseline_MultiplyTop2(word *R, const word *A, const word *B, word L)
01192 {
01193         word T[4];
01194         Baseline_Multiply2(T, A, B);
01195         R[0] = T[2];
01196         R[1] = T[3];
01197 }
01198 
01199 void Baseline_MultiplyTop4(word *R, const word *A, const word *B, word L)
01200 {
01201         Top_Begin(4)
01202         Top_Acc(1, 1) Top_Acc(2, 0)  \
01203         Top_SaveAcc0(0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0)  \
01204         Top_SaveAcc1(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1)  \
01205         Mul_SaveAcc(0, 2, 3) Mul_Acc(3, 2) \
01206         Mul_End(1, 3)
01207 }
01208 
01209 void Baseline_MultiplyTop8(word *R, const word *A, const word *B, word L)
01210 {
01211         Top_Begin(8)
01212         Top_Acc(1, 5) Top_Acc(2, 4) Top_Acc(3, 3) Top_Acc(4, 2) Top_Acc(5, 1) Top_Acc(6, 0) \
01213         Top_SaveAcc0(0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
01214         Top_SaveAcc1(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
01215         Mul_SaveAcc(0, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
01216         Mul_SaveAcc(1, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
01217         Mul_SaveAcc(2, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
01218         Mul_SaveAcc(3, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
01219         Mul_SaveAcc(4, 6, 7) Mul_Acc(7, 6) \
01220         Mul_End(5, 7)
01221 }
01222 
01223 #if !CRYPTOPP_INTEGER_SSE2      // save memory by not compiling these functions when SSE2 is available
01224 void Baseline_Multiply16(word *R, const word *A, const word *B)
01225 {
01226         Mul_16
01227 }
01228 
01229 void Baseline_Square16(word *R, const word *A)
01230 {
01231         Squ_16
01232 }
01233 
01234 void Baseline_MultiplyBottom16(word *R, const word *A, const word *B)
01235 {
01236         Bot_16
01237 }
01238 
01239 void Baseline_MultiplyTop16(word *R, const word *A, const word *B, word L)
01240 {
01241         Top_Begin(16)
01242         Top_Acc(1, 13) Top_Acc(2, 12) Top_Acc(3, 11) Top_Acc(4, 10) Top_Acc(5, 9) Top_Acc(6, 8) Top_Acc(7, 7) Top_Acc(8, 6) Top_Acc(9, 5) Top_Acc(10, 4) Top_Acc(11, 3) Top_Acc(12, 2) Top_Acc(13, 1) Top_Acc(14, 0) \
01243         Top_SaveAcc0(0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
01244         Top_SaveAcc1(1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
01245         Mul_SaveAcc(0, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
01246         Mul_SaveAcc(1, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
01247         Mul_SaveAcc(2, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
01248         Mul_SaveAcc(3, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
01249         Mul_SaveAcc(4, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
01250         Mul_SaveAcc(5, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
01251         Mul_SaveAcc(6, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
01252         Mul_SaveAcc(7, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
01253         Mul_SaveAcc(8, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
01254         Mul_SaveAcc(9, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
01255         Mul_SaveAcc(10, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
01256         Mul_SaveAcc(11, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
01257         Mul_SaveAcc(12, 14, 15) Mul_Acc(15, 14) \
01258         Mul_End(13, 15)
01259 }
01260 #endif
01261 
01262 // ********************************************************
01263 
01264 #if CRYPTOPP_INTEGER_SSE2
01265 
01266 CRYPTOPP_ALIGN_DATA(16) static const word32 s_maskLow16[4] CRYPTOPP_SECTION_ALIGN16 = {0xffff,0xffff,0xffff,0xffff};
01267 
01268 #undef Mul_Begin
01269 #undef Mul_Acc
01270 #undef Top_Begin
01271 #undef Top_Acc
01272 #undef Squ_Acc
01273 #undef Squ_NonDiag
01274 #undef Squ_Diag
01275 #undef Squ_SaveAcc
01276 #undef Squ_Begin
01277 #undef Mul_SaveAcc
01278 #undef Bot_Acc
01279 #undef Bot_SaveAcc
01280 #undef Bot_End
01281 #undef Squ_End
01282 #undef Mul_End
01283 
01284 #define SSE2_FinalSave(k)                       \
01285         AS2(    psllq           xmm5, 16)       \
01286         AS2(    paddq           xmm4, xmm5)     \
01287         AS2(    movq            QWORD PTR [ecx+8*(k)], xmm4)
01288 
01289 #define SSE2_SaveShift(k)                       \
01290         AS2(    movq            xmm0, xmm6)     \
01291         AS2(    punpckhqdq      xmm6, xmm0)     \
01292         AS2(    movq            xmm1, xmm7)     \
01293         AS2(    punpckhqdq      xmm7, xmm1)     \
01294         AS2(    paddd           xmm6, xmm0)     \
01295         AS2(    pslldq          xmm6, 4)        \
01296         AS2(    paddd           xmm7, xmm1)     \
01297         AS2(    paddd           xmm4, xmm6)     \
01298         AS2(    pslldq          xmm7, 4)        \
01299         AS2(    movq            xmm6, xmm4)     \
01300         AS2(    paddd           xmm5, xmm7)     \
01301         AS2(    movq            xmm7, xmm5)     \
01302         AS2(    movd            DWORD PTR [ecx+8*(k)], xmm4)    \
01303         AS2(    psrlq           xmm6, 16)       \
01304         AS2(    paddq           xmm6, xmm7)     \
01305         AS2(    punpckhqdq      xmm4, xmm0)     \
01306         AS2(    punpckhqdq      xmm5, xmm0)     \
01307         AS2(    movq            QWORD PTR [ecx+8*(k)+2], xmm6)  \
01308         AS2(    psrlq           xmm6, 3*16)     \
01309         AS2(    paddd           xmm4, xmm6)     \
01310 
01311 #define Squ_SSE2_SaveShift(k)                   \
01312         AS2(    movq            xmm0, xmm6)     \
01313         AS2(    punpckhqdq      xmm6, xmm0)     \
01314         AS2(    movq            xmm1, xmm7)     \
01315         AS2(    punpckhqdq      xmm7, xmm1)     \
01316         AS2(    paddd           xmm6, xmm0)     \
01317         AS2(    pslldq          xmm6, 4)        \
01318         AS2(    paddd           xmm7, xmm1)     \
01319         AS2(    paddd           xmm4, xmm6)     \
01320         AS2(    pslldq          xmm7, 4)        \
01321         AS2(    movhlps         xmm6, xmm4)     \
01322         AS2(    movd            DWORD PTR [ecx+8*(k)], xmm4)    \
01323         AS2(    paddd           xmm5, xmm7)     \
01324         AS2(    movhps          QWORD PTR [esp+12], xmm5)\
01325         AS2(    psrlq           xmm4, 16)       \
01326         AS2(    paddq           xmm4, xmm5)     \
01327         AS2(    movq            QWORD PTR [ecx+8*(k)+2], xmm4)  \
01328         AS2(    psrlq           xmm4, 3*16)     \
01329         AS2(    paddd           xmm4, xmm6)     \
01330         AS2(    movq            QWORD PTR [esp+4], xmm4)\
01331 
01332 #define SSE2_FirstMultiply(i)                           \
01333         AS2(    movdqa          xmm7, [esi+(i)*16])\
01334         AS2(    movdqa          xmm5, [edi-(i)*16])\
01335         AS2(    pmuludq         xmm5, xmm7)             \
01336         AS2(    movdqa          xmm4, [ebx])\
01337         AS2(    movdqa          xmm6, xmm4)             \
01338         AS2(    pand            xmm4, xmm5)             \
01339         AS2(    psrld           xmm5, 16)               \
01340         AS2(    pmuludq         xmm7, [edx-(i)*16])\
01341         AS2(    pand            xmm6, xmm7)             \
01342         AS2(    psrld           xmm7, 16)
01343 
01344 #define Squ_Begin(n)                                                    \
01345         SquPrologue                                                                     \
01346         AS2(    mov             esi, esp)\
01347         AS2(    and             esp, 0xfffffff0)\
01348         AS2(    lea             edi, [esp-32*n])\
01349         AS2(    sub             esp, 32*n+16)\
01350         AS1(    push    esi)\
01351         AS2(    mov             esi, edi)                                       \
01352         AS2(    xor             edx, edx)                                       \
01353         ASL(1)                                                                          \
01354         ASS(    pshufd  xmm0, [eax+edx], 3,1,2,0)       \
01355         ASS(    pshufd  xmm1, [eax+edx], 2,0,3,1)       \
01356         AS2(    movdqa  [edi+2*edx], xmm0)              \
01357         AS2(    psrlq   xmm0, 32)                                       \
01358         AS2(    movdqa  [edi+2*edx+16], xmm0)   \
01359         AS2(    movdqa  [edi+16*n+2*edx], xmm1)         \
01360         AS2(    psrlq   xmm1, 32)                                       \
01361         AS2(    movdqa  [edi+16*n+2*edx+16], xmm1)      \
01362         AS2(    add             edx, 16)                                        \
01363         AS2(    cmp             edx, 8*(n))                                     \
01364         ASJ(    jne,    1, b)                                           \
01365         AS2(    lea             edx, [edi+16*n])\
01366         SSE2_FirstMultiply(0)                                                   \
01367 
01368 #define Squ_Acc(i)                                                              \
01369         ASL(LSqu##i)                                                            \
01370         AS2(    movdqa          xmm1, [esi+(i)*16])     \
01371         AS2(    movdqa          xmm0, [edi-(i)*16])     \
01372         AS2(    movdqa          xmm2, [ebx])    \
01373         AS2(    pmuludq         xmm0, xmm1)                             \
01374         AS2(    pmuludq         xmm1, [edx-(i)*16])     \
01375         AS2(    movdqa          xmm3, xmm2)                     \
01376         AS2(    pand            xmm2, xmm0)                     \
01377         AS2(    psrld           xmm0, 16)                       \
01378         AS2(    paddd           xmm4, xmm2)                     \
01379         AS2(    paddd           xmm5, xmm0)                     \
01380         AS2(    pand            xmm3, xmm1)                     \
01381         AS2(    psrld           xmm1, 16)                       \
01382         AS2(    paddd           xmm6, xmm3)                     \
01383         AS2(    paddd           xmm7, xmm1)             \
01384 
01385 #define Squ_Acc1(i)             
01386 #define Squ_Acc2(i)             ASC(call, LSqu##i)
01387 #define Squ_Acc3(i)             Squ_Acc2(i)
01388 #define Squ_Acc4(i)             Squ_Acc2(i)
01389 #define Squ_Acc5(i)             Squ_Acc2(i)
01390 #define Squ_Acc6(i)             Squ_Acc2(i)
01391 #define Squ_Acc7(i)             Squ_Acc2(i)
01392 #define Squ_Acc8(i)             Squ_Acc2(i)
01393 
01394 #define SSE2_End(E, n)                                  \
01395         SSE2_SaveShift(2*(n)-3)                 \
01396         AS2(    movdqa          xmm7, [esi+16]) \
01397         AS2(    movdqa          xmm0, [edi])    \
01398         AS2(    pmuludq         xmm0, xmm7)                             \
01399         AS2(    movdqa          xmm2, [ebx])            \
01400         AS2(    pmuludq         xmm7, [edx])    \
01401         AS2(    movdqa          xmm6, xmm2)                             \
01402         AS2(    pand            xmm2, xmm0)                             \
01403         AS2(    psrld           xmm0, 16)                               \
01404         AS2(    paddd           xmm4, xmm2)                             \
01405         AS2(    paddd           xmm5, xmm0)                             \
01406         AS2(    pand            xmm6, xmm7)                             \
01407         AS2(    psrld           xmm7, 16)       \
01408         SSE2_SaveShift(2*(n)-2)                 \
01409         SSE2_FinalSave(2*(n)-1)                 \
01410         AS1(    pop             esp)\
01411         E
01412 
01413 #define Squ_End(n)              SSE2_End(SquEpilogue, n)
01414 #define Mul_End(n)              SSE2_End(MulEpilogue, n)
01415 #define Top_End(n)              SSE2_End(TopEpilogue, n)
01416 
01417 #define Squ_Column1(k, i)       \
01418         Squ_SSE2_SaveShift(k)                                   \
01419         AS2(    add                     esi, 16)        \
01420         SSE2_FirstMultiply(1)\
01421         Squ_Acc##i(i)   \
01422         AS2(    paddd           xmm4, xmm4)             \
01423         AS2(    paddd           xmm5, xmm5)             \
01424         AS2(    movdqa          xmm3, [esi])                            \
01425         AS2(    movq            xmm1, QWORD PTR [esi+8])        \
01426         AS2(    pmuludq         xmm1, xmm3)             \
01427         AS2(    pmuludq         xmm3, xmm3)             \
01428         AS2(    movdqa          xmm0, [ebx])\
01429         AS2(    movdqa          xmm2, xmm0)             \
01430         AS2(    pand            xmm0, xmm1)             \
01431         AS2(    psrld           xmm1, 16)               \
01432         AS2(    paddd           xmm6, xmm0)             \
01433         AS2(    paddd           xmm7, xmm1)             \
01434         AS2(    pand            xmm2, xmm3)             \
01435         AS2(    psrld           xmm3, 16)               \
01436         AS2(    paddd           xmm6, xmm6)             \
01437         AS2(    paddd           xmm7, xmm7)             \
01438         AS2(    paddd           xmm4, xmm2)             \
01439         AS2(    paddd           xmm5, xmm3)             \
01440         AS2(    movq            xmm0, QWORD PTR [esp+4])\
01441         AS2(    movq            xmm1, QWORD PTR [esp+12])\
01442         AS2(    paddd           xmm4, xmm0)\
01443         AS2(    paddd           xmm5, xmm1)\
01444 
01445 #define Squ_Column0(k, i)       \
01446         Squ_SSE2_SaveShift(k)                                   \
01447         AS2(    add                     edi, 16)        \
01448         AS2(    add                     edx, 16)        \
01449         SSE2_FirstMultiply(1)\
01450         Squ_Acc##i(i)   \
01451         AS2(    paddd           xmm6, xmm6)             \
01452         AS2(    paddd           xmm7, xmm7)             \
01453         AS2(    paddd           xmm4, xmm4)             \
01454         AS2(    paddd           xmm5, xmm5)             \
01455         AS2(    movq            xmm0, QWORD PTR [esp+4])\
01456         AS2(    movq            xmm1, QWORD PTR [esp+12])\
01457         AS2(    paddd           xmm4, xmm0)\
01458         AS2(    paddd           xmm5, xmm1)\
01459 
01460 #define SSE2_MulAdd45                                           \
01461         AS2(    movdqa          xmm7, [esi])    \
01462         AS2(    movdqa          xmm0, [edi])    \
01463         AS2(    pmuludq         xmm0, xmm7)                             \
01464         AS2(    movdqa          xmm2, [ebx])            \
01465         AS2(    pmuludq         xmm7, [edx])    \
01466         AS2(    movdqa          xmm6, xmm2)                             \
01467         AS2(    pand            xmm2, xmm0)                             \
01468         AS2(    psrld           xmm0, 16)                               \
01469         AS2(    paddd           xmm4, xmm2)                             \
01470         AS2(    paddd           xmm5, xmm0)                             \
01471         AS2(    pand            xmm6, xmm7)                             \
01472         AS2(    psrld           xmm7, 16)
01473 
01474 #define Mul_Begin(n)                                                    \
01475         MulPrologue                                                                     \
01476         AS2(    mov             esi, esp)\
01477         AS2(    and             esp, 0xfffffff0)\
01478         AS2(    sub             esp, 48*n+16)\
01479         AS1(    push    esi)\
01480         AS2(    xor             edx, edx)                                       \
01481         ASL(1)                                                                          \
01482         ASS(    pshufd  xmm0, [eax+edx], 3,1,2,0)       \
01483         ASS(    pshufd  xmm1, [eax+edx], 2,0,3,1)       \
01484         ASS(    pshufd  xmm2, [edi+edx], 3,1,2,0)       \
01485         AS2(    movdqa  [esp+20+2*edx], xmm0)           \
01486         AS2(    psrlq   xmm0, 32)                                       \
01487         AS2(    movdqa  [esp+20+2*edx+16], xmm0)        \
01488         AS2(    movdqa  [esp+20+16*n+2*edx], xmm1)              \
01489         AS2(    psrlq   xmm1, 32)                                       \
01490         AS2(    movdqa  [esp+20+16*n+2*edx+16], xmm1)   \
01491         AS2(    movdqa  [esp+20+32*n+2*edx], xmm2)              \
01492         AS2(    psrlq   xmm2, 32)                                       \
01493         AS2(    movdqa  [esp+20+32*n+2*edx+16], xmm2)   \
01494         AS2(    add             edx, 16)                                        \
01495         AS2(    cmp             edx, 8*(n))                                     \
01496         ASJ(    jne,    1, b)                                           \
01497         AS2(    lea             edi, [esp+20])\
01498         AS2(    lea             edx, [esp+20+16*n])\
01499         AS2(    lea             esi, [esp+20+32*n])\
01500         SSE2_FirstMultiply(0)                                                   \
01501 
01502 #define Mul_Acc(i)                                                              \
01503         ASL(LMul##i)                                                                            \
01504         AS2(    movdqa          xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16])   \
01505         AS2(    movdqa          xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16])   \
01506         AS2(    movdqa          xmm2, [ebx])    \
01507         AS2(    pmuludq         xmm0, xmm1)                             \
01508         AS2(    pmuludq         xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16])   \
01509         AS2(    movdqa          xmm3, xmm2)                     \
01510         AS2(    pand            xmm2, xmm0)                     \
01511         AS2(    psrld           xmm0, 16)                       \
01512         AS2(    paddd           xmm4, xmm2)                     \
01513         AS2(    paddd           xmm5, xmm0)                     \
01514         AS2(    pand            xmm3, xmm1)                     \
01515         AS2(    psrld           xmm1, 16)                       \
01516         AS2(    paddd           xmm6, xmm3)                     \
01517         AS2(    paddd           xmm7, xmm1)             \
01518 
01519 #define Mul_Acc1(i)             
01520 #define Mul_Acc2(i)             ASC(call, LMul##i)
01521 #define Mul_Acc3(i)             Mul_Acc2(i)
01522 #define Mul_Acc4(i)             Mul_Acc2(i)
01523 #define Mul_Acc5(i)             Mul_Acc2(i)
01524 #define Mul_Acc6(i)             Mul_Acc2(i)
01525 #define Mul_Acc7(i)             Mul_Acc2(i)
01526 #define Mul_Acc8(i)             Mul_Acc2(i)
01527 #define Mul_Acc9(i)             Mul_Acc2(i)
01528 #define Mul_Acc10(i)    Mul_Acc2(i)
01529 #define Mul_Acc11(i)    Mul_Acc2(i)
01530 #define Mul_Acc12(i)    Mul_Acc2(i)
01531 #define Mul_Acc13(i)    Mul_Acc2(i)
01532 #define Mul_Acc14(i)    Mul_Acc2(i)
01533 #define Mul_Acc15(i)    Mul_Acc2(i)
01534 #define Mul_Acc16(i)    Mul_Acc2(i)
01535 
01536 #define Mul_Column1(k, i)       \
01537         SSE2_SaveShift(k)                                       \
01538         AS2(    add                     esi, 16)        \
01539         SSE2_MulAdd45\
01540         Mul_Acc##i(i)   \
01541 
01542 #define Mul_Column0(k, i)       \
01543         SSE2_SaveShift(k)                                       \
01544         AS2(    add                     edi, 16)        \
01545         AS2(    add                     edx, 16)        \
01546         SSE2_MulAdd45\
01547         Mul_Acc##i(i)   \
01548 
01549 #define Bot_Acc(i)                                                      \
01550         AS2(    movdqa          xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16])   \
01551         AS2(    movdqa          xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16])   \
01552         AS2(    pmuludq         xmm0, xmm1)                             \
01553         AS2(    pmuludq         xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16])           \
01554         AS2(    paddq           xmm4, xmm0)                             \
01555         AS2(    paddd           xmm6, xmm1)
01556 
01557 #define Bot_SaveAcc(k)                                  \
01558         SSE2_SaveShift(k)                                                       \
01559         AS2(    add                     edi, 16)        \
01560         AS2(    add                     edx, 16)        \
01561         AS2(    movdqa          xmm6, [esi])    \
01562         AS2(    movdqa          xmm0, [edi])    \
01563         AS2(    pmuludq         xmm0, xmm6)                             \
01564         AS2(    paddq           xmm4, xmm0)                             \
01565         AS2(    psllq           xmm5, 16)                               \
01566         AS2(    paddq           xmm4, xmm5)                             \
01567         AS2(    pmuludq         xmm6, [edx])
01568 
01569 #define Bot_End(n)                                                      \
01570         AS2(    movhlps         xmm7, xmm6)                     \
01571         AS2(    paddd           xmm6, xmm7)                     \
01572         AS2(    psllq           xmm6, 32)                       \
01573         AS2(    paddd           xmm4, xmm6)                     \
01574         AS2(    movq            QWORD PTR [ecx+8*((n)-1)], xmm4)        \
01575         AS1(    pop             esp)\
01576         MulEpilogue
01577 
01578 #define Top_Begin(n)                                                    \
01579         TopPrologue                                                                     \
01580         AS2(    mov             edx, esp)\
01581         AS2(    and             esp, 0xfffffff0)\
01582         AS2(    sub             esp, 48*n+16)\
01583         AS1(    push    edx)\
01584         AS2(    xor             edx, edx)                                       \
01585         ASL(1)                                                                          \
01586         ASS(    pshufd  xmm0, [eax+edx], 3,1,2,0)       \
01587         ASS(    pshufd  xmm1, [eax+edx], 2,0,3,1)       \
01588         ASS(    pshufd  xmm2, [edi+edx], 3,1,2,0)       \
01589         AS2(    movdqa  [esp+20+2*edx], xmm0)           \
01590         AS2(    psrlq   xmm0, 32)                                       \
01591         AS2(    movdqa  [esp+20+2*edx+16], xmm0)        \
01592         AS2(    movdqa  [esp+20+16*n+2*edx], xmm1)              \
01593         AS2(    psrlq   xmm1, 32)                                       \
01594         AS2(    movdqa  [esp+20+16*n+2*edx+16], xmm1)   \
01595         AS2(    movdqa  [esp+20+32*n+2*edx], xmm2)              \
01596         AS2(    psrlq   xmm2, 32)                                       \
01597         AS2(    movdqa  [esp+20+32*n+2*edx+16], xmm2)   \
01598         AS2(    add             edx, 16)                                        \
01599         AS2(    cmp             edx, 8*(n))                                     \
01600         ASJ(    jne,    1, b)                                           \
01601         AS2(    mov             eax, esi)                                       \
01602         AS2(    lea             edi, [esp+20+00*n+16*(n/2-1)])\
01603         AS2(    lea             edx, [esp+20+16*n+16*(n/2-1)])\
01604         AS2(    lea             esi, [esp+20+32*n+16*(n/2-1)])\
01605         AS2(    pxor    xmm4, xmm4)\
01606         AS2(    pxor    xmm5, xmm5)
01607 
01608 #define Top_Acc(i)                                                      \
01609         AS2(    movq            xmm0, QWORD PTR [esi+i/2*(1-(i-2*(i/2))*2)*16+8])       \
01610         AS2(    pmuludq         xmm0, [edx-i/2*(1-(i-2*(i/2))*2)*16])   \
01611         AS2(    psrlq           xmm0, 48)                               \
01612         AS2(    paddd           xmm5, xmm0)\
01613 
01614 #define Top_Column0(i)  \
01615         AS2(    psllq           xmm5, 32)                               \
01616         AS2(    add                     edi, 16)        \
01617         AS2(    add                     edx, 16)        \
01618         SSE2_MulAdd45\
01619         Mul_Acc##i(i)   \
01620 
01621 #define Top_Column1(i)  \
01622         SSE2_SaveShift(0)                                       \
01623         AS2(    add                     esi, 16)        \
01624         SSE2_MulAdd45\
01625         Mul_Acc##i(i)   \
01626         AS2(    shr                     eax, 16)        \
01627         AS2(    movd            xmm0, eax)\
01628         AS2(    movd            xmm1, [ecx+4])\
01629         AS2(    psrld           xmm1, 16)\
01630         AS2(    pcmpgtd         xmm1, xmm0)\
01631         AS2(    psrld           xmm1, 31)\
01632         AS2(    paddd           xmm4, xmm1)\
01633 
01634 void SSE2_Square4(word *C, const word *A)
01635 {
01636         Squ_Begin(2)
01637         Squ_Column0(0, 1)
01638         Squ_End(2)
01639 }
01640 
01641 void SSE2_Square8(word *C, const word *A)
01642 {
01643         Squ_Begin(4)
01644 #ifndef __GNUC__
01645         ASJ(    jmp,    0, f)
01646         Squ_Acc(2)
01647         AS1(    ret) ASL(0)
01648 #endif
01649         Squ_Column0(0, 1)
01650         Squ_Column1(1, 1)
01651         Squ_Column0(2, 2)
01652         Squ_Column1(3, 1)
01653         Squ_Column0(4, 1)
01654         Squ_End(4)
01655 }
01656 
01657 void SSE2_Square16(word *C, const word *A)
01658 {
01659         Squ_Begin(8)
01660 #ifndef __GNUC__
01661         ASJ(    jmp,    0, f)
01662         Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
01663         AS1(    ret) ASL(0)
01664 #endif
01665         Squ_Column0(0, 1)
01666         Squ_Column1(1, 1)
01667         Squ_Column0(2, 2)
01668         Squ_Column1(3, 2)
01669         Squ_Column0(4, 3)
01670         Squ_Column1(5, 3)
01671         Squ_Column0(6, 4)
01672         Squ_Column1(7, 3)
01673         Squ_Column0(8, 3)
01674         Squ_Column1(9, 2)
01675         Squ_Column0(10, 2)
01676         Squ_Column1(11, 1)
01677         Squ_Column0(12, 1)
01678         Squ_End(8)
01679 }
01680 
01681 void SSE2_Square32(word *C, const word *A)
01682 {
01683         Squ_Begin(16)
01684         ASJ(    jmp,    0, f)
01685         Squ_Acc(8) Squ_Acc(7) Squ_Acc(6) Squ_Acc(5) Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
01686         AS1(    ret) ASL(0)
01687         Squ_Column0(0, 1)
01688         Squ_Column1(1, 1)
01689         Squ_Column0(2, 2)
01690         Squ_Column1(3, 2)
01691         Squ_Column0(4, 3)
01692         Squ_Column1(5, 3)
01693         Squ_Column0(6, 4)
01694         Squ_Column1(7, 4)
01695         Squ_Column0(8, 5)
01696         Squ_Column1(9, 5)
01697         Squ_Column0(10, 6)
01698         Squ_Column1(11, 6)
01699         Squ_Column0(12, 7)
01700         Squ_Column1(13, 7)
01701         Squ_Column0(14, 8)
01702         Squ_Column1(15, 7)
01703         Squ_Column0(16, 7)
01704         Squ_Column1(17, 6)
01705         Squ_Column0(18, 6)
01706         Squ_Column1(19, 5)
01707         Squ_Column0(20, 5)
01708         Squ_Column1(21, 4)
01709         Squ_Column0(22, 4)
01710         Squ_Column1(23, 3)
01711         Squ_Column0(24, 3)
01712         Squ_Column1(25, 2)
01713         Squ_Column0(26, 2)
01714         Squ_Column1(27, 1)
01715         Squ_Column0(28, 1)
01716         Squ_End(16)
01717 }
01718 
01719 void SSE2_Multiply4(word *C, const word *A, const word *B)
01720 {
01721         Mul_Begin(2)
01722 #ifndef __GNUC__
01723         ASJ(    jmp,    0, f)
01724         Mul_Acc(2)
01725         AS1(    ret) ASL(0)
01726 #endif
01727         Mul_Column0(0, 2)
01728         Mul_End(2)
01729 }
01730 
01731 void SSE2_Multiply8(word *C, const word *A, const word *B)
01732 {
01733         Mul_Begin(4)
01734 #ifndef __GNUC__
01735         ASJ(    jmp,    0, f)
01736         Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01737         AS1(    ret) ASL(0)
01738 #endif
01739         Mul_Column0(0, 2)
01740         Mul_Column1(1, 3)
01741         Mul_Column0(2, 4)
01742         Mul_Column1(3, 3)
01743         Mul_Column0(4, 2)
01744         Mul_End(4)
01745 }
01746 
01747 void SSE2_Multiply16(word *C, const word *A, const word *B)
01748 {
01749         Mul_Begin(8)
01750 #ifndef __GNUC__
01751         ASJ(    jmp,    0, f)
01752         Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01753         AS1(    ret) ASL(0)
01754 #endif
01755         Mul_Column0(0, 2)
01756         Mul_Column1(1, 3)
01757         Mul_Column0(2, 4)
01758         Mul_Column1(3, 5)
01759         Mul_Column0(4, 6)
01760         Mul_Column1(5, 7)
01761         Mul_Column0(6, 8)
01762         Mul_Column1(7, 7)
01763         Mul_Column0(8, 6)
01764         Mul_Column1(9, 5)
01765         Mul_Column0(10, 4)
01766         Mul_Column1(11, 3)
01767         Mul_Column0(12, 2)
01768         Mul_End(8)
01769 }
01770 
01771 void SSE2_Multiply32(word *C, const word *A, const word *B)
01772 {
01773         Mul_Begin(16)
01774         ASJ(    jmp,    0, f)
01775         Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01776         AS1(    ret) ASL(0)
01777         Mul_Column0(0, 2)
01778         Mul_Column1(1, 3)
01779         Mul_Column0(2, 4)
01780         Mul_Column1(3, 5)
01781         Mul_Column0(4, 6)
01782         Mul_Column1(5, 7)
01783         Mul_Column0(6, 8)
01784         Mul_Column1(7, 9)
01785         Mul_Column0(8, 10)
01786         Mul_Column1(9, 11)
01787         Mul_Column0(10, 12)
01788         Mul_Column1(11, 13)
01789         Mul_Column0(12, 14)
01790         Mul_Column1(13, 15)
01791         Mul_Column0(14, 16)
01792         Mul_Column1(15, 15)
01793         Mul_Column0(16, 14)
01794         Mul_Column1(17, 13)
01795         Mul_Column0(18, 12)
01796         Mul_Column1(19, 11)
01797         Mul_Column0(20, 10)
01798         Mul_Column1(21, 9)
01799         Mul_Column0(22, 8)
01800         Mul_Column1(23, 7)
01801         Mul_Column0(24, 6)
01802         Mul_Column1(25, 5)
01803         Mul_Column0(26, 4)
01804         Mul_Column1(27, 3)
01805         Mul_Column0(28, 2)
01806         Mul_End(16)
01807 }
01808 
01809 void SSE2_MultiplyBottom4(word *C, const word *A, const word *B)
01810 {
01811         Mul_Begin(2)
01812         Bot_SaveAcc(0) Bot_Acc(2)
01813         Bot_End(2)
01814 }
01815 
01816 void SSE2_MultiplyBottom8(word *C, const word *A, const word *B)
01817 {
01818         Mul_Begin(4)
01819 #ifndef __GNUC__
01820         ASJ(    jmp,    0, f)
01821         Mul_Acc(3) Mul_Acc(2)
01822         AS1(    ret) ASL(0)
01823 #endif
01824         Mul_Column0(0, 2)
01825         Mul_Column1(1, 3)
01826         Bot_SaveAcc(2) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
01827         Bot_End(4)
01828 }
01829 
01830 void SSE2_MultiplyBottom16(word *C, const word *A, const word *B)
01831 {
01832         Mul_Begin(8)
01833 #ifndef __GNUC__
01834         ASJ(    jmp,    0, f)
01835         Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01836         AS1(    ret) ASL(0)
01837 #endif
01838         Mul_Column0(0, 2)
01839         Mul_Column1(1, 3)
01840         Mul_Column0(2, 4)
01841         Mul_Column1(3, 5)
01842         Mul_Column0(4, 6)
01843         Mul_Column1(5, 7)
01844         Bot_SaveAcc(6) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
01845         Bot_End(8)
01846 }
01847 
01848 void SSE2_MultiplyBottom32(word *C, const word *A, const word *B)
01849 {
01850         Mul_Begin(16)
01851 #ifndef __GNUC__
01852         ASJ(    jmp,    0, f)
01853         Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01854         AS1(    ret) ASL(0)
01855 #endif
01856         Mul_Column0(0, 2)
01857         Mul_Column1(1, 3)
01858         Mul_Column0(2, 4)
01859         Mul_Column1(3, 5)
01860         Mul_Column0(4, 6)
01861         Mul_Column1(5, 7)
01862         Mul_Column0(6, 8)
01863         Mul_Column1(7, 9)
01864         Mul_Column0(8, 10)
01865         Mul_Column1(9, 11)
01866         Mul_Column0(10, 12)
01867         Mul_Column1(11, 13)
01868         Mul_Column0(12, 14)
01869         Mul_Column1(13, 15)
01870         Bot_SaveAcc(14) Bot_Acc(16) Bot_Acc(15) Bot_Acc(14) Bot_Acc(13) Bot_Acc(12) Bot_Acc(11) Bot_Acc(10) Bot_Acc(9) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
01871         Bot_End(16)
01872 }
01873 
01874 void SSE2_MultiplyTop8(word *C, const word *A, const word *B, word L)
01875 {
01876         Top_Begin(4)
01877         Top_Acc(3) Top_Acc(2) Top_Acc(1)
01878 #ifndef __GNUC__
01879         ASJ(    jmp,    0, f)
01880         Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01881         AS1(    ret) ASL(0)
01882 #endif
01883         Top_Column0(4)
01884         Top_Column1(3)
01885         Mul_Column0(0, 2)
01886         Top_End(2)
01887 }
01888 
01889 void SSE2_MultiplyTop16(word *C, const word *A, const word *B, word L)
01890 {
01891         Top_Begin(8)
01892         Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
01893 #ifndef __GNUC__
01894         ASJ(    jmp,    0, f)
01895         Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01896         AS1(    ret) ASL(0)
01897 #endif
01898         Top_Column0(8)
01899         Top_Column1(7)
01900         Mul_Column0(0, 6)
01901         Mul_Column1(1, 5)
01902         Mul_Column0(2, 4)
01903         Mul_Column1(3, 3)
01904         Mul_Column0(4, 2)
01905         Top_End(4)
01906 }
01907 
01908 void SSE2_MultiplyTop32(word *C, const word *A, const word *B, word L)
01909 {
01910         Top_Begin(16)
01911         Top_Acc(15) Top_Acc(14) Top_Acc(13) Top_Acc(12) Top_Acc(11) Top_Acc(10) Top_Acc(9) Top_Acc(8) Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
01912 #ifndef __GNUC__
01913         ASJ(    jmp,    0, f)
01914         Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01915         AS1(    ret) ASL(0)
01916 #endif
01917         Top_Column0(16)
01918         Top_Column1(15)
01919         Mul_Column0(0, 14)
01920         Mul_Column1(1, 13)
01921         Mul_Column0(2, 12)
01922         Mul_Column1(3, 11)
01923         Mul_Column0(4, 10)
01924         Mul_Column1(5, 9)
01925         Mul_Column0(6, 8)
01926         Mul_Column1(7, 7)
01927         Mul_Column0(8, 6)
01928         Mul_Column1(9, 5)
01929         Mul_Column0(10, 4)
01930         Mul_Column1(11, 3)
01931         Mul_Column0(12, 2)
01932         Top_End(8)
01933 }
01934 
01935 #endif  // #if CRYPTOPP_INTEGER_SSE2
01936 
01937 // ********************************************************
01938 
01939 typedef int (CRYPTOPP_FASTCALL * PAdd)(size_t N, word *C, const word *A, const word *B);
01940 typedef void (* PMul)(word *C, const word *A, const word *B);
01941 typedef void (* PSqu)(word *C, const word *A);
01942 typedef void (* PMulTop)(word *C, const word *A, const word *B, word L);
01943 
01944 #if CRYPTOPP_INTEGER_SSE2
01945 static PAdd s_pAdd = &Baseline_Add, s_pSub = &Baseline_Sub;
01946 static size_t s_recursionLimit = 8;
01947 #else
01948 static const size_t s_recursionLimit = 16;
01949 #endif
01950 
01951 static PMul s_pMul[9], s_pBot[9];
01952 static PSqu s_pSqu[9];
01953 static PMulTop s_pTop[9];
01954 
01955 static void SetFunctionPointers()
01956 {
01957         s_pMul[0] = &Baseline_Multiply2;
01958         s_pBot[0] = &Baseline_MultiplyBottom2;
01959         s_pSqu[0] = &Baseline_Square2;
01960         s_pTop[0] = &Baseline_MultiplyTop2;
01961         s_pTop[1] = &Baseline_MultiplyTop4;
01962 
01963 #if CRYPTOPP_INTEGER_SSE2
01964         if (HasSSE2())
01965         {
01966 #if _MSC_VER != 1200 || defined(NDEBUG)
01967                 if (IsP4())
01968                 {
01969                         s_pAdd = &SSE2_Add;
01970                         s_pSub = &SSE2_Sub;
01971                 }
01972 #endif
01973 
01974                 s_recursionLimit = 32;
01975 
01976                 s_pMul[1] = &SSE2_Multiply4;
01977                 s_pMul[2] = &SSE2_Multiply8;
01978                 s_pMul[4] = &SSE2_Multiply16;
01979                 s_pMul[8] = &SSE2_Multiply32;
01980 
01981                 s_pBot[1] = &SSE2_MultiplyBottom4;
01982                 s_pBot[2] = &SSE2_MultiplyBottom8;
01983                 s_pBot[4] = &SSE2_MultiplyBottom16;
01984                 s_pBot[8] = &SSE2_MultiplyBottom32;
01985 
01986                 s_pSqu[1] = &SSE2_Square4;
01987                 s_pSqu[2] = &SSE2_Square8;
01988                 s_pSqu[4] = &SSE2_Square16;
01989                 s_pSqu[8] = &SSE2_Square32;
01990 
01991                 s_pTop[2] = &SSE2_MultiplyTop8;
01992                 s_pTop[4] = &SSE2_MultiplyTop16;
01993                 s_pTop[8] = &SSE2_MultiplyTop32;
01994         }
01995         else
01996 #endif
01997         {
01998                 s_pMul[1] = &Baseline_Multiply4;
01999                 s_pMul[2] = &Baseline_Multiply8;
02000 
02001                 s_pBot[1] = &Baseline_MultiplyBottom4;
02002                 s_pBot[2] = &Baseline_MultiplyBottom8;
02003 
02004                 s_pSqu[1] = &Baseline_Square4;
02005                 s_pSqu[2] = &Baseline_Square8;
02006 
02007                 s_pTop[2] = &Baseline_MultiplyTop8;
02008 
02009 #if     !CRYPTOPP_INTEGER_SSE2
02010                 s_pMul[4] = &Baseline_Multiply16;
02011                 s_pBot[4] = &Baseline_MultiplyBottom16;
02012                 s_pSqu[4] = &Baseline_Square16;
02013                 s_pTop[4] = &Baseline_MultiplyTop16;
02014 #endif
02015         }
02016 }
02017 
02018 inline int Add(word *C, const word *A, const word *B, size_t N)
02019 {
02020 #if CRYPTOPP_INTEGER_SSE2
02021         return s_pAdd(N, C, A, B);
02022 #else
02023         return Baseline_Add(N, C, A, B);
02024 #endif
02025 }
02026 
02027 inline int Subtract(word *C, const word *A, const word *B, size_t N)
02028 {
02029 #if CRYPTOPP_INTEGER_SSE2
02030         return s_pSub(N, C, A, B);
02031 #else
02032         return Baseline_Sub(N, C, A, B);
02033 #endif
02034 }
02035 
02036 // ********************************************************
02037 
02038 
02039 #define A0              A
02040 #define A1              (A+N2)
02041 #define B0              B
02042 #define B1              (B+N2)
02043 
02044 #define T0              T
02045 #define T1              (T+N2)
02046 #define T2              (T+N)
02047 #define T3              (T+N+N2)
02048 
02049 #define R0              R
02050 #define R1              (R+N2)
02051 #define R2              (R+N)
02052 #define R3              (R+N+N2)
02053 
02054 // R[2*N] - result = A*B
02055 // T[2*N] - temporary work space
02056 // A[N] --- multiplier
02057 // B[N] --- multiplicant
02058 
02059 void RecursiveMultiply(word *R, word *T, const word *A, const word *B, size_t N)
02060 {
02061         assert(N>=2 && N%2==0);
02062 
02063         if (N <= s_recursionLimit)
02064                 s_pMul[N/4](R, A, B);
02065         else
02066         {
02067                 const size_t N2 = N/2;
02068 
02069                 size_t AN2 = Compare(A0, A1, N2) > 0 ?  0 : N2;
02070                 Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
02071 
02072                 size_t BN2 = Compare(B0, B1, N2) > 0 ?  0 : N2;
02073                 Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
02074 
02075                 RecursiveMultiply(R2, T2, A1, B1, N2);
02076                 RecursiveMultiply(T0, T2, R0, R1, N2);
02077                 RecursiveMultiply(R0, T2, A0, B0, N2);
02078 
02079                 // now T[01] holds (A1-A0)*(B0-B1), R[01] holds A0*B0, R[23] holds A1*B1
02080 
02081                 int c2 = Add(R2, R2, R1, N2);
02082                 int c3 = c2;
02083                 c2 += Add(R1, R2, R0, N2);
02084                 c3 += Add(R2, R2, R3, N2);
02085 
02086                 if (AN2 == BN2)
02087                         c3 -= Subtract(R1, R1, T0, N);
02088                 else
02089                         c3 += Add(R1, R1, T0, N);
02090 
02091                 c3 += Increment(R2, N2, c2);
02092                 assert (c3 >= 0 && c3 <= 2);
02093                 Increment(R3, N2, c3);
02094         }
02095 }
02096 
02097 // R[2*N] - result = A*A
02098 // T[2*N] - temporary work space
02099 // A[N] --- number to be squared
02100 
02101 void RecursiveSquare(word *R, word *T, const word *A, size_t N)
02102 {
02103         assert(N && N%2==0);
02104 
02105         if (N <= s_recursionLimit)
02106                 s_pSqu[N/4](R, A);
02107         else
02108         {
02109                 const size_t N2 = N/2;
02110 
02111                 RecursiveSquare(R0, T2, A0, N2);
02112                 RecursiveSquare(R2, T2, A1, N2);
02113                 RecursiveMultiply(T0, T2, A0, A1, N2);
02114 
02115                 int carry = Add(R1, R1, T0, N);
02116                 carry += Add(R1, R1, T0, N);
02117                 Increment(R3, N2, carry);
02118         }
02119 }
02120 
02121 // R[N] - bottom half of A*B
02122 // T[3*N/2] - temporary work space
02123 // A[N] - multiplier
02124 // B[N] - multiplicant
02125 
02126 void RecursiveMultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
02127 {
02128         assert(N>=2 && N%2==0);
02129 
02130         if (N <= s_recursionLimit)
02131                 s_pBot[N/4](R, A, B);
02132         else
02133         {
02134                 const size_t N2 = N/2;
02135 
02136                 RecursiveMultiply(R, T, A0, B0, N2);
02137                 RecursiveMultiplyBottom(T0, T1, A1, B0, N2);
02138                 Add(R1, R1, T0, N2);
02139                 RecursiveMultiplyBottom(T0, T1, A0, B1, N2);
02140                 Add(R1, R1, T0, N2);
02141         }
02142 }
02143 
02144 // R[N] --- upper half of A*B
02145 // T[2*N] - temporary work space
02146 // L[N] --- lower half of A*B
02147 // A[N] --- multiplier
02148 // B[N] --- multiplicant
02149 
02150 void MultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, size_t N)
02151 {
02152         assert(N>=2 && N%2==0);
02153 
02154         if (N <= s_recursionLimit)
02155                 s_pTop[N/4](R, A, B, L[N-1]);
02156         else
02157         {
02158                 const size_t N2 = N/2;
02159 
02160                 size_t AN2 = Compare(A0, A1, N2) > 0 ?  0 : N2;
02161                 Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
02162 
02163                 size_t BN2 = Compare(B0, B1, N2) > 0 ?  0 : N2;
02164                 Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
02165 
02166                 RecursiveMultiply(T0, T2, R0, R1, N2);
02167                 RecursiveMultiply(R0, T2, A1, B1, N2);
02168 
02169                 // now T[01] holds (A1-A0)*(B0-B1) = A1*B0+A0*B1-A1*B1-A0*B0, R[01] holds A1*B1
02170 
02171                 int t, c3;
02172                 int c2 = Subtract(T2, L+N2, L, N2);
02173 
02174                 if (AN2 == BN2)
02175                 {
02176                         c2 -= Add(T2, T2, T0, N2);
02177                         t = (Compare(T2, R0, N2) == -1);
02178                         c3 = t - Subtract(T2, T2, T1, N2);
02179                 }
02180                 else
02181                 {
02182                         c2 += Subtract(T2, T2, T0, N2);
02183                         t = (Compare(T2, R0, N2) == -1);
02184                         c3 = t + Add(T2, T2, T1, N2);
02185                 }
02186 
02187                 c2 += t;
02188                 if (c2 >= 0)
02189                         c3 += Increment(T2, N2, c2);
02190                 else
02191                         c3 -= Decrement(T2, N2, -c2);
02192                 c3 += Add(R0, T2, R1, N2);
02193 
02194                 assert (c3 >= 0 && c3 <= 2);
02195                 Increment(R1, N2, c3);
02196         }
02197 }
02198 
02199 inline void Multiply(word *R, word *T, const word *A, const word *B, size_t N)
02200 {
02201         RecursiveMultiply(R, T, A, B, N);
02202 }
02203 
02204 inline void Square(word *R, word *T, const word *A, size_t N)
02205 {
02206         RecursiveSquare(R, T, A, N);
02207 }
02208 
02209 inline void MultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
02210 {
02211         RecursiveMultiplyBottom(R, T, A, B, N);
02212 }
02213 
02214 // R[NA+NB] - result = A*B
02215 // T[NA+NB] - temporary work space
02216 // A[NA] ---- multiplier
02217 // B[NB] ---- multiplicant
02218 
02219 void AsymmetricMultiply(word *R, word *T, const word *A, size_t NA, const word *B, size_t NB)
02220 {
02221         if (NA == NB)
02222         {
02223                 if (A == B)
02224                         Square(R, T, A, NA);
02225                 else
02226                         Multiply(R, T, A, B, NA);
02227 
02228                 return;
02229         }
02230 
02231         if (NA > NB)
02232         {
02233                 std::swap(A, B);
02234                 std::swap(NA, NB);
02235         }
02236 
02237         assert(NB % NA == 0);
02238 
02239         if (NA==2 && !A[1])
02240         {
02241                 switch (A[0])
02242                 {
02243                 case 0:
02244                         SetWords(R, 0, NB+2);
02245                         return;
02246                 case 1:
02247                         CopyWords(R, B, NB);
02248                         R[NB] = R[NB+1] = 0;
02249                         return;
02250                 default:
02251                         R[NB] = LinearMultiply(R, B, A[0], NB);
02252                         R[NB+1] = 0;
02253                         return;
02254                 }
02255         }
02256 
02257         size_t i;
02258         if ((NB/NA)%2 == 0)
02259         {
02260                 Multiply(R, T, A, B, NA);
02261                 CopyWords(T+2*NA, R+NA, NA);
02262 
02263                 for (i=2*NA; i<NB; i+=2*NA)
02264                         Multiply(T+NA+i, T, A, B+i, NA);
02265                 for (i=NA; i<NB; i+=2*NA)
02266                         Multiply(R+i, T, A, B+i, NA);
02267         }
02268         else
02269         {
02270                 for (i=0; i<NB; i+=2*NA)
02271                         Multiply(R+i, T, A, B+i, NA);
02272                 for (i=NA; i<NB; i+=2*NA)
02273                         Multiply(T+NA+i, T, A, B+i, NA);
02274         }
02275 
02276         if (Add(R+NA, R+NA, T+2*NA, NB-NA))
02277                 Increment(R+NB, NA);
02278 }
02279 
02280 // R[N] ----- result = A inverse mod 2**(WORD_BITS*N)
02281 // T[3*N/2] - temporary work space
02282 // A[N] ----- an odd number as input
02283 
02284 void RecursiveInverseModPower2(word *R, word *T, const word *A, size_t N)
02285 {
02286         if (N==2)
02287         {
02288                 T[0] = AtomicInverseModPower2(A[0]);
02289                 T[1] = 0;
02290                 s_pBot[0](T+2, T, A);
02291                 TwosComplement(T+2, 2);
02292                 Increment(T+2, 2, 2);
02293                 s_pBot[0](R, T, T+2);
02294         }
02295         else
02296         {
02297                 const size_t N2 = N/2;
02298                 RecursiveInverseModPower2(R0, T0, A0, N2);
02299                 T0[0] = 1;
02300                 SetWords(T0+1, 0, N2-1);
02301                 MultiplyTop(R1, T1, T0, R0, A0, N2);
02302                 MultiplyBottom(T0, T1, R0, A1, N2);
02303                 Add(T0, R1, T0, N2);
02304                 TwosComplement(T0, N2);
02305                 MultiplyBottom(R1, T1, R0, T0, N2);
02306         }
02307 }
02308 
02309 // R[N] --- result = X/(2**(WORD_BITS*N)) mod M
02310 // T[3*N] - temporary work space
02311 // X[2*N] - number to be reduced
02312 // M[N] --- modulus
02313 // U[N] --- multiplicative inverse of M mod 2**(WORD_BITS*N)
02314 
02315 void MontgomeryReduce(word *R, word *T, word *X, const word *M, const word *U, size_t N)
02316 {
02317 #if 1
02318         MultiplyBottom(R, T, X, U, N);
02319         MultiplyTop(T, T+N, X, R, M, N);
02320         word borrow = Subtract(T, X+N, T, N);
02321         // defend against timing attack by doing this Add even when not needed
02322         word carry = Add(T+N, T, M, N);
02323         assert(carry | !borrow);
02324         CopyWords(R, T + ((0-borrow) & N), N);
02325 #elif 0
02326         const word u = 0-U[0];
02327         Declare2Words(p)
02328         for (size_t i=0; i<N; i++)
02329         {
02330                 const word t = u * X[i];
02331                 word c = 0;
02332                 for (size_t j=0; j<N; j+=2)
02333                 {
02334                         MultiplyWords(p, t, M[j]);
02335                         Acc2WordsBy1(p, X[i+j]);
02336                         Acc2WordsBy1(p, c);
02337                         X[i+j] = LowWord(p);
02338                         c = HighWord(p);
02339                         MultiplyWords(p, t, M[j+1]);
02340                         Acc2WordsBy1(p, X[i+j+1]);
02341                         Acc2WordsBy1(p, c);
02342                         X[i+j+1] = LowWord(p);
02343                         c = HighWord(p);
02344                 }
02345 
02346                 if (Increment(X+N+i, N-i, c))
02347                         while (!Subtract(X+N, X+N, M, N)) {}
02348         }
02349 
02350         memcpy(R, X+N, N*WORD_SIZE);
02351 #else
02352         __m64 u = _mm_cvtsi32_si64(0-U[0]), p;
02353         for (size_t i=0; i<N; i++)
02354         {
02355                 __m64 t = _mm_cvtsi32_si64(X[i]);
02356                 t = _mm_mul_su32(t, u);
02357                 __m64 c = _mm_setzero_si64();
02358                 for (size_t j=0; j<N; j+=2)
02359                 {
02360                         p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j]));
02361                         p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j]));
02362                         c = _mm_add_si64(c, p);
02363                         X[i+j] = _mm_cvtsi64_si32(c);
02364                         c = _mm_srli_si64(c, 32);
02365                         p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j+1]));
02366                         p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j+1]));
02367                         c = _mm_add_si64(c, p);
02368                         X[i+j+1] = _mm_cvtsi64_si32(c);
02369                         c = _mm_srli_si64(c, 32);
02370                 }
02371 
02372                 if (Increment(X+N+i, N-i, _mm_cvtsi64_si32(c)))
02373                         while (!Subtract(X+N, X+N, M, N)) {}
02374         }
02375 
02376         memcpy(R, X+N, N*WORD_SIZE);
02377         _mm_empty();
02378 #endif
02379 }
02380 
02381 // R[N] --- result = X/(2**(WORD_BITS*N/2)) mod M
02382 // T[2*N] - temporary work space
02383 // X[2*N] - number to be reduced
02384 // M[N] --- modulus
02385 // U[N/2] - multiplicative inverse of M mod 2**(WORD_BITS*N/2)
02386 // V[N] --- 2**(WORD_BITS*3*N/2) mod M
02387 
02388 void HalfMontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, const word *V, size_t N)
02389 {
02390         assert(N%2==0 && N>=4);
02391 
02392 #define M0              M
02393 #define M1              (M+N2)
02394 #define V0              V
02395 #define V1              (V+N2)
02396 
02397 #define X0              X
02398 #define X1              (X+N2)
02399 #define X2              (X+N)
02400 #define X3              (X+N+N2)
02401 
02402         const size_t N2 = N/2;
02403         Multiply(T0, T2, V0, X3, N2);
02404         int c2 = Add(T0, T0, X0, N);
02405         MultiplyBottom(T3, T2, T0, U, N2);
02406         MultiplyTop(T2, R, T0, T3, M0, N2);
02407         c2 -= Subtract(T2, T1, T2, N2);
02408         Multiply(T0, R, T3, M1, N2);
02409         c2 -= Subtract(T0, T2, T0, N2);
02410         int c3 = -(int)Subtract(T1, X2, T1, N2);
02411         Multiply(R0, T2, V1, X3, N2);
02412         c3 += Add(R, R, T, N);
02413 
02414         if (c2>0)
02415                 c3 += Increment(R1, N2);
02416         else if (c2<0)
02417                 c3 -= Decrement(R1, N2, -c2);
02418 
02419         assert(c3>=-1 && c3<=1);
02420         if (c3>0)
02421                 Subtract(R, R, M, N);
02422         else if (c3<0)
02423                 Add(R, R, M, N);
02424 
02425 #undef M0
02426 #undef M1
02427 #undef V0
02428 #undef V1
02429 
02430 #undef X0
02431 #undef X1
02432 #undef X2
02433 #undef X3
02434 }
02435 
02436 #undef A0
02437 #undef A1
02438 #undef B0
02439 #undef B1
02440 
02441 #undef T0
02442 #undef T1
02443 #undef T2
02444 #undef T3
02445 
02446 #undef R0
02447 #undef R1
02448 #undef R2
02449 #undef R3
02450 
02451 /*
02452 // do a 3 word by 2 word divide, returns quotient and leaves remainder in A
02453 static word SubatomicDivide(word *A, word B0, word B1)
02454 {
02455         // assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a word
02456         assert(A[2] < B1 || (A[2]==B1 && A[1] < B0));
02457 
02458         // estimate the quotient: do a 2 word by 1 word divide
02459         word Q;
02460         if (B1+1 == 0)
02461                 Q = A[2];
02462         else
02463                 Q = DWord(A[1], A[2]).DividedBy(B1+1);
02464 
02465         // now subtract Q*B from A
02466         DWord p = DWord::Multiply(B0, Q);
02467         DWord u = (DWord) A[0] - p.GetLowHalf();
02468         A[0] = u.GetLowHalf();
02469         u = (DWord) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - DWord::Multiply(B1, Q);
02470         A[1] = u.GetLowHalf();
02471         A[2] += u.GetHighHalf();
02472 
02473         // Q <= actual quotient, so fix it
02474         while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
02475         {
02476                 u = (DWord) A[0] - B0;
02477                 A[0] = u.GetLowHalf();
02478                 u = (DWord) A[1] - B1 - u.GetHighHalfAsBorrow();
02479                 A[1] = u.GetLowHalf();
02480                 A[2] += u.GetHighHalf();
02481                 Q++;
02482                 assert(Q);      // shouldn't overflow
02483         }
02484 
02485         return Q;
02486 }
02487 
02488 // do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1
02489 static inline void AtomicDivide(word *Q, const word *A, const word *B)
02490 {
02491         if (!B[0] && !B[1]) // if divisor is 0, we assume divisor==2**(2*WORD_BITS)
02492         {
02493                 Q[0] = A[2];
02494                 Q[1] = A[3];
02495         }
02496         else
02497         {
02498                 word T[4];
02499                 T[0] = A[0]; T[1] = A[1]; T[2] = A[2]; T[3] = A[3];
02500                 Q[1] = SubatomicDivide(T+1, B[0], B[1]);
02501                 Q[0] = SubatomicDivide(T, B[0], B[1]);
02502 
02503 #ifndef NDEBUG
02504                 // multiply quotient and divisor and add remainder, make sure it equals dividend
02505                 assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
02506                 word P[4];
02507                 LowLevel::Multiply2(P, Q, B);
02508                 Add(P, P, T, 4);
02509                 assert(memcmp(P, A, 4*WORD_SIZE)==0);
02510 #endif
02511         }
02512 }
02513 */
02514 
02515 static inline void AtomicDivide(word *Q, const word *A, const word *B)
02516 {
02517         word T[4];
02518         DWord q = DivideFourWordsByTwo<word, DWord>(T, DWord(A[0], A[1]), DWord(A[2], A[3]), DWord(B[0], B[1]));
02519         Q[0] = q.GetLowHalf();
02520         Q[1] = q.GetHighHalf();
02521 
02522 #ifndef NDEBUG
02523         if (B[0] || B[1])
02524         {
02525                 // multiply quotient and divisor and add remainder, make sure it equals dividend
02526                 assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
02527                 word P[4];
02528                 s_pMul[0](P, Q, B);
02529                 Add(P, P, T, 4);
02530                 assert(memcmp(P, A, 4*WORD_SIZE)==0);
02531         }
02532 #endif
02533 }
02534 
02535 // for use by Divide(), corrects the underestimated quotient {Q1,Q0}
02536 static void CorrectQuotientEstimate(word *R, word *T, word *Q, const word *B, size_t N)
02537 {
02538         assert(N && N%2==0);
02539 
02540         AsymmetricMultiply(T, T+N+2, Q, 2, B, N);
02541 
02542         word borrow = Subtract(R, R, T, N+2);
02543         assert(!borrow && !R[N+1]);
02544 
02545         while (R[N] || Compare(R, B, N) >= 0)
02546         {
02547                 R[N] -= Subtract(R, R, B, N);
02548                 Q[1] += (++Q[0]==0);
02549                 assert(Q[0] || Q[1]); // no overflow
02550         }
02551 }
02552 
02553 // R[NB] -------- remainder = A%B
02554 // Q[NA-NB+2] --- quotient      = A/B
02555 // T[NA+3*(NB+2)] - temp work space
02556 // A[NA] -------- dividend
02557 // B[NB] -------- divisor
02558 
02559 void Divide(word *R, word *Q, word *T, const word *A, size_t NA, const word *B, size_t NB)
02560 {
02561         assert(NA && NB && NA%2==0 && NB%2==0);
02562         assert(B[NB-1] || B[NB-2]);
02563         assert(NB <= NA);
02564 
02565         // set up temporary work space
02566         word *const TA=T;
02567         word *const TB=T+NA+2;
02568         word *const TP=T+NA+2+NB;
02569 
02570         // copy B into TB and normalize it so that TB has highest bit set to 1
02571         unsigned shiftWords = (B[NB-1]==0);
02572         TB[0] = TB[NB-1] = 0;
02573         CopyWords(TB+shiftWords, B, NB-shiftWords);
02574         unsigned shiftBits = WORD_BITS - BitPrecision(TB[NB-1]);
02575         assert(shiftBits < WORD_BITS);
02576         ShiftWordsLeftByBits(TB, NB, shiftBits);
02577 
02578         // copy A into TA and normalize it
02579         TA[0] = TA[NA] = TA[NA+1] = 0;
02580         CopyWords(TA+shiftWords, A, NA);
02581         ShiftWordsLeftByBits(TA, NA+2, shiftBits);
02582 
02583         if (TA[NA+1]==0 && TA[NA] <= 1)
02584         {
02585                 Q[NA-NB+1] = Q[NA-NB] = 0;
02586                 while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0)
02587                 {
02588                         TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB);
02589                         ++Q[NA-NB];
02590                 }
02591         }
02592         else
02593         {
02594                 NA+=2;
02595                 assert(Compare(TA+NA-NB, TB, NB) < 0);
02596         }
02597 
02598         word BT[2];
02599         BT[0] = TB[NB-2] + 1;
02600         BT[1] = TB[NB-1] + (BT[0]==0);
02601 
02602         // start reducing TA mod TB, 2 words at a time
02603         for (size_t i=NA-2; i>=NB; i-=2)
02604         {
02605                 AtomicDivide(Q+i-NB, TA+i-2, BT);
02606                 CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB);
02607         }
02608 
02609         // copy TA into R, and denormalize it
02610         CopyWords(R, TA+shiftWords, NB);
02611         ShiftWordsRightByBits(R, NB, shiftBits);
02612 }
02613 
02614 static inline size_t EvenWordCount(const word *X, size_t N)
02615 {
02616         while (N && X[N-2]==0 && X[N-1]==0)
02617                 N-=2;
02618         return N;
02619 }
02620 
02621 // return k
02622 // R[N] --- result = A^(-1) * 2^k mod M
02623 // T[4*N] - temporary work space
02624 // A[NA] -- number to take inverse of
02625 // M[N] --- modulus
02626 
02627 unsigned int AlmostInverse(word *R, word *T, const word *A, size_t NA, const word *M, size_t N)
02628 {
02629         assert(NA<=N && N && N%2==0);
02630 
02631         word *b = T;
02632         word *c = T+N;
02633         word *f = T+2*N;
02634         word *g = T+3*N;
02635         size_t bcLen=2, fgLen=EvenWordCount(M, N);
02636         unsigned int k=0;
02637         bool s=false;
02638 
02639         SetWords(T, 0, 3*N);
02640         b[0]=1;
02641         CopyWords(f, A, NA);
02642         CopyWords(g, M, N);
02643 
02644         while (1)
02645         {
02646                 word t=f[0];
02647                 while (!t)
02648                 {
02649                         if (EvenWordCount(f, fgLen)==0)
02650                         {
02651                                 SetWords(R, 0, N);
02652                                 return 0;
02653                         }
02654 
02655                         ShiftWordsRightByWords(f, fgLen, 1);
02656                         bcLen += 2 * (c[bcLen-1] != 0);
02657                         assert(bcLen <= N);
02658                         ShiftWordsLeftByWords(c, bcLen, 1);
02659                         k+=WORD_BITS;
02660                         t=f[0];
02661                 }
02662 
02663                 unsigned int i = TrailingZeros(t);
02664                 t >>= i;
02665                 k += i;
02666 
02667                 if (t==1 && f[1]==0 && EvenWordCount(f+2, fgLen-2)==0)
02668                 {
02669                         if (s)
02670                                 Subtract(R, M, b, N);
02671                         else
02672                                 CopyWords(R, b, N);
02673                         return k;
02674                 }
02675 
02676                 ShiftWordsRightByBits(f, fgLen, i);
02677                 t = ShiftWordsLeftByBits(c, bcLen, i);
02678                 c[bcLen] += t;
02679                 bcLen += 2 * (t!=0);
02680                 assert(bcLen <= N);
02681 
02682                 bool swap = Compare(f, g, fgLen)==-1;
02683                 ConditionalSwapPointers(swap, f, g);
02684                 ConditionalSwapPointers(swap, b, c);
02685                 s ^= swap;
02686 
02687                 fgLen -= 2 * !(f[fgLen-2] | f[fgLen-1]);
02688 
02689                 Subtract(f, f, g, fgLen);
02690                 t = Add(b, b, c, bcLen);
02691                 b[bcLen] += t;
02692                 bcLen += 2*t;
02693                 assert(bcLen <= N);
02694         }
02695 }
02696 
02697 // R[N] - result = A/(2^k) mod M
02698 // A[N] - input
02699 // M[N] - modulus
02700 
02701 void DivideByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
02702 {
02703         CopyWords(R, A, N);
02704 
02705         while (k--)
02706         {
02707                 if (R[0]%2==0)
02708                         ShiftWordsRightByBits(R, N, 1);
02709                 else
02710                 {
02711                         word carry = Add(R, R, M, N);
02712                         ShiftWordsRightByBits(R, N, 1);
02713                         R[N-1] += carry<<(WORD_BITS-1);
02714                 }
02715         }
02716 }
02717 
02718 // R[N] - result = A*(2^k) mod M
02719 // A[N] - input
02720 // M[N] - modulus
02721 
02722 void MultiplyByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
02723 {
02724         CopyWords(R, A, N);
02725 
02726         while (k--)
02727                 if (ShiftWordsLeftByBits(R, N, 1) || Compare(R, M, N)>=0)
02728                         Subtract(R, R, M, N);
02729 }
02730 
02731 // ******************************************************************
02732 
02733 InitializeInteger::InitializeInteger()
02734 {
02735         if (!g_pAssignIntToInteger)
02736         {
02737                 SetFunctionPointers();
02738                 g_pAssignIntToInteger = AssignIntToInteger;
02739         }
02740 }
02741 
02742 static const unsigned int RoundupSizeTable[] = {2, 2, 2, 4, 4, 8, 8, 8, 8};
02743 
02744 static inline size_t RoundupSize(size_t n)
02745 {
02746         if (n<=8)
02747                 return RoundupSizeTable[n];
02748         else if (n<=16)
02749                 return 16;
02750         else if (n<=32)
02751                 return 32;
02752         else if (n<=64)
02753                 return 64;
02754         else return size_t(1) << BitPrecision(n-1);
02755 }
02756 
02757 Integer::Integer()
02758         : reg(2), sign(POSITIVE)
02759 {
02760         reg[0] = reg[1] = 0;
02761 }
02762 
02763 Integer::Integer(const Integer& t)
02764         : reg(RoundupSize(t.WordCount())), sign(t.sign)
02765 {
02766         CopyWords(reg, t.reg, reg.size());
02767 }
02768 
02769 Integer::Integer(Sign s, lword value)
02770         : reg(2), sign(s)
02771 {
02772         reg[0] = word(value);
02773         reg[1] = word(SafeRightShift<WORD_BITS>(value));
02774 }
02775 
02776 Integer::Integer(signed long value)
02777         : reg(2)
02778 {
02779         if (value >= 0)
02780                 sign = POSITIVE;
02781         else
02782         {
02783                 sign = NEGATIVE;
02784                 value = -value;
02785         }
02786         reg[0] = word(value);
02787         reg[1] = word(SafeRightShift<WORD_BITS>((unsigned long)value));
02788 }
02789 
02790 Integer::Integer(Sign s, word high, word low)
02791         : reg(2), sign(s)
02792 {
02793         reg[0] = low;
02794         reg[1] = high;
02795 }
02796 
02797 bool Integer::IsConvertableToLong() const
02798 {
02799         if (ByteCount() > sizeof(long))
02800                 return false;
02801 
02802         unsigned long value = (unsigned long)reg[0];
02803         value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
02804 
02805         if (sign==POSITIVE)
02806                 return (signed long)value >= 0;
02807         else
02808                 return -(signed long)value < 0;
02809 }
02810 
02811 signed long Integer::ConvertToLong() const
02812 {
02813         assert(IsConvertableToLong());
02814 
02815         unsigned long value = (unsigned long)reg[0];
02816         value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
02817         return sign==POSITIVE ? value : -(signed long)value;
02818 }
02819 
02820 Integer::Integer(BufferedTransformation &encodedInteger, size_t byteCount, Signedness s)
02821 {
02822         Decode(encodedInteger, byteCount, s);
02823 }
02824 
02825 Integer::Integer(const byte *encodedInteger, size_t byteCount, Signedness s)
02826 {
02827         Decode(encodedInteger, byteCount, s);
02828 }
02829 
02830 Integer::Integer(BufferedTransformation &bt)
02831 {
02832         BERDecode(bt);
02833 }
02834 
02835 Integer::Integer(RandomNumberGenerator &rng, size_t bitcount)
02836 {
02837         Randomize(rng, bitcount);
02838 }
02839 
02840 Integer::Integer(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
02841 {
02842         if (!Randomize(rng, min, max, rnType, equiv, mod))
02843                 throw Integer::RandomNumberNotFound();
02844 }
02845 
02846 Integer Integer::Power2(size_t e)
02847 {
02848         Integer r((word)0, BitsToWords(e+1));
02849         r.SetBit(e);
02850         return r;
02851 }
02852 
02853 template <long i>
02854 struct NewInteger
02855 {
02856         Integer * operator()() const
02857         {
02858                 return new Integer(i);
02859         }
02860 };
02861 
02862 const Integer &Integer::Zero()
02863 {
02864         return Singleton<Integer>().Ref();
02865 }
02866 
02867 const Integer &Integer::One()
02868 {
02869         return Singleton<Integer, NewInteger<1> >().Ref();
02870 }
02871 
02872 const Integer &Integer::Two()
02873 {
02874         return Singleton<Integer, NewInteger<2> >().Ref();
02875 }
02876 
02877 bool Integer::operator!() const
02878 {
02879         return IsNegative() ? false : (reg[0]==0 && WordCount()==0);
02880 }
02881 
02882 Integer& Integer::operator=(const Integer& t)
02883 {
02884         if (this != &t)
02885         {
02886                 if (reg.size() != t.reg.size() || t.reg[t.reg.size()/2] == 0)
02887                         reg.New(RoundupSize(t.WordCount()));
02888                 CopyWords(reg, t.reg, reg.size());
02889                 sign = t.sign;
02890         }
02891         return *this;
02892 }
02893 
02894 bool Integer::GetBit(size_t n) const
02895 {
02896         if (n/WORD_BITS >= reg.size())
02897                 return 0;
02898         else
02899                 return bool((reg[n/WORD_BITS] >> (n % WORD_BITS)) & 1);
02900 }
02901 
02902 void Integer::SetBit(size_t n, bool value)
02903 {
02904         if (value)
02905         {
02906                 reg.CleanGrow(RoundupSize(BitsToWords(n+1)));
02907                 reg[n/WORD_BITS] |= (word(1) << (n%WORD_BITS));
02908         }
02909         else
02910         {
02911                 if (n/WORD_BITS < reg.size())
02912                         reg[n/WORD_BITS] &= ~(word(1) << (n%WORD_BITS));
02913         }
02914 }
02915 
02916 byte Integer::GetByte(size_t n) const
02917 {
02918         if (n/WORD_SIZE >= reg.size())
02919                 return 0;
02920         else
02921                 return byte(reg[n/WORD_SIZE] >> ((n%WORD_SIZE)*8));
02922 }
02923 
02924 void Integer::SetByte(size_t n, byte value)
02925 {
02926         reg.CleanGrow(RoundupSize(BytesToWords(n+1)));
02927         reg[n/WORD_SIZE] &= ~(word(0xff) << 8*(n%WORD_SIZE));
02928         reg[n/WORD_SIZE] |= (word(value) << 8*(n%WORD_SIZE));
02929 }
02930 
02931 lword Integer::GetBits(size_t i, size_t n) const
02932 {
02933         lword v = 0;
02934         assert(n <= sizeof(v)*8);
02935         for (unsigned int j=0; j<n; j++)
02936                 v |= lword(GetBit(i+j)) << j;
02937         return v;
02938 }
02939 
02940 Integer Integer::operator-() const
02941 {
02942         Integer result(*this);
02943         result.Negate();
02944         return result;
02945 }
02946 
02947 Integer Integer::AbsoluteValue() const
02948 {
02949         Integer result(*this);
02950         result.sign = POSITIVE;
02951         return result;
02952 }
02953 
02954 void Integer::swap(Integer &a)
02955 {
02956         reg.swap(a.reg);
02957         std::swap(sign, a.sign);
02958 }
02959 
02960 Integer::Integer(word value, size_t length)
02961         : reg(RoundupSize(length)), sign(POSITIVE)
02962 {
02963         reg[0] = value;
02964         SetWords(reg+1, 0, reg.size()-1);
02965 }
02966 
02967 template <class T>
02968 static Integer StringToInteger(const T *str)
02969 {
02970         int radix;
02971         // GCC workaround
02972         // std::char_traits<wchar_t>::length() not defined in GCC 3.2 and STLport 4.5.3
02973         unsigned int length;
02974         for (length = 0; str[length] != 0; length++) {}
02975 
02976         Integer v;
02977 
02978         if (length == 0)
02979                 return v;
02980 
02981         switch (str[length-1])
02982         {
02983         case 'h':
02984         case 'H':
02985                 radix=16;
02986                 break;
02987         case 'o':
02988         case 'O':
02989                 radix=8;
02990                 break;
02991         case 'b':
02992         case 'B':
02993                 radix=2;
02994                 break;
02995         default:
02996                 radix=10;
02997         }
02998 
02999         if (length > 2 && str[0] == '0' && str[1] == 'x')
03000                 radix = 16;
03001 
03002         for (unsigned i=0; i<length; i++)
03003         {
03004                 int digit;
03005 
03006                 if (str[i] >= '0' && str[i] <= '9')
03007                         digit = str[i] - '0';
03008                 else if (str[i] >= 'A' && str[i] <= 'F')
03009                         digit = str[i] - 'A' + 10;
03010                 else if (str[i] >= 'a' && str[i] <= 'f')
03011                         digit = str[i] - 'a' + 10;
03012                 else
03013                         digit = radix;
03014 
03015                 if (digit < radix)
03016                 {
03017                         v *= radix;
03018                         v += digit;
03019                 }
03020         }
03021 
03022         if (str[0] == '-')
03023                 v.Negate();
03024 
03025         return v;
03026 }
03027 
03028 Integer::Integer(const char *str)
03029         : reg(2), sign(POSITIVE)
03030 {
03031         *this = StringToInteger(str);
03032 }
03033 
03034 Integer::Integer(const wchar_t *str)
03035         : reg(2), sign(POSITIVE)
03036 {
03037         *this = StringToInteger(str);
03038 }
03039 
03040 unsigned int Integer::WordCount() const
03041 {
03042         return (unsigned int)CountWords(reg, reg.size());
03043 }
03044 
03045 unsigned int Integer::ByteCount() const
03046 {
03047         unsigned wordCount = WordCount();
03048         if (wordCount)
03049                 return (wordCount-1)*WORD_SIZE + BytePrecision(reg[wordCount-1]);
03050         else
03051                 return 0;
03052 }
03053 
03054 unsigned int Integer::BitCount() const
03055 {
03056         unsigned wordCount = WordCount();
03057         if (wordCount)
03058                 return (wordCount-1)*WORD_BITS + BitPrecision(reg[wordCount-1]);
03059         else
03060                 return 0;
03061 }
03062 
03063 void Integer::Decode(const byte *input, size_t inputLen, Signedness s)
03064 {
03065         StringStore store(input, inputLen);
03066         Decode(store, inputLen, s);
03067 }
03068 
03069 void Integer::Decode(BufferedTransformation &bt, size_t inputLen, Signedness s)
03070 {
03071         assert(bt.MaxRetrievable() >= inputLen);
03072 
03073         byte b;
03074         bt.Peek(b);
03075         sign = ((s==SIGNED) && (b & 0x80)) ? NEGATIVE : POSITIVE;
03076 
03077         while (inputLen>0 && (sign==POSITIVE ? b==0 : b==0xff))
03078         {
03079                 bt.Skip(1);
03080                 inputLen--;
03081                 bt.Peek(b);
03082         }
03083 
03084         reg.CleanNew(RoundupSize(BytesToWords(inputLen)));
03085 
03086         for (size_t i=inputLen; i > 0; i--)
03087         {
03088                 bt.Get(b);
03089                 reg[(i-1)/WORD_SIZE] |= word(b) << ((i-1)%WORD_SIZE)*8;
03090         }
03091 
03092         if (sign == NEGATIVE)
03093         {
03094                 for (size_t i=inputLen; i<reg.size()*WORD_SIZE; i++)
03095                         reg[i/WORD_SIZE] |= word(0xff) << (i%WORD_SIZE)*8;
03096                 TwosComplement(reg, reg.size());
03097         }
03098 }
03099 
03100 size_t Integer::MinEncodedSize(Signedness signedness) const
03101 {
03102         unsigned int outputLen = STDMAX(1U, ByteCount());
03103         if (signedness == UNSIGNED)
03104                 return outputLen;
03105         if (NotNegative() && (GetByte(outputLen-1) & 0x80))
03106                 outputLen++;
03107         if (IsNegative() && *this < -Power2(outputLen*8-1))
03108                 outputLen++;
03109         return outputLen;
03110 }
03111 
03112 void Integer::Encode(byte *output, size_t outputLen, Signedness signedness) const
03113 {
03114         ArraySink sink(output, outputLen);
03115         Encode(sink, outputLen, signedness);
03116 }
03117 
03118 void Integer::Encode(BufferedTransformation &bt, size_t outputLen, Signedness signedness) const
03119 {
03120         if (signedness == UNSIGNED || NotNegative())
03121         {
03122                 for (size_t i=outputLen; i > 0; i--)
03123                         bt.Put(GetByte(i-1));
03124         }
03125         else
03126         {
03127                 // take two's complement of *this
03128                 Integer temp = Integer::Power2(8*STDMAX((size_t)ByteCount(), outputLen)) + *this;
03129                 temp.Encode(bt, outputLen, UNSIGNED);
03130         }
03131 }
03132 
03133 void Integer::DEREncode(BufferedTransformation &bt) const
03134 {
03135         DERGeneralEncoder enc(bt, INTEGER);
03136         Encode(enc, MinEncodedSize(SIGNED), SIGNED);
03137         enc.MessageEnd();
03138 }
03139 
03140 void Integer::BERDecode(const byte *input, size_t len)
03141 {
03142         StringStore store(input, len);
03143         BERDecode(store);
03144 }
03145 
03146 void Integer::BERDecode(BufferedTransformation &bt)
03147 {
03148         BERGeneralDecoder dec(bt, INTEGER);
03149         if (!dec.IsDefiniteLength() || dec.MaxRetrievable() < dec.RemainingLength())
03150                 BERDecodeError();
03151         Decode(dec, (size_t)dec.RemainingLength(), SIGNED);
03152         dec.MessageEnd();
03153 }
03154 
03155 void Integer::DEREncodeAsOctetString(BufferedTransformation &bt, size_t length) const
03156 {
03157         DERGeneralEncoder enc(bt, OCTET_STRING);
03158         Encode(enc, length);
03159         enc.MessageEnd();
03160 }
03161 
03162 void Integer::BERDecodeAsOctetString(BufferedTransformation &bt, size_t length)
03163 {
03164         BERGeneralDecoder dec(bt, OCTET_STRING);
03165         if (!dec.IsDefiniteLength() || dec.RemainingLength() != length)
03166                 BERDecodeError();
03167         Decode(dec, length);
03168         dec.MessageEnd();
03169 }
03170 
03171 size_t Integer::OpenPGPEncode(byte *output, size_t len) const
03172 {
03173         ArraySink sink(output, len);
03174         return OpenPGPEncode(sink);
03175 }
03176 
03177 size_t Integer::OpenPGPEncode(BufferedTransformation &bt) const
03178 {
03179         word16 bitCount = BitCount();
03180         bt.PutWord16(bitCount);
03181         size_t byteCount = BitsToBytes(bitCount);
03182         Encode(bt, byteCount);
03183         return 2 + byteCount;
03184 }
03185 
03186 void Integer::OpenPGPDecode(const byte *input, size_t len)
03187 {
03188         StringStore store(input, len);
03189         OpenPGPDecode(store);
03190 }
03191 
03192 void Integer::OpenPGPDecode(BufferedTransformation &bt)
03193 {
03194         word16 bitCount;
03195         if (bt.GetWord16(bitCount) != 2 || bt.MaxRetrievable() < BitsToBytes(bitCount))
03196                 throw OpenPGPDecodeErr();
03197         Decode(bt, BitsToBytes(bitCount));
03198 }
03199 
03200 void Integer::Randomize(RandomNumberGenerator &rng, size_t nbits)
03201 {
03202         const size_t nbytes = nbits/8 + 1;
03203         SecByteBlock buf(nbytes);
03204         rng.GenerateBlock(buf, nbytes);
03205         if (nbytes)
03206                 buf[0] = (byte)Crop(buf[0], nbits % 8);
03207         Decode(buf, nbytes, UNSIGNED);
03208 }
03209 
03210 void Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max)
03211 {
03212         if (min > max)
03213                 throw InvalidArgument("Integer: Min must be no greater than Max");
03214 
03215         Integer range = max - min;
03216         const unsigned int nbits = range.BitCount();
03217 
03218         do
03219         {
03220                 Randomize(rng, nbits);
03221         }
03222         while (*this > range);
03223 
03224         *this += min;
03225 }
03226 
03227 bool Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
03228 {
03229         return GenerateRandomNoThrow(rng, MakeParameters("Min", min)("Max", max)("RandomNumberType", rnType)("EquivalentTo", equiv)("Mod", mod));
03230 }
03231 
03232 class KDF2_RNG : public RandomNumberGenerator
03233 {
03234 public:
03235         KDF2_RNG(const byte *seed, size_t seedSize)
03236                 : m_counter(0), m_counterAndSeed(seedSize + 4)
03237         {
03238                 memcpy(m_counterAndSeed + 4, seed, seedSize);
03239         }
03240 
03241         void GenerateBlock(byte *output, size_t size)
03242         {
03243                 PutWord(false, BIG_ENDIAN_ORDER, m_counterAndSeed, m_counter);
03244                 ++m_counter;
03245                 P1363_KDF2<SHA1>::DeriveKey(output, size, m_counterAndSeed, m_counterAndSeed.size(), NULL, 0);
03246         }
03247 
03248 private:
03249         word32 m_counter;
03250         SecByteBlock m_counterAndSeed;
03251 };
03252 
03253 bool Integer::GenerateRandomNoThrow(RandomNumberGenerator &i_rng, const NameValuePairs &params)
03254 {
03255         Integer min = params.GetValueWithDefault("Min", Integer::Zero());
03256         Integer max;
03257         if (!params.GetValue("Max", max))
03258         {
03259                 int bitLength;
03260                 if (params.GetIntValue("BitLength", bitLength))
03261                         max = Integer::Power2(bitLength);
03262                 else
03263                         throw InvalidArgument("Integer: missing Max argument");
03264         }
03265         if (min > max)
03266                 throw InvalidArgument("Integer: Min must be no greater than Max");
03267 
03268         Integer equiv = params.GetValueWithDefault("EquivalentTo", Integer::Zero());
03269         Integer mod = params.GetValueWithDefault("Mod", Integer::One());
03270 
03271         if (equiv.IsNegative() || equiv >= mod)
03272                 throw InvalidArgument("Integer: invalid EquivalentTo and/or Mod argument");
03273 
03274         Integer::RandomNumberType rnType = params.GetValueWithDefault("RandomNumberType", Integer::ANY);
03275 
03276         member_ptr<KDF2_RNG> kdf2Rng;
03277         ConstByteArrayParameter seed;
03278         if (params.GetValue(Name::Seed(), seed))
03279         {
03280                 ByteQueue bq;
03281                 DERSequenceEncoder seq(bq);
03282                 min.DEREncode(seq);
03283                 max.DEREncode(seq);
03284                 equiv.DEREncode(seq);
03285                 mod.DEREncode(seq);
03286                 DEREncodeUnsigned(seq, rnType);
03287                 DEREncodeOctetString(seq, seed.begin(), seed.size());
03288                 seq.MessageEnd();
03289 
03290                 SecByteBlock finalSeed((size_t)bq.MaxRetrievable());
03291                 bq.Get(finalSeed, finalSeed.size());
03292                 kdf2Rng.reset(new KDF2_RNG(finalSeed.begin(), finalSeed.size()));
03293         }
03294         RandomNumberGenerator &rng = kdf2Rng.get() ? (RandomNumberGenerator &)*kdf2Rng : i_rng;
03295 
03296         switch (rnType)
03297         {
03298                 case ANY:
03299                         if (mod == One())
03300                                 Randomize(rng, min, max);
03301                         else
03302                         {
03303                                 Integer min1 = min + (equiv-min)%mod;
03304                                 if (max < min1)
03305                                         return false;
03306                                 Randomize(rng, Zero(), (max - min1) / mod);
03307                                 *this *= mod;
03308                                 *this += min1;
03309                         }
03310                         return true;
03311 
03312                 case PRIME:
03313                 {
03314                         const PrimeSelector *pSelector = params.GetValueWithDefault(Name::PointerToPrimeSelector(), (const PrimeSelector *)NULL);
03315 
03316                         int i;
03317                         i = 0;
03318                         while (1)
03319                         {
03320                                 if (++i==16)
03321                                 {
03322                                         // check if there are any suitable primes in [min, max]
03323                                         Integer first = min;
03324                                         if (FirstPrime(first, max, equiv, mod, pSelector))
03325                                         {
03326                                                 // if there is only one suitable prime, we're done
03327                                                 *this = first;
03328                                                 if (!FirstPrime(first, max, equiv, mod, pSelector))
03329                                                         return true;
03330                                         }
03331                                         else
03332                                                 return false;
03333                                 }
03334 
03335                                 Randomize(rng, min, max);
03336                                 if (FirstPrime(*this, STDMIN(*this+mod*PrimeSearchInterval(max), max), equiv, mod, pSelector))
03337                                         return true;
03338                         }
03339                 }
03340 
03341                 default:
03342                         throw InvalidArgument("Integer: invalid RandomNumberType argument");
03343         }
03344 }
03345 
03346 std::istream& operator>>(std::istream& in, Integer &a)
03347 {
03348         char c;
03349         unsigned int length = 0;
03350         SecBlock<char> str(length + 16);
03351 
03352         std::ws(in);
03353 
03354         do
03355         {
03356                 in.read(&c, 1);
03357                 str[length++] = c;
03358                 if (length >= str.size())
03359                         str.Grow(length + 16);
03360         }
03361         while (in && (c=='-' || c=='x' || (c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F') || c=='h' || c=='H' || c=='o' || c=='O' || c==',' || c=='.'));
03362 
03363         if (in.gcount())
03364                 in.putback(c);
03365         str[length-1] = '\0';
03366         a = Integer(str);
03367 
03368         return in;
03369 }
03370 
03371 std::ostream& operator<<(std::ostream& out, const Integer &a)
03372 {
03373         // Get relevant conversion specifications from ostream.
03374         long f = out.flags() & std::ios::basefield; // Get base digits.
03375         int base, block;
03376         char suffix;
03377         switch(f)
03378         {
03379         case std::ios::oct :
03380                 base = 8;
03381                 block = 8;
03382                 suffix = 'o';
03383                 break;
03384         case std::ios::hex :
03385                 base = 16;
03386                 block = 4;
03387                 suffix = 'h';
03388                 break;
03389         default :
03390                 base = 10;
03391                 block = 3;
03392                 suffix = '.';
03393         }
03394 
03395         Integer temp1=a, temp2;
03396     
03397         if (a.IsNegative())
03398         {
03399                 out << '-';
03400                 temp1.Negate();
03401         }
03402 
03403         if (!a)
03404                 out << '0';
03405 
03406         static const char upper[]="0123456789ABCDEF";
03407         static const char lower[]="0123456789abcdef";
03408 
03409         const char* vec = (out.flags() & std::ios::uppercase) ? upper : lower;
03410         unsigned i=0;
03411         SecBlock<char> s(a.BitCount() / (BitPrecision(base)-1) + 1);
03412 
03413         while (!!temp1)
03414         {
03415                 word digit;
03416                 Integer::Divide(digit, temp2, temp1, base);
03417                 s[i++]=vec[digit];
03418                 temp1.swap(temp2);
03419         }
03420 
03421         while (i--)
03422         {
03423                 out << s[i];
03424 //              if (i && !(i%block))
03425 //                      out << ",";
03426         }
03427         return out << suffix;
03428 }
03429 
03430 Integer& Integer::operator++()
03431 {
03432         if (NotNegative())
03433         {
03434                 if (Increment(reg, reg.size()))
03435                 {
03436                         reg.CleanGrow(2*reg.size());
03437                         reg[reg.size()/2]=1;
03438                 }
03439         }
03440         else
03441         {
03442                 word borrow = Decrement(reg, reg.size());
03443                 assert(!borrow);
03444                 if (WordCount()==0)
03445                         *this = Zero();
03446         }
03447         return *this;
03448 }
03449 
03450 Integer& Integer::operator--()
03451 {
03452         if (IsNegative())
03453         {
03454                 if (Increment(reg, reg.size()))
03455                 {
03456                         reg.CleanGrow(2*reg.size());
03457                         reg[reg.size()/2]=1;
03458                 }
03459         }
03460         else
03461         {
03462                 if (Decrement(reg, reg.size()))
03463                         *this = -One();
03464         }
03465         return *this;
03466 }
03467 
03468 void PositiveAdd(Integer &sum, const Integer &a, const Integer& b)
03469 {
03470         int carry;
03471         if (a.reg.size() == b.reg.size())
03472                 carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
03473         else if (a.reg.size() > b.reg.size())
03474         {
03475                 carry = Add(sum.reg, a.reg, b.reg, b.reg.size());
03476                 CopyWords(sum.reg+b.reg.size(), a.reg+b.reg.size(), a.reg.size()-b.reg.size());
03477                 carry = Increment(sum.reg+b.reg.size(), a.reg.size()-b.reg.size(), carry);
03478         }
03479         else
03480         {
03481                 carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
03482                 CopyWords(sum.reg+a.reg.size(), b.reg+a.reg.size(), b.reg.size()-a.reg.size());
03483                 carry = Increment(sum.reg+a.reg.size(), b.reg.size()-a.reg.size(), carry);
03484         }
03485 
03486         if (carry)
03487         {
03488                 sum.reg.CleanGrow(2*sum.reg.size());
03489                 sum.reg[sum.reg.size()/2] = 1;
03490         }
03491         sum.sign = Integer::POSITIVE;
03492 }
03493 
03494 void PositiveSubtract(Integer &diff, const Integer &a, const Integer& b)
03495 {
03496         unsigned aSize = a.WordCount();
03497         aSize += aSize%2;
03498         unsigned bSize = b.WordCount();
03499         bSize += bSize%2;
03500 
03501         if (aSize == bSize)
03502         {
03503                 if (Compare(a.reg, b.reg, aSize) >= 0)
03504                 {
03505                         Subtract(diff.reg, a.reg, b.reg, aSize);
03506                         diff.sign = Integer::POSITIVE;
03507                 }
03508                 else
03509                 {
03510                         Subtract(diff.reg, b.reg, a.reg, aSize);
03511                         diff.sign = Integer::NEGATIVE;
03512                 }
03513         }
03514         else if (aSize > bSize)
03515         {
03516                 word borrow = Subtract(diff.reg, a.reg, b.reg, bSize);
03517                 CopyWords(diff.reg+bSize, a.reg+bSize, aSize-bSize);
03518                 borrow = Decrement(diff.reg+bSize, aSize-bSize, borrow);
03519                 assert(!borrow);
03520                 diff.sign = Integer::POSITIVE;
03521         }
03522         else
03523         {
03524                 word borrow = Subtract(diff.reg, b.reg, a.reg, aSize);
03525                 CopyWords(diff.reg+aSize, b.reg+aSize, bSize-aSize);
03526                 borrow = Decrement(diff.reg+aSize, bSize-aSize, borrow);
03527                 assert(!borrow);
03528                 diff.sign = Integer::NEGATIVE;
03529         }
03530 }
03531 
03532 // MSVC .NET 2003 workaround
03533 template <class T> inline const T& STDMAX2(const T& a, const T& b)
03534 {
03535         return a < b ? b : a;
03536 }
03537 
03538 Integer Integer::Plus(const Integer& b) const
03539 {
03540         Integer sum((word)0, STDMAX2(reg.size(), b.reg.size()));
03541         if (NotNegative())
03542         {
03543                 if (b.NotNegative())
03544                         PositiveAdd(sum, *this, b);
03545                 else
03546                         PositiveSubtract(sum, *this, b);
03547         }
03548         else
03549         {
03550                 if (b.NotNegative())
03551                         PositiveSubtract(sum, b, *this);
03552                 else
03553                 {
03554                         PositiveAdd(sum, *this, b);
03555                         sum.sign = Integer::NEGATIVE;
03556                 }
03557         }
03558         return sum;
03559 }
03560 
03561 Integer& Integer::operator+=(const Integer& t)
03562 {
03563         reg.CleanGrow(t.reg.size());
03564         if (NotNegative())
03565         {
03566                 if (t.NotNegative())
03567                         PositiveAdd(*this, *this, t);
03568                 else
03569                         PositiveSubtract(*this, *this, t);
03570         }
03571         else
03572         {
03573                 if (t.NotNegative())
03574                         PositiveSubtract(*this, t, *this);
03575                 else
03576                 {
03577                         PositiveAdd(*this, *this, t);
03578                         sign = Integer::NEGATIVE;
03579                 }
03580         }
03581         return *this;
03582 }
03583 
03584 Integer Integer::Minus(const Integer& b) const
03585 {
03586         Integer diff((word)0, STDMAX2(reg.size(), b.reg.size()));
03587         if (NotNegative())
03588         {
03589                 if (b.NotNegative())
03590                         PositiveSubtract(diff, *this, b);
03591                 else
03592                         PositiveAdd(diff, *this, b);
03593         }
03594         else
03595         {
03596                 if (b.NotNegative())
03597                 {
03598                         PositiveAdd(diff, *this, b);
03599                         diff.sign = Integer::NEGATIVE;
03600                 }
03601                 else
03602                         PositiveSubtract(diff, b, *this);
03603         }
03604         return diff;
03605 }
03606 
03607 Integer& Integer::operator-=(const Integer& t)
03608 {
03609         reg.CleanGrow(t.reg.size());
03610         if (NotNegative())
03611         {
03612                 if (t.NotNegative())
03613                         PositiveSubtract(*this, *this, t);
03614                 else
03615                         PositiveAdd(*this, *this, t);
03616         }
03617         else
03618         {
03619                 if (t.NotNegative())
03620                 {
03621                         PositiveAdd(*this, *this, t);
03622                         sign = Integer::NEGATIVE;
03623                 }
03624                 else
03625                         PositiveSubtract(*this, t, *this);
03626         }
03627         return *this;
03628 }
03629 
03630 Integer& Integer::operator<<=(size_t n)
03631 {
03632         const size_t wordCount = WordCount();
03633         const size_t shiftWords = n / WORD_BITS;
03634         const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
03635 
03636         reg.CleanGrow(RoundupSize(wordCount+BitsToWords(n)));
03637         ShiftWordsLeftByWords(reg, wordCount + shiftWords, shiftWords);
03638         ShiftWordsLeftByBits(reg+shiftWords, wordCount+BitsToWords(shiftBits), shiftBits);
03639         return *this;
03640 }
03641 
03642 Integer& Integer::operator>>=(size_t n)
03643 {
03644         const size_t wordCount = WordCount();
03645         const size_t shiftWords = n / WORD_BITS;
03646         const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
03647 
03648         ShiftWordsRightByWords(reg, wordCount, shiftWords);
03649         if (wordCount > shiftWords)
03650                 ShiftWordsRightByBits(reg, wordCount-shiftWords, shiftBits);
03651         if (IsNegative() && WordCount()==0)   // avoid -0
03652                 *this = Zero();
03653         return *this;
03654 }
03655 
03656 void PositiveMultiply(Integer &product, const Integer &a, const Integer &b)
03657 {
03658         size_t aSize = RoundupSize(a.WordCount());
03659         size_t bSize = RoundupSize(b.WordCount());
03660 
03661         product.reg.CleanNew(RoundupSize(aSize+bSize));
03662         product.sign = Integer::POSITIVE;
03663 
03664         IntegerSecBlock workspace(aSize + bSize);
03665         AsymmetricMultiply(product.reg, workspace, a.reg, aSize, b.reg, bSize);
03666 }
03667 
03668 void Multiply(Integer &product, const Integer &a, const Integer &b)
03669 {
03670         PositiveMultiply(product, a, b);
03671 
03672         if (a.NotNegative() != b.NotNegative())
03673                 product.Negate();
03674 }
03675 
03676 Integer Integer::Times(const Integer &b) const
03677 {
03678         Integer product;
03679         Multiply(product, *this, b);
03680         return product;
03681 }
03682 
03683 /*
03684 void PositiveDivide(Integer &remainder, Integer &quotient,
03685                                    const Integer &dividend, const Integer &divisor)
03686 {
03687         remainder.reg.CleanNew(divisor.reg.size());
03688         remainder.sign = Integer::POSITIVE;
03689         quotient.reg.New(0);
03690         quotient.sign = Integer::POSITIVE;
03691         unsigned i=dividend.BitCount();
03692         while (i--)
03693         {
03694                 word overflow = ShiftWordsLeftByBits(remainder.reg, remainder.reg.size(), 1);
03695                 remainder.reg[0] |= dividend[i];
03696                 if (overflow || remainder >= divisor)
03697                 {
03698                         Subtract(remainder.reg, remainder.reg, divisor.reg, remainder.reg.size());
03699                         quotient.SetBit(i);
03700                 }
03701         }
03702 }
03703 */
03704 
03705 void PositiveDivide(Integer &remainder, Integer &quotient,
03706                                    const Integer &a, const Integer &b)
03707 {
03708         unsigned aSize = a.WordCount();
03709         unsigned bSize = b.WordCount();
03710 
03711         if (!bSize)
03712                 throw Integer::DivideByZero();
03713 
03714         if (aSize < bSize)
03715         {
03716                 remainder = a;
03717                 remainder.sign = Integer::POSITIVE;
03718                 quotient = Integer::Zero();
03719                 return;
03720         }
03721 
03722         aSize += aSize%2;       // round up to next even number
03723         bSize += bSize%2;
03724 
03725         remainder.reg.CleanNew(RoundupSize(bSize));
03726         remainder.sign = Integer::POSITIVE;
03727         quotient.reg.CleanNew(RoundupSize(aSize-bSize+2));
03728         quotient.sign = Integer::POSITIVE;
03729 
03730         IntegerSecBlock T(aSize+3*(bSize+2));
03731         Divide(remainder.reg, quotient.reg, T, a.reg, aSize, b.reg, bSize);
03732 }
03733 
03734 void Integer::Divide(Integer &remainder, Integer &quotient, const Integer &dividend, const Integer &divisor)
03735 {
03736         PositiveDivide(remainder, quotient, dividend, divisor);
03737 
03738         if (dividend.IsNegative())
03739         {
03740                 quotient.Negate();
03741                 if (remainder.NotZero())
03742                 {
03743                         --quotient;
03744                         remainder = divisor.AbsoluteValue() - remainder;
03745                 }
03746         }
03747 
03748         if (divisor.IsNegative())
03749                 quotient.Negate();
03750 }
03751 
03752 void Integer::DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n)
03753 {
03754         q = a;
03755         q >>= n;
03756 
03757         const size_t wordCount = BitsToWords(n);
03758         if (wordCount <= a.WordCount())
03759         {
03760                 r.reg.resize(RoundupSize(wordCount));
03761                 CopyWords(r.reg, a.reg, wordCount);
03762                 SetWords(r.reg+wordCount, 0, r.reg.size()-wordCount);
03763                 if (n % WORD_BITS != 0)
03764                         r.reg[wordCount-1] %= (word(1) << (n % WORD_BITS));
03765         }
03766         else
03767         {
03768                 r.reg.resize(RoundupSize(a.WordCount()));
03769                 CopyWords(r.reg, a.reg, r.reg.size());
03770         }
03771         r.sign = POSITIVE;
03772 
03773         if (a.IsNegative() && r.NotZero())
03774         {
03775                 --q;
03776                 r = Power2(n) - r;
03777         }
03778 }
03779 
03780 Integer Integer::DividedBy(const Integer &b) const
03781 {
03782         Integer remainder, quotient;
03783         Integer::Divide(remainder, quotient, *this, b);
03784         return quotient;
03785 }
03786 
03787 Integer Integer::Modulo(const Integer &b) const
03788 {
03789         Integer remainder, quotient;
03790         Integer::Divide(remainder, quotient, *this, b);
03791         return remainder;
03792 }
03793 
03794 void Integer::Divide(word &remainder, Integer &quotient, const Integer &dividend, word divisor)
03795 {
03796         if (!divisor)
03797                 throw Integer::DivideByZero();
03798 
03799         assert(divisor);
03800 
03801         if ((divisor & (divisor-1)) == 0)       // divisor is a power of 2
03802         {
03803                 quotient = dividend >> (BitPrecision(divisor)-1);
03804                 remainder = dividend.reg[0] & (divisor-1);
03805                 return;
03806         }
03807 
03808         unsigned int i = dividend.WordCount();
03809         quotient.reg.CleanNew(RoundupSize(i));
03810         remainder = 0;
03811         while (i--)
03812         {
03813                 quotient.reg[i] = DWord(dividend.reg[i], remainder) / divisor;
03814                 remainder = DWord(dividend.reg[i], remainder) % divisor;
03815         }
03816 
03817         if (dividend.NotNegative())
03818                 quotient.sign = POSITIVE;
03819         else
03820         {
03821                 quotient.sign = NEGATIVE;
03822                 if (remainder)
03823                 {
03824                         --quotient;
03825                         remainder = divisor - remainder;
03826                 }
03827         }
03828 }
03829 
03830 Integer Integer::DividedBy(word b) const
03831 {
03832         word remainder;
03833         Integer quotient;
03834         Integer::Divide(remainder, quotient, *this, b);
03835         return quotient;
03836 }
03837 
03838 word Integer::Modulo(word divisor) const
03839 {
03840         if (!divisor)
03841                 throw Integer::DivideByZero();
03842 
03843         assert(divisor);
03844 
03845         word remainder;
03846 
03847         if ((divisor & (divisor-1)) == 0)       // divisor is a power of 2
03848                 remainder = reg[0] & (divisor-1);
03849         else
03850         {
03851                 unsigned int i = WordCount();
03852 
03853                 if (divisor <= 5)
03854                 {
03855                         DWord sum(0, 0);
03856                         while (i--)
03857                                 sum += reg[i];
03858                         remainder = sum % divisor;
03859                 }
03860                 else
03861                 {
03862                         remainder = 0;
03863                         while (i--)
03864                                 remainder = DWord(reg[i], remainder) % divisor;
03865                 }
03866         }
03867 
03868         if (IsNegative() && remainder)
03869                 remainder = divisor - remainder;
03870 
03871         return remainder;
03872 }
03873 
03874 void Integer::Negate()
03875 {
03876         if (!!(*this))  // don't flip sign if *this==0
03877                 sign = Sign(1-sign);
03878 }
03879 
03880 int Integer::PositiveCompare(const Integer& t) const
03881 {
03882         unsigned size = WordCount(), tSize = t.WordCount();
03883 
03884         if (size == tSize)
03885                 return CryptoPP::Compare(reg, t.reg, size);
03886         else
03887                 return size > tSize ? 1 : -1;
03888 }
03889 
03890 int Integer::Compare(const Integer& t) const
03891 {
03892         if (NotNegative())
03893         {
03894                 if (t.NotNegative())
03895                         return PositiveCompare(t);
03896                 else
03897                         return 1;
03898         }
03899         else
03900         {
03901                 if (t.NotNegative())
03902                         return -1;
03903                 else
03904                         return -PositiveCompare(t);
03905         }
03906 }
03907 
03908 Integer Integer::SquareRoot() const
03909 {
03910         if (!IsPositive())
03911                 return Zero();
03912 
03913         // overestimate square root
03914         Integer x, y = Power2((BitCount()+1)/2);
03915         assert(y*y >= *this);
03916 
03917         do
03918         {
03919                 x = y;
03920                 y = (x + *this/x) >> 1;
03921         } while (y<x);
03922 
03923         return x;
03924 }
03925 
03926 bool Integer::IsSquare() const
03927 {
03928         Integer r = SquareRoot();
03929         return *this == r.Squared();
03930 }
03931 
03932 bool Integer::IsUnit() const
03933 {
03934         return (WordCount() == 1) && (reg[0] == 1);
03935 }
03936 
03937 Integer Integer::MultiplicativeInverse() const
03938 {
03939         return IsUnit() ? *this : Zero();
03940 }
03941 
03942 Integer a_times_b_mod_c(const Integer &x, const Integer& y, const Integer& m)
03943 {
03944         return x*y%m;
03945 }
03946 
03947 Integer a_exp_b_mod_c(const Integer &x, const Integer& e, const Integer& m)
03948 {
03949         ModularArithmetic mr(m);
03950         return mr.Exponentiate(x, e);
03951 }
03952 
03953 Integer Integer::Gcd(const Integer &a, const Integer &b)
03954 {
03955         return EuclideanDomainOf<Integer>().Gcd(a, b);
03956 }
03957 
03958 Integer Integer::InverseMod(const Integer &m) const
03959 {
03960         assert(m.NotNegative());
03961 
03962         if (IsNegative())
03963                 return Modulo(m).InverseMod(m);
03964 
03965         if (m.IsEven())
03966         {
03967                 if (!m || IsEven())
03968                         return Zero();  // no inverse
03969                 if (*this == One())
03970                         return One();
03971 
03972                 Integer u = m.Modulo(*this).InverseMod(*this);
03973                 return !u ? Zero() : (m*(*this-u)+1)/(*this);
03974         }
03975 
03976         SecBlock<word> T(m.reg.size() * 4);
03977         Integer r((word)0, m.reg.size());
03978         unsigned k = AlmostInverse(r.reg, T, reg, reg.size(), m.reg, m.reg.size());
03979         DivideByPower2Mod(r.reg, r.reg, k, m.reg, m.reg.size());
03980         return r;
03981 }
03982 
03983 word Integer::InverseMod(word mod) const
03984 {
03985         word g0 = mod, g1 = *this % mod;
03986         word v0 = 0, v1 = 1;
03987         word y;
03988 
03989         while (g1)
03990         {
03991                 if (g1 == 1)
03992                         return v1;
03993                 y = g0 / g1;
03994                 g0 = g0 % g1;
03995                 v0 += y * v1;
03996 
03997                 if (!g0)
03998                         break;
03999                 if (g0 == 1)
04000                         return mod-v0;
04001                 y = g1 / g0;
04002                 g1 = g1 % g0;
04003                 v1 += y * v0;
04004         }
04005         return 0;
04006 }
04007 
04008 // ********************************************************
04009 
04010 ModularArithmetic::ModularArithmetic(BufferedTransformation &bt)
04011 {
04012         BERSequenceDecoder seq(bt);
04013         OID oid(seq);
04014         if (oid != ASN1::prime_field())
04015                 BERDecodeError();
04016         m_modulus.BERDecode(seq);
04017         seq.MessageEnd();
04018         m_result.reg.resize(m_modulus.reg.size());
04019 }
04020 
04021 void ModularArithmetic::DEREncode(BufferedTransformation &bt) const
04022 {
04023         DERSequenceEncoder seq(bt);
04024         ASN1::prime_field().DEREncode(seq);
04025         m_modulus.DEREncode(seq);
04026         seq.MessageEnd();
04027 }
04028 
04029 void ModularArithmetic::DEREncodeElement(BufferedTransformation &out, const Element &a) const
04030 {
04031         a.DEREncodeAsOctetString(out, MaxElementByteLength());
04032 }
04033 
04034 void ModularArithmetic::BERDecodeElement(BufferedTransformation &in, Element &a) const
04035 {
04036         a.BERDecodeAsOctetString(in, MaxElementByteLength());
04037 }
04038 
04039 const Integer& ModularArithmetic::Half(const Integer &a) const
04040 {
04041         if (a.reg.size()==m_modulus.reg.size())
04042         {
04043                 CryptoPP::DivideByPower2Mod(m_result.reg.begin(), a.reg, 1, m_modulus.reg, a.reg.size());
04044                 return m_result;
04045         }
04046         else
04047                 return m_result1 = (a.IsEven() ? (a >> 1) : ((a+m_modulus) >> 1));
04048 }
04049 
04050 const Integer& ModularArithmetic::Add(const Integer &a, const Integer &b) const
04051 {
04052         if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04053         {
04054                 if (CryptoPP::Add(m_result.reg.begin(), a.reg, b.reg, a.reg.size())
04055                         || Compare(m_result.reg, m_modulus.reg, a.reg.size()) >= 0)
04056                 {
04057                         CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
04058                 }
04059                 return m_result;
04060         }
04061         else
04062         {
04063                 m_result1 = a+b;
04064                 if (m_result1 >= m_modulus)
04065                         m_result1 -= m_modulus;
04066                 return m_result1;
04067         }
04068 }
04069 
04070 Integer& ModularArithmetic::Accumulate(Integer &a, const Integer &b) const
04071 {
04072         if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04073         {
04074                 if (CryptoPP::Add(a.reg, a.reg, b.reg, a.reg.size())
04075                         || Compare(a.reg, m_modulus.reg, a.reg.size()) >= 0)
04076                 {
04077                         CryptoPP::Subtract(a.reg, a.reg, m_modulus.reg, a.reg.size());
04078                 }
04079         }
04080         else
04081         {
04082                 a+=b;
04083                 if (a>=m_modulus)
04084                         a-=m_modulus;
04085         }
04086 
04087         return a;
04088 }
04089 
04090 const Integer& ModularArithmetic::Subtract(const Integer &a, const Integer &b) const
04091 {
04092         if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04093         {
04094                 if (CryptoPP::Subtract(m_result.reg.begin(), a.reg, b.reg, a.reg.size()))
04095                         CryptoPP::Add(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
04096                 return m_result;
04097         }
04098         else
04099         {
04100                 m_result1 = a-b;
04101                 if (m_result1.IsNegative())
04102                         m_result1 += m_modulus;
04103                 return m_result1;
04104         }
04105 }
04106 
04107 Integer& ModularArithmetic::Reduce(Integer &a, const Integer &b) const
04108 {
04109         if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04110         {
04111                 if (CryptoPP::Subtract(a.reg, a.reg, b.reg, a.reg.size()))
04112                         CryptoPP::Add(a.reg, a.reg, m_modulus.reg, a.reg.size());
04113         }
04114         else
04115         {
04116                 a-=b;
04117                 if (a.IsNegative())
04118                         a+=m_modulus;
04119         }
04120 
04121         return a;
04122 }
04123 
04124 const Integer& ModularArithmetic::Inverse(const Integer &a) const
04125 {
04126         if (!a)
04127                 return a;
04128 
04129         CopyWords(m_result.reg.begin(), m_modulus.reg, m_modulus.reg.size());
04130         if (CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, a.reg, a.reg.size()))
04131                 Decrement(m_result.reg.begin()+a.reg.size(), m_modulus.reg.size()-a.reg.size());
04132 
04133         return m_result;
04134 }
04135 
04136 Integer ModularArithmetic::CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
04137 {
04138         if (m_modulus.IsOdd())
04139         {
04140                 MontgomeryRepresentation dr(m_modulus);
04141                 return dr.ConvertOut(dr.CascadeExponentiate(dr.ConvertIn(x), e1, dr.ConvertIn(y), e2));
04142         }
04143         else
04144                 return AbstractRing<Integer>::CascadeExponentiate(x, e1, y, e2);
04145 }
04146 
04147 void ModularArithmetic::SimultaneousExponentiate(Integer *results, const Integer &base, const Integer *exponents, unsigned int exponentsCount) const
04148 {
04149         if (m_modulus.IsOdd())
04150         {
04151                 MontgomeryRepresentation dr(m_modulus);
04152                 dr.SimultaneousExponentiate(results, dr.ConvertIn(base), exponents, exponentsCount);
04153                 for (unsigned int i=0; i<exponentsCount; i++)
04154                         results[i] = dr.ConvertOut(results[i]);
04155         }
04156         else
04157                 AbstractRing<Integer>::SimultaneousExponentiate(results, base, exponents, exponentsCount);
04158 }
04159 
04160 MontgomeryRepresentation::MontgomeryRepresentation(const Integer &m)    // modulus must be odd
04161         : ModularArithmetic(m),
04162           m_u((word)0, m_modulus.reg.size()),
04163           m_workspace(5*m_modulus.reg.size())
04164 {
04165         if (!m_modulus.IsOdd())
04166                 throw InvalidArgument("MontgomeryRepresentation: Montgomery representation requires an odd modulus");
04167 
04168         RecursiveInverseModPower2(m_u.reg, m_workspace, m_modulus.reg, m_modulus.reg.size());
04169 }
04170 
04171 const Integer& MontgomeryRepresentation::Multiply(const Integer &a, const Integer &b) const
04172 {
04173         word *const T = m_workspace.begin();
04174         word *const R = m_result.reg.begin();
04175         const size_t N = m_modulus.reg.size();
04176         assert(a.reg.size()<=N && b.reg.size()<=N);
04177 
04178         AsymmetricMultiply(T, T+2*N, a.reg, a.reg.size(), b.reg, b.reg.size());
04179         SetWords(T+a.reg.size()+b.reg.size(), 0, 2*N-a.reg.size()-b.reg.size());
04180         MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04181         return m_result;
04182 }
04183 
04184 const Integer& MontgomeryRepresentation::Square(const Integer &a) const
04185 {
04186         word *const T = m_workspace.begin();
04187         word *const R = m_result.reg.begin();
04188         const size_t N = m_modulus.reg.size();
04189         assert(a.reg.size()<=N);
04190 
04191         CryptoPP::Square(T, T+2*N, a.reg, a.reg.size());
04192         SetWords(T+2*a.reg.size(), 0, 2*N-2*a.reg.size());
04193         MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04194         return m_result;
04195 }
04196 
04197 Integer MontgomeryRepresentation::ConvertOut(const Integer &a) const
04198 {
04199         word *const T = m_workspace.begin();
04200         word *const R = m_result.reg.begin();
04201         const size_t N = m_modulus.reg.size();
04202         assert(a.reg.size()<=N);
04203 
04204         CopyWords(T, a.reg, a.reg.size());
04205         SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
04206         MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04207         return m_result;
04208 }
04209 
04210 const Integer& MontgomeryRepresentation::MultiplicativeInverse(const Integer &a) const
04211 {
04212 //        return (EuclideanMultiplicativeInverse(a, modulus)<<(2*WORD_BITS*modulus.reg.size()))%modulus;
04213         word *const T = m_workspace.begin();
04214         word *const R = m_result.reg.begin();
04215         const size_t N = m_modulus.reg.size();
04216         assert(a.reg.size()<=N);
04217 
04218         CopyWords(T, a.reg, a.reg.size());
04219         SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
04220         MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04221         unsigned k = AlmostInverse(R, T, R, N, m_modulus.reg, N);
04222 
04223 //      cout << "k=" << k << " N*32=" << 32*N << endl;
04224 
04225         if (k>N*WORD_BITS)
04226                 DivideByPower2Mod(R, R, k-N*WORD_BITS, m_modulus.reg, N);
04227         else
04228                 MultiplyByPower2Mod(R, R, N*WORD_BITS-k, m_modulus.reg, N);
04229 
04230         return m_result;
04231 }
04232 
04233 NAMESPACE_END
04234 
04235 #endif