Crypto++: integer.cpp Source File

00001 // integer.cpp - written and placed in the public domain by Wei Dai
00002 // contains public domain code contributed by Alister Lee and Leonard Janke
00003 
00004 #include "pch.h"
00005 
00006 #ifndef CRYPTOPP_IMPORTS
00007 
00008 #include "integer.h"
00009 #include "modarith.h"
00010 #include "nbtheory.h"
00011 #include "asn.h"
00012 #include "oids.h"
00013 #include "words.h"
00014 #include "algparam.h"
00015 #include "pubkey.h"             // for P1363_KDF2
00016 #include "sha.h"
00017 
00018 #include <iostream>
00019 
00020 #ifdef _M_X64
00021 #include <Intrin.h>
00022 #endif
00023 
00024 #ifdef SSE2_INTRINSICS_AVAILABLE
00025         #ifdef __GNUC__
00026                 #include <xmmintrin.h>
00027                 #include <signal.h>
00028                 #include <setjmp.h>
00029                 #ifdef CRYPTOPP_MEMALIGN_AVAILABLE
00030                         #include <malloc.h>
00031                 #else
00032                         #include <stdlib.h>
00033                 #endif
00034         #else
00035                 #include <emmintrin.h>
00036         #endif
00037 #elif defined(_MSC_VER) && defined(_M_IX86)
00038         #pragma message("You do not seem to have the Visual C++ Processor Pack installed, so use of SSE2 intrinsics will be disabled.")
00039 #elif defined(__GNUC__) && defined(__i386__)
00040         #warning "You do not have GCC 3.3 or later, or did not specify -msse2 compiler option, so use of SSE2 intrinsics will be disabled."
00041 #endif
00042 
00043 NAMESPACE_BEGIN(CryptoPP)
00044 
00045 bool AssignIntToInteger(const std::type_info &valueType, void *pInteger, const void *pInt)
00046 {
00047         if (valueType != typeid(Integer))
00048                 return false;
00049         *reinterpret_cast<Integer *>(pInteger) = *reinterpret_cast<const int *>(pInt);
00050         return true;
00051 }
00052 
00053 #ifdef SSE2_INTRINSICS_AVAILABLE
00054 template <class T>
00055 CPP_TYPENAME AlignedAllocator<T>::pointer AlignedAllocator<T>::allocate(size_type n, const void *)
00056 {
00057         CheckSize(n);
00058         if (n == 0)
00059                 return NULL;
00060         if (n >= 4)
00061         {
00062                 void *p;
00063         #ifdef CRYPTOPP_MM_MALLOC_AVAILABLE
00064                 while (!(p = _mm_malloc(sizeof(T)*n, 16)))
00065         #elif defined(CRYPTOPP_MEMALIGN_AVAILABLE)
00066                 while (!(p = memalign(16, sizeof(T)*n)))
00067         #elif defined(CRYPTOPP_MALLOC_ALIGNMENT_IS_16)
00068                 while (!(p = malloc(sizeof(T)*n)))
00069         #else
00070                 while (!(p = (byte *)malloc(sizeof(T)*n + 8)))  // assume malloc alignment is at least 8
00071         #endif
00072                         CallNewHandler();
00073 
00074         #ifdef CRYPTOPP_NO_ALIGNED_ALLOC
00075                 assert(m_pBlock == NULL);
00076                 m_pBlock = p;
00077                 if (!IsAlignedOn(p, 16))
00078                 {
00079                         assert(IsAlignedOn(p, 8));
00080                         p = (byte *)p + 8;
00081                 }
00082         #endif
00083 
00084                 assert(IsAlignedOn(p, 16));
00085                 return (T*)p;
00086         }
00087         return new T[n];
00088 }
00089 
00090 template <class T>
00091 void AlignedAllocator<T>::deallocate(void *p, size_type n)
00092 {
00093         memset(p, 0, n*sizeof(T));
00094         if (n >= 4)
00095         {
00096                 #ifdef CRYPTOPP_MM_MALLOC_AVAILABLE
00097                         _mm_free(p);
00098                 #elif defined(CRYPTOPP_NO_ALIGNED_ALLOC)
00099                         assert(m_pBlock == p || (byte *)m_pBlock+8 == p);
00100                         free(m_pBlock);
00101                         m_pBlock = NULL;
00102                 #else
00103                         free(p);
00104                 #endif
00105         }
00106         else
00107                 delete [] (T *)p;
00108 }
00109 
00110 template class CRYPTOPP_DLL AlignedAllocator<word>;
00111 #endif
00112 
00113 static int Compare(const word *A, const word *B, size_t N)
00114 {
00115         while (N--)
00116                 if (A[N] > B[N])
00117                         return 1;
00118                 else if (A[N] < B[N])
00119                         return -1;
00120 
00121         return 0;
00122 }
00123 
00124 static int Increment(word *A, size_t N, word B=1)
00125 {
00126         assert(N);
00127         word t = A[0];
00128         A[0] = t+B;
00129         if (A[0] >= t)
00130                 return 0;
00131         for (unsigned i=1; i<N; i++)
00132                 if (++A[i])
00133                         return 0;
00134         return 1;
00135 }
00136 
00137 static int Decrement(word *A, size_t N, word B=1)
00138 {
00139         assert(N);
00140         word t = A[0];
00141         A[0] = t-B;
00142         if (A[0] <= t)
00143                 return 0;
00144         for (unsigned i=1; i<N; i++)
00145                 if (A[i]--)
00146                         return 0;
00147         return 1;
00148 }
00149 
00150 static void TwosComplement(word *A, size_t N)
00151 {
00152         Decrement(A, N);
00153         for (unsigned i=0; i<N; i++)
00154                 A[i] = ~A[i];
00155 }
00156 
00157 static word AtomicInverseModPower2(word A)
00158 {
00159         assert(A%2==1);
00160 
00161         word R=A%8;
00162 
00163         for (unsigned i=3; i<WORD_BITS; i*=2)
00164                 R = R*(2-R*A);
00165 
00166         assert(R*A==1);
00167         return R;
00168 }
00169 
00170 // ********************************************************
00171 
00172 class DWord
00173 {
00174 public:
00175         DWord() {}
00176 
00177 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00178         explicit DWord(word low)
00179         {
00180                 m_whole = low;
00181         }
00182 #else
00183         explicit DWord(word low)
00184         {
00185                 m_halfs.low = low;
00186                 m_halfs.high = 0;
00187         }
00188 #endif
00189 
00190         DWord(word low, word high)
00191         {
00192                 m_halfs.low = low;
00193                 m_halfs.high = high;
00194         }
00195 
00196         static DWord Multiply(word a, word b)
00197         {
00198                 DWord r;
00199                 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00200                         r.m_whole = (dword)a * b;
00201                 #elif defined(__alpha__)
00202                         r.m_halfs.low = a*b; __asm__("umulh %1,%2,%0" : "=r" (r.m_halfs.high) : "r" (a), "r" (b));
00203                 #elif defined(__ia64__)
00204                         r.m_halfs.low = a*b; __asm__("xmpy.hu %0=%1,%2" : "=f" (r.m_halfs.high) : "f" (a), "f" (b));
00205                 #elif defined(_ARCH_PPC64)
00206                         r.m_halfs.low = a*b; __asm__("mulhdu %0,%1,%2" : "=r" (r.m_halfs.high) : "r" (a), "r" (b) : "cc");
00207                 #elif defined(__x86_64__)
00208                         __asm__("mulq %3" : "=d" (r.m_halfs.high), "=a" (r.m_halfs.low) : "a" (a), "rm" (b) : "cc");
00209                 #elif defined(__mips64)
00210                         __asm__("dmultu %2,%3" : "=h" (r.m_halfs.high), "=l" (r.m_halfs.low) : "r" (a), "r" (b));
00211                 #elif defined(_M_X64)
00212                         r.m_halfs.low = _umul128(a, b, &r.m_halfs.high);
00213                 #elif defined(_M_IX86)
00214                         // for testing
00215                         word64 t = (word64)a * b;
00216                         r.m_halfs.high = ((word32 *)(&t))[1];
00217                         r.m_halfs.low = (word32)t;
00218                 #else
00219                         #error can not implement DWord
00220                 #endif
00221                 return r;
00222         }
00223 
00224         static DWord MultiplyAndAdd(word a, word b, word c)
00225         {
00226                 DWord r = Multiply(a, b);
00227                 return r += c;
00228         }
00229 
00230         DWord & operator+=(word a)
00231         {
00232                 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00233                         m_whole = m_whole + a;
00234                 #else
00235                         m_halfs.low += a;
00236                         m_halfs.high += (m_halfs.low < a);
00237                 #endif
00238                 return *this;
00239         }
00240 
00241         DWord operator+(word a)
00242         {
00243                 DWord r;
00244                 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00245                         r.m_whole = m_whole + a;
00246                 #else
00247                         r.m_halfs.low = m_halfs.low + a;
00248                         r.m_halfs.high = m_halfs.high + (r.m_halfs.low < a);
00249                 #endif
00250                 return r;
00251         }
00252 
00253         DWord operator-(DWord a)
00254         {
00255                 DWord r;
00256                 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00257                         r.m_whole = m_whole - a.m_whole;
00258                 #else
00259                         r.m_halfs.low = m_halfs.low - a.m_halfs.low;
00260                         r.m_halfs.high = m_halfs.high - a.m_halfs.high - (r.m_halfs.low > m_halfs.low);
00261                 #endif
00262                 return r;
00263         }
00264 
00265         DWord operator-(word a)
00266         {
00267                 DWord r;
00268                 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00269                         r.m_whole = m_whole - a;
00270                 #else
00271                         r.m_halfs.low = m_halfs.low - a;
00272                         r.m_halfs.high = m_halfs.high - (r.m_halfs.low > m_halfs.low);
00273                 #endif
00274                 return r;
00275         }
00276 
00277         // returns quotient, which must fit in a word
00278         word operator/(word divisor);
00279 
00280         word operator%(word a);
00281 
00282         bool operator!() const
00283         {
00284         #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00285                 return !m_whole;
00286         #else
00287                 return !m_halfs.high && !m_halfs.low;
00288         #endif
00289         }
00290 
00291         word GetLowHalf() const {return m_halfs.low;}
00292         word GetHighHalf() const {return m_halfs.high;}
00293         word GetHighHalfAsBorrow() const {return 0-m_halfs.high;}
00294 
00295 private:
00296         union
00297         {
00298         #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00299                 dword m_whole;
00300         #endif
00301                 struct
00302                 {
00303                 #ifdef IS_LITTLE_ENDIAN
00304                         word low;
00305                         word high;
00306                 #else
00307                         word high;
00308                         word low;
00309                 #endif
00310                 } m_halfs;
00311         };
00312 };
00313 
00314 class Word
00315 {
00316 public:
00317         Word() {}
00318 
00319         Word(word value)
00320         {
00321                 m_whole = value;
00322         }
00323 
00324         Word(hword low, hword high)
00325         {
00326                 m_whole = low | (word(high) << (WORD_BITS/2));
00327         }
00328 
00329         static Word Multiply(hword a, hword b)
00330         {
00331                 Word r;
00332                 r.m_whole = (word)a * b;
00333                 return r;
00334         }
00335 
00336         Word operator-(Word a)
00337         {
00338                 Word r;
00339                 r.m_whole = m_whole - a.m_whole;
00340                 return r;
00341         }
00342 
00343         Word operator-(hword a)
00344         {
00345                 Word r;
00346                 r.m_whole = m_whole - a;
00347                 return r;
00348         }
00349 
00350         // returns quotient, which must fit in a word
00351         hword operator/(hword divisor)
00352         {
00353                 return hword(m_whole / divisor);
00354         }
00355 
00356         bool operator!() const
00357         {
00358                 return !m_whole;
00359         }
00360 
00361         word GetWhole() const {return m_whole;}
00362         hword GetLowHalf() const {return hword(m_whole);}
00363         hword GetHighHalf() const {return hword(m_whole>>(WORD_BITS/2));}
00364         hword GetHighHalfAsBorrow() const {return 0-hword(m_whole>>(WORD_BITS/2));}
00365         
00366 private:
00367         word m_whole;
00368 };
00369 
00370 // do a 3 word by 2 word divide, returns quotient and leaves remainder in A
00371 template <class S, class D>
00372 S DivideThreeWordsByTwo(S *A, S B0, S B1, D *dummy=NULL)
00373 {
00374         // assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a S
00375         assert(A[2] < B1 || (A[2]==B1 && A[1] < B0));
00376 
00377         // estimate the quotient: do a 2 S by 1 S divide
00378         S Q;
00379         if (S(B1+1) == 0)
00380                 Q = A[2];
00381         else
00382                 Q = D(A[1], A[2]) / S(B1+1);
00383 
00384         // now subtract Q*B from A
00385         D p = D::Multiply(B0, Q);
00386         D u = (D) A[0] - p.GetLowHalf();
00387         A[0] = u.GetLowHalf();
00388         u = (D) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - D::Multiply(B1, Q);
00389         A[1] = u.GetLowHalf();
00390         A[2] += u.GetHighHalf();
00391 
00392         // Q <= actual quotient, so fix it
00393         while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
00394         {
00395                 u = (D) A[0] - B0;
00396                 A[0] = u.GetLowHalf();
00397                 u = (D) A[1] - B1 - u.GetHighHalfAsBorrow();
00398                 A[1] = u.GetLowHalf();
00399                 A[2] += u.GetHighHalf();
00400                 Q++;
00401                 assert(Q);      // shouldn't overflow
00402         }
00403 
00404         return Q;
00405 }
00406 
00407 // do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1
00408 template <class S, class D>
00409 inline D DivideFourWordsByTwo(S *T, const D &Al, const D &Ah, const D &B)
00410 {
00411         if (!B) // if divisor is 0, we assume divisor==2**(2*WORD_BITS)
00412                 return D(Ah.GetLowHalf(), Ah.GetHighHalf());
00413         else
00414         {
00415                 S Q[2];
00416                 T[0] = Al.GetLowHalf();
00417                 T[1] = Al.GetHighHalf(); 
00418                 T[2] = Ah.GetLowHalf();
00419                 T[3] = Ah.GetHighHalf();
00420                 Q[1] = DivideThreeWordsByTwo<S, D>(T+1, B.GetLowHalf(), B.GetHighHalf());
00421                 Q[0] = DivideThreeWordsByTwo<S, D>(T, B.GetLowHalf(), B.GetHighHalf());
00422                 return D(Q[0], Q[1]);
00423         }
00424 }
00425 
00426 // returns quotient, which must fit in a word
00427 inline word DWord::operator/(word a)
00428 {
00429         #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00430                 return word(m_whole / a);
00431         #else
00432                 hword r[4];
00433                 return DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a).GetWhole();
00434         #endif
00435 }
00436 
00437 inline word DWord::operator%(word a)
00438 {
00439         #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00440                 return word(m_whole % a);
00441         #else
00442                 if (a < (word(1) << (WORD_BITS/2)))
00443                 {
00444                         hword h = hword(a);
00445                         word r = m_halfs.high % h;
00446                         r = ((m_halfs.low >> (WORD_BITS/2)) + (r << (WORD_BITS/2))) % h;
00447                         return hword((hword(m_halfs.low) + (r << (WORD_BITS/2))) % h);
00448                 }
00449                 else
00450                 {
00451                         hword r[4];
00452                         DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a);
00453                         return Word(r[0], r[1]).GetWhole();
00454                 }
00455         #endif
00456 }
00457 
00458 // ********************************************************
00459 
00460 class Portable
00461 {
00462 public:
00463         static int Add(word *C, const word *A, const word *B, size_t N);
00464         static int Subtract(word *C, const word *A, const word *B, size_t N);
00465 
00466         static inline void Multiply2(word *C, const word *A, const word *B);
00467         static inline word Multiply2Add(word *C, const word *A, const word *B);
00468         static void Multiply4(word *C, const word *A, const word *B);
00469         static void Multiply8(word *C, const word *A, const word *B);
00470         static inline unsigned int MultiplyRecursionLimit() {return 8;}
00471 
00472         static inline void Multiply2Bottom(word *C, const word *A, const word *B);
00473         static void Multiply4Bottom(word *C, const word *A, const word *B);
00474         static void Multiply8Bottom(word *C, const word *A, const word *B);
00475         static inline unsigned int MultiplyBottomRecursionLimit() {return 8;}
00476 
00477         static void Square2(word *R, const word *A);
00478         static void Square4(word *R, const word *A);
00479         static void Square8(word *R, const word *A) {assert(false);}
00480         static inline unsigned int SquareRecursionLimit() {return 4;}
00481 };
00482 
00483 int Portable::Add(word *C, const word *A, const word *B, size_t N)
00484 {
00485         assert (N%2 == 0);
00486 
00487         DWord u(0, 0);
00488         for (unsigned int i = 0; i < N; i+=2)
00489         {
00490                 u = DWord(A[i]) + B[i] + u.GetHighHalf();
00491                 C[i] = u.GetLowHalf();
00492                 u = DWord(A[i+1]) + B[i+1] + u.GetHighHalf();
00493                 C[i+1] = u.GetLowHalf();
00494         }
00495         return int(u.GetHighHalf());
00496 }
00497 
00498 int Portable::Subtract(word *C, const word *A, const word *B, size_t N)
00499 {
00500         assert (N%2 == 0);
00501 
00502         DWord u(0, 0);
00503         for (unsigned int i = 0; i < N; i+=2)
00504         {
00505                 u = (DWord) A[i] - B[i] - u.GetHighHalfAsBorrow();
00506                 C[i] = u.GetLowHalf();
00507                 u = (DWord) A[i+1] - B[i+1] - u.GetHighHalfAsBorrow();
00508                 C[i+1] = u.GetLowHalf();
00509         }
00510         return int(0-u.GetHighHalf());
00511 }
00512 
00513 void Portable::Multiply2(word *C, const word *A, const word *B)
00514 {
00515 /*
00516         word s;
00517         dword d;
00518 
00519         if (A1 >= A0)
00520                 if (B0 >= B1)
00521                 {
00522                         s = 0;
00523                         d = (dword)(A1-A0)*(B0-B1);
00524                 }
00525                 else
00526                 {
00527                         s = (A1-A0);
00528                         d = (dword)s*(word)(B0-B1);
00529                 }
00530         else
00531                 if (B0 > B1)
00532                 {
00533                         s = (B0-B1);
00534                         d = (word)(A1-A0)*(dword)s;
00535                 }
00536                 else
00537                 {
00538                         s = 0;
00539                         d = (dword)(A0-A1)*(B1-B0);
00540                 }
00541 */
00542         // this segment is the branchless equivalent of above
00543         word D[4] = {A[1]-A[0], A[0]-A[1], B[0]-B[1], B[1]-B[0]};
00544         unsigned int ai = A[1] < A[0];
00545         unsigned int bi = B[0] < B[1];
00546         unsigned int di = ai & bi;
00547         DWord d = DWord::Multiply(D[di], D[di+2]);
00548         D[1] = D[3] = 0;
00549         unsigned int si = ai + !bi;
00550         word s = D[si];
00551 
00552         DWord A0B0 = DWord::Multiply(A[0], B[0]);
00553         C[0] = A0B0.GetLowHalf();
00554 
00555         DWord A1B1 = DWord::Multiply(A[1], B[1]);
00556         DWord t = (DWord) A0B0.GetHighHalf() + A0B0.GetLowHalf() + d.GetLowHalf() + A1B1.GetLowHalf();
00557         C[1] = t.GetLowHalf();
00558 
00559         t = A1B1 + t.GetHighHalf() + A0B0.GetHighHalf() + d.GetHighHalf() + A1B1.GetHighHalf() - s;
00560         C[2] = t.GetLowHalf();
00561         C[3] = t.GetHighHalf();
00562 }
00563 
00564 inline void Portable::Multiply2Bottom(word *C, const word *A, const word *B)
00565 {
00566         DWord t = DWord::Multiply(A[0], B[0]);
00567         C[0] = t.GetLowHalf();
00568         C[1] = t.GetHighHalf() + A[0]*B[1] + A[1]*B[0];
00569 }
00570 
00571 word Portable::Multiply2Add(word *C, const word *A, const word *B)
00572 {
00573         word D[4] = {A[1]-A[0], A[0]-A[1], B[0]-B[1], B[1]-B[0]};
00574         unsigned int ai = A[1] < A[0];
00575         unsigned int bi = B[0] < B[1];
00576         unsigned int di = ai & bi;
00577         DWord d = DWord::Multiply(D[di], D[di+2]);
00578         D[1] = D[3] = 0;
00579         unsigned int si = ai + !bi;
00580         word s = D[si];
00581 
00582         DWord A0B0 = DWord::Multiply(A[0], B[0]);
00583         DWord t = A0B0 + C[0];
00584         C[0] = t.GetLowHalf();
00585 
00586         DWord A1B1 = DWord::Multiply(A[1], B[1]);
00587         t = (DWord) t.GetHighHalf() + A0B0.GetLowHalf() + d.GetLowHalf() + A1B1.GetLowHalf() + C[1];
00588         C[1] = t.GetLowHalf();
00589 
00590         t = (DWord) t.GetHighHalf() + A1B1.GetLowHalf() + A0B0.GetHighHalf() + d.GetHighHalf() + A1B1.GetHighHalf() - s + C[2];
00591         C[2] = t.GetLowHalf();
00592 
00593         t = (DWord) t.GetHighHalf() + A1B1.GetHighHalf() + C[3];
00594         C[3] = t.GetLowHalf();
00595         return t.GetHighHalf();
00596 }
00597 
00598 #define MulAcc(x, y)                                                            \
00599         p = DWord::MultiplyAndAdd(A[x], B[y], c);               \
00600         c = p.GetLowHalf();                                                             \
00601         p = (DWord) d + p.GetHighHalf();                                        \
00602         d = p.GetLowHalf();                                                             \
00603         e += p.GetHighHalf();
00604 
00605 #define SaveMulAcc(s, x, y)                                             \
00606         R[s] = c;                                                                               \
00607         p = DWord::MultiplyAndAdd(A[x], B[y], d);                               \
00608         c = p.GetLowHalf();                                                             \
00609         p = (DWord) e + p.GetHighHalf();                                        \
00610         d = p.GetLowHalf();                                                             \
00611         e = p.GetHighHalf();
00612 
00613 #define SquAcc(x, y)                                                            \
00614         q = DWord::Multiply(A[x], A[y]);        \
00615         p = q + c;                                      \
00616         c = p.GetLowHalf();                                                             \
00617         p = (DWord) d + p.GetHighHalf();                                        \
00618         d = p.GetLowHalf();                                                             \
00619         e += p.GetHighHalf();                   \
00620         p = q + c;                                      \
00621         c = p.GetLowHalf();                                                             \
00622         p = (DWord) d + p.GetHighHalf();                                        \
00623         d = p.GetLowHalf();                                                             \
00624         e += p.GetHighHalf();
00625 
00626 #define SaveSquAcc(s, x, y)                                             \
00627         R[s] = c;                                                                               \
00628         q = DWord::Multiply(A[x], A[y]);        \
00629         p = q + d;                                      \
00630         c = p.GetLowHalf();                                                             \
00631         p = (DWord) e + p.GetHighHalf();                                        \
00632         d = p.GetLowHalf();                                                             \
00633         e = p.GetHighHalf();                    \
00634         p = q + c;                                      \
00635         c = p.GetLowHalf();                                                             \
00636         p = (DWord) d + p.GetHighHalf();                                        \
00637         d = p.GetLowHalf();                                                             \
00638         e += p.GetHighHalf();
00639 
00640 void Portable::Multiply4(word *R, const word *A, const word *B)
00641 {
00642         DWord p;
00643         word c, d, e;
00644 
00645         p = DWord::Multiply(A[0], B[0]);
00646         R[0] = p.GetLowHalf();
00647         c = p.GetHighHalf();
00648         d = e = 0;
00649 
00650         MulAcc(0, 1);
00651         MulAcc(1, 0);
00652 
00653         SaveMulAcc(1, 2, 0);
00654         MulAcc(1, 1);
00655         MulAcc(0, 2);
00656 
00657         SaveMulAcc(2, 0, 3);
00658         MulAcc(1, 2);
00659         MulAcc(2, 1);
00660         MulAcc(3, 0);
00661 
00662         SaveMulAcc(3, 3, 1);
00663         MulAcc(2, 2);
00664         MulAcc(1, 3);
00665 
00666         SaveMulAcc(4, 2, 3);
00667         MulAcc(3, 2);
00668 
00669         R[5] = c;
00670         p = DWord::MultiplyAndAdd(A[3], B[3], d);
00671         R[6] = p.GetLowHalf();
00672         R[7] = e + p.GetHighHalf();
00673 }
00674 
00675 void Portable::Square2(word *R, const word *A)
00676 {
00677         DWord p, q;
00678         word c, d, e;
00679 
00680         p = DWord::Multiply(A[0], A[0]);
00681         R[0] = p.GetLowHalf();
00682         c = p.GetHighHalf();
00683         d = e = 0;
00684 
00685         SquAcc(0, 1);
00686 
00687         R[1] = c;
00688         p = DWord::MultiplyAndAdd(A[1], A[1], d);
00689         R[2] = p.GetLowHalf();
00690         R[3] = e + p.GetHighHalf();
00691 }
00692 
00693 void Portable::Square4(word *R, const word *A)
00694 {
00695 #ifdef _MSC_VER
00696         // VC60 workaround: MSVC 6.0 has an optimization bug that makes
00697         // (dword)A*B where either A or B has been cast to a dword before
00698         // very expensive. Revisit this function when this
00699         // bug is fixed.
00700         Multiply4(R, A, A);
00701 #else
00702         const word *B = A;
00703         DWord p, q;
00704         word c, d, e;
00705 
00706         p = DWord::Multiply(A[0], A[0]);
00707         R[0] = p.GetLowHalf();
00708         c = p.GetHighHalf();
00709         d = e = 0;
00710 
00711         SquAcc(0, 1);
00712 
00713         SaveSquAcc(1, 2, 0);
00714         MulAcc(1, 1);
00715 
00716         SaveSquAcc(2, 0, 3);
00717         SquAcc(1, 2);
00718 
00719         SaveSquAcc(3, 3, 1);
00720         MulAcc(2, 2);
00721 
00722         SaveSquAcc(4, 2, 3);
00723 
00724         R[5] = c;
00725         p = DWord::MultiplyAndAdd(A[3], A[3], d);
00726         R[6] = p.GetLowHalf();
00727         R[7] = e + p.GetHighHalf();
00728 #endif
00729 }
00730 
00731 void Portable::Multiply8(word *R, const word *A, const word *B)
00732 {
00733         DWord p;
00734         word c, d, e;
00735 
00736         p = DWord::Multiply(A[0], B[0]);
00737         R[0] = p.GetLowHalf();
00738         c = p.GetHighHalf();
00739         d = e = 0;
00740 
00741         MulAcc(0, 1);
00742         MulAcc(1, 0);
00743 
00744         SaveMulAcc(1, 2, 0);
00745         MulAcc(1, 1);
00746         MulAcc(0, 2);
00747 
00748         SaveMulAcc(2, 0, 3);
00749         MulAcc(1, 2);
00750         MulAcc(2, 1);
00751         MulAcc(3, 0);
00752 
00753         SaveMulAcc(3, 0, 4);
00754         MulAcc(1, 3);
00755         MulAcc(2, 2);
00756         MulAcc(3, 1);
00757         MulAcc(4, 0);
00758 
00759         SaveMulAcc(4, 0, 5);
00760         MulAcc(1, 4);
00761         MulAcc(2, 3);
00762         MulAcc(3, 2);
00763         MulAcc(4, 1);
00764         MulAcc(5, 0);
00765 
00766         SaveMulAcc(5, 0, 6);
00767         MulAcc(1, 5);
00768         MulAcc(2, 4);
00769         MulAcc(3, 3);
00770         MulAcc(4, 2);
00771         MulAcc(5, 1);
00772         MulAcc(6, 0);
00773 
00774         SaveMulAcc(6, 0, 7);
00775         MulAcc(1, 6);
00776         MulAcc(2, 5);
00777         MulAcc(3, 4);
00778         MulAcc(4, 3);
00779         MulAcc(5, 2);
00780         MulAcc(6, 1);
00781         MulAcc(7, 0);
00782 
00783         SaveMulAcc(7, 1, 7);
00784         MulAcc(2, 6);
00785         MulAcc(3, 5);
00786         MulAcc(4, 4);
00787         MulAcc(5, 3);
00788         MulAcc(6, 2);
00789         MulAcc(7, 1);
00790 
00791         SaveMulAcc(8, 2, 7);
00792         MulAcc(3, 6);
00793         MulAcc(4, 5);
00794         MulAcc(5, 4);
00795         MulAcc(6, 3);
00796         MulAcc(7, 2);
00797 
00798         SaveMulAcc(9, 3, 7);
00799         MulAcc(4, 6);
00800         MulAcc(5, 5);
00801         MulAcc(6, 4);
00802         MulAcc(7, 3);
00803 
00804         SaveMulAcc(10, 4, 7);
00805         MulAcc(5, 6);
00806         MulAcc(6, 5);
00807         MulAcc(7, 4);
00808 
00809         SaveMulAcc(11, 5, 7);
00810         MulAcc(6, 6);
00811         MulAcc(7, 5);
00812 
00813         SaveMulAcc(12, 6, 7);
00814         MulAcc(7, 6);
00815 
00816         R[13] = c;
00817         p = DWord::MultiplyAndAdd(A[7], B[7], d);
00818         R[14] = p.GetLowHalf();
00819         R[15] = e + p.GetHighHalf();
00820 }
00821 
00822 void Portable::Multiply4Bottom(word *R, const word *A, const word *B)
00823 {
00824         DWord p;
00825         word c, d, e;
00826 
00827         p = DWord::Multiply(A[0], B[0]);
00828         R[0] = p.GetLowHalf();
00829         c = p.GetHighHalf();
00830         d = e = 0;
00831 
00832         MulAcc(0, 1);
00833         MulAcc(1, 0);
00834 
00835         SaveMulAcc(1, 2, 0);
00836         MulAcc(1, 1);
00837         MulAcc(0, 2);
00838 
00839         R[2] = c;
00840         R[3] = d + A[0] * B[3] + A[1] * B[2] + A[2] * B[1] + A[3] * B[0];
00841 }
00842 
00843 void Portable::Multiply8Bottom(word *R, const word *A, const word *B)
00844 {
00845         DWord p;
00846         word c, d, e;
00847 
00848         p = DWord::Multiply(A[0], B[0]);
00849         R[0] = p.GetLowHalf();
00850         c = p.GetHighHalf();
00851         d = e = 0;
00852 
00853         MulAcc(0, 1);
00854         MulAcc(1, 0);
00855 
00856         SaveMulAcc(1, 2, 0);
00857         MulAcc(1, 1);
00858         MulAcc(0, 2);
00859 
00860         SaveMulAcc(2, 0, 3);
00861         MulAcc(1, 2);
00862         MulAcc(2, 1);
00863         MulAcc(3, 0);
00864 
00865         SaveMulAcc(3, 0, 4);
00866         MulAcc(1, 3);
00867         MulAcc(2, 2);
00868         MulAcc(3, 1);
00869         MulAcc(4, 0);
00870 
00871         SaveMulAcc(4, 0, 5);
00872         MulAcc(1, 4);
00873         MulAcc(2, 3);
00874         MulAcc(3, 2);
00875         MulAcc(4, 1);
00876         MulAcc(5, 0);
00877 
00878         SaveMulAcc(5, 0, 6);
00879         MulAcc(1, 5);
00880         MulAcc(2, 4);
00881         MulAcc(3, 3);
00882         MulAcc(4, 2);
00883         MulAcc(5, 1);
00884         MulAcc(6, 0);
00885 
00886         R[6] = c;
00887         R[7] = d + A[0] * B[7] + A[1] * B[6] + A[2] * B[5] + A[3] * B[4] +
00888                                 A[4] * B[3] + A[5] * B[2] + A[6] * B[1] + A[7] * B[0];
00889 }
00890 
00891 #undef MulAcc
00892 #undef SaveMulAcc
00893 #undef SquAcc
00894 #undef SaveSquAcc
00895 
00896 #ifdef CRYPTOPP_X86ASM_AVAILABLE
00897 
00898 // ************** x86 feature detection ***************
00899 
00900 static bool s_sse2Enabled = true;
00901 
00902 static void CpuId(word32 input, word32 *output)
00903 {
00904 #ifdef __GNUC__
00905         __asm__
00906         (
00907                 // save ebx in case -fPIC is being used
00908                 "push %%ebx; cpuid; mov %%ebx, %%edi; pop %%ebx"
00909                 : "=a" (output[0]), "=D" (output[1]), "=c" (output[2]), "=d" (output[3])
00910                 : "a" (input)
00911         );
00912 #else
00913         __asm
00914         {
00915                 mov eax, input
00916                 cpuid
00917                 mov edi, output
00918                 mov [edi], eax
00919                 mov [edi+4], ebx
00920                 mov [edi+8], ecx
00921                 mov [edi+12], edx
00922         }
00923 #endif
00924 }
00925 
00926 #ifdef SSE2_INTRINSICS_AVAILABLE
00927 #ifndef _MSC_VER
00928 static jmp_buf s_env;
00929 static void SigIllHandler(int)
00930 {
00931         longjmp(s_env, 1);
00932 }
00933 #endif
00934 
00935 static bool HasSSE2()
00936 {
00937         if (!s_sse2Enabled)
00938                 return false;
00939 
00940         word32 cpuid[4];
00941         CpuId(1, cpuid);
00942         if ((cpuid[3] & (1 << 26)) == 0)
00943                 return false;
00944 
00945 #ifdef _MSC_VER
00946     __try
00947         {
00948         __asm xorpd xmm0, xmm0        // executing SSE2 instruction
00949         }
00950     __except (1)
00951         {
00952                 return false;
00953     }
00954         return true;
00955 #else
00956         typedef void (*SigHandler)(int);
00957 
00958         SigHandler oldHandler = signal(SIGILL, SigIllHandler);
00959         if (oldHandler == SIG_ERR)
00960                 return false;
00961 
00962         bool result = true;
00963         if (setjmp(s_env))
00964                 result = false;
00965         else
00966                 __asm __volatile ("xorps %xmm0, %xmm0");
00967 
00968         signal(SIGILL, oldHandler);
00969         return result;
00970 #endif
00971 }
00972 #endif
00973 
00974 static bool IsP4()
00975 {
00976         word32 cpuid[4];
00977 
00978         CpuId(0, cpuid);
00979         std::swap(cpuid[2], cpuid[3]);
00980         if (memcmp(cpuid+1, "GenuineIntel", 12) != 0)
00981                 return false;
00982 
00983         CpuId(1, cpuid);
00984         return ((cpuid[0] >> 8) & 0xf) == 0xf;
00985 }
00986 
00987 // ************** Pentium/P4 optimizations ***************
00988 
00989 class PentiumOptimized : public Portable
00990 {
00991 public:
00992         static int Add(word *C, const word *A, const word *B, size_t N);
00993         static int Subtract(word *C, const word *A, const word *B, size_t N);
00994         static void Multiply4(word *C, const word *A, const word *B);
00995         static void Multiply8(word *C, const word *A, const word *B);
00996         static void Multiply8Bottom(word *C, const word *A, const word *B);
00997 };
00998 
00999 class P4Optimized
01000 {
01001 public:
01002         static int Add(word *C, const word *A, const word *B, size_t N);
01003         static int Subtract(word *C, const word *A, const word *B, size_t N);
01004 #ifdef SSE2_INTRINSICS_AVAILABLE
01005         static void Multiply4(word *C, const word *A, const word *B);
01006         static void Multiply8(word *C, const word *A, const word *B);
01007         static void Multiply8Bottom(word *C, const word *A, const word *B);
01008 #endif
01009 };
01010 
01011 typedef int (* PAddSub)(word *C, const word *A, const word *B, size_t N);
01012 typedef void (* PMul)(word *C, const word *A, const word *B);
01013 
01014 static PAddSub s_pAdd, s_pSub;
01015 #ifdef SSE2_INTRINSICS_AVAILABLE
01016 static PMul s_pMul4, s_pMul8, s_pMul8B;
01017 #endif
01018 
01019 static void SetPentiumFunctionPointers()
01020 {
01021         if (IsP4())
01022         {
01023                 s_pAdd = &P4Optimized::Add;
01024                 s_pSub = &P4Optimized::Subtract;
01025         }
01026         else
01027         {
01028                 s_pAdd = &PentiumOptimized::Add;
01029                 s_pSub = &PentiumOptimized::Subtract;
01030         }
01031 
01032 #ifdef SSE2_INTRINSICS_AVAILABLE
01033         if (HasSSE2())
01034         {
01035                 s_pMul4 = &P4Optimized::Multiply4;
01036                 s_pMul8 = &P4Optimized::Multiply8;
01037                 s_pMul8B = &P4Optimized::Multiply8Bottom;
01038         }
01039         else
01040         {
01041                 s_pMul4 = &PentiumOptimized::Multiply4;
01042                 s_pMul8 = &PentiumOptimized::Multiply8;
01043                 s_pMul8B = &PentiumOptimized::Multiply8Bottom;
01044         }
01045 #endif
01046 }
01047 
01048 void DisableSSE2()
01049 {
01050         s_sse2Enabled = false;
01051         SetPentiumFunctionPointers();
01052 }
01053 
01054 class LowLevel : public PentiumOptimized
01055 {
01056 public:
01057         inline static int Add(word *C, const word *A, const word *B, size_t N)
01058                 {return s_pAdd(C, A, B, N);}
01059         inline static int Subtract(word *C, const word *A, const word *B, size_t N)
01060                 {return s_pSub(C, A, B, N);}
01061         inline static void Square4(word *R, const word *A)
01062                 {Multiply4(R, A, A);}
01063 #ifdef SSE2_INTRINSICS_AVAILABLE
01064         inline static void Multiply4(word *C, const word *A, const word *B)
01065                 {s_pMul4(C, A, B);}
01066         inline static void Multiply8(word *C, const word *A, const word *B)
01067                 {s_pMul8(C, A, B);}
01068         inline static void Multiply8Bottom(word *C, const word *A, const word *B)
01069                 {s_pMul8B(C, A, B);}
01070 #endif
01071 };
01072 
01073 // use some tricks to share assembly code between MSVC and GCC
01074 #ifdef _MSC_VER
01075         #define CRYPTOPP_NAKED __declspec(naked)
01076         #define AS1(x) __asm x
01077         #define AS2(x, y) __asm x, y
01078         #define AddPrologue \
01079                 __asm   push ebp \
01080                 __asm   push ebx \
01081                 __asm   push esi \
01082                 __asm   push edi \
01083                 __asm   mov             ecx, [esp+20] \
01084                 __asm   mov             edx, [esp+24] \
01085                 __asm   mov             ebx, [esp+28] \
01086                 __asm   mov             esi, [esp+32]
01087         #define AddEpilogue \
01088                 __asm   pop edi \
01089                 __asm   pop esi \
01090                 __asm   pop ebx \
01091                 __asm   pop ebp \
01092                 __asm   ret
01093         #define MulPrologue \
01094                 __asm   push ebp \
01095                 __asm   push ebx \
01096                 __asm   push esi \
01097                 __asm   push edi \
01098                 __asm   mov ecx, [esp+28] \
01099                 __asm   mov esi, [esp+24] \
01100                 __asm   push [esp+20]
01101         #define MulEpilogue \
01102                 __asm   add esp, 4 \
01103                 __asm   pop edi \
01104                 __asm   pop esi \
01105                 __asm   pop ebx \
01106                 __asm   pop ebp \
01107                 __asm   ret
01108 #else
01109         #define CRYPTOPP_NAKED
01110         #define AS1(x) #x ";"
01111         #define AS2(x, y) #x ", " #y ";"
01112         #define AddPrologue \
01113                 __asm__ __volatile__ \
01114                 ( \
01115                         "push %%ebx;"   /* save this manually, in case of -fPIC */ \
01116                         "mov %2, %%ebx;" \
01117                         ".intel_syntax noprefix;" \
01118                         "push ebp;"
01119         #define AddEpilogue \
01120                         "pop ebp;" \
01121                         ".att_syntax prefix;" \
01122                         "pop %%ebx;" \
01123                                         : \
01124                                         : "c" (C), "d" (A), "m" (B), "S" (N) \
01125                                         : "%edi", "memory", "cc" \
01126                 );
01127         #define MulPrologue \
01128                 __asm__ __volatile__ \
01129                 ( \
01130                         "push %%ebx;"   /* save this manually, in case of -fPIC */ \
01131                         "push %%ebp;" \
01132                         "push %0;" \
01133                         ".intel_syntax noprefix;"
01134         #define MulEpilogue \
01135                         "add esp, 4;" \
01136                         "pop ebp;" \
01137                         "pop ebx;" \
01138                         ".att_syntax prefix;" \
01139                         : \
01140                         : "rm" (Z), "S" (X), "c" (Y) \
01141                         : "%eax", "%edx", "%edi", "memory", "cc" \
01142                 );
01143 #endif
01144 
01145 CRYPTOPP_NAKED int PentiumOptimized::Add(word *C, const word *A, const word *B, size_t N)
01146 {
01147         AddPrologue
01148 
01149         // now: ebx = B, ecx = C, edx = A, esi = N
01150         AS2(    sub ecx, edx)   // hold the distance between C & A so we can add this to A to get C
01151         AS2(    xor eax, eax)   // clear eax
01152 
01153         AS2(    sub eax, esi)   // eax is a negative index from end of B
01154         AS2(    lea ebx, [ebx+4*esi])   // ebx is end of B
01155 
01156         AS2(    sar eax, 1)             // unit of eax is now dwords; this also clears the carry flag
01157         AS1(    jz      loopendAdd)             // if no dwords then nothing to do
01158 
01159         AS1(loopstartAdd:)
01160         AS2(    mov    esi,[edx])                       // load lower word of A
01161         AS2(    mov    ebp,[edx+4])                     // load higher word of A
01162 
01163         AS2(    mov    edi,[ebx+8*eax])         // load lower word of B
01164         AS2(    lea    edx,[edx+8])                     // advance A and C
01165 
01166         AS2(    adc    esi,edi)                         // add lower words
01167         AS2(    mov    edi,[ebx+8*eax+4])       // load higher word of B
01168 
01169         AS2(    adc    ebp,edi)                         // add higher words
01170         AS1(    inc    eax)                                     // advance B
01171 
01172         AS2(    mov    [edx+ecx-8],esi)         // store lower word result
01173         AS2(    mov    [edx+ecx-4],ebp)         // store higher word result
01174 
01175         AS1(    jnz    loopstartAdd)                    // loop until eax overflows and becomes zero
01176 
01177         AS1(loopendAdd:)
01178         AS2(    adc eax, 0)             // store carry into eax (return result register)
01179 
01180         AddEpilogue
01181 }
01182 
01183 CRYPTOPP_NAKED int PentiumOptimized::Subtract(word *C, const word *A, const word *B, size_t N)
01184 {
01185         AddPrologue
01186 
01187         // now: ebx = B, ecx = C, edx = A, esi = N
01188         AS2(    sub ecx, edx)   // hold the distance between C & A so we can add this to A to get C
01189         AS2(    xor eax, eax)   // clear eax
01190 
01191         AS2(    sub eax, esi)   // eax is a negative index from end of B
01192         AS2(    lea ebx, [ebx+4*esi])   // ebx is end of B
01193 
01194         AS2(    sar eax, 1)             // unit of eax is now dwords; this also clears the carry flag
01195         AS1(    jz      loopendSub)             // if no dwords then nothing to do
01196 
01197         AS1(loopstartSub:)
01198         AS2(    mov    esi,[edx])                       // load lower word of A
01199         AS2(    mov    ebp,[edx+4])                     // load higher word of A
01200 
01201         AS2(    mov    edi,[ebx+8*eax])         // load lower word of B
01202         AS2(    lea    edx,[edx+8])                     // advance A and C
01203 
01204         AS2(    sbb    esi,edi)                         // subtract lower words
01205         AS2(    mov    edi,[ebx+8*eax+4])       // load higher word of B
01206 
01207         AS2(    sbb    ebp,edi)                         // subtract higher words
01208         AS1(    inc    eax)                                     // advance B
01209 
01210         AS2(    mov    [edx+ecx-8],esi)         // store lower word result
01211         AS2(    mov    [edx+ecx-4],ebp)         // store higher word result
01212 
01213         AS1(    jnz    loopstartSub)                    // loop until eax overflows and becomes zero
01214 
01215         AS1(loopendSub:)
01216         AS2(    adc eax, 0)             // store carry into eax (return result register)
01217 
01218         AddEpilogue
01219 }
01220 
01221 // On Pentium 4, the adc and sbb instructions are very expensive, so avoid them.
01222 
01223 CRYPTOPP_NAKED int P4Optimized::Add(word *C, const word *A, const word *B, size_t N)
01224 {
01225         AddPrologue
01226 
01227         // now: ebx = B, ecx = C, edx = A, esi = N
01228         AS2(    xor             eax, eax)
01229         AS1(    neg             esi)
01230         AS1(    jz              loopendAddP4)           // if no dwords then nothing to do
01231 
01232         AS2(    mov             edi, [edx])
01233         AS2(    mov             ebp, [ebx])
01234         AS1(    jmp             carry1AddP4)
01235 
01236         AS1(loopstartAddP4:)
01237         AS2(    mov             edi, [edx+8])
01238         AS2(    add             ecx, 8)
01239         AS2(    add             edx, 8)
01240         AS2(    mov             ebp, [ebx])
01241         AS2(    add             edi, eax)
01242         AS1(    jc              carry1AddP4)
01243         AS2(    xor             eax, eax)
01244 
01245         AS1(carry1AddP4:)
01246         AS2(    add             edi, ebp)
01247         AS2(    mov             ebp, 1)
01248         AS2(    mov             [ecx], edi)
01249         AS2(    mov             edi, [edx+4])
01250         AS2(    cmovc   eax, ebp)
01251         AS2(    mov             ebp, [ebx+4])
01252         AS2(    add             ebx, 8)
01253         AS2(    add             edi, eax)
01254         AS1(    jc              carry2AddP4)
01255         AS2(    xor             eax, eax)
01256 
01257         AS1(carry2AddP4:)
01258         AS2(    add             edi, ebp)
01259         AS2(    mov             ebp, 1)
01260         AS2(    cmovc   eax, ebp)
01261         AS2(    mov             [ecx+4], edi)
01262         AS2(    add             esi, 2)
01263         AS1(    jnz             loopstartAddP4)
01264 
01265         AS1(loopendAddP4:)
01266 
01267         AddEpilogue
01268 }
01269 
01270 CRYPTOPP_NAKED int P4Optimized::Subtract(word *C, const word *A, const word *B, size_t N)
01271 {
01272         AddPrologue
01273 
01274         // now: ebx = B, ecx = C, edx = A, esi = N
01275         AS2(    xor             eax, eax)
01276         AS1(    neg             esi)
01277         AS1(    jz              loopendSubP4)           // if no dwords then nothing to do
01278 
01279         AS2(    mov             edi, [edx])
01280         AS2(    mov             ebp, [ebx])
01281         AS1(    jmp             carry1SubP4)
01282 
01283         AS1(loopstartSubP4:)
01284         AS2(    mov             edi, [edx+8])
01285         AS2(    add             edx, 8)
01286         AS2(    add             ecx, 8)
01287         AS2(    mov             ebp, [ebx])
01288         AS2(    sub             edi, eax)
01289         AS1(    jc              carry1SubP4)
01290         AS2(    xor             eax, eax)
01291 
01292         AS1(carry1SubP4:)
01293         AS2(    sub             edi, ebp)
01294         AS2(    mov             ebp, 1)
01295         AS2(    mov             [ecx], edi)
01296         AS2(    mov             edi, [edx+4])
01297         AS2(    cmovc   eax, ebp)
01298         AS2(    mov             ebp, [ebx+4])
01299         AS2(    add             ebx, 8)
01300         AS2(    sub             edi, eax)
01301         AS1(    jc              carry2SubP4)
01302         AS2(    xor             eax, eax)
01303 
01304         AS1(carry2SubP4:)
01305         AS2(    sub             edi, ebp)
01306         AS2(    mov             ebp, 1)
01307         AS2(    cmovc   eax, ebp)
01308         AS2(    mov             [ecx+4], edi)
01309         AS2(    add             esi, 2)
01310         AS1(    jnz             loopstartSubP4)
01311 
01312         AS1(loopendSubP4:)
01313 
01314         AddEpilogue
01315 }
01316 
01317 // multiply assembly code originally contributed by Leonard Janke
01318 
01319 #define MulStartup \
01320         AS2(xor ebp, ebp) \
01321         AS2(xor edi, edi) \
01322         AS2(xor ebx, ebx) 
01323 
01324 #define MulShiftCarry \
01325         AS2(mov ebp, edx) \
01326         AS2(mov edi, ebx) \
01327         AS2(xor ebx, ebx)
01328 
01329 #define MulAccumulateBottom(i,j) \
01330         AS2(mov eax, [ecx+4*j]) \
01331         AS2(imul eax, dword ptr [esi+4*i]) \
01332         AS2(add ebp, eax)
01333 
01334 #define MulAccumulate(i,j) \
01335         AS2(mov eax, [ecx+4*j]) \
01336         AS1(mul dword ptr [esi+4*i]) \
01337         AS2(add ebp, eax) \
01338         AS2(adc edi, edx) \
01339         AS2(adc bl, bh)
01340 
01341 #define MulStoreDigit(i)  \
01342         AS2(mov edx, edi) \
01343         AS2(mov edi, [esp]) \
01344         AS2(mov [edi+4*i], ebp)
01345 
01346 #define MulLastDiagonal(digits) \
01347         AS2(mov eax, [ecx+4*(digits-1)]) \
01348         AS1(mul dword ptr [esi+4*(digits-1)]) \
01349         AS2(add ebp, eax) \
01350         AS2(adc edx, edi) \
01351         AS2(mov edi, [esp]) \
01352         AS2(mov [edi+4*(2*digits-2)], ebp) \
01353         AS2(mov [edi+4*(2*digits-1)], edx)
01354 
01355 CRYPTOPP_NAKED void PentiumOptimized::Multiply4(word* Z, const word* X, const word* Y)
01356 {
01357         MulPrologue
01358         // now: [esp] = Z, esi = X, ecx = Y
01359         MulStartup
01360         MulAccumulate(0,0)
01361         MulStoreDigit(0)
01362         MulShiftCarry
01363 
01364         MulAccumulate(1,0)
01365         MulAccumulate(0,1)
01366         MulStoreDigit(1)
01367         MulShiftCarry
01368 
01369         MulAccumulate(2,0)
01370         MulAccumulate(1,1)
01371         MulAccumulate(0,2)
01372         MulStoreDigit(2)
01373         MulShiftCarry
01374 
01375         MulAccumulate(3,0)
01376         MulAccumulate(2,1)
01377         MulAccumulate(1,2)
01378         MulAccumulate(0,3)
01379         MulStoreDigit(3)
01380         MulShiftCarry
01381 
01382         MulAccumulate(3,1)
01383         MulAccumulate(2,2)
01384         MulAccumulate(1,3)
01385         MulStoreDigit(4)
01386         MulShiftCarry
01387 
01388         MulAccumulate(3,2)
01389         MulAccumulate(2,3)
01390         MulStoreDigit(5)
01391         MulShiftCarry
01392 
01393         MulLastDiagonal(4)
01394         MulEpilogue
01395 }
01396 
01397 CRYPTOPP_NAKED void PentiumOptimized::Multiply8(word* Z, const word* X, const word* Y)
01398 {
01399         MulPrologue
01400         // now: [esp] = Z, esi = X, ecx = Y
01401         MulStartup
01402         MulAccumulate(0,0)
01403         MulStoreDigit(0)
01404         MulShiftCarry
01405 
01406         MulAccumulate(1,0)
01407         MulAccumulate(0,1)
01408         MulStoreDigit(1)
01409         MulShiftCarry
01410 
01411         MulAccumulate(2,0)
01412         MulAccumulate(1,1)
01413         MulAccumulate(0,2)
01414         MulStoreDigit(2)
01415         MulShiftCarry
01416 
01417         MulAccumulate(3,0)
01418         MulAccumulate(2,1)
01419         MulAccumulate(1,2)
01420         MulAccumulate(0,3)
01421         MulStoreDigit(3)
01422         MulShiftCarry
01423 
01424         MulAccumulate(4,0)
01425         MulAccumulate(3,1)
01426         MulAccumulate(2,2)
01427         MulAccumulate(1,3)
01428         MulAccumulate(0,4)
01429         MulStoreDigit(4)
01430         MulShiftCarry
01431 
01432         MulAccumulate(5,0)
01433         MulAccumulate(4,1)
01434         MulAccumulate(3,2)
01435         MulAccumulate(2,3)
01436         MulAccumulate(1,4)
01437         MulAccumulate(0,5)
01438         MulStoreDigit(5)
01439         MulShiftCarry
01440 
01441         MulAccumulate(6,0)
01442         MulAccumulate(5,1)
01443         MulAccumulate(4,2)
01444         MulAccumulate(3,3)
01445         MulAccumulate(2,4)
01446         MulAccumulate(1,5)
01447         MulAccumulate(0,6)
01448         MulStoreDigit(6)
01449         MulShiftCarry
01450 
01451         MulAccumulate(7,0)
01452         MulAccumulate(6,1)
01453         MulAccumulate(5,2)
01454         MulAccumulate(4,3)
01455         MulAccumulate(3,4)
01456         MulAccumulate(2,5)
01457         MulAccumulate(1,6)
01458         MulAccumulate(0,7)
01459         MulStoreDigit(7)
01460         MulShiftCarry
01461 
01462         MulAccumulate(7,1)
01463         MulAccumulate(6,2)
01464         MulAccumulate(5,3)
01465         MulAccumulate(4,4)
01466         MulAccumulate(3,5)
01467         MulAccumulate(2,6)
01468         MulAccumulate(1,7)
01469         MulStoreDigit(8)
01470         MulShiftCarry
01471 
01472         MulAccumulate(7,2)
01473         MulAccumulate(6,3)
01474         MulAccumulate(5,4)
01475         MulAccumulate(4,5)
01476         MulAccumulate(3,6)
01477         MulAccumulate(2,7)
01478         MulStoreDigit(9)
01479         MulShiftCarry
01480 
01481         MulAccumulate(7,3)
01482         MulAccumulate(6,4)
01483         MulAccumulate(5,5)
01484         MulAccumulate(4,6)
01485         MulAccumulate(3,7)
01486         MulStoreDigit(10)
01487         MulShiftCarry
01488 
01489         MulAccumulate(7,4)
01490         MulAccumulate(6,5)
01491         MulAccumulate(5,6)
01492         MulAccumulate(4,7)
01493         MulStoreDigit(11)
01494         MulShiftCarry
01495 
01496         MulAccumulate(7,5)
01497         MulAccumulate(6,6)
01498         MulAccumulate(5,7)
01499         MulStoreDigit(12)
01500         MulShiftCarry
01501 
01502         MulAccumulate(7,6)
01503         MulAccumulate(6,7)
01504         MulStoreDigit(13)
01505         MulShiftCarry
01506 
01507         MulLastDiagonal(8)
01508         MulEpilogue
01509 }
01510 
01511 CRYPTOPP_NAKED void PentiumOptimized::Multiply8Bottom(word* Z, const word* X, const word* Y)
01512 {
01513         MulPrologue
01514         // now: [esp] = Z, esi = X, ecx = Y
01515         MulStartup
01516         MulAccumulate(0,0)
01517         MulStoreDigit(0)
01518         MulShiftCarry
01519 
01520         MulAccumulate(1,0)
01521         MulAccumulate(0,1)
01522         MulStoreDigit(1)
01523         MulShiftCarry
01524 
01525         MulAccumulate(2,0)
01526         MulAccumulate(1,1)
01527         MulAccumulate(0,2)
01528         MulStoreDigit(2)
01529         MulShiftCarry
01530 
01531         MulAccumulate(3,0)
01532         MulAccumulate(2,1)
01533         MulAccumulate(1,2)
01534         MulAccumulate(0,3)
01535         MulStoreDigit(3)
01536         MulShiftCarry
01537 
01538         MulAccumulate(4,0)
01539         MulAccumulate(3,1)
01540         MulAccumulate(2,2)
01541         MulAccumulate(1,3)
01542         MulAccumulate(0,4)
01543         MulStoreDigit(4)
01544         MulShiftCarry
01545 
01546         MulAccumulate(5,0)
01547         MulAccumulate(4,1)
01548         MulAccumulate(3,2)
01549         MulAccumulate(2,3)
01550         MulAccumulate(1,4)
01551         MulAccumulate(0,5)
01552         MulStoreDigit(5)
01553         MulShiftCarry
01554 
01555         MulAccumulate(6,0)
01556         MulAccumulate(5,1)
01557         MulAccumulate(4,2)
01558         MulAccumulate(3,3)
01559         MulAccumulate(2,4)
01560         MulAccumulate(1,5)
01561         MulAccumulate(0,6)
01562         MulStoreDigit(6)
01563         MulShiftCarry
01564 
01565         MulAccumulateBottom(7,0)
01566         MulAccumulateBottom(6,1)
01567         MulAccumulateBottom(5,2)
01568         MulAccumulateBottom(4,3)
01569         MulAccumulateBottom(3,4)
01570         MulAccumulateBottom(2,5)
01571         MulAccumulateBottom(1,6)
01572         MulAccumulateBottom(0,7)
01573         MulStoreDigit(7)
01574         MulEpilogue
01575 }
01576 
01577 #undef AS1
01578 #undef AS2
01579 
01580 #else   // not x86 - no processor specific code at this layer
01581 
01582 typedef Portable LowLevel;
01583 
01584 #endif
01585 
01586 #ifdef SSE2_INTRINSICS_AVAILABLE
01587 
01588 #ifdef __GNUC__
01589 #define CRYPTOPP_FASTCALL
01590 #else
01591 #define CRYPTOPP_FASTCALL __fastcall
01592 #endif
01593 
01594 static void CRYPTOPP_FASTCALL P4_Mul(__m128i *C, const __m128i *A, const __m128i *B)
01595 {
01596         __m128i a3210 = _mm_load_si128(A);
01597         __m128i b3210 = _mm_load_si128(B);
01598 
01599         __m128i sum;
01600 
01601         __m128i z = _mm_setzero_si128();
01602         __m128i a2b2_a0b0 = _mm_mul_epu32(a3210, b3210);
01603         C[0] = a2b2_a0b0;
01604 
01605         __m128i a3120 = _mm_shuffle_epi32(a3210, _MM_SHUFFLE(3, 1, 2, 0));
01606         __m128i b3021 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(3, 0, 2, 1));
01607         __m128i a1b0_a0b1 = _mm_mul_epu32(a3120, b3021);
01608         __m128i a1b0 = _mm_unpackhi_epi32(a1b0_a0b1, z);
01609         __m128i a0b1 = _mm_unpacklo_epi32(a1b0_a0b1, z);
01610         C[1] = _mm_add_epi64(a1b0, a0b1);
01611 
01612         __m128i a31 = _mm_srli_epi64(a3210, 32);
01613         __m128i b31 = _mm_srli_epi64(b3210, 32);
01614         __m128i a3b3_a1b1 = _mm_mul_epu32(a31, b31);
01615         C[6] = a3b3_a1b1;
01616 
01617         __m128i a1b1 = _mm_unpacklo_epi32(a3b3_a1b1, z);
01618         __m128i b3012 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(3, 0, 1, 2));
01619         __m128i a2b0_a0b2 = _mm_mul_epu32(a3210, b3012);
01620         __m128i a0b2 = _mm_unpacklo_epi32(a2b0_a0b2, z);
01621         __m128i a2b0 = _mm_unpackhi_epi32(a2b0_a0b2, z);
01622         sum = _mm_add_epi64(a1b1, a0b2);
01623         C[2] = _mm_add_epi64(sum, a2b0);
01624 
01625         __m128i a2301 = _mm_shuffle_epi32(a3210, _MM_SHUFFLE(2, 3, 0, 1));
01626         __m128i b2103 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(2, 1, 0, 3));
01627         __m128i a3b0_a1b2 = _mm_mul_epu32(a2301, b3012);
01628         __m128i a2b1_a0b3 = _mm_mul_epu32(a3210, b2103);
01629         __m128i a3b0 = _mm_unpackhi_epi32(a3b0_a1b2, z);
01630         __m128i a1b2 = _mm_unpacklo_epi32(a3b0_a1b2, z);
01631         __m128i a2b1 = _mm_unpackhi_epi32(a2b1_a0b3, z);
01632         __m128i a0b3 = _mm_unpacklo_epi32(a2b1_a0b3, z);
01633         __m128i sum1 = _mm_add_epi64(a3b0, a1b2);
01634         sum = _mm_add_epi64(a2b1, a0b3);
01635         C[3] = _mm_add_epi64(sum, sum1);
01636 
01637         __m128i a3b1_a1b3 = _mm_mul_epu32(a2301, b2103);
01638         __m128i a2b2 = _mm_unpackhi_epi32(a2b2_a0b0, z);
01639         __m128i a3b1 = _mm_unpackhi_epi32(a3b1_a1b3, z);
01640         __m128i a1b3 = _mm_unpacklo_epi32(a3b1_a1b3, z);
01641         sum = _mm_add_epi64(a2b2, a3b1);
01642         C[4] = _mm_add_epi64(sum, a1b3);
01643 
01644         __m128i a1302 = _mm_shuffle_epi32(a3210, _MM_SHUFFLE(1, 3, 0, 2));
01645         __m128i b1203 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(1, 2, 0, 3));
01646         __m128i a3b2_a2b3 = _mm_mul_epu32(a1302, b1203);
01647         __m128i a3b2 = _mm_unpackhi_epi32(a3b2_a2b3, z);
01648         __m128i a2b3 = _mm_unpacklo_epi32(a3b2_a2b3, z);
01649         C[5] = _mm_add_epi64(a3b2, a2b3);
01650 }
01651 
01652 void P4Optimized::Multiply4(word *C, const word *A, const word *B)
01653 {
01654         __m128i temp[7];
01655         const word *w = (word *)temp;
01656         const __m64 *mw = (__m64 *)w;
01657 
01658         P4_Mul(temp, (__m128i *)A, (__m128i *)B);
01659 
01660         C[0] = w[0];
01661 
01662         __m64 s1, s2;
01663 
01664         __m64 w1 = _mm_cvtsi32_si64(w[1]);
01665         __m64 w4 = mw[2];
01666         __m64 w6 = mw[3];
01667         __m64 w8 = mw[4];
01668         __m64 w10 = mw[5];
01669         __m64 w12 = mw[6];
01670         __m64 w14 = mw[7];
01671         __m64 w16 = mw[8];
01672         __m64 w18 = mw[9];
01673         __m64 w20 = mw[10];
01674         __m64 w22 = mw[11];
01675         __m64 w26 = _mm_cvtsi32_si64(w[26]);
01676 
01677         s1 = _mm_add_si64(w1, w4);
01678         C[1] = _mm_cvtsi64_si32(s1);
01679         s1 = _mm_srli_si64(s1, 32);
01680 
01681         s2 = _mm_add_si64(w6, w8);
01682         s1 = _mm_add_si64(s1, s2);
01683         C[2] = _mm_cvtsi64_si32(s1);
01684         s1 = _mm_srli_si64(s1, 32);
01685 
01686         s2 = _mm_add_si64(w10, w12);
01687         s1 = _mm_add_si64(s1, s2);
01688         C[3] = _mm_cvtsi64_si32(s1);
01689         s1 = _mm_srli_si64(s1, 32);
01690 
01691         s2 = _mm_add_si64(w14, w16);
01692         s1 = _mm_add_si64(s1, s2);
01693         C[4] = _mm_cvtsi64_si32(s1);
01694         s1 = _mm_srli_si64(s1, 32);
01695 
01696         s2 = _mm_add_si64(w18, w20);
01697         s1 = _mm_add_si64(s1, s2);
01698         C[5] = _mm_cvtsi64_si32(s1);
01699         s1 = _mm_srli_si64(s1, 32);
01700 
01701         s2 = _mm_add_si64(w22, w26);
01702         s1 = _mm_add_si64(s1, s2);
01703         C[6] = _mm_cvtsi64_si32(s1);
01704         s1 = _mm_srli_si64(s1, 32);
01705 
01706         C[7] = _mm_cvtsi64_si32(s1) + w[27];
01707         _mm_empty();
01708 }
01709 
01710 void P4Optimized::Multiply8(word *C, const word *A, const word *B)
01711 {
01712         __m128i temp[28];
01713         const word *w = (word *)temp;
01714         const __m64 *mw = (__m64 *)w;
01715         const word *x = (word *)temp+7*4;
01716         const __m64 *mx = (__m64 *)x;
01717         const word *y = (word *)temp+7*4*2;
01718         const __m64 *my = (__m64 *)y;
01719         const word *z = (word *)temp+7*4*3;
01720         const __m64 *mz = (__m64 *)z;
01721 
01722         P4_Mul(temp, (__m128i *)A, (__m128i *)B);
01723 
01724         P4_Mul(temp+7, (__m128i *)A+1, (__m128i *)B);
01725 
01726         P4_Mul(temp+14, (__m128i *)A, (__m128i *)B+1);
01727 
01728         P4_Mul(temp+21, (__m128i *)A+1, (__m128i *)B+1);
01729 
01730         C[0] = w[0];
01731 
01732         __m64 s1, s2, s3, s4;
01733 
01734         __m64 w1 = _mm_cvtsi32_si64(w[1]);
01735         __m64 w4 = mw[2];
01736         __m64 w6 = mw[3];
01737         __m64 w8 = mw[4];
01738         __m64 w10 = mw[5];
01739         __m64 w12 = mw[6];
01740         __m64 w14 = mw[7];
01741         __m64 w16 = mw[8];
01742         __m64 w18 = mw[9];
01743         __m64 w20 = mw[10];
01744         __m64 w22 = mw[11];
01745         __m64 w26 = _mm_cvtsi32_si64(w[26]);
01746         __m64 w27 = _mm_cvtsi32_si64(w[27]);
01747 
01748         __m64 x0 = _mm_cvtsi32_si64(x[0]);
01749         __m64 x1 = _mm_cvtsi32_si64(x[1]);
01750         __m64 x4 = mx[2];
01751         __m64 x6 = mx[3];
01752         __m64 x8 = mx[4];
01753         __m64 x10 = mx[5];
01754         __m64 x12 = mx[6];
01755         __m64 x14 = mx[7];
01756         __m64 x16 = mx[8];
01757         __m64 x18 = mx[9];
01758         __m64 x20 = mx[10];
01759         __m64 x22 = mx[11];
01760         __m64 x26 = _mm_cvtsi32_si64(x[26]);
01761         __m64 x27 = _mm_cvtsi32_si64(x[27]);
01762 
01763         __m64 y0 = _mm_cvtsi32_si64(y[0]);
01764         __m64 y1 = _mm_cvtsi32_si64(y[1]);
01765         __m64 y4 = my[2];
01766         __m64 y6 = my[3];
01767         __m64 y8 = my[4];
01768         __m64 y10 = my[5];
01769         __m64 y12 = my[6];
01770         __m64 y14 = my[7];
01771         __m64 y16 = my[8];
01772         __m64 y18 = my[9];
01773         __m64 y20 = my[10];
01774         __m64 y22 = my[11];
01775         __m64 y26 = _mm_cvtsi32_si64(y[26]);
01776         __m64 y27 = _mm_cvtsi32_si64(y[27]);
01777 
01778         __m64 z0 = _mm_cvtsi32_si64(z[0]);
01779         __m64 z1 = _mm_cvtsi32_si64(z[1]);
01780         __m64 z4 = mz[2];
01781         __m64 z6 = mz[3];
01782         __m64 z8 = mz[4];
01783         __m64 z10 = mz[5];
01784         __m64 z12 = mz[6];
01785         __m64 z14 = mz[7];
01786         __m64 z16 = mz[8];
01787         __m64 z18 = mz[9];
01788         __m64 z20 = mz[10];
01789         __m64 z22 = mz[11];
01790         __m64 z26 = _mm_cvtsi32_si64(z[26]);
01791 
01792         s1 = _mm_add_si64(w1, w4);
01793         C[1] = _mm_cvtsi64_si32(s1);
01794         s1 = _mm_srli_si64(s1, 32);
01795 
01796         s2 = _mm_add_si64(w6, w8);
01797         s1 = _mm_add_si64(s1, s2);
01798         C[2] = _mm_cvtsi64_si32(s1);
01799         s1 = _mm_srli_si64(s1, 32);
01800 
01801         s2 = _mm_add_si64(w10, w12);
01802         s1 = _mm_add_si64(s1, s2);
01803         C[3] = _mm_cvtsi64_si32(s1);
01804         s1 = _mm_srli_si64(s1, 32);
01805 
01806         s3 = _mm_add_si64(x0, y0);
01807         s2 = _mm_add_si64(w14, w16);
01808         s1 = _mm_add_si64(s1, s3);
01809         s1 = _mm_add_si64(s1, s2);
01810         C[4] = _mm_cvtsi64_si32(s1);
01811         s1 = _mm_srli_si64(s1, 32);
01812 
01813         s3 = _mm_add_si64(x1, y1);
01814         s4 = _mm_add_si64(x4, y4);
01815         s1 = _mm_add_si64(s1, w18);
01816         s3 = _mm_add_si64(s3, s4);
01817         s1 = _mm_add_si64(s1, w20);
01818         s1 = _mm_add_si64(s1, s3);
01819         C[5] = _mm_cvtsi64_si32(s1);
01820         s1 = _mm_srli_si64(s1, 32);
01821 
01822         s3 = _mm_add_si64(x6, y6);
01823         s4 = _mm_add_si64(x8, y8);
01824         s1 = _mm_add_si64(s1, w22);
01825         s3 = _mm_add_si64(s3, s4);
01826         s1 = _mm_add_si64(s1, w26);
01827         s1 = _mm_add_si64(s1, s3);
01828         C[6] = _mm_cvtsi64_si32(s1);
01829         s1 = _mm_srli_si64(s1, 32);
01830 
01831         s3 = _mm_add_si64(x10, y10);
01832         s4 = _mm_add_si64(x12, y12);
01833         s1 = _mm_add_si64(s1, w27);
01834         s3 = _mm_add_si64(s3, s4);
01835         s1 = _mm_add_si64(s1, s3);
01836         C[7] = _mm_cvtsi64_si32(s1);
01837         s1 = _mm_srli_si64(s1, 32);
01838 
01839         s3 = _mm_add_si64(x14, y14);
01840         s4 = _mm_add_si64(x16, y16);
01841         s1 = _mm_add_si64(s1, z0);
01842         s3 = _mm_add_si64(s3, s4);
01843         s1 = _mm_add_si64(s1, s3);
01844         C[8] = _mm_cvtsi64_si32(s1);
01845         s1 = _mm_srli_si64(s1, 32);
01846 
01847         s3 = _mm_add_si64(x18, y18);
01848         s4 = _mm_add_si64(x20, y20);
01849         s1 = _mm_add_si64(s1, z1);
01850         s3 = _mm_add_si64(s3, s4);
01851         s1 = _mm_add_si64(s1, z4);
01852         s1 = _mm_add_si64(s1, s3);
01853         C[9] = _mm_cvtsi64_si32(s1);
01854         s1 = _mm_srli_si64(s1, 32);
01855 
01856         s3 = _mm_add_si64(x22, y22);
01857         s4 = _mm_add_si64(x26, y26);
01858         s1 = _mm_add_si64(s1, z6);
01859         s3 = _mm_add_si64(s3, s4);
01860         s1 = _mm_add_si64(s1, z8);
01861         s1 = _mm_add_si64(s1, s3);
01862         C[10] = _mm_cvtsi64_si32(s1);
01863         s1 = _mm_srli_si64(s1, 32);
01864 
01865         s3 = _mm_add_si64(x27, y27);
01866         s1 = _mm_add_si64(s1, z10);
01867         s1 = _mm_add_si64(s1, z12);
01868         s1 = _mm_add_si64(s1, s3);
01869         C[11] = _mm_cvtsi64_si32(s1);
01870         s1 = _mm_srli_si64(s1, 32);
01871 
01872         s3 = _mm_add_si64(z14, z16);
01873         s1 = _mm_add_si64(s1, s3);
01874         C[12] = _mm_cvtsi64_si32(s1);
01875         s1 = _mm_srli_si64(s1, 32);
01876 
01877         s3 = _mm_add_si64(z18, z20);
01878         s1 = _mm_add_si64(s1, s3);
01879         C[13] = _mm_cvtsi64_si32(s1);
01880         s1 = _mm_srli_si64(s1, 32);
01881 
01882         s3 = _mm_add_si64(z22, z26);
01883         s1 = _mm_add_si64(s1, s3);
01884         C[14] = _mm_cvtsi64_si32(s1);
01885         s1 = _mm_srli_si64(s1, 32);
01886 
01887         C[15] = z[27] + _mm_cvtsi64_si32(s1);
01888         _mm_empty();
01889 }
01890 
01891 void P4Optimized::Multiply8Bottom(word *C, const word *A, const word *B)
01892 {
01893         __m128i temp[21];
01894         const word *w = (word *)temp;
01895         const __m64 *mw = (__m64 *)w;
01896         const word *x = (word *)temp+7*4;
01897         const __m64 *mx = (__m64 *)x;
01898         const word *y = (word *)temp+7*4*2;
01899         const __m64 *my = (__m64 *)y;
01900 
01901         P4_Mul(temp, (__m128i *)A, (__m128i *)B);
01902 
01903         P4_Mul(temp+7, (__m128i *)A+1, (__m128i *)B);
01904 
01905         P4_Mul(temp+14, (__m128i *)A, (__m128i *)B+1);
01906 
01907         C[0] = w[0];
01908 
01909         __m64 s1, s2, s3, s4;
01910 
01911         __m64 w1 = _mm_cvtsi32_si64(w[1]);
01912         __m64 w4 = mw[2];
01913         __m64 w6 = mw[3];
01914         __m64 w8 = mw[4];
01915         __m64 w10 = mw[5];
01916         __m64 w12 = mw[6];
01917         __m64 w14 = mw[7];
01918         __m64 w16 = mw[8];
01919         __m64 w18 = mw[9];
01920         __m64 w20 = mw[10];
01921         __m64 w22 = mw[11];
01922         __m64 w26 = _mm_cvtsi32_si64(w[26]);
01923 
01924         __m64 x0 = _mm_cvtsi32_si64(x[0]);
01925         __m64 x1 = _mm_cvtsi32_si64(x[1]);
01926         __m64 x4 = mx[2];
01927         __m64 x6 = mx[3];
01928         __m64 x8 = mx[4];
01929 
01930         __m64 y0 = _mm_cvtsi32_si64(y[0]);
01931         __m64 y1 = _mm_cvtsi32_si64(y[1]);
01932         __m64 y4 = my[2];
01933         __m64 y6 = my[3];
01934         __m64 y8 = my[4];
01935 
01936         s1 = _mm_add_si64(w1, w4);
01937         C[1] = _mm_cvtsi64_si32(s1);
01938         s1 = _mm_srli_si64(s1, 32);
01939 
01940         s2 = _mm_add_si64(w6, w8);
01941         s1 = _mm_add_si64(s1, s2);
01942         C[2] = _mm_cvtsi64_si32(s1);
01943         s1 = _mm_srli_si64(s1, 32);
01944 
01945         s2 = _mm_add_si64(w10, w12);
01946         s1 = _mm_add_si64(s1, s2);
01947         C[3] = _mm_cvtsi64_si32(s1);
01948         s1 = _mm_srli_si64(s1, 32);
01949 
01950         s3 = _mm_add_si64(x0, y0);
01951         s2 = _mm_add_si64(w14, w16);
01952         s1 = _mm_add_si64(s1, s3);
01953         s1 = _mm_add_si64(s1, s2);
01954         C[4] = _mm_cvtsi64_si32(s1);
01955         s1 = _mm_srli_si64(s1, 32);
01956 
01957         s3 = _mm_add_si64(x1, y1);
01958         s4 = _mm_add_si64(x4, y4);
01959         s1 = _mm_add_si64(s1, w18);
01960         s3 = _mm_add_si64(s3, s4);
01961         s1 = _mm_add_si64(s1, w20);
01962         s1 = _mm_add_si64(s1, s3);
01963         C[5] = _mm_cvtsi64_si32(s1);
01964         s1 = _mm_srli_si64(s1, 32);
01965 
01966         s3 = _mm_add_si64(x6, y6);
01967         s4 = _mm_add_si64(x8, y8);
01968         s1 = _mm_add_si64(s1, w22);
01969         s3 = _mm_add_si64(s3, s4);
01970         s1 = _mm_add_si64(s1, w26);
01971         s1 = _mm_add_si64(s1, s3);
01972         C[6] = _mm_cvtsi64_si32(s1);
01973         s1 = _mm_srli_si64(s1, 32);
01974 
01975         C[7] = _mm_cvtsi64_si32(s1) + w[27] + x[10] + y[10] + x[12] + y[12];
01976         _mm_empty();
01977 }
01978 
01979 #endif  // #ifdef SSE2_INTRINSICS_AVAILABLE
01980 
01981 // ********************************************************
01982 
01983 #define A0              A
01984 #define A1              (A+N2)
01985 #define B0              B
01986 #define B1              (B+N2)
01987 
01988 #define T0              T
01989 #define T1              (T+N2)
01990 #define T2              (T+N)
01991 #define T3              (T+N+N2)
01992 
01993 #define R0              R
01994 #define R1              (R+N2)
01995 #define R2              (R+N)
01996 #define R3              (R+N+N2)
01997 
01998 // R[2*N] - result = A*B
01999 // T[2*N] - temporary work space
02000 // A[N] --- multiplier
02001 // B[N] --- multiplicant
02002 
02003 void RecursiveMultiply(word *R, word *T, const word *A, const word *B, size_t N)
02004 {
02005         assert(N>=2 && N%2==0);
02006 
02007         if (LowLevel::MultiplyRecursionLimit() >= 8 && N==8)
02008                 LowLevel::Multiply8(R, A, B);
02009         else if (LowLevel::MultiplyRecursionLimit() >= 4 && N==4)
02010                 LowLevel::Multiply4(R, A, B);
02011         else if (N==2)
02012                 LowLevel::Multiply2(R, A, B);
02013         else
02014         {
02015                 const size_t N2 = N/2;
02016                 int carry;
02017 
02018                 int aComp = Compare(A0, A1, N2);
02019                 int bComp = Compare(B0, B1, N2);
02020 
02021                 switch (2*aComp + aComp + bComp)
02022                 {
02023                 case -4:
02024                         LowLevel::Subtract(R0, A1, A0, N2);
02025                         LowLevel::Subtract(R1, B0, B1, N2);
02026                         RecursiveMultiply(T0, T2, R0, R1, N2);
02027                         LowLevel::Subtract(T1, T1, R0, N2);
02028                         carry = -1;
02029                         break;
02030                 case -2:
02031                         LowLevel::Subtract(R0, A1, A0, N2);
02032                         LowLevel::Subtract(R1, B0, B1, N2);
02033                         RecursiveMultiply(T0, T2, R0, R1, N2);
02034                         carry = 0;
02035                         break;
02036                 case 2:
02037                         LowLevel::Subtract(R0, A0, A1, N2);
02038                         LowLevel::Subtract(R1, B1, B0, N2);
02039                         RecursiveMultiply(T0, T2, R0, R1, N2);
02040                         carry = 0;
02041                         break;
02042                 case 4:
02043                         LowLevel::Subtract(R0, A1, A0, N2);
02044                         LowLevel::Subtract(R1, B0, B1, N2);
02045                         RecursiveMultiply(T0, T2, R0, R1, N2);
02046                         LowLevel::Subtract(T1, T1, R1, N2);
02047                         carry = -1;
02048                         break;
02049                 default:
02050                         SetWords(T0, 0, N);
02051                         carry = 0;
02052                 }
02053 
02054                 RecursiveMultiply(R0, T2, A0, B0, N2);
02055                 RecursiveMultiply(R2, T2, A1, B1, N2);
02056 
02057                 // now T[01] holds (A1-A0)*(B0-B1), R[01] holds A0*B0, R[23] holds A1*B1
02058 
02059                 carry += LowLevel::Add(T0, T0, R0, N);
02060                 carry += LowLevel::Add(T0, T0, R2, N);
02061                 carry += LowLevel::Add(R1, R1, T0, N);
02062 
02063                 assert (carry >= 0 && carry <= 2);
02064                 Increment(R3, N2, carry);
02065         }
02066 }
02067 
02068 // R[2*N] - result = A*A
02069 // T[2*N] - temporary work space
02070 // A[N] --- number to be squared
02071 
02072 void RecursiveSquare(word *R, word *T, const word *A, size_t N)
02073 {
02074         assert(N && N%2==0);
02075         if (LowLevel::SquareRecursionLimit() >= 8 && N==8)
02076                 LowLevel::Square8(R, A);
02077         if (LowLevel::SquareRecursionLimit() >= 4 && N==4)
02078                 LowLevel::Square4(R, A);
02079         else if (N==2)
02080                 LowLevel::Square2(R, A);
02081         else
02082         {
02083                 const size_t N2 = N/2;
02084 
02085                 RecursiveSquare(R0, T2, A0, N2);
02086                 RecursiveSquare(R2, T2, A1, N2);
02087                 RecursiveMultiply(T0, T2, A0, A1, N2);
02088 
02089                 int carry = LowLevel::Add(R1, R1, T0, N);
02090                 carry += LowLevel::Add(R1, R1, T0, N);
02091                 Increment(R3, N2, carry);
02092         }
02093 }
02094 
02095 // R[N] - bottom half of A*B
02096 // T[N] - temporary work space
02097 // A[N] - multiplier
02098 // B[N] - multiplicant
02099 
02100 void RecursiveMultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
02101 {
02102         assert(N>=2 && N%2==0);
02103         if (LowLevel::MultiplyBottomRecursionLimit() >= 8 && N==8)
02104                 LowLevel::Multiply8Bottom(R, A, B);
02105         else if (LowLevel::MultiplyBottomRecursionLimit() >= 4 && N==4)
02106                 LowLevel::Multiply4Bottom(R, A, B);
02107         else if (N==2)
02108                 LowLevel::Multiply2Bottom(R, A, B);
02109         else
02110         {
02111                 const size_t N2 = N/2;
02112 
02113                 RecursiveMultiply(R, T, A0, B0, N2);
02114                 RecursiveMultiplyBottom(T0, T1, A1, B0, N2);
02115                 LowLevel::Add(R1, R1, T0, N2);
02116                 RecursiveMultiplyBottom(T0, T1, A0, B1, N2);
02117                 LowLevel::Add(R1, R1, T0, N2);
02118         }
02119 }
02120 
02121 // R[N] --- upper half of A*B
02122 // T[2*N] - temporary work space
02123 // L[N] --- lower half of A*B
02124 // A[N] --- multiplier
02125 // B[N] --- multiplicant
02126 
02127 void RecursiveMultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, size_t N)
02128 {
02129         assert(N>=2 && N%2==0);
02130 
02131         if (N==4)
02132         {
02133                 LowLevel::Multiply4(T, A, B);
02134                 memcpy(R, T+4, 4*WORD_SIZE);
02135         }
02136         else if (N==2)
02137         {
02138                 LowLevel::Multiply2(T, A, B);
02139                 memcpy(R, T+2, 2*WORD_SIZE);
02140         }
02141         else
02142         {
02143                 const size_t N2 = N/2;
02144                 int carry;
02145 
02146                 int aComp = Compare(A0, A1, N2);
02147                 int bComp = Compare(B0, B1, N2);
02148 
02149                 switch (2*aComp + aComp + bComp)
02150                 {
02151                 case -4:
02152                         LowLevel::Subtract(R0, A1, A0, N2);
02153                         LowLevel::Subtract(R1, B0, B1, N2);
02154                         RecursiveMultiply(T0, T2, R0, R1, N2);
02155                         LowLevel::Subtract(T1, T1, R0, N2);
02156                         carry = -1;
02157                         break;
02158                 case -2:
02159                         LowLevel::Subtract(R0, A1, A0, N2);
02160                         LowLevel::Subtract(R1, B0, B1, N2);
02161                         RecursiveMultiply(T0, T2, R0, R1, N2);
02162                         carry = 0;
02163                         break;
02164                 case 2:
02165                         LowLevel::Subtract(R0, A0, A1, N2);
02166                         LowLevel::Subtract(R1, B1, B0, N2);
02167                         RecursiveMultiply(T0, T2, R0, R1, N2);
02168                         carry = 0;
02169                         break;
02170                 case 4:
02171                         LowLevel::Subtract(R0, A1, A0, N2);
02172                         LowLevel::Subtract(R1, B0, B1, N2);
02173                         RecursiveMultiply(T0, T2, R0, R1, N2);
02174                         LowLevel::Subtract(T1, T1, R1, N2);
02175                         carry = -1;
02176                         break;
02177                 default:
02178                         SetWords(T0, 0, N);
02179                         carry = 0;
02180                 }
02181 
02182                 RecursiveMultiply(T2, R0, A1, B1, N2);
02183 
02184                 // now T[01] holds (A1-A0)*(B0-B1), T[23] holds A1*B1
02185 
02186                 int c2 = LowLevel::Subtract(R0, L+N2, L, N2);
02187                 c2 += LowLevel::Subtract(R0, R0, T0, N2);
02188                 int t = (Compare(R0, T2, N2) == -1);
02189 
02190                 carry += t;
02191                 carry += Increment(R0, N2, c2+t);
02192                 carry += LowLevel::Add(R0, R0, T1, N2);
02193                 carry += LowLevel::Add(R0, R0, T3, N2);
02194                 assert (carry >= 0 && carry <= 2);
02195 
02196                 CopyWords(R1, T3, N2);
02197                 Increment(R1, N2, carry);
02198         }
02199 }
02200 
02201 inline int Add(word *C, const word *A, const word *B, size_t N)
02202 {
02203         return LowLevel::Add(C, A, B, N);
02204 }
02205 
02206 inline int Subtract(word *C, const word *A, const word *B, size_t N)
02207 {
02208         return LowLevel::Subtract(C, A, B, N);
02209 }
02210 
02211 inline void Multiply(word *R, word *T, const word *A, const word *B, size_t N)
02212 {
02213         RecursiveMultiply(R, T, A, B, N);
02214 }
02215 
02216 inline void Square(word *R, word *T, const word *A, size_t N)
02217 {
02218         RecursiveSquare(R, T, A, N);
02219 }
02220 
02221 inline void MultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
02222 {
02223         RecursiveMultiplyBottom(R, T, A, B, N);
02224 }
02225 
02226 inline void MultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, size_t N)
02227 {
02228         RecursiveMultiplyTop(R, T, L, A, B, N);
02229 }
02230 
02231 static word LinearMultiply(word *C, const word *A, word B, size_t N)
02232 {
02233         word carry=0;
02234         for(unsigned i=0; i<N; i++)
02235         {
02236                 DWord p = DWord::MultiplyAndAdd(A[i], B, carry);
02237                 C[i] = p.GetLowHalf();
02238                 carry = p.GetHighHalf();
02239         }
02240         return carry;
02241 }
02242 
02243 // R[NA+NB] - result = A*B
02244 // T[NA+NB] - temporary work space
02245 // A[NA] ---- multiplier
02246 // B[NB] ---- multiplicant
02247 
02248 void AsymmetricMultiply(word *R, word *T, const word *A, size_t NA, const word *B, size_t NB)
02249 {
02250         if (NA == NB)
02251         {
02252                 if (A == B)
02253                         Square(R, T, A, NA);
02254                 else
02255                         Multiply(R, T, A, B, NA);
02256 
02257                 return;
02258         }
02259 
02260         if (NA > NB)
02261         {
02262                 std::swap(A, B);
02263                 std::swap(NA, NB);
02264         }
02265 
02266         assert(NB % NA == 0);
02267         assert((NB/NA)%2 == 0);         // NB is an even multiple of NA
02268 
02269         if (NA==2 && !A[1])
02270         {
02271                 switch (A[0])
02272                 {
02273                 case 0:
02274                         SetWords(R, 0, NB+2);
02275                         return;
02276                 case 1:
02277                         CopyWords(R, B, NB);
02278                         R[NB] = R[NB+1] = 0;
02279                         return;
02280                 default:
02281                         R[NB] = LinearMultiply(R, B, A[0], NB);
02282                         R[NB+1] = 0;
02283                         return;
02284                 }
02285         }
02286 
02287         Multiply(R, T, A, B, NA);
02288         CopyWords(T+2*NA, R+NA, NA);
02289 
02290         size_t i;
02291 
02292         for (i=2*NA; i<NB; i+=2*NA)
02293                 Multiply(T+NA+i, T, A, B+i, NA);
02294         for (i=NA; i<NB; i+=2*NA)
02295                 Multiply(R+i, T, A, B+i, NA);
02296 
02297         if (Add(R+NA, R+NA, T+2*NA, NB-NA))
02298                 Increment(R+NB, NA);
02299 }
02300 
02301 // R[N] ----- result = A inverse mod 2**(WORD_BITS*N)
02302 // T[3*N/2] - temporary work space
02303 // A[N] ----- an odd number as input
02304 
02305 void RecursiveInverseModPower2(word *R, word *T, const word *A, size_t N)
02306 {
02307         if (N==2)
02308         {
02309                 T[0] = AtomicInverseModPower2(A[0]);
02310                 T[1] = 0;
02311                 LowLevel::Multiply2Bottom(T+2, T, A);
02312                 TwosComplement(T+2, 2);
02313                 Increment(T+2, 2, 2);
02314                 LowLevel::Multiply2Bottom(R, T, T+2);
02315         }
02316         else
02317         {
02318                 const size_t N2 = N/2;
02319                 RecursiveInverseModPower2(R0, T0, A0, N2);
02320                 T0[0] = 1;
02321                 SetWords(T0+1, 0, N2-1);
02322                 MultiplyTop(R1, T1, T0, R0, A0, N2);
02323                 MultiplyBottom(T0, T1, R0, A1, N2);
02324                 Add(T0, R1, T0, N2);
02325                 TwosComplement(T0, N2);
02326                 MultiplyBottom(R1, T1, R0, T0, N2);
02327         }
02328 }
02329 
02330 // R[N] --- result = X/(2**(WORD_BITS*N)) mod M
02331 // T[3*N] - temporary work space
02332 // X[2*N] - number to be reduced
02333 // M[N] --- modulus
02334 // U[N] --- multiplicative inverse of M mod 2**(WORD_BITS*N)
02335 
02336 void MontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, size_t N)
02337 {
02338         MultiplyBottom(R, T, X, U, N);
02339         MultiplyTop(T, T+N, X, R, M, N);
02340         word borrow = Subtract(T, X+N, T, N);
02341         // defend against timing attack by doing this Add even when not needed
02342         word carry = Add(T+N, T, M, N);
02343         assert(carry || !borrow);
02344         CopyWords(R, T + (borrow ? N : 0), N);
02345 }
02346 
02347 // R[N] --- result = X/(2**(WORD_BITS*N/2)) mod M
02348 // T[2*N] - temporary work space
02349 // X[2*N] - number to be reduced
02350 // M[N] --- modulus
02351 // U[N/2] - multiplicative inverse of M mod 2**(WORD_BITS*N/2)
02352 // V[N] --- 2**(WORD_BITS*3*N/2) mod M
02353 
02354 void HalfMontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, const word *V, size_t N)
02355 {
02356         assert(N%2==0 && N>=4);
02357 
02358 #define M0              M
02359 #define M1              (M+N2)
02360 #define V0              V
02361 #define V1              (V+N2)
02362 
02363 #define X0              X
02364 #define X1              (X+N2)
02365 #define X2              (X+N)
02366 #define X3              (X+N+N2)
02367 
02368         const size_t N2 = N/2;
02369         Multiply(T0, T2, V0, X3, N2);
02370         int c2 = Add(T0, T0, X0, N);
02371         MultiplyBottom(T3, T2, T0, U, N2);
02372         MultiplyTop(T2, R, T0, T3, M0, N2);
02373         c2 -= Subtract(T2, T1, T2, N2);
02374         Multiply(T0, R, T3, M1, N2);
02375         c2 -= Subtract(T0, T2, T0, N2);
02376         int c3 = -(int)Subtract(T1, X2, T1, N2);
02377         Multiply(R0, T2, V1, X3, N2);
02378         c3 += Add(R, R, T, N);
02379 
02380         if (c2>0)
02381                 c3 += Increment(R1, N2);
02382         else if (c2<0)
02383                 c3 -= Decrement(R1, N2, -c2);
02384 
02385         assert(c3>=-1 && c3<=1);
02386         if (c3>0)
02387                 Subtract(R, R, M, N);
02388         else if (c3<0)
02389                 Add(R, R, M, N);
02390 
02391 #undef M0
02392 #undef M1
02393 #undef V0
02394 #undef V1
02395 
02396 #undef X0
02397 #undef X1
02398 #undef X2
02399 #undef X3
02400 }
02401 
02402 #undef A0
02403 #undef A1
02404 #undef B0
02405 #undef B1
02406 
02407 #undef T0
02408 #undef T1
02409 #undef T2
02410 #undef T3
02411 
02412 #undef R0
02413 #undef R1
02414 #undef R2
02415 #undef R3
02416 
02417 /*
02418 // do a 3 word by 2 word divide, returns quotient and leaves remainder in A
02419 static word SubatomicDivide(word *A, word B0, word B1)
02420 {
02421         // assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a word
02422         assert(A[2] < B1 || (A[2]==B1 && A[1] < B0));
02423 
02424         // estimate the quotient: do a 2 word by 1 word divide
02425         word Q;
02426         if (B1+1 == 0)
02427                 Q = A[2];
02428         else
02429                 Q = DWord(A[1], A[2]).DividedBy(B1+1);
02430 
02431         // now subtract Q*B from A
02432         DWord p = DWord::Multiply(B0, Q);
02433         DWord u = (DWord) A[0] - p.GetLowHalf();
02434         A[0] = u.GetLowHalf();
02435         u = (DWord) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - DWord::Multiply(B1, Q);
02436         A[1] = u.GetLowHalf();
02437         A[2] += u.GetHighHalf();
02438 
02439         // Q <= actual quotient, so fix it
02440         while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
02441         {
02442                 u = (DWord) A[0] - B0;
02443                 A[0] = u.GetLowHalf();
02444                 u = (DWord) A[1] - B1 - u.GetHighHalfAsBorrow();
02445                 A[1] = u.GetLowHalf();
02446                 A[2] += u.GetHighHalf();
02447                 Q++;
02448                 assert(Q);      // shouldn't overflow
02449         }
02450 
02451         return Q;
02452 }
02453 
02454 // do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1
02455 static inline void AtomicDivide(word *Q, const word *A, const word *B)
02456 {
02457         if (!B[0] && !B[1]) // if divisor is 0, we assume divisor==2**(2*WORD_BITS)
02458         {
02459                 Q[0] = A[2];
02460                 Q[1] = A[3];
02461         }
02462         else
02463         {
02464                 word T[4];
02465                 T[0] = A[0]; T[1] = A[1]; T[2] = A[2]; T[3] = A[3];
02466                 Q[1] = SubatomicDivide(T+1, B[0], B[1]);
02467                 Q[0] = SubatomicDivide(T, B[0], B[1]);
02468 
02469 #ifndef NDEBUG
02470                 // multiply quotient and divisor and add remainder, make sure it equals dividend
02471                 assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
02472                 word P[4];
02473                 LowLevel::Multiply2(P, Q, B);
02474                 Add(P, P, T, 4);
02475                 assert(memcmp(P, A, 4*WORD_SIZE)==0);
02476 #endif
02477         }
02478 }
02479 */
02480 
02481 static inline void AtomicDivide(word *Q, const word *A, const word *B)
02482 {
02483         word T[4];
02484         DWord q = DivideFourWordsByTwo<word, DWord>(T, DWord(A[0], A[1]), DWord(A[2], A[3]), DWord(B[0], B[1]));
02485         Q[0] = q.GetLowHalf();
02486         Q[1] = q.GetHighHalf();
02487 
02488 #ifndef NDEBUG
02489         if (B[0] || B[1])
02490         {
02491                 // multiply quotient and divisor and add remainder, make sure it equals dividend
02492                 assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
02493                 word P[4];
02494                 Portable::Multiply2(P, Q, B);
02495                 Add(P, P, T, 4);
02496                 assert(memcmp(P, A, 4*WORD_SIZE)==0);
02497         }
02498 #endif
02499 }
02500 
02501 // for use by Divide(), corrects the underestimated quotient {Q1,Q0}
02502 static void CorrectQuotientEstimate(word *R, word *T, word *Q, const word *B, size_t N)
02503 {
02504         assert(N && N%2==0);
02505 
02506         if (Q[1])
02507         {
02508                 T[N] = T[N+1] = 0;
02509                 unsigned i;
02510                 for (i=0; i<N; i+=4)
02511                         LowLevel::Multiply2(T+i, Q, B+i);
02512                 for (i=2; i<N; i+=4)
02513                         if (LowLevel::Multiply2Add(T+i, Q, B+i))
02514                                 T[i+5] += (++T[i+4]==0);
02515         }
02516         else
02517         {
02518                 T[N] = LinearMultiply(T, B, Q[0], N);
02519                 T[N+1] = 0;
02520         }
02521 
02522         word borrow = Subtract(R, R, T, N+2);
02523         assert(!borrow && !R[N+1]);
02524 
02525         while (R[N] || Compare(R, B, N) >= 0)
02526         {
02527                 R[N] -= Subtract(R, R, B, N);
02528                 Q[1] += (++Q[0]==0);
02529                 assert(Q[0] || Q[1]); // no overflow
02530         }
02531 }
02532 
02533 // R[NB] -------- remainder = A%B
02534 // Q[NA-NB+2] --- quotient      = A/B
02535 // T[NA+2*NB+4] - temp work space
02536 // A[NA] -------- dividend
02537 // B[NB] -------- divisor
02538 
02539 void Divide(word *R, word *Q, word *T, const word *A, size_t NA, const word *B, size_t NB)
02540 {
02541         assert(NA && NB && NA%2==0 && NB%2==0);
02542         assert(B[NB-1] || B[NB-2]);
02543         assert(NB <= NA);
02544 
02545         // set up temporary work space
02546         word *const TA=T;
02547         word *const TB=T+NA+2;
02548         word *const TP=T+NA+2+NB;
02549 
02550         // copy B into TB and normalize it so that TB has highest bit set to 1
02551         unsigned shiftWords = (B[NB-1]==0);
02552         TB[0] = TB[NB-1] = 0;
02553         CopyWords(TB+shiftWords, B, NB-shiftWords);
02554         unsigned shiftBits = WORD_BITS - BitPrecision(TB[NB-1]);
02555         assert(shiftBits < WORD_BITS);
02556         ShiftWordsLeftByBits(TB, NB, shiftBits);
02557 
02558         // copy A into TA and normalize it
02559         TA[0] = TA[NA] = TA[NA+1] = 0;
02560         CopyWords(TA+shiftWords, A, NA);
02561         ShiftWordsLeftByBits(TA, NA+2, shiftBits);
02562 
02563         if (TA[NA+1]==0 && TA[NA] <= 1)
02564         {
02565                 Q[NA-NB+1] = Q[NA-NB] = 0;
02566                 while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0)
02567                 {
02568                         TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB);
02569                         ++Q[NA-NB];
02570                 }
02571         }
02572         else
02573         {
02574                 NA+=2;
02575                 assert(Compare(TA+NA-NB, TB, NB) < 0);
02576         }
02577 
02578         word BT[2];
02579         BT[0] = TB[NB-2] + 1;
02580         BT[1] = TB[NB-1] + (BT[0]==0);
02581 
02582         // start reducing TA mod TB, 2 words at a time
02583         for (size_t i=NA-2; i>=NB; i-=2)
02584         {
02585                 AtomicDivide(Q+i-NB, TA+i-2, BT);
02586                 CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB);
02587         }
02588 
02589         // copy TA into R, and denormalize it
02590         CopyWords(R, TA+shiftWords, NB);
02591         ShiftWordsRightByBits(R, NB, shiftBits);
02592 }
02593 
02594 static inline size_t EvenWordCount(const word *X, size_t N)
02595 {
02596         while (N && X[N-2]==0 && X[N-1]==0)
02597                 N-=2;
02598         return N;
02599 }
02600 
02601 // return k
02602 // R[N] --- result = A^(-1) * 2^k mod M
02603 // T[4*N] - temporary work space
02604 // A[NA] -- number to take inverse of
02605 // M[N] --- modulus
02606 
02607 unsigned int AlmostInverse(word *R, word *T, const word *A, size_t NA, const word *M, size_t N)
02608 {
02609         assert(NA<=N && N && N%2==0);
02610 
02611         word *b = T;
02612         word *c = T+N;
02613         word *f = T+2*N;
02614         word *g = T+3*N;
02615         size_t bcLen=2, fgLen=EvenWordCount(M, N);
02616         unsigned int k=0, s=0;
02617 
02618         SetWords(T, 0, 3*N);
02619         b[0]=1;
02620         CopyWords(f, A, NA);
02621         CopyWords(g, M, N);
02622 
02623         while (1)
02624         {
02625                 word t=f[0];
02626                 while (!t)
02627                 {
02628                         if (EvenWordCount(f, fgLen)==0)
02629                         {
02630                                 SetWords(R, 0, N);
02631                                 return 0;
02632                         }
02633 
02634                         ShiftWordsRightByWords(f, fgLen, 1);
02635                         if (c[bcLen-1]) bcLen+=2;
02636                         assert(bcLen <= N);
02637                         ShiftWordsLeftByWords(c, bcLen, 1);
02638                         k+=WORD_BITS;
02639                         t=f[0];
02640                 }
02641 
02642                 unsigned int i=0;
02643                 while (t%2 == 0)
02644                 {
02645                         t>>=1;
02646                         i++;
02647                 }
02648                 k+=i;
02649 
02650                 if (t==1 && f[1]==0 && EvenWordCount(f, fgLen)==2)
02651                 {
02652                         if (s%2==0)
02653                                 CopyWords(R, b, N);
02654                         else
02655                                 Subtract(R, M, b, N);
02656                         return k;
02657                 }
02658 
02659                 ShiftWordsRightByBits(f, fgLen, i);
02660                 t=ShiftWordsLeftByBits(c, bcLen, i);
02661                 if (t)
02662                 {
02663                         c[bcLen] = t;
02664                         bcLen+=2;
02665                         assert(bcLen <= N);
02666                 }
02667 
02668                 if (f[fgLen-2]==0 && g[fgLen-2]==0 && f[fgLen-1]==0 && g[fgLen-1]==0)
02669                         fgLen-=2;
02670 
02671                 if (Compare(f, g, fgLen)==-1)
02672                 {
02673                         std::swap(f, g);
02674                         std::swap(b, c);
02675                         s++;
02676                 }
02677 
02678                 Subtract(f, f, g, fgLen);
02679 
02680                 if (Add(b, b, c, bcLen))
02681                 {
02682                         b[bcLen] = 1;
02683                         bcLen+=2;
02684                         assert(bcLen <= N);
02685                 }
02686         }
02687 }
02688 
02689 // R[N] - result = A/(2^k) mod M
02690 // A[N] - input
02691 // M[N] - modulus
02692 
02693 void DivideByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
02694 {
02695         CopyWords(R, A, N);
02696 
02697         while (k--)
02698         {
02699                 if (R[0]%2==0)
02700                         ShiftWordsRightByBits(R, N, 1);
02701                 else
02702                 {
02703                         word carry = Add(R, R, M, N);
02704                         ShiftWordsRightByBits(R, N, 1);
02705                         R[N-1] += carry<<(WORD_BITS-1);
02706                 }
02707         }
02708 }
02709 
02710 // R[N] - result = A*(2^k) mod M
02711 // A[N] - input
02712 // M[N] - modulus
02713 
02714 void MultiplyByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
02715 {
02716         CopyWords(R, A, N);
02717 
02718         while (k--)
02719                 if (ShiftWordsLeftByBits(R, N, 1) || Compare(R, M, N)>=0)
02720                         Subtract(R, R, M, N);
02721 }
02722 
02723 // ******************************************************************
02724 
02725 InitializeInteger::InitializeInteger()
02726 {
02727         if (!g_pAssignIntToInteger)
02728         {
02729 #ifdef CRYPTOPP_X86ASM_AVAILABLE
02730                 SetPentiumFunctionPointers();
02731 #endif
02732                 g_pAssignIntToInteger = AssignIntToInteger;
02733         }
02734 }
02735 
02736 static const unsigned int RoundupSizeTable[] = {2, 2, 2, 4, 4, 8, 8, 8, 8};
02737 
02738 static inline size_t RoundupSize(size_t n)
02739 {
02740         if (n<=8)
02741                 return RoundupSizeTable[n];
02742         else if (n<=16)
02743                 return 16;
02744         else if (n<=32)
02745                 return 32;
02746         else if (n<=64)
02747                 return 64;
02748         else return size_t(1) << BitPrecision(n-1);
02749 }
02750 
02751 Integer::Integer()
02752         : reg(2), sign(POSITIVE)
02753 {
02754         reg[0] = reg[1] = 0;
02755 }
02756 
02757 Integer::Integer(const Integer& t)
02758         : reg(RoundupSize(t.WordCount())), sign(t.sign)
02759 {
02760         CopyWords(reg, t.reg, reg.size());
02761 }
02762 
02763 Integer::Integer(Sign s, lword value)
02764         : reg(2), sign(s)
02765 {
02766         reg[0] = word(value);
02767         reg[1] = word(SafeRightShift<WORD_BITS>(value));
02768 }
02769 
02770 Integer::Integer(signed long value)
02771         : reg(2)
02772 {
02773         if (value >= 0)
02774                 sign = POSITIVE;
02775         else
02776         {
02777                 sign = NEGATIVE;
02778                 value = -value;
02779         }
02780         reg[0] = word(value);
02781         reg[1] = word(SafeRightShift<WORD_BITS>((unsigned long)value));
02782 }
02783 
02784 Integer::Integer(Sign s, word high, word low)
02785         : reg(2), sign(s)
02786 {
02787         reg[0] = low;
02788         reg[1] = high;
02789 }
02790 
02791 bool Integer::IsConvertableToLong() const
02792 {
02793         if (ByteCount() > sizeof(long))
02794                 return false;
02795 
02796         unsigned long value = (unsigned long)reg[0];
02797         value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
02798 
02799         if (sign==POSITIVE)
02800                 return (signed long)value >= 0;
02801         else
02802                 return -(signed long)value < 0;
02803 }
02804 
02805 signed long Integer::ConvertToLong() const
02806 {
02807         assert(IsConvertableToLong());
02808 
02809         unsigned long value = (unsigned long)reg[0];
02810         value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
02811         return sign==POSITIVE ? value : -(signed long)value;
02812 }
02813 
02814 Integer::Integer(BufferedTransformation &encodedInteger, size_t byteCount, Signedness s)
02815 {
02816         Decode(encodedInteger, byteCount, s);
02817 }
02818 
02819 Integer::Integer(const byte *encodedInteger, size_t byteCount, Signedness s)
02820 {
02821         Decode(encodedInteger, byteCount, s);
02822 }
02823 
02824 Integer::Integer(BufferedTransformation &bt)
02825 {
02826         BERDecode(bt);
02827 }
02828 
02829 Integer::Integer(RandomNumberGenerator &rng, size_t bitcount)
02830 {
02831         Randomize(rng, bitcount);
02832 }
02833 
02834 Integer::Integer(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
02835 {
02836         if (!Randomize(rng, min, max, rnType, equiv, mod))
02837                 throw Integer::RandomNumberNotFound();
02838 }
02839 
02840 Integer Integer::Power2(size_t e)
02841 {
02842         Integer r((word)0, BitsToWords(e+1));
02843         r.SetBit(e);
02844         return r;
02845 }
02846 
02847 template <long i>
02848 struct NewInteger
02849 {
02850         Integer * operator()() const
02851         {
02852                 return new Integer(i);
02853         }
02854 };
02855 
02856 const Integer &Integer::Zero()
02857 {
02858         return Singleton<Integer>().Ref();
02859 }
02860 
02861 const Integer &Integer::One()
02862 {
02863         return Singleton<Integer, NewInteger<1> >().Ref();
02864 }
02865 
02866 const Integer &Integer::Two()
02867 {
02868         return Singleton<Integer, NewInteger<2> >().Ref();
02869 }
02870 
02871 bool Integer::operator!() const
02872 {
02873         return IsNegative() ? false : (reg[0]==0 && WordCount()==0);
02874 }
02875 
02876 Integer& Integer::operator=(const Integer& t)
02877 {
02878         if (this != &t)
02879         {
02880                 reg.New(RoundupSize(t.WordCount()));
02881                 CopyWords(reg, t.reg, reg.size());
02882                 sign = t.sign;
02883         }
02884         return *this;
02885 }
02886 
02887 bool Integer::GetBit(size_t n) const
02888 {
02889         if (n/WORD_BITS >= reg.size())
02890                 return 0;
02891         else
02892                 return bool((reg[n/WORD_BITS] >> (n % WORD_BITS)) & 1);
02893 }
02894 
02895 void Integer::SetBit(size_t n, bool value)
02896 {
02897         if (value)
02898         {
02899                 reg.CleanGrow(RoundupSize(BitsToWords(n+1)));
02900                 reg[n/WORD_BITS] |= (word(1) << (n%WORD_BITS));
02901         }
02902         else
02903         {
02904                 if (n/WORD_BITS < reg.size())
02905                         reg[n/WORD_BITS] &= ~(word(1) << (n%WORD_BITS));
02906         }
02907 }
02908 
02909 byte Integer::GetByte(size_t n) const
02910 {
02911         if (n/WORD_SIZE >= reg.size())
02912                 return 0;
02913         else
02914                 return byte(reg[n/WORD_SIZE] >> ((n%WORD_SIZE)*8));
02915 }
02916 
02917 void Integer::SetByte(size_t n, byte value)
02918 {
02919         reg.CleanGrow(RoundupSize(BytesToWords(n+1)));
02920         reg[n/WORD_SIZE] &= ~(word(0xff) << 8*(n%WORD_SIZE));
02921         reg[n/WORD_SIZE] |= (word(value) << 8*(n%WORD_SIZE));
02922 }
02923 
02924 lword Integer::GetBits(size_t i, size_t n) const
02925 {
02926         lword v = 0;
02927         assert(n <= sizeof(v)*8);
02928         for (unsigned int j=0; j<n; j++)
02929                 v |= lword(GetBit(i+j)) << j;
02930         return v;
02931 }
02932 
02933 Integer Integer::operator-() const
02934 {
02935         Integer result(*this);
02936         result.Negate();
02937         return result;
02938 }
02939 
02940 Integer Integer::AbsoluteValue() const
02941 {
02942         Integer result(*this);
02943         result.sign = POSITIVE;
02944         return result;
02945 }
02946 
02947 void Integer::swap(Integer &a)
02948 {
02949         reg.swap(a.reg);
02950         std::swap(sign, a.sign);
02951 }
02952 
02953 Integer::Integer(word value, size_t length)
02954         : reg(RoundupSize(length)), sign(POSITIVE)
02955 {
02956         reg[0] = value;
02957         SetWords(reg+1, 0, reg.size()-1);
02958 }
02959 
02960 template <class T>
02961 static Integer StringToInteger(const T *str)
02962 {
02963         int radix;
02964         // GCC workaround
02965         // std::char_traits<wchar_t>::length() not defined in GCC 3.2 and STLport 4.5.3
02966         unsigned int length;
02967         for (length = 0; str[length] != 0; length++) {}
02968 
02969         Integer v;
02970 
02971         if (length == 0)
02972                 return v;
02973 
02974         switch (str[length-1])
02975         {
02976         case 'h':
02977         case 'H':
02978                 radix=16;
02979                 break;
02980         case 'o':
02981         case 'O':
02982                 radix=8;
02983                 break;
02984         case 'b':
02985         case 'B':
02986                 radix=2;
02987                 break;
02988         default:
02989                 radix=10;
02990         }
02991 
02992         if (length > 2 && str[0] == '0' && str[1] == 'x')
02993                 radix = 16;
02994 
02995         for (unsigned i=0; i<length; i++)
02996         {
02997                 int digit;
02998 
02999                 if (str[i] >= '0' && str[i] <= '9')
03000                         digit = str[i] - '0';
03001                 else if (str[i] >= 'A' && str[i] <= 'F')
03002                         digit = str[i] - 'A' + 10;
03003                 else if (str[i] >= 'a' && str[i] <= 'f')
03004                         digit = str[i] - 'a' + 10;
03005                 else
03006                         digit = radix;
03007 
03008                 if (digit < radix)
03009                 {
03010                         v *= radix;
03011                         v += digit;
03012                 }
03013         }
03014 
03015         if (str[0] == '-')
03016                 v.Negate();
03017 
03018         return v;
03019 }
03020 
03021 Integer::Integer(const char *str)
03022         : reg(2), sign(POSITIVE)
03023 {
03024         *this = StringToInteger(str);
03025 }
03026 
03027 Integer::Integer(const wchar_t *str)
03028         : reg(2), sign(POSITIVE)
03029 {
03030         *this = StringToInteger(str);
03031 }
03032 
03033 unsigned int Integer::WordCount() const
03034 {
03035         return (unsigned int)CountWords(reg, reg.size());
03036 }
03037 
03038 unsigned int Integer::ByteCount() const
03039 {
03040         unsigned wordCount = WordCount();
03041         if (wordCount)
03042                 return (wordCount-1)*WORD_SIZE + BytePrecision(reg[wordCount-1]);
03043         else
03044                 return 0;
03045 }
03046 
03047 unsigned int Integer::BitCount() const
03048 {
03049         unsigned wordCount = WordCount();
03050         if (wordCount)
03051                 return (wordCount-1)*WORD_BITS + BitPrecision(reg[wordCount-1]);
03052         else
03053                 return 0;
03054 }
03055 
03056 void Integer::Decode(const byte *input, size_t inputLen, Signedness s)
03057 {
03058         StringStore store(input, inputLen);
03059         Decode(store, inputLen, s);
03060 }
03061 
03062 void Integer::Decode(BufferedTransformation &bt, size_t inputLen, Signedness s)
03063 {
03064         assert(bt.MaxRetrievable() >= inputLen);
03065 
03066         byte b;
03067         bt.Peek(b);
03068         sign = ((s==SIGNED) && (b & 0x80)) ? NEGATIVE : POSITIVE;
03069 
03070         while (inputLen>0 && (sign==POSITIVE ? b==0 : b==0xff))
03071         {
03072                 bt.Skip(1);
03073                 inputLen--;
03074                 bt.Peek(b);
03075         }
03076 
03077         reg.CleanNew(RoundupSize(BytesToWords(inputLen)));
03078 
03079         for (size_t i=inputLen; i > 0; i--)
03080         {
03081                 bt.Get(b);
03082                 reg[(i-1)/WORD_SIZE] |= word(b) << ((i-1)%WORD_SIZE)*8;
03083         }
03084 
03085         if (sign == NEGATIVE)
03086         {
03087                 for (size_t i=inputLen; i<reg.size()*WORD_SIZE; i++)
03088                         reg[i/WORD_SIZE] |= word(0xff) << (i%WORD_SIZE)*8;
03089                 TwosComplement(reg, reg.size());
03090         }
03091 }
03092 
03093 size_t Integer::MinEncodedSize(Signedness signedness) const
03094 {
03095         unsigned int outputLen = STDMAX(1U, ByteCount());
03096         if (signedness == UNSIGNED)
03097                 return outputLen;
03098         if (NotNegative() && (GetByte(outputLen-1) & 0x80))
03099                 outputLen++;
03100         if (IsNegative() && *this < -Power2(outputLen*8-1))
03101                 outputLen++;
03102         return outputLen;
03103 }
03104 
03105 void Integer::Encode(byte *output, size_t outputLen, Signedness signedness) const
03106 {
03107         ArraySink sink(output, outputLen);
03108         Encode(sink, outputLen, signedness);
03109 }
03110 
03111 void Integer::Encode(BufferedTransformation &bt, size_t outputLen, Signedness signedness) const
03112 {
03113         if (signedness == UNSIGNED || NotNegative())
03114         {
03115                 for (size_t i=outputLen; i > 0; i--)
03116                         bt.Put(GetByte(i-1));
03117         }
03118         else
03119         {
03120                 // take two's complement of *this
03121                 Integer temp = Integer::Power2(8*STDMAX((size_t)ByteCount(), outputLen)) + *this;
03122                 temp.Encode(bt, outputLen, UNSIGNED);
03123         }
03124 }
03125 
03126 void Integer::DEREncode(BufferedTransformation &bt) const
03127 {
03128         DERGeneralEncoder enc(bt, INTEGER);
03129         Encode(enc, MinEncodedSize(SIGNED), SIGNED);
03130         enc.MessageEnd();
03131 }
03132 
03133 void Integer::BERDecode(const byte *input, size_t len)
03134 {
03135         StringStore store(input, len);
03136         BERDecode(store);
03137 }
03138 
03139 void Integer::BERDecode(BufferedTransformation &bt)
03140 {
03141         BERGeneralDecoder dec(bt, INTEGER);
03142         if (!dec.IsDefiniteLength() || dec.MaxRetrievable() < dec.RemainingLength())
03143                 BERDecodeError();
03144         Decode(dec, (size_t)dec.RemainingLength(), SIGNED);
03145         dec.MessageEnd();
03146 }
03147 
03148 void Integer::DEREncodeAsOctetString(BufferedTransformation &bt, size_t length) const
03149 {
03150         DERGeneralEncoder enc(bt, OCTET_STRING);
03151         Encode(enc, length);
03152         enc.MessageEnd();
03153 }
03154 
03155 void Integer::BERDecodeAsOctetString(BufferedTransformation &bt, size_t length)
03156 {
03157         BERGeneralDecoder dec(bt, OCTET_STRING);
03158         if (!dec.IsDefiniteLength() || dec.RemainingLength() != length)
03159                 BERDecodeError();
03160         Decode(dec, length);
03161         dec.MessageEnd();
03162 }
03163 
03164 size_t Integer::OpenPGPEncode(byte *output, size_t len) const
03165 {
03166         ArraySink sink(output, len);
03167         return OpenPGPEncode(sink);
03168 }
03169 
03170 size_t Integer::OpenPGPEncode(BufferedTransformation &bt) const
03171 {
03172         word16 bitCount = BitCount();
03173         bt.PutWord16(bitCount);
03174         size_t byteCount = BitsToBytes(bitCount);
03175         Encode(bt, byteCount);
03176         return 2 + byteCount;
03177 }
03178 
03179 void Integer::OpenPGPDecode(const byte *input, size_t len)
03180 {
03181         StringStore store(input, len);
03182         OpenPGPDecode(store);
03183 }
03184 
03185 void Integer::OpenPGPDecode(BufferedTransformation &bt)
03186 {
03187         word16 bitCount;
03188         if (bt.GetWord16(bitCount) != 2 || bt.MaxRetrievable() < BitsToBytes(bitCount))
03189                 throw OpenPGPDecodeErr();
03190         Decode(bt, BitsToBytes(bitCount));
03191 }
03192 
03193 void Integer::Randomize(RandomNumberGenerator &rng, size_t nbits)
03194 {
03195         const size_t nbytes = nbits/8 + 1;
03196         SecByteBlock buf(nbytes);
03197         rng.GenerateBlock(buf, nbytes);
03198         if (nbytes)
03199                 buf[0] = (byte)Crop(buf[0], nbits % 8);
03200         Decode(buf, nbytes, UNSIGNED);
03201 }
03202 
03203 void Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max)
03204 {
03205         if (min > max)
03206                 throw InvalidArgument("Integer: Min must be no greater than Max");
03207 
03208         Integer range = max - min;
03209         const unsigned int nbits = range.BitCount();
03210 
03211         do
03212         {
03213                 Randomize(rng, nbits);
03214         }
03215         while (*this > range);
03216 
03217         *this += min;
03218 }
03219 
03220 bool Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
03221 {
03222         return GenerateRandomNoThrow(rng, MakeParameters("Min", min)("Max", max)("RandomNumberType", rnType)("EquivalentTo", equiv)("Mod", mod));
03223 }
03224 
03225 class KDF2_RNG : public RandomNumberGenerator
03226 {
03227 public:
03228         KDF2_RNG(const byte *seed, size_t seedSize)
03229                 : m_counter(0), m_counterAndSeed(seedSize + 4)
03230         {
03231                 memcpy(m_counterAndSeed + 4, seed, seedSize);
03232         }
03233 
03234         byte GenerateByte()
03235         {
03236                 byte b;
03237                 GenerateBlock(&b, 1);
03238                 return b;
03239         }
03240 
03241         void GenerateBlock(byte *output, size_t size)
03242         {
03243                 UnalignedPutWord(BIG_ENDIAN_ORDER, m_counterAndSeed, m_counter);
03244                 ++m_counter;
03245                 P1363_KDF2<SHA1>::DeriveKey(output, size, m_counterAndSeed, m_counterAndSeed.size(), NULL, 0);
03246         }
03247 
03248 private:
03249         word32 m_counter;
03250         SecByteBlock m_counterAndSeed;
03251 };
03252 
03253 bool Integer::GenerateRandomNoThrow(RandomNumberGenerator &i_rng, const NameValuePairs &params)
03254 {
03255         Integer min = params.GetValueWithDefault("Min", Integer::Zero());
03256         Integer max;
03257         if (!params.GetValue("Max", max))
03258         {
03259                 int bitLength;
03260                 if (params.GetIntValue("BitLength", bitLength))
03261                         max = Integer::Power2(bitLength);
03262                 else
03263                         throw InvalidArgument("Integer: missing Max argument");
03264         }
03265         if (min > max)
03266                 throw InvalidArgument("Integer: Min must be no greater than Max");
03267 
03268         Integer equiv = params.GetValueWithDefault("EquivalentTo", Integer::Zero());
03269         Integer mod = params.GetValueWithDefault("Mod", Integer::One());
03270 
03271         if (equiv.IsNegative() || equiv >= mod)
03272                 throw InvalidArgument("Integer: invalid EquivalentTo and/or Mod argument");
03273 
03274         Integer::RandomNumberType rnType = params.GetValueWithDefault("RandomNumberType", Integer::ANY);
03275 
03276         member_ptr<KDF2_RNG> kdf2Rng;
03277         ConstByteArrayParameter seed;
03278         if (params.GetValue("Seed", seed))
03279         {
03280                 ByteQueue bq;
03281                 DERSequenceEncoder seq(bq);
03282                 min.DEREncode(seq);
03283                 max.DEREncode(seq);
03284                 equiv.DEREncode(seq);
03285                 mod.DEREncode(seq);
03286                 DEREncodeUnsigned(seq, rnType);
03287                 DEREncodeOctetString(seq, seed.begin(), seed.size());
03288                 seq.MessageEnd();
03289 
03290                 SecByteBlock finalSeed((size_t)bq.MaxRetrievable());
03291                 bq.Get(finalSeed, finalSeed.size());
03292                 kdf2Rng.reset(new KDF2_RNG(finalSeed.begin(), finalSeed.size()));
03293         }
03294         RandomNumberGenerator &rng = kdf2Rng.get() ? (RandomNumberGenerator &)*kdf2Rng : i_rng;
03295 
03296         switch (rnType)
03297         {
03298                 case ANY:
03299                         if (mod == One())
03300                                 Randomize(rng, min, max);
03301                         else
03302                         {
03303                                 Integer min1 = min + (equiv-min)%mod;
03304                                 if (max < min1)
03305                                         return false;
03306                                 Randomize(rng, Zero(), (max - min1) / mod);
03307                                 *this *= mod;
03308                                 *this += min1;
03309                         }
03310                         return true;
03311 
03312                 case PRIME:
03313                 {
03314                         const PrimeSelector *pSelector = params.GetValueWithDefault(Name::PointerToPrimeSelector(), (const PrimeSelector *)NULL);
03315 
03316                         int i;
03317                         i = 0;
03318                         while (1)
03319                         {
03320                                 if (++i==16)
03321                                 {
03322                                         // check if there are any suitable primes in [min, max]
03323                                         Integer first = min;
03324                                         if (FirstPrime(first, max, equiv, mod, pSelector))
03325                                         {
03326                                                 // if there is only one suitable prime, we're done
03327                                                 *this = first;
03328                                                 if (!FirstPrime(first, max, equiv, mod, pSelector))
03329                                                         return true;
03330                                         }
03331                                         else
03332                                                 return false;
03333                                 }
03334 
03335                                 Randomize(rng, min, max);
03336                                 if (FirstPrime(*this, STDMIN(*this+mod*PrimeSearchInterval(max), max), equiv, mod, pSelector))
03337                                         return true;
03338                         }
03339                 }
03340 
03341                 default:
03342                         throw InvalidArgument("Integer: invalid RandomNumberType argument");
03343         }
03344 }
03345 
03346 std::istream& operator>>(std::istream& in, Integer &a)
03347 {
03348         char c;
03349         unsigned int length = 0;
03350         SecBlock<char> str(length + 16);
03351 
03352         std::ws(in);
03353 
03354         do
03355         {
03356                 in.read(&c, 1);
03357                 str[length++] = c;
03358                 if (length >= str.size())
03359                         str.Grow(length + 16);
03360         }
03361         while (in && (c=='-' || c=='x' || (c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F') || c=='h' || c=='H' || c=='o' || c=='O' || c==',' || c=='.'));
03362 
03363         if (in.gcount())
03364                 in.putback(c);
03365         str[length-1] = '\0';
03366         a = Integer(str);
03367 
03368         return in;
03369 }
03370 
03371 std::ostream& operator<<(std::ostream& out, const Integer &a)
03372 {
03373         // Get relevant conversion specifications from ostream.
03374         long f = out.flags() & std::ios::basefield; // Get base digits.
03375         int base, block;
03376         char suffix;
03377         switch(f)
03378         {
03379         case std::ios::oct :
03380                 base = 8;
03381                 block = 8;
03382                 suffix = 'o';
03383                 break;
03384         case std::ios::hex :
03385                 base = 16;
03386                 block = 4;
03387                 suffix = 'h';
03388                 break;
03389         default :
03390                 base = 10;
03391                 block = 3;
03392                 suffix = '.';
03393         }
03394 
03395         SecBlock<char> s(a.BitCount() / (BitPrecision(base)-1) + 1);
03396         Integer temp1=a, temp2;
03397         unsigned i=0;
03398         const char vec[]="0123456789ABCDEF";
03399 
03400         if (a.IsNegative())
03401         {
03402                 out << '-';
03403                 temp1.Negate();
03404         }
03405 
03406         if (!a)
03407                 out << '0';
03408 
03409         while (!!temp1)
03410         {
03411                 word digit;
03412                 Integer::Divide(digit, temp2, temp1, base);
03413                 s[i++]=vec[digit];
03414                 temp1=temp2;
03415         }
03416 
03417         while (i--)
03418         {
03419                 out << s[i];
03420 //              if (i && !(i%block))
03421 //                      out << ",";
03422         }
03423         return out << suffix;
03424 }
03425 
03426 Integer& Integer::operator++()
03427 {
03428         if (NotNegative())
03429         {
03430                 if (Increment(reg, reg.size()))
03431                 {
03432                         reg.CleanGrow(2*reg.size());
03433                         reg[reg.size()/2]=1;
03434                 }
03435         }
03436         else
03437         {
03438                 word borrow = Decrement(reg, reg.size());
03439                 assert(!borrow);
03440                 if (WordCount()==0)
03441                         *this = Zero();
03442         }
03443         return *this;
03444 }
03445 
03446 Integer& Integer::operator--()
03447 {
03448         if (IsNegative())
03449         {
03450                 if (Increment(reg, reg.size()))
03451                 {
03452                         reg.CleanGrow(2*reg.size());
03453                         reg[reg.size()/2]=1;
03454                 }
03455         }
03456         else
03457         {
03458                 if (Decrement(reg, reg.size()))
03459                         *this = -One();
03460         }
03461         return *this;
03462 }
03463 
03464 void PositiveAdd(Integer &sum, const Integer &a, const Integer& b)
03465 {
03466         int carry;
03467         if (a.reg.size() == b.reg.size())
03468                 carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
03469         else if (a.reg.size() > b.reg.size())
03470         {
03471                 carry = Add(sum.reg, a.reg, b.reg, b.reg.size());
03472                 CopyWords(sum.reg+b.reg.size(), a.reg+b.reg.size(), a.reg.size()-b.reg.size());
03473                 carry = Increment(sum.reg+b.reg.size(), a.reg.size()-b.reg.size(), carry);
03474         }
03475         else
03476         {
03477                 carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
03478                 CopyWords(sum.reg+a.reg.size(), b.reg+a.reg.size(), b.reg.size()-a.reg.size());
03479                 carry = Increment(sum.reg+a.reg.size(), b.reg.size()-a.reg.size(), carry);
03480         }
03481 
03482         if (carry)
03483         {
03484                 sum.reg.CleanGrow(2*sum.reg.size());
03485                 sum.reg[sum.reg.size()/2] = 1;
03486         }
03487         sum.sign = Integer::POSITIVE;
03488 }
03489 
03490 void PositiveSubtract(Integer &diff, const Integer &a, const Integer& b)
03491 {
03492         unsigned aSize = a.WordCount();
03493         aSize += aSize%2;
03494         unsigned bSize = b.WordCount();
03495         bSize += bSize%2;
03496 
03497         if (aSize == bSize)
03498         {
03499                 if (Compare(a.reg, b.reg, aSize) >= 0)
03500                 {
03501                         Subtract(diff.reg, a.reg, b.reg, aSize);
03502                         diff.sign = Integer::POSITIVE;
03503                 }
03504                 else
03505                 {
03506                         Subtract(diff.reg, b.reg, a.reg, aSize);
03507                         diff.sign = Integer::NEGATIVE;
03508                 }
03509         }
03510         else if (aSize > bSize)
03511         {
03512                 word borrow = Subtract(diff.reg, a.reg, b.reg, bSize);
03513                 CopyWords(diff.reg+bSize, a.reg+bSize, aSize-bSize);
03514                 borrow = Decrement(diff.reg+bSize, aSize-bSize, borrow);
03515                 assert(!borrow);
03516                 diff.sign = Integer::POSITIVE;
03517         }
03518         else
03519         {
03520                 word borrow = Subtract(diff.reg, b.reg, a.reg, aSize);
03521                 CopyWords(diff.reg+aSize, b.reg+aSize, bSize-aSize);
03522                 borrow = Decrement(diff.reg+aSize, bSize-aSize, borrow);
03523                 assert(!borrow);
03524                 diff.sign = Integer::NEGATIVE;
03525         }
03526 }
03527 
03528 // MSVC .NET 2003 workaround
03529 template <class T> inline const T& STDMAX2(const T& a, const T& b)
03530 {
03531         return a < b ? b : a;
03532 }
03533 
03534 Integer Integer::Plus(const Integer& b) const
03535 {
03536         Integer sum((word)0, STDMAX2(reg.size(), b.reg.size()));
03537         if (NotNegative())
03538         {
03539                 if (b.NotNegative())
03540                         PositiveAdd(sum, *this, b);
03541                 else
03542                         PositiveSubtract(sum, *this, b);
03543         }
03544         else
03545         {
03546                 if (b.NotNegative())
03547                         PositiveSubtract(sum, b, *this);
03548                 else
03549                 {
03550                         PositiveAdd(sum, *this, b);
03551                         sum.sign = Integer::NEGATIVE;
03552                 }
03553         }
03554         return sum;
03555 }
03556 
03557 Integer& Integer::operator+=(const Integer& t)
03558 {
03559         reg.CleanGrow(t.reg.size());
03560         if (NotNegative())
03561         {
03562                 if (t.NotNegative())
03563                         PositiveAdd(*this, *this, t);
03564                 else
03565                         PositiveSubtract(*this, *this, t);
03566         }
03567         else
03568         {
03569                 if (t.NotNegative())
03570                         PositiveSubtract(*this, t, *this);
03571                 else
03572                 {
03573                         PositiveAdd(*this, *this, t);
03574                         sign = Integer::NEGATIVE;
03575                 }
03576         }
03577         return *this;
03578 }
03579 
03580 Integer Integer::Minus(const Integer& b) const
03581 {
03582         Integer diff((word)0, STDMAX2(reg.size(), b.reg.size()));
03583         if (NotNegative())
03584         {
03585                 if (b.NotNegative())
03586                         PositiveSubtract(diff, *this, b);
03587                 else
03588                         PositiveAdd(diff, *this, b);
03589         }
03590         else
03591         {
03592                 if (b.NotNegative())
03593                 {
03594                         PositiveAdd(diff, *this, b);
03595                         diff.sign = Integer::NEGATIVE;
03596                 }
03597                 else
03598                         PositiveSubtract(diff, b, *this);
03599         }
03600         return diff;
03601 }
03602 
03603 Integer& Integer::operator-=(const Integer& t)
03604 {
03605         reg.CleanGrow(t.reg.size());
03606         if (NotNegative())
03607         {
03608                 if (t.NotNegative())
03609                         PositiveSubtract(*this, *this, t);
03610                 else
03611                         PositiveAdd(*this, *this, t);
03612         }
03613         else
03614         {
03615                 if (t.NotNegative())
03616                 {
03617                         PositiveAdd(*this, *this, t);
03618                         sign = Integer::NEGATIVE;
03619                 }
03620                 else
03621                         PositiveSubtract(*this, t, *this);
03622         }
03623         return *this;
03624 }
03625 
03626 Integer& Integer::operator<<=(size_t n)
03627 {
03628         const size_t wordCount = WordCount();
03629         const size_t shiftWords = n / WORD_BITS;
03630         const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
03631 
03632         reg.CleanGrow(RoundupSize(wordCount+BitsToWords(n)));
03633         ShiftWordsLeftByWords(reg, wordCount + shiftWords, shiftWords);
03634         ShiftWordsLeftByBits(reg+shiftWords, wordCount+BitsToWords(shiftBits), shiftBits);
03635         return *this;
03636 }
03637 
03638 Integer& Integer::operator>>=(size_t n)
03639 {
03640         const size_t wordCount = WordCount();
03641         const size_t shiftWords = n / WORD_BITS;
03642         const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
03643 
03644         ShiftWordsRightByWords(reg, wordCount, shiftWords);
03645         if (wordCount > shiftWords)
03646                 ShiftWordsRightByBits(reg, wordCount-shiftWords, shiftBits);
03647         if (IsNegative() && WordCount()==0)   // avoid -0
03648                 *this = Zero();
03649         return *this;
03650 }
03651 
03652 void PositiveMultiply(Integer &product, const Integer &a, const Integer &b)
03653 {
03654         size_t aSize = RoundupSize(a.WordCount());
03655         size_t bSize = RoundupSize(b.WordCount());
03656 
03657         product.reg.CleanNew(RoundupSize(aSize+bSize));
03658         product.sign = Integer::POSITIVE;
03659 
03660         SecAlignedWordBlock workspace(aSize + bSize);
03661         AsymmetricMultiply(product.reg, workspace, a.reg, aSize, b.reg, bSize);
03662 }
03663 
03664 void Multiply(Integer &product, const Integer &a, const Integer &b)
03665 {
03666         PositiveMultiply(product, a, b);
03667 
03668         if (a.NotNegative() != b.NotNegative())
03669                 product.Negate();
03670 }
03671 
03672 Integer Integer::Times(const Integer &b) const
03673 {
03674         Integer product;
03675         Multiply(product, *this, b);
03676         return product;
03677 }
03678 
03679 /*
03680 void PositiveDivide(Integer &remainder, Integer &quotient,
03681                                    const Integer &dividend, const Integer &divisor)
03682 {
03683         remainder.reg.CleanNew(divisor.reg.size());
03684         remainder.sign = Integer::POSITIVE;
03685         quotient.reg.New(0);
03686         quotient.sign = Integer::POSITIVE;
03687         unsigned i=dividend.BitCount();
03688         while (i--)
03689         {
03690                 word overflow = ShiftWordsLeftByBits(remainder.reg, remainder.reg.size(), 1);
03691                 remainder.reg[0] |= dividend[i];
03692                 if (overflow || remainder >= divisor)
03693                 {
03694                         Subtract(remainder.reg, remainder.reg, divisor.reg, remainder.reg.size());
03695                         quotient.SetBit(i);
03696                 }
03697         }
03698 }
03699 */
03700 
03701 void PositiveDivide(Integer &remainder, Integer &quotient,
03702                                    const Integer &a, const Integer &b)
03703 {
03704         unsigned aSize = a.WordCount();
03705         unsigned bSize = b.WordCount();
03706 
03707         if (!bSize)
03708                 throw Integer::DivideByZero();
03709 
03710         if (a.PositiveCompare(b) == -1)
03711         {
03712                 remainder = a;
03713                 remainder.sign = Integer::POSITIVE;
03714                 quotient = Integer::Zero();
03715                 return;
03716         }
03717 
03718         aSize += aSize%2;       // round up to next even number
03719         bSize += bSize%2;
03720 
03721         remainder.reg.CleanNew(RoundupSize(bSize));
03722         remainder.sign = Integer::POSITIVE;
03723         quotient.reg.CleanNew(RoundupSize(aSize-bSize+2));
03724         quotient.sign = Integer::POSITIVE;
03725 
03726         SecAlignedWordBlock T(aSize+2*bSize+4);
03727         Divide(remainder.reg, quotient.reg, T, a.reg, aSize, b.reg, bSize);
03728 }
03729 
03730 void Integer::Divide(Integer &remainder, Integer &quotient, const Integer &dividend, const Integer &divisor)
03731 {
03732         PositiveDivide(remainder, quotient, dividend, divisor);
03733 
03734         if (dividend.IsNegative())
03735         {
03736                 quotient.Negate();
03737                 if (remainder.NotZero())
03738                 {
03739                         --quotient;
03740                         remainder = divisor.AbsoluteValue() - remainder;
03741                 }
03742         }
03743 
03744         if (divisor.IsNegative())
03745                 quotient.Negate();
03746 }
03747 
03748 void Integer::DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n)
03749 {
03750         q = a;
03751         q >>= n;
03752 
03753         const size_t wordCount = BitsToWords(n);
03754         if (wordCount <= a.WordCount())
03755         {
03756                 r.reg.resize(RoundupSize(wordCount));
03757                 CopyWords(r.reg, a.reg, wordCount);
03758                 SetWords(r.reg+wordCount, 0, r.reg.size()-wordCount);
03759                 if (n % WORD_BITS != 0)
03760                         r.reg[wordCount-1] %= (word(1) << (n % WORD_BITS));
03761         }
03762         else
03763         {
03764                 r.reg.resize(RoundupSize(a.WordCount()));
03765                 CopyWords(r.reg, a.reg, r.reg.size());
03766         }
03767         r.sign = POSITIVE;
03768 
03769         if (a.IsNegative() && r.NotZero())
03770         {
03771                 --q;
03772                 r = Power2(n) - r;
03773         }
03774 }
03775 
03776 Integer Integer::DividedBy(const Integer &b) const
03777 {
03778         Integer remainder, quotient;
03779         Integer::Divide(remainder, quotient, *this, b);
03780         return quotient;
03781 }
03782 
03783 Integer Integer::Modulo(const Integer &b) const
03784 {
03785         Integer remainder, quotient;
03786         Integer::Divide(remainder, quotient, *this, b);
03787         return remainder;
03788 }
03789 
03790 void Integer::Divide(word &remainder, Integer &quotient, const Integer &dividend, word divisor)
03791 {
03792         if (!divisor)
03793                 throw Integer::DivideByZero();
03794 
03795         assert(divisor);
03796 
03797         if ((divisor & (divisor-1)) == 0)       // divisor is a power of 2
03798         {
03799                 quotient = dividend >> (BitPrecision(divisor)-1);
03800                 remainder = dividend.reg[0] & (divisor-1);
03801                 return;
03802         }
03803 
03804         unsigned int i = dividend.WordCount();
03805         quotient.reg.CleanNew(RoundupSize(i));
03806         remainder = 0;
03807         while (i--)
03808         {
03809                 quotient.reg[i] = DWord(dividend.reg[i], remainder) / divisor;
03810                 remainder = DWord(dividend.reg[i], remainder) % divisor;
03811         }
03812 
03813         if (dividend.NotNegative())
03814                 quotient.sign = POSITIVE;
03815         else
03816         {
03817                 quotient.sign = NEGATIVE;
03818                 if (remainder)
03819                 {
03820                         --quotient;
03821                         remainder = divisor - remainder;
03822                 }
03823         }
03824 }
03825 
03826 Integer Integer::DividedBy(word b) const
03827 {
03828         word remainder;
03829         Integer quotient;
03830         Integer::Divide(remainder, quotient, *this, b);
03831         return quotient;
03832 }
03833 
03834 word Integer::Modulo(word divisor) const
03835 {
03836         if (!divisor)
03837                 throw Integer::DivideByZero();
03838 
03839         assert(divisor);
03840 
03841         word remainder;
03842 
03843         if ((divisor & (divisor-1)) == 0)       // divisor is a power of 2
03844                 remainder = reg[0] & (divisor-1);
03845         else
03846         {
03847                 unsigned int i = WordCount();
03848 
03849                 if (divisor <= 5)
03850                 {
03851                         DWord sum(0, 0);
03852                         while (i--)
03853                                 sum += reg[i];
03854                         remainder = sum % divisor;
03855                 }
03856                 else
03857                 {
03858                         remainder = 0;
03859                         while (i--)
03860                                 remainder = DWord(reg[i], remainder) % divisor;
03861                 }
03862         }
03863 
03864         if (IsNegative() && remainder)
03865                 remainder = divisor - remainder;
03866 
03867         return remainder;
03868 }
03869 
03870 void Integer::Negate()
03871 {
03872         if (!!(*this))  // don't flip sign if *this==0
03873                 sign = Sign(1-sign);
03874 }
03875 
03876 int Integer::PositiveCompare(const Integer& t) const
03877 {
03878         unsigned size = WordCount(), tSize = t.WordCount();
03879 
03880         if (size == tSize)
03881                 return CryptoPP::Compare(reg, t.reg, size);
03882         else
03883                 return size > tSize ? 1 : -1;
03884 }
03885 
03886 int Integer::Compare(const Integer& t) const
03887 {
03888         if (NotNegative())
03889         {
03890                 if (t.NotNegative())
03891                         return PositiveCompare(t);
03892                 else
03893                         return 1;
03894         }
03895         else
03896         {
03897                 if (t.NotNegative())
03898                         return -1;
03899                 else
03900                         return -PositiveCompare(t);
03901         }
03902 }
03903 
03904 Integer Integer::SquareRoot() const
03905 {
03906         if (!IsPositive())
03907                 return Zero();
03908 
03909         // overestimate square root
03910         Integer x, y = Power2((BitCount()+1)/2);
03911         assert(y*y >= *this);
03912 
03913         do
03914         {
03915                 x = y;
03916                 y = (x + *this/x) >> 1;
03917         } while (y<x);
03918 
03919         return x;
03920 }
03921 
03922 bool Integer::IsSquare() const
03923 {
03924         Integer r = SquareRoot();
03925         return *this == r.Squared();
03926 }
03927 
03928 bool Integer::IsUnit() const
03929 {
03930         return (WordCount() == 1) && (reg[0] == 1);
03931 }
03932 
03933 Integer Integer::MultiplicativeInverse() const
03934 {
03935         return IsUnit() ? *this : Zero();
03936 }
03937 
03938 Integer a_times_b_mod_c(const Integer &x, const Integer& y, const Integer& m)
03939 {
03940         return x*y%m;
03941 }
03942 
03943 Integer a_exp_b_mod_c(const Integer &x, const Integer& e, const Integer& m)
03944 {
03945         ModularArithmetic mr(m);
03946         return mr.Exponentiate(x, e);
03947 }
03948 
03949 Integer Integer::Gcd(const Integer &a, const Integer &b)
03950 {
03951         return EuclideanDomainOf<Integer>().Gcd(a, b);
03952 }
03953 
03954 Integer Integer::InverseMod(const Integer &m) const
03955 {
03956         assert(m.NotNegative());
03957 
03958         if (IsNegative() || *this>=m)
03959                 return (*this%m).InverseMod(m);
03960 
03961         if (m.IsEven())
03962         {
03963                 if (!m || IsEven())
03964                         return Zero();  // no inverse
03965                 if (*this == One())
03966                         return One();
03967 
03968                 Integer u = m.InverseMod(*this);
03969                 return !u ? Zero() : (m*(*this-u)+1)/(*this);
03970         }
03971 
03972         SecBlock<word> T(m.reg.size() * 4);
03973         Integer r((word)0, m.reg.size());
03974         unsigned k = AlmostInverse(r.reg, T, reg, reg.size(), m.reg, m.reg.size());
03975         DivideByPower2Mod(r.reg, r.reg, k, m.reg, m.reg.size());
03976         return r;
03977 }
03978 
03979 word Integer::InverseMod(word mod) const
03980 {
03981         word g0 = mod, g1 = *this % mod;
03982         word v0 = 0, v1 = 1;
03983         word y;
03984 
03985         while (g1)
03986         {
03987                 if (g1 == 1)
03988                         return v1;
03989                 y = g0 / g1;
03990                 g0 = g0 % g1;
03991                 v0 += y * v1;
03992 
03993                 if (!g0)
03994                         break;
03995                 if (g0 == 1)
03996                         return mod-v0;
03997                 y = g1 / g0;
03998                 g1 = g1 % g0;
03999                 v1 += y * v0;
04000         }
04001         return 0;
04002 }
04003 
04004 // ********************************************************
04005 
04006 ModularArithmetic::ModularArithmetic(BufferedTransformation &bt)
04007 {
04008         BERSequenceDecoder seq(bt);
04009         OID oid(seq);
04010         if (oid != ASN1::prime_field())
04011                 BERDecodeError();
04012         m_modulus.BERDecode(seq);
04013         seq.MessageEnd();
04014         m_result.reg.resize(m_modulus.reg.size());
04015 }
04016 
04017 void ModularArithmetic::DEREncode(BufferedTransformation &bt) const
04018 {
04019         DERSequenceEncoder seq(bt);
04020         ASN1::prime_field().DEREncode(seq);
04021         m_modulus.DEREncode(seq);
04022         seq.MessageEnd();
04023 }
04024 
04025 void ModularArithmetic::DEREncodeElement(BufferedTransformation &out, const Element &a) const
04026 {
04027         a.DEREncodeAsOctetString(out, MaxElementByteLength());
04028 }
04029 
04030 void ModularArithmetic::BERDecodeElement(BufferedTransformation &in, Element &a) const
04031 {
04032         a.BERDecodeAsOctetString(in, MaxElementByteLength());
04033 }
04034 
04035 const Integer& ModularArithmetic::Half(const Integer &a) const
04036 {
04037         if (a.reg.size()==m_modulus.reg.size())
04038         {
04039                 CryptoPP::DivideByPower2Mod(m_result.reg.begin(), a.reg, 1, m_modulus.reg, a.reg.size());
04040                 return m_result;
04041         }
04042         else
04043                 return m_result1 = (a.IsEven() ? (a >> 1) : ((a+m_modulus) >> 1));
04044 }
04045 
04046 const Integer& ModularArithmetic::Add(const Integer &a, const Integer &b) const
04047 {
04048         if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04049         {
04050                 if (CryptoPP::Add(m_result.reg.begin(), a.reg, b.reg, a.reg.size())
04051                         || Compare(m_result.reg, m_modulus.reg, a.reg.size()) >= 0)
04052                 {
04053                         CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
04054                 }
04055                 return m_result;
04056         }
04057         else
04058         {
04059                 m_result1 = a+b;
04060                 if (m_result1 >= m_modulus)
04061                         m_result1 -= m_modulus;
04062                 return m_result1;
04063         }
04064 }
04065 
04066 Integer& ModularArithmetic::Accumulate(Integer &a, const Integer &b) const
04067 {
04068         if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04069         {
04070                 if (CryptoPP::Add(a.reg, a.reg, b.reg, a.reg.size())
04071                         || Compare(a.reg, m_modulus.reg, a.reg.size()) >= 0)
04072                 {
04073                         CryptoPP::Subtract(a.reg, a.reg, m_modulus.reg, a.reg.size());
04074                 }
04075         }
04076         else
04077         {
04078                 a+=b;
04079                 if (a>=m_modulus)
04080                         a-=m_modulus;
04081         }
04082 
04083         return a;
04084 }
04085 
04086 const Integer& ModularArithmetic::Subtract(const Integer &a, const Integer &b) const
04087 {
04088         if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04089         {
04090                 if (CryptoPP::Subtract(m_result.reg.begin(), a.reg, b.reg, a.reg.size()))
04091                         CryptoPP::Add(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
04092                 return m_result;
04093         }
04094         else
04095         {
04096                 m_result1 = a-b;
04097                 if (m_result1.IsNegative())
04098                         m_result1 += m_modulus;
04099                 return m_result1;
04100         }
04101 }
04102 
04103 Integer& ModularArithmetic::Reduce(Integer &a, const Integer &b) const
04104 {
04105         if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04106         {
04107                 if (CryptoPP::Subtract(a.reg, a.reg, b.reg, a.reg.size()))
04108                         CryptoPP::Add(a.reg, a.reg, m_modulus.reg, a.reg.size());
04109         }
04110         else
04111         {
04112                 a-=b;
04113                 if (a.IsNegative())
04114                         a+=m_modulus;
04115         }
04116 
04117         return a;
04118 }
04119 
04120 const Integer& ModularArithmetic::Inverse(const Integer &a) const
04121 {
04122         if (!a)
04123                 return a;
04124 
04125         CopyWords(m_result.reg.begin(), m_modulus.reg, m_modulus.reg.size());
04126         if (CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, a.reg, a.reg.size()))
04127                 Decrement(m_result.reg.begin()+a.reg.size(), m_modulus.reg.size()-a.reg.size());
04128 
04129         return m_result;
04130 }
04131 
04132 Integer ModularArithmetic::CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
04133 {
04134         if (m_modulus.IsOdd())
04135         {
04136                 MontgomeryRepresentation dr(m_modulus);
04137                 return dr.ConvertOut(dr.CascadeExponentiate(dr.ConvertIn(x), e1, dr.ConvertIn(y), e2));
04138         }
04139         else
04140                 return AbstractRing<Integer>::CascadeExponentiate(x, e1, y, e2);
04141 }
04142 
04143 void ModularArithmetic::SimultaneousExponentiate(Integer *results, const Integer &base, const Integer *exponents, unsigned int exponentsCount) const
04144 {
04145         if (m_modulus.IsOdd())
04146         {
04147                 MontgomeryRepresentation dr(m_modulus);
04148                 dr.SimultaneousExponentiate(results, dr.ConvertIn(base), exponents, exponentsCount);
04149                 for (unsigned int i=0; i<exponentsCount; i++)
04150                         results[i] = dr.ConvertOut(results[i]);
04151         }
04152         else
04153                 AbstractRing<Integer>::SimultaneousExponentiate(results, base, exponents, exponentsCount);
04154 }
04155 
04156 MontgomeryRepresentation::MontgomeryRepresentation(const Integer &m)    // modulus must be odd
04157         : ModularArithmetic(m),
04158           m_u((word)0, m_modulus.reg.size()),
04159           m_workspace(5*m_modulus.reg.size())
04160 {
04161         if (!m_modulus.IsOdd())
04162                 throw InvalidArgument("MontgomeryRepresentation: Montgomery representation requires an odd modulus");
04163 
04164         RecursiveInverseModPower2(m_u.reg, m_workspace, m_modulus.reg, m_modulus.reg.size());
04165 }
04166 
04167 const Integer& MontgomeryRepresentation::Multiply(const Integer &a, const Integer &b) const
04168 {
04169         word *const T = m_workspace.begin();
04170         word *const R = m_result.reg.begin();
04171         const size_t N = m_modulus.reg.size();
04172         assert(a.reg.size()<=N && b.reg.size()<=N);
04173 
04174         AsymmetricMultiply(T, T+2*N, a.reg, a.reg.size(), b.reg, b.reg.size());
04175         SetWords(T+a.reg.size()+b.reg.size(), 0, 2*N-a.reg.size()-b.reg.size());
04176         MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04177         return m_result;
04178 }
04179 
04180 const Integer& MontgomeryRepresentation::Square(const Integer &a) const
04181 {
04182         word *const T = m_workspace.begin();
04183         word *const R = m_result.reg.begin();
04184         const size_t N = m_modulus.reg.size();
04185         assert(a.reg.size()<=N);
04186 
04187         CryptoPP::Square(T, T+2*N, a.reg, a.reg.size());
04188         SetWords(T+2*a.reg.size(), 0, 2*N-2*a.reg.size());
04189         MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04190         return m_result;
04191 }
04192 
04193 Integer MontgomeryRepresentation::ConvertOut(const Integer &a) const
04194 {
04195         word *const T = m_workspace.begin();
04196         word *const R = m_result.reg.begin();
04197         const size_t N = m_modulus.reg.size();
04198         assert(a.reg.size()<=N);
04199 
04200         CopyWords(T, a.reg, a.reg.size());
04201         SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
04202         MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04203         return m_result;
04204 }
04205 
04206 const Integer& MontgomeryRepresentation::MultiplicativeInverse(const Integer &a) const
04207 {
04208 //        return (EuclideanMultiplicativeInverse(a, modulus)<<(2*WORD_BITS*modulus.reg.size()))%modulus;
04209         word *const T = m_workspace.begin();
04210         word *const R = m_result.reg.begin();
04211         const size_t N = m_modulus.reg.size();
04212         assert(a.reg.size()<=N);
04213 
04214         CopyWords(T, a.reg, a.reg.size());
04215         SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
04216         MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04217         unsigned k = AlmostInverse(R, T, R, N, m_modulus.reg, N);
04218 
04219 //      cout << "k=" << k << " N*32=" << 32*N << endl;
04220 
04221         if (k>N*WORD_BITS)
04222                 DivideByPower2Mod(R, R, k-N*WORD_BITS, m_modulus.reg, N);
04223         else
04224                 MultiplyByPower2Mod(R, R, N*WORD_BITS-k, m_modulus.reg, N);
04225 
04226         return m_result;
04227 }
04228 
04229 NAMESPACE_END
04230 
04231 #endif