00001
00002
00003
00004 #include "pch.h"
00005
00006 #ifndef CRYPTOPP_IMPORTS
00007
00008 #include "integer.h"
00009 #include "modarith.h"
00010 #include "nbtheory.h"
00011 #include "asn.h"
00012 #include "oids.h"
00013 #include "words.h"
00014 #include "algparam.h"
00015 #include "pubkey.h"
00016 #include "sha.h"
00017
00018 #include <iostream>
00019
00020 #ifdef _M_X64
00021 #include <Intrin.h>
00022 #endif
00023
00024 #ifdef SSE2_INTRINSICS_AVAILABLE
00025 #ifdef __GNUC__
00026 #include <xmmintrin.h>
00027 #include <signal.h>
00028 #include <setjmp.h>
00029 #ifdef CRYPTOPP_MEMALIGN_AVAILABLE
00030 #include <malloc.h>
00031 #else
00032 #include <stdlib.h>
00033 #endif
00034 #else
00035 #include <emmintrin.h>
00036 #endif
00037 #elif defined(_MSC_VER) && defined(_M_IX86)
00038 #pragma message("You do not seem to have the Visual C++ Processor Pack installed, so use of SSE2 intrinsics will be disabled.")
00039 #elif defined(__GNUC__) && defined(__i386__)
00040 #warning "You do not have GCC 3.3 or later, or did not specify -msse2 compiler option, so use of SSE2 intrinsics will be disabled."
00041 #endif
00042
00043 NAMESPACE_BEGIN(CryptoPP)
00044
00045 bool AssignIntToInteger(const std::type_info &valueType, void *pInteger, const void *pInt)
00046 {
00047 if (valueType != typeid(Integer))
00048 return false;
00049 *reinterpret_cast<Integer *>(pInteger) = *reinterpret_cast<const int *>(pInt);
00050 return true;
00051 }
00052
00053 #ifdef SSE2_INTRINSICS_AVAILABLE
00054 template <class T>
00055 CPP_TYPENAME AlignedAllocator<T>::pointer AlignedAllocator<T>::allocate(size_type n, const void *)
00056 {
00057 CheckSize(n);
00058 if (n == 0)
00059 return NULL;
00060 if (n >= 4)
00061 {
00062 void *p;
00063 #ifdef CRYPTOPP_MM_MALLOC_AVAILABLE
00064 while (!(p = _mm_malloc(sizeof(T)*n, 16)))
00065 #elif defined(CRYPTOPP_MEMALIGN_AVAILABLE)
00066 while (!(p = memalign(16, sizeof(T)*n)))
00067 #elif defined(CRYPTOPP_MALLOC_ALIGNMENT_IS_16)
00068 while (!(p = malloc(sizeof(T)*n)))
00069 #else
00070 while (!(p = (byte *)malloc(sizeof(T)*n + 8)))
00071 #endif
00072 CallNewHandler();
00073
00074 #ifdef CRYPTOPP_NO_ALIGNED_ALLOC
00075 assert(m_pBlock == NULL);
00076 m_pBlock = p;
00077 if (!IsAlignedOn(p, 16))
00078 {
00079 assert(IsAlignedOn(p, 8));
00080 p = (byte *)p + 8;
00081 }
00082 #endif
00083
00084 assert(IsAlignedOn(p, 16));
00085 return (T*)p;
00086 }
00087 return new T[n];
00088 }
00089
00090 template <class T>
00091 void AlignedAllocator<T>::deallocate(void *p, size_type n)
00092 {
00093 memset(p, 0, n*sizeof(T));
00094 if (n >= 4)
00095 {
00096 #ifdef CRYPTOPP_MM_MALLOC_AVAILABLE
00097 _mm_free(p);
00098 #elif defined(CRYPTOPP_NO_ALIGNED_ALLOC)
00099 assert(m_pBlock == p || (byte *)m_pBlock+8 == p);
00100 free(m_pBlock);
00101 m_pBlock = NULL;
00102 #else
00103 free(p);
00104 #endif
00105 }
00106 else
00107 delete [] (T *)p;
00108 }
00109
00110 template class CRYPTOPP_DLL AlignedAllocator<word>;
00111 #endif
00112
00113 static int Compare(const word *A, const word *B, size_t N)
00114 {
00115 while (N--)
00116 if (A[N] > B[N])
00117 return 1;
00118 else if (A[N] < B[N])
00119 return -1;
00120
00121 return 0;
00122 }
00123
00124 static int Increment(word *A, size_t N, word B=1)
00125 {
00126 assert(N);
00127 word t = A[0];
00128 A[0] = t+B;
00129 if (A[0] >= t)
00130 return 0;
00131 for (unsigned i=1; i<N; i++)
00132 if (++A[i])
00133 return 0;
00134 return 1;
00135 }
00136
00137 static int Decrement(word *A, size_t N, word B=1)
00138 {
00139 assert(N);
00140 word t = A[0];
00141 A[0] = t-B;
00142 if (A[0] <= t)
00143 return 0;
00144 for (unsigned i=1; i<N; i++)
00145 if (A[i]--)
00146 return 0;
00147 return 1;
00148 }
00149
00150 static void TwosComplement(word *A, size_t N)
00151 {
00152 Decrement(A, N);
00153 for (unsigned i=0; i<N; i++)
00154 A[i] = ~A[i];
00155 }
00156
00157 static word AtomicInverseModPower2(word A)
00158 {
00159 assert(A%2==1);
00160
00161 word R=A%8;
00162
00163 for (unsigned i=3; i<WORD_BITS; i*=2)
00164 R = R*(2-R*A);
00165
00166 assert(R*A==1);
00167 return R;
00168 }
00169
00170
00171
00172 class DWord
00173 {
00174 public:
00175 DWord() {}
00176
00177 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00178 explicit DWord(word low)
00179 {
00180 m_whole = low;
00181 }
00182 #else
00183 explicit DWord(word low)
00184 {
00185 m_halfs.low = low;
00186 m_halfs.high = 0;
00187 }
00188 #endif
00189
00190 DWord(word low, word high)
00191 {
00192 m_halfs.low = low;
00193 m_halfs.high = high;
00194 }
00195
00196 static DWord Multiply(word a, word b)
00197 {
00198 DWord r;
00199 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00200 r.m_whole = (dword)a * b;
00201 #elif defined(__alpha__)
00202 r.m_halfs.low = a*b; __asm__("umulh %1,%2,%0" : "=r" (r.m_halfs.high) : "r" (a), "r" (b));
00203 #elif defined(__ia64__)
00204 r.m_halfs.low = a*b; __asm__("xmpy.hu %0=%1,%2" : "=f" (r.m_halfs.high) : "f" (a), "f" (b));
00205 #elif defined(_ARCH_PPC64)
00206 r.m_halfs.low = a*b; __asm__("mulhdu %0,%1,%2" : "=r" (r.m_halfs.high) : "r" (a), "r" (b) : "cc");
00207 #elif defined(__x86_64__)
00208 __asm__("mulq %3" : "=d" (r.m_halfs.high), "=a" (r.m_halfs.low) : "a" (a), "rm" (b) : "cc");
00209 #elif defined(__mips64)
00210 __asm__("dmultu %2,%3" : "=h" (r.m_halfs.high), "=l" (r.m_halfs.low) : "r" (a), "r" (b));
00211 #elif defined(_M_X64)
00212 r.m_halfs.low = _umul128(a, b, &r.m_halfs.high);
00213 #elif defined(_M_IX86)
00214
00215 word64 t = (word64)a * b;
00216 r.m_halfs.high = ((word32 *)(&t))[1];
00217 r.m_halfs.low = (word32)t;
00218 #else
00219 #error can not implement DWord
00220 #endif
00221 return r;
00222 }
00223
00224 static DWord MultiplyAndAdd(word a, word b, word c)
00225 {
00226 DWord r = Multiply(a, b);
00227 return r += c;
00228 }
00229
00230 DWord & operator+=(word a)
00231 {
00232 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00233 m_whole = m_whole + a;
00234 #else
00235 m_halfs.low += a;
00236 m_halfs.high += (m_halfs.low < a);
00237 #endif
00238 return *this;
00239 }
00240
00241 DWord operator+(word a)
00242 {
00243 DWord r;
00244 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00245 r.m_whole = m_whole + a;
00246 #else
00247 r.m_halfs.low = m_halfs.low + a;
00248 r.m_halfs.high = m_halfs.high + (r.m_halfs.low < a);
00249 #endif
00250 return r;
00251 }
00252
00253 DWord operator-(DWord a)
00254 {
00255 DWord r;
00256 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00257 r.m_whole = m_whole - a.m_whole;
00258 #else
00259 r.m_halfs.low = m_halfs.low - a.m_halfs.low;
00260 r.m_halfs.high = m_halfs.high - a.m_halfs.high - (r.m_halfs.low > m_halfs.low);
00261 #endif
00262 return r;
00263 }
00264
00265 DWord operator-(word a)
00266 {
00267 DWord r;
00268 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00269 r.m_whole = m_whole - a;
00270 #else
00271 r.m_halfs.low = m_halfs.low - a;
00272 r.m_halfs.high = m_halfs.high - (r.m_halfs.low > m_halfs.low);
00273 #endif
00274 return r;
00275 }
00276
00277
00278 word operator/(word divisor);
00279
00280 word operator%(word a);
00281
00282 bool operator!() const
00283 {
00284 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00285 return !m_whole;
00286 #else
00287 return !m_halfs.high && !m_halfs.low;
00288 #endif
00289 }
00290
00291 word GetLowHalf() const {return m_halfs.low;}
00292 word GetHighHalf() const {return m_halfs.high;}
00293 word GetHighHalfAsBorrow() const {return 0-m_halfs.high;}
00294
00295 private:
00296 union
00297 {
00298 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00299 dword m_whole;
00300 #endif
00301 struct
00302 {
00303 #ifdef IS_LITTLE_ENDIAN
00304 word low;
00305 word high;
00306 #else
00307 word high;
00308 word low;
00309 #endif
00310 } m_halfs;
00311 };
00312 };
00313
00314 class Word
00315 {
00316 public:
00317 Word() {}
00318
00319 Word(word value)
00320 {
00321 m_whole = value;
00322 }
00323
00324 Word(hword low, hword high)
00325 {
00326 m_whole = low | (word(high) << (WORD_BITS/2));
00327 }
00328
00329 static Word Multiply(hword a, hword b)
00330 {
00331 Word r;
00332 r.m_whole = (word)a * b;
00333 return r;
00334 }
00335
00336 Word operator-(Word a)
00337 {
00338 Word r;
00339 r.m_whole = m_whole - a.m_whole;
00340 return r;
00341 }
00342
00343 Word operator-(hword a)
00344 {
00345 Word r;
00346 r.m_whole = m_whole - a;
00347 return r;
00348 }
00349
00350
00351 hword operator/(hword divisor)
00352 {
00353 return hword(m_whole / divisor);
00354 }
00355
00356 bool operator!() const
00357 {
00358 return !m_whole;
00359 }
00360
00361 word GetWhole() const {return m_whole;}
00362 hword GetLowHalf() const {return hword(m_whole);}
00363 hword GetHighHalf() const {return hword(m_whole>>(WORD_BITS/2));}
00364 hword GetHighHalfAsBorrow() const {return 0-hword(m_whole>>(WORD_BITS/2));}
00365
00366 private:
00367 word m_whole;
00368 };
00369
00370
00371 template <class S, class D>
00372 S DivideThreeWordsByTwo(S *A, S B0, S B1, D *dummy=NULL)
00373 {
00374
00375 assert(A[2] < B1 || (A[2]==B1 && A[1] < B0));
00376
00377
00378 S Q;
00379 if (S(B1+1) == 0)
00380 Q = A[2];
00381 else
00382 Q = D(A[1], A[2]) / S(B1+1);
00383
00384
00385 D p = D::Multiply(B0, Q);
00386 D u = (D) A[0] - p.GetLowHalf();
00387 A[0] = u.GetLowHalf();
00388 u = (D) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - D::Multiply(B1, Q);
00389 A[1] = u.GetLowHalf();
00390 A[2] += u.GetHighHalf();
00391
00392
00393 while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
00394 {
00395 u = (D) A[0] - B0;
00396 A[0] = u.GetLowHalf();
00397 u = (D) A[1] - B1 - u.GetHighHalfAsBorrow();
00398 A[1] = u.GetLowHalf();
00399 A[2] += u.GetHighHalf();
00400 Q++;
00401 assert(Q);
00402 }
00403
00404 return Q;
00405 }
00406
00407
00408 template <class S, class D>
00409 inline D DivideFourWordsByTwo(S *T, const D &Al, const D &Ah, const D &B)
00410 {
00411 if (!B)
00412 return D(Ah.GetLowHalf(), Ah.GetHighHalf());
00413 else
00414 {
00415 S Q[2];
00416 T[0] = Al.GetLowHalf();
00417 T[1] = Al.GetHighHalf();
00418 T[2] = Ah.GetLowHalf();
00419 T[3] = Ah.GetHighHalf();
00420 Q[1] = DivideThreeWordsByTwo<S, D>(T+1, B.GetLowHalf(), B.GetHighHalf());
00421 Q[0] = DivideThreeWordsByTwo<S, D>(T, B.GetLowHalf(), B.GetHighHalf());
00422 return D(Q[0], Q[1]);
00423 }
00424 }
00425
00426
00427 inline word DWord::operator/(word a)
00428 {
00429 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00430 return word(m_whole / a);
00431 #else
00432 hword r[4];
00433 return DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a).GetWhole();
00434 #endif
00435 }
00436
00437 inline word DWord::operator%(word a)
00438 {
00439 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00440 return word(m_whole % a);
00441 #else
00442 if (a < (word(1) << (WORD_BITS/2)))
00443 {
00444 hword h = hword(a);
00445 word r = m_halfs.high % h;
00446 r = ((m_halfs.low >> (WORD_BITS/2)) + (r << (WORD_BITS/2))) % h;
00447 return hword((hword(m_halfs.low) + (r << (WORD_BITS/2))) % h);
00448 }
00449 else
00450 {
00451 hword r[4];
00452 DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a);
00453 return Word(r[0], r[1]).GetWhole();
00454 }
00455 #endif
00456 }
00457
00458
00459
00460 class Portable
00461 {
00462 public:
00463 static int Add(word *C, const word *A, const word *B, size_t N);
00464 static int Subtract(word *C, const word *A, const word *B, size_t N);
00465
00466 static inline void Multiply2(word *C, const word *A, const word *B);
00467 static inline word Multiply2Add(word *C, const word *A, const word *B);
00468 static void Multiply4(word *C, const word *A, const word *B);
00469 static void Multiply8(word *C, const word *A, const word *B);
00470 static inline unsigned int MultiplyRecursionLimit() {return 8;}
00471
00472 static inline void Multiply2Bottom(word *C, const word *A, const word *B);
00473 static void Multiply4Bottom(word *C, const word *A, const word *B);
00474 static void Multiply8Bottom(word *C, const word *A, const word *B);
00475 static inline unsigned int MultiplyBottomRecursionLimit() {return 8;}
00476
00477 static void Square2(word *R, const word *A);
00478 static void Square4(word *R, const word *A);
00479 static void Square8(word *R, const word *A) {assert(false);}
00480 static inline unsigned int SquareRecursionLimit() {return 4;}
00481 };
00482
00483 int Portable::Add(word *C, const word *A, const word *B, size_t N)
00484 {
00485 assert (N%2 == 0);
00486
00487 DWord u(0, 0);
00488 for (unsigned int i = 0; i < N; i+=2)
00489 {
00490 u = DWord(A[i]) + B[i] + u.GetHighHalf();
00491 C[i] = u.GetLowHalf();
00492 u = DWord(A[i+1]) + B[i+1] + u.GetHighHalf();
00493 C[i+1] = u.GetLowHalf();
00494 }
00495 return int(u.GetHighHalf());
00496 }
00497
00498 int Portable::Subtract(word *C, const word *A, const word *B, size_t N)
00499 {
00500 assert (N%2 == 0);
00501
00502 DWord u(0, 0);
00503 for (unsigned int i = 0; i < N; i+=2)
00504 {
00505 u = (DWord) A[i] - B[i] - u.GetHighHalfAsBorrow();
00506 C[i] = u.GetLowHalf();
00507 u = (DWord) A[i+1] - B[i+1] - u.GetHighHalfAsBorrow();
00508 C[i+1] = u.GetLowHalf();
00509 }
00510 return int(0-u.GetHighHalf());
00511 }
00512
00513 void Portable::Multiply2(word *C, const word *A, const word *B)
00514 {
00515
00516
00517
00518
00519
00520
00521
00522
00523
00524
00525
00526
00527
00528
00529
00530
00531
00532
00533
00534
00535
00536
00537
00538
00539
00540
00541
00542
00543 word D[4] = {A[1]-A[0], A[0]-A[1], B[0]-B[1], B[1]-B[0]};
00544 unsigned int ai = A[1] < A[0];
00545 unsigned int bi = B[0] < B[1];
00546 unsigned int di = ai & bi;
00547 DWord d = DWord::Multiply(D[di], D[di+2]);
00548 D[1] = D[3] = 0;
00549 unsigned int si = ai + !bi;
00550 word s = D[si];
00551
00552 DWord A0B0 = DWord::Multiply(A[0], B[0]);
00553 C[0] = A0B0.GetLowHalf();
00554
00555 DWord A1B1 = DWord::Multiply(A[1], B[1]);
00556 DWord t = (DWord) A0B0.GetHighHalf() + A0B0.GetLowHalf() + d.GetLowHalf() + A1B1.GetLowHalf();
00557 C[1] = t.GetLowHalf();
00558
00559 t = A1B1 + t.GetHighHalf() + A0B0.GetHighHalf() + d.GetHighHalf() + A1B1.GetHighHalf() - s;
00560 C[2] = t.GetLowHalf();
00561 C[3] = t.GetHighHalf();
00562 }
00563
00564 inline void Portable::Multiply2Bottom(word *C, const word *A, const word *B)
00565 {
00566 DWord t = DWord::Multiply(A[0], B[0]);
00567 C[0] = t.GetLowHalf();
00568 C[1] = t.GetHighHalf() + A[0]*B[1] + A[1]*B[0];
00569 }
00570
00571 word Portable::Multiply2Add(word *C, const word *A, const word *B)
00572 {
00573 word D[4] = {A[1]-A[0], A[0]-A[1], B[0]-B[1], B[1]-B[0]};
00574 unsigned int ai = A[1] < A[0];
00575 unsigned int bi = B[0] < B[1];
00576 unsigned int di = ai & bi;
00577 DWord d = DWord::Multiply(D[di], D[di+2]);
00578 D[1] = D[3] = 0;
00579 unsigned int si = ai + !bi;
00580 word s = D[si];
00581
00582 DWord A0B0 = DWord::Multiply(A[0], B[0]);
00583 DWord t = A0B0 + C[0];
00584 C[0] = t.GetLowHalf();
00585
00586 DWord A1B1 = DWord::Multiply(A[1], B[1]);
00587 t = (DWord) t.GetHighHalf() + A0B0.GetLowHalf() + d.GetLowHalf() + A1B1.GetLowHalf() + C[1];
00588 C[1] = t.GetLowHalf();
00589
00590 t = (DWord) t.GetHighHalf() + A1B1.GetLowHalf() + A0B0.GetHighHalf() + d.GetHighHalf() + A1B1.GetHighHalf() - s + C[2];
00591 C[2] = t.GetLowHalf();
00592
00593 t = (DWord) t.GetHighHalf() + A1B1.GetHighHalf() + C[3];
00594 C[3] = t.GetLowHalf();
00595 return t.GetHighHalf();
00596 }
00597
00598 #define MulAcc(x, y) \
00599 p = DWord::MultiplyAndAdd(A[x], B[y], c); \
00600 c = p.GetLowHalf(); \
00601 p = (DWord) d + p.GetHighHalf(); \
00602 d = p.GetLowHalf(); \
00603 e += p.GetHighHalf();
00604
00605 #define SaveMulAcc(s, x, y) \
00606 R[s] = c; \
00607 p = DWord::MultiplyAndAdd(A[x], B[y], d); \
00608 c = p.GetLowHalf(); \
00609 p = (DWord) e + p.GetHighHalf(); \
00610 d = p.GetLowHalf(); \
00611 e = p.GetHighHalf();
00612
00613 #define SquAcc(x, y) \
00614 q = DWord::Multiply(A[x], A[y]); \
00615 p = q + c; \
00616 c = p.GetLowHalf(); \
00617 p = (DWord) d + p.GetHighHalf(); \
00618 d = p.GetLowHalf(); \
00619 e += p.GetHighHalf(); \
00620 p = q + c; \
00621 c = p.GetLowHalf(); \
00622 p = (DWord) d + p.GetHighHalf(); \
00623 d = p.GetLowHalf(); \
00624 e += p.GetHighHalf();
00625
00626 #define SaveSquAcc(s, x, y) \
00627 R[s] = c; \
00628 q = DWord::Multiply(A[x], A[y]); \
00629 p = q + d; \
00630 c = p.GetLowHalf(); \
00631 p = (DWord) e + p.GetHighHalf(); \
00632 d = p.GetLowHalf(); \
00633 e = p.GetHighHalf(); \
00634 p = q + c; \
00635 c = p.GetLowHalf(); \
00636 p = (DWord) d + p.GetHighHalf(); \
00637 d = p.GetLowHalf(); \
00638 e += p.GetHighHalf();
00639
00640 void Portable::Multiply4(word *R, const word *A, const word *B)
00641 {
00642 DWord p;
00643 word c, d, e;
00644
00645 p = DWord::Multiply(A[0], B[0]);
00646 R[0] = p.GetLowHalf();
00647 c = p.GetHighHalf();
00648 d = e = 0;
00649
00650 MulAcc(0, 1);
00651 MulAcc(1, 0);
00652
00653 SaveMulAcc(1, 2, 0);
00654 MulAcc(1, 1);
00655 MulAcc(0, 2);
00656
00657 SaveMulAcc(2, 0, 3);
00658 MulAcc(1, 2);
00659 MulAcc(2, 1);
00660 MulAcc(3, 0);
00661
00662 SaveMulAcc(3, 3, 1);
00663 MulAcc(2, 2);
00664 MulAcc(1, 3);
00665
00666 SaveMulAcc(4, 2, 3);
00667 MulAcc(3, 2);
00668
00669 R[5] = c;
00670 p = DWord::MultiplyAndAdd(A[3], B[3], d);
00671 R[6] = p.GetLowHalf();
00672 R[7] = e + p.GetHighHalf();
00673 }
00674
00675 void Portable::Square2(word *R, const word *A)
00676 {
00677 DWord p, q;
00678 word c, d, e;
00679
00680 p = DWord::Multiply(A[0], A[0]);
00681 R[0] = p.GetLowHalf();
00682 c = p.GetHighHalf();
00683 d = e = 0;
00684
00685 SquAcc(0, 1);
00686
00687 R[1] = c;
00688 p = DWord::MultiplyAndAdd(A[1], A[1], d);
00689 R[2] = p.GetLowHalf();
00690 R[3] = e + p.GetHighHalf();
00691 }
00692
00693 void Portable::Square4(word *R, const word *A)
00694 {
00695 #ifdef _MSC_VER
00696
00697
00698
00699
00700 Multiply4(R, A, A);
00701 #else
00702 const word *B = A;
00703 DWord p, q;
00704 word c, d, e;
00705
00706 p = DWord::Multiply(A[0], A[0]);
00707 R[0] = p.GetLowHalf();
00708 c = p.GetHighHalf();
00709 d = e = 0;
00710
00711 SquAcc(0, 1);
00712
00713 SaveSquAcc(1, 2, 0);
00714 MulAcc(1, 1);
00715
00716 SaveSquAcc(2, 0, 3);
00717 SquAcc(1, 2);
00718
00719 SaveSquAcc(3, 3, 1);
00720 MulAcc(2, 2);
00721
00722 SaveSquAcc(4, 2, 3);
00723
00724 R[5] = c;
00725 p = DWord::MultiplyAndAdd(A[3], A[3], d);
00726 R[6] = p.GetLowHalf();
00727 R[7] = e + p.GetHighHalf();
00728 #endif
00729 }
00730
00731 void Portable::Multiply8(word *R, const word *A, const word *B)
00732 {
00733 DWord p;
00734 word c, d, e;
00735
00736 p = DWord::Multiply(A[0], B[0]);
00737 R[0] = p.GetLowHalf();
00738 c = p.GetHighHalf();
00739 d = e = 0;
00740
00741 MulAcc(0, 1);
00742 MulAcc(1, 0);
00743
00744 SaveMulAcc(1, 2, 0);
00745 MulAcc(1, 1);
00746 MulAcc(0, 2);
00747
00748 SaveMulAcc(2, 0, 3);
00749 MulAcc(1, 2);
00750 MulAcc(2, 1);
00751 MulAcc(3, 0);
00752
00753 SaveMulAcc(3, 0, 4);
00754 MulAcc(1, 3);
00755 MulAcc(2, 2);
00756 MulAcc(3, 1);
00757 MulAcc(4, 0);
00758
00759 SaveMulAcc(4, 0, 5);
00760 MulAcc(1, 4);
00761 MulAcc(2, 3);
00762 MulAcc(3, 2);
00763 MulAcc(4, 1);
00764 MulAcc(5, 0);
00765
00766 SaveMulAcc(5, 0, 6);
00767 MulAcc(1, 5);
00768 MulAcc(2, 4);
00769 MulAcc(3, 3);
00770 MulAcc(4, 2);
00771 MulAcc(5, 1);
00772 MulAcc(6, 0);
00773
00774 SaveMulAcc(6, 0, 7);
00775 MulAcc(1, 6);
00776 MulAcc(2, 5);
00777 MulAcc(3, 4);
00778 MulAcc(4, 3);
00779 MulAcc(5, 2);
00780 MulAcc(6, 1);
00781 MulAcc(7, 0);
00782
00783 SaveMulAcc(7, 1, 7);
00784 MulAcc(2, 6);
00785 MulAcc(3, 5);
00786 MulAcc(4, 4);
00787 MulAcc(5, 3);
00788 MulAcc(6, 2);
00789 MulAcc(7, 1);
00790
00791 SaveMulAcc(8, 2, 7);
00792 MulAcc(3, 6);
00793 MulAcc(4, 5);
00794 MulAcc(5, 4);
00795 MulAcc(6, 3);
00796 MulAcc(7, 2);
00797
00798 SaveMulAcc(9, 3, 7);
00799 MulAcc(4, 6);
00800 MulAcc(5, 5);
00801 MulAcc(6, 4);
00802 MulAcc(7, 3);
00803
00804 SaveMulAcc(10, 4, 7);
00805 MulAcc(5, 6);
00806 MulAcc(6, 5);
00807 MulAcc(7, 4);
00808
00809 SaveMulAcc(11, 5, 7);
00810 MulAcc(6, 6);
00811 MulAcc(7, 5);
00812
00813 SaveMulAcc(12, 6, 7);
00814 MulAcc(7, 6);
00815
00816 R[13] = c;
00817 p = DWord::MultiplyAndAdd(A[7], B[7], d);
00818 R[14] = p.GetLowHalf();
00819 R[15] = e + p.GetHighHalf();
00820 }
00821
00822 void Portable::Multiply4Bottom(word *R, const word *A, const word *B)
00823 {
00824 DWord p;
00825 word c, d, e;
00826
00827 p = DWord::Multiply(A[0], B[0]);
00828 R[0] = p.GetLowHalf();
00829 c = p.GetHighHalf();
00830 d = e = 0;
00831
00832 MulAcc(0, 1);
00833 MulAcc(1, 0);
00834
00835 SaveMulAcc(1, 2, 0);
00836 MulAcc(1, 1);
00837 MulAcc(0, 2);
00838
00839 R[2] = c;
00840 R[3] = d + A[0] * B[3] + A[1] * B[2] + A[2] * B[1] + A[3] * B[0];
00841 }
00842
00843 void Portable::Multiply8Bottom(word *R, const word *A, const word *B)
00844 {
00845 DWord p;
00846 word c, d, e;
00847
00848 p = DWord::Multiply(A[0], B[0]);
00849 R[0] = p.GetLowHalf();
00850 c = p.GetHighHalf();
00851 d = e = 0;
00852
00853 MulAcc(0, 1);
00854 MulAcc(1, 0);
00855
00856 SaveMulAcc(1, 2, 0);
00857 MulAcc(1, 1);
00858 MulAcc(0, 2);
00859
00860 SaveMulAcc(2, 0, 3);
00861 MulAcc(1, 2);
00862 MulAcc(2, 1);
00863 MulAcc(3, 0);
00864
00865 SaveMulAcc(3, 0, 4);
00866 MulAcc(1, 3);
00867 MulAcc(2, 2);
00868 MulAcc(3, 1);
00869 MulAcc(4, 0);
00870
00871 SaveMulAcc(4, 0, 5);
00872 MulAcc(1, 4);
00873 MulAcc(2, 3);
00874 MulAcc(3, 2);
00875 MulAcc(4, 1);
00876 MulAcc(5, 0);
00877
00878 SaveMulAcc(5, 0, 6);
00879 MulAcc(1, 5);
00880 MulAcc(2, 4);
00881 MulAcc(3, 3);
00882 MulAcc(4, 2);
00883 MulAcc(5, 1);
00884 MulAcc(6, 0);
00885
00886 R[6] = c;
00887 R[7] = d + A[0] * B[7] + A[1] * B[6] + A[2] * B[5] + A[3] * B[4] +
00888 A[4] * B[3] + A[5] * B[2] + A[6] * B[1] + A[7] * B[0];
00889 }
00890
00891 #undef MulAcc
00892 #undef SaveMulAcc
00893 #undef SquAcc
00894 #undef SaveSquAcc
00895
00896 #ifdef CRYPTOPP_X86ASM_AVAILABLE
00897
00898
00899
00900 static bool s_sse2Enabled = true;
00901
00902 static void CpuId(word32 input, word32 *output)
00903 {
00904 #ifdef __GNUC__
00905 __asm__
00906 (
00907
00908 "push %%ebx; cpuid; mov %%ebx, %%edi; pop %%ebx"
00909 : "=a" (output[0]), "=D" (output[1]), "=c" (output[2]), "=d" (output[3])
00910 : "a" (input)
00911 );
00912 #else
00913 __asm
00914 {
00915 mov eax, input
00916 cpuid
00917 mov edi, output
00918 mov [edi], eax
00919 mov [edi+4], ebx
00920 mov [edi+8], ecx
00921 mov [edi+12], edx
00922 }
00923 #endif
00924 }
00925
00926 #ifdef SSE2_INTRINSICS_AVAILABLE
00927 #ifndef _MSC_VER
00928 static jmp_buf s_env;
00929 static void SigIllHandler(int)
00930 {
00931 longjmp(s_env, 1);
00932 }
00933 #endif
00934
00935 static bool HasSSE2()
00936 {
00937 if (!s_sse2Enabled)
00938 return false;
00939
00940 word32 cpuid[4];
00941 CpuId(1, cpuid);
00942 if ((cpuid[3] & (1 << 26)) == 0)
00943 return false;
00944
00945 #ifdef _MSC_VER
00946 __try
00947 {
00948 __asm xorpd xmm0, xmm0
00949 }
00950 __except (1)
00951 {
00952 return false;
00953 }
00954 return true;
00955 #else
00956 typedef void (*SigHandler)(int);
00957
00958 SigHandler oldHandler = signal(SIGILL, SigIllHandler);
00959 if (oldHandler == SIG_ERR)
00960 return false;
00961
00962 bool result = true;
00963 if (setjmp(s_env))
00964 result = false;
00965 else
00966 __asm __volatile ("xorps %xmm0, %xmm0");
00967
00968 signal(SIGILL, oldHandler);
00969 return result;
00970 #endif
00971 }
00972 #endif
00973
00974 static bool IsP4()
00975 {
00976 word32 cpuid[4];
00977
00978 CpuId(0, cpuid);
00979 std::swap(cpuid[2], cpuid[3]);
00980 if (memcmp(cpuid+1, "GenuineIntel", 12) != 0)
00981 return false;
00982
00983 CpuId(1, cpuid);
00984 return ((cpuid[0] >> 8) & 0xf) == 0xf;
00985 }
00986
00987
00988
00989 class PentiumOptimized : public Portable
00990 {
00991 public:
00992 static int Add(word *C, const word *A, const word *B, size_t N);
00993 static int Subtract(word *C, const word *A, const word *B, size_t N);
00994 static void Multiply4(word *C, const word *A, const word *B);
00995 static void Multiply8(word *C, const word *A, const word *B);
00996 static void Multiply8Bottom(word *C, const word *A, const word *B);
00997 };
00998
00999 class P4Optimized
01000 {
01001 public:
01002 static int Add(word *C, const word *A, const word *B, size_t N);
01003 static int Subtract(word *C, const word *A, const word *B, size_t N);
01004 #ifdef SSE2_INTRINSICS_AVAILABLE
01005 static void Multiply4(word *C, const word *A, const word *B);
01006 static void Multiply8(word *C, const word *A, const word *B);
01007 static void Multiply8Bottom(word *C, const word *A, const word *B);
01008 #endif
01009 };
01010
01011 typedef int (* PAddSub)(word *C, const word *A, const word *B, size_t N);
01012 typedef void (* PMul)(word *C, const word *A, const word *B);
01013
01014 static PAddSub s_pAdd, s_pSub;
01015 #ifdef SSE2_INTRINSICS_AVAILABLE
01016 static PMul s_pMul4, s_pMul8, s_pMul8B;
01017 #endif
01018
01019 static void SetPentiumFunctionPointers()
01020 {
01021 if (IsP4())
01022 {
01023 s_pAdd = &P4Optimized::Add;
01024 s_pSub = &P4Optimized::Subtract;
01025 }
01026 else
01027 {
01028 s_pAdd = &PentiumOptimized::Add;
01029 s_pSub = &PentiumOptimized::Subtract;
01030 }
01031
01032 #ifdef SSE2_INTRINSICS_AVAILABLE
01033 if (HasSSE2())
01034 {
01035 s_pMul4 = &P4Optimized::Multiply4;
01036 s_pMul8 = &P4Optimized::Multiply8;
01037 s_pMul8B = &P4Optimized::Multiply8Bottom;
01038 }
01039 else
01040 {
01041 s_pMul4 = &PentiumOptimized::Multiply4;
01042 s_pMul8 = &PentiumOptimized::Multiply8;
01043 s_pMul8B = &PentiumOptimized::Multiply8Bottom;
01044 }
01045 #endif
01046 }
01047
01048 void DisableSSE2()
01049 {
01050 s_sse2Enabled = false;
01051 SetPentiumFunctionPointers();
01052 }
01053
01054 class LowLevel : public PentiumOptimized
01055 {
01056 public:
01057 inline static int Add(word *C, const word *A, const word *B, size_t N)
01058 {return s_pAdd(C, A, B, N);}
01059 inline static int Subtract(word *C, const word *A, const word *B, size_t N)
01060 {return s_pSub(C, A, B, N);}
01061 inline static void Square4(word *R, const word *A)
01062 {Multiply4(R, A, A);}
01063 #ifdef SSE2_INTRINSICS_AVAILABLE
01064 inline static void Multiply4(word *C, const word *A, const word *B)
01065 {s_pMul4(C, A, B);}
01066 inline static void Multiply8(word *C, const word *A, const word *B)
01067 {s_pMul8(C, A, B);}
01068 inline static void Multiply8Bottom(word *C, const word *A, const word *B)
01069 {s_pMul8B(C, A, B);}
01070 #endif
01071 };
01072
01073
01074 #ifdef _MSC_VER
01075 #define CRYPTOPP_NAKED __declspec(naked)
01076 #define AS1(x) __asm x
01077 #define AS2(x, y) __asm x, y
01078 #define AddPrologue \
01079 __asm push ebp \
01080 __asm push ebx \
01081 __asm push esi \
01082 __asm push edi \
01083 __asm mov ecx, [esp+20] \
01084 __asm mov edx, [esp+24] \
01085 __asm mov ebx, [esp+28] \
01086 __asm mov esi, [esp+32]
01087 #define AddEpilogue \
01088 __asm pop edi \
01089 __asm pop esi \
01090 __asm pop ebx \
01091 __asm pop ebp \
01092 __asm ret
01093 #define MulPrologue \
01094 __asm push ebp \
01095 __asm push ebx \
01096 __asm push esi \
01097 __asm push edi \
01098 __asm mov ecx, [esp+28] \
01099 __asm mov esi, [esp+24] \
01100 __asm push [esp+20]
01101 #define MulEpilogue \
01102 __asm add esp, 4 \
01103 __asm pop edi \
01104 __asm pop esi \
01105 __asm pop ebx \
01106 __asm pop ebp \
01107 __asm ret
01108 #else
01109 #define CRYPTOPP_NAKED
01110 #define AS1(x) #x ";"
01111 #define AS2(x, y) #x ", " #y ";"
01112 #define AddPrologue \
01113 __asm__ __volatile__ \
01114 ( \
01115 "push %%ebx;" \
01116 "mov %2, %%ebx;" \
01117 ".intel_syntax noprefix;" \
01118 "push ebp;"
01119 #define AddEpilogue \
01120 "pop ebp;" \
01121 ".att_syntax prefix;" \
01122 "pop %%ebx;" \
01123 : \
01124 : "c" (C), "d" (A), "m" (B), "S" (N) \
01125 : "%edi", "memory", "cc" \
01126 );
01127 #define MulPrologue \
01128 __asm__ __volatile__ \
01129 ( \
01130 "push %%ebx;" \
01131 "push %%ebp;" \
01132 "push %0;" \
01133 ".intel_syntax noprefix;"
01134 #define MulEpilogue \
01135 "add esp, 4;" \
01136 "pop ebp;" \
01137 "pop ebx;" \
01138 ".att_syntax prefix;" \
01139 : \
01140 : "rm" (Z), "S" (X), "c" (Y) \
01141 : "%eax", "%edx", "%edi", "memory", "cc" \
01142 );
01143 #endif
01144
01145 CRYPTOPP_NAKED int PentiumOptimized::Add(word *C, const word *A, const word *B, size_t N)
01146 {
01147 AddPrologue
01148
01149
01150 AS2( sub ecx, edx)
01151 AS2( xor eax, eax)
01152
01153 AS2( sub eax, esi)
01154 AS2( lea ebx, [ebx+4*esi])
01155
01156 AS2( sar eax, 1)
01157 AS1( jz loopendAdd)
01158
01159 AS1(loopstartAdd:)
01160 AS2( mov esi,[edx])
01161 AS2( mov ebp,[edx+4])
01162
01163 AS2( mov edi,[ebx+8*eax])
01164 AS2( lea edx,[edx+8])
01165
01166 AS2( adc esi,edi)
01167 AS2( mov edi,[ebx+8*eax+4])
01168
01169 AS2( adc ebp,edi)
01170 AS1( inc eax)
01171
01172 AS2( mov [edx+ecx-8],esi)
01173 AS2( mov [edx+ecx-4],ebp)
01174
01175 AS1( jnz loopstartAdd)
01176
01177 AS1(loopendAdd:)
01178 AS2( adc eax, 0)
01179
01180 AddEpilogue
01181 }
01182
01183 CRYPTOPP_NAKED int PentiumOptimized::Subtract(word *C, const word *A, const word *B, size_t N)
01184 {
01185 AddPrologue
01186
01187
01188 AS2( sub ecx, edx)
01189 AS2( xor eax, eax)
01190
01191 AS2( sub eax, esi)
01192 AS2( lea ebx, [ebx+4*esi])
01193
01194 AS2( sar eax, 1)
01195 AS1( jz loopendSub)
01196
01197 AS1(loopstartSub:)
01198 AS2( mov esi,[edx])
01199 AS2( mov ebp,[edx+4])
01200
01201 AS2( mov edi,[ebx+8*eax])
01202 AS2( lea edx,[edx+8])
01203
01204 AS2( sbb esi,edi)
01205 AS2( mov edi,[ebx+8*eax+4])
01206
01207 AS2( sbb ebp,edi)
01208 AS1( inc eax)
01209
01210 AS2( mov [edx+ecx-8],esi)
01211 AS2( mov [edx+ecx-4],ebp)
01212
01213 AS1( jnz loopstartSub)
01214
01215 AS1(loopendSub:)
01216 AS2( adc eax, 0)
01217
01218 AddEpilogue
01219 }
01220
01221
01222
01223 CRYPTOPP_NAKED int P4Optimized::Add(word *C, const word *A, const word *B, size_t N)
01224 {
01225 AddPrologue
01226
01227
01228 AS2( xor eax, eax)
01229 AS1( neg esi)
01230 AS1( jz loopendAddP4)
01231
01232 AS2( mov edi, [edx])
01233 AS2( mov ebp, [ebx])
01234 AS1( jmp carry1AddP4)
01235
01236 AS1(loopstartAddP4:)
01237 AS2( mov edi, [edx+8])
01238 AS2( add ecx, 8)
01239 AS2( add edx, 8)
01240 AS2( mov ebp, [ebx])
01241 AS2( add edi, eax)
01242 AS1( jc carry1AddP4)
01243 AS2( xor eax, eax)
01244
01245 AS1(carry1AddP4:)
01246 AS2( add edi, ebp)
01247 AS2( mov ebp, 1)
01248 AS2( mov [ecx], edi)
01249 AS2( mov edi, [edx+4])
01250 AS2( cmovc eax, ebp)
01251 AS2( mov ebp, [ebx+4])
01252 AS2( add ebx, 8)
01253 AS2( add edi, eax)
01254 AS1( jc carry2AddP4)
01255 AS2( xor eax, eax)
01256
01257 AS1(carry2AddP4:)
01258 AS2( add edi, ebp)
01259 AS2( mov ebp, 1)
01260 AS2( cmovc eax, ebp)
01261 AS2( mov [ecx+4], edi)
01262 AS2( add esi, 2)
01263 AS1( jnz loopstartAddP4)
01264
01265 AS1(loopendAddP4:)
01266
01267 AddEpilogue
01268 }
01269
01270 CRYPTOPP_NAKED int P4Optimized::Subtract(word *C, const word *A, const word *B, size_t N)
01271 {
01272 AddPrologue
01273
01274
01275 AS2( xor eax, eax)
01276 AS1( neg esi)
01277 AS1( jz loopendSubP4)
01278
01279 AS2( mov edi, [edx])
01280 AS2( mov ebp, [ebx])
01281 AS1( jmp carry1SubP4)
01282
01283 AS1(loopstartSubP4:)
01284 AS2( mov edi, [edx+8])
01285 AS2( add edx, 8)
01286 AS2( add ecx, 8)
01287 AS2( mov ebp, [ebx])
01288 AS2( sub edi, eax)
01289 AS1( jc carry1SubP4)
01290 AS2( xor eax, eax)
01291
01292 AS1(carry1SubP4:)
01293 AS2( sub edi, ebp)
01294 AS2( mov ebp, 1)
01295 AS2( mov [ecx], edi)
01296 AS2( mov edi, [edx+4])
01297 AS2( cmovc eax, ebp)
01298 AS2( mov ebp, [ebx+4])
01299 AS2( add ebx, 8)
01300 AS2( sub edi, eax)
01301 AS1( jc carry2SubP4)
01302 AS2( xor eax, eax)
01303
01304 AS1(carry2SubP4:)
01305 AS2( sub edi, ebp)
01306 AS2( mov ebp, 1)
01307 AS2( cmovc eax, ebp)
01308 AS2( mov [ecx+4], edi)
01309 AS2( add esi, 2)
01310 AS1( jnz loopstartSubP4)
01311
01312 AS1(loopendSubP4:)
01313
01314 AddEpilogue
01315 }
01316
01317
01318
01319 #define MulStartup \
01320 AS2(xor ebp, ebp) \
01321 AS2(xor edi, edi) \
01322 AS2(xor ebx, ebx)
01323
01324 #define MulShiftCarry \
01325 AS2(mov ebp, edx) \
01326 AS2(mov edi, ebx) \
01327 AS2(xor ebx, ebx)
01328
01329 #define MulAccumulateBottom(i,j) \
01330 AS2(mov eax, [ecx+4*j]) \
01331 AS2(imul eax, dword ptr [esi+4*i]) \
01332 AS2(add ebp, eax)
01333
01334 #define MulAccumulate(i,j) \
01335 AS2(mov eax, [ecx+4*j]) \
01336 AS1(mul dword ptr [esi+4*i]) \
01337 AS2(add ebp, eax) \
01338 AS2(adc edi, edx) \
01339 AS2(adc bl, bh)
01340
01341 #define MulStoreDigit(i) \
01342 AS2(mov edx, edi) \
01343 AS2(mov edi, [esp]) \
01344 AS2(mov [edi+4*i], ebp)
01345
01346 #define MulLastDiagonal(digits) \
01347 AS2(mov eax, [ecx+4*(digits-1)]) \
01348 AS1(mul dword ptr [esi+4*(digits-1)]) \
01349 AS2(add ebp, eax) \
01350 AS2(adc edx, edi) \
01351 AS2(mov edi, [esp]) \
01352 AS2(mov [edi+4*(2*digits-2)], ebp) \
01353 AS2(mov [edi+4*(2*digits-1)], edx)
01354
01355 CRYPTOPP_NAKED void PentiumOptimized::Multiply4(word* Z, const word* X, const word* Y)
01356 {
01357 MulPrologue
01358
01359 MulStartup
01360 MulAccumulate(0,0)
01361 MulStoreDigit(0)
01362 MulShiftCarry
01363
01364 MulAccumulate(1,0)
01365 MulAccumulate(0,1)
01366 MulStoreDigit(1)
01367 MulShiftCarry
01368
01369 MulAccumulate(2,0)
01370 MulAccumulate(1,1)
01371 MulAccumulate(0,2)
01372 MulStoreDigit(2)
01373 MulShiftCarry
01374
01375 MulAccumulate(3,0)
01376 MulAccumulate(2,1)
01377 MulAccumulate(1,2)
01378 MulAccumulate(0,3)
01379 MulStoreDigit(3)
01380 MulShiftCarry
01381
01382 MulAccumulate(3,1)
01383 MulAccumulate(2,2)
01384 MulAccumulate(1,3)
01385 MulStoreDigit(4)
01386 MulShiftCarry
01387
01388 MulAccumulate(3,2)
01389 MulAccumulate(2,3)
01390 MulStoreDigit(5)
01391 MulShiftCarry
01392
01393 MulLastDiagonal(4)
01394 MulEpilogue
01395 }
01396
01397 CRYPTOPP_NAKED void PentiumOptimized::Multiply8(word* Z, const word* X, const word* Y)
01398 {
01399 MulPrologue
01400
01401 MulStartup
01402 MulAccumulate(0,0)
01403 MulStoreDigit(0)
01404 MulShiftCarry
01405
01406 MulAccumulate(1,0)
01407 MulAccumulate(0,1)
01408 MulStoreDigit(1)
01409 MulShiftCarry
01410
01411 MulAccumulate(2,0)
01412 MulAccumulate(1,1)
01413 MulAccumulate(0,2)
01414 MulStoreDigit(2)
01415 MulShiftCarry
01416
01417 MulAccumulate(3,0)
01418 MulAccumulate(2,1)
01419 MulAccumulate(1,2)
01420 MulAccumulate(0,3)
01421 MulStoreDigit(3)
01422 MulShiftCarry
01423
01424 MulAccumulate(4,0)
01425 MulAccumulate(3,1)
01426 MulAccumulate(2,2)
01427 MulAccumulate(1,3)
01428 MulAccumulate(0,4)
01429 MulStoreDigit(4)
01430 MulShiftCarry
01431
01432 MulAccumulate(5,0)
01433 MulAccumulate(4,1)
01434 MulAccumulate(3,2)
01435 MulAccumulate(2,3)
01436 MulAccumulate(1,4)
01437 MulAccumulate(0,5)
01438 MulStoreDigit(5)
01439 MulShiftCarry
01440
01441 MulAccumulate(6,0)
01442 MulAccumulate(5,1)
01443 MulAccumulate(4,2)
01444 MulAccumulate(3,3)
01445 MulAccumulate(2,4)
01446 MulAccumulate(1,5)
01447 MulAccumulate(0,6)
01448 MulStoreDigit(6)
01449 MulShiftCarry
01450
01451 MulAccumulate(7,0)
01452 MulAccumulate(6,1)
01453 MulAccumulate(5,2)
01454 MulAccumulate(4,3)
01455 MulAccumulate(3,4)
01456 MulAccumulate(2,5)
01457 MulAccumulate(1,6)
01458 MulAccumulate(0,7)
01459 MulStoreDigit(7)
01460 MulShiftCarry
01461
01462 MulAccumulate(7,1)
01463 MulAccumulate(6,2)
01464 MulAccumulate(5,3)
01465 MulAccumulate(4,4)
01466 MulAccumulate(3,5)
01467 MulAccumulate(2,6)
01468 MulAccumulate(1,7)
01469 MulStoreDigit(8)
01470 MulShiftCarry
01471
01472 MulAccumulate(7,2)
01473 MulAccumulate(6,3)
01474 MulAccumulate(5,4)
01475 MulAccumulate(4,5)
01476 MulAccumulate(3,6)
01477 MulAccumulate(2,7)
01478 MulStoreDigit(9)
01479 MulShiftCarry
01480
01481 MulAccumulate(7,3)
01482 MulAccumulate(6,4)
01483 MulAccumulate(5,5)
01484 MulAccumulate(4,6)
01485 MulAccumulate(3,7)
01486 MulStoreDigit(10)
01487 MulShiftCarry
01488
01489 MulAccumulate(7,4)
01490 MulAccumulate(6,5)
01491 MulAccumulate(5,6)
01492 MulAccumulate(4,7)
01493 MulStoreDigit(11)
01494 MulShiftCarry
01495
01496 MulAccumulate(7,5)
01497 MulAccumulate(6,6)
01498 MulAccumulate(5,7)
01499 MulStoreDigit(12)
01500 MulShiftCarry
01501
01502 MulAccumulate(7,6)
01503 MulAccumulate(6,7)
01504 MulStoreDigit(13)
01505 MulShiftCarry
01506
01507 MulLastDiagonal(8)
01508 MulEpilogue
01509 }
01510
01511 CRYPTOPP_NAKED void PentiumOptimized::Multiply8Bottom(word* Z, const word* X, const word* Y)
01512 {
01513 MulPrologue
01514
01515 MulStartup
01516 MulAccumulate(0,0)
01517 MulStoreDigit(0)
01518 MulShiftCarry
01519
01520 MulAccumulate(1,0)
01521 MulAccumulate(0,1)
01522 MulStoreDigit(1)
01523 MulShiftCarry
01524
01525 MulAccumulate(2,0)
01526 MulAccumulate(1,1)
01527 MulAccumulate(0,2)
01528 MulStoreDigit(2)
01529 MulShiftCarry
01530
01531 MulAccumulate(3,0)
01532 MulAccumulate(2,1)
01533 MulAccumulate(1,2)
01534 MulAccumulate(0,3)
01535 MulStoreDigit(3)
01536 MulShiftCarry
01537
01538 MulAccumulate(4,0)
01539 MulAccumulate(3,1)
01540 MulAccumulate(2,2)
01541 MulAccumulate(1,3)
01542 MulAccumulate(0,4)
01543 MulStoreDigit(4)
01544 MulShiftCarry
01545
01546 MulAccumulate(5,0)
01547 MulAccumulate(4,1)
01548 MulAccumulate(3,2)
01549 MulAccumulate(2,3)
01550 MulAccumulate(1,4)
01551 MulAccumulate(0,5)
01552 MulStoreDigit(5)
01553 MulShiftCarry
01554
01555 MulAccumulate(6,0)
01556 MulAccumulate(5,1)
01557 MulAccumulate(4,2)
01558 MulAccumulate(3,3)
01559 MulAccumulate(2,4)
01560 MulAccumulate(1,5)
01561 MulAccumulate(0,6)
01562 MulStoreDigit(6)
01563 MulShiftCarry
01564
01565 MulAccumulateBottom(7,0)
01566 MulAccumulateBottom(6,1)
01567 MulAccumulateBottom(5,2)
01568 MulAccumulateBottom(4,3)
01569 MulAccumulateBottom(3,4)
01570 MulAccumulateBottom(2,5)
01571 MulAccumulateBottom(1,6)
01572 MulAccumulateBottom(0,7)
01573 MulStoreDigit(7)
01574 MulEpilogue
01575 }
01576
01577 #undef AS1
01578 #undef AS2
01579
01580 #else // not x86 - no processor specific code at this layer
01581
01582 typedef Portable LowLevel;
01583
01584 #endif
01585
01586 #ifdef SSE2_INTRINSICS_AVAILABLE
01587
01588 #ifdef __GNUC__
01589 #define CRYPTOPP_FASTCALL
01590 #else
01591 #define CRYPTOPP_FASTCALL __fastcall
01592 #endif
01593
01594 static void CRYPTOPP_FASTCALL P4_Mul(__m128i *C, const __m128i *A, const __m128i *B)
01595 {
01596 __m128i a3210 = _mm_load_si128(A);
01597 __m128i b3210 = _mm_load_si128(B);
01598
01599 __m128i sum;
01600
01601 __m128i z = _mm_setzero_si128();
01602 __m128i a2b2_a0b0 = _mm_mul_epu32(a3210, b3210);
01603 C[0] = a2b2_a0b0;
01604
01605 __m128i a3120 = _mm_shuffle_epi32(a3210, _MM_SHUFFLE(3, 1, 2, 0));
01606 __m128i b3021 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(3, 0, 2, 1));
01607 __m128i a1b0_a0b1 = _mm_mul_epu32(a3120, b3021);
01608 __m128i a1b0 = _mm_unpackhi_epi32(a1b0_a0b1, z);
01609 __m128i a0b1 = _mm_unpacklo_epi32(a1b0_a0b1, z);
01610 C[1] = _mm_add_epi64(a1b0, a0b1);
01611
01612 __m128i a31 = _mm_srli_epi64(a3210, 32);
01613 __m128i b31 = _mm_srli_epi64(b3210, 32);
01614 __m128i a3b3_a1b1 = _mm_mul_epu32(a31, b31);
01615 C[6] = a3b3_a1b1;
01616
01617 __m128i a1b1 = _mm_unpacklo_epi32(a3b3_a1b1, z);
01618 __m128i b3012 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(3, 0, 1, 2));
01619 __m128i a2b0_a0b2 = _mm_mul_epu32(a3210, b3012);
01620 __m128i a0b2 = _mm_unpacklo_epi32(a2b0_a0b2, z);
01621 __m128i a2b0 = _mm_unpackhi_epi32(a2b0_a0b2, z);
01622 sum = _mm_add_epi64(a1b1, a0b2);
01623 C[2] = _mm_add_epi64(sum, a2b0);
01624
01625 __m128i a2301 = _mm_shuffle_epi32(a3210, _MM_SHUFFLE(2, 3, 0, 1));
01626 __m128i b2103 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(2, 1, 0, 3));
01627 __m128i a3b0_a1b2 = _mm_mul_epu32(a2301, b3012);
01628 __m128i a2b1_a0b3 = _mm_mul_epu32(a3210, b2103);
01629 __m128i a3b0 = _mm_unpackhi_epi32(a3b0_a1b2, z);
01630 __m128i a1b2 = _mm_unpacklo_epi32(a3b0_a1b2, z);
01631 __m128i a2b1 = _mm_unpackhi_epi32(a2b1_a0b3, z);
01632 __m128i a0b3 = _mm_unpacklo_epi32(a2b1_a0b3, z);
01633 __m128i sum1 = _mm_add_epi64(a3b0, a1b2);
01634 sum = _mm_add_epi64(a2b1, a0b3);
01635 C[3] = _mm_add_epi64(sum, sum1);
01636
01637 __m128i a3b1_a1b3 = _mm_mul_epu32(a2301, b2103);
01638 __m128i a2b2 = _mm_unpackhi_epi32(a2b2_a0b0, z);
01639 __m128i a3b1 = _mm_unpackhi_epi32(a3b1_a1b3, z);
01640 __m128i a1b3 = _mm_unpacklo_epi32(a3b1_a1b3, z);
01641 sum = _mm_add_epi64(a2b2, a3b1);
01642 C[4] = _mm_add_epi64(sum, a1b3);
01643
01644 __m128i a1302 = _mm_shuffle_epi32(a3210, _MM_SHUFFLE(1, 3, 0, 2));
01645 __m128i b1203 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(1, 2, 0, 3));
01646 __m128i a3b2_a2b3 = _mm_mul_epu32(a1302, b1203);
01647 __m128i a3b2 = _mm_unpackhi_epi32(a3b2_a2b3, z);
01648 __m128i a2b3 = _mm_unpacklo_epi32(a3b2_a2b3, z);
01649 C[5] = _mm_add_epi64(a3b2, a2b3);
01650 }
01651
01652 void P4Optimized::Multiply4(word *C, const word *A, const word *B)
01653 {
01654 __m128i temp[7];
01655 const word *w = (word *)temp;
01656 const __m64 *mw = (__m64 *)w;
01657
01658 P4_Mul(temp, (__m128i *)A, (__m128i *)B);
01659
01660 C[0] = w[0];
01661
01662 __m64 s1, s2;
01663
01664 __m64 w1 = _mm_cvtsi32_si64(w[1]);
01665 __m64 w4 = mw[2];
01666 __m64 w6 = mw[3];
01667 __m64 w8 = mw[4];
01668 __m64 w10 = mw[5];
01669 __m64 w12 = mw[6];
01670 __m64 w14 = mw[7];
01671 __m64 w16 = mw[8];
01672 __m64 w18 = mw[9];
01673 __m64 w20 = mw[10];
01674 __m64 w22 = mw[11];
01675 __m64 w26 = _mm_cvtsi32_si64(w[26]);
01676
01677 s1 = _mm_add_si64(w1, w4);
01678 C[1] = _mm_cvtsi64_si32(s1);
01679 s1 = _mm_srli_si64(s1, 32);
01680
01681 s2 = _mm_add_si64(w6, w8);
01682 s1 = _mm_add_si64(s1, s2);
01683 C[2] = _mm_cvtsi64_si32(s1);
01684 s1 = _mm_srli_si64(s1, 32);
01685
01686 s2 = _mm_add_si64(w10, w12);
01687 s1 = _mm_add_si64(s1, s2);
01688 C[3] = _mm_cvtsi64_si32(s1);
01689 s1 = _mm_srli_si64(s1, 32);
01690
01691 s2 = _mm_add_si64(w14, w16);
01692 s1 = _mm_add_si64(s1, s2);
01693 C[4] = _mm_cvtsi64_si32(s1);
01694 s1 = _mm_srli_si64(s1, 32);
01695
01696 s2 = _mm_add_si64(w18, w20);
01697 s1 = _mm_add_si64(s1, s2);
01698 C[5] = _mm_cvtsi64_si32(s1);
01699 s1 = _mm_srli_si64(s1, 32);
01700
01701 s2 = _mm_add_si64(w22, w26);
01702 s1 = _mm_add_si64(s1, s2);
01703 C[6] = _mm_cvtsi64_si32(s1);
01704 s1 = _mm_srli_si64(s1, 32);
01705
01706 C[7] = _mm_cvtsi64_si32(s1) + w[27];
01707 _mm_empty();
01708 }
01709
01710 void P4Optimized::Multiply8(word *C, const word *A, const word *B)
01711 {
01712 __m128i temp[28];
01713 const word *w = (word *)temp;
01714 const __m64 *mw = (__m64 *)w;
01715 const word *x = (word *)temp+7*4;
01716 const __m64 *mx = (__m64 *)x;
01717 const word *y = (word *)temp+7*4*2;
01718 const __m64 *my = (__m64 *)y;
01719 const word *z = (word *)temp+7*4*3;
01720 const __m64 *mz = (__m64 *)z;
01721
01722 P4_Mul(temp, (__m128i *)A, (__m128i *)B);
01723
01724 P4_Mul(temp+7, (__m128i *)A+1, (__m128i *)B);
01725
01726 P4_Mul(temp+14, (__m128i *)A, (__m128i *)B+1);
01727
01728 P4_Mul(temp+21, (__m128i *)A+1, (__m128i *)B+1);
01729
01730 C[0] = w[0];
01731
01732 __m64 s1, s2, s3, s4;
01733
01734 __m64 w1 = _mm_cvtsi32_si64(w[1]);
01735 __m64 w4 = mw[2];
01736 __m64 w6 = mw[3];
01737 __m64 w8 = mw[4];
01738 __m64 w10 = mw[5];
01739 __m64 w12 = mw[6];
01740 __m64 w14 = mw[7];
01741 __m64 w16 = mw[8];
01742 __m64 w18 = mw[9];
01743 __m64 w20 = mw[10];
01744 __m64 w22 = mw[11];
01745 __m64 w26 = _mm_cvtsi32_si64(w[26]);
01746 __m64 w27 = _mm_cvtsi32_si64(w[27]);
01747
01748 __m64 x0 = _mm_cvtsi32_si64(x[0]);
01749 __m64 x1 = _mm_cvtsi32_si64(x[1]);
01750 __m64 x4 = mx[2];
01751 __m64 x6 = mx[3];
01752 __m64 x8 = mx[4];
01753 __m64 x10 = mx[5];
01754 __m64 x12 = mx[6];
01755 __m64 x14 = mx[7];
01756 __m64 x16 = mx[8];
01757 __m64 x18 = mx[9];
01758 __m64 x20 = mx[10];
01759 __m64 x22 = mx[11];
01760 __m64 x26 = _mm_cvtsi32_si64(x[26]);
01761 __m64 x27 = _mm_cvtsi32_si64(x[27]);
01762
01763 __m64 y0 = _mm_cvtsi32_si64(y[0]);
01764 __m64 y1 = _mm_cvtsi32_si64(y[1]);
01765 __m64 y4 = my[2];
01766 __m64 y6 = my[3];
01767 __m64 y8 = my[4];
01768 __m64 y10 = my[5];
01769 __m64 y12 = my[6];
01770 __m64 y14 = my[7];
01771 __m64 y16 = my[8];
01772 __m64 y18 = my[9];
01773 __m64 y20 = my[10];
01774 __m64 y22 = my[11];
01775 __m64 y26 = _mm_cvtsi32_si64(y[26]);
01776 __m64 y27 = _mm_cvtsi32_si64(y[27]);
01777
01778 __m64 z0 = _mm_cvtsi32_si64(z[0]);
01779 __m64 z1 = _mm_cvtsi32_si64(z[1]);
01780 __m64 z4 = mz[2];
01781 __m64 z6 = mz[3];
01782 __m64 z8 = mz[4];
01783 __m64 z10 = mz[5];
01784 __m64 z12 = mz[6];
01785 __m64 z14 = mz[7];
01786 __m64 z16 = mz[8];
01787 __m64 z18 = mz[9];
01788 __m64 z20 = mz[10];
01789 __m64 z22 = mz[11];
01790 __m64 z26 = _mm_cvtsi32_si64(z[26]);
01791
01792 s1 = _mm_add_si64(w1, w4);
01793 C[1] = _mm_cvtsi64_si32(s1);
01794 s1 = _mm_srli_si64(s1, 32);
01795
01796 s2 = _mm_add_si64(w6, w8);
01797 s1 = _mm_add_si64(s1, s2);
01798 C[2] = _mm_cvtsi64_si32(s1);
01799 s1 = _mm_srli_si64(s1, 32);
01800
01801 s2 = _mm_add_si64(w10, w12);
01802 s1 = _mm_add_si64(s1, s2);
01803 C[3] = _mm_cvtsi64_si32(s1);
01804 s1 = _mm_srli_si64(s1, 32);
01805
01806 s3 = _mm_add_si64(x0, y0);
01807 s2 = _mm_add_si64(w14, w16);
01808 s1 = _mm_add_si64(s1, s3);
01809 s1 = _mm_add_si64(s1, s2);
01810 C[4] = _mm_cvtsi64_si32(s1);
01811 s1 = _mm_srli_si64(s1, 32);
01812
01813 s3 = _mm_add_si64(x1, y1);
01814 s4 = _mm_add_si64(x4, y4);
01815 s1 = _mm_add_si64(s1, w18);
01816 s3 = _mm_add_si64(s3, s4);
01817 s1 = _mm_add_si64(s1, w20);
01818 s1 = _mm_add_si64(s1, s3);
01819 C[5] = _mm_cvtsi64_si32(s1);
01820 s1 = _mm_srli_si64(s1, 32);
01821
01822 s3 = _mm_add_si64(x6, y6);
01823 s4 = _mm_add_si64(x8, y8);
01824 s1 = _mm_add_si64(s1, w22);
01825 s3 = _mm_add_si64(s3, s4);
01826 s1 = _mm_add_si64(s1, w26);
01827 s1 = _mm_add_si64(s1, s3);
01828 C[6] = _mm_cvtsi64_si32(s1);
01829 s1 = _mm_srli_si64(s1, 32);
01830
01831 s3 = _mm_add_si64(x10, y10);
01832 s4 = _mm_add_si64(x12, y12);
01833 s1 = _mm_add_si64(s1, w27);
01834 s3 = _mm_add_si64(s3, s4);
01835 s1 = _mm_add_si64(s1, s3);
01836 C[7] = _mm_cvtsi64_si32(s1);
01837 s1 = _mm_srli_si64(s1, 32);
01838
01839 s3 = _mm_add_si64(x14, y14);
01840 s4 = _mm_add_si64(x16, y16);
01841 s1 = _mm_add_si64(s1, z0);
01842 s3 = _mm_add_si64(s3, s4);
01843 s1 = _mm_add_si64(s1, s3);
01844 C[8] = _mm_cvtsi64_si32(s1);
01845 s1 = _mm_srli_si64(s1, 32);
01846
01847 s3 = _mm_add_si64(x18, y18);
01848 s4 = _mm_add_si64(x20, y20);
01849 s1 = _mm_add_si64(s1, z1);
01850 s3 = _mm_add_si64(s3, s4);
01851 s1 = _mm_add_si64(s1, z4);
01852 s1 = _mm_add_si64(s1, s3);
01853 C[9] = _mm_cvtsi64_si32(s1);
01854 s1 = _mm_srli_si64(s1, 32);
01855
01856 s3 = _mm_add_si64(x22, y22);
01857 s4 = _mm_add_si64(x26, y26);
01858 s1 = _mm_add_si64(s1, z6);
01859 s3 = _mm_add_si64(s3, s4);
01860 s1 = _mm_add_si64(s1, z8);
01861 s1 = _mm_add_si64(s1, s3);
01862 C[10] = _mm_cvtsi64_si32(s1);
01863 s1 = _mm_srli_si64(s1, 32);
01864
01865 s3 = _mm_add_si64(x27, y27);
01866 s1 = _mm_add_si64(s1, z10);
01867 s1 = _mm_add_si64(s1, z12);
01868 s1 = _mm_add_si64(s1, s3);
01869 C[11] = _mm_cvtsi64_si32(s1);
01870 s1 = _mm_srli_si64(s1, 32);
01871
01872 s3 = _mm_add_si64(z14, z16);
01873 s1 = _mm_add_si64(s1, s3);
01874 C[12] = _mm_cvtsi64_si32(s1);
01875 s1 = _mm_srli_si64(s1, 32);
01876
01877 s3 = _mm_add_si64(z18, z20);
01878 s1 = _mm_add_si64(s1, s3);
01879 C[13] = _mm_cvtsi64_si32(s1);
01880 s1 = _mm_srli_si64(s1, 32);
01881
01882 s3 = _mm_add_si64(z22, z26);
01883 s1 = _mm_add_si64(s1, s3);
01884 C[14] = _mm_cvtsi64_si32(s1);
01885 s1 = _mm_srli_si64(s1, 32);
01886
01887 C[15] = z[27] + _mm_cvtsi64_si32(s1);
01888 _mm_empty();
01889 }
01890
01891 void P4Optimized::Multiply8Bottom(word *C, const word *A, const word *B)
01892 {
01893 __m128i temp[21];
01894 const word *w = (word *)temp;
01895 const __m64 *mw = (__m64 *)w;
01896 const word *x = (word *)temp+7*4;
01897 const __m64 *mx = (__m64 *)x;
01898 const word *y = (word *)temp+7*4*2;
01899 const __m64 *my = (__m64 *)y;
01900
01901 P4_Mul(temp, (__m128i *)A, (__m128i *)B);
01902
01903 P4_Mul(temp+7, (__m128i *)A+1, (__m128i *)B);
01904
01905 P4_Mul(temp+14, (__m128i *)A, (__m128i *)B+1);
01906
01907 C[0] = w[0];
01908
01909 __m64 s1, s2, s3, s4;
01910
01911 __m64 w1 = _mm_cvtsi32_si64(w[1]);
01912 __m64 w4 = mw[2];
01913 __m64 w6 = mw[3];
01914 __m64 w8 = mw[4];
01915 __m64 w10 = mw[5];
01916 __m64 w12 = mw[6];
01917 __m64 w14 = mw[7];
01918 __m64 w16 = mw[8];
01919 __m64 w18 = mw[9];
01920 __m64 w20 = mw[10];
01921 __m64 w22 = mw[11];
01922 __m64 w26 = _mm_cvtsi32_si64(w[26]);
01923
01924 __m64 x0 = _mm_cvtsi32_si64(x[0]);
01925 __m64 x1 = _mm_cvtsi32_si64(x[1]);
01926 __m64 x4 = mx[2];
01927 __m64 x6 = mx[3];
01928 __m64 x8 = mx[4];
01929
01930 __m64 y0 = _mm_cvtsi32_si64(y[0]);
01931 __m64 y1 = _mm_cvtsi32_si64(y[1]);
01932 __m64 y4 = my[2];
01933 __m64 y6 = my[3];
01934 __m64 y8 = my[4];
01935
01936 s1 = _mm_add_si64(w1, w4);
01937 C[1] = _mm_cvtsi64_si32(s1);
01938 s1 = _mm_srli_si64(s1, 32);
01939
01940 s2 = _mm_add_si64(w6, w8);
01941 s1 = _mm_add_si64(s1, s2);
01942 C[2] = _mm_cvtsi64_si32(s1);
01943 s1 = _mm_srli_si64(s1, 32);
01944
01945 s2 = _mm_add_si64(w10, w12);
01946 s1 = _mm_add_si64(s1, s2);
01947 C[3] = _mm_cvtsi64_si32(s1);
01948 s1 = _mm_srli_si64(s1, 32);
01949
01950 s3 = _mm_add_si64(x0, y0);
01951 s2 = _mm_add_si64(w14, w16);
01952 s1 = _mm_add_si64(s1, s3);
01953 s1 = _mm_add_si64(s1, s2);
01954 C[4] = _mm_cvtsi64_si32(s1);
01955 s1 = _mm_srli_si64(s1, 32);
01956
01957 s3 = _mm_add_si64(x1, y1);
01958 s4 = _mm_add_si64(x4, y4);
01959 s1 = _mm_add_si64(s1, w18);
01960 s3 = _mm_add_si64(s3, s4);
01961 s1 = _mm_add_si64(s1, w20);
01962 s1 = _mm_add_si64(s1, s3);
01963 C[5] = _mm_cvtsi64_si32(s1);
01964 s1 = _mm_srli_si64(s1, 32);
01965
01966 s3 = _mm_add_si64(x6, y6);
01967 s4 = _mm_add_si64(x8, y8);
01968 s1 = _mm_add_si64(s1, w22);
01969 s3 = _mm_add_si64(s3, s4);
01970 s1 = _mm_add_si64(s1, w26);
01971 s1 = _mm_add_si64(s1, s3);
01972 C[6] = _mm_cvtsi64_si32(s1);
01973 s1 = _mm_srli_si64(s1, 32);
01974
01975 C[7] = _mm_cvtsi64_si32(s1) + w[27] + x[10] + y[10] + x[12] + y[12];
01976 _mm_empty();
01977 }
01978
01979 #endif // #ifdef SSE2_INTRINSICS_AVAILABLE
01980
01981
01982
01983 #define A0 A
01984 #define A1 (A+N2)
01985 #define B0 B
01986 #define B1 (B+N2)
01987
01988 #define T0 T
01989 #define T1 (T+N2)
01990 #define T2 (T+N)
01991 #define T3 (T+N+N2)
01992
01993 #define R0 R
01994 #define R1 (R+N2)
01995 #define R2 (R+N)
01996 #define R3 (R+N+N2)
01997
01998
01999
02000
02001
02002
02003 void RecursiveMultiply(word *R, word *T, const word *A, const word *B, size_t N)
02004 {
02005 assert(N>=2 && N%2==0);
02006
02007 if (LowLevel::MultiplyRecursionLimit() >= 8 && N==8)
02008 LowLevel::Multiply8(R, A, B);
02009 else if (LowLevel::MultiplyRecursionLimit() >= 4 && N==4)
02010 LowLevel::Multiply4(R, A, B);
02011 else if (N==2)
02012 LowLevel::Multiply2(R, A, B);
02013 else
02014 {
02015 const size_t N2 = N/2;
02016 int carry;
02017
02018 int aComp = Compare(A0, A1, N2);
02019 int bComp = Compare(B0, B1, N2);
02020
02021 switch (2*aComp + aComp + bComp)
02022 {
02023 case -4:
02024 LowLevel::Subtract(R0, A1, A0, N2);
02025 LowLevel::Subtract(R1, B0, B1, N2);
02026 RecursiveMultiply(T0, T2, R0, R1, N2);
02027 LowLevel::Subtract(T1, T1, R0, N2);
02028 carry = -1;
02029 break;
02030 case -2:
02031 LowLevel::Subtract(R0, A1, A0, N2);
02032 LowLevel::Subtract(R1, B0, B1, N2);
02033 RecursiveMultiply(T0, T2, R0, R1, N2);
02034 carry = 0;
02035 break;
02036 case 2:
02037 LowLevel::Subtract(R0, A0, A1, N2);
02038 LowLevel::Subtract(R1, B1, B0, N2);
02039 RecursiveMultiply(T0, T2, R0, R1, N2);
02040 carry = 0;
02041 break;
02042 case 4:
02043 LowLevel::Subtract(R0, A1, A0, N2);
02044 LowLevel::Subtract(R1, B0, B1, N2);
02045 RecursiveMultiply(T0, T2, R0, R1, N2);
02046 LowLevel::Subtract(T1, T1, R1, N2);
02047 carry = -1;
02048 break;
02049 default:
02050 SetWords(T0, 0, N);
02051 carry = 0;
02052 }
02053
02054 RecursiveMultiply(R0, T2, A0, B0, N2);
02055 RecursiveMultiply(R2, T2, A1, B1, N2);
02056
02057
02058
02059 carry += LowLevel::Add(T0, T0, R0, N);
02060 carry += LowLevel::Add(T0, T0, R2, N);
02061 carry += LowLevel::Add(R1, R1, T0, N);
02062
02063 assert (carry >= 0 && carry <= 2);
02064 Increment(R3, N2, carry);
02065 }
02066 }
02067
02068
02069
02070
02071
02072 void RecursiveSquare(word *R, word *T, const word *A, size_t N)
02073 {
02074 assert(N && N%2==0);
02075 if (LowLevel::SquareRecursionLimit() >= 8 && N==8)
02076 LowLevel::Square8(R, A);
02077 if (LowLevel::SquareRecursionLimit() >= 4 && N==4)
02078 LowLevel::Square4(R, A);
02079 else if (N==2)
02080 LowLevel::Square2(R, A);
02081 else
02082 {
02083 const size_t N2 = N/2;
02084
02085 RecursiveSquare(R0, T2, A0, N2);
02086 RecursiveSquare(R2, T2, A1, N2);
02087 RecursiveMultiply(T0, T2, A0, A1, N2);
02088
02089 int carry = LowLevel::Add(R1, R1, T0, N);
02090 carry += LowLevel::Add(R1, R1, T0, N);
02091 Increment(R3, N2, carry);
02092 }
02093 }
02094
02095
02096
02097
02098
02099
02100 void RecursiveMultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
02101 {
02102 assert(N>=2 && N%2==0);
02103 if (LowLevel::MultiplyBottomRecursionLimit() >= 8 && N==8)
02104 LowLevel::Multiply8Bottom(R, A, B);
02105 else if (LowLevel::MultiplyBottomRecursionLimit() >= 4 && N==4)
02106 LowLevel::Multiply4Bottom(R, A, B);
02107 else if (N==2)
02108 LowLevel::Multiply2Bottom(R, A, B);
02109 else
02110 {
02111 const size_t N2 = N/2;
02112
02113 RecursiveMultiply(R, T, A0, B0, N2);
02114 RecursiveMultiplyBottom(T0, T1, A1, B0, N2);
02115 LowLevel::Add(R1, R1, T0, N2);
02116 RecursiveMultiplyBottom(T0, T1, A0, B1, N2);
02117 LowLevel::Add(R1, R1, T0, N2);
02118 }
02119 }
02120
02121
02122
02123
02124
02125
02126
02127 void RecursiveMultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, size_t N)
02128 {
02129 assert(N>=2 && N%2==0);
02130
02131 if (N==4)
02132 {
02133 LowLevel::Multiply4(T, A, B);
02134 memcpy(R, T+4, 4*WORD_SIZE);
02135 }
02136 else if (N==2)
02137 {
02138 LowLevel::Multiply2(T, A, B);
02139 memcpy(R, T+2, 2*WORD_SIZE);
02140 }
02141 else
02142 {
02143 const size_t N2 = N/2;
02144 int carry;
02145
02146 int aComp = Compare(A0, A1, N2);
02147 int bComp = Compare(B0, B1, N2);
02148
02149 switch (2*aComp + aComp + bComp)
02150 {
02151 case -4:
02152 LowLevel::Subtract(R0, A1, A0, N2);
02153 LowLevel::Subtract(R1, B0, B1, N2);
02154 RecursiveMultiply(T0, T2, R0, R1, N2);
02155 LowLevel::Subtract(T1, T1, R0, N2);
02156 carry = -1;
02157 break;
02158 case -2:
02159 LowLevel::Subtract(R0, A1, A0, N2);
02160 LowLevel::Subtract(R1, B0, B1, N2);
02161 RecursiveMultiply(T0, T2, R0, R1, N2);
02162 carry = 0;
02163 break;
02164 case 2:
02165 LowLevel::Subtract(R0, A0, A1, N2);
02166 LowLevel::Subtract(R1, B1, B0, N2);
02167 RecursiveMultiply(T0, T2, R0, R1, N2);
02168 carry = 0;
02169 break;
02170 case 4:
02171 LowLevel::Subtract(R0, A1, A0, N2);
02172 LowLevel::Subtract(R1, B0, B1, N2);
02173 RecursiveMultiply(T0, T2, R0, R1, N2);
02174 LowLevel::Subtract(T1, T1, R1, N2);
02175 carry = -1;
02176 break;
02177 default:
02178 SetWords(T0, 0, N);
02179 carry = 0;
02180 }
02181
02182 RecursiveMultiply(T2, R0, A1, B1, N2);
02183
02184
02185
02186 int c2 = LowLevel::Subtract(R0, L+N2, L, N2);
02187 c2 += LowLevel::Subtract(R0, R0, T0, N2);
02188 int t = (Compare(R0, T2, N2) == -1);
02189
02190 carry += t;
02191 carry += Increment(R0, N2, c2+t);
02192 carry += LowLevel::Add(R0, R0, T1, N2);
02193 carry += LowLevel::Add(R0, R0, T3, N2);
02194 assert (carry >= 0 && carry <= 2);
02195
02196 CopyWords(R1, T3, N2);
02197 Increment(R1, N2, carry);
02198 }
02199 }
02200
02201 inline int Add(word *C, const word *A, const word *B, size_t N)
02202 {
02203 return LowLevel::Add(C, A, B, N);
02204 }
02205
02206 inline int Subtract(word *C, const word *A, const word *B, size_t N)
02207 {
02208 return LowLevel::Subtract(C, A, B, N);
02209 }
02210
02211 inline void Multiply(word *R, word *T, const word *A, const word *B, size_t N)
02212 {
02213 RecursiveMultiply(R, T, A, B, N);
02214 }
02215
02216 inline void Square(word *R, word *T, const word *A, size_t N)
02217 {
02218 RecursiveSquare(R, T, A, N);
02219 }
02220
02221 inline void MultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
02222 {
02223 RecursiveMultiplyBottom(R, T, A, B, N);
02224 }
02225
02226 inline void MultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, size_t N)
02227 {
02228 RecursiveMultiplyTop(R, T, L, A, B, N);
02229 }
02230
02231 static word LinearMultiply(word *C, const word *A, word B, size_t N)
02232 {
02233 word carry=0;
02234 for(unsigned i=0; i<N; i++)
02235 {
02236 DWord p = DWord::MultiplyAndAdd(A[i], B, carry);
02237 C[i] = p.GetLowHalf();
02238 carry = p.GetHighHalf();
02239 }
02240 return carry;
02241 }
02242
02243
02244
02245
02246
02247
02248 void AsymmetricMultiply(word *R, word *T, const word *A, size_t NA, const word *B, size_t NB)
02249 {
02250 if (NA == NB)
02251 {
02252 if (A == B)
02253 Square(R, T, A, NA);
02254 else
02255 Multiply(R, T, A, B, NA);
02256
02257 return;
02258 }
02259
02260 if (NA > NB)
02261 {
02262 std::swap(A, B);
02263 std::swap(NA, NB);
02264 }
02265
02266 assert(NB % NA == 0);
02267 assert((NB/NA)%2 == 0);
02268
02269 if (NA==2 && !A[1])
02270 {
02271 switch (A[0])
02272 {
02273 case 0:
02274 SetWords(R, 0, NB+2);
02275 return;
02276 case 1:
02277 CopyWords(R, B, NB);
02278 R[NB] = R[NB+1] = 0;
02279 return;
02280 default:
02281 R[NB] = LinearMultiply(R, B, A[0], NB);
02282 R[NB+1] = 0;
02283 return;
02284 }
02285 }
02286
02287 Multiply(R, T, A, B, NA);
02288 CopyWords(T+2*NA, R+NA, NA);
02289
02290 size_t i;
02291
02292 for (i=2*NA; i<NB; i+=2*NA)
02293 Multiply(T+NA+i, T, A, B+i, NA);
02294 for (i=NA; i<NB; i+=2*NA)
02295 Multiply(R+i, T, A, B+i, NA);
02296
02297 if (Add(R+NA, R+NA, T+2*NA, NB-NA))
02298 Increment(R+NB, NA);
02299 }
02300
02301
02302
02303
02304
02305 void RecursiveInverseModPower2(word *R, word *T, const word *A, size_t N)
02306 {
02307 if (N==2)
02308 {
02309 T[0] = AtomicInverseModPower2(A[0]);
02310 T[1] = 0;
02311 LowLevel::Multiply2Bottom(T+2, T, A);
02312 TwosComplement(T+2, 2);
02313 Increment(T+2, 2, 2);
02314 LowLevel::Multiply2Bottom(R, T, T+2);
02315 }
02316 else
02317 {
02318 const size_t N2 = N/2;
02319 RecursiveInverseModPower2(R0, T0, A0, N2);
02320 T0[0] = 1;
02321 SetWords(T0+1, 0, N2-1);
02322 MultiplyTop(R1, T1, T0, R0, A0, N2);
02323 MultiplyBottom(T0, T1, R0, A1, N2);
02324 Add(T0, R1, T0, N2);
02325 TwosComplement(T0, N2);
02326 MultiplyBottom(R1, T1, R0, T0, N2);
02327 }
02328 }
02329
02330
02331
02332
02333
02334
02335
02336 void MontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, size_t N)
02337 {
02338 MultiplyBottom(R, T, X, U, N);
02339 MultiplyTop(T, T+N, X, R, M, N);
02340 word borrow = Subtract(T, X+N, T, N);
02341
02342 word carry = Add(T+N, T, M, N);
02343 assert(carry || !borrow);
02344 CopyWords(R, T + (borrow ? N : 0), N);
02345 }
02346
02347
02348
02349
02350
02351
02352
02353
02354 void HalfMontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, const word *V, size_t N)
02355 {
02356 assert(N%2==0 && N>=4);
02357
02358 #define M0 M
02359 #define M1 (M+N2)
02360 #define V0 V
02361 #define V1 (V+N2)
02362
02363 #define X0 X
02364 #define X1 (X+N2)
02365 #define X2 (X+N)
02366 #define X3 (X+N+N2)
02367
02368 const size_t N2 = N/2;
02369 Multiply(T0, T2, V0, X3, N2);
02370 int c2 = Add(T0, T0, X0, N);
02371 MultiplyBottom(T3, T2, T0, U, N2);
02372 MultiplyTop(T2, R, T0, T3, M0, N2);
02373 c2 -= Subtract(T2, T1, T2, N2);
02374 Multiply(T0, R, T3, M1, N2);
02375 c2 -= Subtract(T0, T2, T0, N2);
02376 int c3 = -(int)Subtract(T1, X2, T1, N2);
02377 Multiply(R0, T2, V1, X3, N2);
02378 c3 += Add(R, R, T, N);
02379
02380 if (c2>0)
02381 c3 += Increment(R1, N2);
02382 else if (c2<0)
02383 c3 -= Decrement(R1, N2, -c2);
02384
02385 assert(c3>=-1 && c3<=1);
02386 if (c3>0)
02387 Subtract(R, R, M, N);
02388 else if (c3<0)
02389 Add(R, R, M, N);
02390
02391 #undef M0
02392 #undef M1
02393 #undef V0
02394 #undef V1
02395
02396 #undef X0
02397 #undef X1
02398 #undef X2
02399 #undef X3
02400 }
02401
02402 #undef A0
02403 #undef A1
02404 #undef B0
02405 #undef B1
02406
02407 #undef T0
02408 #undef T1
02409 #undef T2
02410 #undef T3
02411
02412 #undef R0
02413 #undef R1
02414 #undef R2
02415 #undef R3
02416
02417
02418
02419
02420
02421
02422
02423
02424
02425
02426
02427
02428
02429
02430
02431
02432
02433
02434
02435
02436
02437
02438
02439
02440
02441
02442
02443
02444
02445
02446
02447
02448
02449
02450
02451
02452
02453
02454
02455
02456
02457
02458
02459
02460
02461
02462
02463
02464
02465
02466
02467
02468
02469
02470
02471
02472
02473
02474
02475
02476
02477
02478
02479
02480
02481 static inline void AtomicDivide(word *Q, const word *A, const word *B)
02482 {
02483 word T[4];
02484 DWord q = DivideFourWordsByTwo<word, DWord>(T, DWord(A[0], A[1]), DWord(A[2], A[3]), DWord(B[0], B[1]));
02485 Q[0] = q.GetLowHalf();
02486 Q[1] = q.GetHighHalf();
02487
02488 #ifndef NDEBUG
02489 if (B[0] || B[1])
02490 {
02491
02492 assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
02493 word P[4];
02494 Portable::Multiply2(P, Q, B);
02495 Add(P, P, T, 4);
02496 assert(memcmp(P, A, 4*WORD_SIZE)==0);
02497 }
02498 #endif
02499 }
02500
02501
02502 static void CorrectQuotientEstimate(word *R, word *T, word *Q, const word *B, size_t N)
02503 {
02504 assert(N && N%2==0);
02505
02506 if (Q[1])
02507 {
02508 T[N] = T[N+1] = 0;
02509 unsigned i;
02510 for (i=0; i<N; i+=4)
02511 LowLevel::Multiply2(T+i, Q, B+i);
02512 for (i=2; i<N; i+=4)
02513 if (LowLevel::Multiply2Add(T+i, Q, B+i))
02514 T[i+5] += (++T[i+4]==0);
02515 }
02516 else
02517 {
02518 T[N] = LinearMultiply(T, B, Q[0], N);
02519 T[N+1] = 0;
02520 }
02521
02522 word borrow = Subtract(R, R, T, N+2);
02523 assert(!borrow && !R[N+1]);
02524
02525 while (R[N] || Compare(R, B, N) >= 0)
02526 {
02527 R[N] -= Subtract(R, R, B, N);
02528 Q[1] += (++Q[0]==0);
02529 assert(Q[0] || Q[1]);
02530 }
02531 }
02532
02533
02534
02535
02536
02537
02538
02539 void Divide(word *R, word *Q, word *T, const word *A, size_t NA, const word *B, size_t NB)
02540 {
02541 assert(NA && NB && NA%2==0 && NB%2==0);
02542 assert(B[NB-1] || B[NB-2]);
02543 assert(NB <= NA);
02544
02545
02546 word *const TA=T;
02547 word *const TB=T+NA+2;
02548 word *const TP=T+NA+2+NB;
02549
02550
02551 unsigned shiftWords = (B[NB-1]==0);
02552 TB[0] = TB[NB-1] = 0;
02553 CopyWords(TB+shiftWords, B, NB-shiftWords);
02554 unsigned shiftBits = WORD_BITS - BitPrecision(TB[NB-1]);
02555 assert(shiftBits < WORD_BITS);
02556 ShiftWordsLeftByBits(TB, NB, shiftBits);
02557
02558
02559 TA[0] = TA[NA] = TA[NA+1] = 0;
02560 CopyWords(TA+shiftWords, A, NA);
02561 ShiftWordsLeftByBits(TA, NA+2, shiftBits);
02562
02563 if (TA[NA+1]==0 && TA[NA] <= 1)
02564 {
02565 Q[NA-NB+1] = Q[NA-NB] = 0;
02566 while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0)
02567 {
02568 TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB);
02569 ++Q[NA-NB];
02570 }
02571 }
02572 else
02573 {
02574 NA+=2;
02575 assert(Compare(TA+NA-NB, TB, NB) < 0);
02576 }
02577
02578 word BT[2];
02579 BT[0] = TB[NB-2] + 1;
02580 BT[1] = TB[NB-1] + (BT[0]==0);
02581
02582
02583 for (size_t i=NA-2; i>=NB; i-=2)
02584 {
02585 AtomicDivide(Q+i-NB, TA+i-2, BT);
02586 CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB);
02587 }
02588
02589
02590 CopyWords(R, TA+shiftWords, NB);
02591 ShiftWordsRightByBits(R, NB, shiftBits);
02592 }
02593
02594 static inline size_t EvenWordCount(const word *X, size_t N)
02595 {
02596 while (N && X[N-2]==0 && X[N-1]==0)
02597 N-=2;
02598 return N;
02599 }
02600
02601
02602
02603
02604
02605
02606
02607 unsigned int AlmostInverse(word *R, word *T, const word *A, size_t NA, const word *M, size_t N)
02608 {
02609 assert(NA<=N && N && N%2==0);
02610
02611 word *b = T;
02612 word *c = T+N;
02613 word *f = T+2*N;
02614 word *g = T+3*N;
02615 size_t bcLen=2, fgLen=EvenWordCount(M, N);
02616 unsigned int k=0, s=0;
02617
02618 SetWords(T, 0, 3*N);
02619 b[0]=1;
02620 CopyWords(f, A, NA);
02621 CopyWords(g, M, N);
02622
02623 while (1)
02624 {
02625 word t=f[0];
02626 while (!t)
02627 {
02628 if (EvenWordCount(f, fgLen)==0)
02629 {
02630 SetWords(R, 0, N);
02631 return 0;
02632 }
02633
02634 ShiftWordsRightByWords(f, fgLen, 1);
02635 if (c[bcLen-1]) bcLen+=2;
02636 assert(bcLen <= N);
02637 ShiftWordsLeftByWords(c, bcLen, 1);
02638 k+=WORD_BITS;
02639 t=f[0];
02640 }
02641
02642 unsigned int i=0;
02643 while (t%2 == 0)
02644 {
02645 t>>=1;
02646 i++;
02647 }
02648 k+=i;
02649
02650 if (t==1 && f[1]==0 && EvenWordCount(f, fgLen)==2)
02651 {
02652 if (s%2==0)
02653 CopyWords(R, b, N);
02654 else
02655 Subtract(R, M, b, N);
02656 return k;
02657 }
02658
02659 ShiftWordsRightByBits(f, fgLen, i);
02660 t=ShiftWordsLeftByBits(c, bcLen, i);
02661 if (t)
02662 {
02663 c[bcLen] = t;
02664 bcLen+=2;
02665 assert(bcLen <= N);
02666 }
02667
02668 if (f[fgLen-2]==0 && g[fgLen-2]==0 && f[fgLen-1]==0 && g[fgLen-1]==0)
02669 fgLen-=2;
02670
02671 if (Compare(f, g, fgLen)==-1)
02672 {
02673 std::swap(f, g);
02674 std::swap(b, c);
02675 s++;
02676 }
02677
02678 Subtract(f, f, g, fgLen);
02679
02680 if (Add(b, b, c, bcLen))
02681 {
02682 b[bcLen] = 1;
02683 bcLen+=2;
02684 assert(bcLen <= N);
02685 }
02686 }
02687 }
02688
02689
02690
02691
02692
02693 void DivideByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
02694 {
02695 CopyWords(R, A, N);
02696
02697 while (k--)
02698 {
02699 if (R[0]%2==0)
02700 ShiftWordsRightByBits(R, N, 1);
02701 else
02702 {
02703 word carry = Add(R, R, M, N);
02704 ShiftWordsRightByBits(R, N, 1);
02705 R[N-1] += carry<<(WORD_BITS-1);
02706 }
02707 }
02708 }
02709
02710
02711
02712
02713
02714 void MultiplyByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
02715 {
02716 CopyWords(R, A, N);
02717
02718 while (k--)
02719 if (ShiftWordsLeftByBits(R, N, 1) || Compare(R, M, N)>=0)
02720 Subtract(R, R, M, N);
02721 }
02722
02723
02724
02725 InitializeInteger::InitializeInteger()
02726 {
02727 if (!g_pAssignIntToInteger)
02728 {
02729 #ifdef CRYPTOPP_X86ASM_AVAILABLE
02730 SetPentiumFunctionPointers();
02731 #endif
02732 g_pAssignIntToInteger = AssignIntToInteger;
02733 }
02734 }
02735
02736 static const unsigned int RoundupSizeTable[] = {2, 2, 2, 4, 4, 8, 8, 8, 8};
02737
02738 static inline size_t RoundupSize(size_t n)
02739 {
02740 if (n<=8)
02741 return RoundupSizeTable[n];
02742 else if (n<=16)
02743 return 16;
02744 else if (n<=32)
02745 return 32;
02746 else if (n<=64)
02747 return 64;
02748 else return size_t(1) << BitPrecision(n-1);
02749 }
02750
02751 Integer::Integer()
02752 : reg(2), sign(POSITIVE)
02753 {
02754 reg[0] = reg[1] = 0;
02755 }
02756
02757 Integer::Integer(const Integer& t)
02758 : reg(RoundupSize(t.WordCount())), sign(t.sign)
02759 {
02760 CopyWords(reg, t.reg, reg.size());
02761 }
02762
02763 Integer::Integer(Sign s, lword value)
02764 : reg(2), sign(s)
02765 {
02766 reg[0] = word(value);
02767 reg[1] = word(SafeRightShift<WORD_BITS>(value));
02768 }
02769
02770 Integer::Integer(signed long value)
02771 : reg(2)
02772 {
02773 if (value >= 0)
02774 sign = POSITIVE;
02775 else
02776 {
02777 sign = NEGATIVE;
02778 value = -value;
02779 }
02780 reg[0] = word(value);
02781 reg[1] = word(SafeRightShift<WORD_BITS>((unsigned long)value));
02782 }
02783
02784 Integer::Integer(Sign s, word high, word low)
02785 : reg(2), sign(s)
02786 {
02787 reg[0] = low;
02788 reg[1] = high;
02789 }
02790
02791 bool Integer::IsConvertableToLong() const
02792 {
02793 if (ByteCount() > sizeof(long))
02794 return false;
02795
02796 unsigned long value = (unsigned long)reg[0];
02797 value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
02798
02799 if (sign==POSITIVE)
02800 return (signed long)value >= 0;
02801 else
02802 return -(signed long)value < 0;
02803 }
02804
02805 signed long Integer::ConvertToLong() const
02806 {
02807 assert(IsConvertableToLong());
02808
02809 unsigned long value = (unsigned long)reg[0];
02810 value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
02811 return sign==POSITIVE ? value : -(signed long)value;
02812 }
02813
02814 Integer::Integer(BufferedTransformation &encodedInteger, size_t byteCount, Signedness s)
02815 {
02816 Decode(encodedInteger, byteCount, s);
02817 }
02818
02819 Integer::Integer(const byte *encodedInteger, size_t byteCount, Signedness s)
02820 {
02821 Decode(encodedInteger, byteCount, s);
02822 }
02823
02824 Integer::Integer(BufferedTransformation &bt)
02825 {
02826 BERDecode(bt);
02827 }
02828
02829 Integer::Integer(RandomNumberGenerator &rng, size_t bitcount)
02830 {
02831 Randomize(rng, bitcount);
02832 }
02833
02834 Integer::Integer(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
02835 {
02836 if (!Randomize(rng, min, max, rnType, equiv, mod))
02837 throw Integer::RandomNumberNotFound();
02838 }
02839
02840 Integer Integer::Power2(size_t e)
02841 {
02842 Integer r((word)0, BitsToWords(e+1));
02843 r.SetBit(e);
02844 return r;
02845 }
02846
02847 template <long i>
02848 struct NewInteger
02849 {
02850 Integer * operator()() const
02851 {
02852 return new Integer(i);
02853 }
02854 };
02855
02856 const Integer &Integer::Zero()
02857 {
02858 return Singleton<Integer>().Ref();
02859 }
02860
02861 const Integer &Integer::One()
02862 {
02863 return Singleton<Integer, NewInteger<1> >().Ref();
02864 }
02865
02866 const Integer &Integer::Two()
02867 {
02868 return Singleton<Integer, NewInteger<2> >().Ref();
02869 }
02870
02871 bool Integer::operator!() const
02872 {
02873 return IsNegative() ? false : (reg[0]==0 && WordCount()==0);
02874 }
02875
02876 Integer& Integer::operator=(const Integer& t)
02877 {
02878 if (this != &t)
02879 {
02880 reg.New(RoundupSize(t.WordCount()));
02881 CopyWords(reg, t.reg, reg.size());
02882 sign = t.sign;
02883 }
02884 return *this;
02885 }
02886
02887 bool Integer::GetBit(size_t n) const
02888 {
02889 if (n/WORD_BITS >= reg.size())
02890 return 0;
02891 else
02892 return bool((reg[n/WORD_BITS] >> (n % WORD_BITS)) & 1);
02893 }
02894
02895 void Integer::SetBit(size_t n, bool value)
02896 {
02897 if (value)
02898 {
02899 reg.CleanGrow(RoundupSize(BitsToWords(n+1)));
02900 reg[n/WORD_BITS] |= (word(1) << (n%WORD_BITS));
02901 }
02902 else
02903 {
02904 if (n/WORD_BITS < reg.size())
02905 reg[n/WORD_BITS] &= ~(word(1) << (n%WORD_BITS));
02906 }
02907 }
02908
02909 byte Integer::GetByte(size_t n) const
02910 {
02911 if (n/WORD_SIZE >= reg.size())
02912 return 0;
02913 else
02914 return byte(reg[n/WORD_SIZE] >> ((n%WORD_SIZE)*8));
02915 }
02916
02917 void Integer::SetByte(size_t n, byte value)
02918 {
02919 reg.CleanGrow(RoundupSize(BytesToWords(n+1)));
02920 reg[n/WORD_SIZE] &= ~(word(0xff) << 8*(n%WORD_SIZE));
02921 reg[n/WORD_SIZE] |= (word(value) << 8*(n%WORD_SIZE));
02922 }
02923
02924 lword Integer::GetBits(size_t i, size_t n) const
02925 {
02926 lword v = 0;
02927 assert(n <= sizeof(v)*8);
02928 for (unsigned int j=0; j<n; j++)
02929 v |= lword(GetBit(i+j)) << j;
02930 return v;
02931 }
02932
02933 Integer Integer::operator-() const
02934 {
02935 Integer result(*this);
02936 result.Negate();
02937 return result;
02938 }
02939
02940 Integer Integer::AbsoluteValue() const
02941 {
02942 Integer result(*this);
02943 result.sign = POSITIVE;
02944 return result;
02945 }
02946
02947 void Integer::swap(Integer &a)
02948 {
02949 reg.swap(a.reg);
02950 std::swap(sign, a.sign);
02951 }
02952
02953 Integer::Integer(word value, size_t length)
02954 : reg(RoundupSize(length)), sign(POSITIVE)
02955 {
02956 reg[0] = value;
02957 SetWords(reg+1, 0, reg.size()-1);
02958 }
02959
02960 template <class T>
02961 static Integer StringToInteger(const T *str)
02962 {
02963 int radix;
02964
02965
02966 unsigned int length;
02967 for (length = 0; str[length] != 0; length++) {}
02968
02969 Integer v;
02970
02971 if (length == 0)
02972 return v;
02973
02974 switch (str[length-1])
02975 {
02976 case 'h':
02977 case 'H':
02978 radix=16;
02979 break;
02980 case 'o':
02981 case 'O':
02982 radix=8;
02983 break;
02984 case 'b':
02985 case 'B':
02986 radix=2;
02987 break;
02988 default:
02989 radix=10;
02990 }
02991
02992 if (length > 2 && str[0] == '0' && str[1] == 'x')
02993 radix = 16;
02994
02995 for (unsigned i=0; i<length; i++)
02996 {
02997 int digit;
02998
02999 if (str[i] >= '0' && str[i] <= '9')
03000 digit = str[i] - '0';
03001 else if (str[i] >= 'A' && str[i] <= 'F')
03002 digit = str[i] - 'A' + 10;
03003 else if (str[i] >= 'a' && str[i] <= 'f')
03004 digit = str[i] - 'a' + 10;
03005 else
03006 digit = radix;
03007
03008 if (digit < radix)
03009 {
03010 v *= radix;
03011 v += digit;
03012 }
03013 }
03014
03015 if (str[0] == '-')
03016 v.Negate();
03017
03018 return v;
03019 }
03020
03021 Integer::Integer(const char *str)
03022 : reg(2), sign(POSITIVE)
03023 {
03024 *this = StringToInteger(str);
03025 }
03026
03027 Integer::Integer(const wchar_t *str)
03028 : reg(2), sign(POSITIVE)
03029 {
03030 *this = StringToInteger(str);
03031 }
03032
03033 unsigned int Integer::WordCount() const
03034 {
03035 return (unsigned int)CountWords(reg, reg.size());
03036 }
03037
03038 unsigned int Integer::ByteCount() const
03039 {
03040 unsigned wordCount = WordCount();
03041 if (wordCount)
03042 return (wordCount-1)*WORD_SIZE + BytePrecision(reg[wordCount-1]);
03043 else
03044 return 0;
03045 }
03046
03047 unsigned int Integer::BitCount() const
03048 {
03049 unsigned wordCount = WordCount();
03050 if (wordCount)
03051 return (wordCount-1)*WORD_BITS + BitPrecision(reg[wordCount-1]);
03052 else
03053 return 0;
03054 }
03055
03056 void Integer::Decode(const byte *input, size_t inputLen, Signedness s)
03057 {
03058 StringStore store(input, inputLen);
03059 Decode(store, inputLen, s);
03060 }
03061
03062 void Integer::Decode(BufferedTransformation &bt, size_t inputLen, Signedness s)
03063 {
03064 assert(bt.MaxRetrievable() >= inputLen);
03065
03066 byte b;
03067 bt.Peek(b);
03068 sign = ((s==SIGNED) && (b & 0x80)) ? NEGATIVE : POSITIVE;
03069
03070 while (inputLen>0 && (sign==POSITIVE ? b==0 : b==0xff))
03071 {
03072 bt.Skip(1);
03073 inputLen--;
03074 bt.Peek(b);
03075 }
03076
03077 reg.CleanNew(RoundupSize(BytesToWords(inputLen)));
03078
03079 for (size_t i=inputLen; i > 0; i--)
03080 {
03081 bt.Get(b);
03082 reg[(i-1)/WORD_SIZE] |= word(b) << ((i-1)%WORD_SIZE)*8;
03083 }
03084
03085 if (sign == NEGATIVE)
03086 {
03087 for (size_t i=inputLen; i<reg.size()*WORD_SIZE; i++)
03088 reg[i/WORD_SIZE] |= word(0xff) << (i%WORD_SIZE)*8;
03089 TwosComplement(reg, reg.size());
03090 }
03091 }
03092
03093 size_t Integer::MinEncodedSize(Signedness signedness) const
03094 {
03095 unsigned int outputLen = STDMAX(1U, ByteCount());
03096 if (signedness == UNSIGNED)
03097 return outputLen;
03098 if (NotNegative() && (GetByte(outputLen-1) & 0x80))
03099 outputLen++;
03100 if (IsNegative() && *this < -Power2(outputLen*8-1))
03101 outputLen++;
03102 return outputLen;
03103 }
03104
03105 void Integer::Encode(byte *output, size_t outputLen, Signedness signedness) const
03106 {
03107 ArraySink sink(output, outputLen);
03108 Encode(sink, outputLen, signedness);
03109 }
03110
03111 void Integer::Encode(BufferedTransformation &bt, size_t outputLen, Signedness signedness) const
03112 {
03113 if (signedness == UNSIGNED || NotNegative())
03114 {
03115 for (size_t i=outputLen; i > 0; i--)
03116 bt.Put(GetByte(i-1));
03117 }
03118 else
03119 {
03120
03121 Integer temp = Integer::Power2(8*STDMAX((size_t)ByteCount(), outputLen)) + *this;
03122 temp.Encode(bt, outputLen, UNSIGNED);
03123 }
03124 }
03125
03126 void Integer::DEREncode(BufferedTransformation &bt) const
03127 {
03128 DERGeneralEncoder enc(bt, INTEGER);
03129 Encode(enc, MinEncodedSize(SIGNED), SIGNED);
03130 enc.MessageEnd();
03131 }
03132
03133 void Integer::BERDecode(const byte *input, size_t len)
03134 {
03135 StringStore store(input, len);
03136 BERDecode(store);
03137 }
03138
03139 void Integer::BERDecode(BufferedTransformation &bt)
03140 {
03141 BERGeneralDecoder dec(bt, INTEGER);
03142 if (!dec.IsDefiniteLength() || dec.MaxRetrievable() < dec.RemainingLength())
03143 BERDecodeError();
03144 Decode(dec, (size_t)dec.RemainingLength(), SIGNED);
03145 dec.MessageEnd();
03146 }
03147
03148 void Integer::DEREncodeAsOctetString(BufferedTransformation &bt, size_t length) const
03149 {
03150 DERGeneralEncoder enc(bt, OCTET_STRING);
03151 Encode(enc, length);
03152 enc.MessageEnd();
03153 }
03154
03155 void Integer::BERDecodeAsOctetString(BufferedTransformation &bt, size_t length)
03156 {
03157 BERGeneralDecoder dec(bt, OCTET_STRING);
03158 if (!dec.IsDefiniteLength() || dec.RemainingLength() != length)
03159 BERDecodeError();
03160 Decode(dec, length);
03161 dec.MessageEnd();
03162 }
03163
03164 size_t Integer::OpenPGPEncode(byte *output, size_t len) const
03165 {
03166 ArraySink sink(output, len);
03167 return OpenPGPEncode(sink);
03168 }
03169
03170 size_t Integer::OpenPGPEncode(BufferedTransformation &bt) const
03171 {
03172 word16 bitCount = BitCount();
03173 bt.PutWord16(bitCount);
03174 size_t byteCount = BitsToBytes(bitCount);
03175 Encode(bt, byteCount);
03176 return 2 + byteCount;
03177 }
03178
03179 void Integer::OpenPGPDecode(const byte *input, size_t len)
03180 {
03181 StringStore store(input, len);
03182 OpenPGPDecode(store);
03183 }
03184
03185 void Integer::OpenPGPDecode(BufferedTransformation &bt)
03186 {
03187 word16 bitCount;
03188 if (bt.GetWord16(bitCount) != 2 || bt.MaxRetrievable() < BitsToBytes(bitCount))
03189 throw OpenPGPDecodeErr();
03190 Decode(bt, BitsToBytes(bitCount));
03191 }
03192
03193 void Integer::Randomize(RandomNumberGenerator &rng, size_t nbits)
03194 {
03195 const size_t nbytes = nbits/8 + 1;
03196 SecByteBlock buf(nbytes);
03197 rng.GenerateBlock(buf, nbytes);
03198 if (nbytes)
03199 buf[0] = (byte)Crop(buf[0], nbits % 8);
03200 Decode(buf, nbytes, UNSIGNED);
03201 }
03202
03203 void Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max)
03204 {
03205 if (min > max)
03206 throw InvalidArgument("Integer: Min must be no greater than Max");
03207
03208 Integer range = max - min;
03209 const unsigned int nbits = range.BitCount();
03210
03211 do
03212 {
03213 Randomize(rng, nbits);
03214 }
03215 while (*this > range);
03216
03217 *this += min;
03218 }
03219
03220 bool Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
03221 {
03222 return GenerateRandomNoThrow(rng, MakeParameters("Min", min)("Max", max)("RandomNumberType", rnType)("EquivalentTo", equiv)("Mod", mod));
03223 }
03224
03225 class KDF2_RNG : public RandomNumberGenerator
03226 {
03227 public:
03228 KDF2_RNG(const byte *seed, size_t seedSize)
03229 : m_counter(0), m_counterAndSeed(seedSize + 4)
03230 {
03231 memcpy(m_counterAndSeed + 4, seed, seedSize);
03232 }
03233
03234 byte GenerateByte()
03235 {
03236 byte b;
03237 GenerateBlock(&b, 1);
03238 return b;
03239 }
03240
03241 void GenerateBlock(byte *output, size_t size)
03242 {
03243 UnalignedPutWord(BIG_ENDIAN_ORDER, m_counterAndSeed, m_counter);
03244 ++m_counter;
03245 P1363_KDF2<SHA1>::DeriveKey(output, size, m_counterAndSeed, m_counterAndSeed.size(), NULL, 0);
03246 }
03247
03248 private:
03249 word32 m_counter;
03250 SecByteBlock m_counterAndSeed;
03251 };
03252
03253 bool Integer::GenerateRandomNoThrow(RandomNumberGenerator &i_rng, const NameValuePairs ¶ms)
03254 {
03255 Integer min = params.GetValueWithDefault("Min", Integer::Zero());
03256 Integer max;
03257 if (!params.GetValue("Max", max))
03258 {
03259 int bitLength;
03260 if (params.GetIntValue("BitLength", bitLength))
03261 max = Integer::Power2(bitLength);
03262 else
03263 throw InvalidArgument("Integer: missing Max argument");
03264 }
03265 if (min > max)
03266 throw InvalidArgument("Integer: Min must be no greater than Max");
03267
03268 Integer equiv = params.GetValueWithDefault("EquivalentTo", Integer::Zero());
03269 Integer mod = params.GetValueWithDefault("Mod", Integer::One());
03270
03271 if (equiv.IsNegative() || equiv >= mod)
03272 throw InvalidArgument("Integer: invalid EquivalentTo and/or Mod argument");
03273
03274 Integer::RandomNumberType rnType = params.GetValueWithDefault("RandomNumberType", Integer::ANY);
03275
03276 member_ptr<KDF2_RNG> kdf2Rng;
03277 ConstByteArrayParameter seed;
03278 if (params.GetValue("Seed", seed))
03279 {
03280 ByteQueue bq;
03281 DERSequenceEncoder seq(bq);
03282 min.DEREncode(seq);
03283 max.DEREncode(seq);
03284 equiv.DEREncode(seq);
03285 mod.DEREncode(seq);
03286 DEREncodeUnsigned(seq, rnType);
03287 DEREncodeOctetString(seq, seed.begin(), seed.size());
03288 seq.MessageEnd();
03289
03290 SecByteBlock finalSeed((size_t)bq.MaxRetrievable());
03291 bq.Get(finalSeed, finalSeed.size());
03292 kdf2Rng.reset(new KDF2_RNG(finalSeed.begin(), finalSeed.size()));
03293 }
03294 RandomNumberGenerator &rng = kdf2Rng.get() ? (RandomNumberGenerator &)*kdf2Rng : i_rng;
03295
03296 switch (rnType)
03297 {
03298 case ANY:
03299 if (mod == One())
03300 Randomize(rng, min, max);
03301 else
03302 {
03303 Integer min1 = min + (equiv-min)%mod;
03304 if (max < min1)
03305 return false;
03306 Randomize(rng, Zero(), (max - min1) / mod);
03307 *this *= mod;
03308 *this += min1;
03309 }
03310 return true;
03311
03312 case PRIME:
03313 {
03314 const PrimeSelector *pSelector = params.GetValueWithDefault(Name::PointerToPrimeSelector(), (const PrimeSelector *)NULL);
03315
03316 int i;
03317 i = 0;
03318 while (1)
03319 {
03320 if (++i==16)
03321 {
03322
03323 Integer first = min;
03324 if (FirstPrime(first, max, equiv, mod, pSelector))
03325 {
03326
03327 *this = first;
03328 if (!FirstPrime(first, max, equiv, mod, pSelector))
03329 return true;
03330 }
03331 else
03332 return false;
03333 }
03334
03335 Randomize(rng, min, max);
03336 if (FirstPrime(*this, STDMIN(*this+mod*PrimeSearchInterval(max), max), equiv, mod, pSelector))
03337 return true;
03338 }
03339 }
03340
03341 default:
03342 throw InvalidArgument("Integer: invalid RandomNumberType argument");
03343 }
03344 }
03345
03346 std::istream& operator>>(std::istream& in, Integer &a)
03347 {
03348 char c;
03349 unsigned int length = 0;
03350 SecBlock<char> str(length + 16);
03351
03352 std::ws(in);
03353
03354 do
03355 {
03356 in.read(&c, 1);
03357 str[length++] = c;
03358 if (length >= str.size())
03359 str.Grow(length + 16);
03360 }
03361 while (in && (c=='-' || c=='x' || (c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F') || c=='h' || c=='H' || c=='o' || c=='O' || c==',' || c=='.'));
03362
03363 if (in.gcount())
03364 in.putback(c);
03365 str[length-1] = '\0';
03366 a = Integer(str);
03367
03368 return in;
03369 }
03370
03371 std::ostream& operator<<(std::ostream& out, const Integer &a)
03372 {
03373
03374 long f = out.flags() & std::ios::basefield;
03375 int base, block;
03376 char suffix;
03377 switch(f)
03378 {
03379 case std::ios::oct :
03380 base = 8;
03381 block = 8;
03382 suffix = 'o';
03383 break;
03384 case std::ios::hex :
03385 base = 16;
03386 block = 4;
03387 suffix = 'h';
03388 break;
03389 default :
03390 base = 10;
03391 block = 3;
03392 suffix = '.';
03393 }
03394
03395 SecBlock<char> s(a.BitCount() / (BitPrecision(base)-1) + 1);
03396 Integer temp1=a, temp2;
03397 unsigned i=0;
03398 const char vec[]="0123456789ABCDEF";
03399
03400 if (a.IsNegative())
03401 {
03402 out << '-';
03403 temp1.Negate();
03404 }
03405
03406 if (!a)
03407 out << '0';
03408
03409 while (!!temp1)
03410 {
03411 word digit;
03412 Integer::Divide(digit, temp2, temp1, base);
03413 s[i++]=vec[digit];
03414 temp1=temp2;
03415 }
03416
03417 while (i--)
03418 {
03419 out << s[i];
03420
03421
03422 }
03423 return out << suffix;
03424 }
03425
03426 Integer& Integer::operator++()
03427 {
03428 if (NotNegative())
03429 {
03430 if (Increment(reg, reg.size()))
03431 {
03432 reg.CleanGrow(2*reg.size());
03433 reg[reg.size()/2]=1;
03434 }
03435 }
03436 else
03437 {
03438 word borrow = Decrement(reg, reg.size());
03439 assert(!borrow);
03440 if (WordCount()==0)
03441 *this = Zero();
03442 }
03443 return *this;
03444 }
03445
03446 Integer& Integer::operator--()
03447 {
03448 if (IsNegative())
03449 {
03450 if (Increment(reg, reg.size()))
03451 {
03452 reg.CleanGrow(2*reg.size());
03453 reg[reg.size()/2]=1;
03454 }
03455 }
03456 else
03457 {
03458 if (Decrement(reg, reg.size()))
03459 *this = -One();
03460 }
03461 return *this;
03462 }
03463
03464 void PositiveAdd(Integer &sum, const Integer &a, const Integer& b)
03465 {
03466 int carry;
03467 if (a.reg.size() == b.reg.size())
03468 carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
03469 else if (a.reg.size() > b.reg.size())
03470 {
03471 carry = Add(sum.reg, a.reg, b.reg, b.reg.size());
03472 CopyWords(sum.reg+b.reg.size(), a.reg+b.reg.size(), a.reg.size()-b.reg.size());
03473 carry = Increment(sum.reg+b.reg.size(), a.reg.size()-b.reg.size(), carry);
03474 }
03475 else
03476 {
03477 carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
03478 CopyWords(sum.reg+a.reg.size(), b.reg+a.reg.size(), b.reg.size()-a.reg.size());
03479 carry = Increment(sum.reg+a.reg.size(), b.reg.size()-a.reg.size(), carry);
03480 }
03481
03482 if (carry)
03483 {
03484 sum.reg.CleanGrow(2*sum.reg.size());
03485 sum.reg[sum.reg.size()/2] = 1;
03486 }
03487 sum.sign = Integer::POSITIVE;
03488 }
03489
03490 void PositiveSubtract(Integer &diff, const Integer &a, const Integer& b)
03491 {
03492 unsigned aSize = a.WordCount();
03493 aSize += aSize%2;
03494 unsigned bSize = b.WordCount();
03495 bSize += bSize%2;
03496
03497 if (aSize == bSize)
03498 {
03499 if (Compare(a.reg, b.reg, aSize) >= 0)
03500 {
03501 Subtract(diff.reg, a.reg, b.reg, aSize);
03502 diff.sign = Integer::POSITIVE;
03503 }
03504 else
03505 {
03506 Subtract(diff.reg, b.reg, a.reg, aSize);
03507 diff.sign = Integer::NEGATIVE;
03508 }
03509 }
03510 else if (aSize > bSize)
03511 {
03512 word borrow = Subtract(diff.reg, a.reg, b.reg, bSize);
03513 CopyWords(diff.reg+bSize, a.reg+bSize, aSize-bSize);
03514 borrow = Decrement(diff.reg+bSize, aSize-bSize, borrow);
03515 assert(!borrow);
03516 diff.sign = Integer::POSITIVE;
03517 }
03518 else
03519 {
03520 word borrow = Subtract(diff.reg, b.reg, a.reg, aSize);
03521 CopyWords(diff.reg+aSize, b.reg+aSize, bSize-aSize);
03522 borrow = Decrement(diff.reg+aSize, bSize-aSize, borrow);
03523 assert(!borrow);
03524 diff.sign = Integer::NEGATIVE;
03525 }
03526 }
03527
03528
03529 template <class T> inline const T& STDMAX2(const T& a, const T& b)
03530 {
03531 return a < b ? b : a;
03532 }
03533
03534 Integer Integer::Plus(const Integer& b) const
03535 {
03536 Integer sum((word)0, STDMAX2(reg.size(), b.reg.size()));
03537 if (NotNegative())
03538 {
03539 if (b.NotNegative())
03540 PositiveAdd(sum, *this, b);
03541 else
03542 PositiveSubtract(sum, *this, b);
03543 }
03544 else
03545 {
03546 if (b.NotNegative())
03547 PositiveSubtract(sum, b, *this);
03548 else
03549 {
03550 PositiveAdd(sum, *this, b);
03551 sum.sign = Integer::NEGATIVE;
03552 }
03553 }
03554 return sum;
03555 }
03556
03557 Integer& Integer::operator+=(const Integer& t)
03558 {
03559 reg.CleanGrow(t.reg.size());
03560 if (NotNegative())
03561 {
03562 if (t.NotNegative())
03563 PositiveAdd(*this, *this, t);
03564 else
03565 PositiveSubtract(*this, *this, t);
03566 }
03567 else
03568 {
03569 if (t.NotNegative())
03570 PositiveSubtract(*this, t, *this);
03571 else
03572 {
03573 PositiveAdd(*this, *this, t);
03574 sign = Integer::NEGATIVE;
03575 }
03576 }
03577 return *this;
03578 }
03579
03580 Integer Integer::Minus(const Integer& b) const
03581 {
03582 Integer diff((word)0, STDMAX2(reg.size(), b.reg.size()));
03583 if (NotNegative())
03584 {
03585 if (b.NotNegative())
03586 PositiveSubtract(diff, *this, b);
03587 else
03588 PositiveAdd(diff, *this, b);
03589 }
03590 else
03591 {
03592 if (b.NotNegative())
03593 {
03594 PositiveAdd(diff, *this, b);
03595 diff.sign = Integer::NEGATIVE;
03596 }
03597 else
03598 PositiveSubtract(diff, b, *this);
03599 }
03600 return diff;
03601 }
03602
03603 Integer& Integer::operator-=(const Integer& t)
03604 {
03605 reg.CleanGrow(t.reg.size());
03606 if (NotNegative())
03607 {
03608 if (t.NotNegative())
03609 PositiveSubtract(*this, *this, t);
03610 else
03611 PositiveAdd(*this, *this, t);
03612 }
03613 else
03614 {
03615 if (t.NotNegative())
03616 {
03617 PositiveAdd(*this, *this, t);
03618 sign = Integer::NEGATIVE;
03619 }
03620 else
03621 PositiveSubtract(*this, t, *this);
03622 }
03623 return *this;
03624 }
03625
03626 Integer& Integer::operator<<=(size_t n)
03627 {
03628 const size_t wordCount = WordCount();
03629 const size_t shiftWords = n / WORD_BITS;
03630 const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
03631
03632 reg.CleanGrow(RoundupSize(wordCount+BitsToWords(n)));
03633 ShiftWordsLeftByWords(reg, wordCount + shiftWords, shiftWords);
03634 ShiftWordsLeftByBits(reg+shiftWords, wordCount+BitsToWords(shiftBits), shiftBits);
03635 return *this;
03636 }
03637
03638 Integer& Integer::operator>>=(size_t n)
03639 {
03640 const size_t wordCount = WordCount();
03641 const size_t shiftWords = n / WORD_BITS;
03642 const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
03643
03644 ShiftWordsRightByWords(reg, wordCount, shiftWords);
03645 if (wordCount > shiftWords)
03646 ShiftWordsRightByBits(reg, wordCount-shiftWords, shiftBits);
03647 if (IsNegative() && WordCount()==0)
03648 *this = Zero();
03649 return *this;
03650 }
03651
03652 void PositiveMultiply(Integer &product, const Integer &a, const Integer &b)
03653 {
03654 size_t aSize = RoundupSize(a.WordCount());
03655 size_t bSize = RoundupSize(b.WordCount());
03656
03657 product.reg.CleanNew(RoundupSize(aSize+bSize));
03658 product.sign = Integer::POSITIVE;
03659
03660 SecAlignedWordBlock workspace(aSize + bSize);
03661 AsymmetricMultiply(product.reg, workspace, a.reg, aSize, b.reg, bSize);
03662 }
03663
03664 void Multiply(Integer &product, const Integer &a, const Integer &b)
03665 {
03666 PositiveMultiply(product, a, b);
03667
03668 if (a.NotNegative() != b.NotNegative())
03669 product.Negate();
03670 }
03671
03672 Integer Integer::Times(const Integer &b) const
03673 {
03674 Integer product;
03675 Multiply(product, *this, b);
03676 return product;
03677 }
03678
03679
03680
03681
03682
03683
03684
03685
03686
03687
03688
03689
03690
03691
03692
03693
03694
03695
03696
03697
03698
03699
03700
03701 void PositiveDivide(Integer &remainder, Integer "ient,
03702 const Integer &a, const Integer &b)
03703 {
03704 unsigned aSize = a.WordCount();
03705 unsigned bSize = b.WordCount();
03706
03707 if (!bSize)
03708 throw Integer::DivideByZero();
03709
03710 if (a.PositiveCompare(b) == -1)
03711 {
03712 remainder = a;
03713 remainder.sign = Integer::POSITIVE;
03714 quotient = Integer::Zero();
03715 return;
03716 }
03717
03718 aSize += aSize%2;
03719 bSize += bSize%2;
03720
03721 remainder.reg.CleanNew(RoundupSize(bSize));
03722 remainder.sign = Integer::POSITIVE;
03723 quotient.reg.CleanNew(RoundupSize(aSize-bSize+2));
03724 quotient.sign = Integer::POSITIVE;
03725
03726 SecAlignedWordBlock T(aSize+2*bSize+4);
03727 Divide(remainder.reg, quotient.reg, T, a.reg, aSize, b.reg, bSize);
03728 }
03729
03730 void Integer::Divide(Integer &remainder, Integer "ient, const Integer ÷nd, const Integer &divisor)
03731 {
03732 PositiveDivide(remainder, quotient, dividend, divisor);
03733
03734 if (dividend.IsNegative())
03735 {
03736 quotient.Negate();
03737 if (remainder.NotZero())
03738 {
03739 --quotient;
03740 remainder = divisor.AbsoluteValue() - remainder;
03741 }
03742 }
03743
03744 if (divisor.IsNegative())
03745 quotient.Negate();
03746 }
03747
03748 void Integer::DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n)
03749 {
03750 q = a;
03751 q >>= n;
03752
03753 const size_t wordCount = BitsToWords(n);
03754 if (wordCount <= a.WordCount())
03755 {
03756 r.reg.resize(RoundupSize(wordCount));
03757 CopyWords(r.reg, a.reg, wordCount);
03758 SetWords(r.reg+wordCount, 0, r.reg.size()-wordCount);
03759 if (n % WORD_BITS != 0)
03760 r.reg[wordCount-1] %= (word(1) << (n % WORD_BITS));
03761 }
03762 else
03763 {
03764 r.reg.resize(RoundupSize(a.WordCount()));
03765 CopyWords(r.reg, a.reg, r.reg.size());
03766 }
03767 r.sign = POSITIVE;
03768
03769 if (a.IsNegative() && r.NotZero())
03770 {
03771 --q;
03772 r = Power2(n) - r;
03773 }
03774 }
03775
03776 Integer Integer::DividedBy(const Integer &b) const
03777 {
03778 Integer remainder, quotient;
03779 Integer::Divide(remainder, quotient, *this, b);
03780 return quotient;
03781 }
03782
03783 Integer Integer::Modulo(const Integer &b) const
03784 {
03785 Integer remainder, quotient;
03786 Integer::Divide(remainder, quotient, *this, b);
03787 return remainder;
03788 }
03789
03790 void Integer::Divide(word &remainder, Integer "ient, const Integer ÷nd, word divisor)
03791 {
03792 if (!divisor)
03793 throw Integer::DivideByZero();
03794
03795 assert(divisor);
03796
03797 if ((divisor & (divisor-1)) == 0)
03798 {
03799 quotient = dividend >> (BitPrecision(divisor)-1);
03800 remainder = dividend.reg[0] & (divisor-1);
03801 return;
03802 }
03803
03804 unsigned int i = dividend.WordCount();
03805 quotient.reg.CleanNew(RoundupSize(i));
03806 remainder = 0;
03807 while (i--)
03808 {
03809 quotient.reg[i] = DWord(dividend.reg[i], remainder) / divisor;
03810 remainder = DWord(dividend.reg[i], remainder) % divisor;
03811 }
03812
03813 if (dividend.NotNegative())
03814 quotient.sign = POSITIVE;
03815 else
03816 {
03817 quotient.sign = NEGATIVE;
03818 if (remainder)
03819 {
03820 --quotient;
03821 remainder = divisor - remainder;
03822 }
03823 }
03824 }
03825
03826 Integer Integer::DividedBy(word b) const
03827 {
03828 word remainder;
03829 Integer quotient;
03830 Integer::Divide(remainder, quotient, *this, b);
03831 return quotient;
03832 }
03833
03834 word Integer::Modulo(word divisor) const
03835 {
03836 if (!divisor)
03837 throw Integer::DivideByZero();
03838
03839 assert(divisor);
03840
03841 word remainder;
03842
03843 if ((divisor & (divisor-1)) == 0)
03844 remainder = reg[0] & (divisor-1);
03845 else
03846 {
03847 unsigned int i = WordCount();
03848
03849 if (divisor <= 5)
03850 {
03851 DWord sum(0, 0);
03852 while (i--)
03853 sum += reg[i];
03854 remainder = sum % divisor;
03855 }
03856 else
03857 {
03858 remainder = 0;
03859 while (i--)
03860 remainder = DWord(reg[i], remainder) % divisor;
03861 }
03862 }
03863
03864 if (IsNegative() && remainder)
03865 remainder = divisor - remainder;
03866
03867 return remainder;
03868 }
03869
03870 void Integer::Negate()
03871 {
03872 if (!!(*this))
03873 sign = Sign(1-sign);
03874 }
03875
03876 int Integer::PositiveCompare(const Integer& t) const
03877 {
03878 unsigned size = WordCount(), tSize = t.WordCount();
03879
03880 if (size == tSize)
03881 return CryptoPP::Compare(reg, t.reg, size);
03882 else
03883 return size > tSize ? 1 : -1;
03884 }
03885
03886 int Integer::Compare(const Integer& t) const
03887 {
03888 if (NotNegative())
03889 {
03890 if (t.NotNegative())
03891 return PositiveCompare(t);
03892 else
03893 return 1;
03894 }
03895 else
03896 {
03897 if (t.NotNegative())
03898 return -1;
03899 else
03900 return -PositiveCompare(t);
03901 }
03902 }
03903
03904 Integer Integer::SquareRoot() const
03905 {
03906 if (!IsPositive())
03907 return Zero();
03908
03909
03910 Integer x, y = Power2((BitCount()+1)/2);
03911 assert(y*y >= *this);
03912
03913 do
03914 {
03915 x = y;
03916 y = (x + *this/x) >> 1;
03917 } while (y<x);
03918
03919 return x;
03920 }
03921
03922 bool Integer::IsSquare() const
03923 {
03924 Integer r = SquareRoot();
03925 return *this == r.Squared();
03926 }
03927
03928 bool Integer::IsUnit() const
03929 {
03930 return (WordCount() == 1) && (reg[0] == 1);
03931 }
03932
03933 Integer Integer::MultiplicativeInverse() const
03934 {
03935 return IsUnit() ? *this : Zero();
03936 }
03937
03938 Integer a_times_b_mod_c(const Integer &x, const Integer& y, const Integer& m)
03939 {
03940 return x*y%m;
03941 }
03942
03943 Integer a_exp_b_mod_c(const Integer &x, const Integer& e, const Integer& m)
03944 {
03945 ModularArithmetic mr(m);
03946 return mr.Exponentiate(x, e);
03947 }
03948
03949 Integer Integer::Gcd(const Integer &a, const Integer &b)
03950 {
03951 return EuclideanDomainOf<Integer>().Gcd(a, b);
03952 }
03953
03954 Integer Integer::InverseMod(const Integer &m) const
03955 {
03956 assert(m.NotNegative());
03957
03958 if (IsNegative() || *this>=m)
03959 return (*this%m).InverseMod(m);
03960
03961 if (m.IsEven())
03962 {
03963 if (!m || IsEven())
03964 return Zero();
03965 if (*this == One())
03966 return One();
03967
03968 Integer u = m.InverseMod(*this);
03969 return !u ? Zero() : (m*(*this-u)+1)/(*this);
03970 }
03971
03972 SecBlock<word> T(m.reg.size() * 4);
03973 Integer r((word)0, m.reg.size());
03974 unsigned k = AlmostInverse(r.reg, T, reg, reg.size(), m.reg, m.reg.size());
03975 DivideByPower2Mod(r.reg, r.reg, k, m.reg, m.reg.size());
03976 return r;
03977 }
03978
03979 word Integer::InverseMod(word mod) const
03980 {
03981 word g0 = mod, g1 = *this % mod;
03982 word v0 = 0, v1 = 1;
03983 word y;
03984
03985 while (g1)
03986 {
03987 if (g1 == 1)
03988 return v1;
03989 y = g0 / g1;
03990 g0 = g0 % g1;
03991 v0 += y * v1;
03992
03993 if (!g0)
03994 break;
03995 if (g0 == 1)
03996 return mod-v0;
03997 y = g1 / g0;
03998 g1 = g1 % g0;
03999 v1 += y * v0;
04000 }
04001 return 0;
04002 }
04003
04004
04005
04006 ModularArithmetic::ModularArithmetic(BufferedTransformation &bt)
04007 {
04008 BERSequenceDecoder seq(bt);
04009 OID oid(seq);
04010 if (oid != ASN1::prime_field())
04011 BERDecodeError();
04012 m_modulus.BERDecode(seq);
04013 seq.MessageEnd();
04014 m_result.reg.resize(m_modulus.reg.size());
04015 }
04016
04017 void ModularArithmetic::DEREncode(BufferedTransformation &bt) const
04018 {
04019 DERSequenceEncoder seq(bt);
04020 ASN1::prime_field().DEREncode(seq);
04021 m_modulus.DEREncode(seq);
04022 seq.MessageEnd();
04023 }
04024
04025 void ModularArithmetic::DEREncodeElement(BufferedTransformation &out, const Element &a) const
04026 {
04027 a.DEREncodeAsOctetString(out, MaxElementByteLength());
04028 }
04029
04030 void ModularArithmetic::BERDecodeElement(BufferedTransformation &in, Element &a) const
04031 {
04032 a.BERDecodeAsOctetString(in, MaxElementByteLength());
04033 }
04034
04035 const Integer& ModularArithmetic::Half(const Integer &a) const
04036 {
04037 if (a.reg.size()==m_modulus.reg.size())
04038 {
04039 CryptoPP::DivideByPower2Mod(m_result.reg.begin(), a.reg, 1, m_modulus.reg, a.reg.size());
04040 return m_result;
04041 }
04042 else
04043 return m_result1 = (a.IsEven() ? (a >> 1) : ((a+m_modulus) >> 1));
04044 }
04045
04046 const Integer& ModularArithmetic::Add(const Integer &a, const Integer &b) const
04047 {
04048 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04049 {
04050 if (CryptoPP::Add(m_result.reg.begin(), a.reg, b.reg, a.reg.size())
04051 || Compare(m_result.reg, m_modulus.reg, a.reg.size()) >= 0)
04052 {
04053 CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
04054 }
04055 return m_result;
04056 }
04057 else
04058 {
04059 m_result1 = a+b;
04060 if (m_result1 >= m_modulus)
04061 m_result1 -= m_modulus;
04062 return m_result1;
04063 }
04064 }
04065
04066 Integer& ModularArithmetic::Accumulate(Integer &a, const Integer &b) const
04067 {
04068 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04069 {
04070 if (CryptoPP::Add(a.reg, a.reg, b.reg, a.reg.size())
04071 || Compare(a.reg, m_modulus.reg, a.reg.size()) >= 0)
04072 {
04073 CryptoPP::Subtract(a.reg, a.reg, m_modulus.reg, a.reg.size());
04074 }
04075 }
04076 else
04077 {
04078 a+=b;
04079 if (a>=m_modulus)
04080 a-=m_modulus;
04081 }
04082
04083 return a;
04084 }
04085
04086 const Integer& ModularArithmetic::Subtract(const Integer &a, const Integer &b) const
04087 {
04088 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04089 {
04090 if (CryptoPP::Subtract(m_result.reg.begin(), a.reg, b.reg, a.reg.size()))
04091 CryptoPP::Add(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
04092 return m_result;
04093 }
04094 else
04095 {
04096 m_result1 = a-b;
04097 if (m_result1.IsNegative())
04098 m_result1 += m_modulus;
04099 return m_result1;
04100 }
04101 }
04102
04103 Integer& ModularArithmetic::Reduce(Integer &a, const Integer &b) const
04104 {
04105 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04106 {
04107 if (CryptoPP::Subtract(a.reg, a.reg, b.reg, a.reg.size()))
04108 CryptoPP::Add(a.reg, a.reg, m_modulus.reg, a.reg.size());
04109 }
04110 else
04111 {
04112 a-=b;
04113 if (a.IsNegative())
04114 a+=m_modulus;
04115 }
04116
04117 return a;
04118 }
04119
04120 const Integer& ModularArithmetic::Inverse(const Integer &a) const
04121 {
04122 if (!a)
04123 return a;
04124
04125 CopyWords(m_result.reg.begin(), m_modulus.reg, m_modulus.reg.size());
04126 if (CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, a.reg, a.reg.size()))
04127 Decrement(m_result.reg.begin()+a.reg.size(), m_modulus.reg.size()-a.reg.size());
04128
04129 return m_result;
04130 }
04131
04132 Integer ModularArithmetic::CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
04133 {
04134 if (m_modulus.IsOdd())
04135 {
04136 MontgomeryRepresentation dr(m_modulus);
04137 return dr.ConvertOut(dr.CascadeExponentiate(dr.ConvertIn(x), e1, dr.ConvertIn(y), e2));
04138 }
04139 else
04140 return AbstractRing<Integer>::CascadeExponentiate(x, e1, y, e2);
04141 }
04142
04143 void ModularArithmetic::SimultaneousExponentiate(Integer *results, const Integer &base, const Integer *exponents, unsigned int exponentsCount) const
04144 {
04145 if (m_modulus.IsOdd())
04146 {
04147 MontgomeryRepresentation dr(m_modulus);
04148 dr.SimultaneousExponentiate(results, dr.ConvertIn(base), exponents, exponentsCount);
04149 for (unsigned int i=0; i<exponentsCount; i++)
04150 results[i] = dr.ConvertOut(results[i]);
04151 }
04152 else
04153 AbstractRing<Integer>::SimultaneousExponentiate(results, base, exponents, exponentsCount);
04154 }
04155
04156 MontgomeryRepresentation::MontgomeryRepresentation(const Integer &m)
04157 : ModularArithmetic(m),
04158 m_u((word)0, m_modulus.reg.size()),
04159 m_workspace(5*m_modulus.reg.size())
04160 {
04161 if (!m_modulus.IsOdd())
04162 throw InvalidArgument("MontgomeryRepresentation: Montgomery representation requires an odd modulus");
04163
04164 RecursiveInverseModPower2(m_u.reg, m_workspace, m_modulus.reg, m_modulus.reg.size());
04165 }
04166
04167 const Integer& MontgomeryRepresentation::Multiply(const Integer &a, const Integer &b) const
04168 {
04169 word *const T = m_workspace.begin();
04170 word *const R = m_result.reg.begin();
04171 const size_t N = m_modulus.reg.size();
04172 assert(a.reg.size()<=N && b.reg.size()<=N);
04173
04174 AsymmetricMultiply(T, T+2*N, a.reg, a.reg.size(), b.reg, b.reg.size());
04175 SetWords(T+a.reg.size()+b.reg.size(), 0, 2*N-a.reg.size()-b.reg.size());
04176 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04177 return m_result;
04178 }
04179
04180 const Integer& MontgomeryRepresentation::Square(const Integer &a) const
04181 {
04182 word *const T = m_workspace.begin();
04183 word *const R = m_result.reg.begin();
04184 const size_t N = m_modulus.reg.size();
04185 assert(a.reg.size()<=N);
04186
04187 CryptoPP::Square(T, T+2*N, a.reg, a.reg.size());
04188 SetWords(T+2*a.reg.size(), 0, 2*N-2*a.reg.size());
04189 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04190 return m_result;
04191 }
04192
04193 Integer MontgomeryRepresentation::ConvertOut(const Integer &a) const
04194 {
04195 word *const T = m_workspace.begin();
04196 word *const R = m_result.reg.begin();
04197 const size_t N = m_modulus.reg.size();
04198 assert(a.reg.size()<=N);
04199
04200 CopyWords(T, a.reg, a.reg.size());
04201 SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
04202 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04203 return m_result;
04204 }
04205
04206 const Integer& MontgomeryRepresentation::MultiplicativeInverse(const Integer &a) const
04207 {
04208
04209 word *const T = m_workspace.begin();
04210 word *const R = m_result.reg.begin();
04211 const size_t N = m_modulus.reg.size();
04212 assert(a.reg.size()<=N);
04213
04214 CopyWords(T, a.reg, a.reg.size());
04215 SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
04216 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04217 unsigned k = AlmostInverse(R, T, R, N, m_modulus.reg, N);
04218
04219
04220
04221 if (k>N*WORD_BITS)
04222 DivideByPower2Mod(R, R, k-N*WORD_BITS, m_modulus.reg, N);
04223 else
04224 MultiplyByPower2Mod(R, R, N*WORD_BITS-k, m_modulus.reg, N);
04225
04226 return m_result;
04227 }
04228
04229 NAMESPACE_END
04230
04231 #endif