00001
00002
00003
00004 #include "pch.h"
00005
00006 #ifndef CRYPTOPP_IMPORTS
00007
00008 #include "integer.h"
00009 #include "modarith.h"
00010 #include "nbtheory.h"
00011 #include "asn.h"
00012 #include "oids.h"
00013 #include "words.h"
00014 #include "algparam.h"
00015 #include "pubkey.h"
00016 #include "sha.h"
00017 #include "cpu.h"
00018
00019 #include <iostream>
00020
00021 #if _MSC_VER >= 1400
00022 #include <intrin.h>
00023 #endif
00024
00025 #ifdef __DECCXX
00026 #include <c_asm.h>
00027 #endif
00028
00029 #ifdef CRYPTOPP_MSVC6_NO_PP
00030 #pragma message("You do not seem to have the Visual C++ Processor Pack installed, so use of SSE2 instructions will be disabled.")
00031 #endif
00032
00033 #define CRYPTOPP_INTEGER_SSE2 (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86)
00034
00035 NAMESPACE_BEGIN(CryptoPP)
00036
00037 bool AssignIntToInteger(const std::type_info &valueType, void *pInteger, const void *pInt)
00038 {
00039 if (valueType != typeid(Integer))
00040 return false;
00041 *reinterpret_cast<Integer *>(pInteger) = *reinterpret_cast<const int *>(pInt);
00042 return true;
00043 }
00044
00045 inline static int Compare(const word *A, const word *B, size_t N)
00046 {
00047 while (N--)
00048 if (A[N] > B[N])
00049 return 1;
00050 else if (A[N] < B[N])
00051 return -1;
00052
00053 return 0;
00054 }
00055
00056 inline static int Increment(word *A, size_t N, word B=1)
00057 {
00058 assert(N);
00059 word t = A[0];
00060 A[0] = t+B;
00061 if (A[0] >= t)
00062 return 0;
00063 for (unsigned i=1; i<N; i++)
00064 if (++A[i])
00065 return 0;
00066 return 1;
00067 }
00068
00069 inline static int Decrement(word *A, size_t N, word B=1)
00070 {
00071 assert(N);
00072 word t = A[0];
00073 A[0] = t-B;
00074 if (A[0] <= t)
00075 return 0;
00076 for (unsigned i=1; i<N; i++)
00077 if (A[i]--)
00078 return 0;
00079 return 1;
00080 }
00081
00082 static void TwosComplement(word *A, size_t N)
00083 {
00084 Decrement(A, N);
00085 for (unsigned i=0; i<N; i++)
00086 A[i] = ~A[i];
00087 }
00088
00089 static word AtomicInverseModPower2(word A)
00090 {
00091 assert(A%2==1);
00092
00093 word R=A%8;
00094
00095 for (unsigned i=3; i<WORD_BITS; i*=2)
00096 R = R*(2-R*A);
00097
00098 assert(R*A==1);
00099 return R;
00100 }
00101
00102
00103
00104 #if !defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE) || CRYPTOPP_BOOL_X64
00105 #define Declare2Words(x) word x##0, x##1;
00106 #define AssignWord(a, b) a##0 = b; a##1 = 0;
00107 #define Add2WordsBy1(a, b, c) a##0 = b##0 + c; a##1 = b##1 + (a##0 < c);
00108 #define LowWord(a) a##0
00109 #define HighWord(a) a##1
00110 #ifdef _MSC_VER
00111 #define MultiplyWords(p, a, b) p##0 = _umul128(a, b, &p##1);
00112 #define Double3Words(c, d) d##1 = __shiftleft128(d##0, d##1, 1); d##0 = __shiftleft128(c, d##0, 1); c *= 2;
00113 #elif defined(__DECCXX)
00114 #define MultiplyWords(p, a, b) p##0 = a*b; p##1 = asm("umulh %a0, %a1, %v0", a, b);
00115 #elif CRYPTOPP_BOOL_X64
00116 #define MultiplyWords(p, a, b) asm ("mulq %3" : "=a"(p##0), "=d"(p##1) : "a"(a), "g"(b) : "cc");
00117 #define MulAcc(c, d, a, b) asm ("mulq %6; addq %3, %0; adcq %4, %1; adcq $0, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1), "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
00118 #define Double3Words(c, d) asm ("addq %0, %0; adcq %1, %1; adcq %2, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1) : : "cc");
00119 #define Acc2WordsBy1(a, b) asm ("addq %2, %0; adcq $0, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b) : "cc");
00120 #define Acc2WordsBy2(a, b) asm ("addq %2, %0; adcq %3, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b##0), "r"(b##1) : "cc");
00121 #define Acc3WordsBy2(c, d, e) asm ("addq %5, %0; adcq %6, %1; adcq $0, %2;" : "+r"(c), "=r"(e##0), "=r"(e##1) : "1"(d##0), "2"(d##1), "r"(e##0), "r"(e##1) : "cc");
00122 #endif
00123 #ifndef Double3Words
00124 #define Double3Words(c, d) d##1 = 2*d##1 + (d##0>>(WORD_BITS-1)); d##0 = 2*d##0 + (c>>(WORD_BITS-1)); c *= 2;
00125 #endif
00126 #ifndef Acc2WordsBy2
00127 #define Acc2WordsBy2(a, b) a##0 += b##0; a##1 += a##0 < b##0; a##1 += b##1;
00128 #endif
00129 #define AddWithCarry(u, a, b) {word t = a+b; u##0 = t + u##1; u##1 = (t<a) + (u##0<t);}
00130 #define SubtractWithBorrow(u, a, b) {word t = a-b; u##0 = t - u##1; u##1 = (t>a) + (u##0>t);}
00131 #define GetCarry(u) u##1
00132 #define GetBorrow(u) u##1
00133 #else
00134 #define Declare2Words(x) dword x;
00135 #if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER)
00136 #define MultiplyWords(p, a, b) p = __emulu(a, b);
00137 #else
00138 #define MultiplyWords(p, a, b) p = (dword)a*b;
00139 #endif
00140 #define AssignWord(a, b) a = b;
00141 #define Add2WordsBy1(a, b, c) a = b + c;
00142 #define Acc2WordsBy2(a, b) a += b;
00143 #define LowWord(a) word(a)
00144 #define HighWord(a) word(a>>WORD_BITS)
00145 #define Double3Words(c, d) d = 2*d + (c>>(WORD_BITS-1)); c *= 2;
00146 #define AddWithCarry(u, a, b) u = dword(a) + b + GetCarry(u);
00147 #define SubtractWithBorrow(u, a, b) u = dword(a) - b - GetBorrow(u);
00148 #define GetCarry(u) HighWord(u)
00149 #define GetBorrow(u) word(u>>(WORD_BITS*2-1))
00150 #endif
00151 #ifndef MulAcc
00152 #define MulAcc(c, d, a, b) MultiplyWords(p, a, b); Acc2WordsBy1(p, c); c = LowWord(p); Acc2WordsBy1(d, HighWord(p));
00153 #endif
00154 #ifndef Acc2WordsBy1
00155 #define Acc2WordsBy1(a, b) Add2WordsBy1(a, a, b)
00156 #endif
00157 #ifndef Acc3WordsBy2
00158 #define Acc3WordsBy2(c, d, e) Acc2WordsBy1(e, c); c = LowWord(e); Add2WordsBy1(e, d, HighWord(e));
00159 #endif
00160
00161 class DWord
00162 {
00163 public:
00164 DWord() {}
00165
00166 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00167 explicit DWord(word low)
00168 {
00169 m_whole = low;
00170 }
00171 #else
00172 explicit DWord(word low)
00173 {
00174 m_halfs.low = low;
00175 m_halfs.high = 0;
00176 }
00177 #endif
00178
00179 DWord(word low, word high)
00180 {
00181 m_halfs.low = low;
00182 m_halfs.high = high;
00183 }
00184
00185 static DWord Multiply(word a, word b)
00186 {
00187 DWord r;
00188 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00189 r.m_whole = (dword)a * b;
00190 #else
00191 r.m_halfs.low = _umul128(a, b, &r.m_halfs.high);
00192 #endif
00193 return r;
00194 }
00195
00196 static DWord MultiplyAndAdd(word a, word b, word c)
00197 {
00198 DWord r = Multiply(a, b);
00199 return r += c;
00200 }
00201
00202 DWord & operator+=(word a)
00203 {
00204 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00205 m_whole = m_whole + a;
00206 #else
00207 m_halfs.low += a;
00208 m_halfs.high += (m_halfs.low < a);
00209 #endif
00210 return *this;
00211 }
00212
00213 DWord operator+(word a)
00214 {
00215 DWord r;
00216 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00217 r.m_whole = m_whole + a;
00218 #else
00219 r.m_halfs.low = m_halfs.low + a;
00220 r.m_halfs.high = m_halfs.high + (r.m_halfs.low < a);
00221 #endif
00222 return r;
00223 }
00224
00225 DWord operator-(DWord a)
00226 {
00227 DWord r;
00228 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00229 r.m_whole = m_whole - a.m_whole;
00230 #else
00231 r.m_halfs.low = m_halfs.low - a.m_halfs.low;
00232 r.m_halfs.high = m_halfs.high - a.m_halfs.high - (r.m_halfs.low > m_halfs.low);
00233 #endif
00234 return r;
00235 }
00236
00237 DWord operator-(word a)
00238 {
00239 DWord r;
00240 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00241 r.m_whole = m_whole - a;
00242 #else
00243 r.m_halfs.low = m_halfs.low - a;
00244 r.m_halfs.high = m_halfs.high - (r.m_halfs.low > m_halfs.low);
00245 #endif
00246 return r;
00247 }
00248
00249
00250 word operator/(word divisor);
00251
00252 word operator%(word a);
00253
00254 bool operator!() const
00255 {
00256 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00257 return !m_whole;
00258 #else
00259 return !m_halfs.high && !m_halfs.low;
00260 #endif
00261 }
00262
00263 word GetLowHalf() const {return m_halfs.low;}
00264 word GetHighHalf() const {return m_halfs.high;}
00265 word GetHighHalfAsBorrow() const {return 0-m_halfs.high;}
00266
00267 private:
00268 union
00269 {
00270 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00271 dword m_whole;
00272 #endif
00273 struct
00274 {
00275 #ifdef IS_LITTLE_ENDIAN
00276 word low;
00277 word high;
00278 #else
00279 word high;
00280 word low;
00281 #endif
00282 } m_halfs;
00283 };
00284 };
00285
00286 class Word
00287 {
00288 public:
00289 Word() {}
00290
00291 Word(word value)
00292 {
00293 m_whole = value;
00294 }
00295
00296 Word(hword low, hword high)
00297 {
00298 m_whole = low | (word(high) << (WORD_BITS/2));
00299 }
00300
00301 static Word Multiply(hword a, hword b)
00302 {
00303 Word r;
00304 r.m_whole = (word)a * b;
00305 return r;
00306 }
00307
00308 Word operator-(Word a)
00309 {
00310 Word r;
00311 r.m_whole = m_whole - a.m_whole;
00312 return r;
00313 }
00314
00315 Word operator-(hword a)
00316 {
00317 Word r;
00318 r.m_whole = m_whole - a;
00319 return r;
00320 }
00321
00322
00323 hword operator/(hword divisor)
00324 {
00325 return hword(m_whole / divisor);
00326 }
00327
00328 bool operator!() const
00329 {
00330 return !m_whole;
00331 }
00332
00333 word GetWhole() const {return m_whole;}
00334 hword GetLowHalf() const {return hword(m_whole);}
00335 hword GetHighHalf() const {return hword(m_whole>>(WORD_BITS/2));}
00336 hword GetHighHalfAsBorrow() const {return 0-hword(m_whole>>(WORD_BITS/2));}
00337
00338 private:
00339 word m_whole;
00340 };
00341
00342
00343 template <class S, class D>
00344 S DivideThreeWordsByTwo(S *A, S B0, S B1, D *dummy=NULL)
00345 {
00346
00347 assert(A[2] < B1 || (A[2]==B1 && A[1] < B0));
00348
00349
00350 S Q;
00351 if (S(B1+1) == 0)
00352 Q = A[2];
00353 else
00354 Q = D(A[1], A[2]) / S(B1+1);
00355
00356
00357 D p = D::Multiply(B0, Q);
00358 D u = (D) A[0] - p.GetLowHalf();
00359 A[0] = u.GetLowHalf();
00360 u = (D) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - D::Multiply(B1, Q);
00361 A[1] = u.GetLowHalf();
00362 A[2] += u.GetHighHalf();
00363
00364
00365 while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
00366 {
00367 u = (D) A[0] - B0;
00368 A[0] = u.GetLowHalf();
00369 u = (D) A[1] - B1 - u.GetHighHalfAsBorrow();
00370 A[1] = u.GetLowHalf();
00371 A[2] += u.GetHighHalf();
00372 Q++;
00373 assert(Q);
00374 }
00375
00376 return Q;
00377 }
00378
00379
00380 template <class S, class D>
00381 inline D DivideFourWordsByTwo(S *T, const D &Al, const D &Ah, const D &B)
00382 {
00383 if (!B)
00384 return D(Ah.GetLowHalf(), Ah.GetHighHalf());
00385 else
00386 {
00387 S Q[2];
00388 T[0] = Al.GetLowHalf();
00389 T[1] = Al.GetHighHalf();
00390 T[2] = Ah.GetLowHalf();
00391 T[3] = Ah.GetHighHalf();
00392 Q[1] = DivideThreeWordsByTwo<S, D>(T+1, B.GetLowHalf(), B.GetHighHalf());
00393 Q[0] = DivideThreeWordsByTwo<S, D>(T, B.GetLowHalf(), B.GetHighHalf());
00394 return D(Q[0], Q[1]);
00395 }
00396 }
00397
00398
00399 inline word DWord::operator/(word a)
00400 {
00401 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00402 return word(m_whole / a);
00403 #else
00404 hword r[4];
00405 return DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a).GetWhole();
00406 #endif
00407 }
00408
00409 inline word DWord::operator%(word a)
00410 {
00411 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00412 return word(m_whole % a);
00413 #else
00414 if (a < (word(1) << (WORD_BITS/2)))
00415 {
00416 hword h = hword(a);
00417 word r = m_halfs.high % h;
00418 r = ((m_halfs.low >> (WORD_BITS/2)) + (r << (WORD_BITS/2))) % h;
00419 return hword((hword(m_halfs.low) + (r << (WORD_BITS/2))) % h);
00420 }
00421 else
00422 {
00423 hword r[4];
00424 DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a);
00425 return Word(r[0], r[1]).GetWhole();
00426 }
00427 #endif
00428 }
00429
00430
00431
00432
00433 #if defined(__GNUC__)
00434 #define AddPrologue \
00435 int result; \
00436 __asm__ __volatile__ \
00437 ( \
00438 ".intel_syntax noprefix;"
00439 #define AddEpilogue \
00440 ".att_syntax prefix;" \
00441 : "=a" (result)\
00442 : "d" (C), "a" (A), "D" (B), "c" (N) \
00443 : "%esi", "memory", "cc" \
00444 );\
00445 return result;
00446 #define MulPrologue \
00447 __asm__ __volatile__ \
00448 ( \
00449 ".intel_syntax noprefix;" \
00450 AS1( push ebx) \
00451 AS2( mov ebx, edx)
00452 #define MulEpilogue \
00453 AS1( pop ebx) \
00454 ".att_syntax prefix;" \
00455 : \
00456 : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B) \
00457 : "%esi", "memory", "cc" \
00458 );
00459 #define SquPrologue MulPrologue
00460 #define SquEpilogue \
00461 AS1( pop ebx) \
00462 ".att_syntax prefix;" \
00463 : \
00464 : "d" (s_maskLow16), "c" (C), "a" (A) \
00465 : "%esi", "%edi", "memory", "cc" \
00466 );
00467 #define TopPrologue MulPrologue
00468 #define TopEpilogue \
00469 AS1( pop ebx) \
00470 ".att_syntax prefix;" \
00471 : \
00472 : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B), "S" (L) \
00473 : "memory", "cc" \
00474 );
00475 #else
00476 #define AddPrologue \
00477 __asm push edi \
00478 __asm push esi \
00479 __asm mov eax, [esp+12] \
00480 __asm mov edi, [esp+16]
00481 #define AddEpilogue \
00482 __asm pop esi \
00483 __asm pop edi \
00484 __asm ret 8
00485 #if _MSC_VER < 1300
00486 #define SaveEBX __asm push ebx
00487 #define RestoreEBX __asm pop ebx
00488 #else
00489 #define SaveEBX
00490 #define RestoreEBX
00491 #endif
00492 #define SquPrologue \
00493 AS2( mov eax, A) \
00494 AS2( mov ecx, C) \
00495 SaveEBX \
00496 AS2( lea ebx, s_maskLow16)
00497 #define MulPrologue \
00498 AS2( mov eax, A) \
00499 AS2( mov edi, B) \
00500 AS2( mov ecx, C) \
00501 SaveEBX \
00502 AS2( lea ebx, s_maskLow16)
00503 #define TopPrologue \
00504 AS2( mov eax, A) \
00505 AS2( mov edi, B) \
00506 AS2( mov ecx, C) \
00507 AS2( mov esi, L) \
00508 SaveEBX \
00509 AS2( lea ebx, s_maskLow16)
00510 #define SquEpilogue RestoreEBX
00511 #define MulEpilogue RestoreEBX
00512 #define TopEpilogue RestoreEBX
00513 #endif
00514
00515 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
00516 extern "C" {
00517 int Baseline_Add(size_t N, word *C, const word *A, const word *B);
00518 int Baseline_Sub(size_t N, word *C, const word *A, const word *B);
00519 }
00520 #elif defined(CRYPTOPP_X64_ASM_AVAILABLE) && defined(__GNUC__)
00521 int Baseline_Add(size_t N, word *C, const word *A, const word *B)
00522 {
00523 word result;
00524 __asm__ __volatile__
00525 (
00526 ".intel_syntax;"
00527 AS1( neg %1)
00528 ASJ( jz, 1, f)
00529 AS2( mov %0,[%3+8*%1])
00530 AS2( add %0,[%4+8*%1])
00531 AS2( mov [%2+8*%1],%0)
00532 ASL(0)
00533 AS2( mov %0,[%3+8*%1+8])
00534 AS2( adc %0,[%4+8*%1+8])
00535 AS2( mov [%2+8*%1+8],%0)
00536 AS2( lea %1,[%1+2])
00537 ASJ( jrcxz, 1, f)
00538 AS2( mov %0,[%3+8*%1])
00539 AS2( adc %0,[%4+8*%1])
00540 AS2( mov [%2+8*%1],%0)
00541 ASJ( jmp, 0, b)
00542 ASL(1)
00543 AS2( mov %0, 0)
00544 AS2( adc %0, %0)
00545 ".att_syntax;"
00546 : "=&r" (result)
00547 : "c" (N), "r" (C+N), "r" (A+N), "r" (B+N)
00548 : "memory", "cc"
00549 );
00550 return (int)result;
00551 }
00552
00553 int Baseline_Sub(size_t N, word *C, const word *A, const word *B)
00554 {
00555 word result;
00556 __asm__ __volatile__
00557 (
00558 ".intel_syntax;"
00559 AS1( neg %1)
00560 ASJ( jz, 1, f)
00561 AS2( mov %0,[%3+8*%1])
00562 AS2( sub %0,[%4+8*%1])
00563 AS2( mov [%2+8*%1],%0)
00564 ASL(0)
00565 AS2( mov %0,[%3+8*%1+8])
00566 AS2( sbb %0,[%4+8*%1+8])
00567 AS2( mov [%2+8*%1+8],%0)
00568 AS2( lea %1,[%1+2])
00569 ASJ( jrcxz, 1, f)
00570 AS2( mov %0,[%3+8*%1])
00571 AS2( sbb %0,[%4+8*%1])
00572 AS2( mov [%2+8*%1],%0)
00573 ASJ( jmp, 0, b)
00574 ASL(1)
00575 AS2( mov %0, 0)
00576 AS2( adc %0, %0)
00577 ".att_syntax;"
00578 : "=&r" (result)
00579 : "c" (N), "r" (C+N), "r" (A+N), "r" (B+N)
00580 : "memory", "cc"
00581 );
00582 return (int)result;
00583 }
00584 #elif defined(CRYPTOPP_X86_ASM_AVAILABLE) && CRYPTOPP_BOOL_X86
00585 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
00586 {
00587 AddPrologue
00588
00589
00590 AS2( lea eax, [eax+4*ecx])
00591 AS2( lea edi, [edi+4*ecx])
00592 AS2( lea edx, [edx+4*ecx])
00593
00594 AS1( neg ecx)
00595 AS2( test ecx, 2)
00596 ASJ( jz, 0, f)
00597 AS2( sub ecx, 2)
00598 ASJ( jmp, 1, f)
00599
00600 ASL(0)
00601 ASJ( jecxz, 2, f)
00602 AS2( mov esi,[eax+4*ecx])
00603 AS2( adc esi,[edi+4*ecx])
00604 AS2( mov [edx+4*ecx],esi)
00605 AS2( mov esi,[eax+4*ecx+4])
00606 AS2( adc esi,[edi+4*ecx+4])
00607 AS2( mov [edx+4*ecx+4],esi)
00608 ASL(1)
00609 AS2( mov esi,[eax+4*ecx+8])
00610 AS2( adc esi,[edi+4*ecx+8])
00611 AS2( mov [edx+4*ecx+8],esi)
00612 AS2( mov esi,[eax+4*ecx+12])
00613 AS2( adc esi,[edi+4*ecx+12])
00614 AS2( mov [edx+4*ecx+12],esi)
00615
00616 AS2( lea ecx,[ecx+4])
00617 ASJ( jmp, 0, b)
00618
00619 ASL(2)
00620 AS2( mov eax, 0)
00621 AS1( setc al)
00622
00623 AddEpilogue
00624 }
00625
00626 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B)
00627 {
00628 AddPrologue
00629
00630
00631 AS2( lea eax, [eax+4*ecx])
00632 AS2( lea edi, [edi+4*ecx])
00633 AS2( lea edx, [edx+4*ecx])
00634
00635 AS1( neg ecx)
00636 AS2( test ecx, 2)
00637 ASJ( jz, 0, f)
00638 AS2( sub ecx, 2)
00639 ASJ( jmp, 1, f)
00640
00641 ASL(0)
00642 ASJ( jecxz, 2, f)
00643 AS2( mov esi,[eax+4*ecx])
00644 AS2( sbb esi,[edi+4*ecx])
00645 AS2( mov [edx+4*ecx],esi)
00646 AS2( mov esi,[eax+4*ecx+4])
00647 AS2( sbb esi,[edi+4*ecx+4])
00648 AS2( mov [edx+4*ecx+4],esi)
00649 ASL(1)
00650 AS2( mov esi,[eax+4*ecx+8])
00651 AS2( sbb esi,[edi+4*ecx+8])
00652 AS2( mov [edx+4*ecx+8],esi)
00653 AS2( mov esi,[eax+4*ecx+12])
00654 AS2( sbb esi,[edi+4*ecx+12])
00655 AS2( mov [edx+4*ecx+12],esi)
00656
00657 AS2( lea ecx,[ecx+4])
00658 ASJ( jmp, 0, b)
00659
00660 ASL(2)
00661 AS2( mov eax, 0)
00662 AS1( setc al)
00663
00664 AddEpilogue
00665 }
00666
00667 #if CRYPTOPP_INTEGER_SSE2
00668 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Add(size_t N, word *C, const word *A, const word *B)
00669 {
00670 AddPrologue
00671
00672
00673 AS2( lea eax, [eax+4*ecx])
00674 AS2( lea edi, [edi+4*ecx])
00675 AS2( lea edx, [edx+4*ecx])
00676
00677 AS1( neg ecx)
00678 AS2( pxor mm2, mm2)
00679 ASJ( jz, 2, f)
00680 AS2( test ecx, 2)
00681 ASJ( jz, 0, f)
00682 AS2( sub ecx, 2)
00683 ASJ( jmp, 1, f)
00684
00685 ASL(0)
00686 AS2( movd mm0, DWORD PTR [eax+4*ecx])
00687 AS2( movd mm1, DWORD PTR [edi+4*ecx])
00688 AS2( paddq mm0, mm1)
00689 AS2( paddq mm2, mm0)
00690 AS2( movd DWORD PTR [edx+4*ecx], mm2)
00691 AS2( psrlq mm2, 32)
00692
00693 AS2( movd mm0, DWORD PTR [eax+4*ecx+4])
00694 AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
00695 AS2( paddq mm0, mm1)
00696 AS2( paddq mm2, mm0)
00697 AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
00698 AS2( psrlq mm2, 32)
00699
00700 ASL(1)
00701 AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
00702 AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
00703 AS2( paddq mm0, mm1)
00704 AS2( paddq mm2, mm0)
00705 AS2( movd DWORD PTR [edx+4*ecx+8], mm2)
00706 AS2( psrlq mm2, 32)
00707
00708 AS2( movd mm0, DWORD PTR [eax+4*ecx+12])
00709 AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
00710 AS2( paddq mm0, mm1)
00711 AS2( paddq mm2, mm0)
00712 AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
00713 AS2( psrlq mm2, 32)
00714
00715 AS2( add ecx, 4)
00716 ASJ( jnz, 0, b)
00717
00718 ASL(2)
00719 AS2( movd eax, mm2)
00720 AS1( emms)
00721
00722 AddEpilogue
00723 }
00724 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Sub(size_t N, word *C, const word *A, const word *B)
00725 {
00726 AddPrologue
00727
00728
00729 AS2( lea eax, [eax+4*ecx])
00730 AS2( lea edi, [edi+4*ecx])
00731 AS2( lea edx, [edx+4*ecx])
00732
00733 AS1( neg ecx)
00734 AS2( pxor mm2, mm2)
00735 ASJ( jz, 2, f)
00736 AS2( test ecx, 2)
00737 ASJ( jz, 0, f)
00738 AS2( sub ecx, 2)
00739 ASJ( jmp, 1, f)
00740
00741 ASL(0)
00742 AS2( movd mm0, DWORD PTR [eax+4*ecx])
00743 AS2( movd mm1, DWORD PTR [edi+4*ecx])
00744 AS2( psubq mm0, mm1)
00745 AS2( psubq mm0, mm2)
00746 AS2( movd DWORD PTR [edx+4*ecx], mm0)
00747 AS2( psrlq mm0, 63)
00748
00749 AS2( movd mm2, DWORD PTR [eax+4*ecx+4])
00750 AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
00751 AS2( psubq mm2, mm1)
00752 AS2( psubq mm2, mm0)
00753 AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
00754 AS2( psrlq mm2, 63)
00755
00756 ASL(1)
00757 AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
00758 AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
00759 AS2( psubq mm0, mm1)
00760 AS2( psubq mm0, mm2)
00761 AS2( movd DWORD PTR [edx+4*ecx+8], mm0)
00762 AS2( psrlq mm0, 63)
00763
00764 AS2( movd mm2, DWORD PTR [eax+4*ecx+12])
00765 AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
00766 AS2( psubq mm2, mm1)
00767 AS2( psubq mm2, mm0)
00768 AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
00769 AS2( psrlq mm2, 63)
00770
00771 AS2( add ecx, 4)
00772 ASJ( jnz, 0, b)
00773
00774 ASL(2)
00775 AS2( movd eax, mm2)
00776 AS1( emms)
00777
00778 AddEpilogue
00779 }
00780 #endif // #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
00781 #else
00782 int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
00783 {
00784 assert (N%2 == 0);
00785
00786 Declare2Words(u);
00787 AssignWord(u, 0);
00788 for (size_t i=0; i<N; i+=2)
00789 {
00790 AddWithCarry(u, A[i], B[i]);
00791 C[i] = LowWord(u);
00792 AddWithCarry(u, A[i+1], B[i+1]);
00793 C[i+1] = LowWord(u);
00794 }
00795 return int(GetCarry(u));
00796 }
00797
00798 int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B)
00799 {
00800 assert (N%2 == 0);
00801
00802 Declare2Words(u);
00803 AssignWord(u, 0);
00804 for (size_t i=0; i<N; i+=2)
00805 {
00806 SubtractWithBorrow(u, A[i], B[i]);
00807 C[i] = LowWord(u);
00808 SubtractWithBorrow(u, A[i+1], B[i+1]);
00809 C[i+1] = LowWord(u);
00810 }
00811 return int(GetBorrow(u));
00812 }
00813 #endif
00814
00815 static word LinearMultiply(word *C, const word *A, word B, size_t N)
00816 {
00817 word carry=0;
00818 for(unsigned i=0; i<N; i++)
00819 {
00820 Declare2Words(p);
00821 MultiplyWords(p, A[i], B);
00822 Acc2WordsBy1(p, carry);
00823 C[i] = LowWord(p);
00824 carry = HighWord(p);
00825 }
00826 return carry;
00827 }
00828
00829 #ifndef CRYPTOPP_DOXYGEN_PROCESSING
00830
00831 #define Mul_2 \
00832 Mul_Begin(2) \
00833 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00834 Mul_End(1, 1)
00835
00836 #define Mul_4 \
00837 Mul_Begin(4) \
00838 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00839 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
00840 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
00841 Mul_SaveAcc(3, 1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
00842 Mul_SaveAcc(4, 2, 3) Mul_Acc(3, 2) \
00843 Mul_End(5, 3)
00844
00845 #define Mul_8 \
00846 Mul_Begin(8) \
00847 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00848 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
00849 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
00850 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
00851 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
00852 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
00853 Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
00854 Mul_SaveAcc(7, 1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
00855 Mul_SaveAcc(8, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
00856 Mul_SaveAcc(9, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
00857 Mul_SaveAcc(10, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
00858 Mul_SaveAcc(11, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
00859 Mul_SaveAcc(12, 6, 7) Mul_Acc(7, 6) \
00860 Mul_End(13, 7)
00861
00862 #define Mul_16 \
00863 Mul_Begin(16) \
00864 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00865 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
00866 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
00867 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
00868 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
00869 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
00870 Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
00871 Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
00872 Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
00873 Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
00874 Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
00875 Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
00876 Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
00877 Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
00878 Mul_SaveAcc(14, 0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
00879 Mul_SaveAcc(15, 1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
00880 Mul_SaveAcc(16, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
00881 Mul_SaveAcc(17, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
00882 Mul_SaveAcc(18, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
00883 Mul_SaveAcc(19, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
00884 Mul_SaveAcc(20, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
00885 Mul_SaveAcc(21, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
00886 Mul_SaveAcc(22, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
00887 Mul_SaveAcc(23, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
00888 Mul_SaveAcc(24, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
00889 Mul_SaveAcc(25, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
00890 Mul_SaveAcc(26, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
00891 Mul_SaveAcc(27, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
00892 Mul_SaveAcc(28, 14, 15) Mul_Acc(15, 14) \
00893 Mul_End(29, 15)
00894
00895 #define Squ_2 \
00896 Squ_Begin(2) \
00897 Squ_End(2)
00898
00899 #define Squ_4 \
00900 Squ_Begin(4) \
00901 Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
00902 Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
00903 Squ_SaveAcc(3, 1, 3) Squ_Diag(2) \
00904 Squ_SaveAcc(4, 2, 3) Squ_NonDiag \
00905 Squ_End(4)
00906
00907 #define Squ_8 \
00908 Squ_Begin(8) \
00909 Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
00910 Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
00911 Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
00912 Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
00913 Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
00914 Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
00915 Squ_SaveAcc(7, 1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
00916 Squ_SaveAcc(8, 2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
00917 Squ_SaveAcc(9, 3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
00918 Squ_SaveAcc(10, 4, 7) Squ_Acc(5, 6) Squ_NonDiag \
00919 Squ_SaveAcc(11, 5, 7) Squ_Diag(6) \
00920 Squ_SaveAcc(12, 6, 7) Squ_NonDiag \
00921 Squ_End(8)
00922
00923 #define Squ_16 \
00924 Squ_Begin(16) \
00925 Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
00926 Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
00927 Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
00928 Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
00929 Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
00930 Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
00931 Squ_SaveAcc(7, 0, 8) Squ_Acc(1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
00932 Squ_SaveAcc(8, 0, 9) Squ_Acc(1, 8) Squ_Acc(2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
00933 Squ_SaveAcc(9, 0, 10) Squ_Acc(1, 9) Squ_Acc(2, 8) Squ_Acc(3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
00934 Squ_SaveAcc(10, 0, 11) Squ_Acc(1, 10) Squ_Acc(2, 9) Squ_Acc(3, 8) Squ_Acc(4, 7) Squ_Acc(5, 6) Squ_NonDiag \
00935 Squ_SaveAcc(11, 0, 12) Squ_Acc(1, 11) Squ_Acc(2, 10) Squ_Acc(3, 9) Squ_Acc(4, 8) Squ_Acc(5, 7) Squ_Diag(6) \
00936 Squ_SaveAcc(12, 0, 13) Squ_Acc(1, 12) Squ_Acc(2, 11) Squ_Acc(3, 10) Squ_Acc(4, 9) Squ_Acc(5, 8) Squ_Acc(6, 7) Squ_NonDiag \
00937 Squ_SaveAcc(13, 0, 14) Squ_Acc(1, 13) Squ_Acc(2, 12) Squ_Acc(3, 11) Squ_Acc(4, 10) Squ_Acc(5, 9) Squ_Acc(6, 8) Squ_Diag(7) \
00938 Squ_SaveAcc(14, 0, 15) Squ_Acc(1, 14) Squ_Acc(2, 13) Squ_Acc(3, 12) Squ_Acc(4, 11) Squ_Acc(5, 10) Squ_Acc(6, 9) Squ_Acc(7, 8) Squ_NonDiag \
00939 Squ_SaveAcc(15, 1, 15) Squ_Acc(2, 14) Squ_Acc(3, 13) Squ_Acc(4, 12) Squ_Acc(5, 11) Squ_Acc(6, 10) Squ_Acc(7, 9) Squ_Diag(8) \
00940 Squ_SaveAcc(16, 2, 15) Squ_Acc(3, 14) Squ_Acc(4, 13) Squ_Acc(5, 12) Squ_Acc(6, 11) Squ_Acc(7, 10) Squ_Acc(8, 9) Squ_NonDiag \
00941 Squ_SaveAcc(17, 3, 15) Squ_Acc(4, 14) Squ_Acc(5, 13) Squ_Acc(6, 12) Squ_Acc(7, 11) Squ_Acc(8, 10) Squ_Diag(9) \
00942 Squ_SaveAcc(18, 4, 15) Squ_Acc(5, 14) Squ_Acc(6, 13) Squ_Acc(7, 12) Squ_Acc(8, 11) Squ_Acc(9, 10) Squ_NonDiag \
00943 Squ_SaveAcc(19, 5, 15) Squ_Acc(6, 14) Squ_Acc(7, 13) Squ_Acc(8, 12) Squ_Acc(9, 11) Squ_Diag(10) \
00944 Squ_SaveAcc(20, 6, 15) Squ_Acc(7, 14) Squ_Acc(8, 13) Squ_Acc(9, 12) Squ_Acc(10, 11) Squ_NonDiag \
00945 Squ_SaveAcc(21, 7, 15) Squ_Acc(8, 14) Squ_Acc(9, 13) Squ_Acc(10, 12) Squ_Diag(11) \
00946 Squ_SaveAcc(22, 8, 15) Squ_Acc(9, 14) Squ_Acc(10, 13) Squ_Acc(11, 12) Squ_NonDiag \
00947 Squ_SaveAcc(23, 9, 15) Squ_Acc(10, 14) Squ_Acc(11, 13) Squ_Diag(12) \
00948 Squ_SaveAcc(24, 10, 15) Squ_Acc(11, 14) Squ_Acc(12, 13) Squ_NonDiag \
00949 Squ_SaveAcc(25, 11, 15) Squ_Acc(12, 14) Squ_Diag(13) \
00950 Squ_SaveAcc(26, 12, 15) Squ_Acc(13, 14) Squ_NonDiag \
00951 Squ_SaveAcc(27, 13, 15) Squ_Diag(14) \
00952 Squ_SaveAcc(28, 14, 15) Squ_NonDiag \
00953 Squ_End(16)
00954
00955 #define Bot_2 \
00956 Mul_Begin(2) \
00957 Bot_SaveAcc(0, 0, 1) Bot_Acc(1, 0) \
00958 Bot_End(2)
00959
00960 #define Bot_4 \
00961 Mul_Begin(4) \
00962 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00963 Mul_SaveAcc(1, 2, 0) Mul_Acc(1, 1) Mul_Acc(0, 2) \
00964 Bot_SaveAcc(2, 0, 3) Bot_Acc(1, 2) Bot_Acc(2, 1) Bot_Acc(3, 0) \
00965 Bot_End(4)
00966
00967 #define Bot_8 \
00968 Mul_Begin(8) \
00969 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00970 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
00971 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
00972 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
00973 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
00974 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
00975 Bot_SaveAcc(6, 0, 7) Bot_Acc(1, 6) Bot_Acc(2, 5) Bot_Acc(3, 4) Bot_Acc(4, 3) Bot_Acc(5, 2) Bot_Acc(6, 1) Bot_Acc(7, 0) \
00976 Bot_End(8)
00977
00978 #define Bot_16 \
00979 Mul_Begin(16) \
00980 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00981 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
00982 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
00983 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
00984 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
00985 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
00986 Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
00987 Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
00988 Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
00989 Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
00990 Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
00991 Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
00992 Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
00993 Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
00994 Bot_SaveAcc(14, 0, 15) Bot_Acc(1, 14) Bot_Acc(2, 13) Bot_Acc(3, 12) Bot_Acc(4, 11) Bot_Acc(5, 10) Bot_Acc(6, 9) Bot_Acc(7, 8) Bot_Acc(8, 7) Bot_Acc(9, 6) Bot_Acc(10, 5) Bot_Acc(11, 4) Bot_Acc(12, 3) Bot_Acc(13, 2) Bot_Acc(14, 1) Bot_Acc(15, 0) \
00995 Bot_End(16)
00996
00997 #endif
00998
00999 #if 0
01000 #define Mul_Begin(n) \
01001 Declare2Words(p) \
01002 Declare2Words(c) \
01003 Declare2Words(d) \
01004 MultiplyWords(p, A[0], B[0]) \
01005 AssignWord(c, LowWord(p)) \
01006 AssignWord(d, HighWord(p))
01007
01008 #define Mul_Acc(i, j) \
01009 MultiplyWords(p, A[i], B[j]) \
01010 Acc2WordsBy1(c, LowWord(p)) \
01011 Acc2WordsBy1(d, HighWord(p))
01012
01013 #define Mul_SaveAcc(k, i, j) \
01014 R[k] = LowWord(c); \
01015 Add2WordsBy1(c, d, HighWord(c)) \
01016 MultiplyWords(p, A[i], B[j]) \
01017 AssignWord(d, HighWord(p)) \
01018 Acc2WordsBy1(c, LowWord(p))
01019
01020 #define Mul_End(n) \
01021 R[2*n-3] = LowWord(c); \
01022 Acc2WordsBy1(d, HighWord(c)) \
01023 MultiplyWords(p, A[n-1], B[n-1])\
01024 Acc2WordsBy2(d, p) \
01025 R[2*n-2] = LowWord(d); \
01026 R[2*n-1] = HighWord(d);
01027
01028 #define Bot_SaveAcc(k, i, j) \
01029 R[k] = LowWord(c); \
01030 word e = LowWord(d) + HighWord(c); \
01031 e += A[i] * B[j];
01032
01033 #define Bot_Acc(i, j) \
01034 e += A[i] * B[j];
01035
01036 #define Bot_End(n) \
01037 R[n-1] = e;
01038 #else
01039 #define Mul_Begin(n) \
01040 Declare2Words(p) \
01041 word c; \
01042 Declare2Words(d) \
01043 MultiplyWords(p, A[0], B[0]) \
01044 c = LowWord(p); \
01045 AssignWord(d, HighWord(p))
01046
01047 #define Mul_Acc(i, j) \
01048 MulAcc(c, d, A[i], B[j])
01049
01050 #define Mul_SaveAcc(k, i, j) \
01051 R[k] = c; \
01052 c = LowWord(d); \
01053 AssignWord(d, HighWord(d)) \
01054 MulAcc(c, d, A[i], B[j])
01055
01056 #define Mul_End(k, i) \
01057 R[k] = c; \
01058 MultiplyWords(p, A[i], B[i]) \
01059 Acc2WordsBy2(p, d) \
01060 R[k+1] = LowWord(p); \
01061 R[k+2] = HighWord(p);
01062
01063 #define Bot_SaveAcc(k, i, j) \
01064 R[k] = c; \
01065 c = LowWord(d); \
01066 c += A[i] * B[j];
01067
01068 #define Bot_Acc(i, j) \
01069 c += A[i] * B[j];
01070
01071 #define Bot_End(n) \
01072 R[n-1] = c;
01073 #endif
01074
01075 #define Squ_Begin(n) \
01076 Declare2Words(p) \
01077 word c; \
01078 Declare2Words(d) \
01079 Declare2Words(e) \
01080 MultiplyWords(p, A[0], A[0]) \
01081 R[0] = LowWord(p); \
01082 AssignWord(e, HighWord(p)) \
01083 MultiplyWords(p, A[0], A[1]) \
01084 c = LowWord(p); \
01085 AssignWord(d, HighWord(p)) \
01086 Squ_NonDiag \
01087
01088 #define Squ_NonDiag \
01089 Double3Words(c, d)
01090
01091 #define Squ_SaveAcc(k, i, j) \
01092 Acc3WordsBy2(c, d, e) \
01093 R[k] = c; \
01094 MultiplyWords(p, A[i], A[j]) \
01095 c = LowWord(p); \
01096 AssignWord(d, HighWord(p)) \
01097
01098 #define Squ_Acc(i, j) \
01099 MulAcc(c, d, A[i], A[j])
01100
01101 #define Squ_Diag(i) \
01102 Squ_NonDiag \
01103 MulAcc(c, d, A[i], A[i])
01104
01105 #define Squ_End(n) \
01106 Acc3WordsBy2(c, d, e) \
01107 R[2*n-3] = c; \
01108 MultiplyWords(p, A[n-1], A[n-1])\
01109 Acc2WordsBy2(p, e) \
01110 R[2*n-2] = LowWord(p); \
01111 R[2*n-1] = HighWord(p);
01112
01113 void Baseline_Multiply2(word *R, const word *A, const word *B)
01114 {
01115 Mul_2
01116 }
01117
01118 void Baseline_Multiply4(word *R, const word *A, const word *B)
01119 {
01120 Mul_4
01121 }
01122
01123 void Baseline_Multiply8(word *R, const word *A, const word *B)
01124 {
01125 Mul_8
01126 }
01127
01128 void Baseline_Square2(word *R, const word *A)
01129 {
01130 Squ_2
01131 }
01132
01133 void Baseline_Square4(word *R, const word *A)
01134 {
01135 Squ_4
01136 }
01137
01138 void Baseline_Square8(word *R, const word *A)
01139 {
01140 Squ_8
01141 }
01142
01143 void Baseline_MultiplyBottom2(word *R, const word *A, const word *B)
01144 {
01145 Bot_2
01146 }
01147
01148 void Baseline_MultiplyBottom4(word *R, const word *A, const word *B)
01149 {
01150 Bot_4
01151 }
01152
01153 void Baseline_MultiplyBottom8(word *R, const word *A, const word *B)
01154 {
01155 Bot_8
01156 }
01157
01158 #define Top_Begin(n) \
01159 Declare2Words(p) \
01160 word c; \
01161 Declare2Words(d) \
01162 MultiplyWords(p, A[0], B[n-2]);\
01163 AssignWord(d, HighWord(p));
01164
01165 #define Top_Acc(i, j) \
01166 MultiplyWords(p, A[i], B[j]);\
01167 Acc2WordsBy1(d, HighWord(p));
01168
01169 #define Top_SaveAcc0(i, j) \
01170 c = LowWord(d); \
01171 AssignWord(d, HighWord(d)) \
01172 MulAcc(c, d, A[i], B[j])
01173
01174 #define Top_SaveAcc1(i, j) \
01175 c = L<c; \
01176 Acc2WordsBy1(d, c); \
01177 c = LowWord(d); \
01178 AssignWord(d, HighWord(d)) \
01179 MulAcc(c, d, A[i], B[j])
01180
01181 void Baseline_MultiplyTop2(word *R, const word *A, const word *B, word L)
01182 {
01183 word T[4];
01184 Baseline_Multiply2(T, A, B);
01185 R[0] = T[2];
01186 R[1] = T[3];
01187 }
01188
01189 void Baseline_MultiplyTop4(word *R, const word *A, const word *B, word L)
01190 {
01191 Top_Begin(4)
01192 Top_Acc(1, 1) Top_Acc(2, 0) \
01193 Top_SaveAcc0(0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
01194 Top_SaveAcc1(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
01195 Mul_SaveAcc(0, 2, 3) Mul_Acc(3, 2) \
01196 Mul_End(1, 3)
01197 }
01198
01199 void Baseline_MultiplyTop8(word *R, const word *A, const word *B, word L)
01200 {
01201 Top_Begin(8)
01202 Top_Acc(1, 5) Top_Acc(2, 4) Top_Acc(3, 3) Top_Acc(4, 2) Top_Acc(5, 1) Top_Acc(6, 0) \
01203 Top_SaveAcc0(0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
01204 Top_SaveAcc1(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
01205 Mul_SaveAcc(0, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
01206 Mul_SaveAcc(1, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
01207 Mul_SaveAcc(2, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
01208 Mul_SaveAcc(3, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
01209 Mul_SaveAcc(4, 6, 7) Mul_Acc(7, 6) \
01210 Mul_End(5, 7)
01211 }
01212
01213 #if !CRYPTOPP_INTEGER_SSE2 // save memory by not compiling these functions when SSE2 is available
01214 void Baseline_Multiply16(word *R, const word *A, const word *B)
01215 {
01216 Mul_16
01217 }
01218
01219 void Baseline_Square16(word *R, const word *A)
01220 {
01221 Squ_16
01222 }
01223
01224 void Baseline_MultiplyBottom16(word *R, const word *A, const word *B)
01225 {
01226 Bot_16
01227 }
01228
01229 void Baseline_MultiplyTop16(word *R, const word *A, const word *B, word L)
01230 {
01231 Top_Begin(16)
01232 Top_Acc(1, 13) Top_Acc(2, 12) Top_Acc(3, 11) Top_Acc(4, 10) Top_Acc(5, 9) Top_Acc(6, 8) Top_Acc(7, 7) Top_Acc(8, 6) Top_Acc(9, 5) Top_Acc(10, 4) Top_Acc(11, 3) Top_Acc(12, 2) Top_Acc(13, 1) Top_Acc(14, 0) \
01233 Top_SaveAcc0(0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
01234 Top_SaveAcc1(1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
01235 Mul_SaveAcc(0, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
01236 Mul_SaveAcc(1, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
01237 Mul_SaveAcc(2, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
01238 Mul_SaveAcc(3, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
01239 Mul_SaveAcc(4, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
01240 Mul_SaveAcc(5, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
01241 Mul_SaveAcc(6, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
01242 Mul_SaveAcc(7, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
01243 Mul_SaveAcc(8, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
01244 Mul_SaveAcc(9, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
01245 Mul_SaveAcc(10, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
01246 Mul_SaveAcc(11, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
01247 Mul_SaveAcc(12, 14, 15) Mul_Acc(15, 14) \
01248 Mul_End(13, 15)
01249 }
01250 #endif
01251
01252
01253
01254 #if CRYPTOPP_INTEGER_SSE2
01255
01256 CRYPTOPP_ALIGN_DATA(16) static const word32 s_maskLow16[4] CRYPTOPP_SECTION_ALIGN16 = {0xffff,0xffff,0xffff,0xffff};
01257
01258 #undef Mul_Begin
01259 #undef Mul_Acc
01260 #undef Top_Begin
01261 #undef Top_Acc
01262 #undef Squ_Acc
01263 #undef Squ_NonDiag
01264 #undef Squ_Diag
01265 #undef Squ_SaveAcc
01266 #undef Squ_Begin
01267 #undef Mul_SaveAcc
01268 #undef Bot_Acc
01269 #undef Bot_SaveAcc
01270 #undef Bot_End
01271 #undef Squ_End
01272 #undef Mul_End
01273
01274 #define SSE2_FinalSave(k) \
01275 AS2( psllq xmm5, 16) \
01276 AS2( paddq xmm4, xmm5) \
01277 AS2( movq QWORD PTR [ecx+8*(k)], xmm4)
01278
01279 #define SSE2_SaveShift(k) \
01280 AS2( movq xmm0, xmm6) \
01281 AS2( punpckhqdq xmm6, xmm0) \
01282 AS2( movq xmm1, xmm7) \
01283 AS2( punpckhqdq xmm7, xmm1) \
01284 AS2( paddd xmm6, xmm0) \
01285 AS2( pslldq xmm6, 4) \
01286 AS2( paddd xmm7, xmm1) \
01287 AS2( paddd xmm4, xmm6) \
01288 AS2( pslldq xmm7, 4) \
01289 AS2( movq xmm6, xmm4) \
01290 AS2( paddd xmm5, xmm7) \
01291 AS2( movq xmm7, xmm5) \
01292 AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
01293 AS2( psrlq xmm6, 16) \
01294 AS2( paddq xmm6, xmm7) \
01295 AS2( punpckhqdq xmm4, xmm0) \
01296 AS2( punpckhqdq xmm5, xmm0) \
01297 AS2( movq QWORD PTR [ecx+8*(k)+2], xmm6) \
01298 AS2( psrlq xmm6, 3*16) \
01299 AS2( paddd xmm4, xmm6) \
01300
01301 #define Squ_SSE2_SaveShift(k) \
01302 AS2( movq xmm0, xmm6) \
01303 AS2( punpckhqdq xmm6, xmm0) \
01304 AS2( movq xmm1, xmm7) \
01305 AS2( punpckhqdq xmm7, xmm1) \
01306 AS2( paddd xmm6, xmm0) \
01307 AS2( pslldq xmm6, 4) \
01308 AS2( paddd xmm7, xmm1) \
01309 AS2( paddd xmm4, xmm6) \
01310 AS2( pslldq xmm7, 4) \
01311 AS2( movhlps xmm6, xmm4) \
01312 AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
01313 AS2( paddd xmm5, xmm7) \
01314 AS2( movhps QWORD PTR [esp+12], xmm5)\
01315 AS2( psrlq xmm4, 16) \
01316 AS2( paddq xmm4, xmm5) \
01317 AS2( movq QWORD PTR [ecx+8*(k)+2], xmm4) \
01318 AS2( psrlq xmm4, 3*16) \
01319 AS2( paddd xmm4, xmm6) \
01320 AS2( movq QWORD PTR [esp+4], xmm4)\
01321
01322 #define SSE2_FirstMultiply(i) \
01323 AS2( movdqa xmm7, [esi+(i)*16])\
01324 AS2( movdqa xmm5, [edi-(i)*16])\
01325 AS2( pmuludq xmm5, xmm7) \
01326 AS2( movdqa xmm4, [ebx])\
01327 AS2( movdqa xmm6, xmm4) \
01328 AS2( pand xmm4, xmm5) \
01329 AS2( psrld xmm5, 16) \
01330 AS2( pmuludq xmm7, [edx-(i)*16])\
01331 AS2( pand xmm6, xmm7) \
01332 AS2( psrld xmm7, 16)
01333
01334 #define Squ_Begin(n) \
01335 SquPrologue \
01336 AS2( mov esi, esp)\
01337 AS2( and esp, 0xfffffff0)\
01338 AS2( lea edi, [esp-32*n])\
01339 AS2( sub esp, 32*n+16)\
01340 AS1( push esi)\
01341 AS2( mov esi, edi) \
01342 AS2( xor edx, edx) \
01343 ASL(1) \
01344 ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
01345 ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
01346 AS2( movdqa [edi+2*edx], xmm0) \
01347 AS2( psrlq xmm0, 32) \
01348 AS2( movdqa [edi+2*edx+16], xmm0) \
01349 AS2( movdqa [edi+16*n+2*edx], xmm1) \
01350 AS2( psrlq xmm1, 32) \
01351 AS2( movdqa [edi+16*n+2*edx+16], xmm1) \
01352 AS2( add edx, 16) \
01353 AS2( cmp edx, 8*(n)) \
01354 ASJ( jne, 1, b) \
01355 AS2( lea edx, [edi+16*n])\
01356 SSE2_FirstMultiply(0) \
01357
01358 #define Squ_Acc(i) \
01359 ASL(LSqu##i) \
01360 AS2( movdqa xmm1, [esi+(i)*16]) \
01361 AS2( movdqa xmm0, [edi-(i)*16]) \
01362 AS2( movdqa xmm2, [ebx]) \
01363 AS2( pmuludq xmm0, xmm1) \
01364 AS2( pmuludq xmm1, [edx-(i)*16]) \
01365 AS2( movdqa xmm3, xmm2) \
01366 AS2( pand xmm2, xmm0) \
01367 AS2( psrld xmm0, 16) \
01368 AS2( paddd xmm4, xmm2) \
01369 AS2( paddd xmm5, xmm0) \
01370 AS2( pand xmm3, xmm1) \
01371 AS2( psrld xmm1, 16) \
01372 AS2( paddd xmm6, xmm3) \
01373 AS2( paddd xmm7, xmm1) \
01374
01375 #define Squ_Acc1(i)
01376 #define Squ_Acc2(i) ASC(call, LSqu##i)
01377 #define Squ_Acc3(i) Squ_Acc2(i)
01378 #define Squ_Acc4(i) Squ_Acc2(i)
01379 #define Squ_Acc5(i) Squ_Acc2(i)
01380 #define Squ_Acc6(i) Squ_Acc2(i)
01381 #define Squ_Acc7(i) Squ_Acc2(i)
01382 #define Squ_Acc8(i) Squ_Acc2(i)
01383
01384 #define SSE2_End(E, n) \
01385 SSE2_SaveShift(2*(n)-3) \
01386 AS2( movdqa xmm7, [esi+16]) \
01387 AS2( movdqa xmm0, [edi]) \
01388 AS2( pmuludq xmm0, xmm7) \
01389 AS2( movdqa xmm2, [ebx]) \
01390 AS2( pmuludq xmm7, [edx]) \
01391 AS2( movdqa xmm6, xmm2) \
01392 AS2( pand xmm2, xmm0) \
01393 AS2( psrld xmm0, 16) \
01394 AS2( paddd xmm4, xmm2) \
01395 AS2( paddd xmm5, xmm0) \
01396 AS2( pand xmm6, xmm7) \
01397 AS2( psrld xmm7, 16) \
01398 SSE2_SaveShift(2*(n)-2) \
01399 SSE2_FinalSave(2*(n)-1) \
01400 AS1( pop esp)\
01401 E
01402
01403 #define Squ_End(n) SSE2_End(SquEpilogue, n)
01404 #define Mul_End(n) SSE2_End(MulEpilogue, n)
01405 #define Top_End(n) SSE2_End(TopEpilogue, n)
01406
01407 #define Squ_Column1(k, i) \
01408 Squ_SSE2_SaveShift(k) \
01409 AS2( add esi, 16) \
01410 SSE2_FirstMultiply(1)\
01411 Squ_Acc##i(i) \
01412 AS2( paddd xmm4, xmm4) \
01413 AS2( paddd xmm5, xmm5) \
01414 AS2( movdqa xmm3, [esi]) \
01415 AS2( movq xmm1, QWORD PTR [esi+8]) \
01416 AS2( pmuludq xmm1, xmm3) \
01417 AS2( pmuludq xmm3, xmm3) \
01418 AS2( movdqa xmm0, [ebx])\
01419 AS2( movdqa xmm2, xmm0) \
01420 AS2( pand xmm0, xmm1) \
01421 AS2( psrld xmm1, 16) \
01422 AS2( paddd xmm6, xmm0) \
01423 AS2( paddd xmm7, xmm1) \
01424 AS2( pand xmm2, xmm3) \
01425 AS2( psrld xmm3, 16) \
01426 AS2( paddd xmm6, xmm6) \
01427 AS2( paddd xmm7, xmm7) \
01428 AS2( paddd xmm4, xmm2) \
01429 AS2( paddd xmm5, xmm3) \
01430 AS2( movq xmm0, QWORD PTR [esp+4])\
01431 AS2( movq xmm1, QWORD PTR [esp+12])\
01432 AS2( paddd xmm4, xmm0)\
01433 AS2( paddd xmm5, xmm1)\
01434
01435 #define Squ_Column0(k, i) \
01436 Squ_SSE2_SaveShift(k) \
01437 AS2( add edi, 16) \
01438 AS2( add edx, 16) \
01439 SSE2_FirstMultiply(1)\
01440 Squ_Acc##i(i) \
01441 AS2( paddd xmm6, xmm6) \
01442 AS2( paddd xmm7, xmm7) \
01443 AS2( paddd xmm4, xmm4) \
01444 AS2( paddd xmm5, xmm5) \
01445 AS2( movq xmm0, QWORD PTR [esp+4])\
01446 AS2( movq xmm1, QWORD PTR [esp+12])\
01447 AS2( paddd xmm4, xmm0)\
01448 AS2( paddd xmm5, xmm1)\
01449
01450 #define SSE2_MulAdd45 \
01451 AS2( movdqa xmm7, [esi]) \
01452 AS2( movdqa xmm0, [edi]) \
01453 AS2( pmuludq xmm0, xmm7) \
01454 AS2( movdqa xmm2, [ebx]) \
01455 AS2( pmuludq xmm7, [edx]) \
01456 AS2( movdqa xmm6, xmm2) \
01457 AS2( pand xmm2, xmm0) \
01458 AS2( psrld xmm0, 16) \
01459 AS2( paddd xmm4, xmm2) \
01460 AS2( paddd xmm5, xmm0) \
01461 AS2( pand xmm6, xmm7) \
01462 AS2( psrld xmm7, 16)
01463
01464 #define Mul_Begin(n) \
01465 MulPrologue \
01466 AS2( mov esi, esp)\
01467 AS2( and esp, 0xfffffff0)\
01468 AS2( sub esp, 48*n+16)\
01469 AS1( push esi)\
01470 AS2( xor edx, edx) \
01471 ASL(1) \
01472 ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
01473 ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
01474 ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
01475 AS2( movdqa [esp+20+2*edx], xmm0) \
01476 AS2( psrlq xmm0, 32) \
01477 AS2( movdqa [esp+20+2*edx+16], xmm0) \
01478 AS2( movdqa [esp+20+16*n+2*edx], xmm1) \
01479 AS2( psrlq xmm1, 32) \
01480 AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \
01481 AS2( movdqa [esp+20+32*n+2*edx], xmm2) \
01482 AS2( psrlq xmm2, 32) \
01483 AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \
01484 AS2( add edx, 16) \
01485 AS2( cmp edx, 8*(n)) \
01486 ASJ( jne, 1, b) \
01487 AS2( lea edi, [esp+20])\
01488 AS2( lea edx, [esp+20+16*n])\
01489 AS2( lea esi, [esp+20+32*n])\
01490 SSE2_FirstMultiply(0) \
01491
01492 #define Mul_Acc(i) \
01493 ASL(LMul##i) \
01494 AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
01495 AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
01496 AS2( movdqa xmm2, [ebx]) \
01497 AS2( pmuludq xmm0, xmm1) \
01498 AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
01499 AS2( movdqa xmm3, xmm2) \
01500 AS2( pand xmm2, xmm0) \
01501 AS2( psrld xmm0, 16) \
01502 AS2( paddd xmm4, xmm2) \
01503 AS2( paddd xmm5, xmm0) \
01504 AS2( pand xmm3, xmm1) \
01505 AS2( psrld xmm1, 16) \
01506 AS2( paddd xmm6, xmm3) \
01507 AS2( paddd xmm7, xmm1) \
01508
01509 #define Mul_Acc1(i)
01510 #define Mul_Acc2(i) ASC(call, LMul##i)
01511 #define Mul_Acc3(i) Mul_Acc2(i)
01512 #define Mul_Acc4(i) Mul_Acc2(i)
01513 #define Mul_Acc5(i) Mul_Acc2(i)
01514 #define Mul_Acc6(i) Mul_Acc2(i)
01515 #define Mul_Acc7(i) Mul_Acc2(i)
01516 #define Mul_Acc8(i) Mul_Acc2(i)
01517 #define Mul_Acc9(i) Mul_Acc2(i)
01518 #define Mul_Acc10(i) Mul_Acc2(i)
01519 #define Mul_Acc11(i) Mul_Acc2(i)
01520 #define Mul_Acc12(i) Mul_Acc2(i)
01521 #define Mul_Acc13(i) Mul_Acc2(i)
01522 #define Mul_Acc14(i) Mul_Acc2(i)
01523 #define Mul_Acc15(i) Mul_Acc2(i)
01524 #define Mul_Acc16(i) Mul_Acc2(i)
01525
01526 #define Mul_Column1(k, i) \
01527 SSE2_SaveShift(k) \
01528 AS2( add esi, 16) \
01529 SSE2_MulAdd45\
01530 Mul_Acc##i(i) \
01531
01532 #define Mul_Column0(k, i) \
01533 SSE2_SaveShift(k) \
01534 AS2( add edi, 16) \
01535 AS2( add edx, 16) \
01536 SSE2_MulAdd45\
01537 Mul_Acc##i(i) \
01538
01539 #define Bot_Acc(i) \
01540 AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
01541 AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
01542 AS2( pmuludq xmm0, xmm1) \
01543 AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
01544 AS2( paddq xmm4, xmm0) \
01545 AS2( paddd xmm6, xmm1)
01546
01547 #define Bot_SaveAcc(k) \
01548 SSE2_SaveShift(k) \
01549 AS2( add edi, 16) \
01550 AS2( add edx, 16) \
01551 AS2( movdqa xmm6, [esi]) \
01552 AS2( movdqa xmm0, [edi]) \
01553 AS2( pmuludq xmm0, xmm6) \
01554 AS2( paddq xmm4, xmm0) \
01555 AS2( psllq xmm5, 16) \
01556 AS2( paddq xmm4, xmm5) \
01557 AS2( pmuludq xmm6, [edx])
01558
01559 #define Bot_End(n) \
01560 AS2( movhlps xmm7, xmm6) \
01561 AS2( paddd xmm6, xmm7) \
01562 AS2( psllq xmm6, 32) \
01563 AS2( paddd xmm4, xmm6) \
01564 AS2( movq QWORD PTR [ecx+8*((n)-1)], xmm4) \
01565 AS1( pop esp)\
01566 MulEpilogue
01567
01568 #define Top_Begin(n) \
01569 TopPrologue \
01570 AS2( mov edx, esp)\
01571 AS2( and esp, 0xfffffff0)\
01572 AS2( sub esp, 48*n+16)\
01573 AS1( push edx)\
01574 AS2( xor edx, edx) \
01575 ASL(1) \
01576 ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
01577 ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
01578 ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
01579 AS2( movdqa [esp+20+2*edx], xmm0) \
01580 AS2( psrlq xmm0, 32) \
01581 AS2( movdqa [esp+20+2*edx+16], xmm0) \
01582 AS2( movdqa [esp+20+16*n+2*edx], xmm1) \
01583 AS2( psrlq xmm1, 32) \
01584 AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \
01585 AS2( movdqa [esp+20+32*n+2*edx], xmm2) \
01586 AS2( psrlq xmm2, 32) \
01587 AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \
01588 AS2( add edx, 16) \
01589 AS2( cmp edx, 8*(n)) \
01590 ASJ( jne, 1, b) \
01591 AS2( mov eax, esi) \
01592 AS2( lea edi, [esp+20+00*n+16*(n/2-1)])\
01593 AS2( lea edx, [esp+20+16*n+16*(n/2-1)])\
01594 AS2( lea esi, [esp+20+32*n+16*(n/2-1)])\
01595 AS2( pxor xmm4, xmm4)\
01596 AS2( pxor xmm5, xmm5)
01597
01598 #define Top_Acc(i) \
01599 AS2( movq xmm0, QWORD PTR [esi+i/2*(1-(i-2*(i/2))*2)*16+8]) \
01600 AS2( pmuludq xmm0, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
01601 AS2( psrlq xmm0, 48) \
01602 AS2( paddd xmm5, xmm0)\
01603
01604 #define Top_Column0(i) \
01605 AS2( psllq xmm5, 32) \
01606 AS2( add edi, 16) \
01607 AS2( add edx, 16) \
01608 SSE2_MulAdd45\
01609 Mul_Acc##i(i) \
01610
01611 #define Top_Column1(i) \
01612 SSE2_SaveShift(0) \
01613 AS2( add esi, 16) \
01614 SSE2_MulAdd45\
01615 Mul_Acc##i(i) \
01616 AS2( shr eax, 16) \
01617 AS2( movd xmm0, eax)\
01618 AS2( movd xmm1, [ecx+4])\
01619 AS2( psrld xmm1, 16)\
01620 AS2( pcmpgtd xmm1, xmm0)\
01621 AS2( psrld xmm1, 31)\
01622 AS2( paddd xmm4, xmm1)\
01623
01624 void SSE2_Square4(word *C, const word *A)
01625 {
01626 Squ_Begin(2)
01627 Squ_Column0(0, 1)
01628 Squ_End(2)
01629 }
01630
01631 void SSE2_Square8(word *C, const word *A)
01632 {
01633 Squ_Begin(4)
01634 #ifndef __GNUC__
01635 ASJ( jmp, 0, f)
01636 Squ_Acc(2)
01637 AS1( ret) ASL(0)
01638 #endif
01639 Squ_Column0(0, 1)
01640 Squ_Column1(1, 1)
01641 Squ_Column0(2, 2)
01642 Squ_Column1(3, 1)
01643 Squ_Column0(4, 1)
01644 Squ_End(4)
01645 }
01646
01647 void SSE2_Square16(word *C, const word *A)
01648 {
01649 Squ_Begin(8)
01650 #ifndef __GNUC__
01651 ASJ( jmp, 0, f)
01652 Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
01653 AS1( ret) ASL(0)
01654 #endif
01655 Squ_Column0(0, 1)
01656 Squ_Column1(1, 1)
01657 Squ_Column0(2, 2)
01658 Squ_Column1(3, 2)
01659 Squ_Column0(4, 3)
01660 Squ_Column1(5, 3)
01661 Squ_Column0(6, 4)
01662 Squ_Column1(7, 3)
01663 Squ_Column0(8, 3)
01664 Squ_Column1(9, 2)
01665 Squ_Column0(10, 2)
01666 Squ_Column1(11, 1)
01667 Squ_Column0(12, 1)
01668 Squ_End(8)
01669 }
01670
01671 void SSE2_Square32(word *C, const word *A)
01672 {
01673 Squ_Begin(16)
01674 ASJ( jmp, 0, f)
01675 Squ_Acc(8) Squ_Acc(7) Squ_Acc(6) Squ_Acc(5) Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
01676 AS1( ret) ASL(0)
01677 Squ_Column0(0, 1)
01678 Squ_Column1(1, 1)
01679 Squ_Column0(2, 2)
01680 Squ_Column1(3, 2)
01681 Squ_Column0(4, 3)
01682 Squ_Column1(5, 3)
01683 Squ_Column0(6, 4)
01684 Squ_Column1(7, 4)
01685 Squ_Column0(8, 5)
01686 Squ_Column1(9, 5)
01687 Squ_Column0(10, 6)
01688 Squ_Column1(11, 6)
01689 Squ_Column0(12, 7)
01690 Squ_Column1(13, 7)
01691 Squ_Column0(14, 8)
01692 Squ_Column1(15, 7)
01693 Squ_Column0(16, 7)
01694 Squ_Column1(17, 6)
01695 Squ_Column0(18, 6)
01696 Squ_Column1(19, 5)
01697 Squ_Column0(20, 5)
01698 Squ_Column1(21, 4)
01699 Squ_Column0(22, 4)
01700 Squ_Column1(23, 3)
01701 Squ_Column0(24, 3)
01702 Squ_Column1(25, 2)
01703 Squ_Column0(26, 2)
01704 Squ_Column1(27, 1)
01705 Squ_Column0(28, 1)
01706 Squ_End(16)
01707 }
01708
01709 void SSE2_Multiply4(word *C, const word *A, const word *B)
01710 {
01711 Mul_Begin(2)
01712 #ifndef __GNUC__
01713 ASJ( jmp, 0, f)
01714 Mul_Acc(2)
01715 AS1( ret) ASL(0)
01716 #endif
01717 Mul_Column0(0, 2)
01718 Mul_End(2)
01719 }
01720
01721 void SSE2_Multiply8(word *C, const word *A, const word *B)
01722 {
01723 Mul_Begin(4)
01724 #ifndef __GNUC__
01725 ASJ( jmp, 0, f)
01726 Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01727 AS1( ret) ASL(0)
01728 #endif
01729 Mul_Column0(0, 2)
01730 Mul_Column1(1, 3)
01731 Mul_Column0(2, 4)
01732 Mul_Column1(3, 3)
01733 Mul_Column0(4, 2)
01734 Mul_End(4)
01735 }
01736
01737 void SSE2_Multiply16(word *C, const word *A, const word *B)
01738 {
01739 Mul_Begin(8)
01740 #ifndef __GNUC__
01741 ASJ( jmp, 0, f)
01742 Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01743 AS1( ret) ASL(0)
01744 #endif
01745 Mul_Column0(0, 2)
01746 Mul_Column1(1, 3)
01747 Mul_Column0(2, 4)
01748 Mul_Column1(3, 5)
01749 Mul_Column0(4, 6)
01750 Mul_Column1(5, 7)
01751 Mul_Column0(6, 8)
01752 Mul_Column1(7, 7)
01753 Mul_Column0(8, 6)
01754 Mul_Column1(9, 5)
01755 Mul_Column0(10, 4)
01756 Mul_Column1(11, 3)
01757 Mul_Column0(12, 2)
01758 Mul_End(8)
01759 }
01760
01761 void SSE2_Multiply32(word *C, const word *A, const word *B)
01762 {
01763 Mul_Begin(16)
01764 ASJ( jmp, 0, f)
01765 Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01766 AS1( ret) ASL(0)
01767 Mul_Column0(0, 2)
01768 Mul_Column1(1, 3)
01769 Mul_Column0(2, 4)
01770 Mul_Column1(3, 5)
01771 Mul_Column0(4, 6)
01772 Mul_Column1(5, 7)
01773 Mul_Column0(6, 8)
01774 Mul_Column1(7, 9)
01775 Mul_Column0(8, 10)
01776 Mul_Column1(9, 11)
01777 Mul_Column0(10, 12)
01778 Mul_Column1(11, 13)
01779 Mul_Column0(12, 14)
01780 Mul_Column1(13, 15)
01781 Mul_Column0(14, 16)
01782 Mul_Column1(15, 15)
01783 Mul_Column0(16, 14)
01784 Mul_Column1(17, 13)
01785 Mul_Column0(18, 12)
01786 Mul_Column1(19, 11)
01787 Mul_Column0(20, 10)
01788 Mul_Column1(21, 9)
01789 Mul_Column0(22, 8)
01790 Mul_Column1(23, 7)
01791 Mul_Column0(24, 6)
01792 Mul_Column1(25, 5)
01793 Mul_Column0(26, 4)
01794 Mul_Column1(27, 3)
01795 Mul_Column0(28, 2)
01796 Mul_End(16)
01797 }
01798
01799 void SSE2_MultiplyBottom4(word *C, const word *A, const word *B)
01800 {
01801 Mul_Begin(2)
01802 Bot_SaveAcc(0) Bot_Acc(2)
01803 Bot_End(2)
01804 }
01805
01806 void SSE2_MultiplyBottom8(word *C, const word *A, const word *B)
01807 {
01808 Mul_Begin(4)
01809 #ifndef __GNUC__
01810 ASJ( jmp, 0, f)
01811 Mul_Acc(3) Mul_Acc(2)
01812 AS1( ret) ASL(0)
01813 #endif
01814 Mul_Column0(0, 2)
01815 Mul_Column1(1, 3)
01816 Bot_SaveAcc(2) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
01817 Bot_End(4)
01818 }
01819
01820 void SSE2_MultiplyBottom16(word *C, const word *A, const word *B)
01821 {
01822 Mul_Begin(8)
01823 #ifndef __GNUC__
01824 ASJ( jmp, 0, f)
01825 Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01826 AS1( ret) ASL(0)
01827 #endif
01828 Mul_Column0(0, 2)
01829 Mul_Column1(1, 3)
01830 Mul_Column0(2, 4)
01831 Mul_Column1(3, 5)
01832 Mul_Column0(4, 6)
01833 Mul_Column1(5, 7)
01834 Bot_SaveAcc(6) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
01835 Bot_End(8)
01836 }
01837
01838 void SSE2_MultiplyBottom32(word *C, const word *A, const word *B)
01839 {
01840 Mul_Begin(16)
01841 #ifndef __GNUC__
01842 ASJ( jmp, 0, f)
01843 Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01844 AS1( ret) ASL(0)
01845 #endif
01846 Mul_Column0(0, 2)
01847 Mul_Column1(1, 3)
01848 Mul_Column0(2, 4)
01849 Mul_Column1(3, 5)
01850 Mul_Column0(4, 6)
01851 Mul_Column1(5, 7)
01852 Mul_Column0(6, 8)
01853 Mul_Column1(7, 9)
01854 Mul_Column0(8, 10)
01855 Mul_Column1(9, 11)
01856 Mul_Column0(10, 12)
01857 Mul_Column1(11, 13)
01858 Mul_Column0(12, 14)
01859 Mul_Column1(13, 15)
01860 Bot_SaveAcc(14) Bot_Acc(16) Bot_Acc(15) Bot_Acc(14) Bot_Acc(13) Bot_Acc(12) Bot_Acc(11) Bot_Acc(10) Bot_Acc(9) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
01861 Bot_End(16)
01862 }
01863
01864 void SSE2_MultiplyTop8(word *C, const word *A, const word *B, word L)
01865 {
01866 Top_Begin(4)
01867 Top_Acc(3) Top_Acc(2) Top_Acc(1)
01868 #ifndef __GNUC__
01869 ASJ( jmp, 0, f)
01870 Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01871 AS1( ret) ASL(0)
01872 #endif
01873 Top_Column0(4)
01874 Top_Column1(3)
01875 Mul_Column0(0, 2)
01876 Top_End(2)
01877 }
01878
01879 void SSE2_MultiplyTop16(word *C, const word *A, const word *B, word L)
01880 {
01881 Top_Begin(8)
01882 Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
01883 #ifndef __GNUC__
01884 ASJ( jmp, 0, f)
01885 Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01886 AS1( ret) ASL(0)
01887 #endif
01888 Top_Column0(8)
01889 Top_Column1(7)
01890 Mul_Column0(0, 6)
01891 Mul_Column1(1, 5)
01892 Mul_Column0(2, 4)
01893 Mul_Column1(3, 3)
01894 Mul_Column0(4, 2)
01895 Top_End(4)
01896 }
01897
01898 void SSE2_MultiplyTop32(word *C, const word *A, const word *B, word L)
01899 {
01900 Top_Begin(16)
01901 Top_Acc(15) Top_Acc(14) Top_Acc(13) Top_Acc(12) Top_Acc(11) Top_Acc(10) Top_Acc(9) Top_Acc(8) Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
01902 #ifndef __GNUC__
01903 ASJ( jmp, 0, f)
01904 Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01905 AS1( ret) ASL(0)
01906 #endif
01907 Top_Column0(16)
01908 Top_Column1(15)
01909 Mul_Column0(0, 14)
01910 Mul_Column1(1, 13)
01911 Mul_Column0(2, 12)
01912 Mul_Column1(3, 11)
01913 Mul_Column0(4, 10)
01914 Mul_Column1(5, 9)
01915 Mul_Column0(6, 8)
01916 Mul_Column1(7, 7)
01917 Mul_Column0(8, 6)
01918 Mul_Column1(9, 5)
01919 Mul_Column0(10, 4)
01920 Mul_Column1(11, 3)
01921 Mul_Column0(12, 2)
01922 Top_End(8)
01923 }
01924
01925 #endif // #if CRYPTOPP_INTEGER_SSE2
01926
01927
01928
01929 typedef int (CRYPTOPP_FASTCALL * PAdd)(size_t N, word *C, const word *A, const word *B);
01930 typedef void (* PMul)(word *C, const word *A, const word *B);
01931 typedef void (* PSqu)(word *C, const word *A);
01932 typedef void (* PMulTop)(word *C, const word *A, const word *B, word L);
01933
01934 #if CRYPTOPP_INTEGER_SSE2
01935 static PAdd s_pAdd = &Baseline_Add, s_pSub = &Baseline_Sub;
01936 static size_t s_recursionLimit = 8;
01937 #else
01938 static const size_t s_recursionLimit = 16;
01939 #endif
01940
01941 static PMul s_pMul[9], s_pBot[9];
01942 static PSqu s_pSqu[9];
01943 static PMulTop s_pTop[9];
01944
01945 static void SetFunctionPointers()
01946 {
01947 s_pMul[0] = &Baseline_Multiply2;
01948 s_pBot[0] = &Baseline_MultiplyBottom2;
01949 s_pSqu[0] = &Baseline_Square2;
01950 s_pTop[0] = &Baseline_MultiplyTop2;
01951 s_pTop[1] = &Baseline_MultiplyTop4;
01952
01953 #if CRYPTOPP_INTEGER_SSE2
01954 if (HasSSE2())
01955 {
01956 if (IsP4())
01957 {
01958 s_pAdd = &SSE2_Add;
01959 s_pSub = &SSE2_Sub;
01960 }
01961
01962 s_recursionLimit = 32;
01963
01964 s_pMul[1] = &SSE2_Multiply4;
01965 s_pMul[2] = &SSE2_Multiply8;
01966 s_pMul[4] = &SSE2_Multiply16;
01967 s_pMul[8] = &SSE2_Multiply32;
01968
01969 s_pBot[1] = &SSE2_MultiplyBottom4;
01970 s_pBot[2] = &SSE2_MultiplyBottom8;
01971 s_pBot[4] = &SSE2_MultiplyBottom16;
01972 s_pBot[8] = &SSE2_MultiplyBottom32;
01973
01974 s_pSqu[1] = &SSE2_Square4;
01975 s_pSqu[2] = &SSE2_Square8;
01976 s_pSqu[4] = &SSE2_Square16;
01977 s_pSqu[8] = &SSE2_Square32;
01978
01979 s_pTop[2] = &SSE2_MultiplyTop8;
01980 s_pTop[4] = &SSE2_MultiplyTop16;
01981 s_pTop[8] = &SSE2_MultiplyTop32;
01982 }
01983 else
01984 #endif
01985 {
01986 s_pMul[1] = &Baseline_Multiply4;
01987 s_pMul[2] = &Baseline_Multiply8;
01988
01989 s_pBot[1] = &Baseline_MultiplyBottom4;
01990 s_pBot[2] = &Baseline_MultiplyBottom8;
01991
01992 s_pSqu[1] = &Baseline_Square4;
01993 s_pSqu[2] = &Baseline_Square8;
01994
01995 s_pTop[2] = &Baseline_MultiplyTop8;
01996
01997 #if !CRYPTOPP_INTEGER_SSE2
01998 s_pMul[4] = &Baseline_Multiply16;
01999 s_pBot[4] = &Baseline_MultiplyBottom16;
02000 s_pSqu[4] = &Baseline_Square16;
02001 s_pTop[4] = &Baseline_MultiplyTop16;
02002 #endif
02003 }
02004 }
02005
02006 inline int Add(word *C, const word *A, const word *B, size_t N)
02007 {
02008 #if CRYPTOPP_INTEGER_SSE2
02009 return s_pAdd(N, C, A, B);
02010 #else
02011 return Baseline_Add(N, C, A, B);
02012 #endif
02013 }
02014
02015 inline int Subtract(word *C, const word *A, const word *B, size_t N)
02016 {
02017 #if CRYPTOPP_INTEGER_SSE2
02018 return s_pSub(N, C, A, B);
02019 #else
02020 return Baseline_Sub(N, C, A, B);
02021 #endif
02022 }
02023
02024
02025
02026
02027 #define A0 A
02028 #define A1 (A+N2)
02029 #define B0 B
02030 #define B1 (B+N2)
02031
02032 #define T0 T
02033 #define T1 (T+N2)
02034 #define T2 (T+N)
02035 #define T3 (T+N+N2)
02036
02037 #define R0 R
02038 #define R1 (R+N2)
02039 #define R2 (R+N)
02040 #define R3 (R+N+N2)
02041
02042
02043
02044
02045
02046
02047 void RecursiveMultiply(word *R, word *T, const word *A, const word *B, size_t N)
02048 {
02049 assert(N>=2 && N%2==0);
02050
02051 if (N <= s_recursionLimit)
02052 s_pMul[N/4](R, A, B);
02053 else
02054 {
02055 const size_t N2 = N/2;
02056
02057 size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
02058 Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
02059
02060 size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
02061 Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
02062
02063 RecursiveMultiply(R2, T2, A1, B1, N2);
02064 RecursiveMultiply(T0, T2, R0, R1, N2);
02065 RecursiveMultiply(R0, T2, A0, B0, N2);
02066
02067
02068
02069 int c2 = Add(R2, R2, R1, N2);
02070 int c3 = c2;
02071 c2 += Add(R1, R2, R0, N2);
02072 c3 += Add(R2, R2, R3, N2);
02073
02074 if (AN2 == BN2)
02075 c3 -= Subtract(R1, R1, T0, N);
02076 else
02077 c3 += Add(R1, R1, T0, N);
02078
02079 c3 += Increment(R2, N2, c2);
02080 assert (c3 >= 0 && c3 <= 2);
02081 Increment(R3, N2, c3);
02082 }
02083 }
02084
02085
02086
02087
02088
02089 void RecursiveSquare(word *R, word *T, const word *A, size_t N)
02090 {
02091 assert(N && N%2==0);
02092
02093 if (N <= s_recursionLimit)
02094 s_pSqu[N/4](R, A);
02095 else
02096 {
02097 const size_t N2 = N/2;
02098
02099 RecursiveSquare(R0, T2, A0, N2);
02100 RecursiveSquare(R2, T2, A1, N2);
02101 RecursiveMultiply(T0, T2, A0, A1, N2);
02102
02103 int carry = Add(R1, R1, T0, N);
02104 carry += Add(R1, R1, T0, N);
02105 Increment(R3, N2, carry);
02106 }
02107 }
02108
02109
02110
02111
02112
02113
02114 void RecursiveMultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
02115 {
02116 assert(N>=2 && N%2==0);
02117
02118 if (N <= s_recursionLimit)
02119 s_pBot[N/4](R, A, B);
02120 else
02121 {
02122 const size_t N2 = N/2;
02123
02124 RecursiveMultiply(R, T, A0, B0, N2);
02125 RecursiveMultiplyBottom(T0, T1, A1, B0, N2);
02126 Add(R1, R1, T0, N2);
02127 RecursiveMultiplyBottom(T0, T1, A0, B1, N2);
02128 Add(R1, R1, T0, N2);
02129 }
02130 }
02131
02132
02133
02134
02135
02136
02137
02138 void MultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, size_t N)
02139 {
02140 assert(N>=2 && N%2==0);
02141
02142 if (N <= s_recursionLimit)
02143 s_pTop[N/4](R, A, B, L[N-1]);
02144 else
02145 {
02146 const size_t N2 = N/2;
02147
02148 size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
02149 Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
02150
02151 size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
02152 Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
02153
02154 RecursiveMultiply(T0, T2, R0, R1, N2);
02155 RecursiveMultiply(R0, T2, A1, B1, N2);
02156
02157
02158
02159 int t, c3;
02160 int c2 = Subtract(T2, L+N2, L, N2);
02161
02162 if (AN2 == BN2)
02163 {
02164 c2 -= Add(T2, T2, T0, N2);
02165 t = (Compare(T2, R0, N2) == -1);
02166 c3 = t - Subtract(T2, T2, T1, N2);
02167 }
02168 else
02169 {
02170 c2 += Subtract(T2, T2, T0, N2);
02171 t = (Compare(T2, R0, N2) == -1);
02172 c3 = t + Add(T2, T2, T1, N2);
02173 }
02174
02175 c2 += t;
02176 if (c2 >= 0)
02177 c3 += Increment(T2, N2, c2);
02178 else
02179 c3 -= Decrement(T2, N2, -c2);
02180 c3 += Add(R0, T2, R1, N2);
02181
02182 assert (c3 >= 0 && c3 <= 2);
02183 Increment(R1, N2, c3);
02184 }
02185 }
02186
02187 inline void Multiply(word *R, word *T, const word *A, const word *B, size_t N)
02188 {
02189 RecursiveMultiply(R, T, A, B, N);
02190 }
02191
02192 inline void Square(word *R, word *T, const word *A, size_t N)
02193 {
02194 RecursiveSquare(R, T, A, N);
02195 }
02196
02197 inline void MultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
02198 {
02199 RecursiveMultiplyBottom(R, T, A, B, N);
02200 }
02201
02202
02203
02204
02205
02206
02207 void AsymmetricMultiply(word *R, word *T, const word *A, size_t NA, const word *B, size_t NB)
02208 {
02209 if (NA == NB)
02210 {
02211 if (A == B)
02212 Square(R, T, A, NA);
02213 else
02214 Multiply(R, T, A, B, NA);
02215
02216 return;
02217 }
02218
02219 if (NA > NB)
02220 {
02221 std::swap(A, B);
02222 std::swap(NA, NB);
02223 }
02224
02225 assert(NB % NA == 0);
02226
02227 if (NA==2 && !A[1])
02228 {
02229 switch (A[0])
02230 {
02231 case 0:
02232 SetWords(R, 0, NB+2);
02233 return;
02234 case 1:
02235 CopyWords(R, B, NB);
02236 R[NB] = R[NB+1] = 0;
02237 return;
02238 default:
02239 R[NB] = LinearMultiply(R, B, A[0], NB);
02240 R[NB+1] = 0;
02241 return;
02242 }
02243 }
02244
02245 size_t i;
02246 if ((NB/NA)%2 == 0)
02247 {
02248 Multiply(R, T, A, B, NA);
02249 CopyWords(T+2*NA, R+NA, NA);
02250
02251 for (i=2*NA; i<NB; i+=2*NA)
02252 Multiply(T+NA+i, T, A, B+i, NA);
02253 for (i=NA; i<NB; i+=2*NA)
02254 Multiply(R+i, T, A, B+i, NA);
02255 }
02256 else
02257 {
02258 for (i=0; i<NB; i+=2*NA)
02259 Multiply(R+i, T, A, B+i, NA);
02260 for (i=NA; i<NB; i+=2*NA)
02261 Multiply(T+NA+i, T, A, B+i, NA);
02262 }
02263
02264 if (Add(R+NA, R+NA, T+2*NA, NB-NA))
02265 Increment(R+NB, NA);
02266 }
02267
02268
02269
02270
02271
02272 void RecursiveInverseModPower2(word *R, word *T, const word *A, size_t N)
02273 {
02274 if (N==2)
02275 {
02276 T[0] = AtomicInverseModPower2(A[0]);
02277 T[1] = 0;
02278 s_pBot[0](T+2, T, A);
02279 TwosComplement(T+2, 2);
02280 Increment(T+2, 2, 2);
02281 s_pBot[0](R, T, T+2);
02282 }
02283 else
02284 {
02285 const size_t N2 = N/2;
02286 RecursiveInverseModPower2(R0, T0, A0, N2);
02287 T0[0] = 1;
02288 SetWords(T0+1, 0, N2-1);
02289 MultiplyTop(R1, T1, T0, R0, A0, N2);
02290 MultiplyBottom(T0, T1, R0, A1, N2);
02291 Add(T0, R1, T0, N2);
02292 TwosComplement(T0, N2);
02293 MultiplyBottom(R1, T1, R0, T0, N2);
02294 }
02295 }
02296
02297
02298
02299
02300
02301
02302
02303 void MontgomeryReduce(word *R, word *T, word *X, const word *M, const word *U, size_t N)
02304 {
02305 #if 1
02306 MultiplyBottom(R, T, X, U, N);
02307 MultiplyTop(T, T+N, X, R, M, N);
02308 word borrow = Subtract(T, X+N, T, N);
02309
02310 word carry = Add(T+N, T, M, N);
02311 assert(carry || !borrow);
02312 CopyWords(R, T + (borrow ? N : 0), N);
02313 #elif 0
02314 const word u = 0-U[0];
02315 Declare2Words(p)
02316 for (size_t i=0; i<N; i++)
02317 {
02318 const word t = u * X[i];
02319 word c = 0;
02320 for (size_t j=0; j<N; j+=2)
02321 {
02322 MultiplyWords(p, t, M[j]);
02323 Acc2WordsBy1(p, X[i+j]);
02324 Acc2WordsBy1(p, c);
02325 X[i+j] = LowWord(p);
02326 c = HighWord(p);
02327 MultiplyWords(p, t, M[j+1]);
02328 Acc2WordsBy1(p, X[i+j+1]);
02329 Acc2WordsBy1(p, c);
02330 X[i+j+1] = LowWord(p);
02331 c = HighWord(p);
02332 }
02333
02334 if (Increment(X+N+i, N-i, c))
02335 while (!Subtract(X+N, X+N, M, N)) {}
02336 }
02337
02338 memcpy(R, X+N, N*WORD_SIZE);
02339 #else
02340 __m64 u = _mm_cvtsi32_si64(0-U[0]), p;
02341 for (size_t i=0; i<N; i++)
02342 {
02343 __m64 t = _mm_cvtsi32_si64(X[i]);
02344 t = _mm_mul_su32(t, u);
02345 __m64 c = _mm_setzero_si64();
02346 for (size_t j=0; j<N; j+=2)
02347 {
02348 p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j]));
02349 p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j]));
02350 c = _mm_add_si64(c, p);
02351 X[i+j] = _mm_cvtsi64_si32(c);
02352 c = _mm_srli_si64(c, 32);
02353 p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j+1]));
02354 p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j+1]));
02355 c = _mm_add_si64(c, p);
02356 X[i+j+1] = _mm_cvtsi64_si32(c);
02357 c = _mm_srli_si64(c, 32);
02358 }
02359
02360 if (Increment(X+N+i, N-i, _mm_cvtsi64_si32(c)))
02361 while (!Subtract(X+N, X+N, M, N)) {}
02362 }
02363
02364 memcpy(R, X+N, N*WORD_SIZE);
02365 _mm_empty();
02366 #endif
02367 }
02368
02369
02370
02371
02372
02373
02374
02375
02376 void HalfMontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, const word *V, size_t N)
02377 {
02378 assert(N%2==0 && N>=4);
02379
02380 #define M0 M
02381 #define M1 (M+N2)
02382 #define V0 V
02383 #define V1 (V+N2)
02384
02385 #define X0 X
02386 #define X1 (X+N2)
02387 #define X2 (X+N)
02388 #define X3 (X+N+N2)
02389
02390 const size_t N2 = N/2;
02391 Multiply(T0, T2, V0, X3, N2);
02392 int c2 = Add(T0, T0, X0, N);
02393 MultiplyBottom(T3, T2, T0, U, N2);
02394 MultiplyTop(T2, R, T0, T3, M0, N2);
02395 c2 -= Subtract(T2, T1, T2, N2);
02396 Multiply(T0, R, T3, M1, N2);
02397 c2 -= Subtract(T0, T2, T0, N2);
02398 int c3 = -(int)Subtract(T1, X2, T1, N2);
02399 Multiply(R0, T2, V1, X3, N2);
02400 c3 += Add(R, R, T, N);
02401
02402 if (c2>0)
02403 c3 += Increment(R1, N2);
02404 else if (c2<0)
02405 c3 -= Decrement(R1, N2, -c2);
02406
02407 assert(c3>=-1 && c3<=1);
02408 if (c3>0)
02409 Subtract(R, R, M, N);
02410 else if (c3<0)
02411 Add(R, R, M, N);
02412
02413 #undef M0
02414 #undef M1
02415 #undef V0
02416 #undef V1
02417
02418 #undef X0
02419 #undef X1
02420 #undef X2
02421 #undef X3
02422 }
02423
02424 #undef A0
02425 #undef A1
02426 #undef B0
02427 #undef B1
02428
02429 #undef T0
02430 #undef T1
02431 #undef T2
02432 #undef T3
02433
02434 #undef R0
02435 #undef R1
02436 #undef R2
02437 #undef R3
02438
02439
02440
02441
02442
02443
02444
02445
02446
02447
02448
02449
02450
02451
02452
02453
02454
02455
02456
02457
02458
02459
02460
02461
02462
02463
02464
02465
02466
02467
02468
02469
02470
02471
02472
02473
02474
02475
02476
02477
02478
02479
02480
02481
02482
02483
02484
02485
02486
02487
02488
02489
02490
02491
02492
02493
02494
02495
02496
02497
02498
02499
02500
02501
02502
02503 static inline void AtomicDivide(word *Q, const word *A, const word *B)
02504 {
02505 word T[4];
02506 DWord q = DivideFourWordsByTwo<word, DWord>(T, DWord(A[0], A[1]), DWord(A[2], A[3]), DWord(B[0], B[1]));
02507 Q[0] = q.GetLowHalf();
02508 Q[1] = q.GetHighHalf();
02509
02510 #ifndef NDEBUG
02511 if (B[0] || B[1])
02512 {
02513
02514 assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
02515 word P[4];
02516 s_pMul[0](P, Q, B);
02517 Add(P, P, T, 4);
02518 assert(memcmp(P, A, 4*WORD_SIZE)==0);
02519 }
02520 #endif
02521 }
02522
02523
02524 static void CorrectQuotientEstimate(word *R, word *T, word *Q, const word *B, size_t N)
02525 {
02526 assert(N && N%2==0);
02527
02528 AsymmetricMultiply(T, T+N+2, Q, 2, B, N);
02529
02530 word borrow = Subtract(R, R, T, N+2);
02531 assert(!borrow && !R[N+1]);
02532
02533 while (R[N] || Compare(R, B, N) >= 0)
02534 {
02535 R[N] -= Subtract(R, R, B, N);
02536 Q[1] += (++Q[0]==0);
02537 assert(Q[0] || Q[1]);
02538 }
02539 }
02540
02541
02542
02543
02544
02545
02546
02547 void Divide(word *R, word *Q, word *T, const word *A, size_t NA, const word *B, size_t NB)
02548 {
02549 assert(NA && NB && NA%2==0 && NB%2==0);
02550 assert(B[NB-1] || B[NB-2]);
02551 assert(NB <= NA);
02552
02553
02554 word *const TA=T;
02555 word *const TB=T+NA+2;
02556 word *const TP=T+NA+2+NB;
02557
02558
02559 unsigned shiftWords = (B[NB-1]==0);
02560 TB[0] = TB[NB-1] = 0;
02561 CopyWords(TB+shiftWords, B, NB-shiftWords);
02562 unsigned shiftBits = WORD_BITS - BitPrecision(TB[NB-1]);
02563 assert(shiftBits < WORD_BITS);
02564 ShiftWordsLeftByBits(TB, NB, shiftBits);
02565
02566
02567 TA[0] = TA[NA] = TA[NA+1] = 0;
02568 CopyWords(TA+shiftWords, A, NA);
02569 ShiftWordsLeftByBits(TA, NA+2, shiftBits);
02570
02571 if (TA[NA+1]==0 && TA[NA] <= 1)
02572 {
02573 Q[NA-NB+1] = Q[NA-NB] = 0;
02574 while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0)
02575 {
02576 TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB);
02577 ++Q[NA-NB];
02578 }
02579 }
02580 else
02581 {
02582 NA+=2;
02583 assert(Compare(TA+NA-NB, TB, NB) < 0);
02584 }
02585
02586 word BT[2];
02587 BT[0] = TB[NB-2] + 1;
02588 BT[1] = TB[NB-1] + (BT[0]==0);
02589
02590
02591 for (size_t i=NA-2; i>=NB; i-=2)
02592 {
02593 AtomicDivide(Q+i-NB, TA+i-2, BT);
02594 CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB);
02595 }
02596
02597
02598 CopyWords(R, TA+shiftWords, NB);
02599 ShiftWordsRightByBits(R, NB, shiftBits);
02600 }
02601
02602 static inline size_t EvenWordCount(const word *X, size_t N)
02603 {
02604 while (N && X[N-2]==0 && X[N-1]==0)
02605 N-=2;
02606 return N;
02607 }
02608
02609
02610
02611
02612
02613
02614
02615 unsigned int AlmostInverse(word *R, word *T, const word *A, size_t NA, const word *M, size_t N)
02616 {
02617 assert(NA<=N && N && N%2==0);
02618
02619 word *b = T;
02620 word *c = T+N;
02621 word *f = T+2*N;
02622 word *g = T+3*N;
02623 size_t bcLen=2, fgLen=EvenWordCount(M, N);
02624 unsigned int k=0, s=0;
02625
02626 SetWords(T, 0, 3*N);
02627 b[0]=1;
02628 CopyWords(f, A, NA);
02629 CopyWords(g, M, N);
02630
02631 while (1)
02632 {
02633 word t=f[0];
02634 while (!t)
02635 {
02636 if (EvenWordCount(f, fgLen)==0)
02637 {
02638 SetWords(R, 0, N);
02639 return 0;
02640 }
02641
02642 ShiftWordsRightByWords(f, fgLen, 1);
02643 if (c[bcLen-1]) bcLen+=2;
02644 assert(bcLen <= N);
02645 ShiftWordsLeftByWords(c, bcLen, 1);
02646 k+=WORD_BITS;
02647 t=f[0];
02648 }
02649
02650 unsigned int i=0;
02651 while (t%2 == 0)
02652 {
02653 t>>=1;
02654 i++;
02655 }
02656 k+=i;
02657
02658 if (t==1 && f[1]==0 && EvenWordCount(f, fgLen)==2)
02659 {
02660 if (s%2==0)
02661 CopyWords(R, b, N);
02662 else
02663 Subtract(R, M, b, N);
02664 return k;
02665 }
02666
02667 ShiftWordsRightByBits(f, fgLen, i);
02668 t=ShiftWordsLeftByBits(c, bcLen, i);
02669 if (t)
02670 {
02671 c[bcLen] = t;
02672 bcLen+=2;
02673 assert(bcLen <= N);
02674 }
02675
02676 if (f[fgLen-2]==0 && g[fgLen-2]==0 && f[fgLen-1]==0 && g[fgLen-1]==0)
02677 fgLen-=2;
02678
02679 if (Compare(f, g, fgLen)==-1)
02680 {
02681 std::swap(f, g);
02682 std::swap(b, c);
02683 s++;
02684 }
02685
02686 Subtract(f, f, g, fgLen);
02687
02688 if (Add(b, b, c, bcLen))
02689 {
02690 b[bcLen] = 1;
02691 bcLen+=2;
02692 assert(bcLen <= N);
02693 }
02694 }
02695 }
02696
02697
02698
02699
02700
02701 void DivideByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
02702 {
02703 CopyWords(R, A, N);
02704
02705 while (k--)
02706 {
02707 if (R[0]%2==0)
02708 ShiftWordsRightByBits(R, N, 1);
02709 else
02710 {
02711 word carry = Add(R, R, M, N);
02712 ShiftWordsRightByBits(R, N, 1);
02713 R[N-1] += carry<<(WORD_BITS-1);
02714 }
02715 }
02716 }
02717
02718
02719
02720
02721
02722 void MultiplyByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
02723 {
02724 CopyWords(R, A, N);
02725
02726 while (k--)
02727 if (ShiftWordsLeftByBits(R, N, 1) || Compare(R, M, N)>=0)
02728 Subtract(R, R, M, N);
02729 }
02730
02731
02732
02733 InitializeInteger::InitializeInteger()
02734 {
02735 if (!g_pAssignIntToInteger)
02736 {
02737 SetFunctionPointers();
02738 g_pAssignIntToInteger = AssignIntToInteger;
02739 }
02740 }
02741
02742 static const unsigned int RoundupSizeTable[] = {2, 2, 2, 4, 4, 8, 8, 8, 8};
02743
02744 static inline size_t RoundupSize(size_t n)
02745 {
02746 if (n<=8)
02747 return RoundupSizeTable[n];
02748 else if (n<=16)
02749 return 16;
02750 else if (n<=32)
02751 return 32;
02752 else if (n<=64)
02753 return 64;
02754 else return size_t(1) << BitPrecision(n-1);
02755 }
02756
02757 Integer::Integer()
02758 : reg(2), sign(POSITIVE)
02759 {
02760 reg[0] = reg[1] = 0;
02761 }
02762
02763 Integer::Integer(const Integer& t)
02764 : reg(RoundupSize(t.WordCount())), sign(t.sign)
02765 {
02766 CopyWords(reg, t.reg, reg.size());
02767 }
02768
02769 Integer::Integer(Sign s, lword value)
02770 : reg(2), sign(s)
02771 {
02772 reg[0] = word(value);
02773 reg[1] = word(SafeRightShift<WORD_BITS>(value));
02774 }
02775
02776 Integer::Integer(signed long value)
02777 : reg(2)
02778 {
02779 if (value >= 0)
02780 sign = POSITIVE;
02781 else
02782 {
02783 sign = NEGATIVE;
02784 value = -value;
02785 }
02786 reg[0] = word(value);
02787 reg[1] = word(SafeRightShift<WORD_BITS>((unsigned long)value));
02788 }
02789
02790 Integer::Integer(Sign s, word high, word low)
02791 : reg(2), sign(s)
02792 {
02793 reg[0] = low;
02794 reg[1] = high;
02795 }
02796
02797 bool Integer::IsConvertableToLong() const
02798 {
02799 if (ByteCount() > sizeof(long))
02800 return false;
02801
02802 unsigned long value = (unsigned long)reg[0];
02803 value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
02804
02805 if (sign==POSITIVE)
02806 return (signed long)value >= 0;
02807 else
02808 return -(signed long)value < 0;
02809 }
02810
02811 signed long Integer::ConvertToLong() const
02812 {
02813 assert(IsConvertableToLong());
02814
02815 unsigned long value = (unsigned long)reg[0];
02816 value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
02817 return sign==POSITIVE ? value : -(signed long)value;
02818 }
02819
02820 Integer::Integer(BufferedTransformation &encodedInteger, size_t byteCount, Signedness s)
02821 {
02822 Decode(encodedInteger, byteCount, s);
02823 }
02824
02825 Integer::Integer(const byte *encodedInteger, size_t byteCount, Signedness s)
02826 {
02827 Decode(encodedInteger, byteCount, s);
02828 }
02829
02830 Integer::Integer(BufferedTransformation &bt)
02831 {
02832 BERDecode(bt);
02833 }
02834
02835 Integer::Integer(RandomNumberGenerator &rng, size_t bitcount)
02836 {
02837 Randomize(rng, bitcount);
02838 }
02839
02840 Integer::Integer(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
02841 {
02842 if (!Randomize(rng, min, max, rnType, equiv, mod))
02843 throw Integer::RandomNumberNotFound();
02844 }
02845
02846 Integer Integer::Power2(size_t e)
02847 {
02848 Integer r((word)0, BitsToWords(e+1));
02849 r.SetBit(e);
02850 return r;
02851 }
02852
02853 template <long i>
02854 struct NewInteger
02855 {
02856 Integer * operator()() const
02857 {
02858 return new Integer(i);
02859 }
02860 };
02861
02862 const Integer &Integer::Zero()
02863 {
02864 return Singleton<Integer>().Ref();
02865 }
02866
02867 const Integer &Integer::One()
02868 {
02869 return Singleton<Integer, NewInteger<1> >().Ref();
02870 }
02871
02872 const Integer &Integer::Two()
02873 {
02874 return Singleton<Integer, NewInteger<2> >().Ref();
02875 }
02876
02877 bool Integer::operator!() const
02878 {
02879 return IsNegative() ? false : (reg[0]==0 && WordCount()==0);
02880 }
02881
02882 Integer& Integer::operator=(const Integer& t)
02883 {
02884 if (this != &t)
02885 {
02886 if (reg.size() != t.reg.size() || t.reg[t.reg.size()/2] == 0)
02887 reg.New(RoundupSize(t.WordCount()));
02888 CopyWords(reg, t.reg, reg.size());
02889 sign = t.sign;
02890 }
02891 return *this;
02892 }
02893
02894 bool Integer::GetBit(size_t n) const
02895 {
02896 if (n/WORD_BITS >= reg.size())
02897 return 0;
02898 else
02899 return bool((reg[n/WORD_BITS] >> (n % WORD_BITS)) & 1);
02900 }
02901
02902 void Integer::SetBit(size_t n, bool value)
02903 {
02904 if (value)
02905 {
02906 reg.CleanGrow(RoundupSize(BitsToWords(n+1)));
02907 reg[n/WORD_BITS] |= (word(1) << (n%WORD_BITS));
02908 }
02909 else
02910 {
02911 if (n/WORD_BITS < reg.size())
02912 reg[n/WORD_BITS] &= ~(word(1) << (n%WORD_BITS));
02913 }
02914 }
02915
02916 byte Integer::GetByte(size_t n) const
02917 {
02918 if (n/WORD_SIZE >= reg.size())
02919 return 0;
02920 else
02921 return byte(reg[n/WORD_SIZE] >> ((n%WORD_SIZE)*8));
02922 }
02923
02924 void Integer::SetByte(size_t n, byte value)
02925 {
02926 reg.CleanGrow(RoundupSize(BytesToWords(n+1)));
02927 reg[n/WORD_SIZE] &= ~(word(0xff) << 8*(n%WORD_SIZE));
02928 reg[n/WORD_SIZE] |= (word(value) << 8*(n%WORD_SIZE));
02929 }
02930
02931 lword Integer::GetBits(size_t i, size_t n) const
02932 {
02933 lword v = 0;
02934 assert(n <= sizeof(v)*8);
02935 for (unsigned int j=0; j<n; j++)
02936 v |= lword(GetBit(i+j)) << j;
02937 return v;
02938 }
02939
02940 Integer Integer::operator-() const
02941 {
02942 Integer result(*this);
02943 result.Negate();
02944 return result;
02945 }
02946
02947 Integer Integer::AbsoluteValue() const
02948 {
02949 Integer result(*this);
02950 result.sign = POSITIVE;
02951 return result;
02952 }
02953
02954 void Integer::swap(Integer &a)
02955 {
02956 reg.swap(a.reg);
02957 std::swap(sign, a.sign);
02958 }
02959
02960 Integer::Integer(word value, size_t length)
02961 : reg(RoundupSize(length)), sign(POSITIVE)
02962 {
02963 reg[0] = value;
02964 SetWords(reg+1, 0, reg.size()-1);
02965 }
02966
02967 template <class T>
02968 static Integer StringToInteger(const T *str)
02969 {
02970 int radix;
02971
02972
02973 unsigned int length;
02974 for (length = 0; str[length] != 0; length++) {}
02975
02976 Integer v;
02977
02978 if (length == 0)
02979 return v;
02980
02981 switch (str[length-1])
02982 {
02983 case 'h':
02984 case 'H':
02985 radix=16;
02986 break;
02987 case 'o':
02988 case 'O':
02989 radix=8;
02990 break;
02991 case 'b':
02992 case 'B':
02993 radix=2;
02994 break;
02995 default:
02996 radix=10;
02997 }
02998
02999 if (length > 2 && str[0] == '0' && str[1] == 'x')
03000 radix = 16;
03001
03002 for (unsigned i=0; i<length; i++)
03003 {
03004 int digit;
03005
03006 if (str[i] >= '0' && str[i] <= '9')
03007 digit = str[i] - '0';
03008 else if (str[i] >= 'A' && str[i] <= 'F')
03009 digit = str[i] - 'A' + 10;
03010 else if (str[i] >= 'a' && str[i] <= 'f')
03011 digit = str[i] - 'a' + 10;
03012 else
03013 digit = radix;
03014
03015 if (digit < radix)
03016 {
03017 v *= radix;
03018 v += digit;
03019 }
03020 }
03021
03022 if (str[0] == '-')
03023 v.Negate();
03024
03025 return v;
03026 }
03027
03028 Integer::Integer(const char *str)
03029 : reg(2), sign(POSITIVE)
03030 {
03031 *this = StringToInteger(str);
03032 }
03033
03034 Integer::Integer(const wchar_t *str)
03035 : reg(2), sign(POSITIVE)
03036 {
03037 *this = StringToInteger(str);
03038 }
03039
03040 unsigned int Integer::WordCount() const
03041 {
03042 return (unsigned int)CountWords(reg, reg.size());
03043 }
03044
03045 unsigned int Integer::ByteCount() const
03046 {
03047 unsigned wordCount = WordCount();
03048 if (wordCount)
03049 return (wordCount-1)*WORD_SIZE + BytePrecision(reg[wordCount-1]);
03050 else
03051 return 0;
03052 }
03053
03054 unsigned int Integer::BitCount() const
03055 {
03056 unsigned wordCount = WordCount();
03057 if (wordCount)
03058 return (wordCount-1)*WORD_BITS + BitPrecision(reg[wordCount-1]);
03059 else
03060 return 0;
03061 }
03062
03063 void Integer::Decode(const byte *input, size_t inputLen, Signedness s)
03064 {
03065 StringStore store(input, inputLen);
03066 Decode(store, inputLen, s);
03067 }
03068
03069 void Integer::Decode(BufferedTransformation &bt, size_t inputLen, Signedness s)
03070 {
03071 assert(bt.MaxRetrievable() >= inputLen);
03072
03073 byte b;
03074 bt.Peek(b);
03075 sign = ((s==SIGNED) && (b & 0x80)) ? NEGATIVE : POSITIVE;
03076
03077 while (inputLen>0 && (sign==POSITIVE ? b==0 : b==0xff))
03078 {
03079 bt.Skip(1);
03080 inputLen--;
03081 bt.Peek(b);
03082 }
03083
03084 reg.CleanNew(RoundupSize(BytesToWords(inputLen)));
03085
03086 for (size_t i=inputLen; i > 0; i--)
03087 {
03088 bt.Get(b);
03089 reg[(i-1)/WORD_SIZE] |= word(b) << ((i-1)%WORD_SIZE)*8;
03090 }
03091
03092 if (sign == NEGATIVE)
03093 {
03094 for (size_t i=inputLen; i<reg.size()*WORD_SIZE; i++)
03095 reg[i/WORD_SIZE] |= word(0xff) << (i%WORD_SIZE)*8;
03096 TwosComplement(reg, reg.size());
03097 }
03098 }
03099
03100 size_t Integer::MinEncodedSize(Signedness signedness) const
03101 {
03102 unsigned int outputLen = STDMAX(1U, ByteCount());
03103 if (signedness == UNSIGNED)
03104 return outputLen;
03105 if (NotNegative() && (GetByte(outputLen-1) & 0x80))
03106 outputLen++;
03107 if (IsNegative() && *this < -Power2(outputLen*8-1))
03108 outputLen++;
03109 return outputLen;
03110 }
03111
03112 void Integer::Encode(byte *output, size_t outputLen, Signedness signedness) const
03113 {
03114 ArraySink sink(output, outputLen);
03115 Encode(sink, outputLen, signedness);
03116 }
03117
03118 void Integer::Encode(BufferedTransformation &bt, size_t outputLen, Signedness signedness) const
03119 {
03120 if (signedness == UNSIGNED || NotNegative())
03121 {
03122 for (size_t i=outputLen; i > 0; i--)
03123 bt.Put(GetByte(i-1));
03124 }
03125 else
03126 {
03127
03128 Integer temp = Integer::Power2(8*STDMAX((size_t)ByteCount(), outputLen)) + *this;
03129 temp.Encode(bt, outputLen, UNSIGNED);
03130 }
03131 }
03132
03133 void Integer::DEREncode(BufferedTransformation &bt) const
03134 {
03135 DERGeneralEncoder enc(bt, INTEGER);
03136 Encode(enc, MinEncodedSize(SIGNED), SIGNED);
03137 enc.MessageEnd();
03138 }
03139
03140 void Integer::BERDecode(const byte *input, size_t len)
03141 {
03142 StringStore store(input, len);
03143 BERDecode(store);
03144 }
03145
03146 void Integer::BERDecode(BufferedTransformation &bt)
03147 {
03148 BERGeneralDecoder dec(bt, INTEGER);
03149 if (!dec.IsDefiniteLength() || dec.MaxRetrievable() < dec.RemainingLength())
03150 BERDecodeError();
03151 Decode(dec, (size_t)dec.RemainingLength(), SIGNED);
03152 dec.MessageEnd();
03153 }
03154
03155 void Integer::DEREncodeAsOctetString(BufferedTransformation &bt, size_t length) const
03156 {
03157 DERGeneralEncoder enc(bt, OCTET_STRING);
03158 Encode(enc, length);
03159 enc.MessageEnd();
03160 }
03161
03162 void Integer::BERDecodeAsOctetString(BufferedTransformation &bt, size_t length)
03163 {
03164 BERGeneralDecoder dec(bt, OCTET_STRING);
03165 if (!dec.IsDefiniteLength() || dec.RemainingLength() != length)
03166 BERDecodeError();
03167 Decode(dec, length);
03168 dec.MessageEnd();
03169 }
03170
03171 size_t Integer::OpenPGPEncode(byte *output, size_t len) const
03172 {
03173 ArraySink sink(output, len);
03174 return OpenPGPEncode(sink);
03175 }
03176
03177 size_t Integer::OpenPGPEncode(BufferedTransformation &bt) const
03178 {
03179 word16 bitCount = BitCount();
03180 bt.PutWord16(bitCount);
03181 size_t byteCount = BitsToBytes(bitCount);
03182 Encode(bt, byteCount);
03183 return 2 + byteCount;
03184 }
03185
03186 void Integer::OpenPGPDecode(const byte *input, size_t len)
03187 {
03188 StringStore store(input, len);
03189 OpenPGPDecode(store);
03190 }
03191
03192 void Integer::OpenPGPDecode(BufferedTransformation &bt)
03193 {
03194 word16 bitCount;
03195 if (bt.GetWord16(bitCount) != 2 || bt.MaxRetrievable() < BitsToBytes(bitCount))
03196 throw OpenPGPDecodeErr();
03197 Decode(bt, BitsToBytes(bitCount));
03198 }
03199
03200 void Integer::Randomize(RandomNumberGenerator &rng, size_t nbits)
03201 {
03202 const size_t nbytes = nbits/8 + 1;
03203 SecByteBlock buf(nbytes);
03204 rng.GenerateBlock(buf, nbytes);
03205 if (nbytes)
03206 buf[0] = (byte)Crop(buf[0], nbits % 8);
03207 Decode(buf, nbytes, UNSIGNED);
03208 }
03209
03210 void Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max)
03211 {
03212 if (min > max)
03213 throw InvalidArgument("Integer: Min must be no greater than Max");
03214
03215 Integer range = max - min;
03216 const unsigned int nbits = range.BitCount();
03217
03218 do
03219 {
03220 Randomize(rng, nbits);
03221 }
03222 while (*this > range);
03223
03224 *this += min;
03225 }
03226
03227 bool Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
03228 {
03229 return GenerateRandomNoThrow(rng, MakeParameters("Min", min)("Max", max)("RandomNumberType", rnType)("EquivalentTo", equiv)("Mod", mod));
03230 }
03231
03232 class KDF2_RNG : public RandomNumberGenerator
03233 {
03234 public:
03235 KDF2_RNG(const byte *seed, size_t seedSize)
03236 : m_counter(0), m_counterAndSeed(seedSize + 4)
03237 {
03238 memcpy(m_counterAndSeed + 4, seed, seedSize);
03239 }
03240
03241 void GenerateBlock(byte *output, size_t size)
03242 {
03243 PutWord(false, BIG_ENDIAN_ORDER, m_counterAndSeed, m_counter);
03244 ++m_counter;
03245 P1363_KDF2<SHA1>::DeriveKey(output, size, m_counterAndSeed, m_counterAndSeed.size(), NULL, 0);
03246 }
03247
03248 private:
03249 word32 m_counter;
03250 SecByteBlock m_counterAndSeed;
03251 };
03252
03253 bool Integer::GenerateRandomNoThrow(RandomNumberGenerator &i_rng, const NameValuePairs ¶ms)
03254 {
03255 Integer min = params.GetValueWithDefault("Min", Integer::Zero());
03256 Integer max;
03257 if (!params.GetValue("Max", max))
03258 {
03259 int bitLength;
03260 if (params.GetIntValue("BitLength", bitLength))
03261 max = Integer::Power2(bitLength);
03262 else
03263 throw InvalidArgument("Integer: missing Max argument");
03264 }
03265 if (min > max)
03266 throw InvalidArgument("Integer: Min must be no greater than Max");
03267
03268 Integer equiv = params.GetValueWithDefault("EquivalentTo", Integer::Zero());
03269 Integer mod = params.GetValueWithDefault("Mod", Integer::One());
03270
03271 if (equiv.IsNegative() || equiv >= mod)
03272 throw InvalidArgument("Integer: invalid EquivalentTo and/or Mod argument");
03273
03274 Integer::RandomNumberType rnType = params.GetValueWithDefault("RandomNumberType", Integer::ANY);
03275
03276 member_ptr<KDF2_RNG> kdf2Rng;
03277 ConstByteArrayParameter seed;
03278 if (params.GetValue("Seed", seed))
03279 {
03280 ByteQueue bq;
03281 DERSequenceEncoder seq(bq);
03282 min.DEREncode(seq);
03283 max.DEREncode(seq);
03284 equiv.DEREncode(seq);
03285 mod.DEREncode(seq);
03286 DEREncodeUnsigned(seq, rnType);
03287 DEREncodeOctetString(seq, seed.begin(), seed.size());
03288 seq.MessageEnd();
03289
03290 SecByteBlock finalSeed((size_t)bq.MaxRetrievable());
03291 bq.Get(finalSeed, finalSeed.size());
03292 kdf2Rng.reset(new KDF2_RNG(finalSeed.begin(), finalSeed.size()));
03293 }
03294 RandomNumberGenerator &rng = kdf2Rng.get() ? (RandomNumberGenerator &)*kdf2Rng : i_rng;
03295
03296 switch (rnType)
03297 {
03298 case ANY:
03299 if (mod == One())
03300 Randomize(rng, min, max);
03301 else
03302 {
03303 Integer min1 = min + (equiv-min)%mod;
03304 if (max < min1)
03305 return false;
03306 Randomize(rng, Zero(), (max - min1) / mod);
03307 *this *= mod;
03308 *this += min1;
03309 }
03310 return true;
03311
03312 case PRIME:
03313 {
03314 const PrimeSelector *pSelector = params.GetValueWithDefault(Name::PointerToPrimeSelector(), (const PrimeSelector *)NULL);
03315
03316 int i;
03317 i = 0;
03318 while (1)
03319 {
03320 if (++i==16)
03321 {
03322
03323 Integer first = min;
03324 if (FirstPrime(first, max, equiv, mod, pSelector))
03325 {
03326
03327 *this = first;
03328 if (!FirstPrime(first, max, equiv, mod, pSelector))
03329 return true;
03330 }
03331 else
03332 return false;
03333 }
03334
03335 Randomize(rng, min, max);
03336 if (FirstPrime(*this, STDMIN(*this+mod*PrimeSearchInterval(max), max), equiv, mod, pSelector))
03337 return true;
03338 }
03339 }
03340
03341 default:
03342 throw InvalidArgument("Integer: invalid RandomNumberType argument");
03343 }
03344 }
03345
03346 std::istream& operator>>(std::istream& in, Integer &a)
03347 {
03348 char c;
03349 unsigned int length = 0;
03350 SecBlock<char> str(length + 16);
03351
03352 std::ws(in);
03353
03354 do
03355 {
03356 in.read(&c, 1);
03357 str[length++] = c;
03358 if (length >= str.size())
03359 str.Grow(length + 16);
03360 }
03361 while (in && (c=='-' || c=='x' || (c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F') || c=='h' || c=='H' || c=='o' || c=='O' || c==',' || c=='.'));
03362
03363 if (in.gcount())
03364 in.putback(c);
03365 str[length-1] = '\0';
03366 a = Integer(str);
03367
03368 return in;
03369 }
03370
03371 std::ostream& operator<<(std::ostream& out, const Integer &a)
03372 {
03373
03374 long f = out.flags() & std::ios::basefield;
03375 int base, block;
03376 char suffix;
03377 switch(f)
03378 {
03379 case std::ios::oct :
03380 base = 8;
03381 block = 8;
03382 suffix = 'o';
03383 break;
03384 case std::ios::hex :
03385 base = 16;
03386 block = 4;
03387 suffix = 'h';
03388 break;
03389 default :
03390 base = 10;
03391 block = 3;
03392 suffix = '.';
03393 }
03394
03395 SecBlock<char> s(a.BitCount() / (BitPrecision(base)-1) + 1);
03396 Integer temp1=a, temp2;
03397 unsigned i=0;
03398 const char vec[]="0123456789ABCDEF";
03399
03400 if (a.IsNegative())
03401 {
03402 out << '-';
03403 temp1.Negate();
03404 }
03405
03406 if (!a)
03407 out << '0';
03408
03409 while (!!temp1)
03410 {
03411 word digit;
03412 Integer::Divide(digit, temp2, temp1, base);
03413 s[i++]=vec[digit];
03414 temp1=temp2;
03415 }
03416
03417 while (i--)
03418 {
03419 out << s[i];
03420
03421
03422 }
03423 return out << suffix;
03424 }
03425
03426 Integer& Integer::operator++()
03427 {
03428 if (NotNegative())
03429 {
03430 if (Increment(reg, reg.size()))
03431 {
03432 reg.CleanGrow(2*reg.size());
03433 reg[reg.size()/2]=1;
03434 }
03435 }
03436 else
03437 {
03438 word borrow = Decrement(reg, reg.size());
03439 assert(!borrow);
03440 if (WordCount()==0)
03441 *this = Zero();
03442 }
03443 return *this;
03444 }
03445
03446 Integer& Integer::operator--()
03447 {
03448 if (IsNegative())
03449 {
03450 if (Increment(reg, reg.size()))
03451 {
03452 reg.CleanGrow(2*reg.size());
03453 reg[reg.size()/2]=1;
03454 }
03455 }
03456 else
03457 {
03458 if (Decrement(reg, reg.size()))
03459 *this = -One();
03460 }
03461 return *this;
03462 }
03463
03464 void PositiveAdd(Integer &sum, const Integer &a, const Integer& b)
03465 {
03466 int carry;
03467 if (a.reg.size() == b.reg.size())
03468 carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
03469 else if (a.reg.size() > b.reg.size())
03470 {
03471 carry = Add(sum.reg, a.reg, b.reg, b.reg.size());
03472 CopyWords(sum.reg+b.reg.size(), a.reg+b.reg.size(), a.reg.size()-b.reg.size());
03473 carry = Increment(sum.reg+b.reg.size(), a.reg.size()-b.reg.size(), carry);
03474 }
03475 else
03476 {
03477 carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
03478 CopyWords(sum.reg+a.reg.size(), b.reg+a.reg.size(), b.reg.size()-a.reg.size());
03479 carry = Increment(sum.reg+a.reg.size(), b.reg.size()-a.reg.size(), carry);
03480 }
03481
03482 if (carry)
03483 {
03484 sum.reg.CleanGrow(2*sum.reg.size());
03485 sum.reg[sum.reg.size()/2] = 1;
03486 }
03487 sum.sign = Integer::POSITIVE;
03488 }
03489
03490 void PositiveSubtract(Integer &diff, const Integer &a, const Integer& b)
03491 {
03492 unsigned aSize = a.WordCount();
03493 aSize += aSize%2;
03494 unsigned bSize = b.WordCount();
03495 bSize += bSize%2;
03496
03497 if (aSize == bSize)
03498 {
03499 if (Compare(a.reg, b.reg, aSize) >= 0)
03500 {
03501 Subtract(diff.reg, a.reg, b.reg, aSize);
03502 diff.sign = Integer::POSITIVE;
03503 }
03504 else
03505 {
03506 Subtract(diff.reg, b.reg, a.reg, aSize);
03507 diff.sign = Integer::NEGATIVE;
03508 }
03509 }
03510 else if (aSize > bSize)
03511 {
03512 word borrow = Subtract(diff.reg, a.reg, b.reg, bSize);
03513 CopyWords(diff.reg+bSize, a.reg+bSize, aSize-bSize);
03514 borrow = Decrement(diff.reg+bSize, aSize-bSize, borrow);
03515 assert(!borrow);
03516 diff.sign = Integer::POSITIVE;
03517 }
03518 else
03519 {
03520 word borrow = Subtract(diff.reg, b.reg, a.reg, aSize);
03521 CopyWords(diff.reg+aSize, b.reg+aSize, bSize-aSize);
03522 borrow = Decrement(diff.reg+aSize, bSize-aSize, borrow);
03523 assert(!borrow);
03524 diff.sign = Integer::NEGATIVE;
03525 }
03526 }
03527
03528
03529 template <class T> inline const T& STDMAX2(const T& a, const T& b)
03530 {
03531 return a < b ? b : a;
03532 }
03533
03534 Integer Integer::Plus(const Integer& b) const
03535 {
03536 Integer sum((word)0, STDMAX2(reg.size(), b.reg.size()));
03537 if (NotNegative())
03538 {
03539 if (b.NotNegative())
03540 PositiveAdd(sum, *this, b);
03541 else
03542 PositiveSubtract(sum, *this, b);
03543 }
03544 else
03545 {
03546 if (b.NotNegative())
03547 PositiveSubtract(sum, b, *this);
03548 else
03549 {
03550 PositiveAdd(sum, *this, b);
03551 sum.sign = Integer::NEGATIVE;
03552 }
03553 }
03554 return sum;
03555 }
03556
03557 Integer& Integer::operator+=(const Integer& t)
03558 {
03559 reg.CleanGrow(t.reg.size());
03560 if (NotNegative())
03561 {
03562 if (t.NotNegative())
03563 PositiveAdd(*this, *this, t);
03564 else
03565 PositiveSubtract(*this, *this, t);
03566 }
03567 else
03568 {
03569 if (t.NotNegative())
03570 PositiveSubtract(*this, t, *this);
03571 else
03572 {
03573 PositiveAdd(*this, *this, t);
03574 sign = Integer::NEGATIVE;
03575 }
03576 }
03577 return *this;
03578 }
03579
03580 Integer Integer::Minus(const Integer& b) const
03581 {
03582 Integer diff((word)0, STDMAX2(reg.size(), b.reg.size()));
03583 if (NotNegative())
03584 {
03585 if (b.NotNegative())
03586 PositiveSubtract(diff, *this, b);
03587 else
03588 PositiveAdd(diff, *this, b);
03589 }
03590 else
03591 {
03592 if (b.NotNegative())
03593 {
03594 PositiveAdd(diff, *this, b);
03595 diff.sign = Integer::NEGATIVE;
03596 }
03597 else
03598 PositiveSubtract(diff, b, *this);
03599 }
03600 return diff;
03601 }
03602
03603 Integer& Integer::operator-=(const Integer& t)
03604 {
03605 reg.CleanGrow(t.reg.size());
03606 if (NotNegative())
03607 {
03608 if (t.NotNegative())
03609 PositiveSubtract(*this, *this, t);
03610 else
03611 PositiveAdd(*this, *this, t);
03612 }
03613 else
03614 {
03615 if (t.NotNegative())
03616 {
03617 PositiveAdd(*this, *this, t);
03618 sign = Integer::NEGATIVE;
03619 }
03620 else
03621 PositiveSubtract(*this, t, *this);
03622 }
03623 return *this;
03624 }
03625
03626 Integer& Integer::operator<<=(size_t n)
03627 {
03628 const size_t wordCount = WordCount();
03629 const size_t shiftWords = n / WORD_BITS;
03630 const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
03631
03632 reg.CleanGrow(RoundupSize(wordCount+BitsToWords(n)));
03633 ShiftWordsLeftByWords(reg, wordCount + shiftWords, shiftWords);
03634 ShiftWordsLeftByBits(reg+shiftWords, wordCount+BitsToWords(shiftBits), shiftBits);
03635 return *this;
03636 }
03637
03638 Integer& Integer::operator>>=(size_t n)
03639 {
03640 const size_t wordCount = WordCount();
03641 const size_t shiftWords = n / WORD_BITS;
03642 const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
03643
03644 ShiftWordsRightByWords(reg, wordCount, shiftWords);
03645 if (wordCount > shiftWords)
03646 ShiftWordsRightByBits(reg, wordCount-shiftWords, shiftBits);
03647 if (IsNegative() && WordCount()==0)
03648 *this = Zero();
03649 return *this;
03650 }
03651
03652 void PositiveMultiply(Integer &product, const Integer &a, const Integer &b)
03653 {
03654 size_t aSize = RoundupSize(a.WordCount());
03655 size_t bSize = RoundupSize(b.WordCount());
03656
03657 product.reg.CleanNew(RoundupSize(aSize+bSize));
03658 product.sign = Integer::POSITIVE;
03659
03660 IntegerSecBlock workspace(aSize + bSize);
03661 AsymmetricMultiply(product.reg, workspace, a.reg, aSize, b.reg, bSize);
03662 }
03663
03664 void Multiply(Integer &product, const Integer &a, const Integer &b)
03665 {
03666 PositiveMultiply(product, a, b);
03667
03668 if (a.NotNegative() != b.NotNegative())
03669 product.Negate();
03670 }
03671
03672 Integer Integer::Times(const Integer &b) const
03673 {
03674 Integer product;
03675 Multiply(product, *this, b);
03676 return product;
03677 }
03678
03679
03680
03681
03682
03683
03684
03685
03686
03687
03688
03689
03690
03691
03692
03693
03694
03695
03696
03697
03698
03699
03700
03701 void PositiveDivide(Integer &remainder, Integer "ient,
03702 const Integer &a, const Integer &b)
03703 {
03704 unsigned aSize = a.WordCount();
03705 unsigned bSize = b.WordCount();
03706
03707 if (!bSize)
03708 throw Integer::DivideByZero();
03709
03710 if (a.PositiveCompare(b) == -1)
03711 {
03712 remainder = a;
03713 remainder.sign = Integer::POSITIVE;
03714 quotient = Integer::Zero();
03715 return;
03716 }
03717
03718 aSize += aSize%2;
03719 bSize += bSize%2;
03720
03721 remainder.reg.CleanNew(RoundupSize(bSize));
03722 remainder.sign = Integer::POSITIVE;
03723 quotient.reg.CleanNew(RoundupSize(aSize-bSize+2));
03724 quotient.sign = Integer::POSITIVE;
03725
03726 IntegerSecBlock T(aSize+3*(bSize+2));
03727 Divide(remainder.reg, quotient.reg, T, a.reg, aSize, b.reg, bSize);
03728 }
03729
03730 void Integer::Divide(Integer &remainder, Integer "ient, const Integer ÷nd, const Integer &divisor)
03731 {
03732 PositiveDivide(remainder, quotient, dividend, divisor);
03733
03734 if (dividend.IsNegative())
03735 {
03736 quotient.Negate();
03737 if (remainder.NotZero())
03738 {
03739 --quotient;
03740 remainder = divisor.AbsoluteValue() - remainder;
03741 }
03742 }
03743
03744 if (divisor.IsNegative())
03745 quotient.Negate();
03746 }
03747
03748 void Integer::DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n)
03749 {
03750 q = a;
03751 q >>= n;
03752
03753 const size_t wordCount = BitsToWords(n);
03754 if (wordCount <= a.WordCount())
03755 {
03756 r.reg.resize(RoundupSize(wordCount));
03757 CopyWords(r.reg, a.reg, wordCount);
03758 SetWords(r.reg+wordCount, 0, r.reg.size()-wordCount);
03759 if (n % WORD_BITS != 0)
03760 r.reg[wordCount-1] %= (word(1) << (n % WORD_BITS));
03761 }
03762 else
03763 {
03764 r.reg.resize(RoundupSize(a.WordCount()));
03765 CopyWords(r.reg, a.reg, r.reg.size());
03766 }
03767 r.sign = POSITIVE;
03768
03769 if (a.IsNegative() && r.NotZero())
03770 {
03771 --q;
03772 r = Power2(n) - r;
03773 }
03774 }
03775
03776 Integer Integer::DividedBy(const Integer &b) const
03777 {
03778 Integer remainder, quotient;
03779 Integer::Divide(remainder, quotient, *this, b);
03780 return quotient;
03781 }
03782
03783 Integer Integer::Modulo(const Integer &b) const
03784 {
03785 Integer remainder, quotient;
03786 Integer::Divide(remainder, quotient, *this, b);
03787 return remainder;
03788 }
03789
03790 void Integer::Divide(word &remainder, Integer "ient, const Integer ÷nd, word divisor)
03791 {
03792 if (!divisor)
03793 throw Integer::DivideByZero();
03794
03795 assert(divisor);
03796
03797 if ((divisor & (divisor-1)) == 0)
03798 {
03799 quotient = dividend >> (BitPrecision(divisor)-1);
03800 remainder = dividend.reg[0] & (divisor-1);
03801 return;
03802 }
03803
03804 unsigned int i = dividend.WordCount();
03805 quotient.reg.CleanNew(RoundupSize(i));
03806 remainder = 0;
03807 while (i--)
03808 {
03809 quotient.reg[i] = DWord(dividend.reg[i], remainder) / divisor;
03810 remainder = DWord(dividend.reg[i], remainder) % divisor;
03811 }
03812
03813 if (dividend.NotNegative())
03814 quotient.sign = POSITIVE;
03815 else
03816 {
03817 quotient.sign = NEGATIVE;
03818 if (remainder)
03819 {
03820 --quotient;
03821 remainder = divisor - remainder;
03822 }
03823 }
03824 }
03825
03826 Integer Integer::DividedBy(word b) const
03827 {
03828 word remainder;
03829 Integer quotient;
03830 Integer::Divide(remainder, quotient, *this, b);
03831 return quotient;
03832 }
03833
03834 word Integer::Modulo(word divisor) const
03835 {
03836 if (!divisor)
03837 throw Integer::DivideByZero();
03838
03839 assert(divisor);
03840
03841 word remainder;
03842
03843 if ((divisor & (divisor-1)) == 0)
03844 remainder = reg[0] & (divisor-1);
03845 else
03846 {
03847 unsigned int i = WordCount();
03848
03849 if (divisor <= 5)
03850 {
03851 DWord sum(0, 0);
03852 while (i--)
03853 sum += reg[i];
03854 remainder = sum % divisor;
03855 }
03856 else
03857 {
03858 remainder = 0;
03859 while (i--)
03860 remainder = DWord(reg[i], remainder) % divisor;
03861 }
03862 }
03863
03864 if (IsNegative() && remainder)
03865 remainder = divisor - remainder;
03866
03867 return remainder;
03868 }
03869
03870 void Integer::Negate()
03871 {
03872 if (!!(*this))
03873 sign = Sign(1-sign);
03874 }
03875
03876 int Integer::PositiveCompare(const Integer& t) const
03877 {
03878 unsigned size = WordCount(), tSize = t.WordCount();
03879
03880 if (size == tSize)
03881 return CryptoPP::Compare(reg, t.reg, size);
03882 else
03883 return size > tSize ? 1 : -1;
03884 }
03885
03886 int Integer::Compare(const Integer& t) const
03887 {
03888 if (NotNegative())
03889 {
03890 if (t.NotNegative())
03891 return PositiveCompare(t);
03892 else
03893 return 1;
03894 }
03895 else
03896 {
03897 if (t.NotNegative())
03898 return -1;
03899 else
03900 return -PositiveCompare(t);
03901 }
03902 }
03903
03904 Integer Integer::SquareRoot() const
03905 {
03906 if (!IsPositive())
03907 return Zero();
03908
03909
03910 Integer x, y = Power2((BitCount()+1)/2);
03911 assert(y*y >= *this);
03912
03913 do
03914 {
03915 x = y;
03916 y = (x + *this/x) >> 1;
03917 } while (y<x);
03918
03919 return x;
03920 }
03921
03922 bool Integer::IsSquare() const
03923 {
03924 Integer r = SquareRoot();
03925 return *this == r.Squared();
03926 }
03927
03928 bool Integer::IsUnit() const
03929 {
03930 return (WordCount() == 1) && (reg[0] == 1);
03931 }
03932
03933 Integer Integer::MultiplicativeInverse() const
03934 {
03935 return IsUnit() ? *this : Zero();
03936 }
03937
03938 Integer a_times_b_mod_c(const Integer &x, const Integer& y, const Integer& m)
03939 {
03940 return x*y%m;
03941 }
03942
03943 Integer a_exp_b_mod_c(const Integer &x, const Integer& e, const Integer& m)
03944 {
03945 ModularArithmetic mr(m);
03946 return mr.Exponentiate(x, e);
03947 }
03948
03949 Integer Integer::Gcd(const Integer &a, const Integer &b)
03950 {
03951 return EuclideanDomainOf<Integer>().Gcd(a, b);
03952 }
03953
03954 Integer Integer::InverseMod(const Integer &m) const
03955 {
03956 assert(m.NotNegative());
03957
03958 if (IsNegative() || *this>=m)
03959 return (*this%m).InverseMod(m);
03960
03961 if (m.IsEven())
03962 {
03963 if (!m || IsEven())
03964 return Zero();
03965 if (*this == One())
03966 return One();
03967
03968 Integer u = m.InverseMod(*this);
03969 return !u ? Zero() : (m*(*this-u)+1)/(*this);
03970 }
03971
03972 SecBlock<word> T(m.reg.size() * 4);
03973 Integer r((word)0, m.reg.size());
03974 unsigned k = AlmostInverse(r.reg, T, reg, reg.size(), m.reg, m.reg.size());
03975 DivideByPower2Mod(r.reg, r.reg, k, m.reg, m.reg.size());
03976 return r;
03977 }
03978
03979 word Integer::InverseMod(word mod) const
03980 {
03981 word g0 = mod, g1 = *this % mod;
03982 word v0 = 0, v1 = 1;
03983 word y;
03984
03985 while (g1)
03986 {
03987 if (g1 == 1)
03988 return v1;
03989 y = g0 / g1;
03990 g0 = g0 % g1;
03991 v0 += y * v1;
03992
03993 if (!g0)
03994 break;
03995 if (g0 == 1)
03996 return mod-v0;
03997 y = g1 / g0;
03998 g1 = g1 % g0;
03999 v1 += y * v0;
04000 }
04001 return 0;
04002 }
04003
04004
04005
04006 ModularArithmetic::ModularArithmetic(BufferedTransformation &bt)
04007 {
04008 BERSequenceDecoder seq(bt);
04009 OID oid(seq);
04010 if (oid != ASN1::prime_field())
04011 BERDecodeError();
04012 m_modulus.BERDecode(seq);
04013 seq.MessageEnd();
04014 m_result.reg.resize(m_modulus.reg.size());
04015 }
04016
04017 void ModularArithmetic::DEREncode(BufferedTransformation &bt) const
04018 {
04019 DERSequenceEncoder seq(bt);
04020 ASN1::prime_field().DEREncode(seq);
04021 m_modulus.DEREncode(seq);
04022 seq.MessageEnd();
04023 }
04024
04025 void ModularArithmetic::DEREncodeElement(BufferedTransformation &out, const Element &a) const
04026 {
04027 a.DEREncodeAsOctetString(out, MaxElementByteLength());
04028 }
04029
04030 void ModularArithmetic::BERDecodeElement(BufferedTransformation &in, Element &a) const
04031 {
04032 a.BERDecodeAsOctetString(in, MaxElementByteLength());
04033 }
04034
04035 const Integer& ModularArithmetic::Half(const Integer &a) const
04036 {
04037 if (a.reg.size()==m_modulus.reg.size())
04038 {
04039 CryptoPP::DivideByPower2Mod(m_result.reg.begin(), a.reg, 1, m_modulus.reg, a.reg.size());
04040 return m_result;
04041 }
04042 else
04043 return m_result1 = (a.IsEven() ? (a >> 1) : ((a+m_modulus) >> 1));
04044 }
04045
04046 const Integer& ModularArithmetic::Add(const Integer &a, const Integer &b) const
04047 {
04048 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04049 {
04050 if (CryptoPP::Add(m_result.reg.begin(), a.reg, b.reg, a.reg.size())
04051 || Compare(m_result.reg, m_modulus.reg, a.reg.size()) >= 0)
04052 {
04053 CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
04054 }
04055 return m_result;
04056 }
04057 else
04058 {
04059 m_result1 = a+b;
04060 if (m_result1 >= m_modulus)
04061 m_result1 -= m_modulus;
04062 return m_result1;
04063 }
04064 }
04065
04066 Integer& ModularArithmetic::Accumulate(Integer &a, const Integer &b) const
04067 {
04068 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04069 {
04070 if (CryptoPP::Add(a.reg, a.reg, b.reg, a.reg.size())
04071 || Compare(a.reg, m_modulus.reg, a.reg.size()) >= 0)
04072 {
04073 CryptoPP::Subtract(a.reg, a.reg, m_modulus.reg, a.reg.size());
04074 }
04075 }
04076 else
04077 {
04078 a+=b;
04079 if (a>=m_modulus)
04080 a-=m_modulus;
04081 }
04082
04083 return a;
04084 }
04085
04086 const Integer& ModularArithmetic::Subtract(const Integer &a, const Integer &b) const
04087 {
04088 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04089 {
04090 if (CryptoPP::Subtract(m_result.reg.begin(), a.reg, b.reg, a.reg.size()))
04091 CryptoPP::Add(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
04092 return m_result;
04093 }
04094 else
04095 {
04096 m_result1 = a-b;
04097 if (m_result1.IsNegative())
04098 m_result1 += m_modulus;
04099 return m_result1;
04100 }
04101 }
04102
04103 Integer& ModularArithmetic::Reduce(Integer &a, const Integer &b) const
04104 {
04105 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04106 {
04107 if (CryptoPP::Subtract(a.reg, a.reg, b.reg, a.reg.size()))
04108 CryptoPP::Add(a.reg, a.reg, m_modulus.reg, a.reg.size());
04109 }
04110 else
04111 {
04112 a-=b;
04113 if (a.IsNegative())
04114 a+=m_modulus;
04115 }
04116
04117 return a;
04118 }
04119
04120 const Integer& ModularArithmetic::Inverse(const Integer &a) const
04121 {
04122 if (!a)
04123 return a;
04124
04125 CopyWords(m_result.reg.begin(), m_modulus.reg, m_modulus.reg.size());
04126 if (CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, a.reg, a.reg.size()))
04127 Decrement(m_result.reg.begin()+a.reg.size(), m_modulus.reg.size()-a.reg.size());
04128
04129 return m_result;
04130 }
04131
04132 Integer ModularArithmetic::CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
04133 {
04134 if (m_modulus.IsOdd())
04135 {
04136 MontgomeryRepresentation dr(m_modulus);
04137 return dr.ConvertOut(dr.CascadeExponentiate(dr.ConvertIn(x), e1, dr.ConvertIn(y), e2));
04138 }
04139 else
04140 return AbstractRing<Integer>::CascadeExponentiate(x, e1, y, e2);
04141 }
04142
04143 void ModularArithmetic::SimultaneousExponentiate(Integer *results, const Integer &base, const Integer *exponents, unsigned int exponentsCount) const
04144 {
04145 if (m_modulus.IsOdd())
04146 {
04147 MontgomeryRepresentation dr(m_modulus);
04148 dr.SimultaneousExponentiate(results, dr.ConvertIn(base), exponents, exponentsCount);
04149 for (unsigned int i=0; i<exponentsCount; i++)
04150 results[i] = dr.ConvertOut(results[i]);
04151 }
04152 else
04153 AbstractRing<Integer>::SimultaneousExponentiate(results, base, exponents, exponentsCount);
04154 }
04155
04156 MontgomeryRepresentation::MontgomeryRepresentation(const Integer &m)
04157 : ModularArithmetic(m),
04158 m_u((word)0, m_modulus.reg.size()),
04159 m_workspace(5*m_modulus.reg.size())
04160 {
04161 if (!m_modulus.IsOdd())
04162 throw InvalidArgument("MontgomeryRepresentation: Montgomery representation requires an odd modulus");
04163
04164 RecursiveInverseModPower2(m_u.reg, m_workspace, m_modulus.reg, m_modulus.reg.size());
04165 }
04166
04167 const Integer& MontgomeryRepresentation::Multiply(const Integer &a, const Integer &b) const
04168 {
04169 word *const T = m_workspace.begin();
04170 word *const R = m_result.reg.begin();
04171 const size_t N = m_modulus.reg.size();
04172 assert(a.reg.size()<=N && b.reg.size()<=N);
04173
04174 AsymmetricMultiply(T, T+2*N, a.reg, a.reg.size(), b.reg, b.reg.size());
04175 SetWords(T+a.reg.size()+b.reg.size(), 0, 2*N-a.reg.size()-b.reg.size());
04176 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04177 return m_result;
04178 }
04179
04180 const Integer& MontgomeryRepresentation::Square(const Integer &a) const
04181 {
04182 word *const T = m_workspace.begin();
04183 word *const R = m_result.reg.begin();
04184 const size_t N = m_modulus.reg.size();
04185 assert(a.reg.size()<=N);
04186
04187 CryptoPP::Square(T, T+2*N, a.reg, a.reg.size());
04188 SetWords(T+2*a.reg.size(), 0, 2*N-2*a.reg.size());
04189 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04190 return m_result;
04191 }
04192
04193 Integer MontgomeryRepresentation::ConvertOut(const Integer &a) const
04194 {
04195 word *const T = m_workspace.begin();
04196 word *const R = m_result.reg.begin();
04197 const size_t N = m_modulus.reg.size();
04198 assert(a.reg.size()<=N);
04199
04200 CopyWords(T, a.reg, a.reg.size());
04201 SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
04202 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04203 return m_result;
04204 }
04205
04206 const Integer& MontgomeryRepresentation::MultiplicativeInverse(const Integer &a) const
04207 {
04208
04209 word *const T = m_workspace.begin();
04210 word *const R = m_result.reg.begin();
04211 const size_t N = m_modulus.reg.size();
04212 assert(a.reg.size()<=N);
04213
04214 CopyWords(T, a.reg, a.reg.size());
04215 SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
04216 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04217 unsigned k = AlmostInverse(R, T, R, N, m_modulus.reg, N);
04218
04219
04220
04221 if (k>N*WORD_BITS)
04222 DivideByPower2Mod(R, R, k-N*WORD_BITS, m_modulus.reg, N);
04223 else
04224 MultiplyByPower2Mod(R, R, N*WORD_BITS-k, m_modulus.reg, N);
04225
04226 return m_result;
04227 }
04228
04229 NAMESPACE_END
04230
04231 #endif