Crypto++  5.6.5
Free C++ class library of cryptographic schemes
integer.cpp
1 // integer.cpp - originally written and placed in the public domain by Wei Dai
2 // contains public domain code contributed by Alister Lee and Leonard Janke
3 
4 // Notes by JW: The Integer class needs to do two things. First, it needs to set function
5 // pointers on some platforms, like X86 and X64. The function pointers select a fast multiply
6 // and addition based on the cpu. Second, it wants to create Integer::Zero(), Integer::One()
7 // and Integer::Two().
8 // The function pointers are initialized in the InitializeInteger class by calling
9 // SetFunctionPointers(). The call to SetFunctionPointers() is guarded to run once. If C++11
10 // dynamic initialization is available, then a standard run_once is used. Otherwise, and simple
11 // flag is used. The flag suffers a race, but the worse case is the same function pointers
12 // get written twice without leaking memory.
13 // For Integer::Zero(), Integer::One() and Integer::Two(), we use one of two strategies. First,
14 // if C++11 dynamic initialization is available, then we use a static variable. Second, if
15 // C++11 dynamic initialization is not available, then we fall back to Wei's original code of
16 // a Singleton.
17 // Wei's original code was much simpler. It simply used the Singleton pattern, but it always
18 // produced memory findings on some platforms. The Singleton generates memory findings because
19 // it uses a Create On First Use pattern (a dumb Nifty Counter) and the compiler had to be smart
20 // enough to fold them to return the same object. Unix and Linux compilers do a good job of folding
21 // objects, but Microsoft compilers do a rather poor job for some versions of the compilers.
22 // Another problem with the Singleton is resource destruction requires running resource acquisition
23 // in reverse. For resources provided through the Singletons, there is no way to express the
24 // dependency order to safely destroy resources. (That's one of the problems C++11 dynamic
25 // intitialization with concurrent execution is supposed to solve).
26 
27 #include "pch.h"
28 #include "config.h"
29 
30 #if CRYPTOPP_MSC_VERSION
31 # pragma warning(disable: 4100)
32 #endif
33 
34 #if CRYPTOPP_GCC_DIAGNOSTIC_AVAILABLE
35 # pragma GCC diagnostic ignored "-Wunused"
36 #if !defined(__clang__)
37 # pragma GCC diagnostic ignored "-Wunused-but-set-variable"
38 #endif
39 #endif
40 
41 // Issue 340
42 #if CRYPTOPP_GCC_DIAGNOSTIC_AVAILABLE
43 # pragma GCC diagnostic ignored "-Wconversion"
44 # pragma GCC diagnostic ignored "-Wsign-conversion"
45 #endif
46 
47 // Define this to statically initialize Integer Zero(), One()
48 // and Two() using Microsoft init_seg(). This is useful for
49 // testing Integer code for leaks when the MSC compiler
50 // does not fold use of the Singletons.
51 // #define USE_MSC_INIT_PRIORITY 1
52 
53 #ifndef CRYPTOPP_IMPORTS
54 
55 #include "integer.h"
56 #include "secblock.h"
57 #include "modarith.h"
58 #include "nbtheory.h"
59 #include "smartptr.h"
60 #include "algparam.h"
61 #include "filters.h"
62 #include "stdcpp.h"
63 #include "asn.h"
64 #include "oids.h"
65 #include "words.h"
66 #include "pubkey.h" // for P1363_KDF2
67 #include "sha.h"
68 #include "cpu.h"
69 #include "misc.h"
70 
71 #include <iostream>
72 
73 #if (_MSC_VER >= 1400) && !defined(_M_ARM)
74  #include <intrin.h>
75 #endif
76 
77 #ifdef __DECCXX
78  #include <c_asm.h>
79 #endif
80 
81 // "Error: The operand ___LKDB cannot be assigned to", http://github.com/weidai11/cryptopp/issues/188
82 #if (__SUNPRO_CC >= 0x5130)
83 # define MAYBE_CONST
84 # define MAYBE_UNCONST_CAST(x) const_cast<word*>(x)
85 #else
86 # define MAYBE_CONST const
87 # define MAYBE_UNCONST_CAST(x) x
88 #endif
89 
90 // "Inline assembly operands don't work with .intel_syntax",
91 // http://llvm.org/bugs/show_bug.cgi?id=24232
92 #if CRYPTOPP_BOOL_X32 || defined(CRYPTOPP_DISABLE_INTEL_ASM)
93 # undef CRYPTOPP_X86_ASM_AVAILABLE
94 # undef CRYPTOPP_X32_ASM_AVAILABLE
95 # undef CRYPTOPP_X64_ASM_AVAILABLE
96 # undef CRYPTOPP_SSE2_ASM_AVAILABLE
97 # undef CRYPTOPP_SSSE3_ASM_AVAILABLE
98 #else
99 # define CRYPTOPP_INTEGER_SSE2 (CRYPTOPP_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86))
100 #endif
101 
102 // ***************** C++ Static Initialization ********************
103 
104 NAMESPACE_BEGIN(CryptoPP)
105 static void SetFunctionPointers();
106 InitializeInteger::InitializeInteger()
107 {
108 #if !(HAVE_GCC_INIT_PRIORITY || HAVE_MSC_INIT_PRIORITY)
109 #if defined(CRYPTOPP_CXX11_SYNCHRONIZATION) && defined(CRYPTOPP_CXX11_DYNAMIC_INIT)
110  static std::once_flag s_flag;
111  std::call_once(s_flag, []() {
112  SetFunctionPointers();
113  });
114 #else
115  static bool s_flag;
116  MEMORY_BARRIER();
117  if (s_flag == false)
118  {
119  SetFunctionPointers();
120  s_flag = true;
121  MEMORY_BARRIER();
122  }
123 #endif // C++11 or C++03 flag
124 #endif // not GCC and MSC init priorities
125 }
126 
127 template <long i>
129 {
130  Integer * operator()() const
131  {
132  return new Integer(i);
133  }
134 };
135 
136 // ***************** Library code ********************
137 
138 inline static int Compare(const word *A, const word *B, size_t N)
139 {
140  while (N--)
141  if (A[N] > B[N])
142  return 1;
143  else if (A[N] < B[N])
144  return -1;
145 
146  return 0;
147 }
148 
149 inline static int Increment(word *A, size_t N, word B=1)
150 {
151  CRYPTOPP_ASSERT(N);
152  word t = A[0];
153  A[0] = t+B;
154  if (A[0] >= t)
155  return 0;
156  for (unsigned i=1; i<N; i++)
157  if (++A[i])
158  return 0;
159  return 1;
160 }
161 
162 inline static int Decrement(word *A, size_t N, word B=1)
163 {
164  CRYPTOPP_ASSERT(N);
165  word t = A[0];
166  A[0] = t-B;
167  if (A[0] <= t)
168  return 0;
169  for (unsigned i=1; i<N; i++)
170  if (A[i]--)
171  return 0;
172  return 1;
173 }
174 
175 static void TwosComplement(word *A, size_t N)
176 {
177  Decrement(A, N);
178  for (unsigned i=0; i<N; i++)
179  A[i] = ~A[i];
180 }
181 
182 static word AtomicInverseModPower2(word A)
183 {
184  CRYPTOPP_ASSERT(A%2==1);
185 
186  word R=A%8;
187 
188  for (unsigned i=3; i<WORD_BITS; i*=2)
189  R = R*(2-R*A);
190 
191  CRYPTOPP_ASSERT(R*A==1);
192  return R;
193 }
194 
195 // ********************************************************
196 
197 #if !defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE) || (defined(__x86_64__) && defined(CRYPTOPP_WORD128_AVAILABLE))
198  #define Declare2Words(x) word x##0, x##1;
199  #define AssignWord(a, b) a##0 = b; a##1 = 0;
200  #define Add2WordsBy1(a, b, c) a##0 = b##0 + c; a##1 = b##1 + (a##0 < c);
201  #define LowWord(a) a##0
202  #define HighWord(a) a##1
203  #ifdef _MSC_VER
204  #define MultiplyWordsLoHi(p0, p1, a, b) p0 = _umul128(a, b, &p1);
205  #ifndef __INTEL_COMPILER
206  #define Double3Words(c, d) d##1 = __shiftleft128(d##0, d##1, 1); d##0 = __shiftleft128(c, d##0, 1); c *= 2;
207  #endif
208  #elif defined(__DECCXX)
209  #define MultiplyWordsLoHi(p0, p1, a, b) p0 = a*b; p1 = asm("umulh %a0, %a1, %v0", a, b);
210  #elif defined(__x86_64__)
211  #if defined(__SUNPRO_CC) && __SUNPRO_CC < 0x5100
212  // Sun Studio's gcc-style inline assembly is heavily bugged as of version 5.9 Patch 124864-09 2008/12/16, but this one works
213  #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "r"(b) : "cc");
214  #else
215  #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
216  #define MulAcc(c, d, a, b) asm ("mulq %6; addq %3, %0; adcq %4, %1; adcq $0, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1), "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
217  #define Double3Words(c, d) asm ("addq %0, %0; adcq %1, %1; adcq %2, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1) : : "cc");
218  #define Acc2WordsBy1(a, b) asm ("addq %2, %0; adcq $0, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b) : "cc");
219  #define Acc2WordsBy2(a, b) asm ("addq %2, %0; adcq %3, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b##0), "r"(b##1) : "cc");
220  #define Acc3WordsBy2(c, d, e) asm ("addq %5, %0; adcq %6, %1; adcq $0, %2;" : "+r"(c), "=r"(e##0), "=r"(e##1) : "1"(d##0), "2"(d##1), "r"(e##0), "r"(e##1) : "cc");
221  #endif
222  #endif
223  #define MultiplyWords(p, a, b) MultiplyWordsLoHi(p##0, p##1, a, b)
224  #ifndef Double3Words
225  #define Double3Words(c, d) d##1 = 2*d##1 + (d##0>>(WORD_BITS-1)); d##0 = 2*d##0 + (c>>(WORD_BITS-1)); c *= 2;
226  #endif
227  #ifndef Acc2WordsBy2
228  #define Acc2WordsBy2(a, b) a##0 += b##0; a##1 += a##0 < b##0; a##1 += b##1;
229  #endif
230  #define AddWithCarry(u, a, b) {word t = a+b; u##0 = t + u##1; u##1 = (t<a) + (u##0<t);}
231  #define SubtractWithBorrow(u, a, b) {word t = a-b; u##0 = t - u##1; u##1 = (t>a) + (u##0>t);}
232  #define GetCarry(u) u##1
233  #define GetBorrow(u) u##1
234 #else
235  #define Declare2Words(x) dword x;
236  #if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER) && !defined(_M_ARM)
237  #define MultiplyWords(p, a, b) p = __emulu(a, b);
238  #else
239  #define MultiplyWords(p, a, b) p = (dword)a*b;
240  #endif
241  #define AssignWord(a, b) a = b;
242  #define Add2WordsBy1(a, b, c) a = b + c;
243  #define Acc2WordsBy2(a, b) a += b;
244  #define LowWord(a) word(a)
245  #define HighWord(a) word(a>>WORD_BITS)
246  #define Double3Words(c, d) d = 2*d + (c>>(WORD_BITS-1)); c *= 2;
247  #define AddWithCarry(u, a, b) u = dword(a) + b + GetCarry(u);
248  #define SubtractWithBorrow(u, a, b) u = dword(a) - b - GetBorrow(u);
249  #define GetCarry(u) HighWord(u)
250  #define GetBorrow(u) word(u>>(WORD_BITS*2-1))
251 #endif
252 #ifndef MulAcc
253  #define MulAcc(c, d, a, b) MultiplyWords(p, a, b); Acc2WordsBy1(p, c); c = LowWord(p); Acc2WordsBy1(d, HighWord(p));
254 #endif
255 #ifndef Acc2WordsBy1
256  #define Acc2WordsBy1(a, b) Add2WordsBy1(a, a, b)
257 #endif
258 #ifndef Acc3WordsBy2
259  #define Acc3WordsBy2(c, d, e) Acc2WordsBy1(e, c); c = LowWord(e); Add2WordsBy1(e, d, HighWord(e));
260 #endif
261 
262 class DWord
263 {
264 public:
265 #if defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE)
266  DWord() {std::memset(&m_whole, 0x00, sizeof(m_whole));}
267 #else
268  DWord() {std::memset(&m_halfs, 0x00, sizeof(m_halfs));}
269 #endif
270 
271 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
272  explicit DWord(word low) : m_whole(low) { }
273 #else
274  explicit DWord(word low)
275  {
276  m_halfs.high = 0;
277  m_halfs.low = low;
278  }
279 #endif
280 
281 #if defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE)
282  DWord(word low, word high) : m_whole()
283 #else
284  DWord(word low, word high) : m_halfs()
285 #endif
286  {
287 #if defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE)
288 # if defined(CRYPTOPP_LITTLE_ENDIAN)
289  const word t[2] = {low,high};
290  memcpy(&m_whole, t, sizeof(m_whole));
291 # else
292  const word t[2] = {high,low};
293  memcpy(&m_whole, t, sizeof(m_whole));
294 # endif
295 #else
296  m_halfs.low = low;
297  m_halfs.high = high;
298 #endif
299  }
300 
301  static DWord Multiply(word a, word b)
302  {
303  DWord r;
304  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
305  r.m_whole = (dword)a * b;
306  #elif defined(MultiplyWordsLoHi)
307  MultiplyWordsLoHi(r.m_halfs.low, r.m_halfs.high, a, b);
308  #else
309  CRYPTOPP_ASSERT(0);
310  #endif
311  return r;
312  }
313 
314  static DWord MultiplyAndAdd(word a, word b, word c)
315  {
316  DWord r = Multiply(a, b);
317  return r += c;
318  }
319 
320  DWord & operator+=(word a)
321  {
322  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
323  m_whole = m_whole + a;
324  #else
325  m_halfs.low += a;
326  m_halfs.high += (m_halfs.low < a);
327  #endif
328  return *this;
329  }
330 
331  DWord operator+(word a)
332  {
333  DWord r;
334  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
335  r.m_whole = m_whole + a;
336  #else
337  r.m_halfs.low = m_halfs.low + a;
338  r.m_halfs.high = m_halfs.high + (r.m_halfs.low < a);
339  #endif
340  return r;
341  }
342 
343  DWord operator-(DWord a)
344  {
345  DWord r;
346  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
347  r.m_whole = m_whole - a.m_whole;
348  #else
349  r.m_halfs.low = m_halfs.low - a.m_halfs.low;
350  r.m_halfs.high = m_halfs.high - a.m_halfs.high - (r.m_halfs.low > m_halfs.low);
351  #endif
352  return r;
353  }
354 
355  DWord operator-(word a)
356  {
357  DWord r;
358  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
359  r.m_whole = m_whole - a;
360  #else
361  r.m_halfs.low = m_halfs.low - a;
362  r.m_halfs.high = m_halfs.high - (r.m_halfs.low > m_halfs.low);
363  #endif
364  return r;
365  }
366 
367  // returns quotient, which must fit in a word
368  word operator/(word divisor);
369 
370  word operator%(word a);
371 
372  bool operator!() const
373  {
374  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
375  return !m_whole;
376  #else
377  return !m_halfs.high && !m_halfs.low;
378  #endif
379  }
380 
381  // TODO: When NATIVE_DWORD is in effect, we access high and low, which are inactive
382  // union members, and that's UB. Also see http://stackoverflow.com/q/11373203.
383  word GetLowHalf() const {return m_halfs.low;}
384  word GetHighHalf() const {return m_halfs.high;}
385  word GetHighHalfAsBorrow() const {return 0-m_halfs.high;}
386 
387 private:
388  // Issue 274, "Types cannot be declared in anonymous union",
389  // http://github.com/weidai11/cryptopp/issues/274
390  // Thanks to Martin Bonner at http://stackoverflow.com/a/39507183
391  struct half_words
392  {
393  #ifdef CRYPTOPP_LITTLE_ENDIAN
394  word low;
395  word high;
396  #else
397  word high;
398  word low;
399  #endif
400  };
401  union
402  {
403  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
404  dword m_whole;
405  #endif
406  half_words m_halfs;
407  };
408 };
409 
410 class Word
411 {
412 public:
413  Word() : m_whole(0) {}
414  Word(word value) : m_whole(value) {}
415  Word(hword low, hword high) : m_whole(low | (word(high) << (WORD_BITS/2))) {}
416 
417  static Word Multiply(hword a, hword b)
418  {
419  Word r;
420  r.m_whole = (word)a * b;
421  return r;
422  }
423 
424  Word operator-(Word a)
425  {
426  Word r;
427  r.m_whole = m_whole - a.m_whole;
428  return r;
429  }
430 
431  Word operator-(hword a)
432  {
433  Word r;
434  r.m_whole = m_whole - a;
435  return r;
436  }
437 
438  // returns quotient, which must fit in a word
439  hword operator/(hword divisor)
440  {
441  return hword(m_whole / divisor);
442  }
443 
444  bool operator!() const
445  {
446  return !m_whole;
447  }
448 
449  word GetWhole() const {return m_whole;}
450  hword GetLowHalf() const {return hword(m_whole);}
451  hword GetHighHalf() const {return hword(m_whole>>(WORD_BITS/2));}
452  hword GetHighHalfAsBorrow() const {return 0-hword(m_whole>>(WORD_BITS/2));}
453 
454 private:
455  word m_whole;
456 };
457 
458 // do a 3 word by 2 word divide, returns quotient and leaves remainder in A
459 template <class S, class D>
460 S DivideThreeWordsByTwo(S *A, S B0, S B1, D *dummy=NULLPTR)
461 {
462  CRYPTOPP_UNUSED(dummy);
463 
464  // Assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a S
465  CRYPTOPP_ASSERT(A[2] < B1 || (A[2]==B1 && A[1] < B0));
466 
467  // estimate the quotient: do a 2 S by 1 S divide.
468  // Profiling tells us the original second case was dominant, so it was promoted to the first If statement.
469  // The code change occurred at Commit dc99266599a0e72d.
470 
471  S Q; bool pre = (S(B1+1) == 0);
472  if (B1 > 0 && !pre)
473  Q = D(A[1], A[2]) / S(B1+1);
474  else if (pre)
475  Q = A[2];
476  else
477  Q = D(A[0], A[1]) / B0;
478 
479  // now subtract Q*B from A
480  D p = D::Multiply(B0, Q);
481  D u = (D) A[0] - p.GetLowHalf();
482  A[0] = u.GetLowHalf();
483  u = (D) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - D::Multiply(B1, Q);
484  A[1] = u.GetLowHalf();
485  A[2] += u.GetHighHalf();
486 
487  // Q <= actual quotient, so fix it
488  while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
489  {
490  u = (D) A[0] - B0;
491  A[0] = u.GetLowHalf();
492  u = (D) A[1] - B1 - u.GetHighHalfAsBorrow();
493  A[1] = u.GetLowHalf();
494  A[2] += u.GetHighHalf();
495  Q++;
496  CRYPTOPP_ASSERT(Q); // shouldn't overflow
497  }
498 
499  return Q;
500 }
501 
502 // do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1
503 template <class S, class D>
504 inline D DivideFourWordsByTwo(S *T, const D &Al, const D &Ah, const D &B)
505 {
506  // Profiling tells us the original second case was dominant, so it was promoted to the first If statement.
507  // The code change occurred at Commit dc99266599a0e72d.
508 
509  if (!!B)
510  {
511  S Q[2];
512  T[0] = Al.GetLowHalf();
513  T[1] = Al.GetHighHalf();
514  T[2] = Ah.GetLowHalf();
515  T[3] = Ah.GetHighHalf();
516  Q[1] = DivideThreeWordsByTwo<S, D>(T+1, B.GetLowHalf(), B.GetHighHalf());
517  Q[0] = DivideThreeWordsByTwo<S, D>(T, B.GetLowHalf(), B.GetHighHalf());
518  return D(Q[0], Q[1]);
519  }
520  else // if divisor is 0, we assume divisor==2**(2*WORD_BITS)
521  {
522  return D(Ah.GetLowHalf(), Ah.GetHighHalf());
523  }
524 }
525 
526 // returns quotient, which must fit in a word
527 inline word DWord::operator/(word a)
528 {
529  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
530  return word(m_whole / a);
531  #else
532  hword r[4];
533  return DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a).GetWhole();
534  #endif
535 }
536 
537 inline word DWord::operator%(word a)
538 {
539  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
540  return word(m_whole % a);
541  #else
542  if (a < (word(1) << (WORD_BITS/2)))
543  {
544  hword h = hword(a);
545  word r = m_halfs.high % h;
546  r = ((m_halfs.low >> (WORD_BITS/2)) + (r << (WORD_BITS/2))) % h;
547  return hword((hword(m_halfs.low) + (r << (WORD_BITS/2))) % h);
548  }
549  else
550  {
551  hword r[4];
552  DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a);
553  return Word(r[0], r[1]).GetWhole();
554  }
555  #endif
556 }
557 
558 // ********************************************************
559 
560 // Use some tricks to share assembly code between MSVC, GCC, Clang and Sun CC.
561 #if defined(__GNUC__)
562  #define AddPrologue \
563  int result; \
564  __asm__ __volatile__ \
565  ( \
566  INTEL_NOPREFIX
567  #define AddEpilogue \
568  ATT_PREFIX \
569  : "=a" (result)\
570  : "d" (C), "a" (A), "D" (B), "c" (N) \
571  : "%esi", "memory", "cc" \
572  );\
573  return result;
574  #define MulPrologue \
575  __asm__ __volatile__ \
576  ( \
577  INTEL_NOPREFIX \
578  AS1( push ebx) \
579  AS2( mov ebx, edx)
580  #define MulEpilogue \
581  AS1( pop ebx) \
582  ATT_PREFIX \
583  : \
584  : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B) \
585  : "%esi", "memory", "cc" \
586  );
587  #define SquPrologue MulPrologue
588  #define SquEpilogue \
589  AS1( pop ebx) \
590  ATT_PREFIX \
591  : \
592  : "d" (s_maskLow16), "c" (C), "a" (A) \
593  : "%esi", "%edi", "memory", "cc" \
594  );
595  #define TopPrologue MulPrologue
596  #define TopEpilogue \
597  AS1( pop ebx) \
598  ATT_PREFIX \
599  : \
600  : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B), "S" (L) \
601  : "memory", "cc" \
602  );
603 #else
604  #define AddPrologue \
605  __asm push edi \
606  __asm push esi \
607  __asm mov eax, [esp+12] \
608  __asm mov edi, [esp+16]
609  #define AddEpilogue \
610  __asm pop esi \
611  __asm pop edi \
612  __asm ret 8
613  #define SaveEBX
614  #define RestoreEBX
615  #define SquPrologue \
616  AS2( mov eax, A) \
617  AS2( mov ecx, C) \
618  SaveEBX \
619  AS2( lea ebx, s_maskLow16)
620  #define MulPrologue \
621  AS2( mov eax, A) \
622  AS2( mov edi, B) \
623  AS2( mov ecx, C) \
624  SaveEBX \
625  AS2( lea ebx, s_maskLow16)
626  #define TopPrologue \
627  AS2( mov eax, A) \
628  AS2( mov edi, B) \
629  AS2( mov ecx, C) \
630  AS2( mov esi, L) \
631  SaveEBX \
632  AS2( lea ebx, s_maskLow16)
633  #define SquEpilogue RestoreEBX
634  #define MulEpilogue RestoreEBX
635  #define TopEpilogue RestoreEBX
636 #endif
637 
638 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
639 extern "C" {
640 int Baseline_Add(size_t N, word *C, const word *A, const word *B);
641 int Baseline_Sub(size_t N, word *C, const word *A, const word *B);
642 }
643 #elif defined(CRYPTOPP_X64_ASM_AVAILABLE) && defined(__GNUC__) && defined(CRYPTOPP_WORD128_AVAILABLE)
644 int Baseline_Add(size_t N, word *C, const word *A, const word *B)
645 {
646  word result;
647  __asm__ __volatile__
648  (
649  INTEL_NOPREFIX
650  AS1( neg %1)
651  ASJ( jz, 1, f)
652  AS2( mov %0,[%3+8*%1])
653  AS2( add %0,[%4+8*%1])
654  AS2( mov [%2+8*%1],%0)
655  ASL(0)
656  AS2( mov %0,[%3+8*%1+8])
657  AS2( adc %0,[%4+8*%1+8])
658  AS2( mov [%2+8*%1+8],%0)
659  AS2( lea %1,[%1+2])
660  ASJ( jrcxz, 1, f)
661  AS2( mov %0,[%3+8*%1])
662  AS2( adc %0,[%4+8*%1])
663  AS2( mov [%2+8*%1],%0)
664  ASJ( jmp, 0, b)
665  ASL(1)
666  AS2( mov %0, 0)
667  AS2( adc %0, %0)
668  ATT_NOPREFIX
669  : "=&r" (result), "+c" (N)
670  : "r" (C+N), "r" (A+N), "r" (B+N)
671  : "memory", "cc"
672  );
673  return (int)result;
674 }
675 
676 int Baseline_Sub(size_t N, word *C, const word *A, const word *B)
677 {
678  word result;
679  __asm__ __volatile__
680  (
681  INTEL_NOPREFIX
682  AS1( neg %1)
683  ASJ( jz, 1, f)
684  AS2( mov %0,[%3+8*%1])
685  AS2( sub %0,[%4+8*%1])
686  AS2( mov [%2+8*%1],%0)
687  ASL(0)
688  AS2( mov %0,[%3+8*%1+8])
689  AS2( sbb %0,[%4+8*%1+8])
690  AS2( mov [%2+8*%1+8],%0)
691  AS2( lea %1,[%1+2])
692  ASJ( jrcxz, 1, f)
693  AS2( mov %0,[%3+8*%1])
694  AS2( sbb %0,[%4+8*%1])
695  AS2( mov [%2+8*%1],%0)
696  ASJ( jmp, 0, b)
697  ASL(1)
698  AS2( mov %0, 0)
699  AS2( adc %0, %0)
700  ATT_NOPREFIX
701  : "=&r" (result), "+c" (N)
702  : "r" (C+N), "r" (A+N), "r" (B+N)
703  : "memory", "cc"
704  );
705  return (int)result;
706 }
707 #elif defined(CRYPTOPP_X86_ASM_AVAILABLE) && CRYPTOPP_BOOL_X86
708 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
709 {
710  AddPrologue
711 
712  // now: eax = A, edi = B, edx = C, ecx = N
713  AS2( lea eax, [eax+4*ecx])
714  AS2( lea edi, [edi+4*ecx])
715  AS2( lea edx, [edx+4*ecx])
716 
717  AS1( neg ecx) // ecx is negative index
718  AS2( test ecx, 2) // this clears carry flag
719  ASJ( jz, 0, f)
720  AS2( sub ecx, 2)
721  ASJ( jmp, 1, f)
722 
723  ASL(0)
724  ASJ( jecxz, 2, f) // loop until ecx overflows and becomes zero
725  AS2( mov esi,[eax+4*ecx])
726  AS2( adc esi,[edi+4*ecx])
727  AS2( mov [edx+4*ecx],esi)
728  AS2( mov esi,[eax+4*ecx+4])
729  AS2( adc esi,[edi+4*ecx+4])
730  AS2( mov [edx+4*ecx+4],esi)
731  ASL(1)
732  AS2( mov esi,[eax+4*ecx+8])
733  AS2( adc esi,[edi+4*ecx+8])
734  AS2( mov [edx+4*ecx+8],esi)
735  AS2( mov esi,[eax+4*ecx+12])
736  AS2( adc esi,[edi+4*ecx+12])
737  AS2( mov [edx+4*ecx+12],esi)
738 
739  AS2( lea ecx,[ecx+4]) // advance index, avoid inc which causes slowdown on Intel Core 2
740  ASJ( jmp, 0, b)
741 
742  ASL(2)
743  AS2( mov eax, 0)
744  AS1( setc al) // store carry into eax (return result register)
745 
746  AddEpilogue
747 }
748 
749 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B)
750 {
751  AddPrologue
752 
753  // now: eax = A, edi = B, edx = C, ecx = N
754  AS2( lea eax, [eax+4*ecx])
755  AS2( lea edi, [edi+4*ecx])
756  AS2( lea edx, [edx+4*ecx])
757 
758  AS1( neg ecx) // ecx is negative index
759  AS2( test ecx, 2) // this clears carry flag
760  ASJ( jz, 0, f)
761  AS2( sub ecx, 2)
762  ASJ( jmp, 1, f)
763 
764  ASL(0)
765  ASJ( jecxz, 2, f) // loop until ecx overflows and becomes zero
766  AS2( mov esi,[eax+4*ecx])
767  AS2( sbb esi,[edi+4*ecx])
768  AS2( mov [edx+4*ecx],esi)
769  AS2( mov esi,[eax+4*ecx+4])
770  AS2( sbb esi,[edi+4*ecx+4])
771  AS2( mov [edx+4*ecx+4],esi)
772  ASL(1)
773  AS2( mov esi,[eax+4*ecx+8])
774  AS2( sbb esi,[edi+4*ecx+8])
775  AS2( mov [edx+4*ecx+8],esi)
776  AS2( mov esi,[eax+4*ecx+12])
777  AS2( sbb esi,[edi+4*ecx+12])
778  AS2( mov [edx+4*ecx+12],esi)
779 
780  AS2( lea ecx,[ecx+4]) // advance index, avoid inc which causes slowdown on Intel Core 2
781  ASJ( jmp, 0, b)
782 
783  ASL(2)
784  AS2( mov eax, 0)
785  AS1( setc al) // store carry into eax (return result register)
786 
787  AddEpilogue
788 }
789 
790 #if CRYPTOPP_INTEGER_SSE2
791 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Add(size_t N, word *C, const word *A, const word *B)
792 {
793  AddPrologue
794 
795  // now: eax = A, edi = B, edx = C, ecx = N
796  AS2( lea eax, [eax+4*ecx])
797  AS2( lea edi, [edi+4*ecx])
798  AS2( lea edx, [edx+4*ecx])
799 
800  AS1( neg ecx) // ecx is negative index
801  AS2( pxor mm2, mm2)
802  ASJ( jz, 2, f)
803  AS2( test ecx, 2) // this clears carry flag
804  ASJ( jz, 0, f)
805  AS2( sub ecx, 2)
806  ASJ( jmp, 1, f)
807 
808  ASL(0)
809  AS2( movd mm0, DWORD PTR [eax+4*ecx])
810  AS2( movd mm1, DWORD PTR [edi+4*ecx])
811  AS2( paddq mm0, mm1)
812  AS2( paddq mm2, mm0)
813  AS2( movd DWORD PTR [edx+4*ecx], mm2)
814  AS2( psrlq mm2, 32)
815 
816  AS2( movd mm0, DWORD PTR [eax+4*ecx+4])
817  AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
818  AS2( paddq mm0, mm1)
819  AS2( paddq mm2, mm0)
820  AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
821  AS2( psrlq mm2, 32)
822 
823  ASL(1)
824  AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
825  AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
826  AS2( paddq mm0, mm1)
827  AS2( paddq mm2, mm0)
828  AS2( movd DWORD PTR [edx+4*ecx+8], mm2)
829  AS2( psrlq mm2, 32)
830 
831  AS2( movd mm0, DWORD PTR [eax+4*ecx+12])
832  AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
833  AS2( paddq mm0, mm1)
834  AS2( paddq mm2, mm0)
835  AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
836  AS2( psrlq mm2, 32)
837 
838  AS2( add ecx, 4)
839  ASJ( jnz, 0, b)
840 
841  ASL(2)
842  AS2( movd eax, mm2)
843  AS1( emms)
844 
845  AddEpilogue
846 }
847 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Sub(size_t N, word *C, const word *A, const word *B)
848 {
849  AddPrologue
850 
851  // now: eax = A, edi = B, edx = C, ecx = N
852  AS2( lea eax, [eax+4*ecx])
853  AS2( lea edi, [edi+4*ecx])
854  AS2( lea edx, [edx+4*ecx])
855 
856  AS1( neg ecx) // ecx is negative index
857  AS2( pxor mm2, mm2)
858  ASJ( jz, 2, f)
859  AS2( test ecx, 2) // this clears carry flag
860  ASJ( jz, 0, f)
861  AS2( sub ecx, 2)
862  ASJ( jmp, 1, f)
863 
864  ASL(0)
865  AS2( movd mm0, DWORD PTR [eax+4*ecx])
866  AS2( movd mm1, DWORD PTR [edi+4*ecx])
867  AS2( psubq mm0, mm1)
868  AS2( psubq mm0, mm2)
869  AS2( movd DWORD PTR [edx+4*ecx], mm0)
870  AS2( psrlq mm0, 63)
871 
872  AS2( movd mm2, DWORD PTR [eax+4*ecx+4])
873  AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
874  AS2( psubq mm2, mm1)
875  AS2( psubq mm2, mm0)
876  AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
877  AS2( psrlq mm2, 63)
878 
879  ASL(1)
880  AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
881  AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
882  AS2( psubq mm0, mm1)
883  AS2( psubq mm0, mm2)
884  AS2( movd DWORD PTR [edx+4*ecx+8], mm0)
885  AS2( psrlq mm0, 63)
886 
887  AS2( movd mm2, DWORD PTR [eax+4*ecx+12])
888  AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
889  AS2( psubq mm2, mm1)
890  AS2( psubq mm2, mm0)
891  AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
892  AS2( psrlq mm2, 63)
893 
894  AS2( add ecx, 4)
895  ASJ( jnz, 0, b)
896 
897  ASL(2)
898  AS2( movd eax, mm2)
899  AS1( emms)
900 
901  AddEpilogue
902 }
903 #endif // CRYPTOPP_INTEGER_SSE2
904 #else // CRYPTOPP_SSE2_ASM_AVAILABLE
905 int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
906 {
907  CRYPTOPP_ASSERT (N%2 == 0);
908 
909  Declare2Words(u);
910  AssignWord(u, 0);
911  for (size_t i=0; i<N; i+=2)
912  {
913  AddWithCarry(u, A[i], B[i]);
914  C[i] = LowWord(u);
915  AddWithCarry(u, A[i+1], B[i+1]);
916  C[i+1] = LowWord(u);
917  }
918  return int(GetCarry(u));
919 }
920 
921 int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B)
922 {
923  CRYPTOPP_ASSERT (N%2 == 0);
924 
925  Declare2Words(u);
926  AssignWord(u, 0);
927  for (size_t i=0; i<N; i+=2)
928  {
929  SubtractWithBorrow(u, A[i], B[i]);
930  C[i] = LowWord(u);
931  SubtractWithBorrow(u, A[i+1], B[i+1]);
932  C[i+1] = LowWord(u);
933  }
934  return int(GetBorrow(u));
935 }
936 #endif
937 
938 static word LinearMultiply(word *C, const word *AA, word B, size_t N)
939 {
940  // http://github.com/weidai11/cryptopp/issues/188
941  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
942 
943  word carry=0;
944  for(unsigned i=0; i<N; i++)
945  {
946  Declare2Words(p);
947  MultiplyWords(p, A[i], B);
948  Acc2WordsBy1(p, carry);
949  C[i] = LowWord(p);
950  carry = HighWord(p);
951  }
952  return carry;
953 }
954 
955 #ifndef CRYPTOPP_DOXYGEN_PROCESSING
956 
957 #define Mul_2 \
958  Mul_Begin(2) \
959  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
960  Mul_End(1, 1)
961 
962 #define Mul_4 \
963  Mul_Begin(4) \
964  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
965  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
966  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
967  Mul_SaveAcc(3, 1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
968  Mul_SaveAcc(4, 2, 3) Mul_Acc(3, 2) \
969  Mul_End(5, 3)
970 
971 #define Mul_8 \
972  Mul_Begin(8) \
973  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
974  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
975  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
976  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
977  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
978  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
979  Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
980  Mul_SaveAcc(7, 1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
981  Mul_SaveAcc(8, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
982  Mul_SaveAcc(9, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
983  Mul_SaveAcc(10, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
984  Mul_SaveAcc(11, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
985  Mul_SaveAcc(12, 6, 7) Mul_Acc(7, 6) \
986  Mul_End(13, 7)
987 
988 #define Mul_16 \
989  Mul_Begin(16) \
990  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
991  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
992  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
993  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
994  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
995  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
996  Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
997  Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
998  Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
999  Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
1000  Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
1001  Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
1002  Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
1003  Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
1004  Mul_SaveAcc(14, 0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
1005  Mul_SaveAcc(15, 1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
1006  Mul_SaveAcc(16, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
1007  Mul_SaveAcc(17, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
1008  Mul_SaveAcc(18, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
1009  Mul_SaveAcc(19, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
1010  Mul_SaveAcc(20, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
1011  Mul_SaveAcc(21, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
1012  Mul_SaveAcc(22, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
1013  Mul_SaveAcc(23, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
1014  Mul_SaveAcc(24, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
1015  Mul_SaveAcc(25, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
1016  Mul_SaveAcc(26, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
1017  Mul_SaveAcc(27, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
1018  Mul_SaveAcc(28, 14, 15) Mul_Acc(15, 14) \
1019  Mul_End(29, 15)
1020 
1021 #define Squ_2 \
1022  Squ_Begin(2) \
1023  Squ_End(2)
1024 
1025 #define Squ_4 \
1026  Squ_Begin(4) \
1027  Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
1028  Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
1029  Squ_SaveAcc(3, 1, 3) Squ_Diag(2) \
1030  Squ_SaveAcc(4, 2, 3) Squ_NonDiag \
1031  Squ_End(4)
1032 
1033 #define Squ_8 \
1034  Squ_Begin(8) \
1035  Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
1036  Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
1037  Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
1038  Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
1039  Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
1040  Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
1041  Squ_SaveAcc(7, 1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
1042  Squ_SaveAcc(8, 2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
1043  Squ_SaveAcc(9, 3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
1044  Squ_SaveAcc(10, 4, 7) Squ_Acc(5, 6) Squ_NonDiag \
1045  Squ_SaveAcc(11, 5, 7) Squ_Diag(6) \
1046  Squ_SaveAcc(12, 6, 7) Squ_NonDiag \
1047  Squ_End(8)
1048 
1049 #define Squ_16 \
1050  Squ_Begin(16) \
1051  Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
1052  Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
1053  Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
1054  Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
1055  Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
1056  Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
1057  Squ_SaveAcc(7, 0, 8) Squ_Acc(1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
1058  Squ_SaveAcc(8, 0, 9) Squ_Acc(1, 8) Squ_Acc(2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
1059  Squ_SaveAcc(9, 0, 10) Squ_Acc(1, 9) Squ_Acc(2, 8) Squ_Acc(3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
1060  Squ_SaveAcc(10, 0, 11) Squ_Acc(1, 10) Squ_Acc(2, 9) Squ_Acc(3, 8) Squ_Acc(4, 7) Squ_Acc(5, 6) Squ_NonDiag \
1061  Squ_SaveAcc(11, 0, 12) Squ_Acc(1, 11) Squ_Acc(2, 10) Squ_Acc(3, 9) Squ_Acc(4, 8) Squ_Acc(5, 7) Squ_Diag(6) \
1062  Squ_SaveAcc(12, 0, 13) Squ_Acc(1, 12) Squ_Acc(2, 11) Squ_Acc(3, 10) Squ_Acc(4, 9) Squ_Acc(5, 8) Squ_Acc(6, 7) Squ_NonDiag \
1063  Squ_SaveAcc(13, 0, 14) Squ_Acc(1, 13) Squ_Acc(2, 12) Squ_Acc(3, 11) Squ_Acc(4, 10) Squ_Acc(5, 9) Squ_Acc(6, 8) Squ_Diag(7) \
1064  Squ_SaveAcc(14, 0, 15) Squ_Acc(1, 14) Squ_Acc(2, 13) Squ_Acc(3, 12) Squ_Acc(4, 11) Squ_Acc(5, 10) Squ_Acc(6, 9) Squ_Acc(7, 8) Squ_NonDiag \
1065  Squ_SaveAcc(15, 1, 15) Squ_Acc(2, 14) Squ_Acc(3, 13) Squ_Acc(4, 12) Squ_Acc(5, 11) Squ_Acc(6, 10) Squ_Acc(7, 9) Squ_Diag(8) \
1066  Squ_SaveAcc(16, 2, 15) Squ_Acc(3, 14) Squ_Acc(4, 13) Squ_Acc(5, 12) Squ_Acc(6, 11) Squ_Acc(7, 10) Squ_Acc(8, 9) Squ_NonDiag \
1067  Squ_SaveAcc(17, 3, 15) Squ_Acc(4, 14) Squ_Acc(5, 13) Squ_Acc(6, 12) Squ_Acc(7, 11) Squ_Acc(8, 10) Squ_Diag(9) \
1068  Squ_SaveAcc(18, 4, 15) Squ_Acc(5, 14) Squ_Acc(6, 13) Squ_Acc(7, 12) Squ_Acc(8, 11) Squ_Acc(9, 10) Squ_NonDiag \
1069  Squ_SaveAcc(19, 5, 15) Squ_Acc(6, 14) Squ_Acc(7, 13) Squ_Acc(8, 12) Squ_Acc(9, 11) Squ_Diag(10) \
1070  Squ_SaveAcc(20, 6, 15) Squ_Acc(7, 14) Squ_Acc(8, 13) Squ_Acc(9, 12) Squ_Acc(10, 11) Squ_NonDiag \
1071  Squ_SaveAcc(21, 7, 15) Squ_Acc(8, 14) Squ_Acc(9, 13) Squ_Acc(10, 12) Squ_Diag(11) \
1072  Squ_SaveAcc(22, 8, 15) Squ_Acc(9, 14) Squ_Acc(10, 13) Squ_Acc(11, 12) Squ_NonDiag \
1073  Squ_SaveAcc(23, 9, 15) Squ_Acc(10, 14) Squ_Acc(11, 13) Squ_Diag(12) \
1074  Squ_SaveAcc(24, 10, 15) Squ_Acc(11, 14) Squ_Acc(12, 13) Squ_NonDiag \
1075  Squ_SaveAcc(25, 11, 15) Squ_Acc(12, 14) Squ_Diag(13) \
1076  Squ_SaveAcc(26, 12, 15) Squ_Acc(13, 14) Squ_NonDiag \
1077  Squ_SaveAcc(27, 13, 15) Squ_Diag(14) \
1078  Squ_SaveAcc(28, 14, 15) Squ_NonDiag \
1079  Squ_End(16)
1080 
1081 #define Bot_2 \
1082  Mul_Begin(2) \
1083  Bot_SaveAcc(0, 0, 1) Bot_Acc(1, 0) \
1084  Bot_End(2)
1085 
1086 #define Bot_4 \
1087  Mul_Begin(4) \
1088  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1089  Mul_SaveAcc(1, 2, 0) Mul_Acc(1, 1) Mul_Acc(0, 2) \
1090  Bot_SaveAcc(2, 0, 3) Bot_Acc(1, 2) Bot_Acc(2, 1) Bot_Acc(3, 0) \
1091  Bot_End(4)
1092 
1093 #define Bot_8 \
1094  Mul_Begin(8) \
1095  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1096  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
1097  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1098  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
1099  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
1100  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
1101  Bot_SaveAcc(6, 0, 7) Bot_Acc(1, 6) Bot_Acc(2, 5) Bot_Acc(3, 4) Bot_Acc(4, 3) Bot_Acc(5, 2) Bot_Acc(6, 1) Bot_Acc(7, 0) \
1102  Bot_End(8)
1103 
1104 #define Bot_16 \
1105  Mul_Begin(16) \
1106  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1107  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
1108  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1109  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
1110  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
1111  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
1112  Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
1113  Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
1114  Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
1115  Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
1116  Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
1117  Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
1118  Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
1119  Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
1120  Bot_SaveAcc(14, 0, 15) Bot_Acc(1, 14) Bot_Acc(2, 13) Bot_Acc(3, 12) Bot_Acc(4, 11) Bot_Acc(5, 10) Bot_Acc(6, 9) Bot_Acc(7, 8) Bot_Acc(8, 7) Bot_Acc(9, 6) Bot_Acc(10, 5) Bot_Acc(11, 4) Bot_Acc(12, 3) Bot_Acc(13, 2) Bot_Acc(14, 1) Bot_Acc(15, 0) \
1121  Bot_End(16)
1122 
1123 #endif
1124 
1125 #if 0
1126 #define Mul_Begin(n) \
1127  Declare2Words(p) \
1128  Declare2Words(c) \
1129  Declare2Words(d) \
1130  MultiplyWords(p, A[0], B[0]) \
1131  AssignWord(c, LowWord(p)) \
1132  AssignWord(d, HighWord(p))
1133 
1134 #define Mul_Acc(i, j) \
1135  MultiplyWords(p, A[i], B[j]) \
1136  Acc2WordsBy1(c, LowWord(p)) \
1137  Acc2WordsBy1(d, HighWord(p))
1138 
1139 #define Mul_SaveAcc(k, i, j) \
1140  R[k] = LowWord(c); \
1141  Add2WordsBy1(c, d, HighWord(c)) \
1142  MultiplyWords(p, A[i], B[j]) \
1143  AssignWord(d, HighWord(p)) \
1144  Acc2WordsBy1(c, LowWord(p))
1145 
1146 #define Mul_End(n) \
1147  R[2*n-3] = LowWord(c); \
1148  Acc2WordsBy1(d, HighWord(c)) \
1149  MultiplyWords(p, A[n-1], B[n-1])\
1150  Acc2WordsBy2(d, p) \
1151  R[2*n-2] = LowWord(d); \
1152  R[2*n-1] = HighWord(d);
1153 
1154 #define Bot_SaveAcc(k, i, j) \
1155  R[k] = LowWord(c); \
1156  word e = LowWord(d) + HighWord(c); \
1157  e += A[i] * B[j];
1158 
1159 #define Bot_Acc(i, j) \
1160  e += A[i] * B[j];
1161 
1162 #define Bot_End(n) \
1163  R[n-1] = e;
1164 #else
1165 #define Mul_Begin(n) \
1166  Declare2Words(p) \
1167  word c; \
1168  Declare2Words(d) \
1169  MultiplyWords(p, A[0], B[0]) \
1170  c = LowWord(p); \
1171  AssignWord(d, HighWord(p))
1172 
1173 #define Mul_Acc(i, j) \
1174  MulAcc(c, d, A[i], B[j])
1175 
1176 #define Mul_SaveAcc(k, i, j) \
1177  R[k] = c; \
1178  c = LowWord(d); \
1179  AssignWord(d, HighWord(d)) \
1180  MulAcc(c, d, A[i], B[j])
1181 
1182 #define Mul_End(k, i) \
1183  R[k] = c; \
1184  MultiplyWords(p, A[i], B[i]) \
1185  Acc2WordsBy2(p, d) \
1186  R[k+1] = LowWord(p); \
1187  R[k+2] = HighWord(p);
1188 
1189 #define Bot_SaveAcc(k, i, j) \
1190  R[k] = c; \
1191  c = LowWord(d); \
1192  c += A[i] * B[j];
1193 
1194 #define Bot_Acc(i, j) \
1195  c += A[i] * B[j];
1196 
1197 #define Bot_End(n) \
1198  R[n-1] = c;
1199 #endif
1200 
1201 #define Squ_Begin(n) \
1202  Declare2Words(p) \
1203  word c; \
1204  Declare2Words(d) \
1205  Declare2Words(e) \
1206  MultiplyWords(p, A[0], A[0]) \
1207  R[0] = LowWord(p); \
1208  AssignWord(e, HighWord(p)) \
1209  MultiplyWords(p, A[0], A[1]) \
1210  c = LowWord(p); \
1211  AssignWord(d, HighWord(p)) \
1212  Squ_NonDiag \
1213 
1214 #define Squ_NonDiag \
1215  Double3Words(c, d)
1216 
1217 #define Squ_SaveAcc(k, i, j) \
1218  Acc3WordsBy2(c, d, e) \
1219  R[k] = c; \
1220  MultiplyWords(p, A[i], A[j]) \
1221  c = LowWord(p); \
1222  AssignWord(d, HighWord(p)) \
1223 
1224 #define Squ_Acc(i, j) \
1225  MulAcc(c, d, A[i], A[j])
1226 
1227 #define Squ_Diag(i) \
1228  Squ_NonDiag \
1229  MulAcc(c, d, A[i], A[i])
1230 
1231 #define Squ_End(n) \
1232  Acc3WordsBy2(c, d, e) \
1233  R[2*n-3] = c; \
1234  MultiplyWords(p, A[n-1], A[n-1])\
1235  Acc2WordsBy2(p, e) \
1236  R[2*n-2] = LowWord(p); \
1237  R[2*n-1] = HighWord(p);
1238 
1239 
1240 void Baseline_Multiply2(word *R, const word *AA, const word *BB)
1241 {
1242  // http://github.com/weidai11/cryptopp/issues/188
1243  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1244  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1245 
1246  Mul_2
1247 }
1248 
1249 void Baseline_Multiply4(word *R, const word *AA, const word *BB)
1250 {
1251  // http://github.com/weidai11/cryptopp/issues/188
1252  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1253  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1254 
1255  Mul_4
1256 }
1257 
1258 void Baseline_Multiply8(word *R, const word *AA, const word *BB)
1259 {
1260  // http://github.com/weidai11/cryptopp/issues/188
1261  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1262  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1263 
1264  Mul_8
1265 }
1266 
1267 void Baseline_Square2(word *R, const word *AA)
1268 {
1269  // http://github.com/weidai11/cryptopp/issues/188
1270  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1271 
1272  Squ_2
1273 }
1274 
1275 void Baseline_Square4(word *R, const word *AA)
1276 {
1277  // http://github.com/weidai11/cryptopp/issues/188
1278  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1279 
1280  Squ_4
1281 }
1282 
1283 void Baseline_Square8(word *R, const word *AA)
1284 {
1285  // http://github.com/weidai11/cryptopp/issues/188
1286  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1287 
1288  Squ_8
1289 }
1290 
1291 void Baseline_MultiplyBottom2(word *R, const word *AA, const word *BB)
1292 {
1293  // http://github.com/weidai11/cryptopp/issues/188
1294  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1295  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1296 
1297  Bot_2
1298 }
1299 
1300 void Baseline_MultiplyBottom4(word *R, const word *AA, const word *BB)
1301 {
1302  // http://github.com/weidai11/cryptopp/issues/188
1303  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1304  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1305 
1306  Bot_4
1307 }
1308 
1309 void Baseline_MultiplyBottom8(word *R, const word *AA, const word *BB)
1310 {
1311  // http://github.com/weidai11/cryptopp/issues/188
1312  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1313  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1314 
1315  Bot_8
1316 }
1317 
1318 #define Top_Begin(n) \
1319  Declare2Words(p) \
1320  word c; \
1321  Declare2Words(d) \
1322  MultiplyWords(p, A[0], B[n-2]);\
1323  AssignWord(d, HighWord(p));
1324 
1325 #define Top_Acc(i, j) \
1326  MultiplyWords(p, A[i], B[j]);\
1327  Acc2WordsBy1(d, HighWord(p));
1328 
1329 #define Top_SaveAcc0(i, j) \
1330  c = LowWord(d); \
1331  AssignWord(d, HighWord(d)) \
1332  MulAcc(c, d, A[i], B[j])
1333 
1334 #define Top_SaveAcc1(i, j) \
1335  c = L<c; \
1336  Acc2WordsBy1(d, c); \
1337  c = LowWord(d); \
1338  AssignWord(d, HighWord(d)) \
1339  MulAcc(c, d, A[i], B[j])
1340 
1341 void Baseline_MultiplyTop2(word *R, const word *A, const word *B, word L)
1342 {
1343  CRYPTOPP_UNUSED(L);
1344  word T[4];
1345  Baseline_Multiply2(T, A, B);
1346  R[0] = T[2];
1347  R[1] = T[3];
1348 }
1349 
1350 void Baseline_MultiplyTop4(word *R, const word *AA, const word *BB, word L)
1351 {
1352  // http://github.com/weidai11/cryptopp/issues/188
1353  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1354  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1355 
1356  Top_Begin(4)
1357  Top_Acc(1, 1) Top_Acc(2, 0) \
1358  Top_SaveAcc0(0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1359  Top_SaveAcc1(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
1360  Mul_SaveAcc(0, 2, 3) Mul_Acc(3, 2) \
1361  Mul_End(1, 3)
1362 }
1363 
1364 void Baseline_MultiplyTop8(word *R, const word *AA, const word *BB, word L)
1365 {
1366  // http://github.com/weidai11/cryptopp/issues/188
1367  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1368  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1369 
1370  Top_Begin(8)
1371  Top_Acc(1, 5) Top_Acc(2, 4) Top_Acc(3, 3) Top_Acc(4, 2) Top_Acc(5, 1) Top_Acc(6, 0) \
1372  Top_SaveAcc0(0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
1373  Top_SaveAcc1(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
1374  Mul_SaveAcc(0, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
1375  Mul_SaveAcc(1, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
1376  Mul_SaveAcc(2, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
1377  Mul_SaveAcc(3, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
1378  Mul_SaveAcc(4, 6, 7) Mul_Acc(7, 6) \
1379  Mul_End(5, 7)
1380 }
1381 
1382 #if !CRYPTOPP_INTEGER_SSE2 // save memory by not compiling these functions when SSE2 is available
1383 void Baseline_Multiply16(word *R, const word *AA, const word *BB)
1384 {
1385  // http://github.com/weidai11/cryptopp/issues/188
1386  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1387  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1388 
1389  Mul_16
1390 }
1391 
1392 void Baseline_Square16(word *R, const word *AA)
1393 {
1394  // http://github.com/weidai11/cryptopp/issues/188
1395  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1396 
1397  Squ_16
1398 }
1399 
1400 void Baseline_MultiplyBottom16(word *R, const word *AA, const word *BB)
1401 {
1402  // http://github.com/weidai11/cryptopp/issues/188
1403  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1404  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1405 
1406  Bot_16
1407 }
1408 
1409 void Baseline_MultiplyTop16(word *R, const word *AA, const word *BB, word L)
1410 {
1411  // http://github.com/weidai11/cryptopp/issues/188
1412  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1413  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1414 
1415  Top_Begin(16)
1416  Top_Acc(1, 13) Top_Acc(2, 12) Top_Acc(3, 11) Top_Acc(4, 10) Top_Acc(5, 9) Top_Acc(6, 8) Top_Acc(7, 7) Top_Acc(8, 6) Top_Acc(9, 5) Top_Acc(10, 4) Top_Acc(11, 3) Top_Acc(12, 2) Top_Acc(13, 1) Top_Acc(14, 0) \
1417  Top_SaveAcc0(0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
1418  Top_SaveAcc1(1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
1419  Mul_SaveAcc(0, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
1420  Mul_SaveAcc(1, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
1421  Mul_SaveAcc(2, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
1422  Mul_SaveAcc(3, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
1423  Mul_SaveAcc(4, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
1424  Mul_SaveAcc(5, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
1425  Mul_SaveAcc(6, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
1426  Mul_SaveAcc(7, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
1427  Mul_SaveAcc(8, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
1428  Mul_SaveAcc(9, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
1429  Mul_SaveAcc(10, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
1430  Mul_SaveAcc(11, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
1431  Mul_SaveAcc(12, 14, 15) Mul_Acc(15, 14) \
1432  Mul_End(13, 15)
1433 }
1434 #endif
1435 
1436 // ********************************************************
1437 
1438 #if CRYPTOPP_INTEGER_SSE2
1439 
1440 CRYPTOPP_ALIGN_DATA(16) static const word32 s_maskLow16[4] CRYPTOPP_SECTION_ALIGN16 = {0xffff,0xffff,0xffff,0xffff};
1441 
1442 #undef Mul_Begin
1443 #undef Mul_Acc
1444 #undef Top_Begin
1445 #undef Top_Acc
1446 #undef Squ_Acc
1447 #undef Squ_NonDiag
1448 #undef Squ_Diag
1449 #undef Squ_SaveAcc
1450 #undef Squ_Begin
1451 #undef Mul_SaveAcc
1452 #undef Bot_Acc
1453 #undef Bot_SaveAcc
1454 #undef Bot_End
1455 #undef Squ_End
1456 #undef Mul_End
1457 
1458 #define SSE2_FinalSave(k) \
1459  AS2( psllq xmm5, 16) \
1460  AS2( paddq xmm4, xmm5) \
1461  AS2( movq QWORD PTR [ecx+8*(k)], xmm4)
1462 
1463 #define SSE2_SaveShift(k) \
1464  AS2( movq xmm0, xmm6) \
1465  AS2( punpckhqdq xmm6, xmm0) \
1466  AS2( movq xmm1, xmm7) \
1467  AS2( punpckhqdq xmm7, xmm1) \
1468  AS2( paddd xmm6, xmm0) \
1469  AS2( pslldq xmm6, 4) \
1470  AS2( paddd xmm7, xmm1) \
1471  AS2( paddd xmm4, xmm6) \
1472  AS2( pslldq xmm7, 4) \
1473  AS2( movq xmm6, xmm4) \
1474  AS2( paddd xmm5, xmm7) \
1475  AS2( movq xmm7, xmm5) \
1476  AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
1477  AS2( psrlq xmm6, 16) \
1478  AS2( paddq xmm6, xmm7) \
1479  AS2( punpckhqdq xmm4, xmm0) \
1480  AS2( punpckhqdq xmm5, xmm0) \
1481  AS2( movq QWORD PTR [ecx+8*(k)+2], xmm6) \
1482  AS2( psrlq xmm6, 3*16) \
1483  AS2( paddd xmm4, xmm6) \
1484 
1485 #define Squ_SSE2_SaveShift(k) \
1486  AS2( movq xmm0, xmm6) \
1487  AS2( punpckhqdq xmm6, xmm0) \
1488  AS2( movq xmm1, xmm7) \
1489  AS2( punpckhqdq xmm7, xmm1) \
1490  AS2( paddd xmm6, xmm0) \
1491  AS2( pslldq xmm6, 4) \
1492  AS2( paddd xmm7, xmm1) \
1493  AS2( paddd xmm4, xmm6) \
1494  AS2( pslldq xmm7, 4) \
1495  AS2( movhlps xmm6, xmm4) \
1496  AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
1497  AS2( paddd xmm5, xmm7) \
1498  AS2( movhps QWORD PTR [esp+12], xmm5)\
1499  AS2( psrlq xmm4, 16) \
1500  AS2( paddq xmm4, xmm5) \
1501  AS2( movq QWORD PTR [ecx+8*(k)+2], xmm4) \
1502  AS2( psrlq xmm4, 3*16) \
1503  AS2( paddd xmm4, xmm6) \
1504  AS2( movq QWORD PTR [esp+4], xmm4)\
1505 
1506 #define SSE2_FirstMultiply(i) \
1507  AS2( movdqa xmm7, [esi+(i)*16])\
1508  AS2( movdqa xmm5, [edi-(i)*16])\
1509  AS2( pmuludq xmm5, xmm7) \
1510  AS2( movdqa xmm4, [ebx])\
1511  AS2( movdqa xmm6, xmm4) \
1512  AS2( pand xmm4, xmm5) \
1513  AS2( psrld xmm5, 16) \
1514  AS2( pmuludq xmm7, [edx-(i)*16])\
1515  AS2( pand xmm6, xmm7) \
1516  AS2( psrld xmm7, 16)
1517 
1518 #define Squ_Begin(n) \
1519  SquPrologue \
1520  AS2( mov esi, esp)\
1521  AS2( and esp, 0xfffffff0)\
1522  AS2( lea edi, [esp-32*n])\
1523  AS2( sub esp, 32*n+16)\
1524  AS1( push esi)\
1525  AS2( mov esi, edi) \
1526  AS2( xor edx, edx) \
1527  ASL(1) \
1528  ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
1529  ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
1530  AS2( movdqa [edi+2*edx], xmm0) \
1531  AS2( psrlq xmm0, 32) \
1532  AS2( movdqa [edi+2*edx+16], xmm0) \
1533  AS2( movdqa [edi+16*n+2*edx], xmm1) \
1534  AS2( psrlq xmm1, 32) \
1535  AS2( movdqa [edi+16*n+2*edx+16], xmm1) \
1536  AS2( add edx, 16) \
1537  AS2( cmp edx, 8*(n)) \
1538  ASJ( jne, 1, b) \
1539  AS2( lea edx, [edi+16*n])\
1540  SSE2_FirstMultiply(0) \
1541 
1542 #define Squ_Acc(i) \
1543  ASL(LSqu##i) \
1544  AS2( movdqa xmm1, [esi+(i)*16]) \
1545  AS2( movdqa xmm0, [edi-(i)*16]) \
1546  AS2( movdqa xmm2, [ebx]) \
1547  AS2( pmuludq xmm0, xmm1) \
1548  AS2( pmuludq xmm1, [edx-(i)*16]) \
1549  AS2( movdqa xmm3, xmm2) \
1550  AS2( pand xmm2, xmm0) \
1551  AS2( psrld xmm0, 16) \
1552  AS2( paddd xmm4, xmm2) \
1553  AS2( paddd xmm5, xmm0) \
1554  AS2( pand xmm3, xmm1) \
1555  AS2( psrld xmm1, 16) \
1556  AS2( paddd xmm6, xmm3) \
1557  AS2( paddd xmm7, xmm1) \
1558 
1559 #define Squ_Acc1(i)
1560 #define Squ_Acc2(i) ASC(call, LSqu##i)
1561 #define Squ_Acc3(i) Squ_Acc2(i)
1562 #define Squ_Acc4(i) Squ_Acc2(i)
1563 #define Squ_Acc5(i) Squ_Acc2(i)
1564 #define Squ_Acc6(i) Squ_Acc2(i)
1565 #define Squ_Acc7(i) Squ_Acc2(i)
1566 #define Squ_Acc8(i) Squ_Acc2(i)
1567 
1568 #define SSE2_End(E, n) \
1569  SSE2_SaveShift(2*(n)-3) \
1570  AS2( movdqa xmm7, [esi+16]) \
1571  AS2( movdqa xmm0, [edi]) \
1572  AS2( pmuludq xmm0, xmm7) \
1573  AS2( movdqa xmm2, [ebx]) \
1574  AS2( pmuludq xmm7, [edx]) \
1575  AS2( movdqa xmm6, xmm2) \
1576  AS2( pand xmm2, xmm0) \
1577  AS2( psrld xmm0, 16) \
1578  AS2( paddd xmm4, xmm2) \
1579  AS2( paddd xmm5, xmm0) \
1580  AS2( pand xmm6, xmm7) \
1581  AS2( psrld xmm7, 16) \
1582  SSE2_SaveShift(2*(n)-2) \
1583  SSE2_FinalSave(2*(n)-1) \
1584  AS1( pop esp)\
1585  E
1586 
1587 #define Squ_End(n) SSE2_End(SquEpilogue, n)
1588 #define Mul_End(n) SSE2_End(MulEpilogue, n)
1589 #define Top_End(n) SSE2_End(TopEpilogue, n)
1590 
1591 #define Squ_Column1(k, i) \
1592  Squ_SSE2_SaveShift(k) \
1593  AS2( add esi, 16) \
1594  SSE2_FirstMultiply(1)\
1595  Squ_Acc##i(i) \
1596  AS2( paddd xmm4, xmm4) \
1597  AS2( paddd xmm5, xmm5) \
1598  AS2( movdqa xmm3, [esi]) \
1599  AS2( movq xmm1, QWORD PTR [esi+8]) \
1600  AS2( pmuludq xmm1, xmm3) \
1601  AS2( pmuludq xmm3, xmm3) \
1602  AS2( movdqa xmm0, [ebx])\
1603  AS2( movdqa xmm2, xmm0) \
1604  AS2( pand xmm0, xmm1) \
1605  AS2( psrld xmm1, 16) \
1606  AS2( paddd xmm6, xmm0) \
1607  AS2( paddd xmm7, xmm1) \
1608  AS2( pand xmm2, xmm3) \
1609  AS2( psrld xmm3, 16) \
1610  AS2( paddd xmm6, xmm6) \
1611  AS2( paddd xmm7, xmm7) \
1612  AS2( paddd xmm4, xmm2) \
1613  AS2( paddd xmm5, xmm3) \
1614  AS2( movq xmm0, QWORD PTR [esp+4])\
1615  AS2( movq xmm1, QWORD PTR [esp+12])\
1616  AS2( paddd xmm4, xmm0)\
1617  AS2( paddd xmm5, xmm1)\
1618 
1619 #define Squ_Column0(k, i) \
1620  Squ_SSE2_SaveShift(k) \
1621  AS2( add edi, 16) \
1622  AS2( add edx, 16) \
1623  SSE2_FirstMultiply(1)\
1624  Squ_Acc##i(i) \
1625  AS2( paddd xmm6, xmm6) \
1626  AS2( paddd xmm7, xmm7) \
1627  AS2( paddd xmm4, xmm4) \
1628  AS2( paddd xmm5, xmm5) \
1629  AS2( movq xmm0, QWORD PTR [esp+4])\
1630  AS2( movq xmm1, QWORD PTR [esp+12])\
1631  AS2( paddd xmm4, xmm0)\
1632  AS2( paddd xmm5, xmm1)\
1633 
1634 #define SSE2_MulAdd45 \
1635  AS2( movdqa xmm7, [esi]) \
1636  AS2( movdqa xmm0, [edi]) \
1637  AS2( pmuludq xmm0, xmm7) \
1638  AS2( movdqa xmm2, [ebx]) \
1639  AS2( pmuludq xmm7, [edx]) \
1640  AS2( movdqa xmm6, xmm2) \
1641  AS2( pand xmm2, xmm0) \
1642  AS2( psrld xmm0, 16) \
1643  AS2( paddd xmm4, xmm2) \
1644  AS2( paddd xmm5, xmm0) \
1645  AS2( pand xmm6, xmm7) \
1646  AS2( psrld xmm7, 16)
1647 
1648 #define Mul_Begin(n) \
1649  MulPrologue \
1650  AS2( mov esi, esp)\
1651  AS2( and esp, 0xfffffff0)\
1652  AS2( sub esp, 48*n+16)\
1653  AS1( push esi)\
1654  AS2( xor edx, edx) \
1655  ASL(1) \
1656  ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
1657  ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
1658  ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
1659  AS2( movdqa [esp+20+2*edx], xmm0) \
1660  AS2( psrlq xmm0, 32) \
1661  AS2( movdqa [esp+20+2*edx+16], xmm0) \
1662  AS2( movdqa [esp+20+16*n+2*edx], xmm1) \
1663  AS2( psrlq xmm1, 32) \
1664  AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \
1665  AS2( movdqa [esp+20+32*n+2*edx], xmm2) \
1666  AS2( psrlq xmm2, 32) \
1667  AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \
1668  AS2( add edx, 16) \
1669  AS2( cmp edx, 8*(n)) \
1670  ASJ( jne, 1, b) \
1671  AS2( lea edi, [esp+20])\
1672  AS2( lea edx, [esp+20+16*n])\
1673  AS2( lea esi, [esp+20+32*n])\
1674  SSE2_FirstMultiply(0) \
1675 
1676 #define Mul_Acc(i) \
1677  ASL(LMul##i) \
1678  AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
1679  AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
1680  AS2( movdqa xmm2, [ebx]) \
1681  AS2( pmuludq xmm0, xmm1) \
1682  AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
1683  AS2( movdqa xmm3, xmm2) \
1684  AS2( pand xmm2, xmm0) \
1685  AS2( psrld xmm0, 16) \
1686  AS2( paddd xmm4, xmm2) \
1687  AS2( paddd xmm5, xmm0) \
1688  AS2( pand xmm3, xmm1) \
1689  AS2( psrld xmm1, 16) \
1690  AS2( paddd xmm6, xmm3) \
1691  AS2( paddd xmm7, xmm1) \
1692 
1693 #define Mul_Acc1(i)
1694 #define Mul_Acc2(i) ASC(call, LMul##i)
1695 #define Mul_Acc3(i) Mul_Acc2(i)
1696 #define Mul_Acc4(i) Mul_Acc2(i)
1697 #define Mul_Acc5(i) Mul_Acc2(i)
1698 #define Mul_Acc6(i) Mul_Acc2(i)
1699 #define Mul_Acc7(i) Mul_Acc2(i)
1700 #define Mul_Acc8(i) Mul_Acc2(i)
1701 #define Mul_Acc9(i) Mul_Acc2(i)
1702 #define Mul_Acc10(i) Mul_Acc2(i)
1703 #define Mul_Acc11(i) Mul_Acc2(i)
1704 #define Mul_Acc12(i) Mul_Acc2(i)
1705 #define Mul_Acc13(i) Mul_Acc2(i)
1706 #define Mul_Acc14(i) Mul_Acc2(i)
1707 #define Mul_Acc15(i) Mul_Acc2(i)
1708 #define Mul_Acc16(i) Mul_Acc2(i)
1709 
1710 #define Mul_Column1(k, i) \
1711  SSE2_SaveShift(k) \
1712  AS2( add esi, 16) \
1713  SSE2_MulAdd45\
1714  Mul_Acc##i(i) \
1715 
1716 #define Mul_Column0(k, i) \
1717  SSE2_SaveShift(k) \
1718  AS2( add edi, 16) \
1719  AS2( add edx, 16) \
1720  SSE2_MulAdd45\
1721  Mul_Acc##i(i) \
1722 
1723 #define Bot_Acc(i) \
1724  AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
1725  AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
1726  AS2( pmuludq xmm0, xmm1) \
1727  AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
1728  AS2( paddq xmm4, xmm0) \
1729  AS2( paddd xmm6, xmm1)
1730 
1731 #define Bot_SaveAcc(k) \
1732  SSE2_SaveShift(k) \
1733  AS2( add edi, 16) \
1734  AS2( add edx, 16) \
1735  AS2( movdqa xmm6, [esi]) \
1736  AS2( movdqa xmm0, [edi]) \
1737  AS2( pmuludq xmm0, xmm6) \
1738  AS2( paddq xmm4, xmm0) \
1739  AS2( psllq xmm5, 16) \
1740  AS2( paddq xmm4, xmm5) \
1741  AS2( pmuludq xmm6, [edx])
1742 
1743 #define Bot_End(n) \
1744  AS2( movhlps xmm7, xmm6) \
1745  AS2( paddd xmm6, xmm7) \
1746  AS2( psllq xmm6, 32) \
1747  AS2( paddd xmm4, xmm6) \
1748  AS2( movq QWORD PTR [ecx+8*((n)-1)], xmm4) \
1749  AS1( pop esp)\
1750  MulEpilogue
1751 
1752 #define Top_Begin(n) \
1753  TopPrologue \
1754  AS2( mov edx, esp)\
1755  AS2( and esp, 0xfffffff0)\
1756  AS2( sub esp, 48*n+16)\
1757  AS1( push edx)\
1758  AS2( xor edx, edx) \
1759  ASL(1) \
1760  ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
1761  ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
1762  ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
1763  AS2( movdqa [esp+20+2*edx], xmm0) \
1764  AS2( psrlq xmm0, 32) \
1765  AS2( movdqa [esp+20+2*edx+16], xmm0) \
1766  AS2( movdqa [esp+20+16*n+2*edx], xmm1) \
1767  AS2( psrlq xmm1, 32) \
1768  AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \
1769  AS2( movdqa [esp+20+32*n+2*edx], xmm2) \
1770  AS2( psrlq xmm2, 32) \
1771  AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \
1772  AS2( add edx, 16) \
1773  AS2( cmp edx, 8*(n)) \
1774  ASJ( jne, 1, b) \
1775  AS2( mov eax, esi) \
1776  AS2( lea edi, [esp+20+00*n+16*(n/2-1)])\
1777  AS2( lea edx, [esp+20+16*n+16*(n/2-1)])\
1778  AS2( lea esi, [esp+20+32*n+16*(n/2-1)])\
1779  AS2( pxor xmm4, xmm4)\
1780  AS2( pxor xmm5, xmm5)
1781 
1782 #define Top_Acc(i) \
1783  AS2( movq xmm0, QWORD PTR [esi+i/2*(1-(i-2*(i/2))*2)*16+8]) \
1784  AS2( pmuludq xmm0, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
1785  AS2( psrlq xmm0, 48) \
1786  AS2( paddd xmm5, xmm0)\
1787 
1788 #define Top_Column0(i) \
1789  AS2( psllq xmm5, 32) \
1790  AS2( add edi, 16) \
1791  AS2( add edx, 16) \
1792  SSE2_MulAdd45\
1793  Mul_Acc##i(i) \
1794 
1795 #define Top_Column1(i) \
1796  SSE2_SaveShift(0) \
1797  AS2( add esi, 16) \
1798  SSE2_MulAdd45\
1799  Mul_Acc##i(i) \
1800  AS2( shr eax, 16) \
1801  AS2( movd xmm0, eax)\
1802  AS2( movd xmm1, [ecx+4])\
1803  AS2( psrld xmm1, 16)\
1804  AS2( pcmpgtd xmm1, xmm0)\
1805  AS2( psrld xmm1, 31)\
1806  AS2( paddd xmm4, xmm1)\
1807 
1808 void SSE2_Square4(word *C, const word *A)
1809 {
1810  Squ_Begin(2)
1811  Squ_Column0(0, 1)
1812  Squ_End(2)
1813 }
1814 
1815 void SSE2_Square8(word *C, const word *A)
1816 {
1817  Squ_Begin(4)
1818 #ifndef __GNUC__
1819  ASJ( jmp, 0, f)
1820  Squ_Acc(2)
1821  AS1( ret) ASL(0)
1822 #endif
1823  Squ_Column0(0, 1)
1824  Squ_Column1(1, 1)
1825  Squ_Column0(2, 2)
1826  Squ_Column1(3, 1)
1827  Squ_Column0(4, 1)
1828  Squ_End(4)
1829 }
1830 
1831 void SSE2_Square16(word *C, const word *A)
1832 {
1833  Squ_Begin(8)
1834 #ifndef __GNUC__
1835  ASJ( jmp, 0, f)
1836  Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
1837  AS1( ret) ASL(0)
1838 #endif
1839  Squ_Column0(0, 1)
1840  Squ_Column1(1, 1)
1841  Squ_Column0(2, 2)
1842  Squ_Column1(3, 2)
1843  Squ_Column0(4, 3)
1844  Squ_Column1(5, 3)
1845  Squ_Column0(6, 4)
1846  Squ_Column1(7, 3)
1847  Squ_Column0(8, 3)
1848  Squ_Column1(9, 2)
1849  Squ_Column0(10, 2)
1850  Squ_Column1(11, 1)
1851  Squ_Column0(12, 1)
1852  Squ_End(8)
1853 }
1854 
1855 void SSE2_Square32(word *C, const word *A)
1856 {
1857  Squ_Begin(16)
1858  ASJ( jmp, 0, f)
1859  Squ_Acc(8) Squ_Acc(7) Squ_Acc(6) Squ_Acc(5) Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
1860  AS1( ret) ASL(0)
1861  Squ_Column0(0, 1)
1862  Squ_Column1(1, 1)
1863  Squ_Column0(2, 2)
1864  Squ_Column1(3, 2)
1865  Squ_Column0(4, 3)
1866  Squ_Column1(5, 3)
1867  Squ_Column0(6, 4)
1868  Squ_Column1(7, 4)
1869  Squ_Column0(8, 5)
1870  Squ_Column1(9, 5)
1871  Squ_Column0(10, 6)
1872  Squ_Column1(11, 6)
1873  Squ_Column0(12, 7)
1874  Squ_Column1(13, 7)
1875  Squ_Column0(14, 8)
1876  Squ_Column1(15, 7)
1877  Squ_Column0(16, 7)
1878  Squ_Column1(17, 6)
1879  Squ_Column0(18, 6)
1880  Squ_Column1(19, 5)
1881  Squ_Column0(20, 5)
1882  Squ_Column1(21, 4)
1883  Squ_Column0(22, 4)
1884  Squ_Column1(23, 3)
1885  Squ_Column0(24, 3)
1886  Squ_Column1(25, 2)
1887  Squ_Column0(26, 2)
1888  Squ_Column1(27, 1)
1889  Squ_Column0(28, 1)
1890  Squ_End(16)
1891 }
1892 
1893 void SSE2_Multiply4(word *C, const word *A, const word *B)
1894 {
1895  Mul_Begin(2)
1896 #ifndef __GNUC__
1897  ASJ( jmp, 0, f)
1898  Mul_Acc(2)
1899  AS1( ret) ASL(0)
1900 #endif
1901  Mul_Column0(0, 2)
1902  Mul_End(2)
1903 }
1904 
1905 void SSE2_Multiply8(word *C, const word *A, const word *B)
1906 {
1907  Mul_Begin(4)
1908 #ifndef __GNUC__
1909  ASJ( jmp, 0, f)
1910  Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1911  AS1( ret) ASL(0)
1912 #endif
1913  Mul_Column0(0, 2)
1914  Mul_Column1(1, 3)
1915  Mul_Column0(2, 4)
1916  Mul_Column1(3, 3)
1917  Mul_Column0(4, 2)
1918  Mul_End(4)
1919 }
1920 
1921 void SSE2_Multiply16(word *C, const word *A, const word *B)
1922 {
1923  Mul_Begin(8)
1924 #ifndef __GNUC__
1925  ASJ( jmp, 0, f)
1926  Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1927  AS1( ret) ASL(0)
1928 #endif
1929  Mul_Column0(0, 2)
1930  Mul_Column1(1, 3)
1931  Mul_Column0(2, 4)
1932  Mul_Column1(3, 5)
1933  Mul_Column0(4, 6)
1934  Mul_Column1(5, 7)
1935  Mul_Column0(6, 8)
1936  Mul_Column1(7, 7)
1937  Mul_Column0(8, 6)
1938  Mul_Column1(9, 5)
1939  Mul_Column0(10, 4)
1940  Mul_Column1(11, 3)
1941  Mul_Column0(12, 2)
1942  Mul_End(8)
1943 }
1944 
1945 void SSE2_Multiply32(word *C, const word *A, const word *B)
1946 {
1947  Mul_Begin(16)
1948  ASJ( jmp, 0, f)
1949  Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1950  AS1( ret) ASL(0)
1951  Mul_Column0(0, 2)
1952  Mul_Column1(1, 3)
1953  Mul_Column0(2, 4)
1954  Mul_Column1(3, 5)
1955  Mul_Column0(4, 6)
1956  Mul_Column1(5, 7)
1957  Mul_Column0(6, 8)
1958  Mul_Column1(7, 9)
1959  Mul_Column0(8, 10)
1960  Mul_Column1(9, 11)
1961  Mul_Column0(10, 12)
1962  Mul_Column1(11, 13)
1963  Mul_Column0(12, 14)
1964  Mul_Column1(13, 15)
1965  Mul_Column0(14, 16)
1966  Mul_Column1(15, 15)
1967  Mul_Column0(16, 14)
1968  Mul_Column1(17, 13)
1969  Mul_Column0(18, 12)
1970  Mul_Column1(19, 11)
1971  Mul_Column0(20, 10)
1972  Mul_Column1(21, 9)
1973  Mul_Column0(22, 8)
1974  Mul_Column1(23, 7)
1975  Mul_Column0(24, 6)
1976  Mul_Column1(25, 5)
1977  Mul_Column0(26, 4)
1978  Mul_Column1(27, 3)
1979  Mul_Column0(28, 2)
1980  Mul_End(16)
1981 }
1982 
1983 void SSE2_MultiplyBottom4(word *C, const word *A, const word *B)
1984 {
1985  Mul_Begin(2)
1986  Bot_SaveAcc(0) Bot_Acc(2)
1987  Bot_End(2)
1988 }
1989 
1990 void SSE2_MultiplyBottom8(word *C, const word *A, const word *B)
1991 {
1992  Mul_Begin(4)
1993 #ifndef __GNUC__
1994  ASJ( jmp, 0, f)
1995  Mul_Acc(3) Mul_Acc(2)
1996  AS1( ret) ASL(0)
1997 #endif
1998  Mul_Column0(0, 2)
1999  Mul_Column1(1, 3)
2000  Bot_SaveAcc(2) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
2001  Bot_End(4)
2002 }
2003 
2004 void SSE2_MultiplyBottom16(word *C, const word *A, const word *B)
2005 {
2006  Mul_Begin(8)
2007 #ifndef __GNUC__
2008  ASJ( jmp, 0, f)
2009  Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2010  AS1( ret) ASL(0)
2011 #endif
2012  Mul_Column0(0, 2)
2013  Mul_Column1(1, 3)
2014  Mul_Column0(2, 4)
2015  Mul_Column1(3, 5)
2016  Mul_Column0(4, 6)
2017  Mul_Column1(5, 7)
2018  Bot_SaveAcc(6) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
2019  Bot_End(8)
2020 }
2021 
2022 void SSE2_MultiplyBottom32(word *C, const word *A, const word *B)
2023 {
2024  Mul_Begin(16)
2025 #ifndef __GNUC__
2026  ASJ( jmp, 0, f)
2027  Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2028  AS1( ret) ASL(0)
2029 #endif
2030  Mul_Column0(0, 2)
2031  Mul_Column1(1, 3)
2032  Mul_Column0(2, 4)
2033  Mul_Column1(3, 5)
2034  Mul_Column0(4, 6)
2035  Mul_Column1(5, 7)
2036  Mul_Column0(6, 8)
2037  Mul_Column1(7, 9)
2038  Mul_Column0(8, 10)
2039  Mul_Column1(9, 11)
2040  Mul_Column0(10, 12)
2041  Mul_Column1(11, 13)
2042  Mul_Column0(12, 14)
2043  Mul_Column1(13, 15)
2044  Bot_SaveAcc(14) Bot_Acc(16) Bot_Acc(15) Bot_Acc(14) Bot_Acc(13) Bot_Acc(12) Bot_Acc(11) Bot_Acc(10) Bot_Acc(9) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
2045  Bot_End(16)
2046 }
2047 
2048 void SSE2_MultiplyTop8(word *C, const word *A, const word *B, word L)
2049 {
2050  Top_Begin(4)
2051  Top_Acc(3) Top_Acc(2) Top_Acc(1)
2052 #ifndef __GNUC__
2053  ASJ( jmp, 0, f)
2054  Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2055  AS1( ret) ASL(0)
2056 #endif
2057  Top_Column0(4)
2058  Top_Column1(3)
2059  Mul_Column0(0, 2)
2060  Top_End(2)
2061 }
2062 
2063 void SSE2_MultiplyTop16(word *C, const word *A, const word *B, word L)
2064 {
2065  Top_Begin(8)
2066  Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
2067 #ifndef __GNUC__
2068  ASJ( jmp, 0, f)
2069  Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2070  AS1( ret) ASL(0)
2071 #endif
2072  Top_Column0(8)
2073  Top_Column1(7)
2074  Mul_Column0(0, 6)
2075  Mul_Column1(1, 5)
2076  Mul_Column0(2, 4)
2077  Mul_Column1(3, 3)
2078  Mul_Column0(4, 2)
2079  Top_End(4)
2080 }
2081 
2082 void SSE2_MultiplyTop32(word *C, const word *A, const word *B, word L)
2083 {
2084  Top_Begin(16)
2085  Top_Acc(15) Top_Acc(14) Top_Acc(13) Top_Acc(12) Top_Acc(11) Top_Acc(10) Top_Acc(9) Top_Acc(8) Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
2086 #ifndef __GNUC__
2087  ASJ( jmp, 0, f)
2088  Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2089  AS1( ret) ASL(0)
2090 #endif
2091  Top_Column0(16)
2092  Top_Column1(15)
2093  Mul_Column0(0, 14)
2094  Mul_Column1(1, 13)
2095  Mul_Column0(2, 12)
2096  Mul_Column1(3, 11)
2097  Mul_Column0(4, 10)
2098  Mul_Column1(5, 9)
2099  Mul_Column0(6, 8)
2100  Mul_Column1(7, 7)
2101  Mul_Column0(8, 6)
2102  Mul_Column1(9, 5)
2103  Mul_Column0(10, 4)
2104  Mul_Column1(11, 3)
2105  Mul_Column0(12, 2)
2106  Top_End(8)
2107 }
2108 
2109 #endif // #if CRYPTOPP_INTEGER_SSE2
2110 
2111 // ********************************************************
2112 
2113 typedef int (CRYPTOPP_FASTCALL * PAdd)(size_t N, word *C, const word *A, const word *B);
2114 typedef void (* PMul)(word *C, const word *A, const word *B);
2115 typedef void (* PSqu)(word *C, const word *A);
2116 typedef void (* PMulTop)(word *C, const word *A, const word *B, word L);
2117 
2118 #if CRYPTOPP_INTEGER_SSE2
2119 static PAdd s_pAdd = &Baseline_Add, s_pSub = &Baseline_Sub;
2120 static size_t s_recursionLimit = 8;
2121 #else
2122 static const size_t s_recursionLimit = 16;
2123 #endif // CRYPTOPP_INTEGER_SSE2
2124 
2125 static PMul s_pMul[9], s_pBot[9];
2126 static PSqu s_pSqu[9];
2127 static PMulTop s_pTop[9];
2128 
2129 void SetFunctionPointers()
2130 {
2131  s_pMul[0] = &Baseline_Multiply2;
2132  s_pBot[0] = &Baseline_MultiplyBottom2;
2133  s_pSqu[0] = &Baseline_Square2;
2134  s_pTop[0] = &Baseline_MultiplyTop2;
2135  s_pTop[1] = &Baseline_MultiplyTop4;
2136 
2137 #if CRYPTOPP_INTEGER_SSE2
2138  if (HasSSE2())
2139  {
2140  if (IsP4())
2141  {
2142  s_pAdd = &SSE2_Add;
2143  s_pSub = &SSE2_Sub;
2144  }
2145 
2146  s_recursionLimit = 32;
2147 
2148  s_pMul[1] = &SSE2_Multiply4;
2149  s_pMul[2] = &SSE2_Multiply8;
2150  s_pMul[4] = &SSE2_Multiply16;
2151  s_pMul[8] = &SSE2_Multiply32;
2152 
2153  s_pBot[1] = &SSE2_MultiplyBottom4;
2154  s_pBot[2] = &SSE2_MultiplyBottom8;
2155  s_pBot[4] = &SSE2_MultiplyBottom16;
2156  s_pBot[8] = &SSE2_MultiplyBottom32;
2157 
2158  s_pSqu[1] = &SSE2_Square4;
2159  s_pSqu[2] = &SSE2_Square8;
2160  s_pSqu[4] = &SSE2_Square16;
2161  s_pSqu[8] = &SSE2_Square32;
2162 
2163  s_pTop[2] = &SSE2_MultiplyTop8;
2164  s_pTop[4] = &SSE2_MultiplyTop16;
2165  s_pTop[8] = &SSE2_MultiplyTop32;
2166  }
2167  else
2168 #endif // CRYPTOPP_INTEGER_SSE2
2169  {
2170  s_pMul[1] = &Baseline_Multiply4;
2171  s_pMul[2] = &Baseline_Multiply8;
2172 
2173  s_pBot[1] = &Baseline_MultiplyBottom4;
2174  s_pBot[2] = &Baseline_MultiplyBottom8;
2175 
2176  s_pSqu[1] = &Baseline_Square4;
2177  s_pSqu[2] = &Baseline_Square8;
2178 
2179  s_pTop[2] = &Baseline_MultiplyTop8;
2180 
2181 #if !CRYPTOPP_INTEGER_SSE2
2182  s_pMul[4] = &Baseline_Multiply16;
2183  s_pBot[4] = &Baseline_MultiplyBottom16;
2184  s_pSqu[4] = &Baseline_Square16;
2185  s_pTop[4] = &Baseline_MultiplyTop16;
2186 #endif // !CRYPTOPP_INTEGER_SSE2
2187  }
2188 }
2189 
2190 inline int Add(word *C, const word *A, const word *B, size_t N)
2191 {
2192 #if CRYPTOPP_INTEGER_SSE2
2193  return s_pAdd(N, C, A, B);
2194 #else
2195  return Baseline_Add(N, C, A, B);
2196 #endif // CRYPTOPP_INTEGER_SSE2
2197 }
2198 
2199 inline int Subtract(word *C, const word *A, const word *B, size_t N)
2200 {
2201 #if CRYPTOPP_INTEGER_SSE2
2202  return s_pSub(N, C, A, B);
2203 #else
2204  return Baseline_Sub(N, C, A, B);
2205 #endif // CRYPTOPP_INTEGER_SSE2
2206 }
2207 
2208 // ********************************************************
2209 
2210 
2211 #define A0 A
2212 #define A1 (A+N2)
2213 #define B0 B
2214 #define B1 (B+N2)
2215 
2216 #define T0 T
2217 #define T1 (T+N2)
2218 #define T2 (T+N)
2219 #define T3 (T+N+N2)
2220 
2221 #define R0 R
2222 #define R1 (R+N2)
2223 #define R2 (R+N)
2224 #define R3 (R+N+N2)
2225 
2226 // R[2*N] - result = A*B
2227 // T[2*N] - temporary work space
2228 // A[N] --- multiplier
2229 // B[N] --- multiplicant
2230 
2231 void RecursiveMultiply(word *R, word *T, const word *A, const word *B, size_t N)
2232 {
2233  CRYPTOPP_ASSERT(N>=2 && N%2==0);
2234 
2235  if (N <= s_recursionLimit)
2236  s_pMul[N/4](R, A, B);
2237  else
2238  {
2239  const size_t N2 = N/2;
2240 
2241  size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
2242  Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
2243 
2244  size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
2245  Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
2246 
2247  RecursiveMultiply(R2, T2, A1, B1, N2);
2248  RecursiveMultiply(T0, T2, R0, R1, N2);
2249  RecursiveMultiply(R0, T2, A0, B0, N2);
2250 
2251  // now T[01] holds (A1-A0)*(B0-B1), R[01] holds A0*B0, R[23] holds A1*B1
2252 
2253  int c2 = Add(R2, R2, R1, N2);
2254  int c3 = c2;
2255  c2 += Add(R1, R2, R0, N2);
2256  c3 += Add(R2, R2, R3, N2);
2257 
2258  if (AN2 == BN2)
2259  c3 -= Subtract(R1, R1, T0, N);
2260  else
2261  c3 += Add(R1, R1, T0, N);
2262 
2263  c3 += Increment(R2, N2, c2);
2264  CRYPTOPP_ASSERT (c3 >= 0 && c3 <= 2);
2265  Increment(R3, N2, c3);
2266  }
2267 }
2268 
2269 // R[2*N] - result = A*A
2270 // T[2*N] - temporary work space
2271 // A[N] --- number to be squared
2272 
2273 void RecursiveSquare(word *R, word *T, const word *A, size_t N)
2274 {
2275  CRYPTOPP_ASSERT(N && N%2==0);
2276 
2277  if (N <= s_recursionLimit)
2278  s_pSqu[N/4](R, A);
2279  else
2280  {
2281  const size_t N2 = N/2;
2282 
2283  RecursiveSquare(R0, T2, A0, N2);
2284  RecursiveSquare(R2, T2, A1, N2);
2285  RecursiveMultiply(T0, T2, A0, A1, N2);
2286 
2287  int carry = Add(R1, R1, T0, N);
2288  carry += Add(R1, R1, T0, N);
2289  Increment(R3, N2, carry);
2290  }
2291 }
2292 
2293 // R[N] - bottom half of A*B
2294 // T[3*N/2] - temporary work space
2295 // A[N] - multiplier
2296 // B[N] - multiplicant
2297 
2298 void RecursiveMultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
2299 {
2300  CRYPTOPP_ASSERT(N>=2 && N%2==0);
2301 
2302  if (N <= s_recursionLimit)
2303  s_pBot[N/4](R, A, B);
2304  else
2305  {
2306  const size_t N2 = N/2;
2307 
2308  RecursiveMultiply(R, T, A0, B0, N2);
2309  RecursiveMultiplyBottom(T0, T1, A1, B0, N2);
2310  Add(R1, R1, T0, N2);
2311  RecursiveMultiplyBottom(T0, T1, A0, B1, N2);
2312  Add(R1, R1, T0, N2);
2313  }
2314 }
2315 
2316 // R[N] --- upper half of A*B
2317 // T[2*N] - temporary work space
2318 // L[N] --- lower half of A*B
2319 // A[N] --- multiplier
2320 // B[N] --- multiplicant
2321 
2322 void MultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, size_t N)
2323 {
2324  CRYPTOPP_ASSERT(N>=2 && N%2==0);
2325 
2326  if (N <= s_recursionLimit)
2327  s_pTop[N/4](R, A, B, L[N-1]);
2328  else
2329  {
2330  const size_t N2 = N/2;
2331 
2332  size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
2333  Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
2334 
2335  size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
2336  Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
2337 
2338  RecursiveMultiply(T0, T2, R0, R1, N2);
2339  RecursiveMultiply(R0, T2, A1, B1, N2);
2340 
2341  // now T[01] holds (A1-A0)*(B0-B1) = A1*B0+A0*B1-A1*B1-A0*B0, R[01] holds A1*B1
2342 
2343  int t, c3;
2344  int c2 = Subtract(T2, L+N2, L, N2);
2345 
2346  if (AN2 == BN2)
2347  {
2348  c2 -= Add(T2, T2, T0, N2);
2349  t = (Compare(T2, R0, N2) == -1);
2350  c3 = t - Subtract(T2, T2, T1, N2);
2351  }
2352  else
2353  {
2354  c2 += Subtract(T2, T2, T0, N2);
2355  t = (Compare(T2, R0, N2) == -1);
2356  c3 = t + Add(T2, T2, T1, N2);
2357  }
2358 
2359  c2 += t;
2360  if (c2 >= 0)
2361  c3 += Increment(T2, N2, c2);
2362  else
2363  c3 -= Decrement(T2, N2, -c2);
2364  c3 += Add(R0, T2, R1, N2);
2365 
2366  CRYPTOPP_ASSERT (c3 >= 0 && c3 <= 2);
2367  Increment(R1, N2, c3);
2368  }
2369 }
2370 
2371 inline void Multiply(word *R, word *T, const word *A, const word *B, size_t N)
2372 {
2373  RecursiveMultiply(R, T, A, B, N);
2374 }
2375 
2376 inline void Square(word *R, word *T, const word *A, size_t N)
2377 {
2378  RecursiveSquare(R, T, A, N);
2379 }
2380 
2381 inline void MultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
2382 {
2383  RecursiveMultiplyBottom(R, T, A, B, N);
2384 }
2385 
2386 // R[NA+NB] - result = A*B
2387 // T[NA+NB] - temporary work space
2388 // A[NA] ---- multiplier
2389 // B[NB] ---- multiplicant
2390 
2391 void AsymmetricMultiply(word *R, word *T, const word *A, size_t NA, const word *B, size_t NB)
2392 {
2393  if (NA == NB)
2394  {
2395  // Profiling tells us the original second case was dominant, so it was promoted to the first If statement.
2396  // The code change occurred at Commit dc99266599a0e72d.
2397  if (A != B)
2398  Multiply(R, T, A, B, NA);
2399  else
2400  Square(R, T, A, NA);
2401 
2402  return;
2403  }
2404 
2405  if (NA > NB)
2406  {
2407  std::swap(A, B);
2408  std::swap(NA, NB);
2409  }
2410 
2411  CRYPTOPP_ASSERT(NB % NA == 0);
2412 
2413  if (NA==2 && !A[1])
2414  {
2415  // Profiling tells us the original Default case was dominant, so it was promoted to the first Case statement.
2416  // The code change occurred at Commit dc99266599a0e72d.
2417  switch (A[0])
2418  {
2419  default:
2420  R[NB] = LinearMultiply(R, B, A[0], NB);
2421  R[NB+1] = 0;
2422  return;
2423  case 0:
2424  SetWords(R, 0, NB+2);
2425  return;
2426  case 1:
2427  CopyWords(R, B, NB);
2428  R[NB] = R[NB+1] = 0;
2429  return;
2430  }
2431  }
2432 
2433  size_t i;
2434  if ((NB/NA)%2 == 0)
2435  {
2436  Multiply(R, T, A, B, NA);
2437  CopyWords(T+2*NA, R+NA, NA);
2438 
2439  for (i=2*NA; i<NB; i+=2*NA)
2440  Multiply(T+NA+i, T, A, B+i, NA);
2441  for (i=NA; i<NB; i+=2*NA)
2442  Multiply(R+i, T, A, B+i, NA);
2443  }
2444  else
2445  {
2446  for (i=0; i<NB; i+=2*NA)
2447  Multiply(R+i, T, A, B+i, NA);
2448  for (i=NA; i<NB; i+=2*NA)
2449  Multiply(T+NA+i, T, A, B+i, NA);
2450  }
2451 
2452  if (Add(R+NA, R+NA, T+2*NA, NB-NA))
2453  Increment(R+NB, NA);
2454 }
2455 
2456 // R[N] ----- result = A inverse mod 2**(WORD_BITS*N)
2457 // T[3*N/2] - temporary work space
2458 // A[N] ----- an odd number as input
2459 
2460 void RecursiveInverseModPower2(word *R, word *T, const word *A, size_t N)
2461 {
2462  // Profiling tells us the original Else was dominant, so it was promoted to the first If statement.
2463  // The code change occurred at Commit dc99266599a0e72d.
2464  if (N!=2)
2465  {
2466  const size_t N2 = N/2;
2467  RecursiveInverseModPower2(R0, T0, A0, N2);
2468  T0[0] = 1;
2469  SetWords(T0+1, 0, N2-1);
2470  MultiplyTop(R1, T1, T0, R0, A0, N2);
2471  MultiplyBottom(T0, T1, R0, A1, N2);
2472  Add(T0, R1, T0, N2);
2473  TwosComplement(T0, N2);
2474  MultiplyBottom(R1, T1, R0, T0, N2);
2475  }
2476  else
2477  {
2478  T[0] = AtomicInverseModPower2(A[0]);
2479  T[1] = 0;
2480  s_pBot[0](T+2, T, A);
2481  TwosComplement(T+2, 2);
2482  Increment(T+2, 2, 2);
2483  s_pBot[0](R, T, T+2);
2484  }
2485 }
2486 
2487 // R[N] --- result = X/(2**(WORD_BITS*N)) mod M
2488 // T[3*N] - temporary work space
2489 // X[2*N] - number to be reduced
2490 // M[N] --- modulus
2491 // U[N] --- multiplicative inverse of M mod 2**(WORD_BITS*N)
2492 
2493 void MontgomeryReduce(word *R, word *T, word *X, const word *M, const word *U, size_t N)
2494 {
2495 #if 1
2496  MultiplyBottom(R, T, X, U, N);
2497  MultiplyTop(T, T+N, X, R, M, N);
2498  word borrow = Subtract(T, X+N, T, N);
2499  // defend against timing attack by doing this Add even when not needed
2500  word carry = Add(T+N, T, M, N);
2501  CRYPTOPP_ASSERT(carry | !borrow);
2502  CRYPTOPP_UNUSED(carry), CRYPTOPP_UNUSED(borrow);
2503  CopyWords(R, T + ((0-borrow) & N), N);
2504 #elif 0
2505  const word u = 0-U[0];
2506  Declare2Words(p)
2507  for (size_t i=0; i<N; i++)
2508  {
2509  const word t = u * X[i];
2510  word c = 0;
2511  for (size_t j=0; j<N; j+=2)
2512  {
2513  MultiplyWords(p, t, M[j]);
2514  Acc2WordsBy1(p, X[i+j]);
2515  Acc2WordsBy1(p, c);
2516  X[i+j] = LowWord(p);
2517  c = HighWord(p);
2518  MultiplyWords(p, t, M[j+1]);
2519  Acc2WordsBy1(p, X[i+j+1]);
2520  Acc2WordsBy1(p, c);
2521  X[i+j+1] = LowWord(p);
2522  c = HighWord(p);
2523  }
2524 
2525  if (Increment(X+N+i, N-i, c))
2526  while (!Subtract(X+N, X+N, M, N)) {}
2527  }
2528 
2529  memcpy(R, X+N, N*WORD_SIZE);
2530 #else
2531  __m64 u = _mm_cvtsi32_si64(0-U[0]), p;
2532  for (size_t i=0; i<N; i++)
2533  {
2534  __m64 t = _mm_cvtsi32_si64(X[i]);
2535  t = _mm_mul_su32(t, u);
2536  __m64 c = _mm_setzero_si64();
2537  for (size_t j=0; j<N; j+=2)
2538  {
2539  p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j]));
2540  p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j]));
2541  c = _mm_add_si64(c, p);
2542  X[i+j] = _mm_cvtsi64_si32(c);
2543  c = _mm_srli_si64(c, 32);
2544  p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j+1]));
2545  p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j+1]));
2546  c = _mm_add_si64(c, p);
2547  X[i+j+1] = _mm_cvtsi64_si32(c);
2548  c = _mm_srli_si64(c, 32);
2549  }
2550 
2551  if (Increment(X+N+i, N-i, _mm_cvtsi64_si32(c)))
2552  while (!Subtract(X+N, X+N, M, N)) {}
2553  }
2554 
2555  memcpy(R, X+N, N*WORD_SIZE);
2556  _mm_empty();
2557 #endif
2558 }
2559 
2560 // R[N] --- result = X/(2**(WORD_BITS*N/2)) mod M
2561 // T[2*N] - temporary work space
2562 // X[2*N] - number to be reduced
2563 // M[N] --- modulus
2564 // U[N/2] - multiplicative inverse of M mod 2**(WORD_BITS*N/2)
2565 // V[N] --- 2**(WORD_BITS*3*N/2) mod M
2566 
2567 void HalfMontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, const word *V, size_t N)
2568 {
2569  CRYPTOPP_ASSERT(N%2==0 && N>=4);
2570 
2571 #define M0 M
2572 #define M1 (M+N2)
2573 #define V0 V
2574 #define V1 (V+N2)
2575 
2576 #define X0 X
2577 #define X1 (X+N2)
2578 #define X2 (X+N)
2579 #define X3 (X+N+N2)
2580 
2581  const size_t N2 = N/2;
2582  Multiply(T0, T2, V0, X3, N2);
2583  int c2 = Add(T0, T0, X0, N);
2584  MultiplyBottom(T3, T2, T0, U, N2);
2585  MultiplyTop(T2, R, T0, T3, M0, N2);
2586  c2 -= Subtract(T2, T1, T2, N2);
2587  Multiply(T0, R, T3, M1, N2);
2588  c2 -= Subtract(T0, T2, T0, N2);
2589  int c3 = -(int)Subtract(T1, X2, T1, N2);
2590  Multiply(R0, T2, V1, X3, N2);
2591  c3 += Add(R, R, T, N);
2592 
2593  if (c2>0)
2594  c3 += Increment(R1, N2);
2595  else if (c2<0)
2596  c3 -= Decrement(R1, N2, -c2);
2597 
2598  CRYPTOPP_ASSERT(c3>=-1 && c3<=1);
2599  if (c3>0)
2600  Subtract(R, R, M, N);
2601  else if (c3<0)
2602  Add(R, R, M, N);
2603 
2604 #undef M0
2605 #undef M1
2606 #undef V0
2607 #undef V1
2608 
2609 #undef X0
2610 #undef X1
2611 #undef X2
2612 #undef X3
2613 }
2614 
2615 #undef A0
2616 #undef A1
2617 #undef B0
2618 #undef B1
2619 
2620 #undef T0
2621 #undef T1
2622 #undef T2
2623 #undef T3
2624 
2625 #undef R0
2626 #undef R1
2627 #undef R2
2628 #undef R3
2629 
2630 /*
2631 // do a 3 word by 2 word divide, returns quotient and leaves remainder in A
2632 static word SubatomicDivide(word *A, word B0, word B1)
2633 {
2634  // Assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a word
2635  CRYPTOPP_ASSERT(A[2] < B1 || (A[2]==B1 && A[1] < B0));
2636 
2637  // estimate the quotient: do a 2 word by 1 word divide
2638  word Q;
2639  if (B1+1 == 0)
2640  Q = A[2];
2641  else
2642  Q = DWord(A[1], A[2]).DividedBy(B1+1);
2643 
2644  // now subtract Q*B from A
2645  DWord p = DWord::Multiply(B0, Q);
2646  DWord u = (DWord) A[0] - p.GetLowHalf();
2647  A[0] = u.GetLowHalf();
2648  u = (DWord) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - DWord::Multiply(B1, Q);
2649  A[1] = u.GetLowHalf();
2650  A[2] += u.GetHighHalf();
2651 
2652  // Q <= actual quotient, so fix it
2653  while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
2654  {
2655  u = (DWord) A[0] - B0;
2656  A[0] = u.GetLowHalf();
2657  u = (DWord) A[1] - B1 - u.GetHighHalfAsBorrow();
2658  A[1] = u.GetLowHalf();
2659  A[2] += u.GetHighHalf();
2660  Q++;
2661  CRYPTOPP_ASSERT(Q); // shouldn't overflow
2662  }
2663 
2664  return Q;
2665 }
2666 
2667 // do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1
2668 static inline void AtomicDivide(word *Q, const word *A, const word *B)
2669 {
2670  if (!B[0] && !B[1]) // if divisor is 0, we assume divisor==2**(2*WORD_BITS)
2671  {
2672  Q[0] = A[2];
2673  Q[1] = A[3];
2674  }
2675  else
2676  {
2677  word T[4];
2678  T[0] = A[0]; T[1] = A[1]; T[2] = A[2]; T[3] = A[3];
2679  Q[1] = SubatomicDivide(T+1, B[0], B[1]);
2680  Q[0] = SubatomicDivide(T, B[0], B[1]);
2681 
2682 #if defined(CRYPTOPP_DEBUG)
2683  // multiply quotient and divisor and add remainder, make sure it equals dividend
2684  CRYPTOPP_ASSERT(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
2685  word P[4];
2686  LowLevel::Multiply2(P, Q, B);
2687  Add(P, P, T, 4);
2688  CRYPTOPP_ASSERT(memcmp(P, A, 4*WORD_SIZE)==0);
2689 #endif
2690  }
2691 }
2692 */
2693 
2694 static inline void AtomicDivide(word *Q, const word *A, const word *B)
2695 {
2696  word T[4];
2697  DWord q = DivideFourWordsByTwo<word, DWord>(T, DWord(A[0], A[1]), DWord(A[2], A[3]), DWord(B[0], B[1]));
2698  Q[0] = q.GetLowHalf();
2699  Q[1] = q.GetHighHalf();
2700 
2701 #if defined(CRYPTOPP_DEBUG)
2702  if (B[0] || B[1])
2703  {
2704  // multiply quotient and divisor and add remainder, make sure it equals dividend
2705  CRYPTOPP_ASSERT(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
2706  word P[4];
2707  s_pMul[0](P, Q, B);
2708  Add(P, P, T, 4);
2709  CRYPTOPP_ASSERT(memcmp(P, A, 4*WORD_SIZE)==0);
2710  }
2711 #endif
2712 }
2713 
2714 // for use by Divide(), corrects the underestimated quotient {Q1,Q0}
2715 static void CorrectQuotientEstimate(word *R, word *T, word *Q, const word *B, size_t N)
2716 {
2717  CRYPTOPP_ASSERT(N && N%2==0);
2718 
2719  AsymmetricMultiply(T, T+N+2, Q, 2, B, N);
2720 
2721  word borrow = Subtract(R, R, T, N+2);
2722  CRYPTOPP_ASSERT(!borrow && !R[N+1]);
2723  CRYPTOPP_UNUSED(borrow);
2724 
2725  while (R[N] || Compare(R, B, N) >= 0)
2726  {
2727  R[N] -= Subtract(R, R, B, N);
2728  Q[1] += (++Q[0]==0);
2729  CRYPTOPP_ASSERT(Q[0] || Q[1]); // no overflow
2730  }
2731 }
2732 
2733 // R[NB] -------- remainder = A%B
2734 // Q[NA-NB+2] --- quotient = A/B
2735 // T[NA+3*(NB+2)] - temp work space
2736 // A[NA] -------- dividend
2737 // B[NB] -------- divisor
2738 
2739 void Divide(word *R, word *Q, word *T, const word *A, size_t NA, const word *B, size_t NB)
2740 {
2741  CRYPTOPP_ASSERT(NA && NB && NA%2==0 && NB%2==0);
2742  CRYPTOPP_ASSERT(B[NB-1] || B[NB-2]);
2743  CRYPTOPP_ASSERT(NB <= NA);
2744 
2745  // set up temporary work space
2746  word *const TA=T;
2747  word *const TB=T+NA+2;
2748  word *const TP=T+NA+2+NB;
2749 
2750  // copy B into TB and normalize it so that TB has highest bit set to 1
2751  unsigned shiftWords = (B[NB-1]==0);
2752  TB[0] = TB[NB-1] = 0;
2753  CopyWords(TB+shiftWords, B, NB-shiftWords);
2754  unsigned shiftBits = WORD_BITS - BitPrecision(TB[NB-1]);
2755  CRYPTOPP_ASSERT(shiftBits < WORD_BITS);
2756  ShiftWordsLeftByBits(TB, NB, shiftBits);
2757 
2758  // copy A into TA and normalize it
2759  TA[0] = TA[NA] = TA[NA+1] = 0;
2760  CopyWords(TA+shiftWords, A, NA);
2761  ShiftWordsLeftByBits(TA, NA+2, shiftBits);
2762 
2763  if (TA[NA+1]==0 && TA[NA] <= 1)
2764  {
2765  Q[NA-NB+1] = Q[NA-NB] = 0;
2766  while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0)
2767  {
2768  TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB);
2769  ++Q[NA-NB];
2770  }
2771  }
2772  else
2773  {
2774  NA+=2;
2775  CRYPTOPP_ASSERT(Compare(TA+NA-NB, TB, NB) < 0);
2776  }
2777 
2778  word BT[2];
2779  BT[0] = TB[NB-2] + 1;
2780  BT[1] = TB[NB-1] + (BT[0]==0);
2781 
2782  // start reducing TA mod TB, 2 words at a time
2783  for (size_t i=NA-2; i>=NB; i-=2)
2784  {
2785  AtomicDivide(Q+i-NB, TA+i-2, BT);
2786  CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB);
2787  }
2788 
2789  // copy TA into R, and denormalize it
2790  CopyWords(R, TA+shiftWords, NB);
2791  ShiftWordsRightByBits(R, NB, shiftBits);
2792 }
2793 
2794 static inline size_t EvenWordCount(const word *X, size_t N)
2795 {
2796  while (N && X[N-2]==0 && X[N-1]==0)
2797  N-=2;
2798  return N;
2799 }
2800 
2801 // return k
2802 // R[N] --- result = A^(-1) * 2^k mod M
2803 // T[4*N] - temporary work space
2804 // A[NA] -- number to take inverse of
2805 // M[N] --- modulus
2806 
2807 unsigned int AlmostInverse(word *R, word *T, const word *A, size_t NA, const word *M, size_t N)
2808 {
2809  CRYPTOPP_ASSERT(NA<=N && N && N%2==0);
2810 
2811  word *b = T;
2812  word *c = T+N;
2813  word *f = T+2*N;
2814  word *g = T+3*N;
2815  size_t bcLen=2, fgLen=EvenWordCount(M, N);
2816  unsigned int k=0;
2817  bool s=false;
2818 
2819  SetWords(T, 0, 3*N);
2820  b[0]=1;
2821  CopyWords(f, A, NA);
2822  CopyWords(g, M, N);
2823 
2824  while (1)
2825  {
2826  word t=f[0];
2827  while (!t)
2828  {
2829  if (EvenWordCount(f, fgLen)==0)
2830  {
2831  SetWords(R, 0, N);
2832  return 0;
2833  }
2834 
2835  ShiftWordsRightByWords(f, fgLen, 1);
2836  bcLen += 2 * (c[bcLen-1] != 0);
2837  CRYPTOPP_ASSERT(bcLen <= N);
2838  ShiftWordsLeftByWords(c, bcLen, 1);
2839  k+=WORD_BITS;
2840  t=f[0];
2841  }
2842 
2843  // t must be non-0; otherwise undefined.
2844  unsigned int i = TrailingZeros(t);
2845  t >>= i;
2846  k += i;
2847 
2848  if (t==1 && f[1]==0 && EvenWordCount(f+2, fgLen-2)==0)
2849  {
2850  if (s)
2851  Subtract(R, M, b, N);
2852  else
2853  CopyWords(R, b, N);
2854  return k;
2855  }
2856 
2857  ShiftWordsRightByBits(f, fgLen, i);
2858  t = ShiftWordsLeftByBits(c, bcLen, i);
2859  c[bcLen] += t;
2860  bcLen += 2 * (t!=0);
2861  CRYPTOPP_ASSERT(bcLen <= N);
2862 
2863  bool swap = Compare(f, g, fgLen)==-1;
2864  ConditionalSwapPointers(swap, f, g);
2865  ConditionalSwapPointers(swap, b, c);
2866  s ^= swap;
2867 
2868  fgLen -= 2 * !(f[fgLen-2] | f[fgLen-1]);
2869 
2870  Subtract(f, f, g, fgLen);
2871  t = Add(b, b, c, bcLen);
2872  b[bcLen] += t;
2873  bcLen += 2*t;
2874  CRYPTOPP_ASSERT(bcLen <= N);
2875  }
2876 }
2877 
2878 // R[N] - result = A/(2^k) mod M
2879 // A[N] - input
2880 // M[N] - modulus
2881 
2882 void DivideByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
2883 {
2884  CopyWords(R, A, N);
2885 
2886  while (k--)
2887  {
2888  if (R[0]%2==0)
2889  ShiftWordsRightByBits(R, N, 1);
2890  else
2891  {
2892  word carry = Add(R, R, M, N);
2893  ShiftWordsRightByBits(R, N, 1);
2894  R[N-1] += carry<<(WORD_BITS-1);
2895  }
2896  }
2897 }
2898 
2899 // R[N] - result = A*(2^k) mod M
2900 // A[N] - input
2901 // M[N] - modulus
2902 
2903 void MultiplyByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
2904 {
2905  CopyWords(R, A, N);
2906 
2907  while (k--)
2908  if (ShiftWordsLeftByBits(R, N, 1) || Compare(R, M, N)>=0)
2909  Subtract(R, R, M, N);
2910 }
2911 
2912 // ******************************************************************
2913 
2914 static const unsigned int RoundupSizeTable[] = {2, 2, 2, 4, 4, 8, 8, 8, 8};
2915 
2916 static inline size_t RoundupSize(size_t n)
2917 {
2918  if (n<=8)
2919  return RoundupSizeTable[n];
2920  else if (n<=16)
2921  return 16;
2922  else if (n<=32)
2923  return 32;
2924  else if (n<=64)
2925  return 64;
2926  else
2927  return size_t(1) << BitPrecision(n-1);
2928 }
2929 
2931  : reg(2), sign(POSITIVE)
2932 {
2933  reg[0] = reg[1] = 0;
2934 }
2935 
2937  : reg(RoundupSize(t.WordCount())), sign(t.sign)
2938 {
2939  CopyWords(reg, t.reg, reg.size());
2940 }
2941 
2942 Integer::Integer(Sign s, lword value)
2943  : reg(2), sign(s)
2944 {
2945  reg[0] = word(value);
2946  reg[1] = word(SafeRightShift<WORD_BITS>(value));
2947 }
2948 
2949 Integer::Integer(signed long value)
2950  : reg(2)
2951 {
2952  if (value >= 0)
2953  sign = POSITIVE;
2954  else
2955  {
2956  sign = NEGATIVE;
2957  value = -value;
2958  }
2959  reg[0] = word(value);
2960  reg[1] = word(SafeRightShift<WORD_BITS>((unsigned long)value));
2961 }
2962 
2963 Integer::Integer(Sign s, word high, word low)
2964  : reg(2), sign(s)
2965 {
2966  reg[0] = low;
2967  reg[1] = high;
2968 }
2969 
2971 {
2972  if (ByteCount() > sizeof(long))
2973  return false;
2974 
2975  unsigned long value = (unsigned long)reg[0];
2976  value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
2977 
2978  if (sign==POSITIVE)
2979  return (signed long)value >= 0;
2980  else
2981  return -(signed long)value < 0;
2982 }
2983 
2984 signed long Integer::ConvertToLong() const
2985 {
2987 
2988  unsigned long value = (unsigned long)reg[0];
2989  value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
2990  return sign==POSITIVE ? value : -(signed long)value;
2991 }
2992 
2993 Integer::Integer(BufferedTransformation &encodedInteger, size_t byteCount, Signedness s, ByteOrder o)
2994 {
2996 
2997  if (o != LITTLE_ENDIAN_ORDER)
2998  {
2999  Decode(encodedInteger, byteCount, s);
3000  }
3001  else
3002  {
3003  SecByteBlock block(byteCount);
3004  encodedInteger.Get(block, block.size());
3005  std::reverse(block.begin(), block.begin()+block.size());
3006 
3007  Decode(block.begin(), block.size(), s);
3008  }
3009 }
3010 
3011 Integer::Integer(const byte *encodedInteger, size_t byteCount, Signedness s, ByteOrder o)
3012 {
3013  CRYPTOPP_ASSERT(encodedInteger && byteCount); // NULL buffer
3015 
3016  if (o != LITTLE_ENDIAN_ORDER)
3017  {
3018  Decode(encodedInteger, byteCount, s);
3019  }
3020  else
3021  {
3022  SecByteBlock block(byteCount);
3023 #if (_MSC_FULL_VER >= 140050727)
3024  std::reverse_copy(encodedInteger, encodedInteger+byteCount,
3025  stdext::make_checked_array_iterator(block.begin(), block.size()));
3026 #else
3027  std::reverse_copy(encodedInteger, encodedInteger+byteCount, block.begin());
3028 #endif
3029  Decode(block.begin(), block.size(), s);
3030  return;
3031  }
3032 }
3033 
3035 {
3036  // Make explicit call to avoid virtual-dispatch findings in ctor
3037  Integer::BERDecode(bt);
3038 }
3039 
3041 {
3042  Randomize(rng, bitcount);
3043 }
3044 
3045 Integer::Integer(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
3046 {
3047  if (!Randomize(rng, min, max, rnType, equiv, mod))
3049 }
3050 
3052 {
3053  Integer r((word)0, BitsToWords(e+1));
3054  r.SetBit(e);
3055  return r;
3056 }
3057 
3059 {
3060  return IsNegative() ? false : (reg[0]==0 && WordCount()==0);
3061 }
3062 
3064 {
3065  if (this != &t)
3066  {
3067  if (reg.size() != t.reg.size() || t.reg[t.reg.size()/2] == 0)
3068  reg.New(RoundupSize(t.WordCount()));
3069  CopyWords(reg, t.reg, reg.size());
3070  sign = t.sign;
3071  }
3072  return *this;
3073 }
3074 
3075 bool Integer::GetBit(size_t n) const
3076 {
3077  // Profiling tells us the original Else was dominant, so it was promoted to the first If statement.
3078  // The code change occurred at Commit dc99266599a0e72d.
3079  if (n/WORD_BITS < reg.size())
3080  return bool((reg[n/WORD_BITS] >> (n % WORD_BITS)) & 1);
3081  else
3082  return 0;
3083 }
3084 
3085 void Integer::SetBit(size_t n, bool value)
3086 {
3087  if (value)
3088  {
3089  reg.CleanGrow(RoundupSize(BitsToWords(n+1)));
3090  reg[n/WORD_BITS] |= (word(1) << (n%WORD_BITS));
3091  }
3092  else
3093  {
3094  if (n/WORD_BITS < reg.size())
3095  reg[n/WORD_BITS] &= ~(word(1) << (n%WORD_BITS));
3096  }
3097 }
3098 
3099 byte Integer::GetByte(size_t n) const
3100 {
3101  // Profiling tells us the original Else was dominant, so it was promoted to the first If statement.
3102  // The code change occurred at Commit dc99266599a0e72d.
3103  if (n/WORD_SIZE < reg.size())
3104  return byte(reg[n/WORD_SIZE] >> ((n%WORD_SIZE)*8));
3105  else
3106  return 0;
3107 }
3108 
3109 void Integer::SetByte(size_t n, byte value)
3110 {
3111  reg.CleanGrow(RoundupSize(BytesToWords(n+1)));
3112  reg[n/WORD_SIZE] &= ~(word(0xff) << 8*(n%WORD_SIZE));
3113  reg[n/WORD_SIZE] |= (word(value) << 8*(n%WORD_SIZE));
3114 }
3115 
3116 lword Integer::GetBits(size_t i, size_t n) const
3117 {
3118  lword v = 0;
3119  CRYPTOPP_ASSERT(n <= sizeof(v)*8);
3120  for (unsigned int j=0; j<n; j++)
3121  v |= lword(GetBit(i+j)) << j;
3122  return v;
3123 }
3124 
3126 {
3127  Integer result(*this);
3128  result.Negate();
3129  return result;
3130 }
3131 
3133 {
3134  Integer result(*this);
3135  result.sign = POSITIVE;
3136  return result;
3137 }
3138 
3140 {
3141  reg.swap(a.reg);
3142  std::swap(sign, a.sign);
3143 }
3144 
3145 Integer::Integer(word value, size_t length)
3146  : reg(RoundupSize(length)), sign(POSITIVE)
3147 {
3148  reg[0] = value;
3149  SetWords(reg+1, 0, reg.size()-1);
3150 }
3151 
3152 template <class T>
3153 static Integer StringToInteger(const T *str, ByteOrder order)
3154 {
3155  CRYPTOPP_ASSERT( order == BIG_ENDIAN_ORDER || order == LITTLE_ENDIAN_ORDER );
3156 
3157  int radix, sign = 1;
3158  // GCC workaround
3159  // std::char_traits<wchar_t>::length() not defined in GCC 3.2 and STLport 4.5.3
3160  unsigned int length;
3161  for (length = 0; str[length] != 0; length++) {}
3162 
3163  Integer v;
3164 
3165  if (length == 0)
3166  return Integer::Zero();
3167 
3168  switch (str[length-1])
3169  {
3170  case 'h':
3171  case 'H':
3172  radix=16;
3173  break;
3174  case 'o':
3175  case 'O':
3176  radix=8;
3177  break;
3178  case 'b':
3179  case 'B':
3180  radix=2;
3181  break;
3182  default:
3183  radix=10;
3184  }
3185 
3186  // 'str' is of length 1 or more
3187  if (str[0] == '-')
3188  {
3189  sign = -1;
3190  str += 1, length -= 1;
3191  }
3192 
3193  if (length > 2 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X'))
3194  {
3195  radix = 16;
3196  str += 2, length -= 2;
3197  }
3198 
3199  if (order == BIG_ENDIAN_ORDER)
3200  {
3201  for (unsigned int i=0; i<length; i++)
3202  {
3203  int digit, ch = static_cast<int>(str[i]);
3204 
3205  // Profiling showd the second and third Else needed to be swapped
3206  // The code change occurred at Commit dc99266599a0e72d.
3207  if (ch >= '0' && ch <= '9')
3208  digit = ch - '0';
3209  else if (ch >= 'a' && ch <= 'f')
3210  digit = ch - 'a' + 10;
3211  else if (ch >= 'A' && ch <= 'F')
3212  digit = ch - 'A' + 10;
3213  else
3214  digit = radix;
3215 
3216  if (digit < radix)
3217  {
3218  v *= radix;
3219  v += digit;
3220  }
3221  }
3222  }
3223  else if (radix == 16 && order == LITTLE_ENDIAN_ORDER)
3224  {
3225  // Nibble high, low and count
3226  unsigned int nh = 0, nl = 0, nc = 0;
3227  Integer position(Integer::One());
3228 
3229  for (unsigned int i=0; i<length; i++)
3230  {
3231  int digit, ch = static_cast<int>(str[i]);
3232 
3233  if (ch >= '0' && ch <= '9')
3234  digit = ch - '0';
3235  else if (ch >= 'a' && ch <= 'f')
3236  digit = ch - 'a' + 10;
3237  else if (ch >= 'A' && ch <= 'F')
3238  digit = ch - 'A' + 10;
3239  else
3240  digit = radix;
3241 
3242  if (digit < radix)
3243  {
3244  if (nc++ == 0)
3245  nh = digit;
3246  else
3247  nl = digit;
3248 
3249  if (nc == 2)
3250  {
3251  v += position * (nh << 4 | nl);
3252  nc = 0, position <<= 8;
3253  }
3254  }
3255  }
3256 
3257  if (nc == 1)
3258  v += nh * position;
3259  }
3260  else // LITTLE_ENDIAN_ORDER && radix != 16
3261  {
3262  for (int i=static_cast<int>(length)-1; i>=0; i--)
3263  {
3264  int digit, ch = static_cast<int>(str[i]);
3265 
3266  if (ch >= '0' && ch <= '9')
3267  digit = ch - '0';
3268  else if (ch >= 'a' && ch <= 'f')
3269  digit = ch - 'a' + 10;
3270  else if (ch >= 'A' && ch <= 'F')
3271  digit = ch - 'A' + 10;
3272  else
3273  digit = radix;
3274 
3275  if (digit < radix)
3276  {
3277  v *= radix;
3278  v += digit;
3279  }
3280  }
3281  }
3282 
3283  if (sign == -1)
3284  v.Negate();
3285 
3286  return v;
3287 }
3288 
3289 Integer::Integer(const char *str, ByteOrder order)
3290  : reg(2), sign(POSITIVE)
3291 {
3292  *this = StringToInteger(str,order);
3293 }
3294 
3295 Integer::Integer(const wchar_t *str, ByteOrder order)
3296  : reg(2), sign(POSITIVE)
3297 {
3298  *this = StringToInteger(str,order);
3299 }
3300 
3301 unsigned int Integer::WordCount() const
3302 {
3303  return (unsigned int)CountWords(reg, reg.size());
3304 }
3305 
3306 unsigned int Integer::ByteCount() const
3307 {
3308  unsigned wordCount = WordCount();
3309  if (wordCount)
3310  return (wordCount-1)*WORD_SIZE + BytePrecision(reg[wordCount-1]);
3311  else
3312  return 0;
3313 }
3314 
3315 unsigned int Integer::BitCount() const
3316 {
3317  unsigned wordCount = WordCount();
3318  if (wordCount)
3319  return (wordCount-1)*WORD_BITS + BitPrecision(reg[wordCount-1]);
3320  else
3321  return 0;
3322 }
3323 
3324 void Integer::Decode(const byte *input, size_t inputLen, Signedness s)
3325 {
3326  CRYPTOPP_ASSERT(input && inputLen); // NULL buffer
3327  StringStore store(input, inputLen);
3328  Decode(store, inputLen, s);
3329 }
3330 
3332 {
3333  CRYPTOPP_ASSERT(bt.MaxRetrievable() >= inputLen);
3334  if (bt.MaxRetrievable() < inputLen)
3335  throw InvalidArgument("Integer: input length is too small");
3336 
3337  byte b;
3338  bt.Peek(b);
3339  sign = ((s==SIGNED) && (b & 0x80)) ? NEGATIVE : POSITIVE;
3340 
3341  while (inputLen>0 && (sign==POSITIVE ? b==0 : b==0xff))
3342  {
3343  bt.Skip(1);
3344  inputLen--;
3345  bt.Peek(b);
3346  }
3347 
3348  reg.CleanNew(RoundupSize(BytesToWords(inputLen)));
3349  for (size_t i=inputLen; i > 0; i--)
3350  {
3351  (void)bt.Get(b);
3352  reg[(i-1)/WORD_SIZE] |= word(b) << ((i-1)%WORD_SIZE)*8;
3353  }
3354 
3355  if (sign == NEGATIVE)
3356  {
3357  for (size_t i=inputLen; i<reg.size()*WORD_SIZE; i++)
3358  reg[i/WORD_SIZE] |= word(0xff) << (i%WORD_SIZE)*8;
3359  TwosComplement(reg, reg.size());
3360  }
3361 }
3362 
3363 size_t Integer::MinEncodedSize(Signedness signedness) const
3364 {
3365  // Profiling tells us the original second If was dominant, so it was promoted to the first If statement.
3366  // The code change occurred at Commit dc99266599a0e72d.
3367  unsigned int outputLen = STDMAX(1U, ByteCount());
3368  const bool pre = (signedness == UNSIGNED);
3369  if (!pre && NotNegative() && (GetByte(outputLen-1) & 0x80))
3370  outputLen++;
3371  if (pre)
3372  return outputLen;
3373  if (IsNegative() && *this < -Power2(outputLen*8-1))
3374  outputLen++;
3375  return outputLen;
3376 }
3377 
3378 // PKCS12_PBKDF and other classes use undersized buffers
3379 void Integer::Encode(byte *output, size_t outputLen, Signedness signedness) const
3380 {
3381  CRYPTOPP_ASSERT(output && outputLen); // NULL buffer
3382  ArraySink sink(output, outputLen);
3383  Encode(sink, outputLen, signedness);
3384 }
3385 
3386 void Integer::Encode(BufferedTransformation &bt, size_t outputLen, Signedness signedness) const
3387 {
3388  if (signedness == UNSIGNED || NotNegative())
3389  {
3390  for (size_t i=outputLen; i > 0; i--)
3391  bt.Put(GetByte(i-1));
3392  }
3393  else
3394  {
3395  // take two's complement of *this
3396  Integer temp = Integer::Power2(8*STDMAX((size_t)ByteCount(), outputLen)) + *this;
3397  temp.Encode(bt, outputLen, UNSIGNED);
3398  }
3399 }
3400 
3402 {
3403  DERGeneralEncoder enc(bt, INTEGER);
3405  enc.MessageEnd();
3406 }
3407 
3408 void Integer::BERDecode(const byte *input, size_t len)
3409 {
3410  CRYPTOPP_ASSERT(input && len); // NULL buffer
3411  StringStore store(input, len);
3412  BERDecode(store);
3413 }
3414 
3416 {
3417  BERGeneralDecoder dec(bt, INTEGER);
3418  if (!dec.IsDefiniteLength() || dec.MaxRetrievable() < dec.RemainingLength())
3419  BERDecodeError();
3420  Decode(dec, (size_t)dec.RemainingLength(), SIGNED);
3421  dec.MessageEnd();
3422 }
3423 
3425 {
3426  DERGeneralEncoder enc(bt, OCTET_STRING);
3427  Encode(enc, length);
3428  enc.MessageEnd();
3429 }
3430 
3432 {
3433  BERGeneralDecoder dec(bt, OCTET_STRING);
3434  if (!dec.IsDefiniteLength() || dec.RemainingLength() != length)
3435  BERDecodeError();
3436  Decode(dec, length);
3437  dec.MessageEnd();
3438 }
3439 
3440 size_t Integer::OpenPGPEncode(byte *output, size_t bufferSize) const
3441 {
3442  CRYPTOPP_ASSERT(output && bufferSize); // NULL buffer
3443  CRYPTOPP_ASSERT(bufferSize >= MinEncodedSize()); // Undersized buffer
3444  ArraySink sink(output, bufferSize);
3445  return OpenPGPEncode(sink);
3446 }
3447 
3449 {
3450  word16 bitCount = word16(BitCount());
3451  bt.PutWord16(bitCount);
3452  size_t byteCount = BitsToBytes(bitCount);
3453  Encode(bt, byteCount);
3454  return 2 + byteCount;
3455 }
3456 
3457 void Integer::OpenPGPDecode(const byte *input, size_t len)
3458 {
3459  CRYPTOPP_ASSERT(input && len); // NULL buffer
3460  StringStore store(input, len);
3461  OpenPGPDecode(store);
3462 }
3463 
3465 {
3466  word16 bitCount;
3467  if (bt.GetWord16(bitCount) != 2 || bt.MaxRetrievable() < BitsToBytes(bitCount))
3468  throw OpenPGPDecodeErr();
3469  Decode(bt, BitsToBytes(bitCount));
3470 }
3471 
3473 {
3474  const size_t nbytes = nbits/8 + 1;
3475  SecByteBlock buf(nbytes);
3476  rng.GenerateBlock(buf, nbytes);
3477  if (nbytes)
3478  buf[0] = (byte)Crop(buf[0], nbits % 8);
3479  Decode(buf, nbytes, UNSIGNED);
3480 }
3481 
3482 void Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max)
3483 {
3484  if (min > max)
3485  throw InvalidArgument("Integer: Min must be no greater than Max");
3486 
3487  Integer range = max - min;
3488  const unsigned int nbits = range.BitCount();
3489 
3490  do
3491  {
3492  Randomize(rng, nbits);
3493  }
3494  while (*this > range);
3495 
3496  *this += min;
3497 }
3498 
3499 bool Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
3500 {
3501  return GenerateRandomNoThrow(rng, MakeParameters("Min", min)("Max", max)("RandomNumberType", rnType)("EquivalentTo", equiv)("Mod", mod));
3502 }
3503 
3505 {
3506 public:
3507  KDF2_RNG(const byte *seed, size_t seedSize)
3508  : m_counter(0), m_counterAndSeed(seedSize + 4)
3509  {
3510  memcpy(m_counterAndSeed + 4, seed, seedSize);
3511  }
3512 
3513  void GenerateBlock(byte *output, size_t size)
3514  {
3515  CRYPTOPP_ASSERT(output && size); // NULL buffer
3516  PutWord(false, BIG_ENDIAN_ORDER, m_counterAndSeed, m_counter);
3517  ++m_counter;
3518  P1363_KDF2<SHA1>::DeriveKey(output, size, m_counterAndSeed, m_counterAndSeed.size(), NULLPTR, 0);
3519  }
3520 
3521 private:
3522  word32 m_counter;
3523  SecByteBlock m_counterAndSeed;
3524 };
3525 
3526 bool Integer::GenerateRandomNoThrow(RandomNumberGenerator &i_rng, const NameValuePairs &params)
3527 {
3528  Integer min = params.GetValueWithDefault("Min", Integer::Zero());
3529  Integer max;
3530  if (!params.GetValue("Max", max))
3531  {
3532  int bitLength;
3533  if (params.GetIntValue("BitLength", bitLength))
3534  max = Integer::Power2(bitLength);
3535  else
3536  throw InvalidArgument("Integer: missing Max argument");
3537  }
3538  if (min > max)
3539  throw InvalidArgument("Integer: Min must be no greater than Max");
3540 
3541  Integer equiv = params.GetValueWithDefault("EquivalentTo", Integer::Zero());
3542  Integer mod = params.GetValueWithDefault("Mod", Integer::One());
3543 
3544  if (equiv.IsNegative() || equiv >= mod)
3545  throw InvalidArgument("Integer: invalid EquivalentTo and/or Mod argument");
3546 
3547  Integer::RandomNumberType rnType = params.GetValueWithDefault("RandomNumberType", Integer::ANY);
3548 
3549  member_ptr<KDF2_RNG> kdf2Rng;
3551  if (params.GetValue(Name::Seed(), seed))
3552  {
3553  ByteQueue bq;
3554  DERSequenceEncoder seq(bq);
3555  min.DEREncode(seq);
3556  max.DEREncode(seq);
3557  equiv.DEREncode(seq);
3558  mod.DEREncode(seq);
3559  DEREncodeUnsigned(seq, rnType);
3560  DEREncodeOctetString(seq, seed.begin(), seed.size());
3561  seq.MessageEnd();
3562 
3563  SecByteBlock finalSeed((size_t)bq.MaxRetrievable());
3564  bq.Get(finalSeed, finalSeed.size());
3565  kdf2Rng.reset(new KDF2_RNG(finalSeed.begin(), finalSeed.size()));
3566  }
3567  RandomNumberGenerator &rng = kdf2Rng.get() ? (RandomNumberGenerator &)*kdf2Rng : i_rng;
3568 
3569  switch (rnType)
3570  {
3571  case ANY:
3572  if (mod == One())
3573  Randomize(rng, min, max);
3574  else
3575  {
3576  Integer min1 = min + (equiv-min)%mod;
3577  if (max < min1)
3578  return false;
3579  Randomize(rng, Zero(), (max - min1) / mod);
3580  *this *= mod;
3581  *this += min1;
3582  }
3583  return true;
3584 
3585  case PRIME:
3586  {
3587  const PrimeSelector *pSelector = params.GetValueWithDefault(Name::PointerToPrimeSelector(), (const PrimeSelector *)NULLPTR);
3588 
3589  int i;
3590  i = 0;
3591  while (1)
3592  {
3593  if (++i==16)
3594  {
3595  // check if there are any suitable primes in [min, max]
3596  Integer first = min;
3597  if (FirstPrime(first, max, equiv, mod, pSelector))
3598  {
3599  // if there is only one suitable prime, we're done
3600  *this = first;
3601  if (!FirstPrime(first, max, equiv, mod, pSelector))
3602  return true;
3603  }
3604  else
3605  return false;
3606  }
3607 
3608  Randomize(rng, min, max);
3609  if (FirstPrime(*this, STDMIN(*this+mod*PrimeSearchInterval(max), max), equiv, mod, pSelector))
3610  return true;
3611  }
3612  }
3613 
3614  default:
3615  throw InvalidArgument("Integer: invalid RandomNumberType argument");
3616  }
3617 }
3618 
3619 std::istream& operator>>(std::istream& in, Integer &a)
3620 {
3621  char c;
3622  unsigned int length = 0;
3623  SecBlock<char> str(length + 16);
3624 
3625  std::ws(in);
3626 
3627  do
3628  {
3629  in.read(&c, 1);
3630  str[length++] = c;
3631  if (length >= str.size())
3632  str.Grow(length + 16);
3633  }
3634  while (in && (c=='-' || c=='x' || (c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F') || c=='h' || c=='H' || c=='o' || c=='O' || c==',' || c=='.'));
3635 
3636  if (in.gcount())
3637  in.putback(c);
3638  str[length-1] = '\0';
3639  a = Integer(str);
3640 
3641  return in;
3642 }
3643 
3644 std::ostream& operator<<(std::ostream& out, const Integer &a)
3645 {
3646  // Get relevant conversion specifications from ostream.
3647  const long f = out.flags() & std::ios::basefield; // Get base digits.
3648  int base, block;
3649  char suffix;
3650  switch(f)
3651  {
3652  case std::ios::oct :
3653  base = 8;
3654  block = 8;
3655  suffix = 'o';
3656  break;
3657  case std::ios::hex :
3658  base = 16;
3659  block = 4;
3660  suffix = 'h';
3661  break;
3662  default :
3663  base = 10;
3664  block = 3;
3665  suffix = '.';
3666  }
3667 
3668  Integer temp1=a, temp2;
3669 
3670  if (a.IsNegative())
3671  {
3672  out << '-';
3673  temp1.Negate();
3674  }
3675 
3676  if (!a)
3677  out << '0';
3678 
3679  static const char upper[]="0123456789ABCDEF";
3680  static const char lower[]="0123456789abcdef";
3681 
3682  const char* vec = (out.flags() & std::ios::uppercase) ? upper : lower;
3683  unsigned int i=0;
3684  SecBlock<char> s(a.BitCount() / (SaturatingSubtract1(BitPrecision(base),1U)) + 1);
3685 
3686  while (!!temp1)
3687  {
3688  word digit;
3689  Integer::Divide(digit, temp2, temp1, base);
3690  s[i++]=vec[digit];
3691  temp1.swap(temp2);
3692  }
3693 
3694  while (i--)
3695  {
3696  out << s[i];
3697 // if (i && !(i%block))
3698 // out << ",";
3699  }
3700 
3701 #ifdef CRYPTOPP_USE_STD_SHOWBASE
3702  if (out.flags() & std::ios_base::showbase)
3703  out << suffix;
3704 
3705  return out;
3706 #else
3707  return out << suffix;
3708 #endif
3709 }
3710 
3712 {
3713  if (NotNegative())
3714  {
3715  if (Increment(reg, reg.size()))
3716  {
3717  reg.CleanGrow(2*reg.size());
3718  reg[reg.size()/2]=1;
3719  }
3720  }
3721  else
3722  {
3723  word borrow = Decrement(reg, reg.size());
3724  CRYPTOPP_ASSERT(!borrow); CRYPTOPP_UNUSED(borrow);
3725 
3726  if (WordCount()==0)
3727  *this = Zero();
3728  }
3729  return *this;
3730 }
3731 
3733 {
3734  if (IsNegative())
3735  {
3736  if (Increment(reg, reg.size()))
3737  {
3738  reg.CleanGrow(2*reg.size());
3739  reg[reg.size()/2]=1;
3740  }
3741  }
3742  else
3743  {
3744  if (Decrement(reg, reg.size()))
3745  *this = -One();
3746  }
3747  return *this;
3748 }
3749 
3750 // This is a bit operation. We set sign to POSITIVE, so there's no need to
3751 // worry about negative zero. Also see http://stackoverflow.com/q/11644362.
3753 {
3754  if (this == &t)
3755  {
3756  return AbsoluteValue();
3757  }
3758  else if (reg.size() >= t.reg.size())
3759  {
3760  Integer result(t);
3761  AndWords(result.reg, reg, t.reg.size());
3762 
3763  result.sign = POSITIVE;
3764  return result;
3765  }
3766  else // reg.size() < t.reg.size()
3767  {
3768  Integer result(*this);
3769  AndWords(result.reg, t.reg, reg.size());
3770 
3771  result.sign = POSITIVE;
3772  return result;
3773  }
3774 }
3775 
3776 // This is a bit operation. We set sign to POSITIVE, so there's no need to
3777 // worry about negative zero. Also see http://stackoverflow.com/q/11644362.
3778 Integer Integer::Or(const Integer& t) const
3779 {
3780  if (this == &t)
3781  {
3782  return AbsoluteValue();
3783  }
3784  else if (reg.size() >= t.reg.size())
3785  {
3786  Integer result(*this);
3787  OrWords(result.reg, t.reg, t.reg.size());
3788 
3789  result.sign = POSITIVE;
3790  return result;
3791  }
3792  else // reg.size() < t.reg.size()
3793  {
3794  Integer result(t);
3795  OrWords(result.reg, reg, reg.size());
3796 
3797  result.sign = POSITIVE;
3798  return result;
3799  }
3800 }
3801 
3802 // This is a bit operation. We set sign to POSITIVE, so there's no need to
3803 // worry about negative zero. Also see http://stackoverflow.com/q/11644362.
3805 {
3806  if (this == &t)
3807  {
3808  return Integer::Zero();
3809  }
3810  else if (reg.size() >= t.reg.size())
3811  {
3812  Integer result(*this);
3813  XorWords(result.reg, t.reg, t.reg.size());
3814 
3815  result.sign = POSITIVE;
3816  return result;
3817  }
3818  else // reg.size() < t.reg.size()
3819  {
3820  Integer result(t);
3821  XorWords(result.reg, reg, reg.size());
3822 
3823  result.sign = POSITIVE;
3824  return result;
3825  }
3826 }
3827 
3828 void PositiveAdd(Integer &sum, const Integer &a, const Integer& b)
3829 {
3830  // Profiling tells us the original second Else If was dominant, so it was promoted to the first If statement.
3831  // The code change occurred at Commit dc99266599a0e72d.
3832  int carry; const bool pre = (a.reg.size() == b.reg.size());
3833  if (!pre && a.reg.size() > b.reg.size())
3834  {
3835  carry = Add(sum.reg, a.reg, b.reg, b.reg.size());
3836  CopyWords(sum.reg+b.reg.size(), a.reg+b.reg.size(), a.reg.size()-b.reg.size());
3837  carry = Increment(sum.reg+b.reg.size(), a.reg.size()-b.reg.size(), carry);
3838  }
3839  else if (pre)
3840  {
3841  carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
3842  }
3843  else
3844  {
3845  carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
3846  CopyWords(sum.reg+a.reg.size(), b.reg+a.reg.size(), b.reg.size()-a.reg.size());
3847  carry = Increment(sum.reg+a.reg.size(), b.reg.size()-a.reg.size(), carry);
3848  }
3849 
3850  if (carry)
3851  {
3852  sum.reg.CleanGrow(2*sum.reg.size());
3853  sum.reg[sum.reg.size()/2] = 1;
3854  }
3855  sum.sign = Integer::POSITIVE;
3856 }
3857 
3858 void PositiveSubtract(Integer &diff, const Integer &a, const Integer& b)
3859 {
3860  unsigned aSize = a.WordCount();
3861  aSize += aSize%2;
3862  unsigned bSize = b.WordCount();
3863  bSize += bSize%2;
3864 
3865  // Profiling tells us the original second Else If was dominant, so it was promoted to the first If statement.
3866  // The code change occurred at Commit dc99266599a0e72d.
3867  if (aSize > bSize)
3868  {
3869  word borrow = Subtract(diff.reg, a.reg, b.reg, bSize);
3870  CopyWords(diff.reg+bSize, a.reg+bSize, aSize-bSize);
3871  borrow = Decrement(diff.reg+bSize, aSize-bSize, borrow);
3872  CRYPTOPP_ASSERT(!borrow);
3873  diff.sign = Integer::POSITIVE;
3874  }
3875  else if (aSize == bSize)
3876  {
3877  if (Compare(a.reg, b.reg, aSize) >= 0)
3878  {
3879  Subtract(diff.reg, a.reg, b.reg, aSize);
3880  diff.sign = Integer::POSITIVE;
3881  }
3882  else
3883  {
3884  Subtract(diff.reg, b.reg, a.reg, aSize);
3885  diff.sign = Integer::NEGATIVE;
3886  }
3887  }
3888  else
3889  {
3890  word borrow = Subtract(diff.reg, b.reg, a.reg, aSize);
3891  CopyWords(diff.reg+aSize, b.reg+aSize, bSize-aSize);
3892  borrow = Decrement(diff.reg+aSize, bSize-aSize, borrow);
3893  CRYPTOPP_ASSERT(!borrow);
3894  diff.sign = Integer::NEGATIVE;
3895  }
3896 }
3897 
3898 // MSVC .NET 2003 workaround
3899 template <class T> inline const T& STDMAX2(const T& a, const T& b)
3900 {
3901  return a < b ? b : a;
3902 }
3903 
3905 {
3906  Integer sum((word)0, STDMAX2(reg.size(), b.reg.size()));
3907  if (NotNegative())
3908  {
3909  if (b.NotNegative())
3910  PositiveAdd(sum, *this, b);
3911  else
3912  PositiveSubtract(sum, *this, b);
3913  }
3914  else
3915  {
3916  if (b.NotNegative())
3917  PositiveSubtract(sum, b, *this);
3918  else
3919  {
3920  PositiveAdd(sum, *this, b);
3921  sum.sign = Integer::NEGATIVE;
3922  }
3923  }
3924  return sum;
3925 }
3926 
3928 {
3929  reg.CleanGrow(t.reg.size());
3930  if (NotNegative())
3931  {
3932  if (t.NotNegative())
3933  PositiveAdd(*this, *this, t);
3934  else
3935  PositiveSubtract(*this, *this, t);
3936  }
3937  else
3938  {
3939  if (t.NotNegative())
3940  PositiveSubtract(*this, t, *this);
3941  else
3942  {
3943  PositiveAdd(*this, *this, t);
3944  sign = Integer::NEGATIVE;
3945  }
3946  }
3947  return *this;
3948 }
3949 
3951 {
3952  Integer diff((word)0, STDMAX2(reg.size(), b.reg.size()));
3953  if (NotNegative())
3954  {
3955  if (b.NotNegative())
3956  PositiveSubtract(diff, *this, b);
3957  else
3958  PositiveAdd(diff, *this, b);
3959  }
3960  else
3961  {
3962  if (b.NotNegative())
3963  {
3964  PositiveAdd(diff, *this, b);
3965  diff.sign = Integer::NEGATIVE;
3966  }
3967  else
3968  PositiveSubtract(diff, b, *this);
3969  }
3970  return diff;
3971 }
3972 
3974 {
3975  reg.CleanGrow(t.reg.size());
3976  if (NotNegative())
3977  {
3978  if (t.NotNegative())
3979  PositiveSubtract(*this, *this, t);
3980  else
3981  PositiveAdd(*this, *this, t);
3982  }
3983  else
3984  {
3985  if (t.NotNegative())
3986  {
3987  PositiveAdd(*this, *this, t);
3988  sign = Integer::NEGATIVE;
3989  }
3990  else
3991  PositiveSubtract(*this, t, *this);
3992  }
3993  return *this;
3994 }
3995 
3997 {
3998  const size_t wordCount = WordCount();
3999  const size_t shiftWords = n / WORD_BITS;
4000  const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
4001 
4002  reg.CleanGrow(RoundupSize(wordCount+BitsToWords(n)));
4003  ShiftWordsLeftByWords(reg, wordCount + shiftWords, shiftWords);
4004  ShiftWordsLeftByBits(reg+shiftWords, wordCount+BitsToWords(shiftBits), shiftBits);
4005  return *this;
4006 }
4007 
4009 {
4010  const size_t wordCount = WordCount();
4011  const size_t shiftWords = n / WORD_BITS;
4012  const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
4013 
4014  ShiftWordsRightByWords(reg, wordCount, shiftWords);
4015  if (wordCount > shiftWords)
4016  ShiftWordsRightByBits(reg, wordCount-shiftWords, shiftBits);
4017  if (IsNegative() && WordCount()==0) // avoid -0
4018  *this = Zero();
4019  return *this;
4020 }
4021 
4023 {
4024  if (this != &t)
4025  {
4026  const size_t size = STDMIN(reg.size(), t.reg.size());
4027  reg.resize(size);
4028  AndWords(reg, t.reg, size);
4029  }
4030  sign = POSITIVE;
4031  return *this;
4032 }
4033 
4035 {
4036  if (this != &t)
4037  {
4038  if (reg.size() >= t.reg.size())
4039  {
4040  OrWords(reg, t.reg, t.reg.size());
4041  }
4042  else // reg.size() < t.reg.size()
4043  {
4044  const size_t head = reg.size();
4045  const size_t tail = t.reg.size() - reg.size();
4046  reg.resize(head+tail);
4047  OrWords(reg, t.reg, head);
4048  CopyWords(reg+head,t.reg+head,tail);
4049  }
4050  }
4051  sign = POSITIVE;
4052  return *this;
4053 }
4054 
4056 {
4057  if (this == &t)
4058  {
4059  *this = Zero();
4060  }
4061  else
4062  {
4063  if (reg.size() >= t.reg.size())
4064  {
4065  XorWords(reg, t.reg, t.reg.size());
4066  }
4067  else // reg.size() < t.reg.size()
4068  {
4069  const size_t head = reg.size();
4070  const size_t tail = t.reg.size() - reg.size();
4071  reg.resize(head+tail);
4072  XorWords(reg, t.reg, head);
4073  CopyWords(reg+head,t.reg+head,tail);
4074  }
4075  }
4076  sign = POSITIVE;
4077  return *this;
4078 }
4079 
4080 void PositiveMultiply(Integer &product, const Integer &a, const Integer &b)
4081 {
4082  size_t aSize = RoundupSize(a.WordCount());
4083  size_t bSize = RoundupSize(b.WordCount());
4084 
4085  product.reg.CleanNew(RoundupSize(aSize+bSize));
4086  product.sign = Integer::POSITIVE;
4087 
4088  IntegerSecBlock workspace(aSize + bSize);
4089  AsymmetricMultiply(product.reg, workspace, a.reg, aSize, b.reg, bSize);
4090 }
4091 
4092 void Multiply(Integer &product, const Integer &a, const Integer &b)
4093 {
4094  PositiveMultiply(product, a, b);
4095 
4096  if (a.NotNegative() != b.NotNegative())
4097  product.Negate();
4098 }
4099 
4101 {
4102  Integer product;
4103  Multiply(product, *this, b);
4104  return product;
4105 }
4106 
4107 /*
4108 void PositiveDivide(Integer &remainder, Integer &quotient,
4109  const Integer &dividend, const Integer &divisor)
4110 {
4111  remainder.reg.CleanNew(divisor.reg.size());
4112  remainder.sign = Integer::POSITIVE;
4113  quotient.reg.New(0);
4114  quotient.sign = Integer::POSITIVE;
4115  unsigned i=dividend.BitCount();
4116  while (i--)
4117  {
4118  word overflow = ShiftWordsLeftByBits(remainder.reg, remainder.reg.size(), 1);
4119  remainder.reg[0] |= dividend[i];
4120  if (overflow || remainder >= divisor)
4121  {
4122  Subtract(remainder.reg, remainder.reg, divisor.reg, remainder.reg.size());
4123  quotient.SetBit(i);
4124  }
4125  }
4126 }
4127 */
4128 
4129 void PositiveDivide(Integer &remainder, Integer &quotient,
4130  const Integer &a, const Integer &b)
4131 {
4132  unsigned aSize = a.WordCount();
4133  unsigned bSize = b.WordCount();
4134 
4135  if (!bSize)
4136  throw Integer::DivideByZero();
4137 
4138  if (aSize < bSize)
4139  {
4140  remainder = a;
4141  remainder.sign = Integer::POSITIVE;
4142  quotient = Integer::Zero();
4143  return;
4144  }
4145 
4146  aSize += aSize%2; // round up to next even number
4147  bSize += bSize%2;
4148 
4149  remainder.reg.CleanNew(RoundupSize(bSize));
4150  remainder.sign = Integer::POSITIVE;
4151  quotient.reg.CleanNew(RoundupSize(aSize-bSize+2));
4152  quotient.sign = Integer::POSITIVE;
4153 
4154  IntegerSecBlock T(aSize+3*(bSize+2));
4155  Divide(remainder.reg, quotient.reg, T, a.reg, aSize, b.reg, bSize);
4156 }
4157 
4158 void Integer::Divide(Integer &remainder, Integer &quotient, const Integer &dividend, const Integer &divisor)
4159 {
4160  PositiveDivide(remainder, quotient, dividend, divisor);
4161 
4162  if (dividend.IsNegative())
4163  {
4164  quotient.Negate();
4165  if (remainder.NotZero())
4166  {
4167  --quotient;
4168  remainder = divisor.AbsoluteValue() - remainder;
4169  }
4170  }
4171 
4172  if (divisor.IsNegative())
4173  quotient.Negate();
4174 }
4175 
4176 void Integer::DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n)
4177 {
4178  q = a;
4179  q >>= n;
4180 
4181  const size_t wordCount = BitsToWords(n);
4182  if (wordCount <= a.WordCount())
4183  {
4184  r.reg.resize(RoundupSize(wordCount));
4185  CopyWords(r.reg, a.reg, wordCount);
4186  SetWords(r.reg+wordCount, 0, r.reg.size()-wordCount);
4187  if (n % WORD_BITS != 0)
4188  r.reg[wordCount-1] %= (word(1) << (n % WORD_BITS));
4189  }
4190  else
4191  {
4192  r.reg.resize(RoundupSize(a.WordCount()));
4193  CopyWords(r.reg, a.reg, r.reg.size());
4194  }
4195  r.sign = POSITIVE;
4196 
4197  if (a.IsNegative() && r.NotZero())
4198  {
4199  --q;
4200  r = Power2(n) - r;
4201  }
4202 }
4203 
4205 {
4206  Integer remainder, quotient;
4207  Integer::Divide(remainder, quotient, *this, b);
4208  return quotient;
4209 }
4210 
4212 {
4213  Integer remainder, quotient;
4214  Integer::Divide(remainder, quotient, *this, b);
4215  return remainder;
4216 }
4217 
4218 void Integer::Divide(word &remainder, Integer &quotient, const Integer &dividend, word divisor)
4219 {
4220  if (!divisor)
4221  throw Integer::DivideByZero();
4222 
4223  if ((divisor & (divisor-1)) == 0) // divisor is a power of 2
4224  {
4225  quotient = dividend >> (BitPrecision(divisor)-1);
4226  remainder = dividend.reg[0] & (divisor-1);
4227  return;
4228  }
4229 
4230  unsigned int i = dividend.WordCount();
4231  quotient.reg.CleanNew(RoundupSize(i));
4232  remainder = 0;
4233  while (i--)
4234  {
4235  quotient.reg[i] = DWord(dividend.reg[i], remainder) / divisor;
4236  remainder = DWord(dividend.reg[i], remainder) % divisor;
4237  }
4238 
4239  if (dividend.NotNegative())
4240  quotient.sign = POSITIVE;
4241  else
4242  {
4243  quotient.sign = NEGATIVE;
4244  if (remainder)
4245  {
4246  --quotient;
4247  remainder = divisor - remainder;
4248  }
4249  }
4250 }
4251 
4253 {
4254  word remainder;
4255  Integer quotient;
4256  Integer::Divide(remainder, quotient, *this, b);
4257  return quotient;
4258 }
4259 
4260 word Integer::Modulo(word divisor) const
4261 {
4262  if (!divisor)
4263  throw Integer::DivideByZero();
4264 
4265  word remainder;
4266 
4267  // Profiling tells us the original Else was dominant, so it was promoted to the first If statement.
4268  // The code change occurred at Commit dc99266599a0e72d.
4269  if ((divisor & (divisor-1)) != 0) // divisor is not a power of 2
4270  {
4271  // Profiling tells us the original Else was dominant, so it was promoted to the first If statement.
4272  // The code change occurred at Commit dc99266599a0e72d.
4273  unsigned int i = WordCount();
4274  if (divisor > 5)
4275  {
4276  remainder = 0;
4277  while (i--)
4278  remainder = DWord(reg[i], remainder) % divisor;
4279  }
4280  else
4281  {
4282  DWord sum(0, 0);
4283  while (i--)
4284  sum += reg[i];
4285  remainder = sum % divisor;
4286  }
4287  }
4288  else // divisor is a power of 2
4289  {
4290  remainder = reg[0] & (divisor-1);
4291  }
4292 
4293  if (IsNegative() && remainder)
4294  remainder = divisor - remainder;
4295 
4296  return remainder;
4297 }
4298 
4300 {
4301  if (!!(*this)) // don't flip sign if *this==0
4302  sign = Sign(1-sign);
4303 }
4304 
4305 int Integer::PositiveCompare(const Integer& t) const
4306 {
4307  // Profiling tells us the original Else was dominant, so it was promoted to the first If statement.
4308  // The code change occurred at Commit dc99266599a0e72d.
4309  const unsigned size = WordCount(), tSize = t.WordCount();
4310  if (size != tSize)
4311  return size > tSize ? 1 : -1;
4312  else
4313  return CryptoPP::Compare(reg, t.reg, size);
4314 }
4315 
4316 int Integer::Compare(const Integer& t) const
4317 {
4318  if (NotNegative())
4319  {
4320  if (t.NotNegative())
4321  return PositiveCompare(t);
4322  else
4323  return 1;
4324  }
4325  else
4326  {
4327  if (t.NotNegative())
4328  return -1;
4329  else
4330  return -PositiveCompare(t);
4331  }
4332 }
4333 
4335 {
4336  if (!IsPositive())
4337  return Zero();
4338 
4339  // overestimate square root
4340  Integer x, y = Power2((BitCount()+1)/2);
4341  CRYPTOPP_ASSERT(y*y >= *this);
4342 
4343  do
4344  {
4345  x = y;
4346  y = (x + *this/x) >> 1;
4347  } while (y<x);
4348 
4349  return x;
4350 }
4351 
4352 bool Integer::IsSquare() const
4353 {
4354  Integer r = SquareRoot();
4355  return *this == r.Squared();
4356 }
4357 
4358 bool Integer::IsUnit() const
4359 {
4360  return (WordCount() == 1) && (reg[0] == 1);
4361 }
4362 
4364 {
4365  return IsUnit() ? *this : Zero();
4366 }
4367 
4368 Integer a_times_b_mod_c(const Integer &x, const Integer& y, const Integer& m)
4369 {
4370  return x*y%m;
4371 }
4372 
4373 Integer a_exp_b_mod_c(const Integer &x, const Integer& e, const Integer& m)
4374 {
4375  ModularArithmetic mr(m);
4376  return mr.Exponentiate(x, e);
4377 }
4378 
4380 {
4381  return EuclideanDomainOf<Integer>().Gcd(a, b);
4382 }
4383 
4385 {
4387 
4388  if (IsNegative())
4389  return Modulo(m).InverseMod(m);
4390 
4391  if (m.IsEven())
4392  {
4393  if (!m || IsEven())
4394  return Zero(); // no inverse
4395  if (*this == One())
4396  return One();
4397 
4398  Integer u = m.Modulo(*this).InverseMod(*this);
4399  return !u ? Zero() : (m*(*this-u)+1)/(*this);
4400  }
4401 
4402  SecBlock<word> T(m.reg.size() * 4);
4403  Integer r((word)0, m.reg.size());
4404  unsigned k = AlmostInverse(r.reg, T, reg, reg.size(), m.reg, m.reg.size());
4405  DivideByPower2Mod(r.reg, r.reg, k, m.reg, m.reg.size());
4406  return r;
4407 }
4408 
4409 word Integer::InverseMod(word mod) const
4410 {
4411  word g0 = mod, g1 = *this % mod;
4412  word v0 = 0, v1 = 1;
4413  word y;
4414 
4415  while (g1)
4416  {
4417  if (g1 == 1)
4418  return v1;
4419  y = g0 / g1;
4420  g0 = g0 % g1;
4421  v0 += y * v1;
4422 
4423  if (!g0)
4424  break;
4425  if (g0 == 1)
4426  return mod-v0;
4427  y = g1 / g0;
4428  g1 = g1 % g0;
4429  v1 += y * v0;
4430  }
4431  return 0;
4432 }
4433 
4434 // ********************************************************
4435 
4437 {
4438  BERSequenceDecoder seq(bt);
4439  OID oid(seq);
4440  if (oid != ASN1::prime_field())
4441  BERDecodeError();
4442  m_modulus.BERDecode(seq);
4443  seq.MessageEnd();
4444  m_result.reg.resize(m_modulus.reg.size());
4445 }
4446 
4448 {
4449  DERSequenceEncoder seq(bt);
4450  ASN1::prime_field().DEREncode(seq);
4451  m_modulus.DEREncode(seq);
4452  seq.MessageEnd();
4453 }
4454 
4456 {
4457  a.DEREncodeAsOctetString(out, MaxElementByteLength());
4458 }
4459 
4461 {
4462  a.BERDecodeAsOctetString(in, MaxElementByteLength());
4463 }
4464 
4466 {
4467  if (a.reg.size()==m_modulus.reg.size())
4468  {
4469  CryptoPP::DivideByPower2Mod(m_result.reg.begin(), a.reg, 1, m_modulus.reg, a.reg.size());
4470  return m_result;
4471  }
4472  else
4473  return m_result1 = (a.IsEven() ? (a >> 1) : ((a+m_modulus) >> 1));
4474 }
4475 
4476 const Integer& ModularArithmetic::Add(const Integer &a, const Integer &b) const
4477 {
4478  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4479  {
4480  if (CryptoPP::Add(m_result.reg.begin(), a.reg, b.reg, a.reg.size())
4481  || Compare(m_result.reg, m_modulus.reg, a.reg.size()) >= 0)
4482  {
4483  CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
4484  }
4485  return m_result;
4486  }
4487  else
4488  {
4489  m_result1 = a+b;
4490  if (m_result1 >= m_modulus)
4491  m_result1 -= m_modulus;
4492  return m_result1;
4493  }
4494 }
4495 
4497 {
4498  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4499  {
4500  if (CryptoPP::Add(a.reg, a.reg, b.reg, a.reg.size())
4501  || Compare(a.reg, m_modulus.reg, a.reg.size()) >= 0)
4502  {
4503  CryptoPP::Subtract(a.reg, a.reg, m_modulus.reg, a.reg.size());
4504  }
4505  }
4506  else
4507  {
4508  a+=b;
4509  if (a>=m_modulus)
4510  a-=m_modulus;
4511  }
4512 
4513  return a;
4514 }
4515 
4516 const Integer& ModularArithmetic::Subtract(const Integer &a, const Integer &b) const
4517 {
4518  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4519  {
4520  if (CryptoPP::Subtract(m_result.reg.begin(), a.reg, b.reg, a.reg.size()))
4521  CryptoPP::Add(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
4522  return m_result;
4523  }
4524  else
4525  {
4526  m_result1 = a-b;
4527  if (m_result1.IsNegative())
4528  m_result1 += m_modulus;
4529  return m_result1;
4530  }
4531 }
4532 
4534 {
4535  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4536  {
4537  if (CryptoPP::Subtract(a.reg, a.reg, b.reg, a.reg.size()))
4538  CryptoPP::Add(a.reg, a.reg, m_modulus.reg, a.reg.size());
4539  }
4540  else
4541  {
4542  a-=b;
4543  if (a.IsNegative())
4544  a+=m_modulus;
4545  }
4546 
4547  return a;
4548 }
4549 
4551 {
4552  if (!a)
4553  return a;
4554 
4555  CopyWords(m_result.reg.begin(), m_modulus.reg, m_modulus.reg.size());
4556  if (CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, a.reg, a.reg.size()))
4557  Decrement(m_result.reg.begin()+a.reg.size(), m_modulus.reg.size()-a.reg.size());
4558 
4559  return m_result;
4560 }
4561 
4562 Integer ModularArithmetic::CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
4563 {
4564  if (m_modulus.IsOdd())
4565  {
4566  MontgomeryRepresentation dr(m_modulus);
4567  return dr.ConvertOut(dr.CascadeExponentiate(dr.ConvertIn(x), e1, dr.ConvertIn(y), e2));
4568  }
4569  else
4570  return AbstractRing<Integer>::CascadeExponentiate(x, e1, y, e2);
4571 }
4572 
4573 void ModularArithmetic::SimultaneousExponentiate(Integer *results, const Integer &base, const Integer *exponents, unsigned int exponentsCount) const
4574 {
4575  if (m_modulus.IsOdd())
4576  {
4577  MontgomeryRepresentation dr(m_modulus);
4578  dr.SimultaneousExponentiate(results, dr.ConvertIn(base), exponents, exponentsCount);
4579  for (unsigned int i=0; i<exponentsCount; i++)
4580  results[i] = dr.ConvertOut(results[i]);
4581  }
4582  else
4583  AbstractRing<Integer>::SimultaneousExponentiate(results, base, exponents, exponentsCount);
4584 }
4585 
4587  : ModularArithmetic(m),
4588  m_u((word)0, m_modulus.reg.size()),
4589  m_workspace(5*m_modulus.reg.size())
4590 {
4591  if (!m_modulus.IsOdd())
4592  throw InvalidArgument("MontgomeryRepresentation: Montgomery representation requires an odd modulus");
4593 
4594  RecursiveInverseModPower2(m_u.reg, m_workspace, m_modulus.reg, m_modulus.reg.size());
4595 }
4596 
4598 {
4599  word *const T = m_workspace.begin();
4600  word *const R = m_result.reg.begin();
4601  const size_t N = m_modulus.reg.size();
4602  CRYPTOPP_ASSERT(a.reg.size()<=N && b.reg.size()<=N);
4603 
4604  AsymmetricMultiply(T, T+2*N, a.reg, a.reg.size(), b.reg, b.reg.size());
4605  SetWords(T+a.reg.size()+b.reg.size(), 0, 2*N-a.reg.size()-b.reg.size());
4606  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4607  return m_result;
4608 }
4609 
4611 {
4612  word *const T = m_workspace.begin();
4613  word *const R = m_result.reg.begin();
4614  const size_t N = m_modulus.reg.size();
4615  CRYPTOPP_ASSERT(a.reg.size()<=N);
4616 
4617  CryptoPP::Square(T, T+2*N, a.reg, a.reg.size());
4618  SetWords(T+2*a.reg.size(), 0, 2*N-2*a.reg.size());
4619  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4620  return m_result;
4621 }
4622 
4624 {
4625  word *const T = m_workspace.begin();
4626  word *const R = m_result.reg.begin();
4627  const size_t N = m_modulus.reg.size();
4628  CRYPTOPP_ASSERT(a.reg.size()<=N);
4629 
4630  CopyWords(T, a.reg, a.reg.size());
4631  SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
4632  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4633  return m_result;
4634 }
4635 
4637 {
4638 // return (EuclideanMultiplicativeInverse(a, modulus)<<(2*WORD_BITS*modulus.reg.size()))%modulus;
4639  word *const T = m_workspace.begin();
4640  word *const R = m_result.reg.begin();
4641  const size_t N = m_modulus.reg.size();
4642  CRYPTOPP_ASSERT(a.reg.size()<=N);
4643 
4644  CopyWords(T, a.reg, a.reg.size());
4645  SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
4646  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4647  unsigned k = AlmostInverse(R, T, R, N, m_modulus.reg, N);
4648 
4649 // cout << "k=" << k << " N*32=" << 32*N << endl;
4650 
4651  if (k>N*WORD_BITS)
4652  DivideByPower2Mod(R, R, k-N*WORD_BITS, m_modulus.reg, N);
4653  else
4654  MultiplyByPower2Mod(R, R, N*WORD_BITS-k, m_modulus.reg, N);
4655 
4656  return m_result;
4657 }
4658 
4659 // Specialization declared in misc.h to allow us to print integers
4660 // with additional control options, like arbirary bases and uppercase.
4661 template <> CRYPTOPP_DLL
4662 std::string IntToString<Integer>(Integer value, unsigned int base)
4663 {
4664  // Hack... set the high bit for uppercase. Set the next bit fo a suffix.
4665  static const unsigned int BIT_32 = (1U << 31);
4666  const bool UPPER = !!(base & BIT_32);
4667  static const unsigned int BIT_31 = (1U << 30);
4668  const bool BASE = !!(base & BIT_31);
4669 
4670  const char CH = UPPER ? 'A' : 'a';
4671  base &= ~(BIT_32|BIT_31);
4672  CRYPTOPP_ASSERT(base >= 2 && base <= 32);
4673 
4674  if (value == 0)
4675  return "0";
4676 
4677  bool negative = false, zero = false;
4678  if (value.IsNegative())
4679  {
4680  negative = true;
4681  value.Negate();
4682  }
4683 
4684  if (!value)
4685  zero = true;
4686 
4687  SecBlock<char> s(value.BitCount() / (SaturatingSubtract1(BitPrecision(base),1U)) + 1);
4688  Integer temp;
4689 
4690  unsigned int i=0;
4691  while (!!value)
4692  {
4693  word digit;
4694  Integer::Divide(digit, temp, value, word(base));
4695  s[i++]=char((digit < 10 ? '0' : (CH - 10)) + digit);
4696  value.swap(temp);
4697  }
4698 
4699  std::string result;
4700  result.reserve(i+2);
4701 
4702  if (negative)
4703  result += '-';
4704 
4705  if (zero)
4706  result += '0';
4707 
4708  while (i--)
4709  result += s[i];
4710 
4711  if (BASE)
4712  {
4713  if (base == 10)
4714  result += '.';
4715  else if (base == 16)
4716  result += 'h';
4717  else if (base == 8)
4718  result += 'o';
4719  else if (base == 2)
4720  result += 'b';
4721  }
4722 
4723  return result;
4724 }
4725 
4726 // Specialization declared in misc.h to avoid Coverity findings.
4727 template <> CRYPTOPP_DLL
4728 std::string IntToString<word64>(word64 value, unsigned int base)
4729 {
4730  // Hack... set the high bit for uppercase.
4731  static const unsigned int HIGH_BIT = (1U << 31);
4732  const char CH = !!(base & HIGH_BIT) ? 'A' : 'a';
4733  base &= ~HIGH_BIT;
4734 
4735  CRYPTOPP_ASSERT(base >= 2);
4736  if (value == 0)
4737  return "0";
4738 
4739  std::string result;
4740  while (value > 0)
4741  {
4742  word64 digit = value % base;
4743  result = char((digit < 10 ? '0' : (CH - 10)) + digit) + result;
4744  value /= base;
4745  }
4746  return result;
4747 }
4748 
4749 #ifndef CRYPTOPP_NO_ASSIGN_TO_INTEGER
4750 // Allow the linker to discard Integer code if not needed.
4751 // Also see http://github.com/weidai11/cryptopp/issues/389.
4752 bool AssignIntToInteger(const std::type_info &valueType, void *pInteger, const void *pInt)
4753 {
4754  if (valueType != typeid(Integer))
4755  return false;
4756  *reinterpret_cast<Integer *>(pInteger) = *reinterpret_cast<const int *>(pInt);
4757  return true;
4758 }
4759 #endif // CRYPTOPP_NO_ASSIGN_TO_INTEGER
4760 
4761 // *************************** C++ Static Initialization ***************************
4762 
4764 {
4765 public:
4766  InitInteger()
4767  {
4768  SetFunctionPointers();
4769  }
4770 };
4771 
4772 // This is not really needed because each Integer can dynamically initialize itself,
4773 // but we take a peephole optimization and initialize the class once if init priorities are
4774 // available. Dynamic initialization will be used if init priorities are not available.
4775 
4776 #if HAVE_GCC_INIT_PRIORITY
4777  const InitInteger s_init __attribute__ ((init_priority (CRYPTOPP_INIT_PRIORITY + 10))) = InitInteger();
4778 #elif defined(HAVE_MSC_INIT_PRIORITY)
4779  #pragma warning(disable: 4075)
4780  #pragma init_seg(".CRT$XCU")
4781  const InitInteger s_init;
4782 # if defined(USE_MSC_INIT_PRIORITY)
4783  const Integer g_zero(0L);
4784  const Integer g_one(1L);
4785  const Integer g_two(2L);
4786 # endif
4787  #pragma warning(default: 4075)
4788 #else
4789  const InitInteger s_init;
4790 #endif
4791 
4792 // ***************** Library code ********************
4793 
4795 {
4796 #if defined(CRYPTOPP_CXX11_DYNAMIC_INIT)
4797  static Integer s_zero(0L);
4798  return s_zero;
4799 #elif defined(HAVE_MSC_INIT_PRIORITY) && defined(USE_MSC_INIT_PRIORITY)
4800  return g_zero;
4801 #else
4802  return Singleton<Integer, NewInteger<0L> >().Ref();
4803 #endif
4804 }
4805 
4807 {
4808 #if defined(CRYPTOPP_CXX11_DYNAMIC_INIT)
4809  static Integer s_one(1L);
4810  return s_one;
4811 #elif defined(HAVE_MSC_INIT_PRIORITY) && defined(USE_MSC_INIT_PRIORITY)
4812  return g_one;
4813 #else
4814  return Singleton<Integer, NewInteger<1L> >().Ref();
4815 #endif
4816 }
4817 
4819 {
4820 #if defined(CRYPTOPP_CXX11_DYNAMIC_INIT)
4821  static Integer s_two(2L);
4822  return s_two;
4823 #elif defined(HAVE_MSC_INIT_PRIORITY) && defined(USE_MSC_INIT_PRIORITY)
4824  return g_two;
4825 #else
4826  return Singleton<Integer, NewInteger<2L> >().Ref();
4827 #endif
4828 }
4829 
4830 NAMESPACE_END
4831 
4832 #endif // CRYPTOPP_IMPORTS
Used to pass byte array input as part of a NameValuePairs object.
Definition: algparam.h:31
An invalid argument was detected.
Definition: cryptlib.h:201
unsigned int WordCount() const
Determines the number of words required to represent the Integer.
Definition: integer.cpp:3301
Integer MultiplicativeInverse() const
return inverse if 1 or -1, otherwise return 0
Definition: integer.cpp:4363
Integer & Reduce(Integer &a, const Integer &b) const
TODO.
Definition: integer.cpp:4533
bool NotZero() const
Determines if the Integer is non-0.
Definition: integer.h:333
Classes for working with NameValuePairs.
Integer Or(const Integer &t) const
Bitwise OR.
Definition: integer.cpp:3778
Integer & operator|=(const Integer &t)
Bitwise OR Assignment.
Definition: integer.cpp:4034
void swap(SecBlock< T, A > &b)
Swap contents with another SecBlock.
Definition: secblock.h:810
Integer And(const Integer &t) const
Bitwise AND.
Definition: integer.cpp:3752
a number which is probabilistically prime
Definition: integer.h:95
Utility functions for the Crypto++ library.
Integer Plus(const Integer &b) const
Addition.
Definition: integer.cpp:3904
virtual size_t Peek(byte &outByte) const
Peek a 8-bit byte.
Definition: cryptlib.cpp:511
ByteOrder
Provides the byte ordering.
Definition: cryptlib.h:141
Restricts the instantiation of a class to one static object without locks.
Definition: misc.h:294
void CleanNew(size_type newSize)
Change size without preserving contents.
Definition: secblock.h:749
an unsigned value
Definition: integer.h:85
T GetValueWithDefault(const char *name, T defaultValue) const
Get a named value.
Definition: cryptlib.h:365
bool IsSquare() const
Determine whether this integer is a perfect square.
Definition: integer.cpp:4352
void DEREncode(BufferedTransformation &bt) const
Encode in DER format.
Definition: integer.cpp:3401
size_t DEREncodeUnsigned(BufferedTransformation &out, T w, byte asnTag=INTEGER)
DER Encode unsigned value.
Definition: asn.h:455
virtual void GenerateBlock(byte *output, size_t size)
Generate random array of bytes.
Definition: cryptlib.cpp:309
size_t size() const
Length of the memory block.
Definition: algparam.h:95
size_t BitsToBytes(size_t bitCount)
Returns the number of 8-bit bytes or octets required for the specified number of bits.
Definition: misc.h:788
This file contains helper classes/functions for implementing public key algorithms.
void DEREncodeAsOctetString(BufferedTransformation &bt, size_t length) const
Encode absolute value as big-endian octet string.
Definition: integer.cpp:3424
Integer & operator=(const Integer &t)
Assignment.
Definition: integer.cpp:3063
bool FirstPrime(Integer &p, const Integer &max, const Integer &equiv, const Integer &mod, const PrimeSelector *pSelector)
Finds a random prime of special form.
Definition: nbtheory.cpp:379
static Integer Gcd(const Integer &a, const Integer &n)
greatest common divisor
Definition: integer.cpp:4379
void resize(size_type newSize)
Change size and preserve contents.
Definition: secblock.h:800
Integer & operator+=(const Integer &t)
Addition Assignment.
Definition: integer.cpp:3927
size_t BitsToWords(size_t bitCount)
Returns the number of words required for the specified number of bits.
Definition: misc.h:808
void PutWord(bool assumeAligned, ByteOrder order, byte *block, T value, const byte *xorBlock=NULL)
Access a block of memory.
Definition: misc.h:2293
unsigned int BytePrecision(const T &value)
Returns the number of 8-bit bytes or octets required for a value.
Definition: misc.h:669
void CleanGrow(size_type newSize)
Change size and preserve contents.
Definition: secblock.h:782
Integer & operator--()
Pre-decrement.
Definition: integer.cpp:3732
virtual Element CascadeExponentiate(const Element &x, const Integer &e1, const Element &y, const Integer &e2) const
TODO.
Definition: algebra.cpp:323
Secure memory block with allocator and cleanup.
Definition: secblock.h:459
const Integer & Inverse(const Integer &a) const
Inverts the element in the ring.
Definition: integer.cpp:4550
signed long ConvertToLong() const
Convert the Integer to Long.
Definition: integer.cpp:2984
const Integer & Subtract(const Integer &a, const Integer &b) const
Subtracts elements in the ring.
Definition: integer.cpp:4516
void OpenPGPDecode(const byte *input, size_t inputLen)
Decode from OpenPGP format.
Definition: integer.cpp:3457
Signedness
Used when importing and exporting integers.
Definition: integer.h:83
ASN.1 object identifiers for algorthms and schemes.
Square block cipher.
Definition: square.h:24
Classes for automatic resource management.
bool IsP4()
Determines if the CPU is an Intel P4.
Definition: cpu.h:206
bool IsNegative() const
Determines if the Integer is negative.
Definition: integer.h:336
lword MaxRetrievable() const
Provides the number of bytes ready for retrieval.
Definition: queue.h:34
Library configuration file.
MontgomeryRepresentation(const Integer &modulus)
Construct a MontgomeryRepresentation.
Definition: integer.cpp:4586
static void DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n)
returns same result as Divide(r, q, a, Power2(n)), but faster
Definition: integer.cpp:4176
Ring of congruence classes modulo n.
Definition: modarith.h:39
Interface for random number generators.
Definition: cryptlib.h:1339
size_t BytesToWords(size_t byteCount)
Returns the number of words required for the specified number of bytes.
Definition: misc.h:798
void Randomize(RandomNumberGenerator &rng, size_t bitCount)
Set this Integer to random integer.
Definition: integer.cpp:3472
std::string IntToString< word64 >(word64 value, unsigned int base)
Converts an unsigned value to a string.
Definition: integer.cpp:4728
void New(size_type newSize)
Change size without preserving contents.
Definition: secblock.h:734
void SetByte(size_t n, byte value)
Set the n-th byte to value.
Definition: integer.cpp:3109
Integer InverseMod(const Integer &n) const
calculate multiplicative inverse of *this mod n
Definition: integer.cpp:4384
the value is negative
Definition: integer.h:77
SecBlock<byte> typedef.
Definition: secblock.h:828
BER Sequence Decoder.
Definition: asn.h:303
Interface for buffered transformations.
Definition: cryptlib.h:1486
bool IsPositive() const
Determines if the Integer is positive.
Definition: integer.h:342
bool NotNegative() const
Determines if the Integer is non-negative.
Definition: integer.h:339
const Integer & Add(const Integer &a, const Integer &b) const
Adds elements in the ring.
Definition: integer.cpp:4476
static const Integer & One()
Integer representing 1.
Definition: integer.cpp:4806
Integer & Accumulate(Integer &a, const Integer &b) const
TODO.
Definition: integer.cpp:4496
byte order is little-endian
Definition: cryptlib.h:143
Sign
Used internally to represent the integer.
Definition: integer.h:73
Integer ConvertIn(const Integer &a) const
Reduces an element in the congruence class.
Definition: modarith.h:294
bool operator!() const
Negation.
Definition: integer.cpp:3058
Pointer that overloads operator ->
Definition: smartptr.h:39
bool IsUnit() const
is 1 or -1
Definition: integer.cpp:4358
unsigned int ByteCount() const
Determines the number of bytes required to represent the Integer.
Definition: integer.cpp:3306
Classes and functions for secure memory allocations.
void DEREncodeElement(BufferedTransformation &out, const Element &a) const
Encodes element in DER format.
Definition: integer.cpp:4455
Integer Modulo(const Integer &b) const
Remainder.
Definition: integer.cpp:4211
Copy input to a memory buffer.
Definition: filters.h:1157
inline ::Integer operator-(const ::Integer &a, const ::Integer &b)
Subtraction.
Definition: integer.h:668
void BERDecodeElement(BufferedTransformation &in, Element &a) const
Decodes element in DER format.
Definition: integer.cpp:4460
size_t MinEncodedSize(Signedness sign=UNSIGNED) const
Minimum number of bytes to encode this integer.
Definition: integer.cpp:3363
Integer DividedBy(const Integer &b) const
Division.
Definition: integer.cpp:4204
Integer CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
TODO.
Definition: integer.cpp:4562
Integer Times(const Integer &b) const
Multiplication.
Definition: integer.cpp:4100
a number with no special properties
Definition: integer.h:93
const byte * begin() const
Pointer to the first byte in the memory block.
Definition: algparam.h:91
size_t Put(byte inByte, bool blocking=true)
Input a byte for processing.
Definition: cryptlib.h:1508
Integer & operator++()
Pre-increment.
Definition: integer.cpp:3711
AlgorithmParameters MakeParameters(const char *name, const T &value, bool throwIfNotUsed=true)
Create an object that implements NameValuePairs.
Definition: algparam.h:516
void swap(Integer &a)
Swaps this Integer with another Integer.
Definition: integer.cpp:3139
Integer()
Creates the zero integer.
Definition: integer.cpp:2930
unsigned int TrailingZeros(word32 v)
Determines the number of trailing 0-bits in a value.
Definition: misc.h:717
Integer & operator<<=(size_t n)
Left-shift Assignment.
Definition: integer.cpp:3996
Exception thrown when an error is encountered decoding an OpenPGP integer.
Definition: integer.h:282
void Negate()
Reverse the Sign of the Integer.
Definition: integer.cpp:4299
int Compare(const Integer &a) const
Perform signed comparison.
Definition: integer.cpp:4316
T Crop(T value, size_t bits)
Truncates the value to the specified number of bits.
Definition: misc.h:776
void BERDecodeAsOctetString(BufferedTransformation &bt, size_t length)
Decode nonnegative value from big-endian octet string.
Definition: integer.cpp:3431
Application callback to signal suitability of a cabdidate prime.
Definition: nbtheory.h:80
void ConditionalSwapPointers(bool c, T &a, T &b)
Performs a branchless swap of pointers a and b if condition c is true.
Definition: misc.h:1149
static Integer Power2(size_t e)
Exponentiates to a power of 2.
Definition: integer.cpp:3051
Multiple precision integer with arithmetic operations.
Definition: integer.h:49
virtual Element Exponentiate(const Element &a, const Integer &e) const
Raises a base to an exponent in the group.
Definition: algebra.cpp:316
OID operator+(const OID &lhs, unsigned long rhs)
Append a value to an OID.
const Integer & Multiply(const Integer &a, const Integer &b) const
Multiplies elements in the ring.
Definition: integer.cpp:4597
a signed value
Definition: integer.h:87
size_t OpenPGPEncode(byte *output, size_t bufferSize) const
Encode absolute value in OpenPGP format.
Definition: integer.cpp:3440
const Integer & Half(const Integer &a) const
Divides an element by 2.
Definition: integer.cpp:4465
static const Integer & Two()
Integer representing 2.
Definition: integer.cpp:4818
virtual lword Skip(lword skipMax=LWORD_MAX)
Discard skipMax bytes from the output buffer.
Definition: cryptlib.cpp:530
const Integer & Square(const Integer &a) const
Square an element in the ring.
Definition: integer.cpp:4610
Integer & operator^=(const Integer &t)
Bitwise XOR Assignment.
Definition: integer.cpp:4055
#define MEMORY_BARRIER
A memory barrier.
Definition: misc.h:258
RandomNumberType
Properties of a random integer.
Definition: integer.h:91
const char * Seed()
ConstByteArrayParameter.
Definition: argnames.h:19
bool IsEven() const
Determines if the Integer is even parity.
Definition: integer.h:348
byte order is big-endian
Definition: cryptlib.h:145
const T & STDMIN(const T &a, const T &b)
Replacement function for std::min.
Definition: misc.h:506
String-based implementation of Store interface.
Definition: filters.h:1218
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.
Definition: trap.h:60
static void Divide(Integer &r, Integer &q, const Integer &a, const Integer &d)
calculate r and q such that (a == d*q + r) && (0 <= r < abs(d))
Definition: integer.cpp:4158
ModularArithmetic(const Integer &modulus=Integer::One())
Construct a ModularArithmetic.
Definition: modarith.h:50
void BERDecodeError()
Raises a BERDecodeErr.
Definition: asn.h:69
Data structure used to store byte strings.
Definition: queue.h:19
Functions for CPU features and intrinsics.
Classes and functions for working with ANS.1 objects.
Classes for SHA-1 and SHA-2 family of message digests.
void SetBit(size_t n, bool value=1)
Set the n-th bit to value.
Definition: integer.cpp:3085
size_t GetWord16(word16 &value, ByteOrder order=BIG_ENDIAN_ORDER)
Retrieve a 16-bit word.
Definition: cryptlib.cpp:739
iterator begin()
Provides an iterator pointing to the first element in the memory block.
Definition: secblock.h:541
unsigned int BitCount() const
Determines the number of bits required to represent the Integer.
Definition: integer.cpp:3315
const char * PointerToPrimeSelector()
const PrimeSelector *
Definition: argnames.h:42
Implementation of BufferedTransformation&#39;s attachment interface.
Classes and functions for number theoretic operations.
byte GetByte(size_t i) const
Provides the i-th byte of the Integer.
Definition: integer.cpp:3099
Exception thrown when division by 0 is encountered.
Definition: integer.h:55
DER Sequence Encoder.
Definition: asn.h:313
void Encode(byte *output, size_t outputLen, Signedness sign=UNSIGNED) const
Encode in big-endian format.
Definition: integer.cpp:3379
Integer Squared() const
Multiply this integer by itself.
Definition: integer.h:576
T1 SaturatingSubtract1(const T1 &a, const T2 &b)
Performs a saturating subtract clamped at 1.
Definition: misc.h:905
virtual lword MaxRetrievable() const
Provides the number of bytes ready for retrieval.
Definition: cryptlib.cpp:473
Exception thrown when a random number cannot be found that satisfies the condition.
Definition: integer.h:63
Performs modular arithmetic in Montgomery representation for increased speed.
Definition: modarith.h:276
Integer operator<<(size_t n) const
Left-shift.
Definition: integer.h:565
DER General Encoder.
Definition: asn.h:284
bool HasSSE2()
Determines SSE2 availability.
Definition: cpu.h:114
Integer Minus(const Integer &b) const
Subtraction.
Definition: integer.cpp:3950
void GenerateBlock(byte *output, size_t size)
Generate random array of bytes.
Definition: integer.cpp:3513
void Decode(const byte *input, size_t inputLen, Signedness sign=UNSIGNED)
Decode from big-endian byte array.
Definition: integer.cpp:3324
size_t PutWord16(word16 value, ByteOrder order=BIG_ENDIAN_ORDER, bool blocking=true)
Input a 16-bit word for processing.
Definition: cryptlib.cpp:691
Integer CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
TODO.
Definition: modarith.h:308
size_t DEREncodeOctetString(BufferedTransformation &bt, const byte *str, size_t strLen)
DER encode octet string.
Definition: asn.cpp:104
Integer operator>>(size_t n) const
Right-shift.
Definition: integer.h:563
void SimultaneousExponentiate(Element *results, const Element &base, const Integer *exponents, unsigned int exponentsCount) const
Exponentiates a base to multiple exponents in the ring.
Definition: integer.cpp:4573
Multiple precision integer with arithmetic operations.
Integer Xor(const Integer &t) const
Bitwise XOR.
Definition: integer.cpp:3804
static const Integer & Zero()
Integer representing 0.
Definition: integer.cpp:4794
const T & STDMAX(const T &a, const T &b)
Replacement function for std::max.
Definition: misc.h:517
Euclidean domain.
Definition: algebra.h:315
void Grow(size_type newSize)
Change size and preserve contents.
Definition: secblock.h:764
void BERDecode(const byte *input, size_t inputLen)
Decode from BER format.
Definition: integer.cpp:3408
std::string IntToString< Integer >(Integer value, unsigned int base)
Converts an Integer to a string.
Definition: integer.cpp:4662
virtual size_t Get(byte &outByte)
Retrieve a 8-bit byte.
Definition: cryptlib.cpp:492
Class file for performing modular arithmetic.
Crypto++ library namespace.
bool GetValue(const char *name, T &value) const
Get a named value.
Definition: cryptlib.h:352
Integer & operator>>=(size_t n)
Right-shift Assignment.
Definition: integer.cpp:4008
bool GetBit(size_t i) const
Provides the i-th bit of the Integer.
Definition: integer.cpp:3075
Integer SquareRoot() const
Extract square root.
Definition: integer.cpp:4334
BER General Decoder.
Definition: asn.h:246
bool GetIntValue(const char *name, int &value) const
Get a named value with type int.
Definition: cryptlib.h:388
bool IsConvertableToLong() const
Determines if the Integer is convertable to Long.
Definition: integer.cpp:2970
Object Identifier.
Definition: asn.h:166
size_t Get(byte &outByte)
Retrieve a 8-bit byte.
Definition: queue.cpp:300
Integer AbsoluteValue() const
Retrieve the absolute value of this integer.
Definition: integer.cpp:3132
Integer & operator-=(const Integer &t)
Subtraction Assignment.
Definition: integer.cpp:3973
unsigned int BitPrecision(const T &value)
Returns the number of bits required for a value.
Definition: misc.h:692
size_type size() const
Provides the count of elements in the SecBlock.
Definition: secblock.h:566
Integer & operator &=(const Integer &t)
Bitwise AND Assignment.
void DEREncode(BufferedTransformation &bt) const
Encodes in DER format.
Definition: integer.cpp:4447
lword GetBits(size_t i, size_t n) const
Provides the low order bits of the Integer.
Definition: integer.cpp:3116
Integer operator-() const
Subtraction.
Definition: integer.cpp:3125
the value is positive or 0
Definition: integer.h:75
const Integer & MultiplicativeInverse(const Integer &a) const
Calculate the multiplicative inverse of an element in the ring.
Definition: integer.cpp:4636
void SimultaneousExponentiate(Element *results, const Element &base, const Integer *exponents, unsigned int exponentsCount) const
Exponentiates a base to multiple exponents in the ring.
Definition: modarith.h:311
virtual void SimultaneousExponentiate(Element *results, const Element &base, const Integer *exponents, unsigned int exponentsCount) const
Exponentiates a base to multiple exponents in the Ring.
Definition: algebra.cpp:334
Interface for retrieving values given their names.
Definition: cryptlib.h:294
Integer ConvertOut(const Integer &a) const
Reduces an element in the congruence class.
Definition: integer.cpp:4623