Crypto++  5.6.5
Free C++ class library of cryptographic schemes
integer.cpp
1 // integer.cpp - originally written and placed in the public domain by Wei Dai
2 // contains public domain code contributed by Alister Lee and Leonard Janke
3 
4 // Notes by JW: The Integer class needs to do two things. First, it needs to set function
5 // pointers on some platforms, like X86 and X64. The function pointers select a fast multiply
6 // and addition based on the cpu. Second, it wants to create Integer::Zero(), Integer::One()
7 // and Integer::Two(). The function pointers are initialized in the class InitializeInteger.
8 // Wei's original code was much simpler. It uses the Singleton pattern, but it always produced
9 // memory findings. The Singleton generates memory findings because it used for a Create on
10 // First Use pattern. Resource destruction effectivley requires running resource acquisition
11 // with dependencies in reverse. For resources provided through the Singletons, there is no way
12 // to express the dependency order to safely destroy resources.
13 // The difference in the changes below is we use platform and language specific remediations
14 // if they are available. If not available, then we fall back to Wei's original code. If
15 // NO_OS_DEPENDENCE is defined, then the library uses Wei's original code.
16 // Under all versions of C++ on Linux and Microsoft platforms, we can use GCC's init_priority
17 // or MSVC's init_seg(lib) to initialize the function pointers and create the Integers 0, 1 and 2
18 // after CRT startup. This avoids the Singletons and clears over half the reports of memory
19 // leaks. However, it does not apply to Apple or Sun platforms.
20 // C++11 allows us to use call_once to set the function pointers, and Integer does so when
21 // init_priority and init_seg(lib) are not available. The class also uses the Singleton pattern
22 // to ensure integers 0, 1 and 2 are available. The Singleton will produce memory findings, but
23 // we don't have anything else to use in this case.
24 // C++03 on platforms like Apple and Sun, we use a boolean flag to track when the function pointers
25 // have been set based on the cpu. Its just a Nifty Counter in disguise, and its similar to using
26 // the g_pAssignToInteger to track initialization. It has concurrency issues, but a race is not a
27 // problem. It does not matter if two threads both set the same pointers. The Singleton pattern
28 // is also used to ensure integers 0, 1 and 2 are available. The Singleton will produce memory
29 // findings, but we don't have anything else to use in this case.
30 // While not readily apparent, Integer does not need to inherit from InitializeInteger when
31 // init_priority and init_seg(lib) are available. They just create an InitializePointers object
32 // at the right time after CRT initialization. The additional class avoids the small runtime
33 // overhead associated with checking the flags, and hides the detail from the interface.
34 
35 #include "pch.h"
36 #include "config.h"
37 
38 #if CRYPTOPP_MSC_VERSION
39 # pragma warning(disable: 4100)
40 #endif
41 
42 #if CRYPTOPP_GCC_DIAGNOSTIC_AVAILABLE
43 # pragma GCC diagnostic ignored "-Wunused"
44 # pragma GCC diagnostic ignored "-Wunused-but-set-variable"
45 #endif
46 
47 // Issue 340
48 #if CRYPTOPP_GCC_DIAGNOSTIC_AVAILABLE
49 # pragma GCC diagnostic ignored "-Wconversion"
50 # pragma GCC diagnostic ignored "-Wsign-conversion"
51 #endif
52 
53 #ifndef CRYPTOPP_IMPORTS
54 
55 #include "integer.h"
56 #include "secblock.h"
57 #include "modarith.h"
58 #include "nbtheory.h"
59 #include "smartptr.h"
60 #include "algparam.h"
61 #include "filters.h"
62 #include "stdcpp.h"
63 #include "asn.h"
64 #include "oids.h"
65 #include "words.h"
66 #include "pubkey.h" // for P1363_KDF2
67 #include "sha.h"
68 #include "cpu.h"
69 #include "misc.h"
70 
71 #include <iostream>
72 
73 #if (_MSC_VER >= 1400) && !defined(_M_ARM)
74  #include <intrin.h>
75 #endif
76 
77 #ifdef __DECCXX
78  #include <c_asm.h>
79 #endif
80 
81 // "Error: The operand ___LKDB cannot be assigned to", http://github.com/weidai11/cryptopp/issues/188
82 #if (__SUNPRO_CC >= 0x5130)
83 # define MAYBE_CONST
84 # define MAYBE_UNCONST_CAST(x) const_cast<word*>(x)
85 #else
86 # define MAYBE_CONST const
87 # define MAYBE_UNCONST_CAST(x) x
88 #endif
89 
90 // "Inline assembly operands don't work with .intel_syntax",
91 // http://llvm.org/bugs/show_bug.cgi?id=24232
92 #if CRYPTOPP_BOOL_X32 || defined(CRYPTOPP_DISABLE_INTEL_ASM)
93 # undef CRYPTOPP_X86_ASM_AVAILABLE
94 # undef CRYPTOPP_X32_ASM_AVAILABLE
95 # undef CRYPTOPP_X64_ASM_AVAILABLE
96 # undef CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
97 # undef CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
98 # define CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 0
99 # define CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE 0
100 #else
101 # define CRYPTOPP_INTEGER_SSE2 (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86))
102 #endif
103 
104 // ***************** C++ Static Initialization ********************
105 
106 NAMESPACE_BEGIN(CryptoPP)
107 static void SetFunctionPointers();
108 #if defined(HAVE_GCC_INIT_PRIORITY) || defined(HAVE_MSC_INIT_PRIORITY)
109 // Add InitializePointers to perform the work of setting pointers once.
110 struct InitializePointers
111 {
112  InitializePointers()
113  {
114  SetFunctionPointers();
115  }
116 };
117 // Leave InitializeInteger empty so no work is done.
118 InitializeInteger::InitializeInteger()
119 {
120 }
121 #elif defined(CRYPTOPP_CXX11_SYNCHRONIZATION) && defined(CRYPTOPP_CXX11_DYNAMIC_INIT)
122 std::once_flag s_flag;
123 InitializeInteger::InitializeInteger()
124 {
125  std::call_once(s_flag, []() {
126  SetFunctionPointers();
127  });
128 }
129 #else
130 static bool s_flag;
131 InitializeInteger::InitializeInteger()
132 {
133  MEMORY_BARRIER();
134  if (s_flag == false)
135  {
136  SetFunctionPointers();
137  s_flag = true;
138  MEMORY_BARRIER();
139  }
140 }
141 #endif
142 template <long i>
144 {
145  Integer * operator()() const
146  {
147  return new Integer(i);
148  }
149 };
150 NAMESPACE_END
151 
152 ANONYMOUS_NAMESPACE_BEGIN
153 #if defined(HAVE_GCC_INIT_PRIORITY)
154 const CryptoPP::InitializePointers s_init __attribute__ ((init_priority (CRYPTOPP_INIT_PRIORITY + 30))) = CryptoPP::InitializePointers();
155 const CryptoPP::Integer s_zero __attribute__ ((init_priority (CRYPTOPP_INIT_PRIORITY + 31))) = CryptoPP::Integer(0L);
156 const CryptoPP::Integer s_one __attribute__ ((init_priority (CRYPTOPP_INIT_PRIORITY + 32))) = CryptoPP::Integer(1L);
157 const CryptoPP::Integer s_two __attribute__ ((init_priority (CRYPTOPP_INIT_PRIORITY + 33))) = CryptoPP::Integer(2L);
158 #elif defined(HAVE_MSC_INIT_PRIORITY)
159 #pragma warning(disable: 4075)
160 #pragma init_seg(".CRT$XCU-030")
161 const CryptoPP::InitializePointers s_init;
162 const CryptoPP::Integer s_zero(0L);
163 const CryptoPP::Integer s_one(1L);
164 const CryptoPP::Integer s_two(2L);
165 #pragma warning(default: 4075)
166 #endif
167 ANONYMOUS_NAMESPACE_END
168 
169 // ***************** Library code ********************
170 
171 NAMESPACE_BEGIN(CryptoPP)
172 inline static int Compare(const word *A, const word *B, size_t N)
173 {
174  while (N--)
175  if (A[N] > B[N])
176  return 1;
177  else if (A[N] < B[N])
178  return -1;
179 
180  return 0;
181 }
182 
183 inline static int Increment(word *A, size_t N, word B=1)
184 {
185  CRYPTOPP_ASSERT(N);
186  word t = A[0];
187  A[0] = t+B;
188  if (A[0] >= t)
189  return 0;
190  for (unsigned i=1; i<N; i++)
191  if (++A[i])
192  return 0;
193  return 1;
194 }
195 
196 inline static int Decrement(word *A, size_t N, word B=1)
197 {
198  CRYPTOPP_ASSERT(N);
199  word t = A[0];
200  A[0] = t-B;
201  if (A[0] <= t)
202  return 0;
203  for (unsigned i=1; i<N; i++)
204  if (A[i]--)
205  return 0;
206  return 1;
207 }
208 
209 static void TwosComplement(word *A, size_t N)
210 {
211  Decrement(A, N);
212  for (unsigned i=0; i<N; i++)
213  A[i] = ~A[i];
214 }
215 
216 static word AtomicInverseModPower2(word A)
217 {
218  CRYPTOPP_ASSERT(A%2==1);
219 
220  word R=A%8;
221 
222  for (unsigned i=3; i<WORD_BITS; i*=2)
223  R = R*(2-R*A);
224 
225  CRYPTOPP_ASSERT(R*A==1);
226  return R;
227 }
228 
229 // ********************************************************
230 
231 #if !defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE) || (defined(__x86_64__) && defined(CRYPTOPP_WORD128_AVAILABLE))
232  #define Declare2Words(x) word x##0, x##1;
233  #define AssignWord(a, b) a##0 = b; a##1 = 0;
234  #define Add2WordsBy1(a, b, c) a##0 = b##0 + c; a##1 = b##1 + (a##0 < c);
235  #define LowWord(a) a##0
236  #define HighWord(a) a##1
237  #ifdef _MSC_VER
238  #define MultiplyWordsLoHi(p0, p1, a, b) p0 = _umul128(a, b, &p1);
239  #ifndef __INTEL_COMPILER
240  #define Double3Words(c, d) d##1 = __shiftleft128(d##0, d##1, 1); d##0 = __shiftleft128(c, d##0, 1); c *= 2;
241  #endif
242  #elif defined(__DECCXX)
243  #define MultiplyWordsLoHi(p0, p1, a, b) p0 = a*b; p1 = asm("umulh %a0, %a1, %v0", a, b);
244  #elif defined(__x86_64__)
245  #if defined(__SUNPRO_CC) && __SUNPRO_CC < 0x5100
246  // Sun Studio's gcc-style inline assembly is heavily bugged as of version 5.9 Patch 124864-09 2008/12/16, but this one works
247  #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "r"(b) : "cc");
248  #else
249  #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
250  #define MulAcc(c, d, a, b) asm ("mulq %6; addq %3, %0; adcq %4, %1; adcq $0, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1), "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
251  #define Double3Words(c, d) asm ("addq %0, %0; adcq %1, %1; adcq %2, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1) : : "cc");
252  #define Acc2WordsBy1(a, b) asm ("addq %2, %0; adcq $0, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b) : "cc");
253  #define Acc2WordsBy2(a, b) asm ("addq %2, %0; adcq %3, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b##0), "r"(b##1) : "cc");
254  #define Acc3WordsBy2(c, d, e) asm ("addq %5, %0; adcq %6, %1; adcq $0, %2;" : "+r"(c), "=r"(e##0), "=r"(e##1) : "1"(d##0), "2"(d##1), "r"(e##0), "r"(e##1) : "cc");
255  #endif
256  #endif
257  #define MultiplyWords(p, a, b) MultiplyWordsLoHi(p##0, p##1, a, b)
258  #ifndef Double3Words
259  #define Double3Words(c, d) d##1 = 2*d##1 + (d##0>>(WORD_BITS-1)); d##0 = 2*d##0 + (c>>(WORD_BITS-1)); c *= 2;
260  #endif
261  #ifndef Acc2WordsBy2
262  #define Acc2WordsBy2(a, b) a##0 += b##0; a##1 += a##0 < b##0; a##1 += b##1;
263  #endif
264  #define AddWithCarry(u, a, b) {word t = a+b; u##0 = t + u##1; u##1 = (t<a) + (u##0<t);}
265  #define SubtractWithBorrow(u, a, b) {word t = a-b; u##0 = t - u##1; u##1 = (t>a) + (u##0>t);}
266  #define GetCarry(u) u##1
267  #define GetBorrow(u) u##1
268 #else
269  #define Declare2Words(x) dword x;
270  #if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER) && !defined(_M_ARM)
271  #define MultiplyWords(p, a, b) p = __emulu(a, b);
272  #else
273  #define MultiplyWords(p, a, b) p = (dword)a*b;
274  #endif
275  #define AssignWord(a, b) a = b;
276  #define Add2WordsBy1(a, b, c) a = b + c;
277  #define Acc2WordsBy2(a, b) a += b;
278  #define LowWord(a) word(a)
279  #define HighWord(a) word(a>>WORD_BITS)
280  #define Double3Words(c, d) d = 2*d + (c>>(WORD_BITS-1)); c *= 2;
281  #define AddWithCarry(u, a, b) u = dword(a) + b + GetCarry(u);
282  #define SubtractWithBorrow(u, a, b) u = dword(a) - b - GetBorrow(u);
283  #define GetCarry(u) HighWord(u)
284  #define GetBorrow(u) word(u>>(WORD_BITS*2-1))
285 #endif
286 #ifndef MulAcc
287  #define MulAcc(c, d, a, b) MultiplyWords(p, a, b); Acc2WordsBy1(p, c); c = LowWord(p); Acc2WordsBy1(d, HighWord(p));
288 #endif
289 #ifndef Acc2WordsBy1
290  #define Acc2WordsBy1(a, b) Add2WordsBy1(a, a, b)
291 #endif
292 #ifndef Acc3WordsBy2
293  #define Acc3WordsBy2(c, d, e) Acc2WordsBy1(e, c); c = LowWord(e); Add2WordsBy1(e, d, HighWord(e));
294 #endif
295 
296 class DWord
297 {
298 public:
299 #if defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE)
300  DWord() : m_whole() { }
301 #else
302  DWord() : m_halfs() { }
303 #endif
304 
305 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
306  explicit DWord(word low) : m_whole(low) { }
307 #else
308  explicit DWord(word low) : m_halfs()
309  {
310  m_halfs.low = low;
311  }
312 #endif
313 
314 #if defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE)
315  DWord(word low, word high) : m_whole()
316 #else
317  DWord(word low, word high) : m_halfs()
318 #endif
319  {
320 #if defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE)
321 # if defined(IS_LITTLE_ENDIAN)
322  const word t[2] = {low,high};
323  memcpy(&m_whole, &t, sizeof(m_whole));
324 # else
325  const word t[2] = {high,low};
326  memcpy(&m_whole, &t, sizeof(m_whole));
327 # endif
328 #else
329  m_halfs.low = low;
330  m_halfs.high = high;
331 #endif
332  }
333 
334  static DWord Multiply(word a, word b)
335  {
336  DWord r;
337  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
338  r.m_whole = (dword)a * b;
339  #elif defined(MultiplyWordsLoHi)
340  MultiplyWordsLoHi(r.m_halfs.low, r.m_halfs.high, a, b);
341  #else
342  CRYPTOPP_ASSERT(0);
343  #endif
344  return r;
345  }
346 
347  static DWord MultiplyAndAdd(word a, word b, word c)
348  {
349  DWord r = Multiply(a, b);
350  return r += c;
351  }
352 
353  DWord & operator+=(word a)
354  {
355  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
356  m_whole = m_whole + a;
357  #else
358  m_halfs.low += a;
359  m_halfs.high += (m_halfs.low < a);
360  #endif
361  return *this;
362  }
363 
364  DWord operator+(word a)
365  {
366  DWord r;
367  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
368  r.m_whole = m_whole + a;
369  #else
370  r.m_halfs.low = m_halfs.low + a;
371  r.m_halfs.high = m_halfs.high + (r.m_halfs.low < a);
372  #endif
373  return r;
374  }
375 
376  DWord operator-(DWord a)
377  {
378  DWord r;
379  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
380  r.m_whole = m_whole - a.m_whole;
381  #else
382  r.m_halfs.low = m_halfs.low - a.m_halfs.low;
383  r.m_halfs.high = m_halfs.high - a.m_halfs.high - (r.m_halfs.low > m_halfs.low);
384  #endif
385  return r;
386  }
387 
388  DWord operator-(word a)
389  {
390  DWord r;
391  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
392  r.m_whole = m_whole - a;
393  #else
394  r.m_halfs.low = m_halfs.low - a;
395  r.m_halfs.high = m_halfs.high - (r.m_halfs.low > m_halfs.low);
396  #endif
397  return r;
398  }
399 
400  // returns quotient, which must fit in a word
401  word operator/(word divisor);
402 
403  word operator%(word a);
404 
405  bool operator!() const
406  {
407  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
408  return !m_whole;
409  #else
410  return !m_halfs.high && !m_halfs.low;
411  #endif
412  }
413 
414  // TODO: When NATIVE_DWORD is in effect, we access high and low, which are inactive
415  // union members, and that's UB. Also see http://stackoverflow.com/q/11373203.
416  word GetLowHalf() const {return m_halfs.low;}
417  word GetHighHalf() const {return m_halfs.high;}
418  word GetHighHalfAsBorrow() const {return 0-m_halfs.high;}
419 
420 private:
421  // Issue 274, "Types cannot be declared in anonymous union",
422  // http://github.com/weidai11/cryptopp/issues/274
423  // Thanks to Martin Bonner at http://stackoverflow.com/a/39507183
424  struct half_words
425  {
426  #ifdef IS_LITTLE_ENDIAN
427  word low;
428  word high;
429  #else
430  word high;
431  word low;
432  #endif
433  };
434  union
435  {
436  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
437  dword m_whole;
438  #endif
439  half_words m_halfs;
440  };
441 };
442 
443 class Word
444 {
445 public:
446  Word() : m_whole(0) {}
447  Word(word value) : m_whole(value) {}
448  Word(hword low, hword high) : m_whole(low | (word(high) << (WORD_BITS/2))) {}
449 
450  static Word Multiply(hword a, hword b)
451  {
452  Word r;
453  r.m_whole = (word)a * b;
454  return r;
455  }
456 
457  Word operator-(Word a)
458  {
459  Word r;
460  r.m_whole = m_whole - a.m_whole;
461  return r;
462  }
463 
464  Word operator-(hword a)
465  {
466  Word r;
467  r.m_whole = m_whole - a;
468  return r;
469  }
470 
471  // returns quotient, which must fit in a word
472  hword operator/(hword divisor)
473  {
474  return hword(m_whole / divisor);
475  }
476 
477  bool operator!() const
478  {
479  return !m_whole;
480  }
481 
482  word GetWhole() const {return m_whole;}
483  hword GetLowHalf() const {return hword(m_whole);}
484  hword GetHighHalf() const {return hword(m_whole>>(WORD_BITS/2));}
485  hword GetHighHalfAsBorrow() const {return 0-hword(m_whole>>(WORD_BITS/2));}
486 
487 private:
488  word m_whole;
489 };
490 
491 // do a 3 word by 2 word divide, returns quotient and leaves remainder in A
492 template <class S, class D>
493 S DivideThreeWordsByTwo(S *A, S B0, S B1, D *dummy=NULLPTR)
494 {
495  CRYPTOPP_UNUSED(dummy);
496 
497  // Assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a S
498  CRYPTOPP_ASSERT(A[2] < B1 || (A[2]==B1 && A[1] < B0));
499 
500  // estimate the quotient: do a 2 S by 1 S divide.
501  // Profiling tells us the original second case was dominant, so it was promoted to the first If statement.
502  // The code change occurred at Commit dc99266599a0e72d.
503 
504  S Q; bool pre = (S(B1+1) == 0);
505  if (B1 > 0 && !pre)
506  Q = D(A[1], A[2]) / S(B1+1);
507  else if (pre)
508  Q = A[2];
509  else
510  Q = D(A[0], A[1]) / B0;
511 
512  // now subtract Q*B from A
513  D p = D::Multiply(B0, Q);
514  D u = (D) A[0] - p.GetLowHalf();
515  A[0] = u.GetLowHalf();
516  u = (D) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - D::Multiply(B1, Q);
517  A[1] = u.GetLowHalf();
518  A[2] += u.GetHighHalf();
519 
520  // Q <= actual quotient, so fix it
521  while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
522  {
523  u = (D) A[0] - B0;
524  A[0] = u.GetLowHalf();
525  u = (D) A[1] - B1 - u.GetHighHalfAsBorrow();
526  A[1] = u.GetLowHalf();
527  A[2] += u.GetHighHalf();
528  Q++;
529  CRYPTOPP_ASSERT(Q); // shouldn't overflow
530  }
531 
532  return Q;
533 }
534 
535 // do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1
536 template <class S, class D>
537 inline D DivideFourWordsByTwo(S *T, const D &Al, const D &Ah, const D &B)
538 {
539  // Profiling tells us the original second case was dominant, so it was promoted to the first If statement.
540  // The code change occurred at Commit dc99266599a0e72d.
541 
542  if (!!B)
543  {
544  S Q[2];
545  T[0] = Al.GetLowHalf();
546  T[1] = Al.GetHighHalf();
547  T[2] = Ah.GetLowHalf();
548  T[3] = Ah.GetHighHalf();
549  Q[1] = DivideThreeWordsByTwo<S, D>(T+1, B.GetLowHalf(), B.GetHighHalf());
550  Q[0] = DivideThreeWordsByTwo<S, D>(T, B.GetLowHalf(), B.GetHighHalf());
551  return D(Q[0], Q[1]);
552  }
553  else // if divisor is 0, we assume divisor==2**(2*WORD_BITS)
554  {
555  return D(Ah.GetLowHalf(), Ah.GetHighHalf());
556  }
557 }
558 
559 // returns quotient, which must fit in a word
560 inline word DWord::operator/(word a)
561 {
562  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
563  return word(m_whole / a);
564  #else
565  hword r[4];
566  return DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a).GetWhole();
567  #endif
568 }
569 
570 inline word DWord::operator%(word a)
571 {
572  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
573  return word(m_whole % a);
574  #else
575  if (a < (word(1) << (WORD_BITS/2)))
576  {
577  hword h = hword(a);
578  word r = m_halfs.high % h;
579  r = ((m_halfs.low >> (WORD_BITS/2)) + (r << (WORD_BITS/2))) % h;
580  return hword((hword(m_halfs.low) + (r << (WORD_BITS/2))) % h);
581  }
582  else
583  {
584  hword r[4];
585  DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a);
586  return Word(r[0], r[1]).GetWhole();
587  }
588  #endif
589 }
590 
591 // ********************************************************
592 
593 // Use some tricks to share assembly code between MSVC, GCC, Clang and Sun CC.
594 #if defined(__GNUC__)
595  #define AddPrologue \
596  int result; \
597  __asm__ __volatile__ \
598  ( \
599  INTEL_NOPREFIX
600  #define AddEpilogue \
601  ATT_PREFIX \
602  : "=a" (result)\
603  : "d" (C), "a" (A), "D" (B), "c" (N) \
604  : "%esi", "memory", "cc" \
605  );\
606  return result;
607  #define MulPrologue \
608  __asm__ __volatile__ \
609  ( \
610  INTEL_NOPREFIX \
611  AS1( push ebx) \
612  AS2( mov ebx, edx)
613  #define MulEpilogue \
614  AS1( pop ebx) \
615  ATT_PREFIX \
616  : \
617  : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B) \
618  : "%esi", "memory", "cc" \
619  );
620  #define SquPrologue MulPrologue
621  #define SquEpilogue \
622  AS1( pop ebx) \
623  ATT_PREFIX \
624  : \
625  : "d" (s_maskLow16), "c" (C), "a" (A) \
626  : "%esi", "%edi", "memory", "cc" \
627  );
628  #define TopPrologue MulPrologue
629  #define TopEpilogue \
630  AS1( pop ebx) \
631  ATT_PREFIX \
632  : \
633  : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B), "S" (L) \
634  : "memory", "cc" \
635  );
636 #else
637  #define AddPrologue \
638  __asm push edi \
639  __asm push esi \
640  __asm mov eax, [esp+12] \
641  __asm mov edi, [esp+16]
642  #define AddEpilogue \
643  __asm pop esi \
644  __asm pop edi \
645  __asm ret 8
646  #define SaveEBX
647  #define RestoreEBX
648  #define SquPrologue \
649  AS2( mov eax, A) \
650  AS2( mov ecx, C) \
651  SaveEBX \
652  AS2( lea ebx, s_maskLow16)
653  #define MulPrologue \
654  AS2( mov eax, A) \
655  AS2( mov edi, B) \
656  AS2( mov ecx, C) \
657  SaveEBX \
658  AS2( lea ebx, s_maskLow16)
659  #define TopPrologue \
660  AS2( mov eax, A) \
661  AS2( mov edi, B) \
662  AS2( mov ecx, C) \
663  AS2( mov esi, L) \
664  SaveEBX \
665  AS2( lea ebx, s_maskLow16)
666  #define SquEpilogue RestoreEBX
667  #define MulEpilogue RestoreEBX
668  #define TopEpilogue RestoreEBX
669 #endif
670 
671 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
672 extern "C" {
673 int Baseline_Add(size_t N, word *C, const word *A, const word *B);
674 int Baseline_Sub(size_t N, word *C, const word *A, const word *B);
675 }
676 #elif defined(CRYPTOPP_X64_ASM_AVAILABLE) && defined(__GNUC__) && defined(CRYPTOPP_WORD128_AVAILABLE)
677 int Baseline_Add(size_t N, word *C, const word *A, const word *B)
678 {
679  word result;
680  __asm__ __volatile__
681  (
682  INTEL_NOPREFIX
683  AS1( neg %1)
684  ASJ( jz, 1, f)
685  AS2( mov %0,[%3+8*%1])
686  AS2( add %0,[%4+8*%1])
687  AS2( mov [%2+8*%1],%0)
688  ASL(0)
689  AS2( mov %0,[%3+8*%1+8])
690  AS2( adc %0,[%4+8*%1+8])
691  AS2( mov [%2+8*%1+8],%0)
692  AS2( lea %1,[%1+2])
693  ASJ( jrcxz, 1, f)
694  AS2( mov %0,[%3+8*%1])
695  AS2( adc %0,[%4+8*%1])
696  AS2( mov [%2+8*%1],%0)
697  ASJ( jmp, 0, b)
698  ASL(1)
699  AS2( mov %0, 0)
700  AS2( adc %0, %0)
701  ATT_NOPREFIX
702  : "=&r" (result), "+c" (N)
703  : "r" (C+N), "r" (A+N), "r" (B+N)
704  : "memory", "cc"
705  );
706  return (int)result;
707 }
708 
709 int Baseline_Sub(size_t N, word *C, const word *A, const word *B)
710 {
711  word result;
712  __asm__ __volatile__
713  (
714  INTEL_NOPREFIX
715  AS1( neg %1)
716  ASJ( jz, 1, f)
717  AS2( mov %0,[%3+8*%1])
718  AS2( sub %0,[%4+8*%1])
719  AS2( mov [%2+8*%1],%0)
720  ASL(0)
721  AS2( mov %0,[%3+8*%1+8])
722  AS2( sbb %0,[%4+8*%1+8])
723  AS2( mov [%2+8*%1+8],%0)
724  AS2( lea %1,[%1+2])
725  ASJ( jrcxz, 1, f)
726  AS2( mov %0,[%3+8*%1])
727  AS2( sbb %0,[%4+8*%1])
728  AS2( mov [%2+8*%1],%0)
729  ASJ( jmp, 0, b)
730  ASL(1)
731  AS2( mov %0, 0)
732  AS2( adc %0, %0)
733  ATT_NOPREFIX
734  : "=&r" (result), "+c" (N)
735  : "r" (C+N), "r" (A+N), "r" (B+N)
736  : "memory", "cc"
737  );
738  return (int)result;
739 }
740 #elif defined(CRYPTOPP_X86_ASM_AVAILABLE) && CRYPTOPP_BOOL_X86
741 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
742 {
743  AddPrologue
744 
745  // now: eax = A, edi = B, edx = C, ecx = N
746  AS2( lea eax, [eax+4*ecx])
747  AS2( lea edi, [edi+4*ecx])
748  AS2( lea edx, [edx+4*ecx])
749 
750  AS1( neg ecx) // ecx is negative index
751  AS2( test ecx, 2) // this clears carry flag
752  ASJ( jz, 0, f)
753  AS2( sub ecx, 2)
754  ASJ( jmp, 1, f)
755 
756  ASL(0)
757  ASJ( jecxz, 2, f) // loop until ecx overflows and becomes zero
758  AS2( mov esi,[eax+4*ecx])
759  AS2( adc esi,[edi+4*ecx])
760  AS2( mov [edx+4*ecx],esi)
761  AS2( mov esi,[eax+4*ecx+4])
762  AS2( adc esi,[edi+4*ecx+4])
763  AS2( mov [edx+4*ecx+4],esi)
764  ASL(1)
765  AS2( mov esi,[eax+4*ecx+8])
766  AS2( adc esi,[edi+4*ecx+8])
767  AS2( mov [edx+4*ecx+8],esi)
768  AS2( mov esi,[eax+4*ecx+12])
769  AS2( adc esi,[edi+4*ecx+12])
770  AS2( mov [edx+4*ecx+12],esi)
771 
772  AS2( lea ecx,[ecx+4]) // advance index, avoid inc which causes slowdown on Intel Core 2
773  ASJ( jmp, 0, b)
774 
775  ASL(2)
776  AS2( mov eax, 0)
777  AS1( setc al) // store carry into eax (return result register)
778 
779  AddEpilogue
780 }
781 
782 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B)
783 {
784  AddPrologue
785 
786  // now: eax = A, edi = B, edx = C, ecx = N
787  AS2( lea eax, [eax+4*ecx])
788  AS2( lea edi, [edi+4*ecx])
789  AS2( lea edx, [edx+4*ecx])
790 
791  AS1( neg ecx) // ecx is negative index
792  AS2( test ecx, 2) // this clears carry flag
793  ASJ( jz, 0, f)
794  AS2( sub ecx, 2)
795  ASJ( jmp, 1, f)
796 
797  ASL(0)
798  ASJ( jecxz, 2, f) // loop until ecx overflows and becomes zero
799  AS2( mov esi,[eax+4*ecx])
800  AS2( sbb esi,[edi+4*ecx])
801  AS2( mov [edx+4*ecx],esi)
802  AS2( mov esi,[eax+4*ecx+4])
803  AS2( sbb esi,[edi+4*ecx+4])
804  AS2( mov [edx+4*ecx+4],esi)
805  ASL(1)
806  AS2( mov esi,[eax+4*ecx+8])
807  AS2( sbb esi,[edi+4*ecx+8])
808  AS2( mov [edx+4*ecx+8],esi)
809  AS2( mov esi,[eax+4*ecx+12])
810  AS2( sbb esi,[edi+4*ecx+12])
811  AS2( mov [edx+4*ecx+12],esi)
812 
813  AS2( lea ecx,[ecx+4]) // advance index, avoid inc which causes slowdown on Intel Core 2
814  ASJ( jmp, 0, b)
815 
816  ASL(2)
817  AS2( mov eax, 0)
818  AS1( setc al) // store carry into eax (return result register)
819 
820  AddEpilogue
821 }
822 
823 #if CRYPTOPP_INTEGER_SSE2
824 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Add(size_t N, word *C, const word *A, const word *B)
825 {
826  AddPrologue
827 
828  // now: eax = A, edi = B, edx = C, ecx = N
829  AS2( lea eax, [eax+4*ecx])
830  AS2( lea edi, [edi+4*ecx])
831  AS2( lea edx, [edx+4*ecx])
832 
833  AS1( neg ecx) // ecx is negative index
834  AS2( pxor mm2, mm2)
835  ASJ( jz, 2, f)
836  AS2( test ecx, 2) // this clears carry flag
837  ASJ( jz, 0, f)
838  AS2( sub ecx, 2)
839  ASJ( jmp, 1, f)
840 
841  ASL(0)
842  AS2( movd mm0, DWORD PTR [eax+4*ecx])
843  AS2( movd mm1, DWORD PTR [edi+4*ecx])
844  AS2( paddq mm0, mm1)
845  AS2( paddq mm2, mm0)
846  AS2( movd DWORD PTR [edx+4*ecx], mm2)
847  AS2( psrlq mm2, 32)
848 
849  AS2( movd mm0, DWORD PTR [eax+4*ecx+4])
850  AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
851  AS2( paddq mm0, mm1)
852  AS2( paddq mm2, mm0)
853  AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
854  AS2( psrlq mm2, 32)
855 
856  ASL(1)
857  AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
858  AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
859  AS2( paddq mm0, mm1)
860  AS2( paddq mm2, mm0)
861  AS2( movd DWORD PTR [edx+4*ecx+8], mm2)
862  AS2( psrlq mm2, 32)
863 
864  AS2( movd mm0, DWORD PTR [eax+4*ecx+12])
865  AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
866  AS2( paddq mm0, mm1)
867  AS2( paddq mm2, mm0)
868  AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
869  AS2( psrlq mm2, 32)
870 
871  AS2( add ecx, 4)
872  ASJ( jnz, 0, b)
873 
874  ASL(2)
875  AS2( movd eax, mm2)
876  AS1( emms)
877 
878  AddEpilogue
879 }
880 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Sub(size_t N, word *C, const word *A, const word *B)
881 {
882  AddPrologue
883 
884  // now: eax = A, edi = B, edx = C, ecx = N
885  AS2( lea eax, [eax+4*ecx])
886  AS2( lea edi, [edi+4*ecx])
887  AS2( lea edx, [edx+4*ecx])
888 
889  AS1( neg ecx) // ecx is negative index
890  AS2( pxor mm2, mm2)
891  ASJ( jz, 2, f)
892  AS2( test ecx, 2) // this clears carry flag
893  ASJ( jz, 0, f)
894  AS2( sub ecx, 2)
895  ASJ( jmp, 1, f)
896 
897  ASL(0)
898  AS2( movd mm0, DWORD PTR [eax+4*ecx])
899  AS2( movd mm1, DWORD PTR [edi+4*ecx])
900  AS2( psubq mm0, mm1)
901  AS2( psubq mm0, mm2)
902  AS2( movd DWORD PTR [edx+4*ecx], mm0)
903  AS2( psrlq mm0, 63)
904 
905  AS2( movd mm2, DWORD PTR [eax+4*ecx+4])
906  AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
907  AS2( psubq mm2, mm1)
908  AS2( psubq mm2, mm0)
909  AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
910  AS2( psrlq mm2, 63)
911 
912  ASL(1)
913  AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
914  AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
915  AS2( psubq mm0, mm1)
916  AS2( psubq mm0, mm2)
917  AS2( movd DWORD PTR [edx+4*ecx+8], mm0)
918  AS2( psrlq mm0, 63)
919 
920  AS2( movd mm2, DWORD PTR [eax+4*ecx+12])
921  AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
922  AS2( psubq mm2, mm1)
923  AS2( psubq mm2, mm0)
924  AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
925  AS2( psrlq mm2, 63)
926 
927  AS2( add ecx, 4)
928  ASJ( jnz, 0, b)
929 
930  ASL(2)
931  AS2( movd eax, mm2)
932  AS1( emms)
933 
934  AddEpilogue
935 }
936 #endif // CRYPTOPP_INTEGER_SSE2
937 #else // CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
938 int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
939 {
940  CRYPTOPP_ASSERT (N%2 == 0);
941 
942  Declare2Words(u);
943  AssignWord(u, 0);
944  for (size_t i=0; i<N; i+=2)
945  {
946  AddWithCarry(u, A[i], B[i]);
947  C[i] = LowWord(u);
948  AddWithCarry(u, A[i+1], B[i+1]);
949  C[i+1] = LowWord(u);
950  }
951  return int(GetCarry(u));
952 }
953 
954 int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B)
955 {
956  CRYPTOPP_ASSERT (N%2 == 0);
957 
958  Declare2Words(u);
959  AssignWord(u, 0);
960  for (size_t i=0; i<N; i+=2)
961  {
962  SubtractWithBorrow(u, A[i], B[i]);
963  C[i] = LowWord(u);
964  SubtractWithBorrow(u, A[i+1], B[i+1]);
965  C[i+1] = LowWord(u);
966  }
967  return int(GetBorrow(u));
968 }
969 #endif
970 
971 static word LinearMultiply(word *C, const word *AA, word B, size_t N)
972 {
973  // http://github.com/weidai11/cryptopp/issues/188
974  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
975 
976  word carry=0;
977  for(unsigned i=0; i<N; i++)
978  {
979  Declare2Words(p);
980  MultiplyWords(p, A[i], B);
981  Acc2WordsBy1(p, carry);
982  C[i] = LowWord(p);
983  carry = HighWord(p);
984  }
985  return carry;
986 }
987 
988 #ifndef CRYPTOPP_DOXYGEN_PROCESSING
989 
990 #define Mul_2 \
991  Mul_Begin(2) \
992  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
993  Mul_End(1, 1)
994 
995 #define Mul_4 \
996  Mul_Begin(4) \
997  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
998  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
999  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1000  Mul_SaveAcc(3, 1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
1001  Mul_SaveAcc(4, 2, 3) Mul_Acc(3, 2) \
1002  Mul_End(5, 3)
1003 
1004 #define Mul_8 \
1005  Mul_Begin(8) \
1006  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1007  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
1008  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1009  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
1010  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
1011  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
1012  Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
1013  Mul_SaveAcc(7, 1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
1014  Mul_SaveAcc(8, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
1015  Mul_SaveAcc(9, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
1016  Mul_SaveAcc(10, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
1017  Mul_SaveAcc(11, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
1018  Mul_SaveAcc(12, 6, 7) Mul_Acc(7, 6) \
1019  Mul_End(13, 7)
1020 
1021 #define Mul_16 \
1022  Mul_Begin(16) \
1023  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1024  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
1025  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1026  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
1027  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
1028  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
1029  Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
1030  Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
1031  Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
1032  Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
1033  Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
1034  Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
1035  Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
1036  Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
1037  Mul_SaveAcc(14, 0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
1038  Mul_SaveAcc(15, 1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
1039  Mul_SaveAcc(16, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
1040  Mul_SaveAcc(17, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
1041  Mul_SaveAcc(18, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
1042  Mul_SaveAcc(19, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
1043  Mul_SaveAcc(20, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
1044  Mul_SaveAcc(21, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
1045  Mul_SaveAcc(22, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
1046  Mul_SaveAcc(23, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
1047  Mul_SaveAcc(24, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
1048  Mul_SaveAcc(25, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
1049  Mul_SaveAcc(26, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
1050  Mul_SaveAcc(27, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
1051  Mul_SaveAcc(28, 14, 15) Mul_Acc(15, 14) \
1052  Mul_End(29, 15)
1053 
1054 #define Squ_2 \
1055  Squ_Begin(2) \
1056  Squ_End(2)
1057 
1058 #define Squ_4 \
1059  Squ_Begin(4) \
1060  Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
1061  Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
1062  Squ_SaveAcc(3, 1, 3) Squ_Diag(2) \
1063  Squ_SaveAcc(4, 2, 3) Squ_NonDiag \
1064  Squ_End(4)
1065 
1066 #define Squ_8 \
1067  Squ_Begin(8) \
1068  Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
1069  Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
1070  Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
1071  Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
1072  Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
1073  Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
1074  Squ_SaveAcc(7, 1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
1075  Squ_SaveAcc(8, 2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
1076  Squ_SaveAcc(9, 3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
1077  Squ_SaveAcc(10, 4, 7) Squ_Acc(5, 6) Squ_NonDiag \
1078  Squ_SaveAcc(11, 5, 7) Squ_Diag(6) \
1079  Squ_SaveAcc(12, 6, 7) Squ_NonDiag \
1080  Squ_End(8)
1081 
1082 #define Squ_16 \
1083  Squ_Begin(16) \
1084  Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
1085  Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
1086  Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
1087  Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
1088  Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
1089  Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
1090  Squ_SaveAcc(7, 0, 8) Squ_Acc(1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
1091  Squ_SaveAcc(8, 0, 9) Squ_Acc(1, 8) Squ_Acc(2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
1092  Squ_SaveAcc(9, 0, 10) Squ_Acc(1, 9) Squ_Acc(2, 8) Squ_Acc(3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
1093  Squ_SaveAcc(10, 0, 11) Squ_Acc(1, 10) Squ_Acc(2, 9) Squ_Acc(3, 8) Squ_Acc(4, 7) Squ_Acc(5, 6) Squ_NonDiag \
1094  Squ_SaveAcc(11, 0, 12) Squ_Acc(1, 11) Squ_Acc(2, 10) Squ_Acc(3, 9) Squ_Acc(4, 8) Squ_Acc(5, 7) Squ_Diag(6) \
1095  Squ_SaveAcc(12, 0, 13) Squ_Acc(1, 12) Squ_Acc(2, 11) Squ_Acc(3, 10) Squ_Acc(4, 9) Squ_Acc(5, 8) Squ_Acc(6, 7) Squ_NonDiag \
1096  Squ_SaveAcc(13, 0, 14) Squ_Acc(1, 13) Squ_Acc(2, 12) Squ_Acc(3, 11) Squ_Acc(4, 10) Squ_Acc(5, 9) Squ_Acc(6, 8) Squ_Diag(7) \
1097  Squ_SaveAcc(14, 0, 15) Squ_Acc(1, 14) Squ_Acc(2, 13) Squ_Acc(3, 12) Squ_Acc(4, 11) Squ_Acc(5, 10) Squ_Acc(6, 9) Squ_Acc(7, 8) Squ_NonDiag \
1098  Squ_SaveAcc(15, 1, 15) Squ_Acc(2, 14) Squ_Acc(3, 13) Squ_Acc(4, 12) Squ_Acc(5, 11) Squ_Acc(6, 10) Squ_Acc(7, 9) Squ_Diag(8) \
1099  Squ_SaveAcc(16, 2, 15) Squ_Acc(3, 14) Squ_Acc(4, 13) Squ_Acc(5, 12) Squ_Acc(6, 11) Squ_Acc(7, 10) Squ_Acc(8, 9) Squ_NonDiag \
1100  Squ_SaveAcc(17, 3, 15) Squ_Acc(4, 14) Squ_Acc(5, 13) Squ_Acc(6, 12) Squ_Acc(7, 11) Squ_Acc(8, 10) Squ_Diag(9) \
1101  Squ_SaveAcc(18, 4, 15) Squ_Acc(5, 14) Squ_Acc(6, 13) Squ_Acc(7, 12) Squ_Acc(8, 11) Squ_Acc(9, 10) Squ_NonDiag \
1102  Squ_SaveAcc(19, 5, 15) Squ_Acc(6, 14) Squ_Acc(7, 13) Squ_Acc(8, 12) Squ_Acc(9, 11) Squ_Diag(10) \
1103  Squ_SaveAcc(20, 6, 15) Squ_Acc(7, 14) Squ_Acc(8, 13) Squ_Acc(9, 12) Squ_Acc(10, 11) Squ_NonDiag \
1104  Squ_SaveAcc(21, 7, 15) Squ_Acc(8, 14) Squ_Acc(9, 13) Squ_Acc(10, 12) Squ_Diag(11) \
1105  Squ_SaveAcc(22, 8, 15) Squ_Acc(9, 14) Squ_Acc(10, 13) Squ_Acc(11, 12) Squ_NonDiag \
1106  Squ_SaveAcc(23, 9, 15) Squ_Acc(10, 14) Squ_Acc(11, 13) Squ_Diag(12) \
1107  Squ_SaveAcc(24, 10, 15) Squ_Acc(11, 14) Squ_Acc(12, 13) Squ_NonDiag \
1108  Squ_SaveAcc(25, 11, 15) Squ_Acc(12, 14) Squ_Diag(13) \
1109  Squ_SaveAcc(26, 12, 15) Squ_Acc(13, 14) Squ_NonDiag \
1110  Squ_SaveAcc(27, 13, 15) Squ_Diag(14) \
1111  Squ_SaveAcc(28, 14, 15) Squ_NonDiag \
1112  Squ_End(16)
1113 
1114 #define Bot_2 \
1115  Mul_Begin(2) \
1116  Bot_SaveAcc(0, 0, 1) Bot_Acc(1, 0) \
1117  Bot_End(2)
1118 
1119 #define Bot_4 \
1120  Mul_Begin(4) \
1121  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1122  Mul_SaveAcc(1, 2, 0) Mul_Acc(1, 1) Mul_Acc(0, 2) \
1123  Bot_SaveAcc(2, 0, 3) Bot_Acc(1, 2) Bot_Acc(2, 1) Bot_Acc(3, 0) \
1124  Bot_End(4)
1125 
1126 #define Bot_8 \
1127  Mul_Begin(8) \
1128  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1129  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
1130  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1131  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
1132  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
1133  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
1134  Bot_SaveAcc(6, 0, 7) Bot_Acc(1, 6) Bot_Acc(2, 5) Bot_Acc(3, 4) Bot_Acc(4, 3) Bot_Acc(5, 2) Bot_Acc(6, 1) Bot_Acc(7, 0) \
1135  Bot_End(8)
1136 
1137 #define Bot_16 \
1138  Mul_Begin(16) \
1139  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1140  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
1141  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1142  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
1143  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
1144  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
1145  Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
1146  Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
1147  Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
1148  Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
1149  Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
1150  Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
1151  Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
1152  Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
1153  Bot_SaveAcc(14, 0, 15) Bot_Acc(1, 14) Bot_Acc(2, 13) Bot_Acc(3, 12) Bot_Acc(4, 11) Bot_Acc(5, 10) Bot_Acc(6, 9) Bot_Acc(7, 8) Bot_Acc(8, 7) Bot_Acc(9, 6) Bot_Acc(10, 5) Bot_Acc(11, 4) Bot_Acc(12, 3) Bot_Acc(13, 2) Bot_Acc(14, 1) Bot_Acc(15, 0) \
1154  Bot_End(16)
1155 
1156 #endif
1157 
1158 #if 0
1159 #define Mul_Begin(n) \
1160  Declare2Words(p) \
1161  Declare2Words(c) \
1162  Declare2Words(d) \
1163  MultiplyWords(p, A[0], B[0]) \
1164  AssignWord(c, LowWord(p)) \
1165  AssignWord(d, HighWord(p))
1166 
1167 #define Mul_Acc(i, j) \
1168  MultiplyWords(p, A[i], B[j]) \
1169  Acc2WordsBy1(c, LowWord(p)) \
1170  Acc2WordsBy1(d, HighWord(p))
1171 
1172 #define Mul_SaveAcc(k, i, j) \
1173  R[k] = LowWord(c); \
1174  Add2WordsBy1(c, d, HighWord(c)) \
1175  MultiplyWords(p, A[i], B[j]) \
1176  AssignWord(d, HighWord(p)) \
1177  Acc2WordsBy1(c, LowWord(p))
1178 
1179 #define Mul_End(n) \
1180  R[2*n-3] = LowWord(c); \
1181  Acc2WordsBy1(d, HighWord(c)) \
1182  MultiplyWords(p, A[n-1], B[n-1])\
1183  Acc2WordsBy2(d, p) \
1184  R[2*n-2] = LowWord(d); \
1185  R[2*n-1] = HighWord(d);
1186 
1187 #define Bot_SaveAcc(k, i, j) \
1188  R[k] = LowWord(c); \
1189  word e = LowWord(d) + HighWord(c); \
1190  e += A[i] * B[j];
1191 
1192 #define Bot_Acc(i, j) \
1193  e += A[i] * B[j];
1194 
1195 #define Bot_End(n) \
1196  R[n-1] = e;
1197 #else
1198 #define Mul_Begin(n) \
1199  Declare2Words(p) \
1200  word c; \
1201  Declare2Words(d) \
1202  MultiplyWords(p, A[0], B[0]) \
1203  c = LowWord(p); \
1204  AssignWord(d, HighWord(p))
1205 
1206 #define Mul_Acc(i, j) \
1207  MulAcc(c, d, A[i], B[j])
1208 
1209 #define Mul_SaveAcc(k, i, j) \
1210  R[k] = c; \
1211  c = LowWord(d); \
1212  AssignWord(d, HighWord(d)) \
1213  MulAcc(c, d, A[i], B[j])
1214 
1215 #define Mul_End(k, i) \
1216  R[k] = c; \
1217  MultiplyWords(p, A[i], B[i]) \
1218  Acc2WordsBy2(p, d) \
1219  R[k+1] = LowWord(p); \
1220  R[k+2] = HighWord(p);
1221 
1222 #define Bot_SaveAcc(k, i, j) \
1223  R[k] = c; \
1224  c = LowWord(d); \
1225  c += A[i] * B[j];
1226 
1227 #define Bot_Acc(i, j) \
1228  c += A[i] * B[j];
1229 
1230 #define Bot_End(n) \
1231  R[n-1] = c;
1232 #endif
1233 
1234 #define Squ_Begin(n) \
1235  Declare2Words(p) \
1236  word c; \
1237  Declare2Words(d) \
1238  Declare2Words(e) \
1239  MultiplyWords(p, A[0], A[0]) \
1240  R[0] = LowWord(p); \
1241  AssignWord(e, HighWord(p)) \
1242  MultiplyWords(p, A[0], A[1]) \
1243  c = LowWord(p); \
1244  AssignWord(d, HighWord(p)) \
1245  Squ_NonDiag \
1246 
1247 #define Squ_NonDiag \
1248  Double3Words(c, d)
1249 
1250 #define Squ_SaveAcc(k, i, j) \
1251  Acc3WordsBy2(c, d, e) \
1252  R[k] = c; \
1253  MultiplyWords(p, A[i], A[j]) \
1254  c = LowWord(p); \
1255  AssignWord(d, HighWord(p)) \
1256 
1257 #define Squ_Acc(i, j) \
1258  MulAcc(c, d, A[i], A[j])
1259 
1260 #define Squ_Diag(i) \
1261  Squ_NonDiag \
1262  MulAcc(c, d, A[i], A[i])
1263 
1264 #define Squ_End(n) \
1265  Acc3WordsBy2(c, d, e) \
1266  R[2*n-3] = c; \
1267  MultiplyWords(p, A[n-1], A[n-1])\
1268  Acc2WordsBy2(p, e) \
1269  R[2*n-2] = LowWord(p); \
1270  R[2*n-1] = HighWord(p);
1271 
1272 
1273 void Baseline_Multiply2(word *R, const word *AA, const word *BB)
1274 {
1275  // http://github.com/weidai11/cryptopp/issues/188
1276  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1277  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1278 
1279  Mul_2
1280 }
1281 
1282 void Baseline_Multiply4(word *R, const word *AA, const word *BB)
1283 {
1284  // http://github.com/weidai11/cryptopp/issues/188
1285  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1286  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1287 
1288  Mul_4
1289 }
1290 
1291 void Baseline_Multiply8(word *R, const word *AA, const word *BB)
1292 {
1293  // http://github.com/weidai11/cryptopp/issues/188
1294  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1295  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1296 
1297  Mul_8
1298 }
1299 
1300 void Baseline_Square2(word *R, const word *AA)
1301 {
1302  // http://github.com/weidai11/cryptopp/issues/188
1303  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1304 
1305  Squ_2
1306 }
1307 
1308 void Baseline_Square4(word *R, const word *AA)
1309 {
1310  // http://github.com/weidai11/cryptopp/issues/188
1311  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1312 
1313  Squ_4
1314 }
1315 
1316 void Baseline_Square8(word *R, const word *AA)
1317 {
1318  // http://github.com/weidai11/cryptopp/issues/188
1319  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1320 
1321  Squ_8
1322 }
1323 
1324 void Baseline_MultiplyBottom2(word *R, const word *AA, const word *BB)
1325 {
1326  // http://github.com/weidai11/cryptopp/issues/188
1327  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1328  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1329 
1330  Bot_2
1331 }
1332 
1333 void Baseline_MultiplyBottom4(word *R, const word *AA, const word *BB)
1334 {
1335  // http://github.com/weidai11/cryptopp/issues/188
1336  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1337  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1338 
1339  Bot_4
1340 }
1341 
1342 void Baseline_MultiplyBottom8(word *R, const word *AA, const word *BB)
1343 {
1344  // http://github.com/weidai11/cryptopp/issues/188
1345  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1346  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1347 
1348  Bot_8
1349 }
1350 
1351 #define Top_Begin(n) \
1352  Declare2Words(p) \
1353  word c; \
1354  Declare2Words(d) \
1355  MultiplyWords(p, A[0], B[n-2]);\
1356  AssignWord(d, HighWord(p));
1357 
1358 #define Top_Acc(i, j) \
1359  MultiplyWords(p, A[i], B[j]);\
1360  Acc2WordsBy1(d, HighWord(p));
1361 
1362 #define Top_SaveAcc0(i, j) \
1363  c = LowWord(d); \
1364  AssignWord(d, HighWord(d)) \
1365  MulAcc(c, d, A[i], B[j])
1366 
1367 #define Top_SaveAcc1(i, j) \
1368  c = L<c; \
1369  Acc2WordsBy1(d, c); \
1370  c = LowWord(d); \
1371  AssignWord(d, HighWord(d)) \
1372  MulAcc(c, d, A[i], B[j])
1373 
1374 void Baseline_MultiplyTop2(word *R, const word *A, const word *B, word L)
1375 {
1376  CRYPTOPP_UNUSED(L);
1377  word T[4];
1378  Baseline_Multiply2(T, A, B);
1379  R[0] = T[2];
1380  R[1] = T[3];
1381 }
1382 
1383 void Baseline_MultiplyTop4(word *R, const word *AA, const word *BB, word L)
1384 {
1385  // http://github.com/weidai11/cryptopp/issues/188
1386  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1387  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1388 
1389  Top_Begin(4)
1390  Top_Acc(1, 1) Top_Acc(2, 0) \
1391  Top_SaveAcc0(0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1392  Top_SaveAcc1(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
1393  Mul_SaveAcc(0, 2, 3) Mul_Acc(3, 2) \
1394  Mul_End(1, 3)
1395 }
1396 
1397 void Baseline_MultiplyTop8(word *R, const word *AA, const word *BB, word L)
1398 {
1399  // http://github.com/weidai11/cryptopp/issues/188
1400  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1401  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1402 
1403  Top_Begin(8)
1404  Top_Acc(1, 5) Top_Acc(2, 4) Top_Acc(3, 3) Top_Acc(4, 2) Top_Acc(5, 1) Top_Acc(6, 0) \
1405  Top_SaveAcc0(0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
1406  Top_SaveAcc1(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
1407  Mul_SaveAcc(0, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
1408  Mul_SaveAcc(1, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
1409  Mul_SaveAcc(2, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
1410  Mul_SaveAcc(3, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
1411  Mul_SaveAcc(4, 6, 7) Mul_Acc(7, 6) \
1412  Mul_End(5, 7)
1413 }
1414 
1415 #if !CRYPTOPP_INTEGER_SSE2 // save memory by not compiling these functions when SSE2 is available
1416 void Baseline_Multiply16(word *R, const word *AA, const word *BB)
1417 {
1418  // http://github.com/weidai11/cryptopp/issues/188
1419  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1420  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1421 
1422  Mul_16
1423 }
1424 
1425 void Baseline_Square16(word *R, const word *AA)
1426 {
1427  // http://github.com/weidai11/cryptopp/issues/188
1428  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1429 
1430  Squ_16
1431 }
1432 
1433 void Baseline_MultiplyBottom16(word *R, const word *AA, const word *BB)
1434 {
1435  // http://github.com/weidai11/cryptopp/issues/188
1436  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1437  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1438 
1439  Bot_16
1440 }
1441 
1442 void Baseline_MultiplyTop16(word *R, const word *AA, const word *BB, word L)
1443 {
1444  // http://github.com/weidai11/cryptopp/issues/188
1445  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1446  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1447 
1448  Top_Begin(16)
1449  Top_Acc(1, 13) Top_Acc(2, 12) Top_Acc(3, 11) Top_Acc(4, 10) Top_Acc(5, 9) Top_Acc(6, 8) Top_Acc(7, 7) Top_Acc(8, 6) Top_Acc(9, 5) Top_Acc(10, 4) Top_Acc(11, 3) Top_Acc(12, 2) Top_Acc(13, 1) Top_Acc(14, 0) \
1450  Top_SaveAcc0(0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
1451  Top_SaveAcc1(1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
1452  Mul_SaveAcc(0, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
1453  Mul_SaveAcc(1, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
1454  Mul_SaveAcc(2, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
1455  Mul_SaveAcc(3, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
1456  Mul_SaveAcc(4, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
1457  Mul_SaveAcc(5, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
1458  Mul_SaveAcc(6, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
1459  Mul_SaveAcc(7, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
1460  Mul_SaveAcc(8, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
1461  Mul_SaveAcc(9, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
1462  Mul_SaveAcc(10, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
1463  Mul_SaveAcc(11, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
1464  Mul_SaveAcc(12, 14, 15) Mul_Acc(15, 14) \
1465  Mul_End(13, 15)
1466 }
1467 #endif
1468 
1469 // ********************************************************
1470 
1471 #if CRYPTOPP_INTEGER_SSE2
1472 
1473 CRYPTOPP_ALIGN_DATA(16) static const word32 s_maskLow16[4] CRYPTOPP_SECTION_ALIGN16 = {0xffff,0xffff,0xffff,0xffff};
1474 
1475 #undef Mul_Begin
1476 #undef Mul_Acc
1477 #undef Top_Begin
1478 #undef Top_Acc
1479 #undef Squ_Acc
1480 #undef Squ_NonDiag
1481 #undef Squ_Diag
1482 #undef Squ_SaveAcc
1483 #undef Squ_Begin
1484 #undef Mul_SaveAcc
1485 #undef Bot_Acc
1486 #undef Bot_SaveAcc
1487 #undef Bot_End
1488 #undef Squ_End
1489 #undef Mul_End
1490 
1491 #define SSE2_FinalSave(k) \
1492  AS2( psllq xmm5, 16) \
1493  AS2( paddq xmm4, xmm5) \
1494  AS2( movq QWORD PTR [ecx+8*(k)], xmm4)
1495 
1496 #define SSE2_SaveShift(k) \
1497  AS2( movq xmm0, xmm6) \
1498  AS2( punpckhqdq xmm6, xmm0) \
1499  AS2( movq xmm1, xmm7) \
1500  AS2( punpckhqdq xmm7, xmm1) \
1501  AS2( paddd xmm6, xmm0) \
1502  AS2( pslldq xmm6, 4) \
1503  AS2( paddd xmm7, xmm1) \
1504  AS2( paddd xmm4, xmm6) \
1505  AS2( pslldq xmm7, 4) \
1506  AS2( movq xmm6, xmm4) \
1507  AS2( paddd xmm5, xmm7) \
1508  AS2( movq xmm7, xmm5) \
1509  AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
1510  AS2( psrlq xmm6, 16) \
1511  AS2( paddq xmm6, xmm7) \
1512  AS2( punpckhqdq xmm4, xmm0) \
1513  AS2( punpckhqdq xmm5, xmm0) \
1514  AS2( movq QWORD PTR [ecx+8*(k)+2], xmm6) \
1515  AS2( psrlq xmm6, 3*16) \
1516  AS2( paddd xmm4, xmm6) \
1517 
1518 #define Squ_SSE2_SaveShift(k) \
1519  AS2( movq xmm0, xmm6) \
1520  AS2( punpckhqdq xmm6, xmm0) \
1521  AS2( movq xmm1, xmm7) \
1522  AS2( punpckhqdq xmm7, xmm1) \
1523  AS2( paddd xmm6, xmm0) \
1524  AS2( pslldq xmm6, 4) \
1525  AS2( paddd xmm7, xmm1) \
1526  AS2( paddd xmm4, xmm6) \
1527  AS2( pslldq xmm7, 4) \
1528  AS2( movhlps xmm6, xmm4) \
1529  AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
1530  AS2( paddd xmm5, xmm7) \
1531  AS2( movhps QWORD PTR [esp+12], xmm5)\
1532  AS2( psrlq xmm4, 16) \
1533  AS2( paddq xmm4, xmm5) \
1534  AS2( movq QWORD PTR [ecx+8*(k)+2], xmm4) \
1535  AS2( psrlq xmm4, 3*16) \
1536  AS2( paddd xmm4, xmm6) \
1537  AS2( movq QWORD PTR [esp+4], xmm4)\
1538 
1539 #define SSE2_FirstMultiply(i) \
1540  AS2( movdqa xmm7, [esi+(i)*16])\
1541  AS2( movdqa xmm5, [edi-(i)*16])\
1542  AS2( pmuludq xmm5, xmm7) \
1543  AS2( movdqa xmm4, [ebx])\
1544  AS2( movdqa xmm6, xmm4) \
1545  AS2( pand xmm4, xmm5) \
1546  AS2( psrld xmm5, 16) \
1547  AS2( pmuludq xmm7, [edx-(i)*16])\
1548  AS2( pand xmm6, xmm7) \
1549  AS2( psrld xmm7, 16)
1550 
1551 #define Squ_Begin(n) \
1552  SquPrologue \
1553  AS2( mov esi, esp)\
1554  AS2( and esp, 0xfffffff0)\
1555  AS2( lea edi, [esp-32*n])\
1556  AS2( sub esp, 32*n+16)\
1557  AS1( push esi)\
1558  AS2( mov esi, edi) \
1559  AS2( xor edx, edx) \
1560  ASL(1) \
1561  ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
1562  ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
1563  AS2( movdqa [edi+2*edx], xmm0) \
1564  AS2( psrlq xmm0, 32) \
1565  AS2( movdqa [edi+2*edx+16], xmm0) \
1566  AS2( movdqa [edi+16*n+2*edx], xmm1) \
1567  AS2( psrlq xmm1, 32) \
1568  AS2( movdqa [edi+16*n+2*edx+16], xmm1) \
1569  AS2( add edx, 16) \
1570  AS2( cmp edx, 8*(n)) \
1571  ASJ( jne, 1, b) \
1572  AS2( lea edx, [edi+16*n])\
1573  SSE2_FirstMultiply(0) \
1574 
1575 #define Squ_Acc(i) \
1576  ASL(LSqu##i) \
1577  AS2( movdqa xmm1, [esi+(i)*16]) \
1578  AS2( movdqa xmm0, [edi-(i)*16]) \
1579  AS2( movdqa xmm2, [ebx]) \
1580  AS2( pmuludq xmm0, xmm1) \
1581  AS2( pmuludq xmm1, [edx-(i)*16]) \
1582  AS2( movdqa xmm3, xmm2) \
1583  AS2( pand xmm2, xmm0) \
1584  AS2( psrld xmm0, 16) \
1585  AS2( paddd xmm4, xmm2) \
1586  AS2( paddd xmm5, xmm0) \
1587  AS2( pand xmm3, xmm1) \
1588  AS2( psrld xmm1, 16) \
1589  AS2( paddd xmm6, xmm3) \
1590  AS2( paddd xmm7, xmm1) \
1591 
1592 #define Squ_Acc1(i)
1593 #define Squ_Acc2(i) ASC(call, LSqu##i)
1594 #define Squ_Acc3(i) Squ_Acc2(i)
1595 #define Squ_Acc4(i) Squ_Acc2(i)
1596 #define Squ_Acc5(i) Squ_Acc2(i)
1597 #define Squ_Acc6(i) Squ_Acc2(i)
1598 #define Squ_Acc7(i) Squ_Acc2(i)
1599 #define Squ_Acc8(i) Squ_Acc2(i)
1600 
1601 #define SSE2_End(E, n) \
1602  SSE2_SaveShift(2*(n)-3) \
1603  AS2( movdqa xmm7, [esi+16]) \
1604  AS2( movdqa xmm0, [edi]) \
1605  AS2( pmuludq xmm0, xmm7) \
1606  AS2( movdqa xmm2, [ebx]) \
1607  AS2( pmuludq xmm7, [edx]) \
1608  AS2( movdqa xmm6, xmm2) \
1609  AS2( pand xmm2, xmm0) \
1610  AS2( psrld xmm0, 16) \
1611  AS2( paddd xmm4, xmm2) \
1612  AS2( paddd xmm5, xmm0) \
1613  AS2( pand xmm6, xmm7) \
1614  AS2( psrld xmm7, 16) \
1615  SSE2_SaveShift(2*(n)-2) \
1616  SSE2_FinalSave(2*(n)-1) \
1617  AS1( pop esp)\
1618  E
1619 
1620 #define Squ_End(n) SSE2_End(SquEpilogue, n)
1621 #define Mul_End(n) SSE2_End(MulEpilogue, n)
1622 #define Top_End(n) SSE2_End(TopEpilogue, n)
1623 
1624 #define Squ_Column1(k, i) \
1625  Squ_SSE2_SaveShift(k) \
1626  AS2( add esi, 16) \
1627  SSE2_FirstMultiply(1)\
1628  Squ_Acc##i(i) \
1629  AS2( paddd xmm4, xmm4) \
1630  AS2( paddd xmm5, xmm5) \
1631  AS2( movdqa xmm3, [esi]) \
1632  AS2( movq xmm1, QWORD PTR [esi+8]) \
1633  AS2( pmuludq xmm1, xmm3) \
1634  AS2( pmuludq xmm3, xmm3) \
1635  AS2( movdqa xmm0, [ebx])\
1636  AS2( movdqa xmm2, xmm0) \
1637  AS2( pand xmm0, xmm1) \
1638  AS2( psrld xmm1, 16) \
1639  AS2( paddd xmm6, xmm0) \
1640  AS2( paddd xmm7, xmm1) \
1641  AS2( pand xmm2, xmm3) \
1642  AS2( psrld xmm3, 16) \
1643  AS2( paddd xmm6, xmm6) \
1644  AS2( paddd xmm7, xmm7) \
1645  AS2( paddd xmm4, xmm2) \
1646  AS2( paddd xmm5, xmm3) \
1647  AS2( movq xmm0, QWORD PTR [esp+4])\
1648  AS2( movq xmm1, QWORD PTR [esp+12])\
1649  AS2( paddd xmm4, xmm0)\
1650  AS2( paddd xmm5, xmm1)\
1651 
1652 #define Squ_Column0(k, i) \
1653  Squ_SSE2_SaveShift(k) \
1654  AS2( add edi, 16) \
1655  AS2( add edx, 16) \
1656  SSE2_FirstMultiply(1)\
1657  Squ_Acc##i(i) \
1658  AS2( paddd xmm6, xmm6) \
1659  AS2( paddd xmm7, xmm7) \
1660  AS2( paddd xmm4, xmm4) \
1661  AS2( paddd xmm5, xmm5) \
1662  AS2( movq xmm0, QWORD PTR [esp+4])\
1663  AS2( movq xmm1, QWORD PTR [esp+12])\
1664  AS2( paddd xmm4, xmm0)\
1665  AS2( paddd xmm5, xmm1)\
1666 
1667 #define SSE2_MulAdd45 \
1668  AS2( movdqa xmm7, [esi]) \
1669  AS2( movdqa xmm0, [edi]) \
1670  AS2( pmuludq xmm0, xmm7) \
1671  AS2( movdqa xmm2, [ebx]) \
1672  AS2( pmuludq xmm7, [edx]) \
1673  AS2( movdqa xmm6, xmm2) \
1674  AS2( pand xmm2, xmm0) \
1675  AS2( psrld xmm0, 16) \
1676  AS2( paddd xmm4, xmm2) \
1677  AS2( paddd xmm5, xmm0) \
1678  AS2( pand xmm6, xmm7) \
1679  AS2( psrld xmm7, 16)
1680 
1681 #define Mul_Begin(n) \
1682  MulPrologue \
1683  AS2( mov esi, esp)\
1684  AS2( and esp, 0xfffffff0)\
1685  AS2( sub esp, 48*n+16)\
1686  AS1( push esi)\
1687  AS2( xor edx, edx) \
1688  ASL(1) \
1689  ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
1690  ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
1691  ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
1692  AS2( movdqa [esp+20+2*edx], xmm0) \
1693  AS2( psrlq xmm0, 32) \
1694  AS2( movdqa [esp+20+2*edx+16], xmm0) \
1695  AS2( movdqa [esp+20+16*n+2*edx], xmm1) \
1696  AS2( psrlq xmm1, 32) \
1697  AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \
1698  AS2( movdqa [esp+20+32*n+2*edx], xmm2) \
1699  AS2( psrlq xmm2, 32) \
1700  AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \
1701  AS2( add edx, 16) \
1702  AS2( cmp edx, 8*(n)) \
1703  ASJ( jne, 1, b) \
1704  AS2( lea edi, [esp+20])\
1705  AS2( lea edx, [esp+20+16*n])\
1706  AS2( lea esi, [esp+20+32*n])\
1707  SSE2_FirstMultiply(0) \
1708 
1709 #define Mul_Acc(i) \
1710  ASL(LMul##i) \
1711  AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
1712  AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
1713  AS2( movdqa xmm2, [ebx]) \
1714  AS2( pmuludq xmm0, xmm1) \
1715  AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
1716  AS2( movdqa xmm3, xmm2) \
1717  AS2( pand xmm2, xmm0) \
1718  AS2( psrld xmm0, 16) \
1719  AS2( paddd xmm4, xmm2) \
1720  AS2( paddd xmm5, xmm0) \
1721  AS2( pand xmm3, xmm1) \
1722  AS2( psrld xmm1, 16) \
1723  AS2( paddd xmm6, xmm3) \
1724  AS2( paddd xmm7, xmm1) \
1725 
1726 #define Mul_Acc1(i)
1727 #define Mul_Acc2(i) ASC(call, LMul##i)
1728 #define Mul_Acc3(i) Mul_Acc2(i)
1729 #define Mul_Acc4(i) Mul_Acc2(i)
1730 #define Mul_Acc5(i) Mul_Acc2(i)
1731 #define Mul_Acc6(i) Mul_Acc2(i)
1732 #define Mul_Acc7(i) Mul_Acc2(i)
1733 #define Mul_Acc8(i) Mul_Acc2(i)
1734 #define Mul_Acc9(i) Mul_Acc2(i)
1735 #define Mul_Acc10(i) Mul_Acc2(i)
1736 #define Mul_Acc11(i) Mul_Acc2(i)
1737 #define Mul_Acc12(i) Mul_Acc2(i)
1738 #define Mul_Acc13(i) Mul_Acc2(i)
1739 #define Mul_Acc14(i) Mul_Acc2(i)
1740 #define Mul_Acc15(i) Mul_Acc2(i)
1741 #define Mul_Acc16(i) Mul_Acc2(i)
1742 
1743 #define Mul_Column1(k, i) \
1744  SSE2_SaveShift(k) \
1745  AS2( add esi, 16) \
1746  SSE2_MulAdd45\
1747  Mul_Acc##i(i) \
1748 
1749 #define Mul_Column0(k, i) \
1750  SSE2_SaveShift(k) \
1751  AS2( add edi, 16) \
1752  AS2( add edx, 16) \
1753  SSE2_MulAdd45\
1754  Mul_Acc##i(i) \
1755 
1756 #define Bot_Acc(i) \
1757  AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
1758  AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
1759  AS2( pmuludq xmm0, xmm1) \
1760  AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
1761  AS2( paddq xmm4, xmm0) \
1762  AS2( paddd xmm6, xmm1)
1763 
1764 #define Bot_SaveAcc(k) \
1765  SSE2_SaveShift(k) \
1766  AS2( add edi, 16) \
1767  AS2( add edx, 16) \
1768  AS2( movdqa xmm6, [esi]) \
1769  AS2( movdqa xmm0, [edi]) \
1770  AS2( pmuludq xmm0, xmm6) \
1771  AS2( paddq xmm4, xmm0) \
1772  AS2( psllq xmm5, 16) \
1773  AS2( paddq xmm4, xmm5) \
1774  AS2( pmuludq xmm6, [edx])
1775 
1776 #define Bot_End(n) \
1777  AS2( movhlps xmm7, xmm6) \
1778  AS2( paddd xmm6, xmm7) \
1779  AS2( psllq xmm6, 32) \
1780  AS2( paddd xmm4, xmm6) \
1781  AS2( movq QWORD PTR [ecx+8*((n)-1)], xmm4) \
1782  AS1( pop esp)\
1783  MulEpilogue
1784 
1785 #define Top_Begin(n) \
1786  TopPrologue \
1787  AS2( mov edx, esp)\
1788  AS2( and esp, 0xfffffff0)\
1789  AS2( sub esp, 48*n+16)\
1790  AS1( push edx)\
1791  AS2( xor edx, edx) \
1792  ASL(1) \
1793  ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
1794  ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
1795  ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
1796  AS2( movdqa [esp+20+2*edx], xmm0) \
1797  AS2( psrlq xmm0, 32) \
1798  AS2( movdqa [esp+20+2*edx+16], xmm0) \
1799  AS2( movdqa [esp+20+16*n+2*edx], xmm1) \
1800  AS2( psrlq xmm1, 32) \
1801  AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \
1802  AS2( movdqa [esp+20+32*n+2*edx], xmm2) \
1803  AS2( psrlq xmm2, 32) \
1804  AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \
1805  AS2( add edx, 16) \
1806  AS2( cmp edx, 8*(n)) \
1807  ASJ( jne, 1, b) \
1808  AS2( mov eax, esi) \
1809  AS2( lea edi, [esp+20+00*n+16*(n/2-1)])\
1810  AS2( lea edx, [esp+20+16*n+16*(n/2-1)])\
1811  AS2( lea esi, [esp+20+32*n+16*(n/2-1)])\
1812  AS2( pxor xmm4, xmm4)\
1813  AS2( pxor xmm5, xmm5)
1814 
1815 #define Top_Acc(i) \
1816  AS2( movq xmm0, QWORD PTR [esi+i/2*(1-(i-2*(i/2))*2)*16+8]) \
1817  AS2( pmuludq xmm0, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
1818  AS2( psrlq xmm0, 48) \
1819  AS2( paddd xmm5, xmm0)\
1820 
1821 #define Top_Column0(i) \
1822  AS2( psllq xmm5, 32) \
1823  AS2( add edi, 16) \
1824  AS2( add edx, 16) \
1825  SSE2_MulAdd45\
1826  Mul_Acc##i(i) \
1827 
1828 #define Top_Column1(i) \
1829  SSE2_SaveShift(0) \
1830  AS2( add esi, 16) \
1831  SSE2_MulAdd45\
1832  Mul_Acc##i(i) \
1833  AS2( shr eax, 16) \
1834  AS2( movd xmm0, eax)\
1835  AS2( movd xmm1, [ecx+4])\
1836  AS2( psrld xmm1, 16)\
1837  AS2( pcmpgtd xmm1, xmm0)\
1838  AS2( psrld xmm1, 31)\
1839  AS2( paddd xmm4, xmm1)\
1840 
1841 void SSE2_Square4(word *C, const word *A)
1842 {
1843  Squ_Begin(2)
1844  Squ_Column0(0, 1)
1845  Squ_End(2)
1846 }
1847 
1848 void SSE2_Square8(word *C, const word *A)
1849 {
1850  Squ_Begin(4)
1851 #ifndef __GNUC__
1852  ASJ( jmp, 0, f)
1853  Squ_Acc(2)
1854  AS1( ret) ASL(0)
1855 #endif
1856  Squ_Column0(0, 1)
1857  Squ_Column1(1, 1)
1858  Squ_Column0(2, 2)
1859  Squ_Column1(3, 1)
1860  Squ_Column0(4, 1)
1861  Squ_End(4)
1862 }
1863 
1864 void SSE2_Square16(word *C, const word *A)
1865 {
1866  Squ_Begin(8)
1867 #ifndef __GNUC__
1868  ASJ( jmp, 0, f)
1869  Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
1870  AS1( ret) ASL(0)
1871 #endif
1872  Squ_Column0(0, 1)
1873  Squ_Column1(1, 1)
1874  Squ_Column0(2, 2)
1875  Squ_Column1(3, 2)
1876  Squ_Column0(4, 3)
1877  Squ_Column1(5, 3)
1878  Squ_Column0(6, 4)
1879  Squ_Column1(7, 3)
1880  Squ_Column0(8, 3)
1881  Squ_Column1(9, 2)
1882  Squ_Column0(10, 2)
1883  Squ_Column1(11, 1)
1884  Squ_Column0(12, 1)
1885  Squ_End(8)
1886 }
1887 
1888 void SSE2_Square32(word *C, const word *A)
1889 {
1890  Squ_Begin(16)
1891  ASJ( jmp, 0, f)
1892  Squ_Acc(8) Squ_Acc(7) Squ_Acc(6) Squ_Acc(5) Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
1893  AS1( ret) ASL(0)
1894  Squ_Column0(0, 1)
1895  Squ_Column1(1, 1)
1896  Squ_Column0(2, 2)
1897  Squ_Column1(3, 2)
1898  Squ_Column0(4, 3)
1899  Squ_Column1(5, 3)
1900  Squ_Column0(6, 4)
1901  Squ_Column1(7, 4)
1902  Squ_Column0(8, 5)
1903  Squ_Column1(9, 5)
1904  Squ_Column0(10, 6)
1905  Squ_Column1(11, 6)
1906  Squ_Column0(12, 7)
1907  Squ_Column1(13, 7)
1908  Squ_Column0(14, 8)
1909  Squ_Column1(15, 7)
1910  Squ_Column0(16, 7)
1911  Squ_Column1(17, 6)
1912  Squ_Column0(18, 6)
1913  Squ_Column1(19, 5)
1914  Squ_Column0(20, 5)
1915  Squ_Column1(21, 4)
1916  Squ_Column0(22, 4)
1917  Squ_Column1(23, 3)
1918  Squ_Column0(24, 3)
1919  Squ_Column1(25, 2)
1920  Squ_Column0(26, 2)
1921  Squ_Column1(27, 1)
1922  Squ_Column0(28, 1)
1923  Squ_End(16)
1924 }
1925 
1926 void SSE2_Multiply4(word *C, const word *A, const word *B)
1927 {
1928  Mul_Begin(2)
1929 #ifndef __GNUC__
1930  ASJ( jmp, 0, f)
1931  Mul_Acc(2)
1932  AS1( ret) ASL(0)
1933 #endif
1934  Mul_Column0(0, 2)
1935  Mul_End(2)
1936 }
1937 
1938 void SSE2_Multiply8(word *C, const word *A, const word *B)
1939 {
1940  Mul_Begin(4)
1941 #ifndef __GNUC__
1942  ASJ( jmp, 0, f)
1943  Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1944  AS1( ret) ASL(0)
1945 #endif
1946  Mul_Column0(0, 2)
1947  Mul_Column1(1, 3)
1948  Mul_Column0(2, 4)
1949  Mul_Column1(3, 3)
1950  Mul_Column0(4, 2)
1951  Mul_End(4)
1952 }
1953 
1954 void SSE2_Multiply16(word *C, const word *A, const word *B)
1955 {
1956  Mul_Begin(8)
1957 #ifndef __GNUC__
1958  ASJ( jmp, 0, f)
1959  Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1960  AS1( ret) ASL(0)
1961 #endif
1962  Mul_Column0(0, 2)
1963  Mul_Column1(1, 3)
1964  Mul_Column0(2, 4)
1965  Mul_Column1(3, 5)
1966  Mul_Column0(4, 6)
1967  Mul_Column1(5, 7)
1968  Mul_Column0(6, 8)
1969  Mul_Column1(7, 7)
1970  Mul_Column0(8, 6)
1971  Mul_Column1(9, 5)
1972  Mul_Column0(10, 4)
1973  Mul_Column1(11, 3)
1974  Mul_Column0(12, 2)
1975  Mul_End(8)
1976 }
1977 
1978 void SSE2_Multiply32(word *C, const word *A, const word *B)
1979 {
1980  Mul_Begin(16)
1981  ASJ( jmp, 0, f)
1982  Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1983  AS1( ret) ASL(0)
1984  Mul_Column0(0, 2)
1985  Mul_Column1(1, 3)
1986  Mul_Column0(2, 4)
1987  Mul_Column1(3, 5)
1988  Mul_Column0(4, 6)
1989  Mul_Column1(5, 7)
1990  Mul_Column0(6, 8)
1991  Mul_Column1(7, 9)
1992  Mul_Column0(8, 10)
1993  Mul_Column1(9, 11)
1994  Mul_Column0(10, 12)
1995  Mul_Column1(11, 13)
1996  Mul_Column0(12, 14)
1997  Mul_Column1(13, 15)
1998  Mul_Column0(14, 16)
1999  Mul_Column1(15, 15)
2000  Mul_Column0(16, 14)
2001  Mul_Column1(17, 13)
2002  Mul_Column0(18, 12)
2003  Mul_Column1(19, 11)
2004  Mul_Column0(20, 10)
2005  Mul_Column1(21, 9)
2006  Mul_Column0(22, 8)
2007  Mul_Column1(23, 7)
2008  Mul_Column0(24, 6)
2009  Mul_Column1(25, 5)
2010  Mul_Column0(26, 4)
2011  Mul_Column1(27, 3)
2012  Mul_Column0(28, 2)
2013  Mul_End(16)
2014 }
2015 
2016 void SSE2_MultiplyBottom4(word *C, const word *A, const word *B)
2017 {
2018  Mul_Begin(2)
2019  Bot_SaveAcc(0) Bot_Acc(2)
2020  Bot_End(2)
2021 }
2022 
2023 void SSE2_MultiplyBottom8(word *C, const word *A, const word *B)
2024 {
2025  Mul_Begin(4)
2026 #ifndef __GNUC__
2027  ASJ( jmp, 0, f)
2028  Mul_Acc(3) Mul_Acc(2)
2029  AS1( ret) ASL(0)
2030 #endif
2031  Mul_Column0(0, 2)
2032  Mul_Column1(1, 3)
2033  Bot_SaveAcc(2) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
2034  Bot_End(4)
2035 }
2036 
2037 void SSE2_MultiplyBottom16(word *C, const word *A, const word *B)
2038 {
2039  Mul_Begin(8)
2040 #ifndef __GNUC__
2041  ASJ( jmp, 0, f)
2042  Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2043  AS1( ret) ASL(0)
2044 #endif
2045  Mul_Column0(0, 2)
2046  Mul_Column1(1, 3)
2047  Mul_Column0(2, 4)
2048  Mul_Column1(3, 5)
2049  Mul_Column0(4, 6)
2050  Mul_Column1(5, 7)
2051  Bot_SaveAcc(6) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
2052  Bot_End(8)
2053 }
2054 
2055 void SSE2_MultiplyBottom32(word *C, const word *A, const word *B)
2056 {
2057  Mul_Begin(16)
2058 #ifndef __GNUC__
2059  ASJ( jmp, 0, f)
2060  Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2061  AS1( ret) ASL(0)
2062 #endif
2063  Mul_Column0(0, 2)
2064  Mul_Column1(1, 3)
2065  Mul_Column0(2, 4)
2066  Mul_Column1(3, 5)
2067  Mul_Column0(4, 6)
2068  Mul_Column1(5, 7)
2069  Mul_Column0(6, 8)
2070  Mul_Column1(7, 9)
2071  Mul_Column0(8, 10)
2072  Mul_Column1(9, 11)
2073  Mul_Column0(10, 12)
2074  Mul_Column1(11, 13)
2075  Mul_Column0(12, 14)
2076  Mul_Column1(13, 15)
2077  Bot_SaveAcc(14) Bot_Acc(16) Bot_Acc(15) Bot_Acc(14) Bot_Acc(13) Bot_Acc(12) Bot_Acc(11) Bot_Acc(10) Bot_Acc(9) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
2078  Bot_End(16)
2079 }
2080 
2081 void SSE2_MultiplyTop8(word *C, const word *A, const word *B, word L)
2082 {
2083  Top_Begin(4)
2084  Top_Acc(3) Top_Acc(2) Top_Acc(1)
2085 #ifndef __GNUC__
2086  ASJ( jmp, 0, f)
2087  Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2088  AS1( ret) ASL(0)
2089 #endif
2090  Top_Column0(4)
2091  Top_Column1(3)
2092  Mul_Column0(0, 2)
2093  Top_End(2)
2094 }
2095 
2096 void SSE2_MultiplyTop16(word *C, const word *A, const word *B, word L)
2097 {
2098  Top_Begin(8)
2099  Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
2100 #ifndef __GNUC__
2101  ASJ( jmp, 0, f)
2102  Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2103  AS1( ret) ASL(0)
2104 #endif
2105  Top_Column0(8)
2106  Top_Column1(7)
2107  Mul_Column0(0, 6)
2108  Mul_Column1(1, 5)
2109  Mul_Column0(2, 4)
2110  Mul_Column1(3, 3)
2111  Mul_Column0(4, 2)
2112  Top_End(4)
2113 }
2114 
2115 void SSE2_MultiplyTop32(word *C, const word *A, const word *B, word L)
2116 {
2117  Top_Begin(16)
2118  Top_Acc(15) Top_Acc(14) Top_Acc(13) Top_Acc(12) Top_Acc(11) Top_Acc(10) Top_Acc(9) Top_Acc(8) Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
2119 #ifndef __GNUC__
2120  ASJ( jmp, 0, f)
2121  Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2122  AS1( ret) ASL(0)
2123 #endif
2124  Top_Column0(16)
2125  Top_Column1(15)
2126  Mul_Column0(0, 14)
2127  Mul_Column1(1, 13)
2128  Mul_Column0(2, 12)
2129  Mul_Column1(3, 11)
2130  Mul_Column0(4, 10)
2131  Mul_Column1(5, 9)
2132  Mul_Column0(6, 8)
2133  Mul_Column1(7, 7)
2134  Mul_Column0(8, 6)
2135  Mul_Column1(9, 5)
2136  Mul_Column0(10, 4)
2137  Mul_Column1(11, 3)
2138  Mul_Column0(12, 2)
2139  Top_End(8)
2140 }
2141 
2142 #endif // #if CRYPTOPP_INTEGER_SSE2
2143 
2144 // ********************************************************
2145 
2146 typedef int (CRYPTOPP_FASTCALL * PAdd)(size_t N, word *C, const word *A, const word *B);
2147 typedef void (* PMul)(word *C, const word *A, const word *B);
2148 typedef void (* PSqu)(word *C, const word *A);
2149 typedef void (* PMulTop)(word *C, const word *A, const word *B, word L);
2150 
2151 #if CRYPTOPP_INTEGER_SSE2
2152 static PAdd s_pAdd = &Baseline_Add, s_pSub = &Baseline_Sub;
2153 static size_t s_recursionLimit = 8;
2154 #else
2155 static const size_t s_recursionLimit = 16;
2156 #endif // CRYPTOPP_INTEGER_SSE2
2157 
2158 static PMul s_pMul[9], s_pBot[9];
2159 static PSqu s_pSqu[9];
2160 static PMulTop s_pTop[9];
2161 
2162 void SetFunctionPointers()
2163 {
2164  s_pMul[0] = &Baseline_Multiply2;
2165  s_pBot[0] = &Baseline_MultiplyBottom2;
2166  s_pSqu[0] = &Baseline_Square2;
2167  s_pTop[0] = &Baseline_MultiplyTop2;
2168  s_pTop[1] = &Baseline_MultiplyTop4;
2169 
2170 #if CRYPTOPP_INTEGER_SSE2
2171  if (HasSSE2())
2172  {
2173  if (IsP4())
2174  {
2175  s_pAdd = &SSE2_Add;
2176  s_pSub = &SSE2_Sub;
2177  }
2178 
2179  s_recursionLimit = 32;
2180 
2181  s_pMul[1] = &SSE2_Multiply4;
2182  s_pMul[2] = &SSE2_Multiply8;
2183  s_pMul[4] = &SSE2_Multiply16;
2184  s_pMul[8] = &SSE2_Multiply32;
2185 
2186  s_pBot[1] = &SSE2_MultiplyBottom4;
2187  s_pBot[2] = &SSE2_MultiplyBottom8;
2188  s_pBot[4] = &SSE2_MultiplyBottom16;
2189  s_pBot[8] = &SSE2_MultiplyBottom32;
2190 
2191  s_pSqu[1] = &SSE2_Square4;
2192  s_pSqu[2] = &SSE2_Square8;
2193  s_pSqu[4] = &SSE2_Square16;
2194  s_pSqu[8] = &SSE2_Square32;
2195 
2196  s_pTop[2] = &SSE2_MultiplyTop8;
2197  s_pTop[4] = &SSE2_MultiplyTop16;
2198  s_pTop[8] = &SSE2_MultiplyTop32;
2199  }
2200  else
2201 #endif // CRYPTOPP_INTEGER_SSE2
2202  {
2203  s_pMul[1] = &Baseline_Multiply4;
2204  s_pMul[2] = &Baseline_Multiply8;
2205 
2206  s_pBot[1] = &Baseline_MultiplyBottom4;
2207  s_pBot[2] = &Baseline_MultiplyBottom8;
2208 
2209  s_pSqu[1] = &Baseline_Square4;
2210  s_pSqu[2] = &Baseline_Square8;
2211 
2212  s_pTop[2] = &Baseline_MultiplyTop8;
2213 
2214 #if !CRYPTOPP_INTEGER_SSE2
2215  s_pMul[4] = &Baseline_Multiply16;
2216  s_pBot[4] = &Baseline_MultiplyBottom16;
2217  s_pSqu[4] = &Baseline_Square16;
2218  s_pTop[4] = &Baseline_MultiplyTop16;
2219 #endif // !CRYPTOPP_INTEGER_SSE2
2220  }
2221 }
2222 
2223 inline int Add(word *C, const word *A, const word *B, size_t N)
2224 {
2225 #if CRYPTOPP_INTEGER_SSE2
2226  return s_pAdd(N, C, A, B);
2227 #else
2228  return Baseline_Add(N, C, A, B);
2229 #endif // CRYPTOPP_INTEGER_SSE2
2230 }
2231 
2232 inline int Subtract(word *C, const word *A, const word *B, size_t N)
2233 {
2234 #if CRYPTOPP_INTEGER_SSE2
2235  return s_pSub(N, C, A, B);
2236 #else
2237  return Baseline_Sub(N, C, A, B);
2238 #endif // CRYPTOPP_INTEGER_SSE2
2239 }
2240 
2241 // ********************************************************
2242 
2243 
2244 #define A0 A
2245 #define A1 (A+N2)
2246 #define B0 B
2247 #define B1 (B+N2)
2248 
2249 #define T0 T
2250 #define T1 (T+N2)
2251 #define T2 (T+N)
2252 #define T3 (T+N+N2)
2253 
2254 #define R0 R
2255 #define R1 (R+N2)
2256 #define R2 (R+N)
2257 #define R3 (R+N+N2)
2258 
2259 // R[2*N] - result = A*B
2260 // T[2*N] - temporary work space
2261 // A[N] --- multiplier
2262 // B[N] --- multiplicant
2263 
2264 void RecursiveMultiply(word *R, word *T, const word *A, const word *B, size_t N)
2265 {
2266  CRYPTOPP_ASSERT(N>=2 && N%2==0);
2267 
2268  if (N <= s_recursionLimit)
2269  s_pMul[N/4](R, A, B);
2270  else
2271  {
2272  const size_t N2 = N/2;
2273 
2274  size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
2275  Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
2276 
2277  size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
2278  Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
2279 
2280  RecursiveMultiply(R2, T2, A1, B1, N2);
2281  RecursiveMultiply(T0, T2, R0, R1, N2);
2282  RecursiveMultiply(R0, T2, A0, B0, N2);
2283 
2284  // now T[01] holds (A1-A0)*(B0-B1), R[01] holds A0*B0, R[23] holds A1*B1
2285 
2286  int c2 = Add(R2, R2, R1, N2);
2287  int c3 = c2;
2288  c2 += Add(R1, R2, R0, N2);
2289  c3 += Add(R2, R2, R3, N2);
2290 
2291  if (AN2 == BN2)
2292  c3 -= Subtract(R1, R1, T0, N);
2293  else
2294  c3 += Add(R1, R1, T0, N);
2295 
2296  c3 += Increment(R2, N2, c2);
2297  CRYPTOPP_ASSERT (c3 >= 0 && c3 <= 2);
2298  Increment(R3, N2, c3);
2299  }
2300 }
2301 
2302 // R[2*N] - result = A*A
2303 // T[2*N] - temporary work space
2304 // A[N] --- number to be squared
2305 
2306 void RecursiveSquare(word *R, word *T, const word *A, size_t N)
2307 {
2308  CRYPTOPP_ASSERT(N && N%2==0);
2309 
2310  if (N <= s_recursionLimit)
2311  s_pSqu[N/4](R, A);
2312  else
2313  {
2314  const size_t N2 = N/2;
2315 
2316  RecursiveSquare(R0, T2, A0, N2);
2317  RecursiveSquare(R2, T2, A1, N2);
2318  RecursiveMultiply(T0, T2, A0, A1, N2);
2319 
2320  int carry = Add(R1, R1, T0, N);
2321  carry += Add(R1, R1, T0, N);
2322  Increment(R3, N2, carry);
2323  }
2324 }
2325 
2326 // R[N] - bottom half of A*B
2327 // T[3*N/2] - temporary work space
2328 // A[N] - multiplier
2329 // B[N] - multiplicant
2330 
2331 void RecursiveMultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
2332 {
2333  CRYPTOPP_ASSERT(N>=2 && N%2==0);
2334 
2335  if (N <= s_recursionLimit)
2336  s_pBot[N/4](R, A, B);
2337  else
2338  {
2339  const size_t N2 = N/2;
2340 
2341  RecursiveMultiply(R, T, A0, B0, N2);
2342  RecursiveMultiplyBottom(T0, T1, A1, B0, N2);
2343  Add(R1, R1, T0, N2);
2344  RecursiveMultiplyBottom(T0, T1, A0, B1, N2);
2345  Add(R1, R1, T0, N2);
2346  }
2347 }
2348 
2349 // R[N] --- upper half of A*B
2350 // T[2*N] - temporary work space
2351 // L[N] --- lower half of A*B
2352 // A[N] --- multiplier
2353 // B[N] --- multiplicant
2354 
2355 void MultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, size_t N)
2356 {
2357  CRYPTOPP_ASSERT(N>=2 && N%2==0);
2358 
2359  if (N <= s_recursionLimit)
2360  s_pTop[N/4](R, A, B, L[N-1]);
2361  else
2362  {
2363  const size_t N2 = N/2;
2364 
2365  size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
2366  Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
2367 
2368  size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
2369  Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
2370 
2371  RecursiveMultiply(T0, T2, R0, R1, N2);
2372  RecursiveMultiply(R0, T2, A1, B1, N2);
2373 
2374  // now T[01] holds (A1-A0)*(B0-B1) = A1*B0+A0*B1-A1*B1-A0*B0, R[01] holds A1*B1
2375 
2376  int t, c3;
2377  int c2 = Subtract(T2, L+N2, L, N2);
2378 
2379  if (AN2 == BN2)
2380  {
2381  c2 -= Add(T2, T2, T0, N2);
2382  t = (Compare(T2, R0, N2) == -1);
2383  c3 = t - Subtract(T2, T2, T1, N2);
2384  }
2385  else
2386  {
2387  c2 += Subtract(T2, T2, T0, N2);
2388  t = (Compare(T2, R0, N2) == -1);
2389  c3 = t + Add(T2, T2, T1, N2);
2390  }
2391 
2392  c2 += t;
2393  if (c2 >= 0)
2394  c3 += Increment(T2, N2, c2);
2395  else
2396  c3 -= Decrement(T2, N2, -c2);
2397  c3 += Add(R0, T2, R1, N2);
2398 
2399  CRYPTOPP_ASSERT (c3 >= 0 && c3 <= 2);
2400  Increment(R1, N2, c3);
2401  }
2402 }
2403 
2404 inline void Multiply(word *R, word *T, const word *A, const word *B, size_t N)
2405 {
2406  RecursiveMultiply(R, T, A, B, N);
2407 }
2408 
2409 inline void Square(word *R, word *T, const word *A, size_t N)
2410 {
2411  RecursiveSquare(R, T, A, N);
2412 }
2413 
2414 inline void MultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
2415 {
2416  RecursiveMultiplyBottom(R, T, A, B, N);
2417 }
2418 
2419 // R[NA+NB] - result = A*B
2420 // T[NA+NB] - temporary work space
2421 // A[NA] ---- multiplier
2422 // B[NB] ---- multiplicant
2423 
2424 void AsymmetricMultiply(word *R, word *T, const word *A, size_t NA, const word *B, size_t NB)
2425 {
2426  if (NA == NB)
2427  {
2428  // Profiling tells us the original second case was dominant, so it was promoted to the first If statement.
2429  // The code change occurred at Commit dc99266599a0e72d.
2430  if (A != B)
2431  Multiply(R, T, A, B, NA);
2432  else
2433  Square(R, T, A, NA);
2434 
2435  return;
2436  }
2437 
2438  if (NA > NB)
2439  {
2440  std::swap(A, B);
2441  std::swap(NA, NB);
2442  }
2443 
2444  CRYPTOPP_ASSERT(NB % NA == 0);
2445 
2446  if (NA==2 && !A[1])
2447  {
2448  // Profiling tells us the original Default case was dominant, so it was promoted to the first Case statement.
2449  // The code change occurred at Commit dc99266599a0e72d.
2450  switch (A[0])
2451  {
2452  default:
2453  R[NB] = LinearMultiply(R, B, A[0], NB);
2454  R[NB+1] = 0;
2455  return;
2456  case 0:
2457  SetWords(R, 0, NB+2);
2458  return;
2459  case 1:
2460  CopyWords(R, B, NB);
2461  R[NB] = R[NB+1] = 0;
2462  return;
2463  }
2464  }
2465 
2466  size_t i;
2467  if ((NB/NA)%2 == 0)
2468  {
2469  Multiply(R, T, A, B, NA);
2470  CopyWords(T+2*NA, R+NA, NA);
2471 
2472  for (i=2*NA; i<NB; i+=2*NA)
2473  Multiply(T+NA+i, T, A, B+i, NA);
2474  for (i=NA; i<NB; i+=2*NA)
2475  Multiply(R+i, T, A, B+i, NA);
2476  }
2477  else
2478  {
2479  for (i=0; i<NB; i+=2*NA)
2480  Multiply(R+i, T, A, B+i, NA);
2481  for (i=NA; i<NB; i+=2*NA)
2482  Multiply(T+NA+i, T, A, B+i, NA);
2483  }
2484 
2485  if (Add(R+NA, R+NA, T+2*NA, NB-NA))
2486  Increment(R+NB, NA);
2487 }
2488 
2489 // R[N] ----- result = A inverse mod 2**(WORD_BITS*N)
2490 // T[3*N/2] - temporary work space
2491 // A[N] ----- an odd number as input
2492 
2493 void RecursiveInverseModPower2(word *R, word *T, const word *A, size_t N)
2494 {
2495  // Profiling tells us the original Else was dominant, so it was promoted to the first If statement.
2496  // The code change occurred at Commit dc99266599a0e72d.
2497  if (N!=2)
2498  {
2499  const size_t N2 = N/2;
2500  RecursiveInverseModPower2(R0, T0, A0, N2);
2501  T0[0] = 1;
2502  SetWords(T0+1, 0, N2-1);
2503  MultiplyTop(R1, T1, T0, R0, A0, N2);
2504  MultiplyBottom(T0, T1, R0, A1, N2);
2505  Add(T0, R1, T0, N2);
2506  TwosComplement(T0, N2);
2507  MultiplyBottom(R1, T1, R0, T0, N2);
2508  }
2509  else
2510  {
2511  T[0] = AtomicInverseModPower2(A[0]);
2512  T[1] = 0;
2513  s_pBot[0](T+2, T, A);
2514  TwosComplement(T+2, 2);
2515  Increment(T+2, 2, 2);
2516  s_pBot[0](R, T, T+2);
2517  }
2518 }
2519 
2520 // R[N] --- result = X/(2**(WORD_BITS*N)) mod M
2521 // T[3*N] - temporary work space
2522 // X[2*N] - number to be reduced
2523 // M[N] --- modulus
2524 // U[N] --- multiplicative inverse of M mod 2**(WORD_BITS*N)
2525 
2526 void MontgomeryReduce(word *R, word *T, word *X, const word *M, const word *U, size_t N)
2527 {
2528 #if 1
2529  MultiplyBottom(R, T, X, U, N);
2530  MultiplyTop(T, T+N, X, R, M, N);
2531  word borrow = Subtract(T, X+N, T, N);
2532  // defend against timing attack by doing this Add even when not needed
2533  word carry = Add(T+N, T, M, N);
2534  CRYPTOPP_ASSERT(carry | !borrow);
2535  CRYPTOPP_UNUSED(carry), CRYPTOPP_UNUSED(borrow);
2536  CopyWords(R, T + ((0-borrow) & N), N);
2537 #elif 0
2538  const word u = 0-U[0];
2539  Declare2Words(p)
2540  for (size_t i=0; i<N; i++)
2541  {
2542  const word t = u * X[i];
2543  word c = 0;
2544  for (size_t j=0; j<N; j+=2)
2545  {
2546  MultiplyWords(p, t, M[j]);
2547  Acc2WordsBy1(p, X[i+j]);
2548  Acc2WordsBy1(p, c);
2549  X[i+j] = LowWord(p);
2550  c = HighWord(p);
2551  MultiplyWords(p, t, M[j+1]);
2552  Acc2WordsBy1(p, X[i+j+1]);
2553  Acc2WordsBy1(p, c);
2554  X[i+j+1] = LowWord(p);
2555  c = HighWord(p);
2556  }
2557 
2558  if (Increment(X+N+i, N-i, c))
2559  while (!Subtract(X+N, X+N, M, N)) {}
2560  }
2561 
2562  memcpy(R, X+N, N*WORD_SIZE);
2563 #else
2564  __m64 u = _mm_cvtsi32_si64(0-U[0]), p;
2565  for (size_t i=0; i<N; i++)
2566  {
2567  __m64 t = _mm_cvtsi32_si64(X[i]);
2568  t = _mm_mul_su32(t, u);
2569  __m64 c = _mm_setzero_si64();
2570  for (size_t j=0; j<N; j+=2)
2571  {
2572  p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j]));
2573  p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j]));
2574  c = _mm_add_si64(c, p);
2575  X[i+j] = _mm_cvtsi64_si32(c);
2576  c = _mm_srli_si64(c, 32);
2577  p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j+1]));
2578  p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j+1]));
2579  c = _mm_add_si64(c, p);
2580  X[i+j+1] = _mm_cvtsi64_si32(c);
2581  c = _mm_srli_si64(c, 32);
2582  }
2583 
2584  if (Increment(X+N+i, N-i, _mm_cvtsi64_si32(c)))
2585  while (!Subtract(X+N, X+N, M, N)) {}
2586  }
2587 
2588  memcpy(R, X+N, N*WORD_SIZE);
2589  _mm_empty();
2590 #endif
2591 }
2592 
2593 // R[N] --- result = X/(2**(WORD_BITS*N/2)) mod M
2594 // T[2*N] - temporary work space
2595 // X[2*N] - number to be reduced
2596 // M[N] --- modulus
2597 // U[N/2] - multiplicative inverse of M mod 2**(WORD_BITS*N/2)
2598 // V[N] --- 2**(WORD_BITS*3*N/2) mod M
2599 
2600 void HalfMontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, const word *V, size_t N)
2601 {
2602  CRYPTOPP_ASSERT(N%2==0 && N>=4);
2603 
2604 #define M0 M
2605 #define M1 (M+N2)
2606 #define V0 V
2607 #define V1 (V+N2)
2608 
2609 #define X0 X
2610 #define X1 (X+N2)
2611 #define X2 (X+N)
2612 #define X3 (X+N+N2)
2613 
2614  const size_t N2 = N/2;
2615  Multiply(T0, T2, V0, X3, N2);
2616  int c2 = Add(T0, T0, X0, N);
2617  MultiplyBottom(T3, T2, T0, U, N2);
2618  MultiplyTop(T2, R, T0, T3, M0, N2);
2619  c2 -= Subtract(T2, T1, T2, N2);
2620  Multiply(T0, R, T3, M1, N2);
2621  c2 -= Subtract(T0, T2, T0, N2);
2622  int c3 = -(int)Subtract(T1, X2, T1, N2);
2623  Multiply(R0, T2, V1, X3, N2);
2624  c3 += Add(R, R, T, N);
2625 
2626  if (c2>0)
2627  c3 += Increment(R1, N2);
2628  else if (c2<0)
2629  c3 -= Decrement(R1, N2, -c2);
2630 
2631  CRYPTOPP_ASSERT(c3>=-1 && c3<=1);
2632  if (c3>0)
2633  Subtract(R, R, M, N);
2634  else if (c3<0)
2635  Add(R, R, M, N);
2636 
2637 #undef M0
2638 #undef M1
2639 #undef V0
2640 #undef V1
2641 
2642 #undef X0
2643 #undef X1
2644 #undef X2
2645 #undef X3
2646 }
2647 
2648 #undef A0
2649 #undef A1
2650 #undef B0
2651 #undef B1
2652 
2653 #undef T0
2654 #undef T1
2655 #undef T2
2656 #undef T3
2657 
2658 #undef R0
2659 #undef R1
2660 #undef R2
2661 #undef R3
2662 
2663 /*
2664 // do a 3 word by 2 word divide, returns quotient and leaves remainder in A
2665 static word SubatomicDivide(word *A, word B0, word B1)
2666 {
2667  // Assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a word
2668  CRYPTOPP_ASSERT(A[2] < B1 || (A[2]==B1 && A[1] < B0));
2669 
2670  // estimate the quotient: do a 2 word by 1 word divide
2671  word Q;
2672  if (B1+1 == 0)
2673  Q = A[2];
2674  else
2675  Q = DWord(A[1], A[2]).DividedBy(B1+1);
2676 
2677  // now subtract Q*B from A
2678  DWord p = DWord::Multiply(B0, Q);
2679  DWord u = (DWord) A[0] - p.GetLowHalf();
2680  A[0] = u.GetLowHalf();
2681  u = (DWord) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - DWord::Multiply(B1, Q);
2682  A[1] = u.GetLowHalf();
2683  A[2] += u.GetHighHalf();
2684 
2685  // Q <= actual quotient, so fix it
2686  while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
2687  {
2688  u = (DWord) A[0] - B0;
2689  A[0] = u.GetLowHalf();
2690  u = (DWord) A[1] - B1 - u.GetHighHalfAsBorrow();
2691  A[1] = u.GetLowHalf();
2692  A[2] += u.GetHighHalf();
2693  Q++;
2694  CRYPTOPP_ASSERT(Q); // shouldn't overflow
2695  }
2696 
2697  return Q;
2698 }
2699 
2700 // do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1
2701 static inline void AtomicDivide(word *Q, const word *A, const word *B)
2702 {
2703  if (!B[0] && !B[1]) // if divisor is 0, we assume divisor==2**(2*WORD_BITS)
2704  {
2705  Q[0] = A[2];
2706  Q[1] = A[3];
2707  }
2708  else
2709  {
2710  word T[4];
2711  T[0] = A[0]; T[1] = A[1]; T[2] = A[2]; T[3] = A[3];
2712  Q[1] = SubatomicDivide(T+1, B[0], B[1]);
2713  Q[0] = SubatomicDivide(T, B[0], B[1]);
2714 
2715 #if defined(CRYPTOPP_DEBUG)
2716  // multiply quotient and divisor and add remainder, make sure it equals dividend
2717  CRYPTOPP_ASSERT(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
2718  word P[4];
2719  LowLevel::Multiply2(P, Q, B);
2720  Add(P, P, T, 4);
2721  CRYPTOPP_ASSERT(memcmp(P, A, 4*WORD_SIZE)==0);
2722 #endif
2723  }
2724 }
2725 */
2726 
2727 static inline void AtomicDivide(word *Q, const word *A, const word *B)
2728 {
2729  word T[4];
2730  DWord q = DivideFourWordsByTwo<word, DWord>(T, DWord(A[0], A[1]), DWord(A[2], A[3]), DWord(B[0], B[1]));
2731  Q[0] = q.GetLowHalf();
2732  Q[1] = q.GetHighHalf();
2733 
2734 #if defined(CRYPTOPP_DEBUG)
2735  if (B[0] || B[1])
2736  {
2737  // multiply quotient and divisor and add remainder, make sure it equals dividend
2738  CRYPTOPP_ASSERT(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
2739  word P[4];
2740  s_pMul[0](P, Q, B);
2741  Add(P, P, T, 4);
2742  CRYPTOPP_ASSERT(memcmp(P, A, 4*WORD_SIZE)==0);
2743  }
2744 #endif
2745 }
2746 
2747 // for use by Divide(), corrects the underestimated quotient {Q1,Q0}
2748 static void CorrectQuotientEstimate(word *R, word *T, word *Q, const word *B, size_t N)
2749 {
2750  CRYPTOPP_ASSERT(N && N%2==0);
2751 
2752  AsymmetricMultiply(T, T+N+2, Q, 2, B, N);
2753 
2754  word borrow = Subtract(R, R, T, N+2);
2755  CRYPTOPP_ASSERT(!borrow && !R[N+1]);
2756  CRYPTOPP_UNUSED(borrow);
2757 
2758  while (R[N] || Compare(R, B, N) >= 0)
2759  {
2760  R[N] -= Subtract(R, R, B, N);
2761  Q[1] += (++Q[0]==0);
2762  CRYPTOPP_ASSERT(Q[0] || Q[1]); // no overflow
2763  }
2764 }
2765 
2766 // R[NB] -------- remainder = A%B
2767 // Q[NA-NB+2] --- quotient = A/B
2768 // T[NA+3*(NB+2)] - temp work space
2769 // A[NA] -------- dividend
2770 // B[NB] -------- divisor
2771 
2772 void Divide(word *R, word *Q, word *T, const word *A, size_t NA, const word *B, size_t NB)
2773 {
2774  CRYPTOPP_ASSERT(NA && NB && NA%2==0 && NB%2==0);
2775  CRYPTOPP_ASSERT(B[NB-1] || B[NB-2]);
2776  CRYPTOPP_ASSERT(NB <= NA);
2777 
2778  // set up temporary work space
2779  word *const TA=T;
2780  word *const TB=T+NA+2;
2781  word *const TP=T+NA+2+NB;
2782 
2783  // copy B into TB and normalize it so that TB has highest bit set to 1
2784  unsigned shiftWords = (B[NB-1]==0);
2785  TB[0] = TB[NB-1] = 0;
2786  CopyWords(TB+shiftWords, B, NB-shiftWords);
2787  unsigned shiftBits = WORD_BITS - BitPrecision(TB[NB-1]);
2788  CRYPTOPP_ASSERT(shiftBits < WORD_BITS);
2789  ShiftWordsLeftByBits(TB, NB, shiftBits);
2790 
2791  // copy A into TA and normalize it
2792  TA[0] = TA[NA] = TA[NA+1] = 0;
2793  CopyWords(TA+shiftWords, A, NA);
2794  ShiftWordsLeftByBits(TA, NA+2, shiftBits);
2795 
2796  if (TA[NA+1]==0 && TA[NA] <= 1)
2797  {
2798  Q[NA-NB+1] = Q[NA-NB] = 0;
2799  while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0)
2800  {
2801  TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB);
2802  ++Q[NA-NB];
2803  }
2804  }
2805  else
2806  {
2807  NA+=2;
2808  CRYPTOPP_ASSERT(Compare(TA+NA-NB, TB, NB) < 0);
2809  }
2810 
2811  word BT[2];
2812  BT[0] = TB[NB-2] + 1;
2813  BT[1] = TB[NB-1] + (BT[0]==0);
2814 
2815  // start reducing TA mod TB, 2 words at a time
2816  for (size_t i=NA-2; i>=NB; i-=2)
2817  {
2818  AtomicDivide(Q+i-NB, TA+i-2, BT);
2819  CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB);
2820  }
2821 
2822  // copy TA into R, and denormalize it
2823  CopyWords(R, TA+shiftWords, NB);
2824  ShiftWordsRightByBits(R, NB, shiftBits);
2825 }
2826 
2827 static inline size_t EvenWordCount(const word *X, size_t N)
2828 {
2829  while (N && X[N-2]==0 && X[N-1]==0)
2830  N-=2;
2831  return N;
2832 }
2833 
2834 // return k
2835 // R[N] --- result = A^(-1) * 2^k mod M
2836 // T[4*N] - temporary work space
2837 // A[NA] -- number to take inverse of
2838 // M[N] --- modulus
2839 
2840 unsigned int AlmostInverse(word *R, word *T, const word *A, size_t NA, const word *M, size_t N)
2841 {
2842  CRYPTOPP_ASSERT(NA<=N && N && N%2==0);
2843 
2844  word *b = T;
2845  word *c = T+N;
2846  word *f = T+2*N;
2847  word *g = T+3*N;
2848  size_t bcLen=2, fgLen=EvenWordCount(M, N);
2849  unsigned int k=0;
2850  bool s=false;
2851 
2852  SetWords(T, 0, 3*N);
2853  b[0]=1;
2854  CopyWords(f, A, NA);
2855  CopyWords(g, M, N);
2856 
2857  while (1)
2858  {
2859  word t=f[0];
2860  while (!t)
2861  {
2862  if (EvenWordCount(f, fgLen)==0)
2863  {
2864  SetWords(R, 0, N);
2865  return 0;
2866  }
2867 
2868  ShiftWordsRightByWords(f, fgLen, 1);
2869  bcLen += 2 * (c[bcLen-1] != 0);
2870  CRYPTOPP_ASSERT(bcLen <= N);
2871  ShiftWordsLeftByWords(c, bcLen, 1);
2872  k+=WORD_BITS;
2873  t=f[0];
2874  }
2875 
2876  // t must be non-0; otherwise undefined.
2877  unsigned int i = TrailingZeros(t);
2878  t >>= i;
2879  k += i;
2880 
2881  if (t==1 && f[1]==0 && EvenWordCount(f+2, fgLen-2)==0)
2882  {
2883  if (s)
2884  Subtract(R, M, b, N);
2885  else
2886  CopyWords(R, b, N);
2887  return k;
2888  }
2889 
2890  ShiftWordsRightByBits(f, fgLen, i);
2891  t = ShiftWordsLeftByBits(c, bcLen, i);
2892  c[bcLen] += t;
2893  bcLen += 2 * (t!=0);
2894  CRYPTOPP_ASSERT(bcLen <= N);
2895 
2896  bool swap = Compare(f, g, fgLen)==-1;
2897  ConditionalSwapPointers(swap, f, g);
2898  ConditionalSwapPointers(swap, b, c);
2899  s ^= swap;
2900 
2901  fgLen -= 2 * !(f[fgLen-2] | f[fgLen-1]);
2902 
2903  Subtract(f, f, g, fgLen);
2904  t = Add(b, b, c, bcLen);
2905  b[bcLen] += t;
2906  bcLen += 2*t;
2907  CRYPTOPP_ASSERT(bcLen <= N);
2908  }
2909 }
2910 
2911 // R[N] - result = A/(2^k) mod M
2912 // A[N] - input
2913 // M[N] - modulus
2914 
2915 void DivideByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
2916 {
2917  CopyWords(R, A, N);
2918 
2919  while (k--)
2920  {
2921  if (R[0]%2==0)
2922  ShiftWordsRightByBits(R, N, 1);
2923  else
2924  {
2925  word carry = Add(R, R, M, N);
2926  ShiftWordsRightByBits(R, N, 1);
2927  R[N-1] += carry<<(WORD_BITS-1);
2928  }
2929  }
2930 }
2931 
2932 // R[N] - result = A*(2^k) mod M
2933 // A[N] - input
2934 // M[N] - modulus
2935 
2936 void MultiplyByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
2937 {
2938  CopyWords(R, A, N);
2939 
2940  while (k--)
2941  if (ShiftWordsLeftByBits(R, N, 1) || Compare(R, M, N)>=0)
2942  Subtract(R, R, M, N);
2943 }
2944 
2945 // ******************************************************************
2946 
2947 static const unsigned int RoundupSizeTable[] = {2, 2, 2, 4, 4, 8, 8, 8, 8};
2948 
2949 static inline size_t RoundupSize(size_t n)
2950 {
2951  if (n<=8)
2952  return RoundupSizeTable[n];
2953  else if (n<=16)
2954  return 16;
2955  else if (n<=32)
2956  return 32;
2957  else if (n<=64)
2958  return 64;
2959  else
2960  return size_t(1) << BitPrecision(n-1);
2961 }
2962 
2964  : reg(2), sign(POSITIVE)
2965 {
2966  reg[0] = reg[1] = 0;
2967 }
2968 
2970  : reg(RoundupSize(t.WordCount())), sign(t.sign)
2971 {
2972  CopyWords(reg, t.reg, reg.size());
2973 }
2974 
2975 Integer::Integer(Sign s, lword value)
2976  : reg(2), sign(s)
2977 {
2978  reg[0] = word(value);
2979  reg[1] = word(SafeRightShift<WORD_BITS>(value));
2980 }
2981 
2982 Integer::Integer(signed long value)
2983  : reg(2)
2984 {
2985  if (value >= 0)
2986  sign = POSITIVE;
2987  else
2988  {
2989  sign = NEGATIVE;
2990  value = -value;
2991  }
2992  reg[0] = word(value);
2993  reg[1] = word(SafeRightShift<WORD_BITS>((unsigned long)value));
2994 }
2995 
2996 Integer::Integer(Sign s, word high, word low)
2997  : reg(2), sign(s)
2998 {
2999  reg[0] = low;
3000  reg[1] = high;
3001 }
3002 
3004 {
3005  if (ByteCount() > sizeof(long))
3006  return false;
3007 
3008  unsigned long value = (unsigned long)reg[0];
3009  value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
3010 
3011  if (sign==POSITIVE)
3012  return (signed long)value >= 0;
3013  else
3014  return -(signed long)value < 0;
3015 }
3016 
3017 signed long Integer::ConvertToLong() const
3018 {
3020 
3021  unsigned long value = (unsigned long)reg[0];
3022  value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
3023  return sign==POSITIVE ? value : -(signed long)value;
3024 }
3025 
3026 Integer::Integer(BufferedTransformation &encodedInteger, size_t byteCount, Signedness s, ByteOrder o)
3027 {
3029 
3030  if (o == LITTLE_ENDIAN_ORDER)
3031  {
3032  SecByteBlock block(byteCount);
3033  encodedInteger.Get(block, block.size());
3034  std::reverse(block.begin(), block.begin()+block.size());
3035 
3036  Decode(block.begin(), block.size(), s);
3037  return;
3038  }
3039 
3040  Decode(encodedInteger, byteCount, s);
3041 }
3042 
3043 Integer::Integer(const byte *encodedInteger, size_t byteCount, Signedness s, ByteOrder o)
3044 {
3045  CRYPTOPP_ASSERT(encodedInteger && byteCount); // NULL buffer
3047 
3048  if (o == LITTLE_ENDIAN_ORDER)
3049  {
3050  SecByteBlock block(byteCount);
3051 #if (_MSC_FULL_VER >= 140050727)
3052  std::reverse_copy(encodedInteger, encodedInteger+byteCount,
3053  stdext::make_checked_array_iterator(block.begin(), block.size()));
3054 #else
3055  std::reverse_copy(encodedInteger, encodedInteger+byteCount, block.begin());
3056 #endif
3057  Decode(block.begin(), block.size(), s);
3058  return;
3059  }
3060 
3061  Decode(encodedInteger, byteCount, s);
3062 }
3063 
3065 {
3066  BERDecode(bt);
3067 }
3068 
3070 {
3071  Randomize(rng, bitcount);
3072 }
3073 
3074 Integer::Integer(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
3075 {
3076  if (!Randomize(rng, min, max, rnType, equiv, mod))
3078 }
3079 
3081 {
3082  Integer r((word)0, BitsToWords(e+1));
3083  r.SetBit(e);
3084  return r;
3085 }
3086 
3088 {
3089 #if defined(HAVE_GCC_INIT_PRIORITY) || defined(HAVE_MSC_INIT_PRIORITY)
3090  return s_zero;
3091 #else
3092  return Singleton<Integer, NewInteger<0L> >().Ref();
3093 #endif
3094 }
3095 
3097 {
3098 #if defined(HAVE_GCC_INIT_PRIORITY) || defined(HAVE_MSC_INIT_PRIORITY)
3099  return s_one;
3100 #else
3101  return Singleton<Integer, NewInteger<1L> >().Ref();
3102 #endif
3103 }
3104 
3106 {
3107 #if defined(HAVE_GCC_INIT_PRIORITY) || defined(HAVE_MSC_INIT_PRIORITY)
3108  return s_two;
3109 #else
3110  return Singleton<Integer, NewInteger<2L> >().Ref();
3111 #endif
3112 }
3113 
3115 {
3116  return IsNegative() ? false : (reg[0]==0 && WordCount()==0);
3117 }
3118 
3120 {
3121  if (this != &t)
3122  {
3123  if (reg.size() != t.reg.size() || t.reg[t.reg.size()/2] == 0)
3124  reg.New(RoundupSize(t.WordCount()));
3125  CopyWords(reg, t.reg, reg.size());
3126  sign = t.sign;
3127  }
3128  return *this;
3129 }
3130 
3131 bool Integer::GetBit(size_t n) const
3132 {
3133  // Profiling tells us the original Else was dominant, so it was promoted to the first If statement.
3134  // The code change occurred at Commit dc99266599a0e72d.
3135  if (n/WORD_BITS < reg.size())
3136  return bool((reg[n/WORD_BITS] >> (n % WORD_BITS)) & 1);
3137  else
3138  return 0;
3139 }
3140 
3141 void Integer::SetBit(size_t n, bool value)
3142 {
3143  if (value)
3144  {
3145  reg.CleanGrow(RoundupSize(BitsToWords(n+1)));
3146  reg[n/WORD_BITS] |= (word(1) << (n%WORD_BITS));
3147  }
3148  else
3149  {
3150  if (n/WORD_BITS < reg.size())
3151  reg[n/WORD_BITS] &= ~(word(1) << (n%WORD_BITS));
3152  }
3153 }
3154 
3155 byte Integer::GetByte(size_t n) const
3156 {
3157  // Profiling tells us the original Else was dominant, so it was promoted to the first If statement.
3158  // The code change occurred at Commit dc99266599a0e72d.
3159  if (n/WORD_SIZE < reg.size())
3160  return byte(reg[n/WORD_SIZE] >> ((n%WORD_SIZE)*8));
3161  else
3162  return 0;
3163 }
3164 
3165 void Integer::SetByte(size_t n, byte value)
3166 {
3167  reg.CleanGrow(RoundupSize(BytesToWords(n+1)));
3168  reg[n/WORD_SIZE] &= ~(word(0xff) << 8*(n%WORD_SIZE));
3169  reg[n/WORD_SIZE] |= (word(value) << 8*(n%WORD_SIZE));
3170 }
3171 
3172 lword Integer::GetBits(size_t i, size_t n) const
3173 {
3174  lword v = 0;
3175  CRYPTOPP_ASSERT(n <= sizeof(v)*8);
3176  for (unsigned int j=0; j<n; j++)
3177  v |= lword(GetBit(i+j)) << j;
3178  return v;
3179 }
3180 
3182 {
3183  Integer result(*this);
3184  result.Negate();
3185  return result;
3186 }
3187 
3189 {
3190  Integer result(*this);
3191  result.sign = POSITIVE;
3192  return result;
3193 }
3194 
3196 {
3197  reg.swap(a.reg);
3198  std::swap(sign, a.sign);
3199 }
3200 
3201 Integer::Integer(word value, size_t length)
3202  : reg(RoundupSize(length)), sign(POSITIVE)
3203 {
3204  reg[0] = value;
3205  SetWords(reg+1, 0, reg.size()-1);
3206 }
3207 
3208 template <class T>
3209 static Integer StringToInteger(const T *str, ByteOrder order)
3210 {
3211  CRYPTOPP_ASSERT( order == BIG_ENDIAN_ORDER || order == LITTLE_ENDIAN_ORDER );
3212 
3213  int radix, sign = 1;
3214  // GCC workaround
3215  // std::char_traits<wchar_t>::length() not defined in GCC 3.2 and STLport 4.5.3
3216  unsigned int length;
3217  for (length = 0; str[length] != 0; length++) {}
3218 
3219  Integer v;
3220 
3221  if (length == 0)
3222  return Integer::Zero();
3223 
3224  switch (str[length-1])
3225  {
3226  case 'h':
3227  case 'H':
3228  radix=16;
3229  break;
3230  case 'o':
3231  case 'O':
3232  radix=8;
3233  break;
3234  case 'b':
3235  case 'B':
3236  radix=2;
3237  break;
3238  default:
3239  radix=10;
3240  }
3241 
3242  // 'str' is of length 1 or more
3243  if (str[0] == '-')
3244  {
3245  sign = -1;
3246  str += 1, length -= 1;
3247  }
3248 
3249  if (length > 2 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X'))
3250  {
3251  radix = 16;
3252  str += 2, length -= 2;
3253  }
3254 
3255  if (order == BIG_ENDIAN_ORDER)
3256  {
3257  for (unsigned int i=0; i<length; i++)
3258  {
3259  int digit, ch = static_cast<int>(str[i]);
3260 
3261  // Profiling showd the second and third Else needed to be swapped
3262  // The code change occurred at Commit dc99266599a0e72d.
3263  if (ch >= '0' && ch <= '9')
3264  digit = ch - '0';
3265  else if (ch >= 'a' && ch <= 'f')
3266  digit = ch - 'a' + 10;
3267  else if (ch >= 'A' && ch <= 'F')
3268  digit = ch - 'A' + 10;
3269  else
3270  digit = radix;
3271 
3272  if (digit < radix)
3273  {
3274  v *= radix;
3275  v += digit;
3276  }
3277  }
3278  }
3279  else if (radix == 16 && order == LITTLE_ENDIAN_ORDER)
3280  {
3281  // Nibble high, low and count
3282  unsigned int nh = 0, nl = 0, nc = 0;
3283  Integer position(Integer::One());
3284 
3285  for (unsigned int i=0; i<length; i++)
3286  {
3287  int digit, ch = static_cast<int>(str[i]);
3288 
3289  if (ch >= '0' && ch <= '9')
3290  digit = ch - '0';
3291  else if (ch >= 'a' && ch <= 'f')
3292  digit = ch - 'a' + 10;
3293  else if (ch >= 'A' && ch <= 'F')
3294  digit = ch - 'A' + 10;
3295  else
3296  digit = radix;
3297 
3298  if (digit < radix)
3299  {
3300  if (nc++ == 0)
3301  nh = digit;
3302  else
3303  nl = digit;
3304 
3305  if (nc == 2)
3306  {
3307  v += position * (nh << 4 | nl);
3308  nc = 0, position <<= 8;
3309  }
3310  }
3311  }
3312 
3313  if (nc == 1)
3314  v += nh * position;
3315  }
3316  else // LITTLE_ENDIAN_ORDER && radix != 16
3317  {
3318  for (int i=static_cast<int>(length)-1; i>=0; i--)
3319  {
3320  int digit, ch = static_cast<int>(str[i]);
3321 
3322  if (ch >= '0' && ch <= '9')
3323  digit = ch - '0';
3324  else if (ch >= 'a' && ch <= 'f')
3325  digit = ch - 'a' + 10;
3326  else if (ch >= 'A' && ch <= 'F')
3327  digit = ch - 'A' + 10;
3328  else
3329  digit = radix;
3330 
3331  if (digit < radix)
3332  {
3333  v *= radix;
3334  v += digit;
3335  }
3336  }
3337  }
3338 
3339  if (sign == -1)
3340  v.Negate();
3341 
3342  return v;
3343 }
3344 
3345 Integer::Integer(const char *str, ByteOrder order)
3346  : reg(2), sign(POSITIVE)
3347 {
3348  *this = StringToInteger(str,order);
3349 }
3350 
3351 Integer::Integer(const wchar_t *str, ByteOrder order)
3352  : reg(2), sign(POSITIVE)
3353 {
3354  *this = StringToInteger(str,order);
3355 }
3356 
3357 unsigned int Integer::WordCount() const
3358 {
3359  return (unsigned int)CountWords(reg, reg.size());
3360 }
3361 
3362 unsigned int Integer::ByteCount() const
3363 {
3364  unsigned wordCount = WordCount();
3365  if (wordCount)
3366  return (wordCount-1)*WORD_SIZE + BytePrecision(reg[wordCount-1]);
3367  else
3368  return 0;
3369 }
3370 
3371 unsigned int Integer::BitCount() const
3372 {
3373  unsigned wordCount = WordCount();
3374  if (wordCount)
3375  return (wordCount-1)*WORD_BITS + BitPrecision(reg[wordCount-1]);
3376  else
3377  return 0;
3378 }
3379 
3380 void Integer::Decode(const byte *input, size_t inputLen, Signedness s)
3381 {
3382  CRYPTOPP_ASSERT(input && inputLen); // NULL buffer
3383  StringStore store(input, inputLen);
3384  Decode(store, inputLen, s);
3385 }
3386 
3388 {
3389  CRYPTOPP_ASSERT(bt.MaxRetrievable() >= inputLen);
3390  if (bt.MaxRetrievable() < inputLen)
3391  throw InvalidArgument("Integer: input length is too small");
3392 
3393  byte b;
3394  bt.Peek(b);
3395  sign = ((s==SIGNED) && (b & 0x80)) ? NEGATIVE : POSITIVE;
3396 
3397  while (inputLen>0 && (sign==POSITIVE ? b==0 : b==0xff))
3398  {
3399  bt.Skip(1);
3400  inputLen--;
3401  bt.Peek(b);
3402  }
3403 
3404  reg.CleanNew(RoundupSize(BytesToWords(inputLen)));
3405  for (size_t i=inputLen; i > 0; i--)
3406  {
3407  (void)bt.Get(b);
3408  reg[(i-1)/WORD_SIZE] |= word(b) << ((i-1)%WORD_SIZE)*8;
3409  }
3410 
3411  if (sign == NEGATIVE)
3412  {
3413  for (size_t i=inputLen; i<reg.size()*WORD_SIZE; i++)
3414  reg[i/WORD_SIZE] |= word(0xff) << (i%WORD_SIZE)*8;
3415  TwosComplement(reg, reg.size());
3416  }
3417 }
3418 
3419 size_t Integer::MinEncodedSize(Signedness signedness) const
3420 {
3421  // Profiling tells us the original second If was dominant, so it was promoted to the first If statement.
3422  // The code change occurred at Commit dc99266599a0e72d.
3423  unsigned int outputLen = STDMAX(1U, ByteCount());
3424  const bool pre = (signedness == UNSIGNED);
3425  if (!pre && NotNegative() && (GetByte(outputLen-1) & 0x80))
3426  outputLen++;
3427  if (pre)
3428  return outputLen;
3429  if (IsNegative() && *this < -Power2(outputLen*8-1))
3430  outputLen++;
3431  return outputLen;
3432 }
3433 
3434 // PKCS12_PBKDF and other classes use undersized buffers
3435 void Integer::Encode(byte *output, size_t outputLen, Signedness signedness) const
3436 {
3437  CRYPTOPP_ASSERT(output && outputLen); // NULL buffer
3438  ArraySink sink(output, outputLen);
3439  Encode(sink, outputLen, signedness);
3440 }
3441 
3442 void Integer::Encode(BufferedTransformation &bt, size_t outputLen, Signedness signedness) const
3443 {
3444  if (signedness == UNSIGNED || NotNegative())
3445  {
3446  for (size_t i=outputLen; i > 0; i--)
3447  bt.Put(GetByte(i-1));
3448  }
3449  else
3450  {
3451  // take two's complement of *this
3452  Integer temp = Integer::Power2(8*STDMAX((size_t)ByteCount(), outputLen)) + *this;
3453  temp.Encode(bt, outputLen, UNSIGNED);
3454  }
3455 }
3456 
3458 {
3459  DERGeneralEncoder enc(bt, INTEGER);
3461  enc.MessageEnd();
3462 }
3463 
3464 void Integer::BERDecode(const byte *input, size_t len)
3465 {
3466  CRYPTOPP_ASSERT(input && len); // NULL buffer
3467  StringStore store(input, len);
3468  BERDecode(store);
3469 }
3470 
3472 {
3473  BERGeneralDecoder dec(bt, INTEGER);
3474  if (!dec.IsDefiniteLength() || dec.MaxRetrievable() < dec.RemainingLength())
3475  BERDecodeError();
3476  Decode(dec, (size_t)dec.RemainingLength(), SIGNED);
3477  dec.MessageEnd();
3478 }
3479 
3481 {
3482  DERGeneralEncoder enc(bt, OCTET_STRING);
3483  Encode(enc, length);
3484  enc.MessageEnd();
3485 }
3486 
3488 {
3489  BERGeneralDecoder dec(bt, OCTET_STRING);
3490  if (!dec.IsDefiniteLength() || dec.RemainingLength() != length)
3491  BERDecodeError();
3492  Decode(dec, length);
3493  dec.MessageEnd();
3494 }
3495 
3496 size_t Integer::OpenPGPEncode(byte *output, size_t bufferSize) const
3497 {
3498  CRYPTOPP_ASSERT(output && bufferSize); // NULL buffer
3499  CRYPTOPP_ASSERT(bufferSize >= MinEncodedSize()); // Undersized buffer
3500  ArraySink sink(output, bufferSize);
3501  return OpenPGPEncode(sink);
3502 }
3503 
3505 {
3506  word16 bitCount = word16(BitCount());
3507  bt.PutWord16(bitCount);
3508  size_t byteCount = BitsToBytes(bitCount);
3509  Encode(bt, byteCount);
3510  return 2 + byteCount;
3511 }
3512 
3513 void Integer::OpenPGPDecode(const byte *input, size_t len)
3514 {
3515  CRYPTOPP_ASSERT(input && len); // NULL buffer
3516  StringStore store(input, len);
3517  OpenPGPDecode(store);
3518 }
3519 
3521 {
3522  word16 bitCount;
3523  if (bt.GetWord16(bitCount) != 2 || bt.MaxRetrievable() < BitsToBytes(bitCount))
3524  throw OpenPGPDecodeErr();
3525  Decode(bt, BitsToBytes(bitCount));
3526 }
3527 
3529 {
3530  const size_t nbytes = nbits/8 + 1;
3531  SecByteBlock buf(nbytes);
3532  rng.GenerateBlock(buf, nbytes);
3533  if (nbytes)
3534  buf[0] = (byte)Crop(buf[0], nbits % 8);
3535  Decode(buf, nbytes, UNSIGNED);
3536 }
3537 
3538 void Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max)
3539 {
3540  if (min > max)
3541  throw InvalidArgument("Integer: Min must be no greater than Max");
3542 
3543  Integer range = max - min;
3544  const unsigned int nbits = range.BitCount();
3545 
3546  do
3547  {
3548  Randomize(rng, nbits);
3549  }
3550  while (*this > range);
3551 
3552  *this += min;
3553 }
3554 
3555 bool Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
3556 {
3557  return GenerateRandomNoThrow(rng, MakeParameters("Min", min)("Max", max)("RandomNumberType", rnType)("EquivalentTo", equiv)("Mod", mod));
3558 }
3559 
3561 {
3562 public:
3563  KDF2_RNG(const byte *seed, size_t seedSize)
3564  : m_counter(0), m_counterAndSeed(seedSize + 4)
3565  {
3566  memcpy(m_counterAndSeed + 4, seed, seedSize);
3567  }
3568 
3569  void GenerateBlock(byte *output, size_t size)
3570  {
3571  CRYPTOPP_ASSERT(output && size); // NULL buffer
3572  PutWord(false, BIG_ENDIAN_ORDER, m_counterAndSeed, m_counter);
3573  ++m_counter;
3574  P1363_KDF2<SHA1>::DeriveKey(output, size, m_counterAndSeed, m_counterAndSeed.size(), NULLPTR, 0);
3575  }
3576 
3577 private:
3578  word32 m_counter;
3579  SecByteBlock m_counterAndSeed;
3580 };
3581 
3582 bool Integer::GenerateRandomNoThrow(RandomNumberGenerator &i_rng, const NameValuePairs &params)
3583 {
3584  Integer min = params.GetValueWithDefault("Min", Integer::Zero());
3585  Integer max;
3586  if (!params.GetValue("Max", max))
3587  {
3588  int bitLength;
3589  if (params.GetIntValue("BitLength", bitLength))
3590  max = Integer::Power2(bitLength);
3591  else
3592  throw InvalidArgument("Integer: missing Max argument");
3593  }
3594  if (min > max)
3595  throw InvalidArgument("Integer: Min must be no greater than Max");
3596 
3597  Integer equiv = params.GetValueWithDefault("EquivalentTo", Integer::Zero());
3598  Integer mod = params.GetValueWithDefault("Mod", Integer::One());
3599 
3600  if (equiv.IsNegative() || equiv >= mod)
3601  throw InvalidArgument("Integer: invalid EquivalentTo and/or Mod argument");
3602 
3603  Integer::RandomNumberType rnType = params.GetValueWithDefault("RandomNumberType", Integer::ANY);
3604 
3605  member_ptr<KDF2_RNG> kdf2Rng;
3607  if (params.GetValue(Name::Seed(), seed))
3608  {
3609  ByteQueue bq;
3610  DERSequenceEncoder seq(bq);
3611  min.DEREncode(seq);
3612  max.DEREncode(seq);
3613  equiv.DEREncode(seq);
3614  mod.DEREncode(seq);
3615  DEREncodeUnsigned(seq, rnType);
3616  DEREncodeOctetString(seq, seed.begin(), seed.size());
3617  seq.MessageEnd();
3618 
3619  SecByteBlock finalSeed((size_t)bq.MaxRetrievable());
3620  bq.Get(finalSeed, finalSeed.size());
3621  kdf2Rng.reset(new KDF2_RNG(finalSeed.begin(), finalSeed.size()));
3622  }
3623  RandomNumberGenerator &rng = kdf2Rng.get() ? (RandomNumberGenerator &)*kdf2Rng : i_rng;
3624 
3625  switch (rnType)
3626  {
3627  case ANY:
3628  if (mod == One())
3629  Randomize(rng, min, max);
3630  else
3631  {
3632  Integer min1 = min + (equiv-min)%mod;
3633  if (max < min1)
3634  return false;
3635  Randomize(rng, Zero(), (max - min1) / mod);
3636  *this *= mod;
3637  *this += min1;
3638  }
3639  return true;
3640 
3641  case PRIME:
3642  {
3643  const PrimeSelector *pSelector = params.GetValueWithDefault(Name::PointerToPrimeSelector(), (const PrimeSelector *)NULLPTR);
3644 
3645  int i;
3646  i = 0;
3647  while (1)
3648  {
3649  if (++i==16)
3650  {
3651  // check if there are any suitable primes in [min, max]
3652  Integer first = min;
3653  if (FirstPrime(first, max, equiv, mod, pSelector))
3654  {
3655  // if there is only one suitable prime, we're done
3656  *this = first;
3657  if (!FirstPrime(first, max, equiv, mod, pSelector))
3658  return true;
3659  }
3660  else
3661  return false;
3662  }
3663 
3664  Randomize(rng, min, max);
3665  if (FirstPrime(*this, STDMIN(*this+mod*PrimeSearchInterval(max), max), equiv, mod, pSelector))
3666  return true;
3667  }
3668  }
3669 
3670  default:
3671  throw InvalidArgument("Integer: invalid RandomNumberType argument");
3672  }
3673 }
3674 
3675 std::istream& operator>>(std::istream& in, Integer &a)
3676 {
3677  char c;
3678  unsigned int length = 0;
3679  SecBlock<char> str(length + 16);
3680 
3681  std::ws(in);
3682 
3683  do
3684  {
3685  in.read(&c, 1);
3686  str[length++] = c;
3687  if (length >= str.size())
3688  str.Grow(length + 16);
3689  }
3690  while (in && (c=='-' || c=='x' || (c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F') || c=='h' || c=='H' || c=='o' || c=='O' || c==',' || c=='.'));
3691 
3692  if (in.gcount())
3693  in.putback(c);
3694  str[length-1] = '\0';
3695  a = Integer(str);
3696 
3697  return in;
3698 }
3699 
3700 std::ostream& operator<<(std::ostream& out, const Integer &a)
3701 {
3702  // Get relevant conversion specifications from ostream.
3703  const long f = out.flags() & std::ios::basefield; // Get base digits.
3704  int base, block;
3705  char suffix;
3706  switch(f)
3707  {
3708  case std::ios::oct :
3709  base = 8;
3710  block = 8;
3711  suffix = 'o';
3712  break;
3713  case std::ios::hex :
3714  base = 16;
3715  block = 4;
3716  suffix = 'h';
3717  break;
3718  default :
3719  base = 10;
3720  block = 3;
3721  suffix = '.';
3722  }
3723 
3724  Integer temp1=a, temp2;
3725 
3726  if (a.IsNegative())
3727  {
3728  out << '-';
3729  temp1.Negate();
3730  }
3731 
3732  if (!a)
3733  out << '0';
3734 
3735  static const char upper[]="0123456789ABCDEF";
3736  static const char lower[]="0123456789abcdef";
3737 
3738  const char* vec = (out.flags() & std::ios::uppercase) ? upper : lower;
3739  unsigned int i=0;
3740  SecBlock<char> s(a.BitCount() / (SaturatingSubtract1(BitPrecision(base),1U)) + 1);
3741 
3742  while (!!temp1)
3743  {
3744  word digit;
3745  Integer::Divide(digit, temp2, temp1, base);
3746  s[i++]=vec[digit];
3747  temp1.swap(temp2);
3748  }
3749 
3750  while (i--)
3751  {
3752  out << s[i];
3753 // if (i && !(i%block))
3754 // out << ",";
3755  }
3756 
3757 #ifdef CRYPTOPP_USE_STD_SHOWBASE
3758  if (out.flags() & std::ios_base::showbase)
3759  out << suffix;
3760 
3761  return out;
3762 #else
3763  return out << suffix;
3764 #endif
3765 }
3766 
3768 {
3769  if (NotNegative())
3770  {
3771  if (Increment(reg, reg.size()))
3772  {
3773  reg.CleanGrow(2*reg.size());
3774  reg[reg.size()/2]=1;
3775  }
3776  }
3777  else
3778  {
3779  word borrow = Decrement(reg, reg.size());
3780  CRYPTOPP_ASSERT(!borrow); CRYPTOPP_UNUSED(borrow);
3781 
3782  if (WordCount()==0)
3783  *this = Zero();
3784  }
3785  return *this;
3786 }
3787 
3789 {
3790  if (IsNegative())
3791  {
3792  if (Increment(reg, reg.size()))
3793  {
3794  reg.CleanGrow(2*reg.size());
3795  reg[reg.size()/2]=1;
3796  }
3797  }
3798  else
3799  {
3800  if (Decrement(reg, reg.size()))
3801  *this = -One();
3802  }
3803  return *this;
3804 }
3805 
3806 // This is a bit operation. We set sign to POSITIVE, so there's no need to
3807 // worry about negative zero. Also see http://stackoverflow.com/q/11644362.
3809 {
3810  if (this == &t)
3811  {
3812  return AbsoluteValue();
3813  }
3814  else if (reg.size() >= t.reg.size())
3815  {
3816  Integer result(t);
3817  AndWords(result.reg, reg, t.reg.size());
3818 
3819  result.sign = POSITIVE;
3820  return result;
3821  }
3822  else // reg.size() < t.reg.size()
3823  {
3824  Integer result(*this);
3825  AndWords(result.reg, t.reg, reg.size());
3826 
3827  result.sign = POSITIVE;
3828  return result;
3829  }
3830 }
3831 
3832 // This is a bit operation. We set sign to POSITIVE, so there's no need to
3833 // worry about negative zero. Also see http://stackoverflow.com/q/11644362.
3834 Integer Integer::Or(const Integer& t) const
3835 {
3836  if (this == &t)
3837  {
3838  return AbsoluteValue();
3839  }
3840  else if (reg.size() >= t.reg.size())
3841  {
3842  Integer result(*this);
3843  OrWords(result.reg, t.reg, t.reg.size());
3844 
3845  result.sign = POSITIVE;
3846  return result;
3847  }
3848  else // reg.size() < t.reg.size()
3849  {
3850  Integer result(t);
3851  OrWords(result.reg, reg, reg.size());
3852 
3853  result.sign = POSITIVE;
3854  return result;
3855  }
3856 }
3857 
3858 // This is a bit operation. We set sign to POSITIVE, so there's no need to
3859 // worry about negative zero. Also see http://stackoverflow.com/q/11644362.
3861 {
3862  if (this == &t)
3863  {
3864  return Integer::Zero();
3865  }
3866  else if (reg.size() >= t.reg.size())
3867  {
3868  Integer result(*this);
3869  XorWords(result.reg, t.reg, t.reg.size());
3870 
3871  result.sign = POSITIVE;
3872  return result;
3873  }
3874  else // reg.size() < t.reg.size()
3875  {
3876  Integer result(t);
3877  XorWords(result.reg, reg, reg.size());
3878 
3879  result.sign = POSITIVE;
3880  return result;
3881  }
3882 }
3883 
3884 void PositiveAdd(Integer &sum, const Integer &a, const Integer& b)
3885 {
3886  // Profiling tells us the original second Else If was dominant, so it was promoted to the first If statement.
3887  // The code change occurred at Commit dc99266599a0e72d.
3888  int carry; const bool pre = (a.reg.size() == b.reg.size());
3889  if (!pre && a.reg.size() > b.reg.size())
3890  {
3891  carry = Add(sum.reg, a.reg, b.reg, b.reg.size());
3892  CopyWords(sum.reg+b.reg.size(), a.reg+b.reg.size(), a.reg.size()-b.reg.size());
3893  carry = Increment(sum.reg+b.reg.size(), a.reg.size()-b.reg.size(), carry);
3894  }
3895  else if (pre)
3896  {
3897  carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
3898  }
3899  else
3900  {
3901  carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
3902  CopyWords(sum.reg+a.reg.size(), b.reg+a.reg.size(), b.reg.size()-a.reg.size());
3903  carry = Increment(sum.reg+a.reg.size(), b.reg.size()-a.reg.size(), carry);
3904  }
3905 
3906  if (carry)
3907  {
3908  sum.reg.CleanGrow(2*sum.reg.size());
3909  sum.reg[sum.reg.size()/2] = 1;
3910  }
3911  sum.sign = Integer::POSITIVE;
3912 }
3913 
3914 void PositiveSubtract(Integer &diff, const Integer &a, const Integer& b)
3915 {
3916  unsigned aSize = a.WordCount();
3917  aSize += aSize%2;
3918  unsigned bSize = b.WordCount();
3919  bSize += bSize%2;
3920 
3921  // Profiling tells us the original second Else If was dominant, so it was promoted to the first If statement.
3922  // The code change occurred at Commit dc99266599a0e72d.
3923  if (aSize > bSize)
3924  {
3925  word borrow = Subtract(diff.reg, a.reg, b.reg, bSize);
3926  CopyWords(diff.reg+bSize, a.reg+bSize, aSize-bSize);
3927  borrow = Decrement(diff.reg+bSize, aSize-bSize, borrow);
3928  CRYPTOPP_ASSERT(!borrow);
3929  diff.sign = Integer::POSITIVE;
3930  }
3931  else if (aSize == bSize)
3932  {
3933  if (Compare(a.reg, b.reg, aSize) >= 0)
3934  {
3935  Subtract(diff.reg, a.reg, b.reg, aSize);
3936  diff.sign = Integer::POSITIVE;
3937  }
3938  else
3939  {
3940  Subtract(diff.reg, b.reg, a.reg, aSize);
3941  diff.sign = Integer::NEGATIVE;
3942  }
3943  }
3944  else
3945  {
3946  word borrow = Subtract(diff.reg, b.reg, a.reg, aSize);
3947  CopyWords(diff.reg+aSize, b.reg+aSize, bSize-aSize);
3948  borrow = Decrement(diff.reg+aSize, bSize-aSize, borrow);
3949  CRYPTOPP_ASSERT(!borrow);
3950  diff.sign = Integer::NEGATIVE;
3951  }
3952 }
3953 
3954 // MSVC .NET 2003 workaround
3955 template <class T> inline const T& STDMAX2(const T& a, const T& b)
3956 {
3957  return a < b ? b : a;
3958 }
3959 
3961 {
3962  Integer sum((word)0, STDMAX2(reg.size(), b.reg.size()));
3963  if (NotNegative())
3964  {
3965  if (b.NotNegative())
3966  PositiveAdd(sum, *this, b);
3967  else
3968  PositiveSubtract(sum, *this, b);
3969  }
3970  else
3971  {
3972  if (b.NotNegative())
3973  PositiveSubtract(sum, b, *this);
3974  else
3975  {
3976  PositiveAdd(sum, *this, b);
3977  sum.sign = Integer::NEGATIVE;
3978  }
3979  }
3980  return sum;
3981 }
3982 
3984 {
3985  reg.CleanGrow(t.reg.size());
3986  if (NotNegative())
3987  {
3988  if (t.NotNegative())
3989  PositiveAdd(*this, *this, t);
3990  else
3991  PositiveSubtract(*this, *this, t);
3992  }
3993  else
3994  {
3995  if (t.NotNegative())
3996  PositiveSubtract(*this, t, *this);
3997  else
3998  {
3999  PositiveAdd(*this, *this, t);
4000  sign = Integer::NEGATIVE;
4001  }
4002  }
4003  return *this;
4004 }
4005 
4007 {
4008  Integer diff((word)0, STDMAX2(reg.size(), b.reg.size()));
4009  if (NotNegative())
4010  {
4011  if (b.NotNegative())
4012  PositiveSubtract(diff, *this, b);
4013  else
4014  PositiveAdd(diff, *this, b);
4015  }
4016  else
4017  {
4018  if (b.NotNegative())
4019  {
4020  PositiveAdd(diff, *this, b);
4021  diff.sign = Integer::NEGATIVE;
4022  }
4023  else
4024  PositiveSubtract(diff, b, *this);
4025  }
4026  return diff;
4027 }
4028 
4030 {
4031  reg.CleanGrow(t.reg.size());
4032  if (NotNegative())
4033  {
4034  if (t.NotNegative())
4035  PositiveSubtract(*this, *this, t);
4036  else
4037  PositiveAdd(*this, *this, t);
4038  }
4039  else
4040  {
4041  if (t.NotNegative())
4042  {
4043  PositiveAdd(*this, *this, t);
4044  sign = Integer::NEGATIVE;
4045  }
4046  else
4047  PositiveSubtract(*this, t, *this);
4048  }
4049  return *this;
4050 }
4051 
4053 {
4054  const size_t wordCount = WordCount();
4055  const size_t shiftWords = n / WORD_BITS;
4056  const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
4057 
4058  reg.CleanGrow(RoundupSize(wordCount+BitsToWords(n)));
4059  ShiftWordsLeftByWords(reg, wordCount + shiftWords, shiftWords);
4060  ShiftWordsLeftByBits(reg+shiftWords, wordCount+BitsToWords(shiftBits), shiftBits);
4061  return *this;
4062 }
4063 
4065 {
4066  const size_t wordCount = WordCount();
4067  const size_t shiftWords = n / WORD_BITS;
4068  const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
4069 
4070  ShiftWordsRightByWords(reg, wordCount, shiftWords);
4071  if (wordCount > shiftWords)
4072  ShiftWordsRightByBits(reg, wordCount-shiftWords, shiftBits);
4073  if (IsNegative() && WordCount()==0) // avoid -0
4074  *this = Zero();
4075  return *this;
4076 }
4077 
4079 {
4080  if (this != &t)
4081  {
4082  const size_t size = STDMIN(reg.size(), t.reg.size());
4083  reg.resize(size);
4084  AndWords(reg, t.reg, size);
4085  }
4086  sign = POSITIVE;
4087  return *this;
4088 }
4089 
4091 {
4092  if (this != &t)
4093  {
4094  if (reg.size() >= t.reg.size())
4095  {
4096  OrWords(reg, t.reg, t.reg.size());
4097  }
4098  else // reg.size() < t.reg.size()
4099  {
4100  const size_t head = reg.size();
4101  const size_t tail = t.reg.size() - reg.size();
4102  reg.resize(head+tail);
4103  OrWords(reg, t.reg, head);
4104  CopyWords(reg+head,t.reg+head,tail);
4105  }
4106  }
4107  sign = POSITIVE;
4108  return *this;
4109 }
4110 
4112 {
4113  if (this == &t)
4114  {
4115  *this = Zero();
4116  }
4117  else
4118  {
4119  if (reg.size() >= t.reg.size())
4120  {
4121  XorWords(reg, t.reg, t.reg.size());
4122  }
4123  else // reg.size() < t.reg.size()
4124  {
4125  const size_t head = reg.size();
4126  const size_t tail = t.reg.size() - reg.size();
4127  reg.resize(head+tail);
4128  XorWords(reg, t.reg, head);
4129  CopyWords(reg+head,t.reg+head,tail);
4130  }
4131  }
4132  sign = POSITIVE;
4133  return *this;
4134 }
4135 
4136 void PositiveMultiply(Integer &product, const Integer &a, const Integer &b)
4137 {
4138  size_t aSize = RoundupSize(a.WordCount());
4139  size_t bSize = RoundupSize(b.WordCount());
4140 
4141  product.reg.CleanNew(RoundupSize(aSize+bSize));
4142  product.sign = Integer::POSITIVE;
4143 
4144  IntegerSecBlock workspace(aSize + bSize);
4145  AsymmetricMultiply(product.reg, workspace, a.reg, aSize, b.reg, bSize);
4146 }
4147 
4148 void Multiply(Integer &product, const Integer &a, const Integer &b)
4149 {
4150  PositiveMultiply(product, a, b);
4151 
4152  if (a.NotNegative() != b.NotNegative())
4153  product.Negate();
4154 }
4155 
4157 {
4158  Integer product;
4159  Multiply(product, *this, b);
4160  return product;
4161 }
4162 
4163 /*
4164 void PositiveDivide(Integer &remainder, Integer &quotient,
4165  const Integer &dividend, const Integer &divisor)
4166 {
4167  remainder.reg.CleanNew(divisor.reg.size());
4168  remainder.sign = Integer::POSITIVE;
4169  quotient.reg.New(0);
4170  quotient.sign = Integer::POSITIVE;
4171  unsigned i=dividend.BitCount();
4172  while (i--)
4173  {
4174  word overflow = ShiftWordsLeftByBits(remainder.reg, remainder.reg.size(), 1);
4175  remainder.reg[0] |= dividend[i];
4176  if (overflow || remainder >= divisor)
4177  {
4178  Subtract(remainder.reg, remainder.reg, divisor.reg, remainder.reg.size());
4179  quotient.SetBit(i);
4180  }
4181  }
4182 }
4183 */
4184 
4185 void PositiveDivide(Integer &remainder, Integer &quotient,
4186  const Integer &a, const Integer &b)
4187 {
4188  unsigned aSize = a.WordCount();
4189  unsigned bSize = b.WordCount();
4190 
4191  if (!bSize)
4192  throw Integer::DivideByZero();
4193 
4194  if (aSize < bSize)
4195  {
4196  remainder = a;
4197  remainder.sign = Integer::POSITIVE;
4198  quotient = Integer::Zero();
4199  return;
4200  }
4201 
4202  aSize += aSize%2; // round up to next even number
4203  bSize += bSize%2;
4204 
4205  remainder.reg.CleanNew(RoundupSize(bSize));
4206  remainder.sign = Integer::POSITIVE;
4207  quotient.reg.CleanNew(RoundupSize(aSize-bSize+2));
4208  quotient.sign = Integer::POSITIVE;
4209 
4210  IntegerSecBlock T(aSize+3*(bSize+2));
4211  Divide(remainder.reg, quotient.reg, T, a.reg, aSize, b.reg, bSize);
4212 }
4213 
4214 void Integer::Divide(Integer &remainder, Integer &quotient, const Integer &dividend, const Integer &divisor)
4215 {
4216  PositiveDivide(remainder, quotient, dividend, divisor);
4217 
4218  if (dividend.IsNegative())
4219  {
4220  quotient.Negate();
4221  if (remainder.NotZero())
4222  {
4223  --quotient;
4224  remainder = divisor.AbsoluteValue() - remainder;
4225  }
4226  }
4227 
4228  if (divisor.IsNegative())
4229  quotient.Negate();
4230 }
4231 
4232 void Integer::DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n)
4233 {
4234  q = a;
4235  q >>= n;
4236 
4237  const size_t wordCount = BitsToWords(n);
4238  if (wordCount <= a.WordCount())
4239  {
4240  r.reg.resize(RoundupSize(wordCount));
4241  CopyWords(r.reg, a.reg, wordCount);
4242  SetWords(r.reg+wordCount, 0, r.reg.size()-wordCount);
4243  if (n % WORD_BITS != 0)
4244  r.reg[wordCount-1] %= (word(1) << (n % WORD_BITS));
4245  }
4246  else
4247  {
4248  r.reg.resize(RoundupSize(a.WordCount()));
4249  CopyWords(r.reg, a.reg, r.reg.size());
4250  }
4251  r.sign = POSITIVE;
4252 
4253  if (a.IsNegative() && r.NotZero())
4254  {
4255  --q;
4256  r = Power2(n) - r;
4257  }
4258 }
4259 
4261 {
4262  Integer remainder, quotient;
4263  Integer::Divide(remainder, quotient, *this, b);
4264  return quotient;
4265 }
4266 
4268 {
4269  Integer remainder, quotient;
4270  Integer::Divide(remainder, quotient, *this, b);
4271  return remainder;
4272 }
4273 
4274 void Integer::Divide(word &remainder, Integer &quotient, const Integer &dividend, word divisor)
4275 {
4276  if (!divisor)
4277  throw Integer::DivideByZero();
4278 
4279  if ((divisor & (divisor-1)) == 0) // divisor is a power of 2
4280  {
4281  quotient = dividend >> (BitPrecision(divisor)-1);
4282  remainder = dividend.reg[0] & (divisor-1);
4283  return;
4284  }
4285 
4286  unsigned int i = dividend.WordCount();
4287  quotient.reg.CleanNew(RoundupSize(i));
4288  remainder = 0;
4289  while (i--)
4290  {
4291  quotient.reg[i] = DWord(dividend.reg[i], remainder) / divisor;
4292  remainder = DWord(dividend.reg[i], remainder) % divisor;
4293  }
4294 
4295  if (dividend.NotNegative())
4296  quotient.sign = POSITIVE;
4297  else
4298  {
4299  quotient.sign = NEGATIVE;
4300  if (remainder)
4301  {
4302  --quotient;
4303  remainder = divisor - remainder;
4304  }
4305  }
4306 }
4307 
4309 {
4310  word remainder;
4311  Integer quotient;
4312  Integer::Divide(remainder, quotient, *this, b);
4313  return quotient;
4314 }
4315 
4316 word Integer::Modulo(word divisor) const
4317 {
4318  if (!divisor)
4319  throw Integer::DivideByZero();
4320 
4321  word remainder;
4322 
4323  // Profiling tells us the original Else was dominant, so it was promoted to the first If statement.
4324  // The code change occurred at Commit dc99266599a0e72d.
4325  if ((divisor & (divisor-1)) != 0) // divisor is not a power of 2
4326  {
4327  // Profiling tells us the original Else was dominant, so it was promoted to the first If statement.
4328  // The code change occurred at Commit dc99266599a0e72d.
4329  unsigned int i = WordCount();
4330  if (divisor > 5)
4331  {
4332  remainder = 0;
4333  while (i--)
4334  remainder = DWord(reg[i], remainder) % divisor;
4335  }
4336  else
4337  {
4338  DWord sum(0, 0);
4339  while (i--)
4340  sum += reg[i];
4341  remainder = sum % divisor;
4342  }
4343  }
4344  else // divisor is a power of 2
4345  {
4346  remainder = reg[0] & (divisor-1);
4347  }
4348 
4349  if (IsNegative() && remainder)
4350  remainder = divisor - remainder;
4351 
4352  return remainder;
4353 }
4354 
4356 {
4357  if (!!(*this)) // don't flip sign if *this==0
4358  sign = Sign(1-sign);
4359 }
4360 
4361 int Integer::PositiveCompare(const Integer& t) const
4362 {
4363  // Profiling tells us the original Else was dominant, so it was promoted to the first If statement.
4364  // The code change occurred at Commit dc99266599a0e72d.
4365  const unsigned size = WordCount(), tSize = t.WordCount();
4366  if (size != tSize)
4367  return size > tSize ? 1 : -1;
4368  else
4369  return CryptoPP::Compare(reg, t.reg, size);
4370 }
4371 
4372 int Integer::Compare(const Integer& t) const
4373 {
4374  if (NotNegative())
4375  {
4376  if (t.NotNegative())
4377  return PositiveCompare(t);
4378  else
4379  return 1;
4380  }
4381  else
4382  {
4383  if (t.NotNegative())
4384  return -1;
4385  else
4386  return -PositiveCompare(t);
4387  }
4388 }
4389 
4391 {
4392  if (!IsPositive())
4393  return Zero();
4394 
4395  // overestimate square root
4396  Integer x, y = Power2((BitCount()+1)/2);
4397  CRYPTOPP_ASSERT(y*y >= *this);
4398 
4399  do
4400  {
4401  x = y;
4402  y = (x + *this/x) >> 1;
4403  } while (y<x);
4404 
4405  return x;
4406 }
4407 
4408 bool Integer::IsSquare() const
4409 {
4410  Integer r = SquareRoot();
4411  return *this == r.Squared();
4412 }
4413 
4414 bool Integer::IsUnit() const
4415 {
4416  return (WordCount() == 1) && (reg[0] == 1);
4417 }
4418 
4420 {
4421  return IsUnit() ? *this : Zero();
4422 }
4423 
4424 Integer a_times_b_mod_c(const Integer &x, const Integer& y, const Integer& m)
4425 {
4426  return x*y%m;
4427 }
4428 
4429 Integer a_exp_b_mod_c(const Integer &x, const Integer& e, const Integer& m)
4430 {
4431  ModularArithmetic mr(m);
4432  return mr.Exponentiate(x, e);
4433 }
4434 
4436 {
4437  return EuclideanDomainOf<Integer>().Gcd(a, b);
4438 }
4439 
4441 {
4443 
4444  if (IsNegative())
4445  return Modulo(m).InverseMod(m);
4446 
4447  if (m.IsEven())
4448  {
4449  if (!m || IsEven())
4450  return Zero(); // no inverse
4451  if (*this == One())
4452  return One();
4453 
4454  Integer u = m.Modulo(*this).InverseMod(*this);
4455  return !u ? Zero() : (m*(*this-u)+1)/(*this);
4456  }
4457 
4458  SecBlock<word> T(m.reg.size() * 4);
4459  Integer r((word)0, m.reg.size());
4460  unsigned k = AlmostInverse(r.reg, T, reg, reg.size(), m.reg, m.reg.size());
4461  DivideByPower2Mod(r.reg, r.reg, k, m.reg, m.reg.size());
4462  return r;
4463 }
4464 
4465 word Integer::InverseMod(word mod) const
4466 {
4467  word g0 = mod, g1 = *this % mod;
4468  word v0 = 0, v1 = 1;
4469  word y;
4470 
4471  while (g1)
4472  {
4473  if (g1 == 1)
4474  return v1;
4475  y = g0 / g1;
4476  g0 = g0 % g1;
4477  v0 += y * v1;
4478 
4479  if (!g0)
4480  break;
4481  if (g0 == 1)
4482  return mod-v0;
4483  y = g1 / g0;
4484  g1 = g1 % g0;
4485  v1 += y * v0;
4486  }
4487  return 0;
4488 }
4489 
4490 // ********************************************************
4491 
4493 {
4494  BERSequenceDecoder seq(bt);
4495  OID oid(seq);
4496  if (oid != ASN1::prime_field())
4497  BERDecodeError();
4498  m_modulus.BERDecode(seq);
4499  seq.MessageEnd();
4500  m_result.reg.resize(m_modulus.reg.size());
4501 }
4502 
4504 {
4505  DERSequenceEncoder seq(bt);
4506  ASN1::prime_field().DEREncode(seq);
4507  m_modulus.DEREncode(seq);
4508  seq.MessageEnd();
4509 }
4510 
4512 {
4513  a.DEREncodeAsOctetString(out, MaxElementByteLength());
4514 }
4515 
4517 {
4518  a.BERDecodeAsOctetString(in, MaxElementByteLength());
4519 }
4520 
4522 {
4523  if (a.reg.size()==m_modulus.reg.size())
4524  {
4525  CryptoPP::DivideByPower2Mod(m_result.reg.begin(), a.reg, 1, m_modulus.reg, a.reg.size());
4526  return m_result;
4527  }
4528  else
4529  return m_result1 = (a.IsEven() ? (a >> 1) : ((a+m_modulus) >> 1));
4530 }
4531 
4532 const Integer& ModularArithmetic::Add(const Integer &a, const Integer &b) const
4533 {
4534  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4535  {
4536  if (CryptoPP::Add(m_result.reg.begin(), a.reg, b.reg, a.reg.size())
4537  || Compare(m_result.reg, m_modulus.reg, a.reg.size()) >= 0)
4538  {
4539  CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
4540  }
4541  return m_result;
4542  }
4543  else
4544  {
4545  m_result1 = a+b;
4546  if (m_result1 >= m_modulus)
4547  m_result1 -= m_modulus;
4548  return m_result1;
4549  }
4550 }
4551 
4553 {
4554  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4555  {
4556  if (CryptoPP::Add(a.reg, a.reg, b.reg, a.reg.size())
4557  || Compare(a.reg, m_modulus.reg, a.reg.size()) >= 0)
4558  {
4559  CryptoPP::Subtract(a.reg, a.reg, m_modulus.reg, a.reg.size());
4560  }
4561  }
4562  else
4563  {
4564  a+=b;
4565  if (a>=m_modulus)
4566  a-=m_modulus;
4567  }
4568 
4569  return a;
4570 }
4571 
4572 const Integer& ModularArithmetic::Subtract(const Integer &a, const Integer &b) const
4573 {
4574  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4575  {
4576  if (CryptoPP::Subtract(m_result.reg.begin(), a.reg, b.reg, a.reg.size()))
4577  CryptoPP::Add(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
4578  return m_result;
4579  }
4580  else
4581  {
4582  m_result1 = a-b;
4583  if (m_result1.IsNegative())
4584  m_result1 += m_modulus;
4585  return m_result1;
4586  }
4587 }
4588 
4590 {
4591  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4592  {
4593  if (CryptoPP::Subtract(a.reg, a.reg, b.reg, a.reg.size()))
4594  CryptoPP::Add(a.reg, a.reg, m_modulus.reg, a.reg.size());
4595  }
4596  else
4597  {
4598  a-=b;
4599  if (a.IsNegative())
4600  a+=m_modulus;
4601  }
4602 
4603  return a;
4604 }
4605 
4607 {
4608  if (!a)
4609  return a;
4610 
4611  CopyWords(m_result.reg.begin(), m_modulus.reg, m_modulus.reg.size());
4612  if (CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, a.reg, a.reg.size()))
4613  Decrement(m_result.reg.begin()+a.reg.size(), m_modulus.reg.size()-a.reg.size());
4614 
4615  return m_result;
4616 }
4617 
4618 Integer ModularArithmetic::CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
4619 {
4620  if (m_modulus.IsOdd())
4621  {
4622  MontgomeryRepresentation dr(m_modulus);
4623  return dr.ConvertOut(dr.CascadeExponentiate(dr.ConvertIn(x), e1, dr.ConvertIn(y), e2));
4624  }
4625  else
4626  return AbstractRing<Integer>::CascadeExponentiate(x, e1, y, e2);
4627 }
4628 
4629 void ModularArithmetic::SimultaneousExponentiate(Integer *results, const Integer &base, const Integer *exponents, unsigned int exponentsCount) const
4630 {
4631  if (m_modulus.IsOdd())
4632  {
4633  MontgomeryRepresentation dr(m_modulus);
4634  dr.SimultaneousExponentiate(results, dr.ConvertIn(base), exponents, exponentsCount);
4635  for (unsigned int i=0; i<exponentsCount; i++)
4636  results[i] = dr.ConvertOut(results[i]);
4637  }
4638  else
4639  AbstractRing<Integer>::SimultaneousExponentiate(results, base, exponents, exponentsCount);
4640 }
4641 
4643  : ModularArithmetic(m),
4644  m_u((word)0, m_modulus.reg.size()),
4645  m_workspace(5*m_modulus.reg.size())
4646 {
4647  if (!m_modulus.IsOdd())
4648  throw InvalidArgument("MontgomeryRepresentation: Montgomery representation requires an odd modulus");
4649 
4650  RecursiveInverseModPower2(m_u.reg, m_workspace, m_modulus.reg, m_modulus.reg.size());
4651 }
4652 
4654 {
4655  word *const T = m_workspace.begin();
4656  word *const R = m_result.reg.begin();
4657  const size_t N = m_modulus.reg.size();
4658  CRYPTOPP_ASSERT(a.reg.size()<=N && b.reg.size()<=N);
4659 
4660  AsymmetricMultiply(T, T+2*N, a.reg, a.reg.size(), b.reg, b.reg.size());
4661  SetWords(T+a.reg.size()+b.reg.size(), 0, 2*N-a.reg.size()-b.reg.size());
4662  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4663  return m_result;
4664 }
4665 
4667 {
4668  word *const T = m_workspace.begin();
4669  word *const R = m_result.reg.begin();
4670  const size_t N = m_modulus.reg.size();
4671  CRYPTOPP_ASSERT(a.reg.size()<=N);
4672 
4673  CryptoPP::Square(T, T+2*N, a.reg, a.reg.size());
4674  SetWords(T+2*a.reg.size(), 0, 2*N-2*a.reg.size());
4675  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4676  return m_result;
4677 }
4678 
4680 {
4681  word *const T = m_workspace.begin();
4682  word *const R = m_result.reg.begin();
4683  const size_t N = m_modulus.reg.size();
4684  CRYPTOPP_ASSERT(a.reg.size()<=N);
4685 
4686  CopyWords(T, a.reg, a.reg.size());
4687  SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
4688  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4689  return m_result;
4690 }
4691 
4693 {
4694 // return (EuclideanMultiplicativeInverse(a, modulus)<<(2*WORD_BITS*modulus.reg.size()))%modulus;
4695  word *const T = m_workspace.begin();
4696  word *const R = m_result.reg.begin();
4697  const size_t N = m_modulus.reg.size();
4698  CRYPTOPP_ASSERT(a.reg.size()<=N);
4699 
4700  CopyWords(T, a.reg, a.reg.size());
4701  SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
4702  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4703  unsigned k = AlmostInverse(R, T, R, N, m_modulus.reg, N);
4704 
4705 // cout << "k=" << k << " N*32=" << 32*N << endl;
4706 
4707  if (k>N*WORD_BITS)
4708  DivideByPower2Mod(R, R, k-N*WORD_BITS, m_modulus.reg, N);
4709  else
4710  MultiplyByPower2Mod(R, R, N*WORD_BITS-k, m_modulus.reg, N);
4711 
4712  return m_result;
4713 }
4714 
4715 // Specialization declared in misc.h to allow us to print integers
4716 // with additional control options, like arbirary bases and uppercase.
4717 template <> CRYPTOPP_DLL
4718 std::string IntToString<Integer>(Integer value, unsigned int base)
4719 {
4720  // Hack... set the high bit for uppercase. Set the next bit fo a suffix.
4721  static const unsigned int BIT_32 = (1U << 31);
4722  const bool UPPER = !!(base & BIT_32);
4723  static const unsigned int BIT_31 = (1U << 30);
4724  const bool BASE = !!(base & BIT_31);
4725 
4726  const char CH = UPPER ? 'A' : 'a';
4727  base &= ~(BIT_32|BIT_31);
4728  CRYPTOPP_ASSERT(base >= 2 && base <= 32);
4729 
4730  if (value == 0)
4731  return "0";
4732 
4733  bool negative = false, zero = false;
4734  if (value.IsNegative())
4735  {
4736  negative = true;
4737  value.Negate();
4738  }
4739 
4740  if (!value)
4741  zero = true;
4742 
4743  SecBlock<char> s(value.BitCount() / (SaturatingSubtract1(BitPrecision(base),1U)) + 1);
4744  Integer temp;
4745 
4746  unsigned int i=0;
4747  while (!!value)
4748  {
4749  word digit;
4750  Integer::Divide(digit, temp, value, word(base));
4751  s[i++]=char((digit < 10 ? '0' : (CH - 10)) + digit);
4752  value.swap(temp);
4753  }
4754 
4755  std::string result;
4756  result.reserve(i+2);
4757 
4758  if (negative)
4759  result += '-';
4760 
4761  if (zero)
4762  result += '0';
4763 
4764  while (i--)
4765  result += s[i];
4766 
4767  if (BASE)
4768  {
4769  if (base == 10)
4770  result += '.';
4771  else if (base == 16)
4772  result += 'h';
4773  else if (base == 8)
4774  result += 'o';
4775  else if (base == 2)
4776  result += 'b';
4777  }
4778 
4779  return result;
4780 }
4781 
4782 // Specialization declared in misc.h to avoid Coverity findings.
4783 template <> CRYPTOPP_DLL
4784 std::string IntToString<word64>(word64 value, unsigned int base)
4785 {
4786  // Hack... set the high bit for uppercase.
4787  static const unsigned int HIGH_BIT = (1U << 31);
4788  const char CH = !!(base & HIGH_BIT) ? 'A' : 'a';
4789  base &= ~HIGH_BIT;
4790 
4791  CRYPTOPP_ASSERT(base >= 2);
4792  if (value == 0)
4793  return "0";
4794 
4795  std::string result;
4796  while (value > 0)
4797  {
4798  word64 digit = value % base;
4799  result = char((digit < 10 ? '0' : (CH - 10)) + digit) + result;
4800  value /= base;
4801  }
4802  return result;
4803 }
4804 
4805 #ifndef CRYPTOPP_NO_ASSIGN_TO_INTEGER
4806 // Allow the linker to discard Integer code if not needed.
4807 // Also see http://github.com/weidai11/cryptopp/issues/389.
4808 bool AssignIntToInteger(const std::type_info &valueType, void *pInteger, const void *pInt)
4809 {
4810  if (valueType != typeid(Integer))
4811  return false;
4812  *reinterpret_cast<Integer *>(pInteger) = *reinterpret_cast<const int *>(pInt);
4813  return true;
4814 }
4815 #endif
4816 
4817 NAMESPACE_END
4818 
4819 #endif
Used to pass byte array input as part of a NameValuePairs object.
Definition: algparam.h:29
An invalid argument was detected.
Definition: cryptlib.h:194
unsigned int WordCount() const
Determines the number of words required to represent the Integer.
Definition: integer.cpp:3357
Integer MultiplicativeInverse() const
return inverse if 1 or -1, otherwise return 0
Definition: integer.cpp:4419
Integer & Reduce(Integer &a, const Integer &b) const
TODO.
Definition: integer.cpp:4589
bool NotZero() const
Determines if the Integer is non-0.
Definition: integer.h:333
Classes for working with NameValuePairs.
Integer & operator|=(const Integer &t)
Bitwise OR Assignment.
Definition: integer.cpp:4090
void swap(SecBlock< T, A > &b)
Swap contents with another SecBlock.
Definition: secblock.h:792
a number which is probabilistically prime
Definition: integer.h:95
Utility functions for the Crypto++ library.
Integer Plus(const Integer &b) const
Addition.
Definition: integer.cpp:3960
virtual size_t Peek(byte &outByte) const
Peek a 8-bit byte.
Definition: cryptlib.cpp:506
ByteOrder
Provides the byte ordering.
Definition: cryptlib.h:134
Restricts the instantiation of a class to one static object without locks.
Definition: misc.h:309
void CleanNew(size_type newSize)
Change size without preserving contents.
Definition: secblock.h:732
Integer And(const Integer &) const
Bitwise AND.
Definition: integer.cpp:3808
an unsigned value
Definition: integer.h:85
T GetValueWithDefault(const char *name, T defaultValue) const
Get a named value.
Definition: cryptlib.h:356
bool IsSquare() const
Determine whether this integer is a perfect square.
Definition: integer.cpp:4408
Integer Xor(const Integer &) const
Bitwise XOR.
Definition: integer.cpp:3860
void DEREncode(BufferedTransformation &bt) const
Encode in DER format.
Definition: integer.cpp:3457
size_t DEREncodeUnsigned(BufferedTransformation &out, T w, byte asnTag=INTEGER)
DER Encode unsigned value.
Definition: asn.h:455
virtual void GenerateBlock(byte *output, size_t size)
Generate random array of bytes.
Definition: cryptlib.cpp:298
size_t size() const
Length of the memory block.
Definition: algparam.h:93
size_t BitsToBytes(size_t bitCount)
Returns the number of 8-bit bytes or octets required for the specified number of bits.
Definition: misc.h:796
This file contains helper classes/functions for implementing public key algorithms.
void DEREncodeAsOctetString(BufferedTransformation &bt, size_t length) const
Encode absolute value as big-endian octet string.
Definition: integer.cpp:3480
Integer & operator=(const Integer &t)
Assignment.
Definition: integer.cpp:3119
bool FirstPrime(Integer &p, const Integer &max, const Integer &equiv, const Integer &mod, const PrimeSelector *pSelector)
Finds a random prime of special form.
Definition: nbtheory.cpp:381
static Integer Gcd(const Integer &a, const Integer &n)
greatest common divisor
Definition: integer.cpp:4435
void resize(size_type newSize)
Change size and preserve contents.
Definition: secblock.h:782
Integer & operator+=(const Integer &t)
Addition Assignment.
Definition: integer.cpp:3983
size_t BitsToWords(size_t bitCount)
Returns the number of words required for the specified number of bits.
Definition: misc.h:816
void PutWord(bool assumeAligned, ByteOrder order, byte *block, T value, const byte *xorBlock=NULL)
Access a block of memory.
Definition: misc.h:2235
unsigned int BytePrecision(const T &value)
Returns the number of 8-bit bytes or octets required for a value.
Definition: misc.h:677
void CleanGrow(size_type newSize)
Change size and preserve contents.
Definition: secblock.h:764
Integer & operator--()
Pre-decrement.
Definition: integer.cpp:3788
virtual Element CascadeExponentiate(const Element &x, const Integer &e1, const Element &y, const Integer &e2) const
TODO.
Definition: algebra.cpp:323
Secure memory block with allocator and cleanup.
Definition: secblock.h:483
const Integer & Inverse(const Integer &a) const
Inverts the element in the ring.
Definition: integer.cpp:4606
signed long ConvertToLong() const
Convert the Integer to Long.
Definition: integer.cpp:3017
const Integer & Subtract(const Integer &a, const Integer &b) const
Subtracts elements in the ring.
Definition: integer.cpp:4572
void OpenPGPDecode(const byte *input, size_t inputLen)
Decode from OpenPGP format.
Definition: integer.cpp:3513
Signedness
Used when importing and exporting integers.
Definition: integer.h:83
ASN.1 object identifiers for algorthms and schemes.
Square block cipher.
Definition: square.h:24
Classes for automatic resource management.
bool IsP4()
Determines if the CPU is an Intel P4.
Definition: cpu.h:245
bool IsNegative() const
Determines if the Integer is negative.
Definition: integer.h:336
lword MaxRetrievable() const
Provides the number of bytes ready for retrieval.
Definition: queue.h:34
Library configuration file.
MontgomeryRepresentation(const Integer &modulus)
Construct a MontgomeryRepresentation.
Definition: integer.cpp:4642
static void DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n)
returns same result as Divide(r, q, a, Power2(n)), but faster
Definition: integer.cpp:4232
Ring of congruence classes modulo n.
Definition: modarith.h:34
Interface for random number generators.
Definition: cryptlib.h:1188
size_t BytesToWords(size_t byteCount)
Returns the number of words required for the specified number of bytes.
Definition: misc.h:806
Integer Or(const Integer &) const
Bitwise OR.
Definition: integer.cpp:3834
void Randomize(RandomNumberGenerator &rng, size_t bitCount)
Set this Integer to random integer.
Definition: integer.cpp:3528
std::string IntToString< word64 >(word64 value, unsigned int base)
Converts an unsigned value to a string.
Definition: integer.cpp:4784
void New(size_type newSize)
Change size without preserving contents.
Definition: secblock.h:717
void SetByte(size_t n, byte value)
Set the n-th byte to value.
Definition: integer.cpp:3165
Integer InverseMod(const Integer &n) const
calculate multiplicative inverse of *this mod n
Definition: integer.cpp:4440
the value is negative
Definition: integer.h:77
SecBlock<byte> typedef.
Definition: secblock.h:810
BER Sequence Decoder.
Definition: asn.h:303
Interface for buffered transformations.
Definition: cryptlib.h:1343
bool IsPositive() const
Determines if the Integer is positive.
Definition: integer.h:342
bool NotNegative() const
Determines if the Integer is non-negative.
Definition: integer.h:339
const Integer & Add(const Integer &a, const Integer &b) const
Adds elements in the ring.
Definition: integer.cpp:4532
static const Integer & One()
Integer representing 1.
Definition: integer.cpp:3096
Integer & Accumulate(Integer &a, const Integer &b) const
TODO.
Definition: integer.cpp:4552
byte order is little-endian
Definition: cryptlib.h:136
Sign
Used internally to represent the integer.
Definition: integer.h:73
Integer ConvertIn(const Integer &a) const
Reduces an element in the congruence class.
Definition: modarith.h:289
bool operator!() const
Negation.
Definition: integer.cpp:3114
Pointer that overloads operator ->
Definition: smartptr.h:39
bool IsUnit() const
is 1 or -1
Definition: integer.cpp:4414
unsigned int ByteCount() const
Determines the number of bytes required to represent the Integer.
Definition: integer.cpp:3362
Classes and functions for secure memory allocations.
void DEREncodeElement(BufferedTransformation &out, const Element &a) const
Encodes element in DER format.
Definition: integer.cpp:4511
Integer Modulo(const Integer &b) const
Remainder.
Definition: integer.cpp:4267
Copy input to a memory buffer.
Definition: filters.h:1126
inline ::Integer operator-(const ::Integer &a, const ::Integer &b)
Subtraction.
Definition: integer.h:668
void BERDecodeElement(BufferedTransformation &in, Element &a) const
Decodes element in DER format.
Definition: integer.cpp:4516
size_t MinEncodedSize(Signedness sign=UNSIGNED) const
Minimum number of bytes to encode this integer.
Definition: integer.cpp:3419
Integer DividedBy(const Integer &b) const
Division.
Definition: integer.cpp:4260
Integer CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
TODO.
Definition: integer.cpp:4618
Integer Times(const Integer &b) const
Multiplication.
Definition: integer.cpp:4156
a number with no special properties
Definition: integer.h:93
const byte * begin() const
Pointer to the first byte in the memory block.
Definition: algparam.h:89
size_t Put(byte inByte, bool blocking=true)
Input a byte for processing.
Definition: cryptlib.h:1364
Integer & operator++()
Pre-increment.
Definition: integer.cpp:3767
AlgorithmParameters MakeParameters(const char *name, const T &value, bool throwIfNotUsed=true)
Create an object that implements NameValuePairs.
Definition: algparam.h:502
void swap(Integer &a)
Swaps this Integer with another Integer.
Definition: integer.cpp:3195
Integer()
Creates the zero integer.
Definition: integer.cpp:2963
unsigned int TrailingZeros(word32 v)
Determines the number of trailing 0-bits in a value.
Definition: misc.h:725
Integer & operator<<=(size_t n)
Left-shift Assignment.
Definition: integer.cpp:4052
Exception thrown when an error is encountered decoding an OpenPGP integer.
Definition: integer.h:282
void Negate()
Reverse the Sign of the Integer.
Definition: integer.cpp:4355
int Compare(const Integer &a) const
Perform signed comparison.
Definition: integer.cpp:4372
T Crop(T value, size_t bits)
Truncates the value to the specified number of bits.
Definition: misc.h:784
void BERDecodeAsOctetString(BufferedTransformation &bt, size_t length)
Decode nonnegative value from big-endian octet string.
Definition: integer.cpp:3487
Application callback to signal suitability of a cabdidate prime.
Definition: nbtheory.h:80
void ConditionalSwapPointers(bool c, T &a, T &b)
Performs a branchless swap of pointers a and b if condition c is true.
Definition: misc.h:1157
static Integer Power2(size_t e)
Exponentiates to a power of 2.
Definition: integer.cpp:3080
Multiple precision integer with arithmetic operations.
Definition: integer.h:49
virtual Element Exponentiate(const Element &a, const Integer &e) const
Raises a base to an exponent in the group.
Definition: algebra.cpp:316
OID operator+(const OID &lhs, unsigned long rhs)
Append a value to an OID.
const Integer & Multiply(const Integer &a, const Integer &b) const
Multiplies elements in the ring.
Definition: integer.cpp:4653
a signed value
Definition: integer.h:87
size_t OpenPGPEncode(byte *output, size_t bufferSize) const
Encode absolute value in OpenPGP format.
Definition: integer.cpp:3496
const Integer & Half(const Integer &a) const
Divides an element by 2.
Definition: integer.cpp:4521
static const Integer & Two()
Integer representing 2.
Definition: integer.cpp:3105
virtual lword Skip(lword skipMax=LWORD_MAX)
Discard skipMax bytes from the output buffer.
Definition: cryptlib.cpp:525
const Integer & Square(const Integer &a) const
Square an element in the ring.
Definition: integer.cpp:4666
Integer & operator^=(const Integer &t)
Bitwise XOR Assignment.
Definition: integer.cpp:4111
#define MEMORY_BARRIER
A memory barrier.
Definition: misc.h:273
RandomNumberType
Properties of a random integer.
Definition: integer.h:91
const char * Seed()
ConstByteArrayParameter.
Definition: argnames.h:19
bool IsEven() const
Determines if the Integer is even parity.
Definition: integer.h:348
byte order is big-endian
Definition: cryptlib.h:138
Safely right shift values when undefined behavior could occur.
const T & STDMIN(const T &a, const T &b)
Replacement function for std::min.
Definition: misc.h:514
String-based implementation of Store interface.
Definition: filters.h:1182
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.
Definition: trap.h:60
static void Divide(Integer &r, Integer &q, const Integer &a, const Integer &d)
calculate r and q such that (a == d*q + r) && (0 <= r < abs(d))
Definition: integer.cpp:4214
ModularArithmetic(const Integer &modulus=Integer::One())
Construct a ModularArithmetic.
Definition: modarith.h:45
void BERDecodeError()
Raises a BERDecodeErr.
Definition: asn.h:69
Data structure used to store byte strings.
Definition: queue.h:19
Functions for CPU features and intrinsics.
Classes and functions for working with ANS.1 objects.
Classes for SHA-1 and SHA-2 family of message digests.
void SetBit(size_t n, bool value=1)
Set the n-th bit to value.
Definition: integer.cpp:3141
size_t GetWord16(word16 &value, ByteOrder order=BIG_ENDIAN_ORDER)
Retrieve a 16-bit word.
Definition: cryptlib.cpp:734
iterator begin()
Provides an iterator pointing to the first element in the memory block.
Definition: secblock.h:545
unsigned int BitCount() const
Determines the number of bits required to represent the Integer.
Definition: integer.cpp:3371
const char * PointerToPrimeSelector()
const PrimeSelector *
Definition: argnames.h:42
Implementation of BufferedTransformation&#39;s attachment interface.
Classes and functions for number theoretic operations.
byte GetByte(size_t i) const
Provides the i-th byte of the Integer.
Definition: integer.cpp:3155
Exception thrown when division by 0 is encountered.
Definition: integer.h:55
DER Sequence Encoder.
Definition: asn.h:313
void Encode(byte *output, size_t outputLen, Signedness sign=UNSIGNED) const
Encode in big-endian format.
Definition: integer.cpp:3435
Integer Squared() const
Multiply this integer by itself.
Definition: integer.h:576
T1 SaturatingSubtract1(const T1 &a, const T2 &b)
Performs a saturating subtract clamped at 1.
Definition: misc.h:913
virtual lword MaxRetrievable() const
Provides the number of bytes ready for retrieval.
Definition: cryptlib.cpp:468
Exception thrown when a random number cannot be found that satisfies the condition.
Definition: integer.h:63
Performs modular arithmetic in Montgomery representation for increased speed.
Definition: modarith.h:271
Integer operator<<(size_t n) const
Left-shift.
Definition: integer.h:565
DER General Encoder.
Definition: asn.h:284
bool HasSSE2()
Determines SSE2 availability.
Definition: cpu.h:180
Integer Minus(const Integer &b) const
Subtraction.
Definition: integer.cpp:4006
void GenerateBlock(byte *output, size_t size)
Generate random array of bytes.
Definition: integer.cpp:3569
void Decode(const byte *input, size_t inputLen, Signedness sign=UNSIGNED)
Decode from big-endian byte array.
Definition: integer.cpp:3380
size_t PutWord16(word16 value, ByteOrder order=BIG_ENDIAN_ORDER, bool blocking=true)
Input a 16-bit word for processing.
Definition: cryptlib.cpp:686
Integer CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
TODO.
Definition: modarith.h:303
size_t DEREncodeOctetString(BufferedTransformation &bt, const byte *str, size_t strLen)
DER encode octet string.
Definition: asn.cpp:104
Integer operator>>(size_t n) const
Right-shift.
Definition: integer.h:563
void SimultaneousExponentiate(Element *results, const Element &base, const Integer *exponents, unsigned int exponentsCount) const
Exponentiates a base to multiple exponents in the ring.
Definition: integer.cpp:4629
Multiple precision integer with arithmetic operations.
Safely left shift values when undefined behavior could occur.
static const Integer & Zero()
Integer representing 0.
Definition: integer.cpp:3087
const T & STDMAX(const T &a, const T &b)
Replacement function for std::max.
Definition: misc.h:525
Euclidean domain.
Definition: algebra.h:315
void Grow(size_type newSize)
Change size and preserve contents.
Definition: secblock.h:746
void BERDecode(const byte *input, size_t inputLen)
Decode from BER format.
Definition: integer.cpp:3464
std::string IntToString< Integer >(Integer value, unsigned int base)
Converts an Integer to a string.
Definition: integer.cpp:4718
virtual size_t Get(byte &outByte)
Retrieve a 8-bit byte.
Definition: cryptlib.cpp:487
Class file for performing modular arithmetic.
Crypto++ library namespace.
bool GetValue(const char *name, T &value) const
Get a named value.
Definition: cryptlib.h:343
Integer & operator>>=(size_t n)
Right-shift Assignment.
Definition: integer.cpp:4064
bool GetBit(size_t i) const
Provides the i-th bit of the Integer.
Definition: integer.cpp:3131
Integer SquareRoot() const
Extract square root.
Definition: integer.cpp:4390
BER General Decoder.
Definition: asn.h:246
bool GetIntValue(const char *name, int &value) const
Get a named value with type int.
Definition: cryptlib.h:379
bool IsConvertableToLong() const
Determines if the Integer is convertable to Long.
Definition: integer.cpp:3003
Object Identifier.
Definition: asn.h:166
size_t Get(byte &outByte)
Retrieve a 8-bit byte.
Definition: queue.cpp:300
Integer AbsoluteValue() const
Retrieve the absolute value of this integer.
Definition: integer.cpp:3188
Integer & operator-=(const Integer &t)
Subtraction Assignment.
Definition: integer.cpp:4029
unsigned int BitPrecision(const T &value)
Returns the number of bits required for a value.
Definition: misc.h:700
size_type size() const
Provides the count of elements in the SecBlock.
Definition: secblock.h:570
Integer & operator &=(const Integer &t)
Bitwise AND Assignment.
void DEREncode(BufferedTransformation &bt) const
Encodes in DER format.
Definition: integer.cpp:4503
lword GetBits(size_t i, size_t n) const
Provides the low order bits of the Integer.
Definition: integer.cpp:3172
Integer operator-() const
Subtraction.
Definition: integer.cpp:3181
the value is positive or 0
Definition: integer.h:75
const Integer & MultiplicativeInverse(const Integer &a) const
Calculate the multiplicative inverse of an element in the ring.
Definition: integer.cpp:4692
void SimultaneousExponentiate(Element *results, const Element &base, const Integer *exponents, unsigned int exponentsCount) const
Exponentiates a base to multiple exponents in the ring.
Definition: modarith.h:306
virtual void SimultaneousExponentiate(Element *results, const Element &base, const Integer *exponents, unsigned int exponentsCount) const
Exponentiates a base to multiple exponents in the Ring.
Definition: algebra.cpp:334
Interface for retrieving values given their names.
Definition: cryptlib.h:285
Integer ConvertOut(const Integer &a) const
Reduces an element in the congruence class.
Definition: integer.cpp:4679