Crypto++  5.6.3
Free C++ class library of cryptographic schemes
integer.cpp
1 // integer.cpp - written and placed in the public domain by Wei Dai
2 // contains public domain code contributed by Alister Lee and Leonard Janke
3 
4 #include "pch.h"
5 #include "config.h"
6 
7 #if CRYPTOPP_MSC_VERSION
8 # pragma warning(disable: 4100)
9 #endif
10 
11 #if CRYPTOPP_GCC_DIAGNOSTIC_AVAILABLE
12 # pragma GCC diagnostic ignored "-Wunused"
13 # pragma GCC diagnostic ignored "-Wunused-but-set-variable"
14 #endif
15 
16 #ifndef CRYPTOPP_IMPORTS
17 
18 #include "integer.h"
19 #include "secblock.h"
20 #include "modarith.h"
21 #include "nbtheory.h"
22 #include "smartptr.h"
23 #include "algparam.h"
24 #include "filters.h"
25 #include "asn.h"
26 #include "oids.h"
27 #include "words.h"
28 #include "pubkey.h" // for P1363_KDF2
29 #include "sha.h"
30 #include "cpu.h"
31 #include "misc.h"
32 
33 #include <iostream>
34 
35 #if (_MSC_VER >= 1400) && !defined(_M_ARM)
36  #include <intrin.h>
37 #endif
38 
39 #ifdef __DECCXX
40  #include <c_asm.h>
41 #endif
42 
43 #ifdef CRYPTOPP_MSVC6_NO_PP
44  #pragma message("You do not seem to have the Visual C++ Processor Pack installed, so use of SSE2 instructions will be disabled.")
45 #endif
46 
47 // "Error: The operand ___LKDB cannot be assigned to", http://github.com/weidai11/cryptopp/issues/188
48 #if (__SUNPRO_CC >= 0x5130)
49 # define MAYBE_CONST
50 # define MAYBE_UNCONST_CAST const_cast<word*>
51 #else
52 # define MAYBE_CONST const
53 # define MAYBE_UNCONST_CAST
54 #endif
55 
56 // "Inline assembly operands don't work with .intel_syntax",
57 // http://llvm.org/bugs/show_bug.cgi?id=24232
58 #if CRYPTOPP_BOOL_X32 || defined(CRYPTOPP_DISABLE_INTEL_ASM)
59 # undef CRYPTOPP_X86_ASM_AVAILABLE
60 # undef CRYPTOPP_X32_ASM_AVAILABLE
61 # undef CRYPTOPP_X64_ASM_AVAILABLE
62 # undef CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
63 # undef CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
64 # define CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 0
65 # define CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE 0
66 #else
67 # define CRYPTOPP_INTEGER_SSE2 (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86)
68 #endif
69 
70 // Debian QEMU/ARMEL issue in MultiplyTop; see http://github.com/weidai11/cryptopp/issues/31.
71 #if __ARMEL__ && (CRYPTOPP_GCC_VERSION >= 40900) && (CRYPTOPP_GCC_VERSION < 70000) && __OPTIMIZE__
72 # define WORKAROUND_ARMEL_BUG 1
73 #endif
74 
75 #if WORKAROUND_ARMEL_BUG
76 # pragma GCC push_options
77 # pragma GCC optimize("O1")
78 #endif
79 
80 NAMESPACE_BEGIN(CryptoPP)
81 
82 bool AssignIntToInteger(const std::type_info &valueType, void *pInteger, const void *pInt)
83 {
84  if (valueType != typeid(Integer))
85  return false;
86  *reinterpret_cast<Integer *>(pInteger) = *reinterpret_cast<const int *>(pInt);
87  return true;
88 }
89 
90 inline static int Compare(const word *A, const word *B, size_t N)
91 {
92  while (N--)
93  if (A[N] > B[N])
94  return 1;
95  else if (A[N] < B[N])
96  return -1;
97 
98  return 0;
99 }
100 
101 inline static int Increment(word *A, size_t N, word B=1)
102 {
103  assert(N);
104  word t = A[0];
105  A[0] = t+B;
106  if (A[0] >= t)
107  return 0;
108  for (unsigned i=1; i<N; i++)
109  if (++A[i])
110  return 0;
111  return 1;
112 }
113 
114 inline static int Decrement(word *A, size_t N, word B=1)
115 {
116  assert(N);
117  word t = A[0];
118  A[0] = t-B;
119  if (A[0] <= t)
120  return 0;
121  for (unsigned i=1; i<N; i++)
122  if (A[i]--)
123  return 0;
124  return 1;
125 }
126 
127 static void TwosComplement(word *A, size_t N)
128 {
129  Decrement(A, N);
130  for (unsigned i=0; i<N; i++)
131  A[i] = ~A[i];
132 }
133 
134 static word AtomicInverseModPower2(word A)
135 {
136  assert(A%2==1);
137 
138  word R=A%8;
139 
140  for (unsigned i=3; i<WORD_BITS; i*=2)
141  R = R*(2-R*A);
142 
143  assert(R*A==1);
144  return R;
145 }
146 
147 // ********************************************************
148 
149 #if !defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE) || (defined(__x86_64__) && defined(CRYPTOPP_WORD128_AVAILABLE))
150  #define Declare2Words(x) word x##0, x##1;
151  #define AssignWord(a, b) a##0 = b; a##1 = 0;
152  #define Add2WordsBy1(a, b, c) a##0 = b##0 + c; a##1 = b##1 + (a##0 < c);
153  #define LowWord(a) a##0
154  #define HighWord(a) a##1
155  #ifdef _MSC_VER
156  #define MultiplyWordsLoHi(p0, p1, a, b) p0 = _umul128(a, b, &p1);
157  #ifndef __INTEL_COMPILER
158  #define Double3Words(c, d) d##1 = __shiftleft128(d##0, d##1, 1); d##0 = __shiftleft128(c, d##0, 1); c *= 2;
159  #endif
160  #elif defined(__DECCXX)
161  #define MultiplyWordsLoHi(p0, p1, a, b) p0 = a*b; p1 = asm("umulh %a0, %a1, %v0", a, b);
162  #elif defined(__x86_64__)
163  #if defined(__SUNPRO_CC) && __SUNPRO_CC < 0x5100
164  // Sun Studio's gcc-style inline assembly is heavily bugged as of version 5.9 Patch 124864-09 2008/12/16, but this one works
165  #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "r"(b) : "cc");
166  #else
167  #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
168  #define MulAcc(c, d, a, b) asm ("mulq %6; addq %3, %0; adcq %4, %1; adcq $0, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1), "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
169  #define Double3Words(c, d) asm ("addq %0, %0; adcq %1, %1; adcq %2, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1) : : "cc");
170  #define Acc2WordsBy1(a, b) asm ("addq %2, %0; adcq $0, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b) : "cc");
171  #define Acc2WordsBy2(a, b) asm ("addq %2, %0; adcq %3, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b##0), "r"(b##1) : "cc");
172  #define Acc3WordsBy2(c, d, e) asm ("addq %5, %0; adcq %6, %1; adcq $0, %2;" : "+r"(c), "=r"(e##0), "=r"(e##1) : "1"(d##0), "2"(d##1), "r"(e##0), "r"(e##1) : "cc");
173  #endif
174  #endif
175  #define MultiplyWords(p, a, b) MultiplyWordsLoHi(p##0, p##1, a, b)
176  #ifndef Double3Words
177  #define Double3Words(c, d) d##1 = 2*d##1 + (d##0>>(WORD_BITS-1)); d##0 = 2*d##0 + (c>>(WORD_BITS-1)); c *= 2;
178  #endif
179  #ifndef Acc2WordsBy2
180  #define Acc2WordsBy2(a, b) a##0 += b##0; a##1 += a##0 < b##0; a##1 += b##1;
181  #endif
182  #define AddWithCarry(u, a, b) {word t = a+b; u##0 = t + u##1; u##1 = (t<a) + (u##0<t);}
183  #define SubtractWithBorrow(u, a, b) {word t = a-b; u##0 = t - u##1; u##1 = (t>a) + (u##0>t);}
184  #define GetCarry(u) u##1
185  #define GetBorrow(u) u##1
186 #else
187  #define Declare2Words(x) dword x;
188  #if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER) && !defined(_M_ARM)
189  #define MultiplyWords(p, a, b) p = __emulu(a, b);
190  #else
191  #define MultiplyWords(p, a, b) p = (dword)a*b;
192  #endif
193  #define AssignWord(a, b) a = b;
194  #define Add2WordsBy1(a, b, c) a = b + c;
195  #define Acc2WordsBy2(a, b) a += b;
196  #define LowWord(a) word(a)
197  #define HighWord(a) word(a>>WORD_BITS)
198  #define Double3Words(c, d) d = 2*d + (c>>(WORD_BITS-1)); c *= 2;
199  #define AddWithCarry(u, a, b) u = dword(a) + b + GetCarry(u);
200  #define SubtractWithBorrow(u, a, b) u = dword(a) - b - GetBorrow(u);
201  #define GetCarry(u) HighWord(u)
202  #define GetBorrow(u) word(u>>(WORD_BITS*2-1))
203 #endif
204 #ifndef MulAcc
205  #define MulAcc(c, d, a, b) MultiplyWords(p, a, b); Acc2WordsBy1(p, c); c = LowWord(p); Acc2WordsBy1(d, HighWord(p));
206 #endif
207 #ifndef Acc2WordsBy1
208  #define Acc2WordsBy1(a, b) Add2WordsBy1(a, a, b)
209 #endif
210 #ifndef Acc3WordsBy2
211  #define Acc3WordsBy2(c, d, e) Acc2WordsBy1(e, c); c = LowWord(e); Add2WordsBy1(e, d, HighWord(e));
212 #endif
213 
214 class DWord
215 {
216 public:
217  // Converity finding on default ctor. We've isntrumented the code,
218  // and cannot uncover a case where it affects a result.
219 #if (defined(__COVERITY__) || !defined(NDEBUG)) && defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE)
220  // Repeating pattern of 1010 for debug builds to break things...
221  DWord() : m_whole(0) {memset(&m_whole, 0xa, sizeof(m_whole));}
222 #elif (defined(__COVERITY__) || !defined(NDEBUG)) && !defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE)
223  // Repeating pattern of 1010 for debug builds to break things...
224  DWord() : m_halfs() {memset(&m_halfs, 0xaa, sizeof(m_halfs));}
225 #else
226  DWord() {}
227 #endif
228 
229 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
230  explicit DWord(word low) : m_whole(low) {}
231 #else
232  explicit DWord(word low)
233  {
234  m_halfs.low = low;
235  m_halfs.high = 0;
236  }
237 #endif
238 
239  DWord(word low, word high)
240  {
241  m_halfs.low = low;
242  m_halfs.high = high;
243  }
244 
245  static DWord Multiply(word a, word b)
246  {
247  DWord r;
248  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
249  r.m_whole = (dword)a * b;
250  #elif defined(MultiplyWordsLoHi)
251  MultiplyWordsLoHi(r.m_halfs.low, r.m_halfs.high, a, b);
252  #else
253  assert(0);
254  #endif
255  return r;
256  }
257 
258  static DWord MultiplyAndAdd(word a, word b, word c)
259  {
260  DWord r = Multiply(a, b);
261  return r += c;
262  }
263 
264  DWord & operator+=(word a)
265  {
266  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
267  m_whole = m_whole + a;
268  #else
269  m_halfs.low += a;
270  m_halfs.high += (m_halfs.low < a);
271  #endif
272  return *this;
273  }
274 
275  DWord operator+(word a)
276  {
277  DWord r;
278  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
279  r.m_whole = m_whole + a;
280  #else
281  r.m_halfs.low = m_halfs.low + a;
282  r.m_halfs.high = m_halfs.high + (r.m_halfs.low < a);
283  #endif
284  return r;
285  }
286 
287  DWord operator-(DWord a)
288  {
289  DWord r;
290  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
291  r.m_whole = m_whole - a.m_whole;
292  #else
293  r.m_halfs.low = m_halfs.low - a.m_halfs.low;
294  r.m_halfs.high = m_halfs.high - a.m_halfs.high - (r.m_halfs.low > m_halfs.low);
295  #endif
296  return r;
297  }
298 
299  DWord operator-(word a)
300  {
301  DWord r;
302  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
303  r.m_whole = m_whole - a;
304  #else
305  r.m_halfs.low = m_halfs.low - a;
306  r.m_halfs.high = m_halfs.high - (r.m_halfs.low > m_halfs.low);
307  #endif
308  return r;
309  }
310 
311  // returns quotient, which must fit in a word
312  word operator/(word divisor);
313 
314  word operator%(word a);
315 
316  bool operator!() const
317  {
318  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
319  return !m_whole;
320  #else
321  return !m_halfs.high && !m_halfs.low;
322  #endif
323  }
324 
325  word GetLowHalf() const {return m_halfs.low;}
326  word GetHighHalf() const {return m_halfs.high;}
327  word GetHighHalfAsBorrow() const {return 0-m_halfs.high;}
328 
329 private:
330  union
331  {
332  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
333  dword m_whole;
334  #endif
335  struct
336  {
337  #ifdef IS_LITTLE_ENDIAN
338  word low;
339  word high;
340  #else
341  word high;
342  word low;
343  #endif
344  } m_halfs;
345  };
346 };
347 
348 class Word
349 {
350 public:
351  // Converity finding on default ctor. We've isntrumented the code,
352  // and cannot uncover a case where it affects a result.
353 #if defined(__COVERITY__)
354  Word() : m_whole(0) {}
355 #elif !defined(NDEBUG)
356  // Repeating pattern of 1010 for debug builds to break things...
357  Word() : m_whole(0) {memset(&m_whole, 0xaa, sizeof(m_whole));}
358 #else
359  Word() {}
360 #endif
361 
362  Word(word value) : m_whole(value) {}
363  Word(hword low, hword high) : m_whole(low | (word(high) << (WORD_BITS/2))) {}
364 
365  static Word Multiply(hword a, hword b)
366  {
367  Word r;
368  r.m_whole = (word)a * b;
369  return r;
370  }
371 
372  Word operator-(Word a)
373  {
374  Word r;
375  r.m_whole = m_whole - a.m_whole;
376  return r;
377  }
378 
379  Word operator-(hword a)
380  {
381  Word r;
382  r.m_whole = m_whole - a;
383  return r;
384  }
385 
386  // returns quotient, which must fit in a word
387  hword operator/(hword divisor)
388  {
389  return hword(m_whole / divisor);
390  }
391 
392  bool operator!() const
393  {
394  return !m_whole;
395  }
396 
397  word GetWhole() const {return m_whole;}
398  hword GetLowHalf() const {return hword(m_whole);}
399  hword GetHighHalf() const {return hword(m_whole>>(WORD_BITS/2));}
400  hword GetHighHalfAsBorrow() const {return 0-hword(m_whole>>(WORD_BITS/2));}
401 
402 private:
403  word m_whole;
404 };
405 
406 // do a 3 word by 2 word divide, returns quotient and leaves remainder in A
407 template <class S, class D>
408 S DivideThreeWordsByTwo(S *A, S B0, S B1, D *dummy=NULL)
409 {
410  CRYPTOPP_UNUSED(dummy);
411 
412  // assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a S
413  assert(A[2] < B1 || (A[2]==B1 && A[1] < B0));
414 
415  // estimate the quotient: do a 2 S by 1 S divide
416  S Q;
417  if (S(B1+1) == 0)
418  Q = A[2];
419  else if (B1 > 0)
420  Q = D(A[1], A[2]) / S(B1+1);
421  else
422  Q = D(A[0], A[1]) / B0;
423 
424  // now subtract Q*B from A
425  D p = D::Multiply(B0, Q);
426  D u = (D) A[0] - p.GetLowHalf();
427  A[0] = u.GetLowHalf();
428  u = (D) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - D::Multiply(B1, Q);
429  A[1] = u.GetLowHalf();
430  A[2] += u.GetHighHalf();
431 
432  // Q <= actual quotient, so fix it
433  while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
434  {
435  u = (D) A[0] - B0;
436  A[0] = u.GetLowHalf();
437  u = (D) A[1] - B1 - u.GetHighHalfAsBorrow();
438  A[1] = u.GetLowHalf();
439  A[2] += u.GetHighHalf();
440  Q++;
441  assert(Q); // shouldn't overflow
442  }
443 
444  return Q;
445 }
446 
447 // do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1
448 template <class S, class D>
449 inline D DivideFourWordsByTwo(S *T, const D &Al, const D &Ah, const D &B)
450 {
451  if (!B) // if divisor is 0, we assume divisor==2**(2*WORD_BITS)
452  return D(Ah.GetLowHalf(), Ah.GetHighHalf());
453  else
454  {
455  S Q[2];
456  T[0] = Al.GetLowHalf();
457  T[1] = Al.GetHighHalf();
458  T[2] = Ah.GetLowHalf();
459  T[3] = Ah.GetHighHalf();
460  Q[1] = DivideThreeWordsByTwo<S, D>(T+1, B.GetLowHalf(), B.GetHighHalf());
461  Q[0] = DivideThreeWordsByTwo<S, D>(T, B.GetLowHalf(), B.GetHighHalf());
462  return D(Q[0], Q[1]);
463  }
464 }
465 
466 // returns quotient, which must fit in a word
467 inline word DWord::operator/(word a)
468 {
469  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
470  return word(m_whole / a);
471  #else
472  hword r[4];
473  return DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a).GetWhole();
474  #endif
475 }
476 
477 inline word DWord::operator%(word a)
478 {
479  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
480  return word(m_whole % a);
481  #else
482  if (a < (word(1) << (WORD_BITS/2)))
483  {
484  hword h = hword(a);
485  word r = m_halfs.high % h;
486  r = ((m_halfs.low >> (WORD_BITS/2)) + (r << (WORD_BITS/2))) % h;
487  return hword((hword(m_halfs.low) + (r << (WORD_BITS/2))) % h);
488  }
489  else
490  {
491  hword r[4];
492  DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a);
493  return Word(r[0], r[1]).GetWhole();
494  }
495  #endif
496 }
497 
498 // ********************************************************
499 
500 // Use some tricks to share assembly code between MSVC, GCC, Clang and Sun CC.
501 #if defined(__GNUC__)
502  #define AddPrologue \
503  int result; \
504  __asm__ __volatile__ \
505  ( \
506  INTEL_NOPREFIX
507  #define AddEpilogue \
508  ATT_PREFIX \
509  : "=a" (result)\
510  : "d" (C), "a" (A), "D" (B), "c" (N) \
511  : "%esi", "memory", "cc" \
512  );\
513  return result;
514  #define MulPrologue \
515  __asm__ __volatile__ \
516  ( \
517  INTEL_NOPREFIX \
518  AS1( push ebx) \
519  AS2( mov ebx, edx)
520  #define MulEpilogue \
521  AS1( pop ebx) \
522  ATT_PREFIX \
523  : \
524  : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B) \
525  : "%esi", "memory", "cc" \
526  );
527  #define SquPrologue MulPrologue
528  #define SquEpilogue \
529  AS1( pop ebx) \
530  ATT_PREFIX \
531  : \
532  : "d" (s_maskLow16), "c" (C), "a" (A) \
533  : "%esi", "%edi", "memory", "cc" \
534  );
535  #define TopPrologue MulPrologue
536  #define TopEpilogue \
537  AS1( pop ebx) \
538  ATT_PREFIX \
539  : \
540  : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B), "S" (L) \
541  : "memory", "cc" \
542  );
543 #else
544  #define AddPrologue \
545  __asm push edi \
546  __asm push esi \
547  __asm mov eax, [esp+12] \
548  __asm mov edi, [esp+16]
549  #define AddEpilogue \
550  __asm pop esi \
551  __asm pop edi \
552  __asm ret 8
553 #if _MSC_VER < 1300
554  #define SaveEBX __asm push ebx
555  #define RestoreEBX __asm pop ebx
556 #else
557  #define SaveEBX
558  #define RestoreEBX
559 #endif
560  #define SquPrologue \
561  AS2( mov eax, A) \
562  AS2( mov ecx, C) \
563  SaveEBX \
564  AS2( lea ebx, s_maskLow16)
565  #define MulPrologue \
566  AS2( mov eax, A) \
567  AS2( mov edi, B) \
568  AS2( mov ecx, C) \
569  SaveEBX \
570  AS2( lea ebx, s_maskLow16)
571  #define TopPrologue \
572  AS2( mov eax, A) \
573  AS2( mov edi, B) \
574  AS2( mov ecx, C) \
575  AS2( mov esi, L) \
576  SaveEBX \
577  AS2( lea ebx, s_maskLow16)
578  #define SquEpilogue RestoreEBX
579  #define MulEpilogue RestoreEBX
580  #define TopEpilogue RestoreEBX
581 #endif
582 
583 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
584 extern "C" {
585 int Baseline_Add(size_t N, word *C, const word *A, const word *B);
586 int Baseline_Sub(size_t N, word *C, const word *A, const word *B);
587 }
588 #elif defined(CRYPTOPP_X64_ASM_AVAILABLE) && defined(__GNUC__) && defined(CRYPTOPP_WORD128_AVAILABLE)
589 int Baseline_Add(size_t N, word *C, const word *A, const word *B)
590 {
591  word result;
592  __asm__ __volatile__
593  (
594  INTEL_NOPREFIX
595  AS1( neg %1)
596  ASJ( jz, 1, f)
597  AS2( mov %0,[%3+8*%1])
598  AS2( add %0,[%4+8*%1])
599  AS2( mov [%2+8*%1],%0)
600  ASL(0)
601  AS2( mov %0,[%3+8*%1+8])
602  AS2( adc %0,[%4+8*%1+8])
603  AS2( mov [%2+8*%1+8],%0)
604  AS2( lea %1,[%1+2])
605  ASJ( jrcxz, 1, f)
606  AS2( mov %0,[%3+8*%1])
607  AS2( adc %0,[%4+8*%1])
608  AS2( mov [%2+8*%1],%0)
609  ASJ( jmp, 0, b)
610  ASL(1)
611  AS2( mov %0, 0)
612  AS2( adc %0, %0)
613  ATT_NOPREFIX
614  : "=&r" (result), "+c" (N)
615  : "r" (C+N), "r" (A+N), "r" (B+N)
616  : "memory", "cc"
617  );
618  return (int)result;
619 }
620 
621 int Baseline_Sub(size_t N, word *C, const word *A, const word *B)
622 {
623  word result;
624  __asm__ __volatile__
625  (
626  INTEL_NOPREFIX
627  AS1( neg %1)
628  ASJ( jz, 1, f)
629  AS2( mov %0,[%3+8*%1])
630  AS2( sub %0,[%4+8*%1])
631  AS2( mov [%2+8*%1],%0)
632  ASL(0)
633  AS2( mov %0,[%3+8*%1+8])
634  AS2( sbb %0,[%4+8*%1+8])
635  AS2( mov [%2+8*%1+8],%0)
636  AS2( lea %1,[%1+2])
637  ASJ( jrcxz, 1, f)
638  AS2( mov %0,[%3+8*%1])
639  AS2( sbb %0,[%4+8*%1])
640  AS2( mov [%2+8*%1],%0)
641  ASJ( jmp, 0, b)
642  ASL(1)
643  AS2( mov %0, 0)
644  AS2( adc %0, %0)
645  ATT_NOPREFIX
646  : "=&r" (result), "+c" (N)
647  : "r" (C+N), "r" (A+N), "r" (B+N)
648  : "memory", "cc"
649  );
650  return (int)result;
651 }
652 #elif defined(CRYPTOPP_X86_ASM_AVAILABLE) && CRYPTOPP_BOOL_X86
653 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
654 {
655  AddPrologue
656 
657  // now: eax = A, edi = B, edx = C, ecx = N
658  AS2( lea eax, [eax+4*ecx])
659  AS2( lea edi, [edi+4*ecx])
660  AS2( lea edx, [edx+4*ecx])
661 
662  AS1( neg ecx) // ecx is negative index
663  AS2( test ecx, 2) // this clears carry flag
664  ASJ( jz, 0, f)
665  AS2( sub ecx, 2)
666  ASJ( jmp, 1, f)
667 
668  ASL(0)
669  ASJ( jecxz, 2, f) // loop until ecx overflows and becomes zero
670  AS2( mov esi,[eax+4*ecx])
671  AS2( adc esi,[edi+4*ecx])
672  AS2( mov [edx+4*ecx],esi)
673  AS2( mov esi,[eax+4*ecx+4])
674  AS2( adc esi,[edi+4*ecx+4])
675  AS2( mov [edx+4*ecx+4],esi)
676  ASL(1)
677  AS2( mov esi,[eax+4*ecx+8])
678  AS2( adc esi,[edi+4*ecx+8])
679  AS2( mov [edx+4*ecx+8],esi)
680  AS2( mov esi,[eax+4*ecx+12])
681  AS2( adc esi,[edi+4*ecx+12])
682  AS2( mov [edx+4*ecx+12],esi)
683 
684  AS2( lea ecx,[ecx+4]) // advance index, avoid inc which causes slowdown on Intel Core 2
685  ASJ( jmp, 0, b)
686 
687  ASL(2)
688  AS2( mov eax, 0)
689  AS1( setc al) // store carry into eax (return result register)
690 
691  AddEpilogue
692 }
693 
694 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B)
695 {
696  AddPrologue
697 
698  // now: eax = A, edi = B, edx = C, ecx = N
699  AS2( lea eax, [eax+4*ecx])
700  AS2( lea edi, [edi+4*ecx])
701  AS2( lea edx, [edx+4*ecx])
702 
703  AS1( neg ecx) // ecx is negative index
704  AS2( test ecx, 2) // this clears carry flag
705  ASJ( jz, 0, f)
706  AS2( sub ecx, 2)
707  ASJ( jmp, 1, f)
708 
709  ASL(0)
710  ASJ( jecxz, 2, f) // loop until ecx overflows and becomes zero
711  AS2( mov esi,[eax+4*ecx])
712  AS2( sbb esi,[edi+4*ecx])
713  AS2( mov [edx+4*ecx],esi)
714  AS2( mov esi,[eax+4*ecx+4])
715  AS2( sbb esi,[edi+4*ecx+4])
716  AS2( mov [edx+4*ecx+4],esi)
717  ASL(1)
718  AS2( mov esi,[eax+4*ecx+8])
719  AS2( sbb esi,[edi+4*ecx+8])
720  AS2( mov [edx+4*ecx+8],esi)
721  AS2( mov esi,[eax+4*ecx+12])
722  AS2( sbb esi,[edi+4*ecx+12])
723  AS2( mov [edx+4*ecx+12],esi)
724 
725  AS2( lea ecx,[ecx+4]) // advance index, avoid inc which causes slowdown on Intel Core 2
726  ASJ( jmp, 0, b)
727 
728  ASL(2)
729  AS2( mov eax, 0)
730  AS1( setc al) // store carry into eax (return result register)
731 
732  AddEpilogue
733 }
734 
735 #if CRYPTOPP_INTEGER_SSE2
736 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Add(size_t N, word *C, const word *A, const word *B)
737 {
738  AddPrologue
739 
740  // now: eax = A, edi = B, edx = C, ecx = N
741  AS2( lea eax, [eax+4*ecx])
742  AS2( lea edi, [edi+4*ecx])
743  AS2( lea edx, [edx+4*ecx])
744 
745  AS1( neg ecx) // ecx is negative index
746  AS2( pxor mm2, mm2)
747  ASJ( jz, 2, f)
748  AS2( test ecx, 2) // this clears carry flag
749  ASJ( jz, 0, f)
750  AS2( sub ecx, 2)
751  ASJ( jmp, 1, f)
752 
753  ASL(0)
754  AS2( movd mm0, DWORD PTR [eax+4*ecx])
755  AS2( movd mm1, DWORD PTR [edi+4*ecx])
756  AS2( paddq mm0, mm1)
757  AS2( paddq mm2, mm0)
758  AS2( movd DWORD PTR [edx+4*ecx], mm2)
759  AS2( psrlq mm2, 32)
760 
761  AS2( movd mm0, DWORD PTR [eax+4*ecx+4])
762  AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
763  AS2( paddq mm0, mm1)
764  AS2( paddq mm2, mm0)
765  AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
766  AS2( psrlq mm2, 32)
767 
768  ASL(1)
769  AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
770  AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
771  AS2( paddq mm0, mm1)
772  AS2( paddq mm2, mm0)
773  AS2( movd DWORD PTR [edx+4*ecx+8], mm2)
774  AS2( psrlq mm2, 32)
775 
776  AS2( movd mm0, DWORD PTR [eax+4*ecx+12])
777  AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
778  AS2( paddq mm0, mm1)
779  AS2( paddq mm2, mm0)
780  AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
781  AS2( psrlq mm2, 32)
782 
783  AS2( add ecx, 4)
784  ASJ( jnz, 0, b)
785 
786  ASL(2)
787  AS2( movd eax, mm2)
788  AS1( emms)
789 
790  AddEpilogue
791 }
792 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Sub(size_t N, word *C, const word *A, const word *B)
793 {
794  AddPrologue
795 
796  // now: eax = A, edi = B, edx = C, ecx = N
797  AS2( lea eax, [eax+4*ecx])
798  AS2( lea edi, [edi+4*ecx])
799  AS2( lea edx, [edx+4*ecx])
800 
801  AS1( neg ecx) // ecx is negative index
802  AS2( pxor mm2, mm2)
803  ASJ( jz, 2, f)
804  AS2( test ecx, 2) // this clears carry flag
805  ASJ( jz, 0, f)
806  AS2( sub ecx, 2)
807  ASJ( jmp, 1, f)
808 
809  ASL(0)
810  AS2( movd mm0, DWORD PTR [eax+4*ecx])
811  AS2( movd mm1, DWORD PTR [edi+4*ecx])
812  AS2( psubq mm0, mm1)
813  AS2( psubq mm0, mm2)
814  AS2( movd DWORD PTR [edx+4*ecx], mm0)
815  AS2( psrlq mm0, 63)
816 
817  AS2( movd mm2, DWORD PTR [eax+4*ecx+4])
818  AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
819  AS2( psubq mm2, mm1)
820  AS2( psubq mm2, mm0)
821  AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
822  AS2( psrlq mm2, 63)
823 
824  ASL(1)
825  AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
826  AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
827  AS2( psubq mm0, mm1)
828  AS2( psubq mm0, mm2)
829  AS2( movd DWORD PTR [edx+4*ecx+8], mm0)
830  AS2( psrlq mm0, 63)
831 
832  AS2( movd mm2, DWORD PTR [eax+4*ecx+12])
833  AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
834  AS2( psubq mm2, mm1)
835  AS2( psubq mm2, mm0)
836  AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
837  AS2( psrlq mm2, 63)
838 
839  AS2( add ecx, 4)
840  ASJ( jnz, 0, b)
841 
842  ASL(2)
843  AS2( movd eax, mm2)
844  AS1( emms)
845 
846  AddEpilogue
847 }
848 #endif // #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
849 #else
850 int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
851 {
852  assert (N%2 == 0);
853 
854  Declare2Words(u);
855  AssignWord(u, 0);
856  for (size_t i=0; i<N; i+=2)
857  {
858  AddWithCarry(u, A[i], B[i]);
859  C[i] = LowWord(u);
860  AddWithCarry(u, A[i+1], B[i+1]);
861  C[i+1] = LowWord(u);
862  }
863  return int(GetCarry(u));
864 }
865 
866 int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B)
867 {
868  assert (N%2 == 0);
869 
870  Declare2Words(u);
871  AssignWord(u, 0);
872  for (size_t i=0; i<N; i+=2)
873  {
874  SubtractWithBorrow(u, A[i], B[i]);
875  C[i] = LowWord(u);
876  SubtractWithBorrow(u, A[i+1], B[i+1]);
877  C[i+1] = LowWord(u);
878  }
879  return int(GetBorrow(u));
880 }
881 #endif
882 
883 static word LinearMultiply(word *C, const word *AA, word B, size_t N)
884 {
885  // http://github.com/weidai11/cryptopp/issues/188
886  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
887 
888  word carry=0;
889  for(unsigned i=0; i<N; i++)
890  {
891  Declare2Words(p);
892  MultiplyWords(p, A[i], B);
893  Acc2WordsBy1(p, carry);
894  C[i] = LowWord(p);
895  carry = HighWord(p);
896  }
897  return carry;
898 }
899 
900 #ifndef CRYPTOPP_DOXYGEN_PROCESSING
901 
902 #define Mul_2 \
903  Mul_Begin(2) \
904  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
905  Mul_End(1, 1)
906 
907 #define Mul_4 \
908  Mul_Begin(4) \
909  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
910  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
911  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
912  Mul_SaveAcc(3, 1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
913  Mul_SaveAcc(4, 2, 3) Mul_Acc(3, 2) \
914  Mul_End(5, 3)
915 
916 #define Mul_8 \
917  Mul_Begin(8) \
918  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
919  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
920  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
921  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
922  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
923  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
924  Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
925  Mul_SaveAcc(7, 1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
926  Mul_SaveAcc(8, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
927  Mul_SaveAcc(9, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
928  Mul_SaveAcc(10, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
929  Mul_SaveAcc(11, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
930  Mul_SaveAcc(12, 6, 7) Mul_Acc(7, 6) \
931  Mul_End(13, 7)
932 
933 #define Mul_16 \
934  Mul_Begin(16) \
935  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
936  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
937  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
938  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
939  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
940  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
941  Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
942  Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
943  Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
944  Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
945  Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
946  Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
947  Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
948  Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
949  Mul_SaveAcc(14, 0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
950  Mul_SaveAcc(15, 1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
951  Mul_SaveAcc(16, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
952  Mul_SaveAcc(17, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
953  Mul_SaveAcc(18, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
954  Mul_SaveAcc(19, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
955  Mul_SaveAcc(20, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
956  Mul_SaveAcc(21, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
957  Mul_SaveAcc(22, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
958  Mul_SaveAcc(23, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
959  Mul_SaveAcc(24, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
960  Mul_SaveAcc(25, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
961  Mul_SaveAcc(26, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
962  Mul_SaveAcc(27, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
963  Mul_SaveAcc(28, 14, 15) Mul_Acc(15, 14) \
964  Mul_End(29, 15)
965 
966 #define Squ_2 \
967  Squ_Begin(2) \
968  Squ_End(2)
969 
970 #define Squ_4 \
971  Squ_Begin(4) \
972  Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
973  Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
974  Squ_SaveAcc(3, 1, 3) Squ_Diag(2) \
975  Squ_SaveAcc(4, 2, 3) Squ_NonDiag \
976  Squ_End(4)
977 
978 #define Squ_8 \
979  Squ_Begin(8) \
980  Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
981  Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
982  Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
983  Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
984  Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
985  Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
986  Squ_SaveAcc(7, 1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
987  Squ_SaveAcc(8, 2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
988  Squ_SaveAcc(9, 3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
989  Squ_SaveAcc(10, 4, 7) Squ_Acc(5, 6) Squ_NonDiag \
990  Squ_SaveAcc(11, 5, 7) Squ_Diag(6) \
991  Squ_SaveAcc(12, 6, 7) Squ_NonDiag \
992  Squ_End(8)
993 
994 #define Squ_16 \
995  Squ_Begin(16) \
996  Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
997  Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
998  Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
999  Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
1000  Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
1001  Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
1002  Squ_SaveAcc(7, 0, 8) Squ_Acc(1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
1003  Squ_SaveAcc(8, 0, 9) Squ_Acc(1, 8) Squ_Acc(2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
1004  Squ_SaveAcc(9, 0, 10) Squ_Acc(1, 9) Squ_Acc(2, 8) Squ_Acc(3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
1005  Squ_SaveAcc(10, 0, 11) Squ_Acc(1, 10) Squ_Acc(2, 9) Squ_Acc(3, 8) Squ_Acc(4, 7) Squ_Acc(5, 6) Squ_NonDiag \
1006  Squ_SaveAcc(11, 0, 12) Squ_Acc(1, 11) Squ_Acc(2, 10) Squ_Acc(3, 9) Squ_Acc(4, 8) Squ_Acc(5, 7) Squ_Diag(6) \
1007  Squ_SaveAcc(12, 0, 13) Squ_Acc(1, 12) Squ_Acc(2, 11) Squ_Acc(3, 10) Squ_Acc(4, 9) Squ_Acc(5, 8) Squ_Acc(6, 7) Squ_NonDiag \
1008  Squ_SaveAcc(13, 0, 14) Squ_Acc(1, 13) Squ_Acc(2, 12) Squ_Acc(3, 11) Squ_Acc(4, 10) Squ_Acc(5, 9) Squ_Acc(6, 8) Squ_Diag(7) \
1009  Squ_SaveAcc(14, 0, 15) Squ_Acc(1, 14) Squ_Acc(2, 13) Squ_Acc(3, 12) Squ_Acc(4, 11) Squ_Acc(5, 10) Squ_Acc(6, 9) Squ_Acc(7, 8) Squ_NonDiag \
1010  Squ_SaveAcc(15, 1, 15) Squ_Acc(2, 14) Squ_Acc(3, 13) Squ_Acc(4, 12) Squ_Acc(5, 11) Squ_Acc(6, 10) Squ_Acc(7, 9) Squ_Diag(8) \
1011  Squ_SaveAcc(16, 2, 15) Squ_Acc(3, 14) Squ_Acc(4, 13) Squ_Acc(5, 12) Squ_Acc(6, 11) Squ_Acc(7, 10) Squ_Acc(8, 9) Squ_NonDiag \
1012  Squ_SaveAcc(17, 3, 15) Squ_Acc(4, 14) Squ_Acc(5, 13) Squ_Acc(6, 12) Squ_Acc(7, 11) Squ_Acc(8, 10) Squ_Diag(9) \
1013  Squ_SaveAcc(18, 4, 15) Squ_Acc(5, 14) Squ_Acc(6, 13) Squ_Acc(7, 12) Squ_Acc(8, 11) Squ_Acc(9, 10) Squ_NonDiag \
1014  Squ_SaveAcc(19, 5, 15) Squ_Acc(6, 14) Squ_Acc(7, 13) Squ_Acc(8, 12) Squ_Acc(9, 11) Squ_Diag(10) \
1015  Squ_SaveAcc(20, 6, 15) Squ_Acc(7, 14) Squ_Acc(8, 13) Squ_Acc(9, 12) Squ_Acc(10, 11) Squ_NonDiag \
1016  Squ_SaveAcc(21, 7, 15) Squ_Acc(8, 14) Squ_Acc(9, 13) Squ_Acc(10, 12) Squ_Diag(11) \
1017  Squ_SaveAcc(22, 8, 15) Squ_Acc(9, 14) Squ_Acc(10, 13) Squ_Acc(11, 12) Squ_NonDiag \
1018  Squ_SaveAcc(23, 9, 15) Squ_Acc(10, 14) Squ_Acc(11, 13) Squ_Diag(12) \
1019  Squ_SaveAcc(24, 10, 15) Squ_Acc(11, 14) Squ_Acc(12, 13) Squ_NonDiag \
1020  Squ_SaveAcc(25, 11, 15) Squ_Acc(12, 14) Squ_Diag(13) \
1021  Squ_SaveAcc(26, 12, 15) Squ_Acc(13, 14) Squ_NonDiag \
1022  Squ_SaveAcc(27, 13, 15) Squ_Diag(14) \
1023  Squ_SaveAcc(28, 14, 15) Squ_NonDiag \
1024  Squ_End(16)
1025 
1026 #define Bot_2 \
1027  Mul_Begin(2) \
1028  Bot_SaveAcc(0, 0, 1) Bot_Acc(1, 0) \
1029  Bot_End(2)
1030 
1031 #define Bot_4 \
1032  Mul_Begin(4) \
1033  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1034  Mul_SaveAcc(1, 2, 0) Mul_Acc(1, 1) Mul_Acc(0, 2) \
1035  Bot_SaveAcc(2, 0, 3) Bot_Acc(1, 2) Bot_Acc(2, 1) Bot_Acc(3, 0) \
1036  Bot_End(4)
1037 
1038 #define Bot_8 \
1039  Mul_Begin(8) \
1040  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1041  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
1042  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1043  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
1044  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
1045  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
1046  Bot_SaveAcc(6, 0, 7) Bot_Acc(1, 6) Bot_Acc(2, 5) Bot_Acc(3, 4) Bot_Acc(4, 3) Bot_Acc(5, 2) Bot_Acc(6, 1) Bot_Acc(7, 0) \
1047  Bot_End(8)
1048 
1049 #define Bot_16 \
1050  Mul_Begin(16) \
1051  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1052  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
1053  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1054  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
1055  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
1056  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
1057  Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
1058  Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
1059  Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
1060  Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
1061  Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
1062  Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
1063  Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
1064  Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
1065  Bot_SaveAcc(14, 0, 15) Bot_Acc(1, 14) Bot_Acc(2, 13) Bot_Acc(3, 12) Bot_Acc(4, 11) Bot_Acc(5, 10) Bot_Acc(6, 9) Bot_Acc(7, 8) Bot_Acc(8, 7) Bot_Acc(9, 6) Bot_Acc(10, 5) Bot_Acc(11, 4) Bot_Acc(12, 3) Bot_Acc(13, 2) Bot_Acc(14, 1) Bot_Acc(15, 0) \
1066  Bot_End(16)
1067 
1068 #endif
1069 
1070 #if 0
1071 #define Mul_Begin(n) \
1072  Declare2Words(p) \
1073  Declare2Words(c) \
1074  Declare2Words(d) \
1075  MultiplyWords(p, A[0], B[0]) \
1076  AssignWord(c, LowWord(p)) \
1077  AssignWord(d, HighWord(p))
1078 
1079 #define Mul_Acc(i, j) \
1080  MultiplyWords(p, A[i], B[j]) \
1081  Acc2WordsBy1(c, LowWord(p)) \
1082  Acc2WordsBy1(d, HighWord(p))
1083 
1084 #define Mul_SaveAcc(k, i, j) \
1085  R[k] = LowWord(c); \
1086  Add2WordsBy1(c, d, HighWord(c)) \
1087  MultiplyWords(p, A[i], B[j]) \
1088  AssignWord(d, HighWord(p)) \
1089  Acc2WordsBy1(c, LowWord(p))
1090 
1091 #define Mul_End(n) \
1092  R[2*n-3] = LowWord(c); \
1093  Acc2WordsBy1(d, HighWord(c)) \
1094  MultiplyWords(p, A[n-1], B[n-1])\
1095  Acc2WordsBy2(d, p) \
1096  R[2*n-2] = LowWord(d); \
1097  R[2*n-1] = HighWord(d);
1098 
1099 #define Bot_SaveAcc(k, i, j) \
1100  R[k] = LowWord(c); \
1101  word e = LowWord(d) + HighWord(c); \
1102  e += A[i] * B[j];
1103 
1104 #define Bot_Acc(i, j) \
1105  e += A[i] * B[j];
1106 
1107 #define Bot_End(n) \
1108  R[n-1] = e;
1109 #else
1110 #define Mul_Begin(n) \
1111  Declare2Words(p) \
1112  word c; \
1113  Declare2Words(d) \
1114  MultiplyWords(p, A[0], B[0]) \
1115  c = LowWord(p); \
1116  AssignWord(d, HighWord(p))
1117 
1118 #define Mul_Acc(i, j) \
1119  MulAcc(c, d, A[i], B[j])
1120 
1121 #define Mul_SaveAcc(k, i, j) \
1122  R[k] = c; \
1123  c = LowWord(d); \
1124  AssignWord(d, HighWord(d)) \
1125  MulAcc(c, d, A[i], B[j])
1126 
1127 #define Mul_End(k, i) \
1128  R[k] = c; \
1129  MultiplyWords(p, A[i], B[i]) \
1130  Acc2WordsBy2(p, d) \
1131  R[k+1] = LowWord(p); \
1132  R[k+2] = HighWord(p);
1133 
1134 #define Bot_SaveAcc(k, i, j) \
1135  R[k] = c; \
1136  c = LowWord(d); \
1137  c += A[i] * B[j];
1138 
1139 #define Bot_Acc(i, j) \
1140  c += A[i] * B[j];
1141 
1142 #define Bot_End(n) \
1143  R[n-1] = c;
1144 #endif
1145 
1146 #define Squ_Begin(n) \
1147  Declare2Words(p) \
1148  word c; \
1149  Declare2Words(d) \
1150  Declare2Words(e) \
1151  MultiplyWords(p, A[0], A[0]) \
1152  R[0] = LowWord(p); \
1153  AssignWord(e, HighWord(p)) \
1154  MultiplyWords(p, A[0], A[1]) \
1155  c = LowWord(p); \
1156  AssignWord(d, HighWord(p)) \
1157  Squ_NonDiag \
1158 
1159 #define Squ_NonDiag \
1160  Double3Words(c, d)
1161 
1162 #define Squ_SaveAcc(k, i, j) \
1163  Acc3WordsBy2(c, d, e) \
1164  R[k] = c; \
1165  MultiplyWords(p, A[i], A[j]) \
1166  c = LowWord(p); \
1167  AssignWord(d, HighWord(p)) \
1168 
1169 #define Squ_Acc(i, j) \
1170  MulAcc(c, d, A[i], A[j])
1171 
1172 #define Squ_Diag(i) \
1173  Squ_NonDiag \
1174  MulAcc(c, d, A[i], A[i])
1175 
1176 #define Squ_End(n) \
1177  Acc3WordsBy2(c, d, e) \
1178  R[2*n-3] = c; \
1179  MultiplyWords(p, A[n-1], A[n-1])\
1180  Acc2WordsBy2(p, e) \
1181  R[2*n-2] = LowWord(p); \
1182  R[2*n-1] = HighWord(p);
1183 
1184 
1185 void Baseline_Multiply2(word *R, const word *AA, const word *BB)
1186 {
1187  // http://github.com/weidai11/cryptopp/issues/188
1188  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1189  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1190 
1191  Mul_2
1192 }
1193 
1194 void Baseline_Multiply4(word *R, const word *AA, const word *BB)
1195 {
1196  // http://github.com/weidai11/cryptopp/issues/188
1197  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1198  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1199 
1200  Mul_4
1201 }
1202 
1203 void Baseline_Multiply8(word *R, const word *AA, const word *BB)
1204 {
1205  // http://github.com/weidai11/cryptopp/issues/188
1206  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1207  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1208 
1209  Mul_8
1210 }
1211 
1212 void Baseline_Square2(word *R, const word *AA)
1213 {
1214  // http://github.com/weidai11/cryptopp/issues/188
1215  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1216 
1217  Squ_2
1218 }
1219 
1220 void Baseline_Square4(word *R, const word *AA)
1221 {
1222  // http://github.com/weidai11/cryptopp/issues/188
1223  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1224 
1225  Squ_4
1226 }
1227 
1228 void Baseline_Square8(word *R, const word *AA)
1229 {
1230  // http://github.com/weidai11/cryptopp/issues/188
1231  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1232 
1233  Squ_8
1234 }
1235 
1236 void Baseline_MultiplyBottom2(word *R, const word *AA, const word *BB)
1237 {
1238  // http://github.com/weidai11/cryptopp/issues/188
1239  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1240  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1241 
1242  Bot_2
1243 }
1244 
1245 void Baseline_MultiplyBottom4(word *R, const word *AA, const word *BB)
1246 {
1247  // http://github.com/weidai11/cryptopp/issues/188
1248  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1249  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1250 
1251  Bot_4
1252 }
1253 
1254 void Baseline_MultiplyBottom8(word *R, const word *AA, const word *BB)
1255 {
1256  // http://github.com/weidai11/cryptopp/issues/188
1257  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1258  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1259 
1260  Bot_8
1261 }
1262 
1263 #define Top_Begin(n) \
1264  Declare2Words(p) \
1265  word c; \
1266  Declare2Words(d) \
1267  MultiplyWords(p, A[0], B[n-2]);\
1268  AssignWord(d, HighWord(p));
1269 
1270 #define Top_Acc(i, j) \
1271  MultiplyWords(p, A[i], B[j]);\
1272  Acc2WordsBy1(d, HighWord(p));
1273 
1274 #define Top_SaveAcc0(i, j) \
1275  c = LowWord(d); \
1276  AssignWord(d, HighWord(d)) \
1277  MulAcc(c, d, A[i], B[j])
1278 
1279 #define Top_SaveAcc1(i, j) \
1280  c = L<c; \
1281  Acc2WordsBy1(d, c); \
1282  c = LowWord(d); \
1283  AssignWord(d, HighWord(d)) \
1284  MulAcc(c, d, A[i], B[j])
1285 
1286 void Baseline_MultiplyTop2(word *R, const word *A, const word *B, word L)
1287 {
1288  CRYPTOPP_UNUSED(L);
1289  word T[4];
1290  Baseline_Multiply2(T, A, B);
1291  R[0] = T[2];
1292  R[1] = T[3];
1293 }
1294 
1295 void Baseline_MultiplyTop4(word *R, const word *AA, const word *BB, word L)
1296 {
1297  // http://github.com/weidai11/cryptopp/issues/188
1298  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1299  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1300 
1301  Top_Begin(4)
1302  Top_Acc(1, 1) Top_Acc(2, 0) \
1303  Top_SaveAcc0(0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1304  Top_SaveAcc1(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
1305  Mul_SaveAcc(0, 2, 3) Mul_Acc(3, 2) \
1306  Mul_End(1, 3)
1307 }
1308 
1309 void Baseline_MultiplyTop8(word *R, const word *AA, const word *BB, word L)
1310 {
1311  // http://github.com/weidai11/cryptopp/issues/188
1312  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1313  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1314 
1315  Top_Begin(8)
1316  Top_Acc(1, 5) Top_Acc(2, 4) Top_Acc(3, 3) Top_Acc(4, 2) Top_Acc(5, 1) Top_Acc(6, 0) \
1317  Top_SaveAcc0(0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
1318  Top_SaveAcc1(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
1319  Mul_SaveAcc(0, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
1320  Mul_SaveAcc(1, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
1321  Mul_SaveAcc(2, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
1322  Mul_SaveAcc(3, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
1323  Mul_SaveAcc(4, 6, 7) Mul_Acc(7, 6) \
1324  Mul_End(5, 7)
1325 }
1326 
1327 #if !CRYPTOPP_INTEGER_SSE2 // save memory by not compiling these functions when SSE2 is available
1328 void Baseline_Multiply16(word *R, const word *AA, const word *BB)
1329 {
1330  // http://github.com/weidai11/cryptopp/issues/188
1331  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1332  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1333 
1334  Mul_16
1335 }
1336 
1337 void Baseline_Square16(word *R, const word *AA)
1338 {
1339  // http://github.com/weidai11/cryptopp/issues/188
1340  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1341 
1342  Squ_16
1343 }
1344 
1345 void Baseline_MultiplyBottom16(word *R, const word *AA, const word *BB)
1346 {
1347  // http://github.com/weidai11/cryptopp/issues/188
1348  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1349  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1350 
1351  Bot_16
1352 }
1353 
1354 void Baseline_MultiplyTop16(word *R, const word *AA, const word *BB, word L)
1355 {
1356  // http://github.com/weidai11/cryptopp/issues/188
1357  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1358  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1359 
1360  Top_Begin(16)
1361  Top_Acc(1, 13) Top_Acc(2, 12) Top_Acc(3, 11) Top_Acc(4, 10) Top_Acc(5, 9) Top_Acc(6, 8) Top_Acc(7, 7) Top_Acc(8, 6) Top_Acc(9, 5) Top_Acc(10, 4) Top_Acc(11, 3) Top_Acc(12, 2) Top_Acc(13, 1) Top_Acc(14, 0) \
1362  Top_SaveAcc0(0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
1363  Top_SaveAcc1(1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
1364  Mul_SaveAcc(0, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
1365  Mul_SaveAcc(1, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
1366  Mul_SaveAcc(2, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
1367  Mul_SaveAcc(3, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
1368  Mul_SaveAcc(4, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
1369  Mul_SaveAcc(5, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
1370  Mul_SaveAcc(6, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
1371  Mul_SaveAcc(7, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
1372  Mul_SaveAcc(8, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
1373  Mul_SaveAcc(9, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
1374  Mul_SaveAcc(10, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
1375  Mul_SaveAcc(11, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
1376  Mul_SaveAcc(12, 14, 15) Mul_Acc(15, 14) \
1377  Mul_End(13, 15)
1378 }
1379 #endif
1380 
1381 // ********************************************************
1382 
1383 #if CRYPTOPP_INTEGER_SSE2
1384 
1385 CRYPTOPP_ALIGN_DATA(16) static const word32 s_maskLow16[4] CRYPTOPP_SECTION_ALIGN16 = {0xffff,0xffff,0xffff,0xffff};
1386 
1387 #undef Mul_Begin
1388 #undef Mul_Acc
1389 #undef Top_Begin
1390 #undef Top_Acc
1391 #undef Squ_Acc
1392 #undef Squ_NonDiag
1393 #undef Squ_Diag
1394 #undef Squ_SaveAcc
1395 #undef Squ_Begin
1396 #undef Mul_SaveAcc
1397 #undef Bot_Acc
1398 #undef Bot_SaveAcc
1399 #undef Bot_End
1400 #undef Squ_End
1401 #undef Mul_End
1402 
1403 #define SSE2_FinalSave(k) \
1404  AS2( psllq xmm5, 16) \
1405  AS2( paddq xmm4, xmm5) \
1406  AS2( movq QWORD PTR [ecx+8*(k)], xmm4)
1407 
1408 #define SSE2_SaveShift(k) \
1409  AS2( movq xmm0, xmm6) \
1410  AS2( punpckhqdq xmm6, xmm0) \
1411  AS2( movq xmm1, xmm7) \
1412  AS2( punpckhqdq xmm7, xmm1) \
1413  AS2( paddd xmm6, xmm0) \
1414  AS2( pslldq xmm6, 4) \
1415  AS2( paddd xmm7, xmm1) \
1416  AS2( paddd xmm4, xmm6) \
1417  AS2( pslldq xmm7, 4) \
1418  AS2( movq xmm6, xmm4) \
1419  AS2( paddd xmm5, xmm7) \
1420  AS2( movq xmm7, xmm5) \
1421  AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
1422  AS2( psrlq xmm6, 16) \
1423  AS2( paddq xmm6, xmm7) \
1424  AS2( punpckhqdq xmm4, xmm0) \
1425  AS2( punpckhqdq xmm5, xmm0) \
1426  AS2( movq QWORD PTR [ecx+8*(k)+2], xmm6) \
1427  AS2( psrlq xmm6, 3*16) \
1428  AS2( paddd xmm4, xmm6) \
1429 
1430 #define Squ_SSE2_SaveShift(k) \
1431  AS2( movq xmm0, xmm6) \
1432  AS2( punpckhqdq xmm6, xmm0) \
1433  AS2( movq xmm1, xmm7) \
1434  AS2( punpckhqdq xmm7, xmm1) \
1435  AS2( paddd xmm6, xmm0) \
1436  AS2( pslldq xmm6, 4) \
1437  AS2( paddd xmm7, xmm1) \
1438  AS2( paddd xmm4, xmm6) \
1439  AS2( pslldq xmm7, 4) \
1440  AS2( movhlps xmm6, xmm4) \
1441  AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
1442  AS2( paddd xmm5, xmm7) \
1443  AS2( movhps QWORD PTR [esp+12], xmm5)\
1444  AS2( psrlq xmm4, 16) \
1445  AS2( paddq xmm4, xmm5) \
1446  AS2( movq QWORD PTR [ecx+8*(k)+2], xmm4) \
1447  AS2( psrlq xmm4, 3*16) \
1448  AS2( paddd xmm4, xmm6) \
1449  AS2( movq QWORD PTR [esp+4], xmm4)\
1450 
1451 #define SSE2_FirstMultiply(i) \
1452  AS2( movdqa xmm7, [esi+(i)*16])\
1453  AS2( movdqa xmm5, [edi-(i)*16])\
1454  AS2( pmuludq xmm5, xmm7) \
1455  AS2( movdqa xmm4, [ebx])\
1456  AS2( movdqa xmm6, xmm4) \
1457  AS2( pand xmm4, xmm5) \
1458  AS2( psrld xmm5, 16) \
1459  AS2( pmuludq xmm7, [edx-(i)*16])\
1460  AS2( pand xmm6, xmm7) \
1461  AS2( psrld xmm7, 16)
1462 
1463 #define Squ_Begin(n) \
1464  SquPrologue \
1465  AS2( mov esi, esp)\
1466  AS2( and esp, 0xfffffff0)\
1467  AS2( lea edi, [esp-32*n])\
1468  AS2( sub esp, 32*n+16)\
1469  AS1( push esi)\
1470  AS2( mov esi, edi) \
1471  AS2( xor edx, edx) \
1472  ASL(1) \
1473  ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
1474  ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
1475  AS2( movdqa [edi+2*edx], xmm0) \
1476  AS2( psrlq xmm0, 32) \
1477  AS2( movdqa [edi+2*edx+16], xmm0) \
1478  AS2( movdqa [edi+16*n+2*edx], xmm1) \
1479  AS2( psrlq xmm1, 32) \
1480  AS2( movdqa [edi+16*n+2*edx+16], xmm1) \
1481  AS2( add edx, 16) \
1482  AS2( cmp edx, 8*(n)) \
1483  ASJ( jne, 1, b) \
1484  AS2( lea edx, [edi+16*n])\
1485  SSE2_FirstMultiply(0) \
1486 
1487 #define Squ_Acc(i) \
1488  ASL(LSqu##i) \
1489  AS2( movdqa xmm1, [esi+(i)*16]) \
1490  AS2( movdqa xmm0, [edi-(i)*16]) \
1491  AS2( movdqa xmm2, [ebx]) \
1492  AS2( pmuludq xmm0, xmm1) \
1493  AS2( pmuludq xmm1, [edx-(i)*16]) \
1494  AS2( movdqa xmm3, xmm2) \
1495  AS2( pand xmm2, xmm0) \
1496  AS2( psrld xmm0, 16) \
1497  AS2( paddd xmm4, xmm2) \
1498  AS2( paddd xmm5, xmm0) \
1499  AS2( pand xmm3, xmm1) \
1500  AS2( psrld xmm1, 16) \
1501  AS2( paddd xmm6, xmm3) \
1502  AS2( paddd xmm7, xmm1) \
1503 
1504 #define Squ_Acc1(i)
1505 #define Squ_Acc2(i) ASC(call, LSqu##i)
1506 #define Squ_Acc3(i) Squ_Acc2(i)
1507 #define Squ_Acc4(i) Squ_Acc2(i)
1508 #define Squ_Acc5(i) Squ_Acc2(i)
1509 #define Squ_Acc6(i) Squ_Acc2(i)
1510 #define Squ_Acc7(i) Squ_Acc2(i)
1511 #define Squ_Acc8(i) Squ_Acc2(i)
1512 
1513 #define SSE2_End(E, n) \
1514  SSE2_SaveShift(2*(n)-3) \
1515  AS2( movdqa xmm7, [esi+16]) \
1516  AS2( movdqa xmm0, [edi]) \
1517  AS2( pmuludq xmm0, xmm7) \
1518  AS2( movdqa xmm2, [ebx]) \
1519  AS2( pmuludq xmm7, [edx]) \
1520  AS2( movdqa xmm6, xmm2) \
1521  AS2( pand xmm2, xmm0) \
1522  AS2( psrld xmm0, 16) \
1523  AS2( paddd xmm4, xmm2) \
1524  AS2( paddd xmm5, xmm0) \
1525  AS2( pand xmm6, xmm7) \
1526  AS2( psrld xmm7, 16) \
1527  SSE2_SaveShift(2*(n)-2) \
1528  SSE2_FinalSave(2*(n)-1) \
1529  AS1( pop esp)\
1530  E
1531 
1532 #define Squ_End(n) SSE2_End(SquEpilogue, n)
1533 #define Mul_End(n) SSE2_End(MulEpilogue, n)
1534 #define Top_End(n) SSE2_End(TopEpilogue, n)
1535 
1536 #define Squ_Column1(k, i) \
1537  Squ_SSE2_SaveShift(k) \
1538  AS2( add esi, 16) \
1539  SSE2_FirstMultiply(1)\
1540  Squ_Acc##i(i) \
1541  AS2( paddd xmm4, xmm4) \
1542  AS2( paddd xmm5, xmm5) \
1543  AS2( movdqa xmm3, [esi]) \
1544  AS2( movq xmm1, QWORD PTR [esi+8]) \
1545  AS2( pmuludq xmm1, xmm3) \
1546  AS2( pmuludq xmm3, xmm3) \
1547  AS2( movdqa xmm0, [ebx])\
1548  AS2( movdqa xmm2, xmm0) \
1549  AS2( pand xmm0, xmm1) \
1550  AS2( psrld xmm1, 16) \
1551  AS2( paddd xmm6, xmm0) \
1552  AS2( paddd xmm7, xmm1) \
1553  AS2( pand xmm2, xmm3) \
1554  AS2( psrld xmm3, 16) \
1555  AS2( paddd xmm6, xmm6) \
1556  AS2( paddd xmm7, xmm7) \
1557  AS2( paddd xmm4, xmm2) \
1558  AS2( paddd xmm5, xmm3) \
1559  AS2( movq xmm0, QWORD PTR [esp+4])\
1560  AS2( movq xmm1, QWORD PTR [esp+12])\
1561  AS2( paddd xmm4, xmm0)\
1562  AS2( paddd xmm5, xmm1)\
1563 
1564 #define Squ_Column0(k, i) \
1565  Squ_SSE2_SaveShift(k) \
1566  AS2( add edi, 16) \
1567  AS2( add edx, 16) \
1568  SSE2_FirstMultiply(1)\
1569  Squ_Acc##i(i) \
1570  AS2( paddd xmm6, xmm6) \
1571  AS2( paddd xmm7, xmm7) \
1572  AS2( paddd xmm4, xmm4) \
1573  AS2( paddd xmm5, xmm5) \
1574  AS2( movq xmm0, QWORD PTR [esp+4])\
1575  AS2( movq xmm1, QWORD PTR [esp+12])\
1576  AS2( paddd xmm4, xmm0)\
1577  AS2( paddd xmm5, xmm1)\
1578 
1579 #define SSE2_MulAdd45 \
1580  AS2( movdqa xmm7, [esi]) \
1581  AS2( movdqa xmm0, [edi]) \
1582  AS2( pmuludq xmm0, xmm7) \
1583  AS2( movdqa xmm2, [ebx]) \
1584  AS2( pmuludq xmm7, [edx]) \
1585  AS2( movdqa xmm6, xmm2) \
1586  AS2( pand xmm2, xmm0) \
1587  AS2( psrld xmm0, 16) \
1588  AS2( paddd xmm4, xmm2) \
1589  AS2( paddd xmm5, xmm0) \
1590  AS2( pand xmm6, xmm7) \
1591  AS2( psrld xmm7, 16)
1592 
1593 #define Mul_Begin(n) \
1594  MulPrologue \
1595  AS2( mov esi, esp)\
1596  AS2( and esp, 0xfffffff0)\
1597  AS2( sub esp, 48*n+16)\
1598  AS1( push esi)\
1599  AS2( xor edx, edx) \
1600  ASL(1) \
1601  ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
1602  ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
1603  ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
1604  AS2( movdqa [esp+20+2*edx], xmm0) \
1605  AS2( psrlq xmm0, 32) \
1606  AS2( movdqa [esp+20+2*edx+16], xmm0) \
1607  AS2( movdqa [esp+20+16*n+2*edx], xmm1) \
1608  AS2( psrlq xmm1, 32) \
1609  AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \
1610  AS2( movdqa [esp+20+32*n+2*edx], xmm2) \
1611  AS2( psrlq xmm2, 32) \
1612  AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \
1613  AS2( add edx, 16) \
1614  AS2( cmp edx, 8*(n)) \
1615  ASJ( jne, 1, b) \
1616  AS2( lea edi, [esp+20])\
1617  AS2( lea edx, [esp+20+16*n])\
1618  AS2( lea esi, [esp+20+32*n])\
1619  SSE2_FirstMultiply(0) \
1620 
1621 #define Mul_Acc(i) \
1622  ASL(LMul##i) \
1623  AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
1624  AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
1625  AS2( movdqa xmm2, [ebx]) \
1626  AS2( pmuludq xmm0, xmm1) \
1627  AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
1628  AS2( movdqa xmm3, xmm2) \
1629  AS2( pand xmm2, xmm0) \
1630  AS2( psrld xmm0, 16) \
1631  AS2( paddd xmm4, xmm2) \
1632  AS2( paddd xmm5, xmm0) \
1633  AS2( pand xmm3, xmm1) \
1634  AS2( psrld xmm1, 16) \
1635  AS2( paddd xmm6, xmm3) \
1636  AS2( paddd xmm7, xmm1) \
1637 
1638 #define Mul_Acc1(i)
1639 #define Mul_Acc2(i) ASC(call, LMul##i)
1640 #define Mul_Acc3(i) Mul_Acc2(i)
1641 #define Mul_Acc4(i) Mul_Acc2(i)
1642 #define Mul_Acc5(i) Mul_Acc2(i)
1643 #define Mul_Acc6(i) Mul_Acc2(i)
1644 #define Mul_Acc7(i) Mul_Acc2(i)
1645 #define Mul_Acc8(i) Mul_Acc2(i)
1646 #define Mul_Acc9(i) Mul_Acc2(i)
1647 #define Mul_Acc10(i) Mul_Acc2(i)
1648 #define Mul_Acc11(i) Mul_Acc2(i)
1649 #define Mul_Acc12(i) Mul_Acc2(i)
1650 #define Mul_Acc13(i) Mul_Acc2(i)
1651 #define Mul_Acc14(i) Mul_Acc2(i)
1652 #define Mul_Acc15(i) Mul_Acc2(i)
1653 #define Mul_Acc16(i) Mul_Acc2(i)
1654 
1655 #define Mul_Column1(k, i) \
1656  SSE2_SaveShift(k) \
1657  AS2( add esi, 16) \
1658  SSE2_MulAdd45\
1659  Mul_Acc##i(i) \
1660 
1661 #define Mul_Column0(k, i) \
1662  SSE2_SaveShift(k) \
1663  AS2( add edi, 16) \
1664  AS2( add edx, 16) \
1665  SSE2_MulAdd45\
1666  Mul_Acc##i(i) \
1667 
1668 #define Bot_Acc(i) \
1669  AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
1670  AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
1671  AS2( pmuludq xmm0, xmm1) \
1672  AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
1673  AS2( paddq xmm4, xmm0) \
1674  AS2( paddd xmm6, xmm1)
1675 
1676 #define Bot_SaveAcc(k) \
1677  SSE2_SaveShift(k) \
1678  AS2( add edi, 16) \
1679  AS2( add edx, 16) \
1680  AS2( movdqa xmm6, [esi]) \
1681  AS2( movdqa xmm0, [edi]) \
1682  AS2( pmuludq xmm0, xmm6) \
1683  AS2( paddq xmm4, xmm0) \
1684  AS2( psllq xmm5, 16) \
1685  AS2( paddq xmm4, xmm5) \
1686  AS2( pmuludq xmm6, [edx])
1687 
1688 #define Bot_End(n) \
1689  AS2( movhlps xmm7, xmm6) \
1690  AS2( paddd xmm6, xmm7) \
1691  AS2( psllq xmm6, 32) \
1692  AS2( paddd xmm4, xmm6) \
1693  AS2( movq QWORD PTR [ecx+8*((n)-1)], xmm4) \
1694  AS1( pop esp)\
1695  MulEpilogue
1696 
1697 #define Top_Begin(n) \
1698  TopPrologue \
1699  AS2( mov edx, esp)\
1700  AS2( and esp, 0xfffffff0)\
1701  AS2( sub esp, 48*n+16)\
1702  AS1( push edx)\
1703  AS2( xor edx, edx) \
1704  ASL(1) \
1705  ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
1706  ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
1707  ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
1708  AS2( movdqa [esp+20+2*edx], xmm0) \
1709  AS2( psrlq xmm0, 32) \
1710  AS2( movdqa [esp+20+2*edx+16], xmm0) \
1711  AS2( movdqa [esp+20+16*n+2*edx], xmm1) \
1712  AS2( psrlq xmm1, 32) \
1713  AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \
1714  AS2( movdqa [esp+20+32*n+2*edx], xmm2) \
1715  AS2( psrlq xmm2, 32) \
1716  AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \
1717  AS2( add edx, 16) \
1718  AS2( cmp edx, 8*(n)) \
1719  ASJ( jne, 1, b) \
1720  AS2( mov eax, esi) \
1721  AS2( lea edi, [esp+20+00*n+16*(n/2-1)])\
1722  AS2( lea edx, [esp+20+16*n+16*(n/2-1)])\
1723  AS2( lea esi, [esp+20+32*n+16*(n/2-1)])\
1724  AS2( pxor xmm4, xmm4)\
1725  AS2( pxor xmm5, xmm5)
1726 
1727 #define Top_Acc(i) \
1728  AS2( movq xmm0, QWORD PTR [esi+i/2*(1-(i-2*(i/2))*2)*16+8]) \
1729  AS2( pmuludq xmm0, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
1730  AS2( psrlq xmm0, 48) \
1731  AS2( paddd xmm5, xmm0)\
1732 
1733 #define Top_Column0(i) \
1734  AS2( psllq xmm5, 32) \
1735  AS2( add edi, 16) \
1736  AS2( add edx, 16) \
1737  SSE2_MulAdd45\
1738  Mul_Acc##i(i) \
1739 
1740 #define Top_Column1(i) \
1741  SSE2_SaveShift(0) \
1742  AS2( add esi, 16) \
1743  SSE2_MulAdd45\
1744  Mul_Acc##i(i) \
1745  AS2( shr eax, 16) \
1746  AS2( movd xmm0, eax)\
1747  AS2( movd xmm1, [ecx+4])\
1748  AS2( psrld xmm1, 16)\
1749  AS2( pcmpgtd xmm1, xmm0)\
1750  AS2( psrld xmm1, 31)\
1751  AS2( paddd xmm4, xmm1)\
1752 
1753 void SSE2_Square4(word *C, const word *A)
1754 {
1755  Squ_Begin(2)
1756  Squ_Column0(0, 1)
1757  Squ_End(2)
1758 }
1759 
1760 void SSE2_Square8(word *C, const word *A)
1761 {
1762  Squ_Begin(4)
1763 #ifndef __GNUC__
1764  ASJ( jmp, 0, f)
1765  Squ_Acc(2)
1766  AS1( ret) ASL(0)
1767 #endif
1768  Squ_Column0(0, 1)
1769  Squ_Column1(1, 1)
1770  Squ_Column0(2, 2)
1771  Squ_Column1(3, 1)
1772  Squ_Column0(4, 1)
1773  Squ_End(4)
1774 }
1775 
1776 void SSE2_Square16(word *C, const word *A)
1777 {
1778  Squ_Begin(8)
1779 #ifndef __GNUC__
1780  ASJ( jmp, 0, f)
1781  Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
1782  AS1( ret) ASL(0)
1783 #endif
1784  Squ_Column0(0, 1)
1785  Squ_Column1(1, 1)
1786  Squ_Column0(2, 2)
1787  Squ_Column1(3, 2)
1788  Squ_Column0(4, 3)
1789  Squ_Column1(5, 3)
1790  Squ_Column0(6, 4)
1791  Squ_Column1(7, 3)
1792  Squ_Column0(8, 3)
1793  Squ_Column1(9, 2)
1794  Squ_Column0(10, 2)
1795  Squ_Column1(11, 1)
1796  Squ_Column0(12, 1)
1797  Squ_End(8)
1798 }
1799 
1800 void SSE2_Square32(word *C, const word *A)
1801 {
1802  Squ_Begin(16)
1803  ASJ( jmp, 0, f)
1804  Squ_Acc(8) Squ_Acc(7) Squ_Acc(6) Squ_Acc(5) Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
1805  AS1( ret) ASL(0)
1806  Squ_Column0(0, 1)
1807  Squ_Column1(1, 1)
1808  Squ_Column0(2, 2)
1809  Squ_Column1(3, 2)
1810  Squ_Column0(4, 3)
1811  Squ_Column1(5, 3)
1812  Squ_Column0(6, 4)
1813  Squ_Column1(7, 4)
1814  Squ_Column0(8, 5)
1815  Squ_Column1(9, 5)
1816  Squ_Column0(10, 6)
1817  Squ_Column1(11, 6)
1818  Squ_Column0(12, 7)
1819  Squ_Column1(13, 7)
1820  Squ_Column0(14, 8)
1821  Squ_Column1(15, 7)
1822  Squ_Column0(16, 7)
1823  Squ_Column1(17, 6)
1824  Squ_Column0(18, 6)
1825  Squ_Column1(19, 5)
1826  Squ_Column0(20, 5)
1827  Squ_Column1(21, 4)
1828  Squ_Column0(22, 4)
1829  Squ_Column1(23, 3)
1830  Squ_Column0(24, 3)
1831  Squ_Column1(25, 2)
1832  Squ_Column0(26, 2)
1833  Squ_Column1(27, 1)
1834  Squ_Column0(28, 1)
1835  Squ_End(16)
1836 }
1837 
1838 void SSE2_Multiply4(word *C, const word *A, const word *B)
1839 {
1840  Mul_Begin(2)
1841 #ifndef __GNUC__
1842  ASJ( jmp, 0, f)
1843  Mul_Acc(2)
1844  AS1( ret) ASL(0)
1845 #endif
1846  Mul_Column0(0, 2)
1847  Mul_End(2)
1848 }
1849 
1850 void SSE2_Multiply8(word *C, const word *A, const word *B)
1851 {
1852  Mul_Begin(4)
1853 #ifndef __GNUC__
1854  ASJ( jmp, 0, f)
1855  Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1856  AS1( ret) ASL(0)
1857 #endif
1858  Mul_Column0(0, 2)
1859  Mul_Column1(1, 3)
1860  Mul_Column0(2, 4)
1861  Mul_Column1(3, 3)
1862  Mul_Column0(4, 2)
1863  Mul_End(4)
1864 }
1865 
1866 void SSE2_Multiply16(word *C, const word *A, const word *B)
1867 {
1868  Mul_Begin(8)
1869 #ifndef __GNUC__
1870  ASJ( jmp, 0, f)
1871  Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1872  AS1( ret) ASL(0)
1873 #endif
1874  Mul_Column0(0, 2)
1875  Mul_Column1(1, 3)
1876  Mul_Column0(2, 4)
1877  Mul_Column1(3, 5)
1878  Mul_Column0(4, 6)
1879  Mul_Column1(5, 7)
1880  Mul_Column0(6, 8)
1881  Mul_Column1(7, 7)
1882  Mul_Column0(8, 6)
1883  Mul_Column1(9, 5)
1884  Mul_Column0(10, 4)
1885  Mul_Column1(11, 3)
1886  Mul_Column0(12, 2)
1887  Mul_End(8)
1888 }
1889 
1890 void SSE2_Multiply32(word *C, const word *A, const word *B)
1891 {
1892  Mul_Begin(16)
1893  ASJ( jmp, 0, f)
1894  Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1895  AS1( ret) ASL(0)
1896  Mul_Column0(0, 2)
1897  Mul_Column1(1, 3)
1898  Mul_Column0(2, 4)
1899  Mul_Column1(3, 5)
1900  Mul_Column0(4, 6)
1901  Mul_Column1(5, 7)
1902  Mul_Column0(6, 8)
1903  Mul_Column1(7, 9)
1904  Mul_Column0(8, 10)
1905  Mul_Column1(9, 11)
1906  Mul_Column0(10, 12)
1907  Mul_Column1(11, 13)
1908  Mul_Column0(12, 14)
1909  Mul_Column1(13, 15)
1910  Mul_Column0(14, 16)
1911  Mul_Column1(15, 15)
1912  Mul_Column0(16, 14)
1913  Mul_Column1(17, 13)
1914  Mul_Column0(18, 12)
1915  Mul_Column1(19, 11)
1916  Mul_Column0(20, 10)
1917  Mul_Column1(21, 9)
1918  Mul_Column0(22, 8)
1919  Mul_Column1(23, 7)
1920  Mul_Column0(24, 6)
1921  Mul_Column1(25, 5)
1922  Mul_Column0(26, 4)
1923  Mul_Column1(27, 3)
1924  Mul_Column0(28, 2)
1925  Mul_End(16)
1926 }
1927 
1928 void SSE2_MultiplyBottom4(word *C, const word *A, const word *B)
1929 {
1930  Mul_Begin(2)
1931  Bot_SaveAcc(0) Bot_Acc(2)
1932  Bot_End(2)
1933 }
1934 
1935 void SSE2_MultiplyBottom8(word *C, const word *A, const word *B)
1936 {
1937  Mul_Begin(4)
1938 #ifndef __GNUC__
1939  ASJ( jmp, 0, f)
1940  Mul_Acc(3) Mul_Acc(2)
1941  AS1( ret) ASL(0)
1942 #endif
1943  Mul_Column0(0, 2)
1944  Mul_Column1(1, 3)
1945  Bot_SaveAcc(2) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
1946  Bot_End(4)
1947 }
1948 
1949 void SSE2_MultiplyBottom16(word *C, const word *A, const word *B)
1950 {
1951  Mul_Begin(8)
1952 #ifndef __GNUC__
1953  ASJ( jmp, 0, f)
1954  Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1955  AS1( ret) ASL(0)
1956 #endif
1957  Mul_Column0(0, 2)
1958  Mul_Column1(1, 3)
1959  Mul_Column0(2, 4)
1960  Mul_Column1(3, 5)
1961  Mul_Column0(4, 6)
1962  Mul_Column1(5, 7)
1963  Bot_SaveAcc(6) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
1964  Bot_End(8)
1965 }
1966 
1967 void SSE2_MultiplyBottom32(word *C, const word *A, const word *B)
1968 {
1969  Mul_Begin(16)
1970 #ifndef __GNUC__
1971  ASJ( jmp, 0, f)
1972  Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1973  AS1( ret) ASL(0)
1974 #endif
1975  Mul_Column0(0, 2)
1976  Mul_Column1(1, 3)
1977  Mul_Column0(2, 4)
1978  Mul_Column1(3, 5)
1979  Mul_Column0(4, 6)
1980  Mul_Column1(5, 7)
1981  Mul_Column0(6, 8)
1982  Mul_Column1(7, 9)
1983  Mul_Column0(8, 10)
1984  Mul_Column1(9, 11)
1985  Mul_Column0(10, 12)
1986  Mul_Column1(11, 13)
1987  Mul_Column0(12, 14)
1988  Mul_Column1(13, 15)
1989  Bot_SaveAcc(14) Bot_Acc(16) Bot_Acc(15) Bot_Acc(14) Bot_Acc(13) Bot_Acc(12) Bot_Acc(11) Bot_Acc(10) Bot_Acc(9) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
1990  Bot_End(16)
1991 }
1992 
1993 void SSE2_MultiplyTop8(word *C, const word *A, const word *B, word L)
1994 {
1995  Top_Begin(4)
1996  Top_Acc(3) Top_Acc(2) Top_Acc(1)
1997 #ifndef __GNUC__
1998  ASJ( jmp, 0, f)
1999  Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2000  AS1( ret) ASL(0)
2001 #endif
2002  Top_Column0(4)
2003  Top_Column1(3)
2004  Mul_Column0(0, 2)
2005  Top_End(2)
2006 }
2007 
2008 void SSE2_MultiplyTop16(word *C, const word *A, const word *B, word L)
2009 {
2010  Top_Begin(8)
2011  Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
2012 #ifndef __GNUC__
2013  ASJ( jmp, 0, f)
2014  Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2015  AS1( ret) ASL(0)
2016 #endif
2017  Top_Column0(8)
2018  Top_Column1(7)
2019  Mul_Column0(0, 6)
2020  Mul_Column1(1, 5)
2021  Mul_Column0(2, 4)
2022  Mul_Column1(3, 3)
2023  Mul_Column0(4, 2)
2024  Top_End(4)
2025 }
2026 
2027 void SSE2_MultiplyTop32(word *C, const word *A, const word *B, word L)
2028 {
2029  Top_Begin(16)
2030  Top_Acc(15) Top_Acc(14) Top_Acc(13) Top_Acc(12) Top_Acc(11) Top_Acc(10) Top_Acc(9) Top_Acc(8) Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
2031 #ifndef __GNUC__
2032  ASJ( jmp, 0, f)
2033  Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2034  AS1( ret) ASL(0)
2035 #endif
2036  Top_Column0(16)
2037  Top_Column1(15)
2038  Mul_Column0(0, 14)
2039  Mul_Column1(1, 13)
2040  Mul_Column0(2, 12)
2041  Mul_Column1(3, 11)
2042  Mul_Column0(4, 10)
2043  Mul_Column1(5, 9)
2044  Mul_Column0(6, 8)
2045  Mul_Column1(7, 7)
2046  Mul_Column0(8, 6)
2047  Mul_Column1(9, 5)
2048  Mul_Column0(10, 4)
2049  Mul_Column1(11, 3)
2050  Mul_Column0(12, 2)
2051  Top_End(8)
2052 }
2053 
2054 #endif // #if CRYPTOPP_INTEGER_SSE2
2055 
2056 // ********************************************************
2057 
2058 typedef int (CRYPTOPP_FASTCALL * PAdd)(size_t N, word *C, const word *A, const word *B);
2059 typedef void (* PMul)(word *C, const word *A, const word *B);
2060 typedef void (* PSqu)(word *C, const word *A);
2061 typedef void (* PMulTop)(word *C, const word *A, const word *B, word L);
2062 
2063 #if CRYPTOPP_INTEGER_SSE2
2064 static PAdd s_pAdd = &Baseline_Add, s_pSub = &Baseline_Sub;
2065 static size_t s_recursionLimit = 8;
2066 #else
2067 static const size_t s_recursionLimit = 16;
2068 #endif
2069 
2070 static PMul s_pMul[9], s_pBot[9];
2071 static PSqu s_pSqu[9];
2072 static PMulTop s_pTop[9];
2073 
2074 static void SetFunctionPointers()
2075 {
2076  s_pMul[0] = &Baseline_Multiply2;
2077  s_pBot[0] = &Baseline_MultiplyBottom2;
2078  s_pSqu[0] = &Baseline_Square2;
2079  s_pTop[0] = &Baseline_MultiplyTop2;
2080  s_pTop[1] = &Baseline_MultiplyTop4;
2081 
2082 #if CRYPTOPP_INTEGER_SSE2
2083  if (HasSSE2())
2084  {
2085 #if _MSC_VER != 1200 || defined(NDEBUG)
2086  if (IsP4())
2087  {
2088  s_pAdd = &SSE2_Add;
2089  s_pSub = &SSE2_Sub;
2090  }
2091 #endif
2092 
2093  s_recursionLimit = 32;
2094 
2095  s_pMul[1] = &SSE2_Multiply4;
2096  s_pMul[2] = &SSE2_Multiply8;
2097  s_pMul[4] = &SSE2_Multiply16;
2098  s_pMul[8] = &SSE2_Multiply32;
2099 
2100  s_pBot[1] = &SSE2_MultiplyBottom4;
2101  s_pBot[2] = &SSE2_MultiplyBottom8;
2102  s_pBot[4] = &SSE2_MultiplyBottom16;
2103  s_pBot[8] = &SSE2_MultiplyBottom32;
2104 
2105  s_pSqu[1] = &SSE2_Square4;
2106  s_pSqu[2] = &SSE2_Square8;
2107  s_pSqu[4] = &SSE2_Square16;
2108  s_pSqu[8] = &SSE2_Square32;
2109 
2110  s_pTop[2] = &SSE2_MultiplyTop8;
2111  s_pTop[4] = &SSE2_MultiplyTop16;
2112  s_pTop[8] = &SSE2_MultiplyTop32;
2113  }
2114  else
2115 #endif
2116  {
2117  s_pMul[1] = &Baseline_Multiply4;
2118  s_pMul[2] = &Baseline_Multiply8;
2119 
2120  s_pBot[1] = &Baseline_MultiplyBottom4;
2121  s_pBot[2] = &Baseline_MultiplyBottom8;
2122 
2123  s_pSqu[1] = &Baseline_Square4;
2124  s_pSqu[2] = &Baseline_Square8;
2125 
2126  s_pTop[2] = &Baseline_MultiplyTop8;
2127 
2128 #if !CRYPTOPP_INTEGER_SSE2
2129  s_pMul[4] = &Baseline_Multiply16;
2130  s_pBot[4] = &Baseline_MultiplyBottom16;
2131  s_pSqu[4] = &Baseline_Square16;
2132  s_pTop[4] = &Baseline_MultiplyTop16;
2133 #endif
2134  }
2135 }
2136 
2137 inline int Add(word *C, const word *A, const word *B, size_t N)
2138 {
2139 #if CRYPTOPP_INTEGER_SSE2
2140  return s_pAdd(N, C, A, B);
2141 #else
2142  return Baseline_Add(N, C, A, B);
2143 #endif
2144 }
2145 
2146 inline int Subtract(word *C, const word *A, const word *B, size_t N)
2147 {
2148 #if CRYPTOPP_INTEGER_SSE2
2149  return s_pSub(N, C, A, B);
2150 #else
2151  return Baseline_Sub(N, C, A, B);
2152 #endif
2153 }
2154 
2155 // ********************************************************
2156 
2157 
2158 #define A0 A
2159 #define A1 (A+N2)
2160 #define B0 B
2161 #define B1 (B+N2)
2162 
2163 #define T0 T
2164 #define T1 (T+N2)
2165 #define T2 (T+N)
2166 #define T3 (T+N+N2)
2167 
2168 #define R0 R
2169 #define R1 (R+N2)
2170 #define R2 (R+N)
2171 #define R3 (R+N+N2)
2172 
2173 // R[2*N] - result = A*B
2174 // T[2*N] - temporary work space
2175 // A[N] --- multiplier
2176 // B[N] --- multiplicant
2177 
2178 void RecursiveMultiply(word *R, word *T, const word *A, const word *B, size_t N)
2179 {
2180  assert(N>=2 && N%2==0);
2181 
2182  if (N <= s_recursionLimit)
2183  s_pMul[N/4](R, A, B);
2184  else
2185  {
2186  const size_t N2 = N/2;
2187 
2188  size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
2189  Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
2190 
2191  size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
2192  Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
2193 
2194  RecursiveMultiply(R2, T2, A1, B1, N2);
2195  RecursiveMultiply(T0, T2, R0, R1, N2);
2196  RecursiveMultiply(R0, T2, A0, B0, N2);
2197 
2198  // now T[01] holds (A1-A0)*(B0-B1), R[01] holds A0*B0, R[23] holds A1*B1
2199 
2200  int c2 = Add(R2, R2, R1, N2);
2201  int c3 = c2;
2202  c2 += Add(R1, R2, R0, N2);
2203  c3 += Add(R2, R2, R3, N2);
2204 
2205  if (AN2 == BN2)
2206  c3 -= Subtract(R1, R1, T0, N);
2207  else
2208  c3 += Add(R1, R1, T0, N);
2209 
2210  c3 += Increment(R2, N2, c2);
2211  assert (c3 >= 0 && c3 <= 2);
2212  Increment(R3, N2, c3);
2213  }
2214 }
2215 
2216 // R[2*N] - result = A*A
2217 // T[2*N] - temporary work space
2218 // A[N] --- number to be squared
2219 
2220 void RecursiveSquare(word *R, word *T, const word *A, size_t N)
2221 {
2222  assert(N && N%2==0);
2223 
2224  if (N <= s_recursionLimit)
2225  s_pSqu[N/4](R, A);
2226  else
2227  {
2228  const size_t N2 = N/2;
2229 
2230  RecursiveSquare(R0, T2, A0, N2);
2231  RecursiveSquare(R2, T2, A1, N2);
2232  RecursiveMultiply(T0, T2, A0, A1, N2);
2233 
2234  int carry = Add(R1, R1, T0, N);
2235  carry += Add(R1, R1, T0, N);
2236  Increment(R3, N2, carry);
2237  }
2238 }
2239 
2240 // R[N] - bottom half of A*B
2241 // T[3*N/2] - temporary work space
2242 // A[N] - multiplier
2243 // B[N] - multiplicant
2244 
2245 void RecursiveMultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
2246 {
2247  assert(N>=2 && N%2==0);
2248 
2249  if (N <= s_recursionLimit)
2250  s_pBot[N/4](R, A, B);
2251  else
2252  {
2253  const size_t N2 = N/2;
2254 
2255  RecursiveMultiply(R, T, A0, B0, N2);
2256  RecursiveMultiplyBottom(T0, T1, A1, B0, N2);
2257  Add(R1, R1, T0, N2);
2258  RecursiveMultiplyBottom(T0, T1, A0, B1, N2);
2259  Add(R1, R1, T0, N2);
2260  }
2261 }
2262 
2263 // R[N] --- upper half of A*B
2264 // T[2*N] - temporary work space
2265 // L[N] --- lower half of A*B
2266 // A[N] --- multiplier
2267 // B[N] --- multiplicant
2268 
2269 void MultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, size_t N)
2270 {
2271  assert(N>=2 && N%2==0);
2272 
2273  if (N <= s_recursionLimit)
2274  s_pTop[N/4](R, A, B, L[N-1]);
2275  else
2276  {
2277  const size_t N2 = N/2;
2278 
2279  size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
2280  Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
2281 
2282  size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
2283  Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
2284 
2285  RecursiveMultiply(T0, T2, R0, R1, N2);
2286  RecursiveMultiply(R0, T2, A1, B1, N2);
2287 
2288  // now T[01] holds (A1-A0)*(B0-B1) = A1*B0+A0*B1-A1*B1-A0*B0, R[01] holds A1*B1
2289 
2290  int t, c3;
2291  int c2 = Subtract(T2, L+N2, L, N2);
2292 
2293  if (AN2 == BN2)
2294  {
2295  c2 -= Add(T2, T2, T0, N2);
2296  t = (Compare(T2, R0, N2) == -1);
2297  c3 = t - Subtract(T2, T2, T1, N2);
2298  }
2299  else
2300  {
2301  c2 += Subtract(T2, T2, T0, N2);
2302  t = (Compare(T2, R0, N2) == -1);
2303  c3 = t + Add(T2, T2, T1, N2);
2304  }
2305 
2306  c2 += t;
2307  if (c2 >= 0)
2308  c3 += Increment(T2, N2, c2);
2309  else
2310  c3 -= Decrement(T2, N2, -c2);
2311  c3 += Add(R0, T2, R1, N2);
2312 
2313  assert (c3 >= 0 && c3 <= 2);
2314  Increment(R1, N2, c3);
2315  }
2316 }
2317 
2318 inline void Multiply(word *R, word *T, const word *A, const word *B, size_t N)
2319 {
2320  RecursiveMultiply(R, T, A, B, N);
2321 }
2322 
2323 inline void Square(word *R, word *T, const word *A, size_t N)
2324 {
2325  RecursiveSquare(R, T, A, N);
2326 }
2327 
2328 inline void MultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
2329 {
2330  RecursiveMultiplyBottom(R, T, A, B, N);
2331 }
2332 
2333 // R[NA+NB] - result = A*B
2334 // T[NA+NB] - temporary work space
2335 // A[NA] ---- multiplier
2336 // B[NB] ---- multiplicant
2337 
2338 void AsymmetricMultiply(word *R, word *T, const word *A, size_t NA, const word *B, size_t NB)
2339 {
2340  if (NA == NB)
2341  {
2342  if (A == B)
2343  Square(R, T, A, NA);
2344  else
2345  Multiply(R, T, A, B, NA);
2346 
2347  return;
2348  }
2349 
2350  if (NA > NB)
2351  {
2352  std::swap(A, B);
2353  std::swap(NA, NB);
2354  }
2355 
2356  assert(NB % NA == 0);
2357 
2358  if (NA==2 && !A[1])
2359  {
2360  switch (A[0])
2361  {
2362  case 0:
2363  SetWords(R, 0, NB+2);
2364  return;
2365  case 1:
2366  CopyWords(R, B, NB);
2367  R[NB] = R[NB+1] = 0;
2368  return;
2369  default:
2370  R[NB] = LinearMultiply(R, B, A[0], NB);
2371  R[NB+1] = 0;
2372  return;
2373  }
2374  }
2375 
2376  size_t i;
2377  if ((NB/NA)%2 == 0)
2378  {
2379  Multiply(R, T, A, B, NA);
2380  CopyWords(T+2*NA, R+NA, NA);
2381 
2382  for (i=2*NA; i<NB; i+=2*NA)
2383  Multiply(T+NA+i, T, A, B+i, NA);
2384  for (i=NA; i<NB; i+=2*NA)
2385  Multiply(R+i, T, A, B+i, NA);
2386  }
2387  else
2388  {
2389  for (i=0; i<NB; i+=2*NA)
2390  Multiply(R+i, T, A, B+i, NA);
2391  for (i=NA; i<NB; i+=2*NA)
2392  Multiply(T+NA+i, T, A, B+i, NA);
2393  }
2394 
2395  if (Add(R+NA, R+NA, T+2*NA, NB-NA))
2396  Increment(R+NB, NA);
2397 }
2398 
2399 // R[N] ----- result = A inverse mod 2**(WORD_BITS*N)
2400 // T[3*N/2] - temporary work space
2401 // A[N] ----- an odd number as input
2402 
2403 void RecursiveInverseModPower2(word *R, word *T, const word *A, size_t N)
2404 {
2405  if (N==2)
2406  {
2407  T[0] = AtomicInverseModPower2(A[0]);
2408  T[1] = 0;
2409  s_pBot[0](T+2, T, A);
2410  TwosComplement(T+2, 2);
2411  Increment(T+2, 2, 2);
2412  s_pBot[0](R, T, T+2);
2413  }
2414  else
2415  {
2416  const size_t N2 = N/2;
2417  RecursiveInverseModPower2(R0, T0, A0, N2);
2418  T0[0] = 1;
2419  SetWords(T0+1, 0, N2-1);
2420  MultiplyTop(R1, T1, T0, R0, A0, N2);
2421  MultiplyBottom(T0, T1, R0, A1, N2);
2422  Add(T0, R1, T0, N2);
2423  TwosComplement(T0, N2);
2424  MultiplyBottom(R1, T1, R0, T0, N2);
2425  }
2426 }
2427 
2428 // R[N] --- result = X/(2**(WORD_BITS*N)) mod M
2429 // T[3*N] - temporary work space
2430 // X[2*N] - number to be reduced
2431 // M[N] --- modulus
2432 // U[N] --- multiplicative inverse of M mod 2**(WORD_BITS*N)
2433 
2434 void MontgomeryReduce(word *R, word *T, word *X, const word *M, const word *U, size_t N)
2435 {
2436 #if 1
2437  MultiplyBottom(R, T, X, U, N);
2438  MultiplyTop(T, T+N, X, R, M, N);
2439  word borrow = Subtract(T, X+N, T, N);
2440  // defend against timing attack by doing this Add even when not needed
2441  word carry = Add(T+N, T, M, N);
2442  assert(carry | !borrow);
2443  CRYPTOPP_UNUSED(carry), CRYPTOPP_UNUSED(borrow);
2444  CopyWords(R, T + ((0-borrow) & N), N);
2445 #elif 0
2446  const word u = 0-U[0];
2447  Declare2Words(p)
2448  for (size_t i=0; i<N; i++)
2449  {
2450  const word t = u * X[i];
2451  word c = 0;
2452  for (size_t j=0; j<N; j+=2)
2453  {
2454  MultiplyWords(p, t, M[j]);
2455  Acc2WordsBy1(p, X[i+j]);
2456  Acc2WordsBy1(p, c);
2457  X[i+j] = LowWord(p);
2458  c = HighWord(p);
2459  MultiplyWords(p, t, M[j+1]);
2460  Acc2WordsBy1(p, X[i+j+1]);
2461  Acc2WordsBy1(p, c);
2462  X[i+j+1] = LowWord(p);
2463  c = HighWord(p);
2464  }
2465 
2466  if (Increment(X+N+i, N-i, c))
2467  while (!Subtract(X+N, X+N, M, N)) {}
2468  }
2469 
2470  memcpy(R, X+N, N*WORD_SIZE);
2471 #else
2472  __m64 u = _mm_cvtsi32_si64(0-U[0]), p;
2473  for (size_t i=0; i<N; i++)
2474  {
2475  __m64 t = _mm_cvtsi32_si64(X[i]);
2476  t = _mm_mul_su32(t, u);
2477  __m64 c = _mm_setzero_si64();
2478  for (size_t j=0; j<N; j+=2)
2479  {
2480  p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j]));
2481  p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j]));
2482  c = _mm_add_si64(c, p);
2483  X[i+j] = _mm_cvtsi64_si32(c);
2484  c = _mm_srli_si64(c, 32);
2485  p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j+1]));
2486  p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j+1]));
2487  c = _mm_add_si64(c, p);
2488  X[i+j+1] = _mm_cvtsi64_si32(c);
2489  c = _mm_srli_si64(c, 32);
2490  }
2491 
2492  if (Increment(X+N+i, N-i, _mm_cvtsi64_si32(c)))
2493  while (!Subtract(X+N, X+N, M, N)) {}
2494  }
2495 
2496  memcpy(R, X+N, N*WORD_SIZE);
2497  _mm_empty();
2498 #endif
2499 }
2500 
2501 // R[N] --- result = X/(2**(WORD_BITS*N/2)) mod M
2502 // T[2*N] - temporary work space
2503 // X[2*N] - number to be reduced
2504 // M[N] --- modulus
2505 // U[N/2] - multiplicative inverse of M mod 2**(WORD_BITS*N/2)
2506 // V[N] --- 2**(WORD_BITS*3*N/2) mod M
2507 
2508 void HalfMontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, const word *V, size_t N)
2509 {
2510  assert(N%2==0 && N>=4);
2511 
2512 #define M0 M
2513 #define M1 (M+N2)
2514 #define V0 V
2515 #define V1 (V+N2)
2516 
2517 #define X0 X
2518 #define X1 (X+N2)
2519 #define X2 (X+N)
2520 #define X3 (X+N+N2)
2521 
2522  const size_t N2 = N/2;
2523  Multiply(T0, T2, V0, X3, N2);
2524  int c2 = Add(T0, T0, X0, N);
2525  MultiplyBottom(T3, T2, T0, U, N2);
2526  MultiplyTop(T2, R, T0, T3, M0, N2);
2527  c2 -= Subtract(T2, T1, T2, N2);
2528  Multiply(T0, R, T3, M1, N2);
2529  c2 -= Subtract(T0, T2, T0, N2);
2530  int c3 = -(int)Subtract(T1, X2, T1, N2);
2531  Multiply(R0, T2, V1, X3, N2);
2532  c3 += Add(R, R, T, N);
2533 
2534  if (c2>0)
2535  c3 += Increment(R1, N2);
2536  else if (c2<0)
2537  c3 -= Decrement(R1, N2, -c2);
2538 
2539  assert(c3>=-1 && c3<=1);
2540  if (c3>0)
2541  Subtract(R, R, M, N);
2542  else if (c3<0)
2543  Add(R, R, M, N);
2544 
2545 #undef M0
2546 #undef M1
2547 #undef V0
2548 #undef V1
2549 
2550 #undef X0
2551 #undef X1
2552 #undef X2
2553 #undef X3
2554 }
2555 
2556 #undef A0
2557 #undef A1
2558 #undef B0
2559 #undef B1
2560 
2561 #undef T0
2562 #undef T1
2563 #undef T2
2564 #undef T3
2565 
2566 #undef R0
2567 #undef R1
2568 #undef R2
2569 #undef R3
2570 
2571 /*
2572 // do a 3 word by 2 word divide, returns quotient and leaves remainder in A
2573 static word SubatomicDivide(word *A, word B0, word B1)
2574 {
2575  // assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a word
2576  assert(A[2] < B1 || (A[2]==B1 && A[1] < B0));
2577 
2578  // estimate the quotient: do a 2 word by 1 word divide
2579  word Q;
2580  if (B1+1 == 0)
2581  Q = A[2];
2582  else
2583  Q = DWord(A[1], A[2]).DividedBy(B1+1);
2584 
2585  // now subtract Q*B from A
2586  DWord p = DWord::Multiply(B0, Q);
2587  DWord u = (DWord) A[0] - p.GetLowHalf();
2588  A[0] = u.GetLowHalf();
2589  u = (DWord) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - DWord::Multiply(B1, Q);
2590  A[1] = u.GetLowHalf();
2591  A[2] += u.GetHighHalf();
2592 
2593  // Q <= actual quotient, so fix it
2594  while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
2595  {
2596  u = (DWord) A[0] - B0;
2597  A[0] = u.GetLowHalf();
2598  u = (DWord) A[1] - B1 - u.GetHighHalfAsBorrow();
2599  A[1] = u.GetLowHalf();
2600  A[2] += u.GetHighHalf();
2601  Q++;
2602  assert(Q); // shouldn't overflow
2603  }
2604 
2605  return Q;
2606 }
2607 
2608 // do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1
2609 static inline void AtomicDivide(word *Q, const word *A, const word *B)
2610 {
2611  if (!B[0] && !B[1]) // if divisor is 0, we assume divisor==2**(2*WORD_BITS)
2612  {
2613  Q[0] = A[2];
2614  Q[1] = A[3];
2615  }
2616  else
2617  {
2618  word T[4];
2619  T[0] = A[0]; T[1] = A[1]; T[2] = A[2]; T[3] = A[3];
2620  Q[1] = SubatomicDivide(T+1, B[0], B[1]);
2621  Q[0] = SubatomicDivide(T, B[0], B[1]);
2622 
2623 #ifndef NDEBUG
2624  // multiply quotient and divisor and add remainder, make sure it equals dividend
2625  assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
2626  word P[4];
2627  LowLevel::Multiply2(P, Q, B);
2628  Add(P, P, T, 4);
2629  assert(memcmp(P, A, 4*WORD_SIZE)==0);
2630 #endif
2631  }
2632 }
2633 */
2634 
2635 static inline void AtomicDivide(word *Q, const word *A, const word *B)
2636 {
2637  word T[4];
2638  DWord q = DivideFourWordsByTwo<word, DWord>(T, DWord(A[0], A[1]), DWord(A[2], A[3]), DWord(B[0], B[1]));
2639  Q[0] = q.GetLowHalf();
2640  Q[1] = q.GetHighHalf();
2641 
2642 #ifndef NDEBUG
2643  if (B[0] || B[1])
2644  {
2645  // multiply quotient and divisor and add remainder, make sure it equals dividend
2646  assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
2647  word P[4];
2648  s_pMul[0](P, Q, B);
2649  Add(P, P, T, 4);
2650  assert(memcmp(P, A, 4*WORD_SIZE)==0);
2651  }
2652 #endif
2653 }
2654 
2655 // for use by Divide(), corrects the underestimated quotient {Q1,Q0}
2656 static void CorrectQuotientEstimate(word *R, word *T, word *Q, const word *B, size_t N)
2657 {
2658  assert(N && N%2==0);
2659 
2660  AsymmetricMultiply(T, T+N+2, Q, 2, B, N);
2661 
2662  word borrow = Subtract(R, R, T, N+2);
2663  assert(!borrow && !R[N+1]);
2664  CRYPTOPP_UNUSED(borrow);
2665 
2666  while (R[N] || Compare(R, B, N) >= 0)
2667  {
2668  R[N] -= Subtract(R, R, B, N);
2669  Q[1] += (++Q[0]==0);
2670  assert(Q[0] || Q[1]); // no overflow
2671  }
2672 }
2673 
2674 // R[NB] -------- remainder = A%B
2675 // Q[NA-NB+2] --- quotient = A/B
2676 // T[NA+3*(NB+2)] - temp work space
2677 // A[NA] -------- dividend
2678 // B[NB] -------- divisor
2679 
2680 void Divide(word *R, word *Q, word *T, const word *A, size_t NA, const word *B, size_t NB)
2681 {
2682  assert(NA && NB && NA%2==0 && NB%2==0);
2683  assert(B[NB-1] || B[NB-2]);
2684  assert(NB <= NA);
2685 
2686  // set up temporary work space
2687  word *const TA=T;
2688  word *const TB=T+NA+2;
2689  word *const TP=T+NA+2+NB;
2690 
2691  // copy B into TB and normalize it so that TB has highest bit set to 1
2692  unsigned shiftWords = (B[NB-1]==0);
2693  TB[0] = TB[NB-1] = 0;
2694  CopyWords(TB+shiftWords, B, NB-shiftWords);
2695  unsigned shiftBits = WORD_BITS - BitPrecision(TB[NB-1]);
2696  assert(shiftBits < WORD_BITS);
2697  ShiftWordsLeftByBits(TB, NB, shiftBits);
2698 
2699  // copy A into TA and normalize it
2700  TA[0] = TA[NA] = TA[NA+1] = 0;
2701  CopyWords(TA+shiftWords, A, NA);
2702  ShiftWordsLeftByBits(TA, NA+2, shiftBits);
2703 
2704  if (TA[NA+1]==0 && TA[NA] <= 1)
2705  {
2706  Q[NA-NB+1] = Q[NA-NB] = 0;
2707  while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0)
2708  {
2709  TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB);
2710  ++Q[NA-NB];
2711  }
2712  }
2713  else
2714  {
2715  NA+=2;
2716  assert(Compare(TA+NA-NB, TB, NB) < 0);
2717  }
2718 
2719  word BT[2];
2720  BT[0] = TB[NB-2] + 1;
2721  BT[1] = TB[NB-1] + (BT[0]==0);
2722 
2723  // start reducing TA mod TB, 2 words at a time
2724  for (size_t i=NA-2; i>=NB; i-=2)
2725  {
2726  AtomicDivide(Q+i-NB, TA+i-2, BT);
2727  CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB);
2728  }
2729 
2730  // copy TA into R, and denormalize it
2731  CopyWords(R, TA+shiftWords, NB);
2732  ShiftWordsRightByBits(R, NB, shiftBits);
2733 }
2734 
2735 static inline size_t EvenWordCount(const word *X, size_t N)
2736 {
2737  while (N && X[N-2]==0 && X[N-1]==0)
2738  N-=2;
2739  return N;
2740 }
2741 
2742 // return k
2743 // R[N] --- result = A^(-1) * 2^k mod M
2744 // T[4*N] - temporary work space
2745 // A[NA] -- number to take inverse of
2746 // M[N] --- modulus
2747 
2748 unsigned int AlmostInverse(word *R, word *T, const word *A, size_t NA, const word *M, size_t N)
2749 {
2750  assert(NA<=N && N && N%2==0);
2751 
2752  word *b = T;
2753  word *c = T+N;
2754  word *f = T+2*N;
2755  word *g = T+3*N;
2756  size_t bcLen=2, fgLen=EvenWordCount(M, N);
2757  unsigned int k=0;
2758  bool s=false;
2759 
2760  SetWords(T, 0, 3*N);
2761  b[0]=1;
2762  CopyWords(f, A, NA);
2763  CopyWords(g, M, N);
2764 
2765  while (1)
2766  {
2767  word t=f[0];
2768  while (!t)
2769  {
2770  if (EvenWordCount(f, fgLen)==0)
2771  {
2772  SetWords(R, 0, N);
2773  return 0;
2774  }
2775 
2776  ShiftWordsRightByWords(f, fgLen, 1);
2777  bcLen += 2 * (c[bcLen-1] != 0);
2778  assert(bcLen <= N);
2779  ShiftWordsLeftByWords(c, bcLen, 1);
2780  k+=WORD_BITS;
2781  t=f[0];
2782  }
2783 
2784  // t must be non-0; otherwise undefined.
2785  unsigned int i = TrailingZeros(t);
2786  t >>= i;
2787  k += i;
2788 
2789  if (t==1 && f[1]==0 && EvenWordCount(f+2, fgLen-2)==0)
2790  {
2791  if (s)
2792  Subtract(R, M, b, N);
2793  else
2794  CopyWords(R, b, N);
2795  return k;
2796  }
2797 
2798  ShiftWordsRightByBits(f, fgLen, i);
2799  t = ShiftWordsLeftByBits(c, bcLen, i);
2800  c[bcLen] += t;
2801  bcLen += 2 * (t!=0);
2802  assert(bcLen <= N);
2803 
2804  bool swap = Compare(f, g, fgLen)==-1;
2805  ConditionalSwapPointers(swap, f, g);
2806  ConditionalSwapPointers(swap, b, c);
2807  s ^= swap;
2808 
2809  fgLen -= 2 * !(f[fgLen-2] | f[fgLen-1]);
2810 
2811  Subtract(f, f, g, fgLen);
2812  t = Add(b, b, c, bcLen);
2813  b[bcLen] += t;
2814  bcLen += 2*t;
2815  assert(bcLen <= N);
2816  }
2817 }
2818 
2819 // R[N] - result = A/(2^k) mod M
2820 // A[N] - input
2821 // M[N] - modulus
2822 
2823 void DivideByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
2824 {
2825  CopyWords(R, A, N);
2826 
2827  while (k--)
2828  {
2829  if (R[0]%2==0)
2830  ShiftWordsRightByBits(R, N, 1);
2831  else
2832  {
2833  word carry = Add(R, R, M, N);
2834  ShiftWordsRightByBits(R, N, 1);
2835  R[N-1] += carry<<(WORD_BITS-1);
2836  }
2837  }
2838 }
2839 
2840 // R[N] - result = A*(2^k) mod M
2841 // A[N] - input
2842 // M[N] - modulus
2843 
2844 void MultiplyByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
2845 {
2846  CopyWords(R, A, N);
2847 
2848  while (k--)
2849  if (ShiftWordsLeftByBits(R, N, 1) || Compare(R, M, N)>=0)
2850  Subtract(R, R, M, N);
2851 }
2852 
2853 // ******************************************************************
2854 
2855 InitializeInteger::InitializeInteger()
2856 {
2857  if (!g_pAssignIntToInteger)
2858  {
2859  SetFunctionPointers();
2860  g_pAssignIntToInteger = (CryptoPP::PAssignIntToInteger)AssignIntToInteger;
2861  }
2862 }
2863 
2864 static const unsigned int RoundupSizeTable[] = {2, 2, 2, 4, 4, 8, 8, 8, 8};
2865 
2866 static inline size_t RoundupSize(size_t n)
2867 {
2868  if (n<=8)
2869  return RoundupSizeTable[n];
2870  else if (n<=16)
2871  return 16;
2872  else if (n<=32)
2873  return 32;
2874  else if (n<=64)
2875  return 64;
2876  else return size_t(1) << BitPrecision(n-1);
2877 }
2878 
2880  : reg(2), sign(POSITIVE)
2881 {
2882  reg[0] = reg[1] = 0;
2883 }
2884 
2886  : reg(RoundupSize(t.WordCount())), sign(t.sign)
2887 {
2888  CopyWords(reg, t.reg, reg.size());
2889 }
2890 
2891 Integer::Integer(Sign s, lword value)
2892  : reg(2), sign(s)
2893 {
2894  reg[0] = word(value);
2895  reg[1] = word(SafeRightShift<WORD_BITS>(value));
2896 }
2897 
2898 Integer::Integer(signed long value)
2899  : reg(2)
2900 {
2901  if (value >= 0)
2902  sign = POSITIVE;
2903  else
2904  {
2905  sign = NEGATIVE;
2906  value = -value;
2907  }
2908  reg[0] = word(value);
2909  reg[1] = word(SafeRightShift<WORD_BITS>((unsigned long)value));
2910 }
2911 
2912 Integer::Integer(Sign s, word high, word low)
2913  : reg(2), sign(s)
2914 {
2915  reg[0] = low;
2916  reg[1] = high;
2917 }
2918 
2920 {
2921  if (ByteCount() > sizeof(long))
2922  return false;
2923 
2924  unsigned long value = (unsigned long)reg[0];
2925  value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
2926 
2927  if (sign==POSITIVE)
2928  return (signed long)value >= 0;
2929  else
2930  return -(signed long)value < 0;
2931 }
2932 
2933 signed long Integer::ConvertToLong() const
2934 {
2935  assert(IsConvertableToLong());
2936 
2937  unsigned long value = (unsigned long)reg[0];
2938  value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
2939  return sign==POSITIVE ? value : -(signed long)value;
2940 }
2941 
2942 Integer::Integer(BufferedTransformation &encodedInteger, size_t byteCount, Signedness s, ByteOrder o)
2943 {
2944  assert(o == BIG_ENDIAN_ORDER || o == LITTLE_ENDIAN_ORDER);
2945 
2946  if(o == LITTLE_ENDIAN_ORDER)
2947  {
2948  SecByteBlock block(byteCount);
2949  encodedInteger.Get(block, block.size());
2950  std::reverse(block.begin(), block.begin()+block.size());
2951 
2952  Decode(block.begin(), block.size(), s);
2953  return;
2954  }
2955 
2956  Decode(encodedInteger, byteCount, s);
2957 }
2958 
2959 Integer::Integer(const byte *encodedInteger, size_t byteCount, Signedness s, ByteOrder o)
2960 {
2961  assert(o == BIG_ENDIAN_ORDER || o == LITTLE_ENDIAN_ORDER);
2962 
2963  if(o == LITTLE_ENDIAN_ORDER)
2964  {
2965  SecByteBlock block(byteCount);
2966 #if (CRYPTOPP_MSC_VERSION >= 1500)
2967  std::reverse_copy(encodedInteger, encodedInteger+byteCount,
2968  stdext::make_checked_array_iterator(block.begin(), block.size()));
2969 #else
2970  std::reverse_copy(encodedInteger, encodedInteger+byteCount, block.begin());
2971 #endif
2972  Decode(block.begin(), block.size(), s);
2973  return;
2974  }
2975 
2976  Decode(encodedInteger, byteCount, s);
2977 }
2978 
2980 {
2981  BERDecode(bt);
2982 }
2983 
2985 {
2986  Randomize(rng, bitcount);
2987 }
2988 
2989 Integer::Integer(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
2990 {
2991  if (!Randomize(rng, min, max, rnType, equiv, mod))
2993 }
2994 
2996 {
2997  Integer r((word)0, BitsToWords(e+1));
2998  r.SetBit(e);
2999  return r;
3000 }
3001 
3002 template <long i>
3004 {
3005  Integer * operator()() const
3006  {
3007  return new Integer(i);
3008  }
3009 };
3010 
3012 {
3013  return Singleton<Integer>().Ref();
3014 }
3015 
3017 {
3018  return Singleton<Integer, NewInteger<1> >().Ref();
3019 }
3020 
3022 {
3023  return Singleton<Integer, NewInteger<2> >().Ref();
3024 }
3025 
3026 bool Integer::operator!() const
3027 {
3028  return IsNegative() ? false : (reg[0]==0 && WordCount()==0);
3029 }
3030 
3031 Integer& Integer::operator=(const Integer& t)
3032 {
3033  if (this != &t)
3034  {
3035  if (reg.size() != t.reg.size() || t.reg[t.reg.size()/2] == 0)
3036  reg.New(RoundupSize(t.WordCount()));
3037  CopyWords(reg, t.reg, reg.size());
3038  sign = t.sign;
3039  }
3040  return *this;
3041 }
3042 
3043 bool Integer::GetBit(size_t n) const
3044 {
3045  if (n/WORD_BITS >= reg.size())
3046  return 0;
3047  else
3048  return bool((reg[n/WORD_BITS] >> (n % WORD_BITS)) & 1);
3049 }
3050 
3051 void Integer::SetBit(size_t n, bool value)
3052 {
3053  if (value)
3054  {
3055  reg.CleanGrow(RoundupSize(BitsToWords(n+1)));
3056  reg[n/WORD_BITS] |= (word(1) << (n%WORD_BITS));
3057  }
3058  else
3059  {
3060  if (n/WORD_BITS < reg.size())
3061  reg[n/WORD_BITS] &= ~(word(1) << (n%WORD_BITS));
3062  }
3063 }
3064 
3065 byte Integer::GetByte(size_t n) const
3066 {
3067  if (n/WORD_SIZE >= reg.size())
3068  return 0;
3069  else
3070  return byte(reg[n/WORD_SIZE] >> ((n%WORD_SIZE)*8));
3071 }
3072 
3073 void Integer::SetByte(size_t n, byte value)
3074 {
3075  reg.CleanGrow(RoundupSize(BytesToWords(n+1)));
3076  reg[n/WORD_SIZE] &= ~(word(0xff) << 8*(n%WORD_SIZE));
3077  reg[n/WORD_SIZE] |= (word(value) << 8*(n%WORD_SIZE));
3078 }
3079 
3080 lword Integer::GetBits(size_t i, size_t n) const
3081 {
3082  lword v = 0;
3083  assert(n <= sizeof(v)*8);
3084  for (unsigned int j=0; j<n; j++)
3085  v |= lword(GetBit(i+j)) << j;
3086  return v;
3087 }
3088 
3089 Integer Integer::operator-() const
3090 {
3091  Integer result(*this);
3092  result.Negate();
3093  return result;
3094 }
3095 
3096 Integer Integer::AbsoluteValue() const
3097 {
3098  Integer result(*this);
3099  result.sign = POSITIVE;
3100  return result;
3101 }
3102 
3104 {
3105  reg.swap(a.reg);
3106  std::swap(sign, a.sign);
3107 }
3108 
3109 Integer::Integer(word value, size_t length)
3110  : reg(RoundupSize(length)), sign(POSITIVE)
3111 {
3112  reg[0] = value;
3113  SetWords(reg+1, 0, reg.size()-1);
3114 }
3115 
3116 template <class T>
3117 static Integer StringToInteger(const T *str, ByteOrder order)
3118 {
3119  assert( order == BIG_ENDIAN_ORDER || order == LITTLE_ENDIAN_ORDER );
3120 
3121  int radix, sign = 1;
3122  // GCC workaround
3123  // std::char_traits<wchar_t>::length() not defined in GCC 3.2 and STLport 4.5.3
3124  unsigned int length;
3125  for (length = 0; str[length] != 0; length++) {}
3126 
3127  Integer v;
3128 
3129  if (length == 0)
3130  return Integer::Zero();
3131 
3132  switch (str[length-1])
3133  {
3134  case 'h':
3135  case 'H':
3136  radix=16;
3137  break;
3138  case 'o':
3139  case 'O':
3140  radix=8;
3141  break;
3142  case 'b':
3143  case 'B':
3144  radix=2;
3145  break;
3146  default:
3147  radix=10;
3148  }
3149 
3150  // 'str' is of length 1 or more
3151  if (str[0] == '-')
3152  {
3153  sign = -1;
3154  str += 1, length -= 1;
3155  }
3156 
3157  if (length > 2 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X'))
3158  {
3159  radix = 16;
3160  str += 2, length -= 2;
3161  }
3162 
3163  if(order == BIG_ENDIAN_ORDER)
3164  {
3165  for (unsigned int i=0; i<length; i++)
3166  {
3167  int digit, ch = static_cast<int>(str[i]);
3168 
3169  if (ch >= '0' && ch <= '9')
3170  digit = ch - '0';
3171  else if (ch >= 'A' && ch <= 'F')
3172  digit = ch - 'A' + 10;
3173  else if (ch >= 'a' && ch <= 'f')
3174  digit = ch - 'a' + 10;
3175  else
3176  digit = radix;
3177 
3178  if (digit < radix)
3179  {
3180  v *= radix;
3181  v += digit;
3182  }
3183  }
3184  }
3185  else if(radix == 16 && order == LITTLE_ENDIAN_ORDER)
3186  {
3187  // Nibble high, low and count
3188  unsigned int nh = 0, nl = 0, nc = 0;
3189  Integer position(Integer::One());
3190 
3191  for (unsigned int i=0; i<length; i++)
3192  {
3193  int digit, ch = static_cast<int>(str[i]);
3194 
3195  if (ch >= '0' && ch <= '9')
3196  digit = ch - '0';
3197  else if (ch >= 'A' && ch <= 'F')
3198  digit = ch - 'A' + 10;
3199  else if (ch >= 'a' && ch <= 'f')
3200  digit = ch - 'a' + 10;
3201  else
3202  digit = radix;
3203 
3204  if (digit < radix)
3205  {
3206  if(nc++ == 0)
3207  nh = digit;
3208  else
3209  nl = digit;
3210 
3211  if(nc == 2)
3212  {
3213  v += position * (nh << 4 | nl);
3214  nc = 0, position <<= 8;
3215  }
3216  }
3217  }
3218 
3219  if(nc == 1)
3220  v += nh * position;
3221  }
3222  else // LITTLE_ENDIAN_ORDER && radix != 16
3223  {
3224  for (int i=static_cast<int>(length)-1; i>=0; i--)
3225  {
3226  int digit, ch = static_cast<int>(str[i]);
3227 
3228  if (ch >= '0' && ch <= '9')
3229  digit = ch - '0';
3230  else if (ch >= 'A' && ch <= 'F')
3231  digit = ch - 'A' + 10;
3232  else if (ch >= 'a' && ch <= 'f')
3233  digit = ch - 'a' + 10;
3234  else
3235  digit = radix;
3236 
3237  if (digit < radix)
3238  {
3239  v *= radix;
3240  v += digit;
3241  }
3242  }
3243  }
3244 
3245  if (sign == -1)
3246  v.Negate();
3247 
3248  return v;
3249 }
3250 
3251 Integer::Integer(const char *str, ByteOrder order)
3252  : reg(2), sign(POSITIVE)
3253 {
3254  *this = StringToInteger(str,order);
3255 }
3256 
3257 Integer::Integer(const wchar_t *str, ByteOrder order)
3258  : reg(2), sign(POSITIVE)
3259 {
3260  *this = StringToInteger(str,order);
3261 }
3262 
3263 unsigned int Integer::WordCount() const
3264 {
3265  return (unsigned int)CountWords(reg, reg.size());
3266 }
3267 
3268 unsigned int Integer::ByteCount() const
3269 {
3270  unsigned wordCount = WordCount();
3271  if (wordCount)
3272  return (wordCount-1)*WORD_SIZE + BytePrecision(reg[wordCount-1]);
3273  else
3274  return 0;
3275 }
3276 
3277 unsigned int Integer::BitCount() const
3278 {
3279  unsigned wordCount = WordCount();
3280  if (wordCount)
3281  return (wordCount-1)*WORD_BITS + BitPrecision(reg[wordCount-1]);
3282  else
3283  return 0;
3284 }
3285 
3286 void Integer::Decode(const byte *input, size_t inputLen, Signedness s)
3287 {
3288  StringStore store(input, inputLen);
3289  Decode(store, inputLen, s);
3290 }
3291 
3293 {
3294  assert(bt.MaxRetrievable() >= inputLen);
3295 
3296  byte b;
3297  bt.Peek(b);
3298  sign = ((s==SIGNED) && (b & 0x80)) ? NEGATIVE : POSITIVE;
3299 
3300  while (inputLen>0 && (sign==POSITIVE ? b==0 : b==0xff))
3301  {
3302  bt.Skip(1);
3303  inputLen--;
3304  bt.Peek(b);
3305  }
3306 
3307  // The call to CleanNew is optimized away above -O0/-Og.
3308  const size_t size = RoundupSize(BytesToWords(inputLen));
3309  reg.CleanNew(size);
3310 
3311  assert(reg.SizeInBytes() >= inputLen);
3312  for (size_t i=inputLen; i > 0; i--)
3313  {
3314  bt.Get(b);
3315  reg[(i-1)/WORD_SIZE] |= word(b) << ((i-1)%WORD_SIZE)*8;
3316  }
3317 
3318  if (sign == NEGATIVE)
3319  {
3320  for (size_t i=inputLen; i<reg.size()*WORD_SIZE; i++)
3321  reg[i/WORD_SIZE] |= word(0xff) << (i%WORD_SIZE)*8;
3322  TwosComplement(reg, reg.size());
3323  }
3324 }
3325 
3326 size_t Integer::MinEncodedSize(Signedness signedness) const
3327 {
3328  unsigned int outputLen = STDMAX(1U, ByteCount());
3329  if (signedness == UNSIGNED)
3330  return outputLen;
3331  if (NotNegative() && (GetByte(outputLen-1) & 0x80))
3332  outputLen++;
3333  if (IsNegative() && *this < -Power2(outputLen*8-1))
3334  outputLen++;
3335  return outputLen;
3336 }
3337 
3338 void Integer::Encode(byte *output, size_t outputLen, Signedness signedness) const
3339 {
3340  assert(output && outputLen);
3341  ArraySink sink(output, outputLen);
3342  Encode(sink, outputLen, signedness);
3343 }
3344 
3345 void Integer::Encode(BufferedTransformation &bt, size_t outputLen, Signedness signedness) const
3346 {
3347  if (signedness == UNSIGNED || NotNegative())
3348  {
3349  for (size_t i=outputLen; i > 0; i--)
3350  bt.Put(GetByte(i-1));
3351  }
3352  else
3353  {
3354  // take two's complement of *this
3355  Integer temp = Integer::Power2(8*STDMAX((size_t)ByteCount(), outputLen)) + *this;
3356  temp.Encode(bt, outputLen, UNSIGNED);
3357  }
3358 }
3359 
3361 {
3362  DERGeneralEncoder enc(bt, INTEGER);
3364  enc.MessageEnd();
3365 }
3366 
3367 void Integer::BERDecode(const byte *input, size_t len)
3368 {
3369  StringStore store(input, len);
3370  BERDecode(store);
3371 }
3372 
3374 {
3375  BERGeneralDecoder dec(bt, INTEGER);
3376  if (!dec.IsDefiniteLength() || dec.MaxRetrievable() < dec.RemainingLength())
3377  BERDecodeError();
3378  Decode(dec, (size_t)dec.RemainingLength(), SIGNED);
3379  dec.MessageEnd();
3380 }
3381 
3383 {
3384  DERGeneralEncoder enc(bt, OCTET_STRING);
3385  Encode(enc, length);
3386  enc.MessageEnd();
3387 }
3388 
3390 {
3391  BERGeneralDecoder dec(bt, OCTET_STRING);
3392  if (!dec.IsDefiniteLength() || dec.RemainingLength() != length)
3393  BERDecodeError();
3394  Decode(dec, length);
3395  dec.MessageEnd();
3396 }
3397 
3398 size_t Integer::OpenPGPEncode(byte *output, size_t len) const
3399 {
3400  ArraySink sink(output, len);
3401  return OpenPGPEncode(sink);
3402 }
3403 
3405 {
3406  word16 bitCount = word16(BitCount());
3407  bt.PutWord16(bitCount);
3408  size_t byteCount = BitsToBytes(bitCount);
3409  Encode(bt, byteCount);
3410  return 2 + byteCount;
3411 }
3412 
3413 void Integer::OpenPGPDecode(const byte *input, size_t len)
3414 {
3415  StringStore store(input, len);
3416  OpenPGPDecode(store);
3417 }
3418 
3420 {
3421  word16 bitCount;
3422  if (bt.GetWord16(bitCount) != 2 || bt.MaxRetrievable() < BitsToBytes(bitCount))
3423  throw OpenPGPDecodeErr();
3424  Decode(bt, BitsToBytes(bitCount));
3425 }
3426 
3428 {
3429  const size_t nbytes = nbits/8 + 1;
3430  SecByteBlock buf(nbytes);
3431  rng.GenerateBlock(buf, nbytes);
3432  if (nbytes)
3433  buf[0] = (byte)Crop(buf[0], nbits % 8);
3434  Decode(buf, nbytes, UNSIGNED);
3435 }
3436 
3437 void Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max)
3438 {
3439  if (min > max)
3440  throw InvalidArgument("Integer: Min must be no greater than Max");
3441 
3442  Integer range = max - min;
3443  const unsigned int nbits = range.BitCount();
3444 
3445  do
3446  {
3447  Randomize(rng, nbits);
3448  }
3449  while (*this > range);
3450 
3451  *this += min;
3452 }
3453 
3454 bool Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
3455 {
3456  return GenerateRandomNoThrow(rng, MakeParameters("Min", min)("Max", max)("RandomNumberType", rnType)("EquivalentTo", equiv)("Mod", mod));
3457 }
3458 
3460 {
3461 public:
3462  KDF2_RNG(const byte *seed, size_t seedSize)
3463  : m_counter(0), m_counterAndSeed(seedSize + 4)
3464  {
3465  memcpy(m_counterAndSeed + 4, seed, seedSize);
3466  }
3467 
3468  void GenerateBlock(byte *output, size_t size)
3469  {
3470  PutWord(false, BIG_ENDIAN_ORDER, m_counterAndSeed, m_counter);
3471  ++m_counter;
3472  P1363_KDF2<SHA1>::DeriveKey(output, size, m_counterAndSeed, m_counterAndSeed.size(), NULL, 0);
3473  }
3474 
3475 private:
3476  word32 m_counter;
3477  SecByteBlock m_counterAndSeed;
3478 };
3479 
3480 bool Integer::GenerateRandomNoThrow(RandomNumberGenerator &i_rng, const NameValuePairs &params)
3481 {
3482  Integer min = params.GetValueWithDefault("Min", Integer::Zero());
3483  Integer max;
3484  if (!params.GetValue("Max", max))
3485  {
3486  int bitLength;
3487  if (params.GetIntValue("BitLength", bitLength))
3488  max = Integer::Power2(bitLength);
3489  else
3490  throw InvalidArgument("Integer: missing Max argument");
3491  }
3492  if (min > max)
3493  throw InvalidArgument("Integer: Min must be no greater than Max");
3494 
3495  Integer equiv = params.GetValueWithDefault("EquivalentTo", Integer::Zero());
3496  Integer mod = params.GetValueWithDefault("Mod", Integer::One());
3497 
3498  if (equiv.IsNegative() || equiv >= mod)
3499  throw InvalidArgument("Integer: invalid EquivalentTo and/or Mod argument");
3500 
3501  Integer::RandomNumberType rnType = params.GetValueWithDefault("RandomNumberType", Integer::ANY);
3502 
3503  member_ptr<KDF2_RNG> kdf2Rng;
3505  if (params.GetValue(Name::Seed(), seed))
3506  {
3507  ByteQueue bq;
3508  DERSequenceEncoder seq(bq);
3509  min.DEREncode(seq);
3510  max.DEREncode(seq);
3511  equiv.DEREncode(seq);
3512  mod.DEREncode(seq);
3513  DEREncodeUnsigned(seq, rnType);
3514  DEREncodeOctetString(seq, seed.begin(), seed.size());
3515  seq.MessageEnd();
3516 
3517  SecByteBlock finalSeed((size_t)bq.MaxRetrievable());
3518  bq.Get(finalSeed, finalSeed.size());
3519  kdf2Rng.reset(new KDF2_RNG(finalSeed.begin(), finalSeed.size()));
3520  }
3521  RandomNumberGenerator &rng = kdf2Rng.get() ? (RandomNumberGenerator &)*kdf2Rng : i_rng;
3522 
3523  switch (rnType)
3524  {
3525  case ANY:
3526  if (mod == One())
3527  Randomize(rng, min, max);
3528  else
3529  {
3530  Integer min1 = min + (equiv-min)%mod;
3531  if (max < min1)
3532  return false;
3533  Randomize(rng, Zero(), (max - min1) / mod);
3534  *this *= mod;
3535  *this += min1;
3536  }
3537  return true;
3538 
3539  case PRIME:
3540  {
3541  const PrimeSelector *pSelector = params.GetValueWithDefault(Name::PointerToPrimeSelector(), (const PrimeSelector *)NULL);
3542 
3543  int i;
3544  i = 0;
3545  while (1)
3546  {
3547  if (++i==16)
3548  {
3549  // check if there are any suitable primes in [min, max]
3550  Integer first = min;
3551  if (FirstPrime(first, max, equiv, mod, pSelector))
3552  {
3553  // if there is only one suitable prime, we're done
3554  *this = first;
3555  if (!FirstPrime(first, max, equiv, mod, pSelector))
3556  return true;
3557  }
3558  else
3559  return false;
3560  }
3561 
3562  Randomize(rng, min, max);
3563  if (FirstPrime(*this, STDMIN(*this+mod*PrimeSearchInterval(max), max), equiv, mod, pSelector))
3564  return true;
3565  }
3566  }
3567 
3568  default:
3569  throw InvalidArgument("Integer: invalid RandomNumberType argument");
3570  }
3571 }
3572 
3573 std::istream& operator>>(std::istream& in, Integer &a)
3574 {
3575  char c;
3576  unsigned int length = 0;
3577  SecBlock<char> str(length + 16);
3578 
3579  std::ws(in);
3580 
3581  do
3582  {
3583  in.read(&c, 1);
3584  str[length++] = c;
3585  if (length >= str.size())
3586  str.Grow(length + 16);
3587  }
3588  while (in && (c=='-' || c=='x' || (c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F') || c=='h' || c=='H' || c=='o' || c=='O' || c==',' || c=='.'));
3589 
3590  if (in.gcount())
3591  in.putback(c);
3592  str[length-1] = '\0';
3593  a = Integer(str);
3594 
3595  return in;
3596 }
3597 
3598 std::ostream& operator<<(std::ostream& out, const Integer &a)
3599 {
3600  // Get relevant conversion specifications from ostream.
3601  const long f = out.flags() & std::ios::basefield; // Get base digits.
3602  int base, block;
3603  char suffix;
3604  switch(f)
3605  {
3606  case std::ios::oct :
3607  base = 8;
3608  block = 8;
3609  suffix = 'o';
3610  break;
3611  case std::ios::hex :
3612  base = 16;
3613  block = 4;
3614  suffix = 'h';
3615  break;
3616  default :
3617  base = 10;
3618  block = 3;
3619  suffix = '.';
3620  }
3621 
3622  Integer temp1=a, temp2;
3623 
3624  if (a.IsNegative())
3625  {
3626  out << '-';
3627  temp1.Negate();
3628  }
3629 
3630  if (!a)
3631  out << '0';
3632 
3633  static const char upper[]="0123456789ABCDEF";
3634  static const char lower[]="0123456789abcdef";
3635 
3636  const char* vec = (out.flags() & std::ios::uppercase) ? upper : lower;
3637  unsigned int i=0;
3638  SecBlock<char> s(a.BitCount() / (SaturatingSubtract1(BitPrecision(base),1U)) + 1);
3639 
3640  while (!!temp1)
3641  {
3642  word digit;
3643  Integer::Divide(digit, temp2, temp1, base);
3644  s[i++]=vec[digit];
3645  temp1.swap(temp2);
3646  }
3647 
3648  while (i--)
3649  {
3650  out << s[i];
3651 // if (i && !(i%block))
3652 // out << ",";
3653  }
3654 
3655 #ifdef CRYPTOPP_USE_STD_SHOWBASE
3656  if(out.flags() & std::ios_base::showbase)
3657  out << suffix;
3658 
3659  return out;
3660 #else
3661  return out << suffix;
3662 #endif
3663 }
3664 
3665 Integer& Integer::operator++()
3666 {
3667  if (NotNegative())
3668  {
3669  if (Increment(reg, reg.size()))
3670  {
3671  reg.CleanGrow(2*reg.size());
3672  reg[reg.size()/2]=1;
3673  }
3674  }
3675  else
3676  {
3677  word borrow = Decrement(reg, reg.size());
3678  assert(!borrow);
3679  CRYPTOPP_UNUSED(borrow);
3680 
3681  if (WordCount()==0)
3682  *this = Zero();
3683  }
3684  return *this;
3685 }
3686 
3687 Integer& Integer::operator--()
3688 {
3689  if (IsNegative())
3690  {
3691  if (Increment(reg, reg.size()))
3692  {
3693  reg.CleanGrow(2*reg.size());
3694  reg[reg.size()/2]=1;
3695  }
3696  }
3697  else
3698  {
3699  if (Decrement(reg, reg.size()))
3700  *this = -One();
3701  }
3702  return *this;
3703 }
3704 
3705 void PositiveAdd(Integer &sum, const Integer &a, const Integer& b)
3706 {
3707  int carry;
3708  if (a.reg.size() == b.reg.size())
3709  carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
3710  else if (a.reg.size() > b.reg.size())
3711  {
3712  carry = Add(sum.reg, a.reg, b.reg, b.reg.size());
3713  CopyWords(sum.reg+b.reg.size(), a.reg+b.reg.size(), a.reg.size()-b.reg.size());
3714  carry = Increment(sum.reg+b.reg.size(), a.reg.size()-b.reg.size(), carry);
3715  }
3716  else
3717  {
3718  carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
3719  CopyWords(sum.reg+a.reg.size(), b.reg+a.reg.size(), b.reg.size()-a.reg.size());
3720  carry = Increment(sum.reg+a.reg.size(), b.reg.size()-a.reg.size(), carry);
3721  }
3722 
3723  if (carry)
3724  {
3725  sum.reg.CleanGrow(2*sum.reg.size());
3726  sum.reg[sum.reg.size()/2] = 1;
3727  }
3728  sum.sign = Integer::POSITIVE;
3729 }
3730 
3731 void PositiveSubtract(Integer &diff, const Integer &a, const Integer& b)
3732 {
3733  unsigned aSize = a.WordCount();
3734  aSize += aSize%2;
3735  unsigned bSize = b.WordCount();
3736  bSize += bSize%2;
3737 
3738  if (aSize == bSize)
3739  {
3740  if (Compare(a.reg, b.reg, aSize) >= 0)
3741  {
3742  Subtract(diff.reg, a.reg, b.reg, aSize);
3743  diff.sign = Integer::POSITIVE;
3744  }
3745  else
3746  {
3747  Subtract(diff.reg, b.reg, a.reg, aSize);
3748  diff.sign = Integer::NEGATIVE;
3749  }
3750  }
3751  else if (aSize > bSize)
3752  {
3753  word borrow = Subtract(diff.reg, a.reg, b.reg, bSize);
3754  CopyWords(diff.reg+bSize, a.reg+bSize, aSize-bSize);
3755  borrow = Decrement(diff.reg+bSize, aSize-bSize, borrow);
3756  assert(!borrow);
3757  diff.sign = Integer::POSITIVE;
3758  }
3759  else
3760  {
3761  word borrow = Subtract(diff.reg, b.reg, a.reg, aSize);
3762  CopyWords(diff.reg+aSize, b.reg+aSize, bSize-aSize);
3763  borrow = Decrement(diff.reg+aSize, bSize-aSize, borrow);
3764  assert(!borrow);
3765  diff.sign = Integer::NEGATIVE;
3766  }
3767 }
3768 
3769 // MSVC .NET 2003 workaround
3770 template <class T> inline const T& STDMAX2(const T& a, const T& b)
3771 {
3772  return a < b ? b : a;
3773 }
3774 
3775 Integer Integer::Plus(const Integer& b) const
3776 {
3777  Integer sum((word)0, STDMAX2(reg.size(), b.reg.size()));
3778  if (NotNegative())
3779  {
3780  if (b.NotNegative())
3781  PositiveAdd(sum, *this, b);
3782  else
3783  PositiveSubtract(sum, *this, b);
3784  }
3785  else
3786  {
3787  if (b.NotNegative())
3788  PositiveSubtract(sum, b, *this);
3789  else
3790  {
3791  PositiveAdd(sum, *this, b);
3792  sum.sign = Integer::NEGATIVE;
3793  }
3794  }
3795  return sum;
3796 }
3797 
3798 Integer& Integer::operator+=(const Integer& t)
3799 {
3800  reg.CleanGrow(t.reg.size());
3801  if (NotNegative())
3802  {
3803  if (t.NotNegative())
3804  PositiveAdd(*this, *this, t);
3805  else
3806  PositiveSubtract(*this, *this, t);
3807  }
3808  else
3809  {
3810  if (t.NotNegative())
3811  PositiveSubtract(*this, t, *this);
3812  else
3813  {
3814  PositiveAdd(*this, *this, t);
3815  sign = Integer::NEGATIVE;
3816  }
3817  }
3818  return *this;
3819 }
3820 
3821 Integer Integer::Minus(const Integer& b) const
3822 {
3823  Integer diff((word)0, STDMAX2(reg.size(), b.reg.size()));
3824  if (NotNegative())
3825  {
3826  if (b.NotNegative())
3827  PositiveSubtract(diff, *this, b);
3828  else
3829  PositiveAdd(diff, *this, b);
3830  }
3831  else
3832  {
3833  if (b.NotNegative())
3834  {
3835  PositiveAdd(diff, *this, b);
3836  diff.sign = Integer::NEGATIVE;
3837  }
3838  else
3839  PositiveSubtract(diff, b, *this);
3840  }
3841  return diff;
3842 }
3843 
3844 Integer& Integer::operator-=(const Integer& t)
3845 {
3846  reg.CleanGrow(t.reg.size());
3847  if (NotNegative())
3848  {
3849  if (t.NotNegative())
3850  PositiveSubtract(*this, *this, t);
3851  else
3852  PositiveAdd(*this, *this, t);
3853  }
3854  else
3855  {
3856  if (t.NotNegative())
3857  {
3858  PositiveAdd(*this, *this, t);
3859  sign = Integer::NEGATIVE;
3860  }
3861  else
3862  PositiveSubtract(*this, t, *this);
3863  }
3864  return *this;
3865 }
3866 
3867 Integer& Integer::operator<<=(size_t n)
3868 {
3869  const size_t wordCount = WordCount();
3870  const size_t shiftWords = n / WORD_BITS;
3871  const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
3872 
3873  reg.CleanGrow(RoundupSize(wordCount+BitsToWords(n)));
3874  ShiftWordsLeftByWords(reg, wordCount + shiftWords, shiftWords);
3875  ShiftWordsLeftByBits(reg+shiftWords, wordCount+BitsToWords(shiftBits), shiftBits);
3876  return *this;
3877 }
3878 
3879 Integer& Integer::operator>>=(size_t n)
3880 {
3881  const size_t wordCount = WordCount();
3882  const size_t shiftWords = n / WORD_BITS;
3883  const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
3884 
3885  ShiftWordsRightByWords(reg, wordCount, shiftWords);
3886  if (wordCount > shiftWords)
3887  ShiftWordsRightByBits(reg, wordCount-shiftWords, shiftBits);
3888  if (IsNegative() && WordCount()==0) // avoid -0
3889  *this = Zero();
3890  return *this;
3891 }
3892 
3893 void PositiveMultiply(Integer &product, const Integer &a, const Integer &b)
3894 {
3895  size_t aSize = RoundupSize(a.WordCount());
3896  size_t bSize = RoundupSize(b.WordCount());
3897 
3898  product.reg.CleanNew(RoundupSize(aSize+bSize));
3899  product.sign = Integer::POSITIVE;
3900 
3901  IntegerSecBlock workspace(aSize + bSize);
3902  AsymmetricMultiply(product.reg, workspace, a.reg, aSize, b.reg, bSize);
3903 }
3904 
3905 void Multiply(Integer &product, const Integer &a, const Integer &b)
3906 {
3907  PositiveMultiply(product, a, b);
3908 
3909  if (a.NotNegative() != b.NotNegative())
3910  product.Negate();
3911 }
3912 
3914 {
3915  Integer product;
3916  Multiply(product, *this, b);
3917  return product;
3918 }
3919 
3920 /*
3921 void PositiveDivide(Integer &remainder, Integer &quotient,
3922  const Integer &dividend, const Integer &divisor)
3923 {
3924  remainder.reg.CleanNew(divisor.reg.size());
3925  remainder.sign = Integer::POSITIVE;
3926  quotient.reg.New(0);
3927  quotient.sign = Integer::POSITIVE;
3928  unsigned i=dividend.BitCount();
3929  while (i--)
3930  {
3931  word overflow = ShiftWordsLeftByBits(remainder.reg, remainder.reg.size(), 1);
3932  remainder.reg[0] |= dividend[i];
3933  if (overflow || remainder >= divisor)
3934  {
3935  Subtract(remainder.reg, remainder.reg, divisor.reg, remainder.reg.size());
3936  quotient.SetBit(i);
3937  }
3938  }
3939 }
3940 */
3941 
3942 void PositiveDivide(Integer &remainder, Integer &quotient,
3943  const Integer &a, const Integer &b)
3944 {
3945  unsigned aSize = a.WordCount();
3946  unsigned bSize = b.WordCount();
3947 
3948  if (!bSize)
3949  throw Integer::DivideByZero();
3950 
3951  if (aSize < bSize)
3952  {
3953  remainder = a;
3954  remainder.sign = Integer::POSITIVE;
3955  quotient = Integer::Zero();
3956  return;
3957  }
3958 
3959  aSize += aSize%2; // round up to next even number
3960  bSize += bSize%2;
3961 
3962  remainder.reg.CleanNew(RoundupSize(bSize));
3963  remainder.sign = Integer::POSITIVE;
3964  quotient.reg.CleanNew(RoundupSize(aSize-bSize+2));
3965  quotient.sign = Integer::POSITIVE;
3966 
3967  IntegerSecBlock T(aSize+3*(bSize+2));
3968  Divide(remainder.reg, quotient.reg, T, a.reg, aSize, b.reg, bSize);
3969 }
3970 
3971 void Integer::Divide(Integer &remainder, Integer &quotient, const Integer &dividend, const Integer &divisor)
3972 {
3973  PositiveDivide(remainder, quotient, dividend, divisor);
3974 
3975  if (dividend.IsNegative())
3976  {
3977  quotient.Negate();
3978  if (remainder.NotZero())
3979  {
3980  --quotient;
3981  remainder = divisor.AbsoluteValue() - remainder;
3982  }
3983  }
3984 
3985  if (divisor.IsNegative())
3986  quotient.Negate();
3987 }
3988 
3989 void Integer::DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n)
3990 {
3991  q = a;
3992  q >>= n;
3993 
3994  const size_t wordCount = BitsToWords(n);
3995  if (wordCount <= a.WordCount())
3996  {
3997  r.reg.resize(RoundupSize(wordCount));
3998  CopyWords(r.reg, a.reg, wordCount);
3999  SetWords(r.reg+wordCount, 0, r.reg.size()-wordCount);
4000  if (n % WORD_BITS != 0)
4001  r.reg[wordCount-1] %= (word(1) << (n % WORD_BITS));
4002  }
4003  else
4004  {
4005  r.reg.resize(RoundupSize(a.WordCount()));
4006  CopyWords(r.reg, a.reg, r.reg.size());
4007  }
4008  r.sign = POSITIVE;
4009 
4010  if (a.IsNegative() && r.NotZero())
4011  {
4012  --q;
4013  r = Power2(n) - r;
4014  }
4015 }
4016 
4017 Integer Integer::DividedBy(const Integer &b) const
4018 {
4019  Integer remainder, quotient;
4020  Integer::Divide(remainder, quotient, *this, b);
4021  return quotient;
4022 }
4023 
4025 {
4026  Integer remainder, quotient;
4027  Integer::Divide(remainder, quotient, *this, b);
4028  return remainder;
4029 }
4030 
4031 void Integer::Divide(word &remainder, Integer &quotient, const Integer &dividend, word divisor)
4032 {
4033  if (!divisor)
4034  throw Integer::DivideByZero();
4035 
4036  assert(divisor);
4037 
4038  if ((divisor & (divisor-1)) == 0) // divisor is a power of 2
4039  {
4040  quotient = dividend >> (BitPrecision(divisor)-1);
4041  remainder = dividend.reg[0] & (divisor-1);
4042  return;
4043  }
4044 
4045  unsigned int i = dividend.WordCount();
4046  quotient.reg.CleanNew(RoundupSize(i));
4047  remainder = 0;
4048  while (i--)
4049  {
4050  quotient.reg[i] = DWord(dividend.reg[i], remainder) / divisor;
4051  remainder = DWord(dividend.reg[i], remainder) % divisor;
4052  }
4053 
4054  if (dividend.NotNegative())
4055  quotient.sign = POSITIVE;
4056  else
4057  {
4058  quotient.sign = NEGATIVE;
4059  if (remainder)
4060  {
4061  --quotient;
4062  remainder = divisor - remainder;
4063  }
4064  }
4065 }
4066 
4067 Integer Integer::DividedBy(word b) const
4068 {
4069  word remainder;
4070  Integer quotient;
4071  Integer::Divide(remainder, quotient, *this, b);
4072  return quotient;
4073 }
4074 
4075 word Integer::Modulo(word divisor) const
4076 {
4077  if (!divisor)
4078  throw Integer::DivideByZero();
4079 
4080  assert(divisor);
4081 
4082  word remainder;
4083 
4084  if ((divisor & (divisor-1)) == 0) // divisor is a power of 2
4085  remainder = reg[0] & (divisor-1);
4086  else
4087  {
4088  unsigned int i = WordCount();
4089 
4090  if (divisor <= 5)
4091  {
4092  DWord sum(0, 0);
4093  while (i--)
4094  sum += reg[i];
4095  remainder = sum % divisor;
4096  }
4097  else
4098  {
4099  remainder = 0;
4100  while (i--)
4101  remainder = DWord(reg[i], remainder) % divisor;
4102  }
4103  }
4104 
4105  if (IsNegative() && remainder)
4106  remainder = divisor - remainder;
4107 
4108  return remainder;
4109 }
4110 
4112 {
4113  if (!!(*this)) // don't flip sign if *this==0
4114  sign = Sign(1-sign);
4115 }
4116 
4117 int Integer::PositiveCompare(const Integer& t) const
4118 {
4119  unsigned size = WordCount(), tSize = t.WordCount();
4120 
4121  if (size == tSize)
4122  return CryptoPP::Compare(reg, t.reg, size);
4123  else
4124  return size > tSize ? 1 : -1;
4125 }
4126 
4127 int Integer::Compare(const Integer& t) const
4128 {
4129  if (NotNegative())
4130  {
4131  if (t.NotNegative())
4132  return PositiveCompare(t);
4133  else
4134  return 1;
4135  }
4136  else
4137  {
4138  if (t.NotNegative())
4139  return -1;
4140  else
4141  return -PositiveCompare(t);
4142  }
4143 }
4144 
4146 {
4147  if (!IsPositive())
4148  return Zero();
4149 
4150  // overestimate square root
4151  Integer x, y = Power2((BitCount()+1)/2);
4152  assert(y*y >= *this);
4153 
4154  do
4155  {
4156  x = y;
4157  y = (x + *this/x) >> 1;
4158  } while (y<x);
4159 
4160  return x;
4161 }
4162 
4163 bool Integer::IsSquare() const
4164 {
4165  Integer r = SquareRoot();
4166  return *this == r.Squared();
4167 }
4168 
4169 bool Integer::IsUnit() const
4170 {
4171  return (WordCount() == 1) && (reg[0] == 1);
4172 }
4173 
4175 {
4176  return IsUnit() ? *this : Zero();
4177 }
4178 
4179 Integer a_times_b_mod_c(const Integer &x, const Integer& y, const Integer& m)
4180 {
4181  return x*y%m;
4182 }
4183 
4184 Integer a_exp_b_mod_c(const Integer &x, const Integer& e, const Integer& m)
4185 {
4186  ModularArithmetic mr(m);
4187  return mr.Exponentiate(x, e);
4188 }
4189 
4191 {
4192  return EuclideanDomainOf<Integer>().Gcd(a, b);
4193 }
4194 
4196 {
4197  assert(m.NotNegative());
4198 
4199  if (IsNegative())
4200  return Modulo(m).InverseMod(m);
4201 
4202  if (m.IsEven())
4203  {
4204  if (!m || IsEven())
4205  return Zero(); // no inverse
4206  if (*this == One())
4207  return One();
4208 
4209  Integer u = m.Modulo(*this).InverseMod(*this);
4210  return !u ? Zero() : (m*(*this-u)+1)/(*this);
4211  }
4212 
4213  SecBlock<word> T(m.reg.size() * 4);
4214  Integer r((word)0, m.reg.size());
4215  unsigned k = AlmostInverse(r.reg, T, reg, reg.size(), m.reg, m.reg.size());
4216  DivideByPower2Mod(r.reg, r.reg, k, m.reg, m.reg.size());
4217  return r;
4218 }
4219 
4220 word Integer::InverseMod(word mod) const
4221 {
4222  word g0 = mod, g1 = *this % mod;
4223  word v0 = 0, v1 = 1;
4224  word y;
4225 
4226  while (g1)
4227  {
4228  if (g1 == 1)
4229  return v1;
4230  y = g0 / g1;
4231  g0 = g0 % g1;
4232  v0 += y * v1;
4233 
4234  if (!g0)
4235  break;
4236  if (g0 == 1)
4237  return mod-v0;
4238  y = g1 / g0;
4239  g1 = g1 % g0;
4240  v1 += y * v0;
4241  }
4242  return 0;
4243 }
4244 
4245 // ********************************************************
4246 
4248 {
4249  BERSequenceDecoder seq(bt);
4250  OID oid(seq);
4251  if (oid != ASN1::prime_field())
4252  BERDecodeError();
4253  m_modulus.BERDecode(seq);
4254  seq.MessageEnd();
4255  m_result.reg.resize(m_modulus.reg.size());
4256 }
4257 
4259 {
4260  DERSequenceEncoder seq(bt);
4261  ASN1::prime_field().DEREncode(seq);
4262  m_modulus.DEREncode(seq);
4263  seq.MessageEnd();
4264 }
4265 
4267 {
4269 }
4270 
4272 {
4274 }
4275 
4277 {
4278  if (a.reg.size()==m_modulus.reg.size())
4279  {
4280  CryptoPP::DivideByPower2Mod(m_result.reg.begin(), a.reg, 1, m_modulus.reg, a.reg.size());
4281  return m_result;
4282  }
4283  else
4284  return m_result1 = (a.IsEven() ? (a >> 1) : ((a+m_modulus) >> 1));
4285 }
4286 
4287 const Integer& ModularArithmetic::Add(const Integer &a, const Integer &b) const
4288 {
4289  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4290  {
4291  if (CryptoPP::Add(m_result.reg.begin(), a.reg, b.reg, a.reg.size())
4292  || Compare(m_result.reg, m_modulus.reg, a.reg.size()) >= 0)
4293  {
4294  CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
4295  }
4296  return m_result;
4297  }
4298  else
4299  {
4300  m_result1 = a+b;
4301  if (m_result1 >= m_modulus)
4302  m_result1 -= m_modulus;
4303  return m_result1;
4304  }
4305 }
4306 
4308 {
4309  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4310  {
4311  if (CryptoPP::Add(a.reg, a.reg, b.reg, a.reg.size())
4312  || Compare(a.reg, m_modulus.reg, a.reg.size()) >= 0)
4313  {
4314  CryptoPP::Subtract(a.reg, a.reg, m_modulus.reg, a.reg.size());
4315  }
4316  }
4317  else
4318  {
4319  a+=b;
4320  if (a>=m_modulus)
4321  a-=m_modulus;
4322  }
4323 
4324  return a;
4325 }
4326 
4327 const Integer& ModularArithmetic::Subtract(const Integer &a, const Integer &b) const
4328 {
4329  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4330  {
4331  if (CryptoPP::Subtract(m_result.reg.begin(), a.reg, b.reg, a.reg.size()))
4332  CryptoPP::Add(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
4333  return m_result;
4334  }
4335  else
4336  {
4337  m_result1 = a-b;
4338  if (m_result1.IsNegative())
4339  m_result1 += m_modulus;
4340  return m_result1;
4341  }
4342 }
4343 
4345 {
4346  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4347  {
4348  if (CryptoPP::Subtract(a.reg, a.reg, b.reg, a.reg.size()))
4349  CryptoPP::Add(a.reg, a.reg, m_modulus.reg, a.reg.size());
4350  }
4351  else
4352  {
4353  a-=b;
4354  if (a.IsNegative())
4355  a+=m_modulus;
4356  }
4357 
4358  return a;
4359 }
4360 
4362 {
4363  if (!a)
4364  return a;
4365 
4366  CopyWords(m_result.reg.begin(), m_modulus.reg, m_modulus.reg.size());
4367  if (CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, a.reg, a.reg.size()))
4368  Decrement(m_result.reg.begin()+a.reg.size(), m_modulus.reg.size()-a.reg.size());
4369 
4370  return m_result;
4371 }
4372 
4373 Integer ModularArithmetic::CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
4374 {
4375  if (m_modulus.IsOdd())
4376  {
4377  MontgomeryRepresentation dr(m_modulus);
4378  return dr.ConvertOut(dr.CascadeExponentiate(dr.ConvertIn(x), e1, dr.ConvertIn(y), e2));
4379  }
4380  else
4381  return AbstractRing<Integer>::CascadeExponentiate(x, e1, y, e2);
4382 }
4383 
4384 void ModularArithmetic::SimultaneousExponentiate(Integer *results, const Integer &base, const Integer *exponents, unsigned int exponentsCount) const
4385 {
4386  if (m_modulus.IsOdd())
4387  {
4388  MontgomeryRepresentation dr(m_modulus);
4389  dr.SimultaneousExponentiate(results, dr.ConvertIn(base), exponents, exponentsCount);
4390  for (unsigned int i=0; i<exponentsCount; i++)
4391  results[i] = dr.ConvertOut(results[i]);
4392  }
4393  else
4394  AbstractRing<Integer>::SimultaneousExponentiate(results, base, exponents, exponentsCount);
4395 }
4396 
4398  : ModularArithmetic(m),
4399  m_u((word)0, m_modulus.reg.size()),
4400  m_workspace(5*m_modulus.reg.size())
4401 {
4402  if (!m_modulus.IsOdd())
4403  throw InvalidArgument("MontgomeryRepresentation: Montgomery representation requires an odd modulus");
4404 
4405  RecursiveInverseModPower2(m_u.reg, m_workspace, m_modulus.reg, m_modulus.reg.size());
4406 }
4407 
4409 {
4410  word *const T = m_workspace.begin();
4411  word *const R = m_result.reg.begin();
4412  const size_t N = m_modulus.reg.size();
4413  assert(a.reg.size()<=N && b.reg.size()<=N);
4414 
4415  AsymmetricMultiply(T, T+2*N, a.reg, a.reg.size(), b.reg, b.reg.size());
4416  SetWords(T+a.reg.size()+b.reg.size(), 0, 2*N-a.reg.size()-b.reg.size());
4417  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4418  return m_result;
4419 }
4420 
4422 {
4423  word *const T = m_workspace.begin();
4424  word *const R = m_result.reg.begin();
4425  const size_t N = m_modulus.reg.size();
4426  assert(a.reg.size()<=N);
4427 
4428  CryptoPP::Square(T, T+2*N, a.reg, a.reg.size());
4429  SetWords(T+2*a.reg.size(), 0, 2*N-2*a.reg.size());
4430  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4431  return m_result;
4432 }
4433 
4435 {
4436  word *const T = m_workspace.begin();
4437  word *const R = m_result.reg.begin();
4438  const size_t N = m_modulus.reg.size();
4439  assert(a.reg.size()<=N);
4440 
4441  CopyWords(T, a.reg, a.reg.size());
4442  SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
4443  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4444  return m_result;
4445 }
4446 
4448 {
4449 // return (EuclideanMultiplicativeInverse(a, modulus)<<(2*WORD_BITS*modulus.reg.size()))%modulus;
4450  word *const T = m_workspace.begin();
4451  word *const R = m_result.reg.begin();
4452  const size_t N = m_modulus.reg.size();
4453  assert(a.reg.size()<=N);
4454 
4455  CopyWords(T, a.reg, a.reg.size());
4456  SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
4457  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4458  unsigned k = AlmostInverse(R, T, R, N, m_modulus.reg, N);
4459 
4460 // cout << "k=" << k << " N*32=" << 32*N << endl;
4461 
4462  if (k>N*WORD_BITS)
4463  DivideByPower2Mod(R, R, k-N*WORD_BITS, m_modulus.reg, N);
4464  else
4465  MultiplyByPower2Mod(R, R, N*WORD_BITS-k, m_modulus.reg, N);
4466 
4467  return m_result;
4468 }
4469 
4470 // Specialization declared in misc.h to allow us to print integers
4471 // with additional control options, like arbirary bases and uppercase.
4472 template <> CRYPTOPP_DLL
4473 std::string IntToString<Integer>(Integer value, unsigned int base)
4474 {
4475  // Hack... set the high bit for uppercase. Set the next bit fo a suffix.
4476  static const unsigned int BIT_32 = (1U << 31);
4477  const bool UPPER = !!(base & BIT_32);
4478  static const unsigned int BIT_31 = (1U << 30);
4479  const bool BASE = !!(base & BIT_31);
4480 
4481  const char CH = UPPER ? 'A' : 'a';
4482  base &= ~(BIT_32|BIT_31);
4483  assert(base >= 2 && base <= 32);
4484 
4485  if (value == 0)
4486  return "0";
4487 
4488  bool negative = false, zero = false;
4489  if (value.IsNegative())
4490  {
4491  negative = true;
4492  value.Negate();
4493  }
4494 
4495  if (!value)
4496  zero = true;
4497 
4498  SecBlock<char> s(value.BitCount() / (SaturatingSubtract1(BitPrecision(base),1U)) + 1);
4499  Integer temp;
4500 
4501  unsigned int i=0;
4502  while (!!value)
4503  {
4504  word digit;
4505  Integer::Divide(digit, temp, value, word(base));
4506  s[i++]=char((digit < 10 ? '0' : (CH - 10)) + digit);
4507  value.swap(temp);
4508  }
4509 
4510  std::string result;
4511  result.reserve(i+2);
4512 
4513  if (negative)
4514  result += '-';
4515 
4516  if (zero)
4517  result += '0';
4518 
4519  while (i--)
4520  result += s[i];
4521 
4522  if (BASE)
4523  {
4524  if (base == 10)
4525  result += '.';
4526  else if (base == 16)
4527  result += 'h';
4528  else if (base == 8)
4529  result += 'o';
4530  else if (base == 2)
4531  result += 'b';
4532  }
4533 
4534  return result;
4535 }
4536 
4537 // Specialization declared in misc.h to avoid Coverity findings.
4538 template <> CRYPTOPP_DLL
4539 std::string IntToString<word64>(word64 value, unsigned int base)
4540 {
4541  // Hack... set the high bit for uppercase.
4542  static const unsigned int HIGH_BIT = (1U << 31);
4543  const char CH = !!(base & HIGH_BIT) ? 'A' : 'a';
4544  base &= ~HIGH_BIT;
4545 
4546  assert(base >= 2);
4547  if (value == 0)
4548  return "0";
4549 
4550  std::string result;
4551  while (value > 0)
4552  {
4553  word64 digit = value % base;
4554  result = char((digit < 10 ? '0' : (CH - 10)) + digit) + result;
4555  value /= base;
4556  }
4557  return result;
4558 }
4559 
4560 NAMESPACE_END
4561 
4562 #if WORKAROUND_ARMEL_BUG
4563 # pragma GCC pop_options
4564 #endif
4565 
4566 #endif
Used to pass byte array input as part of a NameValuePairs object.
Definition: algparam.h:29
An invalid argument was detected.
Definition: cryptlib.h:182
void DEREncode(BufferedTransformation &bt) const
Encodes in DER format.
Definition: integer.cpp:4258
Integer CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
TODO.
Definition: integer.cpp:4373
Classes for working with NameValuePairs.
const Integer & Square(const Integer &a) const
Square an element in the ring.
Definition: integer.cpp:4421
const Integer & MultiplicativeInverse(const Integer &a) const
Calculate the multiplicative inverse of an element in the ring.
Definition: integer.cpp:4447
void swap(SecBlock< T, A > &b)
Swap contents with another SecBlock.
Definition: secblock.h:711
a number which is probabilistically prime
Definition: integer.h:77
Utility functions for the Crypto++ library.
ByteOrder
Provides the byte ordering.
Definition: cryptlib.h:123
Restricts the instantiation of a class to one static object without locks.
Definition: misc.h:262
bool NotZero() const
Determines if the Integer is non-0.
Definition: integer.h:315
bool GetBit(size_t i) const
Provides the i-th bit of the Integer.
Definition: integer.cpp:3043
void CleanNew(size_type newSize)
Change size without preserving contents.
Definition: secblock.h:657
void Encode(byte *output, size_t outputLen, Signedness sign=UNSIGNED) const
Encode in big-endian format.
Definition: integer.cpp:3338
an unsigned value
Definition: integer.h:67
Integer & Reduce(Integer &a, const Integer &b) const
TODO.
Definition: integer.cpp:4344
virtual size_t Peek(byte &outByte) const
Peek a 8-bit byte.
Definition: cryptlib.cpp:537
bool IsOdd() const
Determines if the Integer is odd parity.
Definition: integer.h:333
T GetValueWithDefault(const char *name, T defaultValue) const
Get a named value.
Definition: cryptlib.h:348
size_t DEREncodeUnsigned(BufferedTransformation &out, T w, byte asnTag=INTEGER)
DER Encode unsigned value.
Definition: asn.h:442
virtual void GenerateBlock(byte *output, size_t size)
Generate random array of bytes.
Definition: cryptlib.cpp:329
size_t BitsToBytes(size_t bitCount)
Returns the number of 8-bit bytes or octets required for the specified number of bits.
Definition: misc.h:723
This file contains helper classes/functions for implementing public key algorithms.
bool FirstPrime(Integer &p, const Integer &max, const Integer &equiv, const Integer &mod, const PrimeSelector *pSelector)
Finds a random prime of special form.
Definition: nbtheory.cpp:381
static Integer Gcd(const Integer &a, const Integer &n)
greatest common divisor
Definition: integer.cpp:4190
void resize(size_type newSize)
Change size and preserve contents.
Definition: secblock.h:702
size_t BitsToWords(size_t bitCount)
Returns the number of words required for the specified number of bits.
Definition: misc.h:743
const Integer & Subtract(const Integer &a, const Integer &b) const
Subtracts elements in the ring.
Definition: integer.cpp:4327
unsigned int BytePrecision(const T &value)
Returns the number of 8-bit bytes or octets required for a value.
Definition: misc.h:606
void CleanGrow(size_type newSize)
Change size and preserve contents.
Definition: secblock.h:686
bool IsEven() const
Determines if the Integer is even parity.
Definition: integer.h:330
Secure memory block with allocator and cleanup.
Definition: secblock.h:434
unsigned int WordCount() const
Determines the number of words required to represent the Integer.
Definition: integer.cpp:3263
void OpenPGPDecode(const byte *input, size_t inputLen)
Decode from OpenPGP format.
Definition: integer.cpp:3413
Signedness
Used when importing and exporting integers.
Definition: integer.h:65
size_type size() const
Provides the count of elements in the SecBlock.
Definition: secblock.h:521
ASN.1 object identifiers for algorthms and schemes.
Square block cipher.
Definition: square.h:24
Classes for automatic resource management.
size_t size() const
Length of the memory block.
Definition: algparam.h:93
bool IsP4()
Determines if the CPU is an Intel P4.
Definition: cpu.h:260
byte GetByte(size_t i) const
Provides the i-th byte of the Integer.
Definition: integer.cpp:3065
Library configuration file.
MontgomeryRepresentation(const Integer &modulus)
Construct a IsMontgomeryRepresentation.
Definition: integer.cpp:4397
static void DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n)
returns same result as Divide(r, q, a, Power2(n)), but faster
Definition: integer.cpp:3989
Ring of congruence classes modulo n.
Definition: modarith.h:34
STL namespace.
Interface for random number generators.
Definition: cryptlib.h:1186
size_t BytesToWords(size_t byteCount)
Returns the number of words required for the specified number of bytes.
Definition: misc.h:733
void Randomize(RandomNumberGenerator &rng, size_t bitCount)
Set this Integer to random integer.
Definition: integer.cpp:3427
std::string IntToString< word64 >(word64 value, unsigned int base)
Converts an unsigned value to a string.
Definition: integer.cpp:4539
size_t MinEncodedSize(Signedness sign=UNSIGNED) const
The minimum number of bytes to encode this integer.
Definition: integer.cpp:3326
void New(size_type newSize)
Change size without preserving contents.
Definition: secblock.h:644
void SetByte(size_t n, byte value)
Set the n-th byte to value.
Definition: integer.cpp:3073
the value is negative
Definition: integer.h:59
SecBlock typedef.
Definition: secblock.h:728
virtual void SimultaneousExponentiate(Element *results, const Element &base, const Integer *exponents, unsigned int exponentsCount) const
Exponentiates a base to multiple exponents in the Ring.
Definition: algebra.cpp:334
BER Sequence Decoder.
Definition: asn.h:294
Interface for buffered transformations.
Definition: cryptlib.h:1352
void DEREncodeAsOctetString(BufferedTransformation &bt, size_t length) const
encode absolute value as big-endian octet string
Definition: integer.cpp:3382
lword MaxRetrievable() const
Provides the number of bytes ready for retrieval.
Definition: queue.h:35
const byte * begin() const
Pointer to the first byte in the memory block.
Definition: algparam.h:89
bool IsConvertableToLong() const
Determines if the Integer is convertable to Long.
Definition: integer.cpp:2919
Integer MultiplicativeInverse() const
return inverse if 1 or -1, otherwise return 0
Definition: integer.cpp:4174
static const Integer & One()
Integer representing 1.
Definition: integer.cpp:3016
bool GetIntValue(const char *name, int &value) const
Get a named value with type int.
Definition: cryptlib.h:371
const Integer & Add(const Integer &a, const Integer &b) const
Adds elements in the ring.
Definition: integer.cpp:4287
byte order is little-endian
Definition: cryptlib.h:125
Sign
Used internally to represent the integer.
Definition: integer.h:55
Pointer that overloads operator ->
Definition: smartptr.h:39
bool IsSquare() const
return whether this integer is a perfect square
Definition: integer.cpp:4163
size_t OpenPGPEncode(byte *output, size_t bufferSize) const
Encode absolute value in OpenPGP format.
Definition: integer.cpp:3398
Classes and functions for secure memory allocations.
unsigned int BitCount() const
Determines the number of bits required to represent the Integer.
Definition: integer.cpp:3277
void BERDecodeElement(BufferedTransformation &in, Element &a) const
Decodes element in DER format.
Definition: integer.cpp:4271
bool IsUnit() const
is 1 or -1
Definition: integer.cpp:4169
Copy input to a memory buffer.
Definition: filters.h:1016
Integer SquareRoot() const
extract square root, if negative return 0, else return floor of square root
Definition: integer.cpp:4145
bool IsPositive() const
Determines if the Integer is positive.
Definition: integer.h:324
bool GetValue(const char *name, T &value) const
Get a named value.
Definition: cryptlib.h:335
a number with no special properties
Definition: integer.h:75
Integer Squared() const
Definition: integer.h:482
size_t Put(byte inByte, bool blocking=true)
Input a byte for processing.
Definition: cryptlib.h:1378
bool IsNegative() const
Determines if the Integer is negative.
Definition: integer.h:318
AlgorithmParameters MakeParameters(const char *name, const T &value, bool throwIfNotUsed=true)
Create an object that implements NameValuePairs.
Definition: algparam.h:554
void swap(Integer &a)
Swaps this Integer with another Integer.
Definition: integer.cpp:3103
Integer()
Creates the zero integer.
Definition: integer.cpp:2879
unsigned int TrailingZeros(word32 v)
Determines the number of trailing 0-bits in a value.
Definition: misc.h:653
signed long ConvertToLong() const
Convert the Integer to Long.
Definition: integer.cpp:2933
Exception thrown when an error is encountered decoding an OpenPGP integer.
Definition: integer.h:264
void Negate()
Reverse the Sign of the Integer.
Definition: integer.cpp:4111
T Crop(T value, size_t bits)
Truncates the value to the specified number of bits.
Definition: misc.h:711
Integer Times(const Integer &b) const
Definition: integer.cpp:3913
void BERDecodeAsOctetString(BufferedTransformation &bt, size_t length)
Decode nonnegative value from big-endian octet string.
Definition: integer.cpp:3389
Application callback to signal suitability of a cabdidate prime.
Definition: nbtheory.h:80
void ConditionalSwapPointers(bool c, T &a, T &b)
Performs a branchless swap of pointers a and b if condition c is true.
Definition: misc.h:1035
static Integer Power2(size_t e)
Exponentiates to a power of 2.
Definition: integer.cpp:2995
Multiple precision integer with arithmetic operations.
Definition: integer.h:31
a signed value
Definition: integer.h:69
static const Integer & Two()
Integer representing 2.
Definition: integer.cpp:3021
Integer ConvertOut(const Integer &a) const
Reduces an element in the congruence class.
Definition: integer.cpp:4434
virtual Element CascadeExponentiate(const Element &x, const Integer &e1, const Element &y, const Integer &e2) const
TODO.
Definition: algebra.cpp:323
virtual lword Skip(lword skipMax=LWORD_MAX)
Discard skipMax bytes from the output buffer.
Definition: cryptlib.cpp:556
RandomNumberType
Properties of a random integer.
Definition: integer.h:73
const Integer & Inverse(const Integer &a) const
Inverts the element in the ring.
Definition: integer.cpp:4361
const char * Seed()
ConstByteArrayParameter.
Definition: argnames.h:19
byte order is big-endian
Definition: cryptlib.h:127
Safely right shift values when undefined behavior could occur.
const T & STDMIN(const T &a, const T &b)
Replacement function for std::min.
Definition: misc.h:450
String-based implementation of Store interface.
Definition: filters.h:1066
static void Divide(Integer &r, Integer &q, const Integer &a, const Integer &d)
calculate r and q such that (a == d*q + r) && (0 <= r < abs(d))
Definition: integer.cpp:3971
ModularArithmetic(const Integer &modulus=Integer::One())
Construct a ModularArithmetic.
Definition: modarith.h:47
void BERDecodeError()
Raises a BERDecodeErr.
Definition: asn.h:61
Data structure used to store byte strings.
Definition: queue.h:20
Functions for CPU features and intrinsics.
Classes and functions for working with ANS.1 objects.
Classes for SHA-1 and SHA-2 family of message digests.
void SetBit(size_t n, bool value=1)
Set the n-th bit to value.
Definition: integer.cpp:3051
size_t GetWord16(word16 &value, ByteOrder order=BIG_ENDIAN_ORDER)
Retrieve a 16-bit word.
Definition: cryptlib.cpp:753
iterator begin()
Provides an iterator pointing to the first element in the memory block.
Definition: secblock.h:496
Integer & Accumulate(Integer &a, const Integer &b) const
TODO.
Definition: integer.cpp:4307
const char * PointerToPrimeSelector()
const PrimeSelector *
Definition: argnames.h:41
Implementation of BufferedTransformation's attachment interface.
Classes and functions for number theoretic operations.
const Integer & Half(const Integer &a) const
TODO.
Definition: integer.cpp:4276
void DEREncode(BufferedTransformation &bt) const
Encode in DER format.
Definition: integer.cpp:3360
Exception thrown when division by 0 is encountered.
Definition: integer.h:37
DER Sequence Encoder.
Definition: asn.h:304
T1 SaturatingSubtract1(const T1 &a, const T2 &b)
Performs a saturating subtract clamped at 1.
Definition: misc.h:957
Exception thrown when a random number cannot be found that satisfies the condition.
Definition: integer.h:45
Performs modular arithmetic in Montgomery representation for increased speed.
Definition: modarith.h:274
DER General Encoder.
Definition: asn.h:271
bool HasSSE2()
Determines SSE2 availability.
Definition: cpu.h:205
Integer CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
TODO.
Definition: modarith.h:308
const Integer & Multiply(const Integer &a, const Integer &b) const
Multiplies elements in the ring.
Definition: integer.cpp:4408
void GenerateBlock(byte *output, size_t size)
Generate random array of bytes.
Definition: integer.cpp:3468
Integer InverseMod(const Integer &n) const
calculate multiplicative inverse of *this mod n
Definition: integer.cpp:4195
void Decode(const byte *input, size_t inputLen, Signedness sign=UNSIGNED)
Decode from big-endian byte array.
Definition: integer.cpp:3286
size_t PutWord16(word16 value, ByteOrder order=BIG_ENDIAN_ORDER, bool blocking=true)
Input a 16-bit word for processing.
Definition: cryptlib.cpp:717
size_t DEREncodeOctetString(BufferedTransformation &bt, const byte *str, size_t strLen)
DER encode octet string.
Definition: asn.cpp:104
virtual lword MaxRetrievable() const
Provides the number of bytes ready for retrieval.
Definition: cryptlib.cpp:499
void DEREncodeElement(BufferedTransformation &out, const Element &a) const
Encodes element in DER format.
Definition: integer.cpp:4266
Safely left shift values when undefined behavior could occur.
static const Integer & Zero()
Integer representing 0.
Definition: integer.cpp:3011
const T & STDMAX(const T &a, const T &b)
Replacement function for std::max.
Definition: misc.h:460
Euclidean domain.
Definition: algebra.h:315
void Grow(size_type newSize)
Change size and preserve contents.
Definition: secblock.h:670
void BERDecode(const byte *input, size_t inputLen)
Decode from BER format.
Definition: integer.cpp:3367
std::string IntToString< Integer >(Integer value, unsigned int base)
Converts an Integer to a string.
Definition: integer.cpp:4473
lword GetBits(size_t i, size_t n) const
Provides the low order bits of the Integer.
Definition: integer.cpp:3080
virtual size_t Get(byte &outByte)
Retrieve a 8-bit byte.
Definition: cryptlib.cpp:518
Class file for performing modular arithmetic.
Crypto++ library namespace.
size_type SizeInBytes() const
Provides the number of bytes in the SecBlock.
Definition: secblock.h:535
BER General Decoder.
Definition: asn.h:234
void SimultaneousExponentiate(Element *results, const Element &base, const Integer *exponents, unsigned int exponentsCount) const
Exponentiates a base to multiple exponents in the ring.
Definition: integer.cpp:4384
int Compare(const Integer &a) const
Perform signed comparison.
Definition: integer.cpp:4127
unsigned int MaxElementByteLength() const
Provides the maximum byte size of an element in the ring.
Definition: modarith.h:230
void SimultaneousExponentiate(Element *results, const Element &base, const Integer *exponents, unsigned int exponentsCount) const
Exponentiates a base to multiple exponents in the ring.
Definition: modarith.h:311
Object Identifier.
Definition: asn.h:158
Integer Modulo(const Integer &b) const
Definition: integer.cpp:4024
size_t Get(byte &outByte)
Retrieve a 8-bit byte.
Definition: queue.cpp:300
unsigned int BitPrecision(const T &value)
Returns the number of bits required for a value.
Definition: misc.h:628
unsigned int ByteCount() const
Determines the number of bytes required to represent the Integer.
Definition: integer.cpp:3268
Integer ConvertIn(const Integer &a) const
Reduces an element in the congruence class.
Definition: modarith.h:294
the value is positive or 0
Definition: integer.h:57
bool NotNegative() const
Determines if the Integer is non-negative.
Definition: integer.h:321
Interface for retrieving values given their names.
Definition: cryptlib.h:277