Crypto++  5.6.4
Free C++ class library of cryptographic schemes
integer.cpp
1 // integer.cpp - written and placed in the public domain by Wei Dai
2 // contains public domain code contributed by Alister Lee and Leonard Janke
3 
4 #include "pch.h"
5 #include "config.h"
6 
7 #if CRYPTOPP_MSC_VERSION
8 # pragma warning(disable: 4100)
9 #endif
10 
11 #if CRYPTOPP_GCC_DIAGNOSTIC_AVAILABLE
12 # pragma GCC diagnostic ignored "-Wunused"
13 # pragma GCC diagnostic ignored "-Wunused-but-set-variable"
14 #endif
15 
16 #ifndef CRYPTOPP_IMPORTS
17 
18 #include "integer.h"
19 #include "secblock.h"
20 #include "modarith.h"
21 #include "nbtheory.h"
22 #include "smartptr.h"
23 #include "algparam.h"
24 #include "filters.h"
25 #include "asn.h"
26 #include "oids.h"
27 #include "words.h"
28 #include "pubkey.h" // for P1363_KDF2
29 #include "sha.h"
30 #include "cpu.h"
31 #include "misc.h"
32 
33 #include <iostream>
34 
35 #if (_MSC_VER >= 1400) && !defined(_M_ARM)
36  #include <intrin.h>
37 #endif
38 
39 #ifdef __DECCXX
40  #include <c_asm.h>
41 #endif
42 
43 #ifdef CRYPTOPP_MSVC6_NO_PP
44  #pragma message("You do not seem to have the Visual C++ Processor Pack installed, so use of SSE2 instructions will be disabled.")
45 #endif
46 
47 // "Error: The operand ___LKDB cannot be assigned to", http://github.com/weidai11/cryptopp/issues/188
48 #if (__SUNPRO_CC >= 0x5130)
49 # define MAYBE_CONST
50 # define MAYBE_UNCONST_CAST const_cast<word*>
51 #else
52 # define MAYBE_CONST const
53 # define MAYBE_UNCONST_CAST
54 #endif
55 
56 // "Inline assembly operands don't work with .intel_syntax",
57 // http://llvm.org/bugs/show_bug.cgi?id=24232
58 #if CRYPTOPP_BOOL_X32 || defined(CRYPTOPP_DISABLE_INTEL_ASM)
59 # undef CRYPTOPP_X86_ASM_AVAILABLE
60 # undef CRYPTOPP_X32_ASM_AVAILABLE
61 # undef CRYPTOPP_X64_ASM_AVAILABLE
62 # undef CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
63 # undef CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
64 # define CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 0
65 # define CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE 0
66 #else
67 # define CRYPTOPP_INTEGER_SSE2 (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86)
68 #endif
69 
70 NAMESPACE_BEGIN(CryptoPP)
71 
72 bool AssignIntToInteger(const std::type_info &valueType, void *pInteger, const void *pInt)
73 {
74  if (valueType != typeid(Integer))
75  return false;
76  *reinterpret_cast<Integer *>(pInteger) = *reinterpret_cast<const int *>(pInt);
77  return true;
78 }
79 
80 inline static int Compare(const word *A, const word *B, size_t N)
81 {
82  while (N--)
83  if (A[N] > B[N])
84  return 1;
85  else if (A[N] < B[N])
86  return -1;
87 
88  return 0;
89 }
90 
91 inline static int Increment(word *A, size_t N, word B=1)
92 {
93  CRYPTOPP_ASSERT(N);
94  word t = A[0];
95  A[0] = t+B;
96  if (A[0] >= t)
97  return 0;
98  for (unsigned i=1; i<N; i++)
99  if (++A[i])
100  return 0;
101  return 1;
102 }
103 
104 inline static int Decrement(word *A, size_t N, word B=1)
105 {
106  CRYPTOPP_ASSERT(N);
107  word t = A[0];
108  A[0] = t-B;
109  if (A[0] <= t)
110  return 0;
111  for (unsigned i=1; i<N; i++)
112  if (A[i]--)
113  return 0;
114  return 1;
115 }
116 
117 static void TwosComplement(word *A, size_t N)
118 {
119  Decrement(A, N);
120  for (unsigned i=0; i<N; i++)
121  A[i] = ~A[i];
122 }
123 
124 static word AtomicInverseModPower2(word A)
125 {
126  CRYPTOPP_ASSERT(A%2==1);
127 
128  word R=A%8;
129 
130  for (unsigned i=3; i<WORD_BITS; i*=2)
131  R = R*(2-R*A);
132 
133  CRYPTOPP_ASSERT(R*A==1);
134  return R;
135 }
136 
137 // ********************************************************
138 
139 #if !defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE) || (defined(__x86_64__) && defined(CRYPTOPP_WORD128_AVAILABLE))
140  #define Declare2Words(x) word x##0, x##1;
141  #define AssignWord(a, b) a##0 = b; a##1 = 0;
142  #define Add2WordsBy1(a, b, c) a##0 = b##0 + c; a##1 = b##1 + (a##0 < c);
143  #define LowWord(a) a##0
144  #define HighWord(a) a##1
145  #ifdef _MSC_VER
146  #define MultiplyWordsLoHi(p0, p1, a, b) p0 = _umul128(a, b, &p1);
147  #ifndef __INTEL_COMPILER
148  #define Double3Words(c, d) d##1 = __shiftleft128(d##0, d##1, 1); d##0 = __shiftleft128(c, d##0, 1); c *= 2;
149  #endif
150  #elif defined(__DECCXX)
151  #define MultiplyWordsLoHi(p0, p1, a, b) p0 = a*b; p1 = asm("umulh %a0, %a1, %v0", a, b);
152  #elif defined(__x86_64__)
153  #if defined(__SUNPRO_CC) && __SUNPRO_CC < 0x5100
154  // Sun Studio's gcc-style inline assembly is heavily bugged as of version 5.9 Patch 124864-09 2008/12/16, but this one works
155  #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "r"(b) : "cc");
156  #else
157  #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
158  #define MulAcc(c, d, a, b) asm ("mulq %6; addq %3, %0; adcq %4, %1; adcq $0, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1), "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
159  #define Double3Words(c, d) asm ("addq %0, %0; adcq %1, %1; adcq %2, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1) : : "cc");
160  #define Acc2WordsBy1(a, b) asm ("addq %2, %0; adcq $0, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b) : "cc");
161  #define Acc2WordsBy2(a, b) asm ("addq %2, %0; adcq %3, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b##0), "r"(b##1) : "cc");
162  #define Acc3WordsBy2(c, d, e) asm ("addq %5, %0; adcq %6, %1; adcq $0, %2;" : "+r"(c), "=r"(e##0), "=r"(e##1) : "1"(d##0), "2"(d##1), "r"(e##0), "r"(e##1) : "cc");
163  #endif
164  #endif
165  #define MultiplyWords(p, a, b) MultiplyWordsLoHi(p##0, p##1, a, b)
166  #ifndef Double3Words
167  #define Double3Words(c, d) d##1 = 2*d##1 + (d##0>>(WORD_BITS-1)); d##0 = 2*d##0 + (c>>(WORD_BITS-1)); c *= 2;
168  #endif
169  #ifndef Acc2WordsBy2
170  #define Acc2WordsBy2(a, b) a##0 += b##0; a##1 += a##0 < b##0; a##1 += b##1;
171  #endif
172  #define AddWithCarry(u, a, b) {word t = a+b; u##0 = t + u##1; u##1 = (t<a) + (u##0<t);}
173  #define SubtractWithBorrow(u, a, b) {word t = a-b; u##0 = t - u##1; u##1 = (t>a) + (u##0>t);}
174  #define GetCarry(u) u##1
175  #define GetBorrow(u) u##1
176 #else
177  #define Declare2Words(x) dword x;
178  #if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER) && !defined(_M_ARM)
179  #define MultiplyWords(p, a, b) p = __emulu(a, b);
180  #else
181  #define MultiplyWords(p, a, b) p = (dword)a*b;
182  #endif
183  #define AssignWord(a, b) a = b;
184  #define Add2WordsBy1(a, b, c) a = b + c;
185  #define Acc2WordsBy2(a, b) a += b;
186  #define LowWord(a) word(a)
187  #define HighWord(a) word(a>>WORD_BITS)
188  #define Double3Words(c, d) d = 2*d + (c>>(WORD_BITS-1)); c *= 2;
189  #define AddWithCarry(u, a, b) u = dword(a) + b + GetCarry(u);
190  #define SubtractWithBorrow(u, a, b) u = dword(a) - b - GetBorrow(u);
191  #define GetCarry(u) HighWord(u)
192  #define GetBorrow(u) word(u>>(WORD_BITS*2-1))
193 #endif
194 #ifndef MulAcc
195  #define MulAcc(c, d, a, b) MultiplyWords(p, a, b); Acc2WordsBy1(p, c); c = LowWord(p); Acc2WordsBy1(d, HighWord(p));
196 #endif
197 #ifndef Acc2WordsBy1
198  #define Acc2WordsBy1(a, b) Add2WordsBy1(a, a, b)
199 #endif
200 #ifndef Acc3WordsBy2
201  #define Acc3WordsBy2(c, d, e) Acc2WordsBy1(e, c); c = LowWord(e); Add2WordsBy1(e, d, HighWord(e));
202 #endif
203 
204 class DWord
205 {
206 public:
207 #if defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE)
208  DWord() : m_whole() { }
209 #else
210  DWord() : m_halfs() { }
211 #endif
212 
213 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
214  explicit DWord(word low) : m_whole(low) { }
215 #else
216  explicit DWord(word low) : m_halfs()
217  {
218  m_halfs.low = low;
219  }
220 #endif
221 
222 #if defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE)
223  DWord(word low, word high) : m_whole()
224 #else
225  DWord(word low, word high) : m_halfs()
226 #endif
227  {
228 #if defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE)
229 # if defined(IS_LITTLE_ENDIAN)
230  const word t[2] = {low,high};
231  memcpy(&m_whole, &t, sizeof(m_whole));
232 # else
233  const word t[2] = {high,low};
234  memcpy(&m_whole, &t, sizeof(m_whole));
235 # endif
236 #else
237  m_halfs.low = low;
238  m_halfs.high = high;
239 #endif
240  }
241 
242  static DWord Multiply(word a, word b)
243  {
244  DWord r;
245  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
246  r.m_whole = (dword)a * b;
247  #elif defined(MultiplyWordsLoHi)
248  MultiplyWordsLoHi(r.m_halfs.low, r.m_halfs.high, a, b);
249  #else
250  CRYPTOPP_ASSERT(0);
251  #endif
252  return r;
253  }
254 
255  static DWord MultiplyAndAdd(word a, word b, word c)
256  {
257  DWord r = Multiply(a, b);
258  return r += c;
259  }
260 
261  DWord & operator+=(word a)
262  {
263  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
264  m_whole = m_whole + a;
265  #else
266  m_halfs.low += a;
267  m_halfs.high += (m_halfs.low < a);
268  #endif
269  return *this;
270  }
271 
272  DWord operator+(word a)
273  {
274  DWord r;
275  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
276  r.m_whole = m_whole + a;
277  #else
278  r.m_halfs.low = m_halfs.low + a;
279  r.m_halfs.high = m_halfs.high + (r.m_halfs.low < a);
280  #endif
281  return r;
282  }
283 
284  DWord operator-(DWord a)
285  {
286  DWord r;
287  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
288  r.m_whole = m_whole - a.m_whole;
289  #else
290  r.m_halfs.low = m_halfs.low - a.m_halfs.low;
291  r.m_halfs.high = m_halfs.high - a.m_halfs.high - (r.m_halfs.low > m_halfs.low);
292  #endif
293  return r;
294  }
295 
296  DWord operator-(word a)
297  {
298  DWord r;
299  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
300  r.m_whole = m_whole - a;
301  #else
302  r.m_halfs.low = m_halfs.low - a;
303  r.m_halfs.high = m_halfs.high - (r.m_halfs.low > m_halfs.low);
304  #endif
305  return r;
306  }
307 
308  // returns quotient, which must fit in a word
309  word operator/(word divisor);
310 
311  word operator%(word a);
312 
313  bool operator!() const
314  {
315  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
316  return !m_whole;
317  #else
318  return !m_halfs.high && !m_halfs.low;
319  #endif
320  }
321 
322  // TODO: When NATIVE_DWORD is in effect, we access high and low, which are inactive
323  // union members, and that's UB. Also see http://stackoverflow.com/q/11373203.
324  word GetLowHalf() const {return m_halfs.low;}
325  word GetHighHalf() const {return m_halfs.high;}
326  word GetHighHalfAsBorrow() const {return 0-m_halfs.high;}
327 
328 private:
329  // Issue 274, "Types cannot be declared in anonymous union",
330  // http://github.com/weidai11/cryptopp/issues/274
331  // Thanks to Martin Bonner at http://stackoverflow.com/a/39507183
332  struct half_words
333  {
334  #ifdef IS_LITTLE_ENDIAN
335  word low;
336  word high;
337  #else
338  word high;
339  word low;
340  #endif
341  };
342  union
343  {
344  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
345  dword m_whole;
346  #endif
347  half_words m_halfs;
348  };
349 };
350 
351 class Word
352 {
353 public:
354  // Converity finding on default ctor. We've isntrumented the code,
355  // and cannot uncover a case where it affects a result.
356 #if defined(__COVERITY__)
357  Word() : m_whole(0) {}
358 #elif CRYPTOPP_DEBUG
359  // Repeating pattern of 1010 for debug builds to break things...
360  Word() : m_whole(0) {memset(&m_whole, 0xaa, sizeof(m_whole));}
361 #else
362  Word() {}
363 #endif
364 
365  Word(word value) : m_whole(value) {}
366  Word(hword low, hword high) : m_whole(low | (word(high) << (WORD_BITS/2))) {}
367 
368  static Word Multiply(hword a, hword b)
369  {
370  Word r;
371  r.m_whole = (word)a * b;
372  return r;
373  }
374 
375  Word operator-(Word a)
376  {
377  Word r;
378  r.m_whole = m_whole - a.m_whole;
379  return r;
380  }
381 
382  Word operator-(hword a)
383  {
384  Word r;
385  r.m_whole = m_whole - a;
386  return r;
387  }
388 
389  // returns quotient, which must fit in a word
390  hword operator/(hword divisor)
391  {
392  return hword(m_whole / divisor);
393  }
394 
395  bool operator!() const
396  {
397  return !m_whole;
398  }
399 
400  word GetWhole() const {return m_whole;}
401  hword GetLowHalf() const {return hword(m_whole);}
402  hword GetHighHalf() const {return hword(m_whole>>(WORD_BITS/2));}
403  hword GetHighHalfAsBorrow() const {return 0-hword(m_whole>>(WORD_BITS/2));}
404 
405 private:
406  word m_whole;
407 };
408 
409 // do a 3 word by 2 word divide, returns quotient and leaves remainder in A
410 template <class S, class D>
411 S DivideThreeWordsByTwo(S *A, S B0, S B1, D *dummy=NULL)
412 {
413  CRYPTOPP_UNUSED(dummy);
414 
415  // CRYPTOPP_ASSERT {A[2],A[1]} < {B1,B0}, so quotient can fit in a S
416  CRYPTOPP_ASSERT(A[2] < B1 || (A[2]==B1 && A[1] < B0));
417 
418  // estimate the quotient: do a 2 S by 1 S divide
419  S Q;
420  if (S(B1+1) == 0)
421  Q = A[2];
422  else if (B1 > 0)
423  Q = D(A[1], A[2]) / S(B1+1);
424  else
425  Q = D(A[0], A[1]) / B0;
426 
427  // now subtract Q*B from A
428  D p = D::Multiply(B0, Q);
429  D u = (D) A[0] - p.GetLowHalf();
430  A[0] = u.GetLowHalf();
431  u = (D) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - D::Multiply(B1, Q);
432  A[1] = u.GetLowHalf();
433  A[2] += u.GetHighHalf();
434 
435  // Q <= actual quotient, so fix it
436  while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
437  {
438  u = (D) A[0] - B0;
439  A[0] = u.GetLowHalf();
440  u = (D) A[1] - B1 - u.GetHighHalfAsBorrow();
441  A[1] = u.GetLowHalf();
442  A[2] += u.GetHighHalf();
443  Q++;
444  CRYPTOPP_ASSERT(Q); // shouldn't overflow
445  }
446 
447  return Q;
448 }
449 
450 // do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1
451 template <class S, class D>
452 inline D DivideFourWordsByTwo(S *T, const D &Al, const D &Ah, const D &B)
453 {
454  if (!B) // if divisor is 0, we assume divisor==2**(2*WORD_BITS)
455  return D(Ah.GetLowHalf(), Ah.GetHighHalf());
456  else
457  {
458  S Q[2];
459  T[0] = Al.GetLowHalf();
460  T[1] = Al.GetHighHalf();
461  T[2] = Ah.GetLowHalf();
462  T[3] = Ah.GetHighHalf();
463  Q[1] = DivideThreeWordsByTwo<S, D>(T+1, B.GetLowHalf(), B.GetHighHalf());
464  Q[0] = DivideThreeWordsByTwo<S, D>(T, B.GetLowHalf(), B.GetHighHalf());
465  return D(Q[0], Q[1]);
466  }
467 }
468 
469 // returns quotient, which must fit in a word
470 inline word DWord::operator/(word a)
471 {
472  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
473  return word(m_whole / a);
474  #else
475  hword r[4];
476  return DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a).GetWhole();
477  #endif
478 }
479 
480 inline word DWord::operator%(word a)
481 {
482  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
483  return word(m_whole % a);
484  #else
485  if (a < (word(1) << (WORD_BITS/2)))
486  {
487  hword h = hword(a);
488  word r = m_halfs.high % h;
489  r = ((m_halfs.low >> (WORD_BITS/2)) + (r << (WORD_BITS/2))) % h;
490  return hword((hword(m_halfs.low) + (r << (WORD_BITS/2))) % h);
491  }
492  else
493  {
494  hword r[4];
495  DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a);
496  return Word(r[0], r[1]).GetWhole();
497  }
498  #endif
499 }
500 
501 // ********************************************************
502 
503 // Use some tricks to share assembly code between MSVC, GCC, Clang and Sun CC.
504 #if defined(__GNUC__)
505  #define AddPrologue \
506  int result; \
507  __asm__ __volatile__ \
508  ( \
509  INTEL_NOPREFIX
510  #define AddEpilogue \
511  ATT_PREFIX \
512  : "=a" (result)\
513  : "d" (C), "a" (A), "D" (B), "c" (N) \
514  : "%esi", "memory", "cc" \
515  );\
516  return result;
517  #define MulPrologue \
518  __asm__ __volatile__ \
519  ( \
520  INTEL_NOPREFIX \
521  AS1( push ebx) \
522  AS2( mov ebx, edx)
523  #define MulEpilogue \
524  AS1( pop ebx) \
525  ATT_PREFIX \
526  : \
527  : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B) \
528  : "%esi", "memory", "cc" \
529  );
530  #define SquPrologue MulPrologue
531  #define SquEpilogue \
532  AS1( pop ebx) \
533  ATT_PREFIX \
534  : \
535  : "d" (s_maskLow16), "c" (C), "a" (A) \
536  : "%esi", "%edi", "memory", "cc" \
537  );
538  #define TopPrologue MulPrologue
539  #define TopEpilogue \
540  AS1( pop ebx) \
541  ATT_PREFIX \
542  : \
543  : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B), "S" (L) \
544  : "memory", "cc" \
545  );
546 #else
547  #define AddPrologue \
548  __asm push edi \
549  __asm push esi \
550  __asm mov eax, [esp+12] \
551  __asm mov edi, [esp+16]
552  #define AddEpilogue \
553  __asm pop esi \
554  __asm pop edi \
555  __asm ret 8
556 #if _MSC_VER < 1300
557  #define SaveEBX __asm push ebx
558  #define RestoreEBX __asm pop ebx
559 #else
560  #define SaveEBX
561  #define RestoreEBX
562 #endif
563  #define SquPrologue \
564  AS2( mov eax, A) \
565  AS2( mov ecx, C) \
566  SaveEBX \
567  AS2( lea ebx, s_maskLow16)
568  #define MulPrologue \
569  AS2( mov eax, A) \
570  AS2( mov edi, B) \
571  AS2( mov ecx, C) \
572  SaveEBX \
573  AS2( lea ebx, s_maskLow16)
574  #define TopPrologue \
575  AS2( mov eax, A) \
576  AS2( mov edi, B) \
577  AS2( mov ecx, C) \
578  AS2( mov esi, L) \
579  SaveEBX \
580  AS2( lea ebx, s_maskLow16)
581  #define SquEpilogue RestoreEBX
582  #define MulEpilogue RestoreEBX
583  #define TopEpilogue RestoreEBX
584 #endif
585 
586 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
587 extern "C" {
588 int Baseline_Add(size_t N, word *C, const word *A, const word *B);
589 int Baseline_Sub(size_t N, word *C, const word *A, const word *B);
590 }
591 #elif defined(CRYPTOPP_X64_ASM_AVAILABLE) && defined(__GNUC__) && defined(CRYPTOPP_WORD128_AVAILABLE)
592 int Baseline_Add(size_t N, word *C, const word *A, const word *B)
593 {
594  word result;
595  __asm__ __volatile__
596  (
597  INTEL_NOPREFIX
598  AS1( neg %1)
599  ASJ( jz, 1, f)
600  AS2( mov %0,[%3+8*%1])
601  AS2( add %0,[%4+8*%1])
602  AS2( mov [%2+8*%1],%0)
603  ASL(0)
604  AS2( mov %0,[%3+8*%1+8])
605  AS2( adc %0,[%4+8*%1+8])
606  AS2( mov [%2+8*%1+8],%0)
607  AS2( lea %1,[%1+2])
608  ASJ( jrcxz, 1, f)
609  AS2( mov %0,[%3+8*%1])
610  AS2( adc %0,[%4+8*%1])
611  AS2( mov [%2+8*%1],%0)
612  ASJ( jmp, 0, b)
613  ASL(1)
614  AS2( mov %0, 0)
615  AS2( adc %0, %0)
616  ATT_NOPREFIX
617  : "=&r" (result), "+c" (N)
618  : "r" (C+N), "r" (A+N), "r" (B+N)
619  : "memory", "cc"
620  );
621  return (int)result;
622 }
623 
624 int Baseline_Sub(size_t N, word *C, const word *A, const word *B)
625 {
626  word result;
627  __asm__ __volatile__
628  (
629  INTEL_NOPREFIX
630  AS1( neg %1)
631  ASJ( jz, 1, f)
632  AS2( mov %0,[%3+8*%1])
633  AS2( sub %0,[%4+8*%1])
634  AS2( mov [%2+8*%1],%0)
635  ASL(0)
636  AS2( mov %0,[%3+8*%1+8])
637  AS2( sbb %0,[%4+8*%1+8])
638  AS2( mov [%2+8*%1+8],%0)
639  AS2( lea %1,[%1+2])
640  ASJ( jrcxz, 1, f)
641  AS2( mov %0,[%3+8*%1])
642  AS2( sbb %0,[%4+8*%1])
643  AS2( mov [%2+8*%1],%0)
644  ASJ( jmp, 0, b)
645  ASL(1)
646  AS2( mov %0, 0)
647  AS2( adc %0, %0)
648  ATT_NOPREFIX
649  : "=&r" (result), "+c" (N)
650  : "r" (C+N), "r" (A+N), "r" (B+N)
651  : "memory", "cc"
652  );
653  return (int)result;
654 }
655 #elif defined(CRYPTOPP_X86_ASM_AVAILABLE) && CRYPTOPP_BOOL_X86
656 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
657 {
658  AddPrologue
659 
660  // now: eax = A, edi = B, edx = C, ecx = N
661  AS2( lea eax, [eax+4*ecx])
662  AS2( lea edi, [edi+4*ecx])
663  AS2( lea edx, [edx+4*ecx])
664 
665  AS1( neg ecx) // ecx is negative index
666  AS2( test ecx, 2) // this clears carry flag
667  ASJ( jz, 0, f)
668  AS2( sub ecx, 2)
669  ASJ( jmp, 1, f)
670 
671  ASL(0)
672  ASJ( jecxz, 2, f) // loop until ecx overflows and becomes zero
673  AS2( mov esi,[eax+4*ecx])
674  AS2( adc esi,[edi+4*ecx])
675  AS2( mov [edx+4*ecx],esi)
676  AS2( mov esi,[eax+4*ecx+4])
677  AS2( adc esi,[edi+4*ecx+4])
678  AS2( mov [edx+4*ecx+4],esi)
679  ASL(1)
680  AS2( mov esi,[eax+4*ecx+8])
681  AS2( adc esi,[edi+4*ecx+8])
682  AS2( mov [edx+4*ecx+8],esi)
683  AS2( mov esi,[eax+4*ecx+12])
684  AS2( adc esi,[edi+4*ecx+12])
685  AS2( mov [edx+4*ecx+12],esi)
686 
687  AS2( lea ecx,[ecx+4]) // advance index, avoid inc which causes slowdown on Intel Core 2
688  ASJ( jmp, 0, b)
689 
690  ASL(2)
691  AS2( mov eax, 0)
692  AS1( setc al) // store carry into eax (return result register)
693 
694  AddEpilogue
695 }
696 
697 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B)
698 {
699  AddPrologue
700 
701  // now: eax = A, edi = B, edx = C, ecx = N
702  AS2( lea eax, [eax+4*ecx])
703  AS2( lea edi, [edi+4*ecx])
704  AS2( lea edx, [edx+4*ecx])
705 
706  AS1( neg ecx) // ecx is negative index
707  AS2( test ecx, 2) // this clears carry flag
708  ASJ( jz, 0, f)
709  AS2( sub ecx, 2)
710  ASJ( jmp, 1, f)
711 
712  ASL(0)
713  ASJ( jecxz, 2, f) // loop until ecx overflows and becomes zero
714  AS2( mov esi,[eax+4*ecx])
715  AS2( sbb esi,[edi+4*ecx])
716  AS2( mov [edx+4*ecx],esi)
717  AS2( mov esi,[eax+4*ecx+4])
718  AS2( sbb esi,[edi+4*ecx+4])
719  AS2( mov [edx+4*ecx+4],esi)
720  ASL(1)
721  AS2( mov esi,[eax+4*ecx+8])
722  AS2( sbb esi,[edi+4*ecx+8])
723  AS2( mov [edx+4*ecx+8],esi)
724  AS2( mov esi,[eax+4*ecx+12])
725  AS2( sbb esi,[edi+4*ecx+12])
726  AS2( mov [edx+4*ecx+12],esi)
727 
728  AS2( lea ecx,[ecx+4]) // advance index, avoid inc which causes slowdown on Intel Core 2
729  ASJ( jmp, 0, b)
730 
731  ASL(2)
732  AS2( mov eax, 0)
733  AS1( setc al) // store carry into eax (return result register)
734 
735  AddEpilogue
736 }
737 
738 #if CRYPTOPP_INTEGER_SSE2
739 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Add(size_t N, word *C, const word *A, const word *B)
740 {
741  AddPrologue
742 
743  // now: eax = A, edi = B, edx = C, ecx = N
744  AS2( lea eax, [eax+4*ecx])
745  AS2( lea edi, [edi+4*ecx])
746  AS2( lea edx, [edx+4*ecx])
747 
748  AS1( neg ecx) // ecx is negative index
749  AS2( pxor mm2, mm2)
750  ASJ( jz, 2, f)
751  AS2( test ecx, 2) // this clears carry flag
752  ASJ( jz, 0, f)
753  AS2( sub ecx, 2)
754  ASJ( jmp, 1, f)
755 
756  ASL(0)
757  AS2( movd mm0, DWORD PTR [eax+4*ecx])
758  AS2( movd mm1, DWORD PTR [edi+4*ecx])
759  AS2( paddq mm0, mm1)
760  AS2( paddq mm2, mm0)
761  AS2( movd DWORD PTR [edx+4*ecx], mm2)
762  AS2( psrlq mm2, 32)
763 
764  AS2( movd mm0, DWORD PTR [eax+4*ecx+4])
765  AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
766  AS2( paddq mm0, mm1)
767  AS2( paddq mm2, mm0)
768  AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
769  AS2( psrlq mm2, 32)
770 
771  ASL(1)
772  AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
773  AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
774  AS2( paddq mm0, mm1)
775  AS2( paddq mm2, mm0)
776  AS2( movd DWORD PTR [edx+4*ecx+8], mm2)
777  AS2( psrlq mm2, 32)
778 
779  AS2( movd mm0, DWORD PTR [eax+4*ecx+12])
780  AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
781  AS2( paddq mm0, mm1)
782  AS2( paddq mm2, mm0)
783  AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
784  AS2( psrlq mm2, 32)
785 
786  AS2( add ecx, 4)
787  ASJ( jnz, 0, b)
788 
789  ASL(2)
790  AS2( movd eax, mm2)
791  AS1( emms)
792 
793  AddEpilogue
794 }
795 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Sub(size_t N, word *C, const word *A, const word *B)
796 {
797  AddPrologue
798 
799  // now: eax = A, edi = B, edx = C, ecx = N
800  AS2( lea eax, [eax+4*ecx])
801  AS2( lea edi, [edi+4*ecx])
802  AS2( lea edx, [edx+4*ecx])
803 
804  AS1( neg ecx) // ecx is negative index
805  AS2( pxor mm2, mm2)
806  ASJ( jz, 2, f)
807  AS2( test ecx, 2) // this clears carry flag
808  ASJ( jz, 0, f)
809  AS2( sub ecx, 2)
810  ASJ( jmp, 1, f)
811 
812  ASL(0)
813  AS2( movd mm0, DWORD PTR [eax+4*ecx])
814  AS2( movd mm1, DWORD PTR [edi+4*ecx])
815  AS2( psubq mm0, mm1)
816  AS2( psubq mm0, mm2)
817  AS2( movd DWORD PTR [edx+4*ecx], mm0)
818  AS2( psrlq mm0, 63)
819 
820  AS2( movd mm2, DWORD PTR [eax+4*ecx+4])
821  AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
822  AS2( psubq mm2, mm1)
823  AS2( psubq mm2, mm0)
824  AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
825  AS2( psrlq mm2, 63)
826 
827  ASL(1)
828  AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
829  AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
830  AS2( psubq mm0, mm1)
831  AS2( psubq mm0, mm2)
832  AS2( movd DWORD PTR [edx+4*ecx+8], mm0)
833  AS2( psrlq mm0, 63)
834 
835  AS2( movd mm2, DWORD PTR [eax+4*ecx+12])
836  AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
837  AS2( psubq mm2, mm1)
838  AS2( psubq mm2, mm0)
839  AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
840  AS2( psrlq mm2, 63)
841 
842  AS2( add ecx, 4)
843  ASJ( jnz, 0, b)
844 
845  ASL(2)
846  AS2( movd eax, mm2)
847  AS1( emms)
848 
849  AddEpilogue
850 }
851 #endif // #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
852 #else
853 int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
854 {
855  CRYPTOPP_ASSERT (N%2 == 0);
856 
857  Declare2Words(u);
858  AssignWord(u, 0);
859  for (size_t i=0; i<N; i+=2)
860  {
861  AddWithCarry(u, A[i], B[i]);
862  C[i] = LowWord(u);
863  AddWithCarry(u, A[i+1], B[i+1]);
864  C[i+1] = LowWord(u);
865  }
866  return int(GetCarry(u));
867 }
868 
869 int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B)
870 {
871  CRYPTOPP_ASSERT (N%2 == 0);
872 
873  Declare2Words(u);
874  AssignWord(u, 0);
875  for (size_t i=0; i<N; i+=2)
876  {
877  SubtractWithBorrow(u, A[i], B[i]);
878  C[i] = LowWord(u);
879  SubtractWithBorrow(u, A[i+1], B[i+1]);
880  C[i+1] = LowWord(u);
881  }
882  return int(GetBorrow(u));
883 }
884 #endif
885 
886 static word LinearMultiply(word *C, const word *AA, word B, size_t N)
887 {
888  // http://github.com/weidai11/cryptopp/issues/188
889  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
890 
891  word carry=0;
892  for(unsigned i=0; i<N; i++)
893  {
894  Declare2Words(p);
895  MultiplyWords(p, A[i], B);
896  Acc2WordsBy1(p, carry);
897  C[i] = LowWord(p);
898  carry = HighWord(p);
899  }
900  return carry;
901 }
902 
903 #ifndef CRYPTOPP_DOXYGEN_PROCESSING
904 
905 #define Mul_2 \
906  Mul_Begin(2) \
907  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
908  Mul_End(1, 1)
909 
910 #define Mul_4 \
911  Mul_Begin(4) \
912  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
913  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
914  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
915  Mul_SaveAcc(3, 1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
916  Mul_SaveAcc(4, 2, 3) Mul_Acc(3, 2) \
917  Mul_End(5, 3)
918 
919 #define Mul_8 \
920  Mul_Begin(8) \
921  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
922  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
923  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
924  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
925  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
926  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
927  Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
928  Mul_SaveAcc(7, 1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
929  Mul_SaveAcc(8, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
930  Mul_SaveAcc(9, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
931  Mul_SaveAcc(10, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
932  Mul_SaveAcc(11, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
933  Mul_SaveAcc(12, 6, 7) Mul_Acc(7, 6) \
934  Mul_End(13, 7)
935 
936 #define Mul_16 \
937  Mul_Begin(16) \
938  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
939  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
940  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
941  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
942  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
943  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
944  Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
945  Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
946  Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
947  Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
948  Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
949  Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
950  Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
951  Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
952  Mul_SaveAcc(14, 0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
953  Mul_SaveAcc(15, 1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
954  Mul_SaveAcc(16, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
955  Mul_SaveAcc(17, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
956  Mul_SaveAcc(18, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
957  Mul_SaveAcc(19, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
958  Mul_SaveAcc(20, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
959  Mul_SaveAcc(21, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
960  Mul_SaveAcc(22, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
961  Mul_SaveAcc(23, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
962  Mul_SaveAcc(24, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
963  Mul_SaveAcc(25, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
964  Mul_SaveAcc(26, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
965  Mul_SaveAcc(27, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
966  Mul_SaveAcc(28, 14, 15) Mul_Acc(15, 14) \
967  Mul_End(29, 15)
968 
969 #define Squ_2 \
970  Squ_Begin(2) \
971  Squ_End(2)
972 
973 #define Squ_4 \
974  Squ_Begin(4) \
975  Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
976  Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
977  Squ_SaveAcc(3, 1, 3) Squ_Diag(2) \
978  Squ_SaveAcc(4, 2, 3) Squ_NonDiag \
979  Squ_End(4)
980 
981 #define Squ_8 \
982  Squ_Begin(8) \
983  Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
984  Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
985  Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
986  Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
987  Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
988  Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
989  Squ_SaveAcc(7, 1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
990  Squ_SaveAcc(8, 2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
991  Squ_SaveAcc(9, 3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
992  Squ_SaveAcc(10, 4, 7) Squ_Acc(5, 6) Squ_NonDiag \
993  Squ_SaveAcc(11, 5, 7) Squ_Diag(6) \
994  Squ_SaveAcc(12, 6, 7) Squ_NonDiag \
995  Squ_End(8)
996 
997 #define Squ_16 \
998  Squ_Begin(16) \
999  Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
1000  Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
1001  Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
1002  Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
1003  Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
1004  Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
1005  Squ_SaveAcc(7, 0, 8) Squ_Acc(1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
1006  Squ_SaveAcc(8, 0, 9) Squ_Acc(1, 8) Squ_Acc(2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
1007  Squ_SaveAcc(9, 0, 10) Squ_Acc(1, 9) Squ_Acc(2, 8) Squ_Acc(3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
1008  Squ_SaveAcc(10, 0, 11) Squ_Acc(1, 10) Squ_Acc(2, 9) Squ_Acc(3, 8) Squ_Acc(4, 7) Squ_Acc(5, 6) Squ_NonDiag \
1009  Squ_SaveAcc(11, 0, 12) Squ_Acc(1, 11) Squ_Acc(2, 10) Squ_Acc(3, 9) Squ_Acc(4, 8) Squ_Acc(5, 7) Squ_Diag(6) \
1010  Squ_SaveAcc(12, 0, 13) Squ_Acc(1, 12) Squ_Acc(2, 11) Squ_Acc(3, 10) Squ_Acc(4, 9) Squ_Acc(5, 8) Squ_Acc(6, 7) Squ_NonDiag \
1011  Squ_SaveAcc(13, 0, 14) Squ_Acc(1, 13) Squ_Acc(2, 12) Squ_Acc(3, 11) Squ_Acc(4, 10) Squ_Acc(5, 9) Squ_Acc(6, 8) Squ_Diag(7) \
1012  Squ_SaveAcc(14, 0, 15) Squ_Acc(1, 14) Squ_Acc(2, 13) Squ_Acc(3, 12) Squ_Acc(4, 11) Squ_Acc(5, 10) Squ_Acc(6, 9) Squ_Acc(7, 8) Squ_NonDiag \
1013  Squ_SaveAcc(15, 1, 15) Squ_Acc(2, 14) Squ_Acc(3, 13) Squ_Acc(4, 12) Squ_Acc(5, 11) Squ_Acc(6, 10) Squ_Acc(7, 9) Squ_Diag(8) \
1014  Squ_SaveAcc(16, 2, 15) Squ_Acc(3, 14) Squ_Acc(4, 13) Squ_Acc(5, 12) Squ_Acc(6, 11) Squ_Acc(7, 10) Squ_Acc(8, 9) Squ_NonDiag \
1015  Squ_SaveAcc(17, 3, 15) Squ_Acc(4, 14) Squ_Acc(5, 13) Squ_Acc(6, 12) Squ_Acc(7, 11) Squ_Acc(8, 10) Squ_Diag(9) \
1016  Squ_SaveAcc(18, 4, 15) Squ_Acc(5, 14) Squ_Acc(6, 13) Squ_Acc(7, 12) Squ_Acc(8, 11) Squ_Acc(9, 10) Squ_NonDiag \
1017  Squ_SaveAcc(19, 5, 15) Squ_Acc(6, 14) Squ_Acc(7, 13) Squ_Acc(8, 12) Squ_Acc(9, 11) Squ_Diag(10) \
1018  Squ_SaveAcc(20, 6, 15) Squ_Acc(7, 14) Squ_Acc(8, 13) Squ_Acc(9, 12) Squ_Acc(10, 11) Squ_NonDiag \
1019  Squ_SaveAcc(21, 7, 15) Squ_Acc(8, 14) Squ_Acc(9, 13) Squ_Acc(10, 12) Squ_Diag(11) \
1020  Squ_SaveAcc(22, 8, 15) Squ_Acc(9, 14) Squ_Acc(10, 13) Squ_Acc(11, 12) Squ_NonDiag \
1021  Squ_SaveAcc(23, 9, 15) Squ_Acc(10, 14) Squ_Acc(11, 13) Squ_Diag(12) \
1022  Squ_SaveAcc(24, 10, 15) Squ_Acc(11, 14) Squ_Acc(12, 13) Squ_NonDiag \
1023  Squ_SaveAcc(25, 11, 15) Squ_Acc(12, 14) Squ_Diag(13) \
1024  Squ_SaveAcc(26, 12, 15) Squ_Acc(13, 14) Squ_NonDiag \
1025  Squ_SaveAcc(27, 13, 15) Squ_Diag(14) \
1026  Squ_SaveAcc(28, 14, 15) Squ_NonDiag \
1027  Squ_End(16)
1028 
1029 #define Bot_2 \
1030  Mul_Begin(2) \
1031  Bot_SaveAcc(0, 0, 1) Bot_Acc(1, 0) \
1032  Bot_End(2)
1033 
1034 #define Bot_4 \
1035  Mul_Begin(4) \
1036  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1037  Mul_SaveAcc(1, 2, 0) Mul_Acc(1, 1) Mul_Acc(0, 2) \
1038  Bot_SaveAcc(2, 0, 3) Bot_Acc(1, 2) Bot_Acc(2, 1) Bot_Acc(3, 0) \
1039  Bot_End(4)
1040 
1041 #define Bot_8 \
1042  Mul_Begin(8) \
1043  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1044  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
1045  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1046  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
1047  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
1048  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
1049  Bot_SaveAcc(6, 0, 7) Bot_Acc(1, 6) Bot_Acc(2, 5) Bot_Acc(3, 4) Bot_Acc(4, 3) Bot_Acc(5, 2) Bot_Acc(6, 1) Bot_Acc(7, 0) \
1050  Bot_End(8)
1051 
1052 #define Bot_16 \
1053  Mul_Begin(16) \
1054  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1055  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
1056  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1057  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
1058  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
1059  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
1060  Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
1061  Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
1062  Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
1063  Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
1064  Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
1065  Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
1066  Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
1067  Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
1068  Bot_SaveAcc(14, 0, 15) Bot_Acc(1, 14) Bot_Acc(2, 13) Bot_Acc(3, 12) Bot_Acc(4, 11) Bot_Acc(5, 10) Bot_Acc(6, 9) Bot_Acc(7, 8) Bot_Acc(8, 7) Bot_Acc(9, 6) Bot_Acc(10, 5) Bot_Acc(11, 4) Bot_Acc(12, 3) Bot_Acc(13, 2) Bot_Acc(14, 1) Bot_Acc(15, 0) \
1069  Bot_End(16)
1070 
1071 #endif
1072 
1073 #if 0
1074 #define Mul_Begin(n) \
1075  Declare2Words(p) \
1076  Declare2Words(c) \
1077  Declare2Words(d) \
1078  MultiplyWords(p, A[0], B[0]) \
1079  AssignWord(c, LowWord(p)) \
1080  AssignWord(d, HighWord(p))
1081 
1082 #define Mul_Acc(i, j) \
1083  MultiplyWords(p, A[i], B[j]) \
1084  Acc2WordsBy1(c, LowWord(p)) \
1085  Acc2WordsBy1(d, HighWord(p))
1086 
1087 #define Mul_SaveAcc(k, i, j) \
1088  R[k] = LowWord(c); \
1089  Add2WordsBy1(c, d, HighWord(c)) \
1090  MultiplyWords(p, A[i], B[j]) \
1091  AssignWord(d, HighWord(p)) \
1092  Acc2WordsBy1(c, LowWord(p))
1093 
1094 #define Mul_End(n) \
1095  R[2*n-3] = LowWord(c); \
1096  Acc2WordsBy1(d, HighWord(c)) \
1097  MultiplyWords(p, A[n-1], B[n-1])\
1098  Acc2WordsBy2(d, p) \
1099  R[2*n-2] = LowWord(d); \
1100  R[2*n-1] = HighWord(d);
1101 
1102 #define Bot_SaveAcc(k, i, j) \
1103  R[k] = LowWord(c); \
1104  word e = LowWord(d) + HighWord(c); \
1105  e += A[i] * B[j];
1106 
1107 #define Bot_Acc(i, j) \
1108  e += A[i] * B[j];
1109 
1110 #define Bot_End(n) \
1111  R[n-1] = e;
1112 #else
1113 #define Mul_Begin(n) \
1114  Declare2Words(p) \
1115  word c; \
1116  Declare2Words(d) \
1117  MultiplyWords(p, A[0], B[0]) \
1118  c = LowWord(p); \
1119  AssignWord(d, HighWord(p))
1120 
1121 #define Mul_Acc(i, j) \
1122  MulAcc(c, d, A[i], B[j])
1123 
1124 #define Mul_SaveAcc(k, i, j) \
1125  R[k] = c; \
1126  c = LowWord(d); \
1127  AssignWord(d, HighWord(d)) \
1128  MulAcc(c, d, A[i], B[j])
1129 
1130 #define Mul_End(k, i) \
1131  R[k] = c; \
1132  MultiplyWords(p, A[i], B[i]) \
1133  Acc2WordsBy2(p, d) \
1134  R[k+1] = LowWord(p); \
1135  R[k+2] = HighWord(p);
1136 
1137 #define Bot_SaveAcc(k, i, j) \
1138  R[k] = c; \
1139  c = LowWord(d); \
1140  c += A[i] * B[j];
1141 
1142 #define Bot_Acc(i, j) \
1143  c += A[i] * B[j];
1144 
1145 #define Bot_End(n) \
1146  R[n-1] = c;
1147 #endif
1148 
1149 #define Squ_Begin(n) \
1150  Declare2Words(p) \
1151  word c; \
1152  Declare2Words(d) \
1153  Declare2Words(e) \
1154  MultiplyWords(p, A[0], A[0]) \
1155  R[0] = LowWord(p); \
1156  AssignWord(e, HighWord(p)) \
1157  MultiplyWords(p, A[0], A[1]) \
1158  c = LowWord(p); \
1159  AssignWord(d, HighWord(p)) \
1160  Squ_NonDiag \
1161 
1162 #define Squ_NonDiag \
1163  Double3Words(c, d)
1164 
1165 #define Squ_SaveAcc(k, i, j) \
1166  Acc3WordsBy2(c, d, e) \
1167  R[k] = c; \
1168  MultiplyWords(p, A[i], A[j]) \
1169  c = LowWord(p); \
1170  AssignWord(d, HighWord(p)) \
1171 
1172 #define Squ_Acc(i, j) \
1173  MulAcc(c, d, A[i], A[j])
1174 
1175 #define Squ_Diag(i) \
1176  Squ_NonDiag \
1177  MulAcc(c, d, A[i], A[i])
1178 
1179 #define Squ_End(n) \
1180  Acc3WordsBy2(c, d, e) \
1181  R[2*n-3] = c; \
1182  MultiplyWords(p, A[n-1], A[n-1])\
1183  Acc2WordsBy2(p, e) \
1184  R[2*n-2] = LowWord(p); \
1185  R[2*n-1] = HighWord(p);
1186 
1187 
1188 void Baseline_Multiply2(word *R, const word *AA, const word *BB)
1189 {
1190  // http://github.com/weidai11/cryptopp/issues/188
1191  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1192  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1193 
1194  Mul_2
1195 }
1196 
1197 void Baseline_Multiply4(word *R, const word *AA, const word *BB)
1198 {
1199  // http://github.com/weidai11/cryptopp/issues/188
1200  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1201  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1202 
1203  Mul_4
1204 }
1205 
1206 void Baseline_Multiply8(word *R, const word *AA, const word *BB)
1207 {
1208  // http://github.com/weidai11/cryptopp/issues/188
1209  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1210  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1211 
1212  Mul_8
1213 }
1214 
1215 void Baseline_Square2(word *R, const word *AA)
1216 {
1217  // http://github.com/weidai11/cryptopp/issues/188
1218  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1219 
1220  Squ_2
1221 }
1222 
1223 void Baseline_Square4(word *R, const word *AA)
1224 {
1225  // http://github.com/weidai11/cryptopp/issues/188
1226  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1227 
1228  Squ_4
1229 }
1230 
1231 void Baseline_Square8(word *R, const word *AA)
1232 {
1233  // http://github.com/weidai11/cryptopp/issues/188
1234  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1235 
1236  Squ_8
1237 }
1238 
1239 void Baseline_MultiplyBottom2(word *R, const word *AA, const word *BB)
1240 {
1241  // http://github.com/weidai11/cryptopp/issues/188
1242  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1243  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1244 
1245  Bot_2
1246 }
1247 
1248 void Baseline_MultiplyBottom4(word *R, const word *AA, const word *BB)
1249 {
1250  // http://github.com/weidai11/cryptopp/issues/188
1251  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1252  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1253 
1254  Bot_4
1255 }
1256 
1257 void Baseline_MultiplyBottom8(word *R, const word *AA, const word *BB)
1258 {
1259  // http://github.com/weidai11/cryptopp/issues/188
1260  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1261  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1262 
1263  Bot_8
1264 }
1265 
1266 #define Top_Begin(n) \
1267  Declare2Words(p) \
1268  word c; \
1269  Declare2Words(d) \
1270  MultiplyWords(p, A[0], B[n-2]);\
1271  AssignWord(d, HighWord(p));
1272 
1273 #define Top_Acc(i, j) \
1274  MultiplyWords(p, A[i], B[j]);\
1275  Acc2WordsBy1(d, HighWord(p));
1276 
1277 #define Top_SaveAcc0(i, j) \
1278  c = LowWord(d); \
1279  AssignWord(d, HighWord(d)) \
1280  MulAcc(c, d, A[i], B[j])
1281 
1282 #define Top_SaveAcc1(i, j) \
1283  c = L<c; \
1284  Acc2WordsBy1(d, c); \
1285  c = LowWord(d); \
1286  AssignWord(d, HighWord(d)) \
1287  MulAcc(c, d, A[i], B[j])
1288 
1289 void Baseline_MultiplyTop2(word *R, const word *A, const word *B, word L)
1290 {
1291  CRYPTOPP_UNUSED(L);
1292  word T[4];
1293  Baseline_Multiply2(T, A, B);
1294  R[0] = T[2];
1295  R[1] = T[3];
1296 }
1297 
1298 void Baseline_MultiplyTop4(word *R, const word *AA, const word *BB, word L)
1299 {
1300  // http://github.com/weidai11/cryptopp/issues/188
1301  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1302  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1303 
1304  Top_Begin(4)
1305  Top_Acc(1, 1) Top_Acc(2, 0) \
1306  Top_SaveAcc0(0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1307  Top_SaveAcc1(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
1308  Mul_SaveAcc(0, 2, 3) Mul_Acc(3, 2) \
1309  Mul_End(1, 3)
1310 }
1311 
1312 void Baseline_MultiplyTop8(word *R, const word *AA, const word *BB, word L)
1313 {
1314  // http://github.com/weidai11/cryptopp/issues/188
1315  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1316  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1317 
1318  Top_Begin(8)
1319  Top_Acc(1, 5) Top_Acc(2, 4) Top_Acc(3, 3) Top_Acc(4, 2) Top_Acc(5, 1) Top_Acc(6, 0) \
1320  Top_SaveAcc0(0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
1321  Top_SaveAcc1(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
1322  Mul_SaveAcc(0, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
1323  Mul_SaveAcc(1, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
1324  Mul_SaveAcc(2, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
1325  Mul_SaveAcc(3, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
1326  Mul_SaveAcc(4, 6, 7) Mul_Acc(7, 6) \
1327  Mul_End(5, 7)
1328 }
1329 
1330 #if !CRYPTOPP_INTEGER_SSE2 // save memory by not compiling these functions when SSE2 is available
1331 void Baseline_Multiply16(word *R, const word *AA, const word *BB)
1332 {
1333  // http://github.com/weidai11/cryptopp/issues/188
1334  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1335  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1336 
1337  Mul_16
1338 }
1339 
1340 void Baseline_Square16(word *R, const word *AA)
1341 {
1342  // http://github.com/weidai11/cryptopp/issues/188
1343  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1344 
1345  Squ_16
1346 }
1347 
1348 void Baseline_MultiplyBottom16(word *R, const word *AA, const word *BB)
1349 {
1350  // http://github.com/weidai11/cryptopp/issues/188
1351  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1352  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1353 
1354  Bot_16
1355 }
1356 
1357 void Baseline_MultiplyTop16(word *R, const word *AA, const word *BB, word L)
1358 {
1359  // http://github.com/weidai11/cryptopp/issues/188
1360  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1361  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1362 
1363  Top_Begin(16)
1364  Top_Acc(1, 13) Top_Acc(2, 12) Top_Acc(3, 11) Top_Acc(4, 10) Top_Acc(5, 9) Top_Acc(6, 8) Top_Acc(7, 7) Top_Acc(8, 6) Top_Acc(9, 5) Top_Acc(10, 4) Top_Acc(11, 3) Top_Acc(12, 2) Top_Acc(13, 1) Top_Acc(14, 0) \
1365  Top_SaveAcc0(0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
1366  Top_SaveAcc1(1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
1367  Mul_SaveAcc(0, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
1368  Mul_SaveAcc(1, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
1369  Mul_SaveAcc(2, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
1370  Mul_SaveAcc(3, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
1371  Mul_SaveAcc(4, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
1372  Mul_SaveAcc(5, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
1373  Mul_SaveAcc(6, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
1374  Mul_SaveAcc(7, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
1375  Mul_SaveAcc(8, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
1376  Mul_SaveAcc(9, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
1377  Mul_SaveAcc(10, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
1378  Mul_SaveAcc(11, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
1379  Mul_SaveAcc(12, 14, 15) Mul_Acc(15, 14) \
1380  Mul_End(13, 15)
1381 }
1382 #endif
1383 
1384 // ********************************************************
1385 
1386 #if CRYPTOPP_INTEGER_SSE2
1387 
1388 CRYPTOPP_ALIGN_DATA(16) static const word32 s_maskLow16[4] CRYPTOPP_SECTION_ALIGN16 = {0xffff,0xffff,0xffff,0xffff};
1389 
1390 #undef Mul_Begin
1391 #undef Mul_Acc
1392 #undef Top_Begin
1393 #undef Top_Acc
1394 #undef Squ_Acc
1395 #undef Squ_NonDiag
1396 #undef Squ_Diag
1397 #undef Squ_SaveAcc
1398 #undef Squ_Begin
1399 #undef Mul_SaveAcc
1400 #undef Bot_Acc
1401 #undef Bot_SaveAcc
1402 #undef Bot_End
1403 #undef Squ_End
1404 #undef Mul_End
1405 
1406 #define SSE2_FinalSave(k) \
1407  AS2( psllq xmm5, 16) \
1408  AS2( paddq xmm4, xmm5) \
1409  AS2( movq QWORD PTR [ecx+8*(k)], xmm4)
1410 
1411 #define SSE2_SaveShift(k) \
1412  AS2( movq xmm0, xmm6) \
1413  AS2( punpckhqdq xmm6, xmm0) \
1414  AS2( movq xmm1, xmm7) \
1415  AS2( punpckhqdq xmm7, xmm1) \
1416  AS2( paddd xmm6, xmm0) \
1417  AS2( pslldq xmm6, 4) \
1418  AS2( paddd xmm7, xmm1) \
1419  AS2( paddd xmm4, xmm6) \
1420  AS2( pslldq xmm7, 4) \
1421  AS2( movq xmm6, xmm4) \
1422  AS2( paddd xmm5, xmm7) \
1423  AS2( movq xmm7, xmm5) \
1424  AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
1425  AS2( psrlq xmm6, 16) \
1426  AS2( paddq xmm6, xmm7) \
1427  AS2( punpckhqdq xmm4, xmm0) \
1428  AS2( punpckhqdq xmm5, xmm0) \
1429  AS2( movq QWORD PTR [ecx+8*(k)+2], xmm6) \
1430  AS2( psrlq xmm6, 3*16) \
1431  AS2( paddd xmm4, xmm6) \
1432 
1433 #define Squ_SSE2_SaveShift(k) \
1434  AS2( movq xmm0, xmm6) \
1435  AS2( punpckhqdq xmm6, xmm0) \
1436  AS2( movq xmm1, xmm7) \
1437  AS2( punpckhqdq xmm7, xmm1) \
1438  AS2( paddd xmm6, xmm0) \
1439  AS2( pslldq xmm6, 4) \
1440  AS2( paddd xmm7, xmm1) \
1441  AS2( paddd xmm4, xmm6) \
1442  AS2( pslldq xmm7, 4) \
1443  AS2( movhlps xmm6, xmm4) \
1444  AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
1445  AS2( paddd xmm5, xmm7) \
1446  AS2( movhps QWORD PTR [esp+12], xmm5)\
1447  AS2( psrlq xmm4, 16) \
1448  AS2( paddq xmm4, xmm5) \
1449  AS2( movq QWORD PTR [ecx+8*(k)+2], xmm4) \
1450  AS2( psrlq xmm4, 3*16) \
1451  AS2( paddd xmm4, xmm6) \
1452  AS2( movq QWORD PTR [esp+4], xmm4)\
1453 
1454 #define SSE2_FirstMultiply(i) \
1455  AS2( movdqa xmm7, [esi+(i)*16])\
1456  AS2( movdqa xmm5, [edi-(i)*16])\
1457  AS2( pmuludq xmm5, xmm7) \
1458  AS2( movdqa xmm4, [ebx])\
1459  AS2( movdqa xmm6, xmm4) \
1460  AS2( pand xmm4, xmm5) \
1461  AS2( psrld xmm5, 16) \
1462  AS2( pmuludq xmm7, [edx-(i)*16])\
1463  AS2( pand xmm6, xmm7) \
1464  AS2( psrld xmm7, 16)
1465 
1466 #define Squ_Begin(n) \
1467  SquPrologue \
1468  AS2( mov esi, esp)\
1469  AS2( and esp, 0xfffffff0)\
1470  AS2( lea edi, [esp-32*n])\
1471  AS2( sub esp, 32*n+16)\
1472  AS1( push esi)\
1473  AS2( mov esi, edi) \
1474  AS2( xor edx, edx) \
1475  ASL(1) \
1476  ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
1477  ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
1478  AS2( movdqa [edi+2*edx], xmm0) \
1479  AS2( psrlq xmm0, 32) \
1480  AS2( movdqa [edi+2*edx+16], xmm0) \
1481  AS2( movdqa [edi+16*n+2*edx], xmm1) \
1482  AS2( psrlq xmm1, 32) \
1483  AS2( movdqa [edi+16*n+2*edx+16], xmm1) \
1484  AS2( add edx, 16) \
1485  AS2( cmp edx, 8*(n)) \
1486  ASJ( jne, 1, b) \
1487  AS2( lea edx, [edi+16*n])\
1488  SSE2_FirstMultiply(0) \
1489 
1490 #define Squ_Acc(i) \
1491  ASL(LSqu##i) \
1492  AS2( movdqa xmm1, [esi+(i)*16]) \
1493  AS2( movdqa xmm0, [edi-(i)*16]) \
1494  AS2( movdqa xmm2, [ebx]) \
1495  AS2( pmuludq xmm0, xmm1) \
1496  AS2( pmuludq xmm1, [edx-(i)*16]) \
1497  AS2( movdqa xmm3, xmm2) \
1498  AS2( pand xmm2, xmm0) \
1499  AS2( psrld xmm0, 16) \
1500  AS2( paddd xmm4, xmm2) \
1501  AS2( paddd xmm5, xmm0) \
1502  AS2( pand xmm3, xmm1) \
1503  AS2( psrld xmm1, 16) \
1504  AS2( paddd xmm6, xmm3) \
1505  AS2( paddd xmm7, xmm1) \
1506 
1507 #define Squ_Acc1(i)
1508 #define Squ_Acc2(i) ASC(call, LSqu##i)
1509 #define Squ_Acc3(i) Squ_Acc2(i)
1510 #define Squ_Acc4(i) Squ_Acc2(i)
1511 #define Squ_Acc5(i) Squ_Acc2(i)
1512 #define Squ_Acc6(i) Squ_Acc2(i)
1513 #define Squ_Acc7(i) Squ_Acc2(i)
1514 #define Squ_Acc8(i) Squ_Acc2(i)
1515 
1516 #define SSE2_End(E, n) \
1517  SSE2_SaveShift(2*(n)-3) \
1518  AS2( movdqa xmm7, [esi+16]) \
1519  AS2( movdqa xmm0, [edi]) \
1520  AS2( pmuludq xmm0, xmm7) \
1521  AS2( movdqa xmm2, [ebx]) \
1522  AS2( pmuludq xmm7, [edx]) \
1523  AS2( movdqa xmm6, xmm2) \
1524  AS2( pand xmm2, xmm0) \
1525  AS2( psrld xmm0, 16) \
1526  AS2( paddd xmm4, xmm2) \
1527  AS2( paddd xmm5, xmm0) \
1528  AS2( pand xmm6, xmm7) \
1529  AS2( psrld xmm7, 16) \
1530  SSE2_SaveShift(2*(n)-2) \
1531  SSE2_FinalSave(2*(n)-1) \
1532  AS1( pop esp)\
1533  E
1534 
1535 #define Squ_End(n) SSE2_End(SquEpilogue, n)
1536 #define Mul_End(n) SSE2_End(MulEpilogue, n)
1537 #define Top_End(n) SSE2_End(TopEpilogue, n)
1538 
1539 #define Squ_Column1(k, i) \
1540  Squ_SSE2_SaveShift(k) \
1541  AS2( add esi, 16) \
1542  SSE2_FirstMultiply(1)\
1543  Squ_Acc##i(i) \
1544  AS2( paddd xmm4, xmm4) \
1545  AS2( paddd xmm5, xmm5) \
1546  AS2( movdqa xmm3, [esi]) \
1547  AS2( movq xmm1, QWORD PTR [esi+8]) \
1548  AS2( pmuludq xmm1, xmm3) \
1549  AS2( pmuludq xmm3, xmm3) \
1550  AS2( movdqa xmm0, [ebx])\
1551  AS2( movdqa xmm2, xmm0) \
1552  AS2( pand xmm0, xmm1) \
1553  AS2( psrld xmm1, 16) \
1554  AS2( paddd xmm6, xmm0) \
1555  AS2( paddd xmm7, xmm1) \
1556  AS2( pand xmm2, xmm3) \
1557  AS2( psrld xmm3, 16) \
1558  AS2( paddd xmm6, xmm6) \
1559  AS2( paddd xmm7, xmm7) \
1560  AS2( paddd xmm4, xmm2) \
1561  AS2( paddd xmm5, xmm3) \
1562  AS2( movq xmm0, QWORD PTR [esp+4])\
1563  AS2( movq xmm1, QWORD PTR [esp+12])\
1564  AS2( paddd xmm4, xmm0)\
1565  AS2( paddd xmm5, xmm1)\
1566 
1567 #define Squ_Column0(k, i) \
1568  Squ_SSE2_SaveShift(k) \
1569  AS2( add edi, 16) \
1570  AS2( add edx, 16) \
1571  SSE2_FirstMultiply(1)\
1572  Squ_Acc##i(i) \
1573  AS2( paddd xmm6, xmm6) \
1574  AS2( paddd xmm7, xmm7) \
1575  AS2( paddd xmm4, xmm4) \
1576  AS2( paddd xmm5, xmm5) \
1577  AS2( movq xmm0, QWORD PTR [esp+4])\
1578  AS2( movq xmm1, QWORD PTR [esp+12])\
1579  AS2( paddd xmm4, xmm0)\
1580  AS2( paddd xmm5, xmm1)\
1581 
1582 #define SSE2_MulAdd45 \
1583  AS2( movdqa xmm7, [esi]) \
1584  AS2( movdqa xmm0, [edi]) \
1585  AS2( pmuludq xmm0, xmm7) \
1586  AS2( movdqa xmm2, [ebx]) \
1587  AS2( pmuludq xmm7, [edx]) \
1588  AS2( movdqa xmm6, xmm2) \
1589  AS2( pand xmm2, xmm0) \
1590  AS2( psrld xmm0, 16) \
1591  AS2( paddd xmm4, xmm2) \
1592  AS2( paddd xmm5, xmm0) \
1593  AS2( pand xmm6, xmm7) \
1594  AS2( psrld xmm7, 16)
1595 
1596 #define Mul_Begin(n) \
1597  MulPrologue \
1598  AS2( mov esi, esp)\
1599  AS2( and esp, 0xfffffff0)\
1600  AS2( sub esp, 48*n+16)\
1601  AS1( push esi)\
1602  AS2( xor edx, edx) \
1603  ASL(1) \
1604  ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
1605  ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
1606  ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
1607  AS2( movdqa [esp+20+2*edx], xmm0) \
1608  AS2( psrlq xmm0, 32) \
1609  AS2( movdqa [esp+20+2*edx+16], xmm0) \
1610  AS2( movdqa [esp+20+16*n+2*edx], xmm1) \
1611  AS2( psrlq xmm1, 32) \
1612  AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \
1613  AS2( movdqa [esp+20+32*n+2*edx], xmm2) \
1614  AS2( psrlq xmm2, 32) \
1615  AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \
1616  AS2( add edx, 16) \
1617  AS2( cmp edx, 8*(n)) \
1618  ASJ( jne, 1, b) \
1619  AS2( lea edi, [esp+20])\
1620  AS2( lea edx, [esp+20+16*n])\
1621  AS2( lea esi, [esp+20+32*n])\
1622  SSE2_FirstMultiply(0) \
1623 
1624 #define Mul_Acc(i) \
1625  ASL(LMul##i) \
1626  AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
1627  AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
1628  AS2( movdqa xmm2, [ebx]) \
1629  AS2( pmuludq xmm0, xmm1) \
1630  AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
1631  AS2( movdqa xmm3, xmm2) \
1632  AS2( pand xmm2, xmm0) \
1633  AS2( psrld xmm0, 16) \
1634  AS2( paddd xmm4, xmm2) \
1635  AS2( paddd xmm5, xmm0) \
1636  AS2( pand xmm3, xmm1) \
1637  AS2( psrld xmm1, 16) \
1638  AS2( paddd xmm6, xmm3) \
1639  AS2( paddd xmm7, xmm1) \
1640 
1641 #define Mul_Acc1(i)
1642 #define Mul_Acc2(i) ASC(call, LMul##i)
1643 #define Mul_Acc3(i) Mul_Acc2(i)
1644 #define Mul_Acc4(i) Mul_Acc2(i)
1645 #define Mul_Acc5(i) Mul_Acc2(i)
1646 #define Mul_Acc6(i) Mul_Acc2(i)
1647 #define Mul_Acc7(i) Mul_Acc2(i)
1648 #define Mul_Acc8(i) Mul_Acc2(i)
1649 #define Mul_Acc9(i) Mul_Acc2(i)
1650 #define Mul_Acc10(i) Mul_Acc2(i)
1651 #define Mul_Acc11(i) Mul_Acc2(i)
1652 #define Mul_Acc12(i) Mul_Acc2(i)
1653 #define Mul_Acc13(i) Mul_Acc2(i)
1654 #define Mul_Acc14(i) Mul_Acc2(i)
1655 #define Mul_Acc15(i) Mul_Acc2(i)
1656 #define Mul_Acc16(i) Mul_Acc2(i)
1657 
1658 #define Mul_Column1(k, i) \
1659  SSE2_SaveShift(k) \
1660  AS2( add esi, 16) \
1661  SSE2_MulAdd45\
1662  Mul_Acc##i(i) \
1663 
1664 #define Mul_Column0(k, i) \
1665  SSE2_SaveShift(k) \
1666  AS2( add edi, 16) \
1667  AS2( add edx, 16) \
1668  SSE2_MulAdd45\
1669  Mul_Acc##i(i) \
1670 
1671 #define Bot_Acc(i) \
1672  AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
1673  AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
1674  AS2( pmuludq xmm0, xmm1) \
1675  AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
1676  AS2( paddq xmm4, xmm0) \
1677  AS2( paddd xmm6, xmm1)
1678 
1679 #define Bot_SaveAcc(k) \
1680  SSE2_SaveShift(k) \
1681  AS2( add edi, 16) \
1682  AS2( add edx, 16) \
1683  AS2( movdqa xmm6, [esi]) \
1684  AS2( movdqa xmm0, [edi]) \
1685  AS2( pmuludq xmm0, xmm6) \
1686  AS2( paddq xmm4, xmm0) \
1687  AS2( psllq xmm5, 16) \
1688  AS2( paddq xmm4, xmm5) \
1689  AS2( pmuludq xmm6, [edx])
1690 
1691 #define Bot_End(n) \
1692  AS2( movhlps xmm7, xmm6) \
1693  AS2( paddd xmm6, xmm7) \
1694  AS2( psllq xmm6, 32) \
1695  AS2( paddd xmm4, xmm6) \
1696  AS2( movq QWORD PTR [ecx+8*((n)-1)], xmm4) \
1697  AS1( pop esp)\
1698  MulEpilogue
1699 
1700 #define Top_Begin(n) \
1701  TopPrologue \
1702  AS2( mov edx, esp)\
1703  AS2( and esp, 0xfffffff0)\
1704  AS2( sub esp, 48*n+16)\
1705  AS1( push edx)\
1706  AS2( xor edx, edx) \
1707  ASL(1) \
1708  ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
1709  ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
1710  ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
1711  AS2( movdqa [esp+20+2*edx], xmm0) \
1712  AS2( psrlq xmm0, 32) \
1713  AS2( movdqa [esp+20+2*edx+16], xmm0) \
1714  AS2( movdqa [esp+20+16*n+2*edx], xmm1) \
1715  AS2( psrlq xmm1, 32) \
1716  AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \
1717  AS2( movdqa [esp+20+32*n+2*edx], xmm2) \
1718  AS2( psrlq xmm2, 32) \
1719  AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \
1720  AS2( add edx, 16) \
1721  AS2( cmp edx, 8*(n)) \
1722  ASJ( jne, 1, b) \
1723  AS2( mov eax, esi) \
1724  AS2( lea edi, [esp+20+00*n+16*(n/2-1)])\
1725  AS2( lea edx, [esp+20+16*n+16*(n/2-1)])\
1726  AS2( lea esi, [esp+20+32*n+16*(n/2-1)])\
1727  AS2( pxor xmm4, xmm4)\
1728  AS2( pxor xmm5, xmm5)
1729 
1730 #define Top_Acc(i) \
1731  AS2( movq xmm0, QWORD PTR [esi+i/2*(1-(i-2*(i/2))*2)*16+8]) \
1732  AS2( pmuludq xmm0, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
1733  AS2( psrlq xmm0, 48) \
1734  AS2( paddd xmm5, xmm0)\
1735 
1736 #define Top_Column0(i) \
1737  AS2( psllq xmm5, 32) \
1738  AS2( add edi, 16) \
1739  AS2( add edx, 16) \
1740  SSE2_MulAdd45\
1741  Mul_Acc##i(i) \
1742 
1743 #define Top_Column1(i) \
1744  SSE2_SaveShift(0) \
1745  AS2( add esi, 16) \
1746  SSE2_MulAdd45\
1747  Mul_Acc##i(i) \
1748  AS2( shr eax, 16) \
1749  AS2( movd xmm0, eax)\
1750  AS2( movd xmm1, [ecx+4])\
1751  AS2( psrld xmm1, 16)\
1752  AS2( pcmpgtd xmm1, xmm0)\
1753  AS2( psrld xmm1, 31)\
1754  AS2( paddd xmm4, xmm1)\
1755 
1756 void SSE2_Square4(word *C, const word *A)
1757 {
1758  Squ_Begin(2)
1759  Squ_Column0(0, 1)
1760  Squ_End(2)
1761 }
1762 
1763 void SSE2_Square8(word *C, const word *A)
1764 {
1765  Squ_Begin(4)
1766 #ifndef __GNUC__
1767  ASJ( jmp, 0, f)
1768  Squ_Acc(2)
1769  AS1( ret) ASL(0)
1770 #endif
1771  Squ_Column0(0, 1)
1772  Squ_Column1(1, 1)
1773  Squ_Column0(2, 2)
1774  Squ_Column1(3, 1)
1775  Squ_Column0(4, 1)
1776  Squ_End(4)
1777 }
1778 
1779 void SSE2_Square16(word *C, const word *A)
1780 {
1781  Squ_Begin(8)
1782 #ifndef __GNUC__
1783  ASJ( jmp, 0, f)
1784  Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
1785  AS1( ret) ASL(0)
1786 #endif
1787  Squ_Column0(0, 1)
1788  Squ_Column1(1, 1)
1789  Squ_Column0(2, 2)
1790  Squ_Column1(3, 2)
1791  Squ_Column0(4, 3)
1792  Squ_Column1(5, 3)
1793  Squ_Column0(6, 4)
1794  Squ_Column1(7, 3)
1795  Squ_Column0(8, 3)
1796  Squ_Column1(9, 2)
1797  Squ_Column0(10, 2)
1798  Squ_Column1(11, 1)
1799  Squ_Column0(12, 1)
1800  Squ_End(8)
1801 }
1802 
1803 void SSE2_Square32(word *C, const word *A)
1804 {
1805  Squ_Begin(16)
1806  ASJ( jmp, 0, f)
1807  Squ_Acc(8) Squ_Acc(7) Squ_Acc(6) Squ_Acc(5) Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
1808  AS1( ret) ASL(0)
1809  Squ_Column0(0, 1)
1810  Squ_Column1(1, 1)
1811  Squ_Column0(2, 2)
1812  Squ_Column1(3, 2)
1813  Squ_Column0(4, 3)
1814  Squ_Column1(5, 3)
1815  Squ_Column0(6, 4)
1816  Squ_Column1(7, 4)
1817  Squ_Column0(8, 5)
1818  Squ_Column1(9, 5)
1819  Squ_Column0(10, 6)
1820  Squ_Column1(11, 6)
1821  Squ_Column0(12, 7)
1822  Squ_Column1(13, 7)
1823  Squ_Column0(14, 8)
1824  Squ_Column1(15, 7)
1825  Squ_Column0(16, 7)
1826  Squ_Column1(17, 6)
1827  Squ_Column0(18, 6)
1828  Squ_Column1(19, 5)
1829  Squ_Column0(20, 5)
1830  Squ_Column1(21, 4)
1831  Squ_Column0(22, 4)
1832  Squ_Column1(23, 3)
1833  Squ_Column0(24, 3)
1834  Squ_Column1(25, 2)
1835  Squ_Column0(26, 2)
1836  Squ_Column1(27, 1)
1837  Squ_Column0(28, 1)
1838  Squ_End(16)
1839 }
1840 
1841 void SSE2_Multiply4(word *C, const word *A, const word *B)
1842 {
1843  Mul_Begin(2)
1844 #ifndef __GNUC__
1845  ASJ( jmp, 0, f)
1846  Mul_Acc(2)
1847  AS1( ret) ASL(0)
1848 #endif
1849  Mul_Column0(0, 2)
1850  Mul_End(2)
1851 }
1852 
1853 void SSE2_Multiply8(word *C, const word *A, const word *B)
1854 {
1855  Mul_Begin(4)
1856 #ifndef __GNUC__
1857  ASJ( jmp, 0, f)
1858  Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1859  AS1( ret) ASL(0)
1860 #endif
1861  Mul_Column0(0, 2)
1862  Mul_Column1(1, 3)
1863  Mul_Column0(2, 4)
1864  Mul_Column1(3, 3)
1865  Mul_Column0(4, 2)
1866  Mul_End(4)
1867 }
1868 
1869 void SSE2_Multiply16(word *C, const word *A, const word *B)
1870 {
1871  Mul_Begin(8)
1872 #ifndef __GNUC__
1873  ASJ( jmp, 0, f)
1874  Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1875  AS1( ret) ASL(0)
1876 #endif
1877  Mul_Column0(0, 2)
1878  Mul_Column1(1, 3)
1879  Mul_Column0(2, 4)
1880  Mul_Column1(3, 5)
1881  Mul_Column0(4, 6)
1882  Mul_Column1(5, 7)
1883  Mul_Column0(6, 8)
1884  Mul_Column1(7, 7)
1885  Mul_Column0(8, 6)
1886  Mul_Column1(9, 5)
1887  Mul_Column0(10, 4)
1888  Mul_Column1(11, 3)
1889  Mul_Column0(12, 2)
1890  Mul_End(8)
1891 }
1892 
1893 void SSE2_Multiply32(word *C, const word *A, const word *B)
1894 {
1895  Mul_Begin(16)
1896  ASJ( jmp, 0, f)
1897  Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1898  AS1( ret) ASL(0)
1899  Mul_Column0(0, 2)
1900  Mul_Column1(1, 3)
1901  Mul_Column0(2, 4)
1902  Mul_Column1(3, 5)
1903  Mul_Column0(4, 6)
1904  Mul_Column1(5, 7)
1905  Mul_Column0(6, 8)
1906  Mul_Column1(7, 9)
1907  Mul_Column0(8, 10)
1908  Mul_Column1(9, 11)
1909  Mul_Column0(10, 12)
1910  Mul_Column1(11, 13)
1911  Mul_Column0(12, 14)
1912  Mul_Column1(13, 15)
1913  Mul_Column0(14, 16)
1914  Mul_Column1(15, 15)
1915  Mul_Column0(16, 14)
1916  Mul_Column1(17, 13)
1917  Mul_Column0(18, 12)
1918  Mul_Column1(19, 11)
1919  Mul_Column0(20, 10)
1920  Mul_Column1(21, 9)
1921  Mul_Column0(22, 8)
1922  Mul_Column1(23, 7)
1923  Mul_Column0(24, 6)
1924  Mul_Column1(25, 5)
1925  Mul_Column0(26, 4)
1926  Mul_Column1(27, 3)
1927  Mul_Column0(28, 2)
1928  Mul_End(16)
1929 }
1930 
1931 void SSE2_MultiplyBottom4(word *C, const word *A, const word *B)
1932 {
1933  Mul_Begin(2)
1934  Bot_SaveAcc(0) Bot_Acc(2)
1935  Bot_End(2)
1936 }
1937 
1938 void SSE2_MultiplyBottom8(word *C, const word *A, const word *B)
1939 {
1940  Mul_Begin(4)
1941 #ifndef __GNUC__
1942  ASJ( jmp, 0, f)
1943  Mul_Acc(3) Mul_Acc(2)
1944  AS1( ret) ASL(0)
1945 #endif
1946  Mul_Column0(0, 2)
1947  Mul_Column1(1, 3)
1948  Bot_SaveAcc(2) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
1949  Bot_End(4)
1950 }
1951 
1952 void SSE2_MultiplyBottom16(word *C, const word *A, const word *B)
1953 {
1954  Mul_Begin(8)
1955 #ifndef __GNUC__
1956  ASJ( jmp, 0, f)
1957  Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1958  AS1( ret) ASL(0)
1959 #endif
1960  Mul_Column0(0, 2)
1961  Mul_Column1(1, 3)
1962  Mul_Column0(2, 4)
1963  Mul_Column1(3, 5)
1964  Mul_Column0(4, 6)
1965  Mul_Column1(5, 7)
1966  Bot_SaveAcc(6) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
1967  Bot_End(8)
1968 }
1969 
1970 void SSE2_MultiplyBottom32(word *C, const word *A, const word *B)
1971 {
1972  Mul_Begin(16)
1973 #ifndef __GNUC__
1974  ASJ( jmp, 0, f)
1975  Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1976  AS1( ret) ASL(0)
1977 #endif
1978  Mul_Column0(0, 2)
1979  Mul_Column1(1, 3)
1980  Mul_Column0(2, 4)
1981  Mul_Column1(3, 5)
1982  Mul_Column0(4, 6)
1983  Mul_Column1(5, 7)
1984  Mul_Column0(6, 8)
1985  Mul_Column1(7, 9)
1986  Mul_Column0(8, 10)
1987  Mul_Column1(9, 11)
1988  Mul_Column0(10, 12)
1989  Mul_Column1(11, 13)
1990  Mul_Column0(12, 14)
1991  Mul_Column1(13, 15)
1992  Bot_SaveAcc(14) Bot_Acc(16) Bot_Acc(15) Bot_Acc(14) Bot_Acc(13) Bot_Acc(12) Bot_Acc(11) Bot_Acc(10) Bot_Acc(9) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
1993  Bot_End(16)
1994 }
1995 
1996 void SSE2_MultiplyTop8(word *C, const word *A, const word *B, word L)
1997 {
1998  Top_Begin(4)
1999  Top_Acc(3) Top_Acc(2) Top_Acc(1)
2000 #ifndef __GNUC__
2001  ASJ( jmp, 0, f)
2002  Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2003  AS1( ret) ASL(0)
2004 #endif
2005  Top_Column0(4)
2006  Top_Column1(3)
2007  Mul_Column0(0, 2)
2008  Top_End(2)
2009 }
2010 
2011 void SSE2_MultiplyTop16(word *C, const word *A, const word *B, word L)
2012 {
2013  Top_Begin(8)
2014  Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
2015 #ifndef __GNUC__
2016  ASJ( jmp, 0, f)
2017  Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2018  AS1( ret) ASL(0)
2019 #endif
2020  Top_Column0(8)
2021  Top_Column1(7)
2022  Mul_Column0(0, 6)
2023  Mul_Column1(1, 5)
2024  Mul_Column0(2, 4)
2025  Mul_Column1(3, 3)
2026  Mul_Column0(4, 2)
2027  Top_End(4)
2028 }
2029 
2030 void SSE2_MultiplyTop32(word *C, const word *A, const word *B, word L)
2031 {
2032  Top_Begin(16)
2033  Top_Acc(15) Top_Acc(14) Top_Acc(13) Top_Acc(12) Top_Acc(11) Top_Acc(10) Top_Acc(9) Top_Acc(8) Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
2034 #ifndef __GNUC__
2035  ASJ( jmp, 0, f)
2036  Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2037  AS1( ret) ASL(0)
2038 #endif
2039  Top_Column0(16)
2040  Top_Column1(15)
2041  Mul_Column0(0, 14)
2042  Mul_Column1(1, 13)
2043  Mul_Column0(2, 12)
2044  Mul_Column1(3, 11)
2045  Mul_Column0(4, 10)
2046  Mul_Column1(5, 9)
2047  Mul_Column0(6, 8)
2048  Mul_Column1(7, 7)
2049  Mul_Column0(8, 6)
2050  Mul_Column1(9, 5)
2051  Mul_Column0(10, 4)
2052  Mul_Column1(11, 3)
2053  Mul_Column0(12, 2)
2054  Top_End(8)
2055 }
2056 
2057 #endif // #if CRYPTOPP_INTEGER_SSE2
2058 
2059 // ********************************************************
2060 
2061 typedef int (CRYPTOPP_FASTCALL * PAdd)(size_t N, word *C, const word *A, const word *B);
2062 typedef void (* PMul)(word *C, const word *A, const word *B);
2063 typedef void (* PSqu)(word *C, const word *A);
2064 typedef void (* PMulTop)(word *C, const word *A, const word *B, word L);
2065 
2066 #if CRYPTOPP_INTEGER_SSE2
2067 static PAdd s_pAdd = &Baseline_Add, s_pSub = &Baseline_Sub;
2068 static size_t s_recursionLimit = 8;
2069 #else
2070 static const size_t s_recursionLimit = 16;
2071 #endif
2072 
2073 static PMul s_pMul[9], s_pBot[9];
2074 static PSqu s_pSqu[9];
2075 static PMulTop s_pTop[9];
2076 
2077 static void SetFunctionPointers()
2078 {
2079  s_pMul[0] = &Baseline_Multiply2;
2080  s_pBot[0] = &Baseline_MultiplyBottom2;
2081  s_pSqu[0] = &Baseline_Square2;
2082  s_pTop[0] = &Baseline_MultiplyTop2;
2083  s_pTop[1] = &Baseline_MultiplyTop4;
2084 
2085 #if CRYPTOPP_INTEGER_SSE2
2086  if (HasSSE2())
2087  {
2088 #if _MSC_VER != 1200 || !(CRYPTOPP_DEBUG)
2089  if (IsP4())
2090  {
2091  s_pAdd = &SSE2_Add;
2092  s_pSub = &SSE2_Sub;
2093  }
2094 #endif
2095 
2096  s_recursionLimit = 32;
2097 
2098  s_pMul[1] = &SSE2_Multiply4;
2099  s_pMul[2] = &SSE2_Multiply8;
2100  s_pMul[4] = &SSE2_Multiply16;
2101  s_pMul[8] = &SSE2_Multiply32;
2102 
2103  s_pBot[1] = &SSE2_MultiplyBottom4;
2104  s_pBot[2] = &SSE2_MultiplyBottom8;
2105  s_pBot[4] = &SSE2_MultiplyBottom16;
2106  s_pBot[8] = &SSE2_MultiplyBottom32;
2107 
2108  s_pSqu[1] = &SSE2_Square4;
2109  s_pSqu[2] = &SSE2_Square8;
2110  s_pSqu[4] = &SSE2_Square16;
2111  s_pSqu[8] = &SSE2_Square32;
2112 
2113  s_pTop[2] = &SSE2_MultiplyTop8;
2114  s_pTop[4] = &SSE2_MultiplyTop16;
2115  s_pTop[8] = &SSE2_MultiplyTop32;
2116  }
2117  else
2118 #endif
2119  {
2120  s_pMul[1] = &Baseline_Multiply4;
2121  s_pMul[2] = &Baseline_Multiply8;
2122 
2123  s_pBot[1] = &Baseline_MultiplyBottom4;
2124  s_pBot[2] = &Baseline_MultiplyBottom8;
2125 
2126  s_pSqu[1] = &Baseline_Square4;
2127  s_pSqu[2] = &Baseline_Square8;
2128 
2129  s_pTop[2] = &Baseline_MultiplyTop8;
2130 
2131 #if !CRYPTOPP_INTEGER_SSE2
2132  s_pMul[4] = &Baseline_Multiply16;
2133  s_pBot[4] = &Baseline_MultiplyBottom16;
2134  s_pSqu[4] = &Baseline_Square16;
2135  s_pTop[4] = &Baseline_MultiplyTop16;
2136 #endif
2137  }
2138 }
2139 
2140 inline int Add(word *C, const word *A, const word *B, size_t N)
2141 {
2142 #if CRYPTOPP_INTEGER_SSE2
2143  return s_pAdd(N, C, A, B);
2144 #else
2145  return Baseline_Add(N, C, A, B);
2146 #endif
2147 }
2148 
2149 inline int Subtract(word *C, const word *A, const word *B, size_t N)
2150 {
2151 #if CRYPTOPP_INTEGER_SSE2
2152  return s_pSub(N, C, A, B);
2153 #else
2154  return Baseline_Sub(N, C, A, B);
2155 #endif
2156 }
2157 
2158 // ********************************************************
2159 
2160 
2161 #define A0 A
2162 #define A1 (A+N2)
2163 #define B0 B
2164 #define B1 (B+N2)
2165 
2166 #define T0 T
2167 #define T1 (T+N2)
2168 #define T2 (T+N)
2169 #define T3 (T+N+N2)
2170 
2171 #define R0 R
2172 #define R1 (R+N2)
2173 #define R2 (R+N)
2174 #define R3 (R+N+N2)
2175 
2176 // R[2*N] - result = A*B
2177 // T[2*N] - temporary work space
2178 // A[N] --- multiplier
2179 // B[N] --- multiplicant
2180 
2181 void RecursiveMultiply(word *R, word *T, const word *A, const word *B, size_t N)
2182 {
2183  CRYPTOPP_ASSERT(N>=2 && N%2==0);
2184 
2185  if (N <= s_recursionLimit)
2186  s_pMul[N/4](R, A, B);
2187  else
2188  {
2189  const size_t N2 = N/2;
2190 
2191  size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
2192  Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
2193 
2194  size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
2195  Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
2196 
2197  RecursiveMultiply(R2, T2, A1, B1, N2);
2198  RecursiveMultiply(T0, T2, R0, R1, N2);
2199  RecursiveMultiply(R0, T2, A0, B0, N2);
2200 
2201  // now T[01] holds (A1-A0)*(B0-B1), R[01] holds A0*B0, R[23] holds A1*B1
2202 
2203  int c2 = Add(R2, R2, R1, N2);
2204  int c3 = c2;
2205  c2 += Add(R1, R2, R0, N2);
2206  c3 += Add(R2, R2, R3, N2);
2207 
2208  if (AN2 == BN2)
2209  c3 -= Subtract(R1, R1, T0, N);
2210  else
2211  c3 += Add(R1, R1, T0, N);
2212 
2213  c3 += Increment(R2, N2, c2);
2214  CRYPTOPP_ASSERT (c3 >= 0 && c3 <= 2);
2215  Increment(R3, N2, c3);
2216  }
2217 }
2218 
2219 // R[2*N] - result = A*A
2220 // T[2*N] - temporary work space
2221 // A[N] --- number to be squared
2222 
2223 void RecursiveSquare(word *R, word *T, const word *A, size_t N)
2224 {
2225  CRYPTOPP_ASSERT(N && N%2==0);
2226 
2227  if (N <= s_recursionLimit)
2228  s_pSqu[N/4](R, A);
2229  else
2230  {
2231  const size_t N2 = N/2;
2232 
2233  RecursiveSquare(R0, T2, A0, N2);
2234  RecursiveSquare(R2, T2, A1, N2);
2235  RecursiveMultiply(T0, T2, A0, A1, N2);
2236 
2237  int carry = Add(R1, R1, T0, N);
2238  carry += Add(R1, R1, T0, N);
2239  Increment(R3, N2, carry);
2240  }
2241 }
2242 
2243 // R[N] - bottom half of A*B
2244 // T[3*N/2] - temporary work space
2245 // A[N] - multiplier
2246 // B[N] - multiplicant
2247 
2248 void RecursiveMultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
2249 {
2250  CRYPTOPP_ASSERT(N>=2 && N%2==0);
2251 
2252  if (N <= s_recursionLimit)
2253  s_pBot[N/4](R, A, B);
2254  else
2255  {
2256  const size_t N2 = N/2;
2257 
2258  RecursiveMultiply(R, T, A0, B0, N2);
2259  RecursiveMultiplyBottom(T0, T1, A1, B0, N2);
2260  Add(R1, R1, T0, N2);
2261  RecursiveMultiplyBottom(T0, T1, A0, B1, N2);
2262  Add(R1, R1, T0, N2);
2263  }
2264 }
2265 
2266 // R[N] --- upper half of A*B
2267 // T[2*N] - temporary work space
2268 // L[N] --- lower half of A*B
2269 // A[N] --- multiplier
2270 // B[N] --- multiplicant
2271 
2272 void MultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, size_t N)
2273 {
2274  CRYPTOPP_ASSERT(N>=2 && N%2==0);
2275 
2276  if (N <= s_recursionLimit)
2277  s_pTop[N/4](R, A, B, L[N-1]);
2278  else
2279  {
2280  const size_t N2 = N/2;
2281 
2282  size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
2283  Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
2284 
2285  size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
2286  Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
2287 
2288  RecursiveMultiply(T0, T2, R0, R1, N2);
2289  RecursiveMultiply(R0, T2, A1, B1, N2);
2290 
2291  // now T[01] holds (A1-A0)*(B0-B1) = A1*B0+A0*B1-A1*B1-A0*B0, R[01] holds A1*B1
2292 
2293  int t, c3;
2294  int c2 = Subtract(T2, L+N2, L, N2);
2295 
2296  if (AN2 == BN2)
2297  {
2298  c2 -= Add(T2, T2, T0, N2);
2299  t = (Compare(T2, R0, N2) == -1);
2300  c3 = t - Subtract(T2, T2, T1, N2);
2301  }
2302  else
2303  {
2304  c2 += Subtract(T2, T2, T0, N2);
2305  t = (Compare(T2, R0, N2) == -1);
2306  c3 = t + Add(T2, T2, T1, N2);
2307  }
2308 
2309  c2 += t;
2310  if (c2 >= 0)
2311  c3 += Increment(T2, N2, c2);
2312  else
2313  c3 -= Decrement(T2, N2, -c2);
2314  c3 += Add(R0, T2, R1, N2);
2315 
2316  CRYPTOPP_ASSERT (c3 >= 0 && c3 <= 2);
2317  Increment(R1, N2, c3);
2318  }
2319 }
2320 
2321 inline void Multiply(word *R, word *T, const word *A, const word *B, size_t N)
2322 {
2323  RecursiveMultiply(R, T, A, B, N);
2324 }
2325 
2326 inline void Square(word *R, word *T, const word *A, size_t N)
2327 {
2328  RecursiveSquare(R, T, A, N);
2329 }
2330 
2331 inline void MultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
2332 {
2333  RecursiveMultiplyBottom(R, T, A, B, N);
2334 }
2335 
2336 // R[NA+NB] - result = A*B
2337 // T[NA+NB] - temporary work space
2338 // A[NA] ---- multiplier
2339 // B[NB] ---- multiplicant
2340 
2341 void AsymmetricMultiply(word *R, word *T, const word *A, size_t NA, const word *B, size_t NB)
2342 {
2343  if (NA == NB)
2344  {
2345  if (A == B)
2346  Square(R, T, A, NA);
2347  else
2348  Multiply(R, T, A, B, NA);
2349 
2350  return;
2351  }
2352 
2353  if (NA > NB)
2354  {
2355  std::swap(A, B);
2356  std::swap(NA, NB);
2357  }
2358 
2359  CRYPTOPP_ASSERT(NB % NA == 0);
2360 
2361  if (NA==2 && !A[1])
2362  {
2363  switch (A[0])
2364  {
2365  case 0:
2366  SetWords(R, 0, NB+2);
2367  return;
2368  case 1:
2369  CopyWords(R, B, NB);
2370  R[NB] = R[NB+1] = 0;
2371  return;
2372  default:
2373  R[NB] = LinearMultiply(R, B, A[0], NB);
2374  R[NB+1] = 0;
2375  return;
2376  }
2377  }
2378 
2379  size_t i;
2380  if ((NB/NA)%2 == 0)
2381  {
2382  Multiply(R, T, A, B, NA);
2383  CopyWords(T+2*NA, R+NA, NA);
2384 
2385  for (i=2*NA; i<NB; i+=2*NA)
2386  Multiply(T+NA+i, T, A, B+i, NA);
2387  for (i=NA; i<NB; i+=2*NA)
2388  Multiply(R+i, T, A, B+i, NA);
2389  }
2390  else
2391  {
2392  for (i=0; i<NB; i+=2*NA)
2393  Multiply(R+i, T, A, B+i, NA);
2394  for (i=NA; i<NB; i+=2*NA)
2395  Multiply(T+NA+i, T, A, B+i, NA);
2396  }
2397 
2398  if (Add(R+NA, R+NA, T+2*NA, NB-NA))
2399  Increment(R+NB, NA);
2400 }
2401 
2402 // R[N] ----- result = A inverse mod 2**(WORD_BITS*N)
2403 // T[3*N/2] - temporary work space
2404 // A[N] ----- an odd number as input
2405 
2406 void RecursiveInverseModPower2(word *R, word *T, const word *A, size_t N)
2407 {
2408  if (N==2)
2409  {
2410  T[0] = AtomicInverseModPower2(A[0]);
2411  T[1] = 0;
2412  s_pBot[0](T+2, T, A);
2413  TwosComplement(T+2, 2);
2414  Increment(T+2, 2, 2);
2415  s_pBot[0](R, T, T+2);
2416  }
2417  else
2418  {
2419  const size_t N2 = N/2;
2420  RecursiveInverseModPower2(R0, T0, A0, N2);
2421  T0[0] = 1;
2422  SetWords(T0+1, 0, N2-1);
2423  MultiplyTop(R1, T1, T0, R0, A0, N2);
2424  MultiplyBottom(T0, T1, R0, A1, N2);
2425  Add(T0, R1, T0, N2);
2426  TwosComplement(T0, N2);
2427  MultiplyBottom(R1, T1, R0, T0, N2);
2428  }
2429 }
2430 
2431 // R[N] --- result = X/(2**(WORD_BITS*N)) mod M
2432 // T[3*N] - temporary work space
2433 // X[2*N] - number to be reduced
2434 // M[N] --- modulus
2435 // U[N] --- multiplicative inverse of M mod 2**(WORD_BITS*N)
2436 
2437 void MontgomeryReduce(word *R, word *T, word *X, const word *M, const word *U, size_t N)
2438 {
2439 #if 1
2440  MultiplyBottom(R, T, X, U, N);
2441  MultiplyTop(T, T+N, X, R, M, N);
2442  word borrow = Subtract(T, X+N, T, N);
2443  // defend against timing attack by doing this Add even when not needed
2444  word carry = Add(T+N, T, M, N);
2445  CRYPTOPP_ASSERT(carry | !borrow);
2446  CRYPTOPP_UNUSED(carry), CRYPTOPP_UNUSED(borrow);
2447  CopyWords(R, T + ((0-borrow) & N), N);
2448 #elif 0
2449  const word u = 0-U[0];
2450  Declare2Words(p)
2451  for (size_t i=0; i<N; i++)
2452  {
2453  const word t = u * X[i];
2454  word c = 0;
2455  for (size_t j=0; j<N; j+=2)
2456  {
2457  MultiplyWords(p, t, M[j]);
2458  Acc2WordsBy1(p, X[i+j]);
2459  Acc2WordsBy1(p, c);
2460  X[i+j] = LowWord(p);
2461  c = HighWord(p);
2462  MultiplyWords(p, t, M[j+1]);
2463  Acc2WordsBy1(p, X[i+j+1]);
2464  Acc2WordsBy1(p, c);
2465  X[i+j+1] = LowWord(p);
2466  c = HighWord(p);
2467  }
2468 
2469  if (Increment(X+N+i, N-i, c))
2470  while (!Subtract(X+N, X+N, M, N)) {}
2471  }
2472 
2473  memcpy(R, X+N, N*WORD_SIZE);
2474 #else
2475  __m64 u = _mm_cvtsi32_si64(0-U[0]), p;
2476  for (size_t i=0; i<N; i++)
2477  {
2478  __m64 t = _mm_cvtsi32_si64(X[i]);
2479  t = _mm_mul_su32(t, u);
2480  __m64 c = _mm_setzero_si64();
2481  for (size_t j=0; j<N; j+=2)
2482  {
2483  p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j]));
2484  p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j]));
2485  c = _mm_add_si64(c, p);
2486  X[i+j] = _mm_cvtsi64_si32(c);
2487  c = _mm_srli_si64(c, 32);
2488  p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j+1]));
2489  p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j+1]));
2490  c = _mm_add_si64(c, p);
2491  X[i+j+1] = _mm_cvtsi64_si32(c);
2492  c = _mm_srli_si64(c, 32);
2493  }
2494 
2495  if (Increment(X+N+i, N-i, _mm_cvtsi64_si32(c)))
2496  while (!Subtract(X+N, X+N, M, N)) {}
2497  }
2498 
2499  memcpy(R, X+N, N*WORD_SIZE);
2500  _mm_empty();
2501 #endif
2502 }
2503 
2504 // R[N] --- result = X/(2**(WORD_BITS*N/2)) mod M
2505 // T[2*N] - temporary work space
2506 // X[2*N] - number to be reduced
2507 // M[N] --- modulus
2508 // U[N/2] - multiplicative inverse of M mod 2**(WORD_BITS*N/2)
2509 // V[N] --- 2**(WORD_BITS*3*N/2) mod M
2510 
2511 void HalfMontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, const word *V, size_t N)
2512 {
2513  CRYPTOPP_ASSERT(N%2==0 && N>=4);
2514 
2515 #define M0 M
2516 #define M1 (M+N2)
2517 #define V0 V
2518 #define V1 (V+N2)
2519 
2520 #define X0 X
2521 #define X1 (X+N2)
2522 #define X2 (X+N)
2523 #define X3 (X+N+N2)
2524 
2525  const size_t N2 = N/2;
2526  Multiply(T0, T2, V0, X3, N2);
2527  int c2 = Add(T0, T0, X0, N);
2528  MultiplyBottom(T3, T2, T0, U, N2);
2529  MultiplyTop(T2, R, T0, T3, M0, N2);
2530  c2 -= Subtract(T2, T1, T2, N2);
2531  Multiply(T0, R, T3, M1, N2);
2532  c2 -= Subtract(T0, T2, T0, N2);
2533  int c3 = -(int)Subtract(T1, X2, T1, N2);
2534  Multiply(R0, T2, V1, X3, N2);
2535  c3 += Add(R, R, T, N);
2536 
2537  if (c2>0)
2538  c3 += Increment(R1, N2);
2539  else if (c2<0)
2540  c3 -= Decrement(R1, N2, -c2);
2541 
2542  CRYPTOPP_ASSERT(c3>=-1 && c3<=1);
2543  if (c3>0)
2544  Subtract(R, R, M, N);
2545  else if (c3<0)
2546  Add(R, R, M, N);
2547 
2548 #undef M0
2549 #undef M1
2550 #undef V0
2551 #undef V1
2552 
2553 #undef X0
2554 #undef X1
2555 #undef X2
2556 #undef X3
2557 }
2558 
2559 #undef A0
2560 #undef A1
2561 #undef B0
2562 #undef B1
2563 
2564 #undef T0
2565 #undef T1
2566 #undef T2
2567 #undef T3
2568 
2569 #undef R0
2570 #undef R1
2571 #undef R2
2572 #undef R3
2573 
2574 /*
2575 // do a 3 word by 2 word divide, returns quotient and leaves remainder in A
2576 static word SubatomicDivide(word *A, word B0, word B1)
2577 {
2578  // CRYPTOPP_ASSERT {A[2],A[1]} < {B1,B0}, so quotient can fit in a word
2579  CRYPTOPP_ASSERT(A[2] < B1 || (A[2]==B1 && A[1] < B0));
2580 
2581  // estimate the quotient: do a 2 word by 1 word divide
2582  word Q;
2583  if (B1+1 == 0)
2584  Q = A[2];
2585  else
2586  Q = DWord(A[1], A[2]).DividedBy(B1+1);
2587 
2588  // now subtract Q*B from A
2589  DWord p = DWord::Multiply(B0, Q);
2590  DWord u = (DWord) A[0] - p.GetLowHalf();
2591  A[0] = u.GetLowHalf();
2592  u = (DWord) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - DWord::Multiply(B1, Q);
2593  A[1] = u.GetLowHalf();
2594  A[2] += u.GetHighHalf();
2595 
2596  // Q <= actual quotient, so fix it
2597  while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
2598  {
2599  u = (DWord) A[0] - B0;
2600  A[0] = u.GetLowHalf();
2601  u = (DWord) A[1] - B1 - u.GetHighHalfAsBorrow();
2602  A[1] = u.GetLowHalf();
2603  A[2] += u.GetHighHalf();
2604  Q++;
2605  CRYPTOPP_ASSERT(Q); // shouldn't overflow
2606  }
2607 
2608  return Q;
2609 }
2610 
2611 // do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1
2612 static inline void AtomicDivide(word *Q, const word *A, const word *B)
2613 {
2614  if (!B[0] && !B[1]) // if divisor is 0, we assume divisor==2**(2*WORD_BITS)
2615  {
2616  Q[0] = A[2];
2617  Q[1] = A[3];
2618  }
2619  else
2620  {
2621  word T[4];
2622  T[0] = A[0]; T[1] = A[1]; T[2] = A[2]; T[3] = A[3];
2623  Q[1] = SubatomicDivide(T+1, B[0], B[1]);
2624  Q[0] = SubatomicDivide(T, B[0], B[1]);
2625 
2626 #if CRYPTOPP_DEBUG
2627  // multiply quotient and divisor and add remainder, make sure it equals dividend
2628  CRYPTOPP_ASSERT(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
2629  word P[4];
2630  LowLevel::Multiply2(P, Q, B);
2631  Add(P, P, T, 4);
2632  CRYPTOPP_ASSERT(memcmp(P, A, 4*WORD_SIZE)==0);
2633 #endif
2634  }
2635 }
2636 */
2637 
2638 static inline void AtomicDivide(word *Q, const word *A, const word *B)
2639 {
2640  word T[4];
2641  DWord q = DivideFourWordsByTwo<word, DWord>(T, DWord(A[0], A[1]), DWord(A[2], A[3]), DWord(B[0], B[1]));
2642  Q[0] = q.GetLowHalf();
2643  Q[1] = q.GetHighHalf();
2644 
2645 #if CRYPTOPP_DEBUG
2646  if (B[0] || B[1])
2647  {
2648  // multiply quotient and divisor and add remainder, make sure it equals dividend
2649  CRYPTOPP_ASSERT(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
2650  word P[4];
2651  s_pMul[0](P, Q, B);
2652  Add(P, P, T, 4);
2653  CRYPTOPP_ASSERT(memcmp(P, A, 4*WORD_SIZE)==0);
2654  }
2655 #endif
2656 }
2657 
2658 // for use by Divide(), corrects the underestimated quotient {Q1,Q0}
2659 static void CorrectQuotientEstimate(word *R, word *T, word *Q, const word *B, size_t N)
2660 {
2661  CRYPTOPP_ASSERT(N && N%2==0);
2662 
2663  AsymmetricMultiply(T, T+N+2, Q, 2, B, N);
2664 
2665  word borrow = Subtract(R, R, T, N+2);
2666  CRYPTOPP_ASSERT(!borrow && !R[N+1]);
2667  CRYPTOPP_UNUSED(borrow);
2668 
2669  while (R[N] || Compare(R, B, N) >= 0)
2670  {
2671  R[N] -= Subtract(R, R, B, N);
2672  Q[1] += (++Q[0]==0);
2673  CRYPTOPP_ASSERT(Q[0] || Q[1]); // no overflow
2674  }
2675 }
2676 
2677 // R[NB] -------- remainder = A%B
2678 // Q[NA-NB+2] --- quotient = A/B
2679 // T[NA+3*(NB+2)] - temp work space
2680 // A[NA] -------- dividend
2681 // B[NB] -------- divisor
2682 
2683 void Divide(word *R, word *Q, word *T, const word *A, size_t NA, const word *B, size_t NB)
2684 {
2685  CRYPTOPP_ASSERT(NA && NB && NA%2==0 && NB%2==0);
2686  CRYPTOPP_ASSERT(B[NB-1] || B[NB-2]);
2687  CRYPTOPP_ASSERT(NB <= NA);
2688 
2689  // set up temporary work space
2690  word *const TA=T;
2691  word *const TB=T+NA+2;
2692  word *const TP=T+NA+2+NB;
2693 
2694  // copy B into TB and normalize it so that TB has highest bit set to 1
2695  unsigned shiftWords = (B[NB-1]==0);
2696  TB[0] = TB[NB-1] = 0;
2697  CopyWords(TB+shiftWords, B, NB-shiftWords);
2698  unsigned shiftBits = WORD_BITS - BitPrecision(TB[NB-1]);
2699  CRYPTOPP_ASSERT(shiftBits < WORD_BITS);
2700  ShiftWordsLeftByBits(TB, NB, shiftBits);
2701 
2702  // copy A into TA and normalize it
2703  TA[0] = TA[NA] = TA[NA+1] = 0;
2704  CopyWords(TA+shiftWords, A, NA);
2705  ShiftWordsLeftByBits(TA, NA+2, shiftBits);
2706 
2707  if (TA[NA+1]==0 && TA[NA] <= 1)
2708  {
2709  Q[NA-NB+1] = Q[NA-NB] = 0;
2710  while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0)
2711  {
2712  TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB);
2713  ++Q[NA-NB];
2714  }
2715  }
2716  else
2717  {
2718  NA+=2;
2719  CRYPTOPP_ASSERT(Compare(TA+NA-NB, TB, NB) < 0);
2720  }
2721 
2722  word BT[2];
2723  BT[0] = TB[NB-2] + 1;
2724  BT[1] = TB[NB-1] + (BT[0]==0);
2725 
2726  // start reducing TA mod TB, 2 words at a time
2727  for (size_t i=NA-2; i>=NB; i-=2)
2728  {
2729  AtomicDivide(Q+i-NB, TA+i-2, BT);
2730  CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB);
2731  }
2732 
2733  // copy TA into R, and denormalize it
2734  CopyWords(R, TA+shiftWords, NB);
2735  ShiftWordsRightByBits(R, NB, shiftBits);
2736 }
2737 
2738 static inline size_t EvenWordCount(const word *X, size_t N)
2739 {
2740  while (N && X[N-2]==0 && X[N-1]==0)
2741  N-=2;
2742  return N;
2743 }
2744 
2745 // return k
2746 // R[N] --- result = A^(-1) * 2^k mod M
2747 // T[4*N] - temporary work space
2748 // A[NA] -- number to take inverse of
2749 // M[N] --- modulus
2750 
2751 unsigned int AlmostInverse(word *R, word *T, const word *A, size_t NA, const word *M, size_t N)
2752 {
2753  CRYPTOPP_ASSERT(NA<=N && N && N%2==0);
2754 
2755  word *b = T;
2756  word *c = T+N;
2757  word *f = T+2*N;
2758  word *g = T+3*N;
2759  size_t bcLen=2, fgLen=EvenWordCount(M, N);
2760  unsigned int k=0;
2761  bool s=false;
2762 
2763  SetWords(T, 0, 3*N);
2764  b[0]=1;
2765  CopyWords(f, A, NA);
2766  CopyWords(g, M, N);
2767 
2768  while (1)
2769  {
2770  word t=f[0];
2771  while (!t)
2772  {
2773  if (EvenWordCount(f, fgLen)==0)
2774  {
2775  SetWords(R, 0, N);
2776  return 0;
2777  }
2778 
2779  ShiftWordsRightByWords(f, fgLen, 1);
2780  bcLen += 2 * (c[bcLen-1] != 0);
2781  CRYPTOPP_ASSERT(bcLen <= N);
2782  ShiftWordsLeftByWords(c, bcLen, 1);
2783  k+=WORD_BITS;
2784  t=f[0];
2785  }
2786 
2787  // t must be non-0; otherwise undefined.
2788  unsigned int i = TrailingZeros(t);
2789  t >>= i;
2790  k += i;
2791 
2792  if (t==1 && f[1]==0 && EvenWordCount(f+2, fgLen-2)==0)
2793  {
2794  if (s)
2795  Subtract(R, M, b, N);
2796  else
2797  CopyWords(R, b, N);
2798  return k;
2799  }
2800 
2801  ShiftWordsRightByBits(f, fgLen, i);
2802  t = ShiftWordsLeftByBits(c, bcLen, i);
2803  c[bcLen] += t;
2804  bcLen += 2 * (t!=0);
2805  CRYPTOPP_ASSERT(bcLen <= N);
2806 
2807  bool swap = Compare(f, g, fgLen)==-1;
2808  ConditionalSwapPointers(swap, f, g);
2809  ConditionalSwapPointers(swap, b, c);
2810  s ^= swap;
2811 
2812  fgLen -= 2 * !(f[fgLen-2] | f[fgLen-1]);
2813 
2814  Subtract(f, f, g, fgLen);
2815  t = Add(b, b, c, bcLen);
2816  b[bcLen] += t;
2817  bcLen += 2*t;
2818  CRYPTOPP_ASSERT(bcLen <= N);
2819  }
2820 }
2821 
2822 // R[N] - result = A/(2^k) mod M
2823 // A[N] - input
2824 // M[N] - modulus
2825 
2826 void DivideByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
2827 {
2828  CopyWords(R, A, N);
2829 
2830  while (k--)
2831  {
2832  if (R[0]%2==0)
2833  ShiftWordsRightByBits(R, N, 1);
2834  else
2835  {
2836  word carry = Add(R, R, M, N);
2837  ShiftWordsRightByBits(R, N, 1);
2838  R[N-1] += carry<<(WORD_BITS-1);
2839  }
2840  }
2841 }
2842 
2843 // R[N] - result = A*(2^k) mod M
2844 // A[N] - input
2845 // M[N] - modulus
2846 
2847 void MultiplyByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
2848 {
2849  CopyWords(R, A, N);
2850 
2851  while (k--)
2852  if (ShiftWordsLeftByBits(R, N, 1) || Compare(R, M, N)>=0)
2853  Subtract(R, R, M, N);
2854 }
2855 
2856 // ******************************************************************
2857 
2858 InitializeInteger::InitializeInteger()
2859 {
2860  if (!g_pAssignIntToInteger)
2861  {
2862  SetFunctionPointers();
2863  g_pAssignIntToInteger = (CryptoPP::PAssignIntToInteger)AssignIntToInteger;
2864  }
2865 }
2866 
2867 static const unsigned int RoundupSizeTable[] = {2, 2, 2, 4, 4, 8, 8, 8, 8};
2868 
2869 static inline size_t RoundupSize(size_t n)
2870 {
2871  if (n<=8)
2872  return RoundupSizeTable[n];
2873  else if (n<=16)
2874  return 16;
2875  else if (n<=32)
2876  return 32;
2877  else if (n<=64)
2878  return 64;
2879  else return size_t(1) << BitPrecision(n-1);
2880 }
2881 
2883  : reg(2), sign(POSITIVE)
2884 {
2885  reg[0] = reg[1] = 0;
2886 }
2887 
2889  : reg(RoundupSize(t.WordCount())), sign(t.sign)
2890 {
2891  CopyWords(reg, t.reg, reg.size());
2892 }
2893 
2894 Integer::Integer(Sign s, lword value)
2895  : reg(2), sign(s)
2896 {
2897  reg[0] = word(value);
2898  reg[1] = word(SafeRightShift<WORD_BITS>(value));
2899 }
2900 
2901 Integer::Integer(signed long value)
2902  : reg(2)
2903 {
2904  if (value >= 0)
2905  sign = POSITIVE;
2906  else
2907  {
2908  sign = NEGATIVE;
2909  value = -value;
2910  }
2911  reg[0] = word(value);
2912  reg[1] = word(SafeRightShift<WORD_BITS>((unsigned long)value));
2913 }
2914 
2915 Integer::Integer(Sign s, word high, word low)
2916  : reg(2), sign(s)
2917 {
2918  reg[0] = low;
2919  reg[1] = high;
2920 }
2921 
2923 {
2924  if (ByteCount() > sizeof(long))
2925  return false;
2926 
2927  unsigned long value = (unsigned long)reg[0];
2928  value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
2929 
2930  if (sign==POSITIVE)
2931  return (signed long)value >= 0;
2932  else
2933  return -(signed long)value < 0;
2934 }
2935 
2936 signed long Integer::ConvertToLong() const
2937 {
2939 
2940  unsigned long value = (unsigned long)reg[0];
2941  value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
2942  return sign==POSITIVE ? value : -(signed long)value;
2943 }
2944 
2945 Integer::Integer(BufferedTransformation &encodedInteger, size_t byteCount, Signedness s, ByteOrder o)
2946 {
2948 
2949  if(o == LITTLE_ENDIAN_ORDER)
2950  {
2951  SecByteBlock block(byteCount);
2952  encodedInteger.Get(block, block.size());
2953  std::reverse(block.begin(), block.begin()+block.size());
2954 
2955  Decode(block.begin(), block.size(), s);
2956  return;
2957  }
2958 
2959  Decode(encodedInteger, byteCount, s);
2960 }
2961 
2962 Integer::Integer(const byte *encodedInteger, size_t byteCount, Signedness s, ByteOrder o)
2963 {
2965 
2966  if(o == LITTLE_ENDIAN_ORDER)
2967  {
2968  SecByteBlock block(byteCount);
2969 #if (CRYPTOPP_MSC_VERSION >= 1400)
2970  std::reverse_copy(encodedInteger, encodedInteger+byteCount,
2971  stdext::make_checked_array_iterator(block.begin(), block.size()));
2972 #else
2973  std::reverse_copy(encodedInteger, encodedInteger+byteCount, block.begin());
2974 #endif
2975  Decode(block.begin(), block.size(), s);
2976  return;
2977  }
2978 
2979  Decode(encodedInteger, byteCount, s);
2980 }
2981 
2983 {
2984  BERDecode(bt);
2985 }
2986 
2988 {
2989  Randomize(rng, bitcount);
2990 }
2991 
2992 Integer::Integer(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
2993 {
2994  if (!Randomize(rng, min, max, rnType, equiv, mod))
2996 }
2997 
2999 {
3000  Integer r((word)0, BitsToWords(e+1));
3001  r.SetBit(e);
3002  return r;
3003 }
3004 
3005 template <long i>
3007 {
3008  Integer * operator()() const
3009  {
3010  return new Integer(i);
3011  }
3012 };
3013 
3015 {
3016  return Singleton<Integer>().Ref();
3017 }
3018 
3020 {
3021  return Singleton<Integer, NewInteger<1> >().Ref();
3022 }
3023 
3025 {
3026  return Singleton<Integer, NewInteger<2> >().Ref();
3027 }
3028 
3029 bool Integer::operator!() const
3030 {
3031  return IsNegative() ? false : (reg[0]==0 && WordCount()==0);
3032 }
3033 
3034 Integer& Integer::operator=(const Integer& t)
3035 {
3036  if (this != &t)
3037  {
3038  if (reg.size() != t.reg.size() || t.reg[t.reg.size()/2] == 0)
3039  reg.New(RoundupSize(t.WordCount()));
3040  CopyWords(reg, t.reg, reg.size());
3041  sign = t.sign;
3042  }
3043  return *this;
3044 }
3045 
3046 bool Integer::GetBit(size_t n) const
3047 {
3048  if (n/WORD_BITS >= reg.size())
3049  return 0;
3050  else
3051  return bool((reg[n/WORD_BITS] >> (n % WORD_BITS)) & 1);
3052 }
3053 
3054 void Integer::SetBit(size_t n, bool value)
3055 {
3056  if (value)
3057  {
3058  reg.CleanGrow(RoundupSize(BitsToWords(n+1)));
3059  reg[n/WORD_BITS] |= (word(1) << (n%WORD_BITS));
3060  }
3061  else
3062  {
3063  if (n/WORD_BITS < reg.size())
3064  reg[n/WORD_BITS] &= ~(word(1) << (n%WORD_BITS));
3065  }
3066 }
3067 
3068 byte Integer::GetByte(size_t n) const
3069 {
3070  if (n/WORD_SIZE >= reg.size())
3071  return 0;
3072  else
3073  return byte(reg[n/WORD_SIZE] >> ((n%WORD_SIZE)*8));
3074 }
3075 
3076 void Integer::SetByte(size_t n, byte value)
3077 {
3078  reg.CleanGrow(RoundupSize(BytesToWords(n+1)));
3079  reg[n/WORD_SIZE] &= ~(word(0xff) << 8*(n%WORD_SIZE));
3080  reg[n/WORD_SIZE] |= (word(value) << 8*(n%WORD_SIZE));
3081 }
3082 
3083 lword Integer::GetBits(size_t i, size_t n) const
3084 {
3085  lword v = 0;
3086  CRYPTOPP_ASSERT(n <= sizeof(v)*8);
3087  for (unsigned int j=0; j<n; j++)
3088  v |= lword(GetBit(i+j)) << j;
3089  return v;
3090 }
3091 
3092 Integer Integer::operator-() const
3093 {
3094  Integer result(*this);
3095  result.Negate();
3096  return result;
3097 }
3098 
3099 Integer Integer::AbsoluteValue() const
3100 {
3101  Integer result(*this);
3102  result.sign = POSITIVE;
3103  return result;
3104 }
3105 
3107 {
3108  reg.swap(a.reg);
3109  std::swap(sign, a.sign);
3110 }
3111 
3112 Integer::Integer(word value, size_t length)
3113  : reg(RoundupSize(length)), sign(POSITIVE)
3114 {
3115  reg[0] = value;
3116  SetWords(reg+1, 0, reg.size()-1);
3117 }
3118 
3119 template <class T>
3120 static Integer StringToInteger(const T *str, ByteOrder order)
3121 {
3122  CRYPTOPP_ASSERT( order == BIG_ENDIAN_ORDER || order == LITTLE_ENDIAN_ORDER );
3123 
3124  int radix, sign = 1;
3125  // GCC workaround
3126  // std::char_traits<wchar_t>::length() not defined in GCC 3.2 and STLport 4.5.3
3127  unsigned int length;
3128  for (length = 0; str[length] != 0; length++) {}
3129 
3130  Integer v;
3131 
3132  if (length == 0)
3133  return Integer::Zero();
3134 
3135  switch (str[length-1])
3136  {
3137  case 'h':
3138  case 'H':
3139  radix=16;
3140  break;
3141  case 'o':
3142  case 'O':
3143  radix=8;
3144  break;
3145  case 'b':
3146  case 'B':
3147  radix=2;
3148  break;
3149  default:
3150  radix=10;
3151  }
3152 
3153  // 'str' is of length 1 or more
3154  if (str[0] == '-')
3155  {
3156  sign = -1;
3157  str += 1, length -= 1;
3158  }
3159 
3160  if (length > 2 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X'))
3161  {
3162  radix = 16;
3163  str += 2, length -= 2;
3164  }
3165 
3166  if(order == BIG_ENDIAN_ORDER)
3167  {
3168  for (unsigned int i=0; i<length; i++)
3169  {
3170  int digit, ch = static_cast<int>(str[i]);
3171 
3172  if (ch >= '0' && ch <= '9')
3173  digit = ch - '0';
3174  else if (ch >= 'A' && ch <= 'F')
3175  digit = ch - 'A' + 10;
3176  else if (ch >= 'a' && ch <= 'f')
3177  digit = ch - 'a' + 10;
3178  else
3179  digit = radix;
3180 
3181  if (digit < radix)
3182  {
3183  v *= radix;
3184  v += digit;
3185  }
3186  }
3187  }
3188  else if(radix == 16 && order == LITTLE_ENDIAN_ORDER)
3189  {
3190  // Nibble high, low and count
3191  unsigned int nh = 0, nl = 0, nc = 0;
3192  Integer position(Integer::One());
3193 
3194  for (unsigned int i=0; i<length; i++)
3195  {
3196  int digit, ch = static_cast<int>(str[i]);
3197 
3198  if (ch >= '0' && ch <= '9')
3199  digit = ch - '0';
3200  else if (ch >= 'A' && ch <= 'F')
3201  digit = ch - 'A' + 10;
3202  else if (ch >= 'a' && ch <= 'f')
3203  digit = ch - 'a' + 10;
3204  else
3205  digit = radix;
3206 
3207  if (digit < radix)
3208  {
3209  if(nc++ == 0)
3210  nh = digit;
3211  else
3212  nl = digit;
3213 
3214  if(nc == 2)
3215  {
3216  v += position * (nh << 4 | nl);
3217  nc = 0, position <<= 8;
3218  }
3219  }
3220  }
3221 
3222  if(nc == 1)
3223  v += nh * position;
3224  }
3225  else // LITTLE_ENDIAN_ORDER && radix != 16
3226  {
3227  for (int i=static_cast<int>(length)-1; i>=0; i--)
3228  {
3229  int digit, ch = static_cast<int>(str[i]);
3230 
3231  if (ch >= '0' && ch <= '9')
3232  digit = ch - '0';
3233  else if (ch >= 'A' && ch <= 'F')
3234  digit = ch - 'A' + 10;
3235  else if (ch >= 'a' && ch <= 'f')
3236  digit = ch - 'a' + 10;
3237  else
3238  digit = radix;
3239 
3240  if (digit < radix)
3241  {
3242  v *= radix;
3243  v += digit;
3244  }
3245  }
3246  }
3247 
3248  if (sign == -1)
3249  v.Negate();
3250 
3251  return v;
3252 }
3253 
3254 Integer::Integer(const char *str, ByteOrder order)
3255  : reg(2), sign(POSITIVE)
3256 {
3257  *this = StringToInteger(str,order);
3258 }
3259 
3260 Integer::Integer(const wchar_t *str, ByteOrder order)
3261  : reg(2), sign(POSITIVE)
3262 {
3263  *this = StringToInteger(str,order);
3264 }
3265 
3266 unsigned int Integer::WordCount() const
3267 {
3268  return (unsigned int)CountWords(reg, reg.size());
3269 }
3270 
3271 unsigned int Integer::ByteCount() const
3272 {
3273  unsigned wordCount = WordCount();
3274  if (wordCount)
3275  return (wordCount-1)*WORD_SIZE + BytePrecision(reg[wordCount-1]);
3276  else
3277  return 0;
3278 }
3279 
3280 unsigned int Integer::BitCount() const
3281 {
3282  unsigned wordCount = WordCount();
3283  if (wordCount)
3284  return (wordCount-1)*WORD_BITS + BitPrecision(reg[wordCount-1]);
3285  else
3286  return 0;
3287 }
3288 
3289 void Integer::Decode(const byte *input, size_t inputLen, Signedness s)
3290 {
3291  StringStore store(input, inputLen);
3292  Decode(store, inputLen, s);
3293 }
3294 
3296 {
3297  CRYPTOPP_ASSERT(bt.MaxRetrievable() >= inputLen);
3298 
3299  byte b;
3300  bt.Peek(b);
3301  sign = ((s==SIGNED) && (b & 0x80)) ? NEGATIVE : POSITIVE;
3302 
3303  while (inputLen>0 && (sign==POSITIVE ? b==0 : b==0xff))
3304  {
3305  bt.Skip(1);
3306  inputLen--;
3307  bt.Peek(b);
3308  }
3309 
3310  // The call to CleanNew is optimized away above -O0/-Og.
3311  const size_t size = RoundupSize(BytesToWords(inputLen));
3312  reg.CleanNew(size);
3313 
3314  CRYPTOPP_ASSERT(reg.SizeInBytes() >= inputLen);
3315  for (size_t i=inputLen; i > 0; i--)
3316  {
3317  bt.Get(b);
3318  reg[(i-1)/WORD_SIZE] |= word(b) << ((i-1)%WORD_SIZE)*8;
3319  }
3320 
3321  if (sign == NEGATIVE)
3322  {
3323  for (size_t i=inputLen; i<reg.size()*WORD_SIZE; i++)
3324  reg[i/WORD_SIZE] |= word(0xff) << (i%WORD_SIZE)*8;
3325  TwosComplement(reg, reg.size());
3326  }
3327 }
3328 
3329 size_t Integer::MinEncodedSize(Signedness signedness) const
3330 {
3331  unsigned int outputLen = STDMAX(1U, ByteCount());
3332  if (signedness == UNSIGNED)
3333  return outputLen;
3334  if (NotNegative() && (GetByte(outputLen-1) & 0x80))
3335  outputLen++;
3336  if (IsNegative() && *this < -Power2(outputLen*8-1))
3337  outputLen++;
3338  return outputLen;
3339 }
3340 
3341 void Integer::Encode(byte *output, size_t outputLen, Signedness signedness) const
3342 {
3343  CRYPTOPP_ASSERT(output && outputLen);
3344  ArraySink sink(output, outputLen);
3345  Encode(sink, outputLen, signedness);
3346 }
3347 
3348 void Integer::Encode(BufferedTransformation &bt, size_t outputLen, Signedness signedness) const
3349 {
3350  if (signedness == UNSIGNED || NotNegative())
3351  {
3352  for (size_t i=outputLen; i > 0; i--)
3353  bt.Put(GetByte(i-1));
3354  }
3355  else
3356  {
3357  // take two's complement of *this
3358  Integer temp = Integer::Power2(8*STDMAX((size_t)ByteCount(), outputLen)) + *this;
3359  temp.Encode(bt, outputLen, UNSIGNED);
3360  }
3361 }
3362 
3364 {
3365  DERGeneralEncoder enc(bt, INTEGER);
3367  enc.MessageEnd();
3368 }
3369 
3370 void Integer::BERDecode(const byte *input, size_t len)
3371 {
3372  StringStore store(input, len);
3373  BERDecode(store);
3374 }
3375 
3377 {
3378  BERGeneralDecoder dec(bt, INTEGER);
3379  if (!dec.IsDefiniteLength() || dec.MaxRetrievable() < dec.RemainingLength())
3380  BERDecodeError();
3381  Decode(dec, (size_t)dec.RemainingLength(), SIGNED);
3382  dec.MessageEnd();
3383 }
3384 
3386 {
3387  DERGeneralEncoder enc(bt, OCTET_STRING);
3388  Encode(enc, length);
3389  enc.MessageEnd();
3390 }
3391 
3393 {
3394  BERGeneralDecoder dec(bt, OCTET_STRING);
3395  if (!dec.IsDefiniteLength() || dec.RemainingLength() != length)
3396  BERDecodeError();
3397  Decode(dec, length);
3398  dec.MessageEnd();
3399 }
3400 
3401 size_t Integer::OpenPGPEncode(byte *output, size_t len) const
3402 {
3403  ArraySink sink(output, len);
3404  return OpenPGPEncode(sink);
3405 }
3406 
3408 {
3409  word16 bitCount = word16(BitCount());
3410  bt.PutWord16(bitCount);
3411  size_t byteCount = BitsToBytes(bitCount);
3412  Encode(bt, byteCount);
3413  return 2 + byteCount;
3414 }
3415 
3416 void Integer::OpenPGPDecode(const byte *input, size_t len)
3417 {
3418  StringStore store(input, len);
3419  OpenPGPDecode(store);
3420 }
3421 
3423 {
3424  word16 bitCount;
3425  if (bt.GetWord16(bitCount) != 2 || bt.MaxRetrievable() < BitsToBytes(bitCount))
3426  throw OpenPGPDecodeErr();
3427  Decode(bt, BitsToBytes(bitCount));
3428 }
3429 
3431 {
3432  const size_t nbytes = nbits/8 + 1;
3433  SecByteBlock buf(nbytes);
3434  rng.GenerateBlock(buf, nbytes);
3435  if (nbytes)
3436  buf[0] = (byte)Crop(buf[0], nbits % 8);
3437  Decode(buf, nbytes, UNSIGNED);
3438 }
3439 
3440 void Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max)
3441 {
3442  if (min > max)
3443  throw InvalidArgument("Integer: Min must be no greater than Max");
3444 
3445  Integer range = max - min;
3446  const unsigned int nbits = range.BitCount();
3447 
3448  do
3449  {
3450  Randomize(rng, nbits);
3451  }
3452  while (*this > range);
3453 
3454  *this += min;
3455 }
3456 
3457 bool Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
3458 {
3459  return GenerateRandomNoThrow(rng, MakeParameters("Min", min)("Max", max)("RandomNumberType", rnType)("EquivalentTo", equiv)("Mod", mod));
3460 }
3461 
3463 {
3464 public:
3465  KDF2_RNG(const byte *seed, size_t seedSize)
3466  : m_counter(0), m_counterAndSeed(seedSize + 4)
3467  {
3468  memcpy(m_counterAndSeed + 4, seed, seedSize);
3469  }
3470 
3471  void GenerateBlock(byte *output, size_t size)
3472  {
3473  PutWord(false, BIG_ENDIAN_ORDER, m_counterAndSeed, m_counter);
3474  ++m_counter;
3475  P1363_KDF2<SHA1>::DeriveKey(output, size, m_counterAndSeed, m_counterAndSeed.size(), NULL, 0);
3476  }
3477 
3478 private:
3479  word32 m_counter;
3480  SecByteBlock m_counterAndSeed;
3481 };
3482 
3483 bool Integer::GenerateRandomNoThrow(RandomNumberGenerator &i_rng, const NameValuePairs &params)
3484 {
3485  Integer min = params.GetValueWithDefault("Min", Integer::Zero());
3486  Integer max;
3487  if (!params.GetValue("Max", max))
3488  {
3489  int bitLength;
3490  if (params.GetIntValue("BitLength", bitLength))
3491  max = Integer::Power2(bitLength);
3492  else
3493  throw InvalidArgument("Integer: missing Max argument");
3494  }
3495  if (min > max)
3496  throw InvalidArgument("Integer: Min must be no greater than Max");
3497 
3498  Integer equiv = params.GetValueWithDefault("EquivalentTo", Integer::Zero());
3499  Integer mod = params.GetValueWithDefault("Mod", Integer::One());
3500 
3501  if (equiv.IsNegative() || equiv >= mod)
3502  throw InvalidArgument("Integer: invalid EquivalentTo and/or Mod argument");
3503 
3504  Integer::RandomNumberType rnType = params.GetValueWithDefault("RandomNumberType", Integer::ANY);
3505 
3506  member_ptr<KDF2_RNG> kdf2Rng;
3508  if (params.GetValue(Name::Seed(), seed))
3509  {
3510  ByteQueue bq;
3511  DERSequenceEncoder seq(bq);
3512  min.DEREncode(seq);
3513  max.DEREncode(seq);
3514  equiv.DEREncode(seq);
3515  mod.DEREncode(seq);
3516  DEREncodeUnsigned(seq, rnType);
3517  DEREncodeOctetString(seq, seed.begin(), seed.size());
3518  seq.MessageEnd();
3519 
3520  SecByteBlock finalSeed((size_t)bq.MaxRetrievable());
3521  bq.Get(finalSeed, finalSeed.size());
3522  kdf2Rng.reset(new KDF2_RNG(finalSeed.begin(), finalSeed.size()));
3523  }
3524  RandomNumberGenerator &rng = kdf2Rng.get() ? (RandomNumberGenerator &)*kdf2Rng : i_rng;
3525 
3526  switch (rnType)
3527  {
3528  case ANY:
3529  if (mod == One())
3530  Randomize(rng, min, max);
3531  else
3532  {
3533  Integer min1 = min + (equiv-min)%mod;
3534  if (max < min1)
3535  return false;
3536  Randomize(rng, Zero(), (max - min1) / mod);
3537  *this *= mod;
3538  *this += min1;
3539  }
3540  return true;
3541 
3542  case PRIME:
3543  {
3544  const PrimeSelector *pSelector = params.GetValueWithDefault(Name::PointerToPrimeSelector(), (const PrimeSelector *)NULL);
3545 
3546  int i;
3547  i = 0;
3548  while (1)
3549  {
3550  if (++i==16)
3551  {
3552  // check if there are any suitable primes in [min, max]
3553  Integer first = min;
3554  if (FirstPrime(first, max, equiv, mod, pSelector))
3555  {
3556  // if there is only one suitable prime, we're done
3557  *this = first;
3558  if (!FirstPrime(first, max, equiv, mod, pSelector))
3559  return true;
3560  }
3561  else
3562  return false;
3563  }
3564 
3565  Randomize(rng, min, max);
3566  if (FirstPrime(*this, STDMIN(*this+mod*PrimeSearchInterval(max), max), equiv, mod, pSelector))
3567  return true;
3568  }
3569  }
3570 
3571  default:
3572  throw InvalidArgument("Integer: invalid RandomNumberType argument");
3573  }
3574 }
3575 
3576 std::istream& operator>>(std::istream& in, Integer &a)
3577 {
3578  char c;
3579  unsigned int length = 0;
3580  SecBlock<char> str(length + 16);
3581 
3582  std::ws(in);
3583 
3584  do
3585  {
3586  in.read(&c, 1);
3587  str[length++] = c;
3588  if (length >= str.size())
3589  str.Grow(length + 16);
3590  }
3591  while (in && (c=='-' || c=='x' || (c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F') || c=='h' || c=='H' || c=='o' || c=='O' || c==',' || c=='.'));
3592 
3593  if (in.gcount())
3594  in.putback(c);
3595  str[length-1] = '\0';
3596  a = Integer(str);
3597 
3598  return in;
3599 }
3600 
3601 std::ostream& operator<<(std::ostream& out, const Integer &a)
3602 {
3603  // Get relevant conversion specifications from ostream.
3604  const long f = out.flags() & std::ios::basefield; // Get base digits.
3605  int base, block;
3606  char suffix;
3607  switch(f)
3608  {
3609  case std::ios::oct :
3610  base = 8;
3611  block = 8;
3612  suffix = 'o';
3613  break;
3614  case std::ios::hex :
3615  base = 16;
3616  block = 4;
3617  suffix = 'h';
3618  break;
3619  default :
3620  base = 10;
3621  block = 3;
3622  suffix = '.';
3623  }
3624 
3625  Integer temp1=a, temp2;
3626 
3627  if (a.IsNegative())
3628  {
3629  out << '-';
3630  temp1.Negate();
3631  }
3632 
3633  if (!a)
3634  out << '0';
3635 
3636  static const char upper[]="0123456789ABCDEF";
3637  static const char lower[]="0123456789abcdef";
3638 
3639  const char* vec = (out.flags() & std::ios::uppercase) ? upper : lower;
3640  unsigned int i=0;
3641  SecBlock<char> s(a.BitCount() / (SaturatingSubtract1(BitPrecision(base),1U)) + 1);
3642 
3643  while (!!temp1)
3644  {
3645  word digit;
3646  Integer::Divide(digit, temp2, temp1, base);
3647  s[i++]=vec[digit];
3648  temp1.swap(temp2);
3649  }
3650 
3651  while (i--)
3652  {
3653  out << s[i];
3654 // if (i && !(i%block))
3655 // out << ",";
3656  }
3657 
3658 #ifdef CRYPTOPP_USE_STD_SHOWBASE
3659  if(out.flags() & std::ios_base::showbase)
3660  out << suffix;
3661 
3662  return out;
3663 #else
3664  return out << suffix;
3665 #endif
3666 }
3667 
3668 Integer& Integer::operator++()
3669 {
3670  if (NotNegative())
3671  {
3672  if (Increment(reg, reg.size()))
3673  {
3674  reg.CleanGrow(2*reg.size());
3675  reg[reg.size()/2]=1;
3676  }
3677  }
3678  else
3679  {
3680  word borrow = Decrement(reg, reg.size());
3681  CRYPTOPP_ASSERT(!borrow);
3682  CRYPTOPP_UNUSED(borrow);
3683 
3684  if (WordCount()==0)
3685  *this = Zero();
3686  }
3687  return *this;
3688 }
3689 
3690 Integer& Integer::operator--()
3691 {
3692  if (IsNegative())
3693  {
3694  if (Increment(reg, reg.size()))
3695  {
3696  reg.CleanGrow(2*reg.size());
3697  reg[reg.size()/2]=1;
3698  }
3699  }
3700  else
3701  {
3702  if (Decrement(reg, reg.size()))
3703  *this = -One();
3704  }
3705  return *this;
3706 }
3707 
3708 void PositiveAdd(Integer &sum, const Integer &a, const Integer& b)
3709 {
3710  int carry;
3711  if (a.reg.size() == b.reg.size())
3712  carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
3713  else if (a.reg.size() > b.reg.size())
3714  {
3715  carry = Add(sum.reg, a.reg, b.reg, b.reg.size());
3716  CopyWords(sum.reg+b.reg.size(), a.reg+b.reg.size(), a.reg.size()-b.reg.size());
3717  carry = Increment(sum.reg+b.reg.size(), a.reg.size()-b.reg.size(), carry);
3718  }
3719  else
3720  {
3721  carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
3722  CopyWords(sum.reg+a.reg.size(), b.reg+a.reg.size(), b.reg.size()-a.reg.size());
3723  carry = Increment(sum.reg+a.reg.size(), b.reg.size()-a.reg.size(), carry);
3724  }
3725 
3726  if (carry)
3727  {
3728  sum.reg.CleanGrow(2*sum.reg.size());
3729  sum.reg[sum.reg.size()/2] = 1;
3730  }
3731  sum.sign = Integer::POSITIVE;
3732 }
3733 
3734 void PositiveSubtract(Integer &diff, const Integer &a, const Integer& b)
3735 {
3736  unsigned aSize = a.WordCount();
3737  aSize += aSize%2;
3738  unsigned bSize = b.WordCount();
3739  bSize += bSize%2;
3740 
3741  if (aSize == bSize)
3742  {
3743  if (Compare(a.reg, b.reg, aSize) >= 0)
3744  {
3745  Subtract(diff.reg, a.reg, b.reg, aSize);
3746  diff.sign = Integer::POSITIVE;
3747  }
3748  else
3749  {
3750  Subtract(diff.reg, b.reg, a.reg, aSize);
3751  diff.sign = Integer::NEGATIVE;
3752  }
3753  }
3754  else if (aSize > bSize)
3755  {
3756  word borrow = Subtract(diff.reg, a.reg, b.reg, bSize);
3757  CopyWords(diff.reg+bSize, a.reg+bSize, aSize-bSize);
3758  borrow = Decrement(diff.reg+bSize, aSize-bSize, borrow);
3759  CRYPTOPP_ASSERT(!borrow);
3760  diff.sign = Integer::POSITIVE;
3761  }
3762  else
3763  {
3764  word borrow = Subtract(diff.reg, b.reg, a.reg, aSize);
3765  CopyWords(diff.reg+aSize, b.reg+aSize, bSize-aSize);
3766  borrow = Decrement(diff.reg+aSize, bSize-aSize, borrow);
3767  CRYPTOPP_ASSERT(!borrow);
3768  diff.sign = Integer::NEGATIVE;
3769  }
3770 }
3771 
3772 // MSVC .NET 2003 workaround
3773 template <class T> inline const T& STDMAX2(const T& a, const T& b)
3774 {
3775  return a < b ? b : a;
3776 }
3777 
3778 Integer Integer::Plus(const Integer& b) const
3779 {
3780  Integer sum((word)0, STDMAX2(reg.size(), b.reg.size()));
3781  if (NotNegative())
3782  {
3783  if (b.NotNegative())
3784  PositiveAdd(sum, *this, b);
3785  else
3786  PositiveSubtract(sum, *this, b);
3787  }
3788  else
3789  {
3790  if (b.NotNegative())
3791  PositiveSubtract(sum, b, *this);
3792  else
3793  {
3794  PositiveAdd(sum, *this, b);
3795  sum.sign = Integer::NEGATIVE;
3796  }
3797  }
3798  return sum;
3799 }
3800 
3801 Integer& Integer::operator+=(const Integer& t)
3802 {
3803  reg.CleanGrow(t.reg.size());
3804  if (NotNegative())
3805  {
3806  if (t.NotNegative())
3807  PositiveAdd(*this, *this, t);
3808  else
3809  PositiveSubtract(*this, *this, t);
3810  }
3811  else
3812  {
3813  if (t.NotNegative())
3814  PositiveSubtract(*this, t, *this);
3815  else
3816  {
3817  PositiveAdd(*this, *this, t);
3818  sign = Integer::NEGATIVE;
3819  }
3820  }
3821  return *this;
3822 }
3823 
3824 Integer Integer::Minus(const Integer& b) const
3825 {
3826  Integer diff((word)0, STDMAX2(reg.size(), b.reg.size()));
3827  if (NotNegative())
3828  {
3829  if (b.NotNegative())
3830  PositiveSubtract(diff, *this, b);
3831  else
3832  PositiveAdd(diff, *this, b);
3833  }
3834  else
3835  {
3836  if (b.NotNegative())
3837  {
3838  PositiveAdd(diff, *this, b);
3839  diff.sign = Integer::NEGATIVE;
3840  }
3841  else
3842  PositiveSubtract(diff, b, *this);
3843  }
3844  return diff;
3845 }
3846 
3847 Integer& Integer::operator-=(const Integer& t)
3848 {
3849  reg.CleanGrow(t.reg.size());
3850  if (NotNegative())
3851  {
3852  if (t.NotNegative())
3853  PositiveSubtract(*this, *this, t);
3854  else
3855  PositiveAdd(*this, *this, t);
3856  }
3857  else
3858  {
3859  if (t.NotNegative())
3860  {
3861  PositiveAdd(*this, *this, t);
3862  sign = Integer::NEGATIVE;
3863  }
3864  else
3865  PositiveSubtract(*this, t, *this);
3866  }
3867  return *this;
3868 }
3869 
3870 Integer& Integer::operator<<=(size_t n)
3871 {
3872  const size_t wordCount = WordCount();
3873  const size_t shiftWords = n / WORD_BITS;
3874  const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
3875 
3876  reg.CleanGrow(RoundupSize(wordCount+BitsToWords(n)));
3877  ShiftWordsLeftByWords(reg, wordCount + shiftWords, shiftWords);
3878  ShiftWordsLeftByBits(reg+shiftWords, wordCount+BitsToWords(shiftBits), shiftBits);
3879  return *this;
3880 }
3881 
3882 Integer& Integer::operator>>=(size_t n)
3883 {
3884  const size_t wordCount = WordCount();
3885  const size_t shiftWords = n / WORD_BITS;
3886  const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
3887 
3888  ShiftWordsRightByWords(reg, wordCount, shiftWords);
3889  if (wordCount > shiftWords)
3890  ShiftWordsRightByBits(reg, wordCount-shiftWords, shiftBits);
3891  if (IsNegative() && WordCount()==0) // avoid -0
3892  *this = Zero();
3893  return *this;
3894 }
3895 
3896 void PositiveMultiply(Integer &product, const Integer &a, const Integer &b)
3897 {
3898  size_t aSize = RoundupSize(a.WordCount());
3899  size_t bSize = RoundupSize(b.WordCount());
3900 
3901  product.reg.CleanNew(RoundupSize(aSize+bSize));
3902  product.sign = Integer::POSITIVE;
3903 
3904  IntegerSecBlock workspace(aSize + bSize);
3905  AsymmetricMultiply(product.reg, workspace, a.reg, aSize, b.reg, bSize);
3906 }
3907 
3908 void Multiply(Integer &product, const Integer &a, const Integer &b)
3909 {
3910  PositiveMultiply(product, a, b);
3911 
3912  if (a.NotNegative() != b.NotNegative())
3913  product.Negate();
3914 }
3915 
3917 {
3918  Integer product;
3919  Multiply(product, *this, b);
3920  return product;
3921 }
3922 
3923 /*
3924 void PositiveDivide(Integer &remainder, Integer &quotient,
3925  const Integer &dividend, const Integer &divisor)
3926 {
3927  remainder.reg.CleanNew(divisor.reg.size());
3928  remainder.sign = Integer::POSITIVE;
3929  quotient.reg.New(0);
3930  quotient.sign = Integer::POSITIVE;
3931  unsigned i=dividend.BitCount();
3932  while (i--)
3933  {
3934  word overflow = ShiftWordsLeftByBits(remainder.reg, remainder.reg.size(), 1);
3935  remainder.reg[0] |= dividend[i];
3936  if (overflow || remainder >= divisor)
3937  {
3938  Subtract(remainder.reg, remainder.reg, divisor.reg, remainder.reg.size());
3939  quotient.SetBit(i);
3940  }
3941  }
3942 }
3943 */
3944 
3945 void PositiveDivide(Integer &remainder, Integer &quotient,
3946  const Integer &a, const Integer &b)
3947 {
3948  unsigned aSize = a.WordCount();
3949  unsigned bSize = b.WordCount();
3950 
3951  if (!bSize)
3952  throw Integer::DivideByZero();
3953 
3954  if (aSize < bSize)
3955  {
3956  remainder = a;
3957  remainder.sign = Integer::POSITIVE;
3958  quotient = Integer::Zero();
3959  return;
3960  }
3961 
3962  aSize += aSize%2; // round up to next even number
3963  bSize += bSize%2;
3964 
3965  remainder.reg.CleanNew(RoundupSize(bSize));
3966  remainder.sign = Integer::POSITIVE;
3967  quotient.reg.CleanNew(RoundupSize(aSize-bSize+2));
3968  quotient.sign = Integer::POSITIVE;
3969 
3970  IntegerSecBlock T(aSize+3*(bSize+2));
3971  Divide(remainder.reg, quotient.reg, T, a.reg, aSize, b.reg, bSize);
3972 }
3973 
3974 void Integer::Divide(Integer &remainder, Integer &quotient, const Integer &dividend, const Integer &divisor)
3975 {
3976  PositiveDivide(remainder, quotient, dividend, divisor);
3977 
3978  if (dividend.IsNegative())
3979  {
3980  quotient.Negate();
3981  if (remainder.NotZero())
3982  {
3983  --quotient;
3984  remainder = divisor.AbsoluteValue() - remainder;
3985  }
3986  }
3987 
3988  if (divisor.IsNegative())
3989  quotient.Negate();
3990 }
3991 
3992 void Integer::DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n)
3993 {
3994  q = a;
3995  q >>= n;
3996 
3997  const size_t wordCount = BitsToWords(n);
3998  if (wordCount <= a.WordCount())
3999  {
4000  r.reg.resize(RoundupSize(wordCount));
4001  CopyWords(r.reg, a.reg, wordCount);
4002  SetWords(r.reg+wordCount, 0, r.reg.size()-wordCount);
4003  if (n % WORD_BITS != 0)
4004  r.reg[wordCount-1] %= (word(1) << (n % WORD_BITS));
4005  }
4006  else
4007  {
4008  r.reg.resize(RoundupSize(a.WordCount()));
4009  CopyWords(r.reg, a.reg, r.reg.size());
4010  }
4011  r.sign = POSITIVE;
4012 
4013  if (a.IsNegative() && r.NotZero())
4014  {
4015  --q;
4016  r = Power2(n) - r;
4017  }
4018 }
4019 
4020 Integer Integer::DividedBy(const Integer &b) const
4021 {
4022  Integer remainder, quotient;
4023  Integer::Divide(remainder, quotient, *this, b);
4024  return quotient;
4025 }
4026 
4028 {
4029  Integer remainder, quotient;
4030  Integer::Divide(remainder, quotient, *this, b);
4031  return remainder;
4032 }
4033 
4034 void Integer::Divide(word &remainder, Integer &quotient, const Integer &dividend, word divisor)
4035 {
4036  if (!divisor)
4037  throw Integer::DivideByZero();
4038 
4039  CRYPTOPP_ASSERT(divisor);
4040 
4041  if ((divisor & (divisor-1)) == 0) // divisor is a power of 2
4042  {
4043  quotient = dividend >> (BitPrecision(divisor)-1);
4044  remainder = dividend.reg[0] & (divisor-1);
4045  return;
4046  }
4047 
4048  unsigned int i = dividend.WordCount();
4049  quotient.reg.CleanNew(RoundupSize(i));
4050  remainder = 0;
4051  while (i--)
4052  {
4053  quotient.reg[i] = DWord(dividend.reg[i], remainder) / divisor;
4054  remainder = DWord(dividend.reg[i], remainder) % divisor;
4055  }
4056 
4057  if (dividend.NotNegative())
4058  quotient.sign = POSITIVE;
4059  else
4060  {
4061  quotient.sign = NEGATIVE;
4062  if (remainder)
4063  {
4064  --quotient;
4065  remainder = divisor - remainder;
4066  }
4067  }
4068 }
4069 
4070 Integer Integer::DividedBy(word b) const
4071 {
4072  word remainder;
4073  Integer quotient;
4074  Integer::Divide(remainder, quotient, *this, b);
4075  return quotient;
4076 }
4077 
4078 word Integer::Modulo(word divisor) const
4079 {
4080  if (!divisor)
4081  throw Integer::DivideByZero();
4082 
4083  CRYPTOPP_ASSERT(divisor);
4084 
4085  word remainder;
4086 
4087  if ((divisor & (divisor-1)) == 0) // divisor is a power of 2
4088  remainder = reg[0] & (divisor-1);
4089  else
4090  {
4091  unsigned int i = WordCount();
4092 
4093  if (divisor <= 5)
4094  {
4095  DWord sum(0, 0);
4096  while (i--)
4097  sum += reg[i];
4098  remainder = sum % divisor;
4099  }
4100  else
4101  {
4102  remainder = 0;
4103  while (i--)
4104  remainder = DWord(reg[i], remainder) % divisor;
4105  }
4106  }
4107 
4108  if (IsNegative() && remainder)
4109  remainder = divisor - remainder;
4110 
4111  return remainder;
4112 }
4113 
4115 {
4116  if (!!(*this)) // don't flip sign if *this==0
4117  sign = Sign(1-sign);
4118 }
4119 
4120 int Integer::PositiveCompare(const Integer& t) const
4121 {
4122  unsigned size = WordCount(), tSize = t.WordCount();
4123 
4124  if (size == tSize)
4125  return CryptoPP::Compare(reg, t.reg, size);
4126  else
4127  return size > tSize ? 1 : -1;
4128 }
4129 
4130 int Integer::Compare(const Integer& t) const
4131 {
4132  if (NotNegative())
4133  {
4134  if (t.NotNegative())
4135  return PositiveCompare(t);
4136  else
4137  return 1;
4138  }
4139  else
4140  {
4141  if (t.NotNegative())
4142  return -1;
4143  else
4144  return -PositiveCompare(t);
4145  }
4146 }
4147 
4149 {
4150  if (!IsPositive())
4151  return Zero();
4152 
4153  // overestimate square root
4154  Integer x, y = Power2((BitCount()+1)/2);
4155  CRYPTOPP_ASSERT(y*y >= *this);
4156 
4157  do
4158  {
4159  x = y;
4160  y = (x + *this/x) >> 1;
4161  } while (y<x);
4162 
4163  return x;
4164 }
4165 
4166 bool Integer::IsSquare() const
4167 {
4168  Integer r = SquareRoot();
4169  return *this == r.Squared();
4170 }
4171 
4172 bool Integer::IsUnit() const
4173 {
4174  return (WordCount() == 1) && (reg[0] == 1);
4175 }
4176 
4178 {
4179  return IsUnit() ? *this : Zero();
4180 }
4181 
4182 Integer a_times_b_mod_c(const Integer &x, const Integer& y, const Integer& m)
4183 {
4184  return x*y%m;
4185 }
4186 
4187 Integer a_exp_b_mod_c(const Integer &x, const Integer& e, const Integer& m)
4188 {
4189  ModularArithmetic mr(m);
4190  return mr.Exponentiate(x, e);
4191 }
4192 
4194 {
4195  return EuclideanDomainOf<Integer>().Gcd(a, b);
4196 }
4197 
4199 {
4201 
4202  if (IsNegative())
4203  return Modulo(m).InverseMod(m);
4204 
4205  if (m.IsEven())
4206  {
4207  if (!m || IsEven())
4208  return Zero(); // no inverse
4209  if (*this == One())
4210  return One();
4211 
4212  Integer u = m.Modulo(*this).InverseMod(*this);
4213  return !u ? Zero() : (m*(*this-u)+1)/(*this);
4214  }
4215 
4216  SecBlock<word> T(m.reg.size() * 4);
4217  Integer r((word)0, m.reg.size());
4218  unsigned k = AlmostInverse(r.reg, T, reg, reg.size(), m.reg, m.reg.size());
4219  DivideByPower2Mod(r.reg, r.reg, k, m.reg, m.reg.size());
4220  return r;
4221 }
4222 
4223 word Integer::InverseMod(word mod) const
4224 {
4225  word g0 = mod, g1 = *this % mod;
4226  word v0 = 0, v1 = 1;
4227  word y;
4228 
4229  while (g1)
4230  {
4231  if (g1 == 1)
4232  return v1;
4233  y = g0 / g1;
4234  g0 = g0 % g1;
4235  v0 += y * v1;
4236 
4237  if (!g0)
4238  break;
4239  if (g0 == 1)
4240  return mod-v0;
4241  y = g1 / g0;
4242  g1 = g1 % g0;
4243  v1 += y * v0;
4244  }
4245  return 0;
4246 }
4247 
4248 // ********************************************************
4249 
4251 {
4252  BERSequenceDecoder seq(bt);
4253  OID oid(seq);
4254  if (oid != ASN1::prime_field())
4255  BERDecodeError();
4256  m_modulus.BERDecode(seq);
4257  seq.MessageEnd();
4258  m_result.reg.resize(m_modulus.reg.size());
4259 }
4260 
4262 {
4263  DERSequenceEncoder seq(bt);
4264  ASN1::prime_field().DEREncode(seq);
4265  m_modulus.DEREncode(seq);
4266  seq.MessageEnd();
4267 }
4268 
4270 {
4272 }
4273 
4275 {
4277 }
4278 
4280 {
4281  if (a.reg.size()==m_modulus.reg.size())
4282  {
4283  CryptoPP::DivideByPower2Mod(m_result.reg.begin(), a.reg, 1, m_modulus.reg, a.reg.size());
4284  return m_result;
4285  }
4286  else
4287  return m_result1 = (a.IsEven() ? (a >> 1) : ((a+m_modulus) >> 1));
4288 }
4289 
4290 const Integer& ModularArithmetic::Add(const Integer &a, const Integer &b) const
4291 {
4292  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4293  {
4294  if (CryptoPP::Add(m_result.reg.begin(), a.reg, b.reg, a.reg.size())
4295  || Compare(m_result.reg, m_modulus.reg, a.reg.size()) >= 0)
4296  {
4297  CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
4298  }
4299  return m_result;
4300  }
4301  else
4302  {
4303  m_result1 = a+b;
4304  if (m_result1 >= m_modulus)
4305  m_result1 -= m_modulus;
4306  return m_result1;
4307  }
4308 }
4309 
4311 {
4312  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4313  {
4314  if (CryptoPP::Add(a.reg, a.reg, b.reg, a.reg.size())
4315  || Compare(a.reg, m_modulus.reg, a.reg.size()) >= 0)
4316  {
4317  CryptoPP::Subtract(a.reg, a.reg, m_modulus.reg, a.reg.size());
4318  }
4319  }
4320  else
4321  {
4322  a+=b;
4323  if (a>=m_modulus)
4324  a-=m_modulus;
4325  }
4326 
4327  return a;
4328 }
4329 
4330 const Integer& ModularArithmetic::Subtract(const Integer &a, const Integer &b) const
4331 {
4332  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4333  {
4334  if (CryptoPP::Subtract(m_result.reg.begin(), a.reg, b.reg, a.reg.size()))
4335  CryptoPP::Add(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
4336  return m_result;
4337  }
4338  else
4339  {
4340  m_result1 = a-b;
4341  if (m_result1.IsNegative())
4342  m_result1 += m_modulus;
4343  return m_result1;
4344  }
4345 }
4346 
4348 {
4349  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4350  {
4351  if (CryptoPP::Subtract(a.reg, a.reg, b.reg, a.reg.size()))
4352  CryptoPP::Add(a.reg, a.reg, m_modulus.reg, a.reg.size());
4353  }
4354  else
4355  {
4356  a-=b;
4357  if (a.IsNegative())
4358  a+=m_modulus;
4359  }
4360 
4361  return a;
4362 }
4363 
4365 {
4366  if (!a)
4367  return a;
4368 
4369  CopyWords(m_result.reg.begin(), m_modulus.reg, m_modulus.reg.size());
4370  if (CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, a.reg, a.reg.size()))
4371  Decrement(m_result.reg.begin()+a.reg.size(), m_modulus.reg.size()-a.reg.size());
4372 
4373  return m_result;
4374 }
4375 
4376 Integer ModularArithmetic::CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
4377 {
4378  if (m_modulus.IsOdd())
4379  {
4380  MontgomeryRepresentation dr(m_modulus);
4381  return dr.ConvertOut(dr.CascadeExponentiate(dr.ConvertIn(x), e1, dr.ConvertIn(y), e2));
4382  }
4383  else
4384  return AbstractRing<Integer>::CascadeExponentiate(x, e1, y, e2);
4385 }
4386 
4387 void ModularArithmetic::SimultaneousExponentiate(Integer *results, const Integer &base, const Integer *exponents, unsigned int exponentsCount) const
4388 {
4389  if (m_modulus.IsOdd())
4390  {
4391  MontgomeryRepresentation dr(m_modulus);
4392  dr.SimultaneousExponentiate(results, dr.ConvertIn(base), exponents, exponentsCount);
4393  for (unsigned int i=0; i<exponentsCount; i++)
4394  results[i] = dr.ConvertOut(results[i]);
4395  }
4396  else
4397  AbstractRing<Integer>::SimultaneousExponentiate(results, base, exponents, exponentsCount);
4398 }
4399 
4401  : ModularArithmetic(m),
4402  m_u((word)0, m_modulus.reg.size()),
4403  m_workspace(5*m_modulus.reg.size())
4404 {
4405  if (!m_modulus.IsOdd())
4406  throw InvalidArgument("MontgomeryRepresentation: Montgomery representation requires an odd modulus");
4407 
4408  RecursiveInverseModPower2(m_u.reg, m_workspace, m_modulus.reg, m_modulus.reg.size());
4409 }
4410 
4412 {
4413  word *const T = m_workspace.begin();
4414  word *const R = m_result.reg.begin();
4415  const size_t N = m_modulus.reg.size();
4416  CRYPTOPP_ASSERT(a.reg.size()<=N && b.reg.size()<=N);
4417 
4418  AsymmetricMultiply(T, T+2*N, a.reg, a.reg.size(), b.reg, b.reg.size());
4419  SetWords(T+a.reg.size()+b.reg.size(), 0, 2*N-a.reg.size()-b.reg.size());
4420  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4421  return m_result;
4422 }
4423 
4425 {
4426  word *const T = m_workspace.begin();
4427  word *const R = m_result.reg.begin();
4428  const size_t N = m_modulus.reg.size();
4429  CRYPTOPP_ASSERT(a.reg.size()<=N);
4430 
4431  CryptoPP::Square(T, T+2*N, a.reg, a.reg.size());
4432  SetWords(T+2*a.reg.size(), 0, 2*N-2*a.reg.size());
4433  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4434  return m_result;
4435 }
4436 
4438 {
4439  word *const T = m_workspace.begin();
4440  word *const R = m_result.reg.begin();
4441  const size_t N = m_modulus.reg.size();
4442  CRYPTOPP_ASSERT(a.reg.size()<=N);
4443 
4444  CopyWords(T, a.reg, a.reg.size());
4445  SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
4446  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4447  return m_result;
4448 }
4449 
4451 {
4452 // return (EuclideanMultiplicativeInverse(a, modulus)<<(2*WORD_BITS*modulus.reg.size()))%modulus;
4453  word *const T = m_workspace.begin();
4454  word *const R = m_result.reg.begin();
4455  const size_t N = m_modulus.reg.size();
4456  CRYPTOPP_ASSERT(a.reg.size()<=N);
4457 
4458  CopyWords(T, a.reg, a.reg.size());
4459  SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
4460  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4461  unsigned k = AlmostInverse(R, T, R, N, m_modulus.reg, N);
4462 
4463 // cout << "k=" << k << " N*32=" << 32*N << endl;
4464 
4465  if (k>N*WORD_BITS)
4466  DivideByPower2Mod(R, R, k-N*WORD_BITS, m_modulus.reg, N);
4467  else
4468  MultiplyByPower2Mod(R, R, N*WORD_BITS-k, m_modulus.reg, N);
4469 
4470  return m_result;
4471 }
4472 
4473 // Specialization declared in misc.h to allow us to print integers
4474 // with additional control options, like arbirary bases and uppercase.
4475 template <> CRYPTOPP_DLL
4476 std::string IntToString<Integer>(Integer value, unsigned int base)
4477 {
4478  // Hack... set the high bit for uppercase. Set the next bit fo a suffix.
4479  static const unsigned int BIT_32 = (1U << 31);
4480  const bool UPPER = !!(base & BIT_32);
4481  static const unsigned int BIT_31 = (1U << 30);
4482  const bool BASE = !!(base & BIT_31);
4483 
4484  const char CH = UPPER ? 'A' : 'a';
4485  base &= ~(BIT_32|BIT_31);
4486  CRYPTOPP_ASSERT(base >= 2 && base <= 32);
4487 
4488  if (value == 0)
4489  return "0";
4490 
4491  bool negative = false, zero = false;
4492  if (value.IsNegative())
4493  {
4494  negative = true;
4495  value.Negate();
4496  }
4497 
4498  if (!value)
4499  zero = true;
4500 
4501  SecBlock<char> s(value.BitCount() / (SaturatingSubtract1(BitPrecision(base),1U)) + 1);
4502  Integer temp;
4503 
4504  unsigned int i=0;
4505  while (!!value)
4506  {
4507  word digit;
4508  Integer::Divide(digit, temp, value, word(base));
4509  s[i++]=char((digit < 10 ? '0' : (CH - 10)) + digit);
4510  value.swap(temp);
4511  }
4512 
4513  std::string result;
4514  result.reserve(i+2);
4515 
4516  if (negative)
4517  result += '-';
4518 
4519  if (zero)
4520  result += '0';
4521 
4522  while (i--)
4523  result += s[i];
4524 
4525  if (BASE)
4526  {
4527  if (base == 10)
4528  result += '.';
4529  else if (base == 16)
4530  result += 'h';
4531  else if (base == 8)
4532  result += 'o';
4533  else if (base == 2)
4534  result += 'b';
4535  }
4536 
4537  return result;
4538 }
4539 
4540 // Specialization declared in misc.h to avoid Coverity findings.
4541 template <> CRYPTOPP_DLL
4542 std::string IntToString<word64>(word64 value, unsigned int base)
4543 {
4544  // Hack... set the high bit for uppercase.
4545  static const unsigned int HIGH_BIT = (1U << 31);
4546  const char CH = !!(base & HIGH_BIT) ? 'A' : 'a';
4547  base &= ~HIGH_BIT;
4548 
4549  CRYPTOPP_ASSERT(base >= 2);
4550  if (value == 0)
4551  return "0";
4552 
4553  std::string result;
4554  while (value > 0)
4555  {
4556  word64 digit = value % base;
4557  result = char((digit < 10 ? '0' : (CH - 10)) + digit) + result;
4558  value /= base;
4559  }
4560  return result;
4561 }
4562 
4563 NAMESPACE_END
4564 
4565 #endif
Used to pass byte array input as part of a NameValuePairs object.
Definition: algparam.h:29
An invalid argument was detected.
Definition: cryptlib.h:187
void DEREncode(BufferedTransformation &bt) const
Encodes in DER format.
Definition: integer.cpp:4261
Integer CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
TODO.
Definition: integer.cpp:4376
Classes for working with NameValuePairs.
const Integer & Square(const Integer &a) const
Square an element in the ring.
Definition: integer.cpp:4424
const Integer & MultiplicativeInverse(const Integer &a) const
Calculate the multiplicative inverse of an element in the ring.
Definition: integer.cpp:4450
void swap(SecBlock< T, A > &b)
Swap contents with another SecBlock.
Definition: secblock.h:714
a number which is probabilistically prime
Definition: integer.h:91
Utility functions for the Crypto++ library.
ByteOrder
Provides the byte ordering.
Definition: cryptlib.h:128
Restricts the instantiation of a class to one static object without locks.
Definition: misc.h:264
bool NotZero() const
Determines if the Integer is non-0.
Definition: integer.h:329
bool GetBit(size_t i) const
Provides the i-th bit of the Integer.
Definition: integer.cpp:3046
void CleanNew(size_type newSize)
Change size without preserving contents.
Definition: secblock.h:660
void Encode(byte *output, size_t outputLen, Signedness sign=UNSIGNED) const
Encode in big-endian format.
Definition: integer.cpp:3341
an unsigned value
Definition: integer.h:81
Integer & Reduce(Integer &a, const Integer &b) const
TODO.
Definition: integer.cpp:4347
virtual size_t Peek(byte &outByte) const
Peek a 8-bit byte.
Definition: cryptlib.cpp:534
bool IsOdd() const
Determines if the Integer is odd parity.
Definition: integer.h:347
T GetValueWithDefault(const char *name, T defaultValue) const
Get a named value.
Definition: cryptlib.h:353
size_t DEREncodeUnsigned(BufferedTransformation &out, T w, byte asnTag=INTEGER)
DER Encode unsigned value.
Definition: asn.h:442
virtual void GenerateBlock(byte *output, size_t size)
Generate random array of bytes.
Definition: cryptlib.cpp:326
size_t BitsToBytes(size_t bitCount)
Returns the number of 8-bit bytes or octets required for the specified number of bits.
Definition: misc.h:740
This file contains helper classes/functions for implementing public key algorithms.
bool FirstPrime(Integer &p, const Integer &max, const Integer &equiv, const Integer &mod, const PrimeSelector *pSelector)
Finds a random prime of special form.
Definition: nbtheory.cpp:381
static Integer Gcd(const Integer &a, const Integer &n)
greatest common divisor
Definition: integer.cpp:4193
void resize(size_type newSize)
Change size and preserve contents.
Definition: secblock.h:705
size_t BitsToWords(size_t bitCount)
Returns the number of words required for the specified number of bits.
Definition: misc.h:760
const Integer & Subtract(const Integer &a, const Integer &b) const
Subtracts elements in the ring.
Definition: integer.cpp:4330
unsigned int BytePrecision(const T &value)
Returns the number of 8-bit bytes or octets required for a value.
Definition: misc.h:623
void CleanGrow(size_type newSize)
Change size and preserve contents.
Definition: secblock.h:689
bool IsEven() const
Determines if the Integer is even parity.
Definition: integer.h:344
Secure memory block with allocator and cleanup.
Definition: secblock.h:437
unsigned int WordCount() const
Determines the number of words required to represent the Integer.
Definition: integer.cpp:3266
void OpenPGPDecode(const byte *input, size_t inputLen)
Decode from OpenPGP format.
Definition: integer.cpp:3416
Signedness
Used when importing and exporting integers.
Definition: integer.h:79
size_type size() const
Provides the count of elements in the SecBlock.
Definition: secblock.h:524
ASN.1 object identifiers for algorthms and schemes.
Square block cipher.
Definition: square.h:24
Classes for automatic resource management.
size_t size() const
Length of the memory block.
Definition: algparam.h:93
bool IsP4()
Determines if the CPU is an Intel P4.
Definition: cpu.h:215
byte GetByte(size_t i) const
Provides the i-th byte of the Integer.
Definition: integer.cpp:3068
Library configuration file.
MontgomeryRepresentation(const Integer &modulus)
Construct a IsMontgomeryRepresentation.
Definition: integer.cpp:4400
static void DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n)
returns same result as Divide(r, q, a, Power2(n)), but faster
Definition: integer.cpp:3992
Ring of congruence classes modulo n.
Definition: modarith.h:34
STL namespace.
Interface for random number generators.
Definition: cryptlib.h:1193
size_t BytesToWords(size_t byteCount)
Returns the number of words required for the specified number of bytes.
Definition: misc.h:750
void Randomize(RandomNumberGenerator &rng, size_t bitCount)
Set this Integer to random integer.
Definition: integer.cpp:3430
std::string IntToString< word64 >(word64 value, unsigned int base)
Converts an unsigned value to a string.
Definition: integer.cpp:4542
size_t MinEncodedSize(Signedness sign=UNSIGNED) const
The minimum number of bytes to encode this integer.
Definition: integer.cpp:3329
void New(size_type newSize)
Change size without preserving contents.
Definition: secblock.h:647
void SetByte(size_t n, byte value)
Set the n-th byte to value.
Definition: integer.cpp:3076
the value is negative
Definition: integer.h:73
SecBlock typedef.
Definition: secblock.h:731
virtual void SimultaneousExponentiate(Element *results, const Element &base, const Integer *exponents, unsigned int exponentsCount) const
Exponentiates a base to multiple exponents in the Ring.
Definition: algebra.cpp:334
BER Sequence Decoder.
Definition: asn.h:294
Interface for buffered transformations.
Definition: cryptlib.h:1359
void DEREncodeAsOctetString(BufferedTransformation &bt, size_t length) const
encode absolute value as big-endian octet string
Definition: integer.cpp:3385
lword MaxRetrievable() const
Provides the number of bytes ready for retrieval.
Definition: queue.h:35
const byte * begin() const
Pointer to the first byte in the memory block.
Definition: algparam.h:89
bool IsConvertableToLong() const
Determines if the Integer is convertable to Long.
Definition: integer.cpp:2922
Integer MultiplicativeInverse() const
return inverse if 1 or -1, otherwise return 0
Definition: integer.cpp:4177
static const Integer & One()
Integer representing 1.
Definition: integer.cpp:3019
bool GetIntValue(const char *name, int &value) const
Get a named value with type int.
Definition: cryptlib.h:376
const Integer & Add(const Integer &a, const Integer &b) const
Adds elements in the ring.
Definition: integer.cpp:4290
byte order is little-endian
Definition: cryptlib.h:130
Sign
Used internally to represent the integer.
Definition: integer.h:69
Pointer that overloads operator ->
Definition: smartptr.h:39
bool IsSquare() const
return whether this integer is a perfect square
Definition: integer.cpp:4166
size_t OpenPGPEncode(byte *output, size_t bufferSize) const
Encode absolute value in OpenPGP format.
Definition: integer.cpp:3401
Classes and functions for secure memory allocations.
unsigned int BitCount() const
Determines the number of bits required to represent the Integer.
Definition: integer.cpp:3280
void BERDecodeElement(BufferedTransformation &in, Element &a) const
Decodes element in DER format.
Definition: integer.cpp:4274
bool IsUnit() const
is 1 or -1
Definition: integer.cpp:4172
Copy input to a memory buffer.
Definition: filters.h:1016
Integer SquareRoot() const
extract square root, if negative return 0, else return floor of square root
Definition: integer.cpp:4148
bool IsPositive() const
Determines if the Integer is positive.
Definition: integer.h:338
bool GetValue(const char *name, T &value) const
Get a named value.
Definition: cryptlib.h:340
a number with no special properties
Definition: integer.h:89
Integer Squared() const
Definition: integer.h:496
size_t Put(byte inByte, bool blocking=true)
Input a byte for processing.
Definition: cryptlib.h:1385
bool IsNegative() const
Determines if the Integer is negative.
Definition: integer.h:332
AlgorithmParameters MakeParameters(const char *name, const T &value, bool throwIfNotUsed=true)
Create an object that implements NameValuePairs.
Definition: algparam.h:554
void swap(Integer &a)
Swaps this Integer with another Integer.
Definition: integer.cpp:3106
Integer()
Creates the zero integer.
Definition: integer.cpp:2882
unsigned int TrailingZeros(word32 v)
Determines the number of trailing 0-bits in a value.
Definition: misc.h:670
signed long ConvertToLong() const
Convert the Integer to Long.
Definition: integer.cpp:2936
Exception thrown when an error is encountered decoding an OpenPGP integer.
Definition: integer.h:278
void Negate()
Reverse the Sign of the Integer.
Definition: integer.cpp:4114
T Crop(T value, size_t bits)
Truncates the value to the specified number of bits.
Definition: misc.h:728
Integer Times(const Integer &b) const
Definition: integer.cpp:3916
void BERDecodeAsOctetString(BufferedTransformation &bt, size_t length)
Decode nonnegative value from big-endian octet string.
Definition: integer.cpp:3392
Application callback to signal suitability of a cabdidate prime.
Definition: nbtheory.h:80
void ConditionalSwapPointers(bool c, T &a, T &b)
Performs a branchless swap of pointers a and b if condition c is true.
Definition: misc.h:1063
static Integer Power2(size_t e)
Exponentiates to a power of 2.
Definition: integer.cpp:2998
Multiple precision integer with arithmetic operations.
Definition: integer.h:45
a signed value
Definition: integer.h:83
static const Integer & Two()
Integer representing 2.
Definition: integer.cpp:3024
Integer ConvertOut(const Integer &a) const
Reduces an element in the congruence class.
Definition: integer.cpp:4437
virtual Element CascadeExponentiate(const Element &x, const Integer &e1, const Element &y, const Integer &e2) const
TODO.
Definition: algebra.cpp:323
virtual lword Skip(lword skipMax=LWORD_MAX)
Discard skipMax bytes from the output buffer.
Definition: cryptlib.cpp:553
RandomNumberType
Properties of a random integer.
Definition: integer.h:87
const Integer & Inverse(const Integer &a) const
Inverts the element in the ring.
Definition: integer.cpp:4364
const char * Seed()
ConstByteArrayParameter.
Definition: argnames.h:19
byte order is big-endian
Definition: cryptlib.h:132
Safely right shift values when undefined behavior could occur.
const T & STDMIN(const T &a, const T &b)
Replacement function for std::min.
Definition: misc.h:467
String-based implementation of Store interface.
Definition: filters.h:1066
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.
Definition: trap.h:62
static void Divide(Integer &r, Integer &q, const Integer &a, const Integer &d)
calculate r and q such that (a == d*q + r) && (0 <= r < abs(d))
Definition: integer.cpp:3974
ModularArithmetic(const Integer &modulus=Integer::One())
Construct a ModularArithmetic.
Definition: modarith.h:47
void BERDecodeError()
Raises a BERDecodeErr.
Definition: asn.h:61
Data structure used to store byte strings.
Definition: queue.h:20
Functions for CPU features and intrinsics.
Classes and functions for working with ANS.1 objects.
Classes for SHA-1 and SHA-2 family of message digests.
void SetBit(size_t n, bool value=1)
Set the n-th bit to value.
Definition: integer.cpp:3054
size_t GetWord16(word16 &value, ByteOrder order=BIG_ENDIAN_ORDER)
Retrieve a 16-bit word.
Definition: cryptlib.cpp:750
iterator begin()
Provides an iterator pointing to the first element in the memory block.
Definition: secblock.h:499
Integer & Accumulate(Integer &a, const Integer &b) const
TODO.
Definition: integer.cpp:4310
const char * PointerToPrimeSelector()
const PrimeSelector *
Definition: argnames.h:41
Implementation of BufferedTransformation's attachment interface.
Classes and functions for number theoretic operations.
const Integer & Half(const Integer &a) const
TODO.
Definition: integer.cpp:4279
void DEREncode(BufferedTransformation &bt) const
Encode in DER format.
Definition: integer.cpp:3363
Exception thrown when division by 0 is encountered.
Definition: integer.h:51
DER Sequence Encoder.
Definition: asn.h:304
T1 SaturatingSubtract1(const T1 &a, const T2 &b)
Performs a saturating subtract clamped at 1.
Definition: misc.h:985
Exception thrown when a random number cannot be found that satisfies the condition.
Definition: integer.h:59
Performs modular arithmetic in Montgomery representation for increased speed.
Definition: modarith.h:274
DER General Encoder.
Definition: asn.h:271
bool HasSSE2()
Determines SSE2 availability.
Definition: cpu.h:160
Integer CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
TODO.
Definition: modarith.h:308
const Integer & Multiply(const Integer &a, const Integer &b) const
Multiplies elements in the ring.
Definition: integer.cpp:4411
void GenerateBlock(byte *output, size_t size)
Generate random array of bytes.
Definition: integer.cpp:3471
Integer InverseMod(const Integer &n) const
calculate multiplicative inverse of *this mod n
Definition: integer.cpp:4198
void Decode(const byte *input, size_t inputLen, Signedness sign=UNSIGNED)
Decode from big-endian byte array.
Definition: integer.cpp:3289
size_t PutWord16(word16 value, ByteOrder order=BIG_ENDIAN_ORDER, bool blocking=true)
Input a 16-bit word for processing.
Definition: cryptlib.cpp:714
size_t DEREncodeOctetString(BufferedTransformation &bt, const byte *str, size_t strLen)
DER encode octet string.
Definition: asn.cpp:104
virtual lword MaxRetrievable() const
Provides the number of bytes ready for retrieval.
Definition: cryptlib.cpp:496
Multiple precision integer with arithmetic operations.
void DEREncodeElement(BufferedTransformation &out, const Element &a) const
Encodes element in DER format.
Definition: integer.cpp:4269
Safely left shift values when undefined behavior could occur.
static const Integer & Zero()
Integer representing 0.
Definition: integer.cpp:3014
const T & STDMAX(const T &a, const T &b)
Replacement function for std::max.
Definition: misc.h:477
Euclidean domain.
Definition: algebra.h:315
void Grow(size_type newSize)
Change size and preserve contents.
Definition: secblock.h:673
void BERDecode(const byte *input, size_t inputLen)
Decode from BER format.
Definition: integer.cpp:3370
std::string IntToString< Integer >(Integer value, unsigned int base)
Converts an Integer to a string.
Definition: integer.cpp:4476
lword GetBits(size_t i, size_t n) const
Provides the low order bits of the Integer.
Definition: integer.cpp:3083
virtual size_t Get(byte &outByte)
Retrieve a 8-bit byte.
Definition: cryptlib.cpp:515
Class file for performing modular arithmetic.
Crypto++ library namespace.
size_type SizeInBytes() const
Provides the number of bytes in the SecBlock.
Definition: secblock.h:538
BER General Decoder.
Definition: asn.h:234
void SimultaneousExponentiate(Element *results, const Element &base, const Integer *exponents, unsigned int exponentsCount) const
Exponentiates a base to multiple exponents in the ring.
Definition: integer.cpp:4387
int Compare(const Integer &a) const
Perform signed comparison.
Definition: integer.cpp:4130
unsigned int MaxElementByteLength() const
Provides the maximum byte size of an element in the ring.
Definition: modarith.h:230
void SimultaneousExponentiate(Element *results, const Element &base, const Integer *exponents, unsigned int exponentsCount) const
Exponentiates a base to multiple exponents in the ring.
Definition: modarith.h:311
Object Identifier.
Definition: asn.h:158
Integer Modulo(const Integer &b) const
Definition: integer.cpp:4027
size_t Get(byte &outByte)
Retrieve a 8-bit byte.
Definition: queue.cpp:300
unsigned int BitPrecision(const T &value)
Returns the number of bits required for a value.
Definition: misc.h:645
unsigned int ByteCount() const
Determines the number of bytes required to represent the Integer.
Definition: integer.cpp:3271
Integer ConvertIn(const Integer &a) const
Reduces an element in the congruence class.
Definition: modarith.h:294
the value is positive or 0
Definition: integer.h:71
bool NotNegative() const
Determines if the Integer is non-negative.
Definition: integer.h:335
Interface for retrieving values given their names.
Definition: cryptlib.h:282