Crypto++  5.6.3
Free C++ class library of cryptographic schemes
integer.cpp
1 // integer.cpp - written and placed in the public domain by Wei Dai
2 // contains public domain code contributed by Alister Lee and Leonard Janke
3 
4 #include "pch.h"
5 #include "config.h"
6 
7 #if CRYPTOPP_MSC_VERSION
8 # pragma warning(disable: 4100)
9 #endif
10 
11 #if CRYPTOPP_GCC_DIAGNOSTIC_AVAILABLE
12 # pragma GCC diagnostic ignored "-Wunused"
13 # pragma GCC diagnostic ignored "-Wunused-but-set-variable"
14 #endif
15 
16 #ifndef CRYPTOPP_IMPORTS
17 
18 #include "integer.h"
19 #include "secblock.h"
20 #include "modarith.h"
21 #include "nbtheory.h"
22 #include "smartptr.h"
23 #include "algparam.h"
24 #include "filters.h"
25 #include "asn.h"
26 #include "oids.h"
27 #include "words.h"
28 #include "pubkey.h" // for P1363_KDF2
29 #include "sha.h"
30 #include "cpu.h"
31 #include "misc.h"
32 
33 #include <iostream>
34 
35 #if (_MSC_VER >= 1400) && !defined(_M_ARM)
36  #include <intrin.h>
37 #endif
38 
39 #ifdef __DECCXX
40  #include <c_asm.h>
41 #endif
42 
43 #ifdef CRYPTOPP_MSVC6_NO_PP
44  #pragma message("You do not seem to have the Visual C++ Processor Pack installed, so use of SSE2 instructions will be disabled.")
45 #endif
46 
47 // "Inline assembly operands don't work with .intel_syntax",
48 // http://llvm.org/bugs/show_bug.cgi?id=24232
49 #if CRYPTOPP_BOOL_X32 || defined(CRYPTOPP_DISABLE_INTEL_ASM)
50 # undef CRYPTOPP_X86_ASM_AVAILABLE
51 # undef CRYPTOPP_X32_ASM_AVAILABLE
52 # undef CRYPTOPP_X64_ASM_AVAILABLE
53 # undef CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
54 # undef CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
55 # define CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 0
56 # define CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE 0
57 #else
58 # define CRYPTOPP_INTEGER_SSE2 (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86)
59 #endif
60 
61 // Debian QEMU/ARMEL issue in MultiplyTop; see http://github.com/weidai11/cryptopp/issues/31.
62 #if __ARMEL__ && (CRYPTOPP_GCC_VERSION >= 40900) && (CRYPTOPP_GCC_VERSION < 70000) && __OPTIMIZE__
63 # define WORKAROUND_ARMEL_BUG 1
64 #endif
65 
66 #if WORKAROUND_ARMEL_BUG
67 # pragma GCC push_options
68 # pragma GCC optimize("O1")
69 #endif
70 
71 NAMESPACE_BEGIN(CryptoPP)
72 
73 bool AssignIntToInteger(const std::type_info &valueType, void *pInteger, const void *pInt)
74 {
75  if (valueType != typeid(Integer))
76  return false;
77  *reinterpret_cast<Integer *>(pInteger) = *reinterpret_cast<const int *>(pInt);
78  return true;
79 }
80 
81 inline static int Compare(const word *A, const word *B, size_t N)
82 {
83  while (N--)
84  if (A[N] > B[N])
85  return 1;
86  else if (A[N] < B[N])
87  return -1;
88 
89  return 0;
90 }
91 
92 inline static int Increment(word *A, size_t N, word B=1)
93 {
94  assert(N);
95  word t = A[0];
96  A[0] = t+B;
97  if (A[0] >= t)
98  return 0;
99  for (unsigned i=1; i<N; i++)
100  if (++A[i])
101  return 0;
102  return 1;
103 }
104 
105 inline static int Decrement(word *A, size_t N, word B=1)
106 {
107  assert(N);
108  word t = A[0];
109  A[0] = t-B;
110  if (A[0] <= t)
111  return 0;
112  for (unsigned i=1; i<N; i++)
113  if (A[i]--)
114  return 0;
115  return 1;
116 }
117 
118 static void TwosComplement(word *A, size_t N)
119 {
120  Decrement(A, N);
121  for (unsigned i=0; i<N; i++)
122  A[i] = ~A[i];
123 }
124 
125 static word AtomicInverseModPower2(word A)
126 {
127  assert(A%2==1);
128 
129  word R=A%8;
130 
131  for (unsigned i=3; i<WORD_BITS; i*=2)
132  R = R*(2-R*A);
133 
134  assert(R*A==1);
135  return R;
136 }
137 
138 // ********************************************************
139 
140 #if !defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE) || (defined(__x86_64__) && defined(CRYPTOPP_WORD128_AVAILABLE))
141  #define Declare2Words(x) word x##0, x##1;
142  #define AssignWord(a, b) a##0 = b; a##1 = 0;
143  #define Add2WordsBy1(a, b, c) a##0 = b##0 + c; a##1 = b##1 + (a##0 < c);
144  #define LowWord(a) a##0
145  #define HighWord(a) a##1
146  #ifdef _MSC_VER
147  #define MultiplyWordsLoHi(p0, p1, a, b) p0 = _umul128(a, b, &p1);
148  #ifndef __INTEL_COMPILER
149  #define Double3Words(c, d) d##1 = __shiftleft128(d##0, d##1, 1); d##0 = __shiftleft128(c, d##0, 1); c *= 2;
150  #endif
151  #elif defined(__DECCXX)
152  #define MultiplyWordsLoHi(p0, p1, a, b) p0 = a*b; p1 = asm("umulh %a0, %a1, %v0", a, b);
153  #elif defined(__x86_64__)
154  #if defined(__SUNPRO_CC) && __SUNPRO_CC < 0x5100
155  // Sun Studio's gcc-style inline assembly is heavily bugged as of version 5.9 Patch 124864-09 2008/12/16, but this one works
156  #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "r"(b) : "cc");
157  #else
158  #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
159  #define MulAcc(c, d, a, b) asm ("mulq %6; addq %3, %0; adcq %4, %1; adcq $0, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1), "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
160  #define Double3Words(c, d) asm ("addq %0, %0; adcq %1, %1; adcq %2, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1) : : "cc");
161  #define Acc2WordsBy1(a, b) asm ("addq %2, %0; adcq $0, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b) : "cc");
162  #define Acc2WordsBy2(a, b) asm ("addq %2, %0; adcq %3, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b##0), "r"(b##1) : "cc");
163  #define Acc3WordsBy2(c, d, e) asm ("addq %5, %0; adcq %6, %1; adcq $0, %2;" : "+r"(c), "=r"(e##0), "=r"(e##1) : "1"(d##0), "2"(d##1), "r"(e##0), "r"(e##1) : "cc");
164  #endif
165  #endif
166  #define MultiplyWords(p, a, b) MultiplyWordsLoHi(p##0, p##1, a, b)
167  #ifndef Double3Words
168  #define Double3Words(c, d) d##1 = 2*d##1 + (d##0>>(WORD_BITS-1)); d##0 = 2*d##0 + (c>>(WORD_BITS-1)); c *= 2;
169  #endif
170  #ifndef Acc2WordsBy2
171  #define Acc2WordsBy2(a, b) a##0 += b##0; a##1 += a##0 < b##0; a##1 += b##1;
172  #endif
173  #define AddWithCarry(u, a, b) {word t = a+b; u##0 = t + u##1; u##1 = (t<a) + (u##0<t);}
174  #define SubtractWithBorrow(u, a, b) {word t = a-b; u##0 = t - u##1; u##1 = (t>a) + (u##0>t);}
175  #define GetCarry(u) u##1
176  #define GetBorrow(u) u##1
177 #else
178  #define Declare2Words(x) dword x;
179  #if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER) && !defined(_M_ARM)
180  #define MultiplyWords(p, a, b) p = __emulu(a, b);
181  #else
182  #define MultiplyWords(p, a, b) p = (dword)a*b;
183  #endif
184  #define AssignWord(a, b) a = b;
185  #define Add2WordsBy1(a, b, c) a = b + c;
186  #define Acc2WordsBy2(a, b) a += b;
187  #define LowWord(a) word(a)
188  #define HighWord(a) word(a>>WORD_BITS)
189  #define Double3Words(c, d) d = 2*d + (c>>(WORD_BITS-1)); c *= 2;
190  #define AddWithCarry(u, a, b) u = dword(a) + b + GetCarry(u);
191  #define SubtractWithBorrow(u, a, b) u = dword(a) - b - GetBorrow(u);
192  #define GetCarry(u) HighWord(u)
193  #define GetBorrow(u) word(u>>(WORD_BITS*2-1))
194 #endif
195 #ifndef MulAcc
196  #define MulAcc(c, d, a, b) MultiplyWords(p, a, b); Acc2WordsBy1(p, c); c = LowWord(p); Acc2WordsBy1(d, HighWord(p));
197 #endif
198 #ifndef Acc2WordsBy1
199  #define Acc2WordsBy1(a, b) Add2WordsBy1(a, a, b)
200 #endif
201 #ifndef Acc3WordsBy2
202  #define Acc3WordsBy2(c, d, e) Acc2WordsBy1(e, c); c = LowWord(e); Add2WordsBy1(e, d, HighWord(e));
203 #endif
204 
205 class DWord
206 {
207 public:
208  // Converity finding on default ctor. We've isntrumented the code,
209  // and cannot uncover a case where it affects a result.
210 #if (defined(__COVERITY__) || !defined(NDEBUG)) && defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE)
211  // Repeating pattern of 1010 for debug builds to break things...
212  DWord() : m_whole(0) {memset(&m_whole, 0xa, sizeof(m_whole));}
213 #elif (defined(__COVERITY__) || !defined(NDEBUG)) && !defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE)
214  // Repeating pattern of 1010 for debug builds to break things...
215  DWord() : m_halfs() {memset(&m_halfs, 0xa, sizeof(m_halfs));}
216 #else
217  DWord() {}
218 #endif
219 
220 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
221  explicit DWord(word low) : m_whole(low) {}
222 #else
223  explicit DWord(word low)
224  {
225  m_halfs.low = low;
226  m_halfs.high = 0;
227  }
228 #endif
229 
230  DWord(word low, word high)
231  {
232  m_halfs.low = low;
233  m_halfs.high = high;
234  }
235 
236  static DWord Multiply(word a, word b)
237  {
238  DWord r;
239  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
240  r.m_whole = (dword)a * b;
241  #elif defined(MultiplyWordsLoHi)
242  MultiplyWordsLoHi(r.m_halfs.low, r.m_halfs.high, a, b);
243  #else
244  assert(0);
245  #endif
246  return r;
247  }
248 
249  static DWord MultiplyAndAdd(word a, word b, word c)
250  {
251  DWord r = Multiply(a, b);
252  return r += c;
253  }
254 
255  DWord & operator+=(word a)
256  {
257  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
258  m_whole = m_whole + a;
259  #else
260  m_halfs.low += a;
261  m_halfs.high += (m_halfs.low < a);
262  #endif
263  return *this;
264  }
265 
266  DWord operator+(word a)
267  {
268  DWord r;
269  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
270  r.m_whole = m_whole + a;
271  #else
272  r.m_halfs.low = m_halfs.low + a;
273  r.m_halfs.high = m_halfs.high + (r.m_halfs.low < a);
274  #endif
275  return r;
276  }
277 
278  DWord operator-(DWord a)
279  {
280  DWord r;
281  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
282  r.m_whole = m_whole - a.m_whole;
283  #else
284  r.m_halfs.low = m_halfs.low - a.m_halfs.low;
285  r.m_halfs.high = m_halfs.high - a.m_halfs.high - (r.m_halfs.low > m_halfs.low);
286  #endif
287  return r;
288  }
289 
290  DWord operator-(word a)
291  {
292  DWord r;
293  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
294  r.m_whole = m_whole - a;
295  #else
296  r.m_halfs.low = m_halfs.low - a;
297  r.m_halfs.high = m_halfs.high - (r.m_halfs.low > m_halfs.low);
298  #endif
299  return r;
300  }
301 
302  // returns quotient, which must fit in a word
303  word operator/(word divisor);
304 
305  word operator%(word a);
306 
307  bool operator!() const
308  {
309  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
310  return !m_whole;
311  #else
312  return !m_halfs.high && !m_halfs.low;
313  #endif
314  }
315 
316  word GetLowHalf() const {return m_halfs.low;}
317  word GetHighHalf() const {return m_halfs.high;}
318  word GetHighHalfAsBorrow() const {return 0-m_halfs.high;}
319 
320 private:
321  union
322  {
323  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
324  dword m_whole;
325  #endif
326  struct
327  {
328  #ifdef IS_LITTLE_ENDIAN
329  word low;
330  word high;
331  #else
332  word high;
333  word low;
334  #endif
335  } m_halfs;
336  };
337 };
338 
339 class Word
340 {
341 public:
342  // Converity finding on default ctor. We've isntrumented the code,
343  // and cannot uncover a case where it affects a result.
344 #if defined(__COVERITY__)
345  Word() : m_whole(0) {}
346 #elif !defined(NDEBUG)
347  // Repeating pattern of 1010 for debug builds to break things...
348  Word() : m_whole(0) {memset(&m_whole, 0xa, sizeof(m_whole));}
349 #else
350  Word() {}
351 #endif
352 
353  Word(word value) : m_whole(value) {}
354  Word(hword low, hword high) : m_whole(low | (word(high) << (WORD_BITS/2))) {}
355 
356  static Word Multiply(hword a, hword b)
357  {
358  Word r;
359  r.m_whole = (word)a * b;
360  return r;
361  }
362 
363  Word operator-(Word a)
364  {
365  Word r;
366  r.m_whole = m_whole - a.m_whole;
367  return r;
368  }
369 
370  Word operator-(hword a)
371  {
372  Word r;
373  r.m_whole = m_whole - a;
374  return r;
375  }
376 
377  // returns quotient, which must fit in a word
378  hword operator/(hword divisor)
379  {
380  return hword(m_whole / divisor);
381  }
382 
383  bool operator!() const
384  {
385  return !m_whole;
386  }
387 
388  word GetWhole() const {return m_whole;}
389  hword GetLowHalf() const {return hword(m_whole);}
390  hword GetHighHalf() const {return hword(m_whole>>(WORD_BITS/2));}
391  hword GetHighHalfAsBorrow() const {return 0-hword(m_whole>>(WORD_BITS/2));}
392 
393 private:
394  word m_whole;
395 };
396 
397 // do a 3 word by 2 word divide, returns quotient and leaves remainder in A
398 template <class S, class D>
399 S DivideThreeWordsByTwo(S *A, S B0, S B1, D *dummy=NULL)
400 {
401  CRYPTOPP_UNUSED(dummy);
402 
403  // assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a S
404  assert(A[2] < B1 || (A[2]==B1 && A[1] < B0));
405 
406  // estimate the quotient: do a 2 S by 1 S divide
407  S Q;
408  if (S(B1+1) == 0)
409  Q = A[2];
410  else if (B1 > 0)
411  Q = D(A[1], A[2]) / S(B1+1);
412  else
413  Q = D(A[0], A[1]) / B0;
414 
415  // now subtract Q*B from A
416  D p = D::Multiply(B0, Q);
417  D u = (D) A[0] - p.GetLowHalf();
418  A[0] = u.GetLowHalf();
419  u = (D) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - D::Multiply(B1, Q);
420  A[1] = u.GetLowHalf();
421  A[2] += u.GetHighHalf();
422 
423  // Q <= actual quotient, so fix it
424  while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
425  {
426  u = (D) A[0] - B0;
427  A[0] = u.GetLowHalf();
428  u = (D) A[1] - B1 - u.GetHighHalfAsBorrow();
429  A[1] = u.GetLowHalf();
430  A[2] += u.GetHighHalf();
431  Q++;
432  assert(Q); // shouldn't overflow
433  }
434 
435  return Q;
436 }
437 
438 // do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1
439 template <class S, class D>
440 inline D DivideFourWordsByTwo(S *T, const D &Al, const D &Ah, const D &B)
441 {
442  if (!B) // if divisor is 0, we assume divisor==2**(2*WORD_BITS)
443  return D(Ah.GetLowHalf(), Ah.GetHighHalf());
444  else
445  {
446  S Q[2];
447  T[0] = Al.GetLowHalf();
448  T[1] = Al.GetHighHalf();
449  T[2] = Ah.GetLowHalf();
450  T[3] = Ah.GetHighHalf();
451  Q[1] = DivideThreeWordsByTwo<S, D>(T+1, B.GetLowHalf(), B.GetHighHalf());
452  Q[0] = DivideThreeWordsByTwo<S, D>(T, B.GetLowHalf(), B.GetHighHalf());
453  return D(Q[0], Q[1]);
454  }
455 }
456 
457 // returns quotient, which must fit in a word
458 inline word DWord::operator/(word a)
459 {
460  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
461  return word(m_whole / a);
462  #else
463  hword r[4];
464  return DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a).GetWhole();
465  #endif
466 }
467 
468 inline word DWord::operator%(word a)
469 {
470  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
471  return word(m_whole % a);
472  #else
473  if (a < (word(1) << (WORD_BITS/2)))
474  {
475  hword h = hword(a);
476  word r = m_halfs.high % h;
477  r = ((m_halfs.low >> (WORD_BITS/2)) + (r << (WORD_BITS/2))) % h;
478  return hword((hword(m_halfs.low) + (r << (WORD_BITS/2))) % h);
479  }
480  else
481  {
482  hword r[4];
483  DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a);
484  return Word(r[0], r[1]).GetWhole();
485  }
486  #endif
487 }
488 
489 // ********************************************************
490 
491 // Use some tricks to share assembly code between MSVC and GCC
492 #if defined(__GNUC__)
493  #define AddPrologue \
494  int result; \
495  __asm__ __volatile__ \
496  ( \
497  INTEL_NOPREFIX
498  #define AddEpilogue \
499  ".att_syntax prefix;" \
500  : "=a" (result)\
501  : "d" (C), "a" (A), "D" (B), "c" (N) \
502  : "%esi", "memory", "cc" \
503  );\
504  return result;
505  #define MulPrologue \
506  __asm__ __volatile__ \
507  ( \
508  ".intel_syntax noprefix;" \
509  AS1( push ebx) \
510  AS2( mov ebx, edx)
511  #define MulEpilogue \
512  AS1( pop ebx) \
513  ".att_syntax prefix;" \
514  : \
515  : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B) \
516  : "%esi", "memory", "cc" \
517  );
518  #define SquPrologue MulPrologue
519  #define SquEpilogue \
520  AS1( pop ebx) \
521  ".att_syntax prefix;" \
522  : \
523  : "d" (s_maskLow16), "c" (C), "a" (A) \
524  : "%esi", "%edi", "memory", "cc" \
525  );
526  #define TopPrologue MulPrologue
527  #define TopEpilogue \
528  AS1( pop ebx) \
529  ".att_syntax prefix;" \
530  : \
531  : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B), "S" (L) \
532  : "memory", "cc" \
533  );
534 #else
535  #define AddPrologue \
536  __asm push edi \
537  __asm push esi \
538  __asm mov eax, [esp+12] \
539  __asm mov edi, [esp+16]
540  #define AddEpilogue \
541  __asm pop esi \
542  __asm pop edi \
543  __asm ret 8
544 #if _MSC_VER < 1300
545  #define SaveEBX __asm push ebx
546  #define RestoreEBX __asm pop ebx
547 #else
548  #define SaveEBX
549  #define RestoreEBX
550 #endif
551  #define SquPrologue \
552  AS2( mov eax, A) \
553  AS2( mov ecx, C) \
554  SaveEBX \
555  AS2( lea ebx, s_maskLow16)
556  #define MulPrologue \
557  AS2( mov eax, A) \
558  AS2( mov edi, B) \
559  AS2( mov ecx, C) \
560  SaveEBX \
561  AS2( lea ebx, s_maskLow16)
562  #define TopPrologue \
563  AS2( mov eax, A) \
564  AS2( mov edi, B) \
565  AS2( mov ecx, C) \
566  AS2( mov esi, L) \
567  SaveEBX \
568  AS2( lea ebx, s_maskLow16)
569  #define SquEpilogue RestoreEBX
570  #define MulEpilogue RestoreEBX
571  #define TopEpilogue RestoreEBX
572 #endif
573 
574 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
575 extern "C" {
576 int Baseline_Add(size_t N, word *C, const word *A, const word *B);
577 int Baseline_Sub(size_t N, word *C, const word *A, const word *B);
578 }
579 #elif defined(CRYPTOPP_X64_ASM_AVAILABLE) && defined(__GNUC__) && defined(CRYPTOPP_WORD128_AVAILABLE)
580 int Baseline_Add(size_t N, word *C, const word *A, const word *B)
581 {
582  word result;
583  __asm__ __volatile__
584  (
585  INTEL_NOPREFIX
586  AS1( neg %1)
587  ASJ( jz, 1, f)
588  AS2( mov %0,[%3+8*%1])
589  AS2( add %0,[%4+8*%1])
590  AS2( mov [%2+8*%1],%0)
591  ASL(0)
592  AS2( mov %0,[%3+8*%1+8])
593  AS2( adc %0,[%4+8*%1+8])
594  AS2( mov [%2+8*%1+8],%0)
595  AS2( lea %1,[%1+2])
596  ASJ( jrcxz, 1, f)
597  AS2( mov %0,[%3+8*%1])
598  AS2( adc %0,[%4+8*%1])
599  AS2( mov [%2+8*%1],%0)
600  ASJ( jmp, 0, b)
601  ASL(1)
602  AS2( mov %0, 0)
603  AS2( adc %0, %0)
604  ATT_NOPREFIX
605  : "=&r" (result), "+c" (N)
606  : "r" (C+N), "r" (A+N), "r" (B+N)
607  : "memory", "cc"
608  );
609  return (int)result;
610 }
611 
612 int Baseline_Sub(size_t N, word *C, const word *A, const word *B)
613 {
614  word result;
615  __asm__ __volatile__
616  (
617  INTEL_NOPREFIX
618  AS1( neg %1)
619  ASJ( jz, 1, f)
620  AS2( mov %0,[%3+8*%1])
621  AS2( sub %0,[%4+8*%1])
622  AS2( mov [%2+8*%1],%0)
623  ASL(0)
624  AS2( mov %0,[%3+8*%1+8])
625  AS2( sbb %0,[%4+8*%1+8])
626  AS2( mov [%2+8*%1+8],%0)
627  AS2( lea %1,[%1+2])
628  ASJ( jrcxz, 1, f)
629  AS2( mov %0,[%3+8*%1])
630  AS2( sbb %0,[%4+8*%1])
631  AS2( mov [%2+8*%1],%0)
632  ASJ( jmp, 0, b)
633  ASL(1)
634  AS2( mov %0, 0)
635  AS2( adc %0, %0)
636  ATT_NOPREFIX
637  : "=&r" (result), "+c" (N)
638  : "r" (C+N), "r" (A+N), "r" (B+N)
639  : "memory", "cc"
640  );
641  return (int)result;
642 }
643 #elif defined(CRYPTOPP_X86_ASM_AVAILABLE) && CRYPTOPP_BOOL_X86
644 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
645 {
646  AddPrologue
647 
648  // now: eax = A, edi = B, edx = C, ecx = N
649  AS2( lea eax, [eax+4*ecx])
650  AS2( lea edi, [edi+4*ecx])
651  AS2( lea edx, [edx+4*ecx])
652 
653  AS1( neg ecx) // ecx is negative index
654  AS2( test ecx, 2) // this clears carry flag
655  ASJ( jz, 0, f)
656  AS2( sub ecx, 2)
657  ASJ( jmp, 1, f)
658 
659  ASL(0)
660  ASJ( jecxz, 2, f) // loop until ecx overflows and becomes zero
661  AS2( mov esi,[eax+4*ecx])
662  AS2( adc esi,[edi+4*ecx])
663  AS2( mov [edx+4*ecx],esi)
664  AS2( mov esi,[eax+4*ecx+4])
665  AS2( adc esi,[edi+4*ecx+4])
666  AS2( mov [edx+4*ecx+4],esi)
667  ASL(1)
668  AS2( mov esi,[eax+4*ecx+8])
669  AS2( adc esi,[edi+4*ecx+8])
670  AS2( mov [edx+4*ecx+8],esi)
671  AS2( mov esi,[eax+4*ecx+12])
672  AS2( adc esi,[edi+4*ecx+12])
673  AS2( mov [edx+4*ecx+12],esi)
674 
675  AS2( lea ecx,[ecx+4]) // advance index, avoid inc which causes slowdown on Intel Core 2
676  ASJ( jmp, 0, b)
677 
678  ASL(2)
679  AS2( mov eax, 0)
680  AS1( setc al) // store carry into eax (return result register)
681 
682  AddEpilogue
683 }
684 
685 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B)
686 {
687  AddPrologue
688 
689  // now: eax = A, edi = B, edx = C, ecx = N
690  AS2( lea eax, [eax+4*ecx])
691  AS2( lea edi, [edi+4*ecx])
692  AS2( lea edx, [edx+4*ecx])
693 
694  AS1( neg ecx) // ecx is negative index
695  AS2( test ecx, 2) // this clears carry flag
696  ASJ( jz, 0, f)
697  AS2( sub ecx, 2)
698  ASJ( jmp, 1, f)
699 
700  ASL(0)
701  ASJ( jecxz, 2, f) // loop until ecx overflows and becomes zero
702  AS2( mov esi,[eax+4*ecx])
703  AS2( sbb esi,[edi+4*ecx])
704  AS2( mov [edx+4*ecx],esi)
705  AS2( mov esi,[eax+4*ecx+4])
706  AS2( sbb esi,[edi+4*ecx+4])
707  AS2( mov [edx+4*ecx+4],esi)
708  ASL(1)
709  AS2( mov esi,[eax+4*ecx+8])
710  AS2( sbb esi,[edi+4*ecx+8])
711  AS2( mov [edx+4*ecx+8],esi)
712  AS2( mov esi,[eax+4*ecx+12])
713  AS2( sbb esi,[edi+4*ecx+12])
714  AS2( mov [edx+4*ecx+12],esi)
715 
716  AS2( lea ecx,[ecx+4]) // advance index, avoid inc which causes slowdown on Intel Core 2
717  ASJ( jmp, 0, b)
718 
719  ASL(2)
720  AS2( mov eax, 0)
721  AS1( setc al) // store carry into eax (return result register)
722 
723  AddEpilogue
724 }
725 
726 #if CRYPTOPP_INTEGER_SSE2
727 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Add(size_t N, word *C, const word *A, const word *B)
728 {
729  AddPrologue
730 
731  // now: eax = A, edi = B, edx = C, ecx = N
732  AS2( lea eax, [eax+4*ecx])
733  AS2( lea edi, [edi+4*ecx])
734  AS2( lea edx, [edx+4*ecx])
735 
736  AS1( neg ecx) // ecx is negative index
737  AS2( pxor mm2, mm2)
738  ASJ( jz, 2, f)
739  AS2( test ecx, 2) // this clears carry flag
740  ASJ( jz, 0, f)
741  AS2( sub ecx, 2)
742  ASJ( jmp, 1, f)
743 
744  ASL(0)
745  AS2( movd mm0, DWORD PTR [eax+4*ecx])
746  AS2( movd mm1, DWORD PTR [edi+4*ecx])
747  AS2( paddq mm0, mm1)
748  AS2( paddq mm2, mm0)
749  AS2( movd DWORD PTR [edx+4*ecx], mm2)
750  AS2( psrlq mm2, 32)
751 
752  AS2( movd mm0, DWORD PTR [eax+4*ecx+4])
753  AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
754  AS2( paddq mm0, mm1)
755  AS2( paddq mm2, mm0)
756  AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
757  AS2( psrlq mm2, 32)
758 
759  ASL(1)
760  AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
761  AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
762  AS2( paddq mm0, mm1)
763  AS2( paddq mm2, mm0)
764  AS2( movd DWORD PTR [edx+4*ecx+8], mm2)
765  AS2( psrlq mm2, 32)
766 
767  AS2( movd mm0, DWORD PTR [eax+4*ecx+12])
768  AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
769  AS2( paddq mm0, mm1)
770  AS2( paddq mm2, mm0)
771  AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
772  AS2( psrlq mm2, 32)
773 
774  AS2( add ecx, 4)
775  ASJ( jnz, 0, b)
776 
777  ASL(2)
778  AS2( movd eax, mm2)
779  AS1( emms)
780 
781  AddEpilogue
782 }
783 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Sub(size_t N, word *C, const word *A, const word *B)
784 {
785  AddPrologue
786 
787  // now: eax = A, edi = B, edx = C, ecx = N
788  AS2( lea eax, [eax+4*ecx])
789  AS2( lea edi, [edi+4*ecx])
790  AS2( lea edx, [edx+4*ecx])
791 
792  AS1( neg ecx) // ecx is negative index
793  AS2( pxor mm2, mm2)
794  ASJ( jz, 2, f)
795  AS2( test ecx, 2) // this clears carry flag
796  ASJ( jz, 0, f)
797  AS2( sub ecx, 2)
798  ASJ( jmp, 1, f)
799 
800  ASL(0)
801  AS2( movd mm0, DWORD PTR [eax+4*ecx])
802  AS2( movd mm1, DWORD PTR [edi+4*ecx])
803  AS2( psubq mm0, mm1)
804  AS2( psubq mm0, mm2)
805  AS2( movd DWORD PTR [edx+4*ecx], mm0)
806  AS2( psrlq mm0, 63)
807 
808  AS2( movd mm2, DWORD PTR [eax+4*ecx+4])
809  AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
810  AS2( psubq mm2, mm1)
811  AS2( psubq mm2, mm0)
812  AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
813  AS2( psrlq mm2, 63)
814 
815  ASL(1)
816  AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
817  AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
818  AS2( psubq mm0, mm1)
819  AS2( psubq mm0, mm2)
820  AS2( movd DWORD PTR [edx+4*ecx+8], mm0)
821  AS2( psrlq mm0, 63)
822 
823  AS2( movd mm2, DWORD PTR [eax+4*ecx+12])
824  AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
825  AS2( psubq mm2, mm1)
826  AS2( psubq mm2, mm0)
827  AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
828  AS2( psrlq mm2, 63)
829 
830  AS2( add ecx, 4)
831  ASJ( jnz, 0, b)
832 
833  ASL(2)
834  AS2( movd eax, mm2)
835  AS1( emms)
836 
837  AddEpilogue
838 }
839 #endif // #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
840 #else
841 int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
842 {
843  assert (N%2 == 0);
844 
845  Declare2Words(u);
846  AssignWord(u, 0);
847  for (size_t i=0; i<N; i+=2)
848  {
849  AddWithCarry(u, A[i], B[i]);
850  C[i] = LowWord(u);
851  AddWithCarry(u, A[i+1], B[i+1]);
852  C[i+1] = LowWord(u);
853  }
854  return int(GetCarry(u));
855 }
856 
857 int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B)
858 {
859  assert (N%2 == 0);
860 
861  Declare2Words(u);
862  AssignWord(u, 0);
863  for (size_t i=0; i<N; i+=2)
864  {
865  SubtractWithBorrow(u, A[i], B[i]);
866  C[i] = LowWord(u);
867  SubtractWithBorrow(u, A[i+1], B[i+1]);
868  C[i+1] = LowWord(u);
869  }
870  return int(GetBorrow(u));
871 }
872 #endif
873 
874 static word LinearMultiply(word *C, const word *A, word B, size_t N)
875 {
876  word carry=0;
877  for(unsigned i=0; i<N; i++)
878  {
879  Declare2Words(p);
880  MultiplyWords(p, A[i], B);
881  Acc2WordsBy1(p, carry);
882  C[i] = LowWord(p);
883  carry = HighWord(p);
884  }
885  return carry;
886 }
887 
888 #ifndef CRYPTOPP_DOXYGEN_PROCESSING
889 
890 #define Mul_2 \
891  Mul_Begin(2) \
892  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
893  Mul_End(1, 1)
894 
895 #define Mul_4 \
896  Mul_Begin(4) \
897  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
898  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
899  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
900  Mul_SaveAcc(3, 1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
901  Mul_SaveAcc(4, 2, 3) Mul_Acc(3, 2) \
902  Mul_End(5, 3)
903 
904 #define Mul_8 \
905  Mul_Begin(8) \
906  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
907  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
908  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
909  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
910  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
911  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
912  Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
913  Mul_SaveAcc(7, 1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
914  Mul_SaveAcc(8, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
915  Mul_SaveAcc(9, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
916  Mul_SaveAcc(10, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
917  Mul_SaveAcc(11, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
918  Mul_SaveAcc(12, 6, 7) Mul_Acc(7, 6) \
919  Mul_End(13, 7)
920 
921 #define Mul_16 \
922  Mul_Begin(16) \
923  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
924  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
925  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
926  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
927  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
928  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
929  Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
930  Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
931  Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
932  Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
933  Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
934  Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
935  Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
936  Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
937  Mul_SaveAcc(14, 0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
938  Mul_SaveAcc(15, 1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
939  Mul_SaveAcc(16, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
940  Mul_SaveAcc(17, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
941  Mul_SaveAcc(18, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
942  Mul_SaveAcc(19, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
943  Mul_SaveAcc(20, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
944  Mul_SaveAcc(21, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
945  Mul_SaveAcc(22, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
946  Mul_SaveAcc(23, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
947  Mul_SaveAcc(24, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
948  Mul_SaveAcc(25, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
949  Mul_SaveAcc(26, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
950  Mul_SaveAcc(27, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
951  Mul_SaveAcc(28, 14, 15) Mul_Acc(15, 14) \
952  Mul_End(29, 15)
953 
954 #define Squ_2 \
955  Squ_Begin(2) \
956  Squ_End(2)
957 
958 #define Squ_4 \
959  Squ_Begin(4) \
960  Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
961  Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
962  Squ_SaveAcc(3, 1, 3) Squ_Diag(2) \
963  Squ_SaveAcc(4, 2, 3) Squ_NonDiag \
964  Squ_End(4)
965 
966 #define Squ_8 \
967  Squ_Begin(8) \
968  Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
969  Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
970  Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
971  Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
972  Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
973  Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
974  Squ_SaveAcc(7, 1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
975  Squ_SaveAcc(8, 2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
976  Squ_SaveAcc(9, 3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
977  Squ_SaveAcc(10, 4, 7) Squ_Acc(5, 6) Squ_NonDiag \
978  Squ_SaveAcc(11, 5, 7) Squ_Diag(6) \
979  Squ_SaveAcc(12, 6, 7) Squ_NonDiag \
980  Squ_End(8)
981 
982 #define Squ_16 \
983  Squ_Begin(16) \
984  Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
985  Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
986  Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
987  Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
988  Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
989  Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
990  Squ_SaveAcc(7, 0, 8) Squ_Acc(1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
991  Squ_SaveAcc(8, 0, 9) Squ_Acc(1, 8) Squ_Acc(2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
992  Squ_SaveAcc(9, 0, 10) Squ_Acc(1, 9) Squ_Acc(2, 8) Squ_Acc(3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
993  Squ_SaveAcc(10, 0, 11) Squ_Acc(1, 10) Squ_Acc(2, 9) Squ_Acc(3, 8) Squ_Acc(4, 7) Squ_Acc(5, 6) Squ_NonDiag \
994  Squ_SaveAcc(11, 0, 12) Squ_Acc(1, 11) Squ_Acc(2, 10) Squ_Acc(3, 9) Squ_Acc(4, 8) Squ_Acc(5, 7) Squ_Diag(6) \
995  Squ_SaveAcc(12, 0, 13) Squ_Acc(1, 12) Squ_Acc(2, 11) Squ_Acc(3, 10) Squ_Acc(4, 9) Squ_Acc(5, 8) Squ_Acc(6, 7) Squ_NonDiag \
996  Squ_SaveAcc(13, 0, 14) Squ_Acc(1, 13) Squ_Acc(2, 12) Squ_Acc(3, 11) Squ_Acc(4, 10) Squ_Acc(5, 9) Squ_Acc(6, 8) Squ_Diag(7) \
997  Squ_SaveAcc(14, 0, 15) Squ_Acc(1, 14) Squ_Acc(2, 13) Squ_Acc(3, 12) Squ_Acc(4, 11) Squ_Acc(5, 10) Squ_Acc(6, 9) Squ_Acc(7, 8) Squ_NonDiag \
998  Squ_SaveAcc(15, 1, 15) Squ_Acc(2, 14) Squ_Acc(3, 13) Squ_Acc(4, 12) Squ_Acc(5, 11) Squ_Acc(6, 10) Squ_Acc(7, 9) Squ_Diag(8) \
999  Squ_SaveAcc(16, 2, 15) Squ_Acc(3, 14) Squ_Acc(4, 13) Squ_Acc(5, 12) Squ_Acc(6, 11) Squ_Acc(7, 10) Squ_Acc(8, 9) Squ_NonDiag \
1000  Squ_SaveAcc(17, 3, 15) Squ_Acc(4, 14) Squ_Acc(5, 13) Squ_Acc(6, 12) Squ_Acc(7, 11) Squ_Acc(8, 10) Squ_Diag(9) \
1001  Squ_SaveAcc(18, 4, 15) Squ_Acc(5, 14) Squ_Acc(6, 13) Squ_Acc(7, 12) Squ_Acc(8, 11) Squ_Acc(9, 10) Squ_NonDiag \
1002  Squ_SaveAcc(19, 5, 15) Squ_Acc(6, 14) Squ_Acc(7, 13) Squ_Acc(8, 12) Squ_Acc(9, 11) Squ_Diag(10) \
1003  Squ_SaveAcc(20, 6, 15) Squ_Acc(7, 14) Squ_Acc(8, 13) Squ_Acc(9, 12) Squ_Acc(10, 11) Squ_NonDiag \
1004  Squ_SaveAcc(21, 7, 15) Squ_Acc(8, 14) Squ_Acc(9, 13) Squ_Acc(10, 12) Squ_Diag(11) \
1005  Squ_SaveAcc(22, 8, 15) Squ_Acc(9, 14) Squ_Acc(10, 13) Squ_Acc(11, 12) Squ_NonDiag \
1006  Squ_SaveAcc(23, 9, 15) Squ_Acc(10, 14) Squ_Acc(11, 13) Squ_Diag(12) \
1007  Squ_SaveAcc(24, 10, 15) Squ_Acc(11, 14) Squ_Acc(12, 13) Squ_NonDiag \
1008  Squ_SaveAcc(25, 11, 15) Squ_Acc(12, 14) Squ_Diag(13) \
1009  Squ_SaveAcc(26, 12, 15) Squ_Acc(13, 14) Squ_NonDiag \
1010  Squ_SaveAcc(27, 13, 15) Squ_Diag(14) \
1011  Squ_SaveAcc(28, 14, 15) Squ_NonDiag \
1012  Squ_End(16)
1013 
1014 #define Bot_2 \
1015  Mul_Begin(2) \
1016  Bot_SaveAcc(0, 0, 1) Bot_Acc(1, 0) \
1017  Bot_End(2)
1018 
1019 #define Bot_4 \
1020  Mul_Begin(4) \
1021  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1022  Mul_SaveAcc(1, 2, 0) Mul_Acc(1, 1) Mul_Acc(0, 2) \
1023  Bot_SaveAcc(2, 0, 3) Bot_Acc(1, 2) Bot_Acc(2, 1) Bot_Acc(3, 0) \
1024  Bot_End(4)
1025 
1026 #define Bot_8 \
1027  Mul_Begin(8) \
1028  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1029  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
1030  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1031  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
1032  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
1033  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
1034  Bot_SaveAcc(6, 0, 7) Bot_Acc(1, 6) Bot_Acc(2, 5) Bot_Acc(3, 4) Bot_Acc(4, 3) Bot_Acc(5, 2) Bot_Acc(6, 1) Bot_Acc(7, 0) \
1035  Bot_End(8)
1036 
1037 #define Bot_16 \
1038  Mul_Begin(16) \
1039  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1040  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
1041  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1042  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
1043  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
1044  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
1045  Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
1046  Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
1047  Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
1048  Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
1049  Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
1050  Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
1051  Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
1052  Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
1053  Bot_SaveAcc(14, 0, 15) Bot_Acc(1, 14) Bot_Acc(2, 13) Bot_Acc(3, 12) Bot_Acc(4, 11) Bot_Acc(5, 10) Bot_Acc(6, 9) Bot_Acc(7, 8) Bot_Acc(8, 7) Bot_Acc(9, 6) Bot_Acc(10, 5) Bot_Acc(11, 4) Bot_Acc(12, 3) Bot_Acc(13, 2) Bot_Acc(14, 1) Bot_Acc(15, 0) \
1054  Bot_End(16)
1055 
1056 #endif
1057 
1058 #if 0
1059 #define Mul_Begin(n) \
1060  Declare2Words(p) \
1061  Declare2Words(c) \
1062  Declare2Words(d) \
1063  MultiplyWords(p, A[0], B[0]) \
1064  AssignWord(c, LowWord(p)) \
1065  AssignWord(d, HighWord(p))
1066 
1067 #define Mul_Acc(i, j) \
1068  MultiplyWords(p, A[i], B[j]) \
1069  Acc2WordsBy1(c, LowWord(p)) \
1070  Acc2WordsBy1(d, HighWord(p))
1071 
1072 #define Mul_SaveAcc(k, i, j) \
1073  R[k] = LowWord(c); \
1074  Add2WordsBy1(c, d, HighWord(c)) \
1075  MultiplyWords(p, A[i], B[j]) \
1076  AssignWord(d, HighWord(p)) \
1077  Acc2WordsBy1(c, LowWord(p))
1078 
1079 #define Mul_End(n) \
1080  R[2*n-3] = LowWord(c); \
1081  Acc2WordsBy1(d, HighWord(c)) \
1082  MultiplyWords(p, A[n-1], B[n-1])\
1083  Acc2WordsBy2(d, p) \
1084  R[2*n-2] = LowWord(d); \
1085  R[2*n-1] = HighWord(d);
1086 
1087 #define Bot_SaveAcc(k, i, j) \
1088  R[k] = LowWord(c); \
1089  word e = LowWord(d) + HighWord(c); \
1090  e += A[i] * B[j];
1091 
1092 #define Bot_Acc(i, j) \
1093  e += A[i] * B[j];
1094 
1095 #define Bot_End(n) \
1096  R[n-1] = e;
1097 #else
1098 #define Mul_Begin(n) \
1099  Declare2Words(p) \
1100  word c; \
1101  Declare2Words(d) \
1102  MultiplyWords(p, A[0], B[0]) \
1103  c = LowWord(p); \
1104  AssignWord(d, HighWord(p))
1105 
1106 #define Mul_Acc(i, j) \
1107  MulAcc(c, d, A[i], B[j])
1108 
1109 #define Mul_SaveAcc(k, i, j) \
1110  R[k] = c; \
1111  c = LowWord(d); \
1112  AssignWord(d, HighWord(d)) \
1113  MulAcc(c, d, A[i], B[j])
1114 
1115 #define Mul_End(k, i) \
1116  R[k] = c; \
1117  MultiplyWords(p, A[i], B[i]) \
1118  Acc2WordsBy2(p, d) \
1119  R[k+1] = LowWord(p); \
1120  R[k+2] = HighWord(p);
1121 
1122 #define Bot_SaveAcc(k, i, j) \
1123  R[k] = c; \
1124  c = LowWord(d); \
1125  c += A[i] * B[j];
1126 
1127 #define Bot_Acc(i, j) \
1128  c += A[i] * B[j];
1129 
1130 #define Bot_End(n) \
1131  R[n-1] = c;
1132 #endif
1133 
1134 #define Squ_Begin(n) \
1135  Declare2Words(p) \
1136  word c; \
1137  Declare2Words(d) \
1138  Declare2Words(e) \
1139  MultiplyWords(p, A[0], A[0]) \
1140  R[0] = LowWord(p); \
1141  AssignWord(e, HighWord(p)) \
1142  MultiplyWords(p, A[0], A[1]) \
1143  c = LowWord(p); \
1144  AssignWord(d, HighWord(p)) \
1145  Squ_NonDiag \
1146 
1147 #define Squ_NonDiag \
1148  Double3Words(c, d)
1149 
1150 #define Squ_SaveAcc(k, i, j) \
1151  Acc3WordsBy2(c, d, e) \
1152  R[k] = c; \
1153  MultiplyWords(p, A[i], A[j]) \
1154  c = LowWord(p); \
1155  AssignWord(d, HighWord(p)) \
1156 
1157 #define Squ_Acc(i, j) \
1158  MulAcc(c, d, A[i], A[j])
1159 
1160 #define Squ_Diag(i) \
1161  Squ_NonDiag \
1162  MulAcc(c, d, A[i], A[i])
1163 
1164 #define Squ_End(n) \
1165  Acc3WordsBy2(c, d, e) \
1166  R[2*n-3] = c; \
1167  MultiplyWords(p, A[n-1], A[n-1])\
1168  Acc2WordsBy2(p, e) \
1169  R[2*n-2] = LowWord(p); \
1170  R[2*n-1] = HighWord(p);
1171 
1172 
1173 void Baseline_Multiply2(word *R, const word *A, const word *B)
1174 {
1175  Mul_2
1176 }
1177 
1178 void Baseline_Multiply4(word *R, const word *A, const word *B)
1179 {
1180  Mul_4
1181 }
1182 
1183 void Baseline_Multiply8(word *R, const word *A, const word *B)
1184 {
1185  Mul_8
1186 }
1187 
1188 void Baseline_Square2(word *R, const word *A)
1189 {
1190  Squ_2
1191 }
1192 
1193 void Baseline_Square4(word *R, const word *A)
1194 {
1195  Squ_4
1196 }
1197 
1198 void Baseline_Square8(word *R, const word *A)
1199 {
1200  Squ_8
1201 }
1202 
1203 void Baseline_MultiplyBottom2(word *R, const word *A, const word *B)
1204 {
1205  Bot_2
1206 }
1207 
1208 void Baseline_MultiplyBottom4(word *R, const word *A, const word *B)
1209 {
1210  Bot_4
1211 }
1212 
1213 void Baseline_MultiplyBottom8(word *R, const word *A, const word *B)
1214 {
1215  Bot_8
1216 }
1217 
1218 #define Top_Begin(n) \
1219  Declare2Words(p) \
1220  word c; \
1221  Declare2Words(d) \
1222  MultiplyWords(p, A[0], B[n-2]);\
1223  AssignWord(d, HighWord(p));
1224 
1225 #define Top_Acc(i, j) \
1226  MultiplyWords(p, A[i], B[j]);\
1227  Acc2WordsBy1(d, HighWord(p));
1228 
1229 #define Top_SaveAcc0(i, j) \
1230  c = LowWord(d); \
1231  AssignWord(d, HighWord(d)) \
1232  MulAcc(c, d, A[i], B[j])
1233 
1234 #define Top_SaveAcc1(i, j) \
1235  c = L<c; \
1236  Acc2WordsBy1(d, c); \
1237  c = LowWord(d); \
1238  AssignWord(d, HighWord(d)) \
1239  MulAcc(c, d, A[i], B[j])
1240 
1241 void Baseline_MultiplyTop2(word *R, const word *A, const word *B, word L)
1242 {
1243  CRYPTOPP_UNUSED(L);
1244  word T[4];
1245  Baseline_Multiply2(T, A, B);
1246  R[0] = T[2];
1247  R[1] = T[3];
1248 }
1249 
1250 void Baseline_MultiplyTop4(word *R, const word *A, const word *B, word L)
1251 {
1252  Top_Begin(4)
1253  Top_Acc(1, 1) Top_Acc(2, 0) \
1254  Top_SaveAcc0(0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1255  Top_SaveAcc1(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
1256  Mul_SaveAcc(0, 2, 3) Mul_Acc(3, 2) \
1257  Mul_End(1, 3)
1258 }
1259 
1260 void Baseline_MultiplyTop8(word *R, const word *A, const word *B, word L)
1261 {
1262  Top_Begin(8)
1263  Top_Acc(1, 5) Top_Acc(2, 4) Top_Acc(3, 3) Top_Acc(4, 2) Top_Acc(5, 1) Top_Acc(6, 0) \
1264  Top_SaveAcc0(0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
1265  Top_SaveAcc1(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
1266  Mul_SaveAcc(0, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
1267  Mul_SaveAcc(1, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
1268  Mul_SaveAcc(2, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
1269  Mul_SaveAcc(3, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
1270  Mul_SaveAcc(4, 6, 7) Mul_Acc(7, 6) \
1271  Mul_End(5, 7)
1272 }
1273 
1274 #if !CRYPTOPP_INTEGER_SSE2 // save memory by not compiling these functions when SSE2 is available
1275 void Baseline_Multiply16(word *R, const word *A, const word *B)
1276 {
1277  Mul_16
1278 }
1279 
1280 void Baseline_Square16(word *R, const word *A)
1281 {
1282  Squ_16
1283 }
1284 
1285 void Baseline_MultiplyBottom16(word *R, const word *A, const word *B)
1286 {
1287  Bot_16
1288 }
1289 
1290 void Baseline_MultiplyTop16(word *R, const word *A, const word *B, word L)
1291 {
1292  Top_Begin(16)
1293  Top_Acc(1, 13) Top_Acc(2, 12) Top_Acc(3, 11) Top_Acc(4, 10) Top_Acc(5, 9) Top_Acc(6, 8) Top_Acc(7, 7) Top_Acc(8, 6) Top_Acc(9, 5) Top_Acc(10, 4) Top_Acc(11, 3) Top_Acc(12, 2) Top_Acc(13, 1) Top_Acc(14, 0) \
1294  Top_SaveAcc0(0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
1295  Top_SaveAcc1(1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
1296  Mul_SaveAcc(0, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
1297  Mul_SaveAcc(1, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
1298  Mul_SaveAcc(2, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
1299  Mul_SaveAcc(3, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
1300  Mul_SaveAcc(4, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
1301  Mul_SaveAcc(5, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
1302  Mul_SaveAcc(6, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
1303  Mul_SaveAcc(7, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
1304  Mul_SaveAcc(8, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
1305  Mul_SaveAcc(9, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
1306  Mul_SaveAcc(10, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
1307  Mul_SaveAcc(11, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
1308  Mul_SaveAcc(12, 14, 15) Mul_Acc(15, 14) \
1309  Mul_End(13, 15)
1310 }
1311 #endif
1312 
1313 // ********************************************************
1314 
1315 #if CRYPTOPP_INTEGER_SSE2
1316 
1317 CRYPTOPP_ALIGN_DATA(16) static const word32 s_maskLow16[4] CRYPTOPP_SECTION_ALIGN16 = {0xffff,0xffff,0xffff,0xffff};
1318 
1319 #undef Mul_Begin
1320 #undef Mul_Acc
1321 #undef Top_Begin
1322 #undef Top_Acc
1323 #undef Squ_Acc
1324 #undef Squ_NonDiag
1325 #undef Squ_Diag
1326 #undef Squ_SaveAcc
1327 #undef Squ_Begin
1328 #undef Mul_SaveAcc
1329 #undef Bot_Acc
1330 #undef Bot_SaveAcc
1331 #undef Bot_End
1332 #undef Squ_End
1333 #undef Mul_End
1334 
1335 #define SSE2_FinalSave(k) \
1336  AS2( psllq xmm5, 16) \
1337  AS2( paddq xmm4, xmm5) \
1338  AS2( movq QWORD PTR [ecx+8*(k)], xmm4)
1339 
1340 #define SSE2_SaveShift(k) \
1341  AS2( movq xmm0, xmm6) \
1342  AS2( punpckhqdq xmm6, xmm0) \
1343  AS2( movq xmm1, xmm7) \
1344  AS2( punpckhqdq xmm7, xmm1) \
1345  AS2( paddd xmm6, xmm0) \
1346  AS2( pslldq xmm6, 4) \
1347  AS2( paddd xmm7, xmm1) \
1348  AS2( paddd xmm4, xmm6) \
1349  AS2( pslldq xmm7, 4) \
1350  AS2( movq xmm6, xmm4) \
1351  AS2( paddd xmm5, xmm7) \
1352  AS2( movq xmm7, xmm5) \
1353  AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
1354  AS2( psrlq xmm6, 16) \
1355  AS2( paddq xmm6, xmm7) \
1356  AS2( punpckhqdq xmm4, xmm0) \
1357  AS2( punpckhqdq xmm5, xmm0) \
1358  AS2( movq QWORD PTR [ecx+8*(k)+2], xmm6) \
1359  AS2( psrlq xmm6, 3*16) \
1360  AS2( paddd xmm4, xmm6) \
1361 
1362 #define Squ_SSE2_SaveShift(k) \
1363  AS2( movq xmm0, xmm6) \
1364  AS2( punpckhqdq xmm6, xmm0) \
1365  AS2( movq xmm1, xmm7) \
1366  AS2( punpckhqdq xmm7, xmm1) \
1367  AS2( paddd xmm6, xmm0) \
1368  AS2( pslldq xmm6, 4) \
1369  AS2( paddd xmm7, xmm1) \
1370  AS2( paddd xmm4, xmm6) \
1371  AS2( pslldq xmm7, 4) \
1372  AS2( movhlps xmm6, xmm4) \
1373  AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
1374  AS2( paddd xmm5, xmm7) \
1375  AS2( movhps QWORD PTR [esp+12], xmm5)\
1376  AS2( psrlq xmm4, 16) \
1377  AS2( paddq xmm4, xmm5) \
1378  AS2( movq QWORD PTR [ecx+8*(k)+2], xmm4) \
1379  AS2( psrlq xmm4, 3*16) \
1380  AS2( paddd xmm4, xmm6) \
1381  AS2( movq QWORD PTR [esp+4], xmm4)\
1382 
1383 #define SSE2_FirstMultiply(i) \
1384  AS2( movdqa xmm7, [esi+(i)*16])\
1385  AS2( movdqa xmm5, [edi-(i)*16])\
1386  AS2( pmuludq xmm5, xmm7) \
1387  AS2( movdqa xmm4, [ebx])\
1388  AS2( movdqa xmm6, xmm4) \
1389  AS2( pand xmm4, xmm5) \
1390  AS2( psrld xmm5, 16) \
1391  AS2( pmuludq xmm7, [edx-(i)*16])\
1392  AS2( pand xmm6, xmm7) \
1393  AS2( psrld xmm7, 16)
1394 
1395 #define Squ_Begin(n) \
1396  SquPrologue \
1397  AS2( mov esi, esp)\
1398  AS2( and esp, 0xfffffff0)\
1399  AS2( lea edi, [esp-32*n])\
1400  AS2( sub esp, 32*n+16)\
1401  AS1( push esi)\
1402  AS2( mov esi, edi) \
1403  AS2( xor edx, edx) \
1404  ASL(1) \
1405  ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
1406  ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
1407  AS2( movdqa [edi+2*edx], xmm0) \
1408  AS2( psrlq xmm0, 32) \
1409  AS2( movdqa [edi+2*edx+16], xmm0) \
1410  AS2( movdqa [edi+16*n+2*edx], xmm1) \
1411  AS2( psrlq xmm1, 32) \
1412  AS2( movdqa [edi+16*n+2*edx+16], xmm1) \
1413  AS2( add edx, 16) \
1414  AS2( cmp edx, 8*(n)) \
1415  ASJ( jne, 1, b) \
1416  AS2( lea edx, [edi+16*n])\
1417  SSE2_FirstMultiply(0) \
1418 
1419 #define Squ_Acc(i) \
1420  ASL(LSqu##i) \
1421  AS2( movdqa xmm1, [esi+(i)*16]) \
1422  AS2( movdqa xmm0, [edi-(i)*16]) \
1423  AS2( movdqa xmm2, [ebx]) \
1424  AS2( pmuludq xmm0, xmm1) \
1425  AS2( pmuludq xmm1, [edx-(i)*16]) \
1426  AS2( movdqa xmm3, xmm2) \
1427  AS2( pand xmm2, xmm0) \
1428  AS2( psrld xmm0, 16) \
1429  AS2( paddd xmm4, xmm2) \
1430  AS2( paddd xmm5, xmm0) \
1431  AS2( pand xmm3, xmm1) \
1432  AS2( psrld xmm1, 16) \
1433  AS2( paddd xmm6, xmm3) \
1434  AS2( paddd xmm7, xmm1) \
1435 
1436 #define Squ_Acc1(i)
1437 #define Squ_Acc2(i) ASC(call, LSqu##i)
1438 #define Squ_Acc3(i) Squ_Acc2(i)
1439 #define Squ_Acc4(i) Squ_Acc2(i)
1440 #define Squ_Acc5(i) Squ_Acc2(i)
1441 #define Squ_Acc6(i) Squ_Acc2(i)
1442 #define Squ_Acc7(i) Squ_Acc2(i)
1443 #define Squ_Acc8(i) Squ_Acc2(i)
1444 
1445 #define SSE2_End(E, n) \
1446  SSE2_SaveShift(2*(n)-3) \
1447  AS2( movdqa xmm7, [esi+16]) \
1448  AS2( movdqa xmm0, [edi]) \
1449  AS2( pmuludq xmm0, xmm7) \
1450  AS2( movdqa xmm2, [ebx]) \
1451  AS2( pmuludq xmm7, [edx]) \
1452  AS2( movdqa xmm6, xmm2) \
1453  AS2( pand xmm2, xmm0) \
1454  AS2( psrld xmm0, 16) \
1455  AS2( paddd xmm4, xmm2) \
1456  AS2( paddd xmm5, xmm0) \
1457  AS2( pand xmm6, xmm7) \
1458  AS2( psrld xmm7, 16) \
1459  SSE2_SaveShift(2*(n)-2) \
1460  SSE2_FinalSave(2*(n)-1) \
1461  AS1( pop esp)\
1462  E
1463 
1464 #define Squ_End(n) SSE2_End(SquEpilogue, n)
1465 #define Mul_End(n) SSE2_End(MulEpilogue, n)
1466 #define Top_End(n) SSE2_End(TopEpilogue, n)
1467 
1468 #define Squ_Column1(k, i) \
1469  Squ_SSE2_SaveShift(k) \
1470  AS2( add esi, 16) \
1471  SSE2_FirstMultiply(1)\
1472  Squ_Acc##i(i) \
1473  AS2( paddd xmm4, xmm4) \
1474  AS2( paddd xmm5, xmm5) \
1475  AS2( movdqa xmm3, [esi]) \
1476  AS2( movq xmm1, QWORD PTR [esi+8]) \
1477  AS2( pmuludq xmm1, xmm3) \
1478  AS2( pmuludq xmm3, xmm3) \
1479  AS2( movdqa xmm0, [ebx])\
1480  AS2( movdqa xmm2, xmm0) \
1481  AS2( pand xmm0, xmm1) \
1482  AS2( psrld xmm1, 16) \
1483  AS2( paddd xmm6, xmm0) \
1484  AS2( paddd xmm7, xmm1) \
1485  AS2( pand xmm2, xmm3) \
1486  AS2( psrld xmm3, 16) \
1487  AS2( paddd xmm6, xmm6) \
1488  AS2( paddd xmm7, xmm7) \
1489  AS2( paddd xmm4, xmm2) \
1490  AS2( paddd xmm5, xmm3) \
1491  AS2( movq xmm0, QWORD PTR [esp+4])\
1492  AS2( movq xmm1, QWORD PTR [esp+12])\
1493  AS2( paddd xmm4, xmm0)\
1494  AS2( paddd xmm5, xmm1)\
1495 
1496 #define Squ_Column0(k, i) \
1497  Squ_SSE2_SaveShift(k) \
1498  AS2( add edi, 16) \
1499  AS2( add edx, 16) \
1500  SSE2_FirstMultiply(1)\
1501  Squ_Acc##i(i) \
1502  AS2( paddd xmm6, xmm6) \
1503  AS2( paddd xmm7, xmm7) \
1504  AS2( paddd xmm4, xmm4) \
1505  AS2( paddd xmm5, xmm5) \
1506  AS2( movq xmm0, QWORD PTR [esp+4])\
1507  AS2( movq xmm1, QWORD PTR [esp+12])\
1508  AS2( paddd xmm4, xmm0)\
1509  AS2( paddd xmm5, xmm1)\
1510 
1511 #define SSE2_MulAdd45 \
1512  AS2( movdqa xmm7, [esi]) \
1513  AS2( movdqa xmm0, [edi]) \
1514  AS2( pmuludq xmm0, xmm7) \
1515  AS2( movdqa xmm2, [ebx]) \
1516  AS2( pmuludq xmm7, [edx]) \
1517  AS2( movdqa xmm6, xmm2) \
1518  AS2( pand xmm2, xmm0) \
1519  AS2( psrld xmm0, 16) \
1520  AS2( paddd xmm4, xmm2) \
1521  AS2( paddd xmm5, xmm0) \
1522  AS2( pand xmm6, xmm7) \
1523  AS2( psrld xmm7, 16)
1524 
1525 #define Mul_Begin(n) \
1526  MulPrologue \
1527  AS2( mov esi, esp)\
1528  AS2( and esp, 0xfffffff0)\
1529  AS2( sub esp, 48*n+16)\
1530  AS1( push esi)\
1531  AS2( xor edx, edx) \
1532  ASL(1) \
1533  ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
1534  ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
1535  ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
1536  AS2( movdqa [esp+20+2*edx], xmm0) \
1537  AS2( psrlq xmm0, 32) \
1538  AS2( movdqa [esp+20+2*edx+16], xmm0) \
1539  AS2( movdqa [esp+20+16*n+2*edx], xmm1) \
1540  AS2( psrlq xmm1, 32) \
1541  AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \
1542  AS2( movdqa [esp+20+32*n+2*edx], xmm2) \
1543  AS2( psrlq xmm2, 32) \
1544  AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \
1545  AS2( add edx, 16) \
1546  AS2( cmp edx, 8*(n)) \
1547  ASJ( jne, 1, b) \
1548  AS2( lea edi, [esp+20])\
1549  AS2( lea edx, [esp+20+16*n])\
1550  AS2( lea esi, [esp+20+32*n])\
1551  SSE2_FirstMultiply(0) \
1552 
1553 #define Mul_Acc(i) \
1554  ASL(LMul##i) \
1555  AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
1556  AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
1557  AS2( movdqa xmm2, [ebx]) \
1558  AS2( pmuludq xmm0, xmm1) \
1559  AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
1560  AS2( movdqa xmm3, xmm2) \
1561  AS2( pand xmm2, xmm0) \
1562  AS2( psrld xmm0, 16) \
1563  AS2( paddd xmm4, xmm2) \
1564  AS2( paddd xmm5, xmm0) \
1565  AS2( pand xmm3, xmm1) \
1566  AS2( psrld xmm1, 16) \
1567  AS2( paddd xmm6, xmm3) \
1568  AS2( paddd xmm7, xmm1) \
1569 
1570 #define Mul_Acc1(i)
1571 #define Mul_Acc2(i) ASC(call, LMul##i)
1572 #define Mul_Acc3(i) Mul_Acc2(i)
1573 #define Mul_Acc4(i) Mul_Acc2(i)
1574 #define Mul_Acc5(i) Mul_Acc2(i)
1575 #define Mul_Acc6(i) Mul_Acc2(i)
1576 #define Mul_Acc7(i) Mul_Acc2(i)
1577 #define Mul_Acc8(i) Mul_Acc2(i)
1578 #define Mul_Acc9(i) Mul_Acc2(i)
1579 #define Mul_Acc10(i) Mul_Acc2(i)
1580 #define Mul_Acc11(i) Mul_Acc2(i)
1581 #define Mul_Acc12(i) Mul_Acc2(i)
1582 #define Mul_Acc13(i) Mul_Acc2(i)
1583 #define Mul_Acc14(i) Mul_Acc2(i)
1584 #define Mul_Acc15(i) Mul_Acc2(i)
1585 #define Mul_Acc16(i) Mul_Acc2(i)
1586 
1587 #define Mul_Column1(k, i) \
1588  SSE2_SaveShift(k) \
1589  AS2( add esi, 16) \
1590  SSE2_MulAdd45\
1591  Mul_Acc##i(i) \
1592 
1593 #define Mul_Column0(k, i) \
1594  SSE2_SaveShift(k) \
1595  AS2( add edi, 16) \
1596  AS2( add edx, 16) \
1597  SSE2_MulAdd45\
1598  Mul_Acc##i(i) \
1599 
1600 #define Bot_Acc(i) \
1601  AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
1602  AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
1603  AS2( pmuludq xmm0, xmm1) \
1604  AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
1605  AS2( paddq xmm4, xmm0) \
1606  AS2( paddd xmm6, xmm1)
1607 
1608 #define Bot_SaveAcc(k) \
1609  SSE2_SaveShift(k) \
1610  AS2( add edi, 16) \
1611  AS2( add edx, 16) \
1612  AS2( movdqa xmm6, [esi]) \
1613  AS2( movdqa xmm0, [edi]) \
1614  AS2( pmuludq xmm0, xmm6) \
1615  AS2( paddq xmm4, xmm0) \
1616  AS2( psllq xmm5, 16) \
1617  AS2( paddq xmm4, xmm5) \
1618  AS2( pmuludq xmm6, [edx])
1619 
1620 #define Bot_End(n) \
1621  AS2( movhlps xmm7, xmm6) \
1622  AS2( paddd xmm6, xmm7) \
1623  AS2( psllq xmm6, 32) \
1624  AS2( paddd xmm4, xmm6) \
1625  AS2( movq QWORD PTR [ecx+8*((n)-1)], xmm4) \
1626  AS1( pop esp)\
1627  MulEpilogue
1628 
1629 #define Top_Begin(n) \
1630  TopPrologue \
1631  AS2( mov edx, esp)\
1632  AS2( and esp, 0xfffffff0)\
1633  AS2( sub esp, 48*n+16)\
1634  AS1( push edx)\
1635  AS2( xor edx, edx) \
1636  ASL(1) \
1637  ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
1638  ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
1639  ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
1640  AS2( movdqa [esp+20+2*edx], xmm0) \
1641  AS2( psrlq xmm0, 32) \
1642  AS2( movdqa [esp+20+2*edx+16], xmm0) \
1643  AS2( movdqa [esp+20+16*n+2*edx], xmm1) \
1644  AS2( psrlq xmm1, 32) \
1645  AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \
1646  AS2( movdqa [esp+20+32*n+2*edx], xmm2) \
1647  AS2( psrlq xmm2, 32) \
1648  AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \
1649  AS2( add edx, 16) \
1650  AS2( cmp edx, 8*(n)) \
1651  ASJ( jne, 1, b) \
1652  AS2( mov eax, esi) \
1653  AS2( lea edi, [esp+20+00*n+16*(n/2-1)])\
1654  AS2( lea edx, [esp+20+16*n+16*(n/2-1)])\
1655  AS2( lea esi, [esp+20+32*n+16*(n/2-1)])\
1656  AS2( pxor xmm4, xmm4)\
1657  AS2( pxor xmm5, xmm5)
1658 
1659 #define Top_Acc(i) \
1660  AS2( movq xmm0, QWORD PTR [esi+i/2*(1-(i-2*(i/2))*2)*16+8]) \
1661  AS2( pmuludq xmm0, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
1662  AS2( psrlq xmm0, 48) \
1663  AS2( paddd xmm5, xmm0)\
1664 
1665 #define Top_Column0(i) \
1666  AS2( psllq xmm5, 32) \
1667  AS2( add edi, 16) \
1668  AS2( add edx, 16) \
1669  SSE2_MulAdd45\
1670  Mul_Acc##i(i) \
1671 
1672 #define Top_Column1(i) \
1673  SSE2_SaveShift(0) \
1674  AS2( add esi, 16) \
1675  SSE2_MulAdd45\
1676  Mul_Acc##i(i) \
1677  AS2( shr eax, 16) \
1678  AS2( movd xmm0, eax)\
1679  AS2( movd xmm1, [ecx+4])\
1680  AS2( psrld xmm1, 16)\
1681  AS2( pcmpgtd xmm1, xmm0)\
1682  AS2( psrld xmm1, 31)\
1683  AS2( paddd xmm4, xmm1)\
1684 
1685 void SSE2_Square4(word *C, const word *A)
1686 {
1687  Squ_Begin(2)
1688  Squ_Column0(0, 1)
1689  Squ_End(2)
1690 }
1691 
1692 void SSE2_Square8(word *C, const word *A)
1693 {
1694  Squ_Begin(4)
1695 #ifndef __GNUC__
1696  ASJ( jmp, 0, f)
1697  Squ_Acc(2)
1698  AS1( ret) ASL(0)
1699 #endif
1700  Squ_Column0(0, 1)
1701  Squ_Column1(1, 1)
1702  Squ_Column0(2, 2)
1703  Squ_Column1(3, 1)
1704  Squ_Column0(4, 1)
1705  Squ_End(4)
1706 }
1707 
1708 void SSE2_Square16(word *C, const word *A)
1709 {
1710  Squ_Begin(8)
1711 #ifndef __GNUC__
1712  ASJ( jmp, 0, f)
1713  Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
1714  AS1( ret) ASL(0)
1715 #endif
1716  Squ_Column0(0, 1)
1717  Squ_Column1(1, 1)
1718  Squ_Column0(2, 2)
1719  Squ_Column1(3, 2)
1720  Squ_Column0(4, 3)
1721  Squ_Column1(5, 3)
1722  Squ_Column0(6, 4)
1723  Squ_Column1(7, 3)
1724  Squ_Column0(8, 3)
1725  Squ_Column1(9, 2)
1726  Squ_Column0(10, 2)
1727  Squ_Column1(11, 1)
1728  Squ_Column0(12, 1)
1729  Squ_End(8)
1730 }
1731 
1732 void SSE2_Square32(word *C, const word *A)
1733 {
1734  Squ_Begin(16)
1735  ASJ( jmp, 0, f)
1736  Squ_Acc(8) Squ_Acc(7) Squ_Acc(6) Squ_Acc(5) Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
1737  AS1( ret) ASL(0)
1738  Squ_Column0(0, 1)
1739  Squ_Column1(1, 1)
1740  Squ_Column0(2, 2)
1741  Squ_Column1(3, 2)
1742  Squ_Column0(4, 3)
1743  Squ_Column1(5, 3)
1744  Squ_Column0(6, 4)
1745  Squ_Column1(7, 4)
1746  Squ_Column0(8, 5)
1747  Squ_Column1(9, 5)
1748  Squ_Column0(10, 6)
1749  Squ_Column1(11, 6)
1750  Squ_Column0(12, 7)
1751  Squ_Column1(13, 7)
1752  Squ_Column0(14, 8)
1753  Squ_Column1(15, 7)
1754  Squ_Column0(16, 7)
1755  Squ_Column1(17, 6)
1756  Squ_Column0(18, 6)
1757  Squ_Column1(19, 5)
1758  Squ_Column0(20, 5)
1759  Squ_Column1(21, 4)
1760  Squ_Column0(22, 4)
1761  Squ_Column1(23, 3)
1762  Squ_Column0(24, 3)
1763  Squ_Column1(25, 2)
1764  Squ_Column0(26, 2)
1765  Squ_Column1(27, 1)
1766  Squ_Column0(28, 1)
1767  Squ_End(16)
1768 }
1769 
1770 void SSE2_Multiply4(word *C, const word *A, const word *B)
1771 {
1772  Mul_Begin(2)
1773 #ifndef __GNUC__
1774  ASJ( jmp, 0, f)
1775  Mul_Acc(2)
1776  AS1( ret) ASL(0)
1777 #endif
1778  Mul_Column0(0, 2)
1779  Mul_End(2)
1780 }
1781 
1782 void SSE2_Multiply8(word *C, const word *A, const word *B)
1783 {
1784  Mul_Begin(4)
1785 #ifndef __GNUC__
1786  ASJ( jmp, 0, f)
1787  Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1788  AS1( ret) ASL(0)
1789 #endif
1790  Mul_Column0(0, 2)
1791  Mul_Column1(1, 3)
1792  Mul_Column0(2, 4)
1793  Mul_Column1(3, 3)
1794  Mul_Column0(4, 2)
1795  Mul_End(4)
1796 }
1797 
1798 void SSE2_Multiply16(word *C, const word *A, const word *B)
1799 {
1800  Mul_Begin(8)
1801 #ifndef __GNUC__
1802  ASJ( jmp, 0, f)
1803  Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1804  AS1( ret) ASL(0)
1805 #endif
1806  Mul_Column0(0, 2)
1807  Mul_Column1(1, 3)
1808  Mul_Column0(2, 4)
1809  Mul_Column1(3, 5)
1810  Mul_Column0(4, 6)
1811  Mul_Column1(5, 7)
1812  Mul_Column0(6, 8)
1813  Mul_Column1(7, 7)
1814  Mul_Column0(8, 6)
1815  Mul_Column1(9, 5)
1816  Mul_Column0(10, 4)
1817  Mul_Column1(11, 3)
1818  Mul_Column0(12, 2)
1819  Mul_End(8)
1820 }
1821 
1822 void SSE2_Multiply32(word *C, const word *A, const word *B)
1823 {
1824  Mul_Begin(16)
1825  ASJ( jmp, 0, f)
1826  Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1827  AS1( ret) ASL(0)
1828  Mul_Column0(0, 2)
1829  Mul_Column1(1, 3)
1830  Mul_Column0(2, 4)
1831  Mul_Column1(3, 5)
1832  Mul_Column0(4, 6)
1833  Mul_Column1(5, 7)
1834  Mul_Column0(6, 8)
1835  Mul_Column1(7, 9)
1836  Mul_Column0(8, 10)
1837  Mul_Column1(9, 11)
1838  Mul_Column0(10, 12)
1839  Mul_Column1(11, 13)
1840  Mul_Column0(12, 14)
1841  Mul_Column1(13, 15)
1842  Mul_Column0(14, 16)
1843  Mul_Column1(15, 15)
1844  Mul_Column0(16, 14)
1845  Mul_Column1(17, 13)
1846  Mul_Column0(18, 12)
1847  Mul_Column1(19, 11)
1848  Mul_Column0(20, 10)
1849  Mul_Column1(21, 9)
1850  Mul_Column0(22, 8)
1851  Mul_Column1(23, 7)
1852  Mul_Column0(24, 6)
1853  Mul_Column1(25, 5)
1854  Mul_Column0(26, 4)
1855  Mul_Column1(27, 3)
1856  Mul_Column0(28, 2)
1857  Mul_End(16)
1858 }
1859 
1860 void SSE2_MultiplyBottom4(word *C, const word *A, const word *B)
1861 {
1862  Mul_Begin(2)
1863  Bot_SaveAcc(0) Bot_Acc(2)
1864  Bot_End(2)
1865 }
1866 
1867 void SSE2_MultiplyBottom8(word *C, const word *A, const word *B)
1868 {
1869  Mul_Begin(4)
1870 #ifndef __GNUC__
1871  ASJ( jmp, 0, f)
1872  Mul_Acc(3) Mul_Acc(2)
1873  AS1( ret) ASL(0)
1874 #endif
1875  Mul_Column0(0, 2)
1876  Mul_Column1(1, 3)
1877  Bot_SaveAcc(2) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
1878  Bot_End(4)
1879 }
1880 
1881 void SSE2_MultiplyBottom16(word *C, const word *A, const word *B)
1882 {
1883  Mul_Begin(8)
1884 #ifndef __GNUC__
1885  ASJ( jmp, 0, f)
1886  Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1887  AS1( ret) ASL(0)
1888 #endif
1889  Mul_Column0(0, 2)
1890  Mul_Column1(1, 3)
1891  Mul_Column0(2, 4)
1892  Mul_Column1(3, 5)
1893  Mul_Column0(4, 6)
1894  Mul_Column1(5, 7)
1895  Bot_SaveAcc(6) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
1896  Bot_End(8)
1897 }
1898 
1899 void SSE2_MultiplyBottom32(word *C, const word *A, const word *B)
1900 {
1901  Mul_Begin(16)
1902 #ifndef __GNUC__
1903  ASJ( jmp, 0, f)
1904  Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1905  AS1( ret) ASL(0)
1906 #endif
1907  Mul_Column0(0, 2)
1908  Mul_Column1(1, 3)
1909  Mul_Column0(2, 4)
1910  Mul_Column1(3, 5)
1911  Mul_Column0(4, 6)
1912  Mul_Column1(5, 7)
1913  Mul_Column0(6, 8)
1914  Mul_Column1(7, 9)
1915  Mul_Column0(8, 10)
1916  Mul_Column1(9, 11)
1917  Mul_Column0(10, 12)
1918  Mul_Column1(11, 13)
1919  Mul_Column0(12, 14)
1920  Mul_Column1(13, 15)
1921  Bot_SaveAcc(14) Bot_Acc(16) Bot_Acc(15) Bot_Acc(14) Bot_Acc(13) Bot_Acc(12) Bot_Acc(11) Bot_Acc(10) Bot_Acc(9) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
1922  Bot_End(16)
1923 }
1924 
1925 void SSE2_MultiplyTop8(word *C, const word *A, const word *B, word L)
1926 {
1927  Top_Begin(4)
1928  Top_Acc(3) Top_Acc(2) Top_Acc(1)
1929 #ifndef __GNUC__
1930  ASJ( jmp, 0, f)
1931  Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1932  AS1( ret) ASL(0)
1933 #endif
1934  Top_Column0(4)
1935  Top_Column1(3)
1936  Mul_Column0(0, 2)
1937  Top_End(2)
1938 }
1939 
1940 void SSE2_MultiplyTop16(word *C, const word *A, const word *B, word L)
1941 {
1942  Top_Begin(8)
1943  Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
1944 #ifndef __GNUC__
1945  ASJ( jmp, 0, f)
1946  Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1947  AS1( ret) ASL(0)
1948 #endif
1949  Top_Column0(8)
1950  Top_Column1(7)
1951  Mul_Column0(0, 6)
1952  Mul_Column1(1, 5)
1953  Mul_Column0(2, 4)
1954  Mul_Column1(3, 3)
1955  Mul_Column0(4, 2)
1956  Top_End(4)
1957 }
1958 
1959 void SSE2_MultiplyTop32(word *C, const word *A, const word *B, word L)
1960 {
1961  Top_Begin(16)
1962  Top_Acc(15) Top_Acc(14) Top_Acc(13) Top_Acc(12) Top_Acc(11) Top_Acc(10) Top_Acc(9) Top_Acc(8) Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
1963 #ifndef __GNUC__
1964  ASJ( jmp, 0, f)
1965  Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1966  AS1( ret) ASL(0)
1967 #endif
1968  Top_Column0(16)
1969  Top_Column1(15)
1970  Mul_Column0(0, 14)
1971  Mul_Column1(1, 13)
1972  Mul_Column0(2, 12)
1973  Mul_Column1(3, 11)
1974  Mul_Column0(4, 10)
1975  Mul_Column1(5, 9)
1976  Mul_Column0(6, 8)
1977  Mul_Column1(7, 7)
1978  Mul_Column0(8, 6)
1979  Mul_Column1(9, 5)
1980  Mul_Column0(10, 4)
1981  Mul_Column1(11, 3)
1982  Mul_Column0(12, 2)
1983  Top_End(8)
1984 }
1985 
1986 #endif // #if CRYPTOPP_INTEGER_SSE2
1987 
1988 // ********************************************************
1989 
1990 typedef int (CRYPTOPP_FASTCALL * PAdd)(size_t N, word *C, const word *A, const word *B);
1991 typedef void (* PMul)(word *C, const word *A, const word *B);
1992 typedef void (* PSqu)(word *C, const word *A);
1993 typedef void (* PMulTop)(word *C, const word *A, const word *B, word L);
1994 
1995 #if CRYPTOPP_INTEGER_SSE2
1996 static PAdd s_pAdd = &Baseline_Add, s_pSub = &Baseline_Sub;
1997 static size_t s_recursionLimit = 8;
1998 #else
1999 static const size_t s_recursionLimit = 16;
2000 #endif
2001 
2002 static PMul s_pMul[9], s_pBot[9];
2003 static PSqu s_pSqu[9];
2004 static PMulTop s_pTop[9];
2005 
2006 static void SetFunctionPointers()
2007 {
2008  s_pMul[0] = &Baseline_Multiply2;
2009  s_pBot[0] = &Baseline_MultiplyBottom2;
2010  s_pSqu[0] = &Baseline_Square2;
2011  s_pTop[0] = &Baseline_MultiplyTop2;
2012  s_pTop[1] = &Baseline_MultiplyTop4;
2013 
2014 #if CRYPTOPP_INTEGER_SSE2
2015  if (HasSSE2())
2016  {
2017 #if _MSC_VER != 1200 || defined(NDEBUG)
2018  if (IsP4())
2019  {
2020  s_pAdd = &SSE2_Add;
2021  s_pSub = &SSE2_Sub;
2022  }
2023 #endif
2024 
2025  s_recursionLimit = 32;
2026 
2027  s_pMul[1] = &SSE2_Multiply4;
2028  s_pMul[2] = &SSE2_Multiply8;
2029  s_pMul[4] = &SSE2_Multiply16;
2030  s_pMul[8] = &SSE2_Multiply32;
2031 
2032  s_pBot[1] = &SSE2_MultiplyBottom4;
2033  s_pBot[2] = &SSE2_MultiplyBottom8;
2034  s_pBot[4] = &SSE2_MultiplyBottom16;
2035  s_pBot[8] = &SSE2_MultiplyBottom32;
2036 
2037  s_pSqu[1] = &SSE2_Square4;
2038  s_pSqu[2] = &SSE2_Square8;
2039  s_pSqu[4] = &SSE2_Square16;
2040  s_pSqu[8] = &SSE2_Square32;
2041 
2042  s_pTop[2] = &SSE2_MultiplyTop8;
2043  s_pTop[4] = &SSE2_MultiplyTop16;
2044  s_pTop[8] = &SSE2_MultiplyTop32;
2045  }
2046  else
2047 #endif
2048  {
2049  s_pMul[1] = &Baseline_Multiply4;
2050  s_pMul[2] = &Baseline_Multiply8;
2051 
2052  s_pBot[1] = &Baseline_MultiplyBottom4;
2053  s_pBot[2] = &Baseline_MultiplyBottom8;
2054 
2055  s_pSqu[1] = &Baseline_Square4;
2056  s_pSqu[2] = &Baseline_Square8;
2057 
2058  s_pTop[2] = &Baseline_MultiplyTop8;
2059 
2060 #if !CRYPTOPP_INTEGER_SSE2
2061  s_pMul[4] = &Baseline_Multiply16;
2062  s_pBot[4] = &Baseline_MultiplyBottom16;
2063  s_pSqu[4] = &Baseline_Square16;
2064  s_pTop[4] = &Baseline_MultiplyTop16;
2065 #endif
2066  }
2067 }
2068 
2069 inline int Add(word *C, const word *A, const word *B, size_t N)
2070 {
2071 #if CRYPTOPP_INTEGER_SSE2
2072  return s_pAdd(N, C, A, B);
2073 #else
2074  return Baseline_Add(N, C, A, B);
2075 #endif
2076 }
2077 
2078 inline int Subtract(word *C, const word *A, const word *B, size_t N)
2079 {
2080 #if CRYPTOPP_INTEGER_SSE2
2081  return s_pSub(N, C, A, B);
2082 #else
2083  return Baseline_Sub(N, C, A, B);
2084 #endif
2085 }
2086 
2087 // ********************************************************
2088 
2089 
2090 #define A0 A
2091 #define A1 (A+N2)
2092 #define B0 B
2093 #define B1 (B+N2)
2094 
2095 #define T0 T
2096 #define T1 (T+N2)
2097 #define T2 (T+N)
2098 #define T3 (T+N+N2)
2099 
2100 #define R0 R
2101 #define R1 (R+N2)
2102 #define R2 (R+N)
2103 #define R3 (R+N+N2)
2104 
2105 // R[2*N] - result = A*B
2106 // T[2*N] - temporary work space
2107 // A[N] --- multiplier
2108 // B[N] --- multiplicant
2109 
2110 void RecursiveMultiply(word *R, word *T, const word *A, const word *B, size_t N)
2111 {
2112  assert(N>=2 && N%2==0);
2113 
2114  if (N <= s_recursionLimit)
2115  s_pMul[N/4](R, A, B);
2116  else
2117  {
2118  const size_t N2 = N/2;
2119 
2120  size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
2121  Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
2122 
2123  size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
2124  Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
2125 
2126  RecursiveMultiply(R2, T2, A1, B1, N2);
2127  RecursiveMultiply(T0, T2, R0, R1, N2);
2128  RecursiveMultiply(R0, T2, A0, B0, N2);
2129 
2130  // now T[01] holds (A1-A0)*(B0-B1), R[01] holds A0*B0, R[23] holds A1*B1
2131 
2132  int c2 = Add(R2, R2, R1, N2);
2133  int c3 = c2;
2134  c2 += Add(R1, R2, R0, N2);
2135  c3 += Add(R2, R2, R3, N2);
2136 
2137  if (AN2 == BN2)
2138  c3 -= Subtract(R1, R1, T0, N);
2139  else
2140  c3 += Add(R1, R1, T0, N);
2141 
2142  c3 += Increment(R2, N2, c2);
2143  assert (c3 >= 0 && c3 <= 2);
2144  Increment(R3, N2, c3);
2145  }
2146 }
2147 
2148 // R[2*N] - result = A*A
2149 // T[2*N] - temporary work space
2150 // A[N] --- number to be squared
2151 
2152 void RecursiveSquare(word *R, word *T, const word *A, size_t N)
2153 {
2154  assert(N && N%2==0);
2155 
2156  if (N <= s_recursionLimit)
2157  s_pSqu[N/4](R, A);
2158  else
2159  {
2160  const size_t N2 = N/2;
2161 
2162  RecursiveSquare(R0, T2, A0, N2);
2163  RecursiveSquare(R2, T2, A1, N2);
2164  RecursiveMultiply(T0, T2, A0, A1, N2);
2165 
2166  int carry = Add(R1, R1, T0, N);
2167  carry += Add(R1, R1, T0, N);
2168  Increment(R3, N2, carry);
2169  }
2170 }
2171 
2172 // R[N] - bottom half of A*B
2173 // T[3*N/2] - temporary work space
2174 // A[N] - multiplier
2175 // B[N] - multiplicant
2176 
2177 void RecursiveMultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
2178 {
2179  assert(N>=2 && N%2==0);
2180 
2181  if (N <= s_recursionLimit)
2182  s_pBot[N/4](R, A, B);
2183  else
2184  {
2185  const size_t N2 = N/2;
2186 
2187  RecursiveMultiply(R, T, A0, B0, N2);
2188  RecursiveMultiplyBottom(T0, T1, A1, B0, N2);
2189  Add(R1, R1, T0, N2);
2190  RecursiveMultiplyBottom(T0, T1, A0, B1, N2);
2191  Add(R1, R1, T0, N2);
2192  }
2193 }
2194 
2195 // R[N] --- upper half of A*B
2196 // T[2*N] - temporary work space
2197 // L[N] --- lower half of A*B
2198 // A[N] --- multiplier
2199 // B[N] --- multiplicant
2200 
2201 void MultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, size_t N)
2202 {
2203  assert(N>=2 && N%2==0);
2204 
2205  if (N <= s_recursionLimit)
2206  s_pTop[N/4](R, A, B, L[N-1]);
2207  else
2208  {
2209  const size_t N2 = N/2;
2210 
2211  size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
2212  Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
2213 
2214  size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
2215  Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
2216 
2217  RecursiveMultiply(T0, T2, R0, R1, N2);
2218  RecursiveMultiply(R0, T2, A1, B1, N2);
2219 
2220  // now T[01] holds (A1-A0)*(B0-B1) = A1*B0+A0*B1-A1*B1-A0*B0, R[01] holds A1*B1
2221 
2222  int t, c3;
2223  int c2 = Subtract(T2, L+N2, L, N2);
2224 
2225  if (AN2 == BN2)
2226  {
2227  c2 -= Add(T2, T2, T0, N2);
2228  t = (Compare(T2, R0, N2) == -1);
2229  c3 = t - Subtract(T2, T2, T1, N2);
2230  }
2231  else
2232  {
2233  c2 += Subtract(T2, T2, T0, N2);
2234  t = (Compare(T2, R0, N2) == -1);
2235  c3 = t + Add(T2, T2, T1, N2);
2236  }
2237 
2238  c2 += t;
2239  if (c2 >= 0)
2240  c3 += Increment(T2, N2, c2);
2241  else
2242  c3 -= Decrement(T2, N2, -c2);
2243  c3 += Add(R0, T2, R1, N2);
2244 
2245  assert (c3 >= 0 && c3 <= 2);
2246  Increment(R1, N2, c3);
2247  }
2248 }
2249 
2250 inline void Multiply(word *R, word *T, const word *A, const word *B, size_t N)
2251 {
2252  RecursiveMultiply(R, T, A, B, N);
2253 }
2254 
2255 inline void Square(word *R, word *T, const word *A, size_t N)
2256 {
2257  RecursiveSquare(R, T, A, N);
2258 }
2259 
2260 inline void MultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
2261 {
2262  RecursiveMultiplyBottom(R, T, A, B, N);
2263 }
2264 
2265 // R[NA+NB] - result = A*B
2266 // T[NA+NB] - temporary work space
2267 // A[NA] ---- multiplier
2268 // B[NB] ---- multiplicant
2269 
2270 void AsymmetricMultiply(word *R, word *T, const word *A, size_t NA, const word *B, size_t NB)
2271 {
2272  if (NA == NB)
2273  {
2274  if (A == B)
2275  Square(R, T, A, NA);
2276  else
2277  Multiply(R, T, A, B, NA);
2278 
2279  return;
2280  }
2281 
2282  if (NA > NB)
2283  {
2284  std::swap(A, B);
2285  std::swap(NA, NB);
2286  }
2287 
2288  assert(NB % NA == 0);
2289 
2290  if (NA==2 && !A[1])
2291  {
2292  switch (A[0])
2293  {
2294  case 0:
2295  SetWords(R, 0, NB+2);
2296  return;
2297  case 1:
2298  CopyWords(R, B, NB);
2299  R[NB] = R[NB+1] = 0;
2300  return;
2301  default:
2302  R[NB] = LinearMultiply(R, B, A[0], NB);
2303  R[NB+1] = 0;
2304  return;
2305  }
2306  }
2307 
2308  size_t i;
2309  if ((NB/NA)%2 == 0)
2310  {
2311  Multiply(R, T, A, B, NA);
2312  CopyWords(T+2*NA, R+NA, NA);
2313 
2314  for (i=2*NA; i<NB; i+=2*NA)
2315  Multiply(T+NA+i, T, A, B+i, NA);
2316  for (i=NA; i<NB; i+=2*NA)
2317  Multiply(R+i, T, A, B+i, NA);
2318  }
2319  else
2320  {
2321  for (i=0; i<NB; i+=2*NA)
2322  Multiply(R+i, T, A, B+i, NA);
2323  for (i=NA; i<NB; i+=2*NA)
2324  Multiply(T+NA+i, T, A, B+i, NA);
2325  }
2326 
2327  if (Add(R+NA, R+NA, T+2*NA, NB-NA))
2328  Increment(R+NB, NA);
2329 }
2330 
2331 // R[N] ----- result = A inverse mod 2**(WORD_BITS*N)
2332 // T[3*N/2] - temporary work space
2333 // A[N] ----- an odd number as input
2334 
2335 void RecursiveInverseModPower2(word *R, word *T, const word *A, size_t N)
2336 {
2337  if (N==2)
2338  {
2339  T[0] = AtomicInverseModPower2(A[0]);
2340  T[1] = 0;
2341  s_pBot[0](T+2, T, A);
2342  TwosComplement(T+2, 2);
2343  Increment(T+2, 2, 2);
2344  s_pBot[0](R, T, T+2);
2345  }
2346  else
2347  {
2348  const size_t N2 = N/2;
2349  RecursiveInverseModPower2(R0, T0, A0, N2);
2350  T0[0] = 1;
2351  SetWords(T0+1, 0, N2-1);
2352  MultiplyTop(R1, T1, T0, R0, A0, N2);
2353  MultiplyBottom(T0, T1, R0, A1, N2);
2354  Add(T0, R1, T0, N2);
2355  TwosComplement(T0, N2);
2356  MultiplyBottom(R1, T1, R0, T0, N2);
2357  }
2358 }
2359 
2360 // R[N] --- result = X/(2**(WORD_BITS*N)) mod M
2361 // T[3*N] - temporary work space
2362 // X[2*N] - number to be reduced
2363 // M[N] --- modulus
2364 // U[N] --- multiplicative inverse of M mod 2**(WORD_BITS*N)
2365 
2366 void MontgomeryReduce(word *R, word *T, word *X, const word *M, const word *U, size_t N)
2367 {
2368 #if 1
2369  MultiplyBottom(R, T, X, U, N);
2370  MultiplyTop(T, T+N, X, R, M, N);
2371  word borrow = Subtract(T, X+N, T, N);
2372  // defend against timing attack by doing this Add even when not needed
2373  word carry = Add(T+N, T, M, N);
2374  assert(carry | !borrow);
2375  CRYPTOPP_UNUSED(carry), CRYPTOPP_UNUSED(borrow);
2376  CopyWords(R, T + ((0-borrow) & N), N);
2377 #elif 0
2378  const word u = 0-U[0];
2379  Declare2Words(p)
2380  for (size_t i=0; i<N; i++)
2381  {
2382  const word t = u * X[i];
2383  word c = 0;
2384  for (size_t j=0; j<N; j+=2)
2385  {
2386  MultiplyWords(p, t, M[j]);
2387  Acc2WordsBy1(p, X[i+j]);
2388  Acc2WordsBy1(p, c);
2389  X[i+j] = LowWord(p);
2390  c = HighWord(p);
2391  MultiplyWords(p, t, M[j+1]);
2392  Acc2WordsBy1(p, X[i+j+1]);
2393  Acc2WordsBy1(p, c);
2394  X[i+j+1] = LowWord(p);
2395  c = HighWord(p);
2396  }
2397 
2398  if (Increment(X+N+i, N-i, c))
2399  while (!Subtract(X+N, X+N, M, N)) {}
2400  }
2401 
2402  memcpy(R, X+N, N*WORD_SIZE);
2403 #else
2404  __m64 u = _mm_cvtsi32_si64(0-U[0]), p;
2405  for (size_t i=0; i<N; i++)
2406  {
2407  __m64 t = _mm_cvtsi32_si64(X[i]);
2408  t = _mm_mul_su32(t, u);
2409  __m64 c = _mm_setzero_si64();
2410  for (size_t j=0; j<N; j+=2)
2411  {
2412  p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j]));
2413  p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j]));
2414  c = _mm_add_si64(c, p);
2415  X[i+j] = _mm_cvtsi64_si32(c);
2416  c = _mm_srli_si64(c, 32);
2417  p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j+1]));
2418  p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j+1]));
2419  c = _mm_add_si64(c, p);
2420  X[i+j+1] = _mm_cvtsi64_si32(c);
2421  c = _mm_srli_si64(c, 32);
2422  }
2423 
2424  if (Increment(X+N+i, N-i, _mm_cvtsi64_si32(c)))
2425  while (!Subtract(X+N, X+N, M, N)) {}
2426  }
2427 
2428  memcpy(R, X+N, N*WORD_SIZE);
2429  _mm_empty();
2430 #endif
2431 }
2432 
2433 // R[N] --- result = X/(2**(WORD_BITS*N/2)) mod M
2434 // T[2*N] - temporary work space
2435 // X[2*N] - number to be reduced
2436 // M[N] --- modulus
2437 // U[N/2] - multiplicative inverse of M mod 2**(WORD_BITS*N/2)
2438 // V[N] --- 2**(WORD_BITS*3*N/2) mod M
2439 
2440 void HalfMontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, const word *V, size_t N)
2441 {
2442  assert(N%2==0 && N>=4);
2443 
2444 #define M0 M
2445 #define M1 (M+N2)
2446 #define V0 V
2447 #define V1 (V+N2)
2448 
2449 #define X0 X
2450 #define X1 (X+N2)
2451 #define X2 (X+N)
2452 #define X3 (X+N+N2)
2453 
2454  const size_t N2 = N/2;
2455  Multiply(T0, T2, V0, X3, N2);
2456  int c2 = Add(T0, T0, X0, N);
2457  MultiplyBottom(T3, T2, T0, U, N2);
2458  MultiplyTop(T2, R, T0, T3, M0, N2);
2459  c2 -= Subtract(T2, T1, T2, N2);
2460  Multiply(T0, R, T3, M1, N2);
2461  c2 -= Subtract(T0, T2, T0, N2);
2462  int c3 = -(int)Subtract(T1, X2, T1, N2);
2463  Multiply(R0, T2, V1, X3, N2);
2464  c3 += Add(R, R, T, N);
2465 
2466  if (c2>0)
2467  c3 += Increment(R1, N2);
2468  else if (c2<0)
2469  c3 -= Decrement(R1, N2, -c2);
2470 
2471  assert(c3>=-1 && c3<=1);
2472  if (c3>0)
2473  Subtract(R, R, M, N);
2474  else if (c3<0)
2475  Add(R, R, M, N);
2476 
2477 #undef M0
2478 #undef M1
2479 #undef V0
2480 #undef V1
2481 
2482 #undef X0
2483 #undef X1
2484 #undef X2
2485 #undef X3
2486 }
2487 
2488 #undef A0
2489 #undef A1
2490 #undef B0
2491 #undef B1
2492 
2493 #undef T0
2494 #undef T1
2495 #undef T2
2496 #undef T3
2497 
2498 #undef R0
2499 #undef R1
2500 #undef R2
2501 #undef R3
2502 
2503 /*
2504 // do a 3 word by 2 word divide, returns quotient and leaves remainder in A
2505 static word SubatomicDivide(word *A, word B0, word B1)
2506 {
2507  // assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a word
2508  assert(A[2] < B1 || (A[2]==B1 && A[1] < B0));
2509 
2510  // estimate the quotient: do a 2 word by 1 word divide
2511  word Q;
2512  if (B1+1 == 0)
2513  Q = A[2];
2514  else
2515  Q = DWord(A[1], A[2]).DividedBy(B1+1);
2516 
2517  // now subtract Q*B from A
2518  DWord p = DWord::Multiply(B0, Q);
2519  DWord u = (DWord) A[0] - p.GetLowHalf();
2520  A[0] = u.GetLowHalf();
2521  u = (DWord) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - DWord::Multiply(B1, Q);
2522  A[1] = u.GetLowHalf();
2523  A[2] += u.GetHighHalf();
2524 
2525  // Q <= actual quotient, so fix it
2526  while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
2527  {
2528  u = (DWord) A[0] - B0;
2529  A[0] = u.GetLowHalf();
2530  u = (DWord) A[1] - B1 - u.GetHighHalfAsBorrow();
2531  A[1] = u.GetLowHalf();
2532  A[2] += u.GetHighHalf();
2533  Q++;
2534  assert(Q); // shouldn't overflow
2535  }
2536 
2537  return Q;
2538 }
2539 
2540 // do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1
2541 static inline void AtomicDivide(word *Q, const word *A, const word *B)
2542 {
2543  if (!B[0] && !B[1]) // if divisor is 0, we assume divisor==2**(2*WORD_BITS)
2544  {
2545  Q[0] = A[2];
2546  Q[1] = A[3];
2547  }
2548  else
2549  {
2550  word T[4];
2551  T[0] = A[0]; T[1] = A[1]; T[2] = A[2]; T[3] = A[3];
2552  Q[1] = SubatomicDivide(T+1, B[0], B[1]);
2553  Q[0] = SubatomicDivide(T, B[0], B[1]);
2554 
2555 #ifndef NDEBUG
2556  // multiply quotient and divisor and add remainder, make sure it equals dividend
2557  assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
2558  word P[4];
2559  LowLevel::Multiply2(P, Q, B);
2560  Add(P, P, T, 4);
2561  assert(memcmp(P, A, 4*WORD_SIZE)==0);
2562 #endif
2563  }
2564 }
2565 */
2566 
2567 static inline void AtomicDivide(word *Q, const word *A, const word *B)
2568 {
2569  word T[4];
2570  DWord q = DivideFourWordsByTwo<word, DWord>(T, DWord(A[0], A[1]), DWord(A[2], A[3]), DWord(B[0], B[1]));
2571  Q[0] = q.GetLowHalf();
2572  Q[1] = q.GetHighHalf();
2573 
2574 #ifndef NDEBUG
2575  if (B[0] || B[1])
2576  {
2577  // multiply quotient and divisor and add remainder, make sure it equals dividend
2578  assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
2579  word P[4];
2580  s_pMul[0](P, Q, B);
2581  Add(P, P, T, 4);
2582  assert(memcmp(P, A, 4*WORD_SIZE)==0);
2583  }
2584 #endif
2585 }
2586 
2587 // for use by Divide(), corrects the underestimated quotient {Q1,Q0}
2588 static void CorrectQuotientEstimate(word *R, word *T, word *Q, const word *B, size_t N)
2589 {
2590  assert(N && N%2==0);
2591 
2592  AsymmetricMultiply(T, T+N+2, Q, 2, B, N);
2593 
2594  word borrow = Subtract(R, R, T, N+2);
2595  assert(!borrow && !R[N+1]);
2596  CRYPTOPP_UNUSED(borrow);
2597 
2598  while (R[N] || Compare(R, B, N) >= 0)
2599  {
2600  R[N] -= Subtract(R, R, B, N);
2601  Q[1] += (++Q[0]==0);
2602  assert(Q[0] || Q[1]); // no overflow
2603  }
2604 }
2605 
2606 // R[NB] -------- remainder = A%B
2607 // Q[NA-NB+2] --- quotient = A/B
2608 // T[NA+3*(NB+2)] - temp work space
2609 // A[NA] -------- dividend
2610 // B[NB] -------- divisor
2611 
2612 void Divide(word *R, word *Q, word *T, const word *A, size_t NA, const word *B, size_t NB)
2613 {
2614  assert(NA && NB && NA%2==0 && NB%2==0);
2615  assert(B[NB-1] || B[NB-2]);
2616  assert(NB <= NA);
2617 
2618  // set up temporary work space
2619  word *const TA=T;
2620  word *const TB=T+NA+2;
2621  word *const TP=T+NA+2+NB;
2622 
2623  // copy B into TB and normalize it so that TB has highest bit set to 1
2624  unsigned shiftWords = (B[NB-1]==0);
2625  TB[0] = TB[NB-1] = 0;
2626  CopyWords(TB+shiftWords, B, NB-shiftWords);
2627  unsigned shiftBits = WORD_BITS - BitPrecision(TB[NB-1]);
2628  assert(shiftBits < WORD_BITS);
2629  ShiftWordsLeftByBits(TB, NB, shiftBits);
2630 
2631  // copy A into TA and normalize it
2632  TA[0] = TA[NA] = TA[NA+1] = 0;
2633  CopyWords(TA+shiftWords, A, NA);
2634  ShiftWordsLeftByBits(TA, NA+2, shiftBits);
2635 
2636  if (TA[NA+1]==0 && TA[NA] <= 1)
2637  {
2638  Q[NA-NB+1] = Q[NA-NB] = 0;
2639  while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0)
2640  {
2641  TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB);
2642  ++Q[NA-NB];
2643  }
2644  }
2645  else
2646  {
2647  NA+=2;
2648  assert(Compare(TA+NA-NB, TB, NB) < 0);
2649  }
2650 
2651  word BT[2];
2652  BT[0] = TB[NB-2] + 1;
2653  BT[1] = TB[NB-1] + (BT[0]==0);
2654 
2655  // start reducing TA mod TB, 2 words at a time
2656  for (size_t i=NA-2; i>=NB; i-=2)
2657  {
2658  AtomicDivide(Q+i-NB, TA+i-2, BT);
2659  CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB);
2660  }
2661 
2662  // copy TA into R, and denormalize it
2663  CopyWords(R, TA+shiftWords, NB);
2664  ShiftWordsRightByBits(R, NB, shiftBits);
2665 }
2666 
2667 static inline size_t EvenWordCount(const word *X, size_t N)
2668 {
2669  while (N && X[N-2]==0 && X[N-1]==0)
2670  N-=2;
2671  return N;
2672 }
2673 
2674 // return k
2675 // R[N] --- result = A^(-1) * 2^k mod M
2676 // T[4*N] - temporary work space
2677 // A[NA] -- number to take inverse of
2678 // M[N] --- modulus
2679 
2680 unsigned int AlmostInverse(word *R, word *T, const word *A, size_t NA, const word *M, size_t N)
2681 {
2682  assert(NA<=N && N && N%2==0);
2683 
2684  word *b = T;
2685  word *c = T+N;
2686  word *f = T+2*N;
2687  word *g = T+3*N;
2688  size_t bcLen=2, fgLen=EvenWordCount(M, N);
2689  unsigned int k=0;
2690  bool s=false;
2691 
2692  SetWords(T, 0, 3*N);
2693  b[0]=1;
2694  CopyWords(f, A, NA);
2695  CopyWords(g, M, N);
2696 
2697  while (1)
2698  {
2699  word t=f[0];
2700  while (!t)
2701  {
2702  if (EvenWordCount(f, fgLen)==0)
2703  {
2704  SetWords(R, 0, N);
2705  return 0;
2706  }
2707 
2708  ShiftWordsRightByWords(f, fgLen, 1);
2709  bcLen += 2 * (c[bcLen-1] != 0);
2710  assert(bcLen <= N);
2711  ShiftWordsLeftByWords(c, bcLen, 1);
2712  k+=WORD_BITS;
2713  t=f[0];
2714  }
2715 
2716  // t must be non-0; otherwise undefined.
2717  unsigned int i = TrailingZeros(t);
2718  t >>= i;
2719  k += i;
2720 
2721  if (t==1 && f[1]==0 && EvenWordCount(f+2, fgLen-2)==0)
2722  {
2723  if (s)
2724  Subtract(R, M, b, N);
2725  else
2726  CopyWords(R, b, N);
2727  return k;
2728  }
2729 
2730  ShiftWordsRightByBits(f, fgLen, i);
2731  t = ShiftWordsLeftByBits(c, bcLen, i);
2732  c[bcLen] += t;
2733  bcLen += 2 * (t!=0);
2734  assert(bcLen <= N);
2735 
2736  bool swap = Compare(f, g, fgLen)==-1;
2737  ConditionalSwapPointers(swap, f, g);
2738  ConditionalSwapPointers(swap, b, c);
2739  s ^= swap;
2740 
2741  fgLen -= 2 * !(f[fgLen-2] | f[fgLen-1]);
2742 
2743  Subtract(f, f, g, fgLen);
2744  t = Add(b, b, c, bcLen);
2745  b[bcLen] += t;
2746  bcLen += 2*t;
2747  assert(bcLen <= N);
2748  }
2749 }
2750 
2751 // R[N] - result = A/(2^k) mod M
2752 // A[N] - input
2753 // M[N] - modulus
2754 
2755 void DivideByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
2756 {
2757  CopyWords(R, A, N);
2758 
2759  while (k--)
2760  {
2761  if (R[0]%2==0)
2762  ShiftWordsRightByBits(R, N, 1);
2763  else
2764  {
2765  word carry = Add(R, R, M, N);
2766  ShiftWordsRightByBits(R, N, 1);
2767  R[N-1] += carry<<(WORD_BITS-1);
2768  }
2769  }
2770 }
2771 
2772 // R[N] - result = A*(2^k) mod M
2773 // A[N] - input
2774 // M[N] - modulus
2775 
2776 void MultiplyByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
2777 {
2778  CopyWords(R, A, N);
2779 
2780  while (k--)
2781  if (ShiftWordsLeftByBits(R, N, 1) || Compare(R, M, N)>=0)
2782  Subtract(R, R, M, N);
2783 }
2784 
2785 // ******************************************************************
2786 
2787 InitializeInteger::InitializeInteger()
2788 {
2789  if (!g_pAssignIntToInteger)
2790  {
2791  SetFunctionPointers();
2792  g_pAssignIntToInteger = (CryptoPP::PAssignIntToInteger)AssignIntToInteger;
2793  }
2794 }
2795 
2796 static const unsigned int RoundupSizeTable[] = {2, 2, 2, 4, 4, 8, 8, 8, 8};
2797 
2798 static inline size_t RoundupSize(size_t n)
2799 {
2800  if (n<=8)
2801  return RoundupSizeTable[n];
2802  else if (n<=16)
2803  return 16;
2804  else if (n<=32)
2805  return 32;
2806  else if (n<=64)
2807  return 64;
2808  else return size_t(1) << BitPrecision(n-1);
2809 }
2810 
2812  : reg(2), sign(POSITIVE)
2813 {
2814  reg[0] = reg[1] = 0;
2815 }
2816 
2818  : reg(RoundupSize(t.WordCount())), sign(t.sign)
2819 {
2820  CopyWords(reg, t.reg, reg.size());
2821 }
2822 
2823 Integer::Integer(Sign s, lword value)
2824  : reg(2), sign(s)
2825 {
2826  reg[0] = word(value);
2827  reg[1] = word(SafeRightShift<WORD_BITS>(value));
2828 }
2829 
2830 Integer::Integer(signed long value)
2831  : reg(2)
2832 {
2833  if (value >= 0)
2834  sign = POSITIVE;
2835  else
2836  {
2837  sign = NEGATIVE;
2838  value = -value;
2839  }
2840  reg[0] = word(value);
2841  reg[1] = word(SafeRightShift<WORD_BITS>((unsigned long)value));
2842 }
2843 
2844 Integer::Integer(Sign s, word high, word low)
2845  : reg(2), sign(s)
2846 {
2847  reg[0] = low;
2848  reg[1] = high;
2849 }
2850 
2852 {
2853  if (ByteCount() > sizeof(long))
2854  return false;
2855 
2856  unsigned long value = (unsigned long)reg[0];
2857  value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
2858 
2859  if (sign==POSITIVE)
2860  return (signed long)value >= 0;
2861  else
2862  return -(signed long)value < 0;
2863 }
2864 
2865 signed long Integer::ConvertToLong() const
2866 {
2867  assert(IsConvertableToLong());
2868 
2869  unsigned long value = (unsigned long)reg[0];
2870  value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
2871  return sign==POSITIVE ? value : -(signed long)value;
2872 }
2873 
2874 Integer::Integer(BufferedTransformation &encodedInteger, size_t byteCount, Signedness s, ByteOrder o)
2875 {
2876  assert(o == BIG_ENDIAN_ORDER || o == LITTLE_ENDIAN_ORDER);
2877 
2878  if(o == LITTLE_ENDIAN_ORDER)
2879  {
2880  SecByteBlock block(byteCount);
2881  encodedInteger.Get(block, block.size());
2882  std::reverse(block.begin(), block.begin()+block.size());
2883 
2884  Decode(block.begin(), block.size(), s);
2885  return;
2886  }
2887 
2888  Decode(encodedInteger, byteCount, s);
2889 }
2890 
2891 Integer::Integer(const byte *encodedInteger, size_t byteCount, Signedness s, ByteOrder o)
2892 {
2893  assert(o == BIG_ENDIAN_ORDER || o == LITTLE_ENDIAN_ORDER);
2894 
2895  if(o == LITTLE_ENDIAN_ORDER)
2896  {
2897  SecByteBlock block(byteCount);
2898  std::reverse_copy(encodedInteger, encodedInteger+byteCount, block.begin());
2899 
2900  Decode(block.begin(), block.size(), s);
2901  return;
2902  }
2903 
2904  Decode(encodedInteger, byteCount, s);
2905 }
2906 
2908 {
2909  BERDecode(bt);
2910 }
2911 
2913 {
2914  Randomize(rng, bitcount);
2915 }
2916 
2917 Integer::Integer(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
2918 {
2919  if (!Randomize(rng, min, max, rnType, equiv, mod))
2921 }
2922 
2924 {
2925  Integer r((word)0, BitsToWords(e+1));
2926  r.SetBit(e);
2927  return r;
2928 }
2929 
2930 template <long i>
2932 {
2933  Integer * operator()() const
2934  {
2935  return new Integer(i);
2936  }
2937 };
2938 
2940 {
2941  return Singleton<Integer>().Ref();
2942 }
2943 
2945 {
2946  return Singleton<Integer, NewInteger<1> >().Ref();
2947 }
2948 
2950 {
2951  return Singleton<Integer, NewInteger<2> >().Ref();
2952 }
2953 
2954 bool Integer::operator!() const
2955 {
2956  return IsNegative() ? false : (reg[0]==0 && WordCount()==0);
2957 }
2958 
2959 Integer& Integer::operator=(const Integer& t)
2960 {
2961  if (this != &t)
2962  {
2963  if (reg.size() != t.reg.size() || t.reg[t.reg.size()/2] == 0)
2964  reg.New(RoundupSize(t.WordCount()));
2965  CopyWords(reg, t.reg, reg.size());
2966  sign = t.sign;
2967  }
2968  return *this;
2969 }
2970 
2971 bool Integer::GetBit(size_t n) const
2972 {
2973  if (n/WORD_BITS >= reg.size())
2974  return 0;
2975  else
2976  return bool((reg[n/WORD_BITS] >> (n % WORD_BITS)) & 1);
2977 }
2978 
2979 void Integer::SetBit(size_t n, bool value)
2980 {
2981  if (value)
2982  {
2983  reg.CleanGrow(RoundupSize(BitsToWords(n+1)));
2984  reg[n/WORD_BITS] |= (word(1) << (n%WORD_BITS));
2985  }
2986  else
2987  {
2988  if (n/WORD_BITS < reg.size())
2989  reg[n/WORD_BITS] &= ~(word(1) << (n%WORD_BITS));
2990  }
2991 }
2992 
2993 byte Integer::GetByte(size_t n) const
2994 {
2995  if (n/WORD_SIZE >= reg.size())
2996  return 0;
2997  else
2998  return byte(reg[n/WORD_SIZE] >> ((n%WORD_SIZE)*8));
2999 }
3000 
3001 void Integer::SetByte(size_t n, byte value)
3002 {
3003  reg.CleanGrow(RoundupSize(BytesToWords(n+1)));
3004  reg[n/WORD_SIZE] &= ~(word(0xff) << 8*(n%WORD_SIZE));
3005  reg[n/WORD_SIZE] |= (word(value) << 8*(n%WORD_SIZE));
3006 }
3007 
3008 lword Integer::GetBits(size_t i, size_t n) const
3009 {
3010  lword v = 0;
3011  assert(n <= sizeof(v)*8);
3012  for (unsigned int j=0; j<n; j++)
3013  v |= lword(GetBit(i+j)) << j;
3014  return v;
3015 }
3016 
3017 Integer Integer::operator-() const
3018 {
3019  Integer result(*this);
3020  result.Negate();
3021  return result;
3022 }
3023 
3024 Integer Integer::AbsoluteValue() const
3025 {
3026  Integer result(*this);
3027  result.sign = POSITIVE;
3028  return result;
3029 }
3030 
3032 {
3033  reg.swap(a.reg);
3034  std::swap(sign, a.sign);
3035 }
3036 
3037 Integer::Integer(word value, size_t length)
3038  : reg(RoundupSize(length)), sign(POSITIVE)
3039 {
3040  reg[0] = value;
3041  SetWords(reg+1, 0, reg.size()-1);
3042 }
3043 
3044 template <class T>
3045 static Integer StringToInteger(const T *str, ByteOrder order)
3046 {
3047  assert( order == BIG_ENDIAN_ORDER || order == LITTLE_ENDIAN_ORDER );
3048 
3049  int radix, sign = 1;
3050  // GCC workaround
3051  // std::char_traits<wchar_t>::length() not defined in GCC 3.2 and STLport 4.5.3
3052  unsigned int length;
3053  for (length = 0; str[length] != 0; length++) {}
3054 
3055  Integer v;
3056 
3057  if (length == 0)
3058  return Integer::Zero();
3059 
3060  switch (str[length-1])
3061  {
3062  case 'h':
3063  case 'H':
3064  radix=16;
3065  break;
3066  case 'o':
3067  case 'O':
3068  radix=8;
3069  break;
3070  case 'b':
3071  case 'B':
3072  radix=2;
3073  break;
3074  default:
3075  radix=10;
3076  }
3077 
3078  // 'str' is of length 1 or more
3079  if (str[0] == '-')
3080  {
3081  sign = -1;
3082  str += 1, length -= 1;
3083  }
3084 
3085  if (length > 2 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X'))
3086  {
3087  radix = 16;
3088  str += 2, length -= 2;
3089  }
3090 
3091  if(order == BIG_ENDIAN_ORDER)
3092  {
3093  for (unsigned int i=0; i<length; i++)
3094  {
3095  int digit, ch = static_cast<int>(str[i]);
3096 
3097  if (ch >= '0' && ch <= '9')
3098  digit = ch - '0';
3099  else if (ch >= 'A' && ch <= 'F')
3100  digit = ch - 'A' + 10;
3101  else if (ch >= 'a' && ch <= 'f')
3102  digit = ch - 'a' + 10;
3103  else
3104  digit = radix;
3105 
3106  if (digit < radix)
3107  {
3108  v *= radix;
3109  v += digit;
3110  }
3111  }
3112  }
3113  else if(radix == 16 && order == LITTLE_ENDIAN_ORDER)
3114  {
3115  // Nibble high, low and count
3116  unsigned int nh = 0, nl = 0, nc = 0;
3117  Integer position(Integer::One());
3118 
3119  for (unsigned int i=0; i<length; i++)
3120  {
3121  int digit, ch = static_cast<int>(str[i]);
3122 
3123  if (ch >= '0' && ch <= '9')
3124  digit = ch - '0';
3125  else if (ch >= 'A' && ch <= 'F')
3126  digit = ch - 'A' + 10;
3127  else if (ch >= 'a' && ch <= 'f')
3128  digit = ch - 'a' + 10;
3129  else
3130  digit = radix;
3131 
3132  if (digit < radix)
3133  {
3134  if(nc++ == 0)
3135  nh = digit;
3136  else
3137  nl = digit;
3138 
3139  if(nc == 2)
3140  {
3141  v += position * (nh << 4 | nl);
3142  nc = 0, position <<= 8;
3143  }
3144  }
3145  }
3146 
3147  if(nc == 1)
3148  v += nh * position;
3149  }
3150  else // LITTLE_ENDIAN_ORDER && radix != 16
3151  {
3152  for (int i=static_cast<int>(length)-1; i>=0; i--)
3153  {
3154  int digit, ch = static_cast<int>(str[i]);
3155 
3156  if (ch >= '0' && ch <= '9')
3157  digit = ch - '0';
3158  else if (ch >= 'A' && ch <= 'F')
3159  digit = ch - 'A' + 10;
3160  else if (ch >= 'a' && ch <= 'f')
3161  digit = ch - 'a' + 10;
3162  else
3163  digit = radix;
3164 
3165  if (digit < radix)
3166  {
3167  v *= radix;
3168  v += digit;
3169  }
3170  }
3171  }
3172 
3173  if (sign == -1)
3174  v.Negate();
3175 
3176  return v;
3177 }
3178 
3179 Integer::Integer(const char *str, ByteOrder order)
3180  : reg(2), sign(POSITIVE)
3181 {
3182  *this = StringToInteger(str,order);
3183 }
3184 
3185 Integer::Integer(const wchar_t *str, ByteOrder order)
3186  : reg(2), sign(POSITIVE)
3187 {
3188  *this = StringToInteger(str,order);
3189 }
3190 
3191 unsigned int Integer::WordCount() const
3192 {
3193  return (unsigned int)CountWords(reg, reg.size());
3194 }
3195 
3196 unsigned int Integer::ByteCount() const
3197 {
3198  unsigned wordCount = WordCount();
3199  if (wordCount)
3200  return (wordCount-1)*WORD_SIZE + BytePrecision(reg[wordCount-1]);
3201  else
3202  return 0;
3203 }
3204 
3205 unsigned int Integer::BitCount() const
3206 {
3207  unsigned wordCount = WordCount();
3208  if (wordCount)
3209  return (wordCount-1)*WORD_BITS + BitPrecision(reg[wordCount-1]);
3210  else
3211  return 0;
3212 }
3213 
3214 void Integer::Decode(const byte *input, size_t inputLen, Signedness s)
3215 {
3216  StringStore store(input, inputLen);
3217  Decode(store, inputLen, s);
3218 }
3219 
3221 {
3222  assert(bt.MaxRetrievable() >= inputLen);
3223 
3224  byte b;
3225  bt.Peek(b);
3226  sign = ((s==SIGNED) && (b & 0x80)) ? NEGATIVE : POSITIVE;
3227 
3228  while (inputLen>0 && (sign==POSITIVE ? b==0 : b==0xff))
3229  {
3230  bt.Skip(1);
3231  inputLen--;
3232  bt.Peek(b);
3233  }
3234 
3235  // The call to CleanNew is optimized away above -O0/-Og.
3236  const size_t size = RoundupSize(BytesToWords(inputLen));
3237  reg.CleanNew(size);
3238 
3239  assert(reg.SizeInBytes() >= inputLen);
3240  for (size_t i=inputLen; i > 0; i--)
3241  {
3242  bt.Get(b);
3243  reg[(i-1)/WORD_SIZE] |= word(b) << ((i-1)%WORD_SIZE)*8;
3244  }
3245 
3246  if (sign == NEGATIVE)
3247  {
3248  for (size_t i=inputLen; i<reg.size()*WORD_SIZE; i++)
3249  reg[i/WORD_SIZE] |= word(0xff) << (i%WORD_SIZE)*8;
3250  TwosComplement(reg, reg.size());
3251  }
3252 }
3253 
3254 size_t Integer::MinEncodedSize(Signedness signedness) const
3255 {
3256  unsigned int outputLen = STDMAX(1U, ByteCount());
3257  if (signedness == UNSIGNED)
3258  return outputLen;
3259  if (NotNegative() && (GetByte(outputLen-1) & 0x80))
3260  outputLen++;
3261  if (IsNegative() && *this < -Power2(outputLen*8-1))
3262  outputLen++;
3263  return outputLen;
3264 }
3265 
3266 void Integer::Encode(byte *output, size_t outputLen, Signedness signedness) const
3267 {
3268  assert(output && outputLen);
3269  ArraySink sink(output, outputLen);
3270  Encode(sink, outputLen, signedness);
3271 }
3272 
3273 void Integer::Encode(BufferedTransformation &bt, size_t outputLen, Signedness signedness) const
3274 {
3275  if (signedness == UNSIGNED || NotNegative())
3276  {
3277  for (size_t i=outputLen; i > 0; i--)
3278  bt.Put(GetByte(i-1));
3279  }
3280  else
3281  {
3282  // take two's complement of *this
3283  Integer temp = Integer::Power2(8*STDMAX((size_t)ByteCount(), outputLen)) + *this;
3284  temp.Encode(bt, outputLen, UNSIGNED);
3285  }
3286 }
3287 
3289 {
3290  DERGeneralEncoder enc(bt, INTEGER);
3292  enc.MessageEnd();
3293 }
3294 
3295 void Integer::BERDecode(const byte *input, size_t len)
3296 {
3297  StringStore store(input, len);
3298  BERDecode(store);
3299 }
3300 
3302 {
3303  BERGeneralDecoder dec(bt, INTEGER);
3304  if (!dec.IsDefiniteLength() || dec.MaxRetrievable() < dec.RemainingLength())
3305  BERDecodeError();
3306  Decode(dec, (size_t)dec.RemainingLength(), SIGNED);
3307  dec.MessageEnd();
3308 }
3309 
3311 {
3312  DERGeneralEncoder enc(bt, OCTET_STRING);
3313  Encode(enc, length);
3314  enc.MessageEnd();
3315 }
3316 
3318 {
3319  BERGeneralDecoder dec(bt, OCTET_STRING);
3320  if (!dec.IsDefiniteLength() || dec.RemainingLength() != length)
3321  BERDecodeError();
3322  Decode(dec, length);
3323  dec.MessageEnd();
3324 }
3325 
3326 size_t Integer::OpenPGPEncode(byte *output, size_t len) const
3327 {
3328  ArraySink sink(output, len);
3329  return OpenPGPEncode(sink);
3330 }
3331 
3333 {
3334  word16 bitCount = word16(BitCount());
3335  bt.PutWord16(bitCount);
3336  size_t byteCount = BitsToBytes(bitCount);
3337  Encode(bt, byteCount);
3338  return 2 + byteCount;
3339 }
3340 
3341 void Integer::OpenPGPDecode(const byte *input, size_t len)
3342 {
3343  StringStore store(input, len);
3344  OpenPGPDecode(store);
3345 }
3346 
3348 {
3349  word16 bitCount;
3350  if (bt.GetWord16(bitCount) != 2 || bt.MaxRetrievable() < BitsToBytes(bitCount))
3351  throw OpenPGPDecodeErr();
3352  Decode(bt, BitsToBytes(bitCount));
3353 }
3354 
3356 {
3357  const size_t nbytes = nbits/8 + 1;
3358  SecByteBlock buf(nbytes);
3359  rng.GenerateBlock(buf, nbytes);
3360  if (nbytes)
3361  buf[0] = (byte)Crop(buf[0], nbits % 8);
3362  Decode(buf, nbytes, UNSIGNED);
3363 }
3364 
3365 void Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max)
3366 {
3367  if (min > max)
3368  throw InvalidArgument("Integer: Min must be no greater than Max");
3369 
3370  Integer range = max - min;
3371  const unsigned int nbits = range.BitCount();
3372 
3373  do
3374  {
3375  Randomize(rng, nbits);
3376  }
3377  while (*this > range);
3378 
3379  *this += min;
3380 }
3381 
3382 bool Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
3383 {
3384  return GenerateRandomNoThrow(rng, MakeParameters("Min", min)("Max", max)("RandomNumberType", rnType)("EquivalentTo", equiv)("Mod", mod));
3385 }
3386 
3388 {
3389 public:
3390  KDF2_RNG(const byte *seed, size_t seedSize)
3391  : m_counter(0), m_counterAndSeed(seedSize + 4)
3392  {
3393  memcpy(m_counterAndSeed + 4, seed, seedSize);
3394  }
3395 
3396  void GenerateBlock(byte *output, size_t size)
3397  {
3398  PutWord(false, BIG_ENDIAN_ORDER, m_counterAndSeed, m_counter);
3399  ++m_counter;
3400  P1363_KDF2<SHA1>::DeriveKey(output, size, m_counterAndSeed, m_counterAndSeed.size(), NULL, 0);
3401  }
3402 
3403 private:
3404  word32 m_counter;
3405  SecByteBlock m_counterAndSeed;
3406 };
3407 
3408 bool Integer::GenerateRandomNoThrow(RandomNumberGenerator &i_rng, const NameValuePairs &params)
3409 {
3410  Integer min = params.GetValueWithDefault("Min", Integer::Zero());
3411  Integer max;
3412  if (!params.GetValue("Max", max))
3413  {
3414  int bitLength;
3415  if (params.GetIntValue("BitLength", bitLength))
3416  max = Integer::Power2(bitLength);
3417  else
3418  throw InvalidArgument("Integer: missing Max argument");
3419  }
3420  if (min > max)
3421  throw InvalidArgument("Integer: Min must be no greater than Max");
3422 
3423  Integer equiv = params.GetValueWithDefault("EquivalentTo", Integer::Zero());
3424  Integer mod = params.GetValueWithDefault("Mod", Integer::One());
3425 
3426  if (equiv.IsNegative() || equiv >= mod)
3427  throw InvalidArgument("Integer: invalid EquivalentTo and/or Mod argument");
3428 
3429  Integer::RandomNumberType rnType = params.GetValueWithDefault("RandomNumberType", Integer::ANY);
3430 
3431  member_ptr<KDF2_RNG> kdf2Rng;
3433  if (params.GetValue(Name::Seed(), seed))
3434  {
3435  ByteQueue bq;
3436  DERSequenceEncoder seq(bq);
3437  min.DEREncode(seq);
3438  max.DEREncode(seq);
3439  equiv.DEREncode(seq);
3440  mod.DEREncode(seq);
3441  DEREncodeUnsigned(seq, rnType);
3442  DEREncodeOctetString(seq, seed.begin(), seed.size());
3443  seq.MessageEnd();
3444 
3445  SecByteBlock finalSeed((size_t)bq.MaxRetrievable());
3446  bq.Get(finalSeed, finalSeed.size());
3447  kdf2Rng.reset(new KDF2_RNG(finalSeed.begin(), finalSeed.size()));
3448  }
3449  RandomNumberGenerator &rng = kdf2Rng.get() ? (RandomNumberGenerator &)*kdf2Rng : i_rng;
3450 
3451  switch (rnType)
3452  {
3453  case ANY:
3454  if (mod == One())
3455  Randomize(rng, min, max);
3456  else
3457  {
3458  Integer min1 = min + (equiv-min)%mod;
3459  if (max < min1)
3460  return false;
3461  Randomize(rng, Zero(), (max - min1) / mod);
3462  *this *= mod;
3463  *this += min1;
3464  }
3465  return true;
3466 
3467  case PRIME:
3468  {
3469  const PrimeSelector *pSelector = params.GetValueWithDefault(Name::PointerToPrimeSelector(), (const PrimeSelector *)NULL);
3470 
3471  int i;
3472  i = 0;
3473  while (1)
3474  {
3475  if (++i==16)
3476  {
3477  // check if there are any suitable primes in [min, max]
3478  Integer first = min;
3479  if (FirstPrime(first, max, equiv, mod, pSelector))
3480  {
3481  // if there is only one suitable prime, we're done
3482  *this = first;
3483  if (!FirstPrime(first, max, equiv, mod, pSelector))
3484  return true;
3485  }
3486  else
3487  return false;
3488  }
3489 
3490  Randomize(rng, min, max);
3491  if (FirstPrime(*this, STDMIN(*this+mod*PrimeSearchInterval(max), max), equiv, mod, pSelector))
3492  return true;
3493  }
3494  }
3495 
3496  default:
3497  throw InvalidArgument("Integer: invalid RandomNumberType argument");
3498  }
3499 }
3500 
3501 std::istream& operator>>(std::istream& in, Integer &a)
3502 {
3503  char c;
3504  unsigned int length = 0;
3505  SecBlock<char> str(length + 16);
3506 
3507  std::ws(in);
3508 
3509  do
3510  {
3511  in.read(&c, 1);
3512  str[length++] = c;
3513  if (length >= str.size())
3514  str.Grow(length + 16);
3515  }
3516  while (in && (c=='-' || c=='x' || (c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F') || c=='h' || c=='H' || c=='o' || c=='O' || c==',' || c=='.'));
3517 
3518  if (in.gcount())
3519  in.putback(c);
3520  str[length-1] = '\0';
3521  a = Integer(str);
3522 
3523  return in;
3524 }
3525 
3526 std::ostream& operator<<(std::ostream& out, const Integer &a)
3527 {
3528  // Get relevant conversion specifications from ostream.
3529  const long f = out.flags() & std::ios::basefield; // Get base digits.
3530  int base, block;
3531  char suffix;
3532  switch(f)
3533  {
3534  case std::ios::oct :
3535  base = 8;
3536  block = 8;
3537  suffix = 'o';
3538  break;
3539  case std::ios::hex :
3540  base = 16;
3541  block = 4;
3542  suffix = 'h';
3543  break;
3544  default :
3545  base = 10;
3546  block = 3;
3547  suffix = '.';
3548  }
3549 
3550  Integer temp1=a, temp2;
3551 
3552  if (a.IsNegative())
3553  {
3554  out << '-';
3555  temp1.Negate();
3556  }
3557 
3558  if (!a)
3559  out << '0';
3560 
3561  static const char upper[]="0123456789ABCDEF";
3562  static const char lower[]="0123456789abcdef";
3563 
3564  const char* vec = (out.flags() & std::ios::uppercase) ? upper : lower;
3565  unsigned int i=0;
3566  SecBlock<char> s(a.BitCount() / (SaturatingSubtract1(BitPrecision(base),1U)) + 1);
3567 
3568  while (!!temp1)
3569  {
3570  word digit;
3571  Integer::Divide(digit, temp2, temp1, base);
3572  s[i++]=vec[digit];
3573  temp1.swap(temp2);
3574  }
3575 
3576  while (i--)
3577  {
3578  out << s[i];
3579 // if (i && !(i%block))
3580 // out << ",";
3581  }
3582 
3583 #ifdef CRYPTOPP_USE_STD_SHOWBASE
3584  if(out.flags() & std::ios_base::showbase)
3585  out << suffix;
3586 
3587  return out;
3588 #else
3589  return out << suffix;
3590 #endif
3591 }
3592 
3593 Integer& Integer::operator++()
3594 {
3595  if (NotNegative())
3596  {
3597  if (Increment(reg, reg.size()))
3598  {
3599  reg.CleanGrow(2*reg.size());
3600  reg[reg.size()/2]=1;
3601  }
3602  }
3603  else
3604  {
3605  word borrow = Decrement(reg, reg.size());
3606  assert(!borrow);
3607  CRYPTOPP_UNUSED(borrow);
3608 
3609  if (WordCount()==0)
3610  *this = Zero();
3611  }
3612  return *this;
3613 }
3614 
3615 Integer& Integer::operator--()
3616 {
3617  if (IsNegative())
3618  {
3619  if (Increment(reg, reg.size()))
3620  {
3621  reg.CleanGrow(2*reg.size());
3622  reg[reg.size()/2]=1;
3623  }
3624  }
3625  else
3626  {
3627  if (Decrement(reg, reg.size()))
3628  *this = -One();
3629  }
3630  return *this;
3631 }
3632 
3633 void PositiveAdd(Integer &sum, const Integer &a, const Integer& b)
3634 {
3635  int carry;
3636  if (a.reg.size() == b.reg.size())
3637  carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
3638  else if (a.reg.size() > b.reg.size())
3639  {
3640  carry = Add(sum.reg, a.reg, b.reg, b.reg.size());
3641  CopyWords(sum.reg+b.reg.size(), a.reg+b.reg.size(), a.reg.size()-b.reg.size());
3642  carry = Increment(sum.reg+b.reg.size(), a.reg.size()-b.reg.size(), carry);
3643  }
3644  else
3645  {
3646  carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
3647  CopyWords(sum.reg+a.reg.size(), b.reg+a.reg.size(), b.reg.size()-a.reg.size());
3648  carry = Increment(sum.reg+a.reg.size(), b.reg.size()-a.reg.size(), carry);
3649  }
3650 
3651  if (carry)
3652  {
3653  sum.reg.CleanGrow(2*sum.reg.size());
3654  sum.reg[sum.reg.size()/2] = 1;
3655  }
3656  sum.sign = Integer::POSITIVE;
3657 }
3658 
3659 void PositiveSubtract(Integer &diff, const Integer &a, const Integer& b)
3660 {
3661  unsigned aSize = a.WordCount();
3662  aSize += aSize%2;
3663  unsigned bSize = b.WordCount();
3664  bSize += bSize%2;
3665 
3666  if (aSize == bSize)
3667  {
3668  if (Compare(a.reg, b.reg, aSize) >= 0)
3669  {
3670  Subtract(diff.reg, a.reg, b.reg, aSize);
3671  diff.sign = Integer::POSITIVE;
3672  }
3673  else
3674  {
3675  Subtract(diff.reg, b.reg, a.reg, aSize);
3676  diff.sign = Integer::NEGATIVE;
3677  }
3678  }
3679  else if (aSize > bSize)
3680  {
3681  word borrow = Subtract(diff.reg, a.reg, b.reg, bSize);
3682  CopyWords(diff.reg+bSize, a.reg+bSize, aSize-bSize);
3683  borrow = Decrement(diff.reg+bSize, aSize-bSize, borrow);
3684  assert(!borrow);
3685  diff.sign = Integer::POSITIVE;
3686  }
3687  else
3688  {
3689  word borrow = Subtract(diff.reg, b.reg, a.reg, aSize);
3690  CopyWords(diff.reg+aSize, b.reg+aSize, bSize-aSize);
3691  borrow = Decrement(diff.reg+aSize, bSize-aSize, borrow);
3692  assert(!borrow);
3693  diff.sign = Integer::NEGATIVE;
3694  }
3695 }
3696 
3697 // MSVC .NET 2003 workaround
3698 template <class T> inline const T& STDMAX2(const T& a, const T& b)
3699 {
3700  return a < b ? b : a;
3701 }
3702 
3703 Integer Integer::Plus(const Integer& b) const
3704 {
3705  Integer sum((word)0, STDMAX2(reg.size(), b.reg.size()));
3706  if (NotNegative())
3707  {
3708  if (b.NotNegative())
3709  PositiveAdd(sum, *this, b);
3710  else
3711  PositiveSubtract(sum, *this, b);
3712  }
3713  else
3714  {
3715  if (b.NotNegative())
3716  PositiveSubtract(sum, b, *this);
3717  else
3718  {
3719  PositiveAdd(sum, *this, b);
3720  sum.sign = Integer::NEGATIVE;
3721  }
3722  }
3723  return sum;
3724 }
3725 
3726 Integer& Integer::operator+=(const Integer& t)
3727 {
3728  reg.CleanGrow(t.reg.size());
3729  if (NotNegative())
3730  {
3731  if (t.NotNegative())
3732  PositiveAdd(*this, *this, t);
3733  else
3734  PositiveSubtract(*this, *this, t);
3735  }
3736  else
3737  {
3738  if (t.NotNegative())
3739  PositiveSubtract(*this, t, *this);
3740  else
3741  {
3742  PositiveAdd(*this, *this, t);
3743  sign = Integer::NEGATIVE;
3744  }
3745  }
3746  return *this;
3747 }
3748 
3749 Integer Integer::Minus(const Integer& b) const
3750 {
3751  Integer diff((word)0, STDMAX2(reg.size(), b.reg.size()));
3752  if (NotNegative())
3753  {
3754  if (b.NotNegative())
3755  PositiveSubtract(diff, *this, b);
3756  else
3757  PositiveAdd(diff, *this, b);
3758  }
3759  else
3760  {
3761  if (b.NotNegative())
3762  {
3763  PositiveAdd(diff, *this, b);
3764  diff.sign = Integer::NEGATIVE;
3765  }
3766  else
3767  PositiveSubtract(diff, b, *this);
3768  }
3769  return diff;
3770 }
3771 
3772 Integer& Integer::operator-=(const Integer& t)
3773 {
3774  reg.CleanGrow(t.reg.size());
3775  if (NotNegative())
3776  {
3777  if (t.NotNegative())
3778  PositiveSubtract(*this, *this, t);
3779  else
3780  PositiveAdd(*this, *this, t);
3781  }
3782  else
3783  {
3784  if (t.NotNegative())
3785  {
3786  PositiveAdd(*this, *this, t);
3787  sign = Integer::NEGATIVE;
3788  }
3789  else
3790  PositiveSubtract(*this, t, *this);
3791  }
3792  return *this;
3793 }
3794 
3795 Integer& Integer::operator<<=(size_t n)
3796 {
3797  const size_t wordCount = WordCount();
3798  const size_t shiftWords = n / WORD_BITS;
3799  const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
3800 
3801  reg.CleanGrow(RoundupSize(wordCount+BitsToWords(n)));
3802  ShiftWordsLeftByWords(reg, wordCount + shiftWords, shiftWords);
3803  ShiftWordsLeftByBits(reg+shiftWords, wordCount+BitsToWords(shiftBits), shiftBits);
3804  return *this;
3805 }
3806 
3807 Integer& Integer::operator>>=(size_t n)
3808 {
3809  const size_t wordCount = WordCount();
3810  const size_t shiftWords = n / WORD_BITS;
3811  const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
3812 
3813  ShiftWordsRightByWords(reg, wordCount, shiftWords);
3814  if (wordCount > shiftWords)
3815  ShiftWordsRightByBits(reg, wordCount-shiftWords, shiftBits);
3816  if (IsNegative() && WordCount()==0) // avoid -0
3817  *this = Zero();
3818  return *this;
3819 }
3820 
3821 void PositiveMultiply(Integer &product, const Integer &a, const Integer &b)
3822 {
3823  size_t aSize = RoundupSize(a.WordCount());
3824  size_t bSize = RoundupSize(b.WordCount());
3825 
3826  product.reg.CleanNew(RoundupSize(aSize+bSize));
3827  product.sign = Integer::POSITIVE;
3828 
3829  IntegerSecBlock workspace(aSize + bSize);
3830  AsymmetricMultiply(product.reg, workspace, a.reg, aSize, b.reg, bSize);
3831 }
3832 
3833 void Multiply(Integer &product, const Integer &a, const Integer &b)
3834 {
3835  PositiveMultiply(product, a, b);
3836 
3837  if (a.NotNegative() != b.NotNegative())
3838  product.Negate();
3839 }
3840 
3842 {
3843  Integer product;
3844  Multiply(product, *this, b);
3845  return product;
3846 }
3847 
3848 /*
3849 void PositiveDivide(Integer &remainder, Integer &quotient,
3850  const Integer &dividend, const Integer &divisor)
3851 {
3852  remainder.reg.CleanNew(divisor.reg.size());
3853  remainder.sign = Integer::POSITIVE;
3854  quotient.reg.New(0);
3855  quotient.sign = Integer::POSITIVE;
3856  unsigned i=dividend.BitCount();
3857  while (i--)
3858  {
3859  word overflow = ShiftWordsLeftByBits(remainder.reg, remainder.reg.size(), 1);
3860  remainder.reg[0] |= dividend[i];
3861  if (overflow || remainder >= divisor)
3862  {
3863  Subtract(remainder.reg, remainder.reg, divisor.reg, remainder.reg.size());
3864  quotient.SetBit(i);
3865  }
3866  }
3867 }
3868 */
3869 
3870 void PositiveDivide(Integer &remainder, Integer &quotient,
3871  const Integer &a, const Integer &b)
3872 {
3873  unsigned aSize = a.WordCount();
3874  unsigned bSize = b.WordCount();
3875 
3876  if (!bSize)
3877  throw Integer::DivideByZero();
3878 
3879  if (aSize < bSize)
3880  {
3881  remainder = a;
3882  remainder.sign = Integer::POSITIVE;
3883  quotient = Integer::Zero();
3884  return;
3885  }
3886 
3887  aSize += aSize%2; // round up to next even number
3888  bSize += bSize%2;
3889 
3890  remainder.reg.CleanNew(RoundupSize(bSize));
3891  remainder.sign = Integer::POSITIVE;
3892  quotient.reg.CleanNew(RoundupSize(aSize-bSize+2));
3893  quotient.sign = Integer::POSITIVE;
3894 
3895  IntegerSecBlock T(aSize+3*(bSize+2));
3896  Divide(remainder.reg, quotient.reg, T, a.reg, aSize, b.reg, bSize);
3897 }
3898 
3899 void Integer::Divide(Integer &remainder, Integer &quotient, const Integer &dividend, const Integer &divisor)
3900 {
3901  PositiveDivide(remainder, quotient, dividend, divisor);
3902 
3903  if (dividend.IsNegative())
3904  {
3905  quotient.Negate();
3906  if (remainder.NotZero())
3907  {
3908  --quotient;
3909  remainder = divisor.AbsoluteValue() - remainder;
3910  }
3911  }
3912 
3913  if (divisor.IsNegative())
3914  quotient.Negate();
3915 }
3916 
3917 void Integer::DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n)
3918 {
3919  q = a;
3920  q >>= n;
3921 
3922  const size_t wordCount = BitsToWords(n);
3923  if (wordCount <= a.WordCount())
3924  {
3925  r.reg.resize(RoundupSize(wordCount));
3926  CopyWords(r.reg, a.reg, wordCount);
3927  SetWords(r.reg+wordCount, 0, r.reg.size()-wordCount);
3928  if (n % WORD_BITS != 0)
3929  r.reg[wordCount-1] %= (word(1) << (n % WORD_BITS));
3930  }
3931  else
3932  {
3933  r.reg.resize(RoundupSize(a.WordCount()));
3934  CopyWords(r.reg, a.reg, r.reg.size());
3935  }
3936  r.sign = POSITIVE;
3937 
3938  if (a.IsNegative() && r.NotZero())
3939  {
3940  --q;
3941  r = Power2(n) - r;
3942  }
3943 }
3944 
3945 Integer Integer::DividedBy(const Integer &b) const
3946 {
3947  Integer remainder, quotient;
3948  Integer::Divide(remainder, quotient, *this, b);
3949  return quotient;
3950 }
3951 
3953 {
3954  Integer remainder, quotient;
3955  Integer::Divide(remainder, quotient, *this, b);
3956  return remainder;
3957 }
3958 
3959 void Integer::Divide(word &remainder, Integer &quotient, const Integer &dividend, word divisor)
3960 {
3961  if (!divisor)
3962  throw Integer::DivideByZero();
3963 
3964  assert(divisor);
3965 
3966  if ((divisor & (divisor-1)) == 0) // divisor is a power of 2
3967  {
3968  quotient = dividend >> (BitPrecision(divisor)-1);
3969  remainder = dividend.reg[0] & (divisor-1);
3970  return;
3971  }
3972 
3973  unsigned int i = dividend.WordCount();
3974  quotient.reg.CleanNew(RoundupSize(i));
3975  remainder = 0;
3976  while (i--)
3977  {
3978  quotient.reg[i] = DWord(dividend.reg[i], remainder) / divisor;
3979  remainder = DWord(dividend.reg[i], remainder) % divisor;
3980  }
3981 
3982  if (dividend.NotNegative())
3983  quotient.sign = POSITIVE;
3984  else
3985  {
3986  quotient.sign = NEGATIVE;
3987  if (remainder)
3988  {
3989  --quotient;
3990  remainder = divisor - remainder;
3991  }
3992  }
3993 }
3994 
3995 Integer Integer::DividedBy(word b) const
3996 {
3997  word remainder;
3998  Integer quotient;
3999  Integer::Divide(remainder, quotient, *this, b);
4000  return quotient;
4001 }
4002 
4003 word Integer::Modulo(word divisor) const
4004 {
4005  if (!divisor)
4006  throw Integer::DivideByZero();
4007 
4008  assert(divisor);
4009 
4010  word remainder;
4011 
4012  if ((divisor & (divisor-1)) == 0) // divisor is a power of 2
4013  remainder = reg[0] & (divisor-1);
4014  else
4015  {
4016  unsigned int i = WordCount();
4017 
4018  if (divisor <= 5)
4019  {
4020  DWord sum(0, 0);
4021  while (i--)
4022  sum += reg[i];
4023  remainder = sum % divisor;
4024  }
4025  else
4026  {
4027  remainder = 0;
4028  while (i--)
4029  remainder = DWord(reg[i], remainder) % divisor;
4030  }
4031  }
4032 
4033  if (IsNegative() && remainder)
4034  remainder = divisor - remainder;
4035 
4036  return remainder;
4037 }
4038 
4040 {
4041  if (!!(*this)) // don't flip sign if *this==0
4042  sign = Sign(1-sign);
4043 }
4044 
4045 int Integer::PositiveCompare(const Integer& t) const
4046 {
4047  unsigned size = WordCount(), tSize = t.WordCount();
4048 
4049  if (size == tSize)
4050  return CryptoPP::Compare(reg, t.reg, size);
4051  else
4052  return size > tSize ? 1 : -1;
4053 }
4054 
4055 int Integer::Compare(const Integer& t) const
4056 {
4057  if (NotNegative())
4058  {
4059  if (t.NotNegative())
4060  return PositiveCompare(t);
4061  else
4062  return 1;
4063  }
4064  else
4065  {
4066  if (t.NotNegative())
4067  return -1;
4068  else
4069  return -PositiveCompare(t);
4070  }
4071 }
4072 
4074 {
4075  if (!IsPositive())
4076  return Zero();
4077 
4078  // overestimate square root
4079  Integer x, y = Power2((BitCount()+1)/2);
4080  assert(y*y >= *this);
4081 
4082  do
4083  {
4084  x = y;
4085  y = (x + *this/x) >> 1;
4086  } while (y<x);
4087 
4088  return x;
4089 }
4090 
4091 bool Integer::IsSquare() const
4092 {
4093  Integer r = SquareRoot();
4094  return *this == r.Squared();
4095 }
4096 
4097 bool Integer::IsUnit() const
4098 {
4099  return (WordCount() == 1) && (reg[0] == 1);
4100 }
4101 
4103 {
4104  return IsUnit() ? *this : Zero();
4105 }
4106 
4107 Integer a_times_b_mod_c(const Integer &x, const Integer& y, const Integer& m)
4108 {
4109  return x*y%m;
4110 }
4111 
4112 Integer a_exp_b_mod_c(const Integer &x, const Integer& e, const Integer& m)
4113 {
4114  ModularArithmetic mr(m);
4115  return mr.Exponentiate(x, e);
4116 }
4117 
4119 {
4120  return EuclideanDomainOf<Integer>().Gcd(a, b);
4121 }
4122 
4124 {
4125  assert(m.NotNegative());
4126 
4127  if (IsNegative())
4128  return Modulo(m).InverseMod(m);
4129 
4130  if (m.IsEven())
4131  {
4132  if (!m || IsEven())
4133  return Zero(); // no inverse
4134  if (*this == One())
4135  return One();
4136 
4137  Integer u = m.Modulo(*this).InverseMod(*this);
4138  return !u ? Zero() : (m*(*this-u)+1)/(*this);
4139  }
4140 
4141  SecBlock<word> T(m.reg.size() * 4);
4142  Integer r((word)0, m.reg.size());
4143  unsigned k = AlmostInverse(r.reg, T, reg, reg.size(), m.reg, m.reg.size());
4144  DivideByPower2Mod(r.reg, r.reg, k, m.reg, m.reg.size());
4145  return r;
4146 }
4147 
4148 word Integer::InverseMod(word mod) const
4149 {
4150  word g0 = mod, g1 = *this % mod;
4151  word v0 = 0, v1 = 1;
4152  word y;
4153 
4154  while (g1)
4155  {
4156  if (g1 == 1)
4157  return v1;
4158  y = g0 / g1;
4159  g0 = g0 % g1;
4160  v0 += y * v1;
4161 
4162  if (!g0)
4163  break;
4164  if (g0 == 1)
4165  return mod-v0;
4166  y = g1 / g0;
4167  g1 = g1 % g0;
4168  v1 += y * v0;
4169  }
4170  return 0;
4171 }
4172 
4173 // ********************************************************
4174 
4175 ModularArithmetic::ModularArithmetic(BufferedTransformation &bt)
4176 {
4177  BERSequenceDecoder seq(bt);
4178  OID oid(seq);
4179  if (oid != ASN1::prime_field())
4180  BERDecodeError();
4181  m_modulus.BERDecode(seq);
4182  seq.MessageEnd();
4183  m_result.reg.resize(m_modulus.reg.size());
4184 }
4185 
4186 void ModularArithmetic::DEREncode(BufferedTransformation &bt) const
4187 {
4188  DERSequenceEncoder seq(bt);
4189  ASN1::prime_field().DEREncode(seq);
4190  m_modulus.DEREncode(seq);
4191  seq.MessageEnd();
4192 }
4193 
4194 void ModularArithmetic::DEREncodeElement(BufferedTransformation &out, const Element &a) const
4195 {
4196  a.DEREncodeAsOctetString(out, MaxElementByteLength());
4197 }
4198 
4199 void ModularArithmetic::BERDecodeElement(BufferedTransformation &in, Element &a) const
4200 {
4201  a.BERDecodeAsOctetString(in, MaxElementByteLength());
4202 }
4203 
4204 const Integer& ModularArithmetic::Half(const Integer &a) const
4205 {
4206  if (a.reg.size()==m_modulus.reg.size())
4207  {
4208  CryptoPP::DivideByPower2Mod(m_result.reg.begin(), a.reg, 1, m_modulus.reg, a.reg.size());
4209  return m_result;
4210  }
4211  else
4212  return m_result1 = (a.IsEven() ? (a >> 1) : ((a+m_modulus) >> 1));
4213 }
4214 
4215 const Integer& ModularArithmetic::Add(const Integer &a, const Integer &b) const
4216 {
4217  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4218  {
4219  if (CryptoPP::Add(m_result.reg.begin(), a.reg, b.reg, a.reg.size())
4220  || Compare(m_result.reg, m_modulus.reg, a.reg.size()) >= 0)
4221  {
4222  CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
4223  }
4224  return m_result;
4225  }
4226  else
4227  {
4228  m_result1 = a+b;
4229  if (m_result1 >= m_modulus)
4230  m_result1 -= m_modulus;
4231  return m_result1;
4232  }
4233 }
4234 
4235 Integer& ModularArithmetic::Accumulate(Integer &a, const Integer &b) const
4236 {
4237  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4238  {
4239  if (CryptoPP::Add(a.reg, a.reg, b.reg, a.reg.size())
4240  || Compare(a.reg, m_modulus.reg, a.reg.size()) >= 0)
4241  {
4242  CryptoPP::Subtract(a.reg, a.reg, m_modulus.reg, a.reg.size());
4243  }
4244  }
4245  else
4246  {
4247  a+=b;
4248  if (a>=m_modulus)
4249  a-=m_modulus;
4250  }
4251 
4252  return a;
4253 }
4254 
4255 const Integer& ModularArithmetic::Subtract(const Integer &a, const Integer &b) const
4256 {
4257  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4258  {
4259  if (CryptoPP::Subtract(m_result.reg.begin(), a.reg, b.reg, a.reg.size()))
4260  CryptoPP::Add(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
4261  return m_result;
4262  }
4263  else
4264  {
4265  m_result1 = a-b;
4266  if (m_result1.IsNegative())
4267  m_result1 += m_modulus;
4268  return m_result1;
4269  }
4270 }
4271 
4272 Integer& ModularArithmetic::Reduce(Integer &a, const Integer &b) const
4273 {
4274  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4275  {
4276  if (CryptoPP::Subtract(a.reg, a.reg, b.reg, a.reg.size()))
4277  CryptoPP::Add(a.reg, a.reg, m_modulus.reg, a.reg.size());
4278  }
4279  else
4280  {
4281  a-=b;
4282  if (a.IsNegative())
4283  a+=m_modulus;
4284  }
4285 
4286  return a;
4287 }
4288 
4289 const Integer& ModularArithmetic::Inverse(const Integer &a) const
4290 {
4291  if (!a)
4292  return a;
4293 
4294  CopyWords(m_result.reg.begin(), m_modulus.reg, m_modulus.reg.size());
4295  if (CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, a.reg, a.reg.size()))
4296  Decrement(m_result.reg.begin()+a.reg.size(), m_modulus.reg.size()-a.reg.size());
4297 
4298  return m_result;
4299 }
4300 
4301 Integer ModularArithmetic::CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
4302 {
4303  if (m_modulus.IsOdd())
4304  {
4305  MontgomeryRepresentation dr(m_modulus);
4306  return dr.ConvertOut(dr.CascadeExponentiate(dr.ConvertIn(x), e1, dr.ConvertIn(y), e2));
4307  }
4308  else
4309  return AbstractRing<Integer>::CascadeExponentiate(x, e1, y, e2);
4310 }
4311 
4312 void ModularArithmetic::SimultaneousExponentiate(Integer *results, const Integer &base, const Integer *exponents, unsigned int exponentsCount) const
4313 {
4314  if (m_modulus.IsOdd())
4315  {
4316  MontgomeryRepresentation dr(m_modulus);
4317  dr.SimultaneousExponentiate(results, dr.ConvertIn(base), exponents, exponentsCount);
4318  for (unsigned int i=0; i<exponentsCount; i++)
4319  results[i] = dr.ConvertOut(results[i]);
4320  }
4321  else
4322  AbstractRing<Integer>::SimultaneousExponentiate(results, base, exponents, exponentsCount);
4323 }
4324 
4325 MontgomeryRepresentation::MontgomeryRepresentation(const Integer &m) // modulus must be odd
4326  : ModularArithmetic(m),
4327  m_u((word)0, m_modulus.reg.size()),
4328  m_workspace(5*m_modulus.reg.size())
4329 {
4330  if (!m_modulus.IsOdd())
4331  throw InvalidArgument("MontgomeryRepresentation: Montgomery representation requires an odd modulus");
4332 
4333  RecursiveInverseModPower2(m_u.reg, m_workspace, m_modulus.reg, m_modulus.reg.size());
4334 }
4335 
4336 const Integer& MontgomeryRepresentation::Multiply(const Integer &a, const Integer &b) const
4337 {
4338  word *const T = m_workspace.begin();
4339  word *const R = m_result.reg.begin();
4340  const size_t N = m_modulus.reg.size();
4341  assert(a.reg.size()<=N && b.reg.size()<=N);
4342 
4343  AsymmetricMultiply(T, T+2*N, a.reg, a.reg.size(), b.reg, b.reg.size());
4344  SetWords(T+a.reg.size()+b.reg.size(), 0, 2*N-a.reg.size()-b.reg.size());
4345  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4346  return m_result;
4347 }
4348 
4349 const Integer& MontgomeryRepresentation::Square(const Integer &a) const
4350 {
4351  word *const T = m_workspace.begin();
4352  word *const R = m_result.reg.begin();
4353  const size_t N = m_modulus.reg.size();
4354  assert(a.reg.size()<=N);
4355 
4356  CryptoPP::Square(T, T+2*N, a.reg, a.reg.size());
4357  SetWords(T+2*a.reg.size(), 0, 2*N-2*a.reg.size());
4358  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4359  return m_result;
4360 }
4361 
4362 Integer MontgomeryRepresentation::ConvertOut(const Integer &a) const
4363 {
4364  word *const T = m_workspace.begin();
4365  word *const R = m_result.reg.begin();
4366  const size_t N = m_modulus.reg.size();
4367  assert(a.reg.size()<=N);
4368 
4369  CopyWords(T, a.reg, a.reg.size());
4370  SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
4371  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4372  return m_result;
4373 }
4374 
4375 const Integer& MontgomeryRepresentation::MultiplicativeInverse(const Integer &a) const
4376 {
4377 // return (EuclideanMultiplicativeInverse(a, modulus)<<(2*WORD_BITS*modulus.reg.size()))%modulus;
4378  word *const T = m_workspace.begin();
4379  word *const R = m_result.reg.begin();
4380  const size_t N = m_modulus.reg.size();
4381  assert(a.reg.size()<=N);
4382 
4383  CopyWords(T, a.reg, a.reg.size());
4384  SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
4385  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4386  unsigned k = AlmostInverse(R, T, R, N, m_modulus.reg, N);
4387 
4388 // cout << "k=" << k << " N*32=" << 32*N << endl;
4389 
4390  if (k>N*WORD_BITS)
4391  DivideByPower2Mod(R, R, k-N*WORD_BITS, m_modulus.reg, N);
4392  else
4393  MultiplyByPower2Mod(R, R, N*WORD_BITS-k, m_modulus.reg, N);
4394 
4395  return m_result;
4396 }
4397 
4398 // Specialization declared in misc.h to allow us to print integers
4399 // with additional control options, like arbirary bases and uppercase.
4400 template <> CRYPTOPP_DLL
4401 std::string IntToString<Integer>(Integer value, unsigned int base)
4402 {
4403  // Hack... set the high bit for uppercase. Set the next bit fo a suffix.
4404  static const unsigned int BIT_32 = (1U << 31);
4405  const bool UPPER = !!(base & BIT_32);
4406  static const unsigned int BIT_31 = (1U << 30);
4407  const bool BASE = !!(base & BIT_31);
4408 
4409  const char CH = UPPER ? 'A' : 'a';
4410  base &= ~(BIT_32|BIT_31);
4411  assert(base >= 2 && base <= 32);
4412 
4413  if (value == 0)
4414  return "0";
4415 
4416  bool negative = false, zero = false;
4417  if (value.IsNegative())
4418  {
4419  negative = true;
4420  value.Negate();
4421  }
4422 
4423  if (!value)
4424  zero = true;
4425 
4426  SecBlock<char> s(value.BitCount() / (SaturatingSubtract1(BitPrecision(base),1U)) + 1);
4427  Integer temp;
4428 
4429  unsigned int i=0;
4430  while (!!value)
4431  {
4432  word digit;
4433  Integer::Divide(digit, temp, value, word(base));
4434  s[i++]=char((digit < 10 ? '0' : (CH - 10)) + digit);
4435  value.swap(temp);
4436  }
4437 
4438  std::string result;
4439  result.reserve(i+2);
4440 
4441  if (negative)
4442  result += '-';
4443 
4444  if (zero)
4445  result += '0';
4446 
4447  while (i--)
4448  result += s[i];
4449 
4450  if (BASE)
4451  {
4452  if (base == 10)
4453  result += '.';
4454  else if (base == 16)
4455  result += 'h';
4456  else if (base == 8)
4457  result += 'o';
4458  else if (base == 2)
4459  result += 'b';
4460  }
4461 
4462  return result;
4463 }
4464 
4465 // Specialization declared in misc.h to avoid Coverity findings.
4466 template <> CRYPTOPP_DLL
4467 std::string IntToString<unsigned long long>(unsigned long long value, unsigned int base)
4468 {
4469  // Hack... set the high bit for uppercase.
4470  static const unsigned int HIGH_BIT = (1U << 31);
4471  const char CH = !!(base & HIGH_BIT) ? 'A' : 'a';
4472  base &= ~HIGH_BIT;
4473 
4474  assert(base >= 2);
4475  if (value == 0)
4476  return "0";
4477 
4478  std::string result;
4479  while (value > 0)
4480  {
4481  unsigned long long digit = value % base;
4482  result = char((digit < 10 ? '0' : (CH - 10)) + digit) + result;
4483  value /= base;
4484  }
4485  return result;
4486 }
4487 
4488 NAMESPACE_END
4489 
4490 #if WORKAROUND_ARMEL_BUG
4491 # pragma GCC pop_options
4492 #endif
4493 
4494 #endif
Used to pass byte array input as part of a NameValuePairs object.
Definition: algparam.h:29
An invalid argument was detected.
Definition: cryptlib.h:182
Classes for working with NameValuePairs.
void swap(SecBlock< T, A > &b)
Swap contents with another SecBlock.
Definition: secblock.h:706
a number which is probabilistically prime
Definition: integer.h:77
Utility functions for the Crypto++ library.
ByteOrder
Provides the byte ordering.
Definition: cryptlib.h:123
Restricts the instantiation of a class to one static object without locks.
Definition: misc.h:238
bool NotZero() const
Determines if the Integer is non-0.
Definition: integer.h:315
bool GetBit(size_t i) const
Provides the i-th bit of the Integer.
Definition: integer.cpp:2971
void CleanNew(size_type newSize)
Change size without preserving contents.
Definition: secblock.h:652
void Encode(byte *output, size_t outputLen, Signedness sign=UNSIGNED) const
Encode in big-endian format.
Definition: integer.cpp:3266
an unsigned value
Definition: integer.h:67
virtual size_t Peek(byte &outByte) const
Peek a 8-bit byte.
Definition: cryptlib.cpp:537
bool IsOdd() const
Determines if the Integer is odd parity.
Definition: integer.h:333
T GetValueWithDefault(const char *name, T defaultValue) const
Get a named value.
Definition: cryptlib.h:348
size_t DEREncodeUnsigned(BufferedTransformation &out, T w, byte asnTag=INTEGER)
DER Encode Unsigned.
Definition: asn.h:316
virtual void GenerateBlock(byte *output, size_t size)
Generate random array of bytes.
Definition: cryptlib.cpp:329
size_t BitsToBytes(size_t bitCount)
Returns the number of 8-bit bytes or octets required for the specified number of bits.
Definition: misc.h:660
This file contains helper classes/functions for implementing public key algorithms.
bool FirstPrime(Integer &p, const Integer &max, const Integer &equiv, const Integer &mod, const PrimeSelector *pSelector)
Finds a random prime of special form.
Definition: nbtheory.cpp:381
static Integer Gcd(const Integer &a, const Integer &n)
greatest common divisor
Definition: integer.cpp:4118
void resize(size_type newSize)
Change size and preserve contents.
Definition: secblock.h:697
size_t BitsToWords(size_t bitCount)
Returns the number of words required for the specified number of bits.
Definition: misc.h:680
unsigned int BytePrecision(const T &value)
Returns the number of 8-bit bytes or octets required for a value.
Definition: misc.h:553
void CleanGrow(size_type newSize)
Change size and preserve contents.
Definition: secblock.h:681
bool IsEven() const
Determines if the Integer is even parity.
Definition: integer.h:330
Secure memory block with allocator and cleanup.
Definition: secblock.h:429
unsigned int WordCount() const
Determines the number of words required to represent the Integer.
Definition: integer.cpp:3191
void OpenPGPDecode(const byte *input, size_t inputLen)
Decode from OpenPGP format.
Definition: integer.cpp:3341
Signedness
Used when importing and exporting integers.
Definition: integer.h:65
size_type size() const
Provides the count of elements in the SecBlock.
Definition: secblock.h:516
Object identifiers for algorthms and schemes.
Square block cipher.
Definition: square.h:24
Classes for automatic resource management.
size_t size() const
Length of the memory block.
Definition: algparam.h:93
byte GetByte(size_t i) const
Provides the i-th byte of the Integer.
Definition: integer.cpp:2993
Library configuration file.
static void DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n)
returns same result as Divide(r, q, a, Power2(n)), but faster
Definition: integer.cpp:3917
Ring of congruence classes modulo n.
Definition: modarith.h:27
STL namespace.
Interface for random number generators.
Definition: cryptlib.h:1176
size_t BytesToWords(size_t byteCount)
Returns the number of words required for the specified number of bytes.
Definition: misc.h:670
void Randomize(RandomNumberGenerator &rng, size_t bitCount)
Set this Integer to random integer.
Definition: integer.cpp:3355
size_t MinEncodedSize(Signedness sign=UNSIGNED) const
The minimum number of bytes to encode this integer.
Definition: integer.cpp:3254
void New(size_type newSize)
Change size without preserving contents.
Definition: secblock.h:639
void SetByte(size_t n, byte value)
Set the n-th byte to value.
Definition: integer.cpp:3001
the value is negative
Definition: integer.h:59
SecBlock typedef.
Definition: secblock.h:723
BER Sequence Decoder.
Definition: asn.h:197
Interface for buffered transformations.
Definition: cryptlib.h:1342
void DEREncodeAsOctetString(BufferedTransformation &bt, size_t length) const
encode absolute value as big-endian octet string
Definition: integer.cpp:3310
lword MaxRetrievable() const
Provides the number of bytes ready for retrieval.
Definition: queue.h:35
const byte * begin() const
Pointer to the first byte in the memory block.
Definition: algparam.h:89
bool IsConvertableToLong() const
Determines if the Integer is convertable to Long.
Definition: integer.cpp:2851
Integer MultiplicativeInverse() const
return inverse if 1 or -1, otherwise return 0
Definition: integer.cpp:4102
static const Integer & One()
Integer representing 1.
Definition: integer.cpp:2944
bool GetIntValue(const char *name, int &value) const
Get a named value with type int.
Definition: cryptlib.h:371
Abstract Ring.
Definition: algebra.h:52
byte order is little-endian
Definition: cryptlib.h:125
Sign
Used internally to represent the integer.
Definition: integer.h:55
Pointer that overloads operator→
Definition: smartptr.h:39
bool IsSquare() const
return whether this integer is a perfect square
Definition: integer.cpp:4091
size_t OpenPGPEncode(byte *output, size_t bufferSize) const
Encode absolute value in OpenPGP format.
Definition: integer.cpp:3326
Classes and functions for secure memory allocations.
unsigned int BitCount() const
Determines the number of bits required to represent the Integer.
Definition: integer.cpp:3205
bool IsUnit() const
is 1 or -1
Definition: integer.cpp:4097
Copy input to a memory buffer.
Definition: filters.h:1001
Integer SquareRoot() const
extract square root, if negative return 0, else return floor of square root
Definition: integer.cpp:4073
bool IsPositive() const
Determines if the Integer is positive.
Definition: integer.h:324
bool GetValue(const char *name, T &value) const
Get a named value.
Definition: cryptlib.h:335
a number with no special properties
Definition: integer.h:75
Integer Squared() const
Definition: integer.h:482
size_t Put(byte inByte, bool blocking=true)
Input a byte for processing.
Definition: cryptlib.h:1363
bool IsNegative() const
Determines if the Integer is negative.
Definition: integer.h:318
AlgorithmParameters MakeParameters(const char *name, const T &value, bool throwIfNotUsed=true)
Create an object that implements NameValuePairs.
Definition: algparam.h:554
void swap(Integer &a)
Swaps this Integer with another Integer.
Definition: integer.cpp:3031
Integer()
Creates the zero integer.
Definition: integer.cpp:2811
unsigned int TrailingZeros(word32 v)
Determines the number of trailing 0-bits in a value.
Definition: misc.h:600
signed long ConvertToLong() const
Convert the Integer to Long.
Definition: integer.cpp:2865
Exception thrown when an error is encountered decoding an OpenPGP integer.
Definition: integer.h:264
void Negate()
Reverse the Sign of the Integer.
Definition: integer.cpp:4039
T Crop(T value, size_t bits)
Truncates the value to the specified number of bits.
Definition: misc.h:648
Integer Times(const Integer &b) const
Definition: integer.cpp:3841
void BERDecodeAsOctetString(BufferedTransformation &bt, size_t length)
Decode nonnegative value from big-endian octet string.
Definition: integer.cpp:3317
Application callback to signal suitability of a cabdidate prime.
Definition: nbtheory.h:80
void ConditionalSwapPointers(bool c, T &a, T &b)
Performs a branchless swap of pointers a and b if condition c is true.
Definition: misc.h:958
static Integer Power2(size_t e)
Exponentiates to a power of 2.
Definition: integer.cpp:2923
Multiple precision integer with arithmetic operations.
Definition: integer.h:31
a signed value
Definition: integer.h:69
static const Integer & Two()
Integer representing 2.
Definition: integer.cpp:2949
virtual lword Skip(lword skipMax=LWORD_MAX)
Discard skipMax bytes from the output buffer.
Definition: cryptlib.cpp:556
RandomNumberType
Properties of a random integer.
Definition: integer.h:73
const char * Seed()
ConstByteArrayParameter.
Definition: argnames.h:19
byte order is big-endian
Definition: cryptlib.h:127
Safely right shift values when undefined behavior could occur.
const T & STDMIN(const T &a, const T &b)
Replacement function for std::min.
Definition: misc.h:397
String-based implementation of Store interface.
Definition: filters.h:1046
static void Divide(Integer &r, Integer &q, const Integer &a, const Integer &d)
calculate r and q such that (a == d*q + r) && (0 <= r < abs(d))
Definition: integer.cpp:3899
Data structure used to store byte strings.
Definition: queue.h:20
Classes, functions, intrinsics and features for X86, X32 nd X64 assembly.
Classes and functions for working with ANS.1 objects.
Classes for SHA-1 and SHA-2 family of message digests.
void SetBit(size_t n, bool value=1)
Set the n-th bit to value.
Definition: integer.cpp:2979
size_t GetWord16(word16 &value, ByteOrder order=BIG_ENDIAN_ORDER)
Retrieve a 16-bit word.
Definition: cryptlib.cpp:753
iterator begin()
Provides an iterator pointing to the first element in the memory block.
Definition: secblock.h:491
const char * PointerToPrimeSelector()
const PrimeSelector *
Definition: argnames.h:41
Implementation of BufferedTransformation's attachment interface.
Classes and functions for number theoretic operations.
std::string IntToString< unsigned long long >(unsigned long long value, unsigned int base)
Converts an unsigned value to a string.
Definition: integer.cpp:4467
size_t DEREncodeOctetString(BufferedTransformation &out, const byte *str, size_t strLen)
ASN Strings.
Definition: asn.cpp:104
void DEREncode(BufferedTransformation &bt) const
Encode in DER format.
Definition: integer.cpp:3288
Exception thrown when division by 0 is encountered.
Definition: integer.h:37
DER Sequence Encoder.
Definition: asn.h:207
T1 SaturatingSubtract1(const T1 &a, const T2 &b)
Performs a saturating subtract clamped at 1.
Definition: misc.h:880
Exception thrown when a random number cannot be found that satisfies the condition.
Definition: integer.h:45
Performs modular arithmetic in Montgomery representation for increased speed.
Definition: modarith.h:137
DER General Encoder.
Definition: asn.h:174
void GenerateBlock(byte *output, size_t size)
Generate random array of bytes.
Definition: integer.cpp:3396
Integer InverseMod(const Integer &n) const
calculate multiplicative inverse of *this mod n
Definition: integer.cpp:4123
void Decode(const byte *input, size_t inputLen, Signedness sign=UNSIGNED)
Decode from big-endian byte array.
Definition: integer.cpp:3214
size_t PutWord16(word16 value, ByteOrder order=BIG_ENDIAN_ORDER, bool blocking=true)
Input a 16-bit word for processing.
Definition: cryptlib.cpp:717
virtual lword MaxRetrievable() const
Provides the number of bytes ready for retrieval.
Definition: cryptlib.cpp:499
Safely left shift values when undefined behavior could occur.
static const Integer & Zero()
Integer representing 0.
Definition: integer.cpp:2939
const T & STDMAX(const T &a, const T &b)
Replacement function for std::max.
Definition: misc.h:407
EuclideanDomainOf.
Definition: algebra.h:165
void Grow(size_type newSize)
Change size and preserve contents.
Definition: secblock.h:665
void BERDecode(const byte *input, size_t inputLen)
Decode from BER format.
Definition: integer.cpp:3295
std::string IntToString< Integer >(Integer value, unsigned int base)
Converts an Integer to a string.
Definition: integer.cpp:4401
lword GetBits(size_t i, size_t n) const
Provides the low order bits of the Integer.
Definition: integer.cpp:3008
virtual size_t Get(byte &outByte)
Retrieve a 8-bit byte.
Definition: cryptlib.cpp:518
Class file for performing modular arithmetic.
Crypto++ library namespace.
size_type SizeInBytes() const
Provides the number of bytes in the SecBlock.
Definition: secblock.h:530
BER General Decoder.
Definition: asn.h:137
int Compare(const Integer &a) const
Perform signed comparison.
Definition: integer.cpp:4055
Object Identifier.
Definition: asn.h:91
Integer Modulo(const Integer &b) const
Definition: integer.cpp:3952
size_t Get(byte &outByte)
Retrieve a 8-bit byte.
Definition: queue.cpp:300
unsigned int BitPrecision(const T &value)
Returns the number of bits required for a value.
Definition: misc.h:575
unsigned int ByteCount() const
Determines the number of bytes required to represent the Integer.
Definition: integer.cpp:3196
the value is positive or 0
Definition: integer.h:57
bool NotNegative() const
Determines if the Integer is non-negative.
Definition: integer.h:321
Interface for retrieving values given their names.
Definition: cryptlib.h:277