Crypto++  5.6.3
Free C++ class library of cryptographic schemes
integer.cpp
1 // integer.cpp - written and placed in the public domain by Wei Dai
2 // contains public domain code contributed by Alister Lee and Leonard Janke
3 
4 #include "pch.h"
5 #include "config.h"
6 
7 #if CRYPTOPP_MSC_VERSION
8 # pragma warning(disable: 4100)
9 #endif
10 
11 #if CRYPTOPP_GCC_DIAGNOSTIC_AVAILABLE
12 # pragma GCC diagnostic ignored "-Wunused"
13 # pragma GCC diagnostic ignored "-Wunused-but-set-variable"
14 #endif
15 
16 #ifndef CRYPTOPP_IMPORTS
17 
18 #include "integer.h"
19 #include "secblock.h"
20 #include "modarith.h"
21 #include "nbtheory.h"
22 #include "smartptr.h"
23 #include "algparam.h"
24 #include "filters.h"
25 #include "asn.h"
26 #include "oids.h"
27 #include "words.h"
28 #include "pubkey.h" // for P1363_KDF2
29 #include "sha.h"
30 #include "cpu.h"
31 #include "misc.h"
32 
33 #include <iostream>
34 
35 #if (_MSC_VER >= 1400) && !defined(_M_ARM)
36  #include <intrin.h>
37 #endif
38 
39 #ifdef __DECCXX
40  #include <c_asm.h>
41 #endif
42 
43 #ifdef CRYPTOPP_MSVC6_NO_PP
44  #pragma message("You do not seem to have the Visual C++ Processor Pack installed, so use of SSE2 instructions will be disabled.")
45 #endif
46 
47 // "Inline assembly operands don't work with .intel_syntax",
48 // http://llvm.org/bugs/show_bug.cgi?id=24232
49 #if CRYPTOPP_BOOL_X32 || defined(CRYPTOPP_DISABLE_INTEL_ASM)
50 # undef CRYPTOPP_X86_ASM_AVAILABLE
51 # undef CRYPTOPP_X32_ASM_AVAILABLE
52 # undef CRYPTOPP_X64_ASM_AVAILABLE
53 # undef CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
54 # undef CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
55 # define CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 0
56 # define CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE 0
57 #else
58 # define CRYPTOPP_INTEGER_SSE2 (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86)
59 #endif
60 
61 // Debian QEMU/ARMEL issue in MultiplyTop; see http://github.com/weidai11/cryptopp/issues/31.
62 #if __ARMEL__ && (CRYPTOPP_GCC_VERSION >= 40900) && (CRYPTOPP_GCC_VERSION < 70000) && __OPTIMIZE__
63 # define WORKAROUND_ARMEL_BUG 1
64 #endif
65 
66 #if WORKAROUND_ARMEL_BUG
67 # pragma GCC push_options
68 # pragma GCC optimize("O1")
69 #endif
70 
71 NAMESPACE_BEGIN(CryptoPP)
72 
73 bool AssignIntToInteger(const std::type_info &valueType, void *pInteger, const void *pInt)
74 {
75  if (valueType != typeid(Integer))
76  return false;
77  *reinterpret_cast<Integer *>(pInteger) = *reinterpret_cast<const int *>(pInt);
78  return true;
79 }
80 
81 inline static int Compare(const word *A, const word *B, size_t N)
82 {
83  while (N--)
84  if (A[N] > B[N])
85  return 1;
86  else if (A[N] < B[N])
87  return -1;
88 
89  return 0;
90 }
91 
92 inline static int Increment(word *A, size_t N, word B=1)
93 {
94  assert(N);
95  word t = A[0];
96  A[0] = t+B;
97  if (A[0] >= t)
98  return 0;
99  for (unsigned i=1; i<N; i++)
100  if (++A[i])
101  return 0;
102  return 1;
103 }
104 
105 inline static int Decrement(word *A, size_t N, word B=1)
106 {
107  assert(N);
108  word t = A[0];
109  A[0] = t-B;
110  if (A[0] <= t)
111  return 0;
112  for (unsigned i=1; i<N; i++)
113  if (A[i]--)
114  return 0;
115  return 1;
116 }
117 
118 static void TwosComplement(word *A, size_t N)
119 {
120  Decrement(A, N);
121  for (unsigned i=0; i<N; i++)
122  A[i] = ~A[i];
123 }
124 
125 static word AtomicInverseModPower2(word A)
126 {
127  assert(A%2==1);
128 
129  word R=A%8;
130 
131  for (unsigned i=3; i<WORD_BITS; i*=2)
132  R = R*(2-R*A);
133 
134  assert(R*A==1);
135  return R;
136 }
137 
138 // ********************************************************
139 
140 #if !defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE) || (defined(__x86_64__) && defined(CRYPTOPP_WORD128_AVAILABLE))
141  #define Declare2Words(x) word x##0, x##1;
142  #define AssignWord(a, b) a##0 = b; a##1 = 0;
143  #define Add2WordsBy1(a, b, c) a##0 = b##0 + c; a##1 = b##1 + (a##0 < c);
144  #define LowWord(a) a##0
145  #define HighWord(a) a##1
146  #ifdef _MSC_VER
147  #define MultiplyWordsLoHi(p0, p1, a, b) p0 = _umul128(a, b, &p1);
148  #ifndef __INTEL_COMPILER
149  #define Double3Words(c, d) d##1 = __shiftleft128(d##0, d##1, 1); d##0 = __shiftleft128(c, d##0, 1); c *= 2;
150  #endif
151  #elif defined(__DECCXX)
152  #define MultiplyWordsLoHi(p0, p1, a, b) p0 = a*b; p1 = asm("umulh %a0, %a1, %v0", a, b);
153  #elif defined(__x86_64__)
154  #if defined(__SUNPRO_CC) && __SUNPRO_CC < 0x5100
155  // Sun Studio's gcc-style inline assembly is heavily bugged as of version 5.9 Patch 124864-09 2008/12/16, but this one works
156  #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "r"(b) : "cc");
157  #else
158  #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
159  #define MulAcc(c, d, a, b) asm ("mulq %6; addq %3, %0; adcq %4, %1; adcq $0, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1), "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
160  #define Double3Words(c, d) asm ("addq %0, %0; adcq %1, %1; adcq %2, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1) : : "cc");
161  #define Acc2WordsBy1(a, b) asm ("addq %2, %0; adcq $0, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b) : "cc");
162  #define Acc2WordsBy2(a, b) asm ("addq %2, %0; adcq %3, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b##0), "r"(b##1) : "cc");
163  #define Acc3WordsBy2(c, d, e) asm ("addq %5, %0; adcq %6, %1; adcq $0, %2;" : "+r"(c), "=r"(e##0), "=r"(e##1) : "1"(d##0), "2"(d##1), "r"(e##0), "r"(e##1) : "cc");
164  #endif
165  #endif
166  #define MultiplyWords(p, a, b) MultiplyWordsLoHi(p##0, p##1, a, b)
167  #ifndef Double3Words
168  #define Double3Words(c, d) d##1 = 2*d##1 + (d##0>>(WORD_BITS-1)); d##0 = 2*d##0 + (c>>(WORD_BITS-1)); c *= 2;
169  #endif
170  #ifndef Acc2WordsBy2
171  #define Acc2WordsBy2(a, b) a##0 += b##0; a##1 += a##0 < b##0; a##1 += b##1;
172  #endif
173  #define AddWithCarry(u, a, b) {word t = a+b; u##0 = t + u##1; u##1 = (t<a) + (u##0<t);}
174  #define SubtractWithBorrow(u, a, b) {word t = a-b; u##0 = t - u##1; u##1 = (t>a) + (u##0>t);}
175  #define GetCarry(u) u##1
176  #define GetBorrow(u) u##1
177 #else
178  #define Declare2Words(x) dword x;
179  #if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER) && !defined(_M_ARM)
180  #define MultiplyWords(p, a, b) p = __emulu(a, b);
181  #else
182  #define MultiplyWords(p, a, b) p = (dword)a*b;
183  #endif
184  #define AssignWord(a, b) a = b;
185  #define Add2WordsBy1(a, b, c) a = b + c;
186  #define Acc2WordsBy2(a, b) a += b;
187  #define LowWord(a) word(a)
188  #define HighWord(a) word(a>>WORD_BITS)
189  #define Double3Words(c, d) d = 2*d + (c>>(WORD_BITS-1)); c *= 2;
190  #define AddWithCarry(u, a, b) u = dword(a) + b + GetCarry(u);
191  #define SubtractWithBorrow(u, a, b) u = dword(a) - b - GetBorrow(u);
192  #define GetCarry(u) HighWord(u)
193  #define GetBorrow(u) word(u>>(WORD_BITS*2-1))
194 #endif
195 #ifndef MulAcc
196  #define MulAcc(c, d, a, b) MultiplyWords(p, a, b); Acc2WordsBy1(p, c); c = LowWord(p); Acc2WordsBy1(d, HighWord(p));
197 #endif
198 #ifndef Acc2WordsBy1
199  #define Acc2WordsBy1(a, b) Add2WordsBy1(a, a, b)
200 #endif
201 #ifndef Acc3WordsBy2
202  #define Acc3WordsBy2(c, d, e) Acc2WordsBy1(e, c); c = LowWord(e); Add2WordsBy1(e, d, HighWord(e));
203 #endif
204 
205 class DWord
206 {
207 public:
208  // Converity finding on default ctor. We've isntrumented the code,
209  // and cannot uncover a case where it affects a result.
210 #if (defined(__COVERITY__) || !defined(NDEBUG)) && defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE)
211  // Repeating pattern of 1010 for debug builds to break things...
212  DWord() : m_whole(0) {memset(&m_whole, 0xa, sizeof(m_whole));}
213 #elif (defined(__COVERITY__) || !defined(NDEBUG)) && !defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE)
214  // Repeating pattern of 1010 for debug builds to break things...
215  DWord() : m_halfs() {memset(&m_halfs, 0xa, sizeof(m_halfs));}
216 #else
217  DWord() {}
218 #endif
219 
220 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
221  explicit DWord(word low) : m_whole(low) {}
222 #else
223  explicit DWord(word low)
224  {
225  m_halfs.low = low;
226  m_halfs.high = 0;
227  }
228 #endif
229 
230  DWord(word low, word high)
231  {
232  m_halfs.low = low;
233  m_halfs.high = high;
234  }
235 
236  static DWord Multiply(word a, word b)
237  {
238  DWord r;
239  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
240  r.m_whole = (dword)a * b;
241  #elif defined(MultiplyWordsLoHi)
242  MultiplyWordsLoHi(r.m_halfs.low, r.m_halfs.high, a, b);
243  #else
244  assert(0);
245  #endif
246  return r;
247  }
248 
249  static DWord MultiplyAndAdd(word a, word b, word c)
250  {
251  DWord r = Multiply(a, b);
252  return r += c;
253  }
254 
255  DWord & operator+=(word a)
256  {
257  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
258  m_whole = m_whole + a;
259  #else
260  m_halfs.low += a;
261  m_halfs.high += (m_halfs.low < a);
262  #endif
263  return *this;
264  }
265 
266  DWord operator+(word a)
267  {
268  DWord r;
269  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
270  r.m_whole = m_whole + a;
271  #else
272  r.m_halfs.low = m_halfs.low + a;
273  r.m_halfs.high = m_halfs.high + (r.m_halfs.low < a);
274  #endif
275  return r;
276  }
277 
278  DWord operator-(DWord a)
279  {
280  DWord r;
281  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
282  r.m_whole = m_whole - a.m_whole;
283  #else
284  r.m_halfs.low = m_halfs.low - a.m_halfs.low;
285  r.m_halfs.high = m_halfs.high - a.m_halfs.high - (r.m_halfs.low > m_halfs.low);
286  #endif
287  return r;
288  }
289 
290  DWord operator-(word a)
291  {
292  DWord r;
293  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
294  r.m_whole = m_whole - a;
295  #else
296  r.m_halfs.low = m_halfs.low - a;
297  r.m_halfs.high = m_halfs.high - (r.m_halfs.low > m_halfs.low);
298  #endif
299  return r;
300  }
301 
302  // returns quotient, which must fit in a word
303  word operator/(word divisor);
304 
305  word operator%(word a);
306 
307  bool operator!() const
308  {
309  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
310  return !m_whole;
311  #else
312  return !m_halfs.high && !m_halfs.low;
313  #endif
314  }
315 
316  word GetLowHalf() const {return m_halfs.low;}
317  word GetHighHalf() const {return m_halfs.high;}
318  word GetHighHalfAsBorrow() const {return 0-m_halfs.high;}
319 
320 private:
321  union
322  {
323  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
324  dword m_whole;
325  #endif
326  struct
327  {
328  #ifdef IS_LITTLE_ENDIAN
329  word low;
330  word high;
331  #else
332  word high;
333  word low;
334  #endif
335  } m_halfs;
336  };
337 };
338 
339 class Word
340 {
341 public:
342  // Converity finding on default ctor. We've isntrumented the code,
343  // and cannot uncover a case where it affects a result.
344 #if defined(__COVERITY__)
345  Word() : m_whole(0) {}
346 #elif !defined(NDEBUG)
347  // Repeating pattern of 1010 for debug builds to break things...
348  Word() : m_whole(0) {memset(&m_whole, 0xa, sizeof(m_whole));}
349 #else
350  Word() {}
351 #endif
352 
353  Word(word value) : m_whole(value) {}
354  Word(hword low, hword high) : m_whole(low | (word(high) << (WORD_BITS/2))) {}
355 
356  static Word Multiply(hword a, hword b)
357  {
358  Word r;
359  r.m_whole = (word)a * b;
360  return r;
361  }
362 
363  Word operator-(Word a)
364  {
365  Word r;
366  r.m_whole = m_whole - a.m_whole;
367  return r;
368  }
369 
370  Word operator-(hword a)
371  {
372  Word r;
373  r.m_whole = m_whole - a;
374  return r;
375  }
376 
377  // returns quotient, which must fit in a word
378  hword operator/(hword divisor)
379  {
380  return hword(m_whole / divisor);
381  }
382 
383  bool operator!() const
384  {
385  return !m_whole;
386  }
387 
388  word GetWhole() const {return m_whole;}
389  hword GetLowHalf() const {return hword(m_whole);}
390  hword GetHighHalf() const {return hword(m_whole>>(WORD_BITS/2));}
391  hword GetHighHalfAsBorrow() const {return 0-hword(m_whole>>(WORD_BITS/2));}
392 
393 private:
394  word m_whole;
395 };
396 
397 // do a 3 word by 2 word divide, returns quotient and leaves remainder in A
398 template <class S, class D>
399 S DivideThreeWordsByTwo(S *A, S B0, S B1, D *dummy=NULL)
400 {
401  CRYPTOPP_UNUSED(dummy);
402 
403  // assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a S
404  assert(A[2] < B1 || (A[2]==B1 && A[1] < B0));
405 
406  // estimate the quotient: do a 2 S by 1 S divide
407  S Q;
408  if (S(B1+1) == 0)
409  Q = A[2];
410  else if (B1 > 0)
411  Q = D(A[1], A[2]) / S(B1+1);
412  else
413  Q = D(A[0], A[1]) / B0;
414 
415  // now subtract Q*B from A
416  D p = D::Multiply(B0, Q);
417  D u = (D) A[0] - p.GetLowHalf();
418  A[0] = u.GetLowHalf();
419  u = (D) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - D::Multiply(B1, Q);
420  A[1] = u.GetLowHalf();
421  A[2] += u.GetHighHalf();
422 
423  // Q <= actual quotient, so fix it
424  while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
425  {
426  u = (D) A[0] - B0;
427  A[0] = u.GetLowHalf();
428  u = (D) A[1] - B1 - u.GetHighHalfAsBorrow();
429  A[1] = u.GetLowHalf();
430  A[2] += u.GetHighHalf();
431  Q++;
432  assert(Q); // shouldn't overflow
433  }
434 
435  return Q;
436 }
437 
438 // do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1
439 template <class S, class D>
440 inline D DivideFourWordsByTwo(S *T, const D &Al, const D &Ah, const D &B)
441 {
442  if (!B) // if divisor is 0, we assume divisor==2**(2*WORD_BITS)
443  return D(Ah.GetLowHalf(), Ah.GetHighHalf());
444  else
445  {
446  S Q[2];
447  T[0] = Al.GetLowHalf();
448  T[1] = Al.GetHighHalf();
449  T[2] = Ah.GetLowHalf();
450  T[3] = Ah.GetHighHalf();
451  Q[1] = DivideThreeWordsByTwo<S, D>(T+1, B.GetLowHalf(), B.GetHighHalf());
452  Q[0] = DivideThreeWordsByTwo<S, D>(T, B.GetLowHalf(), B.GetHighHalf());
453  return D(Q[0], Q[1]);
454  }
455 }
456 
457 // returns quotient, which must fit in a word
458 inline word DWord::operator/(word a)
459 {
460  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
461  return word(m_whole / a);
462  #else
463  hword r[4];
464  return DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a).GetWhole();
465  #endif
466 }
467 
468 inline word DWord::operator%(word a)
469 {
470  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
471  return word(m_whole % a);
472  #else
473  if (a < (word(1) << (WORD_BITS/2)))
474  {
475  hword h = hword(a);
476  word r = m_halfs.high % h;
477  r = ((m_halfs.low >> (WORD_BITS/2)) + (r << (WORD_BITS/2))) % h;
478  return hword((hword(m_halfs.low) + (r << (WORD_BITS/2))) % h);
479  }
480  else
481  {
482  hword r[4];
483  DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a);
484  return Word(r[0], r[1]).GetWhole();
485  }
486  #endif
487 }
488 
489 // ********************************************************
490 
491 // Use some tricks to share assembly code between MSVC and GCC
492 #if defined(__GNUC__)
493  #define AddPrologue \
494  int result; \
495  __asm__ __volatile__ \
496  ( \
497  INTEL_NOPREFIX
498  #define AddEpilogue \
499  ".att_syntax prefix;" \
500  : "=a" (result)\
501  : "d" (C), "a" (A), "D" (B), "c" (N) \
502  : "%esi", "memory", "cc" \
503  );\
504  return result;
505  #define MulPrologue \
506  __asm__ __volatile__ \
507  ( \
508  ".intel_syntax noprefix;" \
509  AS1( push ebx) \
510  AS2( mov ebx, edx)
511  #define MulEpilogue \
512  AS1( pop ebx) \
513  ".att_syntax prefix;" \
514  : \
515  : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B) \
516  : "%esi", "memory", "cc" \
517  );
518  #define SquPrologue MulPrologue
519  #define SquEpilogue \
520  AS1( pop ebx) \
521  ".att_syntax prefix;" \
522  : \
523  : "d" (s_maskLow16), "c" (C), "a" (A) \
524  : "%esi", "%edi", "memory", "cc" \
525  );
526  #define TopPrologue MulPrologue
527  #define TopEpilogue \
528  AS1( pop ebx) \
529  ".att_syntax prefix;" \
530  : \
531  : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B), "S" (L) \
532  : "memory", "cc" \
533  );
534 #else
535  #define AddPrologue \
536  __asm push edi \
537  __asm push esi \
538  __asm mov eax, [esp+12] \
539  __asm mov edi, [esp+16]
540  #define AddEpilogue \
541  __asm pop esi \
542  __asm pop edi \
543  __asm ret 8
544 #if _MSC_VER < 1300
545  #define SaveEBX __asm push ebx
546  #define RestoreEBX __asm pop ebx
547 #else
548  #define SaveEBX
549  #define RestoreEBX
550 #endif
551  #define SquPrologue \
552  AS2( mov eax, A) \
553  AS2( mov ecx, C) \
554  SaveEBX \
555  AS2( lea ebx, s_maskLow16)
556  #define MulPrologue \
557  AS2( mov eax, A) \
558  AS2( mov edi, B) \
559  AS2( mov ecx, C) \
560  SaveEBX \
561  AS2( lea ebx, s_maskLow16)
562  #define TopPrologue \
563  AS2( mov eax, A) \
564  AS2( mov edi, B) \
565  AS2( mov ecx, C) \
566  AS2( mov esi, L) \
567  SaveEBX \
568  AS2( lea ebx, s_maskLow16)
569  #define SquEpilogue RestoreEBX
570  #define MulEpilogue RestoreEBX
571  #define TopEpilogue RestoreEBX
572 #endif
573 
574 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
575 extern "C" {
576 int Baseline_Add(size_t N, word *C, const word *A, const word *B);
577 int Baseline_Sub(size_t N, word *C, const word *A, const word *B);
578 }
579 #elif defined(CRYPTOPP_X64_ASM_AVAILABLE) && defined(__GNUC__) && defined(CRYPTOPP_WORD128_AVAILABLE)
580 int Baseline_Add(size_t N, word *C, const word *A, const word *B)
581 {
582  word result;
583  __asm__ __volatile__
584  (
585  INTEL_NOPREFIX
586  AS1( neg %1)
587  ASJ( jz, 1, f)
588  AS2( mov %0,[%3+8*%1])
589  AS2( add %0,[%4+8*%1])
590  AS2( mov [%2+8*%1],%0)
591  ASL(0)
592  AS2( mov %0,[%3+8*%1+8])
593  AS2( adc %0,[%4+8*%1+8])
594  AS2( mov [%2+8*%1+8],%0)
595  AS2( lea %1,[%1+2])
596  ASJ( jrcxz, 1, f)
597  AS2( mov %0,[%3+8*%1])
598  AS2( adc %0,[%4+8*%1])
599  AS2( mov [%2+8*%1],%0)
600  ASJ( jmp, 0, b)
601  ASL(1)
602  AS2( mov %0, 0)
603  AS2( adc %0, %0)
604  ATT_NOPREFIX
605  : "=&r" (result), "+c" (N)
606  : "r" (C+N), "r" (A+N), "r" (B+N)
607  : "memory", "cc"
608  );
609  return (int)result;
610 }
611 
612 int Baseline_Sub(size_t N, word *C, const word *A, const word *B)
613 {
614  word result;
615  __asm__ __volatile__
616  (
617  INTEL_NOPREFIX
618  AS1( neg %1)
619  ASJ( jz, 1, f)
620  AS2( mov %0,[%3+8*%1])
621  AS2( sub %0,[%4+8*%1])
622  AS2( mov [%2+8*%1],%0)
623  ASL(0)
624  AS2( mov %0,[%3+8*%1+8])
625  AS2( sbb %0,[%4+8*%1+8])
626  AS2( mov [%2+8*%1+8],%0)
627  AS2( lea %1,[%1+2])
628  ASJ( jrcxz, 1, f)
629  AS2( mov %0,[%3+8*%1])
630  AS2( sbb %0,[%4+8*%1])
631  AS2( mov [%2+8*%1],%0)
632  ASJ( jmp, 0, b)
633  ASL(1)
634  AS2( mov %0, 0)
635  AS2( adc %0, %0)
636  ATT_NOPREFIX
637  : "=&r" (result), "+c" (N)
638  : "r" (C+N), "r" (A+N), "r" (B+N)
639  : "memory", "cc"
640  );
641  return (int)result;
642 }
643 #elif defined(CRYPTOPP_X86_ASM_AVAILABLE) && CRYPTOPP_BOOL_X86
644 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
645 {
646  AddPrologue
647 
648  // now: eax = A, edi = B, edx = C, ecx = N
649  AS2( lea eax, [eax+4*ecx])
650  AS2( lea edi, [edi+4*ecx])
651  AS2( lea edx, [edx+4*ecx])
652 
653  AS1( neg ecx) // ecx is negative index
654  AS2( test ecx, 2) // this clears carry flag
655  ASJ( jz, 0, f)
656  AS2( sub ecx, 2)
657  ASJ( jmp, 1, f)
658 
659  ASL(0)
660  ASJ( jecxz, 2, f) // loop until ecx overflows and becomes zero
661  AS2( mov esi,[eax+4*ecx])
662  AS2( adc esi,[edi+4*ecx])
663  AS2( mov [edx+4*ecx],esi)
664  AS2( mov esi,[eax+4*ecx+4])
665  AS2( adc esi,[edi+4*ecx+4])
666  AS2( mov [edx+4*ecx+4],esi)
667  ASL(1)
668  AS2( mov esi,[eax+4*ecx+8])
669  AS2( adc esi,[edi+4*ecx+8])
670  AS2( mov [edx+4*ecx+8],esi)
671  AS2( mov esi,[eax+4*ecx+12])
672  AS2( adc esi,[edi+4*ecx+12])
673  AS2( mov [edx+4*ecx+12],esi)
674 
675  AS2( lea ecx,[ecx+4]) // advance index, avoid inc which causes slowdown on Intel Core 2
676  ASJ( jmp, 0, b)
677 
678  ASL(2)
679  AS2( mov eax, 0)
680  AS1( setc al) // store carry into eax (return result register)
681 
682  AddEpilogue
683 }
684 
685 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B)
686 {
687  AddPrologue
688 
689  // now: eax = A, edi = B, edx = C, ecx = N
690  AS2( lea eax, [eax+4*ecx])
691  AS2( lea edi, [edi+4*ecx])
692  AS2( lea edx, [edx+4*ecx])
693 
694  AS1( neg ecx) // ecx is negative index
695  AS2( test ecx, 2) // this clears carry flag
696  ASJ( jz, 0, f)
697  AS2( sub ecx, 2)
698  ASJ( jmp, 1, f)
699 
700  ASL(0)
701  ASJ( jecxz, 2, f) // loop until ecx overflows and becomes zero
702  AS2( mov esi,[eax+4*ecx])
703  AS2( sbb esi,[edi+4*ecx])
704  AS2( mov [edx+4*ecx],esi)
705  AS2( mov esi,[eax+4*ecx+4])
706  AS2( sbb esi,[edi+4*ecx+4])
707  AS2( mov [edx+4*ecx+4],esi)
708  ASL(1)
709  AS2( mov esi,[eax+4*ecx+8])
710  AS2( sbb esi,[edi+4*ecx+8])
711  AS2( mov [edx+4*ecx+8],esi)
712  AS2( mov esi,[eax+4*ecx+12])
713  AS2( sbb esi,[edi+4*ecx+12])
714  AS2( mov [edx+4*ecx+12],esi)
715 
716  AS2( lea ecx,[ecx+4]) // advance index, avoid inc which causes slowdown on Intel Core 2
717  ASJ( jmp, 0, b)
718 
719  ASL(2)
720  AS2( mov eax, 0)
721  AS1( setc al) // store carry into eax (return result register)
722 
723  AddEpilogue
724 }
725 
726 #if CRYPTOPP_INTEGER_SSE2
727 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Add(size_t N, word *C, const word *A, const word *B)
728 {
729  AddPrologue
730 
731  // now: eax = A, edi = B, edx = C, ecx = N
732  AS2( lea eax, [eax+4*ecx])
733  AS2( lea edi, [edi+4*ecx])
734  AS2( lea edx, [edx+4*ecx])
735 
736  AS1( neg ecx) // ecx is negative index
737  AS2( pxor mm2, mm2)
738  ASJ( jz, 2, f)
739  AS2( test ecx, 2) // this clears carry flag
740  ASJ( jz, 0, f)
741  AS2( sub ecx, 2)
742  ASJ( jmp, 1, f)
743 
744  ASL(0)
745  AS2( movd mm0, DWORD PTR [eax+4*ecx])
746  AS2( movd mm1, DWORD PTR [edi+4*ecx])
747  AS2( paddq mm0, mm1)
748  AS2( paddq mm2, mm0)
749  AS2( movd DWORD PTR [edx+4*ecx], mm2)
750  AS2( psrlq mm2, 32)
751 
752  AS2( movd mm0, DWORD PTR [eax+4*ecx+4])
753  AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
754  AS2( paddq mm0, mm1)
755  AS2( paddq mm2, mm0)
756  AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
757  AS2( psrlq mm2, 32)
758 
759  ASL(1)
760  AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
761  AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
762  AS2( paddq mm0, mm1)
763  AS2( paddq mm2, mm0)
764  AS2( movd DWORD PTR [edx+4*ecx+8], mm2)
765  AS2( psrlq mm2, 32)
766 
767  AS2( movd mm0, DWORD PTR [eax+4*ecx+12])
768  AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
769  AS2( paddq mm0, mm1)
770  AS2( paddq mm2, mm0)
771  AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
772  AS2( psrlq mm2, 32)
773 
774  AS2( add ecx, 4)
775  ASJ( jnz, 0, b)
776 
777  ASL(2)
778  AS2( movd eax, mm2)
779  AS1( emms)
780 
781  AddEpilogue
782 }
783 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Sub(size_t N, word *C, const word *A, const word *B)
784 {
785  AddPrologue
786 
787  // now: eax = A, edi = B, edx = C, ecx = N
788  AS2( lea eax, [eax+4*ecx])
789  AS2( lea edi, [edi+4*ecx])
790  AS2( lea edx, [edx+4*ecx])
791 
792  AS1( neg ecx) // ecx is negative index
793  AS2( pxor mm2, mm2)
794  ASJ( jz, 2, f)
795  AS2( test ecx, 2) // this clears carry flag
796  ASJ( jz, 0, f)
797  AS2( sub ecx, 2)
798  ASJ( jmp, 1, f)
799 
800  ASL(0)
801  AS2( movd mm0, DWORD PTR [eax+4*ecx])
802  AS2( movd mm1, DWORD PTR [edi+4*ecx])
803  AS2( psubq mm0, mm1)
804  AS2( psubq mm0, mm2)
805  AS2( movd DWORD PTR [edx+4*ecx], mm0)
806  AS2( psrlq mm0, 63)
807 
808  AS2( movd mm2, DWORD PTR [eax+4*ecx+4])
809  AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
810  AS2( psubq mm2, mm1)
811  AS2( psubq mm2, mm0)
812  AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
813  AS2( psrlq mm2, 63)
814 
815  ASL(1)
816  AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
817  AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
818  AS2( psubq mm0, mm1)
819  AS2( psubq mm0, mm2)
820  AS2( movd DWORD PTR [edx+4*ecx+8], mm0)
821  AS2( psrlq mm0, 63)
822 
823  AS2( movd mm2, DWORD PTR [eax+4*ecx+12])
824  AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
825  AS2( psubq mm2, mm1)
826  AS2( psubq mm2, mm0)
827  AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
828  AS2( psrlq mm2, 63)
829 
830  AS2( add ecx, 4)
831  ASJ( jnz, 0, b)
832 
833  ASL(2)
834  AS2( movd eax, mm2)
835  AS1( emms)
836 
837  AddEpilogue
838 }
839 #endif // #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
840 #else
841 int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
842 {
843  assert (N%2 == 0);
844 
845  Declare2Words(u);
846  AssignWord(u, 0);
847  for (size_t i=0; i<N; i+=2)
848  {
849  AddWithCarry(u, A[i], B[i]);
850  C[i] = LowWord(u);
851  AddWithCarry(u, A[i+1], B[i+1]);
852  C[i+1] = LowWord(u);
853  }
854  return int(GetCarry(u));
855 }
856 
857 int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B)
858 {
859  assert (N%2 == 0);
860 
861  Declare2Words(u);
862  AssignWord(u, 0);
863  for (size_t i=0; i<N; i+=2)
864  {
865  SubtractWithBorrow(u, A[i], B[i]);
866  C[i] = LowWord(u);
867  SubtractWithBorrow(u, A[i+1], B[i+1]);
868  C[i+1] = LowWord(u);
869  }
870  return int(GetBorrow(u));
871 }
872 #endif
873 
874 static word LinearMultiply(word *C, const word *A, word B, size_t N)
875 {
876  word carry=0;
877  for(unsigned i=0; i<N; i++)
878  {
879  Declare2Words(p);
880  MultiplyWords(p, A[i], B);
881  Acc2WordsBy1(p, carry);
882  C[i] = LowWord(p);
883  carry = HighWord(p);
884  }
885  return carry;
886 }
887 
888 #ifndef CRYPTOPP_DOXYGEN_PROCESSING
889 
890 #define Mul_2 \
891  Mul_Begin(2) \
892  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
893  Mul_End(1, 1)
894 
895 #define Mul_4 \
896  Mul_Begin(4) \
897  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
898  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
899  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
900  Mul_SaveAcc(3, 1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
901  Mul_SaveAcc(4, 2, 3) Mul_Acc(3, 2) \
902  Mul_End(5, 3)
903 
904 #define Mul_8 \
905  Mul_Begin(8) \
906  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
907  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
908  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
909  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
910  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
911  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
912  Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
913  Mul_SaveAcc(7, 1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
914  Mul_SaveAcc(8, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
915  Mul_SaveAcc(9, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
916  Mul_SaveAcc(10, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
917  Mul_SaveAcc(11, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
918  Mul_SaveAcc(12, 6, 7) Mul_Acc(7, 6) \
919  Mul_End(13, 7)
920 
921 #define Mul_16 \
922  Mul_Begin(16) \
923  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
924  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
925  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
926  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
927  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
928  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
929  Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
930  Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
931  Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
932  Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
933  Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
934  Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
935  Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
936  Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
937  Mul_SaveAcc(14, 0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
938  Mul_SaveAcc(15, 1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
939  Mul_SaveAcc(16, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
940  Mul_SaveAcc(17, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
941  Mul_SaveAcc(18, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
942  Mul_SaveAcc(19, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
943  Mul_SaveAcc(20, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
944  Mul_SaveAcc(21, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
945  Mul_SaveAcc(22, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
946  Mul_SaveAcc(23, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
947  Mul_SaveAcc(24, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
948  Mul_SaveAcc(25, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
949  Mul_SaveAcc(26, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
950  Mul_SaveAcc(27, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
951  Mul_SaveAcc(28, 14, 15) Mul_Acc(15, 14) \
952  Mul_End(29, 15)
953 
954 #define Squ_2 \
955  Squ_Begin(2) \
956  Squ_End(2)
957 
958 #define Squ_4 \
959  Squ_Begin(4) \
960  Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
961  Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
962  Squ_SaveAcc(3, 1, 3) Squ_Diag(2) \
963  Squ_SaveAcc(4, 2, 3) Squ_NonDiag \
964  Squ_End(4)
965 
966 #define Squ_8 \
967  Squ_Begin(8) \
968  Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
969  Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
970  Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
971  Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
972  Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
973  Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
974  Squ_SaveAcc(7, 1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
975  Squ_SaveAcc(8, 2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
976  Squ_SaveAcc(9, 3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
977  Squ_SaveAcc(10, 4, 7) Squ_Acc(5, 6) Squ_NonDiag \
978  Squ_SaveAcc(11, 5, 7) Squ_Diag(6) \
979  Squ_SaveAcc(12, 6, 7) Squ_NonDiag \
980  Squ_End(8)
981 
982 #define Squ_16 \
983  Squ_Begin(16) \
984  Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
985  Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
986  Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
987  Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
988  Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
989  Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
990  Squ_SaveAcc(7, 0, 8) Squ_Acc(1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
991  Squ_SaveAcc(8, 0, 9) Squ_Acc(1, 8) Squ_Acc(2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
992  Squ_SaveAcc(9, 0, 10) Squ_Acc(1, 9) Squ_Acc(2, 8) Squ_Acc(3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
993  Squ_SaveAcc(10, 0, 11) Squ_Acc(1, 10) Squ_Acc(2, 9) Squ_Acc(3, 8) Squ_Acc(4, 7) Squ_Acc(5, 6) Squ_NonDiag \
994  Squ_SaveAcc(11, 0, 12) Squ_Acc(1, 11) Squ_Acc(2, 10) Squ_Acc(3, 9) Squ_Acc(4, 8) Squ_Acc(5, 7) Squ_Diag(6) \
995  Squ_SaveAcc(12, 0, 13) Squ_Acc(1, 12) Squ_Acc(2, 11) Squ_Acc(3, 10) Squ_Acc(4, 9) Squ_Acc(5, 8) Squ_Acc(6, 7) Squ_NonDiag \
996  Squ_SaveAcc(13, 0, 14) Squ_Acc(1, 13) Squ_Acc(2, 12) Squ_Acc(3, 11) Squ_Acc(4, 10) Squ_Acc(5, 9) Squ_Acc(6, 8) Squ_Diag(7) \
997  Squ_SaveAcc(14, 0, 15) Squ_Acc(1, 14) Squ_Acc(2, 13) Squ_Acc(3, 12) Squ_Acc(4, 11) Squ_Acc(5, 10) Squ_Acc(6, 9) Squ_Acc(7, 8) Squ_NonDiag \
998  Squ_SaveAcc(15, 1, 15) Squ_Acc(2, 14) Squ_Acc(3, 13) Squ_Acc(4, 12) Squ_Acc(5, 11) Squ_Acc(6, 10) Squ_Acc(7, 9) Squ_Diag(8) \
999  Squ_SaveAcc(16, 2, 15) Squ_Acc(3, 14) Squ_Acc(4, 13) Squ_Acc(5, 12) Squ_Acc(6, 11) Squ_Acc(7, 10) Squ_Acc(8, 9) Squ_NonDiag \
1000  Squ_SaveAcc(17, 3, 15) Squ_Acc(4, 14) Squ_Acc(5, 13) Squ_Acc(6, 12) Squ_Acc(7, 11) Squ_Acc(8, 10) Squ_Diag(9) \
1001  Squ_SaveAcc(18, 4, 15) Squ_Acc(5, 14) Squ_Acc(6, 13) Squ_Acc(7, 12) Squ_Acc(8, 11) Squ_Acc(9, 10) Squ_NonDiag \
1002  Squ_SaveAcc(19, 5, 15) Squ_Acc(6, 14) Squ_Acc(7, 13) Squ_Acc(8, 12) Squ_Acc(9, 11) Squ_Diag(10) \
1003  Squ_SaveAcc(20, 6, 15) Squ_Acc(7, 14) Squ_Acc(8, 13) Squ_Acc(9, 12) Squ_Acc(10, 11) Squ_NonDiag \
1004  Squ_SaveAcc(21, 7, 15) Squ_Acc(8, 14) Squ_Acc(9, 13) Squ_Acc(10, 12) Squ_Diag(11) \
1005  Squ_SaveAcc(22, 8, 15) Squ_Acc(9, 14) Squ_Acc(10, 13) Squ_Acc(11, 12) Squ_NonDiag \
1006  Squ_SaveAcc(23, 9, 15) Squ_Acc(10, 14) Squ_Acc(11, 13) Squ_Diag(12) \
1007  Squ_SaveAcc(24, 10, 15) Squ_Acc(11, 14) Squ_Acc(12, 13) Squ_NonDiag \
1008  Squ_SaveAcc(25, 11, 15) Squ_Acc(12, 14) Squ_Diag(13) \
1009  Squ_SaveAcc(26, 12, 15) Squ_Acc(13, 14) Squ_NonDiag \
1010  Squ_SaveAcc(27, 13, 15) Squ_Diag(14) \
1011  Squ_SaveAcc(28, 14, 15) Squ_NonDiag \
1012  Squ_End(16)
1013 
1014 #define Bot_2 \
1015  Mul_Begin(2) \
1016  Bot_SaveAcc(0, 0, 1) Bot_Acc(1, 0) \
1017  Bot_End(2)
1018 
1019 #define Bot_4 \
1020  Mul_Begin(4) \
1021  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1022  Mul_SaveAcc(1, 2, 0) Mul_Acc(1, 1) Mul_Acc(0, 2) \
1023  Bot_SaveAcc(2, 0, 3) Bot_Acc(1, 2) Bot_Acc(2, 1) Bot_Acc(3, 0) \
1024  Bot_End(4)
1025 
1026 #define Bot_8 \
1027  Mul_Begin(8) \
1028  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1029  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
1030  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1031  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
1032  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
1033  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
1034  Bot_SaveAcc(6, 0, 7) Bot_Acc(1, 6) Bot_Acc(2, 5) Bot_Acc(3, 4) Bot_Acc(4, 3) Bot_Acc(5, 2) Bot_Acc(6, 1) Bot_Acc(7, 0) \
1035  Bot_End(8)
1036 
1037 #define Bot_16 \
1038  Mul_Begin(16) \
1039  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1040  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
1041  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1042  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
1043  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
1044  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
1045  Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
1046  Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
1047  Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
1048  Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
1049  Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
1050  Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
1051  Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
1052  Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
1053  Bot_SaveAcc(14, 0, 15) Bot_Acc(1, 14) Bot_Acc(2, 13) Bot_Acc(3, 12) Bot_Acc(4, 11) Bot_Acc(5, 10) Bot_Acc(6, 9) Bot_Acc(7, 8) Bot_Acc(8, 7) Bot_Acc(9, 6) Bot_Acc(10, 5) Bot_Acc(11, 4) Bot_Acc(12, 3) Bot_Acc(13, 2) Bot_Acc(14, 1) Bot_Acc(15, 0) \
1054  Bot_End(16)
1055 
1056 #endif
1057 
1058 #if 0
1059 #define Mul_Begin(n) \
1060  Declare2Words(p) \
1061  Declare2Words(c) \
1062  Declare2Words(d) \
1063  MultiplyWords(p, A[0], B[0]) \
1064  AssignWord(c, LowWord(p)) \
1065  AssignWord(d, HighWord(p))
1066 
1067 #define Mul_Acc(i, j) \
1068  MultiplyWords(p, A[i], B[j]) \
1069  Acc2WordsBy1(c, LowWord(p)) \
1070  Acc2WordsBy1(d, HighWord(p))
1071 
1072 #define Mul_SaveAcc(k, i, j) \
1073  R[k] = LowWord(c); \
1074  Add2WordsBy1(c, d, HighWord(c)) \
1075  MultiplyWords(p, A[i], B[j]) \
1076  AssignWord(d, HighWord(p)) \
1077  Acc2WordsBy1(c, LowWord(p))
1078 
1079 #define Mul_End(n) \
1080  R[2*n-3] = LowWord(c); \
1081  Acc2WordsBy1(d, HighWord(c)) \
1082  MultiplyWords(p, A[n-1], B[n-1])\
1083  Acc2WordsBy2(d, p) \
1084  R[2*n-2] = LowWord(d); \
1085  R[2*n-1] = HighWord(d);
1086 
1087 #define Bot_SaveAcc(k, i, j) \
1088  R[k] = LowWord(c); \
1089  word e = LowWord(d) + HighWord(c); \
1090  e += A[i] * B[j];
1091 
1092 #define Bot_Acc(i, j) \
1093  e += A[i] * B[j];
1094 
1095 #define Bot_End(n) \
1096  R[n-1] = e;
1097 #else
1098 #define Mul_Begin(n) \
1099  Declare2Words(p) \
1100  word c; \
1101  Declare2Words(d) \
1102  MultiplyWords(p, A[0], B[0]) \
1103  c = LowWord(p); \
1104  AssignWord(d, HighWord(p))
1105 
1106 #define Mul_Acc(i, j) \
1107  MulAcc(c, d, A[i], B[j])
1108 
1109 #define Mul_SaveAcc(k, i, j) \
1110  R[k] = c; \
1111  c = LowWord(d); \
1112  AssignWord(d, HighWord(d)) \
1113  MulAcc(c, d, A[i], B[j])
1114 
1115 #define Mul_End(k, i) \
1116  R[k] = c; \
1117  MultiplyWords(p, A[i], B[i]) \
1118  Acc2WordsBy2(p, d) \
1119  R[k+1] = LowWord(p); \
1120  R[k+2] = HighWord(p);
1121 
1122 #define Bot_SaveAcc(k, i, j) \
1123  R[k] = c; \
1124  c = LowWord(d); \
1125  c += A[i] * B[j];
1126 
1127 #define Bot_Acc(i, j) \
1128  c += A[i] * B[j];
1129 
1130 #define Bot_End(n) \
1131  R[n-1] = c;
1132 #endif
1133 
1134 #define Squ_Begin(n) \
1135  Declare2Words(p) \
1136  word c; \
1137  Declare2Words(d) \
1138  Declare2Words(e) \
1139  MultiplyWords(p, A[0], A[0]) \
1140  R[0] = LowWord(p); \
1141  AssignWord(e, HighWord(p)) \
1142  MultiplyWords(p, A[0], A[1]) \
1143  c = LowWord(p); \
1144  AssignWord(d, HighWord(p)) \
1145  Squ_NonDiag \
1146 
1147 #define Squ_NonDiag \
1148  Double3Words(c, d)
1149 
1150 #define Squ_SaveAcc(k, i, j) \
1151  Acc3WordsBy2(c, d, e) \
1152  R[k] = c; \
1153  MultiplyWords(p, A[i], A[j]) \
1154  c = LowWord(p); \
1155  AssignWord(d, HighWord(p)) \
1156 
1157 #define Squ_Acc(i, j) \
1158  MulAcc(c, d, A[i], A[j])
1159 
1160 #define Squ_Diag(i) \
1161  Squ_NonDiag \
1162  MulAcc(c, d, A[i], A[i])
1163 
1164 #define Squ_End(n) \
1165  Acc3WordsBy2(c, d, e) \
1166  R[2*n-3] = c; \
1167  MultiplyWords(p, A[n-1], A[n-1])\
1168  Acc2WordsBy2(p, e) \
1169  R[2*n-2] = LowWord(p); \
1170  R[2*n-1] = HighWord(p);
1171 
1172 
1173 void Baseline_Multiply2(word *R, const word *A, const word *B)
1174 {
1175  Mul_2
1176 }
1177 
1178 void Baseline_Multiply4(word *R, const word *A, const word *B)
1179 {
1180  Mul_4
1181 }
1182 
1183 void Baseline_Multiply8(word *R, const word *A, const word *B)
1184 {
1185  Mul_8
1186 }
1187 
1188 void Baseline_Square2(word *R, const word *A)
1189 {
1190  Squ_2
1191 }
1192 
1193 void Baseline_Square4(word *R, const word *A)
1194 {
1195  Squ_4
1196 }
1197 
1198 void Baseline_Square8(word *R, const word *A)
1199 {
1200  Squ_8
1201 }
1202 
1203 void Baseline_MultiplyBottom2(word *R, const word *A, const word *B)
1204 {
1205  Bot_2
1206 }
1207 
1208 void Baseline_MultiplyBottom4(word *R, const word *A, const word *B)
1209 {
1210  Bot_4
1211 }
1212 
1213 void Baseline_MultiplyBottom8(word *R, const word *A, const word *B)
1214 {
1215  Bot_8
1216 }
1217 
1218 #define Top_Begin(n) \
1219  Declare2Words(p) \
1220  word c; \
1221  Declare2Words(d) \
1222  MultiplyWords(p, A[0], B[n-2]);\
1223  AssignWord(d, HighWord(p));
1224 
1225 #define Top_Acc(i, j) \
1226  MultiplyWords(p, A[i], B[j]);\
1227  Acc2WordsBy1(d, HighWord(p));
1228 
1229 #define Top_SaveAcc0(i, j) \
1230  c = LowWord(d); \
1231  AssignWord(d, HighWord(d)) \
1232  MulAcc(c, d, A[i], B[j])
1233 
1234 #define Top_SaveAcc1(i, j) \
1235  c = L<c; \
1236  Acc2WordsBy1(d, c); \
1237  c = LowWord(d); \
1238  AssignWord(d, HighWord(d)) \
1239  MulAcc(c, d, A[i], B[j])
1240 
1241 void Baseline_MultiplyTop2(word *R, const word *A, const word *B, word L)
1242 {
1243  CRYPTOPP_UNUSED(L);
1244  word T[4];
1245  Baseline_Multiply2(T, A, B);
1246  R[0] = T[2];
1247  R[1] = T[3];
1248 }
1249 
1250 void Baseline_MultiplyTop4(word *R, const word *A, const word *B, word L)
1251 {
1252  Top_Begin(4)
1253  Top_Acc(1, 1) Top_Acc(2, 0) \
1254  Top_SaveAcc0(0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1255  Top_SaveAcc1(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
1256  Mul_SaveAcc(0, 2, 3) Mul_Acc(3, 2) \
1257  Mul_End(1, 3)
1258 }
1259 
1260 void Baseline_MultiplyTop8(word *R, const word *A, const word *B, word L)
1261 {
1262  Top_Begin(8)
1263  Top_Acc(1, 5) Top_Acc(2, 4) Top_Acc(3, 3) Top_Acc(4, 2) Top_Acc(5, 1) Top_Acc(6, 0) \
1264  Top_SaveAcc0(0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
1265  Top_SaveAcc1(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
1266  Mul_SaveAcc(0, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
1267  Mul_SaveAcc(1, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
1268  Mul_SaveAcc(2, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
1269  Mul_SaveAcc(3, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
1270  Mul_SaveAcc(4, 6, 7) Mul_Acc(7, 6) \
1271  Mul_End(5, 7)
1272 }
1273 
1274 #if !CRYPTOPP_INTEGER_SSE2 // save memory by not compiling these functions when SSE2 is available
1275 void Baseline_Multiply16(word *R, const word *A, const word *B)
1276 {
1277  Mul_16
1278 }
1279 
1280 void Baseline_Square16(word *R, const word *A)
1281 {
1282  Squ_16
1283 }
1284 
1285 void Baseline_MultiplyBottom16(word *R, const word *A, const word *B)
1286 {
1287  Bot_16
1288 }
1289 
1290 void Baseline_MultiplyTop16(word *R, const word *A, const word *B, word L)
1291 {
1292  Top_Begin(16)
1293  Top_Acc(1, 13) Top_Acc(2, 12) Top_Acc(3, 11) Top_Acc(4, 10) Top_Acc(5, 9) Top_Acc(6, 8) Top_Acc(7, 7) Top_Acc(8, 6) Top_Acc(9, 5) Top_Acc(10, 4) Top_Acc(11, 3) Top_Acc(12, 2) Top_Acc(13, 1) Top_Acc(14, 0) \
1294  Top_SaveAcc0(0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
1295  Top_SaveAcc1(1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
1296  Mul_SaveAcc(0, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
1297  Mul_SaveAcc(1, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
1298  Mul_SaveAcc(2, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
1299  Mul_SaveAcc(3, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
1300  Mul_SaveAcc(4, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
1301  Mul_SaveAcc(5, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
1302  Mul_SaveAcc(6, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
1303  Mul_SaveAcc(7, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
1304  Mul_SaveAcc(8, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
1305  Mul_SaveAcc(9, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
1306  Mul_SaveAcc(10, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
1307  Mul_SaveAcc(11, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
1308  Mul_SaveAcc(12, 14, 15) Mul_Acc(15, 14) \
1309  Mul_End(13, 15)
1310 }
1311 #endif
1312 
1313 // ********************************************************
1314 
1315 #if CRYPTOPP_INTEGER_SSE2
1316 
1317 CRYPTOPP_ALIGN_DATA(16) static const word32 s_maskLow16[4] CRYPTOPP_SECTION_ALIGN16 = {0xffff,0xffff,0xffff,0xffff};
1318 
1319 #undef Mul_Begin
1320 #undef Mul_Acc
1321 #undef Top_Begin
1322 #undef Top_Acc
1323 #undef Squ_Acc
1324 #undef Squ_NonDiag
1325 #undef Squ_Diag
1326 #undef Squ_SaveAcc
1327 #undef Squ_Begin
1328 #undef Mul_SaveAcc
1329 #undef Bot_Acc
1330 #undef Bot_SaveAcc
1331 #undef Bot_End
1332 #undef Squ_End
1333 #undef Mul_End
1334 
1335 #define SSE2_FinalSave(k) \
1336  AS2( psllq xmm5, 16) \
1337  AS2( paddq xmm4, xmm5) \
1338  AS2( movq QWORD PTR [ecx+8*(k)], xmm4)
1339 
1340 #define SSE2_SaveShift(k) \
1341  AS2( movq xmm0, xmm6) \
1342  AS2( punpckhqdq xmm6, xmm0) \
1343  AS2( movq xmm1, xmm7) \
1344  AS2( punpckhqdq xmm7, xmm1) \
1345  AS2( paddd xmm6, xmm0) \
1346  AS2( pslldq xmm6, 4) \
1347  AS2( paddd xmm7, xmm1) \
1348  AS2( paddd xmm4, xmm6) \
1349  AS2( pslldq xmm7, 4) \
1350  AS2( movq xmm6, xmm4) \
1351  AS2( paddd xmm5, xmm7) \
1352  AS2( movq xmm7, xmm5) \
1353  AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
1354  AS2( psrlq xmm6, 16) \
1355  AS2( paddq xmm6, xmm7) \
1356  AS2( punpckhqdq xmm4, xmm0) \
1357  AS2( punpckhqdq xmm5, xmm0) \
1358  AS2( movq QWORD PTR [ecx+8*(k)+2], xmm6) \
1359  AS2( psrlq xmm6, 3*16) \
1360  AS2( paddd xmm4, xmm6) \
1361 
1362 #define Squ_SSE2_SaveShift(k) \
1363  AS2( movq xmm0, xmm6) \
1364  AS2( punpckhqdq xmm6, xmm0) \
1365  AS2( movq xmm1, xmm7) \
1366  AS2( punpckhqdq xmm7, xmm1) \
1367  AS2( paddd xmm6, xmm0) \
1368  AS2( pslldq xmm6, 4) \
1369  AS2( paddd xmm7, xmm1) \
1370  AS2( paddd xmm4, xmm6) \
1371  AS2( pslldq xmm7, 4) \
1372  AS2( movhlps xmm6, xmm4) \
1373  AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
1374  AS2( paddd xmm5, xmm7) \
1375  AS2( movhps QWORD PTR [esp+12], xmm5)\
1376  AS2( psrlq xmm4, 16) \
1377  AS2( paddq xmm4, xmm5) \
1378  AS2( movq QWORD PTR [ecx+8*(k)+2], xmm4) \
1379  AS2( psrlq xmm4, 3*16) \
1380  AS2( paddd xmm4, xmm6) \
1381  AS2( movq QWORD PTR [esp+4], xmm4)\
1382 
1383 #define SSE2_FirstMultiply(i) \
1384  AS2( movdqa xmm7, [esi+(i)*16])\
1385  AS2( movdqa xmm5, [edi-(i)*16])\
1386  AS2( pmuludq xmm5, xmm7) \
1387  AS2( movdqa xmm4, [ebx])\
1388  AS2( movdqa xmm6, xmm4) \
1389  AS2( pand xmm4, xmm5) \
1390  AS2( psrld xmm5, 16) \
1391  AS2( pmuludq xmm7, [edx-(i)*16])\
1392  AS2( pand xmm6, xmm7) \
1393  AS2( psrld xmm7, 16)
1394 
1395 #define Squ_Begin(n) \
1396  SquPrologue \
1397  AS2( mov esi, esp)\
1398  AS2( and esp, 0xfffffff0)\
1399  AS2( lea edi, [esp-32*n])\
1400  AS2( sub esp, 32*n+16)\
1401  AS1( push esi)\
1402  AS2( mov esi, edi) \
1403  AS2( xor edx, edx) \
1404  ASL(1) \
1405  ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
1406  ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
1407  AS2( movdqa [edi+2*edx], xmm0) \
1408  AS2( psrlq xmm0, 32) \
1409  AS2( movdqa [edi+2*edx+16], xmm0) \
1410  AS2( movdqa [edi+16*n+2*edx], xmm1) \
1411  AS2( psrlq xmm1, 32) \
1412  AS2( movdqa [edi+16*n+2*edx+16], xmm1) \
1413  AS2( add edx, 16) \
1414  AS2( cmp edx, 8*(n)) \
1415  ASJ( jne, 1, b) \
1416  AS2( lea edx, [edi+16*n])\
1417  SSE2_FirstMultiply(0) \
1418 
1419 #define Squ_Acc(i) \
1420  ASL(LSqu##i) \
1421  AS2( movdqa xmm1, [esi+(i)*16]) \
1422  AS2( movdqa xmm0, [edi-(i)*16]) \
1423  AS2( movdqa xmm2, [ebx]) \
1424  AS2( pmuludq xmm0, xmm1) \
1425  AS2( pmuludq xmm1, [edx-(i)*16]) \
1426  AS2( movdqa xmm3, xmm2) \
1427  AS2( pand xmm2, xmm0) \
1428  AS2( psrld xmm0, 16) \
1429  AS2( paddd xmm4, xmm2) \
1430  AS2( paddd xmm5, xmm0) \
1431  AS2( pand xmm3, xmm1) \
1432  AS2( psrld xmm1, 16) \
1433  AS2( paddd xmm6, xmm3) \
1434  AS2( paddd xmm7, xmm1) \
1435 
1436 #define Squ_Acc1(i)
1437 #define Squ_Acc2(i) ASC(call, LSqu##i)
1438 #define Squ_Acc3(i) Squ_Acc2(i)
1439 #define Squ_Acc4(i) Squ_Acc2(i)
1440 #define Squ_Acc5(i) Squ_Acc2(i)
1441 #define Squ_Acc6(i) Squ_Acc2(i)
1442 #define Squ_Acc7(i) Squ_Acc2(i)
1443 #define Squ_Acc8(i) Squ_Acc2(i)
1444 
1445 #define SSE2_End(E, n) \
1446  SSE2_SaveShift(2*(n)-3) \
1447  AS2( movdqa xmm7, [esi+16]) \
1448  AS2( movdqa xmm0, [edi]) \
1449  AS2( pmuludq xmm0, xmm7) \
1450  AS2( movdqa xmm2, [ebx]) \
1451  AS2( pmuludq xmm7, [edx]) \
1452  AS2( movdqa xmm6, xmm2) \
1453  AS2( pand xmm2, xmm0) \
1454  AS2( psrld xmm0, 16) \
1455  AS2( paddd xmm4, xmm2) \
1456  AS2( paddd xmm5, xmm0) \
1457  AS2( pand xmm6, xmm7) \
1458  AS2( psrld xmm7, 16) \
1459  SSE2_SaveShift(2*(n)-2) \
1460  SSE2_FinalSave(2*(n)-1) \
1461  AS1( pop esp)\
1462  E
1463 
1464 #define Squ_End(n) SSE2_End(SquEpilogue, n)
1465 #define Mul_End(n) SSE2_End(MulEpilogue, n)
1466 #define Top_End(n) SSE2_End(TopEpilogue, n)
1467 
1468 #define Squ_Column1(k, i) \
1469  Squ_SSE2_SaveShift(k) \
1470  AS2( add esi, 16) \
1471  SSE2_FirstMultiply(1)\
1472  Squ_Acc##i(i) \
1473  AS2( paddd xmm4, xmm4) \
1474  AS2( paddd xmm5, xmm5) \
1475  AS2( movdqa xmm3, [esi]) \
1476  AS2( movq xmm1, QWORD PTR [esi+8]) \
1477  AS2( pmuludq xmm1, xmm3) \
1478  AS2( pmuludq xmm3, xmm3) \
1479  AS2( movdqa xmm0, [ebx])\
1480  AS2( movdqa xmm2, xmm0) \
1481  AS2( pand xmm0, xmm1) \
1482  AS2( psrld xmm1, 16) \
1483  AS2( paddd xmm6, xmm0) \
1484  AS2( paddd xmm7, xmm1) \
1485  AS2( pand xmm2, xmm3) \
1486  AS2( psrld xmm3, 16) \
1487  AS2( paddd xmm6, xmm6) \
1488  AS2( paddd xmm7, xmm7) \
1489  AS2( paddd xmm4, xmm2) \
1490  AS2( paddd xmm5, xmm3) \
1491  AS2( movq xmm0, QWORD PTR [esp+4])\
1492  AS2( movq xmm1, QWORD PTR [esp+12])\
1493  AS2( paddd xmm4, xmm0)\
1494  AS2( paddd xmm5, xmm1)\
1495 
1496 #define Squ_Column0(k, i) \
1497  Squ_SSE2_SaveShift(k) \
1498  AS2( add edi, 16) \
1499  AS2( add edx, 16) \
1500  SSE2_FirstMultiply(1)\
1501  Squ_Acc##i(i) \
1502  AS2( paddd xmm6, xmm6) \
1503  AS2( paddd xmm7, xmm7) \
1504  AS2( paddd xmm4, xmm4) \
1505  AS2( paddd xmm5, xmm5) \
1506  AS2( movq xmm0, QWORD PTR [esp+4])\
1507  AS2( movq xmm1, QWORD PTR [esp+12])\
1508  AS2( paddd xmm4, xmm0)\
1509  AS2( paddd xmm5, xmm1)\
1510 
1511 #define SSE2_MulAdd45 \
1512  AS2( movdqa xmm7, [esi]) \
1513  AS2( movdqa xmm0, [edi]) \
1514  AS2( pmuludq xmm0, xmm7) \
1515  AS2( movdqa xmm2, [ebx]) \
1516  AS2( pmuludq xmm7, [edx]) \
1517  AS2( movdqa xmm6, xmm2) \
1518  AS2( pand xmm2, xmm0) \
1519  AS2( psrld xmm0, 16) \
1520  AS2( paddd xmm4, xmm2) \
1521  AS2( paddd xmm5, xmm0) \
1522  AS2( pand xmm6, xmm7) \
1523  AS2( psrld xmm7, 16)
1524 
1525 #define Mul_Begin(n) \
1526  MulPrologue \
1527  AS2( mov esi, esp)\
1528  AS2( and esp, 0xfffffff0)\
1529  AS2( sub esp, 48*n+16)\
1530  AS1( push esi)\
1531  AS2( xor edx, edx) \
1532  ASL(1) \
1533  ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
1534  ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
1535  ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
1536  AS2( movdqa [esp+20+2*edx], xmm0) \
1537  AS2( psrlq xmm0, 32) \
1538  AS2( movdqa [esp+20+2*edx+16], xmm0) \
1539  AS2( movdqa [esp+20+16*n+2*edx], xmm1) \
1540  AS2( psrlq xmm1, 32) \
1541  AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \
1542  AS2( movdqa [esp+20+32*n+2*edx], xmm2) \
1543  AS2( psrlq xmm2, 32) \
1544  AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \
1545  AS2( add edx, 16) \
1546  AS2( cmp edx, 8*(n)) \
1547  ASJ( jne, 1, b) \
1548  AS2( lea edi, [esp+20])\
1549  AS2( lea edx, [esp+20+16*n])\
1550  AS2( lea esi, [esp+20+32*n])\
1551  SSE2_FirstMultiply(0) \
1552 
1553 #define Mul_Acc(i) \
1554  ASL(LMul##i) \
1555  AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
1556  AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
1557  AS2( movdqa xmm2, [ebx]) \
1558  AS2( pmuludq xmm0, xmm1) \
1559  AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
1560  AS2( movdqa xmm3, xmm2) \
1561  AS2( pand xmm2, xmm0) \
1562  AS2( psrld xmm0, 16) \
1563  AS2( paddd xmm4, xmm2) \
1564  AS2( paddd xmm5, xmm0) \
1565  AS2( pand xmm3, xmm1) \
1566  AS2( psrld xmm1, 16) \
1567  AS2( paddd xmm6, xmm3) \
1568  AS2( paddd xmm7, xmm1) \
1569 
1570 #define Mul_Acc1(i)
1571 #define Mul_Acc2(i) ASC(call, LMul##i)
1572 #define Mul_Acc3(i) Mul_Acc2(i)
1573 #define Mul_Acc4(i) Mul_Acc2(i)
1574 #define Mul_Acc5(i) Mul_Acc2(i)
1575 #define Mul_Acc6(i) Mul_Acc2(i)
1576 #define Mul_Acc7(i) Mul_Acc2(i)
1577 #define Mul_Acc8(i) Mul_Acc2(i)
1578 #define Mul_Acc9(i) Mul_Acc2(i)
1579 #define Mul_Acc10(i) Mul_Acc2(i)
1580 #define Mul_Acc11(i) Mul_Acc2(i)
1581 #define Mul_Acc12(i) Mul_Acc2(i)
1582 #define Mul_Acc13(i) Mul_Acc2(i)
1583 #define Mul_Acc14(i) Mul_Acc2(i)
1584 #define Mul_Acc15(i) Mul_Acc2(i)
1585 #define Mul_Acc16(i) Mul_Acc2(i)
1586 
1587 #define Mul_Column1(k, i) \
1588  SSE2_SaveShift(k) \
1589  AS2( add esi, 16) \
1590  SSE2_MulAdd45\
1591  Mul_Acc##i(i) \
1592 
1593 #define Mul_Column0(k, i) \
1594  SSE2_SaveShift(k) \
1595  AS2( add edi, 16) \
1596  AS2( add edx, 16) \
1597  SSE2_MulAdd45\
1598  Mul_Acc##i(i) \
1599 
1600 #define Bot_Acc(i) \
1601  AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
1602  AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
1603  AS2( pmuludq xmm0, xmm1) \
1604  AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
1605  AS2( paddq xmm4, xmm0) \
1606  AS2( paddd xmm6, xmm1)
1607 
1608 #define Bot_SaveAcc(k) \
1609  SSE2_SaveShift(k) \
1610  AS2( add edi, 16) \
1611  AS2( add edx, 16) \
1612  AS2( movdqa xmm6, [esi]) \
1613  AS2( movdqa xmm0, [edi]) \
1614  AS2( pmuludq xmm0, xmm6) \
1615  AS2( paddq xmm4, xmm0) \
1616  AS2( psllq xmm5, 16) \
1617  AS2( paddq xmm4, xmm5) \
1618  AS2( pmuludq xmm6, [edx])
1619 
1620 #define Bot_End(n) \
1621  AS2( movhlps xmm7, xmm6) \
1622  AS2( paddd xmm6, xmm7) \
1623  AS2( psllq xmm6, 32) \
1624  AS2( paddd xmm4, xmm6) \
1625  AS2( movq QWORD PTR [ecx+8*((n)-1)], xmm4) \
1626  AS1( pop esp)\
1627  MulEpilogue
1628 
1629 #define Top_Begin(n) \
1630  TopPrologue \
1631  AS2( mov edx, esp)\
1632  AS2( and esp, 0xfffffff0)\
1633  AS2( sub esp, 48*n+16)\
1634  AS1( push edx)\
1635  AS2( xor edx, edx) \
1636  ASL(1) \
1637  ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
1638  ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
1639  ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
1640  AS2( movdqa [esp+20+2*edx], xmm0) \
1641  AS2( psrlq xmm0, 32) \
1642  AS2( movdqa [esp+20+2*edx+16], xmm0) \
1643  AS2( movdqa [esp+20+16*n+2*edx], xmm1) \
1644  AS2( psrlq xmm1, 32) \
1645  AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \
1646  AS2( movdqa [esp+20+32*n+2*edx], xmm2) \
1647  AS2( psrlq xmm2, 32) \
1648  AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \
1649  AS2( add edx, 16) \
1650  AS2( cmp edx, 8*(n)) \
1651  ASJ( jne, 1, b) \
1652  AS2( mov eax, esi) \
1653  AS2( lea edi, [esp+20+00*n+16*(n/2-1)])\
1654  AS2( lea edx, [esp+20+16*n+16*(n/2-1)])\
1655  AS2( lea esi, [esp+20+32*n+16*(n/2-1)])\
1656  AS2( pxor xmm4, xmm4)\
1657  AS2( pxor xmm5, xmm5)
1658 
1659 #define Top_Acc(i) \
1660  AS2( movq xmm0, QWORD PTR [esi+i/2*(1-(i-2*(i/2))*2)*16+8]) \
1661  AS2( pmuludq xmm0, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
1662  AS2( psrlq xmm0, 48) \
1663  AS2( paddd xmm5, xmm0)\
1664 
1665 #define Top_Column0(i) \
1666  AS2( psllq xmm5, 32) \
1667  AS2( add edi, 16) \
1668  AS2( add edx, 16) \
1669  SSE2_MulAdd45\
1670  Mul_Acc##i(i) \
1671 
1672 #define Top_Column1(i) \
1673  SSE2_SaveShift(0) \
1674  AS2( add esi, 16) \
1675  SSE2_MulAdd45\
1676  Mul_Acc##i(i) \
1677  AS2( shr eax, 16) \
1678  AS2( movd xmm0, eax)\
1679  AS2( movd xmm1, [ecx+4])\
1680  AS2( psrld xmm1, 16)\
1681  AS2( pcmpgtd xmm1, xmm0)\
1682  AS2( psrld xmm1, 31)\
1683  AS2( paddd xmm4, xmm1)\
1684 
1685 void SSE2_Square4(word *C, const word *A)
1686 {
1687  Squ_Begin(2)
1688  Squ_Column0(0, 1)
1689  Squ_End(2)
1690 }
1691 
1692 void SSE2_Square8(word *C, const word *A)
1693 {
1694  Squ_Begin(4)
1695 #ifndef __GNUC__
1696  ASJ( jmp, 0, f)
1697  Squ_Acc(2)
1698  AS1( ret) ASL(0)
1699 #endif
1700  Squ_Column0(0, 1)
1701  Squ_Column1(1, 1)
1702  Squ_Column0(2, 2)
1703  Squ_Column1(3, 1)
1704  Squ_Column0(4, 1)
1705  Squ_End(4)
1706 }
1707 
1708 void SSE2_Square16(word *C, const word *A)
1709 {
1710  Squ_Begin(8)
1711 #ifndef __GNUC__
1712  ASJ( jmp, 0, f)
1713  Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
1714  AS1( ret) ASL(0)
1715 #endif
1716  Squ_Column0(0, 1)
1717  Squ_Column1(1, 1)
1718  Squ_Column0(2, 2)
1719  Squ_Column1(3, 2)
1720  Squ_Column0(4, 3)
1721  Squ_Column1(5, 3)
1722  Squ_Column0(6, 4)
1723  Squ_Column1(7, 3)
1724  Squ_Column0(8, 3)
1725  Squ_Column1(9, 2)
1726  Squ_Column0(10, 2)
1727  Squ_Column1(11, 1)
1728  Squ_Column0(12, 1)
1729  Squ_End(8)
1730 }
1731 
1732 void SSE2_Square32(word *C, const word *A)
1733 {
1734  Squ_Begin(16)
1735  ASJ( jmp, 0, f)
1736  Squ_Acc(8) Squ_Acc(7) Squ_Acc(6) Squ_Acc(5) Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
1737  AS1( ret) ASL(0)
1738  Squ_Column0(0, 1)
1739  Squ_Column1(1, 1)
1740  Squ_Column0(2, 2)
1741  Squ_Column1(3, 2)
1742  Squ_Column0(4, 3)
1743  Squ_Column1(5, 3)
1744  Squ_Column0(6, 4)
1745  Squ_Column1(7, 4)
1746  Squ_Column0(8, 5)
1747  Squ_Column1(9, 5)
1748  Squ_Column0(10, 6)
1749  Squ_Column1(11, 6)
1750  Squ_Column0(12, 7)
1751  Squ_Column1(13, 7)
1752  Squ_Column0(14, 8)
1753  Squ_Column1(15, 7)
1754  Squ_Column0(16, 7)
1755  Squ_Column1(17, 6)
1756  Squ_Column0(18, 6)
1757  Squ_Column1(19, 5)
1758  Squ_Column0(20, 5)
1759  Squ_Column1(21, 4)
1760  Squ_Column0(22, 4)
1761  Squ_Column1(23, 3)
1762  Squ_Column0(24, 3)
1763  Squ_Column1(25, 2)
1764  Squ_Column0(26, 2)
1765  Squ_Column1(27, 1)
1766  Squ_Column0(28, 1)
1767  Squ_End(16)
1768 }
1769 
1770 void SSE2_Multiply4(word *C, const word *A, const word *B)
1771 {
1772  Mul_Begin(2)
1773 #ifndef __GNUC__
1774  ASJ( jmp, 0, f)
1775  Mul_Acc(2)
1776  AS1( ret) ASL(0)
1777 #endif
1778  Mul_Column0(0, 2)
1779  Mul_End(2)
1780 }
1781 
1782 void SSE2_Multiply8(word *C, const word *A, const word *B)
1783 {
1784  Mul_Begin(4)
1785 #ifndef __GNUC__
1786  ASJ( jmp, 0, f)
1787  Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1788  AS1( ret) ASL(0)
1789 #endif
1790  Mul_Column0(0, 2)
1791  Mul_Column1(1, 3)
1792  Mul_Column0(2, 4)
1793  Mul_Column1(3, 3)
1794  Mul_Column0(4, 2)
1795  Mul_End(4)
1796 }
1797 
1798 void SSE2_Multiply16(word *C, const word *A, const word *B)
1799 {
1800  Mul_Begin(8)
1801 #ifndef __GNUC__
1802  ASJ( jmp, 0, f)
1803  Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1804  AS1( ret) ASL(0)
1805 #endif
1806  Mul_Column0(0, 2)
1807  Mul_Column1(1, 3)
1808  Mul_Column0(2, 4)
1809  Mul_Column1(3, 5)
1810  Mul_Column0(4, 6)
1811  Mul_Column1(5, 7)
1812  Mul_Column0(6, 8)
1813  Mul_Column1(7, 7)
1814  Mul_Column0(8, 6)
1815  Mul_Column1(9, 5)
1816  Mul_Column0(10, 4)
1817  Mul_Column1(11, 3)
1818  Mul_Column0(12, 2)
1819  Mul_End(8)
1820 }
1821 
1822 void SSE2_Multiply32(word *C, const word *A, const word *B)
1823 {
1824  Mul_Begin(16)
1825  ASJ( jmp, 0, f)
1826  Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1827  AS1( ret) ASL(0)
1828  Mul_Column0(0, 2)
1829  Mul_Column1(1, 3)
1830  Mul_Column0(2, 4)
1831  Mul_Column1(3, 5)
1832  Mul_Column0(4, 6)
1833  Mul_Column1(5, 7)
1834  Mul_Column0(6, 8)
1835  Mul_Column1(7, 9)
1836  Mul_Column0(8, 10)
1837  Mul_Column1(9, 11)
1838  Mul_Column0(10, 12)
1839  Mul_Column1(11, 13)
1840  Mul_Column0(12, 14)
1841  Mul_Column1(13, 15)
1842  Mul_Column0(14, 16)
1843  Mul_Column1(15, 15)
1844  Mul_Column0(16, 14)
1845  Mul_Column1(17, 13)
1846  Mul_Column0(18, 12)
1847  Mul_Column1(19, 11)
1848  Mul_Column0(20, 10)
1849  Mul_Column1(21, 9)
1850  Mul_Column0(22, 8)
1851  Mul_Column1(23, 7)
1852  Mul_Column0(24, 6)
1853  Mul_Column1(25, 5)
1854  Mul_Column0(26, 4)
1855  Mul_Column1(27, 3)
1856  Mul_Column0(28, 2)
1857  Mul_End(16)
1858 }
1859 
1860 void SSE2_MultiplyBottom4(word *C, const word *A, const word *B)
1861 {
1862  Mul_Begin(2)
1863  Bot_SaveAcc(0) Bot_Acc(2)
1864  Bot_End(2)
1865 }
1866 
1867 void SSE2_MultiplyBottom8(word *C, const word *A, const word *B)
1868 {
1869  Mul_Begin(4)
1870 #ifndef __GNUC__
1871  ASJ( jmp, 0, f)
1872  Mul_Acc(3) Mul_Acc(2)
1873  AS1( ret) ASL(0)
1874 #endif
1875  Mul_Column0(0, 2)
1876  Mul_Column1(1, 3)
1877  Bot_SaveAcc(2) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
1878  Bot_End(4)
1879 }
1880 
1881 void SSE2_MultiplyBottom16(word *C, const word *A, const word *B)
1882 {
1883  Mul_Begin(8)
1884 #ifndef __GNUC__
1885  ASJ( jmp, 0, f)
1886  Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1887  AS1( ret) ASL(0)
1888 #endif
1889  Mul_Column0(0, 2)
1890  Mul_Column1(1, 3)
1891  Mul_Column0(2, 4)
1892  Mul_Column1(3, 5)
1893  Mul_Column0(4, 6)
1894  Mul_Column1(5, 7)
1895  Bot_SaveAcc(6) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
1896  Bot_End(8)
1897 }
1898 
1899 void SSE2_MultiplyBottom32(word *C, const word *A, const word *B)
1900 {
1901  Mul_Begin(16)
1902 #ifndef __GNUC__
1903  ASJ( jmp, 0, f)
1904  Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1905  AS1( ret) ASL(0)
1906 #endif
1907  Mul_Column0(0, 2)
1908  Mul_Column1(1, 3)
1909  Mul_Column0(2, 4)
1910  Mul_Column1(3, 5)
1911  Mul_Column0(4, 6)
1912  Mul_Column1(5, 7)
1913  Mul_Column0(6, 8)
1914  Mul_Column1(7, 9)
1915  Mul_Column0(8, 10)
1916  Mul_Column1(9, 11)
1917  Mul_Column0(10, 12)
1918  Mul_Column1(11, 13)
1919  Mul_Column0(12, 14)
1920  Mul_Column1(13, 15)
1921  Bot_SaveAcc(14) Bot_Acc(16) Bot_Acc(15) Bot_Acc(14) Bot_Acc(13) Bot_Acc(12) Bot_Acc(11) Bot_Acc(10) Bot_Acc(9) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
1922  Bot_End(16)
1923 }
1924 
1925 void SSE2_MultiplyTop8(word *C, const word *A, const word *B, word L)
1926 {
1927  Top_Begin(4)
1928  Top_Acc(3) Top_Acc(2) Top_Acc(1)
1929 #ifndef __GNUC__
1930  ASJ( jmp, 0, f)
1931  Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1932  AS1( ret) ASL(0)
1933 #endif
1934  Top_Column0(4)
1935  Top_Column1(3)
1936  Mul_Column0(0, 2)
1937  Top_End(2)
1938 }
1939 
1940 void SSE2_MultiplyTop16(word *C, const word *A, const word *B, word L)
1941 {
1942  Top_Begin(8)
1943  Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
1944 #ifndef __GNUC__
1945  ASJ( jmp, 0, f)
1946  Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1947  AS1( ret) ASL(0)
1948 #endif
1949  Top_Column0(8)
1950  Top_Column1(7)
1951  Mul_Column0(0, 6)
1952  Mul_Column1(1, 5)
1953  Mul_Column0(2, 4)
1954  Mul_Column1(3, 3)
1955  Mul_Column0(4, 2)
1956  Top_End(4)
1957 }
1958 
1959 void SSE2_MultiplyTop32(word *C, const word *A, const word *B, word L)
1960 {
1961  Top_Begin(16)
1962  Top_Acc(15) Top_Acc(14) Top_Acc(13) Top_Acc(12) Top_Acc(11) Top_Acc(10) Top_Acc(9) Top_Acc(8) Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
1963 #ifndef __GNUC__
1964  ASJ( jmp, 0, f)
1965  Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1966  AS1( ret) ASL(0)
1967 #endif
1968  Top_Column0(16)
1969  Top_Column1(15)
1970  Mul_Column0(0, 14)
1971  Mul_Column1(1, 13)
1972  Mul_Column0(2, 12)
1973  Mul_Column1(3, 11)
1974  Mul_Column0(4, 10)
1975  Mul_Column1(5, 9)
1976  Mul_Column0(6, 8)
1977  Mul_Column1(7, 7)
1978  Mul_Column0(8, 6)
1979  Mul_Column1(9, 5)
1980  Mul_Column0(10, 4)
1981  Mul_Column1(11, 3)
1982  Mul_Column0(12, 2)
1983  Top_End(8)
1984 }
1985 
1986 #endif // #if CRYPTOPP_INTEGER_SSE2
1987 
1988 // ********************************************************
1989 
1990 typedef int (CRYPTOPP_FASTCALL * PAdd)(size_t N, word *C, const word *A, const word *B);
1991 typedef void (* PMul)(word *C, const word *A, const word *B);
1992 typedef void (* PSqu)(word *C, const word *A);
1993 typedef void (* PMulTop)(word *C, const word *A, const word *B, word L);
1994 
1995 #if CRYPTOPP_INTEGER_SSE2
1996 static PAdd s_pAdd = &Baseline_Add, s_pSub = &Baseline_Sub;
1997 static size_t s_recursionLimit = 8;
1998 #else
1999 static const size_t s_recursionLimit = 16;
2000 #endif
2001 
2002 static PMul s_pMul[9], s_pBot[9];
2003 static PSqu s_pSqu[9];
2004 static PMulTop s_pTop[9];
2005 
2006 static void SetFunctionPointers()
2007 {
2008  s_pMul[0] = &Baseline_Multiply2;
2009  s_pBot[0] = &Baseline_MultiplyBottom2;
2010  s_pSqu[0] = &Baseline_Square2;
2011  s_pTop[0] = &Baseline_MultiplyTop2;
2012  s_pTop[1] = &Baseline_MultiplyTop4;
2013 
2014 #if CRYPTOPP_INTEGER_SSE2
2015  if (HasSSE2())
2016  {
2017 #if _MSC_VER != 1200 || defined(NDEBUG)
2018  if (IsP4())
2019  {
2020  s_pAdd = &SSE2_Add;
2021  s_pSub = &SSE2_Sub;
2022  }
2023 #endif
2024 
2025  s_recursionLimit = 32;
2026 
2027  s_pMul[1] = &SSE2_Multiply4;
2028  s_pMul[2] = &SSE2_Multiply8;
2029  s_pMul[4] = &SSE2_Multiply16;
2030  s_pMul[8] = &SSE2_Multiply32;
2031 
2032  s_pBot[1] = &SSE2_MultiplyBottom4;
2033  s_pBot[2] = &SSE2_MultiplyBottom8;
2034  s_pBot[4] = &SSE2_MultiplyBottom16;
2035  s_pBot[8] = &SSE2_MultiplyBottom32;
2036 
2037  s_pSqu[1] = &SSE2_Square4;
2038  s_pSqu[2] = &SSE2_Square8;
2039  s_pSqu[4] = &SSE2_Square16;
2040  s_pSqu[8] = &SSE2_Square32;
2041 
2042  s_pTop[2] = &SSE2_MultiplyTop8;
2043  s_pTop[4] = &SSE2_MultiplyTop16;
2044  s_pTop[8] = &SSE2_MultiplyTop32;
2045  }
2046  else
2047 #endif
2048  {
2049  s_pMul[1] = &Baseline_Multiply4;
2050  s_pMul[2] = &Baseline_Multiply8;
2051 
2052  s_pBot[1] = &Baseline_MultiplyBottom4;
2053  s_pBot[2] = &Baseline_MultiplyBottom8;
2054 
2055  s_pSqu[1] = &Baseline_Square4;
2056  s_pSqu[2] = &Baseline_Square8;
2057 
2058  s_pTop[2] = &Baseline_MultiplyTop8;
2059 
2060 #if !CRYPTOPP_INTEGER_SSE2
2061  s_pMul[4] = &Baseline_Multiply16;
2062  s_pBot[4] = &Baseline_MultiplyBottom16;
2063  s_pSqu[4] = &Baseline_Square16;
2064  s_pTop[4] = &Baseline_MultiplyTop16;
2065 #endif
2066  }
2067 }
2068 
2069 inline int Add(word *C, const word *A, const word *B, size_t N)
2070 {
2071 #if CRYPTOPP_INTEGER_SSE2
2072  return s_pAdd(N, C, A, B);
2073 #else
2074  return Baseline_Add(N, C, A, B);
2075 #endif
2076 }
2077 
2078 inline int Subtract(word *C, const word *A, const word *B, size_t N)
2079 {
2080 #if CRYPTOPP_INTEGER_SSE2
2081  return s_pSub(N, C, A, B);
2082 #else
2083  return Baseline_Sub(N, C, A, B);
2084 #endif
2085 }
2086 
2087 // ********************************************************
2088 
2089 
2090 #define A0 A
2091 #define A1 (A+N2)
2092 #define B0 B
2093 #define B1 (B+N2)
2094 
2095 #define T0 T
2096 #define T1 (T+N2)
2097 #define T2 (T+N)
2098 #define T3 (T+N+N2)
2099 
2100 #define R0 R
2101 #define R1 (R+N2)
2102 #define R2 (R+N)
2103 #define R3 (R+N+N2)
2104 
2105 // R[2*N] - result = A*B
2106 // T[2*N] - temporary work space
2107 // A[N] --- multiplier
2108 // B[N] --- multiplicant
2109 
2110 void RecursiveMultiply(word *R, word *T, const word *A, const word *B, size_t N)
2111 {
2112  assert(N>=2 && N%2==0);
2113 
2114  if (N <= s_recursionLimit)
2115  s_pMul[N/4](R, A, B);
2116  else
2117  {
2118  const size_t N2 = N/2;
2119 
2120  size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
2121  Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
2122 
2123  size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
2124  Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
2125 
2126  RecursiveMultiply(R2, T2, A1, B1, N2);
2127  RecursiveMultiply(T0, T2, R0, R1, N2);
2128  RecursiveMultiply(R0, T2, A0, B0, N2);
2129 
2130  // now T[01] holds (A1-A0)*(B0-B1), R[01] holds A0*B0, R[23] holds A1*B1
2131 
2132  int c2 = Add(R2, R2, R1, N2);
2133  int c3 = c2;
2134  c2 += Add(R1, R2, R0, N2);
2135  c3 += Add(R2, R2, R3, N2);
2136 
2137  if (AN2 == BN2)
2138  c3 -= Subtract(R1, R1, T0, N);
2139  else
2140  c3 += Add(R1, R1, T0, N);
2141 
2142  c3 += Increment(R2, N2, c2);
2143  assert (c3 >= 0 && c3 <= 2);
2144  Increment(R3, N2, c3);
2145  }
2146 }
2147 
2148 // R[2*N] - result = A*A
2149 // T[2*N] - temporary work space
2150 // A[N] --- number to be squared
2151 
2152 void RecursiveSquare(word *R, word *T, const word *A, size_t N)
2153 {
2154  assert(N && N%2==0);
2155 
2156  if (N <= s_recursionLimit)
2157  s_pSqu[N/4](R, A);
2158  else
2159  {
2160  const size_t N2 = N/2;
2161 
2162  RecursiveSquare(R0, T2, A0, N2);
2163  RecursiveSquare(R2, T2, A1, N2);
2164  RecursiveMultiply(T0, T2, A0, A1, N2);
2165 
2166  int carry = Add(R1, R1, T0, N);
2167  carry += Add(R1, R1, T0, N);
2168  Increment(R3, N2, carry);
2169  }
2170 }
2171 
2172 // R[N] - bottom half of A*B
2173 // T[3*N/2] - temporary work space
2174 // A[N] - multiplier
2175 // B[N] - multiplicant
2176 
2177 void RecursiveMultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
2178 {
2179  assert(N>=2 && N%2==0);
2180 
2181  if (N <= s_recursionLimit)
2182  s_pBot[N/4](R, A, B);
2183  else
2184  {
2185  const size_t N2 = N/2;
2186 
2187  RecursiveMultiply(R, T, A0, B0, N2);
2188  RecursiveMultiplyBottom(T0, T1, A1, B0, N2);
2189  Add(R1, R1, T0, N2);
2190  RecursiveMultiplyBottom(T0, T1, A0, B1, N2);
2191  Add(R1, R1, T0, N2);
2192  }
2193 }
2194 
2195 // R[N] --- upper half of A*B
2196 // T[2*N] - temporary work space
2197 // L[N] --- lower half of A*B
2198 // A[N] --- multiplier
2199 // B[N] --- multiplicant
2200 
2201 void MultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, size_t N)
2202 {
2203  assert(N>=2 && N%2==0);
2204 
2205  if (N <= s_recursionLimit)
2206  s_pTop[N/4](R, A, B, L[N-1]);
2207  else
2208  {
2209  const size_t N2 = N/2;
2210 
2211  size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
2212  Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
2213 
2214  size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
2215  Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
2216 
2217  RecursiveMultiply(T0, T2, R0, R1, N2);
2218  RecursiveMultiply(R0, T2, A1, B1, N2);
2219 
2220  // now T[01] holds (A1-A0)*(B0-B1) = A1*B0+A0*B1-A1*B1-A0*B0, R[01] holds A1*B1
2221 
2222  int t, c3;
2223  int c2 = Subtract(T2, L+N2, L, N2);
2224 
2225  if (AN2 == BN2)
2226  {
2227  c2 -= Add(T2, T2, T0, N2);
2228  t = (Compare(T2, R0, N2) == -1);
2229  c3 = t - Subtract(T2, T2, T1, N2);
2230  }
2231  else
2232  {
2233  c2 += Subtract(T2, T2, T0, N2);
2234  t = (Compare(T2, R0, N2) == -1);
2235  c3 = t + Add(T2, T2, T1, N2);
2236  }
2237 
2238  c2 += t;
2239  if (c2 >= 0)
2240  c3 += Increment(T2, N2, c2);
2241  else
2242  c3 -= Decrement(T2, N2, -c2);
2243  c3 += Add(R0, T2, R1, N2);
2244 
2245  assert (c3 >= 0 && c3 <= 2);
2246  Increment(R1, N2, c3);
2247  }
2248 }
2249 
2250 inline void Multiply(word *R, word *T, const word *A, const word *B, size_t N)
2251 {
2252  RecursiveMultiply(R, T, A, B, N);
2253 }
2254 
2255 inline void Square(word *R, word *T, const word *A, size_t N)
2256 {
2257  RecursiveSquare(R, T, A, N);
2258 }
2259 
2260 inline void MultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
2261 {
2262  RecursiveMultiplyBottom(R, T, A, B, N);
2263 }
2264 
2265 // R[NA+NB] - result = A*B
2266 // T[NA+NB] - temporary work space
2267 // A[NA] ---- multiplier
2268 // B[NB] ---- multiplicant
2269 
2270 void AsymmetricMultiply(word *R, word *T, const word *A, size_t NA, const word *B, size_t NB)
2271 {
2272  if (NA == NB)
2273  {
2274  if (A == B)
2275  Square(R, T, A, NA);
2276  else
2277  Multiply(R, T, A, B, NA);
2278 
2279  return;
2280  }
2281 
2282  if (NA > NB)
2283  {
2284  std::swap(A, B);
2285  std::swap(NA, NB);
2286  }
2287 
2288  assert(NB % NA == 0);
2289 
2290  if (NA==2 && !A[1])
2291  {
2292  switch (A[0])
2293  {
2294  case 0:
2295  SetWords(R, 0, NB+2);
2296  return;
2297  case 1:
2298  CopyWords(R, B, NB);
2299  R[NB] = R[NB+1] = 0;
2300  return;
2301  default:
2302  R[NB] = LinearMultiply(R, B, A[0], NB);
2303  R[NB+1] = 0;
2304  return;
2305  }
2306  }
2307 
2308  size_t i;
2309  if ((NB/NA)%2 == 0)
2310  {
2311  Multiply(R, T, A, B, NA);
2312  CopyWords(T+2*NA, R+NA, NA);
2313 
2314  for (i=2*NA; i<NB; i+=2*NA)
2315  Multiply(T+NA+i, T, A, B+i, NA);
2316  for (i=NA; i<NB; i+=2*NA)
2317  Multiply(R+i, T, A, B+i, NA);
2318  }
2319  else
2320  {
2321  for (i=0; i<NB; i+=2*NA)
2322  Multiply(R+i, T, A, B+i, NA);
2323  for (i=NA; i<NB; i+=2*NA)
2324  Multiply(T+NA+i, T, A, B+i, NA);
2325  }
2326 
2327  if (Add(R+NA, R+NA, T+2*NA, NB-NA))
2328  Increment(R+NB, NA);
2329 }
2330 
2331 // R[N] ----- result = A inverse mod 2**(WORD_BITS*N)
2332 // T[3*N/2] - temporary work space
2333 // A[N] ----- an odd number as input
2334 
2335 void RecursiveInverseModPower2(word *R, word *T, const word *A, size_t N)
2336 {
2337  if (N==2)
2338  {
2339  T[0] = AtomicInverseModPower2(A[0]);
2340  T[1] = 0;
2341  s_pBot[0](T+2, T, A);
2342  TwosComplement(T+2, 2);
2343  Increment(T+2, 2, 2);
2344  s_pBot[0](R, T, T+2);
2345  }
2346  else
2347  {
2348  const size_t N2 = N/2;
2349  RecursiveInverseModPower2(R0, T0, A0, N2);
2350  T0[0] = 1;
2351  SetWords(T0+1, 0, N2-1);
2352  MultiplyTop(R1, T1, T0, R0, A0, N2);
2353  MultiplyBottom(T0, T1, R0, A1, N2);
2354  Add(T0, R1, T0, N2);
2355  TwosComplement(T0, N2);
2356  MultiplyBottom(R1, T1, R0, T0, N2);
2357  }
2358 }
2359 
2360 // R[N] --- result = X/(2**(WORD_BITS*N)) mod M
2361 // T[3*N] - temporary work space
2362 // X[2*N] - number to be reduced
2363 // M[N] --- modulus
2364 // U[N] --- multiplicative inverse of M mod 2**(WORD_BITS*N)
2365 
2366 void MontgomeryReduce(word *R, word *T, word *X, const word *M, const word *U, size_t N)
2367 {
2368 #if 1
2369  MultiplyBottom(R, T, X, U, N);
2370  MultiplyTop(T, T+N, X, R, M, N);
2371  word borrow = Subtract(T, X+N, T, N);
2372  // defend against timing attack by doing this Add even when not needed
2373  word carry = Add(T+N, T, M, N);
2374  assert(carry | !borrow);
2375  CRYPTOPP_UNUSED(carry), CRYPTOPP_UNUSED(borrow);
2376  CopyWords(R, T + ((0-borrow) & N), N);
2377 #elif 0
2378  const word u = 0-U[0];
2379  Declare2Words(p)
2380  for (size_t i=0; i<N; i++)
2381  {
2382  const word t = u * X[i];
2383  word c = 0;
2384  for (size_t j=0; j<N; j+=2)
2385  {
2386  MultiplyWords(p, t, M[j]);
2387  Acc2WordsBy1(p, X[i+j]);
2388  Acc2WordsBy1(p, c);
2389  X[i+j] = LowWord(p);
2390  c = HighWord(p);
2391  MultiplyWords(p, t, M[j+1]);
2392  Acc2WordsBy1(p, X[i+j+1]);
2393  Acc2WordsBy1(p, c);
2394  X[i+j+1] = LowWord(p);
2395  c = HighWord(p);
2396  }
2397 
2398  if (Increment(X+N+i, N-i, c))
2399  while (!Subtract(X+N, X+N, M, N)) {}
2400  }
2401 
2402  memcpy(R, X+N, N*WORD_SIZE);
2403 #else
2404  __m64 u = _mm_cvtsi32_si64(0-U[0]), p;
2405  for (size_t i=0; i<N; i++)
2406  {
2407  __m64 t = _mm_cvtsi32_si64(X[i]);
2408  t = _mm_mul_su32(t, u);
2409  __m64 c = _mm_setzero_si64();
2410  for (size_t j=0; j<N; j+=2)
2411  {
2412  p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j]));
2413  p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j]));
2414  c = _mm_add_si64(c, p);
2415  X[i+j] = _mm_cvtsi64_si32(c);
2416  c = _mm_srli_si64(c, 32);
2417  p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j+1]));
2418  p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j+1]));
2419  c = _mm_add_si64(c, p);
2420  X[i+j+1] = _mm_cvtsi64_si32(c);
2421  c = _mm_srli_si64(c, 32);
2422  }
2423 
2424  if (Increment(X+N+i, N-i, _mm_cvtsi64_si32(c)))
2425  while (!Subtract(X+N, X+N, M, N)) {}
2426  }
2427 
2428  memcpy(R, X+N, N*WORD_SIZE);
2429  _mm_empty();
2430 #endif
2431 }
2432 
2433 // R[N] --- result = X/(2**(WORD_BITS*N/2)) mod M
2434 // T[2*N] - temporary work space
2435 // X[2*N] - number to be reduced
2436 // M[N] --- modulus
2437 // U[N/2] - multiplicative inverse of M mod 2**(WORD_BITS*N/2)
2438 // V[N] --- 2**(WORD_BITS*3*N/2) mod M
2439 
2440 void HalfMontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, const word *V, size_t N)
2441 {
2442  assert(N%2==0 && N>=4);
2443 
2444 #define M0 M
2445 #define M1 (M+N2)
2446 #define V0 V
2447 #define V1 (V+N2)
2448 
2449 #define X0 X
2450 #define X1 (X+N2)
2451 #define X2 (X+N)
2452 #define X3 (X+N+N2)
2453 
2454  const size_t N2 = N/2;
2455  Multiply(T0, T2, V0, X3, N2);
2456  int c2 = Add(T0, T0, X0, N);
2457  MultiplyBottom(T3, T2, T0, U, N2);
2458  MultiplyTop(T2, R, T0, T3, M0, N2);
2459  c2 -= Subtract(T2, T1, T2, N2);
2460  Multiply(T0, R, T3, M1, N2);
2461  c2 -= Subtract(T0, T2, T0, N2);
2462  int c3 = -(int)Subtract(T1, X2, T1, N2);
2463  Multiply(R0, T2, V1, X3, N2);
2464  c3 += Add(R, R, T, N);
2465 
2466  if (c2>0)
2467  c3 += Increment(R1, N2);
2468  else if (c2<0)
2469  c3 -= Decrement(R1, N2, -c2);
2470 
2471  assert(c3>=-1 && c3<=1);
2472  if (c3>0)
2473  Subtract(R, R, M, N);
2474  else if (c3<0)
2475  Add(R, R, M, N);
2476 
2477 #undef M0
2478 #undef M1
2479 #undef V0
2480 #undef V1
2481 
2482 #undef X0
2483 #undef X1
2484 #undef X2
2485 #undef X3
2486 }
2487 
2488 #undef A0
2489 #undef A1
2490 #undef B0
2491 #undef B1
2492 
2493 #undef T0
2494 #undef T1
2495 #undef T2
2496 #undef T3
2497 
2498 #undef R0
2499 #undef R1
2500 #undef R2
2501 #undef R3
2502 
2503 /*
2504 // do a 3 word by 2 word divide, returns quotient and leaves remainder in A
2505 static word SubatomicDivide(word *A, word B0, word B1)
2506 {
2507  // assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a word
2508  assert(A[2] < B1 || (A[2]==B1 && A[1] < B0));
2509 
2510  // estimate the quotient: do a 2 word by 1 word divide
2511  word Q;
2512  if (B1+1 == 0)
2513  Q = A[2];
2514  else
2515  Q = DWord(A[1], A[2]).DividedBy(B1+1);
2516 
2517  // now subtract Q*B from A
2518  DWord p = DWord::Multiply(B0, Q);
2519  DWord u = (DWord) A[0] - p.GetLowHalf();
2520  A[0] = u.GetLowHalf();
2521  u = (DWord) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - DWord::Multiply(B1, Q);
2522  A[1] = u.GetLowHalf();
2523  A[2] += u.GetHighHalf();
2524 
2525  // Q <= actual quotient, so fix it
2526  while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
2527  {
2528  u = (DWord) A[0] - B0;
2529  A[0] = u.GetLowHalf();
2530  u = (DWord) A[1] - B1 - u.GetHighHalfAsBorrow();
2531  A[1] = u.GetLowHalf();
2532  A[2] += u.GetHighHalf();
2533  Q++;
2534  assert(Q); // shouldn't overflow
2535  }
2536 
2537  return Q;
2538 }
2539 
2540 // do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1
2541 static inline void AtomicDivide(word *Q, const word *A, const word *B)
2542 {
2543  if (!B[0] && !B[1]) // if divisor is 0, we assume divisor==2**(2*WORD_BITS)
2544  {
2545  Q[0] = A[2];
2546  Q[1] = A[3];
2547  }
2548  else
2549  {
2550  word T[4];
2551  T[0] = A[0]; T[1] = A[1]; T[2] = A[2]; T[3] = A[3];
2552  Q[1] = SubatomicDivide(T+1, B[0], B[1]);
2553  Q[0] = SubatomicDivide(T, B[0], B[1]);
2554 
2555 #ifndef NDEBUG
2556  // multiply quotient and divisor and add remainder, make sure it equals dividend
2557  assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
2558  word P[4];
2559  LowLevel::Multiply2(P, Q, B);
2560  Add(P, P, T, 4);
2561  assert(memcmp(P, A, 4*WORD_SIZE)==0);
2562 #endif
2563  }
2564 }
2565 */
2566 
2567 static inline void AtomicDivide(word *Q, const word *A, const word *B)
2568 {
2569  word T[4];
2570  DWord q = DivideFourWordsByTwo<word, DWord>(T, DWord(A[0], A[1]), DWord(A[2], A[3]), DWord(B[0], B[1]));
2571  Q[0] = q.GetLowHalf();
2572  Q[1] = q.GetHighHalf();
2573 
2574 #ifndef NDEBUG
2575  if (B[0] || B[1])
2576  {
2577  // multiply quotient and divisor and add remainder, make sure it equals dividend
2578  assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
2579  word P[4];
2580  s_pMul[0](P, Q, B);
2581  Add(P, P, T, 4);
2582  assert(memcmp(P, A, 4*WORD_SIZE)==0);
2583  }
2584 #endif
2585 }
2586 
2587 // for use by Divide(), corrects the underestimated quotient {Q1,Q0}
2588 static void CorrectQuotientEstimate(word *R, word *T, word *Q, const word *B, size_t N)
2589 {
2590  assert(N && N%2==0);
2591 
2592  AsymmetricMultiply(T, T+N+2, Q, 2, B, N);
2593 
2594  word borrow = Subtract(R, R, T, N+2);
2595  assert(!borrow && !R[N+1]);
2596  CRYPTOPP_UNUSED(borrow);
2597 
2598  while (R[N] || Compare(R, B, N) >= 0)
2599  {
2600  R[N] -= Subtract(R, R, B, N);
2601  Q[1] += (++Q[0]==0);
2602  assert(Q[0] || Q[1]); // no overflow
2603  }
2604 }
2605 
2606 // R[NB] -------- remainder = A%B
2607 // Q[NA-NB+2] --- quotient = A/B
2608 // T[NA+3*(NB+2)] - temp work space
2609 // A[NA] -------- dividend
2610 // B[NB] -------- divisor
2611 
2612 void Divide(word *R, word *Q, word *T, const word *A, size_t NA, const word *B, size_t NB)
2613 {
2614  assert(NA && NB && NA%2==0 && NB%2==0);
2615  assert(B[NB-1] || B[NB-2]);
2616  assert(NB <= NA);
2617 
2618  // set up temporary work space
2619  word *const TA=T;
2620  word *const TB=T+NA+2;
2621  word *const TP=T+NA+2+NB;
2622 
2623  // copy B into TB and normalize it so that TB has highest bit set to 1
2624  unsigned shiftWords = (B[NB-1]==0);
2625  TB[0] = TB[NB-1] = 0;
2626  CopyWords(TB+shiftWords, B, NB-shiftWords);
2627  unsigned shiftBits = WORD_BITS - BitPrecision(TB[NB-1]);
2628  assert(shiftBits < WORD_BITS);
2629  ShiftWordsLeftByBits(TB, NB, shiftBits);
2630 
2631  // copy A into TA and normalize it
2632  TA[0] = TA[NA] = TA[NA+1] = 0;
2633  CopyWords(TA+shiftWords, A, NA);
2634  ShiftWordsLeftByBits(TA, NA+2, shiftBits);
2635 
2636  if (TA[NA+1]==0 && TA[NA] <= 1)
2637  {
2638  Q[NA-NB+1] = Q[NA-NB] = 0;
2639  while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0)
2640  {
2641  TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB);
2642  ++Q[NA-NB];
2643  }
2644  }
2645  else
2646  {
2647  NA+=2;
2648  assert(Compare(TA+NA-NB, TB, NB) < 0);
2649  }
2650 
2651  word BT[2];
2652  BT[0] = TB[NB-2] + 1;
2653  BT[1] = TB[NB-1] + (BT[0]==0);
2654 
2655  // start reducing TA mod TB, 2 words at a time
2656  for (size_t i=NA-2; i>=NB; i-=2)
2657  {
2658  AtomicDivide(Q+i-NB, TA+i-2, BT);
2659  CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB);
2660  }
2661 
2662  // copy TA into R, and denormalize it
2663  CopyWords(R, TA+shiftWords, NB);
2664  ShiftWordsRightByBits(R, NB, shiftBits);
2665 }
2666 
2667 static inline size_t EvenWordCount(const word *X, size_t N)
2668 {
2669  while (N && X[N-2]==0 && X[N-1]==0)
2670  N-=2;
2671  return N;
2672 }
2673 
2674 // return k
2675 // R[N] --- result = A^(-1) * 2^k mod M
2676 // T[4*N] - temporary work space
2677 // A[NA] -- number to take inverse of
2678 // M[N] --- modulus
2679 
2680 unsigned int AlmostInverse(word *R, word *T, const word *A, size_t NA, const word *M, size_t N)
2681 {
2682  assert(NA<=N && N && N%2==0);
2683 
2684  word *b = T;
2685  word *c = T+N;
2686  word *f = T+2*N;
2687  word *g = T+3*N;
2688  size_t bcLen=2, fgLen=EvenWordCount(M, N);
2689  unsigned int k=0;
2690  bool s=false;
2691 
2692  SetWords(T, 0, 3*N);
2693  b[0]=1;
2694  CopyWords(f, A, NA);
2695  CopyWords(g, M, N);
2696 
2697  while (1)
2698  {
2699  word t=f[0];
2700  while (!t)
2701  {
2702  if (EvenWordCount(f, fgLen)==0)
2703  {
2704  SetWords(R, 0, N);
2705  return 0;
2706  }
2707 
2708  ShiftWordsRightByWords(f, fgLen, 1);
2709  bcLen += 2 * (c[bcLen-1] != 0);
2710  assert(bcLen <= N);
2711  ShiftWordsLeftByWords(c, bcLen, 1);
2712  k+=WORD_BITS;
2713  t=f[0];
2714  }
2715 
2716  // t must be non-0; otherwise undefined.
2717  unsigned int i = TrailingZeros(t);
2718  t >>= i;
2719  k += i;
2720 
2721  if (t==1 && f[1]==0 && EvenWordCount(f+2, fgLen-2)==0)
2722  {
2723  if (s)
2724  Subtract(R, M, b, N);
2725  else
2726  CopyWords(R, b, N);
2727  return k;
2728  }
2729 
2730  ShiftWordsRightByBits(f, fgLen, i);
2731  t = ShiftWordsLeftByBits(c, bcLen, i);
2732  c[bcLen] += t;
2733  bcLen += 2 * (t!=0);
2734  assert(bcLen <= N);
2735 
2736  bool swap = Compare(f, g, fgLen)==-1;
2737  ConditionalSwapPointers(swap, f, g);
2738  ConditionalSwapPointers(swap, b, c);
2739  s ^= swap;
2740 
2741  fgLen -= 2 * !(f[fgLen-2] | f[fgLen-1]);
2742 
2743  Subtract(f, f, g, fgLen);
2744  t = Add(b, b, c, bcLen);
2745  b[bcLen] += t;
2746  bcLen += 2*t;
2747  assert(bcLen <= N);
2748  }
2749 }
2750 
2751 // R[N] - result = A/(2^k) mod M
2752 // A[N] - input
2753 // M[N] - modulus
2754 
2755 void DivideByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
2756 {
2757  CopyWords(R, A, N);
2758 
2759  while (k--)
2760  {
2761  if (R[0]%2==0)
2762  ShiftWordsRightByBits(R, N, 1);
2763  else
2764  {
2765  word carry = Add(R, R, M, N);
2766  ShiftWordsRightByBits(R, N, 1);
2767  R[N-1] += carry<<(WORD_BITS-1);
2768  }
2769  }
2770 }
2771 
2772 // R[N] - result = A*(2^k) mod M
2773 // A[N] - input
2774 // M[N] - modulus
2775 
2776 void MultiplyByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
2777 {
2778  CopyWords(R, A, N);
2779 
2780  while (k--)
2781  if (ShiftWordsLeftByBits(R, N, 1) || Compare(R, M, N)>=0)
2782  Subtract(R, R, M, N);
2783 }
2784 
2785 // ******************************************************************
2786 
2787 InitializeInteger::InitializeInteger()
2788 {
2789  if (!g_pAssignIntToInteger)
2790  {
2791  SetFunctionPointers();
2792  g_pAssignIntToInteger = (CryptoPP::PAssignIntToInteger)AssignIntToInteger;
2793  }
2794 }
2795 
2796 static const unsigned int RoundupSizeTable[] = {2, 2, 2, 4, 4, 8, 8, 8, 8};
2797 
2798 static inline size_t RoundupSize(size_t n)
2799 {
2800  if (n<=8)
2801  return RoundupSizeTable[n];
2802  else if (n<=16)
2803  return 16;
2804  else if (n<=32)
2805  return 32;
2806  else if (n<=64)
2807  return 64;
2808  else return size_t(1) << BitPrecision(n-1);
2809 }
2810 
2812  : reg(2), sign(POSITIVE)
2813 {
2814  reg[0] = reg[1] = 0;
2815 }
2816 
2818  : reg(RoundupSize(t.WordCount())), sign(t.sign)
2819 {
2820  CopyWords(reg, t.reg, reg.size());
2821 }
2822 
2823 Integer::Integer(Sign s, lword value)
2824  : reg(2), sign(s)
2825 {
2826  reg[0] = word(value);
2827  reg[1] = word(SafeRightShift<WORD_BITS>(value));
2828 }
2829 
2830 Integer::Integer(signed long value)
2831  : reg(2)
2832 {
2833  if (value >= 0)
2834  sign = POSITIVE;
2835  else
2836  {
2837  sign = NEGATIVE;
2838  value = -value;
2839  }
2840  reg[0] = word(value);
2841  reg[1] = word(SafeRightShift<WORD_BITS>((unsigned long)value));
2842 }
2843 
2844 Integer::Integer(Sign s, word high, word low)
2845  : reg(2), sign(s)
2846 {
2847  reg[0] = low;
2848  reg[1] = high;
2849 }
2850 
2852 {
2853  if (ByteCount() > sizeof(long))
2854  return false;
2855 
2856  unsigned long value = (unsigned long)reg[0];
2857  value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
2858 
2859  if (sign==POSITIVE)
2860  return (signed long)value >= 0;
2861  else
2862  return -(signed long)value < 0;
2863 }
2864 
2865 signed long Integer::ConvertToLong() const
2866 {
2867  assert(IsConvertableToLong());
2868 
2869  unsigned long value = (unsigned long)reg[0];
2870  value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
2871  return sign==POSITIVE ? value : -(signed long)value;
2872 }
2873 
2874 Integer::Integer(BufferedTransformation &encodedInteger, size_t byteCount, Signedness s, ByteOrder o)
2875 {
2876  assert(o == BIG_ENDIAN_ORDER || o == LITTLE_ENDIAN_ORDER);
2877 
2878  if(o == LITTLE_ENDIAN_ORDER)
2879  {
2880  SecByteBlock block(byteCount);
2881  encodedInteger.Get(block, block.size());
2882  std::reverse(block.begin(), block.begin()+block.size());
2883 
2884  Decode(block.begin(), block.size(), s);
2885  return;
2886  }
2887 
2888  Decode(encodedInteger, byteCount, s);
2889 }
2890 
2891 Integer::Integer(const byte *encodedInteger, size_t byteCount, Signedness s, ByteOrder o)
2892 {
2893  assert(o == BIG_ENDIAN_ORDER || o == LITTLE_ENDIAN_ORDER);
2894 
2895  if(o == LITTLE_ENDIAN_ORDER)
2896  {
2897  SecByteBlock block(byteCount);
2898 #if (CRYPTOPP_MSC_VERSION >= 1500)
2899  std::reverse_copy(encodedInteger, encodedInteger+byteCount,
2900  stdext::make_checked_array_iterator(block.begin(), block.size()));
2901 #else
2902  std::reverse_copy(encodedInteger, encodedInteger+byteCount, block.begin());
2903 #endif
2904  Decode(block.begin(), block.size(), s);
2905  return;
2906  }
2907 
2908  Decode(encodedInteger, byteCount, s);
2909 }
2910 
2912 {
2913  BERDecode(bt);
2914 }
2915 
2917 {
2918  Randomize(rng, bitcount);
2919 }
2920 
2921 Integer::Integer(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
2922 {
2923  if (!Randomize(rng, min, max, rnType, equiv, mod))
2925 }
2926 
2928 {
2929  Integer r((word)0, BitsToWords(e+1));
2930  r.SetBit(e);
2931  return r;
2932 }
2933 
2934 template <long i>
2936 {
2937  Integer * operator()() const
2938  {
2939  return new Integer(i);
2940  }
2941 };
2942 
2944 {
2945  return Singleton<Integer>().Ref();
2946 }
2947 
2949 {
2950  return Singleton<Integer, NewInteger<1> >().Ref();
2951 }
2952 
2954 {
2955  return Singleton<Integer, NewInteger<2> >().Ref();
2956 }
2957 
2958 bool Integer::operator!() const
2959 {
2960  return IsNegative() ? false : (reg[0]==0 && WordCount()==0);
2961 }
2962 
2963 Integer& Integer::operator=(const Integer& t)
2964 {
2965  if (this != &t)
2966  {
2967  if (reg.size() != t.reg.size() || t.reg[t.reg.size()/2] == 0)
2968  reg.New(RoundupSize(t.WordCount()));
2969  CopyWords(reg, t.reg, reg.size());
2970  sign = t.sign;
2971  }
2972  return *this;
2973 }
2974 
2975 bool Integer::GetBit(size_t n) const
2976 {
2977  if (n/WORD_BITS >= reg.size())
2978  return 0;
2979  else
2980  return bool((reg[n/WORD_BITS] >> (n % WORD_BITS)) & 1);
2981 }
2982 
2983 void Integer::SetBit(size_t n, bool value)
2984 {
2985  if (value)
2986  {
2987  reg.CleanGrow(RoundupSize(BitsToWords(n+1)));
2988  reg[n/WORD_BITS] |= (word(1) << (n%WORD_BITS));
2989  }
2990  else
2991  {
2992  if (n/WORD_BITS < reg.size())
2993  reg[n/WORD_BITS] &= ~(word(1) << (n%WORD_BITS));
2994  }
2995 }
2996 
2997 byte Integer::GetByte(size_t n) const
2998 {
2999  if (n/WORD_SIZE >= reg.size())
3000  return 0;
3001  else
3002  return byte(reg[n/WORD_SIZE] >> ((n%WORD_SIZE)*8));
3003 }
3004 
3005 void Integer::SetByte(size_t n, byte value)
3006 {
3007  reg.CleanGrow(RoundupSize(BytesToWords(n+1)));
3008  reg[n/WORD_SIZE] &= ~(word(0xff) << 8*(n%WORD_SIZE));
3009  reg[n/WORD_SIZE] |= (word(value) << 8*(n%WORD_SIZE));
3010 }
3011 
3012 lword Integer::GetBits(size_t i, size_t n) const
3013 {
3014  lword v = 0;
3015  assert(n <= sizeof(v)*8);
3016  for (unsigned int j=0; j<n; j++)
3017  v |= lword(GetBit(i+j)) << j;
3018  return v;
3019 }
3020 
3021 Integer Integer::operator-() const
3022 {
3023  Integer result(*this);
3024  result.Negate();
3025  return result;
3026 }
3027 
3028 Integer Integer::AbsoluteValue() const
3029 {
3030  Integer result(*this);
3031  result.sign = POSITIVE;
3032  return result;
3033 }
3034 
3036 {
3037  reg.swap(a.reg);
3038  std::swap(sign, a.sign);
3039 }
3040 
3041 Integer::Integer(word value, size_t length)
3042  : reg(RoundupSize(length)), sign(POSITIVE)
3043 {
3044  reg[0] = value;
3045  SetWords(reg+1, 0, reg.size()-1);
3046 }
3047 
3048 template <class T>
3049 static Integer StringToInteger(const T *str, ByteOrder order)
3050 {
3051  assert( order == BIG_ENDIAN_ORDER || order == LITTLE_ENDIAN_ORDER );
3052 
3053  int radix, sign = 1;
3054  // GCC workaround
3055  // std::char_traits<wchar_t>::length() not defined in GCC 3.2 and STLport 4.5.3
3056  unsigned int length;
3057  for (length = 0; str[length] != 0; length++) {}
3058 
3059  Integer v;
3060 
3061  if (length == 0)
3062  return Integer::Zero();
3063 
3064  switch (str[length-1])
3065  {
3066  case 'h':
3067  case 'H':
3068  radix=16;
3069  break;
3070  case 'o':
3071  case 'O':
3072  radix=8;
3073  break;
3074  case 'b':
3075  case 'B':
3076  radix=2;
3077  break;
3078  default:
3079  radix=10;
3080  }
3081 
3082  // 'str' is of length 1 or more
3083  if (str[0] == '-')
3084  {
3085  sign = -1;
3086  str += 1, length -= 1;
3087  }
3088 
3089  if (length > 2 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X'))
3090  {
3091  radix = 16;
3092  str += 2, length -= 2;
3093  }
3094 
3095  if(order == BIG_ENDIAN_ORDER)
3096  {
3097  for (unsigned int i=0; i<length; i++)
3098  {
3099  int digit, ch = static_cast<int>(str[i]);
3100 
3101  if (ch >= '0' && ch <= '9')
3102  digit = ch - '0';
3103  else if (ch >= 'A' && ch <= 'F')
3104  digit = ch - 'A' + 10;
3105  else if (ch >= 'a' && ch <= 'f')
3106  digit = ch - 'a' + 10;
3107  else
3108  digit = radix;
3109 
3110  if (digit < radix)
3111  {
3112  v *= radix;
3113  v += digit;
3114  }
3115  }
3116  }
3117  else if(radix == 16 && order == LITTLE_ENDIAN_ORDER)
3118  {
3119  // Nibble high, low and count
3120  unsigned int nh = 0, nl = 0, nc = 0;
3121  Integer position(Integer::One());
3122 
3123  for (unsigned int i=0; i<length; i++)
3124  {
3125  int digit, ch = static_cast<int>(str[i]);
3126 
3127  if (ch >= '0' && ch <= '9')
3128  digit = ch - '0';
3129  else if (ch >= 'A' && ch <= 'F')
3130  digit = ch - 'A' + 10;
3131  else if (ch >= 'a' && ch <= 'f')
3132  digit = ch - 'a' + 10;
3133  else
3134  digit = radix;
3135 
3136  if (digit < radix)
3137  {
3138  if(nc++ == 0)
3139  nh = digit;
3140  else
3141  nl = digit;
3142 
3143  if(nc == 2)
3144  {
3145  v += position * (nh << 4 | nl);
3146  nc = 0, position <<= 8;
3147  }
3148  }
3149  }
3150 
3151  if(nc == 1)
3152  v += nh * position;
3153  }
3154  else // LITTLE_ENDIAN_ORDER && radix != 16
3155  {
3156  for (int i=static_cast<int>(length)-1; i>=0; i--)
3157  {
3158  int digit, ch = static_cast<int>(str[i]);
3159 
3160  if (ch >= '0' && ch <= '9')
3161  digit = ch - '0';
3162  else if (ch >= 'A' && ch <= 'F')
3163  digit = ch - 'A' + 10;
3164  else if (ch >= 'a' && ch <= 'f')
3165  digit = ch - 'a' + 10;
3166  else
3167  digit = radix;
3168 
3169  if (digit < radix)
3170  {
3171  v *= radix;
3172  v += digit;
3173  }
3174  }
3175  }
3176 
3177  if (sign == -1)
3178  v.Negate();
3179 
3180  return v;
3181 }
3182 
3183 Integer::Integer(const char *str, ByteOrder order)
3184  : reg(2), sign(POSITIVE)
3185 {
3186  *this = StringToInteger(str,order);
3187 }
3188 
3189 Integer::Integer(const wchar_t *str, ByteOrder order)
3190  : reg(2), sign(POSITIVE)
3191 {
3192  *this = StringToInteger(str,order);
3193 }
3194 
3195 unsigned int Integer::WordCount() const
3196 {
3197  return (unsigned int)CountWords(reg, reg.size());
3198 }
3199 
3200 unsigned int Integer::ByteCount() const
3201 {
3202  unsigned wordCount = WordCount();
3203  if (wordCount)
3204  return (wordCount-1)*WORD_SIZE + BytePrecision(reg[wordCount-1]);
3205  else
3206  return 0;
3207 }
3208 
3209 unsigned int Integer::BitCount() const
3210 {
3211  unsigned wordCount = WordCount();
3212  if (wordCount)
3213  return (wordCount-1)*WORD_BITS + BitPrecision(reg[wordCount-1]);
3214  else
3215  return 0;
3216 }
3217 
3218 void Integer::Decode(const byte *input, size_t inputLen, Signedness s)
3219 {
3220  StringStore store(input, inputLen);
3221  Decode(store, inputLen, s);
3222 }
3223 
3225 {
3226  assert(bt.MaxRetrievable() >= inputLen);
3227 
3228  byte b;
3229  bt.Peek(b);
3230  sign = ((s==SIGNED) && (b & 0x80)) ? NEGATIVE : POSITIVE;
3231 
3232  while (inputLen>0 && (sign==POSITIVE ? b==0 : b==0xff))
3233  {
3234  bt.Skip(1);
3235  inputLen--;
3236  bt.Peek(b);
3237  }
3238 
3239  // The call to CleanNew is optimized away above -O0/-Og.
3240  const size_t size = RoundupSize(BytesToWords(inputLen));
3241  reg.CleanNew(size);
3242 
3243  assert(reg.SizeInBytes() >= inputLen);
3244  for (size_t i=inputLen; i > 0; i--)
3245  {
3246  bt.Get(b);
3247  reg[(i-1)/WORD_SIZE] |= word(b) << ((i-1)%WORD_SIZE)*8;
3248  }
3249 
3250  if (sign == NEGATIVE)
3251  {
3252  for (size_t i=inputLen; i<reg.size()*WORD_SIZE; i++)
3253  reg[i/WORD_SIZE] |= word(0xff) << (i%WORD_SIZE)*8;
3254  TwosComplement(reg, reg.size());
3255  }
3256 }
3257 
3258 size_t Integer::MinEncodedSize(Signedness signedness) const
3259 {
3260  unsigned int outputLen = STDMAX(1U, ByteCount());
3261  if (signedness == UNSIGNED)
3262  return outputLen;
3263  if (NotNegative() && (GetByte(outputLen-1) & 0x80))
3264  outputLen++;
3265  if (IsNegative() && *this < -Power2(outputLen*8-1))
3266  outputLen++;
3267  return outputLen;
3268 }
3269 
3270 void Integer::Encode(byte *output, size_t outputLen, Signedness signedness) const
3271 {
3272  assert(output && outputLen);
3273  ArraySink sink(output, outputLen);
3274  Encode(sink, outputLen, signedness);
3275 }
3276 
3277 void Integer::Encode(BufferedTransformation &bt, size_t outputLen, Signedness signedness) const
3278 {
3279  if (signedness == UNSIGNED || NotNegative())
3280  {
3281  for (size_t i=outputLen; i > 0; i--)
3282  bt.Put(GetByte(i-1));
3283  }
3284  else
3285  {
3286  // take two's complement of *this
3287  Integer temp = Integer::Power2(8*STDMAX((size_t)ByteCount(), outputLen)) + *this;
3288  temp.Encode(bt, outputLen, UNSIGNED);
3289  }
3290 }
3291 
3293 {
3294  DERGeneralEncoder enc(bt, INTEGER);
3296  enc.MessageEnd();
3297 }
3298 
3299 void Integer::BERDecode(const byte *input, size_t len)
3300 {
3301  StringStore store(input, len);
3302  BERDecode(store);
3303 }
3304 
3306 {
3307  BERGeneralDecoder dec(bt, INTEGER);
3308  if (!dec.IsDefiniteLength() || dec.MaxRetrievable() < dec.RemainingLength())
3309  BERDecodeError();
3310  Decode(dec, (size_t)dec.RemainingLength(), SIGNED);
3311  dec.MessageEnd();
3312 }
3313 
3315 {
3316  DERGeneralEncoder enc(bt, OCTET_STRING);
3317  Encode(enc, length);
3318  enc.MessageEnd();
3319 }
3320 
3322 {
3323  BERGeneralDecoder dec(bt, OCTET_STRING);
3324  if (!dec.IsDefiniteLength() || dec.RemainingLength() != length)
3325  BERDecodeError();
3326  Decode(dec, length);
3327  dec.MessageEnd();
3328 }
3329 
3330 size_t Integer::OpenPGPEncode(byte *output, size_t len) const
3331 {
3332  ArraySink sink(output, len);
3333  return OpenPGPEncode(sink);
3334 }
3335 
3337 {
3338  word16 bitCount = word16(BitCount());
3339  bt.PutWord16(bitCount);
3340  size_t byteCount = BitsToBytes(bitCount);
3341  Encode(bt, byteCount);
3342  return 2 + byteCount;
3343 }
3344 
3345 void Integer::OpenPGPDecode(const byte *input, size_t len)
3346 {
3347  StringStore store(input, len);
3348  OpenPGPDecode(store);
3349 }
3350 
3352 {
3353  word16 bitCount;
3354  if (bt.GetWord16(bitCount) != 2 || bt.MaxRetrievable() < BitsToBytes(bitCount))
3355  throw OpenPGPDecodeErr();
3356  Decode(bt, BitsToBytes(bitCount));
3357 }
3358 
3360 {
3361  const size_t nbytes = nbits/8 + 1;
3362  SecByteBlock buf(nbytes);
3363  rng.GenerateBlock(buf, nbytes);
3364  if (nbytes)
3365  buf[0] = (byte)Crop(buf[0], nbits % 8);
3366  Decode(buf, nbytes, UNSIGNED);
3367 }
3368 
3369 void Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max)
3370 {
3371  if (min > max)
3372  throw InvalidArgument("Integer: Min must be no greater than Max");
3373 
3374  Integer range = max - min;
3375  const unsigned int nbits = range.BitCount();
3376 
3377  do
3378  {
3379  Randomize(rng, nbits);
3380  }
3381  while (*this > range);
3382 
3383  *this += min;
3384 }
3385 
3386 bool Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
3387 {
3388  return GenerateRandomNoThrow(rng, MakeParameters("Min", min)("Max", max)("RandomNumberType", rnType)("EquivalentTo", equiv)("Mod", mod));
3389 }
3390 
3392 {
3393 public:
3394  KDF2_RNG(const byte *seed, size_t seedSize)
3395  : m_counter(0), m_counterAndSeed(seedSize + 4)
3396  {
3397  memcpy(m_counterAndSeed + 4, seed, seedSize);
3398  }
3399 
3400  void GenerateBlock(byte *output, size_t size)
3401  {
3402  PutWord(false, BIG_ENDIAN_ORDER, m_counterAndSeed, m_counter);
3403  ++m_counter;
3404  P1363_KDF2<SHA1>::DeriveKey(output, size, m_counterAndSeed, m_counterAndSeed.size(), NULL, 0);
3405  }
3406 
3407 private:
3408  word32 m_counter;
3409  SecByteBlock m_counterAndSeed;
3410 };
3411 
3412 bool Integer::GenerateRandomNoThrow(RandomNumberGenerator &i_rng, const NameValuePairs &params)
3413 {
3414  Integer min = params.GetValueWithDefault("Min", Integer::Zero());
3415  Integer max;
3416  if (!params.GetValue("Max", max))
3417  {
3418  int bitLength;
3419  if (params.GetIntValue("BitLength", bitLength))
3420  max = Integer::Power2(bitLength);
3421  else
3422  throw InvalidArgument("Integer: missing Max argument");
3423  }
3424  if (min > max)
3425  throw InvalidArgument("Integer: Min must be no greater than Max");
3426 
3427  Integer equiv = params.GetValueWithDefault("EquivalentTo", Integer::Zero());
3428  Integer mod = params.GetValueWithDefault("Mod", Integer::One());
3429 
3430  if (equiv.IsNegative() || equiv >= mod)
3431  throw InvalidArgument("Integer: invalid EquivalentTo and/or Mod argument");
3432 
3433  Integer::RandomNumberType rnType = params.GetValueWithDefault("RandomNumberType", Integer::ANY);
3434 
3435  member_ptr<KDF2_RNG> kdf2Rng;
3437  if (params.GetValue(Name::Seed(), seed))
3438  {
3439  ByteQueue bq;
3440  DERSequenceEncoder seq(bq);
3441  min.DEREncode(seq);
3442  max.DEREncode(seq);
3443  equiv.DEREncode(seq);
3444  mod.DEREncode(seq);
3445  DEREncodeUnsigned(seq, rnType);
3446  DEREncodeOctetString(seq, seed.begin(), seed.size());
3447  seq.MessageEnd();
3448 
3449  SecByteBlock finalSeed((size_t)bq.MaxRetrievable());
3450  bq.Get(finalSeed, finalSeed.size());
3451  kdf2Rng.reset(new KDF2_RNG(finalSeed.begin(), finalSeed.size()));
3452  }
3453  RandomNumberGenerator &rng = kdf2Rng.get() ? (RandomNumberGenerator &)*kdf2Rng : i_rng;
3454 
3455  switch (rnType)
3456  {
3457  case ANY:
3458  if (mod == One())
3459  Randomize(rng, min, max);
3460  else
3461  {
3462  Integer min1 = min + (equiv-min)%mod;
3463  if (max < min1)
3464  return false;
3465  Randomize(rng, Zero(), (max - min1) / mod);
3466  *this *= mod;
3467  *this += min1;
3468  }
3469  return true;
3470 
3471  case PRIME:
3472  {
3473  const PrimeSelector *pSelector = params.GetValueWithDefault(Name::PointerToPrimeSelector(), (const PrimeSelector *)NULL);
3474 
3475  int i;
3476  i = 0;
3477  while (1)
3478  {
3479  if (++i==16)
3480  {
3481  // check if there are any suitable primes in [min, max]
3482  Integer first = min;
3483  if (FirstPrime(first, max, equiv, mod, pSelector))
3484  {
3485  // if there is only one suitable prime, we're done
3486  *this = first;
3487  if (!FirstPrime(first, max, equiv, mod, pSelector))
3488  return true;
3489  }
3490  else
3491  return false;
3492  }
3493 
3494  Randomize(rng, min, max);
3495  if (FirstPrime(*this, STDMIN(*this+mod*PrimeSearchInterval(max), max), equiv, mod, pSelector))
3496  return true;
3497  }
3498  }
3499 
3500  default:
3501  throw InvalidArgument("Integer: invalid RandomNumberType argument");
3502  }
3503 }
3504 
3505 std::istream& operator>>(std::istream& in, Integer &a)
3506 {
3507  char c;
3508  unsigned int length = 0;
3509  SecBlock<char> str(length + 16);
3510 
3511  std::ws(in);
3512 
3513  do
3514  {
3515  in.read(&c, 1);
3516  str[length++] = c;
3517  if (length >= str.size())
3518  str.Grow(length + 16);
3519  }
3520  while (in && (c=='-' || c=='x' || (c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F') || c=='h' || c=='H' || c=='o' || c=='O' || c==',' || c=='.'));
3521 
3522  if (in.gcount())
3523  in.putback(c);
3524  str[length-1] = '\0';
3525  a = Integer(str);
3526 
3527  return in;
3528 }
3529 
3530 std::ostream& operator<<(std::ostream& out, const Integer &a)
3531 {
3532  // Get relevant conversion specifications from ostream.
3533  const long f = out.flags() & std::ios::basefield; // Get base digits.
3534  int base, block;
3535  char suffix;
3536  switch(f)
3537  {
3538  case std::ios::oct :
3539  base = 8;
3540  block = 8;
3541  suffix = 'o';
3542  break;
3543  case std::ios::hex :
3544  base = 16;
3545  block = 4;
3546  suffix = 'h';
3547  break;
3548  default :
3549  base = 10;
3550  block = 3;
3551  suffix = '.';
3552  }
3553 
3554  Integer temp1=a, temp2;
3555 
3556  if (a.IsNegative())
3557  {
3558  out << '-';
3559  temp1.Negate();
3560  }
3561 
3562  if (!a)
3563  out << '0';
3564 
3565  static const char upper[]="0123456789ABCDEF";
3566  static const char lower[]="0123456789abcdef";
3567 
3568  const char* vec = (out.flags() & std::ios::uppercase) ? upper : lower;
3569  unsigned int i=0;
3570  SecBlock<char> s(a.BitCount() / (SaturatingSubtract1(BitPrecision(base),1U)) + 1);
3571 
3572  while (!!temp1)
3573  {
3574  word digit;
3575  Integer::Divide(digit, temp2, temp1, base);
3576  s[i++]=vec[digit];
3577  temp1.swap(temp2);
3578  }
3579 
3580  while (i--)
3581  {
3582  out << s[i];
3583 // if (i && !(i%block))
3584 // out << ",";
3585  }
3586 
3587 #ifdef CRYPTOPP_USE_STD_SHOWBASE
3588  if(out.flags() & std::ios_base::showbase)
3589  out << suffix;
3590 
3591  return out;
3592 #else
3593  return out << suffix;
3594 #endif
3595 }
3596 
3597 Integer& Integer::operator++()
3598 {
3599  if (NotNegative())
3600  {
3601  if (Increment(reg, reg.size()))
3602  {
3603  reg.CleanGrow(2*reg.size());
3604  reg[reg.size()/2]=1;
3605  }
3606  }
3607  else
3608  {
3609  word borrow = Decrement(reg, reg.size());
3610  assert(!borrow);
3611  CRYPTOPP_UNUSED(borrow);
3612 
3613  if (WordCount()==0)
3614  *this = Zero();
3615  }
3616  return *this;
3617 }
3618 
3619 Integer& Integer::operator--()
3620 {
3621  if (IsNegative())
3622  {
3623  if (Increment(reg, reg.size()))
3624  {
3625  reg.CleanGrow(2*reg.size());
3626  reg[reg.size()/2]=1;
3627  }
3628  }
3629  else
3630  {
3631  if (Decrement(reg, reg.size()))
3632  *this = -One();
3633  }
3634  return *this;
3635 }
3636 
3637 void PositiveAdd(Integer &sum, const Integer &a, const Integer& b)
3638 {
3639  int carry;
3640  if (a.reg.size() == b.reg.size())
3641  carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
3642  else if (a.reg.size() > b.reg.size())
3643  {
3644  carry = Add(sum.reg, a.reg, b.reg, b.reg.size());
3645  CopyWords(sum.reg+b.reg.size(), a.reg+b.reg.size(), a.reg.size()-b.reg.size());
3646  carry = Increment(sum.reg+b.reg.size(), a.reg.size()-b.reg.size(), carry);
3647  }
3648  else
3649  {
3650  carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
3651  CopyWords(sum.reg+a.reg.size(), b.reg+a.reg.size(), b.reg.size()-a.reg.size());
3652  carry = Increment(sum.reg+a.reg.size(), b.reg.size()-a.reg.size(), carry);
3653  }
3654 
3655  if (carry)
3656  {
3657  sum.reg.CleanGrow(2*sum.reg.size());
3658  sum.reg[sum.reg.size()/2] = 1;
3659  }
3660  sum.sign = Integer::POSITIVE;
3661 }
3662 
3663 void PositiveSubtract(Integer &diff, const Integer &a, const Integer& b)
3664 {
3665  unsigned aSize = a.WordCount();
3666  aSize += aSize%2;
3667  unsigned bSize = b.WordCount();
3668  bSize += bSize%2;
3669 
3670  if (aSize == bSize)
3671  {
3672  if (Compare(a.reg, b.reg, aSize) >= 0)
3673  {
3674  Subtract(diff.reg, a.reg, b.reg, aSize);
3675  diff.sign = Integer::POSITIVE;
3676  }
3677  else
3678  {
3679  Subtract(diff.reg, b.reg, a.reg, aSize);
3680  diff.sign = Integer::NEGATIVE;
3681  }
3682  }
3683  else if (aSize > bSize)
3684  {
3685  word borrow = Subtract(diff.reg, a.reg, b.reg, bSize);
3686  CopyWords(diff.reg+bSize, a.reg+bSize, aSize-bSize);
3687  borrow = Decrement(diff.reg+bSize, aSize-bSize, borrow);
3688  assert(!borrow);
3689  diff.sign = Integer::POSITIVE;
3690  }
3691  else
3692  {
3693  word borrow = Subtract(diff.reg, b.reg, a.reg, aSize);
3694  CopyWords(diff.reg+aSize, b.reg+aSize, bSize-aSize);
3695  borrow = Decrement(diff.reg+aSize, bSize-aSize, borrow);
3696  assert(!borrow);
3697  diff.sign = Integer::NEGATIVE;
3698  }
3699 }
3700 
3701 // MSVC .NET 2003 workaround
3702 template <class T> inline const T& STDMAX2(const T& a, const T& b)
3703 {
3704  return a < b ? b : a;
3705 }
3706 
3707 Integer Integer::Plus(const Integer& b) const
3708 {
3709  Integer sum((word)0, STDMAX2(reg.size(), b.reg.size()));
3710  if (NotNegative())
3711  {
3712  if (b.NotNegative())
3713  PositiveAdd(sum, *this, b);
3714  else
3715  PositiveSubtract(sum, *this, b);
3716  }
3717  else
3718  {
3719  if (b.NotNegative())
3720  PositiveSubtract(sum, b, *this);
3721  else
3722  {
3723  PositiveAdd(sum, *this, b);
3724  sum.sign = Integer::NEGATIVE;
3725  }
3726  }
3727  return sum;
3728 }
3729 
3730 Integer& Integer::operator+=(const Integer& t)
3731 {
3732  reg.CleanGrow(t.reg.size());
3733  if (NotNegative())
3734  {
3735  if (t.NotNegative())
3736  PositiveAdd(*this, *this, t);
3737  else
3738  PositiveSubtract(*this, *this, t);
3739  }
3740  else
3741  {
3742  if (t.NotNegative())
3743  PositiveSubtract(*this, t, *this);
3744  else
3745  {
3746  PositiveAdd(*this, *this, t);
3747  sign = Integer::NEGATIVE;
3748  }
3749  }
3750  return *this;
3751 }
3752 
3753 Integer Integer::Minus(const Integer& b) const
3754 {
3755  Integer diff((word)0, STDMAX2(reg.size(), b.reg.size()));
3756  if (NotNegative())
3757  {
3758  if (b.NotNegative())
3759  PositiveSubtract(diff, *this, b);
3760  else
3761  PositiveAdd(diff, *this, b);
3762  }
3763  else
3764  {
3765  if (b.NotNegative())
3766  {
3767  PositiveAdd(diff, *this, b);
3768  diff.sign = Integer::NEGATIVE;
3769  }
3770  else
3771  PositiveSubtract(diff, b, *this);
3772  }
3773  return diff;
3774 }
3775 
3776 Integer& Integer::operator-=(const Integer& t)
3777 {
3778  reg.CleanGrow(t.reg.size());
3779  if (NotNegative())
3780  {
3781  if (t.NotNegative())
3782  PositiveSubtract(*this, *this, t);
3783  else
3784  PositiveAdd(*this, *this, t);
3785  }
3786  else
3787  {
3788  if (t.NotNegative())
3789  {
3790  PositiveAdd(*this, *this, t);
3791  sign = Integer::NEGATIVE;
3792  }
3793  else
3794  PositiveSubtract(*this, t, *this);
3795  }
3796  return *this;
3797 }
3798 
3799 Integer& Integer::operator<<=(size_t n)
3800 {
3801  const size_t wordCount = WordCount();
3802  const size_t shiftWords = n / WORD_BITS;
3803  const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
3804 
3805  reg.CleanGrow(RoundupSize(wordCount+BitsToWords(n)));
3806  ShiftWordsLeftByWords(reg, wordCount + shiftWords, shiftWords);
3807  ShiftWordsLeftByBits(reg+shiftWords, wordCount+BitsToWords(shiftBits), shiftBits);
3808  return *this;
3809 }
3810 
3811 Integer& Integer::operator>>=(size_t n)
3812 {
3813  const size_t wordCount = WordCount();
3814  const size_t shiftWords = n / WORD_BITS;
3815  const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
3816 
3817  ShiftWordsRightByWords(reg, wordCount, shiftWords);
3818  if (wordCount > shiftWords)
3819  ShiftWordsRightByBits(reg, wordCount-shiftWords, shiftBits);
3820  if (IsNegative() && WordCount()==0) // avoid -0
3821  *this = Zero();
3822  return *this;
3823 }
3824 
3825 void PositiveMultiply(Integer &product, const Integer &a, const Integer &b)
3826 {
3827  size_t aSize = RoundupSize(a.WordCount());
3828  size_t bSize = RoundupSize(b.WordCount());
3829 
3830  product.reg.CleanNew(RoundupSize(aSize+bSize));
3831  product.sign = Integer::POSITIVE;
3832 
3833  IntegerSecBlock workspace(aSize + bSize);
3834  AsymmetricMultiply(product.reg, workspace, a.reg, aSize, b.reg, bSize);
3835 }
3836 
3837 void Multiply(Integer &product, const Integer &a, const Integer &b)
3838 {
3839  PositiveMultiply(product, a, b);
3840 
3841  if (a.NotNegative() != b.NotNegative())
3842  product.Negate();
3843 }
3844 
3846 {
3847  Integer product;
3848  Multiply(product, *this, b);
3849  return product;
3850 }
3851 
3852 /*
3853 void PositiveDivide(Integer &remainder, Integer &quotient,
3854  const Integer &dividend, const Integer &divisor)
3855 {
3856  remainder.reg.CleanNew(divisor.reg.size());
3857  remainder.sign = Integer::POSITIVE;
3858  quotient.reg.New(0);
3859  quotient.sign = Integer::POSITIVE;
3860  unsigned i=dividend.BitCount();
3861  while (i--)
3862  {
3863  word overflow = ShiftWordsLeftByBits(remainder.reg, remainder.reg.size(), 1);
3864  remainder.reg[0] |= dividend[i];
3865  if (overflow || remainder >= divisor)
3866  {
3867  Subtract(remainder.reg, remainder.reg, divisor.reg, remainder.reg.size());
3868  quotient.SetBit(i);
3869  }
3870  }
3871 }
3872 */
3873 
3874 void PositiveDivide(Integer &remainder, Integer &quotient,
3875  const Integer &a, const Integer &b)
3876 {
3877  unsigned aSize = a.WordCount();
3878  unsigned bSize = b.WordCount();
3879 
3880  if (!bSize)
3881  throw Integer::DivideByZero();
3882 
3883  if (aSize < bSize)
3884  {
3885  remainder = a;
3886  remainder.sign = Integer::POSITIVE;
3887  quotient = Integer::Zero();
3888  return;
3889  }
3890 
3891  aSize += aSize%2; // round up to next even number
3892  bSize += bSize%2;
3893 
3894  remainder.reg.CleanNew(RoundupSize(bSize));
3895  remainder.sign = Integer::POSITIVE;
3896  quotient.reg.CleanNew(RoundupSize(aSize-bSize+2));
3897  quotient.sign = Integer::POSITIVE;
3898 
3899  IntegerSecBlock T(aSize+3*(bSize+2));
3900  Divide(remainder.reg, quotient.reg, T, a.reg, aSize, b.reg, bSize);
3901 }
3902 
3903 void Integer::Divide(Integer &remainder, Integer &quotient, const Integer &dividend, const Integer &divisor)
3904 {
3905  PositiveDivide(remainder, quotient, dividend, divisor);
3906 
3907  if (dividend.IsNegative())
3908  {
3909  quotient.Negate();
3910  if (remainder.NotZero())
3911  {
3912  --quotient;
3913  remainder = divisor.AbsoluteValue() - remainder;
3914  }
3915  }
3916 
3917  if (divisor.IsNegative())
3918  quotient.Negate();
3919 }
3920 
3921 void Integer::DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n)
3922 {
3923  q = a;
3924  q >>= n;
3925 
3926  const size_t wordCount = BitsToWords(n);
3927  if (wordCount <= a.WordCount())
3928  {
3929  r.reg.resize(RoundupSize(wordCount));
3930  CopyWords(r.reg, a.reg, wordCount);
3931  SetWords(r.reg+wordCount, 0, r.reg.size()-wordCount);
3932  if (n % WORD_BITS != 0)
3933  r.reg[wordCount-1] %= (word(1) << (n % WORD_BITS));
3934  }
3935  else
3936  {
3937  r.reg.resize(RoundupSize(a.WordCount()));
3938  CopyWords(r.reg, a.reg, r.reg.size());
3939  }
3940  r.sign = POSITIVE;
3941 
3942  if (a.IsNegative() && r.NotZero())
3943  {
3944  --q;
3945  r = Power2(n) - r;
3946  }
3947 }
3948 
3949 Integer Integer::DividedBy(const Integer &b) const
3950 {
3951  Integer remainder, quotient;
3952  Integer::Divide(remainder, quotient, *this, b);
3953  return quotient;
3954 }
3955 
3957 {
3958  Integer remainder, quotient;
3959  Integer::Divide(remainder, quotient, *this, b);
3960  return remainder;
3961 }
3962 
3963 void Integer::Divide(word &remainder, Integer &quotient, const Integer &dividend, word divisor)
3964 {
3965  if (!divisor)
3966  throw Integer::DivideByZero();
3967 
3968  assert(divisor);
3969 
3970  if ((divisor & (divisor-1)) == 0) // divisor is a power of 2
3971  {
3972  quotient = dividend >> (BitPrecision(divisor)-1);
3973  remainder = dividend.reg[0] & (divisor-1);
3974  return;
3975  }
3976 
3977  unsigned int i = dividend.WordCount();
3978  quotient.reg.CleanNew(RoundupSize(i));
3979  remainder = 0;
3980  while (i--)
3981  {
3982  quotient.reg[i] = DWord(dividend.reg[i], remainder) / divisor;
3983  remainder = DWord(dividend.reg[i], remainder) % divisor;
3984  }
3985 
3986  if (dividend.NotNegative())
3987  quotient.sign = POSITIVE;
3988  else
3989  {
3990  quotient.sign = NEGATIVE;
3991  if (remainder)
3992  {
3993  --quotient;
3994  remainder = divisor - remainder;
3995  }
3996  }
3997 }
3998 
3999 Integer Integer::DividedBy(word b) const
4000 {
4001  word remainder;
4002  Integer quotient;
4003  Integer::Divide(remainder, quotient, *this, b);
4004  return quotient;
4005 }
4006 
4007 word Integer::Modulo(word divisor) const
4008 {
4009  if (!divisor)
4010  throw Integer::DivideByZero();
4011 
4012  assert(divisor);
4013 
4014  word remainder;
4015 
4016  if ((divisor & (divisor-1)) == 0) // divisor is a power of 2
4017  remainder = reg[0] & (divisor-1);
4018  else
4019  {
4020  unsigned int i = WordCount();
4021 
4022  if (divisor <= 5)
4023  {
4024  DWord sum(0, 0);
4025  while (i--)
4026  sum += reg[i];
4027  remainder = sum % divisor;
4028  }
4029  else
4030  {
4031  remainder = 0;
4032  while (i--)
4033  remainder = DWord(reg[i], remainder) % divisor;
4034  }
4035  }
4036 
4037  if (IsNegative() && remainder)
4038  remainder = divisor - remainder;
4039 
4040  return remainder;
4041 }
4042 
4044 {
4045  if (!!(*this)) // don't flip sign if *this==0
4046  sign = Sign(1-sign);
4047 }
4048 
4049 int Integer::PositiveCompare(const Integer& t) const
4050 {
4051  unsigned size = WordCount(), tSize = t.WordCount();
4052 
4053  if (size == tSize)
4054  return CryptoPP::Compare(reg, t.reg, size);
4055  else
4056  return size > tSize ? 1 : -1;
4057 }
4058 
4059 int Integer::Compare(const Integer& t) const
4060 {
4061  if (NotNegative())
4062  {
4063  if (t.NotNegative())
4064  return PositiveCompare(t);
4065  else
4066  return 1;
4067  }
4068  else
4069  {
4070  if (t.NotNegative())
4071  return -1;
4072  else
4073  return -PositiveCompare(t);
4074  }
4075 }
4076 
4078 {
4079  if (!IsPositive())
4080  return Zero();
4081 
4082  // overestimate square root
4083  Integer x, y = Power2((BitCount()+1)/2);
4084  assert(y*y >= *this);
4085 
4086  do
4087  {
4088  x = y;
4089  y = (x + *this/x) >> 1;
4090  } while (y<x);
4091 
4092  return x;
4093 }
4094 
4095 bool Integer::IsSquare() const
4096 {
4097  Integer r = SquareRoot();
4098  return *this == r.Squared();
4099 }
4100 
4101 bool Integer::IsUnit() const
4102 {
4103  return (WordCount() == 1) && (reg[0] == 1);
4104 }
4105 
4107 {
4108  return IsUnit() ? *this : Zero();
4109 }
4110 
4111 Integer a_times_b_mod_c(const Integer &x, const Integer& y, const Integer& m)
4112 {
4113  return x*y%m;
4114 }
4115 
4116 Integer a_exp_b_mod_c(const Integer &x, const Integer& e, const Integer& m)
4117 {
4118  ModularArithmetic mr(m);
4119  return mr.Exponentiate(x, e);
4120 }
4121 
4123 {
4124  return EuclideanDomainOf<Integer>().Gcd(a, b);
4125 }
4126 
4128 {
4129  assert(m.NotNegative());
4130 
4131  if (IsNegative())
4132  return Modulo(m).InverseMod(m);
4133 
4134  if (m.IsEven())
4135  {
4136  if (!m || IsEven())
4137  return Zero(); // no inverse
4138  if (*this == One())
4139  return One();
4140 
4141  Integer u = m.Modulo(*this).InverseMod(*this);
4142  return !u ? Zero() : (m*(*this-u)+1)/(*this);
4143  }
4144 
4145  SecBlock<word> T(m.reg.size() * 4);
4146  Integer r((word)0, m.reg.size());
4147  unsigned k = AlmostInverse(r.reg, T, reg, reg.size(), m.reg, m.reg.size());
4148  DivideByPower2Mod(r.reg, r.reg, k, m.reg, m.reg.size());
4149  return r;
4150 }
4151 
4152 word Integer::InverseMod(word mod) const
4153 {
4154  word g0 = mod, g1 = *this % mod;
4155  word v0 = 0, v1 = 1;
4156  word y;
4157 
4158  while (g1)
4159  {
4160  if (g1 == 1)
4161  return v1;
4162  y = g0 / g1;
4163  g0 = g0 % g1;
4164  v0 += y * v1;
4165 
4166  if (!g0)
4167  break;
4168  if (g0 == 1)
4169  return mod-v0;
4170  y = g1 / g0;
4171  g1 = g1 % g0;
4172  v1 += y * v0;
4173  }
4174  return 0;
4175 }
4176 
4177 // ********************************************************
4178 
4180 {
4181  BERSequenceDecoder seq(bt);
4182  OID oid(seq);
4183  if (oid != ASN1::prime_field())
4184  BERDecodeError();
4185  m_modulus.BERDecode(seq);
4186  seq.MessageEnd();
4187  m_result.reg.resize(m_modulus.reg.size());
4188 }
4189 
4191 {
4192  DERSequenceEncoder seq(bt);
4193  ASN1::prime_field().DEREncode(seq);
4194  m_modulus.DEREncode(seq);
4195  seq.MessageEnd();
4196 }
4197 
4199 {
4201 }
4202 
4204 {
4206 }
4207 
4209 {
4210  if (a.reg.size()==m_modulus.reg.size())
4211  {
4212  CryptoPP::DivideByPower2Mod(m_result.reg.begin(), a.reg, 1, m_modulus.reg, a.reg.size());
4213  return m_result;
4214  }
4215  else
4216  return m_result1 = (a.IsEven() ? (a >> 1) : ((a+m_modulus) >> 1));
4217 }
4218 
4219 const Integer& ModularArithmetic::Add(const Integer &a, const Integer &b) const
4220 {
4221  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4222  {
4223  if (CryptoPP::Add(m_result.reg.begin(), a.reg, b.reg, a.reg.size())
4224  || Compare(m_result.reg, m_modulus.reg, a.reg.size()) >= 0)
4225  {
4226  CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
4227  }
4228  return m_result;
4229  }
4230  else
4231  {
4232  m_result1 = a+b;
4233  if (m_result1 >= m_modulus)
4234  m_result1 -= m_modulus;
4235  return m_result1;
4236  }
4237 }
4238 
4240 {
4241  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4242  {
4243  if (CryptoPP::Add(a.reg, a.reg, b.reg, a.reg.size())
4244  || Compare(a.reg, m_modulus.reg, a.reg.size()) >= 0)
4245  {
4246  CryptoPP::Subtract(a.reg, a.reg, m_modulus.reg, a.reg.size());
4247  }
4248  }
4249  else
4250  {
4251  a+=b;
4252  if (a>=m_modulus)
4253  a-=m_modulus;
4254  }
4255 
4256  return a;
4257 }
4258 
4259 const Integer& ModularArithmetic::Subtract(const Integer &a, const Integer &b) const
4260 {
4261  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4262  {
4263  if (CryptoPP::Subtract(m_result.reg.begin(), a.reg, b.reg, a.reg.size()))
4264  CryptoPP::Add(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
4265  return m_result;
4266  }
4267  else
4268  {
4269  m_result1 = a-b;
4270  if (m_result1.IsNegative())
4271  m_result1 += m_modulus;
4272  return m_result1;
4273  }
4274 }
4275 
4277 {
4278  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4279  {
4280  if (CryptoPP::Subtract(a.reg, a.reg, b.reg, a.reg.size()))
4281  CryptoPP::Add(a.reg, a.reg, m_modulus.reg, a.reg.size());
4282  }
4283  else
4284  {
4285  a-=b;
4286  if (a.IsNegative())
4287  a+=m_modulus;
4288  }
4289 
4290  return a;
4291 }
4292 
4294 {
4295  if (!a)
4296  return a;
4297 
4298  CopyWords(m_result.reg.begin(), m_modulus.reg, m_modulus.reg.size());
4299  if (CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, a.reg, a.reg.size()))
4300  Decrement(m_result.reg.begin()+a.reg.size(), m_modulus.reg.size()-a.reg.size());
4301 
4302  return m_result;
4303 }
4304 
4305 Integer ModularArithmetic::CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
4306 {
4307  if (m_modulus.IsOdd())
4308  {
4309  MontgomeryRepresentation dr(m_modulus);
4310  return dr.ConvertOut(dr.CascadeExponentiate(dr.ConvertIn(x), e1, dr.ConvertIn(y), e2));
4311  }
4312  else
4313  return AbstractRing<Integer>::CascadeExponentiate(x, e1, y, e2);
4314 }
4315 
4316 void ModularArithmetic::SimultaneousExponentiate(Integer *results, const Integer &base, const Integer *exponents, unsigned int exponentsCount) const
4317 {
4318  if (m_modulus.IsOdd())
4319  {
4320  MontgomeryRepresentation dr(m_modulus);
4321  dr.SimultaneousExponentiate(results, dr.ConvertIn(base), exponents, exponentsCount);
4322  for (unsigned int i=0; i<exponentsCount; i++)
4323  results[i] = dr.ConvertOut(results[i]);
4324  }
4325  else
4326  AbstractRing<Integer>::SimultaneousExponentiate(results, base, exponents, exponentsCount);
4327 }
4328 
4330  : ModularArithmetic(m),
4331  m_u((word)0, m_modulus.reg.size()),
4332  m_workspace(5*m_modulus.reg.size())
4333 {
4334  if (!m_modulus.IsOdd())
4335  throw InvalidArgument("MontgomeryRepresentation: Montgomery representation requires an odd modulus");
4336 
4337  RecursiveInverseModPower2(m_u.reg, m_workspace, m_modulus.reg, m_modulus.reg.size());
4338 }
4339 
4341 {
4342  word *const T = m_workspace.begin();
4343  word *const R = m_result.reg.begin();
4344  const size_t N = m_modulus.reg.size();
4345  assert(a.reg.size()<=N && b.reg.size()<=N);
4346 
4347  AsymmetricMultiply(T, T+2*N, a.reg, a.reg.size(), b.reg, b.reg.size());
4348  SetWords(T+a.reg.size()+b.reg.size(), 0, 2*N-a.reg.size()-b.reg.size());
4349  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4350  return m_result;
4351 }
4352 
4354 {
4355  word *const T = m_workspace.begin();
4356  word *const R = m_result.reg.begin();
4357  const size_t N = m_modulus.reg.size();
4358  assert(a.reg.size()<=N);
4359 
4360  CryptoPP::Square(T, T+2*N, a.reg, a.reg.size());
4361  SetWords(T+2*a.reg.size(), 0, 2*N-2*a.reg.size());
4362  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4363  return m_result;
4364 }
4365 
4367 {
4368  word *const T = m_workspace.begin();
4369  word *const R = m_result.reg.begin();
4370  const size_t N = m_modulus.reg.size();
4371  assert(a.reg.size()<=N);
4372 
4373  CopyWords(T, a.reg, a.reg.size());
4374  SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
4375  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4376  return m_result;
4377 }
4378 
4380 {
4381 // return (EuclideanMultiplicativeInverse(a, modulus)<<(2*WORD_BITS*modulus.reg.size()))%modulus;
4382  word *const T = m_workspace.begin();
4383  word *const R = m_result.reg.begin();
4384  const size_t N = m_modulus.reg.size();
4385  assert(a.reg.size()<=N);
4386 
4387  CopyWords(T, a.reg, a.reg.size());
4388  SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
4389  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4390  unsigned k = AlmostInverse(R, T, R, N, m_modulus.reg, N);
4391 
4392 // cout << "k=" << k << " N*32=" << 32*N << endl;
4393 
4394  if (k>N*WORD_BITS)
4395  DivideByPower2Mod(R, R, k-N*WORD_BITS, m_modulus.reg, N);
4396  else
4397  MultiplyByPower2Mod(R, R, N*WORD_BITS-k, m_modulus.reg, N);
4398 
4399  return m_result;
4400 }
4401 
4402 // Specialization declared in misc.h to allow us to print integers
4403 // with additional control options, like arbirary bases and uppercase.
4404 template <> CRYPTOPP_DLL
4405 std::string IntToString<Integer>(Integer value, unsigned int base)
4406 {
4407  // Hack... set the high bit for uppercase. Set the next bit fo a suffix.
4408  static const unsigned int BIT_32 = (1U << 31);
4409  const bool UPPER = !!(base & BIT_32);
4410  static const unsigned int BIT_31 = (1U << 30);
4411  const bool BASE = !!(base & BIT_31);
4412 
4413  const char CH = UPPER ? 'A' : 'a';
4414  base &= ~(BIT_32|BIT_31);
4415  assert(base >= 2 && base <= 32);
4416 
4417  if (value == 0)
4418  return "0";
4419 
4420  bool negative = false, zero = false;
4421  if (value.IsNegative())
4422  {
4423  negative = true;
4424  value.Negate();
4425  }
4426 
4427  if (!value)
4428  zero = true;
4429 
4430  SecBlock<char> s(value.BitCount() / (SaturatingSubtract1(BitPrecision(base),1U)) + 1);
4431  Integer temp;
4432 
4433  unsigned int i=0;
4434  while (!!value)
4435  {
4436  word digit;
4437  Integer::Divide(digit, temp, value, word(base));
4438  s[i++]=char((digit < 10 ? '0' : (CH - 10)) + digit);
4439  value.swap(temp);
4440  }
4441 
4442  std::string result;
4443  result.reserve(i+2);
4444 
4445  if (negative)
4446  result += '-';
4447 
4448  if (zero)
4449  result += '0';
4450 
4451  while (i--)
4452  result += s[i];
4453 
4454  if (BASE)
4455  {
4456  if (base == 10)
4457  result += '.';
4458  else if (base == 16)
4459  result += 'h';
4460  else if (base == 8)
4461  result += 'o';
4462  else if (base == 2)
4463  result += 'b';
4464  }
4465 
4466  return result;
4467 }
4468 
4469 // Specialization declared in misc.h to avoid Coverity findings.
4470 template <> CRYPTOPP_DLL
4471 std::string IntToString<word64>(word64 value, unsigned int base)
4472 {
4473  // Hack... set the high bit for uppercase.
4474  static const unsigned int HIGH_BIT = (1U << 31);
4475  const char CH = !!(base & HIGH_BIT) ? 'A' : 'a';
4476  base &= ~HIGH_BIT;
4477 
4478  assert(base >= 2);
4479  if (value == 0)
4480  return "0";
4481 
4482  std::string result;
4483  while (value > 0)
4484  {
4485  word64 digit = value % base;
4486  result = char((digit < 10 ? '0' : (CH - 10)) + digit) + result;
4487  value /= base;
4488  }
4489  return result;
4490 }
4491 
4492 NAMESPACE_END
4493 
4494 #if WORKAROUND_ARMEL_BUG
4495 # pragma GCC pop_options
4496 #endif
4497 
4498 #endif
Used to pass byte array input as part of a NameValuePairs object.
Definition: algparam.h:29
An invalid argument was detected.
Definition: cryptlib.h:182
void DEREncode(BufferedTransformation &bt) const
Encodes in DER format.
Definition: integer.cpp:4190
Integer CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
TODO.
Definition: integer.cpp:4305
Classes for working with NameValuePairs.
const Integer & Square(const Integer &a) const
Square an element in the ring.
Definition: integer.cpp:4353
const Integer & MultiplicativeInverse(const Integer &a) const
Calculate the multiplicative inverse of an element in the ring.
Definition: integer.cpp:4379
void swap(SecBlock< T, A > &b)
Swap contents with another SecBlock.
Definition: secblock.h:717
a number which is probabilistically prime
Definition: integer.h:77
Utility functions for the Crypto++ library.
ByteOrder
Provides the byte ordering.
Definition: cryptlib.h:123
Restricts the instantiation of a class to one static object without locks.
Definition: misc.h:244
bool NotZero() const
Determines if the Integer is non-0.
Definition: integer.h:315
bool GetBit(size_t i) const
Provides the i-th bit of the Integer.
Definition: integer.cpp:2975
void CleanNew(size_type newSize)
Change size without preserving contents.
Definition: secblock.h:663
void Encode(byte *output, size_t outputLen, Signedness sign=UNSIGNED) const
Encode in big-endian format.
Definition: integer.cpp:3270
an unsigned value
Definition: integer.h:67
Integer & Reduce(Integer &a, const Integer &b) const
TODO.
Definition: integer.cpp:4276
virtual size_t Peek(byte &outByte) const
Peek a 8-bit byte.
Definition: cryptlib.cpp:537
bool IsOdd() const
Determines if the Integer is odd parity.
Definition: integer.h:333
T GetValueWithDefault(const char *name, T defaultValue) const
Get a named value.
Definition: cryptlib.h:348
size_t DEREncodeUnsigned(BufferedTransformation &out, T w, byte asnTag=INTEGER)
DER Encode unsigned value.
Definition: asn.h:442
virtual void GenerateBlock(byte *output, size_t size)
Generate random array of bytes.
Definition: cryptlib.cpp:329
size_t BitsToBytes(size_t bitCount)
Returns the number of 8-bit bytes or octets required for the specified number of bits.
Definition: misc.h:704
This file contains helper classes/functions for implementing public key algorithms.
bool FirstPrime(Integer &p, const Integer &max, const Integer &equiv, const Integer &mod, const PrimeSelector *pSelector)
Finds a random prime of special form.
Definition: nbtheory.cpp:381
static Integer Gcd(const Integer &a, const Integer &n)
greatest common divisor
Definition: integer.cpp:4122
void resize(size_type newSize)
Change size and preserve contents.
Definition: secblock.h:708
size_t BitsToWords(size_t bitCount)
Returns the number of words required for the specified number of bits.
Definition: misc.h:724
const Integer & Subtract(const Integer &a, const Integer &b) const
Subtracts elements in the ring.
Definition: integer.cpp:4259
unsigned int BytePrecision(const T &value)
Returns the number of 8-bit bytes or octets required for a value.
Definition: misc.h:587
void CleanGrow(size_type newSize)
Change size and preserve contents.
Definition: secblock.h:692
bool IsEven() const
Determines if the Integer is even parity.
Definition: integer.h:330
Secure memory block with allocator and cleanup.
Definition: secblock.h:440
unsigned int WordCount() const
Determines the number of words required to represent the Integer.
Definition: integer.cpp:3195
void OpenPGPDecode(const byte *input, size_t inputLen)
Decode from OpenPGP format.
Definition: integer.cpp:3345
Signedness
Used when importing and exporting integers.
Definition: integer.h:65
size_type size() const
Provides the count of elements in the SecBlock.
Definition: secblock.h:527
ASN.1 object identifiers for algorthms and schemes.
Square block cipher.
Definition: square.h:24
Classes for automatic resource management.
size_t size() const
Length of the memory block.
Definition: algparam.h:93
byte GetByte(size_t i) const
Provides the i-th byte of the Integer.
Definition: integer.cpp:2997
Library configuration file.
MontgomeryRepresentation(const Integer &modulus)
Construct a IsMontgomeryRepresentation.
Definition: integer.cpp:4329
static void DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n)
returns same result as Divide(r, q, a, Power2(n)), but faster
Definition: integer.cpp:3921
Ring of congruence classes modulo n.
Definition: modarith.h:34
STL namespace.
Interface for random number generators.
Definition: cryptlib.h:1186
size_t BytesToWords(size_t byteCount)
Returns the number of words required for the specified number of bytes.
Definition: misc.h:714
void Randomize(RandomNumberGenerator &rng, size_t bitCount)
Set this Integer to random integer.
Definition: integer.cpp:3359
std::string IntToString< word64 >(word64 value, unsigned int base)
Converts an unsigned value to a string.
Definition: integer.cpp:4471
size_t MinEncodedSize(Signedness sign=UNSIGNED) const
The minimum number of bytes to encode this integer.
Definition: integer.cpp:3258
void New(size_type newSize)
Change size without preserving contents.
Definition: secblock.h:650
void SetByte(size_t n, byte value)
Set the n-th byte to value.
Definition: integer.cpp:3005
the value is negative
Definition: integer.h:59
SecBlock typedef.
Definition: secblock.h:734
virtual void SimultaneousExponentiate(Element *results, const Element &base, const Integer *exponents, unsigned int exponentsCount) const
Exponentiates a base to multiple exponents in the Ring.
Definition: algebra.cpp:334
BER Sequence Decoder.
Definition: asn.h:294
Interface for buffered transformations.
Definition: cryptlib.h:1352
void DEREncodeAsOctetString(BufferedTransformation &bt, size_t length) const
encode absolute value as big-endian octet string
Definition: integer.cpp:3314
lword MaxRetrievable() const
Provides the number of bytes ready for retrieval.
Definition: queue.h:35
const byte * begin() const
Pointer to the first byte in the memory block.
Definition: algparam.h:89
bool IsConvertableToLong() const
Determines if the Integer is convertable to Long.
Definition: integer.cpp:2851
Integer MultiplicativeInverse() const
return inverse if 1 or -1, otherwise return 0
Definition: integer.cpp:4106
static const Integer & One()
Integer representing 1.
Definition: integer.cpp:2948
bool GetIntValue(const char *name, int &value) const
Get a named value with type int.
Definition: cryptlib.h:371
const Integer & Add(const Integer &a, const Integer &b) const
Adds elements in the ring.
Definition: integer.cpp:4219
byte order is little-endian
Definition: cryptlib.h:125
Sign
Used internally to represent the integer.
Definition: integer.h:55
Pointer that overloads operator ->
Definition: smartptr.h:39
bool IsSquare() const
return whether this integer is a perfect square
Definition: integer.cpp:4095
size_t OpenPGPEncode(byte *output, size_t bufferSize) const
Encode absolute value in OpenPGP format.
Definition: integer.cpp:3330
Classes and functions for secure memory allocations.
unsigned int BitCount() const
Determines the number of bits required to represent the Integer.
Definition: integer.cpp:3209
void BERDecodeElement(BufferedTransformation &in, Element &a) const
Decodes element in DER format.
Definition: integer.cpp:4203
bool IsUnit() const
is 1 or -1
Definition: integer.cpp:4101
Copy input to a memory buffer.
Definition: filters.h:1000
Integer SquareRoot() const
extract square root, if negative return 0, else return floor of square root
Definition: integer.cpp:4077
bool IsPositive() const
Determines if the Integer is positive.
Definition: integer.h:324
bool GetValue(const char *name, T &value) const
Get a named value.
Definition: cryptlib.h:335
a number with no special properties
Definition: integer.h:75
Integer Squared() const
Definition: integer.h:482
size_t Put(byte inByte, bool blocking=true)
Input a byte for processing.
Definition: cryptlib.h:1378
bool IsNegative() const
Determines if the Integer is negative.
Definition: integer.h:318
AlgorithmParameters MakeParameters(const char *name, const T &value, bool throwIfNotUsed=true)
Create an object that implements NameValuePairs.
Definition: algparam.h:554
void swap(Integer &a)
Swaps this Integer with another Integer.
Definition: integer.cpp:3035
Integer()
Creates the zero integer.
Definition: integer.cpp:2811
unsigned int TrailingZeros(word32 v)
Determines the number of trailing 0-bits in a value.
Definition: misc.h:634
signed long ConvertToLong() const
Convert the Integer to Long.
Definition: integer.cpp:2865
Exception thrown when an error is encountered decoding an OpenPGP integer.
Definition: integer.h:264
void Negate()
Reverse the Sign of the Integer.
Definition: integer.cpp:4043
T Crop(T value, size_t bits)
Truncates the value to the specified number of bits.
Definition: misc.h:692
Integer Times(const Integer &b) const
Definition: integer.cpp:3845
void BERDecodeAsOctetString(BufferedTransformation &bt, size_t length)
Decode nonnegative value from big-endian octet string.
Definition: integer.cpp:3321
Application callback to signal suitability of a cabdidate prime.
Definition: nbtheory.h:80
void ConditionalSwapPointers(bool c, T &a, T &b)
Performs a branchless swap of pointers a and b if condition c is true.
Definition: misc.h:1016
static Integer Power2(size_t e)
Exponentiates to a power of 2.
Definition: integer.cpp:2927
Multiple precision integer with arithmetic operations.
Definition: integer.h:31
a signed value
Definition: integer.h:69
static const Integer & Two()
Integer representing 2.
Definition: integer.cpp:2953
Integer ConvertOut(const Integer &a) const
Reduces an element in the congruence class.
Definition: integer.cpp:4366
virtual Element CascadeExponentiate(const Element &x, const Integer &e1, const Element &y, const Integer &e2) const
TODO.
Definition: algebra.cpp:323
virtual lword Skip(lword skipMax=LWORD_MAX)
Discard skipMax bytes from the output buffer.
Definition: cryptlib.cpp:556
RandomNumberType
Properties of a random integer.
Definition: integer.h:73
const Integer & Inverse(const Integer &a) const
Inverts the element in the ring.
Definition: integer.cpp:4293
const char * Seed()
ConstByteArrayParameter.
Definition: argnames.h:19
byte order is big-endian
Definition: cryptlib.h:127
Safely right shift values when undefined behavior could occur.
const T & STDMIN(const T &a, const T &b)
Replacement function for std::min.
Definition: misc.h:431
String-based implementation of Store interface.
Definition: filters.h:1050
static void Divide(Integer &r, Integer &q, const Integer &a, const Integer &d)
calculate r and q such that (a == d*q + r) && (0 <= r < abs(d))
Definition: integer.cpp:3903
ModularArithmetic(const Integer &modulus=Integer::One())
Construct a ModularArithmetic.
Definition: modarith.h:47
void BERDecodeError()
Raises a BERDecodeErr.
Definition: asn.h:61
Data structure used to store byte strings.
Definition: queue.h:20
Functions for CPU features and intrinsics.
Classes and functions for working with ANS.1 objects.
Classes for SHA-1 and SHA-2 family of message digests.
void SetBit(size_t n, bool value=1)
Set the n-th bit to value.
Definition: integer.cpp:2983
size_t GetWord16(word16 &value, ByteOrder order=BIG_ENDIAN_ORDER)
Retrieve a 16-bit word.
Definition: cryptlib.cpp:753
iterator begin()
Provides an iterator pointing to the first element in the memory block.
Definition: secblock.h:502
Integer & Accumulate(Integer &a, const Integer &b) const
TODO.
Definition: integer.cpp:4239
const char * PointerToPrimeSelector()
const PrimeSelector *
Definition: argnames.h:41
Implementation of BufferedTransformation's attachment interface.
Classes and functions for number theoretic operations.
const Integer & Half(const Integer &a) const
TODO.
Definition: integer.cpp:4208
void DEREncode(BufferedTransformation &bt) const
Encode in DER format.
Definition: integer.cpp:3292
Exception thrown when division by 0 is encountered.
Definition: integer.h:37
DER Sequence Encoder.
Definition: asn.h:304
T1 SaturatingSubtract1(const T1 &a, const T2 &b)
Performs a saturating subtract clamped at 1.
Definition: misc.h:938
Exception thrown when a random number cannot be found that satisfies the condition.
Definition: integer.h:45
Performs modular arithmetic in Montgomery representation for increased speed.
Definition: modarith.h:274
DER General Encoder.
Definition: asn.h:271
Integer CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
TODO.
Definition: modarith.h:308
const Integer & Multiply(const Integer &a, const Integer &b) const
Multiplies elements in the ring.
Definition: integer.cpp:4340
void GenerateBlock(byte *output, size_t size)
Generate random array of bytes.
Definition: integer.cpp:3400
Integer InverseMod(const Integer &n) const
calculate multiplicative inverse of *this mod n
Definition: integer.cpp:4127
void Decode(const byte *input, size_t inputLen, Signedness sign=UNSIGNED)
Decode from big-endian byte array.
Definition: integer.cpp:3218
size_t PutWord16(word16 value, ByteOrder order=BIG_ENDIAN_ORDER, bool blocking=true)
Input a 16-bit word for processing.
Definition: cryptlib.cpp:717
size_t DEREncodeOctetString(BufferedTransformation &bt, const byte *str, size_t strLen)
DER encode octet string.
Definition: asn.cpp:104
virtual lword MaxRetrievable() const
Provides the number of bytes ready for retrieval.
Definition: cryptlib.cpp:499
void DEREncodeElement(BufferedTransformation &out, const Element &a) const
Encodes element in DER format.
Definition: integer.cpp:4198
Safely left shift values when undefined behavior could occur.
static const Integer & Zero()
Integer representing 0.
Definition: integer.cpp:2943
const T & STDMAX(const T &a, const T &b)
Replacement function for std::max.
Definition: misc.h:441
Euclidean domain.
Definition: algebra.h:315
void Grow(size_type newSize)
Change size and preserve contents.
Definition: secblock.h:676
void BERDecode(const byte *input, size_t inputLen)
Decode from BER format.
Definition: integer.cpp:3299
std::string IntToString< Integer >(Integer value, unsigned int base)
Converts an Integer to a string.
Definition: integer.cpp:4405
lword GetBits(size_t i, size_t n) const
Provides the low order bits of the Integer.
Definition: integer.cpp:3012
virtual size_t Get(byte &outByte)
Retrieve a 8-bit byte.
Definition: cryptlib.cpp:518
Class file for performing modular arithmetic.
Crypto++ library namespace.
size_type SizeInBytes() const
Provides the number of bytes in the SecBlock.
Definition: secblock.h:541
BER General Decoder.
Definition: asn.h:234
void SimultaneousExponentiate(Element *results, const Element &base, const Integer *exponents, unsigned int exponentsCount) const
Exponentiates a base to multiple exponents in the ring.
Definition: integer.cpp:4316
int Compare(const Integer &a) const
Perform signed comparison.
Definition: integer.cpp:4059
unsigned int MaxElementByteLength() const
Provides the maximum byte size of an element in the ring.
Definition: modarith.h:230
void SimultaneousExponentiate(Element *results, const Element &base, const Integer *exponents, unsigned int exponentsCount) const
Exponentiates a base to multiple exponents in the ring.
Definition: modarith.h:311
Object Identifier.
Definition: asn.h:158
Integer Modulo(const Integer &b) const
Definition: integer.cpp:3956
size_t Get(byte &outByte)
Retrieve a 8-bit byte.
Definition: queue.cpp:300
unsigned int BitPrecision(const T &value)
Returns the number of bits required for a value.
Definition: misc.h:609
unsigned int ByteCount() const
Determines the number of bytes required to represent the Integer.
Definition: integer.cpp:3200
Integer ConvertIn(const Integer &a) const
Reduces an element in the congruence class.
Definition: modarith.h:294
the value is positive or 0
Definition: integer.h:57
bool NotNegative() const
Determines if the Integer is non-negative.
Definition: integer.h:321
Interface for retrieving values given their names.
Definition: cryptlib.h:277