58#ifndef CRYPTOPP_IMPORTS 
   78#if (CRYPTOPP_MSC_VERSION >= 1400) && !defined(_M_ARM) 
   88#if (__SUNPRO_CC >= 0x5130) 
   90# define MAYBE_UNCONST_CAST(x) const_cast<word*>(x) 
   92# define MAYBE_CONST const 
   93# define MAYBE_UNCONST_CAST(x) x 
   98#if CRYPTOPP_BOOL_X32 || defined(CRYPTOPP_DISABLE_MIXED_ASM) 
   99# undef CRYPTOPP_X86_ASM_AVAILABLE 
  100# undef CRYPTOPP_X32_ASM_AVAILABLE 
  101# undef CRYPTOPP_X64_ASM_AVAILABLE 
  102# undef CRYPTOPP_SSE2_ASM_AVAILABLE 
  103# undef CRYPTOPP_SSSE3_ASM_AVAILABLE 
  105# define CRYPTOPP_INTEGER_SSE2 (CRYPTOPP_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86)) 
  113static void SetFunctionPointers();
 
  121InitializeInteger::InitializeInteger()
 
  127        SetFunctionPointers();
 
  144inline static int Compare(
const word *A, 
const word *B, 
size_t N)
 
  149        else if (A[N] < B[N])
 
  155inline static int Increment(
word *A, 
size_t N, 
word B=1)
 
  162    for (
unsigned i=1; i<N; i++)
 
  168inline static int Decrement(
word *A, 
size_t N, 
word B=1)
 
  175    for (
unsigned i=1; i<N; i++)
 
  181static void TwosComplement(
word *A, 
size_t N)
 
  184    for (
unsigned i=0; i<N; i++)
 
  188static word AtomicInverseModPower2(
word A)
 
  203#if !defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE) || ((defined(__aarch64__) || defined(__x86_64__)) && defined(CRYPTOPP_WORD128_AVAILABLE)) 
  204    #define TWO_64_BIT_WORDS 1 
  205    #define Declare2Words(x)            word x##0, x##1; 
  206    #define AssignWord(a, b)            a##0 = b; a##1 = 0; 
  207    #define Add2WordsBy1(a, b, c)       a##0 = b##0 + c; a##1 = b##1 + (a##0 < c); 
  208    #define LowWord(a)                  a##0 
  209    #define HighWord(a)                 a##1 
  210    #ifdef CRYPTOPP_MSC_VERSION 
  211        #define MultiplyWordsLoHi(p0, p1, a, b)     p0 = _umul128(a, b, &p1); 
  212        #ifndef __INTEL_COMPILER 
  213            #define Double3Words(c, d)      d##1 = __shiftleft128(d##0, d##1, 1); d##0 = __shiftleft128(c, d##0, 1); c *= 2; 
  215    #elif defined(__aarch32__) || defined(__aarch64__) 
  216        #define MultiplyWordsLoHi(p0, p1, a, b)     p0 = a*b; asm ("umulh %0,%1,%2" : "=r"(p1) : "r"(a), "r"(b)); 
  217    #elif defined(__DECCXX) 
  218        #define MultiplyWordsLoHi(p0, p1, a, b)     p0 = a*b; p1 = asm("umulh %a0, %a1, %v0", a, b); 
  219    #elif defined(__x86_64__) 
  220        #if defined(__SUNPRO_CC) && __SUNPRO_CC < 0x5100 
  222            #define MultiplyWordsLoHi(p0, p1, a, b)     asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "r"(b) : "cc"); 
  223        #elif defined(__BMI2__) && 0 
  224            #define MultiplyWordsLoHi(p0, p1, a, b)     asm ("mulxq %3, %0, %1" : "=r"(p0), "=r"(p1) : "d"(a), "r"(b)); 
  225            #define MulAcc(c, d, a, b)      asm ("mulxq %6, %3, %4; addq %3, %0; adcxq %4, %1; adcxq %7, %2;" : "+&r"(c), "+&r"(d##0), "+&r"(d##1), "=&r"(p0), "=&r"(p1) : "d"(a), "r"(b), "r"(W64LIT(0)) : "cc"); 
  226            #define Double3Words(c, d)      asm ("addq %0, %0; adcxq %1, %1; adcxq %2, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1) : : "cc"); 
  227            #define Acc2WordsBy1(a, b)      asm ("addq %2, %0; adcxq %3, %1;" : "+&r"(a##0), "+r"(a##1) : "r"(b), "r"(W64LIT(0)) : "cc"); 
  228            #define Acc2WordsBy2(a, b)      asm ("addq %2, %0; adcxq %3, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b##0), "r"(b##1) : "cc"); 
  229            #define Acc3WordsBy2(c, d, e)   asm ("addq %5, %0; adcxq %6, %1; adcxq %7, %2;" : "+r"(c), "=&r"(e##0), "=&r"(e##1) : "1"(d##0), "2"(d##1), "r"(e##0), "r"(e##1), "r"(W64LIT(0)) : "cc"); 
  231            #define MultiplyWordsLoHi(p0, p1, a, b)     asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc"); 
  232            #define MulAcc(c, d, a, b)      asm ("mulq %6; addq %3, %0; adcq %4, %1; adcq $0, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1), "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc"); 
  233            #define Double3Words(c, d)      asm ("addq %0, %0; adcq %1, %1; adcq %2, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1) : : "cc"); 
  234            #define Acc2WordsBy1(a, b)      asm ("addq %2, %0; adcq $0, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b) : "cc"); 
  235            #define Acc2WordsBy2(a, b)      asm ("addq %2, %0; adcq %3, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b##0), "r"(b##1) : "cc"); 
  236            #define Acc3WordsBy2(c, d, e)   asm ("addq %5, %0; adcq %6, %1; adcq $0, %2;" : "+r"(c), "=r"(e##0), "=r"(e##1) : "1"(d##0), "2"(d##1), "r"(e##0), "r"(e##1) : "cc"); 
  239    #define MultiplyWords(p, a, b)      MultiplyWordsLoHi(p##0, p##1, a, b) 
  241        #define Double3Words(c, d)      d##1 = 2*d##1 + (d##0>>(WORD_BITS-1)); d##0 = 2*d##0 + (c>>(WORD_BITS-1)); c *= 2; 
  244        #define Acc2WordsBy2(a, b)      a##0 += b##0; a##1 += a##0 < b##0; a##1 += b##1; 
  246    #define AddWithCarry(u, a, b)       {word t = a+b; u##0 = t + u##1; u##1 = (t<a) + (u##0<t);} 
  247    #define SubtractWithBorrow(u, a, b) {word t = a-b; u##0 = t - u##1; u##1 = (t>a) + (u##0>t);} 
  248    #define GetCarry(u)                 u##1 
  249    #define GetBorrow(u)                u##1 
  251    #define Declare2Words(x)            dword x; 
  252    #if CRYPTOPP_MSC_VERSION >= 1400 && !defined(__INTEL_COMPILER) && (defined(_M_IX86) || defined(_M_X64) || defined(_M_IA64)) 
  253        #define MultiplyWords(p, a, b)      p = __emulu(a, b); 
  255        #define MultiplyWords(p, a, b)      p = (dword)a*b; 
  257    #define AssignWord(a, b)            a = b; 
  258    #define Add2WordsBy1(a, b, c)       a = b + c; 
  259    #define Acc2WordsBy2(a, b)          a += b; 
  260    #define LowWord(a)                  word(a) 
  261    #define HighWord(a)                 word(a>>WORD_BITS) 
  262    #define Double3Words(c, d)          d = 2*d + (c>>(WORD_BITS-1)); c *= 2; 
  263    #define AddWithCarry(u, a, b)       u = dword(a) + b + GetCarry(u); 
  264    #define SubtractWithBorrow(u, a, b) u = dword(a) - b - GetBorrow(u); 
  265    #define GetCarry(u)                 HighWord(u) 
  266    #define GetBorrow(u)                word(u>>(WORD_BITS*2-1)) 
  269    #define MulAcc(c, d, a, b)          MultiplyWords(p, a, b); Acc2WordsBy1(p, c); c = LowWord(p); Acc2WordsBy1(d, HighWord(p)); 
  272    #define Acc2WordsBy1(a, b)          Add2WordsBy1(a, a, b) 
  275    #define Acc3WordsBy2(c, d, e)       Acc2WordsBy1(e, c); c = LowWord(e); Add2WordsBy1(e, d, HighWord(e)); 
  281#if defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE) 
  282    DWord() {std::memset(&m_whole, 0x00, 
sizeof(m_whole));}
 
  284    DWord() {std::memset(&m_halfs, 0x00, 
sizeof(m_halfs));}
 
  287#ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE 
  288    explicit DWord(
word low) : m_whole(low) { }
 
  290    explicit DWord(
word low)
 
  297#if defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE) 
  298    DWord(
word low, 
word high) : m_whole()
 
  300    DWord(
word low, 
word high) : m_halfs()
 
  303#if defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE) 
  304#  if (CRYPTOPP_LITTLE_ENDIAN) 
  305        const word t[2] = {low,high};
 
  306        std::memcpy(&m_whole, t, 
sizeof(m_whole));
 
  308        const word t[2] = {high,low};
 
  309        std::memcpy(&m_whole, t, 
sizeof(m_whole));
 
  317    static DWord Multiply(
word a, 
word b)
 
  320        #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE 
  321            r.m_whole = (
dword)a * b;
 
  322        #elif defined(MultiplyWordsLoHi) 
  323            MultiplyWordsLoHi(r.m_halfs.low, r.m_halfs.high, a, b);
 
  332        DWord r = Multiply(a, b);
 
  336    DWord & operator+=(
word a)
 
  338        #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE 
  339            m_whole = m_whole + a;
 
  342            m_halfs.high += (m_halfs.low < a);
 
  350        #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE 
  351            r.m_whole = m_whole + a;
 
  353            r.m_halfs.low = m_halfs.low + a;
 
  354            r.m_halfs.high = m_halfs.high + (r.m_halfs.low < a);
 
  359    DWord operator-(DWord a)
 
  362        #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE 
  363            r.m_whole = m_whole - a.m_whole;
 
  365            r.m_halfs.low = m_halfs.low - a.m_halfs.low;
 
  366            r.m_halfs.high = m_halfs.high - a.m_halfs.high - (r.m_halfs.low > m_halfs.low);
 
  371    DWord operator-(
word a)
 
  374        #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE 
  375            r.m_whole = m_whole - a;
 
  377            r.m_halfs.low = m_halfs.low - a;
 
  378            r.m_halfs.high = m_halfs.high - (r.m_halfs.low > m_halfs.low);
 
  388    bool operator!()
 const 
  390    #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE 
  393        return !m_halfs.high && !m_halfs.low;
 
  399    word GetLowHalf()
 const {
return m_halfs.low;}
 
  400    word GetHighHalf()
 const {
return m_halfs.high;}
 
  401    word GetHighHalfAsBorrow()
 const {
return 0-m_halfs.high;}
 
  409    #if (CRYPTOPP_LITTLE_ENDIAN) 
  419   #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE 
  429    Word() : m_whole(0) {}
 
  430    Word(
word value) : m_whole(value) {}
 
  436        r.m_whole = (
word)a * b;
 
  440    Word operator-(Word a)
 
  443        r.m_whole = m_whole - a.m_whole;
 
  447    Word operator-(
hword a)
 
  450        r.m_whole = m_whole - a;
 
  457        return hword(m_whole / divisor);
 
  460    bool operator!()
 const 
  465    word GetWhole()
 const {
return m_whole;}
 
  466    hword GetLowHalf()
 const {
return hword(m_whole);}
 
  475template <
class S, 
class D>
 
  476S DivideThreeWordsByTwo(S *A, S B0, S B1, D *dummy=NULLPTR)
 
  478    CRYPTOPP_UNUSED(dummy);
 
  486    S Q; 
bool pre = (S(B1+1) == 0);
 
  488        Q = D(A[1], A[2]) / S(B1+1);
 
  492        Q = D(A[0], A[1]) / B0;
 
  495    D p = D::Multiply(B0, Q);
 
  496    D u = (D) A[0] - p.GetLowHalf();
 
  497    A[0] = u.GetLowHalf();
 
  498    u = (D) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - D::Multiply(B1, Q);
 
  499    A[1] = u.GetLowHalf();
 
  500    A[2] += u.GetHighHalf();
 
  503    while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
 
  506        A[0] = u.GetLowHalf();
 
  507        u = (D) A[1] - B1 - u.GetHighHalfAsBorrow();
 
  508        A[1] = u.GetLowHalf();
 
  509        A[2] += u.GetHighHalf();
 
  518template <
class S, 
class D>
 
  519inline D DivideFourWordsByTwo(S *T, 
const D &Al, 
const D &Ah, 
const D &B)
 
  526        T[0] = Al.GetLowHalf();
 
  527        T[1] = Al.GetHighHalf();
 
  528        T[2] = Ah.GetLowHalf();
 
  529        T[3] = Ah.GetHighHalf();
 
  530        Q[1] = DivideThreeWordsByTwo<S, D>(T+1, B.GetLowHalf(), B.GetHighHalf());
 
  531        Q[0] = DivideThreeWordsByTwo<S, D>(T, B.GetLowHalf(), B.GetHighHalf());
 
  532        return D(Q[0], Q[1]);
 
  536        return D(Ah.GetLowHalf(), Ah.GetHighHalf());
 
  543    #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE 
  544        return word(m_whole / a);
 
  547        return DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a).GetWhole();
 
  553    #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE 
  554        return word(m_whole % a);
 
  559            word r = m_halfs.high % h;
 
  566            DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a);
 
  567            return Word(r[0], r[1]).GetWhole();
 
  576    #define AddPrologue \ 
  578        __asm__ __volatile__ \ 
  581    #define AddEpilogue \ 
  584                    : "d" (C), "a" (A), "D" (B), "c" (N) \ 
  585                    : "%esi", "memory", "cc" \ 
  588    #define MulPrologue \ 
  589        __asm__ __volatile__ \ 
  594    #define MulEpilogue \ 
  598            : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B) \ 
  599            : "%esi", "memory", "cc" \ 
  601    #define SquPrologue     MulPrologue 
  602    #define SquEpilogue \ 
  606            : "d" (s_maskLow16), "c" (C), "a" (A) \ 
  607            : "%esi", "%edi", "memory", "cc" \ 
  609    #define TopPrologue     MulPrologue 
  610    #define TopEpilogue \ 
  614            : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B), "S" (L) \ 
  618    #define AddPrologue \ 
  621        __asm   mov     eax, [esp+12] \ 
  622        __asm   mov     edi, [esp+16] 
  623    #define AddEpilogue \ 
  629    #define SquPrologue                 \ 
  633        AS2(    lea     ebx, s_maskLow16) 
  634    #define MulPrologue                 \ 
  639        AS2(    lea     ebx, s_maskLow16) 
  640    #define TopPrologue                 \ 
  646        AS2(    lea     ebx, s_maskLow16) 
  647    #define SquEpilogue     RestoreEBX 
  648    #define MulEpilogue     RestoreEBX 
  649    #define TopEpilogue     RestoreEBX 
  652#ifdef CRYPTOPP_X64_MASM_AVAILABLE 
  654int Baseline_Add(
size_t N, 
word *C, 
const word *A, 
const word *B);
 
  655int Baseline_Sub(
size_t N, 
word *C, 
const word *A, 
const word *B);
 
  657#elif defined(CRYPTOPP_X64_ASM_AVAILABLE) && defined(__GNUC__) && defined(CRYPTOPP_WORD128_AVAILABLE) 
  658int Baseline_Add(
size_t N, 
word *C, 
const word *A, 
const word *B)
 
  666    AS2(    mov     %0,[%3+8*%1])
 
  667    AS2(    add     %0,[%4+8*%1])
 
  668    AS2(    mov     [%2+8*%1],%0)
 
  670    AS2(    mov     %0,[%3+8*%1+8])
 
  671    AS2(    adc     %0,[%4+8*%1+8])
 
  672    AS2(    mov     [%2+8*%1+8],%0)
 
  675    AS2(    mov     %0,[%3+8*%1])
 
  676    AS2(    adc     %0,[%4+8*%1])
 
  677    AS2(    mov     [%2+8*%1],%0)
 
  683    : 
"=&r" (result), 
"+c" (N)
 
  684    : 
"r" (C+N), 
"r" (A+N), 
"r" (B+N)
 
  690int Baseline_Sub(
size_t N, 
word *C, 
const word *A, 
const word *B)
 
  698    AS2(    mov     %0,[%3+8*%1])
 
  699    AS2(    sub     %0,[%4+8*%1])
 
  700    AS2(    mov     [%2+8*%1],%0)
 
  702    AS2(    mov     %0,[%3+8*%1+8])
 
  703    AS2(    sbb     %0,[%4+8*%1+8])
 
  704    AS2(    mov     [%2+8*%1+8],%0)
 
  707    AS2(    mov     %0,[%3+8*%1])
 
  708    AS2(    sbb     %0,[%4+8*%1])
 
  709    AS2(    mov     [%2+8*%1],%0)
 
  715    : 
"=&r" (result), 
"+c" (N)
 
  716    : 
"r" (C+N), 
"r" (A+N), 
"r" (B+N)
 
  721#elif defined(CRYPTOPP_X86_ASM_AVAILABLE) && CRYPTOPP_BOOL_X86 
  722CRYPTOPP_NAKED 
int CRYPTOPP_FASTCALL Baseline_Add(
size_t N, 
word *C, 
const word *A, 
const word *B)
 
  727    AS2(    lea     eax, [eax+4*ecx])
 
  728    AS2(    lea     edi, [edi+4*ecx])
 
  729    AS2(    lea     edx, [edx+4*ecx])
 
  739    AS2(    mov     esi,[eax+4*ecx])
 
  740    AS2(    adc     esi,[edi+4*ecx])
 
  741    AS2(    mov     [edx+4*ecx],esi)
 
  742    AS2(    mov     esi,[eax+4*ecx+4])
 
  743    AS2(    adc     esi,[edi+4*ecx+4])
 
  744    AS2(    mov     [edx+4*ecx+4],esi)
 
  746    AS2(    mov     esi,[eax+4*ecx+8])
 
  747    AS2(    adc     esi,[edi+4*ecx+8])
 
  748    AS2(    mov     [edx+4*ecx+8],esi)
 
  749    AS2(    mov     esi,[eax+4*ecx+12])
 
  750    AS2(    adc     esi,[edi+4*ecx+12])
 
  751    AS2(    mov     [edx+4*ecx+12],esi)
 
  753    AS2(    lea     ecx,[ecx+4])        
 
  763    CRYPTOPP_UNUSED(A); CRYPTOPP_UNUSED(B);
 
  764    CRYPTOPP_UNUSED(C); CRYPTOPP_UNUSED(N);
 
  767CRYPTOPP_NAKED 
int CRYPTOPP_FASTCALL Baseline_Sub(
size_t N, 
word *C, const 
word *A, const 
word *B)
 
  772    AS2(    lea     eax, [eax+4*ecx])
 
  773    AS2(    lea     edi, [edi+4*ecx])
 
  774    AS2(    lea     edx, [edx+4*ecx])
 
  784    AS2(    mov     esi,[eax+4*ecx])
 
  785    AS2(    sbb     esi,[edi+4*ecx])
 
  786    AS2(    mov     [edx+4*ecx],esi)
 
  787    AS2(    mov     esi,[eax+4*ecx+4])
 
  788    AS2(    sbb     esi,[edi+4*ecx+4])
 
  789    AS2(    mov     [edx+4*ecx+4],esi)
 
  791    AS2(    mov     esi,[eax+4*ecx+8])
 
  792    AS2(    sbb     esi,[edi+4*ecx+8])
 
  793    AS2(    mov     [edx+4*ecx+8],esi)
 
  794    AS2(    mov     esi,[eax+4*ecx+12])
 
  795    AS2(    sbb     esi,[edi+4*ecx+12])
 
  796    AS2(    mov     [edx+4*ecx+12],esi)
 
  798    AS2(    lea     ecx,[ecx+4])        
 
  808    CRYPTOPP_UNUSED(A); CRYPTOPP_UNUSED(B);
 
  809    CRYPTOPP_UNUSED(C); CRYPTOPP_UNUSED(N);
 
  812#if CRYPTOPP_INTEGER_SSE2 
  813CRYPTOPP_NAKED 
int CRYPTOPP_FASTCALL SSE2_Add(
size_t N, 
word *C, 
const word *A, 
const word *B)
 
  818    AS2(    lea     eax, [eax+4*ecx])
 
  819    AS2(    lea     edi, [edi+4*ecx])
 
  820    AS2(    lea     edx, [edx+4*ecx])
 
  831    AS2(    movd     mm0, DWORD PTR [eax+4*ecx])
 
  832    AS2(    movd     mm1, DWORD PTR [edi+4*ecx])
 
  835    AS2(    movd     DWORD PTR [edx+4*ecx], mm2)
 
  838    AS2(    movd     mm0, DWORD PTR [eax+4*ecx+4])
 
  839    AS2(    movd     mm1, DWORD PTR [edi+4*ecx+4])
 
  842    AS2(    movd     DWORD PTR [edx+4*ecx+4], mm2)
 
  846    AS2(    movd     mm0, DWORD PTR [eax+4*ecx+8])
 
  847    AS2(    movd     mm1, DWORD PTR [edi+4*ecx+8])
 
  850    AS2(    movd     DWORD PTR [edx+4*ecx+8], mm2)
 
  853    AS2(    movd     mm0, DWORD PTR [eax+4*ecx+12])
 
  854    AS2(    movd     mm1, DWORD PTR [edi+4*ecx+12])
 
  857    AS2(    movd     DWORD PTR [edx+4*ecx+12], mm2)
 
  870    CRYPTOPP_UNUSED(A); CRYPTOPP_UNUSED(B);
 
  871    CRYPTOPP_UNUSED(C); CRYPTOPP_UNUSED(N);
 
  873CRYPTOPP_NAKED 
int CRYPTOPP_FASTCALL SSE2_Sub(
size_t N, 
word *C, const 
word *A, const 
word *B)
 
  878    AS2(    lea     eax, [eax+4*ecx])
 
  879    AS2(    lea     edi, [edi+4*ecx])
 
  880    AS2(    lea     edx, [edx+4*ecx])
 
  891    AS2(    movd     mm0, DWORD PTR [eax+4*ecx])
 
  892    AS2(    movd     mm1, DWORD PTR [edi+4*ecx])
 
  895    AS2(    movd     DWORD PTR [edx+4*ecx], mm0)
 
  898    AS2(    movd     mm2, DWORD PTR [eax+4*ecx+4])
 
  899    AS2(    movd     mm1, DWORD PTR [edi+4*ecx+4])
 
  902    AS2(    movd     DWORD PTR [edx+4*ecx+4], mm2)
 
  906    AS2(    movd     mm0, DWORD PTR [eax+4*ecx+8])
 
  907    AS2(    movd     mm1, DWORD PTR [edi+4*ecx+8])
 
  910    AS2(    movd     DWORD PTR [edx+4*ecx+8], mm0)
 
  913    AS2(    movd     mm2, DWORD PTR [eax+4*ecx+12])
 
  914    AS2(    movd     mm1, DWORD PTR [edi+4*ecx+12])
 
  917    AS2(    movd     DWORD PTR [edx+4*ecx+12], mm2)
 
  930    CRYPTOPP_UNUSED(A); CRYPTOPP_UNUSED(B);
 
  931    CRYPTOPP_UNUSED(C); CRYPTOPP_UNUSED(N);
 
  935int CRYPTOPP_FASTCALL Baseline_Add(
size_t N, 
word *C, 
const word *A, 
const word *B)
 
  941    for (
size_t i=0; i<N; i+=2)
 
  943        AddWithCarry(u, A[i], B[i]);
 
  945        AddWithCarry(u, A[i+1], B[i+1]);
 
  948    return int(GetCarry(u));
 
  951int CRYPTOPP_FASTCALL Baseline_Sub(
size_t N, 
word *C, 
const word *A, 
const word *B)
 
  957    for (
size_t i=0; i<N; i+=2)
 
  959        SubtractWithBorrow(u, A[i], B[i]);
 
  961        SubtractWithBorrow(u, A[i+1], B[i+1]);
 
  964    return int(GetBorrow(u));
 
  974    for(
unsigned i=0; i<N; i++)
 
  977        MultiplyWords(p, A[i], B);
 
  978        Acc2WordsBy1(p, carry);
 
  985#ifndef CRYPTOPP_DOXYGEN_PROCESSING 
  989    Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \ 
  994    Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \ 
  995    Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0)  \ 
  996    Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0)  \ 
  997    Mul_SaveAcc(3, 1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1)  \ 
  998    Mul_SaveAcc(4, 2, 3) Mul_Acc(3, 2) \ 
 1003    Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \ 
 1004    Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0)  \ 
 1005    Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0)  \ 
 1006    Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \ 
 1007    Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \ 
 1008    Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \ 
 1009    Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \ 
 1010    Mul_SaveAcc(7, 1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \ 
 1011    Mul_SaveAcc(8, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \ 
 1012    Mul_SaveAcc(9, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \ 
 1013    Mul_SaveAcc(10, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \ 
 1014    Mul_SaveAcc(11, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \ 
 1015    Mul_SaveAcc(12, 6, 7) Mul_Acc(7, 6) \ 
 1020    Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \ 
 1021    Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \ 
 1022    Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \ 
 1023    Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \ 
 1024    Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \ 
 1025    Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \ 
 1026    Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \ 
 1027    Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \ 
 1028    Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \ 
 1029    Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \ 
 1030    Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \ 
 1031    Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \ 
 1032    Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \ 
 1033    Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \ 
 1034    Mul_SaveAcc(14, 0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \ 
 1035    Mul_SaveAcc(15, 1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \ 
 1036    Mul_SaveAcc(16, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \ 
 1037    Mul_SaveAcc(17, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \ 
 1038    Mul_SaveAcc(18, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \ 
 1039    Mul_SaveAcc(19, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \ 
 1040    Mul_SaveAcc(20, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \ 
 1041    Mul_SaveAcc(21, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \ 
 1042    Mul_SaveAcc(22, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \ 
 1043    Mul_SaveAcc(23, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \ 
 1044    Mul_SaveAcc(24, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \ 
 1045    Mul_SaveAcc(25, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \ 
 1046    Mul_SaveAcc(26, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \ 
 1047    Mul_SaveAcc(27, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \ 
 1048    Mul_SaveAcc(28, 14, 15) Mul_Acc(15, 14) \ 
 1057    Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \ 
 1058    Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \ 
 1059    Squ_SaveAcc(3, 1, 3) Squ_Diag(2) \ 
 1060    Squ_SaveAcc(4, 2, 3) Squ_NonDiag \ 
 1065    Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \ 
 1066    Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \ 
 1067    Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \ 
 1068    Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \ 
 1069    Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \ 
 1070    Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \ 
 1071    Squ_SaveAcc(7, 1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \ 
 1072    Squ_SaveAcc(8, 2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5)  Squ_NonDiag \ 
 1073    Squ_SaveAcc(9, 3, 7) Squ_Acc(4, 6) Squ_Diag(5) \ 
 1074    Squ_SaveAcc(10, 4, 7) Squ_Acc(5, 6) Squ_NonDiag \ 
 1075    Squ_SaveAcc(11, 5, 7) Squ_Diag(6) \ 
 1076    Squ_SaveAcc(12, 6, 7) Squ_NonDiag \ 
 1081    Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \ 
 1082    Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \ 
 1083    Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \ 
 1084    Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \ 
 1085    Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \ 
 1086    Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \ 
 1087    Squ_SaveAcc(7, 0, 8) Squ_Acc(1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \ 
 1088    Squ_SaveAcc(8, 0, 9) Squ_Acc(1, 8) Squ_Acc(2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \ 
 1089    Squ_SaveAcc(9, 0, 10) Squ_Acc(1, 9) Squ_Acc(2, 8) Squ_Acc(3, 7) Squ_Acc(4, 6) Squ_Diag(5) \ 
 1090    Squ_SaveAcc(10, 0, 11) Squ_Acc(1, 10) Squ_Acc(2, 9) Squ_Acc(3, 8) Squ_Acc(4, 7) Squ_Acc(5, 6) Squ_NonDiag \ 
 1091    Squ_SaveAcc(11, 0, 12) Squ_Acc(1, 11) Squ_Acc(2, 10) Squ_Acc(3, 9) Squ_Acc(4, 8) Squ_Acc(5, 7) Squ_Diag(6) \ 
 1092    Squ_SaveAcc(12, 0, 13) Squ_Acc(1, 12) Squ_Acc(2, 11) Squ_Acc(3, 10) Squ_Acc(4, 9) Squ_Acc(5, 8) Squ_Acc(6, 7) Squ_NonDiag \ 
 1093    Squ_SaveAcc(13, 0, 14) Squ_Acc(1, 13) Squ_Acc(2, 12) Squ_Acc(3, 11) Squ_Acc(4, 10) Squ_Acc(5, 9) Squ_Acc(6, 8) Squ_Diag(7) \ 
 1094    Squ_SaveAcc(14, 0, 15) Squ_Acc(1, 14) Squ_Acc(2, 13) Squ_Acc(3, 12) Squ_Acc(4, 11) Squ_Acc(5, 10) Squ_Acc(6, 9) Squ_Acc(7, 8) Squ_NonDiag \ 
 1095    Squ_SaveAcc(15, 1, 15) Squ_Acc(2, 14) Squ_Acc(3, 13) Squ_Acc(4, 12) Squ_Acc(5, 11) Squ_Acc(6, 10) Squ_Acc(7, 9) Squ_Diag(8) \ 
 1096    Squ_SaveAcc(16, 2, 15) Squ_Acc(3, 14) Squ_Acc(4, 13) Squ_Acc(5, 12) Squ_Acc(6, 11) Squ_Acc(7, 10) Squ_Acc(8, 9) Squ_NonDiag \ 
 1097    Squ_SaveAcc(17, 3, 15) Squ_Acc(4, 14) Squ_Acc(5, 13) Squ_Acc(6, 12) Squ_Acc(7, 11) Squ_Acc(8, 10) Squ_Diag(9) \ 
 1098    Squ_SaveAcc(18, 4, 15) Squ_Acc(5, 14) Squ_Acc(6, 13) Squ_Acc(7, 12) Squ_Acc(8, 11) Squ_Acc(9, 10) Squ_NonDiag \ 
 1099    Squ_SaveAcc(19, 5, 15) Squ_Acc(6, 14) Squ_Acc(7, 13) Squ_Acc(8, 12) Squ_Acc(9, 11) Squ_Diag(10) \ 
 1100    Squ_SaveAcc(20, 6, 15) Squ_Acc(7, 14) Squ_Acc(8, 13) Squ_Acc(9, 12) Squ_Acc(10, 11) Squ_NonDiag \ 
 1101    Squ_SaveAcc(21, 7, 15) Squ_Acc(8, 14) Squ_Acc(9, 13) Squ_Acc(10, 12) Squ_Diag(11) \ 
 1102    Squ_SaveAcc(22, 8, 15) Squ_Acc(9, 14) Squ_Acc(10, 13) Squ_Acc(11, 12) Squ_NonDiag \ 
 1103    Squ_SaveAcc(23, 9, 15) Squ_Acc(10, 14) Squ_Acc(11, 13) Squ_Diag(12) \ 
 1104    Squ_SaveAcc(24, 10, 15) Squ_Acc(11, 14) Squ_Acc(12, 13) Squ_NonDiag \ 
 1105    Squ_SaveAcc(25, 11, 15) Squ_Acc(12, 14) Squ_Diag(13) \ 
 1106    Squ_SaveAcc(26, 12, 15) Squ_Acc(13, 14) Squ_NonDiag \ 
 1107    Squ_SaveAcc(27, 13, 15) Squ_Diag(14) \ 
 1108    Squ_SaveAcc(28, 14, 15) Squ_NonDiag \ 
 1113    Bot_SaveAcc(0, 0, 1) Bot_Acc(1, 0) \ 
 1118    Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \ 
 1119    Mul_SaveAcc(1, 2, 0) Mul_Acc(1, 1) Mul_Acc(0, 2)  \ 
 1120    Bot_SaveAcc(2, 0, 3) Bot_Acc(1, 2) Bot_Acc(2, 1) Bot_Acc(3, 0)  \ 
 1125    Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \ 
 1126    Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0)  \ 
 1127    Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0)  \ 
 1128    Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \ 
 1129    Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \ 
 1130    Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \ 
 1131    Bot_SaveAcc(6, 0, 7) Bot_Acc(1, 6) Bot_Acc(2, 5) Bot_Acc(3, 4) Bot_Acc(4, 3) Bot_Acc(5, 2) Bot_Acc(6, 1) Bot_Acc(7, 0) \ 
 1136    Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \ 
 1137    Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \ 
 1138    Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \ 
 1139    Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \ 
 1140    Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \ 
 1141    Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \ 
 1142    Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \ 
 1143    Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \ 
 1144    Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \ 
 1145    Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \ 
 1146    Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \ 
 1147    Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \ 
 1148    Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \ 
 1149    Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \ 
 1150    Bot_SaveAcc(14, 0, 15) Bot_Acc(1, 14) Bot_Acc(2, 13) Bot_Acc(3, 12) Bot_Acc(4, 11) Bot_Acc(5, 10) Bot_Acc(6, 9) Bot_Acc(7, 8) Bot_Acc(8, 7) Bot_Acc(9, 6) Bot_Acc(10, 5) Bot_Acc(11, 4) Bot_Acc(12, 3) Bot_Acc(13, 2) Bot_Acc(14, 1) Bot_Acc(15, 0) \ 
 1156#define Mul_Begin(n)                \ 
 1160    MultiplyWords(p, A[0], B[0])    \ 
 1161    AssignWord(c, LowWord(p))       \ 
 1162    AssignWord(d, HighWord(p)) 
 1164#define Mul_Acc(i, j)               \ 
 1165    MultiplyWords(p, A[i], B[j])    \ 
 1166    Acc2WordsBy1(c, LowWord(p))     \ 
 1167    Acc2WordsBy1(d, HighWord(p)) 
 1169#define Mul_SaveAcc(k, i, j)        \ 
 1170    R[k] = LowWord(c);              \ 
 1171    Add2WordsBy1(c, d, HighWord(c)) \ 
 1172    MultiplyWords(p, A[i], B[j])    \ 
 1173    AssignWord(d, HighWord(p))      \ 
 1174    Acc2WordsBy1(c, LowWord(p)) 
 1177    R[2*n-3] = LowWord(c);          \ 
 1178    Acc2WordsBy1(d, HighWord(c))    \ 
 1179    MultiplyWords(p, A[n-1], B[n-1])\ 
 1180    Acc2WordsBy2(d, p)              \ 
 1181    R[2*n-2] = LowWord(d);          \ 
 1182    R[2*n-1] = HighWord(d); 
 1184#define Bot_SaveAcc(k, i, j)        \ 
 1185    R[k] = LowWord(c);              \ 
 1186    word e = LowWord(d) + HighWord(c);  \ 
 1189#define Bot_Acc(i, j)   \ 
 1195#define Mul_Begin(n)                \ 
 1199    MultiplyWords(p, A[0], B[0])    \ 
 1201    AssignWord(d, HighWord(p)) 
 1203#define Mul_Acc(i, j)               \ 
 1204    MulAcc(c, d, A[i], B[j]) 
 1206#define Mul_SaveAcc(k, i, j)        \ 
 1209    AssignWord(d, HighWord(d))  \ 
 1210    MulAcc(c, d, A[i], B[j]) 
 1212#define Mul_End(k, i)                   \ 
 1214    MultiplyWords(p, A[i], B[i])    \ 
 1215    Acc2WordsBy2(p, d)              \ 
 1216    R[k+1] = LowWord(p);            \ 
 1217    R[k+2] = HighWord(p); 
 1219#define Bot_SaveAcc(k, i, j)        \ 
 1224#define Bot_Acc(i, j)   \ 
 1231#define Squ_Begin(n)                \ 
 1236    MultiplyWords(p, A[0], A[0])    \ 
 1237    R[0] = LowWord(p);              \ 
 1238    AssignWord(e, HighWord(p))      \ 
 1239    MultiplyWords(p, A[0], A[1])    \ 
 1241    AssignWord(d, HighWord(p))      \ 
 1244#define Squ_NonDiag             \ 
 1247#define Squ_SaveAcc(k, i, j)        \ 
 1248    Acc3WordsBy2(c, d, e)           \ 
 1250    MultiplyWords(p, A[i], A[j])    \ 
 1252    AssignWord(d, HighWord(p))      \ 
 1254#define Squ_Acc(i, j)               \ 
 1255    MulAcc(c, d, A[i], A[j]) 
 1257#define Squ_Diag(i)                 \ 
 1259    MulAcc(c, d, A[i], A[i]) 
 1262    Acc3WordsBy2(c, d, e)           \ 
 1264    MultiplyWords(p, A[n-1], A[n-1])\ 
 1265    Acc2WordsBy2(p, e)              \ 
 1266    R[2*n-2] = LowWord(p);          \ 
 1267    R[2*n-1] = HighWord(p); 
 1270void Baseline_Multiply2(
word *R, 
const word *AA, 
const word *BB)
 
 1279void Baseline_Multiply4(
word *R, 
const word *AA, 
const word *BB)
 
 1288void Baseline_Multiply8(
word *R, 
const word *AA, 
const word *BB)
 
 1297void Baseline_Square2(
word *R, 
const word *AA)
 
 1305void Baseline_Square4(
word *R, 
const word *AA)
 
 1313void Baseline_Square8(
word *R, 
const word *AA)
 
 1321void Baseline_MultiplyBottom2(
word *R, 
const word *AA, 
const word *BB)
 
 1330#if defined(TWO_64_BIT_WORDS) 
 1331    CRYPTOPP_UNUSED(d0); CRYPTOPP_UNUSED(d1);
 
 1335void Baseline_MultiplyBottom4(
word *R, 
const word *AA, 
const word *BB)
 
 1344void Baseline_MultiplyBottom8(
word *R, 
const word *AA, 
const word *BB)
 
 1353#define Top_Begin(n)                \ 
 1357    MultiplyWords(p, A[0], B[n-2]);\ 
 1358    AssignWord(d, HighWord(p)); 
 1360#define Top_Acc(i, j)   \ 
 1361    MultiplyWords(p, A[i], B[j]);\ 
 1362    Acc2WordsBy1(d, HighWord(p)); 
 1364#define Top_SaveAcc0(i, j)      \ 
 1366    AssignWord(d, HighWord(d))  \ 
 1367    MulAcc(c, d, A[i], B[j]) 
 1369#define Top_SaveAcc1(i, j)      \ 
 1371    Acc2WordsBy1(d, c); \ 
 1373    AssignWord(d, HighWord(d))  \ 
 1374    MulAcc(c, d, A[i], B[j]) 
 1380    Baseline_Multiply2(T, A, B);
 
 1392    Top_Acc(1, 1) Top_Acc(2, 0)  \
 
 1393    Top_SaveAcc0(0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0)  \
 
 1394    Top_SaveAcc1(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1)  \
 
 1395    Mul_SaveAcc(0, 2, 3) Mul_Acc(3, 2) \
 
 1406    Top_Acc(1, 5) Top_Acc(2, 4) Top_Acc(3, 3) Top_Acc(4, 2) Top_Acc(5, 1) Top_Acc(6, 0) \
 
 1407    Top_SaveAcc0(0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
 
 1408    Top_SaveAcc1(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
 
 1409    Mul_SaveAcc(0, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
 
 1410    Mul_SaveAcc(1, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
 
 1411    Mul_SaveAcc(2, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
 
 1412    Mul_SaveAcc(3, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
 
 1413    Mul_SaveAcc(4, 6, 7) Mul_Acc(7, 6) \
 
 1417#if !CRYPTOPP_INTEGER_SSE2   
 1418void Baseline_Multiply16(
word *R, 
const word *AA, 
const word *BB)
 
 1427void Baseline_Square16(
word *R, 
const word *AA)
 
 1435void Baseline_MultiplyBottom16(
word *R, 
const word *AA, 
const word *BB)
 
 1444void Baseline_MultiplyTop16(
word *R, 
const word *AA, 
const word *BB, 
word L)
 
 1451    Top_Acc(1, 13) Top_Acc(2, 12) Top_Acc(3, 11) Top_Acc(4, 10) Top_Acc(5, 9) Top_Acc(6, 8) Top_Acc(7, 7) Top_Acc(8, 6) Top_Acc(9, 5) Top_Acc(10, 4) Top_Acc(11, 3) Top_Acc(12, 2) Top_Acc(13, 1) Top_Acc(14, 0) \
 
 1452    Top_SaveAcc0(0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
 
 1453    Top_SaveAcc1(1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
 
 1454    Mul_SaveAcc(0, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
 
 1455    Mul_SaveAcc(1, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
 
 1456    Mul_SaveAcc(2, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
 
 1457    Mul_SaveAcc(3, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
 
 1458    Mul_SaveAcc(4, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
 
 1459    Mul_SaveAcc(5, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
 
 1460    Mul_SaveAcc(6, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
 
 1461    Mul_SaveAcc(7, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
 
 1462    Mul_SaveAcc(8, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
 
 1463    Mul_SaveAcc(9, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
 
 1464    Mul_SaveAcc(10, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
 
 1465    Mul_SaveAcc(11, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
 
 1466    Mul_SaveAcc(12, 14, 15) Mul_Acc(15, 14) \
 
 1473#if CRYPTOPP_INTEGER_SSE2 
 1475CRYPTOPP_ALIGN_DATA(16)
 
 1477const 
word32 s_maskLow16[4] = {
 
 1478    0xffff,0xffff,0xffff,0xffff
 
 1497#define SSE2_FinalSave(k)           \ 
 1498    AS2(    psllq       xmm5, 16)   \ 
 1499    AS2(    paddq       xmm4, xmm5) \ 
 1500    AS2(    movq        QWORD PTR [ecx+8*(k)], xmm4) 
 1502#define SSE2_SaveShift(k)           \ 
 1503    AS2(    movq        xmm0, xmm6) \ 
 1504    AS2(    punpckhqdq  xmm6, xmm0) \ 
 1505    AS2(    movq        xmm1, xmm7) \ 
 1506    AS2(    punpckhqdq  xmm7, xmm1) \ 
 1507    AS2(    paddd       xmm6, xmm0) \ 
 1508    AS2(    pslldq      xmm6, 4)    \ 
 1509    AS2(    paddd       xmm7, xmm1) \ 
 1510    AS2(    paddd       xmm4, xmm6) \ 
 1511    AS2(    pslldq      xmm7, 4)    \ 
 1512    AS2(    movq        xmm6, xmm4) \ 
 1513    AS2(    paddd       xmm5, xmm7) \ 
 1514    AS2(    movq        xmm7, xmm5) \ 
 1515    AS2(    movd        DWORD PTR [ecx+8*(k)], xmm4)    \ 
 1516    AS2(    psrlq       xmm6, 16)   \ 
 1517    AS2(    paddq       xmm6, xmm7) \ 
 1518    AS2(    punpckhqdq  xmm4, xmm0) \ 
 1519    AS2(    punpckhqdq  xmm5, xmm0) \ 
 1520    AS2(    movq        QWORD PTR [ecx+8*(k)+2], xmm6)  \ 
 1521    AS2(    psrlq       xmm6, 3*16) \ 
 1522    AS2(    paddd       xmm4, xmm6) \ 
 1524#define Squ_SSE2_SaveShift(k)           \ 
 1525    AS2(    movq        xmm0, xmm6) \ 
 1526    AS2(    punpckhqdq  xmm6, xmm0) \ 
 1527    AS2(    movq        xmm1, xmm7) \ 
 1528    AS2(    punpckhqdq  xmm7, xmm1) \ 
 1529    AS2(    paddd       xmm6, xmm0) \ 
 1530    AS2(    pslldq      xmm6, 4)    \ 
 1531    AS2(    paddd       xmm7, xmm1) \ 
 1532    AS2(    paddd       xmm4, xmm6) \ 
 1533    AS2(    pslldq      xmm7, 4)    \ 
 1534    AS2(    movhlps     xmm6, xmm4) \ 
 1535    AS2(    movd        DWORD PTR [ecx+8*(k)], xmm4)    \ 
 1536    AS2(    paddd       xmm5, xmm7) \ 
 1537    AS2(    movhps      QWORD PTR [esp+12], xmm5)\ 
 1538    AS2(    psrlq       xmm4, 16)   \ 
 1539    AS2(    paddq       xmm4, xmm5) \ 
 1540    AS2(    movq        QWORD PTR [ecx+8*(k)+2], xmm4)  \ 
 1541    AS2(    psrlq       xmm4, 3*16) \ 
 1542    AS2(    paddd       xmm4, xmm6) \ 
 1543    AS2(    movq        QWORD PTR [esp+4], xmm4)\ 
 1545#define SSE2_FirstMultiply(i)               \ 
 1546    AS2(    movdqa      xmm7, [esi+(i)*16])\ 
 1547    AS2(    movdqa      xmm5, [edi-(i)*16])\ 
 1548    AS2(    pmuludq     xmm5, xmm7)     \ 
 1549    AS2(    movdqa      xmm4, [ebx])\ 
 1550    AS2(    movdqa      xmm6, xmm4)     \ 
 1551    AS2(    pand        xmm4, xmm5)     \ 
 1552    AS2(    psrld       xmm5, 16)       \ 
 1553    AS2(    pmuludq     xmm7, [edx-(i)*16])\ 
 1554    AS2(    pand        xmm6, xmm7)     \ 
 1555    AS2(    psrld       xmm7, 16) 
 1557#define Squ_Begin(n)                            \ 
 1560    AS2(    and     esp, 0xfffffff0)\ 
 1561    AS2(    lea     edi, [esp-32*n])\ 
 1562    AS2(    sub     esp, 32*n+16)\ 
 1564    AS2(    mov     esi, edi)                   \ 
 1565    AS2(    xor     edx, edx)                   \ 
 1567    ASS(    pshufd  xmm0, [eax+edx], 3,1,2,0)   \ 
 1568    ASS(    pshufd  xmm1, [eax+edx], 2,0,3,1)   \ 
 1569    AS2(    movdqa  [edi+2*edx], xmm0)      \ 
 1570    AS2(    psrlq   xmm0, 32)                   \ 
 1571    AS2(    movdqa  [edi+2*edx+16], xmm0)   \ 
 1572    AS2(    movdqa  [edi+16*n+2*edx], xmm1)     \ 
 1573    AS2(    psrlq   xmm1, 32)                   \ 
 1574    AS2(    movdqa  [edi+16*n+2*edx+16], xmm1)  \ 
 1576    AS2(    cmp     edx, 8*(n))                 \ 
 1578    AS2(    lea     edx, [edi+16*n])\ 
 1579    SSE2_FirstMultiply(0)                           \ 
 1583    AS2(    movdqa      xmm1, [esi+(i)*16]) \ 
 1584    AS2(    movdqa      xmm0, [edi-(i)*16]) \ 
 1585    AS2(    movdqa      xmm2, [ebx])    \ 
 1586    AS2(    pmuludq     xmm0, xmm1)             \ 
 1587    AS2(    pmuludq     xmm1, [edx-(i)*16]) \ 
 1588    AS2(    movdqa      xmm3, xmm2)         \ 
 1589    AS2(    pand        xmm2, xmm0)         \ 
 1590    AS2(    psrld       xmm0, 16)           \ 
 1591    AS2(    paddd       xmm4, xmm2)         \ 
 1592    AS2(    paddd       xmm5, xmm0)         \ 
 1593    AS2(    pand        xmm3, xmm1)         \ 
 1594    AS2(    psrld       xmm1, 16)           \ 
 1595    AS2(    paddd       xmm6, xmm3)         \ 
 1596    AS2(    paddd       xmm7, xmm1)     \ 
 // Squ_Acc2(i) expands to ASC(call, LSqu##i): the argument is token-pasted
 // onto "LSqu" to form the identifier handed to ASC together with `call`
 // (presumably the local assembly label of a shared squaring step -- confirm
 // against the ASC definition). Squ_Acc3 .. Squ_Acc8 are plain aliases of
 // Squ_Acc2, so every width produces the same expansion for a given i.
 1599#define Squ_Acc2(i)     ASC(call, LSqu##i) 
 1600#define Squ_Acc3(i)     Squ_Acc2(i) 
 1601#define Squ_Acc4(i)     Squ_Acc2(i) 
 1602#define Squ_Acc5(i)     Squ_Acc2(i) 
 1603#define Squ_Acc6(i)     Squ_Acc2(i) 
 1604#define Squ_Acc7(i)     Squ_Acc2(i) 
 1605#define Squ_Acc8(i)     Squ_Acc2(i) 
 1607#define SSE2_End(E, n)                  \ 
 1608    SSE2_SaveShift(2*(n)-3)         \ 
 1609    AS2(    movdqa      xmm7, [esi+16]) \ 
 1610    AS2(    movdqa      xmm0, [edi])    \ 
 1611    AS2(    pmuludq     xmm0, xmm7)             \ 
 1612    AS2(    movdqa      xmm2, [ebx])        \ 
 1613    AS2(    pmuludq     xmm7, [edx])    \ 
 1614    AS2(    movdqa      xmm6, xmm2)             \ 
 1615    AS2(    pand        xmm2, xmm0)             \ 
 1616    AS2(    psrld       xmm0, 16)               \ 
 1617    AS2(    paddd       xmm4, xmm2)             \ 
 1618    AS2(    paddd       xmm5, xmm0)             \ 
 1619    AS2(    pand        xmm6, xmm7)             \ 
 1620    AS2(    psrld       xmm7, 16)   \ 
 1621    SSE2_SaveShift(2*(n)-2)         \ 
 1622    SSE2_FinalSave(2*(n)-1)         \ 
 // The square, multiply and multiply-top sequences all finish through the
 // same SSE2_End(E, n) macro; they differ only in the name passed as E
 // (SquEpilogue, MulEpilogue or TopEpilogue). n is forwarded unchanged.
 1626#define Squ_End(n)      SSE2_End(SquEpilogue, n) 
 1627#define Mul_End(n)      SSE2_End(MulEpilogue, n) 
 1628#define Top_End(n)      SSE2_End(TopEpilogue, n) 
 1630#define Squ_Column1(k, i)   \ 
 1631    Squ_SSE2_SaveShift(k)                   \ 
 1633    SSE2_FirstMultiply(1)\ 
 1635    AS2(    paddd       xmm4, xmm4)     \ 
 1636    AS2(    paddd       xmm5, xmm5)     \ 
 1637    AS2(    movdqa      xmm3, [esi])                \ 
 1638    AS2(    movq        xmm1, QWORD PTR [esi+8])    \ 
 1639    AS2(    pmuludq     xmm1, xmm3)     \ 
 1640    AS2(    pmuludq     xmm3, xmm3)     \ 
 1641    AS2(    movdqa      xmm0, [ebx])\ 
 1642    AS2(    movdqa      xmm2, xmm0)     \ 
 1643    AS2(    pand        xmm0, xmm1)     \ 
 1644    AS2(    psrld       xmm1, 16)       \ 
 1645    AS2(    paddd       xmm6, xmm0)     \ 
 1646    AS2(    paddd       xmm7, xmm1)     \ 
 1647    AS2(    pand        xmm2, xmm3)     \ 
 1648    AS2(    psrld       xmm3, 16)       \ 
 1649    AS2(    paddd       xmm6, xmm6)     \ 
 1650    AS2(    paddd       xmm7, xmm7)     \ 
 1651    AS2(    paddd       xmm4, xmm2)     \ 
 1652    AS2(    paddd       xmm5, xmm3)     \ 
 1653    AS2(    movq        xmm0, QWORD PTR [esp+4])\ 
 1654    AS2(    movq        xmm1, QWORD PTR [esp+12])\ 
 1655    AS2(    paddd       xmm4, xmm0)\ 
 1656    AS2(    paddd       xmm5, xmm1)\ 
 1658#define Squ_Column0(k, i)   \ 
 1659    Squ_SSE2_SaveShift(k)                   \ 
 1662    SSE2_FirstMultiply(1)\ 
 1664    AS2(    paddd       xmm6, xmm6)     \ 
 1665    AS2(    paddd       xmm7, xmm7)     \ 
 1666    AS2(    paddd       xmm4, xmm4)     \ 
 1667    AS2(    paddd       xmm5, xmm5)     \ 
 1668    AS2(    movq        xmm0, QWORD PTR [esp+4])\ 
 1669    AS2(    movq        xmm1, QWORD PTR [esp+12])\ 
 1670    AS2(    paddd       xmm4, xmm0)\ 
 1671    AS2(    paddd       xmm5, xmm1)\ 
 1673#define SSE2_MulAdd45                       \ 
 1674    AS2(    movdqa      xmm7, [esi])    \ 
 1675    AS2(    movdqa      xmm0, [edi])    \ 
 1676    AS2(    pmuludq     xmm0, xmm7)             \ 
 1677    AS2(    movdqa      xmm2, [ebx])        \ 
 1678    AS2(    pmuludq     xmm7, [edx])    \ 
 1679    AS2(    movdqa      xmm6, xmm2)             \ 
 1680    AS2(    pand        xmm2, xmm0)             \ 
 1681    AS2(    psrld       xmm0, 16)               \ 
 1682    AS2(    paddd       xmm4, xmm2)             \ 
 1683    AS2(    paddd       xmm5, xmm0)             \ 
 1684    AS2(    pand        xmm6, xmm7)             \ 
 1685    AS2(    psrld       xmm7, 16) 
 1687#define Mul_Begin(n)                            \ 
 1690    AS2(    and     esp, 0xfffffff0)\ 
 1691    AS2(    sub     esp, 48*n+16)\ 
 1693    AS2(    xor     edx, edx)                   \ 
 1695    ASS(    pshufd  xmm0, [eax+edx], 3,1,2,0)   \ 
 1696    ASS(    pshufd  xmm1, [eax+edx], 2,0,3,1)   \ 
 1697    ASS(    pshufd  xmm2, [edi+edx], 3,1,2,0)   \ 
 1698    AS2(    movdqa  [esp+20+2*edx], xmm0)       \ 
 1699    AS2(    psrlq   xmm0, 32)                   \ 
 1700    AS2(    movdqa  [esp+20+2*edx+16], xmm0)    \ 
 1701    AS2(    movdqa  [esp+20+16*n+2*edx], xmm1)      \ 
 1702    AS2(    psrlq   xmm1, 32)                   \ 
 1703    AS2(    movdqa  [esp+20+16*n+2*edx+16], xmm1)   \ 
 1704    AS2(    movdqa  [esp+20+32*n+2*edx], xmm2)      \ 
 1705    AS2(    psrlq   xmm2, 32)                   \ 
 1706    AS2(    movdqa  [esp+20+32*n+2*edx+16], xmm2)   \ 
 1708    AS2(    cmp     edx, 8*(n))                 \ 
 1710    AS2(    lea     edi, [esp+20])\ 
 1711    AS2(    lea     edx, [esp+20+16*n])\ 
 1712    AS2(    lea     esi, [esp+20+32*n])\ 
 1713    SSE2_FirstMultiply(0)                           \ 
 1717    AS2(    movdqa      xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16])   \ 
 1718    AS2(    movdqa      xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16])   \ 
 1719    AS2(    movdqa      xmm2, [ebx])    \ 
 1720    AS2(    pmuludq     xmm0, xmm1)             \ 
 1721    AS2(    pmuludq     xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16])   \ 
 1722    AS2(    movdqa      xmm3, xmm2)         \ 
 1723    AS2(    pand        xmm2, xmm0)         \ 
 1724    AS2(    psrld       xmm0, 16)           \ 
 1725    AS2(    paddd       xmm4, xmm2)         \ 
 1726    AS2(    paddd       xmm5, xmm0)         \ 
 1727    AS2(    pand        xmm3, xmm1)         \ 
 1728    AS2(    psrld       xmm1, 16)           \ 
 1729    AS2(    paddd       xmm6, xmm3)         \ 
 1730    AS2(    paddd       xmm7, xmm1)     \ 
 // Mul_Acc2(i) expands to ASC(call, LMul##i): the argument is token-pasted
 // onto "LMul" to form the identifier handed to ASC together with `call`
 // (presumably the local assembly label of a shared multiply-accumulate
 // step -- confirm against the ASC definition). Mul_Acc3 .. Mul_Acc16 are
 // plain aliases of Mul_Acc2, so every width produces the same expansion
 // for a given i.
 1733#define Mul_Acc2(i)     ASC(call, LMul##i) 
 1734#define Mul_Acc3(i)     Mul_Acc2(i) 
 1735#define Mul_Acc4(i)     Mul_Acc2(i) 
 1736#define Mul_Acc5(i)     Mul_Acc2(i) 
 1737#define Mul_Acc6(i)     Mul_Acc2(i) 
 1738#define Mul_Acc7(i)     Mul_Acc2(i) 
 1739#define Mul_Acc8(i)     Mul_Acc2(i) 
 1740#define Mul_Acc9(i)     Mul_Acc2(i) 
 1741#define Mul_Acc10(i)    Mul_Acc2(i) 
 1742#define Mul_Acc11(i)    Mul_Acc2(i) 
 1743#define Mul_Acc12(i)    Mul_Acc2(i) 
 1744#define Mul_Acc13(i)    Mul_Acc2(i) 
 1745#define Mul_Acc14(i)    Mul_Acc2(i) 
 1746#define Mul_Acc15(i)    Mul_Acc2(i) 
 1747#define Mul_Acc16(i)    Mul_Acc2(i) 
 1749#define Mul_Column1(k, i)   \ 
 1755#define Mul_Column0(k, i)   \ 
 1763    AS2(    movdqa      xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16])   \ 
 1764    AS2(    movdqa      xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16])   \ 
 1765    AS2(    pmuludq     xmm0, xmm1)             \ 
 1766    AS2(    pmuludq     xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16])       \ 
 1767    AS2(    paddq       xmm4, xmm0)             \ 
 1768    AS2(    paddd       xmm6, xmm1) 
 1770#define Bot_SaveAcc(k)                  \ 
 1774    AS2(    movdqa      xmm6, [esi])    \ 
 1775    AS2(    movdqa      xmm0, [edi])    \ 
 1776    AS2(    pmuludq     xmm0, xmm6)             \ 
 1777    AS2(    paddq       xmm4, xmm0)             \ 
 1778    AS2(    psllq       xmm5, 16)               \ 
 1779    AS2(    paddq       xmm4, xmm5)             \ 
 1780    AS2(    pmuludq     xmm6, [edx]) 
 1783    AS2(    movhlps     xmm7, xmm6)         \ 
 1784    AS2(    paddd       xmm6, xmm7)         \ 
 1785    AS2(    psllq       xmm6, 32)           \ 
 1786    AS2(    paddd       xmm4, xmm6)         \ 
 1787    AS2(    movq        QWORD PTR [ecx+8*((n)-1)], xmm4)    \ 
 1791#define Top_Begin(n)                            \ 
 1794    AS2(    and     esp, 0xfffffff0)\ 
 1795    AS2(    sub     esp, 48*n+16)\ 
 1797    AS2(    xor     edx, edx)                   \ 
 1799    ASS(    pshufd  xmm0, [eax+edx], 3,1,2,0)   \ 
 1800    ASS(    pshufd  xmm1, [eax+edx], 2,0,3,1)   \ 
 1801    ASS(    pshufd  xmm2, [edi+edx], 3,1,2,0)   \ 
 1802    AS2(    movdqa  [esp+20+2*edx], xmm0)       \ 
 1803    AS2(    psrlq   xmm0, 32)                   \ 
 1804    AS2(    movdqa  [esp+20+2*edx+16], xmm0)    \ 
 1805    AS2(    movdqa  [esp+20+16*n+2*edx], xmm1)      \ 
 1806    AS2(    psrlq   xmm1, 32)                   \ 
 1807    AS2(    movdqa  [esp+20+16*n+2*edx+16], xmm1)   \ 
 1808    AS2(    movdqa  [esp+20+32*n+2*edx], xmm2)      \ 
 1809    AS2(    psrlq   xmm2, 32)                   \ 
 1810    AS2(    movdqa  [esp+20+32*n+2*edx+16], xmm2)   \ 
 1812    AS2(    cmp     edx, 8*(n))                 \ 
 1814    AS2(    mov     eax, esi)                   \ 
 1815    AS2(    lea     edi, [esp+20+00*n+16*(n/2-1)])\ 
 1816    AS2(    lea     edx, [esp+20+16*n+16*(n/2-1)])\ 
 1817    AS2(    lea     esi, [esp+20+32*n+16*(n/2-1)])\ 
 1818    AS2(    pxor    xmm4, xmm4)\ 
 1819    AS2(    pxor    xmm5, xmm5) 
 1822    AS2(    movq        xmm0, QWORD PTR [esi+i/2*(1-(i-2*(i/2))*2)*16+8])   \ 
 1823    AS2(    pmuludq     xmm0, [edx-i/2*(1-(i-2*(i/2))*2)*16])   \ 
 1824    AS2(    psrlq       xmm0, 48)               \ 
 1825    AS2(    paddd       xmm5, xmm0)\ 
 1827#define Top_Column0(i)  \ 
 1828    AS2(    psllq       xmm5, 32)               \ 
 1834#define Top_Column1(i)  \ 
 1840    AS2(    movd        xmm0, eax)\ 
 1841    AS2(    movd        xmm1, [ecx+4])\ 
 1842    AS2(    psrld       xmm1, 16)\ 
 1843    AS2(    pcmpgtd     xmm1, xmm0)\ 
 1844    AS2(    psrld       xmm1, 31)\ 
 1845    AS2(    paddd       xmm4, xmm1)\ 
 1847void SSE2_Square4(
word *C, 
const word *A)
 
 1854void SSE2_Square8(
word *C, const 
word *A)
 
 1870void SSE2_Square16(
word *C, const 
word *A)
 
 1875    Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
 
 1894void SSE2_Square32(
word *C, const 
word *A)
 
 1898    Squ_Acc(8) Squ_Acc(7) Squ_Acc(6) Squ_Acc(5) Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
 
 1932void SSE2_Multiply4(
word *C, const 
word *A, const 
word *B)
 
 1944void SSE2_Multiply8(
word *C, const 
word *A, const 
word *B)
 
 1949    Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
 
 1960void SSE2_Multiply16(
word *C, const 
word *A, const 
word *B)
 
 1965    Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
 
 1984void SSE2_Multiply32(
word *C, const 
word *A, const 
word *B)
 
 1988    Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
 
 2022void SSE2_MultiplyBottom4(
word *C, const 
word *A, const 
word *B)
 
 2025    Bot_SaveAcc(0) Bot_Acc(2)
 
 2029void SSE2_MultiplyBottom8(
word *C, const 
word *A, const 
word *B)
 
 2034    Mul_Acc(3) Mul_Acc(2)
 
 2039    Bot_SaveAcc(2) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
 
 2043void SSE2_MultiplyBottom16(
word *C, const 
word *A, const 
word *B)
 
 2048    Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
 
 2057    Bot_SaveAcc(6) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
 
 2061void SSE2_MultiplyBottom32(
word *C, const 
word *A, const 
word *B)
 
 2066    Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
 
 2083    Bot_SaveAcc(14) Bot_Acc(16) Bot_Acc(15) Bot_Acc(14) Bot_Acc(13) Bot_Acc(12) Bot_Acc(11) Bot_Acc(10) Bot_Acc(9) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
 
 2090    Top_Acc(3) Top_Acc(2) Top_Acc(1)
 
 2093    Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
 
 2105    Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
 
 2108    Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
 
 2124    Top_Acc(15) Top_Acc(14) Top_Acc(13) Top_Acc(12) Top_Acc(11) Top_Acc(10) Top_Acc(9) Top_Acc(8) Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
 
 2127    Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
 
 // PAdd: pointer to a CRYPTOPP_FASTCALL routine taking the word count N first,
 // then the destination C and the two sources A and B. Add() and Subtract()
 // call through pointers of this type (s_pAdd / s_pSub) and return the int
 // result directly (presumably the carry or borrow out -- confirm).
 2152typedef int (CRYPTOPP_FASTCALL * PAdd)(
size_t N, 
word *C, 
const word *A, 
const word *B);
 
 // PMul: pointer to a fixed-size kernel writing C from the two inputs A and B;
 // this is the element type of the s_pMul and s_pBot tables.
 2153typedef void (* PMul)(
word *C, 
const word *A, 
const word *B);
 
 // PSqu: pointer to a fixed-size kernel writing C from the single input A;
 // this is the element type of the s_pSqu table.
 2154typedef void (* PSqu)(
word *C, 
const word *A);
 
 2157#if CRYPTOPP_INTEGER_SSE2 
 2158static PAdd s_pAdd = &Baseline_Add, s_pSub = &Baseline_Sub;
 
 2159static size_t s_recursionLimit = 8;
 
 2161static const size_t s_recursionLimit = 16;
 
 // Dispatch tables for the fixed-size kernels, filled in by
 // SetFunctionPointers(). The visible call sites index s_pMul, s_pBot and
 // s_pTop with N/4. Only slots 0, 1, 2, 4 and 8 are assigned in the visible
 // code, and slot 8 only inside the CRYPTOPP_INTEGER_SSE2 section, so the
 // other slots are never written there.
 2164static PMul s_pMul[9], s_pBot[9];
 
 2165static PSqu s_pSqu[9];
 
 2166static PMulTop s_pTop[9];
 
 2168void SetFunctionPointers()
 
 2170    s_pMul[0] = &Baseline_Multiply2;
 
 2171    s_pBot[0] = &Baseline_MultiplyBottom2;
 
 2172    s_pSqu[0] = &Baseline_Square2;
 
 2173    s_pTop[0] = &Baseline_MultiplyTop2;
 
 2174    s_pTop[1] = &Baseline_MultiplyTop4;
 
 2176#if CRYPTOPP_INTEGER_SSE2 
 2185        s_recursionLimit = 32;
 
 2187        s_pMul[1] = &SSE2_Multiply4;
 
 2188        s_pMul[2] = &SSE2_Multiply8;
 
 2189        s_pMul[4] = &SSE2_Multiply16;
 
 2190        s_pMul[8] = &SSE2_Multiply32;
 
 2192        s_pBot[1] = &SSE2_MultiplyBottom4;
 
 2193        s_pBot[2] = &SSE2_MultiplyBottom8;
 
 2194        s_pBot[4] = &SSE2_MultiplyBottom16;
 
 2195        s_pBot[8] = &SSE2_MultiplyBottom32;
 
 2197        s_pSqu[1] = &SSE2_Square4;
 
 2198        s_pSqu[2] = &SSE2_Square8;
 
 2199        s_pSqu[4] = &SSE2_Square16;
 
 2200        s_pSqu[8] = &SSE2_Square32;
 
 2202        s_pTop[2] = &SSE2_MultiplyTop8;
 
 2203        s_pTop[4] = &SSE2_MultiplyTop16;
 
 2204        s_pTop[8] = &SSE2_MultiplyTop32;
 
 2209        s_pMul[1] = &Baseline_Multiply4;
 
 2210        s_pMul[2] = &Baseline_Multiply8;
 
 2212        s_pBot[1] = &Baseline_MultiplyBottom4;
 
 2213        s_pBot[2] = &Baseline_MultiplyBottom8;
 
 2215        s_pSqu[1] = &Baseline_Square4;
 
 2216        s_pSqu[2] = &Baseline_Square8;
 
 2218        s_pTop[2] = &Baseline_MultiplyTop8;
 
 2220#if !CRYPTOPP_INTEGER_SSE2 
 2221        s_pMul[4] = &Baseline_Multiply16;
 
 2222        s_pBot[4] = &Baseline_MultiplyBottom16;
 
 2223        s_pSqu[4] = &Baseline_Square16;
 
 2224        s_pTop[4] = &Baseline_MultiplyTop16;
 
 2229inline int Add(
word *C, 
const word *A, 
const word *B, 
size_t N)
 
 2231#if CRYPTOPP_INTEGER_SSE2 
 2232    return s_pAdd(N, C, A, B);
 
 2234    return Baseline_Add(N, C, A, B);
 
 2238inline int Subtract(
word *C, 
const word *A, 
const word *B, 
size_t N)
 
 2240#if CRYPTOPP_INTEGER_SSE2 
 2241    return s_pSub(N, C, A, B);
 
 2243    return Baseline_Sub(N, C, A, B);
 
 2270void RecursiveMultiply(
word *R, 
word *T, 
const word *A, 
const word *B, 
size_t N)
 
 2274    if (N <= s_recursionLimit)
 
 2275        s_pMul[N/4](R, A, B);
 
 2278        const size_t N2 = N/2;
 
 2280        size_t AN2 = Compare(A0, A1, N2) > 0 ?  0 : N2;
 
 2281        Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
 
 2283        size_t BN2 = Compare(B0, B1, N2) > 0 ?  0 : N2;
 
 2284        Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
 
 2286        RecursiveMultiply(R2, T2, A1, B1, N2);
 
 2287        RecursiveMultiply(T0, T2, R0, R1, N2);
 
 2288        RecursiveMultiply(R0, T2, A0, B0, N2);
 
 2292        int c2 = Add(R2, R2, R1, N2);
 
 2294        c2 += Add(R1, R2, R0, N2);
 
 2295        c3 += Add(R2, R2, R3, N2);
 
 2298            c3 -= Subtract(R1, R1, T0, N);
 
 2300            c3 += Add(R1, R1, T0, N);
 
 2302        c3 += Increment(R2, N2, c2);
 
 2304        Increment(R3, N2, c3);
 
 2312void RecursiveSquare(
word *R, 
word *T, 
const word *A, 
size_t N)
 
 2316    if (N <= s_recursionLimit)
 
 2320        const size_t N2 = N/2;
 
 2322        RecursiveSquare(R0, T2, A0, N2);
 
 2323        RecursiveSquare(R2, T2, A1, N2);
 
 2324        RecursiveMultiply(T0, T2, A0, A1, N2);
 
 2326        int carry = Add(R1, R1, T0, N);
 
 2327        carry += Add(R1, R1, T0, N);
 
 2328        Increment(R3, N2, carry);
 
 2337void RecursiveMultiplyBottom(
word *R, 
word *T, 
const word *A, 
const word *B, 
size_t N)
 
 2341    if (N <= s_recursionLimit)
 
 2342        s_pBot[N/4](R, A, B);
 
 2345        const size_t N2 = N/2;
 
 2347        RecursiveMultiply(R, T, A0, B0, N2);
 
 2348        RecursiveMultiplyBottom(T0, T1, A1, B0, N2);
 
 2349        Add(R1, R1, T0, N2);
 
 2350        RecursiveMultiplyBottom(T0, T1, A0, B1, N2);
 
 2351        Add(R1, R1, T0, N2);
 
 2365    if (N <= s_recursionLimit)
 
 2366        s_pTop[N/4](R, A, B, L[N-1]);
 
 2369        const size_t N2 = N/2;
 
 2371        size_t AN2 = Compare(A0, A1, N2) > 0 ?  0 : N2;
 
 2372        Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
 
 2374        size_t BN2 = Compare(B0, B1, N2) > 0 ?  0 : N2;
 
 2375        Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
 
 2377        RecursiveMultiply(T0, T2, R0, R1, N2);
 
 2378        RecursiveMultiply(R0, T2, A1, B1, N2);
 
 2383        int c2 = Subtract(T2, L+N2, L, N2);
 
 2387            c2 -= Add(T2, T2, T0, N2);
 
 2388            t = (Compare(T2, R0, N2) == -1);
 
 2389            c3 = t - Subtract(T2, T2, T1, N2);
 
 2393            c2 += Subtract(T2, T2, T0, N2);
 
 2394            t = (Compare(T2, R0, N2) == -1);
 
 2395            c3 = t + Add(T2, T2, T1, N2);
 
 2400            c3 += Increment(T2, N2, c2);
 
 2402            c3 -= Decrement(T2, N2, -c2);
 
 2403        c3 += Add(R0, T2, R1, N2);
 
 2406        Increment(R1, N2, c3);
 
 2410inline void Multiply(
word *R, 
word *T, 
const word *A, 
const word *B, 
size_t N)
 
 2412    RecursiveMultiply(R, T, A, B, N);
 
 2417    RecursiveSquare(R, T, A, N);
 
 2420inline void MultiplyBottom(
word *R, 
word *T, 
const word *A, 
const word *B, 
size_t N)
 
 2422    RecursiveMultiplyBottom(R, T, A, B, N);
 
 2430void AsymmetricMultiply(
word *R, 
word *T, 
const word *A, 
size_t NA, 
const word *B, 
size_t NB)
 
 2436            Multiply(R, T, A, B, NA);
 
 2457            R[NB] = LinearMultiply(R, B, A[0], NB);
 
 2465            R[NB] = R[NB+1] = 0;
 
 2473        Multiply(R, T, A, B, NA);
 
 2476        for (i=2*NA; i<NB; i+=2*NA)
 
 2477            Multiply(T+NA+i, T, A, B+i, NA);
 
 2478        for (i=NA; i<NB; i+=2*NA)
 
 2479            Multiply(R+i, T, A, B+i, NA);
 
 2483        for (i=0; i<NB; i+=2*NA)
 
 2484            Multiply(R+i, T, A, B+i, NA);
 
 2485        for (i=NA; i<NB; i+=2*NA)
 
 2486            Multiply(T+NA+i, T, A, B+i, NA);
 
 2489    if (Add(R+NA, R+NA, T+2*NA, NB-NA))
 
 2490        Increment(R+NB, NA);
 
 2497void RecursiveInverseModPower2(
word *R, 
word *T, 
const word *A, 
size_t N)
 
 2502        const size_t N2 = N/2;
 
 2503        RecursiveInverseModPower2(R0, T0, A0, N2);
 
 2506        MultiplyTop(R1, T1, T0, R0, A0, N2);
 
 2507        MultiplyBottom(T0, T1, R0, A1, N2);
 
 2508        Add(T0, R1, T0, N2);
 
 2509        TwosComplement(T0, N2);
 
 2510        MultiplyBottom(R1, T1, R0, T0, N2);
 
 2514        T[0] = AtomicInverseModPower2(A[0]);
 
 2516        s_pBot[0](T+2, T, A);
 
 2517        TwosComplement(T+2, 2);
 
 2518        Increment(T+2, 2, 2);
 
 2519        s_pBot[0](R, T, T+2);
 
 2532    MultiplyBottom(R, T, X, U, N);
 
 2533    MultiplyTop(T, T+N, X, R, M, N);
 
 2534    word borrow = Subtract(T, X+N, T, N);
 
 2536    word carry = Add(T+N, T, M, N);
 
 2538    CRYPTOPP_UNUSED(carry), CRYPTOPP_UNUSED(borrow);
 
 2541    const word u = 0-U[0];
 
 2543    for (
size_t i=0; i<N; i++)
 
 2545        const word t = u * X[i];
 
 2547        for (
size_t j=0; j<N; j+=2)
 
 2549            MultiplyWords(p, t, M[j]);
 
 2550            Acc2WordsBy1(p, X[i+j]);
 
 2552            X[i+j] = LowWord(p);
 
 2554            MultiplyWords(p, t, M[j+1]);
 
 2555            Acc2WordsBy1(p, X[i+j+1]);
 
 2557            X[i+j+1] = LowWord(p);
 
 2561        if (Increment(X+N+i, N-i, c))
 
 2562            while (!Subtract(X+N, X+N, M, N)) {}
 
 2567    __m64 u = _mm_cvtsi32_si64(0-U[0]), p;
 
 2568    for (
size_t i=0; i<N; i++)
 
 2570        __m64 t = _mm_cvtsi32_si64(X[i]);
 
 2571        t = _mm_mul_su32(t, u);
 
 2572        __m64 c = _mm_setzero_si64();
 
 2573        for (
size_t j=0; j<N; j+=2)
 
 2575            p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j]));
 
 2576            p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j]));
 
 2577            c = _mm_add_si64(c, p);
 
 2578            X[i+j] = _mm_cvtsi64_si32(c);
 
 2579            c = _mm_srli_si64(c, 32);
 
 2580            p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j+1]));
 
 2581            p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j+1]));
 
 2582            c = _mm_add_si64(c, p);
 
 2583            X[i+j+1] = _mm_cvtsi64_si32(c);
 
 2584            c = _mm_srli_si64(c, 32);
 
 2587        if (Increment(X+N+i, N-i, _mm_cvtsi64_si32(c)))
 
 2588            while (!Subtract(X+N, X+N, M, N)) {}
 
 2617    const size_t N2 = N/2;
 
 2618    Multiply(T0, T2, V0, X3, N2);
 
 2619    int c2 = Add(T0, T0, X0, N);
 
 2620    MultiplyBottom(T3, T2, T0, U, N2);
 
 2621    MultiplyTop(T2, R, T0, T3, M0, N2);
 
 2622    c2 -= Subtract(T2, T1, T2, N2);
 
 2623    Multiply(T0, R, T3, M1, N2);
 
 2624    c2 -= Subtract(T0, T2, T0, N2);
 
 2625    int c3 = -(int)Subtract(T1, X2, T1, N2);
 
 2626    Multiply(R0, T2, V1, X3, N2);
 
 2627    c3 += Add(R, R, T, N);
 
 2630        c3 += Increment(R1, N2);
 
 2632        c3 -= Decrement(R1, N2, -c2);
 
 2636        Subtract(R, R, M, N);
 
 2730static inline void AtomicDivide(
word *Q, 
const word *A, 
const word *B)
 
 2733    DWord q = DivideFourWordsByTwo<word, DWord>(T, DWord(A[0], A[1]), DWord(A[2], A[3]), DWord(B[0], B[1]));
 
 2734    Q[0] = q.GetLowHalf();
 
 2735    Q[1] = q.GetHighHalf();
 
 2737#if defined(CRYPTOPP_DEBUG) 
 2741        CRYPTOPP_ASSERT(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
 
 2751static void CorrectQuotientEstimate(
word *R, 
word *T, 
word *Q, 
const word *B, 
size_t N)
 
 2755    AsymmetricMultiply(T, T+N+2, Q, 2, B, N);
 
 2757    word borrow = Subtract(R, R, T, N+2);
 
 2759    CRYPTOPP_UNUSED(borrow);
 
 2761    while (R[N] || Compare(R, B, N) >= 0)
 
 2763        R[N] -= Subtract(R, R, B, N);
 
 2764        Q[1] += (++Q[0]==0);
 
 2783    word *
const TB=T+NA+2;
 
 2784    word *
const TP=T+NA+2+NB;
 
 2787    unsigned shiftWords = (B[NB-1]==0);
 
 2788    TB[0] = TB[NB-1] = 0;
 
 2789    CopyWords(TB+shiftWords, B, NB-shiftWords);
 
 2795    TA[0] = TA[NA] = TA[NA+1] = 0;
 
 2799    if (TA[NA+1]==0 && TA[NA] <= 1)
 
 2801        Q[NA-NB+1] = Q[NA-NB] = 0;
 
 2802        while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0)
 
 2804            TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB);
 
 2815    BT[0] = TB[NB-2] + 1;
 
 2816    BT[1] = TB[NB-1] + (BT[0]==0);
 
 2819    for (
size_t i=NA-2; i>=NB; i-=2)
 
 2821        AtomicDivide(Q+i-NB, TA+i-2, BT);
 
 2822        CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB);
 
 2830static inline size_t EvenWordCount(
const word *X, 
size_t N)
 
 2832    while (N && X[N-2]==0 && X[N-1]==0)
 
 2843unsigned int AlmostInverse(
word *R, 
word *T, 
const word *A, 
size_t NA, 
const word *M, 
size_t N)
 
 2851    size_t bcLen=2, fgLen=EvenWordCount(M, N);
 
 2865            if (EvenWordCount(f, fgLen)==0)
 
 2872            bcLen += 2 * (c[bcLen-1] != 0);
 
 2884        if (t==1 && f[1]==0 && EvenWordCount(f+2, fgLen-2)==0)
 
 2887                Subtract(R, M, b, N);
 
 2896        bcLen += 2 * (t!=0);
 
 2899        bool swap = Compare(f, g, fgLen)==-1;
 
 2904        fgLen -= 2 * !(f[fgLen-2] | f[fgLen-1]);
 
 2906        Subtract(f, f, g, fgLen);
 
 2907        t = Add(b, b, c, bcLen);
 
 2918void DivideByPower2Mod(
word *R, 
const word *A, 
size_t k, 
const word *M, 
size_t N)
 
 2928            word carry = Add(R, R, M, N);
 
 2939void MultiplyByPower2Mod(
word *R, 
const word *A, 
size_t k, 
const word *M, 
size_t N)
 
 2945            Subtract(R, R, M, N);
 
 2950static const unsigned int RoundupSizeTable[] = {2, 2, 2, 4, 4, 8, 8, 8, 8};
 
 2952static inline size_t RoundupSize(
size_t n)
 
 2955        return RoundupSizeTable[n];
 
 2967    : reg(2), sign(POSITIVE)
 
 2969    reg[0] = reg[1] = 0;
 
 2973    : reg(RoundupSize(t.WordCount())), sign(t.sign)
 
 2981    reg[0] = 
word(value);
 
 2982    reg[1] = 
word(SafeRightShift<WORD_BITS>(value));
 
 2995    reg[0] = 
word(value);
 
 2996    reg[1] = 
word(SafeRightShift<WORD_BITS>((
unsigned long)value));
 
 3011    unsigned long value = (
unsigned long)reg[0];
 
 3012    value += SafeLeftShift<WORD_BITS, unsigned long>((
unsigned long)reg[1]);
 
 3015        return (
signed long)value >= 0;
 
 3017        return -(
signed long)value < 0;
 
 3024    unsigned long value = (
unsigned long)reg[0];
 
 3025    value += SafeLeftShift<WORD_BITS, unsigned long>((
unsigned long)reg[1]);
 
 3026    return sign==
POSITIVE ? value : -(
signed long)value;
 
 3035        Decode(encodedInteger, byteCount, s);
 
 3040        encodedInteger.
Get(block, block.size());
 
 3041        std::reverse(block.begin(), block.begin()+block.size());
 
 3043        Decode(block.begin(), block.size(), s);
 
 3054        Decode(encodedInteger, byteCount, s);
 
 3059#if (CRYPTOPP_MSC_VERSION >= 1500) 
 3060        std::reverse_copy(encodedInteger, encodedInteger+byteCount,
 
 3061            stdext::make_checked_array_iterator(block.begin(), block.size()));
 
 3063        std::reverse_copy(encodedInteger, encodedInteger+byteCount, block.begin());
 
 3065        Decode(block.begin(), block.size(), s);
 
 3083    if (!
Randomize(rng, min, max, rnType, equiv, mod))
 
 3103        if (reg.
size() != t.reg.
size() || t.reg[t.reg.
size()/2] == 0)
 
 3154    for (
unsigned int j=0; j<n; j++)
 
 3176    std::swap(sign, a.sign);
 
 3180    : reg(RoundupSize(length)), sign(POSITIVE)
 
 3191    int radix, sign = 1;
 
 3194    unsigned int length;
 
 3195    for (length = 0; str[length] != 0; length++) {}
 
 3203    switch (str[length-1])
 
 3225        str += 1, length -= 1;
 
 3230    if (length > 2 && str[0] == 
'0')
 
 3232        if (str[1] == 
'x' || str[1] == 
'X')
 
 3235            str += 2, length -= 2;
 
 3237        else if (str[1] == 
'n' || str[1] == 
'N')
 
 3240            str += 2, length -= 2;
 
 3242        else if (str[1] == 
'o' || str[1] == 
'O')
 
 3245            str += 2, length -= 2;
 
 3251        for (
unsigned int i=0; i<length; i++)
 
 3253            int digit, ch = 
static_cast<int>(str[i]);
 
 3256            if (ch >= 
'0' && ch <= 
'9')
 
 3258            else if (ch >= 
'a' && ch <= 
'f')
 
 3259                digit = ch - 
'a' + 10;
 
 3260            else if (ch >= 
'A' && ch <= 
'F')
 
 3261                digit = ch - 
'A' + 10;
 
 3275        unsigned int nh = 0, nl = 0, nc = 0;
 
 3278        for (
unsigned int i=0; i<length; i++)
 
 3280            int digit, ch = 
static_cast<int>(str[i]);
 
 3282            if (ch >= 
'0' && ch <= 
'9')
 
 3284            else if (ch >= 
'a' && ch <= 
'f')
 
 3285                digit = ch - 
'a' + 10;
 
 3286            else if (ch >= 
'A' && ch <= 
'F')
 
 3287                digit = ch - 
'A' + 10;
 
 3300                    v += position * (nh << 4 | nl);
 
 3301                    nc = 0, position <<= 8;
 
 3311        for (
int i=
static_cast<int>(length)-1; i>=0; i--)
 
 3313            int digit, ch = 
static_cast<int>(str[i]);
 
 3315            if (ch >= 
'0' && ch <= 
'9')
 
 3317            else if (ch >= 
'a' && ch <= 
'f')
 
 3318                digit = ch - 
'a' + 10;
 
 3319            else if (ch >= 
'A' && ch <= 
'F')
 
 3320                digit = ch - 
'A' + 10;
 
 3339    : reg(2), sign(POSITIVE)
 
 3341    *
this = StringToInteger(str,order);
 
 3345    : reg(2), sign(POSITIVE)
 
 3347    *
this = StringToInteger(str,order);
 
 3377    Decode(store, inputLen, s);
 
 3390    while (inputLen>0 && (sign==
POSITIVE ? b==0 : b==0xff))
 
 3398    for (
size_t i=inputLen; i > 0; i--)
 
 3408        TwosComplement(reg, reg.
size());
 
 3416    const bool pre = (signedness == 
UNSIGNED);
 
 3427void Integer::Encode(
byte *output, 
size_t outputLen, Signedness signedness)
 const 
 3431    Encode(sink, outputLen, signedness);
 
 3438        for (
size_t i=outputLen; i > 0; i--)
 
 3466    if (!dec.IsDefiniteLength() || dec.MaxRetrievable() < dec.RemainingLength())
 
 3482    if (!dec.IsDefiniteLength() || dec.RemainingLength() != length)
 
 3502    return 2 + byteCount;
 
 3516        throw OpenPGPDecodeErr();
 
 3522    const size_t nbytes = nbits/8 + 1;
 
 3530    buf[0] = (
byte)
Crop(buf[0], nbits % 8);
 
 3540    const unsigned int nbits = range.
BitCount();
 
 3546    while (*
this > range);
 
 3554        (
"RandomNumberType", rnType)(
"EquivalentTo", equiv)(
"Mod", mod));
 
 3560    KDF2_RNG(
const byte *seed, 
size_t seedSize)
 
 3561        : m_counter(0), m_counterAndSeed(ClampSize(seedSize) + 4)
 
 3563        std::memcpy(m_counterAndSeed + 4, seed, ClampSize(seedSize));
 
 3575    inline size_t ClampSize(
size_t req)
 const 
 3578        if (req > 16U*1024*1024)
 
 3579            return 16U*1024*1024;
 
 3607        throw InvalidArgument(
"Integer: invalid EquivalentTo and/or Mod argument");
 
 3626        bq.
Get(finalSeed, finalSeed.size());
 
 3627        kdf2Rng.reset(
new KDF2_RNG(finalSeed.begin(), finalSeed.size()));
 
 3638                Integer min1 = min + (equiv-min)%mod;
 
 3659                    if (
FirstPrime(first, max, equiv, mod, pSelector))
 
 3663                        if (!
FirstPrime(first, max, equiv, mod, pSelector))
 
 3671                if (
FirstPrime(*
this, 
STDMIN(*
this+mod*PrimeSearchInterval(max), max), equiv, mod, pSelector))
 
 3681std::istream& operator>>(std::istream& in, 
Integer &a)
 
 3684    unsigned int length = 0;
 
 3693        if (length >= str.size())
 
 3694            str.Grow(length + 16);
 
 3696    while (in && (c==
'-' || c==
'x' || (c>=
'0' && c<=
'9') || (c>=
'a' && c<=
'f') || (c>=
'A' && c<=
'F') || c==
'h' || c==
'H' || c==
'o' || c==
'O' || c==
',' || c==
'.'));
 
 3700    str[length-1] = 
'\0';
 
 3707inline int FlagToBase(
long f) {
 
 3708    return f == std::ios::hex ? 16 : (f == std::ios::oct ? 8 : 10);
 
 3711inline char FlagToSuffix(
long f) {
 
 3712    return f == std::ios::hex ? 
'h' : (f == std::ios::oct ? 
'o' : 
'.');
 
 3719    const long f = out.flags() & std::ios::basefield;
 
 3720    const int base = FlagToBase(f);
 
 3721    const char suffix = FlagToSuffix(f);
 
 3733    static const char upper[]=
"0123456789ABCDEF";
 
 3734    static const char lower[]=
"0123456789abcdef";
 
 3736    const char* vec = (out.flags() & std::ios::uppercase) ? upper : lower;
 
 3753#ifdef CRYPTOPP_USE_STD_SHOWBASE 
 3754    if (out.flags() & std::ios_base::showbase)
 
 3759    return out << suffix;
 
 3767        if (Increment(reg, reg.
size()))
 
 3770            reg[reg.
size()/2]=1;
 
 3775        word borrow = Decrement(reg, reg.
size());
 
 3788        if (Increment(reg, reg.
size()))
 
 3791            reg[reg.
size()/2]=1;
 
 3796        if (Decrement(reg, reg.
size()))
 
 3814    else if (reg.
size() >= t.reg.
size())
 
 3818        for (
size_t i=0; i<t.reg.
size(); ++i)
 
 3819            temp[i] = reg[i] & t.reg[i];
 
 3822        std::swap(result.reg, temp);
 
 3830        for (
size_t i=0; i<reg.
size(); ++i)
 
 3831            temp[i] = reg[i] & t.reg[i];
 
 3834        std::swap(result.reg, temp);
 
 3852    else if (reg.
size() >= t.reg.
size())
 
 3856        for (
size_t i=0; i<t.reg.
size(); ++i)
 
 3857            temp[i] |= t.reg[i];
 
 3860        std::swap(result.reg, temp);
 
 3868        for (
size_t i=0; i<reg.
size(); ++i)
 
 3872        std::swap(result.reg, temp);
 
 3890    else if (reg.
size() >= t.reg.
size())
 
 3894        for (
size_t i=0; i<t.reg.
size(); ++i)
 
 3895            temp[i] ^= t.reg[i];
 
 3898        std::swap(result.reg, temp);
 
 3906        for (
size_t i=0; i<reg.
size(); ++i)
 
 3910        std::swap(result.reg, temp);
 
 3919    int carry; 
const bool pre = (a.reg.size() == b.reg.size());
 
 3920    if (!pre && a.reg.size() > b.reg.size())
 
 3922        carry = Add(sum.reg, a.reg, b.reg, b.reg.size());
 
 3923        CopyWords(sum.reg+b.reg.size(), a.reg+b.reg.size(), a.reg.size()-b.reg.size());
 
 3924        carry = Increment(sum.reg+b.reg.size(), a.reg.size()-b.reg.size(), carry);
 
 3928        carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
 
 3932        carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
 
 3933        CopyWords(sum.reg+a.reg.size(), b.reg+a.reg.size(), b.reg.size()-a.reg.size());
 
 3934        carry = Increment(sum.reg+a.reg.size(), b.reg.size()-a.reg.size(), carry);
 
 3940        sum.reg[sum.reg.
size()/2] = 1;
 
 3947    unsigned aSize = a.WordCount();
 
 3949    unsigned bSize = b.WordCount();
 
 3955        word borrow = Subtract(diff.reg, a.reg, b.reg, bSize);
 
 3956        CopyWords(diff.reg+bSize, a.reg+bSize, aSize-bSize);
 
 3957        borrow = Decrement(diff.reg+bSize, aSize-bSize, borrow);
 
 3961    else if (aSize == bSize)
 
 3963        if (Compare(a.reg, b.reg, aSize) >= 0)
 
 3965            Subtract(diff.reg, a.reg, b.reg, aSize);
 
 3970            Subtract(diff.reg, b.reg, a.reg, aSize);
 
 3976        word borrow = Subtract(diff.reg, b.reg, a.reg, aSize);
 
 3977        CopyWords(diff.reg+aSize, b.reg+aSize, bSize-aSize);
 
 3978        borrow = Decrement(diff.reg+aSize, bSize-aSize, borrow);
 
 3985template <
class T> 
inline const T& STDMAX2(
const T& a, 
const T& b)
 
 3987    return a < b ? b : a;
 
 3995        if (b.NotNegative())
 
 3996            PositiveAdd(sum, *
this, b);
 
 3998            PositiveSubtract(sum, *
this, b);
 
 4002        if (b.NotNegative())
 
 4003            PositiveSubtract(sum, b, *
this);
 
 4006            PositiveAdd(sum, *
this, b);
 
 4019            PositiveAdd(*
this, *
this, t);
 
 4021            PositiveSubtract(*
this, *
this, t);
 
 4026            PositiveSubtract(*
this, t, *
this);
 
 4029            PositiveAdd(*
this, *
this, t);
 
 4041        if (b.NotNegative())
 
 4042            PositiveSubtract(diff, *
this, b);
 
 4044            PositiveAdd(diff, *
this, b);
 
 4048        if (b.NotNegative())
 
 4050            PositiveAdd(diff, *
this, b);
 
 4054            PositiveSubtract(diff, b, *
this);
 
 4065            PositiveSubtract(*
this, *
this, t);
 
 4067            PositiveAdd(*
this, *
this, t);
 
 4073            PositiveAdd(*
this, *
this, t);
 
 4077            PositiveSubtract(*
this, t, *
this);
 
 4085    const size_t shiftWords = n / 
WORD_BITS;
 
 4086    const unsigned int shiftBits = (
unsigned int)(n % 
WORD_BITS);
 
 4097    const size_t shiftWords = n / 
WORD_BITS;
 
 4098    const unsigned int shiftBits = (
unsigned int)(n % 
WORD_BITS);
 
 4101    if (wordCount > shiftWords)
 
 4130            const size_t head = reg.
size();
 
 4131            const size_t tail = t.reg.
size() - reg.
size();
 
 4155            const size_t head = reg.
size();
 
 4156            const size_t tail = t.reg.
size() - reg.
size();
 
 4168    size_t aSize = RoundupSize(a.WordCount());
 
 4169    size_t bSize = RoundupSize(b.WordCount());
 
 4171    product.reg.
CleanNew(RoundupSize(aSize+bSize));
 
 4175    AsymmetricMultiply(product.reg, workspace, a.reg, aSize, b.reg, bSize);
 
 4180    PositiveMultiply(product, a, b);
 
 4182    if (a.NotNegative() != b.NotNegative())
 
 4189    Multiply(product, *
this, b);
 
 4218    unsigned aSize = a.WordCount();
 
 4219    unsigned bSize = b.WordCount();
 
 4235    remainder.reg.
CleanNew(RoundupSize(bSize));
 
 4237    quotient.reg.
CleanNew(RoundupSize(aSize-bSize+2));
 
 4241    Divide(remainder.reg, quotient.reg, T, a.reg, aSize, b.reg, bSize);
 
 4246    PositiveDivide(remainder, quotient, dividend, divisor);
 
 4268    if (wordCount <= a.WordCount())
 
 4270        r.reg.
resize(RoundupSize(wordCount));
 
 4278        r.reg.
resize(RoundupSize(a.WordCount()));
 
 4283    if (a.IsNegative() && r.
NotZero())
 
 4314        remainder = dividend.reg[0] & (divisor-1);
 
 4319    quotient.reg.
CleanNew(RoundupSize(i));
 
 4323        quotient.reg[i] = DWord(dividend.reg[i], remainder) / divisor;
 
 4324        remainder = DWord(dividend.reg[i], remainder) % divisor;
 
 4335            remainder = divisor - remainder;
 
 4356    if ((divisor & (divisor-1)) != 0)   
 
 4364                remainder = DWord(reg[i], remainder) % divisor;
 
 4371            remainder = sum % divisor;
 
 4376        remainder = reg[0] & (divisor-1);
 
 4380        remainder = divisor - remainder;
 
 4388        sign = 
Sign(1-sign);
 
 4391int Integer::PositiveCompare(
const Integer& t)
 const 
 4396        return size > tSize ? 1 : -1;
 
 4398        return CryptoPP::Compare(reg, t.reg, size);
 
 4406            return PositiveCompare(t);
 
 4415            return -PositiveCompare(t);
 
 4431        y = (x + *
this/x) >> 1;
 
 4445    return (
WordCount() == 1) && (reg[0] == 1);
 
 4469    return mr.Exponentiate(x, e);
 
 4483        return Modulo(m).InverseModNext(m);
 
 4487        return Modulo(m).InverseModNext(m);
 
 4489    return InverseModNext(m);
 
 4505        return !u ? 
Zero() : (m*(*this-u)+1)/(*this);
 
 4511    unsigned k = AlmostInverse(r.reg, T, reg, reg.
size(), m.reg, m.reg.
size());
 
 4512    DivideByPower2Mod(r.reg, r.reg, k, m.reg, m.reg.
size());
 
 4520    word g0 = mod, g1 = *
this % mod;
 
 4521    word v0 = 0, v1 = 1;
 
 4549    if (oid != ASN1::prime_field())
 
 4559    ASN1::prime_field().DEREncode(seq);
 
 4576    if (a.reg.size()==m_modulus.reg.
size())
 
 4578        CryptoPP::DivideByPower2Mod(m_result.reg.
begin(), a.reg, 1, m_modulus.reg, a.reg.size());
 
 4582        return m_result1 = (a.IsEven() ? (a >> 1) : ((a+m_modulus) >> 1));
 
 4587    if (a.reg.size()==m_modulus.reg.
size() && b.reg.size()==m_modulus.reg.
size())
 
 4589        if (CryptoPP::Add(m_result.reg.
begin(), a.reg, b.reg, a.reg.size())
 
 4590            || Compare(m_result.reg, m_modulus.reg, a.reg.size()) >= 0)
 
 4592            CryptoPP::Subtract(m_result.reg.
begin(), m_result.reg, m_modulus.reg, a.reg.size());
 
 4599        if (m_result1 >= m_modulus)
 
 4600            m_result1 -= m_modulus;
 
 4607    if (a.reg.size()==m_modulus.reg.
size() && b.reg.size()==m_modulus.reg.
size())
 
 4609        if (CryptoPP::Add(a.reg, a.reg, b.reg, a.reg.size())
 
 4610            || Compare(a.reg, m_modulus.reg, a.reg.size()) >= 0)
 
 4612            CryptoPP::Subtract(a.reg, a.reg, m_modulus.reg, a.reg.size());
 
 4627    if (a.reg.size()==m_modulus.reg.
size() && b.reg.size()==m_modulus.reg.
size())
 
 4629        if (CryptoPP::Subtract(m_result.reg.
begin(), a.reg, b.reg, a.reg.size()))
 
 4630            CryptoPP::Add(m_result.reg.
begin(), m_result.reg, m_modulus.reg, a.reg.size());
 
 4637            m_result1 += m_modulus;
 
 4644    if (a.reg.size()==m_modulus.reg.
size() && b.reg.size()==m_modulus.reg.
size())
 
 4646        if (CryptoPP::Subtract(a.reg, a.reg, b.reg, a.reg.size()))
 
 4647            CryptoPP::Add(a.reg, a.reg, m_modulus.reg, a.reg.size());
 
 4665    if (CryptoPP::Subtract(m_result.reg.
begin(), m_result.reg, a.reg, a.reg.size()))
 
 4666        Decrement(m_result.reg.
begin()+a.reg.size(), m_modulus.reg.
size()-a.reg.size());
 
 4673    if (m_modulus.
IsOdd())
 
 4676        return dr.ConvertOut(dr.CascadeExponentiate(dr.ConvertIn(x), e1, dr.ConvertIn(y), e2));
 
 4684    if (m_modulus.
IsOdd())
 
 4687        dr.SimultaneousExponentiate(results, dr.ConvertIn(base), exponents, exponentsCount);
 
 4688        for (
unsigned int i=0; i<exponentsCount; i++)
 
 4689            results[i] = dr.ConvertOut(results[i]);
 
 4697      m_u((
word)0, m_modulus.reg.size()),
 
 4698      m_workspace(5*m_modulus.reg.size())
 
 4700    if (!m_modulus.IsOdd())
 
 4701        throw InvalidArgument(
"MontgomeryRepresentation: Montgomery representation requires an odd modulus");
 
 4703    RecursiveInverseModPower2(m_u.reg, m_workspace, m_modulus.reg, m_modulus.reg.size());
 
 4709    word *
const R = m_result.reg.begin();
 
 4710    const size_t N = m_modulus.reg.size();
 
 4713    AsymmetricMultiply(T, T+2*N, a.reg, a.reg.size(), b.reg, b.reg.size());
 
 4714    SetWords(T+a.reg.size()+b.reg.size(), 0, 2*N-a.reg.size()-b.reg.size());
 
 4715    MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
 
 4722    word *
const R = m_result.reg.begin();
 
 4723    const size_t N = m_modulus.reg.size();
 
 4726    CryptoPP::Square(T, T+2*N, a.reg, a.reg.size());
 
 4727    SetWords(T+2*a.reg.size(), 0, 2*N-2*a.reg.size());
 
 4728    MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
 
 4735    word *
const R = m_result.reg.begin();
 
 4736    const size_t N = m_modulus.reg.size();
 
 4740    SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
 
 4741    MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
 
 4749    word *
const R = m_result.reg.begin();
 
 4750    const size_t N = m_modulus.reg.size();
 
 4754    SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
 
 4755    MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
 
 4756    unsigned k = AlmostInverse(R, T, R, N, m_modulus.reg, N);
 
 4761        DivideByPower2Mod(R, R, k-N*
WORD_BITS, m_modulus.reg, N);
 
 4763        MultiplyByPower2Mod(R, R, N*
WORD_BITS-k, m_modulus.reg, N);
 
 4770template <> CRYPTOPP_DLL
 
 4774    static const unsigned int BIT_32 = (1U << 31);
 
 4775    const bool UPPER = !!(base & BIT_32);
 
 4776    static const unsigned int BIT_31 = (1U << 30);
 
 4777    const bool BASE = !!(base & BIT_31);
 
 4779    const char CH = UPPER ? 
'A' : 
'a';
 
 4780    base &= ~(BIT_32|BIT_31);
 
 4786    bool negative = 
false, zero = 
false;
 
 4804        s[i++]=char((digit < 10 ? 
'0' : (CH - 10)) + digit);
 
 4809    result.reserve(i+2);
 
 4824        else if (base == 16)
 
 4836template <> CRYPTOPP_DLL
 
 4840    static const unsigned int HIGH_BIT = (1U << 31);
 
 4841    const char CH = !!(base & HIGH_BIT) ? 
'A' : 
'a';
 
 4851        word64 digit = value % base;
 
 4852        result = char((digit < 10 ? 
'0' : (CH - 10)) + digit) + result;
 
 4858#ifndef CRYPTOPP_NO_ASSIGN_TO_INTEGER 
 4861bool AssignIntToInteger(
const std::type_info &valueType, 
void *pInteger, 
const void *pInt)
 
 4863    if (valueType != 
typeid(
Integer))
 
 4865    *
reinterpret_cast<Integer *
>(pInteger) = *
reinterpret_cast<const int *
>(pInt);
 
 4877        SetFunctionPointers();
 
 4886#if defined(HAVE_GCC_INIT_PRIORITY) 
 4887    const InitInteger s_init __attribute__ ((init_priority (CRYPTOPP_INIT_PRIORITY + 10))) = InitInteger();
 
 4888    const Integer g_zero __attribute__ ((init_priority (CRYPTOPP_INIT_PRIORITY + 11))) = 
Integer(0L);
 
 4889    const Integer g_one __attribute__ ((init_priority (CRYPTOPP_INIT_PRIORITY + 12))) = 
Integer(1L);
 
 4890    const Integer g_two __attribute__ ((init_priority (CRYPTOPP_INIT_PRIORITY + 13))) = 
Integer(2L);
 
 4891#elif defined(HAVE_MSC_INIT_PRIORITY) 
 4892    #pragma warning(disable: 4075) 
 4893    #pragma init_seg(".CRT$XCU") 
 4894    const InitInteger s_init;
 
 4898    #pragma warning(default: 4075) 
 4899#elif HAVE_XLC_INIT_PRIORITY 
 4901    #pragma priority(280) 
 4902    const InitInteger s_init;
 
 4907    const InitInteger s_init;
 
 4914#if defined(HAVE_GCC_INIT_PRIORITY) || defined(HAVE_MSC_INIT_PRIORITY) || defined(HAVE_XLC_INIT_PRIORITY) 
 4916#elif defined(CRYPTOPP_CXX11_STATIC_INIT) 
 4917    static const Integer s_zero(0L);
 
 4926#if defined(HAVE_GCC_INIT_PRIORITY) || defined(HAVE_MSC_INIT_PRIORITY) || defined(HAVE_XLC_INIT_PRIORITY) 
 4928#elif defined(CRYPTOPP_CXX11_STATIC_INIT) 
 4929    static const Integer s_one(1L);
 
 4938#if defined(HAVE_GCC_INIT_PRIORITY) || defined(HAVE_MSC_INIT_PRIORITY) || defined(HAVE_XLC_INIT_PRIORITY) 
 4940#elif defined(CRYPTOPP_CXX11_STATIC_INIT) 
 4941    static const Integer s_two(2L);
 
#define MAYBE_UNCONST_CAST(T, x)
SunCC workaround.
 
#define MAYBE_CONST
SunCC workaround.
 
Classes for working with NameValuePairs.
 
AlgorithmParameters MakeParameters(const char *name, const T &value, bool throwIfNotUsed=true)
Create an object that implements NameValuePairs.
 
Classes and functions for working with ASN.1 objects.
 
std::ostream & operator<<(std::ostream &out, const OID &oid)
Print an OID value.
 
OID operator+(const OID &lhs, unsigned long rhs)
Append a value to an OID.
 
CRYPTOPP_DLL size_t DEREncodeOctetString(BufferedTransformation &bt, const byte *str, size_t strLen)
DER encode octet string.
 
size_t DEREncodeUnsigned(BufferedTransformation &out, T w, byte asnTag=INTEGER)
DER Encode unsigned value.
 
@ OCTET_STRING
ASN.1 Octet string.
 
void BERDecodeError()
Raises a BERDecodeErr.
 
virtual const Element & Gcd(const Element &a, const Element &b) const
Calculates the greatest common divisor in the ring.
 
virtual void SimultaneousExponentiate(Element *results, const Element &base, const Integer *exponents, unsigned int exponentsCount) const
Exponentiates a base to multiple exponents in the Ring.
 
virtual Element CascadeExponentiate(const Element &x, const Integer &e1, const Element &y, const Integer &e2) const
TODO.
 
Copy input to a memory buffer.
 
Data structure used to store byte strings.
 
size_t Get(byte &outByte)
Retrieve a 8-bit byte.
 
lword MaxRetrievable() const
Provides the number of bytes ready for retrieval.
 
Used to pass byte array input as part of a NameValuePairs object.
 
const byte * begin() const
Pointer to the first byte in the memory block.
 
size_t size() const
Length of the memory block.
 
Exception thrown when division by 0 is encountered.
 
Exception thrown when a random number cannot be found that satisfies the condition.
 
Multiple precision integer with arithmetic operations.
 
static void Divide(Integer &r, Integer &q, const Integer &a, const Integer &d)
Extended Division.
 
void DEREncode(BufferedTransformation &bt) const
Encode in DER format.
 
Integer & operator>>=(size_t n)
Right-shift Assignment.
 
Integer & operator&=(const Integer &t)
Bitwise AND Assignment.
 
bool GetBit(size_t i) const
Provides the i-th bit of the Integer.
 
void SetByte(size_t n, byte value)
Set the n-th byte to value.
 
static void DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n)
Extended Division.
 
bool IsPositive() const
Determines if the Integer is positive.
 
Integer Minus(const Integer &b) const
Subtraction.
 
signed long ConvertToLong() const
Convert the Integer to Long.
 
Integer operator-() const
Negation (unary minus).
 
void SetBit(size_t n, bool value=1)
Set the n-th bit to value.
 
Integer & operator+=(const Integer &t)
Addition Assignment.
 
Integer And(const Integer &t) const
Bitwise AND.
 
bool IsSquare() const
Determine whether this integer is a perfect square.
 
Integer Plus(const Integer &b) const
Addition.
 
Integer DividedBy(const Integer &b) const
Division.
 
Integer & operator++()
Pre-increment.
 
void DEREncodeAsOctetString(BufferedTransformation &bt, size_t length) const
Encode absolute value as big-endian octet string.
 
void OpenPGPDecode(const byte *input, size_t inputLen)
Decode from OpenPGP format.
 
bool NotZero() const
Determines if the Integer is non-0.
 
Integer Times(const Integer &b) const
Multiplication.
 
void BERDecodeAsOctetString(BufferedTransformation &bt, size_t length)
Decode nonnegative value from big-endian octet string.
 
Integer & operator--()
Pre-decrement.
 
byte GetByte(size_t i) const
Provides the i-th byte of the Integer.
 
static const Integer & Zero()
Integer representing 0.
 
void Randomize(RandomNumberGenerator &rng, size_t bitCount)
Set this Integer to random integer.
 
bool IsConvertableToLong() const
Determines if the Integer is convertible to Long.
 
Integer Or(const Integer &t) const
Bitwise OR.
 
lword GetBits(size_t i, size_t n) const
Provides the low order bits of the Integer.
 
static Integer Power2(size_t e)
Exponentiates to a power of 2.
 
Integer Squared() const
Multiply this integer by itself.
 
Integer()
Creates the zero integer.
 
void BERDecode(const byte *input, size_t inputLen)
Decode from BER format.
 
size_t MinEncodedSize(Signedness sign=UNSIGNED) const
Minimum number of bytes to encode this integer.
 
unsigned int BitCount() const
Determines the number of bits required to represent the Integer.
 
Integer & operator|=(const Integer &t)
Bitwise OR Assignment.
 
void Negate()
Reverse the Sign of the Integer.
 
bool NotNegative() const
Determines if the Integer is non-negative.
 
unsigned int WordCount() const
Determines the number of words required to represent the Integer.
 
Integer & operator^=(const Integer &t)
Bitwise XOR Assignment.
 
Integer & operator=(const Integer &t)
Assignment.
 
Integer & operator-=(const Integer &t)
Subtraction Assignment.
 
RandomNumberType
Properties of a random integer.
 
@ ANY
a number with no special properties
 
@ PRIME
a number which is probabilistically prime
 
bool operator!() const
Negation.
 
Integer AbsoluteValue() const
Retrieve the absolute value of this integer.
 
@ UNSIGNED
an unsigned value
 
Integer Xor(const Integer &t) const
Bitwise XOR.
 
int Compare(const Integer &a) const
Perform signed comparison.
 
Integer Modulo(const Integer &b) const
Remainder.
 
size_t OpenPGPEncode(byte *output, size_t bufferSize) const
Encode absolute value in OpenPGP format.
 
void swap(Integer &a)
Swaps this Integer with another Integer.
 
static const Integer & Two()
Integer representing 2.
 
bool IsZero() const
Determines if the Integer is 0.
 
Integer MultiplicativeInverse() const
Calculate multiplicative inverse.
 
bool GenerateRandomNoThrow(RandomNumberGenerator &rng, const NameValuePairs &params=g_nullNameValuePairs)
Generate a random number.
 
bool IsNegative() const
Determines if the Integer is negative.
 
void Decode(const byte *input, size_t inputLen, Signedness sign=UNSIGNED)
Decode from big-endian byte array.
 
Integer & operator<<=(size_t n)
Left-shift Assignment.
 
Sign
Used internally to represent the integer.
 
@ NEGATIVE
the value is negative
 
@ POSITIVE
the value is positive or 0
 
unsigned int ByteCount() const
Determines the number of bytes required to represent the Integer.
 
bool IsUnit() const
Determine if 1 or -1.
 
bool IsOdd() const
Determines if the Integer is odd parity.
 
static Integer Gcd(const Integer &a, const Integer &n)
Calculate greatest common divisor.
 
void Encode(byte *output, size_t outputLen, Signedness sign=UNSIGNED) const
Encode in big-endian format.
 
Integer InverseMod(const Integer &n) const
Calculate multiplicative inverse.
 
Integer SquareRoot() const
Extract square root.
 
static const Integer & One()
Integer representing 1.
 
bool IsEven() const
Determines if the Integer is even parity.
 
An invalid argument was detected.
 
Ring of congruence classes modulo n.
 
Integer & Reduce(Integer &a, const Integer &b) const
TODO.
 
ModularArithmetic(const Integer &modulus=Integer::One())
Construct a ModularArithmetic.
 
const Integer & Half(const Integer &a) const
Divides an element by 2.
 
const Integer & Inverse(const Integer &a) const
Inverts the element in the ring.
 
void BERDecodeElement(BufferedTransformation &in, Element &a) const
Decodes element in DER format.
 
unsigned int MaxElementByteLength() const
Provides the maximum byte size of an element in the ring.
 
void DEREncodeElement(BufferedTransformation &out, const Element &a) const
Encodes element in DER format.
 
void SimultaneousExponentiate(Element *results, const Element &base, const Integer *exponents, unsigned int exponentsCount) const
Exponentiates a base to multiple exponents in the ring.
 
Integer CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
TODO.
 
Integer & Accumulate(Integer &a, const Integer &b) const
TODO.
 
const Integer & Subtract(const Integer &a, const Integer &b) const
Subtracts elements in the ring.
 
const Integer & Add(const Integer &a, const Integer &b) const
Adds elements in the ring.
 
void DEREncode(BufferedTransformation &bt) const
Encodes in DER format.
 
Performs modular arithmetic in Montgomery representation for increased speed.
 
Integer ConvertOut(const Integer &a) const
Reduces an element in the congruence class.
 
const Integer & Square(const Integer &a) const
Square an element in the ring.
 
const Integer & Multiply(const Integer &a, const Integer &b) const
Multiplies elements in the ring.
 
MontgomeryRepresentation(const Integer &modulus)
Construct a MontgomeryRepresentation.
 
const Integer & MultiplicativeInverse(const Integer &a) const
Calculate the multiplicative inverse of an element in the ring.
 
Interface for retrieving values given their names.
 
T GetValueWithDefault(const char *name, T defaultValue) const
Get a named value.
 
bool GetValue(const char *name, T &value) const
Get a named value.
 
CRYPTOPP_DLL bool GetIntValue(const char *name, int &value) const
Get a named value with type int.
 
static void DeriveKey(byte *output, size_t outputLength, const byte *input, size_t inputLength, const byte *derivationParams, size_t derivationParamsLength)
P1363 key derivation function.
 
Application callback to signal suitability of a candidate prime.
 
Interface for random number generators.
 
virtual void GenerateBlock(byte *output, size_t size)
Generate random array of bytes.
 
Secure memory block with allocator and cleanup.
 
iterator begin()
Provides an iterator pointing to the first element in the memory block.
 
void CleanNew(size_type newSize)
Change size without preserving contents.
 
void swap(SecBlock< T, A > &b)
Swap contents with another SecBlock.
 
void CleanGrow(size_type newSize)
Change size and preserve contents.
 
void New(size_type newSize)
Change size without preserving contents.
 
size_type size() const
Provides the count of elements in the SecBlock.
 
void resize(size_type newSize)
Change size and preserve contents.
 
Restricts the instantiation of a class to one static object without locks.
 
String-based implementation of Store interface.
 
Pointer that overloads operator ->
 
Library configuration file.
 
#define CRYPTOPP_TABLE
Override for internal linkage.
 
unsigned char byte
8-bit unsigned datatype
 
word64 word
Full word used for multiprecision integer arithmetic.
 
const unsigned int WORD_BITS
Size of a platform word in bits.
 
unsigned int word32
32-bit unsigned datatype
 
unsigned short word16
16-bit unsigned datatype
 
word128 dword
Double word used for multiprecision integer arithmetic.
 
unsigned long long word64
64-bit unsigned datatype
 
word32 hword
Half word used for multiprecision integer arithmetic.
 
const unsigned int WORD_SIZE
Size of a platform word in bytes.
 
word64 lword
Large word type.
 
Functions for CPU features and intrinsics.
 
ByteOrder
Provides the byte ordering.
 
@ LITTLE_ENDIAN_ORDER
byte order is little-endian
 
@ BIG_ENDIAN_ORDER
byte order is big-endian
 
Implementation of BufferedTransformation's attachment interface.
 
Multiple precision integer with arithmetic operations.
 
Utility functions for the Crypto++ library.
 
CRYPTOPP_DLL std::string IntToString< Integer >(Integer value, unsigned int base)
Converts an Integer to a string.
 
const T & STDMAX(const T &a, const T &b)
Replacement function for std::max.
 
unsigned int BitPrecision(const T &value)
Returns the number of bits required for a value.
 
unsigned int BytePrecision(const T &value)
Returns the number of 8-bit bytes or octets required for a value.
 
size_t BitsToWords(size_t bitCount)
Returns the number of words required for the specified number of bits.
 
unsigned int TrailingZeros(word32 v)
Determines the number of trailing 0-bits in a value.
 
void ConditionalSwapPointers(bool c, T &a, T &b)
Performs a branch-less swap of pointers a and b if condition c is true.
 
T Crop(T value, size_t bits)
Truncates the value to the specified number of bits.
 
bool IsPowerOf2(const T &value)
Tests whether a value is a power of 2.
 
const T & STDMIN(const T &a, const T &b)
Replacement function for std::min.
 
size_t BitsToBytes(size_t bitCount)
Returns the number of 8-bit bytes or octets required for the specified number of bits.
 
#define MEMORY_BARRIER
A memory barrier.
 
size_t BytesToWords(size_t byteCount)
Returns the number of words required for the specified number of bytes.
 
CRYPTOPP_DLL std::string IntToString< word64 >(word64 value, unsigned int base)
Converts an unsigned value to a string.
 
void PutWord(bool assumeAligned, ByteOrder order, byte *block, T value, const byte *xorBlock=NULL)
Access a block of memory.
 
T1 SaturatingSubtract1(const T1 &a, const T2 &b)
Performs a saturating subtract clamped at 1.
 
Class file for performing modular arithmetic.
 
Crypto++ library namespace.
 
const char * PointerToPrimeSelector()
const PrimeSelector *
 
const char * Seed()
ConstByteArrayParameter.
 
Classes and functions for number theoretic operations.
 
CRYPTOPP_DLL bool FirstPrime(Integer &p, const Integer &max, const Integer &equiv, const Integer &mod, const PrimeSelector *pSelector)
Finds a random prime of special form.
 
ASN.1 object identifiers for algorithms and schemes.
 
This file contains helper classes/functions for implementing public key algorithms.
 
Classes and functions for secure memory allocations.
 
Classes for SHA-1 and SHA-2 family of message digests.
 
Classes for automatic resource management.
 
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.
 
Support functions for word operations.
 
void ShiftWordsRightByWords(word *r, size_t n, size_t shiftWords)
Right shift word array.
 
void XorWords(word *r, const word *a, const word *b, size_t n)
XOR word arrays.
 
void SetWords(word *r, word a, size_t n)
Set the value of words.
 
void OrWords(word *r, const word *a, const word *b, size_t n)
OR word arrays.
 
word ShiftWordsLeftByBits(word *r, size_t n, unsigned int shiftBits)
Left shift word array.
 
void ShiftWordsLeftByWords(word *r, size_t n, size_t shiftWords)
Left shift word array.
 
size_t CountWords(const word *x, size_t n)
Count the number of words.
 
word ShiftWordsRightByBits(word *r, size_t n, unsigned int shiftBits)
Right shift word array.
 
void CopyWords(word *r, const word *a, size_t n)
Copy word array.
 
void AndWords(word *r, const word *a, const word *b, size_t n)
AND word arrays.