| Clockticks | Instructions Retired | 64k Aliasing conflicts | Mispredicted Branches Retired | MOB Loads Replays Retired | Clockticks per Instructions Retired (CPI) | 64K Aliasing Conflicts Performance Impact | Branch Mispredict Performance Impact | Mispredicted Branches per Instructions Retired | Store Forward Performance Impact | |
|---|---|---|---|---|---|---|---|---|---|---|
| Scale | 1,000,000 | 1,000,000 | 10,000 | 10,000 | 1,000 | 1 | 1 | 1 | 1 | 1 |
| Entire Program | 500,214 | 545,604 | 220,829 | 130,608 | 175,288 | 0.917 | 5.298 | 5.222 | 0.002 | 1.752 |
| P4_Mul | 62,853 | 65,010 | 56,496 | 1,711 | 9,651 | 0.967 | 10.786 | 0.544 | 0 | 0.768 |
| Add | 49,332 | 63,687 | 17,828 | 24,305 | 4,158 | 0.775 | 4.337 | 9.853 | 0.004 | 0.421 |
| Subtract | 32,046 | 36,909 | 13,889 | 20,043 | 3,408 | 0.868 | 5.201 | 12.509 | 0.005 | 0.532 |
| ProcessAndXorBlock | 23,919 | 45,363 | 101 | 16 | 8,362 | 0.527 | 0.05 | 0.013 | 0 | 1.748 |
| Multiply8 | 23,610 | 27,576 | 5,430 | 5,688 | 9,633 | 0.856 | 2.76 | 4.818 | 0.002 | 2.04 |
| Transform | 14,535 | 11,208 | 315 | 0 | 2,229 | 1.297 | 0.26 | 0 | 0 | 0.767 |
| Multiply | 12,597 | 14,655 | 2,889 | 8,345 | 810 | 0.86 | 2.752 | 13.25 | 0.006 | 0.321 |
| Multiply4 | 11,079 | 8,262 | 2,253 | 166 | 149 | 1.341 | 2.441 | 0.299 | 0 | 0.067 |
| AlmostInverse | 10,719 | 11,721 | 150 | 10,013 | 3,884 | 0.915 | 0.168 | 18.684 | 0.009 | 1.812 |
| RecursiveMultiply | 10,503 | 13,170 | 3,661 | 7,494 | 2,663 | 0.797 | 4.182 | 14.27 | 0.006 | 1.268 |
| MultiplicativeInverse | 9,864 | 11,211 | 482 | 12,126 | 781 | 0.88 | 0.586 | 24.586 | 0.011 | 0.396 |
| Divide | 6,864 | 6,324 | 2,819 | 2,807 | 5,735 | 1.085 | 4.928 | 8.179 | 0.004 | 4.178 |
| RawProcessBlock | 6,507 | 8,985 | 6 | 0 | 49 | 0.724 | 0.011 | 0.001 | 0 | 0.038 |
| Iterate | 6,030 | 7,491 | 18,114 | 0 | 101 | 0.805 | 36.048 | 0 | 0 | 0.084 |
CRYPTOPP_NAKED word Add_POpt(word *C, const word *A, const word *B, unsigned int N) { AddPrologue // now: ebx = B, ecx = C, edx = A, esi = N AS2( sub ecx, edx) // hold the distance between C & A so we can add this to A to get C AS2( xor eax, eax) // clear eax AS2( sub eax, esi) // eax is a negative index from end of B AS2( lea ebx, [ebx+4*esi]) // ebx is end of B AS2( sar eax, 1) // unit of eax is now dwords; this also clears the carry flag AS1( jz loopendAdd) // if no dwords then nothing to do AS1(loopstartAdd:) AS2( mov esi,[edx]) // load lower word of A AS2( mov ebp,[edx+4]) // load higher word of A AS2( mov edi,[ebx+8*eax]) // load lower word of B AS2( lea edx,[edx+8]) // advance A and C AS2( adc esi,edi) // add lower words AS2( mov edi,[ebx+8*eax+4]) // load higher word of B AS2( adc ebp,edi) // add higher words AS1( inc eax) // advance B AS2( mov [edx+ecx-8],esi) // store lower word result AS2( mov [edx+ecx-4],ebp) // store higher word result AS1( jnz loopstartAdd) // loop until eax overflows and becomes zero AS1(loopendAdd:) AS2( adc eax, 0) // store carry into eax (return result register) AddEpilogue }hide code
CRYPTOPP_NAKED word Add_P4Opt(word *C, const word *A, const word *B, unsigned int N) { AddPrologue // now: ebx = B, ecx = C, edx = A, esi = N AS2( xor eax, eax) AS1( neg esi) AS1( jz loopendAddP4) // if no dwords then nothing to do AS2( mov edi, [edx]) AS2( mov ebp, [ebx]) AS1( jmp carry1AddP4) AS1(loopstartAddP4:) AS2( mov edi, [edx+8]) AS2( add ecx, 8) AS2( add edx, 8) AS2( mov ebp, [ebx]) AS2( add edi, eax) AS1( jc carry1AddP4) AS2( xor eax, eax) AS1(carry1AddP4:) AS2( add edi, ebp) AS2( mov ebp, 1) AS2( mov [ecx], edi) AS2( mov edi, [edx+4]) AS2( cmovc eax, ebp) AS2( mov ebp, [ebx+4]) AS2( add ebx, 8) AS2( add edi, eax) AS1( jc carry2AddP4) AS2( xor eax, eax) AS1(carry2AddP4:) AS2( add edi, ebp) AS2( mov ebp, 1) AS2( cmovc eax, ebp) AS2( mov [ecx+4], edi) AS2( add esi, 2) AS1( jnz loopstartAddP4) AS1(loopendAddP4:) AddEpilogue }hide code
CRYPTOPP_NAKED word Add_64bit(word *C, const word *A, const word *B, unsigned int N) { AddPrologue // now: ebx = B, ecx = C, edx = A, esi = N AS2( xor eax, eax) AS2( cmp esi, 01h) AS2( pxor mm2, mm2) //mm2=0 (carry) AS1( jbe AddLabelcd) //N<2 AS2( shl esi, 02h) //esi = N*4 = num. of bytes AS1(AddLabel3c:) AS2( movq mm0, MMWORD PTR [edx + eax]) // mm0 = *A AS2( movq mm1, MMWORD PTR [ebx + eax]) // mm1 = *B AS2( add eax, 08h) // AS2( cmp eax, esi) AS2( movq mm3, mm0) AS2( movq mm4, mm0) AS2( paddq mm0, mm1) // mm0+=carry(mm2) AS2( paddq mm0, mm2) //mm0 = mm0+mm1 AS2( por mm3, mm1) //mm3 = *A | *B AS2( pand mm4, mm1) //mm4 = *A & *B (carry* in msb) AS2( movq MMWORD PTR [ecx +eax - 8], mm0) // *C = *A + *B AS2( pandn mm0, mm3) //mm0=mm3&!mm0 (carry** in msb) AS2( por mm0, mm4) AS2( psrlq mm0, 03fh) //mm0= carry* or carry ** AS2( movq mm2, mm0) AS1( jl AddLabel3c) AS1(AddLabelcd:) AS3( pextrw eax, mm2, 00h) AS1( emms) AddEpilogue }hide code
CRYPTOPP_NAKED word Add_64bitx2(word *C, const word *A, const word *B, unsigned int N) { _asm{ push esi push ebx mov edx, DWORD PTR [esp+0Ch] //edx=C mov ecx, DWORD PTR [esp+10h] //ecx=A mov eax, DWORD PTR [esp+014h] //eax=B mov esi, DWORD PTR [esp+018h] //esi=N xor ebx, ebx cmp esi, 01h pxor xmm2, xmm2 //xmm2=0 (carry from previous) movdqu xmm7, add_64x2_carry_mask jbe AddLabelcd //N<2 shl esi, 02h //esi = N*4 = num. of bytes AddLabel3c: movdqa xmm0, XMMWORD PTR [ecx + ebx] // mm0 = *A movdqa xmm1, XMMWORD PTR [eax + ebx] // mm1 = *B add ebx, 10h // cmp ebx, esi movdqa xmm3, xmm0 movdqa xmm4, xmm0 paddq xmm0, xmm1 // mm0 = mm0+mm1 paddq xmm0, xmm2 //mm0+=carry(mm2) por xmm3, xmm1 //mm3 = *A | *B pand xmm4, xmm1 //mm4 = *A & *B (carry* in msb) pxor xmm1,xmm1 movdqa xmm5, xmm0 //xmm5 is result movdqa xmm2, xmm0 // pandn xmm0, xmm3 //mm0=mm3&!mm0 (carry** in msb) por xmm0, xmm4 //xmm0 = both carrys in both msbs movdqa xmm6, xmm0 pslldq xmm0, 1 //xmm0 - carry from first to second pand xmm0,xmm7 paddq xmm5, xmm0 movdqa XMMWORD PTR [edx + ebx -10h], xmm5 // *C = *A +*B + carry pandn xmm5, xmm2 //xmm5 - carry to next from adding the carry in msb por xmm6, xmm5 psrlq xmm6, 03fh punpckhdq xmm6, xmm1 movdqa xmm2, xmm6 jl AddLabel3c AddLabelcd: pextrw eax, xmm2, 00h pop ebx pop esi ret } }hide code
CRYPTOPP_NAKED word Add_16bitx8(word *res, const word *a, const word *b, word n) { AddPrologue AS2( xor eax,eax) AS2( pxor xmm5,xmm5) // last carry // now: ebx = B, ecx = C, edx = A, esi = N eax=i(0) AS2( shl esi, 02h) //esi = N*4 = num. of bytes AS2( mov edi, ebx) AS2( or edi, ecx) AS2( or edi, edx) AS2( test edi, 15) AS1( jnz Done_128_part) AS2( mov ebp, esi) AS2( and ebp, -10h) //ebp is maximal so that ebp<=esi, ebp mod 16 =0 AS1( jz Done_128_part) AS2( add eax,0) // clear carry AS2( pxor xmm6,xmm6) // zero mask AS2( pcmpeqd xmm4,xmm4) // -1 mask AS1(L:) AS2( movdqa xmm0,[edx+eax]) // load first number AS2( movdqa xmm1,[ebx+eax]) // load second number AS2( movdqa xmm2,xmm0) // save copy of first AS2( paddw xmm0,xmm1) // regular 16 bit AS2( paddusw xmm2,xmm1) // saturated add AS2( pcmpeqw xmm2,xmm0) // -1 if same result (no carry) AS2( pxor xmm2,xmm4) // xor with -1 so -1 if carry AS2( movdqa xmm3,xmm2) // duplicate AS2( pslldq xmm2, 2) // put carry in the next word AS2( psrldq xmm3,14) // save carry for next iteration AS2( paddw xmm2,xmm5) // add carry from last iteration AS2( movdqa xmm5,xmm3) // save carry for next iteration AS2( psubw xmm0,xmm2) // add carry (sub -1) AS2( movdqa xmm7,xmm0) // save copy of result AS2( pcmpeqw xmm7,xmm6) // compare if zero AS2( pand xmm7,xmm2) // result is zero and we added carry so it is carry propagate AS2( pmovmskb edi,xmm7) // check if xmm7 is all zero if not loop for carry AS2( test edi,edi) AS1( jnz Carry_check) // carry propagate case AS2( add eax,16) AS2( cmp eax,ebp) AS2( movdqa [ecx+eax-16],xmm0) // save the result AS1( jl L) AS1( jmp Done_128_part) AS1(Carry_check:) AS2( por xmm3,xmm7) AS2( pslldq xmm7,2) // Propagate carry AS2( psubw xmm0,xmm7) // add carry AS2( movdqa xmm2,xmm0) AS2( pcmpeqw xmm2,xmm6) AS2( pand xmm7,xmm2) AS2( pmovmskb edi,xmm7) // check if xmm7 is all zero if not loop for carry AS2( test edi,edi) AS1( jnz Carry_check) AS2( movdqa [ecx+eax],xmm0) AS2( psrldq xmm3,14) // save carry for next iteration AS2( por xmm5, xmm3) // or the propagate carry with the add carry only one can happened. AS2( add eax,16) AS2( cmp eax,ebp) AS1( jl L) AS1(Done_128_part:) AS2( cmp esi, eax) AS2( movdq2q mm2, xmm5) AS2( psrlq mm2, 0fh) AS1( jbe AddLabelcd) //N<2 AS1(AddLabel3c:) AS2( movq mm0, MMWORD PTR [edx + eax]) // mm0 = *A AS2( movq mm1, MMWORD PTR [ebx + eax]) // mm1 = *B AS2( add eax, 08h) // AS2( cmp eax, esi) AS2( movq mm3, mm0) AS2( movq mm4, mm0) AS2( paddq mm0, mm1) // mm0+=carry(mm2) AS2( paddq mm0, mm2) //mm0 = mm0+mm1 AS2( por mm3, mm1) //mm3 = *A | *B AS2( pand mm4, mm1) //mm4 = *A & *B (carry* in msb) AS2( movq MMWORD PTR [ecx +eax - 8], mm0) // *C = *A + *B AS2( pandn mm0, mm3) //mm0=mm3&!mm0 (carry** in msb) AS2( por mm0, mm4) AS2( psrlq mm0, 03fh) //mm0= carry* or carry ** AS2( movq mm2, mm0) AS1( jl AddLabel3c) AS1(AddLabelcd:) AS3( pextrw eax, mm2, 00h) AS1( emms) AddEpilogue }hide code
| P4-Optimized | Pentium-Optimized | 64bit | 16bit*8 | 64bit*2 | ||
|---|---|---|---|---|---|---|
| 1088bit Additions (64bit Alligned) | 11,673 | 9,716 | 9,808 | 10,305 | ||
| 1152bit Additions (128bit Alligned) | 13,235 | 12,041 | 10,560 | 10,381 | 13,224 | |
| Function inside the program | 10,663 | 10,053 | 10,076 | 12,236 | ||
| Entire program | 42,000 | 41,464 | 41,068 | 43,334 |
CRYPTOPP_NAKED word POpt_Subtract(word *C, const word *A, const word *B, unsigned int N) { AddPrologue // now: ebx = B, ecx = C, edx = A, esi = N AS2( sub ecx, edx) // hold the distance between C & A so we can add this to A to get C AS2( xor eax, eax) // clear eax AS2( sub eax, esi) // eax is a negative index from end of B AS2( lea ebx, [ebx+4*esi]) // ebx is end of B AS2( sar eax, 1) // unit of eax is now dwords; this also clears the carry flag AS1( jz loopendSub) // if no dwords then nothing to do AS1(loopstartSub:) AS2( mov esi,[edx]) // load lower word of A AS2( mov ebp,[edx+4]) // load higher word of A AS2( mov edi,[ebx+8*eax]) // load lower word of B AS2( lea edx,[edx+8]) // advance A and C AS2( sbb esi,edi) // subtract lower words AS2( mov edi,[ebx+8*eax+4]) // load higher word of B AS2( sbb ebp,edi) // subtract higher words AS1( inc eax) // advance B AS2( mov [edx+ecx-8],esi) // store lower word result AS2( mov [edx+ecx-4],ebp) // store higher word result AS1( jnz loopstartSub) // loop until eax overflows and becomes zero AS1(loopendSub:) AS2( adc eax, 0) // store carry into eax (return result register) AddEpilogue }hide code
CRYPTOPP_NAKED word P4Opt_Subtract(word *C, const word *A, const word *B, unsigned int N) { AddPrologue // now: ebx = B, ecx = C, edx = A, esi = N AS2( xor eax, eax) AS1( neg esi) AS1( jz loopendSubP4) // if no dwords then nothing to do AS2( mov edi, [edx]) AS2( mov ebp, [ebx]) AS1( jmp carry1SubP4) AS1(loopstartSubP4:) AS2( mov edi, [edx+8]) AS2( add edx, 8) AS2( add ecx, 8) AS2( mov ebp, [ebx]) AS2( sub edi, eax) AS1( jc carry1SubP4) AS2( xor eax, eax) AS1(carry1SubP4:) AS2( sub edi, ebp) AS2( mov ebp, 1) AS2( mov [ecx], edi) AS2( mov edi, [edx+4]) AS2( cmovc eax, ebp) AS2( mov ebp, [ebx+4]) AS2( add ebx, 8) AS2( sub edi, eax) AS1( jc carry2SubP4) AS2( xor eax, eax) AS1(carry2SubP4:) AS2( sub edi, ebp) AS2( mov ebp, 1) AS2( cmovc eax, ebp) AS2( mov [ecx+4], edi) AS2( add esi, 2) AS1( jnz loopstartSubP4) AS1(loopendSubP4:) AddEpilogue }hide code
CRYPTOPP_NAKED word 64bit_Subtract(word *C, const word *A, const word *B, unsigned int N) { _asm{ push esi push ebx mov edx, DWORD PTR [esp+0Ch] //edx=C mov ecx, DWORD PTR [esp+10h] //ecx=A mov eax, DWORD PTR [esp+014h] //eax=B mov esi, DWORD PTR [esp+018h] //esi=N xor ebx, ebx cmp esi, 01h pxor mm2, mm2 //mm2=0 (carry) jbe AddLabelcd //N<2 shl esi, 02h //esi = N*4 = num. of bytes AddLabel3c: movq mm0, MMWORD PTR [ecx + ebx] // mm0 = *A movq mm1, MMWORD PTR [eax + ebx] // mm1 = *B add ebx, 08h // cmp ebx, esi movq mm3, mm0 movq mm4, mm0 psubq mm0, mm1 // mm0-=carry(mm2) psubq mm0, mm2 //mm0 = mm0-mm1 pand mm4, mm1 //mm4 = *A & *B movq mm5, mm1 //mm5 = *B movq MMWORD PTR [edx +ebx - 8], mm0 // *C = *A + *B por mm1, mm0 //mm1 = *B | *res pand mm4, mm0 //mm4 = A & B & res (carry* in msb) pandn mm3, mm1 //mm3 = !A & (B | res) (carry** in msb) por mm3, mm4 psrlq mm3, 03fh //mm3= carry* or carry ** movq mm2, mm3 jl AddLabel3c AddLabelcd: pextrw eax, mm2, 00h emms pop ebx pop esi ret } }hide code
| P4-Optimized | Pentium-Optimized | 64bit | |
|---|---|---|---|
| 1152bit*12e8 Subtractions (128bit Alligned) | 12,999 | 12,660 | 13,558 |
| Function inside the program | 10,076 | 9,680 | 9,939 |
| Entire program | 41,068 | 40,664 | 40,797 |
Integer ModularRoot(const Integer &a, const Integer &dp, const Integer &dq, const Integer &p, const Integer &q, const Integer &u) { Integer p2 = ModularExponentiation((a % p), dp, p); //this will be done in one thread Integer q2 = ModularExponentiation((a % q), dq, q); //this in another return CRT(p2, p, q2, q, u); }hide function
typedef HANDLE PortThread; typedef HANDLE PortCondVar; typedef HANDLE PortSemaphore; typedef HANDLE PortMutex; typedef unsigned long (__stdcall* ThrFuncPointer)(void*); PortThread portCreateThread(const ThrFuncPointer initFunc, void* param); void portKillThread(PortThread thrd); PortCondVar portCreateCondVar(); void portWaitOnCondVar(PortCondVar cnd); void portReleaseOneOfCondVar(PortCondVar cnd); void portKillCondVar(PortCondVar cnd); PortSemaphore portCreateSemaphore(int initVal); void portWaitOnSemaphore(PortSemaphore smph); void portReleaseOneOfSemaphore(PortSemaphore smph); void portClearSemaphore(PortSemaphore smph); void portKillSemaphore(PortSemaphore smph); PortMutex portCreateMutex(); void portWaitOnMutex(PortMutex mtx); void portReleaseMutex(PortMutex mtx); void portKillMutex(PortMutex mtx);hide declarations
class HelperThread { public: HelperThread(); virtual ~HelperThread(); protected: friend unsigned long __stdcall mulThreadMainThrFunc(void*); virtual void start(); virtual void doYourStuff() = 0; //actuall work is here virtual void waitTillDone(); PortCondVar startExec, endExec; PortThread theThread; static const ThrFuncPointer mainThrFunc; };hide declaration
Integer ModularRootThreaded(const Integer &a, const Integer &dp, const Integer &dq, const Integer &p, const Integer &q, const Integer &u, MRThread* myMRThread) { Integer q2; Integer p2; if (myMRThread) { myMRThread->a=&a; myMRThread->p2=&p2; myMRThread->p=&p; myMRThread->dp=&dp; myMRThread->start(); q2 = ModularUnthreadedExponentiation((a % q), dq, q); myMRThread->waitTillDone(); } else { q2 = ModularExponentiation((a % q), dq, q); p2 = ModularExponentiation((a % p), dp, p); } return CRT(p2, p, q2, q, u); }hide function
| HT machine | DP machine | |||||
|---|---|---|---|---|---|---|
| unthreads | threaded | speedup | unthreads | threaded | speedup | |
| RSA 1024 Decryption | 3.7(0.19) | 2.86(0.08) | 22.65% | 4.35(0.06) | 2.53(0.07) | 41.92% |
| RSA 2048 Decryption | 20.73(0.08) | 15.47(0.61) | 25.38% | 24.45(0.09) | 13.6(0.24) | 44.4% |
| RSA 1024 Signature | 3.52(0.27) | 2.9(0.13) | 17.59% | 4.19(0.07) | 2.53(0.07) | 39.75% |
| RSA 2048 Signature | 19.77(0.08) | 15.94(0.44) | 19.38% | 23.26(0.12) | 13.78(0.34) | 40.76% |
//The names of the functions used might be confusing. The usuall use of it will be through a // MultiplicativeGroup, so the Add or Accumulat functions usually multiply, and Double multiplies // the base by itself. template <class T> void AbstractGroup<T>::SimultaneousMultiply(T *results, const T &base, const Integer *expBegin, unsigned int expCount) const { std::vector<std::vector<Element> > buckets(expCount); std::vector<WindowSlider> exponents; exponents.reserve(expCount); unsigned int i; for (i=0; i<expCount; i++) { assert(expBegin->NotNegative()); exponents.push_back(WindowSlider(*expBegin++, InversionIsFast(), 0)); exponents[i].FindNextWindow(); buckets[i].resize(1<<(exponents[i].windowSize-1), Identity()); } unsigned int expBitPosition = 0; Element g = base; bool notDone = true; while (notDone) { notDone = false; for (i=0; i<expCount; i++) { if (!exponents[i].finished && expBitPosition == exponents[i].windowBegin) { Element &bucket = buckets[i][exponents[i].expWindow/2]; if (exponents[i].negateNext) Accumulate(bucket, Inverse(g)); else Accumulate(bucket, g); exponents[i].FindNextWindow(); } notDone = notDone || !exponents[i].finished; } if (notDone) { g = Double(g); expBitPosition++; } } for (i=0; i<expCount; i++) { Element &r = *results++; r = buckets[i][buckets[i].size()-1]; if (buckets[i].size() > 1) { for (int j = buckets[i].size()-2; j >= 1; j--) { Accumulate(buckets[i][j], buckets[i][j+1]); Accumulate(r, buckets[i][j]); } Accumulate(buckets[i][0], buckets[i][1]); r = Add(Double(r), buckets[i][0]); } } }hide code
| Test | HT results | DP results | |||||||
|---|---|---|---|---|---|---|---|---|---|
| base size |
exponent size |
Non | elements | elements (DP variant) |
exponent | Non | elements | elements (DP variant) |
exponent |
| 512bit | 256bit | 0.69 (0.01) | 0.78 (0) | 0.81 (0.01) | 1.14 (0) | 0.85 (0) | 0.91 (0) | 0.93 (0.01) | 1.16 (0.07) |
| 512bit | 512bit | 1.33 (0.04) | 1.46 (0) | 1.56 (0.01) | 1.97 (0.01) | 1.59 (0) | 1.7 (0.01) | 1.76 (0.01) | 1.92 (0.07) |
| 1024bit | 128bit | 2.19 (0.04) | 2.15 (0.05) | 2.13 (0.01) | 3.69 (0.02) | 2.72 (0.01) | 2.59 (0.19) | 2.45 (0.03) | 3.77 (0.19) |
| 1024bit | 256bit | 4.3 (0) | 4.14 (0.09) | 4.16 (0.02) | 6.61 (0.03) | 5.4 (0.05) | 4.85 (0.31) | 4.65 (0.02) | 6.53 (0.42) |
| 1024bit | 512bit | 8.39 (0.01) | 8.29 (0.25) | 8.43 (0.02) | 12.4 (0.01) | 10.49 (0.06) | 9.99 (0.83) | 9.39 (0.05) | 11.77 (0.68) |
| 1024bit | 1024bit | 16.45 (0.07) | 16.05 (0.4) | 16.6 (0.04) | 23.67 (0.04) | 20.54 (0.06) | 19.48 (1.53) | 18.51 (0.05) | 22.54 (1.59) |
| 2048bit | 2048bit | 50.94 (0.18) | 49.23 (0.34) | 52.5 (1.71) | 72.35 (1.85) | 63.96 (0.17) | 56.65 (0.38) | 56.65 (0.62) | 65.65 (3.24) |
| 3072bit | 3072bit | 229.87 (0.38) | 222.05 (0.37) | 234.09 (1.73) | 322.74 (4.9) | 289.35 (0.99) | 263.95 (1.45) | 264.06 (5.15) | 299.26 (4.67) |
| HT results | DP results | |||||||
| constant size |
Non | elements | elements (DP variant) |
exponent | Non | elements | elements (DP variant) |
exponent | 8bit | 0.1 (0.02) | 0.12 (0.02) | 0.11 (0.03) | 0.09 (0.05) | 0.13 (0.03) | 0.13 (0.04) | 0.12 (0.03) | 0.14 (0.02) |
|---|---|---|---|---|---|---|---|---|
| 16bit | 0.47 (0) | 0.47 (0.01) | 0.48 (0.01) | 0.4 (0.19) | 0.59 (0.01) | 0.6 (0.01) | 0.59 (0) | 0.6 (0.01) |
| 62bit | 1.79 (0.02) | 1.86 (0.02) | 1.84 (0) | 2.77 (1.36) | 2.27 (0) | 2.32 (0.02) | 2.31 (0.01) | 3.71 (0.13) |
| 124bit | 3.53 (0.03) | 3.65 (0.04) | 3.58 (0.03) | 4.87 (2.4) | 4.47 (0.03) | 4.53 (0) | 4.51 (0.04) | 6.25 (0.31) |
| 248bit | 6.94 (0.03) | 7.07 (0.04) | 7.08 (0.04) | 8.75 (4.31) | 8.72 (0.06) | 8.8 (0.04) | 8.82 (0.03) | 11.06 (0.86) |


| HT machine | DP machine | |||||
|---|---|---|---|---|---|---|
| Test | Original | final | speedup | Original | final (DP variant) |
speedup |
| RSA 1024 Encryption | 0.12(0) | 0.12(0) | 0% | 0.15(0) | 0.15(0) | 0% |
| RSA 1024 Decryption | 3.49(0.03) | 2.79(0.01) | 20% | 5.68(1.16) | 3.45(0.05) | 39% |
| Rabin 1024 Decryption | 4.16(0.19) | 4.1(0.22) | 1% | 5.17(0.27) | 5.14(0.3) | 1% |
| DLIES 1024 Encryption | 2.82(0.12) | 2.88(0.1) | -2% | 3.55(0.22) | 3.26(0.12) | 8% |
| DLIES 1024 Encryption with precomputation | 3.01(0.14) | 2.99(0.07) | 0% | 3.78(0.17) | 3.74(0.07) | 1% |
| DLIES 1024 Decryption | 9.1(0.13) | 9.1(0.13) | 0% | 11.42(0.21) | 10.66(0.18) | 7% |
| RSA 2048 Encryption | 0.29(0.01) | 0.3(0.01) | -1% | 0.37(0.02) | 0.37(0) | 0% |
| RSA 2048 Decryption | 18.28(0.12) | 14.84(0.09) | 19% | 22.84(0.14) | 17.53(1.88) | 23% |
| Rabin 2048 Decryption | 20.6(1.25) | 20.34(1.24) | 1% | 25.83(1.4) | 24.09(2.87) | 7% |
| DLIES 2048 Encryption | 12.45(0.21) | 11.54(0.27) | 7% | 15.62(0.44) | 13.03(0.17) | 17% |
| DLIES 2048 Encryption with precomputation | 11.79(0.27) | 11.69(0.36) | 1% | 14.75(0.44) | 14.57(0.17) | 1% |
| DLIES 2048 Decryption | 54.83(6.16) | 51.22(0.53) | 7% | 71.57(14.11) | 61.66(10.23) | 14% |
| RSA 1024 Signature | 3.61(0.06) | 2.79(0.03) | 23% | 5.53(1.49) | 3.44(0.08) | 38% |
| RSA 1024 Verification | 0.12(0) | 0.12(0) | 0% | 0.15(0) | 0.15(0) | 0% |
| Rabin 1024 Signature | 4.1(0.06) | 4.05(0.06) | 1% | 5.09(0.05) | 5.07(0.06) | 0% |
| Rabin 1024 Verification | 1.11(0.14) | 1.11(0.14) | 0% | 1.42(0.17) | 1.34(0.11) | 6% |
| RW 1024 Signature | 4.63(0.13) | 3.43(0.05) | 26% | 4.32(0.08) | 4.32(0.03) | 0% |
| RW 1024 Verification | 0.06(0) | 0.06(0) | 0% | 0.08(0.01) | 0.07(0.01) | 4% |
| NR 1024 Signature | 1.44(0.01) | 1.46(0.03) | -2% | 1.81(0.09) | 1.65(0.03) | 9% |
| NR 1024 Signature with precomputation | 0.73(0.03) | 0.71(0.03) | 2% | 0.93(0.06) | 0.89(0.03) | 4% |
| NR 1024 Verification | 1.64(0.07) | 1.68(0.16) | -2% | 2.06(0.08) | 2.08(0.08) | -1% |
| NR 1024 Verification with precomputation | 1.17(0.08) | 1.13(0.12) | 4% | 1.47(0.17) | 1.41(0.17) | 4% |
| DSA 1024 Signature | 1.41(0.04) | 1.44(0.01) | -2% | 1.76(0.04) | 1.65(0.02) | 6% |
| DSA 1024 Signature with precomputation | 0.7(0.02) | 0.71(0.02) | 0% | 0.88(0.01) | 0.88(0) | 0% |
| DSA 1024 Verification | 1.63(0.11) | 1.64(0.12) | -1% | 2.03(0.07) | 1.97(0.09) | 3% |
| DSA 1024 Verification with precomputation | 1.17(0.12) | 1.12(0.13) | 4% | 1.47(0.19) | 1.37(0.14) | 7% |
| ESIGN 1023 Signature | 0.33(0.01) | 0.33(0) | 0% | 0.41(0.01) | 0.41(0.01) | 0% |
| ESIGN 1023 Verification | 0.11(0) | 0.11(0.01) | -1% | 0.14(0) | 0.14(0.01) | -1% |
| ESIGN 1536 Signature | 0.65(0.02) | 0.65(0.02) | 0% | 0.82(0.04) | 0.81(0.02) | 0% |
| ESIGN 1536 Verification | 0.27(0.01) | 0.26(0) | 2% | 0.33(0) | 0.33(0.01) | 0% |
| RSA 2048 Signature | 18.21(0.33) | 14.79(0.09) | 19% | 22.73(0.21) | 17.25(1.14) | 24% |
| RSA 2048 Verification | 0.3(0.03) | 0.29(0) | 2% | 0.36(0.02) | 0.36(0.01) | 0% |
| Rabin 2048 Signature | 20.74(0.6) | 20.47(0.17) | 1% | 25.86(0.32) | 24.33(2.42) | 6% |
| Rabin 2048 Verification | 2.89(0.16) | 2.83(0.15) | 2% | 3.59(0.33) | 3.58(0.29) | 0% |
| RW 2048 Signature | 18.43(0.21) | 18.21(0.12) | 1% | 23(0.21) | 21.34(1.52) | 7% |
| RW 2048 Verification | 0.13(0.01) | 0.13(0.01) | 1% | 0.17(0.01) | 0.17(0.02) | 1% |
| NR 2048 Signature | 6.3(0.16) | 5.85(0.1) | 7% | 7.86(0.09) | 6.63(0.08) | 16% |
| NR 2048 Signature with precomputation | 2.1(0.1) | 2.06(0.03) | 2% | 2.64(0.1) | 2.61(0.08) | 1% |
| NR 2048 Verification | 7.18(0.56) | 7.14(0.19) | 1% | 8.94(0.29) | 8.98(0.33) | 0% |
| NR 2048 Verification with precomputation | 3.4(0.22) | 3.27(0.23) | 4% | 4.27(0.28) | 4.19(0.32) | 2% |
| ESIGN 2046 Signature | 0.77(0.02) | 0.77(0.02) | 0% | 0.96(0.02) | 0.97(0.02) | 0% |
| ESIGN 2046 Verification | 0.28(0) | 0.28(0.01) | 0% | 0.35(0.02) | 0.35(0) | 1% |
| DH 1024 Key-Pair Generation | 1.42(0) | 1.44(0.01) | -1% | 1.78(0.02) | 1.65(0.18) | 7% |
| DH 1024 Key-Pair Generation with precomputation | 1.45(0.01) | 1.51(0.01) | -4% | 1.83(0.02) | 1.91(0.12) | -4% |
| DH 1024 Key Agreement | 2.53(0.11) | 2.53(0.09) | 0% | 3.19(0.1) | 3.01(0.04) | 6% |
| DH 2048 Key-Pair Generation | 6.28(0.21) | 5.89(0.67) | 6% | 7.8(0.06) | 6.49(0.09) | 17% |
| DH 2048 Key-Pair Generation with precomputation | 5.96(0.23) | 5.95(0.67) | 0% | 7.34(0.06) | 7.32(0.07) | 0% |
| DH 2048 Key Agreement | 9.06(0.3) | 8.54(0.12) | 6% | 11.3(0.22) | 10.09(0.89) | 11% |
| MQV 1024 Key-Pair Generation | 1.4(0.02) | 1.41(0.01) | -1% | 1.75(0.03) | 1.6(0.14) | 8% |
| MQV 1024 Key-Pair Generation with precomputation | 0.7(0.03) | 0.69(0.02) | 1% | 0.86(0.06) | 0.87(0.02) | -1% |
| MQV 1024 Key Agreement | 2.63(0.04) | 2.62(0.09) | 0% | 3.31(0.04) | 2.91(0.05) | 12% |
| MQV 2048 Key-Pair Generation | 6.21(0.22) | 5.75(0.13) | 7% | 7.75(0.21) | 6.53(0.22) | 16% |
| MQV 2048 Key-Pair Generation with precomputation | 2.04(0.12) | 2.02(0.07) | 1% | 2.54(0.1) | 2.52(0.04) | 1% |
| MQV 2048 Key Agreement | 11.3(0.12) | 10.46(2.18) | 7% | 14.23(0.41) | 10.82(0.11) | 24% |
//current program - using worst-case scenario: return (expLen <= 6 ? 1 : (expLen <= 24 ? 2 : (expLen <= 96 ? 3 : (expLen <= 320 ? 4 : (expLen <= 960 ? 5 : (expLen <= 2688 ? 6 : 7)))))); //another possibility could be using average-case scenario: return (expLen <= 17 ? 1 : (expLen <= 47 ? 2 : (expLen <= 159 ? 3 : (expLen <= 479 ? 4 : (expLen <= 1343 ? 5 : (expLen <= 3583 ? 6 : 7))))));back to top