00001
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038 #ifndef MBEDTLS_BN_MUL_H
00039 #define MBEDTLS_BN_MUL_H
00040
00041 #include "bignum.h"
00042
00043 #if defined(MBEDTLS_HAVE_ASM)
00044
/*
 * The GCC-style blocks in this header are written with the `asm` keyword.
 * If no macro named `asm` exists yet, define it as an alias for `__asm`.
 */
00045 #ifndef asm
00046 #define asm __asm
00047 #endif
00048
00049
00050 #if defined(__GNUC__) && \
00051 ( !defined(__ARMCC_VERSION) || __ARMCC_VERSION >= 6000000 )
/*
 * 32-bit x86, GCC-style inline assembly.
 *
 * MULADDC_INIT, one or more MULADDC_CORE / MULADDC_HUIT expansions and
 * MULADDC_STOP are concatenated by the user into a single asm statement.
 * The statement refers to the variables t, s, d, c and b of the expansion
 * site through memory operands (see the operand lists in MULADDC_STOP):
 *   %0 = t (out), %1 = c (out), %2 = d (out), %3 = s (out),
 *   %4 = t (in),  %5 = s (in),  %6 = d (in),  %7 = c (in), %8 = b (in).
 *
 * NOTE(review): the words read through esi and written through edi are not
 * listed as memory operands and there is no "memory" clobber - confirm this
 * is acceptable for the compilers in use.
 */
00052 #if defined(__i386__)
00053
/*
 * Open the asm statement. ebx is saved into t (it is restored by
 * MULADDC_STOP and is not in the clobber list), then
 * esi = s, edi = d, ecx = c, ebx = b.
 */
00054 #define MULADDC_INIT \
00055 asm( \
00056 "movl %%ebx, %0 \n\t" \
00057 "movl %5, %%esi \n\t" \
00058 "movl %6, %%edi \n\t" \
00059 "movl %7, %%ecx \n\t" \
00060 "movl %8, %%ebx \n\t"
00061
/*
 * One 32-bit word: lodsl fetches the word at esi into eax and advances esi,
 * mull forms edx:eax = eax * ebx, then the carry in ecx and the word at
 * (edi) are added with carry propagation into edx. The new carry is moved
 * to ecx and stosl stores eax at edi and advances edi.
 */
00062 #define MULADDC_CORE \
00063 "lodsl \n\t" \
00064 "mull %%ebx \n\t" \
00065 "addl %%ecx, %%eax \n\t" \
00066 "adcl $0, %%edx \n\t" \
00067 "addl (%%edi), %%eax \n\t" \
00068 "adcl $0, %%edx \n\t" \
00069 "movl %%edx, %%ecx \n\t" \
00070 "stosl \n\t"
00071
00072 #if defined(MBEDTLS_HAVE_SSE2)
00073
/*
 * Eight 32-bit words in one expansion, using the mm0-mm7 registers with
 * pmuludq / paddq. It reads offsets 0..28 from esi, updates offsets 0..28
 * at edi, advances both pointers by 32 and leaves the carry in ecx.
 *
 * NOTE(review): mm0-mm7 are modified here but do not appear in the clobber
 * list of MULADDC_STOP - confirm that this is intended.
 */
00074 #define MULADDC_HUIT \
00075 "movd %%ecx, %%mm1 \n\t" \
00076 "movd %%ebx, %%mm0 \n\t" \
00077 "movd (%%edi), %%mm3 \n\t" \
00078 "paddq %%mm3, %%mm1 \n\t" \
00079 "movd (%%esi), %%mm2 \n\t" \
00080 "pmuludq %%mm0, %%mm2 \n\t" \
00081 "movd 4(%%esi), %%mm4 \n\t" \
00082 "pmuludq %%mm0, %%mm4 \n\t" \
00083 "movd 8(%%esi), %%mm6 \n\t" \
00084 "pmuludq %%mm0, %%mm6 \n\t" \
00085 "movd 12(%%esi), %%mm7 \n\t" \
00086 "pmuludq %%mm0, %%mm7 \n\t" \
00087 "paddq %%mm2, %%mm1 \n\t" \
00088 "movd 4(%%edi), %%mm3 \n\t" \
00089 "paddq %%mm4, %%mm3 \n\t" \
00090 "movd 8(%%edi), %%mm5 \n\t" \
00091 "paddq %%mm6, %%mm5 \n\t" \
00092 "movd 12(%%edi), %%mm4 \n\t" \
00093 "paddq %%mm4, %%mm7 \n\t" \
00094 "movd %%mm1, (%%edi) \n\t" \
00095 "movd 16(%%esi), %%mm2 \n\t" \
00096 "pmuludq %%mm0, %%mm2 \n\t" \
00097 "psrlq $32, %%mm1 \n\t" \
00098 "movd 20(%%esi), %%mm4 \n\t" \
00099 "pmuludq %%mm0, %%mm4 \n\t" \
00100 "paddq %%mm3, %%mm1 \n\t" \
00101 "movd 24(%%esi), %%mm6 \n\t" \
00102 "pmuludq %%mm0, %%mm6 \n\t" \
00103 "movd %%mm1, 4(%%edi) \n\t" \
00104 "psrlq $32, %%mm1 \n\t" \
00105 "movd 28(%%esi), %%mm3 \n\t" \
00106 "pmuludq %%mm0, %%mm3 \n\t" \
00107 "paddq %%mm5, %%mm1 \n\t" \
00108 "movd 16(%%edi), %%mm5 \n\t" \
00109 "paddq %%mm5, %%mm2 \n\t" \
00110 "movd %%mm1, 8(%%edi) \n\t" \
00111 "psrlq $32, %%mm1 \n\t" \
00112 "paddq %%mm7, %%mm1 \n\t" \
00113 "movd 20(%%edi), %%mm5 \n\t" \
00114 "paddq %%mm5, %%mm4 \n\t" \
00115 "movd %%mm1, 12(%%edi) \n\t" \
00116 "psrlq $32, %%mm1 \n\t" \
00117 "paddq %%mm2, %%mm1 \n\t" \
00118 "movd 24(%%edi), %%mm5 \n\t" \
00119 "paddq %%mm5, %%mm6 \n\t" \
00120 "movd %%mm1, 16(%%edi) \n\t" \
00121 "psrlq $32, %%mm1 \n\t" \
00122 "paddq %%mm4, %%mm1 \n\t" \
00123 "movd 28(%%edi), %%mm5 \n\t" \
00124 "paddq %%mm5, %%mm3 \n\t" \
00125 "movd %%mm1, 20(%%edi) \n\t" \
00126 "psrlq $32, %%mm1 \n\t" \
00127 "paddq %%mm6, %%mm1 \n\t" \
00128 "movd %%mm1, 24(%%edi) \n\t" \
00129 "psrlq $32, %%mm1 \n\t" \
00130 "paddq %%mm3, %%mm1 \n\t" \
00131 "movd %%mm1, 28(%%edi) \n\t" \
00132 "addl $32, %%edi \n\t" \
00133 "addl $32, %%esi \n\t" \
00134 "psrlq $32, %%mm1 \n\t" \
00135 "movd %%mm1, %%ecx \n\t"
00136
/*
 * Close the asm statement (SSE2 build): issue emms, restore ebx from t,
 * write ecx, edi and esi back to c, d and s, then supply the operand and
 * clobber lists.
 */
00137 #define MULADDC_STOP \
00138 "emms \n\t" \
00139 "movl %4, %%ebx \n\t" \
00140 "movl %%ecx, %1 \n\t" \
00141 "movl %%edi, %2 \n\t" \
00142 "movl %%esi, %3 \n\t" \
00143 : "=m" (t), "=m" (c), "=m" (d), "=m" (s) \
00144 : "m" (t), "m" (s), "m" (d), "m" (c), "m" (b) \
00145 : "eax", "ecx", "edx", "esi", "edi" \
00146 );
00147
00148 #else
00149
/*
 * Close the asm statement (non-SSE2 build): same as the SSE2 version but
 * without the leading emms.
 */
00150 #define MULADDC_STOP \
00151 "movl %4, %%ebx \n\t" \
00152 "movl %%ecx, %1 \n\t" \
00153 "movl %%edi, %2 \n\t" \
00154 "movl %%esi, %3 \n\t" \
00155 : "=m" (t), "=m" (c), "=m" (d), "=m" (s) \
00156 : "m" (t), "m" (s), "m" (d), "m" (c), "m" (b) \
00157 : "eax", "ecx", "edx", "esi", "edi" \
00158 );
00159 #endif
00160 #endif
00161
/*
 * x86-64, GCC-style inline assembly.
 *
 * MULADDC_INIT, one or more MULADDC_CORE expansions and MULADDC_STOP are
 * concatenated by the user into a single asm statement. Unlike the other
 * variants, the variables of the expansion site are bound directly to
 * registers by the constraints in MULADDC_STOP:
 *   c <-> rcx ("+c"), d <-> rdi ("+D"), s <-> rsi ("+S"), b -> rbx ("b").
 */
00162 #if defined(__amd64__) || defined (__x86_64__)
00163
/* Open the asm statement and zero r8, which CORE uses as a constant 0. */
00164 #define MULADDC_INIT \
00165 asm( \
00166 "xorq %%r8, %%r8 \n\t"
00167
/*
 * One 64-bit word: rdx:rax = (word at rsi) * rbx, the carry in rcx is added
 * to the low half, the low half is added into the word at (rdi), and the
 * new carry (high half plus both carries) is left in rcx. rsi and rdi are
 * each advanced by 8.
 */
00168 #define MULADDC_CORE \
00169 "movq (%%rsi), %%rax \n\t" \
00170 "mulq %%rbx \n\t" \
00171 "addq $8, %%rsi \n\t" \
00172 "addq %%rcx, %%rax \n\t" \
00173 "movq %%r8, %%rcx \n\t" \
00174 "adcq $0, %%rdx \n\t" \
00175 "nop \n\t" \
00176 "addq %%rax, (%%rdi) \n\t" \
00177 "adcq %%rdx, %%rcx \n\t" \
00178 "addq $8, %%rdi \n\t"
00179
/*
 * Close the asm statement with the operand and clobber lists.
 *
 * The "memory" clobber is required: CORE loads through rsi and stores
 * through rdi, but the operands only describe the pointer values, not the
 * words they point to. Without it the compiler may keep those words cached
 * in registers or move accesses to them across the asm statement. A blanket
 * clobber is used because the number of CORE expansions, and therefore the
 * number of words touched, is chosen at the expansion site.
 */
00180 #define MULADDC_STOP \
00181 : "+c" (c), "+D" (d), "+S" (s) \
00182 : "b" (b) \
00183 : "rax", "rdx", "r8", "memory" \
00184 );
00185
00186 #endif
00187
/*
 * Motorola 68020 / CPU32, GCC-style inline assembly.
 *
 * INIT, CORE/HUIT expansions and STOP are concatenated into one asm
 * statement. Operands (see MULADDC_STOP): %0 = c, %1 = d, %2 = s (outputs);
 * %3 = s, %4 = d, %5 = c, %6 = b (inputs), all in memory.
 */
00188 #if defined(__mc68020__) || defined(__mcpu32__)
00189
/* Open the asm statement: a2 = s, a3 = d, d3 = c, d2 = b, d0 = 0. */
00190 #define MULADDC_INIT \
00191 asm( \
00192 "movl %3, %%a2 \n\t" \
00193 "movl %4, %%a3 \n\t" \
00194 "movl %5, %%d3 \n\t" \
00195 "movl %6, %%d2 \n\t" \
00196 "moveq #0, %%d0 \n\t"
00197
/*
 * One 32-bit word: d1 is loaded from (a2)+, mulul leaves the 64-bit product
 * with d2 in d4:d1, the carry d3 is added in, d1 is added into the word at
 * (a3)+ and the new carry is rebuilt in d3.
 */
00198 #define MULADDC_CORE \
00199 "movel %%a2@+, %%d1 \n\t" \
00200 "mulul %%d2, %%d4:%%d1 \n\t" \
00201 "addl %%d3, %%d1 \n\t" \
00202 "addxl %%d0, %%d4 \n\t" \
00203 "moveq #0, %%d3 \n\t" \
00204 "addl %%d1, %%a3@+ \n\t" \
00205 "addxl %%d4, %%d3 \n\t"
00206
/*
 * Close the asm statement: store d3, a3 and a2 back to c, d and s, then
 * supply the operand and clobber lists.
 */
00207 #define MULADDC_STOP \
00208 "movl %%d3, %0 \n\t" \
00209 "movl %%a3, %1 \n\t" \
00210 "movl %%a2, %2 \n\t" \
00211 : "=m" (c), "=m" (d), "=m" (s) \
00212 : "m" (s), "m" (d), "m" (c), "m" (b) \
00213 : "d0", "d1", "d2", "d3", "d4", "a2", "a3" \
00214 );
00215
/*
 * Eight 32-bit words in one expansion. The high half of each product
 * alternates between d4 and d3 so that it can be folded into the next word
 * with addxl; the final addxl collects the last carry into d3.
 */
00216 #define MULADDC_HUIT \
00217 "movel %%a2@+, %%d1 \n\t" \
00218 "mulul %%d2, %%d4:%%d1 \n\t" \
00219 "addxl %%d3, %%d1 \n\t" \
00220 "addxl %%d0, %%d4 \n\t" \
00221 "addl %%d1, %%a3@+ \n\t" \
00222 "movel %%a2@+, %%d1 \n\t" \
00223 "mulul %%d2, %%d3:%%d1 \n\t" \
00224 "addxl %%d4, %%d1 \n\t" \
00225 "addxl %%d0, %%d3 \n\t" \
00226 "addl %%d1, %%a3@+ \n\t" \
00227 "movel %%a2@+, %%d1 \n\t" \
00228 "mulul %%d2, %%d4:%%d1 \n\t" \
00229 "addxl %%d3, %%d1 \n\t" \
00230 "addxl %%d0, %%d4 \n\t" \
00231 "addl %%d1, %%a3@+ \n\t" \
00232 "movel %%a2@+, %%d1 \n\t" \
00233 "mulul %%d2, %%d3:%%d1 \n\t" \
00234 "addxl %%d4, %%d1 \n\t" \
00235 "addxl %%d0, %%d3 \n\t" \
00236 "addl %%d1, %%a3@+ \n\t" \
00237 "movel %%a2@+, %%d1 \n\t" \
00238 "mulul %%d2, %%d4:%%d1 \n\t" \
00239 "addxl %%d3, %%d1 \n\t" \
00240 "addxl %%d0, %%d4 \n\t" \
00241 "addl %%d1, %%a3@+ \n\t" \
00242 "movel %%a2@+, %%d1 \n\t" \
00243 "mulul %%d2, %%d3:%%d1 \n\t" \
00244 "addxl %%d4, %%d1 \n\t" \
00245 "addxl %%d0, %%d3 \n\t" \
00246 "addl %%d1, %%a3@+ \n\t" \
00247 "movel %%a2@+, %%d1 \n\t" \
00248 "mulul %%d2, %%d4:%%d1 \n\t" \
00249 "addxl %%d3, %%d1 \n\t" \
00250 "addxl %%d0, %%d4 \n\t" \
00251 "addl %%d1, %%a3@+ \n\t" \
00252 "movel %%a2@+, %%d1 \n\t" \
00253 "mulul %%d2, %%d3:%%d1 \n\t" \
00254 "addxl %%d4, %%d1 \n\t" \
00255 "addxl %%d0, %%d3 \n\t" \
00256 "addl %%d1, %%a3@+ \n\t" \
00257 "addxl %%d0, %%d3 \n\t"
00258
00259 #endif
00260
/*
 * PowerPC, GCC-style inline assembly. Four macro sets follow: 64-bit and
 * 32-bit, each in two spellings (bare register names r3.. when both
 * __MACH__ and __APPLE__ are defined, %%r3.. otherwise). Apart from the
 * register spelling and the word size (ld/std/mulld/mulhdu with step 8
 * versus lwz/stw/mullw/mulhwu with step 4) the four sets are identical.
 *
 * Operands (see each MULADDC_STOP): %0 = c, %1 = d, %2 = s (outputs);
 * %3 = s, %4 = d, %5 = c, %6 = b (inputs), all in memory.
 * Register roles: r3 = s pointer, r4 = d pointer, r5 = carry, r6 = b,
 * r7-r9 = scratch.
 */
00261 #if defined(__powerpc64__) || defined(__ppc64__)
00262
00263 #if defined(__MACH__) && defined(__APPLE__)
00264
/*
 * Open the asm statement (64-bit, Apple spelling). Both pointers are biased
 * by -8 because CORE accesses them at offset 8; STOP adds the 8 back.
 * NOTE(review): "addic r5, r5, 0" leaves r5 unchanged; presumably it is
 * there to put the carry bit in a known state before the first adde -
 * confirm.
 */
00265 #define MULADDC_INIT \
00266 asm( \
00267 "ld r3, %3 \n\t" \
00268 "ld r4, %4 \n\t" \
00269 "ld r5, %5 \n\t" \
00270 "ld r6, %6 \n\t" \
00271 "addi r3, r3, -8 \n\t" \
00272 "addi r4, r4, -8 \n\t" \
00273 "addic r5, r5, 0 \n\t"
00274
/*
 * One 64-bit word: load the next source word with pointer update, form the
 * low (r8) and high (r9) halves of its product with r6, add the carry r5,
 * add the destination word and store the result with pointer update. The
 * new carry is r9 plus the carry bit.
 */
00275 #define MULADDC_CORE \
00276 "ldu r7, 8(r3) \n\t" \
00277 "mulld r8, r7, r6 \n\t" \
00278 "mulhdu r9, r7, r6 \n\t" \
00279 "adde r8, r8, r5 \n\t" \
00280 "ld r7, 8(r4) \n\t" \
00281 "addze r5, r9 \n\t" \
00282 "addc r8, r8, r7 \n\t" \
00283 "stdu r8, 8(r4) \n\t"
00284
/*
 * Close the asm statement: fold the pending carry bit into r5, undo the -8
 * pointer bias and store r5, r4, r3 back to c, d, s.
 */
00285 #define MULADDC_STOP \
00286 "addze r5, r5 \n\t" \
00287 "addi r4, r4, 8 \n\t" \
00288 "addi r3, r3, 8 \n\t" \
00289 "std r5, %0 \n\t" \
00290 "std r4, %1 \n\t" \
00291 "std r3, %2 \n\t" \
00292 : "=m" (c), "=m" (d), "=m" (s) \
00293 : "m" (s), "m" (d), "m" (c), "m" (b) \
00294 : "r3", "r4", "r5", "r6", "r7", "r8", "r9" \
00295 );
00296
00297
00298 #else
00299
/* Open the asm statement (64-bit, %%rN spelling); same steps as above. */
00300 #define MULADDC_INIT \
00301 asm( \
00302 "ld %%r3, %3 \n\t" \
00303 "ld %%r4, %4 \n\t" \
00304 "ld %%r5, %5 \n\t" \
00305 "ld %%r6, %6 \n\t" \
00306 "addi %%r3, %%r3, -8 \n\t" \
00307 "addi %%r4, %%r4, -8 \n\t" \
00308 "addic %%r5, %%r5, 0 \n\t"
00309
/* One 64-bit word (%%rN spelling); same steps as above. */
00310 #define MULADDC_CORE \
00311 "ldu %%r7, 8(%%r3) \n\t" \
00312 "mulld %%r8, %%r7, %%r6 \n\t" \
00313 "mulhdu %%r9, %%r7, %%r6 \n\t" \
00314 "adde %%r8, %%r8, %%r5 \n\t" \
00315 "ld %%r7, 8(%%r4) \n\t" \
00316 "addze %%r5, %%r9 \n\t" \
00317 "addc %%r8, %%r8, %%r7 \n\t" \
00318 "stdu %%r8, 8(%%r4) \n\t"
00319
/* Close the asm statement (64-bit, %%rN spelling); same steps as above. */
00320 #define MULADDC_STOP \
00321 "addze %%r5, %%r5 \n\t" \
00322 "addi %%r4, %%r4, 8 \n\t" \
00323 "addi %%r3, %%r3, 8 \n\t" \
00324 "std %%r5, %0 \n\t" \
00325 "std %%r4, %1 \n\t" \
00326 "std %%r3, %2 \n\t" \
00327 : "=m" (c), "=m" (d), "=m" (s) \
00328 : "m" (s), "m" (d), "m" (c), "m" (b) \
00329 : "r3", "r4", "r5", "r6", "r7", "r8", "r9" \
00330 );
00331
00332 #endif
00333
00334 #elif defined(__powerpc__) || defined(__ppc__)
00335
00336 #if defined(__MACH__) && defined(__APPLE__)
00337
/*
 * Open the asm statement (32-bit, Apple spelling). Pointers are biased by
 * -4 because CORE accesses them at offset 4; STOP adds the 4 back.
 */
00338 #define MULADDC_INIT \
00339 asm( \
00340 "lwz r3, %3 \n\t" \
00341 "lwz r4, %4 \n\t" \
00342 "lwz r5, %5 \n\t" \
00343 "lwz r6, %6 \n\t" \
00344 "addi r3, r3, -4 \n\t" \
00345 "addi r4, r4, -4 \n\t" \
00346 "addic r5, r5, 0 \n\t"
00347
/* One 32-bit word: the 32-bit counterpart of the 64-bit CORE above. */
00348 #define MULADDC_CORE \
00349 "lwzu r7, 4(r3) \n\t" \
00350 "mullw r8, r7, r6 \n\t" \
00351 "mulhwu r9, r7, r6 \n\t" \
00352 "adde r8, r8, r5 \n\t" \
00353 "lwz r7, 4(r4) \n\t" \
00354 "addze r5, r9 \n\t" \
00355 "addc r8, r8, r7 \n\t" \
00356 "stwu r8, 4(r4) \n\t"
00357
/*
 * Close the asm statement: fold the pending carry bit into r5, undo the -4
 * pointer bias and store r5, r4, r3 back to c, d, s.
 */
00358 #define MULADDC_STOP \
00359 "addze r5, r5 \n\t" \
00360 "addi r4, r4, 4 \n\t" \
00361 "addi r3, r3, 4 \n\t" \
00362 "stw r5, %0 \n\t" \
00363 "stw r4, %1 \n\t" \
00364 "stw r3, %2 \n\t" \
00365 : "=m" (c), "=m" (d), "=m" (s) \
00366 : "m" (s), "m" (d), "m" (c), "m" (b) \
00367 : "r3", "r4", "r5", "r6", "r7", "r8", "r9" \
00368 );
00369
00370 #else
00371
/* Open the asm statement (32-bit, %%rN spelling); same steps as above. */
00372 #define MULADDC_INIT \
00373 asm( \
00374 "lwz %%r3, %3 \n\t" \
00375 "lwz %%r4, %4 \n\t" \
00376 "lwz %%r5, %5 \n\t" \
00377 "lwz %%r6, %6 \n\t" \
00378 "addi %%r3, %%r3, -4 \n\t" \
00379 "addi %%r4, %%r4, -4 \n\t" \
00380 "addic %%r5, %%r5, 0 \n\t"
00381
/* One 32-bit word (%%rN spelling); same steps as above. */
00382 #define MULADDC_CORE \
00383 "lwzu %%r7, 4(%%r3) \n\t" \
00384 "mullw %%r8, %%r7, %%r6 \n\t" \
00385 "mulhwu %%r9, %%r7, %%r6 \n\t" \
00386 "adde %%r8, %%r8, %%r5 \n\t" \
00387 "lwz %%r7, 4(%%r4) \n\t" \
00388 "addze %%r5, %%r9 \n\t" \
00389 "addc %%r8, %%r8, %%r7 \n\t" \
00390 "stwu %%r8, 4(%%r4) \n\t"
00391
/* Close the asm statement (32-bit, %%rN spelling); same steps as above. */
00392 #define MULADDC_STOP \
00393 "addze %%r5, %%r5 \n\t" \
00394 "addi %%r4, %%r4, 4 \n\t" \
00395 "addi %%r3, %%r3, 4 \n\t" \
00396 "stw %%r5, %0 \n\t" \
00397 "stw %%r4, %1 \n\t" \
00398 "stw %%r3, %2 \n\t" \
00399 : "=m" (c), "=m" (d), "=m" (s) \
00400 : "m" (s), "m" (d), "m" (c), "m" (b) \
00401 : "r3", "r4", "r5", "r6", "r7", "r8", "r9" \
00402 );
00403
00404 #endif
00405
00406 #endif
00407
00408
00409
00410
00411
/*
 * SPARC, GCC-style inline assembly. The leading "0 &&" in the condition
 * below means this whole section is never compiled.
 *
 * Operands (see each MULADDC_STOP): %0 = c, %1 = d, %2 = s (outputs);
 * %3 = s, %4 = d, %5 = c, %6 = b (inputs), all in memory.
 * Register roles: o0 = s pointer, o1 = d pointer, o2 = carry, o3 = b,
 * o4, o5, g1 = scratch.
 */
00412 #if 0 && defined(__sparc__)
00413 #if defined(__sparc64__)
00414
/*
 * Open the asm statement (sparc64): the two pointers are loaded with ldx,
 * while c and b are loaded with ld.
 */
00415 #define MULADDC_INIT \
00416 asm( \
00417 "ldx %3, %%o0 \n\t" \
00418 "ldx %4, %%o1 \n\t" \
00419 "ld %5, %%o2 \n\t" \
00420 "ld %6, %%o3 \n\t"
00421
/*
 * One 32-bit word: load the source and destination words, multiply the
 * source word by o3 with umul (high half read back from the y register),
 * add the carry and the destination word, store the result and leave the
 * new carry in o2. Both pointers advance by 4.
 */
00422 #define MULADDC_CORE \
00423 "ld [%%o0], %%o4 \n\t" \
00424 "inc 4, %%o0 \n\t" \
00425 "ld [%%o1], %%o5 \n\t" \
00426 "umul %%o3, %%o4, %%o4 \n\t" \
00427 "addcc %%o4, %%o2, %%o4 \n\t" \
00428 "rd %%y, %%g1 \n\t" \
00429 "addx %%g1, 0, %%g1 \n\t" \
00430 "addcc %%o4, %%o5, %%o4 \n\t" \
00431 "st %%o4, [%%o1] \n\t" \
00432 "addx %%g1, 0, %%o2 \n\t" \
00433 "inc 4, %%o1 \n\t"
00434
/*
 * Close the asm statement (sparc64): store the carry with st and the two
 * pointers with stx, then supply the operand and clobber lists.
 */
00435 #define MULADDC_STOP \
00436 "st %%o2, %0 \n\t" \
00437 "stx %%o1, %1 \n\t" \
00438 "stx %%o0, %2 \n\t" \
00439 : "=m" (c), "=m" (d), "=m" (s) \
00440 : "m" (s), "m" (d), "m" (c), "m" (b) \
00441 : "g1", "o0", "o1", "o2", "o3", "o4", \
00442 "o5" \
00443 );
00444
00445 #else
00446
/* Open the asm statement (non-sparc64): all four values are loaded with ld. */
00447 #define MULADDC_INIT \
00448 asm( \
00449 "ld %3, %%o0 \n\t" \
00450 "ld %4, %%o1 \n\t" \
00451 "ld %5, %%o2 \n\t" \
00452 "ld %6, %%o3 \n\t"
00453
/* One 32-bit word; instruction sequence identical to the sparc64 CORE. */
00454 #define MULADDC_CORE \
00455 "ld [%%o0], %%o4 \n\t" \
00456 "inc 4, %%o0 \n\t" \
00457 "ld [%%o1], %%o5 \n\t" \
00458 "umul %%o3, %%o4, %%o4 \n\t" \
00459 "addcc %%o4, %%o2, %%o4 \n\t" \
00460 "rd %%y, %%g1 \n\t" \
00461 "addx %%g1, 0, %%g1 \n\t" \
00462 "addcc %%o4, %%o5, %%o4 \n\t" \
00463 "st %%o4, [%%o1] \n\t" \
00464 "addx %%g1, 0, %%o2 \n\t" \
00465 "inc 4, %%o1 \n\t"
00466
/*
 * Close the asm statement (non-sparc64): store carry and both pointers with
 * st, then supply the operand and clobber lists.
 */
00467 #define MULADDC_STOP \
00468 "st %%o2, %0 \n\t" \
00469 "st %%o1, %1 \n\t" \
00470 "st %%o0, %2 \n\t" \
00471 : "=m" (c), "=m" (d), "=m" (s) \
00472 : "m" (s), "m" (d), "m" (c), "m" (b) \
00473 : "g1", "o0", "o1", "o2", "o3", "o4", \
00474 "o5" \
00475 );
00476
00477 #endif
00478 #endif
00479
/*
 * MicroBlaze, GCC-style inline assembly.
 *
 * MULADDC_INIT, one or more MULADDC_CORE expansions and MULADDC_STOP are
 * concatenated by the user into a single asm statement.
 * Operands (see MULADDC_STOP): %0 = c, %1 = d, %2 = s (outputs);
 * %3 = s, %4 = d, %5 = c, %6 = b (inputs), all in memory.
 * Register roles: r3 = s pointer, r4 = d pointer, r5 = carry,
 * r6 = high 16 bits of b, r7 = low 16 bits of b, r8-r13 = scratch.
 */
00480 #if defined(__microblaze__) || defined(microblaze)
00481
/*
 * Open the asm statement: load s, d, c, b and split b into its low half
 * (r7) and high half (r6).
 */
00482 #define MULADDC_INIT \
00483 asm( \
00484 "lwi r3, %3 \n\t" \
00485 "lwi r4, %4 \n\t" \
00486 "lwi r5, %5 \n\t" \
00487 "lwi r6, %6 \n\t" \
00488 "andi r7, r6, 0xffff \n\t" \
00489 "bsrli r6, r6, 16 \n\t"
00490
/*
 * One 32-bit word, built from 16-bit pieces: the source word is fetched as
 * two halfwords (r8, r9), the four partial products with r6/r7 are formed
 * with mul and combined into r13:r12, then the destination word and the
 * carry r5 are added. r12 is stored at r4, the new carry goes to r5, and
 * the pointers advance by 4 in total.
 */
00491 #define MULADDC_CORE \
00492 "lhui r8, r3, 0 \n\t" \
00493 "addi r3, r3, 2 \n\t" \
00494 "lhui r9, r3, 0 \n\t" \
00495 "addi r3, r3, 2 \n\t" \
00496 "mul r10, r9, r6 \n\t" \
00497 "mul r11, r8, r7 \n\t" \
00498 "mul r12, r9, r7 \n\t" \
00499 "mul r13, r8, r6 \n\t" \
00500 "bsrli r8, r10, 16 \n\t" \
00501 "bsrli r9, r11, 16 \n\t" \
00502 "add r13, r13, r8 \n\t" \
00503 "add r13, r13, r9 \n\t" \
00504 "bslli r10, r10, 16 \n\t" \
00505 "bslli r11, r11, 16 \n\t" \
00506 "add r12, r12, r10 \n\t" \
00507 "addc r13, r13, r0 \n\t" \
00508 "add r12, r12, r11 \n\t" \
00509 "addc r13, r13, r0 \n\t" \
00510 "lwi r10, r4, 0 \n\t" \
00511 "add r12, r12, r10 \n\t" \
00512 "addc r13, r13, r0 \n\t" \
00513 "add r12, r12, r5 \n\t" \
00514 "addc r5, r13, r0 \n\t" \
00515 "swi r12, r4, 0 \n\t" \
00516 "addi r4, r4, 4 \n\t"
00517
/*
 * Close the asm statement: store r5, r4 and r3 back to c, d and s, then
 * supply the operand and clobber lists.
 *
 * Every clobber must be its own comma-separated string. A missing comma
 * between "r4" and "r5" would make the compiler concatenate the adjacent
 * literals into the single name "r4r5", leaving both registers undeclared
 * even though INIT/CORE overwrite them.
 */
00518 #define MULADDC_STOP \
00519 "swi r5, %0 \n\t" \
00520 "swi r4, %1 \n\t" \
00521 "swi r3, %2 \n\t" \
00522 : "=m" (c), "=m" (d), "=m" (s) \
00523 : "m" (s), "m" (d), "m" (c), "m" (b) \
00524 : "r3", "r4", "r5", "r6", "r7", "r8", \
00525 "r9", "r10", "r11", "r12", "r13" \
00526 );
00527
00528 #endif
00529
/*
 * TriCore, GCC-style inline assembly.
 *
 * Operands (see MULADDC_STOP): %0 = c, %1 = d, %2 = s (outputs);
 * %3 = s, %4 = d, %5 = c, %6 = b (inputs), all in memory.
 * Register roles: a2 = s pointer, a3 = d pointer, d4 = carry, d1 = b.
 */
00530 #if defined(__tricore__)
00531
/*
 * Open the asm statement: load s, d, c, b and clear d5.
 * NOTE(review): d5 is written here but neither "d5" nor "e4" appears in the
 * clobber list of MULADDC_STOP - confirm whether "d4" is meant to cover it.
 */
00532 #define MULADDC_INIT \
00533 asm( \
00534 "ld.a %%a2, %3 \n\t" \
00535 "ld.a %%a3, %4 \n\t" \
00536 "ld.w %%d4, %5 \n\t" \
00537 "ld.w %%d1, %6 \n\t" \
00538 "xor %%d5, %%d5 \n\t"
00539
/*
 * One 32-bit word: load the source word with post-increment, madd.u
 * computes e2 = e4 + d0 * d1, the destination word is added to d2 with the
 * carry going into d3, d3 becomes the new carry in d4, and d2 is stored
 * with post-increment.
 */
00540 #define MULADDC_CORE \
00541 "ld.w %%d0, [%%a2+] \n\t" \
00542 "madd.u %%e2, %%e4, %%d0, %%d1 \n\t" \
00543 "ld.w %%d0, [%%a3] \n\t" \
00544 "addx %%d2, %%d2, %%d0 \n\t" \
00545 "addc %%d3, %%d3, 0 \n\t" \
00546 "mov %%d4, %%d3 \n\t" \
00547 "st.w [%%a3+], %%d2 \n\t"
00548
/*
 * Close the asm statement: store d4, a3 and a2 back to c, d and s, then
 * supply the operand and clobber lists.
 */
00549 #define MULADDC_STOP \
00550 "st.w %0, %%d4 \n\t" \
00551 "st.a %1, %%a3 \n\t" \
00552 "st.a %2, %%a2 \n\t" \
00553 : "=m" (c), "=m" (d), "=m" (s) \
00554 : "m" (s), "m" (d), "m" (c), "m" (b) \
00555 : "d0", "d1", "e2", "d4", "a2", "a3" \
00556 );
00557
00558 #endif
00559
00560
00561
00562
00563
00564
00565
00566
00567
00568
00569
00570
00571
/*
 * ARM, GCC-style inline assembly.
 *
 * Both ARM variants below use r7 and list it as clobbered. When building
 * with GCC without optimisation (__OPTIMIZE__ undefined) the ARM assembly
 * is disabled via MULADDC_CANNOT_USE_R7.
 * NOTE(review): presumably because r7 is reserved (e.g. as frame pointer)
 * in such builds - confirm.
 */
00572 #if defined(__GNUC__) && !defined(__OPTIMIZE__)
00573 #define MULADDC_CANNOT_USE_R7
00574 #endif
00575
00576 #if defined(__arm__) && !defined(MULADDC_CANNOT_USE_R7)
00577
00578 #if defined(__thumb__) && !defined(__thumb2__)
00579
/*
 * Thumb (not Thumb-2) variant.
 * Operands (see MULADDC_STOP): %0 = c, %1 = d, %2 = s (outputs);
 * %3 = s, %4 = d, %5 = c, %6 = b (inputs), all in memory.
 *
 * Open the asm statement: r0 = s, r1 = d, r2 = c, r3 = b; the high 16 bits
 * of b are parked in r9 and the low 16 bits in r8.
 */
00580 #define MULADDC_INIT \
00581 asm( \
00582 "ldr r0, %3 \n\t" \
00583 "ldr r1, %4 \n\t" \
00584 "ldr r2, %5 \n\t" \
00585 "ldr r3, %6 \n\t" \
00586 "lsr r7, r3, #16 \n\t" \
00587 "mov r9, r7 \n\t" \
00588 "lsl r7, r3, #16 \n\t" \
00589 "lsr r7, r7, #16 \n\t" \
00590 "mov r8, r7 \n\t"
00591
/*
 * One 32-bit word, built from 16-bit pieces: the source word is loaded with
 * post-increment and split into high (r7) and low (r6) halves, the four
 * partial products with r8/r9 are formed with mul and combined into r5:r4
 * together with the old carry r2, then the destination word is added. r4 is
 * stored with post-increment and the new carry is left in r2.
 */
00592 #define MULADDC_CORE \
00593 "ldmia r0!, {r6} \n\t" \
00594 "lsr r7, r6, #16 \n\t" \
00595 "lsl r6, r6, #16 \n\t" \
00596 "lsr r6, r6, #16 \n\t" \
00597 "mov r4, r8 \n\t" \
00598 "mul r4, r6 \n\t" \
00599 "mov r3, r9 \n\t" \
00600 "mul r6, r3 \n\t" \
00601 "mov r5, r9 \n\t" \
00602 "mul r5, r7 \n\t" \
00603 "mov r3, r8 \n\t" \
00604 "mul r7, r3 \n\t" \
00605 "lsr r3, r6, #16 \n\t" \
00606 "add r5, r5, r3 \n\t" \
00607 "lsr r3, r7, #16 \n\t" \
00608 "add r5, r5, r3 \n\t" \
00609 "add r4, r4, r2 \n\t" \
00610 "mov r2, #0 \n\t" \
00611 "adc r5, r2 \n\t" \
00612 "lsl r3, r6, #16 \n\t" \
00613 "add r4, r4, r3 \n\t" \
00614 "adc r5, r2 \n\t" \
00615 "lsl r3, r7, #16 \n\t" \
00616 "add r4, r4, r3 \n\t" \
00617 "adc r5, r2 \n\t" \
00618 "ldr r3, [r1] \n\t" \
00619 "add r4, r4, r3 \n\t" \
00620 "adc r2, r5 \n\t" \
00621 "stmia r1!, {r4} \n\t"
00622
/*
 * Close the asm statement: store r2, r1 and r0 back to c, d and s, then
 * supply the operand and clobber lists (r0-r9 and the condition codes).
 */
00623 #define MULADDC_STOP \
00624 "str r2, %0 \n\t" \
00625 "str r1, %1 \n\t" \
00626 "str r0, %2 \n\t" \
00627 : "=m" (c), "=m" (d), "=m" (s) \
00628 : "m" (s), "m" (d), "m" (c), "m" (b) \
00629 : "r0", "r1", "r2", "r3", "r4", "r5", \
00630 "r6", "r7", "r8", "r9", "cc" \
00631 );
00632
00633 #else
00634
/*
 * Variant used when the Thumb-only condition above does not hold.
 * Operands are numbered as in the Thumb variant.
 *
 * Open the asm statement: r0 = s, r1 = d, r2 = c, r3 = b.
 */
00635 #define MULADDC_INIT \
00636 asm( \
00637 "ldr r0, %3 \n\t" \
00638 "ldr r1, %4 \n\t" \
00639 "ldr r2, %5 \n\t" \
00640 "ldr r3, %6 \n\t"
00641
/*
 * One 32-bit word: load the source word with post-increment, accumulate
 * r3 * r4 onto the 64-bit value r5:r2 (r5 cleared first, r2 holding the
 * old carry) with umlal, add the destination word, store the sum with
 * post-increment and leave the new carry in r2.
 */
00642 #define MULADDC_CORE \
00643 "ldr r4, [r0], #4 \n\t" \
00644 "mov r5, #0 \n\t" \
00645 "ldr r6, [r1] \n\t" \
00646 "umlal r2, r5, r3, r4 \n\t" \
00647 "adds r7, r6, r2 \n\t" \
00648 "adc r2, r5, #0 \n\t" \
00649 "str r7, [r1], #4 \n\t"
00650
/*
 * Close the asm statement: store r2, r1 and r0 back to c, d and s, then
 * supply the operand and clobber lists (r0-r7 and the condition codes).
 */
00651 #define MULADDC_STOP \
00652 "str r2, %0 \n\t" \
00653 "str r1, %1 \n\t" \
00654 "str r0, %2 \n\t" \
00655 : "=m" (c), "=m" (d), "=m" (s) \
00656 : "m" (s), "m" (d), "m" (c), "m" (b) \
00657 : "r0", "r1", "r2", "r3", "r4", "r5", \
00658 "r6", "r7", "cc" \
00659 );
00660
00661 #endif
00662
00663 #endif
00664
/*
 * Alpha, GCC-style inline assembly.
 *
 * Operands (see MULADDC_STOP): %0 = c, %1 = d, %2 = s (outputs);
 * %3 = s, %4 = d, %5 = c, %6 = b (inputs), all in memory.
 * Register roles: $1 = s pointer, $2 = d pointer, $3 = carry, $4 = b,
 * $5-$7 = scratch.
 */
00665 #if defined(__alpha__)
00666
/* Open the asm statement: load s, d, c and b into $1..$4. */
00667 #define MULADDC_INIT \
00668 asm( \
00669 "ldq $1, %3 \n\t" \
00670 "ldq $2, %4 \n\t" \
00671 "ldq $3, %5 \n\t" \
00672 "ldq $4, %6 \n\t"
00673
/*
 * One 64-bit word: mulq/umulh give the low ($7) and high ($6) halves of the
 * product of the source word with $4. The carry and the destination word
 * are added to the low half, with each overflow detected by cmpult, and the
 * new carry ($6 plus both overflow bits) is collected in $3. Both pointers
 * advance by 8.
 */
00674 #define MULADDC_CORE \
00675 "ldq $6, 0($1) \n\t" \
00676 "addq $1, 8, $1 \n\t" \
00677 "mulq $6, $4, $7 \n\t" \
00678 "umulh $6, $4, $6 \n\t" \
00679 "addq $7, $3, $7 \n\t" \
00680 "cmpult $7, $3, $3 \n\t" \
00681 "ldq $5, 0($2) \n\t" \
00682 "addq $7, $5, $7 \n\t" \
00683 "cmpult $7, $5, $5 \n\t" \
00684 "stq $7, 0($2) \n\t" \
00685 "addq $2, 8, $2 \n\t" \
00686 "addq $6, $3, $3 \n\t" \
00687 "addq $5, $3, $3 \n\t"
00688
/*
 * Close the asm statement: store $3, $2 and $1 back to c, d and s, then
 * supply the operand and clobber lists.
 */
00689 #define MULADDC_STOP \
00690 "stq $3, %0 \n\t" \
00691 "stq $2, %1 \n\t" \
00692 "stq $1, %2 \n\t" \
00693 : "=m" (c), "=m" (d), "=m" (s) \
00694 : "m" (s), "m" (d), "m" (c), "m" (b) \
00695 : "$1", "$2", "$3", "$4", "$5", "$6", "$7" \
00696 );
00697 #endif
00698
/*
 * MIPS (not selected when __mips64 is defined), GCC-style inline assembly.
 *
 * Operands (see MULADDC_STOP): %0 = c, %1 = d, %2 = s (outputs);
 * %3 = s, %4 = d, %5 = c, %6 = b (inputs), all in memory.
 * Register roles: $10 = s pointer, $11 = d pointer, $12 = carry, $13 = b,
 * $9, $14, $15 = scratch.
 */
00699 #if defined(__mips__) && !defined(__mips64)
00700
/* Open the asm statement: load s, d, c and b into $10..$13. */
00701 #define MULADDC_INIT \
00702 asm( \
00703 "lw $10, %3 \n\t" \
00704 "lw $11, %4 \n\t" \
00705 "lw $12, %5 \n\t" \
00706 "lw $13, %6 \n\t"
00707
/*
 * One 32-bit word: multu multiplies the source word by $13, mflo/mfhi fetch
 * the low ($14) and high ($9) halves. The carry and the destination word
 * are added to the low half, with each overflow detected by sltu, and the
 * new carry ($9 plus both overflow bits) is collected in $12. Both pointers
 * advance by 4.
 */
00708 #define MULADDC_CORE \
00709 "lw $14, 0($10) \n\t" \
00710 "multu $13, $14 \n\t" \
00711 "addi $10, $10, 4 \n\t" \
00712 "mflo $14 \n\t" \
00713 "mfhi $9 \n\t" \
00714 "addu $14, $12, $14 \n\t" \
00715 "lw $15, 0($11) \n\t" \
00716 "sltu $12, $14, $12 \n\t" \
00717 "addu $15, $14, $15 \n\t" \
00718 "sltu $14, $15, $14 \n\t" \
00719 "addu $12, $12, $9 \n\t" \
00720 "sw $15, 0($11) \n\t" \
00721 "addu $12, $12, $14 \n\t" \
00722 "addi $11, $11, 4 \n\t"
00723
/*
 * Close the asm statement: store $12, $11 and $10 back to c, d and s, then
 * supply the operand and clobber lists.
 */
00724 #define MULADDC_STOP \
00725 "sw $12, %0 \n\t" \
00726 "sw $11, %1 \n\t" \
00727 "sw $10, %2 \n\t" \
00728 : "=m" (c), "=m" (d), "=m" (s) \
00729 : "m" (s), "m" (d), "m" (c), "m" (b) \
00730 : "$9", "$10", "$11", "$12", "$13", "$14", "$15" \
00731 );
00732
00733 #endif
00734 #endif
00735
/*
 * 32-bit x86 for MSVC (_M_IX86) and Watcom, using `__asm` statements.
 *
 * The macros expand to a sequence of independent `__asm` statements that
 * name the variables s, d, c and b of the expansion site directly.
 * Register roles match the GCC i386 variant: esi = s pointer,
 * edi = d pointer, ecx = carry, ebx = b.
 */
00736 #if (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
00737
/* Load s, d, c and b into esi, edi, ecx and ebx. */
00738 #define MULADDC_INIT \
00739 __asm mov esi, s \
00740 __asm mov edi, d \
00741 __asm mov ecx, c \
00742 __asm mov ebx, b
00743
/*
 * One 32-bit word: lodsd fetches the word at esi into eax and advances esi,
 * mul forms edx:eax = eax * ebx, the carry in ecx and the word at [edi] are
 * added with carry propagation into edx, the new carry is moved to ecx and
 * stosd stores eax at edi and advances edi.
 */
00744 #define MULADDC_CORE \
00745 __asm lodsd \
00746 __asm mul ebx \
00747 __asm add eax, ecx \
00748 __asm adc edx, 0 \
00749 __asm add eax, [edi] \
00750 __asm adc edx, 0 \
00751 __asm mov ecx, edx \
00752 __asm stosd
00753
00754 #if defined(MBEDTLS_HAVE_SSE2)
00755
/* Shorthand for emitting one raw byte into the instruction stream. */
00756 #define EMIT __asm _emit
00757
/*
 * Multi-word step given as raw instruction bytes through EMIT.
 * NOTE(review): presumably this is the byte encoding of the GCC i386
 * MULADDC_HUIT sequence (eight words, esi/edi advanced by 0x20) - the bytes
 * have not been decoded for this comment, so verify before relying on it.
 */
00758 #define MULADDC_HUIT \
00759 EMIT 0x0F EMIT 0x6E EMIT 0xC9 \
00760 EMIT 0x0F EMIT 0x6E EMIT 0xC3 \
00761 EMIT 0x0F EMIT 0x6E EMIT 0x1F \
00762 EMIT 0x0F EMIT 0xD4 EMIT 0xCB \
00763 EMIT 0x0F EMIT 0x6E EMIT 0x16 \
00764 EMIT 0x0F EMIT 0xF4 EMIT 0xD0 \
00765 EMIT 0x0F EMIT 0x6E EMIT 0x66 EMIT 0x04 \
00766 EMIT 0x0F EMIT 0xF4 EMIT 0xE0 \
00767 EMIT 0x0F EMIT 0x6E EMIT 0x76 EMIT 0x08 \
00768 EMIT 0x0F EMIT 0xF4 EMIT 0xF0 \
00769 EMIT 0x0F EMIT 0x6E EMIT 0x7E EMIT 0x0C \
00770 EMIT 0x0F EMIT 0xF4 EMIT 0xF8 \
00771 EMIT 0x0F EMIT 0xD4 EMIT 0xCA \
00772 EMIT 0x0F EMIT 0x6E EMIT 0x5F EMIT 0x04 \
00773 EMIT 0x0F EMIT 0xD4 EMIT 0xDC \
00774 EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x08 \
00775 EMIT 0x0F EMIT 0xD4 EMIT 0xEE \
00776 EMIT 0x0F EMIT 0x6E EMIT 0x67 EMIT 0x0C \
00777 EMIT 0x0F EMIT 0xD4 EMIT 0xFC \
00778 EMIT 0x0F EMIT 0x7E EMIT 0x0F \
00779 EMIT 0x0F EMIT 0x6E EMIT 0x56 EMIT 0x10 \
00780 EMIT 0x0F EMIT 0xF4 EMIT 0xD0 \
00781 EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
00782 EMIT 0x0F EMIT 0x6E EMIT 0x66 EMIT 0x14 \
00783 EMIT 0x0F EMIT 0xF4 EMIT 0xE0 \
00784 EMIT 0x0F EMIT 0xD4 EMIT 0xCB \
00785 EMIT 0x0F EMIT 0x6E EMIT 0x76 EMIT 0x18 \
00786 EMIT 0x0F EMIT 0xF4 EMIT 0xF0 \
00787 EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x04 \
00788 EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
00789 EMIT 0x0F EMIT 0x6E EMIT 0x5E EMIT 0x1C \
00790 EMIT 0x0F EMIT 0xF4 EMIT 0xD8 \
00791 EMIT 0x0F EMIT 0xD4 EMIT 0xCD \
00792 EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x10 \
00793 EMIT 0x0F EMIT 0xD4 EMIT 0xD5 \
00794 EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x08 \
00795 EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
00796 EMIT 0x0F EMIT 0xD4 EMIT 0xCF \
00797 EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x14 \
00798 EMIT 0x0F EMIT 0xD4 EMIT 0xE5 \
00799 EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x0C \
00800 EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
00801 EMIT 0x0F EMIT 0xD4 EMIT 0xCA \
00802 EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x18 \
00803 EMIT 0x0F EMIT 0xD4 EMIT 0xF5 \
00804 EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x10 \
00805 EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
00806 EMIT 0x0F EMIT 0xD4 EMIT 0xCC \
00807 EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x1C \
00808 EMIT 0x0F EMIT 0xD4 EMIT 0xDD \
00809 EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x14 \
00810 EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
00811 EMIT 0x0F EMIT 0xD4 EMIT 0xCE \
00812 EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x18 \
00813 EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
00814 EMIT 0x0F EMIT 0xD4 EMIT 0xCB \
00815 EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x1C \
00816 EMIT 0x83 EMIT 0xC7 EMIT 0x20 \
00817 EMIT 0x83 EMIT 0xC6 EMIT 0x20 \
00818 EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
00819 EMIT 0x0F EMIT 0x7E EMIT 0xC9
00820
/*
 * SSE2 build: emit the two bytes 0x0F 0x77 (presumably the encoding of
 * emms, which the GCC i386 variant issues at this point - confirm), then
 * write ecx, edi and esi back to c, d and s. The definition ends with a
 * line continuation followed by an empty line.
 */
00821 #define MULADDC_STOP \
00822 EMIT 0x0F EMIT 0x77 \
00823 __asm mov c, ecx \
00824 __asm mov d, edi \
00825 __asm mov s, esi \
00826
00827 #else
00828
/*
 * Non-SSE2 build: write ecx, edi and esi back to c, d and s. The definition
 * ends with a line continuation followed by an empty line.
 */
00829 #define MULADDC_STOP \
00830 __asm mov c, ecx \
00831 __asm mov d, edi \
00832 __asm mov s, esi \
00833
00834 #endif
00835 #endif
00836
00837 #endif
00838
/*
 * Portable C fallback, used when none of the sections above has defined
 * MULADDC_CORE.
 *
 * MULADDC_INIT opens a brace scope and declares temporaries, each
 * MULADDC_CORE expansion processes one word, and MULADDC_STOP closes the
 * scope. The macros use the following names from the expansion site:
 *   s - pointer to the source words, advanced by one per CORE
 *   d - pointer to the destination words, advanced by one per CORE
 *   b - the multiplier word
 *   c - the carry word, read and updated by every CORE
 * Per CORE: (c, *d) = *s * b + c + *d, where *d receives the low word and
 * c the high word.
 *
 * biL and biH are not defined in this part of the file; from their use
 * they are presumably the bit width of mbedtls_mpi_uint and half of it.
 */
00839 #if !defined(MULADDC_CORE)
00840 #if defined(MBEDTLS_HAVE_UDBL)
00841
/* Double-width variant: r holds the full product of two words. */
00842 #define MULADDC_INIT \
00843 { \
00844 mbedtls_t_udbl r; \
00845 mbedtls_mpi_uint r0, r1;
00846
/*
 * r0/r1 are the low/high words of the product. After each addition into r0
 * the test (r0 < addend) is 1 exactly when the addition wrapped around, so
 * it propagates the carry into r1. The statement order matters: each test
 * must directly follow its addition.
 */
00847 #define MULADDC_CORE \
00848 r = *(s++) * (mbedtls_t_udbl) b; \
00849 r0 = (mbedtls_mpi_uint) r; \
00850 r1 = (mbedtls_mpi_uint)( r >> biL ); \
00851 r0 += c; r1 += (r0 < c); \
00852 r0 += *d; r1 += (r0 < *d); \
00853 c = r1; *(d++) = r0;
00854
/* Close the scope opened by MULADDC_INIT. */
00855 #define MULADDC_STOP \
00856 }
00857
00858 #else
/*
 * Single-width variant: no double-width type is used, so b is split once
 * into its low half b0 and high half b1 and the product is assembled from
 * half-word partial products.
 */
00859 #define MULADDC_INIT \
00860 { \
00861 mbedtls_mpi_uint s0, s1, b0, b1; \
00862 mbedtls_mpi_uint r0, r1, rx, ry; \
00863 b0 = ( b << biH ) >> biH; \
00864 b1 = ( b >> biH );
00865
/*
 * The source word is split into s0 (low half) and s1 (high half).
 * r0 = s0*b0 and r1 = s1*b1 are the outer partial products; the cross
 * products rx = s0*b1 and ry = s1*b0 contribute their high halves to r1 and
 * their low halves (shifted up by biH) to r0. Every addition into r0 is
 * followed by a wrap-around test that carries into r1, exactly as in the
 * double-width variant, before c and *d are added.
 */
00866 #define MULADDC_CORE \
00867 s0 = ( *s << biH ) >> biH; \
00868 s1 = ( *s >> biH ); s++; \
00869 rx = s0 * b1; r0 = s0 * b0; \
00870 ry = s1 * b0; r1 = s1 * b1; \
00871 r1 += ( rx >> biH ); \
00872 r1 += ( ry >> biH ); \
00873 rx <<= biH; ry <<= biH; \
00874 r0 += rx; r1 += (r0 < rx); \
00875 r0 += ry; r1 += (r0 < ry); \
00876 r0 += c; r1 += (r0 < c); \
00877 r0 += *d; r1 += (r0 < *d); \
00878 c = r1; *(d++) = r0;
00879
/* Close the scope opened by MULADDC_INIT. */
00880 #define MULADDC_STOP \
00881 }
00882
00883 #endif
00884 #endif
00885
00886 #endif