Back to index

lightning-sunbird  0.9+nobinonly
mpmontg.c
Go to the documentation of this file.
00001 /* ***** BEGIN LICENSE BLOCK *****
00002  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00003  *
00004  * The contents of this file are subject to the Mozilla Public License Version
00005  * 1.1 (the "License"); you may not use this file except in compliance with
00006  * the License. You may obtain a copy of the License at
00007  * http://www.mozilla.org/MPL/
00008  *
00009  * Software distributed under the License is distributed on an "AS IS" basis,
00010  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00011  * for the specific language governing rights and limitations under the
00012  * License.
00013  *
00014  * The Original Code is the Netscape security libraries.
00015  *
00016  * The Initial Developer of the Original Code is
00017  * Netscape Communications Corporation.
00018  * Portions created by the Initial Developer are Copyright (C) 2000
00019  * the Initial Developer. All Rights Reserved.
00020  *
00021  * Contributor(s):
00022  *   Sheueling Chang Shantz <sheueling.chang@sun.com>,
00023  *   Stephen Fung <stephen.fung@sun.com>, and
00024  *   Douglas Stebila <douglas@stebila.ca> of Sun Laboratories.
00025  *
00026  * Alternatively, the contents of this file may be used under the terms of
00027  * either the GNU General Public License Version 2 or later (the "GPL"), or
00028  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00029  * in which case the provisions of the GPL or the LGPL are applicable instead
00030  * of those above. If you wish to allow use of your version of this file only
00031  * under the terms of either the GPL or the LGPL, and not to allow others to
00032  * use your version of this file under the terms of the MPL, indicate your
00033  * decision by deleting the provisions above and replace them with the notice
00034  * and other provisions required by the GPL or the LGPL. If you do not delete
00035  * the provisions above, a recipient may use your version of this file under
00036  * the terms of any one of the MPL, the GPL or the LGPL.
00037  *
00038  * ***** END LICENSE BLOCK ***** */
00039 /* $Id: mpmontg.c,v 1.17.2.3 2006/08/29 02:46:20 nelson%bolyard.com Exp $ */
00040 
00041 /* This file implements moduluar exponentiation using Montgomery's
00042  * method for modular reduction.  This file implements the method
00043  * described as "Improvement 1" in the paper "A Cryptogrpahic Library for
00044  * the Motorola DSP56000" by Stephen R. Dusse' and Burton S. Kaliski Jr.
00045  * published in "Advances in Cryptology: Proceedings of EUROCRYPT '90"
00046  * "Lecture Notes in Computer Science" volume 473, 1991, pg 230-244,
00047  * published by Springer Verlag.
00048  */
00049 
00050 #define MP_USING_CACHE_SAFE_MOD_EXP 1 
00051 #include <string.h>
00052 #include "mpi-priv.h"
00053 #include "mplogic.h"
00054 #include "mpprime.h"
00055 #ifdef MP_USING_MONT_MULF
00056 #include "montmulf.h"
00057 #endif
00058 #include <stddef.h> /* ptrdiff_t */
00059 
00060 /* if MP_CHAR_STORE_SLOW is defined, we  */
00061 /* need to know endianness of this platform. */
00062 #ifdef MP_CHAR_STORE_SLOW
00063 #if !defined(MP_IS_BIG_ENDIAN) && !defined(MP_IS_LITTLE_ENDIAN)
00064 #error "You must define MP_IS_BIG_ENDIAN or MP_IS_LITTLE_ENDIAN\n" \
00065        "  if you define MP_CHAR_STORE_SLOW."
00066 #endif
00067 #endif
00068 
00069 #define STATIC
00070 
00071 #define MAX_ODD_INTS    32   /* 2 ** (WINDOW_BITS - 1) */
00072 
00073 #if defined(_WIN32_WCE)
00074 #define ABORT  res = MP_UNDEF; goto CLEANUP
00075 #else
00076 #define ABORT abort()
00077 #endif
00078 
00079 /* computes T = REDC(T), 2^b == R */
00080 mp_err s_mp_redc(mp_int *T, mp_mont_modulus *mmm)
00081 {
00082   mp_err res;
00083   mp_size i;
00084 
00085   i = MP_USED(T) + MP_USED(&mmm->N) + 2;
00086   MP_CHECKOK( s_mp_pad(T, i) );
00087   for (i = 0; i < MP_USED(&mmm->N); ++i ) {
00088     mp_digit m_i = MP_DIGIT(T, i) * mmm->n0prime;
00089     /* T += N * m_i * (MP_RADIX ** i); */
00090     MP_CHECKOK( s_mp_mul_d_add_offset(&mmm->N, m_i, T, i) );
00091   }
00092   s_mp_clamp(T);
00093 
00094   /* T /= R */
00095   s_mp_div_2d(T, mmm->b); 
00096 
00097   if ((res = s_mp_cmp(T, &mmm->N)) >= 0) {
00098     /* T = T - N */
00099     MP_CHECKOK( s_mp_sub(T, &mmm->N) );
00100 #ifdef DEBUG
00101     if ((res = mp_cmp(T, &mmm->N)) >= 0) {
00102       res = MP_UNDEF;
00103       goto CLEANUP;
00104     }
00105 #endif
00106   }
00107   res = MP_OKAY;
00108 CLEANUP:
00109   return res;
00110 }
00111 
00112 #if !defined(MP_ASSEMBLY_MUL_MONT) && !defined(MP_MONT_USE_MP_MUL)
00113 mp_err s_mp_mul_mont(const mp_int *a, const mp_int *b, mp_int *c, 
00114                   mp_mont_modulus *mmm)
00115 {
00116   mp_digit *pb;
00117   mp_digit m_i;
00118   mp_err   res;
00119   mp_size  ib;
00120   mp_size  useda, usedb;
00121 
00122   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
00123 
00124   if (MP_USED(a) < MP_USED(b)) {
00125     const mp_int *xch = b;  /* switch a and b, to do fewer outer loops */
00126     b = a;
00127     a = xch;
00128   }
00129 
00130   MP_USED(c) = 1; MP_DIGIT(c, 0) = 0;
00131   ib = MP_USED(a) + MP_MAX(MP_USED(b), MP_USED(&mmm->N)) + 2;
00132   if((res = s_mp_pad(c, ib)) != MP_OKAY)
00133     goto CLEANUP;
00134 
00135   useda = MP_USED(a);
00136   pb = MP_DIGITS(b);
00137   s_mpv_mul_d(MP_DIGITS(a), useda, *pb++, MP_DIGITS(c));
00138   s_mp_setz(MP_DIGITS(c) + useda + 1, ib - (useda + 1));
00139   m_i = MP_DIGIT(c, 0) * mmm->n0prime;
00140   s_mp_mul_d_add_offset(&mmm->N, m_i, c, 0);
00141 
00142   /* Outer loop:  Digits of b */
00143   usedb = MP_USED(b);
00144   for (ib = 1; ib < usedb; ib++) {
00145     mp_digit b_i    = *pb++;
00146 
00147     /* Inner product:  Digits of a */
00148     if (b_i)
00149       s_mpv_mul_d_add_prop(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib);
00150     m_i = MP_DIGIT(c, ib) * mmm->n0prime;
00151     s_mp_mul_d_add_offset(&mmm->N, m_i, c, ib);
00152   }
00153   if (usedb < MP_USED(&mmm->N)) {
00154     for (usedb = MP_USED(&mmm->N); ib < usedb; ++ib ) {
00155       m_i = MP_DIGIT(c, ib) * mmm->n0prime;
00156       s_mp_mul_d_add_offset(&mmm->N, m_i, c, ib);
00157     }
00158   }
00159   s_mp_clamp(c);
00160   s_mp_div_2d(c, mmm->b); 
00161   if (s_mp_cmp(c, &mmm->N) >= 0) {
00162     MP_CHECKOK( s_mp_sub(c, &mmm->N) );
00163   }
00164   res = MP_OKAY;
00165 
00166 CLEANUP:
00167   return res;
00168 }
00169 #endif
00170 
00171 STATIC
00172 mp_err s_mp_to_mont(const mp_int *x, mp_mont_modulus *mmm, mp_int *xMont)
00173 {
00174   mp_err res;
00175 
00176   /* xMont = x * R mod N   where  N is modulus */
00177   MP_CHECKOK( mpl_lsh(x, xMont, mmm->b) );              /* xMont = x << b */
00178   MP_CHECKOK( mp_div(xMont, &mmm->N, 0, xMont) );       /*         mod N */
00179 CLEANUP:
00180   return res;
00181 }
00182 
00183 #ifdef MP_USING_MONT_MULF
00184 
00185 /* the floating point multiply is already cache safe,
00186  * don't turn on cache safe unless we specifically
00187  * force it */
00188 #ifndef MP_FORCE_CACHE_SAFE
00189 #undef MP_USING_CACHE_SAFE_MOD_EXP
00190 #endif
00191 
00192 unsigned int mp_using_mont_mulf = 1;
00193 
00194 /* computes montgomery square of the integer in mResult */
00195 #define SQR \
00196   conv_i32_to_d32_and_d16(dm1, d16Tmp, mResult, nLen); \
00197   mont_mulf_noconv(mResult, dm1, d16Tmp, \
00198                  dTmp, dn, MP_DIGITS(modulus), nLen, dn0)
00199 
00200 /* computes montgomery product of x and the integer in mResult */
00201 #define MUL(x) \
00202   conv_i32_to_d32(dm1, mResult, nLen); \
00203   mont_mulf_noconv(mResult, dm1, oddPowers[x], \
00204                  dTmp, dn, MP_DIGITS(modulus), nLen, dn0)
00205 
00206 /* Do modular exponentiation using floating point multiply code. */
00207 mp_err mp_exptmod_f(const mp_int *   montBase, 
00208                     const mp_int *   exponent, 
00209                   const mp_int *   modulus, 
00210                   mp_int *         result, 
00211                   mp_mont_modulus *mmm, 
00212                   int              nLen, 
00213                   mp_size          bits_in_exponent, 
00214                   mp_size          window_bits,
00215                   mp_size          odd_ints)
00216 {
00217   mp_digit *mResult;
00218   double   *dBuf = 0, *dm1, *dn, *dSqr, *d16Tmp, *dTmp;
00219   double    dn0;
00220   mp_size   i;
00221   mp_err    res;
00222   int       expOff;
00223   int       dSize = 0, oddPowSize, dTmpSize;
00224   mp_int    accum1;
00225   double   *oddPowers[MAX_ODD_INTS];
00226 
00227   /* function for computing n0prime only works if n0 is odd */
00228 
00229   MP_DIGITS(&accum1) = 0;
00230 
00231   for (i = 0; i < MAX_ODD_INTS; ++i)
00232     oddPowers[i] = 0;
00233 
00234   MP_CHECKOK( mp_init_size(&accum1, 3 * nLen + 2) );
00235 
00236   mp_set(&accum1, 1);
00237   MP_CHECKOK( s_mp_to_mont(&accum1, mmm, &accum1) );
00238   MP_CHECKOK( s_mp_pad(&accum1, nLen) );
00239 
00240   oddPowSize = 2 * nLen + 1;
00241   dTmpSize   = 2 * oddPowSize;
00242   dSize = sizeof(double) * (nLen * 4 + 1 + 
00243                          ((odd_ints + 1) * oddPowSize) + dTmpSize);
00244   dBuf   = (double *)malloc(dSize);
00245   dm1    = dBuf;            /* array of d32 */
00246   dn     = dBuf   + nLen;   /* array of d32 */
00247   dSqr   = dn     + nLen;          /* array of d32 */
00248   d16Tmp = dSqr   + nLen;   /* array of d16 */
00249   dTmp   = d16Tmp + oddPowSize;
00250 
00251   for (i = 0; i < odd_ints; ++i) {
00252       oddPowers[i] = dTmp;
00253       dTmp += oddPowSize;
00254   }
00255   mResult = (mp_digit *)(dTmp + dTmpSize);       /* size is nLen + 1 */
00256 
00257   /* Make dn and dn0 */
00258   conv_i32_to_d32(dn, MP_DIGITS(modulus), nLen);
00259   dn0 = (double)(mmm->n0prime & 0xffff);
00260 
00261   /* Make dSqr */
00262   conv_i32_to_d32_and_d16(dm1, oddPowers[0], MP_DIGITS(montBase), nLen);
00263   mont_mulf_noconv(mResult, dm1, oddPowers[0], 
00264                  dTmp, dn, MP_DIGITS(modulus), nLen, dn0);
00265   conv_i32_to_d32(dSqr, mResult, nLen);
00266 
00267   for (i = 1; i < odd_ints; ++i) {
00268     mont_mulf_noconv(mResult, dSqr, oddPowers[i - 1], 
00269                    dTmp, dn, MP_DIGITS(modulus), nLen, dn0);
00270     conv_i32_to_d16(oddPowers[i], mResult, nLen);
00271   }
00272 
00273   s_mp_copy(MP_DIGITS(&accum1), mResult, nLen); /* from, to, len */
00274 
00275   for (expOff = bits_in_exponent - window_bits; expOff >= 0; expOff -= window_bits) {
00276     mp_size smallExp;
00277     MP_CHECKOK( mpl_get_bits(exponent, expOff, window_bits) );
00278     smallExp = (mp_size)res;
00279 
00280     if (window_bits == 1) {
00281       if (!smallExp) {
00282        SQR;
00283       } else if (smallExp & 1) {
00284        SQR; MUL(0); 
00285       } else {
00286        ABORT;
00287       }
00288     } else if (window_bits == 4) {
00289       if (!smallExp) {
00290        SQR; SQR; SQR; SQR;
00291       } else if (smallExp & 1) {
00292        SQR; SQR; SQR; SQR; MUL(smallExp/2); 
00293       } else if (smallExp & 2) {
00294        SQR; SQR; SQR; MUL(smallExp/4); SQR; 
00295       } else if (smallExp & 4) {
00296        SQR; SQR; MUL(smallExp/8); SQR; SQR; 
00297       } else if (smallExp & 8) {
00298        SQR; MUL(smallExp/16); SQR; SQR; SQR; 
00299       } else {
00300        ABORT;
00301       }
00302     } else if (window_bits == 5) {
00303       if (!smallExp) {
00304        SQR; SQR; SQR; SQR; SQR; 
00305       } else if (smallExp & 1) {
00306        SQR; SQR; SQR; SQR; SQR; MUL(smallExp/2);
00307       } else if (smallExp & 2) {
00308        SQR; SQR; SQR; SQR; MUL(smallExp/4); SQR;
00309       } else if (smallExp & 4) {
00310        SQR; SQR; SQR; MUL(smallExp/8); SQR; SQR;
00311       } else if (smallExp & 8) {
00312        SQR; SQR; MUL(smallExp/16); SQR; SQR; SQR;
00313       } else if (smallExp & 0x10) {
00314        SQR; MUL(smallExp/32); SQR; SQR; SQR; SQR;
00315       } else {
00316        ABORT;
00317       }
00318     } else if (window_bits == 6) {
00319       if (!smallExp) {
00320        SQR; SQR; SQR; SQR; SQR; SQR;
00321       } else if (smallExp & 1) {
00322        SQR; SQR; SQR; SQR; SQR; SQR; MUL(smallExp/2); 
00323       } else if (smallExp & 2) {
00324        SQR; SQR; SQR; SQR; SQR; MUL(smallExp/4); SQR; 
00325       } else if (smallExp & 4) {
00326        SQR; SQR; SQR; SQR; MUL(smallExp/8); SQR; SQR; 
00327       } else if (smallExp & 8) {
00328        SQR; SQR; SQR; MUL(smallExp/16); SQR; SQR; SQR; 
00329       } else if (smallExp & 0x10) {
00330        SQR; SQR; MUL(smallExp/32); SQR; SQR; SQR; SQR; 
00331       } else if (smallExp & 0x20) {
00332        SQR; MUL(smallExp/64); SQR; SQR; SQR; SQR; SQR; 
00333       } else {
00334        ABORT;
00335       }
00336     } else {
00337       ABORT;
00338     }
00339   }
00340 
00341   s_mp_copy(mResult, MP_DIGITS(&accum1), nLen); /* from, to, len */
00342 
00343   res = s_mp_redc(&accum1, mmm);
00344   mp_exch(&accum1, result);
00345 
00346 CLEANUP:
00347   mp_clear(&accum1);
00348   if (dBuf) {
00349     if (dSize)
00350       memset(dBuf, 0, dSize);
00351     free(dBuf);
00352   }
00353 
00354   return res;
00355 }
00356 #undef SQR
00357 #undef MUL
00358 #endif
00359 
00360 #define SQR(a,b) \
00361   MP_CHECKOK( mp_sqr(a, b) );\
00362   MP_CHECKOK( s_mp_redc(b, mmm) )
00363 
00364 #if defined(MP_MONT_USE_MP_MUL)
00365 #define MUL(x,a,b) \
00366   MP_CHECKOK( mp_mul(a, oddPowers + (x), b) ); \
00367   MP_CHECKOK( s_mp_redc(b, mmm) ) 
00368 #else
00369 #define MUL(x,a,b) \
00370   MP_CHECKOK( s_mp_mul_mont(a, oddPowers + (x), b, mmm) )
00371 #endif
00372 
00373 #define SWAPPA ptmp = pa1; pa1 = pa2; pa2 = ptmp
00374 
00375 /* Do modular exponentiation using integer multiply code. */
00376 mp_err mp_exptmod_i(const mp_int *   montBase, 
00377                     const mp_int *   exponent, 
00378                   const mp_int *   modulus, 
00379                   mp_int *         result, 
00380                   mp_mont_modulus *mmm, 
00381                   int              nLen, 
00382                   mp_size          bits_in_exponent, 
00383                   mp_size          window_bits,
00384                   mp_size          odd_ints)
00385 {
00386   mp_int *pa1, *pa2, *ptmp;
00387   mp_size i;
00388   mp_err  res;
00389   int     expOff;
00390   mp_int  accum1, accum2, power2, oddPowers[MAX_ODD_INTS];
00391 
00392   /* power2 = base ** 2; oddPowers[i] = base ** (2*i + 1); */
00393   /* oddPowers[i] = base ** (2*i + 1); */
00394 
00395   MP_DIGITS(&accum1) = 0;
00396   MP_DIGITS(&accum2) = 0;
00397   MP_DIGITS(&power2) = 0;
00398   for (i = 0; i < MAX_ODD_INTS; ++i) {
00399     MP_DIGITS(oddPowers + i) = 0;
00400   }
00401 
00402   MP_CHECKOK( mp_init_size(&accum1, 3 * nLen + 2) );
00403   MP_CHECKOK( mp_init_size(&accum2, 3 * nLen + 2) );
00404 
00405   MP_CHECKOK( mp_init_copy(&oddPowers[0], montBase) );
00406 
00407   mp_init_size(&power2, nLen + 2 * MP_USED(montBase) + 2);
00408   MP_CHECKOK( mp_sqr(montBase, &power2) );       /* power2 = montBase ** 2 */
00409   MP_CHECKOK( s_mp_redc(&power2, mmm) );
00410 
00411   for (i = 1; i < odd_ints; ++i) {
00412     mp_init_size(oddPowers + i, nLen + 2 * MP_USED(&power2) + 2);
00413     MP_CHECKOK( mp_mul(oddPowers + (i - 1), &power2, oddPowers + i) );
00414     MP_CHECKOK( s_mp_redc(oddPowers + i, mmm) );
00415   }
00416 
00417   /* set accumulator to montgomery residue of 1 */
00418   mp_set(&accum1, 1);
00419   MP_CHECKOK( s_mp_to_mont(&accum1, mmm, &accum1) );
00420   pa1 = &accum1;
00421   pa2 = &accum2;
00422 
00423   for (expOff = bits_in_exponent - window_bits; expOff >= 0; expOff -= window_bits) {
00424     mp_size smallExp;
00425     MP_CHECKOK( mpl_get_bits(exponent, expOff, window_bits) );
00426     smallExp = (mp_size)res;
00427 
00428     if (window_bits == 1) {
00429       if (!smallExp) {
00430        SQR(pa1,pa2); SWAPPA;
00431       } else if (smallExp & 1) {
00432        SQR(pa1,pa2); MUL(0,pa2,pa1);
00433       } else {
00434        ABORT;
00435       }
00436     } else if (window_bits == 4) {
00437       if (!smallExp) {
00438        SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
00439       } else if (smallExp & 1) {
00440        SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); 
00441        MUL(smallExp/2, pa1,pa2); SWAPPA;
00442       } else if (smallExp & 2) {
00443        SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); 
00444        MUL(smallExp/4,pa2,pa1); SQR(pa1,pa2); SWAPPA;
00445       } else if (smallExp & 4) {
00446        SQR(pa1,pa2); SQR(pa2,pa1); MUL(smallExp/8,pa1,pa2); 
00447        SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA;
00448       } else if (smallExp & 8) {
00449        SQR(pa1,pa2); MUL(smallExp/16,pa2,pa1); SQR(pa1,pa2); 
00450        SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA;
00451       } else {
00452        ABORT;
00453       }
00454     } else if (window_bits == 5) {
00455       if (!smallExp) {
00456        SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); 
00457        SQR(pa1,pa2); SWAPPA;
00458       } else if (smallExp & 1) {
00459        SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); 
00460        SQR(pa1,pa2); MUL(smallExp/2,pa2,pa1);
00461       } else if (smallExp & 2) {
00462        SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); 
00463        MUL(smallExp/4,pa1,pa2); SQR(pa2,pa1);
00464       } else if (smallExp & 4) {
00465        SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); 
00466        MUL(smallExp/8,pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
00467       } else if (smallExp & 8) {
00468        SQR(pa1,pa2); SQR(pa2,pa1); MUL(smallExp/16,pa1,pa2); 
00469        SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
00470       } else if (smallExp & 0x10) {
00471        SQR(pa1,pa2); MUL(smallExp/32,pa2,pa1); SQR(pa1,pa2); 
00472        SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
00473       } else {
00474        ABORT;
00475       }
00476     } else if (window_bits == 6) {
00477       if (!smallExp) {
00478        SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); 
00479        SQR(pa1,pa2); SQR(pa2,pa1);
00480       } else if (smallExp & 1) {
00481        SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); 
00482        SQR(pa1,pa2); SQR(pa2,pa1); MUL(smallExp/2,pa1,pa2); SWAPPA;
00483       } else if (smallExp & 2) {
00484        SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); 
00485        SQR(pa1,pa2); MUL(smallExp/4,pa2,pa1); SQR(pa1,pa2); SWAPPA;
00486       } else if (smallExp & 4) {
00487        SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); 
00488        MUL(smallExp/8,pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA;
00489       } else if (smallExp & 8) {
00490        SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); 
00491        MUL(smallExp/16,pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); 
00492        SQR(pa1,pa2); SWAPPA;
00493       } else if (smallExp & 0x10) {
00494        SQR(pa1,pa2); SQR(pa2,pa1); MUL(smallExp/32,pa1,pa2); 
00495        SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA;
00496       } else if (smallExp & 0x20) {
00497        SQR(pa1,pa2); MUL(smallExp/64,pa2,pa1); SQR(pa1,pa2); 
00498        SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA;
00499       } else {
00500        ABORT;
00501       }
00502     } else {
00503       ABORT;
00504     }
00505   }
00506 
00507   res = s_mp_redc(pa1, mmm);
00508   mp_exch(pa1, result);
00509 
00510 CLEANUP:
00511   mp_clear(&accum1);
00512   mp_clear(&accum2);
00513   mp_clear(&power2);
00514   for (i = 0; i < odd_ints; ++i) {
00515     mp_clear(oddPowers + i);
00516   }
00517   return res;
00518 }
00519 #undef SQR
00520 #undef MUL
00521 
00522 #ifdef MP_USING_CACHE_SAFE_MOD_EXP
00523 unsigned int mp_using_cache_safe_exp = 1;
00524 #endif
00525 
00526 mp_err mp_set_safe_modexp(int value) 
00527 {
00528 #ifdef MP_USING_CACHE_SAFE_MOD_EXP
00529  mp_using_cache_safe_exp = value;
00530  return MP_OKAY;
00531 #else
00532  if (value == 0) {
00533    return MP_OKAY;
00534  }
00535  return MP_BADARG;
00536 #endif
00537 }
00538 
00539 #ifdef MP_USING_CACHE_SAFE_MOD_EXP
00540 #define WEAVE_WORD_SIZE 4
00541 
00542 #ifndef MP_CHAR_STORE_SLOW
00543 /*
00544  * mpi_to_weave takes an array of bignums, a matrix in which each bignum 
00545  * occupies all the columns of a row, and transposes it into a matrix in 
00546  * which each bignum occupies a column of every row.  The first row of the
00547  * input matrix becomes the first column of the output matrix.  The n'th
00548  * row of input becomes the n'th column of output.  The input data is said
00549  * to be "interleaved" or "woven" into the output matrix.
00550  *
00551  * The array of bignums is left in this woven form.  Each time a single
00552  * bignum value is needed, it is recreated by fetching the n'th column, 
00553  * forming a single row which is the new bignum.
00554  *
00555  * The purpose of this interleaving is make it impossible to determine which
00556  * of the bignums is being used in any one operation by examining the pattern
00557  * of cache misses.
00558  *
00559  * The weaving function does not transpose the entire input matrix in one call.
00560  * It transposes 4 rows of mp_ints into their respective columns of output.
00561  *
00562  * There are two different implementations of the weaving and unweaving code
00563  * in this file.  One uses byte loads and stores.  The second uses loads and
00564  * stores of mp_weave_word size values.  The weaved forms of these two 
00565  * implementations differ.  Consequently, each one has its own explanation.
00566  *
00567  * Here is the explanation for the byte-at-a-time implementation.
00568  *
00569  * This implementation treats each mp_int bignum as an array of bytes, 
00570  * rather than as an array of mp_digits.  It stores those bytes as a 
00571  * column of bytes in the output matrix.  It doesn't care if the machine
00572  * uses big-endian or little-endian byte ordering within mp_digits.
00573  * The first byte of the mp_digit array becomes the first byte in the output
00574  * column, regardless of whether that byte is the MSB or LSB of the mp_digit.
00575  *
00576  * "bignums" is an array of mp_ints.
00577  * It points to four rows, four mp_ints, a subset of a larger array of mp_ints.
00578  *
00579  * "weaved" is the weaved output matrix. 
00580  * The first byte of bignums[0] is stored in weaved[0].
00581  * 
00582  * "nBignums" is the total number of bignums in the array of which "bignums" 
00583  * is a part.  
00584  *
00585  * "nDigits" is the size in mp_digits of each mp_int in the "bignums" array. 
00586  * mp_ints that use less than nDigits digits are logically padded with zeros 
00587  * while being stored in the weaved array.
00588  */
00589 mp_err mpi_to_weave(const mp_int  *bignums, 
00590                     unsigned char *weaved, 
00591                   mp_size nDigits,  /* in each mp_int of input */
00592                   mp_size nBignums) /* in the entire source array */
00593 {
00594   mp_size i;
00595   unsigned char * endDest = weaved + (nDigits * nBignums * sizeof(mp_digit));
00596 
00597   for (i=0; i < WEAVE_WORD_SIZE; i++) {
00598     mp_size used = MP_USED(&bignums[i]);
00599     unsigned char *pSrc   = (unsigned char *)MP_DIGITS(&bignums[i]);
00600     unsigned char *endSrc = pSrc + (used * sizeof(mp_digit));
00601     unsigned char *pDest  = weaved + i;
00602 
00603     ARGCHK(MP_SIGN(&bignums[i]) == MP_ZPOS, MP_BADARG);
00604     ARGCHK(used <= nDigits, MP_BADARG);
00605 
00606     for (; pSrc < endSrc; pSrc++) {
00607       *pDest = *pSrc;
00608       pDest += nBignums;
00609     }
00610     while (pDest < endDest) {
00611       *pDest = 0;
00612       pDest += nBignums;
00613     }
00614   }
00615 
00616   return MP_OKAY;
00617 }
00618 
00619 /* Reverse the operation above for one mp_int.
00620  * Reconstruct one mp_int from its column in the weaved array.
00621  * "pSrc" points to the offset into the weave array of the bignum we 
00622  * are going to reconstruct.
00623  */
00624 mp_err weave_to_mpi(mp_int *a,                /* output, result */
00625                     const unsigned char *pSrc, /* input, byte matrix */
00626                   mp_size nDigits,          /* per mp_int output */
00627                   mp_size nBignums)         /* bignums in weaved matrix */
00628 {
00629   unsigned char *pDest   = (unsigned char *)MP_DIGITS(a);
00630   unsigned char *endDest = pDest + (nDigits * sizeof(mp_digit));
00631 
00632   MP_SIGN(a) = MP_ZPOS;
00633   MP_USED(a) = nDigits;
00634 
00635   for (; pDest < endDest; pSrc += nBignums, pDest++) {
00636     *pDest = *pSrc;
00637   }
00638   s_mp_clamp(a);
00639   return MP_OKAY;
00640 }
00641 
00642 #else
00643 
00644 /* Need a primitive that we know is 32 bits long... */
00645 /* this is true on all modern processors we know of today*/
00646 typedef unsigned int mp_weave_word;
00647 
00648 /*
00649  * on some platforms character stores into memory is very expensive since they
00650  * generate a read/modify/write operation on the bus. On those platforms
00651  * we need to do integer writes to the bus. Because of some unrolled code,
00652  * in this current code the size of mp_weave_word must be four. The code that
00653  * makes this assumption explicity is called out. (on some platforms a write
00654  * of 4 bytes still requires a single read-modify-write operation.
00655  *
00656  * This function is takes the identical parameters as the function above, 
00657  * however it lays out the final array differently. Where the previous function
00658  * treats the mpi_int as an byte array, this function treats it as an array of
00659  * mp_digits where each digit is stored in big endian order.
00660  * 
00661  * since we need to interleave on a byte by byte basis, we need to collect 
00662  * several mpi structures together into a single uint32 before we write. We
00663  * also need to make sure the uint32 is arranged so that the first value of 
00664  * the first array winds up in b[0]. This means construction of that uint32
00665  * is endian specific (even though the layout of the mp_digits in the array 
00666  * is always big endian).
00667  *
00668  * The final data is stored as follows :
00669  *
00670  * Our same logical array p array, m is sizeof(mp_digit),
00671  * N is still count and n is now b_size. If we define p[i].digit[j]0 as the 
00672  * most significant byte of the word p[i].digit[j], p[i].digit[j]1 as 
00673  * the next most significant byte of p[i].digit[j], ...  and p[i].digit[j]m-1
00674  * is the least significant byte. 
00675  * Our array would look like:
00676  * p[0].digit[0]0     p[1].digit[0]0    ...  p[N-2].digit[0]0    p[N-1].digit[0]0
00677  * p[0].digit[0]1     p[1].digit[0]1    ...  p[N-2].digit[0]1    p[N-1].digit[0]1
00678  *                .                                         .
00679  * p[0].digit[0]m-1   p[1].digit[0]m-1  ...  p[N-2].digit[0]m-1  p[N-1].digit[0]m-1
00680  * p[0].digit[1]0     p[1].digit[1]0    ...  p[N-2].digit[1]0    p[N-1].digit[1]0
00681  *                .                                         .
00682  *                .                                         .
00683  * p[0].digit[n-1]m-2 p[1].digit[n-1]m-2 ... p[N-2].digit[n-1]m-2 p[N-1].digit[n-1]m-2
00684  * p[0].digit[n-1]m-1 p[1].digit[n-1]m-1 ... p[N-2].digit[n-1]m-1 p[N-1].digit[n-1]m-1 
00685  *
00686  */
00687 mp_err mpi_to_weave(const mp_int *a, unsigned char *b, 
00688                                    mp_size b_size, mp_size count)
00689 {
00690   mp_size i;
00691   mp_digit *digitsa0;
00692   mp_digit *digitsa1;
00693   mp_digit *digitsa2;
00694   mp_digit *digitsa3;
00695   mp_size   useda0;
00696   mp_size   useda1;
00697   mp_size   useda2;
00698   mp_size   useda3;
00699   mp_weave_word *weaved = (mp_weave_word *)b;
00700 
00701   count = count/sizeof(mp_weave_word);
00702 
00703   /* this code pretty much depends on this ! */
00704 #if MP_ARGCHK == 2
00705   assert(WEAVE_WORD_SIZE == 4); 
00706   assert(sizeof(mp_weave_word) == 4);
00707 #endif
00708 
00709   digitsa0 = MP_DIGITS(&a[0]);
00710   digitsa1 = MP_DIGITS(&a[1]);
00711   digitsa2 = MP_DIGITS(&a[2]);
00712   digitsa3 = MP_DIGITS(&a[3]);
00713   useda0 = MP_USED(&a[0]);
00714   useda1 = MP_USED(&a[1]);
00715   useda2 = MP_USED(&a[2]);
00716   useda3 = MP_USED(&a[3]);
00717 
00718   ARGCHK(MP_SIGN(&a[0]) == MP_ZPOS, MP_BADARG);
00719   ARGCHK(MP_SIGN(&a[1]) == MP_ZPOS, MP_BADARG);
00720   ARGCHK(MP_SIGN(&a[2]) == MP_ZPOS, MP_BADARG);
00721   ARGCHK(MP_SIGN(&a[3]) == MP_ZPOS, MP_BADARG);
00722   ARGCHK(useda0 <= b_size, MP_BADARG);
00723   ARGCHK(useda1 <= b_size, MP_BADARG);
00724   ARGCHK(useda2 <= b_size, MP_BADARG);
00725   ARGCHK(useda3 <= b_size, MP_BADARG);
00726 
00727 #define SAFE_FETCH(digit, used, word) ((word) < (used) ? (digit[word]) : 0)
00728 
00729   for (i=0; i < b_size; i++) {
00730     mp_digit d0 = SAFE_FETCH(digitsa0,useda0,i);
00731     mp_digit d1 = SAFE_FETCH(digitsa1,useda1,i);
00732     mp_digit d2 = SAFE_FETCH(digitsa2,useda2,i);
00733     mp_digit d3 = SAFE_FETCH(digitsa3,useda3,i);
00734     register mp_weave_word acc;
00735 
00736 /*
00737  * ONE_STEP takes the MSB of each of our current digits and places that
00738  * byte in the appropriate position for writing to the weaved array.
00739  *  On little endian:
00740  *   b3 b2 b1 b0
00741  *  On big endian:
00742  *   b0 b1 b2 b3
00743  *  When the data is written it would always wind up:
00744  *   b[0] = b0
00745  *   b[1] = b1
00746  *   b[2] = b2
00747  *   b[3] = b3
00748  *
00749  * Once we've written the MSB, we shift the whole digit up left one
00750  * byte, putting the Next Most Significant Byte in the MSB position,
00751  * so we we repeat the next one step that byte will be written.
00752  * NOTE: This code assumes sizeof(mp_weave_word) and MP_WEAVE_WORD_SIZE
00753  * is 4.
00754  */
00755 #ifdef MP_IS_LITTLE_ENDIAN 
00756 #define MPI_WEAVE_ONE_STEP \
00757     acc  = (d0 >> (MP_DIGIT_BIT-8))  & 0x000000ff; d0 <<= 8; /*b0*/ \
00758     acc |= (d1 >> (MP_DIGIT_BIT-16)) & 0x0000ff00; d1 <<= 8; /*b1*/ \
00759     acc |= (d2 >> (MP_DIGIT_BIT-24)) & 0x00ff0000; d2 <<= 8; /*b2*/ \
00760     acc |= (d3 >> (MP_DIGIT_BIT-32)) & 0xff000000; d3 <<= 8; /*b3*/ \
00761     *weaved = acc; weaved += count;
00762 #else 
00763 #define MPI_WEAVE_ONE_STEP \
00764     acc  = (d0 >> (MP_DIGIT_BIT-32)) & 0xff000000; d0 <<= 8; /*b0*/ \
00765     acc |= (d1 >> (MP_DIGIT_BIT-24)) & 0x00ff0000; d1 <<= 8; /*b1*/ \
00766     acc |= (d2 >> (MP_DIGIT_BIT-16)) & 0x0000ff00; d2 <<= 8; /*b2*/ \
00767     acc |= (d3 >> (MP_DIGIT_BIT-8))  & 0x000000ff; d3 <<= 8; /*b3*/ \
00768     *weaved = acc; weaved += count;
00769 #endif 
00770    switch (sizeof(mp_digit)) {
00771    case 32:
00772     MPI_WEAVE_ONE_STEP
00773     MPI_WEAVE_ONE_STEP
00774     MPI_WEAVE_ONE_STEP
00775     MPI_WEAVE_ONE_STEP
00776     MPI_WEAVE_ONE_STEP
00777     MPI_WEAVE_ONE_STEP
00778     MPI_WEAVE_ONE_STEP
00779     MPI_WEAVE_ONE_STEP
00780     MPI_WEAVE_ONE_STEP
00781     MPI_WEAVE_ONE_STEP
00782     MPI_WEAVE_ONE_STEP
00783     MPI_WEAVE_ONE_STEP
00784     MPI_WEAVE_ONE_STEP
00785     MPI_WEAVE_ONE_STEP
00786     MPI_WEAVE_ONE_STEP
00787     MPI_WEAVE_ONE_STEP
00788    case 16:
00789     MPI_WEAVE_ONE_STEP
00790     MPI_WEAVE_ONE_STEP
00791     MPI_WEAVE_ONE_STEP
00792     MPI_WEAVE_ONE_STEP
00793     MPI_WEAVE_ONE_STEP
00794     MPI_WEAVE_ONE_STEP
00795     MPI_WEAVE_ONE_STEP
00796     MPI_WEAVE_ONE_STEP
00797    case 8:
00798     MPI_WEAVE_ONE_STEP
00799     MPI_WEAVE_ONE_STEP
00800     MPI_WEAVE_ONE_STEP
00801     MPI_WEAVE_ONE_STEP
00802    case 4:
00803     MPI_WEAVE_ONE_STEP
00804     MPI_WEAVE_ONE_STEP
00805    case 2:
00806     MPI_WEAVE_ONE_STEP
00807    case 1:
00808     MPI_WEAVE_ONE_STEP
00809     break;
00810    }
00811   }
00812 
00813   return MP_OKAY;
00814 }
00815 
00816 /* reverse the operation above for one entry.
00817  * b points to the offset into the weave array of the power we are
00818  * calculating */
00819 mp_err weave_to_mpi(mp_int *a, const unsigned char *b, 
00820                                    mp_size b_size, mp_size count)
00821 {
00822   mp_digit *pb = MP_DIGITS(a);
00823   mp_digit *end = &pb[b_size];
00824 
00825   MP_SIGN(a) = MP_ZPOS;
00826   MP_USED(a) = b_size;
00827 
00828   for (; pb < end; pb++) {
00829     register mp_digit digit;
00830 
00831     digit = *b << 8; b += count;
00832 #define MPI_UNWEAVE_ONE_STEP  digit |= *b; b += count; digit = digit << 8;
00833     switch (sizeof(mp_digit)) {
00834     case 32:
00835        MPI_UNWEAVE_ONE_STEP 
00836        MPI_UNWEAVE_ONE_STEP 
00837        MPI_UNWEAVE_ONE_STEP 
00838        MPI_UNWEAVE_ONE_STEP 
00839        MPI_UNWEAVE_ONE_STEP 
00840        MPI_UNWEAVE_ONE_STEP 
00841        MPI_UNWEAVE_ONE_STEP 
00842        MPI_UNWEAVE_ONE_STEP 
00843        MPI_UNWEAVE_ONE_STEP 
00844        MPI_UNWEAVE_ONE_STEP 
00845        MPI_UNWEAVE_ONE_STEP 
00846        MPI_UNWEAVE_ONE_STEP 
00847        MPI_UNWEAVE_ONE_STEP 
00848        MPI_UNWEAVE_ONE_STEP 
00849        MPI_UNWEAVE_ONE_STEP 
00850        MPI_UNWEAVE_ONE_STEP 
00851     case 16:
00852        MPI_UNWEAVE_ONE_STEP 
00853        MPI_UNWEAVE_ONE_STEP 
00854        MPI_UNWEAVE_ONE_STEP 
00855        MPI_UNWEAVE_ONE_STEP 
00856        MPI_UNWEAVE_ONE_STEP 
00857        MPI_UNWEAVE_ONE_STEP 
00858        MPI_UNWEAVE_ONE_STEP 
00859        MPI_UNWEAVE_ONE_STEP 
00860     case 8:
00861        MPI_UNWEAVE_ONE_STEP 
00862        MPI_UNWEAVE_ONE_STEP 
00863        MPI_UNWEAVE_ONE_STEP 
00864        MPI_UNWEAVE_ONE_STEP 
00865     case 4:
00866        MPI_UNWEAVE_ONE_STEP 
00867        MPI_UNWEAVE_ONE_STEP 
00868     case 2:
00869        break;
00870     }
00871     digit |= *b; b += count; 
00872 
00873     *pb = digit;
00874   }
00875   s_mp_clamp(a);
00876   return MP_OKAY;
00877 }
00878 #endif
00879 
00880 
00881 #define SQR(a,b) \
00882   MP_CHECKOK( mp_sqr(a, b) );\
00883   MP_CHECKOK( s_mp_redc(b, mmm) )
00884 
00885 #if defined(MP_MONT_USE_MP_MUL)
00886 #define MUL_NOWEAVE(x,a,b) \
00887   MP_CHECKOK( mp_mul(a, x, b) ); \
00888   MP_CHECKOK( s_mp_redc(b, mmm) ) 
00889 #else
00890 #define MUL_NOWEAVE(x,a,b) \
00891   MP_CHECKOK( s_mp_mul_mont(a, x, b, mmm) )
00892 #endif
00893 
00894 #define MUL(x,a,b) \
00895   MP_CHECKOK( weave_to_mpi(&tmp, powers + (x), nLen, num_powers) ); \
00896   MUL_NOWEAVE(&tmp,a,b)
00897 
00898 #define SWAPPA ptmp = pa1; pa1 = pa2; pa2 = ptmp
00899 #define MP_ALIGN(x,y) ((((ptrdiff_t)(x))+((y)-1))&(((ptrdiff_t)0)-(y)))
00900 
00901 /* Do modular exponentiation using integer multiply code. */
00902 mp_err mp_exptmod_safe_i(const mp_int *   montBase, 
00903                     const mp_int *   exponent, 
00904                   const mp_int *   modulus, 
00905                   mp_int *         result, 
00906                   mp_mont_modulus *mmm, 
00907                   int              nLen, 
00908                   mp_size          bits_in_exponent, 
00909                   mp_size          window_bits,
00910                   mp_size          num_powers)
00911 {
00912   mp_int *pa1, *pa2, *ptmp;
00913   mp_size i;
00914   mp_size first_window;
00915   mp_err  res;
00916   int     expOff;
00917   mp_int  accum1, accum2, accum[WEAVE_WORD_SIZE];
00918   mp_int  tmp;
00919   unsigned char *powersArray;
00920   unsigned char *powers;
00921 
00922   MP_DIGITS(&accum1) = 0;
00923   MP_DIGITS(&accum2) = 0;
00924   MP_DIGITS(&accum[0]) = 0;
00925   MP_DIGITS(&accum[1]) = 0;
00926   MP_DIGITS(&accum[2]) = 0;
00927   MP_DIGITS(&accum[3]) = 0;
00928   MP_DIGITS(&tmp) = 0;
00929 
00930   powersArray = (unsigned char *)malloc(num_powers*(nLen*sizeof(mp_digit)+1));
00931   if (powersArray == NULL) {
00932     res = MP_MEM;
00933     goto CLEANUP;
00934   }
00935 
00936   /* powers[i] = base ** (i); */
00937   powers = (unsigned char *)MP_ALIGN(powersArray,num_powers);
00938 
00939   /* grab the first window value. This allows us to preload accumulator1
00940    * and save a conversion, some squares and a multiple*/
00941   MP_CHECKOK( mpl_get_bits(exponent, 
00942                             bits_in_exponent-window_bits, window_bits) );
00943   first_window = (mp_size)res;
00944 
00945   MP_CHECKOK( mp_init_size(&accum1, 3 * nLen + 2) );
00946   MP_CHECKOK( mp_init_size(&accum2, 3 * nLen + 2) );
00947   MP_CHECKOK( mp_init_size(&tmp, 3 * nLen + 2) );
00948 
00949   /* build the first WEAVE_WORD powers inline */
00950   /* if WEAVE_WORD_SIZE is not 4, this code will have to change */
00951   if (num_powers > 2) {
00952     MP_CHECKOK( mp_init_size(&accum[0], 3 * nLen + 2) );
00953     MP_CHECKOK( mp_init_size(&accum[1], 3 * nLen + 2) );
00954     MP_CHECKOK( mp_init_size(&accum[2], 3 * nLen + 2) );
00955     MP_CHECKOK( mp_init_size(&accum[3], 3 * nLen + 2) );
00956     mp_set(&accum[0], 1);
00957     MP_CHECKOK( s_mp_to_mont(&accum[0], mmm, &accum[0]) );
00958     MP_CHECKOK( mp_copy(montBase, &accum[1]) );
00959     SQR(montBase, &accum[2]);
00960     MUL_NOWEAVE(montBase, &accum[2], &accum[3]);
00961     MP_CHECKOK( mpi_to_weave(accum, powers, nLen, num_powers) );
00962     if (first_window < 4) {
00963       MP_CHECKOK( mp_copy(&accum[first_window], &accum1) );
00964       first_window = num_powers;
00965     }
00966   } else {
00967       if (first_window == 0) {
00968         mp_set(&accum1, 1);
00969         MP_CHECKOK( s_mp_to_mont(&accum1, mmm, &accum1) );
00970       } else {
00971         /* assert first_window == 1? */
00972         MP_CHECKOK( mp_copy(montBase, &accum1) );
00973       }
00974   }
00975 
00976   /*
00977    * calculate all the powers in the powers array.
00978    * this adds 2**(k-1)-2 square operations over just calculating the
00979    * odd powers where k is the window size in the two other mp_modexpt
00980    * implementations in this file. We will get some of that
00981    * back by not needing the first 'k' squares and one multiply for the 
00982    * first window */ 
00983   for (i = WEAVE_WORD_SIZE; i < num_powers; i++) {
00984     int acc_index = i & (WEAVE_WORD_SIZE-1); /* i % WEAVE_WORD_SIZE */
00985     if ( i & 1 ) {
00986       MUL_NOWEAVE(montBase, &accum[acc_index-1] , &accum[acc_index]);
00987       /* we've filled the array do our 'per array' processing */
00988       if (acc_index == (WEAVE_WORD_SIZE-1)) {
00989         MP_CHECKOK( mpi_to_weave(accum, powers + i - (WEAVE_WORD_SIZE-1),
00990                                                   nLen, num_powers) );
00991 
00992         if (first_window <= i) {
00993           MP_CHECKOK( mp_copy(&accum[first_window & (WEAVE_WORD_SIZE-1)], 
00994                                                         &accum1) );
00995           first_window = num_powers;
00996         }
00997       }
00998     } else {
00999       /* up to 8 we can find 2^i-1 in the accum array, but at 8 we our source
01000        * and target are the same so we need to copy.. After that, the
01001        * value is overwritten, so we need to fetch it from the stored
01002        * weave array */
01003       if (i > 2* WEAVE_WORD_SIZE) {
01004         MP_CHECKOK(weave_to_mpi(&accum2, powers+i/2, nLen, num_powers));
01005         SQR(&accum2, &accum[acc_index]);
01006       } else {
01007        int half_power_index = (i/2) & (WEAVE_WORD_SIZE-1);
01008        if (half_power_index == acc_index) {
01009           /* copy is cheaper than weave_to_mpi */
01010           MP_CHECKOK(mp_copy(&accum[half_power_index], &accum2));
01011           SQR(&accum2,&accum[acc_index]);
01012        } else {
01013           SQR(&accum[half_power_index],&accum[acc_index]);
01014        }
01015       }
01016     }
01017   }
01018   /* if the accum1 isn't set, Then there is something wrong with our logic 
01019    * above and is an internal programming error. 
01020    */
01021 #if MP_ARGCHK == 2
01022   assert(MP_USED(&accum1) != 0);
01023 #endif
01024 
01025   /* set accumulator to montgomery residue of 1 */
01026   pa1 = &accum1;
01027   pa2 = &accum2;
01028 
01029   for (expOff = bits_in_exponent - window_bits*2; expOff >= 0; expOff -= window_bits) {
01030     mp_size smallExp;
01031     MP_CHECKOK( mpl_get_bits(exponent, expOff, window_bits) );
01032     smallExp = (mp_size)res;
01033 
01034     /* handle unroll the loops */
01035     switch (window_bits) {
01036     case 1:
01037        if (!smallExp) {
01038            SQR(pa1,pa2); SWAPPA;
01039        } else if (smallExp & 1) {
01040            SQR(pa1,pa2); MUL_NOWEAVE(montBase,pa2,pa1);
01041        } else {
01042            ABORT;
01043        }
01044        break;
01045     case 6:
01046        SQR(pa1,pa2); SQR(pa2,pa1); 
01047        /* fall through */
01048     case 4:
01049        SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
01050        MUL(smallExp, pa1,pa2); SWAPPA;
01051        break;
01052     case 5:
01053        SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); 
01054        SQR(pa1,pa2); MUL(smallExp,pa2,pa1);
01055        break;
01056     default:
01057        ABORT; /* could do a loop? */
01058     }
01059   }
01060 
01061   res = s_mp_redc(pa1, mmm);
01062   mp_exch(pa1, result);
01063 
01064 CLEANUP:
01065   mp_clear(&accum1);
01066   mp_clear(&accum2);
01067   mp_clear(&accum[0]);
01068   mp_clear(&accum[1]);
01069   mp_clear(&accum[2]);
01070   mp_clear(&accum[3]);
01071   mp_clear(&tmp);
01072   /* PORT_Memset(powers,0,num_powers*nLen*sizeof(mp_digit)); */
01073   free(powersArray);
01074   return res;
01075 }
01076 #undef SQR
01077 #undef MUL
01078 #endif
01079 
01080 mp_err mp_exptmod(const mp_int *inBase, const mp_int *exponent, 
01081                 const mp_int *modulus, mp_int *result)
01082 {
01083   const mp_int *base;
01084   mp_size bits_in_exponent, i, window_bits, odd_ints;
01085   mp_err  res;
01086   int     nLen;
01087   mp_int  montBase, goodBase;
01088   mp_mont_modulus mmm;
01089 #ifdef MP_USING_CACHE_SAFE_MOD_EXP
01090   static unsigned int max_window_bits;
01091 #endif
01092 
01093   /* function for computing n0prime only works if n0 is odd */
01094   if (!mp_isodd(modulus))
01095     return s_mp_exptmod(inBase, exponent, modulus, result);
01096 
01097   MP_DIGITS(&montBase) = 0;
01098   MP_DIGITS(&goodBase) = 0;
01099 
01100   if (mp_cmp(inBase, modulus) < 0) {
01101     base = inBase;
01102   } else {
01103     MP_CHECKOK( mp_init(&goodBase) );
01104     base = &goodBase;
01105     MP_CHECKOK( mp_mod(inBase, modulus, &goodBase) );
01106   }
01107 
01108   nLen  = MP_USED(modulus);
01109   MP_CHECKOK( mp_init_size(&montBase, 2 * nLen + 2) );
01110 
01111   mmm.N = *modulus;                /* a copy of the mp_int struct */
01112   i = mpl_significant_bits(modulus);
01113   i += MP_DIGIT_BIT - 1;
01114   mmm.b = i - i % MP_DIGIT_BIT;
01115 
01116   /* compute n0', given n0, n0' = -(n0 ** -1) mod MP_RADIX
01117   **          where n0 = least significant mp_digit of N, the modulus.
01118   */
01119   mmm.n0prime = 0 - s_mp_invmod_radix( MP_DIGIT(modulus, 0) );
01120 
01121   MP_CHECKOK( s_mp_to_mont(base, &mmm, &montBase) );
01122 
01123   bits_in_exponent = mpl_significant_bits(exponent);
01124 #ifdef MP_USING_CACHE_SAFE_MOD_EXP
01125   if (mp_using_cache_safe_exp) {
01126     if (bits_in_exponent > 780)
01127        window_bits = 6;
01128     else if (bits_in_exponent > 256)
01129        window_bits = 5;
01130     else if (bits_in_exponent > 20)
01131        window_bits = 4;
01132        /* RSA public key exponents are typically under 20 bits (common values 
01133         * are: 3, 17, 65537) and a 4-bit window is inefficient
01134         */
01135     else 
01136        window_bits = 1;
01137   } else
01138 #endif
01139   if (bits_in_exponent > 480)
01140     window_bits = 6;
01141   else if (bits_in_exponent > 160)
01142     window_bits = 5;
01143   else if (bits_in_exponent > 20)
01144     window_bits = 4;
01145   /* RSA public key exponents are typically under 20 bits (common values 
01146    * are: 3, 17, 65537) and a 4-bit window is inefficient
01147    */
01148   else 
01149     window_bits = 1;
01150 
01151 #ifdef MP_USING_CACHE_SAFE_MOD_EXP
01152   /*
01153    * clamp the window size based on
01154    * the cache line size.
01155    */
01156   if (!max_window_bits) {
01157     unsigned long cache_size = s_mpi_getProcessorLineSize();
01158     /* processor has no cache, use 'fast' code always */
01159     if (cache_size == 0) {
01160       mp_using_cache_safe_exp = 0;
01161     } 
01162     if ((cache_size == 0) || (cache_size >= 64)) {
01163       max_window_bits = 6;
01164     } else if (cache_size >= 32) {
01165       max_window_bits = 5;
01166     } else if (cache_size >= 16) {
01167       max_window_bits = 4;
01168     } else max_window_bits = 1; /* should this be an assert? */
01169   }
01170 
01171   /* clamp the window size down before we caclulate bits_in_exponent */
01172   if (mp_using_cache_safe_exp) {
01173     if (window_bits > max_window_bits) {
01174       window_bits = max_window_bits;
01175     }
01176   }
01177 #endif
01178 
01179   odd_ints = 1 << (window_bits - 1);
01180   i = bits_in_exponent % window_bits;
01181   if (i != 0) {
01182     bits_in_exponent += window_bits - i;
01183   } 
01184 
01185 #ifdef MP_USING_MONT_MULF
01186   if (mp_using_mont_mulf) {
01187     MP_CHECKOK( s_mp_pad(&montBase, nLen) );
01188     res = mp_exptmod_f(&montBase, exponent, modulus, result, &mmm, nLen, 
01189                    bits_in_exponent, window_bits, odd_ints);
01190   } else
01191 #endif
01192 #ifdef MP_USING_CACHE_SAFE_MOD_EXP
01193   if (mp_using_cache_safe_exp) {
01194     res = mp_exptmod_safe_i(&montBase, exponent, modulus, result, &mmm, nLen, 
01195                    bits_in_exponent, window_bits, 1 << window_bits);
01196   } else
01197 #endif
01198   res = mp_exptmod_i(&montBase, exponent, modulus, result, &mmm, nLen, 
01199                    bits_in_exponent, window_bits, odd_ints);
01200 
01201 CLEANUP:
01202   mp_clear(&montBase);
01203   mp_clear(&goodBase);
01204   /* Don't mp_clear mmm.N because it is merely a copy of modulus.
01205   ** Just zap it.
01206   */
01207   memset(&mmm, 0, sizeof mmm);
01208   return res;
01209 }