Back to index

lightning-sunbird  0.9+nobinonly
mpi.c
Go to the documentation of this file.
00001 /*
00002  *  mpi.c
00003  *
00004  *  Arbitrary precision integer arithmetic library
00005  *
00006  * ***** BEGIN LICENSE BLOCK *****
00007  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00008  *
00009  * The contents of this file are subject to the Mozilla Public License Version
00010  * 1.1 (the "License"); you may not use this file except in compliance with
00011  * the License. You may obtain a copy of the License at
00012  * http://www.mozilla.org/MPL/
00013  *
00014  * Software distributed under the License is distributed on an "AS IS" basis,
00015  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00016  * for the specific language governing rights and limitations under the
00017  * License.
00018  *
00019  * The Original Code is the MPI Arbitrary Precision Integer Arithmetic library.
00020  *
00021  * The Initial Developer of the Original Code is
00022  * Michael J. Fromberger.
00023  * Portions created by the Initial Developer are Copyright (C) 1998
00024  * the Initial Developer. All Rights Reserved.
00025  *
00026  * Contributor(s):
00027  *   Netscape Communications Corporation
00028  *   Douglas Stebila <douglas@stebila.ca> of Sun Laboratories.
00029  *
00030  * Alternatively, the contents of this file may be used under the terms of
00031  * either the GNU General Public License Version 2 or later (the "GPL"), or
00032  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00033  * in which case the provisions of the GPL or the LGPL are applicable instead
00034  * of those above. If you wish to allow use of your version of this file only
00035  * under the terms of either the GPL or the LGPL, and not to allow others to
00036  * use your version of this file under the terms of the MPL, indicate your
00037  * decision by deleting the provisions above and replace them with the notice
00038  * and other provisions required by the GPL or the LGPL. If you do not delete
00039  * the provisions above, a recipient may use your version of this file under
00040  * the terms of any one of the MPL, the GPL or the LGPL.
00041  *
00042  * ***** END LICENSE BLOCK ***** */
00043 /* $Id: mpi.c,v 1.43.2.1 2005/12/21 00:15:55 wtchang%redhat.com Exp $ */
00044 
00045 #include "mpi-priv.h"
00046 #if defined(OSF1)
00047 #include <c_asm.h>
00048 #endif
00049 
00050 #if MP_LOGTAB
00051 /*
00052   A table of the logs of 2 for various bases (the 0 and 1 entries of
00053   this table are meaningless and should not be referenced).  
00054 
00055   This table is used to compute output lengths for the mp_toradix()
00056   function.  Since a number n in radix r takes up about log_r(n)
00057   digits, we estimate the output size by taking the least integer
00058   greater than log_r(n), where:
00059 
00060   log_r(n) = log_2(n) * log_r(2)
00061 
00062   This table, therefore, is a table of log_r(2) for 2 <= r <= 36,
00063   which are the output bases supported.  
00064  */
00065 #include "logtab.h"
00066 #endif
00067 
00068 /* {{{ Constant strings */
00069 
00070 /* Constant strings returned by mp_strerror() */
00071 static const char *mp_err_string[] = {
00072   "unknown result code",     /* say what?            */
00073   "boolean true",            /* MP_OKAY, MP_YES      */
00074   "boolean false",           /* MP_NO                */
00075   "out of memory",           /* MP_MEM               */
00076   "argument out of range",   /* MP_RANGE             */
00077   "invalid input parameter", /* MP_BADARG            */
00078   "result is undefined"      /* MP_UNDEF             */
00079 };
00080 
00081 /* Value to digit maps for radix conversion   */
00082 
00083 /* s_dmap_1 - standard digits and letters */
00084 static const char *s_dmap_1 = 
00085   "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
00086 
00087 /* }}} */
00088 
00089 unsigned long mp_allocs;
00090 unsigned long mp_frees;
00091 unsigned long mp_copies;
00092 
00093 /* {{{ Default precision manipulation */
00094 
00095 /* Default precision for newly created mp_int's      */
00096 static mp_size s_mp_defprec = MP_DEFPREC;
00097 
00098 mp_size mp_get_prec(void)
00099 {
00100   return s_mp_defprec;
00101 
00102 } /* end mp_get_prec() */
00103 
00104 void         mp_set_prec(mp_size prec)
00105 {
00106   if(prec == 0)
00107     s_mp_defprec = MP_DEFPREC;
00108   else
00109     s_mp_defprec = prec;
00110 
00111 } /* end mp_set_prec() */
00112 
00113 /* }}} */
00114 
00115 /*------------------------------------------------------------------------*/
00116 /* {{{ mp_init(mp) */
00117 
00118 /*
00119   mp_init(mp)
00120 
00121   Initialize a new zero-valued mp_int.  Returns MP_OKAY if successful,
00122   MP_MEM if memory could not be allocated for the structure.
00123  */
00124 
00125 mp_err mp_init(mp_int *mp)
00126 {
00127   return mp_init_size(mp, s_mp_defprec);
00128 
00129 } /* end mp_init() */
00130 
00131 /* }}} */
00132 
00133 /* {{{ mp_init_size(mp, prec) */
00134 
00135 /*
00136   mp_init_size(mp, prec)
00137 
00138   Initialize a new zero-valued mp_int with at least the given
00139   precision; returns MP_OKAY if successful, or MP_MEM if memory could
00140   not be allocated for the structure.
00141  */
00142 
00143 mp_err mp_init_size(mp_int *mp, mp_size prec)
00144 {
00145   ARGCHK(mp != NULL && prec > 0, MP_BADARG);
00146 
00147   prec = MP_ROUNDUP(prec, s_mp_defprec);
00148   if((DIGITS(mp) = s_mp_alloc(prec, sizeof(mp_digit))) == NULL)
00149     return MP_MEM;
00150 
00151   SIGN(mp) = ZPOS;
00152   USED(mp) = 1;
00153   ALLOC(mp) = prec;
00154 
00155   return MP_OKAY;
00156 
00157 } /* end mp_init_size() */
00158 
00159 /* }}} */
00160 
00161 /* {{{ mp_init_copy(mp, from) */
00162 
00163 /*
00164   mp_init_copy(mp, from)
00165 
00166   Initialize mp as an exact copy of from.  Returns MP_OKAY if
00167   successful, MP_MEM if memory could not be allocated for the new
00168   structure.
00169  */
00170 
00171 mp_err mp_init_copy(mp_int *mp, const mp_int *from)
00172 {
00173   ARGCHK(mp != NULL && from != NULL, MP_BADARG);
00174 
00175   if(mp == from)
00176     return MP_OKAY;
00177 
00178   if((DIGITS(mp) = s_mp_alloc(ALLOC(from), sizeof(mp_digit))) == NULL)
00179     return MP_MEM;
00180 
00181   s_mp_copy(DIGITS(from), DIGITS(mp), USED(from));
00182   USED(mp) = USED(from);
00183   ALLOC(mp) = ALLOC(from);
00184   SIGN(mp) = SIGN(from);
00185 
00186   return MP_OKAY;
00187 
00188 } /* end mp_init_copy() */
00189 
00190 /* }}} */
00191 
00192 /* {{{ mp_copy(from, to) */
00193 
00194 /*
00195   mp_copy(from, to)
00196 
00197   Copies the mp_int 'from' to the mp_int 'to'.  It is presumed that
00198   'to' has already been initialized (if not, use mp_init_copy()
00199   instead). If 'from' and 'to' are identical, nothing happens.
00200  */
00201 
00202 mp_err mp_copy(const mp_int *from, mp_int *to)
00203 {
00204   ARGCHK(from != NULL && to != NULL, MP_BADARG);
00205 
00206   if(from == to)
00207     return MP_OKAY;
00208 
00209   ++mp_copies;
00210   { /* copy */
00211     mp_digit   *tmp;
00212 
00213     /*
00214       If the allocated buffer in 'to' already has enough space to hold
00215       all the used digits of 'from', we'll re-use it to avoid hitting
00216       the memory allocater more than necessary; otherwise, we'd have
00217       to grow anyway, so we just allocate a hunk and make the copy as
00218       usual
00219      */
00220     if(ALLOC(to) >= USED(from)) {
00221       s_mp_setz(DIGITS(to) + USED(from), ALLOC(to) - USED(from));
00222       s_mp_copy(DIGITS(from), DIGITS(to), USED(from));
00223       
00224     } else {
00225       if((tmp = s_mp_alloc(ALLOC(from), sizeof(mp_digit))) == NULL)
00226        return MP_MEM;
00227 
00228       s_mp_copy(DIGITS(from), tmp, USED(from));
00229 
00230       if(DIGITS(to) != NULL) {
00231 #if MP_CRYPTO
00232        s_mp_setz(DIGITS(to), ALLOC(to));
00233 #endif
00234        s_mp_free(DIGITS(to));
00235       }
00236 
00237       DIGITS(to) = tmp;
00238       ALLOC(to) = ALLOC(from);
00239     }
00240 
00241     /* Copy the precision and sign from the original */
00242     USED(to) = USED(from);
00243     SIGN(to) = SIGN(from);
00244   } /* end copy */
00245 
00246   return MP_OKAY;
00247 
00248 } /* end mp_copy() */
00249 
00250 /* }}} */
00251 
00252 /* {{{ mp_exch(mp1, mp2) */
00253 
00254 /*
00255   mp_exch(mp1, mp2)
00256 
00257   Exchange mp1 and mp2 without allocating any intermediate memory
00258   (well, unless you count the stack space needed for this call and the
00259   locals it creates...).  This cannot fail.
00260  */
00261 
00262 void mp_exch(mp_int *mp1, mp_int *mp2)
00263 {
00264 #if MP_ARGCHK == 2
00265   assert(mp1 != NULL && mp2 != NULL);
00266 #else
00267   if(mp1 == NULL || mp2 == NULL)
00268     return;
00269 #endif
00270 
00271   s_mp_exch(mp1, mp2);
00272 
00273 } /* end mp_exch() */
00274 
00275 /* }}} */
00276 
00277 /* {{{ mp_clear(mp) */
00278 
00279 /*
00280   mp_clear(mp)
00281 
00282   Release the storage used by an mp_int, and void its fields so that
00283   if someone calls mp_clear() again for the same int later, we won't
00284   get tollchocked.
00285  */
00286 
00287 void   mp_clear(mp_int *mp)
00288 {
00289   if(mp == NULL)
00290     return;
00291 
00292   if(DIGITS(mp) != NULL) {
00293 #if MP_CRYPTO
00294     s_mp_setz(DIGITS(mp), ALLOC(mp));
00295 #endif
00296     s_mp_free(DIGITS(mp));
00297     DIGITS(mp) = NULL;
00298   }
00299 
00300   USED(mp) = 0;
00301   ALLOC(mp) = 0;
00302 
00303 } /* end mp_clear() */
00304 
00305 /* }}} */
00306 
00307 /* {{{ mp_zero(mp) */
00308 
00309 /*
00310   mp_zero(mp) 
00311 
00312   Set mp to zero.  Does not change the allocated size of the structure,
00313   and therefore cannot fail (except on a bad argument, which we ignore)
00314  */
00315 void   mp_zero(mp_int *mp)
00316 {
00317   if(mp == NULL)
00318     return;
00319 
00320   s_mp_setz(DIGITS(mp), ALLOC(mp));
00321   USED(mp) = 1;
00322   SIGN(mp) = ZPOS;
00323 
00324 } /* end mp_zero() */
00325 
00326 /* }}} */
00327 
00328 /* {{{ mp_set(mp, d) */
00329 
00330 void   mp_set(mp_int *mp, mp_digit d)
00331 {
00332   if(mp == NULL)
00333     return;
00334 
00335   mp_zero(mp);
00336   DIGIT(mp, 0) = d;
00337 
00338 } /* end mp_set() */
00339 
00340 /* }}} */
00341 
00342 /* {{{ mp_set_int(mp, z) */
00343 
00344 mp_err mp_set_int(mp_int *mp, long z)
00345 {
00346   int            ix;
00347   unsigned long  v = labs(z);
00348   mp_err         res;
00349 
00350   ARGCHK(mp != NULL, MP_BADARG);
00351 
00352   mp_zero(mp);
00353   if(z == 0)
00354     return MP_OKAY;  /* shortcut for zero */
00355 
00356   if (sizeof v <= sizeof(mp_digit)) {
00357     DIGIT(mp,0) = v;
00358   } else {
00359     for (ix = sizeof(long) - 1; ix >= 0; ix--) {
00360       if ((res = s_mp_mul_d(mp, (UCHAR_MAX + 1))) != MP_OKAY)
00361        return res;
00362 
00363       res = s_mp_add_d(mp, (mp_digit)((v >> (ix * CHAR_BIT)) & UCHAR_MAX));
00364       if (res != MP_OKAY)
00365        return res;
00366     }
00367   }
00368   if(z < 0)
00369     SIGN(mp) = NEG;
00370 
00371   return MP_OKAY;
00372 
00373 } /* end mp_set_int() */
00374 
00375 /* }}} */
00376 
00377 /* {{{ mp_set_ulong(mp, z) */
00378 
00379 mp_err mp_set_ulong(mp_int *mp, unsigned long z)
00380 {
00381   int            ix;
00382   mp_err         res;
00383 
00384   ARGCHK(mp != NULL, MP_BADARG);
00385 
00386   mp_zero(mp);
00387   if(z == 0)
00388     return MP_OKAY;  /* shortcut for zero */
00389 
00390   if (sizeof z <= sizeof(mp_digit)) {
00391     DIGIT(mp,0) = z;
00392   } else {
00393     for (ix = sizeof(long) - 1; ix >= 0; ix--) {
00394       if ((res = s_mp_mul_d(mp, (UCHAR_MAX + 1))) != MP_OKAY)
00395        return res;
00396 
00397       res = s_mp_add_d(mp, (mp_digit)((z >> (ix * CHAR_BIT)) & UCHAR_MAX));
00398       if (res != MP_OKAY)
00399        return res;
00400     }
00401   }
00402   return MP_OKAY;
00403 } /* end mp_set_ulong() */
00404 
00405 /* }}} */
00406 
00407 /*------------------------------------------------------------------------*/
00408 /* {{{ Digit arithmetic */
00409 
00410 /* {{{ mp_add_d(a, d, b) */
00411 
00412 /*
00413   mp_add_d(a, d, b)
00414 
00415   Compute the sum b = a + d, for a single digit d.  Respects the sign of
00416   its primary addend (single digits are unsigned anyway).
00417  */
00418 
00419 mp_err mp_add_d(const mp_int *a, mp_digit d, mp_int *b)
00420 {
00421   mp_int   tmp;
00422   mp_err   res;
00423 
00424   ARGCHK(a != NULL && b != NULL, MP_BADARG);
00425 
00426   if((res = mp_init_copy(&tmp, a)) != MP_OKAY)
00427     return res;
00428 
00429   if(SIGN(&tmp) == ZPOS) {
00430     if((res = s_mp_add_d(&tmp, d)) != MP_OKAY)
00431       goto CLEANUP;
00432   } else if(s_mp_cmp_d(&tmp, d) >= 0) {
00433     if((res = s_mp_sub_d(&tmp, d)) != MP_OKAY)
00434       goto CLEANUP;
00435   } else {
00436     mp_neg(&tmp, &tmp);
00437 
00438     DIGIT(&tmp, 0) = d - DIGIT(&tmp, 0);
00439   }
00440 
00441   if(s_mp_cmp_d(&tmp, 0) == 0)
00442     SIGN(&tmp) = ZPOS;
00443 
00444   s_mp_exch(&tmp, b);
00445 
00446 CLEANUP:
00447   mp_clear(&tmp);
00448   return res;
00449 
00450 } /* end mp_add_d() */
00451 
00452 /* }}} */
00453 
00454 /* {{{ mp_sub_d(a, d, b) */
00455 
00456 /*
00457   mp_sub_d(a, d, b)
00458 
00459   Compute the difference b = a - d, for a single digit d.  Respects the
00460   sign of its subtrahend (single digits are unsigned anyway).
00461  */
00462 
00463 mp_err mp_sub_d(const mp_int *a, mp_digit d, mp_int *b)
00464 {
00465   mp_int   tmp;
00466   mp_err   res;
00467 
00468   ARGCHK(a != NULL && b != NULL, MP_BADARG);
00469 
00470   if((res = mp_init_copy(&tmp, a)) != MP_OKAY)
00471     return res;
00472 
00473   if(SIGN(&tmp) == NEG) {
00474     if((res = s_mp_add_d(&tmp, d)) != MP_OKAY)
00475       goto CLEANUP;
00476   } else if(s_mp_cmp_d(&tmp, d) >= 0) {
00477     if((res = s_mp_sub_d(&tmp, d)) != MP_OKAY)
00478       goto CLEANUP;
00479   } else {
00480     mp_neg(&tmp, &tmp);
00481 
00482     DIGIT(&tmp, 0) = d - DIGIT(&tmp, 0);
00483     SIGN(&tmp) = NEG;
00484   }
00485 
00486   if(s_mp_cmp_d(&tmp, 0) == 0)
00487     SIGN(&tmp) = ZPOS;
00488 
00489   s_mp_exch(&tmp, b);
00490 
00491 CLEANUP:
00492   mp_clear(&tmp);
00493   return res;
00494 
00495 } /* end mp_sub_d() */
00496 
00497 /* }}} */
00498 
00499 /* {{{ mp_mul_d(a, d, b) */
00500 
00501 /*
00502   mp_mul_d(a, d, b)
00503 
00504   Compute the product b = a * d, for a single digit d.  Respects the sign
00505   of its multiplicand (single digits are unsigned anyway)
00506  */
00507 
00508 mp_err mp_mul_d(const mp_int *a, mp_digit d, mp_int *b)
00509 {
00510   mp_err  res;
00511 
00512   ARGCHK(a != NULL && b != NULL, MP_BADARG);
00513 
00514   if(d == 0) {
00515     mp_zero(b);
00516     return MP_OKAY;
00517   }
00518 
00519   if((res = mp_copy(a, b)) != MP_OKAY)
00520     return res;
00521 
00522   res = s_mp_mul_d(b, d);
00523 
00524   return res;
00525 
00526 } /* end mp_mul_d() */
00527 
00528 /* }}} */
00529 
00530 /* {{{ mp_mul_2(a, c) */
00531 
00532 mp_err mp_mul_2(const mp_int *a, mp_int *c)
00533 {
00534   mp_err  res;
00535 
00536   ARGCHK(a != NULL && c != NULL, MP_BADARG);
00537 
00538   if((res = mp_copy(a, c)) != MP_OKAY)
00539     return res;
00540 
00541   return s_mp_mul_2(c);
00542 
00543 } /* end mp_mul_2() */
00544 
00545 /* }}} */
00546 
00547 /* {{{ mp_div_d(a, d, q, r) */
00548 
00549 /*
00550   mp_div_d(a, d, q, r)
00551 
00552   Compute the quotient q = a / d and remainder r = a mod d, for a
00553   single digit d.  Respects the sign of its divisor (single digits are
00554   unsigned anyway).
00555  */
00556 
00557 mp_err mp_div_d(const mp_int *a, mp_digit d, mp_int *q, mp_digit *r)
00558 {
00559   mp_err   res;
00560   mp_int   qp;
00561   mp_digit rem;
00562   int      pow;
00563 
00564   ARGCHK(a != NULL, MP_BADARG);
00565 
00566   if(d == 0)
00567     return MP_RANGE;
00568 
00569   /* Shortcut for powers of two ... */
00570   if((pow = s_mp_ispow2d(d)) >= 0) {
00571     mp_digit  mask;
00572 
00573     mask = ((mp_digit)1 << pow) - 1;
00574     rem = DIGIT(a, 0) & mask;
00575 
00576     if(q) {
00577       mp_copy(a, q);
00578       s_mp_div_2d(q, pow);
00579     }
00580 
00581     if(r)
00582       *r = rem;
00583 
00584     return MP_OKAY;
00585   }
00586 
00587   if((res = mp_init_copy(&qp, a)) != MP_OKAY)
00588     return res;
00589 
00590   res = s_mp_div_d(&qp, d, &rem);
00591 
00592   if(s_mp_cmp_d(&qp, 0) == 0)
00593     SIGN(q) = ZPOS;
00594 
00595   if(r)
00596     *r = rem;
00597 
00598   if(q)
00599     s_mp_exch(&qp, q);
00600 
00601   mp_clear(&qp);
00602   return res;
00603 
00604 } /* end mp_div_d() */
00605 
00606 /* }}} */
00607 
00608 /* {{{ mp_div_2(a, c) */
00609 
00610 /*
00611   mp_div_2(a, c)
00612 
00613   Compute c = a / 2, disregarding the remainder.
00614  */
00615 
00616 mp_err mp_div_2(const mp_int *a, mp_int *c)
00617 {
00618   mp_err  res;
00619 
00620   ARGCHK(a != NULL && c != NULL, MP_BADARG);
00621 
00622   if((res = mp_copy(a, c)) != MP_OKAY)
00623     return res;
00624 
00625   s_mp_div_2(c);
00626 
00627   return MP_OKAY;
00628 
00629 } /* end mp_div_2() */
00630 
00631 /* }}} */
00632 
00633 /* {{{ mp_expt_d(a, d, b) */
00634 
00635 mp_err mp_expt_d(const mp_int *a, mp_digit d, mp_int *c)
00636 {
00637   mp_int   s, x;
00638   mp_err   res;
00639 
00640   ARGCHK(a != NULL && c != NULL, MP_BADARG);
00641 
00642   if((res = mp_init(&s)) != MP_OKAY)
00643     return res;
00644   if((res = mp_init_copy(&x, a)) != MP_OKAY)
00645     goto X;
00646 
00647   DIGIT(&s, 0) = 1;
00648 
00649   while(d != 0) {
00650     if(d & 1) {
00651       if((res = s_mp_mul(&s, &x)) != MP_OKAY)
00652        goto CLEANUP;
00653     }
00654 
00655     d /= 2;
00656 
00657     if((res = s_mp_sqr(&x)) != MP_OKAY)
00658       goto CLEANUP;
00659   }
00660 
00661   s_mp_exch(&s, c);
00662 
00663 CLEANUP:
00664   mp_clear(&x);
00665 X:
00666   mp_clear(&s);
00667 
00668   return res;
00669 
00670 } /* end mp_expt_d() */
00671 
00672 /* }}} */
00673 
00674 /* }}} */
00675 
00676 /*------------------------------------------------------------------------*/
00677 /* {{{ Full arithmetic */
00678 
00679 /* {{{ mp_abs(a, b) */
00680 
00681 /*
00682   mp_abs(a, b)
00683 
00684   Compute b = |a|.  'a' and 'b' may be identical.
00685  */
00686 
00687 mp_err mp_abs(const mp_int *a, mp_int *b)
00688 {
00689   mp_err   res;
00690 
00691   ARGCHK(a != NULL && b != NULL, MP_BADARG);
00692 
00693   if((res = mp_copy(a, b)) != MP_OKAY)
00694     return res;
00695 
00696   SIGN(b) = ZPOS;
00697 
00698   return MP_OKAY;
00699 
00700 } /* end mp_abs() */
00701 
00702 /* }}} */
00703 
00704 /* {{{ mp_neg(a, b) */
00705 
00706 /*
00707   mp_neg(a, b)
00708 
00709   Compute b = -a.  'a' and 'b' may be identical.
00710  */
00711 
00712 mp_err mp_neg(const mp_int *a, mp_int *b)
00713 {
00714   mp_err   res;
00715 
00716   ARGCHK(a != NULL && b != NULL, MP_BADARG);
00717 
00718   if((res = mp_copy(a, b)) != MP_OKAY)
00719     return res;
00720 
00721   if(s_mp_cmp_d(b, 0) == MP_EQ) 
00722     SIGN(b) = ZPOS;
00723   else 
00724     SIGN(b) = (SIGN(b) == NEG) ? ZPOS : NEG;
00725 
00726   return MP_OKAY;
00727 
00728 } /* end mp_neg() */
00729 
00730 /* }}} */
00731 
00732 /* {{{ mp_add(a, b, c) */
00733 
00734 /*
00735   mp_add(a, b, c)
00736 
00737   Compute c = a + b.  All parameters may be identical.
00738  */
00739 
00740 mp_err mp_add(const mp_int *a, const mp_int *b, mp_int *c)
00741 {
00742   mp_err  res;
00743 
00744   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
00745 
00746   if(SIGN(a) == SIGN(b)) { /* same sign:  add values, keep sign */
00747     MP_CHECKOK( s_mp_add_3arg(a, b, c) );
00748   } else if(s_mp_cmp(a, b) >= 0) {  /* different sign: |a| >= |b|   */
00749     MP_CHECKOK( s_mp_sub_3arg(a, b, c) );
00750   } else {                          /* different sign: |a|  < |b|   */
00751     MP_CHECKOK( s_mp_sub_3arg(b, a, c) );
00752   }
00753 
00754   if (s_mp_cmp_d(c, 0) == MP_EQ)
00755     SIGN(c) = ZPOS;
00756 
00757 CLEANUP:
00758   return res;
00759 
00760 } /* end mp_add() */
00761 
00762 /* }}} */
00763 
00764 /* {{{ mp_sub(a, b, c) */
00765 
00766 /*
00767   mp_sub(a, b, c)
00768 
00769   Compute c = a - b.  All parameters may be identical.
00770  */
00771 
00772 mp_err mp_sub(const mp_int *a, const mp_int *b, mp_int *c)
00773 {
00774   mp_err  res;
00775   int     magDiff;
00776 
00777   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
00778 
00779   if (a == b) {
00780     mp_zero(c);
00781     return MP_OKAY;
00782   }
00783 
00784   if (MP_SIGN(a) != MP_SIGN(b)) {
00785     MP_CHECKOK( s_mp_add_3arg(a, b, c) );
00786   } else if (!(magDiff = s_mp_cmp(a, b))) {
00787     mp_zero(c);
00788     res = MP_OKAY;
00789   } else if (magDiff > 0) {
00790     MP_CHECKOK( s_mp_sub_3arg(a, b, c) );
00791   } else {
00792     MP_CHECKOK( s_mp_sub_3arg(b, a, c) );
00793     MP_SIGN(c) = !MP_SIGN(a);
00794   }
00795 
00796   if (s_mp_cmp_d(c, 0) == MP_EQ)
00797     MP_SIGN(c) = MP_ZPOS;
00798 
00799 CLEANUP:
00800   return res;
00801 
00802 } /* end mp_sub() */
00803 
00804 /* }}} */
00805 
00806 /* {{{ mp_mul(a, b, c) */
00807 
00808 /*
00809   mp_mul(a, b, c)
00810 
00811   Compute c = a * b.  All parameters may be identical.
00812  */
00813 mp_err   mp_mul(const mp_int *a, const mp_int *b, mp_int * c)
00814 {
00815   mp_digit *pb;
00816   mp_int   tmp;
00817   mp_err   res;
00818   mp_size  ib;
00819   mp_size  useda, usedb;
00820 
00821   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
00822 
00823   if (a == c) {
00824     if ((res = mp_init_copy(&tmp, a)) != MP_OKAY)
00825       return res;
00826     if (a == b) 
00827       b = &tmp;
00828     a = &tmp;
00829   } else if (b == c) {
00830     if ((res = mp_init_copy(&tmp, b)) != MP_OKAY)
00831       return res;
00832     b = &tmp;
00833   } else {
00834     MP_DIGITS(&tmp) = 0;
00835   }
00836 
00837   if (MP_USED(a) < MP_USED(b)) {
00838     const mp_int *xch = b;  /* switch a and b, to do fewer outer loops */
00839     b = a;
00840     a = xch;
00841   }
00842 
00843   MP_USED(c) = 1; MP_DIGIT(c, 0) = 0;
00844   if((res = s_mp_pad(c, USED(a) + USED(b))) != MP_OKAY)
00845     goto CLEANUP;
00846 
00847 #ifdef NSS_USE_COMBA
00848   if ((MP_USED(a) == MP_USED(b)) && IS_POWER_OF_2(MP_USED(b))) {
00849       if (MP_USED(a) == 4) {
00850           s_mp_mul_comba_4(a, b, c);
00851           goto CLEANUP;
00852       }
00853       if (MP_USED(a) == 8) {
00854           s_mp_mul_comba_8(a, b, c);
00855           goto CLEANUP;
00856       }
00857       if (MP_USED(a) == 16) {
00858           s_mp_mul_comba_16(a, b, c);
00859           goto CLEANUP;
00860       }
00861       if (MP_USED(a) == 32) {
00862           s_mp_mul_comba_32(a, b, c);
00863           goto CLEANUP;
00864       } 
00865   }
00866 #endif
00867 
00868   pb = MP_DIGITS(b);
00869   s_mpv_mul_d(MP_DIGITS(a), MP_USED(a), *pb++, MP_DIGITS(c));
00870 
00871   /* Outer loop:  Digits of b */
00872   useda = MP_USED(a);
00873   usedb = MP_USED(b);
00874   for (ib = 1; ib < usedb; ib++) {
00875     mp_digit b_i    = *pb++;
00876 
00877     /* Inner product:  Digits of a */
00878     if (b_i)
00879       s_mpv_mul_d_add(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib);
00880     else
00881       MP_DIGIT(c, ib + useda) = b_i;
00882   }
00883 
00884   s_mp_clamp(c);
00885 
00886   if(SIGN(a) == SIGN(b) || s_mp_cmp_d(c, 0) == MP_EQ)
00887     SIGN(c) = ZPOS;
00888   else
00889     SIGN(c) = NEG;
00890 
00891 CLEANUP:
00892   mp_clear(&tmp);
00893   return res;
00894 } /* end mp_mul() */
00895 
00896 /* }}} */
00897 
00898 /* {{{ mp_sqr(a, sqr) */
00899 
00900 #if MP_SQUARE
00901 /*
00902   Computes the square of a.  This can be done more
00903   efficiently than a general multiplication, because many of the
00904   computation steps are redundant when squaring.  The inner product
00905   step is a bit more complicated, but we save a fair number of
00906   iterations of the multiplication loop.
00907  */
00908 
00909 /* sqr = a^2;   Caller provides both a and tmp; */
00910 mp_err   mp_sqr(const mp_int *a, mp_int *sqr)
00911 {
00912   mp_digit *pa;
00913   mp_digit d;
00914   mp_err   res;
00915   mp_size  ix;
00916   mp_int   tmp;
00917   int      count;
00918 
00919   ARGCHK(a != NULL && sqr != NULL, MP_BADARG);
00920 
00921   if (a == sqr) {
00922     if((res = mp_init_copy(&tmp, a)) != MP_OKAY)
00923       return res;
00924     a = &tmp;
00925   } else {
00926     DIGITS(&tmp) = 0;
00927     res = MP_OKAY;
00928   }
00929 
00930   ix = 2 * MP_USED(a);
00931   if (ix > MP_ALLOC(sqr)) {
00932     MP_USED(sqr) = 1; 
00933     MP_CHECKOK( s_mp_grow(sqr, ix) );
00934   } 
00935   MP_USED(sqr) = ix;
00936   MP_DIGIT(sqr, 0) = 0;
00937 
00938 #ifdef NSS_USE_COMBA
00939   if (IS_POWER_OF_2(MP_USED(a))) {
00940       if (MP_USED(a) == 4) {
00941           s_mp_sqr_comba_4(a, sqr);
00942           goto CLEANUP;
00943       }
00944       if (MP_USED(a) == 8) {
00945           s_mp_sqr_comba_8(a, sqr);
00946           goto CLEANUP;
00947       }
00948       if (MP_USED(a) == 16) {
00949           s_mp_sqr_comba_16(a, sqr);
00950           goto CLEANUP;
00951       }
00952       if (MP_USED(a) == 32) {
00953           s_mp_sqr_comba_32(a, sqr);
00954           goto CLEANUP;
00955       } 
00956   }
00957 #endif
00958 
00959   pa = MP_DIGITS(a);
00960   count = MP_USED(a) - 1;
00961   if (count > 0) {
00962     d = *pa++;
00963     s_mpv_mul_d(pa, count, d, MP_DIGITS(sqr) + 1);
00964     for (ix = 3; --count > 0; ix += 2) {
00965       d = *pa++;
00966       s_mpv_mul_d_add(pa, count, d, MP_DIGITS(sqr) + ix);
00967     } /* for(ix ...) */
00968     MP_DIGIT(sqr, MP_USED(sqr)-1) = 0; /* above loop stopped short of this. */
00969 
00970     /* now sqr *= 2 */
00971     s_mp_mul_2(sqr);
00972   } else {
00973     MP_DIGIT(sqr, 1) = 0;
00974   }
00975 
00976   /* now add the squares of the digits of a to sqr. */
00977   s_mpv_sqr_add_prop(MP_DIGITS(a), MP_USED(a), MP_DIGITS(sqr));
00978 
00979   SIGN(sqr) = ZPOS;
00980   s_mp_clamp(sqr);
00981 
00982 CLEANUP:
00983   mp_clear(&tmp);
00984   return res;
00985 
00986 } /* end mp_sqr() */
00987 #endif
00988 
00989 /* }}} */
00990 
00991 /* {{{ mp_div(a, b, q, r) */
00992 
00993 /*
00994   mp_div(a, b, q, r)
00995 
00996   Compute q = a / b and r = a mod b.  Input parameters may be re-used
00997   as output parameters.  If q or r is NULL, that portion of the
00998   computation will be discarded (although it will still be computed)
00999  */
01000 mp_err mp_div(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r)
01001 {
01002   mp_err   res;
01003   mp_int   *pQ, *pR;
01004   mp_int   qtmp, rtmp, btmp;
01005   int      cmp;
01006   mp_sign  signA = MP_SIGN(a);
01007   mp_sign  signB = MP_SIGN(b);
01008 
01009   ARGCHK(a != NULL && b != NULL, MP_BADARG);
01010 
01011   if(mp_cmp_z(b) == MP_EQ)
01012     return MP_RANGE;
01013 
01014   DIGITS(&qtmp) = 0;
01015   DIGITS(&rtmp) = 0;
01016   DIGITS(&btmp) = 0;
01017 
01018   /* Set up some temporaries... */
01019   if (!r || r == a || r == b) {
01020     MP_CHECKOK( mp_init_copy(&rtmp, a) );
01021     pR = &rtmp;
01022   } else {
01023     MP_CHECKOK( mp_copy(a, r) );
01024     pR = r;
01025   }
01026 
01027   if (!q || q == a || q == b) {
01028     MP_CHECKOK( mp_init_size(&qtmp, MP_USED(a)) );
01029     pQ = &qtmp;
01030   } else {
01031     MP_CHECKOK( s_mp_pad(q, MP_USED(a)) );
01032     pQ = q;
01033     mp_zero(pQ);
01034   }
01035 
01036   /*
01037     If |a| <= |b|, we can compute the solution without division;
01038     otherwise, we actually do the work required.
01039    */
01040   if ((cmp = s_mp_cmp(a, b)) <= 0) {
01041     if (cmp) {
01042       /* r was set to a above. */
01043       mp_zero(pQ);
01044     } else {
01045       mp_set(pQ, 1);
01046       mp_zero(pR);
01047     }
01048   } else {
01049     MP_CHECKOK( mp_init_copy(&btmp, b) );
01050     MP_CHECKOK( s_mp_div(pR, &btmp, pQ) );
01051   }
01052 
01053   /* Compute the signs for the output  */
01054   MP_SIGN(pR) = signA;   /* Sr = Sa              */
01055   /* Sq = ZPOS if Sa == Sb */ /* Sq = NEG if Sa != Sb */
01056   MP_SIGN(pQ) = (signA == signB) ? ZPOS : NEG;
01057 
01058   if(s_mp_cmp_d(pQ, 0) == MP_EQ)
01059     SIGN(pQ) = ZPOS;
01060   if(s_mp_cmp_d(pR, 0) == MP_EQ)
01061     SIGN(pR) = ZPOS;
01062 
01063   /* Copy output, if it is needed      */
01064   if(q && q != pQ) 
01065     s_mp_exch(pQ, q);
01066 
01067   if(r && r != pR) 
01068     s_mp_exch(pR, r);
01069 
01070 CLEANUP:
01071   mp_clear(&btmp);
01072   mp_clear(&rtmp);
01073   mp_clear(&qtmp);
01074 
01075   return res;
01076 
01077 } /* end mp_div() */
01078 
01079 /* }}} */
01080 
01081 /* {{{ mp_div_2d(a, d, q, r) */
01082 
01083 mp_err mp_div_2d(const mp_int *a, mp_digit d, mp_int *q, mp_int *r)
01084 {
01085   mp_err  res;
01086 
01087   ARGCHK(a != NULL, MP_BADARG);
01088 
01089   if(q) {
01090     if((res = mp_copy(a, q)) != MP_OKAY)
01091       return res;
01092   }
01093   if(r) {
01094     if((res = mp_copy(a, r)) != MP_OKAY)
01095       return res;
01096   }
01097   if(q) {
01098     s_mp_div_2d(q, d);
01099   }
01100   if(r) {
01101     s_mp_mod_2d(r, d);
01102   }
01103 
01104   return MP_OKAY;
01105 
01106 } /* end mp_div_2d() */
01107 
01108 /* }}} */
01109 
01110 /* {{{ mp_expt(a, b, c) */
01111 
01112 /*
01113   mp_expt(a, b, c)
01114 
01115   Compute c = a ** b, that is, raise a to the b power.  Uses a
01116   standard iterative square-and-multiply technique.
01117  */
01118 
01119 mp_err mp_expt(mp_int *a, mp_int *b, mp_int *c)
01120 {
01121   mp_int   s, x;
01122   mp_err   res;
01123   mp_digit d;
01124   int      dig, bit;
01125 
01126   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
01127 
01128   if(mp_cmp_z(b) < 0)
01129     return MP_RANGE;
01130 
01131   if((res = mp_init(&s)) != MP_OKAY)
01132     return res;
01133 
01134   mp_set(&s, 1);
01135 
01136   if((res = mp_init_copy(&x, a)) != MP_OKAY)
01137     goto X;
01138 
01139   /* Loop over low-order digits in ascending order */
01140   for(dig = 0; dig < (USED(b) - 1); dig++) {
01141     d = DIGIT(b, dig);
01142 
01143     /* Loop over bits of each non-maximal digit */
01144     for(bit = 0; bit < DIGIT_BIT; bit++) {
01145       if(d & 1) {
01146        if((res = s_mp_mul(&s, &x)) != MP_OKAY) 
01147          goto CLEANUP;
01148       }
01149 
01150       d >>= 1;
01151       
01152       if((res = s_mp_sqr(&x)) != MP_OKAY)
01153        goto CLEANUP;
01154     }
01155   }
01156 
01157   /* Consider now the last digit... */
01158   d = DIGIT(b, dig);
01159 
01160   while(d) {
01161     if(d & 1) {
01162       if((res = s_mp_mul(&s, &x)) != MP_OKAY)
01163        goto CLEANUP;
01164     }
01165 
01166     d >>= 1;
01167 
01168     if((res = s_mp_sqr(&x)) != MP_OKAY)
01169       goto CLEANUP;
01170   }
01171   
01172   if(mp_iseven(b))
01173     SIGN(&s) = SIGN(a);
01174 
01175   res = mp_copy(&s, c);
01176 
01177 CLEANUP:
01178   mp_clear(&x);
01179 X:
01180   mp_clear(&s);
01181 
01182   return res;
01183 
01184 } /* end mp_expt() */
01185 
01186 /* }}} */
01187 
01188 /* {{{ mp_2expt(a, k) */
01189 
01190 /* Compute a = 2^k */
01191 
01192 mp_err mp_2expt(mp_int *a, mp_digit k)
01193 {
01194   ARGCHK(a != NULL, MP_BADARG);
01195 
01196   return s_mp_2expt(a, k);
01197 
01198 } /* end mp_2expt() */
01199 
01200 /* }}} */
01201 
01202 /* {{{ mp_mod(a, m, c) */
01203 
01204 /*
01205   mp_mod(a, m, c)
01206 
01207   Compute c = a (mod m).  Result will always be 0 <= c < m.
01208  */
01209 
01210 mp_err mp_mod(const mp_int *a, const mp_int *m, mp_int *c)
01211 {
01212   mp_err  res;
01213   int     mag;
01214 
01215   ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
01216 
01217   if(SIGN(m) == NEG)
01218     return MP_RANGE;
01219 
01220   /*
01221      If |a| > m, we need to divide to get the remainder and take the
01222      absolute value.  
01223 
01224      If |a| < m, we don't need to do any division, just copy and adjust
01225      the sign (if a is negative).
01226 
01227      If |a| == m, we can simply set the result to zero.
01228 
01229      This order is intended to minimize the average path length of the
01230      comparison chain on common workloads -- the most frequent cases are
01231      that |a| != m, so we do those first.
01232    */
01233   if((mag = s_mp_cmp(a, m)) > 0) {
01234     if((res = mp_div(a, m, NULL, c)) != MP_OKAY)
01235       return res;
01236     
01237     if(SIGN(c) == NEG) {
01238       if((res = mp_add(c, m, c)) != MP_OKAY)
01239        return res;
01240     }
01241 
01242   } else if(mag < 0) {
01243     if((res = mp_copy(a, c)) != MP_OKAY)
01244       return res;
01245 
01246     if(mp_cmp_z(a) < 0) {
01247       if((res = mp_add(c, m, c)) != MP_OKAY)
01248        return res;
01249 
01250     }
01251     
01252   } else {
01253     mp_zero(c);
01254 
01255   }
01256 
01257   return MP_OKAY;
01258 
01259 } /* end mp_mod() */
01260 
01261 /* }}} */
01262 
01263 /* {{{ mp_mod_d(a, d, c) */
01264 
01265 /*
01266   mp_mod_d(a, d, c)
01267 
01268   Compute c = a (mod d).  Result will always be 0 <= c < d
01269  */
01270 mp_err mp_mod_d(const mp_int *a, mp_digit d, mp_digit *c)
01271 {
01272   mp_err   res;
01273   mp_digit rem;
01274 
01275   ARGCHK(a != NULL && c != NULL, MP_BADARG);
01276 
01277   if(s_mp_cmp_d(a, d) > 0) {
01278     if((res = mp_div_d(a, d, NULL, &rem)) != MP_OKAY)
01279       return res;
01280 
01281   } else {
01282     if(SIGN(a) == NEG)
01283       rem = d - DIGIT(a, 0);
01284     else
01285       rem = DIGIT(a, 0);
01286   }
01287 
01288   if(c)
01289     *c = rem;
01290 
01291   return MP_OKAY;
01292 
01293 } /* end mp_mod_d() */
01294 
01295 /* }}} */
01296 
01297 /* {{{ mp_sqrt(a, b) */
01298 
01299 /*
01300   mp_sqrt(a, b)
01301 
01302   Compute the integer square root of a, and store the result in b.
01303   Uses an integer-arithmetic version of Newton's iterative linear
01304   approximation technique to determine this value; the result has the
01305   following two properties:
01306 
01307      b^2 <= a
01308      (b+1)^2 >= a
01309 
01310   It is a range error to pass a negative value.
01311  */
01312 mp_err mp_sqrt(const mp_int *a, mp_int *b)
01313 {
01314   mp_int   x, t;
01315   mp_err   res;
01316   mp_size  used;
01317 
01318   ARGCHK(a != NULL && b != NULL, MP_BADARG);
01319 
01320   /* Cannot take square root of a negative value */
01321   if(SIGN(a) == NEG)
01322     return MP_RANGE;
01323 
01324   /* Special cases for zero and one, trivial     */
01325   if(mp_cmp_d(a, 1) <= 0)
01326     return mp_copy(a, b);
01327     
01328   /* Initialize the temporaries we'll use below  */
01329   if((res = mp_init_size(&t, USED(a))) != MP_OKAY)
01330     return res;
01331 
01332   /* Compute an initial guess for the iteration as a itself */
01333   if((res = mp_init_copy(&x, a)) != MP_OKAY)
01334     goto X;
01335 
01336   used = MP_USED(&x);
01337   if (used > 1) {
01338     s_mp_rshd(&x, used / 2);
01339   }
01340 
01341   for(;;) {
01342     /* t = (x * x) - a */
01343     mp_copy(&x, &t);      /* can't fail, t is big enough for original x */
01344     if((res = mp_sqr(&t, &t)) != MP_OKAY ||
01345        (res = mp_sub(&t, a, &t)) != MP_OKAY)
01346       goto CLEANUP;
01347 
01348     /* t = t / 2x       */
01349     s_mp_mul_2(&x);
01350     if((res = mp_div(&t, &x, &t, NULL)) != MP_OKAY)
01351       goto CLEANUP;
01352     s_mp_div_2(&x);
01353 
01354     /* Terminate the loop, if the quotient is zero */
01355     if(mp_cmp_z(&t) == MP_EQ)
01356       break;
01357 
01358     /* x = x - t       */
01359     if((res = mp_sub(&x, &t, &x)) != MP_OKAY)
01360       goto CLEANUP;
01361 
01362   }
01363 
01364   /* Copy result to output parameter */
01365   mp_sub_d(&x, 1, &x);
01366   s_mp_exch(&x, b);
01367 
01368  CLEANUP:
01369   mp_clear(&x);
01370  X:
01371   mp_clear(&t); 
01372 
01373   return res;
01374 
01375 } /* end mp_sqrt() */
01376 
01377 /* }}} */
01378 
01379 /* }}} */
01380 
01381 /*------------------------------------------------------------------------*/
01382 /* {{{ Modular arithmetic */
01383 
01384 #if MP_MODARITH
01385 /* {{{ mp_addmod(a, b, m, c) */
01386 
01387 /*
01388   mp_addmod(a, b, m, c)
01389 
01390   Compute c = (a + b) mod m
01391  */
01392 
01393 mp_err mp_addmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
01394 {
01395   mp_err  res;
01396 
01397   ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
01398 
01399   if((res = mp_add(a, b, c)) != MP_OKAY)
01400     return res;
01401   if((res = mp_mod(c, m, c)) != MP_OKAY)
01402     return res;
01403 
01404   return MP_OKAY;
01405 
01406 }
01407 
01408 /* }}} */
01409 
01410 /* {{{ mp_submod(a, b, m, c) */
01411 
01412 /*
01413   mp_submod(a, b, m, c)
01414 
01415   Compute c = (a - b) mod m
01416  */
01417 
01418 mp_err mp_submod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
01419 {
01420   mp_err  res;
01421 
01422   ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
01423 
01424   if((res = mp_sub(a, b, c)) != MP_OKAY)
01425     return res;
01426   if((res = mp_mod(c, m, c)) != MP_OKAY)
01427     return res;
01428 
01429   return MP_OKAY;
01430 
01431 }
01432 
01433 /* }}} */
01434 
01435 /* {{{ mp_mulmod(a, b, m, c) */
01436 
01437 /*
01438   mp_mulmod(a, b, m, c)
01439 
01440   Compute c = (a * b) mod m
01441  */
01442 
01443 mp_err mp_mulmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
01444 {
01445   mp_err  res;
01446 
01447   ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
01448 
01449   if((res = mp_mul(a, b, c)) != MP_OKAY)
01450     return res;
01451   if((res = mp_mod(c, m, c)) != MP_OKAY)
01452     return res;
01453 
01454   return MP_OKAY;
01455 
01456 }
01457 
01458 /* }}} */
01459 
01460 /* {{{ mp_sqrmod(a, m, c) */
01461 
01462 #if MP_SQUARE
01463 mp_err mp_sqrmod(const mp_int *a, const mp_int *m, mp_int *c)
01464 {
01465   mp_err  res;
01466 
01467   ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
01468 
01469   if((res = mp_sqr(a, c)) != MP_OKAY)
01470     return res;
01471   if((res = mp_mod(c, m, c)) != MP_OKAY)
01472     return res;
01473 
01474   return MP_OKAY;
01475 
01476 } /* end mp_sqrmod() */
01477 #endif
01478 
01479 /* }}} */
01480 
01481 /* {{{ s_mp_exptmod(a, b, m, c) */
01482 
01483 /*
01484   s_mp_exptmod(a, b, m, c)
01485 
01486   Compute c = (a ** b) mod m.  Uses a standard square-and-multiply
01487   method with modular reductions at each step. (This is basically the
01488   same code as mp_expt(), except for the addition of the reductions)
01489   
01490   The modular reductions are done using Barrett's algorithm (see
01491   s_mp_reduce() below for details)
01492  */
01493 
01494 mp_err s_mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
01495 {
01496   mp_int   s, x, mu;
01497   mp_err   res;
01498   mp_digit d;
01499   int      dig, bit;
01500 
01501   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
01502 
01503   if(mp_cmp_z(b) < 0 || mp_cmp_z(m) <= 0)
01504     return MP_RANGE;
01505 
01506   if((res = mp_init(&s)) != MP_OKAY)
01507     return res;
01508   if((res = mp_init_copy(&x, a)) != MP_OKAY ||
01509      (res = mp_mod(&x, m, &x)) != MP_OKAY)
01510     goto X;
01511   if((res = mp_init(&mu)) != MP_OKAY)
01512     goto MU;
01513 
01514   mp_set(&s, 1);
01515 
01516   /* mu = b^2k / m */
01517   s_mp_add_d(&mu, 1); 
01518   s_mp_lshd(&mu, 2 * USED(m));
01519   if((res = mp_div(&mu, m, &mu, NULL)) != MP_OKAY)
01520     goto CLEANUP;
01521 
01522   /* Loop over digits of b in ascending order, except highest order */
01523   for(dig = 0; dig < (USED(b) - 1); dig++) {
01524     d = DIGIT(b, dig);
01525 
01526     /* Loop over the bits of the lower-order digits */
01527     for(bit = 0; bit < DIGIT_BIT; bit++) {
01528       if(d & 1) {
01529        if((res = s_mp_mul(&s, &x)) != MP_OKAY)
01530          goto CLEANUP;
01531        if((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY)
01532          goto CLEANUP;
01533       }
01534 
01535       d >>= 1;
01536 
01537       if((res = s_mp_sqr(&x)) != MP_OKAY)
01538        goto CLEANUP;
01539       if((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY)
01540        goto CLEANUP;
01541     }
01542   }
01543 
01544   /* Now do the last digit... */
01545   d = DIGIT(b, dig);
01546 
01547   while(d) {
01548     if(d & 1) {
01549       if((res = s_mp_mul(&s, &x)) != MP_OKAY)
01550        goto CLEANUP;
01551       if((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY)
01552        goto CLEANUP;
01553     }
01554 
01555     d >>= 1;
01556 
01557     if((res = s_mp_sqr(&x)) != MP_OKAY)
01558       goto CLEANUP;
01559     if((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY)
01560       goto CLEANUP;
01561   }
01562 
01563   s_mp_exch(&s, c);
01564 
01565  CLEANUP:
01566   mp_clear(&mu);
01567  MU:
01568   mp_clear(&x);
01569  X:
01570   mp_clear(&s);
01571 
01572   return res;
01573 
01574 } /* end s_mp_exptmod() */
01575 
01576 /* }}} */
01577 
01578 /* {{{ mp_exptmod_d(a, d, m, c) */
01579 
01580 mp_err mp_exptmod_d(const mp_int *a, mp_digit d, const mp_int *m, mp_int *c)
01581 {
01582   mp_int   s, x;
01583   mp_err   res;
01584 
01585   ARGCHK(a != NULL && c != NULL, MP_BADARG);
01586 
01587   if((res = mp_init(&s)) != MP_OKAY)
01588     return res;
01589   if((res = mp_init_copy(&x, a)) != MP_OKAY)
01590     goto X;
01591 
01592   mp_set(&s, 1);
01593 
01594   while(d != 0) {
01595     if(d & 1) {
01596       if((res = s_mp_mul(&s, &x)) != MP_OKAY ||
01597         (res = mp_mod(&s, m, &s)) != MP_OKAY)
01598        goto CLEANUP;
01599     }
01600 
01601     d /= 2;
01602 
01603     if((res = s_mp_sqr(&x)) != MP_OKAY ||
01604        (res = mp_mod(&x, m, &x)) != MP_OKAY)
01605       goto CLEANUP;
01606   }
01607 
01608   s_mp_exch(&s, c);
01609 
01610 CLEANUP:
01611   mp_clear(&x);
01612 X:
01613   mp_clear(&s);
01614 
01615   return res;
01616 
01617 } /* end mp_exptmod_d() */
01618 
01619 /* }}} */
01620 #endif /* if MP_MODARITH */
01621 
01622 /* }}} */
01623 
01624 /*------------------------------------------------------------------------*/
01625 /* {{{ Comparison functions */
01626 
01627 /* {{{ mp_cmp_z(a) */
01628 
01629 /*
01630   mp_cmp_z(a)
01631 
01632   Compare a <=> 0.  Returns <0 if a<0, 0 if a=0, >0 if a>0.
01633  */
01634 
01635 int    mp_cmp_z(const mp_int *a)
01636 {
01637   if(SIGN(a) == NEG)
01638     return MP_LT;
01639   else if(USED(a) == 1 && DIGIT(a, 0) == 0)
01640     return MP_EQ;
01641   else
01642     return MP_GT;
01643 
01644 } /* end mp_cmp_z() */
01645 
01646 /* }}} */
01647 
01648 /* {{{ mp_cmp_d(a, d) */
01649 
01650 /*
01651   mp_cmp_d(a, d)
01652 
01653   Compare a <=> d.  Returns <0 if a<d, 0 if a=d, >0 if a>d
01654  */
01655 
01656 int    mp_cmp_d(const mp_int *a, mp_digit d)
01657 {
01658   ARGCHK(a != NULL, MP_EQ);
01659 
01660   if(SIGN(a) == NEG)
01661     return MP_LT;
01662 
01663   return s_mp_cmp_d(a, d);
01664 
01665 } /* end mp_cmp_d() */
01666 
01667 /* }}} */
01668 
01669 /* {{{ mp_cmp(a, b) */
01670 
01671 int    mp_cmp(const mp_int *a, const mp_int *b)
01672 {
01673   ARGCHK(a != NULL && b != NULL, MP_EQ);
01674 
01675   if(SIGN(a) == SIGN(b)) {
01676     int  mag;
01677 
01678     if((mag = s_mp_cmp(a, b)) == MP_EQ)
01679       return MP_EQ;
01680 
01681     if(SIGN(a) == ZPOS)
01682       return mag;
01683     else
01684       return -mag;
01685 
01686   } else if(SIGN(a) == ZPOS) {
01687     return MP_GT;
01688   } else {
01689     return MP_LT;
01690   }
01691 
01692 } /* end mp_cmp() */
01693 
01694 /* }}} */
01695 
01696 /* {{{ mp_cmp_mag(a, b) */
01697 
01698 /*
01699   mp_cmp_mag(a, b)
01700 
01701   Compares |a| <=> |b|, and returns an appropriate comparison result
01702  */
01703 
01704 int    mp_cmp_mag(mp_int *a, mp_int *b)
01705 {
01706   ARGCHK(a != NULL && b != NULL, MP_EQ);
01707 
01708   return s_mp_cmp(a, b);
01709 
01710 } /* end mp_cmp_mag() */
01711 
01712 /* }}} */
01713 
01714 /* {{{ mp_cmp_int(a, z) */
01715 
01716 /*
01717   This just converts z to an mp_int, and uses the existing comparison
01718   routines.  This is sort of inefficient, but it's not clear to me how
01719   frequently this wil get used anyway.  For small positive constants,
01720   you can always use mp_cmp_d(), and for zero, there is mp_cmp_z().
01721  */
01722 int    mp_cmp_int(const mp_int *a, long z)
01723 {
01724   mp_int  tmp;
01725   int     out;
01726 
01727   ARGCHK(a != NULL, MP_EQ);
01728   
01729   mp_init(&tmp); mp_set_int(&tmp, z);
01730   out = mp_cmp(a, &tmp);
01731   mp_clear(&tmp);
01732 
01733   return out;
01734 
01735 } /* end mp_cmp_int() */
01736 
01737 /* }}} */
01738 
01739 /* {{{ mp_isodd(a) */
01740 
01741 /*
01742   mp_isodd(a)
01743 
01744   Returns a true (non-zero) value if a is odd, false (zero) otherwise.
01745  */
01746 int    mp_isodd(const mp_int *a)
01747 {
01748   ARGCHK(a != NULL, 0);
01749 
01750   return (int)(DIGIT(a, 0) & 1);
01751 
01752 } /* end mp_isodd() */
01753 
01754 /* }}} */
01755 
01756 /* {{{ mp_iseven(a) */
01757 
01758 int    mp_iseven(const mp_int *a)
01759 {
01760   return !mp_isodd(a);
01761 
01762 } /* end mp_iseven() */
01763 
01764 /* }}} */
01765 
01766 /* }}} */
01767 
01768 /*------------------------------------------------------------------------*/
01769 /* {{{ Number theoretic functions */
01770 
01771 #if MP_NUMTH
01772 /* {{{ mp_gcd(a, b, c) */
01773 
01774 /*
01775   Like the old mp_gcd() function, except computes the GCD using the
01776   binary algorithm due to Josef Stein in 1961 (via Knuth).
01777  */
01778 mp_err mp_gcd(mp_int *a, mp_int *b, mp_int *c)
01779 {
01780   mp_err   res;
01781   mp_int   u, v, t;
01782   mp_size  k = 0;
01783 
01784   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
01785 
01786   if(mp_cmp_z(a) == MP_EQ && mp_cmp_z(b) == MP_EQ)
01787       return MP_RANGE;
01788   if(mp_cmp_z(a) == MP_EQ) {
01789     return mp_copy(b, c);
01790   } else if(mp_cmp_z(b) == MP_EQ) {
01791     return mp_copy(a, c);
01792   }
01793 
01794   if((res = mp_init(&t)) != MP_OKAY)
01795     return res;
01796   if((res = mp_init_copy(&u, a)) != MP_OKAY)
01797     goto U;
01798   if((res = mp_init_copy(&v, b)) != MP_OKAY)
01799     goto V;
01800 
01801   SIGN(&u) = ZPOS;
01802   SIGN(&v) = ZPOS;
01803 
01804   /* Divide out common factors of 2 until at least 1 of a, b is even */
01805   while(mp_iseven(&u) && mp_iseven(&v)) {
01806     s_mp_div_2(&u);
01807     s_mp_div_2(&v);
01808     ++k;
01809   }
01810 
01811   /* Initialize t */
01812   if(mp_isodd(&u)) {
01813     if((res = mp_copy(&v, &t)) != MP_OKAY)
01814       goto CLEANUP;
01815     
01816     /* t = -v */
01817     if(SIGN(&v) == ZPOS)
01818       SIGN(&t) = NEG;
01819     else
01820       SIGN(&t) = ZPOS;
01821     
01822   } else {
01823     if((res = mp_copy(&u, &t)) != MP_OKAY)
01824       goto CLEANUP;
01825 
01826   }
01827 
01828   for(;;) {
01829     while(mp_iseven(&t)) {
01830       s_mp_div_2(&t);
01831     }
01832 
01833     if(mp_cmp_z(&t) == MP_GT) {
01834       if((res = mp_copy(&t, &u)) != MP_OKAY)
01835        goto CLEANUP;
01836 
01837     } else {
01838       if((res = mp_copy(&t, &v)) != MP_OKAY)
01839        goto CLEANUP;
01840 
01841       /* v = -t */
01842       if(SIGN(&t) == ZPOS)
01843        SIGN(&v) = NEG;
01844       else
01845        SIGN(&v) = ZPOS;
01846     }
01847 
01848     if((res = mp_sub(&u, &v, &t)) != MP_OKAY)
01849       goto CLEANUP;
01850 
01851     if(s_mp_cmp_d(&t, 0) == MP_EQ)
01852       break;
01853   }
01854 
01855   s_mp_2expt(&v, k);       /* v = 2^k   */
01856   res = mp_mul(&u, &v, c); /* c = u * v */
01857 
01858  CLEANUP:
01859   mp_clear(&v);
01860  V:
01861   mp_clear(&u);
01862  U:
01863   mp_clear(&t);
01864 
01865   return res;
01866 
01867 } /* end mp_gcd() */
01868 
01869 /* }}} */
01870 
01871 /* {{{ mp_lcm(a, b, c) */
01872 
01873 /* We compute the least common multiple using the rule:
01874 
01875    ab = [a, b](a, b)
01876 
01877    ... by computing the product, and dividing out the gcd.
01878  */
01879 
01880 mp_err mp_lcm(mp_int *a, mp_int *b, mp_int *c)
01881 {
01882   mp_int  gcd, prod;
01883   mp_err  res;
01884 
01885   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
01886 
01887   /* Set up temporaries */
01888   if((res = mp_init(&gcd)) != MP_OKAY)
01889     return res;
01890   if((res = mp_init(&prod)) != MP_OKAY)
01891     goto GCD;
01892 
01893   if((res = mp_mul(a, b, &prod)) != MP_OKAY)
01894     goto CLEANUP;
01895   if((res = mp_gcd(a, b, &gcd)) != MP_OKAY)
01896     goto CLEANUP;
01897 
01898   res = mp_div(&prod, &gcd, c, NULL);
01899 
01900  CLEANUP:
01901   mp_clear(&prod);
01902  GCD:
01903   mp_clear(&gcd);
01904 
01905   return res;
01906 
01907 } /* end mp_lcm() */
01908 
01909 /* }}} */
01910 
01911 /* {{{ mp_xgcd(a, b, g, x, y) */
01912 
01913 /*
01914   mp_xgcd(a, b, g, x, y)
01915 
01916   Compute g = (a, b) and values x and y satisfying Bezout's identity
01917   (that is, ax + by = g).  This uses the binary extended GCD algorithm
01918   based on the Stein algorithm used for mp_gcd()
01919   See algorithm 14.61 in Handbook of Applied Cryptogrpahy.
01920  */
01921 
01922 mp_err mp_xgcd(const mp_int *a, const mp_int *b, mp_int *g, mp_int *x, mp_int *y)
01923 {
01924   mp_int   gx, xc, yc, u, v, A, B, C, D;
01925   mp_int  *clean[9];
01926   mp_err   res;
01927   int      last = -1;
01928 
01929   if(mp_cmp_z(b) == 0)
01930     return MP_RANGE;
01931 
01932   /* Initialize all these variables we need */
01933   MP_CHECKOK( mp_init(&u) );
01934   clean[++last] = &u;
01935   MP_CHECKOK( mp_init(&v) );
01936   clean[++last] = &v;
01937   MP_CHECKOK( mp_init(&gx) );
01938   clean[++last] = &gx;
01939   MP_CHECKOK( mp_init(&A) );
01940   clean[++last] = &A;
01941   MP_CHECKOK( mp_init(&B) );
01942   clean[++last] = &B;
01943   MP_CHECKOK( mp_init(&C) );
01944   clean[++last] = &C;
01945   MP_CHECKOK( mp_init(&D) );
01946   clean[++last] = &D;
01947   MP_CHECKOK( mp_init_copy(&xc, a) );
01948   clean[++last] = &xc;
01949   mp_abs(&xc, &xc);
01950   MP_CHECKOK( mp_init_copy(&yc, b) );
01951   clean[++last] = &yc;
01952   mp_abs(&yc, &yc);
01953 
01954   mp_set(&gx, 1);
01955 
01956   /* Divide by two until at least one of them is odd */
01957   while(mp_iseven(&xc) && mp_iseven(&yc)) {
01958     mp_size nx = mp_trailing_zeros(&xc);
01959     mp_size ny = mp_trailing_zeros(&yc);
01960     mp_size n  = MP_MIN(nx, ny);
01961     s_mp_div_2d(&xc,n);
01962     s_mp_div_2d(&yc,n);
01963     MP_CHECKOK( s_mp_mul_2d(&gx,n) );
01964   }
01965 
01966   mp_copy(&xc, &u);
01967   mp_copy(&yc, &v);
01968   mp_set(&A, 1); mp_set(&D, 1);
01969 
01970   /* Loop through binary GCD algorithm */
01971   do {
01972     while(mp_iseven(&u)) {
01973       s_mp_div_2(&u);
01974 
01975       if(mp_iseven(&A) && mp_iseven(&B)) {
01976        s_mp_div_2(&A); s_mp_div_2(&B);
01977       } else {
01978        MP_CHECKOK( mp_add(&A, &yc, &A) );
01979        s_mp_div_2(&A);
01980        MP_CHECKOK( mp_sub(&B, &xc, &B) );
01981        s_mp_div_2(&B);
01982       }
01983     }
01984 
01985     while(mp_iseven(&v)) {
01986       s_mp_div_2(&v);
01987 
01988       if(mp_iseven(&C) && mp_iseven(&D)) {
01989        s_mp_div_2(&C); s_mp_div_2(&D);
01990       } else {
01991        MP_CHECKOK( mp_add(&C, &yc, &C) );
01992        s_mp_div_2(&C);
01993        MP_CHECKOK( mp_sub(&D, &xc, &D) );
01994        s_mp_div_2(&D);
01995       }
01996     }
01997 
01998     if(mp_cmp(&u, &v) >= 0) {
01999       MP_CHECKOK( mp_sub(&u, &v, &u) );
02000       MP_CHECKOK( mp_sub(&A, &C, &A) );
02001       MP_CHECKOK( mp_sub(&B, &D, &B) );
02002     } else {
02003       MP_CHECKOK( mp_sub(&v, &u, &v) );
02004       MP_CHECKOK( mp_sub(&C, &A, &C) );
02005       MP_CHECKOK( mp_sub(&D, &B, &D) );
02006     }
02007   } while (mp_cmp_z(&u) != 0);
02008 
02009   /* copy results to output */
02010   if(x)
02011     MP_CHECKOK( mp_copy(&C, x) );
02012 
02013   if(y)
02014     MP_CHECKOK( mp_copy(&D, y) );
02015       
02016   if(g)
02017     MP_CHECKOK( mp_mul(&gx, &v, g) );
02018 
02019  CLEANUP:
02020   while(last >= 0)
02021     mp_clear(clean[last--]);
02022 
02023   return res;
02024 
02025 } /* end mp_xgcd() */
02026 
02027 /* }}} */
02028 
02029 mp_size mp_trailing_zeros(const mp_int *mp)
02030 {
02031   mp_digit d;
02032   mp_size  n = 0;
02033   int      ix;
02034 
02035   if (!mp || !MP_DIGITS(mp) || !mp_cmp_z(mp))
02036     return n;
02037 
02038   for (ix = 0; !(d = MP_DIGIT(mp,ix)) && (ix < MP_USED(mp)); ++ix)
02039     n += MP_DIGIT_BIT;
02040   if (!d)
02041     return 0; /* shouldn't happen, but ... */
02042 #if !defined(MP_USE_UINT_DIGIT)
02043   if (!(d & 0xffffffffU)) {
02044     d >>= 32;
02045     n  += 32;
02046   }
02047 #endif
02048   if (!(d & 0xffffU)) {
02049     d >>= 16;
02050     n  += 16;
02051   }
02052   if (!(d & 0xffU)) {
02053     d >>= 8;
02054     n  += 8;
02055   }
02056   if (!(d & 0xfU)) {
02057     d >>= 4;
02058     n  += 4;
02059   }
02060   if (!(d & 0x3U)) {
02061     d >>= 2;
02062     n  += 2;
02063   }
02064   if (!(d & 0x1U)) {
02065     d >>= 1;
02066     n  += 1;
02067   }
02068 #if MP_ARGCHK == 2
02069   assert(0 != (d & 1));
02070 #endif
02071   return n;
02072 }
02073 
02074 /* Given a and prime p, computes c and k such that a*c == 2**k (mod p).
02075 ** Returns k (positive) or error (negative).
02076 ** This technique from the paper "Fast Modular Reciprocals" (unpublished)
02077 ** by Richard Schroeppel (a.k.a. Captain Nemo).
02078 */
02079 mp_err s_mp_almost_inverse(const mp_int *a, const mp_int *p, mp_int *c)
02080 {
02081   mp_err res;
02082   mp_err k    = 0;
02083   mp_int d, f, g;
02084 
02085   ARGCHK(a && p && c, MP_BADARG);
02086 
02087   MP_DIGITS(&d) = 0;
02088   MP_DIGITS(&f) = 0;
02089   MP_DIGITS(&g) = 0;
02090   MP_CHECKOK( mp_init(&d) );
02091   MP_CHECKOK( mp_init_copy(&f, a) );      /* f = a */
02092   MP_CHECKOK( mp_init_copy(&g, p) );      /* g = p */
02093 
02094   mp_set(c, 1);
02095   mp_zero(&d);
02096 
02097   if (mp_cmp_z(&f) == 0) {
02098     res = MP_UNDEF;
02099   } else 
02100   for (;;) {
02101     int diff_sign;
02102     while (mp_iseven(&f)) {
02103       mp_size n = mp_trailing_zeros(&f);
02104       if (!n) {
02105        res = MP_UNDEF;
02106        goto CLEANUP;
02107       }
02108       s_mp_div_2d(&f, n);
02109       MP_CHECKOK( s_mp_mul_2d(&d, n) );
02110       k += n;
02111     }
02112     if (mp_cmp_d(&f, 1) == MP_EQ) {       /* f == 1 */
02113       res = k;
02114       break;
02115     }
02116     diff_sign = mp_cmp(&f, &g);
02117     if (diff_sign < 0) {           /* f < g */
02118       s_mp_exch(&f, &g);
02119       s_mp_exch(c, &d);
02120     } else if (diff_sign == 0) {          /* f == g */
02121       res = MP_UNDEF;              /* a and p are not relatively prime */
02122       break;
02123     }
02124     if ((MP_DIGIT(&f,0) % 4) == (MP_DIGIT(&g,0) % 4)) {
02125       MP_CHECKOK( mp_sub(&f, &g, &f) );   /* f = f - g */
02126       MP_CHECKOK( mp_sub(c,  &d,  c) );   /* c = c - d */
02127     } else {
02128       MP_CHECKOK( mp_add(&f, &g, &f) );   /* f = f + g */
02129       MP_CHECKOK( mp_add(c,  &d,  c) );   /* c = c + d */
02130     }
02131   }
02132   if (res >= 0) {
02133     while (MP_SIGN(c) != MP_ZPOS) {
02134       MP_CHECKOK( mp_add(c, p, c) );
02135     }
02136     res = k;
02137   }
02138 
02139 CLEANUP:
02140   mp_clear(&d);
02141   mp_clear(&f);
02142   mp_clear(&g);
02143   return res;
02144 }
02145 
02146 /* Compute T = (P ** -1) mod MP_RADIX.  Also works for 16-bit mp_digits.
02147 ** This technique from the paper "Fast Modular Reciprocals" (unpublished)
02148 ** by Richard Schroeppel (a.k.a. Captain Nemo).
02149 */
02150 mp_digit  s_mp_invmod_radix(mp_digit P)
02151 {
02152   mp_digit T = P;
02153   T *= 2 - (P * T);
02154   T *= 2 - (P * T);
02155   T *= 2 - (P * T);
02156   T *= 2 - (P * T);
02157 #if !defined(MP_USE_UINT_DIGIT)
02158   T *= 2 - (P * T);
02159   T *= 2 - (P * T);
02160 #endif
02161   return T;
02162 }
02163 
02164 /* Given c, k, and prime p, where a*c == 2**k (mod p), 
02165 ** Compute x = (a ** -1) mod p.  This is similar to Montgomery reduction.
02166 ** This technique from the paper "Fast Modular Reciprocals" (unpublished)
02167 ** by Richard Schroeppel (a.k.a. Captain Nemo).
02168 */
02169 mp_err  s_mp_fixup_reciprocal(const mp_int *c, const mp_int *p, int k, mp_int *x)
02170 {
02171   int      k_orig = k;
02172   mp_digit r;
02173   mp_size  ix;
02174   mp_err   res;
02175 
02176   if (mp_cmp_z(c) < 0) {           /* c < 0 */
02177     MP_CHECKOK( mp_add(c, p, x) ); /* x = c + p */
02178   } else {
02179     MP_CHECKOK( mp_copy(c, x) );   /* x = c */
02180   }
02181 
02182   /* make sure x is large enough */
02183   ix = MP_HOWMANY(k, MP_DIGIT_BIT) + MP_USED(p) + 1;
02184   ix = MP_MAX(ix, MP_USED(x));
02185   MP_CHECKOK( s_mp_pad(x, ix) );
02186 
02187   r = 0 - s_mp_invmod_radix(MP_DIGIT(p,0));
02188 
02189   for (ix = 0; k > 0; ix++) {
02190     int      j = MP_MIN(k, MP_DIGIT_BIT);
02191     mp_digit v = r * MP_DIGIT(x, ix);
02192     if (j < MP_DIGIT_BIT) {
02193       v &= ((mp_digit)1 << j) - 1; /* v = v mod (2 ** j) */
02194     }
02195     s_mp_mul_d_add_offset(p, v, x, ix); /* x += p * v * (RADIX ** ix) */
02196     k -= j;
02197   }
02198   s_mp_clamp(x);
02199   s_mp_div_2d(x, k_orig);
02200   res = MP_OKAY;
02201 
02202 CLEANUP:
02203   return res;
02204 }
02205 
02206 /* compute mod inverse using Schroeppel's method, only if m is odd */
02207 mp_err s_mp_invmod_odd_m(const mp_int *a, const mp_int *m, mp_int *c)
02208 {
02209   int k;
02210   mp_err  res;
02211   mp_int  x;
02212 
02213   ARGCHK(a && m && c, MP_BADARG);
02214 
02215   if(mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
02216     return MP_RANGE;
02217   if (mp_iseven(m))
02218     return MP_UNDEF;
02219 
02220   MP_DIGITS(&x) = 0;
02221 
02222   if (a == c) {
02223     if ((res = mp_init_copy(&x, a)) != MP_OKAY)
02224       return res;
02225     if (a == m) 
02226       m = &x;
02227     a = &x;
02228   } else if (m == c) {
02229     if ((res = mp_init_copy(&x, m)) != MP_OKAY)
02230       return res;
02231     m = &x;
02232   } else {
02233     MP_DIGITS(&x) = 0;
02234   }
02235 
02236   MP_CHECKOK( s_mp_almost_inverse(a, m, c) );
02237   k = res;
02238   MP_CHECKOK( s_mp_fixup_reciprocal(c, m, k, c) );
02239 CLEANUP:
02240   mp_clear(&x);
02241   return res;
02242 }
02243 
02244 /* Known good algorithm for computing modular inverse.  But slow. */
02245 mp_err mp_invmod_xgcd(const mp_int *a, const mp_int *m, mp_int *c)
02246 {
02247   mp_int  g, x;
02248   mp_err  res;
02249 
02250   ARGCHK(a && m && c, MP_BADARG);
02251 
02252   if(mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
02253     return MP_RANGE;
02254 
02255   MP_DIGITS(&g) = 0;
02256   MP_DIGITS(&x) = 0;
02257   MP_CHECKOK( mp_init(&x) );
02258   MP_CHECKOK( mp_init(&g) );
02259 
02260   MP_CHECKOK( mp_xgcd(a, m, &g, &x, NULL) );
02261 
02262   if (mp_cmp_d(&g, 1) != MP_EQ) {
02263     res = MP_UNDEF;
02264     goto CLEANUP;
02265   }
02266 
02267   res = mp_mod(&x, m, c);
02268   SIGN(c) = SIGN(a);
02269 
02270 CLEANUP:
02271   mp_clear(&x);
02272   mp_clear(&g);
02273 
02274   return res;
02275 }
02276 
02277 /* modular inverse where modulus is 2**k. */
02278 /* c = a**-1 mod 2**k */
02279 mp_err s_mp_invmod_2d(const mp_int *a, mp_size k, mp_int *c)
02280 {
02281   mp_err res;
02282   mp_size ix = k + 4;
02283   mp_int t0, t1, val, tmp, two2k;
02284 
02285   static const mp_digit d2 = 2;
02286   static const mp_int two = { MP_ZPOS, 1, 1, (mp_digit *)&d2 };
02287 
02288   if (mp_iseven(a))
02289     return MP_UNDEF;
02290   if (k <= MP_DIGIT_BIT) {
02291     mp_digit i = s_mp_invmod_radix(MP_DIGIT(a,0));
02292     if (k < MP_DIGIT_BIT)
02293       i &= ((mp_digit)1 << k) - (mp_digit)1;
02294     mp_set(c, i);
02295     return MP_OKAY;
02296   }
02297   MP_DIGITS(&t0) = 0;
02298   MP_DIGITS(&t1) = 0;
02299   MP_DIGITS(&val) = 0;
02300   MP_DIGITS(&tmp) = 0;
02301   MP_DIGITS(&two2k) = 0;
02302   MP_CHECKOK( mp_init_copy(&val, a) );
02303   s_mp_mod_2d(&val, k);
02304   MP_CHECKOK( mp_init_copy(&t0, &val) );
02305   MP_CHECKOK( mp_init_copy(&t1, &t0)  );
02306   MP_CHECKOK( mp_init(&tmp) );
02307   MP_CHECKOK( mp_init(&two2k) );
02308   MP_CHECKOK( s_mp_2expt(&two2k, k) );
02309   do {
02310     MP_CHECKOK( mp_mul(&val, &t1, &tmp)  );
02311     MP_CHECKOK( mp_sub(&two, &tmp, &tmp) );
02312     MP_CHECKOK( mp_mul(&t1, &tmp, &t1)   );
02313     s_mp_mod_2d(&t1, k);
02314     while (MP_SIGN(&t1) != MP_ZPOS) {
02315       MP_CHECKOK( mp_add(&t1, &two2k, &t1) );
02316     }
02317     if (mp_cmp(&t1, &t0) == MP_EQ) 
02318       break;
02319     MP_CHECKOK( mp_copy(&t1, &t0) );
02320   } while (--ix > 0);
02321   if (!ix) {
02322     res = MP_UNDEF;
02323   } else {
02324     mp_exch(c, &t1);
02325   }
02326 
02327 CLEANUP:
02328   mp_clear(&t0);
02329   mp_clear(&t1);
02330   mp_clear(&val);
02331   mp_clear(&tmp);
02332   mp_clear(&two2k);
02333   return res;
02334 }
02335 
02336 mp_err s_mp_invmod_even_m(const mp_int *a, const mp_int *m, mp_int *c)
02337 {
02338   mp_err res;
02339   mp_size k;
02340   mp_int oddFactor, evenFactor;    /* factors of the modulus */
02341   mp_int oddPart, evenPart; /* parts to combine via CRT. */
02342   mp_int C2, tmp1, tmp2;
02343 
02344   /*static const mp_digit d1 = 1; */
02345   /*static const mp_int one = { MP_ZPOS, 1, 1, (mp_digit *)&d1 }; */
02346 
02347   if ((res = s_mp_ispow2(m)) >= 0) {
02348     k = res;
02349     return s_mp_invmod_2d(a, k, c);
02350   }
02351   MP_DIGITS(&oddFactor) = 0;
02352   MP_DIGITS(&evenFactor) = 0;
02353   MP_DIGITS(&oddPart) = 0;
02354   MP_DIGITS(&evenPart) = 0;
02355   MP_DIGITS(&C2)     = 0;
02356   MP_DIGITS(&tmp1)   = 0;
02357   MP_DIGITS(&tmp2)   = 0;
02358 
02359   MP_CHECKOK( mp_init_copy(&oddFactor, m) );    /* oddFactor = m */
02360   MP_CHECKOK( mp_init(&evenFactor) );
02361   MP_CHECKOK( mp_init(&oddPart) );
02362   MP_CHECKOK( mp_init(&evenPart) );
02363   MP_CHECKOK( mp_init(&C2)     );
02364   MP_CHECKOK( mp_init(&tmp1)   );
02365   MP_CHECKOK( mp_init(&tmp2)   );
02366 
02367   k = mp_trailing_zeros(m);
02368   s_mp_div_2d(&oddFactor, k);
02369   MP_CHECKOK( s_mp_2expt(&evenFactor, k) );
02370 
02371   /* compute a**-1 mod oddFactor. */
02372   MP_CHECKOK( s_mp_invmod_odd_m(a, &oddFactor, &oddPart) );
02373   /* compute a**-1 mod evenFactor, where evenFactor == 2**k. */
02374   MP_CHECKOK( s_mp_invmod_2d(   a,       k,    &evenPart) );
02375 
02376   /* Use Chinese Remainer theorem to compute a**-1 mod m. */
02377   /* let m1 = oddFactor,  v1 = oddPart, 
02378    * let m2 = evenFactor, v2 = evenPart.
02379    */
02380 
02381   /* Compute C2 = m1**-1 mod m2. */
02382   MP_CHECKOK( s_mp_invmod_2d(&oddFactor, k,    &C2) );
02383 
02384   /* compute u = (v2 - v1)*C2 mod m2 */
02385   MP_CHECKOK( mp_sub(&evenPart, &oddPart,   &tmp1) );
02386   MP_CHECKOK( mp_mul(&tmp1,     &C2,        &tmp2) );
02387   s_mp_mod_2d(&tmp2, k);
02388   while (MP_SIGN(&tmp2) != MP_ZPOS) {
02389     MP_CHECKOK( mp_add(&tmp2, &evenFactor, &tmp2) );
02390   }
02391 
02392   /* compute answer = v1 + u*m1 */
02393   MP_CHECKOK( mp_mul(&tmp2,     &oddFactor, c) );
02394   MP_CHECKOK( mp_add(&oddPart,  c,          c) );
02395   /* not sure this is necessary, but it's low cost if not. */
02396   MP_CHECKOK( mp_mod(c,         m,          c) );
02397 
02398 CLEANUP:
02399   mp_clear(&oddFactor);
02400   mp_clear(&evenFactor);
02401   mp_clear(&oddPart);
02402   mp_clear(&evenPart);
02403   mp_clear(&C2);
02404   mp_clear(&tmp1);
02405   mp_clear(&tmp2);
02406   return res;
02407 }
02408 
02409 
02410 /* {{{ mp_invmod(a, m, c) */
02411 
02412 /*
02413   mp_invmod(a, m, c)
02414 
02415   Compute c = a^-1 (mod m), if there is an inverse for a (mod m).
02416   This is equivalent to the question of whether (a, m) = 1.  If not,
02417   MP_UNDEF is returned, and there is no inverse.
02418  */
02419 
02420 mp_err mp_invmod(const mp_int *a, const mp_int *m, mp_int *c)
02421 {
02422 
02423   ARGCHK(a && m && c, MP_BADARG);
02424 
02425   if(mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
02426     return MP_RANGE;
02427 
02428   if (mp_isodd(m)) {
02429     return s_mp_invmod_odd_m(a, m, c);
02430   }
02431   if (mp_iseven(a))
02432     return MP_UNDEF; /* not invertable */
02433 
02434   return s_mp_invmod_even_m(a, m, c);
02435 
02436 } /* end mp_invmod() */
02437 
02438 /* }}} */
02439 #endif /* if MP_NUMTH */
02440 
02441 /* }}} */
02442 
02443 /*------------------------------------------------------------------------*/
02444 /* {{{ mp_print(mp, ofp) */
02445 
02446 #if MP_IOFUNC
02447 /*
02448   mp_print(mp, ofp)
02449 
02450   Print a textual representation of the given mp_int on the output
02451   stream 'ofp'.  Output is generated using the internal radix.
02452  */
02453 
02454 void   mp_print(mp_int *mp, FILE *ofp)
02455 {
02456   int   ix;
02457 
02458   if(mp == NULL || ofp == NULL)
02459     return;
02460 
02461   fputc((SIGN(mp) == NEG) ? '-' : '+', ofp);
02462 
02463   for(ix = USED(mp) - 1; ix >= 0; ix--) {
02464     fprintf(ofp, DIGIT_FMT, DIGIT(mp, ix));
02465   }
02466 
02467 } /* end mp_print() */
02468 
02469 #endif /* if MP_IOFUNC */
02470 
02471 /* }}} */
02472 
02473 /*------------------------------------------------------------------------*/
02474 /* {{{ More I/O Functions */
02475 
02476 /* {{{ mp_read_raw(mp, str, len) */
02477 
02478 /* 
02479    mp_read_raw(mp, str, len)
02480 
02481    Read in a raw value (base 256) into the given mp_int
02482  */
02483 
02484 mp_err  mp_read_raw(mp_int *mp, char *str, int len)
02485 {
02486   int            ix;
02487   mp_err         res;
02488   unsigned char *ustr = (unsigned char *)str;
02489 
02490   ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG);
02491 
02492   mp_zero(mp);
02493 
02494   /* Get sign from first byte */
02495   if(ustr[0])
02496     SIGN(mp) = NEG;
02497   else
02498     SIGN(mp) = ZPOS;
02499 
02500   /* Read the rest of the digits */
02501   for(ix = 1; ix < len; ix++) {
02502     if((res = mp_mul_d(mp, 256, mp)) != MP_OKAY)
02503       return res;
02504     if((res = mp_add_d(mp, ustr[ix], mp)) != MP_OKAY)
02505       return res;
02506   }
02507 
02508   return MP_OKAY;
02509 
02510 } /* end mp_read_raw() */
02511 
02512 /* }}} */
02513 
02514 /* {{{ mp_raw_size(mp) */
02515 
02516 int    mp_raw_size(mp_int *mp)
02517 {
02518   ARGCHK(mp != NULL, 0);
02519 
02520   return (USED(mp) * sizeof(mp_digit)) + 1;
02521 
02522 } /* end mp_raw_size() */
02523 
02524 /* }}} */
02525 
02526 /* {{{ mp_toraw(mp, str) */
02527 
02528 mp_err mp_toraw(mp_int *mp, char *str)
02529 {
02530   int  ix, jx, pos = 1;
02531 
02532   ARGCHK(mp != NULL && str != NULL, MP_BADARG);
02533 
02534   str[0] = (char)SIGN(mp);
02535 
02536   /* Iterate over each digit... */
02537   for(ix = USED(mp) - 1; ix >= 0; ix--) {
02538     mp_digit  d = DIGIT(mp, ix);
02539 
02540     /* Unpack digit bytes, high order first */
02541     for(jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
02542       str[pos++] = (char)(d >> (jx * CHAR_BIT));
02543     }
02544   }
02545 
02546   return MP_OKAY;
02547 
02548 } /* end mp_toraw() */
02549 
02550 /* }}} */
02551 
02552 /* {{{ mp_read_radix(mp, str, radix) */
02553 
02554 /*
02555   mp_read_radix(mp, str, radix)
02556 
02557   Read an integer from the given string, and set mp to the resulting
02558   value.  The input is presumed to be in base 10.  Leading non-digit
02559   characters are ignored, and the function reads until a non-digit
02560   character or the end of the string.
02561  */
02562 
02563 mp_err  mp_read_radix(mp_int *mp, const char *str, int radix)
02564 {
02565   int     ix = 0, val = 0;
02566   mp_err  res;
02567   mp_sign sig = ZPOS;
02568 
02569   ARGCHK(mp != NULL && str != NULL && radix >= 2 && radix <= MAX_RADIX, 
02570         MP_BADARG);
02571 
02572   mp_zero(mp);
02573 
02574   /* Skip leading non-digit characters until a digit or '-' or '+' */
02575   while(str[ix] && 
02576        (s_mp_tovalue(str[ix], radix) < 0) && 
02577        str[ix] != '-' &&
02578        str[ix] != '+') {
02579     ++ix;
02580   }
02581 
02582   if(str[ix] == '-') {
02583     sig = NEG;
02584     ++ix;
02585   } else if(str[ix] == '+') {
02586     sig = ZPOS; /* this is the default anyway... */
02587     ++ix;
02588   }
02589 
02590   while((val = s_mp_tovalue(str[ix], radix)) >= 0) {
02591     if((res = s_mp_mul_d(mp, radix)) != MP_OKAY)
02592       return res;
02593     if((res = s_mp_add_d(mp, val)) != MP_OKAY)
02594       return res;
02595     ++ix;
02596   }
02597 
02598   if(s_mp_cmp_d(mp, 0) == MP_EQ)
02599     SIGN(mp) = ZPOS;
02600   else
02601     SIGN(mp) = sig;
02602 
02603   return MP_OKAY;
02604 
02605 } /* end mp_read_radix() */
02606 
02607 mp_err mp_read_variable_radix(mp_int *a, const char * str, int default_radix)
02608 {
02609   int     radix = default_radix;
02610   int     cx;
02611   mp_sign sig   = ZPOS;
02612   mp_err  res;
02613 
02614   /* Skip leading non-digit characters until a digit or '-' or '+' */
02615   while ((cx = *str) != 0 && 
02616        (s_mp_tovalue(cx, radix) < 0) && 
02617        cx != '-' &&
02618        cx != '+') {
02619     ++str;
02620   }
02621 
02622   if (cx == '-') {
02623     sig = NEG;
02624     ++str;
02625   } else if (cx == '+') {
02626     sig = ZPOS; /* this is the default anyway... */
02627     ++str;
02628   }
02629 
02630   if (str[0] == '0') {
02631     if ((str[1] | 0x20) == 'x') {
02632       radix = 16;
02633       str += 2;
02634     } else {
02635       radix = 8;
02636       str++;
02637     }
02638   }
02639   res = mp_read_radix(a, str, radix);
02640   if (res == MP_OKAY) {
02641     MP_SIGN(a) = (s_mp_cmp_d(a, 0) == MP_EQ) ? ZPOS : sig;
02642   }
02643   return res;
02644 }
02645 
02646 /* }}} */
02647 
02648 /* {{{ mp_radix_size(mp, radix) */
02649 
02650 int    mp_radix_size(mp_int *mp, int radix)
02651 {
02652   int  bits;
02653 
02654   if(!mp || radix < 2 || radix > MAX_RADIX)
02655     return 0;
02656 
02657   bits = USED(mp) * DIGIT_BIT - 1;
02658  
02659   return s_mp_outlen(bits, radix);
02660 
02661 } /* end mp_radix_size() */
02662 
02663 /* }}} */
02664 
02665 /* {{{ mp_toradix(mp, str, radix) */
02666 
02667 mp_err mp_toradix(mp_int *mp, char *str, int radix)
02668 {
02669   int  ix, pos = 0;
02670 
02671   ARGCHK(mp != NULL && str != NULL, MP_BADARG);
02672   ARGCHK(radix > 1 && radix <= MAX_RADIX, MP_RANGE);
02673 
02674   if(mp_cmp_z(mp) == MP_EQ) {
02675     str[0] = '0';
02676     str[1] = '\0';
02677   } else {
02678     mp_err   res;
02679     mp_int   tmp;
02680     mp_sign  sgn;
02681     mp_digit rem, rdx = (mp_digit)radix;
02682     char     ch;
02683 
02684     if((res = mp_init_copy(&tmp, mp)) != MP_OKAY)
02685       return res;
02686 
02687     /* Save sign for later, and take absolute value */
02688     sgn = SIGN(&tmp); SIGN(&tmp) = ZPOS;
02689 
02690     /* Generate output digits in reverse order      */
02691     while(mp_cmp_z(&tmp) != 0) {
02692       if((res = mp_div_d(&tmp, rdx, &tmp, &rem)) != MP_OKAY) {
02693        mp_clear(&tmp);
02694        return res;
02695       }
02696 
02697       /* Generate digits, use capital letters */
02698       ch = s_mp_todigit(rem, radix, 0);
02699 
02700       str[pos++] = ch;
02701     }
02702 
02703     /* Add - sign if original value was negative */
02704     if(sgn == NEG)
02705       str[pos++] = '-';
02706 
02707     /* Add trailing NUL to end the string        */
02708     str[pos--] = '\0';
02709 
02710     /* Reverse the digits and sign indicator     */
02711     ix = 0;
02712     while(ix < pos) {
02713       char tmp = str[ix];
02714 
02715       str[ix] = str[pos];
02716       str[pos] = tmp;
02717       ++ix;
02718       --pos;
02719     }
02720     
02721     mp_clear(&tmp);
02722   }
02723 
02724   return MP_OKAY;
02725 
02726 } /* end mp_toradix() */
02727 
02728 /* }}} */
02729 
02730 /* {{{ mp_tovalue(ch, r) */
02731 
02732 int    mp_tovalue(char ch, int r)
02733 {
02734   return s_mp_tovalue(ch, r);
02735 
02736 } /* end mp_tovalue() */
02737 
02738 /* }}} */
02739 
02740 /* }}} */
02741 
02742 /* {{{ mp_strerror(ec) */
02743 
02744 /*
02745   mp_strerror(ec)
02746 
02747   Return a string describing the meaning of error code 'ec'.  The
02748   string returned is allocated in static memory, so the caller should
02749   not attempt to modify or free the memory associated with this
02750   string.
02751  */
02752 const char  *mp_strerror(mp_err ec)
02753 {
02754   int   aec = (ec < 0) ? -ec : ec;
02755 
02756   /* Code values are negative, so the senses of these comparisons
02757      are accurate */
02758   if(ec < MP_LAST_CODE || ec > MP_OKAY) {
02759     return mp_err_string[0];  /* unknown error code */
02760   } else {
02761     return mp_err_string[aec + 1];
02762   }
02763 
02764 } /* end mp_strerror() */
02765 
02766 /* }}} */
02767 
02768 /*========================================================================*/
02769 /*------------------------------------------------------------------------*/
02770 /* Static function definitions (internal use only)                        */
02771 
02772 /* {{{ Memory management */
02773 
02774 /* {{{ s_mp_grow(mp, min) */
02775 
02776 /* Make sure there are at least 'min' digits allocated to mp              */
02777 mp_err   s_mp_grow(mp_int *mp, mp_size min)
02778 {
02779   if(min > ALLOC(mp)) {
02780     mp_digit   *tmp;
02781 
02782     /* Set min to next nearest default precision block size */
02783     min = MP_ROUNDUP(min, s_mp_defprec);
02784 
02785     if((tmp = s_mp_alloc(min, sizeof(mp_digit))) == NULL)
02786       return MP_MEM;
02787 
02788     s_mp_copy(DIGITS(mp), tmp, USED(mp));
02789 
02790 #if MP_CRYPTO
02791     s_mp_setz(DIGITS(mp), ALLOC(mp));
02792 #endif
02793     s_mp_free(DIGITS(mp));
02794     DIGITS(mp) = tmp;
02795     ALLOC(mp) = min;
02796   }
02797 
02798   return MP_OKAY;
02799 
02800 } /* end s_mp_grow() */
02801 
02802 /* }}} */
02803 
02804 /* {{{ s_mp_pad(mp, min) */
02805 
02806 /* Make sure the used size of mp is at least 'min', growing if needed     */
02807 mp_err   s_mp_pad(mp_int *mp, mp_size min)
02808 {
02809   if(min > USED(mp)) {
02810     mp_err  res;
02811 
02812     /* Make sure there is room to increase precision  */
02813     if (min > ALLOC(mp)) {
02814       if ((res = s_mp_grow(mp, min)) != MP_OKAY)
02815        return res;
02816     } else {
02817       s_mp_setz(DIGITS(mp) + USED(mp), min - USED(mp));
02818     }
02819 
02820     /* Increase precision; should already be 0-filled */
02821     USED(mp) = min;
02822   }
02823 
02824   return MP_OKAY;
02825 
02826 } /* end s_mp_pad() */
02827 
02828 /* }}} */
02829 
02830 /* {{{ s_mp_setz(dp, count) */
02831 
02832 #if MP_MACRO == 0
02833 /* Set 'count' digits pointed to by dp to be zeroes                       */
02834 void s_mp_setz(mp_digit *dp, mp_size count)
02835 {
02836 #if MP_MEMSET == 0
02837   int  ix;
02838 
02839   for(ix = 0; ix < count; ix++)
02840     dp[ix] = 0;
02841 #else
02842   memset(dp, 0, count * sizeof(mp_digit));
02843 #endif
02844 
02845 } /* end s_mp_setz() */
02846 #endif
02847 
02848 /* }}} */
02849 
02850 /* {{{ s_mp_copy(sp, dp, count) */
02851 
02852 #if MP_MACRO == 0
02853 /* Copy 'count' digits from sp to dp                                      */
02854 void s_mp_copy(const mp_digit *sp, mp_digit *dp, mp_size count)
02855 {
02856 #if MP_MEMCPY == 0
02857   int  ix;
02858 
02859   for(ix = 0; ix < count; ix++)
02860     dp[ix] = sp[ix];
02861 #else
02862   memcpy(dp, sp, count * sizeof(mp_digit));
02863 #endif
02864 
02865 } /* end s_mp_copy() */
02866 #endif
02867 
02868 /* }}} */
02869 
02870 /* {{{ s_mp_alloc(nb, ni) */
02871 
02872 #if MP_MACRO == 0
02873 /* Allocate ni records of nb bytes each, and return a pointer to that     */
02874 void    *s_mp_alloc(size_t nb, size_t ni)
02875 {
02876   ++mp_allocs;
02877   return calloc(nb, ni);
02878 
02879 } /* end s_mp_alloc() */
02880 #endif
02881 
02882 /* }}} */
02883 
02884 /* {{{ s_mp_free(ptr) */
02885 
02886 #if MP_MACRO == 0
02887 /* Free the memory pointed to by ptr                                      */
02888 void     s_mp_free(void *ptr)
02889 {
02890   if(ptr) {
02891     ++mp_frees;
02892     free(ptr);
02893   }
02894 } /* end s_mp_free() */
02895 #endif
02896 
02897 /* }}} */
02898 
02899 /* {{{ s_mp_clamp(mp) */
02900 
02901 #if MP_MACRO == 0
02902 /* Remove leading zeroes from the given value                             */
02903 void     s_mp_clamp(mp_int *mp)
02904 {
02905   mp_size used = MP_USED(mp);
02906   while (used > 1 && DIGIT(mp, used - 1) == 0)
02907     --used;
02908   MP_USED(mp) = used;
02909 } /* end s_mp_clamp() */
02910 #endif
02911 
02912 /* }}} */
02913 
02914 /* {{{ s_mp_exch(a, b) */
02915 
02916 /* Exchange the data for a and b; (b, a) = (a, b)                         */
02917 void     s_mp_exch(mp_int *a, mp_int *b)
02918 {
02919   mp_int   tmp;
02920 
02921   tmp = *a;
02922   *a = *b;
02923   *b = tmp;
02924 
02925 } /* end s_mp_exch() */
02926 
02927 /* }}} */
02928 
02929 /* }}} */
02930 
02931 /* {{{ Arithmetic helpers */
02932 
02933 /* {{{ s_mp_lshd(mp, p) */
02934 
02935 /* 
02936    Shift mp leftward by p digits, growing if needed, and zero-filling
02937    the in-shifted digits at the right end.  This is a convenient
02938    alternative to multiplication by powers of the radix
02939    The value of USED(mp) must already have been set to the value for
02940    the shifted result.
02941  */   
02942 
02943 mp_err   s_mp_lshd(mp_int *mp, mp_size p)
02944 {
02945   mp_err  res;
02946   mp_size pos;
02947   int     ix;
02948 
02949   if(p == 0)
02950     return MP_OKAY;
02951 
02952   if (MP_USED(mp) == 1 && MP_DIGIT(mp, 0) == 0)
02953     return MP_OKAY;
02954 
02955   if((res = s_mp_pad(mp, USED(mp) + p)) != MP_OKAY)
02956     return res;
02957 
02958   pos = USED(mp) - 1;
02959 
02960   /* Shift all the significant figures over as needed */
02961   for(ix = pos - p; ix >= 0; ix--) 
02962     DIGIT(mp, ix + p) = DIGIT(mp, ix);
02963 
02964   /* Fill the bottom digits with zeroes */
02965   for(ix = 0; ix < p; ix++)
02966     DIGIT(mp, ix) = 0;
02967 
02968   return MP_OKAY;
02969 
02970 } /* end s_mp_lshd() */
02971 
02972 /* }}} */
02973 
02974 /* {{{ s_mp_mul_2d(mp, d) */
02975 
02976 /*
02977   Multiply the integer by 2^d, where d is a number of bits.  This
02978   amounts to a bitwise shift of the value.
02979  */
02980 mp_err   s_mp_mul_2d(mp_int *mp, mp_digit d)
02981 {
02982   mp_err   res;
02983   mp_digit dshift, bshift;
02984   mp_digit mask;
02985 
02986   ARGCHK(mp != NULL,  MP_BADARG);
02987 
02988   dshift = d / MP_DIGIT_BIT;
02989   bshift = d % MP_DIGIT_BIT;
02990   /* bits to be shifted out of the top word */
02991   mask   = ((mp_digit)~0 << (MP_DIGIT_BIT - bshift)); 
02992   mask  &= MP_DIGIT(mp, MP_USED(mp) - 1);
02993 
02994   if (MP_OKAY != (res = s_mp_pad(mp, MP_USED(mp) + dshift + (mask != 0) )))
02995     return res;
02996 
02997   if (dshift && MP_OKAY != (res = s_mp_lshd(mp, dshift)))
02998     return res;
02999 
03000   if (bshift) { 
03001     mp_digit *pa = MP_DIGITS(mp);
03002     mp_digit *alim = pa + MP_USED(mp);
03003     mp_digit  prev = 0;
03004 
03005     for (pa += dshift; pa < alim; ) {
03006       mp_digit x = *pa;
03007       *pa++ = (x << bshift) | prev;
03008       prev = x >> (DIGIT_BIT - bshift);
03009     }
03010   }
03011 
03012   s_mp_clamp(mp);
03013   return MP_OKAY;
03014 } /* end s_mp_mul_2d() */
03015 
03016 /* {{{ s_mp_rshd(mp, p) */
03017 
03018 /* 
03019    Shift mp rightward by p digits.  Maintains the invariant that
03020    digits above the precision are all zero.  Digits shifted off the
03021    end are lost.  Cannot fail.
03022  */
03023 
03024 void     s_mp_rshd(mp_int *mp, mp_size p)
03025 {
03026   mp_size  ix;
03027   mp_digit *src, *dst;
03028 
03029   if(p == 0)
03030     return;
03031 
03032   /* Shortcut when all digits are to be shifted off */
03033   if(p >= USED(mp)) {
03034     s_mp_setz(DIGITS(mp), ALLOC(mp));
03035     USED(mp) = 1;
03036     SIGN(mp) = ZPOS;
03037     return;
03038   }
03039 
03040   /* Shift all the significant figures over as needed */
03041   dst = MP_DIGITS(mp);
03042   src = dst + p;
03043   for (ix = USED(mp) - p; ix > 0; ix--)
03044     *dst++ = *src++;
03045 
03046   MP_USED(mp) -= p;
03047   /* Fill the top digits with zeroes */
03048   while (p-- > 0)
03049     *dst++ = 0;
03050 
03051 #if 0
03052   /* Strip off any leading zeroes    */
03053   s_mp_clamp(mp);
03054 #endif
03055 
03056 } /* end s_mp_rshd() */
03057 
03058 /* }}} */
03059 
03060 /* {{{ s_mp_div_2(mp) */
03061 
03062 /* Divide by two -- take advantage of radix properties to do it fast      */
03063 void     s_mp_div_2(mp_int *mp)
03064 {
03065   s_mp_div_2d(mp, 1);
03066 
03067 } /* end s_mp_div_2() */
03068 
03069 /* }}} */
03070 
03071 /* {{{ s_mp_mul_2(mp) */
03072 
03073 mp_err s_mp_mul_2(mp_int *mp)
03074 {
03075   mp_digit *pd;
03076   int      ix, used;
03077   mp_digit kin = 0;
03078 
03079   /* Shift digits leftward by 1 bit */
03080   used = MP_USED(mp);
03081   pd = MP_DIGITS(mp);
03082   for (ix = 0; ix < used; ix++) {
03083     mp_digit d = *pd;
03084     *pd++ = (d << 1) | kin;
03085     kin = (d >> (DIGIT_BIT - 1));
03086   }
03087 
03088   /* Deal with rollover from last digit */
03089   if (kin) {
03090     if (ix >= ALLOC(mp)) {
03091       mp_err res;
03092       if((res = s_mp_grow(mp, ALLOC(mp) + 1)) != MP_OKAY)
03093        return res;
03094     }
03095 
03096     DIGIT(mp, ix) = kin;
03097     USED(mp) += 1;
03098   }
03099 
03100   return MP_OKAY;
03101 
03102 } /* end s_mp_mul_2() */
03103 
03104 /* }}} */
03105 
03106 /* {{{ s_mp_mod_2d(mp, d) */
03107 
03108 /*
03109   Remainder the integer by 2^d, where d is a number of bits.  This
03110   amounts to a bitwise AND of the value, and does not require the full
03111   division code
03112  */
03113 void     s_mp_mod_2d(mp_int *mp, mp_digit d)
03114 {
03115   mp_size  ndig = (d / DIGIT_BIT), nbit = (d % DIGIT_BIT);
03116   mp_size  ix;
03117   mp_digit dmask;
03118 
03119   if(ndig >= USED(mp))
03120     return;
03121 
03122   /* Flush all the bits above 2^d in its digit */
03123   dmask = ((mp_digit)1 << nbit) - 1;
03124   DIGIT(mp, ndig) &= dmask;
03125 
03126   /* Flush all digits above the one with 2^d in it */
03127   for(ix = ndig + 1; ix < USED(mp); ix++)
03128     DIGIT(mp, ix) = 0;
03129 
03130   s_mp_clamp(mp);
03131 
03132 } /* end s_mp_mod_2d() */
03133 
03134 /* }}} */
03135 
03136 /* {{{ s_mp_div_2d(mp, d) */
03137 
03138 /*
03139   Divide the integer by 2^d, where d is a number of bits.  This
03140   amounts to a bitwise shift of the value, and does not require the
03141   full division code (used in Barrett reduction, see below)
03142  */
03143 void     s_mp_div_2d(mp_int *mp, mp_digit d)
03144 {
03145   int       ix;
03146   mp_digit  save, next, mask;
03147 
03148   s_mp_rshd(mp, d / DIGIT_BIT);
03149   d %= DIGIT_BIT;
03150   if (d) {
03151     mask = ((mp_digit)1 << d) - 1;
03152     save = 0;
03153     for(ix = USED(mp) - 1; ix >= 0; ix--) {
03154       next = DIGIT(mp, ix) & mask;
03155       DIGIT(mp, ix) = (DIGIT(mp, ix) >> d) | (save << (DIGIT_BIT - d));
03156       save = next;
03157     }
03158   }
03159   s_mp_clamp(mp);
03160 
03161 } /* end s_mp_div_2d() */
03162 
03163 /* }}} */
03164 
03165 /* {{{ s_mp_norm(a, b, *d) */
03166 
03167 /*
03168   s_mp_norm(a, b, *d)
03169 
03170   Normalize a and b for division, where b is the divisor.  In order
03171   that we might make good guesses for quotient digits, we want the
03172   leading digit of b to be at least half the radix, which we
03173   accomplish by multiplying a and b by a power of 2.  The exponent 
03174   (shift count) is placed in *pd, so that the remainder can be shifted 
03175   back at the end of the division process.
03176  */
03177 
03178 mp_err   s_mp_norm(mp_int *a, mp_int *b, mp_digit *pd)
03179 {
03180   mp_digit  d;
03181   mp_digit  mask;
03182   mp_digit  b_msd;
03183   mp_err    res    = MP_OKAY;
03184 
03185   d = 0;
03186   mask  = DIGIT_MAX & ~(DIGIT_MAX >> 1);  /* mask is msb of digit */
03187   b_msd = DIGIT(b, USED(b) - 1);
03188   while (!(b_msd & mask)) {
03189     b_msd <<= 1;
03190     ++d;
03191   }
03192 
03193   if (d) {
03194     MP_CHECKOK( s_mp_mul_2d(a, d) );
03195     MP_CHECKOK( s_mp_mul_2d(b, d) );
03196   }
03197 
03198   *pd = d;
03199 CLEANUP:
03200   return res;
03201 
03202 } /* end s_mp_norm() */
03203 
03204 /* }}} */
03205 
03206 /* }}} */
03207 
03208 /* {{{ Primitive digit arithmetic */
03209 
03210 /* {{{ s_mp_add_d(mp, d) */
03211 
03212 /* Add d to |mp| in place                                                 */
03213 mp_err   s_mp_add_d(mp_int *mp, mp_digit d)    /* unsigned digit addition */
03214 {
03215 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
03216   mp_word   w, k = 0;
03217   mp_size   ix = 1;
03218 
03219   w = (mp_word)DIGIT(mp, 0) + d;
03220   DIGIT(mp, 0) = ACCUM(w);
03221   k = CARRYOUT(w);
03222 
03223   while(ix < USED(mp) && k) {
03224     w = (mp_word)DIGIT(mp, ix) + k;
03225     DIGIT(mp, ix) = ACCUM(w);
03226     k = CARRYOUT(w);
03227     ++ix;
03228   }
03229 
03230   if(k != 0) {
03231     mp_err  res;
03232 
03233     if((res = s_mp_pad(mp, USED(mp) + 1)) != MP_OKAY)
03234       return res;
03235 
03236     DIGIT(mp, ix) = (mp_digit)k;
03237   }
03238 
03239   return MP_OKAY;
03240 #else
03241   mp_digit * pmp = MP_DIGITS(mp);
03242   mp_digit sum, mp_i, carry = 0;
03243   mp_err   res = MP_OKAY;
03244   int used = (int)MP_USED(mp);
03245 
03246   mp_i = *pmp;
03247   *pmp++ = sum = d + mp_i;
03248   carry = (sum < d);
03249   while (carry && --used > 0) {
03250     mp_i = *pmp;
03251     *pmp++ = sum = carry + mp_i;
03252     carry = !sum;
03253   }
03254   if (carry && !used) {
03255     /* mp is growing */
03256     used = MP_USED(mp);
03257     MP_CHECKOK( s_mp_pad(mp, used + 1) );
03258     MP_DIGIT(mp, used) = carry;
03259   }
03260 CLEANUP:
03261   return res;
03262 #endif
03263 } /* end s_mp_add_d() */
03264 
03265 /* }}} */
03266 
03267 /* {{{ s_mp_sub_d(mp, d) */
03268 
03269 /* Subtract d from |mp| in place, assumes |mp| > d                        */
03270 mp_err   s_mp_sub_d(mp_int *mp, mp_digit d)    /* unsigned digit subtract */
03271 {
03272 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
03273   mp_word   w, b = 0;
03274   mp_size   ix = 1;
03275 
03276   /* Compute initial subtraction    */
03277   w = (RADIX + (mp_word)DIGIT(mp, 0)) - d;
03278   b = CARRYOUT(w) ? 0 : 1;
03279   DIGIT(mp, 0) = ACCUM(w);
03280 
03281   /* Propagate borrows leftward     */
03282   while(b && ix < USED(mp)) {
03283     w = (RADIX + (mp_word)DIGIT(mp, ix)) - b;
03284     b = CARRYOUT(w) ? 0 : 1;
03285     DIGIT(mp, ix) = ACCUM(w);
03286     ++ix;
03287   }
03288 
03289   /* Remove leading zeroes          */
03290   s_mp_clamp(mp);
03291 
03292   /* If we have a borrow out, it's a violation of the input invariant */
03293   if(b)
03294     return MP_RANGE;
03295   else
03296     return MP_OKAY;
03297 #else
03298   mp_digit *pmp = MP_DIGITS(mp);
03299   mp_digit mp_i, diff, borrow;
03300   mp_size  used = MP_USED(mp);
03301 
03302   mp_i = *pmp;
03303   *pmp++ = diff = mp_i - d;
03304   borrow = (diff > mp_i);
03305   while (borrow && --used) {
03306     mp_i = *pmp;
03307     *pmp++ = diff = mp_i - borrow;
03308     borrow = (diff > mp_i);
03309   }
03310   s_mp_clamp(mp);
03311   return (borrow && !used) ? MP_RANGE : MP_OKAY;
03312 #endif
03313 } /* end s_mp_sub_d() */
03314 
03315 /* }}} */
03316 
03317 /* {{{ s_mp_mul_d(a, d) */
03318 
03319 /* Compute a = a * d, single digit multiplication                         */
03320 mp_err   s_mp_mul_d(mp_int *a, mp_digit d)
03321 {
03322   mp_err  res;
03323   mp_size used;
03324   int     pow;
03325 
03326   if (!d) {
03327     mp_zero(a);
03328     return MP_OKAY;
03329   }
03330   if (d == 1)
03331     return MP_OKAY;
03332   if (0 <= (pow = s_mp_ispow2d(d))) {
03333     return s_mp_mul_2d(a, (mp_digit)pow);
03334   }
03335 
03336   used = MP_USED(a);
03337   MP_CHECKOK( s_mp_pad(a, used + 1) );
03338 
03339   s_mpv_mul_d(MP_DIGITS(a), used, d, MP_DIGITS(a));
03340 
03341   s_mp_clamp(a);
03342 
03343 CLEANUP:
03344   return res;
03345   
03346 } /* end s_mp_mul_d() */
03347 
03348 /* }}} */
03349 
03350 /* {{{ s_mp_div_d(mp, d, r) */
03351 
03352 /*
03353   s_mp_div_d(mp, d, r)
03354 
03355   Compute the quotient mp = mp / d and remainder r = mp mod d, for a
03356   single digit d.  If r is null, the remainder will be discarded.
03357  */
03358 
03359 mp_err   s_mp_div_d(mp_int *mp, mp_digit d, mp_digit *r)
03360 {
03361 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
03362   mp_word   w = 0, q;
03363 #else
03364   mp_digit  w, q;
03365 #endif
03366   int       ix;
03367   mp_err    res;
03368   mp_int    quot;
03369   mp_int    rem;
03370 
03371   if(d == 0)
03372     return MP_RANGE;
03373   if (d == 1) {
03374     if (r)
03375       *r = 0;
03376     return MP_OKAY;
03377   }
03378   /* could check for power of 2 here, but mp_div_d does that. */
03379   if (MP_USED(mp) == 1) {
03380     mp_digit n   = MP_DIGIT(mp,0);
03381     mp_digit rem;
03382 
03383     q   = n / d;
03384     rem = n % d;
03385     MP_DIGIT(mp,0) = q;
03386     if (r)
03387       *r = rem;
03388     return MP_OKAY;
03389   }
03390 
03391   MP_DIGITS(&rem)  = 0;
03392   MP_DIGITS(&quot) = 0;
03393   /* Make room for the quotient */
03394   MP_CHECKOK( mp_init_size(&quot, USED(mp)) );
03395 
03396 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
03397   for(ix = USED(mp) - 1; ix >= 0; ix--) {
03398     w = (w << DIGIT_BIT) | DIGIT(mp, ix);
03399 
03400     if(w >= d) {
03401       q = w / d;
03402       w = w % d;
03403     } else {
03404       q = 0;
03405     }
03406 
03407     s_mp_lshd(&quot, 1);
03408     DIGIT(&quot, 0) = (mp_digit)q;
03409   }
03410 #else
03411   {
03412     mp_digit p;
03413 #if !defined(MP_ASSEMBLY_DIV_2DX1D)
03414     mp_digit norm;
03415 #endif
03416 
03417     MP_CHECKOK( mp_init_copy(&rem, mp) );
03418 
03419 #if !defined(MP_ASSEMBLY_DIV_2DX1D)
03420     MP_DIGIT(&quot, 0) = d;
03421     MP_CHECKOK( s_mp_norm(&rem, &quot, &norm) );
03422     if (norm)
03423       d <<= norm;
03424     MP_DIGIT(&quot, 0) = 0;
03425 #endif
03426 
03427     p = 0;
03428     for (ix = USED(&rem) - 1; ix >= 0; ix--) {
03429       w = DIGIT(&rem, ix);
03430 
03431       if (p) {
03432         MP_CHECKOK( s_mpv_div_2dx1d(p, w, d, &q, &w) );
03433       } else if (w >= d) {
03434        q = w / d;
03435        w = w % d;
03436       } else {
03437        q = 0;
03438       }
03439 
03440       MP_CHECKOK( s_mp_lshd(&quot, 1) );
03441       DIGIT(&quot, 0) = q;
03442       p = w;
03443     }
03444 #if !defined(MP_ASSEMBLY_DIV_2DX1D)
03445     if (norm)
03446       w >>= norm;
03447 #endif
03448   }
03449 #endif
03450 
03451   /* Deliver the remainder, if desired */
03452   if(r)
03453     *r = (mp_digit)w;
03454 
03455   s_mp_clamp(&quot);
03456   mp_exch(&quot, mp);
03457 CLEANUP:
03458   mp_clear(&quot);
03459   mp_clear(&rem);
03460 
03461   return res;
03462 } /* end s_mp_div_d() */
03463 
03464 /* }}} */
03465 
03466 
03467 /* }}} */
03468 
03469 /* {{{ Primitive full arithmetic */
03470 
03471 /* {{{ s_mp_add(a, b) */
03472 
03473 /* Compute a = |a| + |b|                                                  */
03474 mp_err   s_mp_add(mp_int *a, const mp_int *b)  /* magnitude addition      */
03475 {
03476 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
03477   mp_word   w = 0;
03478 #else
03479   mp_digit  d, sum, carry = 0;
03480 #endif
03481   mp_digit *pa, *pb;
03482   mp_size   ix;
03483   mp_size   used;
03484   mp_err    res;
03485 
03486   /* Make sure a has enough precision for the output value */
03487   if((USED(b) > USED(a)) && (res = s_mp_pad(a, USED(b))) != MP_OKAY)
03488     return res;
03489 
03490   /*
03491     Add up all digits up to the precision of b.  If b had initially
03492     the same precision as a, or greater, we took care of it by the
03493     padding step above, so there is no problem.  If b had initially
03494     less precision, we'll have to make sure the carry out is duly
03495     propagated upward among the higher-order digits of the sum.
03496    */
03497   pa = MP_DIGITS(a);
03498   pb = MP_DIGITS(b);
03499   used = MP_USED(b);
03500   for(ix = 0; ix < used; ix++) {
03501 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
03502     w = w + *pa + *pb++;
03503     *pa++ = ACCUM(w);
03504     w = CARRYOUT(w);
03505 #else
03506     d = *pa;
03507     sum = d + *pb++;
03508     d = (sum < d);                 /* detect overflow */
03509     *pa++ = sum += carry;
03510     carry = d + (sum < carry);            /* detect overflow */
03511 #endif
03512   }
03513 
03514   /* If we run out of 'b' digits before we're actually done, make
03515      sure the carries get propagated upward...  
03516    */
03517   used = MP_USED(a);
03518 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
03519   while (w && ix < used) {
03520     w = w + *pa;
03521     *pa++ = ACCUM(w);
03522     w = CARRYOUT(w);
03523     ++ix;
03524   }
03525 #else
03526   while (carry && ix < used) {
03527     sum = carry + *pa;
03528     *pa++ = sum;
03529     carry = !sum;
03530     ++ix;
03531   }
03532 #endif
03533 
03534   /* If there's an overall carry out, increase precision and include
03535      it.  We could have done this initially, but why touch the memory
03536      allocator unless we're sure we have to?
03537    */
03538 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
03539   if (w) {
03540     if((res = s_mp_pad(a, used + 1)) != MP_OKAY)
03541       return res;
03542 
03543     DIGIT(a, ix) = (mp_digit)w;
03544   }
03545 #else
03546   if (carry) {
03547     if((res = s_mp_pad(a, used + 1)) != MP_OKAY)
03548       return res;
03549 
03550     DIGIT(a, used) = carry;
03551   }
03552 #endif
03553 
03554   return MP_OKAY;
03555 } /* end s_mp_add() */
03556 
03557 /* }}} */
03558 
03559 /* Compute c = |a| + |b|         */ /* magnitude addition      */
03560 mp_err   s_mp_add_3arg(const mp_int *a, const mp_int *b, mp_int *c)  
03561 {
03562   mp_digit *pa, *pb, *pc;
03563 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
03564   mp_word   w = 0;
03565 #else
03566   mp_digit  sum, carry = 0, d;
03567 #endif
03568   mp_size   ix;
03569   mp_size   used;
03570   mp_err    res;
03571 
03572   MP_SIGN(c) = MP_SIGN(a);
03573   if (MP_USED(a) < MP_USED(b)) {
03574     const mp_int *xch = a;
03575     a = b;
03576     b = xch;
03577   }
03578 
03579   /* Make sure a has enough precision for the output value */
03580   if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a))))
03581     return res;
03582 
03583   /*
03584     Add up all digits up to the precision of b.  If b had initially
03585     the same precision as a, or greater, we took care of it by the
03586     exchange step above, so there is no problem.  If b had initially
03587     less precision, we'll have to make sure the carry out is duly
03588     propagated upward among the higher-order digits of the sum.
03589    */
03590   pa = MP_DIGITS(a);
03591   pb = MP_DIGITS(b);
03592   pc = MP_DIGITS(c);
03593   used = MP_USED(b);
03594   for (ix = 0; ix < used; ix++) {
03595 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
03596     w = w + *pa++ + *pb++;
03597     *pc++ = ACCUM(w);
03598     w = CARRYOUT(w);
03599 #else
03600     d = *pa++;
03601     sum = d + *pb++;
03602     d = (sum < d);                 /* detect overflow */
03603     *pc++ = sum += carry;
03604     carry = d + (sum < carry);            /* detect overflow */
03605 #endif
03606   }
03607 
03608   /* If we run out of 'b' digits before we're actually done, make
03609      sure the carries get propagated upward...  
03610    */
03611   for (used = MP_USED(a); ix < used; ++ix) {
03612 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
03613     w = w + *pa++;
03614     *pc++ = ACCUM(w);
03615     w = CARRYOUT(w);
03616 #else
03617     *pc++ = sum = carry + *pa++;
03618     carry = (sum < carry);
03619 #endif
03620   }
03621 
03622   /* If there's an overall carry out, increase precision and include
03623      it.  We could have done this initially, but why touch the memory
03624      allocator unless we're sure we have to?
03625    */
03626 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
03627   if (w) {
03628     if((res = s_mp_pad(c, used + 1)) != MP_OKAY)
03629       return res;
03630 
03631     DIGIT(c, used) = (mp_digit)w;
03632     ++used;
03633   }
03634 #else
03635   if (carry) {
03636     if((res = s_mp_pad(c, used + 1)) != MP_OKAY)
03637       return res;
03638 
03639     DIGIT(c, used) = carry;
03640     ++used;
03641   }
03642 #endif
03643   MP_USED(c) = used;
03644   return MP_OKAY;
03645 }
03646 /* {{{ s_mp_add_offset(a, b, offset) */
03647 
03648 /* Compute a = |a| + ( |b| * (RADIX ** offset) )             */
03649 mp_err   s_mp_add_offset(mp_int *a, mp_int *b, mp_size offset)   
03650 {
03651 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
03652   mp_word   w, k = 0;
03653 #else
03654   mp_digit  d, sum, carry = 0;
03655 #endif
03656   mp_size   ib;
03657   mp_size   ia;
03658   mp_size   lim;
03659   mp_err    res;
03660 
03661   /* Make sure a has enough precision for the output value */
03662   lim = MP_USED(b) + offset;
03663   if((lim > USED(a)) && (res = s_mp_pad(a, lim)) != MP_OKAY)
03664     return res;
03665 
03666   /*
03667     Add up all digits up to the precision of b.  If b had initially
03668     the same precision as a, or greater, we took care of it by the
03669     padding step above, so there is no problem.  If b had initially
03670     less precision, we'll have to make sure the carry out is duly
03671     propagated upward among the higher-order digits of the sum.
03672    */
03673   lim = USED(b);
03674   for(ib = 0, ia = offset; ib < lim; ib++, ia++) {
03675 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
03676     w = (mp_word)DIGIT(a, ia) + DIGIT(b, ib) + k;
03677     DIGIT(a, ia) = ACCUM(w);
03678     k = CARRYOUT(w);
03679 #else
03680     d = MP_DIGIT(a, ia);
03681     sum = d + MP_DIGIT(b, ib);
03682     d = (sum < d);
03683     MP_DIGIT(a,ia) = sum += carry;
03684     carry = d + (sum < carry);
03685 #endif
03686   }
03687 
03688   /* If we run out of 'b' digits before we're actually done, make
03689      sure the carries get propagated upward...  
03690    */
03691 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
03692   for (lim = MP_USED(a); k && (ia < lim); ++ia) {
03693     w = (mp_word)DIGIT(a, ia) + k;
03694     DIGIT(a, ia) = ACCUM(w);
03695     k = CARRYOUT(w);
03696   }
03697 #else
03698   for (lim = MP_USED(a); carry && (ia < lim); ++ia) {
03699     d = MP_DIGIT(a, ia);
03700     MP_DIGIT(a,ia) = sum = d + carry;
03701     carry = (sum < d);
03702   }
03703 #endif
03704 
03705   /* If there's an overall carry out, increase precision and include
03706      it.  We could have done this initially, but why touch the memory
03707      allocator unless we're sure we have to?
03708    */
03709 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
03710   if(k) {
03711     if((res = s_mp_pad(a, USED(a) + 1)) != MP_OKAY)
03712       return res;
03713 
03714     DIGIT(a, ia) = (mp_digit)k;
03715   }
03716 #else
03717   if (carry) {
03718     if((res = s_mp_pad(a, lim + 1)) != MP_OKAY)
03719       return res;
03720 
03721     DIGIT(a, lim) = carry;
03722   }
03723 #endif
03724   s_mp_clamp(a);
03725 
03726   return MP_OKAY;
03727 
03728 } /* end s_mp_add_offset() */
03729 
03730 /* }}} */
03731 
03732 /* {{{ s_mp_sub(a, b) */
03733 
03734 /* Compute a = |a| - |b|, assumes |a| >= |b|                              */
03735 mp_err   s_mp_sub(mp_int *a, const mp_int *b)  /* magnitude subtract      */
03736 {
03737   mp_digit *pa, *pb, *limit;
03738 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
03739   mp_sword  w = 0;
03740 #else
03741   mp_digit  d, diff, borrow = 0;
03742 #endif
03743 
03744   /*
03745     Subtract and propagate borrow.  Up to the precision of b, this
03746     accounts for the digits of b; after that, we just make sure the
03747     carries get to the right place.  This saves having to pad b out to
03748     the precision of a just to make the loops work right...
03749    */
03750   pa = MP_DIGITS(a);
03751   pb = MP_DIGITS(b);
03752   limit = pb + MP_USED(b);
03753   while (pb < limit) {
03754 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
03755     w = w + *pa - *pb++;
03756     *pa++ = ACCUM(w);
03757     w >>= MP_DIGIT_BIT;
03758 #else
03759     d = *pa;
03760     diff = d - *pb++;
03761     d = (diff > d);                       /* detect borrow */
03762     if (borrow && --diff == MP_DIGIT_MAX)
03763       ++d;
03764     *pa++ = diff;
03765     borrow = d;      
03766 #endif
03767   }
03768   limit = MP_DIGITS(a) + MP_USED(a);
03769 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
03770   while (w && pa < limit) {
03771     w = w + *pa;
03772     *pa++ = ACCUM(w);
03773     w >>= MP_DIGIT_BIT;
03774   }
03775 #else
03776   while (borrow && pa < limit) {
03777     d = *pa;
03778     *pa++ = diff = d - borrow;
03779     borrow = (diff > d);
03780   }
03781 #endif
03782 
03783   /* Clobber any leading zeroes we created    */
03784   s_mp_clamp(a);
03785 
03786   /* 
03787      If there was a borrow out, then |b| > |a| in violation
03788      of our input invariant.  We've already done the work,
03789      but we'll at least complain about it...
03790    */
03791 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
03792   return w ? MP_RANGE : MP_OKAY;
03793 #else
03794   return borrow ? MP_RANGE : MP_OKAY;
03795 #endif
03796 } /* end s_mp_sub() */
03797 
03798 /* }}} */
03799 
03800 /* Compute c = |a| - |b|, assumes |a| >= |b| */ /* magnitude subtract      */
03801 mp_err   s_mp_sub_3arg(const mp_int *a, const mp_int *b, mp_int *c)  
03802 {
03803   mp_digit *pa, *pb, *pc;
03804 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
03805   mp_sword  w = 0;
03806 #else
03807   mp_digit  d, diff, borrow = 0;
03808 #endif
03809   int       ix, limit;
03810   mp_err    res;
03811 
03812   MP_SIGN(c) = MP_SIGN(a);
03813 
03814   /* Make sure a has enough precision for the output value */
03815   if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a))))
03816     return res;
03817 
03818   /*
03819     Subtract and propagate borrow.  Up to the precision of b, this
03820     accounts for the digits of b; after that, we just make sure the
03821     carries get to the right place.  This saves having to pad b out to
03822     the precision of a just to make the loops work right...
03823    */
03824   pa = MP_DIGITS(a);
03825   pb = MP_DIGITS(b);
03826   pc = MP_DIGITS(c);
03827   limit = MP_USED(b);
03828   for (ix = 0; ix < limit; ++ix) {
03829 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
03830     w = w + *pa++ - *pb++;
03831     *pc++ = ACCUM(w);
03832     w >>= MP_DIGIT_BIT;
03833 #else
03834     d = *pa++;
03835     diff = d - *pb++;
03836     d = (diff > d);
03837     if (borrow && --diff == MP_DIGIT_MAX)
03838       ++d;
03839     *pc++ = diff;
03840     borrow = d;
03841 #endif
03842   }
03843   for (limit = MP_USED(a); ix < limit; ++ix) {
03844 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
03845     w = w + *pa++;
03846     *pc++ = ACCUM(w);
03847     w >>= MP_DIGIT_BIT;
03848 #else
03849     d = *pa++;
03850     *pc++ = diff = d - borrow;
03851     borrow = (diff > d);
03852 #endif
03853   }
03854 
03855   /* Clobber any leading zeroes we created    */
03856   MP_USED(c) = ix;
03857   s_mp_clamp(c);
03858 
03859   /* 
03860      If there was a borrow out, then |b| > |a| in violation
03861      of our input invariant.  We've already done the work,
03862      but we'll at least complain about it...
03863    */
03864 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
03865   return w ? MP_RANGE : MP_OKAY;
03866 #else
03867   return borrow ? MP_RANGE : MP_OKAY;
03868 #endif
03869 }
03870 /* {{{ s_mp_mul(a, b) */
03871 
03872 /* Compute a = |a| * |b|                                                  */
03873 mp_err   s_mp_mul(mp_int *a, const mp_int *b)
03874 {
03875   return mp_mul(a, b, a);
03876 } /* end s_mp_mul() */
03877 
03878 /* }}} */
03879 
03880 #if defined(MP_USE_UINT_DIGIT) && defined(MP_USE_LONG_LONG_MULTIPLY)
03881 /* This trick works on Sparc V8 CPUs with the Workshop compilers. */
03882 #define MP_MUL_DxD(a, b, Phi, Plo) \
03883   { unsigned long long product = (unsigned long long)a * b; \
03884     Plo = (mp_digit)product; \
03885     Phi = (mp_digit)(product >> MP_DIGIT_BIT); }
03886 #elif defined(OSF1)
03887 #define MP_MUL_DxD(a, b, Phi, Plo) \
03888   { Plo = asm ("mulq %a0, %a1, %v0", a, b);\
03889     Phi = asm ("umulh %a0, %a1, %v0", a, b); }
03890 #else
03891 #define MP_MUL_DxD(a, b, Phi, Plo) \
03892   { mp_digit a0b1, a1b0; \
03893     Plo = (a & MP_HALF_DIGIT_MAX) * (b & MP_HALF_DIGIT_MAX); \
03894     Phi = (a >> MP_HALF_DIGIT_BIT) * (b >> MP_HALF_DIGIT_BIT); \
03895     a0b1 = (a & MP_HALF_DIGIT_MAX) * (b >> MP_HALF_DIGIT_BIT); \
03896     a1b0 = (a >> MP_HALF_DIGIT_BIT) * (b & MP_HALF_DIGIT_MAX); \
03897     a1b0 += a0b1; \
03898     Phi += a1b0 >> MP_HALF_DIGIT_BIT; \
03899     if (a1b0 < a0b1)  \
03900       Phi += MP_HALF_RADIX; \
03901     a1b0 <<= MP_HALF_DIGIT_BIT; \
03902     Plo += a1b0; \
03903     if (Plo < a1b0) \
03904       ++Phi; \
03905   }
03906 #endif
03907 
03908 #if !defined(MP_ASSEMBLY_MULTIPLY)
03909 /* c = a * b */
03910 void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
03911 {
03912 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
03913   mp_digit   d = 0;
03914 
03915   /* Inner product:  Digits of a */
03916   while (a_len--) {
03917     mp_word w = ((mp_word)b * *a++) + d;
03918     *c++ = ACCUM(w);
03919     d = CARRYOUT(w);
03920   }
03921   *c = d;
03922 #else
03923   mp_digit carry = 0;
03924   while (a_len--) {
03925     mp_digit a_i = *a++;
03926     mp_digit a0b0, a1b1;
03927 
03928     MP_MUL_DxD(a_i, b, a1b1, a0b0);
03929 
03930     a0b0 += carry;
03931     if (a0b0 < carry)
03932       ++a1b1;
03933     *c++ = a0b0;
03934     carry = a1b1;
03935   }
03936   *c = carry;
03937 #endif
03938 }
03939 
03940 /* c += a * b */
03941 void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, 
03942                            mp_digit *c)
03943 {
03944 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
03945   mp_digit   d = 0;
03946 
03947   /* Inner product:  Digits of a */
03948   while (a_len--) {
03949     mp_word w = ((mp_word)b * *a++) + *c + d;
03950     *c++ = ACCUM(w);
03951     d = CARRYOUT(w);
03952   }
03953   *c = d;
03954 #else
03955   mp_digit carry = 0;
03956   while (a_len--) {
03957     mp_digit a_i = *a++;
03958     mp_digit a0b0, a1b1;
03959 
03960     MP_MUL_DxD(a_i, b, a1b1, a0b0);
03961 
03962     a0b0 += carry;
03963     if (a0b0 < carry)
03964       ++a1b1;
03965     a0b0 += a_i = *c;
03966     if (a0b0 < a_i)
03967       ++a1b1;
03968     *c++ = a0b0;
03969     carry = a1b1;
03970   }
03971   *c = carry;
03972 #endif
03973 }
03974 
03975 /* Presently, this is only used by the Montgomery arithmetic code. */
03976 /* c += a * b */
03977 void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
03978 {
03979 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
03980   mp_digit   d = 0;
03981 
03982   /* Inner product:  Digits of a */
03983   while (a_len--) {
03984     mp_word w = ((mp_word)b * *a++) + *c + d;
03985     *c++ = ACCUM(w);
03986     d = CARRYOUT(w);
03987   }
03988 
03989   while (d) {
03990     mp_word w = (mp_word)*c + d;
03991     *c++ = ACCUM(w);
03992     d = CARRYOUT(w);
03993   }
03994 #else
03995   mp_digit carry = 0;
03996   while (a_len--) {
03997     mp_digit a_i = *a++;
03998     mp_digit a0b0, a1b1;
03999 
04000     MP_MUL_DxD(a_i, b, a1b1, a0b0);
04001 
04002     a0b0 += carry;
04003     if (a0b0 < carry)
04004       ++a1b1;
04005 
04006     a0b0 += a_i = *c;
04007     if (a0b0 < a_i)
04008       ++a1b1;
04009 
04010     *c++ = a0b0;
04011     carry = a1b1;
04012   }
04013   while (carry) {
04014     mp_digit c_i = *c;
04015     carry += c_i;
04016     *c++ = carry;
04017     carry = carry < c_i;
04018   }
04019 #endif
04020 }
04021 #endif
04022 
04023 #if defined(MP_USE_UINT_DIGIT) && defined(MP_USE_LONG_LONG_MULTIPLY)
04024 /* This trick works on Sparc V8 CPUs with the Workshop compilers. */
04025 #define MP_SQR_D(a, Phi, Plo) \
04026   { unsigned long long square = (unsigned long long)a * a; \
04027     Plo = (mp_digit)square; \
04028     Phi = (mp_digit)(square >> MP_DIGIT_BIT); }
04029 #elif defined(OSF1)
04030 #define MP_SQR_D(a, Phi, Plo) \
04031   { Plo = asm ("mulq  %a0, %a0, %v0", a);\
04032     Phi = asm ("umulh %a0, %a0, %v0", a); }
04033 #else
04034 #define MP_SQR_D(a, Phi, Plo) \
04035   { mp_digit Pmid; \
04036     Plo  = (a  & MP_HALF_DIGIT_MAX) * (a  & MP_HALF_DIGIT_MAX); \
04037     Phi  = (a >> MP_HALF_DIGIT_BIT) * (a >> MP_HALF_DIGIT_BIT); \
04038     Pmid = (a  & MP_HALF_DIGIT_MAX) * (a >> MP_HALF_DIGIT_BIT); \
04039     Phi += Pmid >> (MP_HALF_DIGIT_BIT - 1);  \
04040     Pmid <<= (MP_HALF_DIGIT_BIT + 1);  \
04041     Plo += Pmid;  \
04042     if (Plo < Pmid)  \
04043       ++Phi;  \
04044   }
04045 #endif
04046 
04047 #if !defined(MP_ASSEMBLY_SQUARE)
04048 /* Add the squares of the digits of a to the digits of b. */
04049 void s_mpv_sqr_add_prop(const mp_digit *pa, mp_size a_len, mp_digit *ps)
04050 {
04051 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
04052   mp_word  w;
04053   mp_digit d;
04054   mp_size  ix;
04055 
04056   w  = 0;
04057 #define ADD_SQUARE(n) \
04058     d = pa[n]; \
04059     w += (d * (mp_word)d) + ps[2*n]; \
04060     ps[2*n] = ACCUM(w); \
04061     w = (w >> DIGIT_BIT) + ps[2*n+1]; \
04062     ps[2*n+1] = ACCUM(w); \
04063     w = (w >> DIGIT_BIT)
04064 
04065   for (ix = a_len; ix >= 4; ix -= 4) {
04066     ADD_SQUARE(0);
04067     ADD_SQUARE(1);
04068     ADD_SQUARE(2);
04069     ADD_SQUARE(3);
04070     pa += 4;
04071     ps += 8;
04072   }
04073   if (ix) {
04074     ps += 2*ix;
04075     pa += ix;
04076     switch (ix) {
04077     case 3: ADD_SQUARE(-3); /* FALLTHRU */
04078     case 2: ADD_SQUARE(-2); /* FALLTHRU */
04079     case 1: ADD_SQUARE(-1); /* FALLTHRU */
04080     case 0: break;
04081     }
04082   }
04083   while (w) {
04084     w += *ps;
04085     *ps++ = ACCUM(w);
04086     w = (w >> DIGIT_BIT);
04087   }
04088 #else
04089   mp_digit carry = 0;
04090   while (a_len--) {
04091     mp_digit a_i = *pa++;
04092     mp_digit a0a0, a1a1;
04093 
04094     MP_SQR_D(a_i, a1a1, a0a0);
04095 
04096     /* here a1a1 and a0a0 constitute a_i ** 2 */
04097     a0a0 += carry;
04098     if (a0a0 < carry)
04099       ++a1a1;
04100 
04101     /* now add to ps */
04102     a0a0 += a_i = *ps;
04103     if (a0a0 < a_i)
04104       ++a1a1;
04105     *ps++ = a0a0;
04106     a1a1 += a_i = *ps;
04107     carry = (a1a1 < a_i);
04108     *ps++ = a1a1;
04109   }
04110   while (carry) {
04111     mp_digit s_i = *ps;
04112     carry += s_i;
04113     *ps++ = carry;
04114     carry = carry < s_i;
04115   }
04116 #endif
04117 }
04118 #endif
04119 
04120 #if (defined(MP_NO_MP_WORD) || defined(MP_NO_DIV_WORD)) \
04121 && !defined(MP_ASSEMBLY_DIV_2DX1D)
04122 /*
04123 ** Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized 
04124 ** so its high bit is 1.   This code is from NSPR.
04125 */
04126 mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor, 
04127                      mp_digit *qp, mp_digit *rp)
04128 {
04129     mp_digit d1, d0, q1, q0;
04130     mp_digit r1, r0, m;
04131 
04132     d1 = divisor >> MP_HALF_DIGIT_BIT;
04133     d0 = divisor & MP_HALF_DIGIT_MAX;
04134     r1 = Nhi % d1;
04135     q1 = Nhi / d1;
04136     m = q1 * d0;
04137     r1 = (r1 << MP_HALF_DIGIT_BIT) | (Nlo >> MP_HALF_DIGIT_BIT);
04138     if (r1 < m) {
04139         q1--, r1 += divisor;
04140         if (r1 >= divisor && r1 < m) {
04141            q1--, r1 += divisor;
04142        }
04143     }
04144     r1 -= m;
04145     r0 = r1 % d1;
04146     q0 = r1 / d1;
04147     m = q0 * d0;
04148     r0 = (r0 << MP_HALF_DIGIT_BIT) | (Nlo & MP_HALF_DIGIT_MAX);
04149     if (r0 < m) {
04150         q0--, r0 += divisor;
04151         if (r0 >= divisor && r0 < m) {
04152            q0--, r0 += divisor;
04153        }
04154     }
04155     if (qp)
04156        *qp = (q1 << MP_HALF_DIGIT_BIT) | q0;
04157     if (rp)
04158        *rp = r0 - m;
04159     return MP_OKAY;
04160 }
04161 #endif
04162 
04163 #if MP_SQUARE
04164 /* {{{ s_mp_sqr(a) */
04165 
04166 mp_err   s_mp_sqr(mp_int *a)
04167 {
04168   mp_err   res;
04169   mp_int   tmp;
04170 
04171   if((res = mp_init_size(&tmp, 2 * USED(a))) != MP_OKAY)
04172     return res;
04173   res = mp_sqr(a, &tmp);
04174   if (res == MP_OKAY) {
04175     s_mp_exch(&tmp, a);
04176   }
04177   mp_clear(&tmp);
04178   return res;
04179 }
04180 
04181 /* }}} */
04182 #endif
04183 
04184 /* {{{ s_mp_div(a, b) */
04185 
04186 /*
04187   s_mp_div(a, b)
04188 
04189   Compute a = a / b and b = a mod b.  Assumes b > a.
04190  */
04191 
04192 mp_err   s_mp_div(mp_int *rem,     /* i: dividend, o: remainder */
04193                   mp_int *div,     /* i: divisor                */
04194                 mp_int *quot)      /* i: 0;        o: quotient  */
04195 {
04196   mp_int   part, t;
04197 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
04198   mp_word  q_msd;
04199 #else
04200   mp_digit q_msd;
04201 #endif
04202   mp_err   res;
04203   mp_digit d;
04204   mp_digit div_msd;
04205   int      ix;
04206 
04207   if(mp_cmp_z(div) == 0)
04208     return MP_RANGE;
04209 
04210   /* Shortcut if divisor is power of two */
04211   if((ix = s_mp_ispow2(div)) >= 0) {
04212     MP_CHECKOK( mp_copy(rem, quot) );
04213     s_mp_div_2d(quot, (mp_digit)ix);
04214     s_mp_mod_2d(rem,  (mp_digit)ix);
04215 
04216     return MP_OKAY;
04217   }
04218 
04219   DIGITS(&t) = 0;
04220   MP_SIGN(rem) = ZPOS;
04221   MP_SIGN(div) = ZPOS;
04222 
04223   /* A working temporary for division     */
04224   MP_CHECKOK( mp_init_size(&t, MP_ALLOC(rem)));
04225 
04226   /* Normalize to optimize guessing       */
04227   MP_CHECKOK( s_mp_norm(rem, div, &d) );
04228 
04229   part = *rem;
04230 
04231   /* Perform the division itself...woo!   */
04232   MP_USED(quot) = MP_ALLOC(quot);
04233 
04234   /* Find a partial substring of rem which is at least div */
04235   /* If we didn't find one, we're finished dividing    */
04236   while (MP_USED(rem) > MP_USED(div) || s_mp_cmp(rem, div) >= 0) {
04237     int i;
04238     int unusedRem;
04239 
04240     unusedRem = MP_USED(rem) - MP_USED(div);
04241     MP_DIGITS(&part) = MP_DIGITS(rem) + unusedRem;
04242     MP_ALLOC(&part)  = MP_ALLOC(rem)  - unusedRem;
04243     MP_USED(&part)   = MP_USED(div);
04244     if (s_mp_cmp(&part, div) < 0) {
04245       -- unusedRem;
04246 #if MP_ARGCHK == 2
04247       assert(unusedRem >= 0);
04248 #endif
04249       -- MP_DIGITS(&part);
04250       ++ MP_USED(&part);
04251       ++ MP_ALLOC(&part);
04252     }
04253 
04254     /* Compute a guess for the next quotient digit       */
04255     q_msd = MP_DIGIT(&part, MP_USED(&part) - 1);
04256     div_msd = MP_DIGIT(div, MP_USED(div) - 1);
04257     if (q_msd >= div_msd) {
04258       q_msd = 1;
04259     } else if (MP_USED(&part) > 1) {
04260 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
04261       q_msd = (q_msd << MP_DIGIT_BIT) | MP_DIGIT(&part, MP_USED(&part) - 2);
04262       q_msd /= div_msd;
04263       if (q_msd == RADIX)
04264         --q_msd;
04265 #else
04266       mp_digit r;
04267       MP_CHECKOK( s_mpv_div_2dx1d(q_msd, MP_DIGIT(&part, MP_USED(&part) - 2), 
04268                               div_msd, &q_msd, &r) );
04269 #endif
04270     } else {
04271       q_msd = 0;
04272     }
04273 #if MP_ARGCHK == 2
04274     assert(q_msd > 0); /* This case should never occur any more. */
04275 #endif
04276     if (q_msd <= 0)
04277       break;
04278 
04279     /* See what that multiplies out to                   */
04280     mp_copy(div, &t);
04281     MP_CHECKOK( s_mp_mul_d(&t, (mp_digit)q_msd) );
04282 
04283     /* 
04284        If it's too big, back it off.  We should not have to do this
04285        more than once, or, in rare cases, twice.  Knuth describes a
04286        method by which this could be reduced to a maximum of once, but
04287        I didn't implement that here.
04288      * When using s_mpv_div_2dx1d, we may have to do this 3 times.
04289      */
04290     for (i = 4; s_mp_cmp(&t, &part) > 0 && i > 0; --i) {
04291       --q_msd;
04292       s_mp_sub(&t, div);    /* t -= div */
04293     }
04294     if (i < 0) {
04295       res = MP_RANGE;
04296       goto CLEANUP;
04297     }
04298 
04299     /* At this point, q_msd should be the right next digit   */
04300     MP_CHECKOK( s_mp_sub(&part, &t) );    /* part -= t */
04301     s_mp_clamp(rem);
04302 
04303     /*
04304       Include the digit in the quotient.  We allocated enough memory
04305       for any quotient we could ever possibly get, so we should not
04306       have to check for failures here
04307      */
04308     MP_DIGIT(quot, unusedRem) = (mp_digit)q_msd;
04309   }
04310 
04311   /* Denormalize remainder                */
04312   if (d) {
04313     s_mp_div_2d(rem, d);
04314   }
04315 
04316   s_mp_clamp(quot);
04317 
04318 CLEANUP:
04319   mp_clear(&t);
04320 
04321   return res;
04322 
04323 } /* end s_mp_div() */
04324 
04325 
04326 /* }}} */
04327 
04328 /* {{{ s_mp_2expt(a, k) */
04329 
04330 mp_err   s_mp_2expt(mp_int *a, mp_digit k)
04331 {
04332   mp_err    res;
04333   mp_size   dig, bit;
04334 
04335   dig = k / DIGIT_BIT;
04336   bit = k % DIGIT_BIT;
04337 
04338   mp_zero(a);
04339   if((res = s_mp_pad(a, dig + 1)) != MP_OKAY)
04340     return res;
04341   
04342   DIGIT(a, dig) |= ((mp_digit)1 << bit);
04343 
04344   return MP_OKAY;
04345 
04346 } /* end s_mp_2expt() */
04347 
04348 /* }}} */
04349 
04350 /* {{{ s_mp_reduce(x, m, mu) */
04351 
04352 /*
04353   Compute Barrett reduction, x (mod m), given a precomputed value for
04354   mu = b^2k / m, where b = RADIX and k = #digits(m).  This should be
04355   faster than straight division, when many reductions by the same
04356   value of m are required (such as in modular exponentiation).  This
04357   can nearly halve the time required to do modular exponentiation,
04358   as compared to using the full integer divide to reduce.
04359 
04360   This algorithm was derived from the _Handbook of Applied
04361   Cryptography_ by Menezes, Oorschot and VanStone, Ch. 14,
04362   pp. 603-604.  
04363  */
04364 
04365 mp_err   s_mp_reduce(mp_int *x, const mp_int *m, const mp_int *mu)
04366 {
04367   mp_int   q;
04368   mp_err   res;
04369 
04370   if((res = mp_init_copy(&q, x)) != MP_OKAY)
04371     return res;
04372 
04373   s_mp_rshd(&q, USED(m) - 1);  /* q1 = x / b^(k-1)  */
04374   s_mp_mul(&q, mu);            /* q2 = q1 * mu      */
04375   s_mp_rshd(&q, USED(m) + 1);  /* q3 = q2 / b^(k+1) */
04376 
04377   /* x = x mod b^(k+1), quick (no division) */
04378   s_mp_mod_2d(x, DIGIT_BIT * (USED(m) + 1));
04379 
04380   /* q = q * m mod b^(k+1), quick (no division) */
04381   s_mp_mul(&q, m);
04382   s_mp_mod_2d(&q, DIGIT_BIT * (USED(m) + 1));
04383 
04384   /* x = x - q */
04385   if((res = mp_sub(x, &q, x)) != MP_OKAY)
04386     goto CLEANUP;
04387 
04388   /* If x < 0, add b^(k+1) to it */
04389   if(mp_cmp_z(x) < 0) {
04390     mp_set(&q, 1);
04391     if((res = s_mp_lshd(&q, USED(m) + 1)) != MP_OKAY)
04392       goto CLEANUP;
04393     if((res = mp_add(x, &q, x)) != MP_OKAY)
04394       goto CLEANUP;
04395   }
04396 
04397   /* Back off if it's too big */
04398   while(mp_cmp(x, m) >= 0) {
04399     if((res = s_mp_sub(x, m)) != MP_OKAY)
04400       break;
04401   }
04402 
04403  CLEANUP:
04404   mp_clear(&q);
04405 
04406   return res;
04407 
04408 } /* end s_mp_reduce() */
04409 
04410 /* }}} */
04411 
04412 /* }}} */
04413 
04414 /* {{{ Primitive comparisons */
04415 
04416 /* {{{ s_mp_cmp(a, b) */
04417 
04418 /* Compare |a| <=> |b|, return 0 if equal, <0 if a<b, >0 if a>b           */
04419 int      s_mp_cmp(const mp_int *a, const mp_int *b)
04420 {
04421   mp_size used_a = MP_USED(a);
04422   {
04423     mp_size used_b = MP_USED(b);
04424 
04425     if (used_a > used_b)
04426       goto IS_GT;
04427     if (used_a < used_b)
04428       goto IS_LT;
04429   }
04430   {
04431     mp_digit *pa, *pb;
04432     mp_digit da = 0, db = 0;
04433 
04434 #define CMP_AB(n) if ((da = pa[n]) != (db = pb[n])) goto done
04435 
04436     pa = MP_DIGITS(a) + used_a;
04437     pb = MP_DIGITS(b) + used_a;
04438     while (used_a >= 4) {
04439       pa     -= 4;
04440       pb     -= 4;
04441       used_a -= 4;
04442       CMP_AB(3);
04443       CMP_AB(2);
04444       CMP_AB(1);
04445       CMP_AB(0);
04446     }
04447     while (used_a-- > 0 && ((da = *--pa) == (db = *--pb))) 
04448       /* do nothing */;
04449 done:
04450     if (da > db)
04451       goto IS_GT;
04452     if (da < db) 
04453       goto IS_LT;
04454   }
04455   return MP_EQ;
04456 IS_LT:
04457   return MP_LT;
04458 IS_GT:
04459   return MP_GT;
04460 } /* end s_mp_cmp() */
04461 
04462 /* }}} */
04463 
04464 /* {{{ s_mp_cmp_d(a, d) */
04465 
04466 /* Compare |a| <=> d, return 0 if equal, <0 if a<d, >0 if a>d             */
04467 int      s_mp_cmp_d(const mp_int *a, mp_digit d)
04468 {
04469   if(USED(a) > 1)
04470     return MP_GT;
04471 
04472   if(DIGIT(a, 0) < d)
04473     return MP_LT;
04474   else if(DIGIT(a, 0) > d)
04475     return MP_GT;
04476   else
04477     return MP_EQ;
04478 
04479 } /* end s_mp_cmp_d() */
04480 
04481 /* }}} */
04482 
04483 /* {{{ s_mp_ispow2(v) */
04484 
04485 /*
04486   Returns -1 if the value is not a power of two; otherwise, it returns
04487   k such that v = 2^k, i.e. lg(v).
04488  */
04489 int      s_mp_ispow2(const mp_int *v)
04490 {
04491   mp_digit d;
04492   int      extra = 0, ix;
04493 
04494   ix = MP_USED(v) - 1;
04495   d = MP_DIGIT(v, ix); /* most significant digit of v */
04496 
04497   extra = s_mp_ispow2d(d);
04498   if (extra < 0 || ix == 0)
04499     return extra;
04500 
04501   while (--ix >= 0) {
04502     if (DIGIT(v, ix) != 0)
04503       return -1; /* not a power of two */
04504     extra += MP_DIGIT_BIT;
04505   }
04506 
04507   return extra;
04508 
04509 } /* end s_mp_ispow2() */
04510 
04511 /* }}} */
04512 
04513 /* {{{ s_mp_ispow2d(d) */
04514 
04515 int      s_mp_ispow2d(mp_digit d)
04516 {
04517   if ((d != 0) && ((d & (d-1)) == 0)) { /* d is a power of 2 */
04518     int pow = 0;
04519 #if defined (MP_USE_UINT_DIGIT)
04520     if (d & 0xffff0000U)
04521       pow += 16;
04522     if (d & 0xff00ff00U)
04523       pow += 8;
04524     if (d & 0xf0f0f0f0U)
04525       pow += 4;
04526     if (d & 0xccccccccU)
04527       pow += 2;
04528     if (d & 0xaaaaaaaaU)
04529       pow += 1;
04530 #elif defined(MP_USE_LONG_LONG_DIGIT)
04531     if (d & 0xffffffff00000000ULL)
04532       pow += 32;
04533     if (d & 0xffff0000ffff0000ULL)
04534       pow += 16;
04535     if (d & 0xff00ff00ff00ff00ULL)
04536       pow += 8;
04537     if (d & 0xf0f0f0f0f0f0f0f0ULL)
04538       pow += 4;
04539     if (d & 0xccccccccccccccccULL)
04540       pow += 2;
04541     if (d & 0xaaaaaaaaaaaaaaaaULL)
04542       pow += 1;
04543 #elif defined(MP_USE_LONG_DIGIT)
04544     if (d & 0xffffffff00000000UL)
04545       pow += 32;
04546     if (d & 0xffff0000ffff0000UL)
04547       pow += 16;
04548     if (d & 0xff00ff00ff00ff00UL)
04549       pow += 8;
04550     if (d & 0xf0f0f0f0f0f0f0f0UL)
04551       pow += 4;
04552     if (d & 0xccccccccccccccccUL)
04553       pow += 2;
04554     if (d & 0xaaaaaaaaaaaaaaaaUL)
04555       pow += 1;
04556 #else
04557 #error "unknown type for mp_digit"
04558 #endif
04559     return pow;
04560   }
04561   return -1;
04562 
04563 } /* end s_mp_ispow2d() */
04564 
04565 /* }}} */
04566 
04567 /* }}} */
04568 
04569 /* {{{ Primitive I/O helpers */
04570 
04571 /* {{{ s_mp_tovalue(ch, r) */
04572 
04573 /*
04574   Convert the given character to its digit value, in the given radix.
04575   If the given character is not understood in the given radix, -1 is
04576   returned.  Otherwise the digit's numeric value is returned.
04577 
04578   The results will be odd if you use a radix < 2 or > 62, you are
04579   expected to know what you're up to.
04580  */
04581 int      s_mp_tovalue(char ch, int r)
04582 {
04583   int    val, xch;
04584   
04585   if(r > 36)
04586     xch = ch;
04587   else
04588     xch = toupper(ch);
04589 
04590   if(isdigit(xch))
04591     val = xch - '0';
04592   else if(isupper(xch))
04593     val = xch - 'A' + 10;
04594   else if(islower(xch))
04595     val = xch - 'a' + 36;
04596   else if(xch == '+')
04597     val = 62;
04598   else if(xch == '/')
04599     val = 63;
04600   else 
04601     return -1;
04602 
04603   if(val < 0 || val >= r)
04604     return -1;
04605 
04606   return val;
04607 
04608 } /* end s_mp_tovalue() */
04609 
04610 /* }}} */
04611 
04612 /* {{{ s_mp_todigit(val, r, low) */
04613 
04614 /*
04615   Convert val to a radix-r digit, if possible.  If val is out of range
04616   for r, returns zero.  Otherwise, returns an ASCII character denoting
04617   the value in the given radix.
04618 
04619   The results may be odd if you use a radix < 2 or > 64, you are
04620   expected to know what you're doing.
04621  */
04622   
04623 char     s_mp_todigit(mp_digit val, int r, int low)
04624 {
04625   char   ch;
04626 
04627   if(val >= r)
04628     return 0;
04629 
04630   ch = s_dmap_1[val];
04631 
04632   if(r <= 36 && low)
04633     ch = tolower(ch);
04634 
04635   return ch;
04636 
04637 } /* end s_mp_todigit() */
04638 
04639 /* }}} */
04640 
04641 /* {{{ s_mp_outlen(bits, radix) */
04642 
04643 /* 
04644    Return an estimate for how long a string is needed to hold a radix
04645    r representation of a number with 'bits' significant bits, plus an
04646    extra for a zero terminator (assuming C style strings here)
04647  */
04648 int      s_mp_outlen(int bits, int r)
04649 {
04650   return (int)((double)bits * LOG_V_2(r) + 1.5) + 1;
04651 
04652 } /* end s_mp_outlen() */
04653 
04654 /* }}} */
04655 
04656 /* }}} */
04657 
04658 /* {{{ mp_read_unsigned_octets(mp, str, len) */
04659 /* mp_read_unsigned_octets(mp, str, len)
04660    Read in a raw value (base 256) into the given mp_int
04661    No sign bit, number is positive.  Leading zeros ignored.
04662  */
04663 
04664 mp_err  
04665 mp_read_unsigned_octets(mp_int *mp, const unsigned char *str, mp_size len)
04666 {
04667   int            count;
04668   mp_err         res;
04669   mp_digit       d;
04670 
04671   ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG);
04672 
04673   mp_zero(mp);
04674 
04675   count = len % sizeof(mp_digit);
04676   if (count) {
04677     for (d = 0; count-- > 0; --len) {
04678       d = (d << 8) | *str++;
04679     }
04680     MP_DIGIT(mp, 0) = d;
04681   }
04682 
04683   /* Read the rest of the digits */
04684   for(; len > 0; len -= sizeof(mp_digit)) {
04685     for (d = 0, count = sizeof(mp_digit); count > 0; --count) {
04686       d = (d << 8) | *str++;
04687     }
04688     if (MP_EQ == mp_cmp_z(mp)) {
04689       if (!d)
04690        continue;
04691     } else {
04692       if((res = s_mp_lshd(mp, 1)) != MP_OKAY)
04693        return res;
04694     }
04695     MP_DIGIT(mp, 0) = d;
04696   }
04697   return MP_OKAY;
04698 } /* end mp_read_unsigned_octets() */
04699 /* }}} */
04700 
04701 /* {{{ mp_unsigned_octet_size(mp) */
04702 int    
04703 mp_unsigned_octet_size(const mp_int *mp)
04704 {
04705   int  bytes;
04706   int  ix;
04707   mp_digit  d = 0;
04708 
04709   ARGCHK(mp != NULL, MP_BADARG);
04710   ARGCHK(MP_ZPOS == SIGN(mp), MP_BADARG);
04711 
04712   bytes = (USED(mp) * sizeof(mp_digit));
04713 
04714   /* subtract leading zeros. */
04715   /* Iterate over each digit... */
04716   for(ix = USED(mp) - 1; ix >= 0; ix--) {
04717     d = DIGIT(mp, ix);
04718     if (d) 
04719        break;
04720     bytes -= sizeof(d);
04721   }
04722   if (!bytes)
04723     return 1;
04724 
04725   /* Have MSD, check digit bytes, high order first */
04726   for(ix = sizeof(mp_digit) - 1; ix >= 0; ix--) {
04727     unsigned char x = (unsigned char)(d >> (ix * CHAR_BIT));
04728     if (x) 
04729        break;
04730     --bytes;
04731   }
04732   return bytes;
04733 } /* end mp_unsigned_octet_size() */
04734 /* }}} */
04735 
04736 /* {{{ mp_to_unsigned_octets(mp, str) */
04737 /* output a buffer of big endian octets no longer than specified. */
04738 mp_err 
04739 mp_to_unsigned_octets(const mp_int *mp, unsigned char *str, mp_size maxlen)
04740 {
04741   int  ix, pos = 0;
04742   int  bytes;
04743 
04744   ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
04745 
04746   bytes = mp_unsigned_octet_size(mp);
04747   ARGCHK(bytes <= maxlen, MP_BADARG);
04748 
04749   /* Iterate over each digit... */
04750   for(ix = USED(mp) - 1; ix >= 0; ix--) {
04751     mp_digit  d = DIGIT(mp, ix);
04752     int       jx;
04753 
04754     /* Unpack digit bytes, high order first */
04755     for(jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
04756       unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
04757       if (!pos && !x)       /* suppress leading zeros */
04758        continue;
04759       str[pos++] = x;
04760     }
04761   }
04762   if (!pos)
04763     str[pos++] = 0;
04764   return pos;
04765 } /* end mp_to_unsigned_octets() */
04766 /* }}} */
04767 
04768 /* {{{ mp_to_signed_octets(mp, str) */
04769 /* output a buffer of big endian octets no longer than specified. */
04770 mp_err 
04771 mp_to_signed_octets(const mp_int *mp, unsigned char *str, mp_size maxlen)
04772 {
04773   int  ix, pos = 0;
04774   int  bytes;
04775 
04776   ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
04777 
04778   bytes = mp_unsigned_octet_size(mp);
04779   ARGCHK(bytes <= maxlen, MP_BADARG);
04780 
04781   /* Iterate over each digit... */
04782   for(ix = USED(mp) - 1; ix >= 0; ix--) {
04783     mp_digit  d = DIGIT(mp, ix);
04784     int       jx;
04785 
04786     /* Unpack digit bytes, high order first */
04787     for(jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
04788       unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
04789       if (!pos) {
04790        if (!x)              /* suppress leading zeros */
04791          continue;
04792        if (x & 0x80) { /* add one leading zero to make output positive.  */
04793          ARGCHK(bytes + 1 <= maxlen, MP_BADARG);
04794          if (bytes + 1 > maxlen)
04795            return MP_BADARG;
04796          str[pos++] = 0;
04797        }
04798       }
04799       str[pos++] = x;
04800     }
04801   }
04802   if (!pos)
04803     str[pos++] = 0;
04804   return pos;
04805 } /* end mp_to_signed_octets() */
04806 /* }}} */
04807 
04808 /* {{{ mp_to_fixlen_octets(mp, str) */
04809 /* output a buffer of big endian octets exactly as long as requested. */
04810 mp_err 
04811 mp_to_fixlen_octets(const mp_int *mp, unsigned char *str, mp_size length)
04812 {
04813   int  ix, pos = 0;
04814   int  bytes;
04815 
04816   ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
04817 
04818   bytes = mp_unsigned_octet_size(mp);
04819   ARGCHK(bytes <= length, MP_BADARG);
04820 
04821   /* place any needed leading zeros */
04822   for (;length > bytes; --length) {
04823        *str++ = 0;
04824   }
04825 
04826   /* Iterate over each digit... */
04827   for(ix = USED(mp) - 1; ix >= 0; ix--) {
04828     mp_digit  d = DIGIT(mp, ix);
04829     int       jx;
04830 
04831     /* Unpack digit bytes, high order first */
04832     for(jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
04833       unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
04834       if (!pos && !x)       /* suppress leading zeros */
04835        continue;
04836       str[pos++] = x;
04837     }
04838   }
04839   if (!pos)
04840     str[pos++] = 0;
04841   return MP_OKAY;
04842 } /* end mp_to_fixlen_octets() */
04843 /* }}} */
04844 
04845 
04846 /*------------------------------------------------------------------------*/
04847 /* HERE THERE BE DRAGONS                                                  */