Back to index

plt-scheme  4.2.1
gmp.c
Go to the documentation of this file.
00001 /* Copyright (C) 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
00002 
00003 This file is part of the GNU MP Library.
00004 
00005 The GNU MP Library is free software; you can redistribute it and/or modify
00006 it under the terms of the GNU Lesser General Public License as published by
00007 the Free Software Foundation; either version 2.1 of the License, or (at your
00008 option) any later version.
00009 
00010 The GNU MP Library is distributed in the hope that it will be useful, but
00011 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
00012 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
00013 License for more details.
00014 
00015 You should have received a copy of the GNU Lesser General Public License
00016 along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
00017 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
00018 MA 02111-1307, USA. */
00019 
00020 
00021 #define _FORCE_INLINES
00022 #define _EXTERN_INLINE /* empty */
00023 
00024 extern void *scheme_malloc_gmp(unsigned long, void **mem_pool);
00025 extern void scheme_free_gmp(void *, void **mem_pool);
00026 static void *mem_pool = 0;
00027 #define MALLOC(amt) scheme_malloc_gmp(amt, &mem_pool)
00028 #define FREE(p, s) scheme_free_gmp(p, &mem_pool)
00029 
00030 #include "../../sconfig.h"
00031 #include "mzconfig.h"
00032 #include "gmp.h"
00033 #include "gmp-impl.h"
00034 #include "gmplonglong.h"
00035 
00036 #define GMP_NAIL_BITS 0
00037 #define GMP_LIMB_BITS BITS_PER_MP_LIMB
00038 #define GMP_NUMB_BITS BITS_PER_MP_LIMB
00039 #define GMP_LIMB_HIGHBIT ((mp_limb_t)1 << (BITS_PER_MP_LIMB - 1))
00040 #define GMP_NUMB_HIGHBIT GMP_LIMB_HIGHBIT
00041 
00042 #ifndef NULL
00043 # define NULL 0L
00044 #endif
00045 
00046 #if GMP_NUMB_BITS == 32
00047 # define MP_BASES_CHARS_PER_LIMB_10      9
00048 # define MP_BASES_BIG_BASE_10            CNST_LIMB(0x3b9aca00)
00049 # define MP_BASES_BIG_BASE_INVERTED_10   CNST_LIMB(0x12e0be82)
00050 # define MP_BASES_NORMALIZATION_STEPS_10 2
00051 # define GMP_NUMB_MASK 0xFFFFFFFF
00052 #else
00053 # define MP_BASES_CHARS_PER_LIMB_10      19
00054 # define MP_BASES_BIG_BASE_10            CNST_LIMB(0x8ac7230489e80000)
00055 # define MP_BASES_BIG_BASE_INVERTED_10   CNST_LIMB(0xd83c94fb6d2ac34a)
00056 # define MP_BASES_NORMALIZATION_STEPS_10 0
00057 # define GMP_NUMB_MASK 0xFFFFFFFFFFFFFFFF
00058 #endif
00059 
00060 #define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift)    \
00061   mpn_divrem_1 (qp, xsize, ap, size, d)
00062 
00063 #define ABOVE_THRESHOLD(size,thresh)    \
00064   ((thresh) == 0                        \
00065    || ((thresh) != MP_SIZE_T_MAX        \
00066        && (size) >= (thresh)))
00067 #define BELOW_THRESHOLD(size,thresh)  (! ABOVE_THRESHOLD (size, thresh))
00068 
00069 /* n-1 inverts any low zeros and the lowest one bit.  If n&(n-1) leaves zero
00070    then that lowest one bit must have been the only bit set.  n==0 will
00071    return true though, so avoid that.  */
00072 #define POW2_P(n)  (((n) & ((n) - 1)) == 0)
00073 
00074 # define __GMP_ALLOCATE_FUNC_LIMBS(n) TMP_ALLOC(n * sizeof(mp_limb_t))
00075 # define __GMP_FREE_FUNC_LIMBS(p, n) /* */
00076 
00077 /* static const int mp_bits_per_limb = BITS_PER_MP_LIMB; */
00078 /* static const int __gmp_0 = 0; */
00079 /* static int __gmp_junk; */
00080 /* static int gmp_errno = 0; */
00081 
00082 #define SCHEME_BIGNUM_USE_FUEL(n) scheme_bignum_use_fuel(n)
00083 extern void scheme_bignum_use_fuel(long n);
00084 
00085 /* Compare OP1_PTR/OP1_SIZE with OP2_PTR/OP2_SIZE.
00086    There are no restrictions on the relative sizes of
00087    the two arguments.
00088    Return 1 if OP1 > OP2, 0 if they are equal, and -1 if OP1 < OP2.  */
00089 
00090 int
00091 #if __STDC__
00092 mpn_cmp (mp_srcptr op1_ptr, mp_srcptr op2_ptr, mp_size_t size)
00093 #else
00094 mpn_cmp (op1_ptr, op2_ptr, size)
00095      mp_srcptr op1_ptr;
00096      mp_srcptr op2_ptr;
00097      mp_size_t size;
00098 #endif
00099 {
00100   mp_size_t i;
00101   mp_limb_t op1_word, op2_word;
00102 
00103   for (i = size - 1; i >= 0; i--)
00104     {
00105       op1_word = op1_ptr[i];
00106       op2_word = op2_ptr[i];
00107       if (op1_word != op2_word)
00108        goto diff;
00109     }
00110   return 0;
00111  diff:
00112   /* This can *not* be simplified to
00113        op2_word - op2_word
00114      since that expression might give signed overflow.  */
00115   return (op1_word > op2_word) ? 1 : -1;
00116 }
00117 
00118 
00119 /* mpn_sub_n -- Subtract two limb vectors of equal, non-zero length. */
00120 
00121 mp_limb_t
00122 #if __STDC__
00123 mpn_sub_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size)
00124 #else
00125 mpn_sub_n (res_ptr, s1_ptr, s2_ptr, size)
00126      register mp_ptr res_ptr;
00127      register mp_srcptr s1_ptr;
00128      register mp_srcptr s2_ptr;
00129      mp_size_t size;
00130 #endif
00131 {
00132   register mp_limb_t x, y, cy;
00133   register mp_size_t j;
00134 
00135   /* The loop counter and index J goes from -SIZE to -1.  This way
00136      the loop becomes faster.  */
00137   j = -size;
00138 
00139   /* Offset the base pointers to compensate for the negative indices.  */
00140   s1_ptr -= j;
00141   s2_ptr -= j;
00142   res_ptr -= j;
00143 
00144   cy = 0;
00145   do
00146     {
00147       y = s2_ptr[j];
00148       x = s1_ptr[j];
00149       y += cy;                     /* add previous carry to subtrahend */
00150       cy = (y < cy);        /* get out carry from that addition */
00151       y = x - y;            /* main subtract */
00152       cy = (y > x) + cy;    /* get out carry from the subtract, combine */
00153       res_ptr[j] = y;
00154     }
00155   while (++j != 0);
00156 
00157   return cy;
00158 }
00159 
00160 /* mpn_add_n -- Add two limb vectors of equal, non-zero length. */
00161 
00162 mp_limb_t
00163 #if __STDC__
00164 mpn_add_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size)
00165 #else
00166 mpn_add_n (res_ptr, s1_ptr, s2_ptr, size)
00167      register mp_ptr res_ptr;
00168      register mp_srcptr s1_ptr;
00169      register mp_srcptr s2_ptr;
00170      mp_size_t size;
00171 #endif
00172 {
00173   register mp_limb_t x, y, cy;
00174   register mp_size_t j;
00175 
00176   /* The loop counter and index J goes from -SIZE to -1.  This way
00177      the loop becomes faster.  */
00178   j = -size;
00179 
00180   /* Offset the base pointers to compensate for the negative indices.  */
00181   s1_ptr -= j;
00182   s2_ptr -= j;
00183   res_ptr -= j;
00184 
00185   cy = 0;
00186   do
00187     {
00188       y = s2_ptr[j];
00189       x = s1_ptr[j];
00190       y += cy;                     /* add previous carry to one addend */
00191       cy = (y < cy);        /* get out carry from that addition */
00192       y = x + y;            /* add other addend */
00193       cy = (y < x) + cy;    /* get out carry from that add, combine */
00194       res_ptr[j] = y;
00195     }
00196   while (++j != 0);
00197 
00198   return cy;
00199 }
00200 
00201 
00202 /* mpn_mul_n and helper function -- Multiply/square natural numbers. */
00203 
00204 /* Multiplicative inverse of 3, modulo 2^BITS_PER_MP_LIMB.
00205    0xAAAAAAAB for 32 bits, 0xAAAAAAAAAAAAAAAB for 64 bits. */
00206 #define INVERSE_3      ((MP_LIMB_T_MAX / 3) * 2 + 1)
00207 
00208 #if !defined (__alpha) && !defined (__mips)
00209 /* For all other machines, we want to call mpn functions for the compound
00210    operations instead of open-coding them.  */
00211 #define USE_MORE_MPN
00212 #endif
00213 
00214 /*== Function declarations =================================================*/
00215 
00216 static void evaluate3 _PROTO ((mp_ptr, mp_ptr, mp_ptr,
00217                                mp_ptr, mp_ptr, mp_ptr,
00218                                mp_srcptr, mp_srcptr, mp_srcptr,
00219                                mp_size_t, mp_size_t));
00220 static void interpolate3 _PROTO ((mp_srcptr,
00221                                   mp_ptr, mp_ptr, mp_ptr,
00222                                   mp_srcptr,
00223                                   mp_ptr, mp_ptr, mp_ptr,
00224                                   mp_size_t, mp_size_t));
00225 static mp_limb_t add2Times _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
00226 
00227 
00228 /*-- mpn_kara_mul_n ---------------------------------------------------------------*/
00229 
00230 /* Multiplies using 3 half-sized mults and so on recursively.
00231  * p[0..2*n-1] := product of a[0..n-1] and b[0..n-1].
00232  * No overlap of p[...] with a[...] or b[...].
00233  * ws is workspace.
00234  */
00235 
void
#if __STDC__
mpn_kara_mul_n (mp_ptr p, mp_srcptr a, mp_srcptr b, mp_size_t n, mp_ptr ws)
#else
mpn_kara_mul_n(p, a, b, n, ws)
     mp_ptr    p;
     mp_srcptr a;
     mp_srcptr b;
     mp_size_t n;
     mp_ptr    ws;
#endif
{
  mp_limb_t i, sign, w, w0, w1;
  mp_size_t n2;
  mp_srcptr x, y;

  n2 = n >> 1;
  ASSERT (n2 > 0);

  /* Charge the Scheme scheduler for O(n) work so long multiplies stay
     interruptible.  */
  SCHEME_BIGNUM_USE_FUEL(n);

  if (n & 1)
    {
      /* Odd length.  The operands split into a low part of n3 = n - n2
        limbs and a high part of n2 limbs.  */
      mp_size_t n1, n3, nm1;

      n3 = n - n2;

      /* Form the absolute difference of the two halves of a into p[0..n2];
        `sign' records whether the subtraction had to be done reversed.  */
      sign = 0;
      w = a[n2];
      if (w != 0)
       w -= mpn_sub_n (p, a, a + n3, n2);
      else
       {
         /* Top limbs tie: scan downward for the first differing limb to
            decide which half is larger, then subtract smaller from larger.  */
         i = n2;
         do
           {
             --i;
             w0 = a[i];
             w1 = a[n3+i];
           }
         while (w0 == w1 && i != 0);
         if (w0 < w1)
           {
             x = a + n3;
             y = a;
             sign = 1;
           }
         else
           {
             x = a;
             y = a + n3;
           }
         mpn_sub_n (p, x, y, n2);
       }
      p[n2] = w;

      /* Same for b, into p[n3..n]; `sign' is XORed so it reflects the sign
        of the product of the two differences.  */
      w = b[n2];
      if (w != 0)
       w -= mpn_sub_n (p + n3, b, b + n3, n2);
      else
       {
         i = n2;
         do 
           {
             --i;
             w0 = b[i]; 
             w1 = b[n3+i];
           }
         while (w0 == w1 && i != 0);
         if (w0 < w1)
           {
             x = b + n3;
             y = b;
             sign ^= 1;
           }
         else
           {
             x = b;
             y = b + n3;
           }
         mpn_sub_n (p + n3, x, y, n2);
       }
      p[n] = w;

      /* Three half-size products: the difference product into ws, the low
        and high products into p.  Recurse or use the basecase depending on
        the threshold.  */
      n1 = n + 1;
      if (n2 < KARATSUBA_MUL_THRESHOLD)
       {
         if (n3 < KARATSUBA_MUL_THRESHOLD)
           {
             mpn_mul_basecase (ws, p, n3, p + n3, n3);
             mpn_mul_basecase (p, a, n3, b, n3);
           }
         else
           {
             mpn_kara_mul_n (ws, p, p + n3, n3, ws + n1);
             mpn_kara_mul_n (p, a, b, n3, ws + n1);
           }
         mpn_mul_basecase (p + n1, a + n3, n2, b + n3, n2);
       }
      else
       {
         mpn_kara_mul_n (ws, p, p + n3, n3, ws + n1);
         mpn_kara_mul_n (p, a, b, n3, ws + n1);
         mpn_kara_mul_n (p + n1, a + n3, b + n3, n2, ws + n1);
       }

      /* Interpolation: add or subtract the difference product according to
        its sign, then fold everything into the middle of p, propagating
        carries by hand through the high limbs.  */
      if (sign)
       mpn_add_n (ws, p, ws, n1);
      else
       mpn_sub_n (ws, p, ws, n1);

      nm1 = n - 1;
      if (mpn_add_n (ws, p + n1, ws, nm1))
       {
         mp_limb_t x = ws[nm1] + 1;
         ws[nm1] = x;
         if (x == 0)
           ++ws[n];
       }
      if (mpn_add_n (p + n3, p + n3, ws, n1))
       {
         /* Ripple the final carry up through p until it stops.  */
         mp_limb_t x;
         i = n1 + n3;
         do
           {
             x = p[i] + 1;
             p[i] = x;
             ++i;
           } while (x == 0);
       }
    }
  else
    {
      /* Even length: both halves are exactly n2 limbs.  */
      mp_limb_t t;

      /* Absolute difference of the halves of a into p[0..n2-1].  */
      i = n2;
      do
       {
         --i;
         w0 = a[i];
         w1 = a[n2+i];
       }
      while (w0 == w1 && i != 0);
      sign = 0;
      if (w0 < w1)
       {
         x = a + n2;
         y = a;
         sign = 1;
       }
      else
       {
         x = a;
         y = a + n2;
       }
      mpn_sub_n (p, x, y, n2);

      /* Absolute difference of the halves of b into p[n2..n-1].  */
      i = n2;
      do 
       {
         --i;
         w0 = b[i];
         w1 = b[n2+i];
       }
      while (w0 == w1 && i != 0);
      if (w0 < w1)
       {
         x = b + n2;
         y = b;
         sign ^= 1;
       }
      else
       {
         x = b;
         y = b + n2;
       }
      mpn_sub_n (p + n2, x, y, n2);

      /* Pointwise products. */
      if (n2 < KARATSUBA_MUL_THRESHOLD)
       {
         mpn_mul_basecase (ws, p, n2, p + n2, n2);
         mpn_mul_basecase (p, a, n2, b, n2);
         mpn_mul_basecase (p + n, a + n2, n2, b + n2, n2);
       }
      else
       {
         mpn_kara_mul_n (ws, p, p + n2, n2, ws + n);
         mpn_kara_mul_n (p, a, b, n2, ws + n);
         mpn_kara_mul_n (p + n, a + n2, b + n2, n2, ws + n);
       }

      /* Interpolate. */
      if (sign)
       w = mpn_add_n (ws, p, ws, n);
      else
       w = -mpn_sub_n (ws, p, ws, n);
      w += mpn_add_n (ws, p + n, ws, n);
      w += mpn_add_n (p + n2, p + n2, ws, n);
      /* TO DO: could put "if (w) { ... }" here.
       * Less work but badly predicted branch.
       * No measurable difference in speed on Alpha.
       */
      i = n + n2;
      t = p[i] + w;
      p[i] = t;
      if (t < w)
       {
         /* Ripple the carry up through p until it stops.  */
         do
           {
             ++i;
             w = p[i] + 1;
             p[i] = w;
           }
         while (w == 0);
       }
    }
}
00456 
/* mpn_kara_sqr_n -- Karatsuba squaring, the squaring analogue of
 * mpn_kara_mul_n above.
 * p[0..2*n-1] := square of a[0..n-1].
 * No overlap of p[...] with a[...].
 * ws is workspace.
 */
void
#if __STDC__
mpn_kara_sqr_n (mp_ptr p, mp_srcptr a, mp_size_t n, mp_ptr ws)
#else
mpn_kara_sqr_n (p, a, n, ws)
     mp_ptr    p;
     mp_srcptr a;
     mp_size_t n;
     mp_ptr    ws;
#endif
{
  mp_limb_t i, sign, w, w0, w1;
  mp_size_t n2;
  mp_srcptr x, y;

  n2 = n >> 1;
  ASSERT (n2 > 0);

  /* Charge the Scheme scheduler for O(n) work so long squarings stay
     interruptible.  */
  SCHEME_BIGNUM_USE_FUEL(n);

  if (n & 1)
    {
      /* Odd length.  a splits into a low part of n3 = n - n2 limbs and a
        high part of n2 limbs.  */
      mp_size_t n1, n3, nm1;

      n3 = n - n2;

      /* Absolute difference of the halves of a into p[0..n2].  */
      sign = 0;
      w = a[n2];
      if (w != 0)
       w -= mpn_sub_n (p, a, a + n3, n2);
      else
       {
         i = n2;
         do
           {
             --i;
             w0 = a[i];
             w1 = a[n3+i];
           }
         while (w0 == w1 && i != 0);
         if (w0 < w1)
           {
             x = a + n3;
             y = a;
             sign = 1;
           }
         else
           {
             x = a;
             y = a + n3;
           }
         mpn_sub_n (p, x, y, n2);
       }
      p[n2] = w;

      /* Second copy of the same difference into p[n3..n].  NOTE(review):
        both passes examine the same operand a, so `sign' appears to always
        finish 0 here; the structure is kept to mirror mpn_kara_mul_n.  */
      w = a[n2];
      if (w != 0)
       w -= mpn_sub_n (p + n3, a, a + n3, n2);
      else
       {
         i = n2;
         do 
           {
             --i;
             w0 = a[i]; 
             w1 = a[n3+i];
           }
         while (w0 == w1 && i != 0);
         if (w0 < w1)
           {
             x = a + n3;
             y = a;
             sign ^= 1;
           }
         else
           {
             x = a;
             y = a + n3;
           }
         mpn_sub_n (p + n3, x, y, n2);
       }
      p[n] = w;

      /* Three half-size squarings: the difference square into ws, the low
        and high squares into p.  Recurse or use the basecase depending on
        the threshold.  */
      n1 = n + 1;
      if (n2 < KARATSUBA_SQR_THRESHOLD)
       {
         if (n3 < KARATSUBA_SQR_THRESHOLD)
           {
             mpn_sqr_basecase (ws, p, n3);
             mpn_sqr_basecase (p, a, n3);
           }
         else
           {
             mpn_kara_sqr_n (ws, p, n3, ws + n1);
             mpn_kara_sqr_n (p, a, n3, ws + n1);
           }
         mpn_sqr_basecase (p + n1, a + n3, n2);
       }
      else
       {
         mpn_kara_sqr_n (ws, p, n3, ws + n1);
         mpn_kara_sqr_n (p, a, n3, ws + n1);
         mpn_kara_sqr_n (p + n1, a + n3, n2, ws + n1);
       }

      /* Interpolation: combine the three squares, propagating carries by
        hand through the high limbs.  */
      if (sign)
       mpn_add_n (ws, p, ws, n1);
      else
       mpn_sub_n (ws, p, ws, n1);

      nm1 = n - 1;
      if (mpn_add_n (ws, p + n1, ws, nm1))
       {
         mp_limb_t x = ws[nm1] + 1;
         ws[nm1] = x;
         if (x == 0)
           ++ws[n];
       }
      if (mpn_add_n (p + n3, p + n3, ws, n1))
       {
         /* Ripple the final carry up through p until it stops.  */
         mp_limb_t x;
         i = n1 + n3;
         do
           {
             x = p[i] + 1;
             p[i] = x;
             ++i;
           } while (x == 0);
       }
    }
  else
    {
      /* Even length: both halves are exactly n2 limbs.  */
      mp_limb_t t;

      /* Absolute difference of the halves of a into p[0..n2-1].  */
      i = n2;
      do
       {
         --i;
         w0 = a[i];
         w1 = a[n2+i];
       }
      while (w0 == w1 && i != 0);
      sign = 0;
      if (w0 < w1)
       {
         x = a + n2;
         y = a;
         sign = 1;
       }
      else
       {
         x = a;
         y = a + n2;
       }
      mpn_sub_n (p, x, y, n2);

      /* Second copy of the same difference into p[n2..n-1] (see the
        NOTE(review) in the odd branch about `sign').  */
      i = n2;
      do 
       {
         --i;
         w0 = a[i];
         w1 = a[n2+i];
       }
      while (w0 == w1 && i != 0);
      if (w0 < w1)
       {
         x = a + n2;
         y = a;
         sign ^= 1;
       }
      else
       {
         x = a;
         y = a + n2;
       }
      mpn_sub_n (p + n2, x, y, n2);

      /* Pointwise products. */
      if (n2 < KARATSUBA_SQR_THRESHOLD)
       {
         mpn_sqr_basecase (ws, p, n2);
         mpn_sqr_basecase (p, a, n2);
         mpn_sqr_basecase (p + n, a + n2, n2);
       }
      else
       {
         mpn_kara_sqr_n (ws, p, n2, ws + n);
         mpn_kara_sqr_n (p, a, n2, ws + n);
         mpn_kara_sqr_n (p + n, a + n2, n2, ws + n);
       }

      /* Interpolate. */
      if (sign)
       w = mpn_add_n (ws, p, ws, n);
      else
       w = -mpn_sub_n (ws, p, ws, n);
      w += mpn_add_n (ws, p + n, ws, n);
      w += mpn_add_n (p + n2, p + n2, ws, n);
      /* TO DO: could put "if (w) { ... }" here.
       * Less work but badly predicted branch.
       * No measurable difference in speed on Alpha.
       */
      i = n + n2;
      t = p[i] + w;
      p[i] = t;
      if (t < w)
       {
         /* Ripple the carry up through p until it stops.  */
         do
           {
             ++i;
             w = p[i] + 1;
             p[i] = w;
           }
         while (w == 0);
       }
    }
}
00676 
00677 /*-- add2Times -------------------------------------------------------------*/
00678 
00679 /* z[] = x[] + 2 * y[]
00680    Note that z and x might point to the same vectors. */
#ifdef USE_MORE_MPN
/* mpn variant: compute 2*y into a temporary with mpn_lshift, then add.
   Returns the total carry out of the top limb.  */
static inline mp_limb_t
#if __STDC__
add2Times (mp_ptr z, mp_srcptr x, mp_srcptr y, mp_size_t n)
#else
add2Times (z, x, y, n)
     mp_ptr    z;
     mp_srcptr x;
     mp_srcptr y;
     mp_size_t n;
#endif
{
  mp_ptr t;
  mp_limb_t c;
  TMP_DECL (marker);
  TMP_MARK (marker);
  t = (mp_ptr) TMP_ALLOC (n * BYTES_PER_MP_LIMB);
  c = mpn_lshift (t, y, n, 1);   /* t = 2*y; c = bit shifted out the top */
  c += mpn_add_n (z, x, t, n);   /* z = x + t; fold in its carry */
  TMP_FREE (marker);
  return c;
}
#else

/* Open-coded variant: walk the limbs once, doubling y and adding x with
   explicit carry tracking.  Returns the carry out of the top limb.  */
static mp_limb_t
#if __STDC__
add2Times (mp_ptr z, mp_srcptr x, mp_srcptr y, mp_size_t n)
#else
add2Times (z, x, y, n)
     mp_ptr    z;
     mp_srcptr x;
     mp_srcptr y;
     mp_size_t n;
#endif
{
  mp_limb_t c, v, w;

  ASSERT (n > 0);
  /* First limb: no incoming carry.  */
  v = *x; w = *y;
  c = w >> (BITS_PER_MP_LIMB - 1);  /* bit lost when w is doubled */
  w <<= 1;
  v += w;
  c += v < w;                       /* carry from the add */
  *z = v;
  ++x; ++y; ++z;
  while (--n)
    {
      v = *x;
      w = *y;
      v += c;                          /* fold in previous carry */
      c = v < c;
      c += w >> (BITS_PER_MP_LIMB - 1);
      w <<= 1;
      v += w;
      c += v < w;
      *z = v;
      ++x; ++y; ++z;
    }

  return c;
}
#endif
00743 
00744 /*-- evaluate3 -------------------------------------------------------------*/
00745 
00746 /* Evaluates:
00747  *   ph := 4*A+2*B+C
00748  *   p1 := A+B+C
00749  *   p2 := A+2*B+4*C
00750  * where:
00751  *   ph[], p1[], p2[], A[] and B[] all have length len,
00752  *   C[] has length len2 with len-len2 = 0, 1 or 2.
00753  * Returns top words (overflow) at pth, pt1 and pt2 respectively.
00754  */
00755 #ifdef USE_MORE_MPN
static void
#if __STDC__
evaluate3 (mp_ptr ph, mp_ptr p1, mp_ptr p2, mp_ptr pth, mp_ptr pt1, mp_ptr pt2,
          mp_srcptr A, mp_srcptr B, mp_srcptr C, mp_size_t len, mp_size_t len2)
#else
evaluate3 (ph, p1, p2, pth, pt1, pt2,
           A, B, C, len, len2)
     mp_ptr    ph;
     mp_ptr    p1;
     mp_ptr    p2;
     mp_ptr    pth;
     mp_ptr    pt1;
     mp_ptr    pt2;
     mp_srcptr A;
     mp_srcptr B;
     mp_srcptr C;
     mp_size_t len;
     mp_size_t len2;
#endif
{
  mp_limb_t c, d, e;
  
  ASSERT (len - len2 <= 2);

  /* p1 = 2*B for now; e is the bit shifted out the top, reused for both
     ph and p2 below.  */
  e = mpn_lshift (p1, B, len, 1);

  /* ph = 4*A + 2*B + C, accumulating all overflow into c.  */
  c = mpn_lshift (ph, A, len, 2);
  c += e + mpn_add_n (ph, ph, p1, len);
  d = mpn_add_n (ph, ph, C, len2);
  /* C is shorter by 0, 1 or 2 limbs; propagate its carry the rest of the way.  */
  if (len2 == len) c += d; else c += mpn_add_1 (ph + len2, ph + len2, len-len2, d);
  ASSERT (c < 7);
  *pth = c;

  /* p2 = A + 2*B + 4*C, again with all overflow collected in c.  */
  c = mpn_lshift (p2, C, len2, 2);
#if 1
  /* When C is short, place its overflow limb explicitly and clear the top.  */
  if (len2 != len) { p2[len-1] = 0; p2[len2] = c; c = 0; }
  c += e + mpn_add_n (p2, p2, p1, len);
#else
  d = mpn_add_n (p2, p2, p1, len2);
  c += d;
  if (len2 != len) c = mpn_add_1 (p2+len2, p1+len2, len-len2, c);
  c += e;
#endif
  c += mpn_add_n (p2, p2, A, len);
  ASSERT (c < 7);
  *pt2 = c;

  /* p1 = A + B + C (overwrites the 2*B kept there so far).  */
  c = mpn_add_n (p1, A, B, len);
  d = mpn_add_n (p1, p1, C, len2);
  if (len2 == len) c += d;
  else c += mpn_add_1 (p1+len2, p1+len2, len-len2, d);
  ASSERT (c < 3);
  *pt1 = c;

}
00811 
00812 #else
00813 
static void
#if __STDC__
evaluate3 (mp_ptr ph, mp_ptr p1, mp_ptr p2, mp_ptr pth, mp_ptr pt1, mp_ptr pt2,
          mp_srcptr A, mp_srcptr B, mp_srcptr C, mp_size_t l, mp_size_t ls)
#else
evaluate3 (ph, p1, p2, pth, pt1, pt2,
           A, B, C, l, ls)
     mp_ptr    ph;
     mp_ptr    p1;
     mp_ptr    p2;
     mp_ptr    pth;
     mp_ptr    pt1;
     mp_ptr    pt2;
     mp_srcptr A;
     mp_srcptr B;
     mp_srcptr C;
     mp_size_t l;
     mp_size_t ls;
#endif
{
  /* Open-coded single pass over the limbs.  vh/v1/v2 are the current
     output limbs of ph/p1/p2; th/t1/t2 carry the running overflow into
     the next limb.  */
  mp_limb_t a,b,c, i, t, th,t1,t2, vh,v1,v2;

  ASSERT (l - ls <= 2);

  th = t1 = t2 = 0;
  for (i = 0; i < l; ++i)
    {
      a = *A;
      b = *B;
      /* C is up to two limbs shorter; treat the missing limbs as zero.  */
      c = i < ls ? *C : 0;

      /* vh = 4*a + 2*b + c plus incoming overflow; th collects carries and
         the bits shifted out the top of a and b.  */
      /* TO DO: choose one of the following alternatives. */
#if 0
      t = a << 2;
      vh = th + t;
      th = vh < t;
      th += a >> (BITS_PER_MP_LIMB - 2);
      t = b << 1;
      vh += t;
      th += vh < t;
      th += b >> (BITS_PER_MP_LIMB - 1);
      vh += c;
      th += vh < c;
#else
      vh = th + c;
      th = vh < c;
      t = b << 1;
      vh += t;
      th += vh < t;
      th += b >> (BITS_PER_MP_LIMB - 1);
      t = a << 2;
      vh += t;
      th += vh < t;
      th += a >> (BITS_PER_MP_LIMB - 2);
#endif

      /* v1 = a + b + c plus incoming overflow.  */
      v1 = t1 + a;
      t1 = v1 < a;
      v1 += b;
      t1 += v1 < b;
      v1 += c;
      t1 += v1 < c;

      /* v2 = a + 2*b + 4*c plus incoming overflow.  */
      v2 = t2 + a;
      t2 = v2 < a;
      t = b << 1;
      v2 += t;
      t2 += v2 < t;
      t2 += b >> (BITS_PER_MP_LIMB - 1);
      t = c << 2;
      v2 += t;
      t2 += v2 < t;
      t2 += c >> (BITS_PER_MP_LIMB - 2);

      *ph = vh;
      *p1 = v1;
      *p2 = v2;

      /* NOTE(review): C is advanced even once i >= ls, walking up to two
         limbs past its end without dereferencing; strictly that pointer
         arithmetic is outside the C standard's guarantees -- confirm it is
         benign on the supported platforms.  */
      ++A; ++B; ++C;
      ++ph; ++p1; ++p2;
    }

  ASSERT (th < 7);
  ASSERT (t1 < 3);
  ASSERT (t2 < 7);

  /* Hand the final overflow words back to the caller.  */
  *pth = th;
  *pt1 = t1;
  *pt2 = t2;
}
00904 #endif
00905 
00906 
00907 /*-- interpolate3 ----------------------------------------------------------*/
00908 
00909 /* Interpolates B, C, D (in-place) from:
00910  *   16*A+8*B+4*C+2*D+E
00911  *   A+B+C+D+E
00912  *   A+2*B+4*C+8*D+16*E
00913  * where:
00914  *   A[], B[], C[] and D[] all have length l,
00915  *   E[] has length ls with l-ls = 0, 2 or 4.
00916  *
00917  * Reads top words (from earlier overflow) from ptb, ptc and ptd,
00918  * and returns new top words there.
00919  */
00920 
00921 #ifdef USE_MORE_MPN
static void
#if __STDC__
interpolate3 (mp_srcptr A, mp_ptr B, mp_ptr C, mp_ptr D, mp_srcptr E,
              mp_ptr ptb, mp_ptr ptc, mp_ptr ptd, mp_size_t len, mp_size_t len2)
#else
interpolate3 (A, B, C, D, E,
              ptb, ptc, ptd, len, len2)
     mp_srcptr A;
     mp_ptr    B;
     mp_ptr    C;
     mp_ptr    D;
     mp_srcptr E;
     mp_ptr    ptb;
     mp_ptr    ptc;
     mp_ptr    ptd;
     mp_size_t len;
     mp_size_t len2;
#endif
{
  mp_ptr ws;
  mp_limb_t t, tb,tc,td;
  TMP_DECL (marker);
  TMP_MARK (marker);

  ASSERT (len - len2 == 0 || len - len2 == 2 || len - len2 == 4);

  /* Let x1, x2, x3 be the values to interpolate.  We have:
   *         b = 16*a + 8*x1 + 4*x2 + 2*x3 +    e
   *         c =    a +   x1 +   x2 +   x3 +    e
   *         d =    a + 2*x1 + 4*x2 + 8*x3 + 16*e
   */

  ws = (mp_ptr) TMP_ALLOC (len * BYTES_PER_MP_LIMB);

  /* tb/tc/td are the callers' saved top (overflow) words; they take part in
     all the arithmetic below and are returned updated at the end.  */
  tb = *ptb; tc = *ptc; td = *ptd;


  /* b := b - 16*a -    e
   * c := c -    a -    e
   * d := d -    a - 16*e
   */

  t = mpn_lshift (ws, A, len, 4);              /* ws = 16*a */
  tb -= t + mpn_sub_n (B, B, ws, len);
  t = mpn_sub_n (B, B, E, len2);
  /* e is up to 4 limbs shorter; propagate its borrow the rest of the way.  */
  if (len2 == len) tb -= t;
  else tb -= mpn_sub_1 (B+len2, B+len2, len-len2, t);

  tc -= mpn_sub_n (C, C, A, len);
  t = mpn_sub_n (C, C, E, len2);
  if (len2 == len) tc -= t;
  else tc -= mpn_sub_1 (C+len2, C+len2, len-len2, t);

  t = mpn_lshift (ws, E, len2, 4);             /* ws = 16*e + a */
  t += mpn_add_n (ws, ws, A, len2);
#if 1
  if (len2 != len) t = mpn_add_1 (ws+len2, A+len2, len-len2, t);
  td -= t + mpn_sub_n (D, D, ws, len);
#else
  t += mpn_sub_n (D, D, ws, len2);
  if (len2 != len) {
    t = mpn_sub_1 (D+len2, D+len2, len-len2, t);
    t += mpn_sub_n (D+len2, D+len2, A+len2, len-len2);
  } /* end if/else */
  td -= t;
#endif


  /* b, d := b + d, b - d */

#ifdef HAVE_MPN_ADD_SUB_N
  /* #error TO DO ... */
#else
  t = tb + td + mpn_add_n (ws, B, D, len);  
  td = tb - td - mpn_sub_n (D, B, D, len);
  tb = t;
  MPN_COPY (B, ws, len);
#endif
  
  /* b := b-8*c */
  t = 8 * tc + mpn_lshift (ws, C, len, 3);
  tb -= t + mpn_sub_n (B, B, ws, len);

  /* c := 2*c - b */
  tc = 2 * tc + mpn_lshift (C, C, len, 1);
  tc -= tb + mpn_sub_n (C, C, B, len);

  /* d := d/3 -- exact division; the top word is fixed up via the modular
     inverse of 3.  */
  td = (td - mpn_divexact_by3 (D, D, len)) * INVERSE_3;

  /* b, d := b + d, b - d */
#ifdef HAVE_MPN_ADD_SUB_N
  /* #error TO DO ... */
#else
  t = tb + td + mpn_add_n (ws, B, D, len);  
  td = tb - td - mpn_sub_n (D, B, D, len);
  tb = t;
  MPN_COPY (B, ws, len);
#endif

      /* Now:
       *       b = 4*x1
       *       c = 2*x2
       *       d = 4*x3
       */

  /* Shift each result down to its final value, moving the low bits of the
     saved top word into the vector's high limb.  The ASSERTs check the
     divisions were exact and the top words non-negative.  */
  ASSERT(!(*B & 3));
  mpn_rshift (B, B, len, 2);
  B[len-1] |= tb<<(BITS_PER_MP_LIMB-2);
  ASSERT((long)tb >= 0);
  tb >>= 2;

  ASSERT(!(*C & 1));
  mpn_rshift (C, C, len, 1);
  C[len-1] |= tc<<(BITS_PER_MP_LIMB-1);
  ASSERT((long)tc >= 0);
  tc >>= 1;

  ASSERT(!(*D & 3));
  mpn_rshift (D, D, len, 2);
  D[len-1] |= td<<(BITS_PER_MP_LIMB-2);
  ASSERT((long)td >= 0);
  td >>= 2;

#if WANT_ASSERT
  ASSERT (tb < 2);
  if (len == len2)
    {
      ASSERT (tc < 3);
      ASSERT (td < 2);
    }
  else
    {
      ASSERT (tc < 2);
      ASSERT (!td);
    }
#endif

  /* Return the updated top words.  */
  *ptb = tb;
  *ptc = tc;
  *ptd = td;

  TMP_FREE (marker);
}
01066 
01067 #else
01068 
/* Toom-3 interpolation step, computed one limb at a time.
 *
 * A and E (read-only) are the two "outer" pointwise products; B, C, D are
 * overwritten in place with the three middle coefficients recovered from
 * the linear system shown in the loop body (b = 16a + 8x1 + 4x2 + 2x3 + e,
 * etc.).  ptb/ptc/ptd point to single top limbs of B/C/D, read on entry
 * and updated on exit.  l is the limb length of B, C, D; ls is the length
 * of E (l - ls is 0, 2 or 4, asserted below).
 *
 * All carries/borrows are tracked manually in sb/sc/sd across iterations
 * instead of making separate mpn passes over the operands.
 */
static void
#if __STDC__
interpolate3 (mp_srcptr A, mp_ptr B, mp_ptr C, mp_ptr D, mp_srcptr E,
             mp_ptr ptb, mp_ptr ptc, mp_ptr ptd, mp_size_t l, mp_size_t ls)
#else
interpolate3 (A, B, C, D, E,
              ptb, ptc, ptd, l, ls)
     mp_srcptr A;
     mp_ptr    B;
     mp_ptr    C;
     mp_ptr    D;
     mp_srcptr E;
     mp_ptr    ptb;
     mp_ptr    ptc;
     mp_ptr    ptd;
     mp_size_t l;
     mp_size_t ls;
#endif
{
  /* i is a loop counter; declared mp_limb_t alongside the data limbs. */
  mp_limb_t a,b,c,d,e,t, i, sb,sc,sd, ob,oc,od;
  /* Mask covering the upper half of a limb; used to sign-extend the
     periodic carry accumulators sb and sd below. */
  const mp_limb_t maskOffHalf = (~(mp_limb_t) 0) << (BITS_PER_MP_LIMB >> 1);

#if WANT_ASSERT
  t = l - ls;
  ASSERT (t == 0 || t == 2 || t == 4);
#endif

  sb = sc = sd = 0;
  for (i = 0; i < l; ++i)
    {
      mp_limb_t tb, tc, td, tt;

      a = *A;
      b = *B;
      c = *C;
      d = *D;
      /* E is shorter than the others; treat missing limbs as zero. */
      e = i < ls ? *E : 0;

      /* Let x1, x2, x3 be the values to interpolate.  We have:
       *       b = 16*a + 8*x1 + 4*x2 + 2*x3 +    e
       *       c =   a +   x1 +   x2 +   x3 +    e
       *       d =   a + 2*x1 + 4*x2 + 8*x3 + 16*e
       */

      /* b := b - 16*a -    e
       * c := c -    a -    e
       * d := d -    a - 16*e
       */
      t = a << 4;
      tb = -(a >> (BITS_PER_MP_LIMB - 4)) - (b < t);
      b -= t;
      tb -= b < e;
      b -= e;
      tc = -(c < a);
      c -= a;
      tc -= c < e;
      c -= e;
      td = -(d < a);
      d -= a;
      t = e << 4;
      td = td - (e >> (BITS_PER_MP_LIMB - 4)) - (d < t);
      d -= t;

      /* b, d := b + d, b - d */
      t = b + d;
      tt = tb + td + (t < b);
      td = tb - td - (b < d);
      d = b - d;
      b = t;
      tb = tt;

      /* b := b-8*c */
      t = c << 3;
      tb = tb - (tc << 3) - (c >> (BITS_PER_MP_LIMB - 3)) - (b < t);
      b -= t;

      /* c := 2*c - b */
      t = c << 1;
      tc = (tc << 1) + (c >> (BITS_PER_MP_LIMB - 1)) - tb - (t < b);
      c = t - b;

      /* d := d/3 */
      /* Exact division by 3 via multiplication by INVERSE_3, the
         multiplicative inverse of 3 modulo 2^BITS_PER_MP_LIMB. */
      d *= INVERSE_3;
      td = td - (d >> (BITS_PER_MP_LIMB - 1)) - (d*3 < d);
      td *= INVERSE_3;

      /* b, d := b + d, b - d */
      t = b + d;
      tt = tb + td + (t < b);
      td = tb - td - (b < d);
      d = b - d;
      b = t;
      tb = tt;

      /* Now:
       *       b = 4*x1
       *       c = 2*x2
       *       d = 4*x3
       */

      /* sb has period 2. */
      b += sb;
      tb += b < sb;
      sb &= maskOffHalf;
      sb |= sb >> (BITS_PER_MP_LIMB >> 1);
      sb += tb;

      /* sc has period 1. */
      c += sc;
      tc += c < sc;
      /* TO DO: choose one of the following alternatives. */
#if 1
      sc = (mp_limb_t)((mp_limb_signed_t)sc >> (BITS_PER_MP_LIMB - 1));
      sc += tc;
#else
      sc = tc - ((mp_limb_signed_t)sc < 0L);
#endif

      /* sd has period 2. */
      d += sd;
      td += d < sd;
      sd &= maskOffHalf;
      sd |= sd >> (BITS_PER_MP_LIMB >> 1);
      sd += td;

      /* Store the previous iteration's limb, shifted down by the known
         factor (4 for B and D, 2 for C), with this limb's low bits
         shifted in from above. */
      if (i != 0)
       {
         B[-1] = ob | b << (BITS_PER_MP_LIMB - 2);
         C[-1] = oc | c << (BITS_PER_MP_LIMB - 1);
         D[-1] = od | d << (BITS_PER_MP_LIMB - 2);
       }
      ob = b >> 2;
      oc = c >> 1;
      od = d >> 2;

      ++A; ++B; ++C; ++D; ++E;
    }

  /* Handle top words. */
  b = *ptb;
  c = *ptc;
  d = *ptd;

  t = b + d;
  d = b - d;
  b = t;
  b -= c << 3;
  c = (c << 1) - b;
  d *= INVERSE_3;
  t = b + d;
  d = b - d;
  b = t;

  b += sb;
  c += sc;
  d += sd;

  B[-1] = ob | b << (BITS_PER_MP_LIMB - 2);
  C[-1] = oc | c << (BITS_PER_MP_LIMB - 1);
  D[-1] = od | d << (BITS_PER_MP_LIMB - 2);

  b >>= 2;
  c >>= 1;
  d >>= 2;

#if WANT_ASSERT
  ASSERT (b < 2);
  if (l == ls)
    {
      ASSERT (c < 3);
      ASSERT (d < 2);
    }
  else
    {
      ASSERT (c < 2);
      ASSERT (!d);
    }
#endif

  *ptb = b;
  *ptc = c;
  *ptd = d;
}
01252 #endif
01253 
01254 
01255 /*-- mpn_toom3_mul_n --------------------------------------------------------------*/
01256 
01257 /* Multiplies using 5 mults of one third size and so on recursively.
01258  * p[0..2*n-1] := product of a[0..n-1] and b[0..n-1].
01259  * No overlap of p[...] with a[...] or b[...].
01260  * ws is workspace.
01261  */
01262 
01263 /* TO DO: If TOOM3_MUL_THRESHOLD is much bigger than KARATSUBA_MUL_THRESHOLD then the
01264  *        recursion in mpn_toom3_mul_n() will always bottom out with mpn_kara_mul_n()
01265  *        because the "n < KARATSUBA_MUL_THRESHOLD" test here will always be false.
01266  */
01267 
/* Recursive multiply for the Toom-3 sub-products: pick basecase,
   Karatsuba or Toom-3 by operand size n.  ws is scratch space. */
#define TOOM3_MUL_REC(p, a, b, n, ws) \
  do {                                                  \
    if (n < KARATSUBA_MUL_THRESHOLD)                           \
      mpn_mul_basecase (p, a, n, b, n);                        \
    else if (n < TOOM3_MUL_THRESHOLD)                          \
      mpn_kara_mul_n (p, a, b, n, ws);                         \
    else                                                \
      mpn_toom3_mul_n (p, a, b, n, ws);                        \
  } while (0)
01277 
/* Toom-3 multiplication: p[0..2n-1] := a[0..n-1] * b[0..n-1], using five
 * recursive multiplies of roughly one-third size.  p must not overlap a
 * or b; ws is workspace.
 *
 * cB/cC/cD and dB/dC/dD are the top limbs produced by evaluating a and b
 * at the Toom points; tB/tC/tD accumulate top limbs of the pointwise
 * products and are consumed by interpolate3.
 */
void
#if __STDC__
mpn_toom3_mul_n (mp_ptr p, mp_srcptr a, mp_srcptr b, mp_size_t n, mp_ptr ws)
#else
mpn_toom3_mul_n (p, a, b, n, ws)
     mp_ptr    p;
     mp_srcptr a;
     mp_srcptr b;
     mp_size_t n;
     mp_ptr    ws;
#endif
{
  mp_limb_t cB,cC,cD, dB,dC,dD, tB,tC,tD;
  mp_limb_t *A,*B,*C,*D,*E, *W;
  mp_size_t l,l2,l3,l4,l5,ls;

  SCHEME_BIGNUM_USE_FUEL(n);

  /* Break n words into chunks of size l, l and ls.
   * n = 3*k   => l = k,   ls = k
   * n = 3*k+1 => l = k+1, ls = k-1
   * n = 3*k+2 => l = k+1, ls = k
   */
  {
    mp_limb_t m;

    ASSERT (n >= TOOM3_MUL_THRESHOLD);
    l = ls = n / 3;
    m = n - l * 3;
    if (m != 0)
      ++l;
    if (m == 1)
      --ls;

    l2 = l * 2;
    l3 = l * 3;
    l4 = l * 4;
    l5 = l * 5;
    /* The five 2l-limb product areas A..E are interleaved between the
       result p and the workspace ws; W is recursion scratch. */
    A = p;
    B = ws;
    C = p + l2;
    D = ws + l2;
    E = p + l4;
    W = ws + l4;
  }

  /* Evaluate both operands at the Toom points (a's results in A/B/C,
     b's in the upper halves), capturing top limbs in cB..cD / dB..dD. */
  evaluate3 (A, B, C, &cB, &cC, &cD, a, a + l, a + l2, l, ls);
  evaluate3 (A + l, B + l, C + l, &dB, &dC, &dD, b, b + l, b + l2, l, ls);

  /* Five pointwise products; each top-limb product is folded in with
     mpn_addmul_1 / add2Times, and bounded by the ASSERTs. */
  TOOM3_MUL_REC(D, C, C + l, l, W);
  tD = cD*dD;
  if (cD) tD += mpn_addmul_1 (D + l, C + l, l, cD);
  if (dD) tD += mpn_addmul_1 (D + l, C, l, dD);
  ASSERT (tD < 49);
  TOOM3_MUL_REC(C, B, B + l, l, W);
  tC = cC*dC;
  /* TO DO: choose one of the following alternatives. */
#if 0
  if (cC) tC += mpn_addmul_1 (C + l, B + l, l, cC);
  if (dC) tC += mpn_addmul_1 (C + l, B, l, dC);
#else
  /* cC and dC are at most 2 here, so a plain add or doubled add
     replaces the general addmul. */
  if (cC)
    {
      if (cC == 1) tC += mpn_add_n (C + l, C + l, B + l, l);
      else tC += add2Times (C + l, C + l, B + l, l);
    }
  if (dC)
    {
      if (dC == 1) tC += mpn_add_n (C + l, C + l, B, l);
      else tC += add2Times (C + l, C + l, B, l);
    }
#endif
  ASSERT (tC < 9);
  TOOM3_MUL_REC(B, A, A + l, l, W);
  tB = cB*dB;
  if (cB) tB += mpn_addmul_1 (B + l, A + l, l, cB);
  if (dB) tB += mpn_addmul_1 (B + l, A, l, dB);
  ASSERT (tB < 49);
  TOOM3_MUL_REC(A, a, b, l, W);
  TOOM3_MUL_REC(E, a + l2, b + l2, ls, W);

  /* Recover the middle coefficients from the pointwise products. */
  interpolate3 (A, B, C, D, E, &tB, &tC, &tD, l2, ls << 1);

  /* Recombine: add the B and D coefficient blocks at their limb offsets
     and propagate the accumulated top limbs. */
  {
    /* mp_limb_t i, x, y; */
    tB += mpn_add_n (p + l, p + l, B, l2);
    tD += mpn_add_n (p + l3, p + l3, D, l2);
    mpn_incr_u (p + l3, tB);
    mpn_incr_u (p + l4, tC);
    mpn_incr_u (p + l5, tD);
  }
}
01374 
01375 /*-- mpn_toom3_sqr_n --------------------------------------------------------------*/
01376 
01377 /* Like previous function but for squaring */
01378 
/* Recursive square for the Toom-3 sub-products: pick basecase,
   Karatsuba or Toom-3 squaring by operand size n.  ws is scratch. */
#define TOOM3_SQR_REC(p, a, n, ws) \
  do {                                                  \
    if (n < KARATSUBA_SQR_THRESHOLD)                           \
      mpn_sqr_basecase (p, a, n);                       \
    else if (n < TOOM3_SQR_THRESHOLD)                          \
      mpn_kara_sqr_n (p, a, n, ws);                            \
    else                                                \
      mpn_toom3_sqr_n (p, a, n, ws);                           \
  } while (0)
01388 
/* Toom-3 squaring: p[0..2n-1] := a[0..n-1]^2.  Mirrors mpn_toom3_mul_n
 * but with a single operand, so only one evaluate3 call and one set of
 * evaluation top limbs (cB/cC/cD) is needed.
 */
void
#if __STDC__
mpn_toom3_sqr_n (mp_ptr p, mp_srcptr a, mp_size_t n, mp_ptr ws)
#else
mpn_toom3_sqr_n (p, a, n, ws)
     mp_ptr    p;
     mp_srcptr a;
     mp_size_t n;
     mp_ptr    ws;
#endif
{
  mp_limb_t cB,cC,cD, tB,tC,tD;
  mp_limb_t *A,*B,*C,*D,*E, *W;
  mp_size_t l,l2,l3,l4,l5,ls;

  SCHEME_BIGNUM_USE_FUEL(n);

  /* Break n words into chunks of size l, l and ls.
   * n = 3*k   => l = k,   ls = k
   * n = 3*k+1 => l = k+1, ls = k-1
   * n = 3*k+2 => l = k+1, ls = k
   */
  {
    mp_limb_t m;

    /* NOTE(review): asserts against TOOM3_MUL_THRESHOLD even though this
       is the squaring routine (which recurses on TOOM3_SQR_THRESHOLD) —
       confirm this asymmetry is intended. */
    ASSERT (n >= TOOM3_MUL_THRESHOLD);
    l = ls = n / 3;
    m = n - l * 3;
    if (m != 0)
      ++l;
    if (m == 1)
      --ls;

    l2 = l * 2;
    l3 = l * 3;
    l4 = l * 4;
    l5 = l * 5;
    /* Product areas A..E interleaved between result p and workspace ws. */
    A = p;
    B = ws;
    C = p + l2;
    D = ws + l2;
    E = p + l4;
    W = ws + l4;
  }

  /* Evaluate a at the Toom points, capturing top limbs in cB/cC/cD. */
  evaluate3 (A, B, C, &cB, &cC, &cD, a, a + l, a + l2, l, ls);

  /* Five pointwise squares; cross terms with the top limb use 2*c. */
  TOOM3_SQR_REC(D, C, l, W);
  tD = cD*cD;
  if (cD) tD += mpn_addmul_1 (D + l, C, l, 2*cD);
  ASSERT (tD < 49);
  TOOM3_SQR_REC(C, B, l, W);
  tC = cC*cC;
  /* TO DO: choose one of the following alternatives. */
#if 0
  if (cC) tC += mpn_addmul_1 (C + l, B, l, 2*cC);
#else
  /* cC is at most 2, so 2*cC*B is one or two doubled additions. */
  if (cC >= 1)
    {
      tC += add2Times (C + l, C + l, B, l);
      if (cC == 2)
        tC += add2Times (C + l, C + l, B, l);
    }
#endif
  ASSERT (tC < 9);
  TOOM3_SQR_REC(B, A, l, W);
  tB = cB*cB;
  if (cB) tB += mpn_addmul_1 (B + l, A, l, 2*cB);
  ASSERT (tB < 49);
  TOOM3_SQR_REC(A, a, l, W);
  TOOM3_SQR_REC(E, a + l2, ls, W);

  /* Recover the middle coefficients from the pointwise squares. */
  interpolate3 (A, B, C, D, E, &tB, &tC, &tD, l2, ls << 1);

  /* Recombine coefficient blocks and propagate accumulated top limbs. */
  {
    /* mp_limb_t i, x, y; */
    tB += mpn_add_n (p + l, p + l, B, l2);
    tD += mpn_add_n (p + l3, p + l3, D, l2);
    mpn_incr_u (p + l3, tB);
    mpn_incr_u (p + l4, tC);
    mpn_incr_u (p + l5, tD);
  }
}
01476 
/* Top-level same-size multiply: p[0..2n-1] := a[0..n-1] * b[0..n-1].
 * Dispatches by size to basecase, Karatsuba (stack workspace), Toom-3
 * (heap workspace) or, when compiled in, FFT multiplication.
 */
void
#if __STDC__
mpn_mul_n (mp_ptr p, mp_srcptr a, mp_srcptr b, mp_size_t n)
#else
mpn_mul_n (p, a, b, n)
     mp_ptr    p;
     mp_srcptr a;
     mp_srcptr b;
     mp_size_t n;
#endif
{
  if (n < KARATSUBA_MUL_THRESHOLD)
    mpn_mul_basecase (p, a, n, b, n);
  else if (n < TOOM3_MUL_THRESHOLD)
    {
      /* Allocate workspace of fixed size on stack: fast! */
      /* n is bounded by TOOM3_MUL_THRESHOLD here, so the worst-case
         Karatsuba workspace size is a compile-time constant. */
#if TUNE_PROGRAM_BUILD
      mp_limb_t ws[2 * (TOOM3_MUL_THRESHOLD_LIMIT-1) + 2 * BITS_PER_MP_LIMB];
#else
      mp_limb_t ws[2 * (TOOM3_MUL_THRESHOLD-1) + 2 * BITS_PER_MP_LIMB];
#endif
      mpn_kara_mul_n (p, a, b, n, ws);
    }
#if WANT_FFT || TUNE_PROGRAM_BUILD
  else if (n < FFT_MUL_THRESHOLD)
#else
  else
#endif
    {
      /* Use workspace of unknown size in heap, as stack space may
       * be limited.  Since n is at least TOOM3_MUL_THRESHOLD, the
       * multiplication will take much longer than malloc()/free().  */
      mp_limb_t wsLen, *ws;
      TMP_DECL (marker);
      TMP_MARK (marker);

      wsLen = 2 * n + 3 * BITS_PER_MP_LIMB;
      ws = (mp_ptr) TMP_ALLOC ((size_t) wsLen * sizeof (mp_limb_t));
      mpn_toom3_mul_n (p, a, b, n, ws);
      TMP_FREE (marker);
    }
#if WANT_FFT || TUNE_PROGRAM_BUILD
  else
    {
      mpn_mul_fft_full (p, a, n, b, n);      
    }
#endif
}
01525 
01526 
01527 /* mpn_rshift -- Shift right a low-level natural-number integer. */
01528 
01529 /* Shift U (pointed to by UP and USIZE limbs long) CNT bits to the right
01530    and store the USIZE least significant limbs of the result at WP.
01531    The bits shifted out to the right are returned.
01532 
01533    Argument constraints:
01534    1. 0 < CNT < BITS_PER_MP_LIMB
01535    2. If the result is to be written over the input, WP must be <= UP.
01536 */
01537 
01538 mp_limb_t
01539 #if __STDC__
01540 mpn_rshift (register mp_ptr wp,
01541            register mp_srcptr up, mp_size_t usize,
01542            register unsigned int cnt)
01543 #else
01544 mpn_rshift (wp, up, usize, cnt)
01545      register mp_ptr wp;
01546      register mp_srcptr up;
01547      mp_size_t usize;
01548      register unsigned int cnt;
01549 #endif
01550 {
01551   register mp_limb_t high_limb, low_limb;
01552   register unsigned sh_1, sh_2;
01553   register mp_size_t i;
01554   mp_limb_t retval;
01555 
01556 #ifdef DEBUG
01557   if (usize == 0 || cnt == 0)
01558     abort ();
01559 #endif
01560 
01561   sh_1 = cnt;
01562 
01563 #if 0
01564   if (sh_1 == 0)
01565     {
01566       if (wp != up)
01567        {
01568          /* Copy from low end to high end, to allow specified input/output
01569             overlapping.  */
01570          for (i = 0; i < usize; i++)
01571            wp[i] = up[i];
01572        }
01573       return usize;
01574     }
01575 #endif
01576 
01577   wp -= 1;
01578   sh_2 = BITS_PER_MP_LIMB - sh_1;
01579   high_limb = up[0];
01580   retval = high_limb << sh_2;
01581   low_limb = high_limb;
01582 
01583   for (i = 1; i < usize; i++)
01584     {
01585       high_limb = up[i];
01586       wp[i] = (low_limb >> sh_1) | (high_limb << sh_2);
01587       low_limb = high_limb;
01588     }
01589   wp[i] = low_limb >> sh_1;
01590 
01591   return retval;
01592 }
01593 
01594 /* mpn_lshift -- Shift left low level. */
01595 
01596 /* Shift U (pointed to by UP and USIZE digits long) CNT bits to the left
01597    and store the USIZE least significant digits of the result at WP.
01598    Return the bits shifted out from the most significant digit.
01599 
01600    Argument constraints:
01601    1. 0 < CNT < BITS_PER_MP_LIMB
01602    2. If the result is to be written over the input, WP must be >= UP.
01603 */
01604 
01605 mp_limb_t
01606 #if __STDC__
01607 mpn_lshift (register mp_ptr wp,
01608            register mp_srcptr up, mp_size_t usize,
01609            register unsigned int cnt)
01610 #else
01611 mpn_lshift (wp, up, usize, cnt)
01612      register mp_ptr wp;
01613      register mp_srcptr up;
01614      mp_size_t usize;
01615      register unsigned int cnt;
01616 #endif
01617 {
01618   register mp_limb_t high_limb, low_limb;
01619   register unsigned sh_1, sh_2;
01620   register mp_size_t i;
01621   mp_limb_t retval;
01622 
01623 #ifdef DEBUG
01624   if (usize == 0 || cnt == 0)
01625     abort ();
01626 #endif
01627 
01628   sh_1 = cnt;
01629 #if 0
01630   if (sh_1 == 0)
01631     {
01632       if (wp != up)
01633        {
01634          /* Copy from high end to low end, to allow specified input/output
01635             overlapping.  */
01636          for (i = usize - 1; i >= 0; i--)
01637            wp[i] = up[i];
01638        }
01639       return 0;
01640     }
01641 #endif
01642 
01643   wp += 1;
01644   sh_2 = BITS_PER_MP_LIMB - sh_1;
01645   i = usize - 1;
01646   low_limb = up[i];
01647   retval = low_limb >> sh_2;
01648   high_limb = low_limb;
01649   while (--i >= 0)
01650     {
01651       low_limb = up[i];
01652       wp[i] = (high_limb << sh_1) | (low_limb >> sh_2);
01653       high_limb = low_limb;
01654     }
01655   wp[i] = high_limb << sh_1;
01656 
01657   return retval;
01658 }
01659 
01660 
01661 
01662 /* Conversion of U {up,un} to a string in base b.  Internally, we convert to
01663      base B = b^m, the largest power of b that fits a limb.  Basic algorithms:
01664 
01665   A) Divide U repeatedly by B, generating a quotient and remainder, until the
01666      quotient becomes zero.  The remainders hold the converted digits.  Digits
01667      come out from right to left.  (Used in mpn_sb_get_str.)
01668 
01669   B) Divide U by b^g, for g such that 1/b <= U/b^g < 1, generating a fraction.
01670      Then develop digits by multiplying the fraction repeatedly by b.  Digits
01671      come out from left to right.  (Currently not used herein, except for in
01672      code for converting single limbs to individual digits.)
01673 
01674   C) Compute B^1, B^2, B^4, ..., B^(2^s), for s such that B^(2^s) > sqrt(U).
01675      Then divide U by B^(2^k), generating an integer quotient and remainder.
01676      Recursively convert the quotient, then the remainder, using the
01677      precomputed powers.  Digits come out from left to right.  (Used in
01678      mpn_dc_get_str.)
01679 
01680   When using algorithm C, algorithm B might be suitable for basecase code,
01681   since the required b^g power will be readily accessible.
01682 
01683   Optimization ideas:
01684   1. The recursive function of (C) could avoid TMP allocation:
01685      a) Overwrite dividend with quotient and remainder, just as permitted by
01686         mpn_sb_divrem_mn.
01687      b) If TMP memory is anyway needed, pass it as a parameter, similarly to
01688         how we do it in Karatsuba multiplication.
01689   2. Store the powers of (C) in normalized form, with the normalization count.
01690      Quotients will usually need to be left-shifted before each divide, and
     remainders will either need to be left-shifted or right-shifted.
01692   3. When b is even, the powers will end up with lots of low zero limbs.  Could
01693      save significant time in the mpn_tdiv_qr call by stripping these zeros.
01694   4. In the code for developing digits from a single limb, we could avoid using
01695      a full umul_ppmm except for the first (or first few) digits, provided base
01696      is even.  Subsequent digits can be developed using plain multiplication.
01697      (This saves on register-starved machines (read x86) and on all machines
01698      that generate the upper product half using a separate instruction (alpha,
01699      powerpc, IA-64) or lacks such support altogether (sparc64, hppa64).
01700   5. Separate mpn_dc_get_str basecase code from code for small conversions. The
01701      former code will have the exact right power readily available in the
01702      powtab parameter for dividing the current number into a fraction.  Convert
01703      that using algorithm B.
01704   6. Completely avoid division.  Compute the inverses of the powers now in
01705      powtab instead of the actual powers.
01706 
01707   Basic structure of (C):
01708     mpn_get_str:
01709       if POW2_P (n)
01710        ...
01711       else
01712        if (un < GET_STR_PRECOMPUTE_THRESHOLD)
01713          mpn_sb_get_str (str, base, up, un);
01714        else
01715          precompute_power_tables
01716          mpn_dc_get_str
01717 
01718     mpn_dc_get_str:
01719        mpn_tdiv_qr
01720        if (qn < GET_STR_DC_THRESHOLD)
01721          mpn_sb_get_str
01722        else
01723          mpn_dc_get_str
01724        if (rn < GET_STR_DC_THRESHOLD)
01725          mpn_sb_get_str
01726        else
01727          mpn_dc_get_str
01728 
01729 
01730   The reason for the two threshold values is the cost of
  precompute_power_tables.  GET_STR_PRECOMPUTE_THRESHOLD will be considerably
  larger than GET_STR_DC_THRESHOLD.  Do you think I should change
01733   mpn_dc_get_str to instead look like the following?  */
01734 
01735 
01736 /* The x86s and m68020 have a quotient and remainder "div" instruction and
01737    gcc recognises an adjacent "/" and "%" can be combined using that.
01738    Elsewhere "/" and "%" are either separate instructions, or separate
01739    libgcc calls (which unfortunately gcc as of version 3.0 doesn't combine).
01740    A multiply and subtract should be faster than a "%" in those cases.  */
/* Combined quotient/remainder of n by d (unnormalized).  On CPUs with a
   quotient-and-remainder divide, gcc fuses the adjacent "/" and "%". */
#if HAVE_HOST_CPU_FAMILY_x86            \
  || HAVE_HOST_CPU_m68020               \
  || HAVE_HOST_CPU_m68030               \
  || HAVE_HOST_CPU_m68040               \
  || HAVE_HOST_CPU_m68060               \
  || HAVE_HOST_CPU_m68360 /* CPU32 */
#define udiv_qrnd_unnorm(q,r,n,d)       \
  do {                                  \
    mp_limb_t  __q = (n) / (d);         \
    mp_limb_t  __r = (n) % (d);         \
    (q) = __q;                          \
    (r) = __r;                          \
  } while (0)
#else
/* Elsewhere a multiply-and-subtract is cheaper than a second divide. */
#define udiv_qrnd_unnorm(q,r,n,d)       \
  do {                                  \
    mp_limb_t  __q = (n) / (d);         \
    mp_limb_t  __r = (n) - __q*(d);     \
    (q) = __q;                          \
    (r) = __r;                          \
  } while (0)
#endif
01763 
01764 /* When to stop divide-and-conquer and call the basecase mpn_get_str.  */
01765 #ifndef GET_STR_DC_THRESHOLD
01766 #define GET_STR_DC_THRESHOLD 15
01767 #endif
01768 /* Whether to bother at all with precomputing powers of the base, or go
01769    to the basecase mpn_get_str directly.  */
01770 #ifndef GET_STR_PRECOMPUTE_THRESHOLD
01771 #define GET_STR_PRECOMPUTE_THRESHOLD 30
01772 #endif
01773 
/* One entry in the table of precomputed powers big_base^(2^k) used by
   the divide-and-conquer string conversion (mpn_dc_get_str). */
struct powers
{
  size_t digits_in_base;    /* number of base-`base' digits this power covers */
  mp_ptr p;                 /* limbs of the power */
  mp_size_t n;              /* mpz_struct uses int for sizes, but not mpn! */
  int base;                 /* the conversion base itself */
};
typedef struct powers powers_t;
01782 
01783 /* Convert {UP,UN} to a string with a base as represented in POWTAB, and put
01784    the string in STR.  Generate LEN characters, possibly padding with zeros to
01785    the left.  If LEN is zero, generate as many characters as required.
01786    Return a pointer immediately after the last digit of the result string.
01787    Complexity is O(UN^2); intended for small conversions.  */
/* Quadratic basecase conversion of {up, un} to digits in powtab->base,
 * written to str (LEN characters, zero-padded on the left; if LEN is 0,
 * exactly as many as needed).  Returns a pointer just past the last
 * digit.  Digits are generated right-to-left into a stack buffer by
 * repeated division by big_base, then copied out left-to-right.
 * NOTE: clobbers the limbs it copies into rp[], not the caller's up[].
 */
static unsigned char *
mpn_sb_get_str (unsigned char *str, size_t len,
              mp_ptr up, mp_size_t un,
              const powers_t *powtab)
{
  mp_limb_t rl, ul;
  unsigned char *s;
  int base;
  size_t l;
  /* Allocate memory for largest possible string, given that we only get here
     for operands with un < GET_STR_PRECOMPUTE_THRESHOLD and that the smallest
     base is 3.  7/11 is an approximation to 1/log2(3).  */
#if TUNE_PROGRAM_BUILD
#define BUF_ALLOC (GET_STR_THRESHOLD_LIMIT * BITS_PER_MP_LIMB * 7 / 11)
#else
#define BUF_ALLOC (GET_STR_PRECOMPUTE_THRESHOLD * BITS_PER_MP_LIMB * 7 / 11)
#endif
  unsigned char buf[BUF_ALLOC];
#if TUNE_PROGRAM_BUILD
  mp_limb_t rp[GET_STR_THRESHOLD_LIMIT];
#else
  mp_limb_t rp[GET_STR_PRECOMPUTE_THRESHOLD];
#endif

  base = powtab->base;
  if (base == 10)
    {
      /* Special case code for base==10 so that the compiler has a chance to
        optimize things.  */

      MPN_COPY (rp + 1, up, un);

      s = buf + BUF_ALLOC;
      while (un > 1)
       {
         int i;
         mp_limb_t frac, digit;
         /* Divide by big_base 10^9 (32-bit) leaving the remainder in rp[0]. */
         MPN_DIVREM_OR_PREINV_DIVREM_1 (rp, (mp_size_t) 1, rp + 1, un,
                                    MP_BASES_BIG_BASE_10,
                                    MP_BASES_BIG_BASE_INVERTED_10,
                                    MP_BASES_NORMALIZATION_STEPS_10);
         un -= rp[un] == 0;
         /* Develop the remainder's digits left-to-right as a binary
            fraction multiplied repeatedly by 10. */
         frac = (rp[0] + 1) << GMP_NAIL_BITS;
         s -= MP_BASES_CHARS_PER_LIMB_10;
#if HAVE_HOST_CPU_FAMILY_x86
         /* The code below turns out to be a bit slower for x86 using gcc.
            Use plain code.  */
         i = MP_BASES_CHARS_PER_LIMB_10;
         do
           {
             umul_ppmm (digit, frac, frac, 10);
             *s++ = digit;
           }
         while (--i);
#else
         /* Use the fact that 10 in binary is 1010, with the lowest bit 0.
            After a few umul_ppmm, we will have accumulated enough low zeros
            to use a plain multiply.  */
         if (MP_BASES_NORMALIZATION_STEPS_10 == 0)
           {
             umul_ppmm (digit, frac, frac, 10);
             *s++ = digit;
           }
         if (MP_BASES_NORMALIZATION_STEPS_10 <= 1)
           {
             umul_ppmm (digit, frac, frac, 10);
             *s++ = digit;
           }
         if (MP_BASES_NORMALIZATION_STEPS_10 <= 2)
           {
             umul_ppmm (digit, frac, frac, 10);
             *s++ = digit;
           }
         if (MP_BASES_NORMALIZATION_STEPS_10 <= 3)
           {
             umul_ppmm (digit, frac, frac, 10);
             *s++ = digit;
           }
         i = MP_BASES_CHARS_PER_LIMB_10 - (4-MP_BASES_NORMALIZATION_STEPS_10);
         frac = (frac + 0xf) >> 4;
         do
           {
             frac *= 10;
             digit = frac >> (BITS_PER_MP_LIMB - 4);
             *s++ = digit;
             frac &= (~(mp_limb_t) 0) >> 4;
           }
         while (--i);
#endif
         s -= MP_BASES_CHARS_PER_LIMB_10;
       }

      /* Final limb: peel off digits one at a time. */
      ul = rp[1];
      while (ul != 0)
       {
         udiv_qrnd_unnorm (ul, rl, ul, 10);
         *--s = rl;
       }
    }
  else /* not base 10 */
    {
      unsigned chars_per_limb;
      mp_limb_t big_base, big_base_inverted;
      unsigned normalization_steps;

      chars_per_limb = __mp_bases[base].chars_per_limb;
      big_base = __mp_bases[base].big_base;
      big_base_inverted = __mp_bases[base].big_base_inverted;
      count_leading_zeros (normalization_steps, big_base);

      MPN_COPY (rp + 1, up, un);

      s = buf + BUF_ALLOC;
      while (un > 1)
       {
         int i;
         mp_limb_t frac;
         /* Divide by big_base leaving the remainder in rp[0]. */
         MPN_DIVREM_OR_PREINV_DIVREM_1 (rp, (mp_size_t) 1, rp + 1, un,
                                    big_base, big_base_inverted,
                                    normalization_steps);
         un -= rp[un] == 0;
         frac = (rp[0] + 1) << GMP_NAIL_BITS;
         s -= chars_per_limb;
         i = chars_per_limb;
         do
           {
             mp_limb_t digit;
             umul_ppmm (digit, frac, frac, base);
             *s++ = digit;
           }
         while (--i);
         s -= chars_per_limb;
       }

      /* Final limb: peel off digits one at a time. */
      ul = rp[1];
      while (ul != 0)
       {
         udiv_qrnd_unnorm (ul, rl, ul, base);
         *--s = rl;
       }
    }

  /* Pad on the left to len digits, then copy the generated digits out. */
  l = buf + BUF_ALLOC - s;
  while (l < len)
    {
      *str++ = 0;
      len--;
    }
  while (l != 0)
    {
      *str++ = *s++;
      l--;
    }
  return str;
}
01943 
01944 /* Convert {UP,UN} to a string with a base as represented in POWTAB, and put
01945    the string in STR.  Generate LEN characters, possibly padding with zeros to
01946    the left.  If LEN is zero, generate as many characters as required.
01947    Return a pointer immediately after the last digit of the result string.
01948    This uses divide-and-conquer and is intended for large conversions.  */
/* Divide-and-conquer conversion of {up, un} to digits in the base
 * represented by POWTAB, writing LEN characters to str (zero-padded on
 * the left; LEN == 0 means exactly as many as needed).  Returns a
 * pointer just past the last digit.  Splits on the current table power,
 * converts quotient then remainder recursively with powtab - 1.
 */
static unsigned char *
mpn_dc_get_str (unsigned char *str, size_t len,
              mp_ptr up, mp_size_t un,
              const powers_t *powtab)
{
  if (un < GET_STR_DC_THRESHOLD)
    {
      /* Small enough for the quadratic basecase; un == 0 just pads. */
      if (un != 0)
       str = mpn_sb_get_str (str, len, up, un, powtab);
      else
       {
         while (len != 0)
           {
             *str++ = 0;
             len--;
           }
       }
    }
  else
    {
      mp_ptr pwp, qp, rp;
      mp_size_t pwn, qn;

      pwp = powtab->p;
      pwn = powtab->n;

      if (un < pwn || (un == pwn && mpn_cmp (up, pwp, un) < 0))
       {
         /* Operand is smaller than this power: skip to the next-lower one. */
         str = mpn_dc_get_str (str, len, up, un, powtab - 1);
       }
      else
       {
         TMP_DECL (marker);
         TMP_MARK (marker);
         qp = TMP_ALLOC_LIMBS (un - pwn + 1);
         rp = TMP_ALLOC_LIMBS (pwn);

         /* Split: up = qp * power + rp.  The quotient supplies the
            leading digits, the remainder exactly digits_in_base more. */
         mpn_tdiv_qr (qp, rp, 0L, up, un, pwp, pwn);
         qn = un - pwn; qn += qp[qn] != 0;              /* quotient size */
         if (len != 0)
           len = len - powtab->digits_in_base;
         str = mpn_dc_get_str (str, len, qp, qn, powtab - 1);
         str = mpn_dc_get_str (str, powtab->digits_in_base, rp, pwn, powtab - 1);
         TMP_FREE (marker);
       }
    }
  return str;
}
01997 
01998 /* There are no leading zeros on the digits generated at str, but that's not
01999    currently a documented feature.  */
02000 
/* Convert {up, un} to digit values (not ASCII) in the given base, stored
 * at str; returns the number of digits produced.  Power-of-2 bases are
 * converted by direct bit extraction; other bases go through the
 * basecase routine, or for large operands through divide-and-conquer
 * with a precomputed table of powers of big_base.
 * NOTE(review): the non-power-of-2 paths hand up to mpn_sb_get_str /
 * mpn_dc_get_str, whose working copies mean up itself is not modified
 * here, but up is mp_ptr (non-const) for their benefit.
 */
size_t
mpn_get_str (unsigned char *str, int base, mp_ptr up, mp_size_t un)
{
  mp_ptr powtab_mem, powtab_mem_ptr;
  mp_limb_t big_base;
  size_t digits_in_base;
  powers_t powtab[30];
  int pi;
  mp_size_t n;
  mp_ptr p, t;
  size_t out_len;

  /* Special case zero, as the code below doesn't handle it.  */
  if (un == 0)
    {
      str[0] = 0;
      return 1;
    }

  if (POW2_P (base))
    {
      /* The base is a power of 2.  Convert from most significant end.  */
      mp_limb_t n1, n0;
      int bits_per_digit = __mp_bases[base].big_base;
      int cnt;
      int bit_pos;
      mp_size_t i;
      unsigned char *s = str;

      n1 = up[un - 1];
      count_leading_zeros (cnt, n1);

      /* BIT_POS should be R when input ends in least significant nibble,
        R + bits_per_digit * n when input ends in nth least significant
        nibble. */

      {
       unsigned long bits;

       /* Round the total significant bit count up to a whole number of
          digits, then express it relative to the top limb. */
       bits = GMP_NUMB_BITS * un - cnt + GMP_NAIL_BITS;
       cnt = bits % bits_per_digit;
       if (cnt != 0)
         bits += bits_per_digit - cnt;
       bit_pos = bits - (un - 1) * GMP_NUMB_BITS;
      }

      /* Fast loop for bit output.  */
      i = un - 1;
      for (;;)
       {
         /* Emit whole digits from the current limb, then stitch the
            digit that straddles the limb boundary from n1 and up[i-1]. */
         bit_pos -= bits_per_digit;
         while (bit_pos >= 0)
           {
             *s++ = (n1 >> bit_pos) & ((1 << bits_per_digit) - 1);
             bit_pos -= bits_per_digit;
           }
         i--;
         if (i < 0)
           break;
         n0 = (n1 << -bit_pos) & ((1 << bits_per_digit) - 1);
         n1 = up[i];
         bit_pos += GMP_NUMB_BITS;
         *s++ = n0 | (n1 >> bit_pos);

         if (!(i & 0xFF)) SCHEME_BIGNUM_USE_FUEL(1);
       }

      *s = 0;

      return s - str;
    }

  /* General case.  The base is not a power of 2.  */

  if (un < GET_STR_PRECOMPUTE_THRESHOLD)
    {
      /* Small operand: basecase conversion, no power table needed
         beyond the base field. */
      struct powers ptab[1];
      ptab[0].base = base;
      return mpn_sb_get_str (str, (size_t) 0, up, un, ptab) - str;
    }

  /* Allocate one large block for the powers of big_base.  With the current
     scheme, we need to allocate twice as much as would be possible if a
     minimal set of powers were generated.  */
#define ALLOC_SIZE (2 * un + 30)
 {
   TMP_DECL (marker);
   TMP_MARK (marker);

   powtab_mem = __GMP_ALLOCATE_FUNC_LIMBS (ALLOC_SIZE);
   powtab_mem_ptr = powtab_mem;

  /* Compute a table of powers: big_base^1, big_base^2, big_base^4, ...,
     big_base^(2^k), for k such that the biggest power is between U and
     sqrt(U).  */

  big_base = __mp_bases[base].big_base;
  digits_in_base = __mp_bases[base].chars_per_limb;

  powtab[0].base = base; /* FIXME: hack for getting base to mpn_sb_get_str */
  powtab[1].p = &big_base;
  powtab[1].n = 1;
  powtab[1].digits_in_base = digits_in_base;
  powtab[1].base = base;
  powtab[2].p = &big_base;
  powtab[2].n = 1;
  powtab[2].digits_in_base = digits_in_base;
  powtab[2].base = base;
  n = 1;
  pi = 2;
  p = &big_base;
  for (;;)
    {
      /* Square the previous power into fresh space; trim a top zero limb. */
      ++pi;
      t = powtab_mem_ptr;
      powtab_mem_ptr += 2 * n;
      mpn_sqr_n (t, p, n);
      n *= 2; n -= t[n - 1] == 0;
      digits_in_base *= 2;
      p = t;
      powtab[pi].p = p;
      powtab[pi].n = n;
      powtab[pi].digits_in_base = digits_in_base;
      powtab[pi].base = base;

      if (2 * n > un)
       break;
    }
  ASSERT_ALWAYS (ALLOC_SIZE > powtab_mem_ptr - powtab_mem);

  /* Using our precomputed powers, now in powtab[], convert our number.  */
  out_len = mpn_dc_get_str (str, 0, up, un, powtab + pi) - str;

  __GMP_FREE_FUNC_LIMBS (powtab_mem, ALLOC_SIZE);
  
  TMP_FREE(marker);
 }

  return out_len;
}
02141 
02142 
02143 /* When to switch to sub-quadratic code.  This counts characters/digits in
02144    the input string, not limbs as most other *_THRESHOLD.  */
02145 #ifndef SET_STR_THRESHOLD
02146 #define SET_STR_THRESHOLD 4000
02147 #endif
02148 
02149 /* Don't define this to anything but 1 for now.  In order to make other values
02150    work well, either the convert_blocks function should be generalized to handle
02151    larger blocks than chars_per_limb, or the basecase code should be broken out
02152    of the main function.  Also note that this must be a power of 2.  */
02153 #ifndef SET_STR_BLOCK_SIZE
02154 #define SET_STR_BLOCK_SIZE 1       /* Must be a power of 2. */
02155 #endif
02156 
02157 
02158 /* This check interferes with expression based values of SET_STR_THRESHOLD
02159    used for tuning and measuring.
02160 #if SET_STR_BLOCK_SIZE >= SET_STR_THRESHOLD
02161 These values are silly.
02162 The sub-quadratic code would recurse to itself.
02163 #endif
02164 */
02165 
02166 static mp_size_t
02167 convert_blocks (mp_ptr dp, const unsigned char *str, size_t str_len, int base);
02168 
02169 mp_size_t
02170 mpn_set_str (mp_ptr rp, const unsigned char *str, size_t str_len, int base)
02171 {
02172   mp_size_t size;
02173   mp_limb_t big_base;
02174   int chars_per_limb;
02175   mp_limb_t res_digit;
02176 
02177   ASSERT (base >= 2);
02178   ASSERT (base < numberof (__mp_bases));
02179   ASSERT (str_len >= 1);
02180 
02181   big_base = __mp_bases[base].big_base;
02182   chars_per_limb = __mp_bases[base].chars_per_limb;
02183 
02184   size = 0;
02185 
02186   if (POW2_P (base))
02187     {
02188       /* The base is a power of 2.  Read the input string from least to most
02189         significant character/digit.  */
02190 
02191       const unsigned char *s;
02192       int next_bitpos;
02193       int bits_per_indigit = big_base;
02194 
02195       res_digit = 0;
02196       next_bitpos = 0;
02197 
02198       for (s = str + str_len - 1; s >= str; s--)
02199        {
02200          int inp_digit = *s;
02201 
02202          res_digit |= ((mp_limb_t) inp_digit << next_bitpos) & GMP_NUMB_MASK;
02203          next_bitpos += bits_per_indigit;
02204          if (next_bitpos >= GMP_NUMB_BITS)
02205            {
02206              rp[size++] = res_digit;
02207              next_bitpos -= GMP_NUMB_BITS;
02208              res_digit = inp_digit >> (bits_per_indigit - next_bitpos);
02209            }
02210 
02211          if (!((long)s & 0xFF)) SCHEME_BIGNUM_USE_FUEL(1);
02212        }
02213 
02214       if (res_digit != 0)
02215        rp[size++] = res_digit;
02216       return size;
02217     }
02218   else
02219     {
02220       /* General case.  The base is not a power of 2.  */
02221 
02222       if (str_len < SET_STR_THRESHOLD)
02223        {
02224          size_t i;
02225          int j;
02226          mp_limb_t cy_limb;
02227 
02228          for (i = chars_per_limb; i < str_len; i += chars_per_limb)
02229            {
02230              res_digit = *str++;
02231              if (base == 10)
02232               { /* This is a common case.
02233                    Help the compiler to avoid multiplication.  */
02234                 for (j = MP_BASES_CHARS_PER_LIMB_10 - 1; j != 0; j--)
02235                   res_digit = res_digit * 10 + *str++;
02236               }
02237              else
02238               {
02239                 for (j = chars_per_limb - 1; j != 0; j--)
02240                   res_digit = res_digit * base + *str++;
02241               }
02242 
02243              if (size == 0)
02244               {
02245                 if (res_digit != 0)
02246                   {
02247                     rp[0] = res_digit;
02248                     size = 1;
02249                   }
02250               }
02251              else
02252               {
02253 #if HAVE_NATIVE_mpn_mul_1c
02254                 cy_limb = mpn_mul_1c (rp, rp, size, big_base, res_digit);
02255 #else
02256                 cy_limb = mpn_mul_1 (rp, rp, size, big_base);
02257                 cy_limb += mpn_add_1 (rp, rp, size, res_digit);
02258 #endif
02259                 if (cy_limb != 0)
02260                   rp[size++] = cy_limb;
02261               }
02262            }
02263 
02264          big_base = base;
02265          res_digit = *str++;
02266          if (base == 10)
02267            { /* This is a common case.
02268                Help the compiler to avoid multiplication.  */
02269              for (j = str_len - (i - MP_BASES_CHARS_PER_LIMB_10) - 1; j > 0; j--)
02270               {
02271                 res_digit = res_digit * 10 + *str++;
02272                 big_base *= 10;
02273               }
02274            }
02275          else
02276            {
02277              for (j = str_len - (i - chars_per_limb) - 1; j > 0; j--)
02278               {
02279                 res_digit = res_digit * base + *str++;
02280                 big_base *= base;
02281               }
02282            }
02283 
02284          if (size == 0)
02285            {
02286              if (res_digit != 0)
02287               {
02288                 rp[0] = res_digit;
02289                 size = 1;
02290               }
02291            }
02292          else
02293            {
02294 #if HAVE_NATIVE_mpn_mul_1c
02295              cy_limb = mpn_mul_1c (rp, rp, size, big_base, res_digit);
02296 #else
02297              cy_limb = mpn_mul_1 (rp, rp, size, big_base);
02298              cy_limb += mpn_add_1 (rp, rp, size, res_digit);
02299 #endif
02300              if (cy_limb != 0)
02301               rp[size++] = cy_limb;
02302            }
02303          return size;
02304        }
02305       else
02306        {
02307          /* Sub-quadratic code.  */
02308 
02309          mp_ptr dp;
02310          mp_size_t dsize;
02311          mp_ptr xp, tp;
02312          mp_size_t step;
02313          mp_size_t i;
02314          size_t alloc;
02315          mp_size_t n;
02316          mp_ptr pow_mem;
02317          TMP_DECL (marker);
02318          TMP_MARK (marker);
02319 
02320          alloc = (str_len + chars_per_limb - 1) / chars_per_limb;
02321          alloc = 2 * alloc;
02322          dp = __GMP_ALLOCATE_FUNC_LIMBS (alloc);
02323 
02324 #if SET_STR_BLOCK_SIZE == 1
02325          dsize = convert_blocks (dp, str, str_len, base);
02326 #else
02327          {
02328            const unsigned char *s;
02329            mp_ptr ddp = dp;
02330 
02331            s = str + str_len;
02332            while (s - str >  SET_STR_BLOCK_SIZE * chars_per_limb)
02333              {
02334               s -= SET_STR_BLOCK_SIZE * chars_per_limb;
02335               mpn_set_str (ddp, s, SET_STR_BLOCK_SIZE * chars_per_limb, base);
02336               ddp += SET_STR_BLOCK_SIZE;
02337              }
02338            ddp += mpn_set_str (ddp, str, s - str, base);
02339            dsize = ddp - dp;
02340          }
02341 #endif
02342 
02343          /* Allocate space for powers of big_base.  Could trim this in two
02344             ways:
02345             1. Only really need 2^ceil(log2(dsize)) bits for the largest
02346               power.
02347             2. Only the variable to get the largest power need that much
02348               memory.  The other variable needs half as much.  Need just
02349               figure out which of xp and tp will hold the last one.
02350             Net space savings would be in the range 1/4 to 5/8 of current
02351             allocation, depending on how close to the next power of 2 that
02352             dsize is.  */
02353          pow_mem = __GMP_ALLOCATE_FUNC_LIMBS (2 * alloc);
02354          xp = pow_mem;
02355          tp = pow_mem + alloc;
02356 
02357          xp[0] = big_base;
02358          n = 1;
02359          step = 1;
02360 #if SET_STR_BLOCK_SIZE != 1
02361          for (i = SET_STR_BLOCK_SIZE; i > 1; i >>= 1)
02362            {
02363              mpn_sqr_n (tp, xp, n);
02364              n = 2 * n;
02365              n -= tp[n - 1] == 0;
02366 
02367              step = 2 * step;
02368              MP_PTR_SWAP (tp, xp);
02369            }
02370 #endif
02371 
02372          /* Multiply every second limb block, each `step' limbs large by the
02373             base power currently in xp[], then add this to the adjacent block.
02374             We thereby convert from dsize blocks in base big_base, to dsize/2
02375             blocks in base big_base^2, then to dsize/4 blocks in base
02376             big_base^4, etc, etc.  */
02377 
02378          if (step < dsize)
02379            {
02380              for (;;)
02381               {
02382                 for (i = 0; i < dsize - step; i += 2 * step)
02383                   {
02384                     mp_ptr bp = dp + i;
02385                     mp_size_t m = dsize - i - step;
02386                     mp_size_t hi;
02387                     if (n >= m)
02388                      {
02389                        mpn_mul (tp, xp, n, bp + step, m);
02390                        mpn_add (bp, tp, n + m, bp, n);
02391                        hi = i + n + m;
02392                        dsize = hi;
02393                        dsize -= dp[dsize - 1] == 0;
02394                      }
02395                     else
02396                      {
02397                        mpn_mul_n (tp, xp, bp + step, n);
02398                        mpn_add (bp, tp, n + n, bp, n);
02399                      }
02400                   }
02401 
02402                 step = 2 * step;
02403                 if (! (step < dsize))
02404                   break;
02405 
02406                 mpn_sqr_n (tp, xp, n);
02407                 n = 2 * n;
02408                 n -= tp[n - 1] == 0;
02409                 MP_PTR_SWAP (tp, xp);
02410               }
02411            }
02412 
02413          MPN_NORMALIZE (dp, dsize);
02414          MPN_COPY (rp, dp, dsize);
02415          __GMP_FREE_FUNC_LIMBS (pow_mem, 2 * alloc);
02416          __GMP_FREE_FUNC_LIMBS (dp, alloc);
02417          TMP_FREE (marker);
02418          return dsize;
02419        }
02420     }
02421 }
02422 
02423 static mp_size_t
02424 convert_blocks (mp_ptr dp, const unsigned char *str, size_t str_len, int base)
02425 {
02426   int chars_per_limb;
02427   mp_size_t i;
02428   int j;
02429   int ds;
02430   mp_size_t dsize;
02431   mp_limb_t res_digit;
02432 
02433   chars_per_limb = __mp_bases[base].chars_per_limb;
02434 
02435   dsize = str_len / chars_per_limb;
02436   ds = str_len % chars_per_limb;
02437 
02438   if (ds != 0)
02439     {
02440       res_digit = *str++;
02441       for (j = ds - 1; j != 0; j--)
02442        res_digit = res_digit * base + *str++;
02443       dp[dsize] = res_digit;
02444     }
02445 
02446   if (base == 10)
02447     {
02448       for (i = dsize - 1; i >= 0; i--)
02449        {
02450          res_digit = *str++;
02451          for (j = MP_BASES_CHARS_PER_LIMB_10 - 1; j != 0; j--)
02452            res_digit = res_digit * 10 + *str++;
02453          dp[i] = res_digit;
02454        }
02455     }
02456   else
02457     {
02458       for (i = dsize - 1; i >= 0; i--)
02459        {
02460          res_digit = *str++;
02461          for (j = chars_per_limb - 1; j != 0; j--)
02462            res_digit = res_digit * base + *str++;
02463          dp[i] = res_digit;
02464        }
02465     }
02466 
02467   return dsize + (ds != 0);
02468 }
02469 
02470 
02471 /* mpn_tdiv_qr -- Divide the numerator (np,nn) by the denominator (dp,dn) and
02472    write the nn-dn+1 quotient limbs at qp and the dn remainder limbs at rp.  If
02473    qxn is non-zero, generate that many fraction limbs and append them after the
02474    other quotient limbs, and update the remainder accordingly.  The input
02475    operands are unaffected.
02476 
02477    Preconditions:
02478    1. The most significant limb of the divisor must be non-zero.
02479    2. No argument overlap is permitted.  (??? relax this ???)
02480    3. nn >= dn, even if qxn is non-zero.  (??? relax this ???)
02481 
02482    The time complexity of this is O(qn*qn+M(dn,qn)), where M(m,n) is the time
02483    complexity of multiplication. */
02484 
02485 #ifndef BZ_THRESHOLD
02486 #define BZ_THRESHOLD (7 * KARATSUBA_MUL_THRESHOLD)
02487 #endif
02488 
02489 /* Extract the middle limb from ((h,,l) << cnt) */
02490 #define SHL(h,l,cnt) \
02491   ((h << cnt) | ((l >> 1) >> ((~cnt) & (BITS_PER_MP_LIMB - 1))))
02492 
void
#if __STDC__
mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
            mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
#else
mpn_tdiv_qr (qp, rp, qxn, np, nn, dp, dn)
     mp_ptr qp;
     mp_ptr rp;
     mp_size_t qxn;
     mp_srcptr np;
     mp_size_t nn;
     mp_srcptr dp;
     mp_size_t dn;
#endif
{
  /* FIXME:
     1. qxn
     2. pass allocated storage in additional parameter?
  */
#ifdef DEBUG
  if (qxn != 0)
    abort ();
#endif

  /* Dispatch on divisor size; dn <= 2 have dedicated fast paths.  */
  switch (dn)
    {
    case 0:
      /* Division by zero: the trap below is compiled out, so just return.  */
#if 0
      DIVIDE_BY_ZERO;
#endif
      return;

    case 1:
      {
       /* One-limb divisor: mpn_divmod_1 produces the whole quotient and a
          single-limb remainder.  */
       rp[0] = mpn_divmod_1 (qp, np, nn, dp[0]);
       return;
      }

    case 2:
      {
       /* Two-limb divisor: normalize so the divisor's high bit is set,
          shift the numerator to match, divide with mpn_divrem_2, then
          shift the remainder back down.  */
       int cnt;
       mp_ptr n2p, d2p;
       mp_limb_t qhl, cy;
       TMP_DECL (marker);
       TMP_MARK (marker);
       count_leading_zeros (cnt, dp[dn - 1]);
       if (cnt != 0)
         {
           d2p = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
           mpn_lshift (d2p, dp, dn, cnt);
           n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
           cy = mpn_lshift (n2p, np, nn, cnt);
           n2p[nn] = cy;
           qhl = mpn_divrem_2 (qp, 0L, n2p, nn + (cy != 0), d2p);
           if (cy == 0)
             qp[nn - 2] = qhl;     /* always store nn-dn+1 quotient limbs */
         }
       else
         {
           d2p = (mp_ptr) dp;
           n2p = (mp_ptr) TMP_ALLOC (nn * BYTES_PER_MP_LIMB);
           MPN_COPY (n2p, np, nn);
           qhl = mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
           qp[nn - 2] = qhl;       /* always store nn-dn+1 quotient limbs */
         }

       if (cnt != 0)
         mpn_rshift (rp, n2p, dn, cnt);
       else
         MPN_COPY (rp, n2p, dn);
       TMP_FREE (marker);
       return;
      }

    default:
      {
       int adjust;
       TMP_DECL (marker);
       TMP_MARK (marker);
       adjust = np[nn - 1] >= dp[dn - 1]; /* conservative tests for quotient size */
       /* Wide-quotient regime (quotient has at least dn limbs): normalize,
          then use schoolbook (mpn_sb_divrem_mn) or divide-and-conquer
          (mpn_bz_divrem_n) division.  */
       if (nn + adjust >= 2 * dn)
         {
           mp_ptr n2p, d2p;
           mp_limb_t cy;
           int cnt;
           count_leading_zeros (cnt, dp[dn - 1]);

           qp[nn - dn] = 0;               /* zero high quotient limb */
           if (cnt != 0)                  /* normalize divisor if needed */
             {
              d2p = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
              mpn_lshift (d2p, dp, dn, cnt);
              n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
              cy = mpn_lshift (n2p, np, nn, cnt);
              n2p[nn] = cy;
              nn += adjust;
             }
           else
             {
              d2p = (mp_ptr) dp;
              n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
              MPN_COPY (n2p, np, nn);
              n2p[nn] = 0;
              nn += adjust;
             }

           if (dn == 2)
             mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
           else if (dn < BZ_THRESHOLD)
             mpn_sb_divrem_mn (qp, n2p, nn, d2p, dn);
           else
             {
              /* Perform 2*dn / dn limb divisions as long as the limbs
                 in np last.  */
              mp_ptr q2p = qp + nn - 2 * dn;
              n2p += nn - 2 * dn;
              mpn_bz_divrem_n (q2p, n2p, d2p, dn);
              nn -= dn;
              while (nn >= 2 * dn)
                {
                  mp_limb_t c;
                  q2p -= dn;  n2p -= dn;
                  c = mpn_bz_divrem_n (q2p, n2p, d2p, dn);
                  ASSERT_ALWAYS (c == 0);
                  nn -= dn;
                }

              if (nn != dn)
                {
                  n2p -= nn - dn;
                  /* In theory, we could fall out to the cute code below
                     since we now have exactly the situation that code
                     is designed to handle.  We botch this badly and call
                     the basic mpn_sb_divrem_mn!  */
                  if (dn == 2)
                    mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
                  else
                    mpn_sb_divrem_mn (qp, n2p, nn, d2p, dn);
                }
             }


           /* Undo the normalization shift to recover the true remainder.  */
           if (cnt != 0)
             mpn_rshift (rp, n2p, dn, cnt);
           else
             MPN_COPY (rp, n2p, dn);
           TMP_FREE (marker);
           return;
         }

       /* When we come here, the numerator/partial remainder is less
          than twice the size of the denominator.  */

         {
           /* Problem:

              Divide a numerator N with nn limbs by a denominator D with dn
              limbs forming a quotient of nn-dn+1 limbs.  When qn is small
              compared to dn, conventional division algorithms perform poorly.
              We want an algorithm that has an expected running time that is
              dependent only on qn.  It is assumed that the most significant
              limb of the numerator is smaller than the most significant limb
              of the denominator.

              Algorithm (very informally stated):

              1) Divide the 2 x qn most significant limbs from the numerator
                by the qn most significant limbs from the denominator.  Call
                the result qest.  This is either the correct quotient, but
                might be 1 or 2 too large.  Compute the remainder from the
                division.  (This step is implemented by a mpn_divrem call.)

              2) Is the most significant limb from the remainder < p, where p
                is the product of the most significant limb from the quotient
                and the next(d).  (Next(d) denotes the next ignored limb from
                the denominator.)  If it is, decrement qest, and adjust the
                remainder accordingly.

              3) Is the remainder >= qest?  If it is, qest is the desired
                quotient.  The algorithm terminates.

              4) Subtract qest x next(d) from the remainder.  If there is
                borrow out, decrement qest, and adjust the remainder
                accordingly.

              5) Skip one word from the denominator (i.e., let next(d) denote
                the next less significant limb.  */

           mp_size_t qn;
           mp_ptr n2p, d2p;
           mp_ptr tp;
           mp_limb_t cy;
           mp_size_t in, rn;
           mp_limb_t quotient_too_large;
           int cnt;

           qn = nn - dn;
           qp[qn] = 0;                           /* zero high quotient limb */
           qn += adjust;                  /* qn cannot become bigger */

           if (qn == 0)
             {
              /* Quotient is zero; the numerator itself is the remainder.  */
              MPN_COPY (rp, np, dn);
              TMP_FREE (marker);
              return;
             }

           in = dn - qn;           /* (at least partially) ignored # of limbs in ops */
           /* Normalize denominator by shifting it to the left such that its
              most significant bit is set.  Then shift the numerator the same
              amount, to mathematically preserve quotient.  */
           count_leading_zeros (cnt, dp[dn - 1]);
           if (cnt != 0)
             {
              d2p = (mp_ptr) TMP_ALLOC (qn * BYTES_PER_MP_LIMB);

              mpn_lshift (d2p, dp + in, qn, cnt);
              d2p[0] |= dp[in - 1] >> (BITS_PER_MP_LIMB - cnt);

              n2p = (mp_ptr) TMP_ALLOC ((2 * qn + 1) * BYTES_PER_MP_LIMB);
              cy = mpn_lshift (n2p, np + nn - 2 * qn, 2 * qn, cnt);
              if (adjust)
                {
                  n2p[2 * qn] = cy;
                  n2p++;
                }
              else
                {
                  n2p[0] |= np[nn - 2 * qn - 1] >> (BITS_PER_MP_LIMB - cnt);
                }
             }
           else
             {
              d2p = (mp_ptr) dp + in;

              n2p = (mp_ptr) TMP_ALLOC ((2 * qn + 1) * BYTES_PER_MP_LIMB);
              MPN_COPY (n2p, np + nn - 2 * qn, 2 * qn);
              if (adjust)
                {
                  n2p[2 * qn] = 0;
                  n2p++;
                }
             }

           /* Get an approximate quotient using the extracted operands.  */
           if (qn == 1)
             {
              mp_limb_t q0, r0;
              mp_limb_t gcc272bug_n1, gcc272bug_n0, gcc272bug_d0;
              /* Due to a gcc 2.7.2.3 reload pass bug, we have to use some
                 temps here.  This doesn't hurt code quality on any machines
                 so we do it unconditionally.  */
              gcc272bug_n1 = n2p[1];
              gcc272bug_n0 = n2p[0];
              gcc272bug_d0 = d2p[0];
              udiv_qrnnd (q0, r0, gcc272bug_n1, gcc272bug_n0, gcc272bug_d0);
              n2p[0] = r0;
              qp[0] = q0;
             }
           else if (qn == 2)
             mpn_divrem_2 (qp, 0L, n2p, 4L, d2p);
           else if (qn < BZ_THRESHOLD)
             mpn_sb_divrem_mn (qp, n2p, qn * 2, d2p, qn);
           else
             mpn_bz_divrem_n (qp, n2p, d2p, qn);

           rn = qn;
           /* Multiply the first ignored divisor limb by the most significant
              quotient limb.  If that product is > the partial remainder's
              most significant limb, we know the quotient is too large.  This
              test quickly catches most cases where the quotient is too large;
              it catches all cases where the quotient is 2 too large.  */
           {
             mp_limb_t dl, x;
             mp_limb_t h, l;

             if (in - 2 < 0)
              dl = 0;
             else
              dl = dp[in - 2];

             x = SHL (dp[in - 1], dl, cnt);
             umul_ppmm (h, l, x, qp[qn - 1]);

             if (n2p[qn - 1] < h)
              {
                mp_limb_t cy;

                mpn_decr_u (qp, (mp_limb_t) 1);
                cy = mpn_add_n (n2p, n2p, d2p, qn);
                if (cy)
                  {
                    /* The partial remainder is safely large.  */
                    n2p[qn] = cy;
                    ++rn;
                  }
              }
           }

           quotient_too_large = 0;
           if (cnt != 0)
             {
              mp_limb_t cy1, cy2;

              /* Append partially used numerator limb to partial remainder.  */
              cy1 = mpn_lshift (n2p, n2p, rn, BITS_PER_MP_LIMB - cnt);
              n2p[0] |= np[in - 1] & (~(mp_limb_t) 0 >> cnt);

              /* Update partial remainder with partially used divisor limb.  */
              cy2 = mpn_submul_1 (n2p, qp, qn, dp[in - 1] & (~(mp_limb_t) 0 >> cnt));
              if (qn != rn)
                {
#ifdef DEBUG
                  if (n2p[qn] < cy2)
                    abort ();
#endif
                  n2p[qn] -= cy2;
                }
              else
                {
                  n2p[qn] = cy1 - cy2;

                  quotient_too_large = (cy1 < cy2);
                  ++rn;
                }
              --in;
             }
           /* True: partial remainder now is neutral, i.e., it is not shifted up.  */

           tp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);

           /* Subtract qest times the so-far-ignored low divisor limbs from
              the numerator to obtain the final remainder.  */
           if (in < qn)
             {
              if (in == 0)
                {
                  MPN_COPY (rp, n2p, rn);
#ifdef DEBUG
                  if (rn != dn)
                    abort ();
#endif
                  goto foo;
                }
              mpn_mul (tp, qp, qn, dp, in);
             }
           else
             mpn_mul (tp, dp, in, qp, qn);

           cy = mpn_sub (n2p, n2p, rn, tp + in, qn);
           MPN_COPY (rp + in, n2p, dn - in);
           quotient_too_large |= cy;
           cy = mpn_sub_n (rp, np, tp, in);
           cy = mpn_sub_1 (rp + in, rp + in, rn, cy);
           quotient_too_large |= cy;
         foo:
           /* A borrow anywhere above means qest overshot by one: decrement
              the quotient and add the divisor back into the remainder.  */
           if (quotient_too_large)
             {
              mpn_decr_u (qp, (mp_limb_t) 1);
              mpn_add_n (rp, rp, dp, dn);
             }
         }
       TMP_FREE (marker);
       return;
      }
    }
}
02858 
02859 
02860 /* Square roots table.  Generated by the following program:
02861 #include "gmp.h"
02862 main(){mpz_t x;int i;mpz_init(x);for(i=64;i<256;i++){mpz_set_ui(x,256*i);
02863 mpz_sqrt(x,x);mpz_out_str(0,10,x);printf(",");if(i%16==15)printf("\n");}}
02864 */
/* approx_tab[i] = floor(sqrt(256 * (i + 64))) for i in [0, 192): an 8-bit
   first approximation to the square root, indexed by the top 8 bits of a
   normalized operand (see the generator program above and the lookup
   approx_tab[q - 64] in mpn_sqrtrem1).  */
static const unsigned char approx_tab[192] =
  {
    128,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,
    143,144,144,145,146,147,148,149,150,150,151,152,153,154,155,155,
    156,157,158,159,160,160,161,162,163,163,164,165,166,167,167,168,
    169,170,170,171,172,173,173,174,175,176,176,177,178,178,179,180,
    181,181,182,183,183,184,185,185,186,187,187,188,189,189,190,191,
    192,192,193,193,194,195,195,196,197,197,198,199,199,200,201,201,
    202,203,203,204,204,205,206,206,207,208,208,209,209,210,211,211,
    212,212,213,214,214,215,215,216,217,217,218,218,219,219,220,221,
    221,222,222,223,224,224,225,225,226,226,227,227,228,229,229,230,
    230,231,231,232,232,233,234,234,235,235,236,236,237,237,238,238,
    239,240,240,241,241,242,242,243,243,244,244,245,245,246,246,247,
    247,248,248,249,249,250,250,251,251,252,252,253,253,254,254,255
  };
02880 
02881 #define HALF_NAIL (GMP_NAIL_BITS / 2)
02882 
/* same as mpn_sqrtrem, but for size=1 and {np, 1} normalized */
/* Computes sp[0] = square root of np[0] and the remainder
   r = np[0] - sp[0]^2, storing r at rp[0] when rp is non-NULL.
   Returns 1 when the remainder is non-zero, else 0.
   Requires np[0] >= GMP_NUMB_HIGHBIT / 2 (operand normalized) and a limb
   of at least 16 bits, per the asserts below.  */
static mp_size_t
mpn_sqrtrem1 (mp_ptr sp, mp_ptr rp, mp_srcptr np)
{
  mp_limb_t np0, s, r, q, u;
  int prec;

  ASSERT (np[0] >= GMP_NUMB_HIGHBIT / 2);
  ASSERT (GMP_LIMB_BITS >= 16);

  /* first compute a 8-bit approximation from the high 8-bits of np[0] */
  np0 = np[0] << GMP_NAIL_BITS;
  q = np0 >> (GMP_LIMB_BITS - 8);
  /* 2^6 = 64 <= q < 256 = 2^8 */
  s = approx_tab[q - 64];                        /* 128 <= s < 255 */
  r = (np0 >> (GMP_LIMB_BITS - 16)) - s * s;            /* r <= 2*s */
  if (r > 2 * s)
    {
      r -= 2 * s + 1;
      s++;
    }

  prec = 8;
  np0 <<= 2 * prec;
  /* Double the precision of s each pass: divide the next prec operand bits
     (appended to r) by 2*s to extend s, then correct downward by one if
     subtracting the square of the extension overshoots.  */
  while (2 * prec < GMP_LIMB_BITS)
    {
      /* invariant: s has prec bits, and r <= 2*s */
      r = (r << prec) + (np0 >> (GMP_LIMB_BITS - prec));
      np0 <<= prec;
      u = 2 * s;
      q = r / u;
      u = r - q * u;
      s = (s << prec) + q;
      u = (u << prec) + (np0 >> (GMP_LIMB_BITS - prec));
      q = q * q;
      r = u - q;
      if (u < q)
       {
         /* Overshoot: step s back by one, adding 2*s - 1 to the remainder. */
         r += 2 * s - 1;
         s --;
       }
      np0 <<= prec;
      prec = 2 * prec;
    }

  ASSERT (2 * prec == GMP_LIMB_BITS); /* GMP_LIMB_BITS must be a power of 2 */

  /* normalize back, assuming GMP_NAIL_BITS is even */
  ASSERT (GMP_NAIL_BITS % 2 == 0);
  sp[0] = s >> HALF_NAIL;
  u = s - (sp[0] << HALF_NAIL); /* s mod 2^HALF_NAIL */
  r += u * ((sp[0] << (HALF_NAIL + 1)) + u);
  r = r >> GMP_NAIL_BITS;

  if (rp != NULL)
    rp[0] = r;
  return r != 0 ? 1 : 0;
}
02941 
02942 
02943 #define Prec (GMP_NUMB_BITS >> 1)
02944 
/* same as mpn_sqrtrem, but for size=2 and {np, 2} normalized
   return cc such that {np, 2} = sp[0]^2 + cc*2^GMP_NUMB_BITS + rp[0] */
static mp_limb_t
mpn_sqrtrem2 (mp_ptr sp, mp_ptr rp, mp_srcptr np)
{
  mp_limb_t qhl, q, u, np0;
  int cc;

  ASSERT (np[1] >= GMP_NUMB_HIGHBIT / 2);

  /* Take the root of the high limb, then extend it by Prec bits using the
     low limb, dividing by 2*sp[0] as in mpn_sqrtrem1.  */
  np0 = np[0];
  mpn_sqrtrem1 (sp, rp, np + 1);
  qhl = 0;
  /* Reduce rp[0] below sp[0], counting the excess in qhl.  */
  while (rp[0] >= sp[0])
    {
      qhl++;
      rp[0] -= sp[0];
    }
  /* now rp[0] < sp[0] < 2^Prec */
  rp[0] = (rp[0] << Prec) + (np0 >> Prec);
  u = 2 * sp[0];
  q = rp[0] / u;
  u = rp[0] - q * u;
  q += (qhl & 1) << (Prec - 1);
  qhl >>= 1; /* if qhl=1, necessary q=0 as qhl*2^Prec + q <= 2^Prec */
  /* now we have (initial rp[0])<<Prec + np0>>Prec = (qhl<<Prec + q) * (2sp[0]) + u */
  sp[0] = ((sp[0] + qhl) << Prec) + q;
  cc = u >> Prec;
  rp[0] = ((u << Prec) & GMP_NUMB_MASK) + (np0 & (((mp_limb_t) 1 << Prec) - 1));
  /* subtract q * q or qhl*2^(2*Prec) from rp */
  cc -= mpn_sub_1 (rp, rp, 1, q * q) + qhl;
  /* now subtract 2*q*2^Prec + 2^(2*Prec) if qhl is set */
  if (cc < 0)
    {
      /* Remainder went negative: decrement sp[0] and add back
        2*sp[0] - 1 (= sp[0] + (sp[0] - 1)) to the remainder.  */
      cc += sp[0] != 0 ? mpn_add_1 (rp, rp, 1, sp[0]) : 1;
      cc += mpn_add_1 (rp, rp, 1, --sp[0]);
    }

  return cc;
}
02985 
02986 /* writes in {sp, n} the square root (rounded towards zero) of {np, 2n},
02987    and in {np, n} the low n limbs of the remainder, returns the high
02988    limb of the remainder (which is 0 or 1).
02989    Assumes {np, 2n} is normalized, i.e. np[2n-1] >= B/4
02990    where B=2^GMP_NUMB_BITS.  */
static mp_limb_t
mpn_dc_sqrtrem (mp_ptr sp, mp_ptr np, mp_size_t n)
{
  mp_limb_t q;                     /* carry out of {sp, n} */
  int c, b;                 /* carry out of remainder */
  mp_size_t l, h;

  /* {np, 2n} must be normalized: np[2n-1] >= B/4.  */
  ASSERT (np[2 * n - 1] >= GMP_NUMB_HIGHBIT / 2);

  if (n == 1)
    c = mpn_sqrtrem2 (sp, np, np);
  else
    {
      /* Divide-and-conquer ("Karatsuba square root"): recurse on the
         high h limbs, then one division yields the low l limbs of the
         root, and one squaring fixes up the remainder.  */
      l = n / 2;
      h = n - l;
      q = mpn_dc_sqrtrem (sp + l, np + 2 * l, h);
      if (q != 0)
        mpn_sub_n (np + 2 * l, np + 2 * l, sp + l, h);
      /* Divide (remainder, next l limbs) by 2*S'; the halving is done
         by the right shift below, c saving the bit shifted out.  */
      q += mpn_divrem (sp, 0, np + l, n, sp + l, h);
      c = sp[0] & 1;
      mpn_rshift (sp, sp, l, 1);
      sp[l - 1] |= (q << (GMP_NUMB_BITS - 1)) & GMP_NUMB_MASK;
      q >>= 1;
      if (c != 0)
        c = mpn_add_n (np + l, np + l, sp + l, h);
      /* Subtract the square of the low part of the root.  */
      mpn_sqr_n (np + n, sp, l);
      b = q + mpn_sub_n (np, np, np + n, 2 * l);
      c -= (l == h) ? b : mpn_sub_1 (np + 2 * l, np + 2 * l, 1, b);
      q = mpn_add_1 (sp + l, sp + l, h, q);

      /* Negative remainder: root is one too large; add back 2*S - 1
         and decrement S.  */
      if (c < 0)
        {
          c += mpn_addmul_1 (np, sp, n, 2) + 2 * q;
          c -= mpn_sub_1 (np, np, n, 1);
          q -= mpn_sub_1 (sp, sp, n, 1);
        }
    }

  return c;
}
03031 
03032 
/* Writes floor(sqrt({np, nn})) at {sp, ceil(nn/2)} and, when rp is not
   NULL, the remainder at rp; returns the size of the remainder (0 iff
   the input is a perfect square).  The input is shifted left by an even
   bit count to normalize it for mpn_dc_sqrtrem, and the root/remainder
   are shifted back afterwards.  */
mp_size_t
mpn_sqrtrem (mp_ptr sp, mp_ptr rp, mp_srcptr np, mp_size_t nn)
{
  mp_limb_t *tp, s0[1], cc, high, rl;
  int c;
  mp_size_t rn, tn;
  TMP_DECL (marker);

  ASSERT (nn >= 0);

  /* If OP is zero, both results are zero.  */
  if (nn == 0)
    return 0;

  ASSERT (np[nn - 1] != 0);
  ASSERT (rp == NULL || MPN_SAME_OR_SEPARATE_P (np, rp, nn));
  ASSERT (rp == NULL || ! MPN_OVERLAP_P (sp, (nn + 1) / 2, rp, nn));
  ASSERT (! MPN_OVERLAP_P (sp, (nn + 1) / 2, np, nn));

  high = np[nn - 1];
  /* A single, already-normalized limb: one base-case call suffices.  */
  if (nn == 1 && (high & GMP_NUMB_HIGHBIT))
    return mpn_sqrtrem1 (sp, rp, np);
  count_leading_zeros (c, high);
  c -= GMP_NAIL_BITS;

  c = c / 2; /* we have to shift left by 2c bits to normalize {np, nn} */
  tn = (nn + 1) / 2; /* 2*tn is the smallest even integer >= nn */

  TMP_MARK (marker);
  if (nn % 2 != 0 || c > 0)
    {
      /* Odd size or unnormalized input: work on a shifted copy.  */
      tp = TMP_ALLOC_LIMBS (2 * tn);
      tp[0] = 0;          /* needed only when 2*tn > nn, but saves a test */
      if (c != 0)
       mpn_lshift (tp + 2 * tn - nn, np, nn, 2 * c);
      else
       MPN_COPY (tp + 2 * tn - nn, np, nn);
      rl = mpn_dc_sqrtrem (sp, tp, tn);
      /* We have 2^(2k)*N = S^2 + R where k = c + (2tn-nn)*GMP_NUMB_BITS/2,
        thus 2^(2k)*N = (S-s0)^2 + 2*S*s0 - s0^2 + R where s0=S mod 2^k */
      c += (nn % 2) * GMP_NUMB_BITS / 2;         /* c now represents k */
      s0[0] = sp[0] & (((mp_limb_t) 1 << c) - 1);       /* S mod 2^k */
      rl += mpn_addmul_1 (tp, sp, tn, 2 * s0[0]);       /* R = R + 2*s0*S */
      cc = mpn_submul_1 (tp, s0, 1, s0[0]);
      rl -= (tn > 1) ? mpn_sub_1 (tp + 1, tp + 1, tn - 1, cc) : cc;
      mpn_rshift (sp, sp, tn, c);
      tp[tn] = rl;
      if (rp == NULL)
       rp = tp;
      /* Shift the remainder right by 2k bits: a whole-limb step when
         2k >= GMP_NUMB_BITS, then a bit shift for the rest.  */
      c = c << 1;
      if (c < GMP_NUMB_BITS)
       tn++;
      else
       {
         tp++;
         c -= GMP_NUMB_BITS;
       }
      if (c != 0)
       mpn_rshift (rp, tp, tn, c);
      else
       MPN_COPY_INCR (rp, tp, tn);
      rn = tn;
    }
  else
    {
      /* Even size and already normalized: compute in place (using a
         scratch copy when the caller does not want the remainder).  */
      if (rp == NULL)
       rp = TMP_ALLOC_LIMBS (nn);
      if (rp != np)
       MPN_COPY (rp, np, nn);
      rn = tn + (rp[tn] = mpn_dc_sqrtrem (sp, rp, tn));
    }

  MPN_NORMALIZE (rp, rn);

  TMP_FREE (marker);
  return rn;
}
03110 
03111 /* mpn_bz_divrem_n and auxilliary routines. */
03112 
03113 /*
03114 [1] Fast Recursive Division, by Christoph Burnikel and Joachim Ziegler,
03115     Technical report MPI-I-98-1-022, october 1998.
03116     http://www.mpi-sb.mpg.de/~ziegler/TechRep.ps.gz
03117 */
03118 
03119 static mp_limb_t mpn_bz_div_3_halves_by_2
03120   _PROTO ((mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n));
03121 
03122 
03123 /* mpn_bz_divrem_n(n) calls 2*mul(n/2)+2*div(n/2), thus to be faster than
03124    div(n) = 4*div(n/2), we need mul(n/2) to be faster than the classic way,
03125    i.e. n/2 >= KARATSUBA_MUL_THRESHOLD */
03126 #ifndef BZ_THRESHOLD
03127 #define BZ_THRESHOLD (7 * KARATSUBA_MUL_THRESHOLD)
03128 #endif
03129 
03130 /* mpn_bz_divrem_n - Implements algorithm of page 8 in [1]: divides (np,2n)
03131    by (dp,n) and puts the quotient in (qp,n), the remainder in (np,n).
03132    Returns most significant limb of the quotient, which is 0 or 1.
03133    Requires that the most significant bit of the divisor is set.  */
03134 
mp_limb_t
#if __STDC__
mpn_bz_divrem_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n)
#else
mpn_bz_divrem_n (qp, np, dp, n)
     mp_ptr qp;
     mp_ptr np;
     mp_srcptr dp;
     mp_size_t n;
#endif
{
  mp_limb_t qhl, cc;

  if (n % 2 != 0)
    {
      /* Odd n: recurse with one limb peeled off each operand, then
         fix up the contribution of the divisor's low limb dp[0].  */
      qhl = mpn_bz_divrem_n (qp + 1, np + 2, dp + 1, n - 1);
      cc = mpn_submul_1 (np + 1, qp + 1, n - 1, dp[0]);
      cc = mpn_sub_1 (np + n, np + n, 1, cc);
      if (qhl) cc += mpn_sub_1 (np + n, np + n, 1, dp[0]);
      /* Quotient was too large: decrement it and add the divisor back
         until the remainder is non-negative.  */
      while (cc)
        {
          qhl -= mpn_sub_1 (qp + 1, qp + 1, n - 1, (mp_limb_t) 1);
          cc -= mpn_add_n (np + 1, np + 1, dp, n);
        }
      /* Compute the remaining low quotient limb school-book style.  */
      qhl += mpn_add_1 (qp + 1, qp + 1, n - 1,
                        mpn_sb_divrem_mn (qp, np, n + 1, dp, n));
    }
  else
    {
      /* Even n: two half-size "3 halves by 2" steps ([1], page 8).  */
      mp_size_t n2 = n/2;
      qhl = mpn_bz_div_3_halves_by_2 (qp + n2, np + n2, dp, n2);
      qhl += mpn_add_1 (qp + n2, qp + n2, n2,
                        mpn_bz_div_3_halves_by_2 (qp, np, dp, n2));
    }
  return qhl;
}
03171 
03172 
03173 /* divides (np, 3n) by (dp, 2n) and puts the quotient in (qp, n),
03174    the remainder in (np, 2n) */
03175 
static mp_limb_t
#if __STDC__
mpn_bz_div_3_halves_by_2 (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n)
#else
mpn_bz_div_3_halves_by_2 (qp, np, dp, n)
     mp_ptr qp;
     mp_ptr np;
     mp_srcptr dp;
     mp_size_t n;
#endif
{
  mp_size_t twon = n + n; 
  mp_limb_t qhl, cc;
  mp_ptr tmp;
  TMP_DECL (marker);

  TMP_MARK (marker);
  /* Divide the high 2n limbs of np by the high n limbs of dp, using
     the school-book routine below the recursion threshold.  */
  if (n < BZ_THRESHOLD)
    qhl = mpn_sb_divrem_mn (qp, np + n, twon, dp + n, n);
  else
    qhl = mpn_bz_divrem_n (qp, np + n, dp + n, n);
  /* Subtract q times the low n limbs of dp from the remainder.  */
  tmp = (mp_ptr) TMP_ALLOC (twon * BYTES_PER_MP_LIMB);
  mpn_mul_n (tmp, qp, dp, n);
  cc = mpn_sub_n (np, np, tmp, twon);
  TMP_FREE (marker);
  if (qhl) cc += mpn_sub_n (np + n, np + n, dp, n);
  /* Overestimated quotient: decrement and add the divisor back.  */
  while (cc)
    {
      qhl -= mpn_sub_1 (qp, qp, n, (mp_limb_t) 1);
      cc -= mpn_add_n (np, np, dp, twon);
    }
  return qhl;
}
03209 
03210 /* mpn_sb_divrem_mn -- Divide natural numbers, producing both remainder and
03211    quotient. */
03212 
03213 /* Divide num (NP/NSIZE) by den (DP/DSIZE) and write
03214    the NSIZE-DSIZE least significant quotient limbs at QP
03215    and the DSIZE long remainder at NP.  If QEXTRA_LIMBS is
03216    non-zero, generate that many fraction bits and append them after the
03217    other quotient limbs.
03218    Return the most significant limb of the quotient, this is always 0 or 1.
03219 
03220    Preconditions:
03221    0. NSIZE >= DSIZE.
03222    1. The most significant bit of the divisor must be set.
03223    2. QP must either not overlap with the input operands at all, or
03224       QP + DSIZE >= NP must hold true.  (This means that it's
03225       possible to put the quotient in the high part of NUM, right after the
03226       remainder in NUM.
03227    3. NSIZE >= DSIZE, even if QEXTRA_LIMBS is non-zero.
03228    4. DSIZE >= 2.  */
03229 
03230 
03231 #define PREINVERT_VIABLE \
03232   (UDIV_TIME > 2 * UMUL_TIME + 6 /* && ! TARGET_REGISTER_STARVED */)
03233 
mp_limb_t
#if __STDC__
mpn_sb_divrem_mn (mp_ptr qp,
              mp_ptr np, mp_size_t nsize,
              mp_srcptr dp, mp_size_t dsize)
#else
mpn_sb_divrem_mn (qp, np, nsize, dp, dsize)
     mp_ptr qp;
     mp_ptr np;
     mp_size_t nsize;
     mp_srcptr dp;
     mp_size_t dsize;
#endif
{
  mp_limb_t most_significant_q_limb = 0;
  mp_size_t i;
  mp_limb_t dx, d1, n0;
  mp_limb_t dxinv = 0;
  int have_preinv;

  /* NOTE(review): the precondition comment above says "DSIZE >= 2" but
     this asserts dsize > 2 -- confirm which contract callers rely on.  */
  ASSERT_ALWAYS (dsize > 2);

  np += nsize - dsize;
  dx = dp[dsize - 1];        /* most significant divisor limb */
  d1 = dp[dsize - 2];        /* second most significant divisor limb */
  n0 = np[dsize - 1];        /* most significant dividend limb */

  /* If the high part of the dividend is >= the divisor, the most
     significant quotient limb is 1; subtract the divisor once.  */
  if (n0 >= dx)
    {
      if (n0 > dx || mpn_cmp (np, dp, dsize - 1) >= 0)
       {
         mpn_sub_n (np, np, dp, dsize);
         most_significant_q_limb = 1;
       }
    }

  /* If multiplication is much faster than division, preinvert the
     most significant divisor limb before entering the loop.  */
  if (PREINVERT_VIABLE)
    {
      have_preinv = 0;
      if ((UDIV_TIME - (2 * UMUL_TIME + 6)) * (nsize - dsize) > UDIV_TIME)
       {
         invert_limb (dxinv, dx);
         have_preinv = 1;
       }
    }

  /* School-book loop: one quotient limb per iteration, most
     significant first.  */
  for (i = nsize - dsize - 1; i >= 0; i--)
    {
      mp_limb_t q;
      mp_limb_t nx;
      mp_limb_t cy_limb;

      nx = np[dsize - 1];
      np--;

      if (nx == dx)
       {
         /* This might over-estimate q, but it's probably not worth
            the extra code here to find out.  */
         q = ~(mp_limb_t) 0;

#if 1
         cy_limb = mpn_submul_1 (np, dp, dsize, q);
#else
         /* This should be faster on many machines */
         cy_limb = mpn_sub_n (np + 1, np + 1, dp, dsize);
         cy = mpn_add_n (np, np, dp, dsize);
         np[dsize] += cy;
#endif

         /* Borrow mismatch means q was one too large: add back.  */
         if (nx != cy_limb)
           {
             mpn_add_n (np, np, dp, dsize);
             q--;
           }

         qp[i] = q;
       }
      else
       {
         mp_limb_t rx, r1, r0, p1, p0;

          /* "workaround" avoids a problem with gcc 2.7.2.3 i386 register
             usage when np[dsize-1] is used in an asm statement like
             umul_ppmm in udiv_qrnnd_preinv.  The symptom is seg faults due
             to registers being clobbered.  gcc 2.95 i386 doesn't have the
             problem. */
          {
            mp_limb_t  workaround = np[dsize - 1];
            if (PREINVERT_VIABLE && have_preinv)
              udiv_qrnnd_preinv (q, r1, nx, workaround, dx, dxinv);
            else
              udiv_qrnnd (q, r1, nx, workaround, dx);
          }
         /* Refine q using the second divisor limb d1 (3-by-2 step).  */
         umul_ppmm (p1, p0, d1, q);

         r0 = np[dsize - 2];
         rx = 0;
         if (r1 < p1 || (r1 == p1 && r0 < p0))
           {
             p1 -= p0 < d1;
             p0 -= d1;
             q--;
             r1 += dx;
             rx = r1 < dx;
           }

         p1 += r0 < p0;     /* cannot carry! */
         rx -= r1 < p1;     /* may become 11..1 if q is still too large */
         r1 -= p1;
         r0 -= p0;

         /* Subtract q times the low dsize-2 divisor limbs.  */
         cy_limb = mpn_submul_1 (np, dp, dsize - 2, q);

         {
           mp_limb_t cy1, cy2;
           cy1 = r0 < cy_limb;
           r0 -= cy_limb;
           cy2 = r1 < cy1;
           r1 -= cy1;
           np[dsize - 1] = r1;
           np[dsize - 2] = r0;
           /* Borrow mismatch: q was one too large, add back.  */
           if (cy2 != rx)
             {
              mpn_add_n (np, np, dp, dsize);
              q--;
             }
         }
         qp[i] = q;
       }
    }

  /* ______ ______ ______
    |__rx__|__r1__|__r0__|         partial remainder
           ______ ______
        - |__p1__|__p0__|          partial product to subtract
           ______ ______
        - |______|cylimb|          

     rx is -1, 0 or 1.  If rx=1, then q is correct (it should match
     carry out).  If rx=-1 then q is too large.  If rx=0, then q might
     be too large, but it is most likely correct.
  */

  return most_significant_q_limb;
}
03382 
03383 /* mpn_divrem_1(quot_ptr, qsize, dividend_ptr, dividend_size, divisor_limb) --
03384    Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
03385    Write DIVIDEND_SIZE limbs of quotient at QUOT_PTR.
03386    Return the single-limb remainder.
03387    There are no constraints on the value of the divisor.
03388 
03389    QUOT_PTR and DIVIDEND_PTR might point to the same limb.
03390  */
03391 
03392 /* __gmpn_divmod_1_internal(quot_ptr,dividend_ptr,dividend_size,divisor_limb)
03393    Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
03394    Write DIVIDEND_SIZE limbs of quotient at QUOT_PTR.
03395    Return the single-limb remainder.
03396    There are no constraints on the value of the divisor.
03397 
03398    QUOT_PTR and DIVIDEND_PTR might point to the same limb. */
03399 
03400 #ifndef UMUL_TIME
03401 #define UMUL_TIME 1
03402 #endif
03403 
03404 #ifndef UDIV_TIME
03405 #define UDIV_TIME UMUL_TIME
03406 #endif
03407 
static mp_limb_t
#if __STDC__
__gmpn_divmod_1_internal (mp_ptr quot_ptr,
             mp_srcptr dividend_ptr, mp_size_t dividend_size,
             mp_limb_t divisor_limb)
#else
__gmpn_divmod_1_internal (quot_ptr, dividend_ptr, dividend_size, divisor_limb)
     mp_ptr quot_ptr;
     mp_srcptr dividend_ptr;
     mp_size_t dividend_size;
     mp_limb_t divisor_limb;
#endif
{
  mp_size_t i;
  mp_limb_t n1, n0, r;

  /* ??? Should this be handled at all?  Rely on callers?  */
  if (dividend_size == 0)
    return 0;

  /* If multiplication is much faster than division, and the
     dividend is large, pre-invert the divisor, and use
     only multiplications in the inner loop.  */

  /* This test should be read:
       Does it ever help to use udiv_qrnnd_preinv?
        && Does what we save compensate for the inversion overhead?  */
  if (UDIV_TIME > (2 * UMUL_TIME + 6)
      && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME)
    {
      int normalization_steps;

      count_leading_zeros (normalization_steps, divisor_limb);
      if (normalization_steps != 0)
       {
         /* Unnormalized divisor: shift divisor (and, on the fly, the
            dividend) left so the divisor's high bit is set; divide
            with the preinverted divisor; shift the remainder back.  */
         mp_limb_t divisor_limb_inverted;

         divisor_limb <<= normalization_steps;
         invert_limb (divisor_limb_inverted, divisor_limb);

         n1 = dividend_ptr[dividend_size - 1];
         r = n1 >> (BITS_PER_MP_LIMB - normalization_steps);

         /* Possible optimization:
            if (r == 0
            && divisor_limb > ((n1 << normalization_steps)
                          | (dividend_ptr[dividend_size - 2] >> ...)))
            ...one division less... */

         for (i = dividend_size - 2; i >= 0; i--)
           {
             n0 = dividend_ptr[i];
             udiv_qrnnd_preinv (quot_ptr[i + 1], r, r,
                             ((n1 << normalization_steps)
                              | (n0 >> (BITS_PER_MP_LIMB - normalization_steps))),
                             divisor_limb, divisor_limb_inverted);
             n1 = n0;
           }
         udiv_qrnnd_preinv (quot_ptr[0], r, r,
                          n1 << normalization_steps,
                          divisor_limb, divisor_limb_inverted);
         return r >> normalization_steps;
       }
      else
       {
         /* Divisor already normalized: just preinvert and divide.  */
         mp_limb_t divisor_limb_inverted;

         invert_limb (divisor_limb_inverted, divisor_limb);

         i = dividend_size - 1;
         r = dividend_ptr[i];

         /* If the top dividend limb is >= the divisor it cannot be
            skipped: start with r = 0 and process every limb.
            Otherwise the top quotient limb is 0 and the top dividend
            limb seeds the running remainder.  */
         if (r >= divisor_limb)
           r = 0;
         else
           {
             quot_ptr[i] = 0;
             i--;
           }

         for (; i >= 0; i--)
           {
             n0 = dividend_ptr[i];
             udiv_qrnnd_preinv (quot_ptr[i], r, r,
                             n0, divisor_limb, divisor_limb_inverted);
           }
         return r;
       }
    }
  else
    {
      if (UDIV_NEEDS_NORMALIZATION)
       {
         int normalization_steps;

         count_leading_zeros (normalization_steps, divisor_limb);
         if (normalization_steps != 0)
           {
             divisor_limb <<= normalization_steps;

             n1 = dividend_ptr[dividend_size - 1];
             r = n1 >> (BITS_PER_MP_LIMB - normalization_steps);

             /* Possible optimization:
               if (r == 0
               && divisor_limb > ((n1 << normalization_steps)
                             | (dividend_ptr[dividend_size - 2] >> ...)))
               ...one division less... */

             for (i = dividend_size - 2; i >= 0; i--)
              {
                n0 = dividend_ptr[i];
                udiv_qrnnd (quot_ptr[i + 1], r, r,
                           ((n1 << normalization_steps)
                            | (n0 >> (BITS_PER_MP_LIMB - normalization_steps))),
                           divisor_limb);
                n1 = n0;
              }
             udiv_qrnnd (quot_ptr[0], r, r,
                       n1 << normalization_steps,
                       divisor_limb);
             return r >> normalization_steps;
           }
       }
      /* No normalization needed, either because udiv_qrnnd doesn't require
        it, or because DIVISOR_LIMB is already normalized.  */

      i = dividend_size - 1;
      r = dividend_ptr[i];

      /* Same top-limb handling as the preinverted path above.  */
      if (r >= divisor_limb)
       r = 0;
      else
       {
         quot_ptr[i] = 0;
         i--;
       }

      for (; i >= 0; i--)
       {
         n0 = dividend_ptr[i];
         udiv_qrnnd (quot_ptr[i], r, r, n0, divisor_limb);
       }
      return r;
    }
}
03554 
03555 
03556 
mp_limb_t
#if __STDC__
mpn_divrem_1 (mp_ptr qp, mp_size_t qxn,
             mp_srcptr np, mp_size_t nn,
             mp_limb_t d)
#else
mpn_divrem_1 (qp, qxn, np, nn, d)
     mp_ptr qp;
     mp_size_t qxn;
     mp_srcptr np;
     mp_size_t nn;
     mp_limb_t d;
#endif
{
  mp_limb_t rlimb;
  mp_size_t i;

  /* Develop integer part of quotient.  */
  rlimb = __gmpn_divmod_1_internal (qp + qxn, np, nn, d);

  /* Develop fraction part of quotient.  This is not as fast as it should;
     the preinvert stuff from __gmpn_divmod_1_internal ought to be used here
     too.  */
  if (UDIV_NEEDS_NORMALIZATION)
    {
      int normalization_steps;

      count_leading_zeros (normalization_steps, d);
      if (normalization_steps != 0)
       {
         /* Normalize divisor and remainder, develop the qxn fraction
            limbs, then shift the final remainder back down.  */
         d <<= normalization_steps;
         rlimb <<= normalization_steps;

         for (i = qxn - 1; i >= 0; i--)
           udiv_qrnnd (qp[i], rlimb, rlimb, 0, d);

         return rlimb >> normalization_steps;
       }
      else
       /* fall out */
       ;
    }

  /* Each fraction limb comes from dividing (rlimb, 0) by d.  */
  for (i = qxn - 1; i >= 0; i--)
    udiv_qrnnd (qp[i], rlimb, rlimb, 0, d);

  return rlimb;
}
03605 
03606 /* mpn_divrem_2 -- Divide natural numbers, producing both remainder and
03607    quotient.  The divisor is two limbs. */
03608 
03609 /* Divide num (NP/NSIZE) by den (DP/2) and write
03610    the NSIZE-2 least significant quotient limbs at QP
03611    and the 2 long remainder at NP.  If QEXTRA_LIMBS is
03612    non-zero, generate that many fraction bits and append them after the
03613    other quotient limbs.
03614    Return the most significant limb of the quotient, this is always 0 or 1.
03615 
03616    Preconditions:
03617    0. NSIZE >= 2.
03618    1. The most significant bit of the divisor must be set.
03619    2. QP must either not overlap with the input operands at all, or
03620       QP + 2 >= NP must hold true.  (This means that it's
03621       possible to put the quotient in the high part of NUM, right after the
03622       remainder in NUM.
03623    3. NSIZE >= 2, even if QEXTRA_LIMBS is non-zero.  */
03624 
mp_limb_t
#if __STDC__
mpn_divrem_2 (mp_ptr qp, mp_size_t qxn,
             mp_ptr np, mp_size_t nsize,
             mp_srcptr dp)
#else
mpn_divrem_2 (qp, qxn, np, nsize, dp)
     mp_ptr qp;
     mp_size_t qxn;
     mp_ptr np;
     mp_size_t nsize;
     mp_srcptr dp;
#endif
{
  mp_limb_t most_significant_q_limb = 0;
  mp_size_t i;
  mp_limb_t n1, n0, n2;
  mp_limb_t d1, d0;
  mp_limb_t d1inv = 0;
  int have_preinv;

  np += nsize - 2;
  d1 = dp[1];      /* high divisor limb (must have its top bit set) */
  d0 = dp[0];      /* low divisor limb */
  n1 = np[1];
  n0 = np[0];

  /* High two dividend limbs >= divisor: most significant quotient
     limb is 1; subtract the divisor once.  */
  if (n1 >= d1 && (n1 > d1 || n0 >= d0))
    {
      sub_ddmmss (n1, n0, n1, n0, d1, d0);
      most_significant_q_limb = 1;
    }

  /* If multiplication is much faster than division, preinvert the most 
     significant divisor limb before entering the loop.  */
  if (UDIV_TIME > 2 * UMUL_TIME + 6)
    {
      have_preinv = 0;
      if ((UDIV_TIME - (2 * UMUL_TIME + 6)) * (nsize - 2) > UDIV_TIME)
       {
         invert_limb (d1inv, d1);
         have_preinv = 1;
       }
    }

  /* One quotient limb per iteration; indices >= qxn consume dividend
     limbs, indices < qxn develop fraction limbs from zeros.  */
  for (i = qxn + nsize - 2 - 1; i >= 0; i--)
    {
      mp_limb_t q;
      mp_limb_t r;

      if (i >= qxn)
       np--;
      else
       np[0] = 0;

      if (n1 == d1)
       {
         /* Q should be either 111..111 or 111..110.  Need special treatment
            of this rare case as normal division would give overflow.  */
         q = ~(mp_limb_t) 0;

         r = n0 + d1;
         if (r < d1) /* Carry in the addition? */
           {
             add_ssaaaa (n1, n0, r - d0, np[0], 0, d0);
             qp[i] = q;
             continue;
           }
         n1 = d0 - (d0 != 0);
         n0 = -d0;
       }
      else
       {
         /* Estimate q from the high limbs, then multiply by d0 to
            check the estimate below.  */
         if (UDIV_TIME > 2 * UMUL_TIME + 6 && have_preinv)
           udiv_qrnnd_preinv (q, r, n1, n0, d1, d1inv);
         else
           udiv_qrnnd (q, r, n1, n0, d1);
         umul_ppmm (n1, n0, d0, q);
       }

      n2 = np[0];

    q_test:
      if (n1 > r || (n1 == r && n0 > n2))
       {
         /* The estimated Q was too large.  */
         q--;

         sub_ddmmss (n1, n0, n1, n0, 0, d0);
         r += d1;
         if (r >= d1)       /* If not carry, test Q again.  */
           goto q_test;
       }

      qp[i] = q;
      /* New partial remainder: (r, n2) minus q*d0's contribution.  */
      sub_ddmmss (n1, n0, r, n2, n1, n0);
    }
  /* Store the final two-limb remainder.  */
  np[1] = n1;
  np[0] = n0;

  return most_significant_q_limb;
}
03727 
03728 /* mpn_mul_basecase -- Internal routine to multiply two natural numbers
03729    of length m and n. */
03730 
03731 /* Handle simple cases with traditional multiplication.
03732 
03733    This is the most critical code of multiplication.  All multiplies rely on
03734    this, both small and huge.  Small ones arrive here immediately, huge ones
03735    arrive here as this is the base case for Karatsuba's recursive algorithm. */
03736 
void
#if __STDC__
mpn_mul_basecase (mp_ptr prodp,
                   mp_srcptr up, mp_size_t usize,
                   mp_srcptr vp, mp_size_t vsize)
#else
mpn_mul_basecase (prodp, up, usize, vp, vsize)
     mp_ptr prodp;
     mp_srcptr up;
     mp_size_t usize;
     mp_srcptr vp;
     mp_size_t vsize;
#endif
{
  /* We first multiply by the low order one or two limbs, as the result can
     be stored, not added, to PROD.  We also avoid a loop for zeroing this
     way.  */
#if HAVE_NATIVE_mpn_mul_2
  /* With a native mul_2, consume the low two limbs of V at once.  */
  if (vsize >= 2)
    {
      prodp[usize + 1] = mpn_mul_2 (prodp, up, usize, vp[0], vp[1]);
      prodp += 2, vp += 2, vsize -= 2;
    }
  else
    {
      prodp[usize] = mpn_mul_1 (prodp, up, usize, vp[0]);
      return;
    }
#else
  prodp[usize] = mpn_mul_1 (prodp, up, usize, vp[0]);
  prodp += 1, vp += 1, vsize -= 1;
#endif

#if HAVE_NATIVE_mpn_addmul_2
  /* Accumulate two V limbs per pass where a native addmul_2 exists.  */
  while (vsize >= 2)
    {
      prodp[usize + 1] = mpn_addmul_2 (prodp, up, usize, vp[0], vp[1]);
      prodp += 2, vp += 2, vsize -= 2;
    }
  if (vsize != 0)
    prodp[usize] = mpn_addmul_1 (prodp, up, usize, vp[0]);
#else
  /* For each iteration in the loop, multiply U with one limb from V, and
     add the result to PROD.  */
  while (vsize != 0)
    {
      prodp[usize] = mpn_addmul_1 (prodp, up, usize, vp[0]);
      prodp += 1, vp += 1, vsize -= 1;
    }
#endif
}
03788 
03789 
03790 /* mpn_sqr_basecase -- Internal routine to square two natural numbers
03791    of length m and n. */
03792 
03793 
03794 #define SQR_KARATSUBA_THRESHOLD KARATSUBA_SQR_THRESHOLD 
03795 
/* Square {up, n} into {prodp, 2n} by the school-book method: compute
   the off-diagonal products up[i]*up[j] (i<j) once into a temporary,
   the diagonal squares up[i]^2 directly into prodp, then add twice the
   off-diagonal part.  */
void
mpn_sqr_basecase (mp_ptr prodp, mp_srcptr up, mp_size_t n)
{
  mp_size_t i;

  ASSERT (n >= 1);
  ASSERT (! MPN_OVERLAP_P (prodp, 2*n, up, n));

  {
    /* N.B.!  We need the superfluous indirection through argh to work around
       a reloader bug in GCC 2.7.*.  */
#if GMP_NAIL_BITS == 0
    mp_limb_t ul, argh;
    ul = up[0];
    umul_ppmm (argh, prodp[0], ul, ul);
    prodp[1] = argh;
#else
    mp_limb_t ul, lpl;
    ul = up[0];
    umul_ppmm (prodp[1], lpl, ul, ul << GMP_NAIL_BITS);
    prodp[0] = lpl >> GMP_NAIL_BITS;
#endif
  }
  if (n > 1)
    {
      mp_limb_t tarr[2 * SQR_KARATSUBA_THRESHOLD];
      mp_ptr tp = tarr;
      mp_limb_t cy;

      /* must fit 2*n limbs in tarr */
      ASSERT (n <= SQR_KARATSUBA_THRESHOLD);

      /* Off-diagonal products: tp accumulates sum of up[i]*up[j], i<j.  */
      cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]);
      tp[n - 1] = cy;
      for (i = 2; i < n; i++)
       {
         mp_limb_t cy;
         cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]);
         tp[n + i - 2] = cy;
       }
      /* Diagonal squares go straight into prodp.  */
#if HAVE_NATIVE_mpn_sqr_diagonal
      mpn_sqr_diagonal (prodp + 2, up + 1, n - 1);
#else
      for (i = 1; i < n; i++)
       {
#if GMP_NAIL_BITS == 0
         mp_limb_t ul;
         ul = up[i];
         umul_ppmm (prodp[2 * i + 1], prodp[2 * i], ul, ul);
#else
         mp_limb_t ul, lpl;
         ul = up[i];
         umul_ppmm (prodp[2 * i + 1], lpl, ul, ul << GMP_NAIL_BITS);
         prodp[2 * i] = lpl >> GMP_NAIL_BITS;
#endif
       }
#endif
      {
       /* Double the off-diagonal part and add it in, one limb up.  */
       mp_limb_t cy;
       cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
       cy += mpn_add_n (prodp + 1, prodp + 1, tp, 2 * n - 2);
       prodp[2 * n - 1] += cy;
      }
    }
}
03861 
03862 #if 0
03863 
/* Disabled legacy copy (inside the surrounding #if 0): an older,
   nail-unaware mpn_sqr_basecase kept for reference only; the live
   implementation is the function defined earlier in this file.  */
void
#if __STDC__
mpn_sqr_basecase (mp_ptr prodp, mp_srcptr up, mp_size_t n)
#else
mpn_sqr_basecase (prodp, up, n)
     mp_ptr prodp;
     mp_srcptr up;
     mp_size_t n;
#endif
{
  mp_size_t i;

  {
    /* N.B.!  We need the superfluous indirection through argh to work around
       a reloader bug in GCC 2.7.*.  */
    mp_limb_t x;
    mp_limb_t argh;
    x = up[0];
    umul_ppmm (argh, prodp[0], x, x);
    prodp[1] = argh;
  }
  if (n > 1)
    {
      mp_limb_t tarr[2 * KARATSUBA_SQR_THRESHOLD];
      mp_ptr tp = tarr;
      mp_limb_t cy;

      /* must fit 2*n limbs in tarr */
      ASSERT (n <= KARATSUBA_SQR_THRESHOLD);

      /* Off-diagonal products.  */
      cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]);
      tp[n - 1] = cy;
      for (i = 2; i < n; i++)
       {
         mp_limb_t cy;
         cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]);
         tp[n + i - 2] = cy;
       }
      /* Diagonal squares.  */
      for (i = 1; i < n; i++)
       {
         mp_limb_t x;
         x = up[i];
         umul_ppmm (prodp[2 * i + 1], prodp[2 * i], x, x);
       }
      {
       /* Double the off-diagonal part and add it in.  */
       mp_limb_t cy;
       cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
       cy += mpn_add_n (prodp + 1, prodp + 1, tp, 2 * n - 2);
       prodp[2 * n - 1] += cy;
      }
    }
}
03916 
03917 #endif
03918 
03919 /* mpn_mul_1 -- Multiply a limb vector with a single limb and
03920    store the product in a second limb vector. */
03921 
03922 mp_limb_t
03923 mpn_mul_1 (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_size_t s1_size, mp_limb_t s2_limb)
03924 {
03925   register mp_limb_t cy_limb;
03926   register mp_size_t j;
03927   register mp_limb_t prod_high, prod_low;
03928 
03929   SCHEME_BIGNUM_USE_FUEL(s1_size);
03930 
03931   /* The loop counter and index J goes from -S1_SIZE to -1.  This way
03932      the loop becomes faster.  */
03933   j = -s1_size;
03934 
03935   /* Offset the base pointers to compensate for the negative indices.  */
03936   s1_ptr -= j;
03937   res_ptr -= j;
03938 
03939   cy_limb = 0;
03940   do
03941     {
03942       umul_ppmm (prod_high, prod_low, s1_ptr[j], s2_limb);
03943 
03944       prod_low += cy_limb;
03945       cy_limb = (prod_low < cy_limb) + prod_high;
03946 
03947       res_ptr[j] = prod_low;
03948     }
03949   while (++j != 0);
03950 
03951   return cy_limb;
03952 }
03953 
03954 /* mpn_addmul_1 -- multiply the S1_SIZE long limb vector pointed to by S1_PTR
03955    by S2_LIMB, add the S1_SIZE least significant limbs of the product to the
03956    limb vector pointed to by RES_PTR.  Return the most significant limb of
03957    the product, adjusted for carry-out from the addition. */
03958 
03959 mp_limb_t
03960 mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
03961 {
03962   mp_limb_t ul, cl, hpl, lpl, rl;
03963  
03964   SCHEME_BIGNUM_USE_FUEL(n);
03965  
03966   cl = 0;
03967   do
03968     {
03969       ul = *up++;
03970       umul_ppmm (hpl, lpl, ul, vl);
03971                                                                                 
03972       lpl += cl;
03973       cl = (lpl < cl) + hpl;
03974                                                                                 
03975       rl = *rp;
03976       lpl = rl + lpl;
03977       cl += lpl < rl;
03978       *rp++ = lpl;
03979     }
03980   while (--n != 0);
03981                                                                                 
03982   return cl;
03983 }
03984 
03985 /* mpn_submul_1 -- multiply the S1_SIZE long limb vector pointed to by S1_PTR
03986    by S2_LIMB, subtract the S1_SIZE least significant limbs of the product
03987    from the limb vector pointed to by RES_PTR.  Return the most significant
03988    limb of the product, adjusted for carry-out from the subtraction.
03989  */
03990 
03991 mp_limb_t
03992 mpn_submul_1 (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_size_t s1_size, mp_limb_t s2_limb)
03993 {
03994   register mp_limb_t cy_limb;
03995   register mp_size_t j;
03996   register mp_limb_t prod_high, prod_low;
03997   register mp_limb_t x;
03998 
03999   SCHEME_BIGNUM_USE_FUEL(s1_size);
04000 
04001   /* The loop counter and index J goes from -SIZE to -1.  This way
04002      the loop becomes faster.  */
04003   j = -s1_size;
04004 
04005   /* Offset the base pointers to compensate for the negative indices.  */
04006   res_ptr -= j;
04007   s1_ptr -= j;
04008 
04009   cy_limb = 0;
04010   do
04011     {
04012       umul_ppmm (prod_high, prod_low, s1_ptr[j], s2_limb);
04013 
04014       prod_low += cy_limb;
04015       cy_limb = (prod_low < cy_limb) + prod_high;
04016 
04017       x = res_ptr[j];
04018       prod_low = x - prod_low;
04019       cy_limb += (prod_low > x);
04020       res_ptr[j] = prod_low;
04021     }
04022   while (++j != 0);
04023 
04024   return cy_limb;
04025 }
04026 
04027 /* mpn_divexact_by3 -- mpn division by 3, expecting no remainder. */
04028 
04029 
04030 /* Multiplicative inverse of 3, modulo 2^BITS_PER_MP_LIMB.
04031    0xAAAAAAAB for 32 bits, 0xAAAAAAAAAAAAAAAB for 64 bits. */
04032 #define INVERSE_3      ((MP_LIMB_T_MAX / 3) * 2 + 1)
04033 
04034 
04035 /* The "c += ..."s are adding the high limb of 3*l to c.  That high limb
04036    will be 0, 1 or 2.  Doing two separate "+="s seems to turn out better
04037    code on gcc (as of 2.95.2 at least).
04038 
04039    When a subtraction of a 0,1,2 carry value causes a borrow, that leaves a
04040    limb value of either 0xFF...FF or 0xFF...FE and the multiply by INVERSE_3
04041    gives 0x55...55 or 0xAA...AA respectively, producing a further borrow of
04042    only 0 or 1 respectively.  Hence the carry out of each stage and for the
04043    return value is always only 0, 1 or 2.  */
04044 
/* Divide {src,size} exactly by 3, with incoming carry C from a lower
   chunk, storing the quotient at {dst,size}.  Returns the outgoing
   carry, which per the block comment above is always 0, 1 or 2.  */
mp_limb_t
#if __STDC__
mpn_divexact_by3c (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t c)
#else
mpn_divexact_by3c (dst, src, size, c)
     mp_ptr    dst;
     mp_srcptr src;
     mp_size_t size;
     mp_limb_t c;
#endif
{
  mp_size_t  i;

  SCHEME_BIGNUM_USE_FUEL(size);

  ASSERT (size >= 1);

  i = 0;
  do
    {
      mp_limb_t  l, s;

      s = src[i];
      /* Subtract the incoming carry; a borrow makes the next carry 1.  */
      l = s - c;
      c = (l > s);

      /* Multiply by 3^-1 mod 2^BITS_PER_MP_LIMB: l becomes the
         quotient limb (exactness guaranteed by the caller).  */
      l *= INVERSE_3;
      dst[i] = l;

      /* Add the high limb of 3*l (0, 1 or 2) to the carry via two
         range tests -- see the "c += ..." comment above.  */
      c += (l > MP_LIMB_T_MAX/3);
      c += (l > (MP_LIMB_T_MAX/3)*2);
    }
  while (++i < size);

  return c;
}
04081 
/* mpn_mul -- Multiply two natural numbers.  (This comment describes
   mpn_mul, defined further below; the function immediately following
   is the mpn_sqr_n squaring helper it dispatches to.)

   Multiply the natural numbers u (pointed to by UP, with UN limbs) and v
   (pointed to by VP, with VN limbs), and store the result at PRODP.  The
   result is UN + VN limbs.  Return the most significant limb of the result.

   NOTE: The space pointed to by PRODP is overwritten before finished with U
   and V, so overlap is an error.

   Argument constraints:
   1. UN >= VN.
   2. PRODP != UP and PRODP != VP, i.e. the destination must be distinct from
      the multiplier and the multiplicand.  */
04095 
/* mpn_sqr_n -- square {up,un} into {prodp,2*un}, dispatching on operand
   size: schoolbook basecase, Karatsuba, Toom-3, and (when WANT_FFT is
   compiled in) Schoenhage FFT multiplication.  */
void
#if __STDC__
mpn_sqr_n (mp_ptr prodp,
         mp_srcptr up, mp_size_t un)
#else
mpn_sqr_n (prodp, up, un)
     mp_ptr prodp;
     mp_srcptr up;
     mp_size_t un;
#endif
{
  if (un < KARATSUBA_SQR_THRESHOLD)
    { /* plain schoolbook multiplication */
      if (un == 0)
       return;
      mpn_sqr_basecase (prodp, up, un);
    }
  else if (un < TOOM3_SQR_THRESHOLD)
    { /* karatsuba multiplication */
      mp_ptr tspace;
      TMP_DECL (marker);
      TMP_MARK (marker);
      /* Temporary workspace for the recursive algorithm; freed below.  */
      tspace = (mp_ptr) TMP_ALLOC (2 * (un + BITS_PER_MP_LIMB) * BYTES_PER_MP_LIMB);
      mpn_kara_sqr_n (prodp, up, un, tspace);
      TMP_FREE (marker);
    }
#if WANT_FFT || TUNE_PROGRAM_BUILD
  else if (un < FFT_SQR_THRESHOLD)
#else
  else
#endif
    { /* toom3 multiplication */
      mp_ptr tspace;
      TMP_DECL (marker);
      TMP_MARK (marker);
      tspace = (mp_ptr) TMP_ALLOC (2 * (un + BITS_PER_MP_LIMB) * BYTES_PER_MP_LIMB);
      mpn_toom3_sqr_n (prodp, up, un, tspace);
      TMP_FREE (marker);
    }
#if WANT_FFT || TUNE_PROGRAM_BUILD
  else
    {
      /* schoenhage multiplication */
      mpn_mul_fft_full (prodp, up, un, up, un);
    }
#endif
}
04143 
/* mpn_mul -- multiply {up,un} by {vp,vn} (un >= vn >= 1) into
   {prodp,un+vn}, returning the most significant result limb.  Squares
   are routed to mpn_sqr_n; small vn uses the basecase; otherwise the
   product is built from vn-wide slices of U, each multiplied by V with
   mpn_mul_n and accumulated into prodp with carry propagation.  */
mp_limb_t
#if __STDC__
mpn_mul (mp_ptr prodp,
        mp_srcptr up, mp_size_t un,
        mp_srcptr vp, mp_size_t vn)
#else
mpn_mul (prodp, up, un, vp, vn)
     mp_ptr prodp;
     mp_srcptr up;
     mp_size_t un;
     mp_srcptr vp;
     mp_size_t vn;
#endif
{
  mp_size_t l;
  mp_limb_t c;

  /* Identical operands: this is a square.  */
  if (up == vp && un == vn)
    {
      mpn_sqr_n (prodp, up, un);
      return prodp[2 * un - 1];
    }

  if (vn < KARATSUBA_MUL_THRESHOLD)
    { /* long multiplication */
      mpn_mul_basecase (prodp, up, un, vp, vn);
      return prodp[un + vn - 1];
    }

  /* First slice: low vn limbs of U times V, written directly.  */
  mpn_mul_n (prodp, up, vp, vn);
  if (un != vn)
    { mp_limb_t t;
      mp_ptr ws;
      TMP_DECL (marker);
      TMP_MARK (marker);

      /* Advance past the low vn result limbs; l tracks how many limbs
         above prodp already hold valid (partial) result.  */
      prodp += vn;
      l = vn;
      up += vn;
      un -= vn;

      if (un < vn) 
       {
         /* Swap u's and v's. */
          MPN_SRCPTR_SWAP (up,un, vp,vn);
       }

      ws = (mp_ptr) TMP_ALLOC (((vn >= KARATSUBA_MUL_THRESHOLD ? vn : un) + vn)
                            * BYTES_PER_MP_LIMB);

      /* t carries the accumulated carry-out between slices.  */
      t = 0;
      while (vn >= KARATSUBA_MUL_THRESHOLD)
       {
         mpn_mul_n (ws, up, vp, vn);
         if (l <= 2*vn) 
           {
             /* Partial overlap: add the valid l limbs, then copy-add
                the remainder of the 2*vn-limb slice product.  */
             t += mpn_add_n (prodp, prodp, ws, l);
             if (l != 2*vn)
              {
                t = mpn_add_1 (prodp + l, ws + l, 2*vn - l, t);
                l = 2*vn;
              }
           }
         else
           {
             /* Slice fits inside the valid region; propagate carry up.  */
             c = mpn_add_n (prodp, prodp, ws, 2*vn);
             t += mpn_add_1 (prodp + 2*vn, prodp + 2*vn, l - 2*vn, c);
           }
         prodp += vn;
         l -= vn;
         up += vn;
         un -= vn;
         if (un < vn) 
           {
             /* Swap u's and v's. */
              MPN_SRCPTR_SWAP (up,un, vp,vn);
           }
       }

      /* Final (possibly ragged) slice via the basecase.  */
      if (vn)
       {
         mpn_mul_basecase (ws, up, un, vp, vn);
         if (l <= un + vn) 
           {
             t += mpn_add_n (prodp, prodp, ws, l);
             if (l != un + vn)
              t = mpn_add_1 (prodp + l, ws + l, un + vn - l, t);
           } 
         else
           {
             c = mpn_add_n (prodp, prodp, ws, un + vn);
             t += mpn_add_1 (prodp + un + vn, prodp + un + vn, l - un - vn, c);
           }
       }

    TMP_FREE (marker);
  }
  return prodp[un + vn - 1];
}
04243 
04244 /* mpn_divrem -- Divide natural numbers, producing both remainder and
04245    quotient.  This is now just a middle layer for calling the new
04246    internal mpn_tdiv_qr. */
04247 
/* Divide {np,nn} by {dp,dn}, producing qxn extra low (fraction) limbs
   of quotient.  The quotient body (nn - dn + qxn limbs) is stored at
   qp, the dn-limb remainder overwrites the low limbs of np, and the
   high quotient limb is returned.  Middle layer over mpn_divrem_1,
   mpn_divrem_2 and mpn_tdiv_qr.  */
mp_limb_t
#if __STDC__
mpn_divrem (mp_ptr qp, mp_size_t qxn,
           mp_ptr np, mp_size_t nn,
           mp_srcptr dp, mp_size_t dn)
#else
mpn_divrem (qp, qxn, np, nn, dp, dn)
     mp_ptr qp;
     mp_size_t qxn;
     mp_ptr np;
     mp_size_t nn;
     mp_srcptr dp;
     mp_size_t dn;
#endif
{
  SCHEME_BIGNUM_USE_FUEL(dn + nn);

  if (dn == 1)
    {
      /* Single-limb divisor: divide into a temporary, then split it
         into remainder (np[0]), quotient body and high limb.  */
      mp_limb_t ret;
      mp_ptr q2p;
      mp_size_t qn;
      TMP_DECL (marker);

      TMP_MARK (marker);
      q2p = (mp_ptr) TMP_ALLOC ((nn + qxn) * BYTES_PER_MP_LIMB);

      np[0] = mpn_divrem_1 (q2p, qxn, np, nn, dp[0]);
      qn = nn + qxn - 1;
      MPN_COPY (qp, q2p, qn);
      ret = q2p[qn];

      TMP_FREE (marker);
      return ret;
    }
  else if (dn == 2)
    {
      return mpn_divrem_2 (qp, qxn, np, nn, dp);
    }
  else
    {
      mp_ptr rp, q2p;
      mp_limb_t qhl;
      mp_size_t qn;
      TMP_DECL (marker);

      TMP_MARK (marker);
      if (qxn != 0)
       {
         /* Extend the dividend with qxn low zero limbs so mpn_tdiv_qr
            also produces the fraction limbs of the quotient.  */
         mp_ptr n2p;
         n2p = (mp_ptr) TMP_ALLOC ((nn + qxn) * BYTES_PER_MP_LIMB);
         MPN_ZERO (n2p, qxn);
         MPN_COPY (n2p + qxn, np, nn);
         q2p = (mp_ptr) TMP_ALLOC ((nn - dn + qxn + 1) * BYTES_PER_MP_LIMB);
         rp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
         mpn_tdiv_qr (q2p, rp, 0L, n2p, nn + qxn, dp, dn);
         MPN_COPY (np, rp, dn);
         qn = nn - dn + qxn;
         MPN_COPY (qp, q2p, qn);
         qhl = q2p[qn];
       }
      else
       {
         q2p = (mp_ptr) TMP_ALLOC ((nn - dn + 1) * BYTES_PER_MP_LIMB);
         rp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
         mpn_tdiv_qr (q2p, rp, 0L, np, nn, dp, dn);
         MPN_COPY (np, rp, dn);    /* overwrite np area with remainder */
         qn = nn - dn;
         MPN_COPY (qp, q2p, qn);
         qhl = q2p[qn];
       }
      TMP_FREE (marker);
      return qhl;
    }
}
04323 
04324 /************************************************************************/
04325 /* GCD: */
04326 
04327 #define MPN_MOD_OR_MODEXACT_1_ODD(src,size,divisor)     \
04328   mpn_mod_1 (src, size, divisor)
04329 
04330 
04331 
04332 /* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,
04333    meaning the quotient size where that should happen, the quotient size
04334    being how many udiv divisions will be done.
04335 
04336    The default is to use preinv always, CPUs where this doesn't suit have
04337    tuned thresholds.  Note in particular that preinv should certainly be
04338    used if that's the only division available (USE_PREINV_ALWAYS).  */
04339 
04340 #ifndef MOD_1_NORM_THRESHOLD
04341 #define MOD_1_NORM_THRESHOLD  0
04342 #endif
04343 #ifndef MOD_1_UNNORM_THRESHOLD
04344 #define MOD_1_UNNORM_THRESHOLD  0
04345 #endif
04346 
04347 
04348 /* The comments in mpn/generic/divrem_1.c apply here too.
04349 
04350    As noted in the algorithms section of the manual, the shifts in the loop
04351    for the unnorm case can be avoided by calculating r = a%(d*2^n), followed
04352    by a final (r*2^n)%(d*2^n).  In fact if it happens that a%(d*2^n) can
04353    skip a division where (a*2^n)%(d*2^n) can't then there's the same number
04354    of divide steps, though how often that happens depends on the assumed
04355    distributions of dividend and divisor.  In any case this idea is left to
04356    CPU specific implementations to consider.  */
04357 
/* mpn_mod_1 -- return {up,un} mod d (d != 0).  Splits on whether d is
   already normalized (high bit set): if so, plain udiv_qrnnd or the
   precomputed-inverse variant is used limb by limb; if not, d is first
   shifted left by its leading zeros and the dividend limbs are fed in
   shifted to match, un-shifting the final remainder at the end.  */
mp_limb_t
mpn_mod_1 (mp_srcptr up, mp_size_t un, mp_limb_t d)
{
  mp_size_t  i;
  mp_limb_t  n1, n0, r;
  mp_limb_t  dummy;

  ASSERT (un >= 0);
  ASSERT (d != 0);

  /* Botch: Should this be handled at all?  Rely on callers?
     But note un==0 is currently required by mpz/fdiv_r_ui.c and possibly
     other places.  */
  if (un == 0)
    return 0;

  d <<= GMP_NAIL_BITS;

  if ((d & GMP_LIMB_HIGHBIT) != 0)
    {
      /* High limb is initial remainder, possibly with one subtract of
        d to get r<d.  */
      r = up[un - 1] << GMP_NAIL_BITS;
      if (r >= d)
       r -= d;
      r >>= GMP_NAIL_BITS;
      un--;
      if (un == 0)
       return r;

      if (BELOW_THRESHOLD (un, MOD_1_NORM_THRESHOLD))
       {
       plain:
         /* One udiv per limb, most significant first.  Also reached by
            goto from the unnormalized branch when udiv_qrnnd does not
            require a normalized divisor.  */
         for (i = un - 1; i >= 0; i--)
           {
             n0 = up[i] << GMP_NAIL_BITS;
             udiv_qrnnd (dummy, r, r, n0, d);
             r >>= GMP_NAIL_BITS;
           }
         return r;
       }
      else
       {
         /* Larger sizes: amortize one invert_limb over all divisions.  */
         mp_limb_t  inv;
         invert_limb (inv, d);
         for (i = un - 1; i >= 0; i--)
           {
             n0 = up[i] << GMP_NAIL_BITS;
             udiv_qrnnd_preinv (dummy, r, r, n0, d, inv);
             r >>= GMP_NAIL_BITS;
           }
         return r;
       }
    }
  else
    {
      int norm;

      /* Skip a division if high < divisor.  Having the test here before
        normalizing will still skip as often as possible.  */
      r = up[un - 1] << GMP_NAIL_BITS;
      if (r < d)
       {
         r >>= GMP_NAIL_BITS;
         un--;
         if (un == 0)
           return r;
       }
      else
       r = 0;

      /* If udiv_qrnnd doesn't need a normalized divisor, can use the simple
        code above. */
      if (! UDIV_NEEDS_NORMALIZATION
         && BELOW_THRESHOLD (un, MOD_1_UNNORM_THRESHOLD))
       goto plain;

      /* Normalize d; norm >= 1 here since d's high bit was clear, so
         the (GMP_LIMB_BITS - norm) shifts below are in range.  */
      count_leading_zeros (norm, d);
      d <<= norm;

      n1 = up[un - 1] << GMP_NAIL_BITS;
      r = (r << norm) | (n1 >> (GMP_LIMB_BITS - norm));

      if (UDIV_NEEDS_NORMALIZATION
         && BELOW_THRESHOLD (un, MOD_1_UNNORM_THRESHOLD))
       {
         /* Feed each dividend limb pre-shifted by norm, pairing the
            high bits of up[i] with the low bits of the limb above.  */
         for (i = un - 2; i >= 0; i--)
           {
             n0 = up[i] << GMP_NAIL_BITS;
             udiv_qrnnd (dummy, r, r,
                       (n1 << norm) | (n0 >> (GMP_NUMB_BITS - norm)),
                       d);
             r >>= GMP_NAIL_BITS;
             n1 = n0;
           }
         udiv_qrnnd (dummy, r, r, n1 << norm, d);
         r >>= GMP_NAIL_BITS;
         /* Undo the normalization shift of the remainder.  */
         return r >> norm;
       }
      else
       {
         mp_limb_t inv;
         invert_limb (inv, d);

         for (i = un - 2; i >= 0; i--)
           {
             n0 = up[i] << GMP_NAIL_BITS;
             udiv_qrnnd_preinv (dummy, r, r,
                             (n1 << norm) | (n0 >> (GMP_NUMB_BITS - norm)),
                             d, inv);
             r >>= GMP_NAIL_BITS;
             n1 = n0;
           }
         udiv_qrnnd_preinv (dummy, r, r, n1 << norm, d, inv);
         r >>= GMP_NAIL_BITS;
         return r >> norm;
       }
    }
}
04477 
04478 /* Does not work for U == 0 or V == 0.  It would be tough to make it work for
04479    V == 0 since gcd(x,0) = x, and U does not generally fit in an mp_limb_t.
04480 
04481    The threshold for doing u%v when size==1 will vary by CPU according to
04482    the speed of a division and the code generated for the main loop.  Any
04483    tuning for this is left to a CPU specific implementation.  */
04484 
/* mpn_gcd_1 -- gcd of {up,size} and the single limb vlimb (vlimb != 0,
   U nonzero).  Common factors of 2 are extracted into zero_bits up
   front; a mod reduction (or a plain % when u >> v) shrinks U to one
   limb, and a binary subtract-and-shift loop finishes.  Note the
   goto targets jump INTO the do-while strip loops below: strip_u_maybe
   strips low zero bits of a freshly reduced (possibly even) ulimb
   before the main loop's oddness invariant is assumed.  */
mp_limb_t
mpn_gcd_1 (mp_srcptr up, mp_size_t size, mp_limb_t vlimb)
{
  mp_limb_t      ulimb;
  unsigned long  zero_bits, u_low_zero_bits;

  ASSERT (size >= 1);
  ASSERT (vlimb != 0);
  /* ASSERT_MPN_NONZERO_P (up, size); */

  ulimb = up[0];

  /* Need vlimb odd for modexact, want it odd to get common zeros. */
  count_trailing_zeros (zero_bits, vlimb);
  vlimb >>= zero_bits;

  if (size > 1)
    {
      /* Must get common zeros before the mod reduction.  If ulimb==0 then
        vlimb already gives the common zeros.  */
      if (ulimb != 0)
       {
         count_trailing_zeros (u_low_zero_bits, ulimb);
         zero_bits = MIN (zero_bits, u_low_zero_bits);
       }

      /* Reduce U to a single limb: U mod vlimb.  */
      ulimb = MPN_MOD_OR_MODEXACT_1_ODD (up, size, vlimb);
      if (ulimb == 0)
       goto done;

      goto strip_u_maybe;
    }

  /* size==1, so up[0]!=0 */
  count_trailing_zeros (u_low_zero_bits, ulimb);
  ulimb >>= u_low_zero_bits;
  zero_bits = MIN (zero_bits, u_low_zero_bits);

  /* make u bigger */
  if (vlimb > ulimb)
    MP_LIMB_T_SWAP (ulimb, vlimb);

  /* if u is much bigger than v, reduce using a division rather than
     chipping away at it bit-by-bit */
  if ((ulimb >> 16) > vlimb)
    {
      ulimb %= vlimb;
      if (ulimb == 0)
       goto done;
      goto strip_u_maybe;
    }

  /* Binary GCD: both limbs are odd at the top of each iteration; the
     larger is reduced by the smaller and re-stripped to odd.  */
  while (ulimb != vlimb)
    {
      ASSERT (ulimb & 1);
      ASSERT (vlimb & 1);

      if (ulimb > vlimb)
       {
         ulimb -= vlimb;
         do
           {
             ulimb >>= 1;
             ASSERT (ulimb != 0);
           strip_u_maybe:
             /* Entered from above with ulimb possibly already odd, in
                which case the loop exits without shifting.  */
             ;
           }
         while ((ulimb & 1) == 0);
       }
      else /*  vlimb > ulimb.  */
       {
         vlimb -= ulimb;
         do
           {
             vlimb >>= 1;
             ASSERT (vlimb != 0);
           }
         while ((vlimb & 1) == 0);
       }
    }

 done:
  /* Restore the common factors of 2 extracted at the start.  */
  return vlimb << zero_bits;
}
04569 
04570 
04571 /* Integer greatest common divisor of two unsigned integers, using
04572    the accelerated algorithm (see reference below).
04573 
04574    mp_size_t mpn_gcd (up, usize, vp, vsize).
04575 
04576    Preconditions [U = (up, usize) and V = (vp, vsize)]:
04577 
04578    1.  V is odd.
04579    2.  numbits(U) >= numbits(V).
04580 
04581    Both U and V are destroyed by the operation.  The result is left at vp,
04582    and its size is returned.
04583 
04584    Ken Weber (kweber@mat.ufrgs.br, kweber@mcs.kent.edu)
04585 
04586    Funding for this work has been partially provided by Conselho Nacional
04587    de Desenvolvimento Cienti'fico e Tecnolo'gico (CNPq) do Brazil, Grant
04588    301314194-2, and was done while I was a visiting reseacher in the Instituto
04589    de Matema'tica at Universidade Federal do Rio Grande do Sul (UFRGS).
04590 
04591    Refer to
04592        K. Weber, The accelerated integer GCD algorithm, ACM Transactions on
04593        Mathematical Software, v. 21 (March), 1995, pp. 111-122.  */
04594 
04595 /* If MIN (usize, vsize) >= GCD_ACCEL_THRESHOLD, then the accelerated
04596    algorithm is used, otherwise the binary algorithm is used.  This may be
04597    adjusted for different architectures.  */
04598 #ifndef GCD_ACCEL_THRESHOLD
04599 #define GCD_ACCEL_THRESHOLD 5
04600 #endif
04601 
04602 /* When U and V differ in size by more than BMOD_THRESHOLD, the accelerated
04603    algorithm reduces using the bmod operation.  Otherwise, the k-ary reduction
04604    is used.  0 <= BMOD_THRESHOLD < GMP_NUMB_BITS.  */
04605 enum
04606   {
04607     BMOD_THRESHOLD = GMP_NUMB_BITS/2
04608   };
04609 
04610 
04611 /* Use binary algorithm to compute V <-- GCD (V, U) for usize, vsize == 2.
04612    Both U and V must be odd.  */
/* Binary GCD of two 2-limb values: V <-- GCD (V, U) with usize ==
   vsize == 2.  Both U and V must be odd (the comment above states
   this; it also guarantees u0-v0 is even, so the count_trailing_zeros
   results below are >= 1 and the (GMP_NUMB_BITS - r) shifts stay in
   range).  Returns the new size of V (1 or 2).  */
static inline mp_size_t
gcd_2 (mp_ptr vp, mp_srcptr up)
{
  mp_limb_t u0, u1, v0, v1;
  mp_size_t vsize;

  u0 = up[0];
  u1 = up[1];
  v0 = vp[0];
  v1 = vp[1];

  /* Subtract the smaller from the larger and strip trailing zeros,
     until the values agree in at least one limb.  */
  while (u1 != v1 && u0 != v0)
    {
      unsigned long int r;
      if (u1 > v1)
       {
         /* U <-- (U - V) >> r, with r the trailing zeros of the low limb.  */
         u1 -= v1 + (u0 < v0);
         u0 = (u0 - v0) & GMP_NUMB_MASK;
         count_trailing_zeros (r, u0);
         u0 = ((u1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (u0 >> r);
         u1 >>= r;
       }
      else  /* u1 < v1.  */
       {
         v1 -= u1 + (v0 < u0);
         v0 = (v0 - u0) & GMP_NUMB_MASK;
         count_trailing_zeros (r, v0);
         v0 = ((v1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (v0 >> r);
         v1 >>= r;
       }
    }

  vp[0] = v0, vp[1] = v1, vsize = 1 + (v1 != 0);

  /* If U == V == GCD, done.  Otherwise, compute GCD (V, |U - V|).  */
  if (u1 == v1 && u0 == v0)
    return vsize;

  /* |U - V| fits in one limb here because the loop exited with one
     pair of limbs equal.  */
  v0 = (u0 == v0) ? (u1 > v1) ? u1-v1 : v1-u1 : (u0 > v0) ? u0-v0 : v0-u0;
  vp[0] = mpn_gcd_1 (vp, vsize, v0);

  return 1;
}
04656 
04657 /* The function find_a finds 0 < N < 2^GMP_NUMB_BITS such that there exists
04658    0 < |D| < 2^GMP_NUMB_BITS, and N == D * C mod 2^(2*GMP_NUMB_BITS).
04659    In the reference article, D was computed along with N, but it is better to
04660    compute D separately as D <-- N / C mod 2^(GMP_NUMB_BITS + 1), treating
04661    the result as a twos' complement signed integer.
04662 
04663    Initialize N1 to C mod 2^(2*GMP_NUMB_BITS).  According to the reference
04664    article, N2 should be initialized to 2^(2*GMP_NUMB_BITS), but we use
04665    2^(2*GMP_NUMB_BITS) - N1 to start the calculations within double
04666    precision.  If N2 > N1 initially, the first iteration of the while loop
04667    will swap them.  In all other situations, N1 >= N2 is maintained.  */
04668 
#if HAVE_NATIVE_mpn_gcd_finda
#define find_a(cp)  mpn_gcd_finda (cp)

#else
static
#if ! defined (__i386__)
inline                      /* don't inline this for the x86 */
#endif
/* See the block comment above: computes the N of the k-ary reduction
   from the 2-limb value C at cp, via a double-limb shift-and-subtract
   remainder loop (N1 <-- N1 % N2, then swap) until N2 fits one limb.  */
mp_limb_t
find_a (mp_srcptr cp)
{
  unsigned long int leading_zero_bits = 0;

  mp_limb_t n1_l = cp[0];   /* N1 == n1_h * 2^GMP_NUMB_BITS + n1_l.  */
  mp_limb_t n1_h = cp[1];

  /* N2 starts as 2^(2*GMP_NUMB_BITS) - N1, per the comment above.  */
  mp_limb_t n2_l = (-n1_l & GMP_NUMB_MASK);      /* N2 == n2_h * 2^GMP_NUMB_BITS + n2_l.  */
  mp_limb_t n2_h = (~n1_h & GMP_NUMB_MASK);

  /* Main loop.  */
  while (n2_h != 0)         /* While N2 >= 2^GMP_NUMB_BITS.  */
    {
      /* N1 <-- N1 % N2.  */
      if (((GMP_NUMB_HIGHBIT >> leading_zero_bits) & n2_h) == 0)
       {
         unsigned long int i;
         /* Left-align N2 with N1's known magnitude, then do i
            conditional-subtract-and-shift steps to form the remainder.  */
         count_leading_zeros (i, n2_h);
         i -= GMP_NAIL_BITS;
         i -= leading_zero_bits;
         leading_zero_bits += i;
         n2_h = ((n2_h << i) & GMP_NUMB_MASK) | (n2_l >> (GMP_NUMB_BITS - i));
         n2_l = (n2_l << i) & GMP_NUMB_MASK;
         do
           {
             /* If N1 >= N2 (double-limb compare), subtract.  */
             if (n1_h > n2_h || (n1_h == n2_h && n1_l >= n2_l))
              {
                n1_h -= n2_h + (n1_l < n2_l);
                n1_l = (n1_l - n2_l) & GMP_NUMB_MASK;
              }
             n2_l = (n2_l >> 1) | ((n2_h << (GMP_NUMB_BITS - 1)) & GMP_NUMB_MASK);
             n2_h >>= 1;
             i -= 1;
           }
         while (i != 0);
       }
      if (n1_h > n2_h || (n1_h == n2_h && n1_l >= n2_l))
       {
         n1_h -= n2_h + (n1_l < n2_l);
         n1_l = (n1_l - n2_l) & GMP_NUMB_MASK;
       }

      /* Continue with the pair swapped: (N1, N2) <-- (N2, N1 % N2).  */
      MP_LIMB_T_SWAP (n1_h, n2_h);
      MP_LIMB_T_SWAP (n1_l, n2_l);
    }

  return n2_l;
}
#endif
04727 
04728 
04729 mp_size_t
04730 mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t vsize)
04731 {
04732   mp_ptr orig_vp = vp;
04733   mp_size_t orig_vsize = vsize;
04734   int binary_gcd_ctr;              /* Number of times binary gcd will execute.  */
04735   TMP_DECL (marker);
04736 
04737   ASSERT (usize >= 1);
04738   ASSERT (vsize >= 1);
04739   ASSERT (usize >= vsize);
04740   ASSERT (vp[0] & 1);
04741   ASSERT (up[usize - 1] != 0);
04742   ASSERT (vp[vsize - 1] != 0);
04743 #if WANT_ASSERT
04744   if (usize == vsize)
04745     {
04746       int  uzeros, vzeros;
04747       count_leading_zeros (uzeros, up[usize - 1]);
04748       count_leading_zeros (vzeros, vp[vsize - 1]);
04749       ASSERT (uzeros <= vzeros);
04750     }
04751 #endif
04752   ASSERT (! MPN_OVERLAP_P (up, usize, vp, vsize));
04753   ASSERT (MPN_SAME_OR_SEPARATE2_P (gp, vsize, up, usize));
04754   ASSERT (MPN_SAME_OR_SEPARATE2_P (gp, vsize, vp, vsize));
04755 
04756   TMP_MARK (marker);
04757 
04758   /* Use accelerated algorithm if vsize is over GCD_ACCEL_THRESHOLD.
04759      Two EXTRA limbs for U and V are required for kary reduction.  */
04760   if (vsize >= GCD_ACCEL_THRESHOLD)
04761     {
04762       unsigned long int vbitsize, d;
04763       mp_ptr orig_up = up;
04764       mp_size_t orig_usize = usize;
04765       mp_ptr anchor_up = (mp_ptr) TMP_ALLOC ((usize + 2) * BYTES_PER_MP_LIMB);
04766 
04767       MPN_COPY (anchor_up, orig_up, usize);
04768       up = anchor_up;
04769 
04770       count_leading_zeros (d, up[usize - 1]);
04771       d -= GMP_NAIL_BITS;
04772       d = usize * GMP_NUMB_BITS - d;
04773       count_leading_zeros (vbitsize, vp[vsize - 1]);
04774       vbitsize -= GMP_NAIL_BITS;
04775       vbitsize = vsize * GMP_NUMB_BITS - vbitsize;
04776       ASSERT (d >= vbitsize);
04777       d = d - vbitsize + 1;
04778 
04779       /* Use bmod reduction to quickly discover whether V divides U.  */
04780       up[usize++] = 0;                           /* Insert leading zero.  */
04781       mpn_bdivmod (up, up, usize, vp, vsize, d);
04782 
04783       /* Now skip U/V mod 2^d and any low zero limbs.  */
04784       d /= GMP_NUMB_BITS, up += d, usize -= d;
04785       while (usize != 0 && up[0] == 0)
04786        up++, usize--;
04787 
04788       if (usize == 0)                            /* GCD == ORIG_V.  */
04789        goto done;
04790 
04791       vp = (mp_ptr) TMP_ALLOC ((vsize + 2) * BYTES_PER_MP_LIMB);
04792       MPN_COPY (vp, orig_vp, vsize);
04793 
04794       do                                  /* Main loop.  */
04795        {
04796          /* mpn_com_n can't be used here because anchor_up and up may
04797             partially overlap */
04798          if ((up[usize - 1] & GMP_NUMB_HIGHBIT) != 0)  /* U < 0; take twos' compl. */
04799            {
04800              mp_size_t i;
04801              anchor_up[0] = -up[0] & GMP_NUMB_MASK;
04802              for (i = 1; i < usize; i++)
04803               anchor_up[i] = (~up[i] & GMP_NUMB_MASK);
04804              up = anchor_up;
04805            }
04806 
04807          MPN_NORMALIZE_NOT_ZERO (up, usize);
04808 
04809          if ((up[0] & 1) == 0)                   /* Result even; remove twos. */
04810            {
04811              unsigned int r;
04812              count_trailing_zeros (r, up[0]);
04813              mpn_rshift (anchor_up, up, usize, r);
04814              usize -= (anchor_up[usize - 1] == 0);
04815            }
04816          else if (anchor_up != up)
04817            MPN_COPY_INCR (anchor_up, up, usize);
04818 
04819          MPN_PTR_SWAP (anchor_up,usize, vp,vsize);
04820          up = anchor_up;
04821 
04822          if (vsize <= 2)           /* Kary can't handle < 2 limbs and  */
04823            break;                  /* isn't efficient for == 2 limbs.  */
04824 
04825          d = vbitsize;
04826          count_leading_zeros (vbitsize, vp[vsize - 1]);
04827          vbitsize -= GMP_NAIL_BITS;
04828          vbitsize = vsize * GMP_NUMB_BITS - vbitsize;
04829          d = d - vbitsize + 1;
04830 
04831          if (d > BMOD_THRESHOLD)   /* Bmod reduction.  */
04832            {
04833              up[usize++] = 0;
04834              mpn_bdivmod (up, up, usize, vp, vsize, d);
04835              d /= GMP_NUMB_BITS, up += d, usize -= d;
04836            }
04837          else                      /* Kary reduction.  */
04838            {
04839              mp_limb_t bp[2], cp[2];
04840 
04841              /* C <-- V/U mod 2^(2*GMP_NUMB_BITS).  */
04842              {
04843               mp_limb_t u_inv, hi, lo;
04844               modlimb_invert (u_inv, up[0]);
04845               cp[0] = (vp[0] * u_inv) & GMP_NUMB_MASK;
04846               umul_ppmm (hi, lo, cp[0], up[0] << GMP_NAIL_BITS);
04847               lo >>= GMP_NAIL_BITS;
04848               cp[1] = (vp[1] - hi - cp[0] * up[1]) * u_inv & GMP_NUMB_MASK;
04849              }
04850 
04851              /* U <-- find_a (C)  *  U.  */
04852              up[usize] = mpn_mul_1 (up, up, usize, find_a (cp));
04853              usize++;
04854 
04855              /* B <-- A/C == U/V mod 2^(GMP_NUMB_BITS + 1).
04856                 bp[0] <-- U/V mod 2^GMP_NUMB_BITS and
04857                 bp[1] <-- ( (U - bp[0] * V)/2^GMP_NUMB_BITS ) / V mod 2
04858 
04859                 Like V/U above, but simplified because only the low bit of
04860                 bp[1] is wanted. */
04861              {
04862               mp_limb_t  v_inv, hi, lo;
04863               modlimb_invert (v_inv, vp[0]);
04864               bp[0] = (up[0] * v_inv) & GMP_NUMB_MASK;
04865               umul_ppmm (hi, lo, bp[0], vp[0] << GMP_NAIL_BITS);
04866               lo >>= GMP_NAIL_BITS;
04867               bp[1] = (up[1] + hi + (bp[0] & vp[1])) & 1;
04868              }
04869 
04870              up[usize++] = 0;
04871              if (bp[1] != 0)       /* B < 0: U <-- U + (-B)  * V.  */
04872               {
04873                  mp_limb_t c = mpn_addmul_1 (up, vp, vsize, -bp[0] & GMP_NUMB_MASK);
04874                  mpn_add_1 (up + vsize, up + vsize, usize - vsize, c);
04875               }
04876              else           /* B >= 0:  U <-- U - B * V.  */
04877               {
04878                 mp_limb_t b = mpn_submul_1 (up, vp, vsize, bp[0]);
04879                 mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b);
04880               }
04881 
04882              up += 2, usize -= 2;  /* At least two low limbs are zero.  */
04883            }
04884 
04885          /* Must remove low zero limbs before complementing.  */
04886          while (usize != 0 && up[0] == 0)
04887            up++, usize--;
04888        }
04889       while (usize != 0);
04890 
04891       /* Compute GCD (ORIG_V, GCD (ORIG_U, V)).  Binary will execute twice.  */
04892       up = orig_up, usize = orig_usize;
04893       binary_gcd_ctr = 2;
04894     }
04895   else
04896     binary_gcd_ctr = 1;
04897 
04898   /* Finish up with the binary algorithm.  Executes once or twice.  */
04899   for ( ; binary_gcd_ctr--; up = orig_vp, usize = orig_vsize)
04900     {
04901       if (usize > 2)        /* First make U close to V in size.  */
04902        {
04903          unsigned long int vbitsize, d;
04904          count_leading_zeros (d, up[usize - 1]);
04905          d -= GMP_NAIL_BITS;
04906          d = usize * GMP_NUMB_BITS - d;
04907          count_leading_zeros (vbitsize, vp[vsize - 1]);
04908          vbitsize -= GMP_NAIL_BITS;
04909          vbitsize = vsize * GMP_NUMB_BITS - vbitsize;
04910          d = d - vbitsize - 1;
04911          if (d != -(unsigned long int)1 && d > 2)
04912            {
04913              mpn_bdivmod (up, up, usize, vp, vsize, d);  /* Result > 0.  */
04914              d /= (unsigned long int)GMP_NUMB_BITS, up += d, usize -= d;
04915            }
04916        }
04917 
04918       /* Start binary GCD.  */
04919       do
04920        {
04921          mp_size_t zeros;
04922 
04923          /* Make sure U is odd.  */
04924          MPN_NORMALIZE (up, usize);
04925          while (up[0] == 0)
04926            up += 1, usize -= 1;
04927          if ((up[0] & 1) == 0)
04928            {
04929              unsigned int r;
04930              count_trailing_zeros (r, up[0]);
04931              mpn_rshift (up, up, usize, r);
04932              usize -= (up[usize - 1] == 0);
04933            }
04934 
04935          /* Keep usize >= vsize.  */
04936          if (usize < vsize)
04937            MPN_PTR_SWAP (up, usize, vp, vsize);
04938 
04939          if (usize <= 2)                         /* Double precision. */
04940            {
04941              if (vsize == 1)
04942               vp[0] = mpn_gcd_1 (up, usize, vp[0]);
04943              else
04944               vsize = gcd_2 (vp, up);
04945              break;                              /* Binary GCD done.  */
04946            }
04947 
04948          /* Count number of low zero limbs of U - V.  */
04949          for (zeros = 0; up[zeros] == vp[zeros] && ++zeros != vsize; )
04950            continue;
04951 
04952          /* If U < V, swap U and V; in any case, subtract V from U.  */
04953          if (zeros == vsize)                            /* Subtract done.  */
04954            up += zeros, usize -= zeros;
04955          else if (usize == vsize)
04956            {
04957              mp_size_t size = vsize;
04958              do
04959               size--;
04960              while (up[size] == vp[size]);
04961              if (up[size] < vp[size])                   /* usize == vsize.  */
04962               MP_PTR_SWAP (up, vp);
04963              up += zeros, usize = size + 1 - zeros;
04964              mpn_sub_n (up, up, vp + zeros, usize);
04965            }
04966          else
04967            {
04968              mp_size_t size = vsize - zeros;
04969              up += zeros, usize -= zeros;
04970              if (mpn_sub_n (up, up, vp + zeros, size))
04971               {
04972                 while (up[size] == 0)                   /* Propagate borrow. */
04973                   up[size++] = -(mp_limb_t)1;
04974                 up[size] -= 1;
04975               }
04976            }
04977        }
04978       while (usize);                             /* End binary GCD.  */
04979     }
04980 
04981 done:
04982   if (vp != gp)
04983     MPN_COPY_INCR (gp, vp, vsize);
04984   TMP_FREE (marker);
04985   return vsize;
04986 }
04987 
04988 
04989 /* q_high = mpn_bdivmod (qp, up, usize, vp, vsize, d).
04990 
04991    Puts the low d/BITS_PER_MP_LIMB limbs of Q = U / V mod 2^d at qp, and
04992    returns the high d%BITS_PER_MP_LIMB bits of Q as the result.
04993 
04994    Also, U - Q * V mod 2^(usize*BITS_PER_MP_LIMB) is placed at up.  Since the
04995    low d/BITS_PER_MP_LIMB limbs of this difference are zero, the code allows
04996    the limb vectors at qp to overwrite the low limbs at up, provided qp <= up.
04997 
04998    Preconditions:
04999    1.  V is odd.
05000    2.  usize * BITS_PER_MP_LIMB >= d.
05001    3.  If Q and U overlap, qp <= up.
05002 
05003    Ken Weber (kweber@mat.ufrgs.br, kweber@mcs.kent.edu)
05004 
05005    Funding for this work has been partially provided by Conselho Nacional
05006    de Desenvolvimento Cienti'fico e Tecnolo'gico (CNPq) do Brazil, Grant
301314194-2, and was done while I was a visiting researcher in the Instituto
05008    de Matema'tica at Universidade Federal do Rio Grande do Sul (UFRGS).
05009 
05010    References:
05011        T. Jebelean, An algorithm for exact division, Journal of Symbolic
05012        Computation, v. 15, 1993, pp. 169-180.
05013 
05014        K. Weber, The accelerated integer GCD algorithm, ACM Transactions on
05015        Mathematical Software, v. 21 (March), 1995, pp. 111-122.  */
05016 
mp_limb_t
mpn_bdivmod (mp_ptr qp, mp_ptr up, mp_size_t usize,
            mp_srcptr vp, mp_size_t vsize, unsigned long int d)
{
  mp_limb_t v_inv;

  /* Preconditions (see the function comment above): non-empty operands,
     at least d bits available in U, U and V disjoint, and Q may only
     overlap U with qp <= up.  */
  ASSERT (usize >= 1);
  ASSERT (vsize >= 1);
  ASSERT (usize * GMP_NUMB_BITS >= d);
  ASSERT (! MPN_OVERLAP_P (up, usize, vp, vsize));
  ASSERT (! MPN_OVERLAP_P (qp, d/GMP_NUMB_BITS, vp, vsize));
  ASSERT (MPN_SAME_OR_INCR2_P (qp, d/GMP_NUMB_BITS, up, usize));
  /* ASSERT_MPN (up, usize); */
  /* ASSERT_MPN (vp, vsize); */

  /* 1/V mod 2^GMP_NUMB_BITS.  V is odd (precondition 1), so this inverse
     exists.  */
  modlimb_invert (v_inv, vp[0]);

  /* Fast code for two cases previously used by the accel part of mpn_gcd.
     (Could probably remove this now it's inlined there.) */
  if (usize == 2 && vsize == 2 &&
      (d == GMP_NUMB_BITS || d == 2*GMP_NUMB_BITS))
    {
      mp_limb_t hi, lo;
      /* q = up[0]/V mod 2^GMP_NUMB_BITS, so q*V and U agree in the low
         limb; subtracting q*V therefore zeros up[0] exactly (no borrow),
         and only the product's high limb plus q*vp[1] affects up[1].  */
      mp_limb_t q = (up[0] * v_inv) & GMP_NUMB_MASK;
      umul_ppmm (hi, lo, q, vp[0] << GMP_NAIL_BITS);
      up[0] = 0;
      up[1] -= hi + q*vp[1];
      qp[0] = q;
      if (d == 2*GMP_NUMB_BITS)
        {
          /* Second quotient limb; the remaining difference is zero.  */
          q = (up[1] * v_inv) & GMP_NUMB_MASK;
          up[1] = 0;
          qp[1] = q;
        }
      return 0;
    }

  /* Main loop: one quotient limb per iteration.  q is chosen so that
     U - q*V has a zero low limb (exact-division / Hensel step), after
     which U is advanced by one limb and d drops by a limb's worth.  */
  while (d >= GMP_NUMB_BITS)
    {
      mp_limb_t q = (up[0] * v_inv) & GMP_NUMB_MASK;
      mp_limb_t b = mpn_submul_1 (up, vp, MIN (usize, vsize), q);
      if (usize > vsize)
       mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b);  /* propagate borrow */
      d -= GMP_NUMB_BITS;
      up += 1, usize -= 1;
      *qp++ = q;
    }

  /* Final partial limb: only the low d bits of the quotient remain, and
     they are returned rather than stored at qp.  */
  if (d)
    {
      mp_limb_t b;
      mp_limb_t q = (up[0] * v_inv) & (((mp_limb_t)1<<d) - 1);
      /* Special-case q == 0 (nothing to subtract at all) and q == 1
         (plain subtraction, avoiding the multiply).  */
      if (q <= 1)
       {
         if (q == 0)
           return 0;
         else
           b = mpn_sub_n (up, up, vp, MIN (usize, vsize));
       }
      else
       b = mpn_submul_1 (up, vp, MIN (usize, vsize), q);

      if (usize > vsize)
       mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b);  /* propagate borrow */
      return q;
    }

  return 0;
}
05088 
05089 
/* Inverses of the odd residues mod 256: entry i is the multiplicative
   inverse of (2*i + 1) modulo 256, i.e.
   (modlimb_invert_table[i] * (2*i + 1)) % 256 == 1 for 0 <= i < 128.  */

const unsigned char modlimb_invert_table[128] = {
  0x01, 0xAB, 0xCD, 0xB7, 0x39, 0xA3, 0xC5, 0xEF, 0xF1, 0x1B, 0x3D, 0xA7, 0x29, 0x13, 0x35, 0xDF,
  0xE1, 0x8B, 0xAD, 0x97, 0x19, 0x83, 0xA5, 0xCF, 0xD1, 0xFB, 0x1D, 0x87, 0x09, 0xF3, 0x15, 0xBF,
  0xC1, 0x6B, 0x8D, 0x77, 0xF9, 0x63, 0x85, 0xAF, 0xB1, 0xDB, 0xFD, 0x67, 0xE9, 0xD3, 0xF5, 0x9F,
  0xA1, 0x4B, 0x6D, 0x57, 0xD9, 0x43, 0x65, 0x8F, 0x91, 0xBB, 0xDD, 0x47, 0xC9, 0xB3, 0xD5, 0x7F,
  0x81, 0x2B, 0x4D, 0x37, 0xB9, 0x23, 0x45, 0x6F, 0x71, 0x9B, 0xBD, 0x27, 0xA9, 0x93, 0xB5, 0x5F,
  0x61, 0x0B, 0x2D, 0x17, 0x99, 0x03, 0x25, 0x4F, 0x51, 0x7B, 0x9D, 0x07, 0x89, 0x73, 0x95, 0x3F,
  0x41, 0xEB, 0x0D, 0xF7, 0x79, 0xE3, 0x05, 0x2F, 0x31, 0x5B, 0x7D, 0xE7, 0x69, 0x53, 0x75, 0x1F,
  0x21, 0xCB, 0xED, 0xD7, 0x59, 0xC3, 0xE5, 0x0F, 0x11, 0x3B, 0x5D, 0xC7, 0x49, 0x33, 0x55, 0xFF
};
05111 
05112 
05113 /************************************************************************/
05114 
05115 /* __mp_bases -- Structure for conversion between internal binary
05116    format and strings in base 2..255.  The fields are explained in
05117    gmp-impl.h. */
05118 
#if BITS_PER_MP_LIMB == 32
/* Each entry __mp_bases[b] describes base b (see gmp-impl.h for the
   authoritative field documentation):
     field 1: chars_per_limb -- max whole base-b digits per 32-bit limb
              (base 10 gives 9, matching MP_BASES_CHARS_PER_LIMB_10);
     field 2: chars_per_bit_exactly -- log(2)/log(b), base-b digits per
              bit (1.0 for base 2, 0.30103... for base 10);
     field 3: big_base -- for power-of-2 bases, log2(b) (1, 2, 3, ... for
              bases 2, 4, 8, ...); otherwise b^chars_per_limb (base 10
              gives 0x3b9aca00 = 10^9, matching MP_BASES_BIG_BASE_10);
     field 4: big_base_inverted -- presumably a precomputed reciprocal of
              big_base used by the conversion division; confirm against
              gmp-impl.h.
   Entries 0 and 1 are placeholders, since 0 and 1 are not positional
   bases.  */
const struct bases __mp_bases[256] =
{
  /*  0 */ {0, 0.0, 0, 0},
  /*  1 */ {0, 1e38, 0, 0},
  /*  2 */ {32, 1.0000000000000000, 0x1, 0x0},
  /*  3 */ {20, 0.6309297535714575, 0xcfd41b91, 0x3b563c24},
  /*  4 */ {16, 0.5000000000000000, 0x2, 0x0},
  /*  5 */ {13, 0.4306765580733931, 0x48c27395, 0xc25c2684},
  /*  6 */ {12, 0.3868528072345416, 0x81bf1000, 0xf91bd1b6},
  /*  7 */ {11, 0.3562071871080222, 0x75db9c97, 0x1607a2cb},
  /*  8 */ {10, 0.3333333333333334, 0x3, 0x0},
  /*  9 */ {10, 0.3154648767857287, 0xcfd41b91, 0x3b563c24},
  /* 10 */ {9, 0.3010299956639811, 0x3b9aca00, 0x12e0be82},
  /* 11 */ {9, 0.2890648263178878, 0x8c8b6d2b, 0xd24cde04},
  /* 12 */ {8, 0.2789429456511298, 0x19a10000, 0x3fa39ab5},
  /* 13 */ {8, 0.2702381544273197, 0x309f1021, 0x50f8ac5f},
  /* 14 */ {8, 0.2626495350371936, 0x57f6c100, 0x74843b1e},
  /* 15 */ {8, 0.2559580248098155, 0x98c29b81, 0xad0326c2},
  /* 16 */ {8, 0.2500000000000000, 0x4, 0x0},
  /* 17 */ {7, 0.2446505421182260, 0x18754571, 0x4ef0b6bd},
  /* 18 */ {7, 0.2398124665681315, 0x247dbc80, 0xc0fc48a1},
  /* 19 */ {7, 0.2354089133666382, 0x3547667b, 0x33838942},
  /* 20 */ {7, 0.2313782131597592, 0x4c4b4000, 0xad7f29ab},
  /* 21 */ {7, 0.2276702486969530, 0x6b5a6e1d, 0x313c3d15},
  /* 22 */ {7, 0.2242438242175754, 0x94ace180, 0xb8cca9e0},
  /* 23 */ {7, 0.2210647294575037, 0xcaf18367, 0x42ed6de9},
  /* 24 */ {6, 0.2181042919855316, 0xb640000, 0x67980e0b},
  /* 25 */ {6, 0.2153382790366965, 0xe8d4a51, 0x19799812},
  /* 26 */ {6, 0.2127460535533632, 0x1269ae40, 0xbce85396},
  /* 27 */ {6, 0.2103099178571525, 0x17179149, 0x62c103a9},
  /* 28 */ {6, 0.2080145976765095, 0x1cb91000, 0x1d353d43},
  /* 29 */ {6, 0.2058468324604344, 0x23744899, 0xce1decea},
  /* 30 */ {6, 0.2037950470905062, 0x2b73a840, 0x790fc511},
  /* 31 */ {6, 0.2018490865820999, 0x34e63b41, 0x35b865a0},
  /* 32 */ {6, 0.2000000000000000, 0x5, 0x0},
  /* 33 */ {6, 0.1982398631705605, 0x4cfa3cc1, 0xa9aed1b3},
  /* 34 */ {6, 0.1965616322328226, 0x5c13d840, 0x63dfc229},
  /* 35 */ {6, 0.1949590218937863, 0x6d91b519, 0x2b0fee30},
  /* 36 */ {6, 0.1934264036172708, 0x81bf1000, 0xf91bd1b6},
  /* 37 */ {6, 0.1919587200065601, 0x98ede0c9, 0xac89c3a9},
  /* 38 */ {6, 0.1905514124267734, 0xb3773e40, 0x6d2c32fe},
  /* 39 */ {6, 0.1892003595168700, 0xd1bbc4d1, 0x387907c9},
  /* 40 */ {6, 0.1879018247091076, 0xf4240000, 0xc6f7a0b},
  /* 41 */ {5, 0.1866524112389434, 0x6e7d349, 0x28928154},
  /* 42 */ {5, 0.1854490234153689, 0x7ca30a0, 0x6e8629d},
  /* 43 */ {5, 0.1842888331487062, 0x8c32bbb, 0xd373dca0},
  /* 44 */ {5, 0.1831692509136336, 0x9d46c00, 0xa0b17895},
  /* 45 */ {5, 0.1820879004699383, 0xaffacfd, 0x746811a5},
  /* 46 */ {5, 0.1810425967800402, 0xc46bee0, 0x4da6500f},
  /* 47 */ {5, 0.1800313266566926, 0xdab86ef, 0x2ba23582},
  /* 48 */ {5, 0.1790522317510414, 0xf300000, 0xdb20a88},
  /* 49 */ {5, 0.1781035935540111, 0x10d63af1, 0xe68d5ce4},
  /* 50 */ {5, 0.1771838201355579, 0x12a05f20, 0xb7cdfd9d},
  /* 51 */ {5, 0.1762914343888821, 0x1490aae3, 0x8e583933},
  /* 52 */ {5, 0.1754250635819545, 0x16a97400, 0x697cc3ea},
  /* 53 */ {5, 0.1745834300480449, 0x18ed2825, 0x48a5ca6c},
  /* 54 */ {5, 0.1737653428714400, 0x1b5e4d60, 0x2b52db16},
  /* 55 */ {5, 0.1729696904450771, 0x1dff8297, 0x111586a6},
  /* 56 */ {5, 0.1721954337940981, 0x20d38000, 0xf31d2b36},
  /* 57 */ {5, 0.1714416005739134, 0x23dd1799, 0xc8d76d19},
  /* 58 */ {5, 0.1707072796637201, 0x271f35a0, 0xa2cb1eb4},
  /* 59 */ {5, 0.1699916162869140, 0x2a9ce10b, 0x807c3ec3},
  /* 60 */ {5, 0.1692938075987814, 0x2e593c00, 0x617ec8bf},
  /* 61 */ {5, 0.1686130986895011, 0x3257844d, 0x45746cbe},
  /* 62 */ {5, 0.1679487789570419, 0x369b13e0, 0x2c0aa273},
  /* 63 */ {5, 0.1673001788101741, 0x3b27613f, 0x14f90805},
  /* 64 */ {5, 0.1666666666666667, 0x6, 0x0},
  /* 65 */ {5, 0.1660476462159378, 0x4528a141, 0xd9cf0829},
  /* 66 */ {5, 0.1654425539190583, 0x4aa51420, 0xb6fc4841},
  /* 67 */ {5, 0.1648508567221604, 0x50794633, 0x973054cb},
  /* 68 */ {5, 0.1642720499620502, 0x56a94400, 0x7a1dbe4b},
  /* 69 */ {5, 0.1637056554452156, 0x5d393975, 0x5f7fcd7f},
  /* 70 */ {5, 0.1631512196835108, 0x642d7260, 0x47196c84},
  /* 71 */ {5, 0.1626083122716341, 0x6b8a5ae7, 0x30b43635},
  /* 72 */ {5, 0.1620765243931223, 0x73548000, 0x1c1fa5f6},
  /* 73 */ {5, 0.1615554674429964, 0x7b908fe9, 0x930634a},
  /* 74 */ {5, 0.1610447717564445, 0x84435aa0, 0xef7f4a3c},
  /* 75 */ {5, 0.1605440854340214, 0x8d71d25b, 0xcf5552d2},
  /* 76 */ {5, 0.1600530732548213, 0x97210c00, 0xb1a47c8e},
  /* 77 */ {5, 0.1595714156699382, 0xa1563f9d, 0x9634b43e},
  /* 78 */ {5, 0.1590988078692941, 0xac16c8e0, 0x7cd3817d},
  /* 79 */ {5, 0.1586349589155960, 0xb768278f, 0x65536761},
  /* 80 */ {5, 0.1581795909397823, 0xc3500000, 0x4f8b588e},
  /* 81 */ {5, 0.1577324383928644, 0xcfd41b91, 0x3b563c24},
  /* 82 */ {5, 0.1572932473495469, 0xdcfa6920, 0x28928154},
  /* 83 */ {5, 0.1568617748594410, 0xeac8fd83, 0x1721bfb0},
  /* 84 */ {5, 0.1564377883420716, 0xf9461400, 0x6e8629d},
  /* 85 */ {4, 0.1560210650222250, 0x31c84b1, 0x491cc17c},
  /* 86 */ {4, 0.1556113914024940, 0x342ab10, 0x3a11d83b},
  /* 87 */ {4, 0.1552085627701551, 0x36a2c21, 0x2be074cd},
  /* 88 */ {4, 0.1548123827357682, 0x3931000, 0x1e7a02e7},
  /* 89 */ {4, 0.1544226628011101, 0x3bd5ee1, 0x11d10edd},
  /* 90 */ {4, 0.1540392219542636, 0x3e92110, 0x5d92c68},
  /* 91 */ {4, 0.1536618862898642, 0x4165ef1, 0xf50dbfb2},
  /* 92 */ {4, 0.1532904886526781, 0x4452100, 0xdf9f1316},
  /* 93 */ {4, 0.1529248683028321, 0x4756fd1, 0xcb52a684},
  /* 94 */ {4, 0.1525648706011593, 0x4a75410, 0xb8163e97},
  /* 95 */ {4, 0.1522103467132434, 0x4dad681, 0xa5d8f269},
  /* 96 */ {4, 0.1518611533308632, 0x5100000, 0x948b0fcd},
  /* 97 */ {4, 0.1515171524096389, 0x546d981, 0x841e0215},
  /* 98 */ {4, 0.1511782109217764, 0x57f6c10, 0x74843b1e},
  /* 99 */ {4, 0.1508442006228941, 0x5b9c0d1, 0x65b11e6e},
  /* 100 */ {4, 0.1505149978319906, 0x5f5e100, 0x5798ee23},
  /* 101 */ {4, 0.1501904832236879, 0x633d5f1, 0x4a30b99b},
  /* 102 */ {4, 0.1498705416319474, 0x673a910, 0x3d6e4d94},
  /* 103 */ {4, 0.1495550618645152, 0x6b563e1, 0x314825b0},
  /* 104 */ {4, 0.1492439365274121, 0x6f91000, 0x25b55f2e},
  /* 105 */ {4, 0.1489370618588283, 0x73eb721, 0x1aadaccb},
  /* 106 */ {4, 0.1486343375718350, 0x7866310, 0x10294ba2},
  /* 107 */ {4, 0.1483356667053617, 0x7d01db1, 0x620f8f6},
  /* 108 */ {4, 0.1480409554829326, 0x81bf100, 0xf91bd1b6},
  /* 109 */ {4, 0.1477501131786861, 0x869e711, 0xe6d37b2a},
  /* 110 */ {4, 0.1474630519902391, 0x8ba0a10, 0xd55cff6e},
  /* 111 */ {4, 0.1471796869179852, 0x90c6441, 0xc4ad2db2},
  /* 112 */ {4, 0.1468999356504447, 0x9610000, 0xb4b985cf},
  /* 113 */ {4, 0.1466237184553111, 0x9b7e7c1, 0xa5782bef},
  /* 114 */ {4, 0.1463509580758620, 0xa112610, 0x96dfdd2a},
  /* 115 */ {4, 0.1460815796324244, 0xa6cc591, 0x88e7e509},
  /* 116 */ {4, 0.1458155105286054, 0xacad100, 0x7b8813d3},
  /* 117 */ {4, 0.1455526803620167, 0xb2b5331, 0x6eb8b595},
  /* 118 */ {4, 0.1452930208392428, 0xb8e5710, 0x627289db},
  /* 119 */ {4, 0.1450364656948130, 0xbf3e7a1, 0x56aebc07},
  /* 120 */ {4, 0.1447829506139581, 0xc5c1000, 0x4b66dc33},
  /* 121 */ {4, 0.1445324131589439, 0xcc6db61, 0x4094d8a3},
  /* 122 */ {4, 0.1442847926987864, 0xd345510, 0x3632f7a5},
  /* 123 */ {4, 0.1440400303421672, 0xda48871, 0x2c3bd1f0},
  /* 124 */ {4, 0.1437980688733775, 0xe178100, 0x22aa4d5f},
  /* 125 */ {4, 0.1435588526911310, 0xe8d4a51, 0x19799812},
  /* 126 */ {4, 0.1433223277500932, 0xf05f010, 0x10a523e5},
  /* 127 */ {4, 0.1430884415049874, 0xf817e01, 0x828a237},
  /* 128 */ {4, 0.1428571428571428, 0x7, 0x0},
  /* 129 */ {4, 0.1426283821033600, 0x10818201, 0xf04ec452},
  /* 130 */ {4, 0.1424021108869747, 0x11061010, 0xe136444a},
  /* 131 */ {4, 0.1421782821510107, 0x118db651, 0xd2af9589},
  /* 132 */ {4, 0.1419568500933153, 0x12188100, 0xc4b42a83},
  /* 133 */ {4, 0.1417377701235801, 0x12a67c71, 0xb73dccf5},
  /* 134 */ {4, 0.1415209988221527, 0x1337b510, 0xaa4698c5},
  /* 135 */ {4, 0.1413064939005528, 0x13cc3761, 0x9dc8f729},
  /* 136 */ {4, 0.1410942141636095, 0x14641000, 0x91bf9a30},
  /* 137 */ {4, 0.1408841194731412, 0x14ff4ba1, 0x86257887},
  /* 138 */ {4, 0.1406761707131039, 0x159df710, 0x7af5c98c},
  /* 139 */ {4, 0.1404703297561400, 0x16401f31, 0x702c01a0},
  /* 140 */ {4, 0.1402665594314587, 0x16e5d100, 0x65c3ceb1},
  /* 141 */ {4, 0.1400648234939879, 0x178f1991, 0x5bb91502},
  /* 142 */ {4, 0.1398650865947379, 0x183c0610, 0x5207ec23},
  /* 143 */ {4, 0.1396673142523192, 0x18eca3c1, 0x48ac9c19},
  /* 144 */ {4, 0.1394714728255649, 0x19a10000, 0x3fa39ab5},
  /* 145 */ {4, 0.1392775294872041, 0x1a592841, 0x36e98912},
  /* 146 */ {4, 0.1390854521985406, 0x1b152a10, 0x2e7b3140},
  /* 147 */ {4, 0.1388952096850913, 0x1bd51311, 0x2655840b},
  /* 148 */ {4, 0.1387067714131417, 0x1c98f100, 0x1e7596ea},
  /* 149 */ {4, 0.1385201075671774, 0x1d60d1b1, 0x16d8a20d},
  /* 150 */ {4, 0.1383351890281539, 0x1e2cc310, 0xf7bfe87},
  /* 151 */ {4, 0.1381519873525671, 0x1efcd321, 0x85d2492},
  /* 152 */ {4, 0.1379704747522905, 0x1fd11000, 0x179a9f4},
  /* 153 */ {4, 0.1377906240751463, 0x20a987e1, 0xf59e80eb},
  /* 154 */ {4, 0.1376124087861776, 0x21864910, 0xe8b768db},
  /* 155 */ {4, 0.1374358029495937, 0x226761f1, 0xdc39d6d5},
  /* 156 */ {4, 0.1372607812113589, 0x234ce100, 0xd021c5d1},
  /* 157 */ {4, 0.1370873187823978, 0x2436d4d1, 0xc46b5e37},
  /* 158 */ {4, 0.1369153914223921, 0x25254c10, 0xb912f39c},
  /* 159 */ {4, 0.1367449754241439, 0x26185581, 0xae150294},
  /* 160 */ {4, 0.1365760475984821, 0x27100000, 0xa36e2eb1},
  /* 161 */ {4, 0.1364085852596902, 0x280c5a81, 0x991b4094},
  /* 162 */ {4, 0.1362425662114337, 0x290d7410, 0x8f19241e},
  /* 163 */ {4, 0.1360779687331669, 0x2a135bd1, 0x8564e6b7},
  /* 164 */ {4, 0.1359147715670014, 0x2b1e2100, 0x7bfbb5b4},
  /* 165 */ {4, 0.1357529539050150, 0x2c2dd2f1, 0x72dadcc8},
  /* 166 */ {4, 0.1355924953769863, 0x2d428110, 0x69ffc498},
  /* 167 */ {4, 0.1354333760385373, 0x2e5c3ae1, 0x6167f154},
  /* 168 */ {4, 0.1352755763596663, 0x2f7b1000, 0x5911016e},
  /* 169 */ {4, 0.1351190772136599, 0x309f1021, 0x50f8ac5f},
  /* 170 */ {4, 0.1349638598663645, 0x31c84b10, 0x491cc17c},
  /* 171 */ {4, 0.1348099059658079, 0x32f6d0b1, 0x417b26d8},
  /* 172 */ {4, 0.1346571975321549, 0x342ab100, 0x3a11d83b},
  /* 173 */ {4, 0.1345057169479844, 0x3563fc11, 0x32dee622},
  /* 174 */ {4, 0.1343554469488779, 0x36a2c210, 0x2be074cd},
  /* 175 */ {4, 0.1342063706143054, 0x37e71341, 0x2514bb58},
  /* 176 */ {4, 0.1340584713587980, 0x39310000, 0x1e7a02e7},
  /* 177 */ {4, 0.1339117329233981, 0x3a8098c1, 0x180ea5d0},
  /* 178 */ {4, 0.1337661393673756, 0x3bd5ee10, 0x11d10edd},
  /* 179 */ {4, 0.1336216750601996, 0x3d311091, 0xbbfb88e},
  /* 180 */ {4, 0.1334783246737591, 0x3e921100, 0x5d92c68},
  /* 181 */ {4, 0.1333360731748201, 0x3ff90031, 0x1c024c},
  /* 182 */ {4, 0.1331949058177136, 0x4165ef10, 0xf50dbfb2},
  /* 183 */ {4, 0.1330548081372441, 0x42d8eea1, 0xea30efa3},
  /* 184 */ {4, 0.1329157659418126, 0x44521000, 0xdf9f1316},
  /* 185 */ {4, 0.1327777653067443, 0x45d16461, 0xd555c0c9},
  /* 186 */ {4, 0.1326407925678156, 0x4756fd10, 0xcb52a684},
  /* 187 */ {4, 0.1325048343149731, 0x48e2eb71, 0xc193881f},
  /* 188 */ {4, 0.1323698773862368, 0x4a754100, 0xb8163e97},
  /* 189 */ {4, 0.1322359088617821, 0x4c0e0f51, 0xaed8b724},
  /* 190 */ {4, 0.1321029160581950, 0x4dad6810, 0xa5d8f269},
  /* 191 */ {4, 0.1319708865228925, 0x4f535d01, 0x9d15039d},
  /* 192 */ {4, 0.1318398080287045, 0x51000000, 0x948b0fcd},
  /* 193 */ {4, 0.1317096685686114, 0x52b36301, 0x8c394d1d},
  /* 194 */ {4, 0.1315804563506306, 0x546d9810, 0x841e0215},
  /* 195 */ {4, 0.1314521597928493, 0x562eb151, 0x7c3784f8},
  /* 196 */ {4, 0.1313247675185968, 0x57f6c100, 0x74843b1e},
  /* 197 */ {4, 0.1311982683517524, 0x59c5d971, 0x6d02985d},
  /* 198 */ {4, 0.1310726513121843, 0x5b9c0d10, 0x65b11e6e},
  /* 199 */ {4, 0.1309479056113158, 0x5d796e61, 0x5e8e5c64},
  /* 200 */ {4, 0.1308240206478128, 0x5f5e1000, 0x5798ee23},
  /* 201 */ {4, 0.1307009860033912, 0x614a04a1, 0x50cf7bde},
  /* 202 */ {4, 0.1305787914387386, 0x633d5f10, 0x4a30b99b},
  /* 203 */ {4, 0.1304574268895465, 0x65383231, 0x43bb66bd},
  /* 204 */ {4, 0.1303368824626505, 0x673a9100, 0x3d6e4d94},
  /* 205 */ {4, 0.1302171484322746, 0x69448e91, 0x374842ee},
  /* 206 */ {4, 0.1300982152363760, 0x6b563e10, 0x314825b0},
  /* 207 */ {4, 0.1299800734730872, 0x6d6fb2c1, 0x2b6cde75},
  /* 208 */ {4, 0.1298627138972530, 0x6f910000, 0x25b55f2e},
  /* 209 */ {4, 0.1297461274170591, 0x71ba3941, 0x2020a2c5},
  /* 210 */ {4, 0.1296303050907487, 0x73eb7210, 0x1aadaccb},
  /* 211 */ {4, 0.1295152381234257, 0x7624be11, 0x155b891f},
  /* 212 */ {4, 0.1294009178639407, 0x78663100, 0x10294ba2},
  /* 213 */ {4, 0.1292873358018581, 0x7aafdeb1, 0xb160fe9},
  /* 214 */ {4, 0.1291744835645007, 0x7d01db10, 0x620f8f6},
  /* 215 */ {4, 0.1290623529140715, 0x7f5c3a21, 0x14930ef},
  /* 216 */ {4, 0.1289509357448472, 0x81bf1000, 0xf91bd1b6},
  /* 217 */ {4, 0.1288402240804449, 0x842a70e1, 0xefdcb0c7},
  /* 218 */ {4, 0.1287302100711567, 0x869e7110, 0xe6d37b2a},
  /* 219 */ {4, 0.1286208859913518, 0x891b24f1, 0xddfeb94a},
  /* 220 */ {4, 0.1285122442369443, 0x8ba0a100, 0xd55cff6e},
  /* 221 */ {4, 0.1284042773229231, 0x8e2ef9d1, 0xcceced50},
  /* 222 */ {4, 0.1282969778809442, 0x90c64410, 0xc4ad2db2},
  /* 223 */ {4, 0.1281903386569819, 0x93669481, 0xbc9c75f9},
  /* 224 */ {4, 0.1280843525090381, 0x96100000, 0xb4b985cf},
  /* 225 */ {4, 0.1279790124049077, 0x98c29b81, 0xad0326c2},
  /* 226 */ {4, 0.1278743114199984, 0x9b7e7c10, 0xa5782bef},
  /* 227 */ {4, 0.1277702427352035, 0x9e43b6d1, 0x9e1771a9},
  /* 228 */ {4, 0.1276667996348261, 0xa1126100, 0x96dfdd2a},
  /* 229 */ {4, 0.1275639755045533, 0xa3ea8ff1, 0x8fd05c41},
  /* 230 */ {4, 0.1274617638294791, 0xa6cc5910, 0x88e7e509},
  /* 231 */ {4, 0.1273601581921741, 0xa9b7d1e1, 0x8225759d},
  /* 232 */ {4, 0.1272591522708010, 0xacad1000, 0x7b8813d3},
  /* 233 */ {4, 0.1271587398372755, 0xafac2921, 0x750eccf9},
  /* 234 */ {4, 0.1270589147554692, 0xb2b53310, 0x6eb8b595},
  /* 235 */ {4, 0.1269596709794558, 0xb5c843b1, 0x6884e923},
  /* 236 */ {4, 0.1268610025517973, 0xb8e57100, 0x627289db},
  /* 237 */ {4, 0.1267629036018709, 0xbc0cd111, 0x5c80c07b},
  /* 238 */ {4, 0.1266653683442337, 0xbf3e7a10, 0x56aebc07},
  /* 239 */ {4, 0.1265683910770258, 0xc27a8241, 0x50fbb19b},
  /* 240 */ {4, 0.1264719661804097, 0xc5c10000, 0x4b66dc33},
  /* 241 */ {4, 0.1263760881150453, 0xc91209c1, 0x45ef7c7c},
  /* 242 */ {4, 0.1262807514205999, 0xcc6db610, 0x4094d8a3},
  /* 243 */ {4, 0.1261859507142915, 0xcfd41b91, 0x3b563c24},
  /* 244 */ {4, 0.1260916806894653, 0xd3455100, 0x3632f7a5},
  /* 245 */ {4, 0.1259979361142023, 0xd6c16d31, 0x312a60c3},
  /* 246 */ {4, 0.1259047118299582, 0xda488710, 0x2c3bd1f0},
  /* 247 */ {4, 0.1258120027502338, 0xdddab5a1, 0x2766aa45},
  /* 248 */ {4, 0.1257198038592741, 0xe1781000, 0x22aa4d5f},
  /* 249 */ {4, 0.1256281102107963, 0xe520ad61, 0x1e06233c},
  /* 250 */ {4, 0.1255369169267456, 0xe8d4a510, 0x19799812},
  /* 251 */ {4, 0.1254462191960791, 0xec940e71, 0x15041c33},
  /* 252 */ {4, 0.1253560122735751, 0xf05f0100, 0x10a523e5},
  /* 253 */ {4, 0.1252662914786691, 0xf4359451, 0xc5c2749},
  /* 254 */ {4, 0.1251770521943144, 0xf817e010, 0x828a237},
  /* 255 */ {4, 0.1250882898658681, 0xfc05fc01, 0x40a1423},
};
#endif
05380 #if BITS_PER_MP_LIMB == 64
05381 const struct bases __mp_bases[256] =
05382 {
05383   /*  0 */ {0, 0.0, 0, 0},
05384   /*  1 */ {0, 1e38, 0, 0},
05385   /*  2 */ {64, 1.0000000000000000, CNST_LIMB(0x1), CNST_LIMB(0x0)},
05386   /*  3 */ {40, 0.6309297535714574, CNST_LIMB(0xa8b8b452291fe821), CNST_LIMB(0x846d550e37b5063d)},
05387   /*  4 */ {32, 0.5000000000000000, CNST_LIMB(0x2), CNST_LIMB(0x0)},
05388   /*  5 */ {27, 0.4306765580733931, CNST_LIMB(0x6765c793fa10079d), CNST_LIMB(0x3ce9a36f23c0fc90)},
05389   /*  6 */ {24, 0.3868528072345416, CNST_LIMB(0x41c21cb8e1000000), CNST_LIMB(0xf24f62335024a295)},
05390   /*  7 */ {22, 0.3562071871080222, CNST_LIMB(0x3642798750226111), CNST_LIMB(0x2df495ccaa57147b)},
05391   /*  8 */ {21, 0.3333333333333334, CNST_LIMB(0x3), CNST_LIMB(0x0)},
05392   /*  9 */ {20, 0.3154648767857287, CNST_LIMB(0xa8b8b452291fe821), CNST_LIMB(0x846d550e37b5063d)},
05393   /* 10 */ {19, 0.3010299956639811, CNST_LIMB(0x8ac7230489e80000), CNST_LIMB(0xd83c94fb6d2ac34a)},
05394   /* 11 */ {18, 0.2890648263178878, CNST_LIMB(0x4d28cb56c33fa539), CNST_LIMB(0xa8adf7ae45e7577b)},
05395   /* 12 */ {17, 0.2789429456511298, CNST_LIMB(0x1eca170c00000000), CNST_LIMB(0xa10c2bec5da8f8f)},
05396   /* 13 */ {17, 0.2702381544273197, CNST_LIMB(0x780c7372621bd74d), CNST_LIMB(0x10f4becafe412ec3)},
05397   /* 14 */ {16, 0.2626495350371936, CNST_LIMB(0x1e39a5057d810000), CNST_LIMB(0xf08480f672b4e86)},
05398   /* 15 */ {16, 0.2559580248098155, CNST_LIMB(0x5b27ac993df97701), CNST_LIMB(0x6779c7f90dc42f48)},
05399   /* 16 */ {16, 0.2500000000000000, CNST_LIMB(0x4), CNST_LIMB(0x0)},
05400   /* 17 */ {15, 0.2446505421182260, CNST_LIMB(0x27b95e997e21d9f1), CNST_LIMB(0x9c71e11bab279323)},
05401   /* 18 */ {15, 0.2398124665681315, CNST_LIMB(0x5da0e1e53c5c8000), CNST_LIMB(0x5dfaa697ec6f6a1c)},
05402   /* 19 */ {15, 0.2354089133666382, CNST_LIMB(0xd2ae3299c1c4aedb), CNST_LIMB(0x3711783f6be7e9ec)},
05403   /* 20 */ {14, 0.2313782131597592, CNST_LIMB(0x16bcc41e90000000), CNST_LIMB(0x6849b86a12b9b01e)},
05404   /* 21 */ {14, 0.2276702486969530, CNST_LIMB(0x2d04b7fdd9c0ef49), CNST_LIMB(0x6bf097ba5ca5e239)},
05405   /* 22 */ {14, 0.2242438242175754, CNST_LIMB(0x5658597bcaa24000), CNST_LIMB(0x7b8015c8d7af8f08)},
05406   /* 23 */ {14, 0.2210647294575037, CNST_LIMB(0xa0e2073737609371), CNST_LIMB(0x975a24b3a3151b38)},
05407   /* 24 */ {13, 0.2181042919855316, CNST_LIMB(0xc29e98000000000), CNST_LIMB(0x50bd367972689db1)},
05408   /* 25 */ {13, 0.2153382790366965, CNST_LIMB(0x14adf4b7320334b9), CNST_LIMB(0x8c240c4aecb13bb5)},
05409   /* 26 */ {13, 0.2127460535533632, CNST_LIMB(0x226ed36478bfa000), CNST_LIMB(0xdbd2e56854e118c9)},
05410   /* 27 */ {13, 0.2103099178571525, CNST_LIMB(0x383d9170b85ff80b), CNST_LIMB(0x2351ffcaa9c7c4ae)},
05411   /* 28 */ {13, 0.2080145976765095, CNST_LIMB(0x5a3c23e39c000000), CNST_LIMB(0x6b24188ca33b0636)},
05412   /* 29 */ {13, 0.2058468324604344, CNST_LIMB(0x8e65137388122bcd), CNST_LIMB(0xcc3dceaf2b8ba99d)},
05413   /* 30 */ {13, 0.2037950470905062, CNST_LIMB(0xdd41bb36d259e000), CNST_LIMB(0x2832e835c6c7d6b6)},
05414   /* 31 */ {12, 0.2018490865820999, CNST_LIMB(0xaee5720ee830681), CNST_LIMB(0x76b6aa272e1873c5)},
05415   /* 32 */ {12, 0.2000000000000000, CNST_LIMB(0x5), CNST_LIMB(0x0)},
05416   /* 33 */ {12, 0.1982398631705605, CNST_LIMB(0x172588ad4f5f0981), CNST_LIMB(0x61eaf5d402c7bf4f)},
05417   /* 34 */ {12, 0.1965616322328226, CNST_LIMB(0x211e44f7d02c1000), CNST_LIMB(0xeeb658123ffb27ec)},
05418   /* 35 */ {12, 0.1949590218937863, CNST_LIMB(0x2ee56725f06e5c71), CNST_LIMB(0x5d5e3762e6fdf509)},
05419   /* 36 */ {12, 0.1934264036172708, CNST_LIMB(0x41c21cb8e1000000), CNST_LIMB(0xf24f62335024a295)},
05420   /* 37 */ {12, 0.1919587200065601, CNST_LIMB(0x5b5b57f8a98a5dd1), CNST_LIMB(0x66ae7831762efb6f)},
05421   /* 38 */ {12, 0.1905514124267734, CNST_LIMB(0x7dcff8986ea31000), CNST_LIMB(0x47388865a00f544)},
05422   /* 39 */ {12, 0.1892003595168700, CNST_LIMB(0xabd4211662a6b2a1), CNST_LIMB(0x7d673c33a123b54c)},
05423   /* 40 */ {12, 0.1879018247091076, CNST_LIMB(0xe8d4a51000000000), CNST_LIMB(0x19799812dea11197)},
05424   /* 41 */ {11, 0.1866524112389434, CNST_LIMB(0x7a32956ad081b79), CNST_LIMB(0xc27e62e0686feae)},
05425   /* 42 */ {11, 0.1854490234153689, CNST_LIMB(0x9f49aaff0e86800), CNST_LIMB(0x9b6e7507064ce7c7)},
05426   /* 43 */ {11, 0.1842888331487062, CNST_LIMB(0xce583bb812d37b3), CNST_LIMB(0x3d9ac2bf66cfed94)},
05427   /* 44 */ {11, 0.1831692509136336, CNST_LIMB(0x109b79a654c00000), CNST_LIMB(0xed46bc50ce59712a)},
05428   /* 45 */ {11, 0.1820879004699383, CNST_LIMB(0x1543beff214c8b95), CNST_LIMB(0x813d97e2c89b8d46)},
05429   /* 46 */ {11, 0.1810425967800402, CNST_LIMB(0x1b149a79459a3800), CNST_LIMB(0x2e81751956af8083)},
05430   /* 47 */ {11, 0.1800313266566926, CNST_LIMB(0x224edfb5434a830f), CNST_LIMB(0xdd8e0a95e30c0988)},
05431   /* 48 */ {11, 0.1790522317510413, CNST_LIMB(0x2b3fb00000000000), CNST_LIMB(0x7ad4dd48a0b5b167)},
05432   /* 49 */ {11, 0.1781035935540111, CNST_LIMB(0x3642798750226111), CNST_LIMB(0x2df495ccaa57147b)},
05433   /* 50 */ {11, 0.1771838201355579, CNST_LIMB(0x43c33c1937564800), CNST_LIMB(0xe392010175ee5962)},
05434   /* 51 */ {11, 0.1762914343888821, CNST_LIMB(0x54411b2441c3cd8b), CNST_LIMB(0x84eaf11b2fe7738e)},
05435   /* 52 */ {11, 0.1754250635819545, CNST_LIMB(0x6851455acd400000), CNST_LIMB(0x3a1e3971e008995d)},
05436   /* 53 */ {11, 0.1745834300480449, CNST_LIMB(0x80a23b117c8feb6d), CNST_LIMB(0xfd7a462344ffce25)},
05437   /* 54 */ {11, 0.1737653428714400, CNST_LIMB(0x9dff7d32d5dc1800), CNST_LIMB(0x9eca40b40ebcef8a)},
05438   /* 55 */ {11, 0.1729696904450771, CNST_LIMB(0xc155af6faeffe6a7), CNST_LIMB(0x52fa161a4a48e43d)},
05439   /* 56 */ {11, 0.1721954337940981, CNST_LIMB(0xebb7392e00000000), CNST_LIMB(0x1607a2cbacf930c1)},
05440   /* 57 */ {10, 0.1714416005739134, CNST_LIMB(0x50633659656d971), CNST_LIMB(0x97a014f8e3be55f1)},
05441   /* 58 */ {10, 0.1707072796637201, CNST_LIMB(0x5fa8624c7fba400), CNST_LIMB(0x568df8b76cbf212c)},
05442   /* 59 */ {10, 0.1699916162869140, CNST_LIMB(0x717d9faa73c5679), CNST_LIMB(0x20ba7c4b4e6ef492)},
05443   /* 60 */ {10, 0.1692938075987814, CNST_LIMB(0x86430aac6100000), CNST_LIMB(0xe81ee46b9ef492f5)},
05444   /* 61 */ {10, 0.1686130986895011, CNST_LIMB(0x9e64d9944b57f29), CNST_LIMB(0x9dc0d10d51940416)},
05445   /* 62 */ {10, 0.1679487789570419, CNST_LIMB(0xba5ca5392cb0400), CNST_LIMB(0x5fa8ed2f450272a5)},
05446   /* 63 */ {10, 0.1673001788101741, CNST_LIMB(0xdab2ce1d022cd81), CNST_LIMB(0x2ba9eb8c5e04e641)},
05447   /* 64 */ {10, 0.1666666666666667, CNST_LIMB(0x6), CNST_LIMB(0x0)},
05448   /* 65 */ {10, 0.1660476462159378, CNST_LIMB(0x12aeed5fd3e2d281), CNST_LIMB(0xb67759cc00287bf1)},
05449   /* 66 */ {10, 0.1654425539190583, CNST_LIMB(0x15c3da1572d50400), CNST_LIMB(0x78621feeb7f4ed33)},
05450   /* 67 */ {10, 0.1648508567221604, CNST_LIMB(0x194c05534f75ee29), CNST_LIMB(0x43d55b5f72943bc0)},
05451   /* 68 */ {10, 0.1642720499620502, CNST_LIMB(0x1d56299ada100000), CNST_LIMB(0x173decb64d1d4409)},
05452   /* 69 */ {10, 0.1637056554452156, CNST_LIMB(0x21f2a089a4ff4f79), CNST_LIMB(0xe29fb54fd6b6074f)},
05453   /* 70 */ {10, 0.1631512196835108, CNST_LIMB(0x2733896c68d9a400), CNST_LIMB(0xa1f1f5c210d54e62)},
05454   /* 71 */ {10, 0.1626083122716341, CNST_LIMB(0x2d2cf2c33b533c71), CNST_LIMB(0x6aac7f9bfafd57b2)},
05455   /* 72 */ {10, 0.1620765243931223, CNST_LIMB(0x33f506e440000000), CNST_LIMB(0x3b563c2478b72ee2)},
05456   /* 73 */ {10, 0.1615554674429964, CNST_LIMB(0x3ba43bec1d062211), CNST_LIMB(0x12b536b574e92d1b)},
05457   /* 74 */ {10, 0.1610447717564444, CNST_LIMB(0x4455872d8fd4e400), CNST_LIMB(0xdf86c03020404fa5)},
05458   /* 75 */ {10, 0.1605440854340214, CNST_LIMB(0x4e2694539f2f6c59), CNST_LIMB(0xa34adf02234eea8e)},
05459   /* 76 */ {10, 0.1600530732548213, CNST_LIMB(0x5938006c18900000), CNST_LIMB(0x6f46eb8574eb59dd)},
05460   /* 77 */ {10, 0.1595714156699382, CNST_LIMB(0x65ad9912474aa649), CNST_LIMB(0x42459b481df47cec)},
05461   /* 78 */ {10, 0.1590988078692941, CNST_LIMB(0x73ae9ff4241ec400), CNST_LIMB(0x1b424b95d80ca505)},
05462   /* 79 */ {10, 0.1586349589155960, CNST_LIMB(0x836612ee9c4ce1e1), CNST_LIMB(0xf2c1b982203a0dac)},
05463   /* 80 */ {10, 0.1581795909397823, CNST_LIMB(0x9502f90000000000), CNST_LIMB(0xb7cdfd9d7bdbab7d)},
05464   /* 81 */ {10, 0.1577324383928644, CNST_LIMB(0xa8b8b452291fe821), CNST_LIMB(0x846d550e37b5063d)},
05465   /* 82 */ {10, 0.1572932473495469, CNST_LIMB(0xbebf59a07dab4400), CNST_LIMB(0x57931eeaf85cf64f)},
05466   /* 83 */ {10, 0.1568617748594410, CNST_LIMB(0xd7540d4093bc3109), CNST_LIMB(0x305a944507c82f47)},
05467   /* 84 */ {10, 0.1564377883420716, CNST_LIMB(0xf2b96616f1900000), CNST_LIMB(0xe007ccc9c22781a)},
05468   /* 85 */ {9, 0.1560210650222250, CNST_LIMB(0x336de62af2bca35), CNST_LIMB(0x3e92c42e000eeed4)},
05469   /* 86 */ {9, 0.1556113914024940, CNST_LIMB(0x39235ec33d49600), CNST_LIMB(0x1ebe59130db2795e)},
05470   /* 87 */ {9, 0.1552085627701551, CNST_LIMB(0x3f674e539585a17), CNST_LIMB(0x268859e90f51b89)},
05471   /* 88 */ {9, 0.1548123827357682, CNST_LIMB(0x4645b6958000000), CNST_LIMB(0xd24cde0463108cfa)},
05472   /* 89 */ {9, 0.1544226628011101, CNST_LIMB(0x4dcb74afbc49c19), CNST_LIMB(0xa536009f37adc383)},
05473   /* 90 */ {9, 0.1540392219542636, CNST_LIMB(0x56064e1d18d9a00), CNST_LIMB(0x7cea06ce1c9ace10)},
05474   /* 91 */ {9, 0.1536618862898642, CNST_LIMB(0x5f04fe2cd8a39fb), CNST_LIMB(0x58db032e72e8ba43)},
05475   /* 92 */ {9, 0.1532904886526781, CNST_LIMB(0x68d74421f5c0000), CNST_LIMB(0x388cc17cae105447)},
05476   /* 93 */ {9, 0.1529248683028321, CNST_LIMB(0x738df1f6ab4827d), CNST_LIMB(0x1b92672857620ce0)},
05477   /* 94 */ {9, 0.1525648706011593, CNST_LIMB(0x7f3afbc9cfb5e00), CNST_LIMB(0x18c6a9575c2ade4)},
05478   /* 95 */ {9, 0.1522103467132434, CNST_LIMB(0x8bf187fba88f35f), CNST_LIMB(0xd44da7da8e44b24f)},
05479   /* 96 */ {9, 0.1518611533308632, CNST_LIMB(0x99c600000000000), CNST_LIMB(0xaa2f78f1b4cc6794)},
05480   /* 97 */ {9, 0.1515171524096389, CNST_LIMB(0xa8ce21eb6531361), CNST_LIMB(0x843c067d091ee4cc)},
05481   /* 98 */ {9, 0.1511782109217764, CNST_LIMB(0xb92112c1a0b6200), CNST_LIMB(0x62005e1e913356e3)},
05482   /* 99 */ {9, 0.1508442006228941, CNST_LIMB(0xcad7718b8747c43), CNST_LIMB(0x4316eed01dedd518)},
05483   /* 100 */ {9, 0.1505149978319906, CNST_LIMB(0xde0b6b3a7640000), CNST_LIMB(0x2725dd1d243aba0e)},
05484   /* 101 */ {9, 0.1501904832236879, CNST_LIMB(0xf2d8cf5fe6d74c5), CNST_LIMB(0xddd9057c24cb54f)},
05485   /* 102 */ {9, 0.1498705416319474, CNST_LIMB(0x1095d25bfa712600), CNST_LIMB(0xedeee175a736d2a1)},
05486   /* 103 */ {9, 0.1495550618645152, CNST_LIMB(0x121b7c4c3698faa7), CNST_LIMB(0xc4699f3df8b6b328)},
05487   /* 104 */ {9, 0.1492439365274121, CNST_LIMB(0x13c09e8d68000000), CNST_LIMB(0x9ebbe7d859cb5a7c)},
05488   /* 105 */ {9, 0.1489370618588283, CNST_LIMB(0x15876ccb0b709ca9), CNST_LIMB(0x7c828b9887eb2179)},
05489   /* 106 */ {9, 0.1486343375718350, CNST_LIMB(0x17723c2976da2a00), CNST_LIMB(0x5d652ab99001adcf)},
05490   /* 107 */ {9, 0.1483356667053617, CNST_LIMB(0x198384e9c259048b), CNST_LIMB(0x4114f1754e5d7b32)},
05491   /* 108 */ {9, 0.1480409554829326, CNST_LIMB(0x1bbde41dfeec0000), CNST_LIMB(0x274b7c902f7e0188)},
05492   /* 109 */ {9, 0.1477501131786861, CNST_LIMB(0x1e241d6e3337910d), CNST_LIMB(0xfc9e0fbb32e210c)},
05493   /* 110 */ {9, 0.1474630519902391, CNST_LIMB(0x20b91cee9901ee00), CNST_LIMB(0xf4afa3e594f8ea1f)},
05494   /* 111 */ {9, 0.1471796869179852, CNST_LIMB(0x237ff9079863dfef), CNST_LIMB(0xcd85c32e9e4437b0)},
05495   /* 112 */ {9, 0.1468999356504447, CNST_LIMB(0x267bf47000000000), CNST_LIMB(0xa9bbb147e0dd92a8)},
05496   /* 113 */ {9, 0.1466237184553111, CNST_LIMB(0x29b08039fbeda7f1), CNST_LIMB(0x8900447b70e8eb82)},
05497   /* 114 */ {9, 0.1463509580758620, CNST_LIMB(0x2d213df34f65f200), CNST_LIMB(0x6b0a92adaad5848a)},
05498   /* 115 */ {9, 0.1460815796324244, CNST_LIMB(0x30d201d957a7c2d3), CNST_LIMB(0x4f990ad8740f0ee5)},
05499   /* 116 */ {9, 0.1458155105286054, CNST_LIMB(0x34c6d52160f40000), CNST_LIMB(0x3670a9663a8d3610)},
05500   /* 117 */ {9, 0.1455526803620167, CNST_LIMB(0x3903f855d8f4c755), CNST_LIMB(0x1f5c44188057be3c)},
05501   /* 118 */ {9, 0.1452930208392428, CNST_LIMB(0x3d8de5c8ec59b600), CNST_LIMB(0xa2bea956c4e4977)},
05502   /* 119 */ {9, 0.1450364656948130, CNST_LIMB(0x4269541d1ff01337), CNST_LIMB(0xed68b23033c3637e)},
05503   /* 120 */ {9, 0.1447829506139581, CNST_LIMB(0x479b38e478000000), CNST_LIMB(0xc99cf624e50549c5)},
05504   /* 121 */ {9, 0.1445324131589439, CNST_LIMB(0x4d28cb56c33fa539), CNST_LIMB(0xa8adf7ae45e7577b)},
05505   /* 122 */ {9, 0.1442847926987864, CNST_LIMB(0x5317871fa13aba00), CNST_LIMB(0x8a5bc740b1c113e5)},
05506   /* 123 */ {9, 0.1440400303421672, CNST_LIMB(0x596d2f44de9fa71b), CNST_LIMB(0x6e6c7efb81cfbb9b)},
05507   /* 124 */ {9, 0.1437980688733775, CNST_LIMB(0x602fd125c47c0000), CNST_LIMB(0x54aba5c5cada5f10)},
05508   /* 125 */ {9, 0.1435588526911310, CNST_LIMB(0x6765c793fa10079d), CNST_LIMB(0x3ce9a36f23c0fc90)},
05509   /* 126 */ {9, 0.1433223277500932, CNST_LIMB(0x6f15be069b847e00), CNST_LIMB(0x26fb43de2c8cd2a8)},
05510   /* 127 */ {9, 0.1430884415049874, CNST_LIMB(0x7746b3e82a77047f), CNST_LIMB(0x12b94793db8486a1)},
05511   /* 128 */ {9, 0.1428571428571428, CNST_LIMB(0x7), CNST_LIMB(0x0)},
05512   /* 129 */ {9, 0.1426283821033600, CNST_LIMB(0x894953f7ea890481), CNST_LIMB(0xdd5deca404c0156d)},
05513   /* 130 */ {9, 0.1424021108869747, CNST_LIMB(0x932abffea4848200), CNST_LIMB(0xbd51373330291de0)},
05514   /* 131 */ {9, 0.1421782821510107, CNST_LIMB(0x9dacb687d3d6a163), CNST_LIMB(0x9fa4025d66f23085)},
05515   /* 132 */ {9, 0.1419568500933153, CNST_LIMB(0xa8d8102a44840000), CNST_LIMB(0x842530ee2db4949d)},
05516   /* 133 */ {9, 0.1417377701235801, CNST_LIMB(0xb4b60f9d140541e5), CNST_LIMB(0x6aa7f2766b03dc25)},
05517   /* 134 */ {9, 0.1415209988221527, CNST_LIMB(0xc15065d4856e4600), CNST_LIMB(0x53035ba7ebf32e8d)},
05518   /* 135 */ {9, 0.1413064939005528, CNST_LIMB(0xceb1363f396d23c7), CNST_LIMB(0x3d12091fc9fb4914)},
05519   /* 136 */ {9, 0.1410942141636095, CNST_LIMB(0xdce31b2488000000), CNST_LIMB(0x28b1cb81b1ef1849)},
05520   /* 137 */ {9, 0.1408841194731412, CNST_LIMB(0xebf12a24bca135c9), CNST_LIMB(0x15c35be67ae3e2c9)},
05521   /* 138 */ {9, 0.1406761707131039, CNST_LIMB(0xfbe6f8dbf88f4a00), CNST_LIMB(0x42a17bd09be1ff0)},
05522   /* 139 */ {8, 0.1404703297561400, CNST_LIMB(0x1ef156c084ce761), CNST_LIMB(0x8bf461f03cf0bbf)},
05523   /* 140 */ {8, 0.1402665594314587, CNST_LIMB(0x20c4e3b94a10000), CNST_LIMB(0xf3fbb43f68a32d05)},
05524   /* 141 */ {8, 0.1400648234939879, CNST_LIMB(0x22b0695a08ba421), CNST_LIMB(0xd84f44c48564dc19)},
05525   /* 142 */ {8, 0.1398650865947379, CNST_LIMB(0x24b4f35d7a4c100), CNST_LIMB(0xbe58ebcce7956abe)},
05526   /* 143 */ {8, 0.1396673142523192, CNST_LIMB(0x26d397284975781), CNST_LIMB(0xa5fac463c7c134b7)},
05527   /* 144 */ {8, 0.1394714728255649, CNST_LIMB(0x290d74100000000), CNST_LIMB(0x8f19241e28c7d757)},
05528   /* 145 */ {8, 0.1392775294872041, CNST_LIMB(0x2b63b3a37866081), CNST_LIMB(0x799a6d046c0ae1ae)},
05529   /* 146 */ {8, 0.1390854521985406, CNST_LIMB(0x2dd789f4d894100), CNST_LIMB(0x6566e37d746a9e40)},
05530   /* 147 */ {8, 0.1388952096850913, CNST_LIMB(0x306a35e51b58721), CNST_LIMB(0x526887dbfb5f788f)},
05531   /* 148 */ {8, 0.1387067714131417, CNST_LIMB(0x331d01712e10000), CNST_LIMB(0x408af3382b8efd3d)},
05532   /* 149 */ {8, 0.1385201075671774, CNST_LIMB(0x35f14200a827c61), CNST_LIMB(0x2fbb374806ec05f1)},
05533   /* 150 */ {8, 0.1383351890281539, CNST_LIMB(0x38e858b62216100), CNST_LIMB(0x1fe7c0f0afce87fe)},
05534   /* 151 */ {8, 0.1381519873525671, CNST_LIMB(0x3c03b2c13176a41), CNST_LIMB(0x11003d517540d32e)},
05535   /* 152 */ {8, 0.1379704747522905, CNST_LIMB(0x3f44c9b21000000), CNST_LIMB(0x2f5810f98eff0dc)},
05536   /* 153 */ {8, 0.1377906240751463, CNST_LIMB(0x42ad23cef3113c1), CNST_LIMB(0xeb72e35e7840d910)},
05537   /* 154 */ {8, 0.1376124087861776, CNST_LIMB(0x463e546b19a2100), CNST_LIMB(0xd27de19593dc3614)},
05538   /* 155 */ {8, 0.1374358029495937, CNST_LIMB(0x49f9fc3f96684e1), CNST_LIMB(0xbaf391fd3e5e6fc2)},
05539   /* 156 */ {8, 0.1372607812113589, CNST_LIMB(0x4de1c9c5dc10000), CNST_LIMB(0xa4bd38c55228c81d)},
05540   /* 157 */ {8, 0.1370873187823978, CNST_LIMB(0x51f77994116d2a1), CNST_LIMB(0x8fc5a8de8e1de782)},
05541   /* 158 */ {8, 0.1369153914223921, CNST_LIMB(0x563cd6bb3398100), CNST_LIMB(0x7bf9265bea9d3a3b)},
05542   /* 159 */ {8, 0.1367449754241439, CNST_LIMB(0x5ab3bb270beeb01), CNST_LIMB(0x69454b325983dccd)},
05543   /* 160 */ {8, 0.1365760475984821, CNST_LIMB(0x5f5e10000000000), CNST_LIMB(0x5798ee2308c39df9)},
05544   /* 161 */ {8, 0.1364085852596902, CNST_LIMB(0x643dce0ec16f501), CNST_LIMB(0x46e40ba0fa66a753)},
05545   /* 162 */ {8, 0.1362425662114337, CNST_LIMB(0x6954fe21e3e8100), CNST_LIMB(0x3717b0870b0db3a7)},
05546   /* 163 */ {8, 0.1360779687331669, CNST_LIMB(0x6ea5b9755f440a1), CNST_LIMB(0x2825e6775d11cdeb)},
05547   /* 164 */ {8, 0.1359147715670014, CNST_LIMB(0x74322a1c0410000), CNST_LIMB(0x1a01a1c09d1b4dac)},
05548   /* 165 */ {8, 0.1357529539050150, CNST_LIMB(0x79fc8b6ae8a46e1), CNST_LIMB(0xc9eb0a8bebc8f3e)},
05549   /* 166 */ {8, 0.1355924953769863, CNST_LIMB(0x80072a66d512100), CNST_LIMB(0xffe357ff59e6a004)},
05550   /* 167 */ {8, 0.1354333760385373, CNST_LIMB(0x86546633b42b9c1), CNST_LIMB(0xe7dfd1be05fa61a8)},
05551   /* 168 */ {8, 0.1352755763596663, CNST_LIMB(0x8ce6b0861000000), CNST_LIMB(0xd11ed6fc78f760e5)},
05552   /* 169 */ {8, 0.1351190772136599, CNST_LIMB(0x93c08e16a022441), CNST_LIMB(0xbb8db609dd29ebfe)},
05553   /* 170 */ {8, 0.1349638598663645, CNST_LIMB(0x9ae49717f026100), CNST_LIMB(0xa71aec8d1813d532)},
05554   /* 171 */ {8, 0.1348099059658079, CNST_LIMB(0xa25577ae24c1a61), CNST_LIMB(0x93b612a9f20fbc02)},
05555   /* 172 */ {8, 0.1346571975321549, CNST_LIMB(0xaa15f068e610000), CNST_LIMB(0x814fc7b19a67d317)},
05556   /* 173 */ {8, 0.1345057169479844, CNST_LIMB(0xb228d6bf7577921), CNST_LIMB(0x6fd9a03f2e0a4b7c)},
05557   /* 174 */ {8, 0.1343554469488779, CNST_LIMB(0xba91158ef5c4100), CNST_LIMB(0x5f4615a38d0d316e)},
05558   /* 175 */ {8, 0.1342063706143054, CNST_LIMB(0xc351ad9aec0b681), CNST_LIMB(0x4f8876863479a286)},
05559   /* 176 */ {8, 0.1340584713587980, CNST_LIMB(0xcc6db6100000000), CNST_LIMB(0x4094d8a3041b60eb)},
05560   /* 177 */ {8, 0.1339117329233981, CNST_LIMB(0xd5e85d09025c181), CNST_LIMB(0x32600b8ed883a09b)},
05561   /* 178 */ {8, 0.1337661393673756, CNST_LIMB(0xdfc4e816401c100), CNST_LIMB(0x24df8c6eb4b6d1f1)},
05562   /* 179 */ {8, 0.1336216750601996, CNST_LIMB(0xea06b4c72947221), CNST_LIMB(0x18097a8ee151acef)},
05563   /* 180 */ {8, 0.1334783246737591, CNST_LIMB(0xf4b139365210000), CNST_LIMB(0xbd48cc8ec1cd8e3)},
05564   /* 181 */ {8, 0.1333360731748201, CNST_LIMB(0xffc80497d520961), CNST_LIMB(0x3807a8d67485fb)},
05565   /* 182 */ {8, 0.1331949058177136, CNST_LIMB(0x10b4ebfca1dee100), CNST_LIMB(0xea5768860b62e8d8)},
05566   /* 183 */ {8, 0.1330548081372441, CNST_LIMB(0x117492de921fc141), CNST_LIMB(0xd54faf5b635c5005)},
05567   /* 184 */ {8, 0.1329157659418126, CNST_LIMB(0x123bb2ce41000000), CNST_LIMB(0xc14a56233a377926)},
05568   /* 185 */ {8, 0.1327777653067443, CNST_LIMB(0x130a8b6157bdecc1), CNST_LIMB(0xae39a88db7cd329f)},
05569   /* 186 */ {8, 0.1326407925678156, CNST_LIMB(0x13e15dede0e8a100), CNST_LIMB(0x9c10bde69efa7ab6)},
05570   /* 187 */ {8, 0.1325048343149731, CNST_LIMB(0x14c06d941c0ca7e1), CNST_LIMB(0x8ac36c42a2836497)},
05571   /* 188 */ {8, 0.1323698773862368, CNST_LIMB(0x15a7ff487a810000), CNST_LIMB(0x7a463c8b84f5ef67)},
05572   /* 189 */ {8, 0.1322359088617821, CNST_LIMB(0x169859ddc5c697a1), CNST_LIMB(0x6a8e5f5ad090fd4b)},
05573   /* 190 */ {8, 0.1321029160581950, CNST_LIMB(0x1791c60f6fed0100), CNST_LIMB(0x5b91a2943596fc56)},
05574   /* 191 */ {8, 0.1319708865228925, CNST_LIMB(0x18948e8c0e6fba01), CNST_LIMB(0x4d4667b1c468e8f0)},
05575   /* 192 */ {8, 0.1318398080287045, CNST_LIMB(0x19a1000000000000), CNST_LIMB(0x3fa39ab547994daf)},
05576   /* 193 */ {8, 0.1317096685686114, CNST_LIMB(0x1ab769203dafc601), CNST_LIMB(0x32a0a9b2faee1e2a)},
05577   /* 194 */ {8, 0.1315804563506306, CNST_LIMB(0x1bd81ab557f30100), CNST_LIMB(0x26357ceac0e96962)},
05578   /* 195 */ {8, 0.1314521597928493, CNST_LIMB(0x1d0367a69fed1ba1), CNST_LIMB(0x1a5a6f65caa5859e)},
05579   /* 196 */ {8, 0.1313247675185968, CNST_LIMB(0x1e39a5057d810000), CNST_LIMB(0xf08480f672b4e86)},
05580   /* 197 */ {8, 0.1311982683517524, CNST_LIMB(0x1f7b2a18f29ac3e1), CNST_LIMB(0x4383340615612ca)},
05581   /* 198 */ {8, 0.1310726513121843, CNST_LIMB(0x20c850694c2aa100), CNST_LIMB(0xf3c77969ee4be5a2)},
05582   /* 199 */ {8, 0.1309479056113158, CNST_LIMB(0x222173cc014980c1), CNST_LIMB(0xe00993cc187c5ec9)},
05583   /* 200 */ {8, 0.1308240206478128, CNST_LIMB(0x2386f26fc1000000), CNST_LIMB(0xcd2b297d889bc2b6)},
05584   /* 201 */ {8, 0.1307009860033912, CNST_LIMB(0x24f92ce8af296d41), CNST_LIMB(0xbb214d5064862b22)},
05585   /* 202 */ {8, 0.1305787914387386, CNST_LIMB(0x2678863cd0ece100), CNST_LIMB(0xa9e1a7ca7ea10e20)},
05586   /* 203 */ {8, 0.1304574268895465, CNST_LIMB(0x280563f0a9472d61), CNST_LIMB(0x99626e72b39ea0cf)},
05587   /* 204 */ {8, 0.1303368824626505, CNST_LIMB(0x29a02e1406210000), CNST_LIMB(0x899a5ba9c13fafd9)},
05588   /* 205 */ {8, 0.1302171484322746, CNST_LIMB(0x2b494f4efe6d2e21), CNST_LIMB(0x7a80a705391e96ff)},
05589   /* 206 */ {8, 0.1300982152363760, CNST_LIMB(0x2d0134ef21cbc100), CNST_LIMB(0x6c0cfe23de23042a)},
05590   /* 207 */ {8, 0.1299800734730872, CNST_LIMB(0x2ec84ef4da2ef581), CNST_LIMB(0x5e377df359c944dd)},
05591   /* 208 */ {8, 0.1298627138972530, CNST_LIMB(0x309f102100000000), CNST_LIMB(0x50f8ac5fc8f53985)},
05592   /* 209 */ {8, 0.1297461274170591, CNST_LIMB(0x3285ee02a1420281), CNST_LIMB(0x44497266278e35b7)},
05593   /* 210 */ {8, 0.1296303050907487, CNST_LIMB(0x347d6104fc324100), CNST_LIMB(0x382316831f7ee175)},
05594   /* 211 */ {8, 0.1295152381234257, CNST_LIMB(0x3685e47dade53d21), CNST_LIMB(0x2c7f377833b8946e)},
05595   /* 212 */ {8, 0.1294009178639407, CNST_LIMB(0x389ff6bb15610000), CNST_LIMB(0x2157c761ab4163ef)},
05596   /* 213 */ {8, 0.1292873358018581, CNST_LIMB(0x3acc1912ebb57661), CNST_LIMB(0x16a7071803cc49a9)},
05597   /* 214 */ {8, 0.1291744835645007, CNST_LIMB(0x3d0acff111946100), CNST_LIMB(0xc6781d80f8224fc)},
05598   /* 215 */ {8, 0.1290623529140715, CNST_LIMB(0x3f5ca2e692eaf841), CNST_LIMB(0x294092d370a900b)},
05599   /* 216 */ {8, 0.1289509357448472, CNST_LIMB(0x41c21cb8e1000000), CNST_LIMB(0xf24f62335024a295)},
05600   /* 217 */ {8, 0.1288402240804449, CNST_LIMB(0x443bcb714399a5c1), CNST_LIMB(0xe03b98f103fad6d2)},
05601   /* 218 */ {8, 0.1287302100711567, CNST_LIMB(0x46ca406c81af2100), CNST_LIMB(0xcee3d32cad2a9049)},
05602   /* 219 */ {8, 0.1286208859913518, CNST_LIMB(0x496e106ac22aaae1), CNST_LIMB(0xbe3f9df9277fdada)},
05603   /* 220 */ {8, 0.1285122442369443, CNST_LIMB(0x4c27d39fa5410000), CNST_LIMB(0xae46f0d94c05e933)},
05604   /* 221 */ {8, 0.1284042773229231, CNST_LIMB(0x4ef825c296e43ca1), CNST_LIMB(0x9ef2280fb437a33d)},
05605   /* 222 */ {8, 0.1282969778809442, CNST_LIMB(0x51dfa61f5ad88100), CNST_LIMB(0x9039ff426d3f284b)},
05606   /* 223 */ {8, 0.1281903386569819, CNST_LIMB(0x54def7a6d2f16901), CNST_LIMB(0x82178c6d6b51f8f4)},
05607   /* 224 */ {8, 0.1280843525090381, CNST_LIMB(0x57f6c10000000000), CNST_LIMB(0x74843b1ee4c1e053)},
05608   /* 225 */ {8, 0.1279790124049077, CNST_LIMB(0x5b27ac993df97701), CNST_LIMB(0x6779c7f90dc42f48)},
05609   /* 226 */ {8, 0.1278743114199984, CNST_LIMB(0x5e7268b9bbdf8100), CNST_LIMB(0x5af23c74f9ad9fe9)},
05610   /* 227 */ {8, 0.1277702427352035, CNST_LIMB(0x61d7a7932ff3d6a1), CNST_LIMB(0x4ee7eae2acdc617e)},
05611   /* 228 */ {8, 0.1276667996348261, CNST_LIMB(0x65581f53c8c10000), CNST_LIMB(0x43556aa2ac262a0b)},
05612   /* 229 */ {8, 0.1275639755045533, CNST_LIMB(0x68f48a385b8320e1), CNST_LIMB(0x3835949593b8ddd1)},
05613   /* 230 */ {8, 0.1274617638294791, CNST_LIMB(0x6cada69ed07c2100), CNST_LIMB(0x2d837fbe78458762)},
05614   /* 231 */ {8, 0.1273601581921741, CNST_LIMB(0x70843718cdbf27c1), CNST_LIMB(0x233a7e150a54a555)},
05615   /* 232 */ {8, 0.1272591522708010, CNST_LIMB(0x7479027ea1000000), CNST_LIMB(0x19561984a50ff8fe)},
05616   /* 233 */ {8, 0.1271587398372755, CNST_LIMB(0x788cd40268f39641), CNST_LIMB(0xfd211159fe3490f)},
05617   /* 234 */ {8, 0.1270589147554692, CNST_LIMB(0x7cc07b437ecf6100), CNST_LIMB(0x6aa563e655033e3)},
05618   /* 235 */ {8, 0.1269596709794558, CNST_LIMB(0x8114cc6220762061), CNST_LIMB(0xfbb614b3f2d3b14c)},
05619   /* 236 */ {8, 0.1268610025517973, CNST_LIMB(0x858aa0135be10000), CNST_LIMB(0xeac0f8837fb05773)},
05620   /* 237 */ {8, 0.1267629036018709, CNST_LIMB(0x8a22d3b53c54c321), CNST_LIMB(0xda6e4c10e8615ca5)},
05621   /* 238 */ {8, 0.1266653683442337, CNST_LIMB(0x8ede496339f34100), CNST_LIMB(0xcab755a8d01fa67f)},
05622   /* 239 */ {8, 0.1265683910770258, CNST_LIMB(0x93bde80aec3a1481), CNST_LIMB(0xbb95a9ae71aa3e0c)},
05623   /* 240 */ {8, 0.1264719661804097, CNST_LIMB(0x98c29b8100000000), CNST_LIMB(0xad0326c296b4f529)},
05624   /* 241 */ {8, 0.1263760881150453, CNST_LIMB(0x9ded549671832381), CNST_LIMB(0x9ef9f21eed31b7c1)},
05625   /* 242 */ {8, 0.1262807514205999, CNST_LIMB(0xa33f092e0b1ac100), CNST_LIMB(0x91747422be14b0b2)},
05626   /* 243 */ {8, 0.1261859507142915, CNST_LIMB(0xa8b8b452291fe821), CNST_LIMB(0x846d550e37b5063d)},
05627   /* 244 */ {8, 0.1260916806894653, CNST_LIMB(0xae5b564ac3a10000), CNST_LIMB(0x77df79e9a96c06f6)},
05628   /* 245 */ {8, 0.1259979361142023, CNST_LIMB(0xb427f4b3be74c361), CNST_LIMB(0x6bc6019636c7d0c2)},
05629   /* 246 */ {8, 0.1259047118299582, CNST_LIMB(0xba1f9a938041e100), CNST_LIMB(0x601c4205aebd9e47)},
05630   /* 247 */ {8, 0.1258120027502338, CNST_LIMB(0xc0435871d1110f41), CNST_LIMB(0x54ddc59756f05016)},
05631   /* 248 */ {8, 0.1257198038592741, CNST_LIMB(0xc694446f01000000), CNST_LIMB(0x4a0648979c838c18)},
05632   /* 249 */ {8, 0.1256281102107963, CNST_LIMB(0xcd137a5b57ac3ec1), CNST_LIMB(0x3f91b6e0bb3a053d)},
05633   /* 250 */ {8, 0.1255369169267456, CNST_LIMB(0xd3c21bcecceda100), CNST_LIMB(0x357c299a88ea76a5)},
05634   /* 251 */ {8, 0.1254462191960791, CNST_LIMB(0xdaa150410b788de1), CNST_LIMB(0x2bc1e517aecc56e3)},
05635   /* 252 */ {8, 0.1253560122735751, CNST_LIMB(0xe1b24521be010000), CNST_LIMB(0x225f56ceb3da9f5d)},
05636   /* 253 */ {8, 0.1252662914786691, CNST_LIMB(0xe8f62df12777c1a1), CNST_LIMB(0x1951136d53ad63ac)},
05637   /* 254 */ {8, 0.1251770521943144, CNST_LIMB(0xf06e445906fc0100), CNST_LIMB(0x1093d504b3cd7d93)},
05638   /* 255 */ {8, 0.1250882898658681, CNST_LIMB(0xf81bc845c81bf801), CNST_LIMB(0x824794d1ec1814f)},
05639 };
05640 #endif
05641 
/* Assertion-failure hook for GMP's internal ASSERT macros.  In this
   embedded copy the diagnostic body is compiled out (#if 0), so a failed
   assertion is silently ignored and the function simply returns 0.
   NOTE(review): with the body disabled, execution DOES reach the final
   return, despite the /*NOTREACHED*/ annotation below -- that comment
   only applies when the abort() path is compiled in.  */
static int
#if __STDC__
__gmp_assert_fail (const char *filename, int linenum,
                   const char *expr)
#else
/* K&R-style definition for pre-ANSI compilers.  */
__gmp_assert_fail (filename, linenum, expr)
char *filename;
int  linenum;
char *expr;
#endif
{
#if 0
  /* Disabled in this build (presumably to keep the embedding runtime
     from writing to stderr or calling abort() -- confirm with the
     plt-scheme build notes).  Stock GMP prints
     "file:line: GNU MP assertion failed: expr" and aborts.  */
  if (filename != NULL && filename[0] != '\0')
    {
      fprintf (stderr, "%s:", filename);
      if (linenum != -1)
        fprintf (stderr, "%d: ", linenum);
    }

  fprintf (stderr, "GNU MP assertion failed: %s\n", expr);
  abort();
#endif

  /*NOTREACHED*/
  return 0;
}
05668 
05669 /* __clz_tab -- support for longlong.h */
05670 
/* Lookup table used by count_leading_zeros in longlong.h.
   __clz_tab[i] is the bit width of i, i.e. floor(log2(i)) + 1 for
   i >= 1, and 0 for i == 0 (256 entries, one per byte value).  */
const
unsigned char __clz_tab[] =
{
  0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
};
05683 
05684 
05685 /****************************************/
05686 
05687 #ifndef HAVE_ALLOCA
05688 
typedef struct tmp_stack tmp_stack;   /* chunk header; struct declared in gmp-impl.h */

/* Running totals (in bytes) for the chunked temporary-allocation stack
   implemented by __gmp_tmp_alloc/__gmp_tmp_mark/__gmp_tmp_free below.  */
static unsigned long max_total_allocation = 0;
static unsigned long current_total_allocation = 0;

/* Sentinel chunk permanently at the bottom of the stack.  The
   self-referential initializer yields an empty chunk (end == alloc_point),
   so the very first allocation always falls through to MALLOC.
   NOTE(review): the initializer order {&xxx, &xxx, 0} assumes the pointer
   fields written by __gmp_tmp_alloc (end, alloc_point) come before prev
   in struct tmp_stack -- confirm against gmp-impl.h.  */
static tmp_stack xxx = {&xxx, &xxx, 0};
static tmp_stack *current = &xxx;   /* top of the chunk stack */

/* The rounded size of the header of each allocation block.  */
#define HSIZ ((sizeof (tmp_stack) + __TMP_ALIGN - 1) & -__TMP_ALIGN)
05699 
05700 /* Allocate a block of exactly <size> bytes.  This should only be called
05701    through the TMP_ALLOC macro, which takes care of rounding/alignment.  */
/* Allocate <size> bytes from the current chunk, growing the chunk stack
   with a fresh MALLOC block when the current chunk is too small.  Only
   called through the TMP_ALLOC macro, which handles rounding/alignment,
   so <size> is assumed already aligned.
   NOTE(review): the return value of MALLOC is not checked; a failed
   allocation would be dereferenced immediately.  Presumably
   scheme_malloc_gmp never returns NULL -- confirm.  */
void
#if __STDC__
__gmp_tmp_alloc (unsigned long size)
#else
__gmp_tmp_alloc (size)
     unsigned long size;
#endif
{
  void *that;

  /* Remaining room in the top chunk.  NOTE(review): the pointer
     difference (ptrdiff_t) is converted to unsigned long for this
     comparison; safe as long as alloc_point never passes end, which the
     bump below maintains.  */
  if (size > (char *) current->end - (char *) current->alloc_point)
    {
      void *chunk;
      tmp_stack *header;
      unsigned long chunk_size;
      unsigned long now;

      /* Allocate a chunk that makes the total current allocation somewhat
        larger than the maximum allocation ever.  If size is very large, we
        allocate that much.  */

      now = current_total_allocation + size;
      if (now > max_total_allocation)
       {
         /* We need more temporary memory than ever before.  Increase
            for future needs.  */
         now = now * 3 / 2;
         chunk_size = now - current_total_allocation + HSIZ;
         current_total_allocation = now;
         max_total_allocation = current_total_allocation;
       }
      else
       {
         /* Grow back up to the historical high-water mark in one step.  */
         chunk_size = max_total_allocation - current_total_allocation + HSIZ;
         current_total_allocation = max_total_allocation;
       }

      /* Link the new chunk on top of the stack; usable space starts
         just past the (alignment-rounded) header.  */
      chunk = MALLOC (chunk_size);
      header = (tmp_stack *) chunk;
      header->end = (char *) chunk + chunk_size;
      header->alloc_point = (char *) chunk + HSIZ;
      header->prev = current;
      current = header;
    }

  /* Bump-allocate from the top chunk.  */
  that = current->alloc_point;
  current->alloc_point = (char *) that + size;
  return that;
}
05751 
05752 /* Typically called at function entry.  <mark> is assigned so that
05753    __gmp_tmp_free can later be used to reclaim all subsequently allocated
05754    storage.  */
05755 void
05756 #if __STDC__
05757 __gmp_tmp_mark (tmp_marker *mark)
05758 #else
05759 __gmp_tmp_mark (mark)
05760      tmp_marker *mark;
05761 #endif
05762 {
05763   mark->which_chunk = current;
05764   mark->alloc_point = current->alloc_point;
05765 }
05766 
05767 /* Free everything allocated since <mark> was assigned by __gmp_tmp_mark */
05768 void
05769 #if __STDC__
05770 __gmp_tmp_free (tmp_marker *mark)
05771 #else
05772 __gmp_tmp_free (mark)
05773      tmp_marker *mark;
05774 #endif
05775 {
05776   while (mark->which_chunk != current)
05777     {
05778       tmp_stack *tmp;
05779 
05780       tmp = current;
05781       current = tmp->prev;
05782       current_total_allocation -= (((char *) (tmp->end) - (char *) tmp) - HSIZ);
05783       FREE (tmp, (char *) tmp->end - (char *) tmp);
05784     }
05785   current->alloc_point = mark->alloc_point;
05786 }
05787 
05788 void scheme_gmp_tls_init(long *s) 
05789 {
05790   s[0] = 0;
05791   s[1] = 0;
05792   s[2] = (long)&xxx;
05793   ((tmp_marker *)(s + 3))->which_chunk = &xxx;
05794   ((tmp_marker *)(s + 3))->alloc_point = &xxx;
05795 }
05796 
05797 void *scheme_gmp_tls_load(long *s) 
05798 {
05799   s[0] = (long)current_total_allocation;
05800   s[1] = (long)max_total_allocation;
05801   s[2] = (long)current;
05802   return mem_pool;
05803 }
05804 
05805 void scheme_gmp_tls_unload(long *s, void *data)
05806 {
05807   current_total_allocation = (unsigned long)s[0];
05808   max_total_allocation = (unsigned long)s[1];
05809   current = (tmp_stack *)s[2];
05810   s[0] = 0;
05811   mem_pool = data;
05812 }
05813 
05814 void scheme_gmp_tls_snapshot(long *s, long *save)
05815 {
05816   save[0] = s[3];
05817   save[1] = s[4];
05818   __gmp_tmp_mark((tmp_marker *)(s + 3));
05819 }
05820 
/* Undo a scheme_gmp_tls_snapshot.  When do_free is 2, the allocator's
   current global state is first parked in <other> and the thread state
   <s>/<data> is made current, so the free and marker restore operate on
   the right chunk stack; afterwards the previous global state is put
   back.  do_free == 1 frees in the already-current state; do_free == 0
   only restores the saved marker words.  */
void scheme_gmp_tls_restore_snapshot(long *s, void *data, long *save, int do_free)
{
  long other[6];
  void *other_data;

  if (do_free == 2) {
    /* Swap in this thread's allocator state; remember the current one. */
    other_data = scheme_gmp_tls_load(other);
    scheme_gmp_tls_unload(s, data);
  } else
    other_data = NULL;

  if (do_free)
    /* Reclaim everything allocated since the snapshot's mark.  */
    __gmp_tmp_free((tmp_marker *)(s + 3));  

  if (save) {
    /* Put back the marker words captured by scheme_gmp_tls_snapshot.  */
    s[3] = save[0];
    s[4] = save[1];
    
  }

  if (do_free == 2) {
    /* Re-capture this thread's state into s, then restore the previous
       global state.  NOTE(review): the assignment to <data> is a dead
       store (the parameter is not read again) -- presumably only the
       side effect of scheme_gmp_tls_load on s is wanted.  */
    data = scheme_gmp_tls_load(s);
    scheme_gmp_tls_unload(other, other_data);
  }
}
05846 
05847 #else
05848 
/* HAVE_ALLOCA build: temporaries live on the C stack, so there is no
   per-thread allocator state to set up.  */
void scheme_gmp_tls_init(long *s) 
{
}
05852 
/* HAVE_ALLOCA build: no allocator state to capture, so there is no
   memory pool to report; return NULL.
   Fix: the !HAVE_ALLOCA definition of this function returns void *, but
   this stub returned void -- conflicting signatures across builds would
   clash with the shared declaration and break callers that use the
   returned pool.  The signature now matches the other branch.  */
void *scheme_gmp_tls_load(long *s) 
{
  return NULL;
}
05856 
/* HAVE_ALLOCA build: nothing to restore; the pool argument is ignored.
   Fix: the !HAVE_ALLOCA definition (and scheme_gmp_tls_restore_snapshot,
   which calls this with two arguments) takes (long *, void *); this stub
   took only (long *), so the two builds exposed conflicting signatures.
   The parameter list now matches the other branch.  */
void scheme_gmp_tls_unload(long *s, void *data)
{
}
05860 
05861 #endif