Back to index

glibc  2.9
mul_n.c
Go to the documentation of this file.
00001 /* mpn_mul_n -- Multiply two natural numbers of length n.
00002 
00003 Copyright (C) 1991, 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
00004 
00005 This file is part of the GNU MP Library.
00006 
00007 The GNU MP Library is free software; you can redistribute it and/or modify
00008 it under the terms of the GNU Lesser General Public License as published by
00009 the Free Software Foundation; either version 2.1 of the License, or (at your
00010 option) any later version.
00011 
00012 The GNU MP Library is distributed in the hope that it will be useful, but
00013 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
00014 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
00015 License for more details.
00016 
00017 You should have received a copy of the GNU Lesser General Public License
00018 along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
00019 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
00020 MA 02111-1307, USA. */
00021 
00022 #include <gmp.h>
00023 #include "gmp-impl.h"
00024 
00025 /* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP),
00026    both with SIZE limbs, and store the result at PRODP.  2 * SIZE limbs are
00027    always stored.  Return the most significant limb.
00028 
00029    Argument constraints:
00030    1. PRODP != UP and PRODP != VP, i.e. the destination
00031       must be distinct from the multiplier and the multiplicand.  */
00032 
00033 /* If KARATSUBA_THRESHOLD is not already defined, define it to a
00034    value which is good on most machines.  */
00035 #ifndef KARATSUBA_THRESHOLD
00036 #define KARATSUBA_THRESHOLD 32
00037 #endif
00038 
00039 /* The code can't handle KARATSUBA_THRESHOLD smaller than 2.  */
00040 #if KARATSUBA_THRESHOLD < 2
00041 #undef KARATSUBA_THRESHOLD
00042 #define KARATSUBA_THRESHOLD 2
00043 #endif
00044 
00045 /* Handle simple cases with traditional multiplication.
00046 
00047    This is the most critical code of multiplication.  All multiplies rely
00048    on this, both small and huge.  Small ones arrive here immediately.  Huge
00049    ones arrive here as this is the base case for Karatsuba's recursive
00050    algorithm below.  */
00051 
00052 void
00053 #if __STDC__
00054 impn_mul_n_basecase (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
00055 #else
00056 impn_mul_n_basecase (prodp, up, vp, size)
00057      mp_ptr prodp;
00058      mp_srcptr up;
00059      mp_srcptr vp;
00060      mp_size_t size;
00061 #endif
00062 {
00063   mp_size_t i;
00064   mp_limb_t cy_limb;
00065   mp_limb_t v_limb;
00066 
00067   /* Multiply by the first limb in V separately, as the result can be
00068      stored (not added) to PROD.  We also avoid a loop for zeroing.  */
00069   v_limb = vp[0];
00070   if (v_limb <= 1)
00071     {
00072       if (v_limb == 1)
00073        MPN_COPY (prodp, up, size);
00074       else
00075        MPN_ZERO (prodp, size);
00076       cy_limb = 0;
00077     }
00078   else
00079     cy_limb = mpn_mul_1 (prodp, up, size, v_limb);
00080 
00081   prodp[size] = cy_limb;
00082   prodp++;
00083 
00084   /* For each iteration in the outer loop, multiply one limb from
00085      U with one limb from V, and add it to PROD.  */
00086   for (i = 1; i < size; i++)
00087     {
00088       v_limb = vp[i];
00089       if (v_limb <= 1)
00090        {
00091          cy_limb = 0;
00092          if (v_limb == 1)
00093            cy_limb = mpn_add_n (prodp, prodp, up, size);
00094        }
00095       else
00096        cy_limb = mpn_addmul_1 (prodp, up, size, v_limb);
00097 
00098       prodp[size] = cy_limb;
00099       prodp++;
00100     }
00101 }
00102 
00103 void
00104 #if __STDC__
00105 impn_mul_n (mp_ptr prodp,
00106             mp_srcptr up, mp_srcptr vp, mp_size_t size, mp_ptr tspace)
00107 #else
00108 impn_mul_n (prodp, up, vp, size, tspace)
00109      mp_ptr prodp;
00110      mp_srcptr up;
00111      mp_srcptr vp;
00112      mp_size_t size;
00113      mp_ptr tspace;
00114 #endif
00115 {
00116   if ((size & 1) != 0)
00117     {
00118       /* The size is odd, the code code below doesn't handle that.
00119         Multiply the least significant (size - 1) limbs with a recursive
00120         call, and handle the most significant limb of S1 and S2
00121         separately.  */
00122       /* A slightly faster way to do this would be to make the Karatsuba
00123         code below behave as if the size were even, and let it check for
00124         odd size in the end.  I.e., in essence move this code to the end.
00125         Doing so would save us a recursive call, and potentially make the
00126         stack grow a lot less.  */
00127 
00128       mp_size_t esize = size - 1;  /* even size */
00129       mp_limb_t cy_limb;
00130 
00131       MPN_MUL_N_RECURSE (prodp, up, vp, esize, tspace);
00132       cy_limb = mpn_addmul_1 (prodp + esize, up, esize, vp[esize]);
00133       prodp[esize + esize] = cy_limb;
00134       cy_limb = mpn_addmul_1 (prodp + esize, vp, size, up[esize]);
00135 
00136       prodp[esize + size] = cy_limb;
00137     }
00138   else
00139     {
00140       /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm.
00141 
00142         Split U in two pieces, U1 and U0, such that
00143         U = U0 + U1*(B**n),
00144         and V in V1 and V0, such that
00145         V = V0 + V1*(B**n).
00146 
00147         UV is then computed recursively using the identity
00148 
00149               2n   n          n                     n
00150         UV = (B  + B )U V  +  B (U -U )(V -V )  +  (B + 1)U V
00151                      1 1        1  0   0  1              0 0
00152 
00153         Where B = 2**BITS_PER_MP_LIMB.  */
00154 
00155       mp_size_t hsize = size >> 1;
00156       mp_limb_t cy;
00157       int negflg;
00158 
00159       /*** Product H.        ________________  ________________
00160                      |_____U1 x V1____||____U0 x V0_____|  */
00161       /* Put result in upper part of PROD and pass low part of TSPACE
00162         as new TSPACE.  */
00163       MPN_MUL_N_RECURSE (prodp + size, up + hsize, vp + hsize, hsize, tspace);
00164 
00165       /*** Product M.        ________________
00166                      |_(U1-U0)(V0-V1)_|  */
00167       if (mpn_cmp (up + hsize, up, hsize) >= 0)
00168        {
00169          mpn_sub_n (prodp, up + hsize, up, hsize);
00170          negflg = 0;
00171        }
00172       else
00173        {
00174          mpn_sub_n (prodp, up, up + hsize, hsize);
00175          negflg = 1;
00176        }
00177       if (mpn_cmp (vp + hsize, vp, hsize) >= 0)
00178        {
00179          mpn_sub_n (prodp + hsize, vp + hsize, vp, hsize);
00180          negflg ^= 1;
00181        }
00182       else
00183        {
00184          mpn_sub_n (prodp + hsize, vp, vp + hsize, hsize);
00185          /* No change of NEGFLG.  */
00186        }
00187       /* Read temporary operands from low part of PROD.
00188         Put result in low part of TSPACE using upper part of TSPACE
00189         as new TSPACE.  */
00190       MPN_MUL_N_RECURSE (tspace, prodp, prodp + hsize, hsize, tspace + size);
00191 
00192       /*** Add/copy product H.  */
00193       MPN_COPY (prodp + hsize, prodp + size, hsize);
00194       cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);
00195 
00196       /*** Add product M (if NEGFLG M is a negative number).  */
00197       if (negflg)
00198        cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
00199       else
00200        cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
00201 
00202       /*** Product L.        ________________  ________________
00203                      |________________||____U0 x V0_____|  */
00204       /* Read temporary operands from low part of PROD.
00205         Put result in low part of TSPACE using upper part of TSPACE
00206         as new TSPACE.  */
00207       MPN_MUL_N_RECURSE (tspace, up, vp, hsize, tspace + size);
00208 
00209       /*** Add/copy Product L (twice).  */
00210 
00211       cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
00212       if (cy)
00213        mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
00214 
00215       MPN_COPY (prodp, tspace, hsize);
00216       cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
00217       if (cy)
00218        mpn_add_1 (prodp + size, prodp + size, size, 1);
00219     }
00220 }
00221 
00222 void
00223 #if __STDC__
00224 impn_sqr_n_basecase (mp_ptr prodp, mp_srcptr up, mp_size_t size)
00225 #else
00226 impn_sqr_n_basecase (prodp, up, size)
00227      mp_ptr prodp;
00228      mp_srcptr up;
00229      mp_size_t size;
00230 #endif
00231 {
00232   mp_size_t i;
00233   mp_limb_t cy_limb;
00234   mp_limb_t v_limb;
00235 
00236   /* Multiply by the first limb in V separately, as the result can be
00237      stored (not added) to PROD.  We also avoid a loop for zeroing.  */
00238   v_limb = up[0];
00239   if (v_limb <= 1)
00240     {
00241       if (v_limb == 1)
00242        MPN_COPY (prodp, up, size);
00243       else
00244        MPN_ZERO (prodp, size);
00245       cy_limb = 0;
00246     }
00247   else
00248     cy_limb = mpn_mul_1 (prodp, up, size, v_limb);
00249 
00250   prodp[size] = cy_limb;
00251   prodp++;
00252 
00253   /* For each iteration in the outer loop, multiply one limb from
00254      U with one limb from V, and add it to PROD.  */
00255   for (i = 1; i < size; i++)
00256     {
00257       v_limb = up[i];
00258       if (v_limb <= 1)
00259        {
00260          cy_limb = 0;
00261          if (v_limb == 1)
00262            cy_limb = mpn_add_n (prodp, prodp, up, size);
00263        }
00264       else
00265        cy_limb = mpn_addmul_1 (prodp, up, size, v_limb);
00266 
00267       prodp[size] = cy_limb;
00268       prodp++;
00269     }
00270 }
00271 
00272 void
00273 #if __STDC__
00274 impn_sqr_n (mp_ptr prodp,
00275             mp_srcptr up, mp_size_t size, mp_ptr tspace)
00276 #else
00277 impn_sqr_n (prodp, up, size, tspace)
00278      mp_ptr prodp;
00279      mp_srcptr up;
00280      mp_size_t size;
00281      mp_ptr tspace;
00282 #endif
00283 {
00284   if ((size & 1) != 0)
00285     {
00286       /* The size is odd, the code code below doesn't handle that.
00287         Multiply the least significant (size - 1) limbs with a recursive
00288         call, and handle the most significant limb of S1 and S2
00289         separately.  */
00290       /* A slightly faster way to do this would be to make the Karatsuba
00291         code below behave as if the size were even, and let it check for
00292         odd size in the end.  I.e., in essence move this code to the end.
00293         Doing so would save us a recursive call, and potentially make the
00294         stack grow a lot less.  */
00295 
00296       mp_size_t esize = size - 1;  /* even size */
00297       mp_limb_t cy_limb;
00298 
00299       MPN_SQR_N_RECURSE (prodp, up, esize, tspace);
00300       cy_limb = mpn_addmul_1 (prodp + esize, up, esize, up[esize]);
00301       prodp[esize + esize] = cy_limb;
00302       cy_limb = mpn_addmul_1 (prodp + esize, up, size, up[esize]);
00303 
00304       prodp[esize + size] = cy_limb;
00305     }
00306   else
00307     {
00308       mp_size_t hsize = size >> 1;
00309       mp_limb_t cy;
00310 
00311       /*** Product H.        ________________  ________________
00312                      |_____U1 x U1____||____U0 x U0_____|  */
00313       /* Put result in upper part of PROD and pass low part of TSPACE
00314         as new TSPACE.  */
00315       MPN_SQR_N_RECURSE (prodp + size, up + hsize, hsize, tspace);
00316 
00317       /*** Product M.        ________________
00318                      |_(U1-U0)(U0-U1)_|  */
00319       if (mpn_cmp (up + hsize, up, hsize) >= 0)
00320        {
00321          mpn_sub_n (prodp, up + hsize, up, hsize);
00322        }
00323       else
00324        {
00325          mpn_sub_n (prodp, up, up + hsize, hsize);
00326        }
00327 
00328       /* Read temporary operands from low part of PROD.
00329         Put result in low part of TSPACE using upper part of TSPACE
00330         as new TSPACE.  */
00331       MPN_SQR_N_RECURSE (tspace, prodp, hsize, tspace + size);
00332 
00333       /*** Add/copy product H.  */
00334       MPN_COPY (prodp + hsize, prodp + size, hsize);
00335       cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);
00336 
00337       /*** Add product M (if NEGFLG M is a negative number).  */
00338       cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
00339 
00340       /*** Product L.        ________________  ________________
00341                      |________________||____U0 x U0_____|  */
00342       /* Read temporary operands from low part of PROD.
00343         Put result in low part of TSPACE using upper part of TSPACE
00344         as new TSPACE.  */
00345       MPN_SQR_N_RECURSE (tspace, up, hsize, tspace + size);
00346 
00347       /*** Add/copy Product L (twice).  */
00348 
00349       cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
00350       if (cy)
00351        mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
00352 
00353       MPN_COPY (prodp, tspace, hsize);
00354       cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
00355       if (cy)
00356        mpn_add_1 (prodp + size, prodp + size, size, 1);
00357     }
00358 }
00359 
00360 /* This should be made into an inline function in gmp.h.  */
00361 void
00362 #if __STDC__
00363 mpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
00364 #else
00365 mpn_mul_n (prodp, up, vp, size)
00366      mp_ptr prodp;
00367      mp_srcptr up;
00368      mp_srcptr vp;
00369      mp_size_t size;
00370 #endif
00371 {
00372   TMP_DECL (marker);
00373   TMP_MARK (marker);
00374   if (up == vp)
00375     {
00376       if (size < KARATSUBA_THRESHOLD)
00377        {
00378          impn_sqr_n_basecase (prodp, up, size);
00379        }
00380       else
00381        {
00382          mp_ptr tspace;
00383          tspace = (mp_ptr) TMP_ALLOC (2 * size * BYTES_PER_MP_LIMB);
00384          impn_sqr_n (prodp, up, size, tspace);
00385        }
00386     }
00387   else
00388     {
00389       if (size < KARATSUBA_THRESHOLD)
00390        {
00391          impn_mul_n_basecase (prodp, up, vp, size);
00392        }
00393       else
00394        {
00395          mp_ptr tspace;
00396          tspace = (mp_ptr) TMP_ALLOC (2 * size * BYTES_PER_MP_LIMB);
00397          impn_mul_n (prodp, up, vp, size, tspace);
00398        }
00399     }
00400   TMP_FREE (marker);
00401 }