Back to index

lightning-sunbird  0.9+nobinonly
k_rem_pio2.c
Go to the documentation of this file.
00001 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
00002  *
00003  * ***** BEGIN LICENSE BLOCK *****
00004  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00005  *
00006  * The contents of this file are subject to the Mozilla Public License Version
00007  * 1.1 (the "License"); you may not use this file except in compliance with
00008  * the License. You may obtain a copy of the License at
00009  * http://www.mozilla.org/MPL/
00010  *
00011  * Software distributed under the License is distributed on an "AS IS" basis,
00012  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00013  * for the specific language governing rights and limitations under the
00014  * License.
00015  *
00016  * The Original Code is Mozilla Communicator client code, released
00017  * March 31, 1998.
00018  *
00019  * The Initial Developer of the Original Code is
00020  * Sun Microsystems, Inc.
00021  * Portions created by the Initial Developer are Copyright (C) 1998
00022  * the Initial Developer. All Rights Reserved.
00023  *
00024  * Contributor(s):
00025  *
00026  * Alternatively, the contents of this file may be used under the terms of
00027  * either of the GNU General Public License Version 2 or later (the "GPL"),
00028  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00029  * in which case the provisions of the GPL or the LGPL are applicable instead
00030  * of those above. If you wish to allow use of your version of this file only
00031  * under the terms of either the GPL or the LGPL, and not to allow others to
00032  * use your version of this file under the terms of the MPL, indicate your
00033  * decision by deleting the provisions above and replace them with the notice
00034  * and other provisions required by the GPL or the LGPL. If you do not delete
00035  * the provisions above, a recipient may use your version of this file under
00036  * the terms of any one of the MPL, the GPL or the LGPL.
00037  *
00038  * ***** END LICENSE BLOCK ***** */
00039 
00040 /* @(#)k_rem_pio2.c 1.3 95/01/18 */
00041 /*
00042  * ====================================================
00043  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
00044  *
00045  * Developed at SunSoft, a Sun Microsystems, Inc. business.
00046  * Permission to use, copy, modify, and distribute this
00047  * software is freely granted, provided that this notice 
00048  * is preserved.
00049  * ====================================================
00050  */
00051 
00052 /*
00053  * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
00054  * double x[],y[]; int e0,nx,prec; int ipio2[];
00055  * 
00056  * __kernel_rem_pio2 return the last three digits of N with 
00057  *            y = x - N*pi/2
00058  * so that |y| < pi/2.
00059  *
00060  * The method is to compute the integer (mod 8) and fraction parts of 
00061  * (2/pi)*x without doing the full multiplication. In general we
00062  * skip the part of the product that are known to be a huge integer (
00063  * more accurately, = 0 mod 8 ). Thus the number of operations are
00064  * independent of the exponent of the input.
00065  *
00066  * (2/pi) is represented by an array of 24-bit integers in ipio2[].
00067  *
00068  * Input parameters:
00069  *     x[]    The input value (must be positive) is broken into nx 
00070  *            pieces of 24-bit integers in double precision format.
00071  *            x[i] will be the i-th 24 bit of x. The scaled exponent 
00072  *            of x[0] is given in input parameter e0 (i.e., x[0]*2^e0 
00073  *            match x's up to 24 bits.
00074  *
00075  *            Example of breaking a double positive z into x[0]+x[1]+x[2]:
00076  *                   e0 = ilogb(z)-23
00077  *                   z  = scalbn(z,-e0)
00078  *            for i = 0,1,2
00079  *                   x[i] = floor(z)
00080  *                   z    = (z-x[i])*2**24
00081  *
00082  *
00083  *     y[]    ouput result in an array of double precision numbers.
00084  *            The dimension of y[] is:
00085  *                   24-bit  precision    1
00086  *                   53-bit  precision    2
00087  *                   64-bit  precision    2
00088  *                   113-bit precision    3
00089  *            The actual value is the sum of them. Thus for 113-bit
00090  *            precison, one may have to do something like:
00091  *
00092  *            long double t,w,r_head, r_tail;
00093  *            t = (long double)y[2] + (long double)y[1];
00094  *            w = (long double)y[0];
00095  *            r_head = t+w;
00096  *            r_tail = w - (r_head - t);
00097  *
00098  *     e0     The exponent of x[0]
00099  *
00100  *     nx     dimension of x[]
00101  *
00102  *     prec   an integer indicating the precision:
00103  *                   0      24  bits (single)
00104  *                   1      53  bits (double)
00105  *                   2      64  bits (extended)
00106  *                   3      113 bits (quad)
00107  *
00108  *     ipio2[]
00109  *            integer array, contains the (24*i)-th to (24*i+23)-th 
00110  *            bit of 2/pi after binary point. The corresponding 
00111  *            floating value is
00112  *
00113  *                   ipio2[i] * 2^(-24(i+1)).
00114  *
00115  * External function:
00116  *     double scalbn(), floor();
00117  *
00118  *
00119  * Here is the description of some local variables:
00120  *
00121  *     jk     jk+1 is the initial number of terms of ipio2[] needed
00122  *            in the computation. The recommended value is 2,3,4,
00123  *            6 for single, double, extended,and quad.
00124  *
00125  *     jz     local integer variable indicating the number of 
00126  *            terms of ipio2[] used. 
00127  *
00128  *     jx     nx - 1
00129  *
00130  *     jv     index for pointing to the suitable ipio2[] for the
00131  *            computation. In general, we want
00132  *                   ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
00133  *            is an integer. Thus
00134  *                   e0-3-24*jv >= 0 or (e0-3)/24 >= jv
00135  *            Hence jv = max(0,(e0-3)/24).
00136  *
00137  *     jp     jp+1 is the number of terms in PIo2[] needed, jp = jk.
00138  *
00139  *     q[]    double array with integral value, representing the
00140  *            24-bits chunk of the product of x and 2/pi.
00141  *
00142  *     q0     the corresponding exponent of q[0]. Note that the
00143  *            exponent for q[i] would be q0-24*i.
00144  *
00145  *     PIo2[] double precision array, obtained by cutting pi/2
00146  *            into 24 bits chunks. 
00147  *
00148  *     f[]    ipio2[] in floating point 
00149  *
00150  *     iq[]   integer array by breaking up q[] in 24-bits chunk.
00151  *
00152  *     fq[]   final product of x*(2/pi) in fq[0],..,fq[jk]
00153  *
00154  *     ih     integer. If >0 it indicates q[] is >= 0.5, hence
00155  *            it also indicates the *sign* of the result.
00156  *
00157  */
00158 
00159 
00160 /*
00161  * Constants:
00162  * The hexadecimal values are the intended ones for the following 
00163  * constants. The decimal values may be used, provided that the 
00164  * compiler will convert from decimal to binary accurately enough 
00165  * to produce the hexadecimal values shown.
00166  */
00167 
00168 #include "fdlibm.h"
00169 
00170 #ifdef __STDC__
00171 static const int init_jk[] = {2,3,4,6}; /* initial value for jk */
00172 #else
00173 static int init_jk[] = {2,3,4,6}; 
00174 #endif
00175 
00176 #ifdef __STDC__
00177 static const double PIo2[] = {
00178 #else
00179 static double PIo2[] = {
00180 #endif
00181   1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
00182   7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
00183   5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
00184   3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
00185   1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
00186   1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
00187   2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
00188   2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
00189 };
00190 
00191 #ifdef __STDC__
00192 static const double                
00193 #else
00194 static double               
00195 #endif
00196 zero   = 0.0,
00197 one    = 1.0,
00198 two24   =  1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
00199 twon24  =  5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */
00200 
00201 #ifdef __STDC__
00202        int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, const int *ipio2) 
00203 #else
00204        int __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)      
00205        double x[], y[]; int e0,nx,prec; int ipio2[];
00206 #endif
00207 {
00208        int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;
00209        double z,fw,f[20],fq[20],q[20];
00210 
00211     /* initialize jk*/
00212        jk = init_jk[prec];
00213        jp = jk;
00214 
00215     /* determine jx,jv,q0, note that 3>q0 */
00216        jx =  nx-1;
00217        jv = (e0-3)/24; if(jv<0) jv=0;
00218        q0 =  e0-24*(jv+1);
00219 
00220     /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
00221        j = jv-jx; m = jx+jk;
00222        for(i=0;i<=m;i++,j++) f[i] = (j<0)? zero : (double) ipio2[j];
00223 
00224     /* compute q[0],q[1],...q[jk] */
00225        for (i=0;i<=jk;i++) {
00226            for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw;
00227        }
00228 
00229        jz = jk;
00230 recompute:
00231     /* distill q[] into iq[] reversingly */
00232        for(i=0,j=jz,z=q[jz];j>0;i++,j--) {
00233            fw    =  (double)((int)(twon24* z));
00234            iq[i] =  (int)(z-two24*fw);
00235            z     =  q[j-1]+fw;
00236        }
00237 
00238     /* compute n */
00239        z  = fd_scalbn(z,q0);              /* actual value of z */
00240        z -= 8.0*fd_floor(z*0.125);        /* trim off integer >= 8 */
00241        n  = (int) z;
00242        z -= (double)n;
00243        ih = 0;
00244        if(q0>0) {    /* need iq[jz-1] to determine n */
00245            i  = (iq[jz-1]>>(24-q0)); n += i;
00246            iq[jz-1] -= i<<(24-q0);
00247            ih = iq[jz-1]>>(23-q0);
00248        } 
00249        else if(q0==0) ih = iq[jz-1]>>23;
00250        else if(z>=0.5) ih=2;
00251 
00252        if(ih>0) {    /* q > 0.5 */
00253            n += 1; carry = 0;
00254            for(i=0;i<jz ;i++) {    /* compute 1-q */
00255               j = iq[i];
00256               if(carry==0) {
00257                   if(j!=0) {
00258                      carry = 1; iq[i] = 0x1000000- j;
00259                   }
00260               } else  iq[i] = 0xffffff - j;
00261            }
00262            if(q0>0) {              /* rare case: chance is 1 in 12 */
00263                switch(q0) {
00264                case 1:
00265                  iq[jz-1] &= 0x7fffff; break;
00266               case 2:
00267                  iq[jz-1] &= 0x3fffff; break;
00268                }
00269            }
00270            if(ih==2) {
00271               z = one - z;
00272               if(carry!=0) z -= fd_scalbn(one,q0);
00273            }
00274        }
00275 
00276     /* check if recomputation is needed */
00277        if(z==zero) {
00278            j = 0;
00279            for (i=jz-1;i>=jk;i--) j |= iq[i];
00280            if(j==0) { /* need recomputation */
00281               for(k=1;iq[jk-k]==0;k++);   /* k = no. of terms needed */
00282 
00283               for(i=jz+1;i<=jz+k;i++) {   /* add q[jz+1] to q[jz+k] */
00284                   f[jx+i] = (double) ipio2[jv+i];
00285                   for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j];
00286                   q[i] = fw;
00287               }
00288               jz += k;
00289               goto recompute;
00290            }
00291        }
00292 
00293     /* chop off zero terms */
00294        if(z==0.0) {
00295            jz -= 1; q0 -= 24;
00296            while(iq[jz]==0) { jz--; q0-=24;}
00297        } else { /* break z into 24-bit if necessary */
00298            z = fd_scalbn(z,-q0);
00299            if(z>=two24) { 
00300               fw = (double)((int)(twon24*z));
00301               iq[jz] = (int)(z-two24*fw);
00302               jz += 1; q0 += 24;
00303               iq[jz] = (int) fw;
00304            } else iq[jz] = (int) z ;
00305        }
00306 
00307     /* convert integer "bit" chunk to floating-point value */
00308        fw = fd_scalbn(one,q0);
00309        for(i=jz;i>=0;i--) {
00310            q[i] = fw*(double)iq[i]; fw*=twon24;
00311        }
00312 
00313     /* compute PIo2[0,...,jp]*q[jz,...,0] */
00314        for(i=jz;i>=0;i--) {
00315            for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k];
00316            fq[jz-i] = fw;
00317        }
00318 
00319     /* compress fq[] into y[] */
00320        switch(prec) {
00321            case 0:
00322               fw = 0.0;
00323               for (i=jz;i>=0;i--) fw += fq[i];
00324               y[0] = (ih==0)? fw: -fw; 
00325               break;
00326            case 1:
00327            case 2:
00328               fw = 0.0;
00329               for (i=jz;i>=0;i--) fw += fq[i]; 
00330               y[0] = (ih==0)? fw: -fw; 
00331               fw = fq[0]-fw;
00332               for (i=1;i<=jz;i++) fw += fq[i];
00333               y[1] = (ih==0)? fw: -fw; 
00334               break;
00335            case 3:   /* painful */
00336               for (i=jz;i>0;i--) {
00337                   fw      = fq[i-1]+fq[i]; 
00338                   fq[i]  += fq[i-1]-fw;
00339                   fq[i-1] = fw;
00340               }
00341               for (i=jz;i>1;i--) {
00342                   fw      = fq[i-1]+fq[i]; 
00343                   fq[i]  += fq[i-1]-fw;
00344                   fq[i-1] = fw;
00345               }
00346               for (fw=0.0,i=jz;i>=2;i--) fw += fq[i]; 
00347               if(ih==0) {
00348                   y[0] =  fq[0]; y[1] =  fq[1]; y[2] =  fw;
00349               } else {
00350                   y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw;
00351               }
00352        }
00353        return n&7;
00354 }