Back to index

glibc  2.9
e_powl.c
Go to the documentation of this file.
00001 /*
00002  * ====================================================
00003  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
00004  *
00005  * Developed at SunPro, a Sun Microsystems, Inc. business.
00006  * Permission to use, copy, modify, and distribute this
00007  * software is freely granted, provided that this notice
00008  * is preserved.
00009  * ====================================================
00010  */
00011 
00012 /* Expansions and modifications for 128-bit long double are
00013    Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
00014    and are incorporated herein by permission of the author.  The author 
00015    reserves the right to distribute this material elsewhere under different
00016    copying permissions.  These modifications are distributed here under 
00017    the following terms:
00018 
00019     This library is free software; you can redistribute it and/or
00020     modify it under the terms of the GNU Lesser General Public
00021     License as published by the Free Software Foundation; either
00022     version 2.1 of the License, or (at your option) any later version.
00023 
00024     This library is distributed in the hope that it will be useful,
00025     but WITHOUT ANY WARRANTY; without even the implied warranty of
00026     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00027     Lesser General Public License for more details.
00028 
00029     You should have received a copy of the GNU Lesser General Public
00030     License along with this library; if not, write to the Free Software
00031     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA */
00032 
00033 /* __ieee754_powl(x,y) return x**y
00034  *
00035  *                  n
00036  * Method:  Let x =  2   * (1+f)
00037  *     1. Compute and return log2(x) in two pieces:
00038  *            log2(x) = w1 + w2,
00039  *        where w1 has 113-53 = 60 bit trailing zeros.
00040  *     2. Perform y*log2(x) = n+y' by simulating muti-precision
00041  *        arithmetic, where |y'|<=0.5.
00042  *     3. Return x**y = 2**n*exp(y'*log2)
00043  *
00044  * Special cases:
00045  *     1.  (anything) ** 0  is 1
00046  *     2.  (anything) ** 1  is itself
00047  *     3.  (anything) ** NAN is NAN
00048  *     4.  NAN ** (anything except 0) is NAN
00049  *     5.  +-(|x| > 1) **  +INF is +INF
00050  *     6.  +-(|x| > 1) **  -INF is +0
00051  *     7.  +-(|x| < 1) **  +INF is +0
00052  *     8.  +-(|x| < 1) **  -INF is +INF
00053  *     9.  +-1         ** +-INF is NAN
00054  *     10. +0 ** (+anything except 0, NAN)               is +0
00055  *     11. -0 ** (+anything except 0, NAN, odd integer)  is +0
00056  *     12. +0 ** (-anything except 0, NAN)               is +INF
00057  *     13. -0 ** (-anything except 0, NAN, odd integer)  is +INF
00058  *     14. -0 ** (odd integer) = -( +0 ** (odd integer) )
00059  *     15. +INF ** (+anything except 0,NAN) is +INF
00060  *     16. +INF ** (-anything except 0,NAN) is +0
00061  *     17. -INF ** (anything)  = -0 ** (-anything)
00062  *     18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
00063  *     19. (-anything except 0 and inf) ** (non-integer) is NAN
00064  *
00065  */
00066 
00067 #include "math.h"
00068 #include "math_private.h"
00069 
00070 static const long double bp[] = {
00071   1.0L,
00072   1.5L,
00073 };
00074 
00075 /* log_2(1.5) */
00076 static const long double dp_h[] = {
00077   0.0,
00078   5.8496250072115607565592654282227158546448E-1L
00079 };
00080 
00081 /* Low part of log_2(1.5) */
00082 static const long double dp_l[] = {
00083   0.0,
00084   1.0579781240112554492329533686862998106046E-16L
00085 };
00086 
00087 static const long double zero = 0.0L,
00088   one = 1.0L,
00089   two = 2.0L,
00090   two113 = 1.0384593717069655257060992658440192E34L,
00091   huge = 1.0e3000L,
00092   tiny = 1.0e-3000L;
00093 
00094 /* 3/2 log x = 3 z + z^3 + z^3 (z^2 R(z^2))
00095    z = (x-1)/(x+1)
00096    1 <= x <= 1.25
00097    Peak relative error 2.3e-37 */
00098 static const long double LN[] =
00099 {
00100  -3.0779177200290054398792536829702930623200E1L,
00101   6.5135778082209159921251824580292116201640E1L,
00102  -4.6312921812152436921591152809994014413540E1L,
00103   1.2510208195629420304615674658258363295208E1L,
00104  -9.9266909031921425609179910128531667336670E-1L
00105 };
00106 static const long double LD[] =
00107 {
00108  -5.129862866715009066465422805058933131960E1L,
00109   1.452015077564081884387441590064272782044E2L,
00110  -1.524043275549860505277434040464085593165E2L,
00111   7.236063513651544224319663428634139768808E1L,
00112  -1.494198912340228235853027849917095580053E1L
00113   /* 1.0E0 */
00114 };
00115 
00116 /* exp(x) = 1 + x - x / (1 - 2 / (x - x^2 R(x^2)))
00117    0 <= x <= 0.5
00118    Peak relative error 5.7e-38  */
00119 static const long double PN[] =
00120 {
00121   5.081801691915377692446852383385968225675E8L,
00122   9.360895299872484512023336636427675327355E6L,
00123   4.213701282274196030811629773097579432957E4L,
00124   5.201006511142748908655720086041570288182E1L,
00125   9.088368420359444263703202925095675982530E-3L,
00126 };
00127 static const long double PD[] =
00128 {
00129   3.049081015149226615468111430031590411682E9L,
00130   1.069833887183886839966085436512368982758E8L,
00131   8.259257717868875207333991924545445705394E5L,
00132   1.872583833284143212651746812884298360922E3L,
00133   /* 1.0E0 */
00134 };
00135 
00136 static const long double
00137   /* ln 2 */
00138   lg2 = 6.9314718055994530941723212145817656807550E-1L,
00139   lg2_h = 6.9314718055994528622676398299518041312695E-1L,
00140   lg2_l = 2.3190468138462996154948554638754786504121E-17L,
00141   ovt = 8.0085662595372944372e-0017L,
00142   /* 2/(3*log(2)) */
00143   cp = 9.6179669392597560490661645400126142495110E-1L,
00144   cp_h = 9.6179669392597555432899980587535537779331E-1L,
00145   cp_l = 5.0577616648125906047157785230014751039424E-17L;
00146 
00147 #ifdef __STDC__
00148 long double
00149 __ieee754_powl (long double x, long double y)
00150 #else
00151 long double
00152 __ieee754_powl (x, y)
00153      long double x, y;
00154 #endif
00155 {
00156   long double z, ax, z_h, z_l, p_h, p_l;
00157   long double y1, t1, t2, r, s, t, u, v, w;
00158   long double s2, s_h, s_l, t_h, t_l;
00159   int32_t i, j, k, yisint, n;
00160   u_int32_t ix, iy;
00161   int32_t hx, hy;
00162   ieee854_long_double_shape_type o, p, q;
00163 
00164   p.value = x;
00165   hx = p.parts32.w0;
00166   ix = hx & 0x7fffffff;
00167 
00168   q.value = y;
00169   hy = q.parts32.w0;
00170   iy = hy & 0x7fffffff;
00171 
00172 
00173   /* y==zero: x**0 = 1 */
00174   if ((iy | q.parts32.w1 | q.parts32.w2 | q.parts32.w3) == 0)
00175     return one;
00176 
00177   /* 1.0**y = 1; -1.0**+-Inf = 1 */
00178   if (x == one)
00179     return one;
00180   if (x == -1.0L && iy == 0x7fff0000
00181       && (q.parts32.w1 | q.parts32.w2 | q.parts32.w3) == 0)
00182     return one;
00183 
00184   /* +-NaN return x+y */
00185   if ((ix > 0x7fff0000)
00186       || ((ix == 0x7fff0000)
00187          && ((p.parts32.w1 | p.parts32.w2 | p.parts32.w3) != 0))
00188       || (iy > 0x7fff0000)
00189       || ((iy == 0x7fff0000)
00190          && ((q.parts32.w1 | q.parts32.w2 | q.parts32.w3) != 0)))
00191     return x + y;
00192 
00193   /* determine if y is an odd int when x < 0
00194    * yisint = 0       ... y is not an integer
00195    * yisint = 1       ... y is an odd int
00196    * yisint = 2       ... y is an even int
00197    */
00198   yisint = 0;
00199   if (hx < 0)
00200     {
00201       if (iy >= 0x40700000) /* 2^113 */
00202        yisint = 2;          /* even integer y */
00203       else if (iy >= 0x3fff0000)   /* 1.0 */
00204        {
00205          if (__floorl (y) == y)
00206            {
00207              z = 0.5 * y;
00208              if (__floorl (z) == z)
00209               yisint = 2;
00210              else
00211               yisint = 1;
00212            }
00213        }
00214     }
00215 
00216   /* special value of y */
00217   if ((q.parts32.w1 | q.parts32.w2 | q.parts32.w3) == 0)
00218     {
00219       if (iy == 0x7fff0000) /* y is +-inf */
00220        {
00221          if (((ix - 0x3fff0000) | p.parts32.w1 | p.parts32.w2 | p.parts32.w3)
00222              == 0)
00223            return y - y;    /* +-1**inf is NaN */
00224          else if (ix >= 0x3fff0000)       /* (|x|>1)**+-inf = inf,0 */
00225            return (hy >= 0) ? y : zero;
00226          else               /* (|x|<1)**-,+inf = inf,0 */
00227            return (hy < 0) ? -y : zero;
00228        }
00229       if (iy == 0x3fff0000)
00230        {                    /* y is  +-1 */
00231          if (hy < 0)
00232            return one / x;
00233          else
00234            return x;
00235        }
00236       if (hy == 0x40000000)
00237        return x * x;        /* y is  2 */
00238       if (hy == 0x3ffe0000)
00239        {                    /* y is  0.5 */
00240          if (hx >= 0)              /* x >= +0 */
00241            return __ieee754_sqrtl (x);
00242        }
00243     }
00244 
00245   ax = fabsl (x);
00246   /* special value of x */
00247   if ((p.parts32.w1 | p.parts32.w2 | p.parts32.w3) == 0)
00248     {
00249       if (ix == 0x7fff0000 || ix == 0 || ix == 0x3fff0000)
00250        {
00251          z = ax;            /*x is +-0,+-inf,+-1 */
00252          if (hy < 0)
00253            z = one / z;     /* z = (1/|x|) */
00254          if (hx < 0)
00255            {
00256              if (((ix - 0x3fff0000) | yisint) == 0)
00257               {
00258                 z = (z - z) / (z - z);    /* (-1)**non-int is NaN */
00259               }
00260              else if (yisint == 1)
00261               z = -z;              /* (x<0)**odd = -(|x|**odd) */
00262            }
00263          return z;
00264        }
00265     }
00266 
00267   /* (x<0)**(non-int) is NaN */
00268   if (((((u_int32_t) hx >> 31) - 1) | yisint) == 0)
00269     return (x - x) / (x - x);
00270 
00271   /* |y| is huge.
00272      2^-16495 = 1/2 of smallest representable value.
00273      If (1 - 1/131072)^y underflows, y > 1.4986e9 */
00274   if (iy > 0x401d654b)
00275     {
00276       /* if (1 - 2^-113)^y underflows, y > 1.1873e38 */
00277       if (iy > 0x407d654b)
00278        {
00279          if (ix <= 0x3ffeffff)
00280            return (hy < 0) ? huge * huge : tiny * tiny;
00281          if (ix >= 0x3fff0000)
00282            return (hy > 0) ? huge * huge : tiny * tiny;
00283        }
00284       /* over/underflow if x is not close to one */
00285       if (ix < 0x3ffeffff)
00286        return (hy < 0) ? huge * huge : tiny * tiny;
00287       if (ix > 0x3fff0000)
00288        return (hy > 0) ? huge * huge : tiny * tiny;
00289     }
00290 
00291   n = 0;
00292   /* take care subnormal number */
00293   if (ix < 0x00010000)
00294     {
00295       ax *= two113;
00296       n -= 113;
00297       o.value = ax;
00298       ix = o.parts32.w0;
00299     }
00300   n += ((ix) >> 16) - 0x3fff;
00301   j = ix & 0x0000ffff;
00302   /* determine interval */
00303   ix = j | 0x3fff0000;             /* normalize ix */
00304   if (j <= 0x3988)
00305     k = 0;                  /* |x|<sqrt(3/2) */
00306   else if (j < 0xbb67)
00307     k = 1;                  /* |x|<sqrt(3)   */
00308   else
00309     {
00310       k = 0;
00311       n += 1;
00312       ix -= 0x00010000;
00313     }
00314 
00315   o.value = ax;
00316   o.parts32.w0 = ix;
00317   ax = o.value;
00318 
00319   /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
00320   u = ax - bp[k];           /* bp[0]=1.0, bp[1]=1.5 */
00321   v = one / (ax + bp[k]);
00322   s = u * v;
00323   s_h = s;
00324 
00325   o.value = s_h;
00326   o.parts32.w3 = 0;
00327   o.parts32.w2 &= 0xf8000000;
00328   s_h = o.value;
00329   /* t_h=ax+bp[k] High */
00330   t_h = ax + bp[k];
00331   o.value = t_h;
00332   o.parts32.w3 = 0;
00333   o.parts32.w2 &= 0xf8000000;
00334   t_h = o.value;
00335   t_l = ax - (t_h - bp[k]);
00336   s_l = v * ((u - s_h * t_h) - s_h * t_l);
00337   /* compute log(ax) */
00338   s2 = s * s;
00339   u = LN[0] + s2 * (LN[1] + s2 * (LN[2] + s2 * (LN[3] + s2 * LN[4])));
00340   v = LD[0] + s2 * (LD[1] + s2 * (LD[2] + s2 * (LD[3] + s2 * (LD[4] + s2))));
00341   r = s2 * s2 * u / v;
00342   r += s_l * (s_h + s);
00343   s2 = s_h * s_h;
00344   t_h = 3.0 + s2 + r;
00345   o.value = t_h;
00346   o.parts32.w3 = 0;
00347   o.parts32.w2 &= 0xf8000000;
00348   t_h = o.value;
00349   t_l = r - ((t_h - 3.0) - s2);
00350   /* u+v = s*(1+...) */
00351   u = s_h * t_h;
00352   v = s_l * t_h + t_l * s;
00353   /* 2/(3log2)*(s+...) */
00354   p_h = u + v;
00355   o.value = p_h;
00356   o.parts32.w3 = 0;
00357   o.parts32.w2 &= 0xf8000000;
00358   p_h = o.value;
00359   p_l = v - (p_h - u);
00360   z_h = cp_h * p_h;         /* cp_h+cp_l = 2/(3*log2) */
00361   z_l = cp_l * p_h + p_l * cp + dp_l[k];
00362   /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
00363   t = (long double) n;
00364   t1 = (((z_h + z_l) + dp_h[k]) + t);
00365   o.value = t1;
00366   o.parts32.w3 = 0;
00367   o.parts32.w2 &= 0xf8000000;
00368   t1 = o.value;
00369   t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
00370 
00371   /* s (sign of result -ve**odd) = -1 else = 1 */
00372   s = one;
00373   if (((((u_int32_t) hx >> 31) - 1) | (yisint - 1)) == 0)
00374     s = -one;               /* (-ve)**(odd int) */
00375 
00376   /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
00377   y1 = y;
00378   o.value = y1;
00379   o.parts32.w3 = 0;
00380   o.parts32.w2 &= 0xf8000000;
00381   y1 = o.value;
00382   p_l = (y - y1) * t1 + y * t2;
00383   p_h = y1 * t1;
00384   z = p_l + p_h;
00385   o.value = z;
00386   j = o.parts32.w0;
00387   if (j >= 0x400d0000) /* z >= 16384 */
00388     {
00389       /* if z > 16384 */
00390       if (((j - 0x400d0000) | o.parts32.w1 | o.parts32.w2 | o.parts32.w3) != 0)
00391        return s * huge * huge;     /* overflow */
00392       else
00393        {
00394          if (p_l + ovt > z - p_h)
00395            return s * huge * huge; /* overflow */
00396        }
00397     }
00398   else if ((j & 0x7fffffff) >= 0x400d01b9)       /* z <= -16495 */
00399     {
00400       /* z < -16495 */
00401       if (((j - 0xc00d01bc) | o.parts32.w1 | o.parts32.w2 | o.parts32.w3)
00402          != 0)
00403        return s * tiny * tiny;     /* underflow */
00404       else
00405        {
00406          if (p_l <= z - p_h)
00407            return s * tiny * tiny; /* underflow */
00408        }
00409     }
00410   /* compute 2**(p_h+p_l) */
00411   i = j & 0x7fffffff;
00412   k = (i >> 16) - 0x3fff;
00413   n = 0;
00414   if (i > 0x3ffe0000)
00415     {                       /* if |z| > 0.5, set n = [z+0.5] */
00416       n = __floorl (z + 0.5L);
00417       t = n;
00418       p_h -= t;
00419     }
00420   t = p_l + p_h;
00421   o.value = t;
00422   o.parts32.w3 = 0;
00423   o.parts32.w2 &= 0xf8000000;
00424   t = o.value;
00425   u = t * lg2_h;
00426   v = (p_l - (t - p_h)) * lg2 + t * lg2_l;
00427   z = u + v;
00428   w = v - (z - u);
00429   /*  exp(z) */
00430   t = z * z;
00431   u = PN[0] + t * (PN[1] + t * (PN[2] + t * (PN[3] + t * PN[4])));
00432   v = PD[0] + t * (PD[1] + t * (PD[2] + t * (PD[3] + t)));
00433   t1 = z - t * u / v;
00434   r = (z * t1) / (t1 - two) - (w + z * w);
00435   z = one - (r - z);
00436   o.value = z;
00437   j = o.parts32.w0;
00438   j += (n << 16);
00439   if ((j >> 16) <= 0)
00440     z = __scalbnl (z, n);   /* subnormal output */
00441   else
00442     {
00443       o.parts32.w0 = j;
00444       z = o.value;
00445     }
00446   return s * z;
00447 }