Back to index

glibc  2.9
e_powl.c
Go to the documentation of this file.
00001 /*
00002  * ====================================================
00003  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
00004  *
00005  * Developed at SunPro, a Sun Microsystems, Inc. business.
00006  * Permission to use, copy, modify, and distribute this
00007  * software is freely granted, provided that this notice
00008  * is preserved.
00009  * ====================================================
00010  */
00011 
00012 /* Expansions and modifications for 128-bit long double are
00013    Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
00014    and are incorporated herein by permission of the author.  The author
00015    reserves the right to distribute this material elsewhere under different
00016    copying permissions.  These modifications are distributed here under
00017    the following terms:
00018 
00019     This library is free software; you can redistribute it and/or
00020     modify it under the terms of the GNU Lesser General Public
00021     License as published by the Free Software Foundation; either
00022     version 2.1 of the License, or (at your option) any later version.
00023 
00024     This library is distributed in the hope that it will be useful,
00025     but WITHOUT ANY WARRANTY; without even the implied warranty of
00026     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00027     Lesser General Public License for more details.
00028 
00029     You should have received a copy of the GNU Lesser General Public
00030     License along with this library; if not, write to the Free Software
00031     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA */
00032 
00033 /* __ieee754_powl(x,y) return x**y
00034  *
00035  *                  n
00036  * Method:  Let x =  2   * (1+f)
00037  *     1. Compute and return log2(x) in two pieces:
00038  *            log2(x) = w1 + w2,
00039  *        where w1 has 113-53 = 60 bit trailing zeros.
00040  *     2. Perform y*log2(x) = n+y' by simulating muti-precision
00041  *        arithmetic, where |y'|<=0.5.
00042  *     3. Return x**y = 2**n*exp(y'*log2)
00043  *
00044  * Special cases:
00045  *     1.  (anything) ** 0  is 1
00046  *     2.  (anything) ** 1  is itself
00047  *     3.  (anything) ** NAN is NAN
00048  *     4.  NAN ** (anything except 0) is NAN
00049  *     5.  +-(|x| > 1) **  +INF is +INF
00050  *     6.  +-(|x| > 1) **  -INF is +0
00051  *     7.  +-(|x| < 1) **  +INF is +0
00052  *     8.  +-(|x| < 1) **  -INF is +INF
00053  *     9.  +-1         ** +-INF is NAN
00054  *     10. +0 ** (+anything except 0, NAN)               is +0
00055  *     11. -0 ** (+anything except 0, NAN, odd integer)  is +0
00056  *     12. +0 ** (-anything except 0, NAN)               is +INF
00057  *     13. -0 ** (-anything except 0, NAN, odd integer)  is +INF
00058  *     14. -0 ** (odd integer) = -( +0 ** (odd integer) )
00059  *     15. +INF ** (+anything except 0,NAN) is +INF
00060  *     16. +INF ** (-anything except 0,NAN) is +0
00061  *     17. -INF ** (anything)  = -0 ** (-anything)
00062  *     18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
00063  *     19. (-anything except 0 and inf) ** (non-integer) is NAN
00064  *
00065  */
00066 
00067 #include "math.h"
00068 #include "math_private.h"
00069 
00070 static const long double bp[] = {
00071   1.0L,
00072   1.5L,
00073 };
00074 
00075 /* log_2(1.5) */
00076 static const long double dp_h[] = {
00077   0.0,
00078   5.8496250072115607565592654282227158546448E-1L
00079 };
00080 
00081 /* Low part of log_2(1.5) */
00082 static const long double dp_l[] = {
00083   0.0,
00084   1.0579781240112554492329533686862998106046E-16L
00085 };
00086 
00087 static const long double zero = 0.0L,
00088   one = 1.0L,
00089   two = 2.0L,
00090   two113 = 1.0384593717069655257060992658440192E34L,
00091   huge = 1.0e3000L,
00092   tiny = 1.0e-3000L;
00093 
00094 /* 3/2 log x = 3 z + z^3 + z^3 (z^2 R(z^2))
00095    z = (x-1)/(x+1)
00096    1 <= x <= 1.25
00097    Peak relative error 2.3e-37 */
00098 static const long double LN[] =
00099 {
00100  -3.0779177200290054398792536829702930623200E1L,
00101   6.5135778082209159921251824580292116201640E1L,
00102  -4.6312921812152436921591152809994014413540E1L,
00103   1.2510208195629420304615674658258363295208E1L,
00104  -9.9266909031921425609179910128531667336670E-1L
00105 };
00106 static const long double LD[] =
00107 {
00108  -5.129862866715009066465422805058933131960E1L,
00109   1.452015077564081884387441590064272782044E2L,
00110  -1.524043275549860505277434040464085593165E2L,
00111   7.236063513651544224319663428634139768808E1L,
00112  -1.494198912340228235853027849917095580053E1L
00113   /* 1.0E0 */
00114 };
00115 
00116 /* exp(x) = 1 + x - x / (1 - 2 / (x - x^2 R(x^2)))
00117    0 <= x <= 0.5
00118    Peak relative error 5.7e-38  */
00119 static const long double PN[] =
00120 {
00121   5.081801691915377692446852383385968225675E8L,
00122   9.360895299872484512023336636427675327355E6L,
00123   4.213701282274196030811629773097579432957E4L,
00124   5.201006511142748908655720086041570288182E1L,
00125   9.088368420359444263703202925095675982530E-3L,
00126 };
00127 static const long double PD[] =
00128 {
00129   3.049081015149226615468111430031590411682E9L,
00130   1.069833887183886839966085436512368982758E8L,
00131   8.259257717868875207333991924545445705394E5L,
00132   1.872583833284143212651746812884298360922E3L,
00133   /* 1.0E0 */
00134 };
00135 
00136 static const long double
00137   /* ln 2 */
00138   lg2 = 6.9314718055994530941723212145817656807550E-1L,
00139   lg2_h = 6.9314718055994528622676398299518041312695E-1L,
00140   lg2_l = 2.3190468138462996154948554638754786504121E-17L,
00141   ovt = 8.0085662595372944372e-0017L,
00142   /* 2/(3*log(2)) */
00143   cp = 9.6179669392597560490661645400126142495110E-1L,
00144   cp_h = 9.6179669392597555432899980587535537779331E-1L,
00145   cp_l = 5.0577616648125906047157785230014751039424E-17L;
00146 
00147 #ifdef __STDC__
00148 long double
00149 __ieee754_powl (long double x, long double y)
00150 #else
00151 long double
00152 __ieee754_powl (x, y)
00153      long double x, y;
00154 #endif
00155 {
00156   long double z, ax, z_h, z_l, p_h, p_l;
00157   long double y1, t1, t2, r, s, t, u, v, w;
00158   long double s2, s_h, s_l, t_h, t_l;
00159   int32_t i, j, k, yisint, n;
00160   u_int32_t ix, iy;
00161   int32_t hx, hy;
00162   ieee854_long_double_shape_type o, p, q;
00163 
00164   p.value = x;
00165   hx = p.parts32.w0;
00166   ix = hx & 0x7fffffff;
00167 
00168   q.value = y;
00169   hy = q.parts32.w0;
00170   iy = hy & 0x7fffffff;
00171 
00172 
00173   /* y==zero: x**0 = 1 */
00174   if ((iy | q.parts32.w1 | (q.parts32.w2 & 0x7fffffff) | q.parts32.w3) == 0)
00175     return one;
00176 
00177   /* 1.0**y = 1; -1.0**+-Inf = 1 */
00178   if (x == one)
00179     return one;
00180   if (x == -1.0L && iy == 0x7ff00000
00181       && (q.parts32.w1 | (q.parts32.w2 & 0x7fffffff) | q.parts32.w3) == 0)
00182     return one;
00183 
00184   /* +-NaN return x+y */
00185   if ((ix > 0x7ff00000)
00186       || ((ix == 0x7ff00000)
00187          && ((p.parts32.w1 | (p.parts32.w2 & 0x7fffffff) | p.parts32.w3) != 0))
00188       || (iy > 0x7ff00000)
00189       || ((iy == 0x7ff00000)
00190          && ((q.parts32.w1 | (q.parts32.w2 & 0x7fffffff) | q.parts32.w3) != 0)))
00191     return x + y;
00192 
00193   /* determine if y is an odd int when x < 0
00194    * yisint = 0       ... y is not an integer
00195    * yisint = 1       ... y is an odd int
00196    * yisint = 2       ... y is an even int
00197    */
00198   yisint = 0;
00199   if (hx < 0)
00200     {
00201       if ((q.parts32.w2 & 0x7fffffff) >= 0x43400000)    /* Low part >= 2^53 */
00202        yisint = 2;          /* even integer y */
00203       else if (iy >= 0x3ff00000)   /* 1.0 */
00204        {
00205          if (__floorl (y) == y)
00206            {
00207              z = 0.5 * y;
00208              if (__floorl (z) == z)
00209               yisint = 2;
00210              else
00211               yisint = 1;
00212            }
00213        }
00214     }
00215 
00216   /* special value of y */
00217   if ((q.parts32.w1 | (q.parts32.w2 & 0x7fffffff) | q.parts32.w3) == 0)
00218     {
00219       if (iy == 0x7ff00000 && q.parts32.w1 == 0) /* y is +-inf */
00220        {
00221          if (((ix - 0x3ff00000) | p.parts32.w1
00222               | (p.parts32.w2 & 0x7fffffff) | p.parts32.w3) == 0)
00223            return y - y;    /* inf**+-1 is NaN */
00224          else if (ix > 0x3ff00000 || fabsl (x) > 1.0L)
00225            /* (|x|>1)**+-inf = inf,0 */
00226            return (hy >= 0) ? y : zero;
00227          else
00228            /* (|x|<1)**-,+inf = inf,0 */
00229            return (hy < 0) ? -y : zero;
00230        }
00231       if (iy == 0x3ff00000)
00232        {                    /* y is  +-1 */
00233          if (hy < 0)
00234            return one / x;
00235          else
00236            return x;
00237        }
00238       if (hy == 0x40000000)
00239        return x * x;        /* y is  2 */
00240       if (hy == 0x3fe00000)
00241        {                    /* y is  0.5 */
00242          if (hx >= 0)              /* x >= +0 */
00243            return __ieee754_sqrtl (x);
00244        }
00245     }
00246 
00247   ax = fabsl (x);
00248   /* special value of x */
00249   if ((p.parts32.w1 | (p.parts32.w2 & 0x7fffffff) | p.parts32.w3) == 0)
00250     {
00251       if (ix == 0x7ff00000 || ix == 0 || ix == 0x3ff00000)
00252        {
00253          z = ax;            /*x is +-0,+-inf,+-1 */
00254          if (hy < 0)
00255            z = one / z;     /* z = (1/|x|) */
00256          if (hx < 0)
00257            {
00258              if (((ix - 0x3ff00000) | yisint) == 0)
00259               {
00260                 z = (z - z) / (z - z);    /* (-1)**non-int is NaN */
00261               }
00262              else if (yisint == 1)
00263               z = -z;              /* (x<0)**odd = -(|x|**odd) */
00264            }
00265          return z;
00266        }
00267     }
00268 
00269   /* (x<0)**(non-int) is NaN */
00270   if (((((u_int32_t) hx >> 31) - 1) | yisint) == 0)
00271     return (x - x) / (x - x);
00272 
00273   /* |y| is huge.
00274      2^-16495 = 1/2 of smallest representable value.
00275      If (1 - 1/131072)^y underflows, y > 1.4986e9 */
00276   if (iy > 0x41d654b0)
00277     {
00278       /* if (1 - 2^-113)^y underflows, y > 1.1873e38 */
00279       if (iy > 0x47d654b0)
00280        {
00281          if (ix <= 0x3fefffff)
00282            return (hy < 0) ? huge * huge : tiny * tiny;
00283          if (ix >= 0x3ff00000)
00284            return (hy > 0) ? huge * huge : tiny * tiny;
00285        }
00286       /* over/underflow if x is not close to one */
00287       if (ix < 0x3fefffff)
00288        return (hy < 0) ? huge * huge : tiny * tiny;
00289       if (ix > 0x3ff00000)
00290        return (hy > 0) ? huge * huge : tiny * tiny;
00291     }
00292 
00293   n = 0;
00294   /* take care subnormal number */
00295   if (ix < 0x00100000)
00296     {
00297       ax *= two113;
00298       n -= 113;
00299       o.value = ax;
00300       ix = o.parts32.w0;
00301     }
00302   n += ((ix) >> 20) - 0x3ff;
00303   j = ix & 0x000fffff;
00304   /* determine interval */
00305   ix = j | 0x3ff00000;             /* normalize ix */
00306   if (j <= 0x39880)
00307     k = 0;                  /* |x|<sqrt(3/2) */
00308   else if (j < 0xbb670)
00309     k = 1;                  /* |x|<sqrt(3)   */
00310   else
00311     {
00312       k = 0;
00313       n += 1;
00314       ix -= 0x00100000;
00315     }
00316 
00317   o.value = ax;
00318   o.value = __scalbnl (o.value, ((int) ((ix - o.parts32.w0) * 2)) >> 21);
00319   ax = o.value;
00320 
00321   /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
00322   u = ax - bp[k];           /* bp[0]=1.0, bp[1]=1.5 */
00323   v = one / (ax + bp[k]);
00324   s = u * v;
00325   s_h = s;
00326 
00327   o.value = s_h;
00328   o.parts32.w3 = 0;
00329   o.parts32.w2 &= 0xffff8000;
00330   s_h = o.value;
00331   /* t_h=ax+bp[k] High */
00332   t_h = ax + bp[k];
00333   o.value = t_h;
00334   o.parts32.w3 = 0;
00335   o.parts32.w2 &= 0xffff8000;
00336   t_h = o.value;
00337   t_l = ax - (t_h - bp[k]);
00338   s_l = v * ((u - s_h * t_h) - s_h * t_l);
00339   /* compute log(ax) */
00340   s2 = s * s;
00341   u = LN[0] + s2 * (LN[1] + s2 * (LN[2] + s2 * (LN[3] + s2 * LN[4])));
00342   v = LD[0] + s2 * (LD[1] + s2 * (LD[2] + s2 * (LD[3] + s2 * (LD[4] + s2))));
00343   r = s2 * s2 * u / v;
00344   r += s_l * (s_h + s);
00345   s2 = s_h * s_h;
00346   t_h = 3.0 + s2 + r;
00347   o.value = t_h;
00348   o.parts32.w3 = 0;
00349   o.parts32.w2 &= 0xffff8000;
00350   t_h = o.value;
00351   t_l = r - ((t_h - 3.0) - s2);
00352   /* u+v = s*(1+...) */
00353   u = s_h * t_h;
00354   v = s_l * t_h + t_l * s;
00355   /* 2/(3log2)*(s+...) */
00356   p_h = u + v;
00357   o.value = p_h;
00358   o.parts32.w3 = 0;
00359   o.parts32.w2 &= 0xffff8000;
00360   p_h = o.value;
00361   p_l = v - (p_h - u);
00362   z_h = cp_h * p_h;         /* cp_h+cp_l = 2/(3*log2) */
00363   z_l = cp_l * p_h + p_l * cp + dp_l[k];
00364   /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
00365   t = (long double) n;
00366   t1 = (((z_h + z_l) + dp_h[k]) + t);
00367   o.value = t1;
00368   o.parts32.w3 = 0;
00369   o.parts32.w2 &= 0xffff8000;
00370   t1 = o.value;
00371   t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
00372 
00373   /* s (sign of result -ve**odd) = -1 else = 1 */
00374   s = one;
00375   if (((((u_int32_t) hx >> 31) - 1) | (yisint - 1)) == 0)
00376     s = -one;               /* (-ve)**(odd int) */
00377 
00378   /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
00379   y1 = y;
00380   o.value = y1;
00381   o.parts32.w3 = 0;
00382   o.parts32.w2 &= 0xffff8000;
00383   y1 = o.value;
00384   p_l = (y - y1) * t1 + y * t2;
00385   p_h = y1 * t1;
00386   z = p_l + p_h;
00387   o.value = z;
00388   j = o.parts32.w0;
00389   if (j >= 0x40d00000) /* z >= 16384 */
00390     {
00391       /* if z > 16384 */
00392       if (((j - 0x40d00000) | o.parts32.w1
00393         | (o.parts32.w2 & 0x7fffffff) | o.parts32.w3) != 0)
00394        return s * huge * huge;     /* overflow */
00395       else
00396        {
00397          if (p_l + ovt > z - p_h)
00398            return s * huge * huge; /* overflow */
00399        }
00400     }
00401   else if ((j & 0x7fffffff) >= 0x40d01b90)       /* z <= -16495 */
00402     {
00403       /* z < -16495 */
00404       if (((j - 0xc0d01bc0) | o.parts32.w1
00405          | (o.parts32.w2 & 0x7fffffff) | o.parts32.w3) != 0)
00406        return s * tiny * tiny;     /* underflow */
00407       else
00408        {
00409          if (p_l <= z - p_h)
00410            return s * tiny * tiny; /* underflow */
00411        }
00412     }
00413   /* compute 2**(p_h+p_l) */
00414   i = j & 0x7fffffff;
00415   k = (i >> 20) - 0x3ff;
00416   n = 0;
00417   if (i > 0x3fe00000)
00418     {                       /* if |z| > 0.5, set n = [z+0.5] */
00419       n = __floorl (z + 0.5L);
00420       t = n;
00421       p_h -= t;
00422     }
00423   t = p_l + p_h;
00424   o.value = t;
00425   o.parts32.w3 = 0;
00426   o.parts32.w2 &= 0xffff8000;
00427   t = o.value;
00428   u = t * lg2_h;
00429   v = (p_l - (t - p_h)) * lg2 + t * lg2_l;
00430   z = u + v;
00431   w = v - (z - u);
00432   /*  exp(z) */
00433   t = z * z;
00434   u = PN[0] + t * (PN[1] + t * (PN[2] + t * (PN[3] + t * PN[4])));
00435   v = PD[0] + t * (PD[1] + t * (PD[2] + t * (PD[3] + t)));
00436   t1 = z - t * u / v;
00437   r = (z * t1) / (t1 - two) - (w + z * w);
00438   z = one - (r - z);
00439   z = __scalbnl (z, n);
00440   return s * z;
00441 }