Back to index

glibc  2.9
e_expl.c
Go to the documentation of this file.
00001 /* Quad-precision floating point e^x.
00002    Copyright (C) 1999,2004,2006, 2008 Free Software Foundation, Inc.
00003    This file is part of the GNU C Library.
00004    Contributed by Jakub Jelinek <jj@ultra.linux.cz>
00005    Partly based on double-precision code
00006    by Geoffrey Keating <geoffk@ozemail.com.au>
00007 
00008    The GNU C Library is free software; you can redistribute it and/or
00009    modify it under the terms of the GNU Lesser General Public
00010    License as published by the Free Software Foundation; either
00011    version 2.1 of the License, or (at your option) any later version.
00012 
00013    The GNU C Library is distributed in the hope that it will be useful,
00014    but WITHOUT ANY WARRANTY; without even the implied warranty of
00015    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00016    Lesser General Public License for more details.
00017 
00018    You should have received a copy of the GNU Lesser General Public
00019    License along with the GNU C Library; if not, write to the Free
00020    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
00021    02111-1307 USA.  */
00022 
00023 /* The basic design here is from
00024    Abraham Ziv, "Fast Evaluation of Elementary Mathematical Functions with
00025    Correctly Rounded Last Bit", ACM Trans. Math. Soft., 17 (3), September 1991,
00026    pp. 410-423.
00027 
00028    We work with number pairs where the first number is the high part and
00029    the second one is the low part. Arithmetic with the high part numbers must
00030    be exact, without any roundoff errors.
00031 
00032    The input value, X, is written as
00033    X = n * ln(2)_0 + arg1[t1]_0 + arg2[t2]_0 + x
00034        - n * ln(2)_1 + arg1[t1]_1 + arg2[t2]_1 + xl
00035 
00036    where:
00037    - n is an integer, 16384 >= n >= -16495;
00038    - ln(2)_0 is the first 93 bits of ln(2), and |ln(2)_0-ln(2)-ln(2)_1| < 2^-205
00039    - t1 is an integer, 89 >= t1 >= -89
00040    - t2 is an integer, 65 >= t2 >= -65
00041    - |arg1[t1]-t1/256.0| < 2^-53
00042    - |arg2[t2]-t2/32768.0| < 2^-53
00043    - x + xl is whatever is left, |x + xl| < 2^-16 + 2^-53
00044 
00045    Then e^x is approximated as
00046 
00047    e^x = 2^n_1 ( 2^n_0 e^(arg1[t1]_0 + arg1[t1]_1) e^(arg2[t2]_0 + arg2[t2]_1)
00048               + 2^n_0 e^(arg1[t1]_0 + arg1[t1]_1) e^(arg2[t2]_0 + arg2[t2]_1)
00049                * p (x + xl + n * ln(2)_1))
00050    where:
00051    - p(x) is a polynomial approximating e(x)-1
00052    - e^(arg1[t1]_0 + arg1[t1]_1) is obtained from a table
00053    - e^(arg2[t2]_0 + arg2[t2]_1) likewise
00054    - n_1 + n_0 = n, so that |n_0| < -LDBL_MIN_EXP-1.
00055 
00056    If it happens that n_1 == 0 (this is the usual case), that multiplication
00057    is omitted.
00058    */
00059 
00060 #ifndef _GNU_SOURCE
00061 #define _GNU_SOURCE
00062 #endif
00063 #include <float.h>
00064 #include <ieee754.h>
00065 #include <math.h>
00066 #include <fenv.h>
00067 #include <inttypes.h>
00068 #include <math_private.h>
00069 #include <sysdeps/ieee754/ldbl-128/t_expl.h>
00070 
00071 static const long double C[] = {
00072 /* Smallest integer x for which e^x overflows.  */
00073 #define himark C[0]
00074  709.08956571282405153382846025171462914L,
00075 
00076 /* Largest integer x for which e^x underflows.  */
00077 #define lomark C[1]
00078 -709.08956571282405153382846025171462914L,
00079 
00080 /* 3x2^96 */
00081 #define THREEp96 C[2]
00082  59421121885698253195157962752.0L,
00083 
00084 /* 3x2^103 */
00085 #define THREEp103 C[3]
00086  30423614405477505635920876929024.0L,
00087 
00088 /* 3x2^111 */
00089 #define THREEp111 C[4]
00090  7788445287802241442795744493830144.0L,
00091 
00092 /* 1/ln(2) */
00093 #define M_1_LN2 C[5]
00094  1.44269504088896340735992468100189204L,
00095 
00096 /* first 93 bits of ln(2) */
00097 #define M_LN2_0 C[6]
00098  0.693147180559945309417232121457981864L,
00099 
00100 /* ln2_0 - ln(2) */
00101 #define M_LN2_1 C[7]
00102 -1.94704509238074995158795957333327386E-31L,
00103 
00104 /* very small number */
00105 #define TINY C[8]
00106  1.0e-308L,
00107 
00108 /* 2^16383 */
00109 #define TWO1023 C[9]
00110  8.988465674311579538646525953945123668E+307L,
00111 
00112 /* 256 */
00113 #define TWO8 C[10]
00114  256.0L,
00115 
00116 /* 32768 */
00117 #define TWO15 C[11]
00118  32768.0L,
00119 
00120 /* Chebyshev polynom coeficients for (exp(x)-1)/x */
00121 #define P1 C[12]
00122 #define P2 C[13]
00123 #define P3 C[14]
00124 #define P4 C[15]
00125 #define P5 C[16]
00126 #define P6 C[17]
00127  0.5L,
00128  1.66666666666666666666666666666666683E-01L,
00129  4.16666666666666666666654902320001674E-02L,
00130  8.33333333333333333333314659767198461E-03L,
00131  1.38888888889899438565058018857254025E-03L,
00132  1.98412698413981650382436541785404286E-04L,
00133 };
00134 
00135 long double
00136 __ieee754_expl (long double x)
00137 {
00138   /* Check for usual case.  */
00139   if (isless (x, himark) && isgreater (x, lomark))
00140     {
00141       int tval1, tval2, unsafe, n_i, exponent2;
00142       long double x22, n, result, xl;
00143       union ibm_extended_long_double ex2_u, scale_u;
00144       fenv_t oldenv;
00145 
00146       feholdexcept (&oldenv);
00147 #ifdef FE_TONEAREST
00148       fesetround (FE_TONEAREST);
00149 #endif
00150 
00151       n = __roundl (x*M_1_LN2);
00152       x = x-n*M_LN2_0;
00153       xl = n*M_LN2_1;
00154 
00155       tval1 = __roundl (x*TWO8);
00156       x -= __expl_table[T_EXPL_ARG1+2*tval1];
00157       xl -= __expl_table[T_EXPL_ARG1+2*tval1+1];
00158 
00159       tval2 = __roundl (x*TWO15);
00160       x -= __expl_table[T_EXPL_ARG2+2*tval2];
00161       xl -= __expl_table[T_EXPL_ARG2+2*tval2+1];
00162 
00163       x = x + xl;
00164 
00165       /* Compute ex2 = 2^n_0 e^(argtable[tval1]) e^(argtable[tval2]).  */
00166       ex2_u.d = __expl_table[T_EXPL_RES1 + tval1]
00167               * __expl_table[T_EXPL_RES2 + tval2];
00168       n_i = (int)n;
00169       /* 'unsafe' is 1 iff n_1 != 0.  */
00170       unsafe = fabsl(n_i) >= -LDBL_MIN_EXP - 1;
00171       ex2_u.ieee.exponent += n_i >> unsafe;
00172       /* Fortunately, there are no subnormal lowpart doubles in
00173         __expl_table, only normal values and zeros.
00174         But after scaling it can be subnormal.  */
00175       exponent2 = ex2_u.ieee.exponent2 + (n_i >> unsafe);
00176       if (ex2_u.ieee.exponent2 == 0)
00177        /* assert ((ex2_u.ieee.mantissa2|ex2_u.ieee.mantissa3) == 0) */;
00178       else if (exponent2 > 0)
00179        ex2_u.ieee.exponent2 = exponent2;
00180       else if (exponent2 <= -54)
00181        {
00182          ex2_u.ieee.exponent2 = 0;
00183          ex2_u.ieee.mantissa2 = 0;
00184          ex2_u.ieee.mantissa3 = 0;
00185        }
00186       else
00187        {
00188          static const double
00189            two54 = 1.80143985094819840000e+16, /* 4350000000000000 */
00190            twom54 = 5.55111512312578270212e-17; /* 3C90000000000000 */
00191          ex2_u.dd[1] *= two54;
00192          ex2_u.ieee.exponent2 += n_i >> unsafe;
00193          ex2_u.dd[1] *= twom54;
00194        }
00195 
00196       /* Compute scale = 2^n_1.  */
00197       scale_u.d = 1.0L;
00198       scale_u.ieee.exponent += n_i - (n_i >> unsafe);
00199 
00200       /* Approximate e^x2 - 1, using a seventh-degree polynomial,
00201         with maximum error in [-2^-16-2^-53,2^-16+2^-53]
00202         less than 4.8e-39.  */
00203       x22 = x + x*x*(P1+x*(P2+x*(P3+x*(P4+x*(P5+x*P6)))));
00204 
00205       /* Return result.  */
00206       fesetenv (&oldenv);
00207 
00208       result = x22 * ex2_u.d + ex2_u.d;
00209 
00210       /* Now we can test whether the result is ultimate or if we are unsure.
00211         In the later case we should probably call a mpn based routine to give
00212         the ultimate result.
00213         Empirically, this routine is already ultimate in about 99.9986% of
00214         cases, the test below for the round to nearest case will be false
00215         in ~ 99.9963% of cases.
00216         Without proc2 routine maximum error which has been seen is
00217         0.5000262 ulp.
00218 
00219          union ieee854_long_double ex3_u;
00220 
00221          #ifdef FE_TONEAREST
00222            fesetround (FE_TONEAREST);
00223          #endif
00224          ex3_u.d = (result - ex2_u.d) - x22 * ex2_u.d;
00225          ex2_u.d = result;
00226          ex3_u.ieee.exponent += LDBL_MANT_DIG + 15 + IEEE854_LONG_DOUBLE_BIAS
00227                              - ex2_u.ieee.exponent;
00228          n_i = abs (ex3_u.d);
00229          n_i = (n_i + 1) / 2;
00230          fesetenv (&oldenv);
00231          #ifdef FE_TONEAREST
00232          if (fegetround () == FE_TONEAREST)
00233            n_i -= 0x4000;
00234          #endif
00235          if (!n_i) {
00236            return __ieee754_expl_proc2 (origx);
00237          }
00238        */
00239       if (!unsafe)
00240        return result;
00241       else
00242        return result * scale_u.d;
00243     }
00244   /* Exceptional cases:  */
00245   else if (isless (x, himark))
00246     {
00247       if (__isinfl (x))
00248        /* e^-inf == 0, with no error.  */
00249        return 0;
00250       else
00251        /* Underflow */
00252        return TINY * TINY;
00253     }
00254   else
00255     /* Return x, if x is a NaN or Inf; or overflow, otherwise.  */
00256     return TWO1023*x;
00257 }