Back to index

glibc  2.9
op-4.h
Go to the documentation of this file.
00001 /* Software floating-point emulation.
00002    Basic four-word fraction declaration and manipulation.
00003    Copyright (C) 1997,1998,1999,2006,2007 Free Software Foundation, Inc.
00004    This file is part of the GNU C Library.
00005    Contributed by Richard Henderson (rth@cygnus.com),
00006                 Jakub Jelinek (jj@ultra.linux.cz),
00007                 David S. Miller (davem@redhat.com) and
00008                 Peter Maydell (pmaydell@chiark.greenend.org.uk).
00009 
00010    The GNU C Library is free software; you can redistribute it and/or
00011    modify it under the terms of the GNU Lesser General Public
00012    License as published by the Free Software Foundation; either
00013    version 2.1 of the License, or (at your option) any later version.
00014 
00015    In addition to the permissions in the GNU Lesser General Public
00016    License, the Free Software Foundation gives you unlimited
00017    permission to link the compiled version of this file into
00018    combinations with other programs, and to distribute those
00019    combinations without any restriction coming from the use of this
00020    file.  (The Lesser General Public License restrictions do apply in
00021    other respects; for example, they cover modification of the file,
00022    and distribution when not linked into a combine executable.)
00023 
00024    The GNU C Library is distributed in the hope that it will be useful,
00025    but WITHOUT ANY WARRANTY; without even the implied warranty of
00026    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00027    Lesser General Public License for more details.
00028 
00029    You should have received a copy of the GNU Lesser General Public
00030    License along with the GNU C Library; if not, write to the Free
00031    Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
00032    MA 02110-1301, USA.  */
00033 
00034 #define _FP_FRAC_DECL_4(X)  _FP_W_TYPE X##_f[4]
00035 #define _FP_FRAC_COPY_4(D,S)                     \
00036   (D##_f[0] = S##_f[0], D##_f[1] = S##_f[1],     \
00037    D##_f[2] = S##_f[2], D##_f[3] = S##_f[3])
00038 #define _FP_FRAC_SET_4(X,I) __FP_FRAC_SET_4(X, I)
00039 #define _FP_FRAC_HIGH_4(X)  (X##_f[3])
00040 #define _FP_FRAC_LOW_4(X)   (X##_f[0])
00041 #define _FP_FRAC_WORD_4(X,w)       (X##_f[w])
00042 
00043 #define _FP_FRAC_SLL_4(X,N)                                    \
00044   do {                                                         \
00045     _FP_I_TYPE _up, _down, _skip, _i;                                 \
00046     _skip = (N) / _FP_W_TYPE_SIZE;                             \
00047     _up = (N) % _FP_W_TYPE_SIZE;                               \
00048     _down = _FP_W_TYPE_SIZE - _up;                             \
00049     if (!_up)                                                  \
00050       for (_i = 3; _i >= _skip; --_i)                                 \
00051        X##_f[_i] = X##_f[_i-_skip];                                   \
00052     else                                                       \
00053       {                                                               \
00054        for (_i = 3; _i > _skip; --_i)                                 \
00055          X##_f[_i] = X##_f[_i-_skip] << _up                           \
00056                     | X##_f[_i-_skip-1] >> _down;                     \
00057        X##_f[_i--] = X##_f[0] << _up;                                 \
00058       }                                                               \
00059     for (; _i >= 0; --_i)                                      \
00060       X##_f[_i] = 0;                                           \
00061   } while (0)
00062 
00063 /* This one was broken too */
00064 #define _FP_FRAC_SRL_4(X,N)                                    \
00065   do {                                                         \
00066     _FP_I_TYPE _up, _down, _skip, _i;                                 \
00067     _skip = (N) / _FP_W_TYPE_SIZE;                             \
00068     _down = (N) % _FP_W_TYPE_SIZE;                             \
00069     _up = _FP_W_TYPE_SIZE - _down;                             \
00070     if (!_down)                                                       \
00071       for (_i = 0; _i <= 3-_skip; ++_i)                               \
00072        X##_f[_i] = X##_f[_i+_skip];                                   \
00073     else                                                       \
00074       {                                                               \
00075        for (_i = 0; _i < 3-_skip; ++_i)                        \
00076          X##_f[_i] = X##_f[_i+_skip] >> _down                         \
00077                     | X##_f[_i+_skip+1] << _up;                \
00078        X##_f[_i++] = X##_f[3] >> _down;                        \
00079       }                                                               \
00080     for (; _i < 4; ++_i)                                       \
00081       X##_f[_i] = 0;                                           \
00082   } while (0)
00083 
00084 
00085 /* Right shift with sticky-lsb. 
00086  * What this actually means is that we do a standard right-shift,
00087  * but that if any of the bits that fall off the right hand side
00088  * were one then we always set the LSbit.
00089  */
00090 #define _FP_FRAC_SRST_4(X,S,N,size)                     \
00091   do {                                           \
00092     _FP_I_TYPE _up, _down, _skip, _i;                   \
00093     _FP_W_TYPE _s;                               \
00094     _skip = (N) / _FP_W_TYPE_SIZE;               \
00095     _down = (N) % _FP_W_TYPE_SIZE;               \
00096     _up = _FP_W_TYPE_SIZE - _down;               \
00097     for (_s = _i = 0; _i < _skip; ++_i)                 \
00098       _s |= X##_f[_i];                                  \
00099     if (!_down)                                         \
00100       for (_i = 0; _i <= 3-_skip; ++_i)                 \
00101        X##_f[_i] = X##_f[_i+_skip];                     \
00102     else                                         \
00103       {                                                 \
00104        _s |= X##_f[_i] << _up;                          \
00105        for (_i = 0; _i < 3-_skip; ++_i)          \
00106          X##_f[_i] = X##_f[_i+_skip] >> _down           \
00107                     | X##_f[_i+_skip+1] << _up;  \
00108        X##_f[_i++] = X##_f[3] >> _down;          \
00109       }                                                 \
00110     for (; _i < 4; ++_i)                         \
00111       X##_f[_i] = 0;                             \
00112     S = (_s != 0);                               \
00113   } while (0)
00114 
00115 #define _FP_FRAC_SRS_4(X,N,size)          \
00116   do {                                    \
00117     int _sticky;                          \
00118     _FP_FRAC_SRST_4(X, _sticky, N, size); \
00119     X##_f[0] |= _sticky;                  \
00120   } while (0)
00121 
00122 #define _FP_FRAC_ADD_4(R,X,Y)                                         \
00123   __FP_FRAC_ADD_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0],             \
00124                 X##_f[3], X##_f[2], X##_f[1], X##_f[0],        \
00125                 Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
00126 
00127 #define _FP_FRAC_SUB_4(R,X,Y)                                         \
00128   __FP_FRAC_SUB_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0],             \
00129                 X##_f[3], X##_f[2], X##_f[1], X##_f[0],        \
00130                 Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
00131 
00132 #define _FP_FRAC_DEC_4(X,Y)                                    \
00133   __FP_FRAC_DEC_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0],             \
00134                 Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
00135 
00136 #define _FP_FRAC_ADDI_4(X,I)                                          \
00137   __FP_FRAC_ADDI_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0], I)
00138 
00139 #define _FP_ZEROFRAC_4  0,0,0,0
00140 #define _FP_MINFRAC_4   0,0,0,1
00141 #define _FP_MAXFRAC_4       (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0)
00142 
00143 #define _FP_FRAC_ZEROP_4(X)     ((X##_f[0] | X##_f[1] | X##_f[2] | X##_f[3]) == 0)
00144 #define _FP_FRAC_NEGP_4(X)      ((_FP_WS_TYPE)X##_f[3] < 0)
00145 #define _FP_FRAC_OVERP_4(fs,X)  (_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs)
00146 #define _FP_FRAC_CLEAR_OVERP_4(fs,X)  (_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs)
00147 
00148 #define _FP_FRAC_EQ_4(X,Y)                       \
00149  (X##_f[0] == Y##_f[0] && X##_f[1] == Y##_f[1]          \
00150   && X##_f[2] == Y##_f[2] && X##_f[3] == Y##_f[3])
00151 
00152 #define _FP_FRAC_GT_4(X,Y)                       \
00153  (X##_f[3] > Y##_f[3] ||                         \
00154   (X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] ||      \
00155    (X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] ||     \
00156     (X##_f[1] == Y##_f[1] && X##_f[0] > Y##_f[0])       \
00157    ))                                            \
00158   ))                                             \
00159  )
00160 
00161 #define _FP_FRAC_GE_4(X,Y)                       \
00162  (X##_f[3] > Y##_f[3] ||                         \
00163   (X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] ||      \
00164    (X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] ||     \
00165     (X##_f[1] == Y##_f[1] && X##_f[0] >= Y##_f[0])      \
00166    ))                                            \
00167   ))                                             \
00168  )
00169 
00170 
00171 #define _FP_FRAC_CLZ_4(R,X)        \
00172   do {                             \
00173     if (X##_f[3])                  \
00174     {                              \
00175        __FP_CLZ(R,X##_f[3]);              \
00176     }                              \
00177     else if (X##_f[2])                    \
00178     {                              \
00179        __FP_CLZ(R,X##_f[2]);              \
00180        R += _FP_W_TYPE_SIZE;              \
00181     }                              \
00182     else if (X##_f[1])                    \
00183     {                              \
00184        __FP_CLZ(R,X##_f[1]);              \
00185        R += _FP_W_TYPE_SIZE*2;            \
00186     }                              \
00187     else                           \
00188     {                              \
00189        __FP_CLZ(R,X##_f[0]);              \
00190        R += _FP_W_TYPE_SIZE*3;            \
00191     }                              \
00192   } while(0)
00193 
00194 
00195 #define _FP_UNPACK_RAW_4(fs, X, val)                           \
00196   do {                                                  \
00197     union _FP_UNION_##fs _flo; _flo.flt = (val);        \
00198     X##_f[0] = _flo.bits.frac0;                                \
00199     X##_f[1] = _flo.bits.frac1;                                \
00200     X##_f[2] = _flo.bits.frac2;                                \
00201     X##_f[3] = _flo.bits.frac3;                                \
00202     X##_e  = _flo.bits.exp;                             \
00203     X##_s  = _flo.bits.sign;                                   \
00204   } while (0)
00205 
00206 #define _FP_UNPACK_RAW_4_P(fs, X, val)                         \
00207   do {                                                  \
00208     union _FP_UNION_##fs *_flo =                        \
00209       (union _FP_UNION_##fs *)(val);                           \
00210                                                         \
00211     X##_f[0] = _flo->bits.frac0;                        \
00212     X##_f[1] = _flo->bits.frac1;                        \
00213     X##_f[2] = _flo->bits.frac2;                        \
00214     X##_f[3] = _flo->bits.frac3;                        \
00215     X##_e  = _flo->bits.exp;                                   \
00216     X##_s  = _flo->bits.sign;                                  \
00217   } while (0)
00218 
00219 #define _FP_PACK_RAW_4(fs, val, X)                      \
00220   do {                                                  \
00221     union _FP_UNION_##fs _flo;                                 \
00222     _flo.bits.frac0 = X##_f[0];                                \
00223     _flo.bits.frac1 = X##_f[1];                                \
00224     _flo.bits.frac2 = X##_f[2];                                \
00225     _flo.bits.frac3 = X##_f[3];                                \
00226     _flo.bits.exp   = X##_e;                                   \
00227     _flo.bits.sign  = X##_s;                                   \
00228     (val) = _flo.flt;                                          \
00229   } while (0)
00230 
00231 #define _FP_PACK_RAW_4_P(fs, val, X)                           \
00232   do {                                                  \
00233     union _FP_UNION_##fs *_flo =                        \
00234       (union _FP_UNION_##fs *)(val);                           \
00235                                                         \
00236     _flo->bits.frac0 = X##_f[0];                        \
00237     _flo->bits.frac1 = X##_f[1];                        \
00238     _flo->bits.frac2 = X##_f[2];                        \
00239     _flo->bits.frac3 = X##_f[3];                        \
00240     _flo->bits.exp   = X##_e;                                  \
00241     _flo->bits.sign  = X##_s;                                  \
00242   } while (0)
00243 
00244 /*
00245  * Multiplication algorithms:
00246  */
00247 
00248 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */
00249 
00250 #define _FP_MUL_MEAT_4_wide(wfracbits, R, X, Y, doit)                     \
00251   do {                                                             \
00252     _FP_FRAC_DECL_8(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c);        \
00253     _FP_FRAC_DECL_2(_d); _FP_FRAC_DECL_2(_e); _FP_FRAC_DECL_2(_f);        \
00254                                                                    \
00255     doit(_FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0), X##_f[0], Y##_f[0]); \
00256     doit(_b_f1, _b_f0, X##_f[0], Y##_f[1]);                               \
00257     doit(_c_f1, _c_f0, X##_f[1], Y##_f[0]);                               \
00258     doit(_d_f1, _d_f0, X##_f[1], Y##_f[1]);                               \
00259     doit(_e_f1, _e_f0, X##_f[0], Y##_f[2]);                               \
00260     doit(_f_f1, _f_f0, X##_f[2], Y##_f[0]);                               \
00261     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2),          \
00262                   _FP_FRAC_WORD_8(_z,1), 0,_b_f1,_b_f0,            \
00263                   0,0,_FP_FRAC_WORD_8(_z,1));                             \
00264     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2),          \
00265                   _FP_FRAC_WORD_8(_z,1), 0,_c_f1,_c_f0,            \
00266                   _FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2),     \
00267                   _FP_FRAC_WORD_8(_z,1));                          \
00268     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),          \
00269                   _FP_FRAC_WORD_8(_z,2), 0,_d_f1,_d_f0,            \
00270                   0,_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2));         \
00271     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),          \
00272                   _FP_FRAC_WORD_8(_z,2), 0,_e_f1,_e_f0,            \
00273                   _FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),     \
00274                   _FP_FRAC_WORD_8(_z,2));                          \
00275     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),          \
00276                   _FP_FRAC_WORD_8(_z,2), 0,_f_f1,_f_f0,            \
00277                   _FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),     \
00278                   _FP_FRAC_WORD_8(_z,2));                          \
00279     doit(_b_f1, _b_f0, X##_f[0], Y##_f[3]);                               \
00280     doit(_c_f1, _c_f0, X##_f[3], Y##_f[0]);                               \
00281     doit(_d_f1, _d_f0, X##_f[1], Y##_f[2]);                               \
00282     doit(_e_f1, _e_f0, X##_f[2], Y##_f[1]);                               \
00283     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),          \
00284                   _FP_FRAC_WORD_8(_z,3), 0,_b_f1,_b_f0,            \
00285                   0,_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3));         \
00286     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),          \
00287                   _FP_FRAC_WORD_8(_z,3), 0,_c_f1,_c_f0,            \
00288                   _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),     \
00289                   _FP_FRAC_WORD_8(_z,3));                          \
00290     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),          \
00291                   _FP_FRAC_WORD_8(_z,3), 0,_d_f1,_d_f0,            \
00292                   _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),     \
00293                   _FP_FRAC_WORD_8(_z,3));                          \
00294     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),          \
00295                   _FP_FRAC_WORD_8(_z,3), 0,_e_f1,_e_f0,            \
00296                   _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),     \
00297                   _FP_FRAC_WORD_8(_z,3));                          \
00298     doit(_b_f1, _b_f0, X##_f[2], Y##_f[2]);                               \
00299     doit(_c_f1, _c_f0, X##_f[1], Y##_f[3]);                               \
00300     doit(_d_f1, _d_f0, X##_f[3], Y##_f[1]);                               \
00301     doit(_e_f1, _e_f0, X##_f[2], Y##_f[3]);                               \
00302     doit(_f_f1, _f_f0, X##_f[3], Y##_f[2]);                               \
00303     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),          \
00304                   _FP_FRAC_WORD_8(_z,4), 0,_b_f1,_b_f0,            \
00305                   0,_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4));         \
00306     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),          \
00307                   _FP_FRAC_WORD_8(_z,4), 0,_c_f1,_c_f0,            \
00308                   _FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),     \
00309                   _FP_FRAC_WORD_8(_z,4));                          \
00310     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),          \
00311                   _FP_FRAC_WORD_8(_z,4), 0,_d_f1,_d_f0,            \
00312                   _FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),     \
00313                   _FP_FRAC_WORD_8(_z,4));                          \
00314     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6),          \
00315                   _FP_FRAC_WORD_8(_z,5), 0,_e_f1,_e_f0,            \
00316                   0,_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5));         \
00317     __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6),          \
00318                   _FP_FRAC_WORD_8(_z,5), 0,_f_f1,_f_f0,            \
00319                   _FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6),     \
00320                   _FP_FRAC_WORD_8(_z,5));                          \
00321     doit(_b_f1, _b_f0, X##_f[3], Y##_f[3]);                               \
00322     __FP_FRAC_ADD_2(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6),          \
00323                   _b_f1,_b_f0,                                     \
00324                   _FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6));           \
00325                                                                    \
00326     /* Normalize since we know where the msb of the multiplicands         \
00327        were (bit B), we know that the msb of the of the product is        \
00328        at either 2B or 2B-1.  */                                   \
00329     _FP_FRAC_SRS_8(_z, wfracbits-1, 2*wfracbits);                         \
00330     __FP_FRAC_SET_4(R, _FP_FRAC_WORD_8(_z,3), _FP_FRAC_WORD_8(_z,2),      \
00331                   _FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0));          \
00332   } while (0)
00333 
00334 #define _FP_MUL_MEAT_4_gmp(wfracbits, R, X, Y)                            \
00335   do {                                                             \
00336     _FP_FRAC_DECL_8(_z);                                           \
00337                                                                    \
00338     mpn_mul_n(_z_f, _x_f, _y_f, 4);                                       \
00339                                                                    \
00340     /* Normalize since we know where the msb of the multiplicands         \
00341        were (bit B), we know that the msb of the of the product is        \
00342        at either 2B or 2B-1.  */                                   \
00343     _FP_FRAC_SRS_8(_z, wfracbits-1, 2*wfracbits);                         \
00344     __FP_FRAC_SET_4(R, _FP_FRAC_WORD_8(_z,3), _FP_FRAC_WORD_8(_z,2),      \
00345                   _FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0));          \
00346   } while (0)
00347 
00348 /*
00349  * Helper utility for _FP_DIV_MEAT_4_udiv:
00350  * pppp = m * nnn
00351  */
00352 #define umul_ppppmnnn(p3,p2,p1,p0,m,n2,n1,n0)                             \
00353   do {                                                             \
00354     UWtype _t;                                                            \
00355     umul_ppmm(p1,p0,m,n0);                                         \
00356     umul_ppmm(p2,_t,m,n1);                                         \
00357     __FP_FRAC_ADDI_2(p2,p1,_t);                                           \
00358     umul_ppmm(p3,_t,m,n2);                                         \
00359     __FP_FRAC_ADDI_2(p3,p2,_t);                                           \
00360   } while (0)
00361 
00362 /*
00363  * Division algorithms:
00364  */
00365 
00366 #define _FP_DIV_MEAT_4_udiv(fs, R, X, Y)                           \
00367   do {                                                             \
00368     int _i;                                                        \
00369     _FP_FRAC_DECL_4(_n); _FP_FRAC_DECL_4(_m);                             \
00370     _FP_FRAC_SET_4(_n, _FP_ZEROFRAC_4);                                   \
00371     if (_FP_FRAC_GT_4(X, Y))                                              \
00372       {                                                                   \
00373        _n_f[3] = X##_f[0] << (_FP_W_TYPE_SIZE - 1);                       \
00374        _FP_FRAC_SRL_4(X, 1);                                              \
00375       }                                                                   \
00376     else                                                           \
00377       R##_e--;                                                            \
00378                                                                    \
00379     /* Normalize, i.e. make the most significant bit of the               \
00380        denominator set. */                                         \
00381     _FP_FRAC_SLL_4(Y, _FP_WFRACXBITS_##fs);                               \
00382                                                                    \
00383     for (_i = 3; ; _i--)                                           \
00384       {                                                                   \
00385         if (X##_f[3] == Y##_f[3])                                  \
00386           {                                                        \
00387             /* This is a special case, not an optimization                \
00388                (X##_f[3]/Y##_f[3] would not fit into UWtype).             \
00389                As X## is guaranteed to be < Y,  R##_f[_i] can be either          \
00390                (UWtype)-1 or (UWtype)-2.  */                              \
00391             R##_f[_i] = -1;                                        \
00392             if (!_i)                                               \
00393              break;                                                \
00394             __FP_FRAC_SUB_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0],       \
00395                          Y##_f[2], Y##_f[1], Y##_f[0], 0,                 \
00396                          X##_f[2], X##_f[1], X##_f[0], _n_f[_i]);         \
00397             _FP_FRAC_SUB_4(X, Y, X);                                      \
00398             if (X##_f[3] > Y##_f[3])                                      \
00399               {                                                           \
00400                 R##_f[_i] = -2;                                           \
00401                 _FP_FRAC_ADD_4(X, Y, X);                           \
00402               }                                                           \
00403           }                                                        \
00404         else                                                       \
00405           {                                                        \
00406             udiv_qrnnd(R##_f[_i], X##_f[3], X##_f[3], X##_f[2], Y##_f[3]);  \
00407             umul_ppppmnnn(_m_f[3], _m_f[2], _m_f[1], _m_f[0],             \
00408                        R##_f[_i], Y##_f[2], Y##_f[1], Y##_f[0]);          \
00409             X##_f[2] = X##_f[1];                                   \
00410             X##_f[1] = X##_f[0];                                   \
00411             X##_f[0] = _n_f[_i];                                   \
00412             if (_FP_FRAC_GT_4(_m, X))                                     \
00413               {                                                           \
00414                 R##_f[_i]--;                                              \
00415                 _FP_FRAC_ADD_4(X, Y, X);                           \
00416                 if (_FP_FRAC_GE_4(X, Y) && _FP_FRAC_GT_4(_m, X))          \
00417                   {                                                \
00418                   R##_f[_i]--;                                     \
00419                   _FP_FRAC_ADD_4(X, Y, X);                                \
00420                   }                                                \
00421               }                                                           \
00422             _FP_FRAC_DEC_4(X, _m);                                 \
00423             if (!_i)                                               \
00424              {                                                            \
00425               if (!_FP_FRAC_EQ_4(X, _m))                           \
00426                 R##_f[0] |= _FP_WORK_STICKY;                              \
00427               break;                                               \
00428              }                                                            \
00429           }                                                        \
00430       }                                                                   \
00431   } while (0)
00432 
00433 
00434 /*
00435  * Square root algorithms:
00436  * We have just one right now, maybe Newton approximation
00437  * should be added for those machines where division is fast.
00438  */
00439  
00440 #define _FP_SQRT_MEAT_4(R, S, T, X, q)                         \
00441   do {                                                  \
00442     while (q)                                           \
00443       {                                                        \
00444        T##_f[3] = S##_f[3] + q;                         \
00445        if (T##_f[3] <= X##_f[3])                        \
00446          {                                              \
00447            S##_f[3] = T##_f[3] + q;                            \
00448            X##_f[3] -= T##_f[3];                        \
00449            R##_f[3] += q;                               \
00450          }                                              \
00451        _FP_FRAC_SLL_4(X, 1);                                   \
00452        q >>= 1;                                         \
00453       }                                                        \
00454     q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);                \
00455     while (q)                                           \
00456       {                                                        \
00457        T##_f[2] = S##_f[2] + q;                         \
00458        T##_f[3] = S##_f[3];                             \
00459        if (T##_f[3] < X##_f[3] ||                       \
00460            (T##_f[3] == X##_f[3] && T##_f[2] <= X##_f[2]))     \
00461          {                                              \
00462            S##_f[2] = T##_f[2] + q;                            \
00463            S##_f[3] += (T##_f[2] > S##_f[2]);                  \
00464            __FP_FRAC_DEC_2(X##_f[3], X##_f[2],                 \
00465                          T##_f[3], T##_f[2]);           \
00466            R##_f[2] += q;                               \
00467          }                                              \
00468        _FP_FRAC_SLL_4(X, 1);                                   \
00469        q >>= 1;                                         \
00470       }                                                        \
00471     q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);                \
00472     while (q)                                           \
00473       {                                                        \
00474        T##_f[1] = S##_f[1] + q;                         \
00475        T##_f[2] = S##_f[2];                             \
00476        T##_f[3] = S##_f[3];                             \
00477        if (T##_f[3] < X##_f[3] ||                       \
00478            (T##_f[3] == X##_f[3] && (T##_f[2] < X##_f[2] ||    \
00479             (T##_f[2] == X##_f[2] && T##_f[1] <= X##_f[1]))))  \
00480          {                                              \
00481            S##_f[1] = T##_f[1] + q;                            \
00482            S##_f[2] += (T##_f[1] > S##_f[1]);                  \
00483            S##_f[3] += (T##_f[2] > S##_f[2]);                  \
00484            __FP_FRAC_DEC_3(X##_f[3], X##_f[2], X##_f[1],       \
00485                          T##_f[3], T##_f[2], T##_f[1]); \
00486            R##_f[1] += q;                               \
00487          }                                              \
00488        _FP_FRAC_SLL_4(X, 1);                                   \
00489        q >>= 1;                                         \
00490       }                                                        \
00491     q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);                \
00492     while (q != _FP_WORK_ROUND)                                \
00493       {                                                        \
00494        T##_f[0] = S##_f[0] + q;                         \
00495        T##_f[1] = S##_f[1];                             \
00496        T##_f[2] = S##_f[2];                             \
00497        T##_f[3] = S##_f[3];                             \
00498        if (_FP_FRAC_GE_4(X,T))                                 \
00499          {                                              \
00500            S##_f[0] = T##_f[0] + q;                            \
00501            S##_f[1] += (T##_f[0] > S##_f[0]);                  \
00502            S##_f[2] += (T##_f[1] > S##_f[1]);                  \
00503            S##_f[3] += (T##_f[2] > S##_f[2]);                  \
00504            _FP_FRAC_DEC_4(X, T);                        \
00505            R##_f[0] += q;                               \
00506          }                                              \
00507        _FP_FRAC_SLL_4(X, 1);                                   \
00508        q >>= 1;                                         \
00509       }                                                        \
00510     if (!_FP_FRAC_ZEROP_4(X))                                  \
00511       {                                                        \
00512        if (_FP_FRAC_GT_4(X,S))                                 \
00513          R##_f[0] |= _FP_WORK_ROUND;                           \
00514        R##_f[0] |= _FP_WORK_STICKY;                            \
00515       }                                                        \
00516   } while (0)
00517 
00518 
00519 /*
00520  * Internals 
00521  */
00522 
00523 #define __FP_FRAC_SET_4(X,I3,I2,I1,I0)                                \
00524   (X##_f[3] = I3, X##_f[2] = I2, X##_f[1] = I1, X##_f[0] = I0)
00525 
00526 #ifndef __FP_FRAC_ADD_3
00527 #define __FP_FRAC_ADD_3(r2,r1,r0,x2,x1,x0,y2,y1,y0)            \
00528   do {                                                  \
00529     _FP_W_TYPE _c1, _c2;                                \
00530     r0 = x0 + y0;                                       \
00531     _c1 = r0 < x0;                                      \
00532     r1 = x1 + y1;                                       \
00533     _c2 = r1 < x1;                                      \
00534     r1 += _c1;                                                 \
00535     _c2 |= r1 < _c1;                                    \
00536     r2 = x2 + y2 + _c2;                                        \
00537   } while (0)
00538 #endif
00539 
00540 #ifndef __FP_FRAC_ADD_4
00541 #define __FP_FRAC_ADD_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0)   \
00542   do {                                                  \
00543     _FP_W_TYPE _c1, _c2, _c3;                                  \
00544     r0 = x0 + y0;                                       \
00545     _c1 = r0 < x0;                                      \
00546     r1 = x1 + y1;                                       \
00547     _c2 = r1 < x1;                                      \
00548     r1 += _c1;                                                 \
00549     _c2 |= r1 < _c1;                                    \
00550     r2 = x2 + y2;                                       \
00551     _c3 = r2 < x2;                                      \
00552     r2 += _c2;                                                 \
00553     _c3 |= r2 < _c2;                                    \
00554     r3 = x3 + y3 + _c3;                                        \
00555   } while (0)
00556 #endif
00557 
00558 #ifndef __FP_FRAC_SUB_3
00559 #define __FP_FRAC_SUB_3(r2,r1,r0,x2,x1,x0,y2,y1,y0)            \
00560   do {                                                  \
00561     _FP_W_TYPE _c1, _c2;                                \
00562     r0 = x0 - y0;                                       \
00563     _c1 = r0 > x0;                                      \
00564     r1 = x1 - y1;                                       \
00565     _c2 = r1 > x1;                                      \
00566     r1 -= _c1;                                                 \
00567     _c2 |= _c1 && (y1 == x1);                                  \
00568     r2 = x2 - y2 - _c2;                                        \
00569   } while (0)
00570 #endif
00571 
00572 #ifndef __FP_FRAC_SUB_4
00573 #define __FP_FRAC_SUB_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0)   \
00574   do {                                                  \
00575     _FP_W_TYPE _c1, _c2, _c3;                                  \
00576     r0 = x0 - y0;                                       \
00577     _c1 = r0 > x0;                                      \
00578     r1 = x1 - y1;                                       \
00579     _c2 = r1 > x1;                                      \
00580     r1 -= _c1;                                                 \
00581     _c2 |= _c1 && (y1 == x1);                                  \
00582     r2 = x2 - y2;                                       \
00583     _c3 = r2 > x2;                                      \
00584     r2 -= _c2;                                                 \
00585     _c3 |= _c2 && (y2 == x2);                                  \
00586     r3 = x3 - y3 - _c3;                                        \
00587   } while (0)
00588 #endif
00589 
00590 #ifndef __FP_FRAC_DEC_3
00591 #define __FP_FRAC_DEC_3(x2,x1,x0,y2,y1,y0)                            \
00592   do {                                                         \
00593     UWtype _t0, _t1, _t2;                                      \
00594     _t0 = x0, _t1 = x1, _t2 = x2;                              \
00595     __FP_FRAC_SUB_3 (x2, x1, x0, _t2, _t1, _t0, y2, y1, y0);          \
00596   } while (0)
00597 #endif
00598 
00599 #ifndef __FP_FRAC_DEC_4
00600 #define __FP_FRAC_DEC_4(x3,x2,x1,x0,y3,y2,y1,y0)               \
00601   do {                                                         \
00602     UWtype _t0, _t1, _t2, _t3;                                        \
00603     _t0 = x0, _t1 = x1, _t2 = x2, _t3 = x3;                           \
00604     __FP_FRAC_SUB_4 (x3,x2,x1,x0,_t3,_t2,_t1,_t0, y3,y2,y1,y0);              \
00605   } while (0)
00606 #endif
00607 
00608 #ifndef __FP_FRAC_ADDI_4
00609 #define __FP_FRAC_ADDI_4(x3,x2,x1,x0,i)                               \
00610   do {                                                         \
00611     UWtype _t;                                                        \
00612     _t = ((x0 += i) < i);                                      \
00613     x1 += _t; _t = (x1 < _t);                                         \
00614     x2 += _t; _t = (x2 < _t);                                         \
00615     x3 += _t;                                                  \
00616   } while (0)
00617 #endif
00618 
00619 /* Convert FP values between word sizes. This appears to be more
00620  * complicated than I'd have expected it to be, so these might be
00621  * wrong... These macros are in any case somewhat bogus because they
00622  * use information about what various FRAC_n variables look like 
00623  * internally [eg, that 2 word vars are X_f0 and x_f1]. But so do
00624  * the ones in op-2.h and op-1.h. 
00625  */
00626 #define _FP_FRAC_COPY_1_4(D, S)           (D##_f = S##_f[0])
00627 
00628 #define _FP_FRAC_COPY_2_4(D, S)                  \
00629 do {                                      \
00630   D##_f0 = S##_f[0];                      \
00631   D##_f1 = S##_f[1];                      \
00632 } while (0)
00633 
00634 /* Assembly/disassembly for converting to/from integral types.  
00635  * No shifting or overflow handled here.
00636  */
00637 /* Put the FP value X into r, which is an integer of size rsize. */
00638 #define _FP_FRAC_ASSEMBLE_4(r, X, rsize)                       \
00639   do {                                                         \
00640     if (rsize <= _FP_W_TYPE_SIZE)                              \
00641       r = X##_f[0];                                            \
00642     else if (rsize <= 2*_FP_W_TYPE_SIZE)                       \
00643     {                                                          \
00644       r = X##_f[1];                                            \
00645       r <<= _FP_W_TYPE_SIZE;                                          \
00646       r += X##_f[0];                                           \
00647     }                                                          \
00648     else                                                       \
00649     {                                                          \
00650       /* I'm feeling lazy so we deal with int == 3words (implausible)*/      \
00651       /* and int == 4words as a single case.                    */    \
00652       r = X##_f[3];                                            \
00653       r <<= _FP_W_TYPE_SIZE;                                          \
00654       r += X##_f[2];                                           \
00655       r <<= _FP_W_TYPE_SIZE;                                          \
00656       r += X##_f[1];                                           \
00657       r <<= _FP_W_TYPE_SIZE;                                          \
00658       r += X##_f[0];                                           \
00659     }                                                          \
00660   } while (0)
00661 
00662 /* "No disassemble Number Five!" */
00663 /* move an integer of size rsize into X's fractional part. We rely on
00664  * the _f[] array consisting of words of size _FP_W_TYPE_SIZE to avoid
00665  * having to mask the values we store into it.
00666  */
00667 #define _FP_FRAC_DISASSEMBLE_4(X, r, rsize)                           \
00668   do {                                                         \
00669     X##_f[0] = r;                                              \
00670     X##_f[1] = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE); \
00671     X##_f[2] = (rsize <= 2*_FP_W_TYPE_SIZE ? 0 : r >> 2*_FP_W_TYPE_SIZE); \
00672     X##_f[3] = (rsize <= 3*_FP_W_TYPE_SIZE ? 0 : r >> 3*_FP_W_TYPE_SIZE); \
00673   } while (0);
00674 
00675 #define _FP_FRAC_COPY_4_1(D, S)                  \
00676 do {                                      \
00677   D##_f[0] = S##_f;                       \
00678   D##_f[1] = D##_f[2] = D##_f[3] = 0;            \
00679 } while (0)
00680 
00681 #define _FP_FRAC_COPY_4_2(D, S)                  \
00682 do {                                      \
00683   D##_f[0] = S##_f0;                      \
00684   D##_f[1] = S##_f1;                      \
00685   D##_f[2] = D##_f[3] = 0;                \
00686 } while (0)
00687 
00688 #define _FP_FRAC_COPY_4_4(D,S)     _FP_FRAC_COPY_4(D,S)