Back to index

lightning-sunbird  0.9+nobinonly
montmulf.c
Go to the documentation of this file.
00001 /* ***** BEGIN LICENSE BLOCK *****
00002  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00003  *
00004  * The contents of this file are subject to the Mozilla Public License Version
00005  * 1.1 (the "License"); you may not use this file except in compliance with
00006  * the License. You may obtain a copy of the License at
00007  * http://www.mozilla.org/MPL/
00008  *
00009  * Software distributed under the License is distributed on an "AS IS" basis,
00010  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00011  * for the specific language governing rights and limitations under the
00012  * License.
00013  *
00014  * The Original Code is SPARC optimized Montgomery multiply functions.
00015  *
00016  * The Initial Developer of the Original Code is
00017  * Sun Microsystems Inc.
00018  * Portions created by the Initial Developer are Copyright (C) 1999-2000
00019  * the Initial Developer. All Rights Reserved.
00020  *
00021  * Contributor(s):
00022  *   Netscape Communications Corporation
00023  *
00024  * Alternatively, the contents of this file may be used under the terms of
00025  * either the GNU General Public License Version 2 or later (the "GPL"), or
00026  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00027  * in which case the provisions of the GPL or the LGPL are applicable instead
00028  * of those above. If you wish to allow use of your version of this file only
00029  * under the terms of either the GPL or the LGPL, and not to allow others to
00030  * use your version of this file under the terms of the MPL, indicate your
00031  * decision by deleting the provisions above and replace them with the notice
00032  * and other provisions required by the GPL or the LGPL. If you do not delete
00033  * the provisions above, a recipient may use your version of this file under
00034  * the terms of any one of the MPL, the GPL or the LGPL.
00035  *
00036  * ***** END LICENSE BLOCK ***** */
00037 /* $Id: montmulf.c,v 1.7 2004/04/27 23:04:36 gerv%gerv.net Exp $ */
00038 
00039 #ifdef SOLARIS
00040 #define RF_INLINE_MACROS 1
00041 #endif
00042 
00043 static const double TwoTo16=65536.0;
00044 static const double TwoToMinus16=1.0/65536.0;
00045 static const double Zero=0.0;
00046 static const double TwoTo32=65536.0*65536.0;
00047 static const double TwoToMinus32=1.0/(65536.0*65536.0);
00048 
00049 #ifdef RF_INLINE_MACROS
00050 
00051 double upper32(double);
00052 double lower32(double, double);
00053 double mod(double, double, double);
00054 
00055 void i16_to_d16_and_d32x4(const double * /*1/(2^16)*/, 
00056                        const double * /* 2^16*/,
00057                        const double * /* 0 */,
00058                        double *       /*result16*/, 
00059                        double *       /* result32 */,
00060                        float *  /*source - should be unsigned int*
00061                                    converted to float* */);
00062 
00063 #else
00064 #ifdef MP_USE_FLOOR
00065 #include <math.h>
00066 #else
00067 #define floor(d) ((double)((unsigned long long)(d)))
00068 #endif
00069 
00070 static double upper32(double x)
00071 {
00072   return floor(x*TwoToMinus32);
00073 }
00074 
00075 static double lower32(double x, double y)
00076 {
00077   return x-TwoTo32*floor(x*TwoToMinus32);
00078 }
00079 
00080 static double mod(double x, double oneoverm, double m)
00081 {
00082   return x-m*floor(x*oneoverm);
00083 }
00084 
00085 #endif
00086 
00087 
00088 static void cleanup(double *dt, int from, int tlen)
00089 {
00090  int i;
00091  double tmp,tmp1,x,x1;
00092 
00093  tmp=tmp1=Zero;
00094  /* original code **
00095  for(i=2*from;i<2*tlen-2;i++)
00096    {
00097      x=dt[i];
00098      dt[i]=lower32(x,Zero)+tmp1;
00099      tmp1=tmp;
00100      tmp=upper32(x);
00101    }
00102  dt[tlen-2]+=tmp1;
00103  dt[tlen-1]+=tmp;
00104  **end original code ***/
00105  /* new code ***/
00106  for(i=2*from;i<2*tlen;i+=2)
00107    {
00108      x=dt[i];
00109      x1=dt[i+1];
00110      dt[i]=lower32(x,Zero)+tmp;
00111      dt[i+1]=lower32(x1,Zero)+tmp1;
00112      tmp=upper32(x);
00113      tmp1=upper32(x1);
00114    }
00116 }
00117 
00118 
00119 void conv_d16_to_i32(unsigned int *i32, double *d16, long long *tmp, int ilen)
00120 {
00121 int i;
00122 long long t, t1, a, b, c, d;
00123 
00124  t1=0;
00125  a=(long long)d16[0];
00126  b=(long long)d16[1];
00127  for(i=0; i<ilen-1; i++)
00128    {
00129      c=(long long)d16[2*i+2];
00130      t1+=(unsigned int)a;
00131      t=(a>>32);
00132      d=(long long)d16[2*i+3];
00133      t1+=(b&0xffff)<<16;
00134      t+=(b>>16)+(t1>>32);
00135      i32[i]=(unsigned int)t1;
00136      t1=t;
00137      a=c;
00138      b=d;
00139    }
00140  t1+=(unsigned int)a;
00141  t=(a>>32);
00142  t1+=(b&0xffff)<<16;
00143  i32[i]=(unsigned int)t1;
00144 }
00145 
00146 void conv_i32_to_d32(double *d32, unsigned int *i32, int len)
00147 {
00148 int i;
00149 
00150 #pragma pipeloop(0)
00151  for(i=0;i<len;i++) d32[i]=(double)(i32[i]);
00152 }
00153 
00154 
00155 void conv_i32_to_d16(double *d16, unsigned int *i32, int len)
00156 {
00157 int i;
00158 unsigned int a;
00159 
00160 #pragma pipeloop(0)
00161  for(i=0;i<len;i++)
00162    {
00163      a=i32[i];
00164      d16[2*i]=(double)(a&0xffff);
00165      d16[2*i+1]=(double)(a>>16);
00166    }
00167 }
00168 
00169 
00170 void conv_i32_to_d32_and_d16(double *d32, double *d16, 
00171                           unsigned int *i32, int len)
00172 {
00173 int i = 0;
00174 unsigned int a;
00175 
00176 #pragma pipeloop(0)
00177 #ifdef RF_INLINE_MACROS
00178  for(;i<len-3;i+=4)
00179    {
00180      i16_to_d16_and_d32x4(&TwoToMinus16, &TwoTo16, &Zero,
00181                        &(d16[2*i]), &(d32[i]), (float *)(&(i32[i])));
00182    }
00183 #endif
00184  for(;i<len;i++)
00185    {
00186      a=i32[i];
00187      d32[i]=(double)(i32[i]);
00188      d16[2*i]=(double)(a&0xffff);
00189      d16[2*i+1]=(double)(a>>16);
00190    }
00191 }
00192 
00193 
00194 void adjust_montf_result(unsigned int *i32, unsigned int *nint, int len)
00195 {
00196 long long acc;
00197 int i;
00198 
00199  if(i32[len]>0) i=-1;
00200  else
00201    {
00202      for(i=len-1; i>=0; i--)
00203        {
00204         if(i32[i]!=nint[i]) break;
00205        }
00206    }
00207  if((i<0)||(i32[i]>nint[i]))
00208    {
00209      acc=0;
00210      for(i=0;i<len;i++)
00211        {
00212         acc=acc+(unsigned long long)(i32[i])-(unsigned long long)(nint[i]);
00213         i32[i]=(unsigned int)acc;
00214         acc=acc>>32;
00215        }
00216    }
00217 }
00218 
00219 
00220 
00221 
00222 /*
00223 ** the lengths of the input arrays should be at least the following:
00224 ** result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen]
00225 ** all of them should be different from one another
00226 **
00227 */
00228 void mont_mulf_noconv(unsigned int *result,
00229                    double *dm1, double *dm2, double *dt,
00230                    double *dn, unsigned int *nint,
00231                    int nlen, double dn0)
00232 {
00233  int i, j, jj;
00234  int tmp;
00235  double digit, m2j, nextm2j, a, b;
00236  double *dptmp, *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0;
00237 
00238  pdm1=&(dm1[0]);
00239  pdm2=&(dm2[0]);
00240  pdn=&(dn[0]);
00241  pdm2[2*nlen]=Zero;
00242 
00243  if (nlen!=16)
00244    {
00245      for(i=0;i<4*nlen+2;i++) dt[i]=Zero;
00246 
00247      a=dt[0]=pdm1[0]*pdm2[0];
00248      digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16);
00249 
00250      pdtj=&(dt[0]);
00251      for(j=jj=0;j<2*nlen;j++,jj++,pdtj++)
00252        {
00253         m2j=pdm2[j];
00254         a=pdtj[0]+pdn[0]*digit;
00255         b=pdtj[1]+pdm1[0]*pdm2[j+1]+a*TwoToMinus16;
00256         pdtj[1]=b;
00257 
00258 #pragma pipeloop(0)
00259         for(i=1;i<nlen;i++)
00260           {
00261             pdtj[2*i]+=pdm1[i]*m2j+pdn[i]*digit;
00262           }
00263         if((jj==30)) {cleanup(dt,j/2+1,2*nlen+1); jj=0;}
00264         
00265         digit=mod(lower32(b,Zero)*dn0,TwoToMinus16,TwoTo16);
00266        }
00267    }
00268  else
00269    {
00270      a=dt[0]=pdm1[0]*pdm2[0];
00271 
00272      dt[65]=     dt[64]=     dt[63]=     dt[62]=     dt[61]=     dt[60]=
00273      dt[59]=     dt[58]=     dt[57]=     dt[56]=     dt[55]=     dt[54]=
00274      dt[53]=     dt[52]=     dt[51]=     dt[50]=     dt[49]=     dt[48]=
00275      dt[47]=     dt[46]=     dt[45]=     dt[44]=     dt[43]=     dt[42]=
00276      dt[41]=     dt[40]=     dt[39]=     dt[38]=     dt[37]=     dt[36]=
00277      dt[35]=     dt[34]=     dt[33]=     dt[32]=     dt[31]=     dt[30]=
00278      dt[29]=     dt[28]=     dt[27]=     dt[26]=     dt[25]=     dt[24]=
00279      dt[23]=     dt[22]=     dt[21]=     dt[20]=     dt[19]=     dt[18]=
00280      dt[17]=     dt[16]=     dt[15]=     dt[14]=     dt[13]=     dt[12]=
00281      dt[11]=     dt[10]=     dt[ 9]=     dt[ 8]=     dt[ 7]=     dt[ 6]=
00282      dt[ 5]=     dt[ 4]=     dt[ 3]=     dt[ 2]=     dt[ 1]=Zero;
00283 
00284      pdn_0=pdn[0];
00285      pdm1_0=pdm1[0];
00286 
00287      digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16);
00288      pdtj=&(dt[0]);
00289 
00290      for(j=0;j<32;j++,pdtj++)
00291        {
00292 
00293         m2j=pdm2[j];
00294         a=pdtj[0]+pdn_0*digit;
00295         b=pdtj[1]+pdm1_0*pdm2[j+1]+a*TwoToMinus16;
00296         pdtj[1]=b;
00297 
00298         /**** this loop will be fully unrolled:
00299         for(i=1;i<16;i++)
00300           {
00301             pdtj[2*i]+=pdm1[i]*m2j+pdn[i]*digit;
00302           }
00303         *************************************/
00304             pdtj[2]+=pdm1[1]*m2j+pdn[1]*digit;
00305             pdtj[4]+=pdm1[2]*m2j+pdn[2]*digit;
00306             pdtj[6]+=pdm1[3]*m2j+pdn[3]*digit;
00307             pdtj[8]+=pdm1[4]*m2j+pdn[4]*digit;
00308             pdtj[10]+=pdm1[5]*m2j+pdn[5]*digit;
00309             pdtj[12]+=pdm1[6]*m2j+pdn[6]*digit;
00310             pdtj[14]+=pdm1[7]*m2j+pdn[7]*digit;
00311             pdtj[16]+=pdm1[8]*m2j+pdn[8]*digit;
00312             pdtj[18]+=pdm1[9]*m2j+pdn[9]*digit;
00313             pdtj[20]+=pdm1[10]*m2j+pdn[10]*digit;
00314             pdtj[22]+=pdm1[11]*m2j+pdn[11]*digit;
00315             pdtj[24]+=pdm1[12]*m2j+pdn[12]*digit;
00316             pdtj[26]+=pdm1[13]*m2j+pdn[13]*digit;
00317             pdtj[28]+=pdm1[14]*m2j+pdn[14]*digit;
00318             pdtj[30]+=pdm1[15]*m2j+pdn[15]*digit;
00319         /* no need for cleenup, cannot overflow */
00320         digit=mod(lower32(b,Zero)*dn0,TwoToMinus16,TwoTo16);
00321        }
00322    }
00323 
00324  conv_d16_to_i32(result,dt+2*nlen,(long long *)dt,nlen+1);
00325 
00326  adjust_montf_result(result,nint,nlen); 
00327  
00328 }
00329