Back to index

lightning-sunbird  0.9+nobinonly
mpi_sparc.c
Go to the documentation of this file.
00001 /* ***** BEGIN LICENSE BLOCK *****
00002  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00003  *
00004  * The contents of this file are subject to the Mozilla Public License Version
00005  * 1.1 (the "License"); you may not use this file except in compliance with
00006  * the License. You may obtain a copy of the License at
00007  * http://www.mozilla.org/MPL/
00008  *
00009  * Software distributed under the License is distributed on an "AS IS" basis,
00010  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00011  * for the specific language governing rights and limitations under the
00012  * License.
00013  *
00014  * The Original Code is the Netscape security libraries.
00015  *
00016  * The Initial Developer of the Original Code is
00017  * Netscape Communications Corporation.
00018  * Portions created by the Initial Developer are Copyright (C) 2000
00019  * the Initial Developer. All Rights Reserved.
00020  *
00021  * Contributor(s):
00022  *
00023  * Alternatively, the contents of this file may be used under the terms of
00024  * either the GNU General Public License Version 2 or later (the "GPL"), or
00025  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00026  * in which case the provisions of the GPL or the LGPL are applicable instead
00027  * of those above. If you wish to allow use of your version of this file only
00028  * under the terms of either the GPL or the LGPL, and not to allow others to
00029  * use your version of this file under the terms of the MPL, indicate your
00030  * decision by deleting the provisions above and replace them with the notice
00031  * and other provisions required by the GPL or the LGPL. If you do not delete
00032  * the provisions above, a recipient may use your version of this file under
00033  * the terms of any one of the MPL, the GPL or the LGPL.
00034  *
00035  * ***** END LICENSE BLOCK ***** */
00036 /* $Id: mpi_sparc.c,v 1.6.30.1 2006/01/23 00:39:33 nelsonb%netscape.com Exp $ */
00037 
00038 /* Multiplication performance enhancements for sparc v8+vis CPUs. */
00039 
00040 #include "mpi-priv.h"
00041 #include <stddef.h>
00042 #include <sys/systeminfo.h>
00043 #include <strings.h>
00044 
00045 /* Contract shared by the two external routines declared below: */
00046 /*   - vector y must be 8-byte aligned, and n must be even; */
00047 /*   - each returns the carry out of the high order word of the result; */
00048 /*   - the maximum n is 256. */
00049 
00050 /* vector x += vector y * scalar a; where y is of length n words. */
00051 extern mp_digit mul_add_inp(mp_digit *x, const mp_digit *y, int n, mp_digit a);
00052 
00053 /* vector z = vector x + vector y * scalar a; where y is of length n words. */
00054 extern mp_digit mul_add(mp_digit *z, const mp_digit *x, const mp_digit *y, 
00055                      int n, mp_digit a);
00056 
00057 /* v8 versions of these functions run on any Sparc v8 CPU. */
00058 
/*
 * MP_MUL_DxD(a, b, Phi, Plo)
 *
 * Multiply the digits a and b through an unsigned long long intermediate,
 * then store the low digit of the product in Plo and the digit above it
 * (product >> MP_DIGIT_BIT) in Phi.  Phi and Plo must be assignable lvalues.
 *
 * This trick works on Sparc V8 CPUs with the Workshop compilers.
 * NOTE(review): assumes unsigned long long is at least 2 * MP_DIGIT_BIT
 * bits wide -- confirm against the mp_digit definition in use.
 *
 * Every argument is parenthesized so expression arguments (e.g. x + 1) keep
 * their intended precedence, and the body is wrapped in do { } while (0) so
 * the macro behaves as a single statement (safe in an unbraced if/else).
 * The temporary has a deliberately unusual name so it cannot shadow an
 * argument expression supplied by the caller.
 */
#define MP_MUL_DxD(a, b, Phi, Plo) \
  do { \
    unsigned long long mp_mul_dxd_product_ = (unsigned long long)(a) * (b); \
    (Plo) = (mp_digit)mp_mul_dxd_product_; \
    (Phi) = (mp_digit)(mp_mul_dxd_product_ >> MP_DIGIT_BIT); \
  } while (0)
00064 
00065 /* c = a * b */
00066 static void 
00067 v8_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
00068 {
00069 #if !defined(MP_NO_MP_WORD)
00070   mp_digit   d = 0;
00071 
00072   /* Inner product:  Digits of a */
00073   while (a_len--) {
00074     mp_word w = ((mp_word)b * *a++) + d;
00075     *c++ = ACCUM(w);
00076     d = CARRYOUT(w);
00077   }
00078   *c = d;
00079 #else
00080   mp_digit carry = 0;
00081   while (a_len--) {
00082     mp_digit a_i = *a++;
00083     mp_digit a0b0, a1b1;
00084 
00085     MP_MUL_DxD(a_i, b, a1b1, a0b0);
00086 
00087     a0b0 += carry;
00088     if (a0b0 < carry)
00089       ++a1b1;
00090     *c++ = a0b0;
00091     carry = a1b1;
00092   }
00093   *c = carry;
00094 #endif
00095 }
00096 
00097 /* c += a * b */
00098 static void 
00099 v8_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
00100 {
00101 #if !defined(MP_NO_MP_WORD)
00102   mp_digit   d = 0;
00103 
00104   /* Inner product:  Digits of a */
00105   while (a_len--) {
00106     mp_word w = ((mp_word)b * *a++) + *c + d;
00107     *c++ = ACCUM(w);
00108     d = CARRYOUT(w);
00109   }
00110   *c = d;
00111 #else
00112   mp_digit carry = 0;
00113   while (a_len--) {
00114     mp_digit a_i = *a++;
00115     mp_digit a0b0, a1b1;
00116 
00117     MP_MUL_DxD(a_i, b, a1b1, a0b0);
00118 
00119     a0b0 += carry;
00120     if (a0b0 < carry)
00121       ++a1b1;
00122     a0b0 += a_i = *c;
00123     if (a0b0 < a_i)
00124       ++a1b1;
00125     *c++ = a0b0;
00126     carry = a1b1;
00127   }
00128   *c = carry;
00129 #endif
00130 }
00131 
00132 /* Presently, this is only used by the Montgomery arithmetic code. */
00133 /* c += a * b */
00134 static void 
00135 v8_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
00136 {
00137 #if !defined(MP_NO_MP_WORD)
00138   mp_digit   d = 0;
00139 
00140   /* Inner product:  Digits of a */
00141   while (a_len--) {
00142     mp_word w = ((mp_word)b * *a++) + *c + d;
00143     *c++ = ACCUM(w);
00144     d = CARRYOUT(w);
00145   }
00146 
00147   while (d) {
00148     mp_word w = (mp_word)*c + d;
00149     *c++ = ACCUM(w);
00150     d = CARRYOUT(w);
00151   }
00152 #else
00153   mp_digit carry = 0;
00154   while (a_len--) {
00155     mp_digit a_i = *a++;
00156     mp_digit a0b0, a1b1;
00157 
00158     MP_MUL_DxD(a_i, b, a1b1, a0b0);
00159 
00160     a0b0 += carry;
00161     if (a0b0 < carry)
00162       ++a1b1;
00163 
00164     a0b0 += a_i = *c;
00165     if (a0b0 < a_i)
00166       ++a1b1;
00167 
00168     *c++ = a0b0;
00169     carry = a1b1;
00170   }
00171   while (carry) {
00172     mp_digit c_i = *c;
00173     carry += c_i;
00174     *c++ = carry;
00175     carry = carry < c_i;
00176   }
00177 #endif
00178 }
00179 
00180 /* These functions run only on v8plus+vis or v9+vis CPUs. */
00181 
00182 /* c = a * b */
00183 void 
00184 s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
00185 {
00186     mp_digit d;
00187     mp_digit x[258];
00188     if (a_len <= 256) {
00189        if (a == c || ((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {
00190            mp_digit * px;
00191            px = (((ptrdiff_t)x & 0x7) != 0) ? x + 1 : x;
00192            memcpy(px, a, a_len * sizeof(*a));
00193            a = px;
00194            if (a_len & 1) {
00195               px[a_len] = 0;
00196            }
00197        }
00198        s_mp_setz(c, a_len + 1);
00199        d = mul_add_inp(c, a, a_len, b);
00200        c[a_len] = d;
00201     } else {
00202        v8_mpv_mul_d(a, a_len, b, c);
00203     }
00204 }
00205 
00206 /* c += a * b, where a is a_len words long. */
00207 void     
00208 s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
00209 {
00210     mp_digit d;
00211     mp_digit x[258];
00212     if (a_len <= 256) {
00213        if (((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {
00214            mp_digit * px;
00215            px = (((ptrdiff_t)x & 0x7) != 0) ? x + 1 : x;
00216            memcpy(px, a, a_len * sizeof(*a));
00217            a = px;
00218            if (a_len & 1) {
00219               px[a_len] = 0;
00220            }
00221        }
00222        d = mul_add_inp(c, a, a_len, b);
00223        c[a_len] = d;
00224     } else {
00225        v8_mpv_mul_d_add(a, a_len, b, c);
00226     }
00227 }
00228 
00229 /* c += a * b, where a is y words long. */
00230 void     
00231 s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
00232 {
00233     mp_digit d;
00234     mp_digit x[258];
00235     if (a_len <= 256) {
00236        if (((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {
00237            mp_digit * px;
00238            px = (((ptrdiff_t)x & 0x7) != 0) ? x + 1 : x;
00239            memcpy(px, a, a_len * sizeof(*a));
00240            a = px;
00241            if (a_len & 1) {
00242               px[a_len] = 0;
00243            }
00244        }
00245        d = mul_add_inp(c, a, a_len, b);
00246        if (d) {
00247            c += a_len;
00248            do {
00249               mp_digit sum = d + *c;
00250               *c++ = sum;
00251               d = sum < d;
00252            } while (d);
00253        }
00254     } else {
00255        v8_mpv_mul_d_add_prop(a, a_len, b, c);
00256     }
00257 }