Back to index

lightning-sunbird  0.9+nobinonly
mpvalpha.c
Go to the documentation of this file.
00001 /* ***** BEGIN LICENSE BLOCK *****
00002  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00003  *
00004  * The contents of this file are subject to the Mozilla Public License Version
00005  * 1.1 (the "License"); you may not use this file except in compliance with
00006  * the License. You may obtain a copy of the License at
00007  * http://www.mozilla.org/MPL/
00008  *
00009  * Software distributed under the License is distributed on an "AS IS" basis,
00010  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00011  * for the specific language governing rights and limitations under the
00012  * License.
00013  *
00014  * The Original Code is Multiple Precision Integer optimization code for 
00015  * the Compaq Alpha processor.
00016  *
00017  * The Initial Developer of the Original Code is
00018  * Richard C. Swift.
00019  * Portions created by the Initial Developer are Copyright (C) 2001
00020  * the Initial Developer. All Rights Reserved.
00021  *
00022  * Contributor(s):   Richard C. Swift     (swift@netscape.com)
00023  *
00024  * Alternatively, the contents of this file may be used under the terms of
00025  * either the GNU General Public License Version 2 or later (the "GPL"), or
00026  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00027  * in which case the provisions of the GPL or the LGPL are applicable instead
00028  * of those above. If you wish to allow use of your version of this file only
00029  * under the terms of either the GPL or the LGPL, and not to allow others to
00030  * use your version of this file under the terms of the MPL, indicate your
00031  * decision by deleting the provisions above and replace them with the notice
00032  * and other provisions required by the GPL or the LGPL. If you do not delete
00033  * the provisions above, a recipient may use your version of this file under
00034  * the terms of any one of the MPL, the GPL or the LGPL.
00035  *
00036  * ***** END LICENSE BLOCK ***** */
00037 
00038 #include "mpi-priv.h"
00039 #include <c_asm.h>
00040 
00041 
/*
 * MP_MUL_DxD(a, b, Phi, Plo)
 *
 * Multiplies the digits a and b and stores the two halves of the product:
 * Plo is assigned the result of the Alpha "mulq" instruction and Phi the
 * result of "umulh", both issued through the compiler's asm() intrinsic
 * (see the <c_asm.h> include).
 *
 * a and b each appear twice in the expansion, so they must not have side
 * effects.  Phi and Plo must be assignable.
 *
 * The body is wrapped in do { } while (0) so that the macro followed by a
 * semicolon is exactly one statement, even as the unbraced body of an
 * if/else.
 */
#define MP_MUL_DxD(a, b, Phi, Plo)                      \
    do {                                                \
        Plo = asm ("mulq %a0, %a1, %v0", a, b);         \
        Phi = asm ("umulh %a0, %a1, %v0", a, b);        \
    } while (0)

/*
 * CARRY_ADD is a hook that ONE_MUL expands after folding in the running
 * carry and before storing the digit.  The loop in s_mpv_mul_d needs no
 * extra work at that point, so the hook is empty here.
 */
#define CARRY_ADD

/*
 * ONE_MUL performs one multiply step: it fetches the next digit from a,
 * multiplies it by b, adds the running carry to the low half (bumping the
 * high half when that addition wraps), expands CARRY_ADD, stores the low
 * half through c and keeps the high half as the new carry.
 *
 * Expands to a bare statement sequence.  a, b, c, carry, a_i, a0b0 and
 * a1b1 must be in scope where it is used; a and c each advance by one.
 */
#define ONE_MUL                                 \
    a_i = *a;                                   \
    ++a;                                        \
    MP_MUL_DxD(a_i, b, a1b1, a0b0);             \
    a0b0 += carry;                              \
    a1b1 += (a0b0 < carry);                     \
    CARRY_ADD                                   \
    *c = a0b0;                                  \
    ++c;                                        \
    carry = a1b1;

/*
 * Unrolled groups of multiply steps.  Each macro expands to the stated
 * number of consecutive ONE_MUL expansions and nothing else.
 */
#define FOUR_MUL            ONE_MUL ONE_MUL ONE_MUL ONE_MUL
#define SIXTEEN_MUL         FOUR_MUL FOUR_MUL FOUR_MUL FOUR_MUL
#define THIRTYTWO_MUL       SIXTEEN_MUL SIXTEEN_MUL
#define ONETWENTYEIGHT_MUL  THIRTYTWO_MUL THIRTYTWO_MUL THIRTYTWO_MUL THIRTYTWO_MUL
00081 
/*
 * EXPAND_256(CALL) is the shared body of the multiply routines.
 *
 * It declares carry (initialised to 0) together with the scratch
 * variables a_i, a0b0 and a1b1, so it has to open the enclosing block.
 * The low eight bits of a_len are then consumed one bit at a time, lowest
 * bit first, each set bit running an unrolled group of that many multiply
 * steps; afterwards those bits are cleared from a_len.  If 256 or more
 * digits remain, they are handed to CALL(a, a_len, b, c, carry), whose
 * return value becomes the new carry, and c is advanced by that count
 * (a is not advanced for that portion).
 *
 * a, a_len, b and c must be in scope where the macro is expanded.
 */
#define EXPAND_256(CALL)                                        \
    mp_digit carry = 0;                                         \
    mp_digit a_i, a0b0, a1b1;                                   \
    if (a_len & 0xFF) {                                         \
        if (a_len & 0x01) { ONE_MUL }                           \
        if (a_len & 0x02) { ONE_MUL ONE_MUL }                   \
        if (a_len & 0x04) { FOUR_MUL }                          \
        if (a_len & 0x08) { FOUR_MUL FOUR_MUL }                 \
        if (a_len & 0x10) { SIXTEEN_MUL }                       \
        if (a_len & 0x20) { THIRTYTWO_MUL }                     \
        if (a_len & 0x40) { THIRTYTWO_MUL THIRTYTWO_MUL }       \
        if (a_len & 0x80) { ONETWENTYEIGHT_MUL }                \
        a_len &= (-256);                                        \
    }                                                           \
    if (a_len >= 256) {                                         \
        carry = CALL(a, a_len, b, c, carry);                    \
        c += a_len;                                             \
    }

/*
 * FUNC_NAME(NAME) spells out the signature shared by the 256-digit helper
 * routines: NAME takes the source digits a, their count a_len, the
 * multiplier b, the destination c and an incoming carry, and returns an
 * mp_digit.  Follow it with ';' for a prototype or a body for a definition.
 */
#define FUNC_NAME(NAME) \
    mp_digit NAME(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c, mp_digit carry)

/*
 * DECLARE_MUL_256(FNAME) defines a helper with the FUNC_NAME signature
 * that runs 256 unrolled multiply steps per loop iteration, starting from
 * the carry passed in, and returns the carry left over at the end.
 *
 * The loop stops only when a_len reaches exactly zero after repeated
 * subtraction of 256, so callers must pass a multiple of 256.
 */
#define DECLARE_MUL_256(FNAME)                          \
FUNC_NAME(FNAME)                                        \
{                                                       \
    mp_digit a_i, a0b0, a1b1;                           \
    for (; a_len; a_len -= 256) {                       \
        ONETWENTYEIGHT_MUL ONETWENTYEIGHT_MUL           \
    }                                                   \
    return carry;                                       \
}

/*
 * Unrolling the loop in s_mpv_mul_d appeared to slow down the (admittedly
 * few) tests, such as timetest, that are used to measure performance.
 * Defining DO_NOT_EXPAND switches that unrolling off.
 */
#define DO_NOT_EXPAND 1

/*
 * With unrolling enabled, the 256-digit helper is declared up front so it
 * can be instantiated after the routine that calls it; this helps locality
 * somewhat.
 */
#ifndef DO_NOT_EXPAND
FUNC_NAME(s_mpv_mul_d_MUL256);
#endif
00150 
00151 /* c = a * b */
00152 void s_mpv_mul_d(const mp_digit *a, mp_size a_len, 
00153                      mp_digit b, mp_digit *c)
00154 {
00155 #if defined(DO_NOT_EXPAND)
00156   mp_digit carry = 0;
00157   while (a_len--) {
00158     mp_digit a_i = *a++;
00159     mp_digit a0b0, a1b1;
00160 
00161     MP_MUL_DxD(a_i, b, a1b1, a0b0);
00162 
00163     a0b0 += carry;
00164     if (a0b0 < carry)
00165       ++a1b1;
00166     *c++ = a0b0;
00167     carry = a1b1;
00168   }
00169 #else
00170   EXPAND_256(s_mpv_mul_d_MUL256)
00171 #endif
00172   *c = carry;
00173 }
00174 
00175 #if !defined(DO_NOT_EXPAND)
00176 DECLARE_MUL_256(s_mpv_mul_d_MUL256)
00177 #endif
00178 
#undef CARRY_ADD
/*
 * Redefined for the loop in s_mpv_mul_d_add: the digit currently at *c is
 * read into a_i and added to the low half, and the high half is bumped
 * when that addition wraps.
 */
#define CARRY_ADD                               \
    a_i = *c;                                   \
    a0b0 += a_i;                                \
    a1b1 += (a0b0 < a_i);

00186 /* Need forward declaration so it can be instantiated between the
00187        two routines that use it; this helps locality somewhat  */
00188 FUNC_NAME(s_mpv_mul_d_add_MUL256);
00189 
00190 /* c += a * b */
00191 void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, 
00192                      mp_digit b, mp_digit *c)
00193 {
00194   EXPAND_256(s_mpv_mul_d_add_MUL256)
00195   *c = carry;
00196 }
00197 
00198 /* Instantiate multiply 256 routine here */
00199 DECLARE_MUL_256(s_mpv_mul_d_add_MUL256)
00200 
00201 /* Presently, this is only used by the Montgomery arithmetic code. */
00202 /* c += a * b */
00203 void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, 
00204                      mp_digit b, mp_digit *c)
00205 {
00206   EXPAND_256(s_mpv_mul_d_add_MUL256)
00207   while (carry) {
00208     mp_digit c_i = *c;
00209     carry += c_i;
00210     *c++ = carry;
00211     carry = carry < c_i;
00212   }
00213 }
00214