Back to index

lightning-sunbird  0.9+nobinonly
mpi_x86_asm.c
Go to the documentation of this file.
00001 /*
00002  *  mpi_x86_asm.c - MSVC inline assembly implementation of s_mpv_ functions.
00003  * 
00004  * ***** BEGIN LICENSE BLOCK *****
00005  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00006  *
00007  * The contents of this file are subject to the Mozilla Public License Version
00008  * 1.1 (the "License"); you may not use this file except in compliance with
00009  * the License. You may obtain a copy of the License at
00010  * http://www.mozilla.org/MPL/
00011  *
00012  * Software distributed under the License is distributed on an "AS IS" basis,
00013  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00014  * for the specific language governing rights and limitations under the
00015  * License.
00016  *
00017  * The Original Code is the Netscape security libraries.
00018  *
00019  * The Initial Developer of the Original Code is
00020  * Netscape Communications Corporation.
00021  * Portions created by the Initial Developer are Copyright (C) 2000
00022  * the Initial Developer. All Rights Reserved.
00023  *
00024  * Contributor(s):
00025  *   Benjamin Smedberg <benjamin@smedbergs.us>
00026  *
00027  * Alternatively, the contents of this file may be used under the terms of
00028  * either the GNU General Public License Version 2 or later (the "GPL"), or
00029  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00030  * in which case the provisions of the GPL or the LGPL are applicable instead
00031  * of those above. If you wish to allow use of your version of this file only
00032  * under the terms of either the GPL or the LGPL, and not to allow others to
00033  * use your version of this file under the terms of the MPL, indicate your
00034  * decision by deleting the provisions above and replace them with the notice
00035  * and other provisions required by the GPL or the LGPL. If you do not delete
00036  * the provisions above, a recipient may use your version of this file under
00037  * the terms of any one of the MPL, the GPL or the LGPL.
00038  *
00039  * ***** END LICENSE BLOCK ***** */
00040 
00041 #include "mpi-priv.h"
00042 
00043 /*
00044  *   s_mpv_mul_d:  c = a * b, for a of a_len digits and a single digit b.
00045  *   Digits are moved as 32-bit words (lodsd/stosd).  c[i] receives the low
00046  *   word of a[i]*b + carry; the last carry is stored to c[a_len], so
00047  *   a_len + 1 words of c are written (only c[0] = 0 when a_len == 0).
00048  *   stosd stores all of eax, although the inline note below says "ax".
00049  *   ebp - 40:       caller's ebx
00050  *   ebp - 36:       caller's esi
00051  *   ebp - 32:       caller's edi
00052  *   ebp - 28 .. ebp - 4:   reserved by "sub esp,28"; never accessed
00053  *   ebp + 0: caller's ebp
00054  *   ebp + 4: return address
00055  *   ebp + 8: a      argument
00056  *   ebp + 12:       a_len  argument
00057  *   ebp + 16:       b      argument
00058  *   ebp + 20:       c      argument
00059  *   registers:
00060  *     eax:   a_i as loaded, then low word of the product (stored to c)
00061  *     ebx:   carry
00062  *     ecx:   a_len (counts down to 0)
00063  *     edx:   b, then high word of the product
00064  *     esi:   a ptr
00065  *     edi:   c ptr
00066  */
00067 __declspec(naked) void
00068 s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
00069 {
00070   __asm {
00071     push   ebp
00072     mov    ebp,esp
00073     sub    esp,28 /* reserves 28 bytes that the code below never uses */
00074     push   edi
00075     push   esi
00076     push   ebx
00077     mov    ebx,0            ; carry = 0
00078     mov    ecx,[ebp+12]            ; ecx = a_len
00079     mov    edi,[ebp+20] /* edi = c */
00080     cmp    ecx,0
00081     je     L_2                     ; jmp if a_len == 0
00082     mov    esi,[ebp+8]             ; esi = a
00083     cld /* direction flag clear: lodsd/stosd advance esi/edi upward */
00084 L_1:
00085     lodsd                   ; eax = [ds:esi]; esi += 4
00086     mov    edx,[ebp+16]            ; edx = b
00087     mul    edx                     ; edx:eax = Phi:Plo = a_i * b
00088 
00089     add    eax,ebx          ; add carry (ebx) to edx:eax
00090     adc    edx,0 /* fold the carry-out of the low-word add into the high word */
00091     mov    ebx,edx          ; high half of product becomes next carry
00092 
00093     stosd                   ; [es:edi] = ax; edi += 4;
00094     dec    ecx                     ; --a_len
00095     jnz    L_1                     ; jmp if a_len != 0
00096 L_2:
00097     mov    [edi],ebx        ; *c = carry
00098     pop    ebx
00099     pop    esi
00100     pop    edi
00101     leave  
00102     ret    
00103     nop /* placed after ret; no label, so it is never reached */
00104   }
00105 }
00106 
00107 /*
00108  *   s_mpv_mul_d_add:  c += a * b over the low a_len words of c.
00109  *   Digits are moved as 32-bit words (lodsd/stosd).  c[i] receives the low
00110  *   word of a[i]*b + carry + c[i]; the high word becomes the next carry.
00111  *   The last carry is then stored to c[a_len], replacing (not adding to)
00112  *   whatever was there.  stosd stores all of eax, not just "ax".
00113  *   ebp - 40:       caller's ebx
00114  *   ebp - 36:       caller's esi
00115  *   ebp - 32:       caller's edi
00116  *   ebp - 28 .. ebp - 4:   reserved by "sub esp,28"; never accessed
00117  *   ebp + 0: caller's ebp
00118  *   ebp + 4: return address
00119  *   ebp + 8: a      argument
00120  *   ebp + 12:       a_len  argument
00121  *   ebp + 16:       b      argument
00122  *   ebp + 20:       c      argument
00123  *   registers:
00124  *     eax:   a_i as loaded, then low word of the running sum (stored to c)
00125  *     ebx:   carry; briefly also the current word read from c
00126  *     ecx:   a_len (counts down to 0)
00127  *     edx:   b, then high word of the running sum
00128  *     esi:   a ptr
00129  *     edi:   c ptr
00130  */
00131 __declspec(naked) void
00132 s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
00133 {
00134   __asm {
00135     push   ebp
00136     mov    ebp,esp
00137     sub    esp,28 /* reserves 28 bytes that the code below never uses */
00138     push   edi
00139     push   esi
00140     push   ebx
00141     mov    ebx,0            ; carry = 0
00142     mov    ecx,[ebp+12]            ; ecx = a_len
00143     mov    edi,[ebp+20] /* edi = c */
00144     cmp    ecx,0
00145     je     L_4                     ; jmp if a_len == 0
00146     mov    esi,[ebp+8]             ; esi = a
00147     cld /* direction flag clear: lodsd/stosd advance esi/edi upward */
00148 L_3:
00149     lodsd                   ; eax = [ds:esi]; esi += 4
00150     mov    edx,[ebp+16]            ; edx = b
00151     mul    edx                     ; edx:eax = Phi:Plo = a_i * b
00152 
00153     add    eax,ebx          ; add carry (ebx) to edx:eax
00154     adc    edx,0 /* fold the carry-out of the low-word add into the high word */
00155     mov    ebx,[edi]        ; add in current word from *c
00156     add    eax,ebx          
00157     adc    edx,0 /* second carry-out, from adding the word of c */
00158     mov    ebx,edx          ; high half of product becomes next carry
00159 
00160     stosd                   ; [es:edi] = ax; edi += 4;
00161     dec    ecx                     ; --a_len
00162     jnz    L_3                     ; jmp if a_len != 0
00163 L_4:
00164     mov    [edi],ebx        ; *c = carry
00165     pop    ebx
00166     pop    esi
00167     pop    edi
00168     leave  
00169     ret    
00170     nop /* placed after ret; no label, so it is never reached */
00171   }
00172 }
00173 
00174 /*
00175  *   s_mpv_mul_d_add_prop:  c += a * b, with the final carry propagated.
00176  *   The main loop matches s_mpv_mul_d_add.  Afterwards a non-zero carry is
00177  *   added into c[a_len], and while an addition carries out, 1 is added to
00178  *   the next word of c.  That loop has no length limit: it ends only when
00179  *   an addition produces no carry, so c must have room for it.
00180  *   ebp - 40:       caller's ebx
00181  *   ebp - 36:       caller's esi
00182  *   ebp - 32:       caller's edi
00183  *   ebp - 28 .. ebp - 4:   reserved by "sub esp,28"; never accessed
00184  *   ebp + 0: caller's ebp
00185  *   ebp + 4: return address
00186  *   ebp + 8: a      argument
00187  *   ebp + 12:       a_len  argument
00188  *   ebp + 16:       b      argument
00189  *   ebp + 20:       c      argument
00190  *   registers:
00191  *     eax:   a_i as loaded, then the word about to be stored to c
00192  *     ebx:   carry; briefly also the current word read from c
00193  *     ecx:   a_len (counts down to 0)
00194  *     edx:   b, then high word of the running sum
00195  *     esi:   a ptr
00196  *     edi:   c ptr
00197  */
00198 __declspec(naked) void
00199 s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
00200 {
00201   __asm {
00202     push   ebp
00203     mov    ebp,esp
00204     sub    esp,28 /* reserves 28 bytes that the code below never uses */
00205     push   edi
00206     push   esi
00207     push   ebx
00208     mov    ebx,0            ; carry = 0
00209     mov    ecx,[ebp+12]            ; ecx = a_len
00210     mov    edi,[ebp+20] /* edi = c */
00211     cmp    ecx,0
00212     je     L_6                     ; jmp if a_len == 0
00213     cld /* direction flag clear: lodsd/stosd advance esi/edi upward */
00214     mov    esi,[ebp+8]             ; esi = a
00215 L_5:
00216     lodsd                   ; eax = [ds:esi]; esi += 4
00217     mov    edx,[ebp+16]            ; edx = b
00218     mul    edx                     ; edx:eax = Phi:Plo = a_i * b
00219 
00220     add    eax,ebx          ; add carry (ebx) to edx:eax
00221     adc    edx,0 /* fold the carry-out of the low-word add into the high word */
00222     mov    ebx,[edi]        ; add in current word from *c
00223     add    eax,ebx          
00224     adc    edx,0 /* second carry-out, from adding the word of c */
00225     mov    ebx,edx          ; high half of product becomes next carry
00226 
00227     stosd                   ; [es:edi] = ax; edi += 4;
00228     dec    ecx                     ; --a_len
00229     jnz    L_5                     ; jmp if a_len != 0
00230 L_6:
00231     cmp    ebx,0            ; is carry zero?
00232     jz     L_8 /* nothing left to propagate */
00233     mov    eax,[edi]        ; add in current word from *c
00234     add    eax,ebx
00235     stosd                   ; [es:edi] = ax; edi += 4;
00236     jnc    L_8 /* stosd leaves flags alone, so this tests the add above */
00237 L_7:
00238     mov    eax,[edi]        ; add in current word from *c
00239     adc    eax,0 /* CF is 1 whenever this is reached, so this adds 1 */
00240     stosd                   ; [es:edi] = ax; edi += 4;
00241     jc     L_7 /* keep rippling while the word wrapped around */
00242 L_8:
00243     pop    ebx
00244     pop    esi
00245     pop    edi
00246     leave  
00247     ret    
00248     nop /* placed after ret; no label, so it is never reached */
00249   }
00250 }
00251 
00252 /*
00253  *   s_mpv_sqr_add_prop:  for each i < a_len, adds a[i]*a[i] plus the carry
00254  *   into the word pair sqrs[2i], sqrs[2i+1]; the carry out of each pair feeds
00255  *   the next.  The last carry is then rippled into the following words of sqrs.
00256  *   ebp - 24:       caller's ebx
00257  *   ebp - 20:       caller's esi
00258  *   ebp - 16:       caller's edi
00259  *   ebp - 12 .. ebp - 4:   reserved by "sub esp,12"; never accessed
00260  *   ebp + 0: caller's ebp
00261  *   ebp + 4: return address
00262  *   ebp + 8: a      argument   (called "pa" in the notes below)
00263  *   ebp + 12:       a_len  argument
00264  *   ebp + 16:       sqrs   argument   (called "ps" in the notes below)
00265  *   registers:
00266  *     eax:   a_i as loaded, then the word about to be stored to sqrs
00267  *     ebx:   carry; briefly also the low / high word read from sqrs
00268  *     ecx:   a_len (counts down to 0)
00269  *     edx:   high word of a_i * a_i and of the running sum
00270  *     esi:   a ptr            edi:   sqrs ptr
00271  */
00272 __declspec(naked) void
00273 s_mpv_sqr_add_prop(const mp_digit *a, mp_size a_len, mp_digit *sqrs)
00274 {
00275   __asm {
00276      push   ebp
00277      mov    ebp,esp
00278      sub    esp,12 /* reserves 12 bytes that the code below never uses */
00279      push   edi
00280      push   esi
00281      push   ebx
00282      mov    ebx,0           ; carry = 0
00283      mov    ecx,[ebp+12]    ; a_len
00284      mov    edi,[ebp+16]    ; edi = ps
00285      cmp    ecx,0
00286      je     L_11            ; jump if a_len == 0
00287      cld /* direction flag clear: lodsd/stosd advance esi/edi upward */
00288      mov    esi,[ebp+8]            ; esi = pa
00289 L_10:
00290      lodsd                  ; eax = [ds:si]; si += 4;
00291      mul    eax /* edx:eax = a_i * a_i */
00292 
00293      add    eax,ebx         ; add "carry"
00294      adc    edx,0 /* fold the carry-out of the low-word add into the high word */
00295      mov    ebx,[edi] /* ebx = current low word of the pair in sqrs */
00296      add    eax,ebx         ; add low word from result
00297      mov    ebx,[edi+4] /* high word of the pair; mov keeps CF from the add */
00298      stosd                  ; [es:di] = eax; di += 4;
00299      adc    edx,ebx         ; add high word from result
00300      mov    ebx,0 /* mov (not xor) so CF from the adc above survives */
00301      mov    eax,edx /* stosd stores from eax, so move the high word there */
00302      adc    ebx,0 /* ebx = carry out of the pair (0 or 1) */
00303      stosd                  ; [es:di] = eax; di += 4;
00304      dec    ecx                    ; --a_len
00305      jnz    L_10            ; jmp if a_len != 0
00306 L_11:
00307     cmp    ebx,0            ; is carry zero?
00308     jz     L_14 /* nothing left to propagate */
00309     mov    eax,[edi]        ; add in current word from *c
00310     add    eax,ebx
00311     stosd                   ; [es:edi] = ax; edi += 4;
00312     jnc    L_14 /* stosd leaves flags alone, so this tests the add above */
00313 L_12:
00314     mov    eax,[edi]        ; add in current word from *c
00315     adc    eax,0 /* CF is 1 whenever this is reached, so this adds 1 */
00316     stosd                   ; [es:edi] = ax; edi += 4;
00317     jc     L_12 /* keep rippling while the word wrapped around; no length limit */
00318 L_14:
00319     pop    ebx
00320     pop    esi
00321     pop    edi
00322     leave  
00323     ret    
00324     nop /* placed after ret; no label, so it is never reached */
00325   }
00326 }
00327 
00328 /*
00329  *  Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
00330  *  so its high bit is 1.   This code is from NSPR.
00331  *  The quotient is stored through qp, the remainder through rp, and the
00332  *  function returns 0.  Nothing is validated here: the x86 "div" instruction
00333  *  faults if divisor is 0 or the quotient exceeds 32 bits (Nhi >= divisor).
00334  *   esp +  0:   Caller's ebx
00335  *   esp +  4:       return address
00336  *   esp +  8:       Nhi    argument
00337  *   esp + 12:       Nlo    argument
00338  *   esp + 16:       divisor       argument
00339  *   esp + 20:       qp     argument
00340  *   esp + 24:   rp  argument
00341  *   registers:
00342  *     eax:   Nlo, then the quotient, finally the return value 0
00343  *     ebx:   divisor, then qp, then rp
00344  *     edx:   Nhi, then the remainder
00345  *     ecx, esi, edi:   not used
00346  *   No frame pointer is set up; the offsets above are relative to esp
00347  *   after the single "push ebx".
00348  */
00349 __declspec(naked) mp_err
00350 s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
00351               mp_digit *qp, mp_digit *rp)
00352 {
00353   __asm {
00354        push   ebx
00355        mov    edx,[esp+8] /* edx = Nhi (high half of the dividend) */
00356        mov    eax,[esp+12] /* eax = Nlo (low half of the dividend) */
00357        mov    ebx,[esp+16] /* ebx = divisor */
00358        div    ebx /* eax = quotient, edx = remainder of edx:eax / ebx */
00359        mov    ebx,[esp+20] /* ebx = qp */
00360        mov    [ebx],eax /* *qp = quotient */
00361        mov    ebx,[esp+24] /* ebx = rp */
00362        mov    [ebx],edx /* *rp = remainder */
00363        xor    eax,eax              ; return zero
00364        pop    ebx
00365        ret    
00366        nop /* placed after ret; no label, so it is never reached */
00367   }
00368 }