Back to index

lightning-sunbird  0.9+nobinonly
mpv_sparc.c
Go to the documentation of this file.
00001 /* ***** BEGIN LICENSE BLOCK *****
00002  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00003  *
00004  * The contents of this file are subject to the Mozilla Public License Version
00005  * 1.1 (the "License"); you may not use this file except in compliance with
00006  * the License. You may obtain a copy of the License at
00007  * http://www.mozilla.org/MPL/
00008  *
00009  * Software distributed under the License is distributed on an "AS IS" basis,
00010  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00011  * for the specific language governing rights and limitations under the
00012  * License.
00013  *
00014  * The Original Code is a SPARC/VIS optimized multiply and add function.
00015  *
00016  * The Initial Developer of the Original Code is
00017  * Sun Microsystems Inc.
00018  * Portions created by the Initial Developer are Copyright (C) 1999-2000
00019  * the Initial Developer. All Rights Reserved.
00020  *
00021  * Contributor(s):
00022  *
00023  * Alternatively, the contents of this file may be used under the terms of
00024  * either the GNU General Public License Version 2 or later (the "GPL"), or
00025  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00026  * in which case the provisions of the GPL or the LGPL are applicable instead
00027  * of those above. If you wish to allow use of your version of this file only
00028  * under the terms of either the GPL or the LGPL, and not to allow others to
00029  * use your version of this file under the terms of the MPL, indicate your
00030  * decision by deleting the provisions above and replace them with the notice
00031  * and other provisions required by the GPL or the LGPL. If you do not delete
00032  * the provisions above, a recipient may use your version of this file under
00033  * the terms of any one of the MPL, the GPL or the LGPL.
00034  *
00035  * ***** END LICENSE BLOCK ***** */
00036 /* $Id: mpv_sparc.c,v 1.4 2004/04/27 23:04:36 gerv%gerv.net Exp $ */
00037 
00038 #include "vis_proto.h"
00039 
00040 /***************************************************************/
00041 
/*
 * Fixed-width aliases used throughout this file.  The trailing number in
 * each name is the width in bits the rest of the code relies on.
 */

/* 32-bit signed / unsigned integers. */
typedef int                t_s32;
typedef unsigned int       t_u32;

/*
 * 64-bit signed / unsigned integers.  When __sparcv9 is defined plain
 * "long" is used (presumably because that ABI is LP64 -- confirm);
 * otherwise "long long" is used.
 */
#ifdef __sparcv9
typedef long               t_s64;
typedef unsigned long      t_u64;
#else
typedef long long          t_s64;
typedef unsigned long long t_u64;
#endif

/* 64-bit floating point value; also used as a raw 64-bit container. */
typedef double             t_d64;
00052 
00053 /***************************************************************/
00054 
00055 typedef union {
00056   t_d64 d64;
00057   struct {
00058     t_s32 i0;
00059     t_s32 i1;
00060   } i32s;
00061 } d64_2_i32;
00062 
00063 /***************************************************************/
00064 
/*
 * Size unit for the scratch buffer of intermediate products declared by
 * DEF_VARS in the general (n != 8, n != 16) paths of mul_add().
 */
#define BUFF_SIZE  256

/*
 * mul_add() treats multipliers below (1 << A_BITS) specially; larger
 * multipliers are split into a low part (a & A_MASK) and a high part
 * (a >> A_BITS).
 */
#define A_BITS  19
#define A_MASK  ((1 << A_BITS) - 1)
00069 
00070 /***************************************************************/
00071 
00072 static t_u64 mask_cnst[] = {
00073   0x8000000080000000ull
00074 };
00075 
00076 /***************************************************************/
00077 
/*
 * DEF_VARS(NBUF): declare the locals shared by the MUL_* and ADD_* macros.
 *
 * Expects `y` (word pointer), `a` (multiplier) and `mask_cnst` to be in
 * scope.  Declares:
 *   py    - y viewed as an array of t_d64 (two 32-bit words per element)
 *   mask  - mask_cnst reinterpreted as a t_d64
 *   ca    - 2^31 - 1 as a double
 *   da    - the multiplier as a double
 *   buff  - NBUF-element scratch array of 64-bit products
 *   s     - 64-bit accumulator used by the ADD_* macros
 *   dy    - scratch union used by the MUL_* macros
 * The expansion has no trailing semicolon; the caller supplies it.
 */
#define DEF_VARS(NBUF)                          \
  t_d64 *py = (t_d64 *)y;                       \
  t_d64 mask = *((t_d64 *)mask_cnst);           \
  t_d64 ca = (t_d64)((1u << 31) - 1);           \
  t_d64 da = (t_d64)a;                          \
  t_s64 buff[NBUF];                             \
  t_s64 s;                                      \
  d64_2_i32 dy
00085 
00086 /***************************************************************/
00087 
/*
 * MUL_U32_S64_2(i): multiply the two 32-bit words held in py[i] by da and
 * store the two products in buff[2*i] and buff[2*i+1].
 *
 * vis_fxnor() comes from vis_proto.h; assuming it is a bitwise XNOR of its
 * two 64-bit operands, each word w of py[i] becomes ~(w ^ 0x80000000),
 * which read as a signed int equals 2^31 - 1 - w.  Subtracting that from
 * ca (2^31 - 1) yields w as a non-negative double, i.e. an unsigned
 * 32-bit to double conversion done with signed conversions only.
 *
 * Uses dy, mask, py, ca, da and buff from the enclosing scope.
 */
#define MUL_U32_S64_2(i)                                        \
  dy.d64 = vis_fxnor(mask, py[i]);                              \
  buff[2*(i)  ] = (t_s64)((ca - (t_d64)dy.i32s.i0) * da);       \
  buff[2*(i)+1] = (t_s64)((ca - (t_d64)dy.i32s.i1) * da)

/*
 * MUL_U32_S64_2_D(i): as above, but every word is multiplied by both da
 * and db.  Word 0 of py[i] fills buff[4*i] (times da) and buff[4*i+1]
 * (times db); word 1 fills buff[4*i+2] and buff[4*i+3].
 *
 * Additionally uses d0, d1 and db from the enclosing scope.
 */
#define MUL_U32_S64_2_D(i)                      \
  dy.d64 = vis_fxnor(mask, py[i]);              \
  d0 = ca - (t_d64)dy.i32s.i0;                  \
  d1 = ca - (t_d64)dy.i32s.i1;                  \
  buff[4*(i)  ] = (t_s64)(d0 * da);             \
  buff[4*(i)+1] = (t_s64)(d0 * db);             \
  buff[4*(i)+2] = (t_s64)(d1 * da);             \
  buff[4*(i)+3] = (t_s64)(d1 * db)
00101 
00102 /***************************************************************/
00103 
/*
 * ADD_S64_U32(i): one step of the carry chain for the single-product path.
 *   s    = buff[i] + x[i] + c
 *   z[i] = low 32 bits of s
 *   c    = s >> 32 (carry into the next word)
 * Uses s, buff, x, z and c from the enclosing scope.
 */
#define ADD_S64_U32(i)          \
  s = buff[i] + x[i] + c;       \
  z[i] = s;                     \
  c = (s >> 32)

/*
 * ADD_S64_U32_D(i): one step of the carry chain for the split-multiplier
 * path.  buff[2*i] holds the product with the low A_BITS bits of the
 * multiplier and buff[2*i+1] the product with the remaining high bits, so
 * the full product is buff[2*i] + (buff[2*i+1] << A_BITS).
 *
 * The shifted term and the sum can need all 64 bits, so the arithmetic is
 * carried out in t_u64: shifting or adding in t_s64 would overflow a
 * signed type, which is undefined behaviour.  The result is stored back
 * into s unchanged bit-for-bit.
 *
 * Uses s, buff, x, z and uc from the enclosing scope.
 */
#define ADD_S64_U32_D(i)                                        \
  s = (t_s64)((t_u64)buff[2*(i)] +                              \
              ((t_u64)buff[2*(i)+1] << A_BITS) + x[i] + uc);    \
  z[i] = s;                                                     \
  uc = ((t_u64)s >> 32)
00113 
00114 /***************************************************************/
00115 
/*
 * MUL_U32_S64_8(base): four consecutive MUL_U32_S64_2 steps, starting at
 * pair index `base` (each step handles one pair of 32-bit words).
 */
#define MUL_U32_S64_8(base)             \
  MUL_U32_S64_2((base));                \
  MUL_U32_S64_2((base) + 1);            \
  MUL_U32_S64_2((base) + 2);            \
  MUL_U32_S64_2((base) + 3)

/*
 * MUL_U32_S64_D_8(base): four consecutive MUL_U32_S64_2_D steps, starting
 * at pair index `base`.
 */
#define MUL_U32_S64_D_8(base)           \
  MUL_U32_S64_2_D((base));              \
  MUL_U32_S64_2_D((base) + 1);          \
  MUL_U32_S64_2_D((base) + 2);          \
  MUL_U32_S64_2_D((base) + 3)
00127 
00128 /***************************************************************/
00129 
/*
 * ADD_S64_U32_8(base): eight consecutive ADD_S64_U32 steps for word
 * indices base .. base+7, in increasing order (the carry propagates from
 * each step to the next).
 */
#define ADD_S64_U32_8(base)             \
  ADD_S64_U32((base));                  \
  ADD_S64_U32((base) + 1);              \
  ADD_S64_U32((base) + 2);              \
  ADD_S64_U32((base) + 3);              \
  ADD_S64_U32((base) + 4);              \
  ADD_S64_U32((base) + 5);              \
  ADD_S64_U32((base) + 6);              \
  ADD_S64_U32((base) + 7)

/*
 * ADD_S64_U32_D_8(base): eight consecutive ADD_S64_U32_D steps for word
 * indices base .. base+7, in increasing order.
 */
#define ADD_S64_U32_D_8(base)           \
  ADD_S64_U32_D((base));                \
  ADD_S64_U32_D((base) + 1);            \
  ADD_S64_U32_D((base) + 2);            \
  ADD_S64_U32_D((base) + 3);            \
  ADD_S64_U32_D((base) + 4);            \
  ADD_S64_U32_D((base) + 5);            \
  ADD_S64_U32_D((base) + 6);            \
  ADD_S64_U32_D((base) + 7)
00149 
00150 /***************************************************************/
00151 
00152 t_u32 mul_add(t_u32 *z, t_u32 *x, t_u32 *y, int n, t_u32 a)
00153 {
00154   if (a < (1 << A_BITS)) {
00155 
00156     if (n == 8) {
00157       DEF_VARS(8);
00158       t_s32 c = 0;
00159 
00160       MUL_U32_S64_8(0);
00161       ADD_S64_U32_8(0);
00162 
00163       return c;
00164 
00165     } else if (n == 16) {
00166       DEF_VARS(16);
00167       t_s32 c = 0;
00168 
00169       MUL_U32_S64_8(0);
00170       MUL_U32_S64_8(4);
00171       ADD_S64_U32_8(0);
00172       ADD_S64_U32_8(8);
00173 
00174       return c;
00175 
00176     } else {
00177       DEF_VARS(BUFF_SIZE);
00178       t_s32 i, c = 0;
00179 
00180 #pragma pipeloop(0)
00181       for (i = 0; i < (n+1)/2; i ++) {
00182         MUL_U32_S64_2(i);
00183       }
00184 
00185 #pragma pipeloop(0)
00186       for (i = 0; i < n; i ++) {
00187         ADD_S64_U32(i);
00188       }
00189 
00190       return c;
00191 
00192     }
00193   } else {
00194 
00195     if (n == 8) {
00196       DEF_VARS(2*8);
00197       t_d64 d0, d1, db;
00198       t_u32 uc = 0;
00199 
00200       da = (t_d64)(a &  A_MASK);
00201       db = (t_d64)(a >> A_BITS);
00202 
00203       MUL_U32_S64_D_8(0);
00204       ADD_S64_U32_D_8(0);
00205 
00206       return uc;
00207 
00208     } else if (n == 16) {
00209       DEF_VARS(2*16);
00210       t_d64 d0, d1, db;
00211       t_u32 uc = 0;
00212 
00213       da = (t_d64)(a &  A_MASK);
00214       db = (t_d64)(a >> A_BITS);
00215 
00216       MUL_U32_S64_D_8(0);
00217       MUL_U32_S64_D_8(4);
00218       ADD_S64_U32_D_8(0);
00219       ADD_S64_U32_D_8(8);
00220 
00221       return uc;
00222 
00223     } else {
00224       DEF_VARS(2*BUFF_SIZE);
00225       t_d64 d0, d1, db;
00226       t_u32 i, uc = 0;
00227 
00228       da = (t_d64)(a &  A_MASK);
00229       db = (t_d64)(a >> A_BITS);
00230 
00231 #pragma pipeloop(0)
00232       for (i = 0; i < (n+1)/2; i ++) {
00233         MUL_U32_S64_2_D(i);
00234       }
00235 
00236 #pragma pipeloop(0)
00237       for (i = 0; i < n; i ++) {
00238         ADD_S64_U32_D(i);
00239       }
00240 
00241       return uc;
00242     }
00243   }
00244 }
00245 
00246 /***************************************************************/
00247 
00248 t_u32 mul_add_inp(t_u32 *x, t_u32 *y, int n, t_u32 a)
00249 {
00250   return mul_add(x, x, y, n, a);
00251 }
00252 
00253 /***************************************************************/