Back to index

lightning-sunbird  0.9+nobinonly
mpcpucache.c
Go to the documentation of this file.
00001 /* ***** BEGIN LICENSE BLOCK *****
00002  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00003  *
00004  * The contents of this file are subject to the Mozilla Public License Version
00005  * 1.1 (the "License"); you may not use this file except in compliance with
00006  * the License. You may obtain a copy of the License at
00007  * http://www.mozilla.org/MPL/
00008  *
00009  * Software distributed under the License is distributed on an "AS IS" basis,
00010  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00011  * for the specific language governing rights and limitations under the
00012  * License.
00013  *
00014  * The Original Code is the Netscape security libraries.
00015  *
00016  * The Initial Developer of the Original Code is
00017  * Red Hat, Inc
00018  * Portions created by the Initial Developer are Copyright (C) 2005
00019  * the Initial Developer. All Rights Reserved.
00020  *
00021  * Contributor(s):
00022  *   Robert Relyea <rrelyea@redhat.com>
00023  *
00024  * Alternatively, the contents of this file may be used under the terms of
00025  * either the GNU General Public License Version 2 or later (the "GPL"), or
00026  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00027  * in which case the provisions of the GPL or the LGPL are applicable instead
00028  * of those above. If you wish to allow use of your version of this file only
00029  * under the terms of either the GPL or the LGPL, and not to allow others to
00030  * use your version of this file under the terms of the MPL, indicate your
00031  * decision by deleting the provisions above and replace them with the notice
00032  * and other provisions required by the GPL or the LGPL. If you do not delete
00033  * the provisions above, a recipient may use your version of this file under
00034  * the terms of any one of the MPL, the GPL or the LGPL.
00035  *
00036  * ***** END LICENSE BLOCK ***** */
00037 
00038 #include "mpi.h"
00039 
00040 /*
00041  * This file implements a single function: s_mpi_getProcessorLineSize();
00042  * s_mpi_getProcessorLineSize() returns the size in bytes of the cache line
00043  * if a cache exists, or zero if there is no cache. If the processor has
00044  * caches with different line sizes, it should return the smallest one (which
00045  * is usually that of the L1 cache).
00046  *
00047  * mp_modexp uses this information to make sure that private key information
00048  * isn't being leaked through the cache.
00049  *
00050  * Currently the file returns good data for most modern x86 processors, and
00051  * reasonable data on 64-bit ppc processors. All other processors are assumed
00052  * to have a cache line size of 32 bytes unless modified by target.mk.
00053  * 
00054  */
00055 
00056 #if defined(i386) || defined(__i386) || defined(__X86__) || defined (_M_IX86) || defined(__x86_64__) || defined(__x86_64)
00057 /* X86 processors have special instructions that tell us about the cache */
00058 #include "string.h"
00059 
00060 #if defined(__x86_64__) || defined(__x86_64)
00061 #define AMD_64 1
00062 #endif
00063 
00064 /* Generic CPUID function */
00065 #if defined(AMD_64)
/*
 * Execute the CPUID instruction (x86-64 build).
 *
 * op  - CPUID function (leaf) number; loaded into EAX before the instruction.
 * eax, ebx, ecx, edx - out parameters that receive the contents CPUID left
 *       in the registers of the same name.
 *
 * The statement is marked volatile because callers in this file issue CPUID
 * with the same function number more than once and expect the instruction
 * to run each time.  Without volatile the compiler may treat the asm as a
 * pure function of 'op' and merge or hoist the calls.
 */
static void cpuid(unsigned long op, unsigned long *eax,
                  unsigned long *ebx, unsigned long *ecx,
                  unsigned long *edx)
{
    __asm__ __volatile__("cpuid\n\t"
            : "=a" (*eax),
              "=b" (*ebx),
              "=c" (*ecx),
              "=d" (*edx)
            : "0" (op));
}
00077 #elif !defined(_MSC_VER)
/*
 * Execute the CPUID instruction (32-bit x86, GCC-style inline assembly).
 *
 * op  - CPUID function (leaf) number; loaded into EAX before the instruction.
 * eax, ebx, ecx, edx - out parameters that receive the values CPUID produced
 *       in the registers of the same name.
 *
 * GCC does not save the EBX PIC register on its own here, so EBX has to hold
 * its original value again when the asm statement ends.  The old value is
 * parked in EDI while CPUID runs and swapped back afterwards, which leaves
 * CPUID's EBX result in EDI.  Binding that output to EDI explicitly ("=D")
 * stops the compiler from assigning it to EBX itself -- something a plain
 * "=r" constraint allows, and which would lose the result as soon as EBX is
 * restored.  The statement is volatile so that repeated calls with the same
 * function number are neither merged nor hoisted by the optimizer.
 */
static void cpuid(unsigned long op, unsigned long *eax,
                  unsigned long *ebx, unsigned long *ecx,
                  unsigned long *edx)
{
    __asm__ __volatile__("movl %%ebx,%%edi\n\t"
            "cpuid\n\t"
            "xchgl %%ebx,%%edi\n\t"
            : "=a" (*eax),
              "=D" (*ebx),
              "=c" (*ecx),
              "=d" (*edx)
            : "0" (op));
}
00094 
/*
 * Try flipping a processor flag to determine the CPU type.
 *
 * flag - mask of the EFLAGS bit(s) to toggle.
 *
 * EFLAGS is read, XORed with 'flag', written back and read again.  The
 * return value is the second reading XORed with the first, i.e. the bits
 * that really changed; zero means the processor did not let the bit change.
 * The original EFLAGS value is restored before returning.
 */
static unsigned long changeFlag(unsigned long flag)
{
    unsigned long changedFlags, originalFlags;

    /* 'flag' appears as an output as well as an input; presumably this is
     * to give it a register distinct from %0 and %1, both of which are
     * written before 'flag' is read.  The statement is volatile because its
     * result depends on processor state rather than only on its operands,
     * and it declares the "cc" clobber because it rewrites EFLAGS. */
    __asm__ __volatile__(
            "pushfl\n\t"        /* get the flags */
            "popl %0\n\t"
            "movl %0,%1\n\t"    /* save the original flags */
            "xorl %2,%0\n\t"    /* flip the bit */
            "pushl %0\n\t"      /* set the flags */
            "popfl\n\t"
            "pushfl\n\t"        /* get the flags again (for return) */
            "popl %0\n\t"
            "pushl %1\n\t"      /* restore the original flags */
            "popfl\n\t"
            : "=r" (changedFlags),
              "=r" (originalFlags),
              "=r" (flag)
            : "2" (flag)
            : "cc");
    return changedFlags ^ originalFlags;
}
00117 
00118 #else
00119 
00120 /*
00121  * windows versions of the above assembler
00122  */
00123 #define wcpuid __asm __emit 0fh __asm __emit 0a2h
00124 static void cpuid(unsigned long op,    unsigned long *Reax, 
00125     unsigned long *Rebx, unsigned long *Recx, unsigned long *Redx)
00126 {
00127         unsigned long  Leax, Lebx, Lecx, Ledx;
00128         __asm {
00129         pushad
00130         mov     eax,op
00131         wcpuid
00132         mov     Leax,eax
00133         mov     Lebx,ebx
00134         mov     Lecx,ecx
00135         mov     Ledx,edx
00136         popad
00137         }
00138         *Reax = Leax;
00139         *Rebx = Lebx;
00140         *Recx = Lecx;
00141         *Redx = Ledx;
00142 }
00143 
00144 static unsigned long changeFlag(unsigned long flag)
00145 {
00146        unsigned long changedFlags, originalFlags;
00147        __asm {
00148               push eax
00149               push ebx
00150               pushfd                        /* get the flags */
00151                pop  eax
00152               push eax             /* save the flags on the stack */
00153                mov  originalFlags,eax  /* save the original flags */
00154               mov  ebx,flag
00155                xor  eax,ebx            /* flip the bit */
00156               push eax                /* set the flags */
00157                popfd
00158               pushfd                  /* get the flags again (for return) */
00159               pop  eax      
00160               popfd                   /* restore the original flags */
00161               mov changedFlags,eax
00162               pop ebx
00163               pop eax
00164        }
00165        return changedFlags ^ originalFlags;
00166 }
00167 #endif
00168 
00169 #if !defined(AMD_64)
#define AC_FLAG 0x40000     /* EFLAGS bit 18 */
#define ID_FLAG 0x200000    /* EFLAGS bit 21 */

/*
 * 386 processors can't flip the AC_FLAG (Intel Application Note AP-485).
 * Returns non-zero when an attempt to toggle AC_FLAG changed nothing.
 */
static int is386(void)
{
    return changeFlag(AC_FLAG) == 0;
}
00178 
00179 /* 486 processors can't flip the ID_FLAG, intel AP Note AP-485 */
00180 static int is486()
00181 {
00182     return changeFlag(ID_FLAG) == 0;
00183 }
00184 #endif
00185 
00186 
/*
 * Table for Intel cache descriptors.
 * See Intel Application Note AP-485 for more information.
 *
 * CacheMap is indexed by a one-byte cache descriptor as returned by cpuid
 * function 2.  Each entry records what kind of cache/TLB the descriptor
 * names and its line size in bytes.  A lineSize of 0 makes the lookup code
 * ignore the entry.
 */

/* storage type for a CacheType value inside the table */
typedef unsigned char CacheTypeEntry;

typedef enum {
    Cache_NONE    = 0,
    Cache_UNKNOWN = 1,
    Cache_TLB     = 2,
    Cache_TLBi    = 3,
    Cache_TLBd    = 4,
    Cache_Trace   = 5,
    Cache_L1      = 6,
    Cache_L1i     = 7,
    Cache_L1d     = 8,
    Cache_L2      = 9,
    Cache_L2i     = 10,
    Cache_L2d     = 11,
    Cache_L3      = 12,
    Cache_L3i     = 13,
    Cache_L3d     = 14
} CacheType;

struct _cache {
    CacheTypeEntry type;        /* one of the CacheType values */
    unsigned char lineSize;     /* line size in bytes, 0 = not applicable */
};

static const struct _cache CacheMap[256] = {
/* 00 */ {Cache_NONE,    0   },
/* 01 */ {Cache_TLBi,    0   },
/* 02 */ {Cache_TLBi,    0   },
/* 03 */ {Cache_TLBd,    0   },
/* 04 */ {Cache_TLBd,    0   },
/* 05 */ {Cache_UNKNOWN, 0   },
/* 06 */ {Cache_L1i,     32  },
/* 07 */ {Cache_UNKNOWN, 0   },
/* 08 */ {Cache_L1i,     32  },
/* 09 */ {Cache_UNKNOWN, 0   },
/* 0a */ {Cache_L1d,     32  },
/* 0b */ {Cache_UNKNOWN, 0   },
/* 0c */ {Cache_L1d,     32  },
/* 0d */ {Cache_UNKNOWN, 0   },
/* 0e */ {Cache_UNKNOWN, 0   },
/* 0f */ {Cache_UNKNOWN, 0   },
/* 10 */ {Cache_UNKNOWN, 0   },
/* 11 */ {Cache_UNKNOWN, 0   },
/* 12 */ {Cache_UNKNOWN, 0   },
/* 13 */ {Cache_UNKNOWN, 0   },
/* 14 */ {Cache_UNKNOWN, 0   },
/* 15 */ {Cache_UNKNOWN, 0   },
/* 16 */ {Cache_UNKNOWN, 0   },
/* 17 */ {Cache_UNKNOWN, 0   },
/* 18 */ {Cache_UNKNOWN, 0   },
/* 19 */ {Cache_UNKNOWN, 0   },
/* 1a */ {Cache_UNKNOWN, 0   },
/* 1b */ {Cache_UNKNOWN, 0   },
/* 1c */ {Cache_UNKNOWN, 0   },
/* 1d */ {Cache_UNKNOWN, 0   },
/* 1e */ {Cache_UNKNOWN, 0   },
/* 1f */ {Cache_UNKNOWN, 0   },
/* 20 */ {Cache_UNKNOWN, 0   },
/* 21 */ {Cache_UNKNOWN, 0   },
/* 22 */ {Cache_L3,      64  },
/* 23 */ {Cache_L3,      64  },
/* 24 */ {Cache_UNKNOWN, 0   },
/* 25 */ {Cache_L3,      64  },
/* 26 */ {Cache_UNKNOWN, 0   },
/* 27 */ {Cache_UNKNOWN, 0   },
/* 28 */ {Cache_UNKNOWN, 0   },
/* 29 */ {Cache_L3,      64  },
/* 2a */ {Cache_UNKNOWN, 0   },
/* 2b */ {Cache_UNKNOWN, 0   },
/* 2c */ {Cache_L1d,     64  },
/* 2d */ {Cache_UNKNOWN, 0   },
/* 2e */ {Cache_UNKNOWN, 0   },
/* 2f */ {Cache_UNKNOWN, 0   },
/* 30 */ {Cache_L1i,     64  },
/* 31 */ {Cache_UNKNOWN, 0   },
/* 32 */ {Cache_UNKNOWN, 0   },
/* 33 */ {Cache_UNKNOWN, 0   },
/* 34 */ {Cache_UNKNOWN, 0   },
/* 35 */ {Cache_UNKNOWN, 0   },
/* 36 */ {Cache_UNKNOWN, 0   },
/* 37 */ {Cache_UNKNOWN, 0   },
/* 38 */ {Cache_UNKNOWN, 0   },
/* 39 */ {Cache_L2,      64  },
/* 3a */ {Cache_UNKNOWN, 0   },
/* 3b */ {Cache_L2,      64  },
/* 3c */ {Cache_L2,      64  },
/* 3d */ {Cache_UNKNOWN, 0   },
/* 3e */ {Cache_UNKNOWN, 0   },
/* 3f */ {Cache_UNKNOWN, 0   },
/* 40 */ {Cache_L2,      0   },
/* 41 */ {Cache_L2,      32  },
/* 42 */ {Cache_L2,      32  },
/* 43 */ {Cache_L2,      32  },
/* 44 */ {Cache_L2,      32  },
/* 45 */ {Cache_L2,      32  },
/* 46 */ {Cache_UNKNOWN, 0   },
/* 47 */ {Cache_UNKNOWN, 0   },
/* 48 */ {Cache_UNKNOWN, 0   },
/* 49 */ {Cache_UNKNOWN, 0   },
/* 4a */ {Cache_UNKNOWN, 0   },
/* 4b */ {Cache_UNKNOWN, 0   },
/* 4c */ {Cache_UNKNOWN, 0   },
/* 4d */ {Cache_UNKNOWN, 0   },
/* 4e */ {Cache_UNKNOWN, 0   },
/* 4f */ {Cache_UNKNOWN, 0   },
/* 50 */ {Cache_TLBi,    0   },
/* 51 */ {Cache_TLBi,    0   },
/* 52 */ {Cache_TLBi,    0   },
/* 53 */ {Cache_UNKNOWN, 0   },
/* 54 */ {Cache_UNKNOWN, 0   },
/* 55 */ {Cache_UNKNOWN, 0   },
/* 56 */ {Cache_UNKNOWN, 0   },
/* 57 */ {Cache_UNKNOWN, 0   },
/* 58 */ {Cache_UNKNOWN, 0   },
/* 59 */ {Cache_UNKNOWN, 0   },
/* 5a */ {Cache_UNKNOWN, 0   },
/* 5b */ {Cache_TLBd,    0   },
/* 5c */ {Cache_TLBd,    0   },
/* 5d */ {Cache_TLBd,    0   },
/* 5e */ {Cache_UNKNOWN, 0   },
/* 5f */ {Cache_UNKNOWN, 0   },
/* 60 */ {Cache_UNKNOWN, 0   },
/* 61 */ {Cache_UNKNOWN, 0   },
/* 62 */ {Cache_UNKNOWN, 0   },
/* 63 */ {Cache_UNKNOWN, 0   },
/* 64 */ {Cache_UNKNOWN, 0   },
/* 65 */ {Cache_UNKNOWN, 0   },
/* 66 */ {Cache_L1d,     64  },
/* 67 */ {Cache_L1d,     64  },
/* 68 */ {Cache_L1d,     64  },
/* 69 */ {Cache_UNKNOWN, 0   },
/* 6a */ {Cache_UNKNOWN, 0   },
/* 6b */ {Cache_UNKNOWN, 0   },
/* 6c */ {Cache_UNKNOWN, 0   },
/* 6d */ {Cache_UNKNOWN, 0   },
/* 6e */ {Cache_UNKNOWN, 0   },
/* 6f */ {Cache_UNKNOWN, 0   },
/* 70 */ {Cache_Trace,   1   },
/* 71 */ {Cache_Trace,   1   },
/* 72 */ {Cache_Trace,   1   },
/* 73 */ {Cache_UNKNOWN, 0   },
/* 74 */ {Cache_UNKNOWN, 0   },
/* 75 */ {Cache_UNKNOWN, 0   },
/* 76 */ {Cache_UNKNOWN, 0   },
/* 77 */ {Cache_UNKNOWN, 0   },
/* 78 */ {Cache_UNKNOWN, 0   },
/* 79 */ {Cache_L2,      64  },
/* 7a */ {Cache_L2,      64  },
/* 7b */ {Cache_L2,      64  },
/* 7c */ {Cache_L2,      64  },
/* 7d */ {Cache_UNKNOWN, 0   },
/* 7e */ {Cache_UNKNOWN, 0   },
/* 7f */ {Cache_UNKNOWN, 0   },
/* 80 */ {Cache_UNKNOWN, 0   },
/* 81 */ {Cache_UNKNOWN, 0   },
/* 82 */ {Cache_L2,      32  },
/* 83 */ {Cache_L2,      32  },
/* 84 */ {Cache_L2,      32  },
/* 85 */ {Cache_L2,      32  },
/* 86 */ {Cache_L2,      64  },
/* 87 */ {Cache_L2,      64  },
/* 88 */ {Cache_UNKNOWN, 0   },
/* 89 */ {Cache_UNKNOWN, 0   },
/* 8a */ {Cache_UNKNOWN, 0   },
/* 8b */ {Cache_UNKNOWN, 0   },
/* 8c */ {Cache_UNKNOWN, 0   },
/* 8d */ {Cache_UNKNOWN, 0   },
/* 8e */ {Cache_UNKNOWN, 0   },
/* 8f */ {Cache_UNKNOWN, 0   },
/* 90 */ {Cache_UNKNOWN, 0   },
/* 91 */ {Cache_UNKNOWN, 0   },
/* 92 */ {Cache_UNKNOWN, 0   },
/* 93 */ {Cache_UNKNOWN, 0   },
/* 94 */ {Cache_UNKNOWN, 0   },
/* 95 */ {Cache_UNKNOWN, 0   },
/* 96 */ {Cache_UNKNOWN, 0   },
/* 97 */ {Cache_UNKNOWN, 0   },
/* 98 */ {Cache_UNKNOWN, 0   },
/* 99 */ {Cache_UNKNOWN, 0   },
/* 9a */ {Cache_UNKNOWN, 0   },
/* 9b */ {Cache_UNKNOWN, 0   },
/* 9c */ {Cache_UNKNOWN, 0   },
/* 9d */ {Cache_UNKNOWN, 0   },
/* 9e */ {Cache_UNKNOWN, 0   },
/* 9f */ {Cache_UNKNOWN, 0   },
/* a0 */ {Cache_UNKNOWN, 0   },
/* a1 */ {Cache_UNKNOWN, 0   },
/* a2 */ {Cache_UNKNOWN, 0   },
/* a3 */ {Cache_UNKNOWN, 0   },
/* a4 */ {Cache_UNKNOWN, 0   },
/* a5 */ {Cache_UNKNOWN, 0   },
/* a6 */ {Cache_UNKNOWN, 0   },
/* a7 */ {Cache_UNKNOWN, 0   },
/* a8 */ {Cache_UNKNOWN, 0   },
/* a9 */ {Cache_UNKNOWN, 0   },
/* aa */ {Cache_UNKNOWN, 0   },
/* ab */ {Cache_UNKNOWN, 0   },
/* ac */ {Cache_UNKNOWN, 0   },
/* ad */ {Cache_UNKNOWN, 0   },
/* ae */ {Cache_UNKNOWN, 0   },
/* af */ {Cache_UNKNOWN, 0   },
/* b0 */ {Cache_TLBi,    0   },
/* b1 */ {Cache_UNKNOWN, 0   },
/* b2 */ {Cache_UNKNOWN, 0   },
/* b3 */ {Cache_TLBd,    0   },
/* b4 */ {Cache_UNKNOWN, 0   },
/* b5 */ {Cache_UNKNOWN, 0   },
/* b6 */ {Cache_UNKNOWN, 0   },
/* b7 */ {Cache_UNKNOWN, 0   },
/* b8 */ {Cache_UNKNOWN, 0   },
/* b9 */ {Cache_UNKNOWN, 0   },
/* ba */ {Cache_UNKNOWN, 0   },
/* bb */ {Cache_UNKNOWN, 0   },
/* bc */ {Cache_UNKNOWN, 0   },
/* bd */ {Cache_UNKNOWN, 0   },
/* be */ {Cache_UNKNOWN, 0   },
/* bf */ {Cache_UNKNOWN, 0   },
/* c0 */ {Cache_UNKNOWN, 0   },
/* c1 */ {Cache_UNKNOWN, 0   },
/* c2 */ {Cache_UNKNOWN, 0   },
/* c3 */ {Cache_UNKNOWN, 0   },
/* c4 */ {Cache_UNKNOWN, 0   },
/* c5 */ {Cache_UNKNOWN, 0   },
/* c6 */ {Cache_UNKNOWN, 0   },
/* c7 */ {Cache_UNKNOWN, 0   },
/* c8 */ {Cache_UNKNOWN, 0   },
/* c9 */ {Cache_UNKNOWN, 0   },
/* ca */ {Cache_UNKNOWN, 0   },
/* cb */ {Cache_UNKNOWN, 0   },
/* cc */ {Cache_UNKNOWN, 0   },
/* cd */ {Cache_UNKNOWN, 0   },
/* ce */ {Cache_UNKNOWN, 0   },
/* cf */ {Cache_UNKNOWN, 0   },
/* d0 */ {Cache_UNKNOWN, 0   },
/* d1 */ {Cache_UNKNOWN, 0   },
/* d2 */ {Cache_UNKNOWN, 0   },
/* d3 */ {Cache_UNKNOWN, 0   },
/* d4 */ {Cache_UNKNOWN, 0   },
/* d5 */ {Cache_UNKNOWN, 0   },
/* d6 */ {Cache_UNKNOWN, 0   },
/* d7 */ {Cache_UNKNOWN, 0   },
/* d8 */ {Cache_UNKNOWN, 0   },
/* d9 */ {Cache_UNKNOWN, 0   },
/* da */ {Cache_UNKNOWN, 0   },
/* db */ {Cache_UNKNOWN, 0   },
/* dc */ {Cache_UNKNOWN, 0   },
/* dd */ {Cache_UNKNOWN, 0   },
/* de */ {Cache_UNKNOWN, 0   },
/* df */ {Cache_UNKNOWN, 0   },
/* e0 */ {Cache_UNKNOWN, 0   },
/* e1 */ {Cache_UNKNOWN, 0   },
/* e2 */ {Cache_UNKNOWN, 0   },
/* e3 */ {Cache_UNKNOWN, 0   },
/* e4 */ {Cache_UNKNOWN, 0   },
/* e5 */ {Cache_UNKNOWN, 0   },
/* e6 */ {Cache_UNKNOWN, 0   },
/* e7 */ {Cache_UNKNOWN, 0   },
/* e8 */ {Cache_UNKNOWN, 0   },
/* e9 */ {Cache_UNKNOWN, 0   },
/* ea */ {Cache_UNKNOWN, 0   },
/* eb */ {Cache_UNKNOWN, 0   },
/* ec */ {Cache_UNKNOWN, 0   },
/* ed */ {Cache_UNKNOWN, 0   },
/* ee */ {Cache_UNKNOWN, 0   },
/* ef */ {Cache_UNKNOWN, 0   },
/* f0 */ {Cache_UNKNOWN, 0   },
/* f1 */ {Cache_UNKNOWN, 0   },
/* f2 */ {Cache_UNKNOWN, 0   },
/* f3 */ {Cache_UNKNOWN, 0   },
/* f4 */ {Cache_UNKNOWN, 0   },
/* f5 */ {Cache_UNKNOWN, 0   },
/* f6 */ {Cache_UNKNOWN, 0   },
/* f7 */ {Cache_UNKNOWN, 0   },
/* f8 */ {Cache_UNKNOWN, 0   },
/* f9 */ {Cache_UNKNOWN, 0   },
/* fa */ {Cache_UNKNOWN, 0   },
/* fb */ {Cache_UNKNOWN, 0   },
/* fc */ {Cache_UNKNOWN, 0   },
/* fd */ {Cache_UNKNOWN, 0   },
/* fe */ {Cache_UNKNOWN, 0   },
/* ff */ {Cache_UNKNOWN, 0   }
};
00474 
00475 
00476 /*
00477  * use the above table to determine the CacheEntryLineSize.
00478  */
00479 static void
00480 getIntelCacheEntryLineSize(unsigned long val, int *level, 
00481                                           unsigned long *lineSize)
00482 {
00483     CacheType type;
00484 
00485     type = CacheMap[val].type;
00486     /* only interested in data caches */
00487     /* NOTE val = 0x40 is a special value that means no L2 or L3 cache.
00488      * this data check has the side effect of rejecting that entry. If
00489      * that wasn't the case, we could have to reject it explicitly */
00490     if (CacheMap[val].lineSize == 0) {
00491        return;
00492     }
00493     /* look at the caches, skip types we aren't interested in.
00494      * if we already have a value for a lower level cache, skip the
00495      * current entry */
00496     if ((type == Cache_L1)|| (type == Cache_L1d)) {
00497        *level = 1;
00498        *lineSize = CacheMap[val].lineSize;
00499     } else if ((*level >= 2) && ((type == Cache_L2) || (type == Cache_L2d))) {
00500        *level = 2;
00501        *lineSize = CacheMap[val].lineSize;
00502     } else if ((*level >= 3) && ((type == Cache_L3) || (type == Cache_L3d))) {
00503        *level = 3;
00504        *lineSize = CacheMap[val].lineSize;
00505     }
00506     return;
00507 }
00508 
00509 
/*
 * Split a 32-bit register value into its four descriptor bytes and feed
 * each one, most significant byte first, to getIntelCacheEntryLineSize().
 * 'level' and 'lineSize' are passed straight through.
 */
static void
getIntelRegisterCacheLineSize(unsigned long val, 
                              int *level, unsigned long *lineSize)
{
    int shift;

    for (shift = 24; shift >= 0; shift -= 8) {
        getIntelCacheEntryLineSize((val >> shift) & 0xff, level, lineSize);
    }
}
00519 
/*
 * Determine the data cache line size using Intel's cpuid function 2.
 *
 * cpuidLevel - highest standard cpuid function the processor supports.
 *
 * Returns the line size of the lowest level data cache that has a
 * recognized descriptor.  Returns '0' if no recognized cache is found, or
 * if the cache information is not supported by this processor
 * (cpuidLevel < 2).
 */
static unsigned long
getIntelCacheLineSize(int cpuidLevel)
{
    int level = 4;              /* higher than any real level: nothing found yet */
    unsigned long lineSize = 0;
    unsigned long eax, ebx, ecx, edx;
    int repeat, count;

    if (cpuidLevel < 2) {
        return 0;
    }

    /* command '2' of the cpuid is intel's cache info call. Each byte of the
     * 4 registers contain a potential descriptor for the cache. The CacheMap
     * table maps the cache entry with the processor cache. Register 'al'
     * contains a count value that cpuid '2' needs to be called in order to
     * find all the cache descriptors. Only registers with the high bit set
     * to 'zero' have valid descriptors. This code loops through all the
     * required calls to cpuid '2' and passes any valid descriptors it finds
     * to the getIntelRegisterCacheLineSize code, which breaks the registers
     * down into their component descriptors. In the end the lineSize of the
     * lowest level cache data cache is returned. */
    cpuid(2, &eax, &ebx, &ecx, &edx);
    /* 'al' is the whole low byte of eax, so take all eight bits of it */
    repeat = eax & 0xff;
    for (count = 0; count < repeat; count++) {
        if ((eax & 0x80000000) == 0) {
            /* the low byte of eax is the count, not a descriptor */
            getIntelRegisterCacheLineSize(eax & 0xffffff00, &level, &lineSize);
        }
        if ((ebx & 0x80000000) == 0) {
            getIntelRegisterCacheLineSize(ebx, &level, &lineSize);
        }
        if ((ecx & 0x80000000) == 0) {
            getIntelRegisterCacheLineSize(ecx, &level, &lineSize);
        }
        if ((edx & 0x80000000) == 0) {
            getIntelRegisterCacheLineSize(edx, &level, &lineSize);
        }
        /* fetch the next set of descriptors unless this was the last pass */
        if (count+1 != repeat) {
            cpuid(2, &eax, &ebx, &ecx, &edx);
        }
    }
    return lineSize;
}
00567 
/*
 * Cache line size lookup for non-Intel processors, based on the AMD
 * extended cache commands for cpuid
 * (see "AMD Processor Recognition Application Note" Publication 20734).
 * Some other processors use the identical scheme
 * (see "Processor Recognition, Transmeta Corporation").
 *
 * cpuidLevel - overwritten with the extended cpuid level before it is
 *              read, so the value passed in has no effect.
 *
 * Returns the L1 data cache line size, or '0' if the cache info is not
 * supported by this processor.
 */
static unsigned long
getOtherCacheLineSize(unsigned long cpuidLevel)
{
    unsigned long eax, ebx, ecx, edx;

    /* ask for the highest extended cpuid function */
    cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
    cpuidLevel = eax;

    if (cpuidLevel < 0x80000005) {
        return 0;
    }

    cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
    return ecx & 0xff; /* low byte of ecx: line size, L1 Data Cache */
}
00591 
/*
 * Vendor identification strings as reported by cpuid function 0.  The
 * #define placed in front of each string is that string's index in manMap,
 * so a manufacturer number obtained by searching the table can be compared
 * against the symbolic names.
 */
static const char * const manMap[] = {
#define INTEL     0
    "GenuineIntel",
#define AMD       1
    "AuthenticAMD",
#define CYRIX     2
    "CyrixInstead",
#define CENTAUR   3
    "CentaurHauls",
#define NEXGEN    4
    "NexGenDriven",
#define TRANSMETA 5
    "GenuineTMx86",
#define RISE      6
    "RiseRiseRise",
#define UMC       7
    "UMC UMC UMC ",
#define SIS       8
    "Sis Sis Sis ",
#define NATIONAL  9
    "Geode by NSC",
};

/* number of entries in manMap */
static const int n_manufacturers = sizeof(manMap)/sizeof(manMap[0]);

/* one past the last valid manMap index, so it can never match a real entry */
#define MAN_UNKNOWN 10
00618 
00619 
00620 unsigned long
00621 s_mpi_getProcessorLineSize()
00622 {
00623     unsigned long eax, ebx, ecx, edx;
00624     unsigned long cpuidLevel;
00625     unsigned long cacheLineSize = 0;
00626     int manufacturer = MAN_UNKNOWN;
00627     int i;
00628     char string[65];
00629 
00630 #if !defined(AMD_64)
00631     if (is386()) {
00632        return 0; /* 386 had no cache */
00633     } if (is486()) {
00634        return 32; /* really? need more info */
00635     }
00636 #endif
00637 
00638     /* Pentium, cpuid command is available */
00639     cpuid(0, &eax, &ebx, &ecx, &edx);
00640     cpuidLevel = eax;
00641     *(int *)string = ebx;
00642     *(int *)&string[4] = edx;
00643     *(int *)&string[8] = ecx;
00644     string[12] = 0;
00645 
00646     manufacturer = MAN_UNKNOWN;
00647     for (i=0; i < n_manufacturers; i++) {
00648        if ( strcmp(manMap[i],string) == 0) {
00649            manufacturer = i;
00650        }
00651     }
00652 
00653     if (manufacturer == INTEL) {
00654        cacheLineSize = getIntelCacheLineSize(cpuidLevel);
00655     } else {
00656        cacheLineSize = getOtherCacheLineSize(cpuidLevel);
00657     }
00658     /* doesn't support cache info based on cpuid. This means
00659      * an old pentium class processor, which have cache lines of
00660      * 32. If we learn differently, we can use a switch based on
00661      * the Manufacturer id  */
00662     if (cacheLineSize == 0) {
00663        cacheLineSize = 32;
00664     }
00665     return cacheLineSize;
00666 }
00667 #define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
00668 #endif
00669 
00670 #if defined(__ppc64__) 
00671 /*
00672  *  Sigh, The PPC has some really nice features to help us determine cache
00673  *  size, since it had lots of direct control functions to do so. The POWER
00674  *  processor even has an instruction to do this, but it was dropped in
00675  *  PowerPC. Unfortunately most of them are not available in user mode.
00676  *
00677  *  The dcbz function would be a great way to determine cache line size except
00678  *  1) it only works on write-back memory (it throws an exception otherwise), 
00679  *  and 2) because so many mac programs 'knew' the processor cache size was
00680  *  32 bytes, they used this instruction as a fast 'zero 32 bytes'. Now the new
00681  *  G5 processor has 128 byte cache, but dcbz only clears 32 bytes to keep
00682  *  these programs happy. dcbzl work if 64 bit instructions are supported.
00683  *  If you know 64 bit instructions are supported, and that stack is 
00684  *  write-back, you can use this code.
00685  */
00686 #include "memory.h"
00687 
/*
 * Clear the cache line that contains 'array' by issuing dcbzl on it.
 *
 * The "memory" clobber tells the compiler that the instruction writes
 * memory it cannot see in the operand list; without it the compiler is free
 * to assume that bytes stored before the call still hold their old values
 * when they are read back afterwards.
 *
 * NOTE(review): the second operand is written as r0.  In the register-index
 * form of this instruction the second operand names a register whose
 * contents are added to the first, so this looks like it relies on r0
 * holding zero -- confirm, or consider the "dcbzl 0,%0" form.
 */
static inline void dcbzl(char *array)
{
    register char *a __asm__("r2") = array;
    __asm__ __volatile__( "dcbzl %0,r0" : "=r" (a) : "0" (a) : "memory" );
}
00694 
00695 
/* round pointer x up to the next multiple of y (y must be a power of two) */
#define PPC_DO_ALIGN(x,y) ((char *)\
                     ((((long long) (x))+((y)-1))&~((y)-1)))

/* largest line size the probe below can detect */
#define PPC_MAX_LINE_SIZE 256

/*
 * ppc64 implementation: measure the cache line size by filling an aligned
 * buffer with 0xff bytes, clearing one cache block at its start with
 * dcbzl(), and finding the largest power of two whose last byte was zeroed.
 * Returns that size, or 0 if nothing was cleared.
 */
unsigned long
s_mpi_getProcessorLineSize()
{
    char testArray[2*PPC_MAX_LINE_SIZE+1];
    char *probe;
    int span = PPC_MAX_LINE_SIZE;

    /* start on a maximum line size boundary so the cleared block is known
     * to begin at the first byte we look at */
    probe = PPC_DO_ALIGN(testArray, PPC_MAX_LINE_SIZE);

    /* all ones first, then clear a single cache block at 'probe' */
    memset(probe, 0xff, PPC_MAX_LINE_SIZE);
    dcbzl(probe);

    /* halve the candidate size until its last byte is inside the cleared area */
    while (span != 0) {
        if (probe[span-1] == 0) {
            return span;
        }
        span = span/2;
    }
    return 0;
}
00723 
00724 #define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
00725 #endif
00726 
00727 
00728 /*
00729  * put other processor and platform specific cache code here
00730  * return the smallest cache line size in bytes on the processor 
00731  * (usually the L1 cache). If the OS has a call, this would be
00732  * a great place to put it.
00733  *
00734  * If there is no cache, return 0;
00735  * 
00736  * define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED so the generic functions
00737  * below aren't compiled.
00738  *
00739  */
00740 
00741 
00742 /* target.mk can define MPI_CACHE_LINE_SIZE if it's common for the family or 
00743  * OS */
00744 #if defined(MPI_CACHE_LINE_SIZE) && !defined(MPI_GET_PROCESSOR_LINE_SIZE_DEFINED)
00745 
00746 unsigned long
00747 s_mpi_getProcessorLineSize()
00748 {
00749    return MPI_CACHE_LINE_SIZE;
00750 }
00751 #define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
00752 #endif
00753 
00754 
00755 /* If no way to get the processor cache line size has been defined, assume
00756  * it's 32 bytes (most common value, does not significantly impact performance)
00757  */ 
00758 #ifndef MPI_GET_PROCESSOR_LINE_SIZE_DEFINED
00759 unsigned long
00760 s_mpi_getProcessorLineSize()
00761 {
00762    return 32;
00763 }
00764 #endif
00765 
00766 #ifdef TEST_IT
00767 #include <stdio.h>
00768 
/*
 * Test driver (built only when TEST_IT is defined): print the value
 * returned by s_mpi_getProcessorLineSize().
 */
int
main(void)
{
    /* the function returns unsigned long, so the conversion must be %lu */
    printf("line size = %lu\n", s_mpi_getProcessorLineSize());
    return 0;
}
00773 #endif