Back to index

lightning-sunbird  0.9+nobinonly
mpcpucache.c
Go to the documentation of this file.
00001 /* ***** BEGIN LICENSE BLOCK *****
00002  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00003  *
00004  * The contents of this file are subject to the Mozilla Public License Version
00005  * 1.1 (the "License"); you may not use this file except in compliance with
00006  * the License. You may obtain a copy of the License at
00007  * http://www.mozilla.org/MPL/
00008  *
00009  * Software distributed under the License is distributed on an "AS IS" basis,
00010  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00011  * for the specific language governing rights and limitations under the
00012  * License.
00013  *
00014  * The Original Code is the Netscape security libraries.
00015  *
00016  * The Initial Developer of the Original Code is
00017  * Red Hat, Inc
00018  * Portions created by the Initial Developer are Copyright (C) 2005
00019  * the Initial Developer. All Rights Reserved.
00020  *
00021  * Contributor(s):
00022  *   Robert Relyea <rrelyea@redhat.com>
00023  *
00024  * Alternatively, the contents of this file may be used under the terms of
00025  * either the GNU General Public License Version 2 or later (the "GPL"), or
00026  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00027  * in which case the provisions of the GPL or the LGPL are applicable instead
00028  * of those above. If you wish to allow use of your version of this file only
00029  * under the terms of either the GPL or the LGPL, and not to allow others to
00030  * use your version of this file under the terms of the MPL, indicate your
00031  * decision by deleting the provisions above and replace them with the notice
00032  * and other provisions required by the GPL or the LGPL. If you do not delete
00033  * the provisions above, a recipient may use your version of this file under
00034  * the terms of any one of the MPL, the GPL or the LGPL.
00035  *
00036  * ***** END LICENSE BLOCK ***** */
00037 
00038 #include "mpi.h"
00039 
00040 /*
00041  * This file implements a single function: s_mpi_getProcessorLineSize();
00042  * s_mpi_getProcessorLineSize() returns the size in bytes of the cache line
00043  * if a cache exists, or zero if there is no cache. If more than one
00044  * cache line exists, it should return the smallest line size (which is 
00045  * usually the L1 cache).
00046  *
00047  * mp_modexp uses this information to make sure that private key information
00048  * isn't being leaked through the cache.
00049  *
00050  * Currently the file returns good data for most modern x86 processors, and
00051  * reasonable data on 64-bit ppc processors. All other processors are assumed
00052  * to have a cache line size of 32 bytes unless modified by target.mk.
00053  * 
00054  */
00055 
00056 #if defined(i386) || defined(__i386) || defined(__X86__) || defined (_M_IX86) || defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
00057 /* X86 processors have special instructions that tell us about the cache */
00058 #include "string.h"
00059 
00060 #if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
00061 #define AMD_64 1
00062 #endif
00063 
00064 /* Generic CPUID function */
00065 #if defined(AMD_64)
00066 
00067 #if defined(__GNUC__)
00068 
/*
 * Execute the CPUID instruction (x86-64, GCC flavour).
 *
 * op                 - value loaded into eax before the instruction runs.
 * eax, ebx, ecx, edx - receive the four registers CPUID writes.
 *
 * The statement is marked volatile: callers in this file issue cpuid(2)
 * several times in a row with the same input, and a non-volatile asm whose
 * inputs are unchanged may be merged or moved by the optimizer.
 */
static void cpuid(unsigned long op, unsigned long *eax, 
                         unsigned long *ebx, unsigned long *ecx, 
                         unsigned long *edx)
{
       __asm__ __volatile__("cpuid\n\t"
              : "=a" (*eax),
                "=b" (*ebx),
                "=c" (*ecx),
                "=d" (*edx)
              : "0" (op));
}
00080 
00081 #elif defined(_MSC_VER)
00082 
00083 #include <intrin.h>
00084 
/*
 * CPUID wrapper for the Microsoft x64 compiler, built on the __cpuid
 * intrinsic. The four values the intrinsic stores are handed back through
 * eax, ebx, ecx and edx, in that order.
 */
static void cpuid(unsigned long op, unsigned long *eax, 
           unsigned long *ebx, unsigned long *ecx, 
           unsigned long *edx)
{
    int regs[4];   /* filled by the intrinsic: [0]..[3] -> eax..edx */

    __cpuid(regs, op);

    *eax = regs[0];
    *ebx = regs[1];
    *ecx = regs[2];
    *edx = regs[3];
}
00097 
00098 #endif
00099 
00100 #else /* !defined(AMD_64) */
00101 
00102 /* x86 */
00103 
00104 #if defined(__GNUC__)
/*
 * Execute the CPUID instruction (32-bit x86, GCC flavour).
 *
 * op                 - value loaded into eax before the instruction runs.
 * eax, ebx, ecx, edx - receive the four registers CPUID writes.
 *
 * GCC isn't smart enough to save the ebx PIC register on its own in this
 * case, so it is done by hand: the caller's ebx is parked in edi, and after
 * CPUID the two are exchanged so that edi carries CPUID's ebx result while
 * ebx gets its original value back. The result is bound to edi ("=D")
 * rather than to an arbitrary register ("=r"), because the compiler may
 * pick ebx itself for "=r" in non-PIC builds, in which case restoring ebx
 * would overwrite the result.
 *
 * The statement is volatile so that repeated calls with the same 'op' are
 * not merged or moved by the optimizer.
 */
static void cpuid(unsigned long op, unsigned long *eax, 
                         unsigned long *ebx, unsigned long *ecx, 
                         unsigned long *edx)
{
       __asm__ __volatile__(
                "movl %%ebx,%%edi\n\t"    /* save the caller's ebx */
                "cpuid\n\t"
                "xchgl %%ebx,%%edi\n\t"   /* edi = result, ebx restored */
              : "=a" (*eax),
                "=D" (*ebx),
                "=c" (*ecx),
                "=d" (*edx)
              : "0" (op));
}
00121 
/*
 * Try flipping a processor flag to determine CPU type.
 *
 * flag - mask of the bit(s) to toggle in the processor flags word.
 *
 * The flags are read, XORed with 'flag', written back and read again; the
 * original flags are restored before returning. The return value is the XOR
 * of the re-read flags with the original ones, so it is zero when the
 * processor did not let the bit(s) change.
 */
static unsigned long changeFlag(unsigned long flag)
{
       unsigned long changedFlags, originalFlags;
       __asm__("pushfl\n\t"            /* get the flags */
               "popl %0\n\t"
               "movl %0,%1\n\t"    /* save the original flags */
               "xorl %2,%0\n\t"    /* flip the bit */
              "pushl %0\n\t"       /* set the flags */
               "popfl\n\t"
              "pushfl\n\t"         /* get the flags again (for return) */
              "popl %0\n\t"
              "pushl %1\n\t"              /* restore the original flags */
               "popfl\n\t"
              : "=r" (changedFlags),
                "=r" (originalFlags),
                "=r" (flag)
              : "2" (flag));
       /* bits that differ between the two reads are the ones that flipped */
       return changedFlags ^ originalFlags;
}
00144 
00145 #elif defined(_MSC_VER)
00146 
/*
 * Windows (32-bit Microsoft compiler) versions of the above assembler.
 */
/* wcpuid emits the two bytes 0x0f 0xa2 directly into the instruction
 * stream instead of using a mnemonic. */
#define wcpuid __asm __emit 0fh __asm __emit 0a2h
/*
 * Load 'op' into eax, execute the emitted instruction and return the
 * resulting eax/ebx/ecx/edx through Reax/Rebx/Recx/Redx. All general
 * registers are saved and restored around the sequence (pushad/popad), so
 * the results are first copied into locals and stored through the pointers
 * afterwards.
 */
static void cpuid(unsigned long op,    unsigned long *Reax, 
    unsigned long *Rebx, unsigned long *Recx, unsigned long *Redx)
{
        unsigned long  Leax, Lebx, Lecx, Ledx;
        __asm {
        pushad
        mov     eax,op
        wcpuid
        mov     Leax,eax
        mov     Lebx,ebx
        mov     Lecx,ecx
        mov     Ledx,edx
        popad
        }
        *Reax = Leax;
        *Rebx = Lebx;
        *Recx = Lecx;
        *Redx = Ledx;
}
00170 
/*
 * Try to toggle the bit(s) given by 'flag' in the processor flags word.
 *
 * The flags are read, XORed with 'flag', written back and read again; the
 * original flags (kept on the stack) are restored before returning. eax and
 * ebx are saved and restored around the sequence. Returns the XOR of the
 * re-read flags with the original ones, i.e. zero when nothing changed.
 */
static unsigned long changeFlag(unsigned long flag)
{
       unsigned long changedFlags, originalFlags;
       __asm {
              push eax
              push ebx
              pushfd                        /* get the flags */
               pop  eax
              push eax             /* save the flags on the stack */
               mov  originalFlags,eax  /* save the original flags */
              mov  ebx,flag
               xor  eax,ebx            /* flip the bit */
              push eax                /* set the flags */
               popfd
              pushfd                  /* get the flags again (for return) */
              pop  eax      
              popfd                   /* restore the original flags */
              mov changedFlags,eax
              pop ebx
              pop eax
       }
       /* bits that differ between the two reads are the ones that flipped */
       return changedFlags ^ originalFlags;
}
00194 #endif
00195 
00196 #endif
00197 
00198 #if !defined(AMD_64)
/* Masks handed to changeFlag() by is386() and is486() below. */
#define AC_FLAG 0x40000     /* bit 18 of the flags word */
#define ID_FLAG 0x200000    /* bit 21 of the flags word */
00201 
00202 /* 386 processors can't flip the AC_FLAG, intel AP Note AP-485 */
00203 static int is386()
00204 {
00205     return changeFlag(AC_FLAG) == 0;
00206 }
00207 
00208 /* 486 processors can't flip the ID_FLAG, intel AP Note AP-485 */
00209 static int is486()
00210 {
00211     return changeFlag(ID_FLAG) == 0;
00212 }
00213 #endif
00214 
00215 
00216 /*
00217  * table for Intel Cache.
00218  * See Intel Application Note AP-485 for more information 
00219  */
00220 
/* Storage type used for the 'type' member of the CacheMap rows; a single
 * byte per entry. */
typedef unsigned char CacheTypeEntry;
00222 
/*
 * Kinds of entries a descriptor in CacheMap can stand for. The enumerators
 * are numbered consecutively from 0 (Cache_NONE) to 14 (Cache_L3d), so
 * every value fits in a CacheTypeEntry byte.
 */
typedef enum {
    Cache_NONE = 0,
    Cache_UNKNOWN,      /* 1 */
    Cache_TLB,          /* 2 */
    Cache_TLBi,         /* 3 */
    Cache_TLBd,         /* 4 */
    Cache_Trace,        /* 5 */
    Cache_L1,           /* 6 */
    Cache_L1i,          /* 7 */
    Cache_L1d,          /* 8 */
    Cache_L2,           /* 9 */
    Cache_L2i,          /* 10 */
    Cache_L2d,          /* 11 */
    Cache_L3,           /* 12 */
    Cache_L3i,          /* 13 */
    Cache_L3d           /* 14 */
} CacheType;
00240 
/* One row of CacheMap: what a single descriptor byte stands for. */
struct _cache {
    CacheTypeEntry type;    /* a CacheType value, stored in one byte */
    unsigned char lineSize; /* line size for the entry; rows with 0 are
                             * skipped by getIntelCacheEntryLineSize() */
};
00245 static const struct _cache CacheMap[256] = {
00246 /* 00 */ {Cache_NONE,    0   },
00247 /* 01 */ {Cache_TLBi,    0   },
00248 /* 02 */ {Cache_TLBi,    0   },
00249 /* 03 */ {Cache_TLBd,    0   },
00250 /* 04 */ {Cache_TLBd,        },
00251 /* 05 */ {Cache_UNKNOWN, 0   },
00252 /* 06 */ {Cache_L1i,     32  },
00253 /* 07 */ {Cache_UNKNOWN, 0   },
00254 /* 08 */ {Cache_L1i,     32  },
00255 /* 09 */ {Cache_UNKNOWN, 0   },
00256 /* 0a */ {Cache_L1d,     32  },
00257 /* 0b */ {Cache_UNKNOWN, 0   },
00258 /* 0c */ {Cache_L1d,     32  },
00259 /* 0d */ {Cache_UNKNOWN, 0   },
00260 /* 0e */ {Cache_UNKNOWN, 0   },
00261 /* 0f */ {Cache_UNKNOWN, 0   },
00262 /* 10 */ {Cache_UNKNOWN, 0   },
00263 /* 11 */ {Cache_UNKNOWN, 0   },
00264 /* 12 */ {Cache_UNKNOWN, 0   },
00265 /* 13 */ {Cache_UNKNOWN, 0   },
00266 /* 14 */ {Cache_UNKNOWN, 0   },
00267 /* 15 */ {Cache_UNKNOWN, 0   },
00268 /* 16 */ {Cache_UNKNOWN, 0   },
00269 /* 17 */ {Cache_UNKNOWN, 0   },
00270 /* 18 */ {Cache_UNKNOWN, 0   },
00271 /* 19 */ {Cache_UNKNOWN, 0   },
00272 /* 1a */ {Cache_UNKNOWN, 0   },
00273 /* 1b */ {Cache_UNKNOWN, 0   },
00274 /* 1c */ {Cache_UNKNOWN, 0   },
00275 /* 1d */ {Cache_UNKNOWN, 0   },
00276 /* 1e */ {Cache_UNKNOWN, 0   },
00277 /* 1f */ {Cache_UNKNOWN, 0   },
00278 /* 20 */ {Cache_UNKNOWN, 0   },
00279 /* 21 */ {Cache_UNKNOWN, 0   },
00280 /* 22 */ {Cache_L3,      64  },
00281 /* 23 */ {Cache_L3,      64  },
00282 /* 24 */ {Cache_UNKNOWN, 0   },
00283 /* 25 */ {Cache_L3,      64  },
00284 /* 26 */ {Cache_UNKNOWN, 0   },
00285 /* 27 */ {Cache_UNKNOWN, 0   },
00286 /* 28 */ {Cache_UNKNOWN, 0   },
00287 /* 29 */ {Cache_L3,      64  },
00288 /* 2a */ {Cache_UNKNOWN, 0   },
00289 /* 2b */ {Cache_UNKNOWN, 0   },
00290 /* 2c */ {Cache_L1d,     64  },
00291 /* 2d */ {Cache_UNKNOWN, 0   },
00292 /* 2e */ {Cache_UNKNOWN, 0   },
00293 /* 2f */ {Cache_UNKNOWN, 0   },
00294 /* 30 */ {Cache_L1i,     64  },
00295 /* 31 */ {Cache_UNKNOWN, 0   },
00296 /* 32 */ {Cache_UNKNOWN, 0   },
00297 /* 33 */ {Cache_UNKNOWN, 0   },
00298 /* 34 */ {Cache_UNKNOWN, 0   },
00299 /* 35 */ {Cache_UNKNOWN, 0   },
00300 /* 36 */ {Cache_UNKNOWN, 0   },
00301 /* 37 */ {Cache_UNKNOWN, 0   },
00302 /* 38 */ {Cache_UNKNOWN, 0   },
00303 /* 39 */ {Cache_L2,      64  },
00304 /* 3a */ {Cache_UNKNOWN, 0   },
00305 /* 3b */ {Cache_L2,      64  },
00306 /* 3c */ {Cache_L2,      64  },
00307 /* 3d */ {Cache_UNKNOWN, 0   },
00308 /* 3e */ {Cache_UNKNOWN, 0   },
00309 /* 3f */ {Cache_UNKNOWN, 0   },
00310 /* 40 */ {Cache_L2,      0   },
00311 /* 41 */ {Cache_L2,      32  },
00312 /* 42 */ {Cache_L2,      32  },
00313 /* 43 */ {Cache_L2,      32  },
00314 /* 44 */ {Cache_L2,      32  },
00315 /* 45 */ {Cache_L2,      32  },
00316 /* 46 */ {Cache_UNKNOWN, 0   },
00317 /* 47 */ {Cache_UNKNOWN, 0   },
00318 /* 48 */ {Cache_UNKNOWN, 0   },
00319 /* 49 */ {Cache_UNKNOWN, 0   },
00320 /* 4a */ {Cache_UNKNOWN, 0   },
00321 /* 4b */ {Cache_UNKNOWN, 0   },
00322 /* 4c */ {Cache_UNKNOWN, 0   },
00323 /* 4d */ {Cache_UNKNOWN, 0   },
00324 /* 4e */ {Cache_UNKNOWN, 0   },
00325 /* 4f */ {Cache_UNKNOWN, 0   },
00326 /* 50 */ {Cache_TLBi,    0   },
00327 /* 51 */ {Cache_TLBi,    0   },
00328 /* 52 */ {Cache_TLBi,    0   },
00329 /* 53 */ {Cache_UNKNOWN, 0   },
00330 /* 54 */ {Cache_UNKNOWN, 0   },
00331 /* 55 */ {Cache_UNKNOWN, 0   },
00332 /* 56 */ {Cache_UNKNOWN, 0   },
00333 /* 57 */ {Cache_UNKNOWN, 0   },
00334 /* 58 */ {Cache_UNKNOWN, 0   },
00335 /* 59 */ {Cache_UNKNOWN, 0   },
00336 /* 5a */ {Cache_UNKNOWN, 0   },
00337 /* 5b */ {Cache_TLBd,    0   },
00338 /* 5c */ {Cache_TLBd,    0   },
00339 /* 5d */ {Cache_TLBd,    0   },
00340 /* 5e */ {Cache_UNKNOWN, 0   },
00341 /* 5f */ {Cache_UNKNOWN, 0   },
00342 /* 60 */ {Cache_UNKNOWN, 0   },
00343 /* 61 */ {Cache_UNKNOWN, 0   },
00344 /* 62 */ {Cache_UNKNOWN, 0   },
00345 /* 63 */ {Cache_UNKNOWN, 0   },
00346 /* 64 */ {Cache_UNKNOWN, 0   },
00347 /* 65 */ {Cache_UNKNOWN, 0   },
00348 /* 66 */ {Cache_L1d,     64  },
00349 /* 67 */ {Cache_L1d,     64  },
00350 /* 68 */ {Cache_L1d,     64  },
00351 /* 69 */ {Cache_UNKNOWN, 0   },
00352 /* 6a */ {Cache_UNKNOWN, 0   },
00353 /* 6b */ {Cache_UNKNOWN, 0   },
00354 /* 6c */ {Cache_UNKNOWN, 0   },
00355 /* 6d */ {Cache_UNKNOWN, 0   },
00356 /* 6e */ {Cache_UNKNOWN, 0   },
00357 /* 6f */ {Cache_UNKNOWN, 0   },
00358 /* 70 */ {Cache_Trace,   1   },
00359 /* 71 */ {Cache_Trace,   1   },
00360 /* 72 */ {Cache_Trace,   1   },
00361 /* 73 */ {Cache_UNKNOWN, 0   },
00362 /* 74 */ {Cache_UNKNOWN, 0   },
00363 /* 75 */ {Cache_UNKNOWN, 0   },
00364 /* 76 */ {Cache_UNKNOWN, 0   },
00365 /* 77 */ {Cache_UNKNOWN, 0   },
00366 /* 78 */ {Cache_UNKNOWN, 0   },
00367 /* 79 */ {Cache_L2,      64  },
00368 /* 7a */ {Cache_L2,      64  },
00369 /* 7b */ {Cache_L2,      64  },
00370 /* 7c */ {Cache_L2,      64  },
00371 /* 7d */ {Cache_UNKNOWN, 0   },
00372 /* 7e */ {Cache_UNKNOWN, 0   },
00373 /* 7f */ {Cache_UNKNOWN, 0   },
00374 /* 80 */ {Cache_UNKNOWN, 0   },
00375 /* 81 */ {Cache_UNKNOWN, 0   },
00376 /* 82 */ {Cache_L2,      32  },
00377 /* 83 */ {Cache_L2,      32  },
00378 /* 84 */ {Cache_L2,      32  },
00379 /* 85 */ {Cache_L2,      32  },
00380 /* 86 */ {Cache_L2,      64  },
00381 /* 87 */ {Cache_L2,      64  },
00382 /* 88 */ {Cache_UNKNOWN, 0   },
00383 /* 89 */ {Cache_UNKNOWN, 0   },
00384 /* 8a */ {Cache_UNKNOWN, 0   },
00385 /* 8b */ {Cache_UNKNOWN, 0   },
00386 /* 8c */ {Cache_UNKNOWN, 0   },
00387 /* 8d */ {Cache_UNKNOWN, 0   },
00388 /* 8e */ {Cache_UNKNOWN, 0   },
00389 /* 8f */ {Cache_UNKNOWN, 0   },
00390 /* 90 */ {Cache_UNKNOWN, 0   },
00391 /* 91 */ {Cache_UNKNOWN, 0   },
00392 /* 92 */ {Cache_UNKNOWN, 0   },
00393 /* 93 */ {Cache_UNKNOWN, 0   },
00394 /* 94 */ {Cache_UNKNOWN, 0   },
00395 /* 95 */ {Cache_UNKNOWN, 0   },
00396 /* 96 */ {Cache_UNKNOWN, 0   },
00397 /* 97 */ {Cache_UNKNOWN, 0   },
00398 /* 98 */ {Cache_UNKNOWN, 0   },
00399 /* 99 */ {Cache_UNKNOWN, 0   },
00400 /* 9a */ {Cache_UNKNOWN, 0   },
00401 /* 9b */ {Cache_UNKNOWN, 0   },
00402 /* 9c */ {Cache_UNKNOWN, 0   },
00403 /* 9d */ {Cache_UNKNOWN, 0   },
00404 /* 9e */ {Cache_UNKNOWN, 0   },
00405 /* 9f */ {Cache_UNKNOWN, 0   },
00406 /* a0 */ {Cache_UNKNOWN, 0   },
00407 /* a1 */ {Cache_UNKNOWN, 0   },
00408 /* a2 */ {Cache_UNKNOWN, 0   },
00409 /* a3 */ {Cache_UNKNOWN, 0   },
00410 /* a4 */ {Cache_UNKNOWN, 0   },
00411 /* a5 */ {Cache_UNKNOWN, 0   },
00412 /* a6 */ {Cache_UNKNOWN, 0   },
00413 /* a7 */ {Cache_UNKNOWN, 0   },
00414 /* a8 */ {Cache_UNKNOWN, 0   },
00415 /* a9 */ {Cache_UNKNOWN, 0   },
00416 /* aa */ {Cache_UNKNOWN, 0   },
00417 /* ab */ {Cache_UNKNOWN, 0   },
00418 /* ac */ {Cache_UNKNOWN, 0   },
00419 /* ad */ {Cache_UNKNOWN, 0   },
00420 /* ae */ {Cache_UNKNOWN, 0   },
00421 /* af */ {Cache_UNKNOWN, 0   },
00422 /* b0 */ {Cache_TLBi,    0   },
00423 /* b1 */ {Cache_UNKNOWN, 0   },
00424 /* b2 */ {Cache_UNKNOWN, 0   },
00425 /* b3 */ {Cache_TLBd,    0   },
00426 /* b4 */ {Cache_UNKNOWN, 0   },
00427 /* b5 */ {Cache_UNKNOWN, 0   },
00428 /* b6 */ {Cache_UNKNOWN, 0   },
00429 /* b7 */ {Cache_UNKNOWN, 0   },
00430 /* b8 */ {Cache_UNKNOWN, 0   },
00431 /* b9 */ {Cache_UNKNOWN, 0   },
00432 /* ba */ {Cache_UNKNOWN, 0   },
00433 /* bb */ {Cache_UNKNOWN, 0   },
00434 /* bc */ {Cache_UNKNOWN, 0   },
00435 /* bd */ {Cache_UNKNOWN, 0   },
00436 /* be */ {Cache_UNKNOWN, 0   },
00437 /* bf */ {Cache_UNKNOWN, 0   },
00438 /* c0 */ {Cache_UNKNOWN, 0   },
00439 /* c1 */ {Cache_UNKNOWN, 0   },
00440 /* c2 */ {Cache_UNKNOWN, 0   },
00441 /* c3 */ {Cache_UNKNOWN, 0   },
00442 /* c4 */ {Cache_UNKNOWN, 0   },
00443 /* c5 */ {Cache_UNKNOWN, 0   },
00444 /* c6 */ {Cache_UNKNOWN, 0   },
00445 /* c7 */ {Cache_UNKNOWN, 0   },
00446 /* c8 */ {Cache_UNKNOWN, 0   },
00447 /* c9 */ {Cache_UNKNOWN, 0   },
00448 /* ca */ {Cache_UNKNOWN, 0   },
00449 /* cb */ {Cache_UNKNOWN, 0   },
00450 /* cc */ {Cache_UNKNOWN, 0   },
00451 /* cd */ {Cache_UNKNOWN, 0   },
00452 /* ce */ {Cache_UNKNOWN, 0   },
00453 /* cf */ {Cache_UNKNOWN, 0   },
00454 /* d0 */ {Cache_UNKNOWN, 0   },
00455 /* d1 */ {Cache_UNKNOWN, 0   },
00456 /* d2 */ {Cache_UNKNOWN, 0   },
00457 /* d3 */ {Cache_UNKNOWN, 0   },
00458 /* d4 */ {Cache_UNKNOWN, 0   },
00459 /* d5 */ {Cache_UNKNOWN, 0   },
00460 /* d6 */ {Cache_UNKNOWN, 0   },
00461 /* d7 */ {Cache_UNKNOWN, 0   },
00462 /* d8 */ {Cache_UNKNOWN, 0   },
00463 /* d9 */ {Cache_UNKNOWN, 0   },
00464 /* da */ {Cache_UNKNOWN, 0   },
00465 /* db */ {Cache_UNKNOWN, 0   },
00466 /* dc */ {Cache_UNKNOWN, 0   },
00467 /* dd */ {Cache_UNKNOWN, 0   },
00468 /* de */ {Cache_UNKNOWN, 0   },
00469 /* df */ {Cache_UNKNOWN, 0   },
00470 /* e0 */ {Cache_UNKNOWN, 0   },
00471 /* e1 */ {Cache_UNKNOWN, 0   },
00472 /* e2 */ {Cache_UNKNOWN, 0   },
00473 /* e3 */ {Cache_UNKNOWN, 0   },
00474 /* e4 */ {Cache_UNKNOWN, 0   },
00475 /* e5 */ {Cache_UNKNOWN, 0   },
00476 /* e6 */ {Cache_UNKNOWN, 0   },
00477 /* e7 */ {Cache_UNKNOWN, 0   },
00478 /* e8 */ {Cache_UNKNOWN, 0   },
00479 /* e9 */ {Cache_UNKNOWN, 0   },
00480 /* ea */ {Cache_UNKNOWN, 0   },
00481 /* eb */ {Cache_UNKNOWN, 0   },
00482 /* ec */ {Cache_UNKNOWN, 0   },
00483 /* ed */ {Cache_UNKNOWN, 0   },
00484 /* ee */ {Cache_UNKNOWN, 0   },
00485 /* ef */ {Cache_UNKNOWN, 0   },
00486 /* f0 */ {Cache_UNKNOWN, 0   },
00487 /* f1 */ {Cache_UNKNOWN, 0   },
00488 /* f2 */ {Cache_UNKNOWN, 0   },
00489 /* f3 */ {Cache_UNKNOWN, 0   },
00490 /* f4 */ {Cache_UNKNOWN, 0   },
00491 /* f5 */ {Cache_UNKNOWN, 0   },
00492 /* f6 */ {Cache_UNKNOWN, 0   },
00493 /* f7 */ {Cache_UNKNOWN, 0   },
00494 /* f8 */ {Cache_UNKNOWN, 0   },
00495 /* f9 */ {Cache_UNKNOWN, 0   },
00496 /* fa */ {Cache_UNKNOWN, 0   },
00497 /* fb */ {Cache_UNKNOWN, 0   },
00498 /* fc */ {Cache_UNKNOWN, 0   },
00499 /* fd */ {Cache_UNKNOWN, 0   },
00500 /* fe */ {Cache_UNKNOWN, 0   },
00501 /* ff */ {Cache_UNKNOWN, 0   }
00502 };
00503 
00504 
00505 /*
00506  * use the above table to determine the CacheEntryLineSize.
00507  */
00508 static void
00509 getIntelCacheEntryLineSize(unsigned long val, int *level, 
00510                                           unsigned long *lineSize)
00511 {
00512     CacheType type;
00513 
00514     type = CacheMap[val].type;
00515     /* only interested in data caches */
00516     /* NOTE val = 0x40 is a special value that means no L2 or L3 cache.
00517      * this data check has the side effect of rejecting that entry. If
00518      * that wasn't the case, we could have to reject it explicitly */
00519     if (CacheMap[val].lineSize == 0) {
00520        return;
00521     }
00522     /* look at the caches, skip types we aren't interested in.
00523      * if we already have a value for a lower level cache, skip the
00524      * current entry */
00525     if ((type == Cache_L1)|| (type == Cache_L1d)) {
00526        *level = 1;
00527        *lineSize = CacheMap[val].lineSize;
00528     } else if ((*level >= 2) && ((type == Cache_L2) || (type == Cache_L2d))) {
00529        *level = 2;
00530        *lineSize = CacheMap[val].lineSize;
00531     } else if ((*level >= 3) && ((type == Cache_L3) || (type == Cache_L3d))) {
00532        *level = 3;
00533        *lineSize = CacheMap[val].lineSize;
00534     }
00535     return;
00536 }
00537 
00538 
/*
 * Split a register value into its four descriptor bytes and feed each one,
 * most significant byte first, to getIntelCacheEntryLineSize().
 */
static void
getIntelRegisterCacheLineSize(unsigned long val, 
                     int *level, unsigned long *lineSize)
{
    int shift;

    for (shift = 24; shift >= 0; shift -= 8) {
       getIntelCacheEntryLineSize((val >> shift) & 0xff, level, lineSize);
    }
}
00548 
/*
 * Determine the data cache line size from Intel's cpuid command '2'.
 *
 * cpuidLevel - the value cpuid command '0' returned in eax.
 *
 * Returns the line size in bytes of the lowest level data cache that was
 * recognized. Returns '0' if no recognized cache is found, or if the cache
 * information is not supported by this processor (cpuidLevel < 2).
 */
static unsigned long
getIntelCacheLineSize(int cpuidLevel)
{
    int level = 4;      /* above L3: no cache has been recorded yet */
    unsigned long lineSize = 0;
    unsigned long eax, ebx, ecx, edx;
    int repeat, count;

    if (cpuidLevel < 2) {
       return 0;
    }

    /* command '2' of the cpuid is intel's cache info call. Each byte of the
     * 4 registers contain a potential descriptor for the cache. The CacheMap
     * table maps the cache entry with the processor cache. Register 'al'
     * contains a count value that cpuid '2' needs to be called in order to 
     * find all the cache descriptors. Only registers with the high bit set
     * to 'zero' have valid descriptors. This code loops through all the
     * required calls to cpuid '2' and passes any valid descriptors it finds
     * to the getIntelRegisterCacheLineSize code, which breaks the registers
     * down into their component descriptors. In the end the lineSize of the
     * lowest level cache data cache is returned. */
    cpuid(2, &eax, &ebx, &ecx, &edx);
    /* the count occupies all eight bits of 'al', not just the low nibble */
    repeat = eax & 0xff;
    for (count = 0; count < repeat; count++) {
       if ((eax & 0x80000000) == 0) {
           /* mask off 'al': it is the repeat count, not a descriptor */
           getIntelRegisterCacheLineSize(eax & 0xffffff00, &level, &lineSize);
       }
       if ((ebx & 0x80000000) == 0) {
           getIntelRegisterCacheLineSize(ebx, &level, &lineSize);
       }
       if ((ecx & 0x80000000) == 0) {
           getIntelRegisterCacheLineSize(ecx, &level, &lineSize);
       }
       if ((edx & 0x80000000) == 0) {
           getIntelRegisterCacheLineSize(edx, &level, &lineSize);
       }
       /* fetch the next set of descriptors unless this was the last pass */
       if (count+1 != repeat) {
           cpuid(2, &eax, &ebx, &ecx, &edx);
       }
    }
    return lineSize;
}
00596 
/*
 * Line size lookup through the extended cpuid commands, for non-Intel
 * processors. This is based on the AMD extended cache commands for cpuid
 * (see "AMD Processor Recognition Application Note" Publication 20734);
 * some other processors use the identical scheme (see "Processor
 * Recognition, Transmeta Corporation").
 *
 * cpuidLevel - ignored on entry; it is overwritten with the extended cpuid
 *              level before being used.
 *
 * Returns the low byte of ecx from command 0x80000005 (the L1 data cache
 * line size), or '0' if the cache info is not supported by this processor.
 */
static unsigned long
getOtherCacheLineSize(unsigned long cpuidLevel)
{
    unsigned long regA, regB, regC, regD;

    /* ask for the highest extended command the processor implements */
    cpuid(0x80000000, &regA, &regB, &regC, &regD);
    cpuidLevel = regA;

    if (cpuidLevel < 0x80000005) {
       return 0;
    }

    cpuid(0x80000005, &regA, &regB, &regC, &regD);
    return regC & 0xff; /* line Size, L1 Data Cache */
}
00620 
/*
 * Vendor identification strings as returned by cpuid command '0'. Each
 * #define is the index of the string that follows it, so manMap[INTEL] is
 * "GenuineIntel" and so on. The indexes must stay in step with the order of
 * the strings; MAN_UNKNOWN is one past the last valid index so that it can
 * never be mistaken for a real entry.
 */
static const char * const manMap[] = {
#define INTEL     0
    "GenuineIntel",
#define AMD       1
    "AuthenticAMD",
#define CYRIX     2
    "CyrixInstead",
#define CENTAUR   3
    "CentaurHauls",
#define NEXGEN    4
    "NexGenDriven",
#define TRANSMETA 5
    "GenuineTMx86",
#define RISE      6
    "RiseRiseRise",
#define UMC       7
    "UMC UMC UMC ",
#define SIS       8
    "Sis Sis Sis ",
#define NATIONAL  9
    "Geode by NSC",
};

/* number of entries in manMap */
static const int n_manufacturers = sizeof(manMap)/sizeof(manMap[0]);

#define MAN_UNKNOWN 10
00647 
00648 
00649 unsigned long
00650 s_mpi_getProcessorLineSize()
00651 {
00652     unsigned long eax, ebx, ecx, edx;
00653     unsigned long cpuidLevel;
00654     unsigned long cacheLineSize = 0;
00655     int manufacturer = MAN_UNKNOWN;
00656     int i;
00657     char string[65];
00658 
00659 #if !defined(AMD_64)
00660     if (is386()) {
00661        return 0; /* 386 had no cache */
00662     } if (is486()) {
00663        return 32; /* really? need more info */
00664     }
00665 #endif
00666 
00667     /* Pentium, cpuid command is available */
00668     cpuid(0, &eax, &ebx, &ecx, &edx);
00669     cpuidLevel = eax;
00670     *(int *)string = ebx;
00671     *(int *)&string[4] = edx;
00672     *(int *)&string[8] = ecx;
00673     string[12] = 0;
00674 
00675     manufacturer = MAN_UNKNOWN;
00676     for (i=0; i < n_manufacturers; i++) {
00677        if ( strcmp(manMap[i],string) == 0) {
00678            manufacturer = i;
00679        }
00680     }
00681 
00682     if (manufacturer == INTEL) {
00683        cacheLineSize = getIntelCacheLineSize(cpuidLevel);
00684     } else {
00685        cacheLineSize = getOtherCacheLineSize(cpuidLevel);
00686     }
00687     /* doesn't support cache info based on cpuid. This means
00688      * an old pentium class processor, which have cache lines of
00689      * 32. If we learn differently, we can use a switch based on
00690      * the Manufacturer id  */
00691     if (cacheLineSize == 0) {
00692        cacheLineSize = 32;
00693     }
00694     return cacheLineSize;
00695 }
00696 #define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
00697 #endif
00698 
00699 #if defined(__ppc64__) 
00700 /*
00701  *  Sigh, The PPC has some really nice features to help us determine cache
00702  *  size, since it had lots of direct control functions to do so. The POWER
00703  *  processor even has an instruction to do this, but it was dropped in
00704  *  PowerPC. Unfortunately most of them are not available in user mode.
00705  *
00706  *  The dcbz function would be a great way to determine cache line size except
00707  *  1) it only works on write-back memory (it throws an exception otherwise), 
00708  *  and 2) because so many mac programs 'knew' the processor cache size was
00709  *  32 bytes, they used this instruction as a fast 'zero 32 bytes'. Now the new
00710  *  G5 processor has 128 byte cache, but dcbz only clears 32 bytes to keep
00711  *  these programs happy. dcbzl works if 64 bit instructions are supported.
00712  *  If you know 64 bit instructions are supported, and that stack is 
00713  *  write-back, you can use this code.
00714  */
00715 #include "memory.h"
00716 
/*
 * Clear the cache line that contains 'array' by issuing a dcbzl
 * instruction on its address. The address is passed to the instruction in
 * the local 'a', which is bound to register r2; the asm is volatile.
 */
static inline void dcbzl(char *array)
{
       register char *a asm("r2") = array;
       __asm__ __volatile__( "dcbzl %0,r0" : "=r" (a): "0"(a) );
}
00723 
00724 
/* round the address x up to the next multiple of y (y a power of two) */
#define PPC_DO_ALIGN(x,y) ((char *)\
                     ((((long long) (x))+((y)-1))&~((y)-1)))

/* largest line size the probe below can detect */
#define PPC_MAX_LINE_SIZE 256

/*
 * ppc64 implementation: measure the cache line size by clearing one cache
 * block with dcbzl and seeing how much of a 0xff-filled buffer went to zero.
 * Returns the largest power of two (at most PPC_MAX_LINE_SIZE) whose last
 * byte was cleared, or 0 if nothing was cleared.
 */
unsigned long
s_mpi_getProcessorLineSize()
{
    /* twice the maximum size (plus one) leaves room to align inside it */
    char testArray[2*PPC_MAX_LINE_SIZE+1];
    char *test;
    int i;

    /* start on a maximum line size boundary, so the cleared block is known
     * to begin at 'test' */
    test = PPC_DO_ALIGN(testArray, PPC_MAX_LINE_SIZE); 
    /* fill with 1 bits so cleared bytes stand out */
    memset(test, 0xff, PPC_MAX_LINE_SIZE);
    /* clear one cache block starting at 'test' */
    dcbzl(test);

    /* halve the candidate size until its last byte is inside the cleared
     * area; i ends at 0 when no byte was cleared */
    i = PPC_MAX_LINE_SIZE;
    while ((i != 0) && (test[i-1] != 0)) {
       i = i/2;
    }
    return i;
}
00752 
00753 #define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
00754 #endif
00755 
00756 
00757 /*
00758  * put other processor and platform specific cache code here
00759  * return the smallest cache line size in bytes on the processor 
00760  * (usually the L1 cache). If the OS has a call, this would be
00761  * a great place to put it.
00762  *
00763  * If there is no cache, return 0;
00764  * 
00765  * define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED so the generic functions
00766  * below aren't compiled.
00767  *
00768  */
00769 
00770 
00771 /* target.mk can define MPI_CACHE_LINE_SIZE if it's common for the family or 
00772  * OS */
00773 #if defined(MPI_CACHE_LINE_SIZE) && !defined(MPI_GET_PROCESSOR_LINE_SIZE_DEFINED)
00774 
00775 unsigned long
00776 s_mpi_getProcessorLineSize()
00777 {
00778    return MPI_CACHE_LINE_SIZE;
00779 }
00780 #define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
00781 #endif
00782 
00783 
00784 /* If no way to get the processor cache line size has been defined, assume
00785  * it's 32 bytes (most common value, does not significantly impact performance)
00786  */ 
00787 #ifndef MPI_GET_PROCESSOR_LINE_SIZE_DEFINED
/* Last-resort variant: no detection is available, always report 32. */
unsigned long
s_mpi_getProcessorLineSize()
{
   const unsigned long fallbackLineSize = 32;

   return fallbackLineSize;
}
00793 #endif
00794 
00795 #ifdef TEST_IT
00796 #include <stdio.h>
00797 
/*
 * Stand-alone test driver (compiled only when TEST_IT is defined): prints
 * the line size reported by s_mpi_getProcessorLineSize().
 */
int main(void)
{
    /* the result is an unsigned long, so it must be printed with %lu */
    printf("line size = %lu\n", s_mpi_getProcessorLineSize());
    return 0;
}
00802 #endif