Back to index

glibc  2.9
cacheinfo.c
Go to the documentation of this file.
00001 /* x86_64 cache info.
00002    Copyright (C) 2003, 2004, 2006, 2007 Free Software Foundation, Inc.
00003    This file is part of the GNU C Library.
00004 
00005    The GNU C Library is free software; you can redistribute it and/or
00006    modify it under the terms of the GNU Lesser General Public
00007    License as published by the Free Software Foundation; either
00008    version 2.1 of the License, or (at your option) any later version.
00009 
00010    The GNU C Library is distributed in the hope that it will be useful,
00011    but WITHOUT ANY WARRANTY; without even the implied warranty of
00012    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013    Lesser General Public License for more details.
00014 
00015    You should have received a copy of the GNU Lesser General Public
00016    License along with the GNU C Library; if not, write to the Free
00017    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
00018    02111-1307 USA.
00019 */
00020 
00021 #include <assert.h>
00022 #include <stdbool.h>
00023 #include <stdlib.h>
00024 #include <unistd.h>
00025 
/* Description of one cache as encoded by a single CPUID leaf 2
   descriptor byte.  */
struct intel_02_cache_info
{
  /* Descriptor byte value.  This is the search key.  */
  unsigned int idx;
  /* The _SC_*_SIZE constant naming the cache this entry describes.  */
  int name;
  /* Cache size in bytes.  */
  long int size;
  /* Associativity.  */
  long int assoc;
  /* Cache line size in bytes.  */
  long int linesize;
};

/* The descriptor bytes this file knows how to decode.  The entries must
   stay sorted by ascending IDX because the table is searched with
   bsearch.  Sizes are spelled as multiples of 1024 bytes.  */
static const struct intel_02_cache_info intel_02_known[] =
  {
    { 0x06, _SC_LEVEL1_ICACHE_SIZE,     8 * 1024,  4, 32 },
    { 0x08, _SC_LEVEL1_ICACHE_SIZE,    16 * 1024,  4, 32 },
    { 0x0a, _SC_LEVEL1_DCACHE_SIZE,     8 * 1024,  2, 32 },
    { 0x0c, _SC_LEVEL1_DCACHE_SIZE,    16 * 1024,  4, 32 },
    { 0x22, _SC_LEVEL3_CACHE_SIZE,    512 * 1024,  4, 64 },
    { 0x23, _SC_LEVEL3_CACHE_SIZE,   1024 * 1024,  8, 64 },
    { 0x25, _SC_LEVEL3_CACHE_SIZE,   2048 * 1024,  8, 64 },
    { 0x29, _SC_LEVEL3_CACHE_SIZE,   4096 * 1024,  8, 64 },
    { 0x2c, _SC_LEVEL1_DCACHE_SIZE,    32 * 1024,  8, 64 },
    { 0x30, _SC_LEVEL1_ICACHE_SIZE,    32 * 1024,  8, 64 },
    { 0x39, _SC_LEVEL2_CACHE_SIZE,    128 * 1024,  4, 64 },
    { 0x3a, _SC_LEVEL2_CACHE_SIZE,    192 * 1024,  6, 64 },
    { 0x3b, _SC_LEVEL2_CACHE_SIZE,    128 * 1024,  2, 64 },
    { 0x3c, _SC_LEVEL2_CACHE_SIZE,    256 * 1024,  4, 64 },
    { 0x3d, _SC_LEVEL2_CACHE_SIZE,    384 * 1024,  6, 64 },
    { 0x3e, _SC_LEVEL2_CACHE_SIZE,    512 * 1024,  4, 64 },
    { 0x3f, _SC_LEVEL2_CACHE_SIZE,    256 * 1024,  2, 64 },
    { 0x41, _SC_LEVEL2_CACHE_SIZE,    128 * 1024,  4, 32 },
    { 0x42, _SC_LEVEL2_CACHE_SIZE,    256 * 1024,  4, 32 },
    { 0x43, _SC_LEVEL2_CACHE_SIZE,    512 * 1024,  4, 32 },
    { 0x44, _SC_LEVEL2_CACHE_SIZE,   1024 * 1024,  4, 32 },
    { 0x45, _SC_LEVEL2_CACHE_SIZE,   2048 * 1024,  4, 32 },
    { 0x46, _SC_LEVEL3_CACHE_SIZE,   4096 * 1024,  4, 64 },
    { 0x47, _SC_LEVEL3_CACHE_SIZE,   8192 * 1024,  8, 64 },
    { 0x48, _SC_LEVEL2_CACHE_SIZE,   3072 * 1024, 12, 64 },
    { 0x49, _SC_LEVEL2_CACHE_SIZE,   4096 * 1024, 16, 64 },
    { 0x4a, _SC_LEVEL3_CACHE_SIZE,   6144 * 1024, 12, 64 },
    { 0x4b, _SC_LEVEL3_CACHE_SIZE,   8192 * 1024, 16, 64 },
    { 0x4c, _SC_LEVEL3_CACHE_SIZE,  12288 * 1024, 12, 64 },
    { 0x4d, _SC_LEVEL3_CACHE_SIZE,  16384 * 1024, 16, 64 },
    { 0x4e, _SC_LEVEL2_CACHE_SIZE,   6144 * 1024, 24, 64 },
    { 0x60, _SC_LEVEL1_DCACHE_SIZE,    16 * 1024,  8, 64 },
    { 0x66, _SC_LEVEL1_DCACHE_SIZE,     8 * 1024,  4, 64 },
    { 0x67, _SC_LEVEL1_DCACHE_SIZE,    16 * 1024,  4, 64 },
    { 0x68, _SC_LEVEL1_DCACHE_SIZE,    32 * 1024,  4, 64 },
    { 0x78, _SC_LEVEL2_CACHE_SIZE,   1024 * 1024,  8, 64 },
    { 0x79, _SC_LEVEL2_CACHE_SIZE,    128 * 1024,  8, 64 },
    { 0x7a, _SC_LEVEL2_CACHE_SIZE,    256 * 1024,  8, 64 },
    { 0x7b, _SC_LEVEL2_CACHE_SIZE,    512 * 1024,  8, 64 },
    { 0x7c, _SC_LEVEL2_CACHE_SIZE,   1024 * 1024,  8, 64 },
    { 0x7d, _SC_LEVEL2_CACHE_SIZE,   2048 * 1024,  8, 64 },
    { 0x7f, _SC_LEVEL2_CACHE_SIZE,    512 * 1024,  2, 64 },
    { 0x82, _SC_LEVEL2_CACHE_SIZE,    256 * 1024,  8, 32 },
    { 0x83, _SC_LEVEL2_CACHE_SIZE,    512 * 1024,  8, 32 },
    { 0x84, _SC_LEVEL2_CACHE_SIZE,   1024 * 1024,  8, 32 },
    { 0x85, _SC_LEVEL2_CACHE_SIZE,   2048 * 1024,  8, 32 },
    { 0x86, _SC_LEVEL2_CACHE_SIZE,    512 * 1024,  4, 64 },
    { 0x87, _SC_LEVEL2_CACHE_SIZE,   1024 * 1024,  8, 64 },
  };

/* Number of entries in intel_02_known.  */
#define nintel_02_known (sizeof (intel_02_known) / sizeof (intel_02_known [0]))
00086 
00087 static int
00088 intel_02_known_compare (const void *p1, const void *p2)
00089 {
00090   const struct intel_02_cache_info *i1;
00091   const struct intel_02_cache_info *i2;
00092 
00093   i1 = (const struct intel_02_cache_info *) p1;
00094   i2 = (const struct intel_02_cache_info *) p2;
00095 
00096   if (i1->idx == i2->idx)
00097     return 0;
00098 
00099   return i1->idx < i2->idx ? -1 : 1;
00100 }
00101 
00102 
00103 static long int
00104 __attribute__ ((noinline))
00105 intel_check_word (int name, unsigned int value, bool *has_level_2,
00106                 bool *no_level_2_or_3)
00107 {
00108   if ((value & 0x80000000) != 0)
00109     /* The register value is reserved.  */
00110     return 0;
00111 
00112   /* Fold the name.  The _SC_ constants are always in the order SIZE,
00113      ASSOC, LINESIZE.  */
00114   int folded_name = (_SC_LEVEL1_ICACHE_SIZE
00115                    + ((name - _SC_LEVEL1_ICACHE_SIZE) / 3) * 3);
00116 
00117   while (value != 0)
00118     {
00119       unsigned int byte = value & 0xff;
00120 
00121       if (byte == 0x40)
00122        {
00123          *no_level_2_or_3 = true;
00124 
00125          if (folded_name == _SC_LEVEL3_CACHE_SIZE)
00126            /* No need to look further.  */
00127            break;
00128        }
00129       else
00130        {
00131          if (byte == 0x49 && folded_name == _SC_LEVEL3_CACHE_SIZE)
00132            {
00133              /* Intel reused this value.  For family 15, model 6 it
00134                specifies the 3rd level cache.  Otherwise the 2nd
00135                level cache.  */
00136              unsigned int eax;
00137              unsigned int ebx;
00138              unsigned int ecx;
00139              unsigned int edx;
00140              asm volatile ("cpuid"
00141                          : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
00142                          : "0" (1));
00143 
00144              unsigned int family = ((eax >> 20) & 0xff) + ((eax >> 8) & 0xf);
00145              unsigned int model = ((((eax >>16) & 0xf) << 4)
00146                                 + ((eax >> 4) & 0xf));
00147              if (family == 15 && model == 6)
00148               {
00149                 /* The level 3 cache is encoded for this model like
00150                    the level 2 cache is for other models.  Pretend
00151                    the caller asked for the level 2 cache.  */
00152                 name = (_SC_LEVEL2_CACHE_SIZE
00153                        + (name - _SC_LEVEL3_CACHE_SIZE));
00154                 folded_name = _SC_LEVEL3_CACHE_SIZE;
00155               }
00156            }
00157 
00158          struct intel_02_cache_info *found;
00159          struct intel_02_cache_info search;
00160 
00161          search.idx = byte;
00162          found = bsearch (&search, intel_02_known, nintel_02_known,
00163                         sizeof (intel_02_known[0]), intel_02_known_compare);
00164          if (found != NULL)
00165            {
00166              if (found->name == folded_name)
00167               {
00168                 unsigned int offset = name - folded_name;
00169 
00170                 if (offset == 0)
00171                   /* Cache size.  */
00172                   return found->size;
00173                 if (offset == 1)
00174                   return found->assoc;
00175 
00176                 assert (offset == 2);
00177                 return found->linesize;
00178               }
00179 
00180              if (found->name == _SC_LEVEL2_CACHE_SIZE)
00181               *has_level_2 = true;
00182            }
00183        }
00184 
00185       /* Next byte for the next round.  */
00186       value >>= 8;
00187     }
00188 
00189   /* Nothing found.  */
00190   return 0;
00191 }
00192 
00193 
/* Answer the cache query NAME on an Intel processor using CPUID leaf 2.

   MAXIDX is the highest basic CPUID leaf the processor supports; it
   must be at least 2 (enforced with assert).

   Returns the first nonzero value intel_check_word produces for any of
   the four registers.  If none does, returns -1 when NAME refers to a
   level 2 or level 3 cache attribute and intel_check_word set the
   no-level-2-or-3 flag, and 0 otherwise.  */
static long int __attribute__ ((noinline))
handle_intel (int name, unsigned int maxidx)
{
  assert (maxidx >= 2);

  /* OK, we can use the CPUID instruction to get all info about the
     caches.  */
  bool has_level_2 = false;
  bool no_level_2_or_3 = false;
  unsigned int rounds = 1;

  for (unsigned int round = 1; round <= rounds; ++round)
    {
      unsigned int eax;
      unsigned int ebx;
      unsigned int ecx;
      unsigned int edx;
      __asm__ __volatile__ ("cpuid"
                            : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
                            : "0" (2));

      /* The low byte of EAX in the first round contains the number of
         rounds we have to make.  At least one, the one we are already
         doing.  That byte is not a descriptor, so clear it.  */
      if (round == 1)
        {
          rounds = eax & 0xff;
          eax &= 0xffffff00;
        }

      /* Process the individual registers' values, in the order EAX,
         EBX, ECX, EDX, and stop at the first answer.  */
      const unsigned int regs[4] = { eax, ebx, ecx, edx };
      for (unsigned int i = 0; i < 4; ++i)
        {
          long int result = intel_check_word (name, regs[i], &has_level_2,
                                              &no_level_2_or_3);
          if (result != 0)
            return result;
        }
    }

  if (no_level_2_or_3
      && name >= _SC_LEVEL2_CACHE_SIZE && name <= _SC_LEVEL3_CACHE_LINESIZE)
    return -1;

  return 0;
}
00250 
00251 
/* Decode the 4-bit associativity code held in bits 15:12 of REG, the
   position used for both ECX and EDX of CPUID function 0x80000006.
   SIZE is the already decoded cache size; it is only used for code 15,
   where the result is SIZE divided by the low byte of REG (the line
   size).  Codes 0, 1, 2 and 4 stand for themselves; codes without an
   entry yield 0.  */
static long int
amd_decode_assoc (unsigned int reg, unsigned int size)
{
  const unsigned int code = (reg >> 12) & 0xf;

  switch (code)
    {
    case 0:
    case 1:
    case 2:
    case 4:
      return code;
    case 6:
      return 8;
    case 8:
      return 16;
    case 10:
      return 32;
    case 11:
      return 48;
    case 12:
      return 64;
    case 13:
      return 96;
    case 14:
      return 128;
    case 15:
      return size / (reg & 0xff);
    default:
      return 0;
    }
}

/* Answer the cache query NAME on an AMD processor.

   Level 1 attributes come from CPUID function 0x80000005 (ECX for the
   data cache, EDX for the instruction cache); level 2 and level 3
   attributes come from function 0x80000006 (ECX and EDX respectively).

   Returns 0 when NAME is beyond the level 3 range or when the processor
   does not implement the required function.  */
static long int __attribute__ ((noinline))
handle_amd (int name)
{
  unsigned int eax;
  unsigned int ebx;
  unsigned int ecx;
  unsigned int edx;

  /* EAX receives the highest supported extended function.  */
  __asm__ __volatile__ ("cpuid"
                        : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
                        : "0" (0x80000000));

  /* No level 4 cache (yet).  */
  if (name > _SC_LEVEL3_CACHE_LINESIZE)
    return 0;

  const unsigned int fn
    = name >= _SC_LEVEL2_CACHE_SIZE ? 0x80000006 : 0x80000005;
  if (eax < fn)
    return 0;

  __asm__ __volatile__ ("cpuid"
                        : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
                        : "0" (fn));

  /* Instruction cache queries are handled by the data cache cases
     below after moving the instruction cache register into ECX.  */
  if (name < _SC_LEVEL1_DCACHE_SIZE)
    {
      name += _SC_LEVEL1_DCACHE_SIZE - _SC_LEVEL1_ICACHE_SIZE;
      ecx = edx;
    }

  switch (name)
    {
    case _SC_LEVEL1_DCACHE_SIZE:
      /* Bits 31:24 scaled by 1024.  */
      return (ecx >> 14) & 0x3fc00;

    case _SC_LEVEL1_DCACHE_ASSOC:
      {
        const unsigned int upper = ecx >> 16;

        if ((upper & 0xff) != 0xff)
          return upper & 0xff;

        /* Fully associative.  */
        return (upper << 2) & 0x3fc00;
      }

    case _SC_LEVEL1_DCACHE_LINESIZE:
      return ecx & 0xff;

    case _SC_LEVEL2_CACHE_SIZE:
      if ((ecx & 0xf000) == 0)
        return 0;
      /* Bits 31:16 scaled by 1024.  */
      return (ecx >> 6) & 0x3fffc00;

    case _SC_LEVEL2_CACHE_ASSOC:
      return amd_decode_assoc (ecx, (ecx >> 6) & 0x3fffc00);

    case _SC_LEVEL2_CACHE_LINESIZE:
      if ((ecx & 0xf000) == 0)
        return 0;
      return ecx & 0xff;

    case _SC_LEVEL3_CACHE_SIZE:
      if ((edx & 0xf000) == 0)
        return 0;
      return (edx & 0x3ffc0000) << 1;

    case _SC_LEVEL3_CACHE_ASSOC:
      return amd_decode_assoc (edx, (edx & 0x3ffc0000) << 1);

    case _SC_LEVEL3_CACHE_LINESIZE:
      if ((edx & 0xf000) == 0)
        return 0;
      return edx & 0xff;

    default:
      assert (! "cannot happen");
    }
  return -1;
}
00371 
00372 
00373 /* Get the value of the system variable NAME.  */
00374 long int
00375 attribute_hidden
00376 __cache_sysconf (int name)
00377 {
00378   /* Find out what brand of processor.  */
00379   unsigned int eax;
00380   unsigned int ebx;
00381   unsigned int ecx;
00382   unsigned int edx;
00383   asm volatile ("cpuid"
00384               : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
00385               : "0" (0));
00386 
00387   /* This spells out "GenuineIntel".  */
00388   if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
00389     return handle_intel (name, eax);
00390 
00391   /* This spells out "AuthenticAMD".  */
00392   if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
00393     return handle_amd (name);
00394 
00395   // XXX Fill in more vendors.
00396 
00397   /* CPU not known, we have no information.  */
00398   return 0;
00399 }
00400 
00401 
00402 /* Half the data cache size for use in memory and string routines, typically
00403    L1 size.  */
00404 long int __x86_64_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
00405 /* Shared cache size for use in memory and string routines, typically
00406    L2 or L3 size.  */
00407 long int __x86_64_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
00408 long int __x86_64_shared_cache_size attribute_hidden = 1024 * 1024;
00409 /* PREFETCHW support flag for use in memory and string routines.  */
00410 int __x86_64_prefetchw attribute_hidden;
00411 
00412 /* Instructions preferred for memory and string routines.
00413 
00414   0: Regular instructions
00415   1: MMX instructions
00416   2: SSE2 instructions
00417   3: SSSE3 instructions
00418 
00419   */
00420 int __x86_64_preferred_memory_instruction attribute_hidden;
00421 
00422 
00423 static void
00424 __attribute__((constructor))
00425 init_cacheinfo (void)
00426 {
00427   /* Find out what brand of processor.  */
00428   unsigned int eax;
00429   unsigned int ebx;
00430   unsigned int ecx;
00431   unsigned int edx;
00432   int max_cpuid;
00433   int max_cpuid_ex;
00434   long int data = -1;
00435   long int shared = -1;
00436   unsigned int level;
00437   unsigned int threads = 0;
00438 
00439   asm volatile ("cpuid"
00440               : "=a" (max_cpuid), "=b" (ebx), "=c" (ecx), "=d" (edx)
00441               : "0" (0));
00442 
00443   /* This spells out "GenuineIntel".  */
00444   if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
00445     {
00446       data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, max_cpuid);
00447 
00448       /* Try L3 first.  */
00449       level  = 3;
00450       shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, max_cpuid);
00451 
00452       if (shared <= 0)
00453         {
00454          /* Try L2 otherwise.  */
00455           level  = 2;
00456           shared = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid);
00457        }
00458 
00459       asm volatile ("cpuid"
00460                   : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
00461                   : "0" (1));
00462 
00463       /* Intel prefers SSSE3 instructions for memory/string routines
00464         if they are avaiable.  */
00465       if ((ecx & 0x200))
00466        __x86_64_preferred_memory_instruction = 3;
00467       else
00468        __x86_64_preferred_memory_instruction = 2;
00469 
00470       /* Figure out the number of logical threads that share the
00471         highest cache level.  */
00472       if (max_cpuid >= 4)
00473         {
00474          int i = 0;
00475 
00476          /* Query until desired cache level is enumerated.  */
00477          do
00478            {
00479               asm volatile ("cpuid"
00480                           : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
00481                           : "0" (4), "2" (i++));
00482 
00483              /* There seems to be a bug in at least some Pentium Ds
00484                which sometimes fail to iterate all cache parameters.
00485                Do not loop indefinitely here, stop in this case and
00486                assume there is no such information.  */
00487              if ((eax & 0x1f) == 0)
00488               goto intel_bug_no_cache_info;
00489            }
00490           while (((eax >> 5) & 0x7) != level);
00491 
00492          threads = ((eax >> 14) & 0x3ff) + 1;
00493        }
00494       else
00495         {
00496        intel_bug_no_cache_info:
00497          /* Assume that all logical threads share the highest cache level.  */
00498 
00499          threads = (ebx >> 16) & 0xff;
00500        }
00501 
00502       /* Cap usage of highest cache level to the number of supported
00503         threads.  */
00504       if (shared > 0 && threads > 0)
00505         shared /= threads;
00506     }
00507   /* This spells out "AuthenticAMD".  */
00508   else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
00509     {
00510       data   = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
00511       long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
00512       shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);
00513 
00514       /* Get maximum extended function. */
00515       asm volatile ("cpuid"
00516                   : "=a" (max_cpuid_ex), "=b" (ebx), "=c" (ecx), "=d" (edx)
00517                   : "0" (0x80000000));
00518 
00519       if (shared <= 0)
00520        /* No shared L3 cache.  All we have is the L2 cache.  */
00521        shared = core;
00522       else
00523        {
00524          /* Figure out the number of logical threads that share L3.  */
00525          if (max_cpuid_ex >= 0x80000008)
00526            {
00527              /* Get width of APIC ID.  */
00528              asm volatile ("cpuid"
00529                          : "=a" (max_cpuid_ex), "=b" (ebx), "=c" (ecx),
00530                            "=d" (edx)
00531                          : "0" (0x80000008));
00532              threads = 1 << ((ecx >> 12) & 0x0f);
00533            }
00534 
00535          if (threads == 0)
00536            {
00537              /* If APIC ID width is not available, use logical
00538                processor count.  */
00539              asm volatile ("cpuid"
00540                          : "=a" (max_cpuid_ex), "=b" (ebx), "=c" (ecx),
00541                            "=d" (edx)
00542                          : "0" (0x00000001));
00543 
00544              if ((edx & (1 << 28)) != 0)
00545               threads = (ebx >> 16) & 0xff;
00546            }
00547 
00548          /* Cap usage of highest cache level to the number of
00549             supported threads.  */
00550          if (threads > 0)
00551            shared /= threads;
00552 
00553          /* Account for exclusive L2 and L3 caches.  */
00554          shared += core;
00555        }
00556 
00557       if (max_cpuid_ex >= 0x80000001)
00558        {
00559          asm volatile ("cpuid"
00560                      : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
00561                      : "0" (0x80000001));
00562          /*  PREFETCHW     || 3DNow!  */
00563          if ((ecx & 0x100) || (edx & 0x80000000))
00564            __x86_64_prefetchw = -1;
00565        }
00566     }
00567 
00568   if (data > 0)
00569     __x86_64_data_cache_size_half = data / 2;
00570 
00571   if (shared > 0)
00572     {
00573       __x86_64_shared_cache_size_half = shared / 2;
00574       __x86_64_shared_cache_size = shared;
00575     }
00576 }