Back to index

glibc  2.9
dl-profile.c
Go to the documentation of this file.
00001 /* Profiling of shared libraries.
00002    Copyright (C) 1997-2002, 2003, 2004, 2006 Free Software Foundation, Inc.
00003    This file is part of the GNU C Library.
00004    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
00005    Based on the BSD mcount implementation.
00006 
00007    The GNU C Library is free software; you can redistribute it and/or
00008    modify it under the terms of the GNU Lesser General Public
00009    License as published by the Free Software Foundation; either
00010    version 2.1 of the License, or (at your option) any later version.
00011 
00012    The GNU C Library is distributed in the hope that it will be useful,
00013    but WITHOUT ANY WARRANTY; without even the implied warranty of
00014    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015    Lesser General Public License for more details.
00016 
00017    You should have received a copy of the GNU Lesser General Public
00018    License along with the GNU C Library; if not, write to the Free
00019    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
00020    02111-1307 USA.  */
00021 
00022 #include <assert.h>
00023 #include <errno.h>
00024 #include <fcntl.h>
00025 #include <inttypes.h>
00026 #include <limits.h>
00027 #include <stdio.h>
00028 #include <stdlib.h>
00029 #include <string.h>
00030 #include <unistd.h>
00031 #include <ldsodefs.h>
00032 #include <sys/gmon.h>
00033 #include <sys/gmon_out.h>
00034 #include <sys/mman.h>
00035 #include <sys/param.h>
00036 #include <sys/stat.h>
00037 #include <atomic.h>
00038 
00039 /* The LD_PROFILE feature has to be implemented different to the
00040    normal profiling using the gmon/ functions.  The problem is that an
00041    arbitrary amount of processes simulataneously can be run using
00042    profiling and all write the results in the same file.  To provide
00043    this mechanism one could implement a complicated mechanism to merge
00044    the content of two profiling runs or one could extend the file
00045    format to allow more than one data set.  For the second solution we
00046    would have the problem that the file can grow in size beyond any
00047    limit and both solutions have the problem that the concurrency of
00048    writing the results is a big problem.
00049 
00050    Another much simpler method is to use mmap to map the same file in
00051    all using programs and modify the data in the mmap'ed area and so
00052    also automatically on the disk.  Using the MAP_SHARED option of
00053    mmap(2) this can be done without big problems in more than one
00054    file.
00055 
00056    This approach is very different from the normal profiling.  We have
00057    to use the profiling data in exactly the way they are expected to
00058    be written to disk.  But the normal format used by gprof is not usable
00059    to do this.  It is optimized for size.  It writes the tags as single
00060    bytes but this means that the following 32/64 bit values are
00061    unaligned.
00062 
00063    Therefore we use a new format.  This will look like this
00064 
00065                                    0  1  2  3    <- byte is 32 bit word
00066        0000                        g  m  o  n
00067        0004                        *version*     <- GMON_SHOBJ_VERSION
00068        0008                        00 00 00 00
00069        000c                        00 00 00 00
00070        0010                        00 00 00 00
00071 
00072        0014                        *tag*         <- GMON_TAG_TIME_HIST
00073        0018                        ?? ?? ?? ??
00074                                    ?? ?? ?? ??   <- 32/64 bit LowPC
00075        0018+A                      ?? ?? ?? ??
00076                                    ?? ?? ?? ??   <- 32/64 bit HighPC
00077        0018+2*A                    *histsize*
00078        001c+2*A                    *profrate*
00079        0020+2*A                    s  e  c  o
00080        0024+2*A                    n  d  s  \0
00081        0028+2*A                    \0 \0 \0 \0
00082        002c+2*A                    \0 \0 \0
00083        002f+2*A                    s
00084 
00085        0030+2*A                    ?? ?? ?? ??   <- Count data
00086        ...                         ...
00087        0030+2*A+K                  ?? ?? ?? ??
00088 
00089        0030+2*A+K                  *tag*         <- GMON_TAG_CG_ARC
00090        0034+2*A+K                  *lastused*
00091        0038+2*A+K                  ?? ?? ?? ??
00092                                    ?? ?? ?? ??   <- FromPC#1
00093        0038+3*A+K                  ?? ?? ?? ??
00094                                    ?? ?? ?? ??   <- ToPC#1
00095        0038+4*A+K                  ?? ?? ?? ??   <- Count#1
00096        ...                         ...              ...
00097        0038+(2*(CN-1)+2)*A+(CN-1)*4+K     ?? ?? ?? ??
00098                                    ?? ?? ?? ??   <- FromPC#CGN
00099        0038+(2*(CN-1)+3)*A+(CN-1)*4+K     ?? ?? ?? ??
00100                                    ?? ?? ?? ??   <- ToPC#CGN
00101        0038+(2*CN+2)*A+(CN-1)*4+K  ?? ?? ?? ??   <- Count#CGN
00102 
00103    We put (for now?) no basic block information in the file since this would
00104    introduce rase conditions among all the processes who want to write them.
00105 
00106    `K' is the number of count entries which is computed as
00107 
00108               textsize / HISTFRACTION
00109 
00110    `CG' in the above table is the number of call graph arcs.  Normally,
00111    the table is sparse and the profiling code writes out only the those
00112    entries which are really used in the program run.  But since we must
00113    not extend this table (the profiling file) we'll keep them all here.
00114    So CN can be executed in advance as
00115 
00116               MINARCS <= textsize*(ARCDENSITY/100) <= MAXARCS
00117 
00118    Now the remaining question is: how to build the data structures we can
00119    work with from this data.  We need the from set and must associate the
00120    froms with all the associated tos.  We will do this by constructing this
00121    data structures at the program start.  To do this we'll simply visit all
00122    entries in the call graph table and add it to the appropriate list.  */
00123 
00124 extern int __profile_frequency (void);
00125 libc_hidden_proto (__profile_frequency)
00126 
00127 /* We define a special type to address the elements of the arc table.
00128    This is basically the `gmon_cg_arc_record' format but it includes
00129    the room for the tag and it uses real types.  */
00130 struct here_cg_arc_record
00131   {
00132     uintptr_t from_pc;
00133     uintptr_t self_pc;
00134     uint32_t count;
00135   } __attribute__ ((packed));
00136 
00137 static struct here_cg_arc_record *data;
00138 
00139 /* Nonzero if profiling is under way.  */
00140 static int running;
00141 
00142 /* This is the number of entry which have been incorporated in the toset.  */
00143 static uint32_t narcs;
00144 /* This is a pointer to the object representing the number of entries
00145    currently in the mmaped file.  At no point of time this has to be the
00146    same as NARCS.  If it is equal all entries from the file are in our
00147    lists.  */
00148 static volatile uint32_t *narcsp;
00149 
00150 
00151 struct here_fromstruct
00152   {
00153     struct here_cg_arc_record volatile *here;
00154     uint16_t link;
00155   };
00156 
00157 static volatile uint16_t *tos;
00158 
00159 static struct here_fromstruct *froms;
00160 static uint32_t fromlimit;
00161 static volatile uint32_t fromidx;
00162 
00163 static uintptr_t lowpc;
00164 static size_t textsize;
00165 static unsigned int log_hashfraction;
00166 
00167 
00168 
00169 /* Set up profiling data to profile object desribed by MAP.  The output
00170    file is found (or created) in OUTPUT_DIR.  */
00171 void
00172 internal_function
00173 _dl_start_profile (void)
00174 {
00175   char *filename;
00176   int fd;
00177   struct stat64 st;
00178   const ElfW(Phdr) *ph;
00179   ElfW(Addr) mapstart = ~((ElfW(Addr)) 0);
00180   ElfW(Addr) mapend = 0;
00181   struct gmon_hdr gmon_hdr;
00182   struct gmon_hist_hdr hist_hdr;
00183   char *hist, *cp;
00184   size_t idx;
00185   size_t tossize;
00186   size_t fromssize;
00187   uintptr_t highpc;
00188   uint16_t *kcount;
00189   size_t kcountsize;
00190   struct gmon_hdr *addr = NULL;
00191   off_t expected_size;
00192   /* See profil(2) where this is described.  */
00193   int s_scale;
00194 #define SCALE_1_TO_1 0x10000L
00195   const char *errstr = NULL;
00196 
00197   /* Compute the size of the sections which contain program code.  */
00198   for (ph = GL(dl_profile_map)->l_phdr;
00199        ph < &GL(dl_profile_map)->l_phdr[GL(dl_profile_map)->l_phnum]; ++ph)
00200     if (ph->p_type == PT_LOAD && (ph->p_flags & PF_X))
00201       {
00202        ElfW(Addr) start = (ph->p_vaddr & ~(GLRO(dl_pagesize) - 1));
00203        ElfW(Addr) end = ((ph->p_vaddr + ph->p_memsz + GLRO(dl_pagesize) - 1)
00204                        & ~(GLRO(dl_pagesize) - 1));
00205 
00206        if (start < mapstart)
00207          mapstart = start;
00208        if (end > mapend)
00209          mapend = end;
00210       }
00211 
00212   /* Now we can compute the size of the profiling data.  This is done
00213      with the same formulars as in `monstartup' (see gmon.c).  */
00214   running = 0;
00215   lowpc = ROUNDDOWN (mapstart + GL(dl_profile_map)->l_addr,
00216                    HISTFRACTION * sizeof (HISTCOUNTER));
00217   highpc = ROUNDUP (mapend + GL(dl_profile_map)->l_addr,
00218                   HISTFRACTION * sizeof (HISTCOUNTER));
00219   textsize = highpc - lowpc;
00220   kcountsize = textsize / HISTFRACTION;
00221   if ((HASHFRACTION & (HASHFRACTION - 1)) == 0)
00222     {
00223       /* If HASHFRACTION is a power of two, mcount can use shifting
00224         instead of integer division.  Precompute shift amount.
00225 
00226         This is a constant but the compiler cannot compile the
00227         expression away since the __ffs implementation is not known
00228         to the compiler.  Help the compiler by precomputing the
00229         usual cases.  */
00230       assert (HASHFRACTION == 2);
00231 
00232       if (sizeof (*froms) == 8)
00233        log_hashfraction = 4;
00234       else if (sizeof (*froms) == 16)
00235        log_hashfraction = 5;
00236       else
00237        log_hashfraction = __ffs (HASHFRACTION * sizeof (*froms)) - 1;
00238     }
00239   else
00240     log_hashfraction = -1;
00241   tossize = textsize / HASHFRACTION;
00242   fromlimit = textsize * ARCDENSITY / 100;
00243   if (fromlimit < MINARCS)
00244     fromlimit = MINARCS;
00245   if (fromlimit > MAXARCS)
00246     fromlimit = MAXARCS;
00247   fromssize = fromlimit * sizeof (struct here_fromstruct);
00248 
00249   expected_size = (sizeof (struct gmon_hdr)
00250                  + 4 + sizeof (struct gmon_hist_hdr) + kcountsize
00251                  + 4 + 4 + fromssize * sizeof (struct here_cg_arc_record));
00252 
00253   /* Create the gmon_hdr we expect or write.  */
00254   memset (&gmon_hdr, '\0', sizeof (struct gmon_hdr));
00255   memcpy (&gmon_hdr.cookie[0], GMON_MAGIC, sizeof (gmon_hdr.cookie));
00256   *(int32_t *) gmon_hdr.version = GMON_SHOBJ_VERSION;
00257 
00258   /* Create the hist_hdr we expect or write.  */
00259   *(char **) hist_hdr.low_pc = (char *) mapstart;
00260   *(char **) hist_hdr.high_pc = (char *) mapend;
00261   *(int32_t *) hist_hdr.hist_size = kcountsize / sizeof (HISTCOUNTER);
00262   *(int32_t *) hist_hdr.prof_rate = __profile_frequency ();
00263   if (sizeof (hist_hdr.dimen) >= sizeof ("seconds"))
00264     {
00265       memcpy (hist_hdr.dimen, "seconds", sizeof ("seconds"));
00266       memset (hist_hdr.dimen + sizeof ("seconds"), '\0',
00267              sizeof (hist_hdr.dimen) - sizeof ("seconds"));
00268     }
00269   else
00270     strncpy (hist_hdr.dimen, "seconds", sizeof (hist_hdr.dimen));
00271   hist_hdr.dimen_abbrev = 's';
00272 
00273   /* First determine the output name.  We write in the directory
00274      OUTPUT_DIR and the name is composed from the shared objects
00275      soname (or the file name) and the ending ".profile".  */
00276   filename = (char *) alloca (strlen (GLRO(dl_profile_output)) + 1
00277                            + strlen (GLRO(dl_profile)) + sizeof ".profile");
00278   cp = __stpcpy (filename, GLRO(dl_profile_output));
00279   *cp++ = '/';
00280   __stpcpy (__stpcpy (cp, GLRO(dl_profile)), ".profile");
00281 
00282 #ifdef O_NOFOLLOW
00283 # define EXTRA_FLAGS | O_NOFOLLOW
00284 #else
00285 # define EXTRA_FLAGS
00286 #endif
00287   fd = __open (filename, O_RDWR | O_CREAT EXTRA_FLAGS, DEFFILEMODE);
00288   if (fd == -1)
00289     {
00290       char buf[400];
00291       int errnum;
00292 
00293       /* We cannot write the profiling data so don't do anything.  */
00294       errstr = "%s: cannot open file: %s\n";
00295     print_error:
00296       errnum = errno;
00297       if (fd != -1)
00298        __close (fd);
00299       _dl_error_printf (errstr, filename,
00300                      __strerror_r (errnum, buf, sizeof buf));
00301       return;
00302     }
00303 
00304   if (__fxstat64 (_STAT_VER, fd, &st) < 0 || !S_ISREG (st.st_mode))
00305     {
00306       /* Not stat'able or not a regular file => don't use it.  */
00307       errstr = "%s: cannot stat file: %s\n";
00308       goto print_error;
00309     }
00310 
00311   /* Test the size.  If it does not match what we expect from the size
00312      values in the map MAP we don't use it and warn the user.  */
00313   if (st.st_size == 0)
00314     {
00315       /* We have to create the file.  */
00316       char buf[GLRO(dl_pagesize)];
00317 
00318       memset (buf, '\0', GLRO(dl_pagesize));
00319 
00320       if (__lseek (fd, expected_size & ~(GLRO(dl_pagesize) - 1), SEEK_SET) == -1)
00321        {
00322        cannot_create:
00323          errstr = "%s: cannot create file: %s\n";
00324          goto print_error;
00325        }
00326 
00327       if (TEMP_FAILURE_RETRY (__libc_write (fd, buf, (expected_size
00328                                                 & (GLRO(dl_pagesize)
00329                                                   - 1))))
00330          < 0)
00331        goto cannot_create;
00332     }
00333   else if (st.st_size != expected_size)
00334     {
00335       __close (fd);
00336     wrong_format:
00337 
00338       if (addr != NULL)
00339        __munmap ((void *) addr, expected_size);
00340 
00341       _dl_error_printf ("%s: file is no correct profile data file for `%s'\n",
00342                      filename, GLRO(dl_profile));
00343       return;
00344     }
00345 
00346   addr = (struct gmon_hdr *) __mmap (NULL, expected_size, PROT_READ|PROT_WRITE,
00347                                  MAP_SHARED|MAP_FILE, fd, 0);
00348   if (addr == (struct gmon_hdr *) MAP_FAILED)
00349     {
00350       errstr = "%s: cannot map file: %s\n";
00351       goto print_error;
00352     }
00353 
00354   /* We don't need the file descriptor anymore.  */
00355   __close (fd);
00356 
00357   /* Pointer to data after the header.  */
00358   hist = (char *) (addr + 1);
00359   kcount = (uint16_t *) ((char *) hist + sizeof (uint32_t)
00360                       + sizeof (struct gmon_hist_hdr));
00361 
00362   /* Compute pointer to array of the arc information.  */
00363   narcsp = (uint32_t *) ((char *) kcount + kcountsize + sizeof (uint32_t));
00364   data = (struct here_cg_arc_record *) ((char *) narcsp + sizeof (uint32_t));
00365 
00366   if (st.st_size == 0)
00367     {
00368       /* Create the signature.  */
00369       memcpy (addr, &gmon_hdr, sizeof (struct gmon_hdr));
00370 
00371       *(uint32_t *) hist = GMON_TAG_TIME_HIST;
00372       memcpy (hist + sizeof (uint32_t), &hist_hdr,
00373              sizeof (struct gmon_hist_hdr));
00374 
00375       narcsp[-1] = GMON_TAG_CG_ARC;
00376     }
00377   else
00378     {
00379       /* Test the signature in the file.  */
00380       if (memcmp (addr, &gmon_hdr, sizeof (struct gmon_hdr)) != 0
00381          || *(uint32_t *) hist != GMON_TAG_TIME_HIST
00382          || memcmp (hist + sizeof (uint32_t), &hist_hdr,
00383                    sizeof (struct gmon_hist_hdr)) != 0
00384          || narcsp[-1] != GMON_TAG_CG_ARC)
00385        goto wrong_format;
00386     }
00387 
00388   /* Allocate memory for the froms data and the pointer to the tos records.  */
00389   tos = (uint16_t *) calloc (tossize + fromssize, 1);
00390   if (tos == NULL)
00391     {
00392       __munmap ((void *) addr, expected_size);
00393       _dl_fatal_printf ("Out of memory while initializing profiler\n");
00394       /* NOTREACHED */
00395     }
00396 
00397   froms = (struct here_fromstruct *) ((char *) tos + tossize);
00398   fromidx = 0;
00399 
00400   /* Now we have to process all the arc count entries.  BTW: it is
00401      not critical whether the *NARCSP value changes meanwhile.  Before
00402      we enter a new entry in to toset we will check that everything is
00403      available in TOS.  This happens in _dl_mcount.
00404 
00405      Loading the entries in reverse order should help to get the most
00406      frequently used entries at the front of the list.  */
00407   for (idx = narcs = MIN (*narcsp, fromlimit); idx > 0; )
00408     {
00409       size_t to_index;
00410       size_t newfromidx;
00411       --idx;
00412       to_index = (data[idx].self_pc / (HASHFRACTION * sizeof (*tos)));
00413       newfromidx = fromidx++;
00414       froms[newfromidx].here = &data[idx];
00415       froms[newfromidx].link = tos[to_index];
00416       tos[to_index] = newfromidx;
00417     }
00418 
00419   /* Setup counting data.  */
00420   if (kcountsize < highpc - lowpc)
00421     {
00422 #if 0
00423       s_scale = ((double) kcountsize / (highpc - lowpc)) * SCALE_1_TO_1;
00424 #else
00425       size_t range = highpc - lowpc;
00426       size_t quot = range / kcountsize;
00427 
00428       if (quot >= SCALE_1_TO_1)
00429        s_scale = 1;
00430       else if (quot >= SCALE_1_TO_1 / 256)
00431        s_scale = SCALE_1_TO_1 / quot;
00432       else if (range > ULONG_MAX / 256)
00433        s_scale = (SCALE_1_TO_1 * 256) / (range / (kcountsize / 256));
00434       else
00435        s_scale = (SCALE_1_TO_1 * 256) / ((range * 256) / kcountsize);
00436 #endif
00437     }
00438   else
00439     s_scale = SCALE_1_TO_1;
00440 
00441   /* Start the profiler.  */
00442   __profil ((void *) kcount, kcountsize, lowpc, s_scale);
00443 
00444   /* Turn on profiling.  */
00445   running = 1;
00446 }
00447 
00448 
00449 void
00450 _dl_mcount (ElfW(Addr) frompc, ElfW(Addr) selfpc)
00451 {
00452   volatile uint16_t *topcindex;
00453   size_t i, fromindex;
00454   struct here_fromstruct *fromp;
00455 
00456   if (! running)
00457     return;
00458 
00459   /* Compute relative addresses.  The shared object can be loaded at
00460      any address.  The value of frompc could be anything.  We cannot
00461      restrict it in any way, just set to a fixed value (0) in case it
00462      is outside the allowed range.  These calls show up as calls from
00463      <external> in the gprof output.  */
00464   frompc -= lowpc;
00465   if (frompc >= textsize)
00466     frompc = 0;
00467   selfpc -= lowpc;
00468   if (selfpc >= textsize)
00469     goto done;
00470 
00471   /* Getting here we now have to find out whether the location was
00472      already used.  If yes we are lucky and only have to increment a
00473      counter (this also has to be atomic).  If the entry is new things
00474      are getting complicated...  */
00475 
00476   /* Avoid integer divide if possible.  */
00477   if ((HASHFRACTION & (HASHFRACTION - 1)) == 0)
00478     i = selfpc >> log_hashfraction;
00479   else
00480     i = selfpc / (HASHFRACTION * sizeof (*tos));
00481 
00482   topcindex = &tos[i];
00483   fromindex = *topcindex;
00484 
00485   if (fromindex == 0)
00486     goto check_new_or_add;
00487 
00488   fromp = &froms[fromindex];
00489 
00490   /* We have to look through the chain of arcs whether there is already
00491      an entry for our arc.  */
00492   while (fromp->here->from_pc != frompc)
00493     {
00494       if (fromp->link != 0)
00495        do
00496          fromp = &froms[fromp->link];
00497        while (fromp->link != 0 && fromp->here->from_pc != frompc);
00498 
00499       if (fromp->here->from_pc != frompc)
00500        {
00501          topcindex = &fromp->link;
00502 
00503        check_new_or_add:
00504          /* Our entry is not among the entries we read so far from the
00505             data file.  Now see whether we have to update the list.  */
00506          while (narcs != *narcsp && narcs < fromlimit)
00507            {
00508              size_t to_index;
00509              size_t newfromidx;
00510              to_index = (data[narcs].self_pc
00511                        / (HASHFRACTION * sizeof (*tos)));
00512              newfromidx = catomic_exchange_and_add (&fromidx, 1) + 1;
00513              froms[newfromidx].here = &data[narcs];
00514              froms[newfromidx].link = tos[to_index];
00515              tos[to_index] = newfromidx;
00516              catomic_increment (&narcs);
00517            }
00518 
00519          /* If we still have no entry stop searching and insert.  */
00520          if (*topcindex == 0)
00521            {
00522              uint_fast32_t newarc = catomic_exchange_and_add (narcsp, 1);
00523 
00524              /* In rare cases it could happen that all entries in FROMS are
00525                occupied.  So we cannot count this anymore.  */
00526              if (newarc >= fromlimit)
00527               goto done;
00528 
00529              *topcindex = catomic_exchange_and_add (&fromidx, 1) + 1;
00530              fromp = &froms[*topcindex];
00531 
00532              fromp->here = &data[newarc];
00533              data[newarc].from_pc = frompc;
00534              data[newarc].self_pc = selfpc;
00535              data[newarc].count = 0;
00536              fromp->link = 0;
00537              catomic_increment (&narcs);
00538 
00539              break;
00540            }
00541 
00542          fromp = &froms[*topcindex];
00543        }
00544       else
00545        /* Found in.  */
00546        break;
00547     }
00548 
00549   /* Increment the counter.  */
00550   catomic_increment (&fromp->here->count);
00551 
00552  done:
00553   ;
00554 }
00555 INTDEF(_dl_mcount)