Back to index

cell-binutils  2.17cvs20070401
hist.c
Go to the documentation of this file.
00001 /* hist.c  -  Histogram related operations.
00002 
00003    Copyright 1999, 2000, 2001, 2002, 2004, 2005
00004    Free Software Foundation, Inc.
00005 
00006    This file is part of GNU Binutils.
00007 
00008    This program is free software; you can redistribute it and/or modify
00009    it under the terms of the GNU General Public License as published by
00010    the Free Software Foundation; either version 2 of the License, or
00011    (at your option) any later version.
00012 
00013    This program is distributed in the hope that it will be useful,
00014    but WITHOUT ANY WARRANTY; without even the implied warranty of
00015    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016    GNU General Public License for more details.
00017 
00018    You should have received a copy of the GNU General Public License
00019    along with this program; if not, write to the Free Software
00020    Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
00021    02110-1301, USA.  */
00022 
00023 #include "libiberty.h"
00024 #include "gprof.h"
00025 #include "search_list.h"
00026 #include "source.h"
00027 #include "symtab.h"
00028 #include "corefile.h"
00029 #include "gmon_io.h"
00030 #include "gmon_out.h"
00031 #include "hist.h"
00032 #include "sym_ids.h"
00033 #include "utils.h"
00034 
00035 #define UNITS_TO_CODE (offset_to_code / sizeof(UNIT))
00036 
00037 static void scale_and_align_entries (void);
00038 static void print_header (int);
00039 static void print_line (Sym *, double);
00040 static int cmp_time (const PTR, const PTR);
00041 
00042 /* Declarations of automatically generated functions to output blurbs.  */
00043 extern void flat_blurb (FILE * fp);
00044 
00045 bfd_vma s_lowpc;            /* Lowest address in .text.  */
00046 bfd_vma s_highpc = 0;              /* Highest address in .text.  */
00047 bfd_vma lowpc, highpc;             /* Same, but expressed in UNITs.  */
00048 unsigned int hist_num_bins = 0;    /* Number of histogram samples.  */
00049 int *hist_sample = 0;              /* Histogram samples (shorts in the file!).  */
00050 double hist_scale;
00051 static char hist_dimension[16] = "seconds";
00052 static char hist_dimension_abbrev = 's';
00053 
00054 static double accum_time;   /* Accumulated time so far for print_line(). */
00055 static double total_time;   /* Total time for all routines.  */
00056 
00057 /* Table of SI prefixes for powers of 10 (used to automatically
00058    scale some of the values in the flat profile).  */
00059 const struct
00060   {
00061     char prefix;
00062     double scale;
00063   }
00064 SItab[] =
00065 {
00066   { 'T', 1e-12 },                         /* tera */
00067   { 'G', 1e-09 },                         /* giga */
00068   { 'M', 1e-06 },                         /* mega */
00069   { 'K', 1e-03 },                         /* kilo */
00070   { ' ', 1e-00 },
00071   { 'm', 1e+03 },                         /* milli */
00072   { 'u', 1e+06 },                         /* micro */
00073   { 'n', 1e+09 },                         /* nano */
00074   { 'p', 1e+12 },                         /* pico */
00075   { 'f', 1e+15 },                         /* femto */
00076   { 'a', 1e+18 }                          /* ato */
00077 };
00078 
00079 
00080 /* Read the histogram from file IFP.  FILENAME is the name of IFP and
00081    is provided for formatting error messages only.  */
00082 
00083 void
00084 hist_read_rec (FILE * ifp, const char *filename)
00085 {
00086   bfd_vma n_lowpc, n_highpc;
00087   unsigned int i, ncnt, profrate;
00088   UNIT count;
00089 
00090   if (gmon_io_read_vma (ifp, &n_lowpc)
00091       || gmon_io_read_vma (ifp, &n_highpc)
00092       || gmon_io_read_32 (ifp, &ncnt)
00093       || gmon_io_read_32 (ifp, &profrate)
00094       || gmon_io_read (ifp, hist_dimension, 15)
00095       || gmon_io_read (ifp, &hist_dimension_abbrev, 1))
00096     {
00097       fprintf (stderr, _("%s: %s: unexpected end of file\n"),
00098               whoami, filename);
00099 
00100       done (1);
00101     }
00102 
00103   if (!s_highpc)
00104     {
00105       /* This is the first histogram record.  */
00106       s_lowpc = n_lowpc;
00107       s_highpc = n_highpc;
00108       lowpc = (bfd_vma) n_lowpc / sizeof (UNIT);
00109       highpc = (bfd_vma) n_highpc / sizeof (UNIT);
00110       hist_num_bins = ncnt;
00111       hz = profrate;
00112     }
00113 
00114   DBG (SAMPLEDEBUG,
00115        printf ("[hist_read_rec] n_lowpc 0x%lx n_highpc 0x%lx ncnt %u\n",
00116               (unsigned long) n_lowpc, (unsigned long) n_highpc, ncnt);
00117        printf ("[hist_read_rec] s_lowpc 0x%lx s_highpc 0x%lx nsamples %u\n",
00118               (unsigned long) s_lowpc, (unsigned long) s_highpc,
00119               hist_num_bins);
00120        printf ("[hist_read_rec]   lowpc 0x%lx   highpc 0x%lx\n",
00121               (unsigned long) lowpc, (unsigned long) highpc));
00122 
00123   if (n_lowpc != s_lowpc || n_highpc != s_highpc
00124       || ncnt != hist_num_bins || hz != (int) profrate)
00125     {
00126       fprintf (stderr, _("%s: `%s' is incompatible with first gmon file\n"),
00127               whoami, filename);
00128       done (1);
00129     }
00130 
00131   if (!hist_sample)
00132     {
00133       hist_sample = (int *) xmalloc (hist_num_bins * sizeof (hist_sample[0]));
00134       memset (hist_sample, 0, hist_num_bins * sizeof (hist_sample[0]));
00135     }
00136 
00137   for (i = 0; i < hist_num_bins; ++i)
00138     {
00139       if (fread (&count[0], sizeof (count), 1, ifp) != 1)
00140        {
00141          fprintf (stderr,
00142                 _("%s: %s: unexpected EOF after reading %u of %u samples\n"),
00143                  whoami, filename, i, hist_num_bins);
00144          done (1);
00145        }
00146       hist_sample[i] += bfd_get_16 (core_bfd, (bfd_byte *) & count[0]);
00147       DBG (SAMPLEDEBUG,
00148           printf ("[hist_read_rec] 0x%lx: %u\n",
00149                  (unsigned long) (n_lowpc + i * (n_highpc - n_lowpc) / ncnt),
00150                  hist_sample[i]));
00151     }
00152 }
00153 
00154 
00155 /* Write execution histogram to file OFP.  FILENAME is the name
00156    of OFP and is provided for formatting error-messages only.  */
00157 
00158 void
00159 hist_write_hist (FILE * ofp, const char *filename)
00160 {
00161   UNIT count;
00162   unsigned int i;
00163 
00164   /* Write header.  */
00165 
00166   if (gmon_io_write_8 (ofp, GMON_TAG_TIME_HIST)
00167       || gmon_io_write_vma (ofp, s_lowpc)
00168       || gmon_io_write_vma (ofp, s_highpc)
00169       || gmon_io_write_32 (ofp, hist_num_bins)
00170       || gmon_io_write_32 (ofp, hz)
00171       || gmon_io_write (ofp, hist_dimension, 15)
00172       || gmon_io_write (ofp, &hist_dimension_abbrev, 1))
00173     {
00174       perror (filename);
00175       done (1);
00176     }
00177 
00178   for (i = 0; i < hist_num_bins; ++i)
00179     {
00180       bfd_put_16 (core_bfd, (bfd_vma) hist_sample[i], (bfd_byte *) &count[0]);
00181 
00182       if (fwrite (&count[0], sizeof (count), 1, ofp) != 1)
00183        {
00184          perror (filename);
00185          done (1);
00186        }
00187     }
00188 }
00189 
00190 
00191 /* Calculate scaled entry point addresses (to save time in
00192    hist_assign_samples), and, on architectures that have procedure
00193    entry masks at the start of a function, possibly push the scaled
00194    entry points over the procedure entry mask, if it turns out that
00195    the entry point is in one bin and the code for a routine is in the
00196    next bin.  */
00197 
00198 static void
00199 scale_and_align_entries ()
00200 {
00201   Sym *sym;
00202   bfd_vma bin_of_entry;
00203   bfd_vma bin_of_code;
00204 
00205   for (sym = symtab.base; sym < symtab.limit; sym++)
00206     {
00207       sym->hist.scaled_addr = sym->addr / sizeof (UNIT);
00208       bin_of_entry = (sym->hist.scaled_addr - lowpc) / hist_scale;
00209       bin_of_code = ((sym->hist.scaled_addr + UNITS_TO_CODE - lowpc)
00210                    / hist_scale);
00211       if (bin_of_entry < bin_of_code)
00212        {
00213          DBG (SAMPLEDEBUG,
00214               printf ("[scale_and_align_entries] pushing 0x%lx to 0x%lx\n",
00215                      (unsigned long) sym->hist.scaled_addr,
00216                      (unsigned long) (sym->hist.scaled_addr
00217                                    + UNITS_TO_CODE)));
00218          sym->hist.scaled_addr += UNITS_TO_CODE;
00219        }
00220     }
00221 }
00222 
00223 
00224 /* Assign samples to the symbol to which they belong.
00225 
00226    Histogram bin I covers some address range [BIN_LOWPC,BIN_HIGH_PC)
00227    which may overlap one more symbol address ranges.  If a symbol
00228    overlaps with the bin's address range by O percent, then O percent
00229    of the bin's count is credited to that symbol.
00230 
00231    There are three cases as to where BIN_LOW_PC and BIN_HIGH_PC can be
00232    with respect to the symbol's address range [SYM_LOW_PC,
00233    SYM_HIGH_PC) as shown in the following diagram.  OVERLAP computes
00234    the distance (in UNITs) between the arrows, the fraction of the
00235    sample that is to be credited to the symbol which starts at
00236    SYM_LOW_PC.
00237 
00238          sym_low_pc                                      sym_high_pc
00239               |                                               |
00240               v                                               v
00241 
00242               +-----------------------------------------------+
00243               |                                               |
00244          |  ->|    |<-         ->|         |<-         ->|    |<-  |
00245          |         |             |         |             |         |
00246          +---------+             +---------+             +---------+
00247 
00248          ^         ^             ^         ^             ^         ^
00249          |         |             |         |             |         |
00250      bin_low_pc bin_high_pc  bin_low_pc bin_high_pc  bin_low_pc bin_high_pc
00251 
00252    For the VAX we assert that samples will never fall in the first two
00253    bytes of any routine, since that is the entry mask, thus we call
00254    scale_and_align_entries() to adjust the entry points if the entry
00255    mask falls in one bin but the code for the routine doesn't start
00256    until the next bin.  In conjunction with the alignment of routine
00257    addresses, this should allow us to have only one sample for every
00258    four bytes of text space and never have any overlap (the two end
00259    cases, above).  */
00260 
00261 void
00262 hist_assign_samples ()
00263 {
00264   bfd_vma bin_low_pc, bin_high_pc;
00265   bfd_vma sym_low_pc, sym_high_pc;
00266   bfd_vma overlap, addr;
00267   unsigned int bin_count;
00268   unsigned int i, j;
00269   double time, credit;
00270 
00271   /* Read samples and assign to symbols.  */
00272   hist_scale = highpc - lowpc;
00273   hist_scale /= hist_num_bins;
00274   scale_and_align_entries ();
00275 
00276   /* Iterate over all sample bins.  */
00277   for (i = 0, j = 1; i < hist_num_bins; ++i)
00278     {
00279       bin_count = hist_sample[i];
00280       if (! bin_count)
00281        continue;
00282 
00283       bin_low_pc = lowpc + (bfd_vma) (hist_scale * i);
00284       bin_high_pc = lowpc + (bfd_vma) (hist_scale * (i + 1));
00285       time = bin_count;
00286 
00287       DBG (SAMPLEDEBUG,
00288           printf (
00289       "[assign_samples] bin_low_pc=0x%lx, bin_high_pc=0x%lx, bin_count=%u\n",
00290                   (unsigned long) (sizeof (UNIT) * bin_low_pc),
00291                   (unsigned long) (sizeof (UNIT) * bin_high_pc),
00292                   bin_count));
00293       total_time += time;
00294 
00295       /* Credit all symbols that are covered by bin I.  */
00296       for (j = j - 1; j < symtab.len; ++j)
00297        {
00298          sym_low_pc = symtab.base[j].hist.scaled_addr;
00299          sym_high_pc = symtab.base[j + 1].hist.scaled_addr;
00300 
00301          /* If high end of bin is below entry address,
00302             go for next bin.  */
00303          if (bin_high_pc < sym_low_pc)
00304            break;
00305 
00306          /* If low end of bin is above high end of symbol,
00307             go for next symbol.  */
00308          if (bin_low_pc >= sym_high_pc)
00309            continue;
00310 
00311          overlap =
00312            MIN (bin_high_pc, sym_high_pc) - MAX (bin_low_pc, sym_low_pc);
00313          if (overlap > 0)
00314            {
00315              DBG (SAMPLEDEBUG,
00316                  printf (
00317               "[assign_samples] [0x%lx,0x%lx) %s gets %f ticks %ld overlap\n",
00318                         (unsigned long) symtab.base[j].addr,
00319                         (unsigned long) (sizeof (UNIT) * sym_high_pc),
00320                         symtab.base[j].name, overlap * time / hist_scale,
00321                         (long) overlap));
00322 
00323              addr = symtab.base[j].addr;
00324              credit = overlap * time / hist_scale;
00325 
00326              /* Credit symbol if it appears in INCL_FLAT or that
00327                table is empty and it does not appear it in
00328                EXCL_FLAT.  */
00329              if (sym_lookup (&syms[INCL_FLAT], addr)
00330                 || (syms[INCL_FLAT].len == 0
00331                     && !sym_lookup (&syms[EXCL_FLAT], addr)))
00332               {
00333                 symtab.base[j].hist.time += credit;
00334               }
00335              else
00336               {
00337                 total_time -= credit;
00338               }
00339            }
00340        }
00341     }
00342 
00343   DBG (SAMPLEDEBUG, printf ("[assign_samples] total_time %f\n",
00344                          total_time));
00345 }
00346 
00347 
00348 /* Print header for flag histogram profile.  */
00349 
00350 static void
00351 print_header (int prefix)
00352 {
00353   char unit[64];
00354 
00355   sprintf (unit, _("%c%c/call"), prefix, hist_dimension_abbrev);
00356 
00357   if (bsd_style_output)
00358     {
00359       printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
00360              (long) hist_scale * sizeof (UNIT));
00361       if (total_time > 0.0)
00362        {
00363          printf (_(" for %.2f%% of %.2f %s\n\n"),
00364                 100.0 / total_time, total_time / hz, hist_dimension);
00365        }
00366     }
00367   else
00368     {
00369       printf (_("\nEach sample counts as %g %s.\n"), 1.0 / hz, hist_dimension);
00370     }
00371 
00372   if (total_time <= 0.0)
00373     {
00374       printf (_(" no time accumulated\n\n"));
00375 
00376       /* This doesn't hurt since all the numerators will be zero.  */
00377       total_time = 1.0;
00378     }
00379 
00380   printf ("%5.5s %10.10s %8.8s %8.8s %8.8s %8.8s  %-8.8s\n",
00381          "%  ", _("cumulative"), _("self  "), "", _("self  "), _("total "),
00382          "");
00383   printf ("%5.5s %9.9s  %8.8s %8.8s %8.8s %8.8s  %-8.8s\n",
00384          _("time"), hist_dimension, hist_dimension, _("calls"), unit, unit,
00385          _("name"));
00386 }
00387 
00388 
00389 static void
00390 print_line (Sym *sym, double scale)
00391 {
00392   if (ignore_zeros && sym->ncalls == 0 && sym->hist.time == 0)
00393     return;
00394 
00395   accum_time += sym->hist.time;
00396 
00397   if (bsd_style_output)
00398     printf ("%5.1f %10.2f %8.2f",
00399            total_time > 0.0 ? 100 * sym->hist.time / total_time : 0.0,
00400            accum_time / hz, sym->hist.time / hz);
00401   else
00402     printf ("%6.2f %9.2f %8.2f",
00403            total_time > 0.0 ? 100 * sym->hist.time / total_time : 0.0,
00404            accum_time / hz, sym->hist.time / hz);
00405 
00406   if (sym->ncalls != 0)
00407     printf (" %8lu %8.2f %8.2f  ",
00408            sym->ncalls, scale * sym->hist.time / hz / sym->ncalls,
00409            scale * (sym->hist.time + sym->cg.child_time) / hz / sym->ncalls);
00410   else
00411     printf (" %8.8s %8.8s %8.8s  ", "", "", "");
00412 
00413   if (bsd_style_output)
00414     print_name (sym);
00415   else
00416     print_name_only (sym);
00417 
00418   printf ("\n");
00419 }
00420 
00421 
00422 /* Compare LP and RP.  The primary comparison key is execution time,
00423    the secondary is number of invocation, and the tertiary is the
00424    lexicographic order of the function names.  */
00425 
00426 static int
00427 cmp_time (const PTR lp, const PTR rp)
00428 {
00429   const Sym *left = *(const Sym **) lp;
00430   const Sym *right = *(const Sym **) rp;
00431   double time_diff;
00432 
00433   time_diff = right->hist.time - left->hist.time;
00434 
00435   if (time_diff > 0.0)
00436     return 1;
00437 
00438   if (time_diff < 0.0)
00439     return -1;
00440 
00441   if (right->ncalls > left->ncalls)
00442     return 1;
00443 
00444   if (right->ncalls < left->ncalls)
00445     return -1;
00446 
00447   return strcmp (left->name, right->name);
00448 }
00449 
00450 
00451 /* Print the flat histogram profile.  */
00452 
00453 void
00454 hist_print ()
00455 {
00456   Sym **time_sorted_syms, *top_dog, *sym;
00457   unsigned int index;
00458   unsigned log_scale;
00459   double top_time, time;
00460   bfd_vma addr;
00461 
00462   if (first_output)
00463     first_output = FALSE;
00464   else
00465     printf ("\f\n");
00466 
00467   accum_time = 0.0;
00468 
00469   if (bsd_style_output)
00470     {
00471       if (print_descriptions)
00472        {
00473          printf (_("\n\n\nflat profile:\n"));
00474          flat_blurb (stdout);
00475        }
00476     }
00477   else
00478     {
00479       printf (_("Flat profile:\n"));
00480     }
00481 
00482   /* Sort the symbol table by time (call-count and name as secondary
00483      and tertiary keys).  */
00484   time_sorted_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
00485 
00486   for (index = 0; index < symtab.len; ++index)
00487     time_sorted_syms[index] = &symtab.base[index];
00488 
00489   qsort (time_sorted_syms, symtab.len, sizeof (Sym *), cmp_time);
00490 
00491   if (bsd_style_output)
00492     {
00493       log_scale = 5;        /* Milli-seconds is BSD-default.  */
00494     }
00495   else
00496     {
00497       /* Search for symbol with highest per-call
00498         execution time and scale accordingly.  */
00499       log_scale = 0;
00500       top_dog = 0;
00501       top_time = 0.0;
00502 
00503       for (index = 0; index < symtab.len; ++index)
00504        {
00505          sym = time_sorted_syms[index];
00506 
00507          if (sym->ncalls != 0)
00508            {
00509              time = (sym->hist.time + sym->cg.child_time) / sym->ncalls;
00510 
00511              if (time > top_time)
00512               {
00513                 top_dog = sym;
00514                 top_time = time;
00515               }
00516            }
00517        }
00518 
00519       if (top_dog && top_dog->ncalls != 0 && top_time > 0.0)
00520        {
00521          top_time /= hz;
00522 
00523          for (log_scale = 0; log_scale < ARRAY_SIZE (SItab); log_scale ++)
00524            {
00525              double scaled_value = SItab[log_scale].scale * top_time;
00526 
00527              if (scaled_value >= 1.0 && scaled_value < 1000.0) 
00528               break;
00529            }
00530        }
00531     }
00532 
00533   /* For now, the dimension is always seconds.  In the future, we
00534      may also want to support other (pseudo-)dimensions (such as
00535      I-cache misses etc.).  */
00536   print_header (SItab[log_scale].prefix);
00537 
00538   for (index = 0; index < symtab.len; ++index)
00539     {
00540       addr = time_sorted_syms[index]->addr;
00541 
00542       /* Print symbol if its in INCL_FLAT table or that table
00543        is empty and the symbol is not in EXCL_FLAT.  */
00544       if (sym_lookup (&syms[INCL_FLAT], addr)
00545          || (syms[INCL_FLAT].len == 0
00546              && !sym_lookup (&syms[EXCL_FLAT], addr)))
00547        print_line (time_sorted_syms[index], SItab[log_scale].scale);
00548     }
00549 
00550   free (time_sorted_syms);
00551 
00552   if (print_descriptions && !bsd_style_output)
00553     flat_blurb (stdout);
00554 }