Back to index

cell-binutils  2.17cvs20070401
cg_print.c
Go to the documentation of this file.
00001 /* cg_print.c -  Print routines for displaying call graphs.
00002 
00003    Copyright 2000, 2001, 2002, 2004 Free Software Foundation, Inc.
00004 
00005    This file is part of GNU Binutils.
00006 
00007    This program is free software; you can redistribute it and/or modify
00008    it under the terms of the GNU General Public License as published by
00009    the Free Software Foundation; either version 2 of the License, or
00010    (at your option) any later version.
00011 
00012    This program is distributed in the hope that it will be useful,
00013    but WITHOUT ANY WARRANTY; without even the implied warranty of
00014    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015    GNU General Public License for more details.
00016 
00017    You should have received a copy of the GNU General Public License
00018    along with this program; if not, write to the Free Software
00019    Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
00020    02110-1301, USA.  */
00021 
00022 #include "libiberty.h"
00023 #include "gprof.h"
00024 #include "search_list.h"
00025 #include "source.h"
00026 #include "symtab.h"
00027 #include "cg_arcs.h"
00028 #include "cg_print.h"
00029 #include "hist.h"
00030 #include "utils.h"
00031 #include "corefile.h"
00032 
00033 /* Return value of comparison functions used to sort tables.  */
00034 #define       LESSTHAN      -1
00035 #define       EQUALTO              0
00036 #define       GREATERTHAN   1
00037 
00038 static void print_header (void);
00039 static void print_cycle (Sym *);
00040 static int cmp_member (Sym *, Sym *);
00041 static void sort_members (Sym *);
00042 static void print_members (Sym *);
00043 static int cmp_arc (Arc *, Arc *);
00044 static void sort_parents (Sym *);
00045 static void print_parents (Sym *);
00046 static void sort_children (Sym *);
00047 static void print_children (Sym *);
00048 static void print_line (Sym *);
00049 static int cmp_name (const PTR, const PTR);
00050 static int cmp_arc_count (const PTR, const PTR);
00051 static int cmp_fun_nuses (const PTR, const PTR);
00052 static void order_and_dump_functions_by_arcs
00053   (Arc **, unsigned long, int, Arc **, unsigned long *);
00054 
00055 /* Declarations of automatically generated functions to output blurbs.  */
00056 extern void bsd_callg_blurb (FILE * fp);
00057 extern void fsf_callg_blurb (FILE * fp);
00058 
00059 double print_time = 0.0;
00060 
00061 
00062 static void
00063 print_header ()
00064 {
00065   if (first_output)
00066     first_output = FALSE;
00067   else
00068     printf ("\f\n");
00069 
00070   if (!bsd_style_output)
00071     {
00072       if (print_descriptions)
00073        printf (_("\t\t     Call graph (explanation follows)\n\n"));
00074       else
00075        printf (_("\t\t\tCall graph\n\n"));
00076     }
00077 
00078   printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
00079          (long) hist_scale * sizeof (UNIT));
00080 
00081   if (print_time > 0.0)
00082     printf (_(" for %.2f%% of %.2f seconds\n\n"),
00083            100.0 / print_time, print_time / hz);
00084   else
00085     {
00086       printf (_(" no time propagated\n\n"));
00087 
00088       /* This doesn't hurt, since all the numerators will be 0.0.  */
00089       print_time = 1.0;
00090     }
00091 
00092   if (bsd_style_output)
00093     {
00094       printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s     %-8.8s\n",
00095              "", "", "", "", _("called"), _("total"), _("parents"));
00096       printf ("%-6.6s %5.5s %7.7s %11.11s %7.7s+%-7.7s %-8.8s\t%5.5s\n",
00097              _("index"), _("%time"), _("self"), _("descendants"),
00098              _("called"), _("self"), _("name"), _("index"));
00099       printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s     %-8.8s\n",
00100              "", "", "", "", _("called"), _("total"), _("children"));
00101       printf ("\n");
00102     }
00103   else
00104     {
00105       printf (_("index %% time    self  children    called     name\n"));
00106     }
00107 }
00108 
00109 /* Print a cycle header.  */
00110 
00111 static void
00112 print_cycle (Sym *cyc)
00113 {
00114   char buf[BUFSIZ];
00115 
00116   sprintf (buf, "[%d]", cyc->cg.index);
00117   printf (bsd_style_output
00118          ? "%-6.6s %5.1f %7.2f %11.2f %7lu"
00119          : "%-6.6s %5.1f %7.2f %7.2f %7lu", buf,
00120          100 * (cyc->cg.prop.self + cyc->cg.prop.child) / print_time,
00121          cyc->cg.prop.self / hz, cyc->cg.prop.child / hz, cyc->ncalls);
00122 
00123   if (cyc->cg.self_calls != 0)
00124     printf ("+%-7lu", cyc->cg.self_calls);
00125   else
00126     printf (" %7.7s", "");
00127 
00128   printf (_(" <cycle %d as a whole> [%d]\n"), cyc->cg.cyc.num, cyc->cg.index);
00129 }
00130 
00131 /* Compare LEFT and RIGHT membmer.  Major comparison key is
00132    CG.PROP.SELF+CG.PROP.CHILD, secondary key is NCALLS+CG.SELF_CALLS.  */
00133 
00134 static int
00135 cmp_member (Sym *left, Sym *right)
00136 {
00137   double left_time = left->cg.prop.self + left->cg.prop.child;
00138   double right_time = right->cg.prop.self + right->cg.prop.child;
00139   unsigned long left_calls = left->ncalls + left->cg.self_calls;
00140   unsigned long right_calls = right->ncalls + right->cg.self_calls;
00141 
00142   if (left_time > right_time)
00143     return GREATERTHAN;
00144 
00145   if (left_time < right_time)
00146     return LESSTHAN;
00147 
00148   if (left_calls > right_calls)
00149     return GREATERTHAN;
00150 
00151   if (left_calls < right_calls)
00152     return LESSTHAN;
00153 
00154   return EQUALTO;
00155 }
00156 
00157 /* Sort members of a cycle.  */
00158 
00159 static void
00160 sort_members (Sym *cyc)
00161 {
00162   Sym *todo, *doing, *prev;
00163 
00164   /* Detach cycle members from cyclehead,
00165      and insertion sort them back on.  */
00166   todo = cyc->cg.cyc.next;
00167   cyc->cg.cyc.next = 0;
00168 
00169   for (doing = todo; doing && doing->cg.cyc.next; doing = todo)
00170     {
00171       todo = doing->cg.cyc.next;
00172 
00173       for (prev = cyc; prev->cg.cyc.next; prev = prev->cg.cyc.next)
00174        {
00175          if (cmp_member (doing, prev->cg.cyc.next) == GREATERTHAN)
00176            break;
00177        }
00178 
00179       doing->cg.cyc.next = prev->cg.cyc.next;
00180       prev->cg.cyc.next = doing;
00181     }
00182 }
00183 
00184 /* Print the members of a cycle.  */
00185 
00186 static void
00187 print_members (Sym *cyc)
00188 {
00189   Sym *member;
00190 
00191   sort_members (cyc);
00192 
00193   for (member = cyc->cg.cyc.next; member; member = member->cg.cyc.next)
00194     {
00195       printf (bsd_style_output
00196              ? "%6.6s %5.5s %7.2f %11.2f %7lu"
00197              : "%6.6s %5.5s %7.2f %7.2f %7lu",
00198              "", "", member->cg.prop.self / hz, member->cg.prop.child / hz,
00199              member->ncalls);
00200 
00201       if (member->cg.self_calls != 0)
00202        printf ("+%-7lu", member->cg.self_calls);
00203       else
00204        printf (" %7.7s", "");
00205 
00206       printf ("     ");
00207       print_name (member);
00208       printf ("\n");
00209     }
00210 }
00211 
00212 /* Compare two arcs to/from the same child/parent.
00213        - if one arc is a self arc, it's least.
00214        - if one arc is within a cycle, it's less than.
00215        - if both arcs are within a cycle, compare arc counts.
00216        - if neither arc is within a cycle, compare with
00217               time + child_time as major key
00218               arc count as minor key.  */
00219 
00220 static int
00221 cmp_arc (Arc *left, Arc *right)
00222 {
00223   Sym *left_parent = left->parent;
00224   Sym *left_child = left->child;
00225   Sym *right_parent = right->parent;
00226   Sym *right_child = right->child;
00227   double left_time, right_time;
00228 
00229   DBG (TIMEDEBUG,
00230        printf ("[cmp_arc] ");
00231        print_name (left_parent);
00232        printf (" calls ");
00233        print_name (left_child);
00234        printf (" %f + %f %lu/%lu\n", left->time, left->child_time,
00235               left->count, left_child->ncalls);
00236        printf ("[cmp_arc] ");
00237        print_name (right_parent);
00238        printf (" calls ");
00239        print_name (right_child);
00240        printf (" %f + %f %lu/%lu\n", right->time, right->child_time,
00241               right->count, right_child->ncalls);
00242        printf ("\n");
00243     );
00244 
00245   if (left_parent == left_child)
00246     return LESSTHAN;        /* Left is a self call.  */
00247 
00248   if (right_parent == right_child)
00249     return GREATERTHAN;            /* Right is a self call.  */
00250 
00251   if (left_parent->cg.cyc.num != 0 && left_child->cg.cyc.num != 0
00252       && left_parent->cg.cyc.num == left_child->cg.cyc.num)
00253     {
00254       /* Left is a call within a cycle.  */
00255       if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
00256          && right_parent->cg.cyc.num == right_child->cg.cyc.num)
00257        {
00258          /* Right is a call within the cycle, too.  */
00259          if (left->count < right->count)
00260            return LESSTHAN;
00261 
00262          if (left->count > right->count)
00263            return GREATERTHAN;
00264 
00265          return EQUALTO;
00266        }
00267       else
00268        {
00269          /* Right isn't a call within the cycle.  */
00270          return LESSTHAN;
00271        }
00272     }
00273   else
00274     {
00275       /* Left isn't a call within a cycle.  */
00276       if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
00277          && right_parent->cg.cyc.num == right_child->cg.cyc.num)
00278        {
00279          /* Right is a call within a cycle.  */
00280          return GREATERTHAN;
00281        }
00282       else
00283        {
00284          /* Neither is a call within a cycle.  */
00285          left_time = left->time + left->child_time;
00286          right_time = right->time + right->child_time;
00287 
00288          if (left_time < right_time)
00289            return LESSTHAN;
00290 
00291          if (left_time > right_time)
00292            return GREATERTHAN;
00293 
00294          if (left->count < right->count)
00295            return LESSTHAN;
00296 
00297          if (left->count > right->count)
00298            return GREATERTHAN;
00299 
00300          return EQUALTO;
00301        }
00302     }
00303 }
00304 
00305 
00306 static void
00307 sort_parents (Sym * child)
00308 {
00309   Arc *arc, *detached, sorted, *prev;
00310 
00311   /* Unlink parents from child, then insertion sort back on to
00312      sorted's parents.
00313          *arc        the arc you have detached and are inserting.
00314          *detached   the rest of the arcs to be sorted.
00315          sorted      arc list onto which you insertion sort.
00316          *prev       arc before the arc you are comparing.  */
00317   sorted.next_parent = 0;
00318 
00319   for (arc = child->cg.parents; arc; arc = detached)
00320     {
00321       detached = arc->next_parent;
00322 
00323       /* Consider *arc as disconnected; insert it into sorted.  */
00324       for (prev = &sorted; prev->next_parent; prev = prev->next_parent)
00325        {
00326          if (cmp_arc (arc, prev->next_parent) != GREATERTHAN)
00327            break;
00328        }
00329 
00330       arc->next_parent = prev->next_parent;
00331       prev->next_parent = arc;
00332     }
00333 
00334   /* Reattach sorted arcs to child.  */
00335   child->cg.parents = sorted.next_parent;
00336 }
00337 
00338 
00339 static void
00340 print_parents (Sym *child)
00341 {
00342   Sym *parent;
00343   Arc *arc;
00344   Sym *cycle_head;
00345 
00346   if (child->cg.cyc.head != 0)
00347     cycle_head = child->cg.cyc.head;
00348   else
00349     cycle_head = child;
00350 
00351   if (!child->cg.parents)
00352     {
00353       printf (bsd_style_output
00354              ? _("%6.6s %5.5s %7.7s %11.11s %7.7s %7.7s     <spontaneous>\n")
00355              : _("%6.6s %5.5s %7.7s %7.7s %7.7s %7.7s     <spontaneous>\n"),
00356              "", "", "", "", "", "");
00357       return;
00358     }
00359 
00360   sort_parents (child);
00361 
00362   for (arc = child->cg.parents; arc; arc = arc->next_parent)
00363     {
00364       parent = arc->parent;
00365       if (child == parent || (child->cg.cyc.num != 0
00366                            && parent->cg.cyc.num == child->cg.cyc.num))
00367        {
00368          /* Selfcall or call among siblings.  */
00369          printf (bsd_style_output
00370                 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s     "
00371                 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s     ",
00372                 "", "", "", "",
00373                 arc->count, "");
00374          print_name (parent);
00375          printf ("\n");
00376        }
00377       else
00378        {
00379          /* Regular parent of child.  */
00380          printf (bsd_style_output
00381                 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu     "
00382                 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu     ",
00383                 "", "",
00384                 arc->time / hz, arc->child_time / hz,
00385                 arc->count, cycle_head->ncalls);
00386          print_name (parent);
00387          printf ("\n");
00388        }
00389     }
00390 }
00391 
00392 
00393 static void
00394 sort_children (Sym *parent)
00395 {
00396   Arc *arc, *detached, sorted, *prev;
00397 
00398   /* Unlink children from parent, then insertion sort back on to
00399      sorted's children.
00400          *arc        the arc you have detached and are inserting.
00401          *detached   the rest of the arcs to be sorted.
00402          sorted      arc list onto which you insertion sort.
00403          *prev       arc before the arc you are comparing.  */
00404   sorted.next_child = 0;
00405 
00406   for (arc = parent->cg.children; arc; arc = detached)
00407     {
00408       detached = arc->next_child;
00409 
00410       /* Consider *arc as disconnected; insert it into sorted.  */
00411       for (prev = &sorted; prev->next_child; prev = prev->next_child)
00412        {
00413          if (cmp_arc (arc, prev->next_child) != LESSTHAN)
00414            break;
00415        }
00416 
00417       arc->next_child = prev->next_child;
00418       prev->next_child = arc;
00419     }
00420 
00421   /* Reattach sorted children to parent.  */
00422   parent->cg.children = sorted.next_child;
00423 }
00424 
00425 
00426 static void
00427 print_children (Sym *parent)
00428 {
00429   Sym *child;
00430   Arc *arc;
00431 
00432   sort_children (parent);
00433   arc = parent->cg.children;
00434 
00435   for (arc = parent->cg.children; arc; arc = arc->next_child)
00436     {
00437       child = arc->child;
00438       if (child == parent || (child->cg.cyc.num != 0
00439                            && child->cg.cyc.num == parent->cg.cyc.num))
00440        {
00441          /* Self call or call to sibling.  */
00442          printf (bsd_style_output
00443                 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s     "
00444                 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s     ",
00445                 "", "", "", "", arc->count, "");
00446          print_name (child);
00447          printf ("\n");
00448        }
00449       else
00450        {
00451          /* Regular child of parent.  */
00452          printf (bsd_style_output
00453                 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu     "
00454                 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu     ",
00455                 "", "",
00456                 arc->time / hz, arc->child_time / hz,
00457                 arc->count, child->cg.cyc.head->ncalls);
00458          print_name (child);
00459          printf ("\n");
00460        }
00461     }
00462 }
00463 
00464 
00465 static void
00466 print_line (Sym *np)
00467 {
00468   char buf[BUFSIZ];
00469 
00470   sprintf (buf, "[%d]", np->cg.index);
00471   printf (bsd_style_output
00472          ? "%-6.6s %5.1f %7.2f %11.2f"
00473          : "%-6.6s %5.1f %7.2f %7.2f", buf,
00474          100 * (np->cg.prop.self + np->cg.prop.child) / print_time,
00475          np->cg.prop.self / hz, np->cg.prop.child / hz);
00476 
00477   if ((np->ncalls + np->cg.self_calls) != 0)
00478     {
00479       printf (" %7lu", np->ncalls);
00480 
00481       if (np->cg.self_calls != 0)
00482          printf ("+%-7lu ", np->cg.self_calls);
00483       else
00484          printf (" %7.7s ", "");
00485     }
00486   else
00487     {
00488       printf (" %7.7s %7.7s ", "", "");
00489     }
00490 
00491   print_name (np);
00492   printf ("\n");
00493 }
00494 
00495 
00496 /* Print dynamic call graph.  */
00497 
00498 void
00499 cg_print (Sym ** timesortsym)
00500 {
00501   unsigned int index;
00502   Sym *parent;
00503 
00504   if (print_descriptions && bsd_style_output)
00505     bsd_callg_blurb (stdout);
00506 
00507   print_header ();
00508 
00509   for (index = 0; index < symtab.len + num_cycles; ++index)
00510     {
00511       parent = timesortsym[index];
00512 
00513       if ((ignore_zeros && parent->ncalls == 0
00514           && parent->cg.self_calls == 0 && parent->cg.prop.self == 0
00515           && parent->cg.prop.child == 0)
00516          || !parent->cg.print_flag
00517          || (line_granularity && ! parent->is_func))
00518        continue;
00519 
00520       if (!parent->name && parent->cg.cyc.num != 0)
00521        {
00522          /* Cycle header.  */
00523          print_cycle (parent);
00524          print_members (parent);
00525        }
00526       else
00527        {
00528          print_parents (parent);
00529          print_line (parent);
00530          print_children (parent);
00531        }
00532 
00533       if (bsd_style_output)
00534        printf ("\n");
00535 
00536       printf ("-----------------------------------------------\n");
00537 
00538       if (bsd_style_output)
00539        printf ("\n");
00540     }
00541 
00542   free (timesortsym);
00543 
00544   if (print_descriptions && !bsd_style_output)
00545     fsf_callg_blurb (stdout);
00546 }
00547 
00548 
00549 static int
00550 cmp_name (const PTR left, const PTR right)
00551 {
00552   const Sym **npp1 = (const Sym **) left;
00553   const Sym **npp2 = (const Sym **) right;
00554 
00555   return strcmp ((*npp1)->name, (*npp2)->name);
00556 }
00557 
00558 
00559 void
00560 cg_print_index ()
00561 {
00562   unsigned int index;
00563   unsigned int nnames, todo, i, j;
00564   int col, starting_col;
00565   Sym **name_sorted_syms, *sym;
00566   const char *filename;
00567   char buf[20];
00568   int column_width = (output_width - 1) / 3;     /* Don't write in last col!  */
00569 
00570   /* Now, sort regular function name
00571      alphabetically to create an index.  */
00572   name_sorted_syms = (Sym **) xmalloc ((symtab.len + num_cycles) * sizeof (Sym *));
00573 
00574   for (index = 0, nnames = 0; index < symtab.len; index++)
00575     {
00576       if (ignore_zeros && symtab.base[index].ncalls == 0
00577          && symtab.base[index].hist.time == 0)
00578        continue;
00579 
00580       name_sorted_syms[nnames++] = &symtab.base[index];
00581     }
00582 
00583   qsort (name_sorted_syms, nnames, sizeof (Sym *), cmp_name);
00584 
00585   for (index = 1, todo = nnames; index <= num_cycles; index++)
00586     name_sorted_syms[todo++] = &cycle_header[index];
00587 
00588   printf ("\f\n");
00589   printf (_("Index by function name\n\n"));
00590   index = (todo + 2) / 3;
00591 
00592   for (i = 0; i < index; i++)
00593     {
00594       col = 0;
00595       starting_col = 0;
00596 
00597       for (j = i; j < todo; j += index)
00598        {
00599          sym = name_sorted_syms[j];
00600 
00601          if (sym->cg.print_flag)
00602            sprintf (buf, "[%d]", sym->cg.index);
00603          else
00604            sprintf (buf, "(%d)", sym->cg.index);
00605 
00606          if (j < nnames)
00607            {
00608              if (bsd_style_output)
00609               {
00610                 printf ("%6.6s %-19.19s", buf, sym->name);
00611               }
00612              else
00613               {
00614                 col += strlen (buf);
00615 
00616                 for (; col < starting_col + 5; ++col)
00617                   putchar (' ');
00618 
00619                 printf (" %s ", buf);
00620                 col += print_name_only (sym);
00621 
00622                 if (!line_granularity && sym->is_static && sym->file)
00623                   {
00624                     filename = sym->file->name;
00625 
00626                     if (!print_path)
00627                      {
00628                        filename = strrchr (filename, '/');
00629 
00630                        if (filename)
00631                          ++filename;
00632                        else
00633                          filename = sym->file->name;
00634                      }
00635 
00636                     printf (" (%s)", filename);
00637                     col += strlen (filename) + 3;
00638                   }
00639               }
00640            }
00641          else
00642            {
00643              if (bsd_style_output)
00644               {
00645                 printf ("%6.6s ", buf);
00646                 sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
00647                 printf ("%-19.19s", buf);
00648               }
00649              else
00650               {
00651                 col += strlen (buf);
00652                 for (; col < starting_col + 5; ++col)
00653                   putchar (' ');
00654                 printf (" %s ", buf);
00655                 sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
00656                 printf ("%s", buf);
00657                 col += strlen (buf);
00658               }
00659            }
00660 
00661          starting_col += column_width;
00662        }
00663 
00664       printf ("\n");
00665     }
00666 
00667   free (name_sorted_syms);
00668 }
00669 
00670 /* Compare two arcs based on their usage counts.
00671    We want to sort in descending order.  */
00672 
00673 static int
00674 cmp_arc_count (const PTR left, const PTR right)
00675 {
00676   const Arc **npp1 = (const Arc **) left;
00677   const Arc **npp2 = (const Arc **) right;
00678 
00679   if ((*npp1)->count > (*npp2)->count)
00680     return -1;
00681   else if ((*npp1)->count < (*npp2)->count)
00682     return 1;
00683   else
00684     return 0;
00685 }
00686 
00687 /* Compare two funtions based on their usage counts.
00688    We want to sort in descending order.  */
00689 
00690 static int
00691 cmp_fun_nuses (const PTR left, const PTR right)
00692 {
00693   const Sym **npp1 = (const Sym **) left;
00694   const Sym **npp2 = (const Sym **) right;
00695 
00696   if ((*npp1)->nuses > (*npp2)->nuses)
00697     return -1;
00698   else if ((*npp1)->nuses < (*npp2)->nuses)
00699     return 1;
00700   else
00701     return 0;
00702 }
00703 
00704 /* Print a suggested function ordering based on the profiling data.
00705 
00706    We perform 4 major steps when ordering functions:
00707 
00708        * Group unused functions together and place them at the
00709        end of the function order.
00710 
00711        * Search the highest use arcs (those which account for 90% of
00712        the total arc count) for functions which have several parents.
00713 
00714        Group those with the most call sites together (currently the
00715        top 1.25% which have at least five different call sites).
00716 
00717        These are emitted at the start of the function order.
00718 
00719        * Use a greedy placement algorithm to place functions which
00720        occur in the top 99% of the arcs in the profile.  Some provisions
00721        are made to handle high usage arcs where the parent and/or
00722        child has already been placed.
00723 
00724        * Run the same greedy placement algorithm on the remaining
00725        arcs to place the leftover functions.
00726 
00727 
00728    The various "magic numbers" should (one day) be tuneable by command
00729    line options.  They were arrived at by benchmarking a few applications
00730    with various values to see which values produced better overall function
00731    orderings.
00732 
00733    Of course, profiling errors, machine limitations (PA long calls), and
00734    poor cutoff values for the placement algorithm may limit the usefullness
00735    of the resulting function order.  Improvements would be greatly appreciated.
00736 
00737    Suggestions:
00738 
00739        * Place the functions with many callers near the middle of the
00740        list to reduce long calls.
00741 
00742        * Propagate arc usage changes as functions are placed.  Ie if
00743        func1 and func2 are placed together, arcs to/from those arcs
00744        to the same parent/child should be combined, then resort the
00745        arcs to choose the next one.
00746 
00747        * Implement some global positioning algorithm to place the
00748        chains made by the greedy local positioning algorithm.  Probably
00749        by examining arcs which haven't been placed yet to tie two
00750        chains together.
00751 
00752        * Take a function's size and time into account in the algorithm;
00753        size in particular is important on the PA (long calls).  Placing
00754        many small functions onto their own page may be wise.
00755 
00756        * Use better profiling information; many published algorithms
00757        are based on call sequences through time, rather than just
00758        arc counts.
00759 
00760        * Prodecure cloning could improve performance when a small number
00761        of arcs account for most of the calls to a particular function.
00762 
00763        * Use relocation information to avoid moving unused functions
00764        completely out of the code stream; this would avoid severe lossage
00765        when the profile data bears little resemblance to actual runs.
00766 
00767        * Propagation of arc usages should also improve .o link line
00768        ordering which shares the same arc placement algorithm with
00769        the function ordering code (in fact it is a degenerate case
00770        of function ordering).  */
00771 
00772 void
00773 cg_print_function_ordering ()
00774 {
00775   unsigned long index, used, unused, scratch_index;
00776   unsigned long  unplaced_arc_count, high_arc_count, scratch_arc_count;
00777 #ifdef __GNUC__
00778   unsigned long long total_arcs, tmp_arcs_count;
00779 #else
00780   unsigned long total_arcs, tmp_arcs_count;
00781 #endif
00782   Sym **unused_syms, **used_syms, **scratch_syms;
00783   Arc **unplaced_arcs, **high_arcs, **scratch_arcs;
00784 
00785   index = 0;
00786   used = 0;
00787   unused = 0;
00788   scratch_index = 0;
00789   unplaced_arc_count = 0;
00790   high_arc_count = 0;
00791   scratch_arc_count = 0;
00792 
00793   /* First group all the unused functions together.  */
00794   unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
00795   used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
00796   scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
00797   high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
00798   scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
00799   unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
00800 
00801   /* Walk through all the functions; mark those which are never
00802      called as placed (we'll emit them as a group later).  */
00803   for (index = 0, used = 0, unused = 0; index < symtab.len; index++)
00804     {
00805       if (symtab.base[index].ncalls == 0)
00806        {
00807          /* Filter out gprof generated names.  */
00808          if (strcmp (symtab.base[index].name, "<locore>")
00809              && strcmp (symtab.base[index].name, "<hicore>"))
00810            {
00811              unused_syms[unused++] = &symtab.base[index];
00812              symtab.base[index].has_been_placed = 1;
00813            }
00814        }
00815       else
00816        {
00817          used_syms[used++] = &symtab.base[index];
00818          symtab.base[index].has_been_placed = 0;
00819          symtab.base[index].next = 0;
00820          symtab.base[index].prev = 0;
00821          symtab.base[index].nuses = 0;
00822        }
00823     }
00824 
00825   /* Sort the arcs from most used to least used.  */
00826   qsort (arcs, numarcs, sizeof (Arc *), cmp_arc_count);
00827 
00828   /* Compute the total arc count.  Also mark arcs as unplaced.
00829 
00830      Note we don't compensate for overflow if that happens!
00831      Overflow is much less likely when this file is compiled
00832      with GCC as it can double-wide integers via long long.  */
00833   total_arcs = 0;
00834   for (index = 0; index < numarcs; index++)
00835     {
00836       total_arcs += arcs[index]->count;
00837       arcs[index]->has_been_placed = 0;
00838     }
00839 
00840   /* We want to pull out those functions which are referenced
00841      by many highly used arcs and emit them as a group.  This
00842      could probably use some tuning.  */
00843   tmp_arcs_count = 0;
00844   for (index = 0; index < numarcs; index++)
00845     {
00846       tmp_arcs_count += arcs[index]->count;
00847 
00848       /* Count how many times each parent and child are used up
00849         to our threshhold of arcs (90%).  */
00850       if ((double)tmp_arcs_count / (double)total_arcs > 0.90)
00851        break;
00852 
00853       arcs[index]->child->nuses++;
00854     }
00855 
00856   /* Now sort a temporary symbol table based on the number of
00857      times each function was used in the highest used arcs.  */
00858   memcpy (scratch_syms, used_syms, used * sizeof (Sym *));
00859   qsort (scratch_syms, used, sizeof (Sym *), cmp_fun_nuses);
00860 
00861   /* Now pick out those symbols we're going to emit as
00862      a group.  We take up to 1.25% of the used symbols.  */
00863   for (index = 0; index < used / 80; index++)
00864     {
00865       Sym *sym = scratch_syms[index];
00866       Arc *arc;
00867 
00868       /* If we hit symbols that aren't used from many call sites,
00869         then we can quit.  We choose five as the low limit for
00870         no particular reason.  */
00871       if (sym->nuses == 5)
00872        break;
00873 
00874       /* We're going to need the arcs between these functions.
00875         Unfortunately, we don't know all these functions
00876         until we're done.  So we keep track of all the arcs
00877         to the functions we care about, then prune out those
00878         which are uninteresting.
00879 
00880         An interesting variation would be to quit when we found
00881         multi-call site functions which account for some percentage
00882         of the arcs.  */
00883       arc = sym->cg.children;
00884 
00885       while (arc)
00886        {
00887          if (arc->parent != arc->child)
00888            scratch_arcs[scratch_arc_count++] = arc;
00889          arc->has_been_placed = 1;
00890          arc = arc->next_child;
00891        }
00892 
00893       arc = sym->cg.parents;
00894 
00895       while (arc)
00896        {
00897          if (arc->parent != arc->child)
00898            scratch_arcs[scratch_arc_count++] = arc;
00899          arc->has_been_placed = 1;
00900          arc = arc->next_parent;
00901        }
00902 
00903       /* Keep track of how many symbols we're going to place.  */
00904       scratch_index = index;
00905 
00906       /* A lie, but it makes identifying
00907         these functions easier later.  */
00908       sym->has_been_placed = 1;
00909     }
00910 
00911   /* Now walk through the temporary arcs and copy
00912      those we care about into the high arcs array.  */
00913   for (index = 0; index < scratch_arc_count; index++)
00914     {
00915       Arc *arc = scratch_arcs[index];
00916 
00917       /* If this arc refers to highly used functions, then
00918         then we want to keep it.  */
00919       if (arc->child->has_been_placed
00920          && arc->parent->has_been_placed)
00921        {
00922          high_arcs[high_arc_count++] = scratch_arcs[index];
00923 
00924          /* We need to turn of has_been_placed since we're going to
00925             use the main arc placement algorithm on these arcs.  */
00926          arc->child->has_been_placed = 0;
00927          arc->parent->has_been_placed = 0;
00928        }
00929     }
00930 
00931   /* Dump the multi-site high usage functions which are not
00932      going to be ordered by the main ordering algorithm.  */
00933   for (index = 0; index < scratch_index; index++)
00934     {
00935       if (scratch_syms[index]->has_been_placed)
00936        printf ("%s\n", scratch_syms[index]->name);
00937     }
00938 
00939   /* Now we can order the multi-site high use
00940      functions based on the arcs between them.  */
00941   qsort (high_arcs, high_arc_count, sizeof (Arc *), cmp_arc_count);
00942   order_and_dump_functions_by_arcs (high_arcs, high_arc_count, 1,
00943                                 unplaced_arcs, &unplaced_arc_count);
00944 
00945   /* Order and dump the high use functions left,
00946      these typically have only a few call sites.  */
00947   order_and_dump_functions_by_arcs (arcs, numarcs, 0,
00948                                 unplaced_arcs, &unplaced_arc_count);
00949 
00950   /* Now place the rarely used functions.  */
00951   order_and_dump_functions_by_arcs (unplaced_arcs, unplaced_arc_count, 1,
00952                                 scratch_arcs, &scratch_arc_count);
00953 
00954   /* Output any functions not emitted by the order_and_dump calls.  */
00955   for (index = 0; index < used; index++)
00956     if (used_syms[index]->has_been_placed == 0)
00957       printf("%s\n", used_syms[index]->name);
00958 
00959   /* Output the unused functions.  */
00960   for (index = 0; index < unused; index++)
00961     printf("%s\n", unused_syms[index]->name);
00962 
00963   unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
00964   used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
00965   scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
00966   high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
00967   scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
00968   unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
00969 
00970   free (unused_syms);
00971   free (used_syms);
00972   free (scratch_syms);
00973   free (high_arcs);
00974   free (scratch_arcs);
00975   free (unplaced_arcs);
00976 }
00977 
00978 /* Place functions based on the arcs in THE_ARCS with ARC_COUNT entries;
00979    place unused arcs into UNPLACED_ARCS/UNPLACED_ARC_COUNT.
00980 
00981    If ALL is nonzero, then place all functions referenced by THE_ARCS,
00982    else only place those referenced in the top 99% of the arcs in THE_ARCS.  */
00983 
00984 #define MOST 0.99
00985 static void
00986 order_and_dump_functions_by_arcs (the_arcs, arc_count, all,
00987                               unplaced_arcs, unplaced_arc_count)
00988      Arc **the_arcs;
00989      unsigned long arc_count;
00990      int all;
00991      Arc **unplaced_arcs;
00992      unsigned long *unplaced_arc_count;
00993 {
00994 #ifdef __GNUC__
00995   unsigned long long tmp_arcs, total_arcs;
00996 #else
00997   unsigned long tmp_arcs, total_arcs;
00998 #endif
00999   unsigned int index;
01000 
01001   /* If needed, compute the total arc count.
01002 
01003      Note we don't compensate for overflow if that happens!  */
01004   if (! all)
01005     {
01006       total_arcs = 0;
01007       for (index = 0; index < arc_count; index++)
01008        total_arcs += the_arcs[index]->count;
01009     }
01010   else
01011     total_arcs = 0;
01012 
01013   tmp_arcs = 0;
01014 
01015   for (index = 0; index < arc_count; index++)
01016     {
01017       Sym *sym1, *sym2;
01018       Sym *child, *parent;
01019 
01020       tmp_arcs += the_arcs[index]->count;
01021 
01022       /* Ignore this arc if it's already been placed.  */
01023       if (the_arcs[index]->has_been_placed)
01024        continue;
01025 
01026       child = the_arcs[index]->child;
01027       parent = the_arcs[index]->parent;
01028 
01029       /* If we're not using all arcs, and this is a rarely used
01030         arc, then put it on the unplaced_arc list.  Similarly
01031         if both the parent and child of this arc have been placed.  */
01032       if ((! all && (double)tmp_arcs / (double)total_arcs > MOST)
01033          || child->has_been_placed || parent->has_been_placed)
01034        {
01035          unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index];
01036          continue;
01037        }
01038 
01039       /* If all slots in the parent and child are full, then there isn't
01040         anything we can do right now.  We'll place this arc on the
01041         unplaced arc list in the hope that a global positioning
01042         algorithm can use it to place function chains.  */
01043       if (parent->next && parent->prev && child->next && child->prev)
01044        {
01045          unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index];
01046          continue;
01047        }
01048 
01049       /* If the parent is unattached, then find the closest
01050         place to attach it onto child's chain.   Similarly
01051         for the opposite case.  */
01052       if (!parent->next && !parent->prev)
01053        {
01054          int next_count = 0;
01055          int prev_count = 0;
01056          Sym *prev = child;
01057          Sym *next = child;
01058 
01059          /* Walk to the beginning and end of the child's chain.  */
01060          while (next->next)
01061            {
01062              next = next->next;
01063              next_count++;
01064            }
01065 
01066          while (prev->prev)
01067            {
01068              prev = prev->prev;
01069              prev_count++;
01070            }
01071 
01072          /* Choose the closest.  */
01073          child = next_count < prev_count ? next : prev;
01074        }
01075       else if (! child->next && !child->prev)
01076        {
01077          int next_count = 0;
01078          int prev_count = 0;
01079          Sym *prev = parent;
01080          Sym *next = parent;
01081 
01082          while (next->next)
01083            {
01084              next = next->next;
01085              next_count++;
01086            }
01087 
01088          while (prev->prev)
01089            {
01090              prev = prev->prev;
01091              prev_count++;
01092            }
01093 
01094          parent = prev_count < next_count ? prev : next;
01095        }
01096       else
01097        {
01098          /* Couldn't find anywhere to attach the functions,
01099             put the arc on the unplaced arc list.  */
01100          unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index];
01101          continue;
01102        }
01103 
01104       /* Make sure we don't tie two ends together.  */
01105       sym1 = parent;
01106       if (sym1->next)
01107        while (sym1->next)
01108          sym1 = sym1->next;
01109       else
01110        while (sym1->prev)
01111          sym1 = sym1->prev;
01112 
01113       sym2 = child;
01114       if (sym2->next)
01115        while (sym2->next)
01116          sym2 = sym2->next;
01117       else
01118        while (sym2->prev)
01119          sym2 = sym2->prev;
01120 
01121       if (sym1 == child
01122          && sym2 == parent)
01123        {
01124          /* This would tie two ends together.  */
01125          unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index];
01126          continue;
01127        }
01128 
01129       if (parent->next)
01130        {
01131          /* Must attach to the parent's prev field.  */
01132          if (! child->next)
01133            {
01134              /* parent-prev and child-next */
01135              parent->prev = child;
01136              child->next = parent;
01137              the_arcs[index]->has_been_placed = 1;
01138            }
01139        }
01140       else if (parent->prev)
01141        {
01142          /* Must attach to the parent's next field.  */
01143          if (! child->prev)
01144            {
01145              /* parent-next and child-prev */
01146              parent->next = child;
01147              child->prev = parent;
01148              the_arcs[index]->has_been_placed = 1;
01149            }
01150        }
01151       else
01152        {
01153          /* Can attach to either field in the parent, depends
01154             on where we've got space in the child.  */
01155          if (child->prev)
01156            {
01157              /* parent-prev and child-next.  */
01158              parent->prev = child;
01159              child->next = parent;
01160              the_arcs[index]->has_been_placed = 1;
01161            }
01162          else
01163            {
01164              /* parent-next and child-prev.  */
01165              parent->next = child;
01166              child->prev = parent;
01167              the_arcs[index]->has_been_placed = 1;
01168            }
01169        }
01170     }
01171 
01172   /* Dump the chains of functions we've made.  */
01173   for (index = 0; index < arc_count; index++)
01174     {
01175       Sym *sym;
01176       if (the_arcs[index]->parent->has_been_placed
01177          || the_arcs[index]->child->has_been_placed)
01178        continue;
01179 
01180       sym = the_arcs[index]->parent;
01181 
01182       /* If this symbol isn't attached to any other
01183         symbols, then we've got a rarely used arc.
01184 
01185         Skip it for now, we'll deal with them later.  */
01186       if (sym->next == NULL
01187          && sym->prev == NULL)
01188        continue;
01189 
01190       /* Get to the start of this chain.  */
01191       while (sym->prev)
01192        sym = sym->prev;
01193 
01194       while (sym)
01195        {
01196          /* Mark it as placed.  */
01197          sym->has_been_placed = 1;
01198          printf ("%s\n", sym->name);
01199          sym = sym->next;
01200        }
01201     }
01202 
01203   /* If we want to place all the arcs, then output
01204      those which weren't placed by the main algorithm.  */
01205   if (all)
01206     for (index = 0; index < arc_count; index++)
01207       {
01208        Sym *sym;
01209        if (the_arcs[index]->parent->has_been_placed
01210            || the_arcs[index]->child->has_been_placed)
01211          continue;
01212 
01213        sym = the_arcs[index]->parent;
01214 
01215        sym->has_been_placed = 1;
01216        printf ("%s\n", sym->name);
01217       }
01218 }
01219 
01220 /* Print a suggested .o ordering for files on a link line based
01221    on profiling information.  This uses the function placement
01222    code for the bulk of its work.  */
01223 
01224 void
01225 cg_print_file_ordering ()
01226 {
01227   unsigned long scratch_arc_count, index;
01228   Arc **scratch_arcs;
01229   char *last;
01230 
01231   scratch_arc_count = 0;
01232 
01233   scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
01234   for (index = 0; index < numarcs; index++)
01235     {
01236       if (! arcs[index]->parent->mapped
01237          || ! arcs[index]->child->mapped)
01238        arcs[index]->has_been_placed = 1;
01239     }
01240 
01241   order_and_dump_functions_by_arcs (arcs, numarcs, 0,
01242                                 scratch_arcs, &scratch_arc_count);
01243 
01244   /* Output .o's not handled by the main placement algorithm.  */
01245   for (index = 0; index < symtab.len; index++)
01246     {
01247       if (symtab.base[index].mapped
01248          && ! symtab.base[index].has_been_placed)
01249        printf ("%s\n", symtab.base[index].name);
01250     }
01251 
01252   /* Now output any .o's that didn't have any text symbols.  */
01253   last = NULL;
01254   for (index = 0; index < symbol_map_count; index++)
01255     {
01256       unsigned int index2;
01257 
01258       /* Don't bother searching if this symbol
01259         is the same as the previous one.  */
01260       if (last && !strcmp (last, symbol_map[index].file_name))
01261        continue;
01262 
01263       for (index2 = 0; index2 < symtab.len; index2++)
01264        {
01265          if (! symtab.base[index2].mapped)
01266            continue;
01267 
01268          if (!strcmp (symtab.base[index2].name, symbol_map[index].file_name))
01269            break;
01270        }
01271 
01272       /* If we didn't find it in the symbol table, then it must
01273         be a .o with no text symbols.  Output it last.  */
01274       if (index2 == symtab.len)
01275        printf ("%s\n", symbol_map[index].file_name);
01276       last = symbol_map[index].file_name;
01277     }
01278 }