Back to index

glibc  2.9
perf.c
Go to the documentation of this file.
00001 /* Copyright (C) 2002, 2005 Free Software Foundation, Inc.
00002    This file is part of the GNU C Library.
00003    Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
00004 
00005    The GNU C Library is free software; you can redistribute it and/or
00006    modify it under the terms of the GNU Lesser General Public
00007    License as published by the Free Software Foundation; either
00008    version 2.1 of the License, or (at your option) any later version.
00009 
00010    The GNU C Library is distributed in the hope that it will be useful,
00011    but WITHOUT ANY WARRANTY; without even the implied warranty of
00012    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013    Lesser General Public License for more details.
00014 
00015    You should have received a copy of the GNU Lesser General Public
00016    License along with the GNU C Library; if not, write to the Free
00017    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
00018    02111-1307 USA.  */
00019 
00020 #define _GNU_SOURCE  1
00021 #include <argp.h>
00022 #include <error.h>
00023 #include <errno.h>
00024 #include <fcntl.h>
00025 #include <inttypes.h>
00026 #include <limits.h>
00027 #include <pthread.h>
00028 #include <signal.h>
00029 #include <stdbool.h>
00030 #include <stdlib.h>
00031 #include <string.h>
00032 #include <time.h>
00033 #include <unistd.h>
00034 #include <sys/param.h>
00035 #include <sys/types.h>
00036 
/* Upper limit used by the option parser when validating the --threads
   and --toplevel arguments.  Can be overridden at compile time.  */
#ifndef MAX_THREADS
# define MAX_THREADS        100000
#endif
/* Initial value of the `threads' variable, i.e. the number of threads
   used at once unless --threads is given.  Can be overridden at compile
   time.  */
#ifndef DEFAULT_THREADS
# define DEFAULT_THREADS    50
#endif
00043 
00044 
/* Keys of the options which only have a long name.  The values are
   larger than any character code so they cannot collide with the
   single-letter keys used in the table below.  */
#define OPT_TO_THREAD              300
#define OPT_TO_PROCESS             301
#define OPT_SYNC_SIGNAL            302
#define OPT_SYNC_JOIN              303
#define OPT_TOPLEVEL               304


/* Option table handed to argp.  Entries with a NULL name and a zero key
   only carry a documentation string; the all-NULL entry terminates the
   table.  */
static const struct argp_option options[] =
  {
    { NULL, 0, NULL, 0,
      "This is a test for threads so we allow the user to select the number "
      "of threads which are used at any one time.  Independently the total "
      "number of rounds can be selected.  This is the total number of "
      "threads which will have run when the process terminates:" },
    { "threads", 't', "NUMBER", 0, "Number of threads used at once" },
    { "starts", 's', "NUMBER", 0, "Total number of working threads" },
    { "toplevel", OPT_TOPLEVEL, "NUMBER", 0,
      "Number of toplevel threads which start the other threads; this "
      "implies --sync-join" },

    { NULL, 0, NULL, 0,
      "Each thread can do one of two things: sleep or do work.  The latter "
      "is 100% CPU bound.  The work load is the probability a thread does "
      "work.  All values from zero to 100 (inclusive) are valid.  How often "
      "each thread repeats this can be determined by the number of rounds.  "
      "The work cost determines how long each work session (not sleeping) "
      "takes.  If it is zero a thread would effectively do nothing.  By "
      "setting the number of rounds to zero the thread does no work at all "
      "and pure thread creation times can be measured." },
    { "workload", 'w', "PERCENT", 0, "Percentage of time spent working" },
    { "workcost", 'c', "NUMBER", 0,
      "Factor in the cost of each round of working" },
    { "rounds", 'r', "NUMBER", 0, "Number of rounds each thread runs" },

    { NULL, 0, NULL, 0,
      "There are a number of different methods how thread creation can be "
      "synchronized.  Synchronization is necessary since the number of "
      "concurrently running threads is limited." },
    { "sync-signal", OPT_SYNC_SIGNAL, NULL, 0,
      "Synchronize using a signal (default)" },
    { "sync-join", OPT_SYNC_JOIN, NULL, 0, "Synchronize using pthread_join" },

    { NULL, 0, NULL, 0,
      "One parameter for each thread's execution is the size of the stack.  "
      "If this parameter is not used the system's default stack size is "
      "used.  If many threads are used the stack size should be chosen "
      "quite small." },
    { "stacksize", 'S', "BYTES", 0, "Size of threads stack" },
    { "guardsize", 'g', "BYTES", 0,
      "Size of stack guard area; must fit into the stack" },

    { NULL, 0, NULL, 0, "Signal options:" },
    { "to-thread", OPT_TO_THREAD, NULL, 0, "Send signal to main thread" },
    { "to-process", OPT_TO_PROCESS, NULL, 0,
      "Send signal to process (default)" },

    { NULL, 0, NULL, 0, "Administrative options:" },
    { "progress", 'p', NULL, 0, "Show signs of progress" },
    { "timing", 'T', NULL, 0,
      "Measure time from startup to the last thread finishing" },
    { NULL, 0, NULL, 0, NULL }
  };
00105 
00106 /* Prototype for option handler.  */
00107 static error_t parse_opt (int key, char *arg, struct argp_state *state);
00108 
00109 /* Data structure to communicate with argp functions.  */
00110 static struct argp argp =
00111 {
00112   options, parse_opt
00113 };
00114 
00115 
/* Test parameters; the initializers are the defaults, parse_opt
   overwrites them according to the command line.  */

/* Number of threads used at once (--threads).  */
static unsigned long int threads = DEFAULT_THREADS;
/* Percentage of rounds in which a thread works instead of sleeping
   (--workload).  */
static unsigned long int workload = 75;
/* Number of iterations of the computation loop per working round
   (--workcost).  */
static unsigned long int workcost = 20;
/* Number of rounds each worker thread runs (--rounds).  */
static unsigned long int rounds = 10;
/* Number of worker threads still to be started (--starts); counted down
   by main.  */
static long int starts = 5000;
/* Stack size for the created threads (--stacksize); zero means the
   default is used.  */
static unsigned long int stacksize;
/* Guard size for the created threads (--guardsize); -1 means the
   default is used.  */
static long int guardsize = -1;
/* True if progress characters are written to stderr (--progress).  */
static bool progress;
/* True if the runtime is measured and printed (--timing).  */
static bool timing;
/* True if SIGUSR1 is sent to the main thread, false if it is sent to
   the process (--to-thread / --to-process).  */
static bool to_thread;
/* Number of toplevel threads which start the workers when
   synchronizing with pthread_join (--toplevel).  */
static unsigned long int toplevel = 1;


/* Number of worker threads accounted as running when synchronizing
   with a signal: incremented by main before a thread is created,
   decremented by thread_function.  Modified with running_mutex held.  */
static long int running;
static pthread_mutex_t running_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Process ID and thread descriptor of the main thread, recorded by main
   so that the workers can send it SIGUSR1.  */
static pid_t pid;
static pthread_t tmain;

/* Clock used for --timing and the time read from it at startup.  */
static clockid_t cl;
static struct timespec start_time;


/* Checksum over the results of all work rounds, printed as "Result" at
   the end.  Updated with sum_mutex held.  */
static pthread_mutex_t sum_mutex = PTHREAD_MUTEX_INITIALIZER;
unsigned int sum;

/* Selected synchronization method.  sync_signal is the first enumerator
   and therefore the value of the zero-initialized variable.  */
static enum
  {
    sync_signal,
    sync_join
  }
sync_method;


/* We use 64bit values for the times.  */
typedef unsigned long long int hp_timing_t;


/* Attributes for all created threads.  Initialized in main.  */
static pthread_attr_t attr;
00156 
00157 
00158 static void *
00159 work (void *arg)
00160 {
00161   unsigned long int i;
00162   unsigned int state = (unsigned long int) arg;
00163 
00164   for (i = 0; i < rounds; ++i)
00165     {
00166       /* Determine what to do.  */
00167       unsigned int rnum;
00168 
00169       /* Uniform distribution.  */
00170       do
00171        rnum = rand_r (&state);
00172       while (rnum >= UINT_MAX - (UINT_MAX % 100));
00173 
00174       rnum %= 100;
00175 
00176       if (rnum < workload)
00177        {
00178          int j;
00179          int a[4] = { i, rnum, i + rnum, rnum - i };
00180 
00181          if (progress)
00182            write (STDERR_FILENO, "c", 1);
00183 
00184          for (j = 0; j < workcost; ++j)
00185            {
00186              a[0] += a[3] >> 12;
00187              a[1] += a[2] >> 20;
00188              a[2] += a[1] ^ 0x3423423;
00189              a[3] += a[0] - a[1];
00190            }
00191 
00192          pthread_mutex_lock (&sum_mutex);
00193          sum += a[0] + a[1] + a[2] + a[3];
00194          pthread_mutex_unlock (&sum_mutex);
00195        }
00196       else
00197        {
00198          /* Just sleep.  */
00199          struct timespec tv;
00200 
00201          tv.tv_sec = 0;
00202          tv.tv_nsec = 10000000;
00203 
00204          if (progress)
00205            write (STDERR_FILENO, "w", 1);
00206 
00207          nanosleep (&tv, NULL);
00208        }
00209     }
00210 
00211   return NULL;
00212 }
00213 
00214 
00215 static void *
00216 thread_function (void *arg)
00217 {
00218   work (arg);
00219 
00220   pthread_mutex_lock (&running_mutex);
00221   if (--running <= 0 && starts <= 0)
00222     {
00223       /* We are done.  */
00224       if (progress)
00225        write (STDERR_FILENO, "\n", 1);
00226 
00227       if (timing)
00228        {
00229          struct timespec end_time;
00230 
00231          if (clock_gettime (cl, &end_time) == 0)
00232            {
00233              end_time.tv_sec -= start_time.tv_sec;
00234              end_time.tv_nsec -= start_time.tv_nsec;
00235              if (end_time.tv_nsec < 0)
00236               {
00237                 end_time.tv_nsec += 1000000000;
00238                 --end_time.tv_sec;
00239               }
00240 
00241              printf ("\nRuntime: %lu.%09lu seconds\n",
00242                     (unsigned long int) end_time.tv_sec,
00243                     (unsigned long int) end_time.tv_nsec);
00244            }
00245        }
00246 
00247       printf ("Result: %08x\n", sum);
00248 
00249       exit (0);
00250     }
00251   pthread_mutex_unlock (&running_mutex);
00252 
00253   if (sync_method == sync_signal)
00254     {
00255       if (to_thread)
00256        /* This code sends a signal to the main thread.  */
00257        pthread_kill (tmain, SIGUSR1);
00258       else
00259        /* Use this code to test sending a signal to the process.  */
00260        kill (pid, SIGUSR1);
00261     }
00262 
00263   if (progress)
00264     write (STDERR_FILENO, "f", 1);
00265 
00266   return NULL;
00267 }
00268 
00269 
/* Work assignment for one toplevel thread running start_threads;
   filled in by main.  */
struct start_info
{
  /* Number of worker threads this toplevel thread has to create.  */
  unsigned int starts;
  /* Size of its thread descriptor array, i.e. how many of its workers
     exist at the same time.  */
  unsigned int threads;
};
00275 
00276 
00277 static void *
00278 start_threads (void *arg)
00279 {
00280   struct start_info *si = arg;
00281   unsigned int starts = si->starts;
00282   pthread_t ths[si->threads];
00283   unsigned int state = starts;
00284   unsigned int n;
00285   unsigned int i = 0;
00286   int err;
00287 
00288   if (progress)
00289     write (STDERR_FILENO, "T", 1);
00290 
00291   memset (ths, '\0', sizeof (pthread_t) * si->threads);
00292 
00293   while (starts-- > 0)
00294     {
00295       if (ths[i] != 0)
00296        {
00297          /* Wait for the threads in the order they were created.  */
00298          err = pthread_join (ths[i], NULL);
00299          if (err != 0)
00300            error (EXIT_FAILURE, err, "cannot join thread");
00301 
00302          if (progress)
00303            write (STDERR_FILENO, "f", 1);
00304        }
00305 
00306       err = pthread_create (&ths[i], &attr, work,
00307                          (void *) (long) (rand_r (&state) + starts + i));
00308 
00309       if (err != 0)
00310        error (EXIT_FAILURE, err, "cannot start thread");
00311 
00312       if (progress)
00313        write (STDERR_FILENO, "t", 1);
00314 
00315       if (++i == si->threads)
00316        i = 0;
00317     }
00318 
00319   n = i;
00320   do
00321     {
00322       if (ths[i] != 0)
00323        {
00324          err = pthread_join (ths[i], NULL);
00325          if (err != 0)
00326            error (EXIT_FAILURE, err, "cannot join thread");
00327 
00328          if (progress)
00329            write (STDERR_FILENO, "f", 1);
00330        }
00331 
00332       if (++i == si->threads)
00333        i = 0;
00334     }
00335   while (i != n);
00336 
00337   if (progress)
00338     write (STDERR_FILENO, "F", 1);
00339 
00340   return NULL;
00341 }
00342 
00343 
00344 int
00345 main (int argc, char *argv[])
00346 {
00347   int remaining;
00348   sigset_t ss;
00349   pthread_t th;
00350   pthread_t *ths = NULL;
00351   int empty = 0;
00352   int last;
00353   bool cont = true;
00354 
00355   /* Parse and process arguments.  */
00356   argp_parse (&argp, argc, argv, 0, &remaining, NULL);
00357 
00358   if (sync_method == sync_join)
00359     {
00360       ths = (pthread_t *) calloc (threads, sizeof (pthread_t));
00361       if (ths == NULL)
00362        error (EXIT_FAILURE, errno,
00363               "cannot allocate memory for thread descriptor array");
00364 
00365       last = threads;
00366     }
00367   else
00368     {
00369       ths = &th;
00370       last = 1;
00371     }
00372 
00373   if (toplevel > threads)
00374     {
00375       printf ("resetting number of toplevel threads to %lu to not surpass number to concurrent threads\n",
00376              threads);
00377       toplevel = threads;
00378     }
00379 
00380   if (timing)
00381     {
00382       if (clock_getcpuclockid (0, &cl) != 0
00383          || clock_gettime (cl, &start_time) != 0)
00384        timing = false;
00385     }
00386 
00387   /* We need this later.  */
00388   pid = getpid ();
00389   tmain = pthread_self ();
00390 
00391   /* We use signal SIGUSR1 for communication between the threads and
00392      the main thread.  We only want sychronous notification.  */
00393   if (sync_method == sync_signal)
00394     {
00395       sigemptyset (&ss);
00396       sigaddset (&ss, SIGUSR1);
00397       if (sigprocmask (SIG_BLOCK, &ss, NULL) != 0)
00398        error (EXIT_FAILURE, errno, "cannot set signal mask");
00399     }
00400 
00401   /* Create the thread attributes.  */
00402   pthread_attr_init (&attr);
00403 
00404   /* If the user provided a stack size use it.  */
00405   if (stacksize != 0
00406       && pthread_attr_setstacksize (&attr, stacksize) != 0)
00407     puts ("could not set stack size; will use default");
00408   /* And stack guard size.  */
00409   if (guardsize != -1
00410       && pthread_attr_setguardsize (&attr, guardsize) != 0)
00411     puts ("invalid stack guard size; will use default");
00412 
00413   /* All threads are created detached if we are not using pthread_join
00414      to synchronize.  */
00415   if (sync_method != sync_join)
00416     pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
00417 
00418   if (sync_method == sync_signal)
00419     {
00420       while (1)
00421        {
00422          int err;
00423          bool do_wait = false;
00424 
00425          pthread_mutex_lock (&running_mutex);
00426          if (starts-- < 0)
00427            cont = false;
00428          else
00429            do_wait = ++running >= threads && starts > 0;
00430 
00431          pthread_mutex_unlock (&running_mutex);
00432 
00433          if (! cont)
00434            break;
00435 
00436          if (progress)
00437            write (STDERR_FILENO, "t", 1);
00438 
00439          err = pthread_create (&ths[empty], &attr, thread_function,
00440                             (void *) starts);
00441          if (err != 0)
00442            error (EXIT_FAILURE, err, "cannot start thread %lu", starts);
00443 
00444          if (++empty == last)
00445            empty = 0;
00446 
00447          if (do_wait)
00448            sigwaitinfo (&ss, NULL);
00449        }
00450 
00451       /* Do nothing anymore.  On of the threads will terminate the program.  */
00452       sigfillset (&ss);
00453       sigdelset (&ss, SIGINT);
00454       while (1)
00455        sigsuspend (&ss);
00456     }
00457   else
00458     {
00459       pthread_t ths[toplevel];
00460       struct start_info si[toplevel];
00461       unsigned int i;
00462 
00463       for (i = 0; i < toplevel; ++i)
00464        {
00465          unsigned int child_starts = starts / (toplevel - i);
00466          unsigned int child_threads = threads / (toplevel - i);
00467          int err;
00468 
00469          si[i].starts = child_starts;
00470          si[i].threads = child_threads;
00471 
00472          err = pthread_create (&ths[i], &attr, start_threads, &si[i]);
00473          if (err != 0)
00474            error (EXIT_FAILURE, err, "cannot start thread");
00475 
00476          starts -= child_starts;
00477          threads -= child_threads;
00478        }
00479 
00480       for (i = 0; i < toplevel; ++i)
00481        {
00482          int err = pthread_join (ths[i], NULL);
00483 
00484          if (err != 0)
00485            error (EXIT_FAILURE, err, "cannot join thread");
00486        }
00487 
00488       /* We are done.  */
00489       if (progress)
00490        write (STDERR_FILENO, "\n", 1);
00491 
00492       if (timing)
00493        {
00494          struct timespec end_time;
00495 
00496          if (clock_gettime (cl, &end_time) == 0)
00497            {
00498              end_time.tv_sec -= start_time.tv_sec;
00499              end_time.tv_nsec -= start_time.tv_nsec;
00500              if (end_time.tv_nsec < 0)
00501               {
00502                 end_time.tv_nsec += 1000000000;
00503                 --end_time.tv_sec;
00504               }
00505 
00506              printf ("\nRuntime: %lu.%09lu seconds\n",
00507                     (unsigned long int) end_time.tv_sec,
00508                     (unsigned long int) end_time.tv_nsec);
00509            }
00510        }
00511 
00512       printf ("Result: %08x\n", sum);
00513 
00514       exit (0);
00515     }
00516 
00517   /* NOTREACHED */
00518   return 0;
00519 }
00520 
00521 
00522 /* Handle program arguments.  */
00523 static error_t
00524 parse_opt (int key, char *arg, struct argp_state *state)
00525 {
00526   unsigned long int num;
00527   long int snum;
00528 
00529   switch (key)
00530     {
00531     case 't':
00532       num = strtoul (arg, NULL, 0);
00533       if (num <= MAX_THREADS)
00534        threads = num;
00535       else
00536        printf ("\
00537 number of threads limited to %u; recompile with a higher limit if necessary",
00538               MAX_THREADS);
00539       break;
00540 
00541     case 'w':
00542       num = strtoul (arg, NULL, 0);
00543       if (num <= 100)
00544        workload = num;
00545       else
00546        puts ("workload must be between 0 and 100 percent");
00547       break;
00548 
00549     case 'c':
00550       workcost = strtoul (arg, NULL, 0);
00551       break;
00552 
00553     case 'r':
00554       rounds = strtoul (arg, NULL, 0);
00555       break;
00556 
00557     case 's':
00558       starts = strtoul (arg, NULL, 0);
00559       break;
00560 
00561     case 'S':
00562       num = strtoul (arg, NULL, 0);
00563       if (num >= PTHREAD_STACK_MIN)
00564        stacksize = num;
00565       else
00566        printf ("minimum stack size is %d\n", PTHREAD_STACK_MIN);
00567       break;
00568 
00569     case 'g':
00570       snum = strtol (arg, NULL, 0);
00571       if (snum < 0)
00572        printf ("invalid guard size %s\n", arg);
00573       else
00574        guardsize = snum;
00575       break;
00576 
00577     case 'p':
00578       progress = true;
00579       break;
00580 
00581     case 'T':
00582       timing = true;
00583       break;
00584 
00585     case OPT_TO_THREAD:
00586       to_thread = true;
00587       break;
00588 
00589     case OPT_TO_PROCESS:
00590       to_thread = false;
00591       break;
00592 
00593     case OPT_SYNC_SIGNAL:
00594       sync_method = sync_signal;
00595       break;
00596 
00597     case OPT_SYNC_JOIN:
00598       sync_method = sync_join;
00599       break;
00600 
00601     case OPT_TOPLEVEL:
00602       num = strtoul (arg, NULL, 0);
00603       if (num < MAX_THREADS)
00604        toplevel = num;
00605       else
00606        printf ("\
00607 number of threads limited to %u; recompile with a higher limit if necessary",
00608               MAX_THREADS);
00609       sync_method = sync_join;
00610       break;
00611 
00612     default:
00613       return ARGP_ERR_UNKNOWN;
00614     }
00615 
00616   return 0;
00617 }
00618 
00619 
00620 static hp_timing_t
00621 get_clockfreq (void)
00622 {
00623   /* We read the information from the /proc filesystem.  It contains at
00624      least one line like
00625        cpu MHz         : 497.840237
00626      or also
00627        cpu MHz         : 497.841
00628      We search for this line and convert the number in an integer.  */
00629   static hp_timing_t result;
00630   int fd;
00631 
00632   /* If this function was called before, we know the result.  */
00633   if (result != 0)
00634     return result;
00635 
00636   fd = open ("/proc/cpuinfo", O_RDONLY);
00637   if (__builtin_expect (fd != -1, 1))
00638     {
00639       /* XXX AFAIK the /proc filesystem can generate "files" only up
00640          to a size of 4096 bytes.  */
00641       char buf[4096];
00642       ssize_t n;
00643 
00644       n = read (fd, buf, sizeof buf);
00645       if (__builtin_expect (n, 1) > 0)
00646        {
00647          char *mhz = memmem (buf, n, "cpu MHz", 7);
00648 
00649          if (__builtin_expect (mhz != NULL, 1))
00650            {
00651              char *endp = buf + n;
00652              int seen_decpoint = 0;
00653              int ndigits = 0;
00654 
00655              /* Search for the beginning of the string.  */
00656              while (mhz < endp && (*mhz < '0' || *mhz > '9') && *mhz != '\n')
00657               ++mhz;
00658 
00659              while (mhz < endp && *mhz != '\n')
00660               {
00661                 if (*mhz >= '0' && *mhz <= '9')
00662                   {
00663                     result *= 10;
00664                     result += *mhz - '0';
00665                     if (seen_decpoint)
00666                      ++ndigits;
00667                   }
00668                 else if (*mhz == '.')
00669                   seen_decpoint = 1;
00670 
00671                 ++mhz;
00672               }
00673 
00674              /* Compensate for missing digits at the end.  */
00675              while (ndigits++ < 6)
00676               result *= 10;
00677            }
00678        }
00679 
00680       close (fd);
00681     }
00682 
00683   return result;
00684 }
00685 
00686 
/* Local version of clock_getcpuclockid which only knows about the
   calling process.  PID must be zero or the ID of the calling process,
   otherwise EPERM is returned.  If CLOCK_PROCESS_CPUTIME_ID is defined
   it is stored in *CLOCK_ID and zero is returned; if not, ENOENT is
   returned and *CLOCK_ID is left alone.  */
int
clock_getcpuclockid (pid_t pid, clockid_t *clock_id)
{
  /* Only the calling process itself is supported.  */
  int is_self = pid == 0 || pid == getpid ();

  if (! is_self)
    return EPERM;

#ifndef CLOCK_PROCESS_CPUTIME_ID
  /* There is no such clock we could hand out.  */
  return ENOENT;
#else
  /* Hand out the ID of the process CPU-time clock.  */
  *clock_id = CLOCK_PROCESS_CPUTIME_ID;

  return 0;
#endif
}
00704 
00705 
/* HP_TIMING_NOW (Var) stores the current value of the CPU's tick
   counter in Var, which must be a 64-bit integer lvalue.  The tests use
   the macros with leading underscores and the code uses the __asm__
   spelling because neither `i386' nor the `asm' keyword is available
   when compiling in a strict ISO C mode.  */
#if defined __i386__ || defined i386
# define HP_TIMING_NOW(Var) __asm__ __volatile__ ("rdtsc" : "=A" (Var))
#elif defined __x86_64__
/* rdtsc delivers the counter in two 32-bit halves.  */
# define HP_TIMING_NOW(Var) \
  ({ unsigned int _hi, _lo; \
     __asm__ __volatile__ ("rdtsc" : "=a" (_lo), "=d" (_hi)); \
     (Var) = ((unsigned long long int) _hi << 32) | _lo; })
#elif defined __ia64__
# define HP_TIMING_NOW(Var) \
  __asm__ __volatile__ ("mov %0=ar.itc" : "=r" (Var) : : "memory")
#else
# error "HP_TIMING_NOW missing"
#endif
00718 
00719 /* Get current value of CLOCK and store it in TP.  */
00720 int
00721 clock_gettime (clockid_t clock_id, struct timespec *tp)
00722 {
00723   int retval = -1;
00724 
00725   switch (clock_id)
00726     {
00727     case CLOCK_PROCESS_CPUTIME_ID:
00728       {
00729 
00730        static hp_timing_t freq;
00731        hp_timing_t tsc;
00732 
00733        /* Get the current counter.  */
00734        HP_TIMING_NOW (tsc);
00735 
00736        if (freq == 0)
00737          {
00738            freq = get_clockfreq ();
00739            if (freq == 0)
00740              return EINVAL;
00741          }
00742 
00743        /* Compute the seconds.  */
00744        tp->tv_sec = tsc / freq;
00745 
00746        /* And the nanoseconds.  This computation should be stable until
00747           we get machines with about 16GHz frequency.  */
00748        tp->tv_nsec = ((tsc % freq) * UINT64_C (1000000000)) / freq;
00749 
00750        retval = 0;
00751       }
00752     break;
00753 
00754     default:
00755       errno = EINVAL;
00756       break;
00757     }
00758 
00759   return retval;
00760 }