Back to index

glibc  2.9
Classes | Defines | Typedefs | Enumerations | Functions | Variables
perf.c File Reference
#include <argp.h>
#include <error.h>
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <limits.h>
#include <pthread.h>
#include <signal.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <sys/param.h>
#include <sys/types.h>

Go to the source code of this file.

Classes

struct  start_info

Defines

#define _GNU_SOURCE   1
#define MAX_THREADS   100000
#define DEFAULT_THREADS   50
#define OPT_TO_THREAD   300
#define OPT_TO_PROCESS   301
#define OPT_SYNC_SIGNAL   302
#define OPT_SYNC_JOIN   303
#define OPT_TOPLEVEL   304

Typedefs

typedef unsigned long long int hp_timing_t

Enumerations

enum  { sync_signal, sync_join }

Functions

static error_t parse_opt (int key, char *arg, struct argp_state *state)
static void * work (void *arg)
static void * thread_function (void *arg)
static void * start_threads (void *arg)
int main (int argc, char *argv[])
static hp_timing_t get_clockfreq (void)
int clock_getcpuclockid (pid_t pid, clockid_t *clock_id)
int clock_gettime (clockid_t clock_id, struct timespec *tp)

Variables

static struct argp_option []
static struct argp argp
static unsigned long int threads = DEFAULT_THREADS
static unsigned long int workload = 75
static unsigned long int workcost = 20
static unsigned long int rounds = 10
static long int starts = 5000
static unsigned long int stacksize
static long int guardsize = -1
static bool progress
static bool timing
static bool to_thread
static unsigned long int toplevel = 1
static long int running
static pthread_mutex_t running_mutex = PTHREAD_MUTEX_INITIALIZER
static pid_t pid
static pthread_t tmain
static clockid_t cl
static struct timespec start_time
static pthread_mutex_t sum_mutex = PTHREAD_MUTEX_INITIALIZER
unsigned int sum
static enum { ... }  sync_method
static pthread_attr_t attr

Class Documentation

struct start_info

Definition at line 270 of file perf.c.

Class Members
unsigned int starts
unsigned int threads

Define Documentation

#define _GNU_SOURCE   1

Definition at line 20 of file perf.c.

#define DEFAULT_THREADS   50

Definition at line 41 of file perf.c.

#define MAX_THREADS   100000

Definition at line 38 of file perf.c.

#define OPT_SYNC_JOIN   303

Definition at line 48 of file perf.c.

#define OPT_SYNC_SIGNAL   302

Definition at line 47 of file perf.c.

#define OPT_TO_PROCESS   301

Definition at line 46 of file perf.c.

#define OPT_TO_THREAD   300

Definition at line 45 of file perf.c.

#define OPT_TOPLEVEL   304

Definition at line 49 of file perf.c.


Typedef Documentation

typedef unsigned long long int hp_timing_t

Definition at line 151 of file perf.c.


Enumeration Type Documentation

anonymous enum
Enumerator:
sync_signal 
sync_join 

Definition at line 142 of file perf.c.


Function Documentation

int clock_getcpuclockid ( pid_t  pid,
clockid_t *  clock_id 
)

Definition at line 688 of file perf.c.

{
  /* Only the calling process can be queried: a PID of zero and our own
     process ID both designate it, anything else is refused with EPERM.  */
  const bool refers_to_self = pid == 0 || pid == getpid ();

  if (! refers_to_self)
    return EPERM;

#ifndef CLOCK_PROCESS_CPUTIME_ID
  /* No process CPU-time clock is defined, so there is nothing to return.  */
  return ENOENT;
#else
  /* Hand the identifier of the process CPU-time clock back to the caller
     and report success.  */
  *clock_id = CLOCK_PROCESS_CPUTIME_ID;
  return 0;
#endif
}

Here is the call graph for this function:

Here is the caller graph for this function:

int clock_gettime ( clockid_t  clock_id,
struct timespec *  tp 
)

Definition at line 721 of file perf.c.

{
  /* Fill *TP with the current value of the clock CLOCK_ID.

     Only CLOCK_PROCESS_CPUTIME_ID is handled; its value is derived from
     the counter delivered by HP_TIMING_NOW divided by the frequency
     reported by get_clockfreq.

     Returns 0 on success.  On failure returns -1 with errno set to
     EINVAL, either because CLOCK_ID is not supported or because the
     clock frequency could not be determined.  */
  int retval = -1;

  switch (clock_id)
    {
    case CLOCK_PROCESS_CPUTIME_ID:
      {
        /* Cached across calls; zero means "not determined yet".  */
        static hp_timing_t freq;
        hp_timing_t tsc;

        /* Get the current counter.  */
        HP_TIMING_NOW (tsc);

        if (freq == 0)
          {
            freq = get_clockfreq ();
            if (freq == 0)
              {
                /* Without a frequency the counter cannot be converted
                   (and dividing by it would be undefined).  Report the
                   failure the same way as the unsupported-clock path:
                   -1 with errno set, rather than returning the errno
                   value itself.  */
                errno = EINVAL;
                break;
              }
          }

        /* Compute the seconds.  */
        tp->tv_sec = tsc / freq;

        /* And the nanoseconds.  This computation should be stable until
           we get machines with about 16GHz frequency.  */
        tp->tv_nsec = ((tsc % freq) * UINT64_C (1000000000)) / freq;

        retval = 0;
      }
      break;

    default:
      errno = EINVAL;
      break;
    }

  return retval;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static hp_timing_t get_clockfreq ( void  ) [static]

Definition at line 621 of file perf.c.

{
  /* Return the CPU clock frequency in Hz, or 0 if it cannot be determined.

     We read the information from the /proc filesystem.  It contains at
     least one line like
       cpu MHz         : 497.840237
     or also
       cpu MHz         : 497.841
     We search for the first such line and convert the number into an
     integer scaled by 10^6 (i.e. MHz with six fractional digits -> Hz).

     A non-zero result is cached in a static variable, so the file is
     only parsed until the first successful call.  */
  static hp_timing_t result;
  int fd;

  /* If this function was called before, we know the result.  */
  if (result != 0)
    return result;

  fd = open ("/proc/cpuinfo", O_RDONLY);
  if (__builtin_expect (fd != -1, 1))
    {
      /* XXX AFAIK the /proc filesystem can generate "files" only up
         to a size of 4096 bytes.  */
      char buf[4096];
      ssize_t n;

      n = read (fd, buf, sizeof buf);
      /* The branch hint applies to the comparison, not to N itself.  */
      if (__builtin_expect (n > 0, 1))
        {
          char *mhz = memmem (buf, n, "cpu MHz", 7);

          if (__builtin_expect (mhz != NULL, 1))
            {
              char *endp = buf + n;
              int seen_decpoint = 0;
              int ndigits = 0;

              /* Search for the beginning of the string.  */
              while (mhz < endp && (*mhz < '0' || *mhz > '9') && *mhz != '\n')
                ++mhz;

              while (mhz < endp && *mhz != '\n')
                {
                  if (*mhz >= '0' && *mhz <= '9')
                    {
                      /* Keep at most six fractional digits.  Anything
                         finer than 1 Hz is dropped; accumulating it
                         would scale the result by an extra power of
                         ten per digit.  */
                      if (! seen_decpoint || ndigits < 6)
                        {
                          result *= 10;
                          result += *mhz - '0';
                          if (seen_decpoint)
                            ++ndigits;
                        }
                    }
                  else if (*mhz == '.')
                    seen_decpoint = 1;

                  ++mhz;
                }

              /* Compensate for missing digits at the end.  */
              while (ndigits++ < 6)
                result *= 10;
            }
        }

      close (fd);
    }

  return result;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int main ( int  argc,
char *  argv[] 
)

Definition at line 345 of file perf.c.

{
  /* Program entry point.  After parsing the options the benchmark runs
     in one of two modes selected by sync_method:

     - sync_signal: the main thread itself creates detached threads
       running thread_function.  Whenever the number of running threads
       reaches `threads' it waits for SIGUSR1 before creating the next
       one.  Once all threads are created it suspends forever; the
       process is terminated from within thread_function.

     - otherwise: `toplevel' threads running start_threads are created,
       the remaining `starts' and `threads' budgets are divided among
       them, and the main thread joins them, prints the result and
       exits.  */
  int remaining;
  sigset_t ss;
  pthread_t th;
  pthread_t *ths = NULL;
  int empty = 0;
  int last;
  bool cont = true;

  /* Parse and process arguments.  */
  argp_parse (&argp, argc, argv, 0, &remaining, NULL);

  if (sync_method == sync_join)
    {
      ths = (pthread_t *) calloc (threads, sizeof (pthread_t));
      if (ths == NULL)
        error (EXIT_FAILURE, errno,
               "cannot allocate memory for thread descriptor array");

      last = threads;
    }
  else
    {
      /* A single slot suffices: the handles of detached threads are
         never used again after creation.  */
      ths = &th;
      last = 1;
    }

  if (toplevel > threads)
    {
      printf ("resetting number of toplevel threads to %lu to not surpass number to concurrent threads\n",
              threads);
      toplevel = threads;
    }

  if (timing)
    {
      /* Timing is silently disabled when the clock is unavailable.  */
      if (clock_getcpuclockid (0, &cl) != 0
          || clock_gettime (cl, &start_time) != 0)
        timing = false;
    }

  /* We need this later.  */
  pid = getpid ();
  tmain = pthread_self ();

  /* We use signal SIGUSR1 for communication between the threads and
     the main thread.  We only want synchronous notification.  */
  if (sync_method == sync_signal)
    {
      sigemptyset (&ss);
      sigaddset (&ss, SIGUSR1);
      if (sigprocmask (SIG_BLOCK, &ss, NULL) != 0)
        error (EXIT_FAILURE, errno, "cannot set signal mask");
    }

  /* Create the thread attributes.  */
  pthread_attr_init (&attr);

  /* If the user provided a stack size use it.  */
  if (stacksize != 0
      && pthread_attr_setstacksize (&attr, stacksize) != 0)
    puts ("could not set stack size; will use default");
  /* And stack guard size.  */
  if (guardsize != -1
      && pthread_attr_setguardsize (&attr, guardsize) != 0)
    puts ("invalid stack guard size; will use default");

  /* All threads are created detached if we are not using pthread_join
     to synchronize.  */
  if (sync_method != sync_join)
    pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);

  if (sync_method == sync_signal)
    {
      while (1)
        {
          int err;
          bool do_wait = false;

          /* `starts' and `running' are shared with thread_function and
             only touched while holding running_mutex.  */
          pthread_mutex_lock (&running_mutex);
          if (starts-- < 0)
            cont = false;
          else
            /* Wait after this creation if the concurrency limit is now
               reached and more threads remain to be started.  */
            do_wait = ++running >= threads && starts > 0;

          pthread_mutex_unlock (&running_mutex);

          if (! cont)
            break;

          if (progress)
            write (STDERR_FILENO, "t", 1);

          /* The remaining start count is handed to the thread as its
             argument (work uses it as random seed).  */
          err = pthread_create (&ths[empty], &attr, thread_function,
                                (void *) starts);
          if (err != 0)
            /* `starts' is a signed long and may be -1 here, so it must
               be printed with a signed conversion.  */
            error (EXIT_FAILURE, err, "cannot start thread %ld", starts);

          if (++empty == last)
            empty = 0;

          if (do_wait)
            sigwaitinfo (&ss, NULL);
        }

      /* Do nothing anymore.  One of the threads will terminate the
         program.  */
      sigfillset (&ss);
      sigdelset (&ss, SIGINT);
      while (1)
        sigsuspend (&ss);
    }
  else
    {
      /* NB: this `ths' shadows the outer variable of the same name.  */
      pthread_t ths[toplevel];
      struct start_info si[toplevel];
      unsigned int i;

      for (i = 0; i < toplevel; ++i)
        {
          /* Give each top-level thread an equal share of what is still
             left; the last one receives the whole remainder.  */
          unsigned int child_starts = starts / (toplevel - i);
          unsigned int child_threads = threads / (toplevel - i);
          int err;

          si[i].starts = child_starts;
          si[i].threads = child_threads;

          err = pthread_create (&ths[i], &attr, start_threads, &si[i]);
          if (err != 0)
            error (EXIT_FAILURE, err, "cannot start thread");

          starts -= child_starts;
          threads -= child_threads;
        }

      for (i = 0; i < toplevel; ++i)
        {
          int err = pthread_join (ths[i], NULL);

          if (err != 0)
            error (EXIT_FAILURE, err, "cannot join thread");
        }

      /* We are done.  */
      if (progress)
        write (STDERR_FILENO, "\n", 1);

      if (timing)
        {
          struct timespec end_time;

          if (clock_gettime (cl, &end_time) == 0)
            {
              /* Compute end - start, borrowing a second when the
                 nanosecond difference is negative.  */
              end_time.tv_sec -= start_time.tv_sec;
              end_time.tv_nsec -= start_time.tv_nsec;
              if (end_time.tv_nsec < 0)
                {
                  end_time.tv_nsec += 1000000000;
                  --end_time.tv_sec;
                }

              printf ("\nRuntime: %lu.%09lu seconds\n",
                      (unsigned long int) end_time.tv_sec,
                      (unsigned long int) end_time.tv_nsec);
            }
        }

      printf ("Result: %08x\n", sum);

      exit (0);
    }

  /* NOTREACHED */
  return 0;
}

Here is the call graph for this function:

static error_t parse_opt ( int  key,
char *  arg,
struct argp_state *  state 
) [static]

Definition at line 524 of file perf.c.

{
  /* Option callback for argp_parse.  Stores the value of the option
     identified by KEY (with argument ARG, if any) in the matching
     file-scope setting.  Out-of-range values are reported on standard
     output and leave the setting unchanged.

     Returns 0 for every recognized key and ARGP_ERR_UNKNOWN otherwise.  */
  unsigned long int num;
  long int snum;

  switch (key)
    {
    case 't':
      num = strtoul (arg, NULL, 0);
      if (num <= MAX_THREADS)
        threads = num;
      else
        /* Terminate the line like every other diagnostic here.  */
        printf ("number of threads limited to %u; "
                "recompile with a higher limit if necessary\n",
                MAX_THREADS);
      break;

    case 'w':
      num = strtoul (arg, NULL, 0);
      if (num <= 100)
        workload = num;
      else
        puts ("workload must be between 0 and 100 percent");
      break;

    case 'c':
      workcost = strtoul (arg, NULL, 0);
      break;

    case 'r':
      rounds = strtoul (arg, NULL, 0);
      break;

    case 's':
      starts = strtoul (arg, NULL, 0);
      break;

    case 'S':
      num = strtoul (arg, NULL, 0);
      if (num >= PTHREAD_STACK_MIN)
        stacksize = num;
      else
        /* PTHREAD_STACK_MIN is not guaranteed to have type int (newer
           glibc versions expand it to a function call returning long),
           so convert it explicitly to match the format.  */
        printf ("minimum stack size is %ld\n", (long int) PTHREAD_STACK_MIN);
      break;

    case 'g':
      snum = strtol (arg, NULL, 0);
      if (snum < 0)
        printf ("invalid guard size %s\n", arg);
      else
        guardsize = snum;
      break;

    case 'p':
      progress = true;
      break;

    case 'T':
      timing = true;
      break;

    case OPT_TO_THREAD:
      to_thread = true;
      break;

    case OPT_TO_PROCESS:
      to_thread = false;
      break;

    case OPT_SYNC_SIGNAL:
      sync_method = sync_signal;
      break;

    case OPT_SYNC_JOIN:
      sync_method = sync_join;
      break;

    case OPT_TOPLEVEL:
      num = strtoul (arg, NULL, 0);
      /* Same inclusive limit as for 't': the message promises that up
         to MAX_THREADS threads are accepted.  */
      if (num <= MAX_THREADS)
        toplevel = num;
      else
        printf ("number of threads limited to %u; "
                "recompile with a higher limit if necessary\n",
                MAX_THREADS);
      /* Top-level threads are only used in join mode, so selecting
         them switches the synchronization method as well.  */
      sync_method = sync_join;
      break;

    default:
      return ARGP_ERR_UNKNOWN;
    }

  return 0;
}

Here is the call graph for this function:

static void* start_threads ( void *  arg) [static]

Definition at line 278 of file perf.c.

{
  /* Thread entry point used in join mode.  ARG points to a struct
     start_info; this thread starts SI->starts threads running `work'
     in total, keeping their handles in a ring of SI->threads slots.
     Before a slot is reused the thread previously stored there is
     joined.  Always returns NULL.  */
  struct start_info *si = arg;
  /* Local countdown; shadows the file-scope variable `starts'.  */
  unsigned int starts = si->starts;
  pthread_t ths[si->threads];
  /* Seed for rand_r, used to derive the argument of each worker.  */
  unsigned int state = starts;
  unsigned int n;
  unsigned int i = 0;
  int err;

  if (progress)
    write (STDERR_FILENO, "T", 1);

  /* A zero handle marks a slot that does not hold a thread yet.  */
  memset (ths, '\0', sizeof (pthread_t) * si->threads);

  while (starts-- > 0)
    {
      if (ths[i] != 0)
       {
         /* Wait for the threads in the order they were created.  */
         err = pthread_join (ths[i], NULL);
         if (err != 0)
           error (EXIT_FAILURE, err, "cannot join thread");

         if (progress)
           write (STDERR_FILENO, "f", 1);
       }

      /* The worker's argument is an integer smuggled through the
         pointer; `work' turns it back into its random seed.  */
      err = pthread_create (&ths[i], &attr, work,
                         (void *) (long) (rand_r (&state) + starts + i));

      if (err != 0)
       error (EXIT_FAILURE, err, "cannot start thread");

      if (progress)
       write (STDERR_FILENO, "t", 1);

      /* Advance to the next slot, wrapping around at the end.  */
      if (++i == si->threads)
       i = 0;
    }

  /* Join whatever is still outstanding: walk the ring once, starting
     at the slot that would have been reused next.  */
  n = i;
  do
    {
      if (ths[i] != 0)
       {
         err = pthread_join (ths[i], NULL);
         if (err != 0)
           error (EXIT_FAILURE, err, "cannot join thread");

         if (progress)
           write (STDERR_FILENO, "f", 1);
       }

      if (++i == si->threads)
       i = 0;
    }
  while (i != n);

  if (progress)
    write (STDERR_FILENO, "F", 1);

  return NULL;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static void* thread_function ( void *  arg) [static]

Definition at line 216 of file perf.c.

{
  /* Thread entry point used in signal mode.  Runs `work' with ARG and
     then updates the shared bookkeeping: the thread that finds no other
     thread running and no starts left prints the results and terminates
     the whole process; every other thread notifies the main thread with
     SIGUSR1 and returns NULL.  */
  work (arg);

  /* `running' and `starts' are shared with main and protected by
     running_mutex.  */
  pthread_mutex_lock (&running_mutex);
  if (--running <= 0 && starts <= 0)
    {
      /* We are done.  */
      if (progress)
       write (STDERR_FILENO, "\n", 1);

      if (timing)
       {
         struct timespec end_time;

         if (clock_gettime (cl, &end_time) == 0)
           {
             /* Compute end - start, borrowing a second when the
                nanosecond difference is negative.  */
             end_time.tv_sec -= start_time.tv_sec;
             end_time.tv_nsec -= start_time.tv_nsec;
             if (end_time.tv_nsec < 0)
              {
                end_time.tv_nsec += 1000000000;
                --end_time.tv_sec;
              }

             printf ("\nRuntime: %lu.%09lu seconds\n",
                    (unsigned long int) end_time.tv_sec,
                    (unsigned long int) end_time.tv_nsec);
           }
       }

      printf ("Result: %08x\n", sum);

      /* Note that the process exits with running_mutex still locked.  */
      exit (0);
    }
  pthread_mutex_unlock (&running_mutex);

  if (sync_method == sync_signal)
    {
      if (to_thread)
       /* This code sends a signal to the main thread.  */
       pthread_kill (tmain, SIGUSR1);
      else
       /* Use this code to test sending a signal to the process.  */
       kill (pid, SIGUSR1);
    }

  if (progress)
    write (STDERR_FILENO, "f", 1);

  return NULL;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static void* work ( void *  arg) [static]

Definition at line 159 of file perf.c.

{
  /* Per-thread workload.  ARG is not a pointer but an integer seed for
     the thread-private random number generator.  For each of `rounds'
     iterations the thread either burns CPU time (with a probability of
     `workload' percent) and adds the outcome to the global `sum', or
     sleeps for 10 milliseconds.  Always returns NULL.  */
  unsigned long int i;
  unsigned int state = (unsigned long int) arg;

  for (i = 0; i < rounds; ++i)
    {
      /* Determine what to do.  */
      unsigned int rnum;

      /* Uniform distribution.  rand_r yields values in [0, RAND_MAX],
         so the rejection bound must be derived from RAND_MAX: values
         at or above the largest multiple of 100 not exceeding RAND_MAX
         are redrawn, which leaves every residue modulo 100 equally
         likely.  */
      do
        rnum = rand_r (&state);
      while (rnum >= (unsigned int) (RAND_MAX - (RAND_MAX % 100)));

      rnum %= 100;

      if (rnum < workload)
        {
          /* Same type as `workcost' so the loop terminates for every
             possible cost value.  */
          unsigned long int j;
          int a[4] = { i, rnum, i + rnum, rnum - i };

          if (progress)
            write (STDERR_FILENO, "c", 1);

          /* Arbitrary arithmetic whose only purpose is to consume CPU
             time proportional to `workcost'.
             NOTE(review): this is signed int arithmetic; for large
             `workcost' values the additions can overflow.  */
          for (j = 0; j < workcost; ++j)
            {
              a[0] += a[3] >> 12;
              a[1] += a[2] >> 20;
              a[2] += a[1] ^ 0x3423423;
              a[3] += a[0] - a[1];
            }

          /* Fold the outcome into the shared checksum.  */
          pthread_mutex_lock (&sum_mutex);
          sum += a[0] + a[1] + a[2] + a[3];
          pthread_mutex_unlock (&sum_mutex);
        }
      else
        {
          /* Just sleep.  */
          struct timespec tv;

          tv.tv_sec = 0;
          tv.tv_nsec = 10000000;

          if (progress)
            write (STDERR_FILENO, "w", 1);

          nanosleep (&tv, NULL);
        }
    }

  return NULL;
}

Here is the call graph for this function:

Here is the caller graph for this function:


Variable Documentation

struct argp argp [static]
Initial value:

Definition at line 110 of file perf.c.

struct argp_option[] [static]

Definition at line 52 of file perf.c.

pthread_attr_t attr [static]

Definition at line 155 of file perf.c.

clockid_t cl [static]

Definition at line 135 of file perf.c.

long int guardsize = -1 [static]

Definition at line 122 of file perf.c.

pid_t pid [static]

Definition at line 132 of file perf.c.

bool progress [static]

Definition at line 123 of file perf.c.

unsigned long int rounds = 10 [static]

Definition at line 119 of file perf.c.

long int running [static]

Definition at line 129 of file perf.c.

pthread_mutex_t running_mutex = PTHREAD_MUTEX_INITIALIZER [static]

Definition at line 130 of file perf.c.

unsigned long int stacksize [static]

Definition at line 121 of file perf.c.

long int starts = 5000 [static]

Definition at line 120 of file perf.c.

unsigned int sum

Definition at line 140 of file perf.c.

pthread_mutex_t sum_mutex = PTHREAD_MUTEX_INITIALIZER [static]

Definition at line 139 of file perf.c.

enum { ... } sync_method [static]
unsigned long int threads = DEFAULT_THREADS [static]

Definition at line 116 of file perf.c.

struct timespec start_time [static]

Definition at line 136 of file perf.c.

bool timing [static]

Definition at line 124 of file perf.c.

pthread_t tmain [static]

Definition at line 133 of file perf.c.

bool to_thread [static]

Definition at line 125 of file perf.c.

unsigned long int toplevel = 1 [static]

Definition at line 126 of file perf.c.

unsigned long int workcost = 20 [static]

Definition at line 118 of file perf.c.

unsigned long int workload = 75 [static]

Definition at line 117 of file perf.c.