Back to index

glibc  2.9
hp-timing.h
Go to the documentation of this file.
00001 /* High precision, low overhead timing functions.  i686 version.
00002    Copyright (C) 1998, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
00003    This file is part of the GNU C Library.
00004    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
00005 
00006    The GNU C Library is free software; you can redistribute it and/or
00007    modify it under the terms of the GNU Lesser General Public
00008    License as published by the Free Software Foundation; either
00009    version 2.1 of the License, or (at your option) any later version.
00010 
00011    The GNU C Library is distributed in the hope that it will be useful,
00012    but WITHOUT ANY WARRANTY; without even the implied warranty of
00013    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014    Lesser General Public License for more details.
00015 
00016    You should have received a copy of the GNU Lesser General Public
00017    License along with the GNU C Library; if not, write to the Free
00018    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
00019    02111-1307 USA.  */
00020 
00021 #ifndef _HP_TIMING_H
00022 #define _HP_TIMING_H 1
00023 
00024 #include <string.h>
00025 #include <sys/param.h>
00026 #include <stdio-common/_itoa.h>
00027 
00028 /* The macros defined here use the timestamp counter in i586 and up versions
00029    of the x86 processors.  They provide a very accurate way to measure the
00030    time with very little overhead.  The time values themselves have no real
00031    meaning, only differences are interesting.
00032 
00033    This version is for the i686 processors.  The difference from the i586
00034    version is that the timestamp register is unconditionally used.  This is
00035    not the case for the i586 version where we have to perform a runtime test
00036    whether the processor really has this capability.  We have to make this
00037    distinction since the sysdeps/i386/i586 code is supposed to work on all
00038    platforms while the i686 already contains i686-specific code.
00039 
00040    The list of macros we need includes the following:
00041 
00042    - HP_TIMING_AVAIL: test for availability.
00043 
00044    - HP_TIMING_INLINE: this macro is non-zero if the functionality is not
00045      implemented using function calls but instead uses some inlined code
00046      which might simply consist of a few assembler instructions.  We have to
00047      know this since we might want to use the macros here in places where we
00048      cannot make function calls.
00049 
00050    - hp_timing_t: This is the type for variables used to store the time
00051      values.
00052 
00053    - HP_TIMING_ZERO: clear `hp_timing_t' object.
00054 
00055    - HP_TIMING_NOW: place timestamp for current time in variable given as
00056      parameter.
00057 
00058    - HP_TIMING_DIFF_INIT: do whatever is necessary to be able to use the
00059      HP_TIMING_DIFF macro.
00060 
00061    - HP_TIMING_DIFF: compute difference between two times and store it
00062      in a third.  Source and destination might overlap.
00063 
00064    - HP_TIMING_ACCUM: add time difference to another variable.  This might
00065      be a bit more complicated to implement for some platforms as the
00066      operation should be thread-safe and 64bit arithmetic on 32bit platforms
00067      is not.
00068 
00069    - HP_TIMING_ACCUM_NT: this is the variant for situations where we know
00070      there are no threads involved.
00071 
00072    - HP_TIMING_PRINT: write decimal representation of the timing value into
00073      the given string.  This operation need not be inline even though
00074      HP_TIMING_INLINE is specified.
00075 
00076 */
00077 
/* This version assumes the timestamp register is always present, so
   availability is a compile-time constant.  */
#define HP_TIMING_AVAIL 1

/* The timing primitives are provided as inlined code, not function
   calls.  */
#define HP_TIMING_INLINE 1

/* Time values are kept in an unsigned 64-bit integer.  */
typedef unsigned long long hp_timing_t;
00086 
/* Set the timestamp variable VAR to zero.  The expansion is parenthesized
   as a whole so that it behaves as a single expression wherever it is
   used.  */
#define HP_TIMING_ZERO(Var) ((Var) = 0)
00089 
/* Store the current value of the timestamp counter in VAR using the
   `rdtsc' instruction.  Note that the value might not be 100% accurate
   since there might be some more instructions running at this moment.
   This could be changed by using a barrier like `cpuid' right before the
   `rdtsc' instruction, but we are not interested in accurate clock cycles
   here so we don't do this.

   `rdtsc' delivers the low half of the counter in %eax and the high half
   in %edx.  The halves are read through separate "a" and "d" outputs and
   combined afterwards: the "A" constraint denotes the %edx:%eax pair only
   for 64-bit values in 32-bit mode, whereas on x86-64 it selects just one
   of the two registers.  */
#define HP_TIMING_NOW(Var) \
  do {                                                                  \
    unsigned int __tsc_lo, __tsc_hi;                                    \
    __asm__ __volatile__ ("rdtsc" : "=a" (__tsc_lo), "=d" (__tsc_hi)); \
    (Var) = ((unsigned long long int) __tsc_hi << 32) | __tsc_lo;       \
  } while (0)
00096 
/* Determine how long taking a timestamp costs by issuing two
   HP_TIMING_NOW reads back to back.  The smallest difference seen in five
   rounds is stored in dl_hp_timing_overhead.  Nothing is done when a
   non-zero overhead is already recorded there.  */
#define HP_TIMING_DIFF_INIT() \
  do {                                                                  \
    if (GLRO(dl_hp_timing_overhead) == 0)                               \
      {                                                                 \
        int __round;                                                    \
        GLRO(dl_hp_timing_overhead) = ~0ull;                            \
        for (__round = 5; __round > 0; --__round)                       \
          {                                                             \
            hp_timing_t __first, __second;                              \
            HP_TIMING_NOW (__first);                                    \
            HP_TIMING_NOW (__second);                                   \
            if (__second - __first < GLRO(dl_hp_timing_overhead))       \
              GLRO(dl_hp_timing_overhead) = __second - __first;         \
          }                                                             \
      }                                                                 \
  } while (0)
00115 
/* Store END - START in DIFF; simple arithmetic is all that is needed.
   The expansion is parenthesized as a whole so that it remains a single
   expression when combined with surrounding operators.  */
#define HP_TIMING_DIFF(Diff, Start, End) ((Diff) = ((End) - (Start)))
00118 
/* Atomically add DIFF, less the recorded timing overhead, to SUM.

   The update is a compare-and-exchange loop around `lock; cmpxchg8b'.
   That instruction compares %edx:%eax (__oldval) with SUM.  If they match
   it stores %ecx:%ebx (__newval) in SUM and sets ZF; otherwise it clears
   ZF and loads the current SUM into %edx:%eax, which then serves as
   __oldval for the next attempt.

   %ebx is not named as an operand.  Instead the low word of __newval is
   swapped into it before the `cmpxchg8b' and the previous contents are
   swapped back afterwards.  In between, `setne' records the outcome in
   %bl, so only the low byte of __not_done is meaningful: it is non-zero
   exactly when the exchange failed and the loop has to try again.  */
#define HP_TIMING_ACCUM(Sum, Diff) \
  do {                                                                  \
    int __not_done;                                                     \
    hp_timing_t __oldval = (Sum);                                       \
    hp_timing_t __diff = (Diff) - GLRO(dl_hp_timing_overhead);          \
    do                                                                  \
      {                                                                 \
        hp_timing_t __newval = __oldval + __diff;                       \
        int __temp0;                                                    \
        __asm__ __volatile__ ("xchgl %0, %%ebx\n\t"                     \
                              "lock; cmpxchg8b %1\n\t"                  \
                              "setne %%bl\n\t"                          \
                              "xchgl %0, %%ebx"                         \
                              : "=SD" (__not_done), "=m" (Sum),         \
                                "=A" (__oldval), "=c" (__temp0)         \
                              : "m" (Sum), "2" (__oldval),              \
                                "3" ((unsigned int) (__newval >> 32)),  \
                                "0" ((unsigned int) __newval));         \
      }                                                                 \
    while ((unsigned char) __not_done);                                 \
  } while (0)
00141 
/* Variant for when no threads are involved: DIFF is simply added to SUM
   as given.  The expansion is parenthesized as a whole so that it remains
   a single expression when combined with surrounding operators.  */
#define HP_TIMING_ACCUM_NT(Sum, Diff) ((Sum) += (Diff))
00144 
/* Write the decimal representation of VAL followed by the text
   " clock cycles" into BUF, storing at most LEN bytes in total.  The
   output is truncated when LEN is too small; the terminating null byte of
   the suffix is written only if it still fits.

   _itoa is handed the end of the 20-byte scratch buffer (enough for the
   largest 64-bit value in decimal) and the digits are copied from the
   pointer it returns up to that end.  __len is decremented only for bytes
   actually stored, so it can never wrap around below zero and always
   holds the exact space left in BUF when the suffix is appended.  */
#define HP_TIMING_PRINT(Buf, Len, Val) \
  do {                                                                  \
    char __buf[20];                                                     \
    char *__cp = _itoa ((Val), __buf + sizeof (__buf), 10, 0);          \
    size_t __len = (Len);                                               \
    char *__dest = (Buf);                                               \
    while (__len > 0 && __cp < __buf + sizeof (__buf))                  \
      {                                                                 \
        *__dest++ = *__cp++;                                            \
        --__len;                                                        \
      }                                                                 \
    memcpy (__dest, " clock cycles",                                    \
            MIN (__len, sizeof (" clock cycles")));                     \
  } while (0)
00156 
00157 #endif /* hp-timing.h */