Back to index

glibc  2.9
hp-timing.h
Go to the documentation of this file.
00001 /* High precision, low overhead timing functions.  powerpc64 version.
00002    Copyright (C) 2005, 2008 Free Software Foundation, Inc.
00003    This file is part of the GNU C Library.
00004    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
00005 
00006    The GNU C Library is free software; you can redistribute it and/or
00007    modify it under the terms of the GNU Lesser General Public
00008    License as published by the Free Software Foundation; either
00009    version 2.1 of the License, or (at your option) any later version.
00010 
00011    The GNU C Library is distributed in the hope that it will be useful,
00012    but WITHOUT ANY WARRANTY; without even the implied warranty of
00013    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014    Lesser General Public License for more details.
00015 
00016    You should have received a copy of the GNU Lesser General Public
00017    License along with the GNU C Library; if not, write to the Free
00018    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
00019    02111-1307 USA.  */
00020 
00021 #ifndef _HP_TIMING_H
00022 #define _HP_TIMING_H 1
00023 
00024 #include <string.h>
00025 #include <sys/param.h>
00026 #include <stdio-common/_itoa.h>
00027 #include <atomic.h>
00028 
00029 /* The macros defined here use the powerpc 64-bit time base register.
00030    The time base is nominally clocked at 1/8th the CPU clock, but this
00031    can vary.
00032 
00033    The list of macros we need includes the following:
00034 
00035    - HP_TIMING_AVAIL: test for availability.
00036 
00037    - HP_TIMING_INLINE: this macro is non-zero if the functionality is not
00038      implemented using function calls but instead uses some inlined code
00039      which might simply consist of a few assembler instructions.  We have to
00040      know this since we might want to use the macros here in places where we
00041      cannot make function calls.
00042 
00043    - hp_timing_t: This is the type for variables used to store the time
00044      values.
00045 
00046    - HP_TIMING_ZERO: clear `hp_timing_t' object.
00047 
00048    - HP_TIMING_NOW: place timestamp for current time in variable given as
00049      parameter.
00050 
00051    - HP_TIMING_DIFF_INIT: do whatever is necessary to be able to use the
00052      HP_TIMING_DIFF macro.
00053 
00054    - HP_TIMING_DIFF: compute difference between two times and store it
00055      in a third.  Source and destination might overlap.
00056 
00057    - HP_TIMING_ACCUM: add time difference to another variable.  This might
00058      be a bit more complicated to implement for some platforms as the
00059      operation should be thread-safe and 64bit arithmetic on 32bit platforms
00060      is not.
00061 
00062    - HP_TIMING_ACCUM_NT: this is the variant for situations where we know
00063      there are no threads involved.
00064 
00065    - HP_TIMING_PRINT: write decimal representation of the timing value into
00066      the given string.  This operation need not be inline even though
00067      HP_TIMING_INLINE is specified.
00068 
00069 */
00070 
00071 /* Always 1: this header assumes the time base register is present.  */
00072 #define HP_TIMING_AVAIL            (1)
00073 
00074 /* Non-zero: timestamps are read with inlined code, not function calls.  */
00075 #define HP_TIMING_INLINE    (1)
00076 
00077 /* Type of every time value handled by these macros; we use 64bit values.  */
00078 typedef unsigned long long int hp_timing_t;
00079 
/* Set timestamp value to zero.  The expansion is parenthesized as a whole
   so that it stays a single assignment expression wherever it is used;
   without the outer parentheses a trailing operator would bind to the
   zero instead of to the result of the assignment.  */
#define HP_TIMING_ZERO(Var) ((Var) = (0))
00082 
00083 /* Store the current time base value in Var with one inline instruction:
00084    `mfspr %0,268' when _ARCH_PWR4 is defined, `mftb %0' otherwise.  The value
00085    might not be 100% accurate since other instructions may still be running
00086    at that moment; a barrier like 'lwsync' right before the read could change
00087    this, but we are not interested in accurate clock cycles here.  */
00088 #ifdef _ARCH_PWR4
00089 #define HP_TIMING_NOW(Var)  __asm__ __volatile__ ("mfspr %0,268" : "=r" (Var))
00090 #else
00091 #define HP_TIMING_NOW(Var)  __asm__ __volatile__ ("mftb %0" : "=r" (Var))
00092 #endif
00093 
/* Calibrate GLRO(dl_hp_timing_overhead), the cost of one timestamp read.
   Nothing happens if the value is already non-zero.  Otherwise it is
   primed with the largest possible value and then lowered to the smallest
   difference seen across five pairs of back-to-back HP_TIMING_NOW reads.
   On current POWER4, POWER5, and 970 processors mftb takes ~10 cycles.  */
#define HP_TIMING_DIFF_INIT() \
  do {                                                                        \
    if (GLRO(dl_hp_timing_overhead) == 0)                                     \
      {                                                                       \
        int __left;                                                           \
        GLRO(dl_hp_timing_overhead) = ~0ull;                                  \
        for (__left = 5; __left > 0; --__left)                                \
          {                                                                   \
            hp_timing_t __before, __after, __delta;                           \
            /* The two reads must stay adjacent: their distance is the        \
               quantity being measured.  */                                   \
            HP_TIMING_NOW (__before);                                         \
            HP_TIMING_NOW (__after);                                          \
            __delta = __after - __before;                                     \
            if (__delta < GLRO(dl_hp_timing_overhead))                        \
              GLRO(dl_hp_timing_overhead) = __delta;                          \
          }                                                                   \
      }                                                                       \
  } while (0)
00113 
/* It's simple arithmetic in 64-bit: store End - Start in Diff.  Source and
   destination might overlap.  The expansion is parenthesized as a whole so
   that it stays a single assignment expression wherever it is used.  */
#define HP_TIMING_DIFF(Diff, Start, End)  ((Diff) = ((End) - (Start)))
00116 
00117 /* Add Diff, less GLRO(dl_hp_timing_overhead), to Sum.  We need to ensure that
00118    this add is atomic in threaded environments, so we use
       __arch_atomic_exchange_and_add_64 from atomic.h to get thread safety.  */
00119 #define HP_TIMING_ACCUM(Sum, Diff) \
00120   do {                                                               \
00121     hp_timing_t __diff = (Diff) - GLRO(dl_hp_timing_overhead);              \
00122     __arch_atomic_exchange_and_add_64 (&(Sum), __diff);                       \
00123   } while (0)
00124 
/* No threads, no extra work: plainly add Diff to Sum.  Note that, unlike
   HP_TIMING_ACCUM, nothing is subtracted from Diff first.  The expansion
   is parenthesized as a whole so that it stays a single expression
   wherever it is used.  */
#define HP_TIMING_ACCUM_NT(Sum, Diff)     ((Sum) += (Diff))
00127 
/* Print the time value.

   Writes the decimal digits of Val followed by " ticks" into Buf, never
   storing more than Len bytes.  Output is truncated when Len is too small;
   the terminating NUL is only stored when all of " ticks" fits as well.

   The lengths are computed explicitly instead of counting down in a loop:
   a `while (__len-- > 0 && ...)' loop leaves __len wrapped around to the
   maximum size_t value once the space is used up, and the suffix copy
   would then run past the end of Buf.  */
#define HP_TIMING_PRINT(Buf, Len, Val) \
  do {                                                                        \
    char __buf[20];                                                           \
    /* _itoa fills __buf backwards from its end and returns the first         \
       digit, so the digits occupy [__cp, __buf + sizeof (__buf)).  */        \
    char *__cp = _itoa (Val, __buf + sizeof (__buf), 10, 0);                  \
    size_t __len = (Len);                                                     \
    char *__dest = (Buf);                                                     \
    size_t __ndigits = (size_t) (__buf + sizeof (__buf) - __cp);              \
    if (__ndigits > __len)                                                    \
      __ndigits = __len;                                                      \
    memcpy (__dest, __cp, __ndigits);                                         \
    /* Space remaining after the digits; cannot underflow.  */                \
    __len -= __ndigits;                                                       \
    memcpy (__dest + __ndigits, " ticks",                                     \
            MIN (__len, sizeof (" ticks")));                                  \
  } while (0)
00139 
00140 #endif /* hp-timing.h */