Back to index

glibc  2.9
hp-timing.h
Go to the documentation of this file.
00001 /* High precision, low overhead timing functions.  powerpc32 version.
00002    Copyright (C) 2005, 2008 Free Software Foundation, Inc.
00003    This file is part of the GNU C Library.
00004    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
00005 
00006    The GNU C Library is free software; you can redistribute it and/or
00007    modify it under the terms of the GNU Lesser General Public
00008    License as published by the Free Software Foundation; either
00009    version 2.1 of the License, or (at your option) any later version.
00010 
00011    The GNU C Library is distributed in the hope that it will be useful,
00012    but WITHOUT ANY WARRANTY; without even the implied warranty of
00013    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014    Lesser General Public License for more details.
00015 
00016    You should have received a copy of the GNU Lesser General Public
00017    License along with the GNU C Library; if not, write to the Free
00018    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
00019    02111-1307 USA.  */
00020 
00021 #ifndef _HP_TIMING_H
00022 #define _HP_TIMING_H 1
00023 
00024 #include <string.h>
00025 #include <sys/param.h>
00026 #include <stdio-common/_itoa.h>
00027 #include <atomic.h>
00028 
00029 /* The macros defined here use the powerpc 64-bit time base register.
00030    The time base is nominally clocked at 1/8th the CPU clock, but this
00031    can vary.
00032 
00033    The list of macros we need includes the following:
00034 
00035    - HP_TIMING_AVAIL: test for availability.
00036 
00037    - HP_TIMING_INLINE: this macro is non-zero if the functionality is not
00038      implemented using function calls but instead uses some inlined code
00039      which might simply consist of a few assembler instructions.  We have to
00040      know this since we might want to use the macros here in places where we
00041      cannot make function calls.
00042 
00043    - hp_timing_t: This is the type for variables used to store the time
00044      values.
00045 
00046    - HP_TIMING_ZERO: clear `hp_timing_t' object.
00047 
00048    - HP_TIMING_NOW: place timestamp for current time in variable given as
00049      parameter.
00050 
00051    - HP_TIMING_DIFF_INIT: do whatever is necessary to be able to use the
00052      HP_TIMING_DIFF macro.
00053 
00054    - HP_TIMING_DIFF: compute difference between two times and store it
00055      in a third.  Source and destination might overlap.
00056 
00057    - HP_TIMING_ACCUM: add time difference to another variable.  This might
00058      be a bit more complicated to implement for some platforms as the
00059      operation should be thread-safe and 64bit arithmetic on 32bit platforms
00060      is not.
00061 
00062    - HP_TIMING_ACCUM_NT: this is the variant for situations where we know
00063      there are no threads involved.
00064 
00065    - HP_TIMING_PRINT: write decimal representation of the timing value into
00066      the given string.  This operation need not be inline even though
00067      HP_TIMING_INLINE is specified.
00068 
00069 */
00070 
/* The time base register is assumed to be present, so timing is always
   reported as available.  */
#define HP_TIMING_AVAIL         (1)

/* The timing macros in this file expand to inline code rather than to
   function calls.  */
#define HP_TIMING_INLINE        (1)

/* Time stamps and intervals are kept in 64bit unsigned values.  */
typedef unsigned long long int hp_timing_t;

/* Reset the hp_timing_t lvalue VAR to zero.  The expansion is parenthesized
   as a whole so that it behaves as one expression wherever it is used
   (otherwise `HP_TIMING_ZERO (x) == 0' would parse as `x = (0 == 0)').  */
#define HP_TIMING_ZERO(Var)     ((Var) = 0)
00082 
/* Store the current time base value in the hp_timing_t lvalue VAR.

   The upper half of the time base (SPR 269) is read both before and after
   the lower half (SPR 268).  If the two upper reads differ, the lower half
   wrapped in between and the whole sequence is retried.  The two halves are
   put back together through a union; this assumes that `long' is 32 bits
   wide and that the first array element overlays the most significant
   word of the `long long'.

   Note that the value might not be 100% accurate since there might be some
   more instructions running in this moment.  This could be changed by using
   a barrier like `lwsync' right before the first read of the time base.
   But we are not interested in accurate clock cycles here so we don't do
   this.

   The temporaries carry `__hp_' names so that an argument which happens to
   be spelled `tmp' or `_var' is not captured by the expansion.  */
#define HP_TIMING_NOW(Var) \
  do {                                                                        \
    union { long long ll; long ii[2]; } __hp_tb;                              \
    long __hp_upper_again;                                                    \
    __asm__ __volatile__ (                                                    \
                "1:    mfspr  %0,269;"                                        \
                "      mfspr  %1,268;"                                        \
                "      mfspr  %2,269;"                                        \
                "      cmpw   %0,%2;"                                         \
                "      bne    1b;"                                            \
                : "=r" (__hp_tb.ii[0]), "=r" (__hp_tb.ii[1]),                 \
                  "=r" (__hp_upper_again)                                     \
                : : "cr0"                                                     \
                );                                                            \
    (Var) = __hp_tb.ll;                                                       \
  } while (0)
00104 
00105 
/* Measure the cost of taking a time stamp, once.  While the recorded
   overhead is still zero, five pairs of back-to-back HP_TIMING_NOW readings
   are taken and the smallest distance between the two readings of a pair is
   stored in GLRO(dl_hp_timing_overhead).  On current POWER4, POWER5, and
   970 processors mftb takes ~10 cycles.  */
#define HP_TIMING_DIFF_INIT() \
  do {                                                                        \
    if (GLRO(dl_hp_timing_overhead) == 0)                                     \
      {                                                                       \
        int __round;                                                          \
        /* Start from the largest value so any measurement replaces it.  */   \
        GLRO(dl_hp_timing_overhead) = ~0ull;                                  \
        for (__round = 0; __round < 5; ++__round)                             \
          {                                                                   \
            hp_timing_t __before, __after, __delta;                           \
            HP_TIMING_NOW (__before);                                         \
            HP_TIMING_NOW (__after);                                          \
            __delta = __after - __before;                                     \
            if (__delta < GLRO(dl_hp_timing_overhead))                        \
              GLRO(dl_hp_timing_overhead) = __delta;                          \
          }                                                                   \
      }                                                                       \
  } while (0)
00125 
/* Store END - START in DIFF; it's simple arithmetic in 64-bit.  DIFF may be
   the same object as START or END.  The expansion is parenthesized as a
   whole so that it behaves as one expression wherever it is used.  */
#define HP_TIMING_DIFF(Diff, Start, End)  ((Diff) = (End) - (Start))
00128 
/* Add DIFF, reduced by the measured timing overhead, to SUM.  We need to
   ensure that this add is atomic in threaded environments, so the addition
   itself is left to __arch_atomic_exchange_and_add_64 from atomic.h.  */
#define HP_TIMING_ACCUM(Sum, Diff) \
  do {                                                                        \
    hp_timing_t __net = (Diff);                                               \
    __net -= GLRO(dl_hp_timing_overhead);                                     \
    __arch_atomic_exchange_and_add_64 (&(Sum), __net);                        \
  } while (0)
00136 
/* No threads, no extra work: add DIFF to SUM with an ordinary addition.
   Unlike HP_TIMING_ACCUM, DIFF is added as given, without subtracting the
   timing overhead.  The expansion is parenthesized as a whole so that it
   behaves as one expression wherever it is used.  */
#define HP_TIMING_ACCUM_NT(Sum, Diff)     ((Sum) += (Diff))
00139 
/* Print the time value: write the decimal representation of VAL followed
   by " ticks" into BUF, using at most LEN bytes.

   The digits are produced by _itoa, which fills its buffer backwards from
   the end pointer it is given and returns the position of the first digit;
   20 bytes hold every 64bit decimal value.  As much of the digits and of
   the suffix (including its terminating NUL) as fits is copied.  If the
   output had to be cut short, the last byte of BUF is overwritten with a
   NUL so the result is still a string.  Nothing is written when LEN is 0,
   and no byte beyond BUF + LEN is ever touched.  */
#define HP_TIMING_PRINT(Buf, Len, Val) \
  do {                                                                        \
    char __digits[20];                                                        \
    char *__first = _itoa ((Val), __digits + sizeof (__digits), 10, 0);       \
    size_t __room = (Len);                                                    \
    char *__out = (Buf);                                                      \
    size_t __ndigits                                                          \
      = MIN ((size_t) (__digits + sizeof (__digits) - __first), __room);      \
    size_t __nsuffix = MIN (__room - __ndigits, sizeof (" ticks"));           \
    memcpy (__out, __first, __ndigits);                                       \
    memcpy (__out + __ndigits, " ticks", __nsuffix);                          \
    /* The suffix's own NUL did not fit: terminate inside the buffer.  */     \
    if (__nsuffix < sizeof (" ticks") && __room > 0)                          \
      __out[__room - 1] = '\0';                                               \
  } while (0)
00151 
00152 #endif /* hp-timing.h */