Back to index

python3.2  3.2.2
profile.py
Go to the documentation of this file.
00001 #! /usr/bin/env python3
00002 #
00003 # Class for profiling python code. rev 1.0  6/2/94
00004 #
00005 # Written by James Roskind
00006 # Based on prior profile module by Sjoerd Mullender...
00007 #   which was hacked somewhat by: Guido van Rossum
00008 
00009 """Class for profiling Python code."""
00010 
00011 # Copyright Disney Enterprises, Inc.  All Rights Reserved.
00012 # Licensed to PSF under a Contributor Agreement
00013 #
00014 # Licensed under the Apache License, Version 2.0 (the "License");
00015 # you may not use this file except in compliance with the License.
00016 # You may obtain a copy of the License at
00017 #
00018 # http://www.apache.org/licenses/LICENSE-2.0
00019 #
00020 # Unless required by applicable law or agreed to in writing, software
00021 # distributed under the License is distributed on an "AS IS" BASIS,
00022 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
00023 # either express or implied.  See the License for the specific language
00024 # governing permissions and limitations under the License.
00025 
00026 
00027 import sys
00028 import os
00029 import time
00030 import marshal
00031 from optparse import OptionParser
00032 
00033 __all__ = ["run", "runctx", "Profile"]
00034 
00035 # Sample timer for use with
00036 #i_count = 0
00037 #def integer_timer():
00038 #       global i_count
00039 #       i_count = i_count + 1
00040 #       return i_count
00041 #itimes = integer_timer # replace with C coded timer returning integers
00042 
00043 #**************************************************************************
00044 # The following are the static member functions for the profiler class
00045 # Note that an instance of Profile() is *not* needed to call them.
00046 #**************************************************************************
00047 
00048 def run(statement, filename=None, sort=-1):
00049     """Run statement under profiler optionally saving results in filename
00050 
00051     This function takes a single argument that can be passed to the
00052     "exec" statement, and an optional file name.  In all cases this
00053     routine attempts to "exec" its first argument and gather profiling
00054     statistics from the execution. If no file name is present, then this
00055     function automatically prints a simple profiling report, sorted by the
00056     standard name string (file/line/function-name) that is presented in
00057     each line.
00058     """
00059     prof = Profile()
00060     try:
00061         prof = prof.run(statement)
00062     except SystemExit:
00063         pass
00064     if filename is not None:
00065         prof.dump_stats(filename)
00066     else:
00067         return prof.print_stats(sort)
00068 
00069 def runctx(statement, globals, locals, filename=None, sort=-1):
00070     """Run statement under profiler, supplying your own globals and locals,
00071     optionally saving results in filename.
00072 
00073     statement and filename have the same semantics as profile.run
00074     """
00075     prof = Profile()
00076     try:
00077         prof = prof.runctx(statement, globals, locals)
00078     except SystemExit:
00079         pass
00080 
00081     if filename is not None:
00082         prof.dump_stats(filename)
00083     else:
00084         return prof.print_stats(sort)
00085 
00086 if hasattr(os, "times"):
00087     def _get_time_times(timer=os.times):
00088         t = timer()
00089         return t[0] + t[1]
00090 
00091 # Using getrusage(3) is better than clock(3) if available:
00092 # on some systems (e.g. FreeBSD), getrusage has a higher resolution
00093 # Furthermore, on a POSIX system, returns microseconds, which
00094 # wrap around after 36min.
00095 _has_res = 0
00096 try:
00097     import resource
00098     resgetrusage = lambda: resource.getrusage(resource.RUSAGE_SELF)
00099     def _get_time_resource(timer=resgetrusage):
00100         t = timer()
00101         return t[0] + t[1]
00102     _has_res = 1
00103 except ImportError:
00104     pass
00105 
00106 class Profile:
00107     """Profiler class.
00108 
00109     self.cur is always a tuple.  Each such tuple corresponds to a stack
00110     frame that is currently active (self.cur[-2]).  The following are the
00111     definitions of its members.  We use this external "parallel stack" to
00112     avoid contaminating the program that we are profiling. (old profiler
00113     used to write into the frames local dictionary!!) Derived classes
00114     can change the definition of some entries, as long as they leave
00115     [-2:] intact (frame and previous tuple).  In case an internal error is
00116     detected, the -3 element is used as the function name.
00117 
00118     [ 0] = Time that needs to be charged to the parent frame's function.
00119            It is used so that a function call will not have to access the
00120            timing data for the parent frame.
00121     [ 1] = Total time spent in this frame's function, excluding time in
00122            subfunctions (this latter is tallied in cur[2]).
00123     [ 2] = Total time spent in subfunctions, excluding time executing the
00124            frame's function (this latter is tallied in cur[1]).
00125     [-3] = Name of the function that corresponds to this frame.
00126     [-2] = Actual frame that we correspond to (used to sync exception handling).
00127     [-1] = Our parent 6-tuple (corresponds to frame.f_back).
00128 
00129     Timing data for each function is stored as a 5-tuple in the dictionary
00130     self.timings[].  The index is always the name stored in self.cur[-3].
00131     The following are the definitions of the members:
00132 
00133     [0] = The number of times this function was called, not counting direct
00134           or indirect recursion,
00135     [1] = Number of times this function appears on the stack, minus one
00136     [2] = Total time spent internal to this function
00137     [3] = Cumulative time that this function was present on the stack.  In
00138           non-recursive functions, this is the total execution time from start
00139           to finish of each invocation of a function, including time spent in
00140           all subfunctions.
00141     [4] = A dictionary indicating for each function name, the number of times
00142           it was called by us.
00143     """
00144 
00145     bias = 0  # calibration constant
00146 
00147     def __init__(self, timer=None, bias=None):
00148         self.timings = {}
00149         self.cur = None
00150         self.cmd = ""
00151         self.c_func_name = ""
00152 
00153         if bias is None:
00154             bias = self.bias
00155         self.bias = bias     # Materialize in local dict for lookup speed.
00156 
00157         if not timer:
00158             if _has_res:
00159                 self.timer = resgetrusage
00160                 self.dispatcher = self.trace_dispatch
00161                 self.get_time = _get_time_resource
00162             elif hasattr(time, 'clock'):
00163                 self.timer = self.get_time = time.clock
00164                 self.dispatcher = self.trace_dispatch_i
00165             elif hasattr(os, 'times'):
00166                 self.timer = os.times
00167                 self.dispatcher = self.trace_dispatch
00168                 self.get_time = _get_time_times
00169             else:
00170                 self.timer = self.get_time = time.time
00171                 self.dispatcher = self.trace_dispatch_i
00172         else:
00173             self.timer = timer
00174             t = self.timer() # test out timer function
00175             try:
00176                 length = len(t)
00177             except TypeError:
00178                 self.get_time = timer
00179                 self.dispatcher = self.trace_dispatch_i
00180             else:
00181                 if length == 2:
00182                     self.dispatcher = self.trace_dispatch
00183                 else:
00184                     self.dispatcher = self.trace_dispatch_l
00185                 # This get_time() implementation needs to be defined
00186                 # here to capture the passed-in timer in the parameter
00187                 # list (for performance).  Note that we can't assume
00188                 # the timer() result contains two values in all
00189                 # cases.
00190                 def get_time_timer(timer=timer, sum=sum):
00191                     return sum(timer())
00192                 self.get_time = get_time_timer
00193         self.t = self.get_time()
00194         self.simulate_call('profiler')
00195 
00196     # Heavily optimized dispatch routine for os.times() timer
00197 
00198     def trace_dispatch(self, frame, event, arg):
00199         timer = self.timer
00200         t = timer()
00201         t = t[0] + t[1] - self.t - self.bias
00202 
00203         if event == "c_call":
00204             self.c_func_name = arg.__name__
00205 
00206         if self.dispatch[event](self, frame,t):
00207             t = timer()
00208             self.t = t[0] + t[1]
00209         else:
00210             r = timer()
00211             self.t = r[0] + r[1] - t # put back unrecorded delta
00212 
00213     # Dispatch routine for best timer program (return = scalar, fastest if
00214     # an integer but float works too -- and time.clock() relies on that).
00215 
00216     def trace_dispatch_i(self, frame, event, arg):
00217         timer = self.timer
00218         t = timer() - self.t - self.bias
00219 
00220         if event == "c_call":
00221             self.c_func_name = arg.__name__
00222 
00223         if self.dispatch[event](self, frame, t):
00224             self.t = timer()
00225         else:
00226             self.t = timer() - t  # put back unrecorded delta
00227 
00228     # Dispatch routine for macintosh (timer returns time in ticks of
00229     # 1/60th second)
00230 
00231     def trace_dispatch_mac(self, frame, event, arg):
00232         timer = self.timer
00233         t = timer()/60.0 - self.t - self.bias
00234 
00235         if event == "c_call":
00236             self.c_func_name = arg.__name__
00237 
00238         if self.dispatch[event](self, frame, t):
00239             self.t = timer()/60.0
00240         else:
00241             self.t = timer()/60.0 - t  # put back unrecorded delta
00242 
00243     # SLOW generic dispatch routine for timer returning lists of numbers
00244 
00245     def trace_dispatch_l(self, frame, event, arg):
00246         get_time = self.get_time
00247         t = get_time() - self.t - self.bias
00248 
00249         if event == "c_call":
00250             self.c_func_name = arg.__name__
00251 
00252         if self.dispatch[event](self, frame, t):
00253             self.t = get_time()
00254         else:
00255             self.t = get_time() - t # put back unrecorded delta
00256 
00257     # In the event handlers, the first 3 elements of self.cur are unpacked
00258     # into vrbls w/ 3-letter names.  The last two characters are meant to be
00259     # mnemonic:
00260     #     _pt  self.cur[0] "parent time"   time to be charged to parent frame
00261     #     _it  self.cur[1] "internal time" time spent directly in the function
00262     #     _et  self.cur[2] "external time" time spent in subfunctions
00263 
00264     def trace_dispatch_exception(self, frame, t):
00265         rpt, rit, ret, rfn, rframe, rcur = self.cur
00266         if (rframe is not frame) and rcur:
00267             return self.trace_dispatch_return(rframe, t)
00268         self.cur = rpt, rit+t, ret, rfn, rframe, rcur
00269         return 1
00270 
00271 
00272     def trace_dispatch_call(self, frame, t):
00273         if self.cur and frame.f_back is not self.cur[-2]:
00274             rpt, rit, ret, rfn, rframe, rcur = self.cur
00275             if not isinstance(rframe, Profile.fake_frame):
00276                 assert rframe.f_back is frame.f_back, ("Bad call", rfn,
00277                                                        rframe, rframe.f_back,
00278                                                        frame, frame.f_back)
00279                 self.trace_dispatch_return(rframe, 0)
00280                 assert (self.cur is None or \
00281                         frame.f_back is self.cur[-2]), ("Bad call",
00282                                                         self.cur[-3])
00283         fcode = frame.f_code
00284         fn = (fcode.co_filename, fcode.co_firstlineno, fcode.co_name)
00285         self.cur = (t, 0, 0, fn, frame, self.cur)
00286         timings = self.timings
00287         if fn in timings:
00288             cc, ns, tt, ct, callers = timings[fn]
00289             timings[fn] = cc, ns + 1, tt, ct, callers
00290         else:
00291             timings[fn] = 0, 0, 0, 0, {}
00292         return 1
00293 
00294     def trace_dispatch_c_call (self, frame, t):
00295         fn = ("", 0, self.c_func_name)
00296         self.cur = (t, 0, 0, fn, frame, self.cur)
00297         timings = self.timings
00298         if fn in timings:
00299             cc, ns, tt, ct, callers = timings[fn]
00300             timings[fn] = cc, ns+1, tt, ct, callers
00301         else:
00302             timings[fn] = 0, 0, 0, 0, {}
00303         return 1
00304 
00305     def trace_dispatch_return(self, frame, t):
00306         if frame is not self.cur[-2]:
00307             assert frame is self.cur[-2].f_back, ("Bad return", self.cur[-3])
00308             self.trace_dispatch_return(self.cur[-2], 0)
00309 
00310         # Prefix "r" means part of the Returning or exiting frame.
00311         # Prefix "p" means part of the Previous or Parent or older frame.
00312 
00313         rpt, rit, ret, rfn, frame, rcur = self.cur
00314         rit = rit + t
00315         frame_total = rit + ret
00316 
00317         ppt, pit, pet, pfn, pframe, pcur = rcur
00318         self.cur = ppt, pit + rpt, pet + frame_total, pfn, pframe, pcur
00319 
00320         timings = self.timings
00321         cc, ns, tt, ct, callers = timings[rfn]
00322         if not ns:
00323             # This is the only occurrence of the function on the stack.
00324             # Else this is a (directly or indirectly) recursive call, and
00325             # its cumulative time will get updated when the topmost call to
00326             # it returns.
00327             ct = ct + frame_total
00328             cc = cc + 1
00329 
00330         if pfn in callers:
00331             callers[pfn] = callers[pfn] + 1  # hack: gather more
00332             # stats such as the amount of time added to ct courtesy
00333             # of this specific call, and the contribution to cc
00334             # courtesy of this call.
00335         else:
00336             callers[pfn] = 1
00337 
00338         timings[rfn] = cc, ns - 1, tt + rit, ct, callers
00339 
00340         return 1
00341 
00342 
00343     dispatch = {
00344         "call": trace_dispatch_call,
00345         "exception": trace_dispatch_exception,
00346         "return": trace_dispatch_return,
00347         "c_call": trace_dispatch_c_call,
00348         "c_exception": trace_dispatch_return,  # the C function returned
00349         "c_return": trace_dispatch_return,
00350         }
00351 
00352 
00353     # The next few functions play with self.cmd. By carefully preloading
00354     # our parallel stack, we can force the profiled result to include
00355     # an arbitrary string as the name of the calling function.
00356     # We use self.cmd as that string, and the resulting stats look
00357     # very nice :-).
00358 
00359     def set_cmd(self, cmd):
00360         if self.cur[-1]: return   # already set
00361         self.cmd = cmd
00362         self.simulate_call(cmd)
00363 
00364     class fake_code:
00365         def __init__(self, filename, line, name):
00366             self.co_filename = filename
00367             self.co_line = line
00368             self.co_name = name
00369             self.co_firstlineno = 0
00370 
00371         def __repr__(self):
00372             return repr((self.co_filename, self.co_line, self.co_name))
00373 
00374     class fake_frame:
00375         def __init__(self, code, prior):
00376             self.f_code = code
00377             self.f_back = prior
00378 
00379     def simulate_call(self, name):
00380         code = self.fake_code('profile', 0, name)
00381         if self.cur:
00382             pframe = self.cur[-2]
00383         else:
00384             pframe = None
00385         frame = self.fake_frame(code, pframe)
00386         self.dispatch['call'](self, frame, 0)
00387 
00388     # collect stats from pending stack, including getting final
00389     # timings for self.cmd frame.
00390 
00391     def simulate_cmd_complete(self):
00392         get_time = self.get_time
00393         t = get_time() - self.t
00394         while self.cur[-1]:
00395             # We *can* cause assertion errors here if
00396             # dispatch_trace_return checks for a frame match!
00397             self.dispatch['return'](self, self.cur[-2], t)
00398             t = 0
00399         self.t = get_time() - t
00400 
00401 
00402     def print_stats(self, sort=-1):
00403         import pstats
00404         pstats.Stats(self).strip_dirs().sort_stats(sort). \
00405                   print_stats()
00406 
00407     def dump_stats(self, file):
00408         f = open(file, 'wb')
00409         self.create_stats()
00410         marshal.dump(self.stats, f)
00411         f.close()
00412 
00413     def create_stats(self):
00414         self.simulate_cmd_complete()
00415         self.snapshot_stats()
00416 
00417     def snapshot_stats(self):
00418         self.stats = {}
00419         for func, (cc, ns, tt, ct, callers) in self.timings.items():
00420             callers = callers.copy()
00421             nc = 0
00422             for callcnt in callers.values():
00423                 nc += callcnt
00424             self.stats[func] = cc, nc, tt, ct, callers
00425 
00426 
00427     # The following two methods can be called by clients to use
00428     # a profiler to profile a statement, given as a string.
00429 
00430     def run(self, cmd):
00431         import __main__
00432         dict = __main__.__dict__
00433         return self.runctx(cmd, dict, dict)
00434 
00435     def runctx(self, cmd, globals, locals):
00436         self.set_cmd(cmd)
00437         sys.setprofile(self.dispatcher)
00438         try:
00439             exec(cmd, globals, locals)
00440         finally:
00441             sys.setprofile(None)
00442         return self
00443 
00444     # This method is more useful to profile a single function call.
00445     def runcall(self, func, *args, **kw):
00446         self.set_cmd(repr(func))
00447         sys.setprofile(self.dispatcher)
00448         try:
00449             return func(*args, **kw)
00450         finally:
00451             sys.setprofile(None)
00452 
00453 
00454     #******************************************************************
00455     # The following calculates the overhead for using a profiler.  The
00456     # problem is that it takes a fair amount of time for the profiler
00457     # to stop the stopwatch (from the time it receives an event).
00458     # Similarly, there is a delay from the time that the profiler
00459     # re-starts the stopwatch before the user's code really gets to
00460     # continue.  The following code tries to measure the difference on
00461     # a per-event basis.
00462     #
00463     # Note that this difference is only significant if there are a lot of
00464     # events, and relatively little user code per event.  For example,
00465     # code with small functions will typically benefit from having the
00466     # profiler calibrated for the current platform.  This *could* be
00467     # done on the fly during init() time, but it is not worth the
00468     # effort.  Also note that if too large a value specified, then
00469     # execution time on some functions will actually appear as a
00470     # negative number.  It is *normal* for some functions (with very
00471     # low call counts) to have such negative stats, even if the
00472     # calibration figure is "correct."
00473     #
00474     # One alternative to profile-time calibration adjustments (i.e.,
00475     # adding in the magic little delta during each event) is to track
00476     # more carefully the number of events (and cumulatively, the number
00477     # of events during sub functions) that are seen.  If this were
00478     # done, then the arithmetic could be done after the fact (i.e., at
00479     # display time).  Currently, we track only call/return events.
00480     # These values can be deduced by examining the callees and callers
00481     # vectors for each functions.  Hence we *can* almost correct the
00482     # internal time figure at print time (note that we currently don't
00483     # track exception event processing counts).  Unfortunately, there
00484     # is currently no similar information for cumulative sub-function
00485     # time.  It would not be hard to "get all this info" at profiler
00486     # time.  Specifically, we would have to extend the tuples to keep
00487     # counts of this in each frame, and then extend the defs of timing
00488     # tuples to include the significant two figures. I'm a bit fearful
00489     # that this additional feature will slow the heavily optimized
00490     # event/time ratio (i.e., the profiler would run slower, fur a very
00491     # low "value added" feature.)
00492     #**************************************************************
00493 
00494     def calibrate(self, m, verbose=0):
00495         if self.__class__ is not Profile:
00496             raise TypeError("Subclasses must override .calibrate().")
00497 
00498         saved_bias = self.bias
00499         self.bias = 0
00500         try:
00501             return self._calibrate_inner(m, verbose)
00502         finally:
00503             self.bias = saved_bias
00504 
00505     def _calibrate_inner(self, m, verbose):
00506         get_time = self.get_time
00507 
00508         # Set up a test case to be run with and without profiling.  Include
00509         # lots of calls, because we're trying to quantify stopwatch overhead.
00510         # Do not raise any exceptions, though, because we want to know
00511         # exactly how many profile events are generated (one call event, +
00512         # one return event, per Python-level call).
00513 
00514         def f1(n):
00515             for i in range(n):
00516                 x = 1
00517 
00518         def f(m, f1=f1):
00519             for i in range(m):
00520                 f1(100)
00521 
00522         f(m)    # warm up the cache
00523 
00524         # elapsed_noprofile <- time f(m) takes without profiling.
00525         t0 = get_time()
00526         f(m)
00527         t1 = get_time()
00528         elapsed_noprofile = t1 - t0
00529         if verbose:
00530             print("elapsed time without profiling =", elapsed_noprofile)
00531 
00532         # elapsed_profile <- time f(m) takes with profiling.  The difference
00533         # is profiling overhead, only some of which the profiler subtracts
00534         # out on its own.
00535         p = Profile()
00536         t0 = get_time()
00537         p.runctx('f(m)', globals(), locals())
00538         t1 = get_time()
00539         elapsed_profile = t1 - t0
00540         if verbose:
00541             print("elapsed time with profiling =", elapsed_profile)
00542 
00543         # reported_time <- "CPU seconds" the profiler charged to f and f1.
00544         total_calls = 0.0
00545         reported_time = 0.0
00546         for (filename, line, funcname), (cc, ns, tt, ct, callers) in \
00547                 p.timings.items():
00548             if funcname in ("f", "f1"):
00549                 total_calls += cc
00550                 reported_time += tt
00551 
00552         if verbose:
00553             print("'CPU seconds' profiler reported =", reported_time)
00554             print("total # calls =", total_calls)
00555         if total_calls != m + 1:
00556             raise ValueError("internal error: total calls = %d" % total_calls)
00557 
00558         # reported_time - elapsed_noprofile = overhead the profiler wasn't
00559         # able to measure.  Divide by twice the number of calls (since there
00560         # are two profiler events per call in this test) to get the hidden
00561         # overhead per event.
00562         mean = (reported_time - elapsed_noprofile) / 2.0 / total_calls
00563         if verbose:
00564             print("mean stopwatch overhead per profile event =", mean)
00565         return mean
00566 
00567 #****************************************************************************
00568 
00569 def main():
00570     usage = "profile.py [-o output_file_path] [-s sort] scriptfile [arg] ..."
00571     parser = OptionParser(usage=usage)
00572     parser.allow_interspersed_args = False
00573     parser.add_option('-o', '--outfile', dest="outfile",
00574         help="Save stats to <outfile>", default=None)
00575     parser.add_option('-s', '--sort', dest="sort",
00576         help="Sort order when printing to stdout, based on pstats.Stats class",
00577         default=-1)
00578 
00579     if not sys.argv[1:]:
00580         parser.print_usage()
00581         sys.exit(2)
00582 
00583     (options, args) = parser.parse_args()
00584     sys.argv[:] = args
00585 
00586     if len(args) > 0:
00587         progname = args[0]
00588         sys.path.insert(0, os.path.dirname(progname))
00589         with open(progname, 'rb') as fp:
00590             code = compile(fp.read(), progname, 'exec')
00591         globs = {
00592             '__file__': progname,
00593             '__name__': '__main__',
00594             '__package__': None,
00595             '__cached__': None,
00596         }
00597         runctx(code, globs, None, options.outfile, options.sort)
00598     else:
00599         parser.print_usage()
00600     return parser
00601 
00602 # When invoked as main program, invoke the profiler on a script
00603 if __name__ == '__main__':
00604     main()