Back to index

enigmail  1.4.3
find_OOM_errors.py
Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 
00003 usage = """%prog: A test for OOM conditions in the shell.
00004 
00005 %prog finds segfaults and other errors caused by incorrect handling of
00006 allocation during OOM (out-of-memory) conditions.
00007 """
00008 
00009 help = """Check for regressions only. This runs a set of files with a known
00010 number of OOM errors (specified by REGRESSION_COUNT), and exits with a non-zero
00011 result if more or less errors are found. See js/src/Makefile.in for invocation.
00012 """
00013 
00014 
00015 import hashlib
00016 import re
00017 import shlex
00018 import subprocess
00019 import sys
00020 import threading
00021 import time
00022 
00023 from optparse import OptionParser
00024 
00025 #####################################################################
00026 # Utility functions
00027 #####################################################################
00028 def run(args, stdin=None):
00029   class ThreadWorker(threading.Thread):
00030     def __init__(self, pipe):
00031       super(ThreadWorker, self).__init__()
00032       self.all = ""
00033       self.pipe = pipe
00034       self.setDaemon(True)
00035 
00036     def run(self):
00037       while True:
00038         line = self.pipe.readline()
00039         if line == '': break
00040         else:
00041           self.all += line
00042 
00043   try:
00044     if type(args) == str:
00045       args = shlex.split(args)
00046 
00047     args = [str(a) for a in args] # convert to strs
00048 
00049     stdin_pipe = subprocess.PIPE if stdin else None
00050     proc = subprocess.Popen(args, stdin=stdin_pipe, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
00051     if stdin_pipe:
00052       proc.stdin.write(stdin)
00053       proc.stdin.close()
00054 
00055     stdout_worker = ThreadWorker(proc.stdout)
00056     stderr_worker = ThreadWorker(proc.stderr)
00057     stdout_worker.start()
00058     stderr_worker.start()
00059 
00060     proc.wait()
00061     stdout_worker.join()
00062     stderr_worker.join()
00063 
00064   except KeyboardInterrupt, e:
00065     sys.exit(-1)
00066 
00067   stdout, stderr = stdout_worker.all, stderr_worker.all
00068   result = (stdout, stderr, proc.returncode)
00069   return result
00070 
00071 def get_js_files():
00072   (out, err, exit) = run('find ../jit-test/tests -name "*.js"')
00073   if (err, exit) != ("", 0):
00074     sys.exit("Wrong directory, run from an objdir")
00075   return out.split()
00076 
00077 
00078 
00079 #####################################################################
00080 # Blacklisting
00081 #####################################################################
00082 def in_blacklist(sig):
00083   return sig in blacklist
00084 
00085 def add_to_blacklist(sig):
00086   blacklist[sig] = blacklist.get(sig, 0)
00087   blacklist[sig] += 1
00088 
00089 # How often is a particular lines important for this.
00090 def count_lines():
00091   """Keep track of the amount of times individual lines occur, in order to
00092      prioritize the errors which occur most frequently."""
00093   counts = {}
00094   for string,count in blacklist.items():
00095     for line in string.split("\n"):
00096       counts[line] = counts.get(line, 0) + count
00097 
00098   lines = []
00099   for k,v in counts.items():
00100     lines.append("%6d: %s" % (v,k))
00101 
00102   lines.sort()
00103 
00104   countlog = file("../OOM_count_log", "w")
00105   countlog.write("\n".join(lines))
00106   countlog.flush()
00107   countlog.close()
00108 
00109 
00110 #####################################################################
00111 # Output cleaning
00112 #####################################################################
00113 def clean_voutput(err):
00114   # Skip what we can't reproduce
00115   err = re.sub(r"^--\d+-- run: /usr/bin/dsymutil \"shell/js\"$", "", err, flags=re.MULTILINE)
00116   err = re.sub(r"^==\d+==", "", err, flags=re.MULTILINE)
00117   err = re.sub(r"^\*\*\d+\*\*", "", err, flags=re.MULTILINE)
00118   err = re.sub(r"^\s+by 0x[0-9A-Fa-f]+: ", "by: ", err, flags=re.MULTILINE)
00119   err = re.sub(r"^\s+at 0x[0-9A-Fa-f]+: ", "at: ", err, flags=re.MULTILINE)
00120   err = re.sub(r"(^\s+Address 0x)[0-9A-Fa-f]+( is not stack'd)", r"\1\2", err, flags=re.MULTILINE)
00121   err = re.sub(r"(^\s+Invalid write of size )\d+", r"\1x", err, flags=re.MULTILINE)
00122   err = re.sub(r"(^\s+Invalid read of size )\d+", r"\1x", err, flags=re.MULTILINE)
00123   err = re.sub(r"(^\s+Address 0x)[0-9A-Fa-f]+( is )\d+( bytes inside a block of size )[0-9,]+( free'd)", r"\1\2\3\4", err, flags=re.MULTILINE)
00124 
00125   # Skip the repeating bit due to the segfault
00126   lines = []
00127   for l in err.split('\n'):
00128     if l == " Process terminating with default action of signal 11 (SIGSEGV)":
00129       break
00130     lines.append(l)
00131   err = '\n'.join(lines)
00132 
00133   return err
00134 
00135 def remove_failed_allocation_backtraces(err):
00136   lines = []
00137 
00138   add = True
00139   for l in err.split('\n'):
00140 
00141     # Set start and end conditions for including text
00142     if l == " The site of the failed allocation is:":
00143       add = False
00144     elif l[:2] not in ['by: ', 'at:']:
00145       add = True
00146 
00147     if add:
00148       lines.append(l)
00149 
00150 
00151   err = '\n'.join(lines)
00152 
00153   return err
00154 
00155 
00156 def clean_output(err):
00157   err = re.sub(r"^js\(\d+,0x[0-9a-f]+\) malloc: \*\*\* error for object 0x[0-9a-f]+: pointer being freed was not allocated\n\*\*\* set a breakppoint in malloc_error_break to debug\n$", "pointer being freed was not allocated", err, flags=re.MULTILINE)
00158 
00159   return err
00160 
00161 
00162 #####################################################################
00163 # Consts, etc
00164 #####################################################################
00165 
00166 command_template = 'shell/js' \
00167                  + ' -m -j -p' \
00168                  + ' -e "const platform=\'darwin\'; const libdir=\'../jit-test/lib/\';"' \
00169                  + ' -f ../jit-test/lib/prolog.js' \
00170                  + ' -f %s'
00171 
00172 
00173 # Blacklists are things we don't want to see in our logs again (though we do
00174 # want to count them when they happen). Whitelists we do want to see in our
00175 # logs again, principally because the information we have isn't enough.
00176 
00177 blacklist = {}
00178 add_to_blacklist(r"('', '', 1)") # 1 means OOM if the shell hasn't launched yet.
00179 add_to_blacklist(r"('', 'out of memory\n', 1)")
00180 
00181 whitelist = set()
00182 whitelist.add(r"('', 'out of memory\n', -11)") # -11 means OOM
00183 whitelist.add(r"('', 'out of memory\nout of memory\n', -11)")
00184 
00185 
00186 
00187 #####################################################################
00188 # Program
00189 #####################################################################
00190 
00191 # Options
00192 parser = OptionParser(usage=usage)
00193 parser.add_option("-r", "--regression", action="store", metavar="REGRESSION_COUNT", help=help,
00194                   type="int", dest="regression", default=None)
00195                   
00196 (OPTIONS, args) = parser.parse_args()
00197 
00198 
00199 if OPTIONS.regression != None:
00200   # TODO: This should be expanded as we get a better hang of the OOM problems.
00201   # For now, we'll just check that the number of OOMs in one short file does not
00202   # increase.
00203   files = ["../jit-test/tests/arguments/args-createontrace.js"]
00204 else:
00205   files = get_js_files()
00206 
00207   # Use a command-line arg to reduce the set of files
00208   if len (args):
00209     files = [f for f in files if f.find(args[0]) != -1]
00210 
00211 
00212 if OPTIONS.regression == None:
00213   # Don't use a logfile, this is automated for tinderbox.
00214   log = file("../OOM_log", "w")
00215 
00216 
00217 num_failures = 0
00218 for f in files:
00219 
00220   # Run it once to establish boundaries
00221   command = (command_template + ' -O') % (f)
00222   out, err, exit = run(command)
00223   max = re.match(".*OOM max count: (\d+).*", out, flags=re.DOTALL).groups()[0]
00224   max = int(max)
00225   
00226   # OOMs don't recover well for the first 20 allocations or so.
00227   # TODO: revisit this.
00228   for i in range(20, max): 
00229 
00230     if OPTIONS.regression == None:
00231       print "Testing allocation %d/%d in %s" % (i,max,f)
00232     else:
00233       sys.stdout.write('.') # something short for tinderbox, no space or \n
00234 
00235     command = (command_template + ' -A %d') % (f, i)
00236     out, err, exit = run(command)
00237 
00238     # Success (5 is SM's exit code for controlled errors)
00239     if exit == 5 and err.find("out of memory") != -1:
00240       continue
00241 
00242     # Failure
00243     else:
00244 
00245       if OPTIONS.regression != None:
00246         # Just count them
00247         num_failures += 1
00248         continue
00249 
00250       #########################################################################
00251       # The regression tests ends above. The rest of this is for running  the
00252       # script manually.
00253       #########################################################################
00254 
00255       problem = str((out, err, exit))
00256       if in_blacklist(problem) and problem not in whitelist:
00257         add_to_blacklist(problem)
00258         continue
00259 
00260       add_to_blacklist(problem)
00261 
00262 
00263       # Get valgrind output for a good stack trace
00264       vcommand = "valgrind --dsymutil=yes -q --log-file=OOM_valgrind_log_file " + command
00265       run(vcommand)
00266       vout = file("OOM_valgrind_log_file").read()
00267       vout = clean_voutput(vout)
00268       sans_alloc_sites = remove_failed_allocation_backtraces(vout)
00269 
00270       # Don't print duplicate information
00271       if in_blacklist(sans_alloc_sites):
00272         add_to_blacklist(sans_alloc_sites)
00273         continue
00274 
00275       add_to_blacklist(sans_alloc_sites)
00276 
00277       log.write ("\n")
00278       log.write ("\n")
00279       log.write ("=========================================================================")
00280       log.write ("\n")
00281       log.write ("An allocation failure at\n\tallocation %d/%d in %s\n\tcauses problems (detected using bug 624094)" % (i, max, f))
00282       log.write ("\n")
00283       log.write ("\n")
00284 
00285       log.write ("Command (from obj directory, using patch from bug 624094):\n  " + command)
00286       log.write ("\n")
00287       log.write ("\n")
00288       log.write ("stdout, stderr, exitcode:\n  " + problem)
00289       log.write ("\n")
00290       log.write ("\n")
00291 
00292       double_free = err.find("pointer being freed was not allocated") != -1
00293       oom_detected = err.find("out of memory") != -1
00294       multiple_oom_detected = err.find("out of memory\nout of memory") != -1
00295       segfault_detected = exit == -11
00296 
00297       log.write ("Diagnosis: ")
00298       log.write ("\n")
00299       if multiple_oom_detected:
00300         log.write ("  - Multiple OOMs reported")
00301         log.write ("\n")
00302       if segfault_detected:
00303         log.write ("  - segfault")
00304         log.write ("\n")
00305       if not oom_detected:
00306         log.write ("  - No OOM checking")
00307         log.write ("\n")
00308       if double_free:
00309         log.write ("  - Double free")
00310         log.write ("\n")
00311 
00312       log.write ("\n")
00313 
00314       log.write ("Valgrind info:\n" + vout)
00315       log.write ("\n")
00316       log.write ("\n")
00317       log.flush()
00318 
00319   if OPTIONS.regression == None:
00320     count_lines()
00321 
00322 print '\n',
00323 
00324 # Do the actual regression check
00325 if OPTIONS.regression != None:
00326   expected_num_failures = OPTIONS.regression
00327 
00328   if num_failures != expected_num_failures:
00329 
00330     print "TEST-UNEXPECTED-FAIL |",
00331     if num_failures > expected_num_failures:
00332       print "More out-of-memory errors were found (%s) than expected (%d). This probably means an allocation site has been added without a NULL-check. If this is unavoidable, you can account for it by updating Makefile.in." % (num_failures, expected_num_failures),
00333     else:
00334       print "Congratulations, you have removed %d out-of-memory error(s) (%d remain)! Please account for it by updating Makefile.in." % (expected_num_failures - num_failures, num_failures),
00335     sys.exit(-1)
00336   else:
00337     print 'TEST-PASS | find_OOM_errors | Found the expected number of OOM errors (%d)' % (expected_num_failures)
00338