Back to index

apport  2.3
report.py
Go to the documentation of this file.
00001 '''Representation of and data collection for a problem report.'''
00002 
00003 # Copyright (C) 2006 - 2009 Canonical Ltd.
00004 # Author: Martin Pitt <martin.pitt@ubuntu.com>
00005 #
00006 # This program is free software; you can redistribute it and/or modify it
00007 # under the terms of the GNU General Public License as published by the
00008 # Free Software Foundation; either version 2 of the License, or (at your
00009 # option) any later version.  See http://www.gnu.org/copyleft/gpl.html for
00010 # the full text of the license.
00011 
00012 import subprocess, tempfile, os.path, re, pwd, grp, os
00013 import fnmatch, glob, traceback, errno, sys
00014 
00015 import xml.dom, xml.dom.minidom
00016 from xml.parsers.expat import ExpatError
00017 
00018 if sys.version > '3':
00019     _python2 = False
00020     from urllib.error import URLError
00021     from urllib.request import urlopen
00022     (urlopen, URLError)  # pyflakes
00023 else:
00024     _python2 = True
00025     from urllib import urlopen
00026     URLError = IOError
00027 
00028 import problem_report
00029 import apport
00030 import apport.fileutils
00031 from apport.packaging_impl import impl as packaging
00032 
00033 _data_dir = os.environ.get('APPORT_DATA_DIR', '/usr/share/apport')
00034 _hook_dir = '%s/package-hooks/' % (_data_dir)
00035 _common_hook_dir = '%s/general-hooks/' % (_data_dir)
00036 
00037 # path of the ignore file
00038 _ignore_file = '~/.apport-ignore.xml'
00039 
00040 # system-wide blacklist
00041 _blacklist_dir = '/etc/apport/blacklist.d'
00042 _whitelist_dir = '/etc/apport/whitelist.d'
00043 
00044 # programs that we consider interpreters
00045 interpreters = ['sh', 'bash', 'dash', 'csh', 'tcsh', 'python*',
00046                 'ruby*', 'php', 'perl*', 'mono*', 'awk']
00047 
00048 #
00049 # helper functions
00050 #
00051 
00052 
def _transitive_dependencies(package, depends_set):
    '''Recursively collect all (pre-)dependencies of package into depends_set.

    Unknown/uninstalled packages are silently skipped.
    '''
    # bail out silently if the package is not installed/known
    try:
        packaging.get_version(package)
    except ValueError:
        return
    for dependency in packaging.get_dependencies(package):
        if dependency in depends_set:
            continue
        depends_set.add(dependency)
        _transitive_dependencies(dependency, depends_set)
00064 
00065 
00066 def _read_file(path):
00067     '''Read file content.
00068 
00069     Return its content, or return a textual error if it failed.
00070     '''
00071     try:
00072         with open(path) as fd:
00073             return fd.read().strip()
00074     except (OSError, IOError) as e:
00075         return 'Error: ' + str(e)
00076 
00077 
00078 def _read_maps(pid):
00079     '''Read /proc/pid/maps.
00080 
00081     Since /proc/$pid/maps may become unreadable unless we are ptracing the
00082     process, detect this, and attempt to attach/detach.
00083     '''
00084     maps = 'Error: unable to read /proc maps file'
00085     try:
00086         with open('/proc/%d/maps' % pid) as fd:
00087             maps = fd.read().strip()
00088     except (OSError, IOError) as e:
00089         return 'Error: ' + str(e)
00090     return maps
00091 
00092 
00093 def _command_output(command, input=None, stderr=subprocess.STDOUT):
00094     '''Run command and capture its output.
00095 
00096     Try to execute given command (argv list) and return its stdout, or return
00097     a textual error if it failed.
00098     '''
00099     sp = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=stderr)
00100 
00101     (out, err) = sp.communicate(input)
00102     if sp.returncode == 0:
00103         return out
00104     else:
00105         if err:
00106             err = err.decode('UTF-8', errors='replace')
00107         else:
00108             err = ''
00109         raise OSError('Error: command %s failed with exit code %i: %s' % (
00110             str(command), sp.returncode, err))
00111 
00112 
def _check_bug_pattern(report, pattern):
    '''Check if given report matches the given bug pattern XML DOM node.

    A <pattern> node must have an "url" attribute; its <re> children each
    name a report field via a "key" attribute and hold a regular expression
    as text content.  The report matches only if every <re> condition
    matches its field.

    Return the bug URL on match, otherwise None.
    '''
    # a pattern without a target URL is useless
    if _python2:
        if not pattern.attributes.has_key('url'):
            return None
    else:
        if 'url' not in pattern.attributes:
            return None

    for c in pattern.childNodes:
        # regular expression condition
        if c.nodeType == xml.dom.Node.ELEMENT_NODE and c.nodeName == 're':
            try:
                key = c.attributes['key'].nodeValue
            except KeyError:
                continue
            if key not in report:
                return None
            c.normalize()
            if c.hasChildNodes() and c.childNodes[0].nodeType == xml.dom.Node.TEXT_NODE:
                regexp = c.childNodes[0].nodeValue
                v = report[key]
                if isinstance(v, problem_report.CompressedValue):
                    # compressed values decompress to bytes, so the pattern
                    # must be bytes as well
                    v = v.get_value()
                    regexp = regexp.encode('UTF-8')
                try:
                    re_c = re.compile(regexp)
                except re.error:
                    # skip invalid patterns instead of swallowing every
                    # exception (was a bare "except:" before)
                    continue
                if not re_c.search(v):
                    return None

    return pattern.attributes['url'].nodeValue
00149 
00150 
def _check_bug_patterns(report, patterns):
    '''Return the URL of the first bug pattern that matches report.

    patterns is an XML document string; return None if it cannot be parsed
    or if no <pattern> element matches.
    '''
    try:
        dom = xml.dom.minidom.parseString(patterns)
    except (ExpatError, UnicodeEncodeError):
        return None

    urls = (_check_bug_pattern(report, p)
            for p in dom.getElementsByTagName('pattern'))
    return next((url for url in urls if url), None)
00163 
00164 
00165 def _dom_remove_space(node):
00166     '''Recursively remove whitespace from given XML DOM node.'''
00167 
00168     for c in node.childNodes:
00169         if c.nodeType == xml.dom.Node.TEXT_NODE and c.nodeValue.strip() == '':
00170             c.unlink()
00171             node.removeChild(c)
00172         else:
00173             _dom_remove_space(c)
00174 
00175 #
00176 # Report class
00177 #
00178 
00179 
00180 class Report(problem_report.ProblemReport):
00181     '''A problem report specific to apport (crash or bug).
00182 
00183     This class wraps a standard ProblemReport and adds methods for collecting
00184     standard debugging data.'''
00185 
00186     def __init__(self, type='Crash', date=None):
00187         '''Initialize a fresh problem report.
00188 
00189         date is the desired date/time string; if None (default), the current
00190         local time is used.
00191 
00192         If the report is attached to a process ID, this should be set in
00193         self.pid, so that e. g. hooks can use it to collect additional data.
00194         '''
00195         problem_report.ProblemReport.__init__(self, type, date)
00196         self.pid = None
00197         self._proc_maps_cache = None
00198 
00199     def _customized_package_suffix(self, package):
00200         '''Return a string suitable for appending to Package/Dependencies.
00201 
00202         If package has only unmodified files, return the empty string. If not,
00203         return ' [modified: ...]' with a list of modified files.
00204         '''
00205         suffix = ''
00206         mod = packaging.get_modified_files(package)
00207         if mod:
00208             suffix += ' [modified: %s]' % ' '.join(mod)
00209         try:
00210             if not packaging.is_distro_package(package):
00211                 origin = packaging.get_package_origin(package)
00212                 if origin:
00213                     suffix += ' [origin: %s]' % origin
00214         except ValueError:
00215             # no-op for nonexisting packages
00216             pass
00217 
00218         return suffix
00219 
00220     def add_package_info(self, package=None):
00221         '''Add packaging information.
00222 
00223         If package is not given, the report must have ExecutablePath.
00224         This adds:
00225         - Package: package name and installed version
00226         - SourcePackage: source package name
00227         - PackageArchitecture: processor architecture this package was built
00228           for
00229         - Dependencies: package names and versions of all dependencies and
00230           pre-dependencies; this also checks if the files are unmodified and
00231           appends a list of all modified files
00232         '''
00233         if not package:
00234             # the kernel does not have a executable path but a package
00235             if not 'ExecutablePath' in self and self['ProblemType'] == 'KernelCrash':
00236                 package = self['Package']
00237             else:
00238                 package = apport.fileutils.find_file_package(self['ExecutablePath'])
00239             if not package:
00240                 return
00241 
00242         try:
00243             version = packaging.get_version(package)
00244         except ValueError:
00245             # package not installed
00246             version = None
00247         self['Package'] = '%s %s%s' % (package, version or '(not installed)',
00248                                        self._customized_package_suffix(package))
00249         if version or 'SourcePackage' not in self:
00250             self['SourcePackage'] = packaging.get_source(package)
00251         if not version:
00252             return
00253 
00254         self['PackageArchitecture'] = packaging.get_architecture(package)
00255 
00256         # get set of all transitive dependencies
00257         dependencies = set([])
00258         _transitive_dependencies(package, dependencies)
00259 
00260         # get dependency versions
00261         self['Dependencies'] = ''
00262         for dep in sorted(dependencies):
00263             try:
00264                 v = packaging.get_version(dep)
00265             except ValueError:
00266                 # can happen with uninstalled alternate dependencies
00267                 continue
00268 
00269             if self['Dependencies']:
00270                 self['Dependencies'] += '\n'
00271             self['Dependencies'] += '%s %s%s' % (
00272                 dep, v, self._customized_package_suffix(dep))
00273 
00274     def add_os_info(self):
00275         '''Add operating system information.
00276 
00277         This adds:
00278         - DistroRelease: lsb_release -sir output
00279         - Architecture: system architecture in distro specific notation
00280         - Uname: uname -srm output
00281         - NonfreeKernelModules: loaded kernel modules which are not free (if
00282             there are none, this field will not be present)
00283         '''
00284         p = subprocess.Popen(['lsb_release', '-sir'], stdout=subprocess.PIPE,
00285                              stderr=subprocess.PIPE)
00286         self['DistroRelease'] = p.communicate()[0].decode().strip().replace('\n', ' ')
00287 
00288         u = os.uname()
00289         self['Uname'] = '%s %s %s' % (u[0], u[2], u[4])
00290         self['Architecture'] = packaging.get_system_architecture()
00291 
00292     def add_user_info(self):
00293         '''Add information about the user.
00294 
00295         This adds:
00296         - UserGroups: system groups the user is in
00297         '''
00298         user = pwd.getpwuid(os.getuid()).pw_name
00299         groups = [name for name, p, gid, memb in grp.getgrall()
00300                   if user in memb and gid < 1000]
00301         groups.sort()
00302         self['UserGroups'] = ' '.join(groups)
00303 
    def _check_interpreted(self):
        '''Check if process is a script.

        Use ExecutablePath, ProcStatus and ProcCmdline to determine if
        process is an interpreted script. If so, set InterpreterPath
        to the interpreter and point ExecutablePath at the script.
        May set UnreportableReason if the script cannot be determined.
        '''
        # nothing to classify without an executable path
        if 'ExecutablePath' not in self:
            return

        exebasename = os.path.basename(self['ExecutablePath'])

        # check if we consider ExecutablePath an interpreter; we have to do
        # this, otherwise 'gedit /tmp/foo.txt' would be detected as interpreted
        # script as well
        if not any(filter(lambda i: fnmatch.fnmatch(exebasename, i), interpreters)):
            return

        # first, determine process name from the 'Name:' field of
        # /proc/pid/status
        name = None
        for l in self['ProcStatus'].splitlines():
            try:
                (k, v) = l.split('\t', 1)
            except ValueError:
                continue
            if k == 'Name:':
                name = v
                break
        if not name:
            return

        # ProcCmdline is still NUL-separated at this point
        cmdargs = self['ProcCmdline'].split('\0')
        bindirs = ['/bin/', '/sbin/', '/usr/bin/', '/usr/sbin/']

        # filter out interpreter options
        while len(cmdargs) >= 2 and cmdargs[1].startswith('-'):
            # check for -m; 'python -m mod' needs the module resolved to a
            # file path
            if name.startswith('python') and cmdargs[1] == '-m' and len(cmdargs) >= 3:
                path = self._python_module_path(cmdargs[2])
                if path:
                    self['InterpreterPath'] = self['ExecutablePath']
                    self['ExecutablePath'] = path
                else:
                    self['UnreportableReason'] = 'Cannot determine path of python module %s' % cmdargs[2]
                return

            del cmdargs[1]

        # catch scripts explicitly called with interpreter
        if len(cmdargs) >= 2:
            # ensure that cmdargs[1] is an absolute path
            if cmdargs[1].startswith('.') and 'ProcCwd' in self:
                cmdargs[1] = os.path.join(self['ProcCwd'], cmdargs[1])
            if os.access(cmdargs[1], os.R_OK):
                self['InterpreterPath'] = self['ExecutablePath']
                self['ExecutablePath'] = os.path.realpath(cmdargs[1])

        # catch directly executed scripts: the kernel reports the script name,
        # which differs from the interpreter's basename; search standard bin
        # dirs for a file matching argv[0] that resolves to that name
        if 'InterpreterPath' not in self and name != exebasename:
            for p in bindirs:
                if os.access(p + cmdargs[0], os.R_OK):
                    argvexe = p + cmdargs[0]
                    if os.path.basename(os.path.realpath(argvexe)) == name:
                        self['InterpreterPath'] = self['ExecutablePath']
                        self['ExecutablePath'] = argvexe
                    break

        # special case: crashes from twistd are usually the fault of the
        # launched program
        if 'InterpreterPath' in self and os.path.basename(self['ExecutablePath']) == 'twistd':
            self['InterpreterPath'] = self['ExecutablePath']
            exe = self._twistd_executable()
            if exe:
                self['ExecutablePath'] = exe
            else:
                self['UnreportableReason'] = 'Cannot determine twistd client program'
00380 
00381     def _twistd_executable(self):
00382         '''Determine the twistd client program from ProcCmdline.'''
00383 
00384         args = self['ProcCmdline'].split('\0')[2:]
00385 
00386         # search for a -f/--file, -y/--python or -s/--source argument
00387         while args:
00388             arg = args[0].split('=', 1)
00389             if arg[0].startswith('--file') or arg[0].startswith('--python') or arg[0].startswith('--source'):
00390                 if len(arg) == 2:
00391                     return arg[1]
00392                 else:
00393                     return args[1]
00394             elif len(arg[0]) > 1 and arg[0][0] == '-' and arg[0][1] != '-':
00395                 opts = arg[0][1:]
00396                 if 'f' in opts or 'y' in opts or 's' in opts:
00397                     return args[1]
00398 
00399             args.pop(0)
00400 
00401         return None
00402 
00403     @classmethod
00404     def _python_module_path(klass, module):
00405         '''Determine path of given Python module'''
00406 
00407         try:
00408             m = __import__(module.replace('/', '.'))
00409             m
00410         except:
00411             return None
00412 
00413         # chop off the first component, as it's already covered by m
00414         path = eval('m.%s.__file__' % '.'.join(module.split('/')[1:]))
00415         if path.endswith('.pyc'):
00416             path = path[:-1]
00417         return path
00418 
    def add_proc_info(self, pid=None, extraenv=[]):
        '''Add /proc/pid information.

        If neither pid nor self.pid are given, it defaults to the process'
        current pid and sets self.pid.

        This adds the following fields:
        - ExecutablePath: /proc/pid/exe contents; if the crashed process is
          interpreted, this contains the script path instead
        - InterpreterPath: /proc/pid/exe contents if the crashed process is
          interpreted; otherwise this key does not exist
        - ExecutableTimestamp: time stamp of ExecutablePath, for comparing at
          report time
        - ProcEnviron: A subset of the process' environment (only some standard
          variables that do not disclose potentially sensitive information, plus
          the ones mentioned in extraenv)
        - ProcCmdline: /proc/pid/cmdline contents
        - ProcStatus: /proc/pid/status contents
        - ProcMaps: /proc/pid/maps contents
        - ProcAttrCurrent: /proc/pid/attr/current contents, if not "unconfined"

        Raises ValueError if the process does not exist (any more).
        '''
        # fall back to the report's pid, or our own; remember it for hooks
        if not pid:
            pid = self.pid or os.getpid()
        if not self.pid:
            self.pid = int(pid)
        pid = str(pid)

        try:
            self['ProcCwd'] = os.readlink('/proc/' + pid + '/cwd')
        except OSError:
            # cwd link may be unreadable (e. g. insufficient permissions);
            # not fatal
            pass
        self.add_proc_environ(pid, extraenv)
        self['ProcStatus'] = _read_file('/proc/' + pid + '/status')
        # cmdline is NUL-separated; only the trailing NUL is dropped here,
        # inner NULs become spaces during the escaping step below
        self['ProcCmdline'] = _read_file('/proc/' + pid + '/cmdline').rstrip('\0')
        self['ProcMaps'] = _read_maps(int(pid))
        try:
            self['ExecutablePath'] = os.readlink('/proc/' + pid + '/exe')
        except OSError as e:
            # a missing exe link means the process is gone
            if e.errno == errno.ENOENT:
                raise ValueError('invalid process')
            else:
                raise
        # strip special mount point prefixes from the executable path
        # (presumably live-system union mounts -- TODO confirm)
        for p in ('rofs', 'rwfs', 'squashmnt', 'persistmnt'):
            if self['ExecutablePath'].startswith('/%s/' % p):
                self['ExecutablePath'] = self['ExecutablePath'][len('/%s' % p):]
                break
        assert os.path.exists(self['ExecutablePath'])

        # check if we have an interpreted program
        self._check_interpreted()

        self['ExecutableTimestamp'] = str(int(os.stat(self['ExecutablePath']).st_mtime))

        # make ProcCmdline ASCII friendly, do shell escaping
        self['ProcCmdline'] = self['ProcCmdline'].replace('\\', '\\\\').replace(' ', '\\ ').replace('\0', ' ')

        # grab AppArmor or SELinux context
        # If no LSM is loaded, reading will return -EINVAL
        try:
            # On Linux 2.6.28+, 'current' is world readable, but read() gives
            # EPERM; Python 2.5.3+ crashes on that (LP: #314065)
            if os.getuid() == 0:
                with open('/proc/' + pid + '/attr/current') as fd:
                    val = fd.read().strip()
                if val != 'unconfined':
                    self['ProcAttrCurrent'] = val
        except (IOError, OSError):
            pass
00487 
00488     def add_proc_environ(self, pid=None, extraenv=[]):
00489         '''Add environment information.
00490 
00491         If pid is not given, it defaults to the process' current pid.
00492 
00493         This adds the following fields:
00494         - ProcEnviron: A subset of the process' environment (only some standard
00495           variables that do not disclose potentially sensitive information, plus
00496           the ones mentioned in extraenv)
00497         '''
00498         safe_vars = ['SHELL', 'TERM', 'LANGUAGE', 'LANG', 'LC_CTYPE',
00499                      'LC_COLLATE', 'LC_TIME', 'LC_NUMERIC', 'LC_MONETARY',
00500                      'LC_MESSAGES', 'LC_PAPER', 'LC_NAME', 'LC_ADDRESS',
00501                      'LC_TELEPHONE', 'LC_MEASUREMENT', 'LC_IDENTIFICATION',
00502                      'LOCPATH'] + extraenv
00503 
00504         if not pid:
00505             pid = os.getpid()
00506         pid = str(pid)
00507 
00508         self['ProcEnviron'] = ''
00509         env = _read_file('/proc/' + pid + '/environ').replace('\n', '\\n')
00510         if env.startswith('Error:'):
00511             self['ProcEnviron'] = env
00512         else:
00513             for l in env.split('\0'):
00514                 if l.split('=', 1)[0] in safe_vars:
00515                     if self['ProcEnviron']:
00516                         self['ProcEnviron'] += '\n'
00517                     self['ProcEnviron'] += l
00518                 elif l.startswith('PATH='):
00519                     p = l.split('=', 1)[1]
00520                     if '/home' in p or '/tmp' in p:
00521                         if self['ProcEnviron']:
00522                             self['ProcEnviron'] += '\n'
00523                         self['ProcEnviron'] += 'PATH=(custom, user)'
00524                     elif p != '/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games':
00525                         if self['ProcEnviron']:
00526                             self['ProcEnviron'] += '\n'
00527                         self['ProcEnviron'] += 'PATH=(custom, no user)'
00528 
00529     def add_kernel_crash_info(self, debugdir=None):
00530         '''Add information from kernel crash.
00531 
00532         This needs a VmCore in the Report.
00533         '''
00534         if 'VmCore' not in self:
00535             return
00536         unlink_core = False
00537         ret = False
00538         try:
00539             if hasattr(self['VmCore'], 'find'):
00540                 (fd, core) = tempfile.mkstemp()
00541                 os.write(fd, self['VmCore'])
00542                 os.close(fd)
00543                 unlink_core = True
00544             kver = self['Uname'].split()[1]
00545             command = ['crash',
00546                        '/usr/lib/debug/boot/vmlinux-%s' % kver,
00547                        core,
00548                        ]
00549             try:
00550                 p = subprocess.Popen(command,
00551                                      stdin=subprocess.PIPE,
00552                                      stdout=subprocess.PIPE,
00553                                      stderr=subprocess.STDOUT)
00554             except OSError:
00555                 return False
00556             p.stdin.write('bt -a -f\n')
00557             p.stdin.write('ps\n')
00558             p.stdin.write('runq\n')
00559             p.stdin.write('quit\n')
00560             # FIXME: split it up nicely etc
00561             out = p.stdout.read()
00562             ret = (p.wait() == 0)
00563             if ret:
00564                 self['Stacktrace'] = out
00565         finally:
00566             if unlink_core:
00567                 os.unlink(core)
00568         return ret
00569 
    def add_gdb_info(self, rootdir=None):
        '''Add information from gdb.

        This requires that the report has a CoreDump and an
        ExecutablePath. This adds the following fields:
        - Registers: Output of gdb's 'info registers' command
        - Disassembly: Output of gdb's 'x/16i $pc' command
        - Stacktrace: Output of gdb's 'bt full' command
        - ThreadStacktrace: Output of gdb's 'thread apply all bt full' command
        - StacktraceTop: simplified stacktrace (topmost 5 functions) for inline
          inclusion into bug reports and easier processing
        - AssertionMessage: Value of __abort_msg, if present

        The optional rootdir can specify a root directory which has the
        executable, libraries, and debug symbols. This does not require
        chroot() or root privileges, it just instructs gdb to search for the
        files there.
        '''
        if 'CoreDump' not in self or 'ExecutablePath' not in self:
            return

        unlink_core = False
        try:
            # materialize the core dump into a file gdb can open; the report
            # may hold it inline (str/bytes), compressed, or as a file
            # reference
            if hasattr(self['CoreDump'], 'find'):
                # inline string/bytes value
                (fd, core) = tempfile.mkstemp()
                unlink_core = True
                os.write(fd, self['CoreDump'])
                os.close(fd)
            elif hasattr(self['CoreDump'], 'gzipvalue'):
                # CompressedValue; let it decompress itself into the file
                (fd, core) = tempfile.mkstemp()
                unlink_core = True
                os.close(fd)
                with open(core, 'wb') as f:
                    self['CoreDump'].write(f)
            else:
                # file reference tuple
                core = self['CoreDump'][0]

            # report field -> gdb command that produces it
            gdb_reports = {'Registers': 'info registers',
                           'Disassembly': 'x/16i $pc',
                           'Stacktrace': 'bt full',
                           'ThreadStacktrace': 'thread apply all bt full',
                           'AssertionMessage': 'print __abort_msg->msg'}

            command = ['gdb', '--batch']
            executable = self.get('InterpreterPath', self['ExecutablePath'])
            if rootdir:
                command += ['--ex', 'set debug-file-directory %s/usr/lib/debug' % rootdir,
                            '--ex', 'set solib-absolute-prefix ' + rootdir]
                executable = rootdir + '/' + executable
            command += ['--ex', 'file "%s"' % executable, '--ex', 'core-file ' + core]
            # limit maximum backtrace depth (to avoid looped stacks)
            command += ['--ex', 'set backtrace limit 2000']
            value_keys = []
            # append the actual commands and something that acts as a separator:
            # 'p -99' makes gdb print '$N = -99', which the regexp below splits
            # the combined output on; value_keys records the field order
            for name, cmd in gdb_reports.items():
                value_keys.append(name)
                command += ['--ex', 'p -99', '--ex', cmd]

            assert os.path.exists(executable)

            # call gdb
            try:
                out = _command_output(command).decode('UTF-8', errors='replace')
            except OSError:
                return

            # split the output into the various fields
            part_re = re.compile('^\$\d+\s*=\s*-99$', re.MULTILINE)
            parts = part_re.split(out)
            # drop the gdb startup text prior to first separator
            parts.pop(0)
            for part in parts:
                # '\n\n' would terminate the continuation in report format;
                # replace with '\n.\n'
                self[value_keys.pop(0)] = part.replace('\n\n', '\n.\n').strip()
        finally:
            if unlink_core:
                os.unlink(core)

        # clean up AssertionMessage
        if 'AssertionMessage' in self:
            # chop off "$n = 0x...." prefix, drop empty ones
            m = re.match('^\$\d+\s+=\s+0x[0-9a-fA-F]+\s+"(.*)"\s*$',
                         self['AssertionMessage'])
            if m:
                self['AssertionMessage'] = m.group(1)
                # drop a trailing escaped newline
                if self['AssertionMessage'].endswith('\\n'):
                    self['AssertionMessage'] = self['AssertionMessage'][0:-2]
            else:
                del self['AssertionMessage']

        if 'Stacktrace' in self:
            # derive the condensed fields from the full stack trace
            self._gen_stacktrace_top()
            addr_signature = self.crash_signature_addresses()
            if addr_signature:
                self['StacktraceAddressSignature'] = addr_signature
00664 
    def _gen_stacktrace_top(self):
        '''Build field StacktraceTop as the top five functions of Stacktrace.

        Signal handler invocations and related functions are skipped since they
        are generally not useful for triaging and duplicate detection.
        '''
        # frames at or above these functions merely unwind/report an error;
        # when one is seen, collection restarts below it
        unwind_functions = set(['g_logv', 'g_log', 'IA__g_log', 'IA__g_logv',
                                'g_assert_warning', 'IA__g_assert_warning',
                                '__GI_abort', '_XError'])
        toptrace = [''] * 5
        depth = 0
        unwound = False      # saw '<signal handler called>'; stop further unwinding
        unwinding = False    # currently skipping frames below an unwind function
        unwinding_xerror = False  # currently skipping X error reporting frames
        # frame with address: '#N  0xADDR in func (...)', or the
        # '<signal handler called>' marker line
        bt_fn_re = re.compile('^#(\d+)\s+(?:0x(?:\w+)\s+in\s+\*?(.*)|(<signal handler called>)\s*)$')
        # same, for frames printed without an address
        bt_fn_noaddr_re = re.compile('^#(\d+)\s+(?:(.*)|(<signal handler called>)\s*)$')
        # some internal functions like the SSE stubs cause unnecessary jitter
        ignore_functions_re = re.compile('^(__.*_s?sse\d+(?:_\w+)?|__kernel_vsyscall)$')

        for line in self['Stacktrace'].splitlines():
            m = bt_fn_re.match(line)
            if not m:
                m = bt_fn_noaddr_re.match(line)
                if not m:
                    # not a frame line (e. g. local variable output); skip
                    continue

            if not unwound or unwinding:
                # group(2): function-and-args text; group(3): handler marker
                if m.group(2):
                    fn = m.group(2).split()[0].split('(')[0]
                else:
                    fn = None

                # handle XErrors
                if unwinding_xerror:
                    # NOTE(review): fn can be None here for a
                    # '<signal handler called>' frame, which would raise
                    # AttributeError on startswith() -- confirm whether this
                    # combination can occur in practice
                    if fn.startswith('_X') or fn in ['handle_response', 'handle_error', 'XWindowEvent']:
                        continue
                    else:
                        unwinding_xerror = False

                if m.group(3) or fn in unwind_functions:
                    # discard everything collected so far and restart below
                    unwinding = True
                    depth = 0
                    toptrace = [''] * 5
                    if m.group(3):
                        # we stop unwinding when we found a <signal handler>,
                        # but we continue unwinding otherwise, as e. g. a glib
                        # abort is usually sitting on top of an XError
                        unwound = True

                    if fn == '_XError':
                        unwinding_xerror = True
                    continue
                else:
                    unwinding = False

            frame = m.group(2) or m.group(3)
            function = frame.split()[0]
            if depth < len(toptrace) and not ignore_functions_re.match(function):
                toptrace[depth] = frame
                depth += 1
        self['StacktraceTop'] = '\n'.join(toptrace).strip()
00726 
00727     def add_hooks_info(self, ui, package=None, srcpackage=None):
00728         '''Run hook script for collecting package specific data.
00729 
00730         A hook script needs to be in _hook_dir/<Package>.py or in
00731         _common_hook_dir/*.py and has to contain a function 'add_info(report,
00732         ui)' that takes and modifies a Report, and gets an UserInterface
00733         reference for interactivity.
00734 
00735         return True if the hook requested to stop the report filing process,
00736         False otherwise.
00737         '''
00738         symb = {}
00739 
00740         # common hooks
00741         for hook in glob.glob(_common_hook_dir + '/*.py'):
00742             try:
00743                 with open(hook) as fd:
00744                     exec(compile(fd.read(), hook, 'exec'), symb)
00745                 try:
00746                     symb['add_info'](self, ui)
00747                 except TypeError as e:
00748                     if str(e).startswith('add_info()'):
00749                         # older versions of apport did not pass UI, and hooks that
00750                         # do not require it don't need to take it
00751                         symb['add_info'](self)
00752                     else:
00753                         raise
00754             except StopIteration:
00755                 return True
00756             except:
00757                 apport.error('hook %s crashed:', hook)
00758                 traceback.print_exc()
00759                 pass
00760 
00761         # binary package hook
00762         if not package:
00763             package = self.get('Package')
00764         if package:
00765             hook = '%s/%s.py' % (_hook_dir, package.split()[0])
00766             if os.path.exists(hook):
00767                 try:
00768                     with open(hook) as fd:
00769                         exec(compile(fd.read(), hook, 'exec'), symb)
00770                     try:
00771                         symb['add_info'](self, ui)
00772                     except TypeError as e:
00773                         if str(e).startswith('add_info()'):
00774                             # older versions of apport did not pass UI, and hooks that
00775                             # do not require it don't need to take it
00776                             symb['add_info'](self)
00777                         else:
00778                             raise
00779                 except StopIteration:
00780                     return True
00781                 except:
00782                     apport.error('hook %s crashed:', hook)
00783                     traceback.print_exc()
00784                     pass
00785 
00786         # source package hook
00787         if not srcpackage:
00788             srcpackage = self.get('SourcePackage')
00789         if srcpackage:
00790             hook = '%s/source_%s.py' % (_hook_dir, srcpackage.split()[0])
00791             if os.path.exists(hook):
00792                 try:
00793                     with open(hook) as fd:
00794                         exec(compile(fd.read(), hook, 'exec'), symb)
00795                     try:
00796                         symb['add_info'](self, ui)
00797                     except TypeError as e:
00798                         if str(e).startswith('add_info()'):
00799                             # older versions of apport did not pass UI, and hooks that
00800                             # do not require it don't need to take it
00801                             symb['add_info'](self)
00802                         else:
00803                             raise
00804                 except StopIteration:
00805                     return True
00806                 except:
00807                     apport.error('hook %s crashed:', hook)
00808                     traceback.print_exc()
00809                     pass
00810 
00811         return False
00812 
00813     def search_bug_patterns(self, url):
00814         '''Check bug patterns loaded from the specified url.
00815 
00816         Return bug URL on match, or None otherwise.
00817 
00818         The url must refer to a valid XML document with the following syntax:
00819         root element := <patterns>
00820         patterns := <pattern url="http://bug.url"> *
00821         pattern := <re key="report_key">regular expression*</re> +
00822 
00823         For example:
00824         <?xml version="1.0"?>
00825         <patterns>
00826             <pattern url="http://bugtracker.net/bugs/1">
00827                 <re key="Foo">ba.*r</re>
00828             </pattern>
00829             <pattern url="http://bugtracker.net/bugs/2">
00830                 <re key="Package">^\S* 1-2$</re> <!-- test for a particular version -->
00831                 <re key="Foo">write_(hello|goodbye)</re>
00832             </pattern>
00833         </patterns>
00834         '''
00835         # some distros might not want to support these
00836         if not url:
00837             return
00838 
00839         try:
00840             f = urlopen(url)
00841             patterns = f.read().decode('UTF-8', errors='replace')
00842             f.close()
00843         except (IOError, URLError):
00844             # doesn't exist or failed to load
00845             return
00846 
00847         if '<title>404 Not Found' in patterns:
00848             return
00849 
00850         url = _check_bug_patterns(self, patterns)
00851         if url:
00852             return url
00853 
00854         return None
00855 
00856     def _get_ignore_dom(self):
00857         '''Read ignore list XML file and return a DOM tree.
00858 
00859         Return an empty DOM tree if file does not exist.
00860 
00861         Raises ValueError if the file exists but is invalid XML.
00862         '''
00863         ifpath = os.path.expanduser(_ignore_file)
00864         if not os.access(ifpath, os.R_OK) or os.path.getsize(ifpath) == 0:
00865             # create a document from scratch
00866             dom = xml.dom.getDOMImplementation().createDocument(None, 'apport', None)
00867         else:
00868             try:
00869                 dom = xml.dom.minidom.parse(ifpath)
00870             except ExpatError as e:
00871                 raise ValueError('%s has invalid format: %s' % (_ignore_file, str(e)))
00872 
00873         # remove whitespace so that writing back the XML does not accumulate
00874         # whitespace
00875         dom.documentElement.normalize()
00876         _dom_remove_space(dom.documentElement)
00877 
00878         return dom
00879 
00880     def check_ignored(self):
00881         '''Check if current report should not be presented.
00882 
00883         Reports can be suppressed by per-user blacklisting in
00884         ~/.apport-ignore.xml (in the real UID's home) and
00885         /etc/apport/blacklist.d/. For environments where you are only
00886         interested in crashes of some programs, you can also create a whitelist
00887         in /etc/apport/whitelist.d/, everything which does not match gets
00888         ignored then.
00889 
00890         This requires the ExecutablePath attribute. Throws a ValueError if the
00891         file has an invalid format.
00892         '''
00893         assert 'ExecutablePath' in self
00894 
00895         # check blacklist
00896         try:
00897             for f in os.listdir(_blacklist_dir):
00898                 try:
00899                     with open(os.path.join(_blacklist_dir, f)) as fd:
00900                         for line in fd:
00901                             if line.strip() == self['ExecutablePath']:
00902                                 return True
00903                 except IOError:
00904                     continue
00905         except OSError:
00906             pass
00907 
00908         # check whitelist
00909         try:
00910             whitelist = set()
00911             for f in os.listdir(_whitelist_dir):
00912                 try:
00913                     with open(os.path.join(_whitelist_dir, f)) as fd:
00914                         for line in fd:
00915                             whitelist.add(line.strip())
00916                 except IOError:
00917                     continue
00918 
00919             if whitelist and self['ExecutablePath'] not in whitelist:
00920                 return True
00921         except OSError:
00922             pass
00923 
00924         dom = self._get_ignore_dom()
00925 
00926         try:
00927             cur_mtime = int(os.stat(self['ExecutablePath']).st_mtime)
00928         except OSError:
00929             # if it does not exist any more, do nothing
00930             return False
00931 
00932         # search for existing entry and update it
00933         for ignore in dom.getElementsByTagName('ignore'):
00934             if ignore.getAttribute('program') == self['ExecutablePath']:
00935                 if float(ignore.getAttribute('mtime')) >= cur_mtime:
00936                     return True
00937 
00938         return False
00939 
00940     def mark_ignore(self):
00941         '''Ignore future crashes of this executable.
00942 
00943         Add a ignore list entry for this report to ~/.apport-ignore.xml, so
00944         that future reports for this ExecutablePath are not presented to the
00945         user any more.
00946 
00947         Throws a ValueError if the file already exists and has an invalid
00948         format.
00949         '''
00950         assert 'ExecutablePath' in self
00951 
00952         dom = self._get_ignore_dom()
00953         try:
00954             mtime = str(int(os.stat(self['ExecutablePath']).st_mtime))
00955         except OSError as e:
00956             # file went away underneath us, ignore
00957             if e.errno == errno.ENOENT:
00958                 return
00959             else:
00960                 raise
00961 
00962         # search for existing entry and update it
00963         for ignore in dom.getElementsByTagName('ignore'):
00964             if ignore.getAttribute('program') == self['ExecutablePath']:
00965                 ignore.setAttribute('mtime', mtime)
00966                 break
00967         else:
00968             # none exists yet, create new ignore node if none exists yet
00969             e = dom.createElement('ignore')
00970             e.setAttribute('program', self['ExecutablePath'])
00971             e.setAttribute('mtime', mtime)
00972             dom.documentElement.appendChild(e)
00973 
00974         # write back file
00975         with open(os.path.expanduser(_ignore_file), 'w') as fd:
00976             dom.writexml(fd, addindent='  ', newl='\n')
00977 
00978         dom.unlink()
00979 
00980     def has_useful_stacktrace(self):
00981         '''Check whether StackTrace can be considered 'useful'.
00982 
00983         The current heuristic is to consider it useless if it either is shorter
00984         than three lines and has any unknown function, or for longer traces, a
00985         minority of known functions.
00986         '''
00987         if not self.get('StacktraceTop'):
00988             return False
00989 
00990         unknown_fn = [f.startswith('??') for f in self['StacktraceTop'].splitlines()]
00991 
00992         if len(unknown_fn) < 3:
00993             return unknown_fn.count(True) == 0
00994 
00995         return unknown_fn.count(True) <= len(unknown_fn) / 2.
00996 
00997     def stacktrace_top_function(self):
00998         '''Return topmost function in StacktraceTop'''
00999 
01000         for l in self.get('StacktraceTop', '').splitlines():
01001             fname = l.split('(')[0].strip()
01002             if fname != '??':
01003                 return fname
01004 
01005         return None
01006 
01007     def standard_title(self):
01008         '''Create an appropriate title for a crash database entry.
01009 
01010         This contains the topmost function name from the stack trace and the
01011         signal (for signal crashes) or the Python exception (for unhandled
01012         Python exceptions).
01013 
01014         Return None if the report is not a crash or a default title could not
01015         be generated.
01016         '''
01017         # assertion failure
01018         if self.get('Signal') == '6' and \
01019                 'ExecutablePath' in self and \
01020                 'AssertionMessage' in self:
01021             return '%s assert failure: %s' % (
01022                 os.path.basename(self['ExecutablePath']),
01023                 self['AssertionMessage'])
01024 
01025         # signal crash
01026         if 'Signal' in self and 'ExecutablePath' in self and 'StacktraceTop' in self:
01027 
01028             signal_names = {
01029                 '4': 'SIGILL',
01030                 '6': 'SIGABRT',
01031                 '8': 'SIGFPE',
01032                 '11': 'SIGSEGV',
01033                 '13': 'SIGPIPE'}
01034 
01035             fn = self.stacktrace_top_function()
01036             if fn:
01037                 fn = ' in %s()' % fn
01038             else:
01039                 fn = ''
01040 
01041             arch_mismatch = ''
01042             if 'Architecture' in self and 'PackageArchitecture' in self and self['Architecture'] != self['PackageArchitecture'] and self['PackageArchitecture'] != 'all':
01043                 arch_mismatch = ' [non-native %s package]' % self['PackageArchitecture']
01044 
01045             return '%s crashed with %s%s%s' % (
01046                 os.path.basename(self['ExecutablePath']),
01047                 signal_names.get(self.get('Signal'), 'signal ' + self.get('Signal')),
01048                 fn, arch_mismatch
01049             )
01050 
01051         # Python exception
01052         if 'Traceback' in self and 'ExecutablePath' in self:
01053 
01054             trace = self['Traceback'].splitlines()
01055 
01056             if len(trace) < 1:
01057                 return None
01058             if len(trace) < 3:
01059                 return '%s crashed with %s' % (
01060                     os.path.basename(self['ExecutablePath']),
01061                     trace[0])
01062 
01063             trace_re = re.compile('^\s*File\s*"(\S+)".* in (.+)$')
01064             i = len(trace) - 1
01065             function = 'unknown'
01066             while i >= 0:
01067                 m = trace_re.match(trace[i])
01068                 if m:
01069                     module_path = m.group(1)
01070                     function = m.group(2)
01071                     break
01072                 i -= 1
01073 
01074             path = os.path.basename(self['ExecutablePath'])
01075             last_line = trace[-1]
01076             exception = last_line.split(':')[0]
01077             m = re.match('^%s: (.+)$' % re.escape(exception), last_line)
01078             if m:
01079                 message = m.group(1)
01080             else:
01081                 message = None
01082 
01083             if function == '<module>':
01084                 if module_path == self['ExecutablePath']:
01085                     context = '__main__'
01086                 else:
01087                     # Maybe use os.path.basename?
01088                     context = module_path
01089             else:
01090                 context = '%s()' % function
01091 
01092             title = '%s crashed with %s in %s' % (
01093                 path,
01094                 exception,
01095                 context
01096             )
01097 
01098             if message:
01099                 title += ': %s' % message
01100 
01101             return title
01102 
01103         # package problem
01104         if self.get('ProblemType') == 'Package' and 'Package' in self:
01105 
01106             title = 'package %s failed to install/upgrade' % \
01107                 self['Package']
01108             if self.get('ErrorMessage'):
01109                 title += ': ' + self['ErrorMessage'].splitlines()[-1]
01110 
01111             return title
01112 
01113         if self.get('ProblemType') == 'KernelOops' and 'OopsText' in self:
01114 
01115             oops = self['OopsText']
01116             if oops.startswith('------------[ cut here ]------------'):
01117                 title = oops.split('\n', 2)[1]
01118             else:
01119                 title = oops.split('\n', 1)[0]
01120 
01121             return title
01122 
01123         if self.get('ProblemType') == 'KernelOops' and 'Failure' in self:
01124             # Title the report with suspend or hibernate as appropriate,
01125             # and mention any non-free modules loaded up front.
01126             title = ''
01127             if 'MachineType' in self:
01128                 title += '[' + self['MachineType'] + '] '
01129             title += self['Failure'] + ' failure'
01130             if 'NonfreeKernelModules' in self:
01131                 title += ' [non-free: ' + self['NonfreeKernelModules'] + ']'
01132             title += '\n'
01133 
01134             return title
01135 
01136         return None
01137 
01138     def obsolete_packages(self):
01139         '''Return list of obsolete packages in Package and Dependencies.'''
01140 
01141         obsolete = []
01142         for l in (self.get('Package', '') + '\n' + self.get('Dependencies', '')).splitlines():
01143             if not l:
01144                 continue
01145             pkg, ver = l.split()[:2]
01146             avail = packaging.get_available_version(pkg)
01147             if ver is not None and ver != 'None' and avail is not None and packaging.compare_versions(ver, avail) < 0:
01148                 obsolete.append(pkg)
01149         return obsolete
01150 
01151     def crash_signature(self):
01152         '''Get a signature string for a crash.
01153 
01154         This is suitable for identifying duplicates.
01155 
01156         For signal crashes this the concatenation of ExecutablePath, Signal
01157         number, and StacktraceTop function names, separated by a colon. If
01158         StacktraceTop has unknown functions or the report lacks any of those
01159         fields, return None. In this case, you can use
01160         crash_signature_addresses() to get a less precise duplicate signature
01161         based on addresses instead of symbol names.
01162 
01163         For assertion failures, it is the concatenation of ExecutablePath
01164         and assertion message, separated by colons.
01165 
01166         For Python crashes, this concatenates the ExecutablePath, exception
01167         name, and Traceback function names, again separated by a colon.
01168         '''
01169         if 'ExecutablePath' not in self and not self['ProblemType'] == 'KernelCrash':
01170             return None
01171 
01172         # kernel crash
01173         if 'Stacktrace' in self and self['ProblemType'] == 'KernelCrash':
01174             sig = 'kernel'
01175             regex = re.compile('^\s*\#\d+\s\[\w+\]\s(\w+)')
01176             for line in self['Stacktrace'].splitlines():
01177                 m = regex.match(line)
01178                 if m:
01179                     sig += ':' + (m.group(1))
01180             return sig
01181 
01182         # assertion failures
01183         if self.get('Signal') == '6' and 'AssertionMessage' in self:
01184             sig = self['ExecutablePath'] + ':' + self['AssertionMessage']
01185             # filter out addresses, to help match duplicates more sanely
01186             return re.sub(r'0x[0-9a-f]{6,}', 'ADDR', sig)
01187 
01188         # signal crashes
01189         if 'StacktraceTop' in self and 'Signal' in self:
01190             sig = '%s:%s' % (self['ExecutablePath'], self['Signal'])
01191             bt_fn_re = re.compile('^(?:([\w:~]+).*|(<signal handler called>)\s*)$')
01192 
01193             lines = self['StacktraceTop'].splitlines()
01194             if len(lines) < 2:
01195                 return None
01196 
01197             for line in lines:
01198                 m = bt_fn_re.match(line)
01199                 if m:
01200                     sig += ':' + (m.group(1) or m.group(2))
01201                 else:
01202                     # this will also catch ??
01203                     return None
01204             return sig
01205 
01206         # Python crashes
01207         if 'Traceback' in self:
01208             trace = self['Traceback'].splitlines()
01209 
01210             sig = ''
01211             if len(trace) == 1:
01212                 # sometimes, Python exceptions do not have file references
01213                 m = re.match('(\w+): ', trace[0])
01214                 if m:
01215                     return self['ExecutablePath'] + ':' + m.group(1)
01216                 else:
01217                     return None
01218             elif len(trace) < 3:
01219                 return None
01220 
01221             loc_re = re.compile('^\s+File "([^"]+).*line (\d+).*\sin (.*)$')
01222             for l in trace:
01223                 m = loc_re.match(l)
01224                 if m:
01225                     # if we have a function name, use this; for a a crash
01226                     # outside of a function/method, fall back to the source
01227                     # file location
01228                     if m.group(3) != '<module>':
01229                         sig += ':' + m.group(3)
01230                     else:
01231                         sig += ':%s@%s' % (m.group(1), m.group(2))
01232 
01233             return self['ExecutablePath'] + ':' + trace[-1].split(':')[0] + sig
01234 
01235         return None
01236 
01237     def crash_signature_addresses(self):
01238         '''Compute heuristic duplicate signature for a signal crash.
01239 
01240         This should be used if crash_signature() fails, i. e. Stacktrace does
01241         not have enough symbols.
01242 
01243         This approach only uses addresses in the stack trace and does not rely
01244         on symbol resolution. As we can't unwind these stack traces, we cannot
01245         limit them to the top five frames and thus will end up with several or
01246         many different signatures for a particular crash. But these can be
01247         computed and synchronously checked with a crash database at the client
01248         side, which avoids having to upload and process the full report. So on
01249         the server-side crash database we will only have to deal with all the
01250         equivalence classes (i. e. same crash producing a number of possible
01251         signatures) instead of every single report.
01252 
01253         Return None when signature cannot be determined.
01254         '''
01255         if not 'ProcMaps' in self or not 'Stacktrace' in self or not 'Signal' in self:
01256             return None
01257 
01258         stack = []
01259         failed = 0
01260         for line in self['Stacktrace'].splitlines():
01261             if line.startswith('#'):
01262                 addr = line.split()[1]
01263                 if not addr.startswith('0x'):
01264                     continue
01265                 addr = int(addr, 16)  # we do want to know about ValueErrors here, so don't catch
01266                 offset = self._address_to_offset(addr)
01267                 if offset:
01268                     # avoid ':' in ELF paths, we use that as separator
01269                     stack.append(offset.replace(':', '..'))
01270                 else:
01271                     failed += 1
01272 
01273             # stack unwinding chops off ~ 5 functions, and we need some more
01274             # accuracy because we do not have symbols; but beyond a depth of 15
01275             # we get too much noise, so we can abort there
01276             if len(stack) >= 15:
01277                 break
01278 
01279         # we only accept a small minority (< 20%) of failed resolutions, otherwise we
01280         # discard
01281         if failed > 0 and len(stack) / failed < 4:
01282             return None
01283 
01284         # we also discard if the trace is too short
01285         if (failed == 0 and len(stack) < 3) or (failed > 0 and len(stack) < 6):
01286             return None
01287 
01288         return '%s:%s:%s:%s' % (
01289             self['ExecutablePath'],
01290             self['Signal'],
01291             os.uname()[4],
01292             ':'.join(stack))
01293 
01294     def anonymize(self):
01295         '''Remove user identifying strings from the report.
01296 
01297         This particularly removes the user name, host name, and IPs
01298         from attributes which contain data read from the environment, and
01299         removes the ProcCwd attribute completely.
01300         '''
01301         replacements = []
01302         if (os.getuid() > 0):
01303             # do not replace "root"
01304             p = pwd.getpwuid(os.getuid())
01305             if len(p[0]) >= 2:
01306                 replacements.append((re.compile('\\b%s\\b' % p[0]), 'username'))
01307             replacements.append((re.compile('\\b%s\\b' % p[5]), '/home/username'))
01308 
01309             for s in p[4].split(','):
01310                 s = s.strip()
01311                 if len(s) > 2:
01312                     replacements.append((re.compile('\\b%s\\b' % s), 'User Name'))
01313 
01314         hostname = os.uname()[1]
01315         if len(hostname) >= 2:
01316             replacements.append((re.compile('\\b%s\\b' % hostname), 'hostname'))
01317 
01318         try:
01319             del self['ProcCwd']
01320         except KeyError:
01321             pass
01322 
01323         for k in self:
01324             is_proc_field = k.startswith('Proc') and not k in [
01325                 'ProcCpuinfo', 'ProcMaps', 'ProcStatus', 'ProcInterrupts', 'ProcModules']
01326             if is_proc_field or 'Stacktrace' in k or k in ['Traceback', 'PythonArgs', 'Title']:
01327                 if not hasattr(self[k], 'isspace'):
01328                     continue
01329                 for (pattern, repl) in replacements:
01330                     if type(self[k]) == bytes:
01331                         self[k] = pattern.sub(repl, self[k].decode('UTF-8', errors='replace')).encode('UTF-8')
01332                     else:
01333                         self[k] = pattern.sub(repl, self[k])
01334 
01335     def _address_to_offset(self, addr):
01336         '''Resolve a memory address to an ELF name and offset.
01337 
01338         This can be used for building duplicate signatures from non-symbolic
01339         stack traces. These often do not have enough symbols available to
01340         resolve function names, but taking the raw addresses also is not
01341         suitable due to ASLR. But the offsets within a library should be
01342         constant between crashes (assuming the same version of all libraries).
01343 
01344         This needs and uses the "ProcMaps" field to resolve addresses.
01345 
01346         Return 'path+offset' when found, or None if address is not in any
01347         mapped range.
01348         '''
01349         self._build_proc_maps_cache()
01350 
01351         for (start, end, elf) in self._proc_maps_cache:
01352             if start <= addr and end >= addr:
01353                 return '%s+%x' % (elf, addr - start)
01354 
01355         return None
01356 
01357     def _build_proc_maps_cache(self):
01358         '''Generate self._proc_maps_cache from ProcMaps field.
01359 
01360         This only gets done once.
01361         '''
01362         if self._proc_maps_cache:
01363             return
01364 
01365         assert 'ProcMaps' in self
01366         self._proc_maps_cache = []
01367         # library paths might have spaces, so we need to make some assumptions
01368         # about the intermediate fields. But we know that in between the pre-last
01369         # data field and the path there are many spaces, while between the
01370         # other data fields there is only one. So we take 4 or more spaces as
01371         # the separator of the last data field and the path.
01372         fmt = re.compile('^([0-9a-fA-F]+)-([0-9a-fA-F]+).*\s{4,}(\S.*$)')
01373         fmt_unknown = re.compile('^([0-9a-fA-F]+)-([0-9a-fA-F]+)\s')
01374 
01375         for line in self['ProcMaps'].splitlines():
01376             if not line.strip():
01377                 continue
01378             m = fmt.match(line)
01379             if not m:
01380                 # ignore lines with unknown ELF
01381                 if fmt_unknown.match(line):
01382                     continue
01383                 # but complain otherwise, as this means we encounter an
01384                 # architecture or new kernel version where the format changed
01385                 assert m, 'cannot parse ProcMaps line: ' + line
01386             self._proc_maps_cache.append((int(m.group(1), 16),
01387                                           int(m.group(2), 16), m.group(3)))