Back to index

apport  2.3
parse_segv.py
Go to the documentation of this file.
00001 #!/usr/bin/python
00002 # Examine the crash files saved by apport to attempt to determine the cause
00003 # of a segfault.  Currently very very simplistic, and only finds commonly
00004 # understood situations for x86/x86_64.
00005 #
00006 # Copyright 2009-2010  Canonical, Ltd.
00007 # Author: Kees Cook <kees@ubuntu.com>
00008 #
00009 # This program is free software; you can redistribute it and/or modify it
00010 # under the terms of the GNU General Public License as published by the
00011 # Free Software Foundation; either version 2 of the License, or (at your
00012 # option) any later version.  See http://www.gnu.org/copyleft/gpl.html for
00013 # the full text of the license.
00014 
00015 import sys, re, logging, io
00016 
00017 
class ParseSegv(object):
    '''Heuristically explain a segmentation fault on x86/x86_64.

    The constructor takes three text blobs: a register dump (one
    "name hexvalue ..." entry per line), a disassembly whose first
    instruction line is the faulting instruction (AT&T operand order, source
    first), and a memory map listing (one "start-end perms offset dev inode
    [name]" entry per line).  report() then checks whether the program
    counter, the instruction operands and the stack pointer fall into
    mappings with suitable permissions.

    Attributes set by the constructor:
      regs      -- dict mapping register name to integer value
      sp        -- value of esp/rsp, or None when neither is present
      line      -- the (cleaned-up) faulting disassembly line
      pc        -- address parsed from that line
      insn      -- instruction mnemonic, or None when it could not be read
      src, dest -- operand strings, or None
      maps      -- list of dicts with keys 'start', 'end', 'perms', 'name'
      stack_vma -- index into maps of the '[stack]' entry, or None
    '''

    # Instructions that implicitly access memory at the stack pointer.
    _STACK_INSNS = ('push', 'pop', 'pushl', 'popl', 'call', 'callq', 'ret', 'retq')

    # Splits an operand list on commas that are not inside parentheses.
    # Raw string: the pattern text is unchanged, but the backslashes are no
    # longer invalid string escapes.
    _ARG_RE = re.compile(r'([^,\(]*(\(:?[^\)]+\))*)')

    # perm flag -> [adjective used in the detail text, verb used in the reason]
    _PERM_NAMES = {'x': ['executable', 'executing'],
                   'r': ['readable', 'reading'],
                   'w': ['writable', 'writing']}

    def __init__(self, registers, disassembly, maps, debug=False):
        '''Parse the three crash-report text blobs.

        registers, disassembly and maps are strings; see the class
        description for their expected layout.  With debug=True, logging is
        configured to emit DEBUG messages on stderr.

        Raises ValueError when any of the inputs cannot be parsed.
        '''
        if debug:
            stream = sys.stderr
            if sys.version_info[0] >= 3:
                # sys.stderr is a text stream on Python 3, so it cannot be
                # wrapped in a TextIOWrapper (that needs a binary buffer).
                # Open a UTF-8 text view on the same file descriptor instead;
                # closefd=False keeps stderr open when the view is dropped.
                try:
                    stream = io.open(sys.stderr.fileno(), 'w',
                                     encoding='UTF-8', closefd=False)
                except (AttributeError, ValueError, OSError):
                    # stderr is missing or has no real descriptor
                    stream = sys.stderr
            logging.basicConfig(level=logging.DEBUG, stream=stream)

        self.regs = self.parse_regs(registers)
        self.sp = None
        # No break on purpose: if both names are present, 'esp' wins.
        for reg in ['rsp', 'esp']:
            if reg in self.regs:
                self.sp = self.regs[reg]

        self.line, self.pc, self.insn, self.src, self.dest = \
            self.parse_disassembly(disassembly)

        self.stack_vma = None
        self.maps = self.parse_maps(maps)

    def find_vma(self, addr):
        '''Return the map entry containing addr (start <= addr < end), or None.'''
        for vma in self.maps:
            if vma['start'] <= addr < vma['end']:
                return vma
        return None

    def parse_maps(self, maps_str):
        '''Parse a memory map listing into a list of dicts.

        Each dict has the keys 'start' and 'end' (integers), 'perms' (string)
        and 'name' (sixth whitespace-separated field, or None when the line
        has no such field).  As a side effect, self.stack_vma is set to the
        list index of the entry named '[stack]'.

        Raises ValueError for a line with fewer than four fields or with a
        malformed address range.
        '''
        maps = []
        for line in maps_str.splitlines():
            items = line.strip().split()
            if len(items) < 4:
                raise ValueError('Cannot parse maps line: %s' % (line.strip()))
            span, perms = items[0:2]
            # Lines without a sixth field are treated as anonymous mappings,
            # instead of failing with IndexError on short lines.
            name = items[5] if len(items) > 5 else None
            start, end = [int(x, 16) for x in span.split('-')]
            if name == '[stack]':
                self.stack_vma = len(maps)
            maps.append({'start': start, 'end': end, 'perms': perms, 'name': name})
            logging.debug('start: %s, end: %s, perms: %s, name: %s', start, end, perms, name)
        return maps

    def parse_regs(self, reg_str):
        '''Parse "name hexvalue ..." lines into a dict of name -> int.

        Raises ValueError when a line has fewer than two fields or the second
        field is not a hexadecimal number.
        '''
        regs = dict()
        for line in reg_str.splitlines():
            reg, hexvalue = line.split()[0:2]
            regs[reg] = int(hexvalue, 16)
            logging.debug('%s:0x%08x', reg, regs[reg])
        return regs

    def parse_disassembly(self, disassembly):
        '''Parse the faulting instruction from a disassembly listing.

        Only the first instruction line is examined (after dropping an
        optional leading 'Dump' line); a second line is joined to it when the
        first one has an address but no instruction text.

        Returns a tuple (line, pc, insn, src, dest).  insn, src and dest are
        None when the instruction memory could not be read; src/dest are None
        when the instruction has no such operand.  For stack instructions
        dest is replaced by '(%rsp)' or '(%esp)'.

        Raises ValueError when registers are not loaded, the listing is
        empty, or the line cannot be parsed.
        '''
        if not self.regs:
            raise ValueError('Registers not loaded yet!?')
        lines = disassembly.splitlines()
        # Throw away possible 'Dump' gdb report line
        if lines and lines[0].startswith('Dump'):
            lines.pop(0)
        if not lines:
            raise ValueError('Failed to load empty disassembly')
        line = lines[0].strip()
        # Drop GDB 7.1's leading $pc mark
        if line.startswith('=>'):
            line = line[2:].strip()
        logging.debug(line)

        fields = line.split()
        pc_str = fields[0] if fields else ''
        if not pc_str.startswith('0x'):
            # Could not identify this instruction line
            raise ValueError('Could not parse PC "%s" from disassembly line: %s' % (pc_str, line))
        pc = int(pc_str.split(':')[0], 16)
        logging.debug('pc: 0x%08x', pc)

        if ':' not in line:
            raise ValueError('Could not parse instruction from disassembly line: %s' % (line))
        full_insn_str = line.split(':', 1)[1].strip()
        # Handle invalid memory
        if 'Cannot access memory at address' in full_insn_str or (full_insn_str == '' and len(lines) == 1):
            return line, pc, None, None, None
        # Handle wrapped lines (at this point an empty instruction implies
        # that a second line exists)
        if full_insn_str == '' and lines[1].startswith(' '):
            line = line + ' ' + lines[1].strip()
            full_insn_str = line.split(':', 1)[1].strip()
        if full_insn_str == '':
            raise ValueError('Could not parse instruction from disassembly line: %s' % (line))

        insn_parts = full_insn_str.split()
        # Drop call target names "call   0xb7a805af <_Unwind_Find_FDE@plt+111>"
        if insn_parts[-1].endswith('>') and insn_parts[-1].startswith('<'):
            insn_parts.pop(-1)
        # Attempt to find arguments: the last remaining token, if any
        args_str = ''
        if len(insn_parts) > 1:
            args_str = insn_parts.pop(-1)
        # Assume remainder is the insn itself
        insn = ' '.join(insn_parts)
        logging.debug('insn: %s', insn)

        src = None
        dest = None
        if args_str != '':
            logging.debug('args: "%s"', args_str)
            # The pattern also yields empty matches; keep only real operands
            args = [m.group(0) for m in self._ARG_RE.finditer(args_str) if m.group(0)]
            if len(args) > 0:
                src = args[0]
                logging.debug('src: %s', src)
            if len(args) > 1:
                dest = args[1]
                logging.debug('dest: %s', dest)

        # Set up possible implicit memory destinations (stack actions)
        if insn in self._STACK_INSNS:
            for reg in ['rsp', 'esp']:
                if reg in self.regs:
                    dest = '(%%%s)' % (reg)
                    break

        return line, pc, insn, src, dest

    def validate_vma(self, perm, addr, name):
        '''Check that addr lies in a mapping that grants permission perm.

        perm is one of 'x', 'r', 'w'; name is the label used for addr in the
        messages.  Returns a tuple (valid, detail, short) where valid is a
        bool, detail is a full sentence and short is a brief reason.
        Addresses below 65536 outside any mapping are labelled 'NULL'.
        '''
        adjective, verb = self._PERM_NAMES[perm]
        vma = self.find_vma(addr)
        if vma is None:
            alarmist = 'unknown'
            if addr < 65536:
                alarmist = 'NULL'
            detail = '%s (0x%08x) not located in a known VMA region (needed %s region)!' % (name, addr, adjective)
            return False, detail, '%s %s VMA' % (verb, alarmist)

        if perm not in vma['perms']:
            alarmist = ''
            if perm == 'x':
                # For execution faults, note whether the region is writable
                if 'w' in vma['perms']:
                    alarmist = 'writable '
                else:
                    alarmist = 'non-writable '
            short = '%s %sVMA %s' % (verb, alarmist, vma['name'])
            detail = '%s (0x%08x) in non-%s VMA region: 0x%08x-0x%08x %s %s' % (
                name, addr, adjective, vma['start'], vma['end'], vma['perms'], vma['name'])
            return False, detail, short

        return True, '%s (0x%08x) ok' % (name, addr), '%s ok' % (verb)

    def register_value(self, reg):
        '''Return the value of register reg (with or without leading '%').

        If reg itself is not in the register dump, progressively wider names
        are tried (Xl -> Xx -> eXx -> rXx) and the wider value is truncated
        to the width of the requested register.

        Raises ValueError when no matching register is found.
        '''
        name = reg[1:] if reg.startswith('%') else reg
        if name in self.regs:
            return self.regs[name]

        # Bits of the wider register that belong to the requested one; fixed
        # by the first (narrowest) widening step that applies.
        keep = None

        if len(name) == 2 and name.endswith('l'):
            keep = 0xff
            name = '%sx' % name[0]
            if name in self.regs:
                return self.regs[name] & keep

        if len(name) == 2 and name.endswith('x'):
            if keep is None:
                keep = 0xffff
            name = 'e%s' % name
            if name in self.regs:
                return self.regs[name] & keep

        if len(name) == 3 and name.startswith('e'):
            if keep is None:
                keep = 0xffffffff
            name = 'r%s' % name[1:]
            if name in self.regs:
                return self.regs[name] & keep

        raise ValueError("Could not resolve register '%s'" % (reg))

    def _operand_value(self, text):
        '''Value of one element inside "(base,index,scale)".

        '%reg' is looked up via register_value(), an empty string is 0 and
        anything else is parsed as a decimal integer.
        '''
        if text.startswith('%'):
            return self.register_value(text)
        if text == '':
            return 0
        return int(text)

    def calculate_arg(self, arg):
        '''Compute the address an operand string refers to.

        Handles "%seg:..." prefixes (the segment register value is simply
        added), an optional '*' call-target marker, signed hexadecimal
        displacements, "*%reg" and "(base,index,scale)".  The result is
        wrapped to 32 bits when 'esp' is in the register dump, else 64 bits.

        Raises ValueError for an unparsable displacement or unknown register.
        '''
        # Check for and pre-remove segment offset
        segment = 0
        if arg.startswith('%') and ':' in arg:
            seg_reg, arg = arg.split(':', 1)
            segment = self.register_value(seg_reg)

        # Handle standard offsets
        parts = arg.split('(')
        offset = parts[0]
        # Handle negative signs
        sign = 1
        if offset.startswith('-'):
            sign = -1
            offset = offset[1:]
        # Skip call target dereferences
        if offset.startswith('*'):
            offset = offset[1:]
            # "*-0x10(%eax)": the sign follows the dereference marker
            if sign == 1 and offset.startswith('-'):
                sign = -1
                offset = offset[1:]

        if offset == '':
            add = 0
        elif offset.startswith('%'):
            # Handle the *%REG case
            add = self.register_value(offset)
        elif offset.startswith('0x'):
            add = int(offset[2:], 16) * sign
        else:
            raise ValueError('Unknown offset literal: %s' % (parts[0]))

        # (%ebx, %ecx, 4) style
        value = 0
        if len(parts) > 1:
            # Drop the trailing ')' and split into base, index, scale
            operands = parts[1][0:-1].split(',')
            base = self._operand_value(operands[0])
            index = 0
            if len(operands) > 1:
                index = self._operand_value(operands[1])
            scale = 1
            if len(operands) > 2:
                scale = self._operand_value(operands[2])
            value = base + index * scale

        value = segment + value + add
        if 'esp' in self.regs:
            # 32bit
            return value % 0x100000000
        # 64bit
        return value % 0x10000000000000000

    def report(self):
        '''Analyse the parsed crash data.

        Returns a tuple (understood, reason, details): understood is True
        when at least one concrete cause was identified, reason is a
        newline-joined list of short causes, and details is a newline-joined
        log of every check performed.
        '''
        understood = False
        reason = []
        details = ['Segfault happened at: %s' % (self.line)]

        # Verify PC is in an executable region
        valid, out, short = self.validate_vma('x', self.pc, 'PC')
        details.append(out)
        if not valid:
            reason.append(short)
            understood = True

        if self.insn in ['lea', 'leal']:
            # Short-circuit for instructions that do not cause vma access
            details.append('insn (%s) does not access VMA' % (self.insn))
        else:
            # Verify source is readable
            if self.src:
                plain = (':' not in self.src
                         and self.src[0] in ('%', '$', '*')
                         and not self.src.startswith('*%'))
                if plain:
                    details.append('source "%s" ok' % (self.src))
                else:
                    addr = self.calculate_arg(self.src)
                    valid, out, short = self.validate_vma('r', addr, 'source "%s"' % (self.src))
                    details.append(out)
                    if not valid:
                        reason.append(short)
                        understood = True

            # Verify destination is writable
            if self.dest:
                if ':' not in self.dest and self.dest[0] in ('%', '$', '*'):
                    details.append('destination "%s" ok' % (self.dest))
                else:
                    addr = self.calculate_arg(self.dest)
                    valid, out, short = self.validate_vma('w', addr, 'destination "%s"' % (self.dest))
                    details.append(out)
                    if not valid:
                        reason.append(short)
                        understood = True

        # Handle I/O port operations
        if self.insn in ['out', 'in'] and not understood:
            msg = 'disallowed I/O port operation on port %d' % (self.register_value(self.src))
            reason.append(msg)
            details.append(msg)
            understood = True

        # Note position of SP with regard to "[stack]" VMA
        if self.sp is not None:
            if self.stack_vma is not None:
                stack = self.maps[self.stack_vma]
                if self.sp < stack['start']:
                    details.append("Stack memory exhausted (SP below stack segment)")
                if self.sp >= stack['end']:
                    details.append("Stack pointer not within stack segment")
            if not understood:
                valid, out, short = self.validate_vma('r', self.sp, 'SP')
                details.append(out)
                if not valid:
                    reason.append(short)
                    understood = True

        if not understood:
            vma = self.find_vma(self.pc)
            if vma and vma['name'] in ('[vdso]', '[vsyscall]'):
                msg = 'Reason could not be automatically determined. (Unhandled exception in kernel code?)'
            else:
                msg = 'Reason could not be automatically determined.'
            reason.append(msg)
            details.append(msg)
        return understood, '\n'.join(reason), '\n'.join(details)
00334 
00335 
def add_info(report):
    '''Add segfault analysis fields to a crash report, in place.

    report is a dict-like object.  Nothing is done unless its 'Signal' field
    is '11' and its 'Architecture' is 'i386' or 'amd64'.  When one of the
    required fields is absent, or the analysis fails, 'SegvAnalysis' is set
    to a short explanation.  Otherwise 'SegvAnalysis' receives the detailed
    check log and, if a cause was identified, 'SegvReason' receives it.
    '''
    # Only interested in segmentation faults...
    if report.get('Signal', '0') != '11':
        return

    for field in ('Signal', 'Architecture', 'Disassembly', 'ProcMaps', 'Registers'):
        if field not in report:
            report['SegvAnalysis'] = 'Skipped: missing required field "%s"' % (field)
            return

    # Only run on segv for x86 and x86_64...
    if report['Architecture'] not in ('i386', 'amd64'):
        return

    try:
        segv = ParseSegv(report['Registers'], report['Disassembly'], report['ProcMaps'])
        understood, reason, details = segv.report()
        if understood:
            report['SegvReason'] = reason
        report['SegvAnalysis'] = details
    except Exception as e:
        # Best effort: record any analysis failure in the report, but let
        # KeyboardInterrupt/SystemExit propagate to the caller.
        report['SegvAnalysis'] = 'Failure: %s' % (str(e))
00359 
00360 
# Stand-alone use: analyse a crash from three text files given on the command
# line (registers, disassembly, memory maps).  Exits with status 0 when a
# cause was identified and 1 otherwise; the usage text also exits with 0.
if __name__ == '__main__':
    if len(sys.argv) != 4 or sys.argv[1] in ['-h', '--help']:
        print('To run self-test, run without any arguments (or with -v)')
        print('To do stand-alone crash parsing:')
        print('  Usage: %s Registers.txt Disassembly.txt ProcMaps.txt' % (sys.argv[0]))
        sys.exit(0)

    # Read each input inside a "with" block so the file is closed right away
    contents = []
    for path in sys.argv[1:4]:
        with open(path) as handle:
            contents.append(handle.read())

    segv = ParseSegv(contents[0], contents[1], contents[2])
    understood, reason, details = segv.report()
    print('%s\n\n%s' % (reason, details))
    sys.exit(0 if understood else 1)