Back to index

apport  2.4
problem_report.py
Go to the documentation of this file.
00001 # vim: set encoding=UTF-8 fileencoding=UTF-8 :
00002 
00003 '''Store, load, and handle problem reports.'''
00004 
00005 # Copyright (C) 2006 - 2012 Canonical Ltd.
00006 # Author: Martin Pitt <martin.pitt@ubuntu.com>
00007 #
00008 # This program is free software; you can redistribute it and/or modify it
00009 # under the terms of the GNU General Public License as published by the
00010 # Free Software Foundation; either version 2 of the License, or (at your
00011 # option) any later version.  See http://www.gnu.org/copyleft/gpl.html for
00012 # the full text of the license.
00013 
00014 import zlib, base64, time, sys, gzip, struct, os
00015 from email.encoders import encode_base64
00016 from email.mime.multipart import MIMEMultipart
00017 from email.mime.base import MIMEBase
00018 from email.mime.text import MIMEText
00019 from io import BytesIO
00020 
# Select the UserDict implementation for the running interpreter and record
# whether this is Python 2.  The major version is compared numerically via
# sys.version_info; looking at the first character of the sys.version string
# would misclassify any interpreter with a two-digit major version.
if sys.version_info[0] < 3:
    from UserDict import IterableUserDict as UserDict
    UserDict  # pyflakes
    _python2 = True
else:
    from collections import UserDict
    _python2 = False
00028 
00029 
class CompressedValue:
    '''Represent a ProblemReport value which is gzip compressed.

    The compressed bytes are kept in the gzipvalue attribute (None until a
    value has been set).  name is the file name passed to gzip when
    set_value() compresses data.
    '''

    def __init__(self, value=None, name=None):
        '''Initialize an empty CompressedValue object with an optional name.

        If value is given and non-empty, it is compressed right away.
        '''
        self.gzipvalue = None
        self.name = name
        # By default, compressed values are in gzip format. Earlier versions of
        # problem_report used zlib format (without gzip header). If you have
        # such a case, set legacy_zlib to True.
        self.legacy_zlib = False

        if value:
            self.set_value(value)

    def set_value(self, value):
        '''Set uncompressed value.

        value is compressed into gzip format and replaces any previous
        content; legacy_zlib is reset to False.
        '''
        out = BytesIO()
        # The end of the deflate stream and the CRC/size trailer are only
        # written when the GzipFile is closed, so close it explicitly instead
        # of relying on the object being finalized before getvalue().
        with gzip.GzipFile(self.name, mode='wb', fileobj=out) as gz:
            gz.write(value)
        self.gzipvalue = out.getvalue()
        self.legacy_zlib = False

    def get_value(self):
        '''Return uncompressed value, or None if nothing has been set.'''

        if not self.gzipvalue:
            return None

        if self.legacy_zlib:
            return zlib.decompress(self.gzipvalue)
        with gzip.GzipFile(fileobj=BytesIO(self.gzipvalue)) as gz:
            return gz.read()

    def write(self, file):
        '''Write uncompressed value into given file-like object.

        gzip data is decompressed and written in blocks of 1 MiB; legacy
        zlib data is decompressed in one go.
        '''
        assert self.gzipvalue

        if self.legacy_zlib:
            file.write(zlib.decompress(self.gzipvalue))
            return

        with gzip.GzipFile(fileobj=BytesIO(self.gzipvalue)) as gz:
            for block in iter(lambda: gz.read(1048576), b''):
                file.write(block)

    def __len__(self):
        '''Return length of uncompressed value.'''

        assert self.gzipvalue
        if self.legacy_zlib:
            return len(self.get_value())
        # the last four bytes of a gzip member hold the uncompressed size
        # (modulo 2**32) as a little-endian integer
        return int(struct.unpack('<L', self.gzipvalue[-4:])[0])

    def splitlines(self):
        '''Behaves like splitlines() for a normal string.'''

        return self.get_value().splitlines()
00092 
00093 
00094 class ProblemReport(UserDict):
00095     def __init__(self, type='Crash', date=None):
00096         '''Initialize a fresh problem report.
00097 
00098         type can be 'Crash', 'Packaging', 'KernelCrash' or 'KernelOops'.
00099         date is the desired date/time string; if None (default), the
00100         current local time is used.
00101         '''
00102         if date is None:
00103             date = time.asctime()
00104         self.data = {'ProblemType': type, 'Date': date}
00105 
00106         # keeps track of keys which were added since the last ctor or load()
00107         self.old_keys = set()
00108 
00109     def load(self, file, binary=True):
00110         '''Initialize problem report from a file-like object.
00111 
00112         If binary is False, binary data is not loaded; the dictionary key is
00113         created, but its value will be an empty string. If it is True, it is
00114         transparently uncompressed and available as dictionary byte array values.
00115         If binary is 'compressed', the compressed value is retained, and the
00116         dictionary value will be a CompressedValue object. This is useful if
00117         the compressed value is still useful (to avoid recompression if the
00118         file needs to be written back).
00119 
00120         file needs to be opened in binary mode.
00121 
00122         Files are in RFC822 format.
00123         '''
00124         self._assert_bin_mode(file)
00125         self.data.clear()
00126         key = None
00127         value = None
00128         b64_block = False
00129         bd = None
00130         for line in file:
00131             # continuation line
00132             if line.startswith(b' '):
00133                 if b64_block and not binary:
00134                     continue
00135                 assert (key is not None and value is not None)
00136                 if b64_block:
00137                     l = base64.b64decode(line)
00138                     if bd:
00139                         value += bd.decompress(l)
00140                     else:
00141                         if binary == 'compressed':
00142                             # check gzip header; if absent, we have legacy zlib
00143                             # data
00144                             if value.gzipvalue == b'' and not l.startswith(b'\037\213\010'):
00145                                 value.legacy_zlib = True
00146                             value.gzipvalue += l
00147                         else:
00148                             # lazy initialization of bd
00149                             # skip gzip header, if present
00150                             if l.startswith(b'\037\213\010'):
00151                                 bd = zlib.decompressobj(-zlib.MAX_WBITS)
00152                                 value = bd.decompress(self._strip_gzip_header(l))
00153                             else:
00154                                 # legacy zlib-only format used default block
00155                                 # size
00156                                 bd = zlib.decompressobj()
00157                                 value += bd.decompress(l)
00158                 else:
00159                     if len(value) > 0:
00160                         value += b'\n'
00161                     if line.endswith(b'\n'):
00162                         value += line[1:-1]
00163                     else:
00164                         value += line[1:]
00165             else:
00166                 if b64_block:
00167                     if bd:
00168                         value += bd.flush()
00169                     b64_block = False
00170                     bd = None
00171                 if key:
00172                     assert value is not None
00173                     self.data[key] = self._try_unicode(value)
00174                 (key, value) = line.split(b':', 1)
00175                 if not _python2:
00176                     key = key.decode('ASCII')
00177                 value = value.strip()
00178                 if value == b'base64':
00179                     if binary == 'compressed':
00180                         value = CompressedValue(key.encode())
00181                         value.gzipvalue = b''
00182                     else:
00183                         value = b''
00184                     b64_block = True
00185 
00186         if key is not None:
00187             self.data[key] = self._try_unicode(value)
00188 
00189         self.old_keys = set(self.data.keys())
00190 
00191     def has_removed_fields(self):
00192         '''Check if the report has any keys which were not loaded.
00193 
00194         This could happen when using binary=False in load().
00195         '''
00196         return ('' in self.values())
00197 
00198     @classmethod
00199     def _is_binary(klass, string):
00200         '''Check if the given strings contains binary data.'''
00201 
00202         if _python2:
00203             return klass._is_binary_py2(string)
00204 
00205         if type(string) == bytes:
00206             for c in string:
00207                 if c < 32 and not chr(c).isspace():
00208                     return True
00209         return False
00210 
00211     @classmethod
00212     def _is_binary_py2(klass, string):
00213         '''Check if the given strings contains binary data. (Python 2)'''
00214 
00215         for c in string:
00216             if c < ' ' and not c.isspace():
00217                 return True
00218         return False
00219 
00220     @classmethod
00221     def _try_unicode(klass, value):
00222         '''Try to convert bytearray value to unicode'''
00223 
00224         if type(value) == bytes and not klass._is_binary(value):
00225             try:
00226                 return value.decode('UTF-8')
00227             except UnicodeDecodeError:
00228                 return value
00229         return value
00230 
00231     def write(self, file, only_new=False):
00232         '''Write information into the given file-like object.
00233 
00234         If only_new is True, only keys which have been added since the last
00235         load() are written (i. e. those returned by new_keys()).
00236 
00237         If a value is a string, it is written directly. Otherwise it must be a
00238         tuple of the form (file, encode=True, limit=None, fail_on_empty=False).
00239         The first argument can be a file name or a file-like object,
00240         which will be read and its content will become the value of this key.
00241         'encode' specifies whether the contents will be
00242         gzip compressed and base64-encoded (this defaults to True). If limit is
00243         set to a positive integer, the file is not attached if it's larger
00244         than the given limit, and the entire key will be removed. If
00245         fail_on_empty is True, reading zero bytes will cause an IOError.
00246 
00247         file needs to be opened in binary mode.
00248 
00249         Files are written in RFC822 format.
00250         '''
00251         self._assert_bin_mode(file)
00252 
00253         # sort keys into ASCII non-ASCII/binary attachment ones, so that
00254         # the base64 ones appear last in the report
00255         asckeys = []
00256         binkeys = []
00257         for k in self.data.keys():
00258             if only_new and k in self.old_keys:
00259                 continue
00260             v = self.data[k]
00261             if hasattr(v, 'find'):
00262                 if self._is_binary(v):
00263                     binkeys.append(k)
00264                 else:
00265                     asckeys.append(k)
00266             else:
00267                 if not isinstance(v, CompressedValue) and len(v) >= 2 and not v[1]:
00268                     # force uncompressed
00269                     asckeys.append(k)
00270                 else:
00271                     binkeys.append(k)
00272 
00273         asckeys.sort()
00274         if 'ProblemType' in asckeys:
00275             asckeys.remove('ProblemType')
00276             asckeys.insert(0, 'ProblemType')
00277         binkeys.sort()
00278 
00279         # write the ASCII keys first
00280         for k in asckeys:
00281             v = self.data[k]
00282 
00283             # if it's a tuple, we have a file reference; read the contents
00284             if not hasattr(v, 'find'):
00285                 if len(v) >= 3 and v[2] is not None:
00286                     limit = v[2]
00287                 else:
00288                     limit = None
00289 
00290                 fail_on_empty = len(v) >= 4 and v[3]
00291 
00292                 if hasattr(v[0], 'read'):
00293                     v = v[0].read()  # file-like object
00294                 else:
00295                     with open(v[0], 'rb') as f:  # file name
00296                         v = f.read()
00297 
00298                 if fail_on_empty and len(v) == 0:
00299                     raise IOError('did not get any data for field ' + k)
00300 
00301                 if limit is not None and len(v) > limit:
00302                     del self.data[k]
00303                     continue
00304 
00305             if _python2:
00306                 if isinstance(v, unicode):
00307                     # unicode → str
00308                     v = v.encode('UTF-8')
00309             else:
00310                 if isinstance(v, str):
00311                     # unicode → str
00312                     v = v.encode('UTF-8')
00313 
00314             file.write(k.encode('ASCII'))
00315             if b'\n' in v:
00316                 # multiline value
00317                 file.write(b':\n ')
00318                 file.write(v.replace(b'\n', b'\n '))
00319             else:
00320                 file.write(b': ')
00321                 file.write(v)
00322             file.write(b'\n')
00323 
00324         # now write the binary keys with gzip compression and base64 encoding
00325         for k in binkeys:
00326             v = self.data[k]
00327             limit = None
00328             size = 0
00329 
00330             curr_pos = file.tell()
00331             file.write(k.encode('ASCII'))
00332             file.write(b': base64\n ')
00333 
00334             # CompressedValue
00335             if isinstance(v, CompressedValue):
00336                 file.write(base64.b64encode(v.gzipvalue))
00337                 file.write(b'\n')
00338                 continue
00339 
00340             # write gzip header
00341             gzip_header = b'\037\213\010\010\000\000\000\000\002\377' + k.encode('UTF-8') + b'\000'
00342             file.write(base64.b64encode(gzip_header))
00343             file.write(b'\n ')
00344             crc = zlib.crc32(b'')
00345 
00346             bc = zlib.compressobj(9, zlib.DEFLATED, -zlib.MAX_WBITS,
00347                                   zlib.DEF_MEM_LEVEL, 0)
00348             # direct value
00349             if hasattr(v, 'find'):
00350                 size += len(v)
00351                 crc = zlib.crc32(v, crc)
00352                 outblock = bc.compress(v)
00353                 if outblock:
00354                     file.write(base64.b64encode(outblock))
00355                     file.write(b'\n ')
00356             # file reference
00357             else:
00358                 if len(v) >= 3 and v[2] is not None:
00359                     limit = v[2]
00360 
00361                 if hasattr(v[0], 'read'):
00362                     f = v[0]  # file-like object
00363                 else:
00364                     f = open(v[0], 'rb')  # file name
00365                 while True:
00366                     block = f.read(1048576)
00367                     size += len(block)
00368                     crc = zlib.crc32(block, crc)
00369                     if limit is not None:
00370                         if size > limit:
00371                             # roll back
00372                             file.seek(curr_pos)
00373                             file.truncate(curr_pos)
00374                             del self.data[k]
00375                             crc = None
00376                             break
00377                     if block:
00378                         outblock = bc.compress(block)
00379                         if outblock:
00380                             file.write(base64.b64encode(outblock))
00381                             file.write(b'\n ')
00382                     else:
00383                         break
00384                 if not hasattr(v[0], 'read'):
00385                     f.close()
00386 
00387                 if len(v) >= 4 and v[3]:
00388                     if size == 0:
00389                         raise IOError('did not get any data for field %s from %s' % (k, str(v[0])))
00390 
00391             # flush compressor and write the rest
00392             if not limit or size <= limit:
00393                 block = bc.flush()
00394                 # append gzip trailer: crc (32 bit) and size (32 bit)
00395                 if crc:
00396                     block += struct.pack('<L', crc & 0xFFFFFFFF)
00397                     block += struct.pack('<L', size & 0xFFFFFFFF)
00398 
00399                 file.write(base64.b64encode(block))
00400                 file.write(b'\n')
00401 
00402     def add_to_existing(self, reportfile, keep_times=False):
00403         '''Add this report's data to an already existing report file.
00404 
00405         The file will be temporarily chmod'ed to 000 to prevent frontends
00406         from picking up a hal-updated report file. If keep_times
00407         is True, then the file's atime and mtime restored after updating.
00408         '''
00409         st = os.stat(reportfile)
00410         try:
00411             f = open(reportfile, 'ab')
00412             os.chmod(reportfile, 0)
00413             self.write(f)
00414             f.close()
00415         finally:
00416             if keep_times:
00417                 os.utime(reportfile, (st.st_atime, st.st_mtime))
00418             os.chmod(reportfile, st.st_mode)
00419 
00420     def write_mime(self, file, attach_treshold=5, extra_headers={},
00421                    skip_keys=None, priority_fields=None):
00422         '''Write MIME/Multipart RFC 2822 formatted data into file.
00423 
00424         file must be a file-like object, not a path.  It needs to be opened in
00425         binary mode.
00426 
00427         If a value is a string or a CompressedValue, it is written directly.
00428         Otherwise it must be a tuple containing the source file and an optional
00429         boolean value (in that order); the first argument can be a file name or
00430         a file-like object, which will be read and its content will become the
00431         value of this key.  The file will be gzip compressed, unless the key
00432         already ends in .gz.
00433 
00434         attach_treshold specifies the maximum number of lines for a value to be
00435         included into the first inline text part. All bigger values (as well as
00436         all non-ASCII ones) will become an attachment, as well as text
00437         values bigger than 1 kB.
00438 
00439         Extra MIME preamble headers can be specified, too, as a dictionary.
00440 
00441         skip_keys is a set/list specifying keys which are filtered out and not
00442         written to the destination file.
00443 
00444         priority_fields is a set/list specifying the order in which keys should
00445         appear in the destination file.
00446         '''
00447         self._assert_bin_mode(file)
00448 
00449         keys = sorted(self.data.keys())
00450 
00451         text = b''
00452         attachments = []
00453 
00454         if 'ProblemType' in keys:
00455             keys.remove('ProblemType')
00456             keys.insert(0, 'ProblemType')
00457 
00458         if priority_fields:
00459             counter = 0
00460             for priority_field in priority_fields:
00461                 if priority_field in keys:
00462                     keys.remove(priority_field)
00463                     keys.insert(counter, priority_field)
00464                     counter += 1
00465 
00466         for k in keys:
00467             if skip_keys and k in skip_keys:
00468                 continue
00469             v = self.data[k]
00470             attach_value = None
00471 
00472             # compressed values are ready for attaching in gzip form
00473             if isinstance(v, CompressedValue):
00474                 attach_value = v.gzipvalue
00475 
00476             # if it's a tuple, we have a file reference; read the contents
00477             # and gzip it
00478             elif not hasattr(v, 'find'):
00479                 attach_value = ''
00480                 if hasattr(v[0], 'read'):
00481                     f = v[0]  # file-like object
00482                 else:
00483                     f = open(v[0], 'rb')  # file name
00484                 if k.endswith('.gz'):
00485                     attach_value = f.read()
00486                 else:
00487                     io = BytesIO()
00488                     gf = gzip.GzipFile(k, mode='wb', fileobj=io)
00489                     while True:
00490                         block = f.read(1048576)
00491                         if block:
00492                             gf.write(block)
00493                         else:
00494                             gf.close()
00495                             break
00496                     attach_value = io.getvalue()
00497                 f.close()
00498 
00499             # binary value
00500             elif self._is_binary(v):
00501                 if k.endswith('.gz'):
00502                     attach_value = v
00503                 else:
00504                     attach_value = CompressedValue(v, k).gzipvalue
00505 
00506             # if we have an attachment value, create an attachment
00507             if attach_value:
00508                 att = MIMEBase('application', 'x-gzip')
00509                 if k.endswith('.gz'):
00510                     att.add_header('Content-Disposition', 'attachment', filename=k)
00511                 else:
00512                     att.add_header('Content-Disposition', 'attachment', filename=k + '.gz')
00513                 att.set_payload(attach_value)
00514                 encode_base64(att)
00515                 attachments.append(att)
00516             else:
00517                 # plain text value
00518                 size = len(v)
00519 
00520                 # ensure that byte arrays are valid UTF-8
00521                 if type(v) == bytes:
00522                     v = v.decode('UTF-8', 'replace')
00523                 # convert unicode to UTF-8 str
00524                 if _python2:
00525                     assert isinstance(v, unicode)
00526                 else:
00527                     assert isinstance(v, str)
00528                 v = v.encode('UTF-8')
00529 
00530                 lines = len(v.splitlines())
00531                 if size <= 1000 and lines == 1:
00532                     v = v.rstrip()
00533                     text += k.encode() + b': ' + v + b'\n'
00534                 elif size <= 1000 and lines <= attach_treshold:
00535                     text += k.encode() + b':\n '
00536                     if not v.endswith(b'\n'):
00537                         v += b'\n'
00538                     text += v.strip().replace(b'\n', b'\n ') + b'\n'
00539                 else:
00540                     # too large, separate attachment
00541                     att = MIMEText(v, _charset='UTF-8')
00542                     att.add_header('Content-Disposition', 'attachment', filename=k + '.txt')
00543                     attachments.append(att)
00544 
00545         # create initial text attachment
00546         att = MIMEText(text, _charset='UTF-8')
00547         att.add_header('Content-Disposition', 'inline')
00548         attachments.insert(0, att)
00549 
00550         msg = MIMEMultipart()
00551         for k, v in extra_headers.items():
00552             msg.add_header(k, v)
00553         for a in attachments:
00554             msg.attach(a)
00555 
00556         file.write(msg.as_string().encode('UTF-8'))
00557         file.write(b'\n')
00558 
00559     def __setitem__(self, k, v):
00560         assert hasattr(k, 'isalnum')
00561         assert k.replace('.', '').replace('-', '').replace('_', '').isalnum()
00562         # value must be a string or a CompressedValue or a file reference
00563         # (tuple (string|file [, bool]))
00564         assert (isinstance(v, CompressedValue) or hasattr(v, 'isalnum') or
00565                 (hasattr(v, '__getitem__') and (
00566                     len(v) == 1 or (len(v) >= 2 and v[1] in (True, False)))
00567                     and (hasattr(v[0], 'isalnum') or hasattr(v[0], 'read'))))
00568 
00569         return self.data.__setitem__(k, v)
00570 
00571     def new_keys(self):
00572         '''Return newly added keys.
00573 
00574         Return the set of keys which have been added to the report since it
00575         was constructed or loaded.
00576         '''
00577         return set(self.data.keys()) - self.old_keys
00578 
00579     @classmethod
00580     def _strip_gzip_header(klass, line):
00581         '''Strip gzip header from line and return the rest.'''
00582 
00583         if _python2:
00584             return klass._strip_gzip_header_py2(line)
00585 
00586         flags = line[3]
00587         offset = 10
00588         if flags & 4:  # FLG.FEXTRA
00589             offset += line[offset] + 1
00590         if flags & 8:  # FLG.FNAME
00591             while line[offset] != 0:
00592                 offset += 1
00593             offset += 1
00594         if flags & 16:  # FLG.FCOMMENT
00595             while line[offset] != 0:
00596                 offset += 1
00597             offset += 1
00598         if flags & 2:  # FLG.FHCRC
00599             offset += 2
00600 
00601         return line[offset:]
00602 
00603     @classmethod
00604     def _strip_gzip_header_py2(klass, line):
00605         '''Strip gzip header from line and return the rest. (Python 2)'''
00606 
00607         flags = ord(line[3])
00608         offset = 10
00609         if flags & 4:  # FLG.FEXTRA
00610             offset += line[offset] + 1
00611         if flags & 8:  # FLG.FNAME
00612             while ord(line[offset]) != 0:
00613                 offset += 1
00614             offset += 1
00615         if flags & 16:  # FLG.FCOMMENT
00616             while ord(line[offset]) != 0:
00617                 offset += 1
00618             offset += 1
00619         if flags & 2:  # FLG.FHCRC
00620             offset += 2
00621 
00622         return line[offset:]
00623 
00624     @classmethod
00625     def _assert_bin_mode(klass, file):
00626         '''Assert that given file object is in binary mode'''
00627 
00628         if _python2:
00629             assert (type(file) == BytesIO or 'b' in file.mode), 'file stream must be in binary mode'
00630         else:
00631             assert not hasattr(file, 'encoding'), 'file stream must be in binary mode'