Back to index

apport  2.3
crashdb.py
Go to the documentation of this file.
00001 '''Abstract crash database interface.'''
00002 
00003 # Copyright (C) 2007 - 2009 Canonical Ltd.
00004 # Author: Martin Pitt <martin.pitt@ubuntu.com>
00005 #
00006 # This program is free software; you can redistribute it and/or modify it
00007 # under the terms of the GNU General Public License as published by the
00008 # Free Software Foundation; either version 2 of the License, or (at your
00009 # option) any later version.  See http://www.gnu.org/copyleft/gpl.html for
00010 # the full text of the license.
00011 
00012 import os, os.path, sys, shutil
00013 
00014 try:
00015     from exceptions import Exception
00016     from urllib import quote_plus, urlopen
00017     URLError = IOError
00018     (quote_plus, urlopen)  # pyflakes
00019 except ImportError:
00020     # python 3
00021     from functools import cmp_to_key
00022     from urllib.parse import quote_plus
00023     from urllib.request import urlopen
00024     from urllib.error import URLError
00025 
00026 import apport
00027 
00028 
def _u(str):
    '''Return str as a unicode string.

    Byte strings (including instances of bytes subclasses) are decoded as
    UTF-8, silently dropping undecodable bytes. Any other value is returned
    unchanged.
    '''
    # NOTE: the parameter shadows the builtin "str"; the name is kept because
    # it is part of the function's signature.
    if isinstance(str, bytes):
        return str.decode('UTF-8', 'ignore')
    return str
00035 
00036 
00037 class CrashDatabase:
00038     def __init__(self, auth_file, options):
00039         '''Initialize crash database connection.
00040 
00041         You need to specify an implementation specific file with the
00042         authentication credentials for retracing access for download() and
00043         update(). For upload() and get_comment_url() you can use None.
00044 
00045         options is a dictionary with additional settings from crashdb.conf; see
00046         get_crashdb() for details.
00047         '''
00048         self.auth_file = auth_file
00049         self.options = options
00050         self.duplicate_db = None
00051 
00052     def get_bugpattern_baseurl(self):
00053         '''Return the base URL for bug patterns.
00054 
00055         See apport.report.Report.search_bug_patterns() for details. If this
00056         function returns None, bug patterns are disabled.
00057         '''
00058         return self.options.get('bug_pattern_url')
00059 
00060     def accepts(self, report):
00061         '''Check if this report can be uploaded to this database.
00062 
00063         Crash databases might limit the types of reports they get with e. g.
00064         the "problem_types" option.
00065         '''
00066         if 'problem_types' in self.options:
00067             return report.get('ProblemType') in self.options['problem_types']
00068 
00069         return True
00070 
00071     #
00072     # API for duplicate detection
00073     #
00074     # Tests are in apport/crashdb_impl/memory.py.
00075 
00076     def init_duplicate_db(self, path):
00077         '''Initialize duplicate database.
00078 
00079         path specifies an SQLite database. It will be created if it does not
00080         exist yet.
00081         '''
00082         import sqlite3 as dbapi2
00083 
00084         assert dbapi2.paramstyle == 'qmark', \
00085             'this module assumes qmark dbapi parameter style'
00086 
00087         self.format_version = 3
00088 
00089         init = not os.path.exists(path) or path == ':memory:' or \
00090             os.path.getsize(path) == 0
00091         self.duplicate_db = dbapi2.connect(path, timeout=7200)
00092 
00093         if init:
00094             cur = self.duplicate_db.cursor()
00095             cur.execute('CREATE TABLE version (format INTEGER NOT NULL)')
00096             cur.execute('INSERT INTO version VALUES (?)', [self.format_version])
00097 
00098             cur.execute('''CREATE TABLE crashes (
00099                 signature VARCHAR(255) NOT NULL,
00100                 crash_id INTEGER NOT NULL,
00101                 fixed_version VARCHAR(50),
00102                 last_change TIMESTAMP,
00103                 CONSTRAINT crashes_pk PRIMARY KEY (crash_id))''')
00104 
00105             cur.execute('''CREATE TABLE address_signatures (
00106                 signature VARCHAR(1000) NOT NULL,
00107                 crash_id INTEGER NOT NULL,
00108                 CONSTRAINT address_signatures_pk PRIMARY KEY (signature))''')
00109 
00110             self.duplicate_db.commit()
00111 
00112         # verify integrity
00113         cur = self.duplicate_db.cursor()
00114         cur.execute('PRAGMA integrity_check')
00115         result = cur.fetchall()
00116         if result != [('ok',)]:
00117             raise SystemError('Corrupt duplicate db:' + str(result))
00118 
00119         try:
00120             cur.execute('SELECT format FROM version')
00121             result = cur.fetchone()
00122         except self.duplicate_db.OperationalError as e:
00123             if 'no such table' in str(e):
00124                 # first db format did not have version table yet
00125                 result = [0]
00126         if result[0] > self.format_version:
00127             raise SystemError('duplicate DB has unknown format %i' % result[0])
00128         if result[0] < self.format_version:
00129             print('duplicate db has format %i, upgrading to %i' %
00130                   (result[0], self.format_version))
00131             self._duplicate_db_upgrade(result[0])
00132 
    def check_duplicate(self, id, report=None):
        '''Check whether a crash is already known.

        If the crash is new, it will be added to the duplicate database and the
        function returns None. If the crash is already known, the function
        returns a pair (crash_id, fixed_version), where fixed_version might be
        None if the crash is not fixed in the latest version yet. Depending on
        whether the version in report is smaller than/equal to the fixed
        version or larger, this calls close_duplicate() or mark_regression().

        If the report does not have a valid crash signature, this function does
        nothing and just returns None.

        By default, the report gets download()ed, but for performance reasons
        it can be explicitly passed to this function if it is already available.

        init_duplicate_db() must have been called before.
        '''
        assert self.duplicate_db, 'init_duplicate_db() needs to be called before'

        # NOTE: any falsy report (not just None) triggers a download
        if not report:
            report = self.download(id)

        # _mark_dup_checked() is not defined in this part of the file;
        # presumably it flags the report as processed -- TODO confirm
        self._mark_dup_checked(id, report)

        # an explicit DuplicateSignature field takes precedence over the
        # signature computed from the report
        if 'DuplicateSignature' in report:
            sig = report['DuplicateSignature']
        else:
            sig = report.crash_signature()
        existing = []
        if sig:
            # use real duplicate signature
            existing = self._duplicate_search_signature(sig, id)

            if existing:
                # update status of existing master bugs
                for (ex_id, _) in existing:
                    self._duplicate_db_sync_status(ex_id)
                # syncing may have removed or changed entries, so search again
                existing = self._duplicate_search_signature(sig, id)

        # the version is the second whitespace separated word of "Package"
        try:
            report_package_version = report['Package'].split()[1]
        except (KeyError, IndexError):
            report_package_version = None

        # check the existing IDs whether there is one that is unfixed or not
        # older than the report's package version; if so, we have a duplicate.
        master_id = None
        master_ver = None
        for (ex_id, ex_ver) in existing:
            if not ex_ver or not report_package_version or apport.packaging.compare_versions(report_package_version, ex_ver) < 0:
                master_id = ex_id
                master_ver = ex_ver
                break
        else:
            # if we did not find a new enough open master report,
            # we have a regression of the latest fix. Mark it so, and create a
            # new unfixed ID for it later on
            if existing:
                self.mark_regression(id, existing[-1][0])

        # now query address signatures, they might turn up another duplicate
        # (not necessarily the same, due to Stacktraces sometimes being
        # slightly different)
        addr_sig = report.crash_signature_addresses()
        if addr_sig:
            addr_match = self._duplicate_search_address_signature(addr_sig)
            if addr_match and addr_match != master_id:
                if master_id is None:
                    # we have a duplicate only identified by address sig, close it
                    master_id = addr_match
                else:
                    # our bug is a dupe of two different masters, one from
                    # symbolic, the other from addr matching (see LP#943117);
                    # make them all duplicates of each other, using the lower
                    # number as master
                    if master_id < addr_match:
                        self.close_duplicate(report, addr_match, master_id)
                        self._duplicate_db_merge_id(addr_match, master_id)
                    else:
                        self.close_duplicate(report, master_id, addr_match)
                        self._duplicate_db_merge_id(master_id, addr_match)
                        master_id = addr_match
                        master_ver = None  # no version tracking for address signatures yet

        if master_id is not None:
            # remember the address signature for the master, then close this
            # report as its duplicate
            if addr_sig:
                self._duplicate_db_add_address_signature(addr_sig, master_id)
            self.close_duplicate(report, id, master_id)
            return (master_id, master_ver)

        # no duplicate detected; create a new record for the ID if we don't have one already
        if sig:
            cur = self.duplicate_db.cursor()
            cur.execute('SELECT count(*) FROM crashes WHERE crash_id == ?', [id])
            count_id = cur.fetchone()[0]
            if count_id == 0:
                # new entries start out unfixed (fixed_version NULL)
                cur.execute('INSERT INTO crashes VALUES (?, ?, ?, CURRENT_TIMESTAMP)', (_u(sig), id, None))
                self.duplicate_db.commit()
        if addr_sig:
            self._duplicate_db_add_address_signature(addr_sig, id)

        return None
00234 
00235     def known(self, report):
00236         '''Check if the crash db already knows about the crash signature.
00237 
00238         Check if the report has a DuplicateSignature, crash_signature(), or
00239         StacktraceAddressSignature, and ask the database whether the problem is
00240         already known. If so, return an URL where the user can check the status
00241         or subscribe (if available), or just return True if the report is known
00242         but there is no public URL. In that case the report will not be
00243         uploaded (i. e. upload() will not be called).
00244 
00245         Return None if the report does not have any signature or the crash
00246         database does not support checking for duplicates on the client side.
00247 
00248         The default implementation uses a text file format generated by
00249         duplicate_db_publish() at an URL specified by the "dupdb_url" option.
00250         Subclasses are free to override this with a custom implementation, such
00251         as a real database lookup.
00252         '''
00253         if not self.options.get('dupdb_url'):
00254             return None
00255 
00256         for kind in ('sig', 'address'):
00257             # get signature
00258             if kind == 'sig':
00259                 if 'DuplicateSignature' in report:
00260                     sig = report['DuplicateSignature']
00261                 else:
00262                     sig = report.crash_signature()
00263             else:
00264                 sig = report.crash_signature_addresses()
00265 
00266             if not sig:
00267                 continue
00268 
00269             # build URL where the data should be
00270             h = self.duplicate_sig_hash(sig)
00271             if not h:
00272                 return None
00273 
00274             # the hash is already quoted, but we really want to open the quoted
00275             # file names; as urlopen() unquotes, we need to double-quote here
00276             # again so that urlopen() sees the single-quoted file names
00277             url = os.path.join(self.options['dupdb_url'], kind, quote_plus(h))
00278 
00279             # read data file
00280             try:
00281                 f = urlopen(url)
00282                 contents = f.read().decode('UTF-8')
00283                 f.close()
00284                 if '<title>404 Not Found' in contents:
00285                     continue
00286             except (IOError, URLError):
00287                 # does not exist, failed to load, etc.
00288                 continue
00289 
00290             # now check if we find our signature
00291             for line in contents.splitlines():
00292                 try:
00293                     id, s = line.split(None, 1)
00294                     id = int(id)
00295                 except ValueError:
00296                     continue
00297                 if s == sig:
00298                     result = self.get_id_url(report, id)
00299                     if not result:
00300                         # if we can't have an URL, just report as "known"
00301                         result = '1'
00302                     return result
00303 
00304         return None
00305 
00306     def duplicate_db_fixed(self, id, version):
00307         '''Mark given crash ID as fixed in the duplicate database.
00308 
00309         version specifies the package version the crash was fixed in (None for
00310         'still unfixed').
00311         '''
00312         assert self.duplicate_db, 'init_duplicate_db() needs to be called before'
00313 
00314         cur = self.duplicate_db.cursor()
00315         n = cur.execute('UPDATE crashes SET fixed_version = ?, last_change = CURRENT_TIMESTAMP WHERE crash_id = ?',
00316                         (version, id))
00317         assert n.rowcount == 1
00318         self.duplicate_db.commit()
00319 
00320     def duplicate_db_remove(self, id):
00321         '''Remove crash from the duplicate database.
00322 
00323         This happens when a report got rejected or manually duplicated.
00324         '''
00325         assert self.duplicate_db, 'init_duplicate_db() needs to be called before'
00326 
00327         cur = self.duplicate_db.cursor()
00328         cur.execute('DELETE FROM crashes WHERE crash_id = ?', [id])
00329         cur.execute('DELETE FROM address_signatures WHERE crash_id = ?', [id])
00330         self.duplicate_db.commit()
00331 
00332     def duplicate_db_change_master_id(self, old_id, new_id):
00333         '''Change a crash ID.'''
00334 
00335         assert self.duplicate_db, 'init_duplicate_db() needs to be called before'
00336 
00337         cur = self.duplicate_db.cursor()
00338         cur.execute('UPDATE crashes SET crash_id = ?, last_change = CURRENT_TIMESTAMP WHERE crash_id = ?',
00339                     [new_id, old_id])
00340         cur.execute('UPDATE address_signatures SET crash_id = ? WHERE crash_id = ?',
00341                     [new_id, old_id])
00342         self.duplicate_db.commit()
00343 
00344     def duplicate_db_publish(self, dir):
00345         '''Create text files suitable for www publishing.
00346 
00347         Create a number of text files in the given directory which Apport
00348         clients can use to determine whether a problem is already reported to
00349         the database, through the known() method. This directory is suitable
00350         for publishing to the web.
00351 
00352         The database is indexed by the first two fields of the duplicate or
00353         crash signature, to avoid having to download the entire database every
00354         time.
00355 
00356         If the directory already exists, it will be updated. The new content is
00357         built in a new directory which is the given one with ".new" appended,
00358         then moved to the given name in an almost atomic way.
00359         '''
00360         assert self.duplicate_db, 'init_duplicate_db() needs to be called before'
00361 
00362         # first create the temporary new dir; if that fails, nothing has been
00363         # changed and we fail early
00364         out = dir + '.new'
00365         os.mkdir(out)
00366 
00367         # crash addresses
00368         addr_base = os.path.join(out, 'address')
00369         os.mkdir(addr_base)
00370         cur_hash = None
00371         cur_file = None
00372 
00373         cur = self.duplicate_db.cursor()
00374 
00375         cur.execute('SELECT * from address_signatures ORDER BY signature')
00376         for (sig, id) in cur.fetchall():
00377             h = self.duplicate_sig_hash(sig)
00378             if h is None:
00379                 # some entries can't be represented in a single line
00380                 continue
00381             if h != cur_hash:
00382                 cur_hash = h
00383                 if cur_file:
00384                     cur_file.close()
00385                 cur_file = open(os.path.join(addr_base, cur_hash), 'w')
00386 
00387             cur_file.write('%i %s\n' % (id, sig))
00388 
00389         if cur_file:
00390             cur_file.close()
00391 
00392         # duplicate signatures
00393         sig_base = os.path.join(out, 'sig')
00394         os.mkdir(sig_base)
00395         cur_hash = None
00396         cur_file = None
00397 
00398         cur.execute('SELECT signature, crash_id from crashes ORDER BY signature')
00399         for (sig, id) in cur.fetchall():
00400             h = self.duplicate_sig_hash(sig)
00401             if h is None:
00402                 # some entries can't be represented in a single line
00403                 continue
00404             if h != cur_hash:
00405                 cur_hash = h
00406                 if cur_file:
00407                     cur_file.close()
00408                 cur_file = open(os.path.join(sig_base, cur_hash), 'wb')
00409 
00410             cur_file.write(('%i %s\n' % (id, sig)).encode('UTF-8'))
00411 
00412         if cur_file:
00413             cur_file.close()
00414 
00415         # switch over tree; this is as atomic as we can be with directories
00416         if os.path.exists(dir):
00417             os.rename(dir, dir + '.old')
00418         os.rename(out, dir)
00419         if os.path.exists(dir + '.old'):
00420             shutil.rmtree(dir + '.old')
00421 
00422     def _duplicate_db_upgrade(self, cur_format):
00423         '''Upgrade database to current format'''
00424 
00425         # Format 3 added a primary key which can't be done as an upgrade in
00426         # SQLite
00427         if cur_format < 3:
00428             raise SystemError('Cannot upgrade database from format earlier than 3')
00429 
00430         cur = self.duplicate_db.cursor()
00431 
00432         cur.execute('UPDATE version SET format = ?', (cur_format,))
00433         self.duplicate_db.commit()
00434 
00435         assert cur_format == self.format_version
00436 
00437     def _duplicate_search_signature(self, sig, id):
00438         '''Look up signature in the duplicate db.
00439 
00440         Return [(id, fixed_version)] tuple list.
00441 
00442         There might be several matches if a crash has been reintroduced in a
00443         later version. The results are sorted so that the highest fixed version
00444         comes first, and "unfixed" being the last result.
00445 
00446         id is the bug we are looking to find a duplicate for. The result will
00447         never contain id, to avoid marking a bug as a duplicate of itself if a
00448         bug is reprocessed more than once.
00449         '''
00450         cur = self.duplicate_db.cursor()
00451         cur.execute('SELECT crash_id, fixed_version FROM crashes WHERE signature = ? AND crash_id <> ?', [_u(sig), id])
00452         existing = cur.fetchall()
00453 
00454         def cmp(x, y):
00455             x = x[1]
00456             y = y[1]
00457             if x == y:
00458                 return 0
00459             if x == '':
00460                 if y is None:
00461                     return -1
00462                 else:
00463                     return 1
00464             if y == '':
00465                 if x is None:
00466                     return 1
00467                 else:
00468                     return -1
00469             if x is None:
00470                 return 1
00471             if y is None:
00472                 return -1
00473             return apport.packaging.compare_versions(x, y)
00474 
00475         if sys.version[0] >= '3':
00476             existing.sort(key=cmp_to_key(cmp))
00477         else:
00478             existing.sort(cmp=cmp)
00479 
00480         return existing
00481 
00482     def _duplicate_search_address_signature(self, sig):
00483         '''Return ID for crash address signature.
00484 
00485         Return None if signature is unknown.
00486         '''
00487         if not sig:
00488             return None
00489 
00490         cur = self.duplicate_db.cursor()
00491 
00492         cur.execute('SELECT crash_id FROM address_signatures WHERE signature == ?', [sig])
00493         existing_ids = cur.fetchall()
00494         assert len(existing_ids) <= 1
00495         if existing_ids:
00496             return existing_ids[0][0]
00497         else:
00498             return None
00499 
00500     def _duplicate_db_dump(self, with_timestamps=False):
00501         '''Return the entire duplicate database as a dictionary.
00502 
00503         The returned dictionary maps "signature" to (crash_id, fixed_version)
00504         pairs.
00505 
00506         If with_timestamps is True, then the map will contain triples
00507         (crash_id, fixed_version, last_change) instead.
00508 
00509         This is mainly useful for debugging and test suites.
00510         '''
00511         assert self.duplicate_db, 'init_duplicate_db() needs to be called before'
00512 
00513         dump = {}
00514         cur = self.duplicate_db.cursor()
00515         cur.execute('SELECT * FROM crashes')
00516         for (sig, id, ver, last_change) in cur:
00517             if with_timestamps:
00518                 dump[sig] = (id, ver, last_change)
00519             else:
00520                 dump[sig] = (id, ver)
00521         return dump
00522 
00523     def _duplicate_db_sync_status(self, id):
00524         '''Update the duplicate db to the reality of the report in the crash db.
00525 
00526         This uses get_fixed_version() to get the status of the given crash.
00527         An invalid ID gets removed from the duplicate db, and a crash which got
00528         fixed is marked as such in the database.
00529         '''
00530         assert self.duplicate_db, 'init_duplicate_db() needs to be called before'
00531 
00532         cur = self.duplicate_db.cursor()
00533         cur.execute('SELECT fixed_version FROM crashes WHERE crash_id = ?', [id])
00534         db_fixed_version = cur.fetchone()
00535         if not db_fixed_version:
00536             return
00537         db_fixed_version = db_fixed_version[0]
00538 
00539         real_fixed_version = self.get_fixed_version(id)
00540 
00541         # crash got rejected
00542         if real_fixed_version == 'invalid':
00543             print('DEBUG: bug %i was invalidated, removing from database' % id)
00544             self.duplicate_db_remove(id)
00545             return
00546 
00547         # crash got fixed
00548         if not db_fixed_version and real_fixed_version:
00549             print('DEBUG: bug %i got fixed in version %s, updating database' % (id, real_fixed_version))
00550             self.duplicate_db_fixed(id, real_fixed_version)
00551             return
00552 
00553         # crash got reopened
00554         if db_fixed_version and not real_fixed_version:
00555             print('DEBUG: bug %i got reopened, dropping fixed version %s from database' % (id, db_fixed_version))
00556             self.duplicate_db_fixed(id, real_fixed_version)
00557             return
00558 
00559     def _duplicate_db_add_address_signature(self, sig, id):
00560         # sanity check
00561         existing = self._duplicate_search_address_signature(sig)
00562         if existing:
00563             if existing != id:
00564                 raise SystemError('ID %i has signature %s, but database already has that signature for ID %i' % (
00565                     id, sig, existing))
00566         else:
00567             cur = self.duplicate_db.cursor()
00568             cur.execute('INSERT INTO address_signatures VALUES (?, ?)', (_u(sig), id))
00569             self.duplicate_db.commit()
00570 
00571     def _duplicate_db_merge_id(self, dup, master):
00572         '''Merge two crash IDs.
00573 
00574         This is necessary when having to mark a bug as a duplicate if it
00575         already is in the duplicate DB.
00576         '''
00577         assert self.duplicate_db, 'init_duplicate_db() needs to be called before'
00578 
00579         cur = self.duplicate_db.cursor()
00580         cur.execute('DELETE FROM crashes WHERE crash_id = ?', [dup])
00581         cur.execute('UPDATE address_signatures SET crash_id = ? WHERE crash_id = ?',
00582                     [master, dup])
00583         self.duplicate_db.commit()
00584 
00585     @classmethod
00586     def duplicate_sig_hash(klass, sig):
00587         '''Create a www/URL proof hash for a duplicate signature'''
00588 
00589         # cannot hash multi-line custom duplicate signatures
00590         if '\n' in sig:
00591             return None
00592 
00593         # custom DuplicateSignatures have a free format, split off first word
00594         i = sig.split(' ', 1)[0]
00595         # standard crash/address signatures use ':' as field separator, usually
00596         # for ExecutableName:Signal
00597         i = '_'.join(i.split(':', 2)[:2])
00598         # we manually quote '/' to make them nicer to read
00599         i = i.replace('/', '_')
00600         i = quote_plus(i.encode('UTF-8'))
00601         # avoid too long file names
00602         i = i[:200]
00603         return i
00604 
00605     #
00606     # Abstract functions that need to be implemented by subclasses
00607     #
00608 
00609     def upload(self, report, progress_callback=None):
00610         '''Upload given problem report return a handle for it.
00611 
00612         This should happen noninteractively.
00613 
00614         If the implementation supports it, and a function progress_callback is
00615         passed, that is called repeatedly with two arguments: the number of
00616         bytes already sent, and the total number of bytes to send. This can be
00617         used to provide a proper upload progress indication on frontends.
00618 
00619         Implementations ought to "assert self.accepts(report)". The UI logic
00620         already prevents uploading a report to a database which does not accept
00621         it, but for third-party users of the API this should still be checked.
00622 
00623         This method can raise a NeedsCredentials exception in case of failure.
00624         '''
00625         raise NotImplementedError('this method must be implemented by a concrete subclass')
00626 
00627     def get_comment_url(self, report, handle):
00628         '''Return an URL that should be opened after report has been uploaded
00629         and upload() returned handle.
00630 
00631         Should return None if no URL should be opened (anonymous filing without
00632         user comments); in that case this function should do whichever
00633         interactive steps it wants to perform.
00634         '''
00635         raise NotImplementedError('this method must be implemented by a concrete subclass')
00636 
00637     def get_id_url(self, report, id):
00638         '''Return URL for a given report ID.
00639 
00640         The report is passed in case building the URL needs additional
00641         information from it, such as the SourcePackage name.
00642 
00643         Return None if URL is not available or cannot be determined.
00644         '''
00645         raise NotImplementedError('this method must be implemented by a concrete subclass')
00646 
00647     def download(self, id):
00648         '''Download the problem report from given ID and return a Report.'''
00649 
00650         raise NotImplementedError('this method must be implemented by a concrete subclass')
00651 
00652     def update(self, id, report, comment, change_description=False,
00653                attachment_comment=None, key_filter=None):
00654         '''Update the given report ID with all data from report.
00655 
00656         This creates a text comment with the "short" data (see
00657         ProblemReport.write_mime()), and creates attachments for all the
00658         bulk/binary data.
00659 
00660         If change_description is True, and the crash db implementation supports
00661         it, the short data will be put into the description instead (like in a
00662         new bug).
00663 
00664         comment will be added to the "short" data. If attachment_comment is
00665         given, it will be added to the attachment uploads.
00666 
00667         If key_filter is a list or set, then only those keys will be added.
00668         '''
00669         raise NotImplementedError('this method must be implemented by a concrete subclass')
00670 
00671     def update_traces(self, id, report, comment=''):
00672         '''Update the given report ID for retracing results.
00673 
00674         This updates Stacktrace, ThreadStacktrace, StacktraceTop,
00675         and StacktraceSource. You can also supply an additional comment.
00676         '''
00677         self.update(id, report, comment, key_filter=[
00678             'Stacktrace', 'ThreadStacktrace', 'StacktraceSource', 'StacktraceTop'])
00679 
00680     def set_credentials(self, username, password):
00681         '''Set username and password.'''
00682 
00683         raise NotImplementedError('this method must be implemented by a concrete subclass')
00684 
00685     def get_distro_release(self, id):
00686         '''Get 'DistroRelease: <release>' from the report ID.'''
00687 
00688         raise NotImplementedError('this method must be implemented by a concrete subclass')
00689 
00690     def get_unretraced(self):
00691         '''Return set of crash IDs which have not been retraced yet.
00692 
00693         This should only include crashes which match the current host
00694         architecture.
00695         '''
00696         raise NotImplementedError('this method must be implemented by a concrete subclass')
00697 
00698     def get_dup_unchecked(self):
00699         '''Return set of crash IDs which need duplicate checking.
00700 
00701         This is mainly useful for crashes of scripting languages such as
00702         Python, since they do not need to be retraced. It should not return
00703         bugs that are covered by get_unretraced().
00704         '''
00705         raise NotImplementedError('this method must be implemented by a concrete subclass')
00706 
00707     def get_unfixed(self):
00708         '''Return an ID set of all crashes which are not yet fixed.
00709 
00710         The list must not contain bugs which were rejected or duplicate.
00711 
00712         This function should make sure that the returned list is correct. If
00713         there are any errors with connecting to the crash database, it should
00714         raise an exception (preferably IOError).
00715         '''
00716         raise NotImplementedError('this method must be implemented by a concrete subclass')
00717 
00718     def get_fixed_version(self, id):
00719         '''Return the package version that fixes a given crash.
00720 
00721         Return None if the crash is not yet fixed, or an empty string if the
00722         crash is fixed, but it cannot be determined by which version. Return
00723         'invalid' if the crash report got invalidated, such as closed a
00724         duplicate or rejected.
00725 
00726         This function should make sure that the returned result is correct. If
00727         there are any errors with connecting to the crash database, it should
00728         raise an exception (preferably IOError).
00729         '''
00730         raise NotImplementedError('this method must be implemented by a concrete subclass')
00731 
00732     def get_affected_packages(self, id):
00733         '''Return list of affected source packages for given ID.'''
00734 
00735         raise NotImplementedError('this method must be implemented by a concrete subclass')
00736 
00737     def is_reporter(self, id):
00738         '''Check whether the user is the reporter of given ID.'''
00739 
00740         raise NotImplementedError('this method must be implemented by a concrete subclass')
00741 
00742     def can_update(self, id):
00743         '''Check whether the user is eligible to update a report.
00744 
00745         A user should add additional information to an existing ID if (s)he is
00746         the reporter or subscribed, the bug is open, not a duplicate, etc. The
00747         exact policy and checks should be done according to  the particular
00748         implementation.
00749         '''
00750         raise NotImplementedError('this method must be implemented by a concrete subclass')
00751 
00752     def duplicate_of(self, id):
00753         '''Return master ID for a duplicate bug.
00754 
00755         If the bug is not a duplicate, return None.
00756         '''
00757         raise NotImplementedError('this method must be implemented by a concrete subclass')
00758 
00759     def close_duplicate(self, report, id, master):
00760         '''Mark a crash id as duplicate of given master ID.
00761 
00762         If master is None, id gets un-duplicated.
00763         '''
00764         raise NotImplementedError('this method must be implemented by a concrete subclass')
00765 
00766     def mark_regression(self, id, master):
00767         '''Mark a crash id as reintroducing an earlier crash which is
00768         already marked as fixed (having ID 'master').'''
00769 
00770         raise NotImplementedError('this method must be implemented by a concrete subclass')
00771 
00772     def mark_retraced(self, id):
00773         '''Mark crash id as retraced.'''
00774 
00775         raise NotImplementedError('this method must be implemented by a concrete subclass')
00776 
00777     def mark_retrace_failed(self, id, invalid_msg=None):
00778         '''Mark crash id as 'failed to retrace'.
00779 
00780         If invalid_msg is given, the bug should be closed as invalid with given
00781         message, otherwise just marked as a failed retrace.
00782 
00783         This can be a no-op if you are not interested in this.
00784         '''
00785         raise NotImplementedError('this method must be implemented by a concrete subclass')
00786 
00787     def _mark_dup_checked(self, id, report):
00788         '''Mark crash id as checked for being a duplicate
00789 
00790         This is an internal method that should not be called from outside.
00791         '''
00792         raise NotImplementedError('this method must be implemented by a concrete subclass')
00793 
00794 #
00795 # factory
00796 #
00797 
00798 
def get_crashdb(auth_file, name=None, conf=None):
    '''Return a CrashDatabase object for the given crash db name.

    This reads the configuration file 'conf'.

    If name is None (or empty), it defaults to the 'default' value in conf.

    If conf is None (or empty), it defaults to the environment variable
    APPORT_CRASHDB_CONF; if that does not exist, the hardcoded default is
    /etc/apport/crashdb.conf. This Python syntax file needs to specify:

    - A string variable 'default', giving a default value for 'name' if that is
      None.

    - A dictionary 'databases' which maps names to crash db configuration
      dictionaries. These need to have at least the key 'impl' (Python module
      in apport.crashdb_impl which contains a concrete 'CrashDatabase' class
      implementation for that crash db type). Other generally known options are
      'bug_pattern_url', 'dupdb_url', and 'problem_types'.

    In addition, every regular file called *.conf in the directory
    '<conf>.d' is executed with the 'databases' dictionary as its namespace,
    so each top-level variable assigned there becomes a database entry.
    These files are processed in sorted file name order, so that it is
    deterministic which file wins if several of them define the same name. A
    file which raises an exception is reported on stderr and otherwise
    ignored.

    The chosen configuration dictionary is passed to the implementation's
    CrashDatabase constructor together with auth_file.

    Raises KeyError if 'default', 'databases', the requested name, or its
    'impl' key are missing. Errors from reading or executing the main
    configuration file and from importing the implementation module are
    propagated as well.
    '''
    if not conf:
        conf = os.environ.get('APPORT_CRASHDB_CONF', '/etc/apport/crashdb.conf')
    settings = {}
    # NOTE: configuration files are executed as Python code, so they (and the
    # .d directory below) must only be writable by trusted users.
    with open(conf) as f:
        exec(compile(f.read(), conf, 'exec'), settings)

    # Load third parties crashdb.conf
    confdDir = conf + '.d'
    if os.path.isdir(confdDir):
        # os.listdir() returns entries in arbitrary order; sort them so that
        # overriding between files behaves the same on every run
        for cf in sorted(os.listdir(confdDir)):
            cfpath = os.path.join(confdDir, cf)
            if not (cf.endswith('.conf') and os.path.isfile(cfpath)):
                continue
            try:
                with open(cfpath) as f:
                    exec(compile(f.read(), cfpath, 'exec'), settings['databases'])
            except Exception as e:
                # ignore broken files, but tell the user about them
                sys.stderr.write('Invalid file %s: %s\n' % (cfpath, str(e)))

    if not name:
        name = settings['default']

    db = settings['databases'][name]

    # a non-empty fromlist makes __import__ return the implementation
    # submodule itself instead of the top-level 'apport' package
    m = __import__('apport.crashdb_impl.' + db['impl'], globals(), locals(), ['CrashDatabase'])
    return m.CrashDatabase(auth_file, db)
00846 
00847 
class NeedsCredentials(Exception):
    '''Exception which may be raised if logging in to the crashdb fails.'''