Back to index

python-biopython  1.60
NCBIXML.py
Go to the documentation of this file.
00001 # Copyright 2000 by Bertrand Frottier .  All rights reserved.
00002 # Revisions 2005-2006 copyright Michiel de Hoon
00003 # Revisions 2006-2009 copyright Peter Cock
00004 # This code is part of the Biopython distribution and governed by its
00005 # license.  Please see the LICENSE file that should have been included
00006 # as part of this package.
00007 """This module provides code to work with the BLAST XML output
00008 following the DTD available on the NCBI FTP
00009 ftp://ftp.ncbi.nlm.nih.gov/blast/documents/xml/NCBI_BlastOutput.dtd
00010 
00011 Classes:
00012 BlastParser         Parses XML output from BLAST (direct use discouraged).
00013                     This (now) returns a list of Blast records.
00014                     Historically it returned a single Blast record.
00015                     You are expected to use this via the parse or read functions.
00016 
00017 _XMLparser          Generic SAX parser (private).
00018 
00019 Functions:
00020 parse               Incremental parser, this is an iterator that returns
00021                     Blast records.  It uses the BlastParser internally.
00022 read                Returns a single Blast record. Uses the BlastParser internally.
00023 """
00024 from Bio.Blast import Record
00025 import xml.sax
00026 from xml.sax.handler import ContentHandler
00027 
class _XMLparser(ContentHandler):
    """Generic SAX Parser

    Just a very basic SAX parser.  For each XML tag it looks for a method
    on the instance named _start_TAG / _end_TAG (any '-' in the tag name
    is replaced by '_') and calls it with no arguments when it exists.
    Text found between tags is accumulated in self._value, where the
    _end_TAG methods can read it.

    Redefine the methods startElement, characters and endElement.
    """
    def __init__(self, debug=0):
        """Constructor

        debug - integer, amount of debug information to print
        """
        # NOTE(review): tag names are appended to _tag in startElement but
        # nothing visible in this module ever removes them again.
        self._tag = []
        self._value = ''
        self._debug = debug
        # Handler names already reported as missing (reported once each)
        self._debug_ignore_list = []

    def _secure_name(self, name):
        """Removes 'dangerous' from tag names

        name -- name to be 'secured'

        Returns the name with every '-' replaced by '_'.
        """
        # Replace '-' with '_' in XML tag names
        return name.replace('-', '_')

    def startElement(self, name, attr):
        """Found XML start tag

        No real need of attr, BLAST DTD doesn't use them

        name -- name of the tag

        attr -- tag attributes

        Raises ValueError if non white space text was collected since the
        previous tag, because that text would belong to no child tag.
        """
        self._tag.append(name)

        # Try to call a method (defined in subclasses)
        method = self._secure_name('_start_' + name)

        #Note could use try / except AttributeError
        #BUT I found often triggered by nested errors...
        if hasattr(self, method):
            # Look the handler up by name; never build code from tag names
            getattr(self, method)()
            if self._debug > 4:
                print("NCBIXML: Parsed:  " + method)
        else:
            # Doesn't exist (yet)
            if method not in self._debug_ignore_list:
                if self._debug > 3:
                    print("NCBIXML: Ignored: " + method)
                self._debug_ignore_list.append(method)

        #We don't care about white space in parent tags like Hsp,
        #but that white space doesn't belong to child tags like Hsp_midline
        if self._value.strip():
            raise ValueError("What should we do with %s before the %s tag?" \
                             % (repr(self._value), name))
        self._value = ""

    def characters(self, ch):
        """Found some text

        ch -- characters read
        """
        self._value += ch # You don't ever get the whole string

    def endElement(self, name):
        """Found XML end tag

        name -- tag name
        """
        # DON'T strip any white space, we may need it e.g. the hsp-midline

        # Try to call a method (defined in subclasses)
        method = self._secure_name('_end_' + name)
        #Note could use try / except AttributeError
        #BUT I found often triggered by nested errors...
        if hasattr(self, method):
            # Look the handler up by name; never build code from tag names
            getattr(self, method)()
            if self._debug > 2:
                print("NCBIXML: Parsed:  %s %s" % (method, self._value))
        else:
            # Doesn't exist (yet)
            if method not in self._debug_ignore_list:
                if self._debug > 1:
                    print("NCBIXML: Ignored: %s %s" % (method, self._value))
                self._debug_ignore_list.append(method)

        # Reset character buffer
        self._value = ''
00118         
class BlastParser(_XMLparser):
    """Parse XML BLAST data into a Record.Blast object

    All XML 'action' methods are private methods and may be:
    _start_TAG      called when the start tag is found
    _end_TAG        called when the end tag is found

    One Record.Blast object is built per <Iteration> element; finished
    records are appended to the list self._records.
    """

    def __init__(self, debug=0):
        """Constructor

        debug - integer, amount of debug information to print
        """
        # Calling superclass method
        _XMLparser.__init__(self, debug)

        self._parser = xml.sax.make_parser()
        self._parser.setContentHandler(self)

        # To avoid ValueError: unknown url type: NCBI_BlastOutput.dtd
        self._parser.setFeature(xml.sax.handler.feature_validation, 0)
        self._parser.setFeature(xml.sax.handler.feature_namespaces, 0)
        self._parser.setFeature(xml.sax.handler.feature_external_pes, 0)
        self._parser.setFeature(xml.sax.handler.feature_external_ges, 0)

        self.reset()

    def reset(self):
        """Reset all the data allowing reuse of the BlastParser() object

        Replaces the record list, the header and the parameters with
        fresh empty objects.
        """
        self._records = []
        self._header = Record.Header()
        self._parameters = Record.Parameters()
        self._parameters.filter = None #Maybe I should update the class?

    def _start_Iteration(self):
        """Start a new, empty Blast record for this iteration"""
        self._blast = Record.Blast()

    def _end_Iteration(self):
        """Finish the current Blast record and add it to self._records

        Copies the file level header and parameter values onto the record
        before storing it.
        """
        # We stored a lot of generic "top level" information
        # in self._header (an object of type Record.Header)
        self._blast.reference = self._header.reference
        self._blast.date = self._header.date
        self._blast.version = self._header.version
        self._blast.database = self._header.database
        self._blast.application = self._header.application

        # These are required for "old" pre 2.2.14 files
        # where only <BlastOutput_query-ID>, <BlastOutput_query-def>
        # and <BlastOutput_query-len> were used.  Now they
        # are supplemented/replaced by <Iteration_query-ID>,
        # <Iteration_query-def> and <Iteration_query-len>
        if not hasattr(self._blast, "query") \
        or not self._blast.query:
            self._blast.query = self._header.query
        if not hasattr(self._blast, "query_id") \
        or not self._blast.query_id:
            self._blast.query_id = self._header.query_id
        if not hasattr(self._blast, "query_letters") \
        or not self._blast.query_letters:
            self._blast.query_letters = self._header.query_letters

        # Hack to record the query length as both the query_letters and
        # query_length properties (as in the plain text parser, see
        # Bug 2176 comment 12):
        self._blast.query_length = self._blast.query_letters
        # Perhaps in the long term we should deprecate one, but I would
        # prefer to drop query_letters - so we need a transition period
        # with both.

        # Hack to record the claimed database size as database_length
        # (as well as in num_letters_in_database, see Bug 2176 comment 13):
        self._blast.database_length = self._blast.num_letters_in_database
        # TODO? Deprecate database_letters next?

        # Hack to record the claimed database sequence count as database_sequences
        self._blast.database_sequences = self._blast.num_sequences_in_database

        # Apply the "top level" parameter information
        self._blast.matrix = self._parameters.matrix
        self._blast.num_seqs_better_e = self._parameters.num_seqs_better_e
        self._blast.gap_penalties = self._parameters.gap_penalties
        self._blast.filter = self._parameters.filter
        self._blast.expect = self._parameters.expect
        self._blast.sc_match = self._parameters.sc_match
        self._blast.sc_mismatch = self._parameters.sc_mismatch

        #Add to the list
        self._records.append(self._blast)
        #Clear the object (a new empty one is created in _start_Iteration)
        self._blast = None

        if self._debug:
            print("NCBIXML: Added Blast record to results")

    # Header
    def _end_BlastOutput_program(self):
        """BLAST program, e.g., blastp, blastn, etc.

        Save this (in upper case) to put on each blast record object
        """
        self._header.application = self._value.upper()

    def _end_BlastOutput_version(self):
        """version number and date of the BLAST engine.

        e.g. "BLASTX 2.2.12 [Aug-07-2005]" but there can also be
        variants like "BLASTP 2.2.18+" without the date.

        Save this to put on each blast record object
        """
        parts = self._value.split()
        #TODO - Check the first word starts with BLAST?

        #The version is the second word (field one)
        self._header.version = parts[1]

        #Check there is a third word (the date)
        if len(parts) >= 3:
            if parts[2][0] == "[" and parts[2][-1] == "]":
                self._header.date = parts[2][1:-1]
            else:
                #Assume this is still a date, but without the
                #square brackets
                self._header.date = parts[2]

    def _end_BlastOutput_reference(self):
        """a reference to the article describing the algorithm

        Save this to put on each blast record object
        """
        self._header.reference = self._value

    def _end_BlastOutput_db(self):
        """the database(s) searched

        Save this to put on each blast record object
        """
        self._header.database = self._value

    def _end_BlastOutput_query_ID(self):
        """the identifier of the query

        Important in old pre 2.2.14 BLAST, for recent versions
        <Iteration_query-ID> is enough
        """
        self._header.query_id = self._value

    def _end_BlastOutput_query_def(self):
        """the definition line of the query

        Important in old pre 2.2.14 BLAST, for recent versions
        <Iteration_query-def> is enough
        """
        self._header.query = self._value

    def _end_BlastOutput_query_len(self):
        """the length of the query

        Important in old pre 2.2.14 BLAST, for recent versions
        <Iteration_query-len> is enough
        """
        self._header.query_letters = int(self._value)

    def _end_Iteration_query_ID(self):
        """the identifier of the query
        """
        self._blast.query_id = self._value

    def _end_Iteration_query_def(self):
        """the definition line of the query
        """
        self._blast.query = self._value

    def _end_Iteration_query_len(self):
        """the length of the query
        """
        self._blast.query_letters = int(self._value)

    # TODO: <BlastOutput_query-seq> (the query sequence) is not handled;
    #       XXX Missing in Record.Blast ?
    # TODO: <BlastOutput_iter-num> (the psi-blast iteration number) is not
    #       handled; XXX TODO PSI

    def _end_BlastOutput_hits(self):
        """hits to the database sequences, one for every sequence
        """
        self._blast.num_hits = int(self._value)

    # TODO: <BlastOutput_message> (error messages) is not handled;
    #       XXX What to do ?

    # Parameters
    def _end_Parameters_matrix(self):
        """matrix used (-M)
        """
        self._parameters.matrix = self._value

    def _end_Parameters_expect(self):
        """expect values cutoff (-e)

        Stored as the text found in the file (not converted to a number).
        """
        # NOTE: In old text output there was a line:
        # Number of sequences better than 1.0e-004: 1
        # As far as I can see, parameters.num_seqs_better_e
        # would take the value of 1, and the expectation
        # value was not recorded.
        #
        # Anyway we should NOT record this against num_seqs_better_e
        self._parameters.expect = self._value

    # TODO: <Parameters_include> (inclusion threshold for a psi-blast
    #       iteration, -h) is not handled; XXX TODO PSI

    def _end_Parameters_sc_match(self):
        """match score for nucleotide-nucleotide comparison (-r)
        """
        self._parameters.sc_match = int(self._value)

    def _end_Parameters_sc_mismatch(self):
        """mismatch penalty for nucleotide-nucleotide comparison (-r)
        """
        self._parameters.sc_mismatch = int(self._value)

    def _end_Parameters_gap_open(self):
        """gap existence cost (-G)

        Stored on its own for now; _end_Parameters_gap_extend turns it
        into the (open, extend) tuple.
        """
        self._parameters.gap_penalties = int(self._value)

    def _end_Parameters_gap_extend(self):
        """gap extension cost (-E)

        Combines with the previously stored gap existence cost, giving
        gap_penalties = (open, extend).
        """
        self._parameters.gap_penalties = (self._parameters.gap_penalties,
                                         int(self._value))

    def _end_Parameters_filter(self):
        """filtering options (-F)
        """
        self._parameters.filter = self._value

    # TODO: <Parameters_pattern> (pattern used for phi-blast search) is
    #       not handled; XXX TODO PSI
    # TODO: <Parameters_entrez-query> (entrez query used to limit search)
    #       is not handled; XXX TODO PSI

    # Hits
    def _start_Hit(self):
        """Start a new hit

        Adds a new Alignment and a new Description to the current record
        and keeps them as self._hit and self._descr for the Hit_* and
        Hsp_* handlers.
        """
        self._blast.alignments.append(Record.Alignment())
        self._blast.descriptions.append(Record.Description())
        self._blast.multiple_alignment = []
        self._hit = self._blast.alignments[-1]
        self._descr = self._blast.descriptions[-1]
        self._descr.num_alignments = 0

    def _end_Hit(self):
        """Finish the current hit, dropping the per hit working objects"""
        #Cleanup
        self._blast.multiple_alignment = None
        self._hit = None
        self._descr = None

    def _end_Hit_id(self):
        """identifier of the database sequence
        """
        self._hit.hit_id = self._value
        # The title is "<id> <def>"; the definition is added in _end_Hit_def
        self._hit.title = self._value + ' '

    def _end_Hit_def(self):
        """definition line of the database sequence
        """
        self._hit.hit_def = self._value
        self._hit.title += self._value
        self._descr.title = self._hit.title

    def _end_Hit_accession(self):
        """accession of the database sequence
        """
        self._hit.accession = self._value
        self._descr.accession = self._value

    def _end_Hit_len(self):
        """length of the database sequence
        """
        self._hit.length = int(self._value)

    # HSPs
    def _start_Hsp(self):
        """Start a new HSP within the current hit"""
        #Note that self._start_Hit() should have been called
        #to setup things like self._blast.multiple_alignment
        self._hit.hsps.append(Record.HSP())
        self._hsp = self._hit.hsps[-1]
        self._descr.num_alignments += 1
        self._blast.multiple_alignment.append(Record.MultipleAlignment())
        self._mult_al = self._blast.multiple_alignment[-1]

    # Hsp_num is useless
    def _end_Hsp_score(self):
        """raw score of HSP
        """
        self._hsp.score = float(self._value)
        # The description only takes a value while it has none yet
        if self._descr.score is None:
            self._descr.score = float(self._value)

    def _end_Hsp_bit_score(self):
        """bit score of HSP
        """
        self._hsp.bits = float(self._value)
        # The description only takes a value while it has none yet
        if self._descr.bits is None:
            self._descr.bits = float(self._value)

    def _end_Hsp_evalue(self):
        """expect value of the HSP
        """
        self._hsp.expect = float(self._value)
        # The description only takes a value while it has none yet
        if self._descr.e is None:
            self._descr.e = float(self._value)

    def _end_Hsp_query_from(self):
        """offset of query at the start of the alignment (one-offset)
        """
        self._hsp.query_start = int(self._value)

    def _end_Hsp_query_to(self):
        """offset of query at the end of the alignment (one-offset)
        """
        self._hsp.query_end = int(self._value)

    def _end_Hsp_hit_from(self):
        """offset of the database at the start of the alignment (one-offset)
        """
        self._hsp.sbjct_start = int(self._value)

    def _end_Hsp_hit_to(self):
        """offset of the database at the end of the alignment (one-offset)
        """
        self._hsp.sbjct_end = int(self._value)

    # TODO: <Hsp_pattern-from> (start of phi-blast pattern on the query,
    #       one-offset) is not handled; XXX TODO PSI
    # TODO: <Hsp_pattern-to> (end of phi-blast pattern on the query,
    #       one-offset) is not handled; XXX TODO PSI

    def _end_Hsp_query_frame(self):
        """frame of the query if applicable

        Starts the frame tuple; _end_Hsp_hit_frame appends to it.
        """
        self._hsp.frame = (int(self._value),)

    def _end_Hsp_hit_frame(self):
        """frame of the database sequence if applicable

        Appended to the frame tuple, giving (query frame, hit frame).
        """
        self._hsp.frame += (int(self._value),)

    def _end_Hsp_identity(self):
        """number of identities in the alignment
        """
        self._hsp.identities = int(self._value)

    def _end_Hsp_positive(self):
        """number of positive (conservative) substitutions in the alignment
        """
        self._hsp.positives = int(self._value)

    def _end_Hsp_gaps(self):
        """number of gaps in the alignment
        """
        self._hsp.gaps = int(self._value)

    def _end_Hsp_align_len(self):
        """length of the alignment
        """
        self._hsp.align_length = int(self._value)

    # TODO: <Hsp_density> (score density) is not handled; XXX ???

    def _end_Hsp_qseq(self):
        """alignment string for the query
        """
        self._hsp.query = self._value

    def _end_Hsp_hseq(self):
        """alignment string for the database
        """
        self._hsp.sbjct = self._value

    def _end_Hsp_midline(self):
        """Formatting middle line as normally seen in BLAST report

        Checks that the midline is as long as the query and the
        database alignment strings stored for this HSP.
        """
        self._hsp.match = self._value # do NOT strip spaces!
        assert len(self._hsp.match)==len(self._hsp.query)
        assert len(self._hsp.match)==len(self._hsp.sbjct)

    # Statistics
    def _end_Statistics_db_num(self):
        """number of sequences in the database
        """
        self._blast.num_sequences_in_database = int(self._value)

    def _end_Statistics_db_len(self):
        """number of letters in the database
        """
        self._blast.num_letters_in_database = int(self._value)

    def _end_Statistics_hsp_len(self):
        """the effective HSP length
        """
        self._blast.effective_hsp_length = int(self._value)

    def _end_Statistics_eff_space(self):
        """the effective search space
        """
        self._blast.effective_search_space = float(self._value)

    def _end_Statistics_kappa(self):
        """Karlin-Altschul parameter K

        Stored on its own for now; the lambda and entropy handlers build
        the final ka_params tuple from it.
        """
        self._blast.ka_params = float(self._value)

    def _end_Statistics_lambda(self):
        """Karlin-Altschul parameter Lambda

        Turns ka_params into (lambda, K), using the K stored earlier.
        """
        self._blast.ka_params = (float(self._value),
                                 self._blast.ka_params)

    def _end_Statistics_entropy(self):
        """Karlin-Altschul parameter H

        Appended to ka_params, giving (lambda, K, H).
        """
        self._blast.ka_params = self._blast.ka_params + (float(self._value),)
00563     
def read(handle, debug=0):
    """Returns a single Blast record (assumes just one query).

    handle - file handle to an XML file to parse
    debug - integer, amount of debug information to print

    This function is for use when there is one and only one BLAST
    result in your XML file.

    Use the Bio.Blast.NCBIXML.parse() function if you expect more than
    one BLAST record (i.e. if you have more than one query sequence).

    Raises ValueError if the handle holds no record, or more than one.
    """
    iterator = parse(handle, debug)
    # The next() builtin (Python 2.6+) also works on Python 3 iterators,
    # which have no .next() method.
    try:
        first = next(iterator)
    except StopIteration:
        first = None
    if first is None:
        raise ValueError("No records found in handle")
    try:
        second = next(iterator)
    except StopIteration:
        second = None
    if second is not None:
        raise ValueError("More than one record found in handle")
    return first
00588 
00589 
def parse(handle, debug=0):
    """Returns an iterator giving a Blast record for each query.

    handle - file handle to an XML file to parse
    debug - integer, amount of debug information to print

    This is a generator function that returns multiple Blast record
    objects - one for each query sequence given to blast.  The file
    is read incrementally, returning complete records as they are read
    in.

    Should cope with new BLAST 2.2.14+ which gives a single XML file
    for multiple query records.

    Should also cope with XML output from older versions BLAST which
    gave multiple XML files concatenated together (giving a single file
    which strictly speaking wasn't valid XML).

    Raises ValueError if the handle is empty, or if a document does not
    begin with the XML declaration."""
    from xml.parsers import expat
    BLOCK = 1024
    MARGIN = 10 # must be at least length of newline + XML start
    XML_START = "<?xml"
    # A new document is recognised by its declaration at a line start
    boundary = "\n" + XML_START

    text = handle.read(BLOCK)
    pending = ""

    if not text:
        #NO DATA FOUND!
        raise ValueError("Your XML file was empty")

    # One pass of this loop per XML document found in the handle
    while text:
        #We are now starting a new XML file
        if not text.startswith(XML_START):
            raise ValueError("Your XML file did not start with %s... "
                             "but instead %s" \
                             % (XML_START, repr(text[:20])))

        # Fresh parsers for every document; expat drives the handler
        expat_parser = expat.ParserCreate()
        blast_parser = BlastParser(debug)
        expat_parser.StartElementHandler = blast_parser.startElement
        expat_parser.EndElementHandler = blast_parser.endElement
        expat_parser.CharacterDataHandler = blast_parser.characters

        expat_parser.Parse(text, False)
        while blast_parser._records:
            yield blast_parser._records.pop(0)

        while True:
            #Read in another block of the file, after whatever was
            #held back on the previous pass
            text = pending + handle.read(BLOCK)
            pending = ""
            if not text:
                #End of the file!
                expat_parser.Parse("", True) # End of XML record
                break

            #Now read a little bit more so we can check for the
            #start of another XML file...
            pending = handle.read(MARGIN)
            combined = text + pending

            if boundary in combined:
                # This is output from pre 2.2.14 BLAST,
                # one XML file for each query!

                # Finish the old file:
                text, pending = combined.split(boundary, 1)
                pending = XML_START + pending

                expat_parser.Parse(text, True) # End of XML record
                while blast_parser._records:
                    yield blast_parser._records.pop(0)

                #Now we are going to re-loop, reset the
                #parsers and start reading the next XML file
                text, pending = pending, ""
                break

            # Good - still dealing with the same XML file
            expat_parser.Parse(text, False)
            while blast_parser._records:
                yield blast_parser._records.pop(0)

        #this was added because it seems that the Jython expat parser
        #was adding records later than the Python one
        while blast_parser._records:
            yield blast_parser._records.pop(0)

        #At this point we have finished the first XML record.
        #If the file is from an old version of blast, it may
        #contain more XML records (check if text=="").
        assert pending==""
        assert len(blast_parser._records) == 0

    #We should have finished the file!
    assert text==""
    assert pending==""
    assert len(blast_parser._records) == 0
00687 
if __name__ == '__main__':
    # Small self test: parse the BLAST XML file named on the command line
    # and print a summary of each record.
    import sys

    handle = open(sys.argv[1])
    try:
        r_list = parse(handle)

        for r in r_list:
            # Small test
            print('Blast of %s' % (r.query,))
            # sum() gives 0 for a record without alignments, where
            # reduce() without an initial value would raise TypeError
            hsp_total = sum([len(a.hsps) for a in r.alignments])
            print('Found %s alignments with a total of %s HSPs'
                  % (len(r.alignments), hsp_total))

            for al in r.alignments:
                print('%s %s bp %s HSPs'
                      % (al.title[:50], al.length, len(al.hsps)))

            # Cookbook example
            E_VALUE_THRESH = 0.04
            for alignment in r.alignments:
                for hsp in alignment.hsps:
                    if hsp.expect < E_VALUE_THRESH:
                        print('*****')
                        print('sequence %s' % (alignment.title,))
                        print('length %s' % (alignment.length,))
                        print('e value %s' % (hsp.expect,))
                        print(hsp.query[:75] + '...')
                        print(hsp.match[:75] + '...')
                        print(hsp.sbjct[:75] + '...')
    finally:
        # Always release the file, even if parsing fails part way
        handle.close()
00715                     print hsp.sbjct[:75] + '...'