Back to index

python-biopython  1.60
Classes | Functions | Variables
Bio.Blast.NCBIXML Namespace Reference

Classes

class  _XMLparser
class  BlastParser

Functions

def _end_BlastOutput_hits
 def _end_BlastOutput_query_seq(self): """the query sequence """ pass # XXX Missing in Record.Blast ?
def _end_Parameters_matrix
 def _end_BlastOutput_message(self): """error messages """ pass # XXX What to do ?
def _end_Parameters_expect
def _end_Parameters_sc_match
 def _end_Parameters_include(self): """inclusion threshold for a psi-blast iteration (-h) """ pass # XXX TODO PSI
def _end_Parameters_sc_mismatch
def _end_Parameters_gap_open
def _end_Parameters_gap_extend
def _end_Parameters_filter
def _start_Hit
 def _end_Parameters_pattern(self): """pattern used for phi-blast search """ pass # XXX TODO PSI
def _end_Hit
def _end_Hit_id
def _end_Hit_def
def _end_Hit_accession
def _end_Hit_len
def _start_Hsp
def _end_Hsp_score
def _end_Hsp_bit_score
def _end_Hsp_evalue
def _end_Hsp_query_from
def _end_Hsp_query_to
def _end_Hsp_hit_from
def _end_Hsp_hit_to
def _end_Hsp_query_frame
 def _end_Hsp_pattern_from(self): """start of phi-blast pattern on the query (one-offset) """ pass # XXX TODO PSI
def _end_Hsp_hit_frame
def _end_Hsp_identity
def _end_Hsp_positive
def _end_Hsp_gaps
def _end_Hsp_align_len
def _end_Hsp_qseq
 def _en_Hsp_density(self): """score density """ pass # XXX ???
def _end_Hsp_hseq
def _end_Hsp_midline
def _end_Statistics_db_num
def _end_Statistics_db_len
def _end_Statistics_hsp_len
def _end_Statistics_eff_space
def _end_Statistics_kappa
def _end_Statistics_lambda
def _end_Statistics_entropy
def read
def parse

Variables

tuple handle = open(sys.argv[1])
tuple r_list = parse(handle)
float E_VALUE_THRESH = 0.04
 _hit
 _descr
 _hsp
 _mult_al

Function Documentation

def Bio.Blast.NCBIXML._end_BlastOutput_hits (   self) [private]

def _end_BlastOutput_query_seq(self): """the query sequence """ pass # XXX Missing in Record.Blast ?

 def _end_BlastOutput_iter_num(self):
     """the psi-blast iteration number
     """
     pass # XXX TODO PSI

hits to the database sequences, one for every sequence

Definition at line 307 of file NCBIXML.py.

00307 
00308     def _end_BlastOutput_hits(self):
00309         """hits to the database sequences, one for every sequence
00310         """
00311         self._blast.num_hits = int(self._value)

def Bio.Blast.NCBIXML._end_Hit (   self) [private]

Definition at line 385 of file NCBIXML.py.

00385 
00386     def _end_Hit(self):
00387         #Cleanup
00388         self._blast.multiple_alignment = None
00389         self._hit = None
00390         self._descr = None

def Bio.Blast.NCBIXML._end_Hit_accession (   self) [private]
accession of the database sequence

Definition at line 404 of file NCBIXML.py.

00404 
00405     def _end_Hit_accession(self):
00406         """accession of the database sequence
00407         """
00408         self._hit.accession = self._value
00409         self._descr.accession = self._value

def Bio.Blast.NCBIXML._end_Hit_def (   self) [private]
definition line of the database sequence

Definition at line 397 of file NCBIXML.py.

00397 
00398     def _end_Hit_def(self):
00399         """definition line of the database sequence
00400         """
00401         self._hit.hit_def = self._value
00402         self._hit.title += self._value
00403         self._descr.title = self._hit.title

def Bio.Blast.NCBIXML._end_Hit_id (   self) [private]
identifier of the database sequence

Definition at line 391 of file NCBIXML.py.

00391 
00392     def _end_Hit_id(self):
00393         """identifier of the database sequence
00394         """
00395         self._hit.hit_id = self._value
00396         self._hit.title = self._value + ' '

def Bio.Blast.NCBIXML._end_Hit_len (   self) [private]

Definition at line 410 of file NCBIXML.py.

00410 
00411     def _end_Hit_len(self):
00412         self._hit.length = int(self._value)

def Bio.Blast.NCBIXML._end_Hsp_align_len (   self) [private]
length of the alignment

Definition at line 500 of file NCBIXML.py.

00500 
00501     def _end_Hsp_align_len(self):
00502         """length of the alignment
00503         """
00504         self._hsp.align_length = int(self._value)

def Bio.Blast.NCBIXML._end_Hsp_bit_score (   self) [private]
bit score of HSP

Definition at line 431 of file NCBIXML.py.

00431 
00432     def _end_Hsp_bit_score(self):
00433         """bit score of HSP
00434         """
00435         self._hsp.bits = float(self._value)
00436         if self._descr.bits == None:
00437             self._descr.bits = float(self._value)

def Bio.Blast.NCBIXML._end_Hsp_evalue (   self) [private]
expect value of the HSP

Definition at line 438 of file NCBIXML.py.

00438 
00439     def _end_Hsp_evalue(self):
00440         """expect value value of the HSP
00441         """
00442         self._hsp.expect = float(self._value)
00443         if self._descr.e == None:
00444             self._descr.e = float(self._value)

def Bio.Blast.NCBIXML._end_Hsp_gaps (   self) [private]
number of gaps in the alignment

Definition at line 495 of file NCBIXML.py.

00495 
00496     def _end_Hsp_gaps(self):
00497         """number of gaps in the alignment
00498         """
00499         self._hsp.gaps = int(self._value)

def Bio.Blast.NCBIXML._end_Hsp_hit_frame (   self) [private]
frame of the database sequence if applicable

Definition at line 480 of file NCBIXML.py.

00480 
00481     def _end_Hsp_hit_frame(self):
00482         """frame of the database sequence if applicable
00483         """
00484         self._hsp.frame += (int(self._value),)

def Bio.Blast.NCBIXML._end_Hsp_hit_from (   self) [private]
offset of the database at the start of the alignment (one-offset)

Definition at line 455 of file NCBIXML.py.

00455 
00456     def _end_Hsp_hit_from(self):
00457         """offset of the database at the start of the alignment (one-offset)
00458         """
00459         self._hsp.sbjct_start = int(self._value)

def Bio.Blast.NCBIXML._end_Hsp_hit_to (   self) [private]
offset of the database at the end of the alignment (one-offset)

Definition at line 460 of file NCBIXML.py.

00460 
00461     def _end_Hsp_hit_to(self):
00462         """offset of the database at the end of the alignment (one-offset)
00463         """
00464         self._hsp.sbjct_end = int(self._value)

def Bio.Blast.NCBIXML._end_Hsp_hseq (   self) [private]
alignment string for the database

Definition at line 515 of file NCBIXML.py.

00515 
00516     def _end_Hsp_hseq(self):
00517         """alignment string for the database
00518         """
00519         self._hsp.sbjct = self._value

def Bio.Blast.NCBIXML._end_Hsp_identity (   self) [private]
number of identities in the alignment

Definition at line 485 of file NCBIXML.py.

00485 
00486     def _end_Hsp_identity(self):
00487         """number of identities in the alignment
00488         """
00489         self._hsp.identities = int(self._value)

def Bio.Blast.NCBIXML._end_Hsp_midline (   self) [private]
Formatting middle line as normally seen in BLAST report

Definition at line 520 of file NCBIXML.py.

00520 
00521     def _end_Hsp_midline(self):
00522         """Formatting middle line as normally seen in BLAST report
00523         """
00524         self._hsp.match = self._value # do NOT strip spaces!
00525         assert len(self._hsp.match)==len(self._hsp.query)
00526         assert len(self._hsp.match)==len(self._hsp.sbjct)

def Bio.Blast.NCBIXML._end_Hsp_positive (   self) [private]
number of positive (conservative) substitutions in the alignment

Definition at line 490 of file NCBIXML.py.

00490 
00491     def _end_Hsp_positive(self):
00492         """number of positive (conservative) substitutions in the alignment
00493         """
00494         self._hsp.positives = int(self._value)

def Bio.Blast.NCBIXML._end_Hsp_qseq (   self) [private]

def _en_Hsp_density(self): """score density """ pass # XXX ???

alignment string for the query

Definition at line 510 of file NCBIXML.py.

00510 
00511     def _end_Hsp_qseq(self):
00512         """alignment string for the query
00513         """
00514         self._hsp.query = self._value

def Bio.Blast.NCBIXML._end_Hsp_query_frame (   self) [private]

def _end_Hsp_pattern_from(self): """start of phi-blast pattern on the query (one-offset) """ pass # XXX TODO PSI

 def _end_Hsp_pattern_to(self):
     """end of phi-blast pattern on the query (one-offset)
     """
     pass # XXX TODO PSI

frame of the query if applicable

Definition at line 475 of file NCBIXML.py.

00475 
00476     def _end_Hsp_query_frame(self):
00477         """frame of the query if applicable
00478         """
00479         self._hsp.frame = (int(self._value),)

def Bio.Blast.NCBIXML._end_Hsp_query_from (   self) [private]
offset of query at the start of the alignment (one-offset)

Definition at line 445 of file NCBIXML.py.

00445 
00446     def _end_Hsp_query_from(self):
00447         """offset of query at the start of the alignment (one-offset)
00448         """
00449         self._hsp.query_start = int(self._value)

def Bio.Blast.NCBIXML._end_Hsp_query_to (   self) [private]
offset of query at the end of the alignment (one-offset)

Definition at line 450 of file NCBIXML.py.

00450 
00451     def _end_Hsp_query_to(self):
00452         """offset of query at the end of the alignment (one-offset)
00453         """
00454         self._hsp.query_end = int(self._value)

def Bio.Blast.NCBIXML._end_Hsp_score (   self) [private]
raw score of HSP

Definition at line 424 of file NCBIXML.py.

00424 
00425     def _end_Hsp_score(self):
00426         """raw score of HSP
00427         """
00428         self._hsp.score = float(self._value)
00429         if self._descr.score == None:
00430             self._descr.score = float(self._value)

def Bio.Blast.NCBIXML._end_Parameters_expect (   self) [private]
expect values cutoff (-e)

Definition at line 323 of file NCBIXML.py.

00323 
00324     def _end_Parameters_expect(self):
00325         """expect values cutoff (-e)
00326         """
00327         # NOTE: In old text output there was a line:
00328         # Number of sequences better than 1.0e-004: 1
00329         # As far as I can see, parameters.num_seqs_better_e
00330         # would take the value of 1, and the expectation
00331         # value was not recorded.
00332         #
00333         # Anyway we should NOT record this against num_seqs_better_e
00334         self._parameters.expect = self._value

def Bio.Blast.NCBIXML._end_Parameters_filter (   self) [private]
filtering options (-F)

Definition at line 361 of file NCBIXML.py.

00361 
00362     def _end_Parameters_filter(self):
00363         """filtering options (-F)
00364         """
00365         self._parameters.filter = self._value

gap extension cost (-E)

Definition at line 355 of file NCBIXML.py.

00355 
00356     def _end_Parameters_gap_extend(self):
00357         """gap extension cose (-E)
00358         """
00359         self._parameters.gap_penalties = (self._parameters.gap_penalties,
00360                                          int(self._value))

gap existence cost (-G)

Definition at line 350 of file NCBIXML.py.

00350 
00351     def _end_Parameters_gap_open(self):
00352         """gap existence cost (-G)
00353         """
00354         self._parameters.gap_penalties = int(self._value)

def Bio.Blast.NCBIXML._end_Parameters_matrix (   self) [private]

def _end_BlastOutput_message(self): """error messages """ pass # XXX What to do ?

matrix used (-M)

Definition at line 318 of file NCBIXML.py.

00318 
00319     def _end_Parameters_matrix(self):
00320         """matrix used (-M)
00321         """
00322         self._parameters.matrix = self._value
        

def _end_Parameters_include(self): """inclusion threshold for a psi-blast iteration (-h) """ pass # XXX TODO PSI

match score for nucleotide-nucleotide comparison (-r)

Definition at line 340 of file NCBIXML.py.

00340 
00341     def _end_Parameters_sc_match(self):
00342         """match score for nucleotide-nucleotide comparaison (-r)
00343         """
00344         self._parameters.sc_match = int(self._value)

mismatch penalty for nucleotide-nucleotide comparison (-r)

Definition at line 345 of file NCBIXML.py.

00345 
00346     def _end_Parameters_sc_mismatch(self):
00347         """mismatch penalty for nucleotide-nucleotide comparaison (-r)
00348         """
00349         self._parameters.sc_mismatch = int(self._value)

def Bio.Blast.NCBIXML._end_Statistics_db_len (   self) [private]
number of letters in the database

Definition at line 533 of file NCBIXML.py.

00533 
00534     def _end_Statistics_db_len(self):
00535         """number of letters in the database
00536         """
00537         self._blast.num_letters_in_database = int(self._value)

def Bio.Blast.NCBIXML._end_Statistics_db_num (   self) [private]
number of sequences in the database

Definition at line 528 of file NCBIXML.py.

00528 
00529     def _end_Statistics_db_num(self):
00530         """number of sequences in the database
00531         """
00532         self._blast.num_sequences_in_database = int(self._value)

the effective search space

Definition at line 543 of file NCBIXML.py.

00543 
00544     def _end_Statistics_eff_space(self):
00545         """the effective search space
00546         """
00547         self._blast.effective_search_space = float(self._value)

Karlin-Altschul parameter H

Definition at line 559 of file NCBIXML.py.

00559 
00560     def _end_Statistics_entropy(self):
00561         """Karlin-Altschul parameter H
00562         """
00563         self._blast.ka_params = self._blast.ka_params + (float(self._value),)
    
the effective HSP length

Definition at line 538 of file NCBIXML.py.

00538 
00539     def _end_Statistics_hsp_len(self):
00540         """the effective HSP length
00541         """
00542         self._blast.effective_hsp_length = int(self._value)

def Bio.Blast.NCBIXML._end_Statistics_kappa (   self) [private]
Karlin-Altschul parameter K

Definition at line 548 of file NCBIXML.py.

00548 
00549     def _end_Statistics_kappa(self):
00550         """Karlin-Altschul parameter K
00551         """
00552         self._blast.ka_params = float(self._value)

def Bio.Blast.NCBIXML._end_Statistics_lambda (   self) [private]
Karlin-Altschul parameter Lambda

Definition at line 553 of file NCBIXML.py.

00553 
00554     def _end_Statistics_lambda(self):
00555         """Karlin-Altschul parameter Lambda
00556         """
00557         self._blast.ka_params = (float(self._value),
00558                                  self._blast.ka_params)

def Bio.Blast.NCBIXML._start_Hit (   self) [private]

def _end_Parameters_pattern(self): """pattern used for phi-blast search """ pass # XXX TODO PSI

def _end_Parameters_entrez_query(self): """entrez query used to limit search """ pass # XXX TODO PSI

Definition at line 377 of file NCBIXML.py.

00377 
00378     def _start_Hit(self):
00379         self._blast.alignments.append(Record.Alignment())
00380         self._blast.descriptions.append(Record.Description())
00381         self._blast.multiple_alignment = []
00382         self._hit = self._blast.alignments[-1]
00383         self._descr = self._blast.descriptions[-1]
00384         self._descr.num_alignments = 0

def Bio.Blast.NCBIXML._start_Hsp (   self) [private]

Definition at line 414 of file NCBIXML.py.

00414 
00415     def _start_Hsp(self):
00416         #Note that self._start_Hit() should have been called
00417         #to setup things like self._blast.multiple_alignment
00418         self._hit.hsps.append(Record.HSP())
00419         self._hsp = self._hit.hsps[-1]
00420         self._descr.num_alignments += 1
00421         self._blast.multiple_alignment.append(Record.MultipleAlignment())
00422         self._mult_al = self._blast.multiple_alignment[-1]

def Bio.Blast.NCBIXML.parse (   handle,
  debug = 0 
)
Returns an iterator yielding a Blast record for each query.

handle - file handle to an XML file to parse
debug - integer, amount of debug information to print

This is a generator function that returns multiple Blast records
objects - one for each query sequence given to blast.  The file
is read incrementally, returning complete records as they are read
in.

Should cope with new BLAST 2.2.14+ which gives a single XML file
for multiple query records.

Should also cope with XML output from older versions of BLAST which
gave multiple XML files concatenated together (giving a single file
which strictly speaking wasn't valid XML).

Definition at line 590 of file NCBIXML.py.

00590 
00591 def parse(handle, debug=0):
00592     """Returns an iterator a Blast record for each query.
00593 
00594     handle - file handle to and XML file to parse
00595     debug - integer, amount of debug information to print
00596 
00597     This is a generator function that returns multiple Blast records
00598     objects - one for each query sequence given to blast.  The file
00599     is read incrementally, returning complete records as they are read
00600     in.
00601 
00602     Should cope with new BLAST 2.2.14+ which gives a single XML file
00603     for mutliple query records.
00604 
00605     Should also cope with XML output from older versions BLAST which
00606     gave multiple XML files concatenated together (giving a single file
00607     which strictly speaking wasn't valid XML)."""
00608     from xml.parsers import expat
00609     BLOCK = 1024
00610     MARGIN = 10 # must be at least length of newline + XML start
00611     XML_START = "<?xml"
00612 
00613     text = handle.read(BLOCK)
00614     pending = ""
00615 
00616     if not text:
00617         #NO DATA FOUND!
00618         raise ValueError("Your XML file was empty")
00619     
00620     while text:
00621         #We are now starting a new XML file
00622         if not text.startswith(XML_START):
00623             raise ValueError("Your XML file did not start with %s... "
00624                              "but instead %s" \
00625                              % (XML_START, repr(text[:20])))
00626 
00627         expat_parser = expat.ParserCreate()
00628         blast_parser = BlastParser(debug)
00629         expat_parser.StartElementHandler = blast_parser.startElement
00630         expat_parser.EndElementHandler = blast_parser.endElement
00631         expat_parser.CharacterDataHandler = blast_parser.characters
00632 
00633         expat_parser.Parse(text, False)
00634         while blast_parser._records:
00635             record = blast_parser._records[0]
00636             blast_parser._records = blast_parser._records[1:]
00637             yield record
00638 
00639         while True:
00640             #Read in another block of the file...
00641             text, pending = pending + handle.read(BLOCK), ""
00642             if not text:
00643                 #End of the file!
00644                 expat_parser.Parse("", True) # End of XML record
00645                 break
00646 
00647             #Now read a little bit more so we can check for the
00648             #start of another XML file...
00649             pending = handle.read(MARGIN)
00650 
00651             if (text+pending).find("\n" + XML_START) == -1:
00652                 # Good - still dealing with the same XML file
00653                 expat_parser.Parse(text, False)        
00654                 while blast_parser._records:
00655                     yield blast_parser._records.pop(0)
00656             else:
00657                 # This is output from pre 2.2.14 BLAST,
00658                 # one XML file for each query!
00659                 
00660                 # Finish the old file:
00661                 text, pending = (text+pending).split("\n" + XML_START,1)
00662                 pending = XML_START + pending
00663 
00664                 expat_parser.Parse(text, True) # End of XML record
00665                 while blast_parser._records:
00666                     yield blast_parser._records.pop(0)
00667                
00668                 #Now we are going to re-loop, reset the
00669                 #parsers and start reading the next XML file
00670                 text, pending = pending, ""
00671                 break
00672 
00673         #this was added because it seems that the Jython expat parser
00674         #was adding records later then the Python one
00675         while blast_parser._records:
00676             yield blast_parser._records.pop(0)
00677             
00678         #At this point we have finished the first XML record.
00679         #If the file is from an old version of blast, it may
00680         #contain more XML records (check if text=="").
00681         assert pending==""
00682         assert len(blast_parser._records) == 0
00683         
00684     #We should have finished the file!
00685     assert text==""
00686     assert pending==""
00687     assert len(blast_parser._records) == 0

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.Blast.NCBIXML.read (   handle,
  debug = 0 
)
Returns a single Blast record (assumes just one query).

This function is for use when there is one and only one BLAST
result in your XML file.

Use the Bio.Blast.NCBIXML.parse() function if you expect more than
one BLAST record (i.e. if you have more than one query sequence).

Definition at line 564 of file NCBIXML.py.

00564 
00565 def read(handle, debug=0):
00566    """Returns a single Blast record (assumes just one query).
00567 
00568    This function is for use when there is one and only one BLAST
00569    result in your XML file.
00570 
00571    Use the Bio.Blast.NCBIXML.parse() function if you expect more than
00572    one BLAST record (i.e. if you have more than one query sequence).
00573 
00574    """
00575    iterator = parse(handle, debug)
00576    try:
00577        first = iterator.next()
00578    except StopIteration:
00579        first = None
00580    if first is None:
00581        raise ValueError("No records found in handle")
00582    try:
00583        second = iterator.next()
00584    except StopIteration:
00585        second = None
00586    if second is not None:
00587        raise ValueError("More than one record found in handle")
00588    return first
00589 

Here is the call graph for this function:


Variable Documentation

Definition at line 382 of file NCBIXML.py.

Definition at line 381 of file NCBIXML.py.

Definition at line 418 of file NCBIXML.py.

Definition at line 421 of file NCBIXML.py.

Definition at line 705 of file NCBIXML.py.

tuple Bio.Blast.NCBIXML.handle = open(sys.argv[1])

Definition at line 691 of file NCBIXML.py.

Definition at line 692 of file NCBIXML.py.