Back to index

python-biopython  1.60
Public Member Functions | Private Member Functions
Bio.Blast.NCBIStandalone._Scanner Class Reference

List of all members.

Public Member Functions

def feed

Private Member Functions

def _scan_header
def _scan_rounds
def _scan_descriptions
def _scan_alignments
def _scan_pairwise_alignments
def _scan_one_pairwise_alignment
def _scan_alignment_header
def _scan_hsp
def _scan_hsp_header
def _scan_hsp_alignment
def _scan_masterslave_alignment
def _eof
def _scan_database_report
def _scan_parameters

Detailed Description

Scan BLAST output from blastall or blastpgp.

Tested with blastall and blastpgp v2.0.10, v2.0.11

Methods:
feed     Feed data into the scanner.

Definition at line 90 of file NCBIStandalone.py.


Member Function Documentation

def Bio.Blast.NCBIStandalone._Scanner._eof (   self,
  uhandle 
) [private]

Definition at line 542 of file NCBIStandalone.py.

00542 
00543     def _eof(self, uhandle):
00544         try:
00545             line = safe_peekline(uhandle)
00546         except ValueError, err:
00547             if str(err) != "Unexpected end of stream." : raise err
00548             line = ""
00549         return not line

Here is the caller graph for this function:

def Bio.Blast.NCBIStandalone._Scanner._scan_alignment_header (   self,
  uhandle,
  consumer 
) [private]

Definition at line 440 of file NCBIStandalone.py.

00440 
00441     def _scan_alignment_header(self, uhandle, consumer):
00442         # >d1rip__ 2.24.7.1.1 Ribosomal S17 protein [Bacillus
00443         #           stearothermophilus]
00444         #           Length = 81
00445         #
00446         # Or, more recently with different white space:
00447         #
00448         # >gi|15799684|ref|NP_285696.1| threonine synthase ...
00449         #  gi|15829258|ref|NP_308031.1| threonine synthase 
00450         #  ...
00451         # Length=428
00452         read_and_call(uhandle, consumer.title, start='>')
00453         while 1:
00454             line = safe_readline(uhandle)
00455             if line.lstrip().startswith('Length =') \
00456             or line.lstrip().startswith('Length='):
00457                 consumer.length(line)
00458                 break
00459             elif is_blank_line(line):
00460                 # Check to make sure I haven't missed the Length line
00461                 raise ValueError("I missed the Length in an alignment header")
00462             consumer.title(line)
00463 
00464         # Older versions of BLAST will have a line with some spaces.
00465         # Version 2.0.14 (maybe 2.0.13?) and above print a true blank line.
00466         if not attempt_read_and_call(uhandle, consumer.noevent,
00467                                      start='          '):
00468             read_and_call(uhandle, consumer.noevent, blank=1)

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.Blast.NCBIStandalone._Scanner._scan_alignments (   self,
  uhandle,
  consumer 
) [private]

Definition at line 396 of file NCBIStandalone.py.

00396 
00397     def _scan_alignments(self, uhandle, consumer):
00398         if self._eof(uhandle) : return
00399         
00400         # qblast inserts a helpful line here.
00401         attempt_read_and_call(uhandle, consumer.noevent, start="ALIGNMENTS")
00402 
00403         # First, check to see if I'm at the database report.
00404         line = safe_peekline(uhandle)
00405         if not line:
00406             #EOF
00407             return
00408         elif line.startswith('  Database') or line.startswith("Lambda"):
00409             return
00410         elif line[0] == '>':
00411             # XXX make a better check here between pairwise and masterslave
00412             self._scan_pairwise_alignments(uhandle, consumer)
00413         else:
00414             # XXX put in a check to make sure I'm in a masterslave alignment
00415             self._scan_masterslave_alignment(uhandle, consumer)

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.Blast.NCBIStandalone._Scanner._scan_database_report (   self,
  uhandle,
  consumer 
) [private]

Definition at line 550 of file NCBIStandalone.py.

00550 
00551     def _scan_database_report(self, uhandle, consumer):
00552         #   Database: sdqib40-1.35.seg.fa
00553         #     Posted date:  Nov 1, 1999  4:25 PM
00554         #   Number of letters in database: 223,339
00555         #   Number of sequences in database:  1323
00556         #   
00557         # Lambda     K      H
00558         #    0.322    0.133    0.369 
00559         #
00560         # Gapped
00561         # Lambda     K      H
00562         #    0.270   0.0470    0.230 
00563         #
00564         ##########################################
00565         # Or, more recently, Blast 2.2.15 gives fewer blank lines
00566         ##########################################
00567         #   Database: All non-redundant GenBank CDS translations+PDB+SwissProt+PIR+PRF excluding 
00568         # environmental samples
00569         #     Posted date:  Dec 12, 2006  5:51 PM
00570         #   Number of letters in database: 667,088,753
00571         #   Number of sequences in database:  2,094,974
00572         # Lambda     K      H
00573         #    0.319    0.136    0.395 
00574         # Gapped
00575         # Lambda     K      H
00576         #    0.267   0.0410    0.140
00577 
00578         if self._eof(uhandle) : return
00579 
00580         consumer.start_database_report()
00581         
00582         # Subset of the database(s) listed below
00583         #    Number of letters searched: 562,618,960
00584         #    Number of sequences searched:  228,924
00585         if attempt_read_and_call(uhandle, consumer.noevent, start="  Subset"):
00586             read_and_call(uhandle, consumer.noevent, contains="letters")
00587             read_and_call(uhandle, consumer.noevent, contains="sequences")
00588             read_and_call(uhandle, consumer.noevent, start="  ")
00589 
00590         # Sameet Mehta reported seeing output from BLASTN 2.2.9 that
00591         # was missing the "Database" stanza completely.
00592         while attempt_read_and_call(uhandle, consumer.database,
00593                 start='  Database'):
00594             # BLAT output ends abruptly here, without any of the other
00595             # information.  Check to see if this is the case.  If so,
00596             # then end the database report here gracefully.
00597             if not uhandle.peekline().strip() \
00598             or uhandle.peekline().startswith("BLAST"):
00599                 consumer.end_database_report()
00600                 return
00601             
00602             # Database can span multiple lines.
00603             read_and_call_until(uhandle, consumer.database, start='    Posted')
00604             read_and_call(uhandle, consumer.posted_date, start='    Posted')
00605             read_and_call(uhandle, consumer.num_letters_in_database,
00606                        start='  Number of letters')
00607             read_and_call(uhandle, consumer.num_sequences_in_database,
00608                        start='  Number of sequences')
00609             #There may not be a line starting with spaces...
00610             attempt_read_and_call(uhandle, consumer.noevent, start='  ')
00611 
00612             line = safe_readline(uhandle)
00613             uhandle.saveline(line)
00614             if line.find('Lambda') != -1:
00615                 break
00616 
00617         read_and_call(uhandle, consumer.noevent, start='Lambda')
00618         read_and_call(uhandle, consumer.ka_params)
00619 
00620         #This blank line is optional:
00621         attempt_read_and_call(uhandle, consumer.noevent, blank=1)
00622 
00623         # not BLASTP
00624         attempt_read_and_call(uhandle, consumer.gapped, start='Gapped')
00625         # not TBLASTX
00626         if attempt_read_and_call(uhandle, consumer.noevent, start='Lambda'):
00627             read_and_call(uhandle, consumer.ka_params_gap)
00628             
00629         # Blast 2.2.4 can sometimes skip the whole parameter section.
00630         # Thus, I need to be careful not to read past the end of the
00631         # file.
00632         try:
00633             read_and_call_while(uhandle, consumer.noevent, blank=1)
00634         except ValueError, x:
00635             if str(x) != "Unexpected end of stream.":
00636                 raise
00637         consumer.end_database_report()

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.Blast.NCBIStandalone._Scanner._scan_descriptions (   self,
  uhandle,
  consumer 
) [private]

Definition at line 263 of file NCBIStandalone.py.

00263 
00264     def _scan_descriptions(self, uhandle, consumer):
00265         # Searching..................................................done
00266         # Results from round 2
00267         # 
00268         # 
00269         #                                                                    Sc
00270         # Sequences producing significant alignments:                        (b
00271         # Sequences used in model and found again:
00272         # 
00273         # d1tde_2 3.4.1.4.4 (119-244) Thioredoxin reductase [Escherichia ...   
00274         # d1tcob_ 1.31.1.5.16 Calcineurin regulatory subunit (B-chain) [B...   
00275         # d1symb_ 1.31.1.2.2 Calcyclin (S100) [RAT (RATTUS NORVEGICUS)]        
00276         # 
00277         # Sequences not found previously or not previously below threshold:
00278         # 
00279         # d1osa__ 1.31.1.5.11 Calmodulin [Paramecium tetraurelia]              
00280         # d1aoza3 2.5.1.3.3 (339-552) Ascorbate oxidase [zucchini (Cucurb...   
00281         #
00282 
00283         # If PSI-BLAST, may also have:
00284         #
00285         # CONVERGED!
00286 
00287         consumer.start_descriptions()
00288 
00289         # Read 'Searching'
00290         # This line seems to be missing in BLASTN 2.1.2 (others?)
00291         attempt_read_and_call(uhandle, consumer.noevent, start='Searching')
00292 
00293         # blastpgp 2.0.10 from NCBI 9/19/99 for Solaris sometimes crashes here.
00294         # If this happens, the handle will yield no more information.
00295         if not uhandle.peekline():
00296             raise ValueError("Unexpected end of blast report.  " + \
00297                   "Looks suspiciously like a PSI-BLAST crash.")
00298 
00299         # BLASTN 2.2.3 sometimes spews a bunch of warnings and errors here:
00300         # Searching[blastall] WARNING:  [000.000]  AT1G08320: SetUpBlastSearch 
00301         # [blastall] ERROR:  [000.000]  AT1G08320: Blast: 
00302         # [blastall] ERROR:  [000.000]  AT1G08320: Blast: Query must be at leas
00303         # done 
00304         # Reported by David Weisman.
00305         # Check for these error lines and ignore them for now.  Let
00306         # the BlastErrorParser deal with them.
00307         line = uhandle.peekline()
00308         if line.find("ERROR:") != -1 or line.startswith("done"):
00309             read_and_call_while(uhandle, consumer.noevent, contains="ERROR:")
00310             read_and_call(uhandle, consumer.noevent, start="done")
00311 
00312         # Check to see if this is PSI-BLAST.
00313         # If it is, the 'Searching' line will be followed by:
00314         # (version 2.0.10)
00315         #     Searching.............................
00316         #     Results from round 2
00317         # or (version 2.0.11)
00318         #     Searching.............................
00319         #
00320         #
00321         #     Results from round 2
00322         
00323         # Skip a bunch of blank lines.
00324         read_and_call_while(uhandle, consumer.noevent, blank=1)
00325         # Check for the results line if it's there.
00326         if attempt_read_and_call(uhandle, consumer.round, start='Results'):
00327             read_and_call_while(uhandle, consumer.noevent, blank=1)
00328         
00329         # Three things can happen here:
00330         # 1.  line contains 'Score     E'
00331         # 2.  line contains "No hits found"
00332         # 3.  no descriptions
00333         # The first one begins a bunch of descriptions.  The last two
00334         # indicate that no descriptions follow, and we should go straight
00335         # to the alignments.
00336         if not attempt_read_and_call(
00337             uhandle, consumer.description_header,
00338             has_re=re.compile(r'Score +E')):
00339             # Either case 2 or 3.  Look for "No hits found".
00340             attempt_read_and_call(uhandle, consumer.no_hits,
00341                                   contains='No hits found')
00342             try:
00343                 read_and_call_while(uhandle, consumer.noevent, blank=1)
00344             except ValueError, err:
00345                 if str(err) != "Unexpected end of stream." : raise err
00346 
00347             consumer.end_descriptions()
00348             # Stop processing.
00349             return
00350 
00351         # Read the score header lines
00352         read_and_call(uhandle, consumer.description_header,
00353                       start='Sequences producing')
00354 
00355         # If PSI-BLAST, read the 'Sequences used in model' line.
00356         attempt_read_and_call(uhandle, consumer.model_sequences,
00357                               start='Sequences used in model')
00358         read_and_call_while(uhandle, consumer.noevent, blank=1)
00359 
00360         # In BLAT, rather than a "No hits found" line, we just
00361         # get no descriptions (and no alignments). This can be
00362         # spotted because the next line is the database block:
00363         if safe_peekline(uhandle).startswith("  Database:"):
00364             consumer.end_descriptions()
00365             # Stop processing.
00366             return
00367 
00368         # Read the descriptions and the following blank lines, making
00369         # sure that there are descriptions.
00370         if not uhandle.peekline().startswith('Sequences not found'):
00371             read_and_call_until(uhandle, consumer.description, blank=1)
00372             read_and_call_while(uhandle, consumer.noevent, blank=1)
00373 
00374         # If PSI-BLAST, read the 'Sequences not found' line followed
00375         # by more descriptions.  However, I need to watch out for the
00376         # case where there were no sequences not found previously, in
00377         # which case there will be no more descriptions.
00378         if attempt_read_and_call(uhandle, consumer.nonmodel_sequences,
00379                                  start='Sequences not found'):
00380             # Read the descriptions and the following blank lines.
00381             read_and_call_while(uhandle, consumer.noevent, blank=1)
00382             l = safe_peekline(uhandle)
00383             # Brad -- added check for QUERY. On some PSI-BLAST outputs
00384             # there will be a 'Sequences not found' line followed by no
00385             # descriptions. Check for this case since the first thing you'll
00386             # get is a blank line and then 'QUERY'
00387             if not l.startswith('CONVERGED') and l[0] != '>' \
00388                     and not l.startswith('QUERY'):
00389                 read_and_call_until(uhandle, consumer.description, blank=1)
00390                 read_and_call_while(uhandle, consumer.noevent, blank=1)
00391 
00392         attempt_read_and_call(uhandle, consumer.converged, start='CONVERGED')
00393         read_and_call_while(uhandle, consumer.noevent, blank=1)
00394 
00395         consumer.end_descriptions()

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.Blast.NCBIStandalone._Scanner._scan_header (   self,
  uhandle,
  consumer 
) [private]

Definition at line 120 of file NCBIStandalone.py.

00120 
00121     def _scan_header(self, uhandle, consumer):
00122         # BLASTP 2.0.10 [Aug-26-1999]
00123         # 
00124         # 
00125         # Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaf
00126         # Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), 
00127         # "Gapped BLAST and PSI-BLAST: a new generation of protein database sea
00128         # programs",  Nucleic Acids Res. 25:3389-3402.
00129         # 
00130         # Query= test
00131         #          (140 letters)
00132         # 
00133         # Database: sdqib40-1.35.seg.fa
00134         #            1323 sequences; 223,339 total letters
00135         #
00136         # ========================================================
00137         # This next example is from the online version of Blast,
00138         # note there are TWO references, an RID line, and also
00139         # the database is BEFORE the query line.
00140         # Note the possible use of non-ASCII characters in the author names.
00141         # ========================================================
00142         #
00143         # BLASTP 2.2.15 [Oct-15-2006]
00144         # Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schäffer, 
00145         # Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman 
00146         # (1997), "Gapped BLAST and PSI-BLAST: a new generation of 
00147         # protein database search programs", Nucleic Acids Res. 25:3389-3402.
00148         #
00149         # Reference: Schäffer, Alejandro A., L. Aravind, Thomas L. Madden, Sergei 
00150         # Shavirin, John L. Spouge, Yuri I. Wolf, Eugene V. Koonin, and 
00151         # Stephen F. Altschul (2001), "Improving the accuracy of PSI-BLAST 
00152         # protein database searches with composition-based statistics 
00153         # and other refinements", Nucleic Acids Res. 29:2994-3005. 
00154         #
00155         # RID: 1166022616-19998-65316425856.BLASTQ1
00156         # 
00157         #
00158         # Database: All non-redundant GenBank CDS
00159         # translations+PDB+SwissProt+PIR+PRF excluding environmental samples
00160         #            4,254,166 sequences; 1,462,033,012 total letters
00161         # Query=  gi:16127998
00162         # Length=428
00163         #
00164 
00165         consumer.start_header()
00166 
00167         read_and_call(uhandle, consumer.version, contains='BLAST')
00168         read_and_call_while(uhandle, consumer.noevent, blank=1)
00169 
00170         # There might be a <pre> line, for qblast output.
00171         attempt_read_and_call(uhandle, consumer.noevent, start="<pre>")
00172 
00173         # Read the reference(s)
00174         while attempt_read_and_call(uhandle,
00175                                 consumer.reference, start='Reference'):
00176             # References are normally multiline terminated by a blank line
00177             # (or, based on the old code, the RID line)
00178             while 1:
00179                 line = uhandle.readline()
00180                 if is_blank_line(line):
00181                     consumer.noevent(line)
00182                     break
00183                 elif line.startswith("RID"):
00184                     break
00185                 else:
00186                     #More of the reference
00187                     consumer.reference(line)
00188 
00189         #Deal with the optional RID: ...
00190         read_and_call_while(uhandle, consumer.noevent, blank=1)
00191         attempt_read_and_call(uhandle, consumer.reference, start="RID:")
00192         read_and_call_while(uhandle, consumer.noevent, blank=1)
00193 
00194         # blastpgp may have a reference for compositional score matrix
00195         # adjustment (see Bug 2502):
00196         if attempt_read_and_call(
00197             uhandle, consumer.reference, start="Reference"):
00198             read_and_call_until(uhandle, consumer.reference, blank=1)
00199             read_and_call_while(uhandle, consumer.noevent, blank=1)
00200 
00201         # blastpgp has a Reference for composition-based statistics.
00202         if attempt_read_and_call(
00203             uhandle, consumer.reference, start="Reference"):
00204             read_and_call_until(uhandle, consumer.reference, blank=1)
00205             read_and_call_while(uhandle, consumer.noevent, blank=1)
00206 
00207         line = uhandle.peekline()
00208         assert line.strip() != ""
00209         assert not line.startswith("RID:")
00210         if line.startswith("Query="):
00211             #This is an old style query then database...
00212 
00213             # Read the Query lines and the following blank line.
00214             read_and_call(uhandle, consumer.query_info, start='Query=')
00215             read_and_call_until(uhandle, consumer.query_info, blank=1)
00216             read_and_call_while(uhandle, consumer.noevent, blank=1)
00217 
00218             # Read the database lines and the following blank line.
00219             read_and_call_until(uhandle, consumer.database_info, end='total letters')
00220             read_and_call(uhandle, consumer.database_info, contains='sequences')
00221             read_and_call_while(uhandle, consumer.noevent, blank=1)
00222         elif line.startswith("Database:"):
00223             #This is a new style database then query...
00224             read_and_call_until(uhandle, consumer.database_info, end='total letters')
00225             read_and_call(uhandle, consumer.database_info, contains='sequences')
00226             read_and_call_while(uhandle, consumer.noevent, blank=1)
00227 
00228             # Read the Query lines and the following blank line.
00229             # Or, on BLAST 2.2.22+ there is no blank line - need to spot
00230             # the "... Score     E" line instead.
00231             read_and_call(uhandle, consumer.query_info, start='Query=')
00232             # BLAST 2.2.25+ has a blank line before Length=
00233             read_and_call_until(uhandle, consumer.query_info, start='Length=')
00234             while True:
00235                 line = uhandle.peekline()
00236                 if not line.strip() : break
00237                 if "Score     E" in line : break
00238                 #It is more of the query (and its length)
00239                 read_and_call(uhandle, consumer.query_info)
00240             read_and_call_while(uhandle, consumer.noevent, blank=1)
00241         else:
00242             raise ValueError("Invalid header?")
00243 
00244         consumer.end_header()

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.Blast.NCBIStandalone._Scanner._scan_hsp (   self,
  uhandle,
  consumer 
) [private]

Definition at line 469 of file NCBIStandalone.py.

00469 
00470     def _scan_hsp(self, uhandle, consumer):
00471         consumer.start_hsp()
00472         self._scan_hsp_header(uhandle, consumer)
00473         self._scan_hsp_alignment(uhandle, consumer)
00474         consumer.end_hsp()
        

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.Blast.NCBIStandalone._Scanner._scan_hsp_alignment (   self,
  uhandle,
  consumer 
) [private]

Definition at line 490 of file NCBIStandalone.py.

00490 
00491     def _scan_hsp_alignment(self, uhandle, consumer):
00492         # Query: 11 GRGVSACA-------TCDGFFYRNQKVAVIGGGNTAVEEALYLSNIASEVHLIHRRDGF
00493         #           GRGVS+         TC    Y  + + V GGG+ + EE   L     +   I R+
00494         # Sbjct: 12 GRGVSSVVRRCIHKPTCKE--YAVKIIDVTGGGSFSAEEVQELREATLKEVDILRKVSG
00495         # 
00496         # Query: 64 AEKILIKR 71
00497         #              I +K 
00498         # Sbjct: 70 PNIIQLKD 77
00499         # 
00500 
00501         while 1:
00502             # Blastn adds an extra line filled with spaces before Query
00503             attempt_read_and_call(uhandle, consumer.noevent, start='     ')
00504             read_and_call(uhandle, consumer.query, start='Query')
00505             read_and_call(uhandle, consumer.align, start='     ')
00506             read_and_call(uhandle, consumer.sbjct, start='Sbjct')
00507             try:
00508                 read_and_call_while(uhandle, consumer.noevent, blank=1)
00509             except ValueError, err:
00510                 if str(err) != "Unexpected end of stream." : raise err
00511                 # End of File (well, it looks like it with recent versions
00512                 # of BLAST for multiple queries after the Iterator class
00513                 # has broken up the whole file into chunks).
00514                 break
00515             line = safe_peekline(uhandle)
00516             # Alignment continues if I see a 'Query' or the spaces for Blastn.
00517             if not (line.startswith('Query') or line.startswith('     ')):
00518                 break
 

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.Blast.NCBIStandalone._Scanner._scan_hsp_header (   self,
  uhandle,
  consumer 
) [private]

Definition at line 475 of file NCBIStandalone.py.

00475 
00476     def _scan_hsp_header(self, uhandle, consumer):
00477         #  Score = 22.7 bits (47), Expect = 2.5
00478         #  Identities = 10/36 (27%), Positives = 18/36 (49%)
00479         #  Strand = Plus / Plus
00480         #  Frame = +3
00481         #
00482 
00483         read_and_call(uhandle, consumer.score, start=' Score')
00484         read_and_call(uhandle, consumer.identities, start=' Identities')
00485         # BLASTN
00486         attempt_read_and_call(uhandle, consumer.strand, start = ' Strand')
00487         # BLASTX, TBLASTN, TBLASTX
00488         attempt_read_and_call(uhandle, consumer.frame, start = ' Frame')
00489         read_and_call(uhandle, consumer.noevent, blank=1)

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.Blast.NCBIStandalone._Scanner._scan_masterslave_alignment (   self,
  uhandle,
  consumer 
) [private]

Definition at line 519 of file NCBIStandalone.py.

00519 
00520     def _scan_masterslave_alignment(self, uhandle, consumer):
00521         consumer.start_alignment()
00522         while 1:
00523             line = safe_readline(uhandle)
00524             # Check to see whether I'm finished reading the alignment.
00525             # This is indicated by 1) database section, 2) next psi-blast
00526             # round, which can also be a 'Results from round' if no 
00527             # searching line is present
00528             # patch by chapmanb
00529             if line.startswith('Searching') or \
00530                     line.startswith('Results from round'):
00531                 uhandle.saveline(line)
00532                 break
00533             elif line.startswith('  Database'):
00534                 uhandle.saveline(line)
00535                 break
00536             elif is_blank_line(line):
00537                 consumer.noevent(line)
00538             else:
00539                 consumer.multalign(line)
00540         read_and_call_while(uhandle, consumer.noevent, blank=1)
00541         consumer.end_alignment()

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.Blast.NCBIStandalone._Scanner._scan_one_pairwise_alignment (   self,
  uhandle,
  consumer 
) [private]

Definition at line 423 of file NCBIStandalone.py.

00423 
00424     def _scan_one_pairwise_alignment(self, uhandle, consumer):
00425         if self._eof(uhandle) : return
00426         consumer.start_alignment()
00427 
00428         self._scan_alignment_header(uhandle, consumer)
00429 
00430         # Scan a bunch of score/alignment pairs.
00431         while 1:
00432             if self._eof(uhandle):
00433                 #Shouldn't have issued that _scan_alignment_header event...
00434                 break
00435             line = safe_peekline(uhandle)
00436             if not line.startswith(' Score'):
00437                 break
00438             self._scan_hsp(uhandle, consumer)
00439         consumer.end_alignment()

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.Blast.NCBIStandalone._Scanner._scan_pairwise_alignments (   self,
  uhandle,
  consumer 
) [private]

Definition at line 416 of file NCBIStandalone.py.

00416 
00417     def _scan_pairwise_alignments(self, uhandle, consumer):
00418         while not self._eof(uhandle):
00419             line = safe_peekline(uhandle)
00420             if line[0] != '>':
00421                 break
00422             self._scan_one_pairwise_alignment(uhandle, consumer)

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.Blast.NCBIStandalone._Scanner._scan_parameters (   self,
  uhandle,
  consumer 
) [private]

Definition at line 638 of file NCBIStandalone.py.

00638 
00639     def _scan_parameters(self, uhandle, consumer):  # Scan the search parameters/statistics section, if one is present.
00640         # Matrix: BLOSUM62
00641         # Gap Penalties: Existence: 11, Extension: 1
00642         # Number of Hits to DB: 50604
00643         # Number of Sequences: 1323
00644         # Number of extensions: 1526
00645         # Number of successful extensions: 6
00646         # Number of sequences better than 10.0: 5
00647         # Number of HSP's better than 10.0 without gapping: 5
00648         # Number of HSP's successfully gapped in prelim test: 0
00649         # Number of HSP's that attempted gapping in prelim test: 1
00650         # Number of HSP's gapped (non-prelim): 5
00651         # length of query: 140
00652         # length of database: 223,339
00653         # effective HSP length: 39
00654         # effective length of query: 101
00655         # effective length of database: 171,742
00656         # effective search space: 17345942
00657         # effective search space used: 17345942
00658         # T: 11
00659         # A: 40
00660         # X1: 16 ( 7.4 bits)
00661         # X2: 38 (14.8 bits)
00662         # X3: 64 (24.9 bits)
00663         # S1: 41 (21.9 bits)
00664         # S2: 42 (20.8 bits)
00665         ##########################################
00666         # Or, more recently Blast(x) 2.2.15 gives
00667         ##########################################
00668         # Matrix: BLOSUM62
00669         # Gap Penalties: Existence: 11, Extension: 1
00670         # Number of Sequences: 4535438
00671         # Number of Hits to DB: 2,588,844,100
00672         # Number of extensions: 60427286
00673         # Number of successful extensions: 126433
00674         # Number of sequences better than  2.0: 30
00675         # Number of HSP's gapped: 126387
00676         # Number of HSP's successfully gapped: 35
00677         # Length of query: 291
00678         # Length of database: 1,573,298,872
00679         # Length adjustment: 130
00680         # Effective length of query: 161
00681         # Effective length of database: 983,691,932
00682         # Effective search space: 158374401052
00683         # Effective search space used: 158374401052
00684         # Neighboring words threshold: 12
00685         # Window for multiple hits: 40
00686         # X1: 16 ( 7.3 bits)
00687         # X2: 38 (14.6 bits)
00688         # X3: 64 (24.7 bits)
00689         # S1: 41 (21.7 bits)
00690         # S2: 32 (16.9 bits)
00691 
00692 
00693         # Blast 2.2.4 can sometimes skip the whole parameter section.
00694         # BLAT also skips the whole parameter section.
00695         # Thus, check to make sure that the parameter section really
00696         # exists.
00697         if not uhandle.peekline().strip():  # next line is empty/whitespace only: no section, no events
00698             return
00699 
00700         # BLASTN 2.2.9 looks like it reverses the "Number of Hits" and
00701         # "Number of Sequences" lines.
00702         consumer.start_parameters()
00703 
00704         # Matrix line may be missing in BLASTN 2.2.9
00705         attempt_read_and_call(uhandle, consumer.matrix, start='Matrix')
00706         # not TBLASTX
00707         attempt_read_and_call(uhandle, consumer.gap_penalties, start='Gap')
00708 
00709         attempt_read_and_call(uhandle, consumer.num_sequences,
00710                               start='Number of Sequences')
00711         attempt_read_and_call(uhandle, consumer.num_hits,
00712                       start='Number of Hits')
00713         attempt_read_and_call(uhandle, consumer.num_sequences,
00714                               start='Number of Sequences')  # tried again: the line may come after "Number of Hits" instead
00715         attempt_read_and_call(uhandle, consumer.num_extends,
00716                       start='Number of extensions')
00717         attempt_read_and_call(uhandle, consumer.num_good_extends,
00718                       start='Number of successful')
00719 
00720         attempt_read_and_call(uhandle, consumer.num_seqs_better_e,
00721                       start='Number of sequences')
00722 
00723         # not BLASTN, TBLASTX
00724         if attempt_read_and_call(uhandle, consumer.hsps_no_gap,
00725                                  start="Number of HSP's better"):  # branch taken only when this line was found
00726             # BLASTN 2.2.9
00727             if attempt_read_and_call(uhandle, consumer.noevent,
00728                                      start="Number of HSP's gapped:"):
00729                 read_and_call(uhandle, consumer.noevent,
00730                               start="Number of HSP's successfully")
00731                 # This is omitted in 2.2.15
00732                 attempt_read_and_call(uhandle, consumer.noevent,
00733                               start="Number of extra gapped extensions")
00734             else:
00735                 read_and_call(uhandle, consumer.hsps_prelim_gapped,
00736                               start="Number of HSP's successfully")
00737                 read_and_call(uhandle, consumer.hsps_prelim_gap_attempted,
00738                               start="Number of HSP's that")
00739                 read_and_call(uhandle, consumer.hsps_gapped,
00740                               start="Number of HSP's gapped")
00741         # e.g. BLASTX 2.2.15 where the "better" line is missing
00742         elif attempt_read_and_call(uhandle, consumer.noevent,
00743                                      start="Number of HSP's gapped"):
00744             read_and_call(uhandle, consumer.noevent,
00745                           start="Number of HSP's successfully")
00746 
00747         # not in blastx 2.2.1
00748         attempt_read_and_call(uhandle, consumer.query_length,
00749                               has_re=re.compile(r"[Ll]ength of query"))
00750         # Not in BLASTX 2.2.22+
00751         attempt_read_and_call(uhandle, consumer.database_length,
00752                           has_re=re.compile(r"[Ll]ength of \s*[Dd]atabase"))
00753 
00754         # BLASTN 2.2.9
00755         attempt_read_and_call(uhandle, consumer.noevent,
00756                               start="Length adjustment")
00757         attempt_read_and_call(uhandle, consumer.effective_hsp_length,
00758                               start='effective HSP')
00759         # Not in blastx 2.2.1
00760         attempt_read_and_call(
00761             uhandle, consumer.effective_query_length,
00762             has_re=re.compile(r'[Ee]ffective length of query'))
00763 
00764         # This is not in BLASTP 2.2.15
00765         attempt_read_and_call(
00766             uhandle, consumer.effective_database_length,
00767             has_re=re.compile(r'[Ee]ffective length of \s*[Dd]atabase'))
00768         # Not in blastx 2.2.1, added a ':' to distinguish between
00769         # this and the 'effective search space used' line
00770         attempt_read_and_call(
00771             uhandle, consumer.effective_search_space,
00772             has_re=re.compile(r'[Ee]ffective search space:'))
00773         # Does not appear in BLASTP 2.0.5
00774         attempt_read_and_call(
00775             uhandle, consumer.effective_search_space_used,
00776             has_re=re.compile(r'[Ee]ffective search space used'))
00777 
00778         # BLASTX, TBLASTN, TBLASTX
00779         attempt_read_and_call(uhandle, consumer.frameshift, start='frameshift')
00780 
00781         # not in BLASTN 2.2.9
00782         attempt_read_and_call(uhandle, consumer.threshold, start='T')  # NOTE(review): one-letter prefix; confirm it cannot match an unrelated line
00783         # In BLASTX 2.2.15 replaced by: "Neighboring words threshold: 12"
00784         attempt_read_and_call(uhandle, consumer.threshold, start='Neighboring words threshold')
00785 
00786         # not in BLASTX 2.2.15
00787         attempt_read_and_call(uhandle, consumer.window_size, start='A')  # NOTE(review): one-letter prefix; confirm it cannot match an unrelated line
00788         # get this instead: "Window for multiple hits: 40"
00789         attempt_read_and_call(uhandle, consumer.window_size, start='Window for multiple hits')
00790 
00791         # not in BLASTX 2.2.22+
00792         attempt_read_and_call(uhandle, consumer.dropoff_1st_pass, start='X1')
00793         # not TBLASTN
00794         attempt_read_and_call(uhandle, consumer.gap_x_dropoff, start='X2')
00795 
00796         # not BLASTN, TBLASTX
00797         attempt_read_and_call(uhandle, consumer.gap_x_dropoff_final,
00798                               start='X3')
00799 
00800         # not TBLASTN
00801         attempt_read_and_call(uhandle, consumer.gap_trigger, start='S1')
00802         # not in blastx 2.2.1
00803         # first we make sure we have additional lines to work with, if
00804         # not then the file is done and we don't have a final S2
00805         if not is_blank_line(uhandle.peekline(), allow_spaces=1):
00806             read_and_call(uhandle, consumer.blast_cutoff, start='S2')  # mandatory read (not an attempt) once a non-blank line remains
00807 
00808         consumer.end_parameters()  # pairs with start_parameters(); skipped only by the early return above

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.Blast.NCBIStandalone._Scanner._scan_rounds (   self,
  uhandle,
  consumer 
) [private]

Definition at line 245 of file NCBIStandalone.py.

00245 
00246     def _scan_rounds(self, uhandle, consumer):  # Scan every search round: descriptions followed by alignments.
00247         # Scan a bunch of rounds.
00248         # Each round begins with either a "Searching......" line
00249         # or a 'Score     E' line followed by descriptions and alignments.
00250         # The email server doesn't give the "Searching....." line.
00251         # If there is no 'Searching.....' line then you'll first see a
00252         # 'Results from round' line
00253 
00254         while not self._eof(uhandle):
00255             line = safe_peekline(uhandle)
00256             if (not line.startswith('Searching') and
00257                 not line.startswith('Results from round') and
00258                 re.search(r"Score +E", line) is None and
00259                 line.find('No hits found') == -1):
00260                 break  # next line matches none of the round openers: no more rounds
00261             self._scan_descriptions(uhandle, consumer)
00262             self._scan_alignments(uhandle, consumer)

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.Blast.NCBIStandalone._Scanner.feed (   self,
  handle,
  consumer 
)
S.feed(handle, consumer)

Feed in a BLAST report for scanning.  handle is a file-like
object that contains the BLAST report.  consumer is a Consumer
object that will receive events as the report is scanned.

Definition at line 99 of file NCBIStandalone.py.

00099 
00100     def feed(self, handle, consumer):
00101         """S.feed(handle, consumer)
00102 
00103         Feed in a BLAST report for scanning.  handle is a file-like
00104         object that contains the BLAST report.  consumer is a Consumer
00105         object that will receive events as the report is scanned.
00106 
00107         """
00108         if isinstance(handle, File.UndoHandle):
00109             uhandle = handle  # already an UndoHandle: use it as-is rather than wrapping it again
00110         else:
00111             uhandle = File.UndoHandle(handle)  # the scan methods call peekline()/saveline() on this wrapper
00112 
00113         # Try to fast-forward to the beginning of the blast report.
00114         read_and_call_until(uhandle, consumer.noevent, contains='BLAST')
00115         # Now scan the BLAST report, one section at a time, in this fixed order.
00116         self._scan_header(uhandle, consumer)
00117         self._scan_rounds(uhandle, consumer)
00118         self._scan_database_report(uhandle, consumer)
00119         self._scan_parameters(uhandle, consumer)

Here is the call graph for this function:

Here is the caller graph for this function:


The documentation for this class was generated from the following file: