Back to index

python-biopython  1.60
Classes | Functions | Variables
debug_blast_parser Namespace Reference

Classes

class  DebuggingConsumer

Functions

def chomp
def choose_parser
def test_blast_output

Variables

int CONTEXT = 5
string USAGE
int VERBOSITY = 0
 PROTEIN = NUCLEOTIDE = OUTPUT = None
int NUCLEOTIDE = 1
int OUTPUT = 1

Function Documentation

def debug_blast_parser.chomp (   line)

Definition at line 55 of file debug_blast_parser.py.

00055 
def chomp(line):
    """Return line with every trailing carriage return and newline removed.

    Only "\\r" and "\\n" characters at the very end of the string are
    stripped; other trailing whitespace (spaces, tabs) and any embedded
    line breaks are left untouched.
    """
    # str.rstrip with an explicit character set does exactly what the
    # previous regular expression did, without the regex machinery.
    return line.rstrip("\r\n")

Here is the caller graph for this function:

Definition at line 58 of file debug_blast_parser.py.

00058 
def choose_parser(outfile):
    """Guess which BLAST parser class matches the contents of outfile.

    The whole file is read and searched case-insensitively for marker
    strings:

    - "<html>" or "<pre>" selects NCBIWWW.BlastParser.
    - "results from round" or "converged!" selects
      NCBIStandalone.PSIBlastParser.
    - Anything else falls back to NCBIStandalone.BlastParser.

    outfile is the name of the file to inspect.  The parser class itself
    (not an instance) is returned.
    """
    handle = open(outfile)
    try:
        # Lower-case once so every marker test is case-insensitive.
        ldata = handle.read().lower()
    finally:
        # Close explicitly rather than relying on garbage collection.
        handle.close()

    # The HTML check comes first, so it wins if both kinds of marker appear.
    if "<html>" in ldata or "<pre>" in ldata:
        return NCBIWWW.BlastParser
    if "results from round" in ldata or "converged!" in ldata:
        return NCBIStandalone.PSIBlastParser
    return NCBIStandalone.BlastParser

Here is the call graph for this function:

Here is the caller graph for this function:

Definition at line 67 of file debug_blast_parser.py.

00067 
00068 def test_blast_output(outfile):
00069     # Try to auto-detect the format
00070     if 1:
00071         print "No parser specified.  I'll try to choose one for you based"
00072         print "on the format of the output file."
00073         print
00074         
00075         parser_class = choose_parser(outfile)
00076         print "It looks like you have given output that should be parsed"
00077         print "with %s.%s.  If I'm wrong, you can select the correct parser" %\
00078               (parser_class.__module__, parser_class.__name__)
00079         print "on the command line of this script (NOT IMPLEMENTED YET)."
00080     else:
00081         raise NotImplementedError
00082         parser_class = NCBIWWW.BlastParser
00083         print "Using %s to parse the file." % parser_class.__name__
00084     print
00085 
00086     scanner_class = parser_class()._scanner.__class__
00087     consumer_class = parser_class()._consumer.__class__
00088 
00089     #parser_class()._scanner.feed(
00090     #    open(outfile), ParserSupport.TaggingConsumer())
00091     print "I'm going to run the data through the parser to see what happens..."
00092     parser = parser_class()
00093     try:
00094         rec = parser.parse_file(outfile)
00095     except KeyboardInterrupt, SystemExit:
00096         raise
00097     except Exception, x:
00098         exception_info = str(x)
00099         print "Dang, the parsing failed."
00100     else:
00101         print "Parsing succeeded, no problems detected."
00102         print "However, you should check to make sure the following scanner"
00103         print "trace looks reasonable."
00104         print
00105         parser_class()._scanner.feed(
00106             open(outfile), ParserSupport.TaggingConsumer())
00107         return 0
00108     print
00109 
00110     print "Alright.  Let me try and figure out where in the parser the"
00111     print "problem occurred..."
00112     etype, value, tb = sys.exc_info()
00113     ftb = traceback.extract_tb(tb)
00114     ftb.reverse()
00115     class_found = None
00116     for err_file, err_line, err_function, err_text in ftb:
00117         if hasattr(consumer_class, err_function):
00118             class_found = consumer_class
00119             break
00120         elif hasattr(scanner_class, err_function):
00121             class_found = scanner_class
00122             break
00123     if class_found is None:
00124         print "Sorry, I could not pinpoint the error to the parser."
00125         print "There's nothing more I can tell you."
00126         print "Here's the traceback:"
00127         traceback.print_exception(etype, value, tb)
00128         return 1
00129     else:
00130         print "I found the problem in %s.%s.%s, line %d:" % \
00131               (class_found.__module__, class_found.__name__,
00132                err_function, err_line)
00133         print "    %s" % err_text
00134         print "This output caused an %s to be raised with the" % etype
00135         print "information %r." % exception_info
00136     print
00137 
00138     print "Let me find the line in the file that triggers the problem..."
00139     parser = parser_class()
00140     scanner, consumer = parser._scanner, parser._consumer
00141     consumer = DebuggingConsumer(consumer)
00142     try:
00143         scanner.feed(open(outfile), consumer)
00144     except etype, x:
00145         pass
00146     else:
00147         print "Odd, the exception disappeared!  What happened?"
00148         return 3
00149     print "It's caused by line %d:" % consumer.linenum
00150     lines = open(outfile).readlines()
00151     start, end = consumer.linenum-CONTEXT, consumer.linenum+CONTEXT+1
00152     if start < 0:
00153         start = 0
00154     if end > len(lines):
00155         end = len(lines)
00156     ndigits = len(str(end))
00157     for linenum in range(start, end):
00158         line = chomp(lines[linenum])
00159         if linenum == consumer.linenum:
00160             prefix = '*'
00161         else:
00162             prefix = ' '
00163         
00164         s = "%s%*d %s" % (prefix, ndigits, linenum, line)
00165         s = s[:80]
00166         print s
00167     print
00168 
00169     if class_found == scanner_class:
00170         print "Problems in %s are most likely caused by changed formats." % \
00171               class_found.__name__
00172         print "You can start to fix this by going to line %d in module %s." % \
00173               (err_line, class_found.__module__)
00174         print "Perhaps the scanner needs to be made more lenient by accepting"
00175         print "the changed format?"
00176         print
00177 
00178         if VERBOSITY <= 0:
00179             print "For more help, you can run this script in verbose mode"
00180             print "to see detailed information about how the scanner"
00181             print "identifies each line."
00182         else:
00183             print "OK, let's see what the scanner's doing!"
00184             print
00185             print "*"*20 + " BEGIN SCANNER TRACE " + "*"*20
00186             try:
00187                 parser_class()._scanner.feed(
00188                     open(outfile), ParserSupport.TaggingConsumer())
00189             except etype, x:
00190                 pass
00191             print "*"*20 + " END SCANNER TRACE " + "*"*20
00192         print
00193             
00194     elif class_found == consumer_class:
00195         print "Problems in %s can be caused by two things:" % \
00196               class_found.__name__
00197         print "    - The format of the line parsed by '%s' changed." % \
00198               err_function
00199         print "    - The scanner misidentified the line."
00200         print "Check to make sure '%s' should parse the line:" % \
00201               err_function
00202         s = "    %s" % chomp(lines[consumer.linenum])
00203         s = s[:80]
00204         print s
00205         print "If so, debug %s.%s.  Otherwise, debug %s." % \
00206               (class_found.__name__, err_function, scanner_class.__name__)
00207     

Here is the call graph for this function:


Variable Documentation

Definition at line 16 of file debug_blast_parser.py.

Definition at line 231 of file debug_blast_parser.py.

Definition at line 233 of file debug_blast_parser.py.

int debug_blast_parser.PROTEIN = NUCLEOTIDE = OUTPUT = None

Definition at line 223 of file debug_blast_parser.py.

Initial value:
00001 """%s [-h] [-v] [-p] [-n] [-o] <testfile>
00002 
00003 This script helps diagnose problems with the BLAST parser.
00004 
00005 OPTIONS:
00006 
00007 -h    Show this help file.
00008 
00009 -v    Verbose output.
00010 
00011 -p    <testfile> is a protein file.
00012 
00013 -n    <testfile> is a nucleotide file.
00014 
00015 -o    <testfile> is a BLAST output file.
00016 
00017 """

Definition at line 18 of file debug_blast_parser.py.

Definition at line 208 of file debug_blast_parser.py.