Back to index

python-biopython  1.60
find_parser_problems.py
Go to the documentation of this file.
#!/usr/bin/env python
"""Find GenBank records that the parser has problems with within a big file.

This is meant to make it easy to get accession numbers for records that
don't parse properly.  The id of every record that parses is printed; the
script stops at the first record that raises a parser failure and prints
the failure message.

Usage:
find_parser_problems.py <GenBank file to parse>
"""
# standard library
import sys

# GenBank
from Bio import GenBank

# Set to a true value to also dump the full contents of every record that
# parses successfully.
verbose = 0

if len(sys.argv) != 2:
    print("Usage ./find_parser_problems <GenBank file to parse>")
    # NOTE(review): this exits with status 0 even though the invocation was
    # wrong; kept as-is so existing callers see the same exit status.
    sys.exit()

feature_parser = GenBank.FeatureParser(debug_level=0)
parser = GenBank.ErrorParser(feature_parser)

handle = open(sys.argv[1], 'r')
# try/finally (rather than a trailing close()) so the file is also closed
# when sys.exit() is raised on a parse failure below.
try:
    iterator = GenBank.Iterator(handle, parser, has_header=1)

    while True:
        try:
            cur_record = iterator.next()
        except GenBank.ParserFailureError:
            # sys.exc_info() instead of "except X, msg" / "except X as msg"
            # so the same source parses on every Python version.
            msg = sys.exc_info()[1]
            print("Parsing Problem: %s" % (msg,))
            # NOTE(review): exits with status 0 on a parse failure; kept
            # unchanged for backward compatibility.
            sys.exit()

        # The iterator signals the end of the input by returning None.
        if cur_record is None:
            break

        print("Successfully parsed record %s" % (cur_record.id,))

        if verbose:
            print("***Record")
            print("Seq: %s" % (cur_record.seq,))
            print("Id: %s" % (cur_record.id,))
            print("Name: %s" % (cur_record.name,))
            print("Description %s" % (cur_record.description,))
            print("Annotations %s" % (cur_record.annotations,))
            print("Features")
            for feature in cur_record.features:
                print(feature)
finally:
    handle.close()