Back to index

python-biopython  1.60
ParseBlastTable.py
Go to the documentation of this file.
00001 # Copyright 2003 Iddo Friedberg. All rights reserved.
00002 # This code is part of the Biopython distribution and governed by its
00003 # license.  Please see the LICENSE file that should have been included
00004 # as part of this package.
00005 
00006 """A parser for the NCBI blastpgp version 2.2.5 output format. Currently only
00007 supports the '-m 9' option (table with annotations).
00008 BlastTableReader.next() returns one BlastTableRec instance per record, or None at the end of the input.
00009 """
00010 
class BlastTableEntry(object):
    """One whitespace-separated hit line of the table, parsed into attributes.

    The line must provide at least twelve columns; any further columns are
    ignored. Columns are read by position:

    ===== ============= ==========================================
    index attribute     conversion
    ===== ============= ==========================================
    0     ``qid``       list of the '|'-separated parts
    1     ``sid``       list of the '|'-separated parts
    2     ``pid``       float
    3     ``ali_len``   int
    4     ``mis``       int
    5     ``gaps``      int
    6, 7  ``q_bounds``  tuple of two ints
    8, 9  ``s_bounds``  tuple of two ints
    10    ``e_value``   float
    11    ``bit_score`` float
    ===== ============= ==========================================

    NOTE(review): the attribute names suggest the usual BLAST tabular
    columns (query id, subject id, percent identity, alignment length,
    mismatches, gaps, query start/end, subject start/end, e-value, bit
    score) -- confirm against the blastpgp documentation.

    Raises IndexError when a required column is missing and ValueError when
    a numeric column cannot be converted.
    """

    def __init__(self, in_rec):
        columns = in_rec.split()

        # Identifiers are stored split on '|' rather than as raw strings.
        self.qid = columns[0].split('|')
        self.sid = columns[1].split('|')

        self.pid = float(columns[2])
        self.ali_len = int(columns[3])
        self.mis = int(columns[4])
        self.gaps = int(columns[5])

        # Each pair of adjacent integer columns becomes a 2-tuple.
        q_first = int(columns[6])
        q_second = int(columns[7])
        self.q_bounds = (q_first, q_second)

        s_first = int(columns[8])
        s_second = int(columns[9])
        self.s_bounds = (s_first, s_second)

        self.e_value = float(columns[10])
        self.bit_score = float(columns[11])
00024       
class BlastTableRec(object):
    """Container for one record of the table: header values plus hit entries.

    Every header attribute (``program``, ``version``, ``date``,
    ``iteration``, ``query``, ``database``) starts as None, and ``entries``
    starts as an empty list. Nothing is validated here; the attributes are
    simply storage for whoever fills the record in.
    """

    def __init__(self):
        # Header values are unknown until something assigns them.
        self.program = self.version = self.date = None
        self.iteration = self.query = self.database = None
        # A fresh list per instance, so records never share entries.
        self.entries = []

    def add_entry(self, entry):
        """Append *entry* to the end of ``self.entries``."""
        hits = self.entries
        hits.append(entry)
00036 
# Maps the keyword that identifies a '#' header line to the suffix of the
# BlastTableReader._parse_<suffix> method that handles that line.
reader_keywords = {'BLASTP': 'version',
                   'Iteration': 'iteration',
                   'Query': 'query',
                   'Database': 'database',
                   'Fields': 'fields'}


class BlastTableReader(object):
    """Read records one at a time from a handle of annotated table output.

    A record is a run of '#' header lines followed by hit lines. Call
    ``next()`` repeatedly until it returns None, or iterate over the reader
    directly.
    """

    def __init__(self, handle):
        """Wrap *handle* and skip ahead to the first line containing 'BLASTP'.

        *handle* only needs a ``readline()`` method that returns an empty
        string at the end of the input.
        """
        self.handle = handle
        inline = self.handle.readline()
        # zip forward to start of record
        while inline and inline.find('BLASTP') == -1:
            inline = self.handle.readline()
        # First unconsumed line; the empty string means the input is exhausted.
        self._lookahead = inline
        # Number of times next() has been called.
        self._n = 0
        # Truthy while the '#' lines being read belong to the current header.
        self._in_header = 1

    def __iter__(self):
        """Yield each remaining record, stopping when ``next()`` returns None."""
        record = self.next()
        while record is not None:
            yield record
            record = self.next()

    def next(self):
        """Parse and return the next record, or None at the end of the input.

        The record is also left in ``self.table_record``. Whitespace-only
        lines are skipped.
        """
        self.table_record = BlastTableRec()
        self._n += 1
        inline = self._lookahead
        if not inline:
            return None
        while inline:
            if inline[0] == '#':
                if not self._in_header:
                    # The header of this record is already finished, so this
                    # comment line opens the next record.
                    break
                self._in_header = self._consume_header(inline)
            elif inline.strip():
                self._consume_entry(inline)
                self._in_header = 0
            # Whitespace-only lines carry no columns and are ignored; feeding
            # them to BlastTableEntry would raise IndexError.
            inline = self.handle.readline()
        self._lookahead = inline
        self._in_header = 1
        return self.table_record

    def _consume_entry(self, inline):
        """Parse one hit line and add it to the current record."""
        current_entry = BlastTableEntry(inline)
        self.table_record.add_entry(current_entry)

    def _consume_header(self, inline):
        """Dispatch one '#' header line to its ``_parse_*`` handler.

        Returns the handler's result: truthy while the header continues,
        0 once the header is complete. Lines that match no keyword are
        ignored and the header is treated as still open.
        """
        text = inline.lstrip('#').lstrip()
        # Prefer the keyword that starts the line. A substring search alone
        # misfires on e.g. "# Fields: Query id, ...", which contains 'Query'
        # and would otherwise overwrite the record's query.
        for keyword in reader_keywords:
            if text.startswith(keyword):
                return self._Parse('_parse_%s' % reader_keywords[keyword],
                                   inline)
        # Fall back to the historical "keyword anywhere in the line" match
        # so lines that were recognised before still are.
        for keyword in reader_keywords:
            if keyword in inline:
                return self._Parse('_parse_%s' % reader_keywords[keyword],
                                   inline)
        return 1

    def _parse_version(self, inline):
        """Store program, version and date from a line of exactly four tokens.

        The first token (the '#') is dropped; a different token count raises
        ValueError.
        """
        program, version, date = inline.split()[1:]
        self.table_record.program = program
        self.table_record.version = version
        self.table_record.date = date
        return 1

    def _parse_iteration(self, inline):
        """Store the third token of the line as the integer iteration."""
        self.table_record.iteration = int(inline.split()[2])
        return 1

    def _parse_query(self, inline):
        """Store every token from the third onwards as the query (a list)."""
        self.table_record.query = inline.split()[2:]
        return 1

    def _parse_database(self, inline):
        """Store the third token of the line as the database."""
        self.table_record.database = inline.split()[2]
        return 1

    def _parse_fields(self, inline):
        """Return 0: the fields line is the last line of a header."""
        return 0

    def _Parse(self, method_name, inline):
        """Call the method named *method_name* on *inline*; return its result."""
        return getattr(self, method_name)(inline)
00101       
00102 
00103