Back to index

python-biopython  1.60
FSSPTools.py
Go to the documentation of this file.
00001 from Bio import FSSP
00002 import copy
00003 from Bio.Align import Generic
00004 from Bio import Alphabet
00005 import time
00006 
class FSSPAlign(Generic.Alignment):
    """Generic.Alignment subclass with a helper for per-record numbering tables."""

    def _add_numbering_table(self, new_record):
        """Install empty numbering tables on ``new_record``.

        The 'abs2pdb' and 'pdb2abs' entries of ``new_record.annotations`` are
        each set to a fresh, empty dictionary; anything previously stored
        under those two keys is replaced.
        """
        for table_key in ('abs2pdb', 'pdb2abs'):
            new_record.annotations[table_key] = {}
00011 
00012    
class FSSPMultAlign(dict):
    """Dictionary subclass carrying three extra, initially empty attributes.

    Attributes set on construction:
     - abs_res - empty list
     - pdb_res - empty list
     - data - empty dictionary
    """

    def __init__(self):
        # dict.__init__ is not called here; the mapping part of the
        # instance still starts out empty.
        self.abs_res, self.pdb_res = [], []
        self.data = {}
def mult_align(sum_dict, align_dict):
    """Returns a biopython multiple alignment instance (Bio.Align.Generic)

    Arguments:
     - sum_dict - summary section; indexed by protein number, each record
       providing ``pdb2`` and ``chain2`` (concatenated to name a sequence).
     - align_dict - alignment section; must support ``len()`` and
       ``abs(i)`` for i in 1..len(align_dict).  Each returned record has a
       ``pos_align_dict`` keyed by protein number whose values expose ``aa``.

    The set of sequences is taken from the protein numbers present at
    position 1; a protein number that first appears at a later position
    raises KeyError.  Sequences are added in ascending protein-number order.
    """
    # One growing list of residue strings per protein number.  Pieces are
    # joined once at the end instead of repeatedly concatenating strings.
    residues = {}
    for j in align_dict.abs(1).pos_align_dict:
        residues[j] = []

    for i in range(1, len(align_dict) + 1):
        # loop on positions
        pos_align_dict = align_dict.abs(i).pos_align_dict
        for j in pos_align_dict:
            # loop within a position
            residues[j].append(pos_align_dict[j].aa)

    fssp_align = Generic.Alignment(Alphabet.Gapped(
                                   Alphabet.IUPAC.extended_protein))
    # sorted() works whether keys() is a list (Python 2) or a view
    # (Python 3); calling .sort() on the result of keys() does not.
    for i in sorted(residues):
        fssp_align.add_sequence(sum_dict[i].pdb2 + sum_dict[i].chain2,
                                ''.join(residues[i]))
    # Numbering tables (FSSPAlign._add_numbering_table) are not attached here.
    return fssp_align
00038 
00039 
00040 # Several routines used to extract information from FSSP sections
00041 # filter:
# Filters a passed summary section and alignment section according to a numeric
# attribute in the summary section. Returns new summary and alignment sections.
# For example, to keep only those records which have a zscore between
# 4.0 and 7.5 (both bounds inclusive):
# new_sum, new_align = filter(sum, align, 'zscore', 4, 7.5)
00047 #
00048 # Warning: this function really slows down when filtering large FSSP files.
00049 # The reason is the use of copy.deepcopy() to copy align_dict into
00050 # new_align_dict. I have to figure out something better.
00051 # Took me ~160 seconds for the largest FSSP file (1reqA.fssp)
00052 #
00053 
def filter(sum_dict, align_dict, filter_attribute, low_bound, high_bound):
    """filters a passed summary section and alignment section according to a numeric
    attribute in the summary section. Returns new summary and alignment sections

    Arguments:
     - sum_dict - summary section; indexed by protein number.
     - align_dict - alignment section; must provide ``abs_res_dict`` and
       ``abs(pos_num)``, each returned record having a ``pos_align_dict``
       keyed by protein number.
     - filter_attribute - name of the attribute read (via getattr) from each
       summary record.
     - low_bound, high_bound - inclusive limits for the attribute value.

    Returns a ``(new_sum_dict, new_align_dict)`` tuple.  ``new_sum_dict`` is
    a new FSSP.FSSPSumDict referencing the selected records of ``sum_dict``.
    ``new_align_dict`` is a deep copy of ``align_dict`` in which every
    position's ``pos_align_dict`` holds only the selected protein numbers,
    with values taken from the original ``align_dict``.

    Note: the function name shadows the ``filter`` builtin inside this
    module; it is kept for backward compatibility.
    """
    new_sum_dict = FSSP.FSSPSumDict()
    # The deep copy dominates the running time on large FSSP files.
    new_align_dict = copy.deepcopy(align_dict)
    for prot_num in sum_dict:
        attr_value = getattr(sum_dict[prot_num], filter_attribute)
        if low_bound <= attr_value <= high_bound:
            new_sum_dict[prot_num] = sum_dict[prot_num]
    # sorted() works whether keys() is a list (Python 2) or a view
    # (Python 3); calling .sort() on the result of keys() does not.
    prot_numbers = sorted(new_sum_dict)
    for pos_num in new_align_dict.abs_res_dict:
        source = align_dict.abs(pos_num).pos_align_dict
        kept = {}
        for prot_num in prot_numbers:
            kept[prot_num] = source[prot_num]
        new_align_dict.abs(pos_num).pos_align_dict = kept
    return new_sum_dict, new_align_dict
00075 
def name_filter(sum_dict, align_dict, name_list):
    """Accepts a list of names. Returns a new Summary block and Alignment block which
    contain the info only for those names passed.

    Arguments:
     - sum_dict - summary section; indexed by protein number, each record
       providing ``pdb2`` and ``chain2``.
     - align_dict - alignment section; must provide ``abs_res_dict`` and
       ``abs(pos_num)``, each returned record having a ``pos_align_dict``
       keyed by protein number.
     - name_list - names to keep; a record matches when
       ``pdb2 + chain2`` equals one of them.

    Returns a ``(new_sum_dict, new_align_dict)`` tuple.  ``new_sum_dict`` is
    a new FSSP.FSSPSumDict referencing the matching records of ``sum_dict``.
    ``new_align_dict`` is a deep copy of ``align_dict`` in which every
    position's ``pos_align_dict`` holds only the matching protein numbers,
    with values taken from the original ``align_dict``.
    """
    new_sum_dict = FSSP.FSSPSumDict()
    new_align_dict = copy.deepcopy(align_dict)
    # Loop nesting (names outermost) is kept so that records are inserted
    # into new_sum_dict in the same order as before.
    for cur_pdb_name in name_list:
        for prot_num in sum_dict:
            record = sum_dict[prot_num]
            if record.pdb2 + record.chain2 == cur_pdb_name:
                new_sum_dict[prot_num] = record
    # sorted() works whether keys() is a list (Python 2) or a view
    # (Python 3); calling .sort() on the result of keys() does not.
    prot_numbers = sorted(new_sum_dict)
    for pos_num in new_align_dict.abs_res_dict:
        source = align_dict.abs(pos_num).pos_align_dict
        kept = {}
        for prot_num in prot_numbers:
            kept[prot_num] = source[prot_num]
        new_align_dict.abs(pos_num).pos_align_dict = kept
    return new_sum_dict, new_align_dict
00093