Back to index

python-biopython  1.60
StructureAlignment.py
Go to the documentation of this file.
00001 # Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk)
00002 # This code is part of the Biopython distribution and governed by its
00003 # license.  Please see the LICENSE file that should have been included
00004 # as part of this package.
00005 
00006 """Map the residues of two structures to each other based on a FASTA alignment
00007 file.
00008 """
00009 
00010 from Bio.SCOP.Raf import to_one_letter_code
00011 
00012 from Bio.PDB import Selection
00013 from Bio.PDB.Polypeptide import is_aa
00014 
00015 
00016 class StructureAlignment(object):
00017     """
00018     This class aligns two structures based on an alignment of their
00019     sequences.
00020     """
00021     def __init__(self, fasta_align, m1, m2, si=0, sj=1):
00022         """
00023         fasta_align --- Alignment object 
00024         m1, m2 --- two models
00025         si, sj --- the sequences in the Alignment object that
00026                 correspond to the structures
00027         """
00028         l=fasta_align.get_alignment_length()
00029         s1=fasta_align.get_seq_by_num(si)
00030         s2=fasta_align.get_seq_by_num(sj)
00031         # Get the residues in the models
00032         rl1=Selection.unfold_entities(m1, 'R')
00033         rl2=Selection.unfold_entities(m2, 'R')
00034         # Residue positions
00035         p1=0
00036         p2=0
00037         # Map equivalent residues to each other
00038         map12={}
00039         map21={}
00040         # List of residue pairs (None if -)
00041         duos=[]
00042         for i in range(0, l):
00043             column=fasta_align.get_column(i)
00044             aa1=column[si]
00045             aa2=column[sj]
00046             if aa1!="-":
00047                 # Position in seq1 is not -
00048                 while 1:
00049                     # Loop until an aa is found
00050                     r1=rl1[p1]
00051                     p1=p1+1
00052                     if is_aa(r1):
00053                         break
00054                 self._test_equivalence(r1, aa1)
00055             else:
00056                 r1=None
00057             if aa2!="-":
00058                 # Position in seq2 is not -
00059                 while 1:
00060                     # Loop until an aa is found
00061                     r2=rl2[p2]
00062                     p2=p2+1
00063                     if is_aa(r2):
00064                         break
00065                 self._test_equivalence(r2, aa2)
00066             else:
00067                 r2=None
00068             if r1:
00069                 # Map residue in seq1 to its equivalent in seq2
00070                 map12[r1]=r2
00071             if r2:
00072                 # Map residue in seq2 to its equivalent in seq1
00073                 map21[r2]=r1
00074             # Append aligned pair (r is None if gap)
00075             duos.append((r1, r2))
00076         self.map12=map12
00077         self.map21=map21
00078         self.duos=duos
00079 
00080     def _test_equivalence(self, r1, aa1):
00081         "Test if aa in sequence fits aa in structure."
00082         resname=r1.get_resname()
00083         resname=to_one_letter_code[resname]
00084         assert(aa1==resname)
00085 
00086     def get_maps(self):
00087         """
00088         Return two dictionaries that map a residue in one structure to 
00089         the equivealent residue in the other structure.
00090         """
00091         return self.map12, self.map21
00092 
00093     def get_iterator(self):
00094         """
00095         Iterator over all residue pairs.
00096         """
00097         for i in range(0, len(self.duos)):
00098             yield self.duos[i]
00099 
00100 
00101 if __name__=="__main__":
00102     import sys
00103     from Bio.Alphabet import generic_protein
00104     from Bio import AlignIO
00105     from Bio.PDB import PDBParser
00106 
00107     if len(sys.argv) != 4:
00108         print "Expects three arguments,"
00109         print " - FASTA alignment filename (expect two sequences)"
00110         print " - PDB file one"
00111         print " - PDB file two"
00112         sys.exit()
00113 
00114     # The alignment
00115     fa=AlignIO.read(open(sys.argv[1]), "fasta", generic_protein)
00116 
00117     pdb_file1=sys.argv[2]
00118     pdb_file2=sys.argv[3]
00119 
00120     # The structures
00121     p=PDBParser()
00122     s1=p.get_structure('1', pdb_file1)
00123     p=PDBParser()
00124     s2=p.get_structure('2', pdb_file2)
00125 
00126     # Get the models
00127     m1=s1[0]
00128     m2=s2[0]
00129 
00130     al=StructureAlignment(fa, m1, m2)
00131 
00132     # Print aligned pairs (r is None if gap)
00133     for (r1,r2) in al.get_iterator():
00134         print r1, r2
00135