Back to index

python-biopython  1.60
FragmentMapper.py
Go to the documentation of this file.
00001 # Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk)
00002 # This code is part of the Biopython distribution and governed by its
00003 # license.  Please see the LICENSE file that should have been included
00004 # as part of this package.
00005 
00006 """Classify protein backbone structure according to Kolodny et al's fragment
00007 libraries.
00008 
00009 It can be regarded as a form of objective secondary structure classification.
00010 Only fragments of length 5 or 7 are supported (ie. there is a 'central'
00011 residue).
00012 
00013 Full reference:
00014 
00015 Kolodny R, Koehl P, Guibas L, Levitt M.
00016 Small libraries of protein fragments model native protein structures accurately.
00017 J Mol Biol. 2002 323(2):297-307.
00018 
00019 The definition files of the fragments can be obtained from:
00020 
00021 U{http://csb.stanford.edu/~rachel/fragments/}
00022 
00023 You need these files to use this module.
00024 
00025 The following example uses the library with 10 fragments of length 5.
00026 The library files can be found in directory 'fragment_data'.
00027 
00028     >>> model = structure[0]
00029     >>> fm = FragmentMapper(model, lsize=10, flength=5, dir="fragment_data")
00030     >>> fragment = fm[residue]
00031 """
00032 
00033 import numpy
00034 
00035 from Bio.SVDSuperimposer import SVDSuperimposer
00036 
00037 from Bio.PDB import Selection
00038 from Bio.PDB.PDBExceptions import PDBException
00039 from Bio.PDB.PDBParser import PDBParser
00040 from Bio.PDB.Polypeptide import PPBuilder
00041 
00042 
00043 # fragment file (lib_SIZE_z_LENGTH.txt)
00044 # SIZE=number of fragments
00045 # LENGTH=length of fragment (4,5,6,7)
00046 _FRAGMENT_FILE="lib_%s_z_%s.txt"
00047 
00048 
00049 def _read_fragments(size, length, dir="."):
00050     """
00051     Read a fragment spec file (available from 
00052     U{http://csb.stanford.edu/rachel/fragments/} 
00053     and return a list of Fragment objects.
00054 
00055     @param size: number of fragments in the library
00056     @type size: int
00057 
00058     @param length: length of the fragments
00059     @type length: int
00060 
00061     @param dir: directory where the fragment spec files can be found
00062     @type dir: string
00063     """
00064     filename=(dir+"/"+_FRAGMENT_FILE) % (size, length)
00065     fp=open(filename, "r")
00066     flist=[]
00067     # ID of fragment=rank in spec file
00068     fid=0
00069     for l in fp.readlines():
00070                 # skip comment and blank lines
00071         if l[0]=="*" or l[0]=="\n":
00072             continue
00073         sl=l.split()
00074         if sl[1]=="------":
00075             # Start of fragment definition
00076             f=Fragment(length, fid)
00077             flist.append(f)
00078             # increase fragment id (rank)
00079             fid+=1
00080             continue
00081         # Add CA coord to Fragment
00082         coord=numpy.array(map(float, sl[0:3]))
00083         # XXX= dummy residue name
00084         f.add_residue("XXX", coord)
00085     fp.close()
00086     return flist
00087 
00088 
00089 class Fragment(object):
00090     """
00091     Represent a polypeptide C-alpha fragment.
00092     """
00093     def __init__(self, length, fid):
00094         """
00095         @param length: length of the fragment
00096         @type length: int
00097 
00098         @param fid: id for the fragment
00099         @type fid: int
00100         """
00101         # nr of residues in fragment
00102         self.length=length
00103         # nr of residues added
00104         self.counter=0
00105         self.resname_list=[]
00106         # CA coordinate matrix
00107         self.coords_ca=numpy.zeros((length, 3), "d")
00108         self.fid=fid
00109 
00110     def get_resname_list(self):
00111         """
00112         @return: the residue names
00113         @rtype: [string, string,...]
00114         """
00115         return self.resname_list
00116 
00117     def get_id(self):
00118         """
00119         @return: id for the fragment
00120         @rtype: int
00121         """
00122         return self.fid
00123 
00124     def get_coords(self):
00125         """
00126         @return: the CA coords in the fragment
00127         @rtype: Numeric (Nx3) array
00128         """
00129         return self.coords_ca
00130 
00131     def add_residue(self, resname, ca_coord):
00132         """
00133         @param resname: residue name (eg. GLY).
00134         @type resname: string
00135 
00136         @param ca_coord: the c-alpha coorinates of the residues
00137         @type ca_coord: Numeric array with length 3
00138         """
00139         if self.counter>=self.length:
00140             raise PDBException("Fragment boundary exceeded.")
00141         self.resname_list.append(resname)
00142         self.coords_ca[self.counter]=ca_coord
00143         self.counter=self.counter+1
00144 
00145     def __len__(self):
00146         """
00147         @return: length of fragment
00148         @rtype: int
00149         """
00150         return self.length
00151 
00152     def __sub__(self, other):
00153         """
00154         Return rmsd between two fragments.
00155 
00156         Example:
00157             >>> rmsd=fragment1-fragment2
00158 
00159         @return: rmsd between fragments
00160         @rtype: float
00161         """
00162         sup=SVDSuperimposer()
00163         sup.set(self.coords_ca, other.coords_ca)
00164         sup.run()
00165         return sup.get_rms()
00166 
00167     def __repr__(self):
00168         """
00169         Returns <Fragment length=L id=ID> where L=length of fragment
00170         and ID the identifier (rank in the library).
00171         """
00172         return "<Fragment length=%i id=%i>" % (self.length, self.fid)
00173 
00174 
00175 def _make_fragment_list(pp, length):
00176     """
00177     Dice up a peptide in fragments of length "length".
00178 
00179     @param pp: a list of residues (part of one peptide)
00180     @type pp: [L{Residue}, L{Residue}, ...]
00181 
00182     @param length: fragment length
00183     @type length: int
00184     """
00185     frag_list=[]
00186     for i in range(0, len(pp)-length+1):
00187         f=Fragment(length, -1)
00188         for j in range(0, length):
00189             residue=pp[i+j]
00190             resname=residue.get_resname()
00191             if residue.has_id("CA"):
00192                 ca=residue["CA"]
00193             else:
00194                 raise PDBException("CHAINBREAK")
00195             if ca.is_disordered():
00196                 raise PDBException("CHAINBREAK")
00197             ca_coord=ca.get_coord()
00198             f.add_residue(resname, ca_coord)
00199         frag_list.append(f)
00200     return frag_list
00201 
00202 
00203 def _map_fragment_list(flist, reflist):
00204     """
00205     Map all frgaments in flist to the closest
00206     (in RMSD) fragment in reflist.
00207 
00208     Returns a list of reflist indices.
00209 
00210     @param flist: list of protein fragments
00211     @type flist: [L{Fragment}, L{Fragment}, ...]
00212 
00213     @param reflist: list of reference (ie. library) fragments
00214     @type reflist: [L{Fragment}, L{Fragment}, ...]
00215     """
00216     mapped=[]
00217     for f in flist:
00218         rank=[]
00219         for i in range(0, len(reflist)):
00220             rf=reflist[i]
00221             rms=f-rf
00222             rank.append((rms, rf))
00223         rank.sort()
00224         fragment=rank[0][1]
00225         mapped.append(fragment)
00226     return mapped
00227 
00228 
00229 class FragmentMapper(object):
00230     """
00231     Map polypeptides in a model to lists of representative fragments.
00232     """
00233     def __init__(self, model, lsize=20, flength=5, fdir="."):
00234         """
00235         @param model: the model that will be mapped
00236         @type model: L{Model}
00237 
00238         @param lsize: number of fragments in the library
00239         @type lsize: int
00240 
00241         @param flength: length of fragments in the library
00242         @type flength: int
00243 
00244         @param fdir: directory where the definition files are
00245             found (default=".")
00246         @type fdir: string
00247         """
00248         if flength==5:
00249             self.edge=2
00250         elif flength==7:
00251             self.edge=3
00252         else:
00253             raise PDBException("Fragment length should be 5 or 7.")
00254         self.flength=flength
00255         self.lsize=lsize
00256         self.reflist=_read_fragments(lsize, flength, fdir)
00257         self.model=model
00258         self.fd=self._map(self.model)
00259 
00260     def _map(self, model):
00261         """
00262         @param model: the model that will be mapped
00263         @type model: L{Model}
00264         """
00265         ppb=PPBuilder()
00266         ppl=ppb.build_peptides(model)
00267         fd={}
00268         for pp in ppl:
00269             try:
00270                 # make fragments
00271                 flist=_make_fragment_list(pp, self.flength)
00272                 # classify fragments
00273                 mflist=_map_fragment_list(flist, self.reflist)
00274                 for i in range(0, len(pp)):
00275                     res=pp[i]
00276                     if i<self.edge:
00277                         # start residues
00278                         continue
00279                     elif i>=(len(pp)-self.edge):
00280                         # end residues
00281                         continue
00282                     else:
00283                         # fragment
00284                         index=i-self.edge
00285                         assert(index>=0)
00286                         fd[res]=mflist[index]
00287             except PDBException, why:
00288                 if why == 'CHAINBREAK':
00289                     # Funny polypeptide - skip
00290                     pass
00291                 else:
00292                     raise PDBException(why)
00293         return fd
00294 
00295     def has_key(self, res):
00296         """(Obsolete)
00297 
00298         @type res: L{Residue}
00299         """
00300         import warnings
00301         warnings.warn("has_key is obsolete; use 'res in object' instead", PendingDeprecationWarning)
00302         return (res in self)
00303 
00304     def __contains__(self, res):
00305         """True if the given residue is in any of the mapped fragments.
00306 
00307         @type res: L{Residue}
00308         """
00309         return (res in self.fd)
00310 
00311     def __getitem__(self, res):
00312         """
00313         @type res: L{Residue}
00314 
00315         @return: fragment classification
00316         @rtype: L{Fragment}
00317         """
00318         return self.fd[res]
00319 
00320 
00321 if __name__=="__main__":
00322 
00323     import sys
00324 
00325     p=PDBParser()
00326     s=p.get_structure("X", sys.argv[1])
00327 
00328     m=s[0]
00329     fm=FragmentMapper(m, 10, 5, "levitt_data")
00330 
00331 
00332     for r in Selection.unfold_entities(m, "R"):
00333 
00334         print r,
00335         if r in fm:
00336             print fm[r]
00337         else:
00338             print
00339