Back to index

python-biopython  1.60
PSEA.py
Go to the documentation of this file.
00001 # Copyright (C) 2006, Thomas Hamelryck (thamelry@binf.ku.dk)
00002 # This code is part of the Biopython distribution and governed by its
00003 # license.  Please see the LICENSE file that should have been included
00004 # as part of this package.
00005 
00006 """Wrappers for PSEA, a program for secondary structure assignment.
00007 
00008 See this citation for P-SEA, PMID: 9183534
00009 
00010 Labesse G, Colloc'h N, Pothier J, Mornon J-P:  P-SEA: a new efficient
00011 assignment of secondary structure from C_alpha.
00012 Comput Appl Biosci 1997 , 13:291-295
00013 
00014 ftp://ftp.lmcp.jussieu.fr/pub/sincris/software/protein/p-sea/
00015 """
00016 
00017 import os
00018 
00019 from Bio.PDB.Polypeptide import is_aa
00020 
00021 
00022 def run_psea(fname):
00023     """Run PSEA and return output filename.
00024     
00025     Note that this assumes the P-SEA binary is called "psea" and that it is
00026     on the path.
00027     
00028     Note that P-SEA will write an output file in the current directory using
00029     the input filename with extension ".sea".
00030     
00031     Note that P-SEA will write output to the terminal while run.
00032     """
00033     os.system("psea "+fname)
00034     last=fname.split("/")[-1]
00035     base=last.split(".")[0]
00036     return base+".sea"
00037 
00038 def psea(pname):
00039     """Parse PSEA output file."""
00040     fname=run_psea(pname)
00041     start=0
00042     ss=""
00043     fp=open(fname, 'r')
00044     for l in fp.readlines():
00045         if l[0:6]==">p-sea":
00046             start=1
00047             continue
00048         if not start:
00049             continue
00050         if l[0]=="\n":
00051             break
00052         ss=ss+l[0:-1]
00053     fp.close()
00054     return ss
00055 
00056 def psea2HEC(pseq):
00057     """Translate PSEA secondary structure string into HEC."""
00058     seq=[]
00059     for ss in pseq:
00060         if ss=="a":
00061             n="H"
00062         elif ss=="b":
00063             n="E"
00064         elif ss=="c":
00065             n="C"
00066         seq.append(n)
00067     return seq
00068 
00069 def annotate(m, ss_seq):
00070     """Apply seconardary structure information to residues in model."""
00071     c=m.get_list()[0]
00072     all=c.get_list()
00073     residues=[]
00074     # Now remove HOH etc.
00075     for res in all:
00076         if is_aa(res):
00077             residues.append(res)
00078     L=len(residues)
00079     if not (L==len(ss_seq)):
00080         raise ValueError("Length mismatch %i %i" % (L, len(ss_seq)))
00081     for i in range(0, L):
00082         residues[i].xtra["SS_PSEA"]=ss_seq[i]
00083     #os.system("rm "+fname)
00084 
00085 class PSEA(object):
00086     def __init__(self, model, filename):
00087         ss_seq=psea(filename)
00088         ss_seq=psea2HEC(ss_seq)
00089         annotate(model, ss_seq)
00090         self.ss_seq=ss_seq
00091 
00092     def get_seq(self):
00093         """
00094         Return secondary structure string.
00095         """
00096         return self.ss_seq
00097         
00098 
00099 if __name__=="__main__":
00100 
00101     import sys
00102     from Bio.PDB import PDBParser
00103 
00104     # Parse PDB file
00105     p=PDBParser()
00106     s=p.get_structure('X', sys.argv[1])
00107 
00108     # Annotate structure with PSEA sceondary structure info
00109     PSEA(s[0], sys.argv[1])