Back to index

python-biopython  1.60
Cla.py
Go to the documentation of this file.
00001 # Copyright 2001 by Gavin E. Crooks.  All rights reserved.
00002 # Modifications Copyright 2010 Jeffrey Finkelstein. All rights reserved.
00003 #
00004 # This code is part of the Biopython distribution and governed by its
00005 # license.  Please see the LICENSE file that should have been included
00006 # as part of this package.
00007 
"""Handle the SCOP CLAssification file, which describes SCOP domains.

The file format is described in the SCOP
"release notes.":http://scop.mrc-lmb.cam.ac.uk/scop/release-notes.html
The latest CLA file can be found
"elsewhere at SCOP.":http://scop.mrc-lmb.cam.ac.uk/scop/parse/

"Release 1.73": http://scop.mrc-lmb.cam.ac.uk/scop/parse/dir.cla.scop.txt_1.73
(July 2008)

"""
00019 
00020 
00021 
00022 from Residues import * 
00023 
00024 
class Record(object):
    """Holds information for one SCOP domain.

    sid         --  SCOP identifier. e.g. d1danl2

    residues    --  The domain definition as a Residues object

    sccs        --  SCOP concise classification strings.  e.g. b.1.2.1

    sunid       --  SCOP unique identifier for this domain

    hierarchy   --  A dictionary, keys are nodetype, values are sunid,
                    describing the location of this domain in the SCOP
                    hierarchy. See the Scop module for a description of
                    nodetypes. This used to be a list of (key,value) tuples
                    in older versions of Biopython (see Bug 3109).
    """
    def __init__(self, line=None):
        """Create an empty record, or parse one from a line of a CLA file.

        Arguments:

            line -- optional line of a CLA file.  A false value (None or
                    '') leaves every field at its empty default.
        """
        self.sid = ''
        self.residues = None
        self.sccs = ''
        self.sunid = ''
        self.hierarchy = {}
        if line:
            self._process(line)

    def _process(self, line):
        """Fill in this record from one line of a CLA file.

        The line must hold exactly six tab-separated columns, in the
        order: sid, pdbid, residues, sccs, sunid, hierarchy.  The
        hierarchy column is a comma-separated list of key=value pairs
        whose values are converted to integers.

        Raises ValueError if the line does not have six columns; the
        integer conversions and the key=value unpacking raise ValueError
        as well when a field is malformed.
        """
        line = line.rstrip()         # no trailing whitespace
        columns = line.split('\t')   # separate the tab-delimited cols
        if len(columns) != 6:
            raise ValueError("I don't understand the format of %s" % line)

        self.sid, pdbid, residues, self.sccs, self.sunid, hierarchy = columns
        self.residues = Residues(residues)
        # The PDB id has its own column in the file, so it is attached to
        # the Residues object after construction.
        self.residues.pdbid = pdbid
        self.sunid = int(self.sunid)

        for ht in hierarchy.split(","):
            key, value = ht.split('=')
            self.hierarchy[key] = int(value)

    def __str__(self):
        """Return the record as one tab-separated, newline-terminated line."""
        s = [self.sid]
        # str(residues) is split on spaces so that each space-separated
        # part of it becomes a column of its own.
        s += str(self.residues).split(" ")
        s.append(self.sccs)
        s.append(self.sunid)

        # items() instead of iteritems(): the latter exists on Python 2
        # only, while items() is available on both Python 2 and 3.
        s.append(','.join('='.join((key, str(value))) for key, value
                          in self.hierarchy.items()))

        return "\t".join(map(str, s)) + "\n"
00077 
00078 
def parse(handle):
    """Generate one Cla Record per data line of a CLA file.

    Lines that begin with '#' are treated as comments and produce no
    record; every other line is handed to Record unchanged.

    Arguments:

        handle -- file-like object, iterated over line by line.
    """
    for cla_line in handle:
        if not cla_line.startswith('#'):
            yield Record(cla_line)
00091 
00092 
class Index(dict):
    """A CLA file indexed by SCOP identifiers, allowing rapid
       random access into a file.

    The underlying dictionary maps each sid to the offset of its line in
    the file; looking a key up with [] re-opens the file, seeks to that
    offset and returns the parsed Record.
    """
    def __init__(self, filename):
        """Scan the file once and remember where each record starts.

        Arguments:

          filename  -- The file to index
        """
        dict.__init__(self)
        self.filename = filename
        f = self._open()
        try:
            while True:
                # Take the offset *before* reading, so it always refers
                # to the start of the line about to be read.  Updating it
                # only after data lines left a stale offset behind every
                # comment line, so the first record after a comment was
                # indexed at the comment's position.
                position = f.tell()
                line = f.readline()
                if not line:
                    break
                if line.startswith('#'):
                    continue
                record = Record(line)
                key = record.sid
                if key is not None:
                    self[key] = position
        finally:
            f.close()

    def _open(self):
        """Open the indexed file for reading text with universal newlines."""
        import sys
        if sys.version_info[0] >= 3:
            # Text mode already translates newlines on Python 3, and the
            # "U" flag is rejected outright from Python 3.11 onwards.
            return open(self.filename, "r")
        return open(self.filename, "rU")

    def __getitem__(self, key):
        """Return the Record stored in the indexed file under key.

        Raises KeyError if key was not found when the file was indexed.
        """
        position = dict.__getitem__(self, key)

        f = self._open()
        try:
            f.seek(position)
            line = f.readline()
            record = Record(line)
        finally:
            f.close()
        return record
00131         return record