Back to index

python-biopython  1.60
Des.py
Go to the documentation of this file.
00001 # Copyright 2001 by Gavin E. Crooks.  All rights reserved.
00002 # This code is part of the Biopython distribution and governed by its
00003 # license.  Please see the LICENSE file that should have been included
00004 # as part of this package.
00005 
00006 
00007 """ Handle the SCOP DEScription file.
00008 
00009 The file format is described in the scop
00010 "release notes.":http://scop.berkeley.edu/release-notes-1.55.html 
00011 The latest DES file can be found
00012 "elsewhere at SCOP.":http://scop.mrc-lmb.cam.ac.uk/scop/parse/
00013   
00014 "Release 1.55":http://scop.berkeley.edu/parse/des.cla.scop.txt_1.55 (July 2001)
00015 """
00016 
00017 
class Record(object):
    """Holds information for one node in the SCOP hierarchy.

    Attributes:

    sunid       -- SCOP unique identifier. An int once a line has been
                   parsed; the empty string for a blank record.

    nodetype    -- One of 'cl' (class), 'cf' (fold), 'sf' (superfamily),
                   'fa' (family), 'dm' (protein), 'sp' (species),
                   'px' (domain). Additional node types may be added.

    sccs        -- SCOP concise classification string, e.g. b.1.2.1

    name        -- The SCOP ID (sid) for domains (e.g. d1anu1),
                   currently empty for other node types. A '-' in the
                   file is stored as the empty string.

    description -- e.g. "All beta proteins", "Fibronectin type III"
    """

    def __init__(self, line=None):
        """Create a blank record, or parse ``line`` when one is given.

        Arguments:
            line -- optional string holding one DES record. A false
                    value (None or '') leaves every field empty.
        """
        self.sunid = ''
        self.nodetype = ''
        self.sccs = ''
        self.name = ''
        self.description = ''
        if line:
            self._process(line)

    def _process(self, line):
        """Parse one DES record and store its fields on this object.

        Records consist of 5 tab delimited fields:
        sunid, node type, sccs, node name, node description.

        Raises ValueError if the line does not hold exactly five
        fields, or if the sunid field is not an integer.
        """
        #For example ::
        #
        #21953   px      b.1.2.1 d1dan.1 1dan T:,U:91-106
        #48724   cl      b       -       All beta proteins
        #48725   cf      b.1     -       Immunoglobulin-like beta-sandwich
        #49265   sf      b.1.2   -       Fibronectin type III
        #49266   fa      b.1.2.1 -       Fibronectin type III

        stripped = line.rstrip()  # no trailing whitespace
        columns = stripped.split("\t")  # separate the tab-delimited cols

        # rstrip() also removes the tab that delimits an *empty*
        # description, which would leave only four fields. Restore the
        # empty field in that case so that the output of __str__ can
        # always be parsed back in.
        if len(columns) == 4 and line[len(stripped):].startswith("\t"):
            columns.append("")

        if len(columns) != 5:
            raise ValueError("I don't understand the format of %s" % stripped)

        sunid, nodetype, sccs, name, description = columns
        # Convert before assigning anything, so a malformed sunid does
        # not leave the record half filled in.
        sunid = int(sunid)

        self.sunid = sunid
        self.nodetype = nodetype
        self.sccs = sccs
        # The file uses '-' as the placeholder for "no name".
        self.name = '' if name == '-' else name
        self.description = description

    def __str__(self):
        """Return the record as one tab separated, newline terminated line.

        An empty name is written as '-', mirroring the file format.
        """
        fields = [self.sunid,
                  self.nodetype,
                  self.sccs,
                  self.name or "-",
                  self.description]
        return "\t".join(map(str, fields)) + "\n"
00079 
00080 
def parse(handle):
    """Generate one Record per data line of a DES file.

    Lines that begin with '#' are treated as comments and skipped;
    every other line is handed to Record for parsing.

    Arguments:
        handle -- file-like object, iterated line by line
    """
    for entry in handle:
        if not entry.startswith('#'):
            yield Record(entry)