Back to index

python-biopython  1.60
Hie.py
Go to the documentation of this file.
00001 # Copyright 2001 by Gavin E. Crooks.  All rights reserved.
00002 # This code is part of the Biopython distribution and governed by its
00003 # license.  Please see the LICENSE file that should have been included
00004 # as part of this package.
00005 
00006 
00007 """ Handle the SCOP HIErarchy files, which describe the SCOP hierarchy in
00008 terms of SCOP unique identifiers (sunid).
00009 
00010 The file format is described in the scop
00011 "release notes.":http://scop.berkeley.edu/release-notes-1.55.html 
00012 The latest HIE file can be found
00013 "elsewhere at SCOP.":http://scop.mrc-lmb.cam.ac.uk/scop/parse/
00014   
00015 "Release 1.55":http://scop.berkeley.edu/parse/dir.hie.scop.txt_1.55 (July 2001)
00016 """
00017 
00018 
class Record(object):
    """Holds information for one node in the SCOP hierarchy.

    sunid      -- SCOP unique identifier of this node (int), or '' when
                  the field is '-' or no line has been parsed

    parent     -- Parent's sunid (int), or '' when the field is '-'

    children   -- Sequence of children's sunids (list of int); an empty
                  tuple when the field is '-'
    """

    def __init__(self, line=None):
        """Create an empty record, or populate it from one HIE line.

        Arguments:

            line -- optional HIE record line.  When it is None or empty
                    the attributes keep their empty defaults.
        """
        self.sunid = ''
        self.parent = ''
        self.children = []
        if line:
            self._process(line)

    def _process(self, line):
        """Parses HIE records.

        Records consist of 3 tab-delimited fields: the node's sunid,
        the parent's sunid, and a comma-separated list of the children's
        sunids.  A field holding '-' means "no value".

        Raises ValueError if the line does not split into exactly
        3 fields, or if a field that is not '-' cannot be converted
        with int().
        """
        # For example ::
        #
        # 0       -       46456,48724,51349,53931,56572,56835,56992,57942
        # 21953   49268   -
        # 49267   49266   49268,49269
        line = line.rstrip()         # no trailing whitespace
        columns = line.split('\t')   # separate the tab-delimited cols
        if len(columns) != 3:
            raise ValueError("I don't understand the format of %s" % line)

        sunid, parent, children = columns

        if sunid == '-':
            self.sunid = ''
        else:
            self.sunid = int(sunid)

        if parent == '-':
            self.parent = ''
        else:
            self.parent = int(parent)

        if children == '-':
            self.children = ()
        else:
            # Build a real list rather than using map(): on Python 3 map()
            # returns a one-shot iterator that is always truthy and is
            # exhausted after the first traversal, which breaks __str__
            # and any caller that indexes or re-reads the children.
            self.children = [int(child) for child in children.split(',')]

    def __str__(self):
        """Return the record as one tab-separated HIE line ending in a newline.

        A falsy parent is written as '-' when sunid equals 0 and as '0'
        otherwise; an empty children sequence is written as '-'.
        """
        s = [str(self.sunid)]

        if self.parent:
            s.append(str(self.parent))
        elif self.sunid != 0:
            s.append('0')
        else:
            s.append('-')

        if self.children:
            s.append(",".join(str(child) for child in self.children))
        else:
            s.append('-')

        return "\t".join(s) + "\n"
00090 
00091 
def parse(handle):
    """Yield one Record per line of a HIE file, skipping comment lines.

    Lines that begin with '#' are ignored; every other line is handed
    to Record as-is.

    Arguments:

        handle -- file-like object, iterated line by line.
    """
    for raw_line in handle:
        if not raw_line.startswith('#'):
            yield Record(raw_line)