Back to index

python-biopython  1.60
__init__.py
Go to the documentation of this file.
00001 # Copyright 2001 by Tarjei Mikkelsen.  All rights reserved.
00002 # Copyright 2007 by Michiel de Hoon.  All rights reserved.
00003 # This code is part of the Biopython distribution and governed by its
00004 # license.  Please see the LICENSE file that should have been included
00005 # as part of this package.
00006 
00007 """
00008 This module provides code to work with the KEGG Ligand/Compound database.
00009 
00010 Functions:
00011 parse - Returns an iterator giving Record objects.
00012 
00013 Classes:
00014 Record - A representation of a KEGG Ligand/Compound.
00015 """
00016 
00017 # other Biopython stuff
00018 from Bio.KEGG import _write_kegg
00019 from Bio.KEGG import _wrap_kegg
00020 
00021 
# Line wrapping rules for the record fields.  How a rule is interpreted is
# defined by Bio.KEGG._wrap_kegg; this module only builds the rule lists.
name_wrap = [0, "",
             (" ", "$", 1, 1),
             ("-", "$", 1, 1)]


def id_wrap(indent):
    """Return a fresh wrap rule for identifier lists.

    The rule has the same layout as name_wrap, with `indent` as its first
    element and a single-space entry as its only tuple.  A new list is
    built on every call.
    """
    return [indent, "",
            (" ", "", 1, 0)]


def struct_wrap(indent):
    """Return a fresh wrap rule for structure id lists.

    The rule has the same layout as name_wrap, with `indent` as its first
    element and a two-space entry as its only tuple.  A new list is built
    on every call.
    """
    return [indent, "",
            ("  ", "", 1, 1)]
00030 
class Record(object):
    """Holds info from a KEGG Ligand/Compound record.

    Members:
    entry       The entry identifier.
    name        A list of the compound names.
    formula     The chemical formula for the compound.
    mass        The molecular weight for the compound.
    pathway     A list of 3-tuples: (database, id, pathway)
    enzyme      A list of 2-tuples: (enzyme id, role)
    structures  A list of 2-tuples: (database, list of struct ids)
    dblinks     A list of 2-tuples: (database, list of link ids)

    """

    def __init__(self):
        """Create a new, empty Record.

        String members start as "" and list members as new empty lists, so
        no state is shared between instances.
        """
        self.entry = ""
        self.name = []
        self.formula = ""
        self.mass = ""
        self.pathway = []
        self.enzyme = []
        self.structures = []
        self.dblinks = []

    def __str__(self):
        """Return a string representation of this Record.

        The sections are concatenated in a fixed order (ENTRY, NAME,
        FORMULA, MASS, PATHWAY, ENZYME, STRUCTURES, DBLINKS) and followed
        by the "///" record terminator.
        """
        sections = [
            self._entry(),
            self._name(),
            self._formula(),
            self._mass(),
            self._pathway(),
            self._enzyme(),
            self._structures(),
            self._dblinks(),
        ]
        return "".join(sections) + "///"

    def _entry(self):
        """Return the ENTRY section as formatted by _write_kegg."""
        return _write_kegg("ENTRY", [self.entry])

    def _name(self):
        """Return the NAME section, each name wrapped with name_wrap."""
        wrapped = [_wrap_kegg(name, wrap_rule=name_wrap)
                   for name in self.name]
        return _write_kegg("NAME", wrapped)

    def _formula(self):
        """Return the FORMULA section as formatted by _write_kegg."""
        return _write_kegg("FORMULA", [self.formula])

    def _mass(self):
        """Return the MASS section as formatted by _write_kegg."""
        return _write_kegg("MASS", [self.mass])

    def _pathway(self):
        """Return the PATHWAY section.

        Each pathway tuple is rendered as "database: id  pathway" and
        wrapped with id_wrap(16).
        """
        rows = [entry[0] + ": " + entry[1] + "  " + entry[2]
                for entry in self.pathway]
        wrapped = [_wrap_kegg(row, wrap_rule=id_wrap(16)) for row in rows]
        return _write_kegg("PATHWAY", wrapped)

    def _enzyme(self):
        """Return the ENZYME section.

        Each enzyme is rendered as "id (role)", or just "id" when the role
        is empty, and left-justified to 16 characters.  All of them are
        joined into one string that is wrapped with id_wrap(0).
        """
        columns = []
        for entry in self.enzyme:
            if entry[1]:
                text = entry[0] + " (" + entry[1] + ")"
            else:
                text = entry[0]
            # 16 is the column width parse() uses when reading ENZYME data.
            columns.append(text.ljust(16))
        # join() avoids rebuilding the whole string once per enzyme.
        line = "".join(columns)
        return _write_kegg("ENZYME",
                           [_wrap_kegg(line, wrap_rule=id_wrap(0))])

    def _structures(self):
        """Return the STRUCTURES section.

        Each structures tuple is rendered as "database: id  id  " (ids
        separated by two spaces, plus two trailing spaces) and wrapped with
        struct_wrap(5).
        """
        rows = [entry[0] + ": " + "  ".join(entry[1]) + "  "
                for entry in self.structures]
        wrapped = [_wrap_kegg(row, wrap_rule=struct_wrap(5)) for row in rows]
        return _write_kegg("STRUCTURES", wrapped)

    def _dblinks(self):
        """Return the DBLINKS section.

        Each dblinks tuple is rendered as "database: id id" (ids separated
        by one space) and wrapped with id_wrap(9).
        """
        rows = [entry[0] + ": " + " ".join(entry[1])
                for entry in self.dblinks]
        wrapped = [_wrap_kegg(row, wrap_rule=id_wrap(9)) for row in rows]
        return _write_kegg("DBLINKS", wrapped)
00118 
00119 
def parse(handle):
    """Parse a KEGG Ligand/Compound file, returning Record objects.

    This is an iterator function, typically used in a for loop.  For
    example, using one of the example KEGG files in the Biopython
    test suite,

    >>> handle = open("KEGG/compound.sample")
    >>> for record in parse(handle):
    ...     print("%s %s" % (record.entry, record.name[0]))
    ...
    C00023 Iron
    C00017 Protein
    C00099 beta-Alanine
    C00294 Inosine
    C00298 Trypsin
    C00348 Undecaprenyl phosphate
    C00349 2-Methyl-3-oxopropanoate
    C01386 NH2Mec
    >>> handle.close()

    Arguments:
    handle - Any iterable yielding the lines of the file as strings.

    A Record is yielded each time a line starting with "///" is read, so
    data after the last "///" is not returned.  The first 12 characters of
    a line hold the keyword; a line whose first 12 characters are all
    spaces continues the previous keyword.  Keywords other than ENTRY,
    NAME, ENZYME, PATHWAY, FORMULA, MASS and DBLINKS are ignored, as are
    continuation lines that appear before any keyword line of a record.
    """
    record = Record()
    # No keyword is active until the first keyword line of a record.  An
    # empty string matches none of the branches below, so a leading
    # continuation line is skipped instead of raising UnboundLocalError.
    keyword = ""
    for line in handle:
        if line[:3] == "///":
            yield record
            record = Record()
            # Do not let a continuation line leak into the next record.
            keyword = ""
            continue
        if line[:12] != "            ":
            keyword = line[:12]
        data = line[12:].strip()
        if keyword == "ENTRY       ":
            words = data.split()
            record.entry = words[0]
        elif keyword == "NAME        ":
            # Names are separated by a ";" at the end of each line.
            record.name.append(data.strip(";"))
        elif keyword == "ENZYME      ":
            # Enzymes are laid out in fixed columns of 16 characters.
            while data:
                column, data = data[:16], data[16:]
                if "(" in column:
                    # "id (role)": drop the parentheses around the role.
                    fields = column.split()
                    enzyme = (fields[0], fields[1][1:-1])
                else:
                    enzyme = (column.strip(), "")
                record.enzyme.append(enzyme)
        elif keyword == "PATHWAY     ":
            if data[:5] == "PATH:":
                # "PATH: <id>  <name>"; path[:-1] drops the trailing colon.
                path, map_id, name = data.split(None, 2)
                record.pathway.append((path[:-1], map_id, name))
            else:
                # Any line not starting with "PATH:" is treated as the
                # continuation of the previous pathway's name.
                path, map_id, name = record.pathway[-1]
                record.pathway[-1] = (path, map_id, name + " " + data)
        elif keyword == "FORMULA     ":
            record.formula = data
        elif keyword == "MASS        ":
            record.mass = data
        elif keyword == "DBLINKS     ":
            if ":" in data:
                # Split on the first colon only, so that link ids which
                # themselves contain a colon do not raise ValueError.
                key, values = data.split(":", 1)
                record.dblinks.append((key, values.split()))
            else:
                # A line without a colon adds more ids to the last database.
                key, values = record.dblinks[-1]
                values.extend(data.split())
                record.dblinks[-1] = (key, values)
00192 
00193 def _test():
00194     """Run the Bio.KEGG.Compound module's doctests.
00195     
00196     This will try and locate the unit tests directory, and run the doctests
00197     from there in order that the relative paths used in the examples work.
00198     """
00199     import doctest
00200     import os
00201     if os.path.isdir(os.path.join("..","..","..","Tests")):
00202         print "Runing doctests..."
00203         cur_dir = os.path.abspath(os.curdir)
00204         os.chdir(os.path.join("..","..","..","Tests"))
00205         doctest.testmod()
00206         os.chdir(cur_dir)
00207         del cur_dir
00208         print "Done"
00209 
# Run the module's doctests when this file is executed as a script.
if __name__ == "__main__":
    _test()