Back to index

python-biopython  1.60
__init__.py
Go to the documentation of this file.
00001 # Copyright 2001 by Tarjei Mikkelsen.  All rights reserved.
00002 # Copyright 2007 by Michiel de Hoon.  All rights reserved.
00003 # This code is part of the Biopython distribution and governed by its
00004 # license.  Please see the LICENSE file that should have been included
00005 # as part of this package.
00006 
00007 """
00008 This module provides code to work with the KEGG Enzyme database.
00009 
00010 Functions:
00011 parse - Returns an iterator giving Record objects.
00012 
00013 Classes:
00014 Record               -- Holds the information from a KEGG Enzyme record.
00015 """
00016 
00017 from Bio.KEGG import _write_kegg
00018 from Bio.KEGG import _wrap_kegg
00019 
00020 
# Line wrapping rules handed to Bio.KEGG._wrap_kegg through its ``wrap_rule``
# argument.  Each rule is a list: an integer, a string, then one or more
# 4-tuples.  The exact meaning of every slot is defined by _wrap_kegg, not
# here (presumably: indent, connector, then break-point descriptions --
# confirm against Bio.KEGG before relying on it).

# Used for REACTION lines.
rxn_wrap = [0, "",
            (" + ", "", 1, 1),
            (" = ", "", 1, 1),
            (" ", "$", 1, 1),
            ("-", "$", 1, 1)]

# Used for NAME, SYSNAME, SUBSTRATE, PRODUCT, INHIBITOR, COFACTOR and
# EFFECTOR lines.
name_wrap = [0, "",
             (" ", "$", 1, 1),
             ("-", "$", 1, 1)]


def id_wrap(indent):
    """Return a fresh wrap rule whose first element is ``indent``.

    Used for COMMENT, PATHWAY, GENES and DISEASE lines.  A new list is
    built on every call, so callers may modify the result freely.
    """
    return [indent, "",
            (" ", "", 1, 0)]


def struct_wrap(indent):
    """Return a fresh wrap rule whose first element is ``indent``.

    Used for STRUCTURES lines; the single rule tuple keys on a
    two-space string.  A new list is built on every call.
    """
    return [indent, "",
            ("  ", "", 1, 1)]
00034 
class Record(object):
    """Container for the data of one KEGG Enzyme record.

    Attributes:
    entry       The EC number (without the 'EC ' prefix).
    name        A list of the enzyme names.
    classname   A list of the classification terms.
    sysname     A list of the systematic name strings.
    reaction    A list of the reaction description strings.
    substrate   A list of the substrates.
    product     A list of the products.
    inhibitor   A list of the inhibitors.
    cofactor    A list of the cofactors.
    effector    A list of the effectors.
    comment     A list of the comment strings.
    pathway     A list of 3-tuples: (database, id, pathway)
    genes       A list of 2-tuples: (organism, list of gene ids)
    disease     A list of 3-tuples: (database, id, disease)
    structures  A list of 2-tuples: (database, list of struct ids)
    dblinks     A list of 2-tuples: (database, list of db ids)

    str(record) concatenates one formatted section per attribute, in the
    order listed above, followed by the '///' terminator.
    """

    def __init__(self):
        """Create a new, empty Record."""
        self.entry = ""
        self.name = []
        self.classname = []
        self.sysname = []
        self.reaction = []
        self.substrate = []
        self.product = []
        self.inhibitor = []
        self.cofactor = []
        self.effector = []
        self.comment = []
        self.pathway = []
        self.genes = []
        self.disease = []
        self.structures = []
        self.dblinks = []

    def __str__(self):
        """Return a string representation of this Record."""
        # Sections are produced in this fixed order; "///" closes the record.
        sections = [
            self._entry(),
            self._name(),
            self._classname(),
            self._sysname(),
            self._reaction(),
            self._substrate(),
            self._product(),
            self._inhibitor(),
            self._cofactor(),
            self._effector(),
            self._comment(),
            self._pathway(),
            self._genes(),
            self._disease(),
            self._structures(),
            self._dblinks(),
            "///",
        ]
        return "".join(sections)

    def _entry(self):
        """Format the ENTRY section, restoring the 'EC ' prefix."""
        values = ["EC " + self.entry]
        return _write_kegg("ENTRY", values)

    def _name(self):
        """Format the NAME section."""
        wrapped = [_wrap_kegg(item, wrap_rule=name_wrap)
                   for item in self.name]
        return _write_kegg("NAME", wrapped)

    def _classname(self):
        """Format the CLASS section (items are written unwrapped)."""
        return _write_kegg("CLASS", self.classname)

    def _sysname(self):
        """Format the SYSNAME section."""
        wrapped = [_wrap_kegg(item, wrap_rule=name_wrap)
                   for item in self.sysname]
        return _write_kegg("SYSNAME", wrapped)

    def _reaction(self):
        """Format the REACTION section."""
        wrapped = [_wrap_kegg(item, wrap_rule=rxn_wrap)
                   for item in self.reaction]
        return _write_kegg("REACTION", wrapped)

    def _substrate(self):
        """Format the SUBSTRATE section."""
        wrapped = [_wrap_kegg(item, wrap_rule=name_wrap)
                   for item in self.substrate]
        return _write_kegg("SUBSTRATE", wrapped)

    def _product(self):
        """Format the PRODUCT section."""
        wrapped = [_wrap_kegg(item, wrap_rule=name_wrap)
                   for item in self.product]
        return _write_kegg("PRODUCT", wrapped)

    def _inhibitor(self):
        """Format the INHIBITOR section."""
        wrapped = [_wrap_kegg(item, wrap_rule=name_wrap)
                   for item in self.inhibitor]
        return _write_kegg("INHIBITOR", wrapped)

    def _cofactor(self):
        """Format the COFACTOR section."""
        wrapped = [_wrap_kegg(item, wrap_rule=name_wrap)
                   for item in self.cofactor]
        return _write_kegg("COFACTOR", wrapped)

    def _effector(self):
        """Format the EFFECTOR section."""
        wrapped = [_wrap_kegg(item, wrap_rule=name_wrap)
                   for item in self.effector]
        return _write_kegg("EFFECTOR", wrapped)

    def _comment(self):
        """Format the COMMENT section."""
        wrapped = [_wrap_kegg(item, wrap_rule=id_wrap(0))
                   for item in self.comment]
        return _write_kegg("COMMENT", wrapped)

    def _pathway(self):
        """Format the PATHWAY section from (database, id, pathway) tuples."""
        rows = [entry[0] + ": " + entry[1] + "  " + entry[2]
                for entry in self.pathway]
        wrapped = [_wrap_kegg(row, wrap_rule=id_wrap(16)) for row in rows]
        return _write_kegg("PATHWAY", wrapped)

    def _genes(self):
        """Format the GENES section from (organism, gene ids) tuples."""
        rows = [entry[0] + ": " + " ".join(entry[1])
                for entry in self.genes]
        wrapped = [_wrap_kegg(row, wrap_rule=id_wrap(5)) for row in rows]
        return _write_kegg("GENES", wrapped)

    def _disease(self):
        """Format the DISEASE section from (database, id, disease) tuples."""
        rows = [entry[0] + ": " + entry[1] + "  " + entry[2]
                for entry in self.disease]
        wrapped = [_wrap_kegg(row, wrap_rule=id_wrap(13)) for row in rows]
        return _write_kegg("DISEASE", wrapped)

    def _structures(self):
        """Format the STRUCTURES section from (database, struct ids) tuples."""
        # Ids are joined by two spaces and the row also ends with two
        # spaces before it is handed to the struct_wrap rule.
        rows = [entry[0] + ": " + "  ".join(entry[1]) + "  "
                for entry in self.structures]
        wrapped = [_wrap_kegg(row, wrap_rule=struct_wrap(5)) for row in rows]
        return _write_kegg("STRUCTURES", wrapped)

    def _dblinks(self):
        """Format the DBLINKS section from (database, db ids) tuples."""
        # This is a bit of a cheat that won't work if enzyme entries
        # have more than one link id per db id. For now, that's not
        # the case - storing link ids in a list is only to make
        # this class similar to the Compound.Record class.
        rows = [entry[0] + ": " + "  ".join(entry[1])
                for entry in self.dblinks]
        return _write_kegg("DBLINKS", rows)
00178 
00179 
00180 
def parse(handle):
    """Parse a KEGG Enzyme file, returning Record objects.

    This is an iterator function, typically used in a for loop.  For
    example, using one of the example KEGG files in the Biopython
    test suite,

    >>> handle = open("KEGG/enzyme.sample")
    >>> for record in parse(handle):
    ...     print("%s %s" % (record.entry, record.name[0]))
    ...
    1.1.1.1 Alcohol dehydrogenase
    1.1.1.62 Estradiol 17beta-dehydrogenase
    1.1.1.68 Transferred to EC 1.7.99.5
    1.6.5.3 NADH dehydrogenase (ubiquinone)
    1.14.13.28 3,9-Dihydroxypterocarpan 6a-monooxygenase
    2.4.1.68 Glycoprotein 6-alpha-L-fucosyltransferase
    3.1.1.6 Acetylesterase
    2.7.2.1 Acetate kinase
    >>> handle.close()

    Arguments:
    handle - an iterable of text lines (e.g. an open file).

    Each line is split into a 12-character keyword column and a data
    column.  A line whose first 12 characters are all spaces continues
    the most recent keyword.  A Record is yielded each time a line
    starting with "///" is read; data after the last "///" is never
    yielded.  Lines with an unrecognised keyword, and continuation lines
    seen before any keyword line, are ignored.

    Malformed input still raises: an ENTRY line with fewer than two
    words or a DBLINKS/GENES/STRUCTURES/DISEASE/PATHWAY continuation
    without a preceding entry raises IndexError, and a DISEASE or
    PATHWAY entry line missing its fields raises ValueError.
    """
    # Keywords whose data is appended unchanged to a list attribute.
    verbatim_fields = {
        "CLASS       ": "classname",
        "COFACTOR    ": "cofactor",
        "COMMENT     ": "comment",
    }
    # Keywords whose data has leading/trailing ";" removed before it is
    # appended to a list attribute.
    trimmed_fields = {
        "EFFECTOR    ": "effector",
        "INHIBITOR   ": "inhibitor",
        "NAME        ": "name",
        "PRODUCT     ": "product",
        "REACTION    ": "reaction",
        "SUBSTRATE   ": "substrate",
        "SYSNAME     ": "sysname",
    }
    record = Record()
    # Start with no active keyword so that a stray leading continuation
    # line is skipped instead of hitting an unbound local.
    keyword = ""
    for line in handle:
        if line[:3] == "///":
            yield record
            record = Record()
            continue
        if line[:12] != "            ":
            keyword = line[:12]
        data = line[12:].strip()
        if keyword in verbatim_fields:
            getattr(record, verbatim_fields[keyword]).append(data)
        elif keyword in trimmed_fields:
            getattr(record, trimmed_fields[keyword]).append(data.strip(";"))
        elif keyword == "ENTRY       ":
            # The first word is the "EC" tag; the second is the EC number.
            record.entry = data.split()[1]
        elif keyword == "DBLINKS     ":
            if ":" in data:
                # Split on the first colon only, so ids that themselves
                # contain ":" do not break the unpacking.
                key, values = data.split(":", 1)
                record.dblinks.append((key, values.split()))
            else:
                # Continuation: more ids for the most recent database.
                record.dblinks[-1][1].extend(data.split())
        elif keyword == "DISEASE     ":
            if ":" in data:
                # Split on the first colon only, so disease names that
                # contain ":" are kept intact.
                database, rest = data.split(":", 1)
                number, name = rest.split(None, 1)
                record.disease.append((database, number, name))
            else:
                # Continuation: the disease name ran onto another line.
                database, number, name = record.disease[-1]
                record.disease[-1] = (database, number, name + " " + data)
        elif keyword == "GENES       ":
            if data[3:5] == ": ":
                # New organism: a 3-character code followed by ": ".
                key, values = data.split(":", 1)
                # Keep only the part of each gene id before any "(".
                gene_ids = [value.split("(")[0] for value in values.split()]
                record.genes.append((key, gene_ids))
            else:
                # Continuation: more gene ids for the most recent organism.
                record.genes[-1][1].extend(
                    [value.split("(")[0] for value in data.split()])
        elif keyword == "PATHWAY     ":
            if data[:5] == "PATH:":
                path, map_id, name = data.split(None, 2)
                # path[:-1] drops the trailing ":" of "PATH:".
                record.pathway.append((path[:-1], map_id, name))
            else:
                # Continuation: the pathway name ran onto another line.
                path, map_id, name = record.pathway[-1]
                record.pathway[-1] = (path, map_id, name + " " + data)
        elif keyword == "STRUCTURES  ":
            if data[:4] == "PDB:":
                record.structures.append((data[:3], data[4:].split()))
            else:
                # Continuation: more accessions for the most recent database.
                record.structures[-1][1].extend(data.split())
00294 
def _test():
    """Run the Bio.KEGG.Enzyme module's doctests.

    This will try and locate the unit tests directory, and run the doctests
    from there in order that the relative paths used in the examples work.
    If the directory "../../../Tests" (relative to the current working
    directory) does not exist, nothing is done.
    """
    import doctest
    import os
    tests_dir = os.path.join("..", "..", "..", "Tests")
    if os.path.isdir(tests_dir):
        # print(...) with one string argument behaves the same on
        # Python 2 and Python 3.
        print("Running doctests...")
        cur_dir = os.path.abspath(os.curdir)
        os.chdir(tests_dir)
        try:
            doctest.testmod()
        finally:
            # Always return to the caller's working directory, even if
            # the doctest run raises.
            os.chdir(cur_dir)
        print("Done")


if __name__ == "__main__":
    _test()