Back to index

python-biopython  1.60
Enzyme.py
Go to the documentation of this file.
00001 # Copyright 1999 by Jeffrey Chang.  All rights reserved.
00002 # Copyright 2009 by Michiel de Hoon.  All rights reserved.
00003 # This code is part of the Biopython distribution and governed by its
00004 # license.  Please see the LICENSE file that should have been included
00005 # as part of this package.
00006 
00007 """
00008 This module provides code to work with the enzyme.dat file from
00009 Enzyme.
00010 http://www.expasy.ch/enzyme/
00011 
00012 Tested with the release of 03-Mar-2009.
00013 
00014 Functions:
00015 read       Reads a file containing one ENZYME entry
00016 parse      Reads a file containing multiple ENZYME entries
00017 
00018 Classes:
00019 Record     Holds ENZYME data.
00020 
00021 """
00022 
def parse(handle):
    """Iterate over the records of a multi-record ENZYME file.

    This is a generator: records are read lazily from the handle, one
    per iteration, and iteration stops as soon as no further record can
    be read.

    handle   - handle to the file."""

    entry = __read(handle)
    while entry:
        yield entry
        # Only fetch the next record once the caller asks for it.
        entry = __read(handle)
00036 
def read(handle):
    """Return the single ENZYME record stored in a file.

    Use this function for ENZYME files that hold exactly one record.
    After the first record has been read, the rest of the handle is
    consumed; if anything at all is left over, ValueError is raised.

    handle   - handle to the file."""

    entry = __read(handle)
    # Any leftover data means the file held more than a single record.
    if handle.read():
        raise ValueError("More than one ENZYME record found")
    return entry
00051 
00052 
class Record(dict):
    """Dictionary holding the contents of one ExPASy ENZYME record.

    A freshly created record contains the following keys:
        ID: EC number (string)
        DE: Recommended name (string)
        AN: Alternative names, if any (list)
        CA: Catalytic activity (string)
        CF: Cofactors, if any (string)
        CC: Comments (list)
        PR: Pointers to the Prosite documentation entries that
            correspond to the enzyme, if any (list)
        DR: Pointers to the Swiss-Prot protein sequence entries
            that correspond to the enzyme, if any (list)
    """

    def __init__(self):
        super(Record, self).__init__()
        # The literal is evaluated on every call, so each instance gets
        # its own fresh lists.  Key order: ID, DE, AN, CA, CF, CC, PR, DR.
        self.update([
            ("ID", ''),
            ("DE", ''),
            ("AN", []),
            ("CA", ''),
            ("CF", ''),
            ("CC", []),   # one comment per list item
            ("PR", []),
            ("DR", []),
        ])

    def __repr__(self):
        cls_name = self.__class__.__name__
        ec_number = self["ID"]
        if not ec_number:
            return "%s ( )" % cls_name
        description = self["DE"]
        if not description:
            return "%s (%s)" % (cls_name, ec_number)
        return "%s (%s, %s)" % (cls_name, ec_number, description)

    def __str__(self):
        # One-line summary; DR is shown only as a count because it can
        # hold a large number of cross-references.
        fields = [
            "ID: " + self["ID"],
            "DE: " + self["DE"],
            "AN: " + repr(self["AN"]),
            "CA: '" + self["CA"] + "'",
            "CF: " + self["CF"],
            "CC: " + repr(self["CC"]),
            "PR: " + repr(self["PR"]),
            "DR: %d Records" % len(self["DR"]),
        ]
        return " ".join(fields)
00102 
00103 # Everything below is private
00104 
def __read(handle):
    """Read the next ENZYME record from handle.

    Lines are consumed from handle up to and including the "//" line
    that terminates the next record.  Each line is split into a
    two-character line code (the first two characters) and its data
    (everything from the sixth character on, with trailing whitespace
    removed).  Lines with an unrecognised line code are ignored.

    A "//" line seen before any ID line ends the leading copyright
    notice and is skipped; CC lines seen before any ID line are skipped
    as well.

    Returns a Record, or None if the handle is exhausted before an ID
    line is found.

    Raises ValueError if the stream ends in the middle of a record, if a
    DE, AN, CA, CF, PR or DR line appears before any ID line, or if a PR
    line does not start with "PROSITE; ".
    """
    record = None
    for line in handle:
        key, value = line[:2], line[5:].rstrip()
        if key == "ID":
            record = Record()
            record["ID"] = value
            continue
        if key == "//":
            if record is not None:
                return record
            # No record open yet: this "//" ends the copyright notice.
            continue
        if record is None:
            # Outside a record only the copyright notice (CC lines) and
            # unrecognised lines are tolerated; real data lines need an
            # ID line first.
            if key in ("DE", "AN", "CA", "CF", "PR", "DR"):
                raise ValueError("Found %s line before any ID line" % key)
            continue
        if key == "DE":
            record["DE"] += value
        elif key == "AN":
            names = record["AN"]
            # An alternative name ends with a period; until one is seen,
            # the following AN lines continue the same name.
            if names and not names[-1].endswith("."):
                names[-1] += " " + value
            else:
                names.append(value)
        elif key == "CA":
            record["CA"] += value
        elif key == "DR":
            # Each DR line holds ';'-separated "accession, name" pairs.
            for pair in value.rstrip(";").split(';'):
                t1, t2 = pair.split(',')
                record["DR"].append([t1.strip(), t2.strip()])
        elif key == "CF":
            if record["CF"]:
                record["CF"] += " " + value
            else:
                record["CF"] = value
        elif key == "PR":
            # Explicit check instead of assert, which is removed when
            # Python runs with -O.
            if not value.startswith("PROSITE; "):
                raise ValueError("PR line does not start with 'PROSITE; ': %r"
                                 % value)
            record["PR"].append(value[9:].rstrip(";"))
        elif key == "CC":
            if value.startswith("-!- "):
                # "-!- " opens a new comment.
                record["CC"].append(value[4:])
            elif value.startswith("    ") and record["CC"]:
                # Continuation line: keep one of the leading spaces as
                # the separator from the preceding text.
                record["CC"][-1] += value[3:]
            # any other CC line is silently skipped
    if record is not None:
        raise ValueError("Unexpected end of stream")
    return None