Back to index

python-biopython  1.60
Prodoc.py
Go to the documentation of this file.
00001 # Copyright 2000 by Jeffrey Chang.  All rights reserved.
00002 # This code is part of the Biopython distribution and governed by its
00003 # license.  Please see the LICENSE file that should have been included
00004 # as part of this package.
00005 
00006 """
00007 This module provides code to work with the prosite.doc file from
00008 Prosite.
00009 http://www.expasy.ch/prosite/
00010 
00011 Tested with:
00012 Release 15.0, July 1998
00013 Release 16.0, July 1999
00014 Release 20.22, 13 November 2007
00015 Release 20.43, 10 February 2009
00016 
00017 
00018 Functions:
00019 read               Read a Prodoc file containing exactly one Prodoc entry.
00020 parse              Iterates over entries in a Prodoc file.
00021 
00022 Classes:
00023 Record             Holds Prodoc data.
00024 Reference          Holds data from a Prodoc reference.
00025 """
00026 
00027 
def read(handle):
    """Read a handle that contains exactly one Prodoc record.

    Arguments:
    handle     Iterable of text lines (e.g. an open file) that keeps its
               position between successive iterations.

    Returns the value produced by __read for the first record: a Record,
    or None when the handle holds nothing but blank / '//' lines.

    Raises ValueError("More than one Prodoc record found") if any
    meaningful content follows the first record.
    """
    record = __read(handle)
    # Blank lines and '//' separators are what __read itself skips between
    # records, so they are harmless after the record too.  Anything else
    # means the input holds further data.
    for line in handle:
        line = line.rstrip()
        if line and not line.startswith("//"):
            raise ValueError("More than one Prodoc record found")
    return record
00035 
def parse(handle):
    """Iterate over the Prodoc records in handle.

    This is a generator: each record is read lazily via __read and
    yielded in file order.  Iteration stops as soon as __read returns a
    false value, i.e. when no further record is available.
    """
    entry = __read(handle)
    while entry:
        yield entry
        entry = __read(handle)
00042  
class Record(object):
    """Container for the data of one Prodoc entry.

    Attributes:
    accession      Accession number of the entry (string).
    prosite_refs   List of (prosite accession, prosite name) tuples.
    text           The free-format documentation text (string).
    references     List of Reference objects cited by the entry.

    """

    def __init__(self):
        # Every instance starts empty and owns its own list objects.
        self.accession, self.prosite_refs = '', []
        self.text, self.references = '', []
00058 
00059 
class Reference(object):
    """Container for one citation of a Prodoc entry.

    Attributes:
    number     Identifier of the reference within the entry (string).
    authors    Author names (string).
    citation   Text describing the citation (string).

    """

    def __init__(self):
        # All three fields are plain strings that start out empty.
        self.number = self.authors = self.citation = ''
00073 
00074 # Below are private functions
00075 
def __read_prosite_reference_line(record, line):
    """Parse a '{ACCESSION; NAME}' line and store it on the record.

    Trailing whitespace is ignored.  The text between the first and last
    character is split on '; ' into exactly two parts, which are appended
    to record.prosite_refs as an (accession, name) tuple.

    Raises ValueError if the stripped line does not end in '}' or does
    not split into exactly two parts.
    """
    entry = line.rstrip()
    if entry[-1] != '}':
        raise ValueError("I don't understand the Prosite reference on line\n%s" % entry)
    # Drop the surrounding braces before splitting the two fields.
    accession, name = entry[1:-1].split('; ')
    record.prosite_refs.append((accession, name))
00082 
def __read_text_line(record, line):
    """Append line unchanged to record.text.

    Always returns True, which tells the caller to keep using this
    handler for the following lines.
    """
    record.text = record.text + line
    return True
00086 
def __read_reference_start(record, line):
    """Start a new Reference from a reference header line.

    Characters 1-2 of the line (stripped) become the reference number;
    the text from column 4 onwards (stripped) is stored on the new
    reference, which is then appended to record.references.
    """
    ref = Reference()
    ref.number = line[1:3].strip()
    # An 'E' in column 1 marks an electronic reference: the rest of the
    # line is then the URL (citation) rather than the author list.
    target = 'citation' if line[1] == 'E' else 'authors'
    setattr(ref, target, line[4:].strip())
    record.references.append(ref)
00098 
def __read_reference_line(record, line):
    """Fold a continuation line into the most recent reference.

    Arguments:
    record     Object whose references list ends with the reference
               currently being read.
    line       The raw input line.

    Returns False for a blank line (signalling that the reference section
    is over) and True after a continuation line has been consumed.

    A continuation line starts with five spaces.  It extends the author
    list while that list still ends in a comma; otherwise it is appended
    to the citation.

    Raises ValueError for a non-blank line that is not indented by five
    spaces.
    """
    if not line.strip():
        return False
    reference = record.references[-1]
    if line.startswith('     '):
        # endswith() instead of authors[-1]: electronic references have an
        # empty author string, and indexing it would raise IndexError.
        if reference.authors.endswith(','):
            # Slice from column 4 to keep one separating space.
            reference.authors += line[4:].rstrip()
        else:
            reference.citation += line[5:]
        return True
    raise ValueError("I don't understand the reference line\n%s" % line)
00110 
def __read_copyright_line(record, line):
    """Consume one line of the copyright statement without storing it.

    Returns False when the line starts with '+----' and True for every
    other line; the record is never modified.
    """
    return not line.startswith('+----')
00116 
def __read(handle):
    """Read the next Prodoc record from handle.

    Arguments:
    handle     Iterable of text lines that keeps its position between
               successive for-loops (e.g. an open file).

    Returns a populated Record, or None when only blank lines and '//'
    lines remain before the input is exhausted.

    Raises ValueError when the accession line or the '{BEGIN' line is
    malformed, or when the input ends before '{END}' is seen.  Errors
    raised by the per-line helper functions propagate unchanged.
    """
    # Skip blank lines between records
    for line in handle:
        line = line.rstrip()
        if line and not line.startswith("//"):
            break
    else:
        # Input exhausted without finding the start of a record.
        return None
    record = Record()
    # Read the accession number
    if not line.startswith("{PDOC"):
        raise ValueError("Line does not start with '{PDOC':\n%s" % line)
    if line[-1] != '}':
        raise ValueError("I don't understand accession line\n%s" % line)
    # Strip the surrounding braces.
    record.accession = line[1:-1]
    # Read the Prosite references
    for line in handle:
        if line.startswith('{PS'):
            __read_prosite_reference_line(record, line)
        else:
            break
    else:
        raise ValueError("Unexpected end of stream.")
    # Read the actual text
    if not line.startswith('{BEGIN'):
        raise ValueError("Line does not start with '{BEGIN':\n%s" % line)
    # read_line is the handler for "ordinary" lines of the current section.
    # It is None while lines are being ignored, which happens after a
    # handler has returned False.
    read_line = __read_text_line
    for line in handle:
        if line.startswith('{END}'):
            # Clean up the record and return
            for reference in record.references:
                reference.citation = reference.citation.rstrip()
                reference.authors = reference.authors.rstrip()
            return record
        elif line[:1] == '[' and line[3:5] == '] ':
            # Reference header such as '[ 1] ' or '[E1] '.  Slices are used
            # so that a short line starting with '[' cannot raise IndexError.
            __read_reference_start(record, line)
            read_line = __read_reference_line
        elif line.startswith('+----'):
            # Border of the copyright box: lines that follow are skipped.
            read_line = __read_copyright_line
        elif read_line:
            if not read_line(record, line):
                read_line = None
    raise ValueError("Unexpected end of stream.")
00155             read_line = __read_copyright_line
00156         elif read_line:
00157             if not read_line(record, line):
00158                 read_line = None
00159     raise ValueError("Unexpected end of stream.")