Back to index

python-biopython  1.60
ScanProsite.py
Go to the documentation of this file.
00001 import urllib
00002 from xml.sax import handler, make_parser, expatreader
00003 from xml.sax.expatreader import ExpatParser
00004 from xml.sax._exceptions import SAXParseException
00005 
class Record(list):
    """Search results returned by ScanProsite, stored as a list.

    Besides the list items, each record carries four data members:
    n_match, n_seq, capped, and warning. All four start out as None
    when the record is created.
    """

    def __init__(self):
        # Every data member is unset until a parser fills it in.
        self.n_match = self.n_seq = self.capped = self.warning = None
00017 
00018 
def scan(seq="", mirror='http://www.expasy.org', output='xml', **keywords):
    """Execute a ScanProsite search.

    mirror:      The ScanProsite mirror to be used
                 (default: http://www.expasy.org).
    seq:         The query sequence, or UniProtKB (Swiss-Prot,
                 TrEMBL) accession
    output:      Format of the search results
                 (default: xml)

    Further search parameters can be passed as keywords; see the
    documentation for programmatic access to ScanProsite at
    http://www.expasy.org/tools/scanprosite/ScanPrositeREST.html
    for a description of such parameters. Keywords whose value is None
    are left out of the query.

    This function returns a handle to the search results returned by
    ScanProsite. Search results in the XML format can be parsed into a
    Python object, by using the Bio.ExPASy.ScanProsite.read function.
    """
    # urlencode/urlopen live in different modules on Python 2 and Python 3;
    # resolve them locally so this function works on either interpreter.
    try:
        from urllib.parse import urlencode
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlencode, urlopen

    parameters = {'seq': seq,
                  'output': output}
    # items() exists on both Python 2 and 3 (iteritems() is Python 2 only).
    for key, value in keywords.items():
        if value is not None:
            parameters[key] = value
    command = urlencode(parameters)
    url = "%s/cgi-bin/prosite/PSScan.cgi?%s" % (mirror, command)
    return urlopen(url)
00047 
def read(handle):
    """Parse search results returned by ScanProsite into a Python object.

    The handle is run through a Parser whose events are delivered to a
    ContentHandler; the record built by that handler is returned.
    """
    sax_handler = ContentHandler()
    xml_parser = Parser()
    xml_parser.setContentHandler(sax_handler)
    xml_parser.parse(handle)
    return sax_handler.record
00056 
00057 # The classes below are considered private
00058 
class Parser(ExpatParser):
    """Expat SAX parser that rejects responses that are not XML documents."""

    def __init__(self):
        ExpatParser.__init__(self)
        # True until the first chunk of data has been inspected by feed().
        self.firsttime = True

    def feed(self, data, isFinal = 0):
        """Check the first chunk for an XML declaration, then parse the data.

        Error messages returned by the ScanProsite server are formatted as
        plain text instead of an XML document. To catch such error
        messages, we override the feed method of the Expat parser.
        The error message is (hopefully) contained in the data that was just
        fed to the parser.

        Raises ValueError, with the offending data as its argument, if the
        first chunk fed to this parser does not start with "<?xml".
        """
        if self.firsttime:
            # The chunk may be bytes or text depending on the handle; compare
            # against a marker of the same type so that the check neither
            # fails spuriously nor triggers a bytes/str comparison.
            if isinstance(data, bytes):
                declaration = b"<?xml"
            else:
                declaration = u"<?xml"
            if data[:5] != declaration:
                raise ValueError(data)
        self.firsttime = False
        return ExpatParser.feed(self, data, isFinal)
00076 
00077 
class ContentHandler(handler.ContentHandler):
    """SAX content handler that collects ScanProsite matches into a Record.

    A Record is created when the top-level <matchset> element opens; its
    n_match and n_seq members are read from that element's attributes.
    Every <match> element directly inside <matchset> adds a dictionary to
    the record, and each child element of <match> becomes one entry of
    that dictionary, keyed by the element name.
    """

    # Children of <match> whose text is converted to an integer.
    integers = ("start", "stop")
    # Children of <match> known to hold plain strings. Elements not listed
    # in either tuple are stored as strings as well.
    strings = ("sequence_ac",
               "sequence_id",
               "sequence_db",
               "signature_ac",
               "level",
               "level_tag")

    def __init__(self):
        handler.ContentHandler.__init__(self)
        # Names of the currently open elements, outermost first.
        self.element = []
        # Character data gathered for the most recently opened element.
        self.content = ""

    def startElement(self, name, attrs):
        """Track the newly opened element; start a record or a match."""
        self.element.append(name)
        self.content = ""
        if self.element == ["matchset"]:
            self.record = Record()
            self.record.n_match = int(attrs["n_match"])
            self.record.n_seq = int(attrs["n_seq"])
        elif self.element == ["matchset", "match"]:
            self.record.append({})

    def endElement(self, name):
        """Close the current element and store its text if it is a match field."""
        # Pop outside the assert: assert statements are removed under
        # "python -O", and the element stack must be unwound regardless.
        closed = self.element.pop()
        assert name == closed
        name = str(name)
        if self.element == ["matchset", "match"]:
            match = self.record[-1]
            if name in ContentHandler.integers:
                match[name] = int(self.content)
            else:
                # Known string fields and unknown fields alike are kept as
                # strings.
                match[name] = self.content

    def characters(self, content):
        """Accumulate character data; it may arrive in several pieces."""
        self.content += content