Back to index

python-biopython  1.60
Public Member Functions
Bio.SeqIO._index.UniprotRandomAccess Class Reference
Inheritance diagram for Bio.SeqIO._index.UniprotRandomAccess:
Inheritance graph
[legend]
Collaboration diagram for Bio.SeqIO._index.UniprotRandomAccess:
Collaboration graph
[legend]

List of all members.

Public Member Functions

def __iter__
def get_raw
def get

Detailed Description

Random access to a UniProt XML file.

Definition at line 827 of file _index.py.


Member Function Documentation

Returns (id, offset, length) tuples.

Reimplemented from Bio.SeqIO._index.SequentialSeqFileRandomAccess.

Definition at line 829 of file _index.py.

00829 
def __iter__(self):
    """Scan the file from the start, yielding one tuple per <entry> record.

    Each yielded tuple is (key, start_offset, length) where:

     - key is the text between the first "<accession>" tag found in the
       record and the next "<", passed through _bytes_to_string.
     - start_offset is the handle.tell() value taken just before the
       line matching self._marker_re (the record's opening line).
     - length is the summed len() of the record's lines, from that
       opening line up to and including the closing "</entry>" tag, so
       start_offset + length is the offset just after "</entry>".
       (Presumably a byte count, assuming a binary mode handle.)

    Raises ValueError if the next record or the end of the file is
    reached before "</entry>", or if no non-empty accession was found
    within the record.
    """
    handle = self._handle
    handle.seek(0)
    marker_re = self._marker_re
    start_acc_marker = _as_bytes("<accession>")
    end_acc_marker = _as_bytes("</accession>")
    end_entry_marker = _as_bytes("</entry>")
    less_than = _as_bytes("<")
    #Skip any header before first record
    while True:
        start_offset = handle.tell()
        line = handle.readline()
        if marker_re.match(line) or not line:
            break
    #Should now be at the start of a record, or end of the file
    while marker_re.match(line):
        length = len(line)
        #We expect the next line to be <accession>xxx</accession>
        #(possibly with leading spaces)
        #but allow it to be later on within the <entry>
        key = None
        while True:
            line = handle.readline()
            if key is None and start_acc_marker in line:
                assert end_acc_marker in line, line
                key_start = line.find(start_acc_marker) + len(start_acc_marker)
                key = line[key_start:].split(less_than, 1)[0]
                length += len(line)
            elif end_entry_marker in line:
                #Only count this last line up to the end of the closing
                #tag, so that the length covers the whole record and
                #agrees with end_offset (anything after the tag is not
                #part of this record).
                used = line.find(end_entry_marker) + len(end_entry_marker)
                length += used
                end_offset = handle.tell() - len(line) + used
                break
            elif marker_re.match(line) or not line:
                #Start of next record or end of file
                raise ValueError("Didn't find end of record")
            else:
                length += len(line)
        if not key:
            raise ValueError("Did not find <accession> line in bytes %i to %i" \
                             % (start_offset, end_offset))
        yield _bytes_to_string(key), start_offset, length
        #Find start of next record
        while not marker_re.match(line) and line:
            start_offset = handle.tell()
            line = handle.readline()
    #Only the end of the file (an empty read) should stop the scan
    assert not line, repr(line)

Here is the call graph for this function:

def Bio.SeqIO._index.UniprotRandomAccess.get (   self,
  offset 
)
Returns SeqRecord.

Reimplemented from Bio.SeqIO._index.SeqFileRandomAccess.

Definition at line 895 of file _index.py.

00895 
def get(self, offset):
    """Returns the SeqRecord for the entry starting at the given offset.

    The raw <entry>...</entry> text from get_raw is wrapped in a minimal
    <uniprot> XML document, and the first record produced by the UniProt
    XML iterator for that document is returned.
    """
    #TODO - Can we handle this directly in the parser?
    #This is a hack - the parser apparently requires the XML header and
    #footer, so they are added around the single raw entry.
    entry_text = _bytes_to_string(self.get_raw(offset))
    wrapped = """<?xml version='1.0' encoding='UTF-8'?>
        <uniprot xmlns="http://uniprot.org/uniprot"
        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
        xsi:schemaLocation="http://uniprot.org/uniprot
        http://www.uniprot.org/support/docs/uniprot.xsd">
        %s
        </uniprot>
        """ % entry_text
    #TODO - For consistency, this function should not accept a string:
    records = SeqIO.UniprotIO.UniprotIterator(wrapped)
    #NOTE(review): .next() is the Python 2 iterator method spelling
    return records.next()
00910 

Here is the call graph for this function:

def Bio.SeqIO._index.UniprotRandomAccess.get_raw (   self,
  offset 
)
Similar to the get method, but returns the record as a raw string.

Reimplemented from Bio.SeqIO._index.SequentialSeqFileRandomAccess.

Definition at line 876 of file _index.py.

00876 
def get_raw(self, offset):
    """Return the raw text of the record starting at the given offset.

    Like get, but nothing is parsed: the lines read from the handle are
    joined and returned, from the line at offset up to and including
    the closing "</entry>" tag (anything after the tag on that line is
    dropped).

    Raises ValueError if the start of another record or the end of the
    file is reached before "</entry>".
    """
    stream = self._handle
    entry_start_re = self._marker_re
    closing_tag = _as_bytes("</entry>")
    stream.seek(offset)
    #The first line is taken as it is, without looking for any markers
    pieces = [stream.readline()]
    while True:
        current = stream.readline()
        tag_at = current.find(closing_tag)
        if tag_at >= 0:
            #Keep through the end of the closing tag (8 characters long)
            pieces.append(current[:tag_at + 8])
            return _as_bytes("").join(pieces)
        if not current or entry_start_re.match(current):
            #End of file, or start of next record
            raise ValueError("Didn't find end of record")
        pieces.append(current)

Here is the call graph for this function:


The documentation for this class was generated from the following file: