Back to index

python-biopython  1.60
Public Member Functions | Public Attributes | Private Member Functions | Private Attributes
Bio.bgzf.BgzfReader Class Reference

List of all members.

Public Member Functions

def __init__
def tell
def seek
def read
def readline
def next
def __iter__
def close

Public Attributes

 max_cache

Private Member Functions

def _load_block

Private Attributes

 _text
 _newline
 _handle
 _buffers
 _block_start_offset
 _block_raw_length
 _within_block_offset
 _buffer

Detailed Description

Definition at line 414 of file bgzf.py.


Constructor & Destructor Documentation

def Bio.bgzf.BgzfReader.__init__ (   self,
  filename = None,
  mode = "r",
  fileobj = None,
  max_cache = 100 
)

Definition at line 479 of file bgzf.py.

00479 
def __init__(self, filename=None, mode="r", fileobj=None, max_cache=100):
    """Open a BGZF file for reading and load its first block.

    Arguments:
     - filename - path of the BGZF file, opened here in binary mode.
       Must be left as None when fileobj is given.
     - mode - read mode; including "b" returns the contents as bytes,
       otherwise as text. Write ("w") and append ("a") modes are
       rejected when opening by filename.
     - fileobj - an already open handle to use instead of filename;
       its own mode attribute must include "b".
     - max_cache - maximum number of decompressed blocks kept in the
       cache, at least 1.

    Raises ValueError for an invalid max_cache, a write/append mode,
    both filename and fileobj being supplied, or a fileobj which was
    not opened in binary mode.
    """
    #TODO - Assuming we can seek, check for 28 bytes EOF empty block
    #and if missing warn about possible truncation (as in samtools)?
    if max_cache < 1:
        raise ValueError("Use max_cache with a minimum of 1")
    #Must open the BGZF file in binary mode, but we may want to
    #treat the contents as either text or binary (unicode or
    #bytes under Python 3)
    if fileobj:
        #Explicit exceptions rather than assert statements, since
        #asserts are removed when Python runs with -O
        if filename is not None:
            raise ValueError("Supply either filename or fileobj, not both")
        if "b" not in fileobj.mode.lower():
            raise ValueError("fileobj not opened in binary mode")
        handle = fileobj
        opened_here = False
    else:
        if "w" in mode.lower() \
        or "a" in mode.lower():
            raise ValueError("Must use read mode (default), not write or append mode")
        handle = __builtin__.open(filename, "rb")
        opened_here = True
    self._text = "b" not in mode.lower()
    if self._text:
        self._newline = "\n"
    else:
        self._newline = _bytes_newline
    self._handle = handle
    self.max_cache = max_cache
    self._buffers = {}
    self._block_start_offset = None
    self._block_raw_length = None
    try:
        self._load_block(handle.tell())
    except Exception:
        #Do not leak a handle we opened ourselves if the first block
        #cannot be loaded; a caller supplied fileobj is left alone.
        if opened_here:
            handle.close()
        raise

Here is the caller graph for this function:


Member Function Documentation

def Bio.bgzf.BgzfReader.__iter__ (   self)

Definition at line 645 of file bgzf.py.

00645 
def __iter__(self):
    """Return the reader itself as the iterator object."""
    return self

Here is the caller graph for this function:

def Bio.bgzf.BgzfReader._load_block (   self,
  start_offset = None 
) [private]

Definition at line 508 of file bgzf.py.

00508 
def _load_block(self, start_offset=None):
    """Make the block starting at start_offset the current block.

    Arguments:
     - start_offset - offset in the underlying handle at which the
       wanted block starts. If None, the block directly after the
       current one is used.

    The within block offset is reset to zero. Blocks read from the
    handle are stored in self._buffers, which is trimmed to stay
    within self.max_cache entries. Reaching the end of the file gives
    an empty buffer with a raw block length of zero.
    """
    if start_offset is None:
        #If the file is being read sequentially, then _handle.tell()
        #should be pointing at the start of the next block.
        #However, if seek has been used, we can't assume that.
        start_offset = self._block_start_offset + self._block_raw_length
    if start_offset == self._block_start_offset:
        #Already the current block, only need to rewind within it
        self._within_block_offset = 0
        return
    elif start_offset in self._buffers:
        #Already in cache
        self._buffer, self._block_raw_length = self._buffers[start_offset]
        self._within_block_offset = 0
        self._block_start_offset = start_offset
        return
    #Must hit the disk... first check cache limits,
    while len(self._buffers) >= self.max_cache:
        #TODO - Implement LRU cache removal?
        self._buffers.popitem()
    #Now load the block (start_offset can no longer be None here,
    #so the seek is unconditional)
    handle = self._handle
    handle.seek(start_offset)
    block_start = handle.tell()
    try:
        block_size, block_data = _load_bgzf_block(handle, self._text)
    except StopIteration:
        #EOF
        block_size = 0
        if self._text:
            block_data = ""
        else:
            block_data = _empty_bytes_string
    #Only update our state once the block has been loaded. If loading
    #raised an error, the recorded block start must not move while the
    #buffer still holds the previous block, otherwise a later seek to
    #this offset would skip the load and return the wrong data.
    self._block_start_offset = block_start
    self._buffer = block_data
    self._within_block_offset = 0
    self._block_raw_length = block_size
    #Finally save the block in our cache,
    self._buffers[block_start] = block_data, block_size

Here is the caller graph for this function:

def Bio.bgzf.BgzfReader.close (   self)

Definition at line 648 of file bgzf.py.

00648 
def close(self):
    """Close the underlying handle and discard the current block."""
    handle = self._handle
    handle.close()
    #Clear the current buffer and its recorded start offset
    self._buffer = self._block_start_offset = None
00653 

Here is the caller graph for this function:

def Bio.bgzf.BgzfReader.next (   self)

Definition at line 639 of file bgzf.py.

00639 
def next(self):
    """Return the next line, raising StopIteration when readline gives nothing."""
    text = self.readline()
    if text:
        return text
    raise StopIteration

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.bgzf.BgzfReader.read (   self,
  size = -1 
)

Definition at line 583 of file bgzf.py.

00583 
def read(self, size=-1):
    """Read up to size bytes/characters from the current position.

    Arguments:
     - size - number of bytes (binary mode) or characters (text mode)
       wanted. Must not be negative; reading the whole file in one go
       is deliberately not supported.

    Returns the data read, which is shorter than size (possibly empty)
    only if the end of the file was reached.

    Raises NotImplementedError if size is negative.
    """
    if size < 0:
        raise NotImplementedError("Don't be greedy, that could be massive!")
    if self._text:
        result = ""
    else:
        result = _empty_bytes_string
    #A raw block length of zero is how _load_block records EOF. An
    #empty buffer alone is not enough, since a block holding no data
    #can be followed by further non-empty blocks.
    while size and self._block_raw_length:
        if self._within_block_offset + size <= len(self._buffer):
            #This may leave us right at the end of a block
            #(lazy loading, don't load the next block unless we have to)
            data = self._buffer[self._within_block_offset:self._within_block_offset + size]
            self._within_block_offset += size
            assert data #Must be at least 1 byte
            result += data
            break
        else:
            #Take the rest of this block, then carry on in the next
            data = self._buffer[self._within_block_offset:]
            size -= len(data)
            self._load_block() #will reset offsets
            result += data
    return result

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.bgzf.BgzfReader.readline (   self)

Definition at line 613 of file bgzf.py.

00613 
def readline(self):
    """Read one line, including its newline, from the current position.

    Returns the line as text or bytes according to the reader's mode.
    The final line of the file is returned without a newline if the
    file does not end with one, and an empty string is returned once
    the end of the file has been reached.
    """
    if self._text:
        result = ""
    else:
        result = _empty_bytes_string
    #A raw block length of zero is how _load_block records EOF. An
    #empty buffer alone is not enough, since a block holding no data
    #can be followed by further non-empty blocks.
    while self._block_raw_length:
        i = self._buffer.find(self._newline, self._within_block_offset)
        #Three cases to consider,
        if i == -1:
            #No newline, need to read in more data
            data = self._buffer[self._within_block_offset:]
            self._load_block() #will reset offsets
            result += data
        elif i + 1 == len(self._buffer):
            #Found new line, but right at end of block (SPECIAL)
            data = self._buffer[self._within_block_offset:]
            #Must now load the next block to ensure tell() works
            self._load_block() #will reset offsets
            assert data
            result += data
            break
        else:
            #Found new line, not at end of block (easy case, no IO)
            data = self._buffer[self._within_block_offset:i+1]
            self._within_block_offset = i + 1
            result += data
            break
    return result

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.bgzf.BgzfReader.seek (   self,
  virtual_offset 
)
Seek to a 64-bit unsigned BGZF virtual offset.

Definition at line 561 of file bgzf.py.

00561 
def seek(self, virtual_offset):
    """Seek to a 64-bit unsigned BGZF virtual offset."""
    #Split the virtual offset inline (saves a function call): the low
    #16 bits are the position within the block, the rest is where the
    #block starts in the underlying file.
    start_offset = virtual_offset >> 16
    within_block = virtual_offset & 0xFFFF
    if self._block_start_offset != start_offset:
        #Only load when moving to another block, since _load_block
        #would do nothing useful for the current one
        self._load_block(start_offset)
        assert start_offset == self._block_start_offset
    buffer_size = len(self._buffer)
    #Offset zero within an empty block is the one allowed case where
    #the offset is not strictly inside the buffer
    if within_block >= buffer_size and (within_block != 0 or buffer_size != 0):
        raise ValueError("Within offset %i but block size only %i" \
                         % (within_block, buffer_size))
    self._within_block_offset = within_block
    return virtual_offset

Here is the call graph for this function:

def Bio.bgzf.BgzfReader.tell (   self)
Returns a 64-bit unsigned BGZF virtual offset.

Definition at line 546 of file bgzf.py.

00546 
def tell(self):
    """Returns a 64-bit unsigned BGZF virtual offset."""
    position = self._within_block_offset
    if position > 0 and position == len(self._buffer):
        #Sitting exactly at the end of a non-empty block. A full sized
        #block cannot use 65536 as its within block offset, so for
        #consistency always report the start of the following block
        #with a within block offset of zero.
        next_block_start = self._block_start_offset + self._block_raw_length
        return next_block_start << 16
    #TODO - Include bounds checking as in make_virtual_offset?
    return (self._block_start_offset << 16) | position


Member Data Documentation

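Bio.bgzf.BgzfReader._block_raw_length [private]

Definition at line 505 of file bgzf.py.

Bio.bgzf.BgzfReader._block_start_offset [private]

Definition at line 504 of file bgzf.py.

Bio.bgzf.BgzfReader._buffer [private]

Definition at line 533 of file bgzf.py.

Bio.bgzf.BgzfReader._buffers [private]

Definition at line 503 of file bgzf.py.

Bio.bgzf.BgzfReader._handle [private]

Definition at line 501 of file bgzf.py.

Bio.bgzf.BgzfReader._newline [private]

Definition at line 498 of file bgzf.py.

Bio.bgzf.BgzfReader._text [private]

Definition at line 496 of file bgzf.py.

Bio.bgzf.BgzfReader._within_block_offset [private]

Definition at line 515 of file bgzf.py.

Bio.bgzf.BgzfReader.max_cache

Definition at line 502 of file bgzf.py.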


The documentation for this class was generated from the following file: