Back to index

python-biopython  1.60
Public Member Functions | Public Attributes
test_bgzf.BgzfTests Class Reference

List of all members.

Public Member Functions

def setUp
def tearDown
def rewrite
def check_blocks
def check_text
def check_by_line
def check_by_char
def check_random
def test_random_bam_ex1
def test_random_bam_ex1_refresh
def test_random_bam_ex1_header
def test_random_example_fastq
def test_text_example_fastq
def test_iter_example_fastq
def test_iter_example_gb
def test_bam_ex1
def test_iter_bam_ex1
def test_example_fastq

Public Attributes

 temp_file

Detailed Description

Definition at line 21 of file test_bgzf.py.


Member Function Documentation

def test_bgzf.BgzfTests.check_blocks (   self,
  old_file,
  new_file 
)

Definition at line 49 of file test_bgzf.py.

00049 
def check_blocks(self, old_file, new_file):
    """Check two files yield the same list from bgzf.BgzfBlocks.

    Each file is opened in binary mode and scanned with
    bgzf.BgzfBlocks; the two resulting lists must agree in length
    and in content.

    Arguments:
     - old_file - filename of the reference file.
     - new_file - filename of the file to compare against it.
    """
    block_lists = []
    for filename in (old_file, new_file):
        h = open(filename, "rb")
        try:
            block_lists.append(list(bgzf.BgzfBlocks(h)))
        finally:
            #Close the handle even if scanning the blocks raises
            h.close()
    old, new = block_lists
    #Compare lengths first so a size mismatch gives a short message
    self.assertEqual(len(old), len(new))
    self.assertEqual(old, new)

Here is the call graph for this function:

Here is the caller graph for this function:

def test_bgzf.BgzfTests.check_by_char (   self,
  old_file,
  new_file,
  old_gzip = False 
)

Definition at line 102 of file test_bgzf.py.

00102 
def check_by_char(self, old_file, new_file, old_gzip=False):
    """Compare new_file, read one character at a time, with old_file.

    old_file is read in a single call (through gzip.open when old_gzip
    is true, otherwise through open) while new_file is read through
    bgzf.BgzfReader using repeated read(1) calls. The comparison is
    repeated for modes "r" and "rb", and for max_cache values 1 and 10.

    Arguments:
     - old_file - filename holding the expected content.
     - new_file - filename passed to bgzf.BgzfReader.
     - old_gzip - if true, open old_file with gzip.open.
    """
    for mode in ["r", "rb"]:
        if old_gzip:
            h = gzip.open(old_file, mode)
        else:
            h = open(old_file, mode)
        try:
            old = h.read()
        finally:
            #Close the handle even if the read raises
            h.close()
        #Seems gzip can return bytes even if mode="r",
        #perhaps a bug in Python 3.2?
        if "b" in mode:
            old = _as_bytes(old)
        else:
            old = _as_string(old)

        for cache in [1, 10]:
            h = bgzf.BgzfReader(new_file, mode, max_cache=cache)
            try:
                temp = []
                while True:
                    char = h.read(1)
                    if not char:
                        break
                    temp.append(char)
            finally:
                h.close()
            if "b" in mode:
                new = _empty_bytes_string.join(temp)
            else:
                new = "".join(temp)
            del temp

            self.assertEqual(len(old), len(new))
            #If bytes vs unicode mismatch, give a short error message:
            self.assertEqual(old[:10], new[:10],
                             "%r vs %r, mode %r" % (old[:10], new[:10], mode))
            self.assertEqual(old, new)

Here is the call graph for this function:

Here is the caller graph for this function:

def test_bgzf.BgzfTests.check_by_line (   self,
  old_file,
  new_file,
  old_gzip = False 
)

Definition at line 74 of file test_bgzf.py.

00074 
def check_by_line(self, old_file, new_file, old_gzip=False):
    """Compare new_file, read by iterating over it, with old_file.

    old_file is read in a single call (through gzip.open when old_gzip
    is true, otherwise through open) while new_file is read by
    iterating over a bgzf.BgzfReader and joining what it yields. The
    comparison is repeated for modes "r" and "rb", and for max_cache
    values 1 and 10.

    Arguments:
     - old_file - filename holding the expected content.
     - new_file - filename passed to bgzf.BgzfReader.
     - old_gzip - if true, open old_file with gzip.open.
    """
    for mode in ["r", "rb"]:
        if old_gzip:
            h = gzip.open(old_file, mode)
        else:
            h = open(old_file, mode)
        try:
            old = h.read()
        finally:
            #Close the handle even if the read raises
            h.close()
        #Seems gzip can return bytes even if mode="r",
        #perhaps a bug in Python 3.2?
        if "b" in mode:
            old = _as_bytes(old)
        else:
            old = _as_string(old)

        for cache in [1, 10]:
            h = bgzf.BgzfReader(new_file, mode, max_cache=cache)
            try:
                if "b" in mode:
                    new = _empty_bytes_string.join(line for line in h)
                else:
                    new = "".join(line for line in h)
            finally:
                h.close()

            self.assertEqual(len(old), len(new))
            #If bytes vs unicode mismatch, give a short error message:
            self.assertEqual(old[:10], new[:10],
                             "%r vs %r, mode %r" % (old[:10], new[:10], mode))
            self.assertEqual(old, new)

Here is the call graph for this function:

Here is the caller graph for this function:

def test_bgzf.BgzfTests.check_random (   self,
  filename 
)
Check BGZF random access by reading blocks in forward & reverse order

Definition at line 137 of file test_bgzf.py.

00137 
def check_random(self, filename):
    """Check BGZF random access by reading blocks in forward & reverse order

    The expected data is whatever gzip.open returns for the file, and
    the block layout is taken from bgzf.BgzfBlocks. Four checks follow:

     - read every block in file order via seek + read,
     - read every block in reverse order via seek + read,
     - jump from a late block back to an early one (only if the file
       has at least three blocks), using max_cache=1,
     - check seek/tell agree for a shuffled set of offsets at the
       start, middle and end of each block, using max_cache=1.

    Every reader is closed in a finally clause so that a failing
    assertion does not leave the file open.
    """
    h = gzip.open(filename, "rb")
    try:
        old = h.read()
    finally:
        h.close()

    h = open(filename, "rb")
    try:
        blocks = list(bgzf.BgzfBlocks(h))
    finally:
        h.close()

    #Forward
    new = _empty_bytes_string
    h = bgzf.BgzfReader(filename, "rb")
    try:
        for start, raw_len, data_start, data_len in blocks:
            h.seek(bgzf.make_virtual_offset(start, 0))
            data = h.read(data_len)
            self.assertEqual(len(data), data_len)
            #What has been read so far must end where this block starts
            self.assertEqual(len(new), data_start)
            new += data
    finally:
        h.close()
    self.assertEqual(len(old), len(new))
    self.assertEqual(old, new)

    #Reverse
    new = _empty_bytes_string
    h = bgzf.BgzfReader(filename, "rb")
    try:
        for start, raw_len, data_start, data_len in blocks[::-1]:
            h.seek(bgzf.make_virtual_offset(start, 0))
            data = h.read(data_len)
            self.assertEqual(len(data), data_len)
            new = data + new
    finally:
        h.close()
    self.assertEqual(len(old), len(new))
    self.assertEqual(old, new)

    #Jump back - non-sequential seeking
    if len(blocks) >= 3:
        h = bgzf.BgzfReader(filename, "rb", max_cache=1)
        try:
            #Seek to a late block in the file,
            #half way into the third last block
            start, raw_len, data_start, data_len = blocks[-3]
            voffset = bgzf.make_virtual_offset(start, data_len // 2)
            h.seek(voffset)
            self.assertEqual(voffset, h.tell())
            data = h.read(1000)
            self.assertTrue(data in old)
            self.assertEqual(old.find(data), data_start + data_len // 2)
            #Now seek to an early block in the file,
            #half way into the second block
            start, raw_len, data_start, data_len = blocks[1]
            voffset = bgzf.make_virtual_offset(start, data_len // 2)
            #NOTE(review): the original seeks to this offset twice in a
            #row; both seeks are kept in case the repeat is intentional.
            h.seek(voffset)
            h.seek(voffset)
            self.assertEqual(voffset, h.tell())
            #Now read all rest of this block and start of next block
            data = h.read(data_len + 1000)
            self.assertTrue(data in old)
            self.assertEqual(old.find(data), data_start + data_len // 2)
        finally:
            h.close()

    #Check seek/tell at block boundaries
    v_offsets = []
    for start, raw_len, data_start, data_len in blocks:
        for within_offset in [0, 1, data_len // 2, data_len - 1]:
            #Skip offsets which fall outside this block's data
            if within_offset < 0 or data_len <= within_offset:
                continue
            voffset = bgzf.make_virtual_offset(start, within_offset)
            real_offset = data_start + within_offset
            v_offsets.append((voffset, real_offset))
    shuffle(v_offsets)
    h = bgzf.BgzfReader(filename, "rb", max_cache=1)
    try:
        for voffset, real_offset in v_offsets:
            h.seek(0)
            #Use a unittest assertion, a bare assert vanishes under -O
            self.assertTrue(voffset >= 0 and real_offset >= 0)
            self.assertEqual(h.read(real_offset), old[:real_offset])
            self.assertEqual(h.tell(), voffset)
        for voffset, real_offset in v_offsets:
            h.seek(voffset)
            self.assertEqual(h.tell(), voffset)
    finally:
        h.close()
00222 

Here is the call graph for this function:

Here is the caller graph for this function:

def test_bgzf.BgzfTests.check_text (   self,
  old_file,
  new_file 
)

Definition at line 59 of file test_bgzf.py.

00059 
def check_text(self, old_file, new_file):
    """Compare text mode reads of old_file and new_file.

    old_file is opened with open in its default (text) mode, and
    new_file with bgzf.BgzfReader in mode "r". From each, the first
    line is read with readline and the remainder with read; for
    new_file the read asks for at most len(old) characters. The first
    lines and the combined content must be equal.

    Arguments:
     - old_file - filename holding the expected text.
     - new_file - filename passed to bgzf.BgzfReader.
    """
    h = open(old_file) #text mode!
    try:
        old_line = h.readline()
        old = old_line + h.read()
    finally:
        #Close the handle even if reading raises
        h.close()

    h = bgzf.BgzfReader(new_file, "r") #Text mode!
    try:
        new_line = h.readline()
        new = new_line + h.read(len(old))
    finally:
        h.close()

    self.assertEqual(old_line, new_line)
    self.assertEqual(len(old), len(new))
    self.assertEqual(old, new)

Here is the call graph for this function:

Here is the caller graph for this function:

def test_bgzf.BgzfTests.rewrite (   self,
  compressed_input_file,
  output_file 
)

Definition at line 31 of file test_bgzf.py.

00031 
def rewrite(self, compressed_input_file, output_file):
    """Re-compress a gzip readable file with bgzf.BgzfWriter and verify it.

    The input is decompressed with gzip.open, written to output_file
    through bgzf.BgzfWriter, and then output_file is read back with
    gzip.open. The data read back must be non-empty and equal to the
    data originally read.

    Arguments:
     - compressed_input_file - filename readable by gzip.open.
     - output_file - filename to (over)write via bgzf.BgzfWriter.
    """
    h = gzip.open(compressed_input_file, "rb")
    try:
        data = h.read()
    finally:
        h.close()

    h = bgzf.BgzfWriter(output_file, "wb")
    try:
        h.write(data)
    finally:
        h.close() #Gives empty BGZF block as BAM EOF marker

    h = gzip.open(output_file)
    try:
        new_data = h.read()
    finally:
        h.close()

    #Check the decompressed files agree
    #(assertTrue rather than the deprecated assert_ alias)
    self.assertTrue(new_data, "Empty BGZF file?")
    self.assertEqual(len(data), len(new_data))
    self.assertEqual(data, new_data)

Here is the caller graph for this function:

def test_bgzf.BgzfTests.setUp (   self)

Definition at line 22 of file test_bgzf.py.

00022 
def setUp(self):
    """Record the scratch filename and remove any existing file of that name."""
    self.temp_file = "temp.bgzf"
    scratch = self.temp_file
    #Nothing to clear away unless a regular file is already there
    if not os.path.isfile(scratch):
        return
    os.remove(scratch)

def test_bgzf.BgzfTests.tearDown (   self)

Definition at line 27 of file test_bgzf.py.

00027 
def tearDown(self):
    """Remove the scratch file named by self.temp_file, if it exists."""
    leftover = self.temp_file
    #Only regular files are removed; anything else is left alone
    if not os.path.isfile(leftover):
        return
    os.remove(leftover)

Reproduce BGZF compression for BAM file

Definition at line 253 of file test_bgzf.py.

00253 
def test_bam_ex1(self):
    """Reproduce BGZF compression for BAM file

    SamBam/ex1.bam is re-compressed into the scratch file by
    self.rewrite, then self.check_blocks compares the scratch file
    with the original.
    """
    original = "SamBam/ex1.bam"
    scratch = self.temp_file

    #Note this example is from an old version of samtools
    #and all the blocks are full (except the last one)
    self.rewrite(original, scratch)

    #Now check the blocks agree (using the fact that
    #this example BAM file has simple block usage)
    self.check_blocks(original, scratch)

Here is the call graph for this function:

Reproduce BGZF compression for a FASTQ file

Definition at line 269 of file test_bgzf.py.

00269 
def test_example_fastq(self):
    """Reproduce BGZF compression for a FASTQ file

    Quality/example.fastq.gz is re-compressed into the scratch file
    by self.rewrite, then self.check_blocks compares the result with
    Quality/example.fastq.bgz.
    """
    scratch = self.temp_file
    gzipped = "Quality/example.fastq.gz"
    expected = "Quality/example.fastq.bgz"
    self.rewrite(gzipped, scratch)
    self.check_blocks(expected, scratch)

Here is the call graph for this function:

Check iteration over SamBam/ex1.bam

Definition at line 265 of file test_bgzf.py.

00265 
def test_iter_bam_ex1(self):
    """Check iteration over SamBam/ex1.bam

    The same file serves as both reference and test input; the
    reference copy is opened with gzip (old_gzip is true).
    """
    bam = "SamBam/ex1.bam"
    self.check_by_char(bam, bam, old_gzip=True)

Here is the call graph for this function:

Check iteration over Quality/example.fastq.bgz

Definition at line 243 of file test_bgzf.py.

00243 
def test_iter_example_fastq(self):
    """Check iteration over Quality/example.fastq.bgz

    Quality/example.fastq is the reference; the comparison is made
    first by line, then by character.
    """
    plain = "Quality/example.fastq"
    packed = "Quality/example.fastq.bgz"
    for checker in (self.check_by_line, self.check_by_char):
        checker(plain, packed)

Here is the call graph for this function:

Check iteration over GenBank/NC_000932.gb.bgz

Definition at line 248 of file test_bgzf.py.

00248 
def test_iter_example_gb(self):
    """Check iteration over GenBank/NC_000932.gb.bgz

    GenBank/NC_000932.gb is the reference; the comparison is made
    first by line, then by character.
    """
    plain = "GenBank/NC_000932.gb"
    packed = "GenBank/NC_000932.gb.bgz"
    for checker in (self.check_by_line, self.check_by_char):
        checker(plain, packed)

Here is the call graph for this function:

Check random access to SamBam/ex1.bam

Definition at line 223 of file test_bgzf.py.

00223 
def test_random_bam_ex1(self):
    """Check random access to SamBam/ex1.bam

    Delegates to self.check_random with that filename.
    """
    target = "SamBam/ex1.bam"
    self.check_random(target)

Here is the call graph for this function:

Check random access to SamBam/ex1_header.bam

Definition at line 231 of file test_bgzf.py.

00231 
def test_random_bam_ex1_header(self):
    """Check random access to SamBam/ex1_header.bam

    Delegates to self.check_random with that filename.
    """
    target = "SamBam/ex1_header.bam"
    self.check_random(target)

Here is the call graph for this function:

Check random access to SamBam/ex1_refresh.bam

Definition at line 227 of file test_bgzf.py.

00227 
def test_random_bam_ex1_refresh(self):
    """Check random access to SamBam/ex1_refresh.bam

    Delegates to self.check_random with that filename.
    """
    target = "SamBam/ex1_refresh.bam"
    self.check_random(target)

Here is the call graph for this function:

Check random access to Quality/example.fastq.bgz

Definition at line 235 of file test_bgzf.py.

00235 
def test_random_example_fastq(self):
    """Check random access to Quality/example.fastq.bgz

    Delegates to self.check_random with that filename.
    """
    target = "Quality/example.fastq.bgz"
    self.check_random(target)

Here is the call graph for this function:

Check text mode access to Quality/example.fastq.bgz

Definition at line 239 of file test_bgzf.py.

00239 
def test_text_example_fastq(self):
    """Check text mode access to Quality/example.fastq.bgz

    Quality/example.fastq is the reference passed to self.check_text.
    """
    plain = "Quality/example.fastq"
    packed = "Quality/example.fastq.bgz"
    self.check_text(plain, packed)

Here is the call graph for this function:


Member Data Documentation

test_bgzf.BgzfTests.temp_file

Definition at line 23 of file test_bgzf.py.


The documentation for this class was generated from the following file: