Back to index

python-biopython  1.60
test_Phd.py
Go to the documentation of this file.
00001 # Revisions copyright 2009 by Peter Cock.  All rights reserved.
00002 # This code is part of the Biopython distribution and governed by its
00003 # license.  Please see the LICENSE file that should have been included
00004 # as part of this package.
00005 import unittest
00006 
00007 from Bio import SeqIO
00008 from Bio.Sequencing import Phd
00009 
class PhdTestOne(unittest.TestCase):
    """Checks the three records in Phd/phd1 via SeqIO and via Bio.Sequencing.Phd."""

    def setUp(self):
        # A fresh handle per test; tearDown closes it again.
        self.handle = open("Phd/phd1")

    def tearDown(self):
        self.handle.close()

    def test_check_SeqIO(self):
        """Test phd1 using parser via SeqIO."""
        records = SeqIO.parse(self.handle, "phd")
        # Record 1
        # The built-in next() is used instead of the iterator's .next()
        # method, which only exists on Python 2.
        record = next(records)
        self.assertEqual(record.id, "34_222_(80-A03-19).b.ab1")
        self.assertEqual(record.name, "34_222_(80-A03-19).b.ab1")
        self.assertEqual(record.description, "34_222_(80-A03-19).b.ab1")
        self.assertTrue(record.seq.startswith("ctccgtcggaacatcatcggatcctatcaca"))
        self.assertTrue(record.seq.endswith("ctctcctctccctccctccgactccaaagcgtg"))
        self.assertEqual(record.letter_annotations["phred_quality"][:10],
                         [9, 9, 10, 19, 22, 37, 28, 28, 24, 22])
        self.assertEqual(record[:10].format("fasta"),
                         ">34_222_(80-A03-19).b.ab1\nctccgtcgga\n")
        self.assertEqual(record[:10].format("qual"),
                         ">34_222_(80-A03-19).b.ab1\n"
                         "9 9 10 19 22 37 28 28 24 22\n")
        self.assertEqual(record[:10].format("fastq"),
                         "@34_222_(80-A03-19).b.ab1\n"
                         "ctccgtcgga\n"
                         "+\n"
                         "**+47F==97\n")
        self.assertEqual(record[:10].format("fastq-illumina"),
                         "@34_222_(80-A03-19).b.ab1\n"
                         "ctccgtcgga\n"
                         "+\n"
                         "IIJSVe\\\\XV\n")
        # Record 2
        record = next(records)
        self.assertEqual(record.id, "425_103_(81-A03-19).g.ab1")
        self.assertEqual(record.name, "425_103_(81-A03-19).g.ab1")
        self.assertEqual(record.letter_annotations["phred_quality"][:10],
                         [14, 17, 22, 10, 10, 10, 15, 8, 8, 9])
        # Record 3
        record = next(records)
        self.assertEqual(record.id, '425_7_(71-A03-19).b.ab1')
        self.assertEqual(record.name, '425_7_(71-A03-19).b.ab1')
        self.assertEqual(record.letter_annotations["phred_quality"][:10],
                         [10, 10, 10, 10, 8, 8, 6, 6, 6, 6])
        # Make sure that no further records are found
        self.assertRaises(StopIteration, next, records)

    def test_check_record_parser(self):
        """Test phd1 file in detail."""
        records = Phd.parse(self.handle)
        # Record 1
        record = next(records)
        self.assertEqual(record.file_name, "34_222_(80-A03-19).b.ab1")
        self.assertEqual(record.comments['abi_thumbprint'], 0)
        self.assertEqual(record.comments['call_method'], "phred")
        self.assertEqual(record.comments['chem'], "term")
        self.assertEqual(record.comments['chromat_file'], "34_222_(80-A03-19).b.ab1")
        self.assertEqual(record.comments['dye'], "big")
        self.assertEqual(record.comments['phred_version'], "0.020425.c")
        self.assertEqual(record.comments['quality_levels'], 99)
        self.assertEqual(record.comments['time'], "Fri Feb 13 09:16:11 2004")
        self.assertEqual(record.comments['trace_array_max_index'], 10867)
        self.assertEqual(record.comments['trace_array_min_index'], 0)
        self.assertAlmostEqual(record.comments['trace_peak_area_ratio'], 0.1467)
        self.assertEqual(record.comments['trim'][0], 3)
        self.assertEqual(record.comments['trim'][1], 391)
        self.assertAlmostEqual(record.comments['trim'][2], 0.05)
        # Sites are spot-checked at the start, around the middle, and at the end.
        center = len(record.sites) // 2
        self.assertEqual(record.sites[0], ('c', '9', '6'))
        self.assertEqual(record.sites[1], ('t', '9', '18'))
        self.assertEqual(record.sites[2], ('c', '10', '26'))
        self.assertEqual(record.sites[3], ('c', '19', '38'))
        self.assertEqual(record.sites[4], ('g', '22', '49'))
        self.assertEqual(record.sites[5], ('t', '37', '65'))
        self.assertEqual(record.sites[6], ('c', '28', '76'))
        self.assertEqual(record.sites[7], ('g', '28', '87'))
        self.assertEqual(record.sites[8], ('g', '24', '100'))
        self.assertEqual(record.sites[9], ('a', '22', '108'))
        self.assertEqual(record.sites[center-5], ('c', '11', '5259'))
        self.assertEqual(record.sites[center-4], ('c', '11', '5273'))
        self.assertEqual(record.sites[center-3], ('t', '9', '5286'))
        self.assertEqual(record.sites[center-2], ('g', '10', '5300'))
        self.assertEqual(record.sites[center-1], ('a', '10', '5316'))
        self.assertEqual(record.sites[center], ('t', '8', '5323'))
        self.assertEqual(record.sites[center+1], ('c', '8', '5343'))
        self.assertEqual(record.sites[center+2], ('g', '8', '5352'))
        self.assertEqual(record.sites[center+3], ('c', '8', '5366'))
        self.assertEqual(record.sites[center+4], ('c', '8', '5378'))
        self.assertEqual(record.sites[-10], ('c', '8', '10756'))
        self.assertEqual(record.sites[-9], ('c', '8', '10764'))
        self.assertEqual(record.sites[-8], ('a', '8', '10769'))
        self.assertEqual(record.sites[-7], ('a', '8', '10788'))
        self.assertEqual(record.sites[-6], ('a', '8', '10803'))
        self.assertEqual(record.sites[-5], ('g', '10', '10816'))
        self.assertEqual(record.sites[-4], ('c', '11', '10826'))
        self.assertEqual(record.sites[-3], ('g', '11', '10840'))
        self.assertEqual(record.sites[-2], ('t', '11', '10855'))
        self.assertEqual(record.sites[-1], ('g', '11', '10864'))
        # str() replaces the tostring() method for getting the sequence text.
        self.assertEqual(str(record.seq)[:10], 'ctccgtcgga')
        self.assertEqual(str(record.seq)[-10:], 'ccaaagcgtg')
        self.assertEqual(str(record.seq_trimmed)[:10], 'cgtcggaaca')
        self.assertEqual(str(record.seq_trimmed)[-10:], 'tatttcggag')
        # Record 2
        record = next(records)
        center = len(record.sites) // 2
        self.assertEqual(record.file_name, "425_103_(81-A03-19).g.ab1")
        self.assertEqual(record.comments['abi_thumbprint'], 0)
        self.assertEqual(record.comments['call_method'], 'phred')
        self.assertEqual(record.comments['chem'], 'term')
        self.assertEqual(record.comments['chromat_file'], '425_103_(81-A03-19).g.ab1')
        self.assertEqual(record.comments['dye'], 'big')
        self.assertEqual(record.comments['phred_version'], '0.020425.c')
        self.assertEqual(record.comments['quality_levels'], 99)
        self.assertEqual(record.comments['time'], 'Tue Feb 17 10:31:15 2004')
        self.assertEqual(record.comments['trace_array_max_index'], 10606)
        self.assertEqual(record.comments['trace_array_min_index'], 0)
        self.assertAlmostEqual(record.comments['trace_peak_area_ratio'], 0.0226)
        self.assertEqual(record.comments['trim'][0], 10)
        self.assertEqual(record.comments['trim'][1], 432)
        self.assertAlmostEqual(record.comments['trim'][2], 0.05)
        self.assertEqual(record.sites[0], ('c', '14', '3'))
        self.assertEqual(record.sites[1], ('g', '17', '11'))
        self.assertEqual(record.sites[2], ('g', '22', '23'))
        self.assertEqual(record.sites[3], ('g', '10', '35'))
        self.assertEqual(record.sites[4], ('a', '10', '53'))
        self.assertEqual(record.sites[5], ('t', '10', '68'))
        self.assertEqual(record.sites[6], ('c', '15', '75'))
        self.assertEqual(record.sites[7], ('c', '8', '85'))
        self.assertEqual(record.sites[8], ('c', '8', '94'))
        self.assertEqual(record.sites[9], ('a', '9', '115'))
        self.assertEqual(record.sites[center-5], ('c', '33', '5140'))
        self.assertEqual(record.sites[center-4], ('c', '28', '5156'))
        self.assertEqual(record.sites[center-3], ('g', '25', '5167'))
        self.assertEqual(record.sites[center-2], ('c', '28', '5178'))
        self.assertEqual(record.sites[center-1], ('c', '18', '5193'))
        self.assertEqual(record.sites[center], ('a', '16', '5204'))
        self.assertEqual(record.sites[center+1], ('a', '15', '5213'))
        self.assertEqual(record.sites[center+2], ('a', '10', '5230'))
        self.assertEqual(record.sites[center+3], ('a', '10', '5242'))
        self.assertEqual(record.sites[center+4], ('t', '8', '5249'))
        self.assertEqual(record.sites[-10], ('c', '8', '10489'))
        self.assertEqual(record.sites[-9], ('c', '8', '10503'))
        self.assertEqual(record.sites[-8], ('c', '8', '10514'))
        self.assertEqual(record.sites[-7], ('a', '8', '10516'))
        self.assertEqual(record.sites[-6], ('g', '8', '10530'))
        self.assertEqual(record.sites[-5], ('c', '8', '10550'))
        self.assertEqual(record.sites[-4], ('c', '10', '10566'))
        self.assertEqual(record.sites[-3], ('a', '8', '10574'))
        self.assertEqual(record.sites[-2], ('a', '7', '10584'))
        self.assertEqual(record.sites[-1], ('g', '7', '10599'))
        self.assertEqual(str(record.seq)[:10], 'cgggatccca')
        self.assertEqual(str(record.seq)[-10:], 'cccagccaag')
        self.assertEqual(str(record.seq_trimmed)[:10], 'cctgatccga')
        self.assertEqual(str(record.seq_trimmed)[-10:], 'ggggccgcca')
        # Record 3
        record = next(records)
        center = len(record.sites) // 2
        self.assertEqual(record.file_name, '425_7_(71-A03-19).b.ab1')
        self.assertEqual(record.comments['abi_thumbprint'], 0)
        self.assertEqual(record.comments['call_method'], 'phred')
        self.assertEqual(record.comments['chem'], 'term')
        self.assertEqual(record.comments['chromat_file'], '425_7_(71-A03-19).b.ab1')
        self.assertEqual(record.comments['dye'], 'big')
        self.assertEqual(record.comments['phred_version'], '0.020425.c')
        self.assertEqual(record.comments['quality_levels'], 99)
        self.assertEqual(record.comments['time'], 'Thu Jan 29 11:46:14 2004')
        self.assertEqual(record.comments['trace_array_max_index'], 9513)
        self.assertEqual(record.comments['trace_array_min_index'], 0)
        self.assertAlmostEqual(record.comments['trace_peak_area_ratio'], 100.0)
        self.assertEqual(record.comments['trim'][0], -1)
        self.assertEqual(record.comments['trim'][1], -1)
        self.assertEqual(record.comments['trim'][2], 0.05)
        self.assertEqual(record.sites[0], ('a', '10', '7'))
        self.assertEqual(record.sites[1], ('c', '10', '13'))
        self.assertEqual(record.sites[2], ('a', '10', '21'))
        self.assertEqual(record.sites[3], ('t', '10', '28'))
        self.assertEqual(record.sites[4], ('a', '8', '33'))
        self.assertEqual(record.sites[5], ('a', '8', '40'))
        self.assertEqual(record.sites[6], ('a', '6', '50'))
        self.assertEqual(record.sites[7], ('t', '6', '53'))
        self.assertEqual(record.sites[8], ('c', '6', '66'))
        self.assertEqual(record.sites[9], ('a', '6', '68'))
        self.assertEqual(record.sites[center-5], ('a', '6', '4728'))
        self.assertEqual(record.sites[center-4], ('t', '10', '4737'))
        self.assertEqual(record.sites[center-3], ('a', '10', '4746'))
        self.assertEqual(record.sites[center-2], ('a', '8', '4756'))
        self.assertEqual(record.sites[center-1], ('t', '8', '4759'))
        self.assertEqual(record.sites[center], ('t', '8', '4768'))
        self.assertEqual(record.sites[center+1], ('a', '8', '4775'))
        self.assertEqual(record.sites[center+2], ('g', '10', '4783'))
        self.assertEqual(record.sites[center+3], ('t', '8', '4788'))
        self.assertEqual(record.sites[center+4], ('g', '8', '4794'))
        self.assertEqual(record.sites[-10], ('a', '8', '9445'))
        self.assertEqual(record.sites[-9], ('t', '6', '9453'))
        self.assertEqual(record.sites[-8], ('c', '6', '9462'))
        self.assertEqual(record.sites[-7], ('t', '6', '9465'))
        self.assertEqual(record.sites[-6], ('g', '6', '9478'))
        self.assertEqual(record.sites[-5], ('c', '6', '9483'))
        self.assertEqual(record.sites[-4], ('t', '6', '9485'))
        self.assertEqual(record.sites[-3], ('t', '8', '9495'))
        self.assertEqual(record.sites[-2], ('t', '3', '9504'))
        self.assertEqual(record.sites[-1], ('n', '0', '9511'))
        self.assertEqual(str(record.seq)[:10], 'acataaatca')
        self.assertEqual(str(record.seq)[-10:], 'atctgctttn')
        # Make sure that no further records are found
        self.assertRaises(StopIteration, next, records)
00218 
class PhdTestTwo(unittest.TestCase):
    """Checks the single record in Phd/phd2 as parsed through SeqIO."""

    def setUp(self):
        # A fresh handle per test; tearDown closes it again.
        self.handle = open("Phd/phd2")

    def tearDown(self):
        self.handle.close()

    def test_check_SeqIO(self):
        """Test phd2 using parser via SeqIO."""
        records = SeqIO.parse(self.handle, "phd")
        # Record 1
        # The built-in next() is used instead of the iterator's .next()
        # method, which only exists on Python 2.
        record = next(records)
        self.assertEqual(record.id, "ML4924R")
        self.assertEqual(record.name, "ML4924R")
        self.assertEqual(record.description, "ML4924R")
        self.assertTrue(record.seq.startswith("actttggtcgcctgcaggtaccggtccgnga"))
        self.assertTrue(record.seq.endswith("agaagctcgttctcaacatctccgttggtgaga"))
        self.assertEqual(record.letter_annotations["phred_quality"][:10],
                         [6, 6, 6, 8, 8, 12, 18, 16, 14, 11])
        self.assertEqual(record[:10].format("fasta"),
                         ">ML4924R\nactttggtcg\n")
        self.assertEqual(record[:10].format("qual"),
                         ">ML4924R\n6 6 6 8 8 12 18 16 14 11\n")
        self.assertEqual(record[:10].format("fastq"),
                         "@ML4924R\nactttggtcg\n+\n'''))-31/,\n")
        self.assertEqual(record[:10].format("fastq-illumina"),
                         "@ML4924R\nactttggtcg\n+\nFFFHHLRPNK\n")
        # Make sure that no further records are found
        self.assertRaises(StopIteration, next, records)
00248         
class PhdTest454(unittest.TestCase):
    """Checks the single record in Phd/phd_454 as parsed through SeqIO."""

    def setUp(self):
        # A fresh handle per test; tearDown closes it again.
        self.handle = open("Phd/phd_454")

    def tearDown(self):
        self.handle.close()

    def test_check_SeqIO(self):
        """Test phd_454 using parser via SeqIO."""
        records = SeqIO.parse(self.handle, "phd")
        # Record 1
        # The built-in next() is used instead of the iterator's .next()
        # method, which only exists on Python 2.
        record = next(records)
        self.assertEqual(record.id, "EBE03TV04IHLTF.77-243")
        self.assertEqual(record.name, "EBE03TV04IHLTF.77-243")
        self.assertEqual(record.description, "EBE03TV04IHLTF.77-243 1")
        self.assertEqual(str(record.seq), "ggggatgaaagggatctcggtggtaggtga")
        self.assertEqual(record.letter_annotations["phred_quality"][:10],
                         [37, 37, 37, 37, 37, 37, 37, 37, 37, 37])
        self.assertEqual(record.format("fasta"),
                         ">EBE03TV04IHLTF.77-243 1\n"
                         "ggggatgaaagggatctcggtggtaggtga\n")
        self.assertEqual(record.format("qual"),
                         ">EBE03TV04IHLTF.77-243 1\n"
                         "37 37 37 37 37 37 37 37 37 37 "
                         "37 37 37 26 26 26 30 33 33 33\n"
                         "33 33 36 36 33 33 33 36 26 22\n")
        self.assertEqual(record.format("fastq"),
                         "@EBE03TV04IHLTF.77-243 1\n"
                         "ggggatgaaagggatctcggtggtaggtga\n"
                         "+\n"
                         "FFFFFFFFFFFFF;;;?BBBBBEEBBBE;7\n")
        self.assertEqual(record[:10].format("fastq-illumina"),
                         "@EBE03TV04IHLTF.77-243 1\n"
                         "ggggatgaaa\n"
                         "+\n"
                         "eeeeeeeeee\n")
        # Make sure that no further records are found
        self.assertRaises(StopIteration, next, records)
00287 
class PhdTestSolexa(unittest.TestCase):
    """Checks the two records in Phd/phd_solexa as parsed through SeqIO."""

    def setUp(self):
        # A fresh handle per test; tearDown closes it again.
        self.handle = open("Phd/phd_solexa")

    def tearDown(self):
        self.handle.close()

    def test_check_SeqIO(self):
        """Test phd_solexa using parser via SeqIO."""
        records = SeqIO.parse(self.handle, "phd")
        # Record 1
        # The built-in next() is used instead of the iterator's .next()
        # method, which only exists on Python 2.
        record = next(records)
        self.assertEqual(record.id, "HWI-EAS94_4_1_1_537_446")
        self.assertEqual(record.name, "HWI-EAS94_4_1_1_537_446")
        self.assertEqual(record.description, "HWI-EAS94_4_1_1_537_446 1")
        self.assertEqual(str(record.seq),
                         "gccaatcaggtttctctgcaagcccctttagcagctgagc")
        self.assertEqual(record.letter_annotations["phred_quality"],
                         [30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
                          30, 30, 30, 30, 30, 30, 30, 30, 30, 28, 23,
                          30, 30, 30, 30, 30, 30, 28, 22, 8, 22, 7, 15,
                          15, 15, 10, 10, 11, 15])
        self.assertEqual(record.format("fasta"),
                         ">HWI-EAS94_4_1_1_537_446 1\n"
                         "gccaatcaggtttctctgcaagcccctttagcagctgagc\n")
        self.assertEqual(record.format("qual"),
                         ">HWI-EAS94_4_1_1_537_446 1\n"
                         "30 30 30 30 30 30 30 30 30 30 "
                         "30 30 30 30 30 30 30 30 30 30\n"
                         "28 23 30 30 30 30 30 30 28 22 "
                         "8 22 7 15 15 15 10 10 11 15\n")
        self.assertEqual(record.format("fastq"),
                         "@HWI-EAS94_4_1_1_537_446 1\n"
                         "gccaatcaggtttctctgcaagcccctttagcagctgagc\n"
                         "+\n"
                         "????????????????????=8??????=7)7(000++,0\n")
        self.assertEqual(record.format("fastq-illumina"),
                         "@HWI-EAS94_4_1_1_537_446 1\n"
                         "gccaatcaggtttctctgcaagcccctttagcagctgagc\n"
                         "+\n"
                         "^^^^^^^^^^^^^^^^^^^^\\W^^^^^^\\VHVGOOOJJKO\n")
        # Record 2
        record = next(records)
        self.assertEqual(record.id, "HWI-EAS94_4_1_1_602_99")
        self.assertEqual(record.name, "HWI-EAS94_4_1_1_602_99")
        self.assertEqual(record.description, "HWI-EAS94_4_1_1_602_99 1")
        self.assertEqual(str(record.seq),
                         "gccatggcacatatatgaaggtcagaggacaacttgctgt")
        self.assertEqual(record.letter_annotations["phred_quality"],
                         [30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
                          30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
                          30, 30, 16, 30, 28, 22, 22, 22, 14, 15, 15, 5,
                          10, 15, 10, 5])
        self.assertEqual(record.format("fasta"),
                         ">HWI-EAS94_4_1_1_602_99 1\n"
                         "gccatggcacatatatgaaggtcagaggacaacttgctgt\n")
        self.assertEqual(record.format("qual"),
                         ">HWI-EAS94_4_1_1_602_99 1\n"
                         "30 30 30 30 30 30 30 30 30 30 "
                         "30 30 30 30 30 30 30 30 30 30\n"
                         "30 30 30 30 30 30 16 30 28 22 "
                         "22 22 14 15 15 5 10 15 10 5\n")
        self.assertEqual(record.format("fastq"),
                         "@HWI-EAS94_4_1_1_602_99 1\n"
                         "gccatggcacatatatgaaggtcagaggacaacttgctgt\n"
                         "+\n"
                         "??????????????????????????1?=777/00&+0+&\n")
        self.assertEqual(record.format("fastq-illumina"),
                         "@HWI-EAS94_4_1_1_602_99 1\n"
                         "gccatggcacatatatgaaggtcagaggacaacttgctgt\n"
                         "+\n"
                         "^^^^^^^^^^^^^^^^^^^^^^^^^^P^\\VVVNOOEJOJE\n")
        # Make sure that no further records are found
        self.assertRaises(StopIteration, next, records)
00362 
# When executed directly as a script, run this module's tests with a
# verbosity-2 text runner.
if __name__ == "__main__":
    unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))
00366