Back to index

python-biopython  1.60
test_SeqIO_SeqXML.py
Go to the documentation of this file.
00001 # Copyright 2010 by Thomas Schmitt.  All rights reserved.
00002 # This code is part of the Biopython distribution and governed by its
00003 # license.  Please see the LICENSE file that should have been included
00004 # as part of this package.
00005 import unittest
00006 import sys
00007 
00008 from Bio import SeqIO
00009 from Bio.Seq import Seq
00010 from Bio.SeqRecord import SeqRecord
00011 from StringIO import StringIO
00012 
# Example SeqXML inputs, keyed by a short label.  Each value is a
# two-item list: [path to the file, number of records expected in it].
test_files = {
    "dna": [
        "SeqXML/dna_example.xml",
        4,
    ],
    "rna": [
        "SeqXML/rna_example.xml",
        5,
    ],
    "protein": [
        "SeqXML/protein_example.xml",
        5,
    ],
    "globalSpecies": [
        "SeqXML/global_species_example.xml",
        2,
    ],
}
00019 
# Deliberately malformed inputs; parsing each is expected to fail.
corrupt_files = [
    "SeqXML/corrupt_example1.xml",
    "SeqXML/corrupt_example2.xml",
]
00023 
00024 
def assert_equal_records(testCase, record_a, record_b):
    """Assert that two records agree on every field this module compares.

    The id, name, description, sequence text, dbxrefs and annotations of
    the two records are compared pairwise with ``testCase.assertEqual``,
    so the first mismatch fails the calling test.

    Arguments:
     - testCase - object providing ``assertEqual`` (the running test case).
     - record_a - first record to compare.
     - record_b - second record to compare.
    """
    testCase.assertEqual(record_a.id, record_b.id)
    testCase.assertEqual(record_a.name, record_b.name)
    testCase.assertEqual(record_a.description, record_b.description)
    # Compare the sequences as plain text.
    testCase.assertEqual(str(record_a.seq), str(record_b.seq))
    testCase.assertEqual(record_a.dbxrefs, record_b.dbxrefs)
    # Must be checked against record_b: comparing record_a's annotations
    # with themselves always passes and hides real differences.
    testCase.assertEqual(record_a.annotations, record_b.annotations)
00032     
00033 
class TestSimpleRead(unittest.TestCase):
    """Check that each example file parses to the expected record count."""

    def test_check_SeqIO(self):
        """Files readable using parser via SeqIO."""
        # Each entry of test_files is [path, expected number of records].
        for filename, expected_count in test_files.values():
            parsed = list(SeqIO.parse(filename, "seqxml"))
            self.assertEqual(len(parsed), expected_count)
00041             
class TestDetailedRead(unittest.TestCase):
    """Field-level checks on the records parsed from the example files.

    ``setUp`` parses every file listed in ``test_files`` and stores the
    resulting record lists in ``self.records`` under the same keys.
    """

    def setUp(self):
        """Parse all example files into a mapping owned by this test."""
        # Build a fresh dict per test instance rather than mutating a
        # class-level dict shared by every instance of this class.
        self.records = {}
        for key in test_files:
            self.records[key] = list(SeqIO.parse(test_files[key][0], "seqxml"))

    def test_special_characters_desc(self):
        """Read special XML characters in description."""
        self.assertEqual(self.records["dna"][2].description,
                         u'some special characters in the description\n<tag> "quoted string"')

    #TODO - Fix this failure under Windows with Python 3.1 and 3.2
    # The test below is only defined when NOT running Python 3 on Windows.
    if not (sys.platform == "win32" and sys.version_info[0] >= 3):
        def test_unicode_characters_desc(self):
            """Test special unicode characters in the description."""
            self.assertEqual(self.records["rna"][2].description,
                             u"\u00E5\u00C5\u00FC\u00F6\u00D6\u00DF\u00F8\u00E4\u00A2\u00A3$\u20AC\u9999\u80A0")

    def test_full_characters_set_read(self):
        """Read full characters set for each type"""
        self.assertEqual(str(self.records["dna"][1].seq), "ACGTMRWSYKVHDBXN.-")
        self.assertEqual(str(self.records["rna"][1].seq), "ACGUMRWSYKVHDBXN.-")
        self.assertEqual(str(self.records["protein"][1].seq),
                         "ABCDEFGHIJKLMNOPQRSTUVWXYZ.-*")

    def test_duplicated_property(self):
        """Read property with multiple values"""
        self.assertEqual(self.records["protein"][2].annotations["test"],
                         [u"1", u"2", u"3"])

    def test_duplicated_dbxref(self):
        """Read multiple cross references to a single source"""
        self.assertEqual(self.records["protein"][2].dbxrefs,
                         [u"someDB:G001", u"someDB:G002"])

    def test_read_minimal_required(self):
        """Check minimal record."""
        # Reference record: only id and seq supplied, plus a source annotation.
        minimalRecord = SeqRecord(id="test", seq=Seq("abc"))
        minimalRecord.annotations["source"] = u"Ensembl"

        # Entry 3 of the rna, dna and protein files is compared field by
        # field against the reference record.
        self.assertEqual(self.records["rna"][3].name, minimalRecord.name)
        self.assertEqual(self.records["dna"][3].annotations, minimalRecord.annotations)
        self.assertEqual(self.records["rna"][3].dbxrefs, minimalRecord.dbxrefs)
        self.assertEqual(self.records["protein"][3].description, minimalRecord.description)

    def test_local_species(self):
        """Check local species."""
        self.assertEqual(self.records["rna"][1].annotations["organism"], "Mus musculus")
        self.assertEqual(self.records["rna"][1].annotations["ncbi_taxid"], "10090")

        self.assertEqual(self.records["rna"][0].annotations["organism"], "Gallus gallus")
        self.assertEqual(self.records["rna"][0].annotations["ncbi_taxid"], "9031")

    def test_global_species(self):
        """Check global species."""
        self.assertEqual(self.records["globalSpecies"][0].annotations["organism"], "Mus musculus")
        self.assertEqual(self.records["globalSpecies"][0].annotations["ncbi_taxid"], "10090")

        self.assertEqual(self.records["globalSpecies"][1].annotations["organism"], "Homo sapiens")
        self.assertEqual(self.records["globalSpecies"][1].annotations["ncbi_taxid"], "9606")

    def test_local_source_definition(self):
        """Check local source."""
        self.assertEqual(self.records["protein"][4].annotations["source"], u"Uniprot")

    def test_empty_description(self):
        """Check empty description."""
        # Expected value is whatever description a SeqRecord gets by default.
        self.assertEqual(self.records["rna"][4].description,
                         SeqRecord(id="", seq=Seq("")).description)
00107 
00108 
class TestReadAndWrite(unittest.TestCase):
    """Round-trip tests: parse a file, write it out, parse it again."""

    def test_read_write_rna(self):
        """Read and write RNA."""
        read1_records = list(SeqIO.parse(test_files["rna"][0], "seqxml"))
        self._write_parse_and_compare(read1_records)

    def test_read_write_dna(self):
        """Read and write DNA."""
        read1_records = list(SeqIO.parse(test_files["dna"][0], "seqxml"))
        self._write_parse_and_compare(read1_records)

    def test_read_write_protein(self):
        """Read and write protein."""
        read1_records = list(SeqIO.parse(test_files["protein"][0], "seqxml"))
        self._write_parse_and_compare(read1_records)

    def test_read_write_globalSpecies(self):
        """Read and write global species."""
        read1_records = list(SeqIO.parse(test_files["globalSpecies"][0], "seqxml"))
        self._write_parse_and_compare(read1_records)

    def _write_parse_and_compare(self, read1_records):
        """Write the records as SeqXML, re-parse them and compare.

        Arguments:
         - read1_records - list of records obtained from the first parse.

        The records are written to an in-memory handle, parsed back, and
        the two lists are checked for equal length and pairwise equality
        via ``assert_equal_records``.
        """
        handle = StringIO()
        SeqIO.write(read1_records, handle, "seqxml")

        # Rewind so the parser reads what was just written.
        handle.seek(0)
        read2_records = list(SeqIO.parse(handle, "seqxml"))

        # assertEqual, not the deprecated assertEquals alias.
        self.assertEqual(len(read1_records), len(read2_records))

        for record1, record2 in zip(read1_records, read2_records):
            assert_equal_records(self, record1, record2)
00145         
00146     
class TestReadCorruptFiles(unittest.TestCase):
    """Check that malformed SeqXML input is reported as an error."""

    def test_for_errors(self):
        """Handling of corrupt files."""
        for corrupt_path in corrupt_files:
            record_iter = SeqIO.parse(corrupt_path, "seqxml")
            # Fetching the first record must raise ValueError.
            # NOTE(review): uses the Python 2 style ``.next`` attribute;
            # presumably translated for Python 3 runs - TODO confirm.
            fetch_first = record_iter.next
            self.assertRaises(ValueError, fetch_first)
00154 
00155         
if __name__ == "__main__":
    # Run with verbosity 2 when executed as a script.
    unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))