Back to index

python-biopython  1.60
test_SeqIO_write.py
Go to the documentation of this file.
00001 # Copyright 2007-2010 by Peter Cock.  All rights reserved.
00002 # This code is part of the Biopython distribution and governed by its
00003 # license.  Please see the LICENSE file that should have been included
00004 # as part of this package.
00005 
00006 import os
00007 import unittest
00008 
00009 from Bio import SeqIO
00010 from Bio import AlignIO
00011 from Bio.SeqRecord import SeqRecord
00012 from Bio.Seq import Seq, UnknownSeq
00013 from StringIO import StringIO
00014 from Bio import Alphabet
00015 from Bio.Align import MultipleSeqAlignment
00016 
00017 try:
00018     #This is in Python 2.6+, but we need it on Python 3
00019     from io import BytesIO
00020 except ImportError:
00021     BytesIO = StringIO
00022 
00023 
# Every format name we can both write and read back, including the
# alignment-only ones known to AlignIO.  Ordering is irrelevant to the
# tests; sorting merely keeps the generated test names stable.
test_write_read_alignment_formats = sorted(SeqIO._FormatToWriter)
test_write_read_alignment_formats.extend(
    fmt for fmt in sorted(AlignIO._FormatToWriter)
    if fmt not in SeqIO._FormatToWriter)
# Drop names which are only aliases: "gb" is an alias for genbank and
# "fastq-sanger" is an alias for fastq.
for alias in ("gb", "fastq-sanger"):
    test_write_read_alignment_formats.remove(alias)
00032 
00033 
# This is a list of three-tuples.  Each tuple contains:
#  - a list of SeqRecord objects,
#  - a description (string), which is also used to build the test name,
#  - a list of expected failures, each of which is itself a three-tuple
#    of (list of format names, exception type, exception message).
# An empty list of expected failures means only the generic checks in
# WriterTests.check apply.
test_records = [
    ([], "zero records", []),
    ([SeqRecord(Seq("CHSMAIKLSSEHNIPSGIANAL",Alphabet.generic_protein), id="Alpha"),
      SeqRecord(Seq("HNGFTALEGEIHHLTHGEKVAF",Alphabet.generic_protein), id="Gamma"),
      SeqRecord(Seq("DITHGVG",Alphabet.generic_protein), id="delta")],
     "three peptides of different lengths", []),
    ([SeqRecord(Seq("CHSMAIKLSSEHNIPSGIANAL",Alphabet.generic_protein), id="Alpha"),
      SeqRecord(Seq("VHGMAHPLGAFYNTPHGVANAI",Alphabet.generic_protein), id="Beta"),
      SeqRecord(Seq("HNGFTALEGEIHHLTHGEKVAF",Alphabet.generic_protein), id="Gamma")],
     "three proteins alignment", []),
    ([SeqRecord(Seq("AATAAACCTTGCTGGCCATTGTGATCCATCCA",Alphabet.generic_dna), id="X"),
      SeqRecord(Seq("ACTCAACCTTGCTGGTCATTGTGACCCCAGCA",Alphabet.generic_dna), id="Y"),
      SeqRecord(Seq("TTTCCTCGGAGGCCAATCTGGATCAAGACCAT",Alphabet.generic_dna), id="Z")],
     "three DNA sequence alignment", []),
    # Deliberately nasty record: mixed CR, LF and CRLF line breaks embedded
    # in the name and description.
    ([SeqRecord(Seq("AATAAACCTTGCTGGCCATTGTGATCCATCCA",Alphabet.generic_dna), id="X",
                name="The\nMystery\rSequece:\r\nX"),
      SeqRecord(Seq("ACTCAACCTTGCTGGTCATTGTGACCCCAGCA",Alphabet.generic_dna), id="Y",
                description="an%sevil\rdescription right\nhere" % os.linesep),
      SeqRecord(Seq("TTTCCTCGGAGGCCAATCTGGATCAAGACCAT",Alphabet.generic_dna), id="Z")],
     "3 DNA seq alignment with CR/LF in name/descr",
      [(["genbank"], ValueError, r"Locus identifier 'The\nMystery\rSequece:\r\nX' is too long")]),
    # The "Beta" record appears twice on purpose.
    ([SeqRecord(Seq("CHSMAIKLSSEHNIPSGIANAL",Alphabet.generic_protein), id="Alpha"),
      SeqRecord(Seq("VHGMAHPLGAFYNTPHGVANAI",Alphabet.generic_protein), id="Beta"),
      SeqRecord(Seq("VHGMAHPLGAFYNTPHGVANAI",Alphabet.generic_protein), id="Beta"),
      SeqRecord(Seq("HNGFTALEGEIHHLTHGEKVAF",Alphabet.generic_protein), id="Gamma")],
     "alignment with repeated record",
     [(["stockholm"],ValueError,"Duplicate record identifier: Beta"),
      (["phylip","phylip-relaxed","phylip-sequential"],ValueError,"Repeated name 'Beta' (originally 'Beta'), possibly due to truncation")]),
    ]
# Meddle with the annotation too.  The assert guards the hard coded index
# used below against the table being reordered.
assert test_records[4][1] == "3 DNA seq alignment with CR/LF in name/descr"
# Add a list of strings,
test_records[4][0][2].annotations["note"] = [
    ("Note%salso" % os.linesep) + "\r\nhas\n evil line\rbreaks!",
    "Wow"]
# Add a simple string
test_records[4][0][2].annotations["comment"] = (
    ("More%sof" % os.linesep) + "\r\nthese\n evil line\rbreaks!")
# Add a float too:
test_records[4][0][2].annotations["weight"] = 2.5
00077 
00078 
class WriterTests(unittest.TestCase):
    """Cunning unit test where methods are added at run time.

    The class itself only defines helper methods.  The actual ``test_*``
    methods are attached with ``setattr`` by module level code after the
    class definition.
    """

    def _new_handle(self, format):
        """Return an empty in-memory handle suitable for the given format.

        Formats listed in ``SeqIO._BinaryFormats`` get a ``BytesIO``, all
        other formats get a ``StringIO``.
        """
        if format in SeqIO._BinaryFormats:
            return BytesIO()
        return StringIO()

    def check(self, records, format):
        """General test function with a little format specific information.

        Picks between ``check_write_fails`` (for combinations which are
        expected to raise ``ValueError``) and ``check_simple`` (a write/read
        round trip).

        This has some general expected exceptions hard coded!

        Arguments:
         - records - list of SeqRecord objects to write
         - format - format name (string) to write them in
        """
        #TODO - Check the exception messages?
        # Number of distinct record lengths; more than one means the records
        # cannot be treated as an alignment.
        lengths = len(set(len(r) for r in records))
        if not records and format in ["stockholm", "phylip", "phylip-relaxed",
                                      "phylip-sequential", "nexus", "clustal",
                                      "sff"]:
            self.check_write_fails(records, format, ValueError,
                                   "Must have at least one sequence")
        elif lengths > 1 and format in AlignIO._FormatToWriter:
            self.check_write_fails(records, format, ValueError,
                                   "Sequences must all be the same length")
        elif records and format in ["fastq", "fastq-sanger", "fastq-solexa",
                                    "fastq-illumina", "qual", "phd"]:
            self.check_write_fails(records, format, ValueError,
                                   "No suitable quality scores found in "
                                   "letter_annotations of SeqRecord "
                                   "(id=%s)." % records[0].id)
        elif records and format == "sff":
            self.check_write_fails(records, format, ValueError,
                                   "Missing SFF flow information")
        else:
            self.check_simple(records, format)

    def check_simple(self, records, format):
        """Write the records, parse them back, and compare ids and sequences.

        Arguments:
         - records - list of SeqRecord objects to write
         - format - format name (string) used for both writing and parsing
        """
        handle = self._new_handle(format)
        try:
            count = SeqIO.write(records, handle, format)
            self.assertEqual(count, len(records))
            #Now read them back...
            handle.seek(0)
            new_records = list(SeqIO.parse(handle, format))
            self.assertEqual(len(new_records), len(records))
            for record, new_record in zip(records, new_records):
                #Using compare_record(record, new_record) is too strict
                if format == "nexus":
                    #The nexus parser will dis-ambiguate repeated record ids.
                    self.assertTrue(record.id == new_record.id or
                                    new_record.id.startswith(record.id+".copy"))
                else:
                    self.assertEqual(record.id, new_record.id)
                self.assertEqual(str(record.seq), str(new_record.seq))
        finally:
            # Close the handle even when one of the assertions above fails.
            handle.close()

    def check_write_fails(self, records, format, err_type, err_msg=""):
        """Check that writing the records in this format raises an exception.

        Arguments:
         - records - list of SeqRecord objects to write
         - format - format name (string) to write them in
         - err_type - exception class expected from ``SeqIO.write``
         - err_msg - if non-empty, ``str()`` of the exception must equal this

        The test fails if ``SeqIO.write`` returns without raising
        ``err_type``, whether or not ``err_msg`` was given.
        """
        handle = self._new_handle(format)
        try:
            try:
                SeqIO.write(records, handle, format)
            except err_type as err:
                if err_msg:
                    self.assertEqual(str(err), err_msg)
            else:
                # Without this branch a write which unexpectedly succeeds
                # would be reported as a passing test.
                self.fail("%s not raised when writing %i record(s) as %s"
                          % (getattr(err_type, "__name__", str(err_type)),
                             len(records), format))
        finally:
            handle.close()
00143 
def funct(records, format, descr):
    """Build a test method which runs the generic check for one case."""
    f = lambda x : x.check(records, format)
    f.__doc__ = "%s for %s" % (format, descr)
    return f


def funct_e(records, format, descr, err_type, err_msg):
    """Build a test method which expects one specific write failure."""
    f = lambda x : x.check_write_fails(records, format,
                                       err_type, err_msg)
    f.__doc__ = "%s for %s" % (format, descr)
    return f


# Attach one test method to WriterTests per (record set, format) pair.
for (records, descr, errs) in test_records:
    for format in test_write_read_alignment_formats:
        test_name = "test_%s_%s" % (format, descr.replace(" ","_"))
        for err_formats, err_type, err_msg in errs:
            if format in err_formats:
                #An error specific method is wanted for this format
                test_method = funct_e(records, format, descr,
                                      err_type, err_msg)
                break
        else:
            #Assume no errors expected...
            test_method = funct(records, format, descr)
        setattr(WriterTests, test_name, test_method)
del funct
00167 
if __name__ == "__main__":
    # Run this module's tests with verbose, one-line-per-test output.
    unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))