Back to index

python-biopython  1.60
test_seq.py
Go to the documentation of this file.
00001 # This code is part of the Biopython distribution and governed by its
00002 # license.  Please see the LICENSE file that should have been included
00003 # as part of this package.
00004 
00005 import sys
00006 from Bio import Seq
00007 from Bio.Alphabet import IUPAC
00008 from Bio import Alphabet
00009 from Bio.Data.IUPACData import ambiguous_dna_complement, ambiguous_rna_complement
00010 from Bio.Data.IUPACData import ambiguous_dna_values, ambiguous_rna_values
00011 from Bio.Data.CodonTable import TranslationError
00012 
00013 
# Typecode handed to array.array() for character data further down:
# "u" when running under Python 3, "c" on any other major version.
array_indicator = "u" if sys.version_info[0] == 3 else "c"
00018 
# --- Basic Seq behaviour: indexing, slicing, striding and addition. ---
# NOTE(review): the printed text presumably forms part of this test's expected
# output, so the print statements and their order should be left untouched.
print
print "Testing Seq"
print "==========="

# s (and t, u below) are module-level names that are reused further down
# when the test_seqs list is built.
s = Seq.Seq("TCAAAAGGATGCATCATG", IUPAC.unambiguous_dna)

print s.tostring()
print len(s)
print s[0]
print s[-1]
print s[3:5].tostring()

print "Reverse using -1 stride:", repr(s[::-1])

print "Extract every third nucleotide (slicing with stride 3):"
print repr(s[0::3])
print repr(s[1::3])
print repr(s[2::3])

print s.alphabet.letters

# Same alphabet on both sides: the sum must simply be the concatenation.
t = Seq.Seq("T", IUPAC.unambiguous_dna)
u = s + t
print str(u.alphabet)
print len(u)
assert s.tostring() + "T" == u.tostring()

# DNA + protein is expected to be refused with a TypeError.
t = Seq.Seq("T", IUPAC.protein)
try:
    u = s + t
except TypeError:
    print "expected error, and got it"
else:
    print "huh?  ERROR"

# Unambiguous DNA + ambiguous DNA is allowed; the resulting alphabet is printed.
t = Seq.Seq("T", IUPAC.ambiguous_dna)
u = s + t
print str(u.alphabet)
00057 
00058 from Bio.Seq import MutableSeq
00059 import array
00060 
# --- MutableSeq: construction and the list-like mutation API. ---
# Every step mutates test_seq in place and prints its repr, so the order of
# the statements below matters.
print
print "Testing MutableSeq"
print "=================="

print "Testing creating MutableSeqs in multiple ways"
string_seq = MutableSeq("TCAAAAGGATGCATCATG", IUPAC.ambiguous_dna)
array_seq = MutableSeq(array.array(array_indicator, "TCAAAAGGATGCATCATG"),
                       IUPAC.ambiguous_dna)
converted_seq = s.tomutable()

# NOTE(review): array_seq and converted_seq are constructed above but only
# string_seq is exercised by this loop - confirm whether that is intentional.
for test_seq in [string_seq]:
    print repr(test_seq)
    print test_seq.tostring()
    print len(test_seq)
    print repr(test_seq.toseq())

    print test_seq[0]
    print repr(test_seq[1:5])

    # Slice assignment from three different source types.
    test_seq[1:3] = "GAT"
    print "Set slice with string:", repr(test_seq)
    test_seq[1:3] = test_seq[5:7]
    print "Set slice with MutableSeq:", repr(test_seq)
    test_seq[1:3] = array.array(array_indicator, "GAT")
    print "Set slice with array:", repr(test_seq)

    test_seq[3] = "G"
    print "Set item:", repr(test_seq)

    del test_seq[4:5]
    print "Delete slice:", repr(test_seq)
    del test_seq[3]
    print "Delete item:", repr(test_seq)

    test_seq.append("C")
    print "Append:", repr(test_seq)
    test_seq.insert(4, "G")
    print "Insert:", repr(test_seq)

    print "Pop off the last item:", test_seq.pop()

    test_seq.remove("G")
    print "Removed Gs:", repr(test_seq)

    # Removing a letter that is not present must raise ValueError; the
    # AssertionError is only reached if remove() wrongly succeeds.
    try:
        test_seq.remove("Z")
        raise AssertionError("Did not get expected value error.")
    except ValueError:
        print "Expected value error and got it"

    print "A count:", test_seq.count("A")
    print "A index:", test_seq.index("A")

    test_seq.reverse()
    print "Reversed Seq:", repr(test_seq)

    print "Reverse using -1 stride:", repr(test_seq[::-1])


    # extend() is tried with both a plain string and another MutableSeq.
    test_seq.extend("GAT")
    test_seq.extend(MutableSeq("TTT", IUPAC.ambiguous_dna))
    print "Extended Seq:", repr(test_seq)

    # NOTE(review): with a step of -1 the range 4:6 looks empty, so this
    # presumably deletes nothing - confirm that is the intended check.
    del test_seq[4:6:-1]
    print "Delete stride slice:", repr(test_seq)

    print "Extract every third nucleotide (slicing with stride 3):"
    print repr(test_seq[0::3])
    print repr(test_seq[1::3])
    print repr(test_seq[2::3])

    # Extended-slice assignment: the replacement has exactly as many letters
    # as the strided slice selects.
    print "Setting wobble codon to N (set slice with stride 3):"
    test_seq[2::3] = "N" * len(test_seq[2::3])
    print repr(test_seq)
00135 
00136 ###########################################################################
# --- Addition across alphabets (DNA / RNA / generic nucleotide / protein). ---
print
print "Testing Seq addition"
print "===================="
# Fixture lists. Each one mixes Seq and MutableSeq objects with different
# alphabets and ends with a plain str, so Seq + str is exercised as well.
dna = [Seq.Seq("ATCG", IUPAC.ambiguous_dna),
       Seq.Seq("gtca", Alphabet.generic_dna),
       Seq.MutableSeq("GGTCA", Alphabet.generic_dna),
       Seq.Seq("CTG-CA", Alphabet.Gapped(IUPAC.unambiguous_dna, "-")),
       "TGGTCA"]
rna = [Seq.Seq("AUUUCG", IUPAC.ambiguous_rna),
       Seq.MutableSeq("AUUCG", IUPAC.ambiguous_rna),
       Seq.Seq("uCAg", Alphabet.generic_rna),
       Seq.MutableSeq("UC-AG", Alphabet.Gapped(Alphabet.generic_rna, "-")),
       Seq.Seq("U.CAG", Alphabet.Gapped(Alphabet.generic_rna, ".")),
       "UGCAU"]
nuc = [Seq.Seq("ATCG", Alphabet.generic_nucleotide),"UUUTTTACG"]
protein = [Seq.Seq("ATCGPK", IUPAC.protein),
           Seq.Seq("atcGPK", Alphabet.generic_protein),
           Seq.Seq("T.CGPK", Alphabet.Gapped(IUPAC.protein, ".")),
           Seq.Seq("T-CGPK", Alphabet.Gapped(IUPAC.protein, "-")),
           Seq.Seq("MEDG-KRXR*", Alphabet.Gapped(Alphabet.HasStopCodon(IUPAC.extended_protein, "*"), "-")),
           Seq.MutableSeq("ME-K-DRXR*XU", Alphabet.Gapped(Alphabet.HasStopCodon(IUPAC.extended_protein, "*"), "-")),
           Seq.Seq("MEDG-KRXR@", Alphabet.HasStopCodon(Alphabet.Gapped(IUPAC.extended_protein, "-"), "@")),
           Seq.Seq("ME-KR@", Alphabet.HasStopCodon(Alphabet.Gapped(IUPAC.protein, "-"), "@")),
           Seq.Seq("MEDG.KRXR@", Alphabet.Gapped(Alphabet.HasStopCodon(IUPAC.extended_protein, "@"), ".")),
           "TEDDF"]
# DNA or RNA + generic nucleotide must always work (no exception tolerated).
for a in dna+rna:
    for b in nuc:
        c=a+b
        assert str(c) == str(a) + str(b)
# RNA + RNA: either the letters concatenate, or a ValueError is raised and
# reported together with the two alphabets involved.
for a in rna:
    for b in rna:
        try:
            c=a+b
            assert str(c) == str(a) + str(b)
        except ValueError, e:
            print "%s + %s\n-> %s" % (repr(a.alphabet), repr(b.alphabet), str(e))
for a in dna:
    # DNA + DNA: same contract as RNA + RNA above.
    for b in dna:
        try:
            c=a+b
            assert str(c) == str(a) + str(b)
        except ValueError, e:
            print "%s + %s\n-> %s" % (repr(a.alphabet), repr(b.alphabet), str(e))
    # Mixing DNA and RNA (either order) must raise TypeError; if the addition
    # succeeds, that is only acceptable when one operand is a plain str.
    for b in rna:
        try:
            c=a+b
            assert (isinstance(a,str) or isinstance(b,str)), \
                   "DNA+RNA addition should fail!"
        except TypeError:
            pass
        try:
            c=b+a
            assert (isinstance(a,str) or isinstance(b,str)), \
                   "RNA+DNA addition should fail!"
        except TypeError:
            pass
for a in protein:
    # Protein + protein: concatenation, or a reported ValueError.
    for b in protein:
        try:
            c=a+b
            assert str(c) == str(a) + str(b)
        except ValueError, e:
            print "%s + %s\n-> %s" % (repr(a.alphabet), repr(b.alphabet), str(e))
    # Protein + any nucleotide sequence must raise TypeError unless a plain
    # str is involved.
    for b in nuc+dna+rna:
        try:
            c=a+b
            assert (isinstance(a,str) or isinstance(b,str)), \
                   "Protein+Nucleotide addition should fail!"
        except TypeError:
            pass
# Generic nucleotide on the left-hand side must accept any DNA/RNA/nucleotide.
for a in nuc:
    for b in dna+rna+nuc:
        c=a+b
        assert str(c) == str(a) + str(b)
# Nucleotide + protein (the reverse order of the check above).
for a in dna+rna+nuc:
    for b in protein:
        try:
            c=a+b
            assert (isinstance(a,str) or isinstance(b,str)), \
                   "Nucleotide+Protein addition should fail!"
        except TypeError:
            pass
00219 
00220 ###########################################################################
# --- Seq string-like methods must agree with the equivalent str methods. ---
print
print "Testing Seq string methods"
print "=========================="
for a in dna + rna + nuc + protein:
    # Plain strings and MutableSeq objects in the fixture lists are skipped.
    if not isinstance(a, Seq.Seq) : continue
    assert a.strip().tostring() == a.tostring().strip()
    assert a.lstrip().tostring() == a.tostring().lstrip()
    assert a.rstrip().tostring() == a.tostring().rstrip()
    assert a.lower().tostring() == a.tostring().lower()
    assert a.upper().tostring() == a.tostring().upper()
    # Arguments to try: plain strings, default-alphabet Seq objects, plus Seq
    # objects chosen according to the base alphabet of a.
    test_chars = ["-", Seq.Seq("-"), Seq.Seq("*"), "-X@"]
    alpha = Alphabet._get_base_alphabet(a.alphabet)
    if isinstance(alpha, Alphabet.DNAAlphabet):
        test_chars.append(Seq.Seq("A", IUPAC.ambiguous_dna))
    if isinstance(alpha, Alphabet.RNAAlphabet):
        test_chars.append(Seq.Seq("A", IUPAC.ambiguous_rna))
    if isinstance(alpha, Alphabet.NucleotideAlphabet):
        test_chars.append(Seq.Seq("A", Alphabet.generic_nucleotide))
    if isinstance(alpha, Alphabet.ProteinAlphabet):
        test_chars.append(Seq.Seq("K", Alphabet.generic_protein))
        test_chars.append(Seq.Seq("K-", Alphabet.Gapped(Alphabet.generic_protein,"-")))
        test_chars.append(Seq.Seq("K@", Alphabet.Gapped(IUPAC.protein,"@")))
        #Setup a clashing alphabet sequence
        b = Seq.Seq("-", Alphabet.generic_nucleotide)
    else:
        b = Seq.Seq("-", Alphabet.generic_protein)
    # b has the opposite kind of alphabet (protein vs. nucleotide) to a, so
    # using it as the strip() argument must raise TypeError.
    try:
        print a.strip(b).tostring()
        assert False, "Alphabet should have clashed!"
    except TypeError:
        pass #Good!

    for chars in  test_chars:
        # str_chars is the plain-string form used for the reference str calls.
        str_chars = str(chars)
        assert a.strip(chars).tostring() == a.tostring().strip(str_chars)
        assert a.lstrip(chars).tostring() == a.tostring().lstrip(str_chars)
        assert a.rstrip(chars).tostring() == a.tostring().rstrip(str_chars)
        assert a.find(chars) == a.tostring().find(str_chars)
        assert a.find(chars,2,-2) == a.tostring().find(str_chars,2,-2)
        assert a.rfind(chars) == a.tostring().rfind(str_chars)
        assert a.rfind(chars,2,-2) == a.tostring().rfind(str_chars,2,-2)
        assert a.count(chars) == a.tostring().count(str_chars)
        assert a.count(chars,2,-2) == a.tostring().count(str_chars,2,-2)
        #Now check splits
        assert [x.tostring() for x in a.split(chars)] \
               == a.tostring().split(str(chars))
        assert [x.tostring() for x in a.rsplit(chars)] \
               == a.tostring().rsplit(str(chars))
        for max_sep in [0,1,2,999]:
            assert [x.tostring() for x in a.split(chars, max_sep)] \
                   == a.tostring().split(str(chars), max_sep)
            assert [x.tostring() for x in a.rsplit(chars, max_sep)] \
                   == a.tostring().rsplit(str(chars), max_sep)
# Drop the loop variables and fixture lists from the module namespace.
del a, alpha, chars, str_chars, test_chars
del dna, rna, nuc, protein
00276 ###########################################################################
# --- Ambiguous complement tables. ---
print
print "Checking ambiguous complements"
print "=============================="

#See bug 2380, Bio.Nexus was polluting the dictionary.
# Guard: the gap and unknown characters must not appear as keys in the
# shared ambiguity dictionary.
assert "-" not in ambiguous_dna_values
assert "?" not in ambiguous_dna_values
00284 
def complement(sequence):
    """Return the complement of sequence in its original (unreversed) order.

    The result is obtained by reversing the output of
    Seq.reverse_complement(), which undoes the "reverse" part.
    """
    #TODO - Add a complement function to Bio/Seq.py?
    #There is already a complement method on the Seq and MutableSeq objects.
    flipped = Seq.reverse_complement(sequence)
    return flipped[::-1]
00289 
def sorted_dict(d):
    """Return a repr-style string of dictionary d with its items sorted.

    The result looks like ``{key1: value1, key2: value2}`` with each key and
    value rendered through repr(), which gives a stable text regardless of
    the dictionary's own iteration order.

    Uses d.items() rather than the Python 2-only d.iteritems() so the helper
    behaves the same on Python 2 and Python 3; sorted() consumes either.
    """
    return "{%s}" % ", ".join("%s: %s" % (repr(k), repr(v))
                              for k, v in sorted(d.items()))
00294 
# For every ambiguity letter: complement the set of letters it stands for and
# check that this equals the set of letters represented by the letter's
# complement in the complement table.
print
print "DNA Ambiguity mapping:", sorted_dict(ambiguous_dna_values)
print "DNA Complement mapping:", sorted_dict(ambiguous_dna_complement)
for ambig_char, values in sorted(ambiguous_dna_values.iteritems()):
    compl_values = complement(values)
    print "%s={%s} --> {%s}=%s" % \
        (ambig_char, values, compl_values, ambiguous_dna_complement[ambig_char])
    assert set(compl_values) == set(ambiguous_dna_values[ambiguous_dna_complement[ambig_char]])

# Same check for RNA. The values are plain strings here, so the T produced by
# complement() is rewritten to U by hand.
print
print "RNA Ambiguity mapping:", sorted_dict(ambiguous_rna_values)
print "RNA Complement mapping:", sorted_dict(ambiguous_rna_complement)
for ambig_char, values in sorted(ambiguous_rna_values.iteritems()):
    compl_values = complement(values).replace("T","U") #need to help as no alphabet
    print "%s={%s} --> {%s}=%s" % \
        (ambig_char, values, compl_values, ambiguous_rna_complement[ambig_char])
    assert set(compl_values) == set(ambiguous_rna_values[ambiguous_rna_complement[ambig_char]])
00312 
00313 print
00314 print "Reverse complements:"
00315 for sequence in [Seq.Seq("".join(sorted(ambiguous_rna_values))),
00316             Seq.Seq("".join(sorted(ambiguous_dna_values))),
00317             Seq.Seq("".join(sorted(ambiguous_rna_values)), Alphabet.generic_rna),
00318             Seq.Seq("".join(sorted(ambiguous_dna_values)), Alphabet.generic_dna),
00319             Seq.Seq("".join(sorted(ambiguous_rna_values)).replace("X",""), IUPAC.IUPACAmbiguousRNA()),
00320             Seq.Seq("".join(sorted(ambiguous_dna_values)).replace("X",""), IUPAC.IUPACAmbiguousDNA()),
00321             Seq.Seq("AWGAARCKG")]:  # Note no U or T
00322         print "%s -> %s" \
00323               % (repr(sequence), repr(Seq.reverse_complement(sequence)))
00324         assert sequence.tostring() \
00325            == Seq.reverse_complement(Seq.reverse_complement(sequence)).tostring(), \
00326            "Dobule reverse complement didn't preserve the sequence!"
00327 print
00328 
00329 ###########################################################################
00330 
# Shared fixtures for the transcription / complement / translation sections.
# test_seqs mixes the s, t and u objects created earlier with plain strings,
# Seq and MutableSeq objects over several alphabets; its final entry uses a
# protein alphabet. protein_seqs holds protein-only sequences on which the
# nucleotide operations are expected to fail.
test_seqs = [s,t,u,
             Seq.Seq("ATGAAACTG"),
             "ATGAAACtg",
             #TODO - Fix ambiguous translation
             #Seq.Seq("ATGAARCTG"),
             #Seq.Seq("AWGAARCKG"),  # Note no U or T
             #Seq.Seq("".join(ambiguous_rna_values)),
             #Seq.Seq("".join(ambiguous_dna_values)),
             #Seq.Seq("".join(ambiguous_rna_values), Alphabet.generic_rna),
             #Seq.Seq("".join(ambiguous_dna_values), Alphabet.generic_dna),
             #Seq.Seq("".join(ambiguous_rna_values), IUPAC.IUPACAmbiguousDNA()),
             #Seq.Seq("".join(ambiguous_dna_values), IUPAC.IUPACAmbiguousRNA()),
             #Seq.Seq("AWGAARCKG", Alphabet.generic_dna),
             Seq.Seq("AUGAAACUG", Alphabet.generic_rna),
             Seq.Seq("ATGAAACTG", IUPAC.unambiguous_dna),
             Seq.Seq("ATGAAA-CTG", Alphabet.Gapped(IUPAC.unambiguous_dna)),
             Seq.Seq("ATGAAACTGWN", IUPAC.ambiguous_dna),
             Seq.Seq("AUGAAACUG", Alphabet.generic_rna),
             Seq.Seq("AUGAAA==CUG", Alphabet.Gapped(Alphabet.generic_rna,"=")),
             Seq.Seq("AUGAAACUG", IUPAC.unambiguous_rna),
             Seq.Seq("AUGAAACUGWN", IUPAC.ambiguous_rna),
             Seq.Seq("ATGAAACTG", Alphabet.generic_nucleotide),
             Seq.Seq("AUGAAACTG", Alphabet.generic_nucleotide), #U and T
             Seq.MutableSeq("ATGAAACTG", Alphabet.generic_dna),
             Seq.MutableSeq("AUGaaaCUG", IUPAC.unambiguous_rna),
             Seq.Seq("ACTGTCGTCT", Alphabet.generic_protein)]
protein_seqs = [Seq.Seq("ATCGPK", IUPAC.protein),
                Seq.Seq("T.CGPK", Alphabet.Gapped(IUPAC.protein, ".")),
                Seq.Seq("T-CGPK", Alphabet.Gapped(IUPAC.protein, "-")),
                Seq.Seq("MEDG-KRXR*", Alphabet.Gapped(Alphabet.HasStopCodon(IUPAC.extended_protein, "*"), "-")),
                Seq.MutableSeq("ME-K-DRXR*XU", Alphabet.Gapped(Alphabet.HasStopCodon(IUPAC.extended_protein, "*"), "-")),
                Seq.Seq("MEDG-KRXR@", Alphabet.HasStopCodon(Alphabet.Gapped(IUPAC.extended_protein, "-"), "@")),
                Seq.Seq("ME-KR@", Alphabet.HasStopCodon(Alphabet.Gapped(IUPAC.protein, "-"), "@")),
                Seq.Seq("MEDG.KRXR@", Alphabet.Gapped(Alphabet.HasStopCodon(IUPAC.extended_protein, "@"), "."))]
00365 
#Sanity test on the test sequence alphabets (see also enhancement bug 2597)
# A fixture containing U must not declare a DNA alphabet, and one containing
# T must not declare an RNA alphabet. Plain strings have no .alphabet
# attribute and are skipped.
for nucleotide_seq in test_seqs:
    if hasattr(nucleotide_seq, "alphabet"):
        if "U" in str(nucleotide_seq).upper():
            assert not isinstance(nucleotide_seq.alphabet, Alphabet.DNAAlphabet)
        if "T" in str(nucleotide_seq).upper():
            assert not isinstance(nucleotide_seq.alphabet, Alphabet.RNAAlphabet)
00373             
00374 
# --- Seq.transcribe() function versus the Seq.transcribe() method. ---
print
print "Transcribe DNA into RNA"
print "======================="
for nucleotide_seq in test_seqs:
    try:
        expected = Seq.transcribe(nucleotide_seq)
        # Transcription must be a pure T->U (and t->u) letter replacement.
        assert str(nucleotide_seq).replace("t","u").replace("T","U") == str(expected)
        print "%s -> %s" \
        % (repr(nucleotide_seq) , repr(expected))
    except ValueError, e:
        # expected = None records that the function refused this input.
        expected = None
        print "%s -> %s" \
        % (repr(nucleotide_seq) , str(e))
    #Now test the Seq object's method
    if isinstance(nucleotide_seq, Seq.Seq):
        try:
            assert repr(expected) == repr(nucleotide_seq.transcribe())
        except ValueError:
            # The method may only fail where the function failed too.
            assert expected is None

# Proteins must be rejected with ValueError by both function and method.
# Note this loop rebinds the module-level name s.
for s in protein_seqs:
    try:
        print Seq.transcribe(s)
        assert False, "Transcription shouldn't work on a protein!"
    except ValueError:
        pass
    if not isinstance(s, Seq.Seq) : continue #Only Seq has this method
    try:
        print s.transcribe()
        assert False, "Transcription shouldn't work on a protein!"
    except ValueError:
        pass
00407 
# --- Seq.back_transcribe() function versus the Seq.back_transcribe() method. ---
print
print "Back-transcribe RNA into DNA"
print "============================"
for nucleotide_seq in test_seqs:
    try:
        expected = Seq.back_transcribe(nucleotide_seq)
        # Back-transcription must be a pure U->T (and u->t) letter replacement.
        assert str(nucleotide_seq).replace("u","t").replace("U","T") == str(expected)
        print "%s -> %s" \
        % (repr(nucleotide_seq) , repr(expected))
    except ValueError, e:
        # expected = None records that the function refused this input.
        expected = None
        print "%s -> %s" \
        % (repr(nucleotide_seq) , str(e))
    #Now test the Seq object's method
    if isinstance(nucleotide_seq, Seq.Seq):
        try:
            assert repr(expected) == repr(nucleotide_seq.back_transcribe())
        except ValueError:
            # The method may only fail where the function failed too.
            assert expected is None

# Proteins must be rejected with ValueError by both function and method.
for s in protein_seqs:
    try:
        print Seq.back_transcribe(s)
        assert False, "Back transcription shouldn't work on a protein!"
    except ValueError:
        pass
    if not isinstance(s, Seq.Seq) : continue #Only Seq has this method
    try:
        print s.back_transcribe()
        assert False, "Back transcription shouldn't work on a protein!"
    except ValueError:
        pass
00440         
# --- Seq.reverse_complement() function versus the Seq methods. ---
print
print "Reverse Complement"
print "=================="
for nucleotide_seq in test_seqs:
    try:
        expected = Seq.reverse_complement(nucleotide_seq)
        print "%s\n-> %s" \
        % (repr(nucleotide_seq) , repr(expected))
    except ValueError, e:
        # expected = None records that the function refused this input.
        expected = None
        print "%s\n-> %s" \
        % (repr(nucleotide_seq) , str(e))
    #Now test the Seq object's method
    #(The MutableSeq object acts in place)
    if isinstance(nucleotide_seq, Seq.Seq):
        try:
            assert repr(expected) == repr(nucleotide_seq.reverse_complement())
            # The plain complement is the reverse complement read backwards.
            assert repr(expected[::-1]) == repr(nucleotide_seq.complement())
        except ValueError:
            # The methods may only fail where the function failed too.
            assert expected is None

# Proteins must be rejected with ValueError by the function and both methods.
for s in protein_seqs:
    try:
        print Seq.reverse_complement(s)
        assert False, "Reverse complement shouldn't work on a protein!"
    except ValueError:
        pass
    #Note that these methods are "in place" for the MutableSeq:
    try:
        print s.complement()
        assert False, "Complement shouldn't work on a protein!"
    except ValueError:
        pass
    try:
        print s.reverse_complement()
        assert False, "Reverse complement shouldn't work on a protein!"
    except ValueError:
        pass
00479    
# --- Seq.translate() function versus the Seq.translate() method. ---
print
print "Translating"
print "==========="
for nucleotide_seq in test_seqs:
    try:
        expected = Seq.translate(nucleotide_seq)
        print "%s\n-> %s" \
        % (repr(nucleotide_seq) , repr(expected))
    except (ValueError, TranslationError), e:
        # expected = None records that the function refused this input.
        expected = None
        print "%s\n-> %s" \
        % (repr(nucleotide_seq) , str(e))
    #Now test the Seq object's method
    if isinstance(nucleotide_seq, Seq.Seq):
        try:
            assert repr(expected) == repr(nucleotide_seq.translate())
        except (ValueError, TranslationError):
            # The method may only fail where the function failed too.
            assert expected is None
    #Now check translate(..., to_stop=True)
    try:
        short = Seq.translate(nucleotide_seq, to_stop=True)
    except (ValueError, TranslationError), e:
        short = None
    if expected is not None:
        # Whenever the full translation worked, the to_stop one must work
        # too and equal the text before the first "*" of the full result.
        assert short is not None
        assert str(short) == str(expected.split("*")[0])
    if isinstance(nucleotide_seq, Seq.Seq):
        try:
            assert repr(short) == repr(nucleotide_seq.translate(to_stop=True))
        except (ValueError, TranslationError):
            assert short is None

# Proteins must be rejected with ValueError by both function and method.
for s in protein_seqs:
    try:
        print Seq.translate(s)
        assert False, "Translation shouldn't work on a protein!"
    except ValueError:
        pass
    if not isinstance(s, Seq.Seq) : continue #Only Seq has this method
    try:
        print s.translate()
        assert False, "Translation shouldn't work on a protein!"
    except ValueError:
        pass
00524 
00525 
# Translate one fixed 15-letter DNA string (as a plain str and as Seq objects
# with several alphabets) under different codon tables, selected either by
# numeric id or by name, and compare against the expected protein text.
misc_stops = "TAATAGTGAAGAAGG"
for nucleotide_seq in [misc_stops, Seq.Seq(misc_stops),
                       Seq.Seq(misc_stops, Alphabet.generic_nucleotide),
                       Seq.Seq(misc_stops, Alphabet.DNAAlphabet()),
                       Seq.Seq(misc_stops, IUPAC.unambiguous_dna)]:
    # The default table must give the same answer as table 1 / "SGC0".
    assert "***RR" == str(Seq.translate(nucleotide_seq))
    assert "***RR" == str(Seq.translate(nucleotide_seq, table=1))
    assert "***RR" == str(Seq.translate(nucleotide_seq, table="SGC0"))
    assert "**W**" == str(Seq.translate(nucleotide_seq, table=2))
    assert "**WRR" == str(Seq.translate(nucleotide_seq, \
                                        table='Yeast Mitochondrial'))
    assert "**WSS" == str(Seq.translate(nucleotide_seq, table=5))
    assert "**WSS" == str(Seq.translate(nucleotide_seq, table=9))
    assert "**CRR" == str(Seq.translate(nucleotide_seq, \
                                        table='Euplotid Nuclear'))
    assert "***RR" == str(Seq.translate(nucleotide_seq, table=11))
    assert "***RR" == str(Seq.translate(nucleotide_seq, table='Bacterial'))
del misc_stops
00544 
# Translating a protein with the module-level function must raise ValueError.
# NOTE(review): this repeats the function-level check already made in the
# protein loop of the "Translating" section - confirm whether it is still
# wanted.
for s in protein_seqs:
    try:
        print Seq.translate(s)
        assert False, "Shouldn't work on a protein!"
    except ValueError:
        pass
00551 
# Single-codon translations of plain strings with the default table: an
# unambiguous codon (TAT), an ambiguous codon translating to "*" (TAR), and
# ambiguous codons translating to "X" (TAN, NNN).
assert Seq.translate("TAT")=="Y"
assert Seq.translate("TAR")=="*"
assert Seq.translate("TAN")=="X"
assert Seq.translate("NNN")=="X"

# The same codons in mixed case must give the same answers...
assert Seq.translate("TAt")=="Y"
assert Seq.translate("TaR")=="*"
assert Seq.translate("TaN")=="X"
assert Seq.translate("nnN")=="X"

# ...and so must the all-lower-case forms.
assert Seq.translate("tat")=="Y"
assert Seq.translate("tar")=="*"
assert Seq.translate("tan")=="X"
assert Seq.translate("nnn")=="X"
00566 
00567 for codon in ["TA?", "N-N", "AC_", "Ac_"]:
00568     try:
00569         print Seq.translate(codon)
00570         assert "Translating %s should have failed" % repr(codon)
00571     except TranslationError:
00572         pass
00573 
# Exhaustive check over every triple of ambiguous DNA letters: expand the
# triple into all the concrete codons it can stand for (via
# ambiguous_dna_values), translate each with table 1, and check that the
# translation of the ambiguous codon itself is consistent with that set.
ambig = set(IUPAC.IUPACAmbiguousDNA.letters)
for c1 in ambig:
    for c2 in ambig:
        for c3 in ambig:
            values = set([Seq.translate(a+b+c, table=1) \
                          for a in ambiguous_dna_values[c1] \
                          for b in ambiguous_dna_values[c2] \
                          for c in ambiguous_dna_values[c3]])
            t = Seq.translate(c1+c2+c3)
            if t=="*":
                # A stop is only reported when every expansion is a stop.
                assert values == set("*")
            elif t=="X":
                # X must mean that more than one outcome is possible.
                assert len(values) > 1, \
                    "translate('%s') = '%s' not '%s'" \
                    % (c1+c2+c3, t, ",".join(values))
            elif t=="Z":
                assert values == set("EQ")
            elif t=="B":
                assert values == set("DN")
            elif t=="J":
                assert values == set("LI")
            else:
                # Any other letter must be the single possible outcome.
                assert values == set(t)
            #TODO - Use the Bio.Data.IUPACData module for the
            #ambiguous protein mappings?
del t,c1,c2,c3,ambig
00600 
# --- Seq.complement() method: print the result and cross-check it against
# the module-level reverse_complement() function read backwards. ---
print
print "Seq's .complement() method"
print "=========================="
for nucleotide_seq in test_seqs:
    # Only Seq objects are tested here; strings and MutableSeq are skipped.
    if isinstance(nucleotide_seq, Seq.Seq):
        try:
            print "%s -> %s" \
            % (repr(nucleotide_seq) , repr(nucleotide_seq.complement()))
            assert nucleotide_seq.complement().tostring() \
                == Seq.reverse_complement(nucleotide_seq).tostring()[::-1], \
                "Bio.Seq function and method disagree!"
        except ValueError, e:
            # Inputs that cannot be complemented are reported, not fatal.
            print "%s -> %s" \
            % (repr(nucleotide_seq) , str(e))
00615         
# --- Seq.reverse_complement() method: print the result and cross-check it
# against the module-level reverse_complement() function. ---
print
print "Seq's .reverse_complement() method"
print "=================================="
for nucleotide_seq in test_seqs:
    # Only Seq objects are tested here; strings and MutableSeq are skipped.
    if isinstance(nucleotide_seq, Seq.Seq):
        try:
            print "%s -> %s" \
            % (repr(nucleotide_seq) , repr(nucleotide_seq.reverse_complement()))
            assert nucleotide_seq.reverse_complement().tostring() \
                == Seq.reverse_complement(nucleotide_seq).tostring(), \
                "Bio.Seq function and method disagree!"
        except ValueError, e:
            # Inputs that cannot be complemented are reported, not fatal.
            print "%s -> %s" \
            % (repr(nucleotide_seq) , str(e))