Back to index

python-biopython  1.60
test_translate.py
Go to the documentation of this file.
00001 # Make sure the translation functions work.
00002 # Start simple - unambiguous DNA to unambiguous protein
00003 
00004 from Bio import Seq
00005 from Bio import Alphabet
00006 from Bio.Alphabet import IUPAC
00007 
00008 # First, test the transcription functions: transcribe() on a DNA Seq and
00009 # back_transcribe() on an RNA Seq, each compared with a literal via tostring().
00010 s = "ATA"
00011 dna = Seq.Seq(s, IUPAC.unambiguous_dna)
00012 rna = dna.transcribe()
00013 assert rna.tostring()=="AUA"
00014 # Same check on a longer sequence: the expected RNA is s with U in place of T.
00015 s = "GAAAATTCATTTTCTTTGGACTTTCTCTGAAATCCGAGTCCTAGGAAAGATGCGTGAGATTCTTCATATT"
00016 dna = Seq.Seq(s, IUPAC.unambiguous_dna)
00017 rna = dna.transcribe()
00018 assert rna.tostring()=='GAAAAUUCAUUUUCUUUGGACUUUCUCUGAAAUCCGAGUCCUAGGAAAGAUGCGUGAGAUUCUUCAUAUU'
00019 # Reverse direction: back_transcribe() must turn this RNA into the DNA string used above.
00020 s = "GAAAAUUCAUUUUCUUUGGACUUUCUCUGAAAUCCGAGUCCUAGGAAAGAUGCGUGAGAUUCUUCAUAUU"
00021 rna = Seq.Seq(s, IUPAC.unambiguous_rna)
00022 dna = rna.back_transcribe()
00023 assert dna.tostring()=='GAAAATTCATTTTCTTTGGACTTTCTCTGAAATCCGAGTCCTAGGAAAGATGCGTGAGATTCTTCATATT'
00024 
00025 
00026 # Translation tests, all called with to_stop=True. Calls that pass no table
00027 # argument rely on translate()'s default table (the standard table, per the original note).
00028 # Do some simple tests first: one or two bases are asserted to give an empty protein.
00029 s = "T"
00030 dna = Seq.Seq(s, IUPAC.unambiguous_dna)
00031 protein = dna.translate(to_stop=True)
00032 assert  protein.tostring()==""
00033 
00034 s = "TC"
00035 dna = Seq.Seq(s, IUPAC.unambiguous_dna)
00036 protein = dna.translate(to_stop=True)
00037 assert protein.tostring()==""
00038 # A 70-base sequence with the default table: the expected protein has nine residues.
00039 s = "GAAAATTCATTTTCTTTGGACTTTCTCTGAAATCCGAGTCCTAGGAAAGATGCGTGAGATTCTTCATATT"
00040 dna = Seq.Seq(s, IUPAC.unambiguous_dna)
00041 protein = dna.translate(to_stop=True)
00042 assert protein.tostring()=='ENSFSLDFL'
00043 # Codon table passed positionally, by number (15).
00044 s = "GAA"
00045 dna = Seq.Seq(s, IUPAC.unambiguous_dna)
00046 protein = dna.translate(15, to_stop=True)
00047 assert protein.tostring()=="E"
00048 # Codon table passed by name; with this table "ATA" is expected to give "M".
00049 s = "ATA"
00050 dna = Seq.Seq(s, IUPAC.unambiguous_dna)
00051 protein = dna.translate('Vertebrate Mitochondrial', to_stop=True)
00052 assert protein.tostring()=="M"
00053 # Same 70-base sequence with table 'SGC8': the expected protein runs to 23 residues instead of nine.
00054 s = "GAAAATTCATTTTCTTTGGACTTTCTCTGAAATCCGAGTCCTAGGAAAGATGCGTGAGATTCTTCATATT"
00055 dna = Seq.Seq(s, IUPAC.unambiguous_dna)
00056 protein = dna.translate('SGC8', to_stop=True)
00057 assert protein.tostring()=='ENSFSLDFLWNPSPSNDAWDSSY'
00058 
00059 # use the standard table (no table argument is passed to translate() here)
00060 # From this point on the translated sequences are printed rather than asserted.
00061 s = "TCAAAAAGGTGCATCTAGATG"
00062 print "Starting with", s
00063 dna = Seq.Seq(s, IUPAC.unambiguous_dna)
00064 protein = dna.translate(to_stop=True)
00065 assert isinstance(protein.alphabet, IUPAC.IUPACProtein)
00066 
00067 print len(protein), "ungapped residues translated"
00068 # NOTE: "gapped" in the names and messages below means translated without to_stop; the alphabet is asserted to be HasStopCodon.
00069 gapped_protein = dna.translate()
00070 assert isinstance(gapped_protein.alphabet, Alphabet.HasStopCodon)
00071 print protein.tostring()
00072 
00073 print len(gapped_protein), "residues translated, including gaps"
00074 print gapped_protein.tostring()
00075 
00076 # Table 2 (labelled SGC1 in the message below) has "AGG" as a stop codon
00077 p2 = dna.translate(table=2, to_stop=True)
00078 print len(p2), "SGC1 has a stop codon"
00079 print p2.tostring()
00080 p2 = dna.translate(table=2)
00081 print "Actually, there are", p2.count("*"), "stops."
00082 print p2.tostring()
00083 
00084 # Make sure I can change the stop character (stop_symbol="+" instead of the "*" counted above)
00085 p2 = dna.translate(table=2, stop_symbol="+")
00086 print "Yep,", p2.count("+"), "stops."
00087 print p2.tostring()
00088 
00089 
00090 # Some of the same things, with RNA: s with T replaced by U must translate
00091 # to the same results as the DNA above. (The code is the same, so I'm not doing all of the tests.)
00092 rna = Seq.Seq(s.replace("T", "U"), IUPAC.unambiguous_rna)
00093 # Compared against `protein` from the DNA run (to_stop=True); the trailing comma on print keeps "works." on the same output line.
00094 print "RNA translation ...",
00095 protein_from_rna = rna.translate(to_stop=True)
00096 assert protein.alphabet is protein_from_rna.alphabet
00097 assert protein.tostring() == protein_from_rna.tostring()
00098 print "works."
00099 # NOTE(review): the two messages look swapped - this call has no to_stop, the one above does. Strings left as-is because they are program output.
00100 print "RNA translation to stop ...",
00101 gapped_protein_from_rna = rna.translate()
00102 assert len(gapped_protein) == len(gapped_protein_from_rna)
00103 assert gapped_protein.tostring() == gapped_protein_from_rna.tostring()
00104 print "works."
00105 
00106 # Some tests selecting the codon table "by name" ('Vertebrate Mitochondrial', 'SGC1').
00107 # How about some forward ambiguity? s uses the ambiguity codes R and Y; the letters under it are presumably the expected residue per codon.
00108 print "Forward ambiguous"
00109 s = "RATGATTARAATYTA"
00110 #     B  D  *  N  L
00111 dna = Seq.Seq(s, IUPAC.ambiguous_dna)
00112 protein = dna.translate('Vertebrate Mitochondrial')
00113 print protein.tostring()
00114 stop_protein = dna.translate('SGC1', to_stop=True)
00115 print stop_protein.tostring()
00116 # The two results above are only printed; nothing in this section asserts them.
00117 # XXX (Backwards with ambiguity code is unfinished!)
00118