Back to index

python-biopython  1.60
CodonTable.py
Go to the documentation of this file.
00001 # This code is part of the Biopython distribution and governed by its
00002 # license.  Please see the LICENSE file that should have been included
00003 # as part of this package.
00004 """Codon tables based on those from the NCBI.
00005 
00006 These tables are based on parsing the NCBI file:
00007 ftp://ftp.ncbi.nih.gov/entrez/misc/data/gc.prt
00008 
00009 Last updated for Version 3.9
00010 """
00011 
00012 from Bio import Alphabet
00013 from Bio.Alphabet import IUPAC
00014 from Bio.Data import IUPACData
00015 
# Registries filled in by register_ncbi_table(), keyed by NCBI table id.
unambiguous_dna_by_id = {}
unambiguous_rna_by_id = {}
generic_by_id = {}  # unambiguous DNA or RNA
ambiguous_dna_by_id = {}
ambiguous_rna_by_id = {}
ambiguous_generic_by_id = {}  # ambiguous DNA or RNA

# The same tables again, keyed by each of their names.
unambiguous_dna_by_name = {}
unambiguous_rna_by_name = {}
generic_by_name = {}  # unambiguous DNA or RNA
ambiguous_dna_by_name = {}
ambiguous_rna_by_name = {}
ambiguous_generic_by_name = {}  # ambiguous DNA or RNA

# Standard IUPAC unambiguous codon tables; set when table id 1 is
# registered.
standard_dna_table = None
standard_rna_table = None
00033 
00034 # In the future, the back_table could return a statistically
00035 # appropriate distribution of codons, so do not cache the results of
00036 # back_table lookups!
00037 
class TranslationError(Exception):
    """Raised when a codon has no single, valid translation."""
00040 
class CodonTable(object):
    """A genetic code: how nucleotide codons map to amino acids.

    Attributes:
     - nucleotide_alphabet - alphabet the codons are written in
     - protein_alphabet    - alphabet the translations are written in
     - forward_table       - mapping codon -> amino acid; only includes
                             codons which actually code
     - back_table          - mapping amino acid -> codon, for back
                             translations
     - start_codons        - list of start codons
     - stop_codons         - list of stop codons

    Subclasses may additionally provide ``id`` and ``names``; __str__
    uses them when they are available.
    """
    nucleotide_alphabet = Alphabet.generic_nucleotide
    protein_alphabet = Alphabet.generic_protein

    forward_table = {}    # only includes codons which actually code
    back_table = {}       # for back translations
    start_codons = []
    stop_codons = []

    # Not always called from derived classes!
    def __init__(self, nucleotide_alphabet = nucleotide_alphabet,
                 protein_alphabet = protein_alphabet,
                 forward_table = None, back_table = None,
                 start_codons = None, stop_codons = None):
        """Store the alphabets, tables and start/stop codon lists.

        Any table or codon list left out defaults to a new empty
        container owned by this instance, so that filling in one table
        can never leak into the class-level defaults or into other
        tables.
        """
        if forward_table is None:
            forward_table = {}
        if back_table is None:
            back_table = {}
        if start_codons is None:
            start_codons = []
        if stop_codons is None:
            stop_codons = []
        self.nucleotide_alphabet = nucleotide_alphabet
        self.protein_alphabet = protein_alphabet
        self.forward_table = forward_table
        self.back_table = back_table
        self.start_codons = start_codons
        self.stop_codons = stop_codons

    def __str__(self):
        """Returns a simple text representation of the codon table

        e.g.
        >>> import Bio.Data.CodonTable
        >>> print Bio.Data.CodonTable.standard_dna_table
        >>> print Bio.Data.CodonTable.generic_by_id[1]
        """
        # A plain CodonTable has neither id nor names, so look them up
        # defensively.  getattr() (rather than class-level defaults) is
        # used on purpose: it still falls through to any __getattr__
        # forwarding that a subclass defines.
        table_id = getattr(self, "id", None)
        names = getattr(self, "names", None)

        if table_id:
            answer = "Table %i" % table_id
        else:
            answer = "Table ID unknown"
        if names:
            # names may contain None entries; filter() drops them
            answer += " " + ", ".join(filter(None, names))

        #Use the main four letters (and the conventional ordering)
        #even for ambiguous tables
        letters = self.nucleotide_alphabet.letters
        if isinstance(self.nucleotide_alphabet, Alphabet.DNAAlphabet) \
        or (letters is not None and "T" in letters):
            letters = "TCAG"
        else:
            #Should be either RNA or generic nucleotides,
            #e.g. Bio.Data.CodonTable.generic_by_id[1]
            letters = "UCAG"

        #Build the table: rows are (first base, third base) pairs and
        #columns are the second base.
        separator = "--+" + "+".join(["---------"] * len(letters)) + "+--"
        lines = [answer,
                 "",
                 "  |" + "|".join(["  %s      " % c2 for c2 in letters]) + "|",
                 separator]
        for c1 in letters:
            for c3 in letters:
                cells = [c1 + " |"]
                for c2 in letters:
                    codon = c1 + c2 + c3
                    if codon in self.stop_codons:
                        cells.append(" %s Stop|" % codon)
                        continue
                    try:
                        amino = self.forward_table[codon]
                    except (KeyError, TranslationError):
                        #Not a coding codon in this table
                        amino = "?"
                    if codon in self.start_codons:
                        cells.append(" %s %s(s)|" % (codon, amino))
                    else:
                        cells.append(" %s %s   |" % (codon, amino))
                cells.append(" " + c3)
                lines.append("".join(cells))
            lines.append(separator)
        return "\n".join(lines)
00118             
def make_back_table(table, default_stop_codon):
    """Invert a forward table into an amino acid -> codon mapping.

    ONLY RETURNS A SINGLE CODON per amino acid.  The codons are visited
    in sorted order so that changes in the hash implementation won't
    affect the result when one amino acid is coded by more than one
    codon: the codon that sorts last is the one kept.

    The key None is mapped to default_stop_codon.
    """
    inverse = dict((table[codon], codon) for codon in sorted(table))
    inverse[None] = default_stop_codon
    return inverse
00128 
00129 
class NCBICodonTable(CodonTable):
    """Codon table carrying an NCBI table id and a list of names."""
    nucleotide_alphabet = Alphabet.generic_nucleotide
    protein_alphabet = IUPAC.protein

    def __init__(self, id, names, table, start_codons, stop_codons):
        """Store the table data and derive the back table from it.

        CodonTable.__init__ is not called; every attribute is set
        directly here.  The first stop codon is used as the default
        stop codon of the back table.
        """
        # Identification
        self.id = id
        self.names = names
        # Start/stop codons
        self.start_codons = start_codons
        self.stop_codons = stop_codons
        # Translation tables
        self.forward_table = table
        self.back_table = make_back_table(table, stop_codons[0])
00141 
00142 
class NCBICodonTableDNA(NCBICodonTable):
    """NCBI codon table over the unambiguous IUPAC DNA alphabet."""
    nucleotide_alphabet = IUPAC.unambiguous_dna
00145 
class NCBICodonTableRNA(NCBICodonTable):
    """NCBI codon table over the unambiguous IUPAC RNA alphabet."""
    nucleotide_alphabet = IUPAC.unambiguous_rna
00148 
00149 
00150 #########  Deal with ambiguous forward translations
00151 
class AmbiguousCodonTable(CodonTable):
    """Codon table which also translates ambiguous codons.

    Wraps an existing (unambiguous) codon table: the forward table is
    replaced by an AmbiguousForwardTable built on top of the wrapped
    table's forward table, the start and stop codon lists are extended
    with list_ambiguous_codons(), and the back table is reused as is.
    """
    def __init__(self, codon_table,
                 ambiguous_nucleotide_alphabet,
                 ambiguous_nucleotide_values,
                 ambiguous_protein_alphabet,
                 ambiguous_protein_values):
        """Build the ambiguous view of codon_table.

        Arguments:
         - codon_table - the unambiguous table to wrap
         - ambiguous_nucleotide_alphabet - alphabet for the codons
         - ambiguous_nucleotide_values - mapping of each nucleotide
           letter to the letters it may stand for, e.g. 'R' -> 'AG'
         - ambiguous_protein_alphabet - alphabet for the translations
         - ambiguous_protein_values - mapping of each protein letter to
           the letters it may stand for
        """
        CodonTable.__init__(self,
                            ambiguous_nucleotide_alphabet,
                            ambiguous_protein_alphabet,
                            AmbiguousForwardTable(codon_table.forward_table,
                                                  ambiguous_nucleotide_values,
                                                  ambiguous_protein_values),
                            codon_table.back_table,

                            # These two are WRONG!  I need to get the
                            # list of ambiguous codons which code for
                            # the stop codons  XXX
                            # NOTE(review): the comment above may predate
                            # the use of list_ambiguous_codons - confirm.
                            list_ambiguous_codons(codon_table.start_codons, ambiguous_nucleotide_values),
                            list_ambiguous_codons(codon_table.stop_codons, ambiguous_nucleotide_values)
                            )
        self._codon_table = codon_table

    # Be sneaky and forward attribute lookups to the original table.
    # This lets us get the names, if the original table is an NCBI
    # table.
    def __getattr__(self, name):
        """Forward lookups of missing attributes to the wrapped table."""
        # __getattr__ is only called once normal lookup has failed.  If
        # that happens for _codon_table itself (the instance has not
        # been initialised yet) then reading self._codon_table below
        # would re-enter this method without end, so fail cleanly.
        if name == "_codon_table":
            raise AttributeError(name)
        return getattr(self._codon_table, name)
00179 
def list_possible_proteins(codon, forward_table, ambiguous_nucleotide_values):
    """Return a list of the amino acids an ambiguous codon may code for.

    Arguments:
     - codon - three nucleotide letters, possibly ambiguous
     - forward_table - mapping of unambiguous codon -> amino acid; any
       codon missing from it is treated as a stop codon
     - ambiguous_nucleotide_values - mapping of each nucleotide letter
       to the letters it may stand for, e.g. 'R' -> 'AG'

    Returns a list (safe to index and to take the len() of on both
    Python 2 and Python 3) holding each possible amino acid once.

    Raises KeyError if every expansion of the codon is a stop codon,
    and TranslationError if some expansions code for an amino acid
    while others are stop codons.  A nucleotide letter missing from
    ambiguous_nucleotide_values also surfaces as a KeyError.
    """
    c1, c2, c3 = codon
    x1 = ambiguous_nucleotide_values[c1]
    x2 = ambiguous_nucleotide_values[c2]
    x3 = ambiguous_nucleotide_values[c3]
    possible = {}  # used as an ordered set of amino acids
    stops = []
    for y1 in x1:
        for y2 in x2:
            for y3 in x3:
                unambiguous = y1 + y2 + y3
                try:
                    possible[forward_table[unambiguous]] = 1
                except KeyError:
                    # If tripping over a stop codon
                    stops.append(unambiguous)
    if stops:
        if possible:
            raise TranslationError("ambiguous codon '%s' codes " % codon \
                                   + "for both proteins and stop codons")
        # This is a true stop codon - tell the caller about it
        raise KeyError(codon)
    return list(possible)
00202 
def list_ambiguous_codons(codons, ambiguous_nucleotide_values):
    """Extends a codon list to include all possible ambiguous codons.

    e.g. ['TAG', 'TAA'] -> ['TAG', 'TAA', 'TAR']
         ['UAG', 'UGA'] -> ['UAG', 'UGA', 'URA']

    Note that ['TAG', 'TGA'] -> ['TAG', 'TGA'], this does not add 'TRR'.
    Thus only two more codons are added in the following:

    e.g. ['TGA', 'TAA', 'TAG'] -> ['TGA', 'TAA', 'TAG', 'TRA', 'TAR']

    Arguments:
     - codons - sequence of three letter codon strings
     - ambiguous_nucleotide_values - mapping of each nucleotide letter
       to the letters it may stand for, e.g. 'R' -> 'AG'

    Returns a new (longer) list of codon strings: the original codons
    in their original order, followed by the extra ambiguous codons.
    An ambiguous codon is only added if every unambiguous codon it can
    stand for is already in codons.
    """
    known = set(codons)

    def letters_for(position):
        #Letters whose meanings are all seen at this codon position.
        #Note ambiguous_nucleotide_values['R'] = 'AG' (etc)
        seen = set(codon[position] for codon in codons)
        return sorted(letter for (letter, meanings)
                      in ambiguous_nucleotide_values.items()
                      if seen.issuperset(meanings))

    c1_list = letters_for(0)
    c2_list = letters_for(1)
    c3_list = letters_for(2)

    #Combining the letters position by position will generate things
    #like 'TRR' from ['TAG', 'TGA'], which we don't want to include,
    #so these are only candidates.
    #candidates is a list (not a set) to preserve the iteration order
    candidates = []
    for c1 in c1_list:
        for c2 in c2_list:
            for c3 in c3_list:
                codon = c1 + c2 + c3
                if codon not in candidates and codon not in known:
                    candidates.append(codon)

    answer = list(codons) #copy
    for ambig_codon in candidates:
        #e.g. 'TRR' -> 'TAA', 'TAG', 'TGA', 'TGG'
        #If any of these is not in the original list then this
        #ambiguous codon can code for a non-stop, exclude it!
        #(all() stops at the first such codon.)
        if all(n1 + n2 + n3 in known
               for n1 in ambiguous_nucleotide_values[ambig_codon[0]]
               for n2 in ambiguous_nucleotide_values[ambig_codon[1]]
               for n3 in ambiguous_nucleotide_values[ambig_codon[2]]):
            answer.append(ambig_codon)
    return answer
00254 
# Sanity checks for list_ambiguous_codons, evaluated when the module is
# imported (assert statements are skipped when Python runs with -O).
assert list_ambiguous_codons(['TGA', 'TAA'],IUPACData.ambiguous_dna_values) == ['TGA', 'TAA', 'TRA']
assert list_ambiguous_codons(['TAG', 'TGA'],IUPACData.ambiguous_dna_values) == ['TAG', 'TGA']
assert list_ambiguous_codons(['TAG', 'TAA'],IUPACData.ambiguous_dna_values) == ['TAG', 'TAA', 'TAR']
assert list_ambiguous_codons(['UAG', 'UAA'],IUPACData.ambiguous_rna_values) == ['UAG', 'UAA', 'UAR']
assert list_ambiguous_codons(['TGA', 'TAA', 'TAG'],IUPACData.ambiguous_dna_values) == ['TGA', 'TAA', 'TAG', 'TAR', 'TRA']
00260 
00261 # Forward translation is "onto", that is, any given codon always maps
00262 # to the same protein, or it doesn't map at all.  Thus, I can build
00263 # off of an existing table to produce the ambiguous mappings.
00264 #
00265 # This handles the general case.  Perhaps it's overkill?
00266 #  >>> t = CodonTable.ambiguous_dna_by_id[1]
00267 #  >>> t.forward_table["AAT"]
00268 #  'N'
00269 #  >>> t.forward_table["GAT"]
00270 #  'D'
00271 #  >>> t.forward_table["RAT"]
00272 #  'B'
00273 #  >>> t.forward_table["YTA"]
00274 #  'L'
00275 
00276 class AmbiguousForwardTable(object):
00277     def __init__(self, forward_table, ambiguous_nucleotide, ambiguous_protein):
00278         self.forward_table = forward_table
00279 
00280         self.ambiguous_nucleotide = ambiguous_nucleotide
00281         self.ambiguous_protein = ambiguous_protein
00282 
00283         inverted = {}
00284         for name, val in ambiguous_protein.iteritems():
00285             for c in val:
00286                 x = inverted.get(c, {})
00287                 x[name] = 1
00288                 inverted[c] = x
00289         for name, val in inverted.iteritems():
00290             inverted[name] = val.keys()
00291         self._inverted = inverted
00292         
00293         self._cache = {}
00294 
00295     def get(self, codon, failobj = None):
00296         try:
00297             return self.__getitem__(codon)
00298         except KeyError:
00299             return failobj
00300         
00301     def __getitem__(self, codon):
00302         try:
00303             x = self._cache[codon]
00304         except KeyError:
00305             pass
00306         else:
00307             if x is TranslationError:
00308                 raise TranslationError(codon)   # no unique translation
00309             if x is KeyError:
00310                 raise KeyError(codon)  # it's a stop codon
00311             return x
00312         try:
00313             x = self.forward_table[codon]
00314             self._cache[codon] = x
00315             return x
00316         except KeyError:
00317             pass
00318 
00319         # XXX Need to make part of this into a method which returns
00320         # a list of all possible encodings for a codon!
00321         try:
00322             possible = list_possible_proteins(codon,
00323                                               self.forward_table,
00324                                               self.ambiguous_nucleotide)
00325         except KeyError:
00326             self._cache[codon] = KeyError
00327             raise KeyError(codon)  # stop codon
00328         except TranslationError:
00329             self._cache[codon] = TranslationError
00330             raise TranslationError(codon)  # does not code
00331         assert len(possible) > 0, "unambiguous codons must code"
00332 
00333         # Hah!  Only one possible protein, so use it
00334         if len(possible) == 1:
00335             self._cache[codon] = possible[0]
00336             return possible[0]
00337 
00338         # See if there's an ambiguous protein encoding for the multiples.
00339         # Find residues which exist in every coding set.
00340         ambiguous_possible = {}
00341         for amino in possible:
00342             for term in self._inverted[amino]:
00343                 ambiguous_possible[term] = ambiguous_possible.get(term, 0) + 1
00344 
00345         n = len(possible)
00346         possible = []
00347         for amino, val in ambiguous_possible.iteritems():
00348             if val == n:
00349                 possible.append(amino)
00350 
00351         # No amino acid encoding for the results
00352         if len(possible) == 0:
00353             self._cache[codon] = TranslationError
00354             raise TranslationError(codon)   # no valid translation
00355 
00356         # All of these are valid, so choose one
00357         # To be unique, sort by smallet ambiguity then alphabetically
00358         # Can get this if "X" encodes for everything.
00359         #def _sort(x, y, table = self.ambiguous_protein):
00360         #    a = cmp(len(table[x]), len(table[y]))
00361         #    if a == 0:
00362         #        return cmp(x, y)
00363         #    return a
00364 
00365         #Sort by key is 2.x and 3.x compatible
00366         possible.sort(key=lambda x:(len(self.ambiguous_protein[x]), x))
00367                           
00368         x = possible[0]
00369         self._cache[codon] = x
00370         return x
00371 
00372 
def register_ncbi_table(name, alt_name, id,
                        table, start_codons, stop_codons):
    """Turns codon table data into objects, and stores them in the dictionaries (PRIVATE).

    Arguments:
     - name - the table name(s), separated by "; " (or " and ")
     - alt_name - an alternative name, or None
     - id - the NCBI table id (integer)
     - table - mapping of DNA codon -> amino acid for the coding codons
     - start_codons - list of DNA start codons
     - stop_codons - list of DNA stop codons

    Six tables are built from this data (DNA, RNA and generic DNA-or-RNA,
    each as an unambiguous and an ambiguous table) and stored in the
    module level *_by_id and *_by_name dictionaries.  Table id 1 is also
    stored as standard_dna_table and standard_rna_table.
    """
    global standard_dna_table, standard_rna_table

    #In most cases names are divided by "; ", however there is also
    #'Bacterial and Plant Plastid' (which used to be just 'Bacterial')
    names = [x.strip() for x in name.replace(" and ","; ").split("; ")]

    #Note each table object is given its own names list, and that this
    #list ends with alt_name even if that is None.
    dna = NCBICodonTableDNA(id, names + [alt_name], table, start_codons,
                            stop_codons)

    ambig_dna = AmbiguousCodonTable(dna,
                                    IUPAC.ambiguous_dna,
                                    IUPACData.ambiguous_dna_values,
                                    IUPAC.extended_protein,
                                    IUPACData.extended_protein_values)

    # replace all T's with U's for the RNA tables
    rna_table = {}
    generic_table = {}
    for codon, val in table.items():
        generic_table[codon] = val
        codon = codon.replace("T", "U")
        generic_table[codon] = val
        rna_table[codon] = val
    rna_start_codons = []
    generic_start_codons = []
    for codon in start_codons:
        generic_start_codons.append(codon)
        # Only add the RNA spelling to the generic list if it differs,
        # otherwise codons without a T would be listed twice.
        if "T" in codon:
            codon = codon.replace("T", "U")
            generic_start_codons.append(codon)
        rna_start_codons.append(codon)
    rna_stop_codons = []
    generic_stop_codons = []
    for codon in stop_codons:
        generic_stop_codons.append(codon)
        # As above, avoid duplicates for codons without a T.
        if "T" in codon:
            codon = codon.replace("T", "U")
            generic_stop_codons.append(codon)
        rna_stop_codons.append(codon)

    generic = NCBICodonTable(id, names + [alt_name], generic_table,
                             generic_start_codons, generic_stop_codons)

    #The following isn't very elegant, but seems to work nicely.
    #It is a copy of the RNA ambiguity values with "T" added, read as "U".
    _merged_values = dict(IUPACData.ambiguous_rna_values)
    _merged_values["T"] = "U"
    ambig_generic = AmbiguousCodonTable(generic,
                                        Alphabet.NucleotideAlphabet(),
                                        _merged_values,
                                        IUPAC.extended_protein,
                                        IUPACData.extended_protein_values)

    rna = NCBICodonTableRNA(id, names + [alt_name], rna_table,
                            rna_start_codons, rna_stop_codons)

    ambig_rna = AmbiguousCodonTable(rna,
                                    IUPAC.ambiguous_rna,
                                    IUPACData.ambiguous_rna_values,
                                    IUPAC.extended_protein,
                                    IUPACData.extended_protein_values)

    if id == 1:
        standard_dna_table = dna
        standard_rna_table = rna

    unambiguous_dna_by_id[id] = dna
    unambiguous_rna_by_id[id] = rna
    generic_by_id[id] = generic
    ambiguous_dna_by_id[id] = ambig_dna
    ambiguous_rna_by_id[id] = ambig_rna
    ambiguous_generic_by_id[id] = ambig_generic

    if alt_name is not None:
        names.append(alt_name)

    for table_name in names:
        unambiguous_dna_by_name[table_name] = dna
        unambiguous_rna_by_name[table_name] = rna
        generic_by_name[table_name] = generic
        ambiguous_dna_by_name[table_name] = ambig_dna
        ambiguous_rna_by_name[table_name] = ambig_rna
        ambiguous_generic_by_name[table_name] = ambig_generic
00455 
00456 
00457 ### These tables created from the data file
00458 ###  ftp://ftp.ncbi.nih.gov/entrez/misc/data/gc.prt
00459 ### using the following:
00460 ##import re
00461 ##for line in open("gc.prt").readlines():
00462 ##    if line[:2] == " {":
00463 ##        names = []
00464 ##        id = None
00465 ##        aa = None
00466 ##        start = None
00467 ##        bases = []
00468 ##    elif line[:6] == "  name":
00469 ##        names.append(re.search('"([^"]*)"', line).group(1))
00470 ##    elif line[:8] == "    name":
00471 ##        names.append(re.search('"(.*)$', line).group(1))
00472 ##    elif line == ' Mitochondrial; Mycoplasma; Spiroplasma" ,\n':
00473 ##        names[-1] = names[-1] + " Mitochondrial; Mycoplasma; Spiroplasma"
00474 ##    elif line[:4] == "  id":
00475 ##        id = int(re.search('(\d+)', line).group(1))
00476 ##    elif line[:10] == "  ncbieaa ":
00477 ##        aa = line[12:12+64]
00478 ##    elif line[:10] == "  sncbieaa":
00479 ##        start = line[12:12+64]
00480 ##    elif line[:9] == "  -- Base":
00481 ##        bases.append(line[12:12+64])
00482 ##    elif line[:2] == " }":
00483 ##        assert names != [] and id is not None and aa is not None
00484 ##        assert start is not None and bases != []
00485 ##        if len(names) == 1:
00486 ##            names.append(None)
00487 ##        print "register_ncbi_table(name = %s," % repr(names[0])
00488 ##        print "                    alt_name = %s, id = %d," % \
00489 ##              (repr(names[1]), id)
00490 ##        print "                    table = {"
00491 ##        s = "    "
00492 ##        for i in range(64):
00493 ##            if aa[i] != "*":
00494 ##                t = " '%s%s%s': '%s'," % (bases[0][i], bases[1][i],
00495 ##                                          bases[2][i], aa[i])
00496 ##                if len(s) + len(t) > 75:
00497 ##                    print s
00498 ##                    s = "    " + t
00499 ##                else:
00500 ##                    s = s + t
00501 ##        print s, "},"
00502 
00503 ##        s = "                    stop_codons = ["
00504 ##        for i in range(64):
00505 ##            if aa[i] == "*":
00506 ##                t = " '%s%s%s'," % (bases[0][i], bases[1][i], bases[2][i])
00507 ##                if len(s) + len(t) > 75:
00508 ##                    print s
00509 ##                    s = "                                    " + t
00510 ##                else:
00511 ##                    s = s + t
00512 ##        print s, "],"
00513 
00514 ##        s = "                    start_codons = ["
00515 ##        for i in range(64):
00516 ##            if start[i] == "M":
00517 ##                t = " '%s%s%s'," % (bases[0][i], bases[1][i], bases[2][i])
00518 ##                if len(s) + len(t) > 75:
00519 ##                    print s
00520 ##                    s = "                                    " + t
00521 ##                else:
00522 ##                    s = s + t
00523 ##        print s, "]"
00524 ##        print "                    )"
00525 ##    elif line[:2] == "--" or line == "\n" or line == "}\n" or \
00526 ##         line == 'Genetic-code-table ::= {\n':
00527 ##        pass
00528 ##    else:
00529 ##        raise Exception("Unparsed: " + repr(line))
00530 
# Table 1, 'Standard' (alt name 'SGC0'): 61 coding codons, with
# TAA, TAG and TGA as the stop codons.  Registering id 1 also sets
# standard_dna_table and standard_rna_table.
register_ncbi_table(name = 'Standard',
                    alt_name = 'SGC0', id = 1,
                    table = {
     'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
     'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
     'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L',
     'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P',
     'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q',
     'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I',
     'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T',
     'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K',
     'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R',
     'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A',
     'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D',
     'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G',
     'GGG': 'G', },
                    stop_codons = [ 'TAA', 'TAG', 'TGA', ],
                    start_codons = [ 'TTG', 'CTG', 'ATG', ]
                    )
# Table 2, 'Vertebrate Mitochondrial' (alt name 'SGC1').  Compared
# with the Standard table (id 1): TGA codes for W, ATA codes for M,
# and AGA and AGG are stop codons.
register_ncbi_table(name = 'Vertebrate Mitochondrial',
                    alt_name = 'SGC1', id = 2,
                    table = {
     'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
     'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
     'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L',
     'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P',
     'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q',
     'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
     'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T',
     'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N',
     'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'GTT': 'V',
     'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A', 'GCC': 'A',
     'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 'GAA': 'E',
     'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G', },
                    stop_codons = [ 'TAA', 'TAG', 'AGA', 'AGG', ],
                    start_codons = [ 'ATT', 'ATC', 'ATA', 'ATG', 'GTG', ]
                    )
# Table 3, 'Yeast Mitochondrial' (alt name 'SGC2').  Compared with
# the Standard table (id 1): TGA codes for W, ATA codes for M, and
# CTT, CTC, CTA and CTG code for T.
register_ncbi_table(name = 'Yeast Mitochondrial',
                    alt_name = 'SGC2', id = 3,
                    table = {
     'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
     'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
     'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'T',
     'CTC': 'T', 'CTA': 'T', 'CTG': 'T', 'CCT': 'P', 'CCC': 'P',
     'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q',
     'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
     'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T',
     'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N',
     'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R',
     'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
     'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D',
     'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G',
     'GGA': 'G', 'GGG': 'G', },
                    stop_codons = [ 'TAA', 'TAG', ],
                    start_codons = [ 'ATA', 'ATG', ]
                    )
# Table 4 (alt name 'SGC3'), registered under each of the five names
# listed below.  Compared with the Standard table (id 1): TGA codes
# for W.
register_ncbi_table(name = 'Mold Mitochondrial; Protozoan Mitochondrial; Coelenterate Mitochondrial; Mycoplasma; Spiroplasma',
                    alt_name = 'SGC3', id = 4,
                    table = {
     'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
     'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
     'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L',
     'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P',
     'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q',
     'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
     'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T',
     'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N',
     'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R',
     'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
     'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D',
     'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G',
     'GGA': 'G', 'GGG': 'G', },
                    stop_codons = [ 'TAA', 'TAG', ],
                    start_codons = [ 'TTA', 'TTG', 'CTG', 'ATT', 'ATC',
                                     'ATA', 'ATG', 'GTG', ]
                    )
# Table 5, 'Invertebrate Mitochondrial' (alt name 'SGC4').  Compared
# with the Standard table (id 1): TGA codes for W, ATA codes for M,
# and AGA and AGG code for S.
register_ncbi_table(name = 'Invertebrate Mitochondrial',
                    alt_name = 'SGC4', id = 5,
                    table = {
     'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
     'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
     'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L',
     'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P',
     'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q',
     'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
     'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T',
     'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N',
     'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'S',
     'AGG': 'S', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
     'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D',
     'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G',
     'GGA': 'G', 'GGG': 'G', },
                    stop_codons = [ 'TAA', 'TAG', ],
                    start_codons = [ 'TTG', 'ATT', 'ATC', 'ATA', 'ATG',
                                     'GTG', ]
                    )
# Table 6 (alt name 'SGC5'), registered under each of the three names
# listed below.  Compared with the Standard table (id 1): TAA and TAG
# code for Q, leaving TGA as the only stop codon.
register_ncbi_table(name = 'Ciliate Nuclear; Dasycladacean Nuclear; Hexamita Nuclear',
                    alt_name = 'SGC5', id = 6,
                    table = {
     'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
     'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
     'TAA': 'Q', 'TAG': 'Q', 'TGT': 'C', 'TGC': 'C', 'TGG': 'W',
     'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P',
     'CCC': 'P', 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H',
     'CAA': 'Q', 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R',
     'CGG': 'R', 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M',
     'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N',
     'AAC': 'N', 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S',
     'AGA': 'R', 'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V',
     'GTG': 'V', 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A',
     'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G',
     'GGC': 'G', 'GGA': 'G', 'GGG': 'G', },
                    stop_codons = [ 'TGA', ],
                    start_codons = [ 'ATG', ]
                    )
# Table 9 (alt name 'SGC8'), registered under both names listed
# below.  Compared with the Standard table (id 1): TGA codes for W,
# AAA codes for N, and AGA and AGG code for S.
register_ncbi_table(name = 'Echinoderm Mitochondrial; Flatworm Mitochondrial',
                    alt_name = 'SGC8', id = 9,
                    table = {
     'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
     'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
     'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L',
     'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P',
     'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q',
     'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
     'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T',
     'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N',
     'AAA': 'N', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'S',
     'AGG': 'S', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
     'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D',
     'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G',
     'GGA': 'G', 'GGG': 'G', },
                    stop_codons = [ 'TAA', 'TAG', ],
                    start_codons = [ 'ATG', 'GTG', ]
                    )
# NCBI table 10: TGA is read as C.
# TAA and TAG are the stop codons, so they have no entry in the table.
register_ncbi_table(
    name="Euplotid Nuclear",
    alt_name="SGC9",
    id=10,
    # One row per two-base prefix; stop codons are omitted from the mapping.
    table={
        "TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L",
        "TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S",
        "TAT": "Y", "TAC": "Y",
        "TGT": "C", "TGC": "C", "TGA": "C", "TGG": "W",
        "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L",
        "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P",
        "CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
        "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R",
        "ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M",
        "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T",
        "AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K",
        "AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R",
        "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V",
        "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A",
        "GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E",
        "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G",
    },
    stop_codons=["TAA", "TAG"],
    start_codons=["ATG"]
)
# NCBI table 11: 61 coding codons; TAA, TAG and TGA are the stop codons.
# Seven codons are accepted as starts.
register_ncbi_table(
    name="Bacterial and Plant Plastid",
    alt_name=None,
    id=11,
    # One row per two-base prefix; stop codons are omitted from the mapping.
    table={
        "TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L",
        "TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S",
        "TAT": "Y", "TAC": "Y",
        "TGT": "C", "TGC": "C", "TGG": "W",
        "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L",
        "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P",
        "CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
        "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R",
        "ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M",
        "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T",
        "AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K",
        "AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R",
        "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V",
        "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A",
        "GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E",
        "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G",
    },
    stop_codons=["TAA", "TAG", "TGA"],
    start_codons=["TTG", "CTG", "ATT", "ATC", "ATA", "ATG", "GTG"]
)
# NCBI table 12: CTG is read as S.
# TAA, TAG and TGA are the stop codons; CTG and ATG are the start codons.
register_ncbi_table(
    name="Alternative Yeast Nuclear",
    alt_name=None,
    id=12,
    # One row per two-base prefix; stop codons are omitted from the mapping.
    table={
        "TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L",
        "TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S",
        "TAT": "Y", "TAC": "Y",
        "TGT": "C", "TGC": "C", "TGG": "W",
        "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "S",
        "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P",
        "CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
        "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R",
        "ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M",
        "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T",
        "AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K",
        "AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R",
        "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V",
        "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A",
        "GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E",
        "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G",
    },
    stop_codons=["TAA", "TAG", "TGA"],
    start_codons=["CTG", "ATG"]
)
# NCBI table 13: ATA is read as M, AGA/AGG as G and TGA as W.
# TAA and TAG are the stop codons, so they have no entry in the table.
register_ncbi_table(
    name="Ascidian Mitochondrial",
    alt_name=None,
    id=13,
    # One row per two-base prefix; stop codons are omitted from the mapping.
    table={
        "TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L",
        "TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S",
        "TAT": "Y", "TAC": "Y",
        "TGT": "C", "TGC": "C", "TGA": "W", "TGG": "W",
        "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L",
        "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P",
        "CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
        "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R",
        "ATT": "I", "ATC": "I", "ATA": "M", "ATG": "M",
        "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T",
        "AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K",
        "AGT": "S", "AGC": "S", "AGA": "G", "AGG": "G",
        "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V",
        "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A",
        "GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E",
        "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G",
    },
    stop_codons=["TAA", "TAG"],
    start_codons=["TTG", "ATA", "ATG", "GTG"]
)
# NCBI table 14: TAA is read as Y, AAA as N, AGA/AGG as S and TGA as W.
# TAG is the only stop codon.
register_ncbi_table(
    name="Alternative Flatworm Mitochondrial",
    alt_name=None,
    id=14,
    # One row per two-base prefix; the stop codon is omitted from the mapping.
    table={
        "TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L",
        "TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S",
        "TAT": "Y", "TAC": "Y", "TAA": "Y",
        "TGT": "C", "TGC": "C", "TGA": "W", "TGG": "W",
        "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L",
        "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P",
        "CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
        "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R",
        "ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M",
        "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T",
        "AAT": "N", "AAC": "N", "AAA": "N", "AAG": "K",
        "AGT": "S", "AGC": "S", "AGA": "S", "AGG": "S",
        "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V",
        "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A",
        "GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E",
        "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G",
    },
    stop_codons=["TAG"],
    start_codons=["ATG"]
)
# NCBI table 15: TAG is read as Q.
# TAA and TGA are the stop codons, so they have no entry in the table.
register_ncbi_table(
    name="Blepharisma Macronuclear",
    alt_name=None,
    id=15,
    # One row per two-base prefix; stop codons are omitted from the mapping.
    table={
        "TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L",
        "TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S",
        "TAT": "Y", "TAC": "Y", "TAG": "Q",
        "TGT": "C", "TGC": "C", "TGG": "W",
        "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L",
        "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P",
        "CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
        "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R",
        "ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M",
        "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T",
        "AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K",
        "AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R",
        "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V",
        "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A",
        "GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E",
        "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G",
    },
    stop_codons=["TAA", "TGA"],
    start_codons=["ATG"]
)
# NCBI table 16: TAG is read as L.
# TAA and TGA are the stop codons, so they have no entry in the table.
register_ncbi_table(
    name="Chlorophycean Mitochondrial",
    alt_name=None,
    id=16,
    # One row per two-base prefix; stop codons are omitted from the mapping.
    table={
        "TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L",
        "TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S",
        "TAT": "Y", "TAC": "Y", "TAG": "L",
        "TGT": "C", "TGC": "C", "TGG": "W",
        "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L",
        "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P",
        "CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
        "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R",
        "ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M",
        "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T",
        "AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K",
        "AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R",
        "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V",
        "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A",
        "GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E",
        "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G",
    },
    stop_codons=["TAA", "TGA"],
    start_codons=["ATG"]
)
# NCBI table 21: ATA is read as M, AAA as N, AGA/AGG as S and TGA as W.
# TAA and TAG are the stop codons, so they have no entry in the table.
register_ncbi_table(
    name="Trematode Mitochondrial",
    alt_name=None,
    id=21,
    # One row per two-base prefix; stop codons are omitted from the mapping.
    table={
        "TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L",
        "TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S",
        "TAT": "Y", "TAC": "Y",
        "TGT": "C", "TGC": "C", "TGA": "W", "TGG": "W",
        "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L",
        "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P",
        "CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
        "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R",
        "ATT": "I", "ATC": "I", "ATA": "M", "ATG": "M",
        "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T",
        "AAT": "N", "AAC": "N", "AAA": "N", "AAG": "K",
        "AGT": "S", "AGC": "S", "AGA": "S", "AGG": "S",
        "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V",
        "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A",
        "GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E",
        "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G",
    },
    stop_codons=["TAA", "TAG"],
    start_codons=["ATG", "GTG"]
)
# NCBI table 22: TAG is read as L, and TCA is a stop codon alongside
# TAA and TGA, so none of those three has an entry in the table.
register_ncbi_table(
    name="Scenedesmus obliquus Mitochondrial",
    alt_name=None,
    id=22,
    # One row per two-base prefix; stop codons are omitted from the mapping.
    table={
        "TTT": "F", "TTC": "F", "TTA": "L", "TTG": "L",
        "TCT": "S", "TCC": "S", "TCG": "S",
        "TAT": "Y", "TAC": "Y", "TAG": "L",
        "TGT": "C", "TGC": "C", "TGG": "W",
        "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L",
        "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P",
        "CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
        "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R",
        "ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M",
        "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T",
        "AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K",
        "AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R",
        "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V",
        "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A",
        "GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E",
        "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G",
    },
    stop_codons=["TCA", "TAA", "TGA"],
    start_codons=["ATG"]
)
# NCBI table 23: TTA is a stop codon alongside TAA, TAG and TGA, so none
# of those four has an entry in the table.  ATT, ATG and GTG are starts.
register_ncbi_table(
    name="Thraustochytrium Mitochondrial",
    alt_name=None,
    id=23,
    # One row per two-base prefix; stop codons are omitted from the mapping.
    table={
        "TTT": "F", "TTC": "F", "TTG": "L",
        "TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S",
        "TAT": "Y", "TAC": "Y",
        "TGT": "C", "TGC": "C", "TGG": "W",
        "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L",
        "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P",
        "CAT": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
        "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R",
        "ATT": "I", "ATC": "I", "ATA": "I", "ATG": "M",
        "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T",
        "AAT": "N", "AAC": "N", "AAA": "K", "AAG": "K",
        "AGT": "S", "AGC": "S", "AGA": "R", "AGG": "R",
        "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V",
        "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A",
        "GAT": "D", "GAC": "D", "GAA": "E", "GAG": "E",
        "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G",
    },
    stop_codons=["TTA", "TAA", "TAG", "TGA"],
    start_codons=["ATT", "ATG", "GTG"]
)
00855 
00856 
00857 
# Basic sanity test: every unambiguous generic table must have an ambiguous
# counterpart registered under the same name and under the same id.
# The dictionaries are iterated by key only: the values were never used, and
# plain dict iteration behaves the same on Python 2 and Python 3, whereas
# dict.iteritems() exists on Python 2 only.
for key in generic_by_name:
    assert key in ambiguous_generic_by_name[key].names
for key in generic_by_id:
    assert ambiguous_generic_by_id[key].id == key
del key  # do not leave the loop variable behind in the module namespace
00864 
# Per-table checks of the ambiguous forward tables and stop codon lists.
for n in ambiguous_generic_by_id:
    assert ambiguous_rna_by_id[n].forward_table["GUU"] == "V"
    assert ambiguous_rna_by_id[n].forward_table["GUN"] == "V"
    if n != 23:
        # For table 23, UUN = F, L or stop.
        assert ambiguous_rna_by_id[n].forward_table["UUN"] == "X"  # F or L
    # R = A or G, so URA = UAA or UGA / TRA = TAA or TGA = stop codons
    if "UAA" in unambiguous_rna_by_id[n].stop_codons \
    and "UGA" in unambiguous_rna_by_id[n].stop_codons:
        # Looking up a pure stop codon in the forward table must raise
        # KeyError.  If it unexpectedly succeeds, the value is reported in
        # the assertion message rather than printed, which also avoids the
        # Python 2 only print statement.
        try:
            translation = ambiguous_dna_by_id[n].forward_table["TRA"]
        except KeyError:
            pass
        else:
            assert False, "Should be a stop only, got %r" % (translation,)
        assert "URA" in ambiguous_generic_by_id[n].stop_codons
        assert "URA" in ambiguous_rna_by_id[n].stop_codons
        assert "TRA" in ambiguous_generic_by_id[n].stop_codons
        assert "TRA" in ambiguous_dna_by_id[n].stop_codons
del n
# Looking a table up by id and by any of its names must give equal objects,
# first for the ambiguous generic tables, then for the unambiguous ones.
_id_name_pairs = [
    (1, "Standard"),
    (4, "SGC3"),
    (11, "Bacterial"),
    (11, "Plant Plastid"),
    (15, 'Blepharisma Macronuclear'),
]
for _table_id, _table_name in _id_name_pairs:
    assert ambiguous_generic_by_id[_table_id] == \
           ambiguous_generic_by_name[_table_name]
for _table_id, _table_name in _id_name_pairs:
    assert generic_by_id[_table_id] == generic_by_name[_table_name]
# Remove the temporaries so the module namespace is left as it was.
del _id_name_pairs, _table_id, _table_name