Back to index

python-biopython  1.60
Classes | Functions | Variables
Bio.Data.CodonTable Namespace Reference

Classes

class  TranslationError
class  CodonTable
class  NCBICodonTable
class  NCBICodonTableDNA
class  NCBICodonTableRNA
class  AmbiguousCodonTable
class  AmbiguousForwardTable

Functions

def make_back_table
def list_possible_proteins
def list_ambiguous_codons
def register_ncbi_table

Variables

dictionary unambiguous_dna_by_name = {}
dictionary unambiguous_dna_by_id = {}
dictionary unambiguous_rna_by_name = {}
dictionary unambiguous_rna_by_id = {}
dictionary generic_by_name = {}
dictionary generic_by_id = {}
dictionary ambiguous_dna_by_name = {}
dictionary ambiguous_dna_by_id = {}
dictionary ambiguous_rna_by_name = {}
dictionary ambiguous_rna_by_id = {}
dictionary ambiguous_generic_by_name = {}
dictionary ambiguous_generic_by_id = {}
 standard_dna_table = None
 standard_rna_table = None
string alt_name = 'SGC0'
 These tables created from the data file ftp://ftp.ncbi.nih.gov/entrez/misc/data/gc.prt using the following: import re for line in open("gc.prt").readlines(): if line[:2] == " {": names = [] id = None aa = None start = None bases = [] elif line[:6] == " name": names.append(re.search('"([^"]*)"', line).group(1)) elif line[:8] == " name": names.append(re.search('"(.
dictionary table
list stop_codons = [ 'TAA', 'TAG', 'TGA', ]
list start_codons = [ 'TTG', 'CTG', 'ATG', ]

Class Documentation

class Bio::Data::CodonTable::TranslationError

Definition at line 38 of file CodonTable.py.


Function Documentation

def Bio.Data.CodonTable.list_ambiguous_codons (   codons,
  ambiguous_nucleotide_values 
)
Extends a codon list to include all possible ambiguous codons.

e.g. ['TAG', 'TAA'] -> ['TAG', 'TAA', 'TAR']
     ['UAG', 'UGA'] -> ['UAG', 'UGA', 'URA']

Note that ['TAG', 'TGA'] -> ['TAG', 'TGA'], this does not add 'TRR'.
Thus only two more codons are added in the following:

e.g. ['TGA', 'TAA', 'TAG'] -> ['TGA', 'TAA', 'TAG', 'TAR', 'TRA']

Returns a new (longer) list of codon strings.

Definition at line 203 of file CodonTable.py.

00203 
def list_ambiguous_codons(codons, ambiguous_nucleotide_values):
    """Extends a codon list to include all possible ambiguous codons.

    e.g. ['TAG', 'TAA'] -> ['TAG', 'TAA', 'TAR']
         ['UAG', 'UGA'] -> ['UAG', 'UGA', 'URA']

    Note that ['TAG', 'TGA'] -> ['TAG', 'TGA'], this does not add 'TRR'.
    Thus only two more codons are added in the following:

    e.g. ['TGA', 'TAA', 'TAG'] -> ['TGA', 'TAA', 'TAG', 'TAR', 'TRA']

    Arguments:
     - codons - list of codon strings; positions 0, 1 and 2 of each
       string are inspected.
     - ambiguous_nucleotide_values - mapping from each nucleotide letter
       to the letters it can stand for,
       e.g. ambiguous_nucleotide_values['R'] = 'AG'.

    Returns a new (longer) list of codon strings: a copy of the input
    followed by every ambiguous codon whose expansions are all present
    in the input.  The input list is not modified.
    """
    known = set(codons)

    def _letters_at(position):
        # Letters (ambiguous or not) whose every meaning is seen at this
        # position in at least one of the given codons.  Sorted so the
        # result does not depend on dictionary iteration order.
        seen = set(codon[position] for codon in codons)
        return sorted(letter for letter, meanings
                      in ambiguous_nucleotide_values.items()
                      if seen.issuperset(meanings))

    first_letters = _letters_at(0)
    second_letters = _letters_at(1)
    third_letters = _letters_at(2)

    answer = list(codons)  # copy
    for c1 in first_letters:
        for c2 in second_letters:
            for c3 in third_letters:
                ambig_codon = c1 + c2 + c3
                if ambig_codon in known:
                    continue
                # Combining per-position letters over-generates, e.g.
                # 'TRR' from ['TAG', 'TGA'] would also cover 'TAA' and
                # 'TGG'.  Keep a candidate only if every codon it can
                # stand for was in the input; all() stops at the first
                # expansion that is missing.
                if all(x + y + z in known
                       for x in ambiguous_nucleotide_values[c1]
                       for y in ambiguous_nucleotide_values[c2]
                       for z in ambiguous_nucleotide_values[c3]):
                    answer.append(ambig_codon)
    return answer

Here is the caller graph for this function:

def Bio.Data.CodonTable.list_possible_proteins (   codon,
  forward_table,
  ambiguous_nucleotide_values 
)

Definition at line 180 of file CodonTable.py.

00180 
def list_possible_proteins(codon, forward_table, ambiguous_nucleotide_values):
    """Return the amino acids a (possibly ambiguous) codon may code for.

    Arguments:
     - codon - three-letter codon; it is unpacked into three letters.
     - forward_table - mapping from unambiguous codon string to amino
       acid.  Codons missing from it are treated as stop codons.
     - ambiguous_nucleotide_values - mapping from each nucleotide letter
       to the letters it can stand for.

    Returns a list of the distinct amino acids found for the expansions
    of the codon (order not guaranteed).

    Raises TranslationError if some expansions are in forward_table and
    others are not, and KeyError(codon) if none of them are.  A letter
    missing from ambiguous_nucleotide_values also surfaces as a
    KeyError from the lookup.
    """
    c1, c2, c3 = codon
    x1 = ambiguous_nucleotide_values[c1]
    x2 = ambiguous_nucleotide_values[c2]
    x3 = ambiguous_nucleotide_values[c3]
    possible = {}  # used as a set of amino acids
    found_stop = False
    for y1 in x1:
        for y2 in x2:
            for y3 in x3:
                try:
                    possible[forward_table[y1 + y2 + y3]] = 1
                except KeyError:
                    # If tripping over a stop codon
                    found_stop = True
    if found_stop:
        if possible:
            raise TranslationError("ambiguous codon '%s' codes " % codon \
                                   + "for both proteins and stop codons")
        # This is a true stop codon - tell the caller about it
        raise KeyError(codon)
    # list(...) rather than .keys(): a real list on Python 2 and 3.
    return list(possible)

Here is the caller graph for this function:

def Bio.Data.CodonTable.make_back_table (   table,
  default_stop_codon 
)

Definition at line 119 of file CodonTable.py.

00119 
def make_back_table(table, default_stop_codon):
    """Build a reverse lookup (value -> key) from a forward codon table.

    ONLY RETURNS A SINGLE CODON per value: where several keys of
    ``table`` share a value, the key that sorts last is kept.  The keys
    are visited in sorted order so the choice does not depend on the
    hash implementation.  The entry for None is set to
    ``default_stop_codon``.
    """
    # dict() keeps the last pair seen for a repeated key, so feeding it
    # pairs in sorted-codon order gives the same winner as overwriting
    # entries one by one.
    reverse_lookup = dict((table[codon], codon) for codon in sorted(table))
    reverse_lookup[None] = default_stop_codon
    return reverse_lookup
00129 

def Bio.Data.CodonTable.register_ncbi_table (   name,
  alt_name,
  id,
  table,
  start_codons,
  stop_codons 
)
Turns codon table data into objects, and stores them in the dictionaries (PRIVATE).

Definition at line 374 of file CodonTable.py.

def register_ncbi_table(name, alt_name, id,
                        table, start_codons, stop_codons):
    """Turns codon table data into objects, and stores them in the dictionaries (PRIVATE).

    Arguments:
     - name - table name(s) in one string, separated by "; " or " and ".
     - alt_name - alternative name, or None.
     - id - table identifier, used as the key of the *_by_id dictionaries.
     - table - mapping from DNA codon (written with T) to amino acid.
     - start_codons - list of DNA start codons.
     - stop_codons - list of DNA stop codons.

    From the DNA data it derives an RNA version (T replaced by U) and a
    "generic" version containing both spellings, builds the unambiguous
    and ambiguous table objects for each, and registers them in the
    module-level *_by_id and *_by_name dictionaries.  When id is 1 the
    DNA and RNA tables are also stored as standard_dna_table and
    standard_rna_table.
    """
    global standard_dna_table, standard_rna_table

    #In most cases names are divided by "; ", however there is also
    #'Bacterial and Plant Plastid' (which used to be just 'Bacterial')
    names = [x.strip() for x in name.replace(" and ", "; ").split("; ")]

    dna = NCBICodonTableDNA(id, names + [alt_name], table, start_codons,
                            stop_codons)

    ambig_dna = AmbiguousCodonTable(dna,
                                    IUPAC.ambiguous_dna,
                                    IUPACData.ambiguous_dna_values,
                                    IUPAC.extended_protein,
                                    IUPACData.extended_protein_values)

    # replace all T's with U's for the RNA tables; the generic tables
    # hold both the T and the U spelling of every codon.
    rna_table = {}
    generic_table = {}
    for codon, val in table.items():
        generic_table[codon] = val
        codon = codon.replace("T", "U")
        generic_table[codon] = val
        rna_table[codon] = val
    rna_start_codons = []
    generic_start_codons = []
    for codon in start_codons:
        generic_start_codons.append(codon)
        codon = codon.replace("T", "U")
        generic_start_codons.append(codon)
        rna_start_codons.append(codon)
    rna_stop_codons = []
    generic_stop_codons = []
    for codon in stop_codons:
        generic_stop_codons.append(codon)
        codon = codon.replace("T", "U")
        generic_stop_codons.append(codon)
        rna_stop_codons.append(codon)

    generic = NCBICodonTable(id, names + [alt_name], generic_table,
                             generic_start_codons, generic_stop_codons)

    #The following isn't very elegant, but seems to work nicely.
    #(RNA ambiguity values plus an entry for "T".)
    _merged_values = dict(IUPACData.ambiguous_rna_values)
    _merged_values["T"] = "U"
    ambig_generic = AmbiguousCodonTable(generic,
                                        Alphabet.NucleotideAlphabet(),
                                        _merged_values,
                                        IUPAC.extended_protein,
                                        IUPACData.extended_protein_values)

    rna = NCBICodonTableRNA(id, names + [alt_name], rna_table,
                            rna_start_codons, rna_stop_codons)

    ambig_rna = AmbiguousCodonTable(rna,
                                    IUPAC.ambiguous_rna,
                                    IUPACData.ambiguous_rna_values,
                                    IUPAC.extended_protein,
                                    IUPACData.extended_protein_values)

    if id == 1:
        standard_dna_table = dna
        standard_rna_table = rna

    unambiguous_dna_by_id[id] = dna
    unambiguous_rna_by_id[id] = rna
    generic_by_id[id] = generic
    ambiguous_dna_by_id[id] = ambig_dna
    ambiguous_rna_by_id[id] = ambig_rna
    ambiguous_generic_by_id[id] = ambig_generic

    if alt_name is not None:
        names.append(alt_name)

    # Loop variable deliberately not called "name", to avoid shadowing
    # the parameter.
    for table_name in names:
        unambiguous_dna_by_name[table_name] = dna
        unambiguous_rna_by_name[table_name] = rna
        generic_by_name[table_name] = generic
        ambiguous_dna_by_name[table_name] = ambig_dna
        ambiguous_rna_by_name[table_name] = ambig_rna
        ambiguous_generic_by_name[table_name] = ambig_generic
00456 

Here is the caller graph for this function:


Variable Documentation

These tables were created from the data file ftp://ftp.ncbi.nih.gov/entrez/misc/data/gc.prt using the following script. (Runs of spaces inside the string literals may have been collapsed by the documentation renderer; the compared prefixes below are padded to match their slice lengths.)

    import re
    for line in open("gc.prt").readlines():
        if line[:2] == " {":
            names = []
            id = None
            aa = None
            start = None
            bases = []
        elif line[:6] == "  name":
            names.append(re.search('"([^"]*)"', line).group(1))
        elif line[:8] == "    name":
            names.append(re.search('"(.*)$', line).group(1))
        elif line == ' Mitochondrial; Mycoplasma; Spiroplasma" ,\n':
            names[-1] = names[-1] + " Mitochondrial; Mycoplasma; Spiroplasma"
        elif line[:4] == "  id":
            id = int(re.search('(\d+)', line).group(1))
        elif line[:10] == "  ncbieaa ":
            aa = line[12:12+64]
        elif line[:10] == "  sncbieaa":
            start = line[12:12+64]
        elif line[:9] == "  -- Base":
            bases.append(line[12:12+64])
        elif line[:2] == " }":
            assert names != [] and id is not None and aa is not None
            assert start is not None and bases != []
            if len(names) == 1:
                names.append(None)
            print "register_ncbi_table(name = %s," % repr(names[0])
            print " alt_name = %s, id = %d," % \
                  (repr(names[1]), id)
            print " table = {"
            s = " "
            for i in range(64):
                if aa[i] != "*":
                    t = " '%s%s%s': '%s'," % (bases[0][i], bases[1][i],
                                              bases[2][i], aa[i])
                    if len(s) + len(t) > 75:
                        print s
                        s = " " + t
                    else:
                        s = s + t
            print s, "},"
            s = " stop_codons = ["
            for i in range(64):
                if aa[i] == "*":
                    t = " '%s%s%s'," % (bases[0][i], bases[1][i], bases[2][i])
                    if len(s) + len(t) > 75:
                        print s
                        s = " " + t
                    else:
                        s = s + t
            print s, "],"
            s = " start_codons = ["
            for i in range(64):
                if start[i] == "M":
                    t = " '%s%s%s'," % (bases[0][i], bases[1][i], bases[2][i])
                    if len(s) + len(t) > 75:
                        print s
                        s = " " + t
                    else:
                        s = s + t
            print s, "]"
            print " )"
        elif line[:2] == "--" or line == "\n" or line == "}\n" or \
             line == 'Genetic-code-table ::= {\n':
            pass
        else:
            raise Exception("Unparsed: " + repr(line))

Definition at line 532 of file CodonTable.py.

Definition at line 24 of file CodonTable.py.

Definition at line 23 of file CodonTable.py.

Definition at line 28 of file CodonTable.py.

Definition at line 27 of file CodonTable.py.

Definition at line 26 of file CodonTable.py.

Definition at line 25 of file CodonTable.py.

Definition at line 21 of file CodonTable.py.

Definition at line 20 of file CodonTable.py.

Definition at line 31 of file CodonTable.py.

Definition at line 32 of file CodonTable.py.

list Bio.Data.CodonTable.start_codons = [ 'TTG', 'CTG', 'ATG', ]

Definition at line 548 of file CodonTable.py.

list Bio.Data.CodonTable.stop_codons = [ 'TAA', 'TAG', 'TGA', ]

Definition at line 547 of file CodonTable.py.

Initial value:
00001 {
00002      'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
00003      'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
00004      'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L',
00005      'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P',
00006      'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q',
00007      'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I',
00008      'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T',
00009      'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K',
00010      'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R',
00011      'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A',
00012      'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D',
00013      'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G',
00014      'GGG': 'G', }

Definition at line 533 of file CodonTable.py.

Definition at line 17 of file CodonTable.py.

Definition at line 16 of file CodonTable.py.

Definition at line 19 of file CodonTable.py.

Definition at line 18 of file CodonTable.py.