Back to index

python-biopython  1.60
IUPAC.py
Go to the documentation of this file.
00001 # Copyright 2000-2001 by Andrew Dalke.
00002 # Revisions copyright 2008 by Peter Cock.
00003 # All rights reserved.
00004 # This code is part of the Biopython distribution and governed by its
00005 # license.  Please see the LICENSE file that should have been included
00006 # as part of this package.
00007 
00008 """Standard nucleotide and protein alphabets defined by IUPAC."""
00009 
00010 from Bio import Alphabet
00011 from Bio.Data import IUPACData
00012 
00013 ##################### Protein
00014 
00015 # From the IUPAC definition at:
00016 #   http://www.chem.qmw.ac.uk/iupac/AminoAcid/A2021.html#AA21
00017 
# Import-time sanity check: the alphabet below is documented as uppercase,
# so the letter string it is built from must already be all uppercase.
# (Like any assert, this check is skipped when Python runs with -O.)
assert IUPACData.extended_protein_letters == IUPACData.extended_protein_letters.upper()


class ExtendedIUPACProtein(Alphabet.ProteinAlphabet):
    """Extended uppercase IUPAC protein single letter alphabet including X etc.

    In addition to the standard 20 single letter protein codes, this includes:

    B = "Asx";  Aspartic acid (D) or Asparagine (N)
    X = "Xxx";  Unknown or 'other' amino acid
    Z = "Glx";  Glutamic acid (E) or Glutamine (Q)
    J = "Xle";  Leucine (L) or Isoleucine (I), used in mass-spec (NMR)
    U = "Sec";  Selenocysteine
    O = "Pyl";  Pyrrolysine

    This alphabet is not intended to be used with X for Selenocysteine
    (an ad-hoc standard prior to the IUPAC adoption of U instead).
    """

    # The permitted letters are defined centrally in Bio.Data.IUPACData.
    letters = IUPACData.extended_protein_letters


# Ready-made module-level instance of the alphabet.
extended_protein = ExtendedIUPACProtein()
00037 
# Import-time sanity check that the letter string used below is already
# uppercase. (Skipped when Python runs with -O, like any assert.)
assert IUPACData.protein_letters == IUPACData.protein_letters.upper()


class IUPACProtein(ExtendedIUPACProtein):
    """Uppercase IUPAC protein alphabet: the 20 standard amino acids only.

    This derives from ExtendedIUPACProtein and replaces its ``letters`` with
    ``IUPACData.protein_letters``, so an instance of this class is also an
    instance of ExtendedIUPACProtein.
    """

    letters = IUPACData.protein_letters


# Ready-made module-level instance of the alphabet.
protein = IUPACProtein()
00044 
00045 ##################### DNA
00046 
00047 # The next two are the IUPAC definitions, from:
00048 #   http://www.chem.qmw.ac.uk/iubmb/misc/naseq.html
class IUPACAmbiguousDNA(Alphabet.DNAAlphabet):
    """Uppercase IUPAC DNA alphabet including the ambiguity codes.

    The permitted letters are ``IUPACData.ambiguous_dna_letters``.
    """

    letters = IUPACData.ambiguous_dna_letters


# Ready-made module-level instance of the alphabet.
ambiguous_dna = IUPACAmbiguousDNA()
00054 
class IUPACUnambiguousDNA(IUPACAmbiguousDNA):
    """Uppercase IUPAC unambiguous DNA alphabet (letters GATC only).

    This derives from IUPACAmbiguousDNA and replaces its ``letters`` with
    ``IUPACData.unambiguous_dna_letters``, so an instance of this class is
    also an instance of IUPACAmbiguousDNA.
    """

    letters = IUPACData.unambiguous_dna_letters


# Ready-made module-level instance of the alphabet.
unambiguous_dna = IUPACUnambiguousDNA()
00060 
00061 
00062 # Also from the URL, but not part of the standard
class ExtendedIUPACDNA(Alphabet.DNAAlphabet):
    """Extended IUPAC DNA alphabet (not part of the IUPAC standard).

    On top of the four standard letter codes G, A, T and C, the following
    letters stand for modified bases:

    B = 5-bromouridine
    D = 5,6-dihydrouridine
    S = thiouridine
    W = wyosine

    NOTE: in the IUPAC ambiguity codes the letters B, D, S and W have
    different meanings (sets of possible bases), so sequences using this
    alphabet should not be interpreted as ambiguous DNA.
    """

    # The permitted letters are defined centrally in Bio.Data.IUPACData.
    letters = IUPACData.extended_dna_letters


# Ready-made module-level instance of the alphabet.
extended_dna = ExtendedIUPACDNA()
00076 
00077 ##################### RNA
00078 
class IUPACAmbiguousRNA(Alphabet.RNAAlphabet):
    """Uppercase IUPAC RNA alphabet including the ambiguity codes.

    The permitted letters are ``IUPACData.ambiguous_rna_letters``.
    """

    letters = IUPACData.ambiguous_rna_letters


# Ready-made module-level instance of the alphabet.
ambiguous_rna = IUPACAmbiguousRNA()
00084 
class IUPACUnambiguousRNA(IUPACAmbiguousRNA):
    """Uppercase IUPAC unambiguous RNA alphabet (letters GAUC only).

    This derives from IUPACAmbiguousRNA and replaces its ``letters`` with
    ``IUPACData.unambiguous_rna_letters``, so an instance of this class is
    also an instance of IUPACAmbiguousRNA.
    """

    letters = IUPACData.unambiguous_rna_letters


# Ready-made module-level instance of the alphabet.
unambiguous_rna = IUPACUnambiguousRNA()
00090 
00091 # are there extended forms?
00092 #class ExtendedIUPACRNA(Alphabet.RNAAlphabet):
00093 #    letters = extended_rna_letters
00094 #    #   B == 5-bromouridine
00095 #    #   D == 5,6-dihydrouridine
00096 #    #   S == thiouridine
00097 #    #   W == wyosine