Back to index

python-biopython  1.60
Reduced.py
Go to the documentation of this file.
00001 # Copyright 2004 by Iddo Friedberg.
00002 # All rights reserved.
00003 # This code is part of the Biopython distribution and governed by its
00004 # license.  Please see the LICENSE file that should have been included
00005 # as part of this package.
00006 
00007 """Reduced alphabets which lump together several amino-acids into one letter.
00008 
00009 Reduced (redundant or simplified) alphabets are used to represent protein sequences using an
00010 alternative alphabet which lumps together several amino-acids into one letter, based
00011 on physico-chemical traits. For example, all the aliphatics (I,L,V) are usually
00012 quite interchangeable, so many sequence studies group them into one letter.
00013 
00014 Examples of reduced alphabets are available in:
00015 
00016 http://viscose.ifg.uni-muenster.de/html/alphabets.html
00017 
00018 The Murphy tables are from here:
00019 
00020 Murphy L.R., Wallqvist A, Levy RM. (2000) Simplified amino acid
00021 alphabets for protein fold recognition and implications for folding.
00022 Protein Eng. 13(3):149-152
00023 
00024 Bio.utils.reduce_sequence is used to take a Protein alphabet, and reduce it using one of
00025 the tables here, or a user-defined table.
00026 """
00027 
00028 from Bio import Alphabet
00029 
00030 
# Murphy et al. (2000) 15-letter reduction.  The two strings are aligned
# column by column: each standard amino acid in the first string maps to
# the reduced letter directly below it in the second string.
murphy_15_tab = dict(zip(
    "LVIMCAGSTPFYWEDNQKRH",    # standard amino acid
    "LLLLCAGSTPFFWEDNQKKH"))   # reduced letter
00051 
class Murphy15(Alphabet.ProteinAlphabet):
    """Protein alphabet made of the 15 letters used by murphy_15_tab."""

    letters = "LCAGSTPFWEDNQKH"
    size = len(letters)  # 15


# Module-level instance of the 15-letter alphabet.
murphy_15 = Murphy15()
00056 
# Murphy et al. (2000) 10-letter reduction.  The two strings are aligned
# column by column: each standard amino acid in the first string maps to
# the reduced letter directly below it in the second string.
murphy_10_tab = dict(zip(
    "LVIMCAGSTPFYWEDNQKRH",    # standard amino acid
    "LLLLCAGSSPFFFEEEEKKH"))   # reduced letter
class Murphy10(Alphabet.ProteinAlphabet):
    """Protein alphabet made of the 10 letters used by murphy_10_tab."""

    letters = "LCAGSPFEKH"
    size = len(letters)  # 10


# Module-level instance of the 10-letter alphabet.
murphy_10 = Murphy10()
00081 
# Murphy et al. (2000) 8-letter reduction.  The two strings are aligned
# column by column: each standard amino acid in the first string maps to
# the reduced letter directly below it in the second string.
murphy_8_tab = dict(zip(
    "LVIMCAGSTPFYWEDNQKRH",    # standard amino acid
    "LLLLLAASSPFFFEEEEKKH"))   # reduced letter
00102 
class Murphy8(Alphabet.ProteinAlphabet):
    """Protein alphabet made of the 8 letters used by murphy_8_tab."""

    letters = "LASPFEKH"
    size = len(letters)  # 8


# Module-level instance of the 8-letter alphabet.
murphy_8 = Murphy8()
00107 
# Murphy et al. (2000) 4-letter reduction.  The two strings are aligned
# column by column: each standard amino acid in the first string maps to
# the reduced letter directly below it in the second string.
murphy_4_tab = dict(zip(
    "LVIMCAGSTPFYWEDNQKRH",    # standard amino acid
    "LLLLLAAAAAFFFEEEEEEE"))   # reduced letter
00128 
class Murphy4(Alphabet.ProteinAlphabet):
    """Protein alphabet made of the 4 letters used by murphy_4_tab."""

    letters = "LAFE"
    size = len(letters)  # 4


# Module-level instance of the 4-letter alphabet.
murphy_4 = Murphy4()
00133 
# Two-letter HP reduction, built one group at a time: every amino acid in
# the first group maps to "P", every amino acid in the second maps to "H".
hp_model_tab = dict.fromkeys("AGTSNQDEHRKP", "P")    # Hydrophilic
hp_model_tab.update(dict.fromkeys("CMFILVWY", "H"))  # Hydrophobic
00154 
class HPModel(Alphabet.ProteinAlphabet):
    """Two-letter protein alphabet ("H" and "P") used by hp_model_tab."""

    letters = "HP"
    size = len(letters)  # 2


# Module-level instance of the HP alphabet.
hp_model = HPModel()
00159 
# Five-letter physico-chemical reduction, built one group at a time: every
# amino acid in a group string maps to that group's single reduced letter.
pc_5_table = dict.fromkeys("IVL", "A")          # Aliphatic
pc_5_table.update(dict.fromkeys("FYWH", "R"))   # Aromatic
pc_5_table.update(dict.fromkeys("KRDE", "C"))   # Charged
pc_5_table.update(dict.fromkeys("GACS", "T"))   # Tiny
pc_5_table.update(dict.fromkeys("TMQNP", "D"))  # Diverse
00180 
class PC5(Alphabet.ProteinAlphabet):
    """Five-letter physico-chemical protein alphabet used by pc_5_table.

    Letters: A (aliphatic), R (aromatic), C (charged), T (tiny),
    D (diverse).
    """

    letters = "ARCTD"
    size = 5


# Module-level instance of the PC5 alphabet.  This line previously read
# ``hp_model = HPModel()`` (a copy-paste slip), which only rebound the HP
# model and left the PC5 alphabet without any instance.
pc_5 = PC5()