Back to index

python-biopython  1.60
MeltingTemp.py
Go to the documentation of this file.
00001 # Copyright 2004-2008 by Sebastian Bassi.
00002 # All rights reserved.
00003 # This code is part of the Biopython distribution and governed by its
00004 # license.  Please see the LICENSE file that should have been included
00005 # as part of this package.
00006 
00007 """Calculate the thermodynamic melting temperatures of nucleotide sequences."""
00008 
00009 import math
00010 def Tm_staluc(s,dnac=50,saltc=50,rna=0):
00011     """Returns DNA/DNA tm using nearest neighbor thermodynamics.
00012 
00013     dnac is DNA concentration [nM]
00014     saltc is salt concentration [mM].
00015     rna=0 is for DNA/DNA (default), use 1 for RNA/RNA hybridisation.
00016 
00017     For DNA/DNA, see Allawi & SantaLucia (1997), Biochemistry 36: 10581-10594
00018     For RNA/RNA, see Xia et al (1998), Biochemistry 37: 14719-14735
00019 
00020     Example:
00021 
00022     >>> print "%0.2f" % Tm_staluc('CAGTCAGTACGTACGTGTACTGCCGTA')
00023     59.87
00024     >>> print "%0.2f" % Tm_staluc('CAGTCAGTACGTACGTGTACTGCCGTA', rna=True)
00025     68.14
00026 
00027     You can also use a Seq object instead of a string,
00028 
00029     >>> from Bio.Seq import Seq
00030     >>> from Bio.Alphabet import generic_nucleotide
00031     >>> s = Seq('CAGTCAGTACGTACGTGTACTGCCGTA', generic_nucleotide)
00032     >>> print "%0.2f" % Tm_staluc(s)
00033     59.87
00034     >>> print "%0.2f" % Tm_staluc(s, rna=True)
00035     68.14
00036 
00037     """
00038 
00039     #Credits: 
00040     #Main author: Sebastian Bassi <sbassi@genesdigitales.com>
00041     #Overcount function: Greg Singer <singerg@tcd.ie>
00042     #Based on the work of Nicolas Le Novere <lenov@ebi.ac.uk> Bioinformatics.
00043     #17:1226-1227(2001)
00044 
00045     #This function returns better results than EMBOSS DAN because it uses
00046     #updated thermodynamics values and takes into account inicialization
00047     #parameters from the work of SantaLucia (1998).
00048     
00049     #Things to do:
00050     #+Detect complementary sequences. Change K according to result.
00051     #+Add support for heteroduplex (see Sugimoto et al. 1995).
00052     #+Correction for Mg2+. Now supports only monovalent ions.
00053     #+Put thermodinamics table in a external file for users to change at will
00054     #+Add support for danglings ends (see Le Novele. 2001) and mismatches.
00055     
00056     dh = 0 #DeltaH. Enthalpy
00057     ds = 0 #deltaS Entropy
00058 
00059     def tercorr(stri):
00060         deltah = 0
00061         deltas = 0
00062         if rna==0:
00063             #DNA/DNA
00064             #Allawi and SantaLucia (1997). Biochemistry 36 : 10581-10594
00065             if stri.startswith('G') or stri.startswith('C'):
00066                 deltah -= 0.1
00067                 deltas += 2.8
00068             elif stri.startswith('A') or stri.startswith('T'):
00069                 deltah -= 2.3
00070                 deltas -= 4.1
00071             if stri.endswith('G') or stri.endswith('C'):
00072                 deltah -= 0.1
00073                 deltas += 2.8
00074             elif stri.endswith('A') or stri.endswith('T'):
00075                 deltah -= 2.3
00076                 deltas -= 4.1
00077             dhL = dh + deltah
00078             dsL = ds + deltas
00079             return dsL,dhL
00080         elif rna==1:
00081             #RNA
00082             if stri.startswith('G') or stri.startswith('C'):
00083                 deltah -= 3.61
00084                 deltas -= 1.5
00085             elif stri.startswith('A') or stri.startswith('T') or \
00086                  stri.startswith('U'):
00087                 deltah -= 3.72
00088                 deltas += 10.5
00089             if stri.endswith('G') or stri.endswith('C'):
00090                 deltah -= 3.61
00091                 deltas -= 1.5
00092             elif stri.endswith('A') or stri.endswith('T') or \
00093                  stri.endswith('U'):
00094                 deltah -= 3.72
00095                 deltas += 10.5
00096             dhL = dh + deltah
00097             dsL = ds + deltas
00098             # print "delta h=",dhL
00099             return dsL,dhL
00100         else:
00101             raise ValueError("rna = %r not supported" % rna)
00102 
00103     def overcount(st,p):
00104         """Returns how many p are on st, works even for overlapping"""
00105         ocu = 0
00106         x = 0
00107         while True:
00108             try:
00109                 i = st.index(p,x)
00110             except ValueError:
00111                 break
00112             ocu += 1
00113             x = i + 1
00114         return ocu
00115 
00116     R = 1.987 # universal gas constant in Cal/degrees C*Mol
00117     sup = str(s).upper() #turn any Seq object into a string (need index method)
00118     vsTC, vh = tercorr(sup)
00119     vs = vsTC
00120     
00121     k = (dnac/4.0)*1e-9
00122     #With complementary check on, the 4.0 should be changed to a variable.
00123     
00124     if rna==0:
00125         #DNA/DNA
00126         #Allawi and SantaLucia (1997). Biochemistry 36 : 10581-10594
00127         vh = vh + (overcount(sup,"AA"))*7.9 + (overcount(sup,"TT"))*\
00128         7.9 + (overcount(sup,"AT"))*7.2 + (overcount(sup,"TA"))*7.2 \
00129         + (overcount(sup,"CA"))*8.5 + (overcount(sup,"TG"))*8.5 + \
00130         (overcount(sup,"GT"))*8.4 + (overcount(sup,"AC"))*8.4
00131         vh = vh + (overcount(sup,"CT"))*7.8+(overcount(sup,"AG"))*\
00132         7.8 + (overcount(sup,"GA"))*8.2 + (overcount(sup,"TC"))*8.2
00133         vh = vh + (overcount(sup,"CG"))*10.6+(overcount(sup,"GC"))*\
00134         9.8 + (overcount(sup,"GG"))*8 + (overcount(sup,"CC"))*8
00135         vs = vs + (overcount(sup,"AA"))*22.2+(overcount(sup,"TT"))*\
00136         22.2 + (overcount(sup,"AT"))*20.4 + (overcount(sup,"TA"))*21.3
00137         vs = vs + (overcount(sup,"CA"))*22.7+(overcount(sup,"TG"))*\
00138         22.7 + (overcount(sup,"GT"))*22.4 + (overcount(sup,"AC"))*22.4
00139         vs = vs + (overcount(sup,"CT"))*21.0+(overcount(sup,"AG"))*\
00140         21.0 + (overcount(sup,"GA"))*22.2 + (overcount(sup,"TC"))*22.2
00141         vs = vs + (overcount(sup,"CG"))*27.2+(overcount(sup,"GC"))*\
00142         24.4 + (overcount(sup,"GG"))*19.9 + (overcount(sup,"CC"))*19.9
00143         ds = vs
00144         dh = vh        
00145     elif rna==1:
00146         #RNA/RNA hybridisation of Xia et al (1998)
00147         #Biochemistry 37: 14719-14735         
00148         vh = vh+(overcount(sup,"AA"))*6.82+(overcount(sup,"TT"))*6.6+\
00149         (overcount(sup,"AT"))*9.38 + (overcount(sup,"TA"))*7.69+\
00150         (overcount(sup,"CA"))*10.44 + (overcount(sup,"TG"))*10.5+\
00151         (overcount(sup,"GT"))*11.4 + (overcount(sup,"AC"))*10.2
00152         vh = vh + (overcount(sup,"CT"))*10.48 + (overcount(sup,"AG"))\
00153         *7.6+(overcount(sup,"GA"))*12.44+(overcount(sup,"TC"))*13.3
00154         vh = vh + (overcount(sup,"CG"))*10.64 + (overcount(sup,"GC"))\
00155         *14.88+(overcount(sup,"GG"))*13.39+(overcount(sup,"CC"))*12.2
00156         vs = vs + (overcount(sup,"AA"))*19.0 + (overcount(sup,"TT"))*\
00157         18.4+(overcount(sup,"AT"))*26.7+(overcount(sup,"TA"))*20.5
00158         vs = vs + (overcount(sup,"CA"))*26.9 + (overcount(sup,"TG"))*\
00159         27.8 + (overcount(sup,"GT"))*29.5 + (overcount(sup,"AC"))*26.2
00160         vs = vs + (overcount(sup,"CT"))*27.1 + (overcount(sup,"AG"))*\
00161         19.2 + (overcount(sup,"GA"))*32.5 + (overcount(sup,"TC"))*35.5
00162         vs = vs + (overcount(sup,"CG"))*26.7 + (overcount(sup,"GC"))\
00163         *36.9 + (overcount(sup,"GG"))*32.7 + (overcount(sup,"CC"))*29.7
00164         ds = vs
00165         dh = vh
00166     else:
00167         raise ValueError("rna = %r not supported" %rna)
00168 
00169     ds = ds-0.368*(len(s)-1)*math.log(saltc/1e3)
00170     tm = ((1000* (-dh))/(-ds+(R * (math.log(k)))))-273.15
00171     # print "ds="+str(ds)
00172     # print "dh="+str(dh)
00173     return tm
00174 
00175 
00176 def _test():
00177     """Run the module's doctests (PRIVATE)."""
00178     import doctest
00179     print "Runing doctests..."
00180     doctest.testmod()
00181     print "Done"
00182 
00183 if __name__ == "__main__":
00184     _test()