Back to index

python-biopython  1.60
test_SubsMat.py
Go to the documentation of this file.
00001 # This code is part of the Biopython distribution and governed by its
00002 # license.  Please see the LICENSE file that should have been included
00003 # as part of this package.
00004 
00005 try:
00006     from numpy import corrcoef
00007     del corrcoef
00008 except ImportError:
00009     from Bio import MissingExternalDependencyError
00010     raise MissingExternalDependencyError(
00011         "Install NumPy if you want to use Bio.SubsMat.")
00012 
00013 import cPickle
00014 import sys
00015 import os
00016 from Bio import SubsMat
00017 from Bio.SubsMat import FreqTable, MatrixInfo
00018 
# Every line of test output is written to this handle.
f = sys.stdout

# Read one frequency table from the count file and one from the frequency
# file.  Each handle is closed explicitly as soon as parsing is done rather
# than being left open until the interpreter reclaims it.
ftab_file = os.path.join('SubsMat', 'protein_count.txt')
handle = open(ftab_file)
try:
    ftab_prot = FreqTable.read_count(handle)
finally:
    handle.close()

ctab_file = os.path.join('SubsMat', 'protein_freq.txt')
handle = open(ctab_file)
try:
    ctab_prot = FreqTable.read_freq(handle)
finally:
    handle.close()

# Report the absolute per-letter difference between the two tables.
f.write("Check differences between derived and true frequencies for each\n")
f.write("letter. Differences should be very small\n")
for i in ftab_prot.alphabet.letters:
    f.write("%s %f\n" % (i, abs(ftab_prot[i] - ctab_prot[i])))
00028 
pickle_file = os.path.join('SubsMat', 'acc_rep_mat.pik')
# Pickled data is binary, so the file is opened in 'rb' mode (text mode is
# not wanted on Python 3), and the handle is closed as soon as loading ends.
# NOTE(review): unpickling executes whatever the file describes; this
# presumably reads a fixture shipped with the tests - never point it at
# untrusted data.
handle = open(pickle_file, 'rb')
try:
    acc_rep_mat = cPickle.load(handle)
finally:
    handle.close()

# Wrap the loaded data, then derive the observed frequency matrix and a
# frequency table from it; the table is compared with ftab_prot further on.
acc_rep_mat = SubsMat.AcceptedReplacementsMatrix(acc_rep_mat)
obs_freq_mat = SubsMat._build_obs_freq_mat(acc_rep_mat)
ftab_prot2 = SubsMat._exp_freq_table_from_obs_freq(obs_freq_mat)
obs_freq_mat.print_mat(f=f, format=" %4.3f")
00036 
00037 
# Compare the table read from file (ftab_prot) with the one derived from the
# observed frequency matrix (ftab_prot2), one key per line in sorted order.
f.write("Diff between supplied and matrix-derived frequencies, should be small\n")
# sorted() always returns a list; calling .sort() on the result of keys()
# fails wherever keys() returns a view rather than a list.
ks = sorted(ftab_prot.keys())
for i in ks:
    f.write("%s %.2f\n" % (i, abs(ftab_prot[i] - ftab_prot2[i])))
00043 
# Print the per-key sums of the observed frequency matrix in sorted key
# order and accumulate their total, which the output says should be 1.0.
s = 0.
f.write("Calculating sum of letters for an observed frequency matrix\n")
counts = obs_freq_mat.sum()
# sorted() always returns a list; calling .sort() on the result of keys()
# fails wherever keys() returns a view rather than a list.
keys = sorted(counts.keys())
for key in keys:
    f.write("%s\t%.2f\n" % (key, counts[key]))
    s += counts[key]
f.write("Total sum %.2f should be 1.0\n" % (s))
# Build the log odds matrix from the accepted replacements matrix
# (round_digit=1).
lo_mat_prot = SubsMat.make_log_odds_matrix(
    acc_rep_mat=acc_rep_mat,
    round_digit=1,
)
f.write("\nLog odds matrix\n")

# Both printouts below share the same cell format and alphabet string.
# The cell format used to be %.1f; an integer format is being tried instead.
_lo_format = " %d"
_lo_alphabet = 'AVILMCFWYHSTNQKRDEGP'

f.write("\nLog odds half matrix\n")
lo_mat_prot.print_mat(f=f, format=_lo_format, alphabet=_lo_alphabet)

f.write("\nLog odds full matrix\n")
lo_mat_prot.print_full_mat(f=f, format=_lo_format, alphabet=_lo_alphabet)
00062 
# Wrap every matrix named in MatrixInfo.available_matrices in a SeqMat and
# print it under a heading carrying its name.
f.write("\nTesting MatrixInfo\n")
for matrix_name in MatrixInfo.available_matrices:
    matrix_data = getattr(MatrixInfo, matrix_name)
    seq_mat = SubsMat.SeqMat(matrix_data)
    f.write("\n%s\n------------\n" % matrix_name)
    seq_mat.print_mat(f=f)

# Relative entropy of the log odds matrix against the observed frequencies.
f.write("\nTesting Entropy\n")
relative_entropy = lo_mat_prot.calculate_relative_entropy(obs_freq_mat)
f.write("relative entropy %.3f\n" % relative_entropy)
00071 
# Correlation between BLOSUM30 and BLOSUM90, computed in both argument
# orders.
# No ImportError fallback is used here: the import guard at the top of this
# file already aborts with MissingExternalDependencyError when NumPy is
# missing, so catching ImportError around these calls could only mask a
# genuine failure inside two_mat_correlation behind canned output.
f.write("\nmatrix correlations\n")
blosum90 = SubsMat.SeqMat(MatrixInfo.blosum90)
blosum30 = SubsMat.SeqMat(MatrixInfo.blosum30)
f.write("BLOSUM30 & BLOSUM90 %.2f\n" % SubsMat.two_mat_correlation(blosum30, blosum90))
f.write("BLOSUM90 & BLOSUM30 %.2f\n" % SubsMat.two_mat_correlation(blosum90, blosum30))
00085