Back to index

python-biopython  1.60
test_MarkovModel.py
Go to the documentation of this file.
00001 # This code is part of the Biopython distribution and governed by its
00002 # license.  Please see the LICENSE file that should have been included
00003 # as part of this package.
00004 
00005 try:
00006     from numpy import array
00007     from numpy import random #missing in PyPy's micronumpy
00008 except ImportError:
00009     from Bio import MissingPythonDependencyError
00010     raise MissingPythonDependencyError(\
00011         "Install NumPy if you want to use Bio.MarkovModel.")
00012 
00013 import unittest
00014 
00015 import warnings
00016 #Silence this warning:
00017 #For optimal speed, please update to Numpy version 1.3 or later
00018 warnings.filterwarnings("ignore", category=UserWarning)
00019 from Bio import MarkovModel
00020 warnings.filters.pop()
00021 
00022 
00023 class TestMarkovModel(unittest.TestCase):
00024 
00025     def test_train_visible(self):
00026         states = ["0", "1", "2", "3"]
00027         alphabet = ["A", "C", "G", "T"]
00028         training_data = [
00029             ("AACCCGGGTTTTTTT", "001112223333333"),
00030             ("ACCGTTTTTTT", "01123333333"),
00031             ("ACGGGTTTTTT", "01222333333"),
00032             ("ACCGTTTTTTTT", "011233333333"),
00033             ]
00034         markov_model = MarkovModel.train_visible(states, alphabet, training_data)
00035         states = MarkovModel.find_states(markov_model, "AACGTT")
00036         self.assertEqual(len(states), 1)
00037         state_list, state_float = states[0]
00038         self.assertEqual(state_list, ['0', '0', '1', '2', '3', '3'])
00039         self.assertAlmostEqual(state_float, 0.0082128906)
00040         self.assertEqual(markov_model.states, ['0', '1', '2', '3'])
00041         self.assertEqual(markov_model.alphabet, ['A', 'C', 'G', 'T'])
00042         self.assertEqual(len(markov_model.p_initial), 4)
00043         self.assertAlmostEqual(markov_model.p_initial[0], 1.0)
00044         self.assertAlmostEqual(markov_model.p_initial[1], 0.0)
00045         self.assertAlmostEqual(markov_model.p_initial[2], 0.0)
00046         self.assertAlmostEqual(markov_model.p_initial[3], 0.0)
00047         self.assertEqual(len(markov_model.p_transition), 4)
00048         self.assertEqual(len(markov_model.p_transition[0]), 4)
00049         self.assertEqual(len(markov_model.p_transition[1]), 4)
00050         self.assertEqual(len(markov_model.p_transition[2]), 4)
00051         self.assertEqual(len(markov_model.p_transition[3]), 4)
00052         self.assertAlmostEqual(markov_model.p_transition[0][0], 0.2)
00053         self.assertAlmostEqual(markov_model.p_transition[0][1], 0.8)
00054         self.assertAlmostEqual(markov_model.p_transition[0][2], 0.0)
00055         self.assertAlmostEqual(markov_model.p_transition[0][3], 0.0)
00056         self.assertAlmostEqual(markov_model.p_transition[1][0], 0.0)
00057         self.assertAlmostEqual(markov_model.p_transition[1][1], 0.5)
00058         self.assertAlmostEqual(markov_model.p_transition[1][2], 0.5)
00059         self.assertAlmostEqual(markov_model.p_transition[1][3], 0.0)
00060         self.assertAlmostEqual(markov_model.p_transition[2][0], 0.0)
00061         self.assertAlmostEqual(markov_model.p_transition[2][1], 0.0)
00062         self.assertAlmostEqual(markov_model.p_transition[2][2], 0.5)
00063         self.assertAlmostEqual(markov_model.p_transition[2][3], 0.5)
00064         self.assertAlmostEqual(markov_model.p_transition[3][0], 0.0)
00065         self.assertAlmostEqual(markov_model.p_transition[3][1], 0.0)
00066         self.assertAlmostEqual(markov_model.p_transition[3][2], 0.0)
00067         self.assertAlmostEqual(markov_model.p_transition[3][3], 1.0)
00068         self.assertEqual(len(markov_model.p_emission), 4)
00069         self.assertEqual(len(markov_model.p_emission[0]), 4)
00070         self.assertEqual(len(markov_model.p_emission[1]), 4)
00071         self.assertEqual(len(markov_model.p_emission[2]), 4)
00072         self.assertEqual(len(markov_model.p_emission[3]), 4)
00073         self.assertAlmostEqual(markov_model.p_emission[0][0], 0.666667,
00074                                places=4)
00075         self.assertAlmostEqual(markov_model.p_emission[0][1], 0.111111,
00076                                places=4)
00077         self.assertAlmostEqual(markov_model.p_emission[0][2], 0.111111,
00078                                places=4)
00079         self.assertAlmostEqual(markov_model.p_emission[0][3], 0.111111,
00080                                places=4)
00081         self.assertAlmostEqual(markov_model.p_emission[1][0], 0.083333,
00082                                places=4)
00083         self.assertAlmostEqual(markov_model.p_emission[1][1], 0.750000,
00084                                places=4)
00085         self.assertAlmostEqual(markov_model.p_emission[1][2], 0.083333,
00086                                places=4)
00087         self.assertAlmostEqual(markov_model.p_emission[1][3], 0.083333,
00088                                places=4)
00089         self.assertAlmostEqual(markov_model.p_emission[2][0], 0.083333,
00090                                places=4)
00091         self.assertAlmostEqual(markov_model.p_emission[2][1], 0.083333,
00092                                places=4)
00093         self.assertAlmostEqual(markov_model.p_emission[2][2], 0.750000,
00094                                places=4)
00095         self.assertAlmostEqual(markov_model.p_emission[2][3], 0.083333,
00096                                places=4)
00097         self.assertAlmostEqual(markov_model.p_emission[3][0], 0.031250,
00098                                places=4)
00099         self.assertAlmostEqual(markov_model.p_emission[3][1], 0.031250,
00100                                places=4)
00101         self.assertAlmostEqual(markov_model.p_emission[3][2], 0.031250,
00102                                places=4)
00103         self.assertAlmostEqual(markov_model.p_emission[3][3], 0.906250,
00104                                places=4)
00105 
00106     def test_baum_welch(self):
00107         states = ["CP", "IP"]
00108         alphabet = ["cola", "ice_t", "lem"]
00109         outputs = [
00110             (2, 1, 0)
00111             ]
00112         p_initial = [1.0, 0.0000001]
00113         p_transition = [[0.7, 0.3],
00114                         [0.5, 0.5]]
00115         p_emission = [[0.6, 0.1, 0.3],
00116                       [0.1, 0.7, 0.2]]
00117         N, M = len(states), len(alphabet)
00118         x = MarkovModel._baum_welch(N, M, outputs,
00119                                     p_initial=p_initial,
00120                                     p_transition=p_transition,
00121                                     p_emission=p_emission
00122                                     )
00123         p_initial, p_transition, p_emission = x
00124         markov_model = MarkovModel.MarkovModel(states, alphabet,
00125                                      p_initial, p_transition, p_emission)
00126         self.assertEqual(markov_model.states, ['CP', 'IP'])
00127         self.assertEqual(markov_model.alphabet, ['cola', 'ice_t', 'lem'])
00128         self.assertEqual(len(markov_model.p_initial), 2)
00129         self.assertAlmostEqual(markov_model.p_initial[0], 1.0,
00130                                places=4)
00131         self.assertAlmostEqual(markov_model.p_initial[1], 0.0,
00132                                places=4)
00133         self.assertEqual(len(markov_model.p_transition), 2)
00134         self.assertEqual(len(markov_model.p_transition[0]), 2)
00135         self.assertEqual(len(markov_model.p_transition[1]), 2)
00136         self.assertAlmostEqual(markov_model.p_transition[0][0], 0.02460365,
00137                                places=4)
00138         self.assertAlmostEqual(markov_model.p_transition[0][1], 0.97539634,
00139                                places=4)
00140         self.assertAlmostEqual(markov_model.p_transition[1][0], 1.0,
00141                                places=4)
00142         self.assertAlmostEqual(markov_model.p_transition[1][1], 0.0,
00143                                places=4)
00144         self.assertEqual(len(markov_model.p_emission), 2)
00145         self.assertEqual(len(markov_model.p_emission[0]), 3)
00146         self.assertEqual(len(markov_model.p_emission[1]), 3)
00147         self.assertAlmostEqual(markov_model.p_emission[0][0], 0.5)
00148         self.assertAlmostEqual(markov_model.p_emission[0][1], 0.0)
00149         self.assertAlmostEqual(markov_model.p_emission[0][2], 0.5)
00150         self.assertAlmostEqual(markov_model.p_emission[1][0], 0.0)
00151         self.assertAlmostEqual(markov_model.p_emission[1][1], 1.0)
00152         self.assertAlmostEqual(markov_model.p_emission[1][2], 0.0)
00153 
00154     # Do some tests from the topcoder competition.
00155 
00156     def test_topcoder1(self):
00157         # NNNN
00158         states = "NR"
00159         alphabet = "AGTC"
00160         p_initial = array([1.0, 0.0])
00161         p_transition = array([[0.90, 0.10],
00162                               [0.20, 0.80]])
00163         p_emission = array([[0.30, 0.20, 0.30, 0.20],
00164                             [0.10, 0.40, 0.10, 0.40]])
00165         markov_model = MarkovModel.MarkovModel(
00166             states, alphabet, p_initial, p_transition, p_emission)
00167         states = MarkovModel.find_states(markov_model, "TGCC")
00168         self.assertEqual(len(states), 1)
00169         state_list, state_float = states[0]
00170         self.assertEqual(state_list, ['N', 'N', 'N', 'N'])
00171 
00172     def test_topcoder2(self):
00173         # NNNRRRNNRRNRRN
00174         states = "NR"
00175         alphabet = "AGTC"
00176         p_initial = array([1.0, 0.0])
00177         p_transition = array([[0.56, 0.44],
00178                               [0.25, 0.75]])
00179         p_emission = array([[0.04, 0.14, 0.62, 0.20],
00180                             [0.39, 0.15, 0.04, 0.42]])
00181         markov_model = MarkovModel.MarkovModel(
00182             states, alphabet, p_initial, p_transition, p_emission)
00183         states = MarkovModel.find_states(markov_model, "CCTGAGTTAGTCGT")
00184         self.assertEqual(len(states), 1)
00185         state_list, state_float = states[0]
00186         self.assertEqual(state_list, ['N', 'N', 'N', 'R', 'R', 'R', 'N', 'N', 'R', 'R', 'N', 'R', 'R', 'N'])
00187 
00188     def test_topcoder3(self):
00189         # NRRRRRRRRRRRNNNNRRRRRRRRR
00190         states = "NR"
00191         alphabet = "AGTC"
00192         p_initial = array([1.0, 0.0])
00193         p_transition = array([[0.75, 0.25],
00194                               [0.25, 0.75]])
00195         p_emission = array([[0.45, 0.36, 0.06, 0.13],
00196                             [0.24, 0.18, 0.12, 0.46]])
00197         markov_model = MarkovModel.MarkovModel(
00198             states, alphabet, p_initial, p_transition, p_emission)
00199         states = MarkovModel.find_states(markov_model, "CCGTACTTACCCAGGACCGCAGTCC")
00200         self.assertEqual(len(states), 1)
00201         state_list, state_float = states[0]
00202         self.assertEqual(state_list, ['N', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'N', 'N', 'N', 'N', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R'])
00203 
00204     def test_topcoder4(self):
00205         # NRRRRRRRRRR
00206         states = "NR"
00207         alphabet = "AGTC"
00208         p_initial = array([1.0, 0.0])
00209         p_transition = array([[0.55, 0.45],
00210                               [0.15, 0.85]])
00211         p_emission = array([[0.75, 0.03, 0.01, 0.21],
00212                             [0.34, 0.11, 0.39, 0.16]])
00213         markov_model = MarkovModel.MarkovModel(
00214             states, alphabet, p_initial, p_transition, p_emission)
00215         states = MarkovModel.find_states(markov_model, "TTAGCAGTGCG")
00216         self.assertEqual(len(states), 1)
00217         state_list, state_float = states[0]
00218         self.assertEqual(state_list, ['N','R','R','R','R','R','R','R','R','R','R'])
00219 
00220     def test_topcoder5(self):
00221         # N
00222         states = "NR"
00223         alphabet = "AGTC"
00224         p_initial = array([1.0, 0.0])
00225         p_transition = array([[0.84, 0.16],
00226                               [0.25, 0.75]])
00227         p_emission = array([[0.26, 0.37, 0.08, 0.29],
00228                             [0.31, 0.13, 0.33, 0.23]])
00229         markov_model = MarkovModel.MarkovModel(
00230             states, alphabet, p_initial, p_transition, p_emission)
00231         states = MarkovModel.find_states(markov_model, "T")
00232         self.assertEqual(len(states), 1)
00233         state_list, state_float = states[0]
00234         self.assertEqual(state_list, ["N"])
00235 
00236 
00237 if __name__ == '__main__':
00238     runner = unittest.TextTestRunner(verbosity = 2)
00239     unittest.main(testRunner=runner)