Back to index

python-biopython  1.60
test_Cluster.py
Go to the documentation of this file.
00001 # This code is part of the Biopython distribution and governed by its
00002 # license.  Please see the LICENSE file that should have been included
00003 # as part of this package.
00004 
00005 import unittest
00006 
00007 try:
00008     from Bio import Cluster
00009 except ImportError:
00010     from Bio import MissingPythonDependencyError
00011     raise MissingPythonDependencyError("If you want to use Bio.Cluster, "
00012                                        "install NumPy first and then "
00013                                        "reinstall Biopython")
00014 
00015 try:
00016     import numpy
00017 except ImportError:
00018     from Bio import MissingPythonDependencyError
00019     raise MissingPythonDependencyError(\
00020         "Install NumPy if you want to use Bio.Cluster")
00021 
00022 class TestCluster(unittest.TestCase):
00023 
00024     module = 'Bio.Cluster'
00025 
00026     def test_median_mean(self):
00027         if TestCluster.module=='Bio.Cluster':
00028             from Bio.Cluster import mean, median
00029         elif TestCluster.module=='Pycluster':
00030             from Pycluster import mean, median
00031 
00032         data = numpy.array([ 34.3, 3, 2 ])
00033         self.assertAlmostEqual(mean(data), 13.1, places=3)
00034         self.assertAlmostEqual(median(data), 3.0, places=3)
00035 
00036         data = [ 5, 10, 15, 20]
00037         self.assertAlmostEqual(mean(data), 12.5, places=3)
00038         self.assertAlmostEqual(median(data), 12.5, places=3)
00039 
00040         data = [ 1, 2, 3, 5, 7, 11, 13, 17]
00041         self.assertAlmostEqual(mean(data), 7.375, places=3)
00042         self.assertAlmostEqual(median(data), 6.0, places=3)
00043 
00044         data = [ 100, 19, 3, 1.5, 1.4, 1, 1, 1]
00045         self.assertAlmostEqual(mean(data), 15.988, places=3)
00046         self.assertAlmostEqual(median(data), 1.45, places=3)
00047       
00048 
00049     def test_matrix_parse(self):
00050         if TestCluster.module=='Bio.Cluster':
00051             from Bio.Cluster import treecluster
00052         elif TestCluster.module=='Pycluster':
00053             from Pycluster import treecluster
00054 
00055         # Normal matrix, no errors
00056         data1 = numpy.array([[ 1.1, 1.2 ],
00057                              [ 1.4, 1.3 ],
00058                              [ 1.1, 1.5 ],
00059                              [ 2.0, 1.5 ],
00060                              [ 1.7, 1.9 ],
00061                              [ 1.7, 1.9 ],
00062                              [ 5.7, 5.9 ],
00063                              [ 5.7, 5.9 ],
00064                              [ 3.1, 3.3 ],
00065                              [ 5.4, 5.3 ],
00066                              [ 5.1, 5.5 ],
00067                              [ 5.0, 5.5 ],
00068                              [ 5.1, 5.2 ]])
00069       
00070         # Another normal matrix, no errors; written as a list
00071         data2 =  [[  1.1, 2.2, 3.3, 4.4, 5.5 ], 
00072                   [  3.1, 3.2, 1.3, 2.4, 1.5 ], 
00073                   [  4.1, 2.2, 0.3, 5.4, 0.5 ], 
00074                   [ 12.1, 2.0, 0.0, 5.0, 0.0 ]]
00075       
00076         # Ragged matrix
00077         data3 =  [[ 91.1, 92.2, 93.3, 94.4, 95.5], 
00078                   [ 93.1, 93.2, 91.3, 92.4 ], 
00079                   [ 94.1, 92.2, 90.3 ], 
00080                   [ 12.1, 92.0, 90.0, 95.0, 90.0 ]]
00081       
00082         # Matrix with bad cells
00083         data4 =  [ [ 7.1, 7.2, 7.3, 7.4, 7.5, ],
00084                    [ 7.1, 7.2, 7.3, 7.4, 'snoopy' ], 
00085                    [ 7.1, 7.2, 7.3, None, None]] 
00086 
00087         # Matrix with a bad row
00088         data5 =  [ [ 23.1, 23.2, 23.3, 23.4, 23.5], 
00089                    None,
00090                    [ 23.1, 23.0, 23.0, 23.0, 23.0]]
00091 
00092         # Various references that don't point to matrices at all
00093         data6 = "snoopy"
00094         data7 = {'a': [[2.3,1.2],[3.3,5.6]]}
00095         data8 = []
00096         data9 = [None]
00097       
00098         try:
00099             treecluster(data1)
00100         except:
00101             self.fail("treecluster failed to accept matrix data1")
00102 
00103         try:
00104             treecluster(data2)
00105         except:
00106             self.fail("treecluster failed to accept matrix data2")
00107 
00108         self.assertRaises(TypeError, lambda : treecluster(data3))
00109         self.assertRaises(TypeError, lambda : treecluster(data4))
00110         self.assertRaises(TypeError, lambda : treecluster(data5))
00111         self.assertRaises(TypeError, lambda : treecluster(data6))
00112         self.assertRaises(TypeError, lambda : treecluster(data7))
00113         self.assertRaises(TypeError, lambda : treecluster(data8))
00114         self.assertRaises(TypeError, lambda : treecluster(data9))
00115 
00116     def test_kcluster(self):
00117         if TestCluster.module=='Bio.Cluster':
00118             from Bio.Cluster import kcluster
00119         elif TestCluster.module=='Pycluster':
00120             from Pycluster import kcluster
00121 
00122         nclusters = 3
00123         # First data set
00124         weight = numpy.array([1,1,1,1,1])
00125         data   = numpy.array([[ 1.1, 2.2, 3.3, 4.4, 5.5],
00126                               [ 3.1, 3.2, 1.3, 2.4, 1.5], 
00127                               [ 4.1, 2.2, 0.3, 5.4, 0.5], 
00128                               [12.1, 2.0, 0.0, 5.0, 0.0]]) 
00129         mask =  numpy.array([[ 1, 1, 1, 1, 1], 
00130                              [ 1, 1, 1, 1, 1], 
00131                              [ 1, 1, 1, 1, 1], 
00132                              [ 1, 1, 1, 1, 1]], int) 
00133       
00134         clusterid, error, nfound = kcluster(data, nclusters=nclusters,
00135                                             mask=mask, weight=weight,
00136                                             transpose=0, npass=100,
00137                                             method='a', dist='e')
00138         self.assertEqual(len(clusterid), len(data))
00139 
00140         correct = [0,1,1,2]
00141         mapping = [clusterid[correct.index(i)] for i in range(nclusters)]
00142         for i in range(len(clusterid)):
00143             self.assertEqual(clusterid[i], mapping[correct[i]])
00144       
00145         # Second data set
00146         weight = numpy.array([1,1])
00147         data = numpy.array([[ 1.1, 1.2 ],
00148                       [ 1.4, 1.3 ],
00149                       [ 1.1, 1.5 ],
00150                       [ 2.0, 1.5 ],
00151                       [ 1.7, 1.9 ],
00152                       [ 1.7, 1.9 ],
00153                       [ 5.7, 5.9 ],
00154                       [ 5.7, 5.9 ],
00155                       [ 3.1, 3.3 ],
00156                       [ 5.4, 5.3 ],
00157                       [ 5.1, 5.5 ],
00158                       [ 5.0, 5.5 ],
00159                       [ 5.1, 5.2 ]])
00160         mask = numpy.array([[ 1, 1 ],
00161                             [ 1, 1 ],
00162                             [ 1, 1 ],
00163                             [ 1, 1 ],
00164                             [ 1, 1 ],
00165                             [ 1, 1 ],
00166                             [ 1, 1 ],
00167                             [ 1, 1 ],
00168                             [ 1, 1 ],
00169                             [ 1, 1 ],
00170                             [ 1, 1 ],
00171                             [ 1, 1 ],
00172                             [ 1, 1 ]], int)
00173 
00174         clusterid, error, nfound = kcluster(data, nclusters=3, mask=mask,
00175                                             weight=weight, transpose=0,
00176                                             npass=100, method='a', dist='e')
00177         self.assertEqual(len(clusterid), len(data))
00178 
00179         correct = [0, 0, 0, 0, 0, 0, 1, 1, 2, 1, 1, 1, 1]
00180         mapping = [clusterid[correct.index(i)] for i in range(nclusters)]
00181         for i in range(len(clusterid)):
00182             self.assertEqual(clusterid[i], mapping[correct[i]])
00183 
00184     def test_clusterdistance(self):
00185         if TestCluster.module=='Bio.Cluster':
00186             from Bio.Cluster import clusterdistance
00187         elif TestCluster.module=='Pycluster':
00188             from Pycluster import clusterdistance
00189 
00190         # First data set
00191         weight = numpy.array([ 1,1,1,1,1 ])
00192         data   = numpy.array([[  1.1, 2.2, 3.3, 4.4, 5.5, ], 
00193                               [  3.1, 3.2, 1.3, 2.4, 1.5, ], 
00194                               [  4.1, 2.2, 0.3, 5.4, 0.5, ], 
00195                               [ 12.1, 2.0, 0.0, 5.0, 0.0, ]])
00196         mask   = numpy.array([[ 1, 1, 1, 1, 1], 
00197                               [ 1, 1, 1, 1, 1], 
00198                               [ 1, 1, 1, 1, 1], 
00199                               [ 1, 1, 1, 1, 1]], int)
00200 
00201         # Cluster assignments
00202         c1 = [0]
00203         c2 = [1,2]
00204         c3 = [3]
00205 
00206         distance = clusterdistance(data, mask=mask, weight=weight,
00207                                    index1=c1, index2=c2, dist='e',
00208                                    method='a', transpose=0);
00209         self.assertAlmostEqual(distance, 6.650, places=3)
00210         distance = clusterdistance(data, mask=mask, weight=weight,
00211                                    index1=c1, index2=c3, dist='e',
00212                                    method='a', transpose=0);
00213         self.assertAlmostEqual(distance, 32.508, places=3)
00214         distance = clusterdistance(data, mask=mask, weight=weight,
00215                                    index1=c2, index2=c3, dist='e',
00216                                    method='a', transpose=0);
00217         self.assertAlmostEqual(distance, 15.118, places=3)
00218 
00219         # Second data set
00220         weight =  numpy.array([ 1,1 ])
00221         data   =  numpy.array([[ 1.1, 1.2 ],
00222                          [ 1.4, 1.3 ],
00223                          [ 1.1, 1.5 ],
00224                          [ 2.0, 1.5 ],
00225                          [ 1.7, 1.9 ],
00226                          [ 1.7, 1.9 ],
00227                          [ 5.7, 5.9 ],
00228                          [ 5.7, 5.9 ],
00229                          [ 3.1, 3.3 ],
00230                          [ 5.4, 5.3 ],
00231                          [ 5.1, 5.5 ],
00232                          [ 5.0, 5.5 ],
00233                          [ 5.1, 5.2 ]])
00234         mask = numpy.array([[ 1, 1 ],
00235                             [ 1, 1 ],
00236                             [ 1, 1 ],
00237                             [ 1, 1 ],
00238                             [ 1, 1 ],
00239                             [ 1, 1 ],
00240                             [ 1, 1 ],
00241                             [ 1, 1 ],
00242                             [ 1, 1 ],
00243                             [ 1, 1 ],
00244                             [ 1, 1 ],
00245                             [ 1, 1 ],
00246                             [ 1, 1 ]], int)
00247 
00248         # Cluster assignments
00249         c1 = [ 0, 1, 2, 3 ]
00250         c2 = [ 4, 5, 6, 7 ]
00251         c3 = [ 8 ]
00252 
00253         distance = clusterdistance(data, mask=mask, weight=weight,
00254                                    index1=c1, index2=c2, dist='e',
00255                                    method='a', transpose=0);
00256         self.assertAlmostEqual(distance, 5.833, places=3)
00257         distance = clusterdistance(data, mask=mask, weight=weight,
00258                                    index1=c1, index2=c3, dist='e',
00259                                    method='a', transpose=0);
00260         self.assertAlmostEqual(distance, 3.298, places=3)
00261         distance = clusterdistance(data, mask=mask, weight=weight,
00262                                    index1=c2, index2=c3, dist='e',
00263                                    method='a', transpose=0);
00264         self.assertAlmostEqual(distance, 0.360, places=3)
00265 
00266 
00267     def test_treecluster(self):
00268         if TestCluster.module=='Bio.Cluster':
00269             from Bio.Cluster import treecluster
00270         elif TestCluster.module=='Pycluster':
00271             from Pycluster import treecluster
00272 
00273         # First data set
00274         weight1 =  [ 1,1,1,1,1 ]
00275         data1   =  numpy.array([[  1.1, 2.2, 3.3, 4.4, 5.5], 
00276                                 [  3.1, 3.2, 1.3, 2.4, 1.5], 
00277                                 [  4.1, 2.2, 0.3, 5.4, 0.5], 
00278                                 [ 12.1, 2.0, 0.0, 5.0, 0.0]])
00279         mask1 = numpy.array([[ 1, 1, 1, 1, 1], 
00280                              [ 1, 1, 1, 1, 1], 
00281                              [ 1, 1, 1, 1, 1], 
00282                              [ 1, 1, 1, 1, 1]], int)
00283       
00284         # test first data set
00285         # Pairwise average-linkage clustering"
00286         tree = treecluster(data=data1, mask=mask1, weight=weight1,
00287                            transpose=0, method='a', dist='e')
00288         self.assertEqual(len(tree), len(data1)-1)
00289         self.assertEqual(tree[0].left, 2)
00290         self.assertEqual(tree[0].right, 1)
00291         self.assertAlmostEqual(tree[0].distance, 2.600, places=3)
00292         self.assertEqual(tree[1].left, -1)
00293         self.assertEqual(tree[1].right, 0)
00294         self.assertAlmostEqual(tree[1].distance, 7.300, places=3)
00295         self.assertEqual(tree[2].left, 3)
00296         self.assertEqual(tree[2].right, -2)
00297         self.assertAlmostEqual(tree[2].distance, 21.348, places=3)
00298 
00299         # Pairwise single-linkage clustering
00300         tree = treecluster(data=data1, mask=mask1, weight=weight1,
00301                            transpose=0, method='s', dist='e')
00302         self.assertEqual(len(tree), len(data1)-1)
00303         self.assertEqual(tree[0].left, 1)
00304         self.assertEqual(tree[0].right, 2)
00305         self.assertAlmostEqual(tree[0].distance, 2.600, places=3)
00306         self.assertEqual(tree[1].left, 0)
00307         self.assertEqual(tree[1].right, -1)
00308         self.assertAlmostEqual(tree[1].distance, 5.800, places=3)
00309         self.assertEqual(tree[2].left, -2)
00310         self.assertEqual(tree[2].right, 3)
00311         self.assertAlmostEqual(tree[2].distance, 12.908, places=3)
00312 
00313         # Pairwise centroid-linkage clustering
00314         tree = treecluster(data=data1, mask=mask1, weight=weight1,
00315                            transpose=0, method='c', dist='e')
00316         self.assertEqual(len(tree), len(data1)-1)
00317         self.assertEqual(tree[0].left, 1)
00318         self.assertEqual(tree[0].right, 2)
00319         self.assertAlmostEqual(tree[0].distance, 2.600, places=3)
00320         self.assertEqual(tree[1].left, 0)
00321         self.assertEqual(tree[1].right, -1)
00322         self.assertAlmostEqual(tree[1].distance, 6.650, places=3)
00323         self.assertEqual(tree[2].left, -2)
00324         self.assertEqual(tree[2].right, 3)
00325         self.assertAlmostEqual(tree[2].distance, 19.437, places=3)
00326 
00327         # Pairwise maximum-linkage clustering
00328         tree = treecluster(data=data1, mask=mask1, weight=weight1,
00329                            transpose=0, method='m', dist='e')
00330         self.assertEqual(len(tree), len(data1)-1)
00331         self.assertEqual(tree[0].left, 2)
00332         self.assertEqual(tree[0].right, 1)
00333         self.assertAlmostEqual(tree[0].distance, 2.600, places=3)
00334         self.assertEqual(tree[1].left, -1)
00335         self.assertEqual(tree[1].right, 0)
00336         self.assertAlmostEqual(tree[1].distance, 8.800, places=3)
00337         self.assertEqual(tree[2].left, 3)
00338         self.assertEqual(tree[2].right, -2)
00339         self.assertAlmostEqual(tree[2].distance, 32.508, places=3)
00340       
00341         # Second data set
00342         weight2 =  [ 1,1 ]
00343         data2 = numpy.array([[ 0.8223, 0.9295 ],
00344                              [ 1.4365, 1.3223 ],
00345                              [ 1.1623, 1.5364 ],
00346                              [ 2.1826, 1.1934 ],
00347                              [ 1.7763, 1.9352 ],
00348                              [ 1.7215, 1.9912 ],
00349                              [ 2.1812, 5.9935 ],
00350                              [ 5.3290, 5.9452 ],
00351                              [ 3.1491, 3.3454 ],
00352                              [ 5.1923, 5.3156 ],
00353                              [ 4.7735, 5.4012 ],
00354                              [ 5.1297, 5.5645 ],
00355                              [ 5.3934, 5.1823 ]])
00356         mask2 = numpy.array([[ 1, 1 ],
00357                              [ 1, 1 ],
00358                              [ 1, 1 ],
00359                              [ 1, 1 ],
00360                              [ 1, 1 ],
00361                              [ 1, 1 ],
00362                              [ 1, 1 ],
00363                              [ 1, 1 ],
00364                              [ 1, 1 ],
00365                              [ 1, 1 ],
00366                              [ 1, 1 ],
00367                              [ 1, 1 ],
00368                              [ 1, 1 ]], int)
00369       
00370         # Test second data set
00371         # Pairwise average-linkage clustering
00372         tree = treecluster(data=data2, mask=mask2, weight=weight2,
00373                            transpose=0, method='a', dist='e')
00374         self.assertEqual(len(tree), len(data2)-1)
00375         self.assertEqual(tree[0].left, 5)
00376         self.assertEqual(tree[0].right, 4)
00377         self.assertAlmostEqual(tree[0].distance, 0.003, places=3)
00378         self.assertEqual(tree[1].left, 9)
00379         self.assertEqual(tree[1].right, 12)
00380         self.assertAlmostEqual(tree[1].distance, 0.029, places=3)
00381         self.assertEqual(tree[2].left, 2)
00382         self.assertEqual(tree[2].right, 1)
00383         self.assertAlmostEqual(tree[2].distance, 0.061, places=3)
00384         self.assertEqual(tree[3].left, 11)
00385         self.assertEqual(tree[3].right, -2)
00386         self.assertAlmostEqual(tree[3].distance, 0.070, places=3)
00387         self.assertEqual(tree[4].left, -4)
00388         self.assertEqual(tree[4].right, 10)
00389         self.assertAlmostEqual(tree[4].distance, 0.128, places=3)
00390         self.assertEqual(tree[5].left, 7)
00391         self.assertEqual(tree[5].right, -5)
00392         self.assertAlmostEqual(tree[5].distance, 0.224, places=3)
00393         self.assertEqual(tree[6].left, -3)
00394         self.assertEqual(tree[6].right, 0)
00395         self.assertAlmostEqual(tree[6].distance, 0.254, places=3)
00396         self.assertEqual(tree[7].left, -1)
00397         self.assertEqual(tree[7].right, 3)
00398         self.assertAlmostEqual(tree[7].distance, 0.391, places=3)
00399         self.assertEqual(tree[8].left, -8)
00400         self.assertEqual(tree[8].right, -7)
00401         self.assertAlmostEqual(tree[8].distance, 0.532, places=3)
00402         self.assertEqual(tree[9].left, 8)
00403         self.assertEqual(tree[9].right, -9)
00404         self.assertAlmostEqual(tree[9].distance, 3.234, places=3)
00405         self.assertEqual(tree[10].left, -6)
00406         self.assertEqual(tree[10].right, 6)
00407         self.assertAlmostEqual(tree[10].distance, 4.636, places=3)
00408         self.assertEqual(tree[11].left, -11)
00409         self.assertEqual(tree[11].right, -10)
00410         self.assertAlmostEqual(tree[11].distance, 12.741, places=3)
00411       
00412         # Pairwise single-linkage clustering
00413         tree = treecluster(data=data2, mask=mask2, weight=weight2,
00414                            transpose=0, method='s', dist='e')
00415         self.assertEqual(len(tree), len(data2)-1)
00416         self.assertEqual(tree[0].left, 4)
00417         self.assertEqual(tree[0].right, 5)
00418         self.assertAlmostEqual(tree[0].distance, 0.003, places=3)
00419         self.assertEqual(tree[1].left, 9)
00420         self.assertEqual(tree[1].right, 12)
00421         self.assertAlmostEqual(tree[1].distance, 0.029, places=3)
00422         self.assertEqual(tree[2].left, 11)
00423         self.assertEqual(tree[2].right, -2)
00424         self.assertAlmostEqual(tree[2].distance, 0.033, places=3)
00425         self.assertEqual(tree[3].left, 1)
00426         self.assertEqual(tree[3].right, 2)
00427         self.assertAlmostEqual(tree[3].distance, 0.061, places=3)
00428         self.assertEqual(tree[4].left, 10)
00429         self.assertEqual(tree[4].right, -3)
00430         self.assertAlmostEqual(tree[4].distance, 0.077, places=3)
00431         self.assertEqual(tree[5].left, 7)
00432         self.assertEqual(tree[5].right, -5)
00433         self.assertAlmostEqual(tree[5].distance, 0.092, places=3)
00434         self.assertEqual(tree[6].left, 0)
00435         self.assertEqual(tree[6].right, -4)
00436         self.assertAlmostEqual(tree[6].distance, 0.242, places=3)
00437         self.assertEqual(tree[7].left, -7)
00438         self.assertEqual(tree[7].right, -1)
00439         self.assertAlmostEqual(tree[7].distance, 0.246, places=3)
00440         self.assertEqual(tree[8].left, 3)
00441         self.assertEqual(tree[8].right, -8)
00442         self.assertAlmostEqual(tree[8].distance, 0.287, places=3)
00443         self.assertEqual(tree[9].left, -9)
00444         self.assertEqual(tree[9].right, 8)
00445         self.assertAlmostEqual(tree[9].distance, 1.936, places=3)
00446         self.assertEqual(tree[10].left, -10)
00447         self.assertEqual(tree[10].right, -6)
00448         self.assertAlmostEqual(tree[10].distance, 3.432, places=3)
00449         self.assertEqual(tree[11].left, 6)
00450         self.assertEqual(tree[11].right, -11)
00451         self.assertAlmostEqual(tree[11].distance, 3.535, places=3)
00452       
00453         # Pairwise centroid-linkage clustering
00454         tree = treecluster(data=data2, mask=mask2, weight=weight2,
00455                            transpose=0, method='c', dist='e')
00456         self.assertEqual(len(tree), len(data2)-1)
00457         self.assertEqual(tree[0].left, 4)
00458         self.assertEqual(tree[0].right, 5)
00459         self.assertAlmostEqual(tree[0].distance, 0.003, places=3)
00460         self.assertEqual(tree[1].left, 12)
00461         self.assertEqual(tree[1].right, 9)
00462         self.assertAlmostEqual(tree[1].distance, 0.029, places=3)
00463         self.assertEqual(tree[2].left, 1)
00464         self.assertEqual(tree[2].right, 2)
00465         self.assertAlmostEqual(tree[2].distance, 0.061, places=3)
00466         self.assertEqual(tree[3].left, -2)
00467         self.assertEqual(tree[3].right, 11)
00468         self.assertAlmostEqual(tree[3].distance, 0.063, places=3)
00469         self.assertEqual(tree[4].left, 10)
00470         self.assertEqual(tree[4].right, -4)
00471         self.assertAlmostEqual(tree[4].distance, 0.109, places=3)
00472         self.assertEqual(tree[5].left, -5)
00473         self.assertEqual(tree[5].right, 7)
00474         self.assertAlmostEqual(tree[5].distance, 0.189, places=3)
00475         self.assertEqual(tree[6].left, 0)
00476         self.assertEqual(tree[6].right, -3)
00477         self.assertAlmostEqual(tree[6].distance, 0.239, places=3)
00478         self.assertEqual(tree[7].left, 3)
00479         self.assertEqual(tree[7].right, -1)
00480         self.assertAlmostEqual(tree[7].distance, 0.390, places=3)
00481         self.assertEqual(tree[8].left, -7)
00482         self.assertEqual(tree[8].right, -8)
00483         self.assertAlmostEqual(tree[8].distance, 0.382, places=3)
00484         self.assertEqual(tree[9].left, -9)
00485         self.assertEqual(tree[9].right, 8)
00486         self.assertAlmostEqual(tree[9].distance, 3.063, places=3)
00487         self.assertEqual(tree[10].left, 6)
00488         self.assertEqual(tree[10].right, -6)
00489         self.assertAlmostEqual(tree[10].distance, 4.578, places=3)
00490         self.assertEqual(tree[11].left, -10)
00491         self.assertEqual(tree[11].right, -11)
00492         self.assertAlmostEqual(tree[11].distance, 11.536, places=3)
00493       
00494         # Pairwise maximum-linkage clustering
00495         tree = treecluster(data=data2, mask=mask2, weight=weight2,
00496                            transpose=0, method='m', dist='e')
00497         self.assertEqual(len(tree), len(data2)-1)
00498         self.assertEqual(tree[0].left, 5)
00499         self.assertEqual(tree[0].right, 4)
00500         self.assertAlmostEqual(tree[0].distance, 0.003, places=3)
00501         self.assertEqual(tree[1].left, 9)
00502         self.assertEqual(tree[1].right, 12)
00503         self.assertAlmostEqual(tree[1].distance, 0.029, places=3)
00504         self.assertEqual(tree[2].left, 2)
00505         self.assertEqual(tree[2].right, 1)
00506         self.assertAlmostEqual(tree[2].distance, 0.061, places=3)
00507         self.assertEqual(tree[3].left, 11)
00508         self.assertEqual(tree[3].right, 10)
00509         self.assertAlmostEqual(tree[3].distance, 0.077, places=3)
00510         self.assertEqual(tree[4].left, -2)
00511         self.assertEqual(tree[4].right, -4)
00512         self.assertAlmostEqual(tree[4].distance, 0.216, places=3)
00513         self.assertEqual(tree[5].left, -3)
00514         self.assertEqual(tree[5].right, 0)
00515         self.assertAlmostEqual(tree[5].distance, 0.266, places=3)
00516         self.assertEqual(tree[6].left, -5)
00517         self.assertEqual(tree[6].right, 7)
00518         self.assertAlmostEqual(tree[6].distance, 0.302, places=3)
00519         self.assertEqual(tree[7].left, -1)
00520         self.assertEqual(tree[7].right, 3)
00521         self.assertAlmostEqual(tree[7].distance, 0.425, places=3)
00522         self.assertEqual(tree[8].left, -8)
00523         self.assertEqual(tree[8].right, -6)
00524         self.assertAlmostEqual(tree[8].distance, 0.968, places=3)
00525         self.assertEqual(tree[9].left, 8)
00526         self.assertEqual(tree[9].right, 6)
00527         self.assertAlmostEqual(tree[9].distance, 3.975, places=3)
00528         self.assertEqual(tree[10].left, -10)
00529         self.assertEqual(tree[10].right, -7)
00530         self.assertAlmostEqual(tree[10].distance, 5.755, places=3)
00531         self.assertEqual(tree[11].left, -11)
00532         self.assertEqual(tree[11].right, -9)
00533         self.assertAlmostEqual(tree[11].distance, 22.734, places=3)
00534 
00535     def test_somcluster(self):
00536         if TestCluster.module=='Bio.Cluster':
00537             from Bio.Cluster import somcluster
00538         elif TestCluster.module=='Pycluster':
00539             from Pycluster import somcluster
00540 
00541         # First data set
00542         weight = [ 1,1,1,1,1 ]
00543         data = numpy.array([[  1.1, 2.2, 3.3, 4.4, 5.5], 
00544                             [  3.1, 3.2, 1.3, 2.4, 1.5], 
00545                             [  4.1, 2.2, 0.3, 5.4, 0.5], 
00546                             [ 12.1, 2.0, 0.0, 5.0, 0.0]])
00547         mask = numpy.array([[ 1, 1, 1, 1, 1], 
00548                             [ 1, 1, 1, 1, 1], 
00549                             [ 1, 1, 1, 1, 1], 
00550                             [ 1, 1, 1, 1, 1]], int)
00551 
00552         clusterid, celldata = somcluster(data=data, mask=mask, weight=weight,
00553                                          transpose=0, nxgrid=10, nygrid=10,
00554                                          inittau=0.02, niter=100, dist='e')
00555         self.assertEqual(len(clusterid), len(data))
00556         self.assertEqual(len(clusterid[0]), 2)
00557 
00558         # Second data set
00559         weight =  [ 1,1 ]
00560         data = numpy.array([[ 1.1, 1.2 ],
00561                             [ 1.4, 1.3 ],
00562                             [ 1.1, 1.5 ],
00563                             [ 2.0, 1.5 ],
00564                             [ 1.7, 1.9 ],
00565                             [ 1.7, 1.9 ],
00566                             [ 5.7, 5.9 ],
00567                             [ 5.7, 5.9 ],
00568                             [ 3.1, 3.3 ],
00569                             [ 5.4, 5.3 ],
00570                             [ 5.1, 5.5 ],
00571                             [ 5.0, 5.5 ],
00572                             [ 5.1, 5.2 ]])
00573         mask = numpy.array([[ 1, 1 ],
00574                             [ 1, 1 ],
00575                             [ 1, 1 ],
00576                             [ 1, 1 ],
00577                             [ 1, 1 ],
00578                             [ 1, 1 ],
00579                             [ 1, 1 ],
00580                             [ 1, 1 ],
00581                             [ 1, 1 ],
00582                             [ 1, 1 ],
00583                             [ 1, 1 ],
00584                             [ 1, 1 ],
00585                             [ 1, 1 ]], int)
00586 
00587         clusterid, celldata = somcluster(data=data, mask=mask, weight=weight,
00588                                          transpose=0, nxgrid=10, nygrid=10,
00589                                          inittau=0.02, niter=100, dist='e')
00590         self.assertEqual(len(clusterid), len(data))
00591         self.assertEqual(len(clusterid[0]), 2)
00592 
00593     def test_distancematrix_kmedoids(self):
00594         if TestCluster.module=='Bio.Cluster':
00595             from Bio.Cluster import distancematrix, kmedoids
00596         elif TestCluster.module=='Pycluster':
00597             from Pycluster import distancematrix, kmedoids
00598 
00599         data = numpy.array([[2.2, 3.3, 4.4],
00600                             [2.1, 1.4, 5.6],
00601                             [7.8, 9.0, 1.2],
00602                             [4.5, 2.3, 1.5],
00603                             [4.2, 2.4, 1.9],
00604                             [3.6, 3.1, 9.3],
00605                             [2.3, 1.2, 3.9],
00606                             [4.2, 9.6, 9.3],
00607                             [1.7, 8.9, 1.1]])
00608         mask = numpy.array([[1, 1, 1],
00609                             [1, 1, 1],
00610                             [0, 1, 1],
00611                             [1, 1, 1],
00612                             [1, 1, 1],
00613                             [0, 1, 0],
00614                             [1, 1, 1],
00615                             [1, 0, 1],
00616                             [1, 1, 1]], int)
00617         weight = numpy.array([2.0, 1.0, 0.5])
00618         matrix = distancematrix(data, mask=mask, weight=weight)
00619 
00620         self.assertAlmostEqual(matrix[1][0], 1.243, places=3)
00621 
00622         self.assertAlmostEqual(matrix[2][0], 25.073, places=3)
00623         self.assertAlmostEqual(matrix[2][1], 44.960, places=3)
00624 
00625         self.assertAlmostEqual(matrix[3][0], 4.510, places=3)
00626         self.assertAlmostEqual(matrix[3][1], 5.924, places=3)
00627         self.assertAlmostEqual(matrix[3][2], 29.957, places=3)
00628 
00629         self.assertAlmostEqual(matrix[4][0], 3.410, places=3)
00630         self.assertAlmostEqual(matrix[4][1], 4.761, places=3)
00631         self.assertAlmostEqual(matrix[4][2], 29.203, places=3)
00632         self.assertAlmostEqual(matrix[4][3], 0.077, places=3)
00633 
00634         self.assertAlmostEqual(matrix[5][0], 0.040, places=3)
00635         self.assertAlmostEqual(matrix[5][1], 2.890, places=3)
00636         self.assertAlmostEqual(matrix[5][2], 34.810, places=3)
00637         self.assertAlmostEqual(matrix[5][3], 0.640, places=3)
00638         self.assertAlmostEqual(matrix[5][4], 0.490, places=3)
00639 
00640         self.assertAlmostEqual(matrix[6][0], 1.301, places=3)
00641         self.assertAlmostEqual(matrix[6][1], 0.447, places=3)
00642         self.assertAlmostEqual(matrix[6][2], 42.990, places=3)
00643         self.assertAlmostEqual(matrix[6][3], 3.934, places=3)
00644         self.assertAlmostEqual(matrix[6][4], 3.046, places=3)
00645         self.assertAlmostEqual(matrix[6][5], 3.610, places=3)
00646 
00647         self.assertAlmostEqual(matrix[7][0], 8.002, places=3)
00648         self.assertAlmostEqual(matrix[7][1], 6.266, places=3)
00649         self.assertAlmostEqual(matrix[7][2], 65.610, places=3)
00650         self.assertAlmostEqual(matrix[7][3], 12.240, places=3)
00651         self.assertAlmostEqual(matrix[7][4], 10.952, places=3)
00652         self.assertAlmostEqual(matrix[7][5], 0.000, places=3)
00653         self.assertAlmostEqual(matrix[7][6], 8.720, places=3)
00654 
00655         self.assertAlmostEqual(matrix[8][0], 10.659, places=3)
00656         self.assertAlmostEqual(matrix[8][1], 19.056, places=3)
00657         self.assertAlmostEqual(matrix[8][2], 0.010, places=3)
00658         self.assertAlmostEqual(matrix[8][3], 16.949, places=3)
00659         self.assertAlmostEqual(matrix[8][4], 15.734, places=3)
00660         self.assertAlmostEqual(matrix[8][5], 33.640, places=3)
00661         self.assertAlmostEqual(matrix[8][6], 18.266, places=3)
00662         self.assertAlmostEqual(matrix[8][7], 18.448, places=3)
00663         clusterid, error, nfound = kmedoids(matrix, npass=1000)
00664         self.assertEqual(clusterid[0], 5)
00665         self.assertEqual(clusterid[1], 5)
00666         self.assertEqual(clusterid[2], 2)
00667         self.assertEqual(clusterid[3], 5)
00668         self.assertEqual(clusterid[4], 5)
00669         self.assertEqual(clusterid[5], 5)
00670         self.assertEqual(clusterid[6], 5)
00671         self.assertEqual(clusterid[7], 5)
00672         self.assertEqual(clusterid[8], 2)
00673         self.assertAlmostEqual(error, 7.680, places=3)
00674 
00675     def test_pca(self):
00676         if TestCluster.module=='Bio.Cluster':
00677             from Bio.Cluster import pca
00678         elif TestCluster.module=='Pycluster':
00679             from Pycluster import pca
00680 
00681         data = numpy.array([[ 3.1, 1.2 ],
00682                             [ 1.4, 1.3 ],
00683                             [ 1.1, 1.5 ],
00684                             [ 2.0, 1.5 ],
00685                             [ 1.7, 1.9 ],
00686                             [ 1.7, 1.9 ],
00687                             [ 5.7, 5.9 ],
00688                             [ 5.7, 5.9 ],
00689                             [ 3.1, 3.3 ],
00690                             [ 5.4, 5.3 ],
00691                             [ 5.1, 5.5 ],
00692                             [ 5.0, 5.5 ],
00693                             [ 5.1, 5.2 ],
00694                            ])
00695 
00696         mean, coordinates, pc, eigenvalues =  pca(data)
00697         self.assertAlmostEqual(mean[0], 3.5461538461538464)
00698         self.assertAlmostEqual(mean[1], 3.5307692307692311)
00699         self.assertAlmostEqual(coordinates[0,0],  2.0323189722653883)
00700         self.assertAlmostEqual(coordinates[0,1],  1.2252420399694917)
00701         self.assertAlmostEqual(coordinates[1,0],  3.0936985166252251)
00702         self.assertAlmostEqual(coordinates[1,1], -0.10647619705157851)
00703         self.assertAlmostEqual(coordinates[2,0],  3.1453186907749426)
00704         self.assertAlmostEqual(coordinates[2,1], -0.46331699855941139)
00705         self.assertAlmostEqual(coordinates[3,0],  2.5440202962223761)
00706         self.assertAlmostEqual(coordinates[3,1],  0.20633980959571077)
00707         self.assertAlmostEqual(coordinates[4,0],  2.4468278463376221)
00708         self.assertAlmostEqual(coordinates[4,1], -0.28412285736824866)
00709         self.assertAlmostEqual(coordinates[5,0],  2.4468278463376221)
00710         self.assertAlmostEqual(coordinates[5,1], -0.28412285736824866)
00711         self.assertAlmostEqual(coordinates[6,0], -3.2018619434743254)
00712         self.assertAlmostEqual(coordinates[6,1],  0.019692314198662915)
00713         self.assertAlmostEqual(coordinates[7,0], -3.2018619434743254)
00714         self.assertAlmostEqual(coordinates[7,1],  0.019692314198662915)
00715         self.assertAlmostEqual(coordinates[8,0],  0.46978641990344067)
00716         self.assertAlmostEqual(coordinates[8,1], -0.17778754731982949)
00717         self.assertAlmostEqual(coordinates[9,0], -2.5549912731867215)
00718         self.assertAlmostEqual(coordinates[9,1],  0.19733897451533403)
00719         self.assertAlmostEqual(coordinates[10,0], -2.5033710990370044)
00720         self.assertAlmostEqual(coordinates[10,1], -0.15950182699250004)
00721         self.assertAlmostEqual(coordinates[11,0], -2.4365601663089413)
00722         self.assertAlmostEqual(coordinates[11,1], -0.23390813900973562)
00723         self.assertAlmostEqual(coordinates[12,0], -2.2801521629852974)
00724         self.assertAlmostEqual(coordinates[12,1],  0.0409309711916888)
00725         self.assertAlmostEqual(pc[0,0], -0.66810932728062988)
00726         self.assertAlmostEqual(pc[0,1], -0.74406312017235743)
00727         self.assertAlmostEqual(pc[1,0],  0.74406312017235743)
00728         self.assertAlmostEqual(pc[1,1], -0.66810932728062988)
00729         self.assertAlmostEqual(eigenvalues[0], 9.3110471246032844)
00730         self.assertAlmostEqual(eigenvalues[1], 1.4437456297481428)
00731 
00732         data = numpy.array([[ 2.3, 4.5, 1.2, 6.7, 5.3, 7.1],
00733                             [ 1.3, 6.5, 2.2, 5.7, 6.2, 9.1],
00734                             [ 3.2, 7.2, 3.2, 7.4, 7.3, 8.9],
00735                             [ 4.2, 5.2, 9.2, 4.4, 6.3, 7.2]])
00736         mean, coordinates, pc, eigenvalues =  pca(data)
00737         self.assertAlmostEqual(mean[0], 2.7500)
00738         self.assertAlmostEqual(mean[1], 5.8500)
00739         self.assertAlmostEqual(mean[2], 3.9500)
00740         self.assertAlmostEqual(mean[3], 6.0500)
00741         self.assertAlmostEqual(mean[4], 6.2750)
00742         self.assertAlmostEqual(mean[5], 8.0750)
00743         self.assertAlmostEqual(coordinates[0,0],  2.6460846688406905)
00744         self.assertAlmostEqual(coordinates[0,1], -2.1421701432732418)
00745         self.assertAlmostEqual(coordinates[0,2], -0.56620932754145858)
00746         self.assertAlmostEqual(coordinates[0,3],  0.0)
00747         self.assertAlmostEqual(coordinates[1,0],  2.0644120899917544)
00748         self.assertAlmostEqual(coordinates[1,1],  0.55542108669180323)
00749         self.assertAlmostEqual(coordinates[1,2],  1.4818772348457117)
00750         self.assertAlmostEqual(coordinates[1,3],  0.0)
00751         self.assertAlmostEqual(coordinates[2,0],  1.0686641862092987)
00752         self.assertAlmostEqual(coordinates[2,1],  1.9994412069101073)
00753         self.assertAlmostEqual(coordinates[2,2], -1.000720598980291)
00754         self.assertAlmostEqual(coordinates[2,3],  0.0)
00755         self.assertAlmostEqual(coordinates[3,0], -5.77916094504174)
00756         self.assertAlmostEqual(coordinates[3,1], -0.41269215032867046)
00757         self.assertAlmostEqual(coordinates[3,2],  0.085052691676038017)
00758         self.assertAlmostEqual(coordinates[3,3],  0.0)
00759         self.assertAlmostEqual(pc[0,0], -0.26379660005997291)
00760         self.assertAlmostEqual(pc[0,1],  0.064814972617134495)
00761         self.assertAlmostEqual(pc[0,2], -0.91763310094893846)
00762         self.assertAlmostEqual(pc[0,3],  0.26145408875373249)
00763         self.assertAlmostEqual(pc[1,0],  0.05073770520434398)
00764         self.assertAlmostEqual(pc[1,1],  0.68616983388698793)
00765         self.assertAlmostEqual(pc[1,2],  0.13819106187213354)
00766         self.assertAlmostEqual(pc[1,3],  0.19782544121828985)
00767         self.assertAlmostEqual(pc[2,0], -0.63000893660095947)
00768         self.assertAlmostEqual(pc[2,1],  0.091155993862151397)
00769         self.assertAlmostEqual(pc[2,2],  0.045630391256086845)
00770         self.assertAlmostEqual(pc[2,3], -0.67456694780914772)
00771         # As the last eigenvalue is zero, the corresponding eigenvector is
00772         # strongly affected by roundoff error, and is not being tested here.
00773         # For PCA, this doesn't matter since all data have a zero coefficient
00774         # along this eigenvector.
00775         self.assertAlmostEqual(eigenvalues[0], 6.7678878332578778)
00776         self.assertAlmostEqual(eigenvalues[1], 3.0108911400291856)
00777         self.assertAlmostEqual(eigenvalues[2], 1.8775592718563467)
00778         self.assertAlmostEqual(eigenvalues[3], 0.0)
00779 
if __name__ == "__main__":
    # When run as a script, test the Bio.Cluster implementation and report
    # each test verbosely.
    TestCluster.module = 'Bio.Cluster'
    unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))