Back to index

python-biopython  1.60
test_Phylo.py
Go to the documentation of this file.
00001 # Copyright (C) 2009 by Eric Talevich (eric.talevich@gmail.com)
00002 # This code is part of the Biopython distribution and governed by its
00003 # license. Please see the LICENSE file that should have been included
00004 # as part of this package.
00005 
00006 """Unit tests for the Bio.Phylo module."""
00007 
00008 import sys
00009 import unittest
00010 from cStringIO import StringIO
00011 
00012 from Bio import Phylo
00013 from Bio.Phylo import PhyloXML, NewickIO
00014 
00015 
# Example Newick and Nexus files.
# Paths are relative -- presumably to the directory the test suite is run
# from (TODO confirm against the test runner).
# EX_NEWICK: read below as a single 'newick' tree; tests expect 28 terminals.
EX_NEWICK = 'Nexus/int_node_labels.nwk'
# EX_NEXUS: parsed below as 'nexus'; tests expect 3 trees of 9 terminals each.
EX_NEXUS = 'Nexus/test_Nexus_input.nex'

# Example PhyloXML files.
# EX_APAF and EX_BCL2 are each read as a single 'phyloxml' tree; EX_PHYLO is
# parsed into a list of phylogenies that MixinTests indexes by position.
EX_APAF = 'PhyloXML/apaf.xml'
EX_BCL2 = 'PhyloXML/bcl_2.xml'
EX_PHYLO = 'PhyloXML/phyloxml_examples.xml'
00024 
00025 
00026 class IOTests(unittest.TestCase):
00027     """Tests for parsing and writing the supported formats."""
00028 
00029     def test_newick_read_single(self):
00030         """Read a Newick file with one tree."""
00031         tree = Phylo.read(EX_NEWICK, 'newick')
00032         self.assertEqual(len(tree.get_terminals()), 28)
00033 
00034     def test_newick_read_multiple(self):
00035         """Parse a Nexus file with multiple trees."""
00036         trees = list(Phylo.parse(EX_NEXUS, 'nexus'))
00037         self.assertEqual(len(trees), 3)
00038         for tree in trees:
00039             self.assertEqual(len(tree.get_terminals()), 9)
00040 
00041     def test_newick_write(self):
00042         """Parse a Nexus file with multiple trees."""
00043         # Tree with internal node labels
00044         mem_file = StringIO()
00045         tree = Phylo.read(StringIO('(A,B,(C,D)E)F;'), 'newick')
00046         Phylo.write(tree, mem_file, 'newick')
00047         mem_file.seek(0)
00048         tree2 = Phylo.read(mem_file, 'newick')
00049         # Sanity check
00050         self.assertEqual(tree2.count_terminals(), 4)
00051         # Check internal node labels were retained
00052         internal_names = set(c.name
00053                 for c in tree2.get_nonterminals()
00054                 if c is not None)
00055         self.assertEqual(internal_names, set(('E', 'F')))
00056 
00057     def test_format_branch_length(self):
00058         """Custom format string for Newick branch length serialization."""
00059         tree = Phylo.read(StringIO('A:0.1;'), 'newick')
00060         mem_file = StringIO()
00061         Phylo.write(tree, mem_file, 'newick', format_branch_length='%.0e')
00062         # Py2.5 compat: Windows with Py2.5- represents this as 1e-001;
00063         # on all other platforms it's 1e-01
00064         self.assertTrue(mem_file.getvalue().strip()
00065                         in ['A:1e-01;', 'A:1e-001;'])
00066 
00067     def test_convert(self):
00068         """Convert a tree between all supported formats."""
00069         mem_file_1 = StringIO()
00070         mem_file_3 = StringIO()
00071         if sys.version_info[0] == 3:
00072             from io import BytesIO
00073             mem_file_2 = BytesIO()
00074         else:
00075             mem_file_2 = StringIO()
00076         Phylo.convert(EX_NEWICK, 'newick', mem_file_1, 'nexus')
00077         mem_file_1.seek(0)
00078         Phylo.convert(mem_file_1, 'nexus', mem_file_2, 'phyloxml')
00079         mem_file_2.seek(0)
00080         Phylo.convert(mem_file_2, 'phyloxml', mem_file_3, 'newick')
00081         mem_file_3.seek(0)
00082         tree = Phylo.read(mem_file_3, 'newick')
00083         self.assertEqual(len(tree.get_terminals()), 28)
00084 
00085 
00086 class TreeTests(unittest.TestCase):
00087     """Tests for methods on BaseTree.Tree objects."""
00088     def test_root_with_outgroup(self):
00089         """Tree.root_with_outgroup: reroot at a given clade."""
00090         # On a large realistic tree, at a deep internal node
00091         tree = Phylo.read(EX_APAF, 'phyloxml')
00092         orig_num_tips = len(tree.get_terminals())
00093         orig_tree_len = tree.total_branch_length()
00094         tree.root_with_outgroup('19_NEMVE', '20_NEMVE')
00095         self.assertEqual(orig_num_tips, len(tree.get_terminals()))
00096         self.assertAlmostEqual(orig_tree_len, tree.total_branch_length())
00097         # Now, at an external node
00098         tree.root_with_outgroup('1_BRAFL')
00099         self.assertEqual(orig_num_tips, len(tree.get_terminals()))
00100         self.assertAlmostEqual(orig_tree_len, tree.total_branch_length())
00101         # Specifying outgroup branch length mustn't change the total tree size
00102         tree.root_with_outgroup('2_BRAFL', outgroup_branch_length=0.5)
00103         self.assertEqual(orig_num_tips, len(tree.get_terminals()))
00104         self.assertAlmostEqual(orig_tree_len, tree.total_branch_length())
00105         tree.root_with_outgroup('36_BRAFL', '37_BRAFL',
00106                 outgroup_branch_length=0.5)
00107         self.assertEqual(orig_num_tips, len(tree.get_terminals()))
00108         self.assertAlmostEqual(orig_tree_len, tree.total_branch_length())
00109         # On small contrived trees, testing edge cases
00110         for small_nwk in (
00111                 '(A,B,(C,D));',
00112                 '((E,F),((G,H)),(I,J));',
00113                 '((Q,R),(S,T),(U,V));',
00114                 '(X,Y);',
00115                 ):
00116             tree = Phylo.read(StringIO(small_nwk), 'newick')
00117             orig_tree_len = tree.total_branch_length()
00118             for node in list(tree.find_clades()):
00119                 tree.root_with_outgroup(node)
00120                 self.assertAlmostEqual(orig_tree_len,
00121                                        tree.total_branch_length())
00122 
00123     def test_root_at_midpoint(self):
00124         """Tree.root_at_midpoint: reroot at the tree's midpoint."""
00125         for treefname, fmt in [(EX_APAF, 'phyloxml'),
00126                                (EX_BCL2, 'phyloxml'),
00127                                (EX_NEWICK, 'newick'),
00128                               ]:
00129             tree = Phylo.read(treefname, fmt)
00130             orig_tree_len = tree.total_branch_length()
00131             # Total branch length does not change
00132             tree.root_at_midpoint()
00133             self.assertAlmostEqual(orig_tree_len, tree.total_branch_length())
00134             # Root is bifurcating
00135             self.assertEqual(len(tree.root.clades), 2)
00136             # Deepest tips under each child of the root are equally deep
00137             deep_dist_0 = max(tree.clade[0].depths().itervalues())
00138             deep_dist_1 = max(tree.clade[1].depths().itervalues())
00139             self.assertAlmostEqual(deep_dist_0, deep_dist_1)
00140 
00141     # Magic method
00142     def test_str(self):
00143         """Tree.__str__: pretty-print to a string.
00144 
00145         NB: The exact line counts are liable to change if the object
00146         constructors change.
00147         """
00148         for source, count in zip((EX_APAF, EX_BCL2), (386, 747)):
00149             tree = Phylo.read(source, 'phyloxml')
00150             output = str(tree)
00151             self.assertEqual(len(output.splitlines()), count)
00152 
00153 
00154 class MixinTests(unittest.TestCase):
00155     """Tests for TreeMixin methods."""
00156     def setUp(self):
00157         self.phylogenies = list(Phylo.parse(EX_PHYLO, 'phyloxml'))
00158 
00159     # Traversal methods
00160 
00161     def test_find_elements(self):
00162         """TreeMixin: find_elements() method."""
00163         # From the docstring example
00164         tree = self.phylogenies[5]
00165         matches = list(tree.find_elements(PhyloXML.Taxonomy, code='OCTVU'))
00166         self.assertEqual(len(matches), 1)
00167         self.assertTrue(isinstance(matches[0], PhyloXML.Taxonomy))
00168         self.assertEqual(matches[0].code, 'OCTVU')
00169         self.assertEqual(matches[0].scientific_name, 'Octopus vulgaris')
00170         # Iteration and regexps
00171         tree = self.phylogenies[10]
00172         for point, alt in zip(tree.find_elements(geodetic_datum=r'WGS\d{2}'),
00173                                (472, 10, 452)):
00174             self.assertTrue(isinstance(point, PhyloXML.Point))
00175             self.assertEqual(point.geodetic_datum, 'WGS84')
00176             self.assertAlmostEqual(point.alt, alt)
00177         # class filter
00178         tree = self.phylogenies[4]
00179         events = list(tree.find_elements(PhyloXML.Events))
00180         self.assertEqual(len(events), 2)
00181         self.assertEqual(events[0].speciations, 1)
00182         self.assertEqual(events[1].duplications, 1)
00183         # string filter & find_any
00184         tree = self.phylogenies[3]
00185         taxonomy = tree.find_any("B. subtilis")
00186         self.assertEqual(taxonomy.scientific_name, "B. subtilis")
00187         # integer filter
00188         tree = Phylo.read(EX_APAF, 'phyloxml')
00189         domains = list(tree.find_elements(start=5))
00190         self.assertEqual(len(domains), 8)
00191         for dom in domains:
00192             self.assertEqual(dom.start, 5)
00193             self.assertEqual(dom.value, 'CARD')
00194 
00195     def test_find_clades(self):
00196         """TreeMixin: find_clades() method."""
00197         # boolean filter
00198         for clade, name in zip(self.phylogenies[10].find_clades(name=True),
00199                                 list('ABCD')):
00200             self.assertTrue(isinstance(clade, PhyloXML.Clade))
00201             self.assertEqual(clade.name, name)
00202         # finding deeper attributes
00203         octo = list(self.phylogenies[5].find_clades(code='OCTVU'))
00204         self.assertEqual(len(octo), 1)
00205         self.assertTrue(isinstance(octo[0], PhyloXML.Clade))
00206         self.assertEqual(octo[0].taxonomies[0].code, 'OCTVU')
00207         # string filter
00208         dee = self.phylogenies[10].find_clades('D').next()
00209         self.assertEqual(dee.name, 'D')
00210 
00211 
00212     def test_find_terminal(self):
00213         """TreeMixin: find_elements() with terminal argument."""
00214         for tree, total, extern, intern in zip(
00215                 self.phylogenies,
00216                 (6, 6, 7, 18, 21, 27, 7, 9, 9, 19, 15, 9, 6),
00217                 (3, 3, 3, 3,  3,  3,  3, 3, 3, 3,  4,  3, 3),
00218                 (3, 3, 3, 3,  3,  3,  3, 3, 3, 3,  3,  3, 3),
00219                 ):
00220             self.assertEqual(len(list(tree.find_elements())), total)
00221             self.assertEqual(len(list(tree.find_elements(terminal=True))),
00222                              extern)
00223             self.assertEqual(len(list(tree.find_elements(terminal=False))),
00224                              intern)
00225 
00226     def test_get_path(self):
00227         """TreeMixin: get_path() method."""
00228         path = self.phylogenies[1].get_path('B')
00229         self.assertEqual(len(path), 2)
00230         self.assertAlmostEqual(path[0].branch_length, 0.06)
00231         self.assertAlmostEqual(path[1].branch_length, 0.23)
00232         self.assertEqual(path[1].name, 'B')
00233 
00234     def test_trace(self):
00235         """TreeMixin: trace() method."""
00236         tree = self.phylogenies[1]
00237         path = tree.trace('A', 'C')
00238         self.assertEqual(len(path), 3)
00239         self.assertAlmostEqual(path[0].branch_length, 0.06)
00240         self.assertAlmostEqual(path[2].branch_length, 0.4)
00241         self.assertEqual(path[2].name, 'C')
00242 
00243     # Information methods
00244 
00245     def test_common_ancestor(self):
00246         """TreeMixin: common_ancestor() method."""
00247         tree = self.phylogenies[1]
00248         lca = tree.common_ancestor('A', 'B')
00249         self.assertEqual(lca, tree.clade[0])
00250         lca = tree.common_ancestor('A', 'C')
00251         self.assertEqual(lca, tree.clade)
00252         tree = self.phylogenies[10]
00253         lca = tree.common_ancestor('A', 'B', 'C')
00254         self.assertEqual(lca, tree.clade[0])
00255 
00256     def test_depths(self):
00257         """TreeMixin: depths() method."""
00258         tree = self.phylogenies[1]
00259         depths = tree.depths()
00260         self.assertEqual(len(depths), 5)
00261         for found, expect in zip(sorted(depths.values()),
00262                                  [0, 0.060, 0.162, 0.290, 0.400]):
00263             self.assertAlmostEqual(found, expect)
00264 
00265     def test_distance(self):
00266         """TreeMixin: distance() method."""
00267         t = self.phylogenies[1]
00268         self.assertAlmostEqual(t.distance('A'), 0.162)
00269         self.assertAlmostEqual(t.distance('B'), 0.29)
00270         self.assertAlmostEqual(t.distance('C'), 0.4)
00271         self.assertAlmostEqual(t.distance('A', 'B'), 0.332)
00272         self.assertAlmostEqual(t.distance('A', 'C'), 0.562)
00273         self.assertAlmostEqual(t.distance('B', 'C'), 0.69)
00274 
00275     def test_is_bifurcating(self):
00276         """TreeMixin: is_bifurcating() method."""
00277         for tree, is_b in zip(self.phylogenies,
00278                 (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1)):
00279             self.assertEqual(tree.is_bifurcating(), is_b)
00280 
00281     def test_is_monophyletic(self):
00282         """TreeMixin: is_monophyletic() method."""
00283         tree = self.phylogenies[10]
00284         abcd = tree.get_terminals()
00285         abc = tree.clade[0].get_terminals()
00286         ab = abc[:2]
00287         d = tree.clade[1].get_terminals()
00288         self.assertEqual(tree.is_monophyletic(abcd), tree.root)
00289         self.assertEqual(tree.is_monophyletic(abc), tree.clade[0])
00290         self.assertEqual(tree.is_monophyletic(ab), False)
00291         self.assertEqual(tree.is_monophyletic(d), tree.clade[1])
00292         # Alternate argument form
00293         self.assertEqual(tree.is_monophyletic(*abcd), tree.root)
00294 
00295     def test_total_branch_length(self):
00296         """TreeMixin: total_branch_length() method."""
00297         tree = self.phylogenies[1]
00298         self.assertAlmostEqual(tree.total_branch_length(), 0.792)
00299         self.assertAlmostEqual(tree.clade[0].total_branch_length(), 0.392)
00300 
00301     # Tree manipulation methods
00302 
00303     def test_collapse(self):
00304         """TreeMixin: collapse() method."""
00305         tree = self.phylogenies[1]
00306         parent = tree.collapse(tree.clade[0])
00307         self.assertEqual(len(parent), 3)
00308         for clade, name, blength in zip(parent,
00309                 ('C', 'A', 'B'),
00310                 (0.4, 0.162, 0.29)):
00311             self.assertEqual(clade.name, name)
00312             self.assertAlmostEqual(clade.branch_length, blength)
00313 
00314     def test_collapse_all(self):
00315         """TreeMixin: collapse_all() method."""
00316         tree = Phylo.read(EX_APAF, 'phyloxml')
00317         d1 = tree.depths()
00318         tree.collapse_all()
00319         d2 = tree.depths()
00320         # Total branch lengths should not change
00321         for clade in d2:
00322             self.assertAlmostEqual(d1[clade], d2[clade])
00323         # No internal nodes should remain except the root
00324         self.assertEqual(len(tree.get_terminals()), len(tree.clade))
00325         self.assertEqual(len(list(tree.find_clades(terminal=False))), 1)
00326         # Again, with a target specification
00327         tree = Phylo.read(EX_APAF, 'phyloxml')
00328         d1 = tree.depths()
00329         internal_node_ct = len(tree.get_nonterminals())
00330         tree.collapse_all(lambda c: c.branch_length < 0.1)
00331         d2 = tree.depths()
00332         # Should have collapsed 7 internal nodes
00333         self.assertEqual(len(tree.get_nonterminals()), internal_node_ct - 7)
00334         for clade in d2:
00335             self.assertAlmostEqual(d1[clade], d2[clade])
00336 
00337     def test_ladderize(self):
00338         """TreeMixin: ladderize() method."""
00339         def ordered_names(tree):
00340             return [n.name for n in tree.get_terminals()]
00341         tree = self.phylogenies[10]
00342         self.assertEqual(ordered_names(tree), list('ABCD'))
00343         tree.ladderize()
00344         self.assertEqual(ordered_names(tree), list('DABC'))
00345         tree.ladderize(reverse=True)
00346         self.assertEqual(ordered_names(tree), list('ABCD'))
00347 
00348     def test_prune(self):
00349         """TreeMixin: prune() method."""
00350         tree = self.phylogenies[10]
00351         # Taxon in a trifurcation -- no collapse afterward
00352         parent = tree.prune(name='B')
00353         self.assertEqual(len(parent.clades), 2)
00354         self.assertEqual(parent.clades[0].name, 'A')
00355         self.assertEqual(parent.clades[1].name, 'C')
00356         self.assertEqual(len(tree.get_terminals()), 3)
00357         self.assertEqual(len(tree.get_nonterminals()), 2)
00358         # Taxon in a bifurcation -- collapse
00359         tree = self.phylogenies[0]
00360         parent = tree.prune(name='A')
00361         self.assertEqual(len(parent.clades), 2)
00362         for clade, name, blen in zip(parent, 'BC', (.29, .4)):
00363             self.assertTrue(clade.is_terminal())
00364             self.assertEqual(clade.name, name)
00365             self.assertAlmostEqual(clade.branch_length, blen)
00366         self.assertEqual(len(tree.get_terminals()), 2)
00367         self.assertEqual(len(tree.get_nonterminals()), 1)
00368         # Taxon just below the root -- don't screw up
00369         tree = self.phylogenies[1]
00370         parent = tree.prune(name='C')
00371         self.assertEqual(parent, tree.root)
00372         self.assertEqual(len(parent.clades), 2)
00373         for clade, name, blen in zip(parent, 'AB', (.102, .23)):
00374             self.assertTrue(clade.is_terminal())
00375             self.assertEqual(clade.name, name)
00376             self.assertAlmostEqual(clade.branch_length, blen)
00377         self.assertEqual(len(tree.get_terminals()), 2)
00378         self.assertEqual(len(tree.get_nonterminals()), 1)
00379 
00380     def test_split(self):
00381         """TreeMixin: split() method."""
00382         tree = self.phylogenies[0]
00383         C = tree.clade[1]
00384         C.split()
00385         self.assertEqual(len(C), 2)
00386         self.assertEqual(len(tree.get_terminals()), 4)
00387         self.assertEqual(len(tree.get_nonterminals()), 3)
00388         C[0].split(3, .5)
00389         self.assertEqual(len(tree.get_terminals()), 6)
00390         self.assertEqual(len(tree.get_nonterminals()), 4)
00391         for clade, name, blen in zip(C[0],
00392                 ('C00', 'C01', 'C02'),
00393                 (0.5, 0.5, 0.5)):
00394             self.assertTrue(clade.is_terminal())
00395             self.assertEqual(clade.name, name)
00396             self.assertEqual(clade.branch_length, blen)
00397 
00398 
00399 # ---------------------------------------------------------
00400 
if __name__ == '__main__':
    # Run every test case in this module with verbose (level 2) reporting.
    unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))