Back to index

python-biopython  1.60
test_EmbossPhylipNew.py
Go to the documentation of this file.
00001 # Copyright 2009 by David Winter.  All rights reserved.
00002 # This code is part of the Biopython distribution and governed by its
00003 # license.  Please see the LICENSE file that should have been included
00004 # as part of this package.
00005 
00006 import os
00007 import sys
00008 import unittest
00009 import subprocess
00010 
00011 from Bio import MissingExternalDependencyError
00012 from Bio import AlignIO
00013 from Bio.Nexus import Trees # One day we should use planned TreeIO module
00014 
00015 from Bio.Emboss.Applications import FDNADistCommandline, FNeighborCommandline
00016 from Bio.Emboss.Applications import FSeqBootCommandline, FProtDistCommandline
00017 from Bio.Emboss.Applications import FProtParsCommandline, FConsenseCommandline
00018 from Bio.Emboss.Applications import FTreeDistCommandline, FDNAParsCommandline
00019 
#Try to avoid problems when the OS is in another language
os.environ['LANG'] = 'C'

# Names of the EMBOSS PhylipNew programs exercised by the tests below.
exes_wanted = ['fdnadist', 'fneighbor', 'fprotdist','fprotpars','fconsense',
               'fseqboot', 'ftreedist', 'fdnapars']
exes = dict() #Dictionary mapping from names to exe locations

if "EMBOSS_ROOT" in os.environ:
    #Windows default installation path is C:\mEMBOSS which contains the exes.
    #EMBOSS also sets an environment variable which we will check for.
    path = os.environ["EMBOSS_ROOT"]
    if os.path.isdir(path):
        for name in exes_wanted:
            if os.path.isfile(os.path.join(path, name+".exe")):
                exes[name] = os.path.join(path, name+".exe")
        # The loop variable only exists if the loop actually ran, so it is
        # deleted here rather than next to "del path" below.  Deleting it
        # unconditionally raised NameError whenever EMBOSS_ROOT was set but
        # did not point at a directory.
        del name
    del path
if sys.platform!="win32":
    # NOTE: "commands" is a Python 2 only module.
    import commands
    for name in exes_wanted:
        #This will "just work" if installed on the path as normal on Unix
        output = commands.getoutput("%s -help" % name)
        # The shell reports a missing program with one of these phrases;
        # anything else is taken to mean the tool is installed.
        if "not found" not in output and "not recognized" not in output:
            exes[name] = name
        del output
    del name

# Every wanted tool must have been located, otherwise the module is skipped
# by raising the missing-dependency error.
if len(exes) < len(exes_wanted):
    raise MissingExternalDependencyError(\
          "Install the Emboss package 'PhylipNew' if you want to use the "+\
          "Bio.Emboss.Applications wrappers for phylogenetic tools.")
00050 
00051  ###########################################################################
00052 
00053 # A few top level functions that are called repeatedly in the test cases
def write_AlignIO_dna():
    """Write Clustalw/opuntia.aln out again as the phylip file Phylip/opuntia.phy.

    The value returned by AlignIO.convert must be exactly 1, otherwise an
    AssertionError is raised.
    """
    converted = AlignIO.convert("Clustalw/opuntia.aln", "clustal",
                                "Phylip/opuntia.phy", "phylip")
    assert 1 == converted
00058 
def write_AlignIO_protein():
    """Write Clustalw/hedgehog.aln out again as the phylip file Phylip/hedgehog.phy.

    The value returned by AlignIO.convert must be exactly 1, otherwise an
    AssertionError is raised.
    """
    converted = AlignIO.convert("Clustalw/hedgehog.aln", "clustal",
                                "Phylip/hedgehog.phy", "phylip")
    assert 1 == converted
00063 
def clean_up():
    """Remove the temporary files the tests may have left behind.

    Meant to be called from the tearDown() method of the test fixtures.
    Files that do not exist are silently skipped.
    """
    leftovers = ("test_file", "Phylip/opuntia.phy", "Phylip/hedgehog.phy")
    for leftover in leftovers:
        if not os.path.isfile(leftover):
            continue
        os.remove(leftover)
00069 
def parse_trees(filename):
    """Yield a Trees.Tree object for each tree stored in the given file.

    Helper function until we have Bio.Phylo on trunk.

    The file is read in one go and split wherever a semicolon is followed
    by a newline; each non-empty piece gets its semicolon put back and is
    passed to Trees.Tree.

    Arguments:
     - filename - path of the tree file to read.
    """
    # Read the file the caller asked for.  This used to be hard coded to
    # "test_file", which silently ignored the filename argument.
    handle = open(filename, "r")
    try:
        data = handle.read()
    finally:
        # Close explicitly so the file can be deleted again by clean_up()
        handle.close()
    for tree_str in data.split(";\n"):
        # The text after the final ";\n" is empty, skip it
        if tree_str:
            yield Trees.Tree(tree_str+";")
00076 
class DistanceTests(unittest.TestCase):
    """Tests for calculating distance based phylogenetic trees with phylip"""

    # Taxon names, in sorted order, which tree_from_distances() expects to
    # find in every tree it parses.
    test_taxa = ['Archaeohip', 'Calippus', 'Hypohippus', 'M._secundu',
                 'Merychippu', 'Mesohippus', 'Nannipus', 'Neohippari',
                 'Parahippus', 'Pliohippus']

    def tearDown(self):
        clean_up()

    def distances_from_alignment(self, filename, DNA = True):
        """Run fdnadist (or fprotdist when DNA is false) on an alignment file.

        The distance matrix is written to "test_file"; only its existence
        is checked.
        """
        self.assertTrue(os.path.isfile(filename), "Missing %s" % filename)
        # Both wrappers are given exactly the same options, so only the
        # wrapper class and the executable need choosing here.
        if DNA:
            wrapper = FDNADistCommandline
            exe = exes["fdnadist"]
        else:
            wrapper = FProtDistCommandline
            exe = exes["fprotdist"]
        cline = wrapper(exe,
                        method = 'j',
                        sequence = filename,
                        outfile = "test_file",
                        auto = True)
        stdout, stderr = cline()
        #biopython can't grok distance matrices, so we'll just check it exists
        self.assertTrue(os.path.isfile("test_file"))

    def tree_from_distances(self, filename):
        """Run fneighbor on a distance matrix and check the taxa in each tree.

        Spaces in the taxon names are turned into underscores before the
        sorted names are compared with test_taxa.
        """
        self.assertTrue(os.path.isfile(filename), "Missing %s" % filename)
        neighbor = FNeighborCommandline(exes["fneighbor"],
                                        datafile = filename,
                                        outtreefile = "test_file",
                                        auto = True, filter = True)
        stdout, stderr = neighbor()
        for tree in parse_trees("test_file"):
            found = sorted(taxon.replace(" ", "_")
                           for taxon in tree.get_taxa())
            self.assertEqual(self.test_taxa, found)

    def test_distances_from_phylip_DNA(self):
        """Calculate a distance matrix from an phylip alignment"""
        self.distances_from_alignment("Phylip/horses.phy")

    def test_distances_from_AlignIO_DNA(self):
        """Calculate a distance matrix from an alignment written by AlignIO"""
        write_AlignIO_dna()
        self.distances_from_alignment("Phylip/opuntia.phy")

    #def test_distances_from_bootstrapped_phylip_DNA(self):
    #    """Calculate a set of distance matrices from phylip alignments"""
    #    self.distances_from_alignment("Phylip/bs_horses.phy")

    # fprotdist tests
    def test_distances_from_protein_phylip(self):
        """Calculate a distance matrix from phylip protein alignment"""
        self.distances_from_alignment("Phylip/interlaced.phy", DNA=False)

    def test_distances_from_protein_AlignIO(self):
        """Calculate distance matrix from an AlignIO written protein alignment"""
        write_AlignIO_protein()
        self.distances_from_alignment("Phylip/hedgehog.phy", DNA=False)

    #def test_distances_from_bootstrapped_phylip_protein(self):
    #    """Calculate distance matrices from a bootstrapped protein alignment"""
    #    self.distances_from_alignment("Clustalw/bs_interlaced.phy", DNA=False)

    # fneighbor tests
    #def test_tree_from_distances(self):
    #    """Estimate tree from distance matrix and parse it."""
    #    self.tree_from_distances("Phylip/horses.fdnadist")

    # This one won't work because of a bug in EMBOSS 6.0.1
    #def test_tree_from_bootstrapped_distances(self):
    #    """Estimate tree from bootstrapped distance matrix and parse it"""
    #    self.tree_from_distances("Phylip/bs_horses.fdnadist")
00154 
class ParsimonyTests(unittest.TestCase):
    """Tests for estimating parsimony based phylogenetic trees with phylip"""

    def tearDown(self):
        clean_up()

    def parsimony_tree(self, filename, format, DNA=True):
        """Estimate a parsimony tree from an alignment

        Runs fdnapars (or fprotpars when DNA is false) on the alignment,
        writing the trees to "test_file", then checks that every tree holds
        the same taxa as the first alignment in the input file.  Spaces in
        names are turned into underscores on both sides before comparing.

        Arguments:
         - filename - path of the alignment file to analyse.
         - format - AlignIO format name used to read the alignment back.
         - DNA - true for fdnapars, false for fprotpars.
        """
        self.assertTrue(os.path.isfile(filename), "Missing %s" % filename)
        if DNA:
            cline = FDNAParsCommandline(exes["fdnapars"],
                                        sequence = filename,
                                        outtreefile = "test_file",
                                        auto= True, stdout=True)
        else:
            cline = FProtParsCommandline(exes["fprotpars"],
                                         sequence = filename,
                                         outtreefile = "test_file",
                                         auto= True, stdout=True)
        stdout, stderr = cline()
        # Close the alignment file explicitly rather than leaving it to the
        # garbage collector: tearDown() may delete this very file, and not
        # every Python implementation closes unreferenced files promptly.
        handle = open(filename, "r")
        try:
            first_alignment = AlignIO.parse(handle, format).next()
            a_taxa = [s.name.replace(" ", "_") for s in first_alignment]
        finally:
            handle.close()
        for tree in parse_trees("test_file"):
            t_taxa = [t.replace(" ", "_") for t in tree.get_taxa()]
            self.assertEqual(sorted(a_taxa), sorted(t_taxa))

    # fdnapars tests
    #def test_parsimony_tree_from_phylip_DNA(self):
    #    """Make a parsimony tree from a phylip DNA alignment"""
    #    self.parsimony_tree("Phylip/horses.phy", "phylip")

    def test_parsimony_tree_from_AlignIO_DNA(self):
        """Make a parsimony tree from an alignment written with AlignIO"""
        write_AlignIO_dna()
        self.parsimony_tree("Phylip/opuntia.phy", "phylip")

    #def test_parsimony_bootstrapped_phylip_DNA(self):
    #    """Make a parsimony tree from a bootstrapped phylip DNA alignment"""
    #    self.parsimony_tree("Phylip/bs_horses.phy", "phylip")

    # fprotpars tests
    #def test_parsimony_tree_from_phylip_protein(self):
    #    """Make a parsimony tree from a phylip DNA alignment"""
    #    self.parsimony_tree("Phylip/interlaced.phy", "phylip", DNA=False)

    def test_parsimony_from_AlignIO_protein(self):
        """Make a parsimony tree from protein alignment written with AlignIO"""
        write_AlignIO_protein()
        # NOTE(review): write_AlignIO_protein() creates Phylip/hedgehog.phy,
        # yet the tree is built from Phylip/interlaced.phy, so the AlignIO
        # output is never used here.  Left unchanged as it may be deliberate
        # - TODO confirm whether "Phylip/hedgehog.phy" was intended.
        self.parsimony_tree("Phylip/interlaced.phy", "phylip", DNA=False)

    #def test_parsimony_tree_bootstrapped_phylip_protein(self):
    #    """Make a parsimony tree from a phylip DNA alignment"""
    #    self.parsimony_tree("Phylip/bs_interlaced.phy", "phylip", DNA=False)
00208 
class BootstrapTests(unittest.TestCase):
    """Tests for pseudosampling alignments with fseqboot"""

    def tearDown(self):
        clean_up()

    def check_bootstrap(self, filename, format, align_type="d"):
        """Check we can use fseqboot to pseudosample an alignment

        The align_type type argument is passed to the commandline object to
        set the output format to use (from [D]na,[p]rotein and [r]na )

        fseqboot is asked for two replicates, written to "test_file".  The
        output must parse as exactly two alignments, each listing the same
        names, in the same order, as the original alignment (spaces in
        names are turned into underscores before comparing).

        Arguments:
         - filename - path of the alignment file to pseudosample.
         - format - AlignIO format name used for both input and output.
         - align_type - sequence type passed to fseqboot as seqtype.
        """
        self.assertTrue(os.path.isfile(filename), "Missing %s" % filename)
        cline = FSeqBootCommandline(exes["fseqboot"],
                                    sequence = filename,
                                    outfile =  "test_file",
                                    seqtype = align_type,
                                    reps = 2,
                                    auto = True, filter = True)
        stdout, stderr = cline()
        # Both files are closed explicitly rather than left to the garbage
        # collector, since tearDown() may delete them straight afterwards.
        # the resultant file should have 2 alignments...
        handle = open("test_file", "r")
        try:
            bs = list(AlignIO.parse(handle, format))
        finally:
            handle.close()
        self.assertEqual(len(bs), 2)
        # ..and each name in the original alignment...
        handle = open(filename, "r")
        try:
            a_names = [s.name.replace(" ", "_") for s in
                       AlignIO.read(handle, format)]
        finally:
            handle.close()
        # ...should be in each alignment in the bootstrapped file
        for a in bs:
            self.assertEqual(a_names, [s.name.replace(" ", "_") for s in a])

    def test_bootstrap_phylip_DNA(self):
        """Pseudosample a phylip DNA alignment"""
        self.check_bootstrap("Phylip/horses.phy", "phylip")

    def test_bootstrap_AlignIO_DNA(self):
        """Pseudosample a phylip DNA alignment written with AlignIO"""
        write_AlignIO_dna()
        self.check_bootstrap("Phylip/opuntia.phy", "phylip")

    def test_bootstrap_phylip_protein(self):
        """Pseudosample a phylip protein alignment"""
        self.check_bootstrap("Phylip/interlaced.phy", "phylip", "p")

    def test_bootstrap_AlignIO_protein(self):
        """Pseudosample a phylip protein alignment written with AlignIO"""
        write_AlignIO_protein()
        self.check_bootstrap("Phylip/hedgehog.phy", "phylip", "p")
00256 
class TreeComparisonTests(unittest.TestCase):
    """Tests for comparing phylogenetic trees with phylip tools"""

    def tearDown(self):
        clean_up()

    def test_fconsense(self):
        """Calculate a consensus tree with fconsense"""
        consense = FConsenseCommandline(exes["fconsense"],
                                        intreefile = "Phylip/horses.tree",
                                        outtreefile = "test_file",
                                        auto = True, filter = True)
        stdout, stderr = consense()
        #Keep next() and get_taxa() as separate statements to help 2to3 work
        consensus_tree = parse_trees("test_file").next()
        consensus_taxa = consensus_tree.get_taxa()
        # Each tree parsed for the input file must hold the consensus taxa
        for input_tree in parse_trees("Phylip/horses.tree"):
            input_taxa = input_tree.get_taxa()
            self.assertEqual(sorted(consensus_taxa), sorted(input_taxa))

    def test_ftreedist(self):
        """Calculate the distance between trees with ftreedist"""
        treedist = FTreeDistCommandline(exes["ftreedist"],
                                        intreefile = "Phylip/horses.tree",
                                        outfile = "test_file",
                                        auto = True, filter = True)
        stdout, stderr = treedist()
        # Only the existence of the output file is checked
        self.assertTrue(os.path.isfile("test_file"))
00285 
if __name__ == "__main__":
    # Run all the tests in this module verbosely when executed as a script.
    runner = unittest.TextTestRunner(verbosity = 2)
    try:
        unittest.main(testRunner=runner)
    finally:
        # unittest.main() finishes by raising SystemExit, so a plain
        # statement after it would never run; the finally clause makes
        # sure the temporary files are still removed.
        clean_up()