Back to index

python-biopython  1.60
test_Muscle_tool.py
Go to the documentation of this file.
00001 # Copyright 2009 by Peter Cock.  All rights reserved.
00002 # This code is part of the Biopython distribution and governed by its
00003 # license.  Please see the LICENSE file that should have been included
00004 # as part of this package.
00005 
00006 import os
00007 import sys
00008 import subprocess
00009 import unittest
00010 
00011 from Bio import MissingExternalDependencyError
00012 from Bio.Align.Applications import MuscleCommandline
00013 from Bio import SeqIO
00014 from Bio import AlignIO
00015 
00016 #################################################################
00017 
#Try to avoid problems when the OS is in another language
os.environ['LANG'] = 'C'

#Locate the MUSCLE binary.  muscle_exe stays None when it cannot be found,
#in which case MissingExternalDependencyError is raised below.
muscle_exe = None
if sys.platform=="win32":
    #The Program Files folder can vary depending on the Windows language,
    #so prefer the environment variable and fall back on the English name.
    prog_files = os.environ.get("PROGRAMFILES", r"C:\Program Files")
    #For Windows, MUSCLE just comes as a zip file which contains a
    #Muscle directory with the muscle.exe file plus a readme etc,
    #which the user could put anywhere.  We'll try a few sensible
    #locations under Program Files... and then the full path.
    likely_dirs = ["", #Current dir
                   prog_files,
                   os.path.join(prog_files,"Muscle3.6"),
                   os.path.join(prog_files,"Muscle3.7"),
                   os.path.join(prog_files,"Muscle3.8"),
                   os.path.join(prog_files,"Muscle3.9"),
                   os.path.join(prog_files,"Muscle")] + sys.path
    for folder in likely_dirs:
        #There is deliberately no os.path.isdir(folder) guard here, since
        #os.path.isdir("") is False and that would skip the current
        #directory entry.  os.path.isfile() on the joined path is already
        #False when the folder does not exist.
        candidate = os.path.join(folder, "muscle.exe")
        if os.path.isfile(candidate):
            muscle_exe = candidate
            break
else:
    #Use subprocess rather than the commands module (which only exists on
    #Python 2).  The command goes via the shell and stderr is merged into
    #stdout, so a shell "not found" message ends up in output too.
    child = subprocess.Popen("muscle -version",
                             shell=True,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT,
                             universal_newlines=True)
    output = child.communicate()[0]
    #Since "not found" may be in another language, try and be sure this is
    #really the MUSCLE tool's output
    if "not found" not in output and "MUSCLE" in output \
    and "Edgar" in output:
        muscle_exe = "muscle"

if not muscle_exe:
    raise MissingExternalDependencyError(\
        "Install MUSCLE if you want to use the Bio.Align.Applications wrapper.")
00057 
00058 #################################################################
00059 
class MuscleApplication(unittest.TestCase):
    """Check MuscleCommandline builds the expected command lines and runs.

    Each test builds a command line, compares its string form with the
    expected text, checks that eval(repr(...)) gives back the same command
    line string, and (apart from the last test) runs the tool.
    """

    def setUp(self):
        """Define the input files and the temporary output filenames."""
        self.infile1  = "Fasta/f002"
        self.infile2  = "Fasta/fa01"
        self.infile3  = "Fasta/f001"
        self.outfile1 = "Fasta/temp align out1.fa" #with spaces!
        self.outfile2 = "Fasta/temp_align_out2.fa"
        self.outfile3 = "Fasta/temp_align_out3.fa"
        self.outfile4 = "Fasta/temp_align_out4.fa"

    def tearDown(self):
        """Remove whichever temporary output files a test created."""
        for filename in (self.outfile1, self.outfile2,
                         self.outfile3, self.outfile4):
            if os.path.isfile(filename):
                os.remove(filename)

    def test_Muscle_simple(self):
        """Simple round-trip through app just infile and outfile"""
        cmdline = MuscleCommandline(muscle_exe,
                                    input=self.infile1,
                                    out=self.outfile1)
        #The output filename contains spaces, so it should get quoted
        self.assertEqual(str(cmdline), muscle_exe \
                         + ' -in Fasta/f002 -out "Fasta/temp align out1.fa"')
        self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
        output, error = cmdline()
        self.assertEqual(output, "")
        #Show the captured stderr if the check fails
        self.assertTrue("ERROR" not in error, error)

    def test_Muscle_with_options(self):
        """Round-trip through app with a switch and valued option"""
        cmdline = MuscleCommandline(muscle_exe)
        cmdline.set_parameter("input", self.infile1) #"input" is alias for "in"
        cmdline.set_parameter("out", self.outfile2)
        #Use property:
        cmdline.objscore = "sp"
        cmdline.noanchors = True
        self.assertEqual(str(cmdline), muscle_exe +\
                         " -in Fasta/f002" + \
                         " -out Fasta/temp_align_out2.fa" + \
                         " -objscore sp -noanchors")
        self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
        output, error = cmdline()
        self.assertEqual(output, "")
        self.assertTrue("ERROR" not in error, error)
        #Report stderr on failure (stdout was just asserted to be empty,
        #so it would make a useless failure message)
        self.assertTrue(error.strip().startswith("MUSCLE"), error)

    def test_Muscle_profile_simple(self):
        """Simple round-trip through app doing a profile alignment"""
        cmdline = MuscleCommandline(muscle_exe)
        cmdline.set_parameter("out", self.outfile3)
        cmdline.set_parameter("profile", True)
        cmdline.set_parameter("in1", self.infile2)
        cmdline.set_parameter("in2", self.infile3)
        self.assertEqual(str(cmdline), muscle_exe + \
                         " -out Fasta/temp_align_out3.fa" + \
                         " -profile -in1 Fasta/fa01 -in2 Fasta/f001")
        self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
        output, error = cmdline()
        self.assertEqual(output, "")
        self.assertTrue("ERROR" not in error, error)
        #Report stderr on failure (stdout was just asserted to be empty)
        self.assertTrue(error.strip().startswith("MUSCLE"), error)

    def test_Muscle_profile_with_options(self):
        """Profile alignment, and switch and valued options"""
        #Using some keyword arguments, note -stable isn't supported in v3.8
        cmdline = MuscleCommandline(muscle_exe, out=self.outfile4,
                                    in1=self.infile2, in2=self.infile3,
                                    profile=True, stable=True,
                                    cluster1="neighborjoining")
        self.assertEqual(str(cmdline), muscle_exe + \
                         " -out Fasta/temp_align_out4.fa" + \
                         " -profile -in1 Fasta/fa01 -in2 Fasta/f001" + \
                         " -cluster1 neighborjoining -stable")
        self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
        #The command line is only built here, it is not executed.
        #TODO - Why doesn't this work with MUSCLE 3.6 on the Mac?
        #It may be another bug fixed in MUSCLE 3.7 ...
        #(The old disabled code for running it used generic_run, which has
        #been removed from Biopython.)
00149 
class SimpleAlignTest(unittest.TestCase):
    """Simple MUSCLE tests"""

    #NOTE: A FASTA output test (test_simple_fasta) used to be kept here,
    #disabled inside a string literal, because FASTA output seemed broken
    #on Muscle 3.6 (on the Mac).  It relied on generic_run, which has been
    #removed from Biopython.

    def _run_muscle(self, cmdline, stdin_text=None):
        """Run the command line and collect everything it prints.

        cmdline is converted with str() and run via the shell (except on
        Windows).  If stdin_text is given it is fed to the child's stdin,
        otherwise the child inherits stdin.

        Returns a tuple (return code, stdout text, stderr text).

        communicate() is used so that stdout and stderr are drained
        together; reading one pipe to the end before the other can
        deadlock if the child fills the unread pipe.
        """
        if stdin_text is None:
            stdin = None
        else:
            stdin = subprocess.PIPE
        child = subprocess.Popen(str(cmdline),
                                 stdin=stdin,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 universal_newlines=True,
                                 shell=(sys.platform!="win32"))
        out_text, err_text = child.communicate(stdin_text)
        return child.returncode, out_text, err_text

    def _parse_clustal(self, text):
        """Parse Clustal format text, return the alignment sorted by id."""
        try:
            from StringIO import StringIO #Python 2
        except ImportError:
            from io import StringIO #Python 3
        align = AlignIO.read(StringIO(text), "clustal")
        align.sort() #by record.id
        return align

    def _check_records(self, records, align, check_seqs=True):
        """Compare the (sorted) input records with the aligned records.

        The number of records and their identifiers must match.  If
        check_seqs is true, each aligned sequence with the gap characters
        removed must also equal the original sequence.
        """
        self.assertEqual(len(records),len(align))
        for old, new in zip(records, align):
            self.assertEqual(old.id, new.id)
            if check_seqs:
                self.assertEqual(str(new.seq).replace("-",""), str(old.seq))

    def test_simple_clustal(self):
        """Simple muscle call using Clustal output with a MUSCLE header"""
        input_file = "Fasta/f002"
        self.assertTrue(os.path.isfile(input_file))
        records = list(SeqIO.parse(input_file,"fasta"))
        records.sort(key = lambda rec: rec.id)
        #Prepare the command... use Clustal output (with a MUSCLE header)
        cmdline = MuscleCommandline(muscle_exe, input=input_file, clw = True)
        self.assertEqual(str(cmdline).rstrip(), muscle_exe + \
                         " -in Fasta/f002 -clw")
        self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
        return_code, out_text, err_text = self._run_muscle(cmdline)
        self.assertEqual(return_code, 0, err_text)
        #Didn't use -quiet so there should be progress reports on stderr,
        self.assertTrue(err_text.strip().startswith("MUSCLE"), err_text)
        align = self._parse_clustal(out_text)
        self._check_records(records, align)

    def test_simple_clustal_strict(self):
        """Simple muscle call using strict Clustal output"""
        input_file = "Fasta/f002"
        self.assertTrue(os.path.isfile(input_file))
        records = list(SeqIO.parse(input_file,"fasta"))
        records.sort(key = lambda rec: rec.id)
        #Prepare the command...
        cmdline = MuscleCommandline(muscle_exe)
        cmdline.set_parameter("in", input_file)
        #Use clustal output (with a CLUSTAL header)
        cmdline.set_parameter("clwstrict", True) #Default None treated as False!
        self.assertEqual(str(cmdline).rstrip(), muscle_exe + \
                         " -in Fasta/f002 -clwstrict")
        self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
        return_code, out_text, err_text = self._run_muscle(cmdline)
        self.assertEqual(return_code, 0, err_text)
        #Didn't use -quiet so there should be progress reports on stderr,
        self.assertTrue(err_text.strip().startswith("MUSCLE"), err_text)
        align = self._parse_clustal(out_text)
        self._check_records(records, align)

    def test_long(self):
        """Simple muscle call using long file"""
        #Create a large input file by converting some of another example file
        temp_large_fasta_file = "temp_cw_prot.fasta"
        records = list(SeqIO.parse("NBRF/Cw_prot.pir", "pir"))[:40]
        try:
            SeqIO.write(records, temp_large_fasta_file, "fasta")
            #Prepare the command...
            cmdline = MuscleCommandline(muscle_exe)
            cmdline.set_parameter("in", temp_large_fasta_file)
            #Use fast options
            cmdline.set_parameter("maxiters", 1)
            cmdline.set_parameter("diags", True) #Default None treated as False!
            #Use clustal output
            cmdline.set_parameter("clwstrict", True) #Default None treated as False!
            #Shouldn't need this, but just to make sure it is accepted
            cmdline.set_parameter("maxhours", 0.1)
            #No progress reports to stderr
            cmdline.set_parameter("quiet", True) #Default None treated as False!
            self.assertEqual(str(cmdline).rstrip(), muscle_exe + \
                             " -in temp_cw_prot.fasta -diags -maxhours 0.1" + \
                             " -maxiters 1 -clwstrict -quiet")
            self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
            return_code, out_text, err_text = self._run_muscle(cmdline)
        finally:
            #Always remove the temp input file, even if a check above failed
            if os.path.isfile(temp_large_fasta_file):
                os.remove(temp_large_fasta_file)
        self.assertEqual(return_code, 0, err_text)
        #See if quiet worked:
        self.assertEqual("", err_text.strip())
        align = self._parse_clustal(out_text)
        records.sort(key = lambda rec: rec.id)
        self._check_records(records, align)

    def test_using_stdin(self):
        """Simple alignment using stdin"""
        input_file = "Fasta/f002"
        self.assertTrue(os.path.isfile(input_file))
        records = list(SeqIO.parse(input_file,"fasta"))
        #Prepare the command... use Clustal output (with a MUSCLE header)
        cline = MuscleCommandline(muscle_exe, clw=True)
        self.assertEqual(str(cline).rstrip(), muscle_exe + " -clw")
        self.assertEqual(str(eval(repr(cline))), str(cline))
        #Give the records to the tool as FASTA text via stdin
        fasta_text = "".join([rec.format("fasta") for rec in records])
        return_code, out_text, err_text = self._run_muscle(cline, fasta_text)
        self.assertEqual(0, return_code, err_text)
        align = self._parse_clustal(out_text)
        records.sort(key = lambda rec: rec.id)
        self._check_records(records, align)

    def test_with_multiple_output_formats(self):
        """Simple muscle call with multiple output formats"""
        input_file = "Fasta/f002"
        output_html = "temp_f002.html"
        output_clwstrict = "temp_f002.clw"
        self.assertTrue(os.path.isfile(input_file))
        records = list(SeqIO.parse(input_file,"fasta"))
        records.sort(key = lambda rec: rec.id)
        #Prepare the command... use Clustal output (with a MUSCLE header)
        cmdline = MuscleCommandline(muscle_exe, input=input_file,
                                    clw=True, htmlout = output_html,
                                    clwstrictout = output_clwstrict)
        self.assertEqual(str(cmdline).rstrip(), muscle_exe + \
                         " -in Fasta/f002 -clw -htmlout temp_f002.html" +\
                         " -clwstrictout temp_f002.clw")
        self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
        try:
            return_code, out_text, err_text = self._run_muscle(cmdline)
            self.assertEqual(return_code, 0, err_text)
            #Didn't use -quiet so there should be progress reports on stderr,
            self.assertTrue(err_text.strip().startswith("MUSCLE"), err_text)
            #Clustalw on stdout (only the identifiers are compared here):
            align = self._parse_clustal(out_text)
            self._check_records(records, align, check_seqs=False)
            #HTML file.  Plain text mode is used as the old "rU" mode is
            #rejected by recent versions of Python; the strip() call copes
            #with any trailing line ending.
            handle = open(output_html)
            try:
                html = handle.read().strip().upper()
            finally:
                handle.close()
            self.assertTrue(html.startswith("<HTML"))
            self.assertTrue(html.endswith("</HTML>"))
            #ClustalW strict:
            align = AlignIO.read(output_clwstrict, "clustal")
            align.sort()
            self._check_records(records, align, check_seqs=False)
        finally:
            #Always remove the output files, even if a check above failed
            for filename in (output_html, output_clwstrict):
                if os.path.isfile(filename):
                    os.remove(filename)
00361 
if __name__ == "__main__":
    #When executed as a script, run the tests with a verbose text runner
    unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))