Back to index

python-biopython  1.60
test_NCBI_BLAST_tools.py
Go to the documentation of this file.
00001 # Copyright 2009-2010 by Peter Cock.  All rights reserved.
00002 # This code is part of the Biopython distribution and governed by its
00003 # license.  Please see the LICENSE file that should have been included
00004 # as part of this package.
00005 #
00006 # This unit test attempts to locate the NCBI BLAST+ command line tools
00007 # (blastp, blastn, etc.) on the PATH, and if it finds them then runs some
00008 # standalone BLAST searches using the Bio.Blast.Applications wrappers.
00009 
00010 import os, sys
00011 import subprocess
00012 import unittest
00013 
00014 from Bio import MissingExternalDependencyError
00015 from Bio.Blast import Applications
00016 
# TODO - On windows, can we use the ncbi.ini file?
wanted = ["blastx", "blastp", "blastn", "tblastn", "tblastx",
          "rpsblast", "rpstblastn", "psiblast", "blast_formatter"]
exe_names = {}

#The Windows 32 bit BLAST 2.2.22+ installer does add itself to the path,
#and by default installs to C:\Program Files\NCBI\BLAST-2.2.22+\bin
#To keep things simple, assume BLAST+ is on the path on every platform.
#
#On Windows the environment variable name isn't case sensitive, and the
#entries are separated by ";" rather than ":" - os.pathsep covers both.
likely_dirs = os.environ.get("PATH", "").split(os.pathsep)


def _probe_blast_plus(exe_path, name):
    """Return True if exe_path behaves like a supported BLAST+ tool.

    The candidate is run with the single argument -h.  It is accepted when
    it exits with return code zero and its standard output does not contain
    "ERROR: Invalid argument: -h".  The original author used this to tell
    the old and new rpsblast apart, and noted it should also reject
    WU-BLAST (since it doesn't like -h).

    Arguments:
     - exe_path - path of the candidate executable.
     - name - tool name from the wanted list, needed for the special
       handling of blast_formatter.
    """
    try:
        #An argument list (no shell) keeps folders containing spaces or
        #shell meta-characters from breaking or altering the command.
        child = subprocess.Popen([exe_path, "-h"],
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 universal_newlines=True)
    except OSError:
        #The file exists but cannot be executed (e.g. permissions).
        return False
    output, error = child.communicate()
    if child.returncode != 0 or "ERROR: Invalid argument: -h" in output:
        return False
    #Special case, blast_formatter from BLAST 2.2.23+ (i.e. BLAST+)
    #has mandatory argument -rid, but no -archive. We don't support it.
    if name == "blast_formatter" and " -archive " not in output:
        return False
    return True


def _find_blast_plus(folders, names):
    """Return a dict mapping each tool name found to its executable path.

    The folders are searched in the order given and the first acceptable
    executable for each name is kept, i.e. earlier PATH entries take
    priority and a tool which has been found is not probed again.
    Entries which are not existing directories are skipped.
    """
    if sys.platform == "win32":
        suffix = ".exe"
    else:
        suffix = ""
    found = {}
    for folder in folders:
        if not os.path.isdir(folder):
            continue
        for name in names:
            if name in found:
                continue
            exe_path = os.path.join(folder, name + suffix)
            if os.path.isfile(exe_path) and _probe_blast_plus(exe_path, name):
                found[name] = exe_path
    return found


exe_names.update(_find_blast_plus(likely_dirs, wanted))
00060 
#blast_formatter was only added in BLAST 2.2.24+, so it is optional and is
#left out of the count; all the other wanted tools must have been found.
if sum(1 for tool in exe_names if tool != "blast_formatter") < len(wanted) - 1:
    raise MissingExternalDependencyError("Install the NCBI BLAST+ command line "
                                         "tools if you want to use the "
                                         "Bio.Blast.Applications wrapper.")
00066 
00067 
00068 class Pairwise(unittest.TestCase):
00069     def test_blastp(self):
00070         """Pairwise BLASTP search"""
00071         global exe_names
00072         cline = Applications.NcbiblastpCommandline(exe_names["blastp"],
00073                         query="Fasta/rose.pro",
00074                         subject="GenBank/NC_005816.faa",
00075                         evalue=1)
00076         self.assertEqual(str(cline), exe_names["blastp"] \
00077                          + " -query Fasta/rose.pro -evalue 1" \
00078                          + " -subject GenBank/NC_005816.faa")
00079         child = subprocess.Popen(str(cline),
00080                                  stdout=subprocess.PIPE,
00081                                  stderr=subprocess.PIPE,
00082                                  universal_newlines=True,
00083                                  shell=(sys.platform!="win32"))
00084         stdoutdata, stderrdata = child.communicate()
00085         return_code = child.returncode
00086         self.assertEqual(return_code, 0, "Got error code %i back from:\n%s"
00087                          % (return_code, cline))
00088         self.assertEqual(10, stdoutdata.count("Query= "))
00089         if stdoutdata.count("***** No hits found *****")==7:
00090             #This happens with BLAST 2.2.26+ which is potentially a bug
00091             pass
00092         else:
00093             self.assertEqual(9, stdoutdata.count("***** No hits found *****"))
00094         
00095         #TODO - Parse it? I think we'd need to update this obsole code :(
00096         #records = list(NCBIStandalone.Iterator(StringIO(stdoutdata),
00097         #                                       NCBIStandalone.BlastParser()))   
00098 
00099     def test_blastn(self):
00100         """Pairwise BLASTN search"""
00101         global exe_names
00102         cline = Applications.NcbiblastnCommandline(exe_names["blastn"],
00103                         query="GenBank/NC_005816.ffn",
00104                         subject="GenBank/NC_005816.fna",
00105                         evalue="0.000001")
00106         self.assertEqual(str(cline), exe_names["blastn"] \
00107                          + " -query GenBank/NC_005816.ffn -evalue 0.000001" \
00108                          + " -subject GenBank/NC_005816.fna")
00109         child = subprocess.Popen(str(cline),
00110                                  stdout=subprocess.PIPE,
00111                                  stderr=subprocess.PIPE,
00112                                  universal_newlines=True,
00113                                  shell=(sys.platform!="win32"))
00114         stdoutdata, stderrdata = child.communicate()
00115         return_code = child.returncode
00116         self.assertEqual(return_code, 0, "Got error code %i back from:\n%s"
00117                          % (return_code, cline))
00118         self.assertEqual(10, stdoutdata.count("Query= "))
00119         self.assertEqual(0, stdoutdata.count("***** No hits found *****"))
00120         #TODO - Parse it?
00121 
00122     def test_tblastn(self):
00123         """Pairwise TBLASTN search"""
00124         global exe_names
00125         cline = Applications.NcbitblastnCommandline(exe_names["tblastn"],
00126                         query="GenBank/NC_005816.faa",
00127                         subject="GenBank/NC_005816.fna",
00128                         evalue="1e-6")
00129         self.assertEqual(str(cline), exe_names["tblastn"] \
00130                          + " -query GenBank/NC_005816.faa -evalue 1e-6" \
00131                          + " -subject GenBank/NC_005816.fna")
00132         child = subprocess.Popen(str(cline),
00133                                  stdout=subprocess.PIPE,
00134                                  stderr=subprocess.PIPE,
00135                                  universal_newlines=True,
00136                                  shell=(sys.platform!="win32"))
00137         stdoutdata, stderrdata = child.communicate()
00138         return_code = child.returncode
00139         self.assertEqual(return_code, 0, "Got error code %i back from:\n%s"
00140                          % (return_code, cline))
00141         self.assertEqual(10, stdoutdata.count("Query= "))
00142         self.assertEqual(0, stdoutdata.count("***** No hits found *****"))
00143         #TODO - Parse it?
00144 
00145    
class CheckCompleteArgList(unittest.TestCase):
    """Compare each wrapper's arguments with those the tool itself lists.

    For every tool the wrapper is asked for its parameter names, the tool
    is run with -h, and the bracketed options in its output are collected.
    Any unexplained difference is reported as a warning (not a failure).
    """

    #Options which a tool may list but which are deliberately not reported
    #as missing from the Biopython wrapper.
    _ignore_missing = [
        #Known issue, need to establish how these options work
        "-soft_masking",
        "-use_index",
        #Known issue, seems to be present in some builds (Bug 3043)
        "-verbose",
        "-remote_verbose",
        "-use_test_remote_service",
    ]

    #Wrapper options which the installed BLAST+ may legitimately not list,
    #as (tool names, option names) pairs.  None means any tool.
    _ignore_extra = [
        #Added in BLAST 2.2.23+
        (["blastn"], ["-off_diagonal_range"]),
        #These appear to have been removed in BLAST 2.2.23+
        #(which seems a bit odd - TODO - check with NCBI?)
        (["tblastx"], ["-gapextend", "-gapopen",
                       "-xdrop_gap", "-xdrop_gap_final"]),
        #These appear to have been removed in BLAST 2.2.24+
        #(which seems a bit odd - TODO - check with NCBI?)
        (["rpsblast", "rpstblastn"], ["-num_threads"]),
        #These appear to have been removed in BLAST 2.2.24+
        (["tblastn", "tblastx"], ["-db_soft_mask"]),
        #This was added in BLAST 2.2.24+ to most/all the tools, so
        #will be seen as an extra argument on older versions:
        (None, ["-seqidlist"]),
        #New in BLAST 2.2.25+ so will look like an extra arg on old BLAST
        (["blastn", "blastp", "blastx", "tblastx", "tblastn"],
         ["-db_hard_mask"]),
        (["psiblast"], ["-msa_master_idx"]),
        (["rpsblast"], ["-best_hit_overhang",
                        "-best_hit_score_edge",
                        "-culling_limit"]),
        #New in BLAST 2.2.26+ so will look like an extra arg on old BLAST
        (None, ["-max_hsps_per_subject"]),
    ]

    def _option_names(self, help_text):
        """Return the set of option names found in [...] in help_text.

        Each "[" is paired with the next "]" after it, and the first
        whitespace separated word in between is taken as the option name
        (so "[-query input_file]" gives "-query").  Brackets holding no
        word at all are ignored.  The test fails if a "[" has no closing
        "]".
        """
        found = set()
        position = 0
        while True:
            start = help_text.find("[", position)
            if start == -1:
                break
            end = help_text.find("]", start + 1)
            if end == -1:
                #Explicit failure, a bare assert would vanish under -O
                self.fail("Unmatched '[' in help output: %r"
                          % help_text[start:start + 40])
            words = help_text[start + 1:end].split(None, 1)
            if words:
                found.add(words[0])
            position = end + 1
        return found

    def check(self, exe_name, wrapper):
        """Check the wrapper class against the named tool's -h output.

        Arguments:
         - exe_name - tool name, used as the key into exe_names.
         - wrapper - the Bio.Blast.Applications command line class.

        Fails if the tool writes anything to standard error.  Differences
        between the two argument lists, other than the known exceptions,
        only trigger a warning.  Finally a near trivial command line is
        built and converted to a string to exercise any validation.
        """
        exe = exe_names[exe_name]
        cline = wrapper(exe, h=True)

        names = set(parameter.names[0] for parameter in cline.parameters)

        child = subprocess.Popen(str(cline),
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 universal_newlines=True,
                                 shell=(sys.platform!="win32"))
        stdoutdata, stderrdata = child.communicate()
        self.assertEqual(stderrdata, "",
                         "%s\n%s" % (str(cline), stderrdata))
        names_in_tool = self._option_names(stdoutdata)

        #Both are new sets, so can be trimmed in place
        extra = names.difference(names_in_tool)
        missing = names_in_tool.difference(names)
        missing.difference_update(self._ignore_missing)
        for tools, options in self._ignore_extra:
            if tools is None or exe_name in tools:
                extra.difference_update(options)

        if extra or missing:
            import warnings
            warnings.warn("NCBI BLAST+ %s and Biopython out sync. Please "
                          "update Biopython, or report this issue if you are "
                          "already using the latest version. (Exta args: %s; "
                          "Missing: %s)" % (exe_name,
                          ",".join(sorted(extra)),
                          ",".join(sorted(missing))))

        #An almost trivial example to test any validation
        if "-query" in names:
            cline = wrapper(exe, query="dummy")
        elif "-archive" in names:
            cline = wrapper(exe, archive="dummy")
        str(cline)

    def test_blastx(self):
        """Check all blastx arguments are supported"""
        self.check("blastx", Applications.NcbiblastxCommandline)

    def test_blastp(self):
        """Check all blastp arguments are supported"""
        self.check("blastp", Applications.NcbiblastpCommandline)

    def test_blastn(self):
        """Check all blastn arguments are supported"""
        self.check("blastn", Applications.NcbiblastnCommandline)

    def test_tblastx(self):
        """Check all tblastx arguments are supported"""
        self.check("tblastx", Applications.NcbitblastxCommandline)

    def test_tblastn(self):
        """Check all tblastn arguments are supported"""
        self.check("tblastn", Applications.NcbitblastnCommandline)

    def test_psiblast(self):
        """Check all psiblast arguments are supported"""
        self.check("psiblast", Applications.NcbipsiblastCommandline)

    def test_rpsblast(self):
        """Check all rpsblast arguments are supported"""
        self.check("rpsblast", Applications.NcbirpsblastCommandline)

    def test_rpstblastn(self):
        """Check all rpstblastn arguments are supported"""
        self.check("rpstblastn", Applications.NcbirpstblastnCommandline)

    #Only defined when a supported blast_formatter was located at import
    if "blast_formatter" in exe_names:
        def test_blast_formatter(self):
            """Check all blast_formatter arguments are supported"""
            self.check("blast_formatter", Applications.NcbiblastformatterCommandline)
00279 
00280 
if __name__ == "__main__":
    #When run as a script, execute the tests with the verbose text runner.
    unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))