Back to index

python-biopython  1.60
Applications.py
Go to the documentation of this file.
00001 # Copyright 2001 Brad Chapman.
00002 # Revisions copyright 2009-2010 by Peter Cock.
00003 # Revisions copyright 2010 by Phillip Garland.
00004 # All rights reserved.
00005 # This code is part of the Biopython distribution and governed by its
00006 # license.  Please see the LICENSE file that should have been included
00007 # as part of this package.
00008 """Definitions for interacting with BLAST related applications.
00009 
00010 Obsolete wrappers for the old/classic NCBI BLAST tools (written in C):
00011 
00012 - FastacmdCommandline
00013 - BlastallCommandline
00014 - BlastpgpCommandline
00015 - RpsBlastCommandline
00016 
00017 Wrappers for the new NCBI BLAST+ tools (written in C++):
00018 
00019 - NcbiblastpCommandline - Protein-Protein BLAST
00020 - NcbiblastnCommandline - Nucleotide-Nucleotide BLAST
00021 - NcbiblastxCommandline - Translated Query-Protein Subject BLAST
00022 - NcbitblastnCommandline - Protein Query-Translated Subject BLAST
00023 - NcbitblastxCommandline - Translated Query-Translated Subject BLAST
00024 - NcbipsiblastCommandline - Position-Specific Initiated BLAST
00025 - NcbirpsblastCommandline - Reverse Position Specific BLAST
00026 - NcbirpstblastnCommandline - Translated Reverse Position Specific BLAST
00027 - NcbiblastformatterCommandline - Convert ASN.1 to other BLAST output formats
00028 
00029 For further details, see:
00030 
00031 Camacho et al. BLAST+: architecture and applications
00032 BMC Bioinformatics 2009, 10:421
00033 doi:10.1186/1471-2105-10-421
00034 """
00035 from Bio.Application import _Option, AbstractCommandline, _Switch
00036 
class FastacmdCommandline(AbstractCommandline):
    """Build a command line for the NCBI fastacmd program (OBSOLETE).

    Two options are declared, and both are mandatory: database (-d), the
    database to retrieve from, and search_string (-s), the id to search for.
    """
    def __init__(self, cmd="fastacmd", **kwargs):
        database_option = _Option(["-d", "database"],
                                  "The database to retrieve from.",
                                  equate=False,
                                  is_required=True)
        search_option = _Option(["-s", "search_string"],
                                "The id to search for.",
                                equate=False,
                                is_required=True)
        #Order matters here, this is the order the options were declared in.
        self.parameters = [database_option, search_option]
        AbstractCommandline.__init__(self, cmd, **kwargs)
00053 
00054 
class _BlastCommandLine(AbstractCommandline):
    """Common base for the classic (C toolkit) NCBI BLAST wrappers (PRIVATE).

    Not meant to be used directly.  It declares the options which blastall,
    blastpgp and rpsblast all share, and puts them in front of whatever
    option list the subclass has already stored in self.parameters.
    """
    def __init__(self, cmd=None, **kwargs):
        assert cmd is not None
        shared_options = [
            _Switch(["--help", "help"],
                    "Print USAGE, DESCRIPTION and ARGUMENTS description;  "
                    "ignore other arguments."),
            _Option(["-d", "database"],
                    "The database to BLAST against.",
                    equate=False, is_required=True),
            _Option(["-i", "infile"],
                    "The sequence to search with.",
                    equate=False, is_required=True, filename=True),
            _Option(["-e", "expectation"],
                    "Expectation value cutoff.", equate=False),
            _Option(["-m", "align_view"],
                    "Alignment view.  Integer 0-11.  Use 7 for XML output.",
                    equate=False),
            _Option(["-o", "align_outfile", "outfile"],
                    "Output file for alignment.",
                    equate=False, filename=True),
            _Option(["-y", "xdrop_extension"],
                    "Dropoff for blast extensions.", equate=False),
            _Option(["-F", "filter"],
                    "Filter query sequence with SEG?  T/F", equate=False),
            _Option(["-X", "xdrop"],
                    "Dropoff value (bits) for gapped alignments.",
                    equate=False),
            _Option(["-I", "show_gi"],
                    "Show GI's in deflines?  T/F", equate=False),
            _Option(["-J", "believe_query"],
                    "Believe the query defline?  T/F", equate=False),
            _Option(["-Z", "xdrop_final"],
                    "X dropoff for final gapped alignment.", equate=False),
            _Option(["-z", "db_length"],
                    "Effective database length.", equate=False),
            _Option(["-O", "seqalign_file"],
                    "seqalign file to output.",
                    equate=False, filename=True),
            _Option(["-v", "descriptions"],
                    "Number of one-line descriptions.", equate=False),
            _Option(["-b", "alignments"],
                    "Number of alignments.", equate=False),
            _Option(["-Y", "search_length"],
                    "Effective length of search space "
                    "(use zero for the real size).",
                    equate=False),
            _Option(["-T", "html"],
                    "Produce HTML output?  T/F", equate=False),
            _Option(["-U", "case_filter"],
                    "Use lower case filtering of FASTA sequence? T/F",
                    equate=False),
            _Option(["-a", "nprocessors"],
                    "Number of processors to use.", equate=False),
            _Option(["-g", "gapped"],
                    "Whether to do a gapped alignment.  T/F", equate=False),
        ]
        #The shared options are put first, just in case the subclass has
        #declared any arguments which must come last.
        try:
            subclass_options = self.parameters
        except AttributeError:
            #The subclass was expected to set this up, but cope if it did not.
            subclass_options = []
        self.parameters = shared_options + subclass_options
        AbstractCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Run the generic checks, unless only the help text was requested."""
        #With --help the normally mandatory arguments (like the database)
        #should not be insisted upon.
        if not self.help:
            AbstractCommandline._validate(self)
00147 
00148 
class _BlastAllOrPgpCommandLine(_BlastCommandLine):
    """Intermediate base for the blastall and blastpgp wrappers (PRIVATE).

    For subclassing only.  It adds the options which blastall and blastpgp
    have in common (but which rpsblast lacks), then hands over to
    _BlastCommandLine for the options shared by all the classic tools.
    """
    def __init__(self, cmd=None, **kwargs):
        assert cmd is not None
        shared_options = [
            _Option(["-G", "gap_open"],
                    "Gap open penalty", equate=False),
            _Option(["-E", "gap_extend"],
                    "Gap extension penalty", equate=False),
            _Option(["-A", "window_size"],
                    "Multiple hits window size", equate=False),
            _Option(["-f", "hit_extend"],
                    "Threshold for extending hits.", equate=False),
            _Option(["-K", "keep_hits"],
                    " Number of best hits from a region to keep.",
                    equate=False),
            _Option(["-W", "wordsize"],
                    "Word size", equate=False),
            _Option(["-P", "passes"],
                    "Hits/passes.  Integer 0-2. 0 for multiple hit, 1 for "
                    "single hit (does not apply to blastn)",
                    equate=False),
        ]
        #The shared options are put first, just in case the subclass has
        #declared any arguments which must come last.
        try:
            subclass_options = self.parameters
        except AttributeError:
            #The subclass was expected to set this up, but cope if it did not.
            subclass_options = []
        self.parameters = shared_options + subclass_options
        _BlastCommandLine.__init__(self, cmd, **kwargs)
00189 
00190 
class BlastallCommandline(_BlastAllOrPgpCommandLine):
    """Create a commandline for the blastall program from NCBI (OBSOLETE).

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    are replacing blastall with separate tools blastn, blastp, blastx, tblastn
    and tblastx.

    Like blastall, this wrapper is now obsolete, and will be deprecated and
    removed in a future release of Biopython.  Creating an instance issues a
    PendingDeprecationWarning.

    >>> from Bio.Blast.Applications import BlastallCommandline
    >>> cline = BlastallCommandline(program="blastx", infile="m_cold.fasta",
    ...                             database="nr", expectation=0.001)
    >>> cline
    BlastallCommandline(cmd='blastall', database='nr', infile='m_cold.fasta', expectation=0.001, program='blastx')
    >>> print cline
    blastall -d nr -i m_cold.fasta -e 0.001 -p blastx

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.

    The -w option (frame shift penalty) can be given under the correctly
    spelt name frame_shift_penalty as well as under its historical, misspelt
    name.
    """
    #TODO - This could use more checking for valid parameters to the program.
    def __init__(self, cmd="blastall",**kwargs):
        import warnings
        warnings.warn("Like blastall, this wrapper is now obsolete, and will be deprecated and removed in a future release of Biopython.", PendingDeprecationWarning)
        self.parameters = [
            #Sorted in the same order as the output from blastall --help
            #which should make it easier to keep them up to date in future.
            #Note that some arguments are defined in the base classes (above).
           _Option(["-p", "program"],
                   "The blast program to use (e.g. blastp, blastn).",
                   is_required=True,
                   equate=False),
           _Option(["-q", "nuc_mismatch"],
                   "Penalty for a nucleotide mismatch (blastn only).",
                   equate=False),
           _Option(["-r", "nuc_match"],
                   "Reward for a nucleotide match (blastn only).",
                   equate=False),
           _Option(["-Q", "query_genetic_code"],
                   "Query Genetic code to use.",
                   equate=False),
           _Option(["-D", "db_genetic_code"],
                   "DB Genetic code (for tblast[nx] only).",
                   equate=False),
           _Option(["-M", "matrix"],
                   "Matrix to use",
                   equate=False),
           _Option(["-S", "strands"],
                   "Query strands to search against database (for blast[nx], "
                   "and tblastx). 3 is both, 1 is top, 2 is bottom.",
                   equate=False),
           _Option(["-l", "restrict_gi"],
                   "Restrict search of database to list of GI's.",
                   equate=False),
           _Option(["-R", "checkpoint"],
                   "PSI-TBLASTN checkpoint input file.",
                   filename=True,
                   equate=False),
           _Option(["-n", "megablast"],
                   "MegaBlast search T/F.",
                   equate=False),
           #The old name "region_length" is for consistency with our
           #old blastall function wrapper:
           _Option(["-L", "region_length", "range_restriction"],
                   """Location on query sequence (string format start,end).

                   In older versions of BLAST, -L set the length of region
                   used to judge hits (see -K parameter).""",
                   equate=False),
           #This option was originally only available under a misspelt name.
           #The correct spelling is added as an extra name (as is done for
           #-o and -L), while the old name keeps its position at the end of
           #the list so that existing code should behave exactly as before.
           #NOTE(review): this relies on Bio.Application accepting every
           #listed name and exposing the last one as the attribute - confirm.
           _Option(["-w", "frame_shift_penalty", "frame_shit_penalty"],
                   "Frame shift penalty (OOF algorithm for blastx).",
                   equate=False),
           _Option(["-t", "largest_intron"],
                   "Length of the largest intron allowed in a translated "
                   "nucleotide sequence when linking multiple distinct "
                   "alignments. (0 invokes default behavior; a negative value "
                   "disables linking.)",
                   equate=False),
           _Option(["-B", "num_concatenated_queries"],
                   "Number of concatenated queries, for blastn and tblastn.",
                   equate=False),
           _Option(["-V", "oldengine"],
                   "Force use of the legacy BLAST engine.",
                   equate=False),
           _Option(["-C", "composition_based"],
                   """Use composition-based statistics for tblastn:
                   D or d: default (equivalent to F)
                   0 or F or f: no composition-based statistics
                   1 or T or t: Composition-based statistics as in NAR 29:2994-3005, 2001
                   2: Composition-based score adjustment as in Bioinformatics
                       21:902-911, 2005, conditioned on sequence properties
                   3: Composition-based score adjustment as in Bioinformatics
                       21:902-911, 2005, unconditionally
                   For programs other than tblastn, must either be absent or be
                   D, F or 0.""",
                   equate=False),
           _Option(["-s", "smith_waterman"],
                   "Compute locally optimal Smith-Waterman alignments (This "
                   "option is only available for gapped tblastn.) T/F",
                   equate=False),
        ]
        #The base classes prepend the options shared with blastpgp/rpsblast.
        _BlastAllOrPgpCommandLine.__init__(self, cmd, **kwargs)
00294 
00295 
class BlastpgpCommandline(_BlastAllOrPgpCommandLine):
    """Wrapper for the classic NCBI blastpgp command line tool (OBSOLETE).

    In BLAST+ (BLAST rewritten in C++ instead of C) the NCBI are replacing
    blastpgp with a renamed tool, psiblast, for which this module provides
    the wrapper NcbipsiblastCommandline.

    This wrapper is therefore obsolete, like blastpgp (and blastall), and
    will be deprecated and removed in a future release of Biopython.

    >>> from Bio.Blast.Applications import BlastpgpCommandline
    >>> cline = BlastpgpCommandline(help=True)
    >>> cline
    BlastpgpCommandline(cmd='blastpgp', help=True)
    >>> print cline
    blastpgp --help

    The usual way to run the command line is to call cline(), or to use the
    Python subprocess module, as described in the Biopython tutorial.
    """
    def __init__(self, cmd="blastpgp",**kwargs):
        import warnings
        obsolete_message = ("Like blastpgp (and blastall), this wrapper is "
                            "now obsolete, and will be deprecated and "
                            "removed in a future release of Biopython.")
        warnings.warn(obsolete_message, PendingDeprecationWarning)
        self.parameters = [
            #Files read or written by PSI-BLAST / PHI-BLAST:
            _Option(["-C", "checkpoint_outfile"],
                    "Output file for PSI-BLAST checkpointing.",
                    equate=False, filename=True),
            _Option(["-R", "restart_infile"],
                    "Input file for PSI-BLAST restart.",
                    equate=False, filename=True),
            _Option(["-k", "hit_infile"],
                    "Hit file for PHI-BLAST.",
                    equate=False, filename=True),
            _Option(["-Q", "matrix_outfile"],
                    "Output file for PSI-BLAST matrix in ASCII.",
                    equate=False, filename=True),
            _Option(["-B", "align_infile"],
                    "Input alignment file for PSI-BLAST restart.",
                    equate=False, filename=True),
            #Plain valued options:
            _Option(["-S", "required_start"],
                    "Start of required region in query.", equate=False),
            _Option(["-H", "required_end"],
                    "End of required region in query.", equate=False),
            _Option(["-j", "npasses"],
                    "Number of passes", equate=False),
            _Option(["-N", "nbits_gapping"],
                    "Number of bits to trigger gapping.", equate=False),
            _Option(["-c", "pseudocounts"],
                    "Pseudocounts constants for multiple passes.",
                    equate=False),
            _Option(["-h", "model_threshold"],
                    "E-value threshold to include in multipass model.",
                    equate=False),
            #Does the old name "region_length" for -L make sense?
            _Option(["-L", "region_length"],
                    "Cost to decline alignment (disabled when zero).",
                    equate=False),
            _Option(["-M", "matrix"],
                    "Matrix (string, default BLOSUM62).", equate=False),
            _Option(["-p", "program"],
                    "The blast program to use (e.g blastpgp, patseedp or seedp).",
                    equate=False, is_required=True),
        ]
        _BlastAllOrPgpCommandLine.__init__(self, cmd, **kwargs)
00371 
00372 
class RpsBlastCommandline(_BlastCommandLine):
    """Wrapper for the classic NCBI rpsblast command line tool (OBSOLETE).

    In BLAST+ (BLAST rewritten in C++ instead of C) the NCBI are replacing
    the old rpsblast with a new version of the same name plus a second tool,
    rpstblastn, and both take different command line arguments.  This module
    provides NcbirpsblastCommandline and NcbirpstblastnCommandline as
    wrappers for the new tools.

    This wrapper is therefore obsolete, like the old rpsblast (and
    blastall), and will be deprecated and removed in a future release of
    Biopython.

    >>> from Bio.Blast.Applications import RpsBlastCommandline
    >>> cline = RpsBlastCommandline(help=True)
    >>> cline
    RpsBlastCommandline(cmd='rpsblast', help=True)
    >>> print cline
    rpsblast --help

    The usual way to run the command line is to call cline(), or to use the
    Python subprocess module, as described in the Biopython tutorial.
    """
    def __init__(self, cmd="rpsblast",**kwargs):
        import warnings
        obsolete_message = ("Like the old rpsblast (and blastall), this "
                            "wrapper is now obsolete, and will be deprecated "
                            "and removed in a future release of Biopython.")
        warnings.warn(obsolete_message, PendingDeprecationWarning)
        self.parameters = [
            #Note -N is also in blastpgp, but not blastall
            _Option(["-N", "nbits_gapping"],
                    "Number of bits to trigger gapping.", equate=False),
            #Note blastall and blastpgp wrappers have -P with name "passes".
            #If this is the same thing, we should be consistent!
            _Option(["-P", "multihit"],
                    "0 for multiple hit, 1 for single hit", equate=False),
            _Option(["-l", "logfile"],
                    "Logfile name.",
                    equate=False, filename=True),
            _Option(["-p", "protein"],
                    "Query sequence is protein. T/F", equate=False),
            _Option(["-L", "range_restriction"],
                    "Location on query sequence (string format start,end).",
                    equate=False),
        ]
        _BlastCommandLine.__init__(self, cmd, **kwargs)
00420 
00421 ##############################################################################
00422 # Legacy BLAST wrappers above, (new) BLAST+ wrappers below
00423 ##############################################################################
00424 
class _NcbibaseblastCommandline(AbstractCommandline):
    """Base Commandline object for (new) NCBI BLAST+ wrappers (PRIVATE).

    This is provided for subclassing, it deals with shared options
    common to all the BLAST tools (blastn, rpsblast, rpstblastn, etc
    AND blast_formatter).

    The options declared here (help and version switches, the output file
    and the output formatting options) are placed in front of any parameters
    which the subclass has already stored in self.parameters.
    """
    def __init__(self, cmd=None, **kwargs):
        assert cmd is not None
        extra_parameters = [
            #Core:
            _Switch(["-h", "h"],
                    "Print USAGE and DESCRIPTION;  ignore other arguments."),
            _Switch(["-help", "help"],
                    "Print USAGE, DESCRIPTION and ARGUMENTS description; "
                    "ignore other arguments."),
            _Switch(["-version", "version"],
                    "Print version number;  ignore other arguments."),
            # Output configuration options
            _Option(["-out", "out"],
                    "Output file for alignment.",
                    filename=True,
                    equate=False),
            #Formatting options:
            _Option(["-outfmt", "outfmt"],
                    "Alignment view.  Integer 0-11.  Use 5 for XML output "
                    "(differs from classic BLAST which used 7 for XML).",
                    equate=False),
                    #TODO - Document and test the column options
            _Switch(["-show_gis","show_gis"],
                    "Show NCBI GIs in deflines?"),
            _Option(["-num_descriptions","num_descriptions"],
                    """Number of database sequences to show one-line descriptions for.

                    Integer argument (at least zero). Default is 500.
                    See also num_alignments.""",
                    equate=False),
            #The help text below used to say "show num_alignments for" and
            #cross-referenced itself rather than num_descriptions.
            _Option(["-num_alignments","num_alignments"],
                    """Number of database sequences to show alignments for.

                    Integer argument (at least zero). Default is 200.
                    See also num_descriptions.""",
                    equate=False),
            _Switch(["-html", "html"],
                    "Produce HTML output? See also the outfmt option."),
            #Miscellaneous options
            _Switch(["-parse_deflines", "parse_deflines"],
                    "Should the query and subject defline(s) be parsed?"),
            ]
        try:
            #Insert extra parameters - at the start just in case there
            #are any arguments which must come last:
            self.parameters = extra_parameters + self.parameters
        except AttributeError:
            #Should we raise an error?  The subclass should have set this up!
            self.parameters = extra_parameters
        AbstractCommandline.__init__(self, cmd, **kwargs)

    def _validate_incompatibilities(self, incompatibles):
        """Used by the BLAST+ _validate method (PRIVATE).

        The argument incompatibles is a dictionary mapping an option name to
        a list of the option names it must not be combined with.  For each
        key which currently has a true value, every option in its list is
        checked, and a ValueError is raised for the first one which has a
        true value as well.
        """
        for a in incompatibles:
            if self._get_parameter(a):
                for b in incompatibles[a]:
                    if self._get_parameter(b):
                        raise ValueError("Options %s and %s are incompatible."
                                         % (a,b))
00491 
00492         
class _NcbiblastCommandline(_NcbibaseblastCommandline):
    """Base class for the (new) NCBI BLAST+ search wrappers (PRIVATE).

    For subclassing only.  It declares the options common to the BLAST
    search tools (blastn, rpsblast, etc) and leaves the output and
    formatting options to _NcbibaseblastCommandline.
    """
    def __init__(self, cmd=None, **kwargs):
        assert cmd is not None
        #Several of the help texts below were written as indented triple
        #quoted strings, so their continuation lines begin with this run of
        #spaces.  It is reproduced here to leave the help text unchanged.
        pad = " " * 20
        search_options = [
            #Input query options:
            _Option(["-query", "query"],
                    "The sequence to search with.",
                    equate=False, filename=True), #Should this be required?
            _Option(["-query_loc", "query_loc"],
                    "Location on the query sequence (Format: start-stop)",
                    equate=False),
            #General search options:
            _Option(["-db", "db"],
                    "The database to BLAST against.", equate=False),
            _Option(["-evalue", "evalue"],
                    "Expectation value cutoff.", equate=False),
            _Option(["-word_size","word_size"],
                    "Word size for wordfinder algorithm.\n\n" + pad +
                    "Integer. Minimum 2.",
                    equate=False),
            #BLAST-2-Sequences options:
            # - see subclass
            #Formatting options:
            # - see baseclass
            #Query filtering options
            # TODO -soft_masking <Boolean>, is this a switch or an option?
            #_Switch(["-soft_masking", "soft_masking"],
            #        "Apply filtering locations as soft masks?"),
            _Switch(["-lcase_masking", "lcase_masking"],
                    "Use lower case filtering in query and subject sequence(s)?"),
            #Restrict search or results
            _Option(["-gilist", "gilist"],
                    "Restrict search of database to list of GI's.\n \n" + pad +
                    "Incompatible with: negative_gilist, seqidlist, remote, "
                    "subject, subject_loc",
                    equate=False, filename=True),
            _Option(["-negative_gilist", "negative_gilist"],
                    "Restrict search of database to everything except the "
                    "listed GIs.\n \n" + pad +
                    "Incompatible with: gilist, seqidlist, remote, "
                    "subject, subject_loc",
                    equate=False, filename=True),
            _Option(["-seqidlist", "seqidlist"],
                    "Restrict search of database to list of SeqID's.\n \n" + pad +
                    "Incompatible with: gilist, negative_gilist, remote, "
                    "subject, subject_loc",
                    equate=False, filename=True),
            _Option(["-entrez_query", "entrez_query"],
                    "Restrict search with the given Entrez query "
                    "(requires remote).",
                    equate=False),
            _Option(["-max_target_seqs", "max_target_seqs"],
                    "Maximum number of aligned sequences to keep.\n\n" + pad +
                    "Integer argument (at least one).",
                    equate=False),
            #Statistical options
            _Option(["-dbsize", "dbsize"],
                    "Effective length of the database (integer)",
                    equate=False),
            _Option(["-searchsp", "searchsp"],
                    "Effective length of the search space (integer)",
                    equate=False),
            _Option(["-max_hsps_per_subject", "max_hsps_per_subject"],
                    "Override maximum number of HSPs per subject to save for "
                    "ungapped searches (integer)",
                    equate=False),
            #Extension options
            _Option(["-xdrop_ungap", "xdrop_ungap"],
                    "X-dropoff value (in bits) for ungapped extensions. Float.",
                    equate=False),
            _Option(["-xdrop_gap", "xdrop_gap"],
                    "X-dropoff value (in bits) for preliminary gapped "
                    "extensions. Float.",
                    equate=False),
            _Option(["-xdrop_gap_final", "xdrop_gap_final"],
                    "X-dropoff value (in bits) for final gapped alignment. Float.",
                    equate=False),
            _Option(["-window_size", "window_size"],
                    "Multiple hits window size, use 0 to specify 1-hit "
                    "algorithm. Integer.",
                    equate=False),
            # Search strategy options
            _Option(["-import_search_strategy", "import_search_strategy"],
                    "Search strategy to use.\n\n" + pad +
                    "Incompatible with: export_search_strategy",
                    equate=False, filename=True),
            _Option(["-export_search_strategy", "export_search_strategy"],
                    "File name to record the search strategy used.\n\n" + pad +
                    "Incompatible with: import_search_strategy",
                    equate=False, filename=True),
            #Miscellaneous options
            _Option(["-num_threads", "num_threads"],
                    "Number of threads to use in the BLAST search.\n\n" + pad +
                    "Integer of at least one. Default is one.\n" + pad +
                    "Incompatible with: remote",
                    equate=False),
            _Switch(["-remote", "remote"],
                    "Execute search remotely?\n\n" + pad +
                    "Incompatible with: gilist, negative_gilist, "
                    "subject_loc, num_threads, ..."),
            ]
        #These options are put first, just in case the subclass has declared
        #any arguments which must come last.
        try:
            subclass_options = self.parameters
        except AttributeError:
            #The subclass was expected to set this up, but cope if it did not.
            subclass_options = []
        self.parameters = search_options + subclass_options
        _NcbibaseblastCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Reject clashing options, then run the generic checks.

        Raises ValueError if two mutually exclusive options are both set,
        or if entrez_query is used without remote.
        """
        clashes = {"remote":["gilist", "negative_gilist", "num_threads"],
                   "import_search_strategy" : ["export_search_strategy"],
                   "gilist":["negative_gilist"],
                   "seqidlist":["gilist", "negative_gilist", "remote"]}
        self._validate_incompatibilities(clashes)
        if self.entrez_query:
            if not self.remote:
                raise ValueError("Option entrez_query requires remote option.")
        AbstractCommandline._validate(self)
00625 
00626 
class _Ncbiblast2SeqCommandline(_NcbiblastCommandline):
    """Base Commandline object for (new) NCBI BLAST+ wrappers (PRIVATE).

    Meant purely as a parent class. It contributes the options shared by
    every BLAST tool able to do a two-sequence BLAST (blastn, psiblast,
    etc), which leaves out rpsblast and rpstblastn.
    """
    def __init__(self, cmd=None, **kwargs):
        """Add the two-sequence options in front of the subclass options.

        cmd - executable name, must be supplied by the subclass.
        kwargs - option values passed on to _NcbiblastCommandline.__init__.
        """
        assert cmd is not None
        #General search options:
        gap_options = [
            _Option(["-gapopen", "gapopen"],
                    "Cost to open a gap (integer).",
                    equate=False),
            _Option(["-gapextend", "gapextend"],
                    "Cost to extend a gap (integer).",
                    equate=False),
            ]
        #BLAST-2-Sequences options:
        subject_options = [
            _Option(["-subject", "subject"],
                    """Subject sequence(s) to search.

                    Incompatible with: db, gilist, negative_gilist.
                    See also subject_loc.""",
                    filename=True,
                    equate=False),
            _Option(["-subject_loc", "subject_loc"],
                    """Location on the subject sequence (Format: start-stop)

                    Incompatible with: db, gilist, seqidlist, negative_gilist,
                    db_soft_mask, db_hard_mask, remote.
                    
                    See also subject.""",
                    equate=False),
            ]
        #Restrict search or results:
        restriction_options = [
            _Option(["-culling_limit", "culling_limit"],
                    """Hit culling limit (integer).

                    If the query range of a hit is enveloped by that of at
                    least this many higher-scoring hits, delete the hit.

                    Incompatible with: best_hit_overhang, best_hit_score_edge.
                    """,
                    equate=False),
            _Option(["-best_hit_overhang", "best_hit_overhang"],
                    """Best Hit algorithm overhang value (recommended value: 0.1)

                    Float between 0.0 and 0.5 inclusive.

                    Incompatible with: culling_limit.""",
                    equate=False),
            _Option(["-best_hit_score_edge", "best_hit_score_edge"],
                    """Best Hit algorithm score edge value (recommended value: 0.1)

                    Float between 0.0 and 0.5 inclusive.

                    Incompatible with: culling_limit.""",
                    equate=False),
            ]
        shared_options = gap_options + subject_options + restriction_options
        #The shared options go first, just in case the subclass has any
        #arguments which must come last:
        try:
            subclass_options = self.parameters
        except AttributeError:
            #Should we raise an error?  The subclass should have set this up!
            subclass_options = []
        self.parameters = shared_options + subclass_options
        _NcbiblastCommandline.__init__(self, cmd, **kwargs)


    def _validate(self):
        """Check the two-sequence options, then run the parent checks.

        The conflict table is passed to self._validate_incompatibilities
        before _NcbiblastCommandline._validate is called.
        """
        conflicts = {
            "subject_loc": ["db", "gilist", "negative_gilist", "seqidlist", "remote"],
            "culling_limit": ["best_hit_overhang","best_hit_score_edge"],
            "subject": ["db", "gilist", "negative_gilist", "seqidlist"],
            }
        self._validate_incompatibilities(conflicts)
        _NcbiblastCommandline._validate(self)
00701 
00702 
class _NcbiblastMain2SeqCommandline(_Ncbiblast2SeqCommandline):
    """Base Commandline object for (new) NCBI BLAST+ wrappers (PRIVATE).

    Meant purely as a parent class. It contributes the database masking
    options shared by the main BLAST tools blastp, blastn, blastx, tblastx
    and tblastn, which psiblast, rpsblast and rpstblastn do not take.
    """
    def __init__(self, cmd=None, **kwargs):
        """Add the masking options in front of the subclass options.

        cmd - executable name, must be supplied by the subclass.
        kwargs - option values passed on to _Ncbiblast2SeqCommandline.__init__.
        """
        assert cmd is not None
        #Restrict search or results:
        masking_options = [
            _Option(["-db_soft_mask", "db_soft_mask"],
                    """Filtering algorithm for soft masking (integer).

                    Filtering algorithm ID to apply to the BLAST database as soft masking.

                    Incompatible with: db_hard_mask, subject, subject_loc""",
                    equate=False),
            _Option(["-db_hard_mask", "db_hard_mask"],
                    """Filtering algorithm for hard masking (integer).

                    Filtering algorithm ID to apply to the BLAST database as hard masking.

                    Incompatible with: db_soft_mask, subject, subject_loc""",
                    equate=False),
            ]
        #The masking options go first, just in case the subclass has any
        #arguments which must come last:
        try:
            subclass_options = self.parameters
        except AttributeError:
            #Should we raise an error?  The subclass should have set this up!
            subclass_options = []
        self.parameters = masking_options + subclass_options
        _Ncbiblast2SeqCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Check the masking options, then run the parent checks.

        The conflict table is passed to self._validate_incompatibilities
        before _Ncbiblast2SeqCommandline._validate is called.
        """
        conflicts = {
            "db_soft_mask": ["db_hard_mask", "subject", "subject_loc"],
            "db_hard_mask": ["db_soft_mask", "subject", "subject_loc"],
            }
        self._validate_incompatibilities(conflicts)
        _Ncbiblast2SeqCommandline._validate(self)
00743 
class NcbiblastpCommandline(_NcbiblastMain2SeqCommandline):
    """Create a commandline for the NCBI BLAST+ program blastp (for proteins).

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    replaced the old blastall tool with separate tools for each of the searches.
    This wrapper therefore replaces BlastallCommandline with option -p blastp.

    >>> from Bio.Blast.Applications import NcbiblastpCommandline
    >>> cline = NcbiblastpCommandline(query="rosemary.pro", db="nr",
    ...                               evalue=0.001, remote=True, ungapped=True)
    >>> cline
    NcbiblastpCommandline(cmd='blastp', query='rosemary.pro', db='nr', evalue=0.001, remote=True, ungapped=True)
    >>> print cline
    blastp -query rosemary.pro -db nr -evalue 0.001 -remote -ungapped

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    """
    def __init__(self, cmd="blastp", **kwargs):
        """Declare the blastp specific options, then defer to the base class.

        cmd - name of the executable (default "blastp").
        kwargs - option values, passed on unchanged to
                 _NcbiblastMain2SeqCommandline.__init__.
        """
        self.parameters = [
            #General search options:
            _Option(["-task", "task"],
                    "Task to execute (string, blastp (default) or blastp-short).",
                    checker_function=lambda value : value in ["blastp",
                                                              "blastp-short"],
                    equate=False),
            #equate=False is needed here like for every other option, as
            #otherwise this is written as -matrix=NAME rather than
            #-matrix NAME (the form used by the blastx, tblastn, tblastx
            #and psiblast wrappers).
            _Option(["-matrix", "matrix"],
                    "Scoring matrix name (default BLOSUM62).",
                    equate=False),
            _Option(["-threshold", "threshold"],
                    "Minimum word score such that the word is added to the "
                    "BLAST lookup table (float)",
                    equate=False),
            _Option(["-comp_based_stats", "comp_based_stats"],
                    """Use composition-based statistics (string, default 2, i.e. True).

                    0, F or f: no composition-based statistics
                    2, T or t, D or d : Composition-based score adjustment as in
                    Bioinformatics 21:902-911, 2005, conditioned on sequence properties

                    Note that tblastn also supports values of 1 and 3.""",
                    #Compare against whole values: a substring test on
                    #"0Ft2TtDd" rejected the documented "f", accepted the
                    #empty string and fragments such as "Ft", and broke on
                    #integers.
                    checker_function=lambda value : str(value) in ("0", "F", "f",
                                                                   "2", "T", "t",
                                                                   "D", "d"),
                    equate=False),
            #Query filtering options:
            _Option(["-seg", "seg"],
                    """Filter query sequence with SEG (string).

                    Format: "yes", "window locut hicut", or "no" to disable.
                    Default is "12 2.2 2.5".""",
                    equate=False),
            #Extension options:
            _Switch(["-ungapped", "ungapped"],
                    "Perform ungapped alignment only?"),
            #Miscellaneous options:
            _Switch(["-use_sw_tback", "use_sw_tback"],
                    "Compute locally optimal Smith-Waterman alignments?"),
            ]
        _NcbiblastMain2SeqCommandline.__init__(self, cmd, **kwargs)
00801 
00802 
class NcbiblastnCommandline(_NcbiblastMain2SeqCommandline):
    """Wrapper for the NCBI BLAST+ program blastn (for nucleotides).

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    replaced the old blastall tool with separate tools for each of the searches.
    This wrapper therefore replaces BlastallCommandline with option -p blastn.

    For example, to run a search against the "nt" nucleotide database using the
    FASTA nucleotide file "m_cold.fasta" as the query, with an expectation value
    cut off of 0.001, saving the output to a file in XML format:

    >>> from Bio.Blast.Applications import NcbiblastnCommandline
    >>> cline = NcbiblastnCommandline(query="m_cold.fasta", db="nt", strand="plus",
    ...                               evalue=0.001, out="m_cold.xml", outfmt=5)
    >>> cline
    NcbiblastnCommandline(cmd='blastn', out='m_cold.xml', outfmt=5, query='m_cold.fasta', db='nt', evalue=0.001, strand='plus')
    >>> print cline
    blastn -out m_cold.xml -outfmt 5 -query m_cold.fasta -db nt -evalue 0.001 -strand plus

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    """
    def __init__(self, cmd="blastn", **kwargs):
        """Declare the blastn specific options, then defer to the base class.

        cmd - name of the executable (default "blastn").
        kwargs - option values, passed on unchanged to
                 _NcbiblastMain2SeqCommandline.__init__.
        """
        self.parameters = [
            #Input query options:
            _Option(["-strand", "strand"],
                    """Query strand(s) to search against database/subject.

                    Values allowed are "both" (default), "minus", "plus".""",
                    checker_function=lambda value : value in ["both",
                                                              "minus",
                                                              "plus"],
                    equate=False),
            #General search options:
            _Option(["-task", "task"],
                    """Task to execute (string, default 'megablast')

                    Allowed values 'blastn', 'blastn-short', 'dc-megablast', 'megablast'
                    (the default), or 'vecscreen'.""",
                    checker_function=lambda value : value in ['blastn',
                                                              'blastn-short',
                                                              'dc-megablast',
                                                              'megablast',
                                                              'vecscreen'],
                    equate=False),
            _Option(["-penalty", "penalty"],
                    "Penalty for a nucleotide mismatch (integer, at most zero).",
                    equate=False),
            _Option(["-reward", "reward"],
                    "Reward for a nucleotide match (integer, at least zero).",
                    equate=False),
            #TODO - Does this need an argument or is it a switch?
            #_Option(["-use_index", "use_index"],
            #        "Use MegaBLAST database index (boolean).",
            #        equate=False),
            _Option(["-index_name", "index_name"],
                    "MegaBLAST database index name.",
                    equate=False),
            #Query filtering options:
            _Option(["-dust", "dust"],
                    """Filter query sequence with DUST (string).

                    Format: 'yes', 'level window linker', or 'no' to disable.
                    Default = '20 64 1'.
                    """,
                    equate=False),
            _Option(["-filtering_db", "filtering_db"],
                    "BLAST database containing filtering elements (i.e. repeats).",
                    equate=False),
            _Option(["-window_masker_taxid", "window_masker_taxid"],
                    "Enable WindowMasker filtering using a Taxonomic ID (integer).",
                    equate=False),
            _Option(["-window_masker_db", "window_masker_db"],
                    "Enable WindowMasker filtering using this repeats database (string).",
                    equate=False),
            #Restrict search or results:
            _Option(["-perc_identity", "perc_identity"],
                    "Percent identity (real, 0 to 100 inclusive).",
                    equate=False),
            #Discontiguous MegaBLAST options
            _Option(["-template_type", "template_type"],
                    """Discontiguous MegaBLAST template type (string).

                    Allowed values: 'coding', 'coding_and_optimal' or 'optimal'
                    Requires: template_length.""",
                    checker_function=lambda value : value in ['coding', 'coding_and_optimal','optimal'],
                    equate=False),
            _Option(["-template_length", "template_length"],
                    """Discontiguous MegaBLAST template length (integer).

                    Allowed values: 16, 18, 21
                    
                    Requires: template_type.""",
                    #Accept the lengths either as integers or as strings:
                    checker_function=lambda value : value in [16,18,21,'16','18','21'],
                    equate=False),
            #Extension options:
            _Switch(["-no_greedy", "no_greedy"],
                    "Use non-greedy dynamic programming extension"),
            _Option(["-min_raw_gapped_score", "min_raw_gapped_score"],
                    "Minimum raw gapped score to keep an alignment in the "
                    "preliminary gapped and traceback stages (integer).",
                    equate=False),
            _Switch(["-ungapped", "ungapped"],
                    "Perform ungapped alignment only?"),
            _Option(["-off_diagonal_range", "off_diagonal_range"],
                    """Number of off-diagonals to search for the 2nd hit (integer).
                    
                    Expects a positive integer, or 0 (default) to turn off.
                    
                    Added in BLAST 2.2.23+
                    """,
                    equate=False),
            ]
        _NcbiblastMain2SeqCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Check template_type and template_length are used as a pair.

        Raises ValueError if exactly one of the two is set, otherwise
        carries on with _NcbiblastMain2SeqCommandline._validate.
        """
        if (self.template_type and not self.template_length) \
        or (self.template_length and not self.template_type) :
            #The message used to name template_type twice.
            raise ValueError("Options template_type and template_length require each other.")
        _NcbiblastMain2SeqCommandline._validate(self)
00923 
00924 
class NcbiblastxCommandline(_NcbiblastMain2SeqCommandline):
    """Wrapper for the NCBI BLAST+ program blastx (nucleotide query, protein database).

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    replaced the old blastall tool with separate tools for each of the searches.
    This wrapper therefore replaces BlastallCommandline with option -p blastx.

    >>> from Bio.Blast.Applications import NcbiblastxCommandline
    >>> cline = NcbiblastxCommandline(query="m_cold.fasta", db="nr", evalue=0.001)
    >>> cline
    NcbiblastxCommandline(cmd='blastx', query='m_cold.fasta', db='nr', evalue=0.001)
    >>> print cline
    blastx -query m_cold.fasta -db nr -evalue 0.001

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    """
    def __init__(self, cmd="blastx", **kwargs):
        """Declare the blastx specific options, then defer to the base class.

        cmd - name of the executable (default "blastx").
        kwargs - option values, passed on unchanged to
                 _NcbiblastMain2SeqCommandline.__init__.
        """
        self.parameters = [
            #Input query options:
            _Option(["-strand", "strand"],
                    """Query strand(s) to search against database/subject.

                    Values allowed are "both" (default), "minus", "plus".""",
                    checker_function=lambda value : value in ["both", "minus", "plus"],
                    equate=False),
            _Option(["-query_gencode", "query_gencode"],
                    """Genetic code to use to translate query

                    Integer. Default is one.""",
                    equate=False),
            #General search options:
            _Option(["-frame_shift_penalty", "frame_shift_penalty"],
                    "Frame shift penalty (integer, at least 1, default ignored).",
                    equate=False),
            _Option(["-max_intron_length", "max_intron_length"],
                    """Maximum intron length (integer).

                    Length of the largest intron allowed in a translated nucleotide
                    sequence when linking multiple distinct alignments (a negative
                    value disables linking). Default zero.""",
                    equate=False),
            _Option(["-matrix", "matrix"],
                    "Scoring matrix name (default BLOSUM62).",
                    equate=False),
            _Option(["-threshold", "threshold"],
                    "Minimum word score such that the word is added to the "
                    "BLAST lookup table (float)",
                    equate=False),
            #Query filtering options:
            _Option(["-seg", "seg"],
                    """Filter query sequence with SEG (string).

                    Format: "yes", "window locut hicut", or "no" to disable.
                    Default is "12 2.2 2.5".""",
                    equate=False),
            #Extension options:
            _Switch(["-ungapped", "ungapped"],
                    "Perform ungapped alignment only?"),
            ]
        _NcbiblastMain2SeqCommandline.__init__(self, cmd, **kwargs)
00987 
00988 
class NcbitblastnCommandline(_NcbiblastMain2SeqCommandline):
    """Wrapper for the NCBI BLAST+ program tblastn.

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    replaced the old blastall tool with separate tools for each of the searches.
    This wrapper therefore replaces BlastallCommandline with option -p tblastn.

    >>> from Bio.Blast.Applications import NcbitblastnCommandline
    >>> cline = NcbitblastnCommandline(help=True)
    >>> cline
    NcbitblastnCommandline(cmd='tblastn', help=True)
    >>> print cline
    tblastn -help

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    """
    def __init__(self, cmd="tblastn", **kwargs):
        """Declare the tblastn specific options, then defer to the base class.

        cmd - name of the executable (default "tblastn").
        kwargs - option values, passed on unchanged to
                 _NcbiblastMain2SeqCommandline.__init__.
        """
        self.parameters = [
            #General search options:
            #Note db_gencode applies to the database/subject sequences
            #(the text used to say query, copied from query_gencode).
            _Option(["-db_gencode", "db_gencode"],
                    """Genetic code to use to translate database/subjects

                    Integer. Default is one.""",
                    equate=False),
            _Option(["-frame_shift_penalty", "frame_shift_penalty"],
                    "Frame shift penalty (integer, at least 1, default ignored).",
                    equate=False),
            _Option(["-max_intron_length", "max_intron_length"],
                    """Maximum intron length (integer).

                    Length of the largest intron allowed in a translated nucleotide
                    sequence when linking multiple distinct alignments (a negative
                    value disables linking). Default zero.""",
                    equate=False),
            _Option(["-matrix", "matrix"],
                    "Scoring matrix name (default BLOSUM62).",
                    equate=False),
            _Option(["-threshold", "threshold"],
                    "Minimum word score such that the word is added to the BLAST lookup table (float)",
                    equate=False),
            _Option(["-comp_based_stats", "comp_based_stats"],
                    """Use composition-based statistics (string, default 2, i.e. True).

                    0, F or f: no composition-based statistics
                    1: Composition-based statistics as in NAR 29:2994-3005, 2001
                    2, T or t, D or d : Composition-based score adjustment as in
                       Bioinformatics 21:902-911, 2005, conditioned on sequence properties
                    3: Composition-based score adjustment as in Bioinformatics 21:902-911,
                       2005, unconditionally

                    Note that only tblastn supports values of 1 and 3.""",
                    #Compare against whole values: a substring test on
                    #"0Ft12TtDd3" rejected the documented "f", accepted the
                    #empty string and fragments such as "12", and broke on
                    #integers.
                    checker_function=lambda value : str(value) in ("0", "F", "f",
                                                                   "1",
                                                                   "2", "T", "t",
                                                                   "D", "d",
                                                                   "3"),
                    equate=False),
            #Query filtering options:
            _Option(["-seg", "seg"],
                    """Filter query sequence with SEG (string).

                    Format: "yes", "window locut hicut", or "no" to disable.
                    Default is "12 2.2 2.5".""",
                    equate=False),
            #Extension options:
            _Switch(["-ungapped", "ungapped"],
                    "Perform ungapped alignment only?"),
            #Miscellaneous options:
            _Switch(["-use_sw_tback", "use_sw_tback"],
                    "Compute locally optimal Smith-Waterman alignments?"),
            #PSI-TBLASTN options:
            #NOTE - the incompatibility below is documentation only, this
            #class defines no _validate method of its own to enforce it.
            _Option(["-in_pssm", "in_pssm"],
                    """PSI-BLAST checkpoint file

                    Incompatible with: remote, query""",
                    filename=True,
                    equate=False),
            ]
        _NcbiblastMain2SeqCommandline.__init__(self, cmd, **kwargs)
01065 
01066 
class NcbitblastxCommandline(_NcbiblastMain2SeqCommandline):
    """Wrapper for the NCBI BLAST+ program tblastx.

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    replaced the old blastall tool with separate tools for each of the searches.
    This wrapper therefore replaces BlastallCommandline with option -p tblastx.

    >>> from Bio.Blast.Applications import NcbitblastxCommandline
    >>> cline = NcbitblastxCommandline(help=True)
    >>> cline
    NcbitblastxCommandline(cmd='tblastx', help=True)
    >>> print cline
    tblastx -help

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    """
    def __init__(self, cmd="tblastx", **kwargs):
        """Declare the tblastx specific options, then defer to the base class.

        cmd - name of the executable (default "tblastx").
        kwargs - option values, passed on unchanged to
                 _NcbiblastMain2SeqCommandline.__init__.
        """
        self.parameters = [
            #Input query options:
            _Option(["-strand", "strand"],
                    """Query strand(s) to search against database/subject.

                    Values allowed are "both" (default), "minus", "plus".""",
                    checker_function=lambda value : value in ["both", "minus", "plus"],
                    equate=False),
            _Option(["-query_gencode", "query_gencode"],
                    """Genetic code to use to translate query

                    Integer. Default is one.""",
                    equate=False),
            #General search options:
            #Note db_gencode applies to the database/subject sequences
            #(the text used to say query, copied from query_gencode).
            _Option(["-db_gencode", "db_gencode"],
                    """Genetic code to use to translate database/subjects

                    Integer. Default is one.""",
                    equate=False),
            _Option(["-max_intron_length", "max_intron_length"],
                    """Maximum intron length (integer).

                    Length of the largest intron allowed in a translated nucleotide
                    sequence when linking multiple distinct alignments (a negative
                    value disables linking). Default zero.""",
                    equate=False),
            _Option(["-matrix", "matrix"],
                    "Scoring matrix name (default BLOSUM62).",
                    equate=False),
            _Option(["-threshold", "threshold"],
                    "Minimum word score such that the word is added to the "
                    "BLAST lookup table (float)",
                    equate=False),
            #Query filtering options:
            _Option(["-seg", "seg"],
                    """Filter query sequence with SEG (string).

                    Format: "yes", "window locut hicut", or "no" to disable.
                    Default is "12 2.2 2.5".""",
                    equate=False),
            ]
        _NcbiblastMain2SeqCommandline.__init__(self, cmd, **kwargs)
01128 
01129 
class NcbipsiblastCommandline(_Ncbiblast2SeqCommandline):
    """Wrapper for the NCBI BLAST+ program psiblast.

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    replaced the old blastpgp tool with a similar tool psiblast. This wrapper
    therefore replaces BlastpgpCommandline, the wrapper for blastpgp.

    >>> from Bio.Blast.Applications import NcbipsiblastCommandline
    >>> cline = NcbipsiblastCommandline(help=True)
    >>> cline
    NcbipsiblastCommandline(cmd='psiblast', help=True)
    >>> print cline
    psiblast -help

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    """
    def __init__(self, cmd="psiblast", **kwargs):
        """Declare the psiblast specific options, then defer to the base class.

        cmd - name of the executable (default "psiblast").
        kwargs - option values, passed on unchanged to
                 _Ncbiblast2SeqCommandline.__init__.
        """
        self.parameters = [
            #General search options:
            _Option(["-matrix", "matrix"],
                    "Scoring matrix name (default BLOSUM62).",
                    equate=False),
            _Option(["-threshold", "threshold"],
                    "Minimum word score such that the word is added to the "
                    "BLAST lookup table (float)",
                    equate=False),
            _Option(["-comp_based_stats", "comp_based_stats"],
                    """Use composition-based statistics (string, default 2, i.e. True).

                    0, F or f: no composition-based statistics
                    2, T or t, D or d : Composition-based score adjustment
                    as in Bioinformatics 21:902-911, 2005, conditioned on
                    sequence properties

                    Note that tblastn also supports values of 1 and 3.""",
                    #Compare against whole values: a substring test on
                    #"0Ft2TtDd" rejected the documented "f", accepted the
                    #empty string and fragments such as "Ft", and broke on
                    #integers.
                    checker_function=lambda value : str(value) in ("0", "F", "f",
                                                                   "2", "T", "t",
                                                                   "D", "d"),
                    equate=False),
            #Query filtering options:
            _Option(["-seg", "seg"],
                    """Filter query sequence with SEG (string).

                    Format: "yes", "window locut hicut", or "no" to disable.
                    Default is "12 2.2 2.5".""",
                    equate=False),
            #Extension options:
            _Option(["-gap_trigger", "gap_trigger"],
                    "Number of bits to trigger gapping (float, default 22)",
                    equate=False),
            #Miscellaneous options:
            _Switch(["-use_sw_tback", "use_sw_tback"],
                    "Compute locally optimal Smith-Waterman alignments?"),
            #PSI-BLAST options:
            _Option(["-num_iterations", "num_iterations"],
                    """Number of iterations to perform, integer

                    Integer of at least one. Default is one.
                    Incompatible with: remote""",
                    equate=False),
            _Option(["-out_pssm", "out_pssm"],
                    "File name to store checkpoint file",
                    filename=True,
                    equate=False),
            _Option(["-out_ascii_pssm", "out_ascii_pssm"],
                    "File name to store ASCII version of PSSM",
                    filename=True,
                    equate=False),
            _Option(["-in_msa", "in_msa"],
                    """File name of multiple sequence alignment to restart
                    PSI-BLAST

                    Incompatible with: in_pssm, query""",
                    filename=True,
                    equate=False),
            _Option(["-msa_master_idx", "msa_master_idx"],
                    """Index of sequence to use as master in MSA.

                    Index (1-based) of sequence to use as the master in the
                    multiple sequence alignment. If not specified, the first
                    sequence is used.""",
                    equate=False),
            _Option(["-in_pssm", "in_pssm"],
                    """PSI-BLAST checkpoint file

                    Incompatible with: in_msa, query, phi_pattern""",
                    filename=True,
                    equate=False),
            #PSSM engine options:
            _Option(["-pseudocount", "pseudocount"],
                    """Pseudo-count value used when constructing PSSM

                    Integer. Default is zero.""",
                    equate=False),
            _Option(["-inclusion_ethresh", "inclusion_ethresh"],
                    """E-value inclusion threshold for pairwise alignments

                    Float. Default is 0.002.""",
                    equate=False),
            #PHI-BLAST options:
            _Option(["-phi_pattern", "phi_pattern"],
                    """File name containing pattern to search

                    Incompatible with: in_pssm""",
                    filename=True,
                    equate=False),
            ]
        _Ncbiblast2SeqCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Check the PSI-BLAST options, then run the parent checks.

        The conflict table is passed to self._validate_incompatibilities
        before _Ncbiblast2SeqCommandline._validate is called.
        """
        incompatibles = {"num_iterations":["remote"],
                         "in_msa":["in_pssm", "query"],
                         "in_pssm":["in_msa","query","phi_pattern"]}
        self._validate_incompatibilities(incompatibles)
        _Ncbiblast2SeqCommandline._validate(self)
01244 
01245 
class NcbirpsblastCommandline(_NcbiblastCommandline):
    """Command line wrapper for rpsblast from the NCBI BLAST+ suite.

    BLAST+ is the rewrite of BLAST in C++ (the original tools were written in
    C). It includes a new rpsblast which takes the place of the old program of
    the same name, and in the same way this class takes the place of
    RpsBlastCommandline, the wrapper for the old rpsblast.

    >>> from Bio.Blast.Applications import NcbirpsblastCommandline
    >>> cline = NcbirpsblastCommandline(help=True)
    >>> cline
    NcbirpsblastCommandline(cmd='rpsblast', help=True)
    >>> print cline
    rpsblast -help

    To actually run the tool either call the object, cline(), or pass the
    command line string to the Python subprocess module. See the Biopython
    tutorial for details.
    """
    def __init__(self, cmd="rpsblast", **kwargs):
        """Declare the rpsblast specific options, then initialise the base.

        cmd - the command name, "rpsblast" unless overridden.
        kwargs - handed on unchanged to _NcbiblastCommandline.__init__
        """
        # NOTE: the continuation lines inside the triple quoted descriptions
        # below keep their deep indentation on purpose - that whitespace is
        # part of the description text itself.

        #Query filtering options:
        seg_filter = _Option(
            ["-seg", "seg"],
            """Filter query sequence with SEG (string).

                    Format: "yes", "window locut hicut", or "no" to disable.
                    Default is "12 2.2 2.5""",
            equate=False)

        #Restrict search or results:
        culling = _Option(
            ["-culling_limit", "culling_limit"],
            """Hit culling limit (integer).

                    If the query range of a hit is enveloped by that of at
                    least this many higher-scoring hits, delete the hit.

                    Incompatible with: best_hit_overhang, best_hit_score_edge.
                    """,
            equate=False)
        overhang = _Option(
            ["-best_hit_overhang", "best_hit_overhang"],
            """Best Hit algorithm overhang value (recommended value: 0.1)

                    Float between 0.0 and 0.5 inclusive.

                    Incompatible with: culling_limit.""",
            equate=False)
        score_edge = _Option(
            ["-best_hit_score_edge", "best_hit_score_edge"],
            """Best Hit algorithm score edge value (recommended value: 0.1)

                    Float between 0.0 and 0.5 inclusive.

                    Incompatible with: culling_limit.""",
            equate=False)

        # Must be in place before the base class initialiser is called.
        self.parameters = [seg_filter, culling, overhang, score_edge]
        _NcbiblastCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Check the rpsblast specific argument combinations.

        The culling_limit argument is documented above as incompatible with
        both best hit arguments. That table is given to
        self._validate_incompatibilities, and then the validation of
        _NcbiblastCommandline is run.
        """
        self._validate_incompatibilities({
            "culling_limit": ["best_hit_overhang", "best_hit_score_edge"],
        })
        _NcbiblastCommandline._validate(self)
01303 
01304 
class NcbirpstblastnCommandline(_NcbiblastCommandline):
    """Command line wrapper for rpstblastn from the NCBI BLAST+ suite.

    BLAST+ is the rewrite of BLAST in C++ (the original tools were written in
    C). The old rpsblast tool was replaced with a similar tool of the same
    name, plus this separate rpstblastn tool which performs Translated Reverse
    Position Specific BLAST.

    >>> from Bio.Blast.Applications import NcbirpstblastnCommandline
    >>> cline = NcbirpstblastnCommandline(help=True)
    >>> cline
    NcbirpstblastnCommandline(cmd='rpstblastn', help=True)
    >>> print cline
    rpstblastn -help

    To actually run the tool either call the object, cline(), or pass the
    command line string to the Python subprocess module. See the Biopython
    tutorial for details.
    """
    def __init__(self, cmd="rpstblastn", **kwargs):
        """Declare the rpstblastn specific options, then initialise the base.

        cmd - the command name, "rpstblastn" unless overridden.
        kwargs - handed on unchanged to _NcbiblastCommandline.__init__
        """
        def _is_strand(value):
            """Return True if value is one of the allowed strand names."""
            return value in ("both", "minus", "plus")

        # NOTE: the continuation lines inside the triple quoted descriptions
        # below keep their deep indentation on purpose - that whitespace is
        # part of the description text itself.

        #Input query options:
        strand = _Option(
            ["-strand", "strand"],
            """Query strand(s) to search against database/subject.

                    Values allowed are "both" (default), "minus", "plus".""",
            checker_function=_is_strand,
            equate=False)
        gencode = _Option(
            ["-query_gencode", "query_gencode"],
            """Genetic code to use to translate query

                    Integer. Default is one.""",
            equate=False)

        #Query filtering options:
        seg_filter = _Option(
            ["-seg", "seg"],
            """Filter query sequence with SEG (string).

                    Format: "yes", "window locut hicut", or "no" to disable.
                    Default is "12 2.2 2.5""",
            equate=False)

        #Extension options:
        ungapped = _Switch(["-ungapped", "ungapped"],
                           "Perform ungapped alignment only?")

        # Must be in place before the base class initialiser is called.
        self.parameters = [strand, gencode, seg_filter, ungapped]
        _NcbiblastCommandline.__init__(self, cmd, **kwargs)
01351 
01352 
class NcbiblastformatterCommandline(_NcbibaseblastCommandline):
    """Command line wrapper for blast_formatter from the NCBI BLAST+ suite.

    With BLAST 2.2.24+ (i.e. the BLAST suite rewritten in C++ instead of C)
    the NCBI added ASN.1 as an output format option of all the search tools,
    and extended blast_formatter so that it accepts this as input.

    Using blast_formatter you can turn such ASN.1 output into any of the
    other output formats (XML, tabular, plain text, HTML).

    >>> from Bio.Blast.Applications import NcbiblastformatterCommandline
    >>> cline = NcbiblastformatterCommandline(archive="example.asn", outfmt=5, out="example.xml")
    >>> cline
    NcbiblastformatterCommandline(cmd='blast_formatter', out='example.xml', outfmt=5, archive='example.asn')
    >>> print cline
    blast_formatter -out example.xml -outfmt 5 -archive example.asn

    To actually run the tool either call the object, cline(), or pass the
    command line string to the Python subprocess module. See the Biopython
    tutorial for details.

    This wrapper targets the blast_formatter found in BLAST 2.2.24+ (or
    later), the release in which the NCBI first announced the inclusion of
    this tool. An earlier version did exist in BLAST 2.2.23+ (and possibly in
    older releases), but it lacked the -archive option (there -rid is a
    mandatory argument instead) and it is not supported by this wrapper.
    """
    def __init__(self, cmd="blast_formatter", **kwargs):
        """Declare the blast_formatter options, then initialise the base.

        cmd - the command name, "blast_formatter" unless overridden.
        kwargs - handed on unchanged to _NcbibaseblastCommandline.__init__
        """
        def _at_least_one(value):
            """Return True if value is one or more."""
            return value >= 1

        # Input options
        request_id = _Option(
            ["-rid", "rid"],
            "BLAST Request ID (RID), not compatiable with archive arg",
            equate=False)
        archive_file = _Option(
            ["-archive", "archive"],
            "Archive file of results, not compatiable with rid arg.",
            filename=True,
            equate=False)

        # Restrict search or results
        max_targets = _Option(
            ["-max_target_seqs", "max_target_seqs"],
            "Maximum number of aligned sequences to keep",
            checker_function=_at_least_one,
            equate=False)

        # Must be in place before the base class initialiser is called.
        self.parameters = [request_id, archive_file, max_targets]
        _NcbibaseblastCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Check the blast_formatter specific argument combinations.

        The rid argument is documented above as not compatible with the
        archive argument. That table is given to
        self._validate_incompatibilities, and then the validation of
        _NcbibaseblastCommandline is run.
        """
        self._validate_incompatibilities({"rid": ["archive"]})
        _NcbibaseblastCommandline._validate(self)
01402         
01403 
def _test():
    """Run this module's doctests via doctest.testmod, with verbose output.

    No module is passed to testmod, so it inspects the module which is
    running as __main__.
    """
    from doctest import testmod
    testmod(verbose=1)
01408 
if __name__ == "__main__":
    # Only when this file is executed directly as a script (not when it is
    # imported): run the doctests.
    _test()