Back to index

python-biopython  1.60
_ClustalOmega.py
Go to the documentation of this file.
00001 # Copyright 2011 by Andreas Wilm. All rights reserved.
00002 # Based on ClustalW wrapper copyright 2009 by Cymon J. Cox.
00003 #
00004 # Wrapper for Clustal Omega by Andreas Wilm (2011). Used _Clustalw.py
00005 # as template.
00006 #
00007 # This code is part of the Biopython distribution and governed by its
00008 # license.  Please see the LICENSE file that should have been included
00009 # as part of this package.
00010 """Command line wrapper for the multiple alignment program Clustal Omega.
00011 """
00012 
00013 from Bio.Application import _Option, _Switch, AbstractCommandline
00014 
class ClustalOmegaCommandline(AbstractCommandline):
    """Wrap the Clustal Omega multiple alignment program (clustalo).

    Program home page: http://www.clustal.org/omega

    Example:

    >>> from Bio.Align.Applications import ClustalOmegaCommandline
    >>> in_file = "unaligned.fasta"
    >>> out_file = "aligned.fasta"
    >>> clustalomega_cline = ClustalOmegaCommandline(infile=in_file, outfile=out_file, verbose=True, auto=True)
    >>> print(clustalomega_cline)
    clustalo -i unaligned.fasta -o aligned.fasta --auto -v


    The command line is typically executed by calling the wrapper object,
    i.e. clustalomega_cline(), or through the Python subprocess module, as
    described in the Biopython tutorial.

    Citation:

    Sievers F, Wilm A, Dineen D, Gibson TJ, Karplus K, Li W, Lopez R,
    McWilliam H, Remmert R, Soding J, Thompson JD, Higgins DG
    Fast, scalable generation of high-quality protein multiple
    sequence alignments using Clustal Omega.
    Molecular Systems Biology 2011; accepted.

    Last checked against versions: 1.0.3
    """

    def __init__(self, cmd="clustalo", **kwargs):
        """Declare the clustalo parameters and initialise the base class.

        cmd    -- forwarded as the first argument of
                  AbstractCommandline.__init__ (default "clustalo").
        kwargs -- forwarded unchanged to AbstractCommandline.__init__.
        """

        def is_int(value):
            # Shared validator for all the integer-valued options.
            return isinstance(value, int)

        # A sequence (not a set) so that unhashable values are simply
        # rejected by the membership test instead of raising.
        known_formats = ("a2m", "fa", "fasta",
                         "clu", "clustal",
                         "msf",
                         "phy", "phylip",
                         "selex",
                         "st", "stockholm",
                         "vie", "vienna")

        def is_known_format(value):
            return value in known_formats

        def file_option(names, description):
            # Option whose value is a file name.
            return _Option(names, description, filename=True, equate=False)

        def int_option(names, description):
            # Option whose value must be an int.
            return _Option(names, description, equate=False,
                           checker_function=is_int)

        # The sections below, and the entries inside them, are kept in the
        # same order as the output of "clustalo --help".

        sequence_input = [
            file_option(["-i", "--in", "--infile", "infile"],
                        "Multiple sequence input"),
            _Switch(["--dealign", "dealign"],
                    "Dealign input sequences"),
            # NOTE(review): "HMM input" looks like a description that ended
            # up in the names list; it is kept so the declared names stay
            # exactly as they were - confirm before removing.
            file_option(["--hmm-in", "HMM input", "hmm_input"],
                        "HMM input files"),
            file_option(["--profile1", "--p1", "profile1"],
                        "Pre-aligned multiple sequence file (aligned columns will be kept fix)."),
            file_option(["--profile2", "--p2", "profile2"],
                        "Pre-aligned multiple sequence file (aligned columns will be kept fix)."),
        ]

        clustering = [
            file_option(["--distmat-in", "distmat_in"],
                        "Pairwise distance matrix input file (skips distance computation)."),
            file_option(["--distmat-out", "distmat_out"],
                        "Pairwise distance matrix output file."),
            file_option(["--guidetree-in", "guidetree_in"],
                        "Guide tree input file (skips distance computation and guide-tree clustering step)."),
            file_option(["--guidetree-out", "guidetree_out"],
                        "Guide tree output file."),
            _Switch(["--full", "distmat_full"],
                    "Use full distance matrix for guide-tree calculation (might be slow; mBed is default)"),
            _Switch(["--full-iter", "distmat_full_iter"],
                    "Use full distance matrix for guide-tree calculation during iteration (might be slowish; mBed is default)"),
        ]

        alignment_output = [
            file_option(["-o", "--out", "--outfile", "outfile"],
                        "Multiple sequence alignment output file (default: stdout)."),
            _Option(["--outfmt", "outfmt"],
                    "MSA output file format:"
                    " a2m=fa[sta],clu[stal],msf,phy[lip],selex,st[ockholm],vie[nna]"
                    " (default: fasta).",
                    equate=False,
                    checker_function=is_known_format),
        ]

        iteration = [
            int_option(["--iterations", "--iter", "iterations"],
                       "Number of (combined guide-tree/HMM) iterations"),
            int_option(["--max-guidetree-iterations", "max_guidetree_iterations"],
                       "Maximum number of guidetree iterations"),
            int_option(["--max-hmm-iterations", "max_hmm_iterations"],
                       "Maximum number of HMM iterations"),
        ]

        # Limits (will exit early, if exceeded)
        limits = [
            int_option(["--maxnumseq", "maxnumseq"],
                       "Maximum allowed number of sequences"),
            int_option(["--maxseqlen", "maxseqlen"],
                       "Maximum allowed sequence length"),
        ]

        miscellaneous = [
            _Switch(["--auto", "auto"],
                    "Set options automatically (might overwrite some of your options)"),
            int_option(["--threads", "threads"],
                       "Number of processors to use"),
            file_option(["-l", "--log", "log"],
                        "Log all non-essential output to this file."),
            _Switch(["-h", "--help", "help"],
                    "Outline the command line params."),
            _Switch(["-v", "--verbose", "verbose"],
                    "Verbose output"),
            _Switch(["--version", "version"],
                    "Print version information and exit"),
            _Switch(["--long-version", "long_version"],
                    "Print long version information and exit"),
            _Switch(["--force", "force"],
                    "Force file overwriting."),
        ]

        self.parameters = (sequence_input
                           + clustering
                           + alignment_output
                           + iteration
                           + limits
                           + miscellaneous)
        AbstractCommandline.__init__(self, cmd, **kwargs)
00155 
def _test():
    """Run the module's doctests (PRIVATE).

    Prints a start banner, calls doctest.testmod() with no arguments
    (so the module examined is __main__), then prints "Done".
    """
    import doctest
    # The banner used to say "Runing ClustalW", a leftover from the ClustalW
    # wrapper this file was modelled on. print(...) with one argument gives
    # the same output under the print statement and the print function.
    print("Running Clustal Omega doctests...")
    doctest.testmod()
    print("Done")
00162 
# Only run the doctests when this file is executed directly as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    _test()