Back to index

python-biopython  1.60
_Prank.py
Go to the documentation of this file.
00001 # Copyright 2009 by Cymon J. Cox.  All rights reserved.
00002 # This code is part of the Biopython distribution and governed by its
00003 # license.  Please see the LICENSE file that should have been included
00004 # as part of this package.
00005 """Command line wrapper for the multiple alignment program PRANK.
00006 """
00007 
00008 __docformat__ = "epytext en" #Don't just use plain text in epydoc API pages!
00009 
00010 from Bio.Application import _Option, _Switch, AbstractCommandline
00011 
class PrankCommandline(AbstractCommandline):
    """Command line wrapper for the multiple alignment program PRANK.

    http://www.ebi.ac.uk/goldman-srv/prank/prank/

    Example:

    To align a FASTA file (unaligned.fasta) with the output in aligned
    FASTA format with the output filename starting with "aligned" (you
    can't pick the filename explicitly), no tree output and no XML output,
    use:

    >>> from Bio.Align.Applications import PrankCommandline
    >>> prank_cline = PrankCommandline(d="unaligned.fasta",
    ...                                o="aligned", #prefix only!
    ...                                f=8, #FASTA output
    ...                                notree=True, noxml=True)
    >>> print(prank_cline)
    prank -d=unaligned.fasta -o=aligned -f=8 -noxml -notree

    You would typically run the command line with prank_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

    Citations:

    Loytynoja, A. and Goldman, N. 2005. An algorithm for progressive
    multiple alignment of sequences with insertions. Proceedings of
    the National Academy of Sciences, 102: 10557--10562.

    Loytynoja, A. and Goldman, N. 2008. Phylogeny-aware gap placement
    prevents errors in sequence alignment and evolutionary analysis.
    Science, 320: 1632.

    Last checked against version: 081202
    """
    def __init__(self, cmd="prank", **kwargs):
        """Declare the PRANK options and initialise the base class.

        Arguments:
         - cmd - name or path of the PRANK executable (default "prank").
         - kwargs - option values, keyed by the last name listed in each
           parameter definition below (e.g. d="unaligned.fasta",
           notree=True); passed unchanged to AbstractCommandline.__init__.
        """
        OUTPUT_FORMAT_VALUES = list(range(1,18))

        def _is_number(value):
            """Accept int or float for a numeric option (PRIVATE).

            Numeric options are written as -name=value, so a whole number
            such as fixedbranches=1 should be as acceptable as 1.0.
            """
            return isinstance(value, (int, float))

        def _is_string(value):
            """Accept a byte string or a native string (PRIVATE).

            Where str is bytes the second type is redundant; elsewhere it
            lets an ordinary quoted string through the check.
            """
            return isinstance(value, (bytes, str))

        self.parameters = [
            ################## input/output parameters: ##################
            #-d=sequence_file
            _Option(["-d", "d"],
                    "Input filename",
                    filename=True,
                    is_required=True),
            #-t=tree_file [default: no tree, generate approximate NJ tree]
            _Option(["-t", "t"],"Input guide tree filename",
                    filename=True),
            #-tree="tree_string" [tree in newick format; in double quotes]
            _Option(["-tree", "tree"],
                    "Input guide tree as Newick string"),
            #-m=model_file [default: HKY2/WAG]
            #NOTE(review): described as a filename but, unlike -d/-t/-o, not
            #declared with filename=True - confirm whether that is intended.
            _Option(["-m", "m"],
                    "User-defined alignment model filename. Default: "
                    "HKY2/WAG"),
            #-o=output_file [default: 'output']
            _Option(["-o", "o"],
                    "Output filenames prefix. Default: 'output'\n "
                    "Will write: output.?.fas (depending on requested "
                    "format), output.?.xml and output.?.dnd",
                    filename=True),
            #-f=output_format [default: 8]
            #The checker accepts every integer 1..17, including codes that
            #are not listed in the help text below.
            _Option(["-f", "f"],
                    "Output alignment format. Default: 8 FASTA\n"
                    "Option are:\n"
                    "1. IG/Stanford       8. Pearson/Fasta\n"
                    "2. GenBank/GB        11. Phylip3.2\n"
                    "3. NBRF              12. Phylip\n"
                    "4. EMBL              14. PIR/CODATA\n"
                    "6. DNAStrider        15. MSF\n"
                    "7. Fitch             17. PAUP/NEXUS",
                    checker_function=lambda x: x in OUTPUT_FORMAT_VALUES),
            _Switch(["-noxml", "noxml"],
                    "Do not output XML files"),
            _Switch(["-notree", "notree"],
                    "Do not output dnd tree files"),
            _Switch(["-shortnames", "shortnames"],
                    "Truncate names at first space"),
            _Switch(["-quiet", "quiet"],
                    "Reduce verbosity"),
            ####################### model parameters: ######################
            #+F [force insertions to be always skipped]
            #-F [equivalent]
            _Switch(["-F", "+F", "F"],
                    "Force insertions to be always skipped: same as +F"),
            #-dots [show insertion gaps as dots]
            _Switch(["-dots", "dots"],
                    "Show insertion gaps as dots"),
            #-gaprate=# [gap opening rate; default: dna 0.025 / prot 0.0025]
            _Option(["-gaprate", "gaprate"],
                    "Gap opening rate. Default: dna 0.025 prot 0.0025",
                    checker_function=_is_number),
            #-gapext=# [gap extension probability; default: dna 0.5 / prot 0.5]
            _Option(["-gapext", "gapext"],
                    "Gap extension probability. Default: dna 0.5 "
                    "/ prot 0.5",
                    checker_function=_is_number),
            #-dnafreqs=#,#,#,# [ACGT; default: empirical]
            _Option(["-dnafreqs", "dnafreqs"],
                    "DNA frequencies - 'A,C,G,T'. eg '25,25,25,25' as a quote "
                    "surrounded string value. Default: empirical",
                    checker_function=_is_string),
            #-kappa=# [ts/tv rate ratio; default:2]
            #A ratio need not be a whole number, so floats are accepted too.
            _Option(["-kappa", "kappa"],
                    "Transition/transversion ratio. Default: 2",
                    checker_function=_is_number),
            #-rho=# [pur/pyr rate ratio; default:1]
            _Option(["-rho", "rho"],
                    "Purine/pyrimidine ratio. Default: 1",
                    checker_function=_is_number),
            #-codon [for DNA: use empirical codon model]
            #Assuming this is an input file as in -m
            _Option(["-codon", "codon"],
                    "Codon model filename. Default: empirical codon model"),
            #-termgap [penalise terminal gaps normally]
            _Switch(["-termgap", "termgap"],
                    "Penalise terminal gaps normally"),
            ################ other parameters: ################################
            #-nopost [do not compute posterior support; default: compute]
            _Switch(["-nopost", "nopost"],
                    "Do not compute posterior support. Default: compute"),
            #-pwdist=# [expected pairwise distance for computing guidetree;
            #default: dna 0.25 / prot 0.5]
            _Option(["-pwdist", "pwdist"],
                    "Expected pairwise distance for computing guidetree. "
                    "Default: dna 0.25 / prot 0.5",
                    checker_function=_is_number),
            _Switch(["-once", "once"],
                    "Run only once. Default: twice if no guidetree given"),
            _Switch(["-twice", "twice"],
                    "Always run twice"),
            _Switch(["-skipins", "skipins"],
                    "Skip insertions in posterior support"),
            _Switch(["-uselogs", "uselogs"],
                    "Slower but should work for a greater number of sequences"),
            _Switch(["-writeanc", "writeanc"],
                    "Output ancestral sequences"),
            _Switch(["-printnodes", "printnodes"],
                    "Output each node; mostly for debugging"),
            #-matresize=# [matrix resizing multiplier]
            # Doesnt specify type but Float and Int work
            _Option(["-matresize", "matresize"],
                    "Matrix resizing multiplier",
                    checker_function=_is_number),
            #-matinitsize=# [matrix initial size multiplier]
            # Doesnt specify type but Float and Int work
            _Option(["-matinitsize", "matinitsize"],
                    "Matrix initial size multiplier",
                    checker_function=_is_number),
            _Switch(["-longseq", "longseq"],
                    "Save space in pairwise alignments"),
            _Switch(["-pwgenomic", "pwgenomic"],
                    "Do pairwise alignment, no guidetree"),
            #-pwgenomicdist=# [distance for pairwise alignment; default: 0.3]
            _Option(["-pwgenomicdist", "pwgenomicdist"],
                    "Distance for pairwise alignment. Default: 0.3",
                    checker_function=_is_number),
            #-scalebranches=# [scale branch lengths; default: dna 1 / prot 2]
            _Option(["-scalebranches", "scalebranches"],
                    "Scale branch lengths. Default: dna 1 / prot 2",
                    checker_function=_is_number),
            #-fixedbranches=# [use fixed branch lengths]
            #Assume looking for a number
            _Option(["-fixedbranches", "fixedbranches"],
                    "Use fixed branch lengths of input value",
                    checker_function=_is_number),
            #-maxbranches=# [set maximum branch length]
            #Assume looking for a number
            _Option(["-maxbranches", "maxbranches"],
                    "Use maximum branch lengths of input value",
                    checker_function=_is_number),
            #-realbranches [disable branch length truncation]
            _Switch(["-realbranches", "realbranches"],
                    "Disable branch length truncation"),
            _Switch(["-translate", "translate"],
                    "Translate to protein"),
            _Switch(["-mttranslate", "mttranslate"],
                    "Translate to protein using mt table"),
            ###################### other: ####################
            _Switch(["-convert", "convert"],
                    "Convert input alignment to new format. Do "
                    "not perform alignment")
            ]
        # self.parameters is assigned before this call, as in the original
        # ordering, so the base initialiser can see the declared options.
        AbstractCommandline.__init__(self, cmd, **kwargs)
00197 
00198 def _test():
00199     """Run the module's doctests (PRIVATE)."""
00200     print "Runing modules doctests..."
00201     import doctest
00202     doctest.testmod()
00203     print "Done"
00204 
# Run the doctests only when this file is executed directly as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    _test()