Back to index

python-biopython  1.60
_Dialign.py
Go to the documentation of this file.
00001 # Copyright 2009 by Cymon J. Cox.  All rights reserved.
00002 # This code is part of the Biopython distribution and governed by its
00003 # license.  Please see the LICENSE file that should have been included
00004 # as part of this package.
00005 """Command line wrapper for the multiple alignment program DIALIGN2-2.
00006 """
00007 
00008 __docformat__ = "epytext en" #Don't just use plain text in epydoc API pages!
00009 
00010 from Bio.Application import _Option, _Argument, _Switch, AbstractCommandline
00011 
class DialignCommandline(AbstractCommandline):
    """Command line wrapper for the multiple alignment program DIALIGN2-2.

    http://bibiserv.techfak.uni-bielefeld.de/dialign/welcome.html

    Each DIALIGN command line flag is declared in C{__init__} as a
    C{_Switch} (flag without a value), an C{_Option} (flag followed by a
    value) or an C{_Argument} (the trailing input file name), and is set
    via the keyword of the same name without the leading dash.

    Example:

    To align a FASTA file (unaligned.fasta) with the output files names
    aligned.* including a FASTA output file (aligned.fa), use:

    >>> from Bio.Align.Applications import DialignCommandline
    >>> dialign_cline = DialignCommandline(input="unaligned.fasta",
    ...                                    fn="aligned", fa=True)
    >>> print(dialign_cline)
    dialign2-2 -fa -fn aligned unaligned.fasta

    You would typically run the command line with dialign_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

    Citation:

    B. Morgenstern (2004). DIALIGN: Multiple DNA and Protein Sequence
    Alignment at BiBiServ. Nucleic Acids Research 32, W33-W36.

    Last checked against version: 2.2
    """
    def __init__(self, cmd="dialign2-2", **kwargs):
        """Declare the DIALIGN parameters and initialise the wrapper.

        @param cmd: Name (or path) of the DIALIGN executable; stored as
                    C{self.program_name}.
        @param kwargs: Initial parameter values, passed unchanged to
                       C{AbstractCommandline.__init__}.
        """
        self.program_name = cmd
        # The list is declared before calling the base class __init__,
        # which is handed the keyword arguments to apply.
        self.parameters = [
            _Switch(["-afc", "afc"],
                    "Creates additional output file '*.afc' "
                    "containing data of all fragments considered "
                    "for alignment WARNING: this file can be HUGE !"),
            _Switch(["-afc_v", "afc_v"],
                    "Like '-afc' but verbose: fragments are explicitly "
                    "printed. WARNING: this file can be EVEN BIGGER !"),
            _Switch(["-anc", "anc"],
                    "Anchored alignment. Requires a file <seq_file>.anc "
                    "containing anchor points."),
            _Switch(["-cs", "cs"],
                    "If segments are translated, not only the `Watson "
                    "strand' but also the `Crick strand' is looked at."),
            _Switch(["-cw", "cw"],
                    "Additional output file in CLUSTAL W format."),
            _Switch(["-ds", "ds"],
                    "`dna alignment speed up' - non-translated nucleic acid "
                    "fragments are taken into account only if they start "
                    "with at least two matches. Speeds up DNA alignment at "
                    "the expense of sensitivity."),
            _Switch(["-fa", "fa"],
                    "Additional output file in FASTA format."),
            _Switch(["-ff", "ff"],
                    "Creates file *.frg containing information about all "
                    "fragments that are part of the respective optimal "
                    "pairwise alignments plus information about "
                    "consistency in the multiple alignment"),
            _Option(["-fn", "fn"],
                    "Output files are named <out_file>.<extension>.",
                    equate=False),
            _Switch(["-fop", "fop"],
                    "Creates file *.fop containing coordinates of all "
                    "fragments that are part of the respective pairwise alignments."),
            _Switch(["-fsm", "fsm"],
                    "Creates file *.fsm containing coordinates of all "
                    "fragments that are part of the final alignment"),
            _Switch(["-iw", "iw"],
                    "Overlap weights switched off (by default, overlap "
                    "weights are used if up to 35 sequences are aligned). "
                    "This option speeds up the alignment but may lead "
                    "to reduced alignment quality."),
            _Switch(["-lgs", "lgs"],
                    "`long genomic sequences' - combines the following "
                    "options: -ma, -thr 2, -lmax 30, -smin 8, -nta, -ff, "
                    "-fop, -ff, -cs, -ds, -pst "),
            _Switch(["-lgs_t", "lgs_t"],
                    "Like '-lgs' but with all segment pairs assessed "
                    "at the peptide level (rather than 'mixed alignments' "
                    "as with the '-lgs' option). Therefore faster than "
                    "-lgs but not very sensitive for non-coding regions."),
            _Option(["-lmax", "lmax"],
                    "Maximum fragment length = x  (default: x = 40 or "
                    "x = 120 for `translated' fragments). Shorter x "
                    "speeds up the program but may affect alignment quality.",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            _Switch(["-lo", "lo"],
                    "(Long Output) Additional file *.log with information "
                    "about fragments selected for pairwise alignment and "
                    "about consistency in multi-alignment procedure."),
            _Switch(["-ma", "ma"],
                    "`mixed alignments' consisting of P-fragments and "
                    "N-fragments if nucleic acid sequences are aligned."),
            _Switch(["-mask", "mask"],
                    "Residues not belonging to selected fragments are "
                    "replaced by `*' characters in output alignment "
                    "(rather than being printed in lower-case characters)"),
            _Switch(["-mat", "mat"],
                    "Creates file *mat with substitution counts derived "
                    "from the fragments that have been selected for alignment."),
            # NOTE(review): the description refers to a threshold 't', which
            # suggests this flag takes a value, yet it is declared as a
            # value-less switch - confirm against the DIALIGN user guide.
            _Switch(["-mat_thr", "mat_thr"],
                    "Like '-mat' but only fragments with weight score "
                    "> t are considered"),
            _Switch(["-max_link", "max_link"],
                    "'maximum linkage' clustering used to construct "
                    "sequence tree (instead of UPGMA)."),
            _Switch(["-min_link", "min_link"],
                    "'minimum linkage' clustering used."),
            _Option(["-mot", "mot"],
                    "'motif' option.",
                    equate=False),
            _Switch(["-msf", "msf"],
                    "Separate output file in MSF format."),
            _Switch(["-n", "n"],
                    "Input sequences are nucleic acid sequences. "
                    "No translation of fragments."),
            _Switch(["-nt", "nt"],
                    "Input sequences are nucleic acid sequences and "
                    "`nucleic acid segments' are translated to `peptide "
                    "segments'."),
            _Switch(["-nta", "nta"],
                    "`no textual alignment' - textual alignment suppressed. "
                    "This option makes sense if other output files are of "
                    "interest -- e.g. the fragment files created with -ff, "
                    "-fop, -fsm or -lo."),
            _Switch(["-o", "o"],
                    "Fast version, resulting alignments may be slightly "
                    "different."),
            _Switch(["-ow", "ow"],
                    "Overlap weights enforced (By default, overlap weights "
                    "are used only if up to 35 sequences are aligned since "
                    "calculating overlap weights is time consuming)."),
            _Switch(["-pst", "pst"],
                    "'print status'. Creates and updates a file *.sta with "
                    "information about the current status of the program "
                    "run.  This option is recommended if large data sets "
                    "are aligned since it allows the user to estimate the "
                    "remaining running time."),
            # -smin carries a value (the -lgs preset above is described as
            # implying "-smin 8"), so it must be an _Option: declared as a
            # _Switch, smin=8 would emit a bare "-smin" and drop the value.
            _Option(["-smin", "smin"],
                    "Minimum similarity value for first residue pair "
                    "(or codon pair) in fragments. Speeds up protein "
                    "alignment or alignment of translated DNA fragments "
                    "at the expense of sensitivity.",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            _Option(["-stars", "stars"],
                    "Maximum number of `*' characters indicating degree "
                    "of local similarity among sequences. By default, no "
                    "stars are used but numbers between 0 and 9, instead.",
                    checker_function=lambda x: x in range(0, 10),
                    equate=False),
            _Switch(["-stdo", "stdo"],
                    "Results written to standard output."),
            _Switch(["-ta", "ta"],
                    "Standard textual alignment printed (overrides "
                    "suppression of textual alignments in special "
                    "options, e.g. -lgs)"),
            _Option(["-thr", "thr"],
                    "Threshold T = x.",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            _Switch(["-xfr", "xfr"],
                    "'exclude fragments' - list of fragments can be "
                    "specified that are NOT considered for pairwise alignment"),
            _Argument(["input"],
                      "Input file name. Must be FASTA format",
                      filename=True,
                      is_required=True),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
00180 
def _test():
    """Run the module's doctests (PRIVATE).

    Prints a start message, calls C{doctest.testmod()} with no arguments
    and prints "Done" afterwards. Returns None.
    """
    # A single parenthesised argument prints identically whether print is
    # the Python 2 statement or the Python 3 function.
    print("Running modules doctests...")
    import doctest
    doctest.testmod()
    print("Done")
00187 
# Run the doctests only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    _test()