Back to index

python-biopython  1.60
_TCoffee.py
Go to the documentation of this file.
00001 # Copyright 2009 by Cymon J. Cox and Brad Chapman. All rights reserved.
00002 # This code is part of the Biopython distribution and governed by its
00003 # license.  Please see the LICENSE file that should have been included
00004 # as part of this package.
00005 """Command line wrapper for the multiple alignment program TCOFFEE.
00006 """
00007 
00008 __docformat__ = "epytext en" #Don't just use plain text in epydoc API pages!
00009 
00010 from Bio.Application import _Option, _Switch, _Argument, AbstractCommandline
00011 
class TCoffeeCommandline(AbstractCommandline):
    """Commandline object for the TCoffee alignment program.

    http://www.tcoffee.org/Projects_home_page/t_coffee_home_page.html

    The T-Coffee command line tool has a lot of switches and options.
    This wrapper implements a VERY limited number of options - if you
    would like to help improve it please get in touch.

    Example:

    To align a FASTA file (unaligned.fasta) with the output in ClustalW
    format (file aligned.aln), and otherwise default settings, use:

    >>> from Bio.Align.Applications import TCoffeeCommandline
    >>> tcoffee_cline = TCoffeeCommandline(infile="unaligned.fasta",
    ...                                    output="clustalw",
    ...                                    outfile="aligned.aln")
    >>> print(tcoffee_cline)
    t_coffee -output clustalw -infile unaligned.fasta -outfile aligned.aln

    You would typically run the command line with tcoffee_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

    Citation:

    T-Coffee: A novel method for multiple sequence alignments.
    Notredame, Higgins, Heringa, JMB,302(205-217) 2000

    Last checked against: Version_6.92
    """
    # Values accepted by the "type" option's checker function below.
    SEQ_TYPES = ["dna","protein","dna_protein"]

    def __init__(self, cmd="t_coffee", **kwargs):
        """Declare the supported T-Coffee parameters and initialise.

        cmd is handed unchanged to AbstractCommandline.__init__ (the
        doctest in the class docstring shows it leading the generated
        command line).  Any keyword arguments are forwarded too; each
        parameter below lists its keyword name as the second entry of
        its names list (e.g. infile, outfile, output).
        """
        # NOTE(review): every _Option uses equate=False; judging by the
        # class doctest this yields "-name value" rather than
        # "-name=value" - confirm against Bio.Application._Option.
        self.parameters = [
           _Option(["-output", "output"],
                   """Specify the output type.
                   One (or more separated by a comma) of:
                   'clustalw_aln', 'clustalw', 'gcg', 'msf_aln',
                   'pir_aln', 'fasta_aln', 'phylip', 'pir_seq', 'fasta_seq'

                   Note that of these Biopython's AlignIO module will only
                   read clustalw, pir, and fasta.
                   """, #TODO - Can we read the PHYLIP output?
                   equate=False),
           _Option(["-infile", "infile"],
                   "Specify the input file.",
                   filename=True,
                   is_required=True,
                   equate=False),
           #Indicates the name of the alignment output by t_coffee. If the
           #default is used, the alignment is named <your sequences>.aln
           _Option(["-outfile", "outfile"],
                   "Specify the output file. Default: <your sequences>.aln",
                   filename=True,
                   equate=False),
           _Switch(["-convert", "convert"],
                   "Specify you want to perform a file conversion"),
           _Option(["-type", "type"],
                   "Specify the type of sequence being aligned",
                   # Only the values listed in SEQ_TYPES are accepted.
                   checker_function=lambda x: x in self.SEQ_TYPES,
                   equate=False),
           # The pieces of this description are joined by implicit string
           # concatenation, so each piece must end with its own separator.
           _Option(["-outorder", "outorder"],
                   "Specify the order of sequence to output. "
                   "Either 'input', 'aligned' or <filename> of "
                   "Fasta file with sequence order",
                   equate=False),
           _Option(["-matrix", "matrix"],
                   "Specify the filename of the substitution matrix to use. "
                   "Default: blosum62mt",
                   equate=False),
           _Option(["-gapopen", "gapopen"],
                   "Indicates the penalty applied for opening a gap "
                   "(negative integer)",
                   # Only the type is checked here, not the sign.
                   checker_function=lambda x: isinstance(x, int),
                   equate=False),
           _Option(["-gapext", "gapext"],
                   "Indicates the penalty applied for extending a "
                   "gap. (negative integer)",
                   # Only the type is checked here, not the sign.
                   checker_function=lambda x: isinstance(x, int),
                   equate=False),
           _Switch(["-quiet", "quiet"],
                   "Turn off log output"),
           _Option(["-mode", "mode"],
                   "Specifies a special mode: genome, quickaln, dali, 3dcoffee",
                   equate=False),
           ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
00100 
00101 def _test():
00102     """Run the module's doctests (PRIVATE)."""
00103     print "Runing modules doctests..."
00104     import doctest
00105     doctest.testmod()
00106     print "Done"
00107 
00108 if __name__ == "__main__":
00109     _test()