Back to index

python-biopython  1.60
_Novoalign.py
Go to the documentation of this file.
00001 # Copyright 2009 by Osvaldo Zagordi.  All rights reserved.
00002 # Revisions copyright 2010 by Peter Cock.
00003 # This code is part of the Biopython distribution and governed by its
00004 # license.  Please see the LICENSE file that should have been included
00005 # as part of this package.
00006 """Command line wrapper for the short read aligner Novoalign by Novocraft."""
00007 import types
00008 from Bio.Application import _Option, AbstractCommandline
00009 
class NovoalignCommandline(AbstractCommandline):
    """Command line wrapper for novoalign by Novocraft.

    See www.novocraft.com - novoalign is a short read alignment program.

    Example:

    >>> from Bio.Sequencing.Applications import NovoalignCommandline
    >>> novoalign_cline = NovoalignCommandline(database='some_db',
    ...                                        readfile='some_seq.txt')
    >>> print(novoalign_cline)
    novoalign -d some_db -f some_seq.txt

    As with all the Biopython application wrappers, you can also add or
    change options after creating the object:

    >>> novoalign_cline.format = 'PRBnSEQ'
    >>> novoalign_cline.r_method='0.99' # limited valid values
    >>> novoalign_cline.fragment = '250 20' # must be given as a string
    >>> novoalign_cline.miRNA = 100
    >>> print(novoalign_cline)
    novoalign -d some_db -f some_seq.txt -F PRBnSEQ -r 0.99 -i 250 20 -m 100

    You would typically run the command line with novoalign_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

    Last checked against version: 2.05.04
    """
    def __init__(self, cmd="novoalign", **kwargs):
        """Declare the novoalign options and initialise the wrapper.

        Arguments:
         - cmd - name of the executable to run (default "novoalign").
         - kwargs - initial option values, forwarded unchanged to
           AbstractCommandline.__init__ together with cmd.
        """
        READ_FORMAT = ['FA', 'SLXFQ', 'STDFQ', 'ILMFQ', 'PRB', 'PRBnSEQ']
        REPORT_FORMAT = ['Native', 'Pairwise', 'SAM']
        REPEAT_METHOD = ['None', 'Random', 'All', 'Exhaustive', '0.99']

        # Value checkers shared by the options below.  The builtin int/str
        # are used instead of types.IntType/types.StringType: they are the
        # same objects on Python 2, and the types aliases are gone on
        # Python 3.
        def is_int(value):
            return isinstance(value, int)

        def is_str(value):
            return isinstance(value, str)

        def is_repeat_method(value):
            # Only the first word is validated, so a trailing limit such as
            # 'All 10' is accepted.  Empty or non-string input is rejected
            # (returns False) instead of raising IndexError/AttributeError.
            words = value.split() if hasattr(value, "split") else []
            return bool(words) and words[0] in REPEAT_METHOD

        def is_fragment(value):
            # Expect exactly two whitespace separated fields in one string,
            # e.g. '250 20'; non-string input is rejected rather than
            # raising AttributeError.
            return hasattr(value, "split") and len(value.split()) == 2

        self.parameters = \
           [
            _Option(["-d", "database"],
                    "database filename",
                    filename=True,
                    equate=False),
            _Option(["-f", "readfile"],
                    "read file",
                    filename=True,
                    equate=False),
            _Option(["-F", "format"],
                    "Format of read files.\n\nAllowed values: %s" \
                    % ", ".join(READ_FORMAT),
                    checker_function=lambda x: x in READ_FORMAT,
                    equate=False),

            # Alignment scoring options
            _Option(["-t", "threshold"],
                    "Threshold for alignment score",
                    checker_function=is_int,
                    equate=False),
            _Option(["-g", "gap_open"],
                    "Gap opening penalty [default: 40]",
                    checker_function=is_int,
                    equate=False),
            _Option(["-x", "gap_extend"],
                    "Gap extend penalty [default: 15]",
                    checker_function=is_int,
                    equate=False),
            _Option(["-u", "unconverted"],
                    "Experimental: unconverted cytosines penalty in bisulfite mode\n\n"
                    "Default: no penalty",
                    checker_function=is_int,
                    equate=False),

            # Quality control and read filtering
            _Option(["-l", "good_bases"],
                    "Minimum number of good quality bases [default: log(N_g, 4) + 5]",
                    checker_function=is_int,
                    equate=False),
            _Option(["-h", "homopolymer"],
                    "Homopolymer read filter [default: 20; disable: negative value]",
                    checker_function=is_int,
                    equate=False),

            # Read preprocessing options
            _Option(["-a", "adapter3"],
                    "Strips a 3' adapter sequence prior to alignment.\n\n"
                    "With paired ends two adapters can be specified",
                    checker_function=is_str,
                    equate=False),
            _Option(["-n", "truncate"],
                    "Truncate to specific length before alignment",
                    checker_function=is_int,
                    equate=False),
            _Option(["-s", "trimming"],
                    "If fail to align, trim by s bases until they map or become shorter than l.\n\n"
                    "Default: 2",
                    checker_function=is_int,
                    equate=False),
            _Option(["-5", "adapter5"],
                    "Strips a 5' adapter sequence.\n\n"
                    "Similar to -a (adaptor3), but on the 5' end.",
                    checker_function=is_str,
                    equate=False),

            # Reporting options
            _Option(["-o", "report"],
                    "Specifies the report format.\n\nAllowed values: %s\nDefault: Native" \
                    % ", ".join(REPORT_FORMAT),
                    checker_function=lambda x: x in REPORT_FORMAT,
                    equate=False),
            _Option(["-Q", "quality"],
                    "Lower threshold for an alignment to be reported [default: 0]",
                    checker_function=is_int,
                    equate=False),
            _Option(["-R", "repeats"],
                    "If score difference is higher, report repeats.\n\n"
                    "Otherwise -r read method applies [default: 5]",
                    checker_function=is_int,
                    equate=False),
            _Option(["-r", "r_method"],
                    "Methods to report reads with multiple matches.\n\n"
                    "Allowed values: %s\n"
                    "'All' and 'Exhaustive' accept limits." \
                    % ", ".join(REPEAT_METHOD),
                    checker_function=is_repeat_method,
                    equate=False),
            _Option(["-e", "recorded"],
                    "Alignments recorded with score equal to the best.\n\n"
                    "Default: 1000 in default read method, otherwise no limit.",
                    checker_function=is_int,
                    equate=False),
            _Option(["-q", "qual_digits"],
                    "Decimal digits for quality scores [default: 0]",
                    checker_function=is_int,
                    equate=False),

            # Paired end options
            _Option(["-i", "fragment"],
                    "Fragment length (2 reads + insert) and standard deviation [default: 250 30]",
                    checker_function=is_fragment,
                    equate=False),
            _Option(["-v", "variation"],
                    "Structural variation penalty [default: 70]",
                    checker_function=is_int,
                    equate=False),

            # miRNA mode
            _Option(["-m", "miRNA"],
                    "Sets miRNA mode and optionally sets a value for the region scanned [default: off]",
                    checker_function=is_int,
                    equate=False),

            # Multithreading
            _Option(["-c", "cores"],
                    "Number of threads, disabled on free versions [default: number of cores]",
                    checker_function=is_int,
                    equate=False),

            # Quality calibrations
            _Option(["-k", "read_cal"],
                    "Read quality calibration from file (mismatch counts)",
                    checker_function=is_str,
                    equate=False),
            _Option(["-K", "write_cal"],
                    "Accumulate mismatch counts and write to file",
                    checker_function=is_str,
                    equate=False),
            ]
        # The base class consumes self.parameters, so it must be set first.
        AbstractCommandline.__init__(self, cmd, **kwargs)
00174 
def _test():
    """Run the module's doctests (PRIVATE)."""
    # print(...) with a single argument behaves the same on Python 2 and 3,
    # unlike the Python 2 only print statement.
    print("Running Novoalign doctests...")
    import doctest
    doctest.testmod()
    print("Done")


if __name__ == "__main__":
    # Executed as a script: run the doctests.
    _test()