Back to index

python-biopython  1.60
__init__.py
Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 # This code is part of the Biopython distribution and governed by its
00003 # license.  Please see the LICENSE file that should have been included
00004 # as part of this package.
00005 #
00006 # Bio.Wise contains modules for running and processing the output of
00007 # some of the models in the Wise2 package by Ewan Birney available from:
00008 # ftp://ftp.ebi.ac.uk/pub/software/unix/wise2/
00009 # http://www.ebi.ac.uk/Wise2/
00010 # 
00011 # Bio.Wise.psw is for protein Smith-Waterman alignments
00012 # Bio.Wise.dnal is for Smith-Waterman DNA alignments
00013 
00014 __version__ = "$Revision: 1.17 $"
00015 
00016 import os
00017 import sys
00018 import tempfile
00019 
00020 from Bio import SeqIO
00021 
00022 
00023 def _build_align_cmdline(cmdline, pair, output_filename, kbyte=None, force_type=None, quiet=False):
00024     """Helper function to build a command line string (PRIVATE).
00025 
00026     >>> os.environ["WISE_KBYTE"]="300000"
00027     >>> if os.isatty(sys.stderr.fileno()):
00028     ...    c = _build_align_cmdline(["dnal"], ("seq1.fna", "seq2.fna"),
00029     ...                             "/tmp/output", kbyte=100000)
00030     ...    assert c == 'dnal -kbyte 100000 seq1.fna seq2.fna > /tmp/output', c
00031     ...    c = _build_align_cmdline(["psw"], ("seq1.faa", "seq2.faa"),
00032     ...                             "/tmp/output_aa")
00033     ...    assert c == 'psw -kbyte 300000 seq1.faa seq2.faa > /tmp/output_aa', c
00034     ... else:
00035     ...    c = _build_align_cmdline(["dnal"], ("seq1.fna", "seq2.fna"),
00036     ...                             "/tmp/output", kbyte=100000)
00037     ...    assert c == 'dnal -kbyte 100000 -quiet seq1.fna seq2.fna > /tmp/output', c
00038     ...    c = _build_align_cmdline(["psw"], ("seq1.faa", "seq2.faa"),
00039     ...                             "/tmp/output_aa")
00040     ...    assert c == 'psw -kbyte 300000 -quiet seq1.faa seq2.faa > /tmp/output_aa', c
00041 
00042     """
00043     cmdline = cmdline[:]
00044 
00045     ### XXX: force_type ignored
00046 
00047     if kbyte is None:
00048         try:
00049             cmdline.extend(("-kbyte", os.environ["WISE_KBYTE"]))
00050         except KeyError:
00051             pass
00052     else:
00053         cmdline.extend(("-kbyte", str(kbyte)))
00054 
00055     if not os.isatty(sys.stderr.fileno()):
00056         cmdline.append("-quiet")
00057 
00058     cmdline.extend(pair)
00059     cmdline.extend((">", output_filename))
00060     if quiet:
00061         cmdline.extend(("2>", "/dev/null"))
00062     cmdline_str = ' '.join(cmdline)
00063 
00064     return cmdline_str
00065 
00066 def align(cmdline, pair, kbyte=None, force_type=None, dry_run=False, quiet=False, debug=False):
00067     """
00068     Returns a filehandle
00069     """
00070     assert len(pair) == 2
00071     
00072     output_file = tempfile.NamedTemporaryFile(mode='r')
00073     input_files = tempfile.NamedTemporaryFile(mode="w"), tempfile.NamedTemporaryFile(mode="w")
00074 
00075     if dry_run:
00076         print _build_align_cmdline(cmdline,
00077                                    pair,
00078                                    output_file.name,
00079                                    kbyte,
00080                                    force_type,
00081                                    quiet)
00082         return
00083 
00084     for filename, input_file in zip(pair, input_files):
00085         # Pipe the file through Biopython's Fasta parser/writer
00086         # to make sure it conforms to the Fasta standard (in particular,
00087         # Wise2 may choke on long lines in the Fasta file)
00088         records = SeqIO.parse(open(filename), 'fasta')
00089         SeqIO.write(records, input_file, 'fasta')
00090         input_file.flush()
00091 
00092     input_file_names = [input_file.name for input_file in input_files]
00093 
00094     cmdline_str = _build_align_cmdline(cmdline,
00095                                        input_file_names,
00096                                        output_file.name,
00097                                        kbyte,
00098                                        force_type,
00099                                        quiet)
00100 
00101     if debug:
00102         print >>sys.stderr, cmdline_str
00103 
00104     status = os.system(cmdline_str) >> 8
00105 
00106     if status > 1:
00107         if kbyte != 0: # possible memory problem; could be None
00108             print >>sys.stderr, "INFO trying again with the linear model"
00109             return align(cmdline, pair, 0, force_type, dry_run, quiet, debug)
00110         else:
00111             raise OSError("%s returned %s" % (" ".join(cmdline), status))
00112     
00113     return output_file
00114 
00115 def all_pairs(singles):
00116     """
00117     Generate pairs list for all-against-all alignments
00118 
00119     >>> all_pairs(range(4))
00120     [(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)]
00121     """
00122     pairs = []
00123 
00124     singles = list(singles)
00125     while singles:
00126         suitor = singles.pop(0) # if sorted, stay sorted
00127         pairs.extend([(suitor, single) for single in singles])
00128 
00129     return pairs
00130 
00131 def main():
00132     pass
00133 
00134 def _test(*args, **keywds):
00135     import doctest, sys
00136     doctest.testmod(sys.modules[__name__], *args, **keywds)
00137 
00138 if __name__ == "__main__":
00139     if __debug__:
00140         _test()
00141     main()