Back to index

python-biopython  1.60
psw.py
Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 # This code is part of the Biopython distribution and governed by its
00003 # license.  Please see the LICENSE file that should have been included
00004 # as part of this package.
00005 #
00006 # Bio.Wise contains modules for running and processing the output of
00007 # some of the models in the Wise2 package by Ewan Birney available from:
00008 # ftp://ftp.ebi.ac.uk/pub/software/unix/wise2/
00009 # http://www.ebi.ac.uk/Wise2/
00010 # 
00011 # Bio.Wise.psw is for protein Smith-Waterman alignments
00012 # Bio.Wise.dnal is for Smith-Waterman DNA alignments
00013 
00014 __version__ = "$Revision: 1.5 $"
00015 
00016 import os
00017 import re
00018 import sys
00019 
00020 from Bio import Wise
00021 
# Base psw invocation.  align() copies this list and appends the optional
# switches below, so it has to stay a list.
# NOTE(review): "-l" and "-F" presumably select the "Column N:" / "Unit ..."
# report that parse() reads -- confirm against the psw help output.
_CMDLINE_PSW = ["psw", "-l", "-F"]

# Switches align() adds when the matching keyword argument is supplied.
_OPTION_SCORES = "-m"  # followed by the ``scores`` value
_OPTION_GAP_START = "-g"  # followed by str(gap_start)
_OPTION_GAP_EXTENSION = "-e"  # followed by str(gap_extension)
00026 
class AlignmentColumnFullException(Exception):
    """Signal that an AlignmentColumn already holds its second unit.

    AlignmentColumn.append raises it; Alignment.append catches it and
    starts a new column instead.
    """
00029 
class Alignment(list):
    """List of AlignmentColumn objects built up one ColumnUnit at a time."""

    def append(self, column_unit):
        """Add column_unit to the last column, or open a new column with it.

        A new AlignmentColumn is started when the alignment is still empty
        (IndexError from self[-1]) or when the last column reports that it
        is already full (AlignmentColumnFullException).
        """
        try:
            open_column = self[-1]
            open_column.append(column_unit)
        except (IndexError, AlignmentColumnFullException):
            list.append(self, AlignmentColumn(column_unit))
00038 
class AlignmentColumn(list):
    """Two-slot list holding the column values of unit 0 and unit 1.

    Slot 1 stays None until the second unit is appended.  The ``kind``
    attribute starts as the kind of unit 0; while it is still "SEQUENCE"
    it is replaced by the kind of the unit that fills slot 1.
    """

    def __init__(self, column_unit):
        """Start the column from a unit-0 ColumnUnit; slot 1 is left empty."""
        assert column_unit.unit == 0
        self.kind = column_unit.kind
        super(AlignmentColumn, self).__init__([column_unit.column, None])

    def __repr__(self):
        first = self[0]
        second = self[1]
        return "%s(%s, %s)" % (self.kind, first, second)

    def _set_kind(self, column_unit):
        """Adopt column_unit.kind unless a non-"SEQUENCE" kind is already set."""
        if self.kind != "SEQUENCE":
            return
        self.kind = column_unit.kind

    def append(self, column_unit):
        """Fill slot 1 from a unit-1 ColumnUnit.

        Raises AlignmentColumnFullException if slot 1 is already occupied.
        """
        already_filled = self[1] is not None
        if already_filled:
            raise AlignmentColumnFullException

        assert column_unit.unit == 1

        self._set_kind(column_unit)
        self[1] = column_unit.column
00060         
class ColumnUnit(object):
    """Plain record for one parsed "Unit" line.

    unit   -- the unit number as given by the caller (parse_line passes 0 or 1)
    column -- the column value for that unit
    kind   -- the bracketed label, e.g. "SEQUENCE"
    """

    def __init__(self, unit, column, kind):
        self.unit, self.column, self.kind = unit, column, kind

    def __str__(self):
        fields = (self.unit, self.column, self.kind)
        return "ColumnUnit(unit=%s, column=%s, %s)" % fields

    # repr() and str() intentionally share one rendering.
    __repr__ = __str__
00071 
# Groups: unit number (0 or 1), first bracketed number, second bracketed
# number, bracketed label.
_re_unit = re.compile(r"^Unit +([01])- \[ *(-?\d+)- *(-?\d+)\] \[(\w+)\]$")


def parse_line(line):
    """Turn one report line into a ColumnUnit, or None if it is not a Unit line.

    Trailing whitespace is ignored.  The ColumnUnit's column is the second
    of the two bracketed numbers; the first one is not used.

    >>> print(parse_line("Column 0:"))
    None
    >>> parse_line("Unit  0- [  -1-   0] [SEQUENCE]")
    ColumnUnit(unit=0, column=0, SEQUENCE)
    >>> parse_line("Unit  1- [  85-  86] [SEQUENCE]")
    ColumnUnit(unit=1, column=86, SEQUENCE)
    """
    found = _re_unit.match(line.rstrip())
    if found is None:
        return None

    unit_text, _unused_start, end_text, kind = found.groups()
    return ColumnUnit(int(unit_text), int(end_text), kind)
00088     
def parse(iterable):
    """Build an Alignment from the lines of a psw report.

    The part of the report that is used looks like::

        Column 0:
        Unit  0- [  -1-   0] [SEQUENCE]
        Unit  1- [  85-  86] [SEQUENCE]

    which means that seq1[0] == seq2[86] (0-based).  Lines that
    parse_line() does not recognise (for example "Column 0:") are skipped.

    If the WISE_PY_DEBUG environment variable is set to a non-empty value,
    every input line is echoed to standard output before it is parsed.

    iterable -- any iterable of text lines, e.g. an open file.

    Returns the populated Alignment.
    """
    # The setting cannot usefully change while we iterate, so read it once
    # instead of doing a try/except lookup for every line.
    echo_lines = bool(os.environ.get("WISE_PY_DEBUG"))

    alignment = Alignment()
    for line in iterable:
        if echo_lines:
            # Lines carry their own newline.  write() emits them unchanged
            # and, unlike the print statement, is valid on Python 2 and 3.
            sys.stdout.write(line)

        column_unit = parse_line(line)
        if column_unit:
            alignment.append(column_unit)

    return alignment
00113 
def align(pair,
          scores=None,
          gap_start=None,
          gap_extension=None,
          *args, **keywds):
    """Run psw on a pair of inputs and return the parsed Alignment.

    pair          -- passed straight through to Wise.align
                     (NOTE(review): presumably two sequence file names, as
                     main() passes sys.argv[1:3] -- confirm against Wise.align)
    scores        -- value for the "-m" switch; omitted when empty or None
    gap_start     -- value for the "-g" switch; omitted only when None
    gap_extension -- value for the "-e" switch; omitted only when None

    Any further positional or keyword arguments are forwarded to Wise.align.

    Returns whatever parse() builds from the object Wise.align returns.
    """
    cmdline = _CMDLINE_PSW[:]  # copy, so the module-level default is not mutated
    if scores:
        cmdline.extend((_OPTION_SCORES, scores))
    # Compare against None rather than relying on truthiness: an explicit
    # penalty of 0 is a real value and must reach psw, not be dropped.
    if gap_start is not None:
        cmdline.extend((_OPTION_GAP_START, str(gap_start)))
    if gap_extension is not None:
        cmdline.extend((_OPTION_GAP_EXTENSION, str(gap_extension)))
    temp_file = Wise.align(cmdline, pair, *args, **keywds)
    return parse(temp_file)
00129 
def main():
    """Align the first two command-line arguments and print the Alignment."""
    # Single-argument call form: prints the same text under Python 2 and is
    # also valid syntax under Python 3, unlike the bare print statement.
    print(align(sys.argv[1:3]))
00132 
def _test(*args, **keywds):
    """Run this module's doctests, forwarding all arguments to doctest.testmod."""
    import doctest
    import sys

    this_module = sys.modules[__name__]
    doctest.testmod(this_module, *args, **keywds)
00136 
if __name__ == "__main__":
    # Script entry point: run the doctests first, then align the inputs
    # named on the command line.  __debug__ is False under "python -O",
    # so optimised runs skip the self-test.
    if __debug__:
        _test()
    main()