Back to index

python-biopython  1.60
__init__.py
Go to the documentation of this file.
00001 # Copyright 2005 by Jonathan Taylor.
00002 # All rights reserved.
00003 # This code is part of the Biopython distribution and governed by its
00004 # license.  Please see the LICENSE file that should have been included
00005 # as part of this package.
00006 """This module deals with CAPS markers.
00007 
00008 A CAPS marker is a location (a DifferentialCutsite, as described below)
00009 together with a set of primers that can be used to visualize it.  More
00010 information can be found in the paper located at:
00011 
00012 http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=8106085&dopt=Abstract
00013 
00014 Copyright Jonathan Taylor 2005
00015 """
00016 
class DifferentialCutsite(object):
    """An alignment position where an enzyme cuts some sequences but not all.

    The enzyme cuts at least one sequence of the alignment at this position
    and cannot cut at least one other sequence there.

    Members:
    start       Where the site lives in the alignment (stored as an int).
    enzyme      The enzyme that causes this.
    cuts_in     A list of sequences (as indexes into the alignment) the
                enzyme cuts in.
    blocked_in  A list of sequences (as indexes into the alignment) the
                enzyme is blocked in.

    """

    def __init__(self, **kwds):
        """Build a DifferentialCutsite from keyword arguments.

        Every member listed in the class description must be supplied as a
        keyword; a missing one raises KeyError.  Keywords other than those
        four are ignored.
        """
        # start is the only member that is converted; it is coerced with int()
        self.start = int(kwds["start"])

        # the remaining members are stored exactly as they were passed in
        for member in ("enzyme", "cuts_in", "blocked_in"):
            setattr(self, member, kwds[member])
00042 
class AlignmentHasDifferentLengthsError(Exception):
    """Raised by CAPSMap when the sequences of an alignment differ in length."""
00045 
class CAPSMap(object):
    """A map of an alignment showing all possible dcuts.

    Members:
    alignment  The alignment that is mapped.
    sequences  The ``seq`` of every record in the alignment, in order.
    size       The number of sequences in the alignment.
    length     The common length of the sequences.
    enzymes    The enzymes used to create the map.
    dcuts      A list of possible CAPS markers in the form of
               DifferentialCutsites.
    """

    def __init__(self, alignment, enzymes=None):
        """Initialize the CAPSMap and search it for differential cutsites.

        Required:
        alignment    The alignment to be mapped.  Iterating over it must
                     yield records that have a ``seq`` attribute.

        Optional:
        enzymes      The enzymes to be used to create the map.  When omitted,
                     the map gets its own new empty list.

        Raises AlignmentHasDifferentLengthsError if the sequences do not all
        have the same length.
        """
        # A literal [] default would be a single list shared by every CAPSMap
        # created without enzymes, so the default is built per instance.
        if enzymes is None:
            enzymes = []

        self.sequences = [rec.seq for rec in alignment]
        self.size = len(self.sequences)
        self.length = len(self.sequences[0])
        for seq in self.sequences:
            if len(seq) != self.length:
                raise AlignmentHasDifferentLengthsError

        self.alignment = alignment
        self.enzymes = enzymes

        # look for dcuts
        self._digest()

    def _digest_with(self, enzyme):
        """Append to self.dcuts each differential cutsite found for enzyme.

        The enzyme must provide ``search(seq)``, yielding cut positions, and
        an ``fst5`` value that is subtracted from every position.
        """
        # One set of cut positions per sequence, indexed like self.sequences.
        # Indexing by position in the alignment (rather than using the
        # sequence object as a dict key) means sequences need not be hashable.
        cuts_by_seq = [
            set(cut - enzyme.fst5 for cut in enzyme.search(seq))
            for seq in self.sequences
        ]

        # every distinct cut position seen in any sequence, in ascending order
        all_cuts = sorted(set().union(*cuts_by_seq))

        for cut in all_cuts:
            # test for dcuts: split the sequences by whether they are cut here
            cuts_in = []
            blocked_in = []
            for index, seq_cuts in enumerate(cuts_by_seq):
                if cut in seq_cuts:
                    cuts_in.append(index)
                else:
                    blocked_in.append(index)

            # a site cut in every sequence is not differential; it needs at
            # least one cut sequence and at least one blocked sequence
            if cuts_in and blocked_in:
                self.dcuts.append(
                    DifferentialCutsite(
                        start=cut,
                        enzyme=enzyme,
                        cuts_in=cuts_in,
                        blocked_in=blocked_in,
                    )
                )

    def _digest(self):
        """Reset self.dcuts, then digest the alignment with each enzyme."""
        self.dcuts = []

        for enzyme in self.enzymes:
            self._digest_with(enzyme)