Back to index

python-biopython  1.60
PrintFormat.py
Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 #
00003 #      Restriction Analysis Libraries.
00004 #      Copyright (C) 2004. Frederic Sohm.
00005 #
00006 # This code is part of the Biopython distribution and governed by its
00007 # license.  Please see the LICENSE file that should have been included
00008 # as part of this package.
00009 #
00010 
00011 import re
00012 import itertools
00013 from Bio.Restriction import RanaConfig as RanaConf
00014 
00015 """
00016 Usage:
00017 
00018     PrintFormat prints the results of a restriction analysis in one of
00019     three formats:
00020     list, number or map.
00021 
00022     the easiest way to use it is:
00023     
00024     >>> from Bio.Restriction.PrintFormat import PrintFormat
00025     >>> from Bio.Restriction.Restriction import AllEnzymes
00026     >>> from Bio import Entrez
00027     >>> from Bio import SeqIO
00028     >>> handle = Entrez.efetch(db="nucleotide", rettype="fasta", id="SYNPBR322")
00029     >>> pBR322 = SeqIO.read(handle, "fasta")
00030     >>> handle.close()
00031     >>> dct = AllEnzymes.search(pBR322.seq)
00032     >>> new = PrintFormat()
00033     >>> new.print_that(dct, '\n my pBR322 analysis\n\n','\n no site :\n\n')
00034 
00035      my pBR322 analysis
00036      
00037     AasI       :  2169, 2582.
00038     AatII      :  4289.
00039     ...
00040             More enzymes.
00041     ...
00042     ZraI       :  4287.
00043     ZrmI       :  3847.
00044     
00045      no site:
00046      
00047     AarI      AatI      Acc65I    AcsI      AcvI      AdeI      AflII     AgeI    
00048     ...
00049             More enzymes.
00050     ...
00051     Vha464I   XapI      XbaI      XcmI      XhoI      XmaCI     XmaI      XmaJI     
00052     Zsp2I 
00053 
00054     >>> new.sequence = pBR322.seq
00055     >>> new.print_as("map")
00056     >>> new.print_that(dct)
00057     ...
00058     
00059     Some of the methods of PrintFormat are meant to be overridden by
00060     derived classes.
00061 """
00062 
class PrintFormat(object):
    """PrintFormat formats and prints the results of a restriction analysis."""

    # Layout settings, read from the RanaConfig module when the class body
    # is executed.
    ConsoleWidth = RanaConf.ConsoleWidth
    NameWidth    = RanaConf.NameWidth
    MaxSize      = RanaConf.MaxSize
    # Columns left over once ConsoleWidth is split into NameWidth-wide cells.
    Cmodulo      = ConsoleWidth%NameWidth       
    # ConsoleWidth rounded down to a whole number of NameWidth-wide cells.
    PrefWidth    = ConsoleWidth - Cmodulo
    Indent       = RanaConf.Indent
    # PrefWidth minus one name cell. _make_nocut_only wraps a row of names
    # once it grows past this; __next_section wraps site lists at
    # linesize - MaxSize.
    linesize     = PrefWidth - NameWidth
00073 
00074     def __init__(self):
00075         """PrintFormat() -> new PrintFormat Instance"""
00076         pass
00077 
00078     def print_as(self, what='list'):
00079         """PF.print_as([what='list']) -> print the results as specified.
00080 
00081         Valid format are:
00082             'list'      -> alphabetical order
00083             'number'    -> number of sites in the sequence
00084             'map'       -> a map representation of the sequence with the sites.
00085 
00086         If you want more flexibility over-ride the virtual method make_format.
00087         """
00088         if what == 'map':
00089             self.make_format = self._make_map
00090         elif what == 'number':
00091             self.make_format = self._make_number
00092         else:
00093             self.make_format = self._make_list
00094             
00095         return
00096             
00097 
00098     def print_that(self, dct, title='',  s1=''):
00099         """PF.print_that(dct, [title[, s1]]) -> Print dct nicely formatted.
00100 
00101         dct is a dictionary as returned by a RestrictionBatch.search()
00102         
00103         title is the title of the map.
00104         It must be a formated string, i.e. you must include the line break.
00105         
00106         s1 is the title separating the list of enzymes that have sites from
00107         those without sites.
00108         s1 must be a formatted string as well.
00109 
00110         The format of print_that is a list."""
00111         if not dct:
00112             dct = self.results
00113         ls, nc = [], []
00114         for k, v in dct.iteritems():
00115             if v:
00116                 ls.append((k,v))
00117             else:
00118                 nc.append(k)
00119         print self.make_format(ls, title, nc, s1)
00120         return
00121        
00122     def make_format(self, cut=[], title='', nc=[], s1=''):
00123         """PF.make_format(cut, nc, title, s) -> string
00124 
00125         Virtual method.
00126         Here to be pointed to one of the _make_* methods.
00127         You can as well create a new method and point make_format to it."""
00128         return self._make_list(cut,title, nc,s1)
00129 
00130 ###### _make_* methods to be used with the virtual method make_format
00131 
00132     def _make_list(self, ls,title, nc,s1):
00133         """PF._make_number(ls,title, nc,s1) -> string.
00134 
00135         return a string of form:
00136         
00137         title.
00138 
00139         enzyme1     :   position1, position2.
00140         enzyme2     :   position1, position2, position3.
00141 
00142         ls is a list of cutting enzymes.
00143         title is the title.
00144         nc is a list of non cutting enzymes.
00145         s1 is the sentence before the non cutting enzymes."""
00146         return self._make_list_only(ls, title) + self._make_nocut_only(nc, s1)
00147 
00148     def _make_map(self, ls,title, nc,s1):
00149         """PF._make_number(ls,title, nc,s1) -> string.
00150 
00151         return a string of form:
00152         
00153         title.
00154 
00155             enzyme1, position
00156             |
00157         AAAAAAAAAAAAAAAAAAAAA...
00158         |||||||||||||||||||||
00159         TTTTTTTTTTTTTTTTTTTTT...
00160 
00161         ls is a list of cutting enzymes.
00162         title is the title.
00163         nc is a list of non cutting enzymes.
00164         s1 is the sentence before the non cutting enzymes."""
00165         return self._make_map_only(ls, title) + self._make_nocut_only(nc, s1)
00166 
00167     def _make_number(self, ls,title, nc,s1):
00168         """PF._make_number(ls,title, nc,s1) -> string.
00169 
00170         title.
00171         
00172         enzyme which cut 1 time:
00173         
00174         enzyme1     :   position1.
00175 
00176         enzyme which cut 2 times:
00177         
00178         enzyme2     :   position1, position2.
00179         ...
00180 
00181         ls is a list of cutting enzymes.
00182         title is the title.
00183         nc is a list of non cutting enzymes.
00184         s1 is the sentence before the non cutting enzymes."""
00185         return self._make_number_only(ls, title)+self._make_nocut_only(nc,s1)
00186     
00187     def _make_nocut(self, ls,title, nc,s1):
00188         """PF._make_nocut(ls,title, nc,s1) -> string.
00189 
00190         return a formatted string of the non cutting enzymes.
00191 
00192         ls is a list of cutting enzymes -> will not be used.
00193         Here for compatibility with make_format.
00194         
00195         title is the title.
00196         nc is a list of non cutting enzymes.
00197         s1 is the sentence before the non cutting enzymes."""
00198         return title + self._make_nocut_only(nc, s1) 
00199 
00200     def _make_nocut_only(self, nc, s1, ls =[],title=''):
00201         """PF._make_nocut_only(nc, s1) -> string.
00202 
00203         return a formatted string of the non cutting enzymes.
00204         
00205         nc is a list of non cutting enzymes.
00206         s1 is the sentence before the non cutting enzymes.
00207         """
00208         if not nc:
00209             return s1
00210         nc.sort()
00211         st = ''
00212         stringsite = s1 or '\n   Enzymes which do not cut the sequence.\n\n'    
00213         Join = ''.join
00214         for key in nc:
00215             st = Join((st, str.ljust(str(key), self.NameWidth)))
00216             if len(st) > self.linesize:
00217                 stringsite = Join((stringsite, st, '\n'))
00218                 st = ''
00219         stringsite = Join((stringsite, st, '\n'))
00220         return stringsite
00221     
00222     def _make_list_only(self, ls, title, nc = [], s1 = ''):
00223         """PF._make_list_only(ls, title) -> string.
00224 
00225         return a string of form:
00226         
00227         title.
00228 
00229         enzyme1     :   position1, position2.
00230         enzyme2     :   position1, position2, position3.
00231         ...
00232         
00233         ls is a list of results.
00234         title is a string.
00235         Non cutting enzymes are not included."""
00236         if not ls:
00237             return title
00238         return self.__next_section(ls, title)
00239 
00240     def _make_number_only(self, ls, title, nc = [], s1 =''):
00241         """PF._make_number_only(ls, title) -> string.
00242 
00243         return a string of form:
00244         
00245         title.
00246         
00247         enzyme which cut 1 time:
00248         
00249         enzyme1     :   position1.
00250 
00251         enzyme which cut 2 times:
00252         
00253         enzyme2     :   position1, position2.
00254         ...
00255         
00256                 
00257         ls is a list of results.
00258         title is a string.
00259         Non cutting enzymes are not included."""
00260         if not ls:
00261             return title
00262         ls.sort(lambda x,y : cmp(len(x[1]), len(y[1])))
00263         iterator = iter(ls)
00264         cur_len  = 1
00265         new_sect = []
00266         for name, sites in iterator:
00267             l = len(sites)
00268             if l > cur_len:
00269                 title += "\n\nenzymes which cut %i times :\n\n"%cur_len
00270                 title = self.__next_section(new_sect, title)
00271                 new_sect, cur_len = [(name, sites)], l
00272                 continue
00273             new_sect.append((name,sites))
00274         title += "\n\nenzymes which cut %i times :\n\n"%cur_len
00275         return self.__next_section(new_sect, title)
00276             
00277     def _make_map_only(self, ls, title, nc = [],  s1 = ''):
00278         """PF._make_map_only(ls, title) -> string.
00279 
00280         return a string of form:
00281         
00282         title.
00283 
00284             enzyme1, position
00285             |
00286         AAAAAAAAAAAAAAAAAAAAA...
00287         |||||||||||||||||||||
00288         TTTTTTTTTTTTTTTTTTTTT...
00289         
00290                 
00291         ls is a list of results.
00292         title is a string.
00293         Non cutting enzymes are not included.
00294         """
00295         if not ls:
00296             return title
00297         resultKeys = [str(x) for x,y in ls]
00298         resultKeys.sort()
00299         map = title or ''
00300         enzymemap = {}
00301         for (enzyme, cut) in ls:
00302             for c in cut:
00303                 if c in enzymemap:
00304                     enzymemap[c].append(str(enzyme))
00305                 else:
00306                     enzymemap[c] = [str(enzyme)]
00307         mapping = enzymemap.keys()
00308         mapping.sort()
00309         cutloc = {}
00310         x, counter, length = 0, 0, len(self.sequence)
00311         for x in xrange(60, length, 60):
00312             counter = x - 60
00313             l=[]
00314             for key in mapping:
00315                 if key <= x:
00316                     l.append(key)
00317                 else:
00318                     cutloc[counter] = l
00319                     mapping = mapping[mapping.index(key):]
00320                     break
00321             cutloc[x] = l
00322         cutloc[x] = mapping
00323         sequence = self.sequence.tostring()
00324         revsequence = self.sequence.complement().tostring()
00325         a = '|'
00326         base, counter = 0, 0
00327         emptyline = ' ' * 60
00328         Join = ''.join
00329         for base in xrange(60, length, 60):
00330             counter = base - 60
00331             line = emptyline
00332             for key in cutloc[counter]:
00333                 s = ''
00334                 if key == base:
00335                     for n in enzymemap[key] : s = ' '.join((s,n))
00336                     l = line[0:59]
00337                     lineo = Join((l, str(key), s, '\n'))
00338                     line2 = Join((l, a, '\n'))
00339                     linetot = Join((lineo, line2))
00340                     map = Join((map, linetot))
00341                     break
00342                 for n in enzymemap[key] : s = ' '.join((s,n))
00343                 k = key%60
00344                 lineo = Join((line[0:(k-1)], str(key), s, '\n'))
00345                 line = Join((line[0:(k-1)], a, line[k:]))
00346                 line2 = Join((line[0:(k-1)], a, line[k:], '\n'))
00347                 linetot = Join((lineo,line2))
00348                 map = Join((map,linetot))
00349             mapunit = '\n'.join((sequence[counter : base],a * 60,
00350                                  revsequence[counter : base],
00351                                  Join((str.ljust(str(counter+1), 15), ' '* 30,
00352                                         str.rjust(str(base), 15),'\n\n'))
00353                                  ))
00354             map = Join((map, mapunit)) 
00355         line = ' '* 60
00356         for key in cutloc[base]:
00357             s = ''
00358             if key == length:
00359                 for n in enzymemap[key]:
00360                     s = Join((s,' ',n))
00361                 l = line[0:(length-1)]
00362                 lineo = Join((l,str(key),s,'\n'))
00363                 line2 = Join((l,a,'\n'))
00364                 linetot = Join((lineo, line2))
00365                 map = Join((map, linetot))
00366                 break
00367             for n in enzymemap[key] : s = Join((s,' ',n))
00368             k = key%60
00369             lineo = Join((line[0:(k-1)],str(key),s,'\n'))
00370             line = Join((line[0:(k-1)],a,line[k:]))
00371             line2 = Join((line[0:(k-1)],a,line[k:],'\n'))
00372             linetot = Join((lineo,line2))
00373             map = Join((map,linetot))
00374         mapunit = ''
00375         mapunit = Join((sequence[base : length], '\n'))
00376         mapunit = Join((mapunit, a * (length-base), '\n'))
00377         mapunit = Join((mapunit,revsequence[base:length], '\n'))
00378         mapunit = Join((mapunit, Join((str.ljust(str(base+1), 15), ' '*(
00379             length-base-30),str.rjust(str(length), 15),
00380                                        '\n\n'))))
00381         map = Join((map,mapunit))
00382         return map
00383     
00384 ###### private method to do lists:
00385     
00386     def __next_section(self, ls, into):
00387         """FP.__next_section(ls, into) -> string.
00388 
00389         ls is a list of tuple (string, [int, int]).
00390         into is a string to which the formatted ls will be added.
00391 
00392         Format ls as a string of lines:
00393         The form is:
00394 
00395         enzyme1     :   position1.
00396         enzyme2     :   position2, position3.
00397 
00398         then add the formatted ls to tot
00399         return tot."""
00400         ls.sort()
00401         indentation = '\n' + (self.NameWidth + self.Indent) * ' '
00402         linesize = self.linesize - self.MaxSize
00403         pat = re.compile("([\w,\s()]){1,%i}[,\.]"%linesize)
00404         several, Join = '', ''.join
00405         for name, sites in ls:
00406             stringsite = ''
00407             l = Join((', '.join([str(site) for site in sites]), '.'))
00408             if len(l) > linesize:
00409                 #
00410                 #   cut where appropriate and add the indentation
00411                 #
00412                 l = [x.group() for x in re.finditer(pat, l)]
00413                 stringsite = indentation.join(l) 
00414             else:
00415                 stringsite = l    
00416             into = Join((into,
00417                          str(name).ljust(self.NameWidth),' :  ',stringsite,'\n'))
00418         return into