Back to index

python-biopython  1.60
_Graph.py
Go to the documentation of this file.
00001 # Copyright 2003-2008 by Leighton Pritchard.  All rights reserved.
00002 # Revisions copyright 2008-2009 by Peter Cock.
00003 # This code is part of the Biopython distribution and governed by its
00004 # license.  Please see the LICENSE file that should have been included
00005 # as part of this package.
00006 #
00007 # Contact:       Leighton Pritchard, Scottish Crop Research Institute,
00008 #                Invergowrie, Dundee, Scotland, DD2 5DA, UK
00009 #                L.Pritchard@scri.ac.uk
00010 ################################################################################
00011 
00012 """ Graph module
00013 
00014     Provides:
00015 
00016     o GraphData - Contains data from which a graph will be drawn, and
00017                     information about its presentation
00018 
00019     For drawing capabilities, this module uses reportlab to draw and write
00020     the diagram:
00021 
00022     http://www.reportlab.com
00023 
00024     For dealing with biological information, the package expects Biopython
00025     objects:
00026 
00027     http://www.biopython.org
00028 """
00029 
00030 # ReportLab imports
00031 from reportlab.lib import colors
00032 
00033 from math import sqrt
00034 
class GraphData(object):
    """ GraphData

        Holds a set of (position, value) data points together with the
        presentation attributes used when the data are drawn as a graph.

        Methods:

        o __init__(self, id=None, data=None, name=None, style='bar',
                 color=colors.lightgreen, altcolor=colors.darkseagreen,
                 center=None, colour=None, altcolour=None, centre=None)
                 Called on instantiation

        o set_data(self, data)  Add (position, value) points to the data set

        o get_data(self)    Returns the data to be plotted as a sorted list
                            of (position, value) tuples

        o add_point(self, point)    Add a single point to the data set

        o quartiles(self)   Returns a tuple of the data quartiles

        o range(self)   Returns a tuple of the lowest and highest positions
                        present in the data

        o mean(self)    Returns a float of the mean data point value

        o stdev(self)   Returns the sample standard deviation of the data values

        o __len__(self) Returns the number of points in the data set

        o __getitem__(self, index)  Returns the value at the position
                                    specified, or the (position, value)
                                    tuples within a position range

        o __str__(self) Returns a formatted string describing the graph data

        Attributes:

        o id    Unique identifier for the data

        o data  Dictionary of data values, keyed by position

        o name  String describing the data

        o style String ('bar', 'heat', 'line') describing how to draw the data

        o poscolor     colors.Color for drawing high (some styles) or all
                        values

        o negcolor     colors.Color for drawing low values (some styles)

        o linewidth     Int, thickness to draw the line in 'line' styles

        o center    Value at which the x-axis crosses the y-axis

        o centre    Alias for center; assigning to it is DEPRECATED
    """
    def __init__(self, id=None, data=None, name=None, style='bar',
                 color=colors.lightgreen, altcolor=colors.darkseagreen,
                 center=None, colour=None, altcolour=None, centre=None):
        """__init__(self, id=None, data=None, name=None, style='bar',
                 color=colors.lightgreen, altcolor=colors.darkseagreen,
                 center=None, colour=None, altcolour=None, centre=None)

            o id    Unique ID for the graph

            o data  List of (position, value) tuples

            o name  String describing the graph

            o style String describing the presentation style ('bar', 'line',
                    'heat')

            o color   colors.Color describing the color to draw all or the
                      'high' (some styles) values (overridden by backwards
                      compatible argument with UK spelling, colour).

            o altcolor colors.Color describing the color to draw the 'low'
                       values (some styles only) (overridden by backwards
                       compatible argument with UK spelling, altcolour).

            o center Value at which x-axis crosses y-axis (overridden by
                     backwards compatible argument with UK spelling, centre).

            o colour, altcolour, centre   UK spellings of color, altcolor
                     and center.  When not None, each takes precedence over
                     its US-spelled counterpart.
        """

        # Let the UK spelling (colour) override the USA spelling (color)
        if colour is not None:
            color = colour
        if altcolour is not None:
            altcolor = altcolour
        if centre is not None:
            center = centre

        self.id = id            # Unique identifier for the graph
        self.data = {}          # holds values, keyed by sequence position
        if data is not None:
            self.set_data(data)
        self.name = name        # Descriptive string

        # Attributes describing how the graph will be drawn
        self.style = style          # One of 'bar', 'heat' or 'line'
        self.poscolor = color       # Color to draw all, or 'high' values
        self.negcolor = altcolor    # Color to draw 'low' values
        self.linewidth = 2          # linewidth to use in line graphs
        self.center = center        # value at which x-axis crosses y-axis

    def _set_centre(self, value):
        """ _set_centre(self, value)

            o value     New value for the center attribute

            Setter behind the deprecated .centre property: issues a
            Bio.BiopythonDeprecationWarning and stores value in .center
        """
        # Imported here rather than at module level, as in the original code
        import warnings
        import Bio
        warnings.warn("The _set_centre method and .centre attribute are deprecated; please use the .center attribute instead", Bio.BiopythonDeprecationWarning)
        self.center = value
    # Reading .centre returns .center without a warning; only assignment warns
    centre = property(fget = lambda self : self.center,
                       fset = _set_centre,
                       doc="Backwards compatible alias for center (DEPRECATED)")

    def set_data(self, data):
        """ set_data(self, data)

            o data      List of (position, value) tuples

            Add data with a list of (position, value) tuples.  Points already
            held are kept; a value at an existing position is overwritten.
        """
        for (pos, val) in data:     # Fill data dictionary
            self.data[pos] = val


    def get_data(self):
        """ get_data(self) -> [(int, float), (int, float), ...]

            Return data as a list of sorted (position, value) tuples
        """
        return sorted(self.data.items())


    def add_point(self, point):
        """ add_point(self, point)

            o point     (position, value) tuple

            Add a single point to the set of data, replacing any value
            already stored at that position
        """
        pos, val = point
        self.data[pos] = val


    def quartiles(self):
        """ quartiles(self) -> (float, float, float, float, float)

            Returns the (minimum, lowerQ, medianQ, upperQ, maximum) values as
            a tuple.  The inner three are the sorted values at indices
            n//4, n//2 and 3*n//4 (no interpolation is performed).

            Raises IndexError if there are no data points.
        """
        # sorted() rather than values().sort(): dict.values() is only a
        # list on Python 2, so the in-place sort fails on Python 3
        data = sorted(self.data.values())
        datalen = len(data)
        return(data[0], data[datalen//4], data[datalen//2],
               data[3*datalen//4], data[-1])


    def range(self):
        """ range(self) -> (int, int)

            Returns the range of the data, i.e. its start and end points on
            the genome as a (start, end) tuple

            Raises IndexError if there are no data points.
        """
        positions = sorted(self.data)
        # Return first and last positions in graph
        return (positions[0], positions[-1])


    def mean(self):
        """ mean(self) -> Float

            Returns the mean value for the data points

            Raises ZeroDivisionError if there are no data points.
        """
        values = self.data.values()
        # Float start value keeps the division a true division on Python 2
        total = sum((float(item) for item in values), 0.)
        return total/len(values)


    def stdev(self):
        """ stdev(self) -> Float

            Returns the sample standard deviation for the data

            Raises ZeroDivisionError if there are fewer than two data points.
        """
        data = self.data.values()
        m = self.mean()
        runtotal = 0.
        for entry in data:
            runtotal += float((entry - m)**2)
        # This is sample standard deviation; population stdev would involve
        # division by len(data), rather than len(data)-1
        return sqrt(runtotal/(len(data)-1))


    def __len__(self):
        """ __len__(self) -> Int

            Returns the number of points in the data set
        """
        return len(self.data)


    def __getitem__(self, index):
        """ __getitem__(self, index) -> Float or list of tuples

            Given an integer representing position on the sequence
            returns a float - the data value at the passed position
            (KeyError if there is no point at that position).

            If a slice, returns graph data from the region as a sorted list
            of (position, value) tuples.  Both end points are treated as
            INCLUSIVE, unlike normal Python slicing; an omitted start or
            stop leaves that end of the region unbounded.  Slices with a
            step other than 1 are not supported and raise ValueError.

            Any other index type raises TypeError.
        """
        if isinstance(index, int):
            return self.data[index]
        elif isinstance(index, slice):
            if index.step is not None and index.step != 1:
                raise ValueError("Slices with a step are not supported")
            low = index.start
            high = index.stop
            # End points are deliberately inclusive (existing behaviour).
            # A None bound means "open ended"; comparing a position with
            # None is meaningless on Python 2 and a TypeError on Python 3.
            return [(pos, self.data[pos]) for pos in sorted(self.data)
                    if (low is None or pos >= low)
                    and (high is None or pos <= high)]
        else:
            raise TypeError("Need an integer or a slice")


    def __str__(self):
        """ __str__(self) -> ""

            Returns a string describing the graph data.  Statistics that are
            undefined for the current data are left out: all of them when
            there are no points, and the sample SD when there is only one.
        """
        outstr = ["\nGraphData: %s, ID: %s" % (self.name, self.id)]
        outstr.append("Number of points: %d" % len(self.data))
        if self.data:
            outstr.append("Mean data value: %s" % self.mean())
            if len(self.data) > 1:
                # Sample SD divides by (n - 1), so it needs two or more points
                outstr.append("Sample SD: %.3f" % self.stdev())
            outstr.append("Minimum: %s\n1Q: %s\n2Q: %s\n3Q: %s\nMaximum: %s" % self.quartiles())
            outstr.append("Sequence Range: %s..%s" % self.range())
        return "\n".join(outstr)

00284