Back to index

python-biopython  1.60
_FeatureSet.py
Go to the documentation of this file.
00001 # Copyright 2003-2008 by Leighton Pritchard.  All rights reserved.
00002 # This code is part of the Biopython distribution and governed by its
00003 # license.  Please see the LICENSE file that should have been included
00004 # as part of this package.
00005 #
00006 # Contact:       Leighton Pritchard, Scottish Crop Research Institute,
00007 #                Invergowrie, Dundee, Scotland, DD2 5DA, UK
00008 #                L.Pritchard@scri.ac.uk
00009 ################################################################################
00010 #
00011 # Thanks to Peter Cock for the impetus to write the get_features() code to
00012 # subselect Features.
00013 #
00014 ################################################################################
00015 
00016 """ FeatureSet module
00017 
00018     Provides:
00019 
00020     o FeatureSet - container for Feature objects
00021 
00022     For drawing capabilities, this module uses reportlab to draw and write
00023     the diagram:
00024 
00025     http://www.reportlab.com
00026 
00027     For dealing with biological information, the package expects BioPython
00028     objects:
00029 
00030     http://www.biopython.org
00031 """
00032 
00033 #------------------------------------------------------------------------------
00034 # IMPORTS
00035 
00036 # ReportLab
00037 from reportlab.pdfbase import _fontdata
00038 from reportlab.lib import colors
00039 
00040 # GenomeDiagram
00041 from _Feature import Feature
00042 
00043 # Builtins
00044 import re
00045 
00046 #------------------------------------------------------------------------------
00047 # CLASSES
00048 
00049 #------------------------------------------------------------
00050 # FeatureSet
00051 
class FeatureSet(object):
    """ FeatureSet

        Provides:

        Methods:

        o __init__(self, set_id=None, name=None, parent=None)  Called on
                        instantiation

        o add_feature(self, feature, **kwargs)  Wrap the passed object in a
                        Feature, add it to the set and return the wrapper

        o del_feature(self, feature_id) Remove a feature from the set, by id

        o set_all_features(self, attr, value)   Set the passed attribute to the
                        passed value in all features in the set

        o get_features(self, attribute=None, value=None, comparator=None)
                        Returns a list of Features from the set, optionally
                        filtered on one attribute

        o get_ids(self)     Returns a list of unique ids for features in the set

        o range(self)       Returns the range of bases covered by features in
                            the set

        o to_string(self, verbose=0)    Returns a string describing the set

        o __len__(self)     Returns the number of features in the set

        o __getitem__(self, key)    Returns a feature from the set, keyed by id

        o __str__(self)     Returns a string describing the set

        Attributes:

        o id        Unique id for the set

        o name      String describing the set

        o parent    Object passed as 'parent' on instantiation (stored as-is)

        o next_id   Integer id that will be given to the next added feature

        o features  Dictionary of Feature objects, keyed by feature id
    """
    def __init__(self, set_id=None, name=None, parent=None):
        """ __init__(self, set_id=None, name=None, parent=None)

            o set_id    Unique id for the set

            o name      String identifying the feature set

            o parent    Object to record as the owner of this set; it is only
                        stored, never used, by this class
        """
        self.parent = parent
        # Store the caller's id; the builtin id() function must not end up here
        self.id = set_id        # Unique id for the set
        self.next_id = 0        # counter for unique feature ids
        self.features = {}      # Holds features, keyed by ID
        self.name = name        # String describing the set

    def add_feature(self, feature, **kwargs):
        """ add_feature(self, feature, **kwargs) -> Feature

            o feature       Bio.SeqFeature object

            o **kwargs      Keyword arguments for Feature.  Named attributes
                            of the Feature

            Add a Bio.SeqFeature object to the diagram (it will be stored
            internally in a Feature wrapper).  The new Feature wrapper is
            returned.
        """
        feature_id = self.next_id                   # id for the new feature
        wrapped = Feature(self, feature_id, feature)
        self.features[feature_id] = wrapped         # add feature
        for key, value in kwargs.items():
            if key in ("colour", "color"):
                # Both spellings are routed through set_color() rather than
                # setattr(), so that every colour (e.g. a colour number or a
                # string) gets mapped to a colors object - this may not matter
                # for PDF output, but does for PNG.
                wrapped.set_color(value)
                continue
            setattr(wrapped, key, value)
        self.next_id += 1                           # increment next id
        return wrapped

    def del_feature(self, feature_id):
        """ del_feature(self, feature_id)

            o feature_id        Unique id of the feature to delete

            Remove a feature from the set, indicated by its id.  Raises
            KeyError if no feature with that id is in the set.
        """
        del self.features[feature_id]

    def set_all_features(self, attr, value):
        """ set_all_features(self, attr, value)

            o attr      An attribute of the Feature class

            o value     The value to set that attribute

            Set the passed attribute of all features in the set to the
            passed value.  Features that do not already have the attribute
            are left untouched.
        """
        for feature in self.features.values():
            # Only touch features that have the attribute, and only when the
            # value would actually change
            if hasattr(feature, attr) and getattr(feature, attr) != value:
                setattr(feature, attr, value)

        # NOTE: unlike add_feature(), no special handling of "colour"/"color"
        # is done here; the attribute is set exactly as named by the caller.

    def get_features(self, attribute=None, value=None, comparator=None):
        """ get_features(self, attribute=None, value=None, comparator=None) ->
                                            [Feature, Feature, ...]

            o attribute        String, attribute of a Feature object

            o value            The value desired of the attribute

            o comparator       String, how to compare the Feature attribute to the
                               passed value

            If either the attribute or the value is not given, return a list of
            all features in the feature set.  If both an attribute and value are
            given then, depending on the comparator, a list of all features in
            the FeatureSet matching (or not) the passed value will be returned.
            Allowed comparators are: 'startswith', 'not', 'like'.  With no
            comparator the attribute must equal the value; an unrecognised
            comparator gives an empty list.

            The user is expected to make a responsible decision about which feature
            attributes to use with which passed values and comparator settings.
        """
        # If no attribute or value specified, return all features.  list() is
        # used so a real list is returned even where dict.values() is a view.
        if attribute is None or value is None:
            return list(self.features.values())
        features = self.features.values()
        # If no comparator is specified, return all features where the attribute
        # value matches that passed
        if comparator is None:
            return [feature for feature in features
                    if getattr(feature, attribute) == value]
        # If the comparator is 'not', return all features where the attribute
        # value does not match that passed
        if comparator == 'not':
            return [feature for feature in features
                    if getattr(feature, attribute) != value]
        # If the comparator is 'startswith', return all features where the
        # attribute value starts with that passed
        if comparator == 'startswith':
            return [feature for feature in features
                    if getattr(feature, attribute).startswith(value)]
        # If the comparator is 'like', use a regular expression search to identify
        # features (value is the pattern)
        if comparator == 'like':
            return [feature for feature in features
                    if re.search(value, getattr(feature, attribute))]
        # As a final option, just return an empty list
        return []

    def get_ids(self):
        """ get_ids(self) -> [int, int, ...]

            Return a list of all ids for the feature set
        """
        # list() so a real list is returned even where dict.keys() is a view
        return list(self.features.keys())

    def range(self):
        """ range(self) -> (int, int)

            Returns the lowest and highest base (or mark) numbers as a tuple,
            taken over the (start, end) pairs in every feature's locations.
            Returns (0, 0) if there are no locations in the set.
        """
        lows, highs = [], []
        for feature in self.features.values():
            for start, end in feature.locations:
                lows.append(start)
                highs.append(end)
        if lows and highs:
            return (min(lows), max(highs))
        return 0, 0                 # Default in case there is nothing in the set

    def to_string(self, verbose=0):
        """ to_string(self, verbose=0) -> ""

            o verbose       Boolean indicating whether a short or complete
                            account of the set is required

            Returns a formatted string with information about the set
        """
        if not verbose:         # Short account only required
            return "%s" % self
        # Long account desired: header, feature count, then one line per feature
        outstr = ["\n<%s: %s>" % (self.__class__, self.name)]
        outstr.append("%d features" % len(self.features))
        for key in self.features:
            outstr.append("feature: %s" % self.features[key])
        return "\n".join(outstr)

    def __len__(self):
        """ __len__(self) -> int

            Return the number of features in the set
        """
        return len(self.features)

    def __getitem__(self, key):
        """ __getitem__(self, key) -> Feature

            Return a feature, keyed by id.  Raises KeyError for an unknown id.
        """
        return self.features[key]

    def __str__(self):
        """ __str__(self) -> ""

            Returns a formatted string with information about the feature set
        """
        return "\n<%s: %s %d features>" % (self.__class__, self.name,
                                           len(self.features))
00277 
00278 ################################################################################
00279 # RUN AS SCRIPT
00280 ################################################################################
00281 
if __name__ == '__main__':

    # Imported inside the guard, so these imports only run when the module is
    # executed as a script
    from Bio import SeqIO
    from Bio.SeqFeature import SeqFeature

    # Read a single GenBank-format record from a fixed local path
    genbank_entry = SeqIO.read('/data/Genomes/Bacteria/Nanoarchaeum_equitans/NC_005213.gbk', 'gb')

    # Test code: build a FeatureSet holding only the record's CDS features
    gdfs = FeatureSet(0, 'Nanoarchaeum equitans CDS')
    for feature in genbank_entry.features:
        if feature.type != 'CDS':
            continue            # skip anything that is not a CDS
        gdfs.add_feature(feature)
00294 
00295     #print len(gdfs)
00296     #print gdfs.get_ids()
00297     #gdfs.del_feature(560)
00298     #print gdfs.get_ids()
00299     #print gdfs.get_features()
00300     #for feature in gdfs.get_features():
00301     #    print feature.id, feature.start, feature.end
00302     #print gdfs[500]   
00303 
00304