Back to index

python-biopython  1.60
test_GenomeDiagram.py
Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 # This code is part of the Biopython distribution and governed by its
00003 # license.  Please see the LICENSE file that should have been included
00004 # as part of this package.
00005 """Tests for GenomeDiagram general functionality.
00006 """
00007 
00008 ##########
00009 # IMPORTS
00010 
00011 # Builtins
00012 import os
00013 import unittest
00014 import math
00015 
00016 # Do we have ReportLab?  Raise error if not present.
00017 from Bio import MissingPythonDependencyError
00018 try:
00019     from reportlab.lib import colors
00020     from reportlab.pdfbase import pdfmetrics
00021     from reportlab.pdfbase.ttfonts import TTFont
00022     from reportlab.lib.units import cm
00023 except ImportError:
00024     raise MissingPythonDependencyError(
00025             "Install reportlab if you want to use Bio.Graphics.")
00026 
00027 # Biopython core
00028 from Bio import SeqIO
00029 from Bio.SeqFeature import SeqFeature, FeatureLocation
00030 from Bio import SeqUtils
00031 
00032 # Bio.Graphics.GenomeDiagram
00033 from Bio.Graphics.GenomeDiagram import FeatureSet, GraphSet, Track, Diagram
00034 from Bio.Graphics.GenomeDiagram import CrossLink
00035 #from Bio.Graphics.GenomeDiagram.Utilities import *
00036 
00037 #Currently private, but we test them here:
00038 from Bio.Graphics.GenomeDiagram._Graph import GraphData
00039 from Bio.Graphics.GenomeDiagram._Colors import ColorTranslator
00040 
00041 def fill_and_border(base_color, alpha=0.5):
00042     try:
00043         c = base_color.clone()
00044         c.alpha = alpha
00045         return c, base_color
00046     except AttributeError:
00047         #Old ReportLab, no transparency and/or no clone
00048         return base_color, base_color
00049 
00050 ###############################################################################
00051 # Utility functions for graph plotting, originally in GenomeDiagram.Utilities #
00052 # See Bug 2705 for discussion on where to put these functions in Biopython... #
00053 ###############################################################################
00054 def apply_to_window(sequence, window_size, function, step=None):
00055     """ apply_to_window(sequence, window_size, function) -> [(int, float),(int, float),...]
00056 
00057         o sequence      Bio.Seq.Seq object
00058 
00059         o window_size   Int describing the length of sequence to consider
00060 
00061         o step          Int describing the step to take between windows
00062                         (default = window_size//2)
00063 
00064         o function      Method or function that accepts a Bio.Seq.Seq object
00065                         as its sole argument and returns a single value
00066 
00067         Returns a list of (position, value) tuples for fragments of the passed
00068         sequence of length window_size (stepped by step), calculated by the
00069         passed function.  Returned positions are the midpoint of each window.
00070     """
00071     seqlen = len(sequence)      # Total length of sequence to be used
00072     if step is None:    # No step specified, so use half window-width or 1 if larger
00073         step = max(window_size//2, 1)
00074     else:               # Use specified step, or 1 if greater
00075         step = max(step, 1)
00076 
00077     results = []    # Holds (position, value) results
00078 
00079     # Perform the passed function on as many windows as possible, short of
00080     # overrunning the sequence
00081     pos = 0
00082     while pos < seqlen-window_size+1:
00083         # Obtain sequence fragment
00084         start, middle, end = pos, (pos+window_size+pos)//2, pos+window_size
00085         fragment = sequence[start:end]
00086         # Apply function to the sequence fragment
00087         value = function(fragment)
00088         results.append((middle, value)) # Add results to list
00089         # Advance to next fragment
00090         pos += step
00091 
00092     # Use the last available window on the sequence, even if it means
00093     # re-covering old ground
00094     if pos != seqlen - window_size:
00095         # Obtain sequence fragment
00096         pos = seqlen - window_size
00097         start, middle, end = pos, (pos+window_size+pos)//2, pos+window_size
00098         fragment = sequence[start:end]
00099         # Apply function to sequence fragment
00100         value = function(fragment)
00101         results.append((middle, value)) # Add results to list
00102         
00103     # Check on last sequence
00104     #print fragment
00105     #print seq[-100:]
00106     return results      # Return the list of (position, value) results
00107 
00108 def calc_gc_content(sequence):
00109     """ calc_gc_content(sequence)
00110 
00111         o sequence  A Bio.Seq.Seq object
00112 
00113         Returns the % G+C content in a passed sequence
00114     """
00115     d = {}
00116     for nt in ['A','T','G','C']:
00117         d[nt] = sequence.count(nt) + sequence.count(nt.lower())
00118     gc = d.get('G',0) + d.get('C',0)
00119 
00120     if gc == 0: return 0
00121     #print gc*100.0/(d['A'] +d['T'] + gc)
00122     return gc*1./(d['A'] +d['T'] + gc)
00123 
00124 
00125 def calc_at_content(sequence):
00126     """ calc_at_content(sequence)
00127 
00128         o sequence  A Bio.Seq.Seq object
00129 
00130         Returns the % A+T content in a passed sequence
00131     """
00132     d = {}
00133     for nt in ['A','T','G','C']:
00134         d[nt] = sequence.count(nt) + sequence.count(nt.lower())
00135     at = d.get('A',0) + d.get('T',0)
00136 
00137     if at == 0: return 0
00138     return at*1./(d['G'] +d['G'] + at)
00139 
00140 
00141 def calc_gc_skew(sequence):
00142     """ calc_gc_skew(sequence)
00143 
00144         o sequence   A Bio.Seq.Seq object
00145 
00146         Returns the (G-C)/(G+C) GC skew in a passed sequence
00147     """
00148     g = sequence.count('G') + sequence.count('g')
00149     c = sequence.count('C') + sequence.count('c')
00150     if g+c == 0:
00151         return 0.0 #TODO - return NaN or None here?
00152     else:
00153         return (g-c)/float(g+c)
00154 
00155 
00156 def calc_at_skew(sequence):
00157     """ calc_at_skew(sequence)
00158 
00159         o sequence   A Bio.Seq.Seq object
00160 
00161         Returns the (A-T)/(A+T) AT skew in a passed sequence
00162     """
00163     a = sequence.count('A') + sequence.count('a')
00164     t = sequence.count('T') + sequence.count('t')
00165     if a+t == 0:
00166         return 0.0 #TODO - return NaN or None here?
00167     else:
00168         return (a-t)/float(a+t)
00169 
00170 def calc_dinucleotide_counts(sequence):
00171     """Returns the total count of di-nucleotides repeats (e.g. "AA", "CC").
00172 
00173     This is purely for the sake of generating some non-random sequence
00174     based score for plotting, with no expected biological meaning.
00175 
00176     NOTE - Only considers same case pairs.
00177     NOTE - "AA" scores 1, "AAA" scores 2, "AAAA" scores 3 etc.
00178     """
00179     total = 0
00180     for letter in "ACTGUactgu":
00181         total += sequence.count(letter+letter)
00182     return total
00183     
00184 
00185 ###############################################################################
00186 # End of utility functions for graph plotting                                 #
00187 ###############################################################################
00188 
00189 # Tests
00190 class TrackTest(unittest.TestCase):
00191     # TODO Bring code from Track.py, unsure about what test does
00192     pass
00193 
00194 class ColorsTest(unittest.TestCase):
00195     def test_color_conversions(self):
00196         """Test color translations.
00197         """
00198         translator = ColorTranslator()
00199         
00200         # Does the translate method correctly convert the passed argument?
00201         assert translator.float1_color((0.5, 0.5, 0.5)) == translator.translate((0.5, 0.5, 0.5)), \
00202             "Did not correctly translate colour from floating point RGB tuple"
00203         assert translator.int255_color((1, 75, 240)) == translator.translate((1, 75, 240)), \
00204             "Did not correctly translate colour from integer RGB tuple"
00205         assert translator.artemis_color(7) == translator.translate(7), \
00206             "Did not correctly translate colour from Artemis colour scheme"                        
00207         assert translator.scheme_color(2) == translator.translate(2), \
00208             "Did not correctly translate colour from user-defined colour scheme"
00209 
00210             
00211 class GraphTest(unittest.TestCase):
00212     def test_limits(self):
00213         """Check line graphs."""
00214         #TODO - Fix GD so that the same min/max is used for all three lines?
00215         points = 1000
00216         scale = math.pi * 2.0 / points
00217         data1 = [math.sin(x*scale) for x in range(points)]
00218         data2 = [math.cos(x*scale) for x in range(points)]
00219         data3 = [2*math.sin(2*x*scale) for x in range(points)]
00220         
00221         gdd = Diagram('Test Diagram', circular=False,
00222                       y=0.01, yt=0.01, yb=0.01,
00223                       x=0.01, xl=0.01, xr=0.01)
00224         gdt_data = gdd.new_track(1, greytrack=False)
00225         gds_data = gdt_data.new_set("graph")
00226         for data_values, name, color in zip([data1,data2,data3],
00227                                             ["sin", "cos", "2sin2"],
00228                                             ["red","green","blue"]):
00229             data = zip(range(points), data_values)
00230             gds_data.new_graph(data, "", style="line",
00231                                color = color, altcolor = color,
00232                                center = 0)
00233 
00234         gdd.draw(format='linear',
00235                  tracklines=False,
00236                  pagesize=(15*cm,15*cm),
00237                  fragments=1,
00238                  start=0, end=points)
00239         gdd.write(os.path.join('Graphics', "line_graph.pdf"), "pdf")
00240         #Circular diagram
00241         gdd.draw(tracklines=False,
00242                  pagesize=(15*cm,15*cm),
00243                  circular=True, #Data designed to be periodic
00244                  start=0, end=points, circle_core=0.5)
00245         gdd.write(os.path.join('Graphics', "line_graph_c.pdf"), "pdf")
00246         
00247     def test_slicing(self):
00248         """Check GraphData slicing."""
00249         gd = GraphData()
00250         gd.set_data([(1, 10), (5, 15), (20, 40)])
00251         gd.add_point((10, 20))
00252         
00253         assert gd[4:16] == [(5, 15), (10, 20)], \
00254                 "Unable to insert and retrieve points correctly"
00255 
00256 
00257 class LabelTest(unittest.TestCase):
00258     """Check label positioning."""
00259     def setUp(self):
00260         self.gdd = Diagram('Test Diagram', circular=False,
00261                            y=0.01, yt=0.01, yb=0.01,
00262                            x=0.01, xl=0.01, xr=0.01)
00263 
00264     def finish(self, name, circular=True):
00265         #And draw it...
00266         tracks = len(self.gdd.tracks)
00267         #Work arround the page orientation code being too clever
00268         #and flipping the h & w round:
00269         if tracks <= 3:
00270             orient = "landscape"
00271         else:
00272             orient = "portrait"
00273         self.gdd.draw(format='linear', orientation=orient,
00274                       tracklines=False,
00275                       pagesize=(15*cm,5*cm*tracks),
00276                       fragments=1,
00277                       start=0, end=400)
00278         self.gdd.write(os.path.join('Graphics', name+".pdf"), "pdf")
00279         #For the tutorial this might be useful:
00280         #self.gdd.write(os.path.join('Graphics', name+".png"), "png")
00281         if circular:
00282             #Circular diagram
00283             self.gdd.draw(tracklines=False,
00284                           pagesize=(15*cm,15*cm),
00285                           fragments=1,
00286                           circle_core=0.5,
00287                           start=0, end=400)
00288             self.gdd.write(os.path.join('Graphics', name+"_c.pdf"), "pdf")
00289     
00290     def add_track_with_sigils(self, **kwargs):
00291         self.gdt_features = self.gdd.new_track(1, greytrack=False)
00292         self.gds_features = self.gdt_features.new_set()
00293         for i in range(18):
00294             start = int((400 * i)/18.0)
00295             end = start + 17
00296             if i % 3 == 0:
00297                 strand=None
00298                 name = "Strandless"
00299                 color=colors.orange
00300             elif i % 3 == 1:
00301                 strand=+1
00302                 name="Forward"
00303                 color=colors.red
00304             else:
00305                 strand = -1
00306                 name="Reverse"
00307                 color=colors.blue
00308             feature = SeqFeature(FeatureLocation(start, end), strand=strand)
00309             self.gds_features.add_feature(feature, name=name,
00310                                           color=color, label=True, **kwargs)
00311 
00312     def test_label_default(self):
00313         """Feature labels - default."""
00314         self.add_track_with_sigils()
00315         self.finish("labels_default")
00316 
00317 class SigilsTest(unittest.TestCase):
00318     """Check the different feature sigils.
00319 
00320     These figures are intended to be used in the Tutorial..."""
00321     def setUp(self):
00322         self.gdd = Diagram('Test Diagram', circular=False,
00323                            y=0.01, yt=0.01, yb=0.01,
00324                            x=0.01, xl=0.01, xr=0.01)
00325 
00326     def add_track_with_sigils(self, **kwargs):
00327         #Add a track of features,
00328         self.gdt_features = self.gdd.new_track(1, greytrack=False)
00329         #We'll just use one feature set for these features,
00330         self.gds_features = self.gdt_features.new_set()
00331         #Add three features to show the strand options,
00332         feature = SeqFeature(FeatureLocation(25, 125), strand=+1)
00333         self.gds_features.add_feature(feature, name="Forward", **kwargs)
00334         feature = SeqFeature(FeatureLocation(150, 250), strand=None)
00335         self.gds_features.add_feature(feature, name="Strandless", **kwargs)
00336         feature = SeqFeature(FeatureLocation(275, 375), strand=-1)
00337         self.gds_features.add_feature(feature, name="Reverse", **kwargs)
00338 
00339     def finish(self, name, circular=True):
00340         #And draw it...
00341         tracks = len(self.gdd.tracks)
00342         #Work arround the page orientation code being too clever
00343         #and flipping the h & w round:
00344         if tracks <= 3:
00345             orient = "landscape"
00346         else:
00347             orient = "portrait"
00348         self.gdd.draw(format='linear', orientation=orient,
00349                       tracklines=False,
00350                       pagesize=(15*cm,5*cm*tracks),
00351                       fragments=1,
00352                       start=0, end=400)
00353         self.gdd.write(os.path.join('Graphics', name+".pdf"), "pdf")
00354         #For the tutorial this might be useful:
00355         #self.gdd.write(os.path.join('Graphics', name+".png"), "png")
00356         if circular:
00357             #Circular diagram
00358             self.gdd.draw(tracklines=False,
00359                           pagesize=(15*cm,15*cm),
00360                           fragments=1,
00361                           circle_core=0.5,
00362                           start=0, end=400)
00363             self.gdd.write(os.path.join('Graphics', name+"_c.pdf"), "pdf")
00364 
00365     def test_labels(self):
00366         """Feature labels."""
00367         self.add_track_with_sigils(label=True)
00368         self.add_track_with_sigils(label=True, color="green",
00369                                    label_size=25, label_angle=0)
00370         self.add_track_with_sigils(label=True, color="purple",
00371                                    label_position="end",
00372                                    label_size=4, label_angle=90)
00373         self.add_track_with_sigils(label=True, color="blue",
00374                                    label_position="middle",
00375                                    label_size=6, label_angle=-90)
00376         self.assertEqual(len(self.gdd.tracks), 4)
00377         self.finish("GD_sigil_labels", circular=False)
00378 
00379     def test_arrow_shafts(self):
00380         """Feature arrow sigils, varying shafts."""
00381         self.add_track_with_sigils(sigil="ARROW")
00382         self.add_track_with_sigils(sigil="ARROW", color="brown",
00383                                    arrowshaft_height=1.0)
00384         self.add_track_with_sigils(sigil="ARROW", color="teal",
00385                                    arrowshaft_height=0.2)
00386         self.add_track_with_sigils(sigil="ARROW", color="darkgreen",
00387                                    arrowshaft_height=0.1)
00388         self.assertEqual(len(self.gdd.tracks), 4)
00389         self.finish("GD_sigil_arrow_shafts")        
00390 
00391     def test_arrow_heads(self):
00392         """Feature arrow sigils, varying heads."""
00393         self.add_track_with_sigils(sigil="ARROW")
00394         self.add_track_with_sigils(sigil="ARROW", color="blue",
00395                                    arrowhead_length=0.25)
00396         self.add_track_with_sigils(sigil="ARROW", color="orange",
00397                                    arrowhead_length=1)
00398         self.add_track_with_sigils(sigil="ARROW", color="red",
00399                                    arrowhead_length=10000) #Triangles
00400         self.assertEqual(len(self.gdd.tracks), 4)
00401         self.finish("GD_sigil_arrows")
00402 
00403     def test_small_arrow_heads(self):
00404         """Feature arrow sigil heads within bounding box."""
00405         #Add a track of features, bigger height to emphasise any sigil errors
00406         self.gdt_features = self.gdd.new_track(1, greytrack=True, height=3)
00407         #We'll just use one feature set for these features,
00408         self.gds_features = self.gdt_features.new_set()
00409         #Green arrows just have small heads (meaning if there is a mitre
00410         #it will escape the bounding box).  Red arrows are small triangles.
00411         feature = SeqFeature(FeatureLocation(15, 30), strand=+1)
00412         self.gds_features.add_feature(feature, color="grey")
00413         self.gds_features.add_feature(feature, name="Forward", sigil="ARROW",
00414                                       arrowhead_length=0.05)
00415         feature = SeqFeature(FeatureLocation(55, 60), strand=+1)
00416         self.gds_features.add_feature(feature, color="grey")
00417         self.gds_features.add_feature(feature, name="Forward", sigil="ARROW",
00418                                       arrowhead_length=1000, color="red")
00419         feature = SeqFeature(FeatureLocation(75, 125), strand=+1)
00420         self.gds_features.add_feature(feature, color="grey")
00421         self.gds_features.add_feature(feature, name="Forward", sigil="ARROW",
00422                                       arrowhead_length=0.05)
00423         feature = SeqFeature(FeatureLocation(140, 155), strand=None)
00424         self.gds_features.add_feature(feature, color="grey")
00425         self.gds_features.add_feature(feature, name="Strandless", sigil="ARROW",
00426                                       arrowhead_length=0.05)
00427         feature = SeqFeature(FeatureLocation(180, 185), strand=None)
00428         self.gds_features.add_feature(feature, color="grey")
00429         self.gds_features.add_feature(feature, name="Strandless", sigil="ARROW",
00430                                       arrowhead_length=1000, color="red")
00431         feature = SeqFeature(FeatureLocation(200, 250), strand=None)
00432         self.gds_features.add_feature(feature, color="grey")
00433         self.gds_features.add_feature(feature, name="Strandless", sigil="ARROW",
00434                                       arrowhead_length=0.05)
00435         feature = SeqFeature(FeatureLocation(265, 280), strand=-1)
00436         self.gds_features.add_feature(feature, name="Reverse", sigil="ARROW",
00437                                       arrowhead_length=0.05)
00438         feature = SeqFeature(FeatureLocation(305, 310), strand=-1)
00439         self.gds_features.add_feature(feature, color="grey")
00440         self.gds_features.add_feature(feature, name="Reverse", sigil="ARROW",
00441                                       arrowhead_length=1000, color="red")
00442         feature = SeqFeature(FeatureLocation(325, 375), strand=-1)
00443         self.gds_features.add_feature(feature, color="grey")
00444         self.gds_features.add_feature(feature, name="Reverse", sigil="ARROW",
00445                                       arrowhead_length=0.05)
00446         self.finish("GD_sigil_arrows_small")
00447 
00448     def test_long_arrow_heads(self):
00449         """Feature arrow sigil heads within bounding box."""
00450         #Add a track of features, bigger height to emphasise any sigil errors
00451         self.gdt_features = self.gdd.new_track(1, greytrack=True, height=3)
00452         #We'll just use one feature set for these features,
00453         self.gds_features = self.gdt_features.new_set()
00454         feature = SeqFeature(FeatureLocation(25, 375), strand=+1)
00455         self.gds_features.add_feature(feature, color="lightblue")
00456         self.gds_features.add_feature(feature, name="Forward", sigil="ARROW",
00457                                       color="blue", arrowhead_length=2.0)
00458         feature = SeqFeature(FeatureLocation(25, 375), strand=-1)
00459         self.gds_features.add_feature(feature, color="pink")
00460         self.gds_features.add_feature(feature, name="Reverse", sigil="ARROW",
00461                                       color="red", arrowhead_length=2.0)
00462         #Add another track of features, bigger height to emphasise any sigil errors
00463         self.gdt_features = self.gdd.new_track(1, greytrack=True, height=3)
00464         #We'll just use one feature set for these features,
00465         self.gds_features = self.gdt_features.new_set()
00466         feature = SeqFeature(FeatureLocation(25, 375), strand=None)
00467         self.gds_features.add_feature(feature, color="lightgreen")
00468         self.gds_features.add_feature(feature, name="Standless", sigil="ARROW",
00469                                       color="green", arrowhead_length=2.0)
00470         self.finish("GD_sigil_arrows_long")
00471 
00472 class DiagramTest(unittest.TestCase):
00473     """Creating feature sets, graph sets, tracks etc individually for the diagram."""
00474     def setUp(self):
00475         """Test setup, just loads a GenBank file as a SeqRecord."""
00476         handle = open(os.path.join("GenBank","NC_005816.gb"), 'r')
00477         self.record = SeqIO.read(handle, "genbank")
00478         handle.close()
00479 
00480     def test_write_arguments(self):
00481         """Check how the write methods respond to output format arguments."""
00482         gdd = Diagram('Test Diagram')
00483         gdd.drawing = None #Hack - need the ReportLab drawing object to be created.
00484         filename = os.path.join("Graphics","error.txt")
00485         #We (now) allow valid formats in any case.
00486         for output in ["XXX","xxx",None,123,5.9]:
00487             try:
00488                 gdd.write(filename, output)
00489                 assert False, \
00490                        "Should have rejected %s as an output format" % output
00491             except ValueError, e:
00492                 #Good!
00493                 pass
00494             try:
00495                 gdd.write_to_string(output)
00496                 assert False, \
00497                        "Should have rejected %s as an output format" % output
00498             except ValueError, e:
00499                 #Good!
00500                 pass
00501 
00502     def test_partial_diagram(self):
00503         """construct and draw SVG and PDF for just part of a SeqRecord."""
00504         genbank_entry = self.record
00505         start = 6500
00506         end = 8750
00507         
00508         gdd = Diagram('Test Diagram',
00509                       #For the circular diagram we don't want a closed cirle:
00510                       circular=False,
00511                       )
00512         #Add a track of features,
00513         gdt_features = gdd.new_track(1, greytrack=True,
00514                                      name="CDS Features",
00515                                      scale_largetick_interval=1000,
00516                                      scale_smalltick_interval=100,
00517                                      scale_format = "SInt",
00518                                      greytrack_labels=False,
00519                                      height=0.5)
00520         #We'll just use one feature set for these features,
00521         gds_features = gdt_features.new_set()
00522         for feature in genbank_entry.features:
00523             if feature.type != "CDS":
00524                 #We're going to ignore these.
00525                 continue
00526             if feature.location.end.position < start:
00527                 #Out of frame (too far left)
00528                 continue
00529             if feature.location.start.position > end:
00530                 #Out of frame (too far right)
00531                 continue
00532 
00533             #This URL should work in SVG output from recent versions
00534             #of ReportLab.  You need ReportLab 2.4 or later
00535             try :
00536                 url = "http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi"+\
00537                       "?db=protein&id=%s" % feature.qualifiers["protein_id"][0]
00538             except KeyError :
00539                 url = None
00540                 
00541             #Note that I am using strings for color names, instead
00542             #of passing in color objects.  This should also work!
00543             if len(gds_features) % 2 == 0:
00544                 color = "white" #for testing the automatic black border!
00545             else:
00546                 color = "red"
00547             #Checking it can cope with the old UK spelling colour.
00548             #Also show the labels perpendicular to the track.
00549             gds_features.add_feature(feature, colour=color,
00550                                      url = url,
00551                                      sigil="ARROW",
00552                                      label_position = "start",
00553                                      label_size = 8,
00554                                      label_angle = 90,
00555                                      label=True)
00556 
00557         #And draw it...
00558         gdd.draw(format='linear', orientation='landscape',
00559                  tracklines=False, pagesize=(10*cm,6*cm), fragments=1,
00560                  start=start, end=end)
00561         output_filename = os.path.join('Graphics', 'GD_region_linear.pdf')
00562         gdd.write(output_filename, 'PDF')
00563 
00564         #Also check the write_to_string method matches,
00565         #(Note the possible confusion over new lines on Windows)
00566         assert open(output_filename).read().replace("\r\n","\n") \
00567                == gdd.write_to_string('PDF').replace("\r\n","\n")
00568 
00569         output_filename = os.path.join('Graphics', 'GD_region_linear.svg')
00570         gdd.write(output_filename, 'SVG')
00571 
00572         #Circular with a particular start/end is a bit odd, but by setting
00573         #circular=False (above) a sweep of 90% is used (a wedge is left out)
00574         gdd.draw(format='circular',
00575                  tracklines=False, pagesize=(10*cm,10*cm),
00576                  start=start, end=end)
00577         output_filename = os.path.join('Graphics', 'GD_region_circular.pdf')
00578         gdd.write(output_filename, 'PDF')
00579         output_filename = os.path.join('Graphics', 'GD_region_circular.svg')
00580         gdd.write(output_filename, 'SVG')
00581 
00582     def test_diagram_via_methods_pdf(self):
00583         """Construct and draw PDF using method approach."""
00584         genbank_entry = self.record
00585         gdd = Diagram('Test Diagram')
00586 
00587         #Add a track of features,
00588         gdt_features = gdd.new_track(1, greytrack=True,
00589                                      name="CDS Features", greytrack_labels=0,
00590                                      height=0.5)
00591         #We'll just use one feature set for the genes and misc_features,
00592         gds_features = gdt_features.new_set()
00593         for feature in genbank_entry.features:
00594             if feature.type == "gene":
00595                 if len(gds_features) % 2 == 0:
00596                     color = "blue"
00597                 else:
00598                     color = "lightblue"
00599                 gds_features.add_feature(feature, color=color,
00600                                             #label_position = "middle",
00601                                             #label_position = "end",
00602                                             label_position = "start",
00603                                             label_size = 11,
00604                                             #label_angle = 90,
00605                                             sigil="ARROW",
00606                                             label=True)
00607 
00608         #I want to include some strandless features, so for an example
00609         #will use EcoRI recognition sites etc.
00610         for site, name, color in [("GAATTC","EcoRI","green"),
00611                                   ("CCCGGG","SmaI","orange"),
00612                                   ("AAGCTT","HindIII","red"),
00613                                   ("GGATCC","BamHI","purple")]:
00614             index = 0
00615             while True:
00616                 index  = genbank_entry.seq.find(site, start=index)
00617                 if index == -1 : break
00618                 feature = SeqFeature(FeatureLocation(index, index+6), strand=None)
00619 
00620                 #This URL should work in SVG output from recent versions
00621                 #of ReportLab.  You need ReportLab 2.4 or later
00622                 try :
00623                     url = "http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi"+\
00624                           "?db=protein&id=%s" % feature.qualifiers["protein_id"][0]
00625                 except KeyError :
00626                     url = None
00627 
00628                 gds_features.add_feature(feature, color = color,
00629                                          url = url,
00630                                          #label_position = "middle",
00631                                          label_size = 10,
00632                                          label_color = color,
00633                                          #label_angle = 90,
00634                                          name = name,
00635                                          label = True)
00636                 index += len(site)
00637             del index
00638 
00639         #Now add a graph track...
00640         gdt_at_gc = gdd.new_track(2, greytrack=True,
00641                                   name="AT and GC content",
00642                                   greytrack_labels=True)
00643         gds_at_gc = gdt_at_gc.new_set(type="graph")
00644 
00645         step = len(genbank_entry)//200
00646         gds_at_gc.new_graph(apply_to_window(genbank_entry.seq, step, calc_gc_content, step),
00647                         'GC content', style='line', 
00648                         color=colors.lightgreen,
00649                         altcolor=colors.darkseagreen)
00650         gds_at_gc.new_graph(apply_to_window(genbank_entry.seq, step, calc_at_content, step),
00651                         'AT content', style='line', 
00652                         color=colors.orange,
00653                         altcolor=colors.red)
00654         
00655         #Finally draw it in both formats,
00656         gdd.draw(format='linear', orientation='landscape',
00657              tracklines=0, pagesize='A4', fragments=3)
00658         output_filename = os.path.join('Graphics', 'GD_by_meth_linear.pdf')
00659         gdd.write(output_filename, 'PDF')
00660 
00661         gdd.draw(format='circular', tracklines=False, circle_core=0.8,
00662                  pagesize=(20*cm,20*cm), circular=True)
00663         output_filename = os.path.join('Graphics', 'GD_by_meth_circular.pdf')
00664         gdd.write(output_filename, 'PDF')
00665 
00666     def test_diagram_via_object_pdf(self):
00667         """Construct and draw PDF using object approach."""
00668         genbank_entry = self.record
00669         gdd = Diagram('Test Diagram')
00670 
00671         gdt1 = Track('CDS features', greytrack=True,
00672                      scale_largetick_interval=1e4,
00673                      scale_smalltick_interval=1e3,
00674                      greytrack_labels=10,
00675                      greytrack_font_color="red",
00676                      scale_format = "SInt")
00677         gdt2 = Track('gene features', greytrack=1,
00678                    scale_largetick_interval=1e4)
00679 
00680         #First add some feature sets:
00681         gdfsA = FeatureSet(name='CDS backgrounds')
00682         gdfsB = FeatureSet(name='gene background')
00683 
00684 
00685         gdfs1 = FeatureSet(name='CDS features')
00686         gdfs2 = FeatureSet(name='gene features')
00687         gdfs3 = FeatureSet(name='misc_features')
00688         gdfs4 = FeatureSet(name='repeat regions')
00689 
00690         prev_gene = None
00691         cds_count = 0
00692         for feature in genbank_entry.features:
00693             if feature.type == 'CDS':
00694                 cds_count += 1
00695                 if prev_gene:
00696                     #Assuming it goes with this CDS!
00697                     if cds_count % 2 == 0:
00698                         dark, light = colors.peru, colors.tan
00699                     else:
00700                         dark, light = colors.burlywood, colors.bisque
00701                     #Background for CDS,
00702                     a = gdfsA.add_feature(SeqFeature(FeatureLocation(feature.location.start, feature.location.end, strand=0)),
00703                                          color=dark)
00704                     #Background for gene,
00705                     b = gdfsB.add_feature(SeqFeature(FeatureLocation(prev_gene.location.start, prev_gene.location.end, strand=0)),
00706                                           color=dark)
00707                     #Cross link,
00708                     gdd.cross_track_links.append(CrossLink(a, b, light, dark))
00709                     prev_gene = None
00710             if feature.type == 'gene':
00711                 prev_gene = feature
00712 
00713         #Some cross links on the same linear diagram fragment,
00714         f, c = fill_and_border(colors.red)
00715         a = gdfsA.add_feature(SeqFeature(FeatureLocation(2220,2230)), color=f, border=c)
00716         b = gdfsB.add_feature(SeqFeature(FeatureLocation(2200,2210)), color=f, border=c)
00717         gdd.cross_track_links.append(CrossLink(a, b, f, c))
00718 
00719         f, c = fill_and_border(colors.blue)
00720         a = gdfsA.add_feature(SeqFeature(FeatureLocation(2150,2200)), color=f, border=c)
00721         b = gdfsB.add_feature(SeqFeature(FeatureLocation(2220,2290)), color=f, border=c)
00722         gdd.cross_track_links.append(CrossLink(a, b, f, c, flip=True))
00723 
00724         f, c = fill_and_border(colors.green)
00725         a = gdfsA.add_feature(SeqFeature(FeatureLocation(2250,2560)), color=f, border=c)
00726         b = gdfsB.add_feature(SeqFeature(FeatureLocation(2300,2860)), color=f, border=c)
00727         gdd.cross_track_links.append(CrossLink(a, b, f, c))
00728 
00729         #Some cross links where both parts are saddling the linear diagram fragment boundary,
00730         f, c = fill_and_border(colors.red)
00731         a = gdfsA.add_feature(SeqFeature(FeatureLocation(3155,3250)), color=f, border=c)
00732         b = gdfsB.add_feature(SeqFeature(FeatureLocation(3130,3300)), color=f, border=c)
00733         gdd.cross_track_links.append(CrossLink(a, b, f, c))
00734         #Nestled within that (drawn on top),
00735         f, c = fill_and_border(colors.blue)
00736         a = gdfsA.add_feature(SeqFeature(FeatureLocation(3160,3275)), color=f, border=c)
00737         b = gdfsB.add_feature(SeqFeature(FeatureLocation(3180,3225)), color=f, border=c)
00738         gdd.cross_track_links.append(CrossLink(a, b, f, c, flip=True))
00739 
00740         #Some cross links where two features are on either side of the linear diagram fragment boundary,
00741         f, c = fill_and_border(colors.green)
00742         a = gdfsA.add_feature(SeqFeature(FeatureLocation(6450,6550)), color=f, border=c)
00743         b = gdfsB.add_feature(SeqFeature(FeatureLocation(6265,6365)), color=f, border=c)
00744         gdd.cross_track_links.append(CrossLink(a, b, color=f, border=c))
00745         f, c = fill_and_border(colors.gold)
00746         a = gdfsA.add_feature(SeqFeature(FeatureLocation(6265,6365)), color=f, border=c)
00747         b = gdfsB.add_feature(SeqFeature(FeatureLocation(6450,6550)), color=f, border=c)
00748         gdd.cross_track_links.append(CrossLink(a, b, color=f, border=c))
00749         f, c = fill_and_border(colors.red)
00750         a = gdfsA.add_feature(SeqFeature(FeatureLocation(6275,6375)), color=f, border=c)
00751         b = gdfsB.add_feature(SeqFeature(FeatureLocation(6430,6530)), color=f, border=c)
00752         gdd.cross_track_links.append(CrossLink(a, b, color=f, border=c, flip=True))
00753         f, c = fill_and_border(colors.blue)
00754         a = gdfsA.add_feature(SeqFeature(FeatureLocation(6430,6530)), color=f, border=c)
00755         b = gdfsB.add_feature(SeqFeature(FeatureLocation(6275,6375)), color=f, border=c)
00756         gdd.cross_track_links.append(CrossLink(a, b, color=f, border=c, flip=True))
00757 
00758 
00759         cds_count = 0
00760         for feature in genbank_entry.features:
00761             if feature.type == 'CDS':
00762                 cds_count += 1
00763                 if cds_count % 2 == 0:
00764                     gdfs1.add_feature(feature, color=colors.pink, sigil="ARROW")
00765                 else:
00766                     gdfs1.add_feature(feature, color=colors.red, sigil="ARROW")
00767 
00768             if feature.type == 'gene':
00769                 #Note we set the colour of ALL the genes later on as a test,
00770                 gdfs2.add_feature(feature, sigil="ARROW")
00771 
00772             if feature.type == 'misc_feature':
00773                 gdfs3.add_feature(feature, color=colors.orange)
00774 
00775             if feature.type == 'repeat_region':
00776                 gdfs4.add_feature(feature, color=colors.purple)
00777 
00778         #gdd.cross_track_links = gdd.cross_track_links[:1]
00779 
00780         gdfs1.set_all_features('label', 1)
00781         gdfs2.set_all_features('label', 1)
00782         gdfs3.set_all_features('label', 1)
00783         gdfs4.set_all_features('label', 1)
00784 
00785         gdfs3.set_all_features('hide', 0)
00786         gdfs4.set_all_features('hide', 0)
00787 
00788         #gdfs1.set_all_features('color', colors.red)
00789         gdfs2.set_all_features('color', colors.blue)
00790 
00791         gdt1.add_set(gdfsA) #Before CDS so under them!
00792         gdt1.add_set(gdfs1)
00793 
00794         gdt2.add_set(gdfsB) #Before genes so under them!
00795         gdt2.add_set(gdfs2)
00796                 
00797         gdt3 = Track('misc features and repeats', greytrack=1,
00798                    scale_largetick_interval=1e4)
00799         gdt3.add_set(gdfs3)
00800         gdt3.add_set(gdfs4)
00801 
00802         #Now add some graph sets:
00803 
00804         #Use a fairly large step so we can easily tell the difference
00805         #between the bar and line graphs.
00806         step = len(genbank_entry)//200
00807         gdgs1 = GraphSet('GC skew')
00808         
00809         graphdata1 = apply_to_window(genbank_entry.seq, step, calc_gc_skew, step)
00810         gdgs1.new_graph(graphdata1, 'GC Skew', style='bar',
00811                 color=colors.violet,
00812                 altcolor=colors.purple)
00813         
00814         gdt4 = Track(\
00815                 'GC Skew (bar)',
00816                 height=1.94, greytrack=1,
00817                 scale_largetick_interval=1e4)
00818         gdt4.add_set(gdgs1)
00819 
00820 
00821         gdgs2 = GraphSet('GC and AT Content')
00822         gdgs2.new_graph(apply_to_window(genbank_entry.seq, step, calc_gc_content, step),
00823                         'GC content', style='line', 
00824                         color=colors.lightgreen,
00825                         altcolor=colors.darkseagreen)
00826 
00827         gdgs2.new_graph(apply_to_window(genbank_entry.seq, step, calc_at_content, step),
00828                         'AT content', style='line', 
00829                         color=colors.orange,
00830                         altcolor=colors.red)    
00831 
00832         gdt5 = Track(\
00833                 'GC Content(green line), AT Content(red line)',
00834                 height=1.94, greytrack=1,
00835                 scale_largetick_interval=1e4)
00836         gdt5.add_set(gdgs2)
00837 
00838         gdgs3 = GraphSet('Di-nucleotide count')
00839         step = len(genbank_entry)//400 #smaller step
00840         gdgs3.new_graph(apply_to_window(genbank_entry.seq, step, calc_dinucleotide_counts, step),
00841                         'Di-nucleotide count', style='heat', 
00842                         color=colors.red, altcolor=colors.orange)
00843         gdt6 = Track('Di-nucleotide count', height=0.5, greytrack=False, scale=False)
00844         gdt6.add_set(gdgs3)
00845 
00846         #Add the tracks (from both features and graphs)
00847         #Leave some white space in the middle/bottom
00848         gdd.add_track(gdt4, 3) # GC skew
00849         gdd.add_track(gdt5, 4) # GC and AT content
00850         gdd.add_track(gdt1, 5) # CDS features
00851         gdd.add_track(gdt2, 6) # Gene features
00852         gdd.add_track(gdt3, 7) # Misc features and repeat feature
00853         gdd.add_track(gdt6, 8) # Feature depth
00854 
00855         #Finally draw it in both formats, and full view and partial
00856         gdd.draw(format='circular', orientation='landscape',
00857              tracklines=0, pagesize='A0')
00858         output_filename = os.path.join('Graphics', 'GD_by_obj_circular.pdf')
00859         gdd.write(output_filename, 'PDF')
00860 
00861         gdd.circular=False
00862         gdd.draw(format='circular', orientation='landscape',
00863              tracklines=0, pagesize='A0', start=3000, end=6300)
00864         output_filename = os.path.join('Graphics', 'GD_by_obj_frag_circular.pdf')
00865         gdd.write(output_filename, 'PDF')
00866 
00867         gdd.draw(format='linear', orientation='landscape',
00868              tracklines=0, pagesize='A0', fragments=3)
00869         output_filename = os.path.join('Graphics', 'GD_by_obj_linear.pdf')
00870         gdd.write(output_filename, 'PDF')
00871 
00872         gdd.set_all_tracks("greytrack_labels", 2)
00873         gdd.draw(format='linear', orientation='landscape',
00874              tracklines=0, pagesize=(30*cm,10*cm), fragments=1,
00875              start=3000, end=6300)
00876         output_filename = os.path.join('Graphics', 'GD_by_obj_frag_linear.pdf')
00877         gdd.write(output_filename, 'PDF')
00878 
00879 if __name__ == "__main__":
00880     runner = unittest.TextTestRunner(verbosity = 2)
00881     unittest.main(testRunner=runner)