Back to index

python-biopython  1.60
Public Member Functions | Public Attributes
test_GenomeDiagram.DiagramTest Class Reference

List of all members.

Public Member Functions

def setUp
def test_write_arguments
def test_partial_diagram
def test_diagram_via_methods_pdf
def test_diagram_via_object_pdf

Public Attributes

 record

Detailed Description

Creating feature sets, graph sets, tracks etc individually for the diagram.

Definition at line 472 of file test_GenomeDiagram.py.


Member Function Documentation

Test setup, just loads a GenBank file as a SeqRecord.

Definition at line 474 of file test_GenomeDiagram.py.

00474 
00475     def setUp(self):
00476         """Test setup, just loads a GenBank file as a SeqRecord."""
00477         handle = open(os.path.join("GenBank","NC_005816.gb"), 'r')
00478         self.record = SeqIO.read(handle, "genbank")
00479         handle.close()

Here is the call graph for this function:

Construct and draw PDF using method approach.

Definition at line 582 of file test_GenomeDiagram.py.

00582 
00583     def test_diagram_via_methods_pdf(self):
00584         """Construct and draw PDF using method approach."""
00585         genbank_entry = self.record
00586         gdd = Diagram('Test Diagram')
00587 
00588         #Add a track of features,
00589         gdt_features = gdd.new_track(1, greytrack=True,
00590                                      name="CDS Features", greytrack_labels=0,
00591                                      height=0.5)
00592         #We'll just use one feature set for the genes and misc_features,
00593         gds_features = gdt_features.new_set()
00594         for feature in genbank_entry.features:
00595             if feature.type == "gene":
00596                 if len(gds_features) % 2 == 0:
00597                     color = "blue"
00598                 else:
00599                     color = "lightblue"
00600                 gds_features.add_feature(feature, color=color,
00601                                             #label_position = "middle",
00602                                             #label_position = "end",
00603                                             label_position = "start",
00604                                             label_size = 11,
00605                                             #label_angle = 90,
00606                                             sigil="ARROW",
00607                                             label=True)
00608 
00609         #I want to include some strandless features, so for an example
00610         #will use EcoRI recognition sites etc.
00611         for site, name, color in [("GAATTC","EcoRI","green"),
00612                                   ("CCCGGG","SmaI","orange"),
00613                                   ("AAGCTT","HindIII","red"),
00614                                   ("GGATCC","BamHI","purple")]:
00615             index = 0
00616             while True:
00617                 index  = genbank_entry.seq.find(site, start=index)
00618                 if index == -1 : break
00619                 feature = SeqFeature(FeatureLocation(index, index+6), strand=None)
00620 
00621                 #This URL should work in SVG output from recent versions
00622                 #of ReportLab.  You need ReportLab 2.4 or later
00623                 try :
00624                     url = "http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi"+\
00625                           "?db=protein&id=%s" % feature.qualifiers["protein_id"][0]
00626                 except KeyError :
00627                     url = None
00628 
00629                 gds_features.add_feature(feature, color = color,
00630                                          url = url,
00631                                          #label_position = "middle",
00632                                          label_size = 10,
00633                                          label_color = color,
00634                                          #label_angle = 90,
00635                                          name = name,
00636                                          label = True)
00637                 index += len(site)
00638             del index
00639 
00640         #Now add a graph track...
00641         gdt_at_gc = gdd.new_track(2, greytrack=True,
00642                                   name="AT and GC content",
00643                                   greytrack_labels=True)
00644         gds_at_gc = gdt_at_gc.new_set(type="graph")
00645 
00646         step = len(genbank_entry)//200
00647         gds_at_gc.new_graph(apply_to_window(genbank_entry.seq, step, calc_gc_content, step),
00648                         'GC content', style='line', 
00649                         color=colors.lightgreen,
00650                         altcolor=colors.darkseagreen)
00651         gds_at_gc.new_graph(apply_to_window(genbank_entry.seq, step, calc_at_content, step),
00652                         'AT content', style='line', 
00653                         color=colors.orange,
00654                         altcolor=colors.red)
00655         
00656         #Finally draw it in both formats,
00657         gdd.draw(format='linear', orientation='landscape',
00658              tracklines=0, pagesize='A4', fragments=3)
00659         output_filename = os.path.join('Graphics', 'GD_by_meth_linear.pdf')
00660         gdd.write(output_filename, 'PDF')
00661 
00662         gdd.draw(format='circular', tracklines=False, circle_core=0.8,
00663                  pagesize=(20*cm,20*cm), circular=True)
00664         output_filename = os.path.join('Graphics', 'GD_by_meth_circular.pdf')
00665         gdd.write(output_filename, 'PDF')

Here is the call graph for this function:

Construct and draw PDF using object approach.

Definition at line 666 of file test_GenomeDiagram.py.

00666 
00667     def test_diagram_via_object_pdf(self):
00668         """Construct and draw PDF using object approach."""
00669         genbank_entry = self.record
00670         gdd = Diagram('Test Diagram')
00671 
00672         gdt1 = Track('CDS features', greytrack=True,
00673                      scale_largetick_interval=1e4,
00674                      scale_smalltick_interval=1e3,
00675                      greytrack_labels=10,
00676                      greytrack_font_color="red",
00677                      scale_format = "SInt")
00678         gdt2 = Track('gene features', greytrack=1,
00679                    scale_largetick_interval=1e4)
00680 
00681         #First add some feature sets:
00682         gdfsA = FeatureSet(name='CDS backgrounds')
00683         gdfsB = FeatureSet(name='gene background')
00684 
00685 
00686         gdfs1 = FeatureSet(name='CDS features')
00687         gdfs2 = FeatureSet(name='gene features')
00688         gdfs3 = FeatureSet(name='misc_features')
00689         gdfs4 = FeatureSet(name='repeat regions')
00690 
00691         prev_gene = None
00692         cds_count = 0
00693         for feature in genbank_entry.features:
00694             if feature.type == 'CDS':
00695                 cds_count += 1
00696                 if prev_gene:
00697                     #Assuming it goes with this CDS!
00698                     if cds_count % 2 == 0:
00699                         dark, light = colors.peru, colors.tan
00700                     else:
00701                         dark, light = colors.burlywood, colors.bisque
00702                     #Background for CDS,
00703                     a = gdfsA.add_feature(SeqFeature(FeatureLocation(feature.location.start, feature.location.end, strand=0)),
00704                                          color=dark)
00705                     #Background for gene,
00706                     b = gdfsB.add_feature(SeqFeature(FeatureLocation(prev_gene.location.start, prev_gene.location.end, strand=0)),
00707                                           color=dark)
00708                     #Cross link,
00709                     gdd.cross_track_links.append(CrossLink(a, b, light, dark))
00710                     prev_gene = None
00711             if feature.type == 'gene':
00712                 prev_gene = feature
00713 
00714         #Some cross links on the same linear diagram fragment,
00715         f, c = fill_and_border(colors.red)
00716         a = gdfsA.add_feature(SeqFeature(FeatureLocation(2220,2230)), color=f, border=c)
00717         b = gdfsB.add_feature(SeqFeature(FeatureLocation(2200,2210)), color=f, border=c)
00718         gdd.cross_track_links.append(CrossLink(a, b, f, c))
00719 
00720         f, c = fill_and_border(colors.blue)
00721         a = gdfsA.add_feature(SeqFeature(FeatureLocation(2150,2200)), color=f, border=c)
00722         b = gdfsB.add_feature(SeqFeature(FeatureLocation(2220,2290)), color=f, border=c)
00723         gdd.cross_track_links.append(CrossLink(a, b, f, c, flip=True))
00724 
00725         f, c = fill_and_border(colors.green)
00726         a = gdfsA.add_feature(SeqFeature(FeatureLocation(2250,2560)), color=f, border=c)
00727         b = gdfsB.add_feature(SeqFeature(FeatureLocation(2300,2860)), color=f, border=c)
00728         gdd.cross_track_links.append(CrossLink(a, b, f, c))
00729 
00730         #Some cross links where both parts are saddling the linear diagram fragment boundary,
00731         f, c = fill_and_border(colors.red)
00732         a = gdfsA.add_feature(SeqFeature(FeatureLocation(3155,3250)), color=f, border=c)
00733         b = gdfsB.add_feature(SeqFeature(FeatureLocation(3130,3300)), color=f, border=c)
00734         gdd.cross_track_links.append(CrossLink(a, b, f, c))
00735         #Nestled within that (drawn on top),
00736         f, c = fill_and_border(colors.blue)
00737         a = gdfsA.add_feature(SeqFeature(FeatureLocation(3160,3275)), color=f, border=c)
00738         b = gdfsB.add_feature(SeqFeature(FeatureLocation(3180,3225)), color=f, border=c)
00739         gdd.cross_track_links.append(CrossLink(a, b, f, c, flip=True))
00740 
00741         #Some cross links where two features are on either side of the linear diagram fragment boundary,
00742         f, c = fill_and_border(colors.green)
00743         a = gdfsA.add_feature(SeqFeature(FeatureLocation(6450,6550)), color=f, border=c)
00744         b = gdfsB.add_feature(SeqFeature(FeatureLocation(6265,6365)), color=f, border=c)
00745         gdd.cross_track_links.append(CrossLink(a, b, color=f, border=c))
00746         f, c = fill_and_border(colors.gold)
00747         a = gdfsA.add_feature(SeqFeature(FeatureLocation(6265,6365)), color=f, border=c)
00748         b = gdfsB.add_feature(SeqFeature(FeatureLocation(6450,6550)), color=f, border=c)
00749         gdd.cross_track_links.append(CrossLink(a, b, color=f, border=c))
00750         f, c = fill_and_border(colors.red)
00751         a = gdfsA.add_feature(SeqFeature(FeatureLocation(6275,6375)), color=f, border=c)
00752         b = gdfsB.add_feature(SeqFeature(FeatureLocation(6430,6530)), color=f, border=c)
00753         gdd.cross_track_links.append(CrossLink(a, b, color=f, border=c, flip=True))
00754         f, c = fill_and_border(colors.blue)
00755         a = gdfsA.add_feature(SeqFeature(FeatureLocation(6430,6530)), color=f, border=c)
00756         b = gdfsB.add_feature(SeqFeature(FeatureLocation(6275,6375)), color=f, border=c)
00757         gdd.cross_track_links.append(CrossLink(a, b, color=f, border=c, flip=True))
00758 
00759 
00760         cds_count = 0
00761         for feature in genbank_entry.features:
00762             if feature.type == 'CDS':
00763                 cds_count += 1
00764                 if cds_count % 2 == 0:
00765                     gdfs1.add_feature(feature, color=colors.pink, sigil="ARROW")
00766                 else:
00767                     gdfs1.add_feature(feature, color=colors.red, sigil="ARROW")
00768 
00769             if feature.type == 'gene':
00770                 #Note we set the colour of ALL the genes later on as a test,
00771                 gdfs2.add_feature(feature, sigil="ARROW")
00772 
00773             if feature.type == 'misc_feature':
00774                 gdfs3.add_feature(feature, color=colors.orange)
00775 
00776             if feature.type == 'repeat_region':
00777                 gdfs4.add_feature(feature, color=colors.purple)
00778 
00779         #gdd.cross_track_links = gdd.cross_track_links[:1]
00780 
00781         gdfs1.set_all_features('label', 1)
00782         gdfs2.set_all_features('label', 1)
00783         gdfs3.set_all_features('label', 1)
00784         gdfs4.set_all_features('label', 1)
00785 
00786         gdfs3.set_all_features('hide', 0)
00787         gdfs4.set_all_features('hide', 0)
00788 
00789         #gdfs1.set_all_features('color', colors.red)
00790         gdfs2.set_all_features('color', colors.blue)
00791 
00792         gdt1.add_set(gdfsA) #Before CDS so under them!
00793         gdt1.add_set(gdfs1)
00794 
00795         gdt2.add_set(gdfsB) #Before genes so under them!
00796         gdt2.add_set(gdfs2)
00797                 
00798         gdt3 = Track('misc features and repeats', greytrack=1,
00799                    scale_largetick_interval=1e4)
00800         gdt3.add_set(gdfs3)
00801         gdt3.add_set(gdfs4)
00802 
00803         #Now add some graph sets:
00804 
00805         #Use a fairly large step so we can easily tell the difference
00806         #between the bar and line graphs.
00807         step = len(genbank_entry)//200
00808         gdgs1 = GraphSet('GC skew')
00809         
00810         graphdata1 = apply_to_window(genbank_entry.seq, step, calc_gc_skew, step)
00811         gdgs1.new_graph(graphdata1, 'GC Skew', style='bar',
00812                 color=colors.violet,
00813                 altcolor=colors.purple)
00814         
00815         gdt4 = Track(\
00816                 'GC Skew (bar)',
00817                 height=1.94, greytrack=1,
00818                 scale_largetick_interval=1e4)
00819         gdt4.add_set(gdgs1)
00820 
00821 
00822         gdgs2 = GraphSet('GC and AT Content')
00823         gdgs2.new_graph(apply_to_window(genbank_entry.seq, step, calc_gc_content, step),
00824                         'GC content', style='line', 
00825                         color=colors.lightgreen,
00826                         altcolor=colors.darkseagreen)
00827 
00828         gdgs2.new_graph(apply_to_window(genbank_entry.seq, step, calc_at_content, step),
00829                         'AT content', style='line', 
00830                         color=colors.orange,
00831                         altcolor=colors.red)    
00832 
00833         gdt5 = Track(\
00834                 'GC Content(green line), AT Content(red line)',
00835                 height=1.94, greytrack=1,
00836                 scale_largetick_interval=1e4)
00837         gdt5.add_set(gdgs2)
00838 
00839         gdgs3 = GraphSet('Di-nucleotide count')
00840         step = len(genbank_entry)//400 #smaller step
00841         gdgs3.new_graph(apply_to_window(genbank_entry.seq, step, calc_dinucleotide_counts, step),
00842                         'Di-nucleotide count', style='heat', 
00843                         color=colors.red, altcolor=colors.orange)
00844         gdt6 = Track('Di-nucleotide count', height=0.5, greytrack=False, scale=False)
00845         gdt6.add_set(gdgs3)
00846 
00847         #Add the tracks (from both features and graphs)
00848         #Leave some white space in the middle/bottom
00849         gdd.add_track(gdt4, 3) # GC skew
00850         gdd.add_track(gdt5, 4) # GC and AT content
00851         gdd.add_track(gdt1, 5) # CDS features
00852         gdd.add_track(gdt2, 6) # Gene features
00853         gdd.add_track(gdt3, 7) # Misc features and repeat feature
00854         gdd.add_track(gdt6, 8) # Feature depth
00855 
00856         #Finally draw it in both formats, and full view and partial
00857         gdd.draw(format='circular', orientation='landscape',
00858              tracklines=0, pagesize='A0')
00859         output_filename = os.path.join('Graphics', 'GD_by_obj_circular.pdf')
00860         gdd.write(output_filename, 'PDF')
00861 
00862         gdd.circular=False
00863         gdd.draw(format='circular', orientation='landscape',
00864              tracklines=0, pagesize='A0', start=3000, end=6300)
00865         output_filename = os.path.join('Graphics', 'GD_by_obj_frag_circular.pdf')
00866         gdd.write(output_filename, 'PDF')
00867 
00868         gdd.draw(format='linear', orientation='landscape',
00869              tracklines=0, pagesize='A0', fragments=3)
00870         output_filename = os.path.join('Graphics', 'GD_by_obj_linear.pdf')
00871         gdd.write(output_filename, 'PDF')
00872 
00873         gdd.set_all_tracks("greytrack_labels", 2)
00874         gdd.draw(format='linear', orientation='landscape',
00875              tracklines=0, pagesize=(30*cm,10*cm), fragments=1,
00876              start=3000, end=6300)
00877         output_filename = os.path.join('Graphics', 'GD_by_obj_frag_linear.pdf')
00878         gdd.write(output_filename, 'PDF')

Here is the call graph for this function:

Construct and draw SVG and PDF for just part of a SeqRecord.

Definition at line 502 of file test_GenomeDiagram.py.

00502 
00503     def test_partial_diagram(self):
00504         """construct and draw SVG and PDF for just part of a SeqRecord."""
00505         genbank_entry = self.record
00506         start = 6500
00507         end = 8750
00508         
00509         gdd = Diagram('Test Diagram',
00510                       #For the circular diagram we don't want a closed cirle:
00511                       circular=False,
00512                       )
00513         #Add a track of features,
00514         gdt_features = gdd.new_track(1, greytrack=True,
00515                                      name="CDS Features",
00516                                      scale_largetick_interval=1000,
00517                                      scale_smalltick_interval=100,
00518                                      scale_format = "SInt",
00519                                      greytrack_labels=False,
00520                                      height=0.5)
00521         #We'll just use one feature set for these features,
00522         gds_features = gdt_features.new_set()
00523         for feature in genbank_entry.features:
00524             if feature.type != "CDS":
00525                 #We're going to ignore these.
00526                 continue
00527             if feature.location.end.position < start:
00528                 #Out of frame (too far left)
00529                 continue
00530             if feature.location.start.position > end:
00531                 #Out of frame (too far right)
00532                 continue
00533 
00534             #This URL should work in SVG output from recent versions
00535             #of ReportLab.  You need ReportLab 2.4 or later
00536             try :
00537                 url = "http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi"+\
00538                       "?db=protein&id=%s" % feature.qualifiers["protein_id"][0]
00539             except KeyError :
00540                 url = None
00541                 
00542             #Note that I am using strings for color names, instead
00543             #of passing in color objects.  This should also work!
00544             if len(gds_features) % 2 == 0:
00545                 color = "white" #for testing the automatic black border!
00546             else:
00547                 color = "red"
00548             #Checking it can cope with the old UK spelling colour.
00549             #Also show the labels perpendicular to the track.
00550             gds_features.add_feature(feature, colour=color,
00551                                      url = url,
00552                                      sigil="ARROW",
00553                                      label_position = "start",
00554                                      label_size = 8,
00555                                      label_angle = 90,
00556                                      label=True)
00557 
00558         #And draw it...
00559         gdd.draw(format='linear', orientation='landscape',
00560                  tracklines=False, pagesize=(10*cm,6*cm), fragments=1,
00561                  start=start, end=end)
00562         output_filename = os.path.join('Graphics', 'GD_region_linear.pdf')
00563         gdd.write(output_filename, 'PDF')
00564 
00565         #Also check the write_to_string method matches,
00566         #(Note the possible confusion over new lines on Windows)
00567         assert open(output_filename).read().replace("\r\n","\n") \
00568                == gdd.write_to_string('PDF').replace("\r\n","\n")
00569 
00570         output_filename = os.path.join('Graphics', 'GD_region_linear.svg')
00571         gdd.write(output_filename, 'SVG')
00572 
00573         #Circular with a particular start/end is a bit odd, but by setting
00574         #circular=False (above) a sweep of 90% is used (a wedge is left out)
00575         gdd.draw(format='circular',
00576                  tracklines=False, pagesize=(10*cm,10*cm),
00577                  start=start, end=end)
00578         output_filename = os.path.join('Graphics', 'GD_region_circular.pdf')
00579         gdd.write(output_filename, 'PDF')
00580         output_filename = os.path.join('Graphics', 'GD_region_circular.svg')
00581         gdd.write(output_filename, 'SVG')

Here is the call graph for this function:

Check how the write methods respond to output format arguments.

Definition at line 480 of file test_GenomeDiagram.py.

00480 
00481     def test_write_arguments(self):
00482         """Check how the write methods respond to output format arguments."""
00483         gdd = Diagram('Test Diagram')
00484         gdd.drawing = None #Hack - need the ReportLab drawing object to be created.
00485         filename = os.path.join("Graphics","error.txt")
00486         #We (now) allow valid formats in any case.
00487         for output in ["XXX","xxx",None,123,5.9]:
00488             try:
00489                 gdd.write(filename, output)
00490                 assert False, \
00491                        "Should have rejected %s as an output format" % output
00492             except ValueError, e:
00493                 #Good!
00494                 pass
00495             try:
00496                 gdd.write_to_string(output)
00497                 assert False, \
00498                        "Should have rejected %s as an output format" % output
00499             except ValueError, e:
00500                 #Good!
00501                 pass


Member Data Documentation

Definition at line 477 of file test_GenomeDiagram.py.


The documentation for this class was generated from the following file: