Back to index

python-biopython  1.60
ACT_example.py
Go to the documentation of this file.
00001 import sys
00002 import os
00003 import time
00004 from reportlab.lib import colors
00005 from reportlab.lib.units import cm
00006 
00007 from Bio.Blast.Applications import NcbitblastxCommandline
00008 from Bio.Graphics.GenomeDiagram import Diagram, CrossLink
00009 from Bio.SeqFeature import SeqFeature, FeatureLocation
00010 from Bio import SeqIO
00011 
00012 #Modify this line to point at the Artemis/ACT example data which is online at:
00013 # https://github.com/sanger-pathogens/Artemis/tree/master/etc
00014 input_folder = "/Applications/Artemis/Artemis.app/Contents/artemis/etc"
00015 
00016 name = "af063097_v_b132222"
00017 file_a = "af063097.embl"
00018 file_b = "b132222.embl"
00019 format_a = "embl"
00020 format_b = "embl"
00021 file_a_vs_b = "af063097_v_b132222.crunch"
00022 
00023 for f in [file_a, file_b, file_a_vs_b]:
00024     if not os.path.isfile(os.path.join(input_folder, f)):
00025         print "Missing input file %s.fna" % f
00026         sys.exit(1)
00027 
00028 #Only doing a_vs_b here, could also have b_vs_c and c_vs_d etc
00029 genomes = [
00030     (os.path.join(input_folder, file_a), format_a),
00031     (os.path.join(input_folder, file_b), format_b),
00032 ]
00033 comparisons = [os.path.join(input_folder, file_a_vs_b)]
00034 
00035 #Create diagram with tracks, each with a feature set
00036 assert len(genomes) >= 2 and len(genomes) == len(comparisons)+1
00037 gd_diagram = Diagram(name, track_size=0.35, circular=False)
00038 tracks = dict()
00039 feature_sets = dict()
00040 records = dict()
00041 for f, format in genomes:
00042     records[f] = SeqIO.read(f, format)
00043     tracks[f] = gd_diagram.new_track(1, name=f, start=0, end=len(records[f]),
00044                                      scale_smalltick_interval=1000,
00045                                      scale_largetick_interval=10000,
00046                                      greytrack=True, greytrack_labels=0)
00047     feature_sets[f] = tracks[f].new_set()
00048 
00049 print "Drawing matches..."
00050 for i, crunch_file in enumerate(comparisons):
00051     q = genomes[i+1][0] #query file
00052     s = genomes[i][0] #subject file
00053     q_set = feature_sets[q]
00054     s_set = feature_sets[s]
00055     handle = open(crunch_file)
00056     for line in handle:
00057         if line[0]=="#":
00058             continue
00059         parts = line.rstrip("\n").split(None,7)
00060         #0 = score
00061         #1 = id
00062         #2 = S1
00063         #3 = E1
00064         #4 = seq1
00065         #5 = S2
00066         #6 = E2
00067         #7 = seq2
00068         try:
00069             q_start, q_end = int(parts[2]), int(parts[3])
00070             s_start, s_end = int(parts[5]), int(parts[6])
00071         except IndexError:
00072             sys.stderr.write(repr(line) + "\n")
00073             sys.stderr.write(repr(parts) + "\n")
00074             raise
00075         flip = False
00076         if q_start > q_end:
00077             flip = not flip
00078             q_start, q_end = q_end, q_start
00079         if s_start > s_end:
00080             flip = not flip
00081             s_start, s_end = s_end, s_start
00082         if flip:
00083             c = colors.Color(0, 0, 1, alpha=0.25)
00084             b = False
00085         else:
00086             c = colors.Color(1, 0, 0, alpha=0.25)
00087             b = False
00088         q_feature = q_set.add_feature(SeqFeature(FeatureLocation(q_start-1, q_end)),
00089                                                  color=c, border=b)
00090         s_feature = s_set.add_feature(SeqFeature(FeatureLocation(s_start-1, s_end)),
00091                                                  color=c, border=b)
00092         gd_diagram.cross_track_links.append(CrossLink(q_feature, s_feature, c, b))
00093         #NOTE: We are using the same colour for all the matches,
00094         #with transparency. This means overlayed matches will appear darker.
00095         #It also means the drawing order not very important.
00096         #Note ACT puts long hits at the back, and colours by hit score
00097     handle.close()
00098 
00099 print "Drawing CDS features..."
00100 for f, format in genomes:
00101     record = records[f]
00102     feature_set = feature_sets[f]
00103     #Mark the CDS features
00104     for cds in record.features:
00105         if cds.type != "CDS":
00106             continue
00107         feature_set.add_feature(cds, sigil="ARROW",
00108                                 color=colors.lightblue,
00109                                 border=colors.blue)
00110 
00111 gd_diagram.draw(format="linear", fragments=3,
00112                 orientation="landscape", pagesize=(20*cm,10*cm))
00113 gd_diagram.write(name + ".pdf", "PDF")
00114 
00115 gd_diagram.draw(format="circular",
00116                 orientation="landscape", pagesize=(20*cm,20*cm))
00117 gd_diagram.write(name + "_c.pdf", "PDF")