Back to index

python-biopython  1.60
clustal_run.py
Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 """clustal_run.py
00003 
00004 Example code to show how to create a clustalw command line, run clustalw
00005 and parse the results into an object that can be dealt with easily."""
00006 # standard library
00007 import os
00008 import sys
00009 import subprocess
00010 
00011 # biopython
00012 from Bio.Alphabet import Gapped, IUPAC
00013 from Bio.Align.Applications import ClustalwCommandline
00014 from Bio import AlignIO
00015 from Bio.Align import AlignInfo
00016 from Bio.SubsMat import FreqTable
00017 
00018 # create the command line to run clustalw
00019 # this assumes you've got clustalw somewhere on your path, otherwise
00020 # you need to pass the full path of the executable to this via cmd="..."
00021 cline = ClustalwCommandline(infile='opuntia.fasta', outfile='test.aln')
00022 
00023 # actually perform the alignment
00024 return_code = subprocess.call(str(cline), shell=(sys.platform!="win32"))
00025 assert return_code==0, "Calling ClustalW failed"
00026 
00027 # Parse the output
00028 alignment = AlignIO.read("test.aln", "clustal",
00029                          alphabet=Gapped(IUPAC.unambiguous_dna))
00030 
00031 print alignment
00032 
00033 print 'first description:', alignment[0].description
00034 print 'first sequence:', alignment[0].seq
00035 
00036 # get the length of the alignment
00037 print 'length', alignment.get_alignment_length()
00038 
00039 print alignment
00040 
00041 # print out interesting information about the alignment
00042 summary_align = AlignInfo.SummaryInfo(alignment)
00043 
00044 consensus = summary_align.dumb_consensus()
00045 print 'consensus', consensus
00046 
00047 my_pssm = summary_align.pos_specific_score_matrix(consensus,
00048                                                   chars_to_ignore = ['N'])
00049 
00050 print my_pssm
00051 
00052 expect_freq = {
00053     'A' : .3,
00054     'G' : .2,
00055     'T' : .3,
00056     'C' : .2}
00057 
00058 freq_table_info = FreqTable.FreqTable(expect_freq, FreqTable.FREQ,
00059                                       IUPAC.unambiguous_dna)
00060 
00061 info_content = summary_align.information_content(5, 30,
00062                                                  chars_to_ignore = ['N'],
00063                                                  e_freq_table = \
00064                                                  freq_table_info)
00065 
00066 print "relative info content:", info_content
00067