Back to index

python-biopython  1.60
test_align.py
Go to the documentation of this file.
00001 
00002 #!/usr/bin/env python
00003 """test_align.py
00004 
00005 A script to test alignment stuff.
00006 
00007 Right now we've got tests for:
00008 o Reading and Writing clustal format
00009 o Reading and Writing fasta format
00010 o Converting between formats"""
00011 
00012 # standard library
00013 import os 
00014 
00015 # biopython
00016 from Bio import Alphabet
00017 from Bio.Seq import Seq
00018 from Bio.SeqRecord import SeqRecord
00019 from Bio.Alphabet import IUPAC
00020 from Bio.Align import AlignInfo
00021 from Bio import AlignIO
00022 from Bio.SubsMat import FreqTable
00023 from Bio.Align import MultipleSeqAlignment
00024 
00025 #Very simple tests on an empty alignment
00026 alignment = MultipleSeqAlignment([], Alphabet.generic_alphabet)
00027 assert alignment.get_alignment_length() == 0
00028 assert len(alignment) == 0
00029 del alignment
00030 
00031 #Basic tests on simple three string alignment
00032 alignment = MultipleSeqAlignment([], Alphabet.generic_alphabet)
00033 letters = "AbcDefGhiJklMnoPqrStuVwxYz"
00034 alignment.append(SeqRecord(Seq(letters), id="mixed"))
00035 alignment.append(SeqRecord(Seq(letters.lower()), id="lower"))
00036 alignment.append(SeqRecord(Seq(letters.upper()), id="upper"))
00037 assert alignment.get_alignment_length() == 26
00038 assert len(alignment) == 3
00039 assert str(alignment[0].seq) == letters
00040 assert str(alignment[1].seq) == letters.lower()
00041 assert str(alignment[2].seq) == letters.upper()
00042 assert alignment[0].id == "mixed"
00043 assert alignment[1].id == "lower"
00044 assert alignment[2].id == "upper"
00045 for (col, letter) in enumerate(letters):
00046     assert alignment[:,col] == letter + letter.lower() + letter.upper()
00047 #Check row extractions:
00048 assert alignment[0].id == "mixed"
00049 assert alignment[-1].id == "upper"
00050 #Check sub-alignment extraction by row slicing:
00051 assert isinstance(alignment[::-1], MultipleSeqAlignment)
00052 assert alignment[::-1][0].id == "upper"
00053 assert alignment[::-1][2].id == "mixed"
00054 
00055 del alignment
00056 del letters
00057 
00058 print "testing reading and writing clustal format..."
00059 test_dir = os.path.join(os.getcwd(), 'Clustalw')
00060 test_names = ['opuntia.aln', 'cw02.aln']
00061 
00062 test_files = []
00063 for name in test_names:
00064     test_files.append(os.path.join(test_dir, name))
00065 
00066 for test_file in test_files:
00067     # parse the alignment file and get an aligment object
00068     alignment = AlignIO.read(test_file, "clustal")
00069 
00070     # print the alignment back out
00071     print alignment.format("clustal")
00072 
00073 alignment = AlignIO.read(os.path.join(test_dir, test_names[0]), "clustal",
00074                          alphabet = Alphabet.Gapped(IUPAC.unambiguous_dna))
00075 
00076 # test the base alignment stuff
00077 print 'all_seqs...'
00078 for seq_record in alignment:
00079     print 'description:', seq_record.description
00080     print 'seq:', repr(seq_record.seq)
00081 print 'length:', alignment.get_alignment_length()
00082 
00083 print 'Calculating summary information...'
00084 align_info = AlignInfo.SummaryInfo(alignment)
00085 consensus = align_info.dumb_consensus()
00086 assert isinstance(consensus, Seq)
00087 print 'consensus:', repr(consensus)
00088 
00089 
00090 print 'Replacement dictionary'
00091 ks = align_info.replacement_dictionary(['N']).keys()
00092 ks.sort()
00093 for key in ks:
00094     print "%s : %s" % (key, align_info.replacement_dictionary(['N'])[key])
00095 
00096 print 'position specific score matrix.'
00097 print 'with a supplied consensus sequence...'
00098 print align_info.pos_specific_score_matrix(consensus, ['N'])
00099 
00100 print 'defaulting to a consensus sequence...'
00101 print align_info.pos_specific_score_matrix(chars_to_ignore = ['N'])
00102 
00103 print 'with a selected sequence...'
00104 second_seq = alignment[1].seq
00105 print align_info.pos_specific_score_matrix(second_seq, ['N'])
00106 
00107 print 'information content'
00108 print 'part of alignment: %0.2f' \
00109       % align_info.information_content(5, 50, chars_to_ignore = ['N'])
00110 print 'entire alignment: %0.2f' \
00111       % align_info.information_content(chars_to_ignore = ['N'])
00112 
00113 print 'relative information content'
00114 e_freq = {'G' : 0.25,
00115           'C' : 0.25,
00116           'A' : 0.25,
00117           'T' : 0.25}
00118 
00119 e_freq_table = FreqTable.FreqTable(e_freq, FreqTable.FREQ,
00120                                    IUPAC.unambiguous_dna)
00121 
00122 print 'relative information: %0.2f' \
00123       % align_info.information_content(e_freq_table = e_freq_table,
00124                                        chars_to_ignore = ['N'])
00125 
00126 print 'Column 1:', align_info.get_column(1)
00127 print 'IC for column 1: %0.2f' % align_info.ic_vector[1]
00128 print 'Column 7:', align_info.get_column(7)
00129 print 'IC for column 7: %0.2f' % align_info.ic_vector[7]
00130 print 'test print_info_content'
00131 AlignInfo.print_info_content(align_info)
00132 print "testing reading and writing fasta format..."
00133 
00134 to_parse = os.path.join(os.curdir, 'Quality', 'example.fasta')
00135 
00136 alignment = AlignIO.read(to_parse, "fasta",
00137                          alphabet = Alphabet.Gapped(IUPAC.ambiguous_dna))
00138 
00139 # test the base alignment stuff
00140 print 'all_seqs...'
00141 for seq_record in alignment:
00142     print 'description:', seq_record.description
00143     print 'seq:', repr(seq_record.seq)
00144 
00145 print 'length:', alignment.get_alignment_length()
00146 align_info = AlignInfo.SummaryInfo(alignment)
00147 consensus = align_info.dumb_consensus(ambiguous="N", threshold=0.6)
00148 assert isinstance(consensus, Seq)
00149 print 'consensus:', repr(consensus)
00150 
00151 print alignment
00152 
00153 
00154 print "Test format conversion..."
00155 
00156 # parse the alignment file and get an aligment object
00157 alignment = AlignIO.read(os.path.join(os.curdir, 'Clustalw', 'opuntia.aln'),
00158                          'clustal')
00159 
00160 print "As FASTA:"
00161 print alignment.format("fasta")
00162 print "As Clustal:"
00163 print alignment.format("clustal")
00164 
00165 """
00166 # test to find a position in an original sequence given a
00167 # column position in an alignment
00168 print "Testing finding column positions..."
00169 alignment_info = ["GATC--CGATC--G",
00170                   "GA--CCCG-TC--G",
00171                   "GAT--CC--TC--G"]
00172 
00173 gapped_unambiguous = Alphabet.Gapped(IUPAC.unambiguous_dna)
00174 
00175 alignment = Alignment(gapped_unambiguous)
00176 for seq in alignment_info:
00177     alignment.add_sequence("Blah", seq)
00178 
00179 test_seq_1 = Seq("GATCCGATCG")
00180 orig_pos = alignment.original_sequence_pos(3, test_seq_1, 0)
00181 assert orig_pos == 3, "Got unexpected position: %s" % orig_pos
00182 orig_pos = alignment.original_sequence_pos(7, test_seq_1, 0)
00183 assert orig_pos == 5, "Got unexpected position: %s" % orig_pos
00184 orig_pos = alignment.original_sequence_pos(0, test_seq_1, 0)
00185 assert orig_pos == 0, "Got unexpected position: %s" % orig_pos
00186 orig_pos = alignment.original_sequence_pos(13, test_seq_1, 0)
00187 assert orig_pos == 9, "Got unexpected position: %s" % orig_pos
00188 
00189 try:
00190     orig_pos = alignment.original_sequence_pos(5, test_seq_1, 0)
00191     raise AssertionError("Did not fail with a junk position")
00192 except AssertionError:
00193     pass
00194 """