Back to index

python-biopython  1.60
ClustalIO.py
Go to the documentation of this file.
00001 # Copyright 2006-2010 by Peter Cock.  All rights reserved.
00002 #
00003 # This code is part of the Biopython distribution and governed by its
00004 # license.  Please see the LICENSE file that should have been included
00005 # as part of this package.
00006 """
00007 Bio.AlignIO support for the "clustal" output from CLUSTAL W and other tools.
00008 
00009 You are expected to use this module via the Bio.AlignIO functions (or the
00010 Bio.SeqIO functions if you want to work directly with the gapped sequences).
00011 """
00012 
00013 from Bio.Seq import Seq
00014 from Bio.SeqRecord import SeqRecord
00015 from Bio.Align import MultipleSeqAlignment
00016 from Interfaces import AlignmentIterator, SequentialAlignmentWriter
00017 
00018 class ClustalWriter(SequentialAlignmentWriter):
00019     """Clustalw alignment writer."""
00020     def write_alignment(self, alignment):
00021         """Use this to write (another) single alignment to an open file."""
00022 
00023         if len(alignment) == 0:
00024             raise ValueError("Must have at least one sequence")
00025         if alignment.get_alignment_length() == 0:
00026             #This doubles as a check for an alignment object    
00027             raise ValueError("Non-empty sequences are required")
00028 
00029         #Old versions of the parser in Bio.Clustalw used a ._version property,
00030         try:
00031             version = str(alignment._version)
00032         except AttributeError:
00033             version = ""
00034         if not version:
00035             version = '1.81'
00036         if version.startswith("2."):
00037             #e.g. 2.0.x
00038             output = "CLUSTAL %s multiple sequence alignment\n\n\n" % version
00039         else:
00040             #e.g. 1.81 or 1.83
00041             output = "CLUSTAL X (%s) multiple sequence alignment\n\n\n" % version
00042         
00043         cur_char = 0
00044         max_length = len(alignment[0])
00045 
00046         if max_length <= 0:
00047             raise ValueError("Non-empty sequences are required")
00048 
00049         # keep displaying sequences until we reach the end
00050         while cur_char != max_length:
00051             # calculate the number of sequences to show, which will
00052             # be less if we are at the end of the sequence
00053             if (cur_char + 50) > max_length:
00054                 show_num = max_length - cur_char
00055             else:
00056                 show_num = 50
00057 
00058             # go through all of the records and print out the sequences
00059             # when we output, we do a nice 80 column output, although this
00060             # may result in truncation of the ids.
00061             for record in alignment:
00062                 #Make sure we don't get any spaces in the record
00063                 #identifier when output in the file by replacing
00064                 #them with underscores:
00065                 line = record.id[0:30].replace(" ","_").ljust(36)
00066                 line += record.seq[cur_char:(cur_char + show_num)].tostring()
00067                 output += line + "\n"
00068 
00069             # now we need to print out the star info, if we've got it
00070             # This was stored by Bio.Clustalw using a ._star_info property.
00071             if hasattr(alignment, "_star_info") and alignment._star_info != '':
00072                 output += (" " * 36) + \
00073                      alignment._star_info[cur_char:(cur_char + show_num)] + "\n"
00074 
00075             output += "\n"
00076             cur_char += show_num
00077 
00078         # Want a trailing blank new line in case the output is concatenated
00079         self.handle.write(output + "\n")
00080 
00081 class ClustalIterator(AlignmentIterator):
00082     """Clustalw alignment iterator."""
00083 
00084     def next(self):
00085         handle = self.handle
00086         try:
00087             #Header we saved from when we were parsing
00088             #the previous alignment.
00089             line = self._header
00090             del self._header
00091         except AttributeError:      
00092             line = handle.readline()
00093         if not line:
00094             raise StopIteration
00095 
00096         #Whitelisted headers we know about
00097         known_headers = ['CLUSTAL', 'PROBCONS', 'MUSCLE']
00098         if line.strip().split()[0] not in known_headers:
00099             raise ValueError("%s is not a known CLUSTAL header: %s" % \
00100                              (line.strip().split()[0],
00101                               ", ".join(known_headers)))
00102 
00103         # find the clustal version in the header line
00104         version = None
00105         for word in line.split():
00106             if word[0]=='(' and word[-1]==')':
00107                 word = word[1:-1]
00108             if word[0] in '0123456789':
00109                 version = word
00110                 break
00111 
00112         #There should be two blank lines after the header line
00113         line = handle.readline()
00114         while line.strip() == "":
00115             line = handle.readline()
00116 
00117         #If the alignment contains entries with the same sequence
00118         #identifier (not a good idea - but seems possible), then this
00119         #dictionary based parser will merge their sequences.  Fix this?
00120         ids = []
00121         seqs = []
00122         consensus = ""
00123         seq_cols = None #: Used to extract the consensus
00124 
00125         #Use the first block to get the sequence identifiers
00126         while True:
00127             if line[0] != " " and line.strip() != "":
00128                 #Sequences identifier...
00129                 fields = line.rstrip().split()
00130 
00131                 #We expect there to be two fields, there can be an optional
00132                 #"sequence number" field containing the letter count.
00133                 if len(fields) < 2 or len(fields) > 3:
00134                     raise ValueError("Could not parse line:\n%s" % line)
00135 
00136                 ids.append(fields[0])
00137                 seqs.append(fields[1])
00138 
00139                 #Record the sequence position to get the consensus
00140                 if seq_cols is None:
00141                     start = len(fields[0]) + line[len(fields[0]):].find(fields[1])
00142                     end = start + len(fields[1])
00143                     seq_cols = slice(start, end)
00144                     del start, end
00145                 assert fields[1] == line[seq_cols]
00146 
00147                 if len(fields) == 3:
00148                     #This MAY be an old style file with a letter count...
00149                     try:
00150                         letters = int(fields[2])
00151                     except ValueError:
00152                         raise ValueError("Could not parse line, bad sequence number:\n%s" % line)
00153                     if len(fields[1].replace("-","")) != letters:
00154                         raise ValueError("Could not parse line, invalid sequence number:\n%s" % line)
00155             elif line[0] == " ":
00156                 #Sequence consensus line...
00157                 assert len(ids) == len(seqs)
00158                 assert len(ids) > 0
00159                 assert seq_cols is not None
00160                 consensus = line[seq_cols]
00161                 assert not line[:seq_cols.start].strip()
00162                 assert not line[seq_cols.stop:].strip()
00163                 #Check for blank line (or end of file)
00164                 line = handle.readline()
00165                 assert line.strip() == ""
00166                 break
00167             else:
00168                 #No consensus
00169                 break
00170             line = handle.readline()
00171             if not line : break #end of file
00172 
00173         assert line.strip() == ""
00174         assert seq_cols is not None
00175 
00176         #Confirm all same length
00177         for s in seqs:
00178             assert len(s) == len(seqs[0])
00179         if consensus:
00180             assert len(consensus) == len(seqs[0])
00181 
00182         #Loop over any remaining blocks...
00183         done = False
00184         while not done:
00185             #There should be a blank line between each block.
00186             #Also want to ignore any consensus line from the
00187             #previous block.
00188             while (not line) or line.strip() == "":
00189                 line = handle.readline()
00190                 if not line : break # end of file
00191             if not line : break # end of file
00192 
00193             if line.split(None,1)[0] in known_headers:
00194                 #Found concatenated alignment.
00195                 done = True
00196                 self._header = line
00197                 break
00198 
00199             for i in range(len(ids)):
00200                 assert line[0] != " ", "Unexpected line:\n%s" % repr(line)
00201                 fields = line.rstrip().split()
00202                 
00203                 #We expect there to be two fields, there can be an optional
00204                 #"sequence number" field containing the letter count.
00205                 if len(fields) < 2 or len(fields) > 3:
00206                     raise ValueError("Could not parse line:\n%s" % repr(line))
00207 
00208                 if fields[0] != ids[i]:
00209                     raise ValueError("Identifiers out of order? Got '%s' but expected '%s'" \
00210                                       % (fields[0], ids[i]))
00211 
00212                 if fields[1] != line[seq_cols]:
00213                     start = len(fields[0]) + line[len(fields[0]):].find(fields[1])
00214                     assert start == seq_cols.start, 'Old location %s -> %i:XX' % (seq_cols, start)
00215                     end = start + len(fields[1])
00216                     seq_cols = slice(start, end)
00217                     del start, end
00218 
00219                 #Append the sequence
00220                 seqs[i] += fields[1]
00221                 assert len(seqs[i]) == len(seqs[0])
00222 
00223                 if len(fields) == 3:
00224                     #This MAY be an old style file with a letter count...
00225                     try:
00226                         letters = int(fields[2])
00227                     except ValueError:
00228                         raise ValueError("Could not parse line, bad sequence number:\n%s" % line)
00229                     if len(seqs[i].replace("-","")) != letters:
00230                         raise ValueError("Could not parse line, invalid sequence number:\n%s" % line)
00231 
00232                 #Read in the next line
00233                 line = handle.readline()
00234             #There should now be a consensus line
00235             if consensus:
00236                 assert line[0] == " "
00237                 assert seq_cols is not None
00238                 consensus += line[seq_cols]
00239                 assert len(consensus) == len(seqs[0])
00240                 assert not line[:seq_cols.start].strip()
00241                 assert not line[seq_cols.stop:].strip()
00242                 #Read in the next line
00243                 line = handle.readline()
00244             
00245 
00246         assert len(ids) == len(seqs)
00247         if len(seqs) == 0 or len(seqs[0]) == 0:
00248             raise StopIteration
00249 
00250         if self.records_per_alignment is not None \
00251         and self.records_per_alignment != len(ids):
00252             raise ValueError("Found %i records in this alignment, told to expect %i" \
00253                              % (len(ids), self.records_per_alignment))
00254 
00255         records = (SeqRecord(Seq(s, self.alphabet), id=i, description=i) \
00256                    for (i,s) in zip(ids, seqs)) 
00257         alignment = MultipleSeqAlignment(records, self.alphabet)
00258         #TODO - Handle alignment annotation better, for now
00259         #mimic the old parser in Bio.Clustalw
00260         if version:
00261             alignment._version = version
00262         if consensus:
00263             alignment_length = len(seqs[0])
00264             assert len(consensus) == alignment_length, \
00265                    "Alignment length is %i, consensus length is %i, '%s'" \
00266                    % (alignment_length, len(consensus), consensus)
00267             alignment._star_info = consensus
00268         return alignment
00269     
00270 if __name__ == "__main__":
00271     print "Running a quick self-test"
00272 
00273     #This is a truncated version of the example in Tests/cw02.aln
00274     #Notice the inclusion of sequence numbers (right hand side)
00275     aln_example1 = \
00276 """CLUSTAL W (1.81) multiple sequence alignment
00277 
00278 
00279 gi|4959044|gb|AAD34209.1|AF069      MENSDSNDKGSDQSAAQRRSQMDRLDREEAFYQFVNNLSEEDYRLMRDNN 50
00280 gi|671626|emb|CAA85685.1|           ---------MSPQTETKASVGFKAGVKEYKLTYYTPEYETKDTDILAAFR 41
00281                                               * *: ::    :.   :*  :  :. : . :*  ::   .
00282 
00283 gi|4959044|gb|AAD34209.1|AF069      LLGTPGESTEEELLRRLQQIKEGPPPQSPDENRAGESSDDVTNSDSIIDW 100
00284 gi|671626|emb|CAA85685.1|           VTPQPG-----------------VPPEEAGAAVAAESSTGT--------- 65
00285                                     :   **                  **:...   *.*** ..         
00286 
00287 gi|4959044|gb|AAD34209.1|AF069      LNSVRQTGNTTRSRQRGNQSWRAVSRTNPNSGDFRFSLEINVNRNNGSQT 150
00288 gi|671626|emb|CAA85685.1|           WTTVWTDGLTSLDRYKG-----RCYHIEPVPG------------------ 92
00289                                      .:*   * *: .* :*        : :* .*                  
00290 
00291 gi|4959044|gb|AAD34209.1|AF069      SENESEPSTRRLSVENMESSSQRQMENSASESASARPSRAERNSTEAVTE 200
00292 gi|671626|emb|CAA85685.1|           -EKDQCICYVAYPLDLFEEGSVTNMFTSIVGNVFGFKALRALRLEDLRIP 141
00293                                      *::.  .    .:: :*..*  :* .*   .. .  :    .  :    
00294 
00295 gi|4959044|gb|AAD34209.1|AF069      VPTTRAQRRA 210
00296 gi|671626|emb|CAA85685.1|           VAYVKTFQGP 151
00297                                     *. .:: : .
00298                                      
00299 """                 
00300 
00301     #This example is a truncated version of the dataset used here:
00302     #http://virgil.ruc.dk/kurser/Sekvens/Treedraw.htm
00303     #with the last record repeated twice (deliberate toture test)
00304     aln_example2 = \
00305 """CLUSTAL X (1.83) multiple sequence alignment
00306 
00307 
00308 V_Harveyi_PATH                 --MKNWIKVAVAAIA--LSAA------------------TVQAATEVKVG
00309 B_subtilis_YXEM                MKMKKWTVLVVAALLAVLSACG------------NGNSSSKEDDNVLHVG
00310 B_subtilis_GlnH_homo_YCKK      MKKALLALFMVVSIAALAACGAGNDNQSKDNAKDGDLWASIKKKGVLTVG
00311 YA80_HAEIN                     MKKLLFTTALLTGAIAFSTF-----------SHAGEIADRVEKTKTLLVG
00312 FLIY_ECOLI                     MKLAHLGRQALMGVMAVALVAG---MSVKSFADEG-LLNKVKERGTLLVG
00313 E_coli_GlnH                    --MKSVLKVSLAALTLAFAVS------------------SHAADKKLVVA
00314 Deinococcus_radiodurans        -MKKSLLSLKLSGLLVPSVLALS--------LSACSSPSSTLNQGTLKIA
00315 HISJ_E_COLI                    MKKLVLSLSLVLAFSSATAAF-------------------AAIPQNIRIG
00316 HISJ_E_COLI                    MKKLVLSLSLVLAFSSATAAF-------------------AAIPQNIRIG
00317                                          : .                                 : :.
00318 
00319 V_Harveyi_PATH                 MSGRYFPFTFVKQ--DKLQGFEVDMWDEIGKRNDYKIEYVTANFSGLFGL
00320 B_subtilis_YXEM                ATGQSYPFAYKEN--GKLTGFDVEVMEAVAKKIDMKLDWKLLEFSGLMGE
00321 B_subtilis_GlnH_homo_YCKK      TEGTYEPFTYHDKDTDKLTGYDVEVITEVAKRLGLKVDFKETQWGSMFAG
00322 YA80_HAEIN                     TEGTYAPFTFHDK-SGKLTGFDVEVIRKVAEKLGLKVEFKETQWDAMYAG
00323 FLIY_ECOLI                     LEGTYPPFSFQGD-DGKLTGFEVEFAQQLAKHLGVEASLKPTKWDGMLAS
00324 E_coli_GlnH                    TDTAFVPFEFKQG--DKYVGFDVDLWAAIAKELKLDYELKPMDFSGIIPA
00325 Deinococcus_radiodurans        MEGTYPPFTSKNE-QGELVGFDVDIAKAVAQKLNLKPEFVLTEWSGILAG
00326 HISJ_E_COLI                    TDPTYAPFESKNS-QGELVGFDIDLAKELCKRINTQCTFVENPLDALIPS
00327 HISJ_E_COLI                    TDPTYAPFESKNS-QGELVGFDIDLAKELCKRINTQCTFVENPLDALIPS
00328                                      **       .:  *::::.   : :.   .        ..:   
00329 
00330 V_Harveyi_PATH                 LETGRIDTISNQITMTDARKAKYLFADPYVVDG-AQI
00331 B_subtilis_YXEM                LQTGKLDTISNQVAVTDERKETYNFTKPYAYAG-TQI
00332 B_subtilis_GlnH_homo_YCKK      LNSKRFDVVANQVG-KTDREDKYDFSDKYTTSR-AVV
00333 YA80_HAEIN                     LNAKRFDVIANQTNPSPERLKKYSFTTPYNYSG-GVI
00334 FLIY_ECOLI                     LDSKRIDVVINQVTISDERKKKYDFSTPYTISGIQAL
00335 E_coli_GlnH                    LQTKNVDLALAGITITDERKKAIDFSDGYYKSG-LLV
00336 Deinococcus_radiodurans        LQANKYDVIVNQVGITPERQNSIGFSQPYAYSRPEII
00337 HISJ_E_COLI                    LKAKKIDAIMSSLSITEKRQQEIAFTDKLYAADSRLV
00338 HISJ_E_COLI                    LKAKKIDAIMSSLSITEKRQQEIAFTDKLYAADSRLV
00339                                *.: . *        .  *     *:          :
00340 
00341 """
00342 
00343     from StringIO import StringIO
00344 
00345     alignments = list(ClustalIterator(StringIO(aln_example1)))
00346     assert 1 == len(alignments)
00347     assert alignments[0]._version == "1.81"
00348     alignment = alignments[0]
00349     assert 2 == len(alignment)
00350     assert alignment[0].id == "gi|4959044|gb|AAD34209.1|AF069"
00351     assert alignment[1].id == "gi|671626|emb|CAA85685.1|"
00352     assert alignment[0].seq.tostring() == \
00353           "MENSDSNDKGSDQSAAQRRSQMDRLDREEAFYQFVNNLSEEDYRLMRDNN" + \
00354           "LLGTPGESTEEELLRRLQQIKEGPPPQSPDENRAGESSDDVTNSDSIIDW" + \
00355           "LNSVRQTGNTTRSRQRGNQSWRAVSRTNPNSGDFRFSLEINVNRNNGSQT" + \
00356           "SENESEPSTRRLSVENMESSSQRQMENSASESASARPSRAERNSTEAVTE" + \
00357           "VPTTRAQRRA"
00358 
00359     alignments = list(ClustalIterator(StringIO(aln_example2)))
00360     assert 1 == len(alignments)
00361     assert alignments[0]._version == "1.83"
00362     alignment = alignments[0]
00363     assert 9 == len(alignment)
00364     assert alignment[-1].id == "HISJ_E_COLI"
00365     assert alignment[-1].seq.tostring() == \
00366           "MKKLVLSLSLVLAFSSATAAF-------------------AAIPQNIRIG" + \
00367           "TDPTYAPFESKNS-QGELVGFDIDLAKELCKRINTQCTFVENPLDALIPS" + \
00368           "LKAKKIDAIMSSLSITEKRQQEIAFTDKLYAADSRLV"
00369 
00370     for alignment in ClustalIterator(StringIO(aln_example2 + aln_example1)):
00371         print "Alignment with %i records of length %i" \
00372               % (len(alignment),
00373                  alignment.get_alignment_length())
00374 
00375     print "Checking empty file..."
00376     assert 0 == len(list(ClustalIterator(StringIO(""))))
00377 
00378     print "Checking write/read..."
00379     alignments = list(ClustalIterator(StringIO(aln_example1))) \
00380                + list(ClustalIterator(StringIO(aln_example2)))*2
00381     handle = StringIO()
00382     ClustalWriter(handle).write_file(alignments)
00383     handle.seek(0)
00384     for i,a in enumerate(ClustalIterator(handle)):
00385         assert a.get_alignment_length() == alignments[i].get_alignment_length()
00386     handle.seek(0)
00387 
00388     print "Testing write/read when there is only one sequence..."
00389     alignment = alignment[0:1]
00390     handle = StringIO()
00391     ClustalWriter(handle).write_file([alignment])
00392     handle.seek(0)
00393     for i,a in enumerate(ClustalIterator(handle)):
00394         assert a.get_alignment_length() == alignment.get_alignment_length()
00395         assert len(a) == 1
00396 
00397     aln_example3 = \
00398 """CLUSTAL 2.0.9 multiple sequence alignment
00399 
00400 
00401 Test1seq             ------------------------------------------------------------
00402 AT3G20900.1-SEQ      ATGAACAAAGTAGCGAGGAAGAACAAAACATCAGGTGAACAAAAAAAAAACTCAATCCAC
00403 AT3G20900.1-CDS      ------------------------------------------------------------
00404                                                                                  
00405 
00406 Test1seq             -----AGTTACAATAACTGACGAAGCTAAGTAGGCTACTAATTAACGTCATCAACCTAAT
00407 AT3G20900.1-SEQ      ATCAAAGTTACAATAACTGACGAAGCTAAGTAGGCTAGAAATTAAAGTCATCAACCTAAT
00408 AT3G20900.1-CDS      ------------------------------------------------------------
00409                                                                                  
00410 
00411 Test1seq             ACATAGCACTTAGAAAAAAGTGAAGTAAGAAAATATAAAATAATAAAAGGGTGGGTTATC
00412 AT3G20900.1-SEQ      ACATAGCACTTAGAAAAAAGTGAAGCAAGAAAATATAAAATAATAAAAGGGTGGGTTATC
00413 AT3G20900.1-CDS      ------------------------------------------------------------
00414                                                                                  
00415 
00416 Test1seq             AATTGATAGTGTAAATCATCGTATTCCGGTGATATACCCTACCACAAAAACTCAAACCGA
00417 AT3G20900.1-SEQ      AATTGATAGTGTAAATCATAGTTGATTTTTGATATACCCTACCACAAAAACTCAAACCGA
00418 AT3G20900.1-CDS      ------------------------------------------------------------
00419                                                                                  
00420 
00421 Test1seq             CTTGATTCAAATCATCTCAATAAATTAGCGCCAAAATAATGAAAAAAATAATAACAAACA
00422 AT3G20900.1-SEQ      CTTGATTCAAATCATCTCAAAAAACAAGCGCCAAAATAATGAAAAAAATAATAACAAAAA
00423 AT3G20900.1-CDS      ------------------------------------------------------------
00424                                                                                  
00425 
00426 Test1seq             AAAACAAACCAAAATAAGAAAAAACATTACGCAAAACATAATAATTTACTCTTCGTTATT
00427 AT3G20900.1-SEQ      CAAACAAACCAAAATAAGAAAAAACATTACGCAAAACATAATAATTTACTCTTCGTTATT
00428 AT3G20900.1-CDS      ------------------------------------------------------------
00429                                                                                  
00430 
00431 Test1seq             GTATTAACAAATCAAAGAGCTGAATTTTGATCACCTGCTAATACTACTTTCTGTATTGAT
00432 AT3G20900.1-SEQ      GTATTAACAAATCAAAGAGATGAATTTTGATCACCTGCTAATACTACTTTCTGTATTGAT
00433 AT3G20900.1-CDS      ------------------------------------------------------------
00434                                                                                  
00435 
00436 Test1seq             CCTATATCAACGTAAACAAAGATACTAATAATTAACTAAAAGTACGTTCATCGATCGTGT
00437 AT3G20900.1-SEQ      CCTATATCAAAAAAAAAAAAGATACTAATAATTAACTAAAAGTACGTTCATCGATCGTGT
00438 AT3G20900.1-CDS      ------------------------------------------------------ATGAAC
00439                                                                              *   
00440 
00441 Test1seq             TCGTTGACGAAGAAGAGCTCTATCTCCGGCGGAGCAAAGAAAACGATCTGTCTCCGTCGT
00442 AT3G20900.1-SEQ      GCGTTGACGAAGAAGAGCTCTATCTCCGGCGGAGCAAAGAAAACGATCTGTCTCCGTCGT
00443 AT3G20900.1-CDS      AAAGTAGCGAGGAAGAACAAAACATC------AGCAAAGAAAACGATCTGTCTCCGTCGT
00444                          *  *** ***** *   *  **      ****************************
00445 
00446 Test1seq             AACACACGGTCGCTAGAGAAACTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCC
00447 AT3G20900.1-SEQ      AACACACAGTTTTTCGAGACCCTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCC
00448 AT3G20900.1-CDS      AACACACAGTTTTTCGAGACCCTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCC
00449                      ******* **   * ****  ***************************************
00450 
00451 Test1seq             GGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCGTGGTGACGTCAGCACCGCT
00452 AT3G20900.1-SEQ      GGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCCTGGTGACGTCAGCACCGCT
00453 AT3G20900.1-CDS      GGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCCTGGTGACGTCAGCACCGCT
00454                      **************************************** *******************
00455 
00456 Test1seq             GCTGGGGATGGAGAGGGAACAGAGTT-
00457 AT3G20900.1-SEQ      GCTGGGGATGGAGAGGGAACAGAGTAG
00458 AT3G20900.1-CDS      GCTGGGGATGGAGAGGGAACAGAGTAG
00459                      *************************  
00460 """
00461     alignments = list(ClustalIterator(StringIO(aln_example3)))
00462     assert 1 == len(alignments)
00463     assert alignments[0]._version == "2.0.9"
00464 
00465     print "The End"