Back to index

python-biopython  1.60
EmbossIO.py
Go to the documentation of this file.
00001 # Copyright 2008-2010 by Peter Cock.  All rights reserved.
00002 #
00003 # This code is part of the Biopython distribution and governed by its
00004 # license.  Please see the LICENSE file that should have been included
00005 # as part of this package.
00006 """
00007 Bio.AlignIO support for the "emboss" alignment output from EMBOSS tools.
00008 
00009 You are expected to use this module via the Bio.AlignIO functions (or the
00010 Bio.SeqIO functions if you want to work directly with the gapped sequences).
00011 
00012 This module contains a parser for the EMBOSS pairs/simple file format, for
00013 example from the alignret, water and needle tools.
00014 """
00015 
00016 from Bio.Seq import Seq
00017 from Bio.SeqRecord import SeqRecord
00018 from Bio.Align import MultipleSeqAlignment
00019 from Interfaces import AlignmentIterator, SequentialAlignmentWriter
00020 
class EmbossWriter(SequentialAlignmentWriter):
    """Emboss alignment writer (WORK IN PROGRESS).

    Writes a simplified version of the EMBOSS pairs/simple file format.
    A lot of the information their tools record in their headers is not
    available and is omitted.

    Only the file banner, the per-alignment header and the file footer are
    implemented.  The aligned sequence rows are not written yet, so
    write_alignment always finishes by raising NotImplementedError.
    """

    def write_header(self):
        """Write the file level banner, naming Biopython as the program.

        The optional "Report_file" line is only written when the handle
        has a ``name`` attribute.
        """
        handle = self.handle
        handle.write("########################################\n")
        handle.write("# Program: Biopython\n")
        try:
            # Only the attribute lookup is guarded, so that an
            # AttributeError raised while writing is not hidden.
            report_file = handle.name
        except AttributeError:
            pass
        else:
            handle.write("# Report_file: %s\n" % report_file)
        handle.write("########################################\n")

    def write_footer(self):
        """Write the two dashed rule lines which close the file."""
        handle = self.handle
        handle.write("#---------------------------------------\n")
        handle.write("#---------------------------------------\n")

    def write_alignment(self, alignment):
        """Use this to write (another) single alignment to an open file.

        Writes the per-alignment header (number of records, their
        identifiers and the alignment length) and then raises
        NotImplementedError because writing the sequence rows is not
        implemented.
        """
        handle = self.handle
        handle.write("#=======================================\n")
        handle.write("#\n")
        handle.write("# Aligned_sequences: %i\n" % len(alignment))
        for i, record in enumerate(alignment):
            handle.write("# %i: %s\n" % (i+1, record.id))
        handle.write("#\n")
        handle.write("# Length: %i\n" % alignment.get_alignment_length())
        handle.write("#\n")
        handle.write("#=======================================\n")
        handle.write("\n")
        # An explicit exception (rather than "assert False") so that the
        # failure is still reported when Python runs with -O, instead of
        # silently leaving a header-only file behind.
        raise NotImplementedError("Writing the aligned sequences in the "
                                  "EMBOSS format is not implemented yet")
00059 
class EmbossIterator(AlignmentIterator):
    """Emboss alignment iterator.

    For reading the (pairwise) alignments from EMBOSS tools in what they
    call the "pairs" and "simple" formats.

    Malformed input is reported with ValueError rather than with assert
    statements, so the checks are still made under "python -O".
    """

    # Rule line which opens and closes each per-alignment header.
    _ALIGNMENT_RULE = "#======================================="
    # Rule line used for the footer at the end of the file.
    _FOOTER_RULE = "#---------------------------------------"
    # Width of the leading "identifier + start coordinate" field of a
    # sequence row; the residues and the end coordinate follow it.
    _ID_FIELD_WIDTH = 21

    def next(self):
        """Parse the next alignment from the handle and return it.

        Returns a MultipleSeqAlignment holding one SeqRecord per aligned
        sequence, using the full identifiers from the alignment header.

        Raises StopIteration when no further alignment header is found,
        and ValueError when the header or the sequence rows are malformed
        or disagree with self.records_per_alignment.
        """
        handle = self.handle

        try:
            #Header we saved from when we were parsing
            #the previous alignment.
            line = self._header
            del self._header
        except AttributeError:
            line = handle.readline()
        if not line:
            raise StopIteration

        #Skip the file banner (or the previous footer) until the rule
        #line which opens the next alignment header.
        while line.rstrip() != self._ALIGNMENT_RULE:
            line = handle.readline()
            if not line:
                raise StopIteration

        length_of_seqs = None
        number_of_seqs = None
        ids = []

        #Testing "line" first copes with a file truncated inside the
        #header (indexing an empty string would raise IndexError).
        while line and line[0] == "#":
            #Read in the rest of this alignment header,
            #try and discover the number of records expected
            #and their length
            parts = line[1:].split(":", 1)
            key = parts[0].lower().strip()
            if key == "aligned_sequences":
                if ids:
                    raise ValueError("Repeated Aligned_sequences line in "
                                     "alignment header:\n%s" % line)
                number_of_seqs = int(parts[1].strip())
                # Should now expect the record identifiers...
                for i in range(number_of_seqs):
                    line = handle.readline()
                    parts = line[1:].strip().split(":", 1)
                    if len(parts) != 2 or int(parts[0].strip()) != i+1:
                        raise ValueError("Expected identifier line %i of %i "
                                         "in alignment header, found:\n%s"
                                         % (i+1, number_of_seqs, line))
                    ids.append(parts[1].strip())
            elif key == "length":
                length_of_seqs = int(parts[1].strip())

            #And read in another line...
            line = handle.readline()

        if number_of_seqs is None:
            raise ValueError("Number of sequences missing!")
        if length_of_seqs is None:
            raise ValueError("Length of sequences missing!")

        if self.records_per_alignment is not None \
        and self.records_per_alignment != number_of_seqs:
            raise ValueError("Found %i records in this alignment, told to expect %i" \
                             % (number_of_seqs, self.records_per_alignment))

        seqs = [""] * len(ids)
        #Number of non-gap letters collected so far for each sequence,
        #kept up to date so the growing sequences need not be re-scanned.
        letters = [0] * len(ids)
        seq_starts = []
        index = 0

        #Parse the seqs
        while line:
            if len(line) > self._ID_FIELD_WIDTH:
                id_start = line[:self._ID_FIELD_WIDTH].strip().split(None, 1)
                seq_end = line[self._ID_FIELD_WIDTH:].strip().split(None, 1)
                if len(id_start) == 2 and len(seq_end) == 2:
                    #identifier, seq start position, seq, seq end position
                    #(an aligned seq is broken up into multiple lines)
                    name, start = id_start
                    seq, end = seq_end
                    row_letters = len(seq) - seq.count("-")
                    #Compared as text, exactly as written in the file
                    same_coords = (start == end)
                    start = int(start)
                    end = int(end)
                    if row_letters:
                        start -= 1 #python counting
                    elif not same_coords:
                        #A gap only row must repeat the same coordinate
                        raise ValueError("Gap only row with different start "
                                         "and end positions:\n%s" % line)

                    if not (0 <= index < number_of_seqs):
                        raise ValueError("Expected index %i in range [0,%i)" \
                                         % (index, number_of_seqs))
                    #The identifier is truncated...
                    if not ids[index].startswith(name):
                        raise ValueError("Identifier %r does not match %r "
                                         "from the alignment header:\n%s"
                                         % (name, ids[index], line))

                    if len(seq_starts) == index:
                        #Record the start
                        seq_starts.append(start)

                    #Check the start...
                    if start == end:
                        if row_letters:
                            raise ValueError("Start and end positions leave "
                                             "no room for the letters:\n%s"
                                             % line)
                    elif start - seq_starts[index] != letters[index]:
                        raise ValueError(
                            "Found %i chars so far for sequence %i (%s, %s), line says start %i:\n%s" \
                            % (letters[index], index, name, repr(seqs[index]),
                               start, line))

                    seqs[index] += seq
                    letters[index] += row_letters

                    #Check the end ...
                    if end != seq_starts[index] + letters[index]:
                        raise ValueError(
                            "Found %i chars so far for sequence %i (%s, %s, start=%i), file says end %i:\n%s" \
                            % (letters[index], index, name, repr(seqs[index]),
                               seq_starts[index], end, line))

                    index += 1
                    if index >= number_of_seqs:
                        index = 0
                else:
                    #just a start value, this is just alignment annotation (?)
                    pass
            elif line.strip() == "":
                #Just a spacer?
                pass
            else:
                raise ValueError("Unrecognised EMBOSS pairwise line: %r" % line)

            line = handle.readline()
            if line.rstrip() in (self._FOOTER_RULE, self._ALIGNMENT_RULE):
                #End of alignment, keep this line for the next call
                self._header = line
                break

        if index != 0:
            raise ValueError("Alignment ended part way through a set of "
                             "rows, after %i of %i sequences"
                             % (index, number_of_seqs))

        records = []
        for name, seq in zip(ids, seqs):
            if len(seq) != length_of_seqs:
                #EMBOSS 2.9.0 is known to use spaces instead of minus signs
                #for leading gaps, and thus fails to parse.  This old version
                #is still used as of Dec 2008 behind the EBI SOAP webservice:
                #http://www.ebi.ac.uk/Tools/webservices/wsdl/WSEmboss.wsdl
                raise ValueError("Error parsing alignment - sequences of "
                                 "different length? You could be using an "
                                 "old version of EMBOSS.")
            records.append(SeqRecord(Seq(seq, self.alphabet), \
                                     id=name, description=name))
        return MultipleSeqAlignment(records, self.alphabet)
00220 
00221 
if __name__ == "__main__":
    # Parenthesised single argument form: prints the same text under
    # Python 2 and is also valid syntax under Python 3.
    print("Running a quick self-test")
00224 
00225     #http://emboss.sourceforge.net/docs/themes/alnformats/align.simple
00226     simple_example = \
00227 """########################################
00228 # Program:  alignret
00229 # Rundate:  Wed Jan 16 17:16:13 2002
00230 # Report_file: stdout
00231 ########################################
00232 #=======================================
00233 #
00234 # Aligned_sequences: 4
00235 # 1: IXI_234
00236 # 2: IXI_235
00237 # 3: IXI_236
00238 # 4: IXI_237
00239 # Matrix: EBLOSUM62
00240 # Gap_penalty: 10.0
00241 # Extend_penalty: 0.5
00242 #
00243 # Length: 131
00244 # Identity:      95/131 (72.5%)
00245 # Similarity:   127/131 (96.9%)
00246 # Gaps:          25/131 (19.1%)
00247 # Score: 100.0
00248 #
00249 #
00250 #=======================================
00251 
00252 IXI_234            1 TSPASIRPPAGPSSRPAMVSSRRTRPSPPGPRRPTGRPCCSAAPRRPQAT     50
00253 IXI_235            1 TSPASIRPPAGPSSR---------RPSPPGPRRPTGRPCCSAAPRRPQAT     41
00254 IXI_236            1 TSPASIRPPAGPSSRPAMVSSR--RPSPPPPRRPPGRPCCSAAPPRPQAT     48
00255 IXI_237            1 TSPASLRPPAGPSSRPAMVSSRR-RPSPPGPRRPT----CSAAPRRPQAT     45
00256                      |||||:|||||||||:::::::  |||||:||||:::::|||||:|||||
00257 
00258 IXI_234           51 GGWKTCSGTCTTSTSTRHRGRSGWSARTTTAACLRASRKSMRAACSRSAG    100
00259 IXI_235           42 GGWKTCSGTCTTSTSTRHRGRSGW----------RASRKSMRAACSRSAG     81
00260 IXI_236           49 GGWKTCSGTCTTSTSTRHRGRSGWSARTTTAACLRASRKSMRAACSR--G     96
00261 IXI_237           46 GGYKTCSGTCTTSTSTRHRGRSGYSARTTTAACLRASRKSMRAACSR--G     93
00262                      ||:||||||||||||||||||||:::::::::::|||||||||||||  |
00263 
00264 IXI_234          101 SRPNRFAPTLMSSCITSTTGPPAWAGDRSHE    131
00265 IXI_235           82 SRPNRFAPTLMSSCITSTTGPPAWAGDRSHE    112
00266 IXI_236           97 SRPPRFAPPLMSSCITSTTGPPPPAGDRSHE    127
00267 IXI_237           94 SRPNRFAPTLMSSCLTSTTGPPAYAGDRSHE    124
00268                      |||:||||:|||||:|||||||::|||||||
00269 
00270 
00271 #---------------------------------------
00272 #---------------------------------------
00273 
00274 """
00275     
00276     #http://emboss.sourceforge.net/docs/themes/alnformats/align.pair
00277     pair_example = \
00278 """########################################
00279 # Program:  water
00280 # Rundate:  Wed Jan 16 17:23:19 2002
00281 # Report_file: stdout
00282 ########################################
00283 #=======================================
00284 #
00285 # Aligned_sequences: 2
00286 # 1: IXI_234
00287 # 2: IXI_235
00288 # Matrix: EBLOSUM62
00289 # Gap_penalty: 10.0
00290 # Extend_penalty: 0.5
00291 #
00292 # Length: 131
00293 # Identity:     112/131 (85.5%)
00294 # Similarity:   112/131 (85.5%)
00295 # Gaps:          19/131 (14.5%)
00296 # Score: 591.5
00297 #
00298 #
00299 #=======================================
00300 
00301 IXI_234            1 TSPASIRPPAGPSSRPAMVSSRRTRPSPPGPRRPTGRPCCSAAPRRPQAT     50
00302                      |||||||||||||||         ||||||||||||||||||||||||||
00303 IXI_235            1 TSPASIRPPAGPSSR---------RPSPPGPRRPTGRPCCSAAPRRPQAT     41
00304 
00305 IXI_234           51 GGWKTCSGTCTTSTSTRHRGRSGWSARTTTAACLRASRKSMRAACSRSAG    100
00306                      ||||||||||||||||||||||||          ||||||||||||||||
00307 IXI_235           42 GGWKTCSGTCTTSTSTRHRGRSGW----------RASRKSMRAACSRSAG     81
00308 
00309 IXI_234          101 SRPNRFAPTLMSSCITSTTGPPAWAGDRSHE    131
00310                      |||||||||||||||||||||||||||||||
00311 IXI_235           82 SRPNRFAPTLMSSCITSTTGPPAWAGDRSHE    112
00312 
00313 
00314 #---------------------------------------
00315 #---------------------------------------       
00316 
00317 
00318 """
00319 
00320     pair_example2 = \
00321 """########################################
00322 # Program: needle
00323 # Rundate: Sun 27 Apr 2007 17:20:35
00324 # Commandline: needle
00325 #    [-asequence] Spo0F.faa
00326 #    [-bsequence] paired_r.faa
00327 #    -sformat2 pearson
00328 # Align_format: srspair
00329 # Report_file: ref_rec .needle
00330 ########################################
00331 
00332 #=======================================
00333 #
00334 # Aligned_sequences: 2
00335 # 1: ref_rec
00336 # 2: gi|94968718|receiver
00337 # Matrix: EBLOSUM62
00338 # Gap_penalty: 10.0
00339 # Extend_penalty: 0.5
00340 #
00341 # Length: 124
00342 # Identity:      32/124 (25.8%)
00343 # Similarity:    64/124 (51.6%)
00344 # Gaps:          17/124 (13.7%)
00345 # Score: 112.0
00346 # 
00347 #
00348 #=======================================
00349 
00350 ref_rec            1 KILIVDD----QYGIRILLNEVFNKEGYQTFQAANGLQALDIVTKERPDL     46
00351                       :|:.||    :.|.|::|.:  :.|.....:|.:|.||:.:..:..|.:
00352 gi|94968718|r      1 -VLLADDHALVRRGFRLMLED--DPEIEIVAEAGDGAQAVKLAGELHPRV     47
00353 
00354 ref_rec           47 VLLDMKIPGMDGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALT     96
00355                      |::|..:|||.|::..|:::....:|.|:::|.:.|...::.:.|.||..
00356 gi|94968718|r     48 VVMDCAMPGMSGMDATKQIRTQWPDIAVLMLTMHSEDTWVRLALEAGANG     97
00357 
00358 ref_rec           97 HFAK-PFDIDEIRDAV--------    111
00359                      :..| ..|:|.|: ||        
00360 gi|94968718|r     98 YILKSAIDLDLIQ-AVRRVANGET    120
00361 
00362 
00363 #=======================================
00364 #
00365 # Aligned_sequences: 2
00366 # 1: ref_rec
00367 # 2: gi|94968761|receiver
00368 # Matrix: EBLOSUM62
00369 # Gap_penalty: 10.0
00370 # Extend_penalty: 0.5
00371 #
00372 # Length: 119
00373 # Identity:      34/119 (28.6%)
00374 # Similarity:    58/119 (48.7%)
00375 # Gaps:           9/119 ( 7.6%)
00376 # Score: 154.0
00377 # 
00378 #
00379 #=======================================
00380 
00381 ref_rec            1 KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQALDIVTKERPDLVLLD     50
00382                       ||||||:......|:..|...|::.....|.::||:|...:..||:|.|
00383 gi|94968761|r      1 -ILIVDDEANTLASLSRAFRLAGHEATVCDNAVRALEIAKSKPFDLILSD     49
00384 
00385 ref_rec           51 MKIPGMDGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALTHFAK    100
00386                      :.:||.||:.:|:.:|.......|::|:....::|..::..||||....|
00387 gi|94968761|r     50 VVMPGRDGLTLLEDLKTAGVQAPVVMMSGQAHIEMAVKATRLGALDFLEK     99
00388 
00389 ref_rec          101 PFDIDEIRDAV--------    111
00390                      |...|::...|        
00391 gi|94968761|r    100 PLSTDKLLLTVENALKLKR    118
00392 
00393 
00394 #=======================================
00395 #
00396 # Aligned_sequences: 2
00397 # 1: ref_rec
00398 # 2: gi|94967506|receiver
00399 # Matrix: EBLOSUM62
00400 # Gap_penalty: 10.0
00401 # Extend_penalty: 0.5
00402 #
00403 # Length: 120
00404 # Identity:      29/120 (24.2%)
00405 # Similarity:    53/120 (44.2%)
00406 # Gaps:           9/120 ( 7.5%)
00407 # Score: 121.0
00408 # 
00409 #
00410 #=======================================
00411 
00412 ref_rec            1 -KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQALDIVTKERPDLVLL     49
00413                       .|::|||..|..:.:..||.:.|:..........|.:.:.....||.::
00414 gi|94967506|r      1 LHIVVVDDDPGTCVYIESVFAELGHTCKSFVRPEAAEEYILTHPVDLAIV     50
00415 
00416 ref_rec           50 DMKIPGMDGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALTHFA     99
00417                      |:.:....|:|:|:|.:|....:..:|:|....|:|...|...||:.:..
00418 gi|94967506|r     51 DVYLGSTTGVEVLRRCRVHRPKLYAVIITGQISLEMAARSIAEGAVDYIQ    100
00419 
00420 ref_rec          100 KPFDIDEIRDAV--------    111
00421                      ||.|||.:.:..        
00422 gi|94967506|r    101 KPIDIDALLNIAERALEHKE    120
00423 
00424 
00425 #=======================================
00426 #
00427 # Aligned_sequences: 2
00428 # 1: ref_rec
00429 # 2: gi|94970045|receiver
00430 # Matrix: EBLOSUM62
00431 # Gap_penalty: 10.0
00432 # Extend_penalty: 0.5
00433 #
00434 # Length: 118
00435 # Identity:      30/118 (25.4%)
00436 # Similarity:    64/118 (54.2%)
00437 # Gaps:           9/118 ( 7.6%)
00438 # Score: 126.0
00439 # 
00440 #
00441 #=======================================
00442 
00443 ref_rec            1 KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQALDIVTK--ERPDLVL     48
00444                       :|:|:|:..:|....:.....||:...|.:|.:||.:.:|  ||.|:::
00445 gi|94970045|r      1 -VLLVEDEEALRAAAGDFLETRGYKIMTARDGTEALSMASKFAERIDVLI     49
00446 
00447 ref_rec           49 LDMKIPGMDGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALTHF     98
00448                      .|:.:||:.|..:.:.:..|....:|:.|:.|.: :.:..:.|:.:.:.|
00449 gi|94970045|r     50 TDLVMPGISGRVLAQELVKIHPETKVMYMSGYDD-ETVMVNGEIDSSSAF     98
00450 
00451 ref_rec           99 -AKPFDID----EIRDAV    111
00452                       .|||.:|    :||:.:
00453 gi|94970045|r     99 LRKPFRMDALSAKIREVL    116
00454 
00455 
00456 #=======================================
00457 #
00458 # Aligned_sequences: 2
00459 # 1: ref_rec
00460 # 2: gi|94970041|receiver
00461 # Matrix: EBLOSUM62
00462 # Gap_penalty: 10.0
00463 # Extend_penalty: 0.5
00464 #
00465 # Length: 125
00466 # Identity:      35/125 (28.0%)
00467 # Similarity:    70/125 (56.0%)
00468 # Gaps:          18/125 (14.4%)
00469 # Score: 156.5
00470 # 
00471 #
00472 #=======================================
00473 
00474 ref_rec            1 KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQALDIV--TKERPDLVL     48
00475                      .:|:|:|:.|:|.|:..:.:::||...:|.:|.:||:||  :.::.|::|
00476 gi|94970041|r      1 TVLLVEDEEGVRKLVRGILSRQGYHVLEATSGEEALEIVRESTQKIDMLL     50
00477 
00478 ref_rec           49 LDMKIPGMDGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALTHF     98
00479                      .|:.:.||.|.|:.:|:::...:::||.|:.|.:..:::.    |.||..
00480 gi|94970041|r     51 SDVVLVGMSGRELSERLRIQMPSLKVIYMSGYTDDAIVRH----GVLTES     96
00481 
00482 ref_rec           99 A----KPFDIDEIRDAV--------    111
00483                      |    |||..|.:...|        
00484 gi|94970041|r     97 AEFLQKPFTSDSLLRKVRAVLQKRQ    121
00485 
00486 
00487 #---------------------------------------
00488 #---------------------------------------
00489 
00490 """
00491 
00492     pair_example3 = """########################################
00493 # Program: needle
00494 # Rundate: Mon 14 Jul 2008 11:45:42
00495 # Commandline: needle
00496 #    [-asequence] asis:TGTGGTTAGGTTTGGTTTTATTGGGGGCTTGGTTTGGGCCCACCCCAAATAGGGAGTGGGGGTATGACCTCAGATAGACGAGCTTATTTTAGGGCGGCGACTATAATTATTTCGTTTCCTACAAGGATTAAAGTTTTTTCTTTTACTGTGGGAGGGGGTTTGGTATTAAGAAACGCTAGTCCGGATGTGGCTCTCCATGATACTTATTGTGTAGTAGCTCATTTTCATTATGTTCTTCGAATGGGAGCAGTCATTGGTATTTTTTTGGTTTTTTTTTGAAATTTTTAGGTTATTTAGACCATTTTTTTTTGTTTCGCTAATTAGAATTTTATTAGCCTTTGGTTTTTTTTTATTTTTTGGGGTTAAGACAAGGTGTCGTTGAATTAGTTTAGCAAAATACTGCTTAAGGTAGGCTATAGGATCTACCTTTTATCTTTCTAATCTTTTGTTTTAGTATAATTGGTCTTCGATTCAACAATTTTTAGTCTTCAGTCTTTTTTTTTATTTTGAAAAGGTTTTAACACTCTTGGTTTTGGAGGCTTTGGCTTTCTTCTTACTCTTAGGAGGATGGGCGCTAGAAAGAGTTTTAAGAGGGTGTGAAAGGGGGTTAATAGC
00497 #    [-bsequence] asis:TTATTAATCTTATGGTTTTGCCGTAAAATTTCTTTCTTTATTTTTTATTGTTAGGATTTTGTTGATTTTATTTTTCTCAAGAATTTTTAGGTCAATTAGACCGGCTTATTTTTTTGTCAGTGTTTAAAGTTTTATTAATTTTTGGGGGGGGGGGGAGACGGGGTGTTATCTGAATTAGTTTTTGGGAGTCTCTAGACATCTCATGGGTTGGCCGGGGGCCTGCCGTCTATAGTTCTTATTCCTTTTAAGGGAGTAAGAATTTCGATTCAGCAACTTTAGTTCACAGTCTTTTTTTTTATTAAGAAAGGTTT
00498 #    -filter
00499 # Align_format: srspair
00500 # Report_file: stdout
00501 ########################################
00502 
00503 #=======================================
00504 #
00505 # Aligned_sequences: 2
00506 # 1: asis
00507 # 2: asis
00508 # Matrix: EDNAFULL
00509 # Gap_penalty: 10.0
00510 # Extend_penalty: 0.5
00511 #
00512 # Length: 667
00513 # Identity:     210/667 (31.5%)
00514 # Similarity:   210/667 (31.5%)
00515 # Gaps:         408/667 (61.2%)
00516 # Score: 561.0
00517 # 
00518 #
00519 #=======================================
00520 
00521 asis               1 TGTGGTTAGGTTTGGTTTTATTGGGGGCTTGGTTTGGGCCCACCCCAAAT     50
00522                                                                        
00523 asis               0 --------------------------------------------------      0
00524 
00525 asis              51 AGGGAGTGGGGGTATGACCTCAGATAGACGAGCTTATTTTAGGGCGGCGA    100
00526                                                                        
00527 asis               0 --------------------------------------------------      0
00528 
00529 asis             101 CTATAATTATTTCGTTTCCTACAAGGATTAAAGTTTTTTCTTTTACTGTG    150
00530                                                                        
00531 asis               0 --------------------------------------------------      0
00532 
00533 asis             151 GGAGGGGGTTTGGTATTAAGAAACGCTAGTCCGGATGTGGCTCTCCATGA    200
00534                                  .||||||                               
00535 asis               1 ------------TTATTAA-------------------------------      7
00536 
00537 asis             201 TACTTATTGT------GTAGTAGCTCATTTTCATTATGTTCTTCGAATGG    244
00538                       .|||||.||      |||..|..||  ||||.||||.||.|    ||.|
00539 asis               8 -TCTTATGGTTTTGCCGTAAAATTTC--TTTCTTTATTTTTT----ATTG     50
00540 
00541 asis             245 GAGCAGTCATTGGTATTTTTTTGGTTTTTTTTT------GAAATTTTTAG    288
00542                               ||.|.|||||.|||.||||.||||      | |||||||||
00543 asis              51 ---------TTAGGATTTTGTTGATTTTATTTTTCTCAAG-AATTTTTAG     90
00544 
00545 asis             289 GTTATTTAGACC-----ATTTTTTTTT--GTTTCGCTAATTAGAATTTTA    331
00546                      ||.|.|||||||     ||||||||.|  ||.|      |||.|.|||||
00547 asis              91 GTCAATTAGACCGGCTTATTTTTTTGTCAGTGT------TTAAAGTTTTA    134
00548 
00549 asis             332 TTAGCCTTTGGTTTTTTTTTATTTTT----TGGGGTTAAGACAAGGTGTC    377
00550                      |||                 ||||||    .||||...||||..|||||.
00551 asis             135 TTA-----------------ATTTTTGGGGGGGGGGGGAGACGGGGTGTT    167
00552 
00553 asis             378 GT-TGAATTAGTTTAGCAAAATACTGCTTAAGGTAGGCTATA--------    418
00554                      .| |||||||||||             ||  ||.||.||.||        
00555 asis             168 ATCTGAATTAGTTT-------------TT--GGGAGTCTCTAGACATCTC    202
00556 
00557 asis             419 -------------GGATCTACCTTTTATCTTTCTAAT--CTTTT----GT    449
00558                                   ||..||.||.|.|||..||||.||  |||||    | 
00559 asis             203 ATGGGTTGGCCGGGGGCCTGCCGTCTATAGTTCTTATTCCTTTTAAGGG-    251
00560 
00561 asis             450 TTTAGT-ATAATTGGTCTTCGATTCAACAATTTTTAGTCTTCAGTCTTTT    498
00562                         ||| |.|||     |||||||||.||| .||||||...|||||||||
00563 asis             252 ---AGTAAGAAT-----TTCGATTCAGCAA-CTTTAGTTCACAGTCTTTT    292
00564 
00565 asis             499 TTTTTATTTTGAAAAGGTTTTAACACTCTTGGTTTTGGAGGCTTTGGCTT    548
00566                      ||||||||..| ||||||||                              
00567 asis             293 TTTTTATTAAG-AAAGGTTT------------------------------    311
00568 
00569 asis             549 TCTTCTTACTCTTAGGAGGATGGGCGCTAGAAAGAGTTTTAAGAGGGTGT    598
00570                                                                        
00571 asis             311 --------------------------------------------------    311
00572 
00573 asis             599 GAAAGGGGGTTAATAGC    615
00574                                       
00575 asis             311 -----------------    311
00576 
00577 
00578 #---------------------------------------
00579 #---------------------------------------"""
00580 
00581     from StringIO import StringIO
00582 
00583     alignments = list(EmbossIterator(StringIO(pair_example)))
00584     assert len(alignments) == 1
00585     assert len(alignments[0]) == 2
00586     assert [r.id for r in alignments[0]] \
00587            == ["IXI_234", "IXI_235"]
00588     
00589     alignments = list(EmbossIterator(StringIO(simple_example)))
00590     assert len(alignments) == 1    
00591     assert len(alignments[0]) == 4
00592     assert [r.id for r in alignments[0]] \
00593            == ["IXI_234", "IXI_235", "IXI_236", "IXI_237"]
00594 
00595     alignments = list(EmbossIterator(StringIO(pair_example + simple_example)))
00596     assert len(alignments) == 2    
00597     assert len(alignments[0]) == 2
00598     assert len(alignments[1]) == 4
00599     assert [r.id for r in alignments[0]] \
00600            == ["IXI_234", "IXI_235"]
00601     assert [r.id for r in alignments[1]] \
00602            == ["IXI_234", "IXI_235", "IXI_236", "IXI_237"]
00603 
00604     alignments = list(EmbossIterator(StringIO(pair_example2)))
00605     assert len(alignments) == 5
00606     assert len(alignments[0]) == 2
00607     assert [r.id for r in alignments[0]] \
00608            == ["ref_rec", "gi|94968718|receiver"]
00609     assert [r.id for r in alignments[4]] \
00610            == ["ref_rec", "gi|94970041|receiver"]
00611 
00612 
00613     alignments = list(EmbossIterator(StringIO(pair_example3)))
00614     assert len(alignments) == 1
00615     assert len(alignments[0]) == 2
00616     assert [r.id for r in alignments[0]] \
00617            == ["asis","asis"]
00618 
00619     print "Done"