Back to index

python-biopython  1.60
test_SeqIO_features.py
Go to the documentation of this file.
00001 # Copyright 2009-2011 by Peter Cock.  All rights reserved.
00002 # This code is part of the Biopython distribution and governed by its
00003 # license.  Please see the LICENSE file that should have been included
00004 # as part of this package.
00005 
00006 """SeqFeature related tests for SeqRecord objects from Bio.SeqIO.
00007 
00008 Initially this takes matched tests of GenBank and FASTA files from the NCBI
00009 and confirms they are consistent using our different parsers.
00010 """
00011 import os
00012 import unittest
00013 from Bio.Alphabet import generic_dna, generic_rna, generic_protein
00014 from Bio import SeqIO
00015 from Bio.Seq import Seq, UnknownSeq, MutableSeq, reverse_complement
00016 from Bio.SeqRecord import SeqRecord
00017 from Bio.SeqFeature import SeqFeature, FeatureLocation, ExactPosition, \
00018                            BeforePosition, AfterPosition, OneOfPosition, \
00019                            WithinPosition
00020 from StringIO import StringIO
00021 from Bio.SeqIO.InsdcIO import _insdc_feature_location_string
00022 
00023 #Top level function as this makes it easier to use for debugging:
00024 def write_read(filename, in_format="gb", out_formats=["gb", "embl", "imgt"]):
00025     for out_format in out_formats:
00026         gb_records = list(SeqIO.parse(open(filename),in_format))
00027         #Write it out...
00028         handle = StringIO()
00029         SeqIO.write(gb_records, handle, out_format)
00030         handle.seek(0)
00031         #Now load it back and check it agrees,
00032         gb_records2 = list(SeqIO.parse(handle,out_format))
00033         compare_records(gb_records, gb_records2)
00034 
00035 def compare_record(old, new, expect_minor_diffs=False):
00036     #Note the name matching is a bit fuzzy
00037     if not expect_minor_diffs \
00038     and old.id != new.id and old.name != new.name \
00039     and (old.id not in new.id) and (new.id not in old.id) \
00040     and (old.id.replace(" ","_") != new.id.replace(" ","_")):
00041         raise ValueError("'%s' or '%s' vs '%s' or '%s' records" \
00042                          % (old.id, old.name, new.id, new.name))
00043     if len(old.seq) != len(new.seq):
00044         raise ValueError("%i vs %i" % (len(old.seq), len(new.seq)))
00045     if isinstance(old.seq, UnknownSeq) \
00046     and isinstance(new.seq, UnknownSeq):
00047         #Jython didn't like us comparing the string of very long
00048         #UnknownSeq object (out of heap memory error)
00049         if old.seq._character.upper() != new.seq._character:
00050             raise ValueError("%s vs %s" % (repr(old.seq), repr(new.seq)))
00051     elif str(old.seq).upper() != str(new.seq).upper():
00052         if len(old.seq) < 200:
00053             raise ValueError("'%s' vs '%s'" % (old.seq, new.seq))
00054         else:
00055             raise ValueError("'%s...' vs '%s...'" % (old.seq[:100], new.seq[:100]))
00056     if old.features and new.features:
00057         if not compare_features(old.features, new.features):
00058             return False
00059     #Just insist on at least one word in common:
00060     if (old.description or new.description) \
00061     and not set(old.description.split()).intersection(new.description.split()):
00062         raise ValueError("%s versus %s" \
00063                          % (repr(old.description), repr(new.description)))
00064     #This only checks common annotation
00065     #Would a white list be easier?
00066     for key in set(old.annotations).intersection(new.annotations):
00067         if key in ["data_file_division", "accessions"]:
00068             #TODO - These are not yet supported on output, or
00069             #have other complications (e.g. different number of accessions
00070             #allowed in various file formats)
00071             continue
00072         if key == "comment":
00073             #Ignore whitespace
00074             if old.annotations[key].split() != new.annotations[key].split():
00075                 raise ValueError("Annotation mis-match for comment:\n%s\n%s" \
00076                                 % (old.annotations[key], new.annotations[key]))
00077             continue
00078         if key == "references":
00079             if expect_minor_diffs:
00080                 #TODO - Implement EMBL output of references
00081                 continue
00082             assert len(old.annotations[key]) == len(new.annotations[key])
00083             for r1, r2 in zip(old.annotations[key], new.annotations[key]):
00084                 assert r1.title == r2.title
00085                 assert r1.authors == r2.authors, \
00086                        "Old: '%s'\nNew: '%s'" % (r1.authors, r2.authors)
00087                 assert r1.journal == r2.journal
00088                 if r1.consrtm and r2.consrtm:
00089                     #Not held in EMBL files
00090                     assert r1.consrtm == r2.consrtm
00091                 if r1.medline_id and r2.medline_id:
00092                     #Not held in EMBL files
00093                     assert r1.medline_id == r2.medline_id
00094                 assert r1.pubmed_id == r2.pubmed_id
00095             continue
00096         if repr(old.annotations[key]) != repr(new.annotations[key]):
00097             raise ValueError("Annotation mis-match for %s:\n%s\n%s" \
00098                              % (key, old.annotations[key], new.annotations[key]))
00099     return True
00100 
00101 def compare_records(old_list, new_list, expect_minor_diffs=False):
00102     """Check two lists of SeqRecords agree, raises a ValueError if mismatch."""
00103     if len(old_list) != len(new_list):
00104         raise ValueError("%i vs %i records" % (len(old_list), len(new_list)))
00105     for old, new in zip(old_list, new_list):
00106         if not compare_record(old,new,expect_minor_diffs):
00107             return False
00108     return True
00109 
00110 def compare_feature(old, new, ignore_sub_features=False):
00111     """Check two SeqFeatures agree."""
00112     if old.type != new.type:
00113         raise ValueError("Type %s versus %s" % (repr(old.type), repr(new.type)))
00114     if old.location.nofuzzy_start != new.location.nofuzzy_start \
00115     or old.location.nofuzzy_end != new.location.nofuzzy_end:
00116         raise ValueError("%s versus %s:\n%s\nvs:\n%s" \
00117                          % (old.location, new.location, repr(old), repr(new)))
00118     if old.strand != new.strand:
00119         raise ValueError("Different strand:\n%s\nvs:\n%s" % (repr(old), repr(new)))
00120     if old.ref != new.ref:
00121         raise ValueError("Different ref:\n%s\nvs:\n%s" % (repr(old), repr(new)))
00122     if old.ref_db != new.ref_db:
00123         raise ValueError("Different ref_db:\n%s\nvs:\n%s" % (repr(old), repr(new)))
00124     if old.location_operator != new.location_operator:
00125         raise ValueError("Different location_operator:\n%s\nvs:\n%s" % (repr(old), repr(new)))
00126     if old.location.start != new.location.start \
00127     or str(old.location.start) != str(new.location.start):
00128         raise ValueError("Start %s versus %s:\n%s\nvs:\n%s" \
00129                          % (old.location.start, new.location.start, repr(old), repr(new)))
00130     if old.location.end != new.location.end \
00131     or str(old.location.end) != str(new.location.end):
00132         raise ValueError("End %s versus %s:\n%s\nvs:\n%s" \
00133                          % (old.location.end, new.location.end, repr(old), repr(new)))
00134     if not ignore_sub_features:
00135         if len(old.sub_features) != len(new.sub_features):
00136             raise ValueError("Different sub features")
00137         for a,b in zip(old.sub_features, new.sub_features):
00138             if not compare_feature(a,b):
00139                 return False
00140     #This only checks key shared qualifiers
00141     #Would a white list be easier?
00142     #for key in ["name","gene","translation","codon_table","codon_start","locus_tag"]:
00143     for key in set(old.qualifiers).intersection(new.qualifiers):
00144         if key in ["db_xref","protein_id","product","note"]:
00145             #EMBL and GenBank files are use different references/notes/etc
00146             continue
00147         if old.qualifiers[key] != new.qualifiers[key]:
00148             raise ValueError("Qualifier mis-match for %s:\n%s\n%s" \
00149                              % (key, old.qualifiers[key], new.qualifiers[key]))
00150     return True
00151 
00152 def compare_features(old_list, new_list, ignore_sub_features=False):
00153     """Check two lists of SeqFeatures agree, raises a ValueError if mismatch."""
00154     if len(old_list) != len(new_list):
00155         raise ValueError("%i vs %i features" % (len(old_list), len(new_list)))
00156     for old, new in zip(old_list, new_list):
00157         #This assumes they are in the same order
00158         if not compare_feature(old,new,ignore_sub_features):
00159             return False
00160     return True
00161 
00162 def make_join_feature(f_list, ftype="misc_feature"):
00163     #NOTE - Does NOT reorder the sub-features (which you may
00164     #want to do for reverse strand features...)
00165     if len(set(f.strand for f in f_list))==1:
00166         strand = f_list[0].strand
00167     else:
00168         strand = None
00169     for f in f_list:
00170         f.type=ftype
00171         f.location_operator="join"
00172     jf = SeqFeature(FeatureLocation(f_list[0].location.start,
00173                                     f_list[-1].location.end,
00174                                     strand),
00175                     type=ftype, location_operator="join")
00176     assert jf.location.strand == strand
00177     assert jf.strand == strand
00178     jf.sub_features = f_list
00179     return jf
00180 
00181 #Prepare a single GenBank record with one feature with a %s place holder for
00182 #the feature location
00183 gbk_template = open("GenBank/iro.gb", "rU").read()
00184 gbk_template = gbk_template.replace('     gene            341..756\n'
00185                                     '                     /gene="FTCD"\n',
00186                                     '     misc_feature    %s\n'
00187                                     '                     /note="Test case"\n')
00188 gbk_template = gbk_template.replace('     exon            341..384\n'
00189                                     '                     /gene="FTCD"\n'
00190                                     '                     /number=1\n', '')
00191 gbk_template = gbk_template.replace('     intron          385..617\n'
00192                                     '                     /gene="FTCD"\n'
00193                                     '                     /number=1\n', '')
00194 gbk_template = gbk_template.replace('     exon            618..756\n'
00195                                     '                     /gene="FTCD"\n'
00196                                     '                     /number=2\n', '')
00197 assert len(gbk_template)==4445
00198 assert gbk_template.count("%") == 1, gbk_template
00199 
00200 class SeqFeatureExtractionWritingReading(unittest.TestCase):
00201     """Tests for SeqFeature sequence extract method, writing, and reading."""
00202 
00203     def check(self, parent_seq, feature, answer_str, location_str):
00204         self.assertEqual(location_str,
00205             _insdc_feature_location_string(feature,len(parent_seq)))
00206 
00207         new = feature.extract(parent_seq)
00208         self.assertTrue(isinstance(new, Seq))
00209         self.assertEqual(str(new), answer_str)
00210 
00211         if not feature.sub_features:
00212             new = parent_seq[feature.location.start:feature.location.end]
00213             if feature.strand == -1:
00214                 new = reverse_complement(new)
00215             self.assertEqual(str(new), answer_str)
00216 
00217         new = feature.extract(str(parent_seq))
00218         self.assertTrue(isinstance(new, str))
00219         self.assertEqual(new, answer_str)
00220 
00221         new = feature.extract(parent_seq.tomutable())
00222         self.assertTrue(isinstance(new, Seq)) #Not MutableSeq!
00223         self.assertEqual(str(new), answer_str)
00224 
00225         new = feature.extract(UnknownSeq(len(parent_seq), parent_seq.alphabet))
00226         self.assertTrue(isinstance(new, UnknownSeq))
00227         self.assertEqual(len(new), len(answer_str))
00228         
00229         if _insdc_feature_location_string(feature, 1326) != location_str:
00230             #This is to avoid issues with the N^1 between feature which only
00231             #makes sense at the end of the sequence
00232             return
00233         #This template is DNA, but that will still be OK for protein features
00234         #as they have no strand information... but see below for strand fun
00235         rec = SeqIO.read(StringIO(gbk_template % location_str), "gb")
00236         self.assertEqual(1326, len(rec))
00237         self.assertEqual(2, len(rec.features))
00238         self.assertEqual(rec.features[0].type, "source")
00239         self.assertEqual(rec.features[1].type, "misc_feature")
00240         new_f = rec.features[1]
00241         self.assertEqual(location_str,
00242                          _insdc_feature_location_string(new_f,1326))
00243 
00244         #Checking the strand is tricky - on parsing a GenBank file
00245         #strand +1 is assumed, but our constructed features for the
00246         #unit test have mostly defaulted to strand None.
00247         self.assertEqual(len(feature.sub_features), len(new_f.sub_features))
00248         for f1, f2 in zip(feature.sub_features, new_f.sub_features):
00249             f1.type = "misc_feature" #hack as may not be misc_feature
00250             if f1.strand is None:
00251                 f1.strand = f2.strand #hack as described above
00252             self.assertEqual(f1.strand, f2.strand)
00253             self.assertTrue(compare_feature(f1,f2))
00254         feature.type = "misc_feature" #hack as may not be misc_feature
00255         if not feature.strand:
00256             feature.strand = new_f.strand #hack as above
00257         self.assertEqual(feature.strand, new_f.strand)
00258         self.assertTrue(compare_feature(feature, new_f))
00259         
00260         #Some feature method tests
00261         parent = "ACGT"*250
00262         s = feature.extract(parent)
00263         self.assertEqual(len(feature), len(s))
00264         for i in feature:
00265             self.assertTrue(i in feature)
00266         self.assertEqual(set(feature),
00267                          set(i for i in range(1000) if i in feature))
00268         if feature.strand == +1:
00269             self.assertEqual(s, "".join(parent[i] for i in feature))
00270             
00271 
00272     def test_simple_rna(self):
00273         """Feature on RNA (simple, default strand)"""
00274         s = Seq("GAUCRYWSMKHBVDN", generic_rna)
00275         f = SeqFeature(FeatureLocation(5,10))
00276         self.assertEqual(f.strand, None)
00277         self.assertEqual(f.location.strand, None)
00278         self.check(s, f, "YWSMK", "6..10")
00279 
00280     def test_simple_dna(self):
00281         """Feature on DNA (simple, default strand)"""
00282         s = Seq("GATCRYWSMKHBVDN", generic_dna)
00283         f = SeqFeature(FeatureLocation(5,10))
00284         self.check(s, f, "YWSMK", "6..10")
00285 
00286     def test_single_letter_dna(self):
00287         """Feature on DNA (single letter, default strand)"""
00288         s = Seq("GATCRYWSMKHBVDN", generic_dna)
00289         f = SeqFeature(FeatureLocation(5,6))
00290         self.check(s, f, "Y", "6")
00291 
00292     def test_zero_len_dna(self):
00293         """Feature on DNA (between location, zero length, default strand)"""
00294         s = Seq("GATCRYWSMKHBVDN", generic_dna)
00295         f = SeqFeature(FeatureLocation(5,5))
00296         self.check(s, f, "", "5^6")
00297 
00298     def test_zero_len_dna_end(self):
00299         """Feature on DNA (between location at end, zero length, default strand)"""
00300         s = Seq("GATCRYWSMKHBVDN", generic_dna)
00301         f = SeqFeature(FeatureLocation(15,15))
00302         self.check(s, f, "", "15^1")
00303 
00304     def test_simple_dna_strand0(self):
00305         """Feature on DNA (simple, strand 0)"""
00306         s = Seq("GATCRYWSMKHBVDN", generic_dna)
00307         f = SeqFeature(FeatureLocation(5,10), strand=0)
00308         self.check(s, f, "YWSMK", "6..10")
00309 
00310     def test_simple_dna_strand_none(self):
00311         """Feature on DNA (simple, strand None)"""
00312         s = Seq("GATCRYWSMKHBVDN", generic_dna)
00313         f = SeqFeature(FeatureLocation(5,10), strand=None)
00314         self.check(s, f, "YWSMK", "6..10")
00315 
00316     def test_simple_dna_strand1(self):
00317         """Feature on DNA (simple, strand +1)"""
00318         s = Seq("GATCRYWSMKHBVDN", generic_dna)
00319         f = SeqFeature(FeatureLocation(5,10), strand=1)
00320         self.assertEqual(f.strand, +1)
00321         self.assertEqual(f.location.strand, +1)
00322         self.check(s, f, "YWSMK", "6..10")
00323         
00324     def test_simple_dna_strand_minus(self):
00325         """Feature on DNA (simple, strand -1)"""
00326         s = Seq("GATCRYWSMKHBVDN", generic_dna)
00327         f = SeqFeature(FeatureLocation(5,10), strand=-1)
00328         self.assertEqual(f.strand, -1)
00329         self.assertEqual(f.location.strand, -1)
00330         self.check(s, f, "MKSWR", "complement(6..10)")
00331 
00332     def test_simple_dna_join(self):
00333         """Feature on DNA (join, strand +1)"""
00334         s = Seq("GATCRYWSMKHBVDN", generic_dna)
00335         f1 = SeqFeature(FeatureLocation(5,10), strand=1)
00336         f2 = SeqFeature(FeatureLocation(12,15), strand=1)
00337         f = make_join_feature([f1,f2])
00338         self.check(s, f, "YWSMKVDN", "join(6..10,13..15)")
00339 
00340     def test_simple_dna_join(self):
00341         """Feature on DNA (join, strand -1)"""
00342         s = Seq("AAAAACCCCCTTTTTGGGGG", generic_dna)
00343         f1 = SeqFeature(FeatureLocation(5,10), strand=-1)
00344         f2 = SeqFeature(FeatureLocation(12,15), strand=-1)
00345         f = make_join_feature([f1,f2])
00346         self.check(s, f, reverse_complement("CCCCC"+"TTT"),
00347                    "complement(join(6..10,13..15))")
00348 
00349     def test_simple_dna_join(self):
00350         """Feature on DNA (join, strand -1, before position)"""
00351         s = Seq("AAAAACCCCCTTTTTGGGGG", generic_dna)
00352         f1 = SeqFeature(FeatureLocation(BeforePosition(5),10), strand=-1)
00353         f2 = SeqFeature(FeatureLocation(12,15), strand=-1)
00354         f = make_join_feature([f1,f2])
00355         self.check(s, f, reverse_complement("CCCCC"+"TTT"),
00356                    "complement(join(<6..10,13..15))")
00357 
00358     def test_simple_dna_join_after(self):
00359         """Feature on DNA (join, strand -1, after position)"""
00360         s = Seq("AAAAACCCCCTTTTTGGGGG", generic_dna)
00361         f1 = SeqFeature(FeatureLocation(5,10), strand=-1)
00362         f2 = SeqFeature(FeatureLocation(12,AfterPosition(15)), strand=-1)
00363         f = make_join_feature([f1,f2])
00364         self.check(s, f, reverse_complement("CCCCC"+"TTT"),
00365                    "complement(join(6..10,13..>15))")
00366 
00367     def test_mixed_strand_dna_join(self):
00368         """Feature on DNA (join, mixed strand)"""
00369         s = Seq("AAAAACCCCCTTTTTGGGGG", generic_dna)
00370         f1 = SeqFeature(FeatureLocation(5,10), strand=+1)
00371         f2 = SeqFeature(FeatureLocation(12,15), strand=-1)
00372         f = make_join_feature([f1,f2])
00373         self.check(s, f, "CCCCC"+reverse_complement("TTT"),
00374                    "join(6..10,complement(13..15))")
00375 
00376     def test_mixed_strand_dna_multi_join(self):
00377         """Feature on DNA (multi-join, mixed strand)"""
00378         s = Seq("AAAAACCCCCTTTTTGGGGG", generic_dna)
00379         f1 = SeqFeature(FeatureLocation(5,10), strand=+1)
00380         f2 = SeqFeature(FeatureLocation(12,15), strand=-1)
00381         f3 = SeqFeature(FeatureLocation(BeforePosition(0),5), strand=+1)
00382         f = make_join_feature([f1,f2,f3])
00383         self.check(s, f, "CCCCC"+reverse_complement("TTT")+"AAAAA",
00384                    "join(6..10,complement(13..15),<1..5)")
00385 
00386     def test_protein_simple(self):
00387         """Feature on protein (simple)"""
00388         s = Seq("ABCDEFGHIJKLMNOPQRSTUVWXYZ", generic_protein)
00389         f = SeqFeature(FeatureLocation(5,10))
00390         self.check(s, f, "FGHIJ", "6..10")
00391 
00392     def test_protein_join(self):
00393         """Feature on protein (join)"""
00394         s = Seq("ABCDEFGHIJKLMNOPQRSTUVWXYZ", generic_protein)
00395         f1 = SeqFeature(FeatureLocation(5,10))
00396         f2 = SeqFeature(FeatureLocation(15,20))
00397         f = make_join_feature([f1,f2])
00398         self.check(s, f, "FGHIJ"+"PQRST", "join(6..10,16..20)")
00399 
00400     def test_protein_join_fuzzy(self):
00401         """Feature on protein (fuzzy join)"""
00402         s = Seq("ABCDEFGHIJKLMNOPQRSTUVWXYZ", generic_protein)
00403         f1 = SeqFeature(FeatureLocation(BeforePosition(5),10))
00404         f2 = SeqFeature(FeatureLocation(OneOfPosition(15, (ExactPosition(15),
00405                                                            ExactPosition(16))),
00406                                         AfterPosition(20)))
00407         f = make_join_feature([f1,f2])
00408         self.check(s, f, "FGHIJ"+"PQRST", "join(<6..10,one-of(16,17)..>20)")
00409 
00410     def test_protein_multi_join(self):
00411         """Feature on protein (multi-join)"""
00412         s = Seq("ABCDEFGHIJKLMNOPQRSTUVWXYZ", generic_protein)
00413         f1 = SeqFeature(FeatureLocation(1,2))
00414         f2 = SeqFeature(FeatureLocation(8,9))
00415         f3 = SeqFeature(FeatureLocation(14,16))
00416         f4 = SeqFeature(FeatureLocation(24,25))
00417         f5 = SeqFeature(FeatureLocation(19,20))
00418         f6 = SeqFeature(FeatureLocation(7,8))
00419         f7 = SeqFeature(FeatureLocation(14,15))
00420         f8 = SeqFeature(FeatureLocation(13,14))
00421         f = make_join_feature([f1,f2,f3,f4,f5,f6,f7,f8])
00422         self.check(s, f, "BIOPYTHON", "join(2,9,15..16,25,20,8,15,14)")
00423 
00424     def test_protein_between(self):
00425         """Feature on protein (between location, zero length)"""
00426         s = Seq("ABCDEFGHIJKLMNOPQRSTUVWXYZ", generic_protein)
00427         f = SeqFeature(FeatureLocation(5,5))
00428         self.check(s, f, "", "5^6")
00429 
00430     def test_protein_oneof(self):
00431         """Feature on protein (one-of positions)"""
00432         s = Seq("ABCDEFGHIJKLMNOPQRSTUVWXYZ", generic_protein)
00433         start = OneOfPosition(5, (ExactPosition(5),ExactPosition(7)))
00434         end = OneOfPosition(11, (ExactPosition(10),ExactPosition(11)))
00435         f = SeqFeature(FeatureLocation(start,10))
00436         self.check(s, f, "FGHIJ", "one-of(6,8)..10")
00437         f = SeqFeature(FeatureLocation(start,end))
00438         self.check(s, f, "FGHIJK", "one-of(6,8)..one-of(10,11)")
00439         f = SeqFeature(FeatureLocation(5,end))
00440         self.check(s, f, "FGHIJK", "6..one-of(10,11)")
00441 
00442 
00443 class SeqFeatureCreation(unittest.TestCase):
00444     """Test basic creation of SeqFeatures.
00445     """
00446 
00447     def test_qualifiers(self):
00448         """Pass in qualifiers to SeqFeatures.
00449         """
00450         f = SeqFeature(FeatureLocation(10,20), strand=+1, type="CDS")
00451         self.assertEqual(f.qualifiers, {})
00452         f = SeqFeature(FeatureLocation(10,20), strand=+1, type="CDS",
00453                 qualifiers={"test": ["a test"]})
00454         self.assertEqual(f.qualifiers["test"], ["a test"])
00455 
00456 class FeatureWriting(unittest.TestCase):
00457     def setUp(self):
00458         self.record = SeqRecord(Seq("ACGT"*100, generic_dna),
00459                                 id="Test", name="Test", description="Test")
00460     def write_read_check(self, format):
00461         handle = StringIO()
00462         SeqIO.write([self.record], handle, format)
00463         handle.seek(0)
00464         record2 = SeqIO.read(handle, format)
00465         compare_record(self.record, record2)
00466     
00467     def write_read_checks(self, formats=["gb", "embl", "imgt"]):
00468         for f in formats:
00469             self.write_read_check(f)
00470 
00471     def test_exact(self):
00472         """GenBank/EMBL write/read simple exact locations."""
00473         #Note we don't have to explicitly give an ExactPosition object,
00474         #an integer will also work:
00475         f = SeqFeature(FeatureLocation(10,20), strand=+1, type="CDS")
00476         self.assertEqual(_insdc_feature_location_string(f,100),
00477                          "11..20")
00478         self.assertEqual(_insdc_feature_location_string(f._flip(20),20),
00479                          "complement(1..10)")
00480         self.assertEqual(_insdc_feature_location_string(f._flip(100),100),
00481                          "complement(81..90)")
00482         self.assertEqual(f._flip(100).strand, -1)
00483         self.record.features.append(f)
00484         
00485         f = SeqFeature(FeatureLocation(30,40), strand=-1, type="CDS")
00486         self.assertEqual(_insdc_feature_location_string(f,100),
00487                          "complement(31..40)")
00488         self.assertEqual(_insdc_feature_location_string(f._flip(40),40),
00489                          "1..10")
00490         self.assertEqual(_insdc_feature_location_string(f._flip(100),100),
00491                          "61..70")
00492         self.assertEqual(f._flip(100).strand, +1)
00493         self.record.features.append(f)
00494 
00495         f = SeqFeature(FeatureLocation(ExactPosition(50),ExactPosition(60)), \
00496                        strand=+1, type="CDS")
00497         self.assertEqual(_insdc_feature_location_string(f,100),
00498                          "51..60")
00499         self.assertEqual(_insdc_feature_location_string(f._flip(60),60),
00500                          "complement(1..10)")
00501         self.assertEqual(_insdc_feature_location_string(f._flip(100),100),
00502                          "complement(41..50)")
00503         self.assertEqual(f._flip(100).strand, -1)
00504         self.record.features.append(f)
00505 
00506         self.write_read_checks()
00507         #The write/check won't work on strandless features due to the
00508         #limitations of the GenBank (and EMBL) feature location scheme
00509         for s in [0, None] :
00510             #Check flipping of a simple strand 0 feature:
00511             f = SeqFeature(FeatureLocation(0,100), strand=s, type="source")
00512             self.assertEqual(_insdc_feature_location_string(f,100),
00513                              "1..100")
00514             self.assertEqual(_insdc_feature_location_string(f._flip(100),100),
00515                              "1..100")
00516             self.assertEqual(_insdc_feature_location_string(f._flip(200),200),
00517                              "101..200")
00518             self.assertEqual(f._flip(100).strand, f.strand)
00519 
00520     def test_between(self):
00521         """GenBank/EMBL write/read simple between locations."""
00522         #Note we don't use the BetweenPosition any more!
00523         f = SeqFeature(FeatureLocation(10,10), strand=+1, type="variation")
00524         self.assertEqual(_insdc_feature_location_string(f,100),
00525                          "10^11")
00526         self.assertEqual(_insdc_feature_location_string(f._flip(20),20),
00527                          "complement(10^11)")
00528         self.assertEqual(_insdc_feature_location_string(f._flip(100),100),
00529                          "complement(90^91)")
00530         self.assertEqual(f._flip(100).strand, -1)
00531         self.record.features.append(f)
00532         f = SeqFeature(FeatureLocation(20,20), strand=-1, type="variation")
00533         self.assertEqual(_insdc_feature_location_string(f,100),
00534                          "complement(20^21)")
00535         self.assertEqual(_insdc_feature_location_string(f._flip(40),40),
00536                          "20^21")
00537         self.assertEqual(_insdc_feature_location_string(f._flip(100),100),
00538                          "80^81")
00539         self.assertEqual(f._flip(100).strand, +1)
00540         self.record.features.append(f)
00541         self.write_read_checks()
00542 
00543     def test_join(self):
00544         """GenBank/EMBL write/read simple join locations."""
00545         f1 = SeqFeature(FeatureLocation(10,20), strand=+1)
00546         f2 = SeqFeature(FeatureLocation(25,40), strand=+1)
00547         f = make_join_feature([f1,f2])
00548         self.record.features.append(f)
00549         self.assertEqual(_insdc_feature_location_string(f,500),
00550                          "join(11..20,26..40)")
00551         self.assertEqual(_insdc_feature_location_string(f._flip(60),60),
00552                          "complement(join(21..35,41..50))")
00553         self.assertEqual(_insdc_feature_location_string(f._flip(100),100),
00554                          "complement(join(61..75,81..90))")
00555         for sub_f in f._flip(100).sub_features :
00556             self.assertEqual(sub_f.strand, -1)
00557         self.assertEqual(f._flip(100).strand, -1)
00558         f1 = SeqFeature(FeatureLocation(110,120), strand=+1)
00559         f2 = SeqFeature(FeatureLocation(125,140), strand=+1)
00560         f3 = SeqFeature(FeatureLocation(145,150), strand=+1)
00561         f = make_join_feature([f1,f2,f3], "CDS")
00562         self.assertEqual(_insdc_feature_location_string(f,500),
00563                          "join(111..120,126..140,146..150)")
00564         self.assertEqual(_insdc_feature_location_string(f._flip(150),150),
00565                          "complement(join(1..5,11..25,31..40))")
00566         for sub_f in f._flip(100).sub_features :
00567             self.assertEqual(sub_f.strand,-1)
00568         self.assertEqual(f._flip(100).strand, -1)
00569         self.record.features.append(f)
00570         f1 = SeqFeature(FeatureLocation(210,220), strand=-1)
00571         f2 = SeqFeature(FeatureLocation(225,240), strand=-1)
00572         f = make_join_feature([f1,f2], ftype="gene")
00573         self.assertEqual(_insdc_feature_location_string(f,500),
00574                          "complement(join(211..220,226..240))")
00575         self.assertEqual(_insdc_feature_location_string(f._flip(300),300),
00576                          "join(61..75,81..90)")
00577         for sub_f in f._flip(100).sub_features :
00578             self.assertEqual(sub_f.strand, +1)
00579         self.assertEqual(f._flip(100).strand, +1)
00580         self.record.features.append(f)
00581         f1 = SeqFeature(FeatureLocation(310,320), strand=-1)
00582         f2 = SeqFeature(FeatureLocation(325,340), strand=-1)
00583         f3 = SeqFeature(FeatureLocation(345,350), strand=-1)
00584         f = make_join_feature([f1,f2,f3], "CDS")
00585         self.assertEqual(_insdc_feature_location_string(f,500),
00586                          "complement(join(311..320,326..340,346..350))")
00587         self.assertEqual(_insdc_feature_location_string(f._flip(350),350),
00588                          "join(1..5,11..25,31..40)")
00589         for sub_f in f._flip(100).sub_features :
00590             self.assertEqual(sub_f.strand, +1)
00591         self.assertEqual(f._flip(100).strand, +1)
00592         self.record.features.append(f)
00593         self.write_read_checks()
00594 
00595     def test_fuzzy_join(self):
00596         """Features: write/read fuzzy join locations."""
00597         s = "N"*500
00598         f1 = SeqFeature(FeatureLocation(BeforePosition(10),20), strand=+1)
00599         f2 = SeqFeature(FeatureLocation(25,AfterPosition(40)), strand=+1)
00600         f = make_join_feature([f1,f2])
00601         self.record.features.append(f)
00602         self.assertEqual(_insdc_feature_location_string(f,500),
00603                          "join(<11..20,26..>40)")
00604         self.assertEqual(_insdc_feature_location_string(f._flip(100),100),
00605                          "complement(join(<61..75,81..>90))")
00606         self.assertEqual(f.strand, +1)
00607         for sub_f in f._flip(100).sub_features :
00608             self.assertEqual(sub_f.strand, -1)
00609         self.assertEqual(f._flip(100).strand, -1)
00610         
00611         f1 = SeqFeature(FeatureLocation(OneOfPosition(107, [ExactPosition(107),
00612                                                             ExactPosition(110)]),
00613                                         120),
00614                         strand=+1)
00615         f2 = SeqFeature(FeatureLocation(125,140), strand=+1)
00616         f3 = SeqFeature(FeatureLocation(145,WithinPosition(160, left=150, right=160)), strand=+1)
00617         f = make_join_feature([f1,f2,f3], "CDS")
00618         self.assertEqual(_insdc_feature_location_string(f,500),
00619                          "join(one-of(108,111)..120,126..140,146..(150.160))")
00620         self.assertEqual(len(f), len(f.extract(s)))
00621         self.assertEqual(_insdc_feature_location_string(f._flip(200),200),
00622                          "complement(join((41.51)..55,61..75,81..one-of(90,93)))")
00623         self.assertEqual(f.strand, +1)
00624         for sub_f in f._flip(100).sub_features :
00625             self.assertEqual(sub_f.strand,-1)
00626         self.assertEqual(f._flip(100).strand, -1)
00627         self.record.features.append(f)
00628         
00629         f1 = SeqFeature(FeatureLocation(BeforePosition(210),220), strand=-1)
00630         f2 = SeqFeature(FeatureLocation(225,WithinPosition(244, left=240, right=244)), strand=-1)
00631         f = make_join_feature([f1,f2], "gene")
00632         self.assertEqual(_insdc_feature_location_string(f,500),
00633                          "complement(join(<211..220,226..(240.244)))")
00634         self.assertEqual(len(f), len(f.extract(s)))
00635         self.assertEqual(_insdc_feature_location_string(f._flip(300),300),
00636                          "join((57.61)..75,81..>90)")
00637         self.assertEqual(f.strand, -1)
00638         for sub_f in f._flip(100).sub_features :
00639             self.assertEqual(sub_f.strand, +1)
00640         self.assertEqual(f._flip(100).strand, +1)
00641         self.record.features.append(f)
00642         
00643         f1 = SeqFeature(FeatureLocation(AfterPosition(310),320), strand=-1)
00644         #Note - is one-of(340,337) allowed or should it be one-of(337,340)?
00645         f2 = SeqFeature(FeatureLocation(325,OneOfPosition(340, [ExactPosition(340),
00646                                                                 ExactPosition(337)])),
00647                         strand=-1)
00648         f3 = SeqFeature(FeatureLocation(345,WithinPosition(355, left=350, right=355)), strand=-1)
00649         f = make_join_feature([f1,f2,f3], "CDS")
00650         self.assertEqual(_insdc_feature_location_string(f,500),
00651                          "complement(join(>311..320,326..one-of(340,337),346..(350.355)))")
00652         self.assertEqual(len(f), len(f.extract(s)))
00653         self.assertEqual(_insdc_feature_location_string(f._flip(400),400),
00654                          "join((46.51)..55,one-of(64,61)..75,81..<90)")
00655         self.assertEqual(f.strand, -1)
00656         for sub_f in f._flip(100).sub_features :
00657             self.assertEqual(sub_f.strand, +1)
00658         self.assertEqual(f._flip(100).strand, +1)
00659         self.record.features.append(f)
00660         
00661         self.write_read_checks()
00662 
00663     def test_before(self):
00664         """Features: write/read simple before locations."""
00665         s = "N"*200
00666         f = SeqFeature(FeatureLocation(BeforePosition(5),10), \
00667                        strand=+1, type="CDS")
00668         self.assertEqual(_insdc_feature_location_string(f,100),
00669                          "<6..10")
00670         self.assertEqual(len(f), len(f.extract(s)))
00671         self.assertEqual(_insdc_feature_location_string(f._flip(20),20),
00672                          "complement(11..>15)")
00673         self.assertEqual(f.strand, +1)
00674         self.assertEqual(f._flip(100).strand, -1)
00675         self.record.features.append(f)
00676         
00677         f = SeqFeature(FeatureLocation(BeforePosition(15),BeforePosition(20)), \
00678                        strand=+1, type="CDS")
00679         self.assertEqual(_insdc_feature_location_string(f,100),
00680                          "<16..<20")
00681         self.assertEqual(len(f), len(f.extract(s)))
00682         self.assertEqual(_insdc_feature_location_string(f._flip(20),20),
00683                          "complement(>1..>5)")
00684         self.assertEqual(f.strand, +1)
00685         self.assertEqual(f._flip(100).strand, -1)
00686         self.record.features.append(f)
00687         
00688         f = SeqFeature(FeatureLocation(25,BeforePosition(30)), \
00689                        strand=+1, type="CDS")
00690         self.assertEqual(_insdc_feature_location_string(f,100),
00691                          "26..<30")
00692         self.assertEqual(len(f), len(f.extract(s)))
00693         self.assertEqual(_insdc_feature_location_string(f._flip(40),40),
00694                          "complement(>11..15)")
00695         self.assertEqual(f.strand, +1)
00696         self.assertEqual(f._flip(100).strand, -1)
00697         self.record.features.append(f)
00698         
00699         f = SeqFeature(FeatureLocation(BeforePosition(35),40), \
00700                        strand=-1, type="CDS")
00701         self.assertEqual(_insdc_feature_location_string(f,100),
00702                          "complement(<36..40)")
00703         self.assertEqual(len(f), len(f.extract(s)))
00704         self.assertEqual(_insdc_feature_location_string(f._flip(40),40),
00705                          "1..>5")
00706         self.assertEqual(f.strand, -1)
00707         self.assertEqual(f._flip(100).strand, +1)
00708         self.record.features.append(f)
00709         
00710         f = SeqFeature(FeatureLocation(BeforePosition(45),BeforePosition(50)), \
00711                        strand=-1, type="CDS")
00712         self.assertEqual(_insdc_feature_location_string(f,100),
00713                          "complement(<46..<50)")
00714         self.assertEqual(len(f), len(f.extract(s)))
00715         self.assertEqual(_insdc_feature_location_string(f._flip(100),100),
00716                          ">51..>55")
00717         self.assertEqual(f.strand, -1)
00718         self.assertEqual(f._flip(100).strand, +1)
00719         self.record.features.append(f)
00720         
00721         f = SeqFeature(FeatureLocation(55,BeforePosition(60)), \
00722                        strand=-1, type="CDS")
00723         self.assertEqual(_insdc_feature_location_string(f,100),
00724                          "complement(56..<60)")
00725         self.assertEqual(len(f), len(f.extract(s)))
00726         self.assertEqual(_insdc_feature_location_string(f._flip(100),100),
00727                          ">41..45")
00728         self.assertEqual(f.strand, -1)
00729         self.assertEqual(f._flip(100).strand, +1)
00730         self.record.features.append(f)
00731         
00732         self.write_read_checks()
00733         
00734     def test_after(self):
00735         """Features: write/read simple after locations."""
00736         s = "N" * 200
00737         f = SeqFeature(FeatureLocation(AfterPosition(5),10), \
00738                        strand=+1, type="CDS")
00739         self.assertEqual(_insdc_feature_location_string(f,100),
00740                          ">6..10")
00741         self.assertEqual(len(f), len(f.extract(s)))
00742         self.assertEqual(_insdc_feature_location_string(f._flip(100),100),
00743                          "complement(91..<95)")
00744         self.assertEqual(f.strand, +1)
00745         self.assertEqual(f._flip(100).strand, -1)
00746         self.record.features.append(f)
00747 
00748         f = SeqFeature(FeatureLocation(AfterPosition(15),AfterPosition(20)), \
00749                        strand=+1, type="CDS")
00750         self.assertEqual(_insdc_feature_location_string(f,100),
00751                          ">16..>20")
00752         self.assertEqual(len(f), len(f.extract(s)))
00753         self.assertEqual(_insdc_feature_location_string(f._flip(20),20),
00754                          "complement(<1..<5)")
00755         self.assertEqual(f.strand, +1)
00756         self.assertEqual(f._flip(100).strand, -1)
00757         self.record.features.append(f)
00758 
00759         f = SeqFeature(FeatureLocation(25,AfterPosition(30)), \
00760                        strand=+1, type="CDS")
00761         self.assertEqual(_insdc_feature_location_string(f,100),
00762                          "26..>30")
00763         self.assertEqual(len(f), len(f.extract(s)))
00764         self.assertEqual(_insdc_feature_location_string(f._flip(30),30),
00765                          "complement(<1..5)")
00766         self.assertEqual(f.strand, +1)
00767         self.assertEqual(f._flip(100).strand, -1)
00768         self.record.features.append(f)
00769 
00770         f = SeqFeature(FeatureLocation(AfterPosition(35),40), \
00771                        strand=-1, type="CDS")
00772         self.assertEqual(_insdc_feature_location_string(f,100),
00773                          "complement(>36..40)")
00774         self.assertEqual(len(f), len(f.extract(s)))
00775         self.assertEqual(_insdc_feature_location_string(f._flip(100),100),
00776                          "61..<65")
00777         self.assertEqual(f.strand, -1)
00778         self.assertEqual(f._flip(100).strand, +1)
00779         self.record.features.append(f)
00780 
00781         f = SeqFeature(FeatureLocation(AfterPosition(45),AfterPosition(50)), \
00782                        strand=-1, type="CDS")
00783         self.assertEqual(_insdc_feature_location_string(f,100),
00784                          "complement(>46..>50)")
00785         self.assertEqual(len(f), len(f.extract(s)))
00786         self.assertEqual(_insdc_feature_location_string(f._flip(100),100),
00787                          "<51..<55")
00788         self.assertEqual(f.strand, -1)
00789         self.assertEqual(f._flip(100).strand, +1)
00790         self.record.features.append(f)
00791 
00792         f = SeqFeature(FeatureLocation(55,AfterPosition(60)), \
00793                        strand=-1, type="CDS")
00794         self.assertEqual(_insdc_feature_location_string(f,100),
00795                          "complement(56..>60)")
00796         self.assertEqual(len(f), len(f.extract(s)))
00797         self.assertEqual(_insdc_feature_location_string(f._flip(100),100),
00798                          "<41..45")
00799         self.assertEqual(f.strand, -1)
00800         self.assertEqual(f._flip(100).strand, +1)
00801         self.record.features.append(f)
00802 
00803         self.write_read_checks()
00804 
00805     def test_oneof(self):
00806         """Features: write/read simple one-of locations."""
00807         s = "N" * 100
00808         start = OneOfPosition(0, [ExactPosition(0),ExactPosition(3),ExactPosition(6)])
00809         f = SeqFeature(FeatureLocation(start,21), strand=+1, type="CDS")
00810         self.assertEqual(_insdc_feature_location_string(f,100),
00811                          "one-of(1,4,7)..21")
00812         self.assertEqual(len(f), len(f.extract(s)))
00813         self.assertEqual(_insdc_feature_location_string(f._flip(100),100),
00814                          "complement(80..one-of(94,97,100))")
00815         self.assertEqual(f.strand, +1)
00816         self.assertEqual(f._flip(100).strand, -1)
00817         self.record.features.append(f)
00818         
00819         start = OneOfPosition(10, [ExactPosition(x) for x in [10,13,16]])
00820         end = OneOfPosition(50, [ExactPosition(x) for x in [41,44,50]])
00821         f = SeqFeature(FeatureLocation(start,end), strand=+1, type="gene")
00822         self.assertEqual(_insdc_feature_location_string(f,100),
00823                          "one-of(11,14,17)..one-of(41,44,50)")
00824         self.assertEqual(len(f), len(f.extract(s)))
00825         self.assertEqual(_insdc_feature_location_string(f._flip(50),50),
00826                          "complement(one-of(1,7,10)..one-of(34,37,40))")
00827         self.assertEqual(f.strand, +1)
00828         self.assertEqual(f._flip(100).strand, -1)
00829         self.record.features.append(f)
00830         
00831         end = OneOfPosition(33, [ExactPosition(x) for x in [30,33]])
00832         f = SeqFeature(FeatureLocation(27,end), strand=+1, type="gene")
00833         self.assertEqual(_insdc_feature_location_string(f,100),
00834                          "28..one-of(30,33)")
00835         self.assertEqual(len(f), len(f.extract(s)))
00836         self.assertEqual(_insdc_feature_location_string(f._flip(40),40),
00837                          "complement(one-of(8,11)..13)")
00838         self.assertEqual(f.strand, +1)
00839         self.assertEqual(f._flip(100).strand, -1)
00840         self.record.features.append(f)
00841         
00842         start = OneOfPosition(36, [ExactPosition(x) for x in [36,40]])
00843         f = SeqFeature(FeatureLocation(start,46), strand=-1, type="CDS")
00844         self.assertEqual(_insdc_feature_location_string(f,100),
00845                          "complement(one-of(37,41)..46)")
00846         self.assertEqual(len(f), len(f.extract(s)))
00847         self.assertEqual(_insdc_feature_location_string(f._flip(50),50),
00848                          "5..one-of(10,14)")
00849         self.assertEqual(f.strand, -1)
00850         self.assertEqual(f._flip(100).strand, +1)
00851         self.record.features.append(f)
00852         
00853         start = OneOfPosition(45, [ExactPosition(x) for x in [45,60]])
00854         end = OneOfPosition(90, [ExactPosition(x) for x in [70,90]])
00855         f = SeqFeature(FeatureLocation(start,end), strand=-1, type="CDS")
00856         self.assertEqual(_insdc_feature_location_string(f,100),
00857                          "complement(one-of(46,61)..one-of(70,90))")
00858         self.assertEqual(len(f), len(f.extract(s)))
00859         self.assertEqual(_insdc_feature_location_string(f._flip(100),100),
00860                          "one-of(11,31)..one-of(40,55)")
00861         self.assertEqual(f.strand, -1)
00862         self.assertEqual(f._flip(100).strand, +1)
00863         self.record.features.append(f)
00864         
00865         end = OneOfPosition(63, [ExactPosition(x) for x in [60,63]])
00866         f = SeqFeature(FeatureLocation(55,end), strand=-1, type="tRNA")
00867         self.assertEqual(_insdc_feature_location_string(f,100),
00868                          "complement(56..one-of(60,63))")
00869         self.assertEqual(len(f), len(f.extract(s)))
00870         self.assertEqual(_insdc_feature_location_string(f._flip(100),100),
00871                          "one-of(38,41)..45")
00872         self.assertEqual(f.strand, -1)
00873         self.assertEqual(f._flip(100).strand, +1)
00874         self.record.features.append(f)
00875         
00876         self.write_read_checks()
00877 
00878     def test_within(self):
00879         """Features: write/read simple within locations."""
00880         s = "N" * 100
00881         f = SeqFeature(FeatureLocation(WithinPosition(2, left=2, right=8),10), \
00882                        strand=+1, type="CDS")
00883         self.assertEqual(_insdc_feature_location_string(f,100),
00884                          "(3.9)..10")
00885         self.assertEqual(len(f), len(f.extract(s)))
00886         self.assertEqual(_insdc_feature_location_string(f._flip(20),20),
00887                          "complement(11..(12.18))")
00888         self.assertEqual(f.strand, +1)
00889         self.assertEqual(f._flip(100).strand, -1)
00890         self.record.features.append(f)
00891         
00892         f = SeqFeature(FeatureLocation(WithinPosition(12, left=12, right=18),
00893                                        WithinPosition(28, left=20, right=28)), \
00894                        strand=+1, type="CDS")
00895         self.assertEqual(_insdc_feature_location_string(f,100),
00896                          "(13.19)..(20.28)")
00897         self.assertEqual(len(f), len(f.extract(s)))
00898         self.assertEqual(_insdc_feature_location_string(f._flip(30),30),
00899                          "complement((3.11)..(12.18))")
00900         self.assertEqual(f.strand, +1)
00901         self.assertEqual(f._flip(100).strand, -1)
00902         self.record.features.append(f)
00903         
00904         f = SeqFeature(FeatureLocation(25,WithinPosition(33, left=30, right=33)), \
00905                        strand=+1, type="misc_feature")
00906         self.assertEqual(_insdc_feature_location_string(f,100),
00907                          "26..(30.33)")
00908         self.assertEqual(len(f), len(f.extract(s)))
00909         self.assertEqual(_insdc_feature_location_string(f._flip(40),40),
00910                          "complement((8.11)..15)")
00911         self.assertEqual(f.strand, +1)
00912         self.assertEqual(f._flip(100).strand, -1)
00913         self.record.features.append(f)
00914         
00915         f = SeqFeature(FeatureLocation(WithinPosition(35, left=35, right=39),40), \
00916                        strand=-1, type="rRNA")
00917         self.assertEqual(_insdc_feature_location_string(f,100),
00918                          "complement((36.40)..40)")
00919         self.assertEqual(_insdc_feature_location_string(f._flip(40),40),
00920                          "1..(1.5)")
00921         self.assertEqual(f.strand, -1)
00922         self.assertEqual(f._flip(100).strand, +1)
00923         self.record.features.append(f)
00924         
00925         f = SeqFeature(FeatureLocation(WithinPosition(45, left=45, right=47),
00926                                        WithinPosition(53, left=50, right=53)), \
00927                        strand=-1, type="repeat_region")
00928         self.assertEqual(_insdc_feature_location_string(f,100),
00929                          "complement((46.48)..(50.53))")
00930         self.assertEqual(len(f), len(f.extract(s)))
00931         self.assertEqual(_insdc_feature_location_string(f._flip(60),60),
00932                          "(8.11)..(13.15)")
00933         self.assertEqual(f.strand, -1)
00934         self.assertEqual(f._flip(100).strand, +1)
00935         self.record.features.append(f)
00936         
00937         f = SeqFeature(FeatureLocation(55,WithinPosition(65, left=60, right=65)), \
00938                        strand=-1, type="CDS")
00939         self.assertEqual(_insdc_feature_location_string(f,100),
00940                          "complement(56..(60.65))")
00941         self.assertEqual(len(f), len(f.extract(s)))
00942         self.assertEqual(_insdc_feature_location_string(f._flip(100),100),
00943                          "(36.41)..45")
00944         self.assertEqual(f.strand, -1)
00945         self.assertEqual(f._flip(100).strand, +1)
00946         self.record.features.append(f)
00947         
00948         self.write_read_checks()
00949         
00950 class NC_000932(unittest.TestCase):
00951     #This includes an evil dual strand gene
00952     basename = "NC_000932"
00953     emblname = None # "AP000423" has different annotation (e.g. more CDS)
00954     table = 11
00955     skip_trans_test = ["gi|7525080|ref|NP_051037.1|", #dual-strand
00956                        "gi|7525057|ref|NP_051038.1|", #dual-strand
00957                        "gi|90110725|ref|NP_051109.2|", #Invalid annotation? No start codon
00958                        ]
00959     __doc__ = "Tests using %s GenBank and FASTA files from the NCBI" % basename
00960     #TODO - neat way to change the docstrings...
00961 
00962     def setUp(self):
00963         self.gb_filename = os.path.join("GenBank",self.basename+".gb")
00964         self.ffn_filename = os.path.join("GenBank",self.basename+".ffn")
00965         self.faa_filename = os.path.join("GenBank",self.basename+".faa")
00966         self.fna_filename = os.path.join("GenBank",self.basename+".fna")
00967         if self.emblname:
00968             self.embl_filename = os.path.join("EMBL",self.emblname+".embl")
00969 
00970     #These tests only need the GenBank file and the FAA file:
00971     def test_CDS(self):
00972         #"""Checking GenBank CDS translations vs FASTA faa file."""
00973         gb_record = SeqIO.read(open(self.gb_filename),"genbank")
00974         gb_cds = list(SeqIO.parse(open(self.gb_filename),"genbank-cds"))
00975         fasta = list(SeqIO.parse(open(self.faa_filename),"fasta"))
00976         compare_records(gb_cds, fasta)
00977         cds_features = [f for f in gb_record.features if f.type=="CDS"]
00978         self.assertEqual(len(cds_features), len(fasta))
00979         for f, r in zip(cds_features, fasta):
00980             if r.id in self.skip_trans_test:
00981                 continue
00982             #Get the nucleotides and translate them
00983             nuc = f.extract(gb_record.seq)
00984             self.assertEqual(len(nuc), len(f))
00985             pro = nuc.translate(table=self.table, cds=True)
00986             #print r.id, nuc, pro, r.seq
00987             #print f
00988             if pro[-1] == "*":
00989                 self.assertEqual(str(pro)[:-1], str(r.seq))
00990             else:
00991                 self.assertEqual(str(pro), str(r.seq))
00992 
00993 class NC_005816(NC_000932):
00994     basename = "NC_005816"
00995     emblname = "AE017046"
00996     table = 11
00997     skip_trans_test = []
00998     __doc__ = "Tests using %s GenBank and FASTA files from the NCBI" % basename
00999 
01000     def test_GenBank_vs_EMBL(self):
01001         if not self.emblname:
01002             return
01003         gb_record = SeqIO.read(open(self.gb_filename),"genbank")
01004         embl_record = SeqIO.read(open(self.embl_filename),"embl")
01005         if len(embl_record.features) < len(gb_record.features):
01006             #Used to match, but I've update the GenBank files
01007             #which now has lots of misc_feature entries not in EMBL
01008             embl_record.features = [f for f in embl_record.features \
01009                                     if f.type != "misc_feature"]
01010             gb_record.features = [f for f in gb_record.features \
01011                                   if f.type != "misc_feature"]
01012         return compare_record(gb_record, embl_record, expect_minor_diffs=True)
01013 
01014     def test_Translations(self):
01015         #"""Checking translation of FASTA features (faa vs ffn)."""
01016         faa_records = list(SeqIO.parse(open(self.faa_filename),"fasta"))
01017         ffn_records = list(SeqIO.parse(open(self.ffn_filename),"fasta"))
01018         self.assertEqual(len(faa_records),len(ffn_records))
01019         for faa, fna in zip(faa_records, ffn_records):
01020             translation = fna.seq.translate(self.table, cds=True)
01021             if faa.id in self.skip_trans_test:
01022                 continue
01023             if (str(translation) != str(faa.seq)) \
01024             and (str(translation) != str(faa.seq)+"*"):
01025                 t = SeqRecord(translation, id="Translation",
01026                               description="Table %s" % self.table)
01027                 raise ValueError("FAA vs FNA translation problem:\n%s\n%s\n%s\n" \
01028                                  % (fna.format("fasta"),
01029                                     t.format("fasta"),
01030                                     faa.format("fasta")))
01031     
01032     def test_Genome(self):
01033         #"""Checking GenBank sequence vs FASTA fna file."""
01034         gb_record = SeqIO.read(open(self.gb_filename),"genbank")
01035         fa_record = SeqIO.read(open(self.fna_filename),"fasta")
01036         compare_record(gb_record, fa_record)
01037         if self.emblname is None:
01038             return
01039         embl_record = SeqIO.read(open(self.embl_filename),"embl")
01040         if len(embl_record.features) < len(gb_record.features):
01041             #Hack since now out of sync for NC_005816
01042             embl_record.features = [f for f in embl_record.features \
01043                                     if f.type != "misc_feature"]
01044             gb_record.features = [f for f in gb_record.features \
01045                                   if f.type != "misc_feature"]
01046         compare_record(gb_record, embl_record, expect_minor_diffs=True)
01047 
01048     def test_Features(self):
01049         #"""Checking GenBank features sequences vs FASTA ffn file."""
01050         gb_record = SeqIO.read(open(self.gb_filename),"genbank")
01051         features = [f for f in gb_record.features if f.type=="CDS"]
01052         fa_records = list(SeqIO.parse(open(self.ffn_filename),"fasta"))
01053         self.assertEqual(len(fa_records), len(features))
01054         #This assumes they are in the same order...
01055         for fa_record, f in zip(fa_records, features):
01056             #TODO - check the FASTA ID line against the co-ordinates?
01057             f_seq = f.extract(gb_record.seq)
01058             self.assertEqual(len(fa_record.seq),
01059                              len(f_seq))
01060             self.assertEqual(str(fa_record.seq),
01061                              str(f_seq))
01062             self.assertEqual(len(f_seq), len(f))
01063 
01064 
01065 class TestWriteRead(unittest.TestCase):
01066     """Test can write and read back files."""
01067     def test_NC_000932(self):
01068         """Write and read back NC_000932.gb"""
01069         write_read(os.path.join("GenBank", "NC_000932.gb"), "gb")
01070 
01071     def test_NC_005816(self):
01072         """Write and read back NC_005816.gb"""
01073         write_read(os.path.join("GenBank", "NC_005816.gb"), "gb")
01074 
01075     def test_gbvrl1_start(self):
01076         """Write and read back gbvrl1_start.seq"""
01077         write_read(os.path.join("GenBank", "gbvrl1_start.seq"), "gb")
01078 
01079     def test_NT_019265(self):
01080         """Write and read back NT_019265.gb"""
01081         write_read(os.path.join("GenBank", "NT_019265.gb"), "gb")
01082 
01083     def test_cor6(self):
01084         """Write and read back cor6_6.gb"""
01085         write_read(os.path.join("GenBank", "cor6_6.gb"), "gb")
01086 
01087     def test_arab1(self):
01088         """Write and read back arab1.gb"""
01089         write_read(os.path.join("GenBank", "arab1.gb"), "gb")
01090 
01091     def test_one_of(self):
01092         """Write and read back of_one.gb"""
01093         write_read(os.path.join("GenBank", "one_of.gb"), "gb")
01094 
01095     def test_pri1(self):
01096         """Write and read back pri1.gb"""
01097         write_read(os.path.join("GenBank", "pri1.gb"), "gb")
01098 
01099     def test_noref(self):
01100         """Write and read back noref.gb"""
01101         write_read(os.path.join("GenBank", "noref.gb"), "gb")
01102 
01103     def test_origin_line(self):
01104         """Write and read back origin_line.gb"""
01105         write_read(os.path.join("GenBank", "origin_line.gb"), "gb")
01106 
01107     def test_dbsource_wrap(self):
01108         """Write and read back dbsource_wrap.gb"""
01109         write_read(os.path.join("GenBank", "dbsource_wrap.gb"), "gb", ["gb"])
01110         #Protein so can't convert this to EMBL format
01111 
01112     def test_blank_seq(self):
01113         """Write and read back blank_seq.gb"""
01114         write_read(os.path.join("GenBank", "blank_seq.gb"), "gb", ["gb"])
01115         #Protein so can't convert this to EMBL format
01116 
01117     def test_extra_keywords(self):
01118         """Write and read back extra_keywords.gb"""
01119         write_read(os.path.join("GenBank", "extra_keywords.gb"), "gb")
01120 
01121     def test_protein_refseq(self):
01122         """Write and read back protein_refseq.gb"""
01123         write_read(os.path.join("GenBank", "protein_refseq.gb"), "gb", ["gb"])
01124         #Protein so can't convert this to EMBL format
01125 
01126     def test_protein_refseq2(self):
01127         """Write and read back protein_refseq2.gb"""
01128         write_read(os.path.join("GenBank", "protein_refseq2.gb"), "gb", ["gb"])
01129         #Protein so can't convert this to EMBL format
01130 
01131     def test_AAA03323(self):
01132         """Write and read back AAA03323.embl"""
01133         write_read(os.path.join("EMBL", "AAA03323.embl"), "embl")
01134 
01135     def test_AE017046(self):
01136         """Write and read back AE017046.embl"""
01137         write_read(os.path.join("EMBL", "AE017046.embl"), "embl")
01138 
01139     def test_DD231055_edited(self):
01140         """Write and read back DD231055_edited.embl"""
01141         write_read(os.path.join("EMBL", "DD231055_edited.embl"), "embl")
01142 
01143     def test_Human_contigs(self):
01144         """Write and read back Human_contigs.embl"""
01145         write_read(os.path.join("EMBL", "Human_contigs.embl"), "embl")
01146 
01147     def test_SC10H5(self):
01148         """Write and read back SC10H5.embl"""
01149         write_read(os.path.join("EMBL", "SC10H5.embl"), "embl")
01150 
01151     def test_TRBG361(self):
01152         """Write and read back TRBG361.embl"""
01153         write_read(os.path.join("EMBL", "TRBG361.embl"), "embl")
01154 
01155     def test_U87107(self):
01156         """Write and read back U87107.embl"""
01157         write_read(os.path.join("EMBL", "U87107.embl"), "embl")
01158 
01159 
01160 if __name__ == "__main__":
01161     runner = unittest.TextTestRunner(verbosity = 2)
01162     unittest.main(testRunner=runner)