Back to index

python-biopython  1.60
Classes | Functions | Variables
test_SeqIO_features Namespace Reference

Classes

class  SeqFeatureExtractionWritingReading
class  SeqFeatureCreation
class  FeatureWriting
class  NC_000932
class  NC_005816
class  TestWriteRead

Functions

def write_read
def compare_record
def compare_records
def compare_feature
def compare_features
def make_join_feature

Variables

tuple gbk_template = open("GenBank/iro.gb", "rU")
tuple runner = unittest.TextTestRunner(verbosity = 2)

Function Documentation

def test_SeqIO_features.compare_feature (   old,
  new,
  ignore_sub_features = False 
)
Check two SeqFeatures agree.

Definition at line 110 of file test_SeqIO_features.py.

00110 
def compare_feature(old, new, ignore_sub_features=False):
    """Check that two SeqFeature objects describe the same feature.

    The type, the location (the nofuzzy start/end, plus the start and
    end positions and their string forms), strand, ref, ref_db and
    location_operator must all match. Unless ignore_sub_features is
    true, the sub-features are compared pairwise as well. Qualifiers are
    only compared for keys present on both features, and the keys
    db_xref, protein_id, product and note are skipped.

    Raises ValueError on the first mismatch, otherwise returns True.
    """
    if old.type != new.type:
        raise ValueError("Type %s versus %s" % (repr(old.type), repr(new.type)))

    if (old.location.nofuzzy_start != new.location.nofuzzy_start
            or old.location.nofuzzy_end != new.location.nofuzzy_end):
        raise ValueError("%s versus %s:\n%s\nvs:\n%s"
                         % (old.location, new.location, repr(old), repr(new)))

    #Simple attributes which must be equal, checked in this order
    attribute_checks = (
        ("strand", "Different strand:\n%s\nvs:\n%s"),
        ("ref", "Different ref:\n%s\nvs:\n%s"),
        ("ref_db", "Different ref_db:\n%s\nvs:\n%s"),
        ("location_operator", "Different location_operator:\n%s\nvs:\n%s"),
    )
    for attr, template in attribute_checks:
        if getattr(old, attr) != getattr(new, attr):
            raise ValueError(template % (repr(old), repr(new)))

    #The positions must be equal and also have the same string form
    position_checks = (
        ("start", "Start %s versus %s:\n%s\nvs:\n%s"),
        ("end", "End %s versus %s:\n%s\nvs:\n%s"),
    )
    for attr, template in position_checks:
        old_pos = getattr(old.location, attr)
        new_pos = getattr(new.location, attr)
        if old_pos != new_pos or str(old_pos) != str(new_pos):
            raise ValueError(template % (old_pos, new_pos, repr(old), repr(new)))

    if not ignore_sub_features:
        if len(old.sub_features) != len(new.sub_features):
            raise ValueError("Different sub features")
        if not all(compare_feature(a, b)
                   for a, b in zip(old.sub_features, new.sub_features)):
            return False

    #This only checks key shared qualifiers
    #Would a white list be easier?
    #EMBL and GenBank files use different references/notes/etc,
    #so these qualifiers are not compared:
    skipped = ("db_xref", "protein_id", "product", "note")
    for key in set(old.qualifiers).intersection(new.qualifiers):
        if key not in skipped and old.qualifiers[key] != new.qualifiers[key]:
            raise ValueError("Qualifier mis-match for %s:\n%s\n%s"
                             % (key, old.qualifiers[key], new.qualifiers[key]))
    return True

Here is the caller graph for this function:

def test_SeqIO_features.compare_features (   old_list,
  new_list,
  ignore_sub_features = False 
)
Check two lists of SeqFeatures agree, raises a ValueError if mismatch.

Definition at line 152 of file test_SeqIO_features.py.

00152 
def compare_features(old_list, new_list, ignore_sub_features=False):
    """Check two lists of SeqFeatures agree, raises a ValueError if mismatch.

    The lists must be the same length; features are then compared
    pairwise by position using compare_feature, with
    ignore_sub_features passed through.
    """
    old_count = len(old_list)
    new_count = len(new_list)
    if old_count != new_count:
        raise ValueError("%i vs %i features" % (old_count, new_count))
    #Pairing by position assumes both lists are in the same order
    return all(compare_feature(old, new, ignore_sub_features)
               for old, new in zip(old_list, new_list))

Here is the call graph for this function:

Here is the caller graph for this function:

def test_SeqIO_features.compare_record (   old,
  new,
  expect_minor_diffs = False 
)

Definition at line 35 of file test_SeqIO_features.py.

00035 
def compare_record(old, new, expect_minor_diffs=False):
    """Check two SeqRecords agree, raising on the first mismatch found.

    Checks, in order: the identifiers (fuzzily), the sequence length,
    the sequence itself (ignoring case), the features (only if both
    records have some, via compare_features), the descriptions (at
    least one word in common), and those annotation keys present in
    both records.

    If expect_minor_diffs is true, the identifier check and the
    comparison of the "references" annotation are skipped.

    Most mismatches raise ValueError; mismatching references fail an
    assert (AssertionError) instead. Returns True if nothing differed.
    """
    #Note the name matching is a bit fuzzy
    if (not expect_minor_diffs
            and old.id != new.id and old.name != new.name
            and (old.id not in new.id) and (new.id not in old.id)
            and (old.id.replace(" ","_") != new.id.replace(" ","_"))):
        raise ValueError("'%s' or '%s' vs '%s' or '%s' records"
                         % (old.id, old.name, new.id, new.name))
    if len(old.seq) != len(new.seq):
        raise ValueError("%i vs %i" % (len(old.seq), len(new.seq)))
    if isinstance(old.seq, UnknownSeq) \
    and isinstance(new.seq, UnknownSeq):
        #Jython didn't like us comparing the string of very long
        #UnknownSeq object (out of heap memory error)
        #Compare the placeholder characters ignoring case on both
        #sides, matching the case-insensitive comparison used for
        #ordinary sequences below.
        if old.seq._character.upper() != new.seq._character.upper():
            raise ValueError("%s vs %s" % (repr(old.seq), repr(new.seq)))
    elif str(old.seq).upper() != str(new.seq).upper():
        #Only show the start of long sequences in the error message
        if len(old.seq) < 200:
            raise ValueError("'%s' vs '%s'" % (old.seq, new.seq))
        else:
            raise ValueError("'%s...' vs '%s...'" % (old.seq[:100], new.seq[:100]))
    if old.features and new.features:
        if not compare_features(old.features, new.features):
            return False
    #Just insist on at least one word in common:
    if (old.description or new.description) \
    and not set(old.description.split()).intersection(new.description.split()):
        raise ValueError("%s versus %s"
                         % (repr(old.description), repr(new.description)))
    #This only checks common annotation
    #Would a white list be easier?
    for key in set(old.annotations).intersection(new.annotations):
        if key in ["data_file_division", "accessions"]:
            #TODO - These are not yet supported on output, or
            #have other complications (e.g. different number of accessions
            #allowed in various file formats)
            continue
        if key == "comment":
            #Ignore whitespace
            if old.annotations[key].split() != new.annotations[key].split():
                raise ValueError("Annotation mis-match for comment:\n%s\n%s"
                                % (old.annotations[key], new.annotations[key]))
            continue
        if key == "references":
            if expect_minor_diffs:
                #TODO - Implement EMBL output of references
                continue
            assert len(old.annotations[key]) == len(new.annotations[key])
            for r1, r2 in zip(old.annotations[key], new.annotations[key]):
                assert r1.title == r2.title
                assert r1.authors == r2.authors, \
                       "Old: '%s'\nNew: '%s'" % (r1.authors, r2.authors)
                assert r1.journal == r2.journal
                if r1.consrtm and r2.consrtm:
                    #Not held in EMBL files
                    assert r1.consrtm == r2.consrtm
                if r1.medline_id and r2.medline_id:
                    #Not held in EMBL files
                    assert r1.medline_id == r2.medline_id
                assert r1.pubmed_id == r2.pubmed_id
            continue
        #Any other annotation is compared via its repr
        if repr(old.annotations[key]) != repr(new.annotations[key]):
            raise ValueError("Annotation mis-match for %s:\n%s\n%s"
                             % (key, old.annotations[key], new.annotations[key]))
    return True

Here is the call graph for this function:

Here is the caller graph for this function:

def test_SeqIO_features.compare_records (   old_list,
  new_list,
  expect_minor_diffs = False 
)
Check two lists of SeqRecords agree, raises a ValueError if mismatch.

Definition at line 101 of file test_SeqIO_features.py.

00101 
def compare_records(old_list, new_list, expect_minor_diffs=False):
    """Check two lists of SeqRecords agree, raises a ValueError if mismatch.

    The lists must be the same length; records are then compared
    pairwise by position using compare_record, with expect_minor_diffs
    passed through.
    """
    old_count = len(old_list)
    new_count = len(new_list)
    if old_count != new_count:
        raise ValueError("%i vs %i records" % (old_count, new_count))
    return all(compare_record(old, new, expect_minor_diffs)
               for old, new in zip(old_list, new_list))

Here is the call graph for this function:

Here is the caller graph for this function:

def test_SeqIO_features.make_join_feature (   f_list,
  ftype = "misc_feature" 
)

Definition at line 162 of file test_SeqIO_features.py.

00162 
def make_join_feature(f_list, ftype="misc_feature"):
    """Build a "join" SeqFeature with the given features as sub-features.

    Every feature in f_list is modified in place: its type is set to
    ftype and its location_operator to "join". The returned feature
    runs from the start of the first feature to the end of the last,
    and takes the features' strand if they all share one, otherwise
    None.
    """
    #NOTE - Does NOT reorder the sub-features (which you may
    #want to do for reverse strand features...)
    strands = set(f.strand for f in f_list)
    strand = f_list[0].strand if len(strands) == 1 else None
    for part in f_list:
        part.type = ftype
        part.location_operator = "join"
    span = FeatureLocation(f_list[0].location.start,
                           f_list[-1].location.end,
                           strand)
    joined = SeqFeature(span, type=ftype, location_operator="join")
    assert joined.location.strand == strand
    assert joined.strand == strand
    joined.sub_features = f_list
    return joined
00181 
00182 #Prepare a single GenBank record with one feature with a %s place holder for
#the feature location

Here is the caller graph for this function:

def test_SeqIO_features.write_read (   filename,
  in_format = "gb",
  out_formats = ["gb", "embl", "imgt"] 
)

Definition at line 24 of file test_SeqIO_features.py.

00024 
def write_read(filename, in_format="gb", out_formats=["gb", "embl", "imgt"]):
    """Round-trip the records in a file through each output format.

    For each entry in out_formats the input file is parsed afresh with
    SeqIO.parse, written to an in-memory StringIO handle with
    SeqIO.write, parsed back from that handle, and the two lists of
    records are passed to compare_records.
    """
    #NOTE - The list default for out_formats is only iterated over,
    #never modified, so sharing it between calls is harmless.
    for out_format in out_formats:
        #Close the input handle explicitly rather than leaving it
        #open until the garbage collector gets to it
        in_handle = open(filename)
        try:
            gb_records = list(SeqIO.parse(in_handle, in_format))
        finally:
            in_handle.close()
        #Write it out...
        handle = StringIO()
        SeqIO.write(gb_records, handle, out_format)
        handle.seek(0)
        #Now load it back and check it agrees,
        gb_records2 = list(SeqIO.parse(handle, out_format))
        compare_records(gb_records, gb_records2)

Here is the call graph for this function:

Here is the caller graph for this function:


Variable Documentation

tuple test_SeqIO_features.gbk_template = open("GenBank/iro.gb", "rU")

Definition at line 183 of file test_SeqIO_features.py.

tuple test_SeqIO_features.runner = unittest.TextTestRunner(verbosity = 2)

Definition at line 1161 of file test_SeqIO_features.py.