Back to index

python-biopython  1.60
Functions
seq_tests_common Namespace Reference

Functions

def checksum_summary
def compare_reference
def compare_feature
def compare_sequence
def compare_features
def compare_record
def compare_records

Function Documentation

Definition at line 9 of file seq_tests_common.py.

00009 
def checksum_summary(record):
    """Return a short one-line summary of a record's sequence.

    For an UnknownSeq the summary is simply repr(record.seq).  Otherwise
    it is the sequence text (cut down to its first 19 and last 3 letters
    once the sequence has 25 or more letters), the seguid() value of the
    sequence, and the sequence length.
    """
    seq = record.seq
    if isinstance(seq, UnknownSeq):
        return repr(seq)

    length = len(seq)
    text = seq.tostring()
    if length >= 25:
        # Too long to show in full - keep just the two ends.
        text = text[:19] + "..." + text[-3:]
    return "%s [%s] len %i" % (text, seguid(seq), length)

Here is the call graph for this function:

def seq_tests_common.compare_feature (   old_f,
  new_f 
)
Compare two SeqFeature objects

Definition at line 65 of file seq_tests_common.py.

00065 
def _compare_feature_basics(old_f, new_f):
    """Assert two (sub-)features agree on type, strand, ref and ref_db."""
    for name in ("type", "strand", "ref", "ref_db"):
        old_value = getattr(old_f, name)
        new_value = getattr(new_f, name)
        assert old_value == new_value, "%s -> %s" % (old_value, new_value)


def _compare_feature_location(old_f, new_f):
    """Assert two (sub-)features have equivalent locations.

    BioSQL currently does not store fuzzy locations, but instead stores
    them as FeatureLocation.nofuzzy_start / FeatureLocation.nofuzzy_end.
    The vast majority of cases will be comparisons of ExactPosition
    class locations, so the string forms are compared first and the
    nofuzzy_* fallback is only used when that fails and the old location
    is not made up of two ExactPosition objects.
    """
    try:
        assert str(old_f.location) == str(new_f.location), \
            "%s -> %s" % (str(old_f.location), str(new_f.location))
    except AssertionError:
        if isinstance(old_f.location.start, ExactPosition) and \
                isinstance(old_f.location.end, ExactPosition):
            # It's not a problem with fuzzy locations; a bare raise
            # re-raises the original assertion with its traceback intact.
            raise
        # At least one of the positions is fuzzy
        assert old_f.location.nofuzzy_start == \
            new_f.location.nofuzzy_start, \
            "%s -> %s" % (old_f.location.nofuzzy_start,
                          new_f.location.nofuzzy_start)
        assert old_f.location.nofuzzy_end == \
            new_f.location.nofuzzy_end, \
            "%s -> %s" % (old_f.location.nofuzzy_end,
                          new_f.location.nofuzzy_end)


def compare_feature(old_f, new_f):
    """Compare two SeqFeature objects.

    Checks the type, strand, ref, ref_db, id, location, sub_features
    (type, strand, ref, ref_db and location of each pair) and finally
    the qualifiers.  A qualifier which is a plain string in old_f may
    be either the same string or a one-element list holding that string
    in new_f.

    Returns True when everything matches; any mismatch raises
    AssertionError.
    """
    assert isinstance(old_f, SeqFeature)
    assert isinstance(new_f, SeqFeature)

    _compare_feature_basics(old_f, new_f)

    #TODO - BioSQL does not store/retrieve feature's id (Bug 2526)
    assert old_f.id == new_f.id or new_f.id == "<unknown id>"

    #TODO - Work out how the location_qualifier_value table should
    #be used, given BioPerl seems to ignore it (Bug 2766)
    #assert old_f.location_operator == new_f.location_operator, \
    #        "%s -> %s" % (old_f.location_operator, new_f.location_operator)

    # We don't store fuzzy locations:
    _compare_feature_location(old_f, new_f)

    assert len(old_f.sub_features) == len(new_f.sub_features), \
        "number of sub_features: %s -> %s" % \
        (len(old_f.sub_features), len(new_f.sub_features))

    for old_sub, new_sub in zip(old_f.sub_features, new_f.sub_features):
        _compare_feature_basics(old_sub, new_sub)

        #TODO - Work out how the location_qualifier_value table should
        #be used, given BioPerl seems to ignore it (Bug 2766)
        #assert old_sub.location_operator == new_sub.location_operator, \
        #    "%s -> %s" % (old_sub.location_operator, new_sub.location_operator)

        _compare_feature_location(old_sub, new_sub)

    assert len(old_f.qualifiers) == len(new_f.qualifiers)
    assert set(old_f.qualifiers) == set(new_f.qualifiers)
    for key in old_f.qualifiers:
        old_value = old_f.qualifiers[key]
        new_value = new_f.qualifiers[key]
        if isinstance(old_value, str):
            if isinstance(new_value, str):
                assert old_value == new_value, \
                    "%s -> %s" % (repr(old_value), repr(new_value))
            elif isinstance(new_value, list):
                #Maybe a string turning into a list of strings?
                assert [old_value] == new_value, \
                    "%s -> %s" % (repr(old_value), repr(new_value))
            else:
                # Note: the message must be built with % (the original
                # used &, which raised TypeError instead of reporting).
                assert False, "Problem with feature's '%s' qualifier" % key
        else:
            #Should both be lists of strings...
            assert old_value == new_value, \
                "%s -> %s" % (old_value, new_value)
    return True

Here is the caller graph for this function:

def seq_tests_common.compare_features (   old_list,
  new_list 
)

Definition at line 238 of file seq_tests_common.py.

00238 
def compare_features(old_list, new_list):
    """Compare two equal-length lists of features pairwise.

    Both arguments must be lists of the same length.  Each pair is handed
    to compare_feature(); the result is False as soon as one comparison
    returns a false value, otherwise True.
    """
    for candidate in (old_list, new_list):
        assert isinstance(candidate, list)
    assert len(old_list) == len(new_list)
    return all(compare_feature(old_f, new_f)
               for old_f, new_f in zip(old_list, new_list))
        

Here is the call graph for this function:

Here is the caller graph for this function:

def seq_tests_common.compare_record (   old,
  new 
)
Compare two SeqRecord or DBSeqRecord objects

Definition at line 247 of file seq_tests_common.py.

00247 
def compare_record(old, new):
    """Compare two SeqRecord or DBSeqRecord objects.

    The checks run in this order: sequence, id, name, description,
    dbxrefs, features, annotation keys, and then the values of the
    annotation keys the two records share.  A mismatch raises
    AssertionError; False is returned if compare_features() reports
    False; otherwise the result is True.
    """
    assert isinstance(old, SeqRecord)
    assert isinstance(new, SeqRecord)

    # Sequence, then the simple attributes.
    compare_sequence(old.seq, new.seq)
    assert old.id == new.id
    assert old.name == new.name
    assert old.description == new.description
    assert old.dbxrefs == new.dbxrefs, \
        "dbxrefs mismatch\nOld: %s\nNew: %s" % (old.dbxrefs, new.dbxrefs)

    # Features.
    if not compare_features(old.features, new.features):
        return False

    old_annot = old.annotations
    new_annot = new.annotations

    # Annotation keys.  Some "extra" annotations are expected to appear
    # in the new record; the tolerated ones are listed here.
    # TODO - address these, see Bug 2681?
    tolerated_extra = ['cross_references', 'date',
                       'data_file_division', 'ncbi_taxid', 'gi']
    new_keys = set(new_annot).difference(old_annot)
    new_keys = new_keys.difference(tolerated_extra)
    assert not new_keys, \
        "Unexpected new annotation keys: %s" % ", ".join(new_keys)

    tolerated_missing = ['ncbi_taxid']  # Can't store chimeras
    missing_keys = set(old_annot).difference(new_annot)
    missing_keys = missing_keys.difference(tolerated_missing)
    assert not missing_keys, \
        "Unexpectedly missing annotation keys: %s" % ", ".join(missing_keys)

    # In the short term, just compare the values of any shared keys.
    changed = "Annotation '%s' changed by load/retrieve\nWas:%s\nNow:%s"
    for key in set(old_annot).intersection(new_annot):
        old_value = old_annot[key]
        new_value = new_annot[key]
        if key == "references":
            assert len(old_value) == len(new_value)
            for old_r, new_r in zip(old_value, new_value):
                compare_reference(old_r, new_r)
        elif key == "comment":
            # Reduce both sides to a single string before comparing:
            # because of line wrapping in GenBank etc the white space
            # is not expected to be 100% the same.
            old_comment = " ".join(old_value) \
                if isinstance(old_value, list) else old_value
            new_comment = " ".join(new_value) \
                if isinstance(new_value, list) else new_value
            old_comment = old_comment.replace("\n", " ").replace("  ", " ")
            new_comment = new_comment.replace("\n", " ").replace("  ", " ")
            assert old_comment == new_comment, \
                "Comment annotation changed by load/retrieve\n" \
                "Was:%s\nNow:%s" \
                % (repr(old_comment), repr(new_comment))
        elif key in ("taxonomy", "organism", "source"):
            # If there is a taxon id recorded, these fields get
            # overwritten by data from the taxon/taxon_name tables, so
            # there is no guarantee they are identical after a
            # load/retrieve.  Only the type of the new value is checked.
            assert isinstance(new_value, (basestring, list))
        elif type(old_value) == type(new_value):
            assert old_value == new_value, \
                changed % (key, old_value, new_value)
        elif isinstance(old_value, str) and isinstance(new_value, list):
            # Any annotation which is a single string gets turned into
            # a list containing one string by BioSQL at the moment.
            assert [old_value] == new_value, \
                changed % (key, old_value, new_value)
        elif isinstance(old_value, list) and isinstance(new_value, str):
            assert old_value == [new_value], \
                changed % (key, old_value, new_value)
    return True

Here is the call graph for this function:

Here is the caller graph for this function:

def seq_tests_common.compare_records (   old_list,
  new_list 
)

Definition at line 328 of file seq_tests_common.py.

00328 
def compare_records(old_list, new_list):
    """Compare two equal-length lists of records pairwise.

    Both arguments must be lists of the same length.  Each pair is handed
    to compare_record(); the result is False as soon as one comparison
    returns a false value, otherwise True.
    """
    for candidate in (old_list, new_list):
        assert isinstance(candidate, list)
    assert len(old_list) == len(new_list)
    return all(compare_record(old_r, new_r)
               for old_r, new_r in zip(old_list, new_list))

Here is the call graph for this function:

def seq_tests_common.compare_reference (   old_r,
  new_r 
)
Compare two Reference objects

Note new_r is assumed to be a BioSQL DBSeqRecord, due to limitations
of the BioSQL table structure.

Definition at line 20 of file seq_tests_common.py.

00020 
def compare_reference(old_r, new_r):
    """Compare two Reference objects.

    Note new_r is assumed to be the copy retrieved from BioSQL: due to
    limitations of the BioSQL table structure its comment and consrtm
    are allowed to be empty strings, the pubmed_id is only compared when
    both references have one, and only the first location is compared.

    Returns True when the references agree; any mismatch raises
    AssertionError with a message showing the two values.
    """
    assert old_r.title == new_r.title, \
        "%s vs %s" % (old_r.title, new_r.title)
    assert old_r.authors == new_r.authors, \
        "%s vs %s" % (old_r.authors, new_r.authors)
    assert old_r.journal == new_r.journal, \
        "%s vs %s" % (old_r.journal, new_r.journal)
    assert old_r.medline_id == new_r.medline_id, \
        "%s vs %s" % (old_r.medline_id, new_r.medline_id)

    if old_r.pubmed_id and new_r.pubmed_id:
        #Looking at BioSQL/BioSeq.py function _retrieve_reference
        #it seems that it will get either the MEDLINE or PUBMED,
        #but not both.  I *think* the current schema does not allow
        #us to store both... must confirm this.
        assert old_r.pubmed_id == new_r.pubmed_id, \
            "%s vs %s" % (old_r.pubmed_id, new_r.pubmed_id)

    #TODO - assert old_r.comment == new_r.comment
    #Looking at the tables, I *think* the current schema does not
    #allow us to store a reference comment.  Must confirm this.
    assert old_r.comment == new_r.comment or new_r.comment == "", \
        "%r vs %r" % (old_r.comment, new_r.comment)

    #TODO - assert old_r.consrtm == new_r.consrtm
    #Looking at the tables, I *think* the current schema does not
    #allow us to store a consortium.
    assert old_r.consrtm == new_r.consrtm or new_r.consrtm == "", \
        "%r vs %r" % (old_r.consrtm, new_r.consrtm)

    if len(old_r.location) == 0:
        assert len(new_r.location) == 0, \
            "Unexpected reference location(s): %r" % (new_r.location,)
    else:
        #BioSQL can only store ONE location!
        #TODO - Check BioPerl with a GenBank file with multiple ref locations
        # Report a lost location as a comparison failure rather than
        # letting new_r.location[0] raise IndexError below.
        assert len(new_r.location) > 0, \
            "Reference location missing, expected %r" % (old_r.location[0],)
        old_loc = old_r.location[0]
        new_loc = new_r.location[0]
        assert isinstance(old_loc, FeatureLocation)
        assert isinstance(new_loc, FeatureLocation)
        assert old_loc.start == new_loc.start and \
            old_loc.end == new_loc.end, \
            "%s vs %s" % (old_loc, new_loc)

    return True

Here is the caller graph for this function:

def seq_tests_common.compare_sequence (   old,
  new 
)
Compare two Seq or DBSeq objects

Definition at line 178 of file seq_tests_common.py.

00178 
def compare_sequence(old, new):
    """Compare two Seq or DBSeq objects.

    Checks that both have the same length and the same tostring() text,
    that either both or neither are UnknownSeq objects, and that element
    access and slicing (including out-of-range end points and steps of
    1 and 3) on both objects agree with the plain string.

    Returns True when everything matches; any mismatch raises
    AssertionError.
    """
    assert len(old) == len(new), "%i vs %i" % (len(old), len(new))
    s = old.tostring()
    assert s == new.tostring()

    if isinstance(old, UnknownSeq):
        assert isinstance(new, UnknownSeq)
    else:
        assert not isinstance(new, UnknownSeq)

    ln = len(old)
    assert isinstance(s, str)

    #Don't check every single element; for long sequences
    #this takes far far far too long to run!
    #Test both positive and negative indices
    if ln < 50:
        # Explicit list: does not rely on range() returning a list.
        indices = list(range(-ln, ln))
    else:
        #A selection of end cases, and the mid point
        indices = [-ln, -ln+1, -(ln//2), -1, 0, 1, ln//2, ln-2, ln-1]

    #Test element access,
    for i in indices:
        expected = s[i]
        assert expected == old[i], \
            "Element %i: %s vs %s" % (i, repr(expected), repr(old[i]))
        assert expected == new[i], \
            "Element %i: %s vs %s" % (i, repr(expected), repr(new[i]))

    #Test slices, adding two end points past the end of the sequence
    #to check it copes with overflows
    slice_points = indices + [ln, ln+1000]
    for i in slice_points:
        for j in slice_points:
            expected = s[i:j]
            assert expected == old[i:j].tostring(), \
                   "Slice %s vs %s" % (repr(expected), repr(old[i:j]))
            assert expected == new[i:j].tostring(), \
                   "Slice %s vs %s" % (repr(expected), repr(new[i:j]))
            #Slicing with step of 1 should make no difference.
            #Slicing with step 3 might be useful for codons.
            for step in [1, 3]:
                expected = s[i:j:step]
                assert expected == old[i:j:step].tostring()
                assert expected == new[i:j:step].tostring()

        #Check automatic end points
        expected = s[i:]
        assert expected == old[i:].tostring()
        assert expected == new[i:].tostring()

        expected = s[:i]
        assert expected == old[:i].tostring()
        assert expected == new[:i].tostring()

    #Check "copy" splice
    assert s == old[:].tostring()
    assert s == new[:].tostring()
    return True

Here is the call graph for this function:

Here is the caller graph for this function: