Back to index

python-biopython  1.60
Public Member Functions
test_Uniprot.TestUniprot Class Reference

List of all members.

Public Member Functions

def test_uni001
def compare_txt_xml
def test_Q13639
def test_multi_ex
def test_multi_ex_index

Detailed Description

Definition at line 20 of file test_Uniprot.py.


Member Function Documentation

def test_Uniprot.TestUniprot.compare_txt_xml (   self,
  old,
  new 
)

Definition at line 76 of file test_Uniprot.py.

00076 
00077     def compare_txt_xml(self, old, new):
00078         self.assertEqual(old.id, new.id)
00079         self.assertEqual(old.name, new.name)
00080         self.assertEqual(len(old), len(new))
00081         self.assertEqual(str(old.seq), str(new.seq))
00082         for key in set(old.annotations).intersection(new.annotations):
00083             if key == "references":
00084                 self.assertEqual(len(old.annotations[key]),
00085                                  len(new.annotations[key]))
00086                 for r1, r2 in zip(old.annotations[key], new.annotations[key]):
00087                     #Tweak for line breaks in plain text SwissProt
00088                     r1.title = r1.title.replace("- ", "-")
00089                     r2.title = r2.title.replace("- ", "-")
00090                     r1.journal = r1.journal.rstrip(".") #Should parser do this?
00091                     r1.medline_id = "" #Missing in UniPort MXL? TODO - check
00092                     #Lots of extra comments in UniProt XML
00093                     r1.comment = ""
00094                     r2.comment = ""
00095                     if not r2.journal: r1.journal = ""
00096                     compare_reference(r1, r2)
00097             elif old.annotations[key] == new.annotations[key]:
00098                 pass
00099             elif key in ["date"]:
00100                 #TODO - Why is this a list vs str?
00101                 pass
00102             elif type(old.annotations[key]) != type(new.annotations[key]):
00103                 raise TypeError("%s gives %s vs %s" % \
00104                                  (key, old.annotations[key], new.annotations[key]))
00105             elif key in ["organism"]:
00106                 if old.annotations[key] == new.annotations[key]:
00107                     pass
00108                 elif old.annotations[key].startswith(new.annotations[key]+" "):
00109                     pass
00110                 else:
00111                     raise ValueError(key)
00112             elif isinstance(old.annotations[key], list) \
00113             and sorted(old.annotations[key]) == sorted(new.annotations[key]):
00114                 pass
00115             else:
00116                 raise ValueError("%s gives %s vs %s" % \
00117                                  (key, old.annotations[key], new.annotations[key]))
00118         self.assertEqual(len(old.features), len(new.features),
00119                          "Features in %s, %i vs %i" %
00120                          (old.id, len(old.features), len(new.features)))
00121         for f1, f2 in zip(old.features, new.features):
00122             """
00123             self.assertEqual(f1.location.nofuzzy_start, f2.location.nofuzzy_start,
00124                              "%s %s vs %s %s" %
00125                              (f1.location, f1.type, f2.location, f2.type))
00126             self.assertEqual(f1.location.nofuzzy_end, f2.location.nofuzzy_end,
00127                              "%s %s vs %s %s" %
00128                              (f1.location, f1.type, f2.location, f2.type))
00129             """
00130             self.assertEqual(repr(f1.location), repr(f2.location),
00131                             "%s %s vs %s %s" %
00132                             (f1.location, f1.type, f2.location, f2.type))

Here is the call graph for this function:

Here is the caller graph for this function:

Compare SwissProt text and uniprot XML versions of several examples.

Definition at line 139 of file test_Uniprot.py.

00139 
00140     def test_multi_ex(self):
00141         """Compare SwissProt text and uniprot XML versions of several examples."""
00142         txt_list = list(SeqIO.parse("SwissProt/multi_ex.txt", "swiss"))
00143         xml_list = list(SeqIO.parse("SwissProt/multi_ex.xml", "uniprot-xml"))
00144         fas_list = list(SeqIO.parse("SwissProt/multi_ex.fasta", "fasta"))
00145         ids = [x.strip() for x in open("SwissProt/multi_ex.list")]
00146         self.assertEqual(len(txt_list), len(ids))
00147         self.assertEqual(len(txt_list), len(fas_list))
00148         self.assertEqual(len(txt_list), len(xml_list))
00149         for txt, xml, fas, id in zip(txt_list, xml_list, fas_list, ids):
00150             self.assertEqual(txt.id, id)
00151             self.assertTrue(txt.id in fas.id.split("|"))
00152             self.assertEqual(str(txt.seq), str(fas.seq))
00153             self.compare_txt_xml(txt, xml)
    

Here is the call graph for this function:

Index SwissProt text and uniprot XML versions of several examples.

Definition at line 154 of file test_Uniprot.py.

00154 
00155     def test_multi_ex_index(self):
00156         """Index SwissProt text and uniprot XML versions of several examples."""
00157         txt_list = list(SeqIO.parse("SwissProt/multi_ex.txt", "swiss"))
00158         xml_list = list(SeqIO.parse("SwissProt/multi_ex.xml", "uniprot-xml"))
00159         ids = [x.strip() for x in open("SwissProt/multi_ex.list")]
00160         txt_index = SeqIO.index("SwissProt/multi_ex.txt", "swiss")
00161         xml_index = SeqIO.index("SwissProt/multi_ex.xml", "uniprot-xml")
00162         self.assertEqual(sorted(txt_index), sorted(ids))
00163         self.assertEqual(sorted(xml_index), sorted(ids))
00164         #Check SeqIO.parse() versus SeqIO.index() for plain text "swiss"
00165         for old in txt_list:
00166             new = txt_index[old.id]
00167             compare_record(old, new)
00168         #Check SeqIO.parse() versus SeqIO.index() for XML "uniprot-xml"
00169         for old in xml_list:
00170             new = xml_index[old.id]
00171             compare_record(old, new)
        

Here is the call graph for this function:

Compare SwissProt text and uniprot XML versions of Q13639.

Definition at line 133 of file test_Uniprot.py.

00133 
00134     def test_Q13639(self):
00135         """Compare SwissProt text and uniprot XML versions of Q13639."""
00136         old = SeqIO.read("SwissProt/Q13639.txt", "swiss")
00137         new = SeqIO.read("SwissProt/Q13639.xml", "uniprot-xml")
00138         self.compare_txt_xml(old, new)
    

Here is the call graph for this function:

Definition at line 22 of file test_Uniprot.py.

00022 
00023     def test_uni001(self):
00024         "Parsing Uniprot file uni001"
00025         filename = 'uni001'
00026         # test the record parser
00027 
00028         datafile = os.path.join('SwissProt', filename)
00029 
00030         test_handle = open(datafile)
00031         seq_record = SeqIO.read(test_handle, "uniprot-xml")
00032         test_handle.close()
00033 
00034         self.assertTrue(isinstance(seq_record, SeqRecord))
00035 
00036         # test a couple of things on the record -- this is not exhaustive
00037         self.assertEqual(seq_record.id, "Q91G55")
00038         self.assertEqual(seq_record.name, "043L_IIV6")
00039         self.assertEqual(seq_record.description, "Uncharacterized protein 043L")
00040         self.assertEqual(repr(seq_record.seq), "Seq('MDLINNKLNIEIQKFCLDLEKKYNINYNNLIDLWFNKESTERLIKCEVNLENKI...IPI', ProteinAlphabet())")
00041 
00042         # self.assertEqual(seq_record.accessions, ['Q91G55']) #seq_record.accessions does not exist
00043         # self.assertEqual(seq_record.organism_classification, ['Eukaryota', 'Metazoa', 'Chordata', 'Craniata', 'Vertebrata', 'Mammalia', 'Eutheria', 'Primates', 'Catarrhini', 'Hominidae', 'Homo'])
00044         # self.assertEqual(record.seqinfo, (348, 39676, '75818910'))
00045     
00046         self.assertEqual(len(seq_record.features), 1)           
00047         self.assertEqual(repr(seq_record.features[0]), "SeqFeature(FeatureLocation(ExactPosition(0), ExactPosition(116)), type='chain', id='PRO_0000377969')")
00048 
00049         self.assertEqual(len(seq_record.annotations['references']), 2)
00050         self.assertEqual(seq_record.annotations['references'][0].authors, 'Jakob N.J., Mueller K., Bahr U., Darai G.')
00051         self.assertEqual(seq_record.annotations['references'][0].title, 'Analysis of the first complete DNA sequence of an invertebrate iridovirus: coding strategy of the genome of Chilo iridescent virus.')
00052         self.assertEqual(seq_record.annotations['references'][0].journal, 'Virology 286:182-196(2001)')
00053         self.assertEqual(seq_record.annotations['references'][0].comment, 'journal article | 2001 | Scope: NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA] | ')
00054 
00055         self.assertEqual(len(seq_record.dbxrefs), 11)
00056         self.assertEqual(seq_record.dbxrefs[0], 'DOI:10.1006/viro.2001.0963')
00057 
00058         self.assertEqual(seq_record.annotations['sequence_length'], 116)
00059         self.assertEqual(seq_record.annotations['sequence_checksum'], '4A29B35FB716523C')
00060         self.assertEqual(seq_record.annotations['modified'], '2009-07-07')
00061         self.assertEqual(seq_record.annotations['accessions'], ['Q91G55'])
00062         self.assertEqual(seq_record.annotations['taxonomy'], ['Viruses', 'dsDNA viruses, no RNA stage', 'Iridoviridae', 'Iridovirus'])
00063         self.assertEqual(seq_record.annotations['sequence_mass'], 13673)
00064         self.assertEqual(seq_record.annotations['dataset'], 'Swiss-Prot')
00065         self.assertEqual(seq_record.annotations['gene_name_ORF'], ['IIV6-043L'])
00066         self.assertEqual(seq_record.annotations['version'], 21)
00067         self.assertEqual(seq_record.annotations['sequence_modified'], '2001-12-01')
00068         self.assertEqual(seq_record.annotations['keywords'], ['Complete proteome', 'Virus reference strain'])
00069         self.assertEqual(seq_record.annotations['organism_host'], ['Acheta domesticus', 'House cricket', 'Chilo suppressalis', 'striped riceborer', 'Gryllus bimaculatus', 'Two-spotted cricket', 'Gryllus campestris', 'Spodoptera frugiperda', 'Fall armyworm'])
00070         self.assertEqual(seq_record.annotations['created'], '2009-06-16')
00071         self.assertEqual(seq_record.annotations['organism_name'], ['Chilo iridescent virus'])
00072         self.assertEqual(seq_record.annotations['organism'], 'Invertebrate iridescent virus 6 (IIV-6)')
00073         self.assertEqual(seq_record.annotations['recommendedName_fullName'], ['Uncharacterized protein 043L'])
00074         self.assertEqual(seq_record.annotations['sequence_version'], 1)
00075         self.assertEqual(seq_record.annotations['proteinExistence'], ['Predicted'])

Here is the call graph for this function:


The documentation for this class was generated from the following file: