Back to index

python-biopython  1.60
test_SwissProt.py
Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 """Test for the SwissProt parser on SwissProt files.
00003 """
00004 import os
00005 import unittest
00006 
00007 from Bio import SeqIO
00008 from Bio import SwissProt
00009 from Bio.SeqRecord import SeqRecord
00010 
00011 
00012 
00013 class TestSwissProt(unittest.TestCase):
00014 
00015     def test_sp001(self):
00016         "Parsing SwissProt file sp001"
00017         filename = 'sp001'
00018         # test the record parser
00019 
00020         datafile = os.path.join('SwissProt', filename)
00021 
00022         test_handle = open(datafile)
00023         seq_record = SeqIO.read(test_handle, "swiss")
00024         test_handle.close()
00025 
00026         self.assertTrue(isinstance(seq_record, SeqRecord))
00027 
00028         self.assertEqual(seq_record.id, "Q13454")
00029         self.assertEqual(seq_record.name, "N33_HUMAN")
00030         self.assertEqual(seq_record.description, "N33 PROTEIN.")
00031         self.assertEqual(repr(seq_record.seq), "Seq('MGARGAPSRRRQAGRRLRYLPTGSFPFLLLLLLLCIQLGGGQKKKENLLAEKVE...DFE', ProteinAlphabet())")
00032 
00033         test_handle = open(datafile)
00034         record = SwissProt.read(test_handle)
00035         test_handle.close()
00036 
00037         # test a couple of things on the record -- this is not exhaustive
00038         self.assertEqual(record.entry_name, "N33_HUMAN")
00039         self.assertEqual(record.accessions, ['Q13454', 'Q14911', 'Q14912'])
00040         self.assertEqual(record.organism_classification, ['Eukaryota', 'Metazoa', 'Chordata', 'Craniata', 'Vertebrata', 'Mammalia', 'Eutheria', 'Primates', 'Catarrhini', 'Hominidae', 'Homo'])
00041         self.assertEqual(record.seqinfo, (348, 39676, '75818910'))
00042     
00043         self.assertEqual(len(record.features), 6)
00044         self.assertEqual(record.features[0], ('TRANSMEM', 20, 40, 'POTENTIAL.', ''))
00045         self.assertEqual(record.features[1], ('TRANSMEM', 197, 217, 'POTENTIAL.', ''))
00046         self.assertEqual(record.features[2], ('TRANSMEM', 222, 242, 'POTENTIAL.', ''))
00047         self.assertEqual(record.features[3], ('TRANSMEM', 277, 297, 'POTENTIAL.', ''))
00048         self.assertEqual(record.features[4], ('TRANSMEM', 313, 333, 'POTENTIAL.', ''))
00049         self.assertEqual(record.features[5], ('VARSPLIC', 344, 348, 'DLDFE -> FLIK (IN FORM 2).', ''))
00050 
00051         self.assertEqual(len(record.references), 1)
00052         self.assertEqual(record.references[0].authors, "MACGROGAN D., LEVY A., BOVA G.S., ISAACS W.B., BOOKSTEIN R.")
00053         self.assertEqual(record.references[0].title, "Structure and methylation-associated silencing of a gene within a homozygously deleted region of human chromosome band 8p22.")
00054         self.assertEqual(len(record.references[0].references), 1)
00055         self.assertEqual(record.references[0].references[0], ('MEDLINE', '96299740'))
00056 
00057         #Check the two parsers agree on the essentials
00058         self.assertEqual(seq_record.seq.tostring(), record.sequence)
00059         self.assertEqual(seq_record.description, record.description)
00060         self.assertEqual(seq_record.name, record.entry_name)
00061         self.assertTrue(seq_record.id in record.accessions)
00062 
00063         #Now try using the iterator - note that all these
00064         #test cases have only one record.
00065 
00066         # With the SequenceParser
00067         test_handle = open(datafile)
00068         records = list(SeqIO.parse(test_handle, "swiss"))
00069         test_handle.close()
00070 
00071         self.assertEqual(len(records), 1)
00072         self.assertTrue(isinstance(records[0], SeqRecord))
00073 
00074         #Check matches what we got earlier without the iterator:
00075         self.assertEqual(records[0].seq.tostring(), seq_record.seq.tostring())
00076         self.assertEqual(records[0].description, seq_record.description)
00077         self.assertEqual(records[0].name, seq_record.name)
00078         self.assertEqual(records[0].id, seq_record.id)
00079 
00080         # With the RecordParser
00081         test_handle = open(datafile)
00082         records = list(SwissProt.parse(test_handle))
00083         test_handle.close()
00084 
00085         self.assertEqual(len(records), 1)
00086         self.assertTrue(isinstance(records[0], SwissProt.Record))
00087 
00088         #Check matches what we got earlier without the iterator:
00089         self.assertEqual(records[0].sequence, record.sequence)
00090         self.assertEqual(records[0].description, record.description)
00091         self.assertEqual(records[0].entry_name, record.entry_name)
00092         self.assertEqual(records[0].accessions, record.accessions)
00093 
00094 
00095     def test_sp002(self):
00096         "Parsing SwissProt file sp002"
00097 
00098         filename = 'sp002'
00099         # test the record parser
00100 
00101         datafile = os.path.join('SwissProt', filename)
00102 
00103         test_handle = open(datafile)
00104         seq_record = SeqIO.read(test_handle, "swiss")
00105         test_handle.close()
00106 
00107         self.assertTrue(isinstance(seq_record, SeqRecord))
00108 
00109         self.assertEqual(seq_record.id, "P54101")
00110         self.assertEqual(seq_record.name, "CSP_MOUSE")
00111         self.assertEqual(seq_record.description, "CYSTEINE STRING PROTEIN (CSP).")
00112         self.assertEqual(repr(seq_record.seq), "Seq('MADQRQRSLSTSGESLYHVLGLDKNATSDDIKKSYRKLALKYHPDKNPDNPEAA...GFN', ProteinAlphabet())")
00113 
00114         test_handle = open(datafile)
00115         record = SwissProt.read(test_handle)
00116         test_handle.close()
00117 
00118         # test a couple of things on the record -- this is not exhaustive
00119         self.assertEqual(record.entry_name, "CSP_MOUSE")
00120         self.assertEqual(record.accessions, ['P54101'])
00121         self.assertEqual(record.organism_classification, ['Eukaryota', 'Metazoa', 'Chordata', 'Craniata', 'Vertebrata', 'Mammalia', 'Eutheria', 'Rodentia', 'Sciurognathi', 'Muridae', 'Murinae', 'Mus'])
00122         self.assertEqual(record.seqinfo, (198, 22100, '9DF0142B'))
00123     
00124         self.assertEqual(len(record.features), 2) 
00125         self.assertEqual(record.features[0], ('DOMAIN', 13, 82, 'DNAJ-LIKE.', ''))
00126         self.assertEqual(record.features[1], ('DOMAIN', 118, 128, 'POLY-CYS.', ''))
00127 
00128         self.assertEqual(len(record.references), 3)
00129         self.assertEqual(record.references[0].authors, "QIN N., LIN T., BIRNBAUMER L.")
00130         self.assertEqual(record.references[0].title, "")
00131         self.assertEqual(len(record.references[0].references), 0)
00132         self.assertEqual(record.references[1].authors, "MASTROGIACOMO A., GUNDERSEN C.B.")
00133         self.assertEqual(record.references[1].title, "The nucleotide and deduced amino acid sequence of a rat cysteine string protein.")
00134         self.assertEqual(len(record.references[1].references), 1)
00135         self.assertEqual(record.references[1].references[0], ('MEDLINE', '95223109'))
00136         self.assertEqual(record.references[2].authors, "BRAUN J.E., SCHELLER R.H.")
00137         self.assertEqual(record.references[2].title, "Cysteine string protein, a DnaJ family member, is present on diverse secretory vesicles.")
00138         self.assertEqual(len(record.references[2].references), 1)
00139         self.assertEqual(record.references[2].references[0], ('MEDLINE', '96188189'))
00140 
00141         #Check the two parsers agree on the essentials
00142         self.assertEqual(seq_record.seq.tostring(), record.sequence)
00143         self.assertEqual(seq_record.description, record.description)
00144         self.assertEqual(seq_record.name, record.entry_name)
00145         self.assertTrue(seq_record.id in record.accessions)
00146 
00147         #Now try using the iterator - note that all these
00148         #test cases have only one record.
00149 
00150         # With the SequenceParser
00151         test_handle = open(datafile)
00152         records = list(SeqIO.parse(test_handle, "swiss"))
00153         test_handle.close()
00154 
00155         self.assertEqual(len(records), 1)
00156         self.assertTrue(isinstance(records[0], SeqRecord))
00157 
00158         #Check matches what we got earlier without the iterator:
00159         self.assertEqual(records[0].seq.tostring(), seq_record.seq.tostring())
00160         self.assertEqual(records[0].description, seq_record.description)
00161         self.assertEqual(records[0].name, seq_record.name)
00162         self.assertEqual(records[0].id, seq_record.id)
00163 
00164         # With the RecordParser
00165         test_handle = open(datafile)
00166         records = list(SwissProt.parse(test_handle))
00167         test_handle.close()
00168 
00169         self.assertEqual(len(records), 1)
00170         self.assertTrue(isinstance(records[0], SwissProt.Record))
00171 
00172         #Check matches what we got earlier without the iterator:
00173         self.assertEqual(records[0].sequence, record.sequence)
00174         self.assertEqual(records[0].description, record.description)
00175         self.assertEqual(records[0].entry_name, record.entry_name)
00176         self.assertEqual(records[0].accessions, record.accessions)
00177 
00178     def test_sp003(self):
00179         "Parsing SwissProt file sp003"
00180 
00181         filename = 'sp003'
00182         # test the record parser
00183 
00184         datafile = os.path.join('SwissProt', filename)
00185 
00186         test_handle = open(datafile)
00187         seq_record = SeqIO.read(test_handle, "swiss")
00188         test_handle.close()
00189 
00190         self.assertTrue(isinstance(seq_record, SeqRecord))
00191 
00192         self.assertEqual(seq_record.id, "P42655")
00193         self.assertEqual(seq_record.name, "143E_HUMAN")
00194         self.assertEqual(seq_record.description, "14-3-3 PROTEIN EPSILON (MITOCHONDRIAL IMPORT STIMULATION FACTOR L SUBUNIT) (PROTEIN KINASE C INHIBITOR PROTEIN-1) (KCIP-1) (14-3-3E).")
00195         self.assertEqual(repr(seq_record.seq), "Seq('MDDREDLVYQAKLAEQAERYDEMVESMKKVAGMDVELTVEERNLLSVAYKNVIG...ENQ', ProteinAlphabet())")
00196 
00197         test_handle = open(datafile)
00198         record = SwissProt.read(test_handle)
00199         test_handle.close()
00200 
00201         # test a couple of things on the record -- this is not exhaustive
00202         self.assertEqual(record.entry_name, "143E_HUMAN")
00203         self.assertEqual(record.accessions, ['P42655', 'P29360', 'Q63631'])
00204         self.assertEqual(record.organism_classification, ['EUKARYOTA', 'METAZOA', 'CHORDATA', 'VERTEBRATA', 'MAMMALIA', 'EUTHERIA', 'PRIMATES', 'CATARRHINI', 'HOMINIDAE', 'HOMO'])
00205         self.assertEqual(record.seqinfo, (255, 29174, '40A43E62'))
00206 
00207         self.assertEqual(len(record.features), 5)
00208         self.assertEqual(record.features[0], ('MOD_RES', 1, 1, 'ACETYLATION.', ''))
00209         self.assertEqual(record.features[1], ('CONFLICT', 73, 73, 'K -> T (IN REF. 8).', ''))
00210         self.assertEqual(record.features[2], ('CONFLICT', 120, 120, 'F -> S (IN REF. 8).', ''))
00211         self.assertEqual(record.features[3], ('CONFLICT', 123, 123, 'K -> Y (IN REF. 8).', ''))
00212         self.assertEqual(record.features[4], ('CONFLICT', 129, 129, 'H -> Y (IN REF. 13).', ''))
00213 
00214         self.assertEqual(len(record.references), 13)
00215         self.assertEqual(record.references[0].authors, "CONKLIN D.S., GALAKTIONOV K., BEACH D.")
00216         self.assertEqual(record.references[0].title, "14-3-3 proteins associate with cdc25 phosphatases.")
00217         self.assertEqual(len(record.references[0].references), 1)
00218         self.assertEqual(record.references[0].references[0], ('MEDLINE', '95372385'))
00219         self.assertEqual(record.references[1].authors, "LUK S.C.W., LEE C.Y., WAYE M.M.Y.")
00220         self.assertEqual(record.references[1].title, "")
00221         self.assertEqual(len(record.references[1].references), 0)
00222         self.assertEqual(record.references[2].authors, "JIN D.Y., LYU M.S., KOZAK C.A., JEANG K.T.")
00223         self.assertEqual(record.references[2].title, "Function of 14-3-3 proteins.")
00224         self.assertEqual(len(record.references[2].references), 1)
00225         self.assertEqual(record.references[2].references[0], ('MEDLINE', '96300316'))
00226         self.assertEqual(record.references[3].authors, "CHONG S.S., TANIGAMI A., ROSCHKE A.V., LEDBETTER D.H.")
00227         self.assertEqual(record.references[3].title, "14-3-3 epsilon has no homology to LIS1 and lies telomeric to it on chromosome 17p13.3 outside the Miller-Dieker syndrome chromosome region.")
00228         self.assertEqual(len(record.references[3].references), 1)
00229         self.assertEqual(record.references[3].references[0], ('MEDLINE', '97011338'))
00230         self.assertEqual(record.references[4].authors, "TANIGAMI A., CHONG S.S., LEDBETTER D.H.")
00231         self.assertEqual(record.references[4].title, "14-3-3 epsilon genomic sequence.")
00232         self.assertEqual(len(record.references[4].references), 0)
00233         self.assertEqual(record.references[5].authors, "ROSEBOOM P.H., WELLER J.L., BABILA T., AITKEN A., SELLERS L.A., MOFFET J.R., NAMBOODIRI M.A., KLEIN D.C.")
00234         self.assertEqual(record.references[5].title, "Cloning and characterization of the epsilon and zeta isoforms of the 14-3-3 proteins.")
00235         self.assertEqual(len(record.references[5].references), 1)
00236         self.assertEqual(record.references[5].references[0], ('MEDLINE', '94296566'))
00237         self.assertEqual(record.references[6].authors, "ALAM R., HACHIYA N., SAKAGUCHI M., SHUN-ICHIRO K., IWANAGA S., KITAJIMA M., MIHARA K., OMURA T.")
00238         self.assertEqual(record.references[6].title, "cDNA cloning and characterization of mitochondrial import stimulation factor (MSF) purified from rat liver cytosol.")
00239         self.assertEqual(len(record.references[6].references), 1)
00240         self.assertEqual(record.references[6].references[0], ('MEDLINE', '95122474'))
00241         self.assertEqual(record.references[7].authors, "GAO L., GU X.B., YU D.S., YU R.K., ZENG G.")
00242         self.assertEqual(record.references[7].title, "Association of a 14-3-3 protein with CMP-NeuAc:GM1 alpha 2,3- sialyltransferase.")
00243         self.assertEqual(len(record.references[7].references), 1)
00244         self.assertEqual(record.references[7].references[0], ('MEDLINE', '96280718'))
00245         self.assertEqual(record.references[8].authors, "MCCONNELL J.E., ARMSTRONG J.F., BARD J.B.")
00246         self.assertEqual(record.references[8].title, "The mouse 14-3-3 epsilon isoform, a kinase regulator whose expression pattern is modulated in mesenchyme and neuronal differentiation.")
00247         self.assertEqual(len(record.references[8].references), 1)
00248         self.assertEqual(record.references[8].references[0], ('MEDLINE', '95269876'))
00249         self.assertEqual(record.references[9].authors, "TAKIHARA Y., IRIE K., NOMURA M., MOTALEB M., MATSUMOTO K., SHIMADA K.")
00250         self.assertEqual(record.references[9].title, "")
00251         self.assertEqual(len(record.references[9].references), 0)
00252         self.assertEqual(record.references[10].authors, "JONES J.M., NIIKURA T., PINKE R.M., GUO W., MOLDAY L., LEYKAM J., MCCONNELL D.G.")
00253         self.assertEqual(record.references[10].title, "Expression of 14-3-3 proteins in bovine retinal photoreceptors.")
00254         self.assertEqual(len(record.references[10].references), 0)
00255         self.assertEqual(record.references[11].authors, "TOKER A., SELLERS L.A., AMESS B., PATEL Y., HARRIS A., AITKEN A.")
00256         self.assertEqual(record.references[11].title, "Multiple isoforms of a protein kinase C inhibitor (KCIP-1/14-3-3) from sheep brain. Amino acid sequence of phosphorylated forms.")
00257         self.assertEqual(len(record.references[11].references), 1)
00258         self.assertEqual(record.references[11].references[0], ('MEDLINE', '92283271'))
00259         self.assertEqual(record.references[12].authors, "TOKER A., ELLIS C.A., SELLERS L.A., AITKEN A.")
00260         self.assertEqual(record.references[12].title, "Protein kinase C inhibitor proteins. Purification from sheep brain and sequence similarity to lipocortins and 14-3-3 protein.")
00261         self.assertEqual(len(record.references[12].references), 1)
00262         self.assertEqual(record.references[12].references[0], ('MEDLINE', '90345949'))
00263 
00264         #Check the two parsers agree on the essentials
00265         self.assertEqual(seq_record.seq.tostring(), record.sequence)
00266         self.assertEqual(seq_record.description, record.description)
00267         self.assertEqual(seq_record.name, record.entry_name)
00268         self.assertTrue(seq_record.id in record.accessions)
00269 
00270         #Now try using the iterator - note that all these
00271         #test cases have only one record.
00272 
00273         # With the SequenceParser
00274         test_handle = open(datafile)
00275         records = list(SeqIO.parse(test_handle, "swiss"))
00276         test_handle.close()
00277 
00278         self.assertEqual(len(records), 1)
00279         self.assertTrue(isinstance(records[0], SeqRecord))
00280 
00281         #Check matches what we got earlier without the iterator:
00282         self.assertEqual(records[0].seq.tostring(), seq_record.seq.tostring())
00283         self.assertEqual(records[0].description, seq_record.description)
00284         self.assertEqual(records[0].name, seq_record.name)
00285         self.assertEqual(records[0].id, seq_record.id)
00286 
00287         # With the RecordParser
00288         test_handle = open(datafile)
00289         records = list(SwissProt.parse(test_handle))
00290         test_handle.close()
00291 
00292         self.assertEqual(len(records), 1)
00293         self.assertTrue(isinstance(records[0], SwissProt.Record))
00294 
00295         #Check matches what we got earlier without the iterator:
00296         self.assertEqual(records[0].sequence, record.sequence)
00297         self.assertEqual(records[0].description, record.description)
00298         self.assertEqual(records[0].entry_name, record.entry_name)
00299         self.assertEqual(records[0].accessions, record.accessions)
00300 
00301 
00302     def test_sp004(self):
00303         "Parsing SwissProt file sp004"
00304 
00305         filename = 'sp004'
00306         # test the record parser
00307 
00308         datafile = os.path.join('SwissProt', filename)
00309 
00310         test_handle = open(datafile)
00311         seq_record = SeqIO.read(test_handle, "swiss")
00312         test_handle.close()
00313 
00314         self.assertTrue(isinstance(seq_record, SeqRecord))
00315 
00316         self.assertEqual(seq_record.id, "P23082")
00317         self.assertEqual(seq_record.name, "NDOA_PSEPU")
00318         self.assertEqual(seq_record.description, "NAPHTHALENE 1,2-DIOXYGENASE SYSTEM FERREDOXIN COMPONENT.")
00319         self.assertEqual(repr(seq_record.seq), "Seq('TVKWIEAVALSDILEGDVLGVTVEGKELALYEVEGEIYATDNLCTHGSARMSDG...DLS', ProteinAlphabet())")
00320 
00321         test_handle = open(datafile)
00322         record = SwissProt.read(test_handle)
00323         test_handle.close()
00324 
00325         # test a couple of things on the record -- this is not exhaustive
00326         self.assertEqual(record.entry_name, "NDOA_PSEPU")
00327         self.assertEqual(record.accessions, ['P23082', 'Q52123', 'O07829'])
00328         self.assertEqual(record.organism_classification, ['Bacteria', 'Proteobacteria', 'gamma subdivision', 'Pseudomonas group', 'Pseudomonas'])
00329         self.assertEqual(record.seqinfo, (103, 11315, '9F91B3C8'))
00330     
00331         self.assertEqual(len(record.features), 12)
00332         self.assertEqual(record.features[0], ('INIT_MET', 0, 0, '', ''))
00333         self.assertEqual(record.features[1], ('METAL', 44, 44, 'IRON-SULFUR (2FE-2S) (POTENTIAL).', ''))
00334         self.assertEqual(record.features[2], ('METAL', 46, 46, 'IRON-SULFUR (2FE-2S) (POTENTIAL).', ''))
00335         self.assertEqual(record.features[3], ('METAL', 63, 63, 'IRON-SULFUR (2FE-2S) (POTENTIAL).', ''))
00336         self.assertEqual(record.features[4], ('METAL', 66, 66, 'IRON-SULFUR (2FE-2S) (POTENTIAL).', ''))
00337         self.assertEqual(record.features[5], ('VARIANT', 2, 2, 'V -> E (IN STRAIN G7).', ''))
00338         self.assertEqual(record.features[6], ('VARIANT', 14, 14, 'L -> P (IN STRAIN G7).', ''))
00339         self.assertEqual(record.features[7], ('VARIANT', 48, 48, 'S -> A (IN STRAIN G7).', ''))
00340         self.assertEqual(record.features[8], ('VARIANT', 76, 76, 'K -> R (IN STRAIN G7).', ''))
00341         self.assertEqual(record.features[9], ('VARIANT', 84, 84, 'Q -> E (IN STRAIN G7).', ''))
00342         self.assertEqual(record.features[10], ('VARIANT', 90, 90, 'P -> A (IN STRAIN G7).', ''))
00343         self.assertEqual(record.features[11], ('VARIANT', 103, 103, 'S -> GEF (IN STRAIN G7).', ''))
00344 
00345         self.assertEqual(len(record.references), 4) 
00346         self.assertEqual(record.references[0].authors, "KURKELA S., LEHVAESLAIHO H., PALVA E.T., TEERI T.H.")
00347         self.assertEqual(record.references[0].title, "Cloning, nucleotide sequence and characterization of genes encoding naphthalene dioxygenase of Pseudomonas putida strain NCIB9816.")
00348         self.assertEqual(len(record.references[0].references), 1)
00349         self.assertEqual(record.references[0].references[0], ('MEDLINE', '89211973'))
00350         self.assertEqual(record.references[1].authors, "SIMON M.J., OSSLUND T.D., SAUNDERS R., ENSLEY B.D., SUGGS S., HARCOURT A.A., SUEN W.-C., CRUDEN D.L., GIBSON D.T., ZYLSTRA G.J.")
00351         self.assertEqual(record.references[1].title, "Sequences of genes encoding naphthalene dioxygenase in Pseudomonas putida strains G7 and NCIB 9816-4.")
00352         self.assertEqual(len(record.references[1].references), 1)
00353         self.assertEqual(record.references[1].references[0], ('MEDLINE', '93252277'))
00354         self.assertEqual(record.references[2].authors, "DENOME S.A., STANLEY D.C., OLSON E.S., YOUNG K.D.")
00355         self.assertEqual(record.references[2].title, "Metabolism of dibenzothiophene and naphthalene in Pseudomonas strains: complete DNA sequence of an upper naphthalene catabolic pathway.")
00356         self.assertEqual(len(record.references[2].references), 1)
00357         self.assertEqual(record.references[2].references[0], ('MEDLINE', '94042852'))
00358         self.assertEqual(record.references[3].authors, "HAMANN C.")
00359         self.assertEqual(record.references[3].title, "")
00360         self.assertEqual(len(record.references[3].references), 0)
00361 
00362         #Check the two parsers agree on the essentials
00363         self.assertEqual(seq_record.seq.tostring(), record.sequence)
00364         self.assertEqual(seq_record.description, record.description)
00365         self.assertEqual(seq_record.name, record.entry_name)
00366         self.assertTrue(seq_record.id in record.accessions)
00367 
00368         #Now try using the iterator - note that all these
00369         #test cases have only one record.
00370 
00371         # With the SequenceParser
00372         test_handle = open(datafile)
00373         records = list(SeqIO.parse(test_handle, "swiss"))
00374         test_handle.close()
00375 
00376         self.assertEqual(len(records), 1)
00377         self.assertTrue(isinstance(records[0], SeqRecord))
00378 
00379         #Check matches what we got earlier without the iterator:
00380         self.assertEqual(records[0].seq.tostring(), seq_record.seq.tostring())
00381         self.assertEqual(records[0].description, seq_record.description)
00382         self.assertEqual(records[0].name, seq_record.name)
00383         self.assertEqual(records[0].id, seq_record.id)
00384 
00385         # With the RecordParser
00386         test_handle = open(datafile)
00387         records = list(SwissProt.parse(test_handle))
00388         test_handle.close()
00389 
00390         self.assertEqual(len(records), 1)
00391         self.assertTrue(isinstance(records[0], SwissProt.Record))
00392 
00393         #Check matches what we got earlier without the iterator:
00394         self.assertEqual(records[0].sequence, record.sequence)
00395         self.assertEqual(records[0].description, record.description)
00396         self.assertEqual(records[0].entry_name, record.entry_name)
00397         self.assertEqual(records[0].accessions, record.accessions)
00398 
00399     def test_sp005(self):
00400         "Parsing SwissProt file sp005"
00401 
00402         filename = 'sp005'
00403         # test the record parser
00404 
00405         datafile = os.path.join('SwissProt', filename)
00406 
00407         test_handle = open(datafile)
00408         seq_record = SeqIO.read(test_handle, "swiss")
00409         test_handle.close()
00410 
00411         self.assertTrue(isinstance(seq_record, SeqRecord))
00412 
00413         self.assertEqual(seq_record.id, "P24973")
00414         self.assertEqual(seq_record.name, "NU3M_BALPH")
00415         self.assertEqual(seq_record.description, "NADH-UBIQUINONE OXIDOREDUCTASE CHAIN 3 (EC 1.6.5.3).")
00416         self.assertEqual(repr(seq_record.seq), "Seq('MNLLLTLLTNTTLALLLVFIAFWLPQLNVYAEKTSPYECGFDPMGSARLPFSMK...WAE', ProteinAlphabet())")
00417 
00418         test_handle = open(datafile)
00419         record = SwissProt.read(test_handle)
00420         test_handle.close()
00421 
00422         # test a couple of things on the record -- this is not exhaustive
00423         self.assertEqual(record.entry_name, "NU3M_BALPH")
00424         self.assertEqual(record.accessions, ['P24973'])
00425         self.assertEqual(record.organism_classification, ['Eukaryota', 'Metazoa', 'Chordata', 'Craniata', 'Vertebrata', 'Mammalia', 'Eutheria', 'Cetartiodactyla', 'Cetacea', 'Mysticeti', 'Balaenopteridae', 'Balaenoptera'])
00426         self.assertEqual(record.seqinfo, (115, 13022, 'ACF02965'))
00427     
00428         self.assertEqual(len(record.features), 0)
00429 
00430         self.assertEqual(len(record.references), 2)
00431         self.assertEqual(record.references[0].authors, "ARNASON U., GULLBERG A., WIDEGREN B.")
00432         self.assertEqual(record.references[0].title, "The complete nucleotide sequence of the mitochondrial DNA of the fin whale, Balaenoptera physalus.")
00433         self.assertEqual(len(record.references[0].references), 1)
00434         self.assertEqual(record.references[0].references[0], ('MEDLINE', '92139449'))
00435         self.assertEqual(record.references[1].authors, "ARNASON U., GULLBERG A.")
00436         self.assertEqual(record.references[1].title, "Comparison between the complete mtDNA sequences of the blue and the fin whale, two species that can hybridize in nature.")
00437         self.assertEqual(len(record.references[1].references), 1)
00438         self.assertEqual(record.references[1].references[0], ('MEDLINE', '94141932'))
00439 
00440         #Check the two parsers agree on the essentials
00441         self.assertEqual(seq_record.seq.tostring(), record.sequence)
00442         self.assertEqual(seq_record.description, record.description)
00443         self.assertEqual(seq_record.name, record.entry_name)
00444         self.assertTrue(seq_record.id in record.accessions)
00445 
00446         #Now try using the iterator - note that all these
00447         #test cases have only one record.
00448 
00449         # With the SequenceParser
00450         test_handle = open(datafile)
00451         records = list(SeqIO.parse(test_handle, "swiss"))
00452         test_handle.close()
00453 
00454         self.assertEqual(len(records), 1)
00455         self.assertTrue(isinstance(records[0], SeqRecord))
00456 
00457         #Check matches what we got earlier without the iterator:
00458         self.assertEqual(records[0].seq.tostring(), seq_record.seq.tostring())
00459         self.assertEqual(records[0].description, seq_record.description)
00460         self.assertEqual(records[0].name, seq_record.name)
00461         self.assertEqual(records[0].id, seq_record.id)
00462 
00463         # With the RecordParser
00464         test_handle = open(datafile)
00465         records = list(SwissProt.parse(test_handle))
00466         test_handle.close()
00467 
00468         self.assertEqual(len(records), 1)
00469         self.assertTrue(isinstance(records[0], SwissProt.Record))
00470 
00471         #Check matches what we got earlier without the iterator:
00472         self.assertEqual(records[0].sequence, record.sequence)
00473         self.assertEqual(records[0].description, record.description)
00474         self.assertEqual(records[0].entry_name, record.entry_name)
00475         self.assertEqual(records[0].accessions, record.accessions)
00476 
00477 
00478     def test_sp006(self):
00479         "Parsing SwissProt file sp006"
00480 
00481         filename = 'sp006'
00482         # test the record parser
00483 
00484         datafile = os.path.join('SwissProt', filename)
00485 
00486         test_handle = open(datafile)
00487         seq_record = SeqIO.read(test_handle, "swiss")
00488         test_handle.close()
00489 
00490         self.assertTrue(isinstance(seq_record, SeqRecord))
00491 
00492         self.assertEqual(seq_record.id, "P39896")
00493         self.assertEqual(seq_record.name, "TCMO_STRGA")
00494         self.assertEqual(seq_record.description, "TETRACENOMYCIN POLYKETIDE SYNTHESIS 8-O-METHYL TRANSFERASE TCMO (EC 2.1.1.-).")
00495         self.assertEqual(repr(seq_record.seq), "Seq('MTPHTHVRGPGDILQLTMAFYGSRALISAVELDLFTLLAGKPLPLGELCERAGI...KPR', ProteinAlphabet())")
00496 
00497         test_handle = open(datafile)
00498         record = SwissProt.read(test_handle)
00499         test_handle.close()
00500 
00501         # test a couple of things on the record -- this is not exhaustive
00502         self.assertEqual(record.entry_name, "TCMO_STRGA")
00503         self.assertEqual(record.accessions, ['P39896'])
00504         self.assertEqual(record.organism_classification, ['BACTERIA', 'FIRMICUTES', 'ACTINOBACTERIA', 'ACTINOBACTERIDAE', 'ACTINOMYCETALES', 'STREPTOMYCINEAE', 'STREPTOMYCETACEAE', 'STREPTOMYCES'])
00505         self.assertEqual(record.seqinfo, (339, 37035, '848B7337'))
00506     
00507         self.assertEqual(len(record.features), 0)
00508 
00509         self.assertEqual(len(record.references), 1)
00510         self.assertEqual(record.references[0].authors, "SUMMERS R.G., WENDT-PIENKOWSKI E., MOTAMEDI H., HUTCHINSON C.R.")
00511         self.assertEqual(record.references[0].title, "Nucleotide sequence of the tcmII-tcmIV region of the tetracenomycin C biosynthetic gene cluster of Streptomyces glaucescens and evidence that the tcmN gene encodes a multifunctional cyclase-dehydratase-O-methyl transferase.")
00512         self.assertEqual(len(record.references[0].references), 1)
00513         self.assertEqual(record.references[0].references[0], ('MEDLINE', '92193265'))
00514 
00515         #Check the two parsers agree on the essentials
00516         self.assertEqual(seq_record.seq.tostring(), record.sequence)
00517         self.assertEqual(seq_record.description, record.description)
00518         self.assertEqual(seq_record.name, record.entry_name)
00519         self.assertTrue(seq_record.id in record.accessions)
00520 
00521         #Now try using the iterator - note that all these
00522         #test cases have only one record.
00523 
00524         # With the SequenceParser
00525         test_handle = open(datafile)
00526         records = list(SeqIO.parse(test_handle, "swiss"))
00527         test_handle.close()
00528 
00529         self.assertEqual(len(records), 1)
00530         self.assertTrue(isinstance(records[0], SeqRecord))
00531 
00532         #Check matches what we got earlier without the iterator:
00533         self.assertEqual(records[0].seq.tostring(), seq_record.seq.tostring())
00534         self.assertEqual(records[0].description, seq_record.description)
00535         self.assertEqual(records[0].name, seq_record.name)
00536         self.assertEqual(records[0].id, seq_record.id)
00537 
00538         # With the RecordParser
00539         test_handle = open(datafile)
00540         records = list(SwissProt.parse(test_handle))
00541         test_handle.close()
00542 
00543         self.assertEqual(len(records), 1)
00544         self.assertTrue(isinstance(records[0], SwissProt.Record))
00545 
00546         #Check matches what we got earlier without the iterator:
00547         self.assertEqual(records[0].sequence, record.sequence)
00548         self.assertEqual(records[0].description, record.description)
00549         self.assertEqual(records[0].entry_name, record.entry_name)
00550         self.assertEqual(records[0].accessions, record.accessions)
00551 
00552 
00553     def test_sp007(self):
00554         "Parsing SwissProt file sp007"
00555 
00556         filename = 'sp007'
00557         # test the record parser
00558 
00559         datafile = os.path.join('SwissProt', filename)
00560 
00561         test_handle = open(datafile)
00562         seq_record = SeqIO.read(test_handle, "swiss")
00563         test_handle.close()
00564 
00565         self.assertTrue(isinstance(seq_record, SeqRecord))
00566 
00567         self.assertEqual(seq_record.id, "O95832")
00568         self.assertEqual(seq_record.name, "CLD1_HUMAN")
00569         self.assertEqual(seq_record.description, "CLAUDIN-1 (SENESCENCE-ASSOCIATED EPITHELIAL MEMBRANE PROTEIN).")
00570         self.assertEqual(repr(seq_record.seq), "Seq('MANAGLQLLGFILAFLGWIGAIVSTALPQWRIYSYAGDNIVTAQAMYEGLWMSC...DYV', ProteinAlphabet())")
00571 
00572         test_handle = open(datafile)
00573         record = SwissProt.read(test_handle)
00574         test_handle.close()
00575 
00576         # test a couple of things on the record -- this is not exhaustive
00577         self.assertEqual(record.entry_name, "CLD1_HUMAN")
00578         self.assertEqual(record.accessions, ['O95832'])
00579         self.assertEqual(record.organism_classification, ['Eukaryota', 'Metazoa', 'Chordata', 'Craniata', 'Vertebrata', 'Mammalia', 'Eutheria', 'Primates', 'Catarrhini', 'Hominidae', 'Homo'])
00580         self.assertEqual(record.seqinfo, (211, 22744, '07269000E6C214F0'))
00581     
00582         self.assertEqual(len(record.features), 6)
00583         self.assertEqual(record.features[0], ('TRANSMEM', 8, 28, 'POTENTIAL.', ''))
00584         self.assertEqual(record.features[1], ('TRANSMEM', 82, 102, 'POTENTIAL.', ''))
00585         self.assertEqual(record.features[2], ('TRANSMEM', 116, 136, 'POTENTIAL.', ''))
00586         self.assertEqual(record.features[3], ('TRANSMEM', 164, 184, 'POTENTIAL.', ''))
00587         self.assertEqual(record.features[4], ('CONFLICT', 62, 62, 'I -> V (IN REF. 2).', ''))
00588         self.assertEqual(record.features[5], ('CONFLICT', 135, 135, 'V -> A (IN REF. 2).', ''))
00589 
00590         self.assertEqual(len(record.references), 2)
00591         self.assertEqual(record.references[0].authors, "Swisshelm K.L., Machl A., Planitzer S., Robertson R., Kubbies M., Hosier S.")
00592         self.assertEqual(record.references[0].title, "SEMP1, a senescence-associated cDNA isolated from human mammary epithelial cells, is a member of an epithelial membrane protein superfamily.")
00593         self.assertEqual(len(record.references[0].references), 1)
00594         self.assertEqual(record.references[0].references[0], ('MEDLINE', '99132301'))
00595         self.assertEqual(record.references[1].authors, "Mitic L.M., Anderson J.M.")
00596         self.assertEqual(record.references[1].title, "Human claudin-1 isolated from Caco-2 mRNA.")
00597         self.assertEqual(len(record.references[1].references), 0)
00598 
00599         #Check the two parsers agree on the essentials
00600         self.assertEqual(seq_record.seq.tostring(), record.sequence)
00601         self.assertEqual(seq_record.description, record.description)
00602         self.assertEqual(seq_record.name, record.entry_name)
00603         self.assertTrue(seq_record.id in record.accessions)
00604 
00605         #Now try using the iterator - note that all these
00606         #test cases have only one record.
00607 
00608         # With the SequenceParser
00609         test_handle = open(datafile)
00610         records = list(SeqIO.parse(test_handle, "swiss"))
00611         test_handle.close()
00612 
00613         self.assertEqual(len(records), 1)
00614         self.assertTrue(isinstance(records[0], SeqRecord))
00615 
00616         #Check matches what we got earlier without the iterator:
00617         self.assertEqual(records[0].seq.tostring(), seq_record.seq.tostring())
00618         self.assertEqual(records[0].description, seq_record.description)
00619         self.assertEqual(records[0].name, seq_record.name)
00620         self.assertEqual(records[0].id, seq_record.id)
00621 
00622         # With the RecordParser
00623         test_handle = open(datafile)
00624         records = list(SwissProt.parse(test_handle))
00625         test_handle.close()
00626 
00627         self.assertEqual(len(records), 1)
00628         self.assertTrue(isinstance(records[0], SwissProt.Record))
00629 
00630         #Check matches what we got earlier without the iterator:
00631         self.assertEqual(records[0].sequence, record.sequence)
00632         self.assertEqual(records[0].description, record.description)
00633         self.assertEqual(records[0].entry_name, record.entry_name)
00634         self.assertEqual(records[0].accessions, record.accessions)
00635 
00636 
00637     def test_sp008(self):
00638         "Parsing SwissProt file sp008"
00639 
00640         filename = 'sp008'
00641         # test the record parser
00642 
00643         datafile = os.path.join('SwissProt', filename)
00644 
00645         test_handle = open(datafile)
00646         seq_record = SeqIO.read(test_handle, "swiss")
00647         test_handle.close()
00648 
00649         self.assertTrue(isinstance(seq_record, SeqRecord))
00650 
00651         self.assertEqual(seq_record.id, "P01892")
00652         self.assertEqual(seq_record.name, "1A02_HUMAN")
00653         self.assertEqual(seq_record.description, "HLA CLASS I HISTOCOMPATIBILITY ANTIGEN, A-2 ALPHA CHAIN PRECURSOR.")
00654         self.assertEqual(repr(seq_record.seq), "Seq('MAVMAPRTLVLLLSGALALTQTWAGSHSMRYFFTSVSRPGRGEPRFIAVGYVDD...CKV', ProteinAlphabet())")
00655 
00656         test_handle = open(datafile)
00657         record = SwissProt.read(test_handle)
00658         test_handle.close()
00659 
00660         # test a couple of things on the record -- this is not exhaustive
00661         self.assertEqual(record.entry_name, "1A02_HUMAN")
00662         self.assertEqual(record.accessions, ['P01892', 'P06338', 'P30514', 'P30444', 'P30445', 'P30446', 'Q29680', 'Q29899', 'Q95352', 'Q29837', 'Q95380'])
00663         self.assertEqual(record.organism_classification, ['Eukaryota', 'Metazoa', 'Chordata', 'Craniata', 'Vertebrata', 'Mammalia', 'Eutheria', 'Primates', 'Catarrhini', 'Hominidae', 'Homo'])
00664         self.assertEqual(record.seqinfo, (365, 40922, 'B54A97B24B337C08'))
00665     
00666         self.assertEqual(len(record.features), 71)
00667         self.assertEqual(record.features[0], ('SIGNAL', 1, 24, '', ''))
00668         self.assertEqual(record.features[1], ('CHAIN', 25, 365, 'HLA CLASS I HISTOCOMPATIBILITY ANTIGEN, A-2 ALPHA CHAIN.', ''))
00669         self.assertEqual(record.features[2], ('DOMAIN', 25, 114, 'EXTRACELLULAR ALPHA-1.', ''))
00670         self.assertEqual(record.features[3], ('DOMAIN', 115, 206, 'EXTRACELLULAR ALPHA-2.', ''))
00671         self.assertEqual(record.features[4], ('DOMAIN', 207, 298, 'EXTRACELLULAR ALPHA-3.', ''))
00672         self.assertEqual(record.features[5], ('DOMAIN', 299, 308, 'CONNECTING PEPTIDE.', ''))
00673         self.assertEqual(record.features[6], ('TRANSMEM', 309, 332, '', ''))
00674         self.assertEqual(record.features[7], ('DOMAIN', 333, 365, 'CYTOPLASMIC TAIL.', ''))
00675         self.assertEqual(record.features[8], ('CARBOHYD', 110, 110, '', ''))
00676         self.assertEqual(record.features[9], ('DISULFID', 125, 188, '', ''))
00677         self.assertEqual(record.features[10], ('DISULFID', 227, 283, '', ''))
00678         self.assertEqual(record.features[11], ('STRAND', 27, 36, '', ''))
00679         self.assertEqual(record.features[12], ('STRAND', 45, 52, '', ''))
00680         self.assertEqual(record.features[13], ('TURN', 53, 54, '', ''))
00681         self.assertEqual(record.features[14], ('STRAND', 55, 61, '', ''))
00682         self.assertEqual(record.features[15], ('TURN', 62, 63, '', ''))
00683         self.assertEqual(record.features[16], ('STRAND', 70, 71, '', ''))
00684         self.assertEqual(record.features[17], ('HELIX', 74, 76, '', ''))
00685         self.assertEqual(record.features[18], ('TURN', 77, 78, '', ''))
00686         self.assertEqual(record.features[19], ('HELIX', 81, 108, '', ''))
00687         self.assertEqual(record.features[20], ('TURN', 109, 110, '', ''))
00688         self.assertEqual(record.features[21], ('TURN', 113, 114, '', ''))
00689         self.assertEqual(record.features[22], ('STRAND', 118, 127, '', ''))
00690         self.assertEqual(record.features[23], ('TURN', 129, 130, '', ''))
00691         self.assertEqual(record.features[24], ('STRAND', 133, 142, '', ''))
00692         self.assertEqual(record.features[25], ('TURN', 143, 144, '', ''))
00693         self.assertEqual(record.features[26], ('STRAND', 145, 150, '', ''))
00694         self.assertEqual(record.features[27], ('TURN', 152, 153, '', ''))
00695         self.assertEqual(record.features[28], ('STRAND', 157, 159, '', ''))
00696         self.assertEqual(record.features[29], ('TURN', 163, 163, '', ''))
00697         self.assertEqual(record.features[30], ('HELIX', 164, 173, '', ''))
00698         self.assertEqual(record.features[31], ('TURN', 174, 175, '', ''))
00699         self.assertEqual(record.features[32], ('HELIX', 176, 185, '', ''))
00700         self.assertEqual(record.features[33], ('TURN', 186, 186, '', ''))
00701         self.assertEqual(record.features[34], ('HELIX', 187, 198, '', ''))
00702         self.assertEqual(record.features[35], ('TURN', 199, 199, '', ''))
00703         self.assertEqual(record.features[36], ('HELIX', 200, 203, '', ''))
00704         self.assertEqual(record.features[37], ('TURN', 204, 204, '', ''))
00705         self.assertEqual(record.features[38], ('STRAND', 207, 207, '', ''))
00706         self.assertEqual(record.features[39], ('STRAND', 210, 219, '', ''))
00707         self.assertEqual(record.features[40], ('TURN', 220, 221, '', ''))
00708         self.assertEqual(record.features[41], ('STRAND', 222, 233, '', ''))
00709         self.assertEqual(record.features[42], ('STRAND', 238, 243, '', ''))
00710         self.assertEqual(record.features[43], ('TURN', 244, 245, '', ''))
00711         self.assertEqual(record.features[44], ('STRAND', 246, 247, '', ''))
00712         self.assertEqual(record.features[45], ('HELIX', 249, 251, '', ''))
00713         self.assertEqual(record.features[46], ('STRAND', 253, 254, '', ''))
00714         self.assertEqual(record.features[47], ('STRAND', 258, 259, '', ''))
00715         self.assertEqual(record.features[48], ('STRAND', 265, 274, '', ''))
00716         self.assertEqual(record.features[49], ('TURN', 275, 276, '', ''))
00717         self.assertEqual(record.features[50], ('HELIX', 278, 280, '', ''))
00718         self.assertEqual(record.features[51], ('STRAND', 281, 286, '', ''))
00719         self.assertEqual(record.features[52], ('TURN', 288, 289, '', ''))
00720         self.assertEqual(record.features[53], ('STRAND', 294, 297, '', ''))
00721         self.assertEqual(record.features[54], ('VARIANT', 33, 33, 'F -> Y (IN A*0205, A*0206, A*0208, A*0210 AND A*0221).', 'VAR_004334'))
00722         self.assertEqual(record.features[55], ('VARIANT', 54, 54, 'D -> N (IN A*0221).', 'VAR_004335'))
00723         self.assertEqual(record.features[56], ('VARIANT', 67, 67, 'Q -> R (IN A*0202, A*0205, AND A*0208).', 'VAR_004336'))
00724         self.assertEqual(record.features[57], ('VARIANT', 90, 90, 'K -> N (IN A*0208 AND A*0220).', 'VAR_004337'))
00725         self.assertEqual(record.features[58], ('VARIANT', 97, 98, 'TH -> ID (IN A*0211).', 'VAR_004338'))
00726         self.assertEqual(record.features[59], ('VARIANT', 119, 119, 'V -> L (IN A*0202, A*0205, A*0208 AND A*0217).', 'VAR_004339'))
00727         self.assertEqual(record.features[60], ('VARIANT', 121, 121, 'R -> M (IN A*0204 AND A*0217).', 'VAR_004340'))
00728         self.assertEqual(record.features[61], ('VARIANT', 123, 123, 'Y -> C (IN A*0207 AND A*0218).', 'VAR_004341'))
00729         self.assertEqual(record.features[62], ('VARIANT', 123, 123, 'Y -> F (IN A*0210 AND A*0217).', 'VAR_004342'))
00730         self.assertEqual(record.features[63], ('VARIANT', 131, 131, 'W -> G (IN A*0210).', 'VAR_004343'))
00731         self.assertEqual(record.features[64], ('VARIANT', 162, 162, 'M -> K (IN A*0218).', 'VAR_004344'))
00732         self.assertEqual(record.features[65], ('VARIANT', 173, 173, 'A -> T (IN A*0203).', 'VAR_004345'))
00733         self.assertEqual(record.features[66], ('VARIANT', 176, 176, 'V -> E (IN A*0203 AND A*0213).', 'VAR_004346'))
00734         self.assertEqual(record.features[67], ('VARIANT', 180, 180, 'L -> W (IN A*0202, A*0203, A*0205 AND A*0208).', 'VAR_004347'))
00735         self.assertEqual(record.features[68], ('VARIANT', 180, 180, 'L -> Q (IN A*0212 AND A*0213).', 'VAR_004348'))
00736         self.assertEqual(record.features[69], ('VARIANT', 187, 187, 'T -> E (IN A*0216).', 'VAR_004349'))
00737         self.assertEqual(record.features[70], ('VARIANT', 260, 260, 'A -> E (IN A*0209).', 'VAR_004350'))
00738 
00739         self.assertEqual(len(record.references), 27)
00740         self.assertEqual(record.references[0].authors, "Koller B.H., Orr H.T.")
00741         self.assertEqual(record.references[0].title, "Cloning and complete sequence of an HLA-A2 gene: analysis of two HLA-A alleles at the nucleotide level.")
00742         self.assertEqual(len(record.references[0].references), 1)
00743         self.assertEqual(record.references[0].references[0], ('MEDLINE', '85132727'))
00744         self.assertEqual(record.references[1].authors, "Cianetti L., Testa U., Scotto L., la Valle R., Simeone A., Boccoli G., Giannella G., Peschle C., Boncinelli E.")
00745         self.assertEqual(record.references[1].title, "Three new class I HLA alleles: structure of mRNAs and alternative mechanisms of processing.")
00746         self.assertEqual(len(record.references[1].references), 1)
00747         self.assertEqual(record.references[1].references[0], ('MEDLINE', '89122144'))
00748         self.assertEqual(record.references[2].authors, "Ennis P.D., Zemmour J., Salter R.D., Parham P.")
00749         self.assertEqual(record.references[2].title, "Rapid cloning of HLA-A,B cDNA by using the polymerase chain reaction: frequency and nature of errors produced in amplification.")
00750         self.assertEqual(len(record.references[2].references), 1)
00751         self.assertEqual(record.references[2].references[0], ('MEDLINE', '90207291'))
00752         self.assertEqual(record.references[3].authors, "Belich M.P., Madrigal J.A., Hildebrand W.H., Zemmour J., Williams R.C., Luz R., Petzl-Erler M.L., Parham P.")
00753         self.assertEqual(record.references[3].title, "Unusual HLA-B alleles in two tribes of Brazilian Indians.")
00754         self.assertEqual(len(record.references[3].references), 1)
00755         self.assertEqual(record.references[3].references[0], ('MEDLINE', '92269955'))
00756         self.assertEqual(record.references[4].authors, "Krangel M.S.")
00757         self.assertEqual(record.references[4].title, "Unusual RNA splicing generates a secreted form of HLA-A2 in a mutagenized B lymphoblastoid cell line.")
00758         self.assertEqual(len(record.references[4].references), 1)
00759         self.assertEqual(record.references[4].references[0], ('MEDLINE', '85230571'))
00760         self.assertEqual(record.references[5].authors, "Orr H.T., Lopez de Castro J.A., Parham P., Ploegh H.L., Strominger J.L.")
00761         self.assertEqual(record.references[5].title, "Comparison of amino acid sequences of two human histocompatibility antigens, HLA-A2 and HLA-B7: location of putative alloantigenic sites.")
00762         self.assertEqual(len(record.references[5].references), 1)
00763         self.assertEqual(record.references[5].references[0], ('MEDLINE', '80056745'))
00764         self.assertEqual(record.references[6].authors, "Lopez de Castro J.A., Strominger J.L., Strong D.M., Orr H.T.")
00765         self.assertEqual(record.references[6].title, "Structure of crossreactive human histocompatibility antigens HLA-A28 and HLA-A2: possible implications for the generation of HLA polymorphism.")
00766         self.assertEqual(len(record.references[6].references), 1)
00767         self.assertEqual(record.references[6].references[0], ('MEDLINE', '82247941'))
00768         self.assertEqual(record.references[7].authors, "Mattson D.H., Handy D.E., Bradley D.A., Coligan J.E., Cowan E.P., Biddison W.E.")
00769         self.assertEqual(record.references[7].title, "DNA sequences of the genes that encode the CTL-defined HLA-A2 variants M7 and DK1.")
00770         self.assertEqual(len(record.references[7].references), 1)
00771         self.assertEqual(record.references[7].references[0], ('MEDLINE', '87306734'))
00772         self.assertEqual(record.references[8].authors, "Holmes N., Ennis P., Wan A.M., Denney D.W., Parham P.")
00773         self.assertEqual(record.references[8].title, "Multiple genetic mechanisms have contributed to the generation of the HLA-A2/A28 family of class I MHC molecules.")
00774         self.assertEqual(len(record.references[8].references), 1)
00775         self.assertEqual(record.references[8].references[0], ('MEDLINE', '87252273'))
00776         self.assertEqual(record.references[9].authors, "Domena J.D.")
00777         self.assertEqual(record.references[9].title, "")
00778         self.assertEqual(len(record.references[9].references), 0)
00779         self.assertEqual(record.references[10].authors, "Castano A.R., Lopez de Castro J.A.")
00780         self.assertEqual(record.references[10].title, "Structure of the HLA-A*0204 antigen, found in South American Indians. Spatial clustering of HLA-A2 subtype polymorphism.")
00781         self.assertEqual(len(record.references[10].references), 1)
00782         self.assertEqual(record.references[10].references[0], ('MEDLINE', '92039809'))
00783         self.assertEqual(record.references[11].authors, "Watkins D.I., McAdam S.N., Liu X., Stang C.R., Milford E.L., Levine C.G., Garber T.L., Dogon A.L., Lord C.I., Ghim S.H., Troup G.M., Hughes A.L., Letvin N.L.")
00784         self.assertEqual(record.references[11].title, "New recombinant HLA-B alleles in a tribe of South American Amerindians indicate rapid evolution of MHC class I loci.")
00785         self.assertEqual(len(record.references[11].references), 1)
00786         self.assertEqual(record.references[11].references[0], ('MEDLINE', '92269956'))
00787         self.assertEqual(record.references[12].authors, "Parham P., Lawlor D.A., Lomen C.E., Ennis P.D.")
00788         self.assertEqual(record.references[12].title, "Diversity and diversification of HLA-A,B,C alleles.")
00789         self.assertEqual(len(record.references[12].references), 1)
00790         self.assertEqual(record.references[12].references[0], ('MEDLINE', '89235215'))
00791         self.assertEqual(record.references[13].authors, "Ezquerra A., Domenech N., van der Poel J., Strominger J.L., Vega M.A., Lopez de Castro J.A.")
00792         self.assertEqual(record.references[13].title, "Molecular analysis of an HLA-A2 functional variant CLA defined by cytolytic T lymphocytes.")
00793         self.assertEqual(len(record.references[13].references), 1)
00794         self.assertEqual(record.references[13].references[0], ('MEDLINE', '86305811'))
00795         self.assertEqual(record.references[14].authors, "Domenech N., Ezquerra A., Castano R., Lopez de Castro J.A.")
00796         self.assertEqual(record.references[14].title, "Structural analysis of HLA-A2.4 functional variant KNE. Implications for the mapping of HLA-A2-specific T-cell epitopes.")
00797         self.assertEqual(len(record.references[14].references), 1)
00798         self.assertEqual(record.references[14].references[0], ('MEDLINE', '88113844'))
00799         self.assertEqual(record.references[15].authors, "Domenech N., Castano R., Goulmy E., Lopez de Castro J.A.")
00800         self.assertEqual(record.references[15].title, "Molecular analysis of HLA-A2.4 functional variant KLO: close structural and evolutionary relatedness to the HLA-A2.2 subtype.")
00801         self.assertEqual(len(record.references[15].references), 1)
00802         self.assertEqual(record.references[15].references[0], ('MEDLINE', '88314183'))
00803         self.assertEqual(record.references[16].authors, "Castano R., Ezquerra A., Domenech N., Lopez de Castro J.A.")
00804         self.assertEqual(record.references[16].title, "An HLA-A2 population variant with structural polymorphism in the alpha 3 region.")
00805         self.assertEqual(len(record.references[16].references), 1)
00806         self.assertEqual(record.references[16].references[0], ('MEDLINE', '88186100'))
00807         self.assertEqual(record.references[17].authors, "Epstein H., Kennedy L., Holmes N.")
00808         self.assertEqual(record.references[17].title, "An Oriental HLA-A2 subtype is closely related to a subset of Caucasoid HLA-A2 alleles.")
00809         self.assertEqual(len(record.references[17].references), 1)
00810         self.assertEqual(record.references[17].references[0], ('MEDLINE', '89122133'))
00811         self.assertEqual(record.references[18].authors, "Castano A.R., Lopez de Castro J.A.")
00812         self.assertEqual(record.references[18].title, "Structure of the HLA-A*0211 (A2.5) subtype: further evidence for selection-driven diversification of HLA-A2 antigens.")
00813         self.assertEqual(len(record.references[18].references), 1)
00814         self.assertEqual(record.references[18].references[0], ('MEDLINE', '92218010'))
00815         self.assertEqual(record.references[19].authors, "Barber D.F., Fernandez J.M., Lopez de Castro J.A.")
00816         self.assertEqual(record.references[19].title, "Primary structure of a new HLA-A2 subtype: HLA-A*0213.")
00817         self.assertEqual(len(record.references[19].references), 1)
00818         self.assertEqual(record.references[19].references[0], ('MEDLINE', '94222455'))
00819         self.assertEqual(record.references[20].authors, "Barouch D., Krausa P., Bodmer J., Browning M.J., McMichael A.J.")
00820         self.assertEqual(record.references[20].title, "Identification of a novel HLA-A2 subtype, HLA-A*0216.")
00821         self.assertEqual(len(record.references[20].references), 1)
00822         self.assertEqual(record.references[20].references[0], ('MEDLINE', '95278976'))
00823         self.assertEqual(record.references[21].authors, "Selvakumar A., Granja C.B., Salazar M., Alosco S.M., Yunis E.J., Dupont B.")
00824         self.assertEqual(record.references[21].title, "A novel subtype of A2 (A*0217) isolated from the South American Indian B-cell line AMALA.")
00825         self.assertEqual(len(record.references[21].references), 1)
00826         self.assertEqual(record.references[21].references[0], ('MEDLINE', '95381236'))
00827         self.assertEqual(record.references[22].authors, "Kashiwase K., Tokunaga K., Ishikawa Y., Oohashi H., Hashimoto M., Akaza T., Tadokoro K., Juji T.")
00828         self.assertEqual(record.references[22].title, "A new A2 sequence HLA-A2K from Japanese.")
00829         self.assertEqual(len(record.references[22].references), 0)
00830         self.assertEqual(record.references[23].authors, "Fleischhauer K., Zino E., Mazzi B., Severini G.M., Benazzi E., Bordignon C.")
00831         self.assertEqual(record.references[23].title, "HLA-A*02 subtype distribution in Caucasians from northern Italy: identification of A*0220.")
00832         self.assertEqual(len(record.references[23].references), 1)
00833         self.assertEqual(record.references[23].references[0], ('MEDLINE', '97161038'))
00834         self.assertEqual(record.references[24].authors, "Szmania S., Baxter-Lowe L.A.")
00835         self.assertEqual(record.references[24].title, "Nucleotide sequence of a novel HLA-A2 gene.")
00836         self.assertEqual(len(record.references[24].references), 0)
00837         self.assertEqual(record.references[25].authors, "Bjorkman P.J., Saper M.A., Samraoui B., Bennett W.S., Strominger J.L., Wiley D.C.")
00838         self.assertEqual(record.references[25].title, "Structure of the human class I histocompatibility antigen, HLA-A2.")
00839         self.assertEqual(len(record.references[25].references), 1)
00840         self.assertEqual(record.references[25].references[0], ('MEDLINE', '88014204'))
00841         self.assertEqual(record.references[26].authors, "Saper M.A., Bjorkman P.J., Wiley D.C.")
00842         self.assertEqual(record.references[26].title, "Refined structure of the human histocompatibility antigen HLA-A2 at 2.6-A resolution.")
00843         self.assertEqual(len(record.references[26].references), 1)
00844         self.assertEqual(record.references[26].references[0], ('MEDLINE', '91245570'))
00845 
00846         #Check the two parsers agree on the essentials
00847         self.assertEqual(seq_record.seq.tostring(), record.sequence)
00848         self.assertEqual(seq_record.description, record.description)
00849         self.assertEqual(seq_record.name, record.entry_name)
00850         self.assertTrue(seq_record.id in record.accessions)
00851 
00852         #Now try using the iterator - note that all these
00853         #test cases have only one record.
00854 
00855         # With the SequenceParser
00856         test_handle = open(datafile)
00857         records = list(SeqIO.parse(test_handle, "swiss"))
00858         test_handle.close()
00859 
00860         self.assertEqual(len(records), 1)
00861         self.assertTrue(isinstance(records[0], SeqRecord))
00862 
00863         #Check matches what we got earlier without the iterator:
00864         self.assertEqual(records[0].seq.tostring(), seq_record.seq.tostring())
00865         self.assertEqual(records[0].description, seq_record.description)
00866         self.assertEqual(records[0].name, seq_record.name)
00867         self.assertEqual(records[0].id, seq_record.id)
00868 
00869         # With the RecordParser
00870         test_handle = open(datafile)
00871         records = list(SwissProt.parse(test_handle))
00872         test_handle.close()
00873 
00874         self.assertEqual(len(records), 1)
00875         self.assertTrue(isinstance(records[0], SwissProt.Record))
00876 
00877         #Check matches what we got earlier without the iterator:
00878         self.assertEqual(records[0].sequence, record.sequence)
00879         self.assertEqual(records[0].description, record.description)
00880         self.assertEqual(records[0].entry_name, record.entry_name)
00881         self.assertEqual(records[0].accessions, record.accessions)
00882 
00883     def test_sp009(self):
00884         "Parsing SwissProt file sp009"
00885 
00886         filename = 'sp009'
00887         # test the record parser
00888 
00889         datafile = os.path.join('SwissProt', filename)
00890 
00891         test_handle = open(datafile)
00892         seq_record = SeqIO.read(test_handle, "swiss")
00893         test_handle.close()
00894 
00895         self.assertTrue(isinstance(seq_record, SeqRecord))
00896 
00897         self.assertEqual(seq_record.id, "O23729")
00898         self.assertEqual(seq_record.name, "CHS3_BROFI")
00899         self.assertEqual(seq_record.description, "CHALCONE SYNTHASE 3 (EC 2.3.1.74) (NARINGENIN-CHALCONE SYNTHASE 3).")
00900         self.assertEqual(repr(seq_record.seq), "Seq('MAPAMEEIRQAQRAEGPAAVLAIGTSTPPNALYQADYPDYYFRITKSEHLTELK...GAE', ProteinAlphabet())")
00901 
00902         test_handle = open(datafile)
00903         record = SwissProt.read(test_handle)
00904         test_handle.close()
00905 
00906         # test a couple of things on the record -- this is not exhaustive
00907         self.assertEqual(record.entry_name, "CHS3_BROFI")
00908         self.assertEqual(record.accessions, ['O23729'])
00909         self.assertEqual(record.organism_classification, ['Eukaryota', 'Viridiplantae', 'Embryophyta', 'Tracheophyta', 'Spermatophyta', 'Magnoliophyta', 'Liliopsida', 'Asparagales', 'Orchidaceae', 'Bromheadia'])
00910         self.assertEqual(record.seqinfo, (394, 42941, '2F8D14AF4870BBB2'))
00911     
00912         self.assertEqual(len(record.features), 1)
00913         self.assertEqual(record.features[0], ('ACT_SITE', 165, 165, 'BY SIMILARITY.', ''))
00914 
00915         self.assertEqual(len(record.references), 1)
00916         self.assertEqual(record.references[0].authors, "Liew C.F., Lim S.H., Loh C.S., Goh C.J.")
00917         self.assertEqual(record.references[0].title, "Molecular cloning and sequence analysis of chalcone synthase cDNAs of Bromheadia finlaysoniana.")
00918         self.assertEqual(len(record.references[0].references), 0)
00919 
00920 
00921         #Check the two parsers agree on the essentials
00922         self.assertEqual(seq_record.seq.tostring(), record.sequence)
00923         self.assertEqual(seq_record.description, record.description)
00924         self.assertEqual(seq_record.name, record.entry_name)
00925         self.assertTrue(seq_record.id in record.accessions)
00926 
00927         #Now try using the iterator - note that all these
00928         #test cases have only one record.
00929 
00930         # With the SequenceParser
00931         test_handle = open(datafile)
00932         records = list(SeqIO.parse(test_handle, "swiss"))
00933         test_handle.close()
00934 
00935         self.assertEqual(len(records), 1)
00936         self.assertTrue(isinstance(records[0], SeqRecord))
00937 
00938         #Check matches what we got earlier without the iterator:
00939         self.assertEqual(records[0].seq.tostring(), seq_record.seq.tostring())
00940         self.assertEqual(records[0].description, seq_record.description)
00941         self.assertEqual(records[0].name, seq_record.name)
00942         self.assertEqual(records[0].id, seq_record.id)
00943 
00944         # With the RecordParser
00945         test_handle = open(datafile)
00946         records = list(SwissProt.parse(test_handle))
00947         test_handle.close()
00948 
00949         self.assertEqual(len(records), 1)
00950         self.assertTrue(isinstance(records[0], SwissProt.Record))
00951 
00952         #Check matches what we got earlier without the iterator:
00953         self.assertEqual(records[0].sequence, record.sequence)
00954         self.assertEqual(records[0].description, record.description)
00955         self.assertEqual(records[0].entry_name, record.entry_name)
00956         self.assertEqual(records[0].accessions, record.accessions)
00957 
00958     def test_sp010(self):
00959         "Parsing SwissProt file sp010"
00960 
00961         filename = 'sp010'
00962         # test the record parser
00963 
00964         datafile = os.path.join('SwissProt', filename)
00965 
00966         test_handle = open(datafile)
00967         seq_record = SeqIO.read(test_handle, "swiss")
00968         test_handle.close()
00969 
00970         self.assertTrue(isinstance(seq_record, SeqRecord))
00971 
00972         self.assertEqual(seq_record.id, "Q13639")
00973         self.assertEqual(seq_record.name, "5H4_HUMAN")
00974         self.assertEqual(seq_record.description, "5-HYDROXYTRYPTAMINE 4 RECEPTOR (5-HT-4) (SEROTONIN RECEPTOR) (5-HT4).")
00975         self.assertEqual(repr(seq_record.seq), "Seq('MDKLDANVSSEEGFGSVEKVVLLTFLSTVILMAILGNLLVMVAVCWDRQLRKIK...SDT', ProteinAlphabet())")
00976 
00977         test_handle = open(datafile)
00978         record = SwissProt.read(test_handle)
00979         test_handle.close()
00980 
00981         # test a couple of things on the record -- this is not exhaustive
00982         self.assertEqual(record.entry_name, "5H4_HUMAN")
00983         self.assertEqual(record.accessions, ['Q13639', 'Q9UBM6', 'Q9UQR6', 'Q9UE22', 'Q9UE23', 'Q9UBT4', 'Q9NY73'])
00984         self.assertEqual(record.organism_classification, ['Eukaryota', 'Metazoa', 'Chordata', 'Craniata', 'Vertebrata', 'Euteleostomi', 'Mammalia', 'Eutheria', 'Primates', 'Catarrhini', 'Hominidae', 'Homo'])
00985         self.assertEqual(record.seqinfo, (388, 43761, '7FCFEC60E7BDF560'))
00986     
00987         self.assertEqual(len(record.features), 23)
00988         self.assertEqual(record.features[0], ('DOMAIN', 1, 19, 'EXTRACELLULAR (POTENTIAL).', ''))
00989         self.assertEqual(record.features[1], ('TRANSMEM', 20, 40, '1 (POTENTIAL).', ''))
00990         self.assertEqual(record.features[2], ('DOMAIN', 41, 58, 'CYTOPLASMIC (POTENTIAL).', ''))
00991         self.assertEqual(record.features[3], ('TRANSMEM', 59, 79, '2 (POTENTIAL).', ''))
00992         self.assertEqual(record.features[4], ('DOMAIN', 80, 93, 'EXTRACELLULAR (POTENTIAL).', ''))
00993         self.assertEqual(record.features[5], ('TRANSMEM', 94, 116, '3 (POTENTIAL).', ''))
00994         self.assertEqual(record.features[6], ('DOMAIN', 117, 137, 'CYTOPLASMIC (POTENTIAL).', ''))
00995         self.assertEqual(record.features[7], ('TRANSMEM', 138, 158, '4 (POTENTIAL).', ''))
00996         self.assertEqual(record.features[8], ('DOMAIN', 159, 192, 'EXTRACELLULAR (POTENTIAL).', ''))
00997         self.assertEqual(record.features[9], ('TRANSMEM', 193, 213, '5 (POTENTIAL).', ''))
00998         self.assertEqual(record.features[10], ('DOMAIN', 214, 260, 'CYTOPLASMIC (POTENTIAL).', ''))
00999         self.assertEqual(record.features[11], ('TRANSMEM', 261, 281, '6 (POTENTIAL).', ''))
01000         self.assertEqual(record.features[12], ('DOMAIN', 282, 294, 'EXTRACELLULAR (POTENTIAL).', ''))
01001         self.assertEqual(record.features[13], ('TRANSMEM', 295, 315, '7 (POTENTIAL).', ''))
01002         self.assertEqual(record.features[14], ('DOMAIN', 316, 388, 'CYTOPLASMIC (POTENTIAL).', ''))
01003         self.assertEqual(record.features[15], ('CARBOHYD', 7, 7, 'N-LINKED (GLCNAC...) (POTENTIAL).', ''))
01004         self.assertEqual(record.features[16], ('DISULFID', 93, 184, 'BY SIMILARITY.', ''))
01005         self.assertEqual(record.features[17], ('LIPID', 329, 329, 'PALMITATE (BY SIMILARITY).', ''))
01006         self.assertEqual(record.features[18], ('VARSPLIC', 169, 169, 'L -> LERSLNQGLGQDFHA (IN ISOFORM 5- HT4(F)).', ''))
01007         self.assertEqual(record.features[19], ('VARSPLIC', 359, 388, 'RDAVECGGQWESQCHPPATSPLVAAQPSDT -> SGCSPVSSFLLLFCNRPVPV (IN ISOFORM 5-HT4(E)).', ''))
01008         self.assertEqual(record.features[20], ('VARSPLIC', 359, 388, 'RDAVECGGQWESQCHPPATSPLVAAQPSDT -> SSGTETDRRNFGIRKRRLTKPS (IN ISOFORM 5-HT4(D)).', ''))
01009         self.assertEqual(record.features[21], ('VARSPLIC', 360, 388, 'DAVECGGQWESQCHPPATSPLVAAQPSDT -> F (IN ISOFORM 5-HT4(C)).', ''))
01010         self.assertEqual(record.features[22], ('VARSPLIC', 360, 388, 'DAVECGGQWESQCHPPATSPLVAAQPSDT -> YTVLHRGHHQELEKLPIHNDPESLESCF (IN ISOFORM 5- HT4(A)).', ''))
01011         self.assertEqual(len(record.references), 6)
01012 
01013         self.assertEqual(record.references[0].authors, "Blondel O., Gastineau M., Dahmoune Y., Langlois M., Fischmeister R.")
01014         self.assertEqual(record.references[0].title, "Cloning, expression, and pharmacology of four human 5- hydroxytryptamine receptor isoforms produced by alternative splicing in the carboxyl terminus.")
01015         self.assertEqual(len(record.references[0].references), 1)
01016         self.assertEqual(record.references[0].references[0], ('PubMed', '9603189'))
01017         self.assertEqual(record.references[1].authors, "Van den Wyngaert I., Gommeren W., Jurzak M., Verhasselt P., Gordon R., Leysen J., Luyten W., Bender E.")
01018         self.assertEqual(record.references[1].title, "Cloning and expression of 5-HT4 receptor species and splice variants.")
01019         self.assertEqual(len(record.references[1].references), 0)
01020         self.assertEqual(record.references[2].authors, "Claeysen S., Faye P., Sebben M., Lemaire S., Bockaert J., Dumuis A.")
01021         self.assertEqual(record.references[2].title, "Cloning and expression of human 5-HT4S receptors. Effect of receptor density on their coupling to adenylyl cyclase.")
01022         self.assertEqual(len(record.references[2].references), 1)
01023         self.assertEqual(record.references[2].references[0], ('PubMed', '9351641'))
01024         self.assertEqual(record.references[3].authors, "Claeysen S., Sebben M., Becamel C., Bockaert J., Dumuis A.")
01025         self.assertEqual(record.references[3].title, "Novel brain-specific 5-HT4 receptors splice variants show marked constitutive activity: role of the c-terminal intracellular domain.")
01026         self.assertEqual(len(record.references[3].references), 0)
01027         self.assertEqual(record.references[4].authors, "Bender E., Pindon A., van Oers I., Zhang Y.B., Gommeren W., Verhasselt P., Jurzak M., Leysen J., Luyten W.")
01028         self.assertEqual(record.references[4].title, "Structure of the human serotonin 5-HT4 receptor gene and cloning of a novel 5-HT4 splice variant.")
01029         self.assertEqual(len(record.references[4].references), 1)
01030         self.assertEqual(record.references[4].references[0], ('PubMed', '10646498'))
01031         self.assertEqual(record.references[5].authors, "Ullmer C., Schmuck K., Kalkman H.O., Lubbert H.")
01032         self.assertEqual(record.references[5].title, "Expression of serotonin receptor mRNAs in blood vessels.")
01033         self.assertEqual(len(record.references[5].references), 2)
01034         self.assertEqual(record.references[5].references[0], ('MEDLINE', '95385798'))
01035         self.assertEqual(record.references[5].references[1], ('PubMed', '7656980'))
01036 
01037         #Check the two parsers agree on the essentials
01038         self.assertEqual(seq_record.seq.tostring(), record.sequence)
01039         self.assertEqual(seq_record.description, record.description)
01040         self.assertEqual(seq_record.name, record.entry_name)
01041         self.assertTrue(seq_record.id in record.accessions)
01042 
01043         #Now try using the iterator - note that all these
01044         #test cases have only one record.
01045 
01046         # With the SequenceParser
01047         test_handle = open(datafile)
01048         records = list(SeqIO.parse(test_handle, "swiss"))
01049         test_handle.close()
01050 
01051         self.assertEqual(len(records), 1)
01052         self.assertTrue(isinstance(records[0], SeqRecord))
01053 
01054         #Check matches what we got earlier without the iterator:
01055         self.assertEqual(records[0].seq.tostring(), seq_record.seq.tostring())
01056         self.assertEqual(records[0].description, seq_record.description)
01057         self.assertEqual(records[0].name, seq_record.name)
01058         self.assertEqual(records[0].id, seq_record.id)
01059 
01060         # With the RecordParser
01061         test_handle = open(datafile)
01062         records = list(SwissProt.parse(test_handle))
01063         test_handle.close()
01064 
01065         self.assertEqual(len(records), 1)
01066         self.assertTrue(isinstance(records[0], SwissProt.Record))
01067 
01068         #Check matches what we got earlier without the iterator:
01069         self.assertEqual(records[0].sequence, record.sequence)
01070         self.assertEqual(records[0].description, record.description)
01071         self.assertEqual(records[0].entry_name, record.entry_name)
01072         self.assertEqual(records[0].accessions, record.accessions)
01073 
01074     def test_sp011(self):
01075         "Parsing SwissProt file sp011"
01076 
01077         filename = 'sp011'
01078         # test the record parser
01079 
01080         datafile = os.path.join('SwissProt', filename)
01081 
01082         test_handle = open(datafile)
01083         seq_record = SeqIO.read(test_handle, "swiss")
01084         test_handle.close()
01085 
01086         self.assertTrue(isinstance(seq_record, SeqRecord))
01087 
01088         self.assertEqual(seq_record.id, "P16235")
01089         self.assertEqual(seq_record.name, "LSHR_RAT")
01090         self.assertEqual(seq_record.description, "LUTROPIN-CHORIOGONADOTROPIC HORMONE RECEPTOR PRECURSOR (LH/CG-R) (LSH-R) (LUTEINIZING HORMONE RECEPTOR).")
01091         self.assertEqual(repr(seq_record.seq), "Seq('MGRRVPALRQLLVLAVLLLKPSQLQSRELSGSRCPEPCDCAPDGALRCPGPRAG...LTH', ProteinAlphabet())")
01092 
01093         test_handle = open(datafile)
01094         record = SwissProt.read(test_handle)
01095         test_handle.close()
01096 
01097         # test a couple of things on the record -- this is not exhaustive
01098         self.assertEqual(record.entry_name, "LSHR_RAT")
01099         self.assertEqual(record.accessions, ['P16235', 'P70646', 'Q63807', 'Q63808', 'Q63809'])
01100         self.assertEqual(record.organism_classification, ['Eukaryota', 'Metazoa', 'Chordata', 'Craniata', 'Vertebrata', 'Euteleostomi', 'Mammalia', 'Eutheria', 'Rodentia', 'Sciurognathi', 'Muridae', 'Murinae', 'Rattus'])
01101         self.assertEqual(record.seqinfo, (700, 78035, '31807E73BAC94F1F'))
01102     
01103         self.assertEqual(len(record.features), 52)
01104         self.assertEqual(record.features[0], ('SIGNAL', 1, 26, '', ''))
01105         self.assertEqual(record.features[1], ('CHAIN', 27, 700, 'LUTROPIN-CHORIOGONADOTROPIC HORMONE RECEPTOR.', ''))
01106         self.assertEqual(record.features[2], ('DOMAIN', 27, 362, 'EXTRACELLULAR (POTENTIAL).', ''))
01107         self.assertEqual(record.features[3], ('TRANSMEM', 363, 390, '1 (POTENTIAL).', ''))
01108         self.assertEqual(record.features[4], ('DOMAIN', 391, 399, 'CYTOPLASMIC (POTENTIAL).', ''))
01109         self.assertEqual(record.features[5], ('TRANSMEM', 400, 422, '2 (POTENTIAL).', ''))
01110         self.assertEqual(record.features[6], ('DOMAIN', 423, 443, 'EXTRACELLULAR (POTENTIAL).', ''))
01111         self.assertEqual(record.features[7], ('TRANSMEM', 444, 466, '3 (POTENTIAL).', ''))
01112         self.assertEqual(record.features[8], ('DOMAIN', 467, 486, 'CYTOPLASMIC (POTENTIAL).', ''))
01113         self.assertEqual(record.features[9], ('TRANSMEM', 487, 509, '4 (POTENTIAL).', ''))
01114         self.assertEqual(record.features[10], ('DOMAIN', 510, 529, 'EXTRACELLULAR (POTENTIAL).', ''))
01115         self.assertEqual(record.features[11], ('TRANSMEM', 530, 551, '5 (POTENTIAL).', ''))
01116         self.assertEqual(record.features[12], ('DOMAIN', 552, 574, 'CYTOPLASMIC (POTENTIAL).', ''))
01117         self.assertEqual(record.features[13], ('TRANSMEM', 575, 598, '6 (POTENTIAL).', ''))
01118         self.assertEqual(record.features[14], ('DOMAIN', 599, 609, 'EXTRACELLULAR (POTENTIAL).', ''))
01119         self.assertEqual(record.features[15], ('TRANSMEM', 610, 631, '7 (POTENTIAL).', ''))
01120         self.assertEqual(record.features[16], ('DOMAIN', 632, 700, 'CYTOPLASMIC (POTENTIAL).', ''))
01121         self.assertEqual(record.features[17], ('REPEAT', 52, 75, 'LRR 1.', ''))
01122         self.assertEqual(record.features[18], ('REPEAT', 126, 150, 'LRR 2.', ''))
01123         self.assertEqual(record.features[19], ('REPEAT', 152, 175, 'LRR 3.', ''))
01124         self.assertEqual(record.features[20], ('REPEAT', 176, 200, 'LRR 4.', ''))
01125         self.assertEqual(record.features[21], ('REPEAT', 202, 224, 'LRR 5.', ''))
01126         self.assertEqual(record.features[22], ('REPEAT', 225, 248, 'LRR 6.', ''))
01127         self.assertEqual(record.features[23], ('REPEAT', 250, 271, 'LRR 7.', ''))
01128         self.assertEqual(record.features[24], ('DISULFID', 443, 518, 'BY SIMILARITY.', ''))
01129         self.assertEqual(record.features[25], ('CARBOHYD', 103, 103, 'N-LINKED (GLCNAC...) (POTENTIAL).', ''))
01130         self.assertEqual(record.features[26], ('CARBOHYD', 178, 178, 'N-LINKED (GLCNAC...) (POTENTIAL).', ''))
01131         self.assertEqual(record.features[27], ('CARBOHYD', 199, 199, 'N-LINKED (GLCNAC...) (POTENTIAL).', ''))
01132         self.assertEqual(record.features[28], ('CARBOHYD', 295, 295, 'N-LINKED (GLCNAC...) (POTENTIAL).', ''))
01133         self.assertEqual(record.features[29], ('CARBOHYD', 303, 303, 'N-LINKED (GLCNAC...) (POTENTIAL).', ''))
01134         self.assertEqual(record.features[30], ('CARBOHYD', 317, 317, 'N-LINKED (GLCNAC...) (POTENTIAL).', ''))
01135         self.assertEqual(record.features[31], ('VARSPLIC', 83, 132, 'MISSING (IN ISOFORM 1950).', ''))
01136         self.assertEqual(record.features[32], ('VARSPLIC', 133, 157, 'MISSING (IN ISOFORM 1759).', ''))
01137         self.assertEqual(record.features[33], ('VARSPLIC', 184, 700, 'MISSING (IN ISOFORM C2).', ''))
01138         self.assertEqual(record.features[34], ('VARSPLIC', 232, 251, 'DISSTKLQALPSHGLESIQT -> PCRATGWSPFRRSSPCLPTH (IN ISOFORM 2075).', ''))
01139         self.assertEqual(record.features[35], ('VARSPLIC', 232, 293, 'MISSING (IN ISOFORM E/A2, ISOFORM EB AND ISOFORM B1).', ''))
01140         self.assertEqual(record.features[36], ('VARSPLIC', 252, 700, 'MISSING (IN ISOFORM 2075).', ''))
01141         self.assertEqual(record.features[37], ('VARSPLIC', 294, 367, 'QNFSFSIFENFSKQCESTVRKADNETLYSAIFEENELSGWDYDYGFCSPKTLQCAPEPDAFNPCEDIMGYAFLR -> IFHFPFLKTSPNNAKAQLEKQITRRFIPPSLRRMNSVAGIMIMASVHPRHSNVLQNQMLSTPVKILWAMPSLGS (IN ISOFORM B1 AND ISOFORM B3).', ''))
01142         self.assertEqual(record.features[38], ('VARSPLIC', 294, 294, 'Q -> P (IN ISOFORM C1).', ''))
01143         self.assertEqual(record.features[39], ('VARSPLIC', 295, 700, 'MISSING (IN ISOFORM C1).', ''))
01144         self.assertEqual(record.features[40], ('VARSPLIC', 321, 342, 'YSAIFEENELSGWDYDYGFCSP -> LHGALPAAHCLRGLPNKRPVL (IN ISOFORM 1834, ISOFORM 1759 AND ISOFORM EB).', ''))
01145         self.assertEqual(record.features[41], ('VARSPLIC', 343, 700, 'MISSING (IN ISOFORMS 1834, ISOFORM 1759 AND ISOFORM EB).', ''))
01146         self.assertEqual(record.features[42], ('VARSPLIC', 368, 700, 'MISSING (IN ISOFORM B1 AND ISOFORM B3).', ''))
01147         self.assertEqual(record.features[43], ('VARIANT', 82, 82, 'I -> M (IN ISOFORM 1950).', ''))
01148         self.assertEqual(record.features[44], ('VARIANT', 179, 179, 'E -> G (IN ISOFORM 1759).', ''))
01149         self.assertEqual(record.features[45], ('VARIANT', 233, 233, 'I -> T (IN ISOFORM 1950).', ''))
01150         self.assertEqual(record.features[46], ('VARIANT', 646, 646, 'G -> S (IN ISOFORM 1950).', ''))
01151         self.assertEqual(record.features[47], ('MUTAGEN', 409, 409, 'D->N: SIGNIFICANT REDUCTION OF BINDING.', ''))
01152         self.assertEqual(record.features[48], ('MUTAGEN', 436, 436, 'D->N: NO CHANGE IN BINDING OR CAMP PROD.', ''))
01153         self.assertEqual(record.features[49], ('MUTAGEN', 455, 455, 'E->Q: NO CHANGE IN BINDING OR CAMP PROD.', ''))
01154         self.assertEqual(record.features[50], ('MUTAGEN', 582, 582, 'D->N: NO CHANGE IN BINDING OR CAMP PROD.', ''))
01155         self.assertEqual(record.features[51], ('CONFLICT', 33, 33, 'R -> L (IN REF. 7).', ''))
01156 
01157         self.assertEqual(len(record.references), 8)
01158         self.assertEqual(record.references[0].authors, "McFarland K.C., Sprengel R., Phillips H.S., Koehler M., Rosemblit N., Nikolics K., Segaloff D.L., Seeburg P.H.")
01159         self.assertEqual(record.references[0].title, "Lutropin-choriogonadotropin receptor: an unusual member of the G protein-coupled receptor family.")
01160         self.assertEqual(len(record.references[0].references), 2)
01161         self.assertEqual(record.references[0].references[0], ('MEDLINE', '89332512'))
01162         self.assertEqual(record.references[0].references[1], ('PubMed', '2502842'))
01163         self.assertEqual(record.references[1].authors, "Aatsinki J.T., Pietila E.M., Lakkakorpi J.T., Rajaniemi H.J.")
01164         self.assertEqual(record.references[1].title, "Expression of the LH/CG receptor gene in rat ovarian tissue is regulated by an extensive alternative splicing of the primary transcript.")
01165         self.assertEqual(len(record.references[1].references), 2)
01166         self.assertEqual(record.references[1].references[0], ('MEDLINE', '92347604'))
01167         self.assertEqual(record.references[1].references[1], ('PubMed', '1353463'))
01168         self.assertEqual(record.references[2].authors, "Koo Y.B., Slaughter R.G., Ji T.H.")
01169         self.assertEqual(record.references[2].title, "Structure of the luteinizing hormone receptor gene and multiple exons of the coding sequence.")
01170         self.assertEqual(len(record.references[2].references), 2)
01171         self.assertEqual(record.references[2].references[0], ('MEDLINE', '91209270'))
01172         self.assertEqual(record.references[2].references[1], ('PubMed', '2019252'))
01173         self.assertEqual(record.references[3].authors, "Bernard M.P., Myers R.V., Moyle W.R.")
01174         self.assertEqual(record.references[3].title, "Cloning of rat lutropin (LH) receptor analogs lacking the soybean lectin domain.")
01175         self.assertEqual(len(record.references[3].references), 2)
01176         self.assertEqual(record.references[3].references[0], ('MEDLINE', '91006819'))
01177         self.assertEqual(record.references[3].references[1], ('PubMed', '1976554'))
01178         self.assertEqual(record.references[4].authors, "Segaloff D.L., Sprengel R., Nikolics K., Ascoli M.")
01179         self.assertEqual(record.references[4].title, "Structure of the lutropin/choriogonadotropin receptor.")
01180         self.assertEqual(len(record.references[4].references), 2)
01181         self.assertEqual(record.references[4].references[0], ('MEDLINE', '91126285'))
01182         self.assertEqual(record.references[4].references[1], ('PubMed', '2281186'))
01183         self.assertEqual(record.references[5].authors, "Tsai-Morris C.H., Buczko E., Wang W., Dufau M.L.")
01184         self.assertEqual(record.references[5].title, "Intronic nature of the rat luteinizing hormone receptor gene defines a soluble receptor subspecies with hormone binding activity.")
01185         self.assertEqual(len(record.references[5].references), 2)
01186         self.assertEqual(record.references[5].references[0], ('MEDLINE', '91060531'))
01187         self.assertEqual(record.references[5].references[1], ('PubMed', '2174034'))
01188         self.assertEqual(record.references[6].authors, "Roche P.C., Ryan R.J.")
01189         self.assertEqual(record.references[6].title, "Purification, characterization, and amino-terminal sequence of rat ovarian receptor for luteinizing hormone/human choriogonadotropin.")
01190         self.assertEqual(len(record.references[6].references), 2)
01191         self.assertEqual(record.references[6].references[0], ('MEDLINE', '89174723'))
01192         self.assertEqual(record.references[6].references[1], ('PubMed', '2925659'))
01193         self.assertEqual(record.references[7].authors, "Ji I., Ji T.H.")
01194         self.assertEqual(record.references[7].title, "Asp383 in the second transmembrane domain of the lutropin receptor is important for high affinity hormone binding and cAMP production.")
01195         self.assertEqual(len(record.references[7].references), 2)
01196         self.assertEqual(record.references[7].references[0], ('MEDLINE', '91332007'))
01197         self.assertEqual(record.references[7].references[1], ('PubMed', '1714448'))
01198 
01199         #Check the two parsers agree on the essentials
01200         self.assertEqual(seq_record.seq.tostring(), record.sequence)
01201         self.assertEqual(seq_record.description, record.description)
01202         self.assertEqual(seq_record.name, record.entry_name)
01203         self.assertTrue(seq_record.id in record.accessions)
01204 
01205         #Now try using the iterator - note that all these
01206         #test cases have only one record.
01207 
01208         # With the SequenceParser
01209         test_handle = open(datafile)
01210         records = list(SeqIO.parse(test_handle, "swiss"))
01211         test_handle.close()
01212 
01213         self.assertEqual(len(records), 1)
01214         self.assertTrue(isinstance(records[0], SeqRecord))
01215 
01216         #Check matches what we got earlier without the iterator:
01217         self.assertEqual(records[0].seq.tostring(), seq_record.seq.tostring())
01218         self.assertEqual(records[0].description, seq_record.description)
01219         self.assertEqual(records[0].name, seq_record.name)
01220         self.assertEqual(records[0].id, seq_record.id)
01221 
01222         # With the RecordParser
01223         test_handle = open(datafile)
01224         records = list(SwissProt.parse(test_handle))
01225         test_handle.close()
01226 
01227         self.assertEqual(len(records), 1)
01228         self.assertTrue(isinstance(records[0], SwissProt.Record))
01229 
01230         #Check matches what we got earlier without the iterator:
01231         self.assertEqual(records[0].sequence, record.sequence)
01232         self.assertEqual(records[0].description, record.description)
01233         self.assertEqual(records[0].entry_name, record.entry_name)
01234         self.assertEqual(records[0].accessions, record.accessions)
01235 
01236 
01237     def test_sp012(self):
01238         "Parsing SwissProt file sp012"
01239 
01240         filename = 'sp012'
01241         # test the record parser
01242 
01243         datafile = os.path.join('SwissProt', filename)
01244 
01245         test_handle = open(datafile)
01246         seq_record = SeqIO.read(test_handle, "swiss")
01247         test_handle.close()
01248 
01249         self.assertTrue(isinstance(seq_record, SeqRecord))
01250 
01251         self.assertEqual(seq_record.id, "Q9Y736")
01252         self.assertEqual(seq_record.name, "Q9Y736")
01253         self.assertEqual(seq_record.description, "UBIQUITIN.")
01254         self.assertEqual(repr(seq_record.seq), "Seq('MQIFVKTLTGKTITLEVESSDTIDNVKTKIQDKEGIPPDQQRLIFAGKQLEDGR...GGN', ProteinAlphabet())")
01255 
01256         test_handle = open(datafile)
01257         record = SwissProt.read(test_handle)
01258         test_handle.close()
01259 
01260         # test a couple of things on the record -- this is not exhaustive
01261         self.assertEqual(record.entry_name, "Q9Y736")
01262         self.assertEqual(record.accessions, ['Q9Y736'])
01263         self.assertEqual(record.organism_classification, ['Eukaryota', 'Fungi', 'Ascomycota', 'Pezizomycotina', 'Eurotiomycetes', 'Onygenales', 'Arthrodermataceae', 'mitosporic Arthrodermataceae', 'Trichophyton'])
01264         self.assertEqual(record.seqinfo, (153, 17238, '01153CF30C2DEDFF'))
01265     
01266         self.assertEqual(len(record.features), 0)
01267 
01268         self.assertEqual(len(record.references), 2)
01269         self.assertEqual(record.references[0].authors, "Kano R., Nakamura Y., Watanabe S., Hasegawa A.")
01270         self.assertEqual(record.references[0].title, "Trichophyton mentagrophytes mRNA for ubiquitin.")
01271         self.assertEqual(len(record.references[0].references), 0)
01272         self.assertEqual(record.references[1].authors, "Kano R.")
01273         self.assertEqual(record.references[1].title, "Microsporum canis mRNA for ubiquitin, complete cds.")
01274         self.assertEqual(len(record.references[1].references), 0)
01275 
01276         #Check the two parsers agree on the essentials
01277         self.assertEqual(seq_record.seq.tostring(), record.sequence)
01278         self.assertEqual(seq_record.description, record.description)
01279         self.assertEqual(seq_record.name, record.entry_name)
01280         self.assertTrue(seq_record.id in record.accessions)
01281 
01282         #Now try using the iterator - note that all these
01283         #test cases have only one record.
01284 
01285         # With the SequenceParser
01286         test_handle = open(datafile)
01287         records = list(SeqIO.parse(test_handle, "swiss"))
01288         test_handle.close()
01289 
01290         self.assertEqual(len(records), 1)
01291         self.assertTrue(isinstance(records[0], SeqRecord))
01292 
01293         #Check matches what we got earlier without the iterator:
01294         self.assertEqual(records[0].seq.tostring(), seq_record.seq.tostring())
01295         self.assertEqual(records[0].description, seq_record.description)
01296         self.assertEqual(records[0].name, seq_record.name)
01297         self.assertEqual(records[0].id, seq_record.id)
01298 
01299         # With the RecordParser
01300         test_handle = open(datafile)
01301         records = list(SwissProt.parse(test_handle))
01302         test_handle.close()
01303 
01304         self.assertEqual(len(records), 1)
01305         self.assertTrue(isinstance(records[0], SwissProt.Record))
01306 
01307         #Check matches what we got earlier without the iterator:
01308         self.assertEqual(records[0].sequence, record.sequence)
01309         self.assertEqual(records[0].description, record.description)
01310         self.assertEqual(records[0].entry_name, record.entry_name)
01311         self.assertEqual(records[0].accessions, record.accessions)
01312 
01313 
01314     def test_sp013(self):
01315         "Parsing SwissProt file sp013"
01316 
01317         filename = 'sp013'
01318         # test the record parser
01319 
01320         datafile = os.path.join('SwissProt', filename)
01321 
01322         test_handle = open(datafile)
01323         seq_record = SeqIO.read(test_handle, "swiss")
01324         test_handle.close()
01325 
01326         self.assertTrue(isinstance(seq_record, SeqRecord))
01327 
01328         self.assertEqual(seq_record.id, "P82909")
01329         self.assertEqual(seq_record.name, "P82909")
01330         self.assertEqual(seq_record.description, "MITOCHONDRIAL 28S RIBOSOMAL PROTEIN S36 (MRP-S36).")
01331         self.assertEqual(repr(seq_record.seq), "Seq('MGSKMASASRVVQVVKPHTPLIRFPDRRDNPKPNVSEALRSAGLPSHSSVISQH...GPE', ProteinAlphabet())")
01332 
01333         test_handle = open(datafile)
01334         record = SwissProt.read(test_handle)
01335         test_handle.close()
01336 
01337         # test a couple of things on the record -- this is not exhaustive
01338         self.assertEqual(record.entry_name, "P82909")
01339         self.assertEqual(record.accessions, ['P82909'])
01340         self.assertEqual(record.organism_classification, ['Eukaryota', 'Metazoa', 'Chordata', 'Craniata', 'Vertebrata', 'Euteleostomi', 'Mammalia', 'Eutheria', 'Primates', 'Catarrhini', 'Hominidae', 'Homo'])
01341         self.assertEqual(record.seqinfo, (102, 11335, '83EF107B42E2FCFD'))
01342     
01343         self.assertEqual(len(record.features), 0)
01344 
01345         self.assertEqual(len(record.references), 2)
01346         self.assertEqual(record.references[0].authors, "Strausberg R.")
01347         self.assertEqual(record.references[0].title, "")
01348         self.assertEqual(len(record.references[0].references), 0)
01349         self.assertEqual(record.references[1].authors, "Koc E.C., Burkhart W., Blackburn K., Moseley A., Spremulli L.L.")
01350         self.assertEqual(record.references[1].title, "The small subunit of the mammalian mitochondrial ribosome. Identification of the full complement ribosomal proteins present.")
01351         self.assertEqual(len(record.references[1].references), 0)
01352 
01353         #Check the two parsers agree on the essentials
01354         self.assertEqual(seq_record.seq.tostring(), record.sequence)
01355         self.assertEqual(seq_record.description, record.description)
01356         self.assertEqual(seq_record.name, record.entry_name)
01357         self.assertTrue(seq_record.id in record.accessions)
01358 
01359         #Now try using the iterator - note that all these
01360         #test cases have only one record.
01361 
01362         # With the SequenceParser
01363         test_handle = open(datafile)
01364         records = list(SeqIO.parse(test_handle, "swiss"))
01365         test_handle.close()
01366 
01367         self.assertEqual(len(records), 1)
01368         self.assertTrue(isinstance(records[0], SeqRecord))
01369 
01370         #Check matches what we got earlier without the iterator:
01371         self.assertEqual(records[0].seq.tostring(), seq_record.seq.tostring())
01372         self.assertEqual(records[0].description, seq_record.description)
01373         self.assertEqual(records[0].name, seq_record.name)
01374         self.assertEqual(records[0].id, seq_record.id)
01375 
01376         # With the RecordParser
01377         test_handle = open(datafile)
01378         records = list(SwissProt.parse(test_handle))
01379         test_handle.close()
01380 
01381         self.assertEqual(len(records), 1)
01382         self.assertTrue(isinstance(records[0], SwissProt.Record))
01383 
01384         #Check matches what we got earlier without the iterator:
01385         self.assertEqual(records[0].sequence, record.sequence)
01386         self.assertEqual(records[0].description, record.description)
01387         self.assertEqual(records[0].entry_name, record.entry_name)
01388         self.assertEqual(records[0].accessions, record.accessions)
01389 
01390 
01391     def test_sp014(self):
01392         "Parsing SwissProt file sp014"
01393 
01394         filename = 'sp014'
01395         # test the record parser
01396 
01397         datafile = os.path.join('SwissProt', filename)
01398 
01399         test_handle = open(datafile)
01400         seq_record = SeqIO.read(test_handle, "swiss")
01401         test_handle.close()
01402 
01403         self.assertTrue(isinstance(seq_record, SeqRecord))
01404 
01405         self.assertEqual(seq_record.id, "P12166")
01406         self.assertEqual(seq_record.name, "PSBL_ORYSA")
01407         self.assertEqual(seq_record.description, "PHOTOSYSTEM II REACTION CENTER L PROTEIN (PSII 5 KDA PROTEIN).")
01408         self.assertEqual(repr(seq_record.seq), "Seq('TQSNPNEQNVELNRTSLYWGLLLIFVLAVLFSNYFFN', ProteinAlphabet())")
01409 
01410         test_handle = open(datafile)
01411         record = SwissProt.read(test_handle)
01412         test_handle.close()
01413 
01414         # test a couple of things on the record -- this is not exhaustive
01415         self.assertEqual(record.entry_name, "PSBL_ORYSA")
01416         self.assertEqual(record.accessions, ['P12166', 'P12167', 'Q34007'])
01417         self.assertEqual(record.organism_classification, ['Eukaryota', 'Viridiplantae', 'Embryophyta', 'Tracheophyta', 'Spermatophyta', 'Magnoliophyta', 'Liliopsida', 'Poales', 'Poaceae', 'Ehrhartoideae', 'Oryzeae', 'Oryza'])
01418         self.assertEqual(record.seqinfo, (37, 4366, 'CC537AEC50B2C784'))
01419     
01420         self.assertEqual(len(record.features), 1)
01421         self.assertEqual(record.features[0], ('INIT_MET', 0, 0, '', ''))
01422 
01423         self.assertEqual(len(record.references), 22)
01424         self.assertEqual(record.references[0].authors, "Sugiura M.")
01425         self.assertEqual(record.references[0].title, "")
01426         self.assertEqual(len(record.references[0].references), 0)
01427         self.assertEqual(record.references[1].authors, "Hiratsuka J., Shimada H., Whittier R., Ishibashi T., Sakamoto M., Mori M., Kondo C., Honji Y., Sun C.-R., Meng B.-Y., Li Y.-Q., Kanno A., Nishizawa Y., Hirai A., Shinozaki K., Sugiura M.")
01428         self.assertEqual(record.references[1].title, "The complete sequence of the rice (Oryza sativa) chloroplast genome: intermolecular recombination between distinct tRNA genes accounts for a major plastid DNA inversion during the evolution of the cereals.")
01429         self.assertEqual(len(record.references[1].references), 2)
01430         self.assertEqual(record.references[1].references[0], ('MEDLINE', '89364698'))
01431         self.assertEqual(record.references[1].references[1], ('PubMed', '2770692'))
01432         self.assertEqual(record.references[2].authors, "Sugiura M.")
01433         self.assertEqual(record.references[2].title, "")
01434         self.assertEqual(len(record.references[2].references), 0)
01435         self.assertEqual(record.references[3].authors, "Shinozaki K., Ohme M., Tanaka M., Wakasugi T., Hayashida N., Matsubayashi T., Zaita N., Chunwongse J., Obokata J., Yamaguchi-Shinozaki K., Ohto C., Torazawa K., Meng B.Y., Sugita M., Deno H., Kamogashira T., Yamada K., Kusuda J., Takaiwa F., Kato A., Tohdoh N., Shimada H., Sugiura M.")
01436         self.assertEqual(record.references[3].title, "The complete nucleotide sequence of the tobacco chloroplast genome: its gene organization and expression.")
01437         self.assertEqual(len(record.references[3].references), 0)
01438         self.assertEqual(record.references[4].authors, "Chaudhuri S., Maliga P.")
01439         self.assertEqual(record.references[4].title, "Sequences directing C to U editing of the plastid psbL mRNA are located within a 22 nucleotide segment spanning the editing site.")
01440         self.assertEqual(len(record.references[4].references), 2)
01441         self.assertEqual(record.references[4].references[0], ('MEDLINE', '97076156'))
01442         self.assertEqual(record.references[4].references[1], ('PubMed', '8918473'))
01443         self.assertEqual(record.references[5].authors, "Chakhmakhcheva O.G., Andreeva A.V., Buryakova A.A., Reverdatto S.V., Efimov V.A.")
01444         self.assertEqual(record.references[5].title, "Nucleotide sequence of the barley chloroplast psbE, psbF genes and flanking regions.")
01445         self.assertEqual(len(record.references[5].references), 2)
01446         self.assertEqual(record.references[5].references[0], ('MEDLINE', '89240046'))
01447         self.assertEqual(record.references[5].references[1], ('PubMed', '2654886'))
01448         self.assertEqual(record.references[6].authors, "Efimov V.A., Andreeva A.V., Reverdatto S.V., Chakhmakhcheva O.G.")
01449         self.assertEqual(record.references[6].title, "Photosystem II of rye. Nucleotide sequence of the psbB, psbC, psbE, psbF, psbH genes of rye and chloroplast DNA regions adjacent to them.")
01450         self.assertEqual(len(record.references[6].references), 2)
01451         self.assertEqual(record.references[6].references[0], ('MEDLINE', '92207253'))
01452         self.assertEqual(record.references[6].references[1], ('PubMed', '1804121'))
01453         self.assertEqual(record.references[7].authors, "Webber A.N., Hird S.M., Packman L.C., Dyer T.A., Gray J.C.")
01454         self.assertEqual(record.references[7].title, "A photosystem II polypeptide is encoded by an open reading frame co-transcribed with genes for cytochrome b-559 in wheat chloroplast DNA.")
01455         self.assertEqual(len(record.references[7].references), 0)
01456         self.assertEqual(record.references[8].authors, "Kudla J., Igloi G.L., Metzlaff M., Hagemann R., Koessel H.")
01457         self.assertEqual(record.references[8].title, "RNA editing in tobacco chloroplasts leads to the formation of a translatable psbL mRNA by a C to U substitution within the initiation codon.")
01458         self.assertEqual(len(record.references[8].references), 2)
01459         self.assertEqual(record.references[8].references[0], ('MEDLINE', '92191997'))
01460         self.assertEqual(record.references[8].references[1], ('PubMed', '1547774'))
01461         self.assertEqual(record.references[9].authors, "Zolotarev A.S., Kolosov V.L.")
01462         self.assertEqual(record.references[9].title, "Nucleotide sequence of the rye chloroplast DNA fragment, comprising psbE and psbF genes.")
01463         self.assertEqual(len(record.references[9].references), 2)
01464         self.assertEqual(record.references[9].references[0], ('MEDLINE', '89160331'))
01465         self.assertEqual(record.references[9].references[1], ('PubMed', '2646599'))
01466         self.assertEqual(record.references[10].authors, "Kolosov V.L., Klezovich O.N., Abdulaev N.G., Zolotarev A.S.")
01467         self.assertEqual(record.references[10].title, "Photosystem II of rye. Nucleotide sequence of genes psbE, psbF, psbL and OPC40 of chloroplast DNA.")
01468         self.assertEqual(len(record.references[10].references), 2)
01469         self.assertEqual(record.references[10].references[0], ('MEDLINE', '90073796'))
01470         self.assertEqual(record.references[10].references[1], ('PubMed', '2686655'))
01471         self.assertEqual(record.references[11].authors, "Haley J., Bogorad L.")
01472         self.assertEqual(record.references[11].title, "")
01473         self.assertEqual(len(record.references[11].references), 0)
01474         self.assertEqual(record.references[12].authors, "Maier R.M., Neckermann K., Igloi G.L., Koessel H.")
01475         self.assertEqual(record.references[12].title, "Complete sequence of the maize chloroplast genome: gene content, hotspots of divergence and fine tuning of genetic information by transcript editing.")
01476         self.assertEqual(len(record.references[12].references), 2)
01477         self.assertEqual(record.references[12].references[0], ('MEDLINE', '95395841'))
01478         self.assertEqual(record.references[12].references[1], ('PubMed', '7666415'))
01479         self.assertEqual(record.references[13].authors, "Willey D.L., Gray J.C.")
01480         self.assertEqual(record.references[13].title, "Two small open reading frames are co-transcribed with the pea chloroplast genes for the polypeptides of cytochrome b-559.")
01481         self.assertEqual(len(record.references[13].references), 2)
01482         self.assertEqual(record.references[13].references[0], ('MEDLINE', '89354671'))
01483         self.assertEqual(record.references[13].references[1], ('PubMed', '2766383'))
01484         self.assertEqual(record.references[14].authors, "Bock R., Hagemann R., Koessel H., Kudla J.")
01485         self.assertEqual(record.references[14].title, "Tissue- and stage-specific modulation of RNA editing of the psbF and psbL transcript from spinach plastids -- a new regulatory mechanism?")
01486         self.assertEqual(len(record.references[14].references), 2)
01487         self.assertEqual(record.references[14].references[0], ('MEDLINE', '93360903'))
01488         self.assertEqual(record.references[14].references[1], ('PubMed', '8355656'))
01489         self.assertEqual(record.references[15].authors, "Hermann R.G., Alt J., Schiller B., Widger W.R., Cramer W.A.")
01490         self.assertEqual(record.references[15].title, "Nucleotide sequence of the gene for apocytochrome b-559 on the spinach plastid chromosome: implications for the structure of the membrane protein.")
01491         self.assertEqual(len(record.references[15].references), 0)
01492         self.assertEqual(record.references[16].authors, "Kuntz M., Camara B., Weil J.-H., Schantz R.")
01493         self.assertEqual(record.references[16].title, "The psbL gene from bell pepper (Capsicum annuum): plastid RNA editing also occurs in non-photosynthetic chromoplasts.")
01494         self.assertEqual(len(record.references[16].references), 2)
01495         self.assertEqual(record.references[16].references[0], ('MEDLINE', '93099270'))
01496         self.assertEqual(record.references[16].references[1], ('PubMed', '1463853'))
01497         self.assertEqual(record.references[17].authors, "Forsthoefel N.R., Cushman J.C.")
01498         self.assertEqual(record.references[17].title, "Characterization and expression of photosystem II genes (psbE, psbF, and psbL) from the facultative crassulacean acid metabolism plant Mesembryanthemum crystallinum.")
01499         self.assertEqual(len(record.references[17].references), 2)
01500         self.assertEqual(record.references[17].references[0], ('MEDLINE', '94345017'))
01501         self.assertEqual(record.references[17].references[1], ('PubMed', '8066140'))
01502         self.assertEqual(record.references[18].authors, "Kubo T., Yanai Y., Kinoshita T., Mikami T.")
01503         self.assertEqual(record.references[18].title, "The chloroplast trnP-trnW-petG gene cluster in the mitochondrial genomes of Beta vulgaris, B. trigyna and B. webbiana: evolutionary aspects.")
01504         self.assertEqual(len(record.references[18].references), 2)
01505         self.assertEqual(record.references[18].references[0], ('MEDLINE', '95254673'))
01506         self.assertEqual(record.references[18].references[1], ('PubMed', '7736615'))
01507         self.assertEqual(record.references[19].authors, "Naithani S.")
01508         self.assertEqual(record.references[19].title, "")
01509         self.assertEqual(len(record.references[19].references), 0)
01510         self.assertEqual(record.references[20].authors, "Ikeuchi M., Takio K., Inoue Y.")
01511         self.assertEqual(record.references[20].title, "N-terminal sequencing of photosystem II low-molecular-mass proteins. 5 and 4.1 kDa components of the O2-evolving core complex from higher plants.")
01512         self.assertEqual(len(record.references[20].references), 2)
01513         self.assertEqual(record.references[20].references[0], ('MEDLINE', '89121082'))
01514         self.assertEqual(record.references[20].references[1], ('PubMed', '2644131'))
01515         self.assertEqual(record.references[21].authors, "Zheleva D., Sharma J., Panico M., Morris H.R., Barber J.")
01516         self.assertEqual(record.references[21].title, "Isolation and characterization of monomeric and dimeric CP47-reaction center photosystem II complexes.")
01517         self.assertEqual(len(record.references[21].references), 2)
01518         self.assertEqual(record.references[21].references[0], ('MEDLINE', '98298118'))
01519         self.assertEqual(record.references[21].references[1], ('PubMed', '9632665'))
01520 
01521         #Check the two parsers agree on the essentials
01522         self.assertEqual(seq_record.seq.tostring(), record.sequence)
01523         self.assertEqual(seq_record.description, record.description)
01524         self.assertEqual(seq_record.name, record.entry_name)
01525         self.assertTrue(seq_record.id in record.accessions)
01526 
01527         #Now try using the iterator - note that all these
01528         #test cases have only one record.
01529 
01530         # With the SequenceParser
01531         test_handle = open(datafile)
01532         records = list(SeqIO.parse(test_handle, "swiss"))
01533         test_handle.close()
01534 
01535         self.assertEqual(len(records), 1)
01536         self.assertTrue(isinstance(records[0], SeqRecord))
01537 
01538         #Check matches what we got earlier without the iterator:
01539         self.assertEqual(records[0].seq.tostring(), seq_record.seq.tostring())
01540         self.assertEqual(records[0].description, seq_record.description)
01541         self.assertEqual(records[0].name, seq_record.name)
01542         self.assertEqual(records[0].id, seq_record.id)
01543 
01544         # With the RecordParser
01545         test_handle = open(datafile)
01546         records = list(SwissProt.parse(test_handle))
01547         test_handle.close()
01548 
01549         self.assertEqual(len(records), 1)
01550         self.assertTrue(isinstance(records[0], SwissProt.Record))
01551 
01552         #Check matches what we got earlier without the iterator:
01553         self.assertEqual(records[0].sequence, record.sequence)
01554         self.assertEqual(records[0].description, record.description)
01555         self.assertEqual(records[0].entry_name, record.entry_name)
01556         self.assertEqual(records[0].accessions, record.accessions)
01557 
01558 
01559     def test_sp015(self):
01560         "Parsing SwissProt file sp015"
01561 
01562         filename = 'sp015'
01563         # test the record parser
01564 
01565         datafile = os.path.join('SwissProt', filename)
01566 
01567         test_handle = open(datafile)
01568         seq_record = SeqIO.read(test_handle, "swiss")
01569         test_handle.close()
01570 
01571         self.assertTrue(isinstance(seq_record, SeqRecord))
01572 
01573         self.assertEqual(seq_record.id, "IPI00383150")
01574         self.assertEqual(seq_record.name, "IPI00383150.2")
01575         self.assertEqual(seq_record.description, "")
01576         self.assertEqual(repr(seq_record.seq), "Seq('MSFQAPRRLLELAGQSLLRDQALAISVLDELPRELFPRLFVEAFTSRRCEVLKV...TPC', ProteinAlphabet())")
01577 
01578         test_handle = open(datafile)
01579         record = SwissProt.read(test_handle)
01580         test_handle.close()
01581 
01582         # test a couple of things on the record -- this is not exhaustive
01583         self.assertEqual(record.entry_name, "IPI00383150.2")
01584         self.assertEqual(record.accessions, ['IPI00383150'])
01585         self.assertEqual(record.organism_classification, ['Eukaryota', 'Metazoa', 'Chordata', 'Craniata', 'Vertebrata', 'Euteleostomi', 'Mammalia', 'Eutheria', 'Primates', 'Catarrhini', 'Hominidae', 'Homo'])
01586         self.assertEqual(record.seqinfo, (457, 52856, '5C3151AAADBDE232'))
01587     
01588         self.assertEqual(len(record.features), 0)
01589         self.assertEqual(len(record.references), 0)
01590 
01591         #Check the two parsers agree on the essentials
01592         self.assertEqual(seq_record.seq.tostring(), record.sequence)
01593         self.assertEqual(seq_record.description, record.description)
01594         self.assertEqual(seq_record.name, record.entry_name)
01595         self.assertTrue(seq_record.id in record.accessions)
01596 
01597         #Now try using the iterator - note that all these
01598         #test cases have only one record.
01599 
01600         # With the SequenceParser
01601         test_handle = open(datafile)
01602         records = list(SeqIO.parse(test_handle, "swiss"))
01603         test_handle.close()
01604 
01605         self.assertEqual(len(records), 1)
01606         self.assertTrue(isinstance(records[0], SeqRecord))
01607 
01608         #Check matches what we got earlier without the iterator:
01609         self.assertEqual(records[0].seq.tostring(), seq_record.seq.tostring())
01610         self.assertEqual(records[0].description, seq_record.description)
01611         self.assertEqual(records[0].name, seq_record.name)
01612         self.assertEqual(records[0].id, seq_record.id)
01613 
01614         # With the RecordParser
01615         test_handle = open(datafile)
01616         records = list(SwissProt.parse(test_handle))
01617         test_handle.close()
01618 
01619         self.assertEqual(len(records), 1)
01620         self.assertTrue(isinstance(records[0], SwissProt.Record))
01621 
01622         #Check matches what we got earlier without the iterator:
01623         self.assertEqual(records[0].sequence, record.sequence)
01624         self.assertEqual(records[0].description, record.description)
01625         self.assertEqual(records[0].entry_name, record.entry_name)
01626         self.assertEqual(records[0].accessions, record.accessions)
01627 
01628 
01629 
if __name__ == "__main__":
    # Executed as a script: hand a verbosity-2 text runner to
    # unittest.main so it runs the tests defined in this module.
    unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))