Back to index

python-biopython  1.60
Public Member Functions
test_SwissProt.TestSwissProt Class Reference

List of all members.

Public Member Functions

def test_sp001
def test_sp002
def test_sp003
def test_sp004
def test_sp005
def test_sp006
def test_sp007
def test_sp008
def test_sp009
def test_sp010
def test_sp011
def test_sp012
def test_sp013
def test_sp014
def test_sp015

Detailed Description

Definition at line 13 of file test_SwissProt.py.


Member Function Documentation

def test_SwissProt.TestSwissProt.test_sp001 (self)

Definition at line 15 of file test_SwissProt.py.

00015 
00016     def test_sp001(self):
00017         "Parsing SwissProt file sp001"
00018         filename = 'sp001'
00019         # test the record parser
00020 
00021         datafile = os.path.join('SwissProt', filename)
00022 
00023         test_handle = open(datafile)
00024         seq_record = SeqIO.read(test_handle, "swiss")
00025         test_handle.close()
00026 
00027         self.assertTrue(isinstance(seq_record, SeqRecord))
00028 
00029         self.assertEqual(seq_record.id, "Q13454")
00030         self.assertEqual(seq_record.name, "N33_HUMAN")
00031         self.assertEqual(seq_record.description, "N33 PROTEIN.")
00032         self.assertEqual(repr(seq_record.seq), "Seq('MGARGAPSRRRQAGRRLRYLPTGSFPFLLLLLLLCIQLGGGQKKKENLLAEKVE...DFE', ProteinAlphabet())")
00033 
00034         test_handle = open(datafile)
00035         record = SwissProt.read(test_handle)
00036         test_handle.close()
00037 
00038         # test a couple of things on the record -- this is not exhaustive
00039         self.assertEqual(record.entry_name, "N33_HUMAN")
00040         self.assertEqual(record.accessions, ['Q13454', 'Q14911', 'Q14912'])
00041         self.assertEqual(record.organism_classification, ['Eukaryota', 'Metazoa', 'Chordata', 'Craniata', 'Vertebrata', 'Mammalia', 'Eutheria', 'Primates', 'Catarrhini', 'Hominidae', 'Homo'])
00042         self.assertEqual(record.seqinfo, (348, 39676, '75818910'))
00043     
00044         self.assertEqual(len(record.features), 6)
00045         self.assertEqual(record.features[0], ('TRANSMEM', 20, 40, 'POTENTIAL.', ''))
00046         self.assertEqual(record.features[1], ('TRANSMEM', 197, 217, 'POTENTIAL.', ''))
00047         self.assertEqual(record.features[2], ('TRANSMEM', 222, 242, 'POTENTIAL.', ''))
00048         self.assertEqual(record.features[3], ('TRANSMEM', 277, 297, 'POTENTIAL.', ''))
00049         self.assertEqual(record.features[4], ('TRANSMEM', 313, 333, 'POTENTIAL.', ''))
00050         self.assertEqual(record.features[5], ('VARSPLIC', 344, 348, 'DLDFE -> FLIK (IN FORM 2).', ''))
00051 
00052         self.assertEqual(len(record.references), 1)
00053         self.assertEqual(record.references[0].authors, "MACGROGAN D., LEVY A., BOVA G.S., ISAACS W.B., BOOKSTEIN R.")
00054         self.assertEqual(record.references[0].title, "Structure and methylation-associated silencing of a gene within a homozygously deleted region of human chromosome band 8p22.")
00055         self.assertEqual(len(record.references[0].references), 1)
00056         self.assertEqual(record.references[0].references[0], ('MEDLINE', '96299740'))
00057 
00058         #Check the two parsers agree on the essentials
00059         self.assertEqual(seq_record.seq.tostring(), record.sequence)
00060         self.assertEqual(seq_record.description, record.description)
00061         self.assertEqual(seq_record.name, record.entry_name)
00062         self.assertTrue(seq_record.id in record.accessions)
00063 
00064         #Now try using the iterator - note that all these
00065         #test cases have only one record.
00066 
00067         # With the SequenceParser
00068         test_handle = open(datafile)
00069         records = list(SeqIO.parse(test_handle, "swiss"))
00070         test_handle.close()
00071 
00072         self.assertEqual(len(records), 1)
00073         self.assertTrue(isinstance(records[0], SeqRecord))
00074 
00075         #Check matches what we got earlier without the iterator:
00076         self.assertEqual(records[0].seq.tostring(), seq_record.seq.tostring())
00077         self.assertEqual(records[0].description, seq_record.description)
00078         self.assertEqual(records[0].name, seq_record.name)
00079         self.assertEqual(records[0].id, seq_record.id)
00080 
00081         # With the RecordParser
00082         test_handle = open(datafile)
00083         records = list(SwissProt.parse(test_handle))
00084         test_handle.close()
00085 
00086         self.assertEqual(len(records), 1)
00087         self.assertTrue(isinstance(records[0], SwissProt.Record))
00088 
00089         #Check matches what we got earlier without the iterator:
00090         self.assertEqual(records[0].sequence, record.sequence)
00091         self.assertEqual(records[0].description, record.description)
00092         self.assertEqual(records[0].entry_name, record.entry_name)
00093         self.assertEqual(records[0].accessions, record.accessions)
00094 

Here is the call graph for this function:

def test_SwissProt.TestSwissProt.test_sp002 (self)

Definition at line 95 of file test_SwissProt.py.

00095 
00096     def test_sp002(self):
00097         "Parsing SwissProt file sp002"
00098 
00099         filename = 'sp002'
00100         # test the record parser
00101 
00102         datafile = os.path.join('SwissProt', filename)
00103 
00104         test_handle = open(datafile)
00105         seq_record = SeqIO.read(test_handle, "swiss")
00106         test_handle.close()
00107 
00108         self.assertTrue(isinstance(seq_record, SeqRecord))
00109 
00110         self.assertEqual(seq_record.id, "P54101")
00111         self.assertEqual(seq_record.name, "CSP_MOUSE")
00112         self.assertEqual(seq_record.description, "CYSTEINE STRING PROTEIN (CSP).")
00113         self.assertEqual(repr(seq_record.seq), "Seq('MADQRQRSLSTSGESLYHVLGLDKNATSDDIKKSYRKLALKYHPDKNPDNPEAA...GFN', ProteinAlphabet())")
00114 
00115         test_handle = open(datafile)
00116         record = SwissProt.read(test_handle)
00117         test_handle.close()
00118 
00119         # test a couple of things on the record -- this is not exhaustive
00120         self.assertEqual(record.entry_name, "CSP_MOUSE")
00121         self.assertEqual(record.accessions, ['P54101'])
00122         self.assertEqual(record.organism_classification, ['Eukaryota', 'Metazoa', 'Chordata', 'Craniata', 'Vertebrata', 'Mammalia', 'Eutheria', 'Rodentia', 'Sciurognathi', 'Muridae', 'Murinae', 'Mus'])
00123         self.assertEqual(record.seqinfo, (198, 22100, '9DF0142B'))
00124     
00125         self.assertEqual(len(record.features), 2) 
00126         self.assertEqual(record.features[0], ('DOMAIN', 13, 82, 'DNAJ-LIKE.', ''))
00127         self.assertEqual(record.features[1], ('DOMAIN', 118, 128, 'POLY-CYS.', ''))
00128 
00129         self.assertEqual(len(record.references), 3)
00130         self.assertEqual(record.references[0].authors, "QIN N., LIN T., BIRNBAUMER L.")
00131         self.assertEqual(record.references[0].title, "")
00132         self.assertEqual(len(record.references[0].references), 0)
00133         self.assertEqual(record.references[1].authors, "MASTROGIACOMO A., GUNDERSEN C.B.")
00134         self.assertEqual(record.references[1].title, "The nucleotide and deduced amino acid sequence of a rat cysteine string protein.")
00135         self.assertEqual(len(record.references[1].references), 1)
00136         self.assertEqual(record.references[1].references[0], ('MEDLINE', '95223109'))
00137         self.assertEqual(record.references[2].authors, "BRAUN J.E., SCHELLER R.H.")
00138         self.assertEqual(record.references[2].title, "Cysteine string protein, a DnaJ family member, is present on diverse secretory vesicles.")
00139         self.assertEqual(len(record.references[2].references), 1)
00140         self.assertEqual(record.references[2].references[0], ('MEDLINE', '96188189'))
00141 
00142         #Check the two parsers agree on the essentials
00143         self.assertEqual(seq_record.seq.tostring(), record.sequence)
00144         self.assertEqual(seq_record.description, record.description)
00145         self.assertEqual(seq_record.name, record.entry_name)
00146         self.assertTrue(seq_record.id in record.accessions)
00147 
00148         #Now try using the iterator - note that all these
00149         #test cases have only one record.
00150 
00151         # With the SequenceParser
00152         test_handle = open(datafile)
00153         records = list(SeqIO.parse(test_handle, "swiss"))
00154         test_handle.close()
00155 
00156         self.assertEqual(len(records), 1)
00157         self.assertTrue(isinstance(records[0], SeqRecord))
00158 
00159         #Check matches what we got earlier without the iterator:
00160         self.assertEqual(records[0].seq.tostring(), seq_record.seq.tostring())
00161         self.assertEqual(records[0].description, seq_record.description)
00162         self.assertEqual(records[0].name, seq_record.name)
00163         self.assertEqual(records[0].id, seq_record.id)
00164 
00165         # With the RecordParser
00166         test_handle = open(datafile)
00167         records = list(SwissProt.parse(test_handle))
00168         test_handle.close()
00169 
00170         self.assertEqual(len(records), 1)
00171         self.assertTrue(isinstance(records[0], SwissProt.Record))
00172 
00173         #Check matches what we got earlier without the iterator:
00174         self.assertEqual(records[0].sequence, record.sequence)
00175         self.assertEqual(records[0].description, record.description)
00176         self.assertEqual(records[0].entry_name, record.entry_name)
00177         self.assertEqual(records[0].accessions, record.accessions)

Here is the call graph for this function:

def test_SwissProt.TestSwissProt.test_sp003 (self)

Definition at line 178 of file test_SwissProt.py.

00178 
00179     def test_sp003(self):
00180         "Parsing SwissProt file sp003"
00181 
00182         filename = 'sp003'
00183         # test the record parser
00184 
00185         datafile = os.path.join('SwissProt', filename)
00186 
00187         test_handle = open(datafile)
00188         seq_record = SeqIO.read(test_handle, "swiss")
00189         test_handle.close()
00190 
00191         self.assertTrue(isinstance(seq_record, SeqRecord))
00192 
00193         self.assertEqual(seq_record.id, "P42655")
00194         self.assertEqual(seq_record.name, "143E_HUMAN")
00195         self.assertEqual(seq_record.description, "14-3-3 PROTEIN EPSILON (MITOCHONDRIAL IMPORT STIMULATION FACTOR L SUBUNIT) (PROTEIN KINASE C INHIBITOR PROTEIN-1) (KCIP-1) (14-3-3E).")
00196         self.assertEqual(repr(seq_record.seq), "Seq('MDDREDLVYQAKLAEQAERYDEMVESMKKVAGMDVELTVEERNLLSVAYKNVIG...ENQ', ProteinAlphabet())")
00197 
00198         test_handle = open(datafile)
00199         record = SwissProt.read(test_handle)
00200         test_handle.close()
00201 
00202         # test a couple of things on the record -- this is not exhaustive
00203         self.assertEqual(record.entry_name, "143E_HUMAN")
00204         self.assertEqual(record.accessions, ['P42655', 'P29360', 'Q63631'])
00205         self.assertEqual(record.organism_classification, ['EUKARYOTA', 'METAZOA', 'CHORDATA', 'VERTEBRATA', 'MAMMALIA', 'EUTHERIA', 'PRIMATES', 'CATARRHINI', 'HOMINIDAE', 'HOMO'])
00206         self.assertEqual(record.seqinfo, (255, 29174, '40A43E62'))
00207 
00208         self.assertEqual(len(record.features), 5)
00209         self.assertEqual(record.features[0], ('MOD_RES', 1, 1, 'ACETYLATION.', ''))
00210         self.assertEqual(record.features[1], ('CONFLICT', 73, 73, 'K -> T (IN REF. 8).', ''))
00211         self.assertEqual(record.features[2], ('CONFLICT', 120, 120, 'F -> S (IN REF. 8).', ''))
00212         self.assertEqual(record.features[3], ('CONFLICT', 123, 123, 'K -> Y (IN REF. 8).', ''))
00213         self.assertEqual(record.features[4], ('CONFLICT', 129, 129, 'H -> Y (IN REF. 13).', ''))
00214 
00215         self.assertEqual(len(record.references), 13)
00216         self.assertEqual(record.references[0].authors, "CONKLIN D.S., GALAKTIONOV K., BEACH D.")
00217         self.assertEqual(record.references[0].title, "14-3-3 proteins associate with cdc25 phosphatases.")
00218         self.assertEqual(len(record.references[0].references), 1)
00219         self.assertEqual(record.references[0].references[0], ('MEDLINE', '95372385'))
00220         self.assertEqual(record.references[1].authors, "LUK S.C.W., LEE C.Y., WAYE M.M.Y.")
00221         self.assertEqual(record.references[1].title, "")
00222         self.assertEqual(len(record.references[1].references), 0)
00223         self.assertEqual(record.references[2].authors, "JIN D.Y., LYU M.S., KOZAK C.A., JEANG K.T.")
00224         self.assertEqual(record.references[2].title, "Function of 14-3-3 proteins.")
00225         self.assertEqual(len(record.references[2].references), 1)
00226         self.assertEqual(record.references[2].references[0], ('MEDLINE', '96300316'))
00227         self.assertEqual(record.references[3].authors, "CHONG S.S., TANIGAMI A., ROSCHKE A.V., LEDBETTER D.H.")
00228         self.assertEqual(record.references[3].title, "14-3-3 epsilon has no homology to LIS1 and lies telomeric to it on chromosome 17p13.3 outside the Miller-Dieker syndrome chromosome region.")
00229         self.assertEqual(len(record.references[3].references), 1)
00230         self.assertEqual(record.references[3].references[0], ('MEDLINE', '97011338'))
00231         self.assertEqual(record.references[4].authors, "TANIGAMI A., CHONG S.S., LEDBETTER D.H.")
00232         self.assertEqual(record.references[4].title, "14-3-3 epsilon genomic sequence.")
00233         self.assertEqual(len(record.references[4].references), 0)
00234         self.assertEqual(record.references[5].authors, "ROSEBOOM P.H., WELLER J.L., BABILA T., AITKEN A., SELLERS L.A., MOFFET J.R., NAMBOODIRI M.A., KLEIN D.C.")
00235         self.assertEqual(record.references[5].title, "Cloning and characterization of the epsilon and zeta isoforms of the 14-3-3 proteins.")
00236         self.assertEqual(len(record.references[5].references), 1)
00237         self.assertEqual(record.references[5].references[0], ('MEDLINE', '94296566'))
00238         self.assertEqual(record.references[6].authors, "ALAM R., HACHIYA N., SAKAGUCHI M., SHUN-ICHIRO K., IWANAGA S., KITAJIMA M., MIHARA K., OMURA T.")
00239         self.assertEqual(record.references[6].title, "cDNA cloning and characterization of mitochondrial import stimulation factor (MSF) purified from rat liver cytosol.")
00240         self.assertEqual(len(record.references[6].references), 1)
00241         self.assertEqual(record.references[6].references[0], ('MEDLINE', '95122474'))
00242         self.assertEqual(record.references[7].authors, "GAO L., GU X.B., YU D.S., YU R.K., ZENG G.")
00243         self.assertEqual(record.references[7].title, "Association of a 14-3-3 protein with CMP-NeuAc:GM1 alpha 2,3- sialyltransferase.")
00244         self.assertEqual(len(record.references[7].references), 1)
00245         self.assertEqual(record.references[7].references[0], ('MEDLINE', '96280718'))
00246         self.assertEqual(record.references[8].authors, "MCCONNELL J.E., ARMSTRONG J.F., BARD J.B.")
00247         self.assertEqual(record.references[8].title, "The mouse 14-3-3 epsilon isoform, a kinase regulator whose expression pattern is modulated in mesenchyme and neuronal differentiation.")
00248         self.assertEqual(len(record.references[8].references), 1)
00249         self.assertEqual(record.references[8].references[0], ('MEDLINE', '95269876'))
00250         self.assertEqual(record.references[9].authors, "TAKIHARA Y., IRIE K., NOMURA M., MOTALEB M., MATSUMOTO K., SHIMADA K.")
00251         self.assertEqual(record.references[9].title, "")
00252         self.assertEqual(len(record.references[9].references), 0)
00253         self.assertEqual(record.references[10].authors, "JONES J.M., NIIKURA T., PINKE R.M., GUO W., MOLDAY L., LEYKAM J., MCCONNELL D.G.")
00254         self.assertEqual(record.references[10].title, "Expression of 14-3-3 proteins in bovine retinal photoreceptors.")
00255         self.assertEqual(len(record.references[10].references), 0)
00256         self.assertEqual(record.references[11].authors, "TOKER A., SELLERS L.A., AMESS B., PATEL Y., HARRIS A., AITKEN A.")
00257         self.assertEqual(record.references[11].title, "Multiple isoforms of a protein kinase C inhibitor (KCIP-1/14-3-3) from sheep brain. Amino acid sequence of phosphorylated forms.")
00258         self.assertEqual(len(record.references[11].references), 1)
00259         self.assertEqual(record.references[11].references[0], ('MEDLINE', '92283271'))
00260         self.assertEqual(record.references[12].authors, "TOKER A., ELLIS C.A., SELLERS L.A., AITKEN A.")
00261         self.assertEqual(record.references[12].title, "Protein kinase C inhibitor proteins. Purification from sheep brain and sequence similarity to lipocortins and 14-3-3 protein.")
00262         self.assertEqual(len(record.references[12].references), 1)
00263         self.assertEqual(record.references[12].references[0], ('MEDLINE', '90345949'))
00264 
00265         #Check the two parsers agree on the essentials
00266         self.assertEqual(seq_record.seq.tostring(), record.sequence)
00267         self.assertEqual(seq_record.description, record.description)
00268         self.assertEqual(seq_record.name, record.entry_name)
00269         self.assertTrue(seq_record.id in record.accessions)
00270 
00271         #Now try using the iterator - note that all these
00272         #test cases have only one record.
00273 
00274         # With the SequenceParser
00275         test_handle = open(datafile)
00276         records = list(SeqIO.parse(test_handle, "swiss"))
00277         test_handle.close()
00278 
00279         self.assertEqual(len(records), 1)
00280         self.assertTrue(isinstance(records[0], SeqRecord))
00281 
00282         #Check matches what we got earlier without the iterator:
00283         self.assertEqual(records[0].seq.tostring(), seq_record.seq.tostring())
00284         self.assertEqual(records[0].description, seq_record.description)
00285         self.assertEqual(records[0].name, seq_record.name)
00286         self.assertEqual(records[0].id, seq_record.id)
00287 
00288         # With the RecordParser
00289         test_handle = open(datafile)
00290         records = list(SwissProt.parse(test_handle))
00291         test_handle.close()
00292 
00293         self.assertEqual(len(records), 1)
00294         self.assertTrue(isinstance(records[0], SwissProt.Record))
00295 
00296         #Check matches what we got earlier without the iterator:
00297         self.assertEqual(records[0].sequence, record.sequence)
00298         self.assertEqual(records[0].description, record.description)
00299         self.assertEqual(records[0].entry_name, record.entry_name)
00300         self.assertEqual(records[0].accessions, record.accessions)
00301 

Here is the call graph for this function:

def test_SwissProt.TestSwissProt.test_sp004 (self)

Definition at line 302 of file test_SwissProt.py.

00302 
00303     def test_sp004(self):
00304         "Parsing SwissProt file sp004"
00305 
00306         filename = 'sp004'
00307         # test the record parser
00308 
00309         datafile = os.path.join('SwissProt', filename)
00310 
00311         test_handle = open(datafile)
00312         seq_record = SeqIO.read(test_handle, "swiss")
00313         test_handle.close()
00314 
00315         self.assertTrue(isinstance(seq_record, SeqRecord))
00316 
00317         self.assertEqual(seq_record.id, "P23082")
00318         self.assertEqual(seq_record.name, "NDOA_PSEPU")
00319         self.assertEqual(seq_record.description, "NAPHTHALENE 1,2-DIOXYGENASE SYSTEM FERREDOXIN COMPONENT.")
00320         self.assertEqual(repr(seq_record.seq), "Seq('TVKWIEAVALSDILEGDVLGVTVEGKELALYEVEGEIYATDNLCTHGSARMSDG...DLS', ProteinAlphabet())")
00321 
00322         test_handle = open(datafile)
00323         record = SwissProt.read(test_handle)
00324         test_handle.close()
00325 
00326         # test a couple of things on the record -- this is not exhaustive
00327         self.assertEqual(record.entry_name, "NDOA_PSEPU")
00328         self.assertEqual(record.accessions, ['P23082', 'Q52123', 'O07829'])
00329         self.assertEqual(record.organism_classification, ['Bacteria', 'Proteobacteria', 'gamma subdivision', 'Pseudomonas group', 'Pseudomonas'])
00330         self.assertEqual(record.seqinfo, (103, 11315, '9F91B3C8'))
00331     
00332         self.assertEqual(len(record.features), 12)
00333         self.assertEqual(record.features[0], ('INIT_MET', 0, 0, '', ''))
00334         self.assertEqual(record.features[1], ('METAL', 44, 44, 'IRON-SULFUR (2FE-2S) (POTENTIAL).', ''))
00335         self.assertEqual(record.features[2], ('METAL', 46, 46, 'IRON-SULFUR (2FE-2S) (POTENTIAL).', ''))
00336         self.assertEqual(record.features[3], ('METAL', 63, 63, 'IRON-SULFUR (2FE-2S) (POTENTIAL).', ''))
00337         self.assertEqual(record.features[4], ('METAL', 66, 66, 'IRON-SULFUR (2FE-2S) (POTENTIAL).', ''))
00338         self.assertEqual(record.features[5], ('VARIANT', 2, 2, 'V -> E (IN STRAIN G7).', ''))
00339         self.assertEqual(record.features[6], ('VARIANT', 14, 14, 'L -> P (IN STRAIN G7).', ''))
00340         self.assertEqual(record.features[7], ('VARIANT', 48, 48, 'S -> A (IN STRAIN G7).', ''))
00341         self.assertEqual(record.features[8], ('VARIANT', 76, 76, 'K -> R (IN STRAIN G7).', ''))
00342         self.assertEqual(record.features[9], ('VARIANT', 84, 84, 'Q -> E (IN STRAIN G7).', ''))
00343         self.assertEqual(record.features[10], ('VARIANT', 90, 90, 'P -> A (IN STRAIN G7).', ''))
00344         self.assertEqual(record.features[11], ('VARIANT', 103, 103, 'S -> GEF (IN STRAIN G7).', ''))
00345 
00346         self.assertEqual(len(record.references), 4) 
00347         self.assertEqual(record.references[0].authors, "KURKELA S., LEHVAESLAIHO H., PALVA E.T., TEERI T.H.")
00348         self.assertEqual(record.references[0].title, "Cloning, nucleotide sequence and characterization of genes encoding naphthalene dioxygenase of Pseudomonas putida strain NCIB9816.")
00349         self.assertEqual(len(record.references[0].references), 1)
00350         self.assertEqual(record.references[0].references[0], ('MEDLINE', '89211973'))
00351         self.assertEqual(record.references[1].authors, "SIMON M.J., OSSLUND T.D., SAUNDERS R., ENSLEY B.D., SUGGS S., HARCOURT A.A., SUEN W.-C., CRUDEN D.L., GIBSON D.T., ZYLSTRA G.J.")
00352         self.assertEqual(record.references[1].title, "Sequences of genes encoding naphthalene dioxygenase in Pseudomonas putida strains G7 and NCIB 9816-4.")
00353         self.assertEqual(len(record.references[1].references), 1)
00354         self.assertEqual(record.references[1].references[0], ('MEDLINE', '93252277'))
00355         self.assertEqual(record.references[2].authors, "DENOME S.A., STANLEY D.C., OLSON E.S., YOUNG K.D.")
00356         self.assertEqual(record.references[2].title, "Metabolism of dibenzothiophene and naphthalene in Pseudomonas strains: complete DNA sequence of an upper naphthalene catabolic pathway.")
00357         self.assertEqual(len(record.references[2].references), 1)
00358         self.assertEqual(record.references[2].references[0], ('MEDLINE', '94042852'))
00359         self.assertEqual(record.references[3].authors, "HAMANN C.")
00360         self.assertEqual(record.references[3].title, "")
00361         self.assertEqual(len(record.references[3].references), 0)
00362 
00363         #Check the two parsers agree on the essentials
00364         self.assertEqual(seq_record.seq.tostring(), record.sequence)
00365         self.assertEqual(seq_record.description, record.description)
00366         self.assertEqual(seq_record.name, record.entry_name)
00367         self.assertTrue(seq_record.id in record.accessions)
00368 
00369         #Now try using the iterator - note that all these
00370         #test cases have only one record.
00371 
00372         # With the SequenceParser
00373         test_handle = open(datafile)
00374         records = list(SeqIO.parse(test_handle, "swiss"))
00375         test_handle.close()
00376 
00377         self.assertEqual(len(records), 1)
00378         self.assertTrue(isinstance(records[0], SeqRecord))
00379 
00380         #Check matches what we got earlier without the iterator:
00381         self.assertEqual(records[0].seq.tostring(), seq_record.seq.tostring())
00382         self.assertEqual(records[0].description, seq_record.description)
00383         self.assertEqual(records[0].name, seq_record.name)
00384         self.assertEqual(records[0].id, seq_record.id)
00385 
00386         # With the RecordParser
00387         test_handle = open(datafile)
00388         records = list(SwissProt.parse(test_handle))
00389         test_handle.close()
00390 
00391         self.assertEqual(len(records), 1)
00392         self.assertTrue(isinstance(records[0], SwissProt.Record))
00393 
00394         #Check matches what we got earlier without the iterator:
00395         self.assertEqual(records[0].sequence, record.sequence)
00396         self.assertEqual(records[0].description, record.description)
00397         self.assertEqual(records[0].entry_name, record.entry_name)
00398         self.assertEqual(records[0].accessions, record.accessions)

Here is the call graph for this function:

def test_SwissProt.TestSwissProt.test_sp005 (self)

Definition at line 399 of file test_SwissProt.py.

00399 
00400     def test_sp005(self):
00401         "Parsing SwissProt file sp005"
00402 
00403         filename = 'sp005'
00404         # test the record parser
00405 
00406         datafile = os.path.join('SwissProt', filename)
00407 
00408         test_handle = open(datafile)
00409         seq_record = SeqIO.read(test_handle, "swiss")
00410         test_handle.close()
00411 
00412         self.assertTrue(isinstance(seq_record, SeqRecord))
00413 
00414         self.assertEqual(seq_record.id, "P24973")
00415         self.assertEqual(seq_record.name, "NU3M_BALPH")
00416         self.assertEqual(seq_record.description, "NADH-UBIQUINONE OXIDOREDUCTASE CHAIN 3 (EC 1.6.5.3).")
00417         self.assertEqual(repr(seq_record.seq), "Seq('MNLLLTLLTNTTLALLLVFIAFWLPQLNVYAEKTSPYECGFDPMGSARLPFSMK...WAE', ProteinAlphabet())")
00418 
00419         test_handle = open(datafile)
00420         record = SwissProt.read(test_handle)
00421         test_handle.close()
00422 
00423         # test a couple of things on the record -- this is not exhaustive
00424         self.assertEqual(record.entry_name, "NU3M_BALPH")
00425         self.assertEqual(record.accessions, ['P24973'])
00426         self.assertEqual(record.organism_classification, ['Eukaryota', 'Metazoa', 'Chordata', 'Craniata', 'Vertebrata', 'Mammalia', 'Eutheria', 'Cetartiodactyla', 'Cetacea', 'Mysticeti', 'Balaenopteridae', 'Balaenoptera'])
00427         self.assertEqual(record.seqinfo, (115, 13022, 'ACF02965'))
00428     
00429         self.assertEqual(len(record.features), 0)
00430 
00431         self.assertEqual(len(record.references), 2)
00432         self.assertEqual(record.references[0].authors, "ARNASON U., GULLBERG A., WIDEGREN B.")
00433         self.assertEqual(record.references[0].title, "The complete nucleotide sequence of the mitochondrial DNA of the fin whale, Balaenoptera physalus.")
00434         self.assertEqual(len(record.references[0].references), 1)
00435         self.assertEqual(record.references[0].references[0], ('MEDLINE', '92139449'))
00436         self.assertEqual(record.references[1].authors, "ARNASON U., GULLBERG A.")
00437         self.assertEqual(record.references[1].title, "Comparison between the complete mtDNA sequences of the blue and the fin whale, two species that can hybridize in nature.")
00438         self.assertEqual(len(record.references[1].references), 1)
00439         self.assertEqual(record.references[1].references[0], ('MEDLINE', '94141932'))
00440 
00441         #Check the two parsers agree on the essentials
00442         self.assertEqual(seq_record.seq.tostring(), record.sequence)
00443         self.assertEqual(seq_record.description, record.description)
00444         self.assertEqual(seq_record.name, record.entry_name)
00445         self.assertTrue(seq_record.id in record.accessions)
00446 
00447         #Now try using the iterator - note that all these
00448         #test cases have only one record.
00449 
00450         # With the SequenceParser
00451         test_handle = open(datafile)
00452         records = list(SeqIO.parse(test_handle, "swiss"))
00453         test_handle.close()
00454 
00455         self.assertEqual(len(records), 1)
00456         self.assertTrue(isinstance(records[0], SeqRecord))
00457 
00458         #Check matches what we got earlier without the iterator:
00459         self.assertEqual(records[0].seq.tostring(), seq_record.seq.tostring())
00460         self.assertEqual(records[0].description, seq_record.description)
00461         self.assertEqual(records[0].name, seq_record.name)
00462         self.assertEqual(records[0].id, seq_record.id)
00463 
00464         # With the RecordParser
00465         test_handle = open(datafile)
00466         records = list(SwissProt.parse(test_handle))
00467         test_handle.close()
00468 
00469         self.assertEqual(len(records), 1)
00470         self.assertTrue(isinstance(records[0], SwissProt.Record))
00471 
00472         #Check matches what we got earlier without the iterator:
00473         self.assertEqual(records[0].sequence, record.sequence)
00474         self.assertEqual(records[0].description, record.description)
00475         self.assertEqual(records[0].entry_name, record.entry_name)
00476         self.assertEqual(records[0].accessions, record.accessions)
00477 

Here is the call graph for this function:

def test_SwissProt.TestSwissProt.test_sp006 (self)

Definition at line 478 of file test_SwissProt.py.

00478 
00479     def test_sp006(self):
00480         "Parsing SwissProt file sp006"
00481 
00482         filename = 'sp006'
00483         # test the record parser
00484 
00485         datafile = os.path.join('SwissProt', filename)
00486 
00487         test_handle = open(datafile)
00488         seq_record = SeqIO.read(test_handle, "swiss")
00489         test_handle.close()
00490 
00491         self.assertTrue(isinstance(seq_record, SeqRecord))
00492 
00493         self.assertEqual(seq_record.id, "P39896")
00494         self.assertEqual(seq_record.name, "TCMO_STRGA")
00495         self.assertEqual(seq_record.description, "TETRACENOMYCIN POLYKETIDE SYNTHESIS 8-O-METHYL TRANSFERASE TCMO (EC 2.1.1.-).")
00496         self.assertEqual(repr(seq_record.seq), "Seq('MTPHTHVRGPGDILQLTMAFYGSRALISAVELDLFTLLAGKPLPLGELCERAGI...KPR', ProteinAlphabet())")
00497 
00498         test_handle = open(datafile)
00499         record = SwissProt.read(test_handle)
00500         test_handle.close()
00501 
00502         # test a couple of things on the record -- this is not exhaustive
00503         self.assertEqual(record.entry_name, "TCMO_STRGA")
00504         self.assertEqual(record.accessions, ['P39896'])
00505         self.assertEqual(record.organism_classification, ['BACTERIA', 'FIRMICUTES', 'ACTINOBACTERIA', 'ACTINOBACTERIDAE', 'ACTINOMYCETALES', 'STREPTOMYCINEAE', 'STREPTOMYCETACEAE', 'STREPTOMYCES'])
00506         self.assertEqual(record.seqinfo, (339, 37035, '848B7337'))
00507     
00508         self.assertEqual(len(record.features), 0)
00509 
00510         self.assertEqual(len(record.references), 1)
00511         self.assertEqual(record.references[0].authors, "SUMMERS R.G., WENDT-PIENKOWSKI E., MOTAMEDI H., HUTCHINSON C.R.")
00512         self.assertEqual(record.references[0].title, "Nucleotide sequence of the tcmII-tcmIV region of the tetracenomycin C biosynthetic gene cluster of Streptomyces glaucescens and evidence that the tcmN gene encodes a multifunctional cyclase-dehydratase-O-methyl transferase.")
00513         self.assertEqual(len(record.references[0].references), 1)
00514         self.assertEqual(record.references[0].references[0], ('MEDLINE', '92193265'))
00515 
00516         #Check the two parsers agree on the essentials
00517         self.assertEqual(seq_record.seq.tostring(), record.sequence)
00518         self.assertEqual(seq_record.description, record.description)
00519         self.assertEqual(seq_record.name, record.entry_name)
00520         self.assertTrue(seq_record.id in record.accessions)
00521 
00522         #Now try using the iterator - note that all these
00523         #test cases have only one record.
00524 
00525         # With the SequenceParser
00526         test_handle = open(datafile)
00527         records = list(SeqIO.parse(test_handle, "swiss"))
00528         test_handle.close()
00529 
00530         self.assertEqual(len(records), 1)
00531         self.assertTrue(isinstance(records[0], SeqRecord))
00532 
00533         #Check matches what we got earlier without the iterator:
00534         self.assertEqual(records[0].seq.tostring(), seq_record.seq.tostring())
00535         self.assertEqual(records[0].description, seq_record.description)
00536         self.assertEqual(records[0].name, seq_record.name)
00537         self.assertEqual(records[0].id, seq_record.id)
00538 
00539         # With the RecordParser
00540         test_handle = open(datafile)
00541         records = list(SwissProt.parse(test_handle))
00542         test_handle.close()
00543 
00544         self.assertEqual(len(records), 1)
00545         self.assertTrue(isinstance(records[0], SwissProt.Record))
00546 
00547         #Check matches what we got earlier without the iterator:
00548         self.assertEqual(records[0].sequence, record.sequence)
00549         self.assertEqual(records[0].description, record.description)
00550         self.assertEqual(records[0].entry_name, record.entry_name)
00551         self.assertEqual(records[0].accessions, record.accessions)
00552 

Here is the call graph for this function:

Definition at line 553 of file test_SwissProt.py.

00553 
def test_sp007(self):
    """Parsing SwissProt file sp007

    Parses the file 'sp007' from the 'SwissProt' directory with both
    SeqIO (format "swiss") and SwissProt, compares selected fields with
    known values, and checks that the two parsers, and their iterator
    variants, agree with each other.
    """
    datafile = os.path.join('SwissProt', 'sp007')

    def parse_with(parser):
        # Hand an open handle to ``parser`` and always close it again,
        # so a parser failure does not leak the file handle.
        handle = open(datafile)
        try:
            return parser(handle)
        finally:
            handle.close()

    # SeqIO parser (returns a SeqRecord)
    seq_record = parse_with(lambda handle: SeqIO.read(handle, "swiss"))

    self.assertTrue(isinstance(seq_record, SeqRecord))

    self.assertEqual(seq_record.id, "O95832")
    self.assertEqual(seq_record.name, "CLD1_HUMAN")
    self.assertEqual(seq_record.description, "CLAUDIN-1 (SENESCENCE-ASSOCIATED EPITHELIAL MEMBRANE PROTEIN).")
    self.assertEqual(repr(seq_record.seq), "Seq('MANAGLQLLGFILAFLGWIGAIVSTALPQWRIYSYAGDNIVTAQAMYEGLWMSC...DYV', ProteinAlphabet())")

    # SwissProt parser (returns a SwissProt.Record)
    record = parse_with(SwissProt.read)

    # test a couple of things on the record -- this is not exhaustive
    self.assertEqual(record.entry_name, "CLD1_HUMAN")
    self.assertEqual(record.accessions, ['O95832'])
    self.assertEqual(record.organism_classification, ['Eukaryota', 'Metazoa', 'Chordata', 'Craniata', 'Vertebrata', 'Mammalia', 'Eutheria', 'Primates', 'Catarrhini', 'Hominidae', 'Homo'])
    self.assertEqual(record.seqinfo, (211, 22744, '07269000E6C214F0'))

    expected_features = [
        ('TRANSMEM', 8, 28, 'POTENTIAL.', ''),
        ('TRANSMEM', 82, 102, 'POTENTIAL.', ''),
        ('TRANSMEM', 116, 136, 'POTENTIAL.', ''),
        ('TRANSMEM', 164, 184, 'POTENTIAL.', ''),
        ('CONFLICT', 62, 62, 'I -> V (IN REF. 2).', ''),
        ('CONFLICT', 135, 135, 'V -> A (IN REF. 2).', ''),
    ]
    self.assertEqual(len(record.features), 6)
    for index, expected in enumerate(expected_features):
        self.assertEqual(record.features[index], expected)

    # One (authors, title, cross-references) entry per expected reference.
    expected_references = [
        ("Swisshelm K.L., Machl A., Planitzer S., Robertson R., Kubbies M., Hosier S.",
         "SEMP1, a senescence-associated cDNA isolated from human mammary epithelial cells, is a member of an epithelial membrane protein superfamily.",
         [('MEDLINE', '99132301')]),
        ("Mitic L.M., Anderson J.M.",
         "Human claudin-1 isolated from Caco-2 mRNA.",
         []),
    ]
    self.assertEqual(len(record.references), 2)
    for index, (authors, title, xrefs) in enumerate(expected_references):
        reference = record.references[index]
        self.assertEqual(reference.authors, authors)
        self.assertEqual(reference.title, title)
        self.assertEqual(len(reference.references), len(xrefs))
        for position, xref in enumerate(xrefs):
            self.assertEqual(reference.references[position], xref)

    #Check the two parsers agree on the essentials
    self.assertEqual(seq_record.seq.tostring(), record.sequence)
    self.assertEqual(seq_record.description, record.description)
    self.assertEqual(seq_record.name, record.entry_name)
    self.assertTrue(seq_record.id in record.accessions)

    #Now try using the iterator - note that all these
    #test cases have only one record.

    # With the SeqIO iterator; the list is built while the handle is open.
    seq_records = parse_with(lambda handle: list(SeqIO.parse(handle, "swiss")))

    self.assertEqual(len(seq_records), 1)
    self.assertTrue(isinstance(seq_records[0], SeqRecord))

    #Check matches what we got earlier without the iterator:
    self.assertEqual(seq_records[0].seq.tostring(), seq_record.seq.tostring())
    self.assertEqual(seq_records[0].description, seq_record.description)
    self.assertEqual(seq_records[0].name, seq_record.name)
    self.assertEqual(seq_records[0].id, seq_record.id)

    # With the SwissProt iterator
    sp_records = parse_with(lambda handle: list(SwissProt.parse(handle)))

    self.assertEqual(len(sp_records), 1)
    self.assertTrue(isinstance(sp_records[0], SwissProt.Record))

    #Check matches what we got earlier without the iterator:
    self.assertEqual(sp_records[0].sequence, record.sequence)
    self.assertEqual(sp_records[0].description, record.description)
    self.assertEqual(sp_records[0].entry_name, record.entry_name)
    self.assertEqual(sp_records[0].accessions, record.accessions)
00636 

Here is the call graph for this function:

Definition at line 637 of file test_SwissProt.py.

00637 
def test_sp008(self):
    """Parsing SwissProt file sp008

    Parses the file 'sp008' from the 'SwissProt' directory with both
    SeqIO (format "swiss") and SwissProt, compares selected fields, all
    71 features and all 27 references with known values, and checks that
    the two parsers, and their iterator variants, agree with each other.
    """
    datafile = os.path.join('SwissProt', 'sp008')

    def parse_with(parser):
        # Hand an open handle to ``parser`` and always close it again,
        # so a parser failure does not leak the file handle.
        handle = open(datafile)
        try:
            return parser(handle)
        finally:
            handle.close()

    # SeqIO parser (returns a SeqRecord)
    seq_record = parse_with(lambda handle: SeqIO.read(handle, "swiss"))

    self.assertTrue(isinstance(seq_record, SeqRecord))

    self.assertEqual(seq_record.id, "P01892")
    self.assertEqual(seq_record.name, "1A02_HUMAN")
    self.assertEqual(seq_record.description, "HLA CLASS I HISTOCOMPATIBILITY ANTIGEN, A-2 ALPHA CHAIN PRECURSOR.")
    self.assertEqual(repr(seq_record.seq), "Seq('MAVMAPRTLVLLLSGALALTQTWAGSHSMRYFFTSVSRPGRGEPRFIAVGYVDD...CKV', ProteinAlphabet())")

    # SwissProt parser (returns a SwissProt.Record)
    record = parse_with(SwissProt.read)

    # test a couple of things on the record -- this is not exhaustive
    self.assertEqual(record.entry_name, "1A02_HUMAN")
    self.assertEqual(record.accessions, ['P01892', 'P06338', 'P30514', 'P30444', 'P30445', 'P30446', 'Q29680', 'Q29899', 'Q95352', 'Q29837', 'Q95380'])
    self.assertEqual(record.organism_classification, ['Eukaryota', 'Metazoa', 'Chordata', 'Craniata', 'Vertebrata', 'Mammalia', 'Eutheria', 'Primates', 'Catarrhini', 'Hominidae', 'Homo'])
    self.assertEqual(record.seqinfo, (365, 40922, 'B54A97B24B337C08'))

    expected_features = [
        # features 0-10
        ('SIGNAL', 1, 24, '', ''),
        ('CHAIN', 25, 365, 'HLA CLASS I HISTOCOMPATIBILITY ANTIGEN, A-2 ALPHA CHAIN.', ''),
        ('DOMAIN', 25, 114, 'EXTRACELLULAR ALPHA-1.', ''),
        ('DOMAIN', 115, 206, 'EXTRACELLULAR ALPHA-2.', ''),
        ('DOMAIN', 207, 298, 'EXTRACELLULAR ALPHA-3.', ''),
        ('DOMAIN', 299, 308, 'CONNECTING PEPTIDE.', ''),
        ('TRANSMEM', 309, 332, '', ''),
        ('DOMAIN', 333, 365, 'CYTOPLASMIC TAIL.', ''),
        ('CARBOHYD', 110, 110, '', ''),
        ('DISULFID', 125, 188, '', ''),
        ('DISULFID', 227, 283, '', ''),
        # features 11-53
        ('STRAND', 27, 36, '', ''),
        ('STRAND', 45, 52, '', ''),
        ('TURN', 53, 54, '', ''),
        ('STRAND', 55, 61, '', ''),
        ('TURN', 62, 63, '', ''),
        ('STRAND', 70, 71, '', ''),
        ('HELIX', 74, 76, '', ''),
        ('TURN', 77, 78, '', ''),
        ('HELIX', 81, 108, '', ''),
        ('TURN', 109, 110, '', ''),
        ('TURN', 113, 114, '', ''),
        ('STRAND', 118, 127, '', ''),
        ('TURN', 129, 130, '', ''),
        ('STRAND', 133, 142, '', ''),
        ('TURN', 143, 144, '', ''),
        ('STRAND', 145, 150, '', ''),
        ('TURN', 152, 153, '', ''),
        ('STRAND', 157, 159, '', ''),
        ('TURN', 163, 163, '', ''),
        ('HELIX', 164, 173, '', ''),
        ('TURN', 174, 175, '', ''),
        ('HELIX', 176, 185, '', ''),
        ('TURN', 186, 186, '', ''),
        ('HELIX', 187, 198, '', ''),
        ('TURN', 199, 199, '', ''),
        ('HELIX', 200, 203, '', ''),
        ('TURN', 204, 204, '', ''),
        ('STRAND', 207, 207, '', ''),
        ('STRAND', 210, 219, '', ''),
        ('TURN', 220, 221, '', ''),
        ('STRAND', 222, 233, '', ''),
        ('STRAND', 238, 243, '', ''),
        ('TURN', 244, 245, '', ''),
        ('STRAND', 246, 247, '', ''),
        ('HELIX', 249, 251, '', ''),
        ('STRAND', 253, 254, '', ''),
        ('STRAND', 258, 259, '', ''),
        ('STRAND', 265, 274, '', ''),
        ('TURN', 275, 276, '', ''),
        ('HELIX', 278, 280, '', ''),
        ('STRAND', 281, 286, '', ''),
        ('TURN', 288, 289, '', ''),
        ('STRAND', 294, 297, '', ''),
        # features 54-70
        ('VARIANT', 33, 33, 'F -> Y (IN A*0205, A*0206, A*0208, A*0210 AND A*0221).', 'VAR_004334'),
        ('VARIANT', 54, 54, 'D -> N (IN A*0221).', 'VAR_004335'),
        ('VARIANT', 67, 67, 'Q -> R (IN A*0202, A*0205, AND A*0208).', 'VAR_004336'),
        ('VARIANT', 90, 90, 'K -> N (IN A*0208 AND A*0220).', 'VAR_004337'),
        ('VARIANT', 97, 98, 'TH -> ID (IN A*0211).', 'VAR_004338'),
        ('VARIANT', 119, 119, 'V -> L (IN A*0202, A*0205, A*0208 AND A*0217).', 'VAR_004339'),
        ('VARIANT', 121, 121, 'R -> M (IN A*0204 AND A*0217).', 'VAR_004340'),
        ('VARIANT', 123, 123, 'Y -> C (IN A*0207 AND A*0218).', 'VAR_004341'),
        ('VARIANT', 123, 123, 'Y -> F (IN A*0210 AND A*0217).', 'VAR_004342'),
        ('VARIANT', 131, 131, 'W -> G (IN A*0210).', 'VAR_004343'),
        ('VARIANT', 162, 162, 'M -> K (IN A*0218).', 'VAR_004344'),
        ('VARIANT', 173, 173, 'A -> T (IN A*0203).', 'VAR_004345'),
        ('VARIANT', 176, 176, 'V -> E (IN A*0203 AND A*0213).', 'VAR_004346'),
        ('VARIANT', 180, 180, 'L -> W (IN A*0202, A*0203, A*0205 AND A*0208).', 'VAR_004347'),
        ('VARIANT', 180, 180, 'L -> Q (IN A*0212 AND A*0213).', 'VAR_004348'),
        ('VARIANT', 187, 187, 'T -> E (IN A*0216).', 'VAR_004349'),
        ('VARIANT', 260, 260, 'A -> E (IN A*0209).', 'VAR_004350'),
    ]
    self.assertEqual(len(record.features), 71)
    for index, expected in enumerate(expected_features):
        self.assertEqual(record.features[index], expected)

    # One (authors, title, cross-references) entry per expected reference,
    # in the order they appear in the record.
    expected_references = [
        # references 0-9
        ("Koller B.H., Orr H.T.",
         "Cloning and complete sequence of an HLA-A2 gene: analysis of two HLA-A alleles at the nucleotide level.",
         [('MEDLINE', '85132727')]),
        ("Cianetti L., Testa U., Scotto L., la Valle R., Simeone A., Boccoli G., Giannella G., Peschle C., Boncinelli E.",
         "Three new class I HLA alleles: structure of mRNAs and alternative mechanisms of processing.",
         [('MEDLINE', '89122144')]),
        ("Ennis P.D., Zemmour J., Salter R.D., Parham P.",
         "Rapid cloning of HLA-A,B cDNA by using the polymerase chain reaction: frequency and nature of errors produced in amplification.",
         [('MEDLINE', '90207291')]),
        ("Belich M.P., Madrigal J.A., Hildebrand W.H., Zemmour J., Williams R.C., Luz R., Petzl-Erler M.L., Parham P.",
         "Unusual HLA-B alleles in two tribes of Brazilian Indians.",
         [('MEDLINE', '92269955')]),
        ("Krangel M.S.",
         "Unusual RNA splicing generates a secreted form of HLA-A2 in a mutagenized B lymphoblastoid cell line.",
         [('MEDLINE', '85230571')]),
        ("Orr H.T., Lopez de Castro J.A., Parham P., Ploegh H.L., Strominger J.L.",
         "Comparison of amino acid sequences of two human histocompatibility antigens, HLA-A2 and HLA-B7: location of putative alloantigenic sites.",
         [('MEDLINE', '80056745')]),
        ("Lopez de Castro J.A., Strominger J.L., Strong D.M., Orr H.T.",
         "Structure of crossreactive human histocompatibility antigens HLA-A28 and HLA-A2: possible implications for the generation of HLA polymorphism.",
         [('MEDLINE', '82247941')]),
        ("Mattson D.H., Handy D.E., Bradley D.A., Coligan J.E., Cowan E.P., Biddison W.E.",
         "DNA sequences of the genes that encode the CTL-defined HLA-A2 variants M7 and DK1.",
         [('MEDLINE', '87306734')]),
        ("Holmes N., Ennis P., Wan A.M., Denney D.W., Parham P.",
         "Multiple genetic mechanisms have contributed to the generation of the HLA-A2/A28 family of class I MHC molecules.",
         [('MEDLINE', '87252273')]),
        ("Domena J.D.",
         "",
         []),
        # references 10-19
        ("Castano A.R., Lopez de Castro J.A.",
         "Structure of the HLA-A*0204 antigen, found in South American Indians. Spatial clustering of HLA-A2 subtype polymorphism.",
         [('MEDLINE', '92039809')]),
        ("Watkins D.I., McAdam S.N., Liu X., Stang C.R., Milford E.L., Levine C.G., Garber T.L., Dogon A.L., Lord C.I., Ghim S.H., Troup G.M., Hughes A.L., Letvin N.L.",
         "New recombinant HLA-B alleles in a tribe of South American Amerindians indicate rapid evolution of MHC class I loci.",
         [('MEDLINE', '92269956')]),
        ("Parham P., Lawlor D.A., Lomen C.E., Ennis P.D.",
         "Diversity and diversification of HLA-A,B,C alleles.",
         [('MEDLINE', '89235215')]),
        ("Ezquerra A., Domenech N., van der Poel J., Strominger J.L., Vega M.A., Lopez de Castro J.A.",
         "Molecular analysis of an HLA-A2 functional variant CLA defined by cytolytic T lymphocytes.",
         [('MEDLINE', '86305811')]),
        ("Domenech N., Ezquerra A., Castano R., Lopez de Castro J.A.",
         "Structural analysis of HLA-A2.4 functional variant KNE. Implications for the mapping of HLA-A2-specific T-cell epitopes.",
         [('MEDLINE', '88113844')]),
        ("Domenech N., Castano R., Goulmy E., Lopez de Castro J.A.",
         "Molecular analysis of HLA-A2.4 functional variant KLO: close structural and evolutionary relatedness to the HLA-A2.2 subtype.",
         [('MEDLINE', '88314183')]),
        ("Castano R., Ezquerra A., Domenech N., Lopez de Castro J.A.",
         "An HLA-A2 population variant with structural polymorphism in the alpha 3 region.",
         [('MEDLINE', '88186100')]),
        ("Epstein H., Kennedy L., Holmes N.",
         "An Oriental HLA-A2 subtype is closely related to a subset of Caucasoid HLA-A2 alleles.",
         [('MEDLINE', '89122133')]),
        ("Castano A.R., Lopez de Castro J.A.",
         "Structure of the HLA-A*0211 (A2.5) subtype: further evidence for selection-driven diversification of HLA-A2 antigens.",
         [('MEDLINE', '92218010')]),
        ("Barber D.F., Fernandez J.M., Lopez de Castro J.A.",
         "Primary structure of a new HLA-A2 subtype: HLA-A*0213.",
         [('MEDLINE', '94222455')]),
        # references 20-26
        ("Barouch D., Krausa P., Bodmer J., Browning M.J., McMichael A.J.",
         "Identification of a novel HLA-A2 subtype, HLA-A*0216.",
         [('MEDLINE', '95278976')]),
        ("Selvakumar A., Granja C.B., Salazar M., Alosco S.M., Yunis E.J., Dupont B.",
         "A novel subtype of A2 (A*0217) isolated from the South American Indian B-cell line AMALA.",
         [('MEDLINE', '95381236')]),
        ("Kashiwase K., Tokunaga K., Ishikawa Y., Oohashi H., Hashimoto M., Akaza T., Tadokoro K., Juji T.",
         "A new A2 sequence HLA-A2K from Japanese.",
         []),
        ("Fleischhauer K., Zino E., Mazzi B., Severini G.M., Benazzi E., Bordignon C.",
         "HLA-A*02 subtype distribution in Caucasians from northern Italy: identification of A*0220.",
         [('MEDLINE', '97161038')]),
        ("Szmania S., Baxter-Lowe L.A.",
         "Nucleotide sequence of a novel HLA-A2 gene.",
         []),
        ("Bjorkman P.J., Saper M.A., Samraoui B., Bennett W.S., Strominger J.L., Wiley D.C.",
         "Structure of the human class I histocompatibility antigen, HLA-A2.",
         [('MEDLINE', '88014204')]),
        ("Saper M.A., Bjorkman P.J., Wiley D.C.",
         "Refined structure of the human histocompatibility antigen HLA-A2 at 2.6-A resolution.",
         [('MEDLINE', '91245570')]),
    ]
    self.assertEqual(len(record.references), 27)
    for index, (authors, title, xrefs) in enumerate(expected_references):
        reference = record.references[index]
        self.assertEqual(reference.authors, authors)
        self.assertEqual(reference.title, title)
        self.assertEqual(len(reference.references), len(xrefs))
        for position, xref in enumerate(xrefs):
            self.assertEqual(reference.references[position], xref)

    #Check the two parsers agree on the essentials
    self.assertEqual(seq_record.seq.tostring(), record.sequence)
    self.assertEqual(seq_record.description, record.description)
    self.assertEqual(seq_record.name, record.entry_name)
    self.assertTrue(seq_record.id in record.accessions)

    #Now try using the iterator - note that all these
    #test cases have only one record.

    # With the SeqIO iterator; the list is built while the handle is open.
    seq_records = parse_with(lambda handle: list(SeqIO.parse(handle, "swiss")))

    self.assertEqual(len(seq_records), 1)
    self.assertTrue(isinstance(seq_records[0], SeqRecord))

    #Check matches what we got earlier without the iterator:
    self.assertEqual(seq_records[0].seq.tostring(), seq_record.seq.tostring())
    self.assertEqual(seq_records[0].description, seq_record.description)
    self.assertEqual(seq_records[0].name, seq_record.name)
    self.assertEqual(seq_records[0].id, seq_record.id)

    # With the SwissProt iterator
    sp_records = parse_with(lambda handle: list(SwissProt.parse(handle)))

    self.assertEqual(len(sp_records), 1)
    self.assertTrue(isinstance(sp_records[0], SwissProt.Record))

    #Check matches what we got earlier without the iterator:
    self.assertEqual(sp_records[0].sequence, record.sequence)
    self.assertEqual(sp_records[0].description, record.description)
    self.assertEqual(sp_records[0].entry_name, record.entry_name)
    self.assertEqual(sp_records[0].accessions, record.accessions)

Here is the call graph for this function:

Definition at line 883 of file test_SwissProt.py.

00883 
def test_sp009(self):
    """Parsing SwissProt file sp009

    Parses the file 'sp009' from the 'SwissProt' directory with both
    SeqIO (format "swiss") and SwissProt, compares selected fields with
    known values, and checks that the two parsers, and their iterator
    variants, agree with each other.
    """
    datafile = os.path.join('SwissProt', 'sp009')

    def parse_with(parser):
        # Hand an open handle to ``parser`` and always close it again,
        # so a parser failure does not leak the file handle.
        handle = open(datafile)
        try:
            return parser(handle)
        finally:
            handle.close()

    # SeqIO parser (returns a SeqRecord)
    seq_record = parse_with(lambda handle: SeqIO.read(handle, "swiss"))

    self.assertTrue(isinstance(seq_record, SeqRecord))

    self.assertEqual(seq_record.id, "O23729")
    self.assertEqual(seq_record.name, "CHS3_BROFI")
    self.assertEqual(seq_record.description, "CHALCONE SYNTHASE 3 (EC 2.3.1.74) (NARINGENIN-CHALCONE SYNTHASE 3).")
    self.assertEqual(repr(seq_record.seq), "Seq('MAPAMEEIRQAQRAEGPAAVLAIGTSTPPNALYQADYPDYYFRITKSEHLTELK...GAE', ProteinAlphabet())")

    # SwissProt parser (returns a SwissProt.Record)
    record = parse_with(SwissProt.read)

    # test a couple of things on the record -- this is not exhaustive
    self.assertEqual(record.entry_name, "CHS3_BROFI")
    self.assertEqual(record.accessions, ['O23729'])
    self.assertEqual(record.organism_classification, ['Eukaryota', 'Viridiplantae', 'Embryophyta', 'Tracheophyta', 'Spermatophyta', 'Magnoliophyta', 'Liliopsida', 'Asparagales', 'Orchidaceae', 'Bromheadia'])
    self.assertEqual(record.seqinfo, (394, 42941, '2F8D14AF4870BBB2'))

    self.assertEqual(len(record.features), 1)
    self.assertEqual(record.features[0], ('ACT_SITE', 165, 165, 'BY SIMILARITY.', ''))

    self.assertEqual(len(record.references), 1)
    only_reference = record.references[0]
    self.assertEqual(only_reference.authors, "Liew C.F., Lim S.H., Loh C.S., Goh C.J.")
    self.assertEqual(only_reference.title, "Molecular cloning and sequence analysis of chalcone synthase cDNAs of Bromheadia finlaysoniana.")
    self.assertEqual(len(only_reference.references), 0)

    #Check the two parsers agree on the essentials
    self.assertEqual(seq_record.seq.tostring(), record.sequence)
    self.assertEqual(seq_record.description, record.description)
    self.assertEqual(seq_record.name, record.entry_name)
    self.assertTrue(seq_record.id in record.accessions)

    #Now try using the iterator - note that all these
    #test cases have only one record.

    # With the SeqIO iterator; the list is built while the handle is open.
    seq_records = parse_with(lambda handle: list(SeqIO.parse(handle, "swiss")))

    self.assertEqual(len(seq_records), 1)
    self.assertTrue(isinstance(seq_records[0], SeqRecord))

    #Check matches what we got earlier without the iterator:
    self.assertEqual(seq_records[0].seq.tostring(), seq_record.seq.tostring())
    self.assertEqual(seq_records[0].description, seq_record.description)
    self.assertEqual(seq_records[0].name, seq_record.name)
    self.assertEqual(seq_records[0].id, seq_record.id)

    # With the SwissProt iterator
    sp_records = parse_with(lambda handle: list(SwissProt.parse(handle)))

    self.assertEqual(len(sp_records), 1)
    self.assertTrue(isinstance(sp_records[0], SwissProt.Record))

    #Check matches what we got earlier without the iterator:
    self.assertEqual(sp_records[0].sequence, record.sequence)
    self.assertEqual(sp_records[0].description, record.description)
    self.assertEqual(sp_records[0].entry_name, record.entry_name)
    self.assertEqual(sp_records[0].accessions, record.accessions)

Here is the call graph for this function:

Definition at line 958 of file test_SwissProt.py.

00958 
00959     def test_sp010(self):
00960         "Parsing SwissProt file sp010"
00961 
00962         filename = 'sp010'
00963         # test the record parser
00964 
00965         datafile = os.path.join('SwissProt', filename)
00966 
00967         test_handle = open(datafile)
00968         seq_record = SeqIO.read(test_handle, "swiss")
00969         test_handle.close()
00970 
00971         self.assertTrue(isinstance(seq_record, SeqRecord))
00972 
00973         self.assertEqual(seq_record.id, "Q13639")
00974         self.assertEqual(seq_record.name, "5H4_HUMAN")
00975         self.assertEqual(seq_record.description, "5-HYDROXYTRYPTAMINE 4 RECEPTOR (5-HT-4) (SEROTONIN RECEPTOR) (5-HT4).")
00976         self.assertEqual(repr(seq_record.seq), "Seq('MDKLDANVSSEEGFGSVEKVVLLTFLSTVILMAILGNLLVMVAVCWDRQLRKIK...SDT', ProteinAlphabet())")
00977 
00978         test_handle = open(datafile)
00979         record = SwissProt.read(test_handle)
00980         test_handle.close()
00981 
00982         # test a couple of things on the record -- this is not exhaustive
00983         self.assertEqual(record.entry_name, "5H4_HUMAN")
00984         self.assertEqual(record.accessions, ['Q13639', 'Q9UBM6', 'Q9UQR6', 'Q9UE22', 'Q9UE23', 'Q9UBT4', 'Q9NY73'])
00985         self.assertEqual(record.organism_classification, ['Eukaryota', 'Metazoa', 'Chordata', 'Craniata', 'Vertebrata', 'Euteleostomi', 'Mammalia', 'Eutheria', 'Primates', 'Catarrhini', 'Hominidae', 'Homo'])
00986         self.assertEqual(record.seqinfo, (388, 43761, '7FCFEC60E7BDF560'))
00987     
00988         self.assertEqual(len(record.features), 23)
00989         self.assertEqual(record.features[0], ('DOMAIN', 1, 19, 'EXTRACELLULAR (POTENTIAL).', ''))
00990         self.assertEqual(record.features[1], ('TRANSMEM', 20, 40, '1 (POTENTIAL).', ''))
00991         self.assertEqual(record.features[2], ('DOMAIN', 41, 58, 'CYTOPLASMIC (POTENTIAL).', ''))
00992         self.assertEqual(record.features[3], ('TRANSMEM', 59, 79, '2 (POTENTIAL).', ''))
00993         self.assertEqual(record.features[4], ('DOMAIN', 80, 93, 'EXTRACELLULAR (POTENTIAL).', ''))
00994         self.assertEqual(record.features[5], ('TRANSMEM', 94, 116, '3 (POTENTIAL).', ''))
00995         self.assertEqual(record.features[6], ('DOMAIN', 117, 137, 'CYTOPLASMIC (POTENTIAL).', ''))
00996         self.assertEqual(record.features[7], ('TRANSMEM', 138, 158, '4 (POTENTIAL).', ''))
00997         self.assertEqual(record.features[8], ('DOMAIN', 159, 192, 'EXTRACELLULAR (POTENTIAL).', ''))
00998         self.assertEqual(record.features[9], ('TRANSMEM', 193, 213, '5 (POTENTIAL).', ''))
00999         self.assertEqual(record.features[10], ('DOMAIN', 214, 260, 'CYTOPLASMIC (POTENTIAL).', ''))
01000         self.assertEqual(record.features[11], ('TRANSMEM', 261, 281, '6 (POTENTIAL).', ''))
01001         self.assertEqual(record.features[12], ('DOMAIN', 282, 294, 'EXTRACELLULAR (POTENTIAL).', ''))
01002         self.assertEqual(record.features[13], ('TRANSMEM', 295, 315, '7 (POTENTIAL).', ''))
01003         self.assertEqual(record.features[14], ('DOMAIN', 316, 388, 'CYTOPLASMIC (POTENTIAL).', ''))
01004         self.assertEqual(record.features[15], ('CARBOHYD', 7, 7, 'N-LINKED (GLCNAC...) (POTENTIAL).', ''))
01005         self.assertEqual(record.features[16], ('DISULFID', 93, 184, 'BY SIMILARITY.', ''))
01006         self.assertEqual(record.features[17], ('LIPID', 329, 329, 'PALMITATE (BY SIMILARITY).', ''))
01007         self.assertEqual(record.features[18], ('VARSPLIC', 169, 169, 'L -> LERSLNQGLGQDFHA (IN ISOFORM 5- HT4(F)).', ''))
01008         self.assertEqual(record.features[19], ('VARSPLIC', 359, 388, 'RDAVECGGQWESQCHPPATSPLVAAQPSDT -> SGCSPVSSFLLLFCNRPVPV (IN ISOFORM 5-HT4(E)).', ''))
01009         self.assertEqual(record.features[20], ('VARSPLIC', 359, 388, 'RDAVECGGQWESQCHPPATSPLVAAQPSDT -> SSGTETDRRNFGIRKRRLTKPS (IN ISOFORM 5-HT4(D)).', ''))
01010         self.assertEqual(record.features[21], ('VARSPLIC', 360, 388, 'DAVECGGQWESQCHPPATSPLVAAQPSDT -> F (IN ISOFORM 5-HT4(C)).', ''))
01011         self.assertEqual(record.features[22], ('VARSPLIC', 360, 388, 'DAVECGGQWESQCHPPATSPLVAAQPSDT -> YTVLHRGHHQELEKLPIHNDPESLESCF (IN ISOFORM 5- HT4(A)).', ''))
01012         self.assertEqual(len(record.references), 6)
01013 
01014         self.assertEqual(record.references[0].authors, "Blondel O., Gastineau M., Dahmoune Y., Langlois M., Fischmeister R.")
01015         self.assertEqual(record.references[0].title, "Cloning, expression, and pharmacology of four human 5- hydroxytryptamine receptor isoforms produced by alternative splicing in the carboxyl terminus.")
01016         self.assertEqual(len(record.references[0].references), 1)
01017         self.assertEqual(record.references[0].references[0], ('PubMed', '9603189'))
01018         self.assertEqual(record.references[1].authors, "Van den Wyngaert I., Gommeren W., Jurzak M., Verhasselt P., Gordon R., Leysen J., Luyten W., Bender E.")
01019         self.assertEqual(record.references[1].title, "Cloning and expression of 5-HT4 receptor species and splice variants.")
01020         self.assertEqual(len(record.references[1].references), 0)
01021         self.assertEqual(record.references[2].authors, "Claeysen S., Faye P., Sebben M., Lemaire S., Bockaert J., Dumuis A.")
01022         self.assertEqual(record.references[2].title, "Cloning and expression of human 5-HT4S receptors. Effect of receptor density on their coupling to adenylyl cyclase.")
01023         self.assertEqual(len(record.references[2].references), 1)
01024         self.assertEqual(record.references[2].references[0], ('PubMed', '9351641'))
01025         self.assertEqual(record.references[3].authors, "Claeysen S., Sebben M., Becamel C., Bockaert J., Dumuis A.")
01026         self.assertEqual(record.references[3].title, "Novel brain-specific 5-HT4 receptors splice variants show marked constitutive activity: role of the c-terminal intracellular domain.")
01027         self.assertEqual(len(record.references[3].references), 0)
01028         self.assertEqual(record.references[4].authors, "Bender E., Pindon A., van Oers I., Zhang Y.B., Gommeren W., Verhasselt P., Jurzak M., Leysen J., Luyten W.")
01029         self.assertEqual(record.references[4].title, "Structure of the human serotonin 5-HT4 receptor gene and cloning of a novel 5-HT4 splice variant.")
01030         self.assertEqual(len(record.references[4].references), 1)
01031         self.assertEqual(record.references[4].references[0], ('PubMed', '10646498'))
01032         self.assertEqual(record.references[5].authors, "Ullmer C., Schmuck K., Kalkman H.O., Lubbert H.")
01033         self.assertEqual(record.references[5].title, "Expression of serotonin receptor mRNAs in blood vessels.")
01034         self.assertEqual(len(record.references[5].references), 2)
01035         self.assertEqual(record.references[5].references[0], ('MEDLINE', '95385798'))
01036         self.assertEqual(record.references[5].references[1], ('PubMed', '7656980'))
01037 
01038         #Check the two parsers agree on the essentials
01039         self.assertEqual(seq_record.seq.tostring(), record.sequence)
01040         self.assertEqual(seq_record.description, record.description)
01041         self.assertEqual(seq_record.name, record.entry_name)
01042         self.assertTrue(seq_record.id in record.accessions)
01043 
01044         #Now try using the iterator - note that all these
01045         #test cases have only one record.
01046 
01047         # With the SequenceParser
01048         test_handle = open(datafile)
01049         records = list(SeqIO.parse(test_handle, "swiss"))
01050         test_handle.close()
01051 
01052         self.assertEqual(len(records), 1)
01053         self.assertTrue(isinstance(records[0], SeqRecord))
01054 
01055         #Check matches what we got earlier without the iterator:
01056         self.assertEqual(records[0].seq.tostring(), seq_record.seq.tostring())
01057         self.assertEqual(records[0].description, seq_record.description)
01058         self.assertEqual(records[0].name, seq_record.name)
01059         self.assertEqual(records[0].id, seq_record.id)
01060 
01061         # With the RecordParser
01062         test_handle = open(datafile)
01063         records = list(SwissProt.parse(test_handle))
01064         test_handle.close()
01065 
01066         self.assertEqual(len(records), 1)
01067         self.assertTrue(isinstance(records[0], SwissProt.Record))
01068 
01069         #Check matches what we got earlier without the iterator:
01070         self.assertEqual(records[0].sequence, record.sequence)
01071         self.assertEqual(records[0].description, record.description)
01072         self.assertEqual(records[0].entry_name, record.entry_name)
01073         self.assertEqual(records[0].accessions, record.accessions)

Here is the call graph for this function:

Definition at line 1074 of file test_SwissProt.py.

01074 
01075     def test_sp011(self):
01076         "Parsing SwissProt file sp011"
01077 
01078         filename = 'sp011'
01079         # test the record parser
01080 
01081         datafile = os.path.join('SwissProt', filename)
01082 
01083         test_handle = open(datafile)
01084         seq_record = SeqIO.read(test_handle, "swiss")
01085         test_handle.close()
01086 
01087         self.assertTrue(isinstance(seq_record, SeqRecord))
01088 
01089         self.assertEqual(seq_record.id, "P16235")
01090         self.assertEqual(seq_record.name, "LSHR_RAT")
01091         self.assertEqual(seq_record.description, "LUTROPIN-CHORIOGONADOTROPIC HORMONE RECEPTOR PRECURSOR (LH/CG-R) (LSH-R) (LUTEINIZING HORMONE RECEPTOR).")
01092         self.assertEqual(repr(seq_record.seq), "Seq('MGRRVPALRQLLVLAVLLLKPSQLQSRELSGSRCPEPCDCAPDGALRCPGPRAG...LTH', ProteinAlphabet())")
01093 
01094         test_handle = open(datafile)
01095         record = SwissProt.read(test_handle)
01096         test_handle.close()
01097 
01098         # test a couple of things on the record -- this is not exhaustive
01099         self.assertEqual(record.entry_name, "LSHR_RAT")
01100         self.assertEqual(record.accessions, ['P16235', 'P70646', 'Q63807', 'Q63808', 'Q63809'])
01101         self.assertEqual(record.organism_classification, ['Eukaryota', 'Metazoa', 'Chordata', 'Craniata', 'Vertebrata', 'Euteleostomi', 'Mammalia', 'Eutheria', 'Rodentia', 'Sciurognathi', 'Muridae', 'Murinae', 'Rattus'])
01102         self.assertEqual(record.seqinfo, (700, 78035, '31807E73BAC94F1F'))
01103     
01104         self.assertEqual(len(record.features), 52)
01105         self.assertEqual(record.features[0], ('SIGNAL', 1, 26, '', ''))
01106         self.assertEqual(record.features[1], ('CHAIN', 27, 700, 'LUTROPIN-CHORIOGONADOTROPIC HORMONE RECEPTOR.', ''))
01107         self.assertEqual(record.features[2], ('DOMAIN', 27, 362, 'EXTRACELLULAR (POTENTIAL).', ''))
01108         self.assertEqual(record.features[3], ('TRANSMEM', 363, 390, '1 (POTENTIAL).', ''))
01109         self.assertEqual(record.features[4], ('DOMAIN', 391, 399, 'CYTOPLASMIC (POTENTIAL).', ''))
01110         self.assertEqual(record.features[5], ('TRANSMEM', 400, 422, '2 (POTENTIAL).', ''))
01111         self.assertEqual(record.features[6], ('DOMAIN', 423, 443, 'EXTRACELLULAR (POTENTIAL).', ''))
01112         self.assertEqual(record.features[7], ('TRANSMEM', 444, 466, '3 (POTENTIAL).', ''))
01113         self.assertEqual(record.features[8], ('DOMAIN', 467, 486, 'CYTOPLASMIC (POTENTIAL).', ''))
01114         self.assertEqual(record.features[9], ('TRANSMEM', 487, 509, '4 (POTENTIAL).', ''))
01115         self.assertEqual(record.features[10], ('DOMAIN', 510, 529, 'EXTRACELLULAR (POTENTIAL).', ''))
01116         self.assertEqual(record.features[11], ('TRANSMEM', 530, 551, '5 (POTENTIAL).', ''))
01117         self.assertEqual(record.features[12], ('DOMAIN', 552, 574, 'CYTOPLASMIC (POTENTIAL).', ''))
01118         self.assertEqual(record.features[13], ('TRANSMEM', 575, 598, '6 (POTENTIAL).', ''))
01119         self.assertEqual(record.features[14], ('DOMAIN', 599, 609, 'EXTRACELLULAR (POTENTIAL).', ''))
01120         self.assertEqual(record.features[15], ('TRANSMEM', 610, 631, '7 (POTENTIAL).', ''))
01121         self.assertEqual(record.features[16], ('DOMAIN', 632, 700, 'CYTOPLASMIC (POTENTIAL).', ''))
01122         self.assertEqual(record.features[17], ('REPEAT', 52, 75, 'LRR 1.', ''))
01123         self.assertEqual(record.features[18], ('REPEAT', 126, 150, 'LRR 2.', ''))
01124         self.assertEqual(record.features[19], ('REPEAT', 152, 175, 'LRR 3.', ''))
01125         self.assertEqual(record.features[20], ('REPEAT', 176, 200, 'LRR 4.', ''))
01126         self.assertEqual(record.features[21], ('REPEAT', 202, 224, 'LRR 5.', ''))
01127         self.assertEqual(record.features[22], ('REPEAT', 225, 248, 'LRR 6.', ''))
01128         self.assertEqual(record.features[23], ('REPEAT', 250, 271, 'LRR 7.', ''))
01129         self.assertEqual(record.features[24], ('DISULFID', 443, 518, 'BY SIMILARITY.', ''))
01130         self.assertEqual(record.features[25], ('CARBOHYD', 103, 103, 'N-LINKED (GLCNAC...) (POTENTIAL).', ''))
01131         self.assertEqual(record.features[26], ('CARBOHYD', 178, 178, 'N-LINKED (GLCNAC...) (POTENTIAL).', ''))
01132         self.assertEqual(record.features[27], ('CARBOHYD', 199, 199, 'N-LINKED (GLCNAC...) (POTENTIAL).', ''))
01133         self.assertEqual(record.features[28], ('CARBOHYD', 295, 295, 'N-LINKED (GLCNAC...) (POTENTIAL).', ''))
01134         self.assertEqual(record.features[29], ('CARBOHYD', 303, 303, 'N-LINKED (GLCNAC...) (POTENTIAL).', ''))
01135         self.assertEqual(record.features[30], ('CARBOHYD', 317, 317, 'N-LINKED (GLCNAC...) (POTENTIAL).', ''))
01136         self.assertEqual(record.features[31], ('VARSPLIC', 83, 132, 'MISSING (IN ISOFORM 1950).', ''))
01137         self.assertEqual(record.features[32], ('VARSPLIC', 133, 157, 'MISSING (IN ISOFORM 1759).', ''))
01138         self.assertEqual(record.features[33], ('VARSPLIC', 184, 700, 'MISSING (IN ISOFORM C2).', ''))
01139         self.assertEqual(record.features[34], ('VARSPLIC', 232, 251, 'DISSTKLQALPSHGLESIQT -> PCRATGWSPFRRSSPCLPTH (IN ISOFORM 2075).', ''))
01140         self.assertEqual(record.features[35], ('VARSPLIC', 232, 293, 'MISSING (IN ISOFORM E/A2, ISOFORM EB AND ISOFORM B1).', ''))
01141         self.assertEqual(record.features[36], ('VARSPLIC', 252, 700, 'MISSING (IN ISOFORM 2075).', ''))
01142         self.assertEqual(record.features[37], ('VARSPLIC', 294, 367, 'QNFSFSIFENFSKQCESTVRKADNETLYSAIFEENELSGWDYDYGFCSPKTLQCAPEPDAFNPCEDIMGYAFLR -> IFHFPFLKTSPNNAKAQLEKQITRRFIPPSLRRMNSVAGIMIMASVHPRHSNVLQNQMLSTPVKILWAMPSLGS (IN ISOFORM B1 AND ISOFORM B3).', ''))
01143         self.assertEqual(record.features[38], ('VARSPLIC', 294, 294, 'Q -> P (IN ISOFORM C1).', ''))
01144         self.assertEqual(record.features[39], ('VARSPLIC', 295, 700, 'MISSING (IN ISOFORM C1).', ''))
01145         self.assertEqual(record.features[40], ('VARSPLIC', 321, 342, 'YSAIFEENELSGWDYDYGFCSP -> LHGALPAAHCLRGLPNKRPVL (IN ISOFORM 1834, ISOFORM 1759 AND ISOFORM EB).', ''))
01146         self.assertEqual(record.features[41], ('VARSPLIC', 343, 700, 'MISSING (IN ISOFORMS 1834, ISOFORM 1759 AND ISOFORM EB).', ''))
01147         self.assertEqual(record.features[42], ('VARSPLIC', 368, 700, 'MISSING (IN ISOFORM B1 AND ISOFORM B3).', ''))
01148         self.assertEqual(record.features[43], ('VARIANT', 82, 82, 'I -> M (IN ISOFORM 1950).', ''))
01149         self.assertEqual(record.features[44], ('VARIANT', 179, 179, 'E -> G (IN ISOFORM 1759).', ''))
01150         self.assertEqual(record.features[45], ('VARIANT', 233, 233, 'I -> T (IN ISOFORM 1950).', ''))
01151         self.assertEqual(record.features[46], ('VARIANT', 646, 646, 'G -> S (IN ISOFORM 1950).', ''))
01152         self.assertEqual(record.features[47], ('MUTAGEN', 409, 409, 'D->N: SIGNIFICANT REDUCTION OF BINDING.', ''))
01153         self.assertEqual(record.features[48], ('MUTAGEN', 436, 436, 'D->N: NO CHANGE IN BINDING OR CAMP PROD.', ''))
01154         self.assertEqual(record.features[49], ('MUTAGEN', 455, 455, 'E->Q: NO CHANGE IN BINDING OR CAMP PROD.', ''))
01155         self.assertEqual(record.features[50], ('MUTAGEN', 582, 582, 'D->N: NO CHANGE IN BINDING OR CAMP PROD.', ''))
01156         self.assertEqual(record.features[51], ('CONFLICT', 33, 33, 'R -> L (IN REF. 7).', ''))
01157 
01158         self.assertEqual(len(record.references), 8)
01159         self.assertEqual(record.references[0].authors, "McFarland K.C., Sprengel R., Phillips H.S., Koehler M., Rosemblit N., Nikolics K., Segaloff D.L., Seeburg P.H.")
01160         self.assertEqual(record.references[0].title, "Lutropin-choriogonadotropin receptor: an unusual member of the G protein-coupled receptor family.")
01161         self.assertEqual(len(record.references[0].references), 2)
01162         self.assertEqual(record.references[0].references[0], ('MEDLINE', '89332512'))
01163         self.assertEqual(record.references[0].references[1], ('PubMed', '2502842'))
01164         self.assertEqual(record.references[1].authors, "Aatsinki J.T., Pietila E.M., Lakkakorpi J.T., Rajaniemi H.J.")
01165         self.assertEqual(record.references[1].title, "Expression of the LH/CG receptor gene in rat ovarian tissue is regulated by an extensive alternative splicing of the primary transcript.")
01166         self.assertEqual(len(record.references[1].references), 2)
01167         self.assertEqual(record.references[1].references[0], ('MEDLINE', '92347604'))
01168         self.assertEqual(record.references[1].references[1], ('PubMed', '1353463'))
01169         self.assertEqual(record.references[2].authors, "Koo Y.B., Slaughter R.G., Ji T.H.")
01170         self.assertEqual(record.references[2].title, "Structure of the luteinizing hormone receptor gene and multiple exons of the coding sequence.")
01171         self.assertEqual(len(record.references[2].references), 2)
01172         self.assertEqual(record.references[2].references[0], ('MEDLINE', '91209270'))
01173         self.assertEqual(record.references[2].references[1], ('PubMed', '2019252'))
01174         self.assertEqual(record.references[3].authors, "Bernard M.P., Myers R.V., Moyle W.R.")
01175         self.assertEqual(record.references[3].title, "Cloning of rat lutropin (LH) receptor analogs lacking the soybean lectin domain.")
01176         self.assertEqual(len(record.references[3].references), 2)
01177         self.assertEqual(record.references[3].references[0], ('MEDLINE', '91006819'))
01178         self.assertEqual(record.references[3].references[1], ('PubMed', '1976554'))
01179         self.assertEqual(record.references[4].authors, "Segaloff D.L., Sprengel R., Nikolics K., Ascoli M.")
01180         self.assertEqual(record.references[4].title, "Structure of the lutropin/choriogonadotropin receptor.")
01181         self.assertEqual(len(record.references[4].references), 2)
01182         self.assertEqual(record.references[4].references[0], ('MEDLINE', '91126285'))
01183         self.assertEqual(record.references[4].references[1], ('PubMed', '2281186'))
01184         self.assertEqual(record.references[5].authors, "Tsai-Morris C.H., Buczko E., Wang W., Dufau M.L.")
01185         self.assertEqual(record.references[5].title, "Intronic nature of the rat luteinizing hormone receptor gene defines a soluble receptor subspecies with hormone binding activity.")
01186         self.assertEqual(len(record.references[5].references), 2)
01187         self.assertEqual(record.references[5].references[0], ('MEDLINE', '91060531'))
01188         self.assertEqual(record.references[5].references[1], ('PubMed', '2174034'))
01189         self.assertEqual(record.references[6].authors, "Roche P.C., Ryan R.J.")
01190         self.assertEqual(record.references[6].title, "Purification, characterization, and amino-terminal sequence of rat ovarian receptor for luteinizing hormone/human choriogonadotropin.")
01191         self.assertEqual(len(record.references[6].references), 2)
01192         self.assertEqual(record.references[6].references[0], ('MEDLINE', '89174723'))
01193         self.assertEqual(record.references[6].references[1], ('PubMed', '2925659'))
01194         self.assertEqual(record.references[7].authors, "Ji I., Ji T.H.")
01195         self.assertEqual(record.references[7].title, "Asp383 in the second transmembrane domain of the lutropin receptor is important for high affinity hormone binding and cAMP production.")
01196         self.assertEqual(len(record.references[7].references), 2)
01197         self.assertEqual(record.references[7].references[0], ('MEDLINE', '91332007'))
01198         self.assertEqual(record.references[7].references[1], ('PubMed', '1714448'))
01199 
01200         #Check the two parsers agree on the essentials
01201         self.assertEqual(seq_record.seq.tostring(), record.sequence)
01202         self.assertEqual(seq_record.description, record.description)
01203         self.assertEqual(seq_record.name, record.entry_name)
01204         self.assertTrue(seq_record.id in record.accessions)
01205 
01206         #Now try using the iterator - note that all these
01207         #test cases have only one record.
01208 
01209         # With the SequenceParser
01210         test_handle = open(datafile)
01211         records = list(SeqIO.parse(test_handle, "swiss"))
01212         test_handle.close()
01213 
01214         self.assertEqual(len(records), 1)
01215         self.assertTrue(isinstance(records[0], SeqRecord))
01216 
01217         #Check matches what we got earlier without the iterator:
01218         self.assertEqual(records[0].seq.tostring(), seq_record.seq.tostring())
01219         self.assertEqual(records[0].description, seq_record.description)
01220         self.assertEqual(records[0].name, seq_record.name)
01221         self.assertEqual(records[0].id, seq_record.id)
01222 
01223         # With the RecordParser
01224         test_handle = open(datafile)
01225         records = list(SwissProt.parse(test_handle))
01226         test_handle.close()
01227 
01228         self.assertEqual(len(records), 1)
01229         self.assertTrue(isinstance(records[0], SwissProt.Record))
01230 
01231         #Check matches what we got earlier without the iterator:
01232         self.assertEqual(records[0].sequence, record.sequence)
01233         self.assertEqual(records[0].description, record.description)
01234         self.assertEqual(records[0].entry_name, record.entry_name)
01235         self.assertEqual(records[0].accessions, record.accessions)
01236 

Here is the call graph for this function:

Definition at line 1237 of file test_SwissProt.py.

01237 
01238     def test_sp012(self):
01239         "Parsing SwissProt file sp012"
01240 
01241         filename = 'sp012'
01242         # test the record parser
01243 
01244         datafile = os.path.join('SwissProt', filename)
01245 
01246         test_handle = open(datafile)
01247         seq_record = SeqIO.read(test_handle, "swiss")
01248         test_handle.close()
01249 
01250         self.assertTrue(isinstance(seq_record, SeqRecord))
01251 
01252         self.assertEqual(seq_record.id, "Q9Y736")
01253         self.assertEqual(seq_record.name, "Q9Y736")
01254         self.assertEqual(seq_record.description, "UBIQUITIN.")
01255         self.assertEqual(repr(seq_record.seq), "Seq('MQIFVKTLTGKTITLEVESSDTIDNVKTKIQDKEGIPPDQQRLIFAGKQLEDGR...GGN', ProteinAlphabet())")
01256 
01257         test_handle = open(datafile)
01258         record = SwissProt.read(test_handle)
01259         test_handle.close()
01260 
01261         # test a couple of things on the record -- this is not exhaustive
01262         self.assertEqual(record.entry_name, "Q9Y736")
01263         self.assertEqual(record.accessions, ['Q9Y736'])
01264         self.assertEqual(record.organism_classification, ['Eukaryota', 'Fungi', 'Ascomycota', 'Pezizomycotina', 'Eurotiomycetes', 'Onygenales', 'Arthrodermataceae', 'mitosporic Arthrodermataceae', 'Trichophyton'])
01265         self.assertEqual(record.seqinfo, (153, 17238, '01153CF30C2DEDFF'))
01266     
01267         self.assertEqual(len(record.features), 0)
01268 
01269         self.assertEqual(len(record.references), 2)
01270         self.assertEqual(record.references[0].authors, "Kano R., Nakamura Y., Watanabe S., Hasegawa A.")
01271         self.assertEqual(record.references[0].title, "Trichophyton mentagrophytes mRNA for ubiquitin.")
01272         self.assertEqual(len(record.references[0].references), 0)
01273         self.assertEqual(record.references[1].authors, "Kano R.")
01274         self.assertEqual(record.references[1].title, "Microsporum canis mRNA for ubiquitin, complete cds.")
01275         self.assertEqual(len(record.references[1].references), 0)
01276 
01277         #Check the two parsers agree on the essentials
01278         self.assertEqual(seq_record.seq.tostring(), record.sequence)
01279         self.assertEqual(seq_record.description, record.description)
01280         self.assertEqual(seq_record.name, record.entry_name)
01281         self.assertTrue(seq_record.id in record.accessions)
01282 
01283         #Now try using the iterator - note that all these
01284         #test cases have only one record.
01285 
01286         # With the SequenceParser
01287         test_handle = open(datafile)
01288         records = list(SeqIO.parse(test_handle, "swiss"))
01289         test_handle.close()
01290 
01291         self.assertEqual(len(records), 1)
01292         self.assertTrue(isinstance(records[0], SeqRecord))
01293 
01294         #Check matches what we got earlier without the iterator:
01295         self.assertEqual(records[0].seq.tostring(), seq_record.seq.tostring())
01296         self.assertEqual(records[0].description, seq_record.description)
01297         self.assertEqual(records[0].name, seq_record.name)
01298         self.assertEqual(records[0].id, seq_record.id)
01299 
01300         # With the RecordParser
01301         test_handle = open(datafile)
01302         records = list(SwissProt.parse(test_handle))
01303         test_handle.close()
01304 
01305         self.assertEqual(len(records), 1)
01306         self.assertTrue(isinstance(records[0], SwissProt.Record))
01307 
01308         #Check matches what we got earlier without the iterator:
01309         self.assertEqual(records[0].sequence, record.sequence)
01310         self.assertEqual(records[0].description, record.description)
01311         self.assertEqual(records[0].entry_name, record.entry_name)
01312         self.assertEqual(records[0].accessions, record.accessions)
01313 

Here is the call graph for this function:

Definition at line 1314 of file test_SwissProt.py.

01314 
01315     def test_sp013(self):
01316         "Parsing SwissProt file sp013"
01317 
01318         filename = 'sp013'
01319         # test the record parser
01320 
01321         datafile = os.path.join('SwissProt', filename)
01322 
01323         test_handle = open(datafile)
01324         seq_record = SeqIO.read(test_handle, "swiss")
01325         test_handle.close()
01326 
01327         self.assertTrue(isinstance(seq_record, SeqRecord))
01328 
01329         self.assertEqual(seq_record.id, "P82909")
01330         self.assertEqual(seq_record.name, "P82909")
01331         self.assertEqual(seq_record.description, "MITOCHONDRIAL 28S RIBOSOMAL PROTEIN S36 (MRP-S36).")
01332         self.assertEqual(repr(seq_record.seq), "Seq('MGSKMASASRVVQVVKPHTPLIRFPDRRDNPKPNVSEALRSAGLPSHSSVISQH...GPE', ProteinAlphabet())")
01333 
01334         test_handle = open(datafile)
01335         record = SwissProt.read(test_handle)
01336         test_handle.close()
01337 
01338         # test a couple of things on the record -- this is not exhaustive
01339         self.assertEqual(record.entry_name, "P82909")
01340         self.assertEqual(record.accessions, ['P82909'])
01341         self.assertEqual(record.organism_classification, ['Eukaryota', 'Metazoa', 'Chordata', 'Craniata', 'Vertebrata', 'Euteleostomi', 'Mammalia', 'Eutheria', 'Primates', 'Catarrhini', 'Hominidae', 'Homo'])
01342         self.assertEqual(record.seqinfo, (102, 11335, '83EF107B42E2FCFD'))
01343     
01344         self.assertEqual(len(record.features), 0)
01345 
01346         self.assertEqual(len(record.references), 2)
01347         self.assertEqual(record.references[0].authors, "Strausberg R.")
01348         self.assertEqual(record.references[0].title, "")
01349         self.assertEqual(len(record.references[0].references), 0)
01350         self.assertEqual(record.references[1].authors, "Koc E.C., Burkhart W., Blackburn K., Moseley A., Spremulli L.L.")
01351         self.assertEqual(record.references[1].title, "The small subunit of the mammalian mitochondrial ribosome. Identification of the full complement ribosomal proteins present.")
01352         self.assertEqual(len(record.references[1].references), 0)
01353 
01354         #Check the two parsers agree on the essentials
01355         self.assertEqual(seq_record.seq.tostring(), record.sequence)
01356         self.assertEqual(seq_record.description, record.description)
01357         self.assertEqual(seq_record.name, record.entry_name)
01358         self.assertTrue(seq_record.id in record.accessions)
01359 
01360         #Now try using the iterator - note that all these
01361         #test cases have only one record.
01362 
01363         # With the SequenceParser
01364         test_handle = open(datafile)
01365         records = list(SeqIO.parse(test_handle, "swiss"))
01366         test_handle.close()
01367 
01368         self.assertEqual(len(records), 1)
01369         self.assertTrue(isinstance(records[0], SeqRecord))
01370 
01371         #Check matches what we got earlier without the iterator:
01372         self.assertEqual(records[0].seq.tostring(), seq_record.seq.tostring())
01373         self.assertEqual(records[0].description, seq_record.description)
01374         self.assertEqual(records[0].name, seq_record.name)
01375         self.assertEqual(records[0].id, seq_record.id)
01376 
01377         # With the RecordParser
01378         test_handle = open(datafile)
01379         records = list(SwissProt.parse(test_handle))
01380         test_handle.close()
01381 
01382         self.assertEqual(len(records), 1)
01383         self.assertTrue(isinstance(records[0], SwissProt.Record))
01384 
01385         #Check matches what we got earlier without the iterator:
01386         self.assertEqual(records[0].sequence, record.sequence)
01387         self.assertEqual(records[0].description, record.description)
01388         self.assertEqual(records[0].entry_name, record.entry_name)
01389         self.assertEqual(records[0].accessions, record.accessions)
01390 

Here is the call graph for this function:

Definition at line 1391 of file test_SwissProt.py.

01391 
01392     def test_sp014(self):
01393         "Parsing SwissProt file sp014"
01394 
01395         filename = 'sp014'
01396         # test the record parser
01397 
01398         datafile = os.path.join('SwissProt', filename)
01399 
01400         test_handle = open(datafile)
01401         seq_record = SeqIO.read(test_handle, "swiss")
01402         test_handle.close()
01403 
01404         self.assertTrue(isinstance(seq_record, SeqRecord))
01405 
01406         self.assertEqual(seq_record.id, "P12166")
01407         self.assertEqual(seq_record.name, "PSBL_ORYSA")
01408         self.assertEqual(seq_record.description, "PHOTOSYSTEM II REACTION CENTER L PROTEIN (PSII 5 KDA PROTEIN).")
01409         self.assertEqual(repr(seq_record.seq), "Seq('TQSNPNEQNVELNRTSLYWGLLLIFVLAVLFSNYFFN', ProteinAlphabet())")
01410 
01411         test_handle = open(datafile)
01412         record = SwissProt.read(test_handle)
01413         test_handle.close()
01414 
01415         # test a couple of things on the record -- this is not exhaustive
01416         self.assertEqual(record.entry_name, "PSBL_ORYSA")
01417         self.assertEqual(record.accessions, ['P12166', 'P12167', 'Q34007'])
01418         self.assertEqual(record.organism_classification, ['Eukaryota', 'Viridiplantae', 'Embryophyta', 'Tracheophyta', 'Spermatophyta', 'Magnoliophyta', 'Liliopsida', 'Poales', 'Poaceae', 'Ehrhartoideae', 'Oryzeae', 'Oryza'])
01419         self.assertEqual(record.seqinfo, (37, 4366, 'CC537AEC50B2C784'))
01420     
01421         self.assertEqual(len(record.features), 1)
01422         self.assertEqual(record.features[0], ('INIT_MET', 0, 0, '', ''))
01423 
01424         self.assertEqual(len(record.references), 22)
01425         self.assertEqual(record.references[0].authors, "Sugiura M.")
01426         self.assertEqual(record.references[0].title, "")
01427         self.assertEqual(len(record.references[0].references), 0)
01428         self.assertEqual(record.references[1].authors, "Hiratsuka J., Shimada H., Whittier R., Ishibashi T., Sakamoto M., Mori M., Kondo C., Honji Y., Sun C.-R., Meng B.-Y., Li Y.-Q., Kanno A., Nishizawa Y., Hirai A., Shinozaki K., Sugiura M.")
01429         self.assertEqual(record.references[1].title, "The complete sequence of the rice (Oryza sativa) chloroplast genome: intermolecular recombination between distinct tRNA genes accounts for a major plastid DNA inversion during the evolution of the cereals.")
01430         self.assertEqual(len(record.references[1].references), 2)
01431         self.assertEqual(record.references[1].references[0], ('MEDLINE', '89364698'))
01432         self.assertEqual(record.references[1].references[1], ('PubMed', '2770692'))
01433         self.assertEqual(record.references[2].authors, "Sugiura M.")
01434         self.assertEqual(record.references[2].title, "")
01435         self.assertEqual(len(record.references[2].references), 0)
01436         self.assertEqual(record.references[3].authors, "Shinozaki K., Ohme M., Tanaka M., Wakasugi T., Hayashida N., Matsubayashi T., Zaita N., Chunwongse J., Obokata J., Yamaguchi-Shinozaki K., Ohto C., Torazawa K., Meng B.Y., Sugita M., Deno H., Kamogashira T., Yamada K., Kusuda J., Takaiwa F., Kato A., Tohdoh N., Shimada H., Sugiura M.")
01437         self.assertEqual(record.references[3].title, "The complete nucleotide sequence of the tobacco chloroplast genome: its gene organization and expression.")
01438         self.assertEqual(len(record.references[3].references), 0)
01439         self.assertEqual(record.references[4].authors, "Chaudhuri S., Maliga P.")
01440         self.assertEqual(record.references[4].title, "Sequences directing C to U editing of the plastid psbL mRNA are located within a 22 nucleotide segment spanning the editing site.")
01441         self.assertEqual(len(record.references[4].references), 2)
01442         self.assertEqual(record.references[4].references[0], ('MEDLINE', '97076156'))
01443         self.assertEqual(record.references[4].references[1], ('PubMed', '8918473'))
01444         self.assertEqual(record.references[5].authors, "Chakhmakhcheva O.G., Andreeva A.V., Buryakova A.A., Reverdatto S.V., Efimov V.A.")
01445         self.assertEqual(record.references[5].title, "Nucleotide sequence of the barley chloroplast psbE, psbF genes and flanking regions.")
01446         self.assertEqual(len(record.references[5].references), 2)
01447         self.assertEqual(record.references[5].references[0], ('MEDLINE', '89240046'))
01448         self.assertEqual(record.references[5].references[1], ('PubMed', '2654886'))
01449         self.assertEqual(record.references[6].authors, "Efimov V.A., Andreeva A.V., Reverdatto S.V., Chakhmakhcheva O.G.")
01450         self.assertEqual(record.references[6].title, "Photosystem II of rye. Nucleotide sequence of the psbB, psbC, psbE, psbF, psbH genes of rye and chloroplast DNA regions adjacent to them.")
01451         self.assertEqual(len(record.references[6].references), 2)
01452         self.assertEqual(record.references[6].references[0], ('MEDLINE', '92207253'))
01453         self.assertEqual(record.references[6].references[1], ('PubMed', '1804121'))
01454         self.assertEqual(record.references[7].authors, "Webber A.N., Hird S.M., Packman L.C., Dyer T.A., Gray J.C.")
01455         self.assertEqual(record.references[7].title, "A photosystem II polypeptide is encoded by an open reading frame co-transcribed with genes for cytochrome b-559 in wheat chloroplast DNA.")
01456         self.assertEqual(len(record.references[7].references), 0)
01457         self.assertEqual(record.references[8].authors, "Kudla J., Igloi G.L., Metzlaff M., Hagemann R., Koessel H.")
01458         self.assertEqual(record.references[8].title, "RNA editing in tobacco chloroplasts leads to the formation of a translatable psbL mRNA by a C to U substitution within the initiation codon.")
01459         self.assertEqual(len(record.references[8].references), 2)
01460         self.assertEqual(record.references[8].references[0], ('MEDLINE', '92191997'))
01461         self.assertEqual(record.references[8].references[1], ('PubMed', '1547774'))
01462         self.assertEqual(record.references[9].authors, "Zolotarev A.S., Kolosov V.L.")
01463         self.assertEqual(record.references[9].title, "Nucleotide sequence of the rye chloroplast DNA fragment, comprising psbE and psbF genes.")
01464         self.assertEqual(len(record.references[9].references), 2)
01465         self.assertEqual(record.references[9].references[0], ('MEDLINE', '89160331'))
01466         self.assertEqual(record.references[9].references[1], ('PubMed', '2646599'))
01467         self.assertEqual(record.references[10].authors, "Kolosov V.L., Klezovich O.N., Abdulaev N.G., Zolotarev A.S.")
01468         self.assertEqual(record.references[10].title, "Photosystem II of rye. Nucleotide sequence of genes psbE, psbF, psbL and OPC40 of chloroplast DNA.")
01469         self.assertEqual(len(record.references[10].references), 2)
01470         self.assertEqual(record.references[10].references[0], ('MEDLINE', '90073796'))
01471         self.assertEqual(record.references[10].references[1], ('PubMed', '2686655'))
01472         self.assertEqual(record.references[11].authors, "Haley J., Bogorad L.")
01473         self.assertEqual(record.references[11].title, "")
01474         self.assertEqual(len(record.references[11].references), 0)
01475         self.assertEqual(record.references[12].authors, "Maier R.M., Neckermann K., Igloi G.L., Koessel H.")
01476         self.assertEqual(record.references[12].title, "Complete sequence of the maize chloroplast genome: gene content, hotspots of divergence and fine tuning of genetic information by transcript editing.")
01477         self.assertEqual(len(record.references[12].references), 2)
01478         self.assertEqual(record.references[12].references[0], ('MEDLINE', '95395841'))
01479         self.assertEqual(record.references[12].references[1], ('PubMed', '7666415'))
01480         self.assertEqual(record.references[13].authors, "Willey D.L., Gray J.C.")
01481         self.assertEqual(record.references[13].title, "Two small open reading frames are co-transcribed with the pea chloroplast genes for the polypeptides of cytochrome b-559.")
01482         self.assertEqual(len(record.references[13].references), 2)
01483         self.assertEqual(record.references[13].references[0], ('MEDLINE', '89354671'))
01484         self.assertEqual(record.references[13].references[1], ('PubMed', '2766383'))
01485         self.assertEqual(record.references[14].authors, "Bock R., Hagemann R., Koessel H., Kudla J.")
01486         self.assertEqual(record.references[14].title, "Tissue- and stage-specific modulation of RNA editing of the psbF and psbL transcript from spinach plastids -- a new regulatory mechanism?")
01487         self.assertEqual(len(record.references[14].references), 2)
01488         self.assertEqual(record.references[14].references[0], ('MEDLINE', '93360903'))
01489         self.assertEqual(record.references[14].references[1], ('PubMed', '8355656'))
01490         self.assertEqual(record.references[15].authors, "Hermann R.G., Alt J., Schiller B., Widger W.R., Cramer W.A.")
01491         self.assertEqual(record.references[15].title, "Nucleotide sequence of the gene for apocytochrome b-559 on the spinach plastid chromosome: implications for the structure of the membrane protein.")
01492         self.assertEqual(len(record.references[15].references), 0)
01493         self.assertEqual(record.references[16].authors, "Kuntz M., Camara B., Weil J.-H., Schantz R.")
01494         self.assertEqual(record.references[16].title, "The psbL gene from bell pepper (Capsicum annuum): plastid RNA editing also occurs in non-photosynthetic chromoplasts.")
01495         self.assertEqual(len(record.references[16].references), 2)
01496         self.assertEqual(record.references[16].references[0], ('MEDLINE', '93099270'))
01497         self.assertEqual(record.references[16].references[1], ('PubMed', '1463853'))
01498         self.assertEqual(record.references[17].authors, "Forsthoefel N.R., Cushman J.C.")
01499         self.assertEqual(record.references[17].title, "Characterization and expression of photosystem II genes (psbE, psbF, and psbL) from the facultative crassulacean acid metabolism plant Mesembryanthemum crystallinum.")
01500         self.assertEqual(len(record.references[17].references), 2)
01501         self.assertEqual(record.references[17].references[0], ('MEDLINE', '94345017'))
01502         self.assertEqual(record.references[17].references[1], ('PubMed', '8066140'))
01503         self.assertEqual(record.references[18].authors, "Kubo T., Yanai Y., Kinoshita T., Mikami T.")
01504         self.assertEqual(record.references[18].title, "The chloroplast trnP-trnW-petG gene cluster in the mitochondrial genomes of Beta vulgaris, B. trigyna and B. webbiana: evolutionary aspects.")
01505         self.assertEqual(len(record.references[18].references), 2)
01506         self.assertEqual(record.references[18].references[0], ('MEDLINE', '95254673'))
01507         self.assertEqual(record.references[18].references[1], ('PubMed', '7736615'))
01508         self.assertEqual(record.references[19].authors, "Naithani S.")
01509         self.assertEqual(record.references[19].title, "")
01510         self.assertEqual(len(record.references[19].references), 0)
01511         self.assertEqual(record.references[20].authors, "Ikeuchi M., Takio K., Inoue Y.")
01512         self.assertEqual(record.references[20].title, "N-terminal sequencing of photosystem II low-molecular-mass proteins. 5 and 4.1 kDa components of the O2-evolving core complex from higher plants.")
01513         self.assertEqual(len(record.references[20].references), 2)
01514         self.assertEqual(record.references[20].references[0], ('MEDLINE', '89121082'))
01515         self.assertEqual(record.references[20].references[1], ('PubMed', '2644131'))
01516         self.assertEqual(record.references[21].authors, "Zheleva D., Sharma J., Panico M., Morris H.R., Barber J.")
01517         self.assertEqual(record.references[21].title, "Isolation and characterization of monomeric and dimeric CP47-reaction center photosystem II complexes.")
01518         self.assertEqual(len(record.references[21].references), 2)
01519         self.assertEqual(record.references[21].references[0], ('MEDLINE', '98298118'))
01520         self.assertEqual(record.references[21].references[1], ('PubMed', '9632665'))
01521 
01522         #Check the two parsers agree on the essentials
01523         self.assertEqual(seq_record.seq.tostring(), record.sequence)
01524         self.assertEqual(seq_record.description, record.description)
01525         self.assertEqual(seq_record.name, record.entry_name)
01526         self.assertTrue(seq_record.id in record.accessions)
01527 
01528         #Now try using the iterator - note that all these
01529         #test cases have only one record.
01530 
01531         # With the SequenceParser
01532         test_handle = open(datafile)
01533         records = list(SeqIO.parse(test_handle, "swiss"))
01534         test_handle.close()
01535 
01536         self.assertEqual(len(records), 1)
01537         self.assertTrue(isinstance(records[0], SeqRecord))
01538 
01539         #Check matches what we got earlier without the iterator:
01540         self.assertEqual(records[0].seq.tostring(), seq_record.seq.tostring())
01541         self.assertEqual(records[0].description, seq_record.description)
01542         self.assertEqual(records[0].name, seq_record.name)
01543         self.assertEqual(records[0].id, seq_record.id)
01544 
01545         # With the RecordParser
01546         test_handle = open(datafile)
01547         records = list(SwissProt.parse(test_handle))
01548         test_handle.close()
01549 
01550         self.assertEqual(len(records), 1)
01551         self.assertTrue(isinstance(records[0], SwissProt.Record))
01552 
01553         #Check matches what we got earlier without the iterator:
01554         self.assertEqual(records[0].sequence, record.sequence)
01555         self.assertEqual(records[0].description, record.description)
01556         self.assertEqual(records[0].entry_name, record.entry_name)
01557         self.assertEqual(records[0].accessions, record.accessions)
01558 

Here is the call graph for this function:

Definition at line 1559 of file test_SwissProt.py.

01559 
01560     def test_sp015(self):
01561         "Parsing SwissProt file sp015"
01562 
01563         filename = 'sp015'
01564         # test the record parser
01565 
01566         datafile = os.path.join('SwissProt', filename)
01567 
01568         test_handle = open(datafile)
01569         seq_record = SeqIO.read(test_handle, "swiss")
01570         test_handle.close()
01571 
01572         self.assertTrue(isinstance(seq_record, SeqRecord))
01573 
01574         self.assertEqual(seq_record.id, "IPI00383150")
01575         self.assertEqual(seq_record.name, "IPI00383150.2")
01576         self.assertEqual(seq_record.description, "")
01577         self.assertEqual(repr(seq_record.seq), "Seq('MSFQAPRRLLELAGQSLLRDQALAISVLDELPRELFPRLFVEAFTSRRCEVLKV...TPC', ProteinAlphabet())")
01578 
01579         test_handle = open(datafile)
01580         record = SwissProt.read(test_handle)
01581         test_handle.close()
01582 
01583         # test a couple of things on the record -- this is not exhaustive
01584         self.assertEqual(record.entry_name, "IPI00383150.2")
01585         self.assertEqual(record.accessions, ['IPI00383150'])
01586         self.assertEqual(record.organism_classification, ['Eukaryota', 'Metazoa', 'Chordata', 'Craniata', 'Vertebrata', 'Euteleostomi', 'Mammalia', 'Eutheria', 'Primates', 'Catarrhini', 'Hominidae', 'Homo'])
01587         self.assertEqual(record.seqinfo, (457, 52856, '5C3151AAADBDE232'))
01588     
01589         self.assertEqual(len(record.features), 0)
01590         self.assertEqual(len(record.references), 0)
01591 
01592         #Check the two parsers agree on the essentials
01593         self.assertEqual(seq_record.seq.tostring(), record.sequence)
01594         self.assertEqual(seq_record.description, record.description)
01595         self.assertEqual(seq_record.name, record.entry_name)
01596         self.assertTrue(seq_record.id in record.accessions)
01597 
01598         #Now try using the iterator - note that all these
01599         #test cases have only one record.
01600 
01601         # With the SequenceParser
01602         test_handle = open(datafile)
01603         records = list(SeqIO.parse(test_handle, "swiss"))
01604         test_handle.close()
01605 
01606         self.assertEqual(len(records), 1)
01607         self.assertTrue(isinstance(records[0], SeqRecord))
01608 
01609         #Check matches what we got earlier without the iterator:
01610         self.assertEqual(records[0].seq.tostring(), seq_record.seq.tostring())
01611         self.assertEqual(records[0].description, seq_record.description)
01612         self.assertEqual(records[0].name, seq_record.name)
01613         self.assertEqual(records[0].id, seq_record.id)
01614 
01615         # With the RecordParser
01616         test_handle = open(datafile)
01617         records = list(SwissProt.parse(test_handle))
01618         test_handle.close()
01619 
01620         self.assertEqual(len(records), 1)
01621         self.assertTrue(isinstance(records[0], SwissProt.Record))
01622 
01623         #Check matches what we got earlier without the iterator:
01624         self.assertEqual(records[0].sequence, record.sequence)
01625         self.assertEqual(records[0].description, record.description)
01626         self.assertEqual(records[0].entry_name, record.entry_name)
01627         self.assertEqual(records[0].accessions, record.accessions)
01628 
01629 

Here is the call graph for this function:


The documentation for this class was generated from the following file: