Back to index

python-biopython  1.60
test_prodoc.py
Go to the documentation of this file.
00001 # Copyright 1999 by Cayte Lindner.  All rights reserved.
00002 # Copyright 2009 by Michiel de Hoon.  All rights reserved.
00003 # This code is part of the Biopython distribution and governed by its
00004 # license.  Please see the LICENSE file that should have been included
00005 # as part of this package.
00006 
00007 import os
00008 import unittest
00009 from Bio.ExPASy import Prodoc
00010 
00011 
00012 
00013 
00014 class TestProdocRead(unittest.TestCase):
00015 
00016     def test_read_pdoc00100(self):
00017         "Reading Prodoc record PDOC00100"
00018         filename = os.path.join( 'Prosite', 'Doc', 'pdoc00100.txt')
00019         handle = open(filename)
00020         record = Prodoc.read(handle)
00021         handle.close()
00022 
00023         self.assertEqual(record.accession, "PDOC00100")
00024         self.assertEqual(len(record.prosite_refs), 4)
00025         self.assertEqual(record.prosite_refs[0], ("PS00107", "PROTEIN_KINASE_ATP"))
00026         self.assertEqual(record.prosite_refs[1], ("PS00108", "PROTEIN_KINASE_ST"))
00027         self.assertEqual(record.prosite_refs[2], ("PS00109", "PROTEIN_KINASE_TYR"))
00028         self.assertEqual(record.prosite_refs[3], ("PS50011", "PROTEIN_KINASE_DOM"))
00029         self.assertEqual(record.text, """\
00030 ******************************************
00031 * Protein kinases signatures and profile *
00032 ******************************************
00033 
00034 Eukaryotic  protein kinases [1 to 5]  are  enzymes  that   belong  to  a  very
00035 extensive family of  proteins which share a conserved catalytic core common to
00036 both serine/threonine and  tyrosine protein kinases.  There  are  a  number of
00037 conserved regions in the catalytic domain of protein kinases. We have selected
00038 two of these regions to build signature patterns.  The  first region, which is
00039 located in the N-terminal extremity of the catalytic domain, is a glycine-rich
00040 stretch of residues in the vicinity  of a lysine residue, which has been shown
00041 to be involved in ATP binding.   The second  region,  which is  located in the
00042 central part of the  catalytic  domain,  contains  a  conserved  aspartic acid
00043 residue  which is important for the catalytic activity  of  the enzyme [6]; we
00044 have derived  two signature patterns for that region: one specific for serine/
00045 threonine kinases  and  the  other  for  tyrosine kinases. We also developed a
00046 profile which is based on the alignment in [1] and covers the entire catalytic
00047 domain.
00048 
00049 -Consensus pattern: [LIV]-G-{P}-G-{P}-[FYWMGSTNH]-[SGA]-{PW}-[LIVCAT]-{PD}-x-
00050                     [GSTACLIVMFY]-x(5,18)-[LIVMFYWCSTAR]-[AIVP]-[LIVMFAGCKR]-K
00051                     [K binds ATP]
00052 -Sequences known to belong to this class detected by the pattern: the majority
00053  of known  protein  kinases  but it fails to find a number of them, especially
00054  viral kinases  which  are  quite  divergent in this region and are completely
00055  missed by this pattern.
00056 -Other sequence(s) detected in Swiss-Prot: 42.
00057 
00058 -Consensus pattern: [LIVMFYC]-x-[HY]-x-D-[LIVMFY]-K-x(2)-N-[LIVMFYCT](3)
00059                     [D is an active site residue]
00060 -Sequences known to belong to this class detected by the pattern: Most serine/
00061  threonine  specific protein  kinases  with  10 exceptions (half of them viral
00062  kinases) and  also  Epstein-Barr  virus BGLF4 and Drosophila ninaC which have
00063  respectively Ser and Arg instead of the conserved Lys and which are therefore
00064  detected by the tyrosine kinase specific pattern described below.
00065 -Other sequence(s) detected in Swiss-Prot: 1.
00066 
00067 -Consensus pattern: [LIVMFYC]-{A}-[HY]-x-D-[LIVMFY]-[RSTAC]-{D}-{PF}-N-
00068                     [LIVMFYC](3)
00069                     [D is an active site residue]
00070 -Sequences known to belong to this class detected by the pattern: ALL tyrosine
00071  specific protein  kinases  with  the  exception of human ERBB3 and mouse blk.
00072  This pattern    will    also    detect    most    bacterial    aminoglycoside
00073  phosphotransferases [8,9]  and  herpesviruses ganciclovir kinases [10]; which
00074  are proteins structurally and evolutionary related to protein kinases.
00075 -Other sequence(s) detected in Swiss-Prot: 17.
00076 
00077 -Sequences known to belong to this class detected by the profile: ALL,  except
00078  for three  viral  kinases.  This  profile  also  detects  receptor  guanylate
00079  cyclases (see   <PDOC00430>)  and  2-5A-dependent  ribonucleases.    Sequence
00080  similarities between  these  two  families  and the eukaryotic protein kinase
00081  family have been noticed before. It also detects Arabidopsis thaliana kinase-
00082  like protein TMKL1 which seems to have lost its catalytic activity.
00083 -Other sequence(s) detected in Swiss-Prot: 4.
00084 
00085 -Note: If a protein  analyzed  includes the two protein kinase signatures, the
00086  probability of it being a protein kinase is close to 100%
00087 -Note: Eukaryotic-type protein  kinases  have  also  been found in prokaryotes
00088  such as Myxococcus xanthus [11] and Yersinia pseudotuberculosis.
00089 -Note: The  patterns  shown  above has been updated since their publication in
00090  [7].
00091 
00092 -Expert(s) to contact by email:
00093            Hunter T.; hunter@salk-sc2.sdsc.edu
00094            Quinn A.M.; quinn@biomed.med.yale.edu
00095 
00096 -Last update: April 2006 / Pattern revised.
00097 
00098 """)
00099 
00100         self.assertEqual(len(record.references), 11)
00101         self.assertEqual(record.references[ 0].number, "1")
00102         self.assertEqual(record.references[ 0].authors, "Hanks S.K., Hunter T.")
00103         self.assertEqual(record.references[ 0].citation, """\
00104 "Protein kinases 6. The eukaryotic protein kinase superfamily: kinase
00105 (catalytic) domain structure and classification."
00106 FASEB J. 9:576-596(1995).
00107 PubMed=7768349""")
00108         self.assertEqual(record.references[ 1].number, "2")
00109         self.assertEqual(record.references[ 1].authors, "Hunter T.")
00110         self.assertEqual(record.references[ 1].citation, """\
00111 "Protein kinase classification."
00112 Methods Enzymol. 200:3-37(1991).
00113 PubMed=1835513""")
00114         self.assertEqual(record.references[ 2].number, "3")
00115         self.assertEqual(record.references[ 2].authors, "Hanks S.K., Quinn A.M.")
00116         self.assertEqual(record.references[ 2].citation, """\
00117 "Protein kinase catalytic domain sequence database: identification of
00118 conserved features of primary structure and classification of family
00119 members."
00120 Methods Enzymol. 200:38-62(1991).
00121 PubMed=1956325""")
00122         self.assertEqual(record.references[ 3].number, "4")
00123         self.assertEqual(record.references[ 3].authors, "Hanks S.K.")
00124         self.assertEqual(record.references[ 3].citation, 'Curr. Opin. Struct. Biol. 1:369-383(1991).')
00125         self.assertEqual(record.references[ 4].number, "5")
00126         self.assertEqual(record.references[ 4].authors, "Hanks S.K., Quinn A.M., Hunter T.")
00127         self.assertEqual(record.references[ 4].citation, """\
00128 "The protein kinase family: conserved features and deduced phylogeny
00129 of the catalytic domains."
00130 Science 241:42-52(1988).
00131 PubMed=3291115""")
00132         self.assertEqual(record.references[ 5].number, "6")
00133         self.assertEqual(record.references[ 5].authors, "Knighton D.R., Zheng J.H., Ten Eyck L.F., Ashford V.A., Xuong N.-H., Taylor S.S., Sowadski J.M.")
00134         self.assertEqual(record.references[ 5].citation, """\
00135 "Crystal structure of the catalytic subunit of cyclic adenosine
00136 monophosphate-dependent protein kinase."
00137 Science 253:407-414(1991).
00138 PubMed=1862342""")
00139         self.assertEqual(record.references[ 6].number, "7")
00140         self.assertEqual(record.references[ 6].authors, "Bairoch A., Claverie J.-M.")
00141         self.assertEqual(record.references[ 6].citation, """\
00142 "Sequence patterns in protein kinases."
00143 Nature 331:22-22(1988).
00144 PubMed=3340146; DOI=10.1038/331022a0""")
00145         self.assertEqual(record.references[ 7].number, "8")
00146         self.assertEqual(record.references[ 7].authors, "Benner S.")
00147         self.assertEqual(record.references[ 7].citation, 'Nature 329:21-21(1987).')
00148         self.assertEqual(record.references[ 8].number, "9")
00149         self.assertEqual(record.references[ 8].authors, "Kirby R.")
00150         self.assertEqual(record.references[ 8].citation, """\
00151 "Evolutionary origin of aminoglycoside phosphotransferase resistance
00152 genes."
00153 J. Mol. Evol. 30:489-492(1990).
00154 PubMed=2165531""")
00155         self.assertEqual(record.references[ 9].number, "10")
00156         self.assertEqual(record.references[ 9].authors, "Littler E., Stuart A.D., Chee M.S.")
00157         self.assertEqual(record.references[ 9].citation, 'Nature 358:160-162(1992).')
00158         self.assertEqual(record.references[10].number, "11")
00159         self.assertEqual(record.references[10].authors, "Munoz-Dorado J., Inouye S., Inouye M.")
00160         self.assertEqual(record.references[10].citation, 'Cell 67:995-1006(1991).')
00161 
00162     def test_read_pdoc00113(self):
00163         "Reading Prodoc record PDOC00113"
00164         filename = os.path.join( 'Prosite', 'Doc', 'pdoc00113.txt')
00165         handle = open(filename)
00166         record = Prodoc.read(handle)
00167         handle.close()
00168 
00169         self.assertEqual(record.accession, "PDOC00113")
00170         self.assertEqual(len(record.prosite_refs), 1)
00171         self.assertEqual(record.prosite_refs[0], ("PS00123", "ALKALINE_PHOSPHATASE"))
00172         self.assertEqual(record.text, """\
00173 ************************************
00174 * Alkaline phosphatase active site *
00175 ************************************
00176 
00177 Alkaline phosphatase (EC 3.1.3.1) (ALP) [1] is a zinc and magnesium-containing
00178 metalloenzyme  which hydrolyzes phosphate esters, optimally at high pH.  It is
00179 found in nearly  all living organisms,  with the exception of some plants.  In
00180 Escherichia coli, ALP (gene phoA) is found in the periplasmic space.  In yeast
00181 it (gene  PHO8)  is  found  in  lysosome-like vacuoles and in mammals, it is a
00182 glycoprotein attached to the membrane by a GPI-anchor.
00183 
00184 In mammals, four different isozymes are currently known [2]. Three of them are
00185 tissue-specific:  the  placental,  placental-like (germ cell)   and intestinal
00186 isozymes.  The fourth form is  tissue non-specific and was previously known as
00187 the liver/bone/kidney isozyme.
00188 
00189 Streptomyces' species  involved  in  the  synthesis  of  streptomycin (SM), an
00190 antibiotic, express  a  phosphatase (EC 3.1.3.39) (gene strK) which is  highly
00191 related to ALP.   It specifically cleaves  both  streptomycin-6-phosphate and,
00192 more slowly, streptomycin-3"-phosphate.
00193 
00194 A serine is involved   in the catalytic activity of ALP. The region around the
00195 active site serine is relatively well conserved and can be used as a signature
00196 pattern.
00197 
00198 -Consensus pattern: [IV]-x-D-S-[GAS]-[GASC]-[GAST]-[GA]-T
00199                     [S is the active site residue]
00200 -Sequences known to belong to this class detected by the pattern: ALL.
00201 -Other sequence(s) detected in Swiss-Prot: 3.
00202 -Last update: June 1994 / Text revised.
00203 
00204 """)
00205 
00206         self.assertEqual(len(record.references), 3)
00207         self.assertEqual(record.references[ 0].number, "1")
00208         self.assertEqual(record.references[ 0].authors, "Trowsdale J., Martin D., Bicknell D., Campbell I.")
00209         self.assertEqual(record.references[ 0].citation, """\
00210 "Alkaline phosphatases."
00211 Biochem. Soc. Trans. 18:178-180(1990).
00212 PubMed=2379681""")
00213         self.assertEqual(record.references[ 1].number, "2")
00214         self.assertEqual(record.references[ 1].authors, "Manes T., Glade K., Ziomek C.A., Millan J.L.")
00215         self.assertEqual(record.references[ 1].citation, """\
00216 "Genomic structure and comparison of mouse tissue-specific alkaline
00217 phosphatase genes."
00218 Genomics 8:541-554(1990).
00219 PubMed=2286375""")
00220         self.assertEqual(record.references[ 2].number, "3")
00221         self.assertEqual(record.references[ 2].authors, "Mansouri K., Piepersberg W.")
00222         self.assertEqual(record.references[ 2].citation, """\
00223 "Genetics of streptomycin production in Streptomyces griseus:
00224 nucleotide sequence of five genes, strFGHIK, including a phosphatase
00225 gene."
00226 Mol. Gen. Genet. 228:459-469(1991).
00227 PubMed=1654502""")
00228 
00229     def test_read_pdoc00144(self):
00230         "Reading Prodoc record PDOC00144"
00231         filename = os.path.join( 'Prosite', 'Doc', 'pdoc00144.txt')
00232         handle = open(filename)
00233         record = Prodoc.read(handle)
00234         handle.close()
00235 
00236         self.assertEqual(record.accession, "PDOC00144")
00237         self.assertEqual(len(record.prosite_refs), 2)
00238         self.assertEqual(record.prosite_refs[0], ("PS00159", "ALDOLASE_KDPG_KHG_1"))
00239         self.assertEqual(record.prosite_refs[1], ("PS00160", "ALDOLASE_KDPG_KHG_2"))
00240         self.assertEqual(record.text, """\
00241 *************************************************
00242 * KDPG and KHG aldolases active site signatures *
00243 *************************************************
00244 
00245 4-hydroxy-2-oxoglutarate aldolase (EC 4.1.3.16)  (KHG-aldolase)  catalyzes the
00246 interconversion of  4-hydroxy-2-oxoglutarate  into  pyruvate  and  glyoxylate.
00247 Phospho-2-dehydro-3-deoxygluconate  aldolase   (EC 4.1.2.14)   (KDPG-aldolase)
00248 catalyzes the interconversion of  6-phospho-2-dehydro-3-deoxy-D-gluconate into
00249 pyruvate and glyceraldehyde 3-phosphate.
00250 
00251 These two enzymes are structurally and functionally related [1]. They are both
00252 homotrimeric proteins of approximately 220 amino-acid residues. They are class
00253 I aldolases whose catalytic mechanism involves  the formation of a Schiff-base
00254 intermediate  between  the  substrate  and the epsilon-amino group of a lysine
00255 residue. In both enzymes, an arginine is required for catalytic activity.
00256 
00257 We developed  two signature patterns for these enzymes. The first one contains
00258 the active  site  arginine  and the second, the lysine involved in the Schiff-
00259 base formation.
00260 
00261 -Consensus pattern: G-[LIVM]-x(3)-E-[LIV]-T-[LF]-R
00262                     [R is the active site residue]
00263 -Sequences known to belong to this class detected by the pattern: ALL,  except
00264  for Bacillus  subtilis  KDPG-aldolase  which  has  Thr  instead of Arg in the
00265  active site.
00266 -Other sequence(s) detected in Swiss-Prot: NONE.
00267 
00268 -Consensus pattern: G-x(3)-[LIVMF]-K-[LF]-F-P-[SA]-x(3)-G
00269                     [K is involved in Schiff-base formation]
00270 -Sequences known to belong to this class detected by the pattern: ALL.
00271 -Other sequence(s) detected in Swiss-Prot: NONE.
00272 
00273 -Last update: November 1997 / Patterns and text revised.
00274 
00275 """)
00276 
00277         self.assertEqual(len(record.references), 1)
00278         self.assertEqual(record.references[ 0].number, "1")
00279         self.assertEqual(record.references[ 0].authors, "Vlahos C.J., Dekker E.E.")
00280         self.assertEqual(record.references[ 0].citation, """\
00281 "The complete amino acid sequence and identification of the
00282 active-site arginine peptide of Escherichia coli
00283 2-keto-4-hydroxyglutarate aldolase."
00284 J. Biol. Chem. 263:11683-11691(1988).
00285 PubMed=3136164""")
00286 
00287     def test_read_pdoc00149(self):
00288         "Reading Prodoc record PDOC00149"
00289         filename = os.path.join( 'Prosite', 'Doc', 'pdoc00149.txt')
00290         handle = open(filename)
00291         record = Prodoc.read(handle)
00292         handle.close()
00293 
00294         self.assertEqual(record.accession, "PDOC00149")
00295         self.assertEqual(len(record.prosite_refs), 1)
00296         self.assertEqual(record.prosite_refs[0], ("PS00165", "DEHYDRATASE_SER_THR"))
00297         self.assertEqual(record.text, """\
00298 *********************************************************************
00299 * Serine/threonine dehydratases pyridoxal-phosphate attachment site *
00300 *********************************************************************
00301 
00302 Serine and threonine  dehydratases [1,2]  are  functionally  and  structurally
00303 related pyridoxal-phosphate dependent enzymes:
00304 
00305  - L-serine dehydratase (EC 4.3.1.17) and D-serine  dehydratase  (EC 4.3.1.18)
00306    catalyze the dehydratation of L-serine (respectively D-serine) into ammonia
00307    and pyruvate.
00308  - Threonine dehydratase  (EC 4.3.1.19) (TDH) catalyzes  the  dehydratation of
00309    threonine into  alpha-ketobutarate  and  ammonia.  In Escherichia coli  and
00310    other microorganisms,  two  classes  of  TDH  are  known  to  exist. One is
00311    involved in  the  biosynthesis of isoleucine, the other in hydroxamino acid
00312    catabolism.
00313 
00314 Threonine synthase  (EC 4.2.3.1) is  also  a  pyridoxal-phosphate  enzyme,  it
00315 catalyzes the  transformation of  homoserine-phosphate into threonine.  It has
00316 been shown [3] that  threonine  synthase  is  distantly related to the serine/
00317 threonine dehydratases.
00318 
00319 In all these enzymes, the pyridoxal-phosphate group is  attached  to a  lysine
00320 residue.  The sequence around  this residue is sufficiently conserved to allow
00321 the derivation  of  a  pattern  specific  to serine/threonine dehydratases and
00322 threonine synthases.
00323 
00324 -Consensus pattern: [DESH]-x(4,5)-[STVG]-{EVKD}-[AS]-[FYI]-K-[DLIFSA]-[RLVMF]-
00325                     [GA]-[LIVMGA]
00326                     [The K is the pyridoxal-P attachment site]
00327 -Sequences known to belong to this class detected by the pattern: ALL.
00328 -Other sequence(s) detected in Swiss-Prot: 17.
00329 
00330 -Note: Some   bacterial L-serine dehydratases - such as those from Escherichia
00331  coli - are iron-sulfur proteins [4] and do not belong to this family.
00332 
00333 -Last update: December 2004 / Pattern and text revised.
00334 
00335 """)
00336 
00337         self.assertEqual(len(record.references), 4)
00338         self.assertEqual(record.references[ 0].number, "1")
00339         self.assertEqual(record.references[ 0].authors, "Ogawa H., Gomi T., Konishi K., Date T., Nakashima H., Nose K., Matsuda Y., Peraino C., Pitot H.C., Fujioka M.")
00340         self.assertEqual(record.references[ 0].citation, """\
00341 "Human liver serine dehydratase. cDNA cloning and sequence homology
00342 with hydroxyamino acid dehydratases from other sources."
00343 J. Biol. Chem. 264:15818-15823(1989).
00344 PubMed=2674117""")
00345         self.assertEqual(record.references[ 1].number, "2")
00346         self.assertEqual(record.references[ 1].authors, "Datta P., Goss T.J., Omnaas J.R., Patil R.V.")
00347         self.assertEqual(record.references[ 1].citation, """\
00348 "Covalent structure of biodegradative threonine dehydratase of
00349 Escherichia coli: homology with other dehydratases."
00350 Proc. Natl. Acad. Sci. U.S.A. 84:393-397(1987).
00351 PubMed=3540965""")
00352         self.assertEqual(record.references[ 2].number, "3")
00353         self.assertEqual(record.references[ 2].authors, "Parsot C.")
00354         self.assertEqual(record.references[ 2].citation, """\
00355 "Evolution of biosynthetic pathways: a common ancestor for threonine
00356 synthase, threonine dehydratase and D-serine dehydratase."
00357 EMBO J. 5:3013-3019(1986).
00358 PubMed=3098560""")
00359         self.assertEqual(record.references[ 3].number, "4")
00360         self.assertEqual(record.references[ 3].authors, "Grabowski R., Hofmeister A.E.M., Buckel W.")
00361         self.assertEqual(record.references[ 3].citation, """\
00362 "Bacterial L-serine dehydratases: a new family of enzymes containing
00363 iron-sulfur clusters."
00364 Trends Biochem. Sci. 18:297-300(1993).
00365 PubMed=8236444""")
00366 
00367     def test_read_pdoc00340(self):
00368         "Reading Prodoc record PDOC00340"
00369         filename = os.path.join( 'Prosite', 'Doc', 'pdoc00340.txt')
00370         handle = open(filename)
00371         record = Prodoc.read(handle)
00372         handle.close()
00373 
00374         self.assertEqual(record.accession, "PDOC00340")
00375         self.assertEqual(len(record.prosite_refs), 3)
00376         self.assertEqual(record.prosite_refs[0], ("PS00406", "ACTINS_1"))
00377         self.assertEqual(record.prosite_refs[1], ("PS00432", "ACTINS_2"))
00378         self.assertEqual(record.prosite_refs[2], ("PS01132", "ACTINS_ACT_LIKE"))
00379         self.assertEqual(record.text, """\
00380 *********************
00381 * Actins signatures *
00382 *********************
00383 
00384 Actins [1 to 4] are highly conserved contractile  proteins that are present in
00385 all eukaryotic cells. In vertebrates there are three groups of actin isoforms:
00386 alpha, beta and gamma.  The alpha actins are found in muscle tissues and are a
00387 major constituent of the contractile apparatus.  The beta and gamma actins co-
00388 exists in most cell  types as  components of the cytoskeleton and as mediators
00389 of internal cell motility.  In plants [5]  there  are  many isoforms which are
00390 probably involved  in  a  variety of  functions such as cytoplasmic streaming,
00391 cell shape determination,  tip growth,  graviperception, cell wall deposition,
00392 etc.
00393 
00394 Actin exists either in a monomeric form (G-actin) or in a polymerized form (F-
00395 actin). Each actin monomer  can  bind a molecule of ATP;  when  polymerization
00396 occurs, the ATP is hydrolyzed.
00397 
00398 Actin is a protein of from 374 to 379 amino acid  residues.  The  structure of
00399 actin has been highly conserved in the course of evolution.
00400 
00401 Recently some  divergent  actin-like  proteins have been identified in several
00402 species. These proteins are:
00403 
00404  - Centractin  (actin-RPV)  from mammals, fungi (yeast ACT5, Neurospora crassa
00405    ro-4) and  Pneumocystis  carinii  (actin-II).  Centractin  seems  to  be  a
00406    component of  a  multi-subunit  centrosomal complex involved in microtubule
00407    based vesicle motility. This subfamily is also known as ARP1.
00408  - ARP2  subfamily  which  includes  chicken ACTL, yeast ACT2, Drosophila 14D,
00409    C.elegans actC.
00410  - ARP3  subfamily  which includes actin 2 from mammals, Drosophila 66B, yeast
00411    ACT4 and fission yeast act2.
00412  - ARP4  subfamily  which includes yeast ACT3 and Drosophila 13E.
00413 
00414 We developed  three  signature  patterns. The first two are specific to actins
00415 and span  positions  54 to 64 and 357 to 365. The last signature picks up both
00416 actins and  the actin-like proteins and corresponds to positions 106 to 118 in
00417 actins.
00418 
00419 -Consensus pattern: [FY]-[LIV]-[GV]-[DE]-E-[ARV]-[QLAH]-x(1,2)-[RKQ](2)-[GD]
00420 -Sequences known to belong to this class detected by the pattern: ALL,  except
00421  for the actin-like proteins and 10 actins.
00422 -Other sequence(s) detected in Swiss-Prot: NONE.
00423 
00424 -Consensus pattern: W-[IVC]-[STAK]-[RK]-x-[DE]-Y-[DNE]-[DE]
00425 -Sequences known to belong to this class detected by the pattern: ALL,  except
00426  for the actin-like proteins and 9 actins.
00427 -Other sequence(s) detected in Swiss-Prot: NONE.
00428 
00429 -Consensus pattern: [LM]-[LIVMA]-T-E-[GAPQ]-x-[LIVMFYWHQPK]-[NS]-[PSTAQ]-x(2)-
00430                     N-[KR]
00431 -Sequences known to belong to this class detected by the pattern: ALL,  except
00432  for 5 actins.
00433 -Other sequence(s) detected in Swiss-Prot: NONE.
00434 
00435 -Last update: December 2004 / Patterns and text revised.
00436 
00437 """)
00438 
00439         self.assertEqual(len(record.references), 5)
00440         self.assertEqual(record.references[ 0].number, "1")
00441         self.assertEqual(record.references[ 0].authors, "Sheterline P., Clayton J., Sparrow J.C.")
00442         self.assertEqual(record.references[ 0].citation, '(In) Actins, 3rd Edition, Academic Press Ltd, London, (1996).')
00443         self.assertEqual(record.references[ 1].number, "2")
00444         self.assertEqual(record.references[ 1].authors, "Pollard T.D., Cooper J.A.")
00445         self.assertEqual(record.references[ 1].citation, 'Annu. Rev. Biochem. 55:987-1036(1986).')
00446         self.assertEqual(record.references[ 2].number, "3")
00447         self.assertEqual(record.references[ 2].authors, "Pollard T.D.")
00448         self.assertEqual(record.references[ 2].citation, """\
00449 "Actin."
00450 Curr. Opin. Cell Biol. 2:33-40(1990).
00451 PubMed=2183841""")
00452         self.assertEqual(record.references[ 3].number, "4")
00453         self.assertEqual(record.references[ 3].authors, "Rubenstein P.A.")
00454         self.assertEqual(record.references[ 3].citation, """\
00455 "The functional importance of multiple actin isoforms."
00456 BioEssays 12:309-315(1990).
00457 PubMed=2203335""")
00458         self.assertEqual(record.references[ 4].number, "5")
00459         self.assertEqual(record.references[ 4].authors, "Meagher R.B., McLean B.G.")
00460         self.assertEqual(record.references[ 4].citation, 'Cell Motil. Cytoskeleton 16:164-166(1990).')
00461 
00462     def test_read_pdoc00424(self):
00463         "Reading Prodoc record PDOC00424"
00464         filename = os.path.join( 'Prosite', 'Doc', 'pdoc00424.txt',)
00465         handle = open(filename)
00466         record = Prodoc.read(handle)
00467         handle.close()
00468 
00469         self.assertEqual(record.accession, "PDOC00424")
00470         self.assertEqual(len(record.prosite_refs), 1)
00471         self.assertEqual(record.prosite_refs[0], ("PS00488", "PAL_HISTIDASE"))
00472         self.assertEqual(record.text, """\
00473 **********************************************************
00474 * Phenylalanine and histidine ammonia-lyases active site *
00475 **********************************************************
00476 
00477 Phenylalanine ammonia-lyase (EC 4.3.1.5) (PAL) is  a  key  enzyme of plant and
00478 fungi  phenylpropanoid  metabolism  which is involved in the biosynthesis of a
00479 wide  variety  of secondary metabolites such  as  flavanoids,   furanocoumarin
00480 phytoalexins and  cell  wall  components.  These compounds have many important
00481 roles in plants during normal growth and in responses to environmental stress.
00482 PAL catalyzes  the  removal  of  an  ammonia  group from phenylalanine to form
00483 trans-cinnamate.
00484 
00485 Histidine ammonia-lyase (EC 4.3.1.3) (histidase)  catalyzes  the first step in
00486 histidine degradation, the removal of  an  ammonia  group  from  histidine  to
00487 produce urocanic acid.
00488 
00489 The two types of enzymes are functionally and  structurally related [1].  They
00490 are the only enzymes  which are known to have the modified amino acid dehydro-
00491 alanine (DHA) in their active site. A serine residue has been shown [2,3,4] to
00492 be the  precursor  of  this  essential electrophilic moiety. The region around
00493 this active  site  residue  is  well  conserved and can be used as a signature
00494 pattern.
00495 
00496 -Consensus pattern: [GS]-[STG]-[LIVM]-[STG]-[SAC]-S-G-[DH]-L-x-[PN]-L-[SA]-
00497                     x(2,3)-[SAGVTL]
00498                     [S is the active site residue]
00499 -Sequences known to belong to this class detected by the pattern: ALL.
00500 -Other sequence(s) detected in Swiss-Prot: NONE.
00501 -Last update: April 2006 / Pattern revised.
00502 
00503 """)
00504 
00505         self.assertEqual(len(record.references), 4)
00506         self.assertEqual(record.references[ 0].number, "1")
00507         self.assertEqual(record.references[ 0].authors, "Taylor R.G., Lambert M.A., Sexsmith E., Sadler S.J., Ray P.N., Mahuran D.J., McInnes R.R.")
00508         self.assertEqual(record.references[ 0].citation, """\
00509 "Cloning and expression of rat histidase. Homology to two bacterial
00510 histidases and four phenylalanine ammonia-lyases."
00511 J. Biol. Chem. 265:18192-18199(1990).
00512 PubMed=2120224""")
00513         self.assertEqual(record.references[ 1].number, "2")
00514         self.assertEqual(record.references[ 1].authors, "Langer M., Reck G., Reed J., Retey J.")
00515         self.assertEqual(record.references[ 1].citation, """\
00516 "Identification of serine-143 as the most likely precursor of
00517 dehydroalanine in the active site of histidine ammonia-lyase. A study
00518 of the overexpressed enzyme by site-directed mutagenesis."
00519 Biochemistry 33:6462-6467(1994).
00520 PubMed=8204579""")
00521         self.assertEqual(record.references[ 2].number, "3")
00522         self.assertEqual(record.references[ 2].authors, "Schuster B., Retey J.")
00523         self.assertEqual(record.references[ 2].citation, """\
00524 "Serine-202 is the putative precursor of the active site
00525 dehydroalanine of phenylalanine ammonia lyase. Site-directed
00526 mutagenesis studies on the enzyme from parsley (Petroselinum crispum
00527 L.)."
00528 FEBS Lett. 349:252-254(1994).
00529 PubMed=8050576""")
00530         self.assertEqual(record.references[ 3].number, "4")
00531         self.assertEqual(record.references[ 3].authors, "Taylor R.G., McInnes R.R.")
00532         self.assertEqual(record.references[ 3].citation, """\
00533 "Site-directed mutagenesis of conserved serines in rat histidase.
00534 Identification of serine 254 as an essential active site residue."
00535 J. Biol. Chem. 269:27473-27477(1994).
00536 PubMed=7961661""")
00537 
00538     def test_read_pdoc00472(self):
00539         "Reading Prodoc record PDOC00472"
00540         filename = os.path.join( 'Prosite', 'Doc', 'pdoc00472.txt')
00541         handle = open(filename)
00542         record = Prodoc.read(handle)
00543         handle.close()
00544 
00545         self.assertEqual(record.accession, "PDOC00472")
00546         self.assertEqual(len(record.prosite_refs), 1)
00547         self.assertEqual(record.prosite_refs[0], ("PS00546", "CYSTEINE_SWITCH"))
00548         self.assertEqual(record.text, """\
00549 *****************************
00550 * Matrixins cysteine switch *
00551 *****************************
00552 
00553 Mammalian extracellular matrix metalloproteinases (EC 3.4.24.-), also known as
00554 matrixins [1] (see <PDOC00129>), are zinc-dependent enzymes. They are secreted
00555 by cells  in an inactive form (zymogen) that differs from the mature enzyme by
00556 the presence  of  an  N-terminal propeptide. A highly conserved octapeptide is
00557 found two  residues  downstream  of the C-terminal end of the propeptide. This
00558 region has been shown to be  involved  in  autoinhibition  of matrixins [2,3];
00559 a cysteine  within the octapeptide chelates  the  active  site  zinc ion, thus
00560 inhibiting the  enzyme.  This  region has been called the 'cysteine switch' or
00561 'autoinhibitor region'.
00562 
00563 A cysteine switch has been found in the following zinc proteases:
00564 
00565  - MMP-1 (EC 3.4.24.7) (interstitial collagenase).
00566  - MMP-2 (EC 3.4.24.24) (72 Kd gelatinase).
00567  - MMP-3 (EC 3.4.24.17) (stromelysin-1).
00568  - MMP-7 (EC 3.4.24.23) (matrilysin).
00569  - MMP-8 (EC 3.4.24.34) (neutrophil collagenase).
00570  - MMP-9 (EC 3.4.24.35) (92 Kd gelatinase).
00571  - MMP-10 (EC 3.4.24.22) (stromelysin-2).
00572  - MMP-11 (EC 3.4.24.-) (stromelysin-3).
00573  - MMP-12 (EC 3.4.24.65) (macrophage metalloelastase).
00574  - MMP-13 (EC 3.4.24.-) (collagenase 3).
00575  - MMP-14 (EC 3.4.24.-) (membrane-type matrix metalliproteinase 1).
00576  - MMP-15 (EC 3.4.24.-) (membrane-type matrix metalliproteinase 2).
00577  - MMP-16 (EC 3.4.24.-) (membrane-type matrix metalliproteinase 3).
00578  - Sea urchin hatching enzyme (EC 3.4.24.12) (envelysin) [4].
00579  - Chlamydomonas reinhardtii gamete lytic enzyme (GLE) [5].
00580 
00581 -Consensus pattern: P-R-C-[GN]-x-P-[DR]-[LIVSAPKQ]
00582                     [C chelates the zinc ion]
00583 -Sequences known to belong to this class detected by the pattern: ALL,  except
00584  for cat MMP-7 and mouse MMP-11.
00585 -Other sequence(s) detected in Swiss-Prot: NONE.
00586 -Last update: November 1997 / Pattern and text revised.
00587 
00588 """)
00589 
00590         self.assertEqual(len(record.references), 5)
00591         self.assertEqual(record.references[ 0].number, "1")
00592         self.assertEqual(record.references[ 0].authors, "Woessner J.F. Jr.")
00593         self.assertEqual(record.references[ 0].citation, """\
00594 "Matrix metalloproteinases and their inhibitors in connective tissue
00595 remodeling."
00596 FASEB J. 5:2145-2154(1991).
00597 PubMed=1850705""")
00598         self.assertEqual(record.references[ 1].number, "2")
00599         self.assertEqual(record.references[ 1].authors, "Sanchez-Lopez R., Nicholson R., Gesnel M.C., Matrisian L.M., Breathnach R.")
00600         self.assertEqual(record.references[ 1].citation, 'J. Biol. Chem. 263:11892-11899(1988).')
00601         self.assertEqual(record.references[ 2].number, "3")
00602         self.assertEqual(record.references[ 2].authors, "Park A.J., Matrisian L.M., Kells A.F., Pearson R., Yuan Z.Y., Navre M.")
00603         self.assertEqual(record.references[ 2].citation, """\
00604 "Mutational analysis of the transin (rat stromelysin) autoinhibitor
00605 region demonstrates a role for residues surrounding the 'cysteine
00606 switch'."
00607 J. Biol. Chem. 266:1584-1590(1991).
00608 PubMed=1988438""")
00609         self.assertEqual(record.references[ 3].number, "4")
00610         self.assertEqual(record.references[ 3].authors, "Lepage T., Gache C.")
00611         self.assertEqual(record.references[ 3].citation, """\
00612 "Early expression of a collagenase-like hatching enzyme gene in the
00613 sea urchin embryo."
00614 EMBO J. 9:3003-3012(1990).
00615 PubMed=2167841""")
00616         self.assertEqual(record.references[ 4].number, "5")
00617         self.assertEqual(record.references[ 4].authors, "Kinoshita T., Fukuzawa H., Shimada T., Saito T., Matsuda Y.")
00618         self.assertEqual(record.references[ 4].citation, """\
00619 "Primary structure and expression of a gamete lytic enzyme in
00620 Chlamydomonas reinhardtii: similarity of functional domains to matrix
00621 metalloproteases."
00622 Proc. Natl. Acad. Sci. U.S.A. 89:4693-4697(1992).
00623 PubMed=1584806""")
00624 
00625     def test_read_pdoc00640(self):
00626         "Reading Prodoc record PDOC00640"
00627         filename = os.path.join( 'Prosite', 'Doc', 'pdoc00640.txt',)
00628         handle = open(filename)
00629         record = Prodoc.read(handle)
00630         handle.close()
00631 
00632         self.assertEqual(record.accession, "PDOC00640")
00633         self.assertEqual(len(record.prosite_refs), 1)
00634         self.assertEqual(record.prosite_refs[0], ("PS00812", "GLYCOSYL_HYDROL_F8"))
00635         self.assertEqual(record.text, """\
00636 ******************************************
00637 * Glycosyl hydrolases family 8 signature *
00638 ******************************************
00639 
00640 The microbial degradation  of cellulose and  xylans requires  several types of
00641 enzymes such as endoglucanases (EC 3.2.1.4),  cellobiohydrolases (EC 3.2.1.91)
00642 (exoglucanases), or xylanases (EC 3.2.1.8) [1,2].  Fungi and bacteria produces
00643 a spectrum of cellulolytic  enzymes (cellulases)  and  xylanases which, on the
00644 basis of sequence similarities,  can be classified into families. One of these
00645 families is known as the cellulase family D [3] or as  the glycosyl hydrolases
00646 family 8  [4,E1].  The  enzymes  which  are  currently known to belong to this
00647 family are listed below.
00648 
00649  - Acetobacter xylinum endonuclease cmcAX.
00650  - Bacillus strain KSM-330 acidic endonuclease K (Endo-K).
00651  - Cellulomonas josui endoglucanase 2 (celB).
00652  - Cellulomonas uda endoglucanase.
00653  - Clostridium cellulolyticum endoglucanases C (celcCC).
00654  - Clostridium thermocellum endoglucanases A (celA).
00655  - Erwinia chrysanthemi minor endoglucanase y (celY).
00656  - Bacillus circulans beta-glucanase (EC 3.2.1.73).
00657  - Escherichia coli hypothetical protein yhjM.
00658 
00659 The most conserved region in  these enzymes is  a stretch of about 20 residues
00660 that contains  two conserved aspartate. The first asparatate is thought [5] to
00661 act as the nucleophile in the catalytic mechanism. We have used this region as
00662 a signature pattern.
00663 
00664 -Consensus pattern: A-[ST]-D-[AG]-D-x(2)-[IM]-A-x-[SA]-[LIVM]-[LIVMG]-x-A-
00665                     x(3)-[FW]
00666                     [The first D is an active site residue]
00667 -Sequences known to belong to this class detected by the pattern: ALL.
00668 -Other sequence(s) detected in Swiss-Prot: NONE.
00669 
00670 -Expert(s) to contact by email:
00671            Henrissat B.; bernie@afmb.cnrs-mrs.fr
00672 
00673 -Last update: November 1997 / Text revised.
00674 
00675 """)
00676 
00677         self.assertEqual(len(record.references), 6)
00678         self.assertEqual(record.references[ 0].number, "1")
00679         self.assertEqual(record.references[ 0].authors, "Beguin P.")
00680         self.assertEqual(record.references[ 0].citation, """\
00681 "Molecular biology of cellulose degradation."
00682 Annu. Rev. Microbiol. 44:219-248(1990).
00683 PubMed=2252383; DOI=10.1146/annurev.mi.44.100190.001251""")
00684         self.assertEqual(record.references[ 1].number, "2")
00685         self.assertEqual(record.references[ 1].authors, "Gilkes N.R., Henrissat B., Kilburn D.G., Miller R.C. Jr., Warren R.A.J.")
00686         self.assertEqual(record.references[ 1].citation, """\
00687 "Domains in microbial beta-1, 4-glycanases: sequence conservation,
00688 function, and enzyme families."
00689 Microbiol. Rev. 55:303-315(1991).
00690 PubMed=1886523""")
00691         self.assertEqual(record.references[ 2].number, "3")
00692         self.assertEqual(record.references[ 2].authors, "Henrissat B., Claeyssens M., Tomme P., Lemesle L., Mornon J.-P.")
00693         self.assertEqual(record.references[ 2].citation, """\
00694 "Cellulase families revealed by hydrophobic cluster analysis."
00695 Gene 81:83-95(1989).
00696 PubMed=2806912""")
00697         self.assertEqual(record.references[ 3].number, "4")
00698         self.assertEqual(record.references[ 3].authors, "Henrissat B.")
00699         self.assertEqual(record.references[ 3].citation, """\
00700 "A classification of glycosyl hydrolases based on amino acid sequence
00701 similarities."
00702 Biochem. J. 280:309-316(1991).
00703 PubMed=1747104""")
00704         self.assertEqual(record.references[ 4].number, "5")
00705         self.assertEqual(record.references[ 4].authors, "Alzari P.M., Souchon H., Dominguez R.")
00706         self.assertEqual(record.references[ 4].citation, """\
00707 "The crystal structure of endoglucanase CelA, a family 8 glycosyl
00708 hydrolase from Clostridium thermocellum."
00709 Structure 4:265-275(1996).
00710 PubMed=8805535""")
00711         self.assertEqual(record.references[ 5].number, "E1")
00712         self.assertEqual(record.references[ 5].authors, "")
00713         self.assertEqual(record.references[ 5].citation, 'http://www.expasy.org/cgi-bin/lists?glycosid.txt')
00714 
00715     def test_read_pdoc00787(self):
00716         "Reading Prodoc record PDOC00787"
00717         filename = os.path.join( 'Prosite', 'Doc', 'pdoc00787.txt')
00718         handle = open(filename)
00719         record = Prodoc.read(handle)
00720         handle.close()
00721 
00722         self.assertEqual(record.accession, "PDOC00787")
00723         self.assertEqual(len(record.prosite_refs), 1)
00724         self.assertEqual(record.prosite_refs[0], ("PS01027", "GLYCOSYL_HYDROL_F39"))
00725         self.assertEqual(record.text, """\
00726 ******************************************************
00727 * Glycosyl hydrolases family 39 putative active site *
00728 ******************************************************
00729 
00730 It has  been  shown  [1,E1]  that  the  following  glycosyl  hydrolases can be
00731 classified into a single family on the basis of sequence similarities:
00732 
00733  - Mammalian lysosomal alpha-L-iduronidase (EC 3.2.1.76).
00734  - Caldocellum  saccharolyticum  and  Thermoanaerobacter saccharolyticum beta-
00735    xylosidase (EC 3.2.1.37) (gene xynB).
00736 
00737 The best  conserved  regions  in  these  enzymes is  located in the N-terminal
00738 section. It   contains  a  glutamic  acid  residue  which,  on  the  basis  of
00739 similarities with other  families of glycosyl hydrolases [2], probably acts as
00740 the proton donor in the catalytic mechanism. We use this region as a signature
00741 pattern.
00742 
00743 -Consensus pattern: W-x-F-E-x-W-N-E-P-[DN]
00744                     [The second E may be the active site residue]
00745 -Sequences known to belong to this class detected by the pattern: ALL.
00746 -Other sequence(s) detected in Swiss-Prot: NONE.
00747 
00748 -Expert(s) to contact by email:
00749            Henrissat B.; bernie@afmb.cnrs-mrs.fr
00750 
00751 -Last update: May 2004 / Text revised.
00752 
00753 """)
00754 
00755         self.assertEqual(len(record.references), 3)
00756         self.assertEqual(record.references[ 0].number, "1")
00757         self.assertEqual(record.references[ 0].authors, "Henrissat B., Bairoch A.")
00758         self.assertEqual(record.references[ 0].citation, """\
00759 "New families in the classification of glycosyl hydrolases based on
00760 amino acid sequence similarities."
00761 Biochem. J. 293:781-788(1993).
00762 PubMed=8352747""")
00763         self.assertEqual(record.references[ 1].number, "2")
00764         self.assertEqual(record.references[ 1].authors, "Henrissat B., Callebaut I., Fabrega S., Lehn P., Mornon J.-P., Davies G.")
00765         self.assertEqual(record.references[ 1].citation, """\
00766 "Conserved catalytic machinery and the prediction of a common fold for
00767 several families of glycosyl hydrolases."
00768 Proc. Natl. Acad. Sci. U.S.A. 92:7090-7094(1995).
00769 PubMed=7624375""")
00770         self.assertEqual(record.references[ 2].number, "E1")
00771         self.assertEqual(record.references[ 2].authors, '')
00772         self.assertEqual(record.references[ 2].citation, "http://www.expasy.org/cgi-bin/lists?glycosid.txt")
00773 
00774     def test_read_pdoc0933(self):
00775         "Reading Prodoc record PDOC00933"
00776         filename = os.path.join( 'Prosite', 'Doc', 'pdoc00933.txt')
00777         handle = open(filename)
00778         record = Prodoc.read(handle)
00779         handle.close()
00780 
00781         self.assertEqual(record.accession, "PDOC00933")
00782         self.assertEqual(len(record.prosite_refs), 1)
00783         self.assertEqual(record.prosite_refs[0], ("PS01213", "GLOBIN_FAM_2"))
00784         self.assertEqual(record.text, """\
00785 **********************************************
00786 * Protozoan/cyanobacterial globins signature *
00787 **********************************************
00788 
00789 Globins are heme-containing  proteins involved in  binding and/or transporting
00790 oxygen [1]. Almost all globins belong to a large family (see <PDOC00793>), the
00791 only exceptions  are  the  following proteins which form a family of their own
00792 [2,3,4]:
00793 
00794  - Monomeric  hemoglobins  from the protozoan Paramecium caudatum, Tetrahymena
00795    pyriformis and Tetrahymena thermophila.
00796  - Cyanoglobins  from  the  cyanobacteria Nostoc commune and Synechocystis PCC
00797    6803.
00798  - Globins  LI637  and  LI410  from  the chloroplast of the alga Chlamydomonas
00799    eugametos.
00800  - Mycobacterium tuberculosis globins glbN and glbO.
00801 
00802 These proteins  contain a conserved histidine which could be involved in heme-
00803 binding. As a signature pattern, we use a conserved region that ends with this
00804 residue.
00805 
00806 -Consensus pattern: F-[LF]-x(4)-[GE]-G-[PAT]-x(2)-[YW]-x-[GSE]-[KRQAE]-x(1,5)-
00807                     [LIVM]-x(3)-H
00808                     [The H may be a heme ligand]
00809 -Sequences known to belong to this class detected by the pattern: ALL.
00810 -Other sequence(s) detected in Swiss-Prot: NONE.
00811 -Last update: April 2006 / Pattern revised.
00812 
00813 """)
00814 
00815         self.assertEqual(len(record.references), 4)
00816         self.assertEqual(record.references[ 0].number, "1")
00817         self.assertEqual(record.references[ 0].authors, "Concise Encyclopedia Biochemistry, Second Edition, Walter de Gruyter, Berlin New-York (1988).")
00818         self.assertEqual(record.references[ 0].citation, '')
00819         self.assertEqual(record.references[ 1].number, "2")
00820         self.assertEqual(record.references[ 1].authors, "Takagi T.")
00821         self.assertEqual(record.references[ 1].citation, 'Curr. Opin. Struct. Biol. 3:413-418(1993).')
00822         self.assertEqual(record.references[ 2].number, "3")
00823         self.assertEqual(record.references[ 2].authors, "Couture M., Chamberland H., St-Pierre B., Lafontaine J., Guertin M.")
00824         self.assertEqual(record.references[ 2].citation, """\
00825 "Nuclear genes encoding chloroplast hemoglobins in the unicellular
00826 green alga Chlamydomonas eugametos."
00827 Mol. Gen. Genet. 243:185-197(1994).
00828 PubMed=8177215""")
00829         self.assertEqual(record.references[ 3].number, "4")
00830         self.assertEqual(record.references[ 3].authors, "Couture M., Das T.K., Savard P.Y., Ouellet Y., Wittenberg J.B., Wittenberg B.A., Rousseau D.L., Guertin M.")
00831         self.assertEqual(record.references[ 3].citation, """\
00832 "Structural investigations of the hemoglobin of the cyanobacterium
00833 Synechocystis PCC6803 reveal a unique distal heme pocket."
00834 Eur. J. Biochem. 267:4770-4780(2000).
00835 PubMed=10903511""")
00836 
class TestProdocParse(unittest.TestCase):
    """Checks Prodoc.parse against a file that holds several records."""

    def test_parse_pdoc(self):
        """Parsing an excerpt of prosite.doc

        Opens Prosite/Doc/prosite.excerpt.doc (path relative to the
        working directory), pulls the first four records out of
        Prodoc.parse one at a time and checks each of them right after
        it has been produced.
        """
        filename = os.path.join('Prosite', 'Doc', 'prosite.excerpt.doc')
        handle = open(filename)
        try:
            records = Prodoc.parse(handle)
            # The records are consumed through the iterator protocol, so
            # the handle is kept open until the last one has been checked
            # (presumably parsing is lazy -- verify against Prodoc.parse).
            # .next() is the Python 2 spelling already used by this file.
            self._check_pdoc00000(records.next())
            self._check_pdoc00001(records.next())
            self._check_pdoc00004(records.next())
            self._check_pdoc60030(records.next())
        finally:
            # Previously the handle was never closed.
            handle.close()

    def _check_pdoc00000(self, record):
        """Check the first parsed record (the file header, PDOC00000)."""
        self.assertEqual(record.accession, "PDOC00000")
        self.assertEqual(len(record.prosite_refs), 0)
        self.assertEqual(record.text, """\
**********************************
*** PROSITE documentation file ***
**********************************

Release 20.43 of 10-Feb-2009.

PROSITE is developed by the Swiss Institute of Bioinformatics (SIB) under
the responsability of Amos Bairoch and Nicolas Hulo.

This release was prepared by: Nicolas Hulo, Virginie Bulliard, Petra
Langendijk-Genevaux and Christian Sigrist with the help of Edouard
de Castro, Lorenzo Cerutti, Corinne Lachaize and Amos Bairoch.


See: http://www.expasy.org/prosite/
Email: prosite@expasy.org

Acknowledgements:

 - To all those mentioned in this document who have reviewed the entry(ies)
   for which they are listed as experts. With specific thanks to Rein Aasland,
   Mark Boguski, Peer Bork, Josh Cherry, Andre Chollet, Frank Kolakowski,
   David Landsman, Bernard Henrissat, Eugene Koonin, Steve Henikoff, Manuel
   Peitsch and Jonathan Reizer.
 - Jim Apostolopoulos is the author of the PDOC00699 entry.
 - Brigitte Boeckmann is the author of the PDOC00691, PDOC00703, PDOC00829,
   PDOC00796, PDOC00798, PDOC00799, PDOC00906, PDOC00907, PDOC00908,
   PDOC00912, PDOC00913, PDOC00924, PDOC00928, PDOC00929, PDOC00955,
   PDOC00961, PDOC00966, PDOC00988 and PDOC50020 entries.
 - Jean-Louis Boulay is the author of the PDOC01051, PDOC01050, PDOC01052,
   PDOC01053 and PDOC01054 entries.
 - Ryszard Brzezinski is the author of the PDOC60000 entry.
 - Elisabeth Coudert is the author of the PDOC00373 entry.
 - Kirill Degtyarenko is the author of the PDOC60001 entry.
 - Christian Doerig is the author of the PDOC01049 entry.
 - Kay Hofmann is the author of the PDOC50003, PDOC50006, PDOC50007 and
   PDOC50017 entries.
 - Chantal Hulo is the author of the PDOC00987 entry.
 - Karine Michoud is the author of the PDOC01044 and PDOC01042 entries.
 - Yuri Panchin is the author of the PDOC51013 entry.
 - S. Ramakumar is the author of the PDOC51052, PDOC60004, PDOC60010,
   PDOC60011, PDOC60015, PDOC60016, PDOC60018, PDOC60020, PDOC60021,
   PDOC60022, PDOC60023, PDOC60024, PDOC60025, PDOC60026, PDOC60027,
   PDOC60028, PDOC60029 and PDOC60030 entries.
 - Keith Robison is the author of the PDOC00830 and PDOC00861 entries.

   ------------------------------------------------------------------------
   PROSITE is copyright.   It  is  produced  by  the  Swiss  Institute   of
   Bioinformatics (SIB). There are no restrictions on its use by non-profit
   institutions as long as its  content is in no way modified. Usage by and
   for commercial  entities requires a license agreement.   For information
   about  the  licensing  scheme   send  an  email to license@isb-sib.ch or
   see: http://www.expasy.org/prosite/prosite_license.htm.
   ------------------------------------------------------------------------

""")

    def _check_pdoc00001(self, record):
        """Check the second parsed record (PDOC00001, N-glycosylation)."""
        self.assertEqual(record.accession, "PDOC00001")
        self.assertEqual(len(record.prosite_refs), 1)
        self.assertEqual(record.prosite_refs[0], ("PS00001", "ASN_GLYCOSYLATION"))
        self.assertEqual(record.text, """\
************************
* N-glycosylation site *
************************

It has been known for a long time [1] that potential N-glycosylation sites are
specific to the consensus sequence Asn-Xaa-Ser/Thr.  It must be noted that the
presence of the consensus  tripeptide  is  not sufficient  to conclude that an
asparagine residue is glycosylated, due to  the fact that the  folding of  the
protein plays an important  role in the  regulation of N-glycosylation [2]. It
has been shown [3] that  the  presence of proline between Asn and Ser/Thr will
inhibit N-glycosylation; this  has  been confirmed by a recent [4] statistical
analysis of glycosylation sites, which also  shows that about 50% of the sites
that have a proline C-terminal to Ser/Thr are not glycosylated.

It must also  be noted that there  are  a few  reported cases of glycosylation
sites with the pattern Asn-Xaa-Cys; an  experimentally demonstrated occurrence
of such a non-standard site is found in the plasma protein C [5].

-Consensus pattern: N-{P}-[ST]-{P}
                    [N is the glycosylation site]
-Last update: May 1991 / Text revised.

""")
        self.assertEqual(record.references[0].number, "1")
        self.assertEqual(record.references[0].authors, "Marshall R.D.")
        self.assertEqual(record.references[0].citation, """\
"Glycoproteins."
Annu. Rev. Biochem. 41:673-702(1972).
PubMed=4563441; DOI=10.1146/annurev.bi.41.070172.003325""")
        self.assertEqual(record.references[1].number, "2")
        self.assertEqual(record.references[1].authors, "Pless D.D., Lennarz W.J.")
        self.assertEqual(record.references[1].citation, """\
"Enzymatic conversion of proteins to glycoproteins."
Proc. Natl. Acad. Sci. U.S.A. 74:134-138(1977).
PubMed=264667""")
        self.assertEqual(record.references[2].number, "3")
        self.assertEqual(record.references[2].authors, "Bause E.")
        self.assertEqual(record.references[2].citation, """\
"Structural requirements of N-glycosylation of proteins. Studies with
proline peptides as conformational probes."
Biochem. J. 209:331-336(1983).
PubMed=6847620""")
        self.assertEqual(record.references[3].number, "4")
        self.assertEqual(record.references[3].authors, "Gavel Y., von Heijne G.")
        self.assertEqual(record.references[3].citation, """\
"Sequence differences between glycosylated and non-glycosylated
Asn-X-Thr/Ser acceptor sites: implications for protein engineering."
Protein Eng. 3:433-442(1990).
PubMed=2349213""")
        self.assertEqual(record.references[4].number, "5")
        self.assertEqual(record.references[4].authors, "Miletich J.P., Broze G.J. Jr.")
        self.assertEqual(record.references[4].citation, """\
"Beta protein C is not glycosylated at asparagine 329. The rate of
translation may influence the frequency of usage at
asparagine-X-cysteine sites."
J. Biol. Chem. 265:11397-11404(1990).
PubMed=1694179""")

    def _check_pdoc00004(self, record):
        """Check the third parsed record (PDOC00004, cAMP/cGMP kinase site)."""
        self.assertEqual(record.accession, "PDOC00004")
        self.assertEqual(len(record.prosite_refs), 1)
        self.assertEqual(record.prosite_refs[0], ("PS00004", "CAMP_PHOSPHO_SITE"))
        self.assertEqual(record.text, """\
****************************************************************
* cAMP- and cGMP-dependent protein kinase phosphorylation site *
****************************************************************

There has been a  number of studies  relative to the  specificity of cAMP- and
cGMP-dependent protein kinases [1,2,3].  Both types of kinases appear to share
a preference  for  the  phosphorylation  of serine or threonine residues found
close to at least  two consecutive N-terminal  basic residues. It is important
to note that there are quite a number of exceptions to this rule.

-Consensus pattern: [RK](2)-x-[ST]
                    [S or T is the phosphorylation site]
-Last update: June 1988 / First entry.

""")

        self.assertEqual(record.references[0].number, "1")
        self.assertEqual(record.references[0].authors, "Fremisco J.R., Glass D.B., Krebs E.G.")
        self.assertEqual(record.references[0].citation, """\
J. Biol. Chem. 255:4240-4245(1980).""")
        self.assertEqual(record.references[1].number, "2")
        self.assertEqual(record.references[1].authors, "Glass D.B., Smith S.B.")
        self.assertEqual(record.references[1].citation, """\
"Phosphorylation by cyclic GMP-dependent protein kinase of a synthetic
peptide corresponding to the autophosphorylation site in the enzyme."
J. Biol. Chem. 258:14797-14803(1983).
PubMed=6317673""")
        self.assertEqual(record.references[2].number, "3")
        self.assertEqual(record.references[2].authors, "Glass D.B., el-Maghrabi M.R., Pilkis S.J.")
        self.assertEqual(record.references[2].citation, """\
"Synthetic peptides corresponding to the site phosphorylated in
6-phosphofructo-2-kinase/fructose-2,6-bisphosphatase as substrates of
cyclic nucleotide-dependent protein kinases."
J. Biol. Chem. 261:2987-2993(1986).
PubMed=3005275""")

    def _check_pdoc60030(self, record):
        """Check the fourth parsed record (PDOC60030, bacteriocin class IIa)."""
        self.assertEqual(record.accession, "PDOC60030")
        self.assertEqual(len(record.prosite_refs), 1)
        self.assertEqual(record.prosite_refs[0], ("PS60030", "BACTERIOCIN_IIA"))
        self.assertEqual(record.text, """\
******************************************
* Bacteriocin class IIa family signature *
******************************************

Many Gram-positive  bacteria  produce  ribosomally  synthesized  antimicrobial
peptides, often  termed  bacteriocins. One important and well studied class of
bacteriocins is the class IIa or pediocin-like bacteriocins produced by lactic
acid bacteria.  All  class  IIa  bacteriocins  are produced by food-associated
strains, isolated  from  a  variety of food products of industrial and natural
origins, including  meat  products,  dairy  products and vegetables. Class IIa
bacteriocins are all cationic, display anti-Listeria activity, and kill target
cells by permeabilizing the cell membrane [1-3].

Class IIa  bacteriocins  contain  between  37  and 48 residues. Based on their
primary structures,  the  peptide  chains  of  class  IIa  bacteriocins may be
divided roughly into two regions: a hydrophilic, cationic and highly conserved
N-terminal region,  and  a  less  conserved hydrophobic/amphiphilic C-terminal
region. The  N-terminal  region  contains  the conserved Y-G-N-G-V/L 'pediocin
box' motif  and  two conserved cysteine residues joined by a disulfide bridge.
It forms  a  three-stranded antiparallel beta-sheet supported by the conserved
disulfide bridge  (see <PDB:1OG7>). This cationic N-terminal beta-sheet domain
mediates binding of the class IIa bacteriocin to the target cell membrane. The
C-terminal region forms a hairpin-like domain (see <PDB:1OG7>) that penetrates
into the  hydrophobic  part  of  the  target  cell membrane, thereby mediating
leakage through  the  membrane.  The  two domains are joined by a hinge, which
enables movement of the domains relative to each other [2,3].

Some proteins  known  to belong to the class IIa bacteriocin family are listed
below:

 - Pediococcus acidilactici pediocin PA-1.
 - Leuconostoc mesenteroides mesentericin Y105.
 - Carnobacterium piscicola carnobacteriocin B2.
 - Lactobacillus sake sakacin P.
 - Enterococcus faecium enterocin A.
 - Enterococcus faecium enterocin P.
 - Leuconostoc gelidum leucocin A.
 - Lactobacillus curvatus curvacin A.
 - Listeria innocua listeriocin 743A.

The pattern  we  developed  for  the  class  IIa bacteriocin family covers the
'pediocin box' motif.

-Conserved pattern: Y-G-N-G-[VL]-x-C-x(4)-C
-Sequences known to belong to this class detected by the pattern: ALL.
-Other sequence(s) detected in Swiss-Prot: NONE.

-Expert(s) to contact by email:
           Ramakumar S.; ramak@physics.iisc.ernet.in

-Last update: March 2006 / First entry.

""")

        self.assertEqual(record.references[0].number, "1")
        self.assertEqual(record.references[0].authors, "Ennahar S., Sonomoto K., Ishizaki A.")
        self.assertEqual(record.references[0].citation, """\
"Class IIa bacteriocins from lactic acid bacteria: antibacterial
activity and food preservation."
J. Biosci. Bioeng. 87:705-716(1999).
PubMed=16232543""")
        self.assertEqual(record.references[1].number, "2")
        self.assertEqual(record.references[1].authors, "Johnsen L., Fimland G., Nissen-Meyer J.")
        self.assertEqual(record.references[1].citation, """\
"The C-terminal domain of pediocin-like antimicrobial peptides (class
IIa bacteriocins) is involved in specific recognition of the
C-terminal part of cognate immunity proteins and in determining the
antimicrobial spectrum."
J. Biol. Chem. 280:9243-9250(2005).
PubMed=15611086; DOI=10.1074/jbc.M412712200""")
        self.assertEqual(record.references[2].number, "3")
        self.assertEqual(record.references[2].authors, "Fimland G., Johnsen L., Dalhus B., Nissen-Meyer J.")
        self.assertEqual(record.references[2].citation, """\
"Pediocin-like antimicrobial peptides (class IIa bacteriocins) and
their immunity proteins: biosynthesis, structure, and mode of
action."
J. Pept. Sci. 11:688-696(2005).
PubMed=16059970; DOI=10.1002/psc.699""")
01097 
01098 
if __name__ == "__main__":
    # Run every test in this module with the verbose text runner.
    unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))