Back to index

python-biopython  1.60
test_MMCIF.py
Go to the documentation of this file.
00001 # Copyright 2012 Lenna X. Peterson (arklenna@gmail.com).
00002 # All rights reserved.
00003 #
00004 # Tests adapted from test_PDB.py
00005 #
00006 # This code is part of the Biopython distribution and governed by its
00007 # license. Please see the LICENSE file that should have been included
00008 # as part of this package.
00009 
00010 """Unit tests for the MMCIF portion of the Bio.PDB module."""
00011 
00012 import os
00013 import tempfile
00014 import unittest
00015 import warnings
00016 
00017 try:
00018     import numpy
00019     from numpy import dot #Missing on PyPy's micronumpy
00020     del dot
00021 except ImportError:
00022     from Bio import MissingPythonDependencyError
00023     raise MissingPythonDependencyError(
00024         "Install NumPy if you want to use Bio.PDB.")
00025 
00026 try:
00027     import Bio.PDB.mmCIF.MMCIFlex
00028 except ImportError:
00029     from Bio import MissingPythonDependencyError
00030     raise MissingPythonDependencyError("C extension MMCIFlex not installed.")
00031 
00032 from Bio.Seq import Seq
00033 from Bio.Alphabet import generic_protein
00034 from Bio.PDB.PDBExceptions import PDBConstructionException, PDBConstructionWarning
00035 
00036 from Bio.PDB import PPBuilder, CaPPBuilder
00037 from Bio.PDB.MMCIFParser import MMCIFParser
00038 
class ParseReal(unittest.TestCase):
    """Testing with real CIF file(s)."""

    def _check_polypeptide(self, pp, start, end, sequence):
        """Assert the end positions and the sequence of one polypeptide.

        Arguments:
         - pp - a polypeptide as returned by build_peptides
         - start - expected position (second element of the residue id)
           of the first residue
         - end - expected position of the last residue
         - sequence - expected sequence, as a plain string
        """
        # Check the start and end positions
        self.assertEqual(pp[0].get_id()[1], start)
        self.assertEqual(pp[-1].get_id()[1], end)
        # Check the sequence
        s = pp.get_sequence()
        self.assertTrue(isinstance(s, Seq))
        self.assertEqual(s.alphabet, generic_protein)
        self.assertEqual(sequence, str(s))

    def test_parser(self):
        """Extract polypeptides from 1A8O."""
        parser = MMCIFParser()
        structure = parser.get_structure("example", "PDB/1A8O.cif")
        self.assertEqual(len(structure), 1)
        # Check that serial_num (model column) is stored properly.
        # This does not depend on the builder, so check it once up front.
        self.assertEqual(structure[0].serial_num, 1)
        # Expected (start, end, sequence) of each fragment in strict mode:
        # MSE 151 at the start is ignored, and the chain is then broken
        # at MSE 185, and MSE 214,215
        strict_fragments = [
            (152, 184, "DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW"),
            (186, 213, "TETLLVQNANPDCKTILKALGPGATLEE"),
            (216, 220, "TACQG"),
        ]
        for ppbuild in [PPBuilder(), CaPPBuilder()]:
            #==========================================================
            #First try allowing non-standard amino acids,
            #here non-standard MSE are shown as M
            polypeptides = ppbuild.build_peptides(structure[0], False)
            self.assertEqual(len(polypeptides), 1)
            self._check_polypeptide(
                polypeptides[0], 151, 220,
                "MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQ"
                "NANPDCKTILKALGPGATLEEMMTACQG")
            #==========================================================
            #Now try strict version with only standard amino acids
            polypeptides = ppbuild.build_peptides(structure[0], True)
            self.assertEqual(len(polypeptides), 3)
            for pp, (start, end, sequence) in zip(polypeptides,
                                                  strict_fragments):
                self._check_polypeptide(pp, start, end, sequence)

    def testModels(self):
        """Test file with multiple models."""
        parser = MMCIFParser()
        structure = parser.get_structure("example", "PDB/1LCD.cif")
        self.assertEqual(len(structure), 3)
        # Check that serial_num (model column) is stored properly.
        # This does not depend on the builder, so check it once up front.
        self.assertEqual(structure[0].serial_num, 1)
        self.assertEqual(structure[1].serial_num, 2)
        self.assertEqual(structure[2].serial_num, 3)
        sequence = "MKPVTLYDVAEYAGVSYQTVSRVVNQASHVSAKTREKVEAAMAELNYIPNR"
        for ppbuild in [PPBuilder(), CaPPBuilder()]:
            #First try allowing non-standard amino acids (False), then
            #the strict version with only standard amino acids (True).
            #The same single polypeptide is expected in both modes.
            for aa_only in (False, True):
                polypeptides = ppbuild.build_peptides(structure[0], aa_only)
                self.assertEqual(len(polypeptides), 1)
                self._check_polypeptide(polypeptides[0], 1, 51, sequence)
00135 
# Run this module's tests with verbose output when executed as a script.
if __name__ == '__main__':
    unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))