Back to index

python-biopython  1.60
MMCIFParser.py
Go to the documentation of this file.
00001 # Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk)
00002 # This code is part of the Biopython distribution and governed by its
00003 # license.  Please see the LICENSE file that should have been included
00004 # as part of this package.
00005 
00006 """mmCIF parser (partly implemented in C)."""
00007 
00008 from string import ascii_letters
00009 
00010 import numpy
00011 
00012 from Bio.PDB.MMCIF2Dict import MMCIF2Dict
00013 from Bio.PDB.StructureBuilder import StructureBuilder
00014 from Bio.PDB.PDBExceptions import \
00015         PDBConstructionException, PDBConstructionWarning
00016 
00017 
00018 class MMCIFParser(object):
00019     def get_structure(self, structure_id, filename):
00020         self._mmcif_dict=MMCIF2Dict(filename)
00021         self._structure_builder=StructureBuilder()
00022         self._build_structure(structure_id)
00023         return self._structure_builder.get_structure()
00024 
00025     def _build_structure(self, structure_id):
00026         mmcif_dict=self._mmcif_dict
00027         atom_id_list=mmcif_dict["_atom_site.label_atom_id"]
00028         residue_id_list=mmcif_dict["_atom_site.label_comp_id"]
00029         try:
00030             element_list = mmcif_dict["_atom_site.type_symbol"]
00031         except KeyError:
00032             element_list = None
00033         seq_id_list=mmcif_dict["_atom_site.label_seq_id"]
00034         chain_id_list=mmcif_dict["_atom_site.label_asym_id"]
00035         x_list=map(float, mmcif_dict["_atom_site.Cartn_x"])
00036         y_list=map(float, mmcif_dict["_atom_site.Cartn_y"])
00037         z_list=map(float, mmcif_dict["_atom_site.Cartn_z"])
00038         alt_list=mmcif_dict["_atom_site.label_alt_id"]
00039         b_factor_list=mmcif_dict["_atom_site.B_iso_or_equiv"]
00040         occupancy_list=mmcif_dict["_atom_site.occupancy"]
00041         fieldname_list=mmcif_dict["_atom_site.group_PDB"]
00042         try:
00043             serial_list = [int(n) for n in mmcif_dict["_atom_site.pdbx_PDB_model_num"]]
00044         except KeyError:
00045             # No model number column
00046             serial_list = None
00047         except ValueError:
00048             # Invalid model number (malformed file)
00049             raise PDBConstructionException("Invalid model number")
00050         try:
00051             aniso_u11=mmcif_dict["_atom_site.aniso_U[1][1]"]
00052             aniso_u12=mmcif_dict["_atom_site.aniso_U[1][2]"]
00053             aniso_u13=mmcif_dict["_atom_site.aniso_U[1][3]"]
00054             aniso_u22=mmcif_dict["_atom_site.aniso_U[2][2]"]
00055             aniso_u23=mmcif_dict["_atom_site.aniso_U[2][3]"]
00056             aniso_u33=mmcif_dict["_atom_site.aniso_U[3][3]"]
00057             aniso_flag=1
00058         except KeyError:
00059             # no anisotropic B factors
00060             aniso_flag=0
00061         # if auth_seq_id is present, we use this.
00062         # Otherwise label_seq_id is used.
00063         if "_atom_site.auth_seq_id" in mmcif_dict:
00064             seq_id_list=mmcif_dict["_atom_site.auth_seq_id"]
00065         else:
00066             seq_id_list=mmcif_dict["_atom_site.label_seq_id"]
00067         # Now loop over atoms and build the structure
00068         current_chain_id=None
00069         current_residue_id=None
00070         structure_builder=self._structure_builder
00071         structure_builder.init_structure(structure_id)
00072         structure_builder.init_seg(" ")
00073         # Historically, Biopython PDB parser uses model_id to mean array index
00074         # so serial_id means the Model ID specified in the file
00075         current_model_id = 0
00076         current_serial_id = 0 
00077         for i in xrange(0, len(atom_id_list)):
00078             x=x_list[i]
00079             y=y_list[i]
00080             z=z_list[i]
00081             resname=residue_id_list[i]
00082             chainid=chain_id_list[i]
00083             altloc=alt_list[i]
00084             if altloc==".":
00085                 altloc=" "
00086             resseq=seq_id_list[i]
00087             name=atom_id_list[i]
00088             tempfactor=b_factor_list[i]
00089             occupancy=occupancy_list[i]
00090             fieldname=fieldname_list[i]
00091             if fieldname=="HETATM":
00092                 hetatm_flag="H"
00093             else:
00094                 hetatm_flag=" "
00095             if serial_list is not None:
00096                 # model column exists; use it
00097                 serial_id = serial_list[i]
00098                 if current_serial_id != serial_id:
00099                     # if serial changes, update it and start new model
00100                     current_serial_id = serial_id
00101                     structure_builder.init_model(current_model_id, current_serial_id)
00102                     current_model_id += 1
00103             else:
00104                 # no explicit model column; initialize single model
00105                 structure_builder.init_model(current_model_id)
00106             if current_chain_id!=chainid:
00107                 current_chain_id=chainid
00108                 structure_builder.init_chain(current_chain_id)
00109                 current_residue_id=resseq
00110                 icode, int_resseq=self._get_icode(resseq)
00111                 structure_builder.init_residue(resname, hetatm_flag, int_resseq, 
00112                     icode)
00113             elif current_residue_id!=resseq:
00114                 current_residue_id=resseq
00115                 icode, int_resseq=self._get_icode(resseq)
00116                 structure_builder.init_residue(resname, hetatm_flag, int_resseq, 
00117                     icode)
00118             coord=numpy.array((x, y, z), 'f')  
00119             element = element_list[i] if element_list else None
00120             structure_builder.init_atom(name, coord, tempfactor, occupancy, altloc,
00121                 name, element=element)   
00122             if aniso_flag==1:
00123                 u=(aniso_u11[i], aniso_u12[i], aniso_u13[i],
00124                     aniso_u22[i], aniso_u23[i], aniso_u33[i])
00125                 mapped_anisou=map(float, u)
00126                 anisou_array=numpy.array(mapped_anisou, 'f')
00127                 structure_builder.set_anisou(anisou_array)
00128         # Now try to set the cell
00129         try:
00130             a=float(mmcif_dict["_cell.length_a"])
00131             b=float(mmcif_dict["_cell.length_b"])
00132             c=float(mmcif_dict["_cell.length_c"])
00133             alpha=float(mmcif_dict["_cell.angle_alpha"])
00134             beta=float(mmcif_dict["_cell.angle_beta"])
00135             gamma=float(mmcif_dict["_cell.angle_gamma"])
00136             cell=numpy.array((a, b, c, alpha, beta, gamma), 'f')
00137             spacegroup=mmcif_dict["_symmetry.space_group_name_H-M"]
00138             spacegroup=spacegroup[1:-1] # get rid of quotes!!
00139             if spacegroup==None:
00140                 raise Exception
00141             structure_builder.set_symmetry(spacegroup, cell)
00142         except:
00143             pass    # no cell found, so just ignore
00144 
00145     def _get_icode(self, resseq):           
00146         """Tries to return the icode. In MMCIF files this is just part of
00147         resseq! In PDB files, it's a separate field."""
00148         last_resseq_char=resseq[-1]
00149         if last_resseq_char in ascii_letters:
00150             icode=last_resseq_char
00151             int_resseq=int(resseq[0:-1])
00152         else:
00153             icode=" "
00154             int_resseq=int(resseq)
00155         return icode, int_resseq    
00156 
00157 
00158 if __name__=="__main__":
00159     import sys
00160 
00161     if len(sys.argv) != 2:
00162         print "Usage: python MMCIFparser.py filename"
00163         raise SystemExit
00164     filename=sys.argv[1]
00165 
00166     p=MMCIFParser()
00167 
00168     structure=p.get_structure("test", filename)
00169 
00170     for model in structure.get_list():
00171         print model
00172         for chain in model.get_list():
00173             print chain
00174             print "Found %d residues." % len(chain.get_list())
00175