Back to index

python-biopython  1.60
StructureBuilder.py
Go to the documentation of this file.
00001 # Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk)
00002 # This code is part of the Biopython distribution and governed by its
00003 # license.  Please see the LICENSE file that should have been included
00004 # as part of this package.  
00005 
00006 """Consumer class that builds a Structure object.
00007 
00008 This is used by the PDBParser and MMCIFParser classes.
00009 """
00010 
00011 import warnings
00012 
00013 # SMCRA hierarchy
00014 from Bio.PDB.Structure import Structure
00015 from Bio.PDB.Model import Model
00016 from Bio.PDB.Chain import Chain
00017 from Bio.PDB.Residue import Residue, DisorderedResidue
00018 from Bio.PDB.Atom import Atom, DisorderedAtom 
00019 
00020 from Bio.PDB.PDBExceptions import \
00021         PDBConstructionException, PDBConstructionWarning
00022 
00023 
class StructureBuilder(object):
    """
    Deals with constructing the Structure object. The StructureBuilder class
    is used by the PDBParser classes to translate a file to a Structure
    object.

    The parser calls the init_* and set_* methods in file order; each call
    updates the "current" object the next call operates on:

    o structure - set by init_structure
    o model - set by init_model, added to the current structure
    o chain - set by init_chain, added to (or looked up in) the current model
    o segid - set by init_seg, passed to every Residue created afterwards
    o residue - set by init_residue; None when residue construction failed
    o atom - set by init_atom; None when the atom was skipped
    """
    def __init__(self):
        # Line of the input file being parsed; only used in warning messages.
        self.line_counter = 0
        # Header dictionary, attached to the structure by get_structure().
        self.header = {}

    def _is_completely_disordered(self, residue):
        """Return True if all atoms in the residue have a non blank altloc.

        A residue without atoms counts as completely disordered.
        """
        return all(atom.get_altloc() != " "
                   for atom in residue.get_unpacked_list())

    # Public methods called by the Parser classes

    def set_header(self, header):
        """Store the header that get_structure attaches to the structure.

        Arguments:
        o header - stored as given (initialised to an empty dict)
        """
        self.header = header

    def set_line_counter(self, line_counter):
        """
        The line counter keeps track of the line in the PDB file that
        is being parsed.

        Arguments:
        o line_counter - int
        """
        self.line_counter = line_counter

    def init_structure(self, structure_id):
        """Initiate a new Structure object with given id.

        Arguments:
        o structure_id - string
        """
        self.structure = Structure(structure_id)

    def init_model(self, model_id, serial_num=None):
        """Initiate a new Model object with given id.

        The model becomes the current model and is added to the current
        structure.

        Arguments:
        o model_id - int
        o serial_num - int
        """
        self.model = Model(model_id, serial_num)
        self.structure.add(self.model)

    def init_chain(self, chain_id):
        """Initiate a new Chain object with given id.

        If the current model already has a chain with this id, that chain
        becomes the current chain again and a PDBConstructionWarning about
        the discontinuity is issued.

        Arguments:
        o chain_id - string
        """
        if self.model.has_id(chain_id):
            self.chain = self.model[chain_id]
            warnings.warn("WARNING: Chain %s is discontinuous at line %i."
                          % (chain_id, self.line_counter),
                          PDBConstructionWarning)
        else:
            self.chain = Chain(chain_id)
            self.model.add(self.chain)

    def init_seg(self, segid):
        """Flag a change in segid.

        Arguments:
        o segid - string
        """
        self.segid = segid

    def init_residue(self, resname, field, resseq, icode):
        """
        Initiate a new Residue object.

        A repeated id with a blank hetero field is treated as a point
        mutation: the residues sharing that id are collected in one
        DisorderedResidue, which becomes the current residue.

        Arguments:
        o resname - string, e.g. "ASN"
        o field - hetero flag, "W" for waters, "H" for
            hetero residues, otherwise blank.
        o resseq - int, sequence identifier
        o icode - string, insertion code

        Raises PDBConstructionException if the residue already stored under
        the same id has atoms with a blank altloc; the current residue is
        then set to None so that init_atom skips its atoms.
        """
        if field == "H":
            # The hetero field consists of H_ + the residue name (e.g. H_FUC)
            field = "H_" + resname
        res_id = (field, resseq, icode)
        if field == " " and self.chain.has_id(res_id):
            # There already is a residue with the id (field, resseq, icode).
            # This only makes sense in the case of a point mutation.
            warnings.warn("WARNING: Residue ('%s', %i, '%s') "
                          "redefined at line %i."
                          % (field, resseq, icode, self.line_counter),
                          PDBConstructionWarning)
            duplicate_residue = self.chain[res_id]
            if duplicate_residue.is_disordered() == 2:
                # The residue in the chain is a DisorderedResidue object.
                # So just add the last Residue object.
                if duplicate_residue.disordered_has_id(resname):
                    # The residue was already made
                    self.residue = duplicate_residue
                    duplicate_residue.disordered_select(resname)
                    # NOTE(review): unlike the sibling branches there is no
                    # return here, so control reaches the Residue creation
                    # and chain.add() at the end of the method for an id the
                    # chain already holds. Confirm whether that is intended.
                else:
                    # Make a new residue and add it to the already
                    # present DisorderedResidue
                    new_residue = Residue(res_id, resname, self.segid)
                    duplicate_residue.disordered_add(new_residue)
                    self.residue = duplicate_residue
                    return
            else:
                # Make a new DisorderedResidue object and put all
                # the Residue objects with the id (field, resseq, icode) in
                # it. These residues each should have non-blank altlocs for
                # all their atoms. If not, the PDB file probably contains an
                # error.
                if not self._is_completely_disordered(duplicate_residue):
                    # if this exception is ignored, a residue will be missing
                    self.residue = None
                    raise PDBConstructionException(
                        "Blank altlocs in duplicate residue %s ('%s', %i, '%s')"
                        % (resname, field, resseq, icode))
                self.chain.detach_child(res_id)
                new_residue = Residue(res_id, resname, self.segid)
                disordered_residue = DisorderedResidue(res_id)
                self.chain.add(disordered_residue)
                disordered_residue.disordered_add(duplicate_residue)
                disordered_residue.disordered_add(new_residue)
                self.residue = disordered_residue
                return
        residue = Residue(res_id, resname, self.segid)
        # NOTE(review): self.residue is only updated after add() succeeds; if
        # add() raises (presumably on a duplicate id - confirm), the current
        # residue is still the previous one.
        self.chain.add(residue)
        self.residue = residue

    def init_atom(self, name, coord, b_factor, occupancy, altloc, fullname,
                  serial_number=None, element=None):
        """
        Initiate a new Atom object.

        The atom becomes the current atom. Atoms with a non-blank altloc are
        stored inside a DisorderedAtom. If the current residue is None the
        atom is skipped and the current atom is cleared.

        Arguments:
        o name - string, atom name, e.g. CA, spaces should be stripped
        o coord - Numeric array (Float0, size 3), atomic coordinates
        o b_factor - float, B factor
        o occupancy - float
        o altloc - string, alternative location specifier
        o fullname - string, atom name including spaces, e.g. " CA "
        o serial_number - passed through to Atom unchanged
        o element - string, upper case, e.g. "HG" for mercury
        """
        residue = self.residue
        # if residue is None, an exception was generated during
        # the construction of the residue
        if residue is None:
            # Forget the previous atom as well: otherwise a following
            # set_anisou/set_siguij/set_sigatm call meant for this skipped
            # atom would overwrite the data of an unrelated atom.
            self.atom = None
            return
        # First check if this atom is already present in the residue.
        # If it is, it might be due to the fact that the two atoms have atom
        # names that differ only in spaces (e.g. "CA.." and ".CA.",
        # where the dots are spaces). If that is so, use all spaces
        # in the atom name of the current atom.
        if residue.has_id(name):
            duplicate_atom = residue[name]
            # atom name with spaces of duplicate atom
            duplicate_fullname = duplicate_atom.get_fullname()
            if duplicate_fullname != fullname:
                # name of current atom now includes spaces
                name = fullname
                warnings.warn("Atom names %r and %r differ "
                              "only in spaces at line %i."
                              % (duplicate_fullname, fullname,
                                 self.line_counter),
                              PDBConstructionWarning)
        atom = self.atom = Atom(name, coord, b_factor, occupancy, altloc,
                                fullname, serial_number, element)
        if altloc == " ":
            # The atom is not disordered
            residue.add(atom)
            return
        # The atom is disordered
        if not residue.has_id(name):
            # The residue does not contain this disordered atom
            # so we create a new one.
            disordered_atom = DisorderedAtom(name)
            residue.add(disordered_atom)
            # Add the real atom to the disordered atom, and the
            # disordered atom to the residue
            disordered_atom.disordered_add(atom)
            residue.flag_disordered()
            return
        # Residue already contains this atom
        duplicate_atom = residue[name]
        if duplicate_atom.is_disordered() == 2:
            duplicate_atom.disordered_add(atom)
            return
        # This is an error in the PDB file:
        # a disordered atom is found with a blank altloc
        # Detach the duplicate atom, and put it in a
        # DisorderedAtom object together with the current
        # atom.
        residue.detach_child(name)
        disordered_atom = DisorderedAtom(name)
        residue.add(disordered_atom)
        disordered_atom.disordered_add(atom)
        disordered_atom.disordered_add(duplicate_atom)
        residue.flag_disordered()
        warnings.warn("WARNING: disordered atom found "
                      "with blank altloc before line %i.\n"
                      % self.line_counter,
                      PDBConstructionWarning)

    def set_anisou(self, anisou_array):
        "Set anisotropic B factor of current Atom (no-op if it was skipped)."
        if self.atom is None:
            return
        self.atom.set_anisou(anisou_array)

    def set_siguij(self, siguij_array):
        """Set standard deviation of anisotropic B factor of current Atom.

        Does nothing if the current atom was skipped.
        """
        if self.atom is None:
            return
        self.atom.set_siguij(siguij_array)

    def set_sigatm(self, sigatm_array):
        """Set standard deviation of atom position of current Atom.

        Does nothing if the current atom was skipped.
        """
        if self.atom is None:
            return
        self.atom.set_sigatm(sigatm_array)

    def get_structure(self):
        "Return the structure, with the stored header attached to it."
        # Sorting is intentionally left disabled here, as in the original:
        # self.structure.sort()
        # Add the header dict
        self.structure.header = self.header
        return self.structure

    def set_symmetry(self, spacegroup, cell):
        "Accept symmetry information; this builder ignores it."
        pass
00256 
00257