Back to index

python-biopython  1.60
Residues.py
Go to the documentation of this file.
00001 # Copyright 2000 by Jeffrey Chang.  All rights reserved.
00002 # This code is part of the Biopython distribution and governed by its
00003 # license.  Please see the LICENSE file that should have been included
00004 # as part of this package.
00005 
00006 # Gavin E. Crooks 2001-11-03
00007 # Minor extensions, some bug fixes, and major changes to the interface 
00008 
00009 import re
00010 
00011 """A collection of residues from a PDB structure."""
00012 
# Optional four-character PDB id at the very start of a definition. It must
# either stand alone or be separated from the fragment list by whitespace or
# an underscore, e.g. "1bba A:1-100" or "104l_A:".
_pdbid_re = re.compile(r"^(\w\w\w\w)(?:$|\s+|_)(.*)")
# A single chain fragment: optional opening bracket, optional "X:" chain
# prefix, optional start and end residue ids (either may be negative),
# optional closing bracket, and finally any leftover text (which must be
# empty for the fragment to be valid).
_fragment_re = re.compile(r"\(?(\w:)?(-?\w*)-?(-?\w*)\)?(.*)")


class Residues(object):
    """A collection of residues from a PDB structure.

    This class provides code to work with SCOP domain definitions. These
    are concisely expressed as one or more chain fragments. For example,
    "(1bba A:10-20,B:)" indicates residues 10 through 20 (inclusive) of
    chain A, and every residue of chain B in the pdb structure 1bba. The pdb
    id and brackets are optional. In addition "-" indicates every residue of
    a pdb structure with one unnamed chain.

    Start and end residue ids consist of the residue sequence number and an
    optional single letter insertion code. e.g. "12", "-1", "1a", "1000"


    pdbid -- An optional PDB id, e.g. "1bba"

    fragments -- A sequence of tuples (chainID, startResID, endResID)

    """

    def __init__(self, str=None):
        """Create a residue collection, optionally parsing a definition.

        str -- A domain definition such as "1bba A:10-20,B:". When omitted
               (None), the instance has no pdbid and no fragments. The
               parameter name shadows the builtin but is kept so existing
               keyword callers continue to work.

        Raises ValueError if the definition cannot be parsed.
        """
        self.pdbid = ''
        self.fragments = ()
        if str is not None:
            self._parse(str)

    def _parse(self, str):
        """Fill in pdbid and fragments from a domain definition string.

        Raises ValueError if any comma separated fragment is malformed.
        """
        # Work on a local name so the builtin is not rebound any further.
        text = str.strip()

        # Is there a pdbid at the front? e.g. 1bba A:1-100
        m = _pdbid_re.match(text)
        if m is None and text.startswith("(") and text.endswith(")"):
            # Fully bracketed definition with the pdbid inside the brackets,
            # e.g. "(1bba A:10-20,B:)". Only accept the pdbid when a fragment
            # list follows it: a lone bracketed token such as "(1000)" is
            # ambiguous and keeps being read as a residue id.
            inner = _pdbid_re.match(text[1:-1].strip())
            if inner is not None and inner.group(2):
                m = inner
        if m is not None:
            self.pdbid = m.group(1)
            text = m.group(2)  # Everything else

        if text in ('', '-', '(-)'):  # no fragments, whole sequence
            return

        fragments = []
        for piece in text.split(","):
            m = _fragment_re.match(piece)
            if m is None:
                raise ValueError("I don't understand the format of %s" % piece)
            chain, start, end, postfix = m.groups()

            # Anything the pattern could not consume means a malformed piece.
            if postfix != "":
                raise ValueError("I don't understand the format of %s" % piece)

            # The pattern only captures a chain as "X:", so dropping the
            # last character always removes exactly the ':'.
            chain = chain[:-1] if chain else ""

            fragments.append((chain, start, end))
        self.fragments = tuple(fragments)

    def __str__(self):
        """Return the definition in SCOP notation, e.g. "1bba A:10-20,B:".

        A collection without fragments is written as "-" (whole structure),
        prefixed by the pdbid and a space when a pdbid is set.
        """
        prefix = ""
        if self.pdbid:
            prefix = self.pdbid + ' '

        if not self.fragments:
            return prefix + '-'

        strs = []
        for chain, start, end in self.fragments:
            s = []
            if chain:
                s.append("%s:" % chain)
            if start:
                s.append("%s-%s" % (start, end))
            strs.append("".join(s))
        return prefix + ",".join(strs)
00089 
00090 
00091 
00092 
00093 
00094 
00095