Back to index

python-biopython  1.60
MMCIF2Dict.py
Go to the documentation of this file.
00001 # Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk)
00002 # This code is part of the Biopython distribution and governed by its
00003 # license.  Please see the LICENSE file that should have been included
00004 # as part of this package.
00005 
00006 """Turn an mmCIF file into a dictionary."""
00007 
00008 import os.path
00009 import warnings
00010 import Bio.PDB.mmCIF.MMCIFlex as MMCIFlex
00011 
00012 
class MMCIF2Dict(dict):
    """Dictionary holding the name/data pairs of an mmCIF file.

    Keys are the data names produced by the lexer.  A name that appears
    inside a ``loop_`` maps to a list of values, any other name maps to a
    single value.  Two special keys are always or conditionally present:

    - ``None`` maps to a list of data values that had no name (garbage).
    - The first five characters of a data-block token (``value[0:5]``) map
      to the remainder of that token.
    """

    # The token identifiers returned by the lexer.  0 signals end of file;
    # every identifier greater than DATA denotes a data value.
    NAME = 1
    LOOP = 2
    DATA = 3
    SEMICOLONS = 4
    DOUBLEQUOTED = 5
    QUOTED = 6
    SIMPLE = 7

    def __init__(self, filename):
        """Parse the mmCIF file *filename* and fill this dictionary.

        Raises IOError if *filename* is not an existing regular file.
        """
        if not os.path.isfile(filename):
            raise IOError("File not found.")
        MMCIFlex.open_file(filename)
        try:
            parsed = self._make_mmcif_dict()
        finally:
            # Always release the lexer's file, even when parsing fails.
            MMCIFlex.close_file()
        # Pass the mapping positionally: it contains the non-string key
        # None, which must not be expanded into keyword arguments.
        dict.__init__(self, parsed)

    def _make_mmcif_dict(self, get_token=None):
        """Loop through (token, value) pairs and return them as a dict.

        get_token -- optional callable returning the next (token, value)
        pair, with a false token (0) at end of input.  Defaults to
        MMCIFlex.get_token, which reads the file opened in __init__.

        Malformed input is reported with a RuntimeWarning rather than an
        exception.  A name that is not followed by a data value is skipped,
        and the token that followed it is processed normally.
        """
        if get_token is None:
            get_token = MMCIFlex.get_token
        # local copies
        NAME = self.NAME
        LOOP = self.LOOP
        DATA = self.DATA
        # this dict will contain the name/data pairs,
        # the None entry collects garbage (data without a name)
        mmcif_dict = {None: []}
        # did we just see a loop_ token?
        loop_flag = False
        # value lists of the names in the current loop, in column order
        temp_list = []
        # get first token/value pair
        token, value = get_token()
        # loop until EOF (token==0)
        while token:
            if token == NAME:
                if loop_flag:
                    # create a list for each name encountered in the loop
                    while token == NAME:
                        new_list = mmcif_dict[value] = []
                        temp_list.append(new_list)
                        token, value = get_token()
                    loop_flag = False
                    # nr of data items parsed
                    data_counter = 0
                    # column of the last data item parsed
                    pos = 0
                    nr_fields = len(temp_list)
                    # now distribute the data over the lists, row by row
                    while token > DATA:
                        pos = data_counter % nr_fields
                        data_counter += 1
                        temp_list[pos].append(value)
                        token, value = get_token()
                    if pos != nr_fields - 1:
                        warnings.warn("ERROR: broken name-data pair "
                                      "(data missing)!", RuntimeWarning)
                    # The token that ended the data has not been used yet,
                    # so do not fetch a new one: it goes through the main
                    # loop on the next iteration.
                else:
                    # simple name-data pair (no loop),
                    # so the next token should be the data
                    next_token, data = get_token()
                    if next_token > DATA:
                        mmcif_dict[value] = data
                        token, value = get_token()
                    else:
                        warnings.warn("ERROR: broken name-data pair "
                                      "(name-non data pair)!", RuntimeWarning)
                        # The name has no data: skip it and let the token
                        # that followed it (name, loop_, data_ or EOF) go
                        # through the main loop instead of discarding it.
                        token, value = next_token, data
            elif token == LOOP:
                loop_flag = True
                temp_list = []
                token, value = get_token()
            elif token == DATA:
                mmcif_dict[value[0:5]] = value[5:]
                token, value = get_token()
            else:
                # we found some complete garbage
                warnings.warn("ERROR: broken name-data pair "
                              "(missing name)!\n%s %s" % (token, value),
                              RuntimeWarning)
                mmcif_dict[None].append(value)
                token, value = get_token()
        return mmcif_dict
00120 
00121 
if __name__ == "__main__":
    # Small interactive browser: parse the mmCIF file given on the command
    # line, then print the value stored under every key the user types.

    import sys

    if len(sys.argv) != 2:
        print("Usage: python MMCIF2Dict filename.")
        # Stop here: without exactly one argument there is no file to parse.
        sys.exit(1)

    filename = sys.argv[1]

    mmcif_dict = MMCIF2Dict(filename)

    # raw_input only exists on Python 2; fall back to input elsewhere.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    print("Now type a key ('q' to end, 'k' for a list of all keys):")
    while True:
        entry = read_line("MMCIF dictionary key ==> ")
        if entry == "q":
            sys.exit()
        if entry == "k":
            for key in mmcif_dict:
                print(key)
            continue
        try:
            value = mmcif_dict[entry]
        except KeyError:
            print("No such key found.")
            continue
        # names from a loop hold a list of values, other names a single one
        if isinstance(value, list):
            for item in value:
                print(item)
        else:
            print(value)
00152