Back to index

python-biopython  1.60
xpktools.py
Go to the documentation of this file.
00001 # xpktools.py: A python module containing function definitions and classes
00002 #          useful for manipulating data from nmrview .xpk peaklist files.
00003 #
00004 # ********** INDEX of functions and classes **********
00005 #
00006 #    XpkEntry class: A class suited for handling single lines of
00007 #        non-header data from an nmrview .xpk file.  This class
00008 #        provides methods for extracting data by the field name
00009 #        which is listed in the last line of the peaklist header.
00010 
00011 import sys
00012 
00013 # * * * * * INITIALIZATIONS * * * * *
00014 HEADERLEN=6
00015 # * * * * * _______________ * * * * *
00016 
class XpkEntry(object):
    """Hold the fields of a single non-header line of an nmrview .xpk file.

    Usage: XpkEntry(xpkentry, xpkheadline) where xpkentry is a data line
    from an nmrview .xpk file and xpkheadline is the header line that
    gives the names of the entries (typically the sixth line of the
    header, counting from 1).

    Values are stored as strings in the ``fields`` dictionary and are
    accessed by their name in the header line, for example
    ``self.fields["H1.P"]`` returns the H1.P entry.
    ``self.fields["entrynum"]`` returns the line number, i.e. the first
    whitespace-separated field of the data line.
    """

    def __init__(self, entry, headline):
        """Split entry and headline on whitespace and pair them up.

        The first token of entry is the entry number; every following
        token is stored under the header label at the same position.

        Raises IndexError if the header has fewer labels than the data
        line has values (after the entry number).
        """
        datlist = entry.split()
        headlist = headline.split()

        # Everything after the leading entry number is labelled data.
        values = datlist[1:]
        if len(headlist) < len(values):
            raise IndexError(
                "header line has %d labels but data line has %d values"
                % (len(headlist), len(values)))

        # Holds all fields from the input line; keys are the data labels
        # from the .xpk header.  Surplus header labels are ignored.
        self.fields = dict(zip(headlist, values))

        # An empty data line has no entry number to record.
        if datlist:
            self.fields["entrynum"] = datlist[0]
00041 
class Peaklist(object):
    """Read an entire nmrview .xpk peaklist file into memory.

    The six header lines are available as the attributes ``firstline``,
    ``axislabels``, ``dataset``, ``sw``, ``sf`` and ``datalabels``.
    The data lines are available as the list ``data``.  All lines are
    stored without their trailing newline.
    """

    def __init__(self, infn):
        """Read the file named infn.

        Header attributes for lines missing from a short file are set to
        the empty string.  An IOError from open() is propagated.
        """
        self.data = []    # init the data line list

        infile = open(infn, 'r')
        try:
            # Read in the header lines
            self.firstline = infile.readline().split("\n")[0]
            self.axislabels = infile.readline().split("\n")[0]
            self.dataset = infile.readline().split("\n")[0]
            self.sw = infile.readline().split("\n")[0]
            self.sf = infile.readline().split("\n")[0]
            self.datalabels = infile.readline().split("\n")[0]

            # Read in the data lines to a list.  The next line must be
            # fetched inside the loop, otherwise the loop never ends.
            line = infile.readline()
            while line:
                self.data.append(line.split("\n")[0])
                line = infile.readline()
        finally:
            # Always release the file handle, even if reading fails.
            infile.close()

    def residue_dict(self, index):
        """Return a dictionary of data lines indexed by residue number.

        The nucleus is given by index in the same form as it appears in
        the xpk label line (H1, 15N for example).  The residue number is
        the integer before the first "." of the ``<index>.L`` field of
        each data line.

        The returned dictionary (also stored as ``self.dict``) maps
        str(residue number) to the list of data lines for that residue,
        and additionally holds the integer entries "maxres" and "minres".
        Both are -1 when there are no data lines.
        """
        # None marks "no residue seen yet"; a numeric sentinel would be
        # confused with a residue that really carries that number.
        maxres = None
        minres = None

        # Cast the data lines into the XpkEntry class
        self.dict = {}
        for line in self.data:
            ind = XpkEntry(line, self.datalabels).fields[index + ".L"]
            res = int(ind.split(".")[0])

            if maxres is None or res > maxres:
                maxres = res
            if minres is None or res < minres:
                minres = res

            # Start a new list for a new residue, otherwise append the
            # additional data under the same key.
            self.dict.setdefault(str(res), []).append(line)

        if maxres is None:
            maxres = -1
        if minres is None:
            minres = -1
        self.dict["maxres"] = maxres
        self.dict["minres"] = minres

        return self.dict

    def write_header(self, outfn):
        """Write the six header lines, one per line, to the file outfn."""
        outfile = _try_open_write(outfn)
        try:
            for header_line in (self.firstline, self.axislabels,
                                self.dataset, self.sw, self.sf,
                                self.datalabels):
                outfile.write(header_line)
                outfile.write("\012")
        finally:
            outfile.close()
00114 
00115 def _try_open_read(fn):
00116 # Try to open a file for reading.  Exit on IOError
00117   try:
00118     infile=open(fn,'r')
00119   except IOError, e:
00120     print "file", fn, "could not be opened for reading - quitting."
00121     sys.exit(0)
00122   return infile
00123 
00124 def _try_open_write(fn):
00125 # Try to open a file for writing.  Exit on IOError
00126   try:
00127     infile=open(fn,'w')
00128   except IOError, e:
00129     print "file", fn, "could not be opened for writing - quitting."
00130     sys.exit(0)
00131   return infile
00132 
00133 
def replace_entry(line, fieldn, newentry):
    """Return a copy of line with field number fieldn replaced by newentry.

    Fields are counted starting with 1.  No padding is implemented
    currently: spacing will change if the original field entry and the
    new field entry are of different lengths.  newentry is converted
    with str().

    Raises IndexError if the line has no such field.

    This function depends on xpktools._find_start_entry.
    """
    # Reject field numbers past the end of the line up front; otherwise
    # the wrong part of the line would be overwritten silently.
    if fieldn > len(line.split()):
        raise IndexError("line has no field number %s" % (fieldn,))

    start = _find_start_entry(line, fieldn)

    # Make sure start points at the field itself and not at whitespace
    # in front of it (a line with leading blanks and fieldn == 1).
    remainder = line[start:]
    start += len(remainder) - len(remainder.lstrip())

    leng = len(line[start:].split()[0])
    return line[:start] + str(newentry) + line[(start + leng):]
00145 
00146 def _find_start_entry(line,n):
00147         # find the starting point character for the n'th entry in
00148         # a space delimited line.  n is counted starting with 1
00149         # The n=1 field by definition begins at the first character
00150         # This function is used by replace_entry
00151 
00152         infield=0       # A flag that indicates that the counter is in a field
00153 
00154         if (n==1):
00155                 return 0        # Special case
00156 
00157         # Count the number of fields by counting spaces
00158         c=1
00159         leng=len(line)
00160 
00161         # Initialize variables according to whether the first character
00162         #  is a space or a character
00163         if (line[0]==" "):
00164                 infield=0
00165                 field=0
00166         else:
00167                 infield=1
00168                 field=1
00169 
00170 
00171         while (c<leng and field<n):
00172                 if (infield):
00173                         if (line[c]==" " and not (line[c-1]==" ")):
00174                                 infield=0
00175                 else:
00176                         if (not line[c]==" "):
00177                                 infield=1
00178                                 field=field+1
00179 
00180                 c=c+1
00181 
00182         return c-1
00183 
00184 
def data_table(fn_list, datalabel, keyatom):
    """Generate a data table from a list of input xpk files.

    fn_list is the list of xpk file names.  The data element reported is
    datalabel and the index for the data table is by the nucleus
    indicated by keyatom.

    Returns a list of strings, one per residue number from the smallest
    to the largest residue number found in any file.  Each string holds
    the residue number followed by one tab separated column per input
    file and ends with a newline.  A column holds the datalabel value of
    the first peak listed for that residue in that file, or "*" when the
    file has no peak for the residue.  An empty fn_list gives an empty
    table.
    """
    fn_list = list(fn_list)
    if not fn_list:
        # Nothing to tabulate; there are no residue limits to look up.
        return []

    dict_list, label_line_list = _read_dicts(fn_list, keyatom)

    # Find global max and min residue numbers
    minr = min(dictionary["minres"] for dictionary in dict_list)
    maxr = max(dictionary["maxres"] for dictionary in dict_list)

    outlist = []
    for res in range(minr, maxr + 1):
        key = str(res)
        columns = [key]
        for dictionary, label in zip(dict_list, label_line_list):
            if key in dictionary:
                # Only the first peak stored for this residue is reported.
                columns.append(
                    XpkEntry(dictionary[key][0], label).fields[datalabel])
            else:
                columns.append("*")
        outlist.append("\t".join(columns) + "\n")

    return outlist
00220 
00221 def _sort_keys(dictionary):
00222   keys=dictionary.keys()
00223   sorted_keys=keys.sort()
00224   return sorted_keys
00225 
00226 def _read_dicts(fn_list, keyatom):
00227 # Read multiple files into a list of residue dictionaries
00228   dict_list=[]; datalabel_list=[]
00229   for fn in fn_list:
00230     peaklist=Peaklist(fn); dict=peaklist.residue_dict(keyatom)
00231     dict_list.append(dict)
00232     datalabel_list.append(peaklist.datalabels)
00233 
00234   return [dict_list, datalabel_list]