Back to index

python-biopython  1.60
codeml.py
Go to the documentation of this file.
00001 # Copyright (C) 2011 by Brandon Invergo (b.invergo@gmail.com)
00002 # This code is part of the Biopython distribution and governed by its
00003 # license. Please see the LICENSE file that should have been included
00004 # as part of this package.
00005 
00006 import os
00007 import os.path
00008 from _paml import Paml, PamlError, _relpath
00009 import _parse_codeml
00010 
00011 #TODO - Restore use of with statement for closing handles automatically
00012 #after dropping Python 2.4
00013 
00014 class CodemlError(EnvironmentError):
00015     """CODEML has failed. Run with verbose = True to view CODEML's error
00016 message"""
00017 
00018 class Codeml(Paml):
00019     """This class implements an interface to CODEML, part of the PAML package."""
00020 
00021     def __init__(self, alignment = None, tree = None, working_dir = None,
00022                 out_file = None):
00023         """Initialize the codeml instance. 
00024         
00025         The user may optionally pass in strings specifying the locations
00026         of the input alignment and tree files, the working directory and
00027         the final output file. Other options found in the CODEML control
00028         have typical settings by default to run site class models 0, 1 and
00029         2 on a nucleotide alignment.
00030         """
00031         Paml.__init__(self, alignment, working_dir, out_file)
00032         if tree is not None:
00033             if not os.path.exists(tree):
00034                 raise IOError, "The specified tree file does not exist."
00035         self.tree = tree
00036         self.ctl_file = "codeml.ctl"
00037         self._options = {"noisy": None, 
00038                         "verbose": None, 
00039                         "runmode": None,
00040                         "seqtype": None, 
00041                         "CodonFreq": None, 
00042                         "ndata": None,
00043                         "clock": None, 
00044                         "aaDist": None,
00045                         "aaRatefile": None, 
00046                         "model": None,
00047                         "NSsites": None, 
00048                         "icode": None, 
00049                         "Mgene": None,
00050                         "fix_kappa": None, 
00051                         "kappa": None, 
00052                         "fix_omega": None,
00053                         "omega": None, 
00054                         "fix_alpha": None, 
00055                         "alpha": None,
00056                         "Malpha": None, 
00057                         "ncatG": None, 
00058                         "getSE": None,
00059                         "RateAncestor": None, 
00060                         "Small_Diff": None,
00061                         "cleandata": None, 
00062                         "fix_blength": None, 
00063                         "method": None,
00064                         "rho": None,
00065                         "fix_rho": None}
00066         
00067     def write_ctl_file(self):
00068         """Dynamically build a CODEML control file from the options.
00069         
00070         The control file is written to the location specified by the 
00071         ctl_file property of the codeml class.
00072         """
00073         # Make sure all paths are relative to the working directory
00074         self._set_rel_paths()
00075         if True: #Dummy statement to preserve indentation for diff
00076             ctl_handle = open(self.ctl_file, 'w')
00077             ctl_handle.write("seqfile = %s\n" % self._rel_alignment)
00078             ctl_handle.write("outfile = %s\n" % self._rel_out_file)
00079             ctl_handle.write("treefile = %s\n" % self._rel_tree)
00080             for option in self._options.items():
00081                 if option[1] == None:
00082                     # If an option has a value of None, there's no need
00083                     # to write it in the control file; it's normally just
00084                     # commented out.
00085                     continue
00086                 if option[0] == "NSsites":
00087                     # NSsites is stored in Python as a list but in the 
00088                     # control file it is specified as a series of numbers
00089                     # separated by spaces.
00090                     NSsites = " ".join([str(site) for site in option[1]])
00091                     ctl_handle.write("%s = %s\n" % (option[0], NSsites))
00092                 else:
00093                     ctl_handle.write("%s = %s\n" % (option[0], option[1]))
00094             ctl_handle.close()
00095     
00096     def read_ctl_file(self, ctl_file):
00097         """Parse a control file and load the options into the Codeml instance.
00098         """
00099         temp_options = {}
00100         if not os.path.isfile(ctl_file):
00101             raise IOError("File not found: %r" % ctl_file)
00102         else:
00103             ctl_handle = open(ctl_file)
00104             for line in ctl_handle:
00105                 line = line.strip()
00106                 uncommented = line.split("*",1)[0]
00107                 if uncommented != "":
00108                     if "=" not in uncommented:
00109                         ctl_handle.close()
00110                         raise AttributeError, \
00111                             "Malformed line in control file:\n%r" % line
00112                     (option, value) = uncommented.split("=")
00113                     option = option.strip()
00114                     value = value.strip()
00115                     if option == "seqfile":
00116                         self.alignment = value
00117                     elif option == "treefile":
00118                         self.tree = value
00119                     elif option == "outfile":
00120                         self.out_file = value
00121                     elif option == "NSsites":
00122                         site_classes = value.split(" ")
00123                         for n in range(len(site_classes)):
00124                             try:
00125                                 site_classes[n] = int(site_classes[n])
00126                             except:
00127                                 ctl_handle.close()
00128                                 raise TypeError, \
00129                                     "Invalid site class: %s" % site_classes[n]
00130                         temp_options["NSsites"] = site_classes
00131                     elif option not in self._options:
00132                         ctl_handle.close()
00133                         raise KeyError, "Invalid option: %s" % option
00134                     else:
00135                         if "." in value:
00136                             try:
00137                                 converted_value = float(value)
00138                             except:
00139                                 converted_value = value
00140                         else:
00141                             try:
00142                                 converted_value = int(value)
00143                             except:
00144                                 converted_value = value
00145                         temp_options[option] = converted_value
00146             ctl_handle.close()
00147         for option in self._options.keys():
00148             if option in temp_options.keys():
00149                 self._options[option] = temp_options[option]
00150             else:
00151                 self._options[option] = None
00152                             
00153     def print_options(self):
00154         """Print out all of the options and their current settings."""
00155         for option in self._options.items():
00156             if option[0] == "NSsites" and option[1] is not None:
00157                 # NSsites is stored in Python as a list but in the 
00158                 # control file it is specified as a series of numbers
00159                 # separated by spaces.
00160                 NSsites = " ".join([str(site) for site in option[1]])
00161                 print "%s = %s" % (option[0], NSsites)
00162             else:
00163                 print "%s = %s" % (option[0], option[1])
00164         
00165     def _set_rel_paths(self):
00166         """Convert all file/directory locations to paths relative to the current working directory.
00167         
00168         CODEML requires that all paths specified in the control file be
00169         relative to the directory from which it is called rather than 
00170         absolute paths.
00171         """
00172         Paml._set_rel_paths(self)
00173         if self.tree is not None:
00174             self._rel_tree = _relpath(self.tree, self.working_dir)
00175         
00176     def run(self, ctl_file = None, verbose = False, command = "codeml",
00177                 parse = True):
00178         """Run codeml using the current configuration and then parse the results. 
00179         
00180         Return a process signal so the user can determine if
00181         the execution was successful (return code 0 is successful, -N
00182         indicates a failure). The arguments may be passed as either 
00183         absolute or relative paths, despite the fact that CODEML 
00184         requires relative paths.
00185         """
00186         if self.tree is None:
00187             raise ValueError, "Tree file not specified."
00188         if not os.path.exists(self.tree):
00189             raise IOError, "The specified tree file does not exist."
00190         Paml.run(self, ctl_file, verbose, command)
00191         if parse:
00192             results = read(self.out_file)
00193         else:
00194             results = None
00195         return results        
00196 
00197 def read(results_file):
00198     """Parse a CODEML results file."""
00199     results = {}
00200     if not os.path.exists(results_file):
00201         raise IOError, "Results file does not exist."
00202     handle = open(results_file)
00203     lines = handle.readlines()
00204     handle.close()
00205     (results, multi_models, multi_genes) = _parse_codeml.parse_basics(lines, 
00206             results)
00207     results = _parse_codeml.parse_nssites(lines, results, multi_models, 
00208             multi_genes)
00209     results = _parse_codeml.parse_pairwise(lines, results)
00210     results = _parse_codeml.parse_distances(lines, results)
00211     if len(results) == 0:
00212         raise ValueError, "Invalid results file"
00213     return results