Back to index

python-biopython  1.60
baseml.py
Go to the documentation of this file.
00001 # Copyright (C) 2011 by Brandon Invergo (b.invergo@gmail.com)
00002 # This code is part of the Biopython distribution and governed by its
00003 # license. Please see the LICENSE file that should have been included
00004 # as part of this package.
00005 
00006 import os
00007 import os.path
00008 from _paml import Paml, PamlError, _relpath
00009 import _parse_baseml
00010 
00011 #TODO - Restore use of with statement for closing handles automatically
00012 #after dropping Python 2.4
00013 
class BasemlError(EnvironmentError):
    """Signal that the BASEML program has failed.

    Run with verbose = True to view BASEML's error message.
    """
00017 
class Baseml(Paml):
    """This class implements an interface to BASEML, part of the PAML package.

    Options are kept in the ``_options`` dictionary; an option whose value
    is None is omitted from the generated control file.
    """

    def __init__(self, alignment = None, tree = None, working_dir = None,
                out_file = None):
        """Initialize the Baseml instance.

        The user may optionally pass in strings specifying the locations
        of the input alignment and tree files, the working directory and
        the final output file.

        Raises IOError if a tree file is given but does not exist.
        """
        Paml.__init__(self, alignment, working_dir, out_file)
        if tree is not None:
            if not os.path.exists(tree):
                raise IOError("The specified tree file does not exist.")
        self.tree = tree
        self.ctl_file = "baseml.ctl"
        # Every option BASEML understands; None means "leave it out of the
        # control file".
        self._options = {"noisy": None,
                        "verbose": None,
                        "runmode": None,
                        "model": None,
                        "model_options": None,
                        "Mgene": None,
                        "ndata": None,
                        "clock": None,
                        "fix_kappa": None,
                        "kappa": None,
                        "fix_alpha": None,
                        "alpha": None,
                        "Malpha": None,
                        "ncatG": None,
                        "fix_rho": None,
                        "rho": None,
                        "nparK": None,
                        "nhomo": None,
                        "getSE": None,
                        "RateAncestor": None,
                        "Small_Diff": None,
                        "cleandata": None,
                        "icode": None,
                        "fix_blength": None,
                        "method": None}

    def write_ctl_file(self):
        """Dynamically build a BASEML control file from the options.

        The control file is written to the location specified by the
        ctl_file property of the baseml class.
        """
        # Make sure all paths are relative to the working directory
        self._set_rel_paths()
        # NOTE(review): _rel_alignment and _rel_out_file are presumably set
        # by Paml._set_rel_paths; _rel_tree is only set when self.tree is
        # not None -- confirm callers always provide a tree first.
        ctl_handle = open(self.ctl_file, 'w')
        # try/finally rather than "with": the file still targets Python 2.4.
        try:
            ctl_handle.write("seqfile = %s\n" % self._rel_alignment)
            ctl_handle.write("outfile = %s\n" % self._rel_out_file)
            ctl_handle.write("treefile = %s\n" % self._rel_tree)
            model_options = self._options["model_options"]
            for option, value in self._options.items():
                if value is None:
                    # If an option has a value of None, there's no need
                    # to write it in the control file; it's normally just
                    # commented out.
                    continue
                if option == "model_options":
                    # Never written on its own; it is appended to the
                    # "model" line below.
                    continue
                # If "model" is 9 or 10, it may be followed in the control
                # file by further options such as
                # [1 (TC CT AG GA)]
                # or
                # [5 (AC CA) (AG GA) (AT TA) (CG GC) (CT TC)]
                # which are to be stored in "model_options" as a string.
                if (option == "model" and value in [9, 10]
                        and model_options is not None):
                    # The trailing newline keeps the next option from being
                    # glued onto the end of the model line.
                    ctl_handle.write("model = %s  %s\n" % (value,
                                                           model_options))
                    continue
                ctl_handle.write("%s = %s\n" % (option, value))
        finally:
            ctl_handle.close()

    def read_ctl_file(self, ctl_file):
        """Parse a control file and load the options into the Baseml instance.

        Text after a "*" on a line is treated as a comment. The "seqfile",
        "treefile" and "outfile" entries set the alignment, tree and
        out_file attributes; every other entry must be a known option.
        Known options absent from the file are reset to None.

        Raises IOError if ctl_file is not a file, AttributeError for a
        non-empty line without "=", and KeyError for an unknown option.
        """
        temp_options = {}
        if not os.path.isfile(ctl_file):
            raise IOError("File not found: %r" % ctl_file)
        ctl_handle = open(ctl_file)
        # try/finally guarantees the handle is closed on every error path.
        try:
            for line in ctl_handle:
                line = line.strip()
                uncommented = line.split("*",1)[0]
                if uncommented == "":
                    continue
                if "=" not in uncommented:
                    raise AttributeError(
                        "Malformed line in control file:\n%r" % line)
                # Split on the first "=" only, so a value that itself
                # contains "=" does not break the unpacking.
                (option, value) = uncommented.split("=", 1)
                option = option.strip()
                value = value.strip()
                if option == "seqfile":
                    self.alignment = value
                elif option == "treefile":
                    self.tree = value
                elif option == "outfile":
                    self.out_file = value
                elif option not in self._options:
                    raise KeyError("Invalid option: %s" % option)
                elif option == "model":
                    if len(value) <= 2 and value.isdigit():
                        temp_options["model"] = int(value)
                        temp_options["model_options"] = None
                    else:
                        # "<number> <extra options>": the first token is
                        # the model number, the rest is kept verbatim.
                        model_num, _, model_opt = value.partition(" ")
                        temp_options["model"] = int(model_num)
                        temp_options["model_options"] = model_opt.strip()
                else:
                    # Values that look like floats are tried as float,
                    # everything else as int; anything unconvertible is
                    # kept as the raw string.
                    if "." in value or "e-" in value:
                        converter = float
                    else:
                        converter = int
                    try:
                        converted_value = converter(value)
                    except ValueError:
                        converted_value = value
                    temp_options[option] = converted_value
        finally:
            ctl_handle.close()
        # Options not mentioned in the file are reset to None.
        for option in self._options:
            self._options[option] = temp_options.get(option)

    def _set_rel_paths(self):
        """Convert all file/directory locations to paths relative to the current working directory.

        BASEML requires that all paths specified in the control file be
        relative to the directory from which it is called rather than
        absolute paths.
        """
        Paml._set_rel_paths(self)
        if self.tree is not None:
            self._rel_tree = _relpath(self.tree, self.working_dir)

    def run(self, ctl_file = None, verbose = False, command = "baseml",
                parse = True):
        """Run baseml using the current configuration and then parse the results.

        The ctl_file, verbose and command arguments are passed unchanged
        to Paml.run. Returns the dictionary produced by read() for the
        output file when parse is true, otherwise None. The arguments may
        be passed as either absolute or relative paths, despite the fact
        that BASEML requires relative paths.

        Raises ValueError if no tree file has been specified and IOError
        if the specified tree file does not exist.
        """
        if self.tree is None:
            raise ValueError("Tree file not specified.")
        if not os.path.exists(self.tree):
            raise IOError("The specified tree file does not exist.")
        Paml.run(self, ctl_file, verbose, command)
        if parse:
            results = read(self.out_file)
        else:
            results = None
        return results
00183 
def read(results_file):
    """Parse a BASEML results file.

    Reads every line of results_file and hands them to the _parse_baseml
    helpers, returning the resulting dictionary.

    Raises IOError if results_file does not exist and ValueError if no
    "version" entry could be parsed from it.
    """
    results = {}
    if not os.path.exists(results_file):
        raise IOError("Results file does not exist.")
    handle = open(results_file)
    # try/finally rather than "with": the file still targets Python 2.4.
    try:
        lines = handle.readlines()
    finally:
        handle.close()
    (results, num_params) = _parse_baseml.parse_basics(lines, results)
    results = _parse_baseml.parse_parameters(lines, results, num_params)
    # A missing version entry means the file was not recognised as BASEML
    # output.
    if results.get("version") is None:
        raise ValueError("Invalid results file")
    return results