Back to index

python-biopython  1.60
Functions | Variables
Bio.Phylo.PAML._parse_baseml Namespace Reference

Functions

def parse_basics
def parse_parameters
def parse_parameter_list
def parse_kappas
def parse_rates
def parse_freqs

Variables

tuple line_floats_re = re.compile("-*\d+\.\d+")

Function Documentation

def Bio.Phylo.PAML._parse_baseml.parse_basics (   lines,
  results 
)
Parse the basics that should be present in most baseml results files.

Definition at line 10 of file _parse_baseml.py.

00010 
def parse_basics(lines, results):
    """Parse the basics that should be present in most baseml results files.

    Every line is scanned for the PAML version, the maximum and estimated
    log-likelihoods, the tree length and the estimated tree.  When a value
    appears on several lines, the last occurrence wins.

    Arguments:
     - lines - iterable of strings, the lines of a baseml results file.
     - results - dictionary, updated in place with whichever of the keys
       "version", "lnL max", "lnL", "tree length" and "tree" were found.

    Returns a (results, num_params) tuple.  num_params is the "np" value
    read from the lnL line, or -1 if no such value was matched.
    """
    # Raw strings keep the regex escapes (\(, \d, \s) away from Python's own
    # string-escape processing; all patterns are compiled once, up front.
    version_re = re.compile(r"BASEML \(in paml version (\d+\.\d+[a-z]*).*")
    np_re = re.compile(r"lnL\(ntime:\s+\d+\s+np:\s+(\d+)\)")
    tree_re = re.compile(r"\(+")
    num_params = -1
    for line in lines:
        # Find all floating point numbers in this line
        line_floats = [float(val) for val in line_floats_re.findall(line)]
        # Find the version number
        # Example match:
        # "BASEML (in paml version 4.3, August 2009)  alignment.phylip"
        version_res = version_re.match(line)
        if version_res is not None:
            results["version"] = version_res.group(1)
        # Find max lnL
        # Example match:
        # ln Lmax (unconstrained) = -316.049385
        if "ln Lmax" in line and len(line_floats) == 1:
            results["lnL max"] = line_floats[0]
        # Find lnL values.
        # Example match (lnL = -2021.348300):
        # "lnL(ntime: 19  np: 22):  -2021.348300      +0.000000"
        elif "lnL(ntime:" in line and line_floats:
            results["lnL"] = line_floats[0]
            # The number of parameters is needed later to recognise the
            # unlabeled parameter list.
            np_res = np_re.match(line)
            if np_res is not None:
                num_params = int(np_res.group(1))
        # Find tree lengths.
        # Example match: "tree length =   1.71931"
        elif "tree length" in line and len(line_floats) == 1:
            results["tree length"] = line_floats[0]
        # Find the estimated tree, only taking the tree if it has
        # branch lengths (i.e. the line contains a colon)
        elif tree_re.match(line) is not None:
            if ":" in line:
                results["tree"] = line.strip()
    return (results, num_params)

def Bio.Phylo.PAML._parse_baseml.parse_freqs (   lines,
  parameters 
)
Parse the basepair frequencies.

Definition at line 181 of file _parse_baseml.py.

00181 
def _base_frequencies(floats):
    """Map the first four values of floats onto the bases T, C, A and G."""
    return {"T": floats[0], "C": floats[1], "A": floats[2], "G": floats[3]}


def parse_freqs(lines, parameters):
    """Parse the basepair frequencies.

    Arguments:
     - lines - iterable of strings, the lines of a baseml results file.
     - parameters - dictionary, updated in place with whichever of the
       keys "base frequencies", "rate frequencies" and "nodes" were found.

    Returns the same parameters dictionary.

    Raises IndexError if a base frequency line holds between one and three
    numbers, since four values (T, C, A, G) are read from it.
    """
    root_re = re.compile(r"Note: node (\d+) is root.")
    node_re = re.compile(r"Node \#(\d+)")
    branch_freqs_found = False
    base_freqs_found = False
    for line in lines:
        # Find all floating point numbers in this line
        line_floats = [float(val) for val in line_floats_re.findall(line)]
        # Find base frequencies from baseml 4.3
        # Example match:
        # "Base frequencies:   0.20090  0.16306  0.37027  0.26577"
        if "Base frequencies" in line and line_floats:
            parameters["base frequencies"] = _base_frequencies(line_floats)
        # Find base frequencies from baseml 4.1:
        # Example match:
        # "base frequency parameters
        # "  0.20317  0.16768  0.36813  0.26102"
        elif "base frequency parameters" in line:
            base_freqs_found = True
        # baseml 4.4 returns to having the base frequencies on the next line
        # but the heading changed
        elif "Base frequencies" in line and not line_floats:
            base_freqs_found = True
        elif base_freqs_found and line_floats:
            parameters["base frequencies"] = _base_frequencies(line_floats)
            base_freqs_found = False
        # Find frequencies
        # Example match:
        # "freq:   0.90121  0.96051  0.99831  1.03711  1.10287"
        elif "freq: " in line and line_floats:
            parameters["rate frequencies"] = line_floats
        # Find branch-specific frequency parameters
        # Example match (note: I think it's possible to have 4 more
        # values per line, enclosed in brackets, so I'll account for
        # this):
        # (frequency parameters for branches)  [frequencies at nodes] (see Yang & Roberts 1995 fig 1)
        #
        # Node #1  ( 0.25824  0.24176  0.25824  0.24176 )
        # Node #2  ( 0.00000  0.50000  0.00000  0.50000 )
        elif "(frequency parameters for branches)" in line:
            parameters["nodes"] = {}
            branch_freqs_found = True
        elif branch_freqs_found:
            if line_floats:
                node_res = node_re.match(line)
                if node_res is None:
                    # A numeric line that is not a "Node #N" entry; skip it
                    # rather than failing on a missing match object.
                    continue
                node_num = int(node_res.group(1))
                node = {"root": False}
                node["frequency parameters"] = line_floats[:4]
                # The optional base frequencies are values 5 to 8, so all
                # eight numbers must be present before they are read.
                if len(line_floats) >= 8:
                    node["base frequencies"] = \
                        _base_frequencies(line_floats[4:8])
                parameters["nodes"][node_num] = node
            else:
                root_res = root_re.match(line)
                if root_res is not None:
                    root_node = int(root_res.group(1))
                    # Only flag the root if that node was actually parsed.
                    node = parameters["nodes"].get(root_node)
                    if node is not None:
                        node["root"] = True
                    # The root note closes the per-node section.
                    branch_freqs_found = False
    return parameters

Here is the caller graph for this function:

def Bio.Phylo.PAML._parse_baseml.parse_kappas (   lines,
  parameters 
)
Parse out the kappa parameters.

Definition at line 89 of file _parse_baseml.py.

00089 
def parse_kappas(lines, parameters):
    """Parse out the kappa parameters.

    Arguments:
     - lines - iterable of strings, the lines of a baseml results file.
     - parameters - dictionary, updated in place.  "kappa" is stored as a
       single float when one value is found and as a list of floats
       otherwise.  Branch-specific lines are stored under "branches",
       keyed by the branch label (e.g. "6..7"), each as a dictionary with
       the keys "t", "kappa", "TS" and "TV".

    Returns the same parameters dictionary.
    """
    # A branch-specific line starts with one whitespace character followed
    # by a label of the form "N..M".
    branch_re = re.compile(r"\s(\d+\.\.\d+)")
    kappa_found = False
    for line in lines:
        # Find all floating point numbers in this line
        line_floats = [float(val) for val in line_floats_re.findall(line)]
        # Find kappa parameter (F84, HKY85, T92 model)
        # Example match:
        # "Parameters (kappa) in the rate matrix (F84) (Yang 1994 J Mol Evol 39:105-111):
        #    3.00749"
        if "Parameters (kappa)" in line:
            kappa_found = True
        elif kappa_found and line_floats:
            branch_res = branch_re.match(line)
            if branch_res is None:
                if len(line_floats) == 1:
                    parameters["kappa"] = line_floats[0]
                else:
                    parameters["kappa"] = line_floats
                kappa_found = False
            # Four values (t, kappa, TS, TV) are read from a branch line,
            # so shorter lines are skipped instead of raising IndexError.
            # kappa_found stays set because more branch lines may follow.
            elif len(line_floats) >= 4:
                if parameters.get("branches") is None:
                    parameters["branches"] = {}
                branch = branch_res.group(1)
                parameters["branches"][branch] = {
                    "t": line_floats[0],
                    "kappa": line_floats[1],
                    "TS": line_floats[2],
                    "TV": line_floats[3],
                }
        # Find kappa under REV
        # Example match:
        # kappa under REV: 999.00000 145.76453  0.00001  0.00001  0.00001
        elif "kappa under" in line and line_floats:
            if len(line_floats) == 1:
                parameters["kappa"] = line_floats[0]
            else:
                parameters["kappa"] = line_floats
    return parameters

Here is the caller graph for this function:

def Bio.Phylo.PAML._parse_baseml.parse_parameter_list (   lines,
  parameters,
  num_params 
)
Parse the parameters list, which is just an unlabeled list of numeric values.

Definition at line 61 of file _parse_baseml.py.

00061 
def parse_parameter_list(lines, parameters, num_params):
    """Parse the parameters list, which is just an unlabeled list of numeric values.

    The first line holding exactly num_params floating point numbers is
    taken as the parameter list; scanning stops there.

    Arguments:
     - lines - sequence (indexable, with a length) of strings, the lines
       of a baseml results file.
     - parameters - dictionary, updated in place with "parameter list"
       and, when a standard error section follows, "SEs".
     - num_params - number of values expected on the parameter list line.

    Returns the same parameters dictionary.
    """
    num_lines = len(lines)
    for line_num, line in enumerate(lines):
        # Only the count of floating point numbers matters here, so the
        # matches are not converted to floats.
        if len(line_floats_re.findall(line)) != num_params:
            continue
        # Get parameter list. This can be useful for specifying starting
        # parameters in another run by copying the list of parameters
        # to a file called in.baseml. Since the parameters must be in
        # a fixed order and format, copying and pasting to the file is
        # best. For this reason, they are grabbed here just as a long
        # string and not as individual numbers.
        parameters["parameter list"] = line.strip()
        # Find SEs. The same format as parameters above is maintained
        # since there is a correspondence between the SE format and
        # the parameter format.
        # Example match:
        # "SEs for parameters:
        # -1.00000 -1.00000 -1.00000 801727.63247 730462.67590 -1.00000
        # Both the heading line and the values line must exist; without
        # this check a parameter list at the end of the input would raise
        # IndexError.
        if (line_num + 2 < num_lines
                and "SEs for parameters:" in lines[line_num + 1]):
            parameters["SEs"] = lines[line_num + 2].strip()
        break
    return parameters

Here is the caller graph for this function:

def Bio.Phylo.PAML._parse_baseml.parse_parameters (   lines,
  results,
  num_params 
)
Parse the various parameters from the file.

Definition at line 50 of file _parse_baseml.py.

00050 
def parse_parameters(lines, results, num_params):
    """Parse the various parameters from the file.

    Runs the parameter list, kappa, rate and frequency parsers in turn
    over the same lines, collecting their output in one dictionary that
    is stored under results["parameters"].

    Returns the results dictionary.
    """
    collected = parse_parameter_list(lines, {}, num_params)
    # The remaining parsers share the (lines, parameters) signature and
    # are applied in this fixed order.
    for parser in (parse_kappas, parse_rates, parse_freqs):
        collected = parser(lines, collected)
    results["parameters"] = collected
    return results

Here is the call graph for this function:

def Bio.Phylo.PAML._parse_baseml.parse_rates (   lines,
  parameters 
)
Parse the rate parameters.

Definition at line 129 of file _parse_baseml.py.

00129 
def parse_rates(lines, parameters):
    """Parse the rate parameters.

    Arguments:
     - lines - iterable of strings, the lines of a baseml results file.
     - parameters - dictionary, updated in place with whichever of the
       keys "rate parameters", "rates", "Q matrix", "alpha", "rho" and
       "transition probs." were found.

    Returns the same parameters dictionary.
    """
    Q_mat_found = False
    trans_probs_found = False
    for line in lines:
        # Find all floating point numbers in this line
        line_floats = [float(val) for val in line_floats_re.findall(line)]
        # Find rate parameters
        # Example match:
        # "Rate parameters:   999.00000 145.59775  0.00001  0.00001  0.00001"
        if "Rate parameters:" in line and line_floats:
            parameters["rate parameters"] = line_floats
        # Find rates
        # Example match:
        # "rate:   0.90121  0.96051  0.99831  1.03711  1.10287"
        elif "rate: " in line and line_floats:
            parameters["rates"] = line_floats
        # Find Rate matrix Q & average kappa (REV model)
        # Example match:
        # Rate matrix Q, Average Ts/Tv =   3.0308
        #  -2.483179    1.865730    0.617449    0.000000
        #   2.298662   -2.298662    0.000000    0.000000
        #   0.335015    0.000000   -0.338059    0.003044
        #   0.000000    0.000000    0.004241   -0.004241
        elif "matrix Q" in line:
            parameters["Q matrix"] = {"matrix": []}
            if line_floats:
                parameters["Q matrix"]["average Ts/Tv"] = line_floats[0]
            Q_mat_found = True
        elif Q_mat_found and line_floats:
            q_rows = parameters["Q matrix"]["matrix"]
            q_rows.append(line_floats)
            # The Q matrix has four rows; stop collecting after the last.
            if len(q_rows) == 4:
                Q_mat_found = False
        # Find alpha (gamma shape parameter for variable rates)
        # Example match: "alpha (gamma, K=5) = 192.47918"
        elif "alpha" in line and line_floats:
            parameters["alpha"] = line_floats[0]
        # Find rho for auto-discrete-gamma model
        elif "rho" in line and line_floats:
            parameters["rho"] = line_floats[0]
        elif "transition probabilities" in line:
            parameters["transition probs."] = []
            trans_probs_found = True
        elif trans_probs_found and line_floats:
            trans_probs = parameters["transition probs."]
            trans_probs.append(line_floats)
            # One row is expected per parsed rate.  If no "rate: " line was
            # seen, fall back to the width of the first row instead of
            # raising KeyError.
            # NOTE(review): the fallback assumes the table is square -
            # confirm against baseml output.
            rates = parameters.get("rates")
            if rates is not None:
                expected_rows = len(rates)
            else:
                expected_rows = len(trans_probs[0])
            if len(trans_probs) == expected_rows:
                trans_probs_found = False
    return parameters

Here is the caller graph for this function:


Variable Documentation

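Bio.Phylo.PAML._parse_baseml.line_floats_re = re.compile("-*\d+\.\d+")

Compiled regular expression (not a tuple, despite the type label produced by the documentation generator) matching decimal numbers such as "-316.049385". The parse_* functions call its findall() method to collect every floating point value on a line.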

Definition at line 8 of file _parse_baseml.py.