Back to index

python-biopython  1.60
Controller.py
Go to the documentation of this file.
00001 # Copyright 2007 by Tiago Antao <tiagoantao@gmail.com>.  All rights reserved.
00002 # This code is part of the Biopython distribution and governed by its
00003 # license.  Please see the LICENSE file that should have been included
00004 # as part of this package.
00005 
00006 
00007 
00008 """
00009 This module allows to control fdist.
00010 
00011 This will allow to call fdist and associated program (cplot, datacal, pv).
00012 
00013 http://www.rubic.rdg.ac.uk/~mab/software.html
00014 """
00015 
00016 import os
00017 import tempfile
00018 import sys
00019 from shutil import copyfile
00020 from random import randint, random
00021 from time import strftime, clock
00022 #from logging import debug
00023 
00024 if sys.version_info[0] == 3:
00025     maxint = sys.maxsize
00026 else:
00027     maxint = sys.maxint
00028 
00029 def my_float(f):
00030     #Because of Jython, mostly
00031     if f=="-nan": f="nan"
00032     return float(f)
00033 
00034 class FDistController(object):
00035     def __init__(self, fdist_dir = '', ext = None):
00036         """Initializes the controller.
00037         
00038         fdist_dir is the directory where fdist2 is.
00039         ext is the extension of binaries (.exe on windows, 
00040           none on Unix)
00041         
00042         """
00043         self.tmp_idx = 0
00044         self.fdist_dir = fdist_dir
00045         self.os_name = os.name
00046         if sys.platform=='win32':
00047             py_ext = '.exe'
00048         else:
00049             py_ext = ''
00050         if ext == None:
00051             self.ext = py_ext
00052         else:
00053             self.ext = ext
00054         exec_counts = 0
00055 
00056     def _get_path(self, app):
00057         """Returns the path to an fdist application.
00058 
00059            Includes Path where fdist can be found plus executable extension.
00060         """
00061         if self.fdist_dir == '':
00062             return app + self.ext
00063         else:
00064             return os.sep.join([self.fdist_dir, app]) + self.ext
00065 
00066     def _get_temp_file(self):
00067         """Gets a temporary file name.
00068 
00069            Returns a temporary file name, if executing inside jython
00070            tries to replace unexisting tempfile.mkstemp().
00071         """
00072         self.tmp_idx += 1
00073         return strftime("%H%M%S") + str(int(clock()*100)) + str(randint(0,1000)) + str(self.tmp_idx)
00074 
00075     def run_datacal(self, data_dir='.', version=1,
00076         crit_freq = 0.99, p = 0.5, beta= (0.25, 0.25)):
00077         """Executes datacal.
00078         
00079            data_dir - Where the data is found.
00080         """
00081         in_name = self._get_temp_file()
00082         out_name = self._get_temp_file()
00083         f = open(data_dir + os.sep + in_name, 'w')
00084         if version==1:
00085             f.write('a\n')
00086             datacal_name = "datacal"
00087         else:
00088             f.write('%f\n%f\n%f %f\na\n' % (crit_freq, p, beta[0], beta[1]))
00089             datacal_name = "Ddatacal"
00090         f.close()
00091         curr_dir = os.getcwd()
00092         os.system('cd ' + data_dir + ' && ' +
00093                 self._get_path(datacal_name) + ' < ' + in_name + ' > ' + out_name)
00094         f = open(data_dir + os.sep + out_name)
00095         if version == 1:
00096             fst_line = f.readline().rstrip().split(' ')
00097             fst = my_float(fst_line[4])
00098             sample_line = f.readline().rstrip().split(' ')
00099             sample = int(sample_line[9])
00100         else:
00101             l = f.readline().rstrip().split(" ")
00102             loci, pops = int(l[-5]), int(l[-2])
00103             fst_line = f.readline().rstrip().split(' ')
00104             fst = my_float(fst_line[4])
00105             sample_line = f.readline().rstrip().split(' ')
00106             sample = int(sample_line[9])
00107             F_line = f.readline().rstrip().split(' ')
00108             F, obs = my_float(F_line[5]), int (F_line[8])
00109         f.close()
00110         os.remove(data_dir + os.sep + in_name)
00111         os.remove(data_dir + os.sep + out_name)
00112         if version==1:
00113             return fst, sample
00114         else:
00115             return fst, sample, loci, pops, F, obs
00116 
00117     def _generate_intfile(self, data_dir):
00118         """Generates an INTFILE.
00119 
00120            Parameter:
00121            data_dir - data directory
00122         """
00123         inf = open(data_dir + os.sep + 'INTFILE', 'w')
00124         for i in range(98):
00125             inf.write(str(randint(-maxint+1,maxint-1)) + '\n') 
00126         inf.write('8\n')
00127         inf.close()
00128     
00129     def run_fdist(self, npops, nsamples, fst, sample_size,
00130         mut = 0, num_sims = 50000, data_dir='.',
00131         is_dominant = False, theta = 0.06, beta = (0.25, 0.25),
00132         max_freq = 0.99):
00133         """Executes (d)fdist.
00134         
00135         Parameters:
00136         npops - Number of populations
00137         nsamples - Number of populations sampled
00138         fst - expected Fst
00139         sample_size - Sample size per population
00140                 For dfdist: if zero a sample size file has to be provided 
00141         mut - 1=Stepwise, 0=Infinite allele
00142         num_sims - number of simulations
00143         data_dir - Where the data is found
00144         is_dominant - If true executes dfdist
00145         theta - Theta (=2Nmu)
00146         beta - Parameters for the beta prior
00147         max_freq - Maximum allowed frequency of the commonest allele
00148 
00149         Returns:
00150         fst - Average Fst
00151         
00152         Important Note: This can take quite a while to run!
00153         """
00154         if fst >= 0.9:
00155             #Lets not joke
00156             fst = 0.899
00157         if fst <= 0.0:
00158             #0  will make fdist run forever
00159             fst = 0.001
00160         in_name = 'input.fd'
00161         out_name = 'output.fd'
00162         #print 'writing', data_dir + os.sep + in_name
00163         f = open(data_dir + os.sep + in_name, 'w')
00164         f.write('y\n\n')
00165         f.close()
00166         if is_dominant:
00167             config_name = "Dfdist_params"
00168         else:
00169             config_name = "fdist_params2.dat"
00170 
00171         f = open(data_dir + os.sep + config_name, 'w')
00172         f.write(str(npops) + '\n')
00173         f.write(str(nsamples) + '\n')
00174         f.write(str(fst) + '\n')
00175         f.write(str(sample_size) + '\n')
00176         if is_dominant:
00177             f.write(str(theta) + '\n')
00178         else:
00179             f.write(str(mut) + '\n')
00180         f.write(str(num_sims) + '\n')
00181         if is_dominant:
00182             f.write("%f %f\n" % beta)
00183             f.write("%f\n" % max_freq)
00184         f.close()
00185         self._generate_intfile(data_dir)
00186 
00187         if is_dominant:
00188             bin_name = "Dfdist"
00189         else:
00190             bin_name = "fdist2"
00191         os.system('cd ' + data_dir + ' && ' +
00192             self._get_path(bin_name) + ' < ' + in_name + ' > ' + out_name)
00193         f = open(data_dir + os.sep + out_name)
00194         lines = f.readlines()
00195         f.close()
00196         for line in lines:
00197           if line.startswith('average Fst'):
00198             fst = my_float(line.rstrip().split(' ')[-1])
00199         os.remove(data_dir + os.sep + in_name)
00200         os.remove(data_dir + os.sep + out_name)
00201         return fst
00202 
00203     def run_fdist_force_fst(self, npops, nsamples, fst, sample_size,
00204         mut = 0, num_sims = 50000, data_dir='.',
00205         try_runs = 5000, limit=0.001,
00206         is_dominant = False, theta = 0.06, beta = (0.25, 0.25),
00207         max_freq = 0.99):
00208         """Executes fdist trying to force Fst.
00209         
00210         Parameters:
00211         try_runs - Number of simulations on the part trying to get
00212                    Fst correct
00213         limit - Interval limit
00214         Other parameters can be seen on run_fdist.
00215         """
00216         max_run_fst = 1
00217         min_run_fst = 0
00218         current_run_fst = fst
00219         old_fst = fst
00220         while True:
00221             #debug('testing fst ' +  str(current_run_fst))
00222             real_fst = self.run_fdist(npops, nsamples,
00223                 current_run_fst, sample_size,
00224                 mut, try_runs, data_dir,
00225                 is_dominant, theta, beta, max_freq)
00226             #debug('got real fst ' +  str(real_fst))
00227             if abs(real_fst - fst) < limit:
00228                 #debug('We are OK')
00229                 return self.run_fdist(npops, nsamples, current_run_fst,
00230                     sample_size,
00231                     mut, num_sims, data_dir,
00232                     is_dominant, theta, beta, max_freq)
00233             old_fst = current_run_fst
00234             if real_fst > fst:
00235                 max_run_fst = current_run_fst
00236                 if current_run_fst < min_run_fst + limit:
00237                     #we can do no better
00238                     #debug('Lower limit is ' + str(min_run_fst))
00239                     return self.run_fdist(npops, nsamples, current_run_fst,
00240                         sample_size, mut, num_sims, data_dir)
00241                 current_run_fst = (min_run_fst + current_run_fst)/2
00242             else:
00243                 min_run_fst = current_run_fst
00244                 if current_run_fst > max_run_fst - limit:
00245                     #we can do no better
00246                     #debug('Upper limit is ' + str(max_run_fst))
00247                     return self.run_fdist(npops, nsamples, current_run_fst,
00248                         sample_size, mut, num_sims, data_dir,
00249                         is_dominant, theta, beta, max_freq)
00250                 current_run_fst = (max_run_fst + current_run_fst)/2
00251 
00252     def run_cplot(self, ci= 0.95, data_dir='.', version = 1, smooth=0.04):
00253         """Executes cplot.
00254 
00255         ci - Confidence interval.
00256         data_dir - Where the data is found.
00257         """
00258         in_name = self._get_temp_file()
00259         out_name = self._get_temp_file()
00260         f = open(data_dir + os.sep + in_name, 'w')
00261         if version == 1:
00262             f.write('out.dat out.cpl\n' + str(ci) + '\n')
00263         else:
00264             f.write("\n".join([
00265                 "data_fst_outfile out.cpl out.dat",
00266                 str(ci), str(smooth)]))
00267         f.close()
00268         curr_dir = os.getcwd()
00269         self._generate_intfile(data_dir)
00270         if version == 1:
00271             cplot_name = "cplot"
00272         else:
00273             cplot_name = "cplot2"
00274         os.system('cd ' + data_dir + ' && '  +
00275             self._get_path(cplot_name) + ' < ' + in_name + ' > ' + out_name)
00276         os.remove(data_dir + os.sep + in_name)
00277         os.remove(data_dir + os.sep + out_name)
00278         f = open(data_dir + os.sep + 'out.cpl')
00279         conf_lines = []
00280         l = f.readline()
00281         try:
00282             while l!='':
00283                 conf_lines.append(
00284                     tuple(map(lambda x : my_float(x), l.rstrip().split(' ')))
00285                 )
00286                 l = f.readline()
00287         except ValueError:
00288             f.close()
00289             return []
00290         f.close()
00291         return conf_lines
00292         
00293     def run_pv(self, out_file='probs.dat', data_dir='.',
00294                version = 1, smooth=0.04):
00295         """Executes pv.
00296 
00297         out_file - Name of output file.
00298         data_dir - Where the data is found.
00299         """
00300         in_name = self._get_temp_file()
00301         out_name = self._get_temp_file()
00302         f = open(data_dir + os.sep + in_name, 'w')
00303         f.write('data_fst_outfile ' + out_file + ' out.dat\n')
00304         f.write(str(smooth) + '\n')
00305         f.close()
00306         self._generate_intfile(data_dir)
00307         if version == 1:
00308             pv_name = "pv"
00309         else:
00310             pv_name = "pv2"
00311         os.system('cd ' + data_dir + ' && ' +
00312                 self._get_path(pv_name) + ' < ' + in_name + ' > ' + out_name)
00313         pvf = open(data_dir + os.sep + out_file, 'r')
00314         result = map(lambda x: tuple(map(lambda y: my_float(y), x.rstrip().split(' '))),
00315             pvf.readlines())
00316         pvf.close()
00317         os.remove(data_dir + os.sep + in_name)
00318         os.remove(data_dir + os.sep + out_name)
00319         return result
00320