Back to index

python-biopython  1.60
Public Member Functions | Public Attributes
Bio.PopGen.FDist.Async.FDistAsync Class Reference
Inheritance diagram for Bio.PopGen.FDist.Async.FDistAsync:
Inheritance graph
[legend]
Collaboration diagram for Bio.PopGen.FDist.Async.FDistAsync:
Collaboration graph
[legend]

List of all members.

Public Member Functions

def __init__
def run_job
def run_datacal
def run_fdist
def run_fdist_force_fst
def run_cplot
def run_pv

Public Attributes

 tmp_idx
 fdist_dir
 os_name
 ext

Detailed Description

Asynchronous FDist execution.

Definition at line 25 of file Async.py.


Constructor & Destructor Documentation

def Bio.PopGen.FDist.Async.FDistAsync.__init__ (   self,
  fdist_dir = "",
  ext = None 
)
Constructor.

Parameters:
fdist_dir - Where fdist can be found, if = "", then it
    should be on the path.
ext - Extension of binary names (e.g. nothing on Unix,
      ".exe" on Windows)

Reimplemented from Bio.PopGen.FDist.Controller.FDistController.

Definition at line 29 of file Async.py.

00029 
00030     def __init__(self, fdist_dir = "", ext = None):
00031         """Constructor.
00032 
00033         Parameters:
00034         fdist_dir - Where fdist can be found, if = "", then it
00035             should be on the path.
00036         ext - Extension of binary names (e.g. nothing on Unix,
00037               ".exe" on Windows
00038         """
00039         FDistController.__init__(self, fdist_dir, ext)


Member Function Documentation

def Bio.PopGen.FDist.Controller.FDistController.run_cplot (   self,
  ci = 0.95,
  data_dir = '.',
  version = 1,
  smooth = 0.04 
) [inherited]
Executes cplot.

ci - Confidence interval.
data_dir - Where the data is found.

Definition at line 252 of file Controller.py.

00252 
00253     def run_cplot(self, ci= 0.95, data_dir='.', version = 1, smooth=0.04):
00254         """Executes cplot.
00255 
00256         ci - Confidence interval.
00257         data_dir - Where the data is found.
00258         """
00259         in_name = self._get_temp_file()
00260         out_name = self._get_temp_file()
00261         f = open(data_dir + os.sep + in_name, 'w')
00262         if version == 1:
00263             f.write('out.dat out.cpl\n' + str(ci) + '\n')
00264         else:
00265             f.write("\n".join([
00266                 "data_fst_outfile out.cpl out.dat",
00267                 str(ci), str(smooth)]))
00268         f.close()
00269         curr_dir = os.getcwd()
00270         self._generate_intfile(data_dir)
00271         if version == 1:
00272             cplot_name = "cplot"
00273         else:
00274             cplot_name = "cplot2"
00275         os.system('cd ' + data_dir + ' && '  +
00276             self._get_path(cplot_name) + ' < ' + in_name + ' > ' + out_name)
00277         os.remove(data_dir + os.sep + in_name)
00278         os.remove(data_dir + os.sep + out_name)
00279         f = open(data_dir + os.sep + 'out.cpl')
00280         conf_lines = []
00281         l = f.readline()
00282         try:
00283             while l!='':
00284                 conf_lines.append(
00285                     tuple(map(lambda x : my_float(x), l.rstrip().split(' ')))
00286                 )
00287                 l = f.readline()
00288         except ValueError:
00289             f.close()
00290             return []
00291         f.close()
00292         return conf_lines
        

Here is the call graph for this function:

def Bio.PopGen.FDist.Controller.FDistController.run_datacal (   self,
  data_dir = '.',
  version = 1,
  crit_freq = 0.99,
  p = 0.5,
  beta = (0.25, 0.25) 
) [inherited]
Executes datacal.

   data_dir - Where the data is found.

Definition at line 76 of file Controller.py.

00076 
00077         crit_freq = 0.99, p = 0.5, beta= (0.25, 0.25)):
00078         """Executes datacal.
00079         
00080            data_dir - Where the data is found.
00081         """
00082         in_name = self._get_temp_file()
00083         out_name = self._get_temp_file()
00084         f = open(data_dir + os.sep + in_name, 'w')
00085         if version==1:
00086             f.write('a\n')
00087             datacal_name = "datacal"
00088         else:
00089             f.write('%f\n%f\n%f %f\na\n' % (crit_freq, p, beta[0], beta[1]))
00090             datacal_name = "Ddatacal"
00091         f.close()
00092         curr_dir = os.getcwd()
00093         os.system('cd ' + data_dir + ' && ' +
00094                 self._get_path(datacal_name) + ' < ' + in_name + ' > ' + out_name)
00095         f = open(data_dir + os.sep + out_name)
00096         if version == 1:
00097             fst_line = f.readline().rstrip().split(' ')
00098             fst = my_float(fst_line[4])
00099             sample_line = f.readline().rstrip().split(' ')
00100             sample = int(sample_line[9])
00101         else:
00102             l = f.readline().rstrip().split(" ")
00103             loci, pops = int(l[-5]), int(l[-2])
00104             fst_line = f.readline().rstrip().split(' ')
00105             fst = my_float(fst_line[4])
00106             sample_line = f.readline().rstrip().split(' ')
00107             sample = int(sample_line[9])
00108             F_line = f.readline().rstrip().split(' ')
00109             F, obs = my_float(F_line[5]), int (F_line[8])
00110         f.close()
00111         os.remove(data_dir + os.sep + in_name)
00112         os.remove(data_dir + os.sep + out_name)
00113         if version==1:
00114             return fst, sample
00115         else:
00116             return fst, sample, loci, pops, F, obs

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.PopGen.FDist.Controller.FDistController.run_fdist (   self,
  npops,
  nsamples,
  fst,
  sample_size,
  mut = 0,
  num_sims = 50000,
  data_dir = '.',
  is_dominant = False,
  theta = 0.06,
  beta = (0.25, 0.25),
  max_freq = 0.99 
) [inherited]
Executes (d)fdist.

Parameters:
npops - Number of populations
nsamples - Number of populations sampled
fst - expected Fst
sample_size - Sample size per population.
    For dfdist: if zero, a sample size file has to be provided.
mut - 1=Stepwise, 0=Infinite allele
num_sims - number of simulations
data_dir - Where the data is found
is_dominant - If true executes dfdist
theta - Theta (=2Nmu)
beta - Parameters for the beta prior
max_freq - Maximum allowed frequency of the commonest allele

Returns:
fst - Average Fst

Important Note: This can take quite a while to run!

Definition at line 132 of file Controller.py.

00132 
00133         max_freq = 0.99):
00134         """Executes (d)fdist.
00135         
00136         Parameters:
00137         npops - Number of populations
00138         nsamples - Number of populations sampled
00139         fst - expected Fst
00140         sample_size - Sample size per population
00141                 For dfdist: if zero a sample size file has to be provided 
00142         mut - 1=Stepwise, 0=Infinite allele
00143         num_sims - number of simulations
00144         data_dir - Where the data is found
00145         is_dominant - If true executes dfdist
00146         theta - Theta (=2Nmu)
00147         beta - Parameters for the beta prior
00148         max_freq - Maximum allowed frequency of the commonest allele
00149 
00150         Returns:
00151         fst - Average Fst
00152         
00153         Important Note: This can take quite a while to run!
00154         """
00155         if fst >= 0.9:
00156             #Lets not joke
00157             fst = 0.899
00158         if fst <= 0.0:
00159             #0  will make fdist run forever
00160             fst = 0.001
00161         in_name = 'input.fd'
00162         out_name = 'output.fd'
00163         #print 'writing', data_dir + os.sep + in_name
00164         f = open(data_dir + os.sep + in_name, 'w')
00165         f.write('y\n\n')
00166         f.close()
00167         if is_dominant:
00168             config_name = "Dfdist_params"
00169         else:
00170             config_name = "fdist_params2.dat"
00171 
00172         f = open(data_dir + os.sep + config_name, 'w')
00173         f.write(str(npops) + '\n')
00174         f.write(str(nsamples) + '\n')
00175         f.write(str(fst) + '\n')
00176         f.write(str(sample_size) + '\n')
00177         if is_dominant:
00178             f.write(str(theta) + '\n')
00179         else:
00180             f.write(str(mut) + '\n')
00181         f.write(str(num_sims) + '\n')
00182         if is_dominant:
00183             f.write("%f %f\n" % beta)
00184             f.write("%f\n" % max_freq)
00185         f.close()
00186         self._generate_intfile(data_dir)
00187 
00188         if is_dominant:
00189             bin_name = "Dfdist"
00190         else:
00191             bin_name = "fdist2"
00192         os.system('cd ' + data_dir + ' && ' +
00193             self._get_path(bin_name) + ' < ' + in_name + ' > ' + out_name)
00194         f = open(data_dir + os.sep + out_name)
00195         lines = f.readlines()
00196         f.close()
00197         for line in lines:
00198           if line.startswith('average Fst'):
00199             fst = my_float(line.rstrip().split(' ')[-1])
00200         os.remove(data_dir + os.sep + in_name)
00201         os.remove(data_dir + os.sep + out_name)
00202         return fst

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.PopGen.FDist.Controller.FDistController.run_fdist_force_fst (   self,
  npops,
  nsamples,
  fst,
  sample_size,
  mut = 0,
  num_sims = 50000,
  data_dir = '.',
  try_runs = 5000,
  limit = 0.001,
  is_dominant = False,
  theta = 0.06,
  beta = (0.25, 0.25),
  max_freq = 0.99 
) [inherited]
Executes fdist trying to force Fst.

Parameters:
try_runs - Number of simulations used for each trial run while
   searching for the input Fst that yields the requested Fst
limit - Interval limit
Other parameters can be seen on run_fdist.

Definition at line 207 of file Controller.py.

00207 
00208         max_freq = 0.99):
00209         """Executes fdist trying to force Fst.
00210         
00211         Parameters:
00212         try_runs - Number of simulations on the part trying to get
00213                    Fst correct
00214         limit - Interval limit
00215         Other parameters can be seen on run_fdist.
00216         """
00217         max_run_fst = 1
00218         min_run_fst = 0
00219         current_run_fst = fst
00220         old_fst = fst
00221         while True:
00222             #debug('testing fst ' +  str(current_run_fst))
00223             real_fst = self.run_fdist(npops, nsamples,
00224                 current_run_fst, sample_size,
00225                 mut, try_runs, data_dir,
00226                 is_dominant, theta, beta, max_freq)
00227             #debug('got real fst ' +  str(real_fst))
00228             if abs(real_fst - fst) < limit:
00229                 #debug('We are OK')
00230                 return self.run_fdist(npops, nsamples, current_run_fst,
00231                     sample_size,
00232                     mut, num_sims, data_dir,
00233                     is_dominant, theta, beta, max_freq)
00234             old_fst = current_run_fst
00235             if real_fst > fst:
00236                 max_run_fst = current_run_fst
00237                 if current_run_fst < min_run_fst + limit:
00238                     #we can do no better
00239                     #debug('Lower limit is ' + str(min_run_fst))
00240                     return self.run_fdist(npops, nsamples, current_run_fst,
00241                         sample_size, mut, num_sims, data_dir)
00242                 current_run_fst = (min_run_fst + current_run_fst)/2
00243             else:
00244                 min_run_fst = current_run_fst
00245                 if current_run_fst > max_run_fst - limit:
00246                     #we can do no better
00247                     #debug('Upper limit is ' + str(max_run_fst))
00248                     return self.run_fdist(npops, nsamples, current_run_fst,
00249                         sample_size, mut, num_sims, data_dir,
00250                         is_dominant, theta, beta, max_freq)
00251                 current_run_fst = (max_run_fst + current_run_fst)/2

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.PopGen.FDist.Async.FDistAsync.run_job (   self,
  parameters,
  input_files 
)
Runs FDist asynchronously.

   Gets typical Fdist parameters from a dictionary and
   makes a "normal" call. This is run, normally, inside
   a separate thread.

Definition at line 40 of file Async.py.

00040 
00041     def run_job(self, parameters, input_files):
00042         """Runs FDist asynchronously.
00043 
00044            Gets typical Fdist parameters from a dictionary and
00045            makes a "normal" call. This is run, normally, inside
00046            a separate thread.
00047         """
00048         npops = parameters['npops']
00049         nsamples = parameters['nsamples']
00050         fst = parameters['fst']
00051         sample_size = parameters['sample_size']
00052         mut = parameters.get('mut', 0)
00053         num_sims = parameters.get('num_sims', 20000)
00054         data_dir = parameters.get('data_dir', '.')
00055         is_dominant = parameters.get('is_dominant', False)
00056         theta = parameters.get('theta', 0.06)
00057         beta = parameters.get('beta', (0.25, 0.25))
00058         max_freq = parameters.get('max_freq', 0.99)
00059         fst = self.run_fdist(npops, nsamples, fst, sample_size,
00060             mut, num_sims, data_dir,
00061             is_dominant, theta, beta,
00062             max_freq)
00063         output_files = {}
00064         output_files['out.dat'] = open(data_dir + os.sep + 'out.dat', 'r')
00065         return fst, output_files

Here is the call graph for this function:

def Bio.PopGen.FDist.Controller.FDistController.run_pv (   self,
  out_file = 'probs.dat',
  data_dir = '.',
  version = 1,
  smooth = 0.04 
) [inherited]
Executes pv.

out_file - Name of output file.
data_dir - Where the data is found.

Definition at line 294 of file Controller.py.

00294 
00295                version = 1, smooth=0.04):
00296         """Executes pv.
00297 
00298         out_file - Name of output file.
00299         data_dir - Where the data is found.
00300         """
00301         in_name = self._get_temp_file()
00302         out_name = self._get_temp_file()
00303         f = open(data_dir + os.sep + in_name, 'w')
00304         f.write('data_fst_outfile ' + out_file + ' out.dat\n')
00305         f.write(str(smooth) + '\n')
00306         f.close()
00307         self._generate_intfile(data_dir)
00308         if version == 1:
00309             pv_name = "pv"
00310         else:
00311             pv_name = "pv2"
00312         os.system('cd ' + data_dir + ' && ' +
00313                 self._get_path(pv_name) + ' < ' + in_name + ' > ' + out_name)
00314         pvf = open(data_dir + os.sep + out_file, 'r')
00315         result = map(lambda x: tuple(map(lambda y: my_float(y), x.rstrip().split(' '))),
00316             pvf.readlines())
00317         pvf.close()
00318         os.remove(data_dir + os.sep + in_name)
00319         os.remove(data_dir + os.sep + out_name)
00320         return result
00321 

Here is the call graph for this function:

Here is the caller graph for this function:


Member Data Documentation

ext

Definition at line 51 of file Controller.py.

fdist_dir

Definition at line 44 of file Controller.py.

os_name

Definition at line 45 of file Controller.py.

tmp_idx

Definition at line 43 of file Controller.py.


The documentation for this class was generated from the following file: