Back to index

python-biopython  1.60
Controller.py
Go to the documentation of this file.
00001 # Copyright 2009 by Tiago Antao <tiagoantao@gmail.com>.  All rights reserved.
00002 # This code is part of the Biopython distribution and governed by its
00003 # license.  Please see the LICENSE file that should have been included
00004 # as part of this package.
00005 
00006 
00007 
00008 """
00009 This module allows to control GenePop.
00010 
00011 """
00012 
00013 import os
00014 import re
00015 import shutil
00016 import subprocess
00017 import sys
00018 import tempfile
00019 
00020 from Bio.Application import AbstractCommandline, _Argument, _Option
00021 
00022 def _gp_float(tok):
00023     """Gets a float from a token, if it fails, returns the string.
00024     """
00025     try:
00026         return float(tok)
00027     except ValueError:
00028         return str(tok)
00029 
00030 def _gp_int(tok):
00031     """Gets a int from a token, if it fails, returns the string.
00032     """
00033     try:
00034         return int(tok)
00035     except ValueError:
00036         return str(tok)
00037     
00038     
00039 def _read_allele_freq_table(f):
00040     l = f.readline()
00041     while l.find(" --")==-1:
00042         if l == "":
00043             raise StopIteration
00044         if l.find("No data")>-1:
00045             return None, None
00046         l = f.readline()
00047     alleles = filter(lambda x: x != '', f.readline().rstrip().split(" "))
00048     alleles = map(lambda x: _gp_int(x), alleles)
00049     l = f.readline().rstrip()
00050     table = []
00051     while l != "":
00052         line = filter(lambda x: x != '', l.split(" "))
00053         try:
00054             table.append(
00055                 (line[0],
00056                 map(lambda x: _gp_float(x), line[1:-1]),
00057                 _gp_int(line[-1])))
00058         except ValueError:
00059             table.append(
00060                 (line[0],
00061                 [None] * len(alleles),
00062                 0))
00063         l = f.readline().rstrip()
00064     return alleles, table
00065 
00066 def _read_table(f, funs):
00067     table = []
00068     l = f.readline().rstrip()
00069     while l.find("---")==-1:
00070         l = f.readline().rstrip()
00071     l = f.readline().rstrip()
00072     while l.find("===")==-1 and l.find("---")==-1 and l != "":
00073         toks = filter(lambda x: x != "", l.split(" ")) 
00074         line = []
00075         for i in range(len(toks)):
00076             try:
00077                 line.append(funs[i](toks[i]))
00078             except ValueError:
00079                 line.append(toks[i])  # Could not cast
00080         table.append(tuple(line))
00081         l = f.readline().rstrip()
00082     return table
00083 
00084 def _read_triangle_matrix(f):
00085     matrix = []
00086     l = f.readline().rstrip()
00087     while l != "":
00088         matrix.append(
00089             map(lambda x: _gp_float(x),
00090                 filter(lambda y: y != "", l.split(" "))))
00091         l = f.readline().rstrip()
00092     return matrix
00093 
00094 def _read_headed_triangle_matrix(f):
00095     matrix = {}
00096     header = f.readline().rstrip()
00097     if header.find("---")>-1 or header.find("===")>-1:
00098         header = f.readline().rstrip()
00099     nlines = len(filter(lambda x:x != '', header.split(' '))) - 1
00100     for line_pop in range(nlines):
00101         l = f.readline().rstrip()
00102         vals = filter(lambda x:x != '', l.split(' ')[1:])
00103         clean_vals = []
00104         for val in vals:
00105             try:
00106                 clean_vals.append(_gp_float(val))
00107             except ValueError:
00108                 clean_vals.append(None)
00109         for col_pop in range(len(clean_vals)):
00110             matrix[(line_pop+1, col_pop)] = clean_vals[col_pop]
00111     return matrix
00112 
00113 def _hw_func(stream, is_locus, has_fisher = False): 
00114     l = stream.readline()
00115     if is_locus:
00116         hook = "Locus "
00117     else:
00118         hook = " Pop : "
00119     while l != "":
00120         if l.startswith(hook):
00121             stream.readline()
00122             stream.readline()
00123             stream.readline()
00124             table = _read_table(stream,[str,_gp_float,_gp_float,_gp_float,_gp_float,_gp_int,str])
00125             #loci might mean pop if hook="Locus "
00126             loci = {}
00127             for entry in table:
00128                 if len(entry) < 3:
00129                     loci[entry[0]] = None
00130                 else:
00131                     locus, p, se, fis_wc, fis_rh, steps = entry[:-1]
00132                     if se == "-": se = None
00133                     loci[locus] = p, se, fis_wc, fis_rh, steps
00134             return loci
00135         l = stream.readline()
00136     #self.done = True
00137     raise StopIteration
00138 
00139 class _FileIterator:
00140     """Iterator which crawls over a stream of lines with a function.
00141 
00142        The generator function is expected to yield a tuple, while
00143        consuming input
00144     """
00145     def __init__(self, func, stream, fname):
00146         self.func = func
00147         self.stream = stream
00148         self.fname = fname
00149         self.done = False
00150 
00151     def __iter__(self):
00152         if self.done:
00153             self.done = True
00154             raise StopIteration
00155         return self
00156 
00157     def next(self):
00158         return self.func(self)
00159 
00160     def __del__(self):
00161         self.stream.close()
00162         try:
00163             os.remove(self.fname)
00164         except OSError:
00165             #Jython seems to call the iterator twice
00166             pass
00167 
00168 class _GenePopCommandline(AbstractCommandline):
00169     """ Command Line Wrapper for GenePop.
00170     """
00171     def __init__(self, genepop_dir=None, cmd='Genepop', **kwargs):
00172         self.parameters = [
00173                 _Argument(["command"],
00174                     "GenePop option to be called",
00175                     is_required=True),
00176                 _Argument(["mode"],
00177                     "Should allways be batch",
00178                     is_required=True),
00179                 _Argument(["input"],
00180                     "Input file",
00181                     is_required=True),
00182                 _Argument(["Dememorization"],
00183                     "Dememorization step"),
00184                 _Argument(["BatchNumber"],
00185                     "Number of MCMC batches"),
00186                 _Argument(["BatchLength"],
00187                     "Length of MCMC chains"),
00188                 _Argument(["HWtests"],
00189                     "Enumeration or MCMC"),
00190                 _Argument(["IsolBDstatistic"],
00191                     "IBD statistic (a or e)"),
00192                 _Argument(["MinimalDistance"],
00193                     "Minimal IBD distance"),
00194                 _Argument(["GeographicScale"],
00195                     "Log or Linear"),
00196         ]
00197         AbstractCommandline.__init__(self, cmd, **kwargs)
00198         self.set_parameter("mode", "Mode=Batch")
00199 
00200     def set_menu(self, option_list):
00201         """Sets the menu option.
00202 
00203         Example set_menu([6,1]) = get all F statistics (menu 6.1)
00204         """
00205         self.set_parameter("command", "MenuOptions="+
00206                 ".".join(map(lambda x:str(x),option_list)))
00207 
00208     def set_input(self, fname):
00209         """Sets the input file name.
00210         """
00211         self.set_parameter("input", "InputFile="+fname)
00212 
00213 class GenePopController(object):
00214     def __init__(self, genepop_dir = None):
00215         """Initializes the controller.
00216         
00217         genepop_dir is the directory where GenePop is.
00218 
00219         The binary should be called Genepop (capital G)
00220         
00221         """
00222         self.controller = _GenePopCommandline(genepop_dir)
00223 
00224     def _remove_garbage(self, fname_out):
00225         try:
00226             if fname_out != None: os.remove(fname_out)
00227         except OSError:
00228             pass # safe
00229         try:
00230             os.remove("genepop.txt")
00231         except OSError:
00232             pass # safe
00233         try:
00234             os.remove("fichier.in")
00235         except OSError:
00236             pass # safe
00237         try:
00238             os.remove("cmdline.txt")
00239         except OSError:
00240             pass # safe
00241 
00242     def _get_opts(self, dememorization, batches, iterations, enum_test=None):
00243         opts = {}
00244         opts["Dememorization"]=dememorization
00245         opts["BatchNumber"]=batches
00246         opts["BatchLength"]=iterations
00247         if enum_test != None:
00248             if enum_test == True:
00249                 opts["HWtests"]="Enumeration"
00250             else:
00251                 opts["HWtests"]="MCMC"
00252         return opts
00253 
00254     def _run_genepop(self, extensions, option, fname, opts={}):
00255         for extension in extensions:
00256             self._remove_garbage(fname + extension)
00257         self.controller.set_menu(option)
00258         self.controller.set_input(fname)
00259         for opt in opts:
00260             self.controller.set_parameter(opt, opt+"="+str(opts[opt]))
00261         self.controller() #checks error level is zero
00262         self._remove_garbage(None)
00263         return
00264 
00265 
00266     def _test_pop_hz_both(self, fname, type, ext, enum_test = True,
00267         dememorization = 10000, batches = 20, iterations = 5000):
00268         """Hardy-Weinberg test for heterozygote deficiency/excess.
00269 
00270            Returns a population iterator containg
00271                A dictionary[locus]=(P-val, SE, Fis-WC, Fis-RH, steps)
00272                  Some loci have a None if the info is not available
00273                  SE might be none (for enumerations)
00274         """
00275         opts = self._get_opts(dememorization, batches, iterations, enum_test)
00276         self._run_genepop([ext], [1, type], fname, opts)
00277         f = open(fname + ext)
00278         def hw_func(self):
00279             return _hw_func(self.stream, False)
00280         return _FileIterator(hw_func, f, fname + ext)
00281 
00282     def _test_global_hz_both(self, fname, type, ext, enum_test = True,
00283         dememorization = 10000, batches = 20, iterations = 5000):
00284         """Global Hardy-Weinberg test for heterozygote deficiency/excess.
00285 
00286            Returns a triple with:
00287              A list per population containg
00288                (pop_name, P-val, SE, switches)
00289                  Some pops have a None if the info is not available
00290                  SE might be none (for enumerations)
00291              A list per loci containg
00292                (locus_name, P-val, SE, switches)
00293                  Some loci have a None if the info is not available
00294                  SE might be none (for enumerations)
00295              Overall results (P-val, SE, switches)
00296 
00297         """
00298         opts = self._get_opts(dememorization, batches, iterations, enum_test)
00299         self._run_genepop([ext], [1, type], fname, opts)
00300         def hw_pop_func(self):
00301             return _read_table(self.stream, [str, _gp_float, _gp_float, _gp_float])
00302         f1 = open(fname + ext)
00303         l = f1.readline()
00304         while l.find("by population") == -1:
00305             l = f1.readline()
00306         pop_p = _read_table(f1, [str, _gp_float, _gp_float, _gp_float])
00307         f2 = open(fname + ext)
00308         l = f2.readline()
00309         while l.find("by locus") == -1:
00310             l = f2.readline()
00311         loc_p = _read_table(f2, [str, _gp_float, _gp_float, _gp_float])
00312         f = open(fname + ext)
00313         l = f.readline()
00314         while l.find("all locus") == -1:
00315             l = f.readline()
00316         f.readline()
00317         f.readline()
00318         f.readline()
00319         f.readline()
00320         l = f.readline().rstrip()
00321         p, se, switches = tuple(map(lambda x: _gp_float(x),
00322             filter(lambda y: y != "",l.split(" "))))
00323         f.close()
00324         return pop_p, loc_p, (p, se, switches)
00325 
00326     #1.1
00327     def test_pop_hz_deficiency(self, fname, enum_test = True,
00328         dememorization = 10000, batches = 20, iterations = 5000):
00329         """Hardy-Weinberg test for heterozygote deficiency.
00330 
00331            Returns a population iterator containg
00332                A dictionary[locus]=(P-val, SE, Fis-WC, Fis-RH, steps)
00333                  Some loci have a None if the info is not available
00334                  SE might be none (for enumerations)
00335         """
00336         return self._test_pop_hz_both(fname, 1, ".D", enum_test,
00337             dememorization, batches, iterations)
00338 
00339     #1.2
00340     def test_pop_hz_excess(self, fname, enum_test = True,
00341         dememorization = 10000, batches = 20, iterations = 5000):
00342         """Hardy-Weinberg test for heterozygote deficiency.
00343 
00344            Returns a population iterator containg
00345                A dictionary[locus]=(P-val, SE, Fis-WC, Fis-RH, steps)
00346                  Some loci have a None if the info is not available
00347                  SE might be none (for enumerations)
00348         """
00349         return self._test_pop_hz_both(fname, 2, ".E", enum_test,
00350             dememorization, batches, iterations)
00351 
00352     #1.3 P file
00353     def test_pop_hz_prob(self, fname, ext, enum_test = False,
00354         dememorization = 10000, batches = 20, iterations = 5000):
00355         """Hardy-Weinberg test based on probability.
00356 
00357            Returns 2 iterators and a final tuple:
00358 
00359           1. Returns a loci iterator containing
00360                b. A dictionary[pop_pos]=(P-val, SE, Fis-WC, Fis-RH, steps)
00361                  Some pops have a None if the info is not available
00362                  SE might be none (for enumerations)
00363                c. Result of Fisher's test (Chi2, deg freedom, prob)
00364           2. Returns a population iterator containg
00365                a. A dictionary[locus]=(P-val, SE, Fis-WC, Fis-RH, steps)
00366                  Some loci have a None if the info is not available
00367                  SE might be none (for enumerations)
00368                b. Result of Fisher's test (Chi2, deg freedom, prob)
00369           3. (Chi2, deg freedom, prob)
00370         """
00371         opts = self._get_opts(dememorization, batches, iterations, enum_test)
00372         self._run_genepop([ext], [1, 3], fname, opts)
00373         def hw_prob_loci_func(self): 
00374             return  _hw_func(self.stream, True, True)
00375         def hw_prob_pop_func(self): 
00376             return _hw_func(self.stream, False, True)
00377         shutil.copyfile(fname+".P", fname+".P2")
00378         f1 = open(fname + ".P")
00379         f2 = open(fname + ".P2")
00380         return _FileIterator(hw_prob_loci_func, f1, fname + ".P"), _FileIterator(hw_prob_pop_func, f2, fname + ".P2")
00381 
00382     #1.4
00383     def test_global_hz_deficiency(self, fname, enum_test = True,
00384         dememorization = 10000, batches = 20, iterations = 5000):
00385         """Global Hardy-Weinberg test for heterozygote deficiency.
00386 
00387            Returns a triple with:
00388              An list per population containg
00389                (pop_name, P-val, SE, switches)
00390                  Some pops have a None if the info is not available
00391                  SE might be none (for enumerations)
00392              An list per loci containg
00393                (locus_name, P-val, SE, switches)
00394                  Some loci have a None if the info is not available
00395                  SE might be none (for enumerations)
00396              Overall results (P-val, SE, switches)
00397         """
00398         return self._test_global_hz_both(fname, 4, ".DG", enum_test,
00399             dememorization, batches, iterations)
00400 
00401 
00402     #1.5
00403     def test_global_hz_excess(self, fname, enum_test = True,
00404         dememorization = 10000, batches = 20, iterations = 5000):
00405         """Global Hardy-Weinberg test for heterozygote excess.
00406 
00407            Returns a triple with:
00408              An list per population containg
00409                (pop_name, P-val, SE, switches)
00410                  Some pops have a None if the info is not available
00411                  SE might be none (for enumerations)
00412              An list per loci containg
00413                (locus_name, P-val, SE, switches)
00414                  Some loci have a None if the info is not available
00415                  SE might be none (for enumerations)
00416              Overall results (P-val, SE, switches)
00417         """
00418         return self._test_global_hz_both(fname, 5, ".EG", enum_test,
00419             dememorization, batches, iterations)
00420 
00421     #2.1
00422     def test_ld(self, fname,
00423         dememorization = 10000, batches = 20, iterations = 5000):
00424         opts = self._get_opts(dememorization, batches, iterations)
00425         self._run_genepop([".DIS"], [2, 1], fname, opts)
00426         def ld_pop_func(self):
00427             current_pop = None
00428             l = self.stream.readline().rstrip()
00429             if l == "":
00430                 self.done = True
00431                 raise StopIteration
00432             toks = filter(lambda x: x != "", l.split(" "))
00433             pop, locus1, locus2 = toks[0], toks[1], toks[2]
00434             if not hasattr(self, "start_locus1"):
00435                 start_locus1, start_locus2 = locus1, locus2
00436                 current_pop = -1
00437             if locus1 == start_locus1 and locus2 == start_locus2:
00438                 current_pop += 1
00439             if toks[3] == "No":
00440                 return current_pop, pop, (locus1, locus2), None
00441             p, se, switches = _gp_float(toks[3]), _gp_float(toks[4]), _gp_int(toks[5])
00442             return current_pop, pop, (locus1, locus2), (p, se, switches)
00443         def ld_func(self):
00444             l = self.stream.readline().rstrip()
00445             if l == "":
00446                 self.done = True
00447                 raise StopIteration
00448             toks = filter(lambda x: x != "", l.split(" "))
00449             locus1, locus2 = toks[0], toks[2]
00450             try:
00451                 chi2, df, p = _gp_float(toks[3]), _gp_int(toks[4]), _gp_float(toks[5])
00452             except ValueError:
00453                 return (locus1, locus2), None
00454             return (locus1, locus2), (chi2, df, p)
00455         f1 = open(fname + ".DIS")
00456         l = f1.readline()
00457         while l.find("----")==-1:
00458             l = f1.readline()
00459         shutil.copyfile(fname + ".DIS", fname + ".DI2")
00460         f2 = open(fname + ".DI2")
00461         l = f2.readline()
00462         while l.find("Locus pair")==-1:
00463             l = f2.readline()
00464         while l.find("----")==-1:
00465             l = f2.readline()
00466         return _FileIterator(ld_pop_func, f1, fname+".DIS"), _FileIterator(ld_func, f2, fname + ".DI2")
00467 
00468     #2.2
00469     def create_contingency_tables(self, fname):
00470         raise NotImplementedError
00471 
00472     #3.1 PR/GE files
00473     def test_genic_diff_all(self, fname,
00474         dememorization = 10000, batches = 20, iterations = 5000):
00475         raise NotImplementedError
00476 
00477     #3.2 PR2/GE2 files
00478     def test_genic_diff_pair(self, fname,
00479         dememorization = 10000, batches = 20, iterations = 5000):
00480         raise NotImplementedError
00481 
00482     #3.3 G files
00483     def test_genotypic_diff_all(self, fname,
00484         dememorization = 10000, batches = 20, iterations = 5000):
00485         raise NotImplementedError
00486 
00487     #3.4 2G2 files
00488     def test_genotypic_diff_pair(self, fname,
00489         dememorization = 10000, batches = 20, iterations = 5000):
00490         raise NotImplementedError
00491 
00492     #4
00493     def estimate_nm(self, fname):
00494         self._run_genepop(["PRI"], [4], fname)
00495         f = open(fname + ".PRI")
00496         lines = f.readlines() # Small file, it is ok
00497         f.close()
00498         for line in lines:
00499             m = re.search("Mean sample size: ([.0-9]+)", line)
00500             if m != None:
00501                 mean_sample_size = _gp_float(m.group(1))
00502             m = re.search("Mean frequency of private alleles p\(1\)= ([.0-9]+)", line)
00503             if m != None:
00504                 mean_priv_alleles = _gp_float(m.group(1))
00505             m = re.search("N=10: ([.0-9]+)", line)
00506             if m != None:
00507                 mig10 = _gp_float(m.group(1))
00508             m = re.search("N=25: ([.0-9]+)", line)
00509             if m != None:
00510                  mig25 = _gp_float(m.group(1))
00511             m = re.search("N=50: ([.0-9]+)", line)
00512             if m != None:
00513                  mig50 = _gp_float(m.group(1))
00514             m = re.search("for size= ([.0-9]+)", line)
00515             if m != None:
00516                  mig_corrected = _gp_float(m.group(1))
00517         os.remove(fname + ".PRI")
00518         return mean_sample_size, mean_priv_alleles, mig10, mig25, mig50, mig_corrected
00519 
00520     #5.1
00521     def calc_allele_genotype_freqs(self, fname):
00522         """Calculates allele and genotype frequencies per locus and per sample.
00523         
00524         Parameters:
00525         fname - file name
00526 
00527         Returns tuple with 2 elements:
00528         Population iterator with
00529             population name
00530             Locus dictionary with key = locus name and content tuple as
00531               Genotype List with
00532                 (Allele1, Allele2, observed, expected)
00533               (expected homozygotes, observed hm, 
00534               expected heterozygotes, observed ht)
00535               Allele frequency/Fis dictionary with allele as key and
00536                 (count, frequency, Fis Weir & Cockerham)
00537               Totals as a pair
00538                 count
00539                 Fis Weir & Cockerham,
00540                 Fis Robertson & Hill
00541         Locus iterator with
00542             Locus name
00543             allele list
00544             Population list with a triple
00545                population name
00546                list of allele frequencies in the same order as allele list above
00547                number of genes
00548 
00549         
00550         Will create a file called fname.INF
00551         """
00552         self._run_genepop(["INF"], [5,1], fname)
00553         #First pass, general information
00554         #num_loci = None
00555         #num_pops = None
00556         #f = open(fname + ".INF")
00557         #l = f.readline()
00558         #while (num_loci == None or num_pops == None) and l != '':
00559         #    m = re.search("Number of populations detected : ([0-9+])", l)
00560         #    if m != None:
00561         #        num_pops = _gp_int(m.group(1))
00562         #    m = re.search("Number of loci detected        : ([0-9+])", l)
00563         #    if m != None:
00564         #        num_loci = _gp_int(m.group(1))
00565         #    l = f.readline()
00566         #f.close()
00567         def pop_parser(self):
00568             if hasattr(self, "old_line"):
00569                 l = self.old_line
00570                 del self.old_line
00571             else:
00572                 l = self.stream.readline()
00573             loci_content = {}
00574             while l != '':
00575                 l = l.rstrip()
00576                 if l.find("Tables of allelic frequencies for each locus")>-1:
00577                     return self.curr_pop, loci_content
00578                 match = re.match(".*Pop: (.+) Locus: (.+)", l)
00579                 if match != None:
00580                     pop = match.group(1)
00581                     locus = match.group(2)
00582                     if not hasattr(self, "first_locus"):
00583                         self.first_locus = locus
00584                     if hasattr(self, "curr_pop"):
00585                         if self.first_locus == locus:
00586                             old_pop = self.curr_pop
00587                             #self.curr_pop = pop
00588                             self.old_line = l
00589                             del self.first_locus
00590                             del self.curr_pop
00591                             return old_pop, loci_content
00592                     self.curr_pop = pop
00593                 else:
00594                     l = self.stream.readline()
00595                     continue
00596                 geno_list = []
00597                 l = self.stream.readline()
00598                 if l.find("No data")>-1: continue
00599 
00600                 while l.find("Genotypes  Obs.")==-1:
00601                     l = self.stream.readline()
00602 
00603                 while l != "\n":
00604                     m2 = re.match(" +([0-9]+) , ([0-9]+) *([0-9]+) *(.+)",l)
00605                     if m2 != None:
00606                         geno_list.append((_gp_int(m2.group(1)), _gp_int(m2.group(2)),
00607                             _gp_int(m2.group(3)), _gp_float(m2.group(4))))
00608                     else:
00609                         l = self.stream.readline()
00610                         continue
00611                     l = self.stream.readline()
00612 
00613                 while l.find("Expected number of ho")==-1:
00614                     l = self.stream.readline()
00615                 expHo =  _gp_float(l[38:])
00616                 l = self.stream.readline()
00617                 obsHo =  _gp_int(l[38:])
00618                 l = self.stream.readline()
00619                 expHe =  _gp_float(l[38:])
00620                 l = self.stream.readline()
00621                 obsHe =  _gp_int(l[38:])
00622                 l = self.stream.readline()
00623 
00624                 while l.find("Sample count")==-1:
00625                     l = self.stream.readline()
00626                 l = self.stream.readline()
00627                 freq_fis={}
00628                 overall_fis = None
00629                 while l.find("----")==-1:
00630                     vals = filter(lambda x: x!='',
00631                             l.rstrip().split(' '))
00632                     if vals[0]=="Tot":
00633                         overall_fis = _gp_int(vals[1]), \
00634                                 _gp_float(vals[2]), _gp_float(vals[3])
00635                     else:
00636                         freq_fis[_gp_int(vals[0])] = _gp_int(vals[1]), \
00637                                 _gp_float(vals[2]), _gp_float(vals[3])
00638                     l = self.stream.readline()
00639                 loci_content[locus] = geno_list, \
00640                         (expHo, obsHo, expHe, obsHe), \
00641                         freq_fis, overall_fis
00642             self.done = True
00643             raise StopIteration
00644         def locus_parser(self):
00645             l = self.stream.readline()
00646             while l != "":
00647                 l = l.rstrip()
00648                 match = re.match(" Locus: (.+)", l)
00649                 if match != None:
00650                     locus = match.group(1)
00651                     alleles, table = _read_allele_freq_table(self.stream)
00652                     return locus, alleles, table
00653                 l = self.stream.readline()
00654             self.done = True
00655             raise StopIteration
00656 
00657         popf = open(fname + ".INF")
00658         shutil.copyfile(fname + ".INF", fname + ".IN2")
00659         locf = open(fname + ".IN2")
00660         pop_iter = _FileIterator(pop_parser, popf, fname + ".INF")
00661         locus_iter = _FileIterator(locus_parser, locf, fname + ".IN2")
00662         return (pop_iter, locus_iter)
00663 
00664     def _calc_diversities_fis(self, fname, ext):
00665         self._run_genepop([ext], [5,2], fname)
00666         f = open(fname + ext)
00667         l = f.readline()
00668         while l != "":
00669             l = l.rstrip()
00670             if l.startswith("Statistics per sample over all loci with at least two individuals typed"):
00671                 avg_fis = _read_table(f, [str, _gp_float, _gp_float, _gp_float])
00672                 avg_Qintra = _read_table(f, [str, _gp_float])
00673             l = f.readline()
00674         f.close()
00675         def fis_func(self): 
00676             l = self.stream.readline()
00677             while l != "":
00678                 l = l.rstrip()
00679                 m = re.search("Locus: (.+)", l)
00680                 if m != None:
00681                     locus = m.group(1)
00682                     self.stream.readline()
00683                     if self.stream.readline().find("No complete")>-1: return locus, None
00684                     self.stream.readline()
00685                     fis_table = _read_table(self.stream, [str, _gp_float, _gp_float, _gp_float])
00686                     self.stream.readline()
00687                     avg_qinter, avg_fis = tuple(map (lambda x: _gp_float(x),
00688                         filter(lambda y:y != "", self.stream.readline().split(" "))))
00689                     return locus, fis_table, avg_qinter, avg_fis
00690                 l = self.stream.readline()
00691             self.done = True
00692             raise StopIteration
00693         dvf = open(fname + ext)
00694         return _FileIterator(fis_func, dvf, fname + ext), avg_fis, avg_Qintra
00695 
00696     #5.2
00697     def calc_diversities_fis_with_identity(self, fname):
00698         return self._calc_diversities_fis(fname, ".DIV")
00699 
00700     #5.3
00701     def calc_diversities_fis_with_size(self, fname):
00702         raise NotImplementedError
00703 
00704     #6.1 Less genotype frequencies
00705     def calc_fst_all(self, fname):
00706         """Executes GenePop and gets Fst/Fis/Fit (all populations)
00707         
00708         Parameters:
00709         fname - file name
00710 
00711         Returns:
00712         (multiLocusFis, multiLocusFst, multiLocus Fit),
00713         Iterator of tuples
00714           (Locus name, Fis, Fst, Fit, Qintra, Qinter)
00715 
00716         Will create a file called fname.FST .
00717 
00718         This does not return the genotype frequencies.
00719         
00720         """
00721         self._run_genepop([".FST"], [6,1], fname)
00722         f = open(fname + ".FST")
00723         l = f.readline()
00724         while l != '':
00725             if l.startswith('           All:'):
00726                 toks=filter(lambda x:x!="", l.rstrip().split(' '))
00727                 try:
00728                     allFis = _gp_float(toks[1])
00729                 except ValueError:
00730                     allFis = None
00731                 try:
00732                     allFst = _gp_float(toks[2])
00733                 except ValueError:
00734                     allFst = None
00735                 try:
00736                     allFit = _gp_float(toks[3])
00737                 except ValueError:
00738                     allFit = None
00739             l = f.readline()
00740         f.close()
00741         f = open(fname + ".FST")
00742         def proc(self): 
00743             if hasattr(self, "last_line"):
00744                 l = self.last_line
00745                 del self.last_line
00746             else:
00747                 l = self.stream.readline()
00748             locus = None
00749             fis = None
00750             fst = None
00751             fit = None
00752             qintra = None
00753             qinter = None
00754             while l != '':
00755                 l = l.rstrip()
00756                 if l.startswith('  Locus:'):
00757                     if locus != None:
00758                         self.last_line = l
00759                         return locus, fis, fst, fit, qintra, qinter
00760                     else:
00761                         locus = l.split(':')[1].lstrip()
00762                 elif l.startswith('Fis^='):
00763                     fis = _gp_float(l.split(' ')[1])
00764                 elif l.startswith('Fst^='):
00765                     fst = _gp_float(l.split(' ')[1])
00766                 elif l.startswith('Fit^='):
00767                     fit = _gp_float(l.split(' ')[1])
00768                 elif l.startswith('1-Qintra^='):
00769                     qintra = _gp_float(l.split(' ')[1])
00770                 elif l.startswith('1-Qinter^='):
00771                     qinter = _gp_float(l.split(' ')[1])
00772                     return locus, fis, fst, fit, qintra, qinter
00773                 l = self.stream.readline()
00774             if locus != None:
00775                 return locus, fis, fst, fit, qintra, qinter
00776             self.stream.close()
00777             self.done = True
00778             raise StopIteration
00779         return (allFis, allFst, allFit), _FileIterator(proc , f, fname + ".FST")
00780 
00781     #6.2
00782     def calc_fst_pair(self, fname):
00783         self._run_genepop([".ST2", ".MIG"], [6,2], fname)
00784         f = open(fname + ".ST2")
00785         l = f.readline()
00786         while l != "":
00787             l = l.rstrip()
00788             if l.startswith("Estimates for all loci"):
00789                 avg_fst = _read_headed_triangle_matrix(f)
00790             l = f.readline()
00791         f.close()
00792         def loci_func(self): 
00793             l = self.stream.readline()
00794             while l != "":
00795                 l = l.rstrip()
00796                 m = re.search(" Locus: (.+)", l)
00797                 if m != None:
00798                     locus = m.group(1)
00799                     matrix = _read_headed_triangle_matrix(self.stream)
00800                     return locus, matrix
00801                 l = self.stream.readline()
00802             self.done = True
00803             raise StopIteration
00804         stf = open(fname + ".ST2")
00805         os.remove(fname + ".MIG")
00806         return _FileIterator(loci_func, stf, fname + ".ST2"), avg_fst
00807 
00808     #6.3
00809     def calc_rho_all(self, fname):
00810         raise NotImplementedError
00811 
00812     #6.4
00813     def calc_rho_pair(self, fname):
00814         raise NotImplementedError
00815 
00816     def _calc_ibd(self, fname, sub, stat="a", scale="Log", min_dist=0.00001):
00817         """Calculates isolation by distance statistics
00818         """
00819         self._run_genepop([".GRA", ".MIG", ".ISO"], [6,sub],
00820             fname, opts = {
00821             "MinimalDistance" : min_dist,
00822             "GeographicScale" : scale,
00823             "IsolBDstatistic" : stat,
00824             })
00825         f = open(fname + ".ISO")
00826         f.readline()
00827         f.readline()
00828         f.readline()
00829         f.readline()
00830         estimate = _read_triangle_matrix(f)
00831         f.readline()
00832         f.readline()
00833         distance = _read_triangle_matrix(f)
00834         f.readline()
00835         match = re.match("a = (.+), b = (.+)", f.readline().rstrip())
00836         a = _gp_float(match.group(1))
00837         b = _gp_float(match.group(2))
00838         f.readline()
00839         f.readline()
00840         match = re.match(" b=(.+)", f.readline().rstrip())
00841         bb = _gp_float(match.group(1))
00842         match = re.match(".*\[(.+)  ;  (.+)\]", f.readline().rstrip())
00843         bblow = _gp_float(match.group(1))
00844         bbhigh = _gp_float(match.group(2))
00845         f.close()
00846         os.remove(fname + ".MIG")
00847         os.remove(fname + ".GRA")
00848         os.remove(fname + ".ISO")
00849         return estimate, distance, (a, b), (bb, bblow, bbhigh)
00850 
00851     #6.5
00852     def calc_ibd_diplo(self, fname, stat="a", scale="Log", min_dist=0.00001):
00853         """Calculates isolation by distance statistics for diploid data.
00854 
00855            See _calc_ibd for parameter details.
00856            Note that each pop can only have a single individual and
00857            the individual name has to be the sample coordinates.
00858         """
00859         return self._calc_ibd(fname, 5, stat, scale, min_dist)
00860 
00861     #6.6
00862     def calc_ibd_haplo(self, fname, stat="a", scale="Log", min_dist=0.00001):
00863         """Calculates isolation by distance statistics for haploid data.
00864 
00865            See _calc_ibd for parameter details.
00866            Note that each pop can only have a single individual and
00867            the individual name has to be the sample coordinates.
00868         """
00869         return self._calc_ibd(fname, 6, stat, scale, min_dist)
00870 
00871