Back to index

python-biopython  1.60
Utils.py
Go to the documentation of this file.
00001 # Copyright 2007 by Tiago Antao <tiagoantao@gmail.com>.  All rights reserved.
00002 # This code is part of the Biopython distribution and governed by its
00003 # license.  Please see the LICENSE file that should have been included
00004 # as part of this package.
00005 
00006 
00007 import os
00008 from Bio.PopGen import GenePop
00009 from Bio.PopGen.GenePop import FileParser
00010 import Bio.PopGen.FDist
00011 
00012 # Quite a few utility functions could be done (like remove pop,
00013 # add locus, etc...). The recommended strategy is convert back
00014 # and forth from/to GenePop and use GenePop Utils
00015 
00016 def convert_genepop_to_fdist(gp_rec, report_pops = None):
00017     """Converts a GenePop record to a FDist one.
00018 
00019        Parameters:
00020        gp_rec - Genepop Record (either standard or big)
00021 
00022        Returns:
00023        FDist record.
00024     """
00025     if hasattr(gp_rec, "populations"):
00026         return _convert_genepop_to_fdist(gp_rec)
00027     else:
00028         return _convert_genepop_to_fdist_big(gp_rec, report_pops)
00029 
00030 def _convert_genepop_to_fdist(gp_rec):
00031     """Converts a standard GenePop record to a FDist one.
00032 
00033        Parameters:
00034        gp_rec - Genepop Record (Standard)
00035 
00036        Returns:
00037        FDist record.
00038     """
00039     fd_rec = Bio.PopGen.FDist.Record()
00040     
00041     fd_rec.data_org = 0
00042     fd_rec.num_loci = len(gp_rec.loci_list)
00043     fd_rec.num_pops = len(gp_rec.populations)
00044     for lc_i in range(len(gp_rec.loci_list)):
00045         alleles = []
00046         pop_data = []
00047         for pop_i in range(len(gp_rec.populations)):
00048             for indiv in gp_rec.populations[pop_i]:
00049                 for al in indiv[1][lc_i]:
00050                     if al is not None and al not in alleles:
00051                         alleles.append(al)
00052         alleles.sort() #Dominance requires this
00053 
00054         #here we go again (necessary...)
00055         for pop_i in range(len(gp_rec.populations)):
00056             allele_counts = {}
00057             for indiv in gp_rec.populations[pop_i]:
00058                 for al in indiv[1][lc_i]:
00059                     if al is not None:
00060                         count = allele_counts.get(al, 0)
00061                         allele_counts[al] = count + 1
00062             allele_array = [] #We need the same order as in alleles
00063             for allele in alleles:
00064                 allele_array.append(allele_counts.get(allele, 0))
00065             pop_data.append(allele_array)
00066             #if lc_i==3:
00067             #    print alleles, allele_counts#, pop_data
00068         fd_rec.loci_data.append((len(alleles), pop_data))
00069     return fd_rec
00070 
00071 def _convert_genepop_to_fdist_big(gp_rec, report_pops = None):
00072     """Converts a big GenePop record to a FDist one.
00073 
00074        Parameters:
00075        gp_rec - Genepop Record (Big)
00076 
00077        Returns:
00078        FDist record.
00079     """
00080     fd_rec = Bio.PopGen.FDist.Record()
00081 
00082     fd_rec.data_org = 1
00083     fd_rec.num_loci = len(gp_rec.loci_list)
00084     num_loci = len(gp_rec.loci_list)
00085     loci = []
00086     for i in range(num_loci):
00087         loci.append(set())
00088     pops = []
00089     work_rec = FileParser.read(gp_rec.fname)
00090     lParser = work_rec.get_individual()
00091     def init_pop():
00092         my_pop = []
00093         for i in range(num_loci):
00094             my_pop.append({})
00095         return my_pop
00096     curr_pop = init_pop()
00097     num_pops = 1
00098     if report_pops:
00099         report_pops(num_pops)
00100     while lParser:
00101         if lParser != True:
00102             for loci_pos in range(num_loci):
00103                 for al in lParser[1][loci_pos]:
00104                     if al is not None:
00105                         loci[loci_pos].add(al)
00106                         curr_pop[loci_pos][al]= curr_pop[loci_pos].get(al,0)+1
00107         else:
00108             pops.append(curr_pop)
00109             num_pops += 1
00110             if report_pops:
00111                 report_pops(num_pops)
00112             curr_pop = init_pop()
00113         lParser = work_rec.get_individual()
00114     work_rec._handle.close() #TODO - Needs a proper fix
00115     pops.append(curr_pop)
00116     fd_rec.num_pops = num_pops
00117     for loci_pos in range(num_loci):
00118         alleles = list(loci[loci_pos])
00119         alleles.sort()
00120         loci_rec = [len(alleles), []]
00121         for pop in pops:
00122             pop_rec = []
00123             for allele in alleles:
00124                 pop_rec.append(pop[loci_pos].get(allele, 0))
00125             loci_rec[1].append(pop_rec)
00126         fd_rec.loci_data.append(tuple(loci_rec))
00127     return fd_rec
00128 
00129 
00130 def _convert_genepop_to_fdist_big_old(gp_rec, report_loci = None):
00131     """Converts a big GenePop record to a FDist one.
00132 
00133        Parameters:
00134        gp_rec - Genepop Record (Big)
00135 
00136        Returns:
00137        FDist record.
00138     """
00139     fd_rec = Bio.PopGen.FDist.Record()
00140 
00141     def countPops(rec):
00142         f2 = FileParser.read(rec.fname)
00143         popCnt = 1
00144         while f2.skip_population():
00145             popCnt += 1
00146         return popCnt
00147 
00148     
00149     fd_rec.data_org = 0
00150     fd_rec.num_loci = len(gp_rec.loci_list)
00151     work_rec0 = FileParser.read(gp_rec.fname)
00152     fd_rec.num_pops = countPops(work_rec0)
00153 
00154     num_loci = len(gp_rec.loci_list)
00155     for lc_i in range(num_loci):
00156         if report_loci:
00157             report_loci(lc_i, num_loci)
00158         work_rec = FileParser.read(gp_rec.fname)
00159         work_rec2 = FileParser.read(gp_rec.fname)
00160 
00161         alleles = []
00162         pop_data = []
00163         lParser = work_rec.get_individual()
00164         while lParser:
00165             if lParser != True:
00166                 for al in lParser[1][lc_i]:
00167                     if al is not None and al not in alleles:
00168                         alleles.append(al)
00169             lParser = work_rec.get_individual()
00170         #here we go again (necessary...)
00171         alleles.sort()
00172         def process_pop(pop_data, alleles, allele_counts):
00173             allele_array = [] #We need the same order as in alleles
00174             for allele in alleles:
00175                 allele_array.append(allele_counts.get(allele, 0))
00176             pop_data.append(allele_array)
00177         lParser = work_rec2.get_individual()
00178         allele_counts = {}
00179         for allele in alleles:
00180             allele_counts[allele] = 0
00181         allele_counts[None]=0
00182         while lParser:
00183             if lParser == True:
00184                 process_pop(pop_data, alleles, allele_counts)
00185                 allele_counts = {}
00186                 for allele in alleles:
00187                     allele_counts[allele] = 0
00188                 allele_counts[None]=0
00189             else:
00190                 for al in lParser[1][lc_i]:
00191                     allele_counts[al] += 1
00192             lParser = work_rec2.get_individual()
00193         process_pop(pop_data, alleles, allele_counts)
00194         fd_rec.loci_data.append((len(alleles), pop_data))
00195     return fd_rec
00196 
00197 
00198 def approximate_fst(desired_fst, simulated_fst, parameter_fst,
00199            max_run_fst = 1, min_run_fst = 0, limit = 0.005):
00200     """Calculates the next Fst attempt in order to approximate a
00201        desired Fst.
00202     
00203     """
00204     if abs(simulated_fst - desired_fst) < limit:
00205         return parameter_fst, max_run_fst, min_run_fst
00206     if simulated_fst > desired_fst:
00207         max_run_fst = parameter_fst
00208         next_parameter_fst = (min_run_fst + parameter_fst)/2
00209     else:
00210         min_run_fst = parameter_fst
00211         next_parameter_fst = (max_run_fst + parameter_fst)/2
00212     return next_parameter_fst, max_run_fst, min_run_fst
00213