Back to index

python-biopython  1.60
__init__.py
Go to the documentation of this file.
00001 # Copyright 2007 by Tiago Antao.  All rights reserved.
00002 # This code is part of the Biopython distribution and governed by its
00003 # license.  Please see the LICENSE file that should have been included
00004 # as part of this package.
00005 
00006 """
00007 This module provides code to work with GenePop.
00008 
00009 See http://wbiomed.curtin.edu.au/genepop/ , the format is documented
00010 here: http://wbiomed.curtin.edu.au/genepop/help_input.html .
00011 
00012 Classes:
00013 Record           Holds GenePop data.
00014 
00015 Functions:
00016 read             Parses a GenePop record (file) into a Record object.
00017 
00018 
00019 Partially inspired by the MedLine code.
00020 
00021 """
00022 from copy import deepcopy
00023 
00024 
def get_indiv(line):
    """Parses one individual line of a GenePop population block.

       line is a string of the form "name, genotype genotype ...", where
       each genotype is a run of digits holding one allele (haploid) or
       two alleles (diploid) of 2 or 3 digits each.

       Returns a tuple (indiv_name, allele_list, marker_len):
       indiv_name   Everything before the last comma, unmodified.
       allele_list  One tuple per genotype: 2 items for diploid data,
                    1 item for haploid data. Alleles are ints, with 0
                    converted to None.
       marker_len   Digits per allele (2 or 3), deduced from the length
                    of the first genotype only.

       Raises ValueError if the line has no comma or a genotype is not
       numeric, and IndexError if no genotype follows the comma.
    """
    def int_no_zero(val):
        """Converts val to an int, returning None instead of 0."""
        v = int(val)
        return v if v != 0 else None

    # Split on the LAST comma only: genotypes are purely numeric, so any
    # earlier comma can only belong to the individual name.
    indiv_name, marker_line = line.rsplit(',', 1)
    # split() without arguments separates on any run of whitespace (spaces,
    # tabs, a trailing newline), so no stray character is left glued to a
    # genotype and counted when measuring its length below.
    markers = marker_line.split()
    if len(markers[0]) in (2, 4):  # 2 digits per allele
        marker_len = 2
    else:
        marker_len = 3
    try:
        allele_list = [(int_no_zero(marker[:marker_len]),
                        int_no_zero(marker[marker_len:]))
                       for marker in markers]
    except ValueError:
        # Haploid: the second half of a genotype is empty and int('')
        # raises ValueError, so only the first allele is read.
        allele_list = [(int_no_zero(marker[:marker_len]),)
                       for marker in markers]
    return indiv_name, allele_list, marker_len
00046 
def read(handle):
    """Parses a handle containing a GenePop file.

       handle is a file-like object (any iterator over text lines) that
       contains a GenePop record.

       Returns a Record with:
       comment_line  The first line of the file.
       loci_list     The locus names read before the first "Pop" line.
       populations   One list of (name, allele_list) pairs per "Pop" block.
       pop_list      For each population, the name of its LAST individual.
       marker_len    Digits per allele as reported for the last individual
                     parsed.

       Blank lines are ignored.

       Raises ValueError if no "Pop" line is found or an individual line
       cannot be parsed, StopIteration if the handle holds fewer than two
       lines, and IndexError if a population has no individuals or an
       individual has fewer genotypes than there are loci.
    """
    record = Record()
    # next() as a builtin (Python 2.6+) works for any iterator, whereas the
    # .next() method only exists on Python 2 iterators.
    record.comment_line = str(next(handle)).rstrip()
    # We can have one locus per line, or all loci in a single line
    # separated by spaces and/or commas. Commas become spaces so that
    # "a,b" and "a, b" both yield two loci, and split() drops the empty
    # names that repeated separators would otherwise create.
    sample_loci_line = str(next(handle)).rstrip()
    record.loci_list.extend(sample_loci_line.replace(',', ' ').split())
    for line in handle:
        line = line.rstrip()
        if line.upper() == 'POP':
            break
        if line:  # a blank line is not a locus name
            record.loci_list.append(line)
    else:
        raise ValueError('No population data found, file probably not GenePop related')
    record.populations.append([])
    for line in handle:
        line = line.rstrip()
        if not line:
            # Blank (typically trailing) lines carry no individual.
            continue
        if line.upper() == 'POP':
            record.populations.append([])
        else:
            indiv_name, allele_list, record.marker_len = get_indiv(line)
            record.populations[-1].append((indiv_name, allele_list))
    n_loci = len(record.loci_list)
    for pop in record.populations:
        # The population is named after its last individual.
        record.pop_list.append(pop[-1][0])
        for _name, genotypes in pop:
            # Rebuild each genotype tuple with any 0 allele mapped to None.
            # Indexing by locus count (rather than iterating the
            # genotypes) also surfaces, as IndexError, individuals with
            # fewer genotypes than loci.
            for mk_i in range(n_loci):
                genotypes[mk_i] = tuple(None if al == 0 else al
                                        for al in genotypes[mk_i])
    return record
00089 
00090 
class Record(object):
    """Holds information from a GenePop record.

    Members:
    marker_len         The marker length (2 or 3 digit code per allele).

    comment_line       Comment line.

    loci_list          List of loci names.

    pop_list           List of population names.

    populations        List of population data.

    In most genepop files, the population name is not trustable.
    It is strongly recommended that populations are referred by index.

    populations has one element per population. Each element is itself
    a list of individuals, each individual is a pair composed by individual
    name and a list of alleles (2 per marker or 1 for haploids): Example
    [
        [
            ('Ind1', [(1,2),    (3,3), (200,201)]),
            ('Ind2', [(2,None), (3,3), (None,None)]),
        ],
        [
            ('Other1', [(1,1),  (4,3), (200,200)]),
        ]
    ]

    """
    def __init__(self):
        self.marker_len      = 0
        self.comment_line    = ""
        self.loci_list       = []
        self.pop_list        = []
        self.populations     = []

    def __str__(self):
        """Returns (reconstructs) a GenePop textual representation.

        The output is the comment line, one locus name per line, then for
        each population a "Pop" line followed by one line per individual.
        Each allele is left-padded with zeros to marker_len digits and a
        None allele is written as all zeros.
        """
        rep = [self.comment_line + '\n']
        rep.append('\n'.join(self.loci_list) + '\n')
        for pop in self.populations:
            rep.append('Pop\n')
            for name, markers in pop:
                rep.append(name)
                rep.append(',')
                for marker in markers:
                    rep.append(' ')
                    for al in marker:
                        if al is None:
                            al = '0'
                        # rjust only prepends zeros, never truncates.
                        rep.append(str(al).rjust(self.marker_len, '0'))
                rep.append('\n')
        return "".join(rep)

    def split_in_pops(self, pop_names):
        """Splits a GP record in a dictionary with 1 pop per entry.

            Given a record with n pops and m loci returns a dictionary
            of records (key pop_name) where each item is a record
            with a single pop and m loci.

            Parameters:
            pop_names - Population names, indexed like self.populations.
                        Must have at least one entry per population
                        (IndexError otherwise); repeated names overwrite
                        earlier entries in the returned dictionary.

            The returned records hold deep copies of the data; their
            pop_list is left empty.
        """
        gp_pops = {}
        for i, pop in enumerate(self.populations):
            gp_pop = Record()
            gp_pop.marker_len = self.marker_len
            gp_pop.comment_line = self.comment_line
            gp_pop.loci_list = deepcopy(self.loci_list)
            gp_pop.populations = [deepcopy(pop)]
            gp_pops[pop_names[i]] = gp_pop
        return gp_pops

    def split_in_loci(self, gp=None):
        """Splits a GP record in a dictionary with 1 locus per entry.

            Given a record with n pops and m loci returns a dictionary
            of records (key locus name) where each item is a record
            with a single locus and n pops.

            Parameters:
            gp - Unused. Kept (now optional) only so that existing calls
                 that pass an argument keep working.

            The returned records get new population and genotype lists;
            their pop_list is left empty.
        """
        gp_loci = {}
        for i, locus in enumerate(self.loci_list):
            gp_pop = Record()
            gp_pop.marker_len = self.marker_len
            gp_pop.comment_line = self.comment_line
            gp_pop.loci_list = [locus]
            gp_pop.populations = [
                [(indiv[0], [indiv[1][i]]) for indiv in pop]
                for pop in self.populations]
            gp_loci[locus] = gp_pop
        return gp_loci

    def remove_population(self, pos):
        """Removes a population (by position).

        When pop_list has one name per population, the matching name is
        removed as well so that both lists stay aligned by index. A
        pop_list of any other length is left untouched.
        """
        names_aligned = len(self.pop_list) == len(self.populations)
        del self.populations[pos]
        if names_aligned:
            del self.pop_list[pos]

    def remove_locus_by_position(self, pos):
        """Removes a locus by position.

        The locus name is dropped from loci_list and the genotype at the
        same position is dropped from every individual.
        """
        del self.loci_list[pos]
        for pop in self.populations:
            for _name, loci in pop:
                del loci[pos]

    def remove_locus_by_name(self, name):
        """Removes a locus by name.

        Only the first locus with that name is removed. Nothing happens
        (and no error is raised) if no locus has that name.
        """
        try:
            pos = self.loci_list.index(name)
        except ValueError:
            # Unknown locus: silently ignored, as callers may rely on it.
            return
        self.remove_locus_by_position(pos)
00219     
00220