Back to index

python-biopython  1.60
Organism.py
Go to the documentation of this file.
00001 """Deal with an Organism in a Genetic Algorithm population.
00002 """
00003 # standard modules
00004 import sys #for Python 3 hack
00005 import random
00006 import array
00007 
00008 # Sequence objects from Biopython
00009 from Bio.Seq import MutableSeq
00010 
def function_population(new_genome, num_organisms, fitness_calculator):
    """Build a population whose genomes come from a factory callable.

    Arguments:

    o new_genome - A function or callable object, called with no
    arguments once per organism, returning the genome for that organism.
    The genome should be a MutableSeq object with a specified alphabet.

    o num_organisms - The number of individuals we want in the population.

    o fitness_calculator -- A function that will calculate the fitness
    of the organism when given the organism's genome.

    Returns a list holding one Organism per requested individual.
    """
    # one call to the genome factory per organism, in order
    return [Organism(new_genome(), fitness_calculator)
            for _ in range(num_organisms)]
00032 
def random_population(genome_alphabet, genome_size, num_organisms,
                      fitness_calculator):
    """Generate a population of individuals with randomly set genomes.

    Arguments:

    o genome_alphabet -- An Alphabet object describing all of the
    possible letters that could potentially be in the genome of an
    organism. Its letters attribute must be a non-empty sequence whose
    first element is a string, an integer or a float.

    o genome_size -- The size of each organism's genome.

    o num_organisms -- The number of organisms we want in the population.

    o fitness_calculator -- A function that will calculate the fitness
    of the organism when given the organism's genome.

    Returns a list of num_organisms Organism objects.

    Raises ValueError if the alphabet has no letters, or if its letters
    are of a type that cannot be stored in an array.
    """
    letters = genome_alphabet.letters
    if not letters:
        # fail clearly instead of with an IndexError/TypeError on letters[0]
        raise ValueError("Alphabet has no letters to build a genome from: %r"
                         % (letters,))

    # figure out what type of characters are in the alphabet; only the
    # first letter is inspected
    first_letter = letters[0]
    if isinstance(first_letter, str):
        if sys.version_info[0] >= 3:
            alphabet_type = "u"  # Use unicode string on Python 3
        else:
            alphabet_type = "c"  # Use byte string on Python 2
    elif isinstance(first_letter, int):
        alphabet_type = "i"
    elif isinstance(first_letter, float):
        alphabet_type = "d"
    else:
        # wrap in a 1-tuple so that tuple-valued letters are formatted
        # as a whole rather than being unpacked as format arguments
        raise ValueError("Alphabet type is unsupported: %s" % (letters,))

    # a random number generator to get letters for the genome
    letter_rand = random.Random()

    all_orgs = []
    for _ in range(num_organisms):
        new_genome = MutableSeq(array.array(alphabet_type), genome_alphabet)

        # generate the genome randomly, one letter at a time
        for _ in range(genome_size):
            new_genome.append(letter_rand.choice(letters))

        # add the new organism with this genome
        all_orgs.append(Organism(new_genome, fitness_calculator))

    return all_orgs
00081 
class Organism(object):
    """Represent a single individual in a population.

    Attributes:

    o genome -- The genome of the organism. This is a Bio.MutableSeq
    object that has the sequence of the genome, and the alphabet
    describing all elements that can be a part of the genome.

    o fitness -- The calculated fitness of the organism. This fitness is
    based on the last time it was calculated using the fitness_calculator.
    So... the fitness could potentially be out of date with the real genome
    if you are not careful to recalculate it after changes with
    recalculate_fitness()

    Organisms compare (==, !=, <, <=, >, >=) by the string form of their
    genomes; the fitness plays no part in comparisons. Comparing with an
    object that has no genome attribute returns NotImplemented, so Python
    falls back to its default handling instead of raising AttributeError.
    """
    def __init__(self, genome, fitness_calculator, start_fitness = None):
        """Initialize an organism

        Arguments:

        o genome -- A MutableSeq object representing the sequence of the
        genome.

        o fitness_calculator -- A function that will calculate the fitness
        of the organism when given the organism's genome.

        o start_fitness - the starting fitness corresponding with the
        given genome. If not supplied, the fitness will be calculated
        using fitness_calculator.
        """
        # kept as an assert so callers see the same exception type as before
        assert isinstance(genome, MutableSeq), "Genome must be a MutableSeq"

        self.genome = genome
        self._fitness_calc = fitness_calculator

        # calculate the fitness of the genome
        if start_fitness is None:
            self.fitness = self._fitness_calc(self.genome)
        else:
            self.fitness = start_fitness

    def __str__(self):
        """Provide a string output for debugging.
        """
        # str() matches the conversion used by the comparison methods
        return "Genome: %s; Fitness %s" % (str(self.genome), self.fitness)

    def __eq__(self, other):
        """Compare organisms by their genomes (as strings of letters).
        """
        # See Bio/Seq.py and the comments there about shifting to
        # using simple string equality. Previously Seq objects used
        # object equality, while MutableSeq objects used alphabet
        # aware string equality.
        if not hasattr(other, "genome"):
            return NotImplemented
        return str(self.genome) == str(other.genome)

    def __ne__(self, other):
        """Compare organisms by their genomes (as strings of letters).
        """
        if not hasattr(other, "genome"):
            return NotImplemented
        return str(self.genome) != str(other.genome)

    def __lt__(self, other):
        """Compare organisms by their genomes (as strings of letters).
        """
        if not hasattr(other, "genome"):
            return NotImplemented
        return str(self.genome) < str(other.genome)

    def __le__(self, other):
        """Compare organisms by their genomes (as strings of letters).
        """
        if not hasattr(other, "genome"):
            return NotImplemented
        return str(self.genome) <= str(other.genome)

    def __gt__(self, other):
        """Compare organisms by their genomes (as strings of letters).
        """
        if not hasattr(other, "genome"):
            return NotImplemented
        return str(self.genome) > str(other.genome)

    def __ge__(self, other):
        """Compare organisms by their genomes (as strings of letters).
        """
        if not hasattr(other, "genome"):
            return NotImplemented
        return str(self.genome) >= str(other.genome)

    def copy(self):
        """Return a copy of the organism.

        This makes it easy to duplicate an organism before changing it.
        The genome is copied with a full slice; the fitness calculator
        and the current fitness value are passed on to the new organism,
        so the fitness is not recalculated.
        """
        copy_genome = self.genome[:]
        return Organism(copy_genome, self._fitness_calc, self.fitness)

    def recalculate_fitness(self):
        """Calculate and reset the fitness of the current genome

        This should be called after the genome is updated to ensure that
        fitness always stays in sync with the current genome.
        """
        self.fitness = self._fitness_calc(self.genome)