Back to index

python-biopython  1.60
test_GAQueens.py
Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 """Try out the N-queens problem for an arbitrary number of queens.
00003 
00004 This program uses Genetic Algorithms to try to solve the N queens
00005 problem, in which you place N different queens on an N by N chess
00006 board such that no two queens will be attacking each other.
00007 
00008 We represent queens on the board as a tuple like (1, 2, 3, 4, 5)
00009 which would be 5 queens diaganol across the board.
00010 
00011 Usage:
00012 python test_GAQueens.py <Number of Queens to place>
00013 
00014 where <Number of Queens to place> is just a number specifying how many
00015 queens you want to try to calculate this for.
00016 
00017 When called as part of the Biopython unit test suite, 5 queens are used.
00018 """
00019 # standard library
00020 import sys
00021 import math
00022 import random
00023 import copy
00024 import time
00025 
00026 # Biopython
00027 from Bio import Alphabet
00028 
00029 # Genetic Algorithm stuff
00030 from Bio.GA.Evolver import GenerationEvolver
00031 from Bio.GA import Organism
00032 from Bio.GA.Mutation.Simple import ConversionMutation
00033 from Bio.GA.Crossover.Point import SinglePointCrossover
00034 from Bio.GA.Selection.RouletteWheel import RouletteWheelSelection
00035 from Bio.GA.Selection.Tournament import TournamentSelection
00036 
00037 VERBOSE = 0
00038 
00039 
00040 def main(num_queens):
00041 
00042     print "Calculating for %s queens..." % num_queens
00043 
00044     num_orgs = 1000
00045     print "Generating an initial population of %s organisms..." % num_orgs
00046     queen_alphabet = QueensAlphabet(num_queens)
00047 
00048     start_population = Organism.random_population(queen_alphabet, num_queens,
00049                                                   num_orgs, queens_fitness)
00050 
00051     print "Evolving the population and searching for a solution..."
00052 
00053     mutator = QueensMutation(mutation_rate = 0.05)
00054     crossover = QueensCrossover(queens_fitness, crossover_prob = .2,
00055                                 max_crossover_size = 4)
00056     repair = QueensRepair()
00057     # rw_selector = RouletteWheelSelection(mutator, crossover, repair)
00058     t_selector = TournamentSelection(mutator, crossover, repair, 5)
00059 
00060     start_time = time.ctime(time.time())
00061     evolver = GenerationEvolver(start_population, t_selector)
00062     evolved_pop = evolver.evolve(queens_solved)
00063     end_time = time.ctime(time.time())
00064 
00065     unique_solutions = []
00066     for organism in evolved_pop:
00067         if organism.fitness == num_queens:
00068             if organism not in unique_solutions:
00069                 unique_solutions.append(organism)
00070 
00071     if VERBOSE:
00072         print "Search started at %s and ended at %s" % (start_time, end_time)
00073         for organism in unique_solutions:
00074             print "We did it!", organism
00075             display_board(organism.genome)
00076         
00077 
00078 def display_board(genome):
00079     """Display a genome in the N-queens problem.
00080 
00081     Inspired by the display function in the queens.py solution to the N-queens
00082     problem in the Python demo scripts.
00083     """
00084     print '+-' + '--'*len(genome) + '+'
00085 
00086     for row in range(len(genome)):
00087         print '|',
00088         for genome_item in genome:
00089             if genome_item == row:
00090                 print 'Q',
00091             else:
00092                 print '.',
00093         print '|'
00094 
00095     print '+-' + '--'*len(genome) + '+'
00096 
00097 def queens_solved(organisms):
00098     """Determine if we have solved the problem.
00099 
00100     We just search through the population for an organism that has a
00101     fitness that is equal to the number of queens in the population.
00102     If so, we have a solution, otherwise we need to keep looking.
00103     """
00104     for org in organisms:
00105         if org.fitness == len(org.genome):
00106             return 1
00107 
00108     # if we got here we didn't do it
00109     return 0
00110      
00111 def queens_fitness(genome):
00112     """Calculate the fitness of an organization of queens on the chessboard.
00113 
00114     Arguments:
00115 
00116     o genome -- A MutableSeq object specifying an organism genome.
00117 
00118     The number returned is the number of unattacked queens on the board.
00119     """
00120     fitness = 0
00121 
00122     # check each queen on the board
00123     for check_queen_col in range(len(genome)):
00124         is_attacked = 0
00125         # check against all other queens on the board
00126         for other_queen_col in range(len(genome)):
00127             # only check a queen if it isn't exactly the same queen
00128             if check_queen_col != other_queen_col:
00129                 # get the row for the two queens we are comparing
00130                 check_queen_row = int(genome[check_queen_col])
00131                 other_queen_row = int(genome[other_queen_col])
00132                 
00133                 # a queen is attacked if it is in a row with another queen
00134                 if check_queen_row == other_queen_row:
00135                     is_attacked = 1
00136                     break
00137                 # or it is attacked if it is diaganol to another queen
00138                 elif (abs(check_queen_row - other_queen_row) ==
00139                       abs(check_queen_col - other_queen_col)):
00140                     is_attacked = 1
00141                     break
00142 
00143         if not(is_attacked):
00144             fitness += 1
00145 
00146     return fitness
00147 
00148 class QueensAlphabet(Alphabet.Alphabet):
00149     def __init__(self, num_queens):
00150         """Initialize with the number of queens we are calculating for.
00151         """
00152         # set up the letters for the alphabet
00153         assert 0 <= num_queens <= 9
00154         self.letters = "".join(str(i) for i in range(num_queens))
00155 
00156 # --- Problem specific crossover, mutation and repair operations
00157 class QueensRepair:
00158     """A repair function to help create correct N-Queens solutions.
00159 
00160     This attempts to help generate correct solutions by offering some
00161     amount of repair to remove queens that are located in the same rows.
00162     After repair, a sequence should have no queens in the same row.
00163 
00164     So, if you start with something infeasible like (1, 2, 2, 3, 3, 4),
00165     after running it through repair you'll get a feasible individual
00166     like (1, 2, 5, 3, 6, 4). This should greatly reduce the number of
00167     individuals that need to be searched through in a population.
00168     """
00169     def __init__(self, repair_prob = 1):
00170         """Initialize the repairer.
00171 
00172         Arguments:
00173 
00174         o repair_prob -- The probability that we'll repair a genome.
00175         By default, we always repair.
00176         """
00177         self._repair_prob = repair_prob
00178 
00179     def _get_duplicates(self, genome):
00180         """Return all of the letters in the genome that are duplicated.
00181 
00182         This checks every letter in the genome (which are the rows of
00183         the chessboard, in this case), and adds them to a list of duplicated
00184         items if there is more than one of them, and then returns this list.
00185         """
00186         duplicates = []
00187         for item in genome.alphabet.letters:
00188             if genome.count(str(item)) > 1:
00189                 duplicates.append(item)
00190 
00191         return duplicates
00192 
00193     def _get_unused(self, genome):
00194         """Return all of the letters in the genome which are unused.
00195 
00196         This checks the letters in the genome (which are th rows on the
00197         chessboard) and returns all items which are not used.
00198         """
00199         unused = []
00200         for item in genome.alphabet.letters:
00201             if genome.count(str(item)) == 0:
00202                 unused.append(item)
00203 
00204         return unused
00205 
00206     def repair(self, organism):
00207         """Repair the specified genome to make it feasible.
00208 
00209         Arguments:
00210 
00211         o organism -- The Organism object we are going to perform the
00212         repair on.
00213         """
00214         # check if we should repair or not
00215         repair_chance = random.random()
00216         if repair_chance <= self._repair_prob:
00217             while 1:
00218                 # get the duplicated items we need to work on
00219                 duplicated_items = self._get_duplicates(organism.genome)
00220 
00221                 if len(duplicated_items) == 0:
00222                     break
00223 
00224                 # take the first duplicated element, and convert it to
00225                 # a row that is not already taken
00226                 duplicated_pos = organism.genome.index(duplicated_items[0])
00227 
00228                 free_rows = self._get_unused(organism.genome)
00229                 assert len(free_rows) > 0, "Unexpected lack of empty rows"
00230 
00231                 new_item = random.choice(free_rows)
00232                 organism.genome[duplicated_pos] = new_item
00233 
00234         return organism
00235         
00236 class QueensCrossover:
00237     """Crossover operation to help in solving the N-Queens problem.
00238 
00239     This tries to perform smarter crossovers by picking out regions of
00240     the genome that have high fitness.
00241 
00242     It scans through both genomes in the crossover with a window half the
00243     size of the genome, and finds the region with the highest fitness in
00244     both genomes. It then recombines these high fitness windows to form
00245     the new genome that is returned.
00246     """
00247     def __init__(self, fitness_func, crossover_prob = .1,
00248                  max_crossover_size = 4):
00249         """Initialize to do N-Queens optimized crossover.
00250 
00251         Arguments:
00252 
00253         o fitness_func -- A function that can calculate the fitness of
00254         a genome.
00255 
00256         o crossover_prob -- The probability of having a crossover
00257         between two passed in organisms.
00258 
00259         o max_crossover_size -- The maximum crossover size of the 'best' region
00260         to search for.
00261         """
00262         self._crossover_prob = crossover_prob
00263         self._fitness_calc = fitness_func
00264         self._max_crossover_size = max_crossover_size
00265 
00266     def do_crossover(self, org_1, org_2):
00267         """Perform a crossover between two organisms.
00268         """
00269         new_org_1 = org_1.copy()
00270         new_org_2 = org_2.copy()
00271         
00272         # determine if we have a crossover
00273         crossover_chance = random.random()
00274         if crossover_chance <= self._crossover_prob:
00275             # find the region of highest probability in both orgs
00276             best_1, rest_1 = self._find_best_region(new_org_1.genome,
00277                                                     make_best_larger = 1)
00278             best_2, rest_2 = self._find_best_region(new_org_2.genome,
00279                                                     make_best_larger = 0)
00280 
00281             assert len(best_1) + len(best_2) == len(rest_1) + len(rest_2), \
00282                    "Did not preserve genome length!"
00283             
00284             new_org_1.genome = best_1 + best_2
00285             new_org_2.genome = rest_1 + rest_2
00286 
00287         return new_org_1, new_org_2
00288 
00289     def _find_best_region(self, genome, make_best_larger = 1):
00290         """Find the best region in the given genome.
00291 
00292         Arguments:
00293 
00294         o genome -- A MutableSeq object specifying the genome of an organism
00295 
00296         o make_best_larger -- A flag to determine whether the best region
00297         we should search for should be the larger region of the split
00298         caused by crossover or the smaller region. This makes it easy
00299         to split two genomes, recombine them, and get a solution that
00300         makes sense.
00301 
00302         Returns:
00303         o Two MutableSeq objects. They are both half of the size of the passed
00304         genome. The first is the highest fitness region of the genome and the
00305         second is the rest of the genome.
00306         """
00307         first_region = max(len(genome) / 2, self._max_crossover_size)
00308         second_region = len(genome) - first_region
00309         
00310         if make_best_larger:
00311             region_size = max(first_region, second_region)
00312         else:
00313             region_size = min(first_region, second_region)
00314 
00315         # loop through all of the segments and find the best fitness segment
00316         
00317         # represent best_fitness as a three tuple with the coordinates of
00318         # the start and end as the first two elements, and the fitness of
00319         # the region as the last element. Start with a value that
00320         # will overridden right away
00321         best_fitness = [0, 0, -1]
00322         for start_index in range(len(genome) - region_size):
00323             region_fitness = \
00324              self._fitness_calc(genome[start_index: start_index + region_size])
00325 
00326             if region_fitness > best_fitness[2]:
00327                 best_fitness = [start_index, start_index + region_size,
00328                                 region_fitness]
00329 
00330         # get the the two regions and return 'em
00331         best_region = genome[best_fitness[0]:best_fitness[1]]
00332         rest_region = genome[0:best_fitness[0]] + genome[best_fitness[1]:]
00333 
00334         return best_region, rest_region
00335             
00336 
00337 class QueensMutation:
00338     """Mutation operation to help in the N-Queens problem.
00339 
00340     This performs mutation, but instead of randomly mutating a single
00341     item to any other, it tries to mutate it to a row that is not already
00342     taken at some other position in the genome. This thus tries to
00343     generate more 'correct' mutations that will help achieve the solution.
00344     """
00345     def __init__(self, mutation_rate = 0.001):
00346         """Inititialize a mutator.
00347 
00348         Arguments:
00349 
00350         o mutation_rate -- The change of a mutation happening at any
00351         position in the genome.
00352         """
00353         self._mutation_rate = mutation_rate
00354 
00355     def mutate(self, organism):
00356         """Mutate the genome trying to put in 'helpful' mutations.
00357         """
00358         new_org = organism.copy()
00359         gene_choices = list(new_org.genome.alphabet.letters)
00360 
00361         # potentially mutate any gene in the genome
00362         for gene_index in range(len(new_org.genome)):
00363             mutation_chance = random.random()
00364             # if we have a mutation
00365             if mutation_chance <= self._mutation_rate:
00366                 # find only choices that are not already taken elsewhere
00367                 # in the genome
00368                 gene_choices = list(new_org.genome.alphabet.letters)
00369 
00370                 for gene in new_org.genome:
00371                     if gene in gene_choices:
00372                         gene_choices.remove(gene)
00373 
00374                 # if there are no choices left, we are stuck going for random
00375                 if len(gene_choices) == 0:
00376                     gene_choices = list(new_org.genome.alphabet.letters)
00377                 
00378                 # get a new letter with the left-over choices
00379                 new_letter = random.choice(gene_choices)
00380                 new_org.genome[gene_index] = new_letter
00381 
00382         return new_org
00383  
00384 num_queens = 5
00385 
00386 if __name__ == "__main__":
00387     if len(sys.argv) == 2:
00388         num_queens = int(sys.argv[1])
00389     elif len(sys.argv) > 2:
00390         print "Usage:"
00391         print "python test_GAQueens.py <Number of Queens to place>\n"
00392         print "where <Number of Queens to place> is an optional parameter"
00393         print "specifying how many queens you want to try to calculate"
00394         print "this for. The default number of queens to place is 5."
00395         sys.exit(1)
00396 
00397 main(num_queens)