diff src/population.cpp @ 0:add35537fdbb tip

Initial import
author irh <ian.r.hobson@gmail.com>
date Thu, 25 Aug 2011 11:05:55 +0100
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/population.cpp	Thu Aug 25 11:05:55 2011 +0100
@@ -0,0 +1,397 @@
+//  Copyright 2011, Ian Hobson.
+//
+//  This file is part of gpsynth.
+//
+//  gpsynth is free software: you can redistribute it and/or modify
+//  it under the terms of the GNU General Public License as published by
+//  the Free Software Foundation, either version 3 of the License, or
+//  (at your option) any later version.
+//
+//  gpsynth is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//  GNU General Public License for more details.
+//
+//  You should have received a copy of the GNU General Public License
+//  along with gpsynth in the file COPYING. 
+//  If not, see http://www.gnu.org/licenses/.
+
+#include "population.hpp"
+
+#include "boost_ex.hpp"
+#include "graph_helpers.hpp"
+#include "logger.hpp"
+#include "statistics.hpp"
+#include "std_ex.hpp"
+
+#include "boost/bind.hpp"
+#include "boost/filesystem.hpp"
+#include "boost/lexical_cast.hpp"
+#include "boost/math/special_functions/fpclassify.hpp"
+
+#include <algorithm>
+#include <cmath>
+#include <fstream>
+#include <iterator>
+#include <limits>
+#include <map>
+#include <numeric>
+#include <set>
+#include <sstream>
+#include <vector>
+
+namespace bf = boost::filesystem;
+
+namespace {
+  
+  const std::string g_default_work_folder = "/tmp";
+  
+// mutated graphs might have imported parameters outside of the graph's range
+// or removed parameter inputs making some parameters redundant
+void UpdateParameters(sg::Graph& graph) {
+  std::size_t parameter_count = graph[boost::graph_bundle].parameters_.size();
+  // scan through vertices to find parameter inputs
+  std::map<int, std::vector<sg::Vertex> > parameters;
+  sg::VertexIterator vertex;
+  sg::VertexIterator vertex_end;
+  for (boost::tie(vertex, vertex_end) = boost::vertices(graph); 
+       vertex != vertex_end; 
+       ++vertex) {
+    const sg::Node& node_info = graph[*vertex];
+    if (node_info.type_ == sg::kParameter) {
+      parameters[node_info.id_].push_back(*vertex);
+    }
+  }
+  // check if any corrections required
+  if (parameters.size() == parameter_count) {
+    if (parameter_count == 0) {
+      // no parameters in the graph, nothing to do
+      return;
+    }
+    // check that last parameter id is correct, otherwise corrections are needed
+    if (parameters.rbegin()->first == (parameter_count)) {
+      // all seems well, get out of here
+      return;  
+    }
+  } else if (parameters.size() > parameter_count) {
+    // add extra parameters to the graph
+    std::generate_n(std::back_inserter(graph[boost::graph_bundle].parameters_),
+                    parameters.size() - parameter_count,
+                    stdx::RandomCoefficient);
+  } else {
+    // remove redundant parameters
+    graph[boost::graph_bundle].parameters_.resize(parameters.size());
+  }
+  // reassign parameter ids through the graph
+  int parameter_id = 1;
+  typedef std::map<int, std::vector<sg::Vertex> >::value_type ParameterMapping;
+  foreach (ParameterMapping p, parameters) {
+    foreach (sg::Vertex v, p.second) {
+      graph[v].id_ = parameter_id;
+    }
+    ++parameter_id;
+  }
+  // job done
+}
+
+} // namespace
+
+Population::Population(GrammarInterface* grammar,
+                       ConverterInterface* converter,
+                       EvaluatorInterface* evaluator)
+: grammar_(grammar),
+  converter_(converter),
+  evaluator_(evaluator),
+  generation_(0),
+  max_generations_(0),
+  fitness_threshold_(0),
+  tournament_size_(7),
+  crossover_rate_(0.65),
+  mutation_rate_(0.3),
+  reproduce_best_(false),
+  keep_temp_folders_(false)
+{
+  evaluator_->SetListener(this);
+  mutators_.push_back(boost::bind(&GrammarInterface::MutationReplaceSubTree,
+                                  grammar, _1));
+  mutators_.push_back(boost::bind(&GrammarInterface::MutationAddSubTree,
+                                  grammar, _1));
+  mutators_.push_back(boost::bind(&GrammarInterface::MutationModifyInputs,
+                                  grammar, _1));
+  mutators_.push_back(boost::bind(&GrammarInterface::MutationModifyConnections,
+                                  grammar, _1));
+  mutators_.push_back(boost::bind(&GrammarInterface::MutationReplaceCommand,
+                                  grammar, _1));
+//  mutators_.push_back(boost::bind(&GrammarInterface::MutationInsertCommand,
+//                                  grammar, _1));
+  SetupGeneticOperationSelector();
+  SetWorkFolder(g_default_work_folder);
+}
+
+void Population::SetWorkFolder(const std::string &path) {
+  work_folder_ = path;
+  log_folder_ = path + "/logs";
+  render_folder_ = path + "/render";
+  dot_folder_ = path + "/dot";
+  export_folder_ = path + "/export";
+}
+
+void Population::Evolve(int max_generations) {
+  max_generations_ = max_generations;
+  // make sure the folders for storing best outputs are created
+  bf::create_directories(render_folder_);
+  bf::create_directories(dot_folder_);
+  bf::create_directories(export_folder_);
+  bf::create_directories(log_folder_);
+  for (generation_ = 1; generation_ <= max_generations; generation_++) {
+    NewGenerationMessage();
+    SetGenerationFolder();
+    ExportToDOT();
+    EvaluateGeneration();
+    AnalyzeGeneration();
+    SaveGenerationBest();
+    CleanTempFolders();
+    if (CheckThreshold()) {
+      logger::Log("Fitness threshold reached\n");
+      return;
+    }
+    BreedNewGeneration();
+  }
+}
+
+void Population::NewGenerationMessage() const {
+  std::stringstream message;
+  message << "Generation " << generation_ << '\n';
+  logger::Log(message.str());
+}
+
+void Population::SetGenerationFolder() {
+  std::stringstream path;
+  path << work_folder_ << "/generation_" 
+       << std::setw(std::log10(max_generations_) + 1) << std::setfill('0')
+       << generation_ << '/';
+  generation_folder_ = path.str();
+}
+
+bool Population::CheckThreshold() const {
+  double best_fitness = population_[best_fit_][boost::graph_bundle].fitness_;
+  return best_fitness <= fitness_threshold_;
+}
+
+void Population::CleanTempFolders() const {
+  if (!keep_temp_folders_) {
+    bf::remove_all(generation_folder_);
+  }
+}
+
+void Population::InitializePopulation(std::size_t size) {
+  population_.resize(size);
+  fitnesses_.resize(size);
+  for (int i = 0; i < size; i++) {
+    grammar_->RandomGraph(population_[i]);
+    population_[i][boost::graph_bundle].id_ = i;
+  }
+}
+
+void Population::EvaluateGeneration() {
+  // make sure we have a clean temp folder for the generation
+  bf::remove_all(generation_folder_);
+  evaluator_->SetWorkFolder(generation_folder_);
+  evaluator_->RateGraphs(population_);
+  logger::Log("\n");
+  // store graph fitnesses
+  for (std::size_t i = 0, size = population_.size(); i < size; i++) {
+    fitnesses_[i] = population_[i][boost::graph_bundle].fitness_;
+  }
+}
+
+void Population::AnalyzeGeneration() {
+  double mean_fitness = stats::Mean(fitnesses_.begin(), fitnesses_.end());
+  if (boost::math::isnan(mean_fitness)) {
+    logger::Log("ruh roh\n"); // TODO
+  }
+  std::stringstream message;
+  std::vector<double>::const_iterator best_fitness;
+  best_fitness = std::min_element(fitnesses_.begin(), fitnesses_.end());
+  best_fit_ = static_cast<int>(best_fitness - fitnesses_.begin());
+  message << "Average Fitness:" << mean_fitness
+          << " Best"
+          //<< " [" << best_fit_ << "]"
+          << ':' << *best_fitness << '\n';
+  logger::Log(message.str());
+  // save fitness values to logs
+  std::stringstream average;
+  average << generation_ << ' ' << mean_fitness;
+  stdx::AppendToFile(average.str(), log_folder_ + "/average_fitnesses.txt");
+  std::stringstream best;
+  best << generation_ << ' ' << *best_fitness;
+  stdx::AppendToFile(best.str(), log_folder_ + "/best_fitnesses.txt");
+}
+
+void Population::SaveGenerationBest() {
+  std::stringstream generation_prefix;
+  generation_prefix << "generation_" 
+                    << std::setw(std::log10(max_generations_) + 1) 
+                    << std::setfill('0')
+                    << generation_;
+  const sg::Graph& best_graph = population_[best_fit_];
+  const sg::GraphProperties& graph_properties = best_graph[boost::graph_bundle];
+  // copy the rendered output for the graph
+  bf::path path = graph_properties.render_path_;
+  if (bf::exists(path)) {
+    std::string render_path = render_folder_ + '/' 
+                              + generation_prefix.str() 
+                              + path.extension().string();
+    bf::copy_file(path, render_path);
+  }
+  // copy the dot script for the graph
+  path = graph_properties.dot_path_;
+  if (bf::exists(path)) {
+    std::string dot_path = dot_folder_ + '/' 
+                           + generation_prefix.str() 
+                           + path.extension().string();
+    bf::copy_file(path, dot_path);
+  }
+  // tell the converter to export the graph
+  converter_->Export(best_graph, export_folder_, generation_prefix.str());
+}
+
+void Population::PerformCrossover(sg::Graph& child_1, sg::Graph& child_2) {
+  int parent_id_1 = SelectIndividual();
+  int parent_id_2;
+  do {
+    parent_id_2 = SelectIndividual(); 
+  } while (parent_id_1 == parent_id_2);
+  // pick the crossover points via the grammar
+  const sg::Graph& parent_1 = population_[parent_id_1];
+  const sg::Graph& parent_2 = population_[parent_id_2];
+  sg::Vertex crossover_1;
+  sg::Vertex crossover_2;
+  grammar_->PickCrossoverNodes(parent_1, parent_2, &crossover_1, &crossover_2);
+  // swap the subtrees
+  SwapSubTrees(crossover_1, crossover_2, parent_1, parent_2, child_1, child_2);
+}
+
+// called for mutated graphs to prepare for next generation
+void Population::PrepareNewChild(sg::Graph& child) {
+  // reset fitness rating
+  child[boost::graph_bundle].fitness_ = sg::kFitnessUnrated;
+  // make sure parameters in graph are valid
+  UpdateParameters(child);
+}
+
+void Population::BreedNewGeneration() {
+  // Used to store reproduced graph ids, prevents a graph being reproduced
+  // more than once.
+  std::set<int> reproduced;
+  std::vector<sg::Graph> next_generation;
+  next_generation.reserve(population_.size());
+  if (reproduce_best_) {
+    next_generation.push_back(population_[best_fit_]);
+  }
+  std::size_t population_size = population_.size();
+  while (next_generation.size() < population_size) {
+    GeneticOperation operation;
+    operation = static_cast<GeneticOperation>(genetic_operation_selector_());
+    if (operation == kCrossover) {
+      // Crossover
+      // create two empty graphs to receive results of crossover
+      // (creating them directly in the next_generation container causes
+      // memory problems when performing crossover, couldn't fathom why..)
+      sg::Graph child_1;
+      sg::Graph child_2;
+      PerformCrossover(child_1, child_2);
+      PrepareNewChild(child_1);
+      PrepareNewChild(child_2);
+      next_generation.push_back(child_1);
+      next_generation.push_back(child_2);
+    } else if (operation == kMutation) {
+      // mutation
+      // store a copy of an individual from current population
+      next_generation.push_back(population_[SelectIndividual()]);
+      sg::Graph& child = next_generation.back();
+      // call random mutation operator on child
+      bool mutated;
+      do {
+        // some mutators might not be able to operate on a specific graph
+        // and will return false, so try another mutator
+        mutated = mutators_[stdx::Random(mutators_.size())](child);
+      } while (!mutated);
+      PrepareNewChild(child);
+    } else {
+      // reproduction
+      int graph_id = SelectIndividual();
+      // only reproduce the individual if it hasn't already been reproduced
+      if (reproduced.find(graph_id) == reproduced.end()) {
+        next_generation.push_back(population_[graph_id]);
+        reproduced.insert(graph_id);
+      }
+    }
+  }
+  // we might have an extra individual from crossover
+  next_generation.resize(population_size);
+  // reassign graph ids
+  for (std::size_t i = 0, size = population_.size(); i < size; i++) {
+    next_generation[i][boost::graph_bundle].id_ = static_cast<int>(i);
+  }
+  // replace the population with the newly created generation
+  population_.swap(next_generation);
+}
+
+// tournament selection 
+int Population::SelectIndividual() {
+  // select n individuals from the population
+  std::set<std::size_t> tournament;
+  while (tournament.size() < tournament_size_) {
+    tournament.insert(stdx::Random(population_.size()));
+  }
+  double best_fitness = std::numeric_limits<double>::max();
+  std::size_t result = -1;
+  // find best graph from selection
+  foreach (std::size_t graph_id, tournament) {
+    if (fitnesses_[graph_id] < best_fitness) {
+      best_fitness = fitnesses_[graph_id];
+      result = graph_id;
+    }
+  }
+  return static_cast<int>(result);
+}
+
+void Population::ExportToDOT() {
+  std::string dot_folder = generation_folder_ + "/dot/";
+  bf::create_directories(dot_folder);
+  std::stringstream path;
+  foreach (sg::Graph& graph, population_) {
+    path.str("");
+    path << dot_folder 
+         << "gpsynth_" << graph[boost::graph_bundle].id_ << ".dot";
+    std::ofstream file(path.str().c_str());
+    file << converter_->ToDOT(graph);
+    file.close();
+    graph[boost::graph_bundle].dot_path_ = path.str();
+  }
+}
+
+void Population::GraphRatedNotification(std::size_t graphs_rated) {
+  std::stringstream message;
+  message << "\rRated " << graphs_rated << " / " << population_.size();
+  logger::Log(message.str());
+}
+
+void Population::SetCrossoverRate(double rate) {
+  crossover_rate_ = rate;
+  SetupGeneticOperationSelector();
+}
+
+void Population::SetMutationRate(double rate) {
+  mutation_rate_ = rate;
+  SetupGeneticOperationSelector();
+}
+
+void Population::SetupGeneticOperationSelector() {
+  std::vector<double> genetic_probabilities(2);
+  genetic_probabilities[kCrossover] = crossover_rate_;
+  genetic_probabilities[kMutation] = mutation_rate_;
+  // reproduction is left over probability
+  genetic_operation_selector_.SetProbabilities(genetic_probabilities);
+}