view src/population.cpp @ 0:add35537fdbb tip

Initial import
author irh <ian.r.hobson@gmail.com>
date Thu, 25 Aug 2011 11:05:55 +0100
parents
children
line wrap: on
line source
//  Copyright 2011, Ian Hobson.
//
//  This file is part of gpsynth.
//
//  gpsynth is free software: you can redistribute it and/or modify
//  it under the terms of the GNU General Public License as published by
//  the Free Software Foundation, either version 3 of the License, or
//  (at your option) any later version.
//
//  gpsynth is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//  GNU General Public License for more details.
//
//  You should have received a copy of the GNU General Public License
//  along with gpsynth in the file COPYING. 
//  If not, see http://www.gnu.org/licenses/.

#include "population.hpp"

#include "boost_ex.hpp"
#include "graph_helpers.hpp"
#include "logger.hpp"
#include "statistics.hpp"
#include "std_ex.hpp"

#include "boost/bind.hpp"
#include "boost/filesystem.hpp"
#include "boost/lexical_cast.hpp"
#include "boost/math/special_functions/fpclassify.hpp"

#include <algorithm>
#include <cmath>
#include <fstream>
#include <iterator>
#include <limits>
#include <map>
#include <numeric>
#include <set>
#include <sstream>
#include <vector>

namespace bf = boost::filesystem;

namespace {
  
  const std::string g_default_work_folder = "/tmp";
  
// mutated graphs might have imported parameters outside of the graph's range
// or removed parameter inputs making some parameters redundant
void UpdateParameters(sg::Graph& graph) {
  std::size_t parameter_count = graph[boost::graph_bundle].parameters_.size();
  // scan through vertices to find parameter inputs
  std::map<int, std::vector<sg::Vertex> > parameters;
  sg::VertexIterator vertex;
  sg::VertexIterator vertex_end;
  for (boost::tie(vertex, vertex_end) = boost::vertices(graph); 
       vertex != vertex_end; 
       ++vertex) {
    const sg::Node& node_info = graph[*vertex];
    if (node_info.type_ == sg::kParameter) {
      parameters[node_info.id_].push_back(*vertex);
    }
  }
  // check if any corrections required
  if (parameters.size() == parameter_count) {
    if (parameter_count == 0) {
      // no parameters in the graph, nothing to do
      return;
    }
    // check that last parameter id is correct, otherwise corrections are needed
    if (parameters.rbegin()->first == (parameter_count)) {
      // all seems well, get out of here
      return;  
    }
  } else if (parameters.size() > parameter_count) {
    // add extra parameters to the graph
    std::generate_n(std::back_inserter(graph[boost::graph_bundle].parameters_),
                    parameters.size() - parameter_count,
                    stdx::RandomCoefficient);
  } else {
    // remove redundant parameters
    graph[boost::graph_bundle].parameters_.resize(parameters.size());
  }
  // reassign parameter ids through the graph
  int parameter_id = 1;
  typedef std::map<int, std::vector<sg::Vertex> >::value_type ParameterMapping;
  foreach (ParameterMapping p, parameters) {
    foreach (sg::Vertex v, p.second) {
      graph[v].id_ = parameter_id;
    }
    ++parameter_id;
  }
  // job done
}

} // namespace

Population::Population(GrammarInterface* grammar,
                       ConverterInterface* converter,
                       EvaluatorInterface* evaluator)
: grammar_(grammar),
  converter_(converter),
  evaluator_(evaluator),
  generation_(0),
  max_generations_(0),
  fitness_threshold_(0),
  tournament_size_(7),
  crossover_rate_(0.65),
  mutation_rate_(0.3),
  reproduce_best_(false),
  keep_temp_folders_(false)
{
  evaluator_->SetListener(this);
  mutators_.push_back(boost::bind(&GrammarInterface::MutationReplaceSubTree,
                                  grammar, _1));
  mutators_.push_back(boost::bind(&GrammarInterface::MutationAddSubTree,
                                  grammar, _1));
  mutators_.push_back(boost::bind(&GrammarInterface::MutationModifyInputs,
                                  grammar, _1));
  mutators_.push_back(boost::bind(&GrammarInterface::MutationModifyConnections,
                                  grammar, _1));
  mutators_.push_back(boost::bind(&GrammarInterface::MutationReplaceCommand,
                                  grammar, _1));
//  mutators_.push_back(boost::bind(&GrammarInterface::MutationInsertCommand,
//                                  grammar, _1));
  SetupGeneticOperationSelector();
  SetWorkFolder(g_default_work_folder);
}

void Population::SetWorkFolder(const std::string &path) {
  work_folder_ = path;
  log_folder_ = path + "/logs";
  render_folder_ = path + "/render";
  dot_folder_ = path + "/dot";
  export_folder_ = path + "/export";
}

void Population::Evolve(int max_generations) {
  max_generations_ = max_generations;
  // make sure the folders for storing best outputs are created
  bf::create_directories(render_folder_);
  bf::create_directories(dot_folder_);
  bf::create_directories(export_folder_);
  bf::create_directories(log_folder_);
  for (generation_ = 1; generation_ <= max_generations; generation_++) {
    NewGenerationMessage();
    SetGenerationFolder();
    ExportToDOT();
    EvaluateGeneration();
    AnalyzeGeneration();
    SaveGenerationBest();
    CleanTempFolders();
    if (CheckThreshold()) {
      logger::Log("Fitness threshold reached\n");
      return;
    }
    BreedNewGeneration();
  }
}

void Population::NewGenerationMessage() const {
  std::stringstream message;
  message << "Generation " << generation_ << '\n';
  logger::Log(message.str());
}

void Population::SetGenerationFolder() {
  std::stringstream path;
  path << work_folder_ << "/generation_" 
       << std::setw(std::log10(max_generations_) + 1) << std::setfill('0')
       << generation_ << '/';
  generation_folder_ = path.str();
}

bool Population::CheckThreshold() const {
  double best_fitness = population_[best_fit_][boost::graph_bundle].fitness_;
  return best_fitness <= fitness_threshold_;
}

void Population::CleanTempFolders() const {
  if (!keep_temp_folders_) {
    bf::remove_all(generation_folder_);
  }
}

void Population::InitializePopulation(std::size_t size) {
  population_.resize(size);
  fitnesses_.resize(size);
  for (int i = 0; i < size; i++) {
    grammar_->RandomGraph(population_[i]);
    population_[i][boost::graph_bundle].id_ = i;
  }
}

void Population::EvaluateGeneration() {
  // make sure we have a clean temp folder for the generation
  bf::remove_all(generation_folder_);
  evaluator_->SetWorkFolder(generation_folder_);
  evaluator_->RateGraphs(population_);
  logger::Log("\n");
  // store graph fitnesses
  for (std::size_t i = 0, size = population_.size(); i < size; i++) {
    fitnesses_[i] = population_[i][boost::graph_bundle].fitness_;
  }
}

void Population::AnalyzeGeneration() {
  double mean_fitness = stats::Mean(fitnesses_.begin(), fitnesses_.end());
  if (boost::math::isnan(mean_fitness)) {
    logger::Log("ruh roh\n"); // TODO
  }
  std::stringstream message;
  std::vector<double>::const_iterator best_fitness;
  best_fitness = std::min_element(fitnesses_.begin(), fitnesses_.end());
  best_fit_ = static_cast<int>(best_fitness - fitnesses_.begin());
  message << "Average Fitness:" << mean_fitness
          << " Best"
          //<< " [" << best_fit_ << "]"
          << ':' << *best_fitness << '\n';
  logger::Log(message.str());
  // save fitness values to logs
  std::stringstream average;
  average << generation_ << ' ' << mean_fitness;
  stdx::AppendToFile(average.str(), log_folder_ + "/average_fitnesses.txt");
  std::stringstream best;
  best << generation_ << ' ' << *best_fitness;
  stdx::AppendToFile(best.str(), log_folder_ + "/best_fitnesses.txt");
}

void Population::SaveGenerationBest() {
  std::stringstream generation_prefix;
  generation_prefix << "generation_" 
                    << std::setw(std::log10(max_generations_) + 1) 
                    << std::setfill('0')
                    << generation_;
  const sg::Graph& best_graph = population_[best_fit_];
  const sg::GraphProperties& graph_properties = best_graph[boost::graph_bundle];
  // copy the rendered output for the graph
  bf::path path = graph_properties.render_path_;
  if (bf::exists(path)) {
    std::string render_path = render_folder_ + '/' 
                              + generation_prefix.str() 
                              + path.extension().string();
    bf::copy_file(path, render_path);
  }
  // copy the dot script for the graph
  path = graph_properties.dot_path_;
  if (bf::exists(path)) {
    std::string dot_path = dot_folder_ + '/' 
                           + generation_prefix.str() 
                           + path.extension().string();
    bf::copy_file(path, dot_path);
  }
  // tell the converter to export the graph
  converter_->Export(best_graph, export_folder_, generation_prefix.str());
}

void Population::PerformCrossover(sg::Graph& child_1, sg::Graph& child_2) {
  int parent_id_1 = SelectIndividual();
  int parent_id_2;
  do {
    parent_id_2 = SelectIndividual(); 
  } while (parent_id_1 == parent_id_2);
  // pick the crossover points via the grammar
  const sg::Graph& parent_1 = population_[parent_id_1];
  const sg::Graph& parent_2 = population_[parent_id_2];
  sg::Vertex crossover_1;
  sg::Vertex crossover_2;
  grammar_->PickCrossoverNodes(parent_1, parent_2, &crossover_1, &crossover_2);
  // swap the subtrees
  SwapSubTrees(crossover_1, crossover_2, parent_1, parent_2, child_1, child_2);
}

// called for mutated graphs to prepare for next generation
void Population::PrepareNewChild(sg::Graph& child) {
  // reset fitness rating
  child[boost::graph_bundle].fitness_ = sg::kFitnessUnrated;
  // make sure parameters in graph are valid
  UpdateParameters(child);
}

void Population::BreedNewGeneration() {
  // Used to store reproduced graph ids, prevents a graph being reproduced
  // more than once.
  std::set<int> reproduced;
  std::vector<sg::Graph> next_generation;
  next_generation.reserve(population_.size());
  if (reproduce_best_) {
    next_generation.push_back(population_[best_fit_]);
  }
  std::size_t population_size = population_.size();
  while (next_generation.size() < population_size) {
    GeneticOperation operation;
    operation = static_cast<GeneticOperation>(genetic_operation_selector_());
    if (operation == kCrossover) {
      // Crossover
      // create two empty graphs to receive results of crossover
      // (creating them directly in the next_generation container causes
      // memory problems when performing crossover, couldn't fathom why..)
      sg::Graph child_1;
      sg::Graph child_2;
      PerformCrossover(child_1, child_2);
      PrepareNewChild(child_1);
      PrepareNewChild(child_2);
      next_generation.push_back(child_1);
      next_generation.push_back(child_2);
    } else if (operation == kMutation) {
      // mutation
      // store a copy of an individual from current population
      next_generation.push_back(population_[SelectIndividual()]);
      sg::Graph& child = next_generation.back();
      // call random mutation operator on child
      bool mutated;
      do {
        // some mutators might not be able to operate on a specific graph
        // and will return false, so try another mutator
        mutated = mutators_[stdx::Random(mutators_.size())](child);
      } while (!mutated);
      PrepareNewChild(child);
    } else {
      // reproduction
      int graph_id = SelectIndividual();
      // only reproduce the individual if it hasn't already been reproduced
      if (reproduced.find(graph_id) == reproduced.end()) {
        next_generation.push_back(population_[graph_id]);
        reproduced.insert(graph_id);
      }
    }
  }
  // we might have an extra individual from crossover
  next_generation.resize(population_size);
  // reassign graph ids
  for (std::size_t i = 0, size = population_.size(); i < size; i++) {
    next_generation[i][boost::graph_bundle].id_ = static_cast<int>(i);
  }
  // replace the population with the newly created generation
  population_.swap(next_generation);
}

// tournament selection 
int Population::SelectIndividual() {
  // select n individuals from the population
  std::set<std::size_t> tournament;
  while (tournament.size() < tournament_size_) {
    tournament.insert(stdx::Random(population_.size()));
  }
  double best_fitness = std::numeric_limits<double>::max();
  std::size_t result = -1;
  // find best graph from selection
  foreach (std::size_t graph_id, tournament) {
    if (fitnesses_[graph_id] < best_fitness) {
      best_fitness = fitnesses_[graph_id];
      result = graph_id;
    }
  }
  return static_cast<int>(result);
}

void Population::ExportToDOT() {
  std::string dot_folder = generation_folder_ + "/dot/";
  bf::create_directories(dot_folder);
  std::stringstream path;
  foreach (sg::Graph& graph, population_) {
    path.str("");
    path << dot_folder 
         << "gpsynth_" << graph[boost::graph_bundle].id_ << ".dot";
    std::ofstream file(path.str().c_str());
    file << converter_->ToDOT(graph);
    file.close();
    graph[boost::graph_bundle].dot_path_ = path.str();
  }
}

void Population::GraphRatedNotification(std::size_t graphs_rated) {
  std::stringstream message;
  message << "\rRated " << graphs_rated << " / " << population_.size();
  logger::Log(message.str());
}

void Population::SetCrossoverRate(double rate) {
  crossover_rate_ = rate;
  SetupGeneticOperationSelector();
}

void Population::SetMutationRate(double rate) {
  mutation_rate_ = rate;
  SetupGeneticOperationSelector();
}

void Population::SetupGeneticOperationSelector() {
  std::vector<double> genetic_probabilities(2);
  genetic_probabilities[kCrossover] = crossover_rate_;
  genetic_probabilities[kMutation] = mutation_rate_;
  // reproduction is left over probability
  genetic_operation_selector_.SetProbabilities(genetic_probabilities);
}