Mercurial > hg > gpsynth
diff src/population.cpp @ 0:add35537fdbb tip
Initial import
author | irh <ian.r.hobson@gmail.com> |
---|---|
date | Thu, 25 Aug 2011 11:05:55 +0100 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/population.cpp Thu Aug 25 11:05:55 2011 +0100 @@ -0,0 +1,397 @@ +// Copyright 2011, Ian Hobson. +// +// This file is part of gpsynth. +// +// gpsynth is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// gpsynth is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with gpsynth in the file COPYING. +// If not, see http://www.gnu.org/licenses/. + +#include "population.hpp" + +#include "boost_ex.hpp" +#include "graph_helpers.hpp" +#include "logger.hpp" +#include "statistics.hpp" +#include "std_ex.hpp" + +#include "boost/bind.hpp" +#include "boost/filesystem.hpp" +#include "boost/lexical_cast.hpp" +#include "boost/math/special_functions/fpclassify.hpp" + +#include <algorithm> +#include <cmath> +#include <fstream> +#include <iterator> +#include <limits> +#include <map> +#include <numeric> +#include <set> +#include <sstream> +#include <vector> + +namespace bf = boost::filesystem; + +namespace { + + const std::string g_default_work_folder = "/tmp"; + +// mutated graphs might have imported parameters outside of the graph's range +// or removed parameter inputs making some parameters redundant +void UpdateParameters(sg::Graph& graph) { + std::size_t parameter_count = graph[boost::graph_bundle].parameters_.size(); + // scan through vertices to find parameter inputs + std::map<int, std::vector<sg::Vertex> > parameters; + sg::VertexIterator vertex; + sg::VertexIterator vertex_end; + for (boost::tie(vertex, vertex_end) = boost::vertices(graph); + vertex != vertex_end; + ++vertex) { + const sg::Node& node_info = graph[*vertex]; + if (node_info.type_ == sg::kParameter) { + parameters[node_info.id_].push_back(*vertex); + } + } + // check if any corrections required + if (parameters.size() == parameter_count) { + if (parameter_count == 0) { + // no parameters in the graph, nothing to do + return; + } + // check that last parameter id is correct, otherwise corrections are needed + if (parameters.rbegin()->first == (parameter_count)) { + // all seems well, get out of here + return; + } + } else if (parameters.size() > parameter_count) { + // add extra parameters to the graph + std::generate_n(std::back_inserter(graph[boost::graph_bundle].parameters_), + parameters.size() - parameter_count, + stdx::RandomCoefficient); + } else { + // remove redundant parameters + graph[boost::graph_bundle].parameters_.resize(parameters.size()); + } + // reassign parameter ids through the graph + int parameter_id = 1; + typedef std::map<int, std::vector<sg::Vertex> >::value_type ParameterMapping; + foreach (ParameterMapping p, parameters) { + foreach (sg::Vertex v, p.second) { + graph[v].id_ = parameter_id; + } + ++parameter_id; + } + // job done +} + +} // namespace + +Population::Population(GrammarInterface* grammar, + ConverterInterface* converter, + EvaluatorInterface* evaluator) +: grammar_(grammar), + converter_(converter), + evaluator_(evaluator), + generation_(0), + max_generations_(0), + fitness_threshold_(0), + tournament_size_(7), + crossover_rate_(0.65), + mutation_rate_(0.3), + reproduce_best_(false), + keep_temp_folders_(false) +{ + evaluator_->SetListener(this); + mutators_.push_back(boost::bind(&GrammarInterface::MutationReplaceSubTree, + grammar, _1)); + mutators_.push_back(boost::bind(&GrammarInterface::MutationAddSubTree, + grammar, _1)); + mutators_.push_back(boost::bind(&GrammarInterface::MutationModifyInputs, + grammar, _1)); + mutators_.push_back(boost::bind(&GrammarInterface::MutationModifyConnections, + grammar, _1)); + mutators_.push_back(boost::bind(&GrammarInterface::MutationReplaceCommand, + grammar, _1)); +// mutators_.push_back(boost::bind(&GrammarInterface::MutationInsertCommand, +// grammar, _1)); + SetupGeneticOperationSelector(); + SetWorkFolder(g_default_work_folder); +} + +void Population::SetWorkFolder(const std::string &path) { + work_folder_ = path; + log_folder_ = path + "/logs"; + render_folder_ = path + "/render"; + dot_folder_ = path + "/dot"; + export_folder_ = path + "/export"; +} + +void Population::Evolve(int max_generations) { + max_generations_ = max_generations; + // make sure the folders for storing best outputs are created + bf::create_directories(render_folder_); + bf::create_directories(dot_folder_); + bf::create_directories(export_folder_); + bf::create_directories(log_folder_); + for (generation_ = 1; generation_ <= max_generations; generation_++) { + NewGenerationMessage(); + SetGenerationFolder(); + ExportToDOT(); + EvaluateGeneration(); + AnalyzeGeneration(); + SaveGenerationBest(); + CleanTempFolders(); + if (CheckThreshold()) { + logger::Log("Fitness threshold reached\n"); + return; + } + BreedNewGeneration(); + } +} + +void Population::NewGenerationMessage() const { + std::stringstream message; + message << "Generation " << generation_ << '\n'; + logger::Log(message.str()); +} + +void Population::SetGenerationFolder() { + std::stringstream path; + path << work_folder_ << "/generation_" + << std::setw(std::log10(max_generations_) + 1) << std::setfill('0') + << generation_ << '/'; + generation_folder_ = path.str(); +} + +bool Population::CheckThreshold() const { + double best_fitness = population_[best_fit_][boost::graph_bundle].fitness_; + return best_fitness <= fitness_threshold_; +} + +void Population::CleanTempFolders() const { + if (!keep_temp_folders_) { + bf::remove_all(generation_folder_); + } +} + +void Population::InitializePopulation(std::size_t size) { + population_.resize(size); + fitnesses_.resize(size); + for (int i = 0; i < size; i++) { + grammar_->RandomGraph(population_[i]); + population_[i][boost::graph_bundle].id_ = i; + } +} + +void Population::EvaluateGeneration() { + // make sure we have a clean temp folder for the generation + bf::remove_all(generation_folder_); + evaluator_->SetWorkFolder(generation_folder_); + evaluator_->RateGraphs(population_); + logger::Log("\n"); + // store graph fitnesses + for (std::size_t i = 0, size = population_.size(); i < size; i++) { + fitnesses_[i] = population_[i][boost::graph_bundle].fitness_; + } +} + +void Population::AnalyzeGeneration() { + double mean_fitness = stats::Mean(fitnesses_.begin(), fitnesses_.end()); + if (boost::math::isnan(mean_fitness)) { + logger::Log("ruh roh\n"); // TODO + } + std::stringstream message; + std::vector<double>::const_iterator best_fitness; + best_fitness = std::min_element(fitnesses_.begin(), fitnesses_.end()); + best_fit_ = static_cast<int>(best_fitness - fitnesses_.begin()); + message << "Average Fitness:" << mean_fitness + << " Best" + //<< " [" << best_fit_ << "]" + << ':' << *best_fitness << '\n'; + logger::Log(message.str()); + // save fitness values to logs + std::stringstream average; + average << generation_ << ' ' << mean_fitness; + stdx::AppendToFile(average.str(), log_folder_ + "/average_fitnesses.txt"); + std::stringstream best; + best << generation_ << ' ' << *best_fitness; + stdx::AppendToFile(best.str(), log_folder_ + "/best_fitnesses.txt"); +} + +void Population::SaveGenerationBest() { + std::stringstream generation_prefix; + generation_prefix << "generation_" + << std::setw(std::log10(max_generations_) + 1) + << std::setfill('0') + << generation_; + const sg::Graph& best_graph = population_[best_fit_]; + const sg::GraphProperties& graph_properties = best_graph[boost::graph_bundle]; + // copy the rendered output for the graph + bf::path path = graph_properties.render_path_; + if (bf::exists(path)) { + std::string render_path = render_folder_ + '/' + + generation_prefix.str() + + path.extension().string(); + bf::copy_file(path, render_path); + } + // copy the dot script for the graph + path = graph_properties.dot_path_; + if (bf::exists(path)) { + std::string dot_path = dot_folder_ + '/' + + generation_prefix.str() + + path.extension().string(); + bf::copy_file(path, dot_path); + } + // tell the converter to export the graph + converter_->Export(best_graph, export_folder_, generation_prefix.str()); +} + +void Population::PerformCrossover(sg::Graph& child_1, sg::Graph& child_2) { + int parent_id_1 = SelectIndividual(); + int parent_id_2; + do { + parent_id_2 = SelectIndividual(); + } while (parent_id_1 == parent_id_2); + // pick the crossover points via the grammar + const sg::Graph& parent_1 = population_[parent_id_1]; + const sg::Graph& parent_2 = population_[parent_id_2]; + sg::Vertex crossover_1; + sg::Vertex crossover_2; + grammar_->PickCrossoverNodes(parent_1, parent_2, &crossover_1, &crossover_2); + // swap the subtrees + SwapSubTrees(crossover_1, crossover_2, parent_1, parent_2, child_1, child_2); +} + +// called for mutated graphs to prepare for next generation +void Population::PrepareNewChild(sg::Graph& child) { + // reset fitness rating + child[boost::graph_bundle].fitness_ = sg::kFitnessUnrated; + // make sure parameters in graph are valid + UpdateParameters(child); +} + +void Population::BreedNewGeneration() { + // Used to store reproduced graph ids, prevents a graph being reproduced + // more than once. + std::set<int> reproduced; + std::vector<sg::Graph> next_generation; + next_generation.reserve(population_.size()); + if (reproduce_best_) { + next_generation.push_back(population_[best_fit_]); + } + std::size_t population_size = population_.size(); + while (next_generation.size() < population_size) { + GeneticOperation operation; + operation = static_cast<GeneticOperation>(genetic_operation_selector_()); + if (operation == kCrossover) { + // Crossover + // create two empty graphs to receive results of crossover + // (creating them directly in the next_generation container causes + // memory problems when performing crossover, couldn't fathom why..) + sg::Graph child_1; + sg::Graph child_2; + PerformCrossover(child_1, child_2); + PrepareNewChild(child_1); + PrepareNewChild(child_2); + next_generation.push_back(child_1); + next_generation.push_back(child_2); + } else if (operation == kMutation) { + // mutation + // store a copy of an individual from current population + next_generation.push_back(population_[SelectIndividual()]); + sg::Graph& child = next_generation.back(); + // call random mutation operator on child + bool mutated; + do { + // some mutators might not be able to operate on a specific graph + // and will return false, so try another mutator + mutated = mutators_[stdx::Random(mutators_.size())](child); + } while (!mutated); + PrepareNewChild(child); + } else { + // reproduction + int graph_id = SelectIndividual(); + // only reproduce the individual if it hasn't already been reproduced + if (reproduced.find(graph_id) == reproduced.end()) { + next_generation.push_back(population_[graph_id]); + reproduced.insert(graph_id); + } + } + } + // we might have an extra individual from crossover + next_generation.resize(population_size); + // reassign graph ids + for (std::size_t i = 0, size = population_.size(); i < size; i++) { + next_generation[i][boost::graph_bundle].id_ = static_cast<int>(i); + } + // replace the population with the newly created generation + population_.swap(next_generation); +} + +// tournament selection +int Population::SelectIndividual() { + // select n individuals from the population + std::set<std::size_t> tournament; + while (tournament.size() < tournament_size_) { + tournament.insert(stdx::Random(population_.size())); + } + double best_fitness = std::numeric_limits<double>::max(); + std::size_t result = -1; + // find best graph from selection + foreach (std::size_t graph_id, tournament) { + if (fitnesses_[graph_id] < best_fitness) { + best_fitness = fitnesses_[graph_id]; + result = graph_id; + } + } + return static_cast<int>(result); +} + +void Population::ExportToDOT() { + std::string dot_folder = generation_folder_ + "/dot/"; + bf::create_directories(dot_folder); + std::stringstream path; + foreach (sg::Graph& graph, population_) { + path.str(""); + path << dot_folder + << "gpsynth_" << graph[boost::graph_bundle].id_ << ".dot"; + std::ofstream file(path.str().c_str()); + file << converter_->ToDOT(graph); + file.close(); + graph[boost::graph_bundle].dot_path_ = path.str(); + } +} + +void Population::GraphRatedNotification(std::size_t graphs_rated) { + std::stringstream message; + message << "\rRated " << graphs_rated << " / " << population_.size(); + logger::Log(message.str()); +} + +void Population::SetCrossoverRate(double rate) { + crossover_rate_ = rate; + SetupGeneticOperationSelector(); +} + +void Population::SetMutationRate(double rate) { + mutation_rate_ = rate; + SetupGeneticOperationSelector(); +} + +void Population::SetupGeneticOperationSelector() { + std::vector<double> genetic_probabilities(2); + genetic_probabilities[kCrossover] = crossover_rate_; + genetic_probabilities[kMutation] = mutation_rate_; + // reproduction is left over probability + genetic_operation_selector_.SetProbabilities(genetic_probabilities); +}