annotate src/population.cpp @ 0:add35537fdbb tip

Initial import
author irh <ian.r.hobson@gmail.com>
date Thu, 25 Aug 2011 11:05:55 +0100
parents
children
rev   line source
ian@0 1 // Copyright 2011, Ian Hobson.
ian@0 2 //
ian@0 3 // This file is part of gpsynth.
ian@0 4 //
ian@0 5 // gpsynth is free software: you can redistribute it and/or modify
ian@0 6 // it under the terms of the GNU General Public License as published by
ian@0 7 // the Free Software Foundation, either version 3 of the License, or
ian@0 8 // (at your option) any later version.
ian@0 9 //
ian@0 10 // gpsynth is distributed in the hope that it will be useful,
ian@0 11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
ian@0 12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
ian@0 13 // GNU General Public License for more details.
ian@0 14 //
ian@0 15 // You should have received a copy of the GNU General Public License
ian@0 16 // along with gpsynth in the file COPYING.
ian@0 17 // If not, see http://www.gnu.org/licenses/.
ian@0 18
ian@0 19 #include "population.hpp"
ian@0 20
ian@0 21 #include "boost_ex.hpp"
ian@0 22 #include "graph_helpers.hpp"
ian@0 23 #include "logger.hpp"
ian@0 24 #include "statistics.hpp"
ian@0 25 #include "std_ex.hpp"
ian@0 26
ian@0 27 #include "boost/bind.hpp"
ian@0 28 #include "boost/filesystem.hpp"
ian@0 29 #include "boost/lexical_cast.hpp"
ian@0 30 #include "boost/math/special_functions/fpclassify.hpp"
ian@0 31
ian@0 32 #include <algorithm>
ian@0 33 #include <cmath>
ian@0 34 #include <fstream>
ian@0 35 #include <iterator>
ian@0 36 #include <limits>
ian@0 37 #include <map>
ian@0 38 #include <numeric>
ian@0 39 #include <set>
ian@0 40 #include <sstream>
ian@0 41 #include <vector>
ian@0 42
ian@0 43 namespace bf = boost::filesystem;
ian@0 44
ian@0 45 namespace {
ian@0 46
ian@0 47 const std::string g_default_work_folder = "/tmp";
ian@0 48
ian@0 49 // mutated graphs might have imported parameters outside of the graph's range
ian@0 50 // or removed parameter inputs making some parameters redundant
ian@0 51 void UpdateParameters(sg::Graph& graph) {
ian@0 52 std::size_t parameter_count = graph[boost::graph_bundle].parameters_.size();
ian@0 53 // scan through vertices to find parameter inputs
ian@0 54 std::map<int, std::vector<sg::Vertex> > parameters;
ian@0 55 sg::VertexIterator vertex;
ian@0 56 sg::VertexIterator vertex_end;
ian@0 57 for (boost::tie(vertex, vertex_end) = boost::vertices(graph);
ian@0 58 vertex != vertex_end;
ian@0 59 ++vertex) {
ian@0 60 const sg::Node& node_info = graph[*vertex];
ian@0 61 if (node_info.type_ == sg::kParameter) {
ian@0 62 parameters[node_info.id_].push_back(*vertex);
ian@0 63 }
ian@0 64 }
ian@0 65 // check if any corrections required
ian@0 66 if (parameters.size() == parameter_count) {
ian@0 67 if (parameter_count == 0) {
ian@0 68 // no parameters in the graph, nothing to do
ian@0 69 return;
ian@0 70 }
ian@0 71 // check that last parameter id is correct, otherwise corrections are needed
ian@0 72 if (parameters.rbegin()->first == (parameter_count)) {
ian@0 73 // all seems well, get out of here
ian@0 74 return;
ian@0 75 }
ian@0 76 } else if (parameters.size() > parameter_count) {
ian@0 77 // add extra parameters to the graph
ian@0 78 std::generate_n(std::back_inserter(graph[boost::graph_bundle].parameters_),
ian@0 79 parameters.size() - parameter_count,
ian@0 80 stdx::RandomCoefficient);
ian@0 81 } else {
ian@0 82 // remove redundant parameters
ian@0 83 graph[boost::graph_bundle].parameters_.resize(parameters.size());
ian@0 84 }
ian@0 85 // reassign parameter ids through the graph
ian@0 86 int parameter_id = 1;
ian@0 87 typedef std::map<int, std::vector<sg::Vertex> >::value_type ParameterMapping;
ian@0 88 foreach (ParameterMapping p, parameters) {
ian@0 89 foreach (sg::Vertex v, p.second) {
ian@0 90 graph[v].id_ = parameter_id;
ian@0 91 }
ian@0 92 ++parameter_id;
ian@0 93 }
ian@0 94 // job done
ian@0 95 }
ian@0 96
ian@0 97 } // namespace
ian@0 98
ian@0 99 Population::Population(GrammarInterface* grammar,
ian@0 100 ConverterInterface* converter,
ian@0 101 EvaluatorInterface* evaluator)
ian@0 102 : grammar_(grammar),
ian@0 103 converter_(converter),
ian@0 104 evaluator_(evaluator),
ian@0 105 generation_(0),
ian@0 106 max_generations_(0),
ian@0 107 fitness_threshold_(0),
ian@0 108 tournament_size_(7),
ian@0 109 crossover_rate_(0.65),
ian@0 110 mutation_rate_(0.3),
ian@0 111 reproduce_best_(false),
ian@0 112 keep_temp_folders_(false)
ian@0 113 {
ian@0 114 evaluator_->SetListener(this);
ian@0 115 mutators_.push_back(boost::bind(&GrammarInterface::MutationReplaceSubTree,
ian@0 116 grammar, _1));
ian@0 117 mutators_.push_back(boost::bind(&GrammarInterface::MutationAddSubTree,
ian@0 118 grammar, _1));
ian@0 119 mutators_.push_back(boost::bind(&GrammarInterface::MutationModifyInputs,
ian@0 120 grammar, _1));
ian@0 121 mutators_.push_back(boost::bind(&GrammarInterface::MutationModifyConnections,
ian@0 122 grammar, _1));
ian@0 123 mutators_.push_back(boost::bind(&GrammarInterface::MutationReplaceCommand,
ian@0 124 grammar, _1));
ian@0 125 // mutators_.push_back(boost::bind(&GrammarInterface::MutationInsertCommand,
ian@0 126 // grammar, _1));
ian@0 127 SetupGeneticOperationSelector();
ian@0 128 SetWorkFolder(g_default_work_folder);
ian@0 129 }
ian@0 130
ian@0 131 void Population::SetWorkFolder(const std::string &path) {
ian@0 132 work_folder_ = path;
ian@0 133 log_folder_ = path + "/logs";
ian@0 134 render_folder_ = path + "/render";
ian@0 135 dot_folder_ = path + "/dot";
ian@0 136 export_folder_ = path + "/export";
ian@0 137 }
ian@0 138
ian@0 139 void Population::Evolve(int max_generations) {
ian@0 140 max_generations_ = max_generations;
ian@0 141 // make sure the folders for storing best outputs are created
ian@0 142 bf::create_directories(render_folder_);
ian@0 143 bf::create_directories(dot_folder_);
ian@0 144 bf::create_directories(export_folder_);
ian@0 145 bf::create_directories(log_folder_);
ian@0 146 for (generation_ = 1; generation_ <= max_generations; generation_++) {
ian@0 147 NewGenerationMessage();
ian@0 148 SetGenerationFolder();
ian@0 149 ExportToDOT();
ian@0 150 EvaluateGeneration();
ian@0 151 AnalyzeGeneration();
ian@0 152 SaveGenerationBest();
ian@0 153 CleanTempFolders();
ian@0 154 if (CheckThreshold()) {
ian@0 155 logger::Log("Fitness threshold reached\n");
ian@0 156 return;
ian@0 157 }
ian@0 158 BreedNewGeneration();
ian@0 159 }
ian@0 160 }
ian@0 161
ian@0 162 void Population::NewGenerationMessage() const {
ian@0 163 std::stringstream message;
ian@0 164 message << "Generation " << generation_ << '\n';
ian@0 165 logger::Log(message.str());
ian@0 166 }
ian@0 167
ian@0 168 void Population::SetGenerationFolder() {
ian@0 169 std::stringstream path;
ian@0 170 path << work_folder_ << "/generation_"
ian@0 171 << std::setw(std::log10(max_generations_) + 1) << std::setfill('0')
ian@0 172 << generation_ << '/';
ian@0 173 generation_folder_ = path.str();
ian@0 174 }
ian@0 175
ian@0 176 bool Population::CheckThreshold() const {
ian@0 177 double best_fitness = population_[best_fit_][boost::graph_bundle].fitness_;
ian@0 178 return best_fitness <= fitness_threshold_;
ian@0 179 }
ian@0 180
ian@0 181 void Population::CleanTempFolders() const {
ian@0 182 if (!keep_temp_folders_) {
ian@0 183 bf::remove_all(generation_folder_);
ian@0 184 }
ian@0 185 }
ian@0 186
ian@0 187 void Population::InitializePopulation(std::size_t size) {
ian@0 188 population_.resize(size);
ian@0 189 fitnesses_.resize(size);
ian@0 190 for (int i = 0; i < size; i++) {
ian@0 191 grammar_->RandomGraph(population_[i]);
ian@0 192 population_[i][boost::graph_bundle].id_ = i;
ian@0 193 }
ian@0 194 }
ian@0 195
ian@0 196 void Population::EvaluateGeneration() {
ian@0 197 // make sure we have a clean temp folder for the generation
ian@0 198 bf::remove_all(generation_folder_);
ian@0 199 evaluator_->SetWorkFolder(generation_folder_);
ian@0 200 evaluator_->RateGraphs(population_);
ian@0 201 logger::Log("\n");
ian@0 202 // store graph fitnesses
ian@0 203 for (std::size_t i = 0, size = population_.size(); i < size; i++) {
ian@0 204 fitnesses_[i] = population_[i][boost::graph_bundle].fitness_;
ian@0 205 }
ian@0 206 }
ian@0 207
ian@0 208 void Population::AnalyzeGeneration() {
ian@0 209 double mean_fitness = stats::Mean(fitnesses_.begin(), fitnesses_.end());
ian@0 210 if (boost::math::isnan(mean_fitness)) {
ian@0 211 logger::Log("ruh roh\n"); // TODO
ian@0 212 }
ian@0 213 std::stringstream message;
ian@0 214 std::vector<double>::const_iterator best_fitness;
ian@0 215 best_fitness = std::min_element(fitnesses_.begin(), fitnesses_.end());
ian@0 216 best_fit_ = static_cast<int>(best_fitness - fitnesses_.begin());
ian@0 217 message << "Average Fitness:" << mean_fitness
ian@0 218 << " Best"
ian@0 219 //<< " [" << best_fit_ << "]"
ian@0 220 << ':' << *best_fitness << '\n';
ian@0 221 logger::Log(message.str());
ian@0 222 // save fitness values to logs
ian@0 223 std::stringstream average;
ian@0 224 average << generation_ << ' ' << mean_fitness;
ian@0 225 stdx::AppendToFile(average.str(), log_folder_ + "/average_fitnesses.txt");
ian@0 226 std::stringstream best;
ian@0 227 best << generation_ << ' ' << *best_fitness;
ian@0 228 stdx::AppendToFile(best.str(), log_folder_ + "/best_fitnesses.txt");
ian@0 229 }
ian@0 230
ian@0 231 void Population::SaveGenerationBest() {
ian@0 232 std::stringstream generation_prefix;
ian@0 233 generation_prefix << "generation_"
ian@0 234 << std::setw(std::log10(max_generations_) + 1)
ian@0 235 << std::setfill('0')
ian@0 236 << generation_;
ian@0 237 const sg::Graph& best_graph = population_[best_fit_];
ian@0 238 const sg::GraphProperties& graph_properties = best_graph[boost::graph_bundle];
ian@0 239 // copy the rendered output for the graph
ian@0 240 bf::path path = graph_properties.render_path_;
ian@0 241 if (bf::exists(path)) {
ian@0 242 std::string render_path = render_folder_ + '/'
ian@0 243 + generation_prefix.str()
ian@0 244 + path.extension().string();
ian@0 245 bf::copy_file(path, render_path);
ian@0 246 }
ian@0 247 // copy the dot script for the graph
ian@0 248 path = graph_properties.dot_path_;
ian@0 249 if (bf::exists(path)) {
ian@0 250 std::string dot_path = dot_folder_ + '/'
ian@0 251 + generation_prefix.str()
ian@0 252 + path.extension().string();
ian@0 253 bf::copy_file(path, dot_path);
ian@0 254 }
ian@0 255 // tell the converter to export the graph
ian@0 256 converter_->Export(best_graph, export_folder_, generation_prefix.str());
ian@0 257 }
ian@0 258
ian@0 259 void Population::PerformCrossover(sg::Graph& child_1, sg::Graph& child_2) {
ian@0 260 int parent_id_1 = SelectIndividual();
ian@0 261 int parent_id_2;
ian@0 262 do {
ian@0 263 parent_id_2 = SelectIndividual();
ian@0 264 } while (parent_id_1 == parent_id_2);
ian@0 265 // pick the crossover points via the grammar
ian@0 266 const sg::Graph& parent_1 = population_[parent_id_1];
ian@0 267 const sg::Graph& parent_2 = population_[parent_id_2];
ian@0 268 sg::Vertex crossover_1;
ian@0 269 sg::Vertex crossover_2;
ian@0 270 grammar_->PickCrossoverNodes(parent_1, parent_2, &crossover_1, &crossover_2);
ian@0 271 // swap the subtrees
ian@0 272 SwapSubTrees(crossover_1, crossover_2, parent_1, parent_2, child_1, child_2);
ian@0 273 }
ian@0 274
ian@0 275 // called for mutated graphs to prepare for next generation
ian@0 276 void Population::PrepareNewChild(sg::Graph& child) {
ian@0 277 // reset fitness rating
ian@0 278 child[boost::graph_bundle].fitness_ = sg::kFitnessUnrated;
ian@0 279 // make sure parameters in graph are valid
ian@0 280 UpdateParameters(child);
ian@0 281 }
ian@0 282
ian@0 283 void Population::BreedNewGeneration() {
ian@0 284 // Used to store reproduced graph ids, prevents a graph being reproduced
ian@0 285 // more than once.
ian@0 286 std::set<int> reproduced;
ian@0 287 std::vector<sg::Graph> next_generation;
ian@0 288 next_generation.reserve(population_.size());
ian@0 289 if (reproduce_best_) {
ian@0 290 next_generation.push_back(population_[best_fit_]);
ian@0 291 }
ian@0 292 std::size_t population_size = population_.size();
ian@0 293 while (next_generation.size() < population_size) {
ian@0 294 GeneticOperation operation;
ian@0 295 operation = static_cast<GeneticOperation>(genetic_operation_selector_());
ian@0 296 if (operation == kCrossover) {
ian@0 297 // Crossover
ian@0 298 // create two empty graphs to receive results of crossover
ian@0 299 // (creating them directly in the next_generation container causes
ian@0 300 // memory problems when performing crossover, couldn't fathom why..)
ian@0 301 sg::Graph child_1;
ian@0 302 sg::Graph child_2;
ian@0 303 PerformCrossover(child_1, child_2);
ian@0 304 PrepareNewChild(child_1);
ian@0 305 PrepareNewChild(child_2);
ian@0 306 next_generation.push_back(child_1);
ian@0 307 next_generation.push_back(child_2);
ian@0 308 } else if (operation == kMutation) {
ian@0 309 // mutation
ian@0 310 // store a copy of an individual from current population
ian@0 311 next_generation.push_back(population_[SelectIndividual()]);
ian@0 312 sg::Graph& child = next_generation.back();
ian@0 313 // call random mutation operator on child
ian@0 314 bool mutated;
ian@0 315 do {
ian@0 316 // some mutators might not be able to operate on a specific graph
ian@0 317 // and will return false, so try another mutator
ian@0 318 mutated = mutators_[stdx::Random(mutators_.size())](child);
ian@0 319 } while (!mutated);
ian@0 320 PrepareNewChild(child);
ian@0 321 } else {
ian@0 322 // reproduction
ian@0 323 int graph_id = SelectIndividual();
ian@0 324 // only reproduce the individual if it hasn't already been reproduced
ian@0 325 if (reproduced.find(graph_id) == reproduced.end()) {
ian@0 326 next_generation.push_back(population_[graph_id]);
ian@0 327 reproduced.insert(graph_id);
ian@0 328 }
ian@0 329 }
ian@0 330 }
ian@0 331 // we might have an extra individual from crossover
ian@0 332 next_generation.resize(population_size);
ian@0 333 // reassign graph ids
ian@0 334 for (std::size_t i = 0, size = population_.size(); i < size; i++) {
ian@0 335 next_generation[i][boost::graph_bundle].id_ = static_cast<int>(i);
ian@0 336 }
ian@0 337 // replace the population with the newly created generation
ian@0 338 population_.swap(next_generation);
ian@0 339 }
ian@0 340
ian@0 341 // tournament selection
ian@0 342 int Population::SelectIndividual() {
ian@0 343 // select n individuals from the population
ian@0 344 std::set<std::size_t> tournament;
ian@0 345 while (tournament.size() < tournament_size_) {
ian@0 346 tournament.insert(stdx::Random(population_.size()));
ian@0 347 }
ian@0 348 double best_fitness = std::numeric_limits<double>::max();
ian@0 349 std::size_t result = -1;
ian@0 350 // find best graph from selection
ian@0 351 foreach (std::size_t graph_id, tournament) {
ian@0 352 if (fitnesses_[graph_id] < best_fitness) {
ian@0 353 best_fitness = fitnesses_[graph_id];
ian@0 354 result = graph_id;
ian@0 355 }
ian@0 356 }
ian@0 357 return static_cast<int>(result);
ian@0 358 }
ian@0 359
ian@0 360 void Population::ExportToDOT() {
ian@0 361 std::string dot_folder = generation_folder_ + "/dot/";
ian@0 362 bf::create_directories(dot_folder);
ian@0 363 std::stringstream path;
ian@0 364 foreach (sg::Graph& graph, population_) {
ian@0 365 path.str("");
ian@0 366 path << dot_folder
ian@0 367 << "gpsynth_" << graph[boost::graph_bundle].id_ << ".dot";
ian@0 368 std::ofstream file(path.str().c_str());
ian@0 369 file << converter_->ToDOT(graph);
ian@0 370 file.close();
ian@0 371 graph[boost::graph_bundle].dot_path_ = path.str();
ian@0 372 }
ian@0 373 }
ian@0 374
ian@0 375 void Population::GraphRatedNotification(std::size_t graphs_rated) {
ian@0 376 std::stringstream message;
ian@0 377 message << "\rRated " << graphs_rated << " / " << population_.size();
ian@0 378 logger::Log(message.str());
ian@0 379 }
ian@0 380
ian@0 381 void Population::SetCrossoverRate(double rate) {
ian@0 382 crossover_rate_ = rate;
ian@0 383 SetupGeneticOperationSelector();
ian@0 384 }
ian@0 385
ian@0 386 void Population::SetMutationRate(double rate) {
ian@0 387 mutation_rate_ = rate;
ian@0 388 SetupGeneticOperationSelector();
ian@0 389 }
ian@0 390
ian@0 391 void Population::SetupGeneticOperationSelector() {
ian@0 392 std::vector<double> genetic_probabilities(2);
ian@0 393 genetic_probabilities[kCrossover] = crossover_rate_;
ian@0 394 genetic_probabilities[kMutation] = mutation_rate_;
ian@0 395 // reproduction is left over probability
ian@0 396 genetic_operation_selector_.SetProbabilities(genetic_probabilities);
ian@0 397 }