comparison src/population.cpp @ 0:add35537fdbb tip

Initial import
author irh <ian.r.hobson@gmail.com>
date Thu, 25 Aug 2011 11:05:55 +0100
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:add35537fdbb
1 // Copyright 2011, Ian Hobson.
2 //
3 // This file is part of gpsynth.
4 //
5 // gpsynth is free software: you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation, either version 3 of the License, or
8 // (at your option) any later version.
9 //
10 // gpsynth is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with gpsynth in the file COPYING.
17 // If not, see http://www.gnu.org/licenses/.
18
19 #include "population.hpp"
20
21 #include "boost_ex.hpp"
22 #include "graph_helpers.hpp"
23 #include "logger.hpp"
24 #include "statistics.hpp"
25 #include "std_ex.hpp"
26
27 #include "boost/bind.hpp"
28 #include "boost/filesystem.hpp"
29 #include "boost/lexical_cast.hpp"
30 #include "boost/math/special_functions/fpclassify.hpp"
31
32 #include <algorithm>
33 #include <cmath>
34 #include <fstream>
35 #include <iterator>
36 #include <limits>
37 #include <map>
38 #include <numeric>
39 #include <set>
40 #include <sstream>
41 #include <vector>
42
43 namespace bf = boost::filesystem;
44
45 namespace {
46
47 const std::string g_default_work_folder = "/tmp";
48
49 // mutated graphs might have imported parameters outside of the graph's range
50 // or removed parameter inputs making some parameters redundant
51 void UpdateParameters(sg::Graph& graph) {
52 std::size_t parameter_count = graph[boost::graph_bundle].parameters_.size();
53 // scan through vertices to find parameter inputs
54 std::map<int, std::vector<sg::Vertex> > parameters;
55 sg::VertexIterator vertex;
56 sg::VertexIterator vertex_end;
57 for (boost::tie(vertex, vertex_end) = boost::vertices(graph);
58 vertex != vertex_end;
59 ++vertex) {
60 const sg::Node& node_info = graph[*vertex];
61 if (node_info.type_ == sg::kParameter) {
62 parameters[node_info.id_].push_back(*vertex);
63 }
64 }
65 // check if any corrections required
66 if (parameters.size() == parameter_count) {
67 if (parameter_count == 0) {
68 // no parameters in the graph, nothing to do
69 return;
70 }
71 // check that last parameter id is correct, otherwise corrections are needed
72 if (parameters.rbegin()->first == (parameter_count)) {
73 // all seems well, get out of here
74 return;
75 }
76 } else if (parameters.size() > parameter_count) {
77 // add extra parameters to the graph
78 std::generate_n(std::back_inserter(graph[boost::graph_bundle].parameters_),
79 parameters.size() - parameter_count,
80 stdx::RandomCoefficient);
81 } else {
82 // remove redundant parameters
83 graph[boost::graph_bundle].parameters_.resize(parameters.size());
84 }
85 // reassign parameter ids through the graph
86 int parameter_id = 1;
87 typedef std::map<int, std::vector<sg::Vertex> >::value_type ParameterMapping;
88 foreach (ParameterMapping p, parameters) {
89 foreach (sg::Vertex v, p.second) {
90 graph[v].id_ = parameter_id;
91 }
92 ++parameter_id;
93 }
94 // job done
95 }
96
97 } // namespace
98
99 Population::Population(GrammarInterface* grammar,
100 ConverterInterface* converter,
101 EvaluatorInterface* evaluator)
102 : grammar_(grammar),
103 converter_(converter),
104 evaluator_(evaluator),
105 generation_(0),
106 max_generations_(0),
107 fitness_threshold_(0),
108 tournament_size_(7),
109 crossover_rate_(0.65),
110 mutation_rate_(0.3),
111 reproduce_best_(false),
112 keep_temp_folders_(false)
113 {
114 evaluator_->SetListener(this);
115 mutators_.push_back(boost::bind(&GrammarInterface::MutationReplaceSubTree,
116 grammar, _1));
117 mutators_.push_back(boost::bind(&GrammarInterface::MutationAddSubTree,
118 grammar, _1));
119 mutators_.push_back(boost::bind(&GrammarInterface::MutationModifyInputs,
120 grammar, _1));
121 mutators_.push_back(boost::bind(&GrammarInterface::MutationModifyConnections,
122 grammar, _1));
123 mutators_.push_back(boost::bind(&GrammarInterface::MutationReplaceCommand,
124 grammar, _1));
125 // mutators_.push_back(boost::bind(&GrammarInterface::MutationInsertCommand,
126 // grammar, _1));
127 SetupGeneticOperationSelector();
128 SetWorkFolder(g_default_work_folder);
129 }
130
131 void Population::SetWorkFolder(const std::string &path) {
132 work_folder_ = path;
133 log_folder_ = path + "/logs";
134 render_folder_ = path + "/render";
135 dot_folder_ = path + "/dot";
136 export_folder_ = path + "/export";
137 }
138
139 void Population::Evolve(int max_generations) {
140 max_generations_ = max_generations;
141 // make sure the folders for storing best outputs are created
142 bf::create_directories(render_folder_);
143 bf::create_directories(dot_folder_);
144 bf::create_directories(export_folder_);
145 bf::create_directories(log_folder_);
146 for (generation_ = 1; generation_ <= max_generations; generation_++) {
147 NewGenerationMessage();
148 SetGenerationFolder();
149 ExportToDOT();
150 EvaluateGeneration();
151 AnalyzeGeneration();
152 SaveGenerationBest();
153 CleanTempFolders();
154 if (CheckThreshold()) {
155 logger::Log("Fitness threshold reached\n");
156 return;
157 }
158 BreedNewGeneration();
159 }
160 }
161
162 void Population::NewGenerationMessage() const {
163 std::stringstream message;
164 message << "Generation " << generation_ << '\n';
165 logger::Log(message.str());
166 }
167
168 void Population::SetGenerationFolder() {
169 std::stringstream path;
170 path << work_folder_ << "/generation_"
171 << std::setw(std::log10(max_generations_) + 1) << std::setfill('0')
172 << generation_ << '/';
173 generation_folder_ = path.str();
174 }
175
176 bool Population::CheckThreshold() const {
177 double best_fitness = population_[best_fit_][boost::graph_bundle].fitness_;
178 return best_fitness <= fitness_threshold_;
179 }
180
181 void Population::CleanTempFolders() const {
182 if (!keep_temp_folders_) {
183 bf::remove_all(generation_folder_);
184 }
185 }
186
187 void Population::InitializePopulation(std::size_t size) {
188 population_.resize(size);
189 fitnesses_.resize(size);
190 for (int i = 0; i < size; i++) {
191 grammar_->RandomGraph(population_[i]);
192 population_[i][boost::graph_bundle].id_ = i;
193 }
194 }
195
196 void Population::EvaluateGeneration() {
197 // make sure we have a clean temp folder for the generation
198 bf::remove_all(generation_folder_);
199 evaluator_->SetWorkFolder(generation_folder_);
200 evaluator_->RateGraphs(population_);
201 logger::Log("\n");
202 // store graph fitnesses
203 for (std::size_t i = 0, size = population_.size(); i < size; i++) {
204 fitnesses_[i] = population_[i][boost::graph_bundle].fitness_;
205 }
206 }
207
208 void Population::AnalyzeGeneration() {
209 double mean_fitness = stats::Mean(fitnesses_.begin(), fitnesses_.end());
210 if (boost::math::isnan(mean_fitness)) {
211 logger::Log("ruh roh\n"); // TODO
212 }
213 std::stringstream message;
214 std::vector<double>::const_iterator best_fitness;
215 best_fitness = std::min_element(fitnesses_.begin(), fitnesses_.end());
216 best_fit_ = static_cast<int>(best_fitness - fitnesses_.begin());
217 message << "Average Fitness:" << mean_fitness
218 << " Best"
219 //<< " [" << best_fit_ << "]"
220 << ':' << *best_fitness << '\n';
221 logger::Log(message.str());
222 // save fitness values to logs
223 std::stringstream average;
224 average << generation_ << ' ' << mean_fitness;
225 stdx::AppendToFile(average.str(), log_folder_ + "/average_fitnesses.txt");
226 std::stringstream best;
227 best << generation_ << ' ' << *best_fitness;
228 stdx::AppendToFile(best.str(), log_folder_ + "/best_fitnesses.txt");
229 }
230
231 void Population::SaveGenerationBest() {
232 std::stringstream generation_prefix;
233 generation_prefix << "generation_"
234 << std::setw(std::log10(max_generations_) + 1)
235 << std::setfill('0')
236 << generation_;
237 const sg::Graph& best_graph = population_[best_fit_];
238 const sg::GraphProperties& graph_properties = best_graph[boost::graph_bundle];
239 // copy the rendered output for the graph
240 bf::path path = graph_properties.render_path_;
241 if (bf::exists(path)) {
242 std::string render_path = render_folder_ + '/'
243 + generation_prefix.str()
244 + path.extension().string();
245 bf::copy_file(path, render_path);
246 }
247 // copy the dot script for the graph
248 path = graph_properties.dot_path_;
249 if (bf::exists(path)) {
250 std::string dot_path = dot_folder_ + '/'
251 + generation_prefix.str()
252 + path.extension().string();
253 bf::copy_file(path, dot_path);
254 }
255 // tell the converter to export the graph
256 converter_->Export(best_graph, export_folder_, generation_prefix.str());
257 }
258
259 void Population::PerformCrossover(sg::Graph& child_1, sg::Graph& child_2) {
260 int parent_id_1 = SelectIndividual();
261 int parent_id_2;
262 do {
263 parent_id_2 = SelectIndividual();
264 } while (parent_id_1 == parent_id_2);
265 // pick the crossover points via the grammar
266 const sg::Graph& parent_1 = population_[parent_id_1];
267 const sg::Graph& parent_2 = population_[parent_id_2];
268 sg::Vertex crossover_1;
269 sg::Vertex crossover_2;
270 grammar_->PickCrossoverNodes(parent_1, parent_2, &crossover_1, &crossover_2);
271 // swap the subtrees
272 SwapSubTrees(crossover_1, crossover_2, parent_1, parent_2, child_1, child_2);
273 }
274
275 // called for mutated graphs to prepare for next generation
276 void Population::PrepareNewChild(sg::Graph& child) {
277 // reset fitness rating
278 child[boost::graph_bundle].fitness_ = sg::kFitnessUnrated;
279 // make sure parameters in graph are valid
280 UpdateParameters(child);
281 }
282
283 void Population::BreedNewGeneration() {
284 // Used to store reproduced graph ids, prevents a graph being reproduced
285 // more than once.
286 std::set<int> reproduced;
287 std::vector<sg::Graph> next_generation;
288 next_generation.reserve(population_.size());
289 if (reproduce_best_) {
290 next_generation.push_back(population_[best_fit_]);
291 }
292 std::size_t population_size = population_.size();
293 while (next_generation.size() < population_size) {
294 GeneticOperation operation;
295 operation = static_cast<GeneticOperation>(genetic_operation_selector_());
296 if (operation == kCrossover) {
297 // Crossover
298 // create two empty graphs to receive results of crossover
299 // (creating them directly in the next_generation container causes
300 // memory problems when performing crossover, couldn't fathom why..)
301 sg::Graph child_1;
302 sg::Graph child_2;
303 PerformCrossover(child_1, child_2);
304 PrepareNewChild(child_1);
305 PrepareNewChild(child_2);
306 next_generation.push_back(child_1);
307 next_generation.push_back(child_2);
308 } else if (operation == kMutation) {
309 // mutation
310 // store a copy of an individual from current population
311 next_generation.push_back(population_[SelectIndividual()]);
312 sg::Graph& child = next_generation.back();
313 // call random mutation operator on child
314 bool mutated;
315 do {
316 // some mutators might not be able to operate on a specific graph
317 // and will return false, so try another mutator
318 mutated = mutators_[stdx::Random(mutators_.size())](child);
319 } while (!mutated);
320 PrepareNewChild(child);
321 } else {
322 // reproduction
323 int graph_id = SelectIndividual();
324 // only reproduce the individual if it hasn't already been reproduced
325 if (reproduced.find(graph_id) == reproduced.end()) {
326 next_generation.push_back(population_[graph_id]);
327 reproduced.insert(graph_id);
328 }
329 }
330 }
331 // we might have an extra individual from crossover
332 next_generation.resize(population_size);
333 // reassign graph ids
334 for (std::size_t i = 0, size = population_.size(); i < size; i++) {
335 next_generation[i][boost::graph_bundle].id_ = static_cast<int>(i);
336 }
337 // replace the population with the newly created generation
338 population_.swap(next_generation);
339 }
340
341 // tournament selection
342 int Population::SelectIndividual() {
343 // select n individuals from the population
344 std::set<std::size_t> tournament;
345 while (tournament.size() < tournament_size_) {
346 tournament.insert(stdx::Random(population_.size()));
347 }
348 double best_fitness = std::numeric_limits<double>::max();
349 std::size_t result = -1;
350 // find best graph from selection
351 foreach (std::size_t graph_id, tournament) {
352 if (fitnesses_[graph_id] < best_fitness) {
353 best_fitness = fitnesses_[graph_id];
354 result = graph_id;
355 }
356 }
357 return static_cast<int>(result);
358 }
359
360 void Population::ExportToDOT() {
361 std::string dot_folder = generation_folder_ + "/dot/";
362 bf::create_directories(dot_folder);
363 std::stringstream path;
364 foreach (sg::Graph& graph, population_) {
365 path.str("");
366 path << dot_folder
367 << "gpsynth_" << graph[boost::graph_bundle].id_ << ".dot";
368 std::ofstream file(path.str().c_str());
369 file << converter_->ToDOT(graph);
370 file.close();
371 graph[boost::graph_bundle].dot_path_ = path.str();
372 }
373 }
374
375 void Population::GraphRatedNotification(std::size_t graphs_rated) {
376 std::stringstream message;
377 message << "\rRated " << graphs_rated << " / " << population_.size();
378 logger::Log(message.str());
379 }
380
381 void Population::SetCrossoverRate(double rate) {
382 crossover_rate_ = rate;
383 SetupGeneticOperationSelector();
384 }
385
386 void Population::SetMutationRate(double rate) {
387 mutation_rate_ = rate;
388 SetupGeneticOperationSelector();
389 }
390
391 void Population::SetupGeneticOperationSelector() {
392 std::vector<double> genetic_probabilities(2);
393 genetic_probabilities[kCrossover] = crossover_rate_;
394 genetic_probabilities[kMutation] = mutation_rate_;
395 // reproduction is left over probability
396 genetic_operation_selector_.SetProbabilities(genetic_probabilities);
397 }