Mercurial > hg > gpsynth
comparison src/population.cpp @ 0:add35537fdbb tip
Initial import
author | irh <ian.r.hobson@gmail.com> |
---|---|
date | Thu, 25 Aug 2011 11:05:55 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:add35537fdbb |
---|---|
1 // Copyright 2011, Ian Hobson. | |
2 // | |
3 // This file is part of gpsynth. | |
4 // | |
5 // gpsynth is free software: you can redistribute it and/or modify | |
6 // it under the terms of the GNU General Public License as published by | |
7 // the Free Software Foundation, either version 3 of the License, or | |
8 // (at your option) any later version. | |
9 // | |
10 // gpsynth is distributed in the hope that it will be useful, | |
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 // GNU General Public License for more details. | |
14 // | |
15 // You should have received a copy of the GNU General Public License | |
16 // along with gpsynth in the file COPYING. | |
17 // If not, see http://www.gnu.org/licenses/. | |
18 | |
19 #include "population.hpp" | |
20 | |
21 #include "boost_ex.hpp" | |
22 #include "graph_helpers.hpp" | |
23 #include "logger.hpp" | |
24 #include "statistics.hpp" | |
25 #include "std_ex.hpp" | |
26 | |
27 #include "boost/bind.hpp" | |
28 #include "boost/filesystem.hpp" | |
29 #include "boost/lexical_cast.hpp" | |
30 #include "boost/math/special_functions/fpclassify.hpp" | |
31 | |
32 #include <algorithm> | |
33 #include <cmath> | |
34 #include <fstream> | |
35 #include <iterator> | |
36 #include <limits> | |
37 #include <map> | |
38 #include <numeric> | |
39 #include <set> | |
40 #include <sstream> | |
41 #include <vector> | |
42 | |
43 namespace bf = boost::filesystem; | |
44 | |
45 namespace { | |
46 | |
47 const std::string g_default_work_folder = "/tmp"; | |
48 | |
49 // mutated graphs might have imported parameters outside of the graph's range | |
50 // or removed parameter inputs making some parameters redundant | |
51 void UpdateParameters(sg::Graph& graph) { | |
52 std::size_t parameter_count = graph[boost::graph_bundle].parameters_.size(); | |
53 // scan through vertices to find parameter inputs | |
54 std::map<int, std::vector<sg::Vertex> > parameters; | |
55 sg::VertexIterator vertex; | |
56 sg::VertexIterator vertex_end; | |
57 for (boost::tie(vertex, vertex_end) = boost::vertices(graph); | |
58 vertex != vertex_end; | |
59 ++vertex) { | |
60 const sg::Node& node_info = graph[*vertex]; | |
61 if (node_info.type_ == sg::kParameter) { | |
62 parameters[node_info.id_].push_back(*vertex); | |
63 } | |
64 } | |
65 // check if any corrections required | |
66 if (parameters.size() == parameter_count) { | |
67 if (parameter_count == 0) { | |
68 // no parameters in the graph, nothing to do | |
69 return; | |
70 } | |
71 // check that last parameter id is correct, otherwise corrections are needed | |
72 if (parameters.rbegin()->first == (parameter_count)) { | |
73 // all seems well, get out of here | |
74 return; | |
75 } | |
76 } else if (parameters.size() > parameter_count) { | |
77 // add extra parameters to the graph | |
78 std::generate_n(std::back_inserter(graph[boost::graph_bundle].parameters_), | |
79 parameters.size() - parameter_count, | |
80 stdx::RandomCoefficient); | |
81 } else { | |
82 // remove redundant parameters | |
83 graph[boost::graph_bundle].parameters_.resize(parameters.size()); | |
84 } | |
85 // reassign parameter ids through the graph | |
86 int parameter_id = 1; | |
87 typedef std::map<int, std::vector<sg::Vertex> >::value_type ParameterMapping; | |
88 foreach (ParameterMapping p, parameters) { | |
89 foreach (sg::Vertex v, p.second) { | |
90 graph[v].id_ = parameter_id; | |
91 } | |
92 ++parameter_id; | |
93 } | |
94 // job done | |
95 } | |
96 | |
97 } // namespace | |
98 | |
99 Population::Population(GrammarInterface* grammar, | |
100 ConverterInterface* converter, | |
101 EvaluatorInterface* evaluator) | |
102 : grammar_(grammar), | |
103 converter_(converter), | |
104 evaluator_(evaluator), | |
105 generation_(0), | |
106 max_generations_(0), | |
107 fitness_threshold_(0), | |
108 tournament_size_(7), | |
109 crossover_rate_(0.65), | |
110 mutation_rate_(0.3), | |
111 reproduce_best_(false), | |
112 keep_temp_folders_(false) | |
113 { | |
114 evaluator_->SetListener(this); | |
115 mutators_.push_back(boost::bind(&GrammarInterface::MutationReplaceSubTree, | |
116 grammar, _1)); | |
117 mutators_.push_back(boost::bind(&GrammarInterface::MutationAddSubTree, | |
118 grammar, _1)); | |
119 mutators_.push_back(boost::bind(&GrammarInterface::MutationModifyInputs, | |
120 grammar, _1)); | |
121 mutators_.push_back(boost::bind(&GrammarInterface::MutationModifyConnections, | |
122 grammar, _1)); | |
123 mutators_.push_back(boost::bind(&GrammarInterface::MutationReplaceCommand, | |
124 grammar, _1)); | |
125 // mutators_.push_back(boost::bind(&GrammarInterface::MutationInsertCommand, | |
126 // grammar, _1)); | |
127 SetupGeneticOperationSelector(); | |
128 SetWorkFolder(g_default_work_folder); | |
129 } | |
130 | |
131 void Population::SetWorkFolder(const std::string &path) { | |
132 work_folder_ = path; | |
133 log_folder_ = path + "/logs"; | |
134 render_folder_ = path + "/render"; | |
135 dot_folder_ = path + "/dot"; | |
136 export_folder_ = path + "/export"; | |
137 } | |
138 | |
139 void Population::Evolve(int max_generations) { | |
140 max_generations_ = max_generations; | |
141 // make sure the folders for storing best outputs are created | |
142 bf::create_directories(render_folder_); | |
143 bf::create_directories(dot_folder_); | |
144 bf::create_directories(export_folder_); | |
145 bf::create_directories(log_folder_); | |
146 for (generation_ = 1; generation_ <= max_generations; generation_++) { | |
147 NewGenerationMessage(); | |
148 SetGenerationFolder(); | |
149 ExportToDOT(); | |
150 EvaluateGeneration(); | |
151 AnalyzeGeneration(); | |
152 SaveGenerationBest(); | |
153 CleanTempFolders(); | |
154 if (CheckThreshold()) { | |
155 logger::Log("Fitness threshold reached\n"); | |
156 return; | |
157 } | |
158 BreedNewGeneration(); | |
159 } | |
160 } | |
161 | |
162 void Population::NewGenerationMessage() const { | |
163 std::stringstream message; | |
164 message << "Generation " << generation_ << '\n'; | |
165 logger::Log(message.str()); | |
166 } | |
167 | |
168 void Population::SetGenerationFolder() { | |
169 std::stringstream path; | |
170 path << work_folder_ << "/generation_" | |
171 << std::setw(std::log10(max_generations_) + 1) << std::setfill('0') | |
172 << generation_ << '/'; | |
173 generation_folder_ = path.str(); | |
174 } | |
175 | |
176 bool Population::CheckThreshold() const { | |
177 double best_fitness = population_[best_fit_][boost::graph_bundle].fitness_; | |
178 return best_fitness <= fitness_threshold_; | |
179 } | |
180 | |
181 void Population::CleanTempFolders() const { | |
182 if (!keep_temp_folders_) { | |
183 bf::remove_all(generation_folder_); | |
184 } | |
185 } | |
186 | |
187 void Population::InitializePopulation(std::size_t size) { | |
188 population_.resize(size); | |
189 fitnesses_.resize(size); | |
190 for (int i = 0; i < size; i++) { | |
191 grammar_->RandomGraph(population_[i]); | |
192 population_[i][boost::graph_bundle].id_ = i; | |
193 } | |
194 } | |
195 | |
196 void Population::EvaluateGeneration() { | |
197 // make sure we have a clean temp folder for the generation | |
198 bf::remove_all(generation_folder_); | |
199 evaluator_->SetWorkFolder(generation_folder_); | |
200 evaluator_->RateGraphs(population_); | |
201 logger::Log("\n"); | |
202 // store graph fitnesses | |
203 for (std::size_t i = 0, size = population_.size(); i < size; i++) { | |
204 fitnesses_[i] = population_[i][boost::graph_bundle].fitness_; | |
205 } | |
206 } | |
207 | |
208 void Population::AnalyzeGeneration() { | |
209 double mean_fitness = stats::Mean(fitnesses_.begin(), fitnesses_.end()); | |
210 if (boost::math::isnan(mean_fitness)) { | |
211 logger::Log("ruh roh\n"); // TODO | |
212 } | |
213 std::stringstream message; | |
214 std::vector<double>::const_iterator best_fitness; | |
215 best_fitness = std::min_element(fitnesses_.begin(), fitnesses_.end()); | |
216 best_fit_ = static_cast<int>(best_fitness - fitnesses_.begin()); | |
217 message << "Average Fitness:" << mean_fitness | |
218 << " Best" | |
219 //<< " [" << best_fit_ << "]" | |
220 << ':' << *best_fitness << '\n'; | |
221 logger::Log(message.str()); | |
222 // save fitness values to logs | |
223 std::stringstream average; | |
224 average << generation_ << ' ' << mean_fitness; | |
225 stdx::AppendToFile(average.str(), log_folder_ + "/average_fitnesses.txt"); | |
226 std::stringstream best; | |
227 best << generation_ << ' ' << *best_fitness; | |
228 stdx::AppendToFile(best.str(), log_folder_ + "/best_fitnesses.txt"); | |
229 } | |
230 | |
231 void Population::SaveGenerationBest() { | |
232 std::stringstream generation_prefix; | |
233 generation_prefix << "generation_" | |
234 << std::setw(std::log10(max_generations_) + 1) | |
235 << std::setfill('0') | |
236 << generation_; | |
237 const sg::Graph& best_graph = population_[best_fit_]; | |
238 const sg::GraphProperties& graph_properties = best_graph[boost::graph_bundle]; | |
239 // copy the rendered output for the graph | |
240 bf::path path = graph_properties.render_path_; | |
241 if (bf::exists(path)) { | |
242 std::string render_path = render_folder_ + '/' | |
243 + generation_prefix.str() | |
244 + path.extension().string(); | |
245 bf::copy_file(path, render_path); | |
246 } | |
247 // copy the dot script for the graph | |
248 path = graph_properties.dot_path_; | |
249 if (bf::exists(path)) { | |
250 std::string dot_path = dot_folder_ + '/' | |
251 + generation_prefix.str() | |
252 + path.extension().string(); | |
253 bf::copy_file(path, dot_path); | |
254 } | |
255 // tell the converter to export the graph | |
256 converter_->Export(best_graph, export_folder_, generation_prefix.str()); | |
257 } | |
258 | |
259 void Population::PerformCrossover(sg::Graph& child_1, sg::Graph& child_2) { | |
260 int parent_id_1 = SelectIndividual(); | |
261 int parent_id_2; | |
262 do { | |
263 parent_id_2 = SelectIndividual(); | |
264 } while (parent_id_1 == parent_id_2); | |
265 // pick the crossover points via the grammar | |
266 const sg::Graph& parent_1 = population_[parent_id_1]; | |
267 const sg::Graph& parent_2 = population_[parent_id_2]; | |
268 sg::Vertex crossover_1; | |
269 sg::Vertex crossover_2; | |
270 grammar_->PickCrossoverNodes(parent_1, parent_2, &crossover_1, &crossover_2); | |
271 // swap the subtrees | |
272 SwapSubTrees(crossover_1, crossover_2, parent_1, parent_2, child_1, child_2); | |
273 } | |
274 | |
275 // called for mutated graphs to prepare for next generation | |
276 void Population::PrepareNewChild(sg::Graph& child) { | |
277 // reset fitness rating | |
278 child[boost::graph_bundle].fitness_ = sg::kFitnessUnrated; | |
279 // make sure parameters in graph are valid | |
280 UpdateParameters(child); | |
281 } | |
282 | |
283 void Population::BreedNewGeneration() { | |
284 // Used to store reproduced graph ids, prevents a graph being reproduced | |
285 // more than once. | |
286 std::set<int> reproduced; | |
287 std::vector<sg::Graph> next_generation; | |
288 next_generation.reserve(population_.size()); | |
289 if (reproduce_best_) { | |
290 next_generation.push_back(population_[best_fit_]); | |
291 } | |
292 std::size_t population_size = population_.size(); | |
293 while (next_generation.size() < population_size) { | |
294 GeneticOperation operation; | |
295 operation = static_cast<GeneticOperation>(genetic_operation_selector_()); | |
296 if (operation == kCrossover) { | |
297 // Crossover | |
298 // create two empty graphs to receive results of crossover | |
299 // (creating them directly in the next_generation container causes | |
300 // memory problems when performing crossover, couldn't fathom why..) | |
301 sg::Graph child_1; | |
302 sg::Graph child_2; | |
303 PerformCrossover(child_1, child_2); | |
304 PrepareNewChild(child_1); | |
305 PrepareNewChild(child_2); | |
306 next_generation.push_back(child_1); | |
307 next_generation.push_back(child_2); | |
308 } else if (operation == kMutation) { | |
309 // mutation | |
310 // store a copy of an individual from current population | |
311 next_generation.push_back(population_[SelectIndividual()]); | |
312 sg::Graph& child = next_generation.back(); | |
313 // call random mutation operator on child | |
314 bool mutated; | |
315 do { | |
316 // some mutators might not be able to operate on a specific graph | |
317 // and will return false, so try another mutator | |
318 mutated = mutators_[stdx::Random(mutators_.size())](child); | |
319 } while (!mutated); | |
320 PrepareNewChild(child); | |
321 } else { | |
322 // reproduction | |
323 int graph_id = SelectIndividual(); | |
324 // only reproduce the individual if it hasn't already been reproduced | |
325 if (reproduced.find(graph_id) == reproduced.end()) { | |
326 next_generation.push_back(population_[graph_id]); | |
327 reproduced.insert(graph_id); | |
328 } | |
329 } | |
330 } | |
331 // we might have an extra individual from crossover | |
332 next_generation.resize(population_size); | |
333 // reassign graph ids | |
334 for (std::size_t i = 0, size = population_.size(); i < size; i++) { | |
335 next_generation[i][boost::graph_bundle].id_ = static_cast<int>(i); | |
336 } | |
337 // replace the population with the newly created generation | |
338 population_.swap(next_generation); | |
339 } | |
340 | |
341 // tournament selection | |
342 int Population::SelectIndividual() { | |
343 // select n individuals from the population | |
344 std::set<std::size_t> tournament; | |
345 while (tournament.size() < tournament_size_) { | |
346 tournament.insert(stdx::Random(population_.size())); | |
347 } | |
348 double best_fitness = std::numeric_limits<double>::max(); | |
349 std::size_t result = -1; | |
350 // find best graph from selection | |
351 foreach (std::size_t graph_id, tournament) { | |
352 if (fitnesses_[graph_id] < best_fitness) { | |
353 best_fitness = fitnesses_[graph_id]; | |
354 result = graph_id; | |
355 } | |
356 } | |
357 return static_cast<int>(result); | |
358 } | |
359 | |
360 void Population::ExportToDOT() { | |
361 std::string dot_folder = generation_folder_ + "/dot/"; | |
362 bf::create_directories(dot_folder); | |
363 std::stringstream path; | |
364 foreach (sg::Graph& graph, population_) { | |
365 path.str(""); | |
366 path << dot_folder | |
367 << "gpsynth_" << graph[boost::graph_bundle].id_ << ".dot"; | |
368 std::ofstream file(path.str().c_str()); | |
369 file << converter_->ToDOT(graph); | |
370 file.close(); | |
371 graph[boost::graph_bundle].dot_path_ = path.str(); | |
372 } | |
373 } | |
374 | |
375 void Population::GraphRatedNotification(std::size_t graphs_rated) { | |
376 std::stringstream message; | |
377 message << "\rRated " << graphs_rated << " / " << population_.size(); | |
378 logger::Log(message.str()); | |
379 } | |
380 | |
381 void Population::SetCrossoverRate(double rate) { | |
382 crossover_rate_ = rate; | |
383 SetupGeneticOperationSelector(); | |
384 } | |
385 | |
386 void Population::SetMutationRate(double rate) { | |
387 mutation_rate_ = rate; | |
388 SetupGeneticOperationSelector(); | |
389 } | |
390 | |
391 void Population::SetupGeneticOperationSelector() { | |
392 std::vector<double> genetic_probabilities(2); | |
393 genetic_probabilities[kCrossover] = crossover_rate_; | |
394 genetic_probabilities[kMutation] = mutation_rate_; | |
395 // reproduction is left over probability | |
396 genetic_operation_selector_.SetProbabilities(genetic_probabilities); | |
397 } |