annotate src/statistics.hpp @ 0:add35537fdbb tip

Initial import
author irh <ian.r.hobson@gmail.com>
date Thu, 25 Aug 2011 11:05:55 +0100
parents
children
rev   line source
ian@0 1 // Copyright 2011, Ian Hobson.
ian@0 2 //
ian@0 3 // This file is part of gpsynth.
ian@0 4 //
ian@0 5 // gpsynth is free software: you can redistribute it and/or modify
ian@0 6 // it under the terms of the GNU General Public License as published by
ian@0 7 // the Free Software Foundation, either version 3 of the License, or
ian@0 8 // (at your option) any later version.
ian@0 9 //
ian@0 10 // gpsynth is distributed in the hope that it will be useful,
ian@0 11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
ian@0 12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
ian@0 13 // GNU General Public License for more details.
ian@0 14 //
ian@0 15 // You should have received a copy of the GNU General Public License
ian@0 16 // along with gpsynth in the file COPYING.
ian@0 17 // If not, see http://www.gnu.org/licenses/.
ian@0 18
ian@0 19 // Some useful stats functions
ian@0 20
ian@0 21 #pragma once
ian@0 22
ian@0 23 #include "range.hpp"
ian@0 24 #include "std_ex.hpp"
ian@0 25
ian@0 26 #include <algorithm>
ian@0 27 #include <iterator>
ian@0 28 #include <numeric>
ian@0 29 #include <sstream>
ian@0 30 #include <stdexcept>
ian@0 31
ian@0 32 namespace stats {
ian@0 33
// Sum
// Returns the total of all elements in [start, end), accumulated in the
// iterator's value_type starting from a value constructed from 0.
// An empty range yields that zero value.
template<typename Iterator>
typename std::iterator_traits<Iterator>::value_type Sum(Iterator start,
                                                        Iterator end) {
  typedef typename std::iterator_traits<Iterator>::value_type Value;
  return std::accumulate(start, end, Value(0));
}
ian@0 41
// Sum - container
// Convenience overload: totals every element of the container by forwarding
// its begin/end iterators to the iterator-based Sum.
template<typename Container>
typename Container::value_type Sum(const Container& container) {
  typename Container::const_iterator first = container.begin();
  typename Container::const_iterator last = container.end();
  return Sum(first, last);
}
ian@0 47
// Mean
// Returns the arithmetic mean of the elements in [start, end), i.e. their Sum
// divided by the number of elements. The result is expressed in the
// iterator's value_type, so integral element types give a truncated mean.
// The range is traversed twice (once to count, once to sum) and must not be
// empty, since the element count is used as the divisor.
template<typename Iterator>
typename std::iterator_traits<Iterator>::value_type Mean(Iterator start,
                                                         Iterator end) {
  typedef std::iterator_traits<Iterator> Traits;
  const typename Traits::difference_type count = std::distance(start, end);
  const typename Traits::value_type total = Sum(start, end);
  return total / count;
}
ian@0 54
// Mean - container
// Convenience overload: averages every element of the container by
// forwarding its begin/end iterators to the iterator-based Mean.
template<typename Container>
typename Container::value_type Mean(const Container& container) {
  typename Container::const_iterator first = container.begin();
  typename Container::const_iterator last = container.end();
  return Mean(first, last);
}
ian@0 60
// Measures the mean squared error (MSE) between a range and a target.
//
// [start1, end1) is the measured range; start2 is the beginning of the target
// range, which must hold at least as many elements as the measured range.
// Each element-wise difference is squared, the squares are summed, and the
// sum is divided by the number of elements. All arithmetic is carried out in
// Iterator1's value_type, which is also the return type.
//
// The measured range must not be empty, since its length is the divisor.
template<typename Iterator1, typename Iterator2>
typename std::iterator_traits<Iterator1>::value_type
MeanSquaredError(Iterator1 start1, Iterator1 end1, Iterator2 start2) {
  typedef typename std::iterator_traits<Iterator1>::value_type Value;
  Value n = std::distance(start1, end1);
  Value error(0);
  while (start1 != end1) {
    // Square by multiplication rather than std::pow: it is exact for integral
    // types, cheaper, and does not depend on <cmath>.
    const Value difference = *start1 - *start2;
    error += difference * difference;
    ++start1;
    ++start2;
  }
  return error / n;
}
ian@0 75
// MeanSquaredError - container adaptor
// Computes the mean squared error between the elements of x and y by
// forwarding to the iterator-based MeanSquaredError. Only the length of x
// bounds the traversal, so y must hold at least as many elements as x.
template<typename Container>
typename Container::value_type MeanSquaredError(const Container& x,
                                                const Container& y) {
  typename Container::const_iterator measured_first = x.begin();
  typename Container::const_iterator measured_last = x.end();
  typename Container::const_iterator target_first = y.begin();
  return MeanSquaredError(measured_first, measured_last, target_first);
}
ian@0 82
// Root mean square error
// Returns the square root of the MeanSquaredError between [start, end) and
// the range beginning at target.
template<typename Iterator1, typename Iterator2>
double RMSE(Iterator1 start, Iterator1 end, Iterator2 target) {
  typedef typename std::iterator_traits<Iterator1>::value_type Value;
  const Value mean_squared_error = MeanSquaredError(start, end, target);
  return std::sqrt(mean_squared_error);
}
ian@0 88
ian@0 89 // normalized root mean square error, using precomputed minima and maxima
ian@0 90 template<typename Iterator1, typename Iterator2, typename T>
ian@0 91 T NRMSE(Iterator1 start, Iterator1 end, Iterator2 target,
ian@0 92 const stdx::Range<T>& range1, const stdx::Range<T>& range2) {
ian@0 93 T range = std::max(range1.Maximum(), range2.Maximum())
ian@0 94 - std::min(range1.Minimum(), range2.Minimum());
ian@0 95 return RMSE(start, end, target) / range;
ian@0 96 }
ian@0 97
// Takes a set of non-negative numbers with sum <= 1 which define a
// distribution.
// operator() returns an index chosen randomly with the distribution defined
// by the provided probabilities.
// If the probabilities have sum < 1 then the difference is taken to imply a
// single additional index (equal to the number of probabilities provided).
// e.g. given:
//   ProbabilitySelector selector(boost::assign::list_of(0.1)(0.8));
// selector() will yield
//   '0' 10%,
//   '1' 80%,
//   '2' 10%
// Random numbers are drawn from rand(), so seeding is the caller's business.
class ProbabilitySelector {
  // Running totals of the probabilities: boundaries_[i] is the exclusive
  // upper edge of the slice of [0, 1) that selects index i.
  std::vector<double> boundaries_;

public:
  // Creates an uninitialized selector; see Initialized().
  ProbabilitySelector() {}

  // Creates a selector for the given probabilities.
  // Throws std::runtime_error if any probability is negative.
  ProbabilitySelector(const std::vector<double>& probabilities)
  {
    SetProbabilities(probabilities);
  }

  // Replaces the current distribution with the given probabilities.
  // Throws std::runtime_error if any probability is negative, in which case
  // the previous distribution is left untouched.
  void SetProbabilities(const std::vector<double>& probabilities) {
    typedef std::vector<double>::const_iterator ConstIterator;
    for (ConstIterator probability = probabilities.begin();
         probability != probabilities.end(); ++probability) {
      if (*probability < 0.0) {
        throw std::runtime_error("ProbabilitySelector: negative value found");
      }
    }
    // Build the range boundaries in a fresh vector and swap it in, so that
    // boundaries left over from an earlier call are discarded rather than
    // appended to.
    std::vector<double> boundaries;
    boundaries.reserve(probabilities.size());
    std::partial_sum(probabilities.begin(), probabilities.end(),
                     std::back_inserter(boundaries));
    boundaries_.swap(boundaries);
  }

  // Returns a randomly chosen index according to the current distribution.
  std::size_t operator()() const {
    // Scale into [0, 1): dividing by RAND_MAX alone can produce exactly 1.0,
    // which would select the implied extra index even when the probabilities
    // sum to 1.
    const double random = rand() / (static_cast<double>(RAND_MAX) + 1.0);
    // The first boundary strictly greater than the draw marks the chosen
    // slice; running off the end selects the implied additional index.
    const std::vector<double>::const_iterator index =
        std::upper_bound(boundaries_.begin(), boundaries_.end(), random);
    return std::distance(boundaries_.begin(), index);
  }

  // True once a non-empty set of probabilities has been supplied.
  bool Initialized() const { return !boundaries_.empty(); }
};
ian@0 138
ian@0 139 } // stats namespace