view src/statistics.hpp @ 0:add35537fdbb tip

Initial import
author irh <ian.r.hobson@gmail.com>
date Thu, 25 Aug 2011 11:05:55 +0100
parents
children
line wrap: on
line source
//  Copyright 2011, Ian Hobson.
//
//  This file is part of gpsynth.
//
//  gpsynth is free software: you can redistribute it and/or modify
//  it under the terms of the GNU General Public License as published by
//  the Free Software Foundation, either version 3 of the License, or
//  (at your option) any later version.
//
//  gpsynth is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//  GNU General Public License for more details.
//
//  You should have received a copy of the GNU General Public License
//  along with gpsynth in the file COPYING. 
//  If not, see http://www.gnu.org/licenses/.

// Some useful stats functions

#pragma once

#include "range.hpp"
#include "std_ex.hpp"

#include <algorithm>
#include <iterator>
#include <numeric>
#include <sstream>
#include <stdexcept>

namespace stats {

// Sum
template<typename Iterator>
typename std::iterator_traits<Iterator>::value_type Sum(Iterator start,
                                                        Iterator end) {
  typename std::iterator_traits<Iterator>::value_type initializer(0);
  return std::accumulate(start, end, initializer);
}

// Sum - container
template<typename Container>
typename Container::value_type Sum(const Container& container) {
  return Sum(container.begin(), container.end());
}

// Mean
template<typename Iterator>
typename std::iterator_traits<Iterator>::value_type Mean(Iterator start,
                                                         Iterator end) {
  return Sum(start, end) / std::distance(start, end);
}

// Mean - container
template<typename Container>
typename Container::value_type Mean(const Container& container) {
  return Mean(container.begin(), container.end());
}

// measures the MSE between a range and a target
template<typename Iterator1, typename Iterator2>
typename std::iterator_traits<Iterator1>::value_type 
MeanSquaredError(Iterator1 start1, Iterator1 end1, Iterator2 start2) {
  typedef typename std::iterator_traits<Iterator1>::value_type Value;
  Value n = std::distance(start1, end1);
  Value error(0);
  while (start1 != end1) {
    error += std::pow(*start1 - *start2, Value(2));
    ++start1;
    ++start2;
  }
  return error / n;
}

// MeanSquaredError - container adaptor
template<typename Container>
typename Container::value_type MeanSquaredError(const Container& x,
                                                const Container& y) {
  return MeanSquaredError(x.begin(), x.end(), y.begin());
}

// root mean square error
template<typename Iterator1, typename Iterator2>
double RMSE(Iterator1 start, Iterator1 end, Iterator2 target) {
  return std::sqrt(MeanSquaredError(start, end, target));
}

// normalized root mean square error, using precomputed minima and maxima
template<typename Iterator1, typename Iterator2, typename T>
T NRMSE(Iterator1 start, Iterator1 end, Iterator2 target,
        const stdx::Range<T>& range1, const stdx::Range<T>& range2) {
  T range = std::max(range1.Maximum(), range2.Maximum())
            - std::min(range1.Minimum(), range2.Minimum());
  return RMSE(start, end, target) / range;
}

// Takes a set of numbers with sum <= 1 which define a distribution.
// operator() returns index chosen randomly with distribution defined by the
// provided probabilites.
// if the probabilites have sum < 1 then the difference is taken to imply a 
// single additional index
// e.g. given:
// ProbabilitySelector selector(boost::assign::list_of(0.1)(0.8));
// selector() will yield 
// '0' 10%,
// '1' 80%,
// '2' 10%
class ProbabilitySelector {
  std::vector<double> boundaries_;
  
public:
  ProbabilitySelector() {}
  ProbabilitySelector(const std::vector<double>& probabilities)
  {
    SetProbabilities(probabilities);
  }
  
  void SetProbabilities(const std::vector<double>& probabilities) {
    if (std::count_if(probabilities.begin(), probabilities.end(),
                      stdx::LessThan<double>(0.0))) {
      throw std::runtime_error("ProbabilitySelector: negative value found");
    }
    // assign range boundaries
    std::partial_sum(probabilities.begin(), probabilities.end(), 
                     std::back_inserter(boundaries_));
  }
  
  std::size_t operator()() const {
    double random = rand() / static_cast<double>(RAND_MAX);
    std::vector<double>::const_iterator index;
    index = std::upper_bound(boundaries_.begin(), boundaries_.end(), random);
    return std::distance(boundaries_.begin(), index);
  }
  
  bool Initialized() const { return !boundaries_.empty(); }
};

} // stats namespace