annotate DEPENDENCIES/generic/include/boost/accumulators/statistics/peaks_over_threshold.hpp @ 133:4acb5d8d80b6 tip

Don't fail environmental check if README.md exists (but .txt and no-suffix don't)
author Chris Cannam
date Tue, 30 Jul 2019 12:25:44 +0100
parents 2665513ce2d3
children
rev   line source
Chris@16 1 ///////////////////////////////////////////////////////////////////////////////
Chris@16 2 // peaks_over_threshold.hpp
Chris@16 3 //
Chris@16 4 // Copyright 2006 Daniel Egloff, Olivier Gygi. Distributed under the Boost
Chris@16 5 // Software License, Version 1.0. (See accompanying file
Chris@16 6 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
Chris@16 7
Chris@16 8 #ifndef BOOST_ACCUMULATORS_STATISTICS_PEAKS_OVER_THRESHOLD_HPP_DE_01_01_2006
Chris@16 9 #define BOOST_ACCUMULATORS_STATISTICS_PEAKS_OVER_THRESHOLD_HPP_DE_01_01_2006
Chris@16 10
Chris@16 11 #include <vector>
Chris@16 12 #include <limits>
Chris@16 13 #include <numeric>
Chris@16 14 #include <functional>
Chris@16 15 #include <boost/config/no_tr1/cmath.hpp> // pow
Chris@16 16 #include <sstream> // stringstream
Chris@16 17 #include <stdexcept> // runtime_error
Chris@16 18 #include <boost/throw_exception.hpp>
Chris@16 19 #include <boost/range.hpp>
Chris@16 20 #include <boost/mpl/if.hpp>
Chris@16 21 #include <boost/mpl/int.hpp>
Chris@16 22 #include <boost/mpl/placeholders.hpp>
Chris@16 23 #include <boost/parameter/keyword.hpp>
Chris@16 24 #include <boost/tuple/tuple.hpp>
Chris@16 25 #include <boost/accumulators/accumulators_fwd.hpp>
Chris@16 26 #include <boost/accumulators/framework/accumulator_base.hpp>
Chris@16 27 #include <boost/accumulators/framework/extractor.hpp>
Chris@16 28 #include <boost/accumulators/numeric/functional.hpp>
Chris@16 29 #include <boost/accumulators/framework/parameters/sample.hpp>
Chris@16 30 #include <boost/accumulators/framework/depends_on.hpp>
Chris@16 31 #include <boost/accumulators/statistics_fwd.hpp>
Chris@16 32 #include <boost/accumulators/statistics/parameters/quantile_probability.hpp>
Chris@16 33 #include <boost/accumulators/statistics/count.hpp>
Chris@16 34 #include <boost/accumulators/statistics/tail.hpp>
Chris@16 35
Chris@16 36 #ifdef _MSC_VER
Chris@16 37 # pragma warning(push)
Chris@16 38 # pragma warning(disable: 4127) // conditional expression is constant
Chris@16 39 #endif
Chris@16 40
Chris@16 41 namespace boost { namespace accumulators
Chris@16 42 {
Chris@16 43
Chris@16 44 ///////////////////////////////////////////////////////////////////////////////
Chris@16 45 // threshold_probability and threshold named parameters
Chris@16 46 //
Chris@16 47 BOOST_PARAMETER_NESTED_KEYWORD(tag, pot_threshold_value, threshold_value)
Chris@16 48 BOOST_PARAMETER_NESTED_KEYWORD(tag, pot_threshold_probability, threshold_probability)
Chris@16 49
Chris@16 50 BOOST_ACCUMULATORS_IGNORE_GLOBAL(pot_threshold_value)
Chris@16 51 BOOST_ACCUMULATORS_IGNORE_GLOBAL(pot_threshold_probability)
Chris@16 52
Chris@16 53 namespace impl
Chris@16 54 {
Chris@16 55 ///////////////////////////////////////////////////////////////////////////////
Chris@16 56 // peaks_over_threshold_impl
Chris@16 57 // works with an explicit threshold value and does not depend on order statistics
Chris@16 58 /**
Chris@16 59 @brief Peaks over Threshold Method for Quantile and Tail Mean Estimation
Chris@16 60
Chris@16 61 According to the theorem of Pickands-Balkema-de Haan, the distribution function \f$F_u(x)\f$ of
Chris@16 62 the excesses \f$x\f$ over some sufficiently high threshold \f$u\f$ of a distribution function \f$F(x)\f$
Chris@16 63 may be approximated by a generalized Pareto distribution
Chris@16 64 \f[
Chris@16 65 G_{\xi,\beta}(x) =
Chris@16 66 \left\{
Chris@16 67 \begin{array}{ll}
Chris@16 68 \beta^{-1}\left(1+\frac{\xi x}{\beta}\right)^{-1/\xi-1} & \textrm{if }\xi\neq0\\
Chris@16 69 \beta^{-1}\exp\left(-\frac{x}{\beta}\right) & \textrm{if }\xi=0,
Chris@16 70 \end{array}
Chris@16 71 \right.
Chris@16 72 \f]
Chris@16 73 with suitable parameters \f$\xi\f$ and \f$\beta\f$ that can be estimated, e.g., with the method of moments, cf.
Chris@16 74 Hosking and Wallis (1987),
Chris@16 75 \f[
Chris@16 76 \begin{array}{lll}
Chris@16 77 \hat{\xi} & = & \frac{1}{2}\left[1-\frac{(\hat{\mu}-u)^2}{\hat{\sigma}^2}\right]\\
Chris@16 78 \hat{\beta} & = & \frac{\hat{\mu}-u}{2}\left[\frac{(\hat{\mu}-u)^2}{\hat{\sigma}^2}+1\right],
Chris@16 79 \end{array}
Chris@16 80 \f]
Chris@16 81 \f$\hat{\mu}\f$ and \f$\hat{\sigma}^2\f$ being the empirical mean and variance of the samples over
Chris@16 82 the threshold \f$u\f$. Equivalently, the distribution function
Chris@16 83 \f$F_u(x-u)\f$ of the exceedances \f$x-u\f$ can be approximated by
Chris@16 84 \f$G_{\xi,\beta}(x-u)=G_{\xi,\beta,u}(x)\f$. Since for \f$x\geq u\f$ the distribution function \f$F(x)\f$
Chris@16 85 can be written as
Chris@16 86 \f[
Chris@16 87 F(x) = [1 - \P(X \leq u)]F_u(x - u) + \P(X \leq u)
Chris@16 88 \f]
Chris@16 89 and the probability \f$\P(X \leq u)\f$ can be approximated by the empirical distribution function
Chris@16 90 \f$F_n(u)\f$ evaluated at \f$u\f$, an estimator of \f$F(x)\f$ is given by
Chris@16 91 \f[
Chris@16 92 \widehat{F}(x) = [1 - F_n(u)]G_{\xi,\beta,u}(x) + F_n(u).
Chris@16 93 \f]
Chris@16 94 It can be shown that \f$\widehat{F}(x)\f$ is a generalized
Chris@16 95 Pareto distribution \f$G_{\xi,\bar{\beta},\bar{u}}(x)\f$ with \f$\bar{\beta}=\beta[1-F_n(u)]^{\xi}\f$
Chris@16 96 and \f$\bar{u}=u-\bar{\beta}\left\{[1-F_n(u)]^{-\xi}-1\right\}/\xi\f$. By inverting \f$\widehat{F}(x)\f$,
Chris@16 97 one obtains an estimator for the \f$\alpha\f$-quantile,
Chris@16 98 \f[
Chris@16 99 \hat{q}_{\alpha} = \bar{u} + \frac{\bar{\beta}}{\xi}\left[(1-\alpha)^{-\xi}-1\right],
Chris@16 100 \f]
Chris@16 101 and similarly an estimator for the (coherent) tail mean,
Chris@16 102 \f[
Chris@16 103 \widehat{CTM}_{\alpha} = \hat{q}_{\alpha} - \frac{\bar{\beta}}{\xi-1}(1-\alpha)^{-\xi},
Chris@16 104 \f]
Chris@16 105 cf. McNeil and Frey (2000).
Chris@16 106
Chris@16 107 Note that in case extreme values of the left tail are fitted, the distribution is mirrored with respect to the
Chris@16 108 \f$y\f$ axis such that the left tail can be treated as a right tail. The computed fit parameters thus define
Chris@16 109 the Pareto distribution that fits the mirrored left tail. When quantities like a quantile or a tail mean are
Chris@16 110 computed using the fit parameters obtained from the mirrored data, the result is mirrored back, yielding the
Chris@16 111 correct result.
Chris@16 112
Chris@16 113 For further details, see
Chris@16 114
Chris@16 115 J. R. M. Hosking and J. R. Wallis, Parameter and quantile estimation for the generalized Pareto distribution,
Chris@16 116 Technometrics, Volume 29, 1987, p. 339-349
Chris@16 117
Chris@16 118 A. J. McNeil and R. Frey, Estimation of Tail-Related Risk Measures for Heteroscedastic Financial Time Series:
Chris@16 119 an Extreme Value Approach, Journal of Empirical Finance, Volume 7, 2000, p. 271-300
Chris@16 120
Chris@16 121 @param quantile_probability
Chris@16 122 @param pot_threshold_value
Chris@16 123 */
Chris@16 124 template<typename Sample, typename LeftRight>
Chris@16 125 struct peaks_over_threshold_impl
Chris@16 126 : accumulator_base
Chris@16 127 {
Chris@16 128 typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type;
Chris@16 129 // for boost::result_of
Chris@16 130 typedef boost::tuple<float_type, float_type, float_type> result_type;
Chris@16 131 // for left tail fitting, mirror the extreme values
Chris@16 132 typedef mpl::int_<is_same<LeftRight, left>::value ? -1 : 1> sign;
Chris@16 133
Chris@16 134 template<typename Args>
Chris@16 135 peaks_over_threshold_impl(Args const &args)
Chris@16 136 : Nu_(0)
Chris@16 137 , mu_(sign::value * numeric::fdiv(args[sample | Sample()], (std::size_t)1))
Chris@16 138 , sigma2_(numeric::fdiv(args[sample | Sample()], (std::size_t)1))
Chris@16 139 , threshold_(sign::value * args[pot_threshold_value])
Chris@16 140 , fit_parameters_(boost::make_tuple(0., 0., 0.))
Chris@16 141 , is_dirty_(true)
Chris@16 142 {
Chris@16 143 }
Chris@16 144
Chris@16 145 template<typename Args>
Chris@16 146 void operator ()(Args const &args)
Chris@16 147 {
Chris@16 148 this->is_dirty_ = true;
Chris@16 149
Chris@16 150 if (sign::value * args[sample] > this->threshold_)
Chris@16 151 {
Chris@16 152 this->mu_ += args[sample];
Chris@16 153 this->sigma2_ += args[sample] * args[sample];
Chris@16 154 ++this->Nu_;
Chris@16 155 }
Chris@16 156 }
Chris@16 157
Chris@16 158 template<typename Args>
Chris@16 159 result_type result(Args const &args) const
Chris@16 160 {
Chris@16 161 if (this->is_dirty_)
Chris@16 162 {
Chris@16 163 this->is_dirty_ = false;
Chris@16 164
Chris@16 165 std::size_t cnt = count(args);
Chris@16 166
Chris@16 167 this->mu_ = sign::value * numeric::fdiv(this->mu_, this->Nu_);
Chris@16 168 this->sigma2_ = numeric::fdiv(this->sigma2_, this->Nu_);
Chris@16 169 this->sigma2_ -= this->mu_ * this->mu_;
Chris@16 170
Chris@16 171 float_type threshold_probability = numeric::fdiv(cnt - this->Nu_, cnt);
Chris@16 172
Chris@16 173 float_type tmp = numeric::fdiv(( this->mu_ - this->threshold_ )*( this->mu_ - this->threshold_ ), this->sigma2_);
Chris@16 174 float_type xi_hat = 0.5 * ( 1. - tmp );
Chris@16 175 float_type beta_hat = 0.5 * ( this->mu_ - this->threshold_ ) * ( 1. + tmp );
Chris@16 176 float_type beta_bar = beta_hat * std::pow(1. - threshold_probability, xi_hat);
Chris@16 177 float_type u_bar = this->threshold_ - beta_bar * ( std::pow(1. - threshold_probability, -xi_hat) - 1.)/xi_hat;
Chris@16 178 this->fit_parameters_ = boost::make_tuple(u_bar, beta_bar, xi_hat);
Chris@16 179 }
Chris@16 180
Chris@16 181 return this->fit_parameters_;
Chris@16 182 }
Chris@16 183
Chris@16 184 private:
Chris@16 185 std::size_t Nu_; // number of samples larger than threshold
Chris@16 186 mutable float_type mu_; // mean of Nu_ largest samples
Chris@16 187 mutable float_type sigma2_; // variance of Nu_ largest samples
Chris@16 188 float_type threshold_;
Chris@16 189 mutable result_type fit_parameters_; // boost::tuple that stores fit parameters
Chris@16 190 mutable bool is_dirty_;
Chris@16 191 };
Chris@16 192
Chris@16 193 ///////////////////////////////////////////////////////////////////////////////
Chris@16 194 // peaks_over_threshold_prob_impl
Chris@16 195 // determines threshold from a given threshold probability using order statistics
Chris@16 196 /**
Chris@16 197 @brief Peaks over Threshold Method for Quantile and Tail Mean Estimation
Chris@16 198
Chris@16 199 @sa peaks_over_threshold_impl
Chris@16 200
Chris@16 201 @param quantile_probability
Chris@16 202 @param pot_threshold_probability
Chris@16 203 */
Chris@16 204 template<typename Sample, typename LeftRight>
Chris@16 205 struct peaks_over_threshold_prob_impl
Chris@16 206 : accumulator_base
Chris@16 207 {
Chris@16 208 typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type;
Chris@16 209 // for boost::result_of
Chris@16 210 typedef boost::tuple<float_type, float_type, float_type> result_type;
Chris@16 211 // for left tail fitting, mirror the extreme values
Chris@16 212 typedef mpl::int_<is_same<LeftRight, left>::value ? -1 : 1> sign;
Chris@16 213
Chris@16 214 template<typename Args>
Chris@16 215 peaks_over_threshold_prob_impl(Args const &args)
Chris@16 216 : mu_(sign::value * numeric::fdiv(args[sample | Sample()], (std::size_t)1))
Chris@16 217 , sigma2_(numeric::fdiv(args[sample | Sample()], (std::size_t)1))
Chris@16 218 , threshold_probability_(args[pot_threshold_probability])
Chris@16 219 , fit_parameters_(boost::make_tuple(0., 0., 0.))
Chris@16 220 , is_dirty_(true)
Chris@16 221 {
Chris@16 222 }
Chris@16 223
Chris@16 224 void operator ()(dont_care)
Chris@16 225 {
Chris@16 226 this->is_dirty_ = true;
Chris@16 227 }
Chris@16 228
Chris@16 229 template<typename Args>
Chris@16 230 result_type result(Args const &args) const
Chris@16 231 {
Chris@16 232 if (this->is_dirty_)
Chris@16 233 {
Chris@16 234 this->is_dirty_ = false;
Chris@16 235
Chris@16 236 std::size_t cnt = count(args);
Chris@16 237
Chris@16 238 // the n'th cached sample provides an approximate threshold value u
Chris@16 239 std::size_t n = static_cast<std::size_t>(
Chris@16 240 std::ceil(
Chris@16 241 cnt * ( ( is_same<LeftRight, left>::value ) ? this->threshold_probability_ : 1. - this->threshold_probability_ )
Chris@16 242 )
Chris@16 243 );
Chris@16 244
Chris@16 245 // If n is in a valid range, return result, otherwise return NaN or throw exception
Chris@16 246 if ( n >= static_cast<std::size_t>(tail(args).size()))
Chris@16 247 {
Chris@16 248 if (std::numeric_limits<float_type>::has_quiet_NaN)
Chris@16 249 {
Chris@16 250 return boost::make_tuple(
Chris@16 251 std::numeric_limits<float_type>::quiet_NaN()
Chris@16 252 , std::numeric_limits<float_type>::quiet_NaN()
Chris@16 253 , std::numeric_limits<float_type>::quiet_NaN()
Chris@16 254 );
Chris@16 255 }
Chris@16 256 else
Chris@16 257 {
Chris@16 258 std::ostringstream msg;
Chris@16 259 msg << "index n = " << n << " is not in valid range [0, " << tail(args).size() << ")";
Chris@16 260 boost::throw_exception(std::runtime_error(msg.str()));
Chris@16 261 return boost::make_tuple(Sample(0), Sample(0), Sample(0));
Chris@16 262 }
Chris@16 263 }
Chris@16 264 else
Chris@16 265 {
Chris@16 266 float_type u = *(tail(args).begin() + n - 1) * sign::value;
Chris@16 267
Chris@16 268 // compute mean and variance of samples above/under threshold value u
Chris@16 269 for (std::size_t i = 0; i < n; ++i)
Chris@16 270 {
Chris@16 271 mu_ += *(tail(args).begin() + i);
Chris@16 272 sigma2_ += *(tail(args).begin() + i) * (*(tail(args).begin() + i));
Chris@16 273 }
Chris@16 274
Chris@16 275 this->mu_ = sign::value * numeric::fdiv(this->mu_, n);
Chris@16 276 this->sigma2_ = numeric::fdiv(this->sigma2_, n);
Chris@16 277 this->sigma2_ -= this->mu_ * this->mu_;
Chris@16 278
Chris@16 279 if (is_same<LeftRight, left>::value)
Chris@16 280 this->threshold_probability_ = 1. - this->threshold_probability_;
Chris@16 281
Chris@16 282 float_type tmp = numeric::fdiv(( this->mu_ - u )*( this->mu_ - u ), this->sigma2_);
Chris@16 283 float_type xi_hat = 0.5 * ( 1. - tmp );
Chris@16 284 float_type beta_hat = 0.5 * ( this->mu_ - u ) * ( 1. + tmp );
Chris@16 285 float_type beta_bar = beta_hat * std::pow(1. - threshold_probability_, xi_hat);
Chris@16 286 float_type u_bar = u - beta_bar * ( std::pow(1. - threshold_probability_, -xi_hat) - 1.)/xi_hat;
Chris@16 287 this->fit_parameters_ = boost::make_tuple(u_bar, beta_bar, xi_hat);
Chris@16 288 }
Chris@16 289 }
Chris@16 290
Chris@16 291 return this->fit_parameters_;
Chris@16 292 }
Chris@16 293
Chris@16 294 private:
Chris@16 295 mutable float_type mu_; // mean of samples above threshold u
Chris@16 296 mutable float_type sigma2_; // variance of samples above threshold u
Chris@16 297 mutable float_type threshold_probability_;
Chris@16 298 mutable result_type fit_parameters_; // boost::tuple that stores fit parameters
Chris@16 299 mutable bool is_dirty_;
Chris@16 300 };
Chris@16 301
Chris@16 302 } // namespace impl
Chris@16 303
Chris@16 304 ///////////////////////////////////////////////////////////////////////////////
Chris@16 305 // tag::peaks_over_threshold
Chris@16 306 //
Chris@16 307 namespace tag
Chris@16 308 {
Chris@16 309 template<typename LeftRight>
Chris@16 310 struct peaks_over_threshold
Chris@16 311 : depends_on<count>
Chris@16 312 , pot_threshold_value
Chris@16 313 {
Chris@16 314 /// INTERNAL ONLY
Chris@16 315 ///
Chris@16 316 typedef accumulators::impl::peaks_over_threshold_impl<mpl::_1, LeftRight> impl;
Chris@16 317 };
Chris@16 318
Chris@16 319 template<typename LeftRight>
Chris@16 320 struct peaks_over_threshold_prob
Chris@16 321 : depends_on<count, tail<LeftRight> >
Chris@16 322 , pot_threshold_probability
Chris@16 323 {
Chris@16 324 /// INTERNAL ONLY
Chris@16 325 ///
Chris@16 326 typedef accumulators::impl::peaks_over_threshold_prob_impl<mpl::_1, LeftRight> impl;
Chris@16 327 };
Chris@16 328
Chris@16 329 struct abstract_peaks_over_threshold
Chris@16 330 : depends_on<>
Chris@16 331 {
Chris@16 332 };
Chris@16 333 }
Chris@16 334
Chris@16 335 ///////////////////////////////////////////////////////////////////////////////
Chris@16 336 // extract::peaks_over_threshold
Chris@16 337 //
Chris@16 338 namespace extract
Chris@16 339 {
Chris@16 340 extractor<tag::abstract_peaks_over_threshold> const peaks_over_threshold = {};
Chris@16 341
Chris@16 342 BOOST_ACCUMULATORS_IGNORE_GLOBAL(peaks_over_threshold)
Chris@16 343 }
Chris@16 344
Chris@16 345 using extract::peaks_over_threshold;
Chris@16 346
Chris@16 347 // peaks_over_threshold<LeftRight>(with_threshold_value) -> peaks_over_threshold<LeftRight>
Chris@16 348 template<typename LeftRight>
Chris@16 349 struct as_feature<tag::peaks_over_threshold<LeftRight>(with_threshold_value)>
Chris@16 350 {
Chris@16 351 typedef tag::peaks_over_threshold<LeftRight> type;
Chris@16 352 };
Chris@16 353
Chris@16 354 // peaks_over_threshold<LeftRight>(with_threshold_probability) -> peaks_over_threshold_prob<LeftRight>
Chris@16 355 template<typename LeftRight>
Chris@16 356 struct as_feature<tag::peaks_over_threshold<LeftRight>(with_threshold_probability)>
Chris@16 357 {
Chris@16 358 typedef tag::peaks_over_threshold_prob<LeftRight> type;
Chris@16 359 };
Chris@16 360
Chris@16 361 template<typename LeftRight>
Chris@16 362 struct feature_of<tag::peaks_over_threshold<LeftRight> >
Chris@16 363 : feature_of<tag::abstract_peaks_over_threshold>
Chris@16 364 {
Chris@16 365 };
Chris@16 366
Chris@16 367 template<typename LeftRight>
Chris@16 368 struct feature_of<tag::peaks_over_threshold_prob<LeftRight> >
Chris@16 369 : feature_of<tag::abstract_peaks_over_threshold>
Chris@16 370 {
Chris@16 371 };
Chris@16 372
Chris@16 373 // So that peaks_over_threshold can be automatically substituted
Chris@16 374 // with weighted_peaks_over_threshold when the weight parameter is non-void.
Chris@16 375 template<typename LeftRight>
Chris@16 376 struct as_weighted_feature<tag::peaks_over_threshold<LeftRight> >
Chris@16 377 {
Chris@16 378 typedef tag::weighted_peaks_over_threshold<LeftRight> type;
Chris@16 379 };
Chris@16 380
Chris@16 381 template<typename LeftRight>
Chris@16 382 struct feature_of<tag::weighted_peaks_over_threshold<LeftRight> >
Chris@16 383 : feature_of<tag::peaks_over_threshold<LeftRight> >
Chris@16 384 {};
Chris@16 385
Chris@16 386 // So that peaks_over_threshold_prob can be automatically substituted
Chris@16 387 // with weighted_peaks_over_threshold_prob when the weight parameter is non-void.
Chris@16 388 template<typename LeftRight>
Chris@16 389 struct as_weighted_feature<tag::peaks_over_threshold_prob<LeftRight> >
Chris@16 390 {
Chris@16 391 typedef tag::weighted_peaks_over_threshold_prob<LeftRight> type;
Chris@16 392 };
Chris@16 393
Chris@16 394 template<typename LeftRight>
Chris@16 395 struct feature_of<tag::weighted_peaks_over_threshold_prob<LeftRight> >
Chris@16 396 : feature_of<tag::peaks_over_threshold_prob<LeftRight> >
Chris@16 397 {};
Chris@16 398
Chris@16 399 }} // namespace boost::accumulators
Chris@16 400
Chris@16 401 #ifdef _MSC_VER
Chris@16 402 # pragma warning(pop)
Chris@16 403 #endif
Chris@16 404
Chris@16 405 #endif