Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: // peaks_over_threshold.hpp Chris@16: // Chris@16: // Copyright 2006 Daniel Egloff, Olivier Gygi. Distributed under the Boost Chris@16: // Software License, Version 1.0. (See accompanying file Chris@16: // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) Chris@16: Chris@16: #ifndef BOOST_ACCUMULATORS_STATISTICS_PEAKS_OVER_THRESHOLD_HPP_DE_01_01_2006 Chris@16: #define BOOST_ACCUMULATORS_STATISTICS_PEAKS_OVER_THRESHOLD_HPP_DE_01_01_2006 Chris@16: Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include // pow Chris@16: #include // stringstream Chris@16: #include // runtime_error Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: Chris@16: #ifdef _MSC_VER Chris@16: # pragma warning(push) Chris@16: # pragma warning(disable: 4127) // conditional expression is constant Chris@16: #endif Chris@16: Chris@16: namespace boost { namespace accumulators Chris@16: { Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: // threshold_probability and threshold named parameters Chris@16: // Chris@16: BOOST_PARAMETER_NESTED_KEYWORD(tag, pot_threshold_value, threshold_value) Chris@16: BOOST_PARAMETER_NESTED_KEYWORD(tag, pot_threshold_probability, threshold_probability) Chris@16: Chris@16: BOOST_ACCUMULATORS_IGNORE_GLOBAL(pot_threshold_value) Chris@16: BOOST_ACCUMULATORS_IGNORE_GLOBAL(pot_threshold_probability) Chris@16: Chris@16: namespace impl Chris@16: { Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: // peaks_over_threshold_impl Chris@16: // works with an explicit threshold value and does not depend on order statistics Chris@16: /** Chris@16: @brief Peaks over Threshold Method for Quantile and Tail Mean Estimation Chris@16: Chris@16: According to the theorem of Pickands-Balkema-de Haan, the distribution function \f$F_u(x)\f$ of Chris@16: the excesses \f$x\f$ over some sufficiently high threshold \f$u\f$ of a distribution function \f$F(x)\f$ Chris@16: may be approximated by a generalized Pareto distribution Chris@16: \f[ Chris@16: G_{\xi,\beta}(x) = Chris@16: \left\{ Chris@16: \begin{array}{ll} Chris@16: \beta^{-1}\left(1+\frac{\xi x}{\beta}\right)^{-1/\xi-1} & \textrm{if }\xi\neq0\\ Chris@16: \beta^{-1}\exp\left(-\frac{x}{\beta}\right) & \textrm{if }\xi=0, Chris@16: \end{array} Chris@16: \right. Chris@16: \f] Chris@16: with suitable parameters \f$\xi\f$ and \f$\beta\f$ that can be estimated, e.g., with the method of moments, cf. Chris@16: Hosking and Wallis (1987), Chris@16: \f[ Chris@16: \begin{array}{lll} Chris@16: \hat{\xi} & = & \frac{1}{2}\left[1-\frac{(\hat{\mu}-u)^2}{\hat{\sigma}^2}\right]\\ Chris@16: \hat{\beta} & = & \frac{\hat{\mu}-u}{2}\left[\frac{(\hat{\mu}-u)^2}{\hat{\sigma}^2}+1\right], Chris@16: \end{array} Chris@16: \f] Chris@16: \f$\hat{\mu}\f$ and \f$\hat{\sigma}^2\f$ being the empirical mean and variance of the samples over Chris@16: the threshold \f$u\f$. Equivalently, the distribution function Chris@16: \f$F_u(x-u)\f$ of the exceedances \f$x-u\f$ can be approximated by Chris@16: \f$G_{\xi,\beta}(x-u)=G_{\xi,\beta,u}(x)\f$. Since for \f$x\geq u\f$ the distribution function \f$F(x)\f$ Chris@16: can be written as Chris@16: \f[ Chris@16: F(x) = [1 - \P(X \leq u)]F_u(x - u) + \P(X \leq u) Chris@16: \f] Chris@16: and the probability \f$\P(X \leq u)\f$ can be approximated by the empirical distribution function Chris@16: \f$F_n(u)\f$ evaluated at \f$u\f$, an estimator of \f$F(x)\f$ is given by Chris@16: \f[ Chris@16: \widehat{F}(x) = [1 - F_n(u)]G_{\xi,\beta,u}(x) + F_n(u). Chris@16: \f] Chris@16: It can be shown that \f$\widehat{F}(x)\f$ is a generalized Chris@16: Pareto distribution \f$G_{\xi,\bar{\beta},\bar{u}}(x)\f$ with \f$\bar{\beta}=\beta[1-F_n(u)]^{\xi}\f$ Chris@16: and \f$\bar{u}=u-\bar{\beta}\left\{[1-F_n(u)]^{-\xi}-1\right\}/\xi\f$. By inverting \f$\widehat{F}(x)\f$, Chris@16: one obtains an estimator for the \f$\alpha\f$-quantile, Chris@16: \f[ Chris@16: \hat{q}_{\alpha} = \bar{u} + \frac{\bar{\beta}}{\xi}\left[(1-\alpha)^{-\xi}-1\right], Chris@16: \f] Chris@16: and similarly an estimator for the (coherent) tail mean, Chris@16: \f[ Chris@16: \widehat{CTM}_{\alpha} = \hat{q}_{\alpha} - \frac{\bar{\beta}}{\xi-1}(1-\alpha)^{-\xi}, Chris@16: \f] Chris@16: cf. McNeil and Frey (2000). Chris@16: Chris@16: Note that in case extreme values of the left tail are fitted, the distribution is mirrored with respect to the Chris@16: \f$y\f$ axis such that the left tail can be treated as a right tail. The computed fit parameters thus define Chris@16: the Pareto distribution that fits the mirrored left tail. When quantities like a quantile or a tail mean are Chris@16: computed using the fit parameters obtained from the mirrored data, the result is mirrored back, yielding the Chris@16: correct result. Chris@16: Chris@16: For further details, see Chris@16: Chris@16: J. R. M. Hosking and J. R. Wallis, Parameter and quantile estimation for the generalized Pareto distribution, Chris@16: Technometrics, Volume 29, 1987, p. 339-349 Chris@16: Chris@16: A. J. McNeil and R. Frey, Estimation of Tail-Related Risk Measures for Heteroscedastic Financial Time Series: Chris@16: an Extreme Value Approach, Journal of Empirical Finance, Volume 7, 2000, p. 271-300 Chris@16: Chris@16: @param quantile_probability Chris@16: @param pot_threshold_value Chris@16: */ Chris@16: template Chris@16: struct peaks_over_threshold_impl Chris@16: : accumulator_base Chris@16: { Chris@16: typedef typename numeric::functional::fdiv::result_type float_type; Chris@16: // for boost::result_of Chris@16: typedef boost::tuple result_type; Chris@16: // for left tail fitting, mirror the extreme values Chris@16: typedef mpl::int_::value ? -1 : 1> sign; Chris@16: Chris@16: template Chris@16: peaks_over_threshold_impl(Args const &args) Chris@16: : Nu_(0) Chris@16: , mu_(sign::value * numeric::fdiv(args[sample | Sample()], (std::size_t)1)) Chris@16: , sigma2_(numeric::fdiv(args[sample | Sample()], (std::size_t)1)) Chris@16: , threshold_(sign::value * args[pot_threshold_value]) Chris@16: , fit_parameters_(boost::make_tuple(0., 0., 0.)) Chris@16: , is_dirty_(true) Chris@16: { Chris@16: } Chris@16: Chris@16: template Chris@16: void operator ()(Args const &args) Chris@16: { Chris@16: this->is_dirty_ = true; Chris@16: Chris@16: if (sign::value * args[sample] > this->threshold_) Chris@16: { Chris@16: this->mu_ += args[sample]; Chris@16: this->sigma2_ += args[sample] * args[sample]; Chris@16: ++this->Nu_; Chris@16: } Chris@16: } Chris@16: Chris@16: template Chris@16: result_type result(Args const &args) const Chris@16: { Chris@16: if (this->is_dirty_) Chris@16: { Chris@16: this->is_dirty_ = false; Chris@16: Chris@16: std::size_t cnt = count(args); Chris@16: Chris@16: this->mu_ = sign::value * numeric::fdiv(this->mu_, this->Nu_); Chris@16: this->sigma2_ = numeric::fdiv(this->sigma2_, this->Nu_); Chris@16: this->sigma2_ -= this->mu_ * this->mu_; Chris@16: Chris@16: float_type threshold_probability = numeric::fdiv(cnt - this->Nu_, cnt); Chris@16: Chris@16: float_type tmp = numeric::fdiv(( this->mu_ - this->threshold_ )*( this->mu_ - this->threshold_ ), this->sigma2_); Chris@16: float_type xi_hat = 0.5 * ( 1. - tmp ); Chris@16: float_type beta_hat = 0.5 * ( this->mu_ - this->threshold_ ) * ( 1. + tmp ); Chris@16: float_type beta_bar = beta_hat * std::pow(1. - threshold_probability, xi_hat); Chris@16: float_type u_bar = this->threshold_ - beta_bar * ( std::pow(1. - threshold_probability, -xi_hat) - 1.)/xi_hat; Chris@16: this->fit_parameters_ = boost::make_tuple(u_bar, beta_bar, xi_hat); Chris@16: } Chris@16: Chris@16: return this->fit_parameters_; Chris@16: } Chris@16: Chris@16: private: Chris@16: std::size_t Nu_; // number of samples larger than threshold Chris@16: mutable float_type mu_; // mean of Nu_ largest samples Chris@16: mutable float_type sigma2_; // variance of Nu_ largest samples Chris@16: float_type threshold_; Chris@16: mutable result_type fit_parameters_; // boost::tuple that stores fit parameters Chris@16: mutable bool is_dirty_; Chris@16: }; Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: // peaks_over_threshold_prob_impl Chris@16: // determines threshold from a given threshold probability using order statistics Chris@16: /** Chris@16: @brief Peaks over Threshold Method for Quantile and Tail Mean Estimation Chris@16: Chris@16: @sa peaks_over_threshold_impl Chris@16: Chris@16: @param quantile_probability Chris@16: @param pot_threshold_probability Chris@16: */ Chris@16: template Chris@16: struct peaks_over_threshold_prob_impl Chris@16: : accumulator_base Chris@16: { Chris@16: typedef typename numeric::functional::fdiv::result_type float_type; Chris@16: // for boost::result_of Chris@16: typedef boost::tuple result_type; Chris@16: // for left tail fitting, mirror the extreme values Chris@16: typedef mpl::int_::value ? -1 : 1> sign; Chris@16: Chris@16: template Chris@16: peaks_over_threshold_prob_impl(Args const &args) Chris@16: : mu_(sign::value * numeric::fdiv(args[sample | Sample()], (std::size_t)1)) Chris@16: , sigma2_(numeric::fdiv(args[sample | Sample()], (std::size_t)1)) Chris@16: , threshold_probability_(args[pot_threshold_probability]) Chris@16: , fit_parameters_(boost::make_tuple(0., 0., 0.)) Chris@16: , is_dirty_(true) Chris@16: { Chris@16: } Chris@16: Chris@16: void operator ()(dont_care) Chris@16: { Chris@16: this->is_dirty_ = true; Chris@16: } Chris@16: Chris@16: template Chris@16: result_type result(Args const &args) const Chris@16: { Chris@16: if (this->is_dirty_) Chris@16: { Chris@16: this->is_dirty_ = false; Chris@16: Chris@16: std::size_t cnt = count(args); Chris@16: Chris@16: // the n'th cached sample provides an approximate threshold value u Chris@16: std::size_t n = static_cast( Chris@16: std::ceil( Chris@16: cnt * ( ( is_same::value ) ? this->threshold_probability_ : 1. - this->threshold_probability_ ) Chris@16: ) Chris@16: ); Chris@16: Chris@16: // If n is in a valid range, return result, otherwise return NaN or throw exception Chris@16: if ( n >= static_cast(tail(args).size())) Chris@16: { Chris@16: if (std::numeric_limits::has_quiet_NaN) Chris@16: { Chris@16: return boost::make_tuple( Chris@16: std::numeric_limits::quiet_NaN() Chris@16: , std::numeric_limits::quiet_NaN() Chris@16: , std::numeric_limits::quiet_NaN() Chris@16: ); Chris@16: } Chris@16: else Chris@16: { Chris@16: std::ostringstream msg; Chris@16: msg << "index n = " << n << " is not in valid range [0, " << tail(args).size() << ")"; Chris@16: boost::throw_exception(std::runtime_error(msg.str())); Chris@16: return boost::make_tuple(Sample(0), Sample(0), Sample(0)); Chris@16: } Chris@16: } Chris@16: else Chris@16: { Chris@16: float_type u = *(tail(args).begin() + n - 1) * sign::value; Chris@16: Chris@16: // compute mean and variance of samples above/under threshold value u Chris@16: for (std::size_t i = 0; i < n; ++i) Chris@16: { Chris@16: mu_ += *(tail(args).begin() + i); Chris@16: sigma2_ += *(tail(args).begin() + i) * (*(tail(args).begin() + i)); Chris@16: } Chris@16: Chris@16: this->mu_ = sign::value * numeric::fdiv(this->mu_, n); Chris@16: this->sigma2_ = numeric::fdiv(this->sigma2_, n); Chris@16: this->sigma2_ -= this->mu_ * this->mu_; Chris@16: Chris@16: if (is_same::value) Chris@16: this->threshold_probability_ = 1. - this->threshold_probability_; Chris@16: Chris@16: float_type tmp = numeric::fdiv(( this->mu_ - u )*( this->mu_ - u ), this->sigma2_); Chris@16: float_type xi_hat = 0.5 * ( 1. - tmp ); Chris@16: float_type beta_hat = 0.5 * ( this->mu_ - u ) * ( 1. + tmp ); Chris@16: float_type beta_bar = beta_hat * std::pow(1. - threshold_probability_, xi_hat); Chris@16: float_type u_bar = u - beta_bar * ( std::pow(1. - threshold_probability_, -xi_hat) - 1.)/xi_hat; Chris@16: this->fit_parameters_ = boost::make_tuple(u_bar, beta_bar, xi_hat); Chris@16: } Chris@16: } Chris@16: Chris@16: return this->fit_parameters_; Chris@16: } Chris@16: Chris@16: private: Chris@16: mutable float_type mu_; // mean of samples above threshold u Chris@16: mutable float_type sigma2_; // variance of samples above threshold u Chris@16: mutable float_type threshold_probability_; Chris@16: mutable result_type fit_parameters_; // boost::tuple that stores fit parameters Chris@16: mutable bool is_dirty_; Chris@16: }; Chris@16: Chris@16: } // namespace impl Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: // tag::peaks_over_threshold Chris@16: // Chris@16: namespace tag Chris@16: { Chris@16: template Chris@16: struct peaks_over_threshold Chris@16: : depends_on Chris@16: , pot_threshold_value Chris@16: { Chris@16: /// INTERNAL ONLY Chris@16: /// Chris@16: typedef accumulators::impl::peaks_over_threshold_impl impl; Chris@16: }; Chris@16: Chris@16: template Chris@16: struct peaks_over_threshold_prob Chris@16: : depends_on > Chris@16: , pot_threshold_probability Chris@16: { Chris@16: /// INTERNAL ONLY Chris@16: /// Chris@16: typedef accumulators::impl::peaks_over_threshold_prob_impl impl; Chris@16: }; Chris@16: Chris@16: struct abstract_peaks_over_threshold Chris@16: : depends_on<> Chris@16: { Chris@16: }; Chris@16: } Chris@16: Chris@16: /////////////////////////////////////////////////////////////////////////////// Chris@16: // extract::peaks_over_threshold Chris@16: // Chris@16: namespace extract Chris@16: { Chris@16: extractor const peaks_over_threshold = {}; Chris@16: Chris@16: BOOST_ACCUMULATORS_IGNORE_GLOBAL(peaks_over_threshold) Chris@16: } Chris@16: Chris@16: using extract::peaks_over_threshold; Chris@16: Chris@16: // peaks_over_threshold(with_threshold_value) -> peaks_over_threshold Chris@16: template Chris@16: struct as_feature(with_threshold_value)> Chris@16: { Chris@16: typedef tag::peaks_over_threshold type; Chris@16: }; Chris@16: Chris@16: // peaks_over_threshold(with_threshold_probability) -> peaks_over_threshold_prob Chris@16: template Chris@16: struct as_feature(with_threshold_probability)> Chris@16: { Chris@16: typedef tag::peaks_over_threshold_prob type; Chris@16: }; Chris@16: Chris@16: template Chris@16: struct feature_of > Chris@16: : feature_of Chris@16: { Chris@16: }; Chris@16: Chris@16: template Chris@16: struct feature_of > Chris@16: : feature_of Chris@16: { Chris@16: }; Chris@16: Chris@16: // So that peaks_over_threshold can be automatically substituted Chris@16: // with weighted_peaks_over_threshold when the weight parameter is non-void. Chris@16: template Chris@16: struct as_weighted_feature > Chris@16: { Chris@16: typedef tag::weighted_peaks_over_threshold type; Chris@16: }; Chris@16: Chris@16: template Chris@16: struct feature_of > Chris@16: : feature_of > Chris@16: {}; Chris@16: Chris@16: // So that peaks_over_threshold_prob can be automatically substituted Chris@16: // with weighted_peaks_over_threshold_prob when the weight parameter is non-void. Chris@16: template Chris@16: struct as_weighted_feature > Chris@16: { Chris@16: typedef tag::weighted_peaks_over_threshold_prob type; Chris@16: }; Chris@16: Chris@16: template Chris@16: struct feature_of > Chris@16: : feature_of > Chris@16: {}; Chris@16: Chris@16: }} // namespace boost::accumulators Chris@16: Chris@16: #ifdef _MSC_VER Chris@16: # pragma warning(pop) Chris@16: #endif Chris@16: Chris@16: #endif