Chris@16: // (C) Copyright John Maddock 2008. Chris@16: // Use, modification and distribution are subject to the Chris@16: // Boost Software License, Version 1.0. (See accompanying file Chris@16: // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) Chris@16: Chris@16: #ifndef BOOST_MATH_SPECIAL_NEXT_HPP Chris@16: #define BOOST_MATH_SPECIAL_NEXT_HPP Chris@16: Chris@16: #ifdef _MSC_VER Chris@16: #pragma once Chris@16: #endif Chris@16: Chris@101: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: Chris@16: #include Chris@101: Chris@101: #if !defined(_CRAYC) && !defined(__CUDACC__) && (!defined(__GNUC__) || (__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ > 3))) Chris@101: #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || defined(__SSE2__) Chris@101: #include "xmmintrin.h" Chris@101: #define BOOST_MATH_CHECK_SSE2 Chris@101: #endif Chris@16: #endif Chris@16: Chris@16: namespace boost{ namespace math{ Chris@16: Chris@16: namespace detail{ Chris@16: Chris@16: template Chris@16: inline T get_smallest_value(mpl::true_ const&) Chris@16: { Chris@16: // Chris@16: // numeric_limits lies about denorms being present - particularly Chris@16: // when this can be turned on or off at runtime, as is the case Chris@16: // when using the SSE2 registers in DAZ or FTZ mode. Chris@16: // Chris@16: static const T m = std::numeric_limits::denorm_min(); Chris@101: #ifdef BOOST_MATH_CHECK_SSE2 Chris@101: return (_mm_getcsr() & (_MM_FLUSH_ZERO_ON | 0x40)) ? tools::min_value() : m;; Chris@101: #else Chris@101: return ((tools::min_value() / 2) == 0) ? tools::min_value() : m; Chris@101: #endif Chris@16: } Chris@16: Chris@16: template Chris@16: inline T get_smallest_value(mpl::false_ const&) Chris@16: { Chris@16: return tools::min_value(); Chris@16: } Chris@16: Chris@16: template Chris@16: inline T get_smallest_value() Chris@16: { Chris@16: #if defined(BOOST_MSVC) && (BOOST_MSVC <= 1310) Chris@16: return get_smallest_value(mpl::bool_::is_specialized && (std::numeric_limits::has_denorm == 1)>()); Chris@16: #else Chris@16: return get_smallest_value(mpl::bool_::is_specialized && (std::numeric_limits::has_denorm == std::denorm_present)>()); Chris@16: #endif Chris@16: } Chris@16: Chris@16: // Chris@16: // Returns the smallest value that won't generate denorms when Chris@16: // we calculate the value of the least-significant-bit: Chris@16: // Chris@16: template Chris@16: T get_min_shift_value(); Chris@16: Chris@16: template Chris@16: struct min_shift_initializer Chris@16: { Chris@16: struct init Chris@16: { Chris@16: init() Chris@16: { Chris@16: do_init(); Chris@16: } Chris@16: static void do_init() Chris@16: { Chris@16: get_min_shift_value(); Chris@16: } Chris@16: void force_instantiate()const{} Chris@16: }; Chris@16: static const init initializer; Chris@16: static void force_instantiate() Chris@16: { Chris@16: initializer.force_instantiate(); Chris@16: } Chris@16: }; Chris@16: Chris@16: template Chris@16: const typename min_shift_initializer::init min_shift_initializer::initializer; Chris@16: Chris@16: Chris@16: template Chris@16: inline T get_min_shift_value() Chris@16: { Chris@16: BOOST_MATH_STD_USING Chris@16: static const T val = ldexp(tools::min_value(), tools::digits() + 1); Chris@16: min_shift_initializer::force_instantiate(); Chris@16: Chris@16: return val; Chris@16: } Chris@16: Chris@16: template Chris@16: T float_next_imp(const T& val, const Policy& pol) Chris@16: { Chris@16: BOOST_MATH_STD_USING Chris@16: int expon; Chris@16: static const char* function = "float_next<%1%>(%1%)"; Chris@16: Chris@16: int fpclass = (boost::math::fpclassify)(val); Chris@16: Chris@101: if((fpclass == (int)FP_NAN) || (fpclass == (int)FP_INFINITE)) Chris@16: { Chris@16: if(val < 0) Chris@16: return -tools::max_value(); Chris@16: return policies::raise_domain_error( Chris@16: function, Chris@16: "Argument must be finite, but got %1%", val, pol); Chris@16: } Chris@16: Chris@16: if(val >= tools::max_value()) Chris@16: return policies::raise_overflow_error(function, 0, pol); Chris@16: Chris@16: if(val == 0) Chris@16: return detail::get_smallest_value(); Chris@16: Chris@101: if((fpclass != (int)FP_SUBNORMAL) && (fpclass != (int)FP_ZERO) && (fabs(val) < detail::get_min_shift_value()) && (val != -tools::min_value())) Chris@16: { Chris@16: // Chris@16: // Special case: if the value of the least significant bit is a denorm, and the result Chris@16: // would not be a denorm, then shift the input, increment, and shift back. Chris@16: // This avoids issues with the Intel SSE2 registers when the FTZ or DAZ flags are set. Chris@16: // Chris@16: return ldexp(float_next(T(ldexp(val, 2 * tools::digits())), pol), -2 * tools::digits()); Chris@16: } Chris@16: Chris@16: if(-0.5f == frexp(val, &expon)) Chris@16: --expon; // reduce exponent when val is a power of two, and negative. Chris@16: T diff = ldexp(T(1), expon - tools::digits()); Chris@16: if(diff == 0) Chris@16: diff = detail::get_smallest_value(); Chris@16: return val + diff; Chris@16: } Chris@16: Chris@16: } Chris@16: Chris@16: template Chris@16: inline typename tools::promote_args::type float_next(const T& val, const Policy& pol) Chris@16: { Chris@16: typedef typename tools::promote_args::type result_type; Chris@16: return detail::float_next_imp(static_cast(val), pol); Chris@16: } Chris@16: Chris@16: #if 0 //def BOOST_MSVC Chris@16: // Chris@16: // We used to use ::_nextafter here, but doing so fails when using Chris@16: // the SSE2 registers if the FTZ or DAZ flags are set, so use our own Chris@16: // - albeit slower - code instead as at least that gives the correct answer. Chris@16: // Chris@16: template Chris@16: inline double float_next(const double& val, const Policy& pol) Chris@16: { Chris@16: static const char* function = "float_next<%1%>(%1%)"; Chris@16: Chris@16: if(!(boost::math::isfinite)(val) && (val > 0)) Chris@16: return policies::raise_domain_error( Chris@16: function, Chris@16: "Argument must be finite, but got %1%", val, pol); Chris@16: Chris@16: if(val >= tools::max_value()) Chris@16: return policies::raise_overflow_error(function, 0, pol); Chris@16: Chris@16: return ::_nextafter(val, tools::max_value()); Chris@16: } Chris@16: #endif Chris@16: Chris@16: template Chris@16: inline typename tools::promote_args::type float_next(const T& val) Chris@16: { Chris@16: return float_next(val, policies::policy<>()); Chris@16: } Chris@16: Chris@16: namespace detail{ Chris@16: Chris@16: template Chris@16: T float_prior_imp(const T& val, const Policy& pol) Chris@16: { Chris@16: BOOST_MATH_STD_USING Chris@16: int expon; Chris@16: static const char* function = "float_prior<%1%>(%1%)"; Chris@16: Chris@16: int fpclass = (boost::math::fpclassify)(val); Chris@16: Chris@101: if((fpclass == (int)FP_NAN) || (fpclass == (int)FP_INFINITE)) Chris@16: { Chris@16: if(val > 0) Chris@16: return tools::max_value(); Chris@16: return policies::raise_domain_error( Chris@16: function, Chris@16: "Argument must be finite, but got %1%", val, pol); Chris@16: } Chris@16: Chris@16: if(val <= -tools::max_value()) Chris@16: return -policies::raise_overflow_error(function, 0, pol); Chris@16: Chris@16: if(val == 0) Chris@16: return -detail::get_smallest_value(); Chris@16: Chris@101: if((fpclass != (int)FP_SUBNORMAL) && (fpclass != (int)FP_ZERO) && (fabs(val) < detail::get_min_shift_value()) && (val != tools::min_value())) Chris@16: { Chris@16: // Chris@16: // Special case: if the value of the least significant bit is a denorm, and the result Chris@16: // would not be a denorm, then shift the input, increment, and shift back. Chris@16: // This avoids issues with the Intel SSE2 registers when the FTZ or DAZ flags are set. Chris@16: // Chris@16: return ldexp(float_prior(T(ldexp(val, 2 * tools::digits())), pol), -2 * tools::digits()); Chris@16: } Chris@16: Chris@16: T remain = frexp(val, &expon); Chris@16: if(remain == 0.5) Chris@16: --expon; // when val is a power of two we must reduce the exponent Chris@16: T diff = ldexp(T(1), expon - tools::digits()); Chris@16: if(diff == 0) Chris@16: diff = detail::get_smallest_value(); Chris@16: return val - diff; Chris@16: } Chris@16: Chris@16: } Chris@16: Chris@16: template Chris@16: inline typename tools::promote_args::type float_prior(const T& val, const Policy& pol) Chris@16: { Chris@16: typedef typename tools::promote_args::type result_type; Chris@16: return detail::float_prior_imp(static_cast(val), pol); Chris@16: } Chris@16: Chris@16: #if 0 //def BOOST_MSVC Chris@16: // Chris@16: // We used to use ::_nextafter here, but doing so fails when using Chris@16: // the SSE2 registers if the FTZ or DAZ flags are set, so use our own Chris@16: // - albeit slower - code instead as at least that gives the correct answer. Chris@16: // Chris@16: template Chris@16: inline double float_prior(const double& val, const Policy& pol) Chris@16: { Chris@16: static const char* function = "float_prior<%1%>(%1%)"; Chris@16: Chris@16: if(!(boost::math::isfinite)(val) && (val < 0)) Chris@16: return policies::raise_domain_error( Chris@16: function, Chris@16: "Argument must be finite, but got %1%", val, pol); Chris@16: Chris@16: if(val <= -tools::max_value()) Chris@16: return -policies::raise_overflow_error(function, 0, pol); Chris@16: Chris@16: return ::_nextafter(val, -tools::max_value()); Chris@16: } Chris@16: #endif Chris@16: Chris@16: template Chris@16: inline typename tools::promote_args::type float_prior(const T& val) Chris@16: { Chris@16: return float_prior(val, policies::policy<>()); Chris@16: } Chris@16: Chris@16: template Chris@16: inline typename tools::promote_args::type nextafter(const T& val, const U& direction, const Policy& pol) Chris@16: { Chris@16: typedef typename tools::promote_args::type result_type; Chris@16: return val < direction ? boost::math::float_next(val, pol) : val == direction ? val : boost::math::float_prior(val, pol); Chris@16: } Chris@16: Chris@16: template Chris@16: inline typename tools::promote_args::type nextafter(const T& val, const U& direction) Chris@16: { Chris@16: return nextafter(val, direction, policies::policy<>()); Chris@16: } Chris@16: Chris@16: namespace detail{ Chris@16: Chris@16: template Chris@16: T float_distance_imp(const T& a, const T& b, const Policy& pol) Chris@16: { Chris@16: BOOST_MATH_STD_USING Chris@16: // Chris@16: // Error handling: Chris@16: // Chris@16: static const char* function = "float_distance<%1%>(%1%, %1%)"; Chris@16: if(!(boost::math::isfinite)(a)) Chris@16: return policies::raise_domain_error( Chris@16: function, Chris@16: "Argument a must be finite, but got %1%", a, pol); Chris@16: if(!(boost::math::isfinite)(b)) Chris@16: return policies::raise_domain_error( Chris@16: function, Chris@16: "Argument b must be finite, but got %1%", b, pol); Chris@16: // Chris@16: // Special cases: Chris@16: // Chris@16: if(a > b) Chris@16: return -float_distance(b, a, pol); Chris@16: if(a == b) Chris@16: return 0; Chris@16: if(a == 0) Chris@16: return 1 + fabs(float_distance(static_cast((b < 0) ? T(-detail::get_smallest_value()) : detail::get_smallest_value()), b, pol)); Chris@16: if(b == 0) Chris@16: return 1 + fabs(float_distance(static_cast((a < 0) ? T(-detail::get_smallest_value()) : detail::get_smallest_value()), a, pol)); Chris@16: if(boost::math::sign(a) != boost::math::sign(b)) Chris@16: return 2 + fabs(float_distance(static_cast((b < 0) ? T(-detail::get_smallest_value()) : detail::get_smallest_value()), b, pol)) Chris@16: + fabs(float_distance(static_cast((a < 0) ? T(-detail::get_smallest_value()) : detail::get_smallest_value()), a, pol)); Chris@16: // Chris@16: // By the time we get here, both a and b must have the same sign, we want Chris@16: // b > a and both postive for the following logic: Chris@16: // Chris@16: if(a < 0) Chris@16: return float_distance(static_cast(-b), static_cast(-a), pol); Chris@16: Chris@16: BOOST_ASSERT(a >= 0); Chris@16: BOOST_ASSERT(b >= a); Chris@16: Chris@16: int expon; Chris@16: // Chris@16: // Note that if a is a denorm then the usual formula fails Chris@16: // because we actually have fewer than tools::digits() Chris@16: // significant bits in the representation: Chris@16: // Chris@101: frexp(((boost::math::fpclassify)(a) == (int)FP_SUBNORMAL) ? tools::min_value() : a, &expon); Chris@16: T upper = ldexp(T(1), expon); Chris@16: T result = 0; Chris@16: expon = tools::digits() - expon; Chris@16: // Chris@16: // If b is greater than upper, then we *must* split the calculation Chris@16: // as the size of the ULP changes with each order of magnitude change: Chris@16: // Chris@16: if(b > upper) Chris@16: { Chris@16: result = float_distance(upper, b); Chris@16: } Chris@16: // Chris@16: // Use compensated double-double addition to avoid rounding Chris@16: // errors in the subtraction: Chris@16: // Chris@16: T mb, x, y, z; Chris@101: if(((boost::math::fpclassify)(a) == (int)FP_SUBNORMAL) || (b - a < tools::min_value())) Chris@16: { Chris@16: // Chris@16: // Special case - either one end of the range is a denormal, or else the difference is. Chris@16: // The regular code will fail if we're using the SSE2 registers on Intel and either Chris@16: // the FTZ or DAZ flags are set. Chris@16: // Chris@16: T a2 = ldexp(a, tools::digits()); Chris@16: T b2 = ldexp(b, tools::digits()); Chris@16: mb = -(std::min)(T(ldexp(upper, tools::digits())), b2); Chris@16: x = a2 + mb; Chris@16: z = x - a2; Chris@16: y = (a2 - (x - z)) + (mb - z); Chris@16: Chris@16: expon -= tools::digits(); Chris@16: } Chris@16: else Chris@16: { Chris@16: mb = -(std::min)(upper, b); Chris@16: x = a + mb; Chris@16: z = x - a; Chris@16: y = (a - (x - z)) + (mb - z); Chris@16: } Chris@16: if(x < 0) Chris@16: { Chris@16: x = -x; Chris@16: y = -y; Chris@16: } Chris@16: result += ldexp(x, expon) + ldexp(y, expon); Chris@16: // Chris@16: // Result must be an integer: Chris@16: // Chris@16: BOOST_ASSERT(result == floor(result)); Chris@16: return result; Chris@16: } Chris@16: Chris@16: } Chris@16: Chris@16: template Chris@16: inline typename tools::promote_args::type float_distance(const T& a, const U& b, const Policy& pol) Chris@16: { Chris@16: typedef typename tools::promote_args::type result_type; Chris@16: return detail::float_distance_imp(static_cast(a), static_cast(b), pol); Chris@16: } Chris@16: Chris@16: template Chris@16: typename tools::promote_args::type float_distance(const T& a, const U& b) Chris@16: { Chris@16: return boost::math::float_distance(a, b, policies::policy<>()); Chris@16: } Chris@16: Chris@16: namespace detail{ Chris@16: Chris@16: template Chris@16: T float_advance_imp(T val, int distance, const Policy& pol) Chris@16: { Chris@16: BOOST_MATH_STD_USING Chris@16: // Chris@16: // Error handling: Chris@16: // Chris@16: static const char* function = "float_advance<%1%>(%1%, int)"; Chris@16: Chris@16: int fpclass = (boost::math::fpclassify)(val); Chris@16: Chris@101: if((fpclass == (int)FP_NAN) || (fpclass == (int)FP_INFINITE)) Chris@16: return policies::raise_domain_error( Chris@16: function, Chris@16: "Argument val must be finite, but got %1%", val, pol); Chris@16: Chris@16: if(val < 0) Chris@16: return -float_advance(-val, -distance, pol); Chris@16: if(distance == 0) Chris@16: return val; Chris@16: if(distance == 1) Chris@16: return float_next(val, pol); Chris@16: if(distance == -1) Chris@16: return float_prior(val, pol); Chris@16: Chris@16: if(fabs(val) < detail::get_min_shift_value()) Chris@16: { Chris@16: // Chris@16: // Special case: if the value of the least significant bit is a denorm, Chris@16: // implement in terms of float_next/float_prior. Chris@16: // This avoids issues with the Intel SSE2 registers when the FTZ or DAZ flags are set. Chris@16: // Chris@16: if(distance > 0) Chris@16: { Chris@16: do{ val = float_next(val, pol); } while(--distance); Chris@16: } Chris@16: else Chris@16: { Chris@16: do{ val = float_prior(val, pol); } while(++distance); Chris@16: } Chris@16: return val; Chris@16: } Chris@16: Chris@16: int expon; Chris@16: frexp(val, &expon); Chris@16: T limit = ldexp((distance < 0 ? T(0.5f) : T(1)), expon); Chris@16: if(val <= tools::min_value()) Chris@16: { Chris@16: limit = sign(T(distance)) * tools::min_value(); Chris@16: } Chris@16: T limit_distance = float_distance(val, limit); Chris@16: while(fabs(limit_distance) < abs(distance)) Chris@16: { Chris@16: distance -= itrunc(limit_distance); Chris@16: val = limit; Chris@16: if(distance < 0) Chris@16: { Chris@16: limit /= 2; Chris@16: expon--; Chris@16: } Chris@16: else Chris@16: { Chris@16: limit *= 2; Chris@16: expon++; Chris@16: } Chris@16: limit_distance = float_distance(val, limit); Chris@16: if(distance && (limit_distance == 0)) Chris@16: { Chris@101: return policies::raise_evaluation_error(function, "Internal logic failed while trying to increment floating point value %1%: most likely your FPU is in non-IEEE conforming mode.", val, pol); Chris@16: } Chris@16: } Chris@16: if((0.5f == frexp(val, &expon)) && (distance < 0)) Chris@16: --expon; Chris@16: T diff = 0; Chris@16: if(val != 0) Chris@16: diff = distance * ldexp(T(1), expon - tools::digits()); Chris@16: if(diff == 0) Chris@16: diff = distance * detail::get_smallest_value(); Chris@16: return val += diff; Chris@16: } Chris@16: Chris@16: } Chris@16: Chris@16: template Chris@16: inline typename tools::promote_args::type float_advance(T val, int distance, const Policy& pol) Chris@16: { Chris@16: typedef typename tools::promote_args::type result_type; Chris@16: return detail::float_advance_imp(static_cast(val), distance, pol); Chris@16: } Chris@16: Chris@16: template Chris@16: inline typename tools::promote_args::type float_advance(const T& val, int distance) Chris@16: { Chris@16: return boost::math::float_advance(val, distance, policies::policy<>()); Chris@16: } Chris@16: Chris@16: }} // namespaces Chris@16: Chris@16: #endif // BOOST_MATH_SPECIAL_NEXT_HPP Chris@16: