annotate DEPENDENCIES/generic/include/boost/uuid/detail/uuid_x86.hpp @ 133:4acb5d8d80b6 tip

Don't fail environmental check if README.md exists (but .txt and no-suffix don't)
author Chris Cannam
date Tue, 30 Jul 2019 12:25:44 +0100
parents f46d142149f5
children
rev   line source
Chris@102 1 /*
Chris@102 2 * Copyright Andrey Semashev 2013.
Chris@102 3 * Distributed under the Boost Software License, Version 1.0.
Chris@102 4 * (See accompanying file LICENSE_1_0.txt or copy at
Chris@102 5 * http://www.boost.org/LICENSE_1_0.txt)
Chris@102 6 */
Chris@102 7 /*!
Chris@102 8 * \file uuid/detail/uuid_x86.hpp
Chris@102 9 *
Chris@102 10 * \brief This header contains an optimized SSE implementation of \c boost::uuid operations.
Chris@102 11 */
Chris@102 12
Chris@102 13 #ifndef BOOST_UUID_DETAIL_UUID_X86_HPP_INCLUDED_
Chris@102 14 #define BOOST_UUID_DETAIL_UUID_X86_HPP_INCLUDED_
Chris@102 15
Chris@102 16 // MSVC does not always have immintrin.h (at least, not up to MSVC 10), so include the appropriate header for each instruction set
Chris@102 17 #if defined(BOOST_UUID_USE_SSE41)
Chris@102 18 #include <smmintrin.h>
Chris@102 19 #elif defined(BOOST_UUID_USE_SSE3)
Chris@102 20 #include <pmmintrin.h>
Chris@102 21 #else
Chris@102 22 #include <emmintrin.h>
Chris@102 23 #endif
Chris@102 24
Chris@102 25 namespace boost {
Chris@102 26 namespace uuids {
Chris@102 27 namespace detail {
Chris@102 28
Chris@102 29 BOOST_FORCEINLINE __m128i load_unaligned_si128(const uint8_t* p) BOOST_NOEXCEPT
Chris@102 30 {
Chris@102 31 #if defined(BOOST_UUID_USE_SSE3)
Chris@102 32 return _mm_lddqu_si128(reinterpret_cast< const __m128i* >(p));
Chris@102 33 #else
Chris@102 34 return _mm_loadu_si128(reinterpret_cast< const __m128i* >(p));
Chris@102 35 #endif
Chris@102 36 }
Chris@102 37
Chris@102 38 } // namespace detail
Chris@102 39
Chris@102 40 inline bool uuid::is_nil() const BOOST_NOEXCEPT
Chris@102 41 {
Chris@102 42 register __m128i mm = uuids::detail::load_unaligned_si128(data);
Chris@102 43 #if defined(BOOST_UUID_USE_SSE41)
Chris@102 44 return _mm_test_all_zeros(mm, mm) != 0;
Chris@102 45 #else
Chris@102 46 mm = _mm_cmpeq_epi8(mm, _mm_setzero_si128());
Chris@102 47 return _mm_movemask_epi8(mm) == 0xFFFF;
Chris@102 48 #endif
Chris@102 49 }
Chris@102 50
Chris@102 51 inline void uuid::swap(uuid& rhs) BOOST_NOEXCEPT
Chris@102 52 {
Chris@102 53 register __m128i mm_this = uuids::detail::load_unaligned_si128(data);
Chris@102 54 register __m128i mm_rhs = uuids::detail::load_unaligned_si128(rhs.data);
Chris@102 55 _mm_storeu_si128(reinterpret_cast< __m128i* >(rhs.data), mm_this);
Chris@102 56 _mm_storeu_si128(reinterpret_cast< __m128i* >(data), mm_rhs);
Chris@102 57 }
Chris@102 58
Chris@102 59 inline bool operator== (uuid const& lhs, uuid const& rhs) BOOST_NOEXCEPT
Chris@102 60 {
Chris@102 61 register __m128i mm_left = uuids::detail::load_unaligned_si128(lhs.data);
Chris@102 62 register __m128i mm_right = uuids::detail::load_unaligned_si128(rhs.data);
Chris@102 63
Chris@102 64 register __m128i mm_cmp = _mm_cmpeq_epi32(mm_left, mm_right);
Chris@102 65 #if defined(BOOST_UUID_USE_SSE41)
Chris@102 66 return _mm_test_all_ones(mm_cmp);
Chris@102 67 #else
Chris@102 68 return _mm_movemask_epi8(mm_cmp) == 0xFFFF;
Chris@102 69 #endif
Chris@102 70 }
Chris@102 71
Chris@102 72 inline bool operator< (uuid const& lhs, uuid const& rhs) BOOST_NOEXCEPT
Chris@102 73 {
Chris@102 74 register __m128i mm_left = uuids::detail::load_unaligned_si128(lhs.data);
Chris@102 75 register __m128i mm_right = uuids::detail::load_unaligned_si128(rhs.data);
Chris@102 76
Chris@102 77 // To emulate lexicographical_compare behavior we have to perform two comparisons - the forward and reverse one.
Chris@102 78 // Then we know which bytes are equivalent and which ones are different, and for those different the comparison results
Chris@102 79 // will be opposite. Then we'll be able to find the first differing comparison result (for both forward and reverse ways),
Chris@102 80 // and depending on which way it is for, this will be the result of the operation. There are a few notes to consider:
Chris@102 81 //
Chris@102 82 // 1. Due to little endian byte order the first bytes go into the lower part of the xmm registers,
Chris@102 83 // so the comparison results in the least significant bits will actually be the most signigicant for the final operation result.
Chris@102 84 // This means we have to determine which of the comparison results have the least significant bit on, and this is achieved with
Chris@102 85 // the "(x - 1) ^ x" trick.
Chris@102 86 // 2. Because there is only signed comparison in SSE/AVX, we have to invert byte comparison results whenever signs of the corresponding
Chris@102 87 // bytes are different. I.e. in signed comparison it's -1 < 1, but in unsigned it is the opposite (255 > 1). To do that we XOR left and right,
Chris@102 88 // making the most significant bit of each byte 1 if the signs are different, and later apply this mask with another XOR to the comparison results.
Chris@102 89 // 3. pcmpgtw compares for "greater" relation, so we swap the arguments to get what we need.
Chris@102 90
Chris@102 91 const __m128i mm_signs_mask = _mm_xor_si128(mm_left, mm_right);
Chris@102 92
Chris@102 93 __m128i mm_cmp = _mm_cmpgt_epi8(mm_right, mm_left), mm_rcmp = _mm_cmpgt_epi8(mm_left, mm_right);
Chris@102 94
Chris@102 95 mm_cmp = _mm_xor_si128(mm_signs_mask, mm_cmp);
Chris@102 96 mm_rcmp = _mm_xor_si128(mm_signs_mask, mm_rcmp);
Chris@102 97
Chris@102 98 uint32_t cmp = static_cast< uint32_t >(_mm_movemask_epi8(mm_cmp)), rcmp = static_cast< uint32_t >(_mm_movemask_epi8(mm_rcmp));
Chris@102 99
Chris@102 100 cmp = (cmp - 1u) ^ cmp;
Chris@102 101 rcmp = (rcmp - 1u) ^ rcmp;
Chris@102 102
Chris@102 103 return static_cast< uint16_t >(cmp) < static_cast< uint16_t >(rcmp);
Chris@102 104 }
Chris@102 105
Chris@102 106 } // namespace uuids
Chris@102 107 } // namespace boost
Chris@102 108
Chris@102 109 #endif // BOOST_UUID_DETAIL_UUID_X86_HPP_INCLUDED_