Chris@102: /* Chris@102: * Distributed under the Boost Software License, Version 1.0. Chris@102: * (See accompanying file LICENSE_1_0.txt or copy at Chris@102: * http://www.boost.org/LICENSE_1_0.txt) Chris@102: * Chris@102: * Copyright (c) 2009 Helge Bahmann Chris@102: * Copyright (c) 2012 Tim Blechmann Chris@102: * Copyright (c) 2014 Andrey Semashev Chris@102: */ Chris@102: /*! Chris@102: * \file atomic/detail/ops_gcc_x86_dcas.hpp Chris@102: * Chris@102: * This header contains implementation of the double-width CAS primitive for x86. Chris@102: */ Chris@102: Chris@102: #ifndef BOOST_ATOMIC_DETAIL_OPS_GCC_X86_DCAS_HPP_INCLUDED_ Chris@102: #define BOOST_ATOMIC_DETAIL_OPS_GCC_X86_DCAS_HPP_INCLUDED_ Chris@102: Chris@102: #include Chris@102: #include Chris@102: #include Chris@102: #include Chris@102: #include Chris@102: Chris@102: #ifdef BOOST_HAS_PRAGMA_ONCE Chris@102: #pragma once Chris@102: #endif Chris@102: Chris@102: namespace boost { Chris@102: namespace atomics { Chris@102: namespace detail { Chris@102: Chris@102: #if defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG8B) Chris@102: Chris@102: template< bool Signed > Chris@102: struct gcc_dcas_x86 Chris@102: { Chris@102: typedef typename make_storage_type< 8u, Signed >::type storage_type; Chris@102: Chris@102: static BOOST_FORCEINLINE void store(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT Chris@102: { Chris@102: if ((((uint32_t)&storage) & 0x00000007) == 0) Chris@102: { Chris@102: #if defined(__SSE2__) Chris@102: __asm__ __volatile__ Chris@102: ( Chris@102: #if defined(__AVX__) Chris@102: "vmovq %1, %%xmm4\n\t" Chris@102: "vmovq %%xmm4, %0\n\t" Chris@102: #else Chris@102: "movq %1, %%xmm4\n\t" Chris@102: "movq %%xmm4, %0\n\t" Chris@102: #endif Chris@102: : "=m" (storage) Chris@102: : "m" (v) Chris@102: : "memory", "xmm4" Chris@102: ); Chris@102: #else Chris@102: __asm__ __volatile__ Chris@102: ( Chris@102: "fildll %1\n\t" Chris@102: "fistpll %0\n\t" Chris@102: : "=m" (storage) Chris@102: : "m" (v) Chris@102: : "memory" Chris@102: ); Chris@102: #endif Chris@102: } Chris@102: else Chris@102: { Chris@102: #if defined(__PIC__) Chris@102: uint32_t scratch; Chris@102: __asm__ __volatile__ Chris@102: ( Chris@102: "movl %%ebx, %[scratch]\n\t" Chris@102: "movl %[value_lo], %%ebx\n\t" Chris@102: "movl 0(%[dest]), %%eax\n\t" Chris@102: "movl 4(%[dest]), %%edx\n\t" Chris@102: ".align 16\n\t" Chris@102: "1: lock; cmpxchg8b 0(%[dest])\n\t" Chris@102: "jne 1b\n\t" Chris@102: "movl %[scratch], %%ebx" Chris@102: #if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_CONSTRAINT_ALTERNATIVES) Chris@102: : [scratch] "=m,m" (scratch) Chris@102: : [value_lo] "a,a" ((uint32_t)v), "c,c" ((uint32_t)(v >> 32)), [dest] "D,S" (&storage) Chris@102: #else Chris@102: : [scratch] "=m" (scratch) Chris@102: : [value_lo] "a" ((uint32_t)v), "c" ((uint32_t)(v >> 32)), [dest] "D" (&storage) Chris@102: #endif Chris@102: : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "edx", "memory" Chris@102: ); Chris@102: #else Chris@102: __asm__ __volatile__ Chris@102: ( Chris@102: "movl 0(%[dest]), %%eax\n\t" Chris@102: "movl 4(%[dest]), %%edx\n\t" Chris@102: ".align 16\n\t" Chris@102: "1: lock; cmpxchg8b 0(%[dest])\n\t" Chris@102: "jne 1b\n\t" Chris@102: : Chris@102: #if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_CONSTRAINT_ALTERNATIVES) Chris@102: : [value_lo] "b,b" ((uint32_t)v), "c,c" ((uint32_t)(v >> 32)), [dest] "D,S" (&storage) Chris@102: #else Chris@102: : [value_lo] "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32)), [dest] "D" (&storage) Chris@102: #endif Chris@102: : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "eax", "edx", "memory" Chris@102: ); Chris@102: #endif Chris@102: } Chris@102: } Chris@102: Chris@102: static BOOST_FORCEINLINE storage_type load(storage_type const volatile& storage, memory_order) BOOST_NOEXCEPT Chris@102: { Chris@102: storage_type value; Chris@102: Chris@102: if ((((uint32_t)&storage) & 0x00000007) == 0) Chris@102: { Chris@102: #if defined(__SSE2__) Chris@102: __asm__ __volatile__ Chris@102: ( Chris@102: #if defined(__AVX__) Chris@102: "vmovq %1, %%xmm4\n\t" Chris@102: "vmovq %%xmm4, %0\n\t" Chris@102: #else Chris@102: "movq %1, %%xmm4\n\t" Chris@102: "movq %%xmm4, %0\n\t" Chris@102: #endif Chris@102: : "=m" (value) Chris@102: : "m" (storage) Chris@102: : "memory", "xmm4" Chris@102: ); Chris@102: #else Chris@102: __asm__ __volatile__ Chris@102: ( Chris@102: "fildll %1\n\t" Chris@102: "fistpll %0\n\t" Chris@102: : "=m" (value) Chris@102: : "m" (storage) Chris@102: : "memory" Chris@102: ); Chris@102: #endif Chris@102: } Chris@102: else Chris@102: { Chris@102: #if defined(__clang__) Chris@102: // Clang cannot allocate eax:edx register pairs but it has sync intrinsics Chris@102: value = __sync_val_compare_and_swap(&storage, (storage_type)0, (storage_type)0); Chris@102: #else Chris@102: // We don't care for comparison result here; the previous value will be stored into value anyway. Chris@102: // Also we don't care for ebx and ecx values, they just have to be equal to eax and edx before cmpxchg8b. Chris@102: __asm__ __volatile__ Chris@102: ( Chris@102: "movl %%ebx, %%eax\n\t" Chris@102: "movl %%ecx, %%edx\n\t" Chris@102: "lock; cmpxchg8b %[storage]" Chris@102: : "=&A" (value) Chris@102: : [storage] "m" (storage) Chris@102: : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" Chris@102: ); Chris@102: #endif Chris@102: } Chris@102: Chris@102: return value; Chris@102: } Chris@102: Chris@102: static BOOST_FORCEINLINE bool compare_exchange_strong( Chris@102: storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order, memory_order) BOOST_NOEXCEPT Chris@102: { Chris@102: #if defined(__clang__) Chris@102: // Clang cannot allocate eax:edx register pairs but it has sync intrinsics Chris@102: storage_type old_expected = expected; Chris@102: expected = __sync_val_compare_and_swap(&storage, old_expected, desired); Chris@102: return expected == old_expected; Chris@102: #elif defined(__PIC__) Chris@102: // Make sure ebx is saved and restored properly in case Chris@102: // of position independent code. To make this work Chris@102: // setup register constraints such that ebx can not be Chris@102: // used by accident e.g. as base address for the variable Chris@102: // to be modified. Accessing "scratch" should always be okay, Chris@102: // as it can only be placed on the stack (and therefore Chris@102: // accessed through ebp or esp only). Chris@102: // Chris@102: // In theory, could push/pop ebx onto/off the stack, but movs Chris@102: // to a prepared stack slot turn out to be faster. Chris@102: Chris@102: uint32_t scratch; Chris@102: bool success; Chris@102: __asm__ __volatile__ Chris@102: ( Chris@102: "movl %%ebx, %[scratch]\n\t" Chris@102: "movl %[desired_lo], %%ebx\n\t" Chris@102: "lock; cmpxchg8b %[dest]\n\t" Chris@102: "movl %[scratch], %%ebx\n\t" Chris@102: "sete %[success]" Chris@102: #if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_CONSTRAINT_ALTERNATIVES) Chris@102: : "+A,A,A,A,A,A" (expected), [dest] "+m,m,m,m,m,m" (storage), [scratch] "=m,m,m,m,m,m" (scratch), [success] "=q,m,q,m,q,m" (success) Chris@102: : [desired_lo] "S,S,D,D,m,m" ((uint32_t)desired), "c,c,c,c,c,c" ((uint32_t)(desired >> 32)) Chris@102: #else Chris@102: : "+A" (expected), [dest] "+m" (storage), [scratch] "=m" (scratch), [success] "=q" (success) Chris@102: : [desired_lo] "S" ((uint32_t)desired), "c" ((uint32_t)(desired >> 32)) Chris@102: #endif Chris@102: : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" Chris@102: ); Chris@102: return success; Chris@102: #else Chris@102: bool success; Chris@102: __asm__ __volatile__ Chris@102: ( Chris@102: "lock; cmpxchg8b %[dest]\n\t" Chris@102: "sete %[success]" Chris@102: #if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_CONSTRAINT_ALTERNATIVES) Chris@102: : "+A,A" (expected), [dest] "+m,m" (storage), [success] "=q,m" (success) Chris@102: : "b,b" ((uint32_t)desired), "c,c" ((uint32_t)(desired >> 32)) Chris@102: #else Chris@102: : "+A" (expected), [dest] "+m" (storage), [success] "=q" (success) Chris@102: : "b" ((uint32_t)desired), "c" ((uint32_t)(desired >> 32)) Chris@102: #endif Chris@102: : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" Chris@102: ); Chris@102: return success; Chris@102: #endif Chris@102: } Chris@102: Chris@102: static BOOST_FORCEINLINE bool compare_exchange_weak( Chris@102: storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order success_order, memory_order failure_order) BOOST_NOEXCEPT Chris@102: { Chris@102: return compare_exchange_strong(storage, expected, desired, success_order, failure_order); Chris@102: } Chris@102: Chris@102: static BOOST_FORCEINLINE bool is_lock_free(storage_type const volatile&) BOOST_NOEXCEPT Chris@102: { Chris@102: return true; Chris@102: } Chris@102: }; Chris@102: Chris@102: #endif // defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG8B) Chris@102: Chris@102: #if defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG16B) Chris@102: Chris@102: template< bool Signed > Chris@102: struct gcc_dcas_x86_64 Chris@102: { Chris@102: typedef typename make_storage_type< 16u, Signed >::type storage_type; Chris@102: Chris@102: static BOOST_FORCEINLINE void store(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT Chris@102: { Chris@102: uint64_t const* p_value = (uint64_t const*)&v; Chris@102: __asm__ __volatile__ Chris@102: ( Chris@102: "movq 0(%[dest]), %%rax\n\t" Chris@102: "movq 8(%[dest]), %%rdx\n\t" Chris@102: ".align 16\n\t" Chris@102: "1: lock; cmpxchg16b 0(%[dest])\n\t" Chris@102: "jne 1b" Chris@102: : Chris@102: : "b" (p_value[0]), "c" (p_value[1]), [dest] "r" (&storage) Chris@102: : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "rax", "rdx", "memory" Chris@102: ); Chris@102: } Chris@102: Chris@102: static BOOST_FORCEINLINE storage_type load(storage_type const volatile& storage, memory_order) BOOST_NOEXCEPT Chris@102: { Chris@102: #if defined(__clang__) Chris@102: // Clang cannot allocate rax:rdx register pairs but it has sync intrinsics Chris@102: storage_type value = storage_type(); Chris@102: return __sync_val_compare_and_swap(&storage, value, value); Chris@102: #else Chris@102: storage_type value; Chris@102: Chris@102: // We don't care for comparison result here; the previous value will be stored into value anyway. Chris@102: // Also we don't care for rbx and rcx values, they just have to be equal to rax and rdx before cmpxchg16b. Chris@102: __asm__ __volatile__ Chris@102: ( Chris@102: "movq %%rbx, %%rax\n\t" Chris@102: "movq %%rcx, %%rdx\n\t" Chris@102: "lock; cmpxchg16b %[storage]" Chris@102: : "=&A" (value) Chris@102: : [storage] "m" (storage) Chris@102: : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" Chris@102: ); Chris@102: Chris@102: return value; Chris@102: #endif Chris@102: } Chris@102: Chris@102: static BOOST_FORCEINLINE bool compare_exchange_strong( Chris@102: storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order, memory_order) BOOST_NOEXCEPT Chris@102: { Chris@102: #if defined(__clang__) Chris@102: // Clang cannot allocate rax:rdx register pairs but it has sync intrinsics Chris@102: storage_type old_expected = expected; Chris@102: expected = __sync_val_compare_and_swap(&storage, old_expected, desired); Chris@102: return expected == old_expected; Chris@102: #else Chris@102: uint64_t const* p_desired = (uint64_t const*)&desired; Chris@102: bool success; Chris@102: __asm__ __volatile__ Chris@102: ( Chris@102: "lock; cmpxchg16b %[dest]\n\t" Chris@102: "sete %[success]" Chris@102: #if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_CONSTRAINT_ALTERNATIVES) Chris@102: : "+A,A" (expected), [dest] "+m,m" (storage), [success] "=q,m" (success) Chris@102: : "b,b" (p_desired[0]), "c,c" (p_desired[1]) Chris@102: #else Chris@102: : "+A" (expected), [dest] "+m" (storage), [success] "=q" (success) Chris@102: : "b" (p_desired[0]), "c" (p_desired[1]) Chris@102: #endif Chris@102: : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" Chris@102: ); Chris@102: return success; Chris@102: #endif Chris@102: } Chris@102: Chris@102: static BOOST_FORCEINLINE bool compare_exchange_weak( Chris@102: storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order success_order, memory_order failure_order) BOOST_NOEXCEPT Chris@102: { Chris@102: return compare_exchange_strong(storage, expected, desired, success_order, failure_order); Chris@102: } Chris@102: Chris@102: static BOOST_FORCEINLINE bool is_lock_free(storage_type const volatile&) BOOST_NOEXCEPT Chris@102: { Chris@102: return true; Chris@102: } Chris@102: }; Chris@102: Chris@102: #endif // defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG16B) Chris@102: Chris@102: } // namespace detail Chris@102: } // namespace atomics Chris@102: } // namespace boost Chris@102: Chris@102: #endif // BOOST_ATOMIC_DETAIL_OPS_GCC_X86_DCAS_HPP_INCLUDED_