/*
 * Distributed under the Boost Software License, Version 1.0.
 * (See accompanying file LICENSE_1_0.txt or copy at
 * http://www.boost.org/LICENSE_1_0.txt)
 *
 * Copyright (c) 2009 Helge Bahmann
 * Copyright (c) 2012 Tim Blechmann
 * Copyright (c) 2014 Andrey Semashev
 */
/*!
 * \file   atomic/detail/ops_gcc_x86_dcas.hpp
 *
 * This header contains implementation of the double-width CAS primitive for x86.
 */
|
Chris@102
|
15
|
Chris@102
|
16 #ifndef BOOST_ATOMIC_DETAIL_OPS_GCC_X86_DCAS_HPP_INCLUDED_
|
Chris@102
|
17 #define BOOST_ATOMIC_DETAIL_OPS_GCC_X86_DCAS_HPP_INCLUDED_
|
Chris@102
|
18
|
Chris@102
|
19 #include <boost/cstdint.hpp>
|
Chris@102
|
20 #include <boost/memory_order.hpp>
|
Chris@102
|
21 #include <boost/atomic/detail/config.hpp>
|
Chris@102
|
22 #include <boost/atomic/detail/storage_type.hpp>
|
Chris@102
|
23 #include <boost/atomic/capabilities.hpp>
|
Chris@102
|
24
|
Chris@102
|
25 #ifdef BOOST_HAS_PRAGMA_ONCE
|
Chris@102
|
26 #pragma once
|
Chris@102
|
27 #endif
|
Chris@102
|
28
|
Chris@102
|
29 namespace boost {
|
Chris@102
|
30 namespace atomics {
|
Chris@102
|
31 namespace detail {
|
Chris@102
|
32
|
Chris@102
|
33 #if defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG8B)
|
Chris@102
|
34
|
Chris@102
|
//! 64-bit atomic operations for 32-bit x86, built on the double-width CAS
//! instruction cmpxchg8b (with single 8-byte moves used where the storage
//! is 8-byte aligned).
template< bool Signed >
struct gcc_dcas_x86
{
    typedef typename make_storage_type< 8u, Signed >::type storage_type;

    //! Atomically stores \a v into \a storage. The memory_order argument is
    //! ignored here; ordering is provided by the caller/surrounding framework.
    static BOOST_FORCEINLINE void store(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT
    {
        // Fast path: for 8-byte aligned storage a single 8-byte move suffices
        // (aligned quadword accesses are performed atomically on x86 — see Intel SDM).
        if ((((uint32_t)&storage) & 0x00000007) == 0)
        {
#if defined(__SSE2__)
            __asm__ __volatile__
            (
#if defined(__AVX__)
                // Prefer VEX-encoded moves when AVX is enabled to avoid SSE/AVX transition stalls
                "vmovq %1, %%xmm4\n\t"
                "vmovq %%xmm4, %0\n\t"
#else
                "movq %1, %%xmm4\n\t"
                "movq %%xmm4, %0\n\t"
#endif
                : "=m" (storage)
                : "m" (v)
                : "memory", "xmm4"
            );
#else
            // No SSE2: move the 8 bytes through the x87 FPU stack. fildll/fistpll
            // are 64-bit *integer* load/store, so every bit pattern round-trips exactly.
            __asm__ __volatile__
            (
                "fildll %1\n\t"
                "fistpll %0\n\t"
                : "=m" (storage)
                : "m" (v)
                : "memory"
            );
#endif
        }
        else
        {
            // Misaligned storage: loop on lock cmpxchg8b until the exchange
            // succeeds. On failure cmpxchg8b reloads edx:eax with the current
            // value, so the loop retries with a fresh comparand.
#if defined(__PIC__)
            // In 32-bit PIC code ebx is reserved (GOT pointer) and cannot be
            // named in the constraints, so it is saved/restored manually and
            // the low half of the value is moved into ebx inside the asm.
            uint32_t scratch;
            __asm__ __volatile__
            (
                "movl %%ebx, %[scratch]\n\t"
                "movl %[value_lo], %%ebx\n\t"
                "movl 0(%[dest]), %%eax\n\t"
                "movl 4(%[dest]), %%edx\n\t"
                ".align 16\n\t"
                "1: lock; cmpxchg8b 0(%[dest])\n\t"
                "jne 1b\n\t"
                "movl %[scratch], %%ebx"
#if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_CONSTRAINT_ALTERNATIVES)
                : [scratch] "=m,m" (scratch)
                : [value_lo] "a,a" ((uint32_t)v), "c,c" ((uint32_t)(v >> 32)), [dest] "D,S" (&storage)
#else
                : [scratch] "=m" (scratch)
                : [value_lo] "a" ((uint32_t)v), "c" ((uint32_t)(v >> 32)), [dest] "D" (&storage)
#endif
                : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "edx", "memory"
            );
#else
            __asm__ __volatile__
            (
                "movl 0(%[dest]), %%eax\n\t"
                "movl 4(%[dest]), %%edx\n\t"
                ".align 16\n\t"
                "1: lock; cmpxchg8b 0(%[dest])\n\t"
                "jne 1b\n\t"
                :
#if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_CONSTRAINT_ALTERNATIVES)
                : [value_lo] "b,b" ((uint32_t)v), "c,c" ((uint32_t)(v >> 32)), [dest] "D,S" (&storage)
#else
                : [value_lo] "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32)), [dest] "D" (&storage)
#endif
                : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "eax", "edx", "memory"
            );
#endif
        }
    }

    //! Atomically loads and returns the current value of \a storage.
    static BOOST_FORCEINLINE storage_type load(storage_type const volatile& storage, memory_order) BOOST_NOEXCEPT
    {
        storage_type value;

        // Fast path: aligned 8-byte storage can be read with one 8-byte move
        // (SSE2 movq or x87 fildll/fistpll), mirroring store() above.
        if ((((uint32_t)&storage) & 0x00000007) == 0)
        {
#if defined(__SSE2__)
            __asm__ __volatile__
            (
#if defined(__AVX__)
                "vmovq %1, %%xmm4\n\t"
                "vmovq %%xmm4, %0\n\t"
#else
                "movq %1, %%xmm4\n\t"
                "movq %%xmm4, %0\n\t"
#endif
                : "=m" (value)
                : "m" (storage)
                : "memory", "xmm4"
            );
#else
            __asm__ __volatile__
            (
                "fildll %1\n\t"
                "fistpll %0\n\t"
                : "=m" (value)
                : "m" (storage)
                : "memory"
            );
#endif
        }
        else
        {
            // Misaligned storage: read via a no-op cmpxchg8b, which always
            // leaves the current value in edx:eax.
#if defined(__clang__)
            // Clang cannot allocate eax:edx register pairs but it has sync intrinsics
            value = __sync_val_compare_and_swap(&storage, (storage_type)0, (storage_type)0);
#else
            // We don't care for comparison result here; the previous value will be stored into value anyway.
            // Also we don't care for ebx and ecx values, they just have to be equal to eax and edx before cmpxchg8b.
            __asm__ __volatile__
            (
                "movl %%ebx, %%eax\n\t"
                "movl %%ecx, %%edx\n\t"
                "lock; cmpxchg8b %[storage]"
                : "=&A" (value)
                : [storage] "m" (storage)
                : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
            );
#endif
        }

        return value;
    }

    //! Atomically compares \a storage with \a expected and, if equal, stores
    //! \a desired. On failure \a expected receives the observed value.
    //! Returns true iff the exchange took place.
    static BOOST_FORCEINLINE bool compare_exchange_strong(
        storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order, memory_order) BOOST_NOEXCEPT
    {
#if defined(__clang__)
        // Clang cannot allocate eax:edx register pairs but it has sync intrinsics
        storage_type old_expected = expected;
        expected = __sync_val_compare_and_swap(&storage, old_expected, desired);
        return expected == old_expected;
#elif defined(__PIC__)
        // Make sure ebx is saved and restored properly in case
        // of position independent code. To make this work
        // setup register constraints such that ebx can not be
        // used by accident e.g. as base address for the variable
        // to be modified. Accessing "scratch" should always be okay,
        // as it can only be placed on the stack (and therefore
        // accessed through ebp or esp only).
        //
        // In theory, could push/pop ebx onto/off the stack, but movs
        // to a prepared stack slot turn out to be faster.

        uint32_t scratch;
        bool success;
        __asm__ __volatile__
        (
            "movl %%ebx, %[scratch]\n\t"
            "movl %[desired_lo], %%ebx\n\t"
            "lock; cmpxchg8b %[dest]\n\t"
            "movl %[scratch], %%ebx\n\t"
            "sete %[success]"
#if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_CONSTRAINT_ALTERNATIVES)
            : "+A,A,A,A,A,A" (expected), [dest] "+m,m,m,m,m,m" (storage), [scratch] "=m,m,m,m,m,m" (scratch), [success] "=q,m,q,m,q,m" (success)
            : [desired_lo] "S,S,D,D,m,m" ((uint32_t)desired), "c,c,c,c,c,c" ((uint32_t)(desired >> 32))
#else
            : "+A" (expected), [dest] "+m" (storage), [scratch] "=m" (scratch), [success] "=q" (success)
            : [desired_lo] "S" ((uint32_t)desired), "c" ((uint32_t)(desired >> 32))
#endif
            : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
        );
        return success;
#else
        bool success;
        __asm__ __volatile__
        (
            "lock; cmpxchg8b %[dest]\n\t"
            "sete %[success]"
#if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_CONSTRAINT_ALTERNATIVES)
            : "+A,A" (expected), [dest] "+m,m" (storage), [success] "=q,m" (success)
            : "b,b" ((uint32_t)desired), "c,c" ((uint32_t)(desired >> 32))
#else
            : "+A" (expected), [dest] "+m" (storage), [success] "=q" (success)
            : "b" ((uint32_t)desired), "c" ((uint32_t)(desired >> 32))
#endif
            : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
        );
        return success;
#endif
    }

    //! Weak CAS; cmpxchg8b never fails spuriously, so it simply forwards
    //! to compare_exchange_strong.
    static BOOST_FORCEINLINE bool compare_exchange_weak(
        storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order success_order, memory_order failure_order) BOOST_NOEXCEPT
    {
        return compare_exchange_strong(storage, expected, desired, success_order, failure_order);
    }

    //! Always lock-free: this struct is only compiled when cmpxchg8b is available.
    static BOOST_FORCEINLINE bool is_lock_free(storage_type const volatile&) BOOST_NOEXCEPT
    {
        return true;
    }
};
|
Chris@102
|
235
|
Chris@102
|
236 #endif // defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG8B)
|
Chris@102
|
237
|
Chris@102
|
238 #if defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG16B)
|
Chris@102
|
239
|
Chris@102
|
//! 128-bit atomic operations for x86-64, built on the double-width CAS
//! instruction cmpxchg16b. NOTE(review): cmpxchg16b requires its memory
//! operand to be 16-byte aligned; the storage_type obtained from
//! make_storage_type is presumably aligned accordingly — confirm there.
template< bool Signed >
struct gcc_dcas_x86_64
{
    typedef typename make_storage_type< 16u, Signed >::type storage_type;

    //! Atomically stores \a v into \a storage by looping on lock cmpxchg16b
    //! until the exchange succeeds (there is no plain 16-byte atomic move).
    static BOOST_FORCEINLINE void store(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT
    {
        // Access the two 64-bit halves of v for the rbx/rcx inputs.
        uint64_t const* p_value = (uint64_t const*)&v;
        // On failure cmpxchg16b reloads rdx:rax with the current value,
        // so the loop retries with a fresh comparand.
        __asm__ __volatile__
        (
            "movq 0(%[dest]), %%rax\n\t"
            "movq 8(%[dest]), %%rdx\n\t"
            ".align 16\n\t"
            "1: lock; cmpxchg16b 0(%[dest])\n\t"
            "jne 1b"
            :
            : "b" (p_value[0]), "c" (p_value[1]), [dest] "r" (&storage)
            : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "rax", "rdx", "memory"
        );
    }

    //! Atomically loads and returns the current value of \a storage via a
    //! no-op cmpxchg16b, which always leaves the current value in rdx:rax.
    static BOOST_FORCEINLINE storage_type load(storage_type const volatile& storage, memory_order) BOOST_NOEXCEPT
    {
#if defined(__clang__)
        // Clang cannot allocate rax:rdx register pairs but it has sync intrinsics
        storage_type value = storage_type();
        return __sync_val_compare_and_swap(&storage, value, value);
#else
        storage_type value;

        // We don't care for comparison result here; the previous value will be stored into value anyway.
        // Also we don't care for rbx and rcx values, they just have to be equal to rax and rdx before cmpxchg16b.
        __asm__ __volatile__
        (
            "movq %%rbx, %%rax\n\t"
            "movq %%rcx, %%rdx\n\t"
            "lock; cmpxchg16b %[storage]"
            : "=&A" (value)
            : [storage] "m" (storage)
            : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
        );

        return value;
#endif
    }

    //! Atomically compares \a storage with \a expected and, if equal, stores
    //! \a desired. On failure \a expected receives the observed value.
    //! Returns true iff the exchange took place.
    static BOOST_FORCEINLINE bool compare_exchange_strong(
        storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order, memory_order) BOOST_NOEXCEPT
    {
#if defined(__clang__)
        // Clang cannot allocate rax:rdx register pairs but it has sync intrinsics
        storage_type old_expected = expected;
        expected = __sync_val_compare_and_swap(&storage, old_expected, desired);
        return expected == old_expected;
#else
        // Access the two 64-bit halves of desired for the rbx/rcx inputs.
        uint64_t const* p_desired = (uint64_t const*)&desired;
        bool success;
        __asm__ __volatile__
        (
            "lock; cmpxchg16b %[dest]\n\t"
            "sete %[success]"
#if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_CONSTRAINT_ALTERNATIVES)
            : "+A,A" (expected), [dest] "+m,m" (storage), [success] "=q,m" (success)
            : "b,b" (p_desired[0]), "c,c" (p_desired[1])
#else
            : "+A" (expected), [dest] "+m" (storage), [success] "=q" (success)
            : "b" (p_desired[0]), "c" (p_desired[1])
#endif
            : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
        );
        return success;
#endif
    }

    //! Weak CAS; cmpxchg16b never fails spuriously, so it simply forwards
    //! to compare_exchange_strong.
    static BOOST_FORCEINLINE bool compare_exchange_weak(
        storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order success_order, memory_order failure_order) BOOST_NOEXCEPT
    {
        return compare_exchange_strong(storage, expected, desired, success_order, failure_order);
    }

    //! Always lock-free: this struct is only compiled when cmpxchg16b is available.
    static BOOST_FORCEINLINE bool is_lock_free(storage_type const volatile&) BOOST_NOEXCEPT
    {
        return true;
    }
};
|
Chris@102
|
325
|
Chris@102
|
326 #endif // defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG16B)
|
Chris@102
|
327
|
Chris@102
|
328 } // namespace detail
|
Chris@102
|
329 } // namespace atomics
|
Chris@102
|
330 } // namespace boost
|
Chris@102
|
331
|
Chris@102
|
332 #endif // BOOST_ATOMIC_DETAIL_OPS_GCC_X86_DCAS_HPP_INCLUDED_
|