/* Copyright (c) 2014, Cisco Systems, INC
   Written by XiangMingZhu WeiZhou MinPeng YanWang

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#if !defined(X86CPU_H)
# define X86CPU_H

# if defined(OPUS_X86_MAY_HAVE_SSE)
#  define MAY_HAVE_SSE(name) name ## _sse
# else
#  define MAY_HAVE_SSE(name) name ## _c
# endif

# if defined(OPUS_X86_MAY_HAVE_SSE2)
#  define MAY_HAVE_SSE2(name) name ## _sse2
# else
#  define MAY_HAVE_SSE2(name) name ## _c
# endif

# if defined(OPUS_X86_MAY_HAVE_SSE4_1)
#  define MAY_HAVE_SSE4_1(name) name ## _sse4_1
# else
#  define MAY_HAVE_SSE4_1(name) name ## _c
# endif

# if defined(OPUS_X86_MAY_HAVE_AVX)
#  define MAY_HAVE_AVX(name) name ## _avx
# else
#  define MAY_HAVE_AVX(name) name ## _c
# endif
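
/* Illustrative sketch, not part of the original header: the MAY_HAVE_*
   macros pick, at compile time, the most specialized symbol a build is
   allowed to reference. With a hypothetical function `foo` that has a
   portable foo_c() and an SSE4.1 foo_sse4_1():

     int foo_c(const short *x, int n);
     int foo_sse4_1(const short *x, int n);

     // Expands to foo_sse4_1 when OPUS_X86_MAY_HAVE_SSE4_1 is defined,
     // and to foo_c otherwise; either way the result is an ordinary
     // symbol, so the point of use needs no further #ifdefs.
     int (*foo_impl)(const short *, int) = MAY_HAVE_SSE4_1(foo);
*/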

# if defined(OPUS_HAVE_RTCD)
int opus_select_arch(void);
# endif
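
/* Illustrative sketch, not part of the original header: with run-time
   CPU detection (RTCD) enabled, callers typically probe the CPU once
   via opus_select_arch() and use the returned index to pick among
   per-ISA implementations (the table and `foo` names below are
   hypothetical):

     static int (*const foo_table[])(const short *, int) = {
       foo_c,                 // index 0: no usable extensions
       MAY_HAVE_SSE(foo),     // index 1: SSE
       MAY_HAVE_SSE2(foo),    // index 2: SSE2
       MAY_HAVE_SSE4_1(foo),  // index 3: SSE4.1
       MAY_HAVE_AVX(foo)      // index 4: AVX
     };
     int arch = opus_select_arch();
     int r = foo_table[arch](x, n);
*/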

/*gcc appears to emit MOVDQA's to load the argument of an _mm_cvtepi8_epi32()
   or _mm_cvtepi16_epi32() when optimizations are disabled, even though the
   actual PMOVSXBD or PMOVSXWD instruction takes an m32 or m64 argument,
   respectively. Unlike a normal memory reference, these require 16-byte
   alignment and load a full 16 bytes (instead of 4 or 8), possibly reading
   out of bounds.

   We can insert an explicit MOVD or MOVQ using _mm_cvtsi32_si128() or
   _mm_loadl_epi64(), which should have the same semantics as an m32 or m64
   reference in the PMOVSX instruction itself, but gcc is not smart enough to
   optimize this out when optimizations ARE enabled.

   Clang, in contrast, requires us to do this always for _mm_cvtepi8_epi32
   (which is fair, since technically the compiler is always allowed to do the
   dereference before invoking the function implementing the intrinsic).
   However, it is smart enough to eliminate the extra MOVD instruction.
   For _mm_cvtepi16_epi32, it does the right thing, though it does *not*
   optimize out the extra MOVQ if it's specified explicitly.*/

# if defined(__clang__) || !defined(__OPTIMIZE__)
#  define OP_CVTEPI8_EPI32_M32(x) \
 (_mm_cvtepi8_epi32(_mm_cvtsi32_si128(*(int *)(x))))
# else
#  define OP_CVTEPI8_EPI32_M32(x) \
 (_mm_cvtepi8_epi32(*(__m128i *)(x)))
# endif
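
/* Illustrative sketch, not part of the original header: in a
   translation unit that includes <smmintrin.h> and may use SSE4.1,
   OP_CVTEPI8_EPI32_M32 reads exactly four bytes at `x` and
   sign-extends them into the four 32-bit lanes of an __m128i, without
   the 16-byte over-read described above:

     signed char buf[4] = {-1, 2, -3, 4};  // assumed int-aligned here
     __m128i v = OP_CVTEPI8_EPI32_M32(buf);
     // v now holds {-1, 2, -3, 4} as four 32-bit integers.
*/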

/* Similar reasoning about the instruction sequence applies here as in the
   32-bit macro above. */
# if defined(__clang__) || !defined(__OPTIMIZE__)
#  define OP_CVTEPI16_EPI32_M64(x) \
 (_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i *)(x))))
# else
#  define OP_CVTEPI16_EPI32_M64(x) \
 (_mm_cvtepi16_epi32(*(__m128i *)(x)))
# endif
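
/* Illustrative sketch, not part of the original header:
   OP_CVTEPI16_EPI32_M64 reads exactly eight bytes (four 16-bit values)
   at `x` and sign-extends them to 32 bits, as a fixed-point inner loop
   might do before a 32-bit multiply:

     short xi[4] = {-1, 2, -3, 4};
     __m128i v = OP_CVTEPI16_EPI32_M64(xi);
     // v now holds {-1, 2, -3, 4} as four 32-bit integers.
*/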

#endif