/* Copyright (c) 2014, Cisco Systems, INC
   Written by XiangMingZhu WeiZhou MinPeng YanWang

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#if !defined(X86CPU_H)
# define X86CPU_H

# if defined(OPUS_X86_MAY_HAVE_SSE)
#  define MAY_HAVE_SSE(name) name ## _sse
# else
#  define MAY_HAVE_SSE(name) name ## _c
# endif

# if defined(OPUS_X86_MAY_HAVE_SSE2)
#  define MAY_HAVE_SSE2(name) name ## _sse2
# else
#  define MAY_HAVE_SSE2(name) name ## _c
# endif

# if defined(OPUS_X86_MAY_HAVE_SSE4_1)
#  define MAY_HAVE_SSE4_1(name) name ## _sse4_1
# else
#  define MAY_HAVE_SSE4_1(name) name ## _c
# endif

# if defined(OPUS_X86_MAY_HAVE_AVX)
#  define MAY_HAVE_AVX(name) name ## _avx
# else
#  define MAY_HAVE_AVX(name) name ## _c
# endif

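/* Illustrative note (an example, not part of the upstream header): these
   macros do compile-time name selection via token pasting. For a hypothetical
   function with variants my_fn_c() and my_fn_sse4_1(), a call site written as

     MAY_HAVE_SSE4_1(my_fn)(x, n)

   compiles to my_fn_sse4_1(x, n) when the build is allowed to emit SSE4.1
   code (OPUS_X86_MAY_HAVE_SSE4_1 defined), and to the portable my_fn_c(x, n)
   otherwise. */
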
# if defined(OPUS_HAVE_RTCD)
int opus_select_arch(void);
# endif

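/* A minimal sketch of how an OPUS_HAVE_RTCD build can combine
   opus_select_arch() with the MAY_HAVE_*() macros: the arch index returned
   at run time selects an entry from a per-function pointer table, and
   MAY_HAVE_*() makes each entry fall back to the C version when that ISA
   level was not compiled in. The names func/func_c and the exact table
   layout are hypothetical; OPUS_ARCHMASK is provided by celt/cpu_support.h:

     static int (*const FUNC_IMPL[])(const short *x, int n) = {
       func_c,                   arch 0: generic C
       func_c,                   arch 1: SSE (no specialized version here)
       MAY_HAVE_SSE2(func),      arch 2: SSE2
       MAY_HAVE_SSE4_1(func),    arch 3: SSE4.1
       MAY_HAVE_AVX(func)        arch 4: AVX
     };

     int arch = opus_select_arch();
     int result = FUNC_IMPL[arch & OPUS_ARCHMASK](x, n);
*/
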
/* gcc appears to emit MOVDQA instructions to load the argument of an
   _mm_cvtepi8_epi32() or _mm_cvtepi16_epi32() when optimizations are
   disabled, even though the actual PMOVSXBD/PMOVSXWD instruction takes an
   m32 or m64. Unlike a normal memory reference, these require 16-byte
   alignment and load a full 16 bytes (instead of 4 or 8), possibly reading
   out of bounds.

   We can insert an explicit MOVD or MOVQ using _mm_cvtsi32_si128() or
   _mm_loadl_epi64(), which should have the same semantics as an m32 or m64
   reference in the PMOVSX instruction itself, but gcc is not smart enough to
   optimize this out when optimizations ARE enabled.

   Clang, in contrast, requires us to do this always for _mm_cvtepi8_epi32
   (which is fair, since technically the compiler is always allowed to do the
   dereference before invoking the function implementing the intrinsic).
   However, it is smart enough to eliminate the extra MOVD instruction.
   For _mm_cvtepi16_epi32, it does the right thing, though does *not* optimize
   out the extra MOVQ if it's specified explicitly. */

# if defined(__clang__) || !defined(__OPTIMIZE__)
#  define OP_CVTEPI8_EPI32_M32(x) \
 (_mm_cvtepi8_epi32(_mm_cvtsi32_si128(*(int *)(x))))
# else
#  define OP_CVTEPI8_EPI32_M32(x) \
 (_mm_cvtepi8_epi32(*(__m128i *)(x)))
# endif
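
/* Usage sketch (illustrative, not from the upstream sources): widen four
   packed signed bytes to four 32-bit lanes. _mm_cvtepi8_epi32() comes from
   <smmintrin.h> (SSE4.1); `in` is a hypothetical buffer, and the macro is
   built so that only 4 bytes are loaded (m32 semantics):

     const signed char in[4] = { -1, 2, -3, 4 };
     __m128i v = OP_CVTEPI8_EPI32_M32(in);
     v now holds the int32 lanes { -1, 2, -3, 4 }
*/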

/* Similar reasoning about the instruction sequence applies here as in the
   32-bit macro above. */
# if defined(__clang__) || !defined(__OPTIMIZE__)
#  define OP_CVTEPI16_EPI32_M64(x) \
 (_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i *)(x))))
# else
#  define OP_CVTEPI16_EPI32_M64(x) \
 (_mm_cvtepi16_epi32(*(__m128i *)(x)))
# endif
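
/* Usage sketch (illustrative, not from the upstream sources): widen four
   packed 16-bit values to four 32-bit lanes through a 64-bit (m64) load;
   `in` is a hypothetical source array:

     const short in[4] = { 1000, -2000, 3000, -4000 };
     __m128i v = OP_CVTEPI16_EPI32_M64(in);
     v now holds the int32 lanes { 1000, -2000, 3000, -4000 }
*/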

#endif