annotate src/opus-1.3/celt/x86/x86cpu.h @ 169:223a55898ab9 tip default

Add null config files
author Chris Cannam <cannam@all-day-breakfast.com>
date Mon, 02 Mar 2020 14:03:47 +0000
parents 4664ac0c1032
children
rev   line source
cannam@154 1 /* Copyright (c) 2014, Cisco Systems, INC
cannam@154 2 Written by XiangMingZhu WeiZhou MinPeng YanWang
cannam@154 3
cannam@154 4 Redistribution and use in source and binary forms, with or without
cannam@154 5 modification, are permitted provided that the following conditions
cannam@154 6 are met:
cannam@154 7
cannam@154 8 - Redistributions of source code must retain the above copyright
cannam@154 9 notice, this list of conditions and the following disclaimer.
cannam@154 10
cannam@154 11 - Redistributions in binary form must reproduce the above copyright
cannam@154 12 notice, this list of conditions and the following disclaimer in the
cannam@154 13 documentation and/or other materials provided with the distribution.
cannam@154 14
cannam@154 15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
cannam@154 16 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
cannam@154 17 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
cannam@154 18 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
cannam@154 19 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
cannam@154 20 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
cannam@154 21 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
cannam@154 22 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
cannam@154 23 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
cannam@154 24 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
cannam@154 25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
cannam@154 26 */
cannam@154 27
#if !defined(X86CPU_H)
# define X86CPU_H

/* Run-time-dispatch name selectors: MAY_HAVE_<ISA>(name) expands to the
   ISA-specific symbol (name ## _<isa>) when this build is permitted to
   target that instruction set, and to the portable C implementation
   (name ## _c) otherwise.  All four follow the same pattern. */

# if defined(OPUS_X86_MAY_HAVE_SSE)
#  define MAY_HAVE_SSE(name) name ## _sse
# else
#  define MAY_HAVE_SSE(name) name ## _c
# endif

# if defined(OPUS_X86_MAY_HAVE_SSE2)
#  define MAY_HAVE_SSE2(name) name ## _sse2
# else
#  define MAY_HAVE_SSE2(name) name ## _c
# endif

# if defined(OPUS_X86_MAY_HAVE_SSE4_1)
#  define MAY_HAVE_SSE4_1(name) name ## _sse4_1
# else
#  define MAY_HAVE_SSE4_1(name) name ## _c
# endif

# if defined(OPUS_X86_MAY_HAVE_AVX)
#  define MAY_HAVE_AVX(name) name ## _avx
# else
#  define MAY_HAVE_AVX(name) name ## _c
# endif

# if defined(OPUS_HAVE_RTCD)
/* Probes the host CPU once at run time; the result is used to index the
   per-architecture function tables (run-time CPU detection). */
int opus_select_arch(void);
# endif

/*gcc appears to emit MOVDQA's to load the argument of an _mm_cvtepi8_epi32()
  or _mm_cvtepi16_epi32() when optimizations are disabled, even though the
  actual PMOVSXWD instruction takes an m32 or m64. Unlike a normal memory
  reference, these require 16-byte alignment and load a full 16 bytes (instead
  of 4 or 8), possibly reading out of bounds.

  We can insert an explicit MOVD or MOVQ using _mm_cvtsi32_si128() or
  _mm_loadl_epi64(), which should have the same semantics as an m32 or m64
  reference in the PMOVSXWD instruction itself, but gcc is not smart enough to
  optimize this out when optimizations ARE enabled.

  Clang, in contrast, requires us to do this always for _mm_cvtepi8_epi32
  (which is fair, since technically the compiler is always allowed to do the
  dereference before invoking the function implementing the intrinsic).
  However, it is smart enough to eliminate the extra MOVD instruction.
  For _mm_cvtepi16_epi32, it does the right thing, though does *not* optimize out
  the extra MOVQ if it's specified explicitly */

# if defined(__clang__) || !defined(__OPTIMIZE__)
/* Safe form: explicit 32-bit load, then sign-extend (MOVD + PMOVSXBD). */
#  define OP_CVTEPI8_EPI32_M32(x) \
 (_mm_cvtepi8_epi32(_mm_cvtsi32_si128(*(int *)(x))))
# else
/* gcc-with-optimization form: relies on gcc folding the dereference into the
   PMOVSXBD m32 operand rather than emitting a full 16-byte aligned load —
   see the comment above. */
#  define OP_CVTEPI8_EPI32_M32(x) \
 (_mm_cvtepi8_epi32(*(__m128i *)(x)))
# endif

/* similar reasoning about the instruction sequence as in the 32-bit macro
   above, but with a 64-bit load (MOVQ + PMOVSXWD) */
# if defined(__clang__) || !defined(__OPTIMIZE__)
#  define OP_CVTEPI16_EPI32_M64(x) \
 (_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i *)(x))))
# else
#  define OP_CVTEPI16_EPI32_M64(x) \
 (_mm_cvtepi16_epi32(*(__m128i *)(x)))
# endif

#endif