annotate src/opus-1.3/celt/x86/x86cpu.h @ 84:08ae793730bd

Add null config files
author Chris Cannam
date Mon, 02 Mar 2020 14:03:47 +0000
parents 7aeed7906520
children
rev   line source
Chris@69 1 /* Copyright (c) 2014, Cisco Systems, INC
Chris@69 2 Written by XiangMingZhu WeiZhou MinPeng YanWang
Chris@69 3
Chris@69 4 Redistribution and use in source and binary forms, with or without
Chris@69 5 modification, are permitted provided that the following conditions
Chris@69 6 are met:
Chris@69 7
Chris@69 8 - Redistributions of source code must retain the above copyright
Chris@69 9 notice, this list of conditions and the following disclaimer.
Chris@69 10
Chris@69 11 - Redistributions in binary form must reproduce the above copyright
Chris@69 12 notice, this list of conditions and the following disclaimer in the
Chris@69 13 documentation and/or other materials provided with the distribution.
Chris@69 14
Chris@69 15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
Chris@69 16 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
Chris@69 17 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
Chris@69 18 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
Chris@69 19 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
Chris@69 20 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
Chris@69 21 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
Chris@69 22 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
Chris@69 23 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
Chris@69 24 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
Chris@69 25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Chris@69 26 */
Chris@69 27
Chris@69 28 #if !defined(X86CPU_H)
Chris@69 29 # define X86CPU_H
Chris@69 30
Chris@69 31 # if defined(OPUS_X86_MAY_HAVE_SSE)
Chris@69 32 # define MAY_HAVE_SSE(name) name ## _sse
Chris@69 33 # else
Chris@69 34 # define MAY_HAVE_SSE(name) name ## _c
Chris@69 35 # endif
Chris@69 36
Chris@69 37 # if defined(OPUS_X86_MAY_HAVE_SSE2)
Chris@69 38 # define MAY_HAVE_SSE2(name) name ## _sse2
Chris@69 39 # else
Chris@69 40 # define MAY_HAVE_SSE2(name) name ## _c
Chris@69 41 # endif
Chris@69 42
Chris@69 43 # if defined(OPUS_X86_MAY_HAVE_SSE4_1)
Chris@69 44 # define MAY_HAVE_SSE4_1(name) name ## _sse4_1
Chris@69 45 # else
Chris@69 46 # define MAY_HAVE_SSE4_1(name) name ## _c
Chris@69 47 # endif
Chris@69 48
Chris@69 49 # if defined(OPUS_X86_MAY_HAVE_AVX)
Chris@69 50 # define MAY_HAVE_AVX(name) name ## _avx
Chris@69 51 # else
Chris@69 52 # define MAY_HAVE_AVX(name) name ## _c
Chris@69 53 # endif
Chris@69 54
Chris@69 55 # if defined(OPUS_HAVE_RTCD)
Chris@69 56 int opus_select_arch(void);
Chris@69 57 # endif
Chris@69 58
Chris@69 59 /*gcc appears to emit MOVDQAs to load the argument of an _mm_cvtepi8_epi32()
Chris@69 60 or _mm_cvtepi16_epi32() when optimizations are disabled, even though the
Chris@69 61 actual PMOVSXBD/PMOVSXWD instruction takes an m32 or m64. Unlike a normal
Chris@69 62 memory reference, the MOVDQAs require 16-byte alignment and load a full 16
Chris@69 63 bytes (instead of 4 or 8), possibly reading out of bounds.
Chris@69 64
Chris@69 65 We can insert an explicit MOVD or MOVQ using _mm_cvtsi32_si128() or
Chris@69 66 _mm_loadl_epi64(), which should have the same semantics as an m32 or m64
Chris@69 67 reference in the PMOVSXWD instruction itself, but gcc is not smart enough to
Chris@69 68 optimize this out when optimizations ARE enabled.
Chris@69 69
Chris@69 70 Clang, in contrast, requires us to do this always for _mm_cvtepi8_epi32
Chris@69 71 (which is fair, since technically the compiler is always allowed to do the
Chris@69 72 dereference before invoking the function implementing the intrinsic).
Chris@69 73 However, it is smart enough to eliminate the extra MOVD instruction.
Chris@69 74 For _mm_cvtepi16_epi32, it does the right thing, though it does *not* optimize
Chris@69 75 out the extra MOVQ if it's specified explicitly.*/
Chris@69 76
Chris@69 77 # if defined(__clang__) || !defined(__OPTIMIZE__)
Chris@69 78 # define OP_CVTEPI8_EPI32_M32(x) \
Chris@69 79 (_mm_cvtepi8_epi32(_mm_cvtsi32_si128(*(int *)(x))))
Chris@69 80 # else
Chris@69 81 # define OP_CVTEPI8_EPI32_M32(x) \
Chris@69 82 (_mm_cvtepi8_epi32(*(__m128i *)(x)))
Chris@69 83 #endif
Chris@69 84
Chris@69 85 /* Similar reasoning about the instruction sequence applies here as for the
Chris@69 86 32-bit (M32) macro above. */
Chris@69 87 # if defined(__clang__) || !defined(__OPTIMIZE__)
Chris@69 88 # define OP_CVTEPI16_EPI32_M64(x) \
Chris@69 89 (_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i *)(x))))
Chris@69 90 # else
Chris@69 91 # define OP_CVTEPI16_EPI32_M64(x) \
Chris@69 92 (_mm_cvtepi16_epi32(*(__m128i *)(x)))
Chris@69 93 # endif
Chris@69 94
Chris@69 95 #endif