| Chris@69 | 1 /* Copyright (c) 2013 Jean-Marc Valin and John Ridges | 
| Chris@69 | 2    Copyright (c) 2014, Cisco Systems, INC MingXiang WeiZhou MinPeng YanWang*/ | 
| Chris@69 | 3 /** | 
| Chris@69 | 4    @file pitch_sse.h | 
| Chris@69 | 5    @brief Pitch analysis | 
| Chris@69 | 6  */ | 
| Chris@69 | 7 | 
| Chris@69 | 8 /* | 
| Chris@69 | 9    Redistribution and use in source and binary forms, with or without | 
| Chris@69 | 10    modification, are permitted provided that the following conditions | 
| Chris@69 | 11    are met: | 
| Chris@69 | 12 | 
| Chris@69 | 13    - Redistributions of source code must retain the above copyright | 
| Chris@69 | 14    notice, this list of conditions and the following disclaimer. | 
| Chris@69 | 15 | 
| Chris@69 | 16    - Redistributions in binary form must reproduce the above copyright | 
| Chris@69 | 17    notice, this list of conditions and the following disclaimer in the | 
| Chris@69 | 18    documentation and/or other materials provided with the distribution. | 
| Chris@69 | 19 | 
| Chris@69 | 20    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | 
| Chris@69 | 21    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | 
| Chris@69 | 22    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | 
| Chris@69 | 23    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER | 
| Chris@69 | 24    OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | 
| Chris@69 | 25    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | 
| Chris@69 | 26    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | 
| Chris@69 | 27    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | 
| Chris@69 | 28    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | 
| Chris@69 | 29    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | 
| Chris@69 | 30    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 
| Chris@69 | 31 */ | 
| Chris@69 | 32 | 
| Chris@69 | 33 #ifndef PITCH_SSE_H | 
| Chris@69 | 34 #define PITCH_SSE_H | 
| Chris@69 | 35 | 
| Chris@69 | 36 #if defined(HAVE_CONFIG_H) | 
| Chris@69 | 37 #include "config.h" | 
| Chris@69 | 38 #endif | 
| Chris@69 | 39 | 
| Chris@69 | 40 #if defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT) /* fixed-point builds in which SSE4.1 may be available */ | 
| Chris@69 | 41 void xcorr_kernel_sse4_1( /* SSE4.1 backend for the xcorr_kernel() macro; only declared here, defined elsewhere */ | 
| Chris@69 | 42                     const opus_int16 *x, /* first input buffer, read-only */ | 
| Chris@69 | 43                     const opus_int16 *y, /* second input buffer, read-only */ | 
| Chris@69 | 44                     opus_val32       sum[4], /* four-entry result array; presumably accumulated in place -- confirm in the implementation */ | 
| Chris@69 | 45                     int              len); /* length argument; units are not visible from this header */ | 
| Chris@69 | 46 #endif /* OPUS_X86_MAY_HAVE_SSE4_1 && FIXED_POINT */ | 
| Chris@69 | 47 | 
| Chris@69 | 48 #if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT) /* non-fixed-point builds in which SSE may be available */ | 
| Chris@69 | 49 void xcorr_kernel_sse( /* SSE backend for the xcorr_kernel() macro; only declared here, defined elsewhere */ | 
| Chris@69 | 50                     const opus_val16 *x, /* first input buffer, read-only */ | 
| Chris@69 | 51                     const opus_val16 *y, /* second input buffer, read-only */ | 
| Chris@69 | 52                     opus_val32       sum[4], /* four-entry result array; presumably accumulated in place -- confirm in the implementation */ | 
| Chris@69 | 53                     int              len); /* length argument; units are not visible from this header */ | 
| Chris@69 | 54 #endif /* OPUS_X86_MAY_HAVE_SSE && !FIXED_POINT */ | 
| Chris@69 | 55 | 
| Chris@69 | 56 #if defined(OPUS_X86_PRESUME_SSE4_1) && defined(FIXED_POINT) /* SSE4.1 presumed: bind xcorr_kernel() directly, no table lookup */ | 
| Chris@69 | 57 #define OVERRIDE_XCORR_KERNEL /* marks that this header supplies xcorr_kernel() */ | 
| Chris@69 | 58 #define xcorr_kernel(x, y, sum, len, arch) \ | 
| Chris@69 | 59     ((void)(arch), xcorr_kernel_sse4_1(x, y, sum, len)) | 
| Chris@69 | 60 | 
| Chris@69 | 61 #elif defined(OPUS_X86_PRESUME_SSE) && !defined(FIXED_POINT) /* SSE presumed: bind xcorr_kernel() directly; arch is evaluated and discarded */ | 
| Chris@69 | 62 #define OVERRIDE_XCORR_KERNEL | 
| Chris@69 | 63 #define xcorr_kernel(x, y, sum, len, arch) \ | 
| Chris@69 | 64     ((void)(arch), xcorr_kernel_sse(x, y, sum, len)) | 
| Chris@69 | 65 | 
| Chris@69 | 66 #elif (defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT)) || (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)) /* only possibly available: dispatch through a table */ | 
| Chris@69 | 67 | 
| Chris@69 | 68 extern void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])( /* function-pointer table with OPUS_ARCHMASK + 1 entries, defined elsewhere */ | 
| Chris@69 | 69                     const opus_val16 *x, | 
| Chris@69 | 70                     const opus_val16 *y, | 
| Chris@69 | 71                     opus_val32       sum[4], | 
| Chris@69 | 72                     int              len); | 
| Chris@69 | 73 | 
| Chris@69 | 74 #define OVERRIDE_XCORR_KERNEL | 
| Chris@69 | 75 #define xcorr_kernel(x, y, sum, len, arch) \ | 
| Chris@69 | 76     ((*XCORR_KERNEL_IMPL[(arch) & OPUS_ARCHMASK])(x, y, sum, len)) | 
| Chris@69 | 77 | 
| Chris@69 | 78 #endif /* xcorr_kernel selection; the masked arch value is the table index in the last branch */ | 
| Chris@69 | 79 | 
| Chris@69 | 80 #if defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT) /* fixed-point builds in which SSE4.1 may be available */ | 
| Chris@69 | 81 opus_val32 celt_inner_prod_sse4_1( /* SSE4.1 backend for the celt_inner_prod() macro; returns an opus_val32; defined elsewhere */ | 
| Chris@69 | 82     const opus_int16 *x, /* first input buffer, read-only */ | 
| Chris@69 | 83     const opus_int16 *y, /* second input buffer, read-only */ | 
| Chris@69 | 84     int               N); /* presumably the number of elements to process -- confirm in the implementation */ | 
| Chris@69 | 85 #endif /* OPUS_X86_MAY_HAVE_SSE4_1 && FIXED_POINT */ | 
| Chris@69 | 86 | 
| Chris@69 | 87 #if defined(OPUS_X86_MAY_HAVE_SSE2) && defined(FIXED_POINT) /* fixed-point builds in which SSE2 may be available */ | 
| Chris@69 | 88 opus_val32 celt_inner_prod_sse2( /* SSE2 backend for the celt_inner_prod() macro; returns an opus_val32; defined elsewhere */ | 
| Chris@69 | 89     const opus_int16 *x, /* first input buffer, read-only */ | 
| Chris@69 | 90     const opus_int16 *y, /* second input buffer, read-only */ | 
| Chris@69 | 91     int               N); /* presumably the number of elements to process -- confirm in the implementation */ | 
| Chris@69 | 92 #endif /* OPUS_X86_MAY_HAVE_SSE2 && FIXED_POINT */ | 
| Chris@69 | 93 | 
| Chris@69 | 94 #if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT) /* guard matches the SSE conditions under which celt_inner_prod() selects this function */ | 
| Chris@69 | 95 opus_val32 celt_inner_prod_sse( /* SSE backend for the celt_inner_prod() macro in non-fixed-point builds; defined elsewhere */ | 
| Chris@69 | 96     const opus_val16 *x, /* first input buffer, read-only */ | 
| Chris@69 | 97     const opus_val16 *y, /* second input buffer, read-only */ | 
| Chris@69 | 98     int               N); /* presumably the number of elements to process -- confirm in the implementation */ | 
| Chris@69 | 99 #endif /* OPUS_X86_MAY_HAVE_SSE && !FIXED_POINT */ | 
| Chris@69 | 100 | 
| Chris@69 | 101 | 
| Chris@69 | 102 #if defined(OPUS_X86_PRESUME_SSE4_1) && defined(FIXED_POINT) /* SSE4.1 presumed: bind celt_inner_prod() directly */ | 
| Chris@69 | 103 #define OVERRIDE_CELT_INNER_PROD /* marks that this header supplies celt_inner_prod() */ | 
| Chris@69 | 104 #define celt_inner_prod(x, y, N, arch) \ | 
| Chris@69 | 105     ((void)(arch), celt_inner_prod_sse4_1(x, y, N)) | 
| Chris@69 | 106 | 
| Chris@69 | 107 #elif defined(OPUS_X86_PRESUME_SSE2) && defined(FIXED_POINT) && !defined(OPUS_X86_MAY_HAVE_SSE4_1) /* SSE2 presumed and SSE4.1 not compiled in: bind the SSE2 version directly */ | 
| Chris@69 | 108 #define OVERRIDE_CELT_INNER_PROD | 
| Chris@69 | 109 #define celt_inner_prod(x, y, N, arch) \ | 
| Chris@69 | 110     ((void)(arch), celt_inner_prod_sse2(x, y, N)) | 
| Chris@69 | 111 | 
| Chris@69 | 112 #elif defined(OPUS_X86_PRESUME_SSE) && !defined(FIXED_POINT) /* SSE presumed, non-fixed-point: bind the SSE version directly */ | 
| Chris@69 | 113 #define OVERRIDE_CELT_INNER_PROD | 
| Chris@69 | 114 #define celt_inner_prod(x, y, N, arch) \ | 
| Chris@69 | 115     ((void)(arch), celt_inner_prod_sse(x, y, N)) | 
| Chris@69 | 116 | 
| Chris@69 | 117 | 
| Chris@69 | 118 #elif ((defined(OPUS_X86_MAY_HAVE_SSE4_1) || defined(OPUS_X86_MAY_HAVE_SSE2)) && defined(FIXED_POINT)) || \ | 
| Chris@69 | 119     (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)) | 
| Chris@69 | 120 | 
| Chris@69 | 121 extern opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])( /* function-pointer table with OPUS_ARCHMASK + 1 entries, defined elsewhere */ | 
| Chris@69 | 122                     const opus_val16 *x, | 
| Chris@69 | 123                     const opus_val16 *y, | 
| Chris@69 | 124                     int               N); | 
| Chris@69 | 125 | 
| Chris@69 | 126 #define OVERRIDE_CELT_INNER_PROD | 
| Chris@69 | 127 #define celt_inner_prod(x, y, N, arch) \ | 
| Chris@69 | 128     ((*CELT_INNER_PROD_IMPL[(arch) & OPUS_ARCHMASK])(x, y, N)) | 
| Chris@69 | 129 | 
| Chris@69 | 130 #endif /* celt_inner_prod selection; the masked arch value is the table index in the last branch */ | 
| Chris@69 | 131 | 
| Chris@69 | 132 #if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT) /* SSE overrides below apply to non-fixed-point builds only */ | 
| Chris@69 | 133 | 
| Chris@69 | 134 #define OVERRIDE_DUAL_INNER_PROD /* marks that this header supplies dual_inner_prod() */ | 
| Chris@69 | 135 #define OVERRIDE_COMB_FILTER_CONST /* marks that this header supplies comb_filter_const() */ | 
| Chris@69 | 136 | 
| Chris@69 | 137 #undef dual_inner_prod /* drop any earlier macro definition so the one below takes effect */ | 
| Chris@69 | 138 #undef comb_filter_const /* same for comb_filter_const */ | 
| Chris@69 | 139 | 
| Chris@69 | 140 void dual_inner_prod_sse(const opus_val16 *x, /* SSE backend for dual_inner_prod(); x is read-only */ | 
| Chris@69 | 141     const opus_val16 *y01, /* first read-only buffer paired with x */ | 
| Chris@69 | 142     const opus_val16 *y02, /* second read-only buffer paired with x */ | 
| Chris@69 | 143     int               N, /* presumably the number of elements -- confirm in the implementation */ | 
| Chris@69 | 144     opus_val32       *xy1, /* output pointer; presumably receives the x/y01 result -- confirm */ | 
| Chris@69 | 145     opus_val32       *xy2); /* output pointer; presumably receives the x/y02 result -- confirm */ | 
| Chris@69 | 146 | 
| Chris@69 | 147 void comb_filter_const_sse(opus_val32 *y, /* SSE backend for comb_filter_const(); y is writable */ | 
| Chris@69 | 148     opus_val32 *x, /* non-const pointer; whether it is written is not visible here */ | 
| Chris@69 | 149     int         T, /* integer parameter; NOTE(review): looks like a period or delay -- confirm */ | 
| Chris@69 | 150     int         N, /* presumably the number of samples -- confirm */ | 
| Chris@69 | 151     opus_val16  g10, /* first of three opus_val16 coefficients */ | 
| Chris@69 | 152     opus_val16  g11, /* second coefficient */ | 
| Chris@69 | 153     opus_val16  g12); /* third coefficient */ | 
| Chris@69 | 154 | 
| Chris@69 | 155 | 
| Chris@69 | 156 #if defined(OPUS_X86_PRESUME_SSE) /* SSE presumed: bind both macros directly to the SSE functions */ | 
| Chris@69 | 157 # define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) \ | 
| Chris@69 | 158     ((void)(arch),dual_inner_prod_sse(x, y01, y02, N, xy1, xy2)) | 
| Chris@69 | 159 | 
| Chris@69 | 160 # define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \ | 
| Chris@69 | 161     ((void)(arch),comb_filter_const_sse(y, x, T, N, g10, g11, g12)) | 
| Chris@69 | 162 #else /* SSE only possibly available: dispatch through tables indexed by the masked arch value */ | 
| Chris@69 | 163 | 
| Chris@69 | 164 extern void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])( /* function-pointer table, defined elsewhere */ | 
| Chris@69 | 165               const opus_val16 *x, | 
| Chris@69 | 166               const opus_val16 *y01, | 
| Chris@69 | 167               const opus_val16 *y02, | 
| Chris@69 | 168               int               N, | 
| Chris@69 | 169               opus_val32       *xy1, | 
| Chris@69 | 170               opus_val32       *xy2); | 
| Chris@69 | 171 | 
| Chris@69 | 172 #define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) \ | 
| Chris@69 | 173     ((*DUAL_INNER_PROD_IMPL[(arch) & OPUS_ARCHMASK])(x, y01, y02, N, xy1, xy2)) | 
| Chris@69 | 174 | 
| Chris@69 | 175 extern void (*const COMB_FILTER_CONST_IMPL[OPUS_ARCHMASK + 1])( /* function-pointer table, defined elsewhere */ | 
| Chris@69 | 176               opus_val32 *y, | 
| Chris@69 | 177               opus_val32 *x, | 
| Chris@69 | 178               int         T, | 
| Chris@69 | 179               int         N, | 
| Chris@69 | 180               opus_val16  g10, | 
| Chris@69 | 181               opus_val16  g11, | 
| Chris@69 | 182               opus_val16  g12); | 
| Chris@69 | 183 | 
| Chris@69 | 184 #define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \ | 
| Chris@69 | 185     ((*COMB_FILTER_CONST_IMPL[(arch) & OPUS_ARCHMASK])(y, x, T, N, g10, g11, g12)) | 
| Chris@69 | 186 | 
| Chris@69 | 187 #define NON_STATIC_COMB_FILTER_CONST_C /* set only in this dispatch branch; presumably makes the C comb_filter_const non-static for the table -- confirm */ | 
| Chris@69 | 188 | 
| Chris@69 | 189 #endif /* OPUS_X86_PRESUME_SSE */ | 
| Chris@69 | 190 #endif /* OPUS_X86_MAY_HAVE_SSE && !FIXED_POINT */ | 
| Chris@69 | 191 | 
| Chris@69 | 192 #endif |