annotate ffmpeg/libswresample/x86/swresample_x86.c @ 13:844d341cf643 tip

Back up before ISMIR
author Yading Song <yading.song@eecs.qmul.ac.uk>
date Thu, 31 Oct 2013 13:17:06 +0000
parents f445c3017523
children
rev   line source
yading@11 1 /*
yading@11 2 * Copyright (C) 2012 Michael Niedermayer (michaelni@gmx.at)
yading@11 3 *
yading@11 4 * This file is part of libswresample
yading@11 5 *
yading@11 6 * libswresample is free software; you can redistribute it and/or
yading@11 7 * modify it under the terms of the GNU Lesser General Public
yading@11 8 * License as published by the Free Software Foundation; either
yading@11 9 * version 2.1 of the License, or (at your option) any later version.
yading@11 10 *
yading@11 11 * libswresample is distributed in the hope that it will be useful,
yading@11 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
yading@11 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
yading@11 14 * Lesser General Public License for more details.
yading@11 15 *
yading@11 16 * You should have received a copy of the GNU Lesser General Public
yading@11 17 * License along with libswresample; if not, write to the Free Software
yading@11 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
yading@11 19 */
yading@11 20
yading@11 21 #include "libswresample/swresample_internal.h"
yading@11 22 #include "libswresample/audioconvert.h"
yading@11 23
/*
 * Prototype generators for the SIMD sample-format converters referenced
 * below. The functions are only declared here; their definitions live
 * outside this file.
 *
 * Every generated name has the form
 *     ff<prefix>_<src>_to_<dst>_a_<isa>
 * and every function takes (uint8_t **dst, const uint8_t **src, int len).
 *
 * PROTO  declares one function,
 * PROTO2 covers the three source formats for one destination format,
 * PROTO3 covers all nine source/destination combinations for one ISA,
 * PROTO4 repeats that for mmx, sse, sse2, ssse3, sse4 and avx.
 */
#define PROTO(prefix, src_fmt, dst_fmt, isa) \
    void ff ## prefix ## _ ## src_fmt ## _to_ ## dst_fmt ## _a_ ## isa(uint8_t **dst, const uint8_t **src, int len);

#define PROTO2(prefix, dst_fmt, isa) \
    PROTO(prefix, int16, dst_fmt, isa) \
    PROTO(prefix, int32, dst_fmt, isa) \
    PROTO(prefix, float, dst_fmt, isa)

#define PROTO3(prefix, isa) \
    PROTO2(prefix, int16, isa) \
    PROTO2(prefix, int32, isa) \
    PROTO2(prefix, float, isa)

#define PROTO4(prefix) \
    PROTO3(prefix, mmx)   \
    PROTO3(prefix, sse)   \
    PROTO3(prefix, sse2)  \
    PROTO3(prefix, ssse3) \
    PROTO3(prefix, sse4)  \
    PROTO3(prefix, avx)

/* An empty prefix yields the plain ff_<src>_to_<dst>_a_<isa> names. */
PROTO4()
PROTO4(_pack_2ch)
PROTO4(_pack_6ch)
PROTO4(_unpack_2ch)
yading@11 32
yading@11 33 av_cold void swri_audio_convert_init_x86(struct AudioConvert *ac,
yading@11 34 enum AVSampleFormat out_fmt,
yading@11 35 enum AVSampleFormat in_fmt,
yading@11 36 int channels){
yading@11 37 int mm_flags = av_get_cpu_flags();
yading@11 38
yading@11 39 ac->simd_f= NULL;
yading@11 40
yading@11 41 //FIXME add memcpy case
yading@11 42
yading@11 43 #define MULTI_CAPS_FUNC(flag, cap) \
yading@11 44 if (mm_flags & flag) {\
yading@11 45 if( out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S16 || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_S16P)\
yading@11 46 ac->simd_f = ff_int16_to_int32_a_ ## cap;\
yading@11 47 if( out_fmt == AV_SAMPLE_FMT_S16 && in_fmt == AV_SAMPLE_FMT_S32 || out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_S32P)\
yading@11 48 ac->simd_f = ff_int32_to_int16_a_ ## cap;\
yading@11 49 }
yading@11 50
yading@11 51 MULTI_CAPS_FUNC(AV_CPU_FLAG_MMX, mmx)
yading@11 52 MULTI_CAPS_FUNC(AV_CPU_FLAG_SSE2, sse2)
yading@11 53
yading@11 54 if(mm_flags & AV_CPU_FLAG_MMX) {
yading@11 55 if(channels == 6) {
yading@11 56 if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
yading@11 57 ac->simd_f = ff_pack_6ch_float_to_float_a_mmx;
yading@11 58 }
yading@11 59 }
yading@11 60
yading@11 61 if(mm_flags & AV_CPU_FLAG_SSE2) {
yading@11 62 if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_S32 || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S32P)
yading@11 63 ac->simd_f = ff_int32_to_float_a_sse2;
yading@11 64 if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_S16 || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S16P)
yading@11 65 ac->simd_f = ff_int16_to_float_a_sse2;
yading@11 66 if( out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_FLTP)
yading@11 67 ac->simd_f = ff_float_to_int32_a_sse2;
yading@11 68 if( out_fmt == AV_SAMPLE_FMT_S16 && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_FLTP)
yading@11 69 ac->simd_f = ff_float_to_int16_a_sse2;
yading@11 70
yading@11 71 if(channels == 2) {
yading@11 72 if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
yading@11 73 ac->simd_f = ff_pack_2ch_int32_to_int32_a_sse2;
yading@11 74 if( out_fmt == AV_SAMPLE_FMT_S16 && in_fmt == AV_SAMPLE_FMT_S16P)
yading@11 75 ac->simd_f = ff_pack_2ch_int16_to_int16_a_sse2;
yading@11 76 if( out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S16P)
yading@11 77 ac->simd_f = ff_pack_2ch_int16_to_int32_a_sse2;
yading@11 78 if( out_fmt == AV_SAMPLE_FMT_S16 && in_fmt == AV_SAMPLE_FMT_S32P)
yading@11 79 ac->simd_f = ff_pack_2ch_int32_to_int16_a_sse2;
yading@11 80
yading@11 81 if( out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_S32)
yading@11 82 ac->simd_f = ff_unpack_2ch_int32_to_int32_a_sse2;
yading@11 83 if( out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_S16)
yading@11 84 ac->simd_f = ff_unpack_2ch_int16_to_int16_a_sse2;
yading@11 85 if( out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_S16)
yading@11 86 ac->simd_f = ff_unpack_2ch_int16_to_int32_a_sse2;
yading@11 87 if( out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_S32)
yading@11 88 ac->simd_f = ff_unpack_2ch_int32_to_int16_a_sse2;
yading@11 89
yading@11 90 if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_S32P)
yading@11 91 ac->simd_f = ff_pack_2ch_int32_to_float_a_sse2;
yading@11 92 if( out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_FLTP)
yading@11 93 ac->simd_f = ff_pack_2ch_float_to_int32_a_sse2;
yading@11 94 if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_S16P)
yading@11 95 ac->simd_f = ff_pack_2ch_int16_to_float_a_sse2;
yading@11 96 if( out_fmt == AV_SAMPLE_FMT_S16 && in_fmt == AV_SAMPLE_FMT_FLTP)
yading@11 97 ac->simd_f = ff_pack_2ch_float_to_int16_a_sse2;
yading@11 98 if( out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S32)
yading@11 99 ac->simd_f = ff_unpack_2ch_int32_to_float_a_sse2;
yading@11 100 if( out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_FLT)
yading@11 101 ac->simd_f = ff_unpack_2ch_float_to_int32_a_sse2;
yading@11 102 if( out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S16)
yading@11 103 ac->simd_f = ff_unpack_2ch_int16_to_float_a_sse2;
yading@11 104 if( out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_FLT)
yading@11 105 ac->simd_f = ff_unpack_2ch_float_to_int16_a_sse2;
yading@11 106 }
yading@11 107 }
yading@11 108 if(mm_flags & AV_CPU_FLAG_SSSE3) {
yading@11 109 if(channels == 2) {
yading@11 110 if( out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_S16)
yading@11 111 ac->simd_f = ff_unpack_2ch_int16_to_int16_a_ssse3;
yading@11 112 if( out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_S16)
yading@11 113 ac->simd_f = ff_unpack_2ch_int16_to_int32_a_ssse3;
yading@11 114 if( out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S16)
yading@11 115 ac->simd_f = ff_unpack_2ch_int16_to_float_a_ssse3;
yading@11 116 }
yading@11 117 }
yading@11 118 if(mm_flags & AV_CPU_FLAG_SSE4) {
yading@11 119 if(channels == 6) {
yading@11 120 if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
yading@11 121 ac->simd_f = ff_pack_6ch_float_to_float_a_sse4;
yading@11 122 if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_S32P)
yading@11 123 ac->simd_f = ff_pack_6ch_int32_to_float_a_sse4;
yading@11 124 if( out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_FLTP)
yading@11 125 ac->simd_f = ff_pack_6ch_float_to_int32_a_sse4;
yading@11 126 }
yading@11 127 }
yading@11 128 if(HAVE_AVX_EXTERNAL && mm_flags & AV_CPU_FLAG_AVX) {
yading@11 129 if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_S32 || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S32P)
yading@11 130 ac->simd_f = ff_int32_to_float_a_avx;
yading@11 131 if(channels == 6) {
yading@11 132 if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
yading@11 133 ac->simd_f = ff_pack_6ch_float_to_float_a_avx;
yading@11 134 if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_S32P)
yading@11 135 ac->simd_f = ff_pack_6ch_int32_to_float_a_avx;
yading@11 136 if( out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_FLTP)
yading@11 137 ac->simd_f = ff_pack_6ch_float_to_int32_a_avx;
yading@11 138 }
yading@11 139 }
yading@11 140 }
yading@11 141
yading@11 142 #define D(type, simd) \
yading@11 143 mix_1_1_func_type ff_mix_1_1_a_## type ## _ ## simd;\
yading@11 144 mix_2_1_func_type ff_mix_2_1_a_## type ## _ ## simd;
yading@11 145
yading@11 146 D(float, sse)
yading@11 147 D(float, avx)
yading@11 148 D(int16, mmx)
yading@11 149 D(int16, sse2)
yading@11 150
yading@11 151
yading@11 152 av_cold void swri_rematrix_init_x86(struct SwrContext *s){
yading@11 153 int mm_flags = av_get_cpu_flags();
yading@11 154 int nb_in = av_get_channel_layout_nb_channels(s->in_ch_layout);
yading@11 155 int nb_out = av_get_channel_layout_nb_channels(s->out_ch_layout);
yading@11 156 int num = nb_in * nb_out;
yading@11 157 int i,j;
yading@11 158
yading@11 159 s->mix_1_1_simd = NULL;
yading@11 160 s->mix_2_1_simd = NULL;
yading@11 161
yading@11 162 if (s->midbuf.fmt == AV_SAMPLE_FMT_S16P){
yading@11 163 if(mm_flags & AV_CPU_FLAG_MMX) {
yading@11 164 s->mix_1_1_simd = ff_mix_1_1_a_int16_mmx;
yading@11 165 s->mix_2_1_simd = ff_mix_2_1_a_int16_mmx;
yading@11 166 }
yading@11 167 if(mm_flags & AV_CPU_FLAG_SSE2) {
yading@11 168 s->mix_1_1_simd = ff_mix_1_1_a_int16_sse2;
yading@11 169 s->mix_2_1_simd = ff_mix_2_1_a_int16_sse2;
yading@11 170 }
yading@11 171 s->native_simd_matrix = av_mallocz(2 * num * sizeof(int16_t));
yading@11 172 for(i=0; i<nb_out; i++){
yading@11 173 int sh = 0;
yading@11 174 for(j=0; j<nb_in; j++)
yading@11 175 sh = FFMAX(sh, FFABS(((int*)s->native_matrix)[i * nb_in + j]));
yading@11 176 sh = FFMAX(av_log2(sh) - 14, 0);
yading@11 177 for(j=0; j<nb_in; j++) {
yading@11 178 ((int16_t*)s->native_simd_matrix)[2*(i * nb_in + j)+1] = 15 - sh;
yading@11 179 ((int16_t*)s->native_simd_matrix)[2*(i * nb_in + j)] =
yading@11 180 ((((int*)s->native_matrix)[i * nb_in + j]) + (1<<sh>>1)) >> sh;
yading@11 181 }
yading@11 182 }
yading@11 183 } else if(s->midbuf.fmt == AV_SAMPLE_FMT_FLTP){
yading@11 184 if(mm_flags & AV_CPU_FLAG_SSE) {
yading@11 185 s->mix_1_1_simd = ff_mix_1_1_a_float_sse;
yading@11 186 s->mix_2_1_simd = ff_mix_2_1_a_float_sse;
yading@11 187 }
yading@11 188 if(HAVE_AVX_EXTERNAL && mm_flags & AV_CPU_FLAG_AVX) {
yading@11 189 s->mix_1_1_simd = ff_mix_1_1_a_float_avx;
yading@11 190 s->mix_2_1_simd = ff_mix_2_1_a_float_avx;
yading@11 191 }
yading@11 192 s->native_simd_matrix = av_mallocz(num * sizeof(float));
yading@11 193 memcpy(s->native_simd_matrix, s->native_matrix, num * sizeof(float));
yading@11 194 }
yading@11 195 }