/*
 * Copyright (c) 2008 Siarhei Siamashka
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "config.h"
#include "libavutil/arm/asm.S"

/**
 * ARM VFP optimized float to int16 conversion.
 * Assumes that len is a positive multiple of 8, that the destination
 * buffer is at least 4-byte aligned (8-byte alignment is better for
 * performance), and little-endian byte order.
 */
@ void ff_float_to_int16_vfp(int16_t *dst, const float *src, int len)
function ff_float_to_int16_vfp, export=1
        push            {r4-r8,lr}
        vpush           {d8-d11}
        @ Prime the pipeline: load the first 8 floats and convert them
        @ to signed 32-bit integers in s0-s7.
        vldmia          r1!, {s16-s23}
        vcvt.s32.f32    s0,  s16
        vcvt.s32.f32    s1,  s17
        vcvt.s32.f32    s2,  s18
        vcvt.s32.f32    s3,  s19
        vcvt.s32.f32    s4,  s20
        vcvt.s32.f32    s5,  s21
        vcvt.s32.f32    s6,  s22
        vcvt.s32.f32    s7,  s23
1:
        subs            r2,  r2,  #8
        @ Move the converted samples to core registers ...
        vmov            r3,  r4,  s0, s1
        vmov            r5,  r6,  s2, s3
        vmov            r7,  r8,  s4, s5
        vmov            ip,  lr,  s6, s7
        @ ... and, if more samples remain, load and convert the next 8
        @ floats so the VFP work overlaps with the integer packing below.
        it              gt
        vldmiagt        r1!, {s16-s23}
        ssat            r4,  #16, r4
        ssat            r3,  #16, r3
        ssat            r6,  #16, r6
        ssat            r5,  #16, r5
        @ Pack pairs of saturated 16-bit samples into 32-bit words.
        pkhbt           r3,  r3,  r4, lsl #16
        pkhbt           r4,  r5,  r6, lsl #16
        itttt           gt
        vcvtgt.s32.f32  s0,  s16
        vcvtgt.s32.f32  s1,  s17
        vcvtgt.s32.f32  s2,  s18
        vcvtgt.s32.f32  s3,  s19
        itttt           gt
        vcvtgt.s32.f32  s4,  s20
        vcvtgt.s32.f32  s5,  s21
        vcvtgt.s32.f32  s6,  s22
        vcvtgt.s32.f32  s7,  s23
        ssat            r8,  #16, r8
        ssat            r7,  #16, r7
        ssat            lr,  #16, lr
        ssat            ip,  #16, ip
        pkhbt           r5,  r7,  r8, lsl #16
        pkhbt           r6,  ip,  lr, lsl #16
        @ Store 8 packed int16 samples (16 bytes) and loop while len > 0.
        stmia           r0!, {r3-r6}
        bgt             1b

        vpop            {d8-d11}
        pop             {r4-r8,pc}
endfunc
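
For reference, a minimal C-side usage sketch of the routine above, under the assumptions stated in its doc comment (len a positive multiple of 8, aligned buffers, build targeting ARM with VFP and linked against this file). The prototype matches the @ comment in the source; convert_block() and the sample data are illustrative only, not FFmpeg API.

/* Minimal usage sketch (assumption: convert_block() and the sample
 * values are hypothetical; only the prototype comes from the file). */
#include <stdint.h>

void ff_float_to_int16_vfp(int16_t *dst, const float *src, int len);

static void convert_block(void)
{
    /* len must be a positive multiple of 8; dst should be at least
     * 4-byte aligned (8-byte alignment is preferred). Each float is
     * truncated toward zero and saturated to the int16 range. */
    float   src[8] __attribute__((aligned(8))) =
        { 100.7f, -100.7f, 40000.0f, -40000.0f, 0.5f, -0.5f, 1.0f, -1.0f };
    int16_t dst[8] __attribute__((aligned(8)));

    ff_float_to_int16_vfp(dst, src, 8);
    /* dst now holds { 100, -100, 32767, -32768, 0, 0, 1, -1 }. */
}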