arm/dca.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
3  *
4  * This file is part of Libav.
5  *
6  * Libav is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * Libav is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with Libav; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #ifndef AVCODEC_ARM_DCA_H
22 #define AVCODEC_ARM_DCA_H
23 
24 #include <stdint.h>
25 
26 #include "config.h"
27 #include "libavcodec/mathops.h"
28 
29 #if HAVE_ARMV6_INLINE && AV_GCC_VERSION_AT_LEAST(4,4) && !CONFIG_THUMB
30 
/*
 * ARMv6 inline-assembly implementation of decode_blockcodes().
 *
 * The name is #defined to itself so that other code can detect with the
 * preprocessor that this header supplies the function (presumably the
 * generic C version is wrapped in #ifndef decode_blockcodes -- TODO confirm
 * against the caller).
 *
 * Two independent chains are computed and interleaved instruction by
 * instruction, one starting from code1 and one from code2.  Each chain
 * performs four steps of the form
 *
 *     q     = high 32 bits of (x * ff_inverse[levels])      (smmul)
 *     r     = x + q * (-levels)                             (smlabb)
 *     value = r - ((levels - 1) >> 1)                       (sub ..., lsr #1)
 *     x     = q
 *
 * and stores the four resulting values.  Assuming ff_inverse[levels] is a
 * 32-bit fixed-point reciprocal of levels (not visible here -- TODO confirm),
 * q is x / levels and r is x % levels, i.e. each code is split into four
 * base-`levels` digits that are then re-centred around zero.
 *
 * smlabb only multiplies the bottom 16 bits of its two factors, so q and
 * -levels are assumed to fit in a signed 16-bit value.
 *
 * code1   first packed code; its values are stored at byte offsets
 *         0, 4, 8 and 12 from values
 * code2   second packed code; its values are stored at byte offsets
 *         16, 20, 24 and 28 from values
 * levels  divisor; also used as the index into ff_inverse[]
 * values  destination; 32 bytes are written through this pointer
 *
 * Returns the bitwise OR of what is left in code1 and code2 after the
 * fourth step (the final q of each chain).
 */
31 #define decode_blockcodes decode_blockcodes
32 static inline int decode_blockcodes(int code1, int code2, int levels,
33  int *values)
34 {
35  int v0, v1, v2, v3, v4, v5;
36 
 /*
  * Operand map:
  *   %0..%2  v0..v2   scratch for the code1 chain
  *   %3..%5  v3..v5   scratch for the code2 chain
  *   %6, %7  code1, code2 (read and written)
  *   %8      levels - 1
  *   %9      -levels
  *   %10     ff_inverse[levels]
  *   %11     values
  */
37  __asm__ ("smmul %0, %6, %10 \n"
38  "smmul %3, %7, %10 \n"
39  "smlabb %6, %0, %9, %6 \n"
40  "smlabb %7, %3, %9, %7 \n"
41  "smmul %1, %0, %10 \n"
42  "smmul %4, %3, %10 \n"
43  "sub %6, %6, %8, lsr #1 \n"
44  "sub %7, %7, %8, lsr #1 \n"
45  "smlabb %0, %1, %9, %0 \n"
46  "smlabb %3, %4, %9, %3 \n"
47  "smmul %2, %1, %10 \n"
48  "smmul %5, %4, %10 \n"
 /* first value of each chain */
49  "str %6, [%11, #0] \n"
50  "str %7, [%11, #16] \n"
51  "sub %0, %0, %8, lsr #1 \n"
52  "sub %3, %3, %8, lsr #1 \n"
53  "smlabb %1, %2, %9, %1 \n"
54  "smlabb %4, %5, %9, %4 \n"
 /* %6/%7 are free again after the stores above and receive the final q */
55  "smmul %6, %2, %10 \n"
56  "smmul %7, %5, %10 \n"
 /* second value of each chain */
57  "str %0, [%11, #4] \n"
58  "str %3, [%11, #20] \n"
59  "sub %1, %1, %8, lsr #1 \n"
60  "sub %4, %4, %8, lsr #1 \n"
61  "smlabb %2, %6, %9, %2 \n"
62  "smlabb %5, %7, %9, %5 \n"
 /* third value of each chain */
63  "str %1, [%11, #8] \n"
64  "str %4, [%11, #24] \n"
65  "sub %2, %2, %8, lsr #1 \n"
66  "sub %5, %5, %8, lsr #1 \n"
 /* fourth value of each chain */
67  "str %2, [%11, #12] \n"
68  "str %5, [%11, #28] \n"
 /* outputs: all early-clobber ("&") because they are written before
  * the last read of the input operands */
69  : "=&r"(v0), "=&r"(v1), "=&r"(v2),
70  "=&r"(v3), "=&r"(v4), "=&r"(v5),
71  "+&r"(code1), "+&r"(code2)
72  : "r"(levels - 1), "r"(-levels),
73  "r"(ff_inverse[levels]), "r"(values)
 /* the stores through %11 are not expressed as output operands, so
  * the "memory" clobber is what tells the compiler memory changed */
74  : "memory");
75 
 /* code1 and code2 now hold the final q of their chains */
76  return code1 | code2;
77 }
78 
79 #endif
80 
81 #if HAVE_NEON_INLINE && HAVE_ASM_MOD_Y
82 
/*
 * NEON inline-assembly implementation of int8x8_fmul_int32().
 *
 * The name is #defined to itself so that other code can detect with the
 * preprocessor that this header supplies the function (presumably the
 * generic C version is wrapped in #ifndef int8x8_fmul_int32 -- TODO
 * confirm against the caller).
 *
 * Loads eight signed bytes from src, widens them to 32-bit integers,
 * converts them to float, multiplies each by the converted scale and
 * stores the eight floats to dst.  scale is converted with
 * "vcvt.f32.s32 ..., #4", i.e. as a fixed-point number with 4 fractional
 * bits, so the result is dst[i] = src[i] * (scale / 16.0f) for i = 0..7.
 *
 * dst    destination for 8 floats; the store uses a :128 alignment
 *        qualifier, so dst is assumed to be 16-byte aligned
 * src    8 input bytes; the load uses a :64 alignment qualifier, so src
 *        is assumed to be 8-byte aligned
 * scale  integer scale factor, interpreted with 4 fractional bits
 *
 * NOTE(review): operand %2 (scale) is declared input-only but the first
 * instruction overwrites it in place -- confirm this is safe or make it
 * an in/out operand.
 * NOTE(review): the bytes at src are read by the asm, but only the pointer
 * value is passed ("r"(src)); there is no memory input operand or "memory"
 * clobber describing that read -- confirm the compiler cannot move stores
 * to *src across this statement.
 */
83 #define int8x8_fmul_int32 int8x8_fmul_int32
84 static inline void int8x8_fmul_int32(float *dst, const int8_t *src, int scale)
85 {
 /*
  * Operand map:
  *   %0  *(float (*)[8])dst as a memory operand (printed via %m0)
  *   %1  src in a core register
  *   %2  scale in a VFP register ("x" constraint); %y2 prints it in the
  *       form needed for the by-scalar vmul (requires HAVE_ASM_MOD_Y)
  */
 /* scale: fixed-point (4 fractional bits) -> float, in place */
86  __asm__ ("vcvt.f32.s32 %2, %2, #4 \n"
 /* load 8 x int8 */
87  "vld1.8 {d0}, [%1,:64] \n"
 /* sign-extend int8 -> int16 (q0), then int16 -> int32 (q1 = upper four
  * elements, q0 = lower four; d1 must be consumed before q0 is rewritten) */
88  "vmovl.s8 q0, d0 \n"
89  "vmovl.s16 q1, d1 \n"
90  "vmovl.s16 q0, d0 \n"
 /* int32 -> float */
91  "vcvt.f32.s32 q0, q0 \n"
92  "vcvt.f32.s32 q1, q1 \n"
 /* multiply all eight lanes by the converted scale */
93  "vmul.f32 q0, q0, %y2 \n"
94  "vmul.f32 q1, q1, %y2 \n"
 /* store 8 floats */
95  "vst1.32 {q0-q1}, [%m0,:128] \n"
96  : "=Um"(*(float (*)[8])dst)
97  : "r"(src), "x"(scale)
 /* q0 and q1 are used as scratch; they alias d0-d3 */
98  : "d0", "d1", "d2", "d3");
99 }
100 
101 #endif
102 
103 #endif /* AVCODEC_ARM_DCA_H */
const uint32_t ff_inverse[257]
Definition: mathtables.c:25
AVS_Value src
Definition: avisynth_c.h:523
static void int8x8_fmul_int32(float *dst, const int8_t *src, int scale)
Definition: dcadec.c:1286
static int decode_blockcodes(int code1, int code2, int levels, int *values)
Definition: dcadec.c:1275
#define v0
Definition: regdef.h:26
Same thing on a dB scale
else dst[i][x+y *dst_stride[i]]
Definition: vf_mcdeint.c:160
These buffered frames must be flushed immediately if a new input produces new output. The filter must not call request_frame to get more; it must just process the frame or queue it. The task of requesting more frames is left to the filter's request_frame method or the application. If a filter has several inputs, the filter must be ready for frames arriving randomly on any input; any filter with several inputs will most likely require some kind of queuing mechanism. It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced. request_frame: This method is called when a frame is wanted on an output. For an input, it should directly call filter_frame on the corresponding output. For a filter, if there are queued frames already, one of these frames should be pushed. If not, the filter should request a frame on one of its inputs, repeatedly, until at least one frame has been pushed. Return values: