vp3dsp.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2004 the ffmpeg project
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 /**
22  * @file
23  * Standard C DSP-oriented functions cribbed from the original VP3
24  * source code.
25  */
26 
27 #include "libavutil/attributes.h"
28 #include "libavutil/common.h"
29 #include "avcodec.h"
30 #include "dsputil.h"
31 #include "rnd_avg.h"
32 #include "vp3dsp.h"
33 
/* Rounding term used by the DC-only path of idct(): it is shifted left by 16
 * and added before the final >> 20, i.e. it rounds the implied >> 4. */
#define IdctAdjustBeforeShift 8

/* 16.16 fixed-point cosines: xCkSj == round(65536 * cos(k * pi / 16)),
 * which equals round(65536 * sin(j * pi / 16)) since k + j == 8. */
#define xC1S7 64277
#define xC2S6 60547
#define xC3S5 54491
#define xC4S4 46341
#define xC5S3 36410
#define xC6S2 25080
#define xC7S1 12785

/* Fixed-point multiply: (a * b) >> 16.
 * The product is formed in unsigned arithmetic so that it wraps instead of
 * triggering signed-overflow undefined behaviour when an intermediate IDCT
 * term is large (e.g. xC4S4 * 84000 does not fit in a 32-bit int).  The
 * result is converted back to int before the shift so negative values are
 * still shifted arithmetically; for every product that fits in an int the
 * value is identical to the plain signed expression. */
#define M(a,b) (((int)((unsigned)(a) * (unsigned)(b))) >> 16)
44 
45 static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int type)
46 {
47  int16_t *ip = input;
48 
49  int A, B, C, D, Ad, Bd, Cd, Dd, E, F, G, H;
50  int Ed, Gd, Add, Bdd, Fd, Hd;
51 
52  int i;
53 
54  /* Inverse DCT on the rows now */
55  for (i = 0; i < 8; i++) {
56  /* Check for non-zero values */
57  if ( ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] |
58  ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8] ) {
59  A = M(xC1S7, ip[1 * 8]) + M(xC7S1, ip[7 * 8]);
60  B = M(xC7S1, ip[1 * 8]) - M(xC1S7, ip[7 * 8]);
61  C = M(xC3S5, ip[3 * 8]) + M(xC5S3, ip[5 * 8]);
62  D = M(xC3S5, ip[5 * 8]) - M(xC5S3, ip[3 * 8]);
63 
64  Ad = M(xC4S4, (A - C));
65  Bd = M(xC4S4, (B - D));
66 
67  Cd = A + C;
68  Dd = B + D;
69 
70  E = M(xC4S4, (ip[0 * 8] + ip[4 * 8]));
71  F = M(xC4S4, (ip[0 * 8] - ip[4 * 8]));
72 
73  G = M(xC2S6, ip[2 * 8]) + M(xC6S2, ip[6 * 8]);
74  H = M(xC6S2, ip[2 * 8]) - M(xC2S6, ip[6 * 8]);
75 
76  Ed = E - G;
77  Gd = E + G;
78 
79  Add = F + Ad;
80  Bdd = Bd - H;
81 
82  Fd = F - Ad;
83  Hd = Bd + H;
84 
85  /* Final sequence of operations over-write original inputs. */
86  ip[0 * 8] = Gd + Cd ;
87  ip[7 * 8] = Gd - Cd ;
88 
89  ip[1 * 8] = Add + Hd;
90  ip[2 * 8] = Add - Hd;
91 
92  ip[3 * 8] = Ed + Dd ;
93  ip[4 * 8] = Ed - Dd ;
94 
95  ip[5 * 8] = Fd + Bdd;
96  ip[6 * 8] = Fd - Bdd;
97  }
98 
99  ip += 1; /* next row */
100  }
101 
102  ip = input;
103 
104  for ( i = 0; i < 8; i++) {
105  /* Check for non-zero values (bitwise or faster than ||) */
106  if ( ip[1] | ip[2] | ip[3] |
107  ip[4] | ip[5] | ip[6] | ip[7] ) {
108 
109  A = M(xC1S7, ip[1]) + M(xC7S1, ip[7]);
110  B = M(xC7S1, ip[1]) - M(xC1S7, ip[7]);
111  C = M(xC3S5, ip[3]) + M(xC5S3, ip[5]);
112  D = M(xC3S5, ip[5]) - M(xC5S3, ip[3]);
113 
114  Ad = M(xC4S4, (A - C));
115  Bd = M(xC4S4, (B - D));
116 
117  Cd = A + C;
118  Dd = B + D;
119 
120  E = M(xC4S4, (ip[0] + ip[4])) + 8;
121  F = M(xC4S4, (ip[0] - ip[4])) + 8;
122 
123  if(type==1){ //HACK
124  E += 16*128;
125  F += 16*128;
126  }
127 
128  G = M(xC2S6, ip[2]) + M(xC6S2, ip[6]);
129  H = M(xC6S2, ip[2]) - M(xC2S6, ip[6]);
130 
131  Ed = E - G;
132  Gd = E + G;
133 
134  Add = F + Ad;
135  Bdd = Bd - H;
136 
137  Fd = F - Ad;
138  Hd = Bd + H;
139 
140  /* Final sequence of operations over-write original inputs. */
141  if (type == 1) {
142  dst[0*stride] = av_clip_uint8((Gd + Cd ) >> 4);
143  dst[7*stride] = av_clip_uint8((Gd - Cd ) >> 4);
144 
145  dst[1*stride] = av_clip_uint8((Add + Hd ) >> 4);
146  dst[2*stride] = av_clip_uint8((Add - Hd ) >> 4);
147 
148  dst[3*stride] = av_clip_uint8((Ed + Dd ) >> 4);
149  dst[4*stride] = av_clip_uint8((Ed - Dd ) >> 4);
150 
151  dst[5*stride] = av_clip_uint8((Fd + Bdd ) >> 4);
152  dst[6*stride] = av_clip_uint8((Fd - Bdd ) >> 4);
153  }else{
154  dst[0*stride] = av_clip_uint8(dst[0*stride] + ((Gd + Cd ) >> 4));
155  dst[7*stride] = av_clip_uint8(dst[7*stride] + ((Gd - Cd ) >> 4));
156 
157  dst[1*stride] = av_clip_uint8(dst[1*stride] + ((Add + Hd ) >> 4));
158  dst[2*stride] = av_clip_uint8(dst[2*stride] + ((Add - Hd ) >> 4));
159 
160  dst[3*stride] = av_clip_uint8(dst[3*stride] + ((Ed + Dd ) >> 4));
161  dst[4*stride] = av_clip_uint8(dst[4*stride] + ((Ed - Dd ) >> 4));
162 
163  dst[5*stride] = av_clip_uint8(dst[5*stride] + ((Fd + Bdd ) >> 4));
164  dst[6*stride] = av_clip_uint8(dst[6*stride] + ((Fd - Bdd ) >> 4));
165  }
166 
167  } else {
168  if (type == 1) {
169  dst[0*stride]=
170  dst[1*stride]=
171  dst[2*stride]=
172  dst[3*stride]=
173  dst[4*stride]=
174  dst[5*stride]=
175  dst[6*stride]=
176  dst[7*stride]= av_clip_uint8(128 + ((xC4S4 * ip[0] + (IdctAdjustBeforeShift<<16))>>20));
177  }else{
178  if(ip[0]){
179  int v= ((xC4S4 * ip[0] + (IdctAdjustBeforeShift<<16))>>20);
180  dst[0*stride] = av_clip_uint8(dst[0*stride] + v);
181  dst[1*stride] = av_clip_uint8(dst[1*stride] + v);
182  dst[2*stride] = av_clip_uint8(dst[2*stride] + v);
183  dst[3*stride] = av_clip_uint8(dst[3*stride] + v);
184  dst[4*stride] = av_clip_uint8(dst[4*stride] + v);
185  dst[5*stride] = av_clip_uint8(dst[5*stride] + v);
186  dst[6*stride] = av_clip_uint8(dst[6*stride] + v);
187  dst[7*stride] = av_clip_uint8(dst[7*stride] + v);
188  }
189  }
190  }
191 
192  ip += 8; /* next column */
193  dst++;
194  }
195 }
196 
/**
 * Inverse-transform a coefficient block and store the result in dest
 * (idct() in mode 1), then reset all 64 coefficients to zero.
 *
 * @param dest      destination pixels (annotated "align 8" in the signature)
 * @param line_size stride handed to idct()
 * @param block     64 coefficients (annotated "align 16"); cleared on return
 */
static void vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size,
                           int16_t *block/*align 16*/)
{
    const int put_mode = 1;

    idct(dest, line_size, block, put_mode);

    /* Leave the block zeroed once it has been consumed. */
    memset(block, 0, 64 * sizeof(block[0]));
}
203 
/**
 * Inverse-transform a coefficient block and add the result to the pixels
 * already present in dest (idct() in mode 2), then reset all 64 coefficients
 * to zero.
 *
 * @param dest      pixels to be updated (annotated "align 8" in the signature)
 * @param line_size stride handed to idct()
 * @param block     64 coefficients (annotated "align 16"); cleared on return
 */
static void vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size,
                           int16_t *block/*align 16*/)
{
    const int add_mode = 2;

    idct(dest, line_size, block, add_mode);

    /* Leave the block zeroed once it has been consumed. */
    memset(block, 0, 64 * sizeof(block[0]));
}
210 
/**
 * Add a constant derived from block[0] to an 8x8 area of pixels.
 *
 * The offset is (block[0] + 15) >> 5; each updated pixel is clipped to
 * 0..255.  Only block[0] is read, and it is set to zero afterwards.
 *
 * @param dest      top-left pixel of the 8x8 area (annotated "align 8")
 * @param line_size byte distance between successive lines of dest
 * @param block     coefficient block (annotated "align 16")
 */
static void vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size,
                              int16_t *block/*align 16*/)
{
    const int offset = (block[0] + 15) >> 5;
    int y, x;

    for (y = 0; y < 8; y++) {
        for (x = 0; x < 8; x++)
            dest[x] = av_clip_uint8(dest[x] + offset);
        dest += line_size;
    }

    block[0] = 0;
}
229 
/**
 * Filter eight horizontally adjacent positions across the boundary between
 * the line at first_pixel and the line directly before it (-stride).
 *
 * For each position, a value is computed from the two lines before and the
 * two lines starting at first_pixel, mapped through bounding_values, then
 * added to the pixel on the previous line and subtracted from the pixel on
 * the current line (both clipped to 0..255).
 *
 * @param first_pixel     first pixel of the line on the far side of the edge
 * @param stride          byte distance between lines
 * @param bounding_values lookup table indexed by (value + 4) >> 3; with 8-bit
 *                        pixels that index lies in -127..128, so the pointer
 *                        presumably refers to the middle of a larger table
 *                        (verify against the caller)
 */
static void vp3_v_loop_filter_c(uint8_t *first_pixel, int stride,
                                int *bounding_values)
{
    const int up = -stride;
    int x;

    for (x = 0; x < 8; x++) {
        uint8_t *px = first_pixel + x;
        int adjust = (px[2 * up] - px[stride]) + 3 * (px[0] - px[up]);

        adjust = bounding_values[(adjust + 4) >> 3];
        px[up] = av_clip_uint8(px[up] + adjust);
        px[0]  = av_clip_uint8(px[0]  - adjust);
    }
}
246 
/**
 * Filter eight vertically adjacent positions across the boundary between
 * the pixel at first_pixel and the pixel directly to its left.
 *
 * For each line, a value is computed from the pixels at offsets -2, -1, 0
 * and +1, mapped through bounding_values, then added to the pixel at -1 and
 * subtracted from the pixel at 0 (both clipped to 0..255).
 *
 * @param first_pixel     pixel just right of the edge on the first line
 * @param stride          byte distance between lines
 * @param bounding_values lookup table indexed by (value + 4) >> 3; with 8-bit
 *                        pixels that index lies in -127..128, so the pointer
 *                        presumably refers to the middle of a larger table
 *                        (verify against the caller)
 */
static void vp3_h_loop_filter_c(uint8_t *first_pixel, int stride,
                                int *bounding_values)
{
    const int span = 8 * stride;
    int off;

    /* Terminates on equality, exactly like a pointer-versus-end comparison,
     * so negative strides work and a zero stride yields no iterations. */
    for (off = 0; off != span; off += stride) {
        uint8_t *px = first_pixel + off;
        int adjust = (px[-2] - px[1]) + 3 * (px[0] - px[-1]);

        adjust = bounding_values[(adjust + 4) >> 3];
        px[-1] = av_clip_uint8(px[-1] + adjust);
        px[ 0] = av_clip_uint8(px[ 0] - adjust);
    }
}
262 
/**
 * Combine two 8-pixel-wide sources into dst, line by line, using
 * no_rnd_avg32() on four bytes at a time.
 *
 * All three buffers are walked with the same stride.  The sources are read
 * with AV_RN32 and the destination is written with AV_WN32A.
 *
 * @param dst    destination, 8 bytes written per line
 * @param src1   first source
 * @param src2   second source
 * @param stride byte distance between lines, shared by dst, src1 and src2
 * @param h      number of lines to process
 */
static void put_no_rnd_pixels_l2(uint8_t *dst, const uint8_t *src1,
                                 const uint8_t *src2, ptrdiff_t stride, int h)
{
    ptrdiff_t line = 0;
    int y, x;

    for (y = 0; y < h; y++) {
        /* Two 32-bit words cover the eight pixels of this line. */
        for (x = 0; x < 8; x += 4) {
            const uint32_t p = AV_RN32(&src1[line + x]);
            const uint32_t q = AV_RN32(&src2[line + x]);

            AV_WN32A(&dst[line + x], no_rnd_avg32(p, q));
        }
        line += stride;
    }
}
279 
281 {
283 
289 
290  if (ARCH_ARM)
291  ff_vp3dsp_init_arm(c, flags);
292  if (ARCH_BFIN)
293  ff_vp3dsp_init_bfin(c, flags);
294  if (ARCH_PPC)
295  ff_vp3dsp_init_ppc(c, flags);
296  if (ARCH_X86)
297  ff_vp3dsp_init_x86(c, flags);
298 }
float v
#define C
#define ARCH_PPC
Definition: config.h:26
void(* put_no_rnd_pixels_l2)(uint8_t *dst, const uint8_t *a, const uint8_t *b, ptrdiff_t stride, int h)
Copy 8xH pixels from source to destination buffer using a bilinear filter with no rounding (i...
Definition: vp3dsp.h:36
#define B
Definition: dsputil.c:2025
#define ARCH_BFIN
Definition: config.h:20
#define xC4S4
Definition: vp3dsp.c:38
static void vp3_idct_put_c(uint8_t *dest, int line_size, int16_t *block)
Definition: vp3dsp.c:197
static void put_no_rnd_pixels_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, ptrdiff_t stride, int h)
Definition: vp3dsp.c:263
int stride
Definition: mace.c:144
#define AV_WN32A(p, v)
Definition: intreadwrite.h:530
Macro definitions for various function/variable attributes.
void(* idct_add)(uint8_t *dest, int line_size, int16_t *block)
Definition: vp3dsp.h:42
#define xC1S7
Definition: vp3dsp.c:35
av_cold void ff_vp3dsp_init_bfin(VP3DSPContext *c, int flags)
Definition: vp3_bfin.c:59
#define M(a, b)
Definition: vp3dsp.c:43
uint8_t
#define av_cold
Definition: attributes.h:78
#define H
Definition: swscale-test.c:342
#define b
Definition: input.c:42
end end
D(D(float, sse)
static void vp3_v_loop_filter_c(uint8_t *first_pixel, int stride, int *bounding_values)
Definition: vp3dsp.c:230
#define A(x)
#define xC6S2
Definition: vp3dsp.c:40
#define F
Definition: af_aformat.c:50
#define ARCH_X86
Definition: config.h:35
#define xC2S6
Definition: vp3dsp.c:36
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several the filter must be ready for frames arriving randomly on any input any filter with several inputs will most likely require some kind of queuing mechanism It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced request_frame This method is called when a frame is wanted on an output For an input
external API header
void(* idct_put)(uint8_t *dest, int line_size, int16_t *block)
Definition: vp3dsp.h:41
#define xC7S1
Definition: vp3dsp.c:41
static uint32_t no_rnd_avg32(uint32_t a, uint32_t b)
Definition: rnd_avg.h:42
static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int type)
Definition: vp3dsp.c:45
static int filter_value(int in, int rrp[8], int v[9])
#define ARCH_ARM
Definition: config.h:16
#define E
static void vp3_idct_dc_add_c(uint8_t *dest, int line_size, int16_t *block)
Definition: vp3dsp.c:211
dest
Definition: start.py:60
av_cold void ff_vp3dsp_init_arm(VP3DSPContext *c, int flags)
void(* h_loop_filter)(uint8_t *src, int stride, int *bounding_values)
Definition: vp3dsp.h:45
FIXME Range Coding of cr are mx and my are Motion Vector top and top right vectors is used as motion vector prediction the used motion vector is the sum of the predictor and(mvx_diff, mvy_diff)*mv_scale Intra DC Predicton block[y][x] dc[1]
Definition: snow.txt:392
synthesis window for stochastic i
static void vp3_h_loop_filter_c(uint8_t *first_pixel, int stride, int *bounding_values)
Definition: vp3dsp.c:247
static void vp3_idct_add_c(uint8_t *dest, int line_size, int16_t *block)
Definition: vp3dsp.c:204
void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags)
Definition: vp3dsp_init.c:100
#define AV_RN32(p)
Definition: intreadwrite.h:356
#define type
static int flags
Definition: cpu.c:23
#define xC3S5
Definition: vp3dsp.c:37
common internal and external API header
static double c[64]
DSP utils.
av_cold void ff_vp3dsp_init_ppc(VP3DSPContext *c, int flags)
else dst[i][x+y *dst_stride[i]]
Definition: vf_mcdeint.c:160
#define G
Definition: dsputil.c:2026
void(* v_loop_filter)(uint8_t *src, int stride, int *bounding_values)
Definition: vp3dsp.h:44
void(* idct_dc_add)(uint8_t *dest, int line_size, int16_t *block)
Definition: vp3dsp.h:43
#define av_always_inline
Definition: attributes.h:41
#define IdctAdjustBeforeShift
Definition: vp3dsp.c:34
av_cold void ff_vp3dsp_init(VP3DSPContext *c, int flags)
Definition: vp3dsp.c:280
#define xC5S3
Definition: vp3dsp.c:39