rv40dsp.c
/*
 * RV40 decoder motion compensation functions
 * Copyright (c) 2008 Konstantin Shishkov
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * RV40 decoder motion compensation functions
 */

#include "avcodec.h"
#include "h264qpel.h"
#include "rv34dsp.h"
#include "libavutil/avassert.h"
#include "libavutil/common.h"

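/*
 * The RV40_LOWPASS macro below generates the horizontal and vertical 6-tap
 * subpel interpolation filters.  Each output sample is computed as
 *
 *     (p[-2] + p[3] - 5*(p[-1] + p[2]) + C1*p[0] + C2*p[1] + (1 << (SHIFT-1))) >> SHIFT
 *
 * where the centre-tap pair (C1, C2) and the normalisation SHIFT are chosen
 * per subpel position by the RV40_MC macro further down.
 */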
#define RV40_LOWPASS(OPNAME, OP) \
static av_unused void OPNAME ## rv40_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,\
                                                     const int h, const int C1, const int C2, const int SHIFT){\
    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i = 0; i < h; i++)\
    {\
        OP(dst[0], (src[-2] + src[ 3] - 5*(src[-1]+src[2]) + src[0]*C1 + src[1]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[1], (src[-1] + src[ 4] - 5*(src[ 0]+src[3]) + src[1]*C1 + src[2]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[2], (src[ 0] + src[ 5] - 5*(src[ 1]+src[4]) + src[2]*C1 + src[3]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[3], (src[ 1] + src[ 6] - 5*(src[ 2]+src[5]) + src[3]*C1 + src[4]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[4], (src[ 2] + src[ 7] - 5*(src[ 3]+src[6]) + src[4]*C1 + src[5]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[5], (src[ 3] + src[ 8] - 5*(src[ 4]+src[7]) + src[5]*C1 + src[6]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[6], (src[ 4] + src[ 9] - 5*(src[ 5]+src[8]) + src[6]*C1 + src[7]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[7], (src[ 5] + src[10] - 5*(src[ 6]+src[9]) + src[7]*C1 + src[8]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        dst += dstStride;\
        src += srcStride;\
    }\
}\
\
static void OPNAME ## rv40_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,\
                                           const int w, const int C1, const int C2, const int SHIFT){\
    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i = 0; i < w; i++)\
    {\
        const int srcB  = src[-2*srcStride];\
        const int srcA  = src[-1*srcStride];\
        const int src0  = src[0 *srcStride];\
        const int src1  = src[1 *srcStride];\
        const int src2  = src[2 *srcStride];\
        const int src3  = src[3 *srcStride];\
        const int src4  = src[4 *srcStride];\
        const int src5  = src[5 *srcStride];\
        const int src6  = src[6 *srcStride];\
        const int src7  = src[7 *srcStride];\
        const int src8  = src[8 *srcStride];\
        const int src9  = src[9 *srcStride];\
        const int src10 = src[10*srcStride];\
        OP(dst[0*dstStride], (srcB + src3  - 5*(srcA+src2) + src0*C1 + src1*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[1*dstStride], (srcA + src4  - 5*(src0+src3) + src1*C1 + src2*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[2*dstStride], (src0 + src5  - 5*(src1+src4) + src2*C1 + src3*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[3*dstStride], (src1 + src6  - 5*(src2+src5) + src3*C1 + src4*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[4*dstStride], (src2 + src7  - 5*(src3+src6) + src4*C1 + src5*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[5*dstStride], (src3 + src8  - 5*(src4+src7) + src5*C1 + src6*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[6*dstStride], (src4 + src9  - 5*(src5+src8) + src6*C1 + src7*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[7*dstStride], (src5 + src10 - 5*(src6+src9) + src7*C1 + src8*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## rv40_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,\
                                            const int w, const int C1, const int C2, const int SHIFT){\
    OPNAME ## rv40_qpel8_v_lowpass(dst  , src  , dstStride, srcStride, 8, C1, C2, SHIFT);\
    OPNAME ## rv40_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride, 8, C1, C2, SHIFT);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## rv40_qpel8_v_lowpass(dst  , src  , dstStride, srcStride, w-8, C1, C2, SHIFT);\
    OPNAME ## rv40_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride, w-8, C1, C2, SHIFT);\
}\
\
static void OPNAME ## rv40_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,\
                                            const int h, const int C1, const int C2, const int SHIFT){\
    OPNAME ## rv40_qpel8_h_lowpass(dst  , src  , dstStride, srcStride, 8, C1, C2, SHIFT);\
    OPNAME ## rv40_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride, 8, C1, C2, SHIFT);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## rv40_qpel8_h_lowpass(dst  , src  , dstStride, srcStride, h-8, C1, C2, SHIFT);\
    OPNAME ## rv40_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride, h-8, C1, C2, SHIFT);\
}\
\

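/*
 * RV40_MC generates the luma motion compensation functions.  The _mcXY_
 * suffix follows the usual FFmpeg convention of X being the horizontal and
 * Y the vertical quarter-pel offset.  Quarter-pel offsets use the
 * (52, 20) >> 6 centre taps, three-quarter offsets use (20, 52) >> 6, and
 * half-pel offsets use (20, 20) >> 5; two-dimensional positions first run
 * the horizontal pass into the `full` scratch buffer and then filter it
 * vertically.
 */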
#define RV40_MC(OPNAME, SIZE) \
static void OPNAME ## rv40_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    OPNAME ## rv40_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride, SIZE, 52, 20, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    OPNAME ## rv40_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride, SIZE, 20, 52, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, src, stride, stride, SIZE, 52, 20, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 52, 20, 6);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 52, 20, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 20, 5);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 52, 20, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 52, 6);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 52, 20, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 52, 20, 6);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 20, 5);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 20, 5);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 20, 5);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 52, 6);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 20, 5);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, src, stride, stride, SIZE, 20, 52, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 52, 20, 6);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 52, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 20, 5);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 52, 6);\
}\
\

#define op_avg(a, b) a = (((a)+cm[b]+1)>>1)
#define op_put(a, b) a = cm[b]

RV40_LOWPASS(put_ , op_put)
RV40_LOWPASS(avg_ , op_avg)

#undef op_avg
#undef op_put

RV40_MC(put_, 8)
RV40_MC(put_, 16)
RV40_MC(avg_, 8)
RV40_MC(avg_, 16)

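/*
 * Rounding bias added before the final >> 6 in the chroma MC functions,
 * indexed by the chroma subpel position as rv40_bias[y>>1][x>>1].
 */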
static const int rv40_bias[4][4] = {
    {  0, 16, 32, 16 },
    { 32, 28, 32, 28 },
    {  0, 32, 16, 32 },
    { 32, 28, 32, 28 }
};

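/*
 * Chroma motion compensation: bilinear interpolation with the weights
 * A = (8-x)(8-y), B = x(8-y), C = (8-x)y and D = xy, plus the bias above.
 * When D is zero the interpolation degenerates to a two-tap filter along
 * one axis (or a plain copy), which the else branch exploits.
 */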
#define RV40_CHROMA_MC(OPNAME, OP)\
static void OPNAME ## rv40_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A = (8-x) * (8-y);\
    const int B = (  x) * (8-y);\
    const int C = (8-x) * (  y);\
    const int D = (  x) * (  y);\
    int i;\
    int bias = rv40_bias[y>>1][x>>1];\
    \
    av_assert2(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i = 0; i < h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + bias));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + bias));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + bias));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + bias));\
            dst += stride;\
            src += stride;\
        }\
    }else{\
        const int E = B + C;\
        const int step = C ? stride : 1;\
        for(i = 0; i < h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0] + bias));\
            OP(dst[1], (A*src[1] + E*src[step+1] + bias));\
            OP(dst[2], (A*src[2] + E*src[step+2] + bias));\
            OP(dst[3], (A*src[3] + E*src[step+3] + bias));\
            dst += stride;\
            src += stride;\
        }\
    }\
}\
\
static void OPNAME ## rv40_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A = (8-x) * (8-y);\
    const int B = (  x) * (8-y);\
    const int C = (8-x) * (  y);\
    const int D = (  x) * (  y);\
    int i;\
    int bias = rv40_bias[y>>1][x>>1];\
    \
    av_assert2(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i = 0; i < h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + bias));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + bias));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + bias));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + bias));\
            OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + bias));\
            OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + bias));\
            OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + bias));\
            OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + bias));\
            dst += stride;\
            src += stride;\
        }\
    }else{\
        const int E = B + C;\
        const int step = C ? stride : 1;\
        for(i = 0; i < h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0] + bias));\
            OP(dst[1], (A*src[1] + E*src[step+1] + bias));\
            OP(dst[2], (A*src[2] + E*src[step+2] + bias));\
            OP(dst[3], (A*src[3] + E*src[step+3] + bias));\
            OP(dst[4], (A*src[4] + E*src[step+4] + bias));\
            OP(dst[5], (A*src[5] + E*src[step+5] + bias));\
            OP(dst[6], (A*src[6] + E*src[step+6] + bias));\
            OP(dst[7], (A*src[7] + E*src[step+7] + bias));\
            dst += stride;\
            src += stride;\
        }\
    }\
}

#define op_avg(a, b) a = (((a)+((b)>>6)+1)>>1)
#define op_put(a, b) a = ((b)>>6)

RV40_CHROMA_MC(put_, op_put)
RV40_CHROMA_MC(avg_, op_avg)

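/*
 * Biweight functions: dst is a weighted combination of the two sources.
 * The _rnd_ variant shifts each weighted term down by 9 bits before the
 * final rounding shift; the _nornd_ variant assumes the weights are
 * already scaled so that no intermediate shift is needed.
 */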
#define RV40_WEIGHT_FUNC(size) \
static void rv40_weight_func_rnd_ ## size (uint8_t *dst, uint8_t *src1, uint8_t *src2, int w1, int w2, ptrdiff_t stride)\
{\
    int i, j;\
\
    for (j = 0; j < size; j++) {\
        for (i = 0; i < size; i++)\
            dst[i] = (((w2 * src1[i]) >> 9) + ((w1 * src2[i]) >> 9) + 0x10) >> 5;\
        src1 += stride;\
        src2 += stride;\
        dst  += stride;\
    }\
}\
static void rv40_weight_func_nornd_ ## size (uint8_t *dst, uint8_t *src1, uint8_t *src2, int w1, int w2, ptrdiff_t stride)\
{\
    int i, j;\
\
    for (j = 0; j < size; j++) {\
        for (i = 0; i < size; i++)\
            dst[i] = (w2 * src1[i] + w1 * src2[i] + 0x10) >> 5;\
        src1 += stride;\
        src2 += stride;\
        dst  += stride;\
    }\
}

RV40_WEIGHT_FUNC(16)
RV40_WEIGHT_FUNC(8)

/**
 * dither values for deblocking filter - left/top values
 */
static const uint8_t rv40_dither_l[16] = {
    0x40, 0x50, 0x20, 0x60, 0x30, 0x50, 0x40, 0x30,
    0x50, 0x40, 0x50, 0x30, 0x60, 0x20, 0x50, 0x40
};

/**
 * dither values for deblocking filter - right/bottom values
 */
static const uint8_t rv40_dither_r[16] = {
    0x40, 0x30, 0x60, 0x20, 0x50, 0x30, 0x30, 0x40,
    0x40, 0x40, 0x50, 0x30, 0x20, 0x60, 0x30, 0x40
};

#define CLIP_SYMM(a, b) av_clip(a, -(b), b)
/**
 * weaker deblocking very similar to the one described in 4.4.2 of JVT-A003r1
 */
static av_always_inline void rv40_weak_loop_filter(uint8_t *src,
                                                   const int step,
                                                   const ptrdiff_t stride,
                                                   const int filter_p1,
                                                   const int filter_q1,
                                                   const int alpha,
                                                   const int beta,
                                                   const int lim_p0q0,
                                                   const int lim_q1,
                                                   const int lim_p1)
{
    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
    int i, t, u, diff;

    for (i = 0; i < 4; i++, src += stride) {
        int diff_p1p0 = src[-2*step] - src[-1*step];
        int diff_q1q0 = src[ 1*step] - src[ 0*step];
        int diff_p1p2 = src[-2*step] - src[-3*step];
        int diff_q1q2 = src[ 1*step] - src[ 2*step];

        t = src[0*step] - src[-1*step];
        if (!t)
            continue;

        u = (alpha * FFABS(t)) >> 7;
        if (u > 3 - (filter_p1 && filter_q1))
            continue;

        t <<= 2;
        if (filter_p1 && filter_q1)
            t += src[-2*step] - src[1*step];

        diff = CLIP_SYMM((t + 4) >> 3, lim_p0q0);
        src[-1*step] = cm[src[-1*step] + diff];
        src[ 0*step] = cm[src[ 0*step] - diff];

        if (filter_p1 && FFABS(diff_p1p2) <= beta) {
            t = (diff_p1p0 + diff_p1p2 - diff) >> 1;
            src[-2*step] = cm[src[-2*step] - CLIP_SYMM(t, lim_p1)];
        }

        if (filter_q1 && FFABS(diff_q1q2) <= beta) {
            t = (diff_q1q0 + diff_q1q2 + diff) >> 1;
            src[ 1*step] = cm[src[ 1*step] - CLIP_SYMM(t, lim_q1)];
        }
    }
}

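/*
 * Thin wrappers selecting the edge orientation: for a horizontal edge the
 * two sides are one row apart (step = stride) and the loop walks along the
 * edge one pixel at a time; for a vertical edge it is the other way round.
 */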
static void rv40_h_weak_loop_filter(uint8_t *src, const ptrdiff_t stride,
                                    const int filter_p1, const int filter_q1,
                                    const int alpha, const int beta,
                                    const int lim_p0q0, const int lim_q1,
                                    const int lim_p1)
{
    rv40_weak_loop_filter(src, stride, 1, filter_p1, filter_q1,
                          alpha, beta, lim_p0q0, lim_q1, lim_p1);
}

static void rv40_v_weak_loop_filter(uint8_t *src, const ptrdiff_t stride,
                                    const int filter_p1, const int filter_q1,
                                    const int alpha, const int beta,
                                    const int lim_p0q0, const int lim_q1,
                                    const int lim_p1)
{
    rv40_weak_loop_filter(src, 1, stride, filter_p1, filter_q1,
                          alpha, beta, lim_p0q0, lim_q1, lim_p1);
}
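
/*
 * Strong deblocking: p1, p0, q0 and q1 are replaced by 25/26/26/26/25
 * weighted averages across the edge with a per-line dither value added;
 * when sflag is set the new values are also clamped to within +/- lims of
 * the original samples.  For luma the third pixel on each side (p2/q2) is
 * smoothed as well.
 */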
static av_always_inline void rv40_strong_loop_filter(uint8_t *src,
                                                     const int step,
                                                     const ptrdiff_t stride,
                                                     const int alpha,
                                                     const int lims,
                                                     const int dmode,
                                                     const int chroma)
{
    int i;

    for(i = 0; i < 4; i++, src += stride){
        int sflag, p0, q0, p1, q1;
        int t = src[0*step] - src[-1*step];

        if (!t)
            continue;

        sflag = (alpha * FFABS(t)) >> 7;
        if (sflag > 1)
            continue;

        p0 = (25*src[-3*step] + 26*src[-2*step] + 26*src[-1*step] +
              26*src[ 0*step] + 25*src[ 1*step] +
              rv40_dither_l[dmode + i]) >> 7;

        q0 = (25*src[-2*step] + 26*src[-1*step] + 26*src[ 0*step] +
              26*src[ 1*step] + 25*src[ 2*step] +
              rv40_dither_r[dmode + i]) >> 7;

        if (sflag) {
            p0 = av_clip(p0, src[-1*step] - lims, src[-1*step] + lims);
            q0 = av_clip(q0, src[ 0*step] - lims, src[ 0*step] + lims);
        }

        p1 = (25*src[-4*step] + 26*src[-3*step] + 26*src[-2*step] + 26*p0 +
              25*src[ 0*step] + rv40_dither_l[dmode + i]) >> 7;
        q1 = (25*src[-1*step] + 26*q0 + 26*src[ 1*step] + 26*src[ 2*step] +
              25*src[ 3*step] + rv40_dither_r[dmode + i]) >> 7;

        if (sflag) {
            p1 = av_clip(p1, src[-2*step] - lims, src[-2*step] + lims);
            q1 = av_clip(q1, src[ 1*step] - lims, src[ 1*step] + lims);
        }

        src[-2*step] = p1;
        src[-1*step] = p0;
        src[ 0*step] = q0;
        src[ 1*step] = q1;

        if(!chroma){
            src[-3*step] = (25*src[-1*step] + 26*src[-2*step] +
                            51*src[-3*step] + 26*src[-4*step] + 64) >> 7;
            src[ 2*step] = (25*src[ 0*step] + 26*src[ 1*step] +
                            51*src[ 2*step] + 26*src[ 3*step] + 64) >> 7;
        }
    }
}

static void rv40_h_strong_loop_filter(uint8_t *src, const ptrdiff_t stride,
                                      const int alpha, const int lims,
                                      const int dmode, const int chroma)
{
    rv40_strong_loop_filter(src, stride, 1, alpha, lims, dmode, chroma);
}

static void rv40_v_strong_loop_filter(uint8_t *src, const ptrdiff_t stride,
                                      const int alpha, const int lims,
                                      const int dmode, const int chroma)
{
    rv40_strong_loop_filter(src, 1, stride, alpha, lims, dmode, chroma);
}

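/*
 * Edge analysis over a 4-pixel segment: *p1 and *q1 are set when the summed
 * p1-p0 (resp. q1-q0) differences stay below 4*beta, and the return value,
 * which can only be non-zero on block edges, requests the strong filter
 * when the p1-p2 and q1-q2 sums also stay below beta2.
 */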
static av_always_inline int rv40_loop_filter_strength(uint8_t *src,
                                                      int step, ptrdiff_t stride,
                                                      int beta, int beta2,
                                                      int edge,
                                                      int *p1, int *q1)
{
    int sum_p1p0 = 0, sum_q1q0 = 0, sum_p1p2 = 0, sum_q1q2 = 0;
    int strong0 = 0, strong1 = 0;
    uint8_t *ptr;
    int i;

    for (i = 0, ptr = src; i < 4; i++, ptr += stride) {
        sum_p1p0 += ptr[-2*step] - ptr[-1*step];
        sum_q1q0 += ptr[ 1*step] - ptr[ 0*step];
    }

    *p1 = FFABS(sum_p1p0) < (beta << 2);
    *q1 = FFABS(sum_q1q0) < (beta << 2);

    if(!*p1 && !*q1)
        return 0;

    if (!edge)
        return 0;

    for (i = 0, ptr = src; i < 4; i++, ptr += stride) {
        sum_p1p2 += ptr[-2*step] - ptr[-3*step];
        sum_q1q2 += ptr[ 1*step] - ptr[ 2*step];
    }

    strong0 = *p1 && (FFABS(sum_p1p2) < beta2);
    strong1 = *q1 && (FFABS(sum_q1q2) < beta2);

    return strong0 && strong1;
}

static int rv40_h_loop_filter_strength(uint8_t *src, ptrdiff_t stride,
                                       int beta, int beta2, int edge,
                                       int *p1, int *q1)
{
    return rv40_loop_filter_strength(src, stride, 1, beta, beta2, edge, p1, q1);
}

static int rv40_v_loop_filter_strength(uint8_t *src, ptrdiff_t stride,
                                       int beta, int beta2, int edge,
                                       int *p1, int *q1)
{
    return rv40_loop_filter_strength(src, 1, stride, beta, beta2, edge, p1, q1);
}

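/*
 * Fill in the RV40-specific function pointers.  Entries 0, 2 and 8 of the
 * luma tables reuse the shared H.264 qpel code: entry 0 is a plain copy,
 * and with C1 = C2 = 20 and SHIFT = 5 the RV40 lowpass above reduces to the
 * H.264 half-pel filter.  Entry 15 (the mc33 position) uses the external
 * ff_*_rv40_qpel*_mc33_c functions from the shared RV30/40 code.
 */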
av_cold void ff_rv40dsp_init(RV34DSPContext *c)
{
    H264QpelContext qpel;

    ff_rv34dsp_init(c);
    ff_h264qpel_init(&qpel, 8);

    c->put_pixels_tab[0][ 0] = qpel.put_h264_qpel_pixels_tab[0][0];
    c->put_pixels_tab[0][ 1] = put_rv40_qpel16_mc10_c;
    c->put_pixels_tab[0][ 2] = qpel.put_h264_qpel_pixels_tab[0][2];
    c->put_pixels_tab[0][ 3] = put_rv40_qpel16_mc30_c;
    c->put_pixels_tab[0][ 4] = put_rv40_qpel16_mc01_c;
    c->put_pixels_tab[0][ 5] = put_rv40_qpel16_mc11_c;
    c->put_pixels_tab[0][ 6] = put_rv40_qpel16_mc21_c;
    c->put_pixels_tab[0][ 7] = put_rv40_qpel16_mc31_c;
    c->put_pixels_tab[0][ 8] = qpel.put_h264_qpel_pixels_tab[0][8];
    c->put_pixels_tab[0][ 9] = put_rv40_qpel16_mc12_c;
    c->put_pixels_tab[0][10] = put_rv40_qpel16_mc22_c;
    c->put_pixels_tab[0][11] = put_rv40_qpel16_mc32_c;
    c->put_pixels_tab[0][12] = put_rv40_qpel16_mc03_c;
    c->put_pixels_tab[0][13] = put_rv40_qpel16_mc13_c;
    c->put_pixels_tab[0][14] = put_rv40_qpel16_mc23_c;
    c->put_pixels_tab[0][15] = ff_put_rv40_qpel16_mc33_c;
    c->avg_pixels_tab[0][ 0] = qpel.avg_h264_qpel_pixels_tab[0][0];
    c->avg_pixels_tab[0][ 1] = avg_rv40_qpel16_mc10_c;
    c->avg_pixels_tab[0][ 2] = qpel.avg_h264_qpel_pixels_tab[0][2];
    c->avg_pixels_tab[0][ 3] = avg_rv40_qpel16_mc30_c;
    c->avg_pixels_tab[0][ 4] = avg_rv40_qpel16_mc01_c;
    c->avg_pixels_tab[0][ 5] = avg_rv40_qpel16_mc11_c;
    c->avg_pixels_tab[0][ 6] = avg_rv40_qpel16_mc21_c;
    c->avg_pixels_tab[0][ 7] = avg_rv40_qpel16_mc31_c;
    c->avg_pixels_tab[0][ 8] = qpel.avg_h264_qpel_pixels_tab[0][8];
    c->avg_pixels_tab[0][ 9] = avg_rv40_qpel16_mc12_c;
    c->avg_pixels_tab[0][10] = avg_rv40_qpel16_mc22_c;
    c->avg_pixels_tab[0][11] = avg_rv40_qpel16_mc32_c;
    c->avg_pixels_tab[0][12] = avg_rv40_qpel16_mc03_c;
    c->avg_pixels_tab[0][13] = avg_rv40_qpel16_mc13_c;
    c->avg_pixels_tab[0][14] = avg_rv40_qpel16_mc23_c;
    c->avg_pixels_tab[0][15] = ff_avg_rv40_qpel16_mc33_c;
    c->put_pixels_tab[1][ 0] = qpel.put_h264_qpel_pixels_tab[1][0];
    c->put_pixels_tab[1][ 1] = put_rv40_qpel8_mc10_c;
    c->put_pixels_tab[1][ 2] = qpel.put_h264_qpel_pixels_tab[1][2];
    c->put_pixels_tab[1][ 3] = put_rv40_qpel8_mc30_c;
    c->put_pixels_tab[1][ 4] = put_rv40_qpel8_mc01_c;
    c->put_pixels_tab[1][ 5] = put_rv40_qpel8_mc11_c;
    c->put_pixels_tab[1][ 6] = put_rv40_qpel8_mc21_c;
    c->put_pixels_tab[1][ 7] = put_rv40_qpel8_mc31_c;
    c->put_pixels_tab[1][ 8] = qpel.put_h264_qpel_pixels_tab[1][8];
    c->put_pixels_tab[1][ 9] = put_rv40_qpel8_mc12_c;
    c->put_pixels_tab[1][10] = put_rv40_qpel8_mc22_c;
    c->put_pixels_tab[1][11] = put_rv40_qpel8_mc32_c;
    c->put_pixels_tab[1][12] = put_rv40_qpel8_mc03_c;
    c->put_pixels_tab[1][13] = put_rv40_qpel8_mc13_c;
    c->put_pixels_tab[1][14] = put_rv40_qpel8_mc23_c;
    c->put_pixels_tab[1][15] = ff_put_rv40_qpel8_mc33_c;
    c->avg_pixels_tab[1][ 0] = qpel.avg_h264_qpel_pixels_tab[1][0];
    c->avg_pixels_tab[1][ 1] = avg_rv40_qpel8_mc10_c;
    c->avg_pixels_tab[1][ 2] = qpel.avg_h264_qpel_pixels_tab[1][2];
    c->avg_pixels_tab[1][ 3] = avg_rv40_qpel8_mc30_c;
    c->avg_pixels_tab[1][ 4] = avg_rv40_qpel8_mc01_c;
    c->avg_pixels_tab[1][ 5] = avg_rv40_qpel8_mc11_c;
    c->avg_pixels_tab[1][ 6] = avg_rv40_qpel8_mc21_c;
    c->avg_pixels_tab[1][ 7] = avg_rv40_qpel8_mc31_c;
    c->avg_pixels_tab[1][ 8] = qpel.avg_h264_qpel_pixels_tab[1][8];
    c->avg_pixels_tab[1][ 9] = avg_rv40_qpel8_mc12_c;
    c->avg_pixels_tab[1][10] = avg_rv40_qpel8_mc22_c;
    c->avg_pixels_tab[1][11] = avg_rv40_qpel8_mc32_c;
    c->avg_pixels_tab[1][12] = avg_rv40_qpel8_mc03_c;
    c->avg_pixels_tab[1][13] = avg_rv40_qpel8_mc13_c;
    c->avg_pixels_tab[1][14] = avg_rv40_qpel8_mc23_c;
    c->avg_pixels_tab[1][15] = ff_avg_rv40_qpel8_mc33_c;

    c->put_chroma_pixels_tab[0] = put_rv40_chroma_mc8_c;
    c->put_chroma_pixels_tab[1] = put_rv40_chroma_mc4_c;
    c->avg_chroma_pixels_tab[0] = avg_rv40_chroma_mc8_c;
    c->avg_chroma_pixels_tab[1] = avg_rv40_chroma_mc4_c;

    c->rv40_weight_pixels_tab[0][0] = rv40_weight_func_rnd_16;
    c->rv40_weight_pixels_tab[0][1] = rv40_weight_func_rnd_8;
    c->rv40_weight_pixels_tab[1][0] = rv40_weight_func_nornd_16;
    c->rv40_weight_pixels_tab[1][1] = rv40_weight_func_nornd_8;

    c->rv40_weak_loop_filter[0]     = rv40_h_weak_loop_filter;
    c->rv40_weak_loop_filter[1]     = rv40_v_weak_loop_filter;
    c->rv40_strong_loop_filter[0]   = rv40_h_strong_loop_filter;
    c->rv40_strong_loop_filter[1]   = rv40_v_strong_loop_filter;
    c->rv40_loop_filter_strength[0] = rv40_h_loop_filter_strength;
    c->rv40_loop_filter_strength[1] = rv40_v_loop_filter_strength;

    if (ARCH_X86)
        ff_rv40dsp_init_x86(c);
    if (ARCH_ARM)
        ff_rv40dsp_init_arm(c);
}