yading@11
|
1 /*
|
yading@11
|
2 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
|
yading@11
|
3 *
|
yading@11
|
4 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
|
yading@11
|
5 *
|
yading@11
|
6 * This file is part of FFmpeg.
|
yading@11
|
7 *
|
yading@11
|
8 * FFmpeg is free software; you can redistribute it and/or modify
|
yading@11
|
9 * it under the terms of the GNU General Public License as published by
|
yading@11
|
10 * the Free Software Foundation; either version 2 of the License, or
|
yading@11
|
11 * (at your option) any later version.
|
yading@11
|
12 *
|
yading@11
|
13 * FFmpeg is distributed in the hope that it will be useful,
|
yading@11
|
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
yading@11
|
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
yading@11
|
16 * GNU General Public License for more details.
|
yading@11
|
17 *
|
yading@11
|
18 * You should have received a copy of the GNU General Public License
|
yading@11
|
19 * along with FFmpeg; if not, write to the Free Software
|
yading@11
|
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
yading@11
|
21 */
|
yading@11
|
22
|
yading@11
|
23 /**
|
yading@11
|
24 * @file
|
yading@11
|
25 * postprocessing.
|
yading@11
|
26 */
|
yading@11
|
27
|
yading@11
|
28 /*
|
yading@11
|
29 C MMX MMX2 3DNow AltiVec
|
yading@11
|
30 isVertDC Ec Ec Ec
|
yading@11
|
31 isVertMinMaxOk Ec Ec Ec
|
yading@11
|
32 doVertLowPass E e e Ec
|
yading@11
|
33 doVertDefFilter Ec Ec e e Ec
|
yading@11
|
34 isHorizDC Ec Ec Ec
|
yading@11
|
35 isHorizMinMaxOk a E Ec
|
yading@11
|
36 doHorizLowPass E e e Ec
|
yading@11
|
37 doHorizDefFilter Ec Ec e e Ec
|
yading@11
|
38 do_a_deblock Ec E Ec E
|
yading@11
|
39 deRing E e e* Ecp
|
yading@11
|
40 Vertical RKAlgo1 E a a
|
yading@11
|
41 Horizontal RKAlgo1 a a
|
yading@11
|
42 Vertical X1# a E E
|
yading@11
|
43 Horizontal X1# a E E
|
yading@11
|
44 LinIpolDeinterlace e E E*
|
yading@11
|
45 CubicIpolDeinterlace a e e*
|
yading@11
|
46 LinBlendDeinterlace e E E*
|
yading@11
|
47 MedianDeinterlace# E Ec Ec
|
yading@11
|
48 TempDeNoiser# E e e Ec
|
yading@11
|
49
|
yading@11
|
50 * I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
|
yading@11
|
51 # more or less selfinvented filters so the exactness is not too meaningful
|
yading@11
|
52 E = Exact implementation
|
yading@11
|
53 e = almost exact implementation (slightly different rounding,...)
|
yading@11
|
54 a = alternative / approximate impl
|
yading@11
|
55 c = checked against the other implementations (-vo md5)
|
yading@11
|
56 p = partially optimized, still some work to do
|
yading@11
|
57 */
|
yading@11
|
58
|
yading@11
|
59 /*
|
yading@11
|
60 TODO:
|
yading@11
|
61 reduce the time wasted on the mem transfer
|
yading@11
|
62 unroll stuff if instructions depend too much on the prior one
|
yading@11
|
63 move YScale thing to the end instead of fixing QP
|
yading@11
|
64 write a faster and higher quality deblocking filter :)
|
yading@11
|
65 make the mainloop more flexible (variable number of blocks at once
|
yading@11
|
66 (the if/else stuff per block is slowing things down)
|
yading@11
|
67 compare the quality & speed of all filters
|
yading@11
|
68 split this huge file
|
yading@11
|
69 optimize c versions
|
yading@11
|
70 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
|
yading@11
|
71 ...
|
yading@11
|
72 */
|
yading@11
|
73
|
yading@11
|
74 //Changelog: use git log
|
yading@11
|
75
|
yading@11
|
76 #include "config.h"
|
yading@11
|
77 #include "libavutil/avutil.h"
|
yading@11
|
78 #include "libavutil/avassert.h"
|
yading@11
|
79 #include <inttypes.h>
|
yading@11
|
80 #include <stdio.h>
|
yading@11
|
81 #include <stdlib.h>
|
yading@11
|
82 #include <string.h>
|
yading@11
|
83 //#undef HAVE_MMXEXT_INLINE
|
yading@11
|
84 //#define HAVE_AMD3DNOW_INLINE
|
yading@11
|
85 //#undef HAVE_MMX_INLINE
|
yading@11
|
86 //#undef ARCH_X86
|
yading@11
|
87 //#define DEBUG_BRIGHTNESS
|
yading@11
|
88 #include "postprocess.h"
|
yading@11
|
89 #include "postprocess_internal.h"
|
yading@11
|
90 #include "libavutil/avstring.h"
|
yading@11
|
91
|
yading@11
|
92 unsigned postproc_version(void)
|
yading@11
|
93 {
|
yading@11
|
94 av_assert0(LIBPOSTPROC_VERSION_MICRO >= 100);
|
yading@11
|
95 return LIBPOSTPROC_VERSION_INT;
|
yading@11
|
96 }
|
yading@11
|
97
|
yading@11
|
98 const char *postproc_configuration(void)
|
yading@11
|
99 {
|
yading@11
|
100 return FFMPEG_CONFIGURATION;
|
yading@11
|
101 }
|
yading@11
|
102
|
yading@11
|
103 const char *postproc_license(void)
|
yading@11
|
104 {
|
yading@11
|
105 #define LICENSE_PREFIX "libpostproc license: "
|
yading@11
|
106 return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
|
yading@11
|
107 }
|
yading@11
|
108
|
yading@11
|
109 #if HAVE_ALTIVEC_H
|
yading@11
|
110 #include <altivec.h>
|
yading@11
|
111 #endif
|
yading@11
|
112
|
yading@11
|
/* Size in bytes of the scratch buffer into which
 * pp_get_mode_by_name_and_quality() copies the mode string before
 * tokenizing it. */
#define GET_MODE_BUFFER_SIZE 500
/* Number of slots in the per-filter options[] array used by
 * pp_get_mode_by_name_and_quality(). */
#define OPTIONS_ARRAY_SIZE 10
/* Edge length, in pixels, of the square blocks the C filters iterate over. */
#define BLOCK_SIZE 8
/* NOTE(review): no use of TEMP_STRIDE is visible in this part of the file;
 * presumably it is consumed by the included templates -- confirm. */
#define TEMP_STRIDE 8
//#define NUM_BLOCKS_AT_ONCE 16        //not used yet
|
yading@11
|
118
|
yading@11
|
#if ARCH_X86 && HAVE_INLINE_ASM
/* 64-bit constants declared through DECLARE_ASM_CONST (first argument 8).
 * wNN: the hex value NN repeated in each of the four 16-bit words.
 * bNN: the hex value NN repeated in each of the eight bytes.
 * NOTE(review): none of them is referenced in this part of the file;
 * presumably the x86 inline-asm templates use them -- confirm. */
DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
#endif

/* Threshold constant for the deringing filter (name-based; the code that
 * reads it is not in this part of the file -- confirm). */
DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
|
yading@11
|
131
|
yading@11
|
132
|
yading@11
|
/*
 * Table of the selectable filters, terminated by an all-NULL/zero entry.
 *
 * The two strings of each entry are the short and long filter names that
 * pp_help lists; the last field is the filter's mode constant.
 * NOTE(review): the meaning of the three integers in between is defined by
 * struct PPFilter, which is declared outside this file -- confirm there
 * before editing them.
 */
static const struct PPFilter filters[]=
{
    {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
    {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
/*  {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
    {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
    {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
    {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
    {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
    {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
    {"dr", "dering", 1, 5, 6, DERING},
    {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
    {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
    {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
    {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
    {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
    {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
    {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
    {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
    {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
    {"be", "bitexact", 1, 0, 0, BITEXACT},
    {NULL, NULL,0,0,0,0} //End Marker
};
|
yading@11
|
156
|
yading@11
|
/*
 * Preset aliases, stored as consecutive (name, filter string) pairs and
 * terminated by a single NULL. The long/short preset names and their
 * expansions match the "de"/"fa"/"ac" rows of pp_help.
 * NOTE(review): the code that walks this table is outside this part of the
 * file; it is presumably read pairwise by the mode parser -- confirm.
 *
 * Both the strings and the pointer array itself are const: nothing should
 * ever rewrite an entry at run time.
 */
static const char * const replaceTable[]=
{
    "default",      "hb:a,vb:a,dr:a",
    "de",           "hb:a,vb:a,dr:a",
    "fast",         "h1:a,v1:a,dr:a",
    "fa",           "h1:a,v1:a,dr:a",
    "ac",           "ha:a:128:7,va:a,dr:a",
    NULL //End Marker
};
|
yading@11
|
166
|
yading@11
|
167
|
yading@11
|
168 #if ARCH_X86 && HAVE_INLINE_ASM
|
yading@11
|
/**
 * Emit an x86 "prefetchnta" instruction for the address p.
 *
 * @param p address to prefetch; it is only used as an address operand and
 *          never written through, hence the const qualifier
 */
static inline void prefetchnta(const void *p)
{
    __asm__ volatile(   "prefetchnta (%0)\n\t"
        : : "r" (p)
    );
}
|
yading@11
|
175
|
yading@11
|
/**
 * Emit an x86 "prefetcht0" instruction for the address p.
 *
 * @param p address to prefetch; it is only used as an address operand and
 *          never written through, hence the const qualifier
 */
static inline void prefetcht0(const void *p)
{
    __asm__ volatile(   "prefetcht0 (%0)\n\t"
        : : "r" (p)
    );
}
|
yading@11
|
182
|
yading@11
|
/**
 * Emit an x86 "prefetcht1" instruction for the address p.
 *
 * @param p address to prefetch; it is only used as an address operand and
 *          never written through, hence the const qualifier
 */
static inline void prefetcht1(const void *p)
{
    __asm__ volatile(   "prefetcht1 (%0)\n\t"
        : : "r" (p)
    );
}
|
yading@11
|
189
|
yading@11
|
/**
 * Emit an x86 "prefetcht2" instruction for the address p.
 *
 * @param p address to prefetch; it is only used as an address operand and
 *          never written through, hence the const qualifier
 */
static inline void prefetcht2(const void *p)
{
    __asm__ volatile(   "prefetcht2 (%0)\n\t"
        : : "r" (p)
    );
}
|
yading@11
|
196 #endif
|
yading@11
|
197
|
yading@11
|
198 /* The horizontal functions exist only in C because the MMX
|
yading@11
|
199 * code is faster with vertical filters and transposing. */
|
yading@11
|
200
|
yading@11
|
201 /**
|
yading@11
|
202 * Check if the given 8x8 Block is mostly "flat"
|
yading@11
|
203 */
|
yading@11
|
204 static inline int isHorizDC_C(const uint8_t src[], int stride, const PPContext *c)
|
yading@11
|
205 {
|
yading@11
|
206 int numEq= 0;
|
yading@11
|
207 int y;
|
yading@11
|
208 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
|
yading@11
|
209 const int dcThreshold= dcOffset*2 + 1;
|
yading@11
|
210
|
yading@11
|
211 for(y=0; y<BLOCK_SIZE; y++){
|
yading@11
|
212 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
|
yading@11
|
213 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
|
yading@11
|
214 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
|
yading@11
|
215 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
|
yading@11
|
216 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
|
yading@11
|
217 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
|
yading@11
|
218 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
|
yading@11
|
219 src+= stride;
|
yading@11
|
220 }
|
yading@11
|
221 return numEq > c->ppMode.flatnessThreshold;
|
yading@11
|
222 }
|
yading@11
|
223
|
yading@11
|
224 /**
|
yading@11
|
225 * Check if the middle 8x8 Block in the given 8x16 block is flat
|
yading@11
|
226 */
|
yading@11
|
227 static inline int isVertDC_C(const uint8_t src[], int stride, const PPContext *c)
|
yading@11
|
228 {
|
yading@11
|
229 int numEq= 0;
|
yading@11
|
230 int y;
|
yading@11
|
231 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
|
yading@11
|
232 const int dcThreshold= dcOffset*2 + 1;
|
yading@11
|
233
|
yading@11
|
234 src+= stride*4; // src points to begin of the 8x8 Block
|
yading@11
|
235 for(y=0; y<BLOCK_SIZE-1; y++){
|
yading@11
|
236 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
|
yading@11
|
237 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
|
yading@11
|
238 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
|
yading@11
|
239 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
|
yading@11
|
240 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
|
yading@11
|
241 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
|
yading@11
|
242 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
|
yading@11
|
243 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
|
yading@11
|
244 src+= stride;
|
yading@11
|
245 }
|
yading@11
|
246 return numEq > c->ppMode.flatnessThreshold;
|
yading@11
|
247 }
|
yading@11
|
248
|
yading@11
|
/**
 * Cheap range check over 8 rows: one pixel pair is sampled per row and the
 * check fails as soon as a pair differs by more than 2*QP.
 *
 * The sampled column pairs cycle through (0,5), (2,7), (4,1), (6,3), so the
 * pattern is applied twice over the 8 rows.
 *
 * @param src    first pixel of the first row
 * @param stride distance in bytes between two rows
 * @param QP     quantizer; the single unsigned compare below is equivalent
 *               to |difference| > 2*QP for non-negative QP
 * @return 0 if any sampled pair is out of range, 1 otherwise
 */
static inline int isHorizMinMaxOk_C(const uint8_t src[], int stride, int QP)
{
    static const uint8_t columns[4][2] = { {0, 5}, {2, 7}, {4, 1}, {6, 3} };
    const uint8_t *row = src;
    int pass, k;

    for (pass = 0; pass < 2; pass++) {
        for (k = 0; k < 4; k++, row += stride) {
            const int diff = row[columns[k][0]] - row[columns[k][1]];

            if ((unsigned)(diff + 2*QP) > 4*QP)
                return 0;
        }
    }
    return 1;
}
|
yading@11
|
264
|
yading@11
|
265 static inline int isVertMinMaxOk_C(const uint8_t src[], int stride, int QP)
|
yading@11
|
266 {
|
yading@11
|
267 int x;
|
yading@11
|
268 src+= stride*4;
|
yading@11
|
269 for(x=0; x<BLOCK_SIZE; x+=4){
|
yading@11
|
270 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
|
yading@11
|
271 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
|
yading@11
|
272 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
|
yading@11
|
273 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
|
yading@11
|
274 }
|
yading@11
|
275 return 1;
|
yading@11
|
276 }
|
yading@11
|
277
|
yading@11
|
278 static inline int horizClassify_C(const uint8_t src[], int stride, const PPContext *c)
|
yading@11
|
279 {
|
yading@11
|
280 if( isHorizDC_C(src, stride, c) ){
|
yading@11
|
281 if( isHorizMinMaxOk_C(src, stride, c->QP) )
|
yading@11
|
282 return 1;
|
yading@11
|
283 else
|
yading@11
|
284 return 0;
|
yading@11
|
285 }else{
|
yading@11
|
286 return 2;
|
yading@11
|
287 }
|
yading@11
|
288 }
|
yading@11
|
289
|
yading@11
|
290 static inline int vertClassify_C(const uint8_t src[], int stride, const PPContext *c)
|
yading@11
|
291 {
|
yading@11
|
292 if( isVertDC_C(src, stride, c) ){
|
yading@11
|
293 if( isVertMinMaxOk_C(src, stride, c->QP) )
|
yading@11
|
294 return 1;
|
yading@11
|
295 else
|
yading@11
|
296 return 0;
|
yading@11
|
297 }else{
|
yading@11
|
298 return 2;
|
yading@11
|
299 }
|
yading@11
|
300 }
|
yading@11
|
301
|
yading@11
|
302 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, const PPContext *c)
|
yading@11
|
303 {
|
yading@11
|
304 int y;
|
yading@11
|
305 for(y=0; y<BLOCK_SIZE; y++){
|
yading@11
|
306 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
|
yading@11
|
307
|
yading@11
|
308 if(FFABS(middleEnergy) < 8*c->QP){
|
yading@11
|
309 const int q=(dst[3] - dst[4])/2;
|
yading@11
|
310 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
|
yading@11
|
311 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
|
yading@11
|
312
|
yading@11
|
313 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
|
yading@11
|
314 d= FFMAX(d, 0);
|
yading@11
|
315
|
yading@11
|
316 d= (5*d + 32) >> 6;
|
yading@11
|
317 d*= FFSIGN(-middleEnergy);
|
yading@11
|
318
|
yading@11
|
319 if(q>0)
|
yading@11
|
320 {
|
yading@11
|
321 d= d<0 ? 0 : d;
|
yading@11
|
322 d= d>q ? q : d;
|
yading@11
|
323 }
|
yading@11
|
324 else
|
yading@11
|
325 {
|
yading@11
|
326 d= d>0 ? 0 : d;
|
yading@11
|
327 d= d<q ? q : d;
|
yading@11
|
328 }
|
yading@11
|
329
|
yading@11
|
330 dst[3]-= d;
|
yading@11
|
331 dst[4]+= d;
|
yading@11
|
332 }
|
yading@11
|
333 dst+= stride;
|
yading@11
|
334 }
|
yading@11
|
335 }
|
yading@11
|
336
|
yading@11
|
337 /**
|
yading@11
|
338 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
|
yading@11
|
339 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
|
yading@11
|
340 */
|
yading@11
|
341 static inline void doHorizLowPass_C(uint8_t dst[], int stride, const PPContext *c)
|
yading@11
|
342 {
|
yading@11
|
343 int y;
|
yading@11
|
344 for(y=0; y<BLOCK_SIZE; y++){
|
yading@11
|
345 const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
|
yading@11
|
346 const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
|
yading@11
|
347
|
yading@11
|
348 int sums[10];
|
yading@11
|
349 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
|
yading@11
|
350 sums[1] = sums[0] - first + dst[3];
|
yading@11
|
351 sums[2] = sums[1] - first + dst[4];
|
yading@11
|
352 sums[3] = sums[2] - first + dst[5];
|
yading@11
|
353 sums[4] = sums[3] - first + dst[6];
|
yading@11
|
354 sums[5] = sums[4] - dst[0] + dst[7];
|
yading@11
|
355 sums[6] = sums[5] - dst[1] + last;
|
yading@11
|
356 sums[7] = sums[6] - dst[2] + last;
|
yading@11
|
357 sums[8] = sums[7] - dst[3] + last;
|
yading@11
|
358 sums[9] = sums[8] - dst[4] + last;
|
yading@11
|
359
|
yading@11
|
360 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
|
yading@11
|
361 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
|
yading@11
|
362 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
|
yading@11
|
363 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
|
yading@11
|
364 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
|
yading@11
|
365 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
|
yading@11
|
366 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
|
yading@11
|
367 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
|
yading@11
|
368
|
yading@11
|
369 dst+= stride;
|
yading@11
|
370 }
|
yading@11
|
371 }
|
yading@11
|
372
|
yading@11
|
373 /**
|
yading@11
|
374 * Experimental Filter 1 (Horizontal)
|
yading@11
|
375 * will not damage linear gradients
|
yading@11
|
376 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
|
yading@11
|
377 * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
|
yading@11
|
378 * MMX2 version does correct clipping C version does not
|
yading@11
|
379 * not identical with the vertical one
|
yading@11
|
380 */
|
yading@11
|
381 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
|
yading@11
|
382 {
|
yading@11
|
383 int y;
|
yading@11
|
384 static uint64_t lut[256];
|
yading@11
|
385 if(!lut[255])
|
yading@11
|
386 {
|
yading@11
|
387 int i;
|
yading@11
|
388 for(i=0; i<256; i++)
|
yading@11
|
389 {
|
yading@11
|
390 int v= i < 128 ? 2*i : 2*(i-256);
|
yading@11
|
391 /*
|
yading@11
|
392 //Simulate 112242211 9-Tap filter
|
yading@11
|
393 uint64_t a= (v/16) & 0xFF;
|
yading@11
|
394 uint64_t b= (v/8) & 0xFF;
|
yading@11
|
395 uint64_t c= (v/4) & 0xFF;
|
yading@11
|
396 uint64_t d= (3*v/8) & 0xFF;
|
yading@11
|
397 */
|
yading@11
|
398 //Simulate piecewise linear interpolation
|
yading@11
|
399 uint64_t a= (v/16) & 0xFF;
|
yading@11
|
400 uint64_t b= (v*3/16) & 0xFF;
|
yading@11
|
401 uint64_t c= (v*5/16) & 0xFF;
|
yading@11
|
402 uint64_t d= (7*v/16) & 0xFF;
|
yading@11
|
403 uint64_t A= (0x100 - a)&0xFF;
|
yading@11
|
404 uint64_t B= (0x100 - b)&0xFF;
|
yading@11
|
405 uint64_t C= (0x100 - c)&0xFF;
|
yading@11
|
406 uint64_t D= (0x100 - c)&0xFF;
|
yading@11
|
407
|
yading@11
|
408 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
|
yading@11
|
409 (D<<24) | (C<<16) | (B<<8) | (A);
|
yading@11
|
410 //lut[i] = (v<<32) | (v<<24);
|
yading@11
|
411 }
|
yading@11
|
412 }
|
yading@11
|
413
|
yading@11
|
414 for(y=0; y<BLOCK_SIZE; y++){
|
yading@11
|
415 int a= src[1] - src[2];
|
yading@11
|
416 int b= src[3] - src[4];
|
yading@11
|
417 int c= src[5] - src[6];
|
yading@11
|
418
|
yading@11
|
419 int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
|
yading@11
|
420
|
yading@11
|
421 if(d < QP){
|
yading@11
|
422 int v = d * FFSIGN(-b);
|
yading@11
|
423
|
yading@11
|
424 src[1] +=v/8;
|
yading@11
|
425 src[2] +=v/4;
|
yading@11
|
426 src[3] +=3*v/8;
|
yading@11
|
427 src[4] -=3*v/8;
|
yading@11
|
428 src[5] -=v/4;
|
yading@11
|
429 src[6] -=v/8;
|
yading@11
|
430 }
|
yading@11
|
431 src+=stride;
|
yading@11
|
432 }
|
yading@11
|
433 }
|
yading@11
|
434
|
yading@11
|
435 /**
|
yading@11
|
436 * accurate deblock filter
|
yading@11
|
437 */
|
yading@11
|
438 static av_always_inline void do_a_deblock_C(uint8_t *src, int step,
|
yading@11
|
439 int stride, const PPContext *c)
|
yading@11
|
440 {
|
yading@11
|
441 int y;
|
yading@11
|
442 const int QP= c->QP;
|
yading@11
|
443 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
|
yading@11
|
444 const int dcThreshold= dcOffset*2 + 1;
|
yading@11
|
445 //START_TIMER
|
yading@11
|
446 src+= step*4; // src points to begin of the 8x8 Block
|
yading@11
|
447 for(y=0; y<8; y++){
|
yading@11
|
448 int numEq= 0;
|
yading@11
|
449
|
yading@11
|
450 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
|
yading@11
|
451 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
|
yading@11
|
452 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
|
yading@11
|
453 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
|
yading@11
|
454 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
|
yading@11
|
455 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
|
yading@11
|
456 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
|
yading@11
|
457 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
|
yading@11
|
458 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
|
yading@11
|
459 if(numEq > c->ppMode.flatnessThreshold){
|
yading@11
|
460 int min, max, x;
|
yading@11
|
461
|
yading@11
|
462 if(src[0] > src[step]){
|
yading@11
|
463 max= src[0];
|
yading@11
|
464 min= src[step];
|
yading@11
|
465 }else{
|
yading@11
|
466 max= src[step];
|
yading@11
|
467 min= src[0];
|
yading@11
|
468 }
|
yading@11
|
469 for(x=2; x<8; x+=2){
|
yading@11
|
470 if(src[x*step] > src[(x+1)*step]){
|
yading@11
|
471 if(src[x *step] > max) max= src[ x *step];
|
yading@11
|
472 if(src[(x+1)*step] < min) min= src[(x+1)*step];
|
yading@11
|
473 }else{
|
yading@11
|
474 if(src[(x+1)*step] > max) max= src[(x+1)*step];
|
yading@11
|
475 if(src[ x *step] < min) min= src[ x *step];
|
yading@11
|
476 }
|
yading@11
|
477 }
|
yading@11
|
478 if(max-min < 2*QP){
|
yading@11
|
479 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
|
yading@11
|
480 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
|
yading@11
|
481
|
yading@11
|
482 int sums[10];
|
yading@11
|
483 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
|
yading@11
|
484 sums[1] = sums[0] - first + src[3*step];
|
yading@11
|
485 sums[2] = sums[1] - first + src[4*step];
|
yading@11
|
486 sums[3] = sums[2] - first + src[5*step];
|
yading@11
|
487 sums[4] = sums[3] - first + src[6*step];
|
yading@11
|
488 sums[5] = sums[4] - src[0*step] + src[7*step];
|
yading@11
|
489 sums[6] = sums[5] - src[1*step] + last;
|
yading@11
|
490 sums[7] = sums[6] - src[2*step] + last;
|
yading@11
|
491 sums[8] = sums[7] - src[3*step] + last;
|
yading@11
|
492 sums[9] = sums[8] - src[4*step] + last;
|
yading@11
|
493
|
yading@11
|
494 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
|
yading@11
|
495 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
|
yading@11
|
496 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
|
yading@11
|
497 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
|
yading@11
|
498 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
|
yading@11
|
499 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
|
yading@11
|
500 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
|
yading@11
|
501 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
|
yading@11
|
502 }
|
yading@11
|
503 }else{
|
yading@11
|
504 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
|
yading@11
|
505
|
yading@11
|
506 if(FFABS(middleEnergy) < 8*QP){
|
yading@11
|
507 const int q=(src[3*step] - src[4*step])/2;
|
yading@11
|
508 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
|
yading@11
|
509 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
|
yading@11
|
510
|
yading@11
|
511 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
|
yading@11
|
512 d= FFMAX(d, 0);
|
yading@11
|
513
|
yading@11
|
514 d= (5*d + 32) >> 6;
|
yading@11
|
515 d*= FFSIGN(-middleEnergy);
|
yading@11
|
516
|
yading@11
|
517 if(q>0){
|
yading@11
|
518 d= d<0 ? 0 : d;
|
yading@11
|
519 d= d>q ? q : d;
|
yading@11
|
520 }else{
|
yading@11
|
521 d= d>0 ? 0 : d;
|
yading@11
|
522 d= d<q ? q : d;
|
yading@11
|
523 }
|
yading@11
|
524
|
yading@11
|
525 src[3*step]-= d;
|
yading@11
|
526 src[4*step]+= d;
|
yading@11
|
527 }
|
yading@11
|
528 }
|
yading@11
|
529
|
yading@11
|
530 src += stride;
|
yading@11
|
531 }
|
yading@11
|
532 /*if(step==16){
|
yading@11
|
533 STOP_TIMER("step16")
|
yading@11
|
534 }else{
|
yading@11
|
535 STOP_TIMER("stepX")
|
yading@11
|
536 }*/
|
yading@11
|
537 }
|
yading@11
|
538
|
yading@11
|
539 //Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
|
yading@11
|
540 //Plain C versions
|
yading@11
|
541 //we always compile C for testing which needs bitexactness
|
yading@11
|
542 #define TEMPLATE_PP_C 1
|
yading@11
|
543 #include "postprocess_template.c"
|
yading@11
|
544
|
yading@11
|
545 #if HAVE_ALTIVEC
|
yading@11
|
546 # define TEMPLATE_PP_ALTIVEC 1
|
yading@11
|
547 # include "postprocess_altivec_template.c"
|
yading@11
|
548 # include "postprocess_template.c"
|
yading@11
|
549 #endif
|
yading@11
|
550
|
yading@11
|
551 #if ARCH_X86 && HAVE_INLINE_ASM
|
yading@11
|
552 # if CONFIG_RUNTIME_CPUDETECT
|
yading@11
|
553 # define TEMPLATE_PP_MMX 1
|
yading@11
|
554 # include "postprocess_template.c"
|
yading@11
|
555 # define TEMPLATE_PP_MMXEXT 1
|
yading@11
|
556 # include "postprocess_template.c"
|
yading@11
|
557 # define TEMPLATE_PP_3DNOW 1
|
yading@11
|
558 # include "postprocess_template.c"
|
yading@11
|
559 # define TEMPLATE_PP_SSE2 1
|
yading@11
|
560 # include "postprocess_template.c"
|
yading@11
|
561 # else
|
yading@11
|
562 # if HAVE_SSE2_INLINE
|
yading@11
|
563 # define TEMPLATE_PP_SSE2 1
|
yading@11
|
564 # include "postprocess_template.c"
|
yading@11
|
565 # elif HAVE_MMXEXT_INLINE
|
yading@11
|
566 # define TEMPLATE_PP_MMXEXT 1
|
yading@11
|
567 # include "postprocess_template.c"
|
yading@11
|
568 # elif HAVE_AMD3DNOW_INLINE
|
yading@11
|
569 # define TEMPLATE_PP_3DNOW 1
|
yading@11
|
570 # include "postprocess_template.c"
|
yading@11
|
571 # elif HAVE_MMX_INLINE
|
yading@11
|
572 # define TEMPLATE_PP_MMX 1
|
yading@11
|
573 # include "postprocess_template.c"
|
yading@11
|
574 # endif
|
yading@11
|
575 # endif
|
yading@11
|
576 #endif
|
yading@11
|
577
|
yading@11
|
/* Signature shared by the postProcess_* variants (postProcess_C,
 * postProcess_MMX, ...) that postProcess() selects between and then calls. */
typedef void (*pp_fn)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
                      const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c2);
|
yading@11
|
580
|
yading@11
|
581 static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
|
yading@11
|
582 const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
|
yading@11
|
583 {
|
yading@11
|
584 pp_fn pp = postProcess_C;
|
yading@11
|
585 PPContext *c= (PPContext *)vc;
|
yading@11
|
586 PPMode *ppMode= (PPMode *)vm;
|
yading@11
|
587 c->ppMode= *ppMode; //FIXME
|
yading@11
|
588
|
yading@11
|
589 if (!(ppMode->lumMode & BITEXACT)) {
|
yading@11
|
590 #if CONFIG_RUNTIME_CPUDETECT
|
yading@11
|
591 #if ARCH_X86 && HAVE_INLINE_ASM
|
yading@11
|
592 // ordered per speed fastest first
|
yading@11
|
593 if (c->cpuCaps & AV_CPU_FLAG_SSE2) pp = postProcess_SSE2;
|
yading@11
|
594 else if (c->cpuCaps & AV_CPU_FLAG_MMXEXT) pp = postProcess_MMX2;
|
yading@11
|
595 else if (c->cpuCaps & AV_CPU_FLAG_3DNOW) pp = postProcess_3DNow;
|
yading@11
|
596 else if (c->cpuCaps & AV_CPU_FLAG_MMX) pp = postProcess_MMX;
|
yading@11
|
597 #elif HAVE_ALTIVEC
|
yading@11
|
598 if (c->cpuCaps & AV_CPU_FLAG_ALTIVEC) pp = postProcess_altivec;
|
yading@11
|
599 #endif
|
yading@11
|
600 #else /* CONFIG_RUNTIME_CPUDETECT */
|
yading@11
|
601 #if HAVE_SSE2_INLINE
|
yading@11
|
602 pp = postProcess_SSE2;
|
yading@11
|
603 #elif HAVE_MMXEXT_INLINE
|
yading@11
|
604 pp = postProcess_MMX2;
|
yading@11
|
605 #elif HAVE_AMD3DNOW_INLINE
|
yading@11
|
606 pp = postProcess_3DNow;
|
yading@11
|
607 #elif HAVE_MMX_INLINE
|
yading@11
|
608 pp = postProcess_MMX;
|
yading@11
|
609 #elif HAVE_ALTIVEC
|
yading@11
|
610 pp = postProcess_altivec;
|
yading@11
|
611 #endif
|
yading@11
|
612 #endif /* !CONFIG_RUNTIME_CPUDETECT */
|
yading@11
|
613 }
|
yading@11
|
614
|
yading@11
|
615 pp(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
|
yading@11
|
616 }
|
yading@11
|
617
|
yading@11
|
/* -pp Command line Help
 *
 * Printed line by line by pp_get_mode_by_name_and_quality() when the mode
 * name is "help". The long filter names listed here are spelled exactly as
 * in filters[] ("ahdeblock", "avdeblock", "forcequant"), so that every name
 * the help advertises is one the filter table defines.
 */
const char pp_help[] =
"Available postprocessing filters:\n"
"Filters Options\n"
"short long name short long option Description\n"
"* * a autoq CPU power dependent enabler\n"
" c chrom chrominance filtering enabled\n"
" y nochrom chrominance filtering disabled\n"
" n noluma luma filtering disabled\n"
"hb hdeblock (2 threshold) horizontal deblocking filter\n"
" 1. difference factor: default=32, higher -> more deblocking\n"
" 2. flatness threshold: default=39, lower -> more deblocking\n"
" the h & v deblocking filters share these\n"
" so you can't set different thresholds for h / v\n"
"vb vdeblock (2 threshold) vertical deblocking filter\n"
"ha ahdeblock (2 threshold) horizontal deblocking filter\n"
"va avdeblock (2 threshold) vertical deblocking filter\n"
"h1 x1hdeblock experimental h deblock filter 1\n"
"v1 x1vdeblock experimental v deblock filter 1\n"
"dr dering deringing filter\n"
"al autolevels automatic brightness / contrast\n"
" f fullyrange stretch luminance to (0..255)\n"
"lb linblenddeint linear blend deinterlacer\n"
"li linipoldeint linear interpolating deinterlace\n"
"ci cubicipoldeint cubic interpolating deinterlacer\n"
"md mediandeint median deinterlacer\n"
"fd ffmpegdeint ffmpeg deinterlacer\n"
"l5 lowpass5 FIR lowpass deinterlacer\n"
"de default hb:a,vb:a,dr:a\n"
"fa fast h1:a,v1:a,dr:a\n"
"ac ha:a:128:7,va:a,dr:a\n"
"tn tmpnoise (3 threshold) temporal noise reducer\n"
" 1. <= 2. <= 3. larger -> stronger filtering\n"
"fq forcequant <quantizer> force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
|
yading@11
|
663
|
yading@11
|
664 pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
|
yading@11
|
665 {
|
yading@11
|
666 char temp[GET_MODE_BUFFER_SIZE];
|
yading@11
|
667 char *p= temp;
|
yading@11
|
668 static const char filterDelimiters[] = ",/";
|
yading@11
|
669 static const char optionDelimiters[] = ":|";
|
yading@11
|
670 struct PPMode *ppMode;
|
yading@11
|
671 char *filterToken;
|
yading@11
|
672
|
yading@11
|
673 if (!name) {
|
yading@11
|
674 av_log(NULL, AV_LOG_ERROR, "pp: Missing argument\n");
|
yading@11
|
675 return NULL;
|
yading@11
|
676 }
|
yading@11
|
677
|
yading@11
|
678 if (!strcmp(name, "help")) {
|
yading@11
|
679 const char *p;
|
yading@11
|
680 for (p = pp_help; strchr(p, '\n'); p = strchr(p, '\n') + 1) {
|
yading@11
|
681 av_strlcpy(temp, p, FFMIN(sizeof(temp), strchr(p, '\n') - p + 2));
|
yading@11
|
682 av_log(NULL, AV_LOG_INFO, "%s", temp);
|
yading@11
|
683 }
|
yading@11
|
684 return NULL;
|
yading@11
|
685 }
|
yading@11
|
686
|
yading@11
|
687 ppMode= av_malloc(sizeof(PPMode));
|
yading@11
|
688
|
yading@11
|
689 ppMode->lumMode= 0;
|
yading@11
|
690 ppMode->chromMode= 0;
|
yading@11
|
691 ppMode->maxTmpNoise[0]= 700;
|
yading@11
|
692 ppMode->maxTmpNoise[1]= 1500;
|
yading@11
|
693 ppMode->maxTmpNoise[2]= 3000;
|
yading@11
|
694 ppMode->maxAllowedY= 234;
|
yading@11
|
695 ppMode->minAllowedY= 16;
|
yading@11
|
696 ppMode->baseDcDiff= 256/8;
|
yading@11
|
697 ppMode->flatnessThreshold= 56-16-1;
|
yading@11
|
698 ppMode->maxClippedThreshold= 0.01;
|
yading@11
|
699 ppMode->error=0;
|
yading@11
|
700
|
yading@11
|
701 memset(temp, 0, GET_MODE_BUFFER_SIZE);
|
yading@11
|
702 av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE - 1);
|
yading@11
|
703
|
yading@11
|
704 av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
|
yading@11
|
705
|
yading@11
|
706 for(;;){
|
yading@11
|
707 char *filterName;
|
yading@11
|
708 int q= 1000000; //PP_QUALITY_MAX;
|
yading@11
|
709 int chrom=-1;
|
yading@11
|
710 int luma=-1;
|
yading@11
|
711 char *option;
|
yading@11
|
712 char *options[OPTIONS_ARRAY_SIZE];
|
yading@11
|
713 int i;
|
yading@11
|
714 int filterNameOk=0;
|
yading@11
|
715 int numOfUnknownOptions=0;
|
yading@11
|
716 int enable=1; //does the user want us to enabled or disabled the filter
|
yading@11
|
717
|
yading@11
|
718 filterToken= strtok(p, filterDelimiters);
|
yading@11
|
719 if(filterToken == NULL) break;
|
yading@11
|
720 p+= strlen(filterToken) + 1; // p points to next filterToken
|
yading@11
|
721 filterName= strtok(filterToken, optionDelimiters);
|
yading@11
|
722 av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
|
yading@11
|
723
|
yading@11
|
724 if(*filterName == '-'){
|
yading@11
|
725 enable=0;
|
yading@11
|
726 filterName++;
|
yading@11
|
727 }
|
yading@11
|
728
|
yading@11
|
729 for(;;){ //for all options
|
yading@11
|
730 option= strtok(NULL, optionDelimiters);
|
yading@11
|
731 if(option == NULL) break;
|
yading@11
|
732
|
yading@11
|
733 av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
|
yading@11
|
734 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
|
yading@11
|
735 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
|
yading@11
|
736 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
|
yading@11
|
737 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
|
yading@11
|
738 else{
|
yading@11
|
739 options[numOfUnknownOptions] = option;
|
yading@11
|
740 numOfUnknownOptions++;
|
yading@11
|
741 }
|
yading@11
|
742 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
|
yading@11
|
743 }
|
yading@11
|
744 options[numOfUnknownOptions] = NULL;
|
yading@11
|
745
|
yading@11
|
746 /* replace stuff from the replace Table */
|
yading@11
|
747 for(i=0; replaceTable[2*i]!=NULL; i++){
|
yading@11
|
748 if(!strcmp(replaceTable[2*i], filterName)){
|
yading@11
|
749 int newlen= strlen(replaceTable[2*i + 1]);
|
yading@11
|
750 int plen;
|
yading@11
|
751 int spaceLeft;
|
yading@11
|
752
|
yading@11
|
753 p--, *p=',';
|
yading@11
|
754
|
yading@11
|
755 plen= strlen(p);
|
yading@11
|
756 spaceLeft= p - temp + plen;
|
yading@11
|
757 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE - 1){
|
yading@11
|
758 ppMode->error++;
|
yading@11
|
759 break;
|
yading@11
|
760 }
|
yading@11
|
761 memmove(p + newlen, p, plen+1);
|
yading@11
|
762 memcpy(p, replaceTable[2*i + 1], newlen);
|
yading@11
|
763 filterNameOk=1;
|
yading@11
|
764 }
|
yading@11
|
765 }
|
yading@11
|
766
|
yading@11
|
767 for(i=0; filters[i].shortName!=NULL; i++){
|
yading@11
|
768 if( !strcmp(filters[i].longName, filterName)
|
yading@11
|
769 || !strcmp(filters[i].shortName, filterName)){
|
yading@11
|
770 ppMode->lumMode &= ~filters[i].mask;
|
yading@11
|
771 ppMode->chromMode &= ~filters[i].mask;
|
yading@11
|
772
|
yading@11
|
773 filterNameOk=1;
|
yading@11
|
774 if(!enable) break; // user wants to disable it
|
yading@11
|
775
|
yading@11
|
776 if(q >= filters[i].minLumQuality && luma)
|
yading@11
|
777 ppMode->lumMode|= filters[i].mask;
|
yading@11
|
778 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
|
yading@11
|
779 if(q >= filters[i].minChromQuality)
|
yading@11
|
780 ppMode->chromMode|= filters[i].mask;
|
yading@11
|
781
|
yading@11
|
782 if(filters[i].mask == LEVEL_FIX){
|
yading@11
|
783 int o;
|
yading@11
|
784 ppMode->minAllowedY= 16;
|
yading@11
|
785 ppMode->maxAllowedY= 234;
|
yading@11
|
786 for(o=0; options[o]!=NULL; o++){
|
yading@11
|
787 if( !strcmp(options[o],"fullyrange")
|
yading@11
|
788 ||!strcmp(options[o],"f")){
|
yading@11
|
789 ppMode->minAllowedY= 0;
|
yading@11
|
790 ppMode->maxAllowedY= 255;
|
yading@11
|
791 numOfUnknownOptions--;
|
yading@11
|
792 }
|
yading@11
|
793 }
|
yading@11
|
794 }
|
yading@11
|
795 else if(filters[i].mask == TEMP_NOISE_FILTER)
|
yading@11
|
796 {
|
yading@11
|
797 int o;
|
yading@11
|
798 int numOfNoises=0;
|
yading@11
|
799
|
yading@11
|
800 for(o=0; options[o]!=NULL; o++){
|
yading@11
|
801 char *tail;
|
yading@11
|
802 ppMode->maxTmpNoise[numOfNoises]=
|
yading@11
|
803 strtol(options[o], &tail, 0);
|
yading@11
|
804 if(tail!=options[o]){
|
yading@11
|
805 numOfNoises++;
|
yading@11
|
806 numOfUnknownOptions--;
|
yading@11
|
807 if(numOfNoises >= 3) break;
|
yading@11
|
808 }
|
yading@11
|
809 }
|
yading@11
|
810 }
|
yading@11
|
811 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
|
yading@11
|
812 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
|
yading@11
|
813 int o;
|
yading@11
|
814
|
yading@11
|
815 for(o=0; options[o]!=NULL && o<2; o++){
|
yading@11
|
816 char *tail;
|
yading@11
|
817 int val= strtol(options[o], &tail, 0);
|
yading@11
|
818 if(tail==options[o]) break;
|
yading@11
|
819
|
yading@11
|
820 numOfUnknownOptions--;
|
yading@11
|
821 if(o==0) ppMode->baseDcDiff= val;
|
yading@11
|
822 else ppMode->flatnessThreshold= val;
|
yading@11
|
823 }
|
yading@11
|
824 }
|
yading@11
|
825 else if(filters[i].mask == FORCE_QUANT){
|
yading@11
|
826 int o;
|
yading@11
|
827 ppMode->forcedQuant= 15;
|
yading@11
|
828
|
yading@11
|
829 for(o=0; options[o]!=NULL && o<1; o++){
|
yading@11
|
830 char *tail;
|
yading@11
|
831 int val= strtol(options[o], &tail, 0);
|
yading@11
|
832 if(tail==options[o]) break;
|
yading@11
|
833
|
yading@11
|
834 numOfUnknownOptions--;
|
yading@11
|
835 ppMode->forcedQuant= val;
|
yading@11
|
836 }
|
yading@11
|
837 }
|
yading@11
|
838 }
|
yading@11
|
839 }
|
yading@11
|
840 if(!filterNameOk) ppMode->error++;
|
yading@11
|
841 ppMode->error += numOfUnknownOptions;
|
yading@11
|
842 }
|
yading@11
|
843
|
yading@11
|
844 av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
|
yading@11
|
845 if(ppMode->error){
|
yading@11
|
846 av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
|
yading@11
|
847 av_free(ppMode);
|
yading@11
|
848 return NULL;
|
yading@11
|
849 }
|
yading@11
|
850 return ppMode;
|
yading@11
|
851 }
|
yading@11
|
852
|
yading@11
|
853 void pp_free_mode(pp_mode *mode){
|
yading@11
|
854 av_free(mode);
|
yading@11
|
855 }
|
yading@11
|
856
|
yading@11
|
/**
 * Drop the buffer referenced by *p and replace it with a fresh zero-filled one.
 *
 * The old contents are not preserved. The alignment argument is accepted but
 * not used here; the allocation is left entirely to av_mallocz().
 *
 * @param p         address of the buffer pointer to replace
 * @param alignment requested alignment (ignored)
 * @param size      size in bytes of the new buffer
 */
static void reallocAlign(void **p, int alignment, int size)
{
    void *fresh;

    av_free(*p);
    fresh = av_mallocz(size);
    *p = fresh;
}
|
yading@11
|
861
|
yading@11
|
862 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
|
yading@11
|
863 int mbWidth = (width+15)>>4;
|
yading@11
|
864 int mbHeight= (height+15)>>4;
|
yading@11
|
865 int i;
|
yading@11
|
866
|
yading@11
|
867 c->stride= stride;
|
yading@11
|
868 c->qpStride= qpStride;
|
yading@11
|
869
|
yading@11
|
870 reallocAlign((void **)&c->tempDst, 8, stride*24+32);
|
yading@11
|
871 reallocAlign((void **)&c->tempSrc, 8, stride*24);
|
yading@11
|
872 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
|
yading@11
|
873 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
|
yading@11
|
874 for(i=0; i<256; i++)
|
yading@11
|
875 c->yHistogram[i]= width*height/64*15/256;
|
yading@11
|
876
|
yading@11
|
877 for(i=0; i<3; i++){
|
yading@11
|
878 //Note: The +17*1024 is just there so I do not have to worry about r/w over the end.
|
yading@11
|
879 reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024);
|
yading@11
|
880 reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
|
yading@11
|
881 }
|
yading@11
|
882
|
yading@11
|
883 reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
|
yading@11
|
884 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
|
yading@11
|
885 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
|
yading@11
|
886 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
|
yading@11
|
887 }
|
yading@11
|
888
|
yading@11
|
/**
 * Name callback for the postprocessing log class.
 *
 * @param ptr logging context (not inspected)
 * @return the constant string "postproc"
 */
static const char *context_to_name(void *ptr)
{
    static const char name[] = "postproc";

    return name;
}
|
yading@11
|
892
|
yading@11
|
893 static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
|
yading@11
|
894
|
yading@11
|
895 pp_context *pp_get_context(int width, int height, int cpuCaps){
|
yading@11
|
896 PPContext *c= av_malloc(sizeof(PPContext));
|
yading@11
|
897 int stride= FFALIGN(width, 16); //assumed / will realloc if needed
|
yading@11
|
898 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
|
yading@11
|
899
|
yading@11
|
900 memset(c, 0, sizeof(PPContext));
|
yading@11
|
901 c->av_class = &av_codec_context_class;
|
yading@11
|
902 if(cpuCaps&PP_FORMAT){
|
yading@11
|
903 c->hChromaSubSample= cpuCaps&0x3;
|
yading@11
|
904 c->vChromaSubSample= (cpuCaps>>4)&0x3;
|
yading@11
|
905 }else{
|
yading@11
|
906 c->hChromaSubSample= 1;
|
yading@11
|
907 c->vChromaSubSample= 1;
|
yading@11
|
908 }
|
yading@11
|
909 if (cpuCaps & PP_CPU_CAPS_AUTO) {
|
yading@11
|
910 c->cpuCaps = av_get_cpu_flags();
|
yading@11
|
911 } else {
|
yading@11
|
912 c->cpuCaps = 0;
|
yading@11
|
913 if (cpuCaps & PP_CPU_CAPS_MMX) c->cpuCaps |= AV_CPU_FLAG_MMX;
|
yading@11
|
914 if (cpuCaps & PP_CPU_CAPS_MMX2) c->cpuCaps |= AV_CPU_FLAG_MMXEXT;
|
yading@11
|
915 if (cpuCaps & PP_CPU_CAPS_3DNOW) c->cpuCaps |= AV_CPU_FLAG_3DNOW;
|
yading@11
|
916 if (cpuCaps & PP_CPU_CAPS_ALTIVEC) c->cpuCaps |= AV_CPU_FLAG_ALTIVEC;
|
yading@11
|
917 }
|
yading@11
|
918
|
yading@11
|
919 reallocBuffers(c, width, height, stride, qpStride);
|
yading@11
|
920
|
yading@11
|
921 c->frameNum=-1;
|
yading@11
|
922
|
yading@11
|
923 return c;
|
yading@11
|
924 }
|
yading@11
|
925
|
yading@11
|
926 void pp_free_context(void *vc){
|
yading@11
|
927 PPContext *c = (PPContext*)vc;
|
yading@11
|
928 int i;
|
yading@11
|
929
|
yading@11
|
930 for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
|
yading@11
|
931 for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
|
yading@11
|
932
|
yading@11
|
933 av_free(c->tempBlocks);
|
yading@11
|
934 av_free(c->yHistogram);
|
yading@11
|
935 av_free(c->tempDst);
|
yading@11
|
936 av_free(c->tempSrc);
|
yading@11
|
937 av_free(c->deintTemp);
|
yading@11
|
938 av_free(c->stdQPTable);
|
yading@11
|
939 av_free(c->nonBQPTable);
|
yading@11
|
940 av_free(c->forcedQPTable);
|
yading@11
|
941
|
yading@11
|
942 memset(c, 0, sizeof(PPContext));
|
yading@11
|
943
|
yading@11
|
944 av_free(c);
|
yading@11
|
945 }
|
yading@11
|
946
|
yading@11
|
947 void pp_postprocess(const uint8_t * src[3], const int srcStride[3],
|
yading@11
|
948 uint8_t * dst[3], const int dstStride[3],
|
yading@11
|
949 int width, int height,
|
yading@11
|
950 const QP_STORE_T *QP_store, int QPStride,
|
yading@11
|
951 pp_mode *vm, void *vc, int pict_type)
|
yading@11
|
952 {
|
yading@11
|
953 int mbWidth = (width+15)>>4;
|
yading@11
|
954 int mbHeight= (height+15)>>4;
|
yading@11
|
955 PPMode *mode = (PPMode*)vm;
|
yading@11
|
956 PPContext *c = (PPContext*)vc;
|
yading@11
|
957 int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
|
yading@11
|
958 int absQPStride = FFABS(QPStride);
|
yading@11
|
959
|
yading@11
|
960 // c->stride and c->QPStride are always positive
|
yading@11
|
961 if(c->stride < minStride || c->qpStride < absQPStride)
|
yading@11
|
962 reallocBuffers(c, width, height,
|
yading@11
|
963 FFMAX(minStride, c->stride),
|
yading@11
|
964 FFMAX(c->qpStride, absQPStride));
|
yading@11
|
965
|
yading@11
|
966 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
|
yading@11
|
967 int i;
|
yading@11
|
968 QP_store= c->forcedQPTable;
|
yading@11
|
969 absQPStride = QPStride = 0;
|
yading@11
|
970 if(mode->lumMode & FORCE_QUANT)
|
yading@11
|
971 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
|
yading@11
|
972 else
|
yading@11
|
973 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
|
yading@11
|
974 }
|
yading@11
|
975
|
yading@11
|
976 if(pict_type & PP_PICT_TYPE_QP2){
|
yading@11
|
977 int i;
|
yading@11
|
978 const int count= mbHeight * absQPStride;
|
yading@11
|
979 for(i=0; i<(count>>2); i++){
|
yading@11
|
980 ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
|
yading@11
|
981 }
|
yading@11
|
982 for(i<<=2; i<count; i++){
|
yading@11
|
983 c->stdQPTable[i] = QP_store[i]>>1;
|
yading@11
|
984 }
|
yading@11
|
985 QP_store= c->stdQPTable;
|
yading@11
|
986 QPStride= absQPStride;
|
yading@11
|
987 }
|
yading@11
|
988
|
yading@11
|
989 if(0){
|
yading@11
|
990 int x,y;
|
yading@11
|
991 for(y=0; y<mbHeight; y++){
|
yading@11
|
992 for(x=0; x<mbWidth; x++){
|
yading@11
|
993 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
|
yading@11
|
994 }
|
yading@11
|
995 av_log(c, AV_LOG_INFO, "\n");
|
yading@11
|
996 }
|
yading@11
|
997 av_log(c, AV_LOG_INFO, "\n");
|
yading@11
|
998 }
|
yading@11
|
999
|
yading@11
|
1000 if((pict_type&7)!=3){
|
yading@11
|
1001 if (QPStride >= 0){
|
yading@11
|
1002 int i;
|
yading@11
|
1003 const int count= mbHeight * QPStride;
|
yading@11
|
1004 for(i=0; i<(count>>2); i++){
|
yading@11
|
1005 ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
|
yading@11
|
1006 }
|
yading@11
|
1007 for(i<<=2; i<count; i++){
|
yading@11
|
1008 c->nonBQPTable[i] = QP_store[i] & 0x3F;
|
yading@11
|
1009 }
|
yading@11
|
1010 } else {
|
yading@11
|
1011 int i,j;
|
yading@11
|
1012 for(i=0; i<mbHeight; i++) {
|
yading@11
|
1013 for(j=0; j<absQPStride; j++) {
|
yading@11
|
1014 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
|
yading@11
|
1015 }
|
yading@11
|
1016 }
|
yading@11
|
1017 }
|
yading@11
|
1018 }
|
yading@11
|
1019
|
yading@11
|
1020 av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
|
yading@11
|
1021 mode->lumMode, mode->chromMode);
|
yading@11
|
1022
|
yading@11
|
1023 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
|
yading@11
|
1024 width, height, QP_store, QPStride, 0, mode, c);
|
yading@11
|
1025
|
yading@11
|
1026 width = (width )>>c->hChromaSubSample;
|
yading@11
|
1027 height = (height)>>c->vChromaSubSample;
|
yading@11
|
1028
|
yading@11
|
1029 if(mode->chromMode){
|
yading@11
|
1030 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
|
yading@11
|
1031 width, height, QP_store, QPStride, 1, mode, c);
|
yading@11
|
1032 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
|
yading@11
|
1033 width, height, QP_store, QPStride, 2, mode, c);
|
yading@11
|
1034 }
|
yading@11
|
1035 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
|
yading@11
|
1036 linecpy(dst[1], src[1], height, srcStride[1]);
|
yading@11
|
1037 linecpy(dst[2], src[2], height, srcStride[2]);
|
yading@11
|
1038 }else{
|
yading@11
|
1039 int y;
|
yading@11
|
1040 for(y=0; y<height; y++){
|
yading@11
|
1041 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
|
yading@11
|
1042 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
|
yading@11
|
1043 }
|
yading@11
|
1044 }
|
yading@11
|
1045 }
|