annotate ffmpeg/libpostproc/postprocess.c @ 13:844d341cf643 tip

Back up before ISMIR
author Yading Song <yading.song@eecs.qmul.ac.uk>
date Thu, 31 Oct 2013 13:17:06 +0000
parents f445c3017523
children
rev   line source
yading@11 1 /*
yading@11 2 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
yading@11 3 *
yading@11 4 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
yading@11 5 *
yading@11 6 * This file is part of FFmpeg.
yading@11 7 *
yading@11 8 * FFmpeg is free software; you can redistribute it and/or modify
yading@11 9 * it under the terms of the GNU General Public License as published by
yading@11 10 * the Free Software Foundation; either version 2 of the License, or
yading@11 11 * (at your option) any later version.
yading@11 12 *
yading@11 13 * FFmpeg is distributed in the hope that it will be useful,
yading@11 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
yading@11 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
yading@11 16 * GNU General Public License for more details.
yading@11 17 *
yading@11 18 * You should have received a copy of the GNU General Public License
yading@11 19 * along with FFmpeg; if not, write to the Free Software
yading@11 20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
yading@11 21 */
yading@11 22
yading@11 23 /**
yading@11 24 * @file
yading@11 25 * postprocessing.
yading@11 26 */
yading@11 27
yading@11 28 /*
yading@11 29 C MMX MMX2 3DNow AltiVec
yading@11 30 isVertDC Ec Ec Ec
yading@11 31 isVertMinMaxOk Ec Ec Ec
yading@11 32 doVertLowPass E e e Ec
yading@11 33 doVertDefFilter Ec Ec e e Ec
yading@11 34 isHorizDC Ec Ec Ec
yading@11 35 isHorizMinMaxOk a E Ec
yading@11 36 doHorizLowPass E e e Ec
yading@11 37 doHorizDefFilter Ec Ec e e Ec
yading@11 38 do_a_deblock Ec E Ec E
yading@11 39 deRing E e e* Ecp
yading@11 40 Vertical RKAlgo1 E a a
yading@11 41 Horizontal RKAlgo1 a a
yading@11 42 Vertical X1# a E E
yading@11 43 Horizontal X1# a E E
yading@11 44 LinIpolDeinterlace e E E*
yading@11 45 CubicIpolDeinterlace a e e*
yading@11 46 LinBlendDeinterlace e E E*
yading@11 47 MedianDeinterlace# E Ec Ec
yading@11 48 TempDeNoiser# E e e Ec
yading@11 49
yading@11 50 * I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
yading@11 51 # more or less selfinvented filters so the exactness is not too meaningful
yading@11 52 E = Exact implementation
yading@11 53 e = almost exact implementation (slightly different rounding,...)
yading@11 54 a = alternative / approximate impl
yading@11 55 c = checked against the other implementations (-vo md5)
yading@11 56 p = partially optimized, still some work to do
yading@11 57 */
yading@11 58
yading@11 59 /*
yading@11 60 TODO:
yading@11 61 reduce the time wasted on the mem transfer
yading@11 62 unroll stuff if instructions depend too much on the prior one
yading@11 63 move YScale thing to the end instead of fixing QP
yading@11 64 write a faster and higher quality deblocking filter :)
yading@11 65 make the mainloop more flexible (variable number of blocks at once
yading@11 66 (the if/else stuff per block is slowing things down)
yading@11 67 compare the quality & speed of all filters
yading@11 68 split this huge file
yading@11 69 optimize c versions
yading@11 70 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
yading@11 71 ...
yading@11 72 */
yading@11 73
yading@11 74 //Changelog: use git log
yading@11 75
yading@11 76 #include "config.h"
yading@11 77 #include "libavutil/avutil.h"
yading@11 78 #include "libavutil/avassert.h"
yading@11 79 #include <inttypes.h>
yading@11 80 #include <stdio.h>
yading@11 81 #include <stdlib.h>
yading@11 82 #include <string.h>
yading@11 83 //#undef HAVE_MMXEXT_INLINE
yading@11 84 //#define HAVE_AMD3DNOW_INLINE
yading@11 85 //#undef HAVE_MMX_INLINE
yading@11 86 //#undef ARCH_X86
yading@11 87 //#define DEBUG_BRIGHTNESS
yading@11 88 #include "postprocess.h"
yading@11 89 #include "postprocess_internal.h"
yading@11 90 #include "libavutil/avstring.h"
yading@11 91
yading@11 92 unsigned postproc_version(void)
yading@11 93 {
yading@11 94 av_assert0(LIBPOSTPROC_VERSION_MICRO >= 100);
yading@11 95 return LIBPOSTPROC_VERSION_INT;
yading@11 96 }
yading@11 97
yading@11 98 const char *postproc_configuration(void)
yading@11 99 {
yading@11 100 return FFMPEG_CONFIGURATION;
yading@11 101 }
yading@11 102
yading@11 103 const char *postproc_license(void)
yading@11 104 {
yading@11 105 #define LICENSE_PREFIX "libpostproc license: "
yading@11 106 return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
yading@11 107 }
yading@11 108
yading@11 109 #if HAVE_ALTIVEC_H
yading@11 110 #include <altivec.h>
yading@11 111 #endif
yading@11 112
/* Size in bytes of the scratch buffer `temp` used by
 * pp_get_mode_by_name_and_quality() while it parses the mode string. */
#define GET_MODE_BUFFER_SIZE 500
/* Number of slots in the per-filter array of unrecognised option strings
 * (one slot is needed for the terminating NULL). */
#define OPTIONS_ARRAY_SIZE 10
/* Edge length, in samples, of the square blocks the filters work on. */
#define BLOCK_SIZE 8
/* NOTE(review): not referenced in this part of the file; presumably the row
 * stride of a temporary 8-wide block buffer -- confirm at its users. */
#define TEMP_STRIDE 8
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
yading@11 118
#if ARCH_X86 && HAVE_INLINE_ASM
/* 64-bit bit patterns, only built for x86 with inline asm.
 * wNN holds the 16-bit value 0x00NN in each of its four words,
 * bNN holds the byte 0xNN in each of its eight bytes.
 * NOTE(review): none of them is referenced in this part of the file;
 * presumably they are operands of the inline-asm filter templates and the
 * first DECLARE_ASM_CONST argument is the alignment -- confirm. */
DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
#endif

/* Declared for every architecture.
 * NOTE(review): presumably the threshold used by the dering filter in the
 * templates; it is not referenced in this part of the file -- confirm. */
DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
yading@11 131
yading@11 132
/*
 * Table of the filters selectable by name, terminated by an entry whose
 * shortName is NULL.
 *
 * The first two columns are the short and long filter name that
 * pp_get_mode_by_name_and_quality() compares (case-sensitively) against the
 * user supplied name; the last column is the bit mask that is set in / cleared
 * from the luma and chroma mode words.
 * NOTE(review): the three integer columns are presumably chromDefault,
 * minLumQuality and minChromQuality in that order -- confirm against the
 * declaration of struct PPFilter.
 */
static const struct PPFilter filters[]=
{
    {"hb", "hdeblock",       1, 1, 3, H_DEBLOCK},
    {"vb", "vdeblock",       1, 2, 4, V_DEBLOCK},
/*  {"hr", "rkhdeblock",     1, 1, 3, H_RK1_FILTER},
    {"vr", "rkvdeblock",     1, 2, 4, V_RK1_FILTER},*/
    {"h1", "x1hdeblock",     1, 1, 3, H_X1_FILTER},
    {"v1", "x1vdeblock",     1, 2, 4, V_X1_FILTER},
    {"ha", "ahdeblock",      1, 1, 3, H_A_DEBLOCK},
    {"va", "avdeblock",      1, 2, 4, V_A_DEBLOCK},
    {"dr", "dering",         1, 5, 6, DERING},
    {"al", "autolevels",     0, 1, 2, LEVEL_FIX},
    {"lb", "linblenddeint",  1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
    {"li", "linipoldeint",   1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
    {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
    {"md", "mediandeint",    1, 1, 4, MEDIAN_DEINT_FILTER},
    {"fd", "ffmpegdeint",    1, 1, 4, FFMPEG_DEINT_FILTER},
    {"l5", "lowpass5",       1, 1, 4, LOWPASS5_DEINT_FILTER},
    {"tn", "tmpnoise",       1, 7, 8, TEMP_NOISE_FILTER},
    {"fq", "forcequant",     1, 0, 0, FORCE_QUANT},
    {"be", "bitexact",       1, 0, 0, BITEXACT},
    {NULL, NULL,0,0,0,0} //End Marker
};
yading@11 156
/*
 * Alias table used while parsing a mode string.
 *
 * Entries come in pairs: replaceTable[2*i] is an alias name and
 * replaceTable[2*i + 1] is the filter list that is substituted for it.
 * A single NULL at an even index terminates the table.
 *
 * Both the strings and the pointers are const: the table is pure lookup
 * data and must never be modified at run time.
 */
static const char *const replaceTable[]=
{
    "default",      "hb:a,vb:a,dr:a",
    "de",           "hb:a,vb:a,dr:a",
    "fast",         "h1:a,v1:a,dr:a",
    "fa",           "h1:a,v1:a,dr:a",
    "ac",           "ha:a:128:7,va:a,dr:a",
    NULL //End Marker
};
yading@11 166
yading@11 167
yading@11 168 #if ARCH_X86 && HAVE_INLINE_ASM
/** Execute the x86 "prefetchnta" instruction on the address p. */
static inline void prefetchnta(void *p)
{
    __asm__ volatile(
        "prefetchnta (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}
yading@11 175
/** Execute the x86 "prefetcht0" instruction on the address p. */
static inline void prefetcht0(void *p)
{
    __asm__ volatile(
        "prefetcht0 (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}
yading@11 182
/** Execute the x86 "prefetcht1" instruction on the address p. */
static inline void prefetcht1(void *p)
{
    __asm__ volatile(
        "prefetcht1 (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}
yading@11 189
/** Execute the x86 "prefetcht2" instruction on the address p. */
static inline void prefetcht2(void *p)
{
    __asm__ volatile(
        "prefetcht2 (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}
yading@11 196 #endif
yading@11 197
yading@11 198 /* The horizontal functions exist only in C because the MMX
yading@11 199 * code is faster with vertical filters and transposing. */
yading@11 200
yading@11 201 /**
yading@11 202 * Check if the given 8x8 Block is mostly "flat"
yading@11 203 */
yading@11 204 static inline int isHorizDC_C(const uint8_t src[], int stride, const PPContext *c)
yading@11 205 {
yading@11 206 int numEq= 0;
yading@11 207 int y;
yading@11 208 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
yading@11 209 const int dcThreshold= dcOffset*2 + 1;
yading@11 210
yading@11 211 for(y=0; y<BLOCK_SIZE; y++){
yading@11 212 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
yading@11 213 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
yading@11 214 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
yading@11 215 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
yading@11 216 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
yading@11 217 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
yading@11 218 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
yading@11 219 src+= stride;
yading@11 220 }
yading@11 221 return numEq > c->ppMode.flatnessThreshold;
yading@11 222 }
yading@11 223
yading@11 224 /**
yading@11 225 * Check if the middle 8x8 Block in the given 8x16 block is flat
yading@11 226 */
yading@11 227 static inline int isVertDC_C(const uint8_t src[], int stride, const PPContext *c)
yading@11 228 {
yading@11 229 int numEq= 0;
yading@11 230 int y;
yading@11 231 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
yading@11 232 const int dcThreshold= dcOffset*2 + 1;
yading@11 233
yading@11 234 src+= stride*4; // src points to begin of the 8x8 Block
yading@11 235 for(y=0; y<BLOCK_SIZE-1; y++){
yading@11 236 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
yading@11 237 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
yading@11 238 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
yading@11 239 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
yading@11 240 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
yading@11 241 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
yading@11 242 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
yading@11 243 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
yading@11 244 src+= stride;
yading@11 245 }
yading@11 246 return numEq > c->ppMode.flatnessThreshold;
yading@11 247 }
yading@11 248
/**
 * Sampled horizontal range check on 8 rows.
 *
 * One pair of samples is tested per row; the tested columns cycle through
 * (0,5), (2,7), (4,1), (6,3) every four rows. The check fails as soon as
 * the difference of a pair falls outside [-2*QP, 2*QP].
 *
 * @param src    first sample of the first row
 * @param stride distance in bytes between the starts of two rows
 * @param QP     quantiser used to scale the allowed difference
 * @return 1 if all 8 tested pairs are within range, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(const uint8_t src[], int stride, int QP)
{
    static const int columns[4][2] = {
        { 0, 5 }, { 2, 7 }, { 4, 1 }, { 6, 3 },
    };
    int row;

    for (row = 0; row < 8; row++, src += stride) {
        const int *pair = columns[row & 3];

        if ((unsigned)(src[pair[0]] - src[pair[1]] + 2*QP) > 4*QP)
            return 0;
    }
    return 1;
}
yading@11 264
yading@11 265 static inline int isVertMinMaxOk_C(const uint8_t src[], int stride, int QP)
yading@11 266 {
yading@11 267 int x;
yading@11 268 src+= stride*4;
yading@11 269 for(x=0; x<BLOCK_SIZE; x+=4){
yading@11 270 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
yading@11 271 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
yading@11 272 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
yading@11 273 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
yading@11 274 }
yading@11 275 return 1;
yading@11 276 }
yading@11 277
yading@11 278 static inline int horizClassify_C(const uint8_t src[], int stride, const PPContext *c)
yading@11 279 {
yading@11 280 if( isHorizDC_C(src, stride, c) ){
yading@11 281 if( isHorizMinMaxOk_C(src, stride, c->QP) )
yading@11 282 return 1;
yading@11 283 else
yading@11 284 return 0;
yading@11 285 }else{
yading@11 286 return 2;
yading@11 287 }
yading@11 288 }
yading@11 289
yading@11 290 static inline int vertClassify_C(const uint8_t src[], int stride, const PPContext *c)
yading@11 291 {
yading@11 292 if( isVertDC_C(src, stride, c) ){
yading@11 293 if( isVertMinMaxOk_C(src, stride, c->QP) )
yading@11 294 return 1;
yading@11 295 else
yading@11 296 return 0;
yading@11 297 }else{
yading@11 298 return 2;
yading@11 299 }
yading@11 300 }
yading@11 301
yading@11 302 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, const PPContext *c)
yading@11 303 {
yading@11 304 int y;
yading@11 305 for(y=0; y<BLOCK_SIZE; y++){
yading@11 306 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
yading@11 307
yading@11 308 if(FFABS(middleEnergy) < 8*c->QP){
yading@11 309 const int q=(dst[3] - dst[4])/2;
yading@11 310 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
yading@11 311 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
yading@11 312
yading@11 313 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
yading@11 314 d= FFMAX(d, 0);
yading@11 315
yading@11 316 d= (5*d + 32) >> 6;
yading@11 317 d*= FFSIGN(-middleEnergy);
yading@11 318
yading@11 319 if(q>0)
yading@11 320 {
yading@11 321 d= d<0 ? 0 : d;
yading@11 322 d= d>q ? q : d;
yading@11 323 }
yading@11 324 else
yading@11 325 {
yading@11 326 d= d>0 ? 0 : d;
yading@11 327 d= d<q ? q : d;
yading@11 328 }
yading@11 329
yading@11 330 dst[3]-= d;
yading@11 331 dst[4]+= d;
yading@11 332 }
yading@11 333 dst+= stride;
yading@11 334 }
yading@11 335 }
yading@11 336
yading@11 337 /**
yading@11 338 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
yading@11 339 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
yading@11 340 */
yading@11 341 static inline void doHorizLowPass_C(uint8_t dst[], int stride, const PPContext *c)
yading@11 342 {
yading@11 343 int y;
yading@11 344 for(y=0; y<BLOCK_SIZE; y++){
yading@11 345 const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
yading@11 346 const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
yading@11 347
yading@11 348 int sums[10];
yading@11 349 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
yading@11 350 sums[1] = sums[0] - first + dst[3];
yading@11 351 sums[2] = sums[1] - first + dst[4];
yading@11 352 sums[3] = sums[2] - first + dst[5];
yading@11 353 sums[4] = sums[3] - first + dst[6];
yading@11 354 sums[5] = sums[4] - dst[0] + dst[7];
yading@11 355 sums[6] = sums[5] - dst[1] + last;
yading@11 356 sums[7] = sums[6] - dst[2] + last;
yading@11 357 sums[8] = sums[7] - dst[3] + last;
yading@11 358 sums[9] = sums[8] - dst[4] + last;
yading@11 359
yading@11 360 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
yading@11 361 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
yading@11 362 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
yading@11 363 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
yading@11 364 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
yading@11 365 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
yading@11 366 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
yading@11 367 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
yading@11 368
yading@11 369 dst+= stride;
yading@11 370 }
yading@11 371 }
yading@11 372
yading@11 373 /**
yading@11 374 * Experimental Filter 1 (Horizontal)
yading@11 375 * will not damage linear gradients
yading@11 376 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
yading@11 377 * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
yading@11 378 * MMX2 version does correct clipping C version does not
yading@11 379 * not identical with the vertical one
yading@11 380 */
yading@11 381 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
yading@11 382 {
yading@11 383 int y;
yading@11 384 static uint64_t lut[256];
yading@11 385 if(!lut[255])
yading@11 386 {
yading@11 387 int i;
yading@11 388 for(i=0; i<256; i++)
yading@11 389 {
yading@11 390 int v= i < 128 ? 2*i : 2*(i-256);
yading@11 391 /*
yading@11 392 //Simulate 112242211 9-Tap filter
yading@11 393 uint64_t a= (v/16) & 0xFF;
yading@11 394 uint64_t b= (v/8) & 0xFF;
yading@11 395 uint64_t c= (v/4) & 0xFF;
yading@11 396 uint64_t d= (3*v/8) & 0xFF;
yading@11 397 */
yading@11 398 //Simulate piecewise linear interpolation
yading@11 399 uint64_t a= (v/16) & 0xFF;
yading@11 400 uint64_t b= (v*3/16) & 0xFF;
yading@11 401 uint64_t c= (v*5/16) & 0xFF;
yading@11 402 uint64_t d= (7*v/16) & 0xFF;
yading@11 403 uint64_t A= (0x100 - a)&0xFF;
yading@11 404 uint64_t B= (0x100 - b)&0xFF;
yading@11 405 uint64_t C= (0x100 - c)&0xFF;
yading@11 406 uint64_t D= (0x100 - c)&0xFF;
yading@11 407
yading@11 408 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
yading@11 409 (D<<24) | (C<<16) | (B<<8) | (A);
yading@11 410 //lut[i] = (v<<32) | (v<<24);
yading@11 411 }
yading@11 412 }
yading@11 413
yading@11 414 for(y=0; y<BLOCK_SIZE; y++){
yading@11 415 int a= src[1] - src[2];
yading@11 416 int b= src[3] - src[4];
yading@11 417 int c= src[5] - src[6];
yading@11 418
yading@11 419 int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
yading@11 420
yading@11 421 if(d < QP){
yading@11 422 int v = d * FFSIGN(-b);
yading@11 423
yading@11 424 src[1] +=v/8;
yading@11 425 src[2] +=v/4;
yading@11 426 src[3] +=3*v/8;
yading@11 427 src[4] -=3*v/8;
yading@11 428 src[5] -=v/4;
yading@11 429 src[6] -=v/8;
yading@11 430 }
yading@11 431 src+=stride;
yading@11 432 }
yading@11 433 }
yading@11 434
yading@11 435 /**
yading@11 436 * accurate deblock filter
yading@11 437 */
yading@11 438 static av_always_inline void do_a_deblock_C(uint8_t *src, int step,
yading@11 439 int stride, const PPContext *c)
yading@11 440 {
yading@11 441 int y;
yading@11 442 const int QP= c->QP;
yading@11 443 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
yading@11 444 const int dcThreshold= dcOffset*2 + 1;
yading@11 445 //START_TIMER
yading@11 446 src+= step*4; // src points to begin of the 8x8 Block
yading@11 447 for(y=0; y<8; y++){
yading@11 448 int numEq= 0;
yading@11 449
yading@11 450 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
yading@11 451 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
yading@11 452 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
yading@11 453 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
yading@11 454 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
yading@11 455 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
yading@11 456 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
yading@11 457 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
yading@11 458 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
yading@11 459 if(numEq > c->ppMode.flatnessThreshold){
yading@11 460 int min, max, x;
yading@11 461
yading@11 462 if(src[0] > src[step]){
yading@11 463 max= src[0];
yading@11 464 min= src[step];
yading@11 465 }else{
yading@11 466 max= src[step];
yading@11 467 min= src[0];
yading@11 468 }
yading@11 469 for(x=2; x<8; x+=2){
yading@11 470 if(src[x*step] > src[(x+1)*step]){
yading@11 471 if(src[x *step] > max) max= src[ x *step];
yading@11 472 if(src[(x+1)*step] < min) min= src[(x+1)*step];
yading@11 473 }else{
yading@11 474 if(src[(x+1)*step] > max) max= src[(x+1)*step];
yading@11 475 if(src[ x *step] < min) min= src[ x *step];
yading@11 476 }
yading@11 477 }
yading@11 478 if(max-min < 2*QP){
yading@11 479 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
yading@11 480 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
yading@11 481
yading@11 482 int sums[10];
yading@11 483 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
yading@11 484 sums[1] = sums[0] - first + src[3*step];
yading@11 485 sums[2] = sums[1] - first + src[4*step];
yading@11 486 sums[3] = sums[2] - first + src[5*step];
yading@11 487 sums[4] = sums[3] - first + src[6*step];
yading@11 488 sums[5] = sums[4] - src[0*step] + src[7*step];
yading@11 489 sums[6] = sums[5] - src[1*step] + last;
yading@11 490 sums[7] = sums[6] - src[2*step] + last;
yading@11 491 sums[8] = sums[7] - src[3*step] + last;
yading@11 492 sums[9] = sums[8] - src[4*step] + last;
yading@11 493
yading@11 494 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
yading@11 495 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
yading@11 496 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
yading@11 497 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
yading@11 498 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
yading@11 499 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
yading@11 500 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
yading@11 501 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
yading@11 502 }
yading@11 503 }else{
yading@11 504 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
yading@11 505
yading@11 506 if(FFABS(middleEnergy) < 8*QP){
yading@11 507 const int q=(src[3*step] - src[4*step])/2;
yading@11 508 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
yading@11 509 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
yading@11 510
yading@11 511 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
yading@11 512 d= FFMAX(d, 0);
yading@11 513
yading@11 514 d= (5*d + 32) >> 6;
yading@11 515 d*= FFSIGN(-middleEnergy);
yading@11 516
yading@11 517 if(q>0){
yading@11 518 d= d<0 ? 0 : d;
yading@11 519 d= d>q ? q : d;
yading@11 520 }else{
yading@11 521 d= d>0 ? 0 : d;
yading@11 522 d= d<q ? q : d;
yading@11 523 }
yading@11 524
yading@11 525 src[3*step]-= d;
yading@11 526 src[4*step]+= d;
yading@11 527 }
yading@11 528 }
yading@11 529
yading@11 530 src += stride;
yading@11 531 }
yading@11 532 /*if(step==16){
yading@11 533 STOP_TIMER("step16")
yading@11 534 }else{
yading@11 535 STOP_TIMER("stepX")
yading@11 536 }*/
yading@11 537 }
yading@11 538
//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
/*
 * postprocess_template.c is included once per implementation variant; the
 * TEMPLATE_PP_* macro defined right before each inclusion selects the variant.
 * NOTE(review): presumably each inclusion emits its own suffixed function set
 * (postProcess_C, postProcess_MMX, ... are called further down) and the
 * template #undefs the selector macro again -- confirm in the template.
 */
//Plain C versions
//we always compile C for testing which needs bitexactness
#define TEMPLATE_PP_C 1
#include "postprocess_template.c"

#if HAVE_ALTIVEC
#   define TEMPLATE_PP_ALTIVEC 1
#   include "postprocess_altivec_template.c"
#   include "postprocess_template.c"
#endif

#if ARCH_X86 && HAVE_INLINE_ASM
    /* with runtime CPU detection every x86 variant is built ... */
#    if CONFIG_RUNTIME_CPUDETECT
#        define TEMPLATE_PP_MMX 1
#        include "postprocess_template.c"
#        define TEMPLATE_PP_MMXEXT 1
#        include "postprocess_template.c"
#        define TEMPLATE_PP_3DNOW 1
#        include "postprocess_template.c"
#        define TEMPLATE_PP_SSE2 1
#        include "postprocess_template.c"
#    else
    /* ... otherwise only the first available one of SSE2, MMXEXT, 3DNow, MMX */
#        if HAVE_SSE2_INLINE
#            define TEMPLATE_PP_SSE2 1
#            include "postprocess_template.c"
#        elif HAVE_MMXEXT_INLINE
#            define TEMPLATE_PP_MMXEXT 1
#            include "postprocess_template.c"
#        elif HAVE_AMD3DNOW_INLINE
#            define TEMPLATE_PP_3DNOW 1
#            include "postprocess_template.c"
#        elif HAVE_MMX_INLINE
#            define TEMPLATE_PP_MMX 1
#            include "postprocess_template.c"
#        endif
#    endif
#endif
yading@11 577
yading@11 578 typedef void (*pp_fn)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
yading@11 579 const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c2);
yading@11 580
yading@11 581 static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
yading@11 582 const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
yading@11 583 {
yading@11 584 pp_fn pp = postProcess_C;
yading@11 585 PPContext *c= (PPContext *)vc;
yading@11 586 PPMode *ppMode= (PPMode *)vm;
yading@11 587 c->ppMode= *ppMode; //FIXME
yading@11 588
yading@11 589 if (!(ppMode->lumMode & BITEXACT)) {
yading@11 590 #if CONFIG_RUNTIME_CPUDETECT
yading@11 591 #if ARCH_X86 && HAVE_INLINE_ASM
yading@11 592 // ordered per speed fastest first
yading@11 593 if (c->cpuCaps & AV_CPU_FLAG_SSE2) pp = postProcess_SSE2;
yading@11 594 else if (c->cpuCaps & AV_CPU_FLAG_MMXEXT) pp = postProcess_MMX2;
yading@11 595 else if (c->cpuCaps & AV_CPU_FLAG_3DNOW) pp = postProcess_3DNow;
yading@11 596 else if (c->cpuCaps & AV_CPU_FLAG_MMX) pp = postProcess_MMX;
yading@11 597 #elif HAVE_ALTIVEC
yading@11 598 if (c->cpuCaps & AV_CPU_FLAG_ALTIVEC) pp = postProcess_altivec;
yading@11 599 #endif
yading@11 600 #else /* CONFIG_RUNTIME_CPUDETECT */
yading@11 601 #if HAVE_SSE2_INLINE
yading@11 602 pp = postProcess_SSE2;
yading@11 603 #elif HAVE_MMXEXT_INLINE
yading@11 604 pp = postProcess_MMX2;
yading@11 605 #elif HAVE_AMD3DNOW_INLINE
yading@11 606 pp = postProcess_3DNow;
yading@11 607 #elif HAVE_MMX_INLINE
yading@11 608 pp = postProcess_MMX;
yading@11 609 #elif HAVE_ALTIVEC
yading@11 610 pp = postProcess_altivec;
yading@11 611 #endif
yading@11 612 #endif /* !CONFIG_RUNTIME_CPUDETECT */
yading@11 613 }
yading@11 614
yading@11 615 pp(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
yading@11 616 }
yading@11 617
/* -pp Command line Help
 *
 * The filter names listed here must be spelled exactly like the shortName /
 * longName entries of the filters[] table: the mode parser compares names
 * with strcmp(), i.e. case-sensitively.
 */
const char pp_help[] =
"Available postprocessing filters:\n"
"Filters Options\n"
"short long name short long option Description\n"
"* * a autoq CPU power dependent enabler\n"
" c chrom chrominance filtering enabled\n"
" y nochrom chrominance filtering disabled\n"
" n noluma luma filtering disabled\n"
"hb hdeblock (2 threshold) horizontal deblocking filter\n"
" 1. difference factor: default=32, higher -> more deblocking\n"
" 2. flatness threshold: default=39, lower -> more deblocking\n"
" the h & v deblocking filters share these\n"
" so you can't set different thresholds for h / v\n"
"vb vdeblock (2 threshold) vertical deblocking filter\n"
"ha ahdeblock (2 threshold) horizontal deblocking filter\n"
"va avdeblock (2 threshold) vertical deblocking filter\n"
"h1 x1hdeblock experimental h deblock filter 1\n"
"v1 x1vdeblock experimental v deblock filter 1\n"
"dr dering deringing filter\n"
"al autolevels automatic brightness / contrast\n"
" f fullyrange stretch luminance to (0..255)\n"
"lb linblenddeint linear blend deinterlacer\n"
"li linipoldeint linear interpolating deinterlace\n"
"ci cubicipoldeint cubic interpolating deinterlacer\n"
"md mediandeint median deinterlacer\n"
"fd ffmpegdeint ffmpeg deinterlacer\n"
"l5 lowpass5 FIR lowpass deinterlacer\n"
"de default hb:a,vb:a,dr:a\n"
"fa fast h1:a,v1:a,dr:a\n"
"ac ha:a:128:7,va:a,dr:a\n"
"tn tmpnoise (3 threshold) temporal noise reducer\n"
" 1. <= 2. <= 3. larger -> stronger filtering\n"
"fq forcequant <quantizer> force quantizer\n"
"be bitexact bit-exact output (plain C code only)\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
yading@11 663
yading@11 664 pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
yading@11 665 {
yading@11 666 char temp[GET_MODE_BUFFER_SIZE];
yading@11 667 char *p= temp;
yading@11 668 static const char filterDelimiters[] = ",/";
yading@11 669 static const char optionDelimiters[] = ":|";
yading@11 670 struct PPMode *ppMode;
yading@11 671 char *filterToken;
yading@11 672
yading@11 673 if (!name) {
yading@11 674 av_log(NULL, AV_LOG_ERROR, "pp: Missing argument\n");
yading@11 675 return NULL;
yading@11 676 }
yading@11 677
yading@11 678 if (!strcmp(name, "help")) {
yading@11 679 const char *p;
yading@11 680 for (p = pp_help; strchr(p, '\n'); p = strchr(p, '\n') + 1) {
yading@11 681 av_strlcpy(temp, p, FFMIN(sizeof(temp), strchr(p, '\n') - p + 2));
yading@11 682 av_log(NULL, AV_LOG_INFO, "%s", temp);
yading@11 683 }
yading@11 684 return NULL;
yading@11 685 }
yading@11 686
yading@11 687 ppMode= av_malloc(sizeof(PPMode));
yading@11 688
yading@11 689 ppMode->lumMode= 0;
yading@11 690 ppMode->chromMode= 0;
yading@11 691 ppMode->maxTmpNoise[0]= 700;
yading@11 692 ppMode->maxTmpNoise[1]= 1500;
yading@11 693 ppMode->maxTmpNoise[2]= 3000;
yading@11 694 ppMode->maxAllowedY= 234;
yading@11 695 ppMode->minAllowedY= 16;
yading@11 696 ppMode->baseDcDiff= 256/8;
yading@11 697 ppMode->flatnessThreshold= 56-16-1;
yading@11 698 ppMode->maxClippedThreshold= 0.01;
yading@11 699 ppMode->error=0;
yading@11 700
yading@11 701 memset(temp, 0, GET_MODE_BUFFER_SIZE);
yading@11 702 av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE - 1);
yading@11 703
yading@11 704 av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
yading@11 705
yading@11 706 for(;;){
yading@11 707 char *filterName;
yading@11 708 int q= 1000000; //PP_QUALITY_MAX;
yading@11 709 int chrom=-1;
yading@11 710 int luma=-1;
yading@11 711 char *option;
yading@11 712 char *options[OPTIONS_ARRAY_SIZE];
yading@11 713 int i;
yading@11 714 int filterNameOk=0;
yading@11 715 int numOfUnknownOptions=0;
yading@11 716 int enable=1; //does the user want us to enabled or disabled the filter
yading@11 717
yading@11 718 filterToken= strtok(p, filterDelimiters);
yading@11 719 if(filterToken == NULL) break;
yading@11 720 p+= strlen(filterToken) + 1; // p points to next filterToken
yading@11 721 filterName= strtok(filterToken, optionDelimiters);
yading@11 722 av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
yading@11 723
yading@11 724 if(*filterName == '-'){
yading@11 725 enable=0;
yading@11 726 filterName++;
yading@11 727 }
yading@11 728
yading@11 729 for(;;){ //for all options
yading@11 730 option= strtok(NULL, optionDelimiters);
yading@11 731 if(option == NULL) break;
yading@11 732
yading@11 733 av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
yading@11 734 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
yading@11 735 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
yading@11 736 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
yading@11 737 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
yading@11 738 else{
yading@11 739 options[numOfUnknownOptions] = option;
yading@11 740 numOfUnknownOptions++;
yading@11 741 }
yading@11 742 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
yading@11 743 }
yading@11 744 options[numOfUnknownOptions] = NULL;
yading@11 745
yading@11 746 /* replace stuff from the replace Table */
yading@11 747 for(i=0; replaceTable[2*i]!=NULL; i++){
yading@11 748 if(!strcmp(replaceTable[2*i], filterName)){
yading@11 749 int newlen= strlen(replaceTable[2*i + 1]);
yading@11 750 int plen;
yading@11 751 int spaceLeft;
yading@11 752
yading@11 753 p--, *p=',';
yading@11 754
yading@11 755 plen= strlen(p);
yading@11 756 spaceLeft= p - temp + plen;
yading@11 757 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE - 1){
yading@11 758 ppMode->error++;
yading@11 759 break;
yading@11 760 }
yading@11 761 memmove(p + newlen, p, plen+1);
yading@11 762 memcpy(p, replaceTable[2*i + 1], newlen);
yading@11 763 filterNameOk=1;
yading@11 764 }
yading@11 765 }
yading@11 766
yading@11 767 for(i=0; filters[i].shortName!=NULL; i++){
yading@11 768 if( !strcmp(filters[i].longName, filterName)
yading@11 769 || !strcmp(filters[i].shortName, filterName)){
yading@11 770 ppMode->lumMode &= ~filters[i].mask;
yading@11 771 ppMode->chromMode &= ~filters[i].mask;
yading@11 772
yading@11 773 filterNameOk=1;
yading@11 774 if(!enable) break; // user wants to disable it
yading@11 775
yading@11 776 if(q >= filters[i].minLumQuality && luma)
yading@11 777 ppMode->lumMode|= filters[i].mask;
yading@11 778 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
yading@11 779 if(q >= filters[i].minChromQuality)
yading@11 780 ppMode->chromMode|= filters[i].mask;
yading@11 781
yading@11 782 if(filters[i].mask == LEVEL_FIX){
yading@11 783 int o;
yading@11 784 ppMode->minAllowedY= 16;
yading@11 785 ppMode->maxAllowedY= 234;
yading@11 786 for(o=0; options[o]!=NULL; o++){
yading@11 787 if( !strcmp(options[o],"fullyrange")
yading@11 788 ||!strcmp(options[o],"f")){
yading@11 789 ppMode->minAllowedY= 0;
yading@11 790 ppMode->maxAllowedY= 255;
yading@11 791 numOfUnknownOptions--;
yading@11 792 }
yading@11 793 }
yading@11 794 }
yading@11 795 else if(filters[i].mask == TEMP_NOISE_FILTER)
yading@11 796 {
yading@11 797 int o;
yading@11 798 int numOfNoises=0;
yading@11 799
yading@11 800 for(o=0; options[o]!=NULL; o++){
yading@11 801 char *tail;
yading@11 802 ppMode->maxTmpNoise[numOfNoises]=
yading@11 803 strtol(options[o], &tail, 0);
yading@11 804 if(tail!=options[o]){
yading@11 805 numOfNoises++;
yading@11 806 numOfUnknownOptions--;
yading@11 807 if(numOfNoises >= 3) break;
yading@11 808 }
yading@11 809 }
yading@11 810 }
yading@11 811 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
yading@11 812 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
yading@11 813 int o;
yading@11 814
yading@11 815 for(o=0; options[o]!=NULL && o<2; o++){
yading@11 816 char *tail;
yading@11 817 int val= strtol(options[o], &tail, 0);
yading@11 818 if(tail==options[o]) break;
yading@11 819
yading@11 820 numOfUnknownOptions--;
yading@11 821 if(o==0) ppMode->baseDcDiff= val;
yading@11 822 else ppMode->flatnessThreshold= val;
yading@11 823 }
yading@11 824 }
yading@11 825 else if(filters[i].mask == FORCE_QUANT){
yading@11 826 int o;
yading@11 827 ppMode->forcedQuant= 15;
yading@11 828
yading@11 829 for(o=0; options[o]!=NULL && o<1; o++){
yading@11 830 char *tail;
yading@11 831 int val= strtol(options[o], &tail, 0);
yading@11 832 if(tail==options[o]) break;
yading@11 833
yading@11 834 numOfUnknownOptions--;
yading@11 835 ppMode->forcedQuant= val;
yading@11 836 }
yading@11 837 }
yading@11 838 }
yading@11 839 }
yading@11 840 if(!filterNameOk) ppMode->error++;
yading@11 841 ppMode->error += numOfUnknownOptions;
yading@11 842 }
yading@11 843
yading@11 844 av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
yading@11 845 if(ppMode->error){
yading@11 846 av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
yading@11 847 av_free(ppMode);
yading@11 848 return NULL;
yading@11 849 }
yading@11 850 return ppMode;
yading@11 851 }
yading@11 852
yading@11 853 void pp_free_mode(pp_mode *mode){
yading@11 854 av_free(mode);
yading@11 855 }
yading@11 856
/**
 * Replace the buffer behind *p with a newly zero-filled one.
 *
 * The previous buffer is freed before the new one is requested, so nothing
 * is carried over. The result of av_mallocz() is stored unchecked.
 *
 * @param p         address of the pointer to replace
 * @param alignment accepted for the callers' benefit but not used here
 * @param size      number of bytes to allocate
 */
static void reallocAlign(void **p, int alignment, int size)
{
    void *fresh;

    av_free(*p);
    fresh = av_mallocz(size);
    *p    = fresh;
}
yading@11 861
yading@11 862 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
yading@11 863 int mbWidth = (width+15)>>4;
yading@11 864 int mbHeight= (height+15)>>4;
yading@11 865 int i;
yading@11 866
yading@11 867 c->stride= stride;
yading@11 868 c->qpStride= qpStride;
yading@11 869
yading@11 870 reallocAlign((void **)&c->tempDst, 8, stride*24+32);
yading@11 871 reallocAlign((void **)&c->tempSrc, 8, stride*24);
yading@11 872 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
yading@11 873 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
yading@11 874 for(i=0; i<256; i++)
yading@11 875 c->yHistogram[i]= width*height/64*15/256;
yading@11 876
yading@11 877 for(i=0; i<3; i++){
yading@11 878 //Note: The +17*1024 is just there so I do not have to worry about r/w over the end.
yading@11 879 reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024);
yading@11 880 reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
yading@11 881 }
yading@11 882
yading@11 883 reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
yading@11 884 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
yading@11 885 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
yading@11 886 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
yading@11 887 }
yading@11 888
/**
 * Name callback referenced from the av_codec_context_class initializer.
 *
 * @param ptr ignored
 * @return the constant string "postproc"
 */
static const char *context_to_name(void *ptr)
{
    (void)ptr; /* the argument is never consulted */
    return "postproc";
}
yading@11 892
yading@11 893 static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
yading@11 894
yading@11 895 pp_context *pp_get_context(int width, int height, int cpuCaps){
yading@11 896 PPContext *c= av_malloc(sizeof(PPContext));
yading@11 897 int stride= FFALIGN(width, 16); //assumed / will realloc if needed
yading@11 898 int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
yading@11 899
yading@11 900 memset(c, 0, sizeof(PPContext));
yading@11 901 c->av_class = &av_codec_context_class;
yading@11 902 if(cpuCaps&PP_FORMAT){
yading@11 903 c->hChromaSubSample= cpuCaps&0x3;
yading@11 904 c->vChromaSubSample= (cpuCaps>>4)&0x3;
yading@11 905 }else{
yading@11 906 c->hChromaSubSample= 1;
yading@11 907 c->vChromaSubSample= 1;
yading@11 908 }
yading@11 909 if (cpuCaps & PP_CPU_CAPS_AUTO) {
yading@11 910 c->cpuCaps = av_get_cpu_flags();
yading@11 911 } else {
yading@11 912 c->cpuCaps = 0;
yading@11 913 if (cpuCaps & PP_CPU_CAPS_MMX) c->cpuCaps |= AV_CPU_FLAG_MMX;
yading@11 914 if (cpuCaps & PP_CPU_CAPS_MMX2) c->cpuCaps |= AV_CPU_FLAG_MMXEXT;
yading@11 915 if (cpuCaps & PP_CPU_CAPS_3DNOW) c->cpuCaps |= AV_CPU_FLAG_3DNOW;
yading@11 916 if (cpuCaps & PP_CPU_CAPS_ALTIVEC) c->cpuCaps |= AV_CPU_FLAG_ALTIVEC;
yading@11 917 }
yading@11 918
yading@11 919 reallocBuffers(c, width, height, stride, qpStride);
yading@11 920
yading@11 921 c->frameNum=-1;
yading@11 922
yading@11 923 return c;
yading@11 924 }
yading@11 925
yading@11 926 void pp_free_context(void *vc){
yading@11 927 PPContext *c = (PPContext*)vc;
yading@11 928 int i;
yading@11 929
yading@11 930 for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
yading@11 931 for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
yading@11 932
yading@11 933 av_free(c->tempBlocks);
yading@11 934 av_free(c->yHistogram);
yading@11 935 av_free(c->tempDst);
yading@11 936 av_free(c->tempSrc);
yading@11 937 av_free(c->deintTemp);
yading@11 938 av_free(c->stdQPTable);
yading@11 939 av_free(c->nonBQPTable);
yading@11 940 av_free(c->forcedQPTable);
yading@11 941
yading@11 942 memset(c, 0, sizeof(PPContext));
yading@11 943
yading@11 944 av_free(c);
yading@11 945 }
yading@11 946
yading@11 947 void pp_postprocess(const uint8_t * src[3], const int srcStride[3],
yading@11 948 uint8_t * dst[3], const int dstStride[3],
yading@11 949 int width, int height,
yading@11 950 const QP_STORE_T *QP_store, int QPStride,
yading@11 951 pp_mode *vm, void *vc, int pict_type)
yading@11 952 {
yading@11 953 int mbWidth = (width+15)>>4;
yading@11 954 int mbHeight= (height+15)>>4;
yading@11 955 PPMode *mode = (PPMode*)vm;
yading@11 956 PPContext *c = (PPContext*)vc;
yading@11 957 int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
yading@11 958 int absQPStride = FFABS(QPStride);
yading@11 959
yading@11 960 // c->stride and c->QPStride are always positive
yading@11 961 if(c->stride < minStride || c->qpStride < absQPStride)
yading@11 962 reallocBuffers(c, width, height,
yading@11 963 FFMAX(minStride, c->stride),
yading@11 964 FFMAX(c->qpStride, absQPStride));
yading@11 965
yading@11 966 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
yading@11 967 int i;
yading@11 968 QP_store= c->forcedQPTable;
yading@11 969 absQPStride = QPStride = 0;
yading@11 970 if(mode->lumMode & FORCE_QUANT)
yading@11 971 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
yading@11 972 else
yading@11 973 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
yading@11 974 }
yading@11 975
yading@11 976 if(pict_type & PP_PICT_TYPE_QP2){
yading@11 977 int i;
yading@11 978 const int count= mbHeight * absQPStride;
yading@11 979 for(i=0; i<(count>>2); i++){
yading@11 980 ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
yading@11 981 }
yading@11 982 for(i<<=2; i<count; i++){
yading@11 983 c->stdQPTable[i] = QP_store[i]>>1;
yading@11 984 }
yading@11 985 QP_store= c->stdQPTable;
yading@11 986 QPStride= absQPStride;
yading@11 987 }
yading@11 988
yading@11 989 if(0){
yading@11 990 int x,y;
yading@11 991 for(y=0; y<mbHeight; y++){
yading@11 992 for(x=0; x<mbWidth; x++){
yading@11 993 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
yading@11 994 }
yading@11 995 av_log(c, AV_LOG_INFO, "\n");
yading@11 996 }
yading@11 997 av_log(c, AV_LOG_INFO, "\n");
yading@11 998 }
yading@11 999
yading@11 1000 if((pict_type&7)!=3){
yading@11 1001 if (QPStride >= 0){
yading@11 1002 int i;
yading@11 1003 const int count= mbHeight * QPStride;
yading@11 1004 for(i=0; i<(count>>2); i++){
yading@11 1005 ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
yading@11 1006 }
yading@11 1007 for(i<<=2; i<count; i++){
yading@11 1008 c->nonBQPTable[i] = QP_store[i] & 0x3F;
yading@11 1009 }
yading@11 1010 } else {
yading@11 1011 int i,j;
yading@11 1012 for(i=0; i<mbHeight; i++) {
yading@11 1013 for(j=0; j<absQPStride; j++) {
yading@11 1014 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
yading@11 1015 }
yading@11 1016 }
yading@11 1017 }
yading@11 1018 }
yading@11 1019
yading@11 1020 av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
yading@11 1021 mode->lumMode, mode->chromMode);
yading@11 1022
yading@11 1023 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
yading@11 1024 width, height, QP_store, QPStride, 0, mode, c);
yading@11 1025
yading@11 1026 width = (width )>>c->hChromaSubSample;
yading@11 1027 height = (height)>>c->vChromaSubSample;
yading@11 1028
yading@11 1029 if(mode->chromMode){
yading@11 1030 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
yading@11 1031 width, height, QP_store, QPStride, 1, mode, c);
yading@11 1032 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
yading@11 1033 width, height, QP_store, QPStride, 2, mode, c);
yading@11 1034 }
yading@11 1035 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
yading@11 1036 linecpy(dst[1], src[1], height, srcStride[1]);
yading@11 1037 linecpy(dst[2], src[2], height, srcStride[2]);
yading@11 1038 }else{
yading@11 1039 int y;
yading@11 1040 for(y=0; y<height; y++){
yading@11 1041 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
yading@11 1042 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
yading@11 1043 }
yading@11 1044 }
yading@11 1045 }