annotate ffmpeg/libavcodec/dct-test.c @ 13:844d341cf643 tip

Back up before ISMIR
author Yading Song <yading.song@eecs.qmul.ac.uk>
date Thu, 31 Oct 2013 13:17:06 +0000
parents 6840f77b83aa
children
rev   line source
yading@10 1 /*
yading@10 2 * (c) 2001 Fabrice Bellard
yading@10 3 * 2007 Marc Hoffman <marc.hoffman@analog.com>
yading@10 4 *
yading@10 5 * This file is part of FFmpeg.
yading@10 6 *
yading@10 7 * FFmpeg is free software; you can redistribute it and/or
yading@10 8 * modify it under the terms of the GNU Lesser General Public
yading@10 9 * License as published by the Free Software Foundation; either
yading@10 10 * version 2.1 of the License, or (at your option) any later version.
yading@10 11 *
yading@10 12 * FFmpeg is distributed in the hope that it will be useful,
yading@10 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
yading@10 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
yading@10 15 * Lesser General Public License for more details.
yading@10 16 *
yading@10 17 * You should have received a copy of the GNU Lesser General Public
yading@10 18 * License along with FFmpeg; if not, write to the Free Software
yading@10 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
yading@10 20 */
yading@10 21
yading@10 22 /**
yading@10 23 * @file
yading@10 24 * DCT test (c) 2001 Fabrice Bellard
yading@10 25 * Started from sample code by Juan J. Sierralta P.
yading@10 26 */
yading@10 27
yading@10 28 #include "config.h"
yading@10 29 #include <stdlib.h>
yading@10 30 #include <stdio.h>
yading@10 31 #include <string.h>
yading@10 32 #if HAVE_UNISTD_H
yading@10 33 #include <unistd.h>
yading@10 34 #endif
yading@10 35 #include <math.h>
yading@10 36
yading@10 37 #include "libavutil/cpu.h"
yading@10 38 #include "libavutil/common.h"
yading@10 39 #include "libavutil/lfg.h"
yading@10 40 #include "libavutil/time.h"
yading@10 41
yading@10 42 #include "dct.h"
yading@10 43 #include "simple_idct.h"
yading@10 44 #include "aandcttab.h"
yading@10 45 #include "faandct.h"
yading@10 46 #include "faanidct.h"
yading@10 47 #include "x86/idct_xvid.h"
yading@10 48 #include "dctref.h"
yading@10 49
yading@10 50 #undef printf
yading@10 51
/*
 * Prototypes of architecture-specific transforms, declared locally.
 * Each one is only referenced from the tables below when the matching
 * ARCH_* / HAVE_* build switch is enabled.
 */

/* BFIN */
void ff_bfin_idct(int16_t *block);
void ff_bfin_fdct(int16_t *block);

/* AltiVec */
void ff_fdct_altivec(int16_t *block);

/* ARM */
void ff_j_rev_dct_arm(int16_t *block);
void ff_simple_idct_arm(int16_t *block);
void ff_simple_idct_armv5te(int16_t *block);
void ff_simple_idct_armv6(int16_t *block);
void ff_simple_idct_neon(int16_t *block);

/* Alpha */
void ff_simple_idct_axp(int16_t *block);
yading@10 67
/**
 * Coefficient layout a transform expects on input.
 * All values except NO_PERM and SCALE_PERM select a reordering in permute();
 * SCALE_PERM is copied unpermuted and has its output rescaled in dct_error().
 */
enum formattag {
    NO_PERM,
    MMX_PERM,
    MMX_SIMPLE_PERM,
    SCALE_PERM,
    SSE2_PERM,
    PARTTRANS_PERM,
    TRANSPOSE_PERM
};

/** One entry of the tables of transforms under test. */
struct algo {
    const char *name;             /* label used in the report; a null name ends a table */
    void (*func)(int16_t *block); /* the transform, applied in place to 64 coefficients */
    enum formattag format;        /* input layout, see enum formattag */
    int mm_support;               /* CPU flag bits that must all be set for the entry to run */
    int nonspec;                  /* non-zero: missing the IDCT accuracy limits is not reported as failure */
};

/* CPU capability flags, assigned once in main(). */
static int cpu_flags;
yading@10 78
yading@10 79 static const struct algo fdct_tab[] = {
yading@10 80 { "REF-DBL", ff_ref_fdct, NO_PERM },
yading@10 81 { "FAAN", ff_faandct, NO_PERM },
yading@10 82 { "IJG-AAN-INT", ff_fdct_ifast, SCALE_PERM },
yading@10 83 { "IJG-LLM-INT", ff_jpeg_fdct_islow_8, NO_PERM },
yading@10 84
yading@10 85 #if HAVE_MMX_INLINE
yading@10 86 { "MMX", ff_fdct_mmx, NO_PERM, AV_CPU_FLAG_MMX },
yading@10 87 { "MMXEXT", ff_fdct_mmxext, NO_PERM, AV_CPU_FLAG_MMXEXT },
yading@10 88 { "SSE2", ff_fdct_sse2, NO_PERM, AV_CPU_FLAG_SSE2 },
yading@10 89 #endif
yading@10 90
yading@10 91 #if HAVE_ALTIVEC
yading@10 92 { "altivecfdct", ff_fdct_altivec, NO_PERM, AV_CPU_FLAG_ALTIVEC },
yading@10 93 #endif
yading@10 94
yading@10 95 #if ARCH_BFIN
yading@10 96 { "BFINfdct", ff_bfin_fdct, NO_PERM },
yading@10 97 #endif
yading@10 98
yading@10 99 { 0 }
yading@10 100 };
yading@10 101
#if ARCH_X86_64 && HAVE_MMX && HAVE_YASM
void ff_prores_idct_put_10_sse2(uint16_t *dst, int linesize,
                                int16_t *block, int16_t *qmat);

/**
 * Adapt ff_prores_idct_put_10_sse2() to the in-place
 * void (*)(int16_t *) signature stored in struct algo.
 *
 * The 64 input coefficients are copied to a scratch buffer, every entry of
 * the quantiser matrix is set to 4, and the result is written back over dst
 * using a line size of 16.
 *
 * @param dst 64 coefficients on input, overwritten with the output
 */
static void ff_prores_idct_put_10_sse2_wrap(int16_t *dst)
{
    DECLARE_ALIGNED(16, static int16_t, qmat)[64];
    DECLARE_ALIGNED(16, static int16_t, tmp)[64];
    int i;

    for (i = 0; i < 64; i++) {
        qmat[i] = 4;
        tmp[i]  = dst[i];
    }
    /* The callee takes uint16_t *; int16_t * does not convert to it
     * implicitly, so the conversion has to be spelled out. */
    ff_prores_idct_put_10_sse2((uint16_t *) dst, 16, tmp, qmat);
}
#endif
yading@10 118
yading@10 119 static const struct algo idct_tab[] = {
yading@10 120 { "FAANI", ff_faanidct, NO_PERM },
yading@10 121 { "REF-DBL", ff_ref_idct, NO_PERM },
yading@10 122 { "INT", ff_j_rev_dct, MMX_PERM },
yading@10 123 { "SIMPLE-C", ff_simple_idct_8, NO_PERM },
yading@10 124
yading@10 125 #if HAVE_MMX_INLINE
yading@10 126 { "SIMPLE-MMX", ff_simple_idct_mmx, MMX_SIMPLE_PERM, AV_CPU_FLAG_MMX },
yading@10 127 { "XVID-MMX", ff_idct_xvid_mmx, NO_PERM, AV_CPU_FLAG_MMX, 1 },
yading@10 128 { "XVID-MMXEXT", ff_idct_xvid_mmxext, NO_PERM, AV_CPU_FLAG_MMXEXT, 1 },
yading@10 129 { "XVID-SSE2", ff_idct_xvid_sse2, SSE2_PERM, AV_CPU_FLAG_SSE2, 1 },
yading@10 130 #if ARCH_X86_64 && HAVE_YASM
yading@10 131 { "PR-SSE2", ff_prores_idct_put_10_sse2_wrap, TRANSPOSE_PERM, AV_CPU_FLAG_SSE2, 1 },
yading@10 132 #endif
yading@10 133 #endif
yading@10 134
yading@10 135 #if ARCH_BFIN
yading@10 136 { "BFINidct", ff_bfin_idct, NO_PERM },
yading@10 137 #endif
yading@10 138
yading@10 139 #if ARCH_ARM
yading@10 140 { "SIMPLE-ARM", ff_simple_idct_arm, NO_PERM },
yading@10 141 { "INT-ARM", ff_j_rev_dct_arm, MMX_PERM },
yading@10 142 #endif
yading@10 143 #if HAVE_ARMV5TE
yading@10 144 { "SIMPLE-ARMV5TE", ff_simple_idct_armv5te,NO_PERM, AV_CPU_FLAG_ARMV5TE },
yading@10 145 #endif
yading@10 146 #if HAVE_ARMV6
yading@10 147 { "SIMPLE-ARMV6", ff_simple_idct_armv6, MMX_PERM, AV_CPU_FLAG_ARMV6 },
yading@10 148 #endif
yading@10 149 #if HAVE_NEON
yading@10 150 { "SIMPLE-NEON", ff_simple_idct_neon, PARTTRANS_PERM, AV_CPU_FLAG_NEON },
yading@10 151 #endif
yading@10 152
yading@10 153 #if ARCH_ALPHA
yading@10 154 { "SIMPLE-ALPHA", ff_simple_idct_axp, NO_PERM },
yading@10 155 #endif
yading@10 156
yading@10 157 { 0 }
yading@10 158 };
yading@10 159
/* Fixed-point precision used when undoing the output scaling of
 * SCALE_PERM transforms in dct_error(). */
#define AANSCALE_BITS 12

/* Number of generated blocks per accuracy test. */
#define NB_ITS 20000
/* Number of transform calls per timing batch in the speed tests. */
#define NB_ITS_SPEED 50000

/* Destination index for each source index under MMX_PERM;
 * filled at run time by idct_mmx_init(). */
static short idct_mmx_perm[64];

/* Destination index for each source index under MMX_SIMPLE_PERM.
 * The table is only ever read, hence const. */
static const short idct_simple_mmx_perm[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};

/* Under SSE2_PERM the low three index bits are replaced through this table
 * while the upper three bits are kept. */
static const uint8_t idct_sse2_row_perm[8] = { 0, 4, 1, 5, 2, 6, 3, 7 };
yading@10 179
yading@10 180 static void idct_mmx_init(void)
yading@10 181 {
yading@10 182 int i;
yading@10 183
yading@10 184 /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
yading@10 185 for (i = 0; i < 64; i++) {
yading@10 186 idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
yading@10 187 }
yading@10 188 }
yading@10 189
yading@10 190 DECLARE_ALIGNED(16, static int16_t, block)[64];
yading@10 191 DECLARE_ALIGNED(8, static int16_t, block1)[64];
yading@10 192
yading@10 193 static void init_block(int16_t block[64], int test, int is_idct, AVLFG *prng, int vals)
yading@10 194 {
yading@10 195 int i, j;
yading@10 196
yading@10 197 memset(block, 0, 64 * sizeof(*block));
yading@10 198
yading@10 199 switch (test) {
yading@10 200 case 0:
yading@10 201 for (i = 0; i < 64; i++)
yading@10 202 block[i] = (av_lfg_get(prng) % (2*vals)) -vals;
yading@10 203 if (is_idct) {
yading@10 204 ff_ref_fdct(block);
yading@10 205 for (i = 0; i < 64; i++)
yading@10 206 block[i] >>= 3;
yading@10 207 }
yading@10 208 break;
yading@10 209 case 1:
yading@10 210 j = av_lfg_get(prng) % 10 + 1;
yading@10 211 for (i = 0; i < j; i++) {
yading@10 212 int idx = av_lfg_get(prng) % 64;
yading@10 213 block[idx] = av_lfg_get(prng) % (2*vals) -vals;
yading@10 214 }
yading@10 215 break;
yading@10 216 case 2:
yading@10 217 block[ 0] = av_lfg_get(prng) % (16*vals) - (8*vals);
yading@10 218 block[63] = (block[0] & 1) ^ 1;
yading@10 219 break;
yading@10 220 }
yading@10 221 }
yading@10 222
yading@10 223 static void permute(int16_t dst[64], const int16_t src[64], int perm)
yading@10 224 {
yading@10 225 int i;
yading@10 226
yading@10 227 if (perm == MMX_PERM) {
yading@10 228 for (i = 0; i < 64; i++)
yading@10 229 dst[idct_mmx_perm[i]] = src[i];
yading@10 230 } else if (perm == MMX_SIMPLE_PERM) {
yading@10 231 for (i = 0; i < 64; i++)
yading@10 232 dst[idct_simple_mmx_perm[i]] = src[i];
yading@10 233 } else if (perm == SSE2_PERM) {
yading@10 234 for (i = 0; i < 64; i++)
yading@10 235 dst[(i & 0x38) | idct_sse2_row_perm[i & 7]] = src[i];
yading@10 236 } else if (perm == PARTTRANS_PERM) {
yading@10 237 for (i = 0; i < 64; i++)
yading@10 238 dst[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = src[i];
yading@10 239 } else if (perm == TRANSPOSE_PERM) {
yading@10 240 for (i = 0; i < 64; i++)
yading@10 241 dst[(i>>3) | ((i<<3)&0x38)] = src[i];
yading@10 242 } else {
yading@10 243 for (i = 0; i < 64; i++)
yading@10 244 dst[i] = src[i];
yading@10 245 }
yading@10 246 }
yading@10 247
yading@10 248 static int dct_error(const struct algo *dct, int test, int is_idct, int speed, const int bits)
yading@10 249 {
yading@10 250 void (*ref)(int16_t *block) = is_idct ? ff_ref_idct : ff_ref_fdct;
yading@10 251 int it, i, scale;
yading@10 252 int err_inf, v;
yading@10 253 int64_t err2, ti, ti1, it1, err_sum = 0;
yading@10 254 int64_t sysErr[64], sysErrMax = 0;
yading@10 255 int maxout = 0;
yading@10 256 int blockSumErrMax = 0, blockSumErr;
yading@10 257 AVLFG prng;
yading@10 258 const int vals=1<<bits;
yading@10 259 double omse, ome;
yading@10 260 int spec_err;
yading@10 261
yading@10 262 av_lfg_init(&prng, 1);
yading@10 263
yading@10 264 err_inf = 0;
yading@10 265 err2 = 0;
yading@10 266 for (i = 0; i < 64; i++)
yading@10 267 sysErr[i] = 0;
yading@10 268 for (it = 0; it < NB_ITS; it++) {
yading@10 269 init_block(block1, test, is_idct, &prng, vals);
yading@10 270 permute(block, block1, dct->format);
yading@10 271
yading@10 272 dct->func(block);
yading@10 273 emms_c();
yading@10 274
yading@10 275 if (dct->format == SCALE_PERM) {
yading@10 276 for (i = 0; i < 64; i++) {
yading@10 277 scale = 8 * (1 << (AANSCALE_BITS + 11)) / ff_aanscales[i];
yading@10 278 block[i] = (block[i] * scale) >> AANSCALE_BITS;
yading@10 279 }
yading@10 280 }
yading@10 281
yading@10 282 ref(block1);
yading@10 283
yading@10 284 blockSumErr = 0;
yading@10 285 for (i = 0; i < 64; i++) {
yading@10 286 int err = block[i] - block1[i];
yading@10 287 err_sum += err;
yading@10 288 v = abs(err);
yading@10 289 if (v > err_inf)
yading@10 290 err_inf = v;
yading@10 291 err2 += v * v;
yading@10 292 sysErr[i] += block[i] - block1[i];
yading@10 293 blockSumErr += v;
yading@10 294 if (abs(block[i]) > maxout)
yading@10 295 maxout = abs(block[i]);
yading@10 296 }
yading@10 297 if (blockSumErrMax < blockSumErr)
yading@10 298 blockSumErrMax = blockSumErr;
yading@10 299 }
yading@10 300 for (i = 0; i < 64; i++)
yading@10 301 sysErrMax = FFMAX(sysErrMax, FFABS(sysErr[i]));
yading@10 302
yading@10 303 for (i = 0; i < 64; i++) {
yading@10 304 if (i % 8 == 0)
yading@10 305 printf("\n");
yading@10 306 printf("%7d ", (int) sysErr[i]);
yading@10 307 }
yading@10 308 printf("\n");
yading@10 309
yading@10 310 omse = (double) err2 / NB_ITS / 64;
yading@10 311 ome = (double) err_sum / NB_ITS / 64;
yading@10 312
yading@10 313 spec_err = is_idct && (err_inf > 1 || omse > 0.02 || fabs(ome) > 0.0015);
yading@10 314
yading@10 315 printf("%s %s: max_err=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
yading@10 316 is_idct ? "IDCT" : "DCT", dct->name, err_inf,
yading@10 317 omse, ome, (double) sysErrMax / NB_ITS,
yading@10 318 maxout, blockSumErrMax);
yading@10 319
yading@10 320 if (spec_err && !dct->nonspec)
yading@10 321 return 1;
yading@10 322
yading@10 323 if (!speed)
yading@10 324 return 0;
yading@10 325
yading@10 326 /* speed test */
yading@10 327
yading@10 328 init_block(block, test, is_idct, &prng, vals);
yading@10 329 permute(block1, block, dct->format);
yading@10 330
yading@10 331 ti = av_gettime();
yading@10 332 it1 = 0;
yading@10 333 do {
yading@10 334 for (it = 0; it < NB_ITS_SPEED; it++) {
yading@10 335 memcpy(block, block1, sizeof(block));
yading@10 336 dct->func(block);
yading@10 337 }
yading@10 338 emms_c();
yading@10 339 it1 += NB_ITS_SPEED;
yading@10 340 ti1 = av_gettime() - ti;
yading@10 341 } while (ti1 < 1000000);
yading@10 342
yading@10 343 printf("%s %s: %0.1f kdct/s\n", is_idct ? "IDCT" : "DCT", dct->name,
yading@10 344 (double) it1 * 1000.0 / (double) ti1);
yading@10 345
yading@10 346 return 0;
yading@10 347 }
yading@10 348
yading@10 349 DECLARE_ALIGNED(8, static uint8_t, img_dest)[64];
yading@10 350 DECLARE_ALIGNED(8, static uint8_t, img_dest1)[64];
yading@10 351
yading@10 352 static void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
yading@10 353 {
yading@10 354 static int init;
yading@10 355 static double c8[8][8];
yading@10 356 static double c4[4][4];
yading@10 357 double block1[64], block2[64], block3[64];
yading@10 358 double s, sum, v;
yading@10 359 int i, j, k;
yading@10 360
yading@10 361 if (!init) {
yading@10 362 init = 1;
yading@10 363
yading@10 364 for (i = 0; i < 8; i++) {
yading@10 365 sum = 0;
yading@10 366 for (j = 0; j < 8; j++) {
yading@10 367 s = (i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0);
yading@10 368 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
yading@10 369 sum += c8[i][j] * c8[i][j];
yading@10 370 }
yading@10 371 }
yading@10 372
yading@10 373 for (i = 0; i < 4; i++) {
yading@10 374 sum = 0;
yading@10 375 for (j = 0; j < 4; j++) {
yading@10 376 s = (i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0);
yading@10 377 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
yading@10 378 sum += c4[i][j] * c4[i][j];
yading@10 379 }
yading@10 380 }
yading@10 381 }
yading@10 382
yading@10 383 /* butterfly */
yading@10 384 s = 0.5 * sqrt(2.0);
yading@10 385 for (i = 0; i < 4; i++) {
yading@10 386 for (j = 0; j < 8; j++) {
yading@10 387 block1[8 * (2 * i) + j] =
yading@10 388 (block[8 * (2 * i) + j] + block[8 * (2 * i + 1) + j]) * s;
yading@10 389 block1[8 * (2 * i + 1) + j] =
yading@10 390 (block[8 * (2 * i) + j] - block[8 * (2 * i + 1) + j]) * s;
yading@10 391 }
yading@10 392 }
yading@10 393
yading@10 394 /* idct8 on lines */
yading@10 395 for (i = 0; i < 8; i++) {
yading@10 396 for (j = 0; j < 8; j++) {
yading@10 397 sum = 0;
yading@10 398 for (k = 0; k < 8; k++)
yading@10 399 sum += c8[k][j] * block1[8 * i + k];
yading@10 400 block2[8 * i + j] = sum;
yading@10 401 }
yading@10 402 }
yading@10 403
yading@10 404 /* idct4 */
yading@10 405 for (i = 0; i < 8; i++) {
yading@10 406 for (j = 0; j < 4; j++) {
yading@10 407 /* top */
yading@10 408 sum = 0;
yading@10 409 for (k = 0; k < 4; k++)
yading@10 410 sum += c4[k][j] * block2[8 * (2 * k) + i];
yading@10 411 block3[8 * (2 * j) + i] = sum;
yading@10 412
yading@10 413 /* bottom */
yading@10 414 sum = 0;
yading@10 415 for (k = 0; k < 4; k++)
yading@10 416 sum += c4[k][j] * block2[8 * (2 * k + 1) + i];
yading@10 417 block3[8 * (2 * j + 1) + i] = sum;
yading@10 418 }
yading@10 419 }
yading@10 420
yading@10 421 /* clamp and store the result */
yading@10 422 for (i = 0; i < 8; i++) {
yading@10 423 for (j = 0; j < 8; j++) {
yading@10 424 v = block3[8 * i + j];
yading@10 425 if (v < 0) v = 0;
yading@10 426 else if (v > 255) v = 255;
yading@10 427 dest[i * linesize + j] = (int) rint(v);
yading@10 428 }
yading@10 429 }
yading@10 430 }
yading@10 431
yading@10 432 static void idct248_error(const char *name,
yading@10 433 void (*idct248_put)(uint8_t *dest, int line_size,
yading@10 434 int16_t *block),
yading@10 435 int speed)
yading@10 436 {
yading@10 437 int it, i, it1, ti, ti1, err_max, v;
yading@10 438 AVLFG prng;
yading@10 439
yading@10 440 av_lfg_init(&prng, 1);
yading@10 441
yading@10 442 /* just one test to see if code is correct (precision is less
yading@10 443 important here) */
yading@10 444 err_max = 0;
yading@10 445 for (it = 0; it < NB_ITS; it++) {
yading@10 446 /* XXX: use forward transform to generate values */
yading@10 447 for (i = 0; i < 64; i++)
yading@10 448 block1[i] = av_lfg_get(&prng) % 256 - 128;
yading@10 449 block1[0] += 1024;
yading@10 450
yading@10 451 for (i = 0; i < 64; i++)
yading@10 452 block[i] = block1[i];
yading@10 453 idct248_ref(img_dest1, 8, block);
yading@10 454
yading@10 455 for (i = 0; i < 64; i++)
yading@10 456 block[i] = block1[i];
yading@10 457 idct248_put(img_dest, 8, block);
yading@10 458
yading@10 459 for (i = 0; i < 64; i++) {
yading@10 460 v = abs((int) img_dest[i] - (int) img_dest1[i]);
yading@10 461 if (v == 255)
yading@10 462 printf("%d %d\n", img_dest[i], img_dest1[i]);
yading@10 463 if (v > err_max)
yading@10 464 err_max = v;
yading@10 465 }
yading@10 466 #if 0
yading@10 467 printf("ref=\n");
yading@10 468 for(i=0;i<8;i++) {
yading@10 469 int j;
yading@10 470 for(j=0;j<8;j++) {
yading@10 471 printf(" %3d", img_dest1[i*8+j]);
yading@10 472 }
yading@10 473 printf("\n");
yading@10 474 }
yading@10 475
yading@10 476 printf("out=\n");
yading@10 477 for(i=0;i<8;i++) {
yading@10 478 int j;
yading@10 479 for(j=0;j<8;j++) {
yading@10 480 printf(" %3d", img_dest[i*8+j]);
yading@10 481 }
yading@10 482 printf("\n");
yading@10 483 }
yading@10 484 #endif
yading@10 485 }
yading@10 486 printf("%s %s: err_inf=%d\n", 1 ? "IDCT248" : "DCT248", name, err_max);
yading@10 487
yading@10 488 if (!speed)
yading@10 489 return;
yading@10 490
yading@10 491 ti = av_gettime();
yading@10 492 it1 = 0;
yading@10 493 do {
yading@10 494 for (it = 0; it < NB_ITS_SPEED; it++) {
yading@10 495 for (i = 0; i < 64; i++)
yading@10 496 block[i] = block1[i];
yading@10 497 idct248_put(img_dest, 8, block);
yading@10 498 }
yading@10 499 emms_c();
yading@10 500 it1 += NB_ITS_SPEED;
yading@10 501 ti1 = av_gettime() - ti;
yading@10 502 } while (ti1 < 1000000);
yading@10 503
yading@10 504 printf("%s %s: %0.1f kdct/s\n", 1 ? "IDCT248" : "DCT248", name,
yading@10 505 (double) it1 * 1000.0 / (double) ti1);
yading@10 506 }
yading@10 507
/** Print the command line summary to standard output. */
static void help(void)
{
    static const char usage[] =
        "dct-test [-i] [<test-number>] [<bits>]\n"
        "test-number 0 -> test with random matrixes\n"
        " 1 -> test with random sparse matrixes\n"
        " 2 -> do 3. test from mpeg4 std\n"
        "bits Number of time domain bits to use, 8 is default\n"
        "-i test IDCT implementations\n"
        "-4 test IDCT248 implementations\n"
        "-t speed test\n";

    printf("%s", usage);
}
yading@10 519
yading@10 520 #if !HAVE_GETOPT
yading@10 521 #include "compat/getopt.c"
yading@10 522 #endif
yading@10 523
yading@10 524 int main(int argc, char **argv)
yading@10 525 {
yading@10 526 int test_idct = 0, test_248_dct = 0;
yading@10 527 int c, i;
yading@10 528 int test = 1;
yading@10 529 int speed = 0;
yading@10 530 int err = 0;
yading@10 531 int bits=8;
yading@10 532
yading@10 533 cpu_flags = av_get_cpu_flags();
yading@10 534
yading@10 535 ff_ref_dct_init();
yading@10 536 idct_mmx_init();
yading@10 537
yading@10 538 for (;;) {
yading@10 539 c = getopt(argc, argv, "ih4t");
yading@10 540 if (c == -1)
yading@10 541 break;
yading@10 542 switch (c) {
yading@10 543 case 'i':
yading@10 544 test_idct = 1;
yading@10 545 break;
yading@10 546 case '4':
yading@10 547 test_248_dct = 1;
yading@10 548 break;
yading@10 549 case 't':
yading@10 550 speed = 1;
yading@10 551 break;
yading@10 552 default:
yading@10 553 case 'h':
yading@10 554 help();
yading@10 555 return 0;
yading@10 556 }
yading@10 557 }
yading@10 558
yading@10 559 if (optind < argc)
yading@10 560 test = atoi(argv[optind]);
yading@10 561 if(optind+1 < argc) bits= atoi(argv[optind+1]);
yading@10 562
yading@10 563 printf("ffmpeg DCT/IDCT test\n");
yading@10 564
yading@10 565 if (test_248_dct) {
yading@10 566 idct248_error("SIMPLE-C", ff_simple_idct248_put, speed);
yading@10 567 } else {
yading@10 568 const struct algo *algos = test_idct ? idct_tab : fdct_tab;
yading@10 569 for (i = 0; algos[i].name; i++)
yading@10 570 if (!(~cpu_flags & algos[i].mm_support)) {
yading@10 571 err |= dct_error(&algos[i], test, test_idct, speed, bits);
yading@10 572 }
yading@10 573 }
yading@10 574
yading@10 575 return err;
yading@10 576 }