102 #if ARCH_X86_64 && HAVE_MMX && HAVE_YASM 104 int16_t *block, int16_t *qmat);
106 static void ff_prores_idct_put_10_sse2_wrap(int16_t *
dst){
130 #if ARCH_X86_64 && HAVE_YASM 160 #define AANSCALE_BITS 12 163 #define NB_ITS_SPEED 50000 168 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
169 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
170 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
171 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
172 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
173 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
174 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
175 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
185 for (i = 0; i < 64; i++) {
197 memset(block, 0, 64 *
sizeof(*block));
201 for (i = 0; i < 64; i++)
202 block[i] = (
av_lfg_get(prng) % (2*vals)) -vals;
205 for (i = 0; i < 64; i++)
211 for (i = 0; i < j; i++) {
213 block[idx] =
av_lfg_get(prng) % (2*vals) -vals;
217 block[ 0] =
av_lfg_get(prng) % (16*vals) - (8*vals);
218 block[63] = (block[0] & 1) ^ 1;
228 for (i = 0; i < 64; i++)
231 for (i = 0; i < 64; i++)
234 for (i = 0; i < 64; i++)
237 for (i = 0; i < 64; i++)
238 dst[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = src[
i];
240 for (i = 0; i < 64; i++)
241 dst[(i>>3) | ((i<<3)&0x38)] = src[
i];
243 for (i = 0; i < 64; i++)
253 int64_t err2, ti, ti1, it1, err_sum = 0;
254 int64_t sysErr[64], sysErrMax = 0;
256 int blockSumErrMax = 0, blockSumErr;
258 const int vals=1<<
bits;
266 for (i = 0; i < 64; i++)
268 for (it = 0; it <
NB_ITS; it++) {
269 init_block(block1, test, is_idct, &prng, vals);
276 for (i = 0; i < 64; i++) {
285 for (i = 0; i < 64; i++) {
286 int err = block[
i] - block1[
i];
292 sysErr[
i] += block[
i] - block1[
i];
294 if (abs(block[i]) > maxout)
295 maxout = abs(block[i]);
297 if (blockSumErrMax < blockSumErr)
298 blockSumErrMax = blockSumErr;
300 for (i = 0; i < 64; i++)
301 sysErrMax =
FFMAX(sysErrMax,
FFABS(sysErr[i]));
303 for (i = 0; i < 64; i++) {
306 printf(
"%7d ", (
int) sysErr[i]);
310 omse = (double) err2 / NB_ITS / 64;
311 ome = (double) err_sum / NB_ITS / 64;
313 spec_err = is_idct && (err_inf > 1 || omse > 0.02 || fabs(ome) > 0.0015);
315 printf(
"%s %s: max_err=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
316 is_idct ?
"IDCT" :
"DCT", dct->
name, err_inf,
317 omse, ome, (
double) sysErrMax / NB_ITS,
318 maxout, blockSumErrMax);
328 init_block(block, test, is_idct, &prng, vals);
335 memcpy(block, block1,
sizeof(block));
341 }
while (ti1 < 1000000);
343 printf(
"%s %s: %0.1f kdct/s\n", is_idct ?
"IDCT" :
"DCT", dct->
name,
344 (
double) it1 * 1000.0 / (
double) ti1);
355 static double c8[8][8];
356 static double c4[4][4];
357 double block1[64], block2[64], block3[64];
364 for (i = 0; i < 8; i++) {
366 for (j = 0; j < 8; j++) {
367 s = (i == 0) ?
sqrt(1.0 / 8.0) :
sqrt(1.0 / 4.0);
368 c8[
i][j] = s * cos(
M_PI * i * (j + 0.5) / 8.0);
369 sum += c8[
i][j] * c8[
i][j];
373 for (i = 0; i < 4; i++) {
375 for (j = 0; j < 4; j++) {
376 s = (i == 0) ?
sqrt(1.0 / 4.0) :
sqrt(1.0 / 2.0);
377 c4[
i][j] = s * cos(
M_PI * i * (j + 0.5) / 4.0);
378 sum += c4[
i][j] * c4[
i][j];
385 for (i = 0; i < 4; i++) {
386 for (j = 0; j < 8; j++) {
387 block1[8 * (2 *
i) + j] =
388 (block[8 * (2 * i) + j] + block[8 * (2 * i + 1) + j]) *
s;
389 block1[8 * (2 * i + 1) + j] =
390 (block[8 * (2 * i) + j] - block[8 * (2 * i + 1) + j]) *
s;
395 for (i = 0; i < 8; i++) {
396 for (j = 0; j < 8; j++) {
398 for (k = 0; k < 8; k++)
399 sum += c8[k][j] * block1[8 * i + k];
400 block2[8 * i + j] = sum;
405 for (i = 0; i < 8; i++) {
406 for (j = 0; j < 4; j++) {
409 for (k = 0; k < 4; k++)
410 sum += c4[k][j] * block2[8 * (2 * k) +
i];
411 block3[8 * (2 * j) + i] = sum;
415 for (k = 0; k < 4; k++)
416 sum += c4[k][j] * block2[8 * (2 * k + 1) +
i];
417 block3[8 * (2 * j + 1) + i] = sum;
422 for (i = 0; i < 8; i++) {
423 for (j = 0; j < 8; j++) {
424 v = block3[8 * i + j];
426 else if (v > 255) v = 255;
427 dest[i * linesize + j] = (int)
rint(v);
437 int it,
i, it1, ti, ti1, err_max,
v;
445 for (it = 0; it <
NB_ITS; it++) {
447 for (i = 0; i < 64; i++)
451 for (i = 0; i < 64; i++)
452 block[i] = block1[i];
455 for (i = 0; i < 64; i++)
456 block[i] = block1[i];
457 idct248_put(img_dest, 8, block);
459 for (i = 0; i < 64; i++) {
460 v = abs((
int) img_dest[i] - (
int) img_dest1[i]);
462 printf(
"%d %d\n", img_dest[i], img_dest1[i]);
471 printf(
" %3d", img_dest1[i*8+j]);
480 printf(
" %3d", img_dest[i*8+j]);
486 printf(
"%s %s: err_inf=%d\n", 1 ?
"IDCT248" :
"DCT248", name, err_max);
495 for (i = 0; i < 64; i++)
496 block[i] = block1[i];
497 idct248_put(img_dest, 8, block);
502 }
while (ti1 < 1000000);
504 printf(
"%s %s: %0.1f kdct/s\n", 1 ?
"IDCT248" :
"DCT248", name,
505 (
double) it1 * 1000.0 / (
double) ti1);
510 printf(
"dct-test [-i] [<test-number>] [<bits>]\n" 511 "test-number 0 -> test with random matrixes\n" 512 " 1 -> test with random sparse matrixes\n" 513 " 2 -> do 3. test from mpeg4 std\n" 514 "bits Number of time domain bits to use, 8 is default\n" 515 "-i test IDCT implementations\n" 516 "-4 test IDCT248 implementations\n" 524 int main(
int argc,
char **argv)
526 int test_idct = 0, test_248_dct = 0;
539 c =
getopt(argc, argv,
"ih4t");
560 test = atoi(argv[
optind]);
561 if(optind+1 < argc) bits= atoi(argv[optind+1]);
563 printf(
"ffmpeg DCT/IDCT test\n");
568 const struct algo *algos = test_idct ? idct_tab :
fdct_tab;
569 for (i = 0; algos[
i].
name; i++)
571 err |=
dct_error(&algos[i], test, test_idct, speed, bits);
#define AV_CPU_FLAG_ALTIVEC
standard
FIXME Range Coding of cr are ref
static double rint(double x)
void ff_fdct_ifast(int16_t *data)
static const struct algo idct_tab[]
void ff_simple_idct_neon(int16_t *data)
static av_cold int init(AVCodecContext *avctx)
void ff_idct_xvid_sse2(short *block)
int main(int argc, char **argv)
av_cold void ff_ref_dct_init(void)
Initialize the double precision discrete cosine transform functions fdct & idct.
void ff_faanidct(int16_t block[64])
void ff_bfin_idct(int16_t *block)
#define AV_CPU_FLAG_MMXEXT
SSE integer functions or AMD MMX ext.
void ff_simple_idct248_put(uint8_t *dest, int line_size, int16_t *block)
const uint16_t ff_aanscales[64]
void ff_fdct_mmxext(int16_t *block)
DECLARE_ALIGNED(16, static int16_t, block)[64]
void ff_simple_idct_armv6(int16_t *data)
void ff_simple_idct_mmx(int16_t *block)
void ff_fdct_mmx(int16_t *block)
static int dct_error(const struct algo *dct, int test, int is_idct, int speed, const int bits)
static const struct algo fdct_tab[]
void(* func)(int16_t *block)
#define AV_CPU_FLAG_ARMV5TE
void ff_fdct_sse2(int16_t *block)
static void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
static short idct_simple_mmx_perm[64]
void ff_simple_idct_axp(int16_t *data)
static void permute(int16_t dst[64], const int16_t src[64], int perm)
static short idct_mmx_perm[64]
int64_t av_gettime(void)
Get the current time in microseconds.
static void idct_mmx_init(void)
#define AV_CPU_FLAG_ARMV6
void ff_jpeg_fdct_islow_8(int16_t *data)
void ff_j_rev_dct(int16_t *data)
void ff_faandct(int16_t *data)
typedef void(RENAME(mix_any_func_type))
#define AV_CPU_FLAG_MMX
standard MMX
static const uint8_t idct_sse2_row_perm[8]
static int getopt(int argc, char *argv[], char *opts)
static unsigned int av_lfg_get(AVLFG *c)
Get the next random unsigned 32-bit number using an ALFG.
void ff_bfin_fdct(int16_t *block)
synthesis window for stochastic i
void ff_ref_fdct(short *block)
Transform an 8x8 block of data with a double-precision forward DCT. This is a reference implementation...
av_cold void av_lfg_init(AVLFG *c, unsigned int seed)
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
AAN (Arai Agui Nakajima) (I)DCT tables.
header for Xvid IDCT functions
common internal and external API header
void ff_fdct_altivec(int16_t *block)
void ff_simple_idct_armv5te(int16_t *data)
void ff_prores_idct_put_10_sse2(uint16_t *dst, int linesize, int16_t *block, const int16_t *qmat)
static void idct248_error(const char *name, void(*idct248_put)(uint8_t *dest, int line_size, int16_t *block), int speed)
void ff_ref_idct(short *block)
Transform an 8x8 block of data with a double-precision inverse DCT. This is a reference implementation...
printf("static const uint8_t my_array[100] = {\n")
else dst[i][x+y *dst_stride[i]]
#define AV_CPU_FLAG_SSE2
PIV SSE2 functions.
enum algo::formattag format
static void init_block(int16_t block[64], int test, int is_idct, AVLFG *prng, int vals)
void ff_idct_xvid_mmx(short *block)
void ff_idct_xvid_mmxext(short *block)
void ff_j_rev_dct_arm(int16_t *data)
void ff_simple_idct_arm(int16_t *data)
void ff_simple_idct_8(int16_t *block)