/* dsputil.c */
1 /*
2  * DSP utils
3  * Copyright (c) 2000, 2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24 
25 /**
26  * @file
27  * DSP utils
28  */
29 
30 #include "libavutil/imgutils.h"
31 #include "libavutil/internal.h"
32 #include "avcodec.h"
33 #include "copy_block.h"
34 #include "dct.h"
35 #include "dsputil.h"
36 #include "simple_idct.h"
37 #include "faandct.h"
38 #include "faanidct.h"
39 #include "imgconvert.h"
40 #include "mathops.h"
41 #include "mpegvideo.h"
42 #include "config.h"
43 #include "diracdsp.h"
44 
/* Table of squares, centred at index 256; zero here, filled in at init time
 * (the initializer is not in this chunk — verify against ff_dsputil_init). */
uint32_t ff_squareTbl[512] = {0, };
46 
47 #define BIT_DEPTH 16
48 #include "dsputil_template.c"
49 #undef BIT_DEPTH
50 
51 #define BIT_DEPTH 8
52 #include "dsputil_template.c"
53 
// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
#define pb_7f (~0UL / 255 * 0x7f)
#define pb_80 (~0UL / 255 * 0x80)
57 
/* Specific zigzag scan for 248 idct. NOTE that unlike the
 * specification, we interleave the fields */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};
70 
71 /* not permutated inverse zigzag_direct + 1 for MMX quantizer */
72 DECLARE_ALIGNED(16, uint16_t, ff_inv_zigzag_direct16)[64];
73 
/* Alternate horizontal scan order (declaration line restored — it was lost
 * in extraction; name matches the upstream libavcodec table). */
const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};
84 
/* Alternate vertical scan order (declaration line restored — it was lost
 * in extraction; name matches the upstream libavcodec table). */
const uint8_t ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};
95 
/* Input permutation for the simple_idct_mmx */
static const uint8_t simple_mmx_permutation[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
107 
/* Row permutation used by the SSE2 IDCT (see FF_SSE2_IDCT_PERM below). */
static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
109 
110 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
111  int i;
112  int end;
113 
114  st->scantable= src_scantable;
115 
116  for(i=0; i<64; i++){
117  int j;
118  j = src_scantable[i];
119  st->permutated[i] = permutation[j];
120  }
121 
122  end=-1;
123  for(i=0; i<64; i++){
124  int j;
125  j = st->permutated[i];
126  if(j>end) end=j;
127  st->raster_end[i]= end;
128  }
129 }
130 
131 void ff_init_scantable_permutation(uint8_t *idct_permutation,
132  int idct_permutation_type)
133 {
134  int i;
135 
136  switch(idct_permutation_type){
137  case FF_NO_IDCT_PERM:
138  for(i=0; i<64; i++)
139  idct_permutation[i]= i;
140  break;
142  for(i=0; i<64; i++)
143  idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
144  break;
145  case FF_SIMPLE_IDCT_PERM:
146  for(i=0; i<64; i++)
147  idct_permutation[i]= simple_mmx_permutation[i];
148  break;
150  for(i=0; i<64; i++)
151  idct_permutation[i]= ((i&7)<<3) | (i>>3);
152  break;
154  for(i=0; i<64; i++)
155  idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
156  break;
157  case FF_SSE2_IDCT_PERM:
158  for(i=0; i<64; i++)
159  idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
160  break;
161  default:
162  av_log(NULL, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
163  }
164 }
165 
/** Sum of all 256 pixels of a 16x16 block (rows are line_size apart). */
static int pix_sum_c(uint8_t *pix, int line_size)
{
    int s = 0, i, j;

    for (i = 0; i < 16; i++) {
        for (j = 0; j < 16; j++)
            s += pix[j];
        pix += line_size;
    }
    return s;
}
187 
188 static int pix_norm1_c(uint8_t * pix, int line_size)
189 {
190  int s, i, j;
191  uint32_t *sq = ff_squareTbl + 256;
192 
193  s = 0;
194  for (i = 0; i < 16; i++) {
195  for (j = 0; j < 16; j += 8) {
196 #if 0
197  s += sq[pix[0]];
198  s += sq[pix[1]];
199  s += sq[pix[2]];
200  s += sq[pix[3]];
201  s += sq[pix[4]];
202  s += sq[pix[5]];
203  s += sq[pix[6]];
204  s += sq[pix[7]];
205 #else
206 #if HAVE_FAST_64BIT
207  register uint64_t x=*(uint64_t*)pix;
208  s += sq[x&0xff];
209  s += sq[(x>>8)&0xff];
210  s += sq[(x>>16)&0xff];
211  s += sq[(x>>24)&0xff];
212  s += sq[(x>>32)&0xff];
213  s += sq[(x>>40)&0xff];
214  s += sq[(x>>48)&0xff];
215  s += sq[(x>>56)&0xff];
216 #else
217  register uint32_t x=*(uint32_t*)pix;
218  s += sq[x&0xff];
219  s += sq[(x>>8)&0xff];
220  s += sq[(x>>16)&0xff];
221  s += sq[(x>>24)&0xff];
222  x=*(uint32_t*)(pix+4);
223  s += sq[x&0xff];
224  s += sq[(x>>8)&0xff];
225  s += sq[(x>>16)&0xff];
226  s += sq[(x>>24)&0xff];
227 #endif
228 #endif
229  pix += 8;
230  }
231  pix += line_size - 16;
232  }
233  return s;
234 }
235 
/** Byte-swap w 32-bit words from src into dst (unrolled by 8, with tail). */
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w)
{
    int i;

    for (i = 0; i + 8 <= w; i += 8) {
        dst[i + 0] = av_bswap32(src[i + 0]);
        dst[i + 1] = av_bswap32(src[i + 1]);
        dst[i + 2] = av_bswap32(src[i + 2]);
        dst[i + 3] = av_bswap32(src[i + 3]);
        dst[i + 4] = av_bswap32(src[i + 4]);
        dst[i + 5] = av_bswap32(src[i + 5]);
        dst[i + 6] = av_bswap32(src[i + 6]);
        dst[i + 7] = av_bswap32(src[i + 7]);
    }
    for (; i < w; i++)
        dst[i] = av_bswap32(src[i]);
}
253 
/** Byte-swap len 16-bit elements from src into dst. */
static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
{
    while (len--)
        *dst++ = av_bswap16(*src++);
}
259 
260 static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
261 {
262  int s, i;
263  uint32_t *sq = ff_squareTbl + 256;
264 
265  s = 0;
266  for (i = 0; i < h; i++) {
267  s += sq[pix1[0] - pix2[0]];
268  s += sq[pix1[1] - pix2[1]];
269  s += sq[pix1[2] - pix2[2]];
270  s += sq[pix1[3] - pix2[3]];
271  pix1 += line_size;
272  pix2 += line_size;
273  }
274  return s;
275 }
276 
277 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
278 {
279  int s, i;
280  uint32_t *sq = ff_squareTbl + 256;
281 
282  s = 0;
283  for (i = 0; i < h; i++) {
284  s += sq[pix1[0] - pix2[0]];
285  s += sq[pix1[1] - pix2[1]];
286  s += sq[pix1[2] - pix2[2]];
287  s += sq[pix1[3] - pix2[3]];
288  s += sq[pix1[4] - pix2[4]];
289  s += sq[pix1[5] - pix2[5]];
290  s += sq[pix1[6] - pix2[6]];
291  s += sq[pix1[7] - pix2[7]];
292  pix1 += line_size;
293  pix2 += line_size;
294  }
295  return s;
296 }
297 
298 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
299 {
300  int s, i;
301  uint32_t *sq = ff_squareTbl + 256;
302 
303  s = 0;
304  for (i = 0; i < h; i++) {
305  s += sq[pix1[ 0] - pix2[ 0]];
306  s += sq[pix1[ 1] - pix2[ 1]];
307  s += sq[pix1[ 2] - pix2[ 2]];
308  s += sq[pix1[ 3] - pix2[ 3]];
309  s += sq[pix1[ 4] - pix2[ 4]];
310  s += sq[pix1[ 5] - pix2[ 5]];
311  s += sq[pix1[ 6] - pix2[ 6]];
312  s += sq[pix1[ 7] - pix2[ 7]];
313  s += sq[pix1[ 8] - pix2[ 8]];
314  s += sq[pix1[ 9] - pix2[ 9]];
315  s += sq[pix1[10] - pix2[10]];
316  s += sq[pix1[11] - pix2[11]];
317  s += sq[pix1[12] - pix2[12]];
318  s += sq[pix1[13] - pix2[13]];
319  s += sq[pix1[14] - pix2[14]];
320  s += sq[pix1[15] - pix2[15]];
321 
322  pix1 += line_size;
323  pix2 += line_size;
324  }
325  return s;
326 }
327 
328 static void diff_pixels_c(int16_t *av_restrict block, const uint8_t *s1,
329  const uint8_t *s2, int stride){
330  int i;
331 
332  /* read the pixels */
333  for(i=0;i<8;i++) {
334  block[0] = s1[0] - s2[0];
335  block[1] = s1[1] - s2[1];
336  block[2] = s1[2] - s2[2];
337  block[3] = s1[3] - s2[3];
338  block[4] = s1[4] - s2[4];
339  block[5] = s1[5] - s2[5];
340  block[6] = s1[6] - s2[6];
341  block[7] = s1[7] - s2[7];
342  s1 += stride;
343  s2 += stride;
344  block += 8;
345  }
346 }
347 
348 static void put_pixels_clamped_c(const int16_t *block, uint8_t *av_restrict pixels,
349  int line_size)
350 {
351  int i;
352 
353  /* read the pixels */
354  for(i=0;i<8;i++) {
355  pixels[0] = av_clip_uint8(block[0]);
356  pixels[1] = av_clip_uint8(block[1]);
357  pixels[2] = av_clip_uint8(block[2]);
358  pixels[3] = av_clip_uint8(block[3]);
359  pixels[4] = av_clip_uint8(block[4]);
360  pixels[5] = av_clip_uint8(block[5]);
361  pixels[6] = av_clip_uint8(block[6]);
362  pixels[7] = av_clip_uint8(block[7]);
363 
364  pixels += line_size;
365  block += 8;
366  }
367 }
368 
369 static void put_pixels_clamped4_c(const int16_t *block, uint8_t *av_restrict pixels,
370  int line_size)
371 {
372  int i;
373 
374  /* read the pixels */
375  for(i=0;i<4;i++) {
376  pixels[0] = av_clip_uint8(block[0]);
377  pixels[1] = av_clip_uint8(block[1]);
378  pixels[2] = av_clip_uint8(block[2]);
379  pixels[3] = av_clip_uint8(block[3]);
380 
381  pixels += line_size;
382  block += 8;
383  }
384 }
385 
386 static void put_pixels_clamped2_c(const int16_t *block, uint8_t *av_restrict pixels,
387  int line_size)
388 {
389  int i;
390 
391  /* read the pixels */
392  for(i=0;i<2;i++) {
393  pixels[0] = av_clip_uint8(block[0]);
394  pixels[1] = av_clip_uint8(block[1]);
395 
396  pixels += line_size;
397  block += 8;
398  }
399 }
400 
401 static void put_signed_pixels_clamped_c(const int16_t *block,
402  uint8_t *av_restrict pixels,
403  int line_size)
404 {
405  int i, j;
406 
407  for (i = 0; i < 8; i++) {
408  for (j = 0; j < 8; j++) {
409  if (*block < -128)
410  *pixels = 0;
411  else if (*block > 127)
412  *pixels = 255;
413  else
414  *pixels = (uint8_t)(*block + 128);
415  block++;
416  pixels++;
417  }
418  pixels += (line_size - 8);
419  }
420 }
421 
422 static void add_pixels8_c(uint8_t *av_restrict pixels,
423  int16_t *block,
424  int line_size)
425 {
426  int i;
427 
428  for(i=0;i<8;i++) {
429  pixels[0] += block[0];
430  pixels[1] += block[1];
431  pixels[2] += block[2];
432  pixels[3] += block[3];
433  pixels[4] += block[4];
434  pixels[5] += block[5];
435  pixels[6] += block[6];
436  pixels[7] += block[7];
437  pixels += line_size;
438  block += 8;
439  }
440 }
441 
442 static void add_pixels_clamped_c(const int16_t *block, uint8_t *av_restrict pixels,
443  int line_size)
444 {
445  int i;
446 
447  /* read the pixels */
448  for(i=0;i<8;i++) {
449  pixels[0] = av_clip_uint8(pixels[0] + block[0]);
450  pixels[1] = av_clip_uint8(pixels[1] + block[1]);
451  pixels[2] = av_clip_uint8(pixels[2] + block[2]);
452  pixels[3] = av_clip_uint8(pixels[3] + block[3]);
453  pixels[4] = av_clip_uint8(pixels[4] + block[4]);
454  pixels[5] = av_clip_uint8(pixels[5] + block[5]);
455  pixels[6] = av_clip_uint8(pixels[6] + block[6]);
456  pixels[7] = av_clip_uint8(pixels[7] + block[7]);
457  pixels += line_size;
458  block += 8;
459  }
460 }
461 
462 static void add_pixels_clamped4_c(const int16_t *block, uint8_t *av_restrict pixels,
463  int line_size)
464 {
465  int i;
466 
467  /* read the pixels */
468  for(i=0;i<4;i++) {
469  pixels[0] = av_clip_uint8(pixels[0] + block[0]);
470  pixels[1] = av_clip_uint8(pixels[1] + block[1]);
471  pixels[2] = av_clip_uint8(pixels[2] + block[2]);
472  pixels[3] = av_clip_uint8(pixels[3] + block[3]);
473  pixels += line_size;
474  block += 8;
475  }
476 }
477 
478 static void add_pixels_clamped2_c(const int16_t *block, uint8_t *av_restrict pixels,
479  int line_size)
480 {
481  int i;
482 
483  /* read the pixels */
484  for(i=0;i<2;i++) {
485  pixels[0] = av_clip_uint8(pixels[0] + block[0]);
486  pixels[1] = av_clip_uint8(pixels[1] + block[1]);
487  pixels += line_size;
488  block += 8;
489  }
490 }
491 
/** Sum of absolute values of all 64 coefficients of a DCT block. */
static int sum_abs_dctelem_c(int16_t *block)
{
    int sum = 0, i;

    for (i = 0; i < 64; i++)
        sum += FFABS(block[i]);
    return sum;
}
499 
/** Fill a 16-wide, h-tall block with a constant byte value. */
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    int i;

    for (i = 0; i < h; i++) {
        memset(block, value, 16);
        block += line_size;
    }
}
509 
/** Fill an 8-wide, h-tall block with a constant byte value. */
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    int i;

    for (i = 0; i < h; i++) {
        memset(block, value, 8);
        block += line_size;
    }
}
519 
/* Rounding averages of 2 and 4 values; arguments parenthesized for
 * macro hygiene (behavior unchanged for all existing call sites). */
#define avg2(a, b)       (((a) + (b) + 1) >> 1)
#define avg4(a, b, c, d) (((a) + (b) + (c) + (d) + 2) >> 2)
522 
/**
 * 1/16-pel bilinear motion compensation for one 8xh block.
 * (x16, y16) is the fractional position in sixteenths of a pixel;
 * the four bilinear weights A..D sum to 256, hence the >>8.
 */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h,
                   int x16, int y16, int rounder)
{
    const int A = (16 - x16) * (16 - y16);
    const int B = (     x16) * (16 - y16);
    const int C = (16 - x16) * (     y16);
    const int D = (     x16) * (     y16);
    int i, j;

    for (i = 0; i < h; i++) {
        for (j = 0; j < 8; j++)
            dst[j] = (A * src[j]          + B * src[j + 1] +
                      C * src[j + stride] + D * src[j + stride + 1] +
                      rounder) >> 8;
        dst += stride;
        src += stride;
    }
}
545 
/**
 * Global motion compensation with an affine transform, one 8xh block.
 * (ox,oy) is the 16.16 fixed-point source position of the first pixel;
 * (dxx,dxy,dyx,dyy) are the per-pixel / per-line increments, shift sets the
 * sub-pel precision (s = 1<<shift positions per pixel), r is the rounder,
 * and width/height bound the valid source area.
 */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r,
              int width, int height)
{
    int y, vx, vy;
    const int s = 1 << shift;

    /* from here on, width/height are the largest valid coordinates */
    width--;
    height--;

    for (y = 0; y < h; y++) {
        int x;

        vx = ox;
        vy = oy;
        for (x = 0; x < 8; x++) { // XXX FIXME optimize
            int src_x, src_y, frac_x, frac_y, index;

            src_x  = vx >> 16;
            src_y  = vy >> 16;
            frac_x = src_x & (s - 1);
            frac_y = src_y & (s - 1);
            src_x >>= shift;
            src_y >>= shift;

            if ((unsigned) src_x < width) {
                if ((unsigned) src_y < height) {
                    /* fully inside: full bilinear interpolation */
                    index = src_x + src_y * stride;
                    dst[y * stride + x] =
                        ((src[index]              * (s - frac_x) +
                          src[index + 1]          *      frac_x) * (s - frac_y) +
                         (src[index + stride]     * (s - frac_x) +
                          src[index + stride + 1] *      frac_x) *      frac_y  +
                         r) >> (shift * 2);
                } else {
                    /* clipped vertically: horizontal interpolation only */
                    index = src_x + av_clip(src_y, 0, height) * stride;
                    dst[y * stride + x] =
                        ((src[index]     * (s - frac_x) +
                          src[index + 1] *      frac_x) * s +
                         r) >> (shift * 2);
                }
            } else {
                if ((unsigned) src_y < height) {
                    /* clipped horizontally: vertical interpolation only */
                    index = av_clip(src_x, 0, width) + src_y * stride;
                    dst[y * stride + x] =
                        ((src[index]          * (s - frac_y) +
                          src[index + stride] *      frac_y) * s +
                         r) >> (shift * 2);
                } else {
                    /* clipped both ways: nearest valid pixel */
                    index = av_clip(src_x, 0, width) +
                            av_clip(src_y, 0, height) * stride;
                    dst[y * stride + x] = src[index];
                }
            }

            vx += dxx;
            vy += dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
603 
/* Third-pel MC, integer position: plain copy, dispatched on block width. */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    switch (width) {
    case  2: put_pixels2_8_c (dst, src, stride, height); break;
    case  4: put_pixels4_8_c (dst, src, stride, height); break;
    case  8: put_pixels8_8_c (dst, src, stride, height); break;
    case 16: put_pixels16_8_c(dst, src, stride, height); break;
    }
}
612 
/* Third-pel horizontal interpolation: round((2a + b) / 3);
 * 683/2048 is the fixed-point approximation of 1/3. */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = (683 * (2 * src[j] + src[j + 1] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}
623 
/* Third-pel horizontal interpolation: round((a + 2b) / 3). */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = (683 * (src[j] + 2 * src[j + 1] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}
634 
/* Third-pel vertical interpolation: round((2a + c) / 3), c one row below. */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = (683 * (2 * src[j] + src[j + stride] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}
645 
/* Third-pel diagonal interpolation, weights 4/3/3/2 over the 2x2
 * neighbourhood; 2731/32768 approximates 1/12. */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = (2731 * (4 * src[j] + 3 * src[j + 1] +
                              3 * src[j + stride] + 2 * src[j + stride + 1] +
                              6)) >> 15;
        src += stride;
        dst += stride;
    }
}
656 
/* Third-pel diagonal interpolation, weights 3/2/4/3 over the 2x2 block. */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = (2731 * (3 * src[j] + 2 * src[j + 1] +
                              4 * src[j + stride] + 3 * src[j + stride + 1] +
                              6)) >> 15;
        src += stride;
        dst += stride;
    }
}
667 
/* Third-pel vertical interpolation: round((a + 2c) / 3), c one row below. */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = (683 * (src[j] + 2 * src[j + stride] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}
678 
/* Third-pel diagonal interpolation, weights 3/4/2/3 over the 2x2 block. */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = (2731 * (3 * src[j] + 4 * src[j + 1] +
                              2 * src[j + stride] + 3 * src[j + stride + 1] +
                              6)) >> 15;
        src += stride;
        dst += stride;
    }
}
689 
/* Third-pel diagonal interpolation, weights 2/3/3/4 over the 2x2 block. */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = (2731 * (2 * src[j] + 3 * src[j + 1] +
                              3 * src[j + stride] + 4 * src[j + stride + 1] +
                              6)) >> 15;
        src += stride;
        dst += stride;
    }
}
700 
/* Third-pel MC, integer position, averaging: dispatched on block width. */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    switch (width) {
    case  2: avg_pixels2_8_c (dst, src, stride, height); break;
    case  4: avg_pixels4_8_c (dst, src, stride, height); break;
    case  8: avg_pixels8_8_c (dst, src, stride, height); break;
    case 16: avg_pixels16_8_c(dst, src, stride, height); break;
    }
}
709 
/* Averaging version of mc10: rounding average of dst and the interpolation. */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = (dst[j] +
                      ((683 * (2 * src[j] + src[j + 1] + 1)) >> 11) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}
720 
/* Averaging version of mc20. */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = (dst[j] +
                      ((683 * (src[j] + 2 * src[j + 1] + 1)) >> 11) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}
731 
/* Averaging version of mc01. */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = (dst[j] +
                      ((683 * (2 * src[j] + src[j + stride] + 1)) >> 11) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}
742 
/* Averaging version of mc11. */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = (dst[j] +
                      ((2731 * (4 * src[j] + 3 * src[j + 1] +
                                3 * src[j + stride] + 2 * src[j + stride + 1] +
                                6)) >> 15) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}
753 
/* Averaging version of mc12. */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = (dst[j] +
                      ((2731 * (3 * src[j] + 2 * src[j + 1] +
                                4 * src[j + stride] + 3 * src[j + stride + 1] +
                                6)) >> 15) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}
764 
/* Averaging version of mc02. */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = (dst[j] +
                      ((683 * (src[j] + 2 * src[j + stride] + 1)) >> 11) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}
775 
/* Averaging version of mc21. */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = (dst[j] +
                      ((2731 * (3 * src[j] + 4 * src[j + 1] +
                                2 * src[j + stride] + 3 * src[j + stride + 1] +
                                6)) >> 15) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}
786 
/* Averaging version of mc22. */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = (dst[j] +
                      ((2731 * (2 * src[j] + 3 * src[j + 1] +
                                3 * src[j + stride] + 4 * src[j + stride + 1] +
                                6)) >> 15) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}
797 
798 #define QPEL_MC(r, OPNAME, RND, OP) \
799 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
800  const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
801  int i;\
802  for(i=0; i<h; i++)\
803  {\
804  OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
805  OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
806  OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
807  OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
808  OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
809  OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
810  OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
811  OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
812  dst+=dstStride;\
813  src+=srcStride;\
814  }\
815 }\
816 \
817 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
818  const int w=8;\
819  const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
820  int i;\
821  for(i=0; i<w; i++)\
822  {\
823  const int src0= src[0*srcStride];\
824  const int src1= src[1*srcStride];\
825  const int src2= src[2*srcStride];\
826  const int src3= src[3*srcStride];\
827  const int src4= src[4*srcStride];\
828  const int src5= src[5*srcStride];\
829  const int src6= src[6*srcStride];\
830  const int src7= src[7*srcStride];\
831  const int src8= src[8*srcStride];\
832  OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
833  OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
834  OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
835  OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
836  OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
837  OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
838  OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
839  OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
840  dst++;\
841  src++;\
842  }\
843 }\
844 \
845 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
846  const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
847  int i;\
848  \
849  for(i=0; i<h; i++)\
850  {\
851  OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
852  OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
853  OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
854  OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
855  OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
856  OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
857  OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
858  OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
859  OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
860  OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
861  OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
862  OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
863  OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
864  OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
865  OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
866  OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
867  dst+=dstStride;\
868  src+=srcStride;\
869  }\
870 }\
871 \
872 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
873  const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
874  int i;\
875  const int w=16;\
876  for(i=0; i<w; i++)\
877  {\
878  const int src0= src[0*srcStride];\
879  const int src1= src[1*srcStride];\
880  const int src2= src[2*srcStride];\
881  const int src3= src[3*srcStride];\
882  const int src4= src[4*srcStride];\
883  const int src5= src[5*srcStride];\
884  const int src6= src[6*srcStride];\
885  const int src7= src[7*srcStride];\
886  const int src8= src[8*srcStride];\
887  const int src9= src[9*srcStride];\
888  const int src10= src[10*srcStride];\
889  const int src11= src[11*srcStride];\
890  const int src12= src[12*srcStride];\
891  const int src13= src[13*srcStride];\
892  const int src14= src[14*srcStride];\
893  const int src15= src[15*srcStride];\
894  const int src16= src[16*srcStride];\
895  OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
896  OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
897  OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
898  OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
899  OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
900  OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
901  OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
902  OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
903  OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
904  OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
905  OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
906  OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
907  OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
908  OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
909  OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
910  OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
911  dst++;\
912  src++;\
913  }\
914 }\
915 \
916 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
917 {\
918  uint8_t half[64];\
919  put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
920  OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
921 }\
922 \
923 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
924 {\
925  OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
926 }\
927 \
928 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
929 {\
930  uint8_t half[64];\
931  put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
932  OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
933 }\
934 \
935 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
936 {\
937  uint8_t full[16*9];\
938  uint8_t half[64];\
939  copy_block9(full, src, 16, stride, 9);\
940  put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
941  OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
942 }\
943 \
944 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
945 {\
946  uint8_t full[16*9];\
947  copy_block9(full, src, 16, stride, 9);\
948  OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
949 }\
950 \
951 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
952 {\
953  uint8_t full[16*9];\
954  uint8_t half[64];\
955  copy_block9(full, src, 16, stride, 9);\
956  put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
957  OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
958 }\
959 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
960 {\
961  uint8_t full[16*9];\
962  uint8_t halfH[72];\
963  uint8_t halfV[64];\
964  uint8_t halfHV[64];\
965  copy_block9(full, src, 16, stride, 9);\
966  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
967  put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
968  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
969  OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
970 }\
971 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
972 {\
973  uint8_t full[16*9];\
974  uint8_t halfH[72];\
975  uint8_t halfHV[64];\
976  copy_block9(full, src, 16, stride, 9);\
977  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
978  put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
979  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
980  OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
981 }\
982 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
983 {\
984  uint8_t full[16*9];\
985  uint8_t halfH[72];\
986  uint8_t halfV[64];\
987  uint8_t halfHV[64];\
988  copy_block9(full, src, 16, stride, 9);\
989  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
990  put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
991  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
992  OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
993 }\
994 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
995 {\
996  uint8_t full[16*9];\
997  uint8_t halfH[72];\
998  uint8_t halfHV[64];\
999  copy_block9(full, src, 16, stride, 9);\
1000  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1001  put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1002  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1003  OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
1004 }\
1005 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1006 {\
1007  uint8_t full[16*9];\
1008  uint8_t halfH[72];\
1009  uint8_t halfV[64];\
1010  uint8_t halfHV[64];\
1011  copy_block9(full, src, 16, stride, 9);\
1012  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1013  put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1014  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1015  OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1016 }\
1017 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1018 {\
1019  uint8_t full[16*9];\
1020  uint8_t halfH[72];\
1021  uint8_t halfHV[64];\
1022  copy_block9(full, src, 16, stride, 9);\
1023  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1024  put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
1025  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1026  OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1027 }\
1028 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1029 {\
1030  uint8_t full[16*9];\
1031  uint8_t halfH[72];\
1032  uint8_t halfV[64];\
1033  uint8_t halfHV[64];\
1034  copy_block9(full, src, 16, stride, 9);\
1035  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
1036  put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1037  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1038  OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1039 }\
1040 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1041 {\
1042  uint8_t full[16*9];\
1043  uint8_t halfH[72];\
1044  uint8_t halfHV[64];\
1045  copy_block9(full, src, 16, stride, 9);\
1046  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1047  put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1048  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1049  OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1050 }\
1051 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1052 {\
1053  uint8_t halfH[72];\
1054  uint8_t halfHV[64];\
1055  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1056  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1057  OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
1058 }\
1059 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1060 {\
1061  uint8_t halfH[72];\
1062  uint8_t halfHV[64];\
1063  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1064  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1065  OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1066 }\
1067 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1068 {\
1069  uint8_t full[16*9];\
1070  uint8_t halfH[72];\
1071  uint8_t halfV[64];\
1072  uint8_t halfHV[64];\
1073  copy_block9(full, src, 16, stride, 9);\
1074  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1075  put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1076  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1077  OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
1078 }\
1079 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1080 {\
1081  uint8_t full[16*9];\
1082  uint8_t halfH[72];\
1083  copy_block9(full, src, 16, stride, 9);\
1084  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1085  put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
1086  OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1087 }\
1088 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1089 {\
1090  uint8_t full[16*9];\
1091  uint8_t halfH[72];\
1092  uint8_t halfV[64];\
1093  uint8_t halfHV[64];\
1094  copy_block9(full, src, 16, stride, 9);\
1095  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1096  put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1097  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1098  OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
1099 }\
1100 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1101 {\
1102  uint8_t full[16*9];\
1103  uint8_t halfH[72];\
1104  copy_block9(full, src, 16, stride, 9);\
1105  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1106  put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1107  OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1108 }\
1109 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1110 {\
1111  uint8_t halfH[72];\
1112  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1113  OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1114 }\
1115 \
1116 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1117 {\
1118  uint8_t half[256];\
1119  put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1120  OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
1121 }\
1122 \
1123 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1124 {\
1125  OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
1126 }\
1127 \
1128 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1129 {\
1130  uint8_t half[256];\
1131  put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1132  OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
1133 }\
1134 \
1135 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1136 {\
1137  uint8_t full[24*17];\
1138  uint8_t half[256];\
1139  copy_block17(full, src, 24, stride, 17);\
1140  put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1141  OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
1142 }\
1143 \
1144 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1145 {\
1146  uint8_t full[24*17];\
1147  copy_block17(full, src, 24, stride, 17);\
1148  OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
1149 }\
1150 \
1151 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1152 {\
1153  uint8_t full[24*17];\
1154  uint8_t half[256];\
1155  copy_block17(full, src, 24, stride, 17);\
1156  put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1157  OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
1158 }\
1159 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1160 {\
1161  uint8_t full[24*17];\
1162  uint8_t halfH[272];\
1163  uint8_t halfV[256];\
1164  uint8_t halfHV[256];\
1165  copy_block17(full, src, 24, stride, 17);\
1166  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1167  put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1168  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1169  OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1170 }\
1171 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1172 {\
1173  uint8_t full[24*17];\
1174  uint8_t halfH[272];\
1175  uint8_t halfHV[256];\
1176  copy_block17(full, src, 24, stride, 17);\
1177  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1178  put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1179  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1180  OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1181 }\
1182 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1183 {\
1184  uint8_t full[24*17];\
1185  uint8_t halfH[272];\
1186  uint8_t halfV[256];\
1187  uint8_t halfHV[256];\
1188  copy_block17(full, src, 24, stride, 17);\
1189  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1190  put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1191  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1192  OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1193 }\
1194 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1195 {\
1196  uint8_t full[24*17];\
1197  uint8_t halfH[272];\
1198  uint8_t halfHV[256];\
1199  copy_block17(full, src, 24, stride, 17);\
1200  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1201  put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1202  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1203  OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1204 }\
1205 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1206 {\
1207  uint8_t full[24*17];\
1208  uint8_t halfH[272];\
1209  uint8_t halfV[256];\
1210  uint8_t halfHV[256];\
1211  copy_block17(full, src, 24, stride, 17);\
1212  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1213  put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1214  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1215  OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1216 }\
1217 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1218 {\
1219  uint8_t full[24*17];\
1220  uint8_t halfH[272];\
1221  uint8_t halfHV[256];\
1222  copy_block17(full, src, 24, stride, 17);\
1223  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1224  put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1225  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1226  OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1227 }\
1228 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1229 {\
1230  uint8_t full[24*17];\
1231  uint8_t halfH[272];\
1232  uint8_t halfV[256];\
1233  uint8_t halfHV[256];\
1234  copy_block17(full, src, 24, stride, 17);\
1235  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
1236  put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1237  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1238  OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1239 }\
1240 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1241 {\
1242  uint8_t full[24*17];\
1243  uint8_t halfH[272];\
1244  uint8_t halfHV[256];\
1245  copy_block17(full, src, 24, stride, 17);\
1246  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1247  put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1248  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1249  OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1250 }\
1251 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1252 {\
1253  uint8_t halfH[272];\
1254  uint8_t halfHV[256];\
1255  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1256  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1257  OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1258 }\
1259 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1260 {\
1261  uint8_t halfH[272];\
1262  uint8_t halfHV[256];\
1263  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1264  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1265  OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1266 }\
1267 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1268 {\
1269  uint8_t full[24*17];\
1270  uint8_t halfH[272];\
1271  uint8_t halfV[256];\
1272  uint8_t halfHV[256];\
1273  copy_block17(full, src, 24, stride, 17);\
1274  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1275  put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1276  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1277  OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1278 }\
1279 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1280 {\
1281  uint8_t full[24*17];\
1282  uint8_t halfH[272];\
1283  copy_block17(full, src, 24, stride, 17);\
1284  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1285  put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1286  OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1287 }\
1288 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1289 {\
1290  uint8_t full[24*17];\
1291  uint8_t halfH[272];\
1292  uint8_t halfV[256];\
1293  uint8_t halfHV[256];\
1294  copy_block17(full, src, 24, stride, 17);\
1295  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1296  put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1297  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1298  OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1299 }\
1300 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1301 {\
1302  uint8_t full[24*17];\
1303  uint8_t halfH[272];\
1304  copy_block17(full, src, 24, stride, 17);\
1305  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1306  put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1307  OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1308 }\
1309 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1310 {\
1311  uint8_t halfH[272];\
1312  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1313  OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1314 }
1315 
/* Pixel-store primitives plugged into the QPEL_MC() template above.
 * b is a 6-tap filter accumulator scaled by 32 (hence the >>5); the +16
 * variants round to nearest while the "no_rnd" +15 variants round down.
 * cm is the clip-to-byte table available in the expansion context. */
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]

/* Instantiate the put, put_no_rnd and avg families of qpel functions. */
QPEL_MC(0, put_        , _        , op_put)
QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
QPEL_MC(0, avg_        , _        , op_avg)
//QPEL_MC(1, avg_no_rnd , _ , op_avg)
#undef op_avg
#undef op_avg_no_rnd
#undef op_put
#undef op_put_no_rnd
1329 
/* Plain full-pel 8x8/16x16 copy and average wrappers around the
 * bit-depth-8 template helpers.
 * NOTE(review): the four signature lines were missing from the extracted
 * dump (headless bodies); restored from the names referenced by the
 * mc00 #defines below and the signature shape of the sibling rv40
 * wrappers -- confirm against the repository copy of dsputil.c. */
void ff_put_pixels8x8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    put_pixels8_8_c(dst, src, stride, 8);
}

void ff_avg_pixels8x8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    avg_pixels8_8_c(dst, src, stride, 8);
}

void ff_put_pixels16x16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    put_pixels16_8_c(dst, src, stride, 16);
}

void ff_avg_pixels16x16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    avg_pixels16_8_c(dst, src, stride, 16);
}
1346 
/* Full-pel (mc00) qpel positions need no filtering: alias them straight
 * to the plain copy/average wrappers above.  Rounding does not matter for
 * an unfiltered copy, so the no_rnd aliases share the put versions. */
#define put_qpel8_mc00_c ff_put_pixels8x8_c
#define avg_qpel8_mc00_c ff_avg_pixels8x8_c
#define put_qpel16_mc00_c ff_put_pixels16x16_c
#define avg_qpel16_mc00_c ff_avg_pixels16x16_c
#define put_no_rnd_qpel8_mc00_c ff_put_pixels8x8_c
#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
1353 
1354 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
1355  const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1356  int i;
1357 
1358  for(i=0; i<h; i++){
1359  dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
1360  dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
1361  dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
1362  dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
1363  dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
1364  dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
1365  dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
1366  dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
1367  dst+=dstStride;
1368  src+=srcStride;
1369  }
1370 }
1371 
1372 #if CONFIG_RV40_DECODER
/* RV40 16x16 mc33 position handled by the diagonal half-pel averager.
 * NOTE(review): the signature line was missing from the extracted dump;
 * restored from the three sibling rv40 wrappers below -- confirm against
 * the repository copy of dsputil.c. */
void ff_put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    put_pixels16_xy2_8_c(dst, src, stride, 16);
}
/* RV40 16x16 mc33: averaging variant, implemented with the plain
 * diagonal (xy) half-pel averager. */
void ff_avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    avg_pixels16_xy2_8_c(dst, src, stride, 16);
}
/* RV40 8x8 mc33: put variant using the 8x8 diagonal half-pel averager. */
void ff_put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    put_pixels8_xy2_8_c(dst, src, stride, 8);
}
/* RV40 8x8 mc33: averaging variant using the 8x8 diagonal half-pel
 * averager. */
void ff_avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    avg_pixels8_xy2_8_c(dst, src, stride, 8);
}
1389 #endif /* CONFIG_RV40_DECODER */
1390 
1391 #if CONFIG_DIRAC_DECODER
/* Dirac motion-compensation wrappers.  Each takes an array of up to five
 * source pointers (src[0..4]) and forwards to the bit-depth-8 template
 * helpers: plain copy (1 source), l2 (average of 2 sources) and l4
 * (average of 4 sources), at widths 8, 16 and 32 (32 is done as two
 * 16-wide calls).  Instantiated once for "put" and once for "avg". */
#define DIRAC_MC(OPNAME)\
void ff_ ## OPNAME ## _dirac_pixels8_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels8_8_c(dst, src[0], stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels16_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_8_c(dst, src[0], stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels32_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_8_c(dst   , src[0]   , stride, h);\
    OPNAME ## _pixels16_8_c(dst+16, src[0]+16, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels8_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels8_l2_8(dst, src[0], src[1], stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels16_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l2_8(dst, src[0], src[1], stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels32_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l2_8(dst   , src[0]   , src[1]   , stride, stride, stride, h);\
    OPNAME ## _pixels16_l2_8(dst+16, src[0]+16, src[1]+16, stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels8_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels8_l4_8(dst, src[0], src[1], src[2], src[3], stride, stride, stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels16_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l4_8(dst, src[0], src[1], src[2], src[3], stride, stride, stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels32_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l4_8(dst   , src[0]   , src[1]   , src[2]   , src[3]   , stride, stride, stride, stride, stride, h);\
    OPNAME ## _pixels16_l4_8(dst+16, src[0]+16, src[1]+16, src[2]+16, src[3]+16, stride, stride, stride, stride, stride, h);\
}
DIRAC_MC(put)
DIRAC_MC(avg)
1434 #endif
1435 
1436 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
1437  const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1438  int i;
1439 
1440  for(i=0; i<w; i++){
1441  const int src_1= src[ -srcStride];
1442  const int src0 = src[0 ];
1443  const int src1 = src[ srcStride];
1444  const int src2 = src[2*srcStride];
1445  const int src3 = src[3*srcStride];
1446  const int src4 = src[4*srcStride];
1447  const int src5 = src[5*srcStride];
1448  const int src6 = src[6*srcStride];
1449  const int src7 = src[7*srcStride];
1450  const int src8 = src[8*srcStride];
1451  const int src9 = src[9*srcStride];
1452  dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
1453  dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
1454  dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
1455  dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
1456  dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
1457  dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
1458  dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
1459  dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
1460  src++;
1461  dst++;
1462  }
1463 }
1464 
/* MSPEL (WMV2) 8x8 MC, horizontal sub-pel position 1: average of the
 * unfiltered source and the horizontally lowpass-filtered block. */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src, half, stride, stride, 8, 8);
}
1471 
/* MSPEL 8x8 MC, horizontal position 2: the horizontal lowpass written
 * straight to dst. */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}
1476 
/* MSPEL 8x8 MC, horizontal position 3: average of the source shifted one
 * pixel right (src+1) and the horizontally lowpass-filtered block. */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);
}
1483 
/* MSPEL 8x8 MC, vertical position 2: the vertical lowpass written
 * straight to dst. */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}
1488 
/* MSPEL 8x8 MC, combined horizontal+vertical (left column): average of
 * the vertically filtered source and the H-then-V filtered block.
 * halfH is filtered over 11 rows starting one row above the block
 * (src-stride) so the vertical pass has its context rows; halfH+8 skips
 * the extra leading row. */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
}
/* MSPEL 8x8 MC, combined horizontal+vertical (right column): like mc12
 * but the pure vertical pass runs on src+1 (one pixel to the right). */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
}
/* MSPEL 8x8 MC, center position: horizontal lowpass over 11 rows (one
 * above the block for context), then the vertical lowpass straight into
 * dst, skipping the extra leading row via halfH+8. */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    uint8_t halfH[88];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
}
1515 
1516 static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
1518  int x;
1519  const int strength= ff_h263_loop_filter_strength[qscale];
1520 
1521  for(x=0; x<8; x++){
1522  int d1, d2, ad1;
1523  int p0= src[x-2*stride];
1524  int p1= src[x-1*stride];
1525  int p2= src[x+0*stride];
1526  int p3= src[x+1*stride];
1527  int d = (p0 - p3 + 4*(p2 - p1)) / 8;
1528 
1529  if (d<-2*strength) d1= 0;
1530  else if(d<- strength) d1=-2*strength - d;
1531  else if(d< strength) d1= d;
1532  else if(d< 2*strength) d1= 2*strength - d;
1533  else d1= 0;
1534 
1535  p1 += d1;
1536  p2 -= d1;
1537  if(p1&256) p1= ~(p1>>31);
1538  if(p2&256) p2= ~(p2>>31);
1539 
1540  src[x-1*stride] = p1;
1541  src[x+0*stride] = p2;
1542 
1543  ad1= FFABS(d1)>>1;
1544 
1545  d2= av_clip((p0-p3)/4, -ad1, ad1);
1546 
1547  src[x-2*stride] = p0 - d2;
1548  src[x+ stride] = p3 + d2;
1549  }
1550  }
1551 }
1552 
1553 static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
1555  int y;
1556  const int strength= ff_h263_loop_filter_strength[qscale];
1557 
1558  for(y=0; y<8; y++){
1559  int d1, d2, ad1;
1560  int p0= src[y*stride-2];
1561  int p1= src[y*stride-1];
1562  int p2= src[y*stride+0];
1563  int p3= src[y*stride+1];
1564  int d = (p0 - p3 + 4*(p2 - p1)) / 8;
1565 
1566  if (d<-2*strength) d1= 0;
1567  else if(d<- strength) d1=-2*strength - d;
1568  else if(d< strength) d1= d;
1569  else if(d< 2*strength) d1= 2*strength - d;
1570  else d1= 0;
1571 
1572  p1 += d1;
1573  p2 -= d1;
1574  if(p1&256) p1= ~(p1>>31);
1575  if(p2&256) p2= ~(p2>>31);
1576 
1577  src[y*stride-1] = p1;
1578  src[y*stride+0] = p2;
1579 
1580  ad1= FFABS(d1)>>1;
1581 
1582  d2= av_clip((p0-p3)/4, -ad1, ad1);
1583 
1584  src[y*stride-2] = p0 - d2;
1585  src[y*stride+1] = p3 + d2;
1586  }
1587  }
1588 }
1589 
/* Sum of absolute differences (SAD) over a 16-wide block of h rows.
 * v is an unused context pointer kept for the me_cmp signature. */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sum += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
1617 
/* SAD of a 16-wide block against the horizontal half-pel interpolation
 * of pix2 (rounded average of each pixel and its right neighbour). */
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sum += abs(pix1[col] - avg2(pix2[col], pix2[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
1645 
/* SAD of a 16-wide block against the vertical half-pel interpolation of
 * pix2 (rounded average of each pixel and the one directly below). */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *pix3 = pix2 + line_size;
    int sum = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sum += abs(pix1[col] - avg2(pix2[col], pix3[col]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return sum;
}
1675 
/* SAD of a 16-wide block against the diagonal half-pel interpolation of
 * pix2 (rounded average of the 2x2 neighbourhood). */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *pix3 = pix2 + line_size;
    int sum = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sum += abs(pix1[col] - avg4(pix2[col], pix2[col + 1], pix3[col], pix3[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return sum;
}
1705 
/* Sum of absolute differences (SAD) over an 8-wide block of h rows.
 * v is an unused context pointer kept for the me_cmp signature. */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sum += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
1725 
/* SAD of an 8-wide block against the horizontal half-pel interpolation
 * of pix2. */
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sum += abs(pix1[col] - avg2(pix2[col], pix2[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
1745 
/* SAD of an 8-wide block against the vertical half-pel interpolation of
 * pix2. */
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *pix3 = pix2 + line_size;
    int sum = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sum += abs(pix1[col] - avg2(pix2[col], pix3[col]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return sum;
}
1767 
/* SAD of an 8-wide block against the diagonal half-pel interpolation of
 * pix2 (rounded average of the 2x2 neighbourhood). */
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *pix3 = pix2 + line_size;
    int sum = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sum += abs(pix1[col] - avg4(pix2[col], pix2[col + 1], pix3[col], pix3[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return sum;
}
1789 
1790 static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
1791  MpegEncContext *c = v;
1792  int score1=0;
1793  int score2=0;
1794  int x,y;
1795 
1796  for(y=0; y<h; y++){
1797  for(x=0; x<16; x++){
1798  score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
1799  }
1800  if(y+1<h){
1801  for(x=0; x<15; x++){
1802  score2+= FFABS( s1[x ] - s1[x +stride]
1803  - s1[x+1] + s1[x+1+stride])
1804  -FFABS( s2[x ] - s2[x +stride]
1805  - s2[x+1] + s2[x+1+stride]);
1806  }
1807  }
1808  s1+= stride;
1809  s2+= stride;
1810  }
1811 
1812  if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
1813  else return score1 + FFABS(score2)*8;
1814 }
1815 
1816 static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
1817  MpegEncContext *c = v;
1818  int score1=0;
1819  int score2=0;
1820  int x,y;
1821 
1822  for(y=0; y<h; y++){
1823  for(x=0; x<8; x++){
1824  score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
1825  }
1826  if(y+1<h){
1827  for(x=0; x<7; x++){
1828  score2+= FFABS( s1[x ] - s1[x +stride]
1829  - s1[x+1] + s1[x+1+stride])
1830  -FFABS( s2[x ] - s2[x +stride]
1831  - s2[x+1] + s2[x+1+stride]);
1832  }
1833  }
1834  s1+= stride;
1835  s2+= stride;
1836  }
1837 
1838  if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
1839  else return score1 + FFABS(score2)*8;
1840 }
1841 
1842 static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
1843  int i;
1844  unsigned int sum=0;
1845 
1846  for(i=0; i<8*8; i++){
1847  int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
1848  int w= weight[i];
1849  b>>= RECON_SHIFT;
1850  av_assert2(-512<b && b<512);
1851 
1852  sum += (w*b)*(w*b)>>4;
1853  }
1854  return sum>>2;
1855 }
1856 
1857 static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
1858  int i;
1859 
1860  for(i=0; i<8*8; i++){
1861  rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
1862  }
1863 }
1864 
/** Dummy compare function for FF_CMP_ZERO: every block scores 0. */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h)
{
    return 0;
}
1868 
1870  int i;
1871 
1872  memset(cmp, 0, sizeof(void*)*6);
1873 
1874  for(i=0; i<6; i++){
1875  switch(type&0xFF){
1876  case FF_CMP_SAD:
1877  cmp[i]= c->sad[i];
1878  break;
1879  case FF_CMP_SATD:
1880  cmp[i]= c->hadamard8_diff[i];
1881  break;
1882  case FF_CMP_SSE:
1883  cmp[i]= c->sse[i];
1884  break;
1885  case FF_CMP_DCT:
1886  cmp[i]= c->dct_sad[i];
1887  break;
1888  case FF_CMP_DCT264:
1889  cmp[i]= c->dct264_sad[i];
1890  break;
1891  case FF_CMP_DCTMAX:
1892  cmp[i]= c->dct_max[i];
1893  break;
1894  case FF_CMP_PSNR:
1895  cmp[i]= c->quant_psnr[i];
1896  break;
1897  case FF_CMP_BIT:
1898  cmp[i]= c->bit[i];
1899  break;
1900  case FF_CMP_RD:
1901  cmp[i]= c->rd[i];
1902  break;
1903  case FF_CMP_VSAD:
1904  cmp[i]= c->vsad[i];
1905  break;
1906  case FF_CMP_VSSE:
1907  cmp[i]= c->vsse[i];
1908  break;
1909  case FF_CMP_ZERO:
1910  cmp[i]= zero_cmp;
1911  break;
1912  case FF_CMP_NSSE:
1913  cmp[i]= c->nsse[i];
1914  break;
1915 #if CONFIG_DWT
1916  case FF_CMP_W53:
1917  cmp[i]= c->w53[i];
1918  break;
1919  case FF_CMP_W97:
1920  cmp[i]= c->w97[i];
1921  break;
1922 #endif
1923  default:
1924  av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
1925  }
1926  }
1927 }
1928 
/**
 * dst[i] += src[i] (mod 256) for i in [0, w), processed one machine word at
 * a time using SWAR: the 0x7f/0x80 byte masks stop carries from crossing
 * byte lanes. Trailing bytes are handled individually.
 */
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w)
{
    const unsigned long m7f = ~0UL / 255 * 0x7f; /* 0x7f replicated in every byte */
    const unsigned long m80 = ~0UL / 255 * 0x80; /* 0x80 replicated in every byte */
    long i = 0;

    /* Cast sizeof to long: with the original unsigned arithmetic,
     * w < sizeof(long) made (w - sizeof(long)) wrap to a huge value and the
     * word loop ran out of bounds. */
    for (; i <= w - (long)sizeof(long); i += sizeof(long)) {
        long a = *(long *)(src + i);
        long b = *(long *)(dst + i);
        /* per-byte addition without inter-byte carry propagation */
        *(long *)(dst + i) = ((a & m7f) + (b & m7f)) ^ ((a ^ b) & m80);
    }
    for (; i < w; i++)
        dst[i] += src[i];
}
1939 
/**
 * dst[i] = src1[i] - src2[i] (mod 256) for i in [0, w), processed one
 * machine word at a time via SWAR byte-wise subtraction; falls back to a
 * byte loop when src2 is unaligned (on strict-alignment targets) and for
 * the trailing bytes.
 */
static void diff_bytes_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w)
{
    const unsigned long m7f = ~0UL / 255 * 0x7f; /* 0x7f replicated in every byte */
    const unsigned long m80 = ~0UL / 255 * 0x80; /* 0x80 replicated in every byte */
    long i = 0;

#if !HAVE_FAST_UNALIGNED
    if ((long)src2 & (sizeof(long) - 1)) {
        /* word loads from src2 would fault or be slow: go 8 bytes at a time */
        for (; i + 7 < w; i += 8) {
            dst[i + 0] = src1[i + 0] - src2[i + 0];
            dst[i + 1] = src1[i + 1] - src2[i + 1];
            dst[i + 2] = src1[i + 2] - src2[i + 2];
            dst[i + 3] = src1[i + 3] - src2[i + 3];
            dst[i + 4] = src1[i + 4] - src2[i + 4];
            dst[i + 5] = src1[i + 5] - src2[i + 5];
            dst[i + 6] = src1[i + 6] - src2[i + 6];
            dst[i + 7] = src1[i + 7] - src2[i + 7];
        }
    } else
#endif
    /* Cast sizeof to long: the original unsigned comparison wrapped for
     * w < sizeof(long) and ran the word loop out of bounds. */
    for (; i <= w - (long)sizeof(long); i += sizeof(long)) {
        long a = *(long *)(src1 + i);
        long b = *(long *)(src2 + i);
        /* per-byte subtraction without inter-byte borrow propagation */
        *(long *)(dst + i) = ((a | m80) - (b & m7f)) ^ ((a ^ b ^ m80) & m80);
    }
    for (; i < w; i++)
        dst[i] = src1[i] - src2[i];
}
1964 
/**
 * HuffYUV median prediction, decode direction: reconstruct dst[] by adding
 * diff[] to the median predictor of (left, top, left+top-topleft), where the
 * top row is src1[]. The running left/top-left values are carried in and out
 * through the pointer arguments so rows can be chained.
 */
static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top)
{
    int i;
    uint8_t pred_left    = *left;
    uint8_t pred_topleft = *left_top;

    for (i = 0; i < w; i++) {
        pred_left    = mid_pred(pred_left, src1[i],
                                (pred_left + src1[i] - pred_topleft) & 0xFF) + diff[i];
        pred_topleft = src1[i];
        dst[i]       = pred_left;
    }

    *left     = pred_left;
    *left_top = pred_topleft;
}
1981 
/**
 * HuffYUV median prediction, encode direction: dst[] receives src2[] minus
 * the median predictor built from the previous row src1[]. Running left /
 * top-left values are carried through the pointer arguments.
 */
static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top)
{
    int i;
    uint8_t cur_left    = *left;
    uint8_t cur_topleft = *left_top;

    for (i = 0; i < w; i++) {
        const int predictor = mid_pred(cur_left, src1[i],
                                       (cur_left + src1[i] - cur_topleft) & 0xFF);
        cur_topleft = src1[i];
        cur_left    = src2[i];
        dst[i]      = cur_left - predictor;
    }

    *left     = cur_left;
    *left_top = cur_topleft;
}
1999 
/**
 * Left (running-sum) prediction: dst[i] = acc + src[0] + ... + src[i],
 * truncated to 8 bits on each store. Returns the final (untruncated)
 * accumulator so consecutive rows can be chained.
 */
static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc)
{
    int i;

    /* plain prefix sum; the original unrolled this by two, which is
     * behaviorally identical */
    for (i = 0; i < w; i++) {
        acc   += src[i];
        dst[i] = acc;
    }

    return acc;
}
2018 
/* Byte offsets of the colour channels inside a packed 32-bit BGRA pixel,
 * depending on host endianness. */
#if HAVE_BIGENDIAN
#define B 3
#define G 2
#define R 1
#define A 0
#else
#define B 0
#define G 1
#define R 2
#define A 3
#endif
/**
 * Left prediction over packed 32-bit BGRA pixels: each channel keeps its own
 * running sum (truncated to 8 bits on store). The running channel values are
 * carried in and out through the pointer arguments.
 */
static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha)
{
    int i;
    int sum_r = *red, sum_g = *green, sum_b = *blue, sum_a = *alpha;

    for (i = 0; i < w; i++) {
        const uint8_t *px  = src + 4 * i;
        uint8_t       *out = dst + 4 * i;

        sum_b += px[B];
        sum_g += px[G];
        sum_r += px[R];
        sum_a += px[A];

        out[B] = sum_b;
        out[G] = sum_g;
        out[R] = sum_r;
        out[A] = sum_a;
    }

    *red   = sum_r;
    *green = sum_g;
    *blue  = sum_b;
    *alpha = sum_a;
}
#undef B
#undef G
#undef R
#undef A
2059 
2060 #define BUTTERFLY2(o1,o2,i1,i2) \
2061 o1= (i1)+(i2);\
2062 o2= (i1)-(i2);
2063 
2064 #define BUTTERFLY1(x,y) \
2065 {\
2066  int a,b;\
2067  a= x;\
2068  b= y;\
2069  x= a+b;\
2070  y= a-b;\
2071 }
2072 
2073 #define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
2074 
/**
 * 8x8 SATD: run the butterfly network (an 8-point Hadamard transform, rows
 * then columns) over the src - dst differences and sum the absolute values
 * of all 64 transform outputs. h must be 8 (asserted); stride applies to
 * both planes. Left byte-identical: the exact butterfly stage ordering is
 * what the BUTTERFLY* macros above encode.
 */
2075 static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
2076  int i;
2077  int temp[64];
2078  int sum=0;
2079 
2080  av_assert2(h==8);
2081 
     /* horizontal pass: 3 butterfly stages per row of differences */
2082  for(i=0; i<8; i++){
2083  //FIXME try pointer walks
2084  BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
2085  BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
2086  BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
2087  BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);
2088 
2089  BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
2090  BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
2091  BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
2092  BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
2093 
2094  BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
2095  BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
2096  BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
2097  BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
2098  }
2099 
     /* vertical pass: 2 butterfly stages per column, with the final stage
      * folded into BUTTERFLYA which directly accumulates |a+b| + |a-b| */
2100  for(i=0; i<8; i++){
2101  BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
2102  BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
2103  BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
2104  BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
2105 
2106  BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
2107  BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
2108  BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
2109  BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
2110 
2111  sum +=
2112  BUTTERFLYA(temp[8*0+i], temp[8*4+i])
2113  +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
2114  +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
2115  +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
2116  }
2117  return sum;
2118 }
2119 
/**
 * Intra variant of the 8x8 SATD: same Hadamard butterfly network applied to
 * the source pixels themselves (dummy is unused), then the DC magnitude is
 * subtracted so the score measures AC energy only. h must be 8 (asserted).
 * Left byte-identical: the final DC correction depends on the exact
 * positions temp[8*0] and temp[8*4] produced by this stage ordering.
 */
2120 static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
2121  int i;
2122  int temp[64];
2123  int sum=0;
2124 
2125  av_assert2(h==8);
2126 
     /* horizontal pass on raw pixels (no reference subtraction) */
2127  for(i=0; i<8; i++){
2128  //FIXME try pointer walks
2129  BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
2130  BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
2131  BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
2132  BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);
2133 
2134  BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
2135  BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
2136  BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
2137  BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
2138 
2139  BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
2140  BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
2141  BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
2142  BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
2143  }
2144 
     /* vertical pass; BUTTERFLYA accumulates the last stage's |a+b|+|a-b| */
2145  for(i=0; i<8; i++){
2146  BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
2147  BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
2148  BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
2149  BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
2150 
2151  BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
2152  BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
2153  BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
2154  BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
2155 
2156  sum +=
2157  BUTTERFLYA(temp[8*0+i], temp[8*4+i])
2158  +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
2159  +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
2160  +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
2161  }
2162 
     /* temp[8*0] + temp[8*4] is the DC term (sum of all pixels) at this
      * point in the network; remove its contribution from the score */
2163  sum -= FFABS(temp[8*0] + temp[8*4]); // -mean
2164 
2165  return sum;
2166 }
2167 
2168 static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2169  MpegEncContext * const s= (MpegEncContext *)c;
2170  LOCAL_ALIGNED_16(int16_t, temp, [64]);
2171 
2172  av_assert2(h==8);
2173 
2174  s->dsp.diff_pixels(temp, src1, src2, stride);
2175  s->dsp.fdct(temp);
2176  return s->dsp.sum_abs_dctelem(temp);
2177 }
2178 
2179 #if CONFIG_GPL
2180 #define DCT8_1D {\
2181  const int s07 = SRC(0) + SRC(7);\
2182  const int s16 = SRC(1) + SRC(6);\
2183  const int s25 = SRC(2) + SRC(5);\
2184  const int s34 = SRC(3) + SRC(4);\
2185  const int a0 = s07 + s34;\
2186  const int a1 = s16 + s25;\
2187  const int a2 = s07 - s34;\
2188  const int a3 = s16 - s25;\
2189  const int d07 = SRC(0) - SRC(7);\
2190  const int d16 = SRC(1) - SRC(6);\
2191  const int d25 = SRC(2) - SRC(5);\
2192  const int d34 = SRC(3) - SRC(4);\
2193  const int a4 = d16 + d25 + (d07 + (d07>>1));\
2194  const int a5 = d07 - d34 - (d25 + (d25>>1));\
2195  const int a6 = d07 + d34 - (d16 + (d16>>1));\
2196  const int a7 = d16 - d25 + (d34 + (d34>>1));\
2197  DST(0, a0 + a1 ) ;\
2198  DST(1, a4 + (a7>>2)) ;\
2199  DST(2, a2 + (a3>>1)) ;\
2200  DST(3, a5 + (a6>>2)) ;\
2201  DST(4, a0 - a1 ) ;\
2202  DST(5, a6 - (a5>>2)) ;\
2203  DST(6, (a2>>1) - a3 ) ;\
2204  DST(7, (a4>>2) - a7 ) ;\
2205 }
2206 
/**
 * H.264-style DCT SAD: applies the integer 8x8 transform (DCT8_1D above)
 * to the pixel differences, rows then columns, and sums |coefficient|.
 * Left byte-identical: DCT8_1D is parameterized by redefining the SRC/DST
 * macros between the two passes, so the exact macro define/undef sequence
 * is load-bearing.
 */
2207 static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2208  MpegEncContext * const s= (MpegEncContext *)c;
2209  int16_t dct[8][8];
2210  int i;
2211  int sum=0;
2212 
2213  s->dsp.diff_pixels(dct[0], src1, src2, stride);
2214 
     /* first pass: transform each row in place */
2215 #define SRC(x) dct[i][x]
2216 #define DST(x,v) dct[i][x]= v
2217  for( i = 0; i < 8; i++ )
2218  DCT8_1D
2219 #undef SRC
2220 #undef DST
2221 
     /* second pass: transform each column, accumulating |output| directly
      * instead of storing it */
2222 #define SRC(x) dct[x][i]
2223 #define DST(x,v) sum += FFABS(v)
2224  for( i = 0; i < 8; i++ )
2225  DCT8_1D
2226 #undef SRC
2227 #undef DST
2228  return sum;
2229 }
2230 #endif
2231 
2232 static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2233  MpegEncContext * const s= (MpegEncContext *)c;
2234  LOCAL_ALIGNED_16(int16_t, temp, [64]);
2235  int sum=0, i;
2236 
2237  av_assert2(h==8);
2238 
2239  s->dsp.diff_pixels(temp, src1, src2, stride);
2240  s->dsp.fdct(temp);
2241 
2242  for(i=0; i<64; i++)
2243  sum= FFMAX(sum, FFABS(temp[i]));
2244 
2245  return sum;
2246 }
2247 
/**
 * Quantization-noise metric: quantize/dequantize/IDCT the difference block
 * and return the squared error versus the untouched transform input.
 * Left byte-identical: the quantize -> unquantize -> idct sequence and the
 * s->mb_intra side effect are order-critical encoder state manipulation.
 */
2248 static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2249  MpegEncContext * const s= (MpegEncContext *)c;
2250  LOCAL_ALIGNED_16(int16_t, temp, [64*2]);
2251  int16_t * const bak = temp+64;
2252  int sum=0, i;
2253 
2254  av_assert2(h==8);
     /* force inter quantization path below */
2255  s->mb_intra=0;
2256 
2257  s->dsp.diff_pixels(temp, src1, src2, stride);
2258 
     /* keep an untouched copy to compare against after the round trip */
2259  memcpy(bak, temp, 64*sizeof(int16_t));
2260 
2261  s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2262  s->dct_unquantize_inter(s, temp, 0, s->qscale);
2263  ff_simple_idct_8(temp); //FIXME
2264 
2265  for(i=0; i<64; i++)
2266  sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
2267 
2268  return sum;
2269 }
2270 
/**
 * Rate-distortion metric for one 8x8 block: quantizes the difference,
 * counts the VLC bits the coefficients would cost, reconstructs the block
 * and returns distortion + lambda-weighted rate. Left byte-identical: the
 * run-level scan, the escape handling and the quantize/unquantize/idct
 * sequencing are intricate and order-dependent.
 */
2271 static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2272  MpegEncContext * const s= (MpegEncContext *)c;
2273  const uint8_t *scantable= s->intra_scantable.permutated;
2274  LOCAL_ALIGNED_16(int16_t, temp, [64]);
2275  LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
2276  LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
2277  int i, last, run, bits, level, distortion, start_i;
2278  const int esc_length= s->ac_esc_length;
2279  uint8_t * length;
2280  uint8_t * last_length;
2281 
2282  av_assert2(h==8);
2283 
     /* work on local aligned copies so the IDCT below can reconstruct
      * in place without touching the caller's buffers */
2284  copy_block8(lsrc1, src1, 8, stride, 8);
2285  copy_block8(lsrc2, src2, 8, stride, 8);
2286 
2287  s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);
2288 
2289  s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2290 
2291  bits=0;
2292 
     /* pick the VLC tables matching the current macroblock mode */
2293  if (s->mb_intra) {
2294  start_i = 1;
2295  length = s->intra_ac_vlc_length;
2296  last_length= s->intra_ac_vlc_last_length;
2297  bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
2298  } else {
2299  start_i = 0;
2300  length = s->inter_ac_vlc_length;
2301  last_length= s->inter_ac_vlc_last_length;
2302  }
2303 
     /* count bits for all (run, level) pairs up to the last coefficient;
      * levels outside [-64, 63] fall back to the escape code length */
2304  if(last>=start_i){
2305  run=0;
2306  for(i=start_i; i<last; i++){
2307  int j= scantable[i];
2308  level= temp[j];
2309 
2310  if(level){
2311  level+=64;
2312  if((level&(~127)) == 0){
2313  bits+= length[UNI_AC_ENC_INDEX(run, level)];
2314  }else
2315  bits+= esc_length;
2316  run=0;
2317  }else
2318  run++;
2319  }
2320  i= scantable[last];
2321 
2322  level= temp[i] + 64;
2323 
2324  av_assert2(level - 64);
2325 
2326  if((level&(~127)) == 0){
2327  bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
2328  }else
2329  bits+= esc_length;
2330 
2331  }
2332 
     /* reconstruct: dequantize and add the IDCT back onto the prediction */
2333  if(last>=0){
2334  if(s->mb_intra)
2335  s->dct_unquantize_intra(s, temp, 0, s->qscale);
2336  else
2337  s->dct_unquantize_inter(s, temp, 0, s->qscale);
2338  }
2339 
2340  s->dsp.idct_add(lsrc2, 8, temp);
2341 
2342  distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);
2343 
     /* 109/128 ~ lambda scaling; see mpegvideo rate-distortion weighting */
2344  return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
2345 }
2346 
/**
 * Rate-only metric for one 8x8 block: quantizes the difference and returns
 * the number of VLC bits the coefficients would cost (same scan logic as
 * rd8x8_c but without reconstruction/distortion). Left byte-identical:
 * run-level scanning and escape handling are order-dependent.
 */
2347 static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2348  MpegEncContext * const s= (MpegEncContext *)c;
2349  const uint8_t *scantable= s->intra_scantable.permutated;
2350  LOCAL_ALIGNED_16(int16_t, temp, [64]);
2351  int i, last, run, bits, level, start_i;
2352  const int esc_length= s->ac_esc_length;
2353  uint8_t * length;
2354  uint8_t * last_length;
2355 
2356  av_assert2(h==8);
2357 
2358  s->dsp.diff_pixels(temp, src1, src2, stride);
2359 
2360  s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2361 
2362  bits=0;
2363 
     /* pick the VLC tables matching the current macroblock mode */
2364  if (s->mb_intra) {
2365  start_i = 1;
2366  length = s->intra_ac_vlc_length;
2367  last_length= s->intra_ac_vlc_last_length;
2368  bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
2369  } else {
2370  start_i = 0;
2371  length = s->inter_ac_vlc_length;
2372  last_length= s->inter_ac_vlc_last_length;
2373  }
2374 
     /* count bits for all (run, level) pairs; out-of-range levels cost the
      * escape code length */
2375  if(last>=start_i){
2376  run=0;
2377  for(i=start_i; i<last; i++){
2378  int j= scantable[i];
2379  level= temp[j];
2380 
2381  if(level){
2382  level+=64;
2383  if((level&(~127)) == 0){
2384  bits+= length[UNI_AC_ENC_INDEX(run, level)];
2385  }else
2386  bits+= esc_length;
2387  run=0;
2388  }else
2389  run++;
2390  }
2391  i= scantable[last];
2392 
2393  level= temp[i] + 64;
2394 
2395  av_assert2(level - 64);
2396 
2397  if((level&(~127)) == 0){
2398  bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
2399  }else
2400  bits+= esc_length;
2401  }
2402 
2403  return bits;
2404 }
2405 
2406 #define VSAD_INTRA(size) \
2407 static int vsad_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
2408  int score=0; \
2409  int x,y; \
2410  \
2411  for(y=1; y<h; y++){ \
2412  for(x=0; x<size; x+=4){ \
2413  score+= FFABS(s[x ] - s[x +stride]) + FFABS(s[x+1] - s[x+1+stride]) \
2414  +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]); \
2415  } \
2416  s+= stride; \
2417  } \
2418  \
2419  return score; \
2420 }
2421 VSAD_INTRA(8)
2422 VSAD_INTRA(16)
2423 
/**
 * Vertical SAD of the difference signal over a 16-pixel-wide block: sums
 * |(s1 - s2) of one row minus (s1 - s2) of the next row|.
 */
static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h)
{
    int total = 0;
    int x, y;

    for (y = 1; y < h; y++) {
        for (x = 0; x < 16; x++)
            total += FFABS(s1[x] - s2[x] - s1[x + stride] + s2[x + stride]);
        s1 += stride;
        s2 += stride;
    }

    return total;
}
2438 
2439 #define SQ(a) ((a)*(a))
2440 #define VSSE_INTRA(size) \
2441 static int vsse_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
2442  int score=0; \
2443  int x,y; \
2444  \
2445  for(y=1; y<h; y++){ \
2446  for(x=0; x<size; x+=4){ \
2447  score+= SQ(s[x ] - s[x +stride]) + SQ(s[x+1] - s[x+1+stride]) \
2448  +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]); \
2449  } \
2450  s+= stride; \
2451  } \
2452  \
2453  return score; \
2454 }
2455 VSSE_INTRA(8)
2456 VSSE_INTRA(16)
2457 
/**
 * Vertical SSE of the difference signal over a 16-pixel-wide block: sums
 * the squared row-to-row change of (s1 - s2).
 */
static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h)
{
    int total = 0;
    int x, y;

    for (y = 1; y < h; y++) {
        for (x = 0; x < 16; x++) {
            const int d = s1[x] - s2[x] - s1[x + stride] + s2[x + stride];
            total += d * d;
        }
        s1 += stride;
        s2 += stride;
    }

    return total;
}
2472 
/** Sum of squared differences between an int8 array and an int16 array. */
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size)
{
    int total = 0;
    int idx;

    for (idx = 0; idx < size; idx++) {
        const int d = pix1[idx] - pix2[idx];
        total += d * d;
    }
    return total;
}
2481 
2482 #define WRAPPER8_16_SQ(name8, name16)\
2483 static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
2484  int score=0;\
2485  score +=name8(s, dst , src , stride, 8);\
2486  score +=name8(s, dst+8 , src+8 , stride, 8);\
2487  if(h==16){\
2488  dst += 8*stride;\
2489  src += 8*stride;\
2490  score +=name8(s, dst , src , stride, 8);\
2491  score +=name8(s, dst+8 , src+8 , stride, 8);\
2492  }\
2493  return score;\
2494 }
2495 
2496 WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
2497 WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
2498 WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
2499 #if CONFIG_GPL
2500 WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
2501 #endif
2502 WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
2503 WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
2504 WRAPPER8_16_SQ(rd8x8_c, rd16_c)
2505 WRAPPER8_16_SQ(bit8x8_c, bit16_c)
2506 
/**
 * Clip one float value given as its raw IEEE-754 bit pattern, valid when the
 * bounds straddle zero: positive floats order like their unsigned patterns
 * (so a > mini catches both too-large positives and all negatives, which the
 * caller encodes via mini's sign-flipped ordering), and flipping the sign
 * bit lets negatives be compared against maxisign.
 */
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
                                   uint32_t maxi, uint32_t maxisign)
{
    if (a > mini)
        return mini;
    if ((a ^ (1U << 31)) > maxisign)
        return maxi;
    return a;
}
2515 
/**
 * Clip a float vector into [*min, *max] using bit-pattern comparisons;
 * only valid when *min < 0 < *max (see clipf_c_one). len is processed in
 * groups of 8, so it is assumed to be a multiple of 8.
 */
static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len)
{
    int i, k;
    uint32_t lo     = *(uint32_t *)min;
    uint32_t hi     = *(uint32_t *)max;
    uint32_t hisign = hi ^ (1U << 31);
    uint32_t *out   = (uint32_t *)dst;
    const uint32_t *in = (const uint32_t *)src;

    for (i = 0; i < len; i += 8)
        for (k = 0; k < 8; k++)
            out[i + k] = clipf_c_one(in[i + k], lo, hi, hisign);
}
/**
 * Clip a float vector into [min, max]. len is processed in groups of 8
 * (assumed to be a multiple of 8). When the bounds straddle zero, a faster
 * integer bit-pattern path is used.
 */
static void vector_clipf_c(float *dst, const float *src, float min, float max, int len)
{
    int i, k;

    if (min < 0 && max > 0) {
        /* bit-twiddling fast path is only valid for opposite-sign bounds */
        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
        return;
    }

    for (i = 0; i < len; i += 8)
        for (k = 0; k < 8; k++)
            dst[i + k] = av_clipf(src[i + k], min, max);
}
2551 
/** Dot product of two int16 vectors of the given length. */
static int32_t scalarproduct_int16_c(const int16_t *v1, const int16_t *v2, int order)
{
    int idx;
    int acc = 0;

    for (idx = 0; idx < order; idx++)
        acc += v1[idx] * v2[idx];

    return acc;
}
2561 
/**
 * Fused dot product and multiply-add: returns dot(v1, v2) computed on the
 * ORIGINAL v1 values, while simultaneously updating v1[i] += mul * v3[i].
 */
static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
{
    int idx;
    int acc = 0;

    for (idx = 0; idx < order; idx++) {
        acc     += v1[idx] * v2[idx]; /* read v1 before it is updated */
        v1[idx] += mul * v3[idx];
    }

    return acc;
}
2571 
/**
 * Apply a symmetric Q15 window to an int16 signal: window[i] scales both
 * input[i] and its mirror input[len-1-i], with round-to-nearest on the
 * 15-bit shift. Only the first len/2 window coefficients are read.
 */
static void apply_window_int16_c(int16_t *output, const int16_t *input,
                                 const int16_t *window, unsigned int len)
{
    int i;
    const int half = len >> 1;

    for (i = 0; i < half; i++) {
        const int16_t w = window[i];
        output[i]           = (MUL16(input[i], w)           + (1 << 14)) >> 15;
        output[len - i - 1] = (MUL16(input[len - i - 1], w) + (1 << 14)) >> 15;
    }
}
2584 
/**
 * Clip each of len int32 values into [min, max].
 * NOTE(review): len must be > 0 and a multiple of 8 — the do/while processes
 * 8 elements per iteration and an unsigned len underflows otherwise; this
 * matches the original contract.
 */
static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
                                int32_t max, unsigned int len)
{
    do {
        int k;
        for (k = 0; k < 8; k++)
            dst[k] = av_clip(src[k], min, max);
        dst += 8;
        src += 8;
        len -= 8;
    } while (len > 0);
}
2600 
/**
 * 8x8 reference IDCT wrapper, "put" flavour: inverse-transform the block in
 * place, then write the clamped result into dest.
 */
2601 static void ff_jref_idct_put(uint8_t *dest, int line_size, int16_t *block)
2602 {
2603  ff_j_rev_dct (block);
2604  put_pixels_clamped_c(block, dest, line_size);
2605 }
/**
 * 8x8 reference IDCT wrapper, "add" flavour: inverse-transform the block in
 * place, then add the clamped result onto the existing dest pixels.
 */
2606 static void ff_jref_idct_add(uint8_t *dest, int line_size, int16_t *block)
2607 {
2608  ff_j_rev_dct (block);
2609  add_pixels_clamped_c(block, dest, line_size);
2610 }
2611 
/**
 * Lowres (4x4) reference IDCT wrappers: transform in place, then write or
 * add the clamped result. Used when avctx->lowres == 1.
 */
2612 static void ff_jref_idct4_put(uint8_t *dest, int line_size, int16_t *block)
2613 {
2614  ff_j_rev_dct4 (block);
2615  put_pixels_clamped4_c(block, dest, line_size);
2616 }
2617 static void ff_jref_idct4_add(uint8_t *dest, int line_size, int16_t *block)
2618 {
2619  ff_j_rev_dct4 (block);
2620  add_pixels_clamped4_c(block, dest, line_size);
2621 }
2622 
/**
 * Lowres (2x2) reference IDCT wrappers. Used when avctx->lowres == 2.
 */
2623 static void ff_jref_idct2_put(uint8_t *dest, int line_size, int16_t *block)
2624 {
2625  ff_j_rev_dct2 (block);
2626  put_pixels_clamped2_c(block, dest, line_size);
2627 }
2628 static void ff_jref_idct2_add(uint8_t *dest, int line_size, int16_t *block)
2629 {
2630  ff_j_rev_dct2 (block);
2631  add_pixels_clamped2_c(block, dest, line_size);
2632 }
2633 
/**
 * 1x1 "IDCT" wrappers for lowres == 3: only the DC coefficient exists, so
 * the inverse transform degenerates to (block[0] + 4) >> 3, clipped to a
 * byte, written to or added onto the single destination pixel.
 */
2634 static void ff_jref_idct1_put(uint8_t *dest, int line_size, int16_t *block)
2635 {
2636  dest[0] = av_clip_uint8((block[0] + 4)>>3);
2637 }
2638 static void ff_jref_idct1_add(uint8_t *dest, int line_size, int16_t *block)
2639 {
2640  dest[0] = av_clip_uint8(dest[0] + ((block[0] + 4)>>3));
2641 }
2642 
2643 /* init static data */
2645 {
2646  int i;
2647 
2648  for(i=0;i<512;i++) {
2649  ff_squareTbl[i] = (i - 256) * (i - 256);
2650  }
2651 
2652  for(i=0; i<64; i++) ff_inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
2653 }
2654 
2656  static int did_fail=0;
2657  LOCAL_ALIGNED_16(int, aligned, [4]);
2658 
2659  if((intptr_t)aligned & 15){
2660  if(!did_fail){
2661 #if HAVE_MMX || HAVE_ALTIVEC
2663  "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
2664  "and may be very slow or crash. This is not a bug in libavcodec,\n"
2665  "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
2666  "Do not report crashes to FFmpeg developers.\n");
2667 #endif
2668  did_fail=1;
2669  }
2670  return -1;
2671  }
2672  return 0;
2673 }
2674 
2676 {
2678 
2679 #if CONFIG_ENCODERS
2680  if (avctx->bits_per_raw_sample == 10) {
2683  } else {
2684  if(avctx->dct_algo==FF_DCT_FASTINT) {
2685  c->fdct = ff_fdct_ifast;
2687  }
2688  else if(avctx->dct_algo==FF_DCT_FAAN) {
2689  c->fdct = ff_faandct;
2690  c->fdct248 = ff_faandct248;
2691  }
2692  else {
2693  c->fdct = ff_jpeg_fdct_islow_8; //slow/accurate/default
2695  }
2696  }
2697 #endif //CONFIG_ENCODERS
2698 
2699  if(avctx->lowres==1){
2702  c->idct = ff_j_rev_dct4;
2704  }else if(avctx->lowres==2){
2707  c->idct = ff_j_rev_dct2;
2709  }else if(avctx->lowres==3){
2712  c->idct = ff_j_rev_dct1;
2714  }else{
2715  if (avctx->bits_per_raw_sample == 10) {
2718  c->idct = ff_simple_idct_10;
2720  } else {
2721  if(avctx->idct_algo==FF_IDCT_INT){
2724  c->idct = ff_j_rev_dct;
2726  }else if(avctx->idct_algo==FF_IDCT_FAAN){
2729  c->idct = ff_faanidct;
2731  }else{ //accurate/default
2734  c->idct = ff_simple_idct_8;
2736  }
2737  }
2738  }
2739 
2745  c->gmc1 = gmc1_c;
2746  c->gmc = ff_gmc_c;
2747  c->pix_sum = pix_sum_c;
2748  c->pix_norm1 = pix_norm1_c;
2749 
2751  c->fill_block_tab[1] = fill_block8_c;
2752 
2753  /* TODO [0] 16 [1] 8 */
2754  c->pix_abs[0][0] = pix_abs16_c;
2755  c->pix_abs[0][1] = pix_abs16_x2_c;
2756  c->pix_abs[0][2] = pix_abs16_y2_c;
2757  c->pix_abs[0][3] = pix_abs16_xy2_c;
2758  c->pix_abs[1][0] = pix_abs8_c;
2759  c->pix_abs[1][1] = pix_abs8_x2_c;
2760  c->pix_abs[1][2] = pix_abs8_y2_c;
2761  c->pix_abs[1][3] = pix_abs8_xy2_c;
2762 
2772 
2782 
2783 #define dspfunc(PFX, IDX, NUM) \
2784  c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
2785  c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
2786  c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
2787  c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
2788  c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
2789  c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
2790  c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
2791  c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
2792  c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
2793  c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
2794  c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
2795  c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
2796  c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
2797  c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
2798  c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
2799  c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
2800 
2801  dspfunc(put_qpel, 0, 16);
2802  dspfunc(put_no_rnd_qpel, 0, 16);
2803 
2804  dspfunc(avg_qpel, 0, 16);
2805  /* dspfunc(avg_no_rnd_qpel, 0, 16); */
2806 
2807  dspfunc(put_qpel, 1, 8);
2808  dspfunc(put_no_rnd_qpel, 1, 8);
2809 
2810  dspfunc(avg_qpel, 1, 8);
2811  /* dspfunc(avg_no_rnd_qpel, 1, 8); */
2812 
2813 #undef dspfunc
2814 
2823 
2824 #define SET_CMP_FUNC(name) \
2825  c->name[0]= name ## 16_c;\
2826  c->name[1]= name ## 8x8_c;
2827 
2828  SET_CMP_FUNC(hadamard8_diff)
2829  c->hadamard8_diff[4]= hadamard8_intra16_c;
2831  SET_CMP_FUNC(dct_sad)
2832  SET_CMP_FUNC(dct_max)
2833 #if CONFIG_GPL
2834  SET_CMP_FUNC(dct264_sad)
2835 #endif
2836  c->sad[0]= pix_abs16_c;
2837  c->sad[1]= pix_abs8_c;
2838  c->sse[0]= sse16_c;
2839  c->sse[1]= sse8_c;
2840  c->sse[2]= sse4_c;
2841  SET_CMP_FUNC(quant_psnr)
2842  SET_CMP_FUNC(rd)
2843  SET_CMP_FUNC(bit)
2844  c->vsad[0]= vsad16_c;
2845  c->vsad[4]= vsad_intra16_c;
2846  c->vsad[5]= vsad_intra8_c;
2847  c->vsse[0]= vsse16_c;
2848  c->vsse[4]= vsse_intra16_c;
2849  c->vsse[5]= vsse_intra8_c;
2850  c->nsse[0]= nsse16_c;
2851  c->nsse[1]= nsse8_c;
2852 #if CONFIG_SNOW_DECODER || CONFIG_SNOW_ENCODER
2854 #endif
2855 
2857 
2858  c->add_bytes= add_bytes_c;
2864  c->bswap_buf= bswap_buf;
2865  c->bswap16_buf = bswap16_buf;
2866 
2870  }
2871 
2874 
2880 
2881  c->shrink[0]= av_image_copy_plane;
2882  c->shrink[1]= ff_shrink22;
2883  c->shrink[2]= ff_shrink44;
2884  c->shrink[3]= ff_shrink88;
2885 
2887 
2888 #undef FUNC
2889 #undef FUNCC
2890 #define FUNC(f, depth) f ## _ ## depth
2891 #define FUNCC(f, depth) f ## _ ## depth ## _c
2892 
2893  c->draw_edges = FUNCC(draw_edges, 8);
2894  c->clear_block = FUNCC(clear_block, 8);
2895  c->clear_blocks = FUNCC(clear_blocks, 8);
2896 
2897 #define BIT_DEPTH_FUNCS(depth) \
2898  c->get_pixels = FUNCC(get_pixels, depth);
2899 
2900  switch (avctx->bits_per_raw_sample) {
2901  case 9:
2902  case 10:
2903  case 12:
2904  case 14:
2905  BIT_DEPTH_FUNCS(16);
2906  break;
2907  default:
2908  if(avctx->bits_per_raw_sample<=8 || avctx->codec_type != AVMEDIA_TYPE_VIDEO) {
2909  BIT_DEPTH_FUNCS(8);
2910  }
2911  break;
2912  }
2913 
2914 
2915  if (HAVE_MMX) ff_dsputil_init_mmx (c, avctx);
2916  if (ARCH_ARM) ff_dsputil_init_arm (c, avctx);
2917  if (HAVE_VIS) ff_dsputil_init_vis (c, avctx);
2918  if (ARCH_ALPHA) ff_dsputil_init_alpha (c, avctx);
2919  if (ARCH_PPC) ff_dsputil_init_ppc (c, avctx);
2920  if (ARCH_SH4) ff_dsputil_init_sh4 (c, avctx);
2921  if (ARCH_BFIN) ff_dsputil_init_bfin (c, avctx);
2922 
2925 }
2926 
2928 {
2929  ff_dsputil_init(c, avctx);
2930 }
2931 
2933 {
2934  ff_dsputil_init(c, avctx);
2935 }
static int bit8x8_c(void *c, uint8_t *src1, uint8_t *src2, int stride, int h)
Definition: dsputil.c:2347
void ff_jpeg_fdct_islow_10(int16_t *data)
int(* sum_abs_dctelem)(int16_t *block)
Definition: dsputil.h:135
static int vsse16_c(void *c, uint8_t *s1, uint8_t *s2, int stride, int h)
Definition: dsputil.c:2458
me_cmp_func vsad[6]
Definition: dsputil.h:158
void ff_fdct248_islow_10(int16_t *data)
av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
Definition: dsputil.c:2675
float v
void(* sub_hfyu_median_prediction)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top)
subtract huffyuv&#39;s variant of median prediction note, this might read from src1[-1], src2[-1]
Definition: dsputil.h:203
const char * s
Definition: avisynth_c.h:668
static void ff_jref_idct1_add(uint8_t *dest, int line_size, int16_t *block)
Definition: dsputil.c:2638
static int shift(int a, int b)
Definition: sonic.c:86
void(* fdct248)(int16_t *block)
Definition: dsputil.h:219
#define ff_cropTbl
static int pix_sum_c(uint8_t *pix, int line_size)
Definition: dsputil.c:166
void ff_fdct_ifast(int16_t *data)
Definition: jfdctfst.c:208
int dct_algo
DCT algorithm, see FF_DCT_* below.
#define C
static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min, int32_t max, unsigned int len)
Definition: dsputil.c:2585
#define ARCH_PPC
Definition: config.h:26
static void add_pixels_clamped4_c(const int16_t *block, uint8_t *av_restrict pixels, int line_size)
Definition: dsputil.c:462
#define B
Definition: dsputil.c:2025
void ff_j_rev_dct4(int16_t *data)
misc image utilities
void(* shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height)
Definition: dsputil.h:268
#define FF_CMP_NSSE
void ff_simple_idct_add_10(uint8_t *dest, int line_size, int16_t *block)
else temp
Definition: vf_mcdeint.c:148
static void ff_jref_idct2_put(uint8_t *dest, int line_size, int16_t *block)
Definition: dsputil.c:2623
if max(w)>1 w=0.9 *w/max(w)
#define SET_CMP_FUNC(name)
#define ARCH_BFIN
Definition: config.h:20
static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale)
Definition: dsputil.c:1857
static void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
Definition: dsputil.c:679
int acc
Definition: yuv2rgb.c:519
void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx)
Definition: dsputil_ppc.c:140
const uint8_t ff_zigzag248_direct[64]
Definition: dsputil.c:60
void ff_put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
#define FF_PARTTRANS_IDCT_PERM
Definition: dsputil.h:255
Scantable.
Definition: dsputil.h:114
me_cmp_func dct_max[6]
Definition: dsputil.h:163
#define MAX_NEG_CROP
Definition: dsputil.h:47
static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale)
Definition: dsputil.c:1842
void(* gmc1)(uint8_t *dst, uint8_t *src, int srcStride, int h, int x16, int y16, int rounder)
translational global motion compensation.
Definition: dsputil.h:139
#define FF_CMP_SSE
#define BIT_DEPTH_FUNCS(depth)
#define av_bswap16
Definition: sh4/bswap.h:31
static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top)
Definition: dsputil.c:1965
me_cmp_func sse[6]
Definition: dsputil.h:152
const uint8_t ff_h263_loop_filter_strength[32]
Definition: h263data.h:275
static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len)
Definition: dsputil.c:2516
void ff_faandct248(int16_t *data)
Definition: faandct.c:181
uint8_t raster_end[64]
Definition: dsputil.h:117
#define op_avg(a, b)
Definition: dsputil.c:1316
static void ff_jref_idct1_put(uint8_t *dest, int line_size, int16_t *block)
Definition: dsputil.c:2634
static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha)
Definition: dsputil.c:2030
mpegvideo header.
#define pb_80
Definition: dsputil.c:56
void(* bswap16_buf)(uint16_t *dst, const uint16_t *src, int len)
Definition: dsputil.h:209
uint8_t permutated[64]
Definition: dsputil.h:116
uint8_t run
Definition: svq3.c:136
int bits_per_raw_sample
Bits per sample/pixel of internal libavcodec pixel/sample format.
uint8_t * intra_ac_vlc_length
Definition: mpegvideo.h:482
static int32_t scalarproduct_int16_c(const int16_t *v1, const int16_t *v2, int order)
Definition: dsputil.c:2552
#define UNI_AC_ENC_INDEX(run, level)
Definition: mpegvideo.h:487
void(* clear_block)(int16_t *block)
Definition: dsputil.h:145
void(* fdct)(int16_t *block)
Definition: dsputil.h:218
int stride
Definition: mace.c:144
int qscale
QP.
Definition: mpegvideo.h:369
static int vsad16_c(void *c, uint8_t *s1, uint8_t *s2, int stride, int h)
Definition: dsputil.c:2424
#define FF_CMP_RD
output residual component w
#define _(x)
static void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
Definition: dsputil.c:721
set threshold d
DECLARE_ALIGNED(16, uint16_t, ff_inv_zigzag_direct16)[64]
#define dspfunc(PFX, IDX, NUM)
static void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
Definition: dsputil.c:690
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
Definition: dsputil.c:1726
void(* add_pixels8)(uint8_t *pixels, int16_t *block, int line_size)
Definition: dsputil.h:134
const uint8_t ff_alternate_vertical_scan[64]
Definition: dsputil.c:85
#define FF_CMP_W53
static int hadamard8_diff8x8_c(void *s, uint8_t *dst, uint8_t *src, int stride, int h)
Definition: dsputil.c:2075
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
Definition: dsputil.c:1477
static int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
Definition: dsputil.c:1706
#define FF_DCT_FAAN
uint8_t bits
Definition: crc.c:216
uint8_t
#define RECON_SHIFT
Definition: dsputil.h:261
#define av_cold
Definition: attributes.h:78
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
Definition: dsputil.c:1465
static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h)
Definition: dsputil.c:1790
#define av_assert2(cond)
assert() equivalent, that does lie in speed critical code.
Definition: avassert.h:63
static void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
Definition: dsputil.c:668
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
Definition: dsputil.c:500
void ff_faanidct(int16_t block[64])
Definition: faanidct.c:132
the pkt_dts and pkt_pts fields in AVFrame will work as usual Restrictions on codec whose streams don t reset across will not work because their bitstreams cannot be decoded in parallel *The contents of buffers must not be read before as well as code calling up to before the decode process starts Call have add an so the codec calls ff_thread_report set AVCodecInternal allocate_progress The frames must then be freed with as it s useful too and the implementation is trivial when you re doing this Note that draw_edges() needs to be called before reporting progress.Before accessing a reference frame or its MVs
static void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
Definition: dsputil.c:765
#define b
Definition: input.c:42
end end
#define WRAPPER8_16_SQ(name8, name16)
Definition: dsputil.c:2482
#define put(d, s)
Definition: dsputil_align.c:51
int(* pix_sum)(uint8_t *pix, int line_size)
Definition: dsputil.h:147
static void vector_clipf_c(float *dst, const float *src, float min, float max, int len)
Definition: dsputil.c:2534
D(D(float, sse)
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2, int size)
Definition: dsputil.c:2473
me_cmp_func dct_sad[6]
Definition: dsputil.h:154
#define R
Definition: dsputil.c:2027
void ff_set_cmp(DSPContext *c, me_cmp_func *cmp, int type)
Definition: dsputil.c:1869
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
Definition: dsputil.c:1489
#define av_restrict
Definition: config.h:9
void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx)
Definition: dsputil_mmx.c:1563
uint8_t idct_permutation[64]
idct input permutation.
Definition: dsputil.h:249
const uint8_t * scantable
Definition: dsputil.h:115
static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc)
Definition: dsputil.c:2000
static void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
Definition: dsputil.c:604
void ff_simple_idct_put_10(uint8_t *dest, int line_size, int16_t *block)
int lowres
low resolution decoding, 1-> 1/2 size, 2->1/4 size
static void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
Definition: dsputil.c:754
static int hadamard8_intra8x8_c(void *s, uint8_t *src, uint8_t *dummy, int stride, int h)
Definition: dsputil.c:2120
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
Definition: dsputil.c:510
void ff_shrink44(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height)
Definition: imgconvert.c:311
void ff_avg_pixels8x8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
Definition: dsputil.c:1334
av_cold void dsputil_init(DSPContext *c, AVCodecContext *avctx)
Definition: dsputil.c:2927
#define cm
Definition: dvbsubdec.c:34
static void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
Definition: dsputil.c:732
me_cmp_func nsse[6]
Definition: dsputil.h:160
qpel_mc_func put_mspel_pixels_tab[8]
Definition: dsputil.h:192
#define FF_CMP_VSSE
void ff_avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
me_cmp_func w53[6]
Definition: dsputil.h:161
Discrete Time axis x
#define U(x)
static double alpha(void *priv, double x, double y)
Definition: vf_geq.c:86
void(* vector_clipf)(float *dst, const float *src, float min, float max, int len)
Definition: dsputil.h:215
#define FF_SSE2_IDCT_PERM
Definition: dsputil.h:256
#define MUL16(a, b)
Definition: bfin/mathops.h:38
void(* add_hfyu_median_prediction)(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top)
Definition: dsputil.h:204
uint8_t * inter_ac_vlc_last_length
Definition: mpegvideo.h:485
#define s2
Definition: regdef.h:39
tpel_mc_func avg_tpel_pixels_tab[11]
Definition: dsputil.h:187
void ff_avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
#define FF_CMP_W97
void(* add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale)
Definition: dsputil.h:259
const uint8_t ff_alternate_horizontal_scan[64]
Definition: dsputil.c:74
#define FF_CMP_BIT
#define FF_CMP_DCT
static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
Definition: dsputil.c:254
const char * r
Definition: vf_curves.c:94
void(* h263_h_loop_filter)(uint8_t *src, int stride, int qscale)
Definition: dsputil.h:212
static uint32_t clipf_c_one(uint32_t a, uint32_t mini, uint32_t maxi, uint32_t maxisign)
Definition: dsputil.c:2507
#define VSAD_INTRA(size)
Definition: dsputil.c:2406
static void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
Definition: dsputil.c:646
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several the filter must be ready for frames arriving randomly on any input any filter with several inputs will most likely require some kind of queuing mechanism It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced request_frame This method is called when a frame is wanted on an output For an input
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
Definition: dsputil.c:546
void(* clear_blocks)(int16_t *blocks)
Definition: dsputil.h:146
static int quant_psnr8x8_c(void *c, uint8_t *src1, uint8_t *src2, int stride, int h)
Definition: dsputil.c:2248
#define op_put(a, b)
Definition: dsputil.c:1318
void av_log(void *avcl, int level, const char *fmt,...)
Definition: log.c:246
init variable d2
overlapping window(triangular window to avoid too much overlapping) ovidx
int ff_check_alignment(void)
Definition: dsputil.c:2655
void ff_faanidct_put(uint8_t *dest, int line_size, int16_t block[64])
Definition: faanidct.c:158
void(* apply_window_int16)(int16_t *output, const int16_t *input, const int16_t *window, unsigned int len)
Apply symmetric window in 16-bit fixed-point.
Definition: dsputil.h:294
#define FF_LIBMPEG2_IDCT_PERM
Definition: dsputil.h:252
#define FFMAX(a, b)
Definition: common.h:56
#define BASIS_SHIFT
Definition: dsputil.h:260
#define FF_CMP_PSNR
external API header
void ff_put_pixels16x16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
Definition: dsputil.c:1338
int size
#define FF_IDCT_FAAN
#define QPEL_MC(r, OPNAME, RND, OP)
Definition: dsputil.c:798
me_cmp_func vsse[6]
Definition: dsputil.h:159
#define FF_CMP_DCTMAX
void ff_dsputil_init_dwt(DSPContext *c)
Definition: snow_dwt.c:847
static int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
Definition: dsputil.c:1590
static int dct_max8x8_c(void *c, uint8_t *src1, uint8_t *src2, int stride, int h)
Definition: dsputil.c:2232
static void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
Definition: dsputil.c:743
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
Definition: dsputil.c:1768
common internal API header
#define BUTTERFLYA(x, y)
Definition: dsputil.c:2073
uint8_t * intra_ac_vlc_last_length
Definition: mpegvideo.h:483
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
Definition: dsputil.c:1484
void ff_shrink88(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height)
Definition: imgconvert.c:342
FFT buffer for g
Definition: stft_peak.m:17
uint32_t ff_squareTbl[512]
Definition: dsputil.c:45
void ff_shrink22(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height)
Definition: imgconvert.c:278
static void add_pixels8_c(uint8_t *av_restrict pixels, int16_t *block, int line_size)
Definition: dsputil.c:422
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
Definition: dsputil.c:1509
av_cold void ff_dsputil_init_alpha(DSPContext *c, AVCodecContext *avctx)
int32_t(* scalarproduct_and_madd_int16)(int16_t *v1, const int16_t *v2, const int16_t *v3, int len, int mul)
Calculate scalar product of v1 and v2, and v1[i] += v3[i] * mul.
Definition: dsputil.h:281
#define ARCH_ARM
Definition: config.h:16
void(* draw_edges)(uint8_t *buf, int wrap, int width, int height, int w, int h, int sides)
Definition: dsputil.h:263
int idct_algo
IDCT algorithm, see FF_IDCT_* below.
static void ff_jref_idct_add(uint8_t *dest, int line_size, int16_t *block)
Definition: dsputil.c:2606
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
Definition: dsputil.c:523
void(* put_signed_pixels_clamped)(const int16_t *block, uint8_t *pixels, int line_size)
Definition: dsputil.h:132
static int sse8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
Definition: dsputil.c:277
void(* add_bytes)(uint8_t *dst, uint8_t *src, int w)
Definition: dsputil.h:197
#define FF_DCT_FASTINT
int(* me_cmp_func)(void *s, uint8_t *blk1, uint8_t *blk2, int line_size, int h)
Definition: dsputil.h:109
void(* put_pixels_clamped)(const int16_t *block, uint8_t *pixels, int line_size)
Definition: dsputil.h:131
int32_t
static void ff_jref_idct4_add(uint8_t *dest, int line_size, int16_t *block)
Definition: dsputil.c:2617
void ff_dsputil_init_vis(DSPContext *c, AVCodecContext *avctx)
Definition: dsputil_vis.c:26
#define FF_CMP_ZERO
void ff_fdct248_islow_8(int16_t *data)
#define FF_NO_IDCT_PERM
Definition: dsputil.h:251
#define FFABS(a)
Definition: common.h:53
static av_always_inline int cmp(MpegEncContext *s, const int x, const int y, const int subx, const int suby, const int size, const int h, int ref_index, int src_index, me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags)
compares a block (either a full macroblock or a partition thereof) against a proposed motion-compensa...
Definition: motion_est.c:251
int block_last_index[12]
last non zero coefficient in block
Definition: mpegvideo.h:291
static void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
Definition: dsputil.c:787
#define diff(a, as, b, bs)
Definition: vf_phase.c:80
static void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
Definition: dsputil.c:657
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
Definition: dsputil.c:1746
FIXME Range Coding of cr are level
Definition: snow.txt:367
void ff_simple_idct_10(int16_t *block)
int ac_esc_length
num of bits needed to encode the longest esc
Definition: mpegvideo.h:481
void(* diff_bytes)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w)
Definition: dsputil.h:198
#define HAVE_VIS
Definition: config.h:58
static void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
Definition: dsputil.c:710
me_cmp_func bit[6]
Definition: dsputil.h:156
static const float pred[4]
Definition: siprdata.h:259
static int dct_sad8x8_c(void *c, uint8_t *src1, uint8_t *src2, int stride, int h)
Definition: dsputil.c:2168
void ff_j_rev_dct1(int16_t *data)
void(* add_pixels_clamped)(const int16_t *block, uint8_t *pixels, int line_size)
Definition: dsputil.h:133
#define ARCH_SH4
Definition: config.h:29
void(* vector_clip_int32)(int32_t *dst, const int32_t *src, int32_t min, int32_t max, unsigned int len)
Clip each element in an array of int32_t to a given minimum and maximum value.
Definition: dsputil.h:310
NULL
Definition: eval.c:55
#define FF_IDCT_INT
static int width
Definition: tests/utils.c:158
dest
Definition: start.py:60
#define av_bswap32
Definition: bfin/bswap.h:33
uint8_t * luma_dc_vlc_length
Definition: mpegvideo.h:486
void ff_jpeg_fdct_islow_8(int16_t *data)
void ff_j_rev_dct(int16_t *data)
AVS_Value src
Definition: avisynth_c.h:523
void ff_faandct(int16_t *data)
Definition: faandct.c:121
static void put_pixels_clamped_c(const int16_t *block, uint8_t *av_restrict pixels, int line_size)
Definition: dsputil.c:348
enum AVMediaType codec_type
static void put_pixels_clamped4_c(const int16_t *block, uint8_t *av_restrict pixels, int line_size)
Definition: dsputil.c:369
int idct_permutation_type
Definition: dsputil.h:250
static void ff_jref_idct4_put(uint8_t *dest, int line_size, int16_t *block)
Definition: dsputil.c:2612
static int pix_norm1_c(uint8_t *pix, int line_size)
Definition: dsputil.c:188
void(* idct_add)(uint8_t *dest, int line_size, int16_t *block)
block -> idct -> add dest -> clip to unsigned 8 bit -> dest.
Definition: dsputil.h:235
main external API structure.
#define FF_SIMPLE_IDCT_PERM
Definition: dsputil.h:253
#define avg4(a, b, c, d)
Definition: dsputil.c:521
ScanTable intra_scantable
Definition: mpegvideo.h:296
me_cmp_func dct264_sad[6]
Definition: dsputil.h:164
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:148
op_fill_func fill_block_tab[2]
Definition: dsputil.h:313
uint16_t ff_inv_zigzag_direct16[64]
static void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
Definition: dsputil.c:776
#define CONFIG_H263_DECODER
Definition: config.h:498
uint8_t * inter_ac_vlc_length
Definition: mpegvideo.h:484
#define SQ(a)
Definition: dsputil.c:2439
BYTE int const BYTE int int int height
Definition: avisynth_c.h:713
int(* pix_norm1)(uint8_t *pix, int line_size)
Definition: dsputil.h:148
double value
Definition: eval.c:82
static void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
Definition: dsputil.c:701
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
Definition: dsputil.c:1499
av_cold void ff_dsputil_static_init(void)
Definition: dsputil.c:2644
void(* add_hfyu_left_prediction_bgr32)(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha)
Definition: dsputil.h:206
int index
Definition: gxfenc.c:89
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
Definition: dsputil.c:1472
synthesis window for stochastic i
void(* bswap_buf)(uint32_t *dst, const uint32_t *src, int w)
Definition: dsputil.h:208
static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
Definition: dsputil.c:1354
void(* gmc)(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
global motion compensation.
Definition: dsputil.h:143
#define avg2(a, b)
Definition: dsputil.c:520
static void add_pixels_clamped_c(const int16_t *block, uint8_t *av_restrict pixels, int line_size)
Definition: dsputil.c:442
#define mid_pred
Definition: mathops.h:94
DSPContext dsp
pointers for accelerated dsp functions
Definition: mpegvideo.h:391
#define s1
Definition: regdef.h:38
int32_t(* scalarproduct_int16)(const int16_t *v1, const int16_t *v2, int len)
Calculate scalar product of two vectors.
Definition: dsputil.h:274
av_cold void ff_dsputil_init_arm(DSPContext *c, AVCodecContext *avctx)
static void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
Definition: dsputil.c:635
#define pb_7f
Definition: dsputil.c:55
void ff_init_scantable_permutation(uint8_t *idct_permutation, int idct_permutation_type)
Definition: dsputil.c:131
#define type
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w)
Definition: dsputil.c:236
static int sum_abs_dctelem_c(int16_t *block)
Definition: dsputil.c:492
void ff_fdct_ifast248(int16_t *data)
Definition: jfdctfst.c:274
me_cmp_func w97[6]
Definition: dsputil.h:162
static int weight(int i, int blen, int offset)
av_cold void ff_dsputil_init_bfin(DSPContext *c, AVCodecContext *avctx)
Definition: dsputil_bfin.c:126
static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top)
Definition: dsputil.c:1982
static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w)
Definition: dsputil.c:1436
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
Definition: dsputil.c:1646
void ff_j_rev_dct2(int16_t *data)
void ff_simple_idct_add_8(uint8_t *dest, int line_size, int16_t *block)
static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h)
Definition: dsputil.c:1816
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h)
Definition: dsputil.c:1865
static int rd8x8_c(void *c, uint8_t *src1, uint8_t *src2, int stride, int h)
Definition: dsputil.c:2271
void(* idct)(int16_t *block)
Definition: dsputil.h:222
static void ff_jref_idct_put(uint8_t *dest, int line_size, int16_t *block)
Definition: dsputil.c:2601
me_cmp_func rd[6]
Definition: dsputil.h:157
static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
Definition: dsputil.c:2562
MpegEncContext.
Definition: mpegvideo.h:241
tpel_mc_func put_tpel_pixels_tab[11]
Thirdpel motion compensation with rounding (a+b+1)>>1.
Definition: dsputil.h:186
struct AVCodecContext * avctx
Definition: mpegvideo.h:243
#define FF_CMP_DCT264
#define FF_CMP_SAD
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
Definition: dsputil.c:1618
static void add_pixels_clamped2_c(const int16_t *block, uint8_t *av_restrict pixels, int line_size)
Definition: dsputil.c:478
#define BUTTERFLY1(x, y)
Definition: dsputil.c:2064
#define op_put_no_rnd(a, b)
Definition: dsputil.c:1319
static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale)
Definition: dsputil.c:1553
void(* dct_unquantize_inter)(struct MpegEncContext *s, int16_t *block, int n, int qscale)
Definition: mpegvideo.h:726
static const uint8_t idct_sse2_row_perm[8]
Definition: dsputil.c:108
static void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
Definition: dsputil.c:624
int(* fast_dct_quantize)(struct MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow)
Definition: mpegvideo.h:729
#define VSSE_INTRA(size)
Definition: dsputil.c:2440
const uint8_t ff_zigzag_direct[64]
Definition: mathtables.c:115
#define ARCH_ALPHA
Definition: config.h:15
int nsse_weight
noise vs.
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
Definition: dsputil.c:1676
static double c[64]
void ff_dsputil_init_sh4(DSPContext *c, AVCodecContext *avctx)
Definition: dsputil_sh4.c:93
static int16_t basis[64][64]
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w)
Definition: dsputil.c:1929
static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale)
Definition: dsputil.c:1516
me_cmp_func sad[6]
Definition: dsputil.h:151
static void put_signed_pixels_clamped_c(const int16_t *block, uint8_t *av_restrict pixels, int line_size)
Definition: dsputil.c:401
these buffered frames must be flushed immediately if a new input produces new output(Example:frame rate-doubling filter:filter_frame must(1) flush the second copy of the previous frame, if it is still there,(2) push the first copy of the incoming frame,(3) keep the second copy for later.) If the input frame is not enough to produce output
int(* ssd_int8_vs_int16)(const int8_t *pix1, const int16_t *pix2, int size)
Definition: dsputil.h:173
static const uint8_t simple_mmx_permutation[64]
Definition: dsputil.c:97
Same thing on a dB scale
#define CONFIG_H263_ENCODER
Definition: config.h:1040
function y
Definition: D.m:1
int(* add_hfyu_left_prediction)(uint8_t *dst, const uint8_t *src, int w, int left)
Definition: dsputil.h:205
static int sse4_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
Definition: dsputil.c:260
void ff_simple_idct_put_8(uint8_t *dest, int line_size, int16_t *block)
DSP utils.
static void diff_pixels_c(int16_t *av_restrict block, const uint8_t *s1, const uint8_t *s2, int stride)
Definition: dsputil.c:328
void(* idct_put)(uint8_t *dest, int line_size, int16_t *block)
block -> idct -> clip to unsigned 8 bit -> dest.
Definition: dsputil.h:229
void(* h263_v_loop_filter)(uint8_t *src, int stride, int qscale)
Definition: dsputil.h:211
me_cmp_func hadamard8_diff[6]
Definition: dsputil.h:153
simple idct header.
int len
void(* dct_unquantize_intra)(struct MpegEncContext *s, int16_t *block, int n, int qscale)
Definition: mpegvideo.h:724
void ff_put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable)
Definition: dsputil.c:110
#define avg(d, s)
Definition: dsputil_align.c:52
else dst[i][x+y *dst_stride[i]]
Definition: vf_mcdeint.c:160
void ff_faanidct_add(uint8_t *dest, int line_size, int16_t block[64])
Definition: faanidct.c:145
#define FF_TRANSPOSE_IDCT_PERM
Definition: dsputil.h:254
me_cmp_func quant_psnr[6]
Definition: dsputil.h:155
#define G
Definition: dsputil.c:2026
static void copy_block8(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
Definition: copy_block.h:48
static void ff_jref_idct2_add(uint8_t *dest, int line_size, int16_t *block)
Definition: dsputil.c:2628
#define FF_CMP_SATD
static void put_pixels_clamped2_c(const int16_t *block, uint8_t *av_restrict pixels, int line_size)
Definition: dsputil.c:386
#define LOCAL_ALIGNED_16(t, v,...)
#define FF_CMP_VSAD
void ff_avg_pixels16x16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
Definition: dsputil.c:1342
av_cold void avpriv_dsputil_init(DSPContext *c, AVCodecContext *avctx)
Definition: dsputil.c:2932
Floating point AAN DCT
static void diff_bytes_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w)
Definition: dsputil.c:1940
const char int length
Definition: avisynth_c.h:668
static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
Definition: dsputil.c:298
void av_image_copy_plane(uint8_t *dst, int dst_linesize, const uint8_t *src, int src_linesize, int bytewidth, int height)
Copy image plane from src to dst.
Definition: imgutils.c:242
#define BUTTERFLY2(o1, o2, i1, i2)
Definition: dsputil.c:2060
int dummy
Definition: motion-test.c:64
void ff_put_pixels8x8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
Definition: dsputil.c:1330
#define A
Definition: dsputil.c:2028
float min
int(* try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale)
Definition: dsputil.h:258
void(* diff_pixels)(int16_t *block, const uint8_t *s1, const uint8_t *s2, int stride)
Definition: dsputil.h:130
static void apply_window_int16_c(int16_t *output, const int16_t *input, const int16_t *window, unsigned int len)
Definition: dsputil.c:2572
static void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
Definition: dsputil.c:613
me_cmp_func pix_abs[2][4]
Definition: dsputil.h:194
#define HAVE_MMX
Definition: config.h:48
DSPContext.
Definition: dsputil.h:127
#define FUNCC(f, depth)
void ff_simple_idct_8(int16_t *block)
#define SRC(x, y)