yading@10
|
1 /*
|
yading@10
|
2 * DSP utils
|
yading@10
|
3 * Copyright (c) 2000, 2001 Fabrice Bellard
|
yading@10
|
4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
|
yading@10
|
5 *
|
yading@10
|
6 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
|
yading@10
|
7 *
|
yading@10
|
8 * This file is part of FFmpeg.
|
yading@10
|
9 *
|
yading@10
|
10 * FFmpeg is free software; you can redistribute it and/or
|
yading@10
|
11 * modify it under the terms of the GNU Lesser General Public
|
yading@10
|
12 * License as published by the Free Software Foundation; either
|
yading@10
|
13 * version 2.1 of the License, or (at your option) any later version.
|
yading@10
|
14 *
|
yading@10
|
15 * FFmpeg is distributed in the hope that it will be useful,
|
yading@10
|
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
yading@10
|
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
yading@10
|
18 * Lesser General Public License for more details.
|
yading@10
|
19 *
|
yading@10
|
20 * You should have received a copy of the GNU Lesser General Public
|
yading@10
|
21 * License along with FFmpeg; if not, write to the Free Software
|
yading@10
|
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
yading@10
|
23 */
|
yading@10
|
24
|
yading@10
|
25 /**
|
yading@10
|
26 * @file
|
yading@10
|
27 * DSP utils
|
yading@10
|
28 */
|
yading@10
|
29
|
yading@10
|
30 #include "libavutil/imgutils.h"
|
yading@10
|
31 #include "libavutil/internal.h"
|
yading@10
|
32 #include "avcodec.h"
|
yading@10
|
33 #include "copy_block.h"
|
yading@10
|
34 #include "dct.h"
|
yading@10
|
35 #include "dsputil.h"
|
yading@10
|
36 #include "simple_idct.h"
|
yading@10
|
37 #include "faandct.h"
|
yading@10
|
38 #include "faanidct.h"
|
yading@10
|
39 #include "imgconvert.h"
|
yading@10
|
40 #include "mathops.h"
|
yading@10
|
41 #include "mpegvideo.h"
|
yading@10
|
42 #include "config.h"
|
yading@10
|
43 #include "diracdsp.h"
|
yading@10
|
44
|
yading@10
|
/* 512-entry lookup table, zero-initialised here. Code in this file reads it
 * through a pointer offset by 256 (ff_squareTbl + 256) so that it can be
 * indexed with values in the range -256..255. */
uint32_t ff_squareTbl[512] = { 0 };
|
yading@10
|
46
|
yading@10
|
47 #define BIT_DEPTH 16
|
yading@10
|
48 #include "dsputil_template.c"
|
yading@10
|
49 #undef BIT_DEPTH
|
yading@10
|
50
|
yading@10
|
51 #define BIT_DEPTH 8
|
yading@10
|
52 #include "dsputil_template.c"
|
yading@10
|
53
|
yading@10
|
/* Byte-replicated constants as wide as unsigned long: 0x7f7f7f7f when long is
 * 32 bits, 0x7f7f7f7f7f7f7f7f when it is 64 bits (and likewise for 0x80).
 * ~0UL / 255 yields 0x0101...01, which is then scaled by the byte value. */
#define pb_7f ((~0UL / 255) * 0x7f)
#define pb_80 ((~0UL / 255) * 0x80)
|
yading@10
|
57
|
yading@10
|
/**
 * Zigzag scan used with the 248 IDCT.
 * Note: unlike the specification, the two fields are interleaved here.
 * 64 entries, laid out eight per row.
 */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,   /* scan positions  0.. 7 */
    17, 25, 32, 40, 48, 56, 33, 41,   /* scan positions  8..15 */
    18, 26,  3, 11,  4, 12, 19, 27,   /* scan positions 16..23 */
    34, 42, 49, 57, 50, 58, 35, 43,   /* scan positions 24..31 */
    20, 28,  5, 13,  6, 14, 21, 29,   /* scan positions 32..39 */
    36, 44, 51, 59, 52, 60, 37, 45,   /* scan positions 40..47 */
    22, 30,  7, 15, 23, 31, 38, 46,   /* scan positions 48..55 */
    53, 61, 54, 62, 39, 47, 55, 63,   /* scan positions 56..63 */
};
|
yading@10
|
70
|
yading@10
|
71 /* Inverse of zigzag_direct (not permuted), with 1 added; used by the MMX quantizer. The table is not initialised here -- presumably filled at init time elsewhere; verify. */
|
yading@10
|
72 DECLARE_ALIGNED(16, uint16_t, ff_inv_zigzag_direct16)[64];
|
yading@10
|
73
|
yading@10
|
/**
 * Alternate horizontal scan order: 64 entries, laid out eight per row.
 * Entry i is the index selected at scan position i (presumably a raster
 * index into an 8x8 block -- confirm against the users of this table).
 */
const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,   /* scan positions  0.. 7 */
    10, 11,  4,  5,  6,  7, 15, 14,   /* scan positions  8..15 */
    13, 12, 19, 18, 24, 25, 32, 33,   /* scan positions 16..23 */
    26, 27, 20, 21, 22, 23, 28, 29,   /* scan positions 24..31 */
    30, 31, 34, 35, 40, 41, 48, 49,   /* scan positions 32..39 */
    42, 43, 36, 37, 38, 39, 44, 45,   /* scan positions 40..47 */
    46, 47, 50, 51, 56, 57, 58, 59,   /* scan positions 48..55 */
    52, 53, 54, 55, 60, 61, 62, 63,   /* scan positions 56..63 */
};
|
yading@10
|
84
|
yading@10
|
/**
 * Alternate vertical scan order: 64 entries, laid out eight per row.
 * Entry i is the index selected at scan position i (presumably a raster
 * index into an 8x8 block -- confirm against the users of this table).
 */
const uint8_t ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,   /* scan positions  0.. 7 */
    17, 25, 32, 40, 48, 56, 57, 49,   /* scan positions  8..15 */
    41, 33, 26, 18,  3, 11,  4, 12,   /* scan positions 16..23 */
    19, 27, 34, 42, 50, 58, 35, 43,   /* scan positions 24..31 */
    51, 59, 20, 28,  5, 13,  6, 14,   /* scan positions 32..39 */
    21, 29, 36, 44, 52, 60, 37, 45,   /* scan positions 40..47 */
    53, 61, 22, 30,  7, 15, 23, 31,   /* scan positions 48..55 */
    38, 46, 54, 62, 39, 47, 55, 63,   /* scan positions 56..63 */
};
|
yading@10
|
95
|
yading@10
|
/**
 * Input permutation expected by simple_idct_mmx.
 * 64 entries, eight per row; copied verbatim into the IDCT permutation
 * when FF_SIMPLE_IDCT_PERM is selected.
 */
static const uint8_t simple_mmx_permutation[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,   /* entries  0.. 7 */
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,   /* entries  8..15 */
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,   /* entries 16..23 */
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,   /* entries 24..31 */
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,   /* entries 32..39 */
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,   /* entries 40..47 */
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,   /* entries 48..55 */
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,   /* entries 56..63 */
};
|
yading@10
|
107
|
yading@10
|
/* Column reordering within one row of eight, applied to the low three index
 * bits when FF_SSE2_IDCT_PERM is selected. */
static const uint8_t idct_sse2_row_perm[8] = {
    0, 4, 1, 5, 2, 6, 3, 7,
};
|
yading@10
|
109
|
yading@10
|
110 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
|
yading@10
|
111 int i;
|
yading@10
|
112 int end;
|
yading@10
|
113
|
yading@10
|
114 st->scantable= src_scantable;
|
yading@10
|
115
|
yading@10
|
116 for(i=0; i<64; i++){
|
yading@10
|
117 int j;
|
yading@10
|
118 j = src_scantable[i];
|
yading@10
|
119 st->permutated[i] = permutation[j];
|
yading@10
|
120 }
|
yading@10
|
121
|
yading@10
|
122 end=-1;
|
yading@10
|
123 for(i=0; i<64; i++){
|
yading@10
|
124 int j;
|
yading@10
|
125 j = st->permutated[i];
|
yading@10
|
126 if(j>end) end=j;
|
yading@10
|
127 st->raster_end[i]= end;
|
yading@10
|
128 }
|
yading@10
|
129 }
|
yading@10
|
130
|
yading@10
|
131 void ff_init_scantable_permutation(uint8_t *idct_permutation,
|
yading@10
|
132 int idct_permutation_type)
|
yading@10
|
133 {
|
yading@10
|
134 int i;
|
yading@10
|
135
|
yading@10
|
136 switch(idct_permutation_type){
|
yading@10
|
137 case FF_NO_IDCT_PERM:
|
yading@10
|
138 for(i=0; i<64; i++)
|
yading@10
|
139 idct_permutation[i]= i;
|
yading@10
|
140 break;
|
yading@10
|
141 case FF_LIBMPEG2_IDCT_PERM:
|
yading@10
|
142 for(i=0; i<64; i++)
|
yading@10
|
143 idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
|
yading@10
|
144 break;
|
yading@10
|
145 case FF_SIMPLE_IDCT_PERM:
|
yading@10
|
146 for(i=0; i<64; i++)
|
yading@10
|
147 idct_permutation[i]= simple_mmx_permutation[i];
|
yading@10
|
148 break;
|
yading@10
|
149 case FF_TRANSPOSE_IDCT_PERM:
|
yading@10
|
150 for(i=0; i<64; i++)
|
yading@10
|
151 idct_permutation[i]= ((i&7)<<3) | (i>>3);
|
yading@10
|
152 break;
|
yading@10
|
153 case FF_PARTTRANS_IDCT_PERM:
|
yading@10
|
154 for(i=0; i<64; i++)
|
yading@10
|
155 idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
|
yading@10
|
156 break;
|
yading@10
|
157 case FF_SSE2_IDCT_PERM:
|
yading@10
|
158 for(i=0; i<64; i++)
|
yading@10
|
159 idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
|
yading@10
|
160 break;
|
yading@10
|
161 default:
|
yading@10
|
162 av_log(NULL, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
|
yading@10
|
163 }
|
yading@10
|
164 }
|
yading@10
|
165
|
yading@10
|
/**
 * Sum all pixel values of a 16x16 block.
 *
 * @param pix       top-left pixel of the block
 * @param line_size distance in bytes between the starts of consecutive rows
 * @return sum of the 256 pixel values
 */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            total += pix[col];
        pix += line_size;
    }
    return total;
}
|
yading@10
|
187
|
yading@10
|
188 static int pix_norm1_c(uint8_t * pix, int line_size)
|
yading@10
|
189 {
|
yading@10
|
190 int s, i, j;
|
yading@10
|
191 uint32_t *sq = ff_squareTbl + 256;
|
yading@10
|
192
|
yading@10
|
193 s = 0;
|
yading@10
|
194 for (i = 0; i < 16; i++) {
|
yading@10
|
195 for (j = 0; j < 16; j += 8) {
|
yading@10
|
196 #if 0
|
yading@10
|
197 s += sq[pix[0]];
|
yading@10
|
198 s += sq[pix[1]];
|
yading@10
|
199 s += sq[pix[2]];
|
yading@10
|
200 s += sq[pix[3]];
|
yading@10
|
201 s += sq[pix[4]];
|
yading@10
|
202 s += sq[pix[5]];
|
yading@10
|
203 s += sq[pix[6]];
|
yading@10
|
204 s += sq[pix[7]];
|
yading@10
|
205 #else
|
yading@10
|
206 #if HAVE_FAST_64BIT
|
yading@10
|
207 register uint64_t x=*(uint64_t*)pix;
|
yading@10
|
208 s += sq[x&0xff];
|
yading@10
|
209 s += sq[(x>>8)&0xff];
|
yading@10
|
210 s += sq[(x>>16)&0xff];
|
yading@10
|
211 s += sq[(x>>24)&0xff];
|
yading@10
|
212 s += sq[(x>>32)&0xff];
|
yading@10
|
213 s += sq[(x>>40)&0xff];
|
yading@10
|
214 s += sq[(x>>48)&0xff];
|
yading@10
|
215 s += sq[(x>>56)&0xff];
|
yading@10
|
216 #else
|
yading@10
|
217 register uint32_t x=*(uint32_t*)pix;
|
yading@10
|
218 s += sq[x&0xff];
|
yading@10
|
219 s += sq[(x>>8)&0xff];
|
yading@10
|
220 s += sq[(x>>16)&0xff];
|
yading@10
|
221 s += sq[(x>>24)&0xff];
|
yading@10
|
222 x=*(uint32_t*)(pix+4);
|
yading@10
|
223 s += sq[x&0xff];
|
yading@10
|
224 s += sq[(x>>8)&0xff];
|
yading@10
|
225 s += sq[(x>>16)&0xff];
|
yading@10
|
226 s += sq[(x>>24)&0xff];
|
yading@10
|
227 #endif
|
yading@10
|
228 #endif
|
yading@10
|
229 pix += 8;
|
yading@10
|
230 }
|
yading@10
|
231 pix += line_size - 16;
|
yading@10
|
232 }
|
yading@10
|
233 return s;
|
yading@10
|
234 }
|
yading@10
|
235
|
yading@10
|
/**
 * Byte-swap w 32-bit words from src into dst.
 * Nothing is written when w is zero or negative.
 *
 * @param dst destination, at least w words
 * @param src source, at least w words
 * @param w   number of 32-bit words to convert
 */
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
    int n;

    for (n = 0; n < w; n++)
        dst[n] = av_bswap32(src[n]);
}
|
yading@10
|
253
|
yading@10
|
/**
 * Byte-swap len 16-bit words from src into dst.
 *
 * A zero or negative len converts nothing, matching bswap_buf(). The former
 * `while (len--)` loop never terminated sensibly for a negative count: it
 * kept writing past the buffers until len wrapped around.
 *
 * @param dst destination, at least len words
 * @param src source, at least len words
 * @param len number of 16-bit words to convert
 */
static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
{
    int n;

    for (n = 0; n < len; n++)
        dst[n] = av_bswap16(src[n]);
}
|
yading@10
|
259
|
yading@10
|
260 static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
|
yading@10
|
261 {
|
yading@10
|
262 int s, i;
|
yading@10
|
263 uint32_t *sq = ff_squareTbl + 256;
|
yading@10
|
264
|
yading@10
|
265 s = 0;
|
yading@10
|
266 for (i = 0; i < h; i++) {
|
yading@10
|
267 s += sq[pix1[0] - pix2[0]];
|
yading@10
|
268 s += sq[pix1[1] - pix2[1]];
|
yading@10
|
269 s += sq[pix1[2] - pix2[2]];
|
yading@10
|
270 s += sq[pix1[3] - pix2[3]];
|
yading@10
|
271 pix1 += line_size;
|
yading@10
|
272 pix2 += line_size;
|
yading@10
|
273 }
|
yading@10
|
274 return s;
|
yading@10
|
275 }
|
yading@10
|
276
|
yading@10
|
277 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
|
yading@10
|
278 {
|
yading@10
|
279 int s, i;
|
yading@10
|
280 uint32_t *sq = ff_squareTbl + 256;
|
yading@10
|
281
|
yading@10
|
282 s = 0;
|
yading@10
|
283 for (i = 0; i < h; i++) {
|
yading@10
|
284 s += sq[pix1[0] - pix2[0]];
|
yading@10
|
285 s += sq[pix1[1] - pix2[1]];
|
yading@10
|
286 s += sq[pix1[2] - pix2[2]];
|
yading@10
|
287 s += sq[pix1[3] - pix2[3]];
|
yading@10
|
288 s += sq[pix1[4] - pix2[4]];
|
yading@10
|
289 s += sq[pix1[5] - pix2[5]];
|
yading@10
|
290 s += sq[pix1[6] - pix2[6]];
|
yading@10
|
291 s += sq[pix1[7] - pix2[7]];
|
yading@10
|
292 pix1 += line_size;
|
yading@10
|
293 pix2 += line_size;
|
yading@10
|
294 }
|
yading@10
|
295 return s;
|
yading@10
|
296 }
|
yading@10
|
297
|
yading@10
|
298 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
|
yading@10
|
299 {
|
yading@10
|
300 int s, i;
|
yading@10
|
301 uint32_t *sq = ff_squareTbl + 256;
|
yading@10
|
302
|
yading@10
|
303 s = 0;
|
yading@10
|
304 for (i = 0; i < h; i++) {
|
yading@10
|
305 s += sq[pix1[ 0] - pix2[ 0]];
|
yading@10
|
306 s += sq[pix1[ 1] - pix2[ 1]];
|
yading@10
|
307 s += sq[pix1[ 2] - pix2[ 2]];
|
yading@10
|
308 s += sq[pix1[ 3] - pix2[ 3]];
|
yading@10
|
309 s += sq[pix1[ 4] - pix2[ 4]];
|
yading@10
|
310 s += sq[pix1[ 5] - pix2[ 5]];
|
yading@10
|
311 s += sq[pix1[ 6] - pix2[ 6]];
|
yading@10
|
312 s += sq[pix1[ 7] - pix2[ 7]];
|
yading@10
|
313 s += sq[pix1[ 8] - pix2[ 8]];
|
yading@10
|
314 s += sq[pix1[ 9] - pix2[ 9]];
|
yading@10
|
315 s += sq[pix1[10] - pix2[10]];
|
yading@10
|
316 s += sq[pix1[11] - pix2[11]];
|
yading@10
|
317 s += sq[pix1[12] - pix2[12]];
|
yading@10
|
318 s += sq[pix1[13] - pix2[13]];
|
yading@10
|
319 s += sq[pix1[14] - pix2[14]];
|
yading@10
|
320 s += sq[pix1[15] - pix2[15]];
|
yading@10
|
321
|
yading@10
|
322 pix1 += line_size;
|
yading@10
|
323 pix2 += line_size;
|
yading@10
|
324 }
|
yading@10
|
325 return s;
|
yading@10
|
326 }
|
yading@10
|
327
|
yading@10
|
328 static void diff_pixels_c(int16_t *av_restrict block, const uint8_t *s1,
|
yading@10
|
329 const uint8_t *s2, int stride){
|
yading@10
|
330 int i;
|
yading@10
|
331
|
yading@10
|
332 /* read the pixels */
|
yading@10
|
333 for(i=0;i<8;i++) {
|
yading@10
|
334 block[0] = s1[0] - s2[0];
|
yading@10
|
335 block[1] = s1[1] - s2[1];
|
yading@10
|
336 block[2] = s1[2] - s2[2];
|
yading@10
|
337 block[3] = s1[3] - s2[3];
|
yading@10
|
338 block[4] = s1[4] - s2[4];
|
yading@10
|
339 block[5] = s1[5] - s2[5];
|
yading@10
|
340 block[6] = s1[6] - s2[6];
|
yading@10
|
341 block[7] = s1[7] - s2[7];
|
yading@10
|
342 s1 += stride;
|
yading@10
|
343 s2 += stride;
|
yading@10
|
344 block += 8;
|
yading@10
|
345 }
|
yading@10
|
346 }
|
yading@10
|
347
|
yading@10
|
348 static void put_pixels_clamped_c(const int16_t *block, uint8_t *av_restrict pixels,
|
yading@10
|
349 int line_size)
|
yading@10
|
350 {
|
yading@10
|
351 int i;
|
yading@10
|
352
|
yading@10
|
353 /* read the pixels */
|
yading@10
|
354 for(i=0;i<8;i++) {
|
yading@10
|
355 pixels[0] = av_clip_uint8(block[0]);
|
yading@10
|
356 pixels[1] = av_clip_uint8(block[1]);
|
yading@10
|
357 pixels[2] = av_clip_uint8(block[2]);
|
yading@10
|
358 pixels[3] = av_clip_uint8(block[3]);
|
yading@10
|
359 pixels[4] = av_clip_uint8(block[4]);
|
yading@10
|
360 pixels[5] = av_clip_uint8(block[5]);
|
yading@10
|
361 pixels[6] = av_clip_uint8(block[6]);
|
yading@10
|
362 pixels[7] = av_clip_uint8(block[7]);
|
yading@10
|
363
|
yading@10
|
364 pixels += line_size;
|
yading@10
|
365 block += 8;
|
yading@10
|
366 }
|
yading@10
|
367 }
|
yading@10
|
368
|
yading@10
|
369 static void put_pixels_clamped4_c(const int16_t *block, uint8_t *av_restrict pixels,
|
yading@10
|
370 int line_size)
|
yading@10
|
371 {
|
yading@10
|
372 int i;
|
yading@10
|
373
|
yading@10
|
374 /* read the pixels */
|
yading@10
|
375 for(i=0;i<4;i++) {
|
yading@10
|
376 pixels[0] = av_clip_uint8(block[0]);
|
yading@10
|
377 pixels[1] = av_clip_uint8(block[1]);
|
yading@10
|
378 pixels[2] = av_clip_uint8(block[2]);
|
yading@10
|
379 pixels[3] = av_clip_uint8(block[3]);
|
yading@10
|
380
|
yading@10
|
381 pixels += line_size;
|
yading@10
|
382 block += 8;
|
yading@10
|
383 }
|
yading@10
|
384 }
|
yading@10
|
385
|
yading@10
|
386 static void put_pixels_clamped2_c(const int16_t *block, uint8_t *av_restrict pixels,
|
yading@10
|
387 int line_size)
|
yading@10
|
388 {
|
yading@10
|
389 int i;
|
yading@10
|
390
|
yading@10
|
391 /* read the pixels */
|
yading@10
|
392 for(i=0;i<2;i++) {
|
yading@10
|
393 pixels[0] = av_clip_uint8(block[0]);
|
yading@10
|
394 pixels[1] = av_clip_uint8(block[1]);
|
yading@10
|
395
|
yading@10
|
396 pixels += line_size;
|
yading@10
|
397 block += 8;
|
yading@10
|
398 }
|
yading@10
|
399 }
|
yading@10
|
400
|
yading@10
|
401 static void put_signed_pixels_clamped_c(const int16_t *block,
|
yading@10
|
402 uint8_t *av_restrict pixels,
|
yading@10
|
403 int line_size)
|
yading@10
|
404 {
|
yading@10
|
405 int i, j;
|
yading@10
|
406
|
yading@10
|
407 for (i = 0; i < 8; i++) {
|
yading@10
|
408 for (j = 0; j < 8; j++) {
|
yading@10
|
409 if (*block < -128)
|
yading@10
|
410 *pixels = 0;
|
yading@10
|
411 else if (*block > 127)
|
yading@10
|
412 *pixels = 255;
|
yading@10
|
413 else
|
yading@10
|
414 *pixels = (uint8_t)(*block + 128);
|
yading@10
|
415 block++;
|
yading@10
|
416 pixels++;
|
yading@10
|
417 }
|
yading@10
|
418 pixels += (line_size - 8);
|
yading@10
|
419 }
|
yading@10
|
420 }
|
yading@10
|
421
|
yading@10
|
422 static void add_pixels8_c(uint8_t *av_restrict pixels,
|
yading@10
|
423 int16_t *block,
|
yading@10
|
424 int line_size)
|
yading@10
|
425 {
|
yading@10
|
426 int i;
|
yading@10
|
427
|
yading@10
|
428 for(i=0;i<8;i++) {
|
yading@10
|
429 pixels[0] += block[0];
|
yading@10
|
430 pixels[1] += block[1];
|
yading@10
|
431 pixels[2] += block[2];
|
yading@10
|
432 pixels[3] += block[3];
|
yading@10
|
433 pixels[4] += block[4];
|
yading@10
|
434 pixels[5] += block[5];
|
yading@10
|
435 pixels[6] += block[6];
|
yading@10
|
436 pixels[7] += block[7];
|
yading@10
|
437 pixels += line_size;
|
yading@10
|
438 block += 8;
|
yading@10
|
439 }
|
yading@10
|
440 }
|
yading@10
|
441
|
yading@10
|
442 static void add_pixels_clamped_c(const int16_t *block, uint8_t *av_restrict pixels,
|
yading@10
|
443 int line_size)
|
yading@10
|
444 {
|
yading@10
|
445 int i;
|
yading@10
|
446
|
yading@10
|
447 /* read the pixels */
|
yading@10
|
448 for(i=0;i<8;i++) {
|
yading@10
|
449 pixels[0] = av_clip_uint8(pixels[0] + block[0]);
|
yading@10
|
450 pixels[1] = av_clip_uint8(pixels[1] + block[1]);
|
yading@10
|
451 pixels[2] = av_clip_uint8(pixels[2] + block[2]);
|
yading@10
|
452 pixels[3] = av_clip_uint8(pixels[3] + block[3]);
|
yading@10
|
453 pixels[4] = av_clip_uint8(pixels[4] + block[4]);
|
yading@10
|
454 pixels[5] = av_clip_uint8(pixels[5] + block[5]);
|
yading@10
|
455 pixels[6] = av_clip_uint8(pixels[6] + block[6]);
|
yading@10
|
456 pixels[7] = av_clip_uint8(pixels[7] + block[7]);
|
yading@10
|
457 pixels += line_size;
|
yading@10
|
458 block += 8;
|
yading@10
|
459 }
|
yading@10
|
460 }
|
yading@10
|
461
|
yading@10
|
462 static void add_pixels_clamped4_c(const int16_t *block, uint8_t *av_restrict pixels,
|
yading@10
|
463 int line_size)
|
yading@10
|
464 {
|
yading@10
|
465 int i;
|
yading@10
|
466
|
yading@10
|
467 /* read the pixels */
|
yading@10
|
468 for(i=0;i<4;i++) {
|
yading@10
|
469 pixels[0] = av_clip_uint8(pixels[0] + block[0]);
|
yading@10
|
470 pixels[1] = av_clip_uint8(pixels[1] + block[1]);
|
yading@10
|
471 pixels[2] = av_clip_uint8(pixels[2] + block[2]);
|
yading@10
|
472 pixels[3] = av_clip_uint8(pixels[3] + block[3]);
|
yading@10
|
473 pixels += line_size;
|
yading@10
|
474 block += 8;
|
yading@10
|
475 }
|
yading@10
|
476 }
|
yading@10
|
477
|
yading@10
|
478 static void add_pixels_clamped2_c(const int16_t *block, uint8_t *av_restrict pixels,
|
yading@10
|
479 int line_size)
|
yading@10
|
480 {
|
yading@10
|
481 int i;
|
yading@10
|
482
|
yading@10
|
483 /* read the pixels */
|
yading@10
|
484 for(i=0;i<2;i++) {
|
yading@10
|
485 pixels[0] = av_clip_uint8(pixels[0] + block[0]);
|
yading@10
|
486 pixels[1] = av_clip_uint8(pixels[1] + block[1]);
|
yading@10
|
487 pixels += line_size;
|
yading@10
|
488 block += 8;
|
yading@10
|
489 }
|
yading@10
|
490 }
|
yading@10
|
491
|
yading@10
|
/**
 * Sum of the absolute values of the 64 coefficients of a block.
 *
 * @param block 64 coefficients
 * @return sum of FFABS() over all of them
 */
static int sum_abs_dctelem_c(int16_t *block)
{
    const int16_t *coef      = block;
    const int16_t *const end = block + 64;
    int total = 0;

    while (coef < end) {
        total += FFABS(*coef);
        coef++;   /* kept outside FFABS(): the macro may evaluate its argument twice */
    }
    return total;
}
|
yading@10
|
499
|
yading@10
|
/**
 * Set a block 16 bytes wide and h rows tall to a constant value.
 *
 * @param block     top-left byte of the block
 * @param value     byte value to store
 * @param line_size row stride in bytes
 * @param h         number of rows; nothing is written when h <= 0
 */
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    uint8_t *row = block;
    int remaining;

    for (remaining = h; remaining > 0; remaining--) {
        memset(row, value, 16);
        row += line_size;
    }
}
|
yading@10
|
509
|
yading@10
|
/**
 * Set a block 8 bytes wide and h rows tall to a constant value.
 *
 * @param block     top-left byte of the block
 * @param value     byte value to store
 * @param line_size row stride in bytes
 * @param h         number of rows; nothing is written when h <= 0
 */
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    uint8_t *row = block;
    int remaining;

    for (remaining = h; remaining > 0; remaining--) {
        memset(row, value, 8);
        row += line_size;
    }
}
|
yading@10
|
519
|
yading@10
|
/* Rounded averages of two and of four values.
 * Every argument is parenthesised so that expressions with lower precedence
 * than '+' (e.g. a|b or c ? x : y) are averaged as a whole instead of being
 * torn apart by the expansion. Arguments are still evaluated exactly once. */
#define avg2(a,b) (((a)+(b)+1)>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
|
yading@10
|
522
|
yading@10
|
/**
 * Bilinear interpolation of an 8-pixel-wide block at a 1/16-pel offset.
 *
 * Each output pixel is a weighted sum of the four neighbouring source pixels
 * (weights add up to 256), plus rounder, shifted right by 8.
 *
 * @param dst     destination, top-left pixel
 * @param src     source, top-left pixel; 9 columns and h + 1 rows are read
 * @param stride  row stride in bytes, shared by dst and src
 * @param h       number of rows to produce
 * @param x16     horizontal fraction in 1/16 units
 * @param y16     vertical fraction in 1/16 units
 * @param rounder value added before the final shift
 */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int w_tl = (16 - x16) * (16 - y16);   /* top-left     */
    const int w_tr = x16        * (16 - y16);   /* top-right    */
    const int w_bl = (16 - x16) * y16;          /* bottom-left  */
    const int w_br = x16        * y16;          /* bottom-right */
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++) {
            dst[col] = (w_tl * src[col]          + w_tr * src[col + 1] +
                        w_bl * src[stride + col] + w_br * src[stride + col + 1] +
                        rounder) >> 8;
        }
        dst += stride;
        src += stride;
    }
}
|
yading@10
|
545
|
yading@10
|
/**
 * Global motion compensation of an 8-pixel-wide block.
 *
 * For every output pixel a source position (vx, vy) is tracked; after
 * dropping its low 16 bits, the low `shift` bits are the interpolation
 * fraction and the rest is the integer source coordinate. Pixels are
 * bilinearly interpolated when the position lies inside the picture; when
 * one coordinate is outside it is clamped and interpolation is done only
 * along the other axis; when both are outside the clamped pixel is copied.
 *
 * @param dst      destination, top-left pixel
 * @param src      source picture origin
 * @param stride   row stride in bytes, shared by dst and src
 * @param h        number of rows to produce
 * @param ox, oy   source position for the first output pixel
 * @param dxx, dyx position increment per output column (x and y part)
 * @param dxy, dyy position increment per output row (x and y part)
 * @param shift    number of fractional bits
 * @param r        value added before the final shift by 2 * shift
 * @param width    source width; positions are clamped to 0..width-1
 * @param height   source height; positions are clamped to 0..height-1
 */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    const int s     = 1 << shift;
    const int max_x = width  - 1;
    const int max_y = height - 1;
    int row;

    for (row = 0; row < h; row++) {
        uint8_t *out = dst + row * stride;
        int vx = ox;
        int vy = oy;
        int col;

        for (col = 0; col < 8; col++) { //XXX FIXME optimize
            int sx = vx >> 16;
            int sy = vy >> 16;
            const int fx = sx & (s - 1);
            const int fy = sy & (s - 1);
            int x_inside, y_inside, index;

            sx >>= shift;
            sy >>= shift;

            /* the unsigned compare also rejects negative coordinates */
            x_inside = (unsigned)sx < max_x;
            y_inside = (unsigned)sy < max_y;

            if (x_inside && y_inside) {
                index    = sx + sy * stride;
                out[col] = ((src[index]          * (s - fx) +
                             src[index + 1]      *      fx) * (s - fy) +
                            (src[index + stride]     * (s - fx) +
                             src[index + stride + 1] *      fx) *      fy +
                            r) >> (shift * 2);
            } else if (x_inside) {
                /* row clamped: interpolate horizontally only */
                index    = sx + av_clip(sy, 0, max_y) * stride;
                out[col] = ((src[index]     * (s - fx) +
                             src[index + 1] *      fx) * s +
                            r) >> (shift * 2);
            } else if (y_inside) {
                /* column clamped: interpolate vertically only */
                index    = av_clip(sx, 0, max_x) + sy * stride;
                out[col] = ((src[index]          * (s - fy) +
                             src[index + stride] *      fy) * s +
                            r) >> (shift * 2);
            } else {
                /* both clamped: plain copy */
                index    = av_clip(sx, 0, max_x) + av_clip(sy, 0, max_y) * stride;
                out[col] = src[index];
            }

            vx += dxx;
            vy += dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
|
yading@10
|
603
|
yading@10
|
/**
 * Third-pel motion compensation, position (0,0): plain block copy.
 * Dispatches on width to the matching put_pixelsN_8_c(); widths other than
 * 2, 4, 8 and 16 do nothing.
 */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        put_pixels2_8_c(dst, src, stride, height);
    else if (width == 4)
        put_pixels4_8_c(dst, src, stride, height);
    else if (width == 8)
        put_pixels8_8_c(dst, src, stride, height);
    else if (width == 16)
        put_pixels16_8_c(dst, src, stride, height);
}
|
yading@10
|
612
|
yading@10
|
/**
 * Third-pel interpolation, horizontal offset 1/3:
 * dst = (2*cur + right + 1) / 3, the division done as *683 >> 11
 * (683/2048 is approximately 1/3).
 */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int sum = 2 * src[col] + src[col + 1] + 1;

            dst[col] = (683 * sum) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
|
yading@10
|
623
|
yading@10
|
/**
 * Third-pel interpolation, horizontal offset 2/3:
 * dst = (cur + 2*right + 1) / 3, the division done as *683 >> 11
 * (683/2048 is approximately 1/3).
 */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int sum = src[col] + 2 * src[col + 1] + 1;

            dst[col] = (683 * sum) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
|
yading@10
|
634
|
yading@10
|
/**
 * Third-pel interpolation, vertical offset 1/3:
 * dst = (2*cur + below + 1) / 3, the division done as *683 >> 11
 * (683/2048 is approximately 1/3).
 */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int sum = 2 * src[col] + src[col + stride] + 1;

            dst[col] = (683 * sum) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
|
yading@10
|
645
|
yading@10
|
/**
 * Third-pel interpolation, offset (1/3, 1/3).
 * Weights 4/3/3/2 on cur/right/below/below-right (sum 12); the division by
 * 12 is done as *2731 >> 15 (2731/32768 is approximately 1/12).
 */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const uint8_t *p = src + col;
            const int sum = 4 * p[0] + 3 * p[1] + 3 * p[stride] + 2 * p[stride + 1] + 6;

            dst[col] = (2731 * sum) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
|
yading@10
|
656
|
yading@10
|
/**
 * Third-pel interpolation with weights 3/2/4/3 on
 * cur/right/below/below-right (sum 12); the division by 12 is done as
 * *2731 >> 15 (2731/32768 is approximately 1/12).
 */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const uint8_t *p = src + col;
            const int sum = 3 * p[0] + 2 * p[1] + 4 * p[stride] + 3 * p[stride + 1] + 6;

            dst[col] = (2731 * sum) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
|
yading@10
|
667
|
yading@10
|
/**
 * Third-pel interpolation, vertical offset 2/3:
 * dst = (cur + 2*below + 1) / 3, the division done as *683 >> 11
 * (683/2048 is approximately 1/3).
 */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int sum = src[col] + 2 * src[col + stride] + 1;

            dst[col] = (683 * sum) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
|
yading@10
|
678
|
yading@10
|
/**
 * Third-pel interpolation with weights 3/4/2/3 on
 * cur/right/below/below-right (sum 12); the division by 12 is done as
 * *2731 >> 15 (2731/32768 is approximately 1/12).
 */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const uint8_t *p = src + col;
            const int sum = 3 * p[0] + 4 * p[1] + 2 * p[stride] + 3 * p[stride + 1] + 6;

            dst[col] = (2731 * sum) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
|
yading@10
|
689
|
yading@10
|
/**
 * Third-pel interpolation, offset (2/3, 2/3).
 * Weights 2/3/3/4 on cur/right/below/below-right (sum 12); the division by
 * 12 is done as *2731 >> 15 (2731/32768 is approximately 1/12).
 */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const uint8_t *p = src + col;
            const int sum = 2 * p[0] + 3 * p[1] + 3 * p[stride] + 4 * p[stride + 1] + 6;

            dst[col] = (2731 * sum) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
|
yading@10
|
700
|
yading@10
|
/**
 * Third-pel motion compensation, position (0,0), averaging variant.
 * Dispatches on width to the matching avg_pixelsN_8_c(); widths other than
 * 2, 4, 8 and 16 do nothing.
 */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        avg_pixels2_8_c(dst, src, stride, height);
    else if (width == 4)
        avg_pixels4_8_c(dst, src, stride, height);
    else if (width == 8)
        avg_pixels8_8_c(dst, src, stride, height);
    else if (width == 16)
        avg_pixels16_8_c(dst, src, stride, height);
}
|
yading@10
|
709
|
yading@10
|
/**
 * Averaging third-pel interpolation, horizontal offset 1/3.
 * The interpolated value ((2*cur + right + 1) * 683 >> 11) is averaged with
 * the existing destination pixel, rounding up.
 */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int interp = (683 * (2 * src[col] + src[col + 1] + 1)) >> 11;

            dst[col] = (dst[col] + interp + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
|
yading@10
|
720
|
yading@10
|
/**
 * Averaging third-pel interpolation, horizontal offset 2/3.
 * The interpolated value ((cur + 2*right + 1) * 683 >> 11) is averaged with
 * the existing destination pixel, rounding up.
 */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int interp = (683 * (src[col] + 2 * src[col + 1] + 1)) >> 11;

            dst[col] = (dst[col] + interp + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
|
yading@10
|
731
|
yading@10
|
/**
 * Averaging third-pel interpolation, vertical offset 1/3.
 * The interpolated value ((2*cur + below + 1) * 683 >> 11) is averaged with
 * the existing destination pixel, rounding up.
 */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int interp = (683 * (2 * src[col] + src[col + stride] + 1)) >> 11;

            dst[col] = (dst[col] + interp + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
|
yading@10
|
742
|
yading@10
|
/**
 * Averaging third-pel interpolation, offset (1/3, 1/3).
 * Weights 4/3/3/2 on cur/right/below/below-right, divided by 12 via
 * *2731 >> 15; the result is averaged with the existing destination pixel,
 * rounding up.
 */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const uint8_t *p = src + col;
            const int interp = (2731 * (4 * p[0] + 3 * p[1] + 3 * p[stride] + 2 * p[stride + 1] + 6)) >> 15;

            dst[col] = (dst[col] + interp + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
|
yading@10
|
753
|
yading@10
|
/**
 * Averaging third-pel interpolation with weights 3/2/4/3 on
 * cur/right/below/below-right, divided by 12 via *2731 >> 15; the result is
 * averaged with the existing destination pixel, rounding up.
 */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const uint8_t *p = src + col;
            const int interp = (2731 * (3 * p[0] + 2 * p[1] + 4 * p[stride] + 3 * p[stride + 1] + 6)) >> 15;

            dst[col] = (dst[col] + interp + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
|
yading@10
|
764
|
yading@10
|
/**
 * Averaging third-pel interpolation, vertical offset 2/3.
 * The interpolated value ((cur + 2*below + 1) * 683 >> 11) is averaged with
 * the existing destination pixel, rounding up.
 */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int interp = (683 * (src[col] + 2 * src[col + stride] + 1)) >> 11;

            dst[col] = (dst[col] + interp + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
|
yading@10
|
775
|
yading@10
|
/**
 * Average a diagonally interpolated block into dst.
 *
 * The interpolated value is a weighted mean of the 2x2 neighbourhood
 * src[j], src[j+1], src[j+stride], src[j+stride+1] with weights 3:4:2:3
 * (sum 12, heaviest on the top-right sample); it is then averaged (rounding
 * up) with the byte already stored in dst[j].
 *
 * @param dst    destination block, read and updated in place
 * @param src    source block; one extra column and one extra row are read
 * @param stride distance in bytes between rows, applied to both dst and src
 * @param width  number of samples processed per row
 * @param height number of rows processed
 */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++) {
            /* 2731 / 2^15 ~= 1/12; the +6 rounds the division by 12 */
            dst[j] = (dst[j] + ((2731 * (3 * src[j] + 4 * src[j + 1] +
                                         2 * src[j + stride] + 3 * src[j + stride + 1] + 6)) >> 15) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
|
yading@10
|
786
|
yading@10
|
/**
 * Average a diagonally interpolated block into dst.
 *
 * The interpolated value is a weighted mean of the 2x2 neighbourhood
 * src[j], src[j+1], src[j+stride], src[j+stride+1] with weights 2:3:3:4
 * (sum 12, heaviest on the bottom-right sample); it is then averaged
 * (rounding up) with the byte already stored in dst[j].
 *
 * @param dst    destination block, read and updated in place
 * @param src    source block; one extra column and one extra row are read
 * @param stride distance in bytes between rows, applied to both dst and src
 * @param width  number of samples processed per row
 * @param height number of rows processed
 */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++) {
            /* 2731 / 2^15 ~= 1/12; the +6 rounds the division by 12 */
            dst[j] = (dst[j] + ((2731 * (2 * src[j] + 3 * src[j + 1] +
                                         3 * src[j + stride] + 4 * src[j + stride + 1] + 6)) >> 15) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
|
yading@10
|
797
|
yading@10
|
/**
 * Generate the quarter-pel motion compensation functions for 8x8 and 16x16
 * blocks for one store operation.
 *
 * @param r      not referenced anywhere in the expansion
 * @param OPNAME prefix pasted in front of every generated function name
 * @param RND    token pasted between "put" and the helper name for the
 *               intermediate passes (e.g. put ## RND ## mpeg4_qpel8_h_lowpass),
 *               so an instantiation whose OPNAME equals put ## RND must be
 *               expanded earlier in the file
 * @param OP     store operator invoked as OP(destination lvalue, filter sum);
 *               it may use the local table pointer "cm"
 *
 * The *_lowpass helpers evaluate the symmetric 8-tap kernel
 * (-1, 3, -6, 20, 20, -6, 3, -1) between neighbouring samples. 8-sample wide
 * blocks read 9 input samples (src[0..8]) per line, 16-sample wide blocks
 * read 17; close to either end the tap indices are reflected back into that
 * range instead of reading further out.
 *
 * In the generated mcXY functions X selects the horizontal and Y the vertical
 * position in quarter samples: 2 uses the lowpass plane directly, 1 and 3
 * average it with the nearer full-sample plane. The exported
 * ff_..._old_c variants average four planes (full, halfH, halfV, halfHV)
 * instead of chaining two-plane averages.
 *
 * The expansion relies on ff_cropTbl, MAX_NEG_CROP, copy_block9/copy_block17
 * and the pixels{8,16}_l2_8 / pixels{8,16}_l4_8 helpers being visible.
 */
#define QPEL_MC(r, OPNAME, RND, OP) \
static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
        OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
        OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
        OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
        OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=8;\
    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    /* one column per iteration: load its 9 rows, then emit 8 filtered rows */\
    for(i=0; i<w; i++)\
    {\
        const int src0= src[0*srcStride];\
        const int src1= src[1*srcStride];\
        const int src2= src[2*srcStride];\
        const int src3= src[3*srcStride];\
        const int src4= src[4*srcStride];\
        const int src5= src[5*srcStride];\
        const int src6= src[6*srcStride];\
        const int src7= src[7*srcStride];\
        const int src8= src[8*srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    \
    for(i=0; i<h; i++)\
    {\
        OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
        OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
        OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
        OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
        OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
        OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
        OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
        OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
        OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
        OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
        OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
        OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
        OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
        OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
        OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
        OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    const int w=16;\
    /* one column per iteration: load its 17 rows, then emit 16 filtered rows */\
    for(i=0; i<w; i++)\
    {\
        const int src0= src[0*srcStride];\
        const int src1= src[1*srcStride];\
        const int src2= src[2*srcStride];\
        const int src3= src[3*srcStride];\
        const int src4= src[4*srcStride];\
        const int src5= src[5*srcStride];\
        const int src6= src[6*srcStride];\
        const int src7= src[7*srcStride];\
        const int src8= src[8*srcStride];\
        const int src9= src[9*srcStride];\
        const int src10= src[10*srcStride];\
        const int src11= src[11*srcStride];\
        const int src12= src[12*srcStride];\
        const int src13= src[13*srcStride];\
        const int src14= src[14*srcStride];\
        const int src15= src[15*srcStride];\
        const int src16= src[16*srcStride];\
        OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
        OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
        OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
        OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
        OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
        OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
        OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
        OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
        OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
        OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
        OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
        OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
        OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
        OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
        OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
        OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
        dst++;\
        src++;\
    }\
}\
\
/* 8x8, horizontal-only positions: mc20 is the lowpass plane itself, */\
/* mc10 / mc30 average it with src / src+1.                          */\
static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t half[64];\
    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
    OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
}\
\
static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
}\
\
static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t half[64];\
    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
    OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
}\
\
/* 8x8, vertical-only positions: the source is first copied into "full", */\
/* a local buffer with a row pitch of 16 bytes and room for 9 rows.      */\
static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[16*9];\
    uint8_t half[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
    OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
}\
\
static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[16*9];\
    copy_block9(full, src, 16, stride, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
}\
\
static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[16*9];\
    uint8_t half[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
    /* full+16 is the second row of the local copy */\
    OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
}\
/* 8x8, mixed positions. halfH holds 9 rows of 8 bytes so that the */\
/* vertical pass can run on it; halfH+8 is its second row.         */\
void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t halfH[72];\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
\
/* 16x16 counterparts: same structure, with "full" using a row pitch */\
/* of 24 bytes and 17 rows, and halfH holding 17 rows of 16 bytes.   */\
static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t half[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
    OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
}\
\
static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t half[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
    OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[24*17];\
    uint8_t half[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
    OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[24*17];\
    copy_block17(full, src, 24, stride, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
}\
\
static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[24*17];\
    uint8_t half[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
    OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}\
static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t halfH[272];\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}
|
yading@10
|
1315
|
yading@10
|
/*
 * Store operators handed to QPEL_MC as its last argument.
 * Each takes a destination lvalue `a` and a filter sum `b`, scales `b` down
 * by 5 bits and maps it through `cm[]` (a table that must be in scope at the
 * expansion site):
 *   - op_put / op_avg add 16 before the shift,
 *   - the *_no_rnd variants add 15 instead.
 * The avg variants additionally average the looked-up value with the current
 * content of `a` (op_avg adds 1 before halving, op_avg_no_rnd does not).
 * The operators are #undef'd again right after the instantiations; the
 * avg_no_rnd instantiation is commented out, so op_avg_no_rnd is unused here.
 */
1316 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
|
yading@10
|
1317 #define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
|
yading@10
|
1318 #define op_put(a, b) a = cm[((b) + 16)>>5]
|
yading@10
|
1319 #define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
|
yading@10
|
1320
|
yading@10
|
1321 QPEL_MC(0, put_ , _ , op_put)
|
yading@10
|
1322 QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
|
yading@10
|
1323 QPEL_MC(0, avg_ , _ , op_avg)
|
yading@10
|
1324 //QPEL_MC(1, avg_no_rnd , _ , op_avg)
|
yading@10
|
1325 #undef op_avg
|
yading@10
|
1326 #undef op_avg_no_rnd
|
yading@10
|
1327 #undef op_put
|
yading@10
|
1328 #undef op_put_no_rnd
|
yading@10
|
1329
|
yading@10
|
/* Fixed-size entry point: forwards to put_pixels8_8_c (defined elsewhere)
 * with the height argument pinned to 8. */
1330 void ff_put_pixels8x8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
|
yading@10
|
1331 {
|
yading@10
|
1332 put_pixels8_8_c(dst, src, stride, 8);
|
yading@10
|
1333 }
|
yading@10
|
/* Fixed-size entry point: forwards to avg_pixels8_8_c (defined elsewhere)
 * with the height argument pinned to 8. */
1334 void ff_avg_pixels8x8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
|
yading@10
|
1335 {
|
yading@10
|
1336 avg_pixels8_8_c(dst, src, stride, 8);
|
yading@10
|
1337 }
|
yading@10
|
/* Fixed-size entry point: forwards to put_pixels16_8_c (defined elsewhere)
 * with the height argument pinned to 16. */
1338 void ff_put_pixels16x16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
|
yading@10
|
1339 {
|
yading@10
|
1340 put_pixels16_8_c(dst, src, stride, 16);
|
yading@10
|
1341 }
|
yading@10
|
/* Fixed-size entry point: forwards to avg_pixels16_8_c (defined elsewhere)
 * with the height argument pinned to 16. */
1342 void ff_avg_pixels16x16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
|
yading@10
|
1343 {
|
yading@10
|
1344 avg_pixels16_8_c(dst, src, stride, 16);
|
yading@10
|
1345 }
|
yading@10
|
1346
|
yading@10
|
/*
 * The mc00 qpel entry points are not generated separately; they are aliased
 * to the fixed-size pixel wrappers above. Note that the put_no_rnd aliases
 * resolve to the same functions as the regular put aliases.
 */
1347 #define put_qpel8_mc00_c ff_put_pixels8x8_c
|
yading@10
|
1348 #define avg_qpel8_mc00_c ff_avg_pixels8x8_c
|
yading@10
|
1349 #define put_qpel16_mc00_c ff_put_pixels16x16_c
|
yading@10
|
1350 #define avg_qpel16_mc00_c ff_avg_pixels16x16_c
|
yading@10
|
1351 #define put_no_rnd_qpel8_mc00_c ff_put_pixels8x8_c
|
yading@10
|
1352 #define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
|
yading@10
|
1353
|
yading@10
|
1354 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
|
yading@10
|
1355 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
|
yading@10
|
1356 int i;
|
yading@10
|
1357
|
yading@10
|
1358 for(i=0; i<h; i++){
|
yading@10
|
1359 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
|
yading@10
|
1360 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
|
yading@10
|
1361 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
|
yading@10
|
1362 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
|
yading@10
|
1363 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
|
yading@10
|
1364 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
|
yading@10
|
1365 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
|
yading@10
|
1366 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
|
yading@10
|
1367 dst+=dstStride;
|
yading@10
|
1368 src+=srcStride;
|
yading@10
|
1369 }
|
yading@10
|
1370 }
|
yading@10
|
1371
|
yading@10
|
1372 #if CONFIG_RV40_DECODER
|
yading@10
|
/* RV40 16x16 mc33 "put" case: forwards to put_pixels16_xy2_8_c (defined
 * elsewhere) with a height of 16. */
1373 void ff_put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
|
yading@10
|
1374 {
|
yading@10
|
1375 put_pixels16_xy2_8_c(dst, src, stride, 16);
|
yading@10
|
1376 }
|
yading@10
|
/* RV40 16x16 mc33 "avg" case: forwards to avg_pixels16_xy2_8_c (defined
 * elsewhere) with a height of 16. */
1377 void ff_avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
|
yading@10
|
1378 {
|
yading@10
|
1379 avg_pixels16_xy2_8_c(dst, src, stride, 16);
|
yading@10
|
1380 }
|
yading@10
|
/* RV40 8x8 mc33 "put" case: forwards to put_pixels8_xy2_8_c (defined
 * elsewhere) with a height of 8. */
1381 void ff_put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
|
yading@10
|
1382 {
|
yading@10
|
1383 put_pixels8_xy2_8_c(dst, src, stride, 8);
|
yading@10
|
1384 }
|
yading@10
|
/* RV40 8x8 mc33 "avg" case: forwards to avg_pixels8_xy2_8_c (defined
 * elsewhere) with a height of 8. */
1385 void ff_avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
|
yading@10
|
1386 {
|
yading@10
|
1387 avg_pixels8_xy2_8_c(dst, src, stride, 8);
|
yading@10
|
1388 }
|
yading@10
|
1389 #endif /* CONFIG_RV40_DECODER */
|
yading@10
|
1390
|
yading@10
|
/*
 * DIRAC_MC(OPNAME) generates nine ff_<OPNAME>_dirac_pixels{8,16,32}[_l2|_l4]_c
 * functions, all taking (dst, src[5], stride, h):
 *   - plain variants pass src[0] to OPNAME_pixelsN_8_c,
 *   - _l2 variants pass src[0] and src[1] to OPNAME_pixelsN_l2_8,
 *   - _l4 variants pass src[0]..src[3] to OPNAME_pixelsN_l4_8,
 * using the same `stride` for destination and every source.
 * The 32-wide variants are built from two 16-wide calls, the second one
 * offset by 16 bytes in dst and in every source pointer.
 * src[4] is never read by any of the generated functions.
 * Instantiated for OPNAME = put and avg; compiled only when
 * CONFIG_DIRAC_DECODER is set.
 */
1391 #if CONFIG_DIRAC_DECODER
|
yading@10
|
1392 #define DIRAC_MC(OPNAME)\
|
yading@10
|
1393 void ff_ ## OPNAME ## _dirac_pixels8_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
|
yading@10
|
1394 {\
|
yading@10
|
1395 OPNAME ## _pixels8_8_c(dst, src[0], stride, h);\
|
yading@10
|
1396 }\
|
yading@10
|
1397 void ff_ ## OPNAME ## _dirac_pixels16_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
|
yading@10
|
1398 {\
|
yading@10
|
1399 OPNAME ## _pixels16_8_c(dst, src[0], stride, h);\
|
yading@10
|
1400 }\
|
yading@10
|
1401 void ff_ ## OPNAME ## _dirac_pixels32_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
|
yading@10
|
1402 {\
|
yading@10
|
1403 OPNAME ## _pixels16_8_c(dst , src[0] , stride, h);\
|
yading@10
|
1404 OPNAME ## _pixels16_8_c(dst+16, src[0]+16, stride, h);\
|
yading@10
|
1405 }\
|
yading@10
|
1406 void ff_ ## OPNAME ## _dirac_pixels8_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
|
yading@10
|
1407 {\
|
yading@10
|
1408 OPNAME ## _pixels8_l2_8(dst, src[0], src[1], stride, stride, stride, h);\
|
yading@10
|
1409 }\
|
yading@10
|
1410 void ff_ ## OPNAME ## _dirac_pixels16_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
|
yading@10
|
1411 {\
|
yading@10
|
1412 OPNAME ## _pixels16_l2_8(dst, src[0], src[1], stride, stride, stride, h);\
|
yading@10
|
1413 }\
|
yading@10
|
1414 void ff_ ## OPNAME ## _dirac_pixels32_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
|
yading@10
|
1415 {\
|
yading@10
|
1416 OPNAME ## _pixels16_l2_8(dst , src[0] , src[1] , stride, stride, stride, h);\
|
yading@10
|
1417 OPNAME ## _pixels16_l2_8(dst+16, src[0]+16, src[1]+16, stride, stride, stride, h);\
|
yading@10
|
1418 }\
|
yading@10
|
1419 void ff_ ## OPNAME ## _dirac_pixels8_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
|
yading@10
|
1420 {\
|
yading@10
|
1421 OPNAME ## _pixels8_l4_8(dst, src[0], src[1], src[2], src[3], stride, stride, stride, stride, stride, h);\
|
yading@10
|
1422 }\
|
yading@10
|
1423 void ff_ ## OPNAME ## _dirac_pixels16_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
|
yading@10
|
1424 {\
|
yading@10
|
1425 OPNAME ## _pixels16_l4_8(dst, src[0], src[1], src[2], src[3], stride, stride, stride, stride, stride, h);\
|
yading@10
|
1426 }\
|
yading@10
|
1427 void ff_ ## OPNAME ## _dirac_pixels32_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
|
yading@10
|
1428 {\
|
yading@10
|
1429 OPNAME ## _pixels16_l4_8(dst , src[0] , src[1] , src[2] , src[3] , stride, stride, stride, stride, stride, h);\
|
yading@10
|
1430 OPNAME ## _pixels16_l4_8(dst+16, src[0]+16, src[1]+16, src[2]+16, src[3]+16, stride, stride, stride, stride, stride, h);\
|
yading@10
|
1431 }
|
yading@10
|
1432 DIRAC_MC(put)
|
yading@10
|
1433 DIRAC_MC(avg)
|
yading@10
|
1434 #endif
|
yading@10
|
1435
|
yading@10
|
1436 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
|
yading@10
|
1437 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
|
yading@10
|
1438 int i;
|
yading@10
|
1439
|
yading@10
|
1440 for(i=0; i<w; i++){
|
yading@10
|
1441 const int src_1= src[ -srcStride];
|
yading@10
|
1442 const int src0 = src[0 ];
|
yading@10
|
1443 const int src1 = src[ srcStride];
|
yading@10
|
1444 const int src2 = src[2*srcStride];
|
yading@10
|
1445 const int src3 = src[3*srcStride];
|
yading@10
|
1446 const int src4 = src[4*srcStride];
|
yading@10
|
1447 const int src5 = src[5*srcStride];
|
yading@10
|
1448 const int src6 = src[6*srcStride];
|
yading@10
|
1449 const int src7 = src[7*srcStride];
|
yading@10
|
1450 const int src8 = src[8*srcStride];
|
yading@10
|
1451 const int src9 = src[9*srcStride];
|
yading@10
|
1452 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
|
yading@10
|
1453 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
|
yading@10
|
1454 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
|
yading@10
|
1455 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
|
yading@10
|
1456 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
|
yading@10
|
1457 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
|
yading@10
|
1458 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
|
yading@10
|
1459 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
|
yading@10
|
1460 src++;
|
yading@10
|
1461 dst++;
|
yading@10
|
1462 }
|
yading@10
|
1463 }
|
yading@10
|
1464
|
yading@10
|
/**
 * 8x8 mspel case mc10: runs the horizontal low-pass filter into a temporary
 * 8x8 buffer, then hands the unfiltered source and that buffer to
 * put_pixels8_l2_8 (defined elsewhere; presumably a per-pixel average of
 * its two inputs).
 */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    uint8_t hfilt[8 * 8];

    wmv2_mspel8_h_lowpass(hfilt, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src, hfilt, stride, stride, 8, 8);
}
|
yading@10
|
1471
|
yading@10
|
/* 8x8 mspel case mc20: the horizontal low-pass filter output is written
 * straight to dst (8 rows, same stride for source and destination). */
1472 static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
|
yading@10
|
1473 {
|
yading@10
|
1474 wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
|
yading@10
|
1475 }
|
yading@10
|
1476
|
yading@10
|
/**
 * 8x8 mspel case mc30: like mc10, but the horizontally filtered block is
 * combined with the source shifted one pixel to the right (src + 1).
 * The combination is done by put_pixels8_l2_8 (defined elsewhere;
 * presumably a per-pixel average of its two inputs).
 */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    uint8_t hfilt[8 * 8];

    wmv2_mspel8_h_lowpass(hfilt, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src + 1, hfilt, stride, stride, 8, 8);
}
|
yading@10
|
1483
|
yading@10
|
/* 8x8 mspel case mc02: the vertical low-pass filter output is written
 * straight to dst (8 columns, same stride for source and destination). */
1484 static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
|
yading@10
|
1485 {
|
yading@10
|
1486 wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
|
yading@10
|
1487 }
|
yading@10
|
1488
|
yading@10
|
/**
 * 8x8 mspel case mc12.
 *
 * Builds two intermediate 8x8 blocks and passes both to put_pixels8_l2_8
 * (defined elsewhere; presumably a per-pixel average of its two inputs):
 *   - the vertically filtered source, and
 *   - the source filtered horizontally and then vertically.
 * The horizontal pass covers 11 rows starting one row above src, because
 * the following vertical pass needs one row above and two below the block;
 * it is therefore started at the second row of that buffer.
 */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    uint8_t hrows[8 * 11];
    uint8_t vonly[8 * 8];
    uint8_t hv[8 * 8];

    wmv2_mspel8_v_lowpass(vonly, src, 8, stride, 8);
    wmv2_mspel8_h_lowpass(hrows, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(hv, hrows + 8, 8, 8, 8);
    put_pixels8_l2_8(dst, vonly, hv, stride, 8, 8, 8);
}
|
yading@10
|
/**
 * 8x8 mspel case mc32.
 *
 * Same scheme as mc12, except that the vertically-only filtered block is
 * taken from the source shifted one pixel to the right (src + 1). The
 * horizontal pass covers 11 rows starting one row above src so that the
 * second vertical pass has the extra rows it reads.
 */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    uint8_t hrows[8 * 11];
    uint8_t vonly[8 * 8];
    uint8_t hv[8 * 8];

    wmv2_mspel8_v_lowpass(vonly, src + 1, 8, stride, 8);
    wmv2_mspel8_h_lowpass(hrows, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(hv, hrows + 8, 8, 8, 8);
    put_pixels8_l2_8(dst, vonly, hv, stride, 8, 8, 8);
}
|
yading@10
|
/**
 * 8x8 mspel case mc22: horizontal low-pass into an 11-row temporary
 * (starting one row above src), followed by a vertical low-pass from the
 * second row of that temporary directly into dst.
 */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    uint8_t hrows[8 * 11];

    wmv2_mspel8_h_lowpass(hrows, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, hrows + 8, stride, 8, 8);
}
|
yading@10
|
1515
|
yading@10
|
1516 static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
|
yading@10
|
1517 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
|
yading@10
|
1518 int x;
|
yading@10
|
1519 const int strength= ff_h263_loop_filter_strength[qscale];
|
yading@10
|
1520
|
yading@10
|
1521 for(x=0; x<8; x++){
|
yading@10
|
1522 int d1, d2, ad1;
|
yading@10
|
1523 int p0= src[x-2*stride];
|
yading@10
|
1524 int p1= src[x-1*stride];
|
yading@10
|
1525 int p2= src[x+0*stride];
|
yading@10
|
1526 int p3= src[x+1*stride];
|
yading@10
|
1527 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
|
yading@10
|
1528
|
yading@10
|
1529 if (d<-2*strength) d1= 0;
|
yading@10
|
1530 else if(d<- strength) d1=-2*strength - d;
|
yading@10
|
1531 else if(d< strength) d1= d;
|
yading@10
|
1532 else if(d< 2*strength) d1= 2*strength - d;
|
yading@10
|
1533 else d1= 0;
|
yading@10
|
1534
|
yading@10
|
1535 p1 += d1;
|
yading@10
|
1536 p2 -= d1;
|
yading@10
|
1537 if(p1&256) p1= ~(p1>>31);
|
yading@10
|
1538 if(p2&256) p2= ~(p2>>31);
|
yading@10
|
1539
|
yading@10
|
1540 src[x-1*stride] = p1;
|
yading@10
|
1541 src[x+0*stride] = p2;
|
yading@10
|
1542
|
yading@10
|
1543 ad1= FFABS(d1)>>1;
|
yading@10
|
1544
|
yading@10
|
1545 d2= av_clip((p0-p3)/4, -ad1, ad1);
|
yading@10
|
1546
|
yading@10
|
1547 src[x-2*stride] = p0 - d2;
|
yading@10
|
1548 src[x+ stride] = p3 + d2;
|
yading@10
|
1549 }
|
yading@10
|
1550 }
|
yading@10
|
1551 }
|
yading@10
|
1552
|
yading@10
|
1553 static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
|
yading@10
|
1554 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
|
yading@10
|
1555 int y;
|
yading@10
|
1556 const int strength= ff_h263_loop_filter_strength[qscale];
|
yading@10
|
1557
|
yading@10
|
1558 for(y=0; y<8; y++){
|
yading@10
|
1559 int d1, d2, ad1;
|
yading@10
|
1560 int p0= src[y*stride-2];
|
yading@10
|
1561 int p1= src[y*stride-1];
|
yading@10
|
1562 int p2= src[y*stride+0];
|
yading@10
|
1563 int p3= src[y*stride+1];
|
yading@10
|
1564 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
|
yading@10
|
1565
|
yading@10
|
1566 if (d<-2*strength) d1= 0;
|
yading@10
|
1567 else if(d<- strength) d1=-2*strength - d;
|
yading@10
|
1568 else if(d< strength) d1= d;
|
yading@10
|
1569 else if(d< 2*strength) d1= 2*strength - d;
|
yading@10
|
1570 else d1= 0;
|
yading@10
|
1571
|
yading@10
|
1572 p1 += d1;
|
yading@10
|
1573 p2 -= d1;
|
yading@10
|
1574 if(p1&256) p1= ~(p1>>31);
|
yading@10
|
1575 if(p2&256) p2= ~(p2>>31);
|
yading@10
|
1576
|
yading@10
|
1577 src[y*stride-1] = p1;
|
yading@10
|
1578 src[y*stride+0] = p2;
|
yading@10
|
1579
|
yading@10
|
1580 ad1= FFABS(d1)>>1;
|
yading@10
|
1581
|
yading@10
|
1582 d2= av_clip((p0-p3)/4, -ad1, ad1);
|
yading@10
|
1583
|
yading@10
|
1584 src[y*stride-2] = p0 - d2;
|
yading@10
|
1585 src[y*stride+1] = p3 + d2;
|
yading@10
|
1586 }
|
yading@10
|
1587 }
|
yading@10
|
1588 }
|
yading@10
|
1589
|
yading@10
|
/**
 * Sum of absolute differences between two 16-pixel-wide blocks.
 *
 * @param v         unused
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size distance between rows, applied to both blocks
 * @param h         number of rows
 * @return sum over all rows and the 16 columns of |pix1 - pix2|
 */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int row, x;

    for (row = 0; row < h; row++) {
        for (x = 0; x < 16; x++)
            sad += abs(pix1[x] - pix2[x]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
|
yading@10
|
1617
|
yading@10
|
/**
 * Sum of absolute differences between a 16-pixel-wide block and a reference
 * in which every sample is avg2() of two horizontally adjacent reference
 * samples (avg2 is defined elsewhere in this file; presumably a rounded
 * two-sample mean).
 *
 * @param v         unused
 * @param pix1      block to compare
 * @param pix2      reference; 17 bytes are read per row
 * @param line_size distance between rows, applied to both blocks
 * @param h         number of rows
 * @return accumulated absolute difference
 */
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int row, x;

    for (row = 0; row < h; row++) {
        for (x = 0; x < 16; x++)
            sad += abs(pix1[x] - avg2(pix2[x], pix2[x + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
|
yading@10
|
1645
|
yading@10
|
/**
 * Sum of absolute differences between a 16-pixel-wide block and a reference
 * in which every sample is avg2() of two vertically adjacent reference
 * samples (avg2 is defined elsewhere in this file; presumably a rounded
 * two-sample mean).
 *
 * @param v         unused
 * @param pix1      block to compare
 * @param pix2      reference; h + 1 rows are read
 * @param line_size distance between rows, applied to both blocks
 * @param h         number of rows
 * @return accumulated absolute difference
 */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;  /* reference row under the current one */
    int sad = 0;
    int row, x;

    for (row = 0; row < h; row++) {
        for (x = 0; x < 16; x++)
            sad += abs(pix1[x] - avg2(pix2[x], below[x]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sad;
}
|
yading@10
|
1675
|
yading@10
|
/**
 * Sum of absolute differences between a 16-pixel-wide block and a reference
 * in which every sample is avg4() of the 2x2 neighbourhood starting at that
 * position (avg4 is defined elsewhere in this file; presumably a rounded
 * four-sample mean).
 *
 * @param v         unused
 * @param pix1      block to compare
 * @param pix2      reference; 17 bytes per row and h + 1 rows are read
 * @param line_size distance between rows, applied to both blocks
 * @param h         number of rows
 * @return accumulated absolute difference
 */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;  /* reference row under the current one */
    int sad = 0;
    int row, x;

    for (row = 0; row < h; row++) {
        for (x = 0; x < 16; x++)
            sad += abs(pix1[x] - avg4(pix2[x], pix2[x + 1], below[x], below[x + 1]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sad;
}
|
yading@10
|
1705
|
yading@10
|
/**
 * Sum of absolute differences between two 8-pixel-wide blocks.
 *
 * @param v         unused
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size distance between rows, applied to both blocks
 * @param h         number of rows
 * @return sum over all rows and the 8 columns of |pix1 - pix2|
 */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int row, x;

    for (row = 0; row < h; row++) {
        for (x = 0; x < 8; x++)
            sad += abs(pix1[x] - pix2[x]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
|
yading@10
|
1725
|
yading@10
|
/**
 * Sum of absolute differences between an 8-pixel-wide block and a reference
 * in which every sample is avg2() of two horizontally adjacent reference
 * samples (avg2 is defined elsewhere in this file; presumably a rounded
 * two-sample mean).
 *
 * @param v         unused
 * @param pix1      block to compare
 * @param pix2      reference; 9 bytes are read per row
 * @param line_size distance between rows, applied to both blocks
 * @param h         number of rows
 * @return accumulated absolute difference
 */
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int row, x;

    for (row = 0; row < h; row++) {
        for (x = 0; x < 8; x++)
            sad += abs(pix1[x] - avg2(pix2[x], pix2[x + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
|
yading@10
|
1745
|
yading@10
|
/**
 * Sum of absolute differences between an 8-pixel-wide block and a reference
 * in which every sample is avg2() of two vertically adjacent reference
 * samples (avg2 is defined elsewhere in this file; presumably a rounded
 * two-sample mean).
 *
 * @param v         unused
 * @param pix1      block to compare
 * @param pix2      reference; h + 1 rows are read
 * @param line_size distance between rows, applied to both blocks
 * @param h         number of rows
 * @return accumulated absolute difference
 */
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;  /* reference row under the current one */
    int sad = 0;
    int row, x;

    for (row = 0; row < h; row++) {
        for (x = 0; x < 8; x++)
            sad += abs(pix1[x] - avg2(pix2[x], below[x]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sad;
}
|
yading@10
|
1767
|
yading@10
|
/**
 * Sum of absolute differences between an 8-pixel-wide block and a reference
 * in which every sample is avg4() of the 2x2 neighbourhood starting at that
 * position (avg4 is defined elsewhere in this file; presumably a rounded
 * four-sample mean).
 *
 * @param v         unused
 * @param pix1      block to compare
 * @param pix2      reference; 9 bytes per row and h + 1 rows are read
 * @param line_size distance between rows, applied to both blocks
 * @param h         number of rows
 * @return accumulated absolute difference
 */
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;  /* reference row under the current one */
    int sad = 0;
    int row, x;

    for (row = 0; row < h; row++) {
        for (x = 0; x < 8; x++)
            sad += abs(pix1[x] - avg4(pix2[x], pix2[x + 1], below[x], below[x + 1]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sad;
}
|
yading@10
|
1789
|
yading@10
|
1790 static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
|
yading@10
|
1791 MpegEncContext *c = v;
|
yading@10
|
1792 int score1=0;
|
yading@10
|
1793 int score2=0;
|
yading@10
|
1794 int x,y;
|
yading@10
|
1795
|
yading@10
|
1796 for(y=0; y<h; y++){
|
yading@10
|
1797 for(x=0; x<16; x++){
|
yading@10
|
1798 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
|
yading@10
|
1799 }
|
yading@10
|
1800 if(y+1<h){
|
yading@10
|
1801 for(x=0; x<15; x++){
|
yading@10
|
1802 score2+= FFABS( s1[x ] - s1[x +stride]
|
yading@10
|
1803 - s1[x+1] + s1[x+1+stride])
|
yading@10
|
1804 -FFABS( s2[x ] - s2[x +stride]
|
yading@10
|
1805 - s2[x+1] + s2[x+1+stride]);
|
yading@10
|
1806 }
|
yading@10
|
1807 }
|
yading@10
|
1808 s1+= stride;
|
yading@10
|
1809 s2+= stride;
|
yading@10
|
1810 }
|
yading@10
|
1811
|
yading@10
|
1812 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
|
yading@10
|
1813 else return score1 + FFABS(score2)*8;
|
yading@10
|
1814 }
|
yading@10
|
1815
|
yading@10
|
1816 static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
|
yading@10
|
1817 MpegEncContext *c = v;
|
yading@10
|
1818 int score1=0;
|
yading@10
|
1819 int score2=0;
|
yading@10
|
1820 int x,y;
|
yading@10
|
1821
|
yading@10
|
1822 for(y=0; y<h; y++){
|
yading@10
|
1823 for(x=0; x<8; x++){
|
yading@10
|
1824 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
|
yading@10
|
1825 }
|
yading@10
|
1826 if(y+1<h){
|
yading@10
|
1827 for(x=0; x<7; x++){
|
yading@10
|
1828 score2+= FFABS( s1[x ] - s1[x +stride]
|
yading@10
|
1829 - s1[x+1] + s1[x+1+stride])
|
yading@10
|
1830 -FFABS( s2[x ] - s2[x +stride]
|
yading@10
|
1831 - s2[x+1] + s2[x+1+stride]);
|
yading@10
|
1832 }
|
yading@10
|
1833 }
|
yading@10
|
1834 s1+= stride;
|
yading@10
|
1835 s2+= stride;
|
yading@10
|
1836 }
|
yading@10
|
1837
|
yading@10
|
1838 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
|
yading@10
|
1839 else return score1 + FFABS(score2)*8;
|
yading@10
|
1840 }
|
yading@10
|
1841
|
yading@10
|
1842 static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
|
yading@10
|
1843 int i;
|
yading@10
|
1844 unsigned int sum=0;
|
yading@10
|
1845
|
yading@10
|
1846 for(i=0; i<8*8; i++){
|
yading@10
|
1847 int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
|
yading@10
|
1848 int w= weight[i];
|
yading@10
|
1849 b>>= RECON_SHIFT;
|
yading@10
|
1850 av_assert2(-512<b && b<512);
|
yading@10
|
1851
|
yading@10
|
1852 sum += (w*b)*(w*b)>>4;
|
yading@10
|
1853 }
|
yading@10
|
1854 return sum>>2;
|
yading@10
|
1855 }
|
yading@10
|
1856
|
yading@10
|
1857 static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
|
yading@10
|
1858 int i;
|
yading@10
|
1859
|
yading@10
|
1860 for(i=0; i<8*8; i++){
|
yading@10
|
1861 rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
|
yading@10
|
1862 }
|
yading@10
|
1863 }
|
yading@10
|
1864
|
yading@10
|
/* Comparison function that ignores all of its arguments and always reports
 * a score of 0; ff_set_cmp installs it for FF_CMP_ZERO. */
1865 static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
|
yading@10
|
1866 return 0;
|
yading@10
|
1867 }
|
yading@10
|
1868
|
yading@10
|
1869 void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
|
yading@10
|
1870 int i;
|
yading@10
|
1871
|
yading@10
|
1872 memset(cmp, 0, sizeof(void*)*6);
|
yading@10
|
1873
|
yading@10
|
1874 for(i=0; i<6; i++){
|
yading@10
|
1875 switch(type&0xFF){
|
yading@10
|
1876 case FF_CMP_SAD:
|
yading@10
|
1877 cmp[i]= c->sad[i];
|
yading@10
|
1878 break;
|
yading@10
|
1879 case FF_CMP_SATD:
|
yading@10
|
1880 cmp[i]= c->hadamard8_diff[i];
|
yading@10
|
1881 break;
|
yading@10
|
1882 case FF_CMP_SSE:
|
yading@10
|
1883 cmp[i]= c->sse[i];
|
yading@10
|
1884 break;
|
yading@10
|
1885 case FF_CMP_DCT:
|
yading@10
|
1886 cmp[i]= c->dct_sad[i];
|
yading@10
|
1887 break;
|
yading@10
|
1888 case FF_CMP_DCT264:
|
yading@10
|
1889 cmp[i]= c->dct264_sad[i];
|
yading@10
|
1890 break;
|
yading@10
|
1891 case FF_CMP_DCTMAX:
|
yading@10
|
1892 cmp[i]= c->dct_max[i];
|
yading@10
|
1893 break;
|
yading@10
|
1894 case FF_CMP_PSNR:
|
yading@10
|
1895 cmp[i]= c->quant_psnr[i];
|
yading@10
|
1896 break;
|
yading@10
|
1897 case FF_CMP_BIT:
|
yading@10
|
1898 cmp[i]= c->bit[i];
|
yading@10
|
1899 break;
|
yading@10
|
1900 case FF_CMP_RD:
|
yading@10
|
1901 cmp[i]= c->rd[i];
|
yading@10
|
1902 break;
|
yading@10
|
1903 case FF_CMP_VSAD:
|
yading@10
|
1904 cmp[i]= c->vsad[i];
|
yading@10
|
1905 break;
|
yading@10
|
1906 case FF_CMP_VSSE:
|
yading@10
|
1907 cmp[i]= c->vsse[i];
|
yading@10
|
1908 break;
|
yading@10
|
1909 case FF_CMP_ZERO:
|
yading@10
|
1910 cmp[i]= zero_cmp;
|
yading@10
|
1911 break;
|
yading@10
|
1912 case FF_CMP_NSSE:
|
yading@10
|
1913 cmp[i]= c->nsse[i];
|
yading@10
|
1914 break;
|
yading@10
|
1915 #if CONFIG_DWT
|
yading@10
|
1916 case FF_CMP_W53:
|
yading@10
|
1917 cmp[i]= c->w53[i];
|
yading@10
|
1918 break;
|
yading@10
|
1919 case FF_CMP_W97:
|
yading@10
|
1920 cmp[i]= c->w97[i];
|
yading@10
|
1921 break;
|
yading@10
|
1922 #endif
|
yading@10
|
1923 default:
|
yading@10
|
1924 av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
|
yading@10
|
1925 }
|
yading@10
|
1926 }
|
yading@10
|
1927 }
|
yading@10
|
1928
|
yading@10
|
/**
 * Add src to dst byte by byte (each byte wraps modulo 256): dst[i] += src[i].
 *
 * Full machine words are processed several bytes at a time: the low 7 bits
 * of every byte are added without carrying into the neighbouring byte, and
 * the top bit of each byte is then fixed up with an XOR. Remaining bytes are
 * handled one at a time.
 *
 * Words are moved with memcpy so that neither buffer has to be aligned and
 * no byte array is accessed through a long lvalue. The word-loop bound is
 * computed in signed arithmetic; comparing against `w - sizeof(long)`
 * directly would be evaluated unsigned and, for w smaller than a word, let
 * the loop run past both buffers.
 *
 * @param dst destination, updated in place
 * @param src bytes to add
 * @param w   number of bytes; values <= 0 do nothing
 */
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
    /* 0x7f / 0x80 replicated into every byte of a word (same role as the
     * pb_7f / pb_80 masks used elsewhere in this file). */
    const unsigned long lo7 = ~0UL / 255 * 0x7f;
    const unsigned long hi1 = ~0UL / 255 * 0x80;
    const int word = (int)sizeof(unsigned long);
    int i;

    for (i = 0; i <= w - word; i += word) {
        unsigned long a, b, sum;

        memcpy(&a, src + i, sizeof(a));
        memcpy(&b, dst + i, sizeof(b));
        sum = ((a & lo7) + (b & lo7)) ^ ((a ^ b) & hi1);
        memcpy(dst + i, &sum, sizeof(sum));
    }
    for (; i < w; i++)
        dst[i] += src[i];
}
|
yading@10
|
1939
|
yading@10
|
/**
 * Byte-wise difference (each byte wraps modulo 256):
 * dst[i] = src1[i] - src2[i].
 *
 * Full machine words are processed several bytes at a time: the top bit of
 * every byte of the minuend is forced on and the top bit of every byte of
 * the subtrahend is masked off, so no borrow crosses a byte boundary; the
 * top bits are then fixed up with an XOR. Remaining bytes are handled one
 * at a time.
 *
 * When HAVE_FAST_UNALIGNED is not set and src2 is not word aligned, the
 * bulk is done per byte, 8 bytes per step, instead of per word.
 *
 * Words are moved with memcpy so that no buffer has to be aligned and no
 * byte array is accessed through a long lvalue. The word-loop bound is
 * computed in signed arithmetic; comparing against `w - sizeof(long)`
 * directly would be evaluated unsigned and, for w smaller than a word, let
 * the loop run past the buffers. The alignment test goes through uintptr_t
 * because long may be narrower than a pointer.
 *
 * @param dst  output
 * @param src1 minuend bytes
 * @param src2 subtrahend bytes
 * @param w    number of bytes; values <= 0 do nothing
 */
static void diff_bytes_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w){
    /* 0x7f / 0x80 replicated into every byte of a word (same role as the
     * pb_7f / pb_80 masks used elsewhere in this file). */
    const unsigned long lo7 = ~0UL / 255 * 0x7f;
    const unsigned long hi1 = ~0UL / 255 * 0x80;
    const int word = (int)sizeof(unsigned long);
    int i = 0;

#if !HAVE_FAST_UNALIGNED
    if ((uintptr_t)src2 & (sizeof(long) - 1)) {
        for (; i + 7 < w; i += 8) {
            int k;

            for (k = 0; k < 8; k++)
                dst[i + k] = src1[i + k] - src2[i + k];
        }
    } else
#endif
    for (; i <= w - word; i += word) {
        unsigned long a, b, diff;

        memcpy(&a, src1 + i, sizeof(a));
        memcpy(&b, src2 + i, sizeof(b));
        diff = ((a | hi1) - (b & lo7)) ^ ((a ^ b ^ hi1) & hi1);
        memcpy(dst + i, &diff, sizeof(diff));
    }
    for (; i < w; i++)
        dst[i] = src1[i] - src2[i];
}
|
yading@10
|
1964
|
yading@10
|
/**
 * Rebuild a row from residuals using a three-way mid_pred() predictor.
 *
 * For every position the prediction is mid_pred() of the previous output
 * byte, the byte from src1 at the same position, and
 * (previous + src1[i] - previous src1 byte) & 0xFF; the residual diff[i] is
 * added and the result is stored as an 8-bit value. mid_pred is defined
 * elsewhere (presumably the median of its three arguments).
 *
 * @param dst      output row
 * @param src1     reference row read alongside the output
 * @param diff     residuals
 * @param w        number of bytes
 * @param left     in: byte preceding dst[0]; out: last byte produced
 * @param left_top in: byte preceding src1[0]; out: last src1 byte consumed
 */
static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
    uint8_t cur     = *left;
    uint8_t topleft = *left_top;
    int x;

    for (x = 0; x < w; x++) {
        const uint8_t top  = src1[x];
        const int     grad = (cur + top - topleft) & 0xFF;

        cur     = mid_pred(cur, top, grad) + diff[x];
        topleft = top;
        dst[x]  = cur;
    }

    *left     = cur;
    *left_top = topleft;
}
|
yading@10
|
1981
|
yading@10
|
/**
 * Produce residuals of src2 against a three-way mid_pred() predictor.
 *
 * For every position the prediction is mid_pred() of the previous src2
 * byte, the byte from src1 at the same position, and
 * (previous + src1[i] - previous src1 byte) & 0xFF; dst receives
 * src2[i] minus that prediction, stored as an 8-bit value. mid_pred is
 * defined elsewhere (presumably the median of its three arguments).
 *
 * @param dst      output residuals
 * @param src1     reference row read alongside src2
 * @param src2     row being predicted
 * @param w        number of bytes
 * @param left     in: byte preceding src2[0]; out: last src2 byte consumed
 * @param left_top in: byte preceding src1[0]; out: last src1 byte consumed
 */
static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
    uint8_t cur     = *left;
    uint8_t topleft = *left_top;
    int x;

    for (x = 0; x < w; x++) {
        const uint8_t top  = src1[x];
        const int     grad = (cur + top - topleft) & 0xFF;
        const int     pred = mid_pred(cur, top, grad);

        topleft = top;
        cur     = src2[x];
        dst[x]  = cur - pred;
    }

    *left     = cur;
    *left_top = topleft;
}
|
yading@10
|
1999
|
yading@10
|
/**
 * Running-sum ("left") prediction: dst[i] receives the low 8 bits of
 * acc + src[0] + ... + src[i].
 *
 * @param dst output row (w bytes)
 * @param src residual row (w bytes)
 * @param w   number of bytes to process
 * @param acc initial accumulator value
 * @return the accumulator after the last byte (not truncated to 8 bits)
 */
static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc)
{
    int x;

    for (x = 0; x < w; x++) {
        acc   += src[x];
        dst[x] = acc;
    }

    return acc;
}
|
yading@10
|
2018
|
yading@10
|
/* Byte offsets of the four channels inside one 4-byte pixel. */
#if HAVE_BIGENDIAN
#define B 3
#define G 2
#define R 1
#define A 0
#else
#define B 0
#define G 1
#define R 2
#define A 3
#endif
/**
 * Per-channel running-sum prediction over 4-byte pixels.
 *
 * Each channel accumulator is advanced by the matching source byte and its
 * low 8 bits are written to the matching destination byte.
 *
 * @param dst   output pixels (4*w bytes)
 * @param src   residual pixels (4*w bytes)
 * @param w     number of pixels
 * @param red   in/out accumulator for the R byte
 * @param green in/out accumulator for the G byte
 * @param blue  in/out accumulator for the B byte
 * @param alpha in/out accumulator for the A byte
 */
static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha)
{
    int acc_r = *red;
    int acc_g = *green;
    int acc_b = *blue;
    int acc_a = *alpha;
    int n;

    for (n = 0; n < w; n++) {
        const uint8_t *in  = src + 4 * n;
        uint8_t       *out = dst + 4 * n;

        /* read the whole pixel before writing it back */
        acc_b += in[B];
        acc_g += in[G];
        acc_r += in[R];
        acc_a += in[A];

        out[B] = acc_b;
        out[G] = acc_g;
        out[R] = acc_r;
        out[A] = acc_a;
    }

    *red   = acc_r;
    *green = acc_g;
    *blue  = acc_b;
    *alpha = acc_a;
}
#undef B
#undef G
#undef R
#undef A
|
yading@10
|
2059
|
yading@10
|
/**
 * Butterfly helpers.
 *
 * BUTTERFLY2(o1,o2,i1,i2): o1 = i1 + i2, o2 = i1 - i2.
 * BUTTERFLY1(x,y):         in-place version, (x, y) <- (x + y, x - y).
 * BUTTERFLYA(x,y):         |x + y| + |x - y| as an expression.
 *
 * The statement macros are wrapped in do { } while (0) so that they act as a
 * single statement (safe in an unbraced if/else) and must be followed by a
 * semicolon. BUTTERFLY1 uses prefixed temporaries so that arguments named
 * "a" or "b" are not captured.
 */
#define BUTTERFLY2(o1,o2,i1,i2) \
do {\
    o1= (i1)+(i2);\
    o2= (i1)-(i2);\
} while (0)

#define BUTTERFLY1(x,y) \
do {\
    const int bf_tmp_a= (x);\
    const int bf_tmp_b= (y);\
    x= bf_tmp_a+bf_tmp_b;\
    y= bf_tmp_a-bf_tmp_b;\
} while (0)

#define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
|
yading@10
|
2074
|
yading@10
|
/**
 * Sum of absolute values of the 8x8 butterfly (Hadamard-style) transform of
 * the difference src - dst.
 *
 * @param s      unused here
 * @param dst    first 8x8 block
 * @param src    second 8x8 block
 * @param stride distance in bytes between rows of both blocks
 * @param h      must be 8 (checked with av_assert2)
 * @return the accumulated sum of absolute transformed values
 */
static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h)
{
    int temp[64];
    int total = 0;
    int k;

    av_assert2(h == 8);

    /* horizontal pass: three butterfly stages on each row of differences */
    for (k = 0; k < 8; k++) {
        const uint8_t *p = src + stride * k;
        const uint8_t *q = dst + stride * k;
        int *row = temp + 8 * k;

        BUTTERFLY2(row[0], row[1], p[0] - q[0], p[1] - q[1]);
        BUTTERFLY2(row[2], row[3], p[2] - q[2], p[3] - q[3]);
        BUTTERFLY2(row[4], row[5], p[4] - q[4], p[5] - q[5]);
        BUTTERFLY2(row[6], row[7], p[6] - q[6], p[7] - q[7]);

        BUTTERFLY1(row[0], row[2]);
        BUTTERFLY1(row[1], row[3]);
        BUTTERFLY1(row[4], row[6]);
        BUTTERFLY1(row[5], row[7]);

        BUTTERFLY1(row[0], row[4]);
        BUTTERFLY1(row[1], row[5]);
        BUTTERFLY1(row[2], row[6]);
        BUTTERFLY1(row[3], row[7]);
    }

    /* vertical pass: two stages in place, the last stage folded into the sum */
    for (k = 0; k < 8; k++) {
        int *col = temp + k;

        BUTTERFLY1(col[8 * 0], col[8 * 1]);
        BUTTERFLY1(col[8 * 2], col[8 * 3]);
        BUTTERFLY1(col[8 * 4], col[8 * 5]);
        BUTTERFLY1(col[8 * 6], col[8 * 7]);

        BUTTERFLY1(col[8 * 0], col[8 * 2]);
        BUTTERFLY1(col[8 * 1], col[8 * 3]);
        BUTTERFLY1(col[8 * 4], col[8 * 6]);
        BUTTERFLY1(col[8 * 5], col[8 * 7]);

        total += BUTTERFLYA(col[8 * 0], col[8 * 4])
               + BUTTERFLYA(col[8 * 1], col[8 * 5])
               + BUTTERFLYA(col[8 * 2], col[8 * 6])
               + BUTTERFLYA(col[8 * 3], col[8 * 7]);
    }

    return total;
}
|
yading@10
|
2119
|
yading@10
|
/**
 * Sum of absolute values of the 8x8 butterfly (Hadamard-style) transform of
 * a single block, with the absolute value of the first (DC) coefficient
 * removed from the total.
 *
 * @param s      unused here
 * @param src    8x8 block to measure
 * @param dummy  unused
 * @param stride distance in bytes between rows
 * @param h      must be 8 (checked with av_assert2)
 * @return the accumulated sum minus |temp[0] + temp[32]|
 */
static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h)
{
    int temp[64];
    int total = 0;
    int k;

    av_assert2(h == 8);

    /* horizontal pass: three butterfly stages on each row of pixels */
    for (k = 0; k < 8; k++) {
        const uint8_t *p = src + stride * k;
        int *row = temp + 8 * k;

        BUTTERFLY2(row[0], row[1], p[0], p[1]);
        BUTTERFLY2(row[2], row[3], p[2], p[3]);
        BUTTERFLY2(row[4], row[5], p[4], p[5]);
        BUTTERFLY2(row[6], row[7], p[6], p[7]);

        BUTTERFLY1(row[0], row[2]);
        BUTTERFLY1(row[1], row[3]);
        BUTTERFLY1(row[4], row[6]);
        BUTTERFLY1(row[5], row[7]);

        BUTTERFLY1(row[0], row[4]);
        BUTTERFLY1(row[1], row[5]);
        BUTTERFLY1(row[2], row[6]);
        BUTTERFLY1(row[3], row[7]);
    }

    /* vertical pass: two stages in place, the last stage folded into the sum */
    for (k = 0; k < 8; k++) {
        int *col = temp + k;

        BUTTERFLY1(col[8 * 0], col[8 * 1]);
        BUTTERFLY1(col[8 * 2], col[8 * 3]);
        BUTTERFLY1(col[8 * 4], col[8 * 5]);
        BUTTERFLY1(col[8 * 6], col[8 * 7]);

        BUTTERFLY1(col[8 * 0], col[8 * 2]);
        BUTTERFLY1(col[8 * 1], col[8 * 3]);
        BUTTERFLY1(col[8 * 4], col[8 * 6]);
        BUTTERFLY1(col[8 * 5], col[8 * 7]);

        total += BUTTERFLYA(col[8 * 0], col[8 * 4])
               + BUTTERFLYA(col[8 * 1], col[8 * 5])
               + BUTTERFLYA(col[8 * 2], col[8 * 6])
               + BUTTERFLYA(col[8 * 3], col[8 * 7]);
    }

    /* remove the mean (DC) contribution */
    total -= FFABS(temp[8 * 0] + temp[8 * 4]);

    return total;
}
|
yading@10
|
2167
|
yading@10
|
2168 static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
|
yading@10
|
2169 MpegEncContext * const s= (MpegEncContext *)c;
|
yading@10
|
2170 LOCAL_ALIGNED_16(int16_t, temp, [64]);
|
yading@10
|
2171
|
yading@10
|
2172 av_assert2(h==8);
|
yading@10
|
2173
|
yading@10
|
2174 s->dsp.diff_pixels(temp, src1, src2, stride);
|
yading@10
|
2175 s->dsp.fdct(temp);
|
yading@10
|
2176 return s->dsp.sum_abs_dctelem(temp);
|
yading@10
|
2177 }
|
yading@10
|
2178
|
yading@10
|
#if CONFIG_GPL
/*
 * One 8-point integer transform pass. The user supplies SRC(x) to read input
 * element x and DST(x, v) to consume output element x with value v. All
 * inputs are read before the first DST() is issued.
 */
#define DCT8_1D {\
    const int s07 = SRC(0) + SRC(7);\
    const int d07 = SRC(0) - SRC(7);\
    const int s16 = SRC(1) + SRC(6);\
    const int d16 = SRC(1) - SRC(6);\
    const int s25 = SRC(2) + SRC(5);\
    const int d25 = SRC(2) - SRC(5);\
    const int s34 = SRC(3) + SRC(4);\
    const int d34 = SRC(3) - SRC(4);\
    const int a0 = s07 + s34;\
    const int a1 = s16 + s25;\
    const int a2 = s07 - s34;\
    const int a3 = s16 - s25;\
    const int a4 = d16 + d25 + (d07 + (d07>>1));\
    const int a5 = d07 - d34 - (d25 + (d25>>1));\
    const int a6 = d07 + d34 - (d16 + (d16>>1));\
    const int a7 = d16 - d25 + (d34 + (d34>>1));\
    DST(0,  a0 + a1     ) ;\
    DST(1,  a4 + (a7>>2)) ;\
    DST(2,  a2 + (a3>>1)) ;\
    DST(3,  a5 + (a6>>2)) ;\
    DST(4,  a0 - a1     ) ;\
    DST(5,  a6 - (a5>>2)) ;\
    DST(6, (a2>>1) - a3 ) ;\
    DST(7, (a4>>2) - a7 ) ;\
}

/**
 * Sum of absolute coefficients of the DCT8_1D transform, applied along rows
 * and then columns, of the difference between two 8x8 blocks.
 *
 * @param c      MpegEncContext passed as void *
 * @param src1   first block
 * @param src2   second block
 * @param stride distance in bytes between rows
 * @param h      not used by this function
 */
static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h)
{
    MpegEncContext *const ctx = (MpegEncContext *)c;
    int16_t dct[8][8];
    int sum = 0;
    int k;

    ctx->dsp.diff_pixels(dct[0], src1, src2, stride);

    /* first pass: transform each row in place */
#define SRC(x) dct[k][x]
#define DST(x,v) dct[k][x]= v
    for (k = 0; k < 8; k++)
        DCT8_1D
#undef SRC
#undef DST

    /* second pass: transform each column, accumulating absolute outputs */
#define SRC(x) dct[x][k]
#define DST(x,v) sum += FFABS(v)
    for (k = 0; k < 8; k++)
        DCT8_1D
#undef SRC
#undef DST

    return sum;
}
#endif
|
yading@10
|
2231
|
yading@10
|
2232 static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
|
yading@10
|
2233 MpegEncContext * const s= (MpegEncContext *)c;
|
yading@10
|
2234 LOCAL_ALIGNED_16(int16_t, temp, [64]);
|
yading@10
|
2235 int sum=0, i;
|
yading@10
|
2236
|
yading@10
|
2237 av_assert2(h==8);
|
yading@10
|
2238
|
yading@10
|
2239 s->dsp.diff_pixels(temp, src1, src2, stride);
|
yading@10
|
2240 s->dsp.fdct(temp);
|
yading@10
|
2241
|
yading@10
|
2242 for(i=0; i<64; i++)
|
yading@10
|
2243 sum= FFMAX(sum, FFABS(temp[i]));
|
yading@10
|
2244
|
yading@10
|
2245 return sum;
|
yading@10
|
2246 }
|
yading@10
|
2247
|
yading@10
|
2248 static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
|
yading@10
|
2249 MpegEncContext * const s= (MpegEncContext *)c;
|
yading@10
|
2250 LOCAL_ALIGNED_16(int16_t, temp, [64*2]);
|
yading@10
|
2251 int16_t * const bak = temp+64;
|
yading@10
|
2252 int sum=0, i;
|
yading@10
|
2253
|
yading@10
|
2254 av_assert2(h==8);
|
yading@10
|
2255 s->mb_intra=0;
|
yading@10
|
2256
|
yading@10
|
2257 s->dsp.diff_pixels(temp, src1, src2, stride);
|
yading@10
|
2258
|
yading@10
|
2259 memcpy(bak, temp, 64*sizeof(int16_t));
|
yading@10
|
2260
|
yading@10
|
2261 s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
|
yading@10
|
2262 s->dct_unquantize_inter(s, temp, 0, s->qscale);
|
yading@10
|
2263 ff_simple_idct_8(temp); //FIXME
|
yading@10
|
2264
|
yading@10
|
2265 for(i=0; i<64; i++)
|
yading@10
|
2266 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
|
yading@10
|
2267
|
yading@10
|
2268 return sum;
|
yading@10
|
2269 }
|
yading@10
|
2270
|
yading@10
|
2271 static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
|
yading@10
|
2272 MpegEncContext * const s= (MpegEncContext *)c;
|
yading@10
|
2273 const uint8_t *scantable= s->intra_scantable.permutated;
|
yading@10
|
2274 LOCAL_ALIGNED_16(int16_t, temp, [64]);
|
yading@10
|
2275 LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
|
yading@10
|
2276 LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
|
yading@10
|
2277 int i, last, run, bits, level, distortion, start_i;
|
yading@10
|
2278 const int esc_length= s->ac_esc_length;
|
yading@10
|
2279 uint8_t * length;
|
yading@10
|
2280 uint8_t * last_length;
|
yading@10
|
2281
|
yading@10
|
2282 av_assert2(h==8);
|
yading@10
|
2283
|
yading@10
|
2284 copy_block8(lsrc1, src1, 8, stride, 8);
|
yading@10
|
2285 copy_block8(lsrc2, src2, 8, stride, 8);
|
yading@10
|
2286
|
yading@10
|
2287 s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);
|
yading@10
|
2288
|
yading@10
|
2289 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
|
yading@10
|
2290
|
yading@10
|
2291 bits=0;
|
yading@10
|
2292
|
yading@10
|
2293 if (s->mb_intra) {
|
yading@10
|
2294 start_i = 1;
|
yading@10
|
2295 length = s->intra_ac_vlc_length;
|
yading@10
|
2296 last_length= s->intra_ac_vlc_last_length;
|
yading@10
|
2297 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
|
yading@10
|
2298 } else {
|
yading@10
|
2299 start_i = 0;
|
yading@10
|
2300 length = s->inter_ac_vlc_length;
|
yading@10
|
2301 last_length= s->inter_ac_vlc_last_length;
|
yading@10
|
2302 }
|
yading@10
|
2303
|
yading@10
|
2304 if(last>=start_i){
|
yading@10
|
2305 run=0;
|
yading@10
|
2306 for(i=start_i; i<last; i++){
|
yading@10
|
2307 int j= scantable[i];
|
yading@10
|
2308 level= temp[j];
|
yading@10
|
2309
|
yading@10
|
2310 if(level){
|
yading@10
|
2311 level+=64;
|
yading@10
|
2312 if((level&(~127)) == 0){
|
yading@10
|
2313 bits+= length[UNI_AC_ENC_INDEX(run, level)];
|
yading@10
|
2314 }else
|
yading@10
|
2315 bits+= esc_length;
|
yading@10
|
2316 run=0;
|
yading@10
|
2317 }else
|
yading@10
|
2318 run++;
|
yading@10
|
2319 }
|
yading@10
|
2320 i= scantable[last];
|
yading@10
|
2321
|
yading@10
|
2322 level= temp[i] + 64;
|
yading@10
|
2323
|
yading@10
|
2324 av_assert2(level - 64);
|
yading@10
|
2325
|
yading@10
|
2326 if((level&(~127)) == 0){
|
yading@10
|
2327 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
|
yading@10
|
2328 }else
|
yading@10
|
2329 bits+= esc_length;
|
yading@10
|
2330
|
yading@10
|
2331 }
|
yading@10
|
2332
|
yading@10
|
2333 if(last>=0){
|
yading@10
|
2334 if(s->mb_intra)
|
yading@10
|
2335 s->dct_unquantize_intra(s, temp, 0, s->qscale);
|
yading@10
|
2336 else
|
yading@10
|
2337 s->dct_unquantize_inter(s, temp, 0, s->qscale);
|
yading@10
|
2338 }
|
yading@10
|
2339
|
yading@10
|
2340 s->dsp.idct_add(lsrc2, 8, temp);
|
yading@10
|
2341
|
yading@10
|
2342 distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);
|
yading@10
|
2343
|
yading@10
|
2344 return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
|
yading@10
|
2345 }
|
yading@10
|
2346
|
yading@10
|
2347 static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
|
yading@10
|
2348 MpegEncContext * const s= (MpegEncContext *)c;
|
yading@10
|
2349 const uint8_t *scantable= s->intra_scantable.permutated;
|
yading@10
|
2350 LOCAL_ALIGNED_16(int16_t, temp, [64]);
|
yading@10
|
2351 int i, last, run, bits, level, start_i;
|
yading@10
|
2352 const int esc_length= s->ac_esc_length;
|
yading@10
|
2353 uint8_t * length;
|
yading@10
|
2354 uint8_t * last_length;
|
yading@10
|
2355
|
yading@10
|
2356 av_assert2(h==8);
|
yading@10
|
2357
|
yading@10
|
2358 s->dsp.diff_pixels(temp, src1, src2, stride);
|
yading@10
|
2359
|
yading@10
|
2360 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
|
yading@10
|
2361
|
yading@10
|
2362 bits=0;
|
yading@10
|
2363
|
yading@10
|
2364 if (s->mb_intra) {
|
yading@10
|
2365 start_i = 1;
|
yading@10
|
2366 length = s->intra_ac_vlc_length;
|
yading@10
|
2367 last_length= s->intra_ac_vlc_last_length;
|
yading@10
|
2368 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
|
yading@10
|
2369 } else {
|
yading@10
|
2370 start_i = 0;
|
yading@10
|
2371 length = s->inter_ac_vlc_length;
|
yading@10
|
2372 last_length= s->inter_ac_vlc_last_length;
|
yading@10
|
2373 }
|
yading@10
|
2374
|
yading@10
|
2375 if(last>=start_i){
|
yading@10
|
2376 run=0;
|
yading@10
|
2377 for(i=start_i; i<last; i++){
|
yading@10
|
2378 int j= scantable[i];
|
yading@10
|
2379 level= temp[j];
|
yading@10
|
2380
|
yading@10
|
2381 if(level){
|
yading@10
|
2382 level+=64;
|
yading@10
|
2383 if((level&(~127)) == 0){
|
yading@10
|
2384 bits+= length[UNI_AC_ENC_INDEX(run, level)];
|
yading@10
|
2385 }else
|
yading@10
|
2386 bits+= esc_length;
|
yading@10
|
2387 run=0;
|
yading@10
|
2388 }else
|
yading@10
|
2389 run++;
|
yading@10
|
2390 }
|
yading@10
|
2391 i= scantable[last];
|
yading@10
|
2392
|
yading@10
|
2393 level= temp[i] + 64;
|
yading@10
|
2394
|
yading@10
|
2395 av_assert2(level - 64);
|
yading@10
|
2396
|
yading@10
|
2397 if((level&(~127)) == 0){
|
yading@10
|
2398 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
|
yading@10
|
2399 }else
|
yading@10
|
2400 bits+= esc_length;
|
yading@10
|
2401 }
|
yading@10
|
2402
|
yading@10
|
2403 return bits;
|
yading@10
|
2404 }
|
yading@10
|
2405
|
yading@10
|
/*
 * Generates vsad_intra<size>_c(): the sum of absolute differences between
 * each pixel and the pixel one row below it, over rows 0..h-2 and the first
 * <size> columns. The c and dummy arguments are not used.
 */
#define VSAD_INTRA(size) \
static int vsad_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int total = 0;                                                      \
    int row, col;                                                       \
                                                                        \
    for (row = 1; row < h; row++, s += stride) {                        \
        const uint8_t *below = s + stride;                              \
        for (col = 0; col < size; col++)                                \
            total += FFABS(s[col] - below[col]);                        \
    }                                                                   \
                                                                        \
    return total;                                                       \
}
VSAD_INTRA(8)
VSAD_INTRA(16)
|
yading@10
|
2423
|
yading@10
|
/**
 * Vertical SAD of the difference of two 16-pixel-wide blocks: for rows
 * 0..h-2, sums |(s1 - s2) at this row - (s1 - s2) at the next row|.
 *
 * @param c      unused
 * @param s1     first block
 * @param s2     second block
 * @param stride distance in bytes between rows
 * @param h      number of rows
 */
static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h)
{
    int total = 0;
    int row, col;

    for (row = 1; row < h; row++) {
        uint8_t *next1 = s1 + stride;
        uint8_t *next2 = s2 + stride;

        for (col = 0; col < 16; col++) {
            const int delta = s1[col] - s2[col] - next1[col] + next2[col];

            total += FFABS(delta);
        }
        s1 = next1;
        s2 = next2;
    }

    return total;
}
|
yading@10
|
2438
|
yading@10
|
#define SQ(a) ((a)*(a))
/*
 * Generates vsse_intra<size>_c(): the sum of squared differences between
 * each pixel and the pixel one row below it, over rows 0..h-2 and the first
 * <size> columns. The c and dummy arguments are not used.
 */
#define VSSE_INTRA(size) \
static int vsse_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int total = 0;                                                      \
    int row, col;                                                       \
                                                                        \
    for (row = 1; row < h; row++, s += stride) {                        \
        const uint8_t *below = s + stride;                              \
        for (col = 0; col < size; col++)                                \
            total += SQ(s[col] - below[col]);                           \
    }                                                                   \
                                                                        \
    return total;                                                       \
}
VSSE_INTRA(8)
VSSE_INTRA(16)
|
yading@10
|
2457
|
yading@10
|
/**
 * Vertical SSE of the difference of two 16-pixel-wide blocks: for rows
 * 0..h-2, sums the square of
 * ((s1 - s2) at this row - (s1 - s2) at the next row).
 *
 * @param c      unused
 * @param s1     first block
 * @param s2     second block
 * @param stride distance in bytes between rows
 * @param h      number of rows
 */
static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h)
{
    int total = 0;
    int row, col;

    for (row = 1; row < h; row++) {
        uint8_t *next1 = s1 + stride;
        uint8_t *next2 = s2 + stride;

        for (col = 0; col < 16; col++) {
            const int delta = s1[col] - s2[col] - next1[col] + next2[col];

            total += delta * delta;
        }
        s1 = next1;
        s2 = next2;
    }

    return total;
}
|
yading@10
|
2472
|
yading@10
|
/**
 * Sum of squared differences between an int8_t array and an int16_t array.
 *
 * @param pix1 first array (size elements)
 * @param pix2 second array (size elements)
 * @param size number of elements to compare
 * @return sum over i of (pix1[i] - pix2[i])^2
 */
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size)
{
    int total = 0;
    int k;

    for (k = 0; k < size; k++) {
        const int delta = pix1[k] - pix2[k];

        total += delta * delta;
    }

    return total;
}
|
yading@10
|
2481
|
yading@10
|
/*
 * Builds a 16-wide comparison function name16() out of an 8x8 one, name8():
 * the two 8x8 blocks of the top half are always scored, and the two of the
 * bottom half are added when h == 16. The calls are made one after another,
 * left block first.
 */
#define WRAPPER8_16_SQ(name8, name16)\
static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
    int score = name8(s, dst, src, stride, 8);\
    score += name8(s, dst + 8, src + 8, stride, 8);\
    if (h == 16) {\
        uint8_t *dst_low = dst + 8*stride;\
        uint8_t *src_low = src + 8*stride;\
        score += name8(s, dst_low, src_low, stride, 8);\
        score += name8(s, dst_low + 8, src_low + 8, stride, 8);\
    }\
    return score;\
}

WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
#if CONFIG_GPL
WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
#endif
WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
WRAPPER8_16_SQ(rd8x8_c, rd16_c)
WRAPPER8_16_SQ(bit8x8_c, bit16_c)
|
yading@10
|
2506
|
yading@10
|
/**
 * Clip one value using unsigned integer comparisons only.
 *
 * @param a        value to clip
 * @param mini     returned when a > mini (unsigned compare)
 * @param maxi     returned when (a ^ 0x80000000) > maxisign
 * @param maxisign comparison threshold for the sign-flipped value
 * @return mini, maxi, or a unchanged
 */
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
                                   uint32_t maxi, uint32_t maxisign)
{
    const uint32_t sign_flipped = a ^ (1U << 31);

    if (a > mini)
        return mini;

    return sign_flipped > maxisign ? maxi : a;
}
|
yading@10
|
2515
|
yading@10
|
/**
 * Clip a float array by comparing the raw 32-bit patterns with clipf_c_one().
 *
 * Elements are handled in groups of eight; the loop runs while the group
 * start index is below len, so a len that is not a multiple of 8 makes the
 * last group read and write past len.
 *
 * @param dst output array
 * @param src input array
 * @param min pointer to the lower bound
 * @param max pointer to the upper bound
 * @param len number of elements
 */
static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len)
{
    const uint32_t lo_bits      = *(uint32_t*)min;
    const uint32_t hi_bits      = *(uint32_t*)max;
    const uint32_t hi_bits_sign = hi_bits ^ (1U<<31);
    uint32_t *out               = (uint32_t*)dst;
    const uint32_t *in          = (const uint32_t*)src;
    int base, k;

    for (base = 0; base < len; base += 8)
        for (k = 0; k < 8; k++)
            out[base + k] = clipf_c_one(in[base + k], lo_bits, hi_bits, hi_bits_sign);
}
|
yading@10
|
/**
 * Clip every element of a float array to [min, max].
 *
 * When min < 0 and max > 0 the work is handed to
 * vector_clipf_c_opposite_sign(); otherwise av_clipf() is applied per
 * element. Elements are handled in groups of eight, so a len that is not a
 * multiple of 8 makes the last group read and write past len.
 *
 * @param dst output array
 * @param src input array
 * @param min lower bound
 * @param max upper bound
 * @param len number of elements
 */
static void vector_clipf_c(float *dst, const float *src, float min, float max, int len)
{
    int base, k;

    if (min < 0 && max > 0) {
        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
        return;
    }

    for (base = 0; base < len; base += 8)
        for (k = 0; k < 8; k++)
            dst[base + k] = av_clipf(src[base + k], min, max);
}
|
yading@10
|
2551
|
yading@10
|
/**
 * Dot product of two int16_t vectors.
 *
 * @param v1    first vector
 * @param v2    second vector
 * @param order number of elements
 * @return sum over i of v1[i] * v2[i], accumulated in an int
 */
static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order)
{
    int acc = 0;
    int k   = 0;

    for (; order != 0; order--, k++)
        acc += v1[k] * v2[k];

    return acc;
}
|
yading@10
|
2561
|
yading@10
|
/**
 * Dot product of v1 and v2, followed by an in-place update of v1.
 *
 * For each element the product v1[i] * v2[i] is accumulated using the value
 * of v1[i] before the update, then v1[i] += mul * v3[i] is stored as int16_t.
 *
 * @param v1    first vector, updated in place
 * @param v2    second vector
 * @param v3    vector scaled by mul and added to v1
 * @param order number of elements
 * @param mul   scale factor applied to v3
 * @return the dot product of the original v1 with v2
 */
static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
{
    int acc = 0;
    int k   = 0;

    for (; order != 0; order--, k++) {
        acc   += v1[k] * v2[k];
        v1[k] += mul * v3[k];
    }

    return acc;
}
|
yading@10
|
2571
|
yading@10
|
/**
 * Apply a symmetric window to int16_t samples.
 *
 * Only the first len/2 window coefficients are read; coefficient i is
 * applied to sample i and to its mirror, sample len-1-i. Products are
 * rounded by adding 1 << 14 before the shift right by 15.
 *
 * @param output destination samples
 * @param input  source samples
 * @param window window coefficients (len/2 of them are used)
 * @param len    number of samples
 */
static void apply_window_int16_c(int16_t *output, const int16_t *input,
                                 const int16_t *window, unsigned int len)
{
    const int half = len >> 1;
    int k;

    for (k = 0; k < half; k++) {
        const unsigned int mirror = len - k - 1;
        const int16_t coeff       = window[k];

        output[k]      = (MUL16(input[k],      coeff) + (1 << 14)) >> 15;
        output[mirror] = (MUL16(input[mirror], coeff) + (1 << 14)) >> 15;
    }
}
|
yading@10
|
2584
|
yading@10
|
/**
 * Clip every element of an int32_t array to [min, max].
 *
 * The main loop handles eight elements per iteration. A len of 0 writes
 * nothing, and a len that is not a multiple of 8 is finished element by
 * element. The previous do/while form always wrote at least eight elements
 * and, because len is unsigned, wrapped around instead of stopping for such
 * lengths.
 *
 * @param dst output array (len elements)
 * @param src input array (len elements)
 * @param min lower bound
 * @param max upper bound
 * @param len number of elements
 */
static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
                                int32_t max, unsigned int len)
{
    /* bulk: eight elements per iteration */
    for (; len >= 8; len -= 8) {
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
    }

    /* tail: fewer than eight elements left */
    for (; len > 0; len--)
        *dst++ = av_clip(*src++, min, max);
}
|
yading@10
|
2600
|
yading@10
|
/**
 * Run ff_j_rev_dct() on block in place, then store the result to dest with
 * put_pixels_clamped_c().
 *
 * @param dest      destination pixels
 * @param line_size distance in bytes between destination rows
 * @param block     coefficient block, overwritten by the transform
 */
static void ff_jref_idct_put(uint8_t *dest, int line_size, int16_t *block)
{
    ff_j_rev_dct(block);
    put_pixels_clamped_c(block, dest, line_size);
}
|
yading@10
|
/**
 * Run ff_j_rev_dct() on block in place, then add the result to dest with
 * add_pixels_clamped_c().
 *
 * @param dest      destination pixels, updated in place
 * @param line_size distance in bytes between destination rows
 * @param block     coefficient block, overwritten by the transform
 */
static void ff_jref_idct_add(uint8_t *dest, int line_size, int16_t *block)
{
    ff_j_rev_dct(block);
    add_pixels_clamped_c(block, dest, line_size);
}
|
yading@10
|
2611
|
yading@10
|
/**
 * Run ff_j_rev_dct4() on block in place, then store the result to dest with
 * put_pixels_clamped4_c().
 *
 * @param dest      destination pixels
 * @param line_size distance in bytes between destination rows
 * @param block     coefficient block, overwritten by the transform
 */
static void ff_jref_idct4_put(uint8_t *dest, int line_size, int16_t *block)
{
    ff_j_rev_dct4(block);
    put_pixels_clamped4_c(block, dest, line_size);
}
|
yading@10
|
/**
 * Run ff_j_rev_dct4() on block in place, then add the result to dest with
 * add_pixels_clamped4_c().
 *
 * @param dest      destination pixels, updated in place
 * @param line_size distance in bytes between destination rows
 * @param block     coefficient block, overwritten by the transform
 */
static void ff_jref_idct4_add(uint8_t *dest, int line_size, int16_t *block)
{
    ff_j_rev_dct4(block);
    add_pixels_clamped4_c(block, dest, line_size);
}
|
yading@10
|
2622
|
yading@10
|
/**
 * Run ff_j_rev_dct2() on block in place, then store the result to dest with
 * put_pixels_clamped2_c().
 *
 * @param dest      destination pixels
 * @param line_size distance in bytes between destination rows
 * @param block     coefficient block, overwritten by the transform
 */
static void ff_jref_idct2_put(uint8_t *dest, int line_size, int16_t *block)
{
    ff_j_rev_dct2(block);
    put_pixels_clamped2_c(block, dest, line_size);
}
|
yading@10
|
/**
 * Run ff_j_rev_dct2() on block in place, then add the result to dest with
 * add_pixels_clamped2_c().
 *
 * @param dest      destination pixels, updated in place
 * @param line_size distance in bytes between destination rows
 * @param block     coefficient block, overwritten by the transform
 */
static void ff_jref_idct2_add(uint8_t *dest, int line_size, int16_t *block)
{
    ff_j_rev_dct2(block);
    add_pixels_clamped2_c(block, dest, line_size);
}
|
yading@10
|
2633
|
yading@10
|
/**
 * Single-pixel "put": writes (block[0] + 4) >> 3, clipped to 0..255 by
 * av_clip_uint8(), to dest[0].
 *
 * @param dest      destination pixel
 * @param line_size unused
 * @param block     coefficient block; only block[0] is read
 */
static void ff_jref_idct1_put(uint8_t *dest, int line_size, int16_t *block)
{
    const int dc = (block[0] + 4) >> 3;

    dest[0] = av_clip_uint8(dc);
}
|
yading@10
|
/**
 * Single-pixel "add": adds (block[0] + 4) >> 3 to dest[0] and stores the
 * sum clipped to 0..255 by av_clip_uint8().
 *
 * @param dest      destination pixel, updated in place
 * @param line_size unused
 * @param block     coefficient block; only block[0] is read
 */
static void ff_jref_idct1_add(uint8_t *dest, int line_size, int16_t *block)
{
    const int dc = (block[0] + 4) >> 3;

    dest[0] = av_clip_uint8(dest[0] + dc);
}
|
yading@10
|
2642
|
yading@10
|
2643 /* init static data */
|
yading@10
|
2644 av_cold void ff_dsputil_static_init(void)
|
yading@10
|
2645 {
|
yading@10
|
2646 int i;
|
yading@10
|
2647
|
yading@10
|
2648 for(i=0;i<512;i++) {
|
yading@10
|
2649 ff_squareTbl[i] = (i - 256) * (i - 256);
|
yading@10
|
2650 }
|
yading@10
|
2651
|
yading@10
|
2652 for(i=0; i<64; i++) ff_inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
|
yading@10
|
2653 }
|
yading@10
|
2654
|
yading@10
|
2655 int ff_check_alignment(void){
|
yading@10
|
2656 static int did_fail=0;
|
yading@10
|
2657 LOCAL_ALIGNED_16(int, aligned, [4]);
|
yading@10
|
2658
|
yading@10
|
2659 if((intptr_t)aligned & 15){
|
yading@10
|
2660 if(!did_fail){
|
yading@10
|
2661 #if HAVE_MMX || HAVE_ALTIVEC
|
yading@10
|
2662 av_log(NULL, AV_LOG_ERROR,
|
yading@10
|
2663 "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
|
yading@10
|
2664 "and may be very slow or crash. This is not a bug in libavcodec,\n"
|
yading@10
|
2665 "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
|
yading@10
|
2666 "Do not report crashes to FFmpeg developers.\n");
|
yading@10
|
2667 #endif
|
yading@10
|
2668 did_fail=1;
|
yading@10
|
2669 }
|
yading@10
|
2670 return -1;
|
yading@10
|
2671 }
|
yading@10
|
2672 return 0;
|
yading@10
|
2673 }
|
yading@10
|
2674
|
yading@10
|
2675 av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
|
yading@10
|
2676 {
|
yading@10
|
2677 ff_check_alignment();
|
yading@10
|
2678
|
yading@10
|
2679 #if CONFIG_ENCODERS
|
yading@10
|
2680 if (avctx->bits_per_raw_sample == 10) {
|
yading@10
|
2681 c->fdct = ff_jpeg_fdct_islow_10;
|
yading@10
|
2682 c->fdct248 = ff_fdct248_islow_10;
|
yading@10
|
2683 } else {
|
yading@10
|
2684 if(avctx->dct_algo==FF_DCT_FASTINT) {
|
yading@10
|
2685 c->fdct = ff_fdct_ifast;
|
yading@10
|
2686 c->fdct248 = ff_fdct_ifast248;
|
yading@10
|
2687 }
|
yading@10
|
2688 else if(avctx->dct_algo==FF_DCT_FAAN) {
|
yading@10
|
2689 c->fdct = ff_faandct;
|
yading@10
|
2690 c->fdct248 = ff_faandct248;
|
yading@10
|
2691 }
|
yading@10
|
2692 else {
|
yading@10
|
2693 c->fdct = ff_jpeg_fdct_islow_8; //slow/accurate/default
|
yading@10
|
2694 c->fdct248 = ff_fdct248_islow_8;
|
yading@10
|
2695 }
|
yading@10
|
2696 }
|
yading@10
|
2697 #endif //CONFIG_ENCODERS
|
yading@10
|
2698
|
yading@10
|
2699 if(avctx->lowres==1){
|
yading@10
|
2700 c->idct_put= ff_jref_idct4_put;
|
yading@10
|
2701 c->idct_add= ff_jref_idct4_add;
|
yading@10
|
2702 c->idct = ff_j_rev_dct4;
|
yading@10
|
2703 c->idct_permutation_type= FF_NO_IDCT_PERM;
|
yading@10
|
2704 }else if(avctx->lowres==2){
|
yading@10
|
2705 c->idct_put= ff_jref_idct2_put;
|
yading@10
|
2706 c->idct_add= ff_jref_idct2_add;
|
yading@10
|
2707 c->idct = ff_j_rev_dct2;
|
yading@10
|
2708 c->idct_permutation_type= FF_NO_IDCT_PERM;
|
yading@10
|
2709 }else if(avctx->lowres==3){
|
yading@10
|
2710 c->idct_put= ff_jref_idct1_put;
|
yading@10
|
2711 c->idct_add= ff_jref_idct1_add;
|
yading@10
|
2712 c->idct = ff_j_rev_dct1;
|
yading@10
|
2713 c->idct_permutation_type= FF_NO_IDCT_PERM;
|
yading@10
|
2714 }else{
|
yading@10
|
2715 if (avctx->bits_per_raw_sample == 10) {
|
yading@10
|
2716 c->idct_put = ff_simple_idct_put_10;
|
yading@10
|
2717 c->idct_add = ff_simple_idct_add_10;
|
yading@10
|
2718 c->idct = ff_simple_idct_10;
|
yading@10
|
2719 c->idct_permutation_type = FF_NO_IDCT_PERM;
|
yading@10
|
2720 } else {
|
yading@10
|
2721 if(avctx->idct_algo==FF_IDCT_INT){
|
yading@10
|
2722 c->idct_put= ff_jref_idct_put;
|
yading@10
|
2723 c->idct_add= ff_jref_idct_add;
|
yading@10
|
2724 c->idct = ff_j_rev_dct;
|
yading@10
|
2725 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
|
yading@10
|
2726 }else if(avctx->idct_algo==FF_IDCT_FAAN){
|
yading@10
|
2727 c->idct_put= ff_faanidct_put;
|
yading@10
|
2728 c->idct_add= ff_faanidct_add;
|
yading@10
|
2729 c->idct = ff_faanidct;
|
yading@10
|
2730 c->idct_permutation_type= FF_NO_IDCT_PERM;
|
yading@10
|
2731 }else{ //accurate/default
|
yading@10
|
2732 c->idct_put = ff_simple_idct_put_8;
|
yading@10
|
2733 c->idct_add = ff_simple_idct_add_8;
|
yading@10
|
2734 c->idct = ff_simple_idct_8;
|
yading@10
|
2735 c->idct_permutation_type= FF_NO_IDCT_PERM;
|
yading@10
|
2736 }
|
yading@10
|
2737 }
|
yading@10
|
2738 }
|
yading@10
|
2739
|
yading@10
|
2740 c->diff_pixels = diff_pixels_c;
|
yading@10
|
2741 c->put_pixels_clamped = put_pixels_clamped_c;
|
yading@10
|
2742 c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
|
yading@10
|
2743 c->add_pixels_clamped = add_pixels_clamped_c;
|
yading@10
|
2744 c->sum_abs_dctelem = sum_abs_dctelem_c;
|
yading@10
|
2745 c->gmc1 = gmc1_c;
|
yading@10
|
2746 c->gmc = ff_gmc_c;
|
yading@10
|
2747 c->pix_sum = pix_sum_c;
|
yading@10
|
2748 c->pix_norm1 = pix_norm1_c;
|
yading@10
|
2749
|
yading@10
|
2750 c->fill_block_tab[0] = fill_block16_c;
|
yading@10
|
2751 c->fill_block_tab[1] = fill_block8_c;
|
yading@10
|
2752
|
yading@10
|
2753 /* TODO [0] 16 [1] 8 */
|
yading@10
|
2754 c->pix_abs[0][0] = pix_abs16_c;
|
yading@10
|
2755 c->pix_abs[0][1] = pix_abs16_x2_c;
|
yading@10
|
2756 c->pix_abs[0][2] = pix_abs16_y2_c;
|
yading@10
|
2757 c->pix_abs[0][3] = pix_abs16_xy2_c;
|
yading@10
|
2758 c->pix_abs[1][0] = pix_abs8_c;
|
yading@10
|
2759 c->pix_abs[1][1] = pix_abs8_x2_c;
|
yading@10
|
2760 c->pix_abs[1][2] = pix_abs8_y2_c;
|
yading@10
|
2761 c->pix_abs[1][3] = pix_abs8_xy2_c;
|
yading@10
|
2762
|
yading@10
|
2763 c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
|
yading@10
|
2764 c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
|
yading@10
|
2765 c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
|
yading@10
|
2766 c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
|
yading@10
|
2767 c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
|
yading@10
|
2768 c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
|
yading@10
|
2769 c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
|
yading@10
|
2770 c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
|
yading@10
|
2771 c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
|
yading@10
|
2772
|
yading@10
|
2773 c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
|
yading@10
|
2774 c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
|
yading@10
|
2775 c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
|
yading@10
|
2776 c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
|
yading@10
|
2777 c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
|
yading@10
|
2778 c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
|
yading@10
|
2779 c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
|
yading@10
|
2780 c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
|
yading@10
|
2781 c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
|
yading@10
|
2782
|
yading@10
|
2783 #define dspfunc(PFX, IDX, NUM) \
|
yading@10
|
2784 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
|
yading@10
|
2785 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
|
yading@10
|
2786 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
|
yading@10
|
2787 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
|
yading@10
|
2788 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
|
yading@10
|
2789 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
|
yading@10
|
2790 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
|
yading@10
|
2791 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
|
yading@10
|
2792 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
|
yading@10
|
2793 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
|
yading@10
|
2794 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
|
yading@10
|
2795 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
|
yading@10
|
2796 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
|
yading@10
|
2797 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
|
yading@10
|
2798 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
|
yading@10
|
2799 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
|
yading@10
|
2800
|
yading@10
|
2801 dspfunc(put_qpel, 0, 16);
|
yading@10
|
2802 dspfunc(put_no_rnd_qpel, 0, 16);
|
yading@10
|
2803
|
yading@10
|
2804 dspfunc(avg_qpel, 0, 16);
|
yading@10
|
2805 /* dspfunc(avg_no_rnd_qpel, 0, 16); */
|
yading@10
|
2806
|
yading@10
|
2807 dspfunc(put_qpel, 1, 8);
|
yading@10
|
2808 dspfunc(put_no_rnd_qpel, 1, 8);
|
yading@10
|
2809
|
yading@10
|
2810 dspfunc(avg_qpel, 1, 8);
|
yading@10
|
2811 /* dspfunc(avg_no_rnd_qpel, 1, 8); */
|
yading@10
|
2812
|
yading@10
|
2813 #undef dspfunc
|
yading@10
|
2814
|
yading@10
|
2815 c->put_mspel_pixels_tab[0]= ff_put_pixels8x8_c;
|
yading@10
|
2816 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
|
yading@10
|
2817 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
|
yading@10
|
2818 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
|
yading@10
|
2819 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
|
yading@10
|
2820 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
|
yading@10
|
2821 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
|
yading@10
|
2822 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
|
yading@10
|
2823
|
yading@10
|
2824 #define SET_CMP_FUNC(name) \
|
yading@10
|
2825 c->name[0]= name ## 16_c;\
|
yading@10
|
2826 c->name[1]= name ## 8x8_c;
|
yading@10
|
2827
|
yading@10
|
2828 SET_CMP_FUNC(hadamard8_diff)
|
yading@10
|
2829 c->hadamard8_diff[4]= hadamard8_intra16_c;
|
yading@10
|
2830 c->hadamard8_diff[5]= hadamard8_intra8x8_c;
|
yading@10
|
2831 SET_CMP_FUNC(dct_sad)
|
yading@10
|
2832 SET_CMP_FUNC(dct_max)
|
yading@10
|
2833 #if CONFIG_GPL
|
yading@10
|
2834 SET_CMP_FUNC(dct264_sad)
|
yading@10
|
2835 #endif
|
yading@10
|
2836 c->sad[0]= pix_abs16_c;
|
yading@10
|
2837 c->sad[1]= pix_abs8_c;
|
yading@10
|
2838 c->sse[0]= sse16_c;
|
yading@10
|
2839 c->sse[1]= sse8_c;
|
yading@10
|
2840 c->sse[2]= sse4_c;
|
yading@10
|
2841 SET_CMP_FUNC(quant_psnr)
|
yading@10
|
2842 SET_CMP_FUNC(rd)
|
yading@10
|
2843 SET_CMP_FUNC(bit)
|
yading@10
|
2844 c->vsad[0]= vsad16_c;
|
yading@10
|
2845 c->vsad[4]= vsad_intra16_c;
|
yading@10
|
2846 c->vsad[5]= vsad_intra8_c;
|
yading@10
|
2847 c->vsse[0]= vsse16_c;
|
yading@10
|
2848 c->vsse[4]= vsse_intra16_c;
|
yading@10
|
2849 c->vsse[5]= vsse_intra8_c;
|
yading@10
|
2850 c->nsse[0]= nsse16_c;
|
yading@10
|
2851 c->nsse[1]= nsse8_c;
|
yading@10
|
2852 #if CONFIG_SNOW_DECODER || CONFIG_SNOW_ENCODER
|
yading@10
|
2853 ff_dsputil_init_dwt(c);
|
yading@10
|
2854 #endif
|
yading@10
|
2855
|
yading@10
|
2856 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
|
yading@10
|
2857
|
yading@10
|
2858 c->add_bytes= add_bytes_c;
|
yading@10
|
2859 c->diff_bytes= diff_bytes_c;
|
yading@10
|
2860 c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
|
yading@10
|
2861 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
|
yading@10
|
2862 c->add_hfyu_left_prediction = add_hfyu_left_prediction_c;
|
yading@10
|
2863 c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c;
|
yading@10
|
2864 c->bswap_buf= bswap_buf;
|
yading@10
|
2865 c->bswap16_buf = bswap16_buf;
|
yading@10
|
2866
|
yading@10
|
2867 if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
|
yading@10
|
2868 c->h263_h_loop_filter= h263_h_loop_filter_c;
|
yading@10
|
2869 c->h263_v_loop_filter= h263_v_loop_filter_c;
|
yading@10
|
2870 }
|
yading@10
|
2871
|
yading@10
|
2872 c->try_8x8basis= try_8x8basis_c;
|
yading@10
|
2873 c->add_8x8basis= add_8x8basis_c;
|
yading@10
|
2874
|
yading@10
|
2875 c->vector_clipf = vector_clipf_c;
|
yading@10
|
2876 c->scalarproduct_int16 = scalarproduct_int16_c;
|
yading@10
|
2877 c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
|
yading@10
|
2878 c->apply_window_int16 = apply_window_int16_c;
|
yading@10
|
2879 c->vector_clip_int32 = vector_clip_int32_c;
|
yading@10
|
2880
|
yading@10
|
2881 c->shrink[0]= av_image_copy_plane;
|
yading@10
|
2882 c->shrink[1]= ff_shrink22;
|
yading@10
|
2883 c->shrink[2]= ff_shrink44;
|
yading@10
|
2884 c->shrink[3]= ff_shrink88;
|
yading@10
|
2885
|
yading@10
|
2886 c->add_pixels8 = add_pixels8_c;
|
yading@10
|
2887
|
yading@10
|
2888 #undef FUNC
|
yading@10
|
2889 #undef FUNCC
|
yading@10
|
2890 #define FUNC(f, depth) f ## _ ## depth
|
yading@10
|
2891 #define FUNCC(f, depth) f ## _ ## depth ## _c
|
yading@10
|
2892
|
yading@10
|
2893 c->draw_edges = FUNCC(draw_edges, 8);
|
yading@10
|
2894 c->clear_block = FUNCC(clear_block, 8);
|
yading@10
|
2895 c->clear_blocks = FUNCC(clear_blocks, 8);
|
yading@10
|
2896
|
yading@10
|
2897 #define BIT_DEPTH_FUNCS(depth) \
|
yading@10
|
2898 c->get_pixels = FUNCC(get_pixels, depth);
|
yading@10
|
2899
|
yading@10
|
2900 switch (avctx->bits_per_raw_sample) {
|
yading@10
|
2901 case 9:
|
yading@10
|
2902 case 10:
|
yading@10
|
2903 case 12:
|
yading@10
|
2904 case 14:
|
yading@10
|
2905 BIT_DEPTH_FUNCS(16);
|
yading@10
|
2906 break;
|
yading@10
|
2907 default:
|
yading@10
|
2908 if(avctx->bits_per_raw_sample<=8 || avctx->codec_type != AVMEDIA_TYPE_VIDEO) {
|
yading@10
|
2909 BIT_DEPTH_FUNCS(8);
|
yading@10
|
2910 }
|
yading@10
|
2911 break;
|
yading@10
|
2912 }
|
yading@10
|
2913
|
yading@10
|
2914
|
yading@10
|
2915 if (HAVE_MMX) ff_dsputil_init_mmx (c, avctx);
|
yading@10
|
2916 if (ARCH_ARM) ff_dsputil_init_arm (c, avctx);
|
yading@10
|
2917 if (HAVE_VIS) ff_dsputil_init_vis (c, avctx);
|
yading@10
|
2918 if (ARCH_ALPHA) ff_dsputil_init_alpha (c, avctx);
|
yading@10
|
2919 if (ARCH_PPC) ff_dsputil_init_ppc (c, avctx);
|
yading@10
|
2920 if (ARCH_SH4) ff_dsputil_init_sh4 (c, avctx);
|
yading@10
|
2921 if (ARCH_BFIN) ff_dsputil_init_bfin (c, avctx);
|
yading@10
|
2922
|
yading@10
|
2923 ff_init_scantable_permutation(c->idct_permutation,
|
yading@10
|
2924 c->idct_permutation_type);
|
yading@10
|
2925 }
|
yading@10
|
2926
|
yading@10
|
/**
 * Thin wrapper: forwards both arguments unchanged to ff_dsputil_init()
 * and does nothing else.
 *
 * NOTE(review): behaves identically to avpriv_dsputil_init(); presumably
 * kept as a separately named entry point for existing callers -- confirm.
 *
 * @param c     DSPContext handed straight to ff_dsputil_init()
 * @param avctx codec context handed straight to ff_dsputil_init()
 */
2927 av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
|
yading@10
|
2928 {
|
yading@10
|
2929 ff_dsputil_init(c, avctx);
|
yading@10
|
2930 }
|
yading@10
|
2931
|
yading@10
|
/**
 * Thin wrapper: forwards both arguments unchanged to ff_dsputil_init()
 * and does nothing else.
 *
 * NOTE(review): the avpriv_ prefix presumably marks this as the entry point
 * intended for use from other libraries of the project -- confirm against
 * the header that declares it.
 *
 * @param c     DSPContext handed straight to ff_dsputil_init()
 * @param avctx codec context handed straight to ff_dsputil_init()
 */
2932 av_cold void avpriv_dsputil_init(DSPContext *c, AVCodecContext *avctx)
|
yading@10
|
2933 {
|
yading@10
|
2934 ff_dsputil_init(c, avctx);
|
yading@10
|
2935 }
|