#define VEC_1D_DCT(vb0,vb1,vb2,vb3,va0,va1,va2,va3)               \
    /* 1st stage */                                               \
    vz0 = vec_add(vb0,vb2);       /* temp[0] = Y[0] + Y[2] */     \
    vz1 = vec_sub(vb0,vb2);       /* temp[1] = Y[0] - Y[2] */     \
    vz2 = vec_sra(vb1,vec_splat_u16(1));                          \
    vz2 = vec_sub(vz2,vb3);       /* temp[2] = Y[1]/2 - Y[3] */   \
    vz3 = vec_sra(vb3,vec_splat_u16(1));                          \
    vz3 = vec_add(vb1,vz3);       /* temp[3] = Y[1] + Y[3]/2 */   \
    /* 2nd stage: output */                                       \
    va0 = vec_add(vz0,vz3);       /* x[0] = temp[0] + temp[3] */  \
    va1 = vec_add(vz1,vz2);       /* x[1] = temp[1] + temp[2] */  \
    va2 = vec_sub(vz1,vz2);       /* x[2] = temp[1] - temp[2] */  \
    va3 = vec_sub(vz0,vz3)        /* x[3] = temp[0] - temp[3] */

#define VEC_TRANSPOSE_4(a0,a1,a2,a3,b0,b1,b2,b3) \
    b0 = vec_mergeh( a0, a0 ); \
    b1 = vec_mergeh( a1, a0 ); \
    b2 = vec_mergeh( a2, a0 ); \
    b3 = vec_mergeh( a3, a0 ); \
    a0 = vec_mergeh( b0, b2 ); \
    a1 = vec_mergel( b0, b2 ); \
    a2 = vec_mergeh( b1, b3 ); \
    a3 = vec_mergel( b1, b3 ); \
    b0 = vec_mergeh( a0, a2 ); \
    b1 = vec_mergel( a0, a2 ); \
    b2 = vec_mergeh( a1, a3 ); \
    b3 = vec_mergel( a1, a3 )

#define VEC_LOAD_U8_ADD_S16_STORE_U8(va)             \
    vdst_orig = vec_ld(0, dst);                      \
    vdst = vec_perm(vdst_orig, zero_u8v, vdst_mask); \
    vdst_ss = (vec_s16) vec_mergeh(zero_u8v, vdst);  \
    va = vec_add(va, vdst_ss);                       \
    va_u8 = vec_packsu(va, zero_s16v);               \
    va_u32 = vec_splat((vec_u32)va_u8, 0);           \
    vec_ste(va_u32, element, (uint32_t*)dst);

static void ff_h264_idct_add_altivec(uint8_t *dst, int16_t *block, int stride)
{
    vec_s16 vtmp0, vtmp1, vtmp2, vtmp3;
    const vec_u16 v6us = vec_splat_u16(6);
    vec_u8 vdst_mask = vec_lvsl(0, dst);
    int element = ((unsigned long)dst & 0xf) >> 2;
    /* ... remaining locals (vz0-vz3, va0-va3, vdst, vdst_orig, vdst_ss,
       va_u8, va_u32, LOAD_ZERO) elided ... */

    vtmp0 = vec_ld(0,block);
    vtmp1 = vec_sld(vtmp0, vtmp0, 8);
    vtmp2 = vec_ld(16,block);
    vtmp3 = vec_sld(vtmp2, vtmp2, 8);
    memset(block, 0, 16 * sizeof(int16_t));

    VEC_1D_DCT(vtmp0,vtmp1,vtmp2,vtmp3,va0,va1,va2,va3);
    VEC_TRANSPOSE_4(va0,va1,va2,va3,vtmp0,vtmp1,vtmp2,vtmp3);
    VEC_1D_DCT(vtmp0,vtmp1,vtmp2,vtmp3,va0,va1,va2,va3);

    va0 = vec_sra(va0,v6us);
    va1 = vec_sra(va1,v6us);
    va2 = vec_sra(va2,v6us);
    va3 = vec_sra(va3,v6us);

    /* ... each row is then added to dst and stored with
       VEC_LOAD_U8_ADD_S16_STORE_U8(), stepping dst by stride ... */
}
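/* For reference: the butterfly that VEC_1D_DCT applies to every lane is the
 * standard H.264 4x4 inverse-transform stage, run once per row and once per
 * column before the final >>6. A scalar sketch follows; the helper name is
 * illustrative and not part of this file. */
static inline void idct4_1d_ref(const int16_t b[4], int16_t a[4])
{
    const int z0 = b[0] + b[2];
    const int z1 = b[0] - b[2];
    const int z2 = (b[1] >> 1) - b[3];
    const int z3 = b[1] + (b[3] >> 1);

    a[0] = z0 + z3;
    a[1] = z1 + z2;
    a[2] = z1 - z2;
    a[3] = z0 - z3;
}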
#define IDCT8_1D_ALTIVEC(s0, s1, s2, s3, s4, s5, s6, s7, d0, d1, d2, d3, d4, d5, d6, d7) {\
    /* even part */                                                           \
    vec_s16 a0v = vec_add(s0, s4);                                            \
    vec_s16 a2v = vec_sub(s0, s4);                                            \
    vec_s16 a4v = vec_sub(vec_sra(s2, onev), s6);                             \
    vec_s16 a6v = vec_add(vec_sra(s6, onev), s2);                             \
    vec_s16 b0v = vec_add(a0v, a6v);                                          \
    vec_s16 b2v = vec_add(a2v, a4v);                                          \
    vec_s16 b4v = vec_sub(a2v, a4v);                                          \
    vec_s16 b6v = vec_sub(a0v, a6v);                                          \
    /* odd part */                                                            \
    vec_s16 a1v = vec_sub( vec_sub(s5, s3), vec_add(s7, vec_sra(s7, onev)) ); \
    vec_s16 a3v = vec_sub( vec_add(s7, s1), vec_add(s3, vec_sra(s3, onev)) ); \
    vec_s16 a5v = vec_add( vec_sub(s7, s1), vec_add(s5, vec_sra(s5, onev)) ); \
    vec_s16 a7v = vec_add( vec_add(s5, s3), vec_add(s1, vec_sra(s1, onev)) ); \
    vec_s16 b1v = vec_add( vec_sra(a7v, twov), a1v);                          \
    vec_s16 b3v = vec_add( a3v, vec_sra(a5v, twov));                          \
    vec_s16 b5v = vec_sub( vec_sra(a3v, twov), a5v);                          \
    vec_s16 b7v = vec_sub( a7v, vec_sra(a1v, twov));                          \
    /* output butterflies */                                                  \
    d0 = vec_add(b0v, b7v);                                                   \
    d1 = vec_add(b2v, b5v);                                                   \
    d2 = vec_add(b4v, b3v);                                                   \
    d3 = vec_add(b6v, b1v);                                                   \
    d4 = vec_sub(b6v, b1v);                                                   \
    d5 = vec_sub(b4v, b3v);                                                   \
    d6 = vec_sub(b2v, b5v);                                                   \
    d7 = vec_sub(b0v, b7v);                                                   \
}

#define ALTIVEC_STORE_SUM_CLIP(dest, idctv, perm_ldv, perm_stv, sel) { \
    /* unaligned load */                                               \
    vec_u8 hv = vec_ld( 0, dest );                                     \
    vec_u8 lv = vec_ld( 7, dest );                                     \
    vec_u8 edgehv;                                                     \
    vec_u8 dstv = vec_perm( hv, lv, (vec_u8)perm_ldv );                \
    vec_s16 idct_sh6 = vec_sra(idctv, sixv);                           \
    vec_u16 dst16 = (vec_u16)vec_mergeh(zero_u8v, dstv);               \
    vec_s16 idstsum = vec_adds(idct_sh6, (vec_s16)dst16);              \
    vec_u8 idstsum8 = vec_packsu(zero_s16v, idstsum);                  \
    /* unaligned store */                                              \
    vec_u8 bodyv  = vec_perm( idstsum8, idstsum8, perm_stv );          \
    vec_u8 edgelv = vec_perm( sel, zero_u8v, perm_stv );               \
    lv = vec_sel( lv, bodyv, edgelv );                                 \
    vec_st( lv, 7, dest );                                             \
    hv = vec_ld( 0, dest );                                            \
    edgehv = vec_perm( zero_u8v, sel, perm_stv );                      \
    hv = vec_sel( hv, bodyv, edgehv );                                 \
    vec_st( hv, 0, dest );                                             \
}

static void ff_h264_idct8_add_altivec(uint8_t *dst, int16_t *dct, int stride)
{
    vec_s16 s0, s1, s2, s3, s4, s5, s6, s7;
    vec_s16 d0, d1, d2, d3, d4, d5, d6, d7;
    vec_s16 idct0, idct1, idct2, idct3, idct4, idct5, idct6, idct7;

    vec_u8 perm_ldv = vec_lvsl(0, dst);
    vec_u8 perm_stv = vec_lvsr(8, dst);

    const vec_u16 onev = vec_splat_u16(1);
    const vec_u16 twov = vec_splat_u16(2);
    const vec_u16 sixv = vec_splat_u16(6);

    const vec_u8 sel = (vec_u8) {0,0,0,0,0,0,0,0,-1,-1,-1,-1,-1,-1,-1,-1};
    /* ... LOAD_ZERO and the rounding add to the DC coefficient elided ... */

    s0 = vec_ld(0x00, (int16_t*)dct);
    s1 = vec_ld(0x10, (int16_t*)dct);
    s2 = vec_ld(0x20, (int16_t*)dct);
    s3 = vec_ld(0x30, (int16_t*)dct);
    s4 = vec_ld(0x40, (int16_t*)dct);
    s5 = vec_ld(0x50, (int16_t*)dct);
    s6 = vec_ld(0x60, (int16_t*)dct);
    s7 = vec_ld(0x70, (int16_t*)dct);
    memset(dct, 0, 64 * sizeof(int16_t));

    IDCT8_1D_ALTIVEC(s0, s1, s2, s3, s4, s5, s6, s7,
                     d0, d1, d2, d3, d4, d5, d6, d7);

    TRANSPOSE8(d0, d1, d2, d3, d4, d5, d6, d7);

    IDCT8_1D_ALTIVEC(d0, d1, d2, d3, d4, d5, d6, d7,
                     idct0, idct1, idct2, idct3, idct4, idct5, idct6, idct7);

    /* ... the eight rows idct0..idct7 are then added to dst and clipped with
       ALTIVEC_STORE_SUM_CLIP(), one row per stride step ... */
}
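/* What ALTIVEC_STORE_SUM_CLIP does per pixel, in scalar form: scale the
 * residual down by 6 bits, add it to the destination sample, and clip to
 * 8 bits (vec_packsu provides the unsigned saturation). Illustrative helper
 * only, not part of this file. */
static inline uint8_t add_residual_clip_ref(uint8_t dst_pix, int16_t res)
{
    int v = dst_pix + (res >> 6);
    if (v < 0)   v = 0;
    if (v > 255) v = 255;
    return (uint8_t)v;
}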
static av_always_inline void h264_idct_dc_add_internal(uint8_t *dst, int16_t *block, int stride, int size)
{
    vec_s16 dc16;
    vec_u8 dcplus, dcminus, v0, v1, v2, v3, aligner;
    DECLARE_ALIGNED(16, int, dc);
    int i;

    dc = (block[0] + 32) >> 6;
    dc16 = vec_splat((vec_s16) vec_lde(0, &dc), 1);
    /* ... dcplus/dcminus are built here by saturating-packing +dc and -dc
       into unsigned bytes (at most one of them is non-zero per lane) ... */

    aligner = vec_lvsr(0, dst);
    dcplus = vec_perm(dcplus, dcplus, aligner);
    dcminus = vec_perm(dcminus, dcminus, aligner);

    for (i = 0; i < size; i += 4) {
        v0 = vec_ld(0, dst+0*stride);
        v1 = vec_ld(0, dst+1*stride);
        v2 = vec_ld(0, dst+2*stride);
        v3 = vec_ld(0, dst+3*stride);

        v0 = vec_adds(v0, dcplus);
        v1 = vec_adds(v1, dcplus);
        v2 = vec_adds(v2, dcplus);
        v3 = vec_adds(v3, dcplus);

        v0 = vec_subs(v0, dcminus);
        v1 = vec_subs(v1, dcminus);
        v2 = vec_subs(v2, dcminus);
        v3 = vec_subs(v3, dcminus);

        vec_st(v0, 0, dst+0*stride);
        vec_st(v1, 0, dst+1*stride);
        vec_st(v2, 0, dst+2*stride);
        vec_st(v3, 0, dst+3*stride);

        dst += 4*stride;
    }
}
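/* Scalar view of the DC-only add: every pixel of the size x size block is
 * offset by the same rounded DC value and clipped. The dcplus/dcminus split
 * above exists because AltiVec only offers saturating unsigned byte adds, so
 * a signed DC is applied as "add the positive part, subtract the magnitude
 * of the negative part". Illustrative helper, not part of this file. */
static void idct_dc_add_ref(uint8_t *dst, const int16_t *block, int stride, int size)
{
    int x, y;
    int dc = (block[0] + 32) >> 6;
    for (y = 0; y < size; y++) {
        for (x = 0; x < size; x++) {
            int v = dst[x] + dc;
            dst[x] = v < 0 ? 0 : v > 255 ? 255 : (uint8_t)v;
        }
        dst += stride;
    }
}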
/* Per-macroblock dispatch: the add16/add16intra/add8/add4 entry points walk
   the scan8[] table and pick the full IDCT or the cheaper DC-only path,
   depending on the number of non-zero coefficients. Excerpts: */

    /* ff_h264_idct_add16_altivec: 16 4x4 luma blocks */
    for (i = 0; i < 16; i++) {
        int nnz = nnzc[ scan8[i] ];
        /* ... DC-only add when only the DC coefficient is coded,
           full ff_h264_idct_add_altivec() otherwise ... */
    }

    /* ff_h264_idct8_add4_altivec: four 8x8 luma blocks */
    for (i = 0; i < 16; i += 4) {
        int nnz = nnzc[ scan8[i] ];
        /* ... ff_h264_idct8_dc_add_altivec() or ff_h264_idct8_add_altivec() ... */
    }

    /* ff_h264_idct_add8_altivec: chroma blocks of both planes */
    for (j = 1; j < 3; j++) {
        for (i = j * 16; i < j * 16 + 4; i++) {
            /* ... h264_idct_dc_add_altivec() or ff_h264_idct_add_altivec() ... */
        }
    }
#define transpose4x16(r0, r1, r2, r3) { \
    register vec_u8 r4;                 \
    register vec_u8 r5;                 \
    register vec_u8 r6;                 \
    register vec_u8 r7;                 \
                                        \
    r4 = vec_mergeh(r0, r2);            \
    r5 = vec_mergel(r0, r2);            \
    r6 = vec_mergeh(r1, r3);            \
    r7 = vec_mergel(r1, r3);            \
                                        \
    r0 = vec_mergeh(r4, r6);            \
    r1 = vec_mergel(r4, r6);            \
    r2 = vec_mergeh(r5, r7);            \
    r3 = vec_mergel(r5, r7);            \
}

static void write16x4(uint8_t *dst, int dst_stride,
                      register vec_u8 r0, register vec_u8 r1,
                      register vec_u8 r2, register vec_u8 r3)
{
    DECLARE_ALIGNED(16, unsigned char, result)[64];
    uint32_t *src_int = (uint32_t *)result, *dst_int = (uint32_t *)dst;
    int int_dst_stride = dst_stride/4;

    vec_st(r0,  0, result);
    vec_st(r1, 16, result);
    vec_st(r2, 32, result);
    vec_st(r3, 48, result);
    /* scatter the 64 staged bytes as 16 stride-separated 4-byte rows */
    *dst_int                       = *src_int;
    *(dst_int+   int_dst_stride)   = *(src_int + 1);
    *(dst_int+ 2*int_dst_stride)   = *(src_int + 2);
    *(dst_int+ 3*int_dst_stride)   = *(src_int + 3);
    *(dst_int+ 4*int_dst_stride)   = *(src_int + 4);
    *(dst_int+ 5*int_dst_stride)   = *(src_int + 5);
    *(dst_int+ 6*int_dst_stride)   = *(src_int + 6);
    *(dst_int+ 7*int_dst_stride)   = *(src_int + 7);
    *(dst_int+ 8*int_dst_stride)   = *(src_int + 8);
    *(dst_int+ 9*int_dst_stride)   = *(src_int + 9);
    *(dst_int+10*int_dst_stride)   = *(src_int + 10);
    *(dst_int+11*int_dst_stride)   = *(src_int + 11);
    *(dst_int+12*int_dst_stride)   = *(src_int + 12);
    *(dst_int+13*int_dst_stride)   = *(src_int + 13);
    *(dst_int+14*int_dst_stride)   = *(src_int + 14);
    *(dst_int+15*int_dst_stride)   = *(src_int + 15);
}
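/* readAndTranspose16x6() below relies on unaligned_load(); the real helper is
 * defined in FFmpeg's PPC utility headers, but it stands for the conventional
 * AltiVec idiom of merging two aligned loads with a permute vector from
 * vec_lvsl. Sketch only, with an illustrative name: */
static inline vec_u8 unaligned_load_sketch(int offset, const uint8_t *src)
{
    vec_u8 first  = vec_ld(offset, src);        /* aligned load covering the start */
    vec_u8 second = vec_ld(offset + 15, src);   /* aligned load covering the end   */
    vec_u8 mask   = vec_lvsl(offset, src);      /* rotate amount from misalignment */
    return vec_perm(first, second, mask);
}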
#define readAndTranspose16x6(src, src_stride, r8, r9, r10, r11, r12, r13) {\
    register vec_u8 r0  = unaligned_load(0,             src);             \
    register vec_u8 r1  = unaligned_load(   src_stride, src);             \
    register vec_u8 r2  = unaligned_load(2* src_stride, src);             \
    register vec_u8 r3  = unaligned_load(3* src_stride, src);             \
    register vec_u8 r4  = unaligned_load(4* src_stride, src);             \
    register vec_u8 r5  = unaligned_load(5* src_stride, src);             \
    register vec_u8 r6  = unaligned_load(6* src_stride, src);             \
    register vec_u8 r7  = unaligned_load(7* src_stride, src);             \
    register vec_u8 r14 = unaligned_load(14*src_stride, src);             \
    register vec_u8 r15 = unaligned_load(15*src_stride, src);             \
                                                                          \
    r8  = unaligned_load( 8*src_stride, src);                             \
    r9  = unaligned_load( 9*src_stride, src);                             \
    r10 = unaligned_load(10*src_stride, src);                             \
    r11 = unaligned_load(11*src_stride, src);                             \
    r12 = unaligned_load(12*src_stride, src);                             \
    r13 = unaligned_load(13*src_stride, src);                             \
                                                                          \
    /* stage 1: interleave row i with row i+8 */                          \
    r0 = vec_mergeh(r0, r8);                                              \
    r1 = vec_mergeh(r1, r9);                                              \
    r2 = vec_mergeh(r2, r10);                                             \
    r3 = vec_mergeh(r3, r11);                                             \
    r4 = vec_mergeh(r4, r12);                                             \
    r5 = vec_mergeh(r5, r13);                                             \
    r6 = vec_mergeh(r6, r14);                                             \
    r7 = vec_mergeh(r7, r15);                                             \
                                                                          \
    /* stage 2 */                                                         \
    r8  = vec_mergeh(r0, r4);                                             \
    r9  = vec_mergel(r0, r4);                                             \
    r10 = vec_mergeh(r1, r5);                                             \
    r11 = vec_mergel(r1, r5);                                             \
    r12 = vec_mergeh(r2, r6);                                             \
    r13 = vec_mergel(r2, r6);                                             \
    r14 = vec_mergeh(r3, r7);                                             \
    r15 = vec_mergel(r3, r7);                                             \
                                                                          \
    /* stage 3: outputs 3 and 7 are never needed, so they are skipped */  \
    r0 = vec_mergeh(r8,  r12);                                            \
    r1 = vec_mergel(r8,  r12);                                            \
    r2 = vec_mergeh(r9,  r13);                                            \
    r4 = vec_mergeh(r10, r14);                                            \
    r5 = vec_mergel(r10, r14);                                            \
    r6 = vec_mergeh(r11, r15);                                            \
                                                                          \
    /* stage 4: only the six columns used by the filter are produced */   \
    r8  = vec_mergeh(r0, r4);                                             \
    r9  = vec_mergel(r0, r4);                                             \
    r10 = vec_mergeh(r1, r5);                                             \
    r11 = vec_mergel(r1, r5);                                             \
    r12 = vec_mergeh(r2, r6);                                             \
    r13 = vec_mergel(r2, r6);                                             \
}

static vec_u8 diff_lt_altivec(register vec_u8 x, register vec_u8 y, register vec_u8 a)
{
    register vec_u8 diff    = vec_subs(x, y);   /* x - y, clamped at 0 */
    register vec_u8 diffneg = vec_subs(y, x);   /* y - x, clamped at 0 */
    register vec_u8 o = vec_or(diff, diffneg);  /* |x - y| */
    o = (vec_u8)vec_cmplt(o, a);
    return o;
}
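/* Per byte lane, diff_lt_altivec computes |x - y| < a: one of the two
 * saturating subtractions is zero, so their OR is the absolute difference.
 * Scalar equivalent (illustrative helper, not part of this file): */
static inline int diff_lt_ref(uint8_t x, uint8_t y, uint8_t a)
{
    unsigned d = x > y ? x - y : y - x;
    return d < a;
}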
/* Core of h264_deblock_mask(p0, p1, q0, q1, alpha, beta): a lane is filtered
   only if |p0-q0| < alpha, |p1-p0| < beta and |q1-q0| < beta. */
    mask     = diff_lt_altivec(p0, q0, alpha);
    tempmask = diff_lt_altivec(p1, p0, beta);
    mask     = vec_and(mask, tempmask);
    tempmask = diff_lt_altivec(q1, q0, beta);
    mask     = vec_and(mask, tempmask);
static vec_u8 h264_deblock_q1(register vec_u8 p0, register vec_u8 p1, register vec_u8 p2,
                              register vec_u8 q0, register vec_u8 tc0)
{
    register vec_u8 average = vec_avg(p0, q0);
    register vec_u8 temp;
    register vec_u8 uncliped;
    register vec_u8 ones;
    register vec_u8 max;
    register vec_u8 min;
    register vec_u8 newp1;

    temp     = vec_xor(average, p2);
    average  = vec_avg(average, p2);    /* avg(p2, avg(p0, q0)) */
    ones     = vec_splat_u8(1);
    temp     = vec_and(temp, ones);     /* rounding-correction bit */
    uncliped = vec_subs(average, temp); /* (p2 + avg(p0, q0)) >> 1 */
    max      = vec_adds(p1, tc0);
    min      = vec_subs(p1, tc0);
    newp1    = vec_max(min, uncliped);
    newp1    = vec_min(max, newp1);
    return newp1;
}
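/* In scalar terms h264_deblock_q1 computes (p2 + ((p0 + q0 + 1) >> 1)) >> 1
 * and clips it to p1 +/- tc0; the xor/and pair above cancels vec_avg's
 * round-to-nearest so the second average becomes a plain floor. Sketch only,
 * with an illustrative helper name (the vector code additionally saturates
 * the p1 +/- tc0 bounds at 0 and 255): */
static inline uint8_t deblock_p1_ref(uint8_t p0, uint8_t p1, uint8_t p2,
                                     uint8_t q0, uint8_t tc0)
{
    int uncliped = (p2 + ((p0 + q0 + 1) >> 1)) >> 1;
    int lo = p1 - tc0, hi = p1 + tc0;
    if (uncliped < lo) uncliped = lo;
    if (uncliped > hi) uncliped = hi;
    return (uint8_t)uncliped;
}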
#define h264_deblock_p0_q0(p0, p1, q0, q1, tc0masked) {                        \
    const vec_u8 A0v = vec_sl(vec_splat_u8(10), vec_splat_u8(4)); /* 160 */    \
                                                                               \
    register vec_u8 pq0bit = vec_xor(p0,q0);                                   \
    register vec_u8 q1minus;                                                   \
    register vec_u8 p0minus;                                                   \
    register vec_u8 stage1;                                                    \
    register vec_u8 stage2;                                                    \
    register vec_u8 vec160;                                                    \
    register vec_u8 delta;                                                     \
    register vec_u8 deltaneg;                                                  \
                                                                               \
    q1minus = vec_nor(q1, q1);                 /* 255 - q1 */                  \
    stage1  = vec_avg(p1, q1minus);            /* (p1 - q1 + 256) >> 1 */      \
    stage2  = vec_sr(stage1, vec_splat_u8(1)); /* (p1 - q1 + 256) >> 2 */      \
    p0minus = vec_nor(p0, p0);                 /* 255 - p0 */                  \
    stage1  = vec_avg(q0, p0minus);            /* (q0 - p0 + 256) >> 1 */      \
    pq0bit  = vec_and(pq0bit, vec_splat_u8(1));                                \
    stage2  = vec_avg(stage2, pq0bit);                                         \
    stage2  = vec_adds(stage2, stage1);        /* 160 + delta */               \
    vec160  = vec_ld(0, &A0v);                                                 \
    deltaneg = vec_subs(vec160, stage2);       /* -delta */                    \
    delta    = vec_subs(stage2, vec160);       /*  delta */                    \
    deltaneg = vec_min(tc0masked, deltaneg);                                   \
    delta    = vec_min(tc0masked, delta);                                      \
    p0 = vec_subs(p0, deltaneg);                                               \
    q0 = vec_subs(q0, delta);                                                  \
    p0 = vec_adds(p0, delta);                                                  \
    q0 = vec_adds(q0, deltaneg);                                               \
}

#define h264_loop_filter_luma_altivec(p2, p1, p0, q0, q1, q2, alpha, beta, tc0) {        \
    DECLARE_ALIGNED(16, unsigned char, temp)[16];                                        \
    register vec_u8 alphavec;                                                            \
    register vec_u8 betavec;                                                             \
    register vec_u8 mask;                                                                \
    register vec_u8 p1mask;                                                              \
    register vec_u8 q1mask;                                                              \
    register vector signed char tc0vec;                                                  \
    register vec_u8 finaltc0;                                                            \
    register vec_u8 tc0masked;                                                           \
    register vec_u8 newp1;                                                               \
    register vec_u8 newq1;                                                               \
                                                                                         \
    temp[0] = alpha;                                                                     \
    temp[1] = beta;                                                                      \
    alphavec = vec_ld(0, temp);                                                          \
    betavec  = vec_splat(alphavec, 0x1);                                                 \
    alphavec = vec_splat(alphavec, 0x0);                                                 \
    mask = h264_deblock_mask(p0, p1, q0, q1, alphavec, betavec); /* filter this edge? */ \
                                                                                         \
    AV_COPY32(temp, tc0);                                                                \
    tc0vec = vec_ld(0, (signed char*)temp);                                              \
    tc0vec = vec_mergeh(tc0vec, tc0vec);                                                 \
    tc0vec = vec_mergeh(tc0vec, tc0vec);     /* splat each tc0[i] over its 4 pixels */   \
    mask = vec_and(mask, vec_cmpgt(tc0vec, vec_splat_s8(-1)));  /* tc0[i] >= 0 */        \
    finaltc0 = vec_and((vec_u8)tc0vec, mask);                   /* tc = tc0 */           \
                                                                                         \
    p1mask = diff_lt_altivec(p2, p0, betavec);                                           \
    p1mask = vec_and(p1mask, mask);                             /* |p2 - p0| < beta */   \
    tc0masked = vec_and(p1mask, (vec_u8)tc0vec);                                         \
    finaltc0 = vec_sub(finaltc0, p1mask);                       /* tc++ */               \
    newp1 = h264_deblock_q1(p0, p1, p2, q0, tc0masked);                                  \
                                                                                         \
    q1mask = diff_lt_altivec(q2, q0, betavec);                                           \
    q1mask = vec_and(q1mask, mask);                             /* |q2 - q0| < beta */   \
    tc0masked = vec_and(q1mask, (vec_u8)tc0vec);                                         \
    finaltc0 = vec_sub(finaltc0, q1mask);                       /* tc++ */               \
    newq1 = h264_deblock_q1(p0, q1, q2, q0, tc0masked);                                  \
                                                                                         \
    h264_deblock_p0_q0(p0, p1, q0, q1, finaltc0);                                        \
    p1 = newp1;                                                                          \
    q1 = newq1;                                                                          \
}

static void h264_v_loop_filter_luma_altivec(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    if ((tc0[0] & tc0[1] & tc0[2] & tc0[3]) >= 0) {
        register vec_u8 p2 = vec_ld(-3*stride, pix);
        register vec_u8 p1 = vec_ld(-2*stride, pix);
        register vec_u8 p0 = vec_ld(-1*stride, pix);
        register vec_u8 q0 = vec_ld(0, pix);
        register vec_u8 q1 = vec_ld(stride, pix);
        register vec_u8 q2 = vec_ld(2*stride, pix);
        h264_loop_filter_luma_altivec(p2, p1, p0, q0, q1, q2, alpha, beta, tc0);
        vec_st(p1, -2*stride, pix);
        vec_st(p0, -1*stride, pix);
        vec_st(q0, 0, pix);
        vec_st(q1, stride, pix);
    }
}
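/* Ignoring the rounding tricks with vec_avg/vec_nor, h264_deblock_p0_q0
 * applies the standard weak-filter update from the H.264 spec:
 * delta = clip(-tc, tc, ((q0 - p0) * 4 + (p1 - q1) + 4) >> 3), added to p0
 * and subtracted from q0. Scalar sketch (illustrative helper name): */
static inline void deblock_p0_q0_ref(uint8_t *p0, uint8_t *q0,
                                     uint8_t p1, uint8_t q1, int tc)
{
    int np0, nq0;
    int delta = ((*q0 - *p0) * 4 + (p1 - q1) + 4) >> 3;
    if (delta < -tc) delta = -tc;
    if (delta >  tc) delta =  tc;
    np0 = *p0 + delta;
    nq0 = *q0 - delta;
    *p0 = np0 < 0 ? 0 : np0 > 255 ? 255 : (uint8_t)np0;
    *q0 = nq0 < 0 ? 0 : nq0 > 255 ? 255 : (uint8_t)nq0;
}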
static void h264_h_loop_filter_luma_altivec(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    register vec_u8 line0, line1, line2, line3, line4, line5;
    if ((tc0[0] & tc0[1] & tc0[2] & tc0[3]) < 0)
        return;
    /* ... readAndTranspose16x6(), the shared h264_loop_filter_luma_altivec()
       macro and transpose4x16() run here ... */
    write16x4(pix-2, stride, line1, line2, line3, line4);
}
static av_always_inline void weight_h264_W_altivec(uint8_t *block, int stride, int height,
                                                   int log2_denom, int weight, int offset, int w)
{
    int y, aligned;
    vec_u8 vblock;
    vec_s16 vtemp, vweight, voffset, v0, v1;
    vec_u16 vlog2_denom;
    DECLARE_ALIGNED(16, int32_t, temp)[4];
    LOAD_ZERO;

    offset <<= log2_denom;
    if (log2_denom) offset += 1 << (log2_denom - 1);
    temp[0] = log2_denom;
    temp[1] = weight;
    temp[2] = offset;

    vtemp       = (vec_s16)vec_ld(0, temp);
    vlog2_denom = (vec_u16)vec_splat(vtemp, 1);
    vweight     = vec_splat(vtemp, 3);
    voffset     = vec_splat(vtemp, 5);
    aligned     = !((unsigned long)block & 0xf);

    for (y = 0; y < height; y++) {
        vblock = vec_ld(0, block);

        v0 = (vec_s16)vec_mergeh(zero_u8v, vblock);
        v1 = (vec_s16)vec_mergel(zero_u8v, vblock);

        if (w == 16 || aligned) {
            v0 = vec_mladd(v0, vweight, zero_s16v);
            v0 = vec_adds(v0, voffset);
            v0 = vec_sra(v0, vlog2_denom);
        }
        if (w == 16 || !aligned) {
            v1 = vec_mladd(v1, vweight, zero_s16v);
            v1 = vec_adds(v1, voffset);
            v1 = vec_sra(v1, vlog2_denom);
        }
        vblock = vec_packsu(v0, v1);
        vec_st(vblock, 0, block);

        block += stride;
    }
}
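/* Scalar form of the explicit weighted prediction applied per pixel above.
 * The vector code pre-folds the offset and the rounding term into a single
 * constant; this sketch keeps the textbook shape. Illustrative helper only,
 * not part of this file. */
static inline uint8_t weight_pixel_ref(uint8_t pix, int log2_denom, int weight, int offset)
{
    int round = log2_denom ? 1 << (log2_denom - 1) : 0;
    int v = ((pix * weight + round) >> log2_denom) + offset;
    return v < 0 ? 0 : v > 255 ? 255 : (uint8_t)v;
}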
static av_always_inline void biweight_h264_W_altivec(uint8_t *dst, uint8_t *src, int stride, int height,
                                                     int log2_denom, int weightd, int weights,
                                                     int offset, int w)
{
    int y, dst_aligned, src_aligned;
    vec_u8 vsrc, vdst;
    vec_s16 vtemp, vweights, vweightd, voffset, v0, v1, v2, v3;
    vec_u16 vlog2_denom;
    DECLARE_ALIGNED(16, int32_t, temp)[4];
    LOAD_ZERO;

    offset = ((offset + 1) | 1) << log2_denom;
    temp[0] = log2_denom+1;
    temp[1] = weights;
    temp[2] = weightd;
    temp[3] = offset;

    vtemp       = (vec_s16)vec_ld(0, temp);
    vlog2_denom = (vec_u16)vec_splat(vtemp, 1);
    vweights    = vec_splat(vtemp, 3);
    vweightd    = vec_splat(vtemp, 5);
    voffset     = vec_splat(vtemp, 7);
    dst_aligned = !((unsigned long)dst & 0xf);
    src_aligned = !((unsigned long)src & 0xf);

    for (y = 0; y < height; y++) {
        vdst = vec_ld(0, dst);
        vsrc = vec_ld(0, src);

        v0 = (vec_s16)vec_mergeh(zero_u8v, vdst);
        v1 = (vec_s16)vec_mergel(zero_u8v, vdst);
        v2 = (vec_s16)vec_mergeh(zero_u8v, vsrc);
        v3 = (vec_s16)vec_mergel(zero_u8v, vsrc);

        /* ... for w == 8 the half that actually overlaps the 8-pixel row is
           selected here, depending on the source alignment ... */

        if (w == 16 || dst_aligned) {
            v0 = vec_mladd(v0, vweightd, zero_s16v);
            v2 = vec_mladd(v2, vweights, zero_s16v);
            v0 = vec_adds(v0, voffset);
            v0 = vec_adds(v0, v2);
            v0 = vec_sra(v0, vlog2_denom);
        }
        if (w == 16 || !dst_aligned) {
            v1 = vec_mladd(v1, vweightd, zero_s16v);
            v3 = vec_mladd(v3, vweights, zero_s16v);
            v1 = vec_adds(v1, voffset);
            v1 = vec_adds(v1, v3);
            v1 = vec_sra(v1, vlog2_denom);
        }
        vdst = vec_packsu(v0, v1);
        vec_st(vdst, 0, dst);

        dst += stride;
        src += stride;
    }
}
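/* Scalar form of the bidirectional weighting above, with the offset folded
 * exactly as the function does it (note the shift is log2_denom + 1).
 * Illustrative helper, not part of this file. */
static inline uint8_t biweight_pixel_ref(uint8_t d, uint8_t s, int log2_denom,
                                         int weightd, int weights, int offset)
{
    int off = ((offset + 1) | 1) << log2_denom;   /* rounding term folded in */
    int v   = (d * weightd + s * weights + off) >> (log2_denom + 1);
    return v < 0 ? 0 : v > 255 ? 255 : (uint8_t)v;
}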
#define H264_WEIGHT(W) \
static void ff_weight_h264_pixels ## W ## _altivec(uint8_t *block, int stride, int height, \
                                                   int log2_denom, int weight, int offset) \
{ \
    weight_h264_W_altivec(block, stride, height, log2_denom, weight, offset, W); \
} \
static void ff_biweight_h264_pixels ## W ## _altivec(uint8_t *dst, uint8_t *src, int stride, int height, \
                                                     int log2_denom, int weightd, int weights, int offset) \
{ \
    biweight_h264_W_altivec(dst, src, stride, height, log2_denom, weightd, weights, offset, W); \
}

H264_WEIGHT(16)
H264_WEIGHT( 8)

av_cold void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth, const int chroma_format_idc)
{
    /* ... runtime check that the CPU reports AV_CPU_FLAG_ALTIVEC via
       av_get_cpu_flags() ... */
    if (bit_depth == 8) {
        /* ... idct, idct8 and loop-filter entry points are set here ... */
        if (chroma_format_idc == 1)
            c->h264_idct_add8 = ff_h264_idct_add8_altivec;

        c->weight_h264_pixels_tab[0]   = ff_weight_h264_pixels16_altivec;
        c->weight_h264_pixels_tab[1]   = ff_weight_h264_pixels8_altivec;
        c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels16_altivec;
        c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels8_altivec;
    }
}