35 {0, 64277, 60547, 54491, 46341, 36410, 25080, 12785};
/* vec_perm mask used by M15: picks bytes {0,1} of each 32-bit product
 * from the vec_mule result and bytes {16,17}... of the vec_mulo result,
 * i.e. the high 16 bits of each even/odd product (big-endian layout),
 * re-interleaved back into original element order. */
static const vec_u8 interleave_high =
    {0, 1, 16, 17, 4, 5, 20, 21, 8, 9, 24, 25, 12, 13, 28, 29};
/*
 * NOTE(review): extraction artifact — the "#define IDCT_START \" header
 * that introduces the continued lines below is not visible in this chunk;
 * the continued lines are kept exactly as found.
 */
    /* scratch vectors for the butterfly stages of the 1-D IDCT */ \
    vec_s16 A, B, C, D, Ad, Bd, Cd, Dd, E, F, G, H;\
    vec_s16 Ed, Gd, Add, Bdd, Fd, Hd;\
    /* rounding bias and shift amount consumed by ADD8/SHIFT4 */ \
    vec_s16 eight = vec_splat_s16(8);\
    vec_u16 four = vec_splat_u16(4);\
    /* cosine constants splatted from the file-level "constants" table */ \
    vec_s16 C1 = vec_splat(constants, 1);\
    vec_s16 C2 = vec_splat(constants, 2);\
    vec_s16 C3 = vec_splat(constants, 3);\
    vec_s16 C4 = vec_splat(constants, 4);\
    vec_s16 C5 = vec_splat(constants, 5);\
    vec_s16 C6 = vec_splat(constants, 6);\
    vec_s16 C7 = vec_splat(constants, 7);\
    /* load the eight 16-byte rows of the 8x8 coefficient block */ \
    vec_s16 b0 = vec_ld(0x00, block);\
    vec_s16 b1 = vec_ld(0x10, block);\
    vec_s16 b2 = vec_ld(0x20, block);\
    vec_s16 b3 = vec_ld(0x30, block);\
    vec_s16 b4 = vec_ld(0x40, block);\
    vec_s16 b5 = vec_ld(0x50, block);\
    vec_s16 b6 = vec_ld(0x60, block);\
    vec_s16 b7 = vec_ld(0x70, block);

    /*
     * NOTE(review): the signature of the M15 helper is not visible in this
     * chunk; this is its return statement.  It keeps the high halves of the
     * 32-bit even/odd products (vec_mule/vec_mulo) and re-interleaves them
     * into element order via the interleave_high permute mask.
     */
    return (
vec_s16)vec_perm(vec_mule(a,C), vec_mulo(a,C), interleave_high);

    /*
     * NOTE(review): body of the M16 helper (signature not visible here):
     * computes a + M15(a, C) — presumably a fixed-point multiply with one
     * extra bit of precision; confirm against the full file.
     */
    return vec_add(a, M15(a, C));
/*
 * One 1-D pass of the 8-point VP3 IDCT applied across the eight vectors
 * b0..b7 (one row/column per vector).  ADD and SHIFT are macro plug-ins:
 * ADD8/SHIFT4 below add the rounding bias and arithmetic-shift by 4 on
 * the final pass.
 */
#define IDCT_1D(ADD, SHIFT)\
    A = vec_add(M16(b1, C1), M15(b7, C7));\
    B = vec_sub(M15(b1, C7), M16(b7, C1));\
    C = vec_add(M16(b3, C3), M16(b5, C5));\
    D = vec_sub(M16(b5, C3), M16(b3, C5));\
\
    Ad = M16(vec_sub(A, C), C4);\
    Bd = M16(vec_sub(B, D), C4);\
\
    /* NOTE(review): extraction gap — the assignments of Cd, Dd, Ed, Gd
       and Fd (all consumed below) are missing from this chunk. */ \
    E = ADD(M16(vec_add(b0, b4), C4));\
    F = ADD(M16(vec_sub(b0, b4), C4));\
\
    G = vec_add(M16(b2, C2), M15(b6, C6));\
    H = vec_sub(M15(b2, C6), M16(b6, C2));\
\
    Add = vec_add(F, Ad);\
    Bdd = vec_sub(Bd, H);\
\
    Hd = vec_add(Bd, H);\
\
    /* final butterflies, writing the eight outputs back in place */ \
    b0 = SHIFT(vec_add(Gd, Cd));\
    b7 = SHIFT(vec_sub(Gd, Cd));\
\
    b1 = SHIFT(vec_add(Add, Hd));\
    b2 = SHIFT(vec_sub(Add, Hd));\
\
    b3 = SHIFT(vec_add(Ed, Dd));\
    b4 = SHIFT(vec_sub(Ed, Dd));\
\
    b5 = SHIFT(vec_add(Fd, Bdd));\
    b6 = SHIFT(vec_sub(Fd, Bdd));

/* final-pass plug-ins: add the rounding bias, then arithmetic >> 4 */
#define ADD8(a) vec_add(a, eight)
#define SHIFT4(a) vec_sra(a, four)

/*
 * IDCT the 8x8 coefficient block and store the result as unsigned pixels
 * into dst (one row of 8 bytes per line of stride bytes), then zero the
 * coefficient block.
 */
static void vp3_idct_put_altivec(uint8_t *dst, int stride, int16_t block[64])
    /*
     * NOTE(review): extraction gap — the opening brace, the IDCT_START
     * invocation, the first IDCT_1D pass and the TRANSPOSE8 step are not
     * visible in this chunk.
     */
    /* fold +2048 into the bias: after the >>4 this adds 128 per sample,
       recentring the signed IDCT output for the unsigned pack below */
    vec_s16 v2048 = vec_sl(vec_splat_s16(1), vec_splat_u16(11));
    eight = vec_add(eight, v2048);
    IDCT_1D(ADD8, SHIFT4)

    /* NOTE(review): the "#define PUT(a)" header and the declaration of t
       are not visible in this chunk; the continued lines below are its
       body: saturating-pack to unsigned bytes, then store 8 bytes as two
       32-bit element stores. */
    t = vec_packsu(a, a);\
    vec_ste((vec_u32)t, 0, (unsigned int *)dst);\
    vec_ste((vec_u32)t, 4, (unsigned int *)dst);

    /* store one 8-pixel row per line, advancing dst by the row stride */
    PUT(b0) dst += stride;
    PUT(b1) dst += stride;
    PUT(b2) dst += stride;
    PUT(b3) dst += stride;
    PUT(b4) dst += stride;
    PUT(b5) dst += stride;
    PUT(b6) dst += stride;
    /* NOTE(review): a PUT(b7) for the last row appears to be missing here. */
    /* clear the coefficient block (all 64 int16 values) after use */
    memset(block, 0, sizeof(*block) * 64);
/*
 * IDCT the 8x8 coefficient block and add the result (with saturation) to
 * the existing bytes at dst, one row of 8 bytes per line of stride bytes,
 * then zero the coefficient block.
 */
static void vp3_idct_add_altivec(uint8_t *dst, int stride, int16_t block[64])
    /*
     * NOTE(review): extraction gap — the opening brace and the IDCT_START
     * invocation are not visible in this chunk.
     */
    /* permute mask that widens 8 dst bytes into 16-bit lanes: the 0xFF
       bytes select from the second (zero) operand of vec_perm below, and
       vec_lvsl supplies alignment-adjusted indices into the loaded dst
       bytes — presumably big-endian zero-extension; confirm on LE */
    vec_u8 vdst_mask = vec_mergeh(vec_splat_u8(-1), vec_lvsl(0, dst));
    /* NOTE(review): gap — the declarations of vdst/vdst_16/t, the first
       IDCT_1D pass and the TRANSPOSE8 step are not visible here. */
    IDCT_1D(ADD8, SHIFT4)

    /* NOTE(review): the "#define ADD(a)" header is not visible here; the
       continued lines below are its body: load the destination row, widen
       it to 16-bit, saturating-add the IDCT result, pack back to unsigned
       bytes and store 8 bytes as two 32-bit element stores. */
    vdst = vec_ld(0, dst);\
    vdst_16 = (vec_s16)vec_perm(vdst, zero_u8v, vdst_mask);\
    vdst_16 = vec_adds(a, vdst_16);\
    t = vec_packsu(vdst_16, vdst_16);\
    vec_ste((vec_u32)t, 0, (unsigned int *)dst);\
    vec_ste((vec_u32)t, 4, (unsigned int *)dst);

    /* add one 8-pixel row per line, advancing dst by the row stride */
    ADD(b0) dst += stride;
    ADD(b1) dst += stride;
    ADD(b2) dst += stride;
    ADD(b3) dst += stride;
    ADD(b4) dst += stride;
    ADD(b5) dst += stride;
    ADD(b6) dst += stride;
    /* NOTE(review): an ADD(b7) for the last row appears to be missing here. */
    /* clear the coefficient block (all 64 int16 values) after use */
    memset(block, 0, sizeof(*block) * 64);
#define AV_CPU_FLAG_ALTIVEC
standard
Macro definitions for various function/variable attributes.
it can be given away to ff_start_frame. A reference passed to cur_buf_copy and partial_buf is used by libavfilter internally and must not be accessed by filters. Reference permissions: the AVFilterBufferRef structure has a perms field that describes what the code that owns the reference is allowed to do to the buffer data. Different references for the same buffer can have different permissions. For video filters that implement the deprecated start_frame/draw_slice/end_frame API, the permissions only apply to the parts of the buffer that have already been covered by the draw_slice method. The value is a binary OR of the following constants:
void(* idct_add)(uint8_t *dest, int line_size, int16_t *block)
#define TRANSPOSE8(a, b, c, d, e, f, g, h)
void(* idct_put)(uint8_t *dest, int line_size, int16_t *block)
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Contains misc utility macros and inline functions.
av_cold void ff_vp3dsp_init_ppc(VP3DSPContext *c, int flags)
else dst[i][x+y *dst_stride[i]]