/* Compute one output row of 8 chroma pixels: vsrc0ssH/vsrc1ssH hold the
 * current source row (pixel and right neighbour, zero-extended to 16 bits),
 * vsrc2uc/vsrc3uc the row below.  The result is written through
 * OP_U8_ALTIVEC, which the including file defines as either a plain store
 * (put) or an average with the existing destination (avg). */
#define CHROMA_MC8_ALTIVEC_CORE(BIAS1, BIAS2) \
        vsrc2ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc2uc);\
        vsrc3ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc3uc);\
        psum = vec_mladd(vA, vsrc0ssH, BIAS1);\
        psum = vec_mladd(vB, vsrc1ssH, psum);\
        psum = vec_mladd(vC, vsrc2ssH, psum);\
        psum = vec_mladd(vD, vsrc3ssH, psum);\
        psum = BIAS2(psum);\
        psum = vec_sr(psum, v6us);\
        vdst = vec_ld(0, dst);\
        ppsum = (vec_u8)vec_pack(psum, psum);\
        vfdst = vec_perm(vdst, ppsum, fperm);\
        OP_U8_ALTIVEC(fsum, vfdst, vdst);\
        vec_st(fsum, 0, dst);\
        vsrc0ssH = vsrc2ssH;\
        vsrc1ssH = vsrc3ssH;\
        dst += stride;\
        src += stride;

/* Two-tap variant used when one of the bilinear weights is zero (x == 0 or
 * y == 0); vE then holds the single non-A coefficient. */
#define CHROMA_MC8_ALTIVEC_CORE_SIMPLE \
        vsrc0ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc0uc);\
        vsrc1ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc1uc);\
        psum = vec_mladd(vA, vsrc0ssH, v32ss);\
        psum = vec_mladd(vE, vsrc1ssH, psum);\
        psum = vec_sr(psum, v6us);\
        vdst = vec_ld(0, dst);\
        ppsum = (vec_u8)vec_pack(psum, psum);\
        vfdst = vec_perm(vdst, ppsum, fperm);\
        OP_U8_ALTIVEC(fsum, vfdst, vdst);\
        vec_st(fsum, 0, dst);\
        dst += stride;\
        src += stride;

/* BIAS2 hooks: identity for H.264, +28 for the VC-1 no-rounding variant. */
#define noop(a) a
#define add28(a) vec_add(v28ss, a)

#ifdef PREFIX_h264_chroma_mc8_altivec
static void PREFIX_h264_chroma_mc8_altivec(uint8_t *dst, uint8_t *src,
                                           int stride, int h, int x, int y)
{
    /* bilinear weights: A = (8-x)(8-y), B = x(8-y), C = (8-x)y, D = xy */
    DECLARE_ALIGNED(16, signed int, ABCD)[4] =
                        {((8 - x) * (8 - y)),
                         ((    x) * (8 - y)),
                         ((8 - x) * (    y)),
                         ((    x) * (    y))};
    register int i;
    vec_u8 fperm;
    const vec_s32 vABCD = vec_ld(0, ABCD);
    const vec_s16 vA = vec_splat((vec_s16)vABCD, 1);
    const vec_s16 vB = vec_splat((vec_s16)vABCD, 3);
    const vec_s16 vC = vec_splat((vec_s16)vABCD, 5);
    const vec_s16 vD = vec_splat((vec_s16)vABCD, 7);
    LOAD_ZERO;
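    /* 32 is the rounding bias added before the >> 6 normalization (the four
     * bilinear weights sum to 64).  loadSecond is set when the 9 source
     * bytes per row straddle a 16-byte boundary and a second aligned load is
     * needed; reallyBadAlign flags the src % 16 == 15 case, where the
     * shifted row starts exactly at the second vector. */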
    const vec_s16 v32ss = vec_sl(vec_splat_s16(1),vec_splat_u16(5));
    const vec_u16 v6us = vec_splat_u16(6);
    register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1;
    register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0;
    vec_u8 vsrcAuc, av_uninit(vsrcBuc), vsrcperm0, vsrcperm1;
    vec_u8 vsrc0uc, vsrc1uc;
    vec_s16 vsrc0ssH, vsrc1ssH;
    vec_u8 vsrcCuc, vsrc2uc, vsrc3uc;
    vec_s16 vsrc2ssH, vsrc3ssH, psum;
    vec_u8 vdst, ppsum, vfdst, fsum;
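    /* fperm routes the 8 interpolated bytes into whichever half of the
     * 16-byte destination vector dst falls in, leaving the other half of
     * the existing destination data untouched. */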
    if (((unsigned long)dst) % 16 == 0) {
        fperm = (vec_u8){0x10, 0x11, 0x12, 0x13,
                         0x14, 0x15, 0x16, 0x17,
                         0x08, 0x09, 0x0A, 0x0B,
                         0x0C, 0x0D, 0x0E, 0x0F};
    } else {
        fperm = (vec_u8){0x00, 0x01, 0x02, 0x03,
                         0x04, 0x05, 0x06, 0x07,
                         0x18, 0x19, 0x1A, 0x1B,
                         0x1C, 0x1D, 0x1E, 0x1F};
    }
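    /* Classic AltiVec unaligned load: aligned vec_ld()s combined through
     * vec_lvsl() permute vectors.  vsrcperm1 is offset by one byte so the
     * same loads also yield the right-neighbour pixels needed for the
     * horizontal part of the filter. */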
    vsrcAuc = vec_ld(0, src);

    if (loadSecond)
        vsrcBuc = vec_ld(16, src);
    vsrcperm0 = vec_lvsl(0, src);
    vsrcperm1 = vec_lvsl(1, src);

    vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0);
    if (reallyBadAlign)
        vsrc1uc = vsrcBuc;
    else
        vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1);

    vsrc0ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc0uc);
    vsrc1ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc1uc);
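    /* The full two-dimensional filter is only needed when all four weights
     * are non-zero (ABCD[3] != 0); each case below has an aligned path with
     * one load per row and a misaligned path with two. */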
    if (ABCD[3]) {
        if (!loadSecond) { /* implies !reallyBadAlign */
            for (i = 0 ; i < h ; i++) {
                vsrcCuc = vec_ld(stride + 0, src);
                vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
                vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);

                CHROMA_MC8_ALTIVEC_CORE(v32ss, noop)
            }
        } else {
            vec_u8 vsrcDuc;
            for (i = 0 ; i < h ; i++) {
                vsrcCuc = vec_ld(stride + 0, src);
                vsrcDuc = vec_ld(stride + 16, src);
                vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
                if (reallyBadAlign)
                    vsrc3uc = vsrcDuc;
                else
                    vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);

                CHROMA_MC8_ALTIVEC_CORE(v32ss, noop)
            }
        }
    } else {
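        /* One of x, y is zero, so the filter degenerates to two taps; vE
         * folds B and C into the single remaining coefficient. */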
        const vec_s16 vE = vec_add(vB, vC);
        if (ABCD[2]) { /* x == 0, hence B == 0: purely vertical filter */
            if (!loadSecond) {
                for (i = 0 ; i < h ; i++) {
                    vsrcCuc = vec_ld(stride + 0, src);
                    vsrc1uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
                    CHROMA_MC8_ALTIVEC_CORE_SIMPLE

                    vsrc0uc = vsrc1uc;
                }
            } else {
                vec_u8 vsrcDuc;
                for (i = 0 ; i < h ; i++) {
                    vsrcCuc = vec_ld(stride + 0, src);
                    vsrcDuc = vec_ld(stride + 15, src);
                    vsrc1uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
                    CHROMA_MC8_ALTIVEC_CORE_SIMPLE

                    vsrc0uc = vsrc1uc;
                }
            }
        } else { /* y == 0, hence C == 0: purely horizontal filter */
            if (!loadSecond) {
                for (i = 0 ; i < h ; i++) {
                    vsrcCuc = vec_ld(0, src);
                    vsrc0uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
                    vsrc1uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);

                    CHROMA_MC8_ALTIVEC_CORE_SIMPLE
                }
            } else {
                vec_u8 vsrcDuc;
                for (i = 0 ; i < h ; i++) {
                    vsrcCuc = vec_ld(0, src);
                    vsrcDuc = vec_ld(15, src);
                    vsrc0uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
                    if (reallyBadAlign)
                        vsrc1uc = vsrcDuc;
                    else
                        vsrc1uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);

                    CHROMA_MC8_ALTIVEC_CORE_SIMPLE
                }
            }
        }
    }
}
#endif
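/* For reference: the routine above computes, per row of 8 pixels, the
 * standard H.264 chroma bilinear interpolation (illustrative scalar sketch
 * only, not used by this file):
 *
 *     for (i = 0; i < 8; i++)
 *         dst[i] = (A * src[i]          + B * src[i + 1] +
 *                   C * src[i + stride] + D * src[i + stride + 1] + 32) >> 6;
 *
 * with A = (8-x)(8-y), B = x(8-y), C = (8-x)y, D = xy, so the weights sum to
 * 64 and the +32 / >> 6 pair performs rounded division by 64. */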
/* this code assumes that stride % 16 == 0 */
#ifdef PREFIX_no_rnd_vc1_chroma_mc8_altivec
static void PREFIX_no_rnd_vc1_chroma_mc8_altivec(uint8_t *dst, uint8_t *src,
                                                 int stride, int h, int x, int y)
{
    DECLARE_ALIGNED(16, signed int, ABCD)[4] =
                        {((8 - x) * (8 - y)),
                         ((    x) * (8 - y)),
                         ((8 - x) * (    y)),
                         ((    x) * (    y))};
    register int i;
    vec_u8 fperm;
    const vec_s32 vABCD = vec_ld(0, ABCD);
    const vec_s16 vA = vec_splat((vec_s16)vABCD, 1);
    const vec_s16 vB = vec_splat((vec_s16)vABCD, 3);
    const vec_s16 vC = vec_splat((vec_s16)vABCD, 5);
    const vec_s16 vD = vec_splat((vec_s16)vABCD, 7);
    LOAD_ZERO;
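    /* 28 = 32 - 4: VC-1's "no rounding" chroma MC adds 28 instead of the
     * usual 32 before the >> 6. */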
    const vec_s16 v28ss = vec_sub(vec_sl(vec_splat_s16(1),vec_splat_u16(5)),vec_splat_s16(4));
    const vec_u16 v6us = vec_splat_u16(6);
    register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1;
    register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0;
    vec_u8 vsrcAuc, av_uninit(vsrcBuc), vsrcperm0, vsrcperm1;
    vec_u8 vsrc0uc, vsrc1uc;
    vec_s16 vsrc0ssH, vsrc1ssH;
    vec_u8 vsrcCuc, vsrc2uc, vsrc3uc;
    vec_s16 vsrc2ssH, vsrc3ssH, psum;
    vec_u8 vdst, ppsum, vfdst, fsum;
    if (((unsigned long)dst) % 16 == 0) {
        fperm = (vec_u8){0x10, 0x11, 0x12, 0x13,
                         0x14, 0x15, 0x16, 0x17,
                         0x08, 0x09, 0x0A, 0x0B,
                         0x0C, 0x0D, 0x0E, 0x0F};
    } else {
        fperm = (vec_u8){0x00, 0x01, 0x02, 0x03,
                         0x04, 0x05, 0x06, 0x07,
                         0x18, 0x19, 0x1A, 0x1B,
                         0x1C, 0x1D, 0x1E, 0x1F};
    }
    vsrcAuc = vec_ld(0, src);

    if (loadSecond)
        vsrcBuc = vec_ld(16, src);
    vsrcperm0 = vec_lvsl(0, src);
    vsrcperm1 = vec_lvsl(1, src);

    vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0);
    if (reallyBadAlign)
        vsrc1uc = vsrcBuc;
    else
        vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1);

    vsrc0ssH = (vec_s16)vec_mergeh(zero_u8v, (vec_u8)vsrc0uc);
    vsrc1ssH = (vec_s16)vec_mergeh(zero_u8v, (vec_u8)vsrc1uc);
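    /* Same loop structure as the H.264 version, but the MAC chain starts
     * from 0 and the 28 bias is added afterwards via add28. */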
    if (!loadSecond) { /* implies !reallyBadAlign */
        for (i = 0 ; i < h ; i++) {
            vsrcCuc = vec_ld(stride + 0, src);

            vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
            vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);

            CHROMA_MC8_ALTIVEC_CORE(vec_splat_s16(0), add28)
        }
    } else {
        vec_u8 vsrcDuc;
        for (i = 0 ; i < h ; i++) {
            vsrcCuc = vec_ld(stride + 0, src);
            vsrcDuc = vec_ld(stride + 16, src);

            vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
            if (reallyBadAlign)
                vsrc3uc = vsrcDuc;
            else
                vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);

            CHROMA_MC8_ALTIVEC_CORE(vec_splat_s16(0), add28)
        }
    }
}
#endif
#undef noop
#undef add28
#undef CHROMA_MC8_ALTIVEC_CORE