#define CHROMA_MC8_ALTIVEC_CORE(BIAS1, BIAS2) \
        vsrc2ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc2uc);\
        vsrc3ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc3uc);\
\
        psum = vec_mladd(vA, vsrc0ssH, BIAS1);\
        psum = vec_mladd(vB, vsrc1ssH, psum);\
        psum = vec_mladd(vC, vsrc2ssH, psum);\
        psum = vec_mladd(vD, vsrc3ssH, psum);\
        psum = BIAS2(psum);\
        psum = vec_sr(psum, v6us);\
\
        vdst = vec_ld(0, dst);\
        ppsum = (vec_u8)vec_pack(psum, psum);\
        vfdst = vec_perm(vdst, ppsum, fperm);\
\
        OP_U8_ALTIVEC(fsum, vfdst, vdst);\
\
        vec_st(fsum, 0, dst);\
\
        vsrc0ssH = vsrc2ssH;\
        vsrc1ssH = vsrc3ssH;\
\
        dst += stride;\
        src += stride;

#define CHROMA_MC8_ALTIVEC_CORE_SIMPLE \
\
        vsrc0ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc0uc);\
        vsrc1ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc1uc);\
\
        psum = vec_mladd(vA, vsrc0ssH, v32ss);\
        psum = vec_mladd(vE, vsrc1ssH, psum);\
        psum = vec_sr(psum, v6us);\
\
        vdst = vec_ld(0, dst);\
        ppsum = (vec_u8)vec_pack(psum, psum);\
        vfdst = vec_perm(vdst, ppsum, fperm);\
\
        OP_U8_ALTIVEC(fsum, vfdst, vdst);\
\
        vec_st(fsum, 0, dst);\
\
        dst += stride;\
        src += stride;

#define noop(a) a
#define add28(a) vec_add(v28ss, a)

#ifdef PREFIX_h264_chroma_mc8_altivec
static void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src,
                                           int stride, int h, int x, int y)
{
    DECLARE_ALIGNED(16, signed int, ABCD)[4] =
                        {((8 - x) * (8 - y)),
                         ((    x) * (8 - y)),
                         ((8 - x) * (    y)),
                         ((    x) * (    y))};
    register int i;
    vec_u8 fperm;
    const vec_s32 vABCD = vec_ld(0, ABCD);
    const vec_s16 vA = vec_splat((vec_s16)vABCD, 1);
    const vec_s16 vB = vec_splat((vec_s16)vABCD, 3);
    const vec_s16 vC = vec_splat((vec_s16)vABCD, 5);
    const vec_s16 vD = vec_splat((vec_s16)vABCD, 7);
    LOAD_ZERO;
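    /* v32ss = 1 << 5 = 32, the rounding bias added before the final >> 6. */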
    const vec_s16 v32ss = vec_sl(vec_splat_s16(1),vec_splat_u16(5));
    const vec_u16 v6us = vec_splat_u16(6);
    register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1;
    register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0;
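    /* If src sits in the low half of a 16-byte line, the nine source
     * bytes each row needs fit in one aligned vec_ld; otherwise a second
     * load is required.  src % 16 == 15 is the worst case: vec_lvsl(1, src)
     * wraps around, so the shifted row is taken directly from the second
     * vector instead of via a vec_perm of the pair. */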
    vec_u8 vsrcAuc, av_uninit(vsrcBuc), vsrcperm0, vsrcperm1;
    vec_u8 vsrc0uc, vsrc1uc;
    vec_s16 vsrc0ssH, vsrc1ssH;
    vec_u8 vsrcCuc, vsrc2uc, vsrc3uc;
    vec_s16 vsrc2ssH, vsrc3ssH, psum;
    vec_u8 vdst, ppsum, vfdst, fsum;
    if (((unsigned long)dst) % 16 == 0) {
        fperm = (vec_u8){0x10, 0x11, 0x12, 0x13,
                         0x14, 0x15, 0x16, 0x17,
                         0x08, 0x09, 0x0A, 0x0B,
                         0x0C, 0x0D, 0x0E, 0x0F};
    } else {
        fperm = (vec_u8){0x00, 0x01, 0x02, 0x03,
                         0x04, 0x05, 0x06, 0x07,
                         0x18, 0x19, 0x1A, 0x1B,
                         0x1C, 0x1D, 0x1E, 0x1F};
    }
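    /* fperm steers the eight result bytes into whichever half of the
     * aligned 16-byte destination line dst points at, leaving the other
     * half of the loaded vector untouched. */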
    vsrcAuc = vec_ld(0, src);

    if (loadSecond)
        vsrcBuc = vec_ld(16, src);
    vsrcperm0 = vec_lvsl(0, src);
    vsrcperm1 = vec_lvsl(1, src);

    vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0);
    if (reallyBadAlign)
        vsrc1uc = vsrcBuc;
    else
        vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1);

    vsrc0ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc0uc);
    vsrc1ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc1uc);
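    /* ABCD[3] = x * y is nonzero only when both fractional offsets are
     * set; only then is the full four-tap path required. */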
    if (ABCD[3]) {
        if (!loadSecond) {// -> !reallyBadAlign
            for (i = 0 ; i < h ; i++) {
                vsrcCuc = vec_ld(stride + 0, src);
                vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
                vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);

                CHROMA_MC8_ALTIVEC_CORE(v32ss, noop)
            }
        } else {
            vec_u8 vsrcDuc;
            for (i = 0 ; i < h ; i++) {
                vsrcCuc = vec_ld(stride + 0, src);
                vsrcDuc = vec_ld(stride + 16, src);
                vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
                if (reallyBadAlign)
                    vsrc3uc = vsrcDuc;
                else
                    vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);

                CHROMA_MC8_ALTIVEC_CORE(v32ss, noop)
            }
        }
    } else {
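        /* x * y == 0: two of the four taps vanish, leaving a two-tap
         * filter with weights vA and vE = vB + vC (one of vB, vC is 0). */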
        const vec_s16 vE = vec_add(vB, vC);
        if (ABCD[2]) { // x == 0 B == 0
            if (!loadSecond) {// -> !reallyBadAlign
                for (i = 0 ; i < h ; i++) {
                    vsrcCuc = vec_ld(stride + 0, src);
                    vsrc1uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
                    CHROMA_MC8_ALTIVEC_CORE_SIMPLE

                    vsrc0uc = vsrc1uc;
                }
            } else {
                vec_u8 vsrcDuc;
                for (i = 0 ; i < h ; i++) {
                    vsrcCuc = vec_ld(stride + 0, src);
                    vsrcDuc = vec_ld(stride + 15, src);
                    vsrc1uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
                    CHROMA_MC8_ALTIVEC_CORE_SIMPLE

                    vsrc0uc = vsrc1uc;
                }
            }
        } else { // y == 0 C == 0
            if (!loadSecond) {// -> !reallyBadAlign
                for (i = 0 ; i < h ; i++) {
                    vsrcCuc = vec_ld(0, src);
                    vsrc0uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
                    vsrc1uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);

                    CHROMA_MC8_ALTIVEC_CORE_SIMPLE
                }
            } else {
                vec_u8 vsrcDuc;
                for (i = 0 ; i < h ; i++) {
                    vsrcCuc = vec_ld(0, src);
                    vsrcDuc = vec_ld(15, src);
                    vsrc0uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
                    if (reallyBadAlign)
                        vsrc1uc = vsrcDuc;
                    else
                        vsrc1uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);

                    CHROMA_MC8_ALTIVEC_CORE_SIMPLE
                }
            }
        }
    }
}
#endif
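
/* The VC-1 variant below is identical except for the rounding term: the
 * bias is applied as a trailing add of 28 (add28/v28ss) rather than by
 * seeding the accumulator with 32, which implements VC-1's no-rounding
 * chroma mode. */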
/* this code assume that stride % 16 == 0 */
#ifdef PREFIX_no_rnd_vc1_chroma_mc8_altivec
static void PREFIX_no_rnd_vc1_chroma_mc8_altivec(uint8_t * dst, uint8_t * src,
                                                 int stride, int h, int x, int y)
{
    DECLARE_ALIGNED(16, signed int, ABCD)[4] =
                        {((8 - x) * (8 - y)),
                         ((    x) * (8 - y)),
                         ((8 - x) * (    y)),
                         ((    x) * (    y))};
    register int i;
    vec_u8 fperm;
    const vec_s32 vABCD = vec_ld(0, ABCD);
    const vec_s16 vA = vec_splat((vec_s16)vABCD, 1);
    const vec_s16 vB = vec_splat((vec_s16)vABCD, 3);
    const vec_s16 vC = vec_splat((vec_s16)vABCD, 5);
    const vec_s16 vD = vec_splat((vec_s16)vABCD, 7);
    LOAD_ZERO;
    const vec_s16 v28ss = vec_sub(vec_sl(vec_splat_s16(1),vec_splat_u16(5)),vec_splat_s16(4));
    const vec_u16 v6us  = vec_splat_u16(6);
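    /* 28 = (1 << 5) - 4, assembled from splat immediates because
     * vec_splat_s16() only accepts constants in the range -16..15. */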
    register int loadSecond     = (((unsigned long)src) % 16) <= 7 ? 0 : 1;
    register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0;
    vec_u8 vsrcAuc, av_uninit(vsrcBuc), vsrcperm0, vsrcperm1;
    vec_u8 vsrc0uc, vsrc1uc;
    vec_s16 vsrc0ssH, vsrc1ssH;
    vec_u8 vsrcCuc, vsrc2uc, vsrc3uc;
    vec_s16 vsrc2ssH, vsrc3ssH, psum;
    vec_u8 vdst, ppsum, vfdst, fsum;
    if (((unsigned long)dst) % 16 == 0) {
        fperm = (vec_u8){0x10, 0x11, 0x12, 0x13,
                         0x14, 0x15, 0x16, 0x17,
                         0x08, 0x09, 0x0A, 0x0B,
                         0x0C, 0x0D, 0x0E, 0x0F};
    } else {
        fperm = (vec_u8){0x00, 0x01, 0x02, 0x03,
                         0x04, 0x05, 0x06, 0x07,
                         0x18, 0x19, 0x1A, 0x1B,
                         0x1C, 0x1D, 0x1E, 0x1F};
    }
    vsrcAuc = vec_ld(0, src);

    if (loadSecond)
        vsrcBuc = vec_ld(16, src);
    vsrcperm0 = vec_lvsl(0, src);
    vsrcperm1 = vec_lvsl(1, src);

    vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0);
    if (reallyBadAlign)
        vsrc1uc = vsrcBuc;
    else
        vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1);

    vsrc0ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc0uc);
    vsrc1ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc1uc);

    if (!loadSecond) {// -> !reallyBadAlign
        for (i = 0 ; i < h ; i++) {
            vsrcCuc = vec_ld(stride + 0, src);

            vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
            vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);

            CHROMA_MC8_ALTIVEC_CORE(vec_splat_s16(0), add28)
        }
    } else {
        vec_u8 vsrcDuc;
        for (i = 0 ; i < h ; i++) {
            vsrcCuc = vec_ld(stride + 0, src);
            vsrcDuc = vec_ld(stride + 16, src);

            vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
            if (reallyBadAlign)
                vsrc3uc = vsrcDuc;
            else
                vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);

            CHROMA_MC8_ALTIVEC_CORE(vec_splat_s16(0), add28)
        }
    }
}
#endif
#undef noop
#undef add28
#undef CHROMA_MC8_ALTIVEC_CORE
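
/* For reference, both kernels vectorize the standard bilinear chroma
 * interpolation below; this scalar sketch is illustrative only and is
 * not part of the template:
 *
 *     for (i = 0; i < h; i++) {
 *         for (j = 0; j < 8; j++)
 *             dst[j] = ((8 - x) * (8 - y) * src[j] +
 *                       (    x) * (8 - y) * src[j + 1] +
 *                       (8 - x) * (    y) * src[j + stride] +
 *                       (    x) * (    y) * src[j + stride + 1] +
 *                       32) >> 6;
 *         dst += stride;
 *         src += stride;
 *     }
 */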