annotate ffmpeg/libswscale/ppc/yuv2yuv_altivec.c @ 13:844d341cf643 tip

Back up before ISMIR
author Yading Song <yading.song@eecs.qmul.ac.uk>
date Thu, 31 Oct 2013 13:17:06 +0000
parents f445c3017523
children
rev   line source
yading@11 1 /*
yading@11 2 * AltiVec-enhanced yuv-to-yuv conversion routines.
yading@11 3 *
yading@11 4 * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org>
yading@11 5 * based on the equivalent C code in swscale.c
yading@11 6 *
yading@11 7 * This file is part of FFmpeg.
yading@11 8 *
yading@11 9 * FFmpeg is free software; you can redistribute it and/or
yading@11 10 * modify it under the terms of the GNU Lesser General Public
yading@11 11 * License as published by the Free Software Foundation; either
yading@11 12 * version 2.1 of the License, or (at your option) any later version.
yading@11 13 *
yading@11 14 * FFmpeg is distributed in the hope that it will be useful,
yading@11 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
yading@11 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
yading@11 17 * Lesser General Public License for more details.
yading@11 18 *
yading@11 19 * You should have received a copy of the GNU Lesser General Public
yading@11 20 * License along with FFmpeg; if not, write to the Free Software
yading@11 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
yading@11 22 */
yading@11 23
yading@11 24 #include <inttypes.h>
yading@11 25
yading@11 26 #include "config.h"
yading@11 27 #include "libswscale/swscale.h"
yading@11 28 #include "libswscale/swscale_internal.h"
yading@11 29 #include "libavutil/cpu.h"
yading@11 30
yading@11 31 static int yv12toyuy2_unscaled_altivec(SwsContext *c, const uint8_t *src[],
yading@11 32 int srcStride[], int srcSliceY,
yading@11 33 int srcSliceH, uint8_t *dstParam[],
yading@11 34 int dstStride_a[])
yading@11 35 {
yading@11 36 uint8_t *dst = dstParam[0] + dstStride_a[0] * srcSliceY;
yading@11 37 // yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH,
yading@11 38 // srcStride[0], srcStride[1], dstStride[0]);
yading@11 39 const uint8_t *ysrc = src[0];
yading@11 40 const uint8_t *usrc = src[1];
yading@11 41 const uint8_t *vsrc = src[2];
yading@11 42 const int width = c->srcW;
yading@11 43 const int height = srcSliceH;
yading@11 44 const int lumStride = srcStride[0];
yading@11 45 const int chromStride = srcStride[1];
yading@11 46 const int dstStride = dstStride_a[0];
yading@11 47 const vector unsigned char yperm = vec_lvsl(0, ysrc);
yading@11 48 const int vertLumPerChroma = 2;
yading@11 49 register unsigned int y;
yading@11 50
yading@11 51 /* This code assumes:
yading@11 52 *
yading@11 53 * 1) dst is 16 bytes-aligned
yading@11 54 * 2) dstStride is a multiple of 16
yading@11 55 * 3) width is a multiple of 16
yading@11 56 * 4) lum & chrom stride are multiples of 8
yading@11 57 */
yading@11 58
yading@11 59 for (y = 0; y < height; y++) {
yading@11 60 int i;
yading@11 61 for (i = 0; i < width - 31; i += 32) {
yading@11 62 const unsigned int j = i >> 1;
yading@11 63 vector unsigned char v_yA = vec_ld(i, ysrc);
yading@11 64 vector unsigned char v_yB = vec_ld(i + 16, ysrc);
yading@11 65 vector unsigned char v_yC = vec_ld(i + 32, ysrc);
yading@11 66 vector unsigned char v_y1 = vec_perm(v_yA, v_yB, yperm);
yading@11 67 vector unsigned char v_y2 = vec_perm(v_yB, v_yC, yperm);
yading@11 68 vector unsigned char v_uA = vec_ld(j, usrc);
yading@11 69 vector unsigned char v_uB = vec_ld(j + 16, usrc);
yading@11 70 vector unsigned char v_u = vec_perm(v_uA, v_uB, vec_lvsl(j, usrc));
yading@11 71 vector unsigned char v_vA = vec_ld(j, vsrc);
yading@11 72 vector unsigned char v_vB = vec_ld(j + 16, vsrc);
yading@11 73 vector unsigned char v_v = vec_perm(v_vA, v_vB, vec_lvsl(j, vsrc));
yading@11 74 vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
yading@11 75 vector unsigned char v_uv_b = vec_mergel(v_u, v_v);
yading@11 76 vector unsigned char v_yuy2_0 = vec_mergeh(v_y1, v_uv_a);
yading@11 77 vector unsigned char v_yuy2_1 = vec_mergel(v_y1, v_uv_a);
yading@11 78 vector unsigned char v_yuy2_2 = vec_mergeh(v_y2, v_uv_b);
yading@11 79 vector unsigned char v_yuy2_3 = vec_mergel(v_y2, v_uv_b);
yading@11 80 vec_st(v_yuy2_0, (i << 1), dst);
yading@11 81 vec_st(v_yuy2_1, (i << 1) + 16, dst);
yading@11 82 vec_st(v_yuy2_2, (i << 1) + 32, dst);
yading@11 83 vec_st(v_yuy2_3, (i << 1) + 48, dst);
yading@11 84 }
yading@11 85 if (i < width) {
yading@11 86 const unsigned int j = i >> 1;
yading@11 87 vector unsigned char v_y1 = vec_ld(i, ysrc);
yading@11 88 vector unsigned char v_u = vec_ld(j, usrc);
yading@11 89 vector unsigned char v_v = vec_ld(j, vsrc);
yading@11 90 vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
yading@11 91 vector unsigned char v_yuy2_0 = vec_mergeh(v_y1, v_uv_a);
yading@11 92 vector unsigned char v_yuy2_1 = vec_mergel(v_y1, v_uv_a);
yading@11 93 vec_st(v_yuy2_0, (i << 1), dst);
yading@11 94 vec_st(v_yuy2_1, (i << 1) + 16, dst);
yading@11 95 }
yading@11 96 if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
yading@11 97 usrc += chromStride;
yading@11 98 vsrc += chromStride;
yading@11 99 }
yading@11 100 ysrc += lumStride;
yading@11 101 dst += dstStride;
yading@11 102 }
yading@11 103
yading@11 104 return srcSliceH;
yading@11 105 }
yading@11 106
yading@11 107 static int yv12touyvy_unscaled_altivec(SwsContext *c, const uint8_t *src[],
yading@11 108 int srcStride[], int srcSliceY,
yading@11 109 int srcSliceH, uint8_t *dstParam[],
yading@11 110 int dstStride_a[])
yading@11 111 {
yading@11 112 uint8_t *dst = dstParam[0] + dstStride_a[0] * srcSliceY;
yading@11 113 // yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH,
yading@11 114 // srcStride[0], srcStride[1], dstStride[0]);
yading@11 115 const uint8_t *ysrc = src[0];
yading@11 116 const uint8_t *usrc = src[1];
yading@11 117 const uint8_t *vsrc = src[2];
yading@11 118 const int width = c->srcW;
yading@11 119 const int height = srcSliceH;
yading@11 120 const int lumStride = srcStride[0];
yading@11 121 const int chromStride = srcStride[1];
yading@11 122 const int dstStride = dstStride_a[0];
yading@11 123 const int vertLumPerChroma = 2;
yading@11 124 const vector unsigned char yperm = vec_lvsl(0, ysrc);
yading@11 125 register unsigned int y;
yading@11 126
yading@11 127 /* This code assumes:
yading@11 128 *
yading@11 129 * 1) dst is 16 bytes-aligned
yading@11 130 * 2) dstStride is a multiple of 16
yading@11 131 * 3) width is a multiple of 16
yading@11 132 * 4) lum & chrom stride are multiples of 8
yading@11 133 */
yading@11 134
yading@11 135 for (y = 0; y < height; y++) {
yading@11 136 int i;
yading@11 137 for (i = 0; i < width - 31; i += 32) {
yading@11 138 const unsigned int j = i >> 1;
yading@11 139 vector unsigned char v_yA = vec_ld(i, ysrc);
yading@11 140 vector unsigned char v_yB = vec_ld(i + 16, ysrc);
yading@11 141 vector unsigned char v_yC = vec_ld(i + 32, ysrc);
yading@11 142 vector unsigned char v_y1 = vec_perm(v_yA, v_yB, yperm);
yading@11 143 vector unsigned char v_y2 = vec_perm(v_yB, v_yC, yperm);
yading@11 144 vector unsigned char v_uA = vec_ld(j, usrc);
yading@11 145 vector unsigned char v_uB = vec_ld(j + 16, usrc);
yading@11 146 vector unsigned char v_u = vec_perm(v_uA, v_uB, vec_lvsl(j, usrc));
yading@11 147 vector unsigned char v_vA = vec_ld(j, vsrc);
yading@11 148 vector unsigned char v_vB = vec_ld(j + 16, vsrc);
yading@11 149 vector unsigned char v_v = vec_perm(v_vA, v_vB, vec_lvsl(j, vsrc));
yading@11 150 vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
yading@11 151 vector unsigned char v_uv_b = vec_mergel(v_u, v_v);
yading@11 152 vector unsigned char v_uyvy_0 = vec_mergeh(v_uv_a, v_y1);
yading@11 153 vector unsigned char v_uyvy_1 = vec_mergel(v_uv_a, v_y1);
yading@11 154 vector unsigned char v_uyvy_2 = vec_mergeh(v_uv_b, v_y2);
yading@11 155 vector unsigned char v_uyvy_3 = vec_mergel(v_uv_b, v_y2);
yading@11 156 vec_st(v_uyvy_0, (i << 1), dst);
yading@11 157 vec_st(v_uyvy_1, (i << 1) + 16, dst);
yading@11 158 vec_st(v_uyvy_2, (i << 1) + 32, dst);
yading@11 159 vec_st(v_uyvy_3, (i << 1) + 48, dst);
yading@11 160 }
yading@11 161 if (i < width) {
yading@11 162 const unsigned int j = i >> 1;
yading@11 163 vector unsigned char v_y1 = vec_ld(i, ysrc);
yading@11 164 vector unsigned char v_u = vec_ld(j, usrc);
yading@11 165 vector unsigned char v_v = vec_ld(j, vsrc);
yading@11 166 vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
yading@11 167 vector unsigned char v_uyvy_0 = vec_mergeh(v_uv_a, v_y1);
yading@11 168 vector unsigned char v_uyvy_1 = vec_mergel(v_uv_a, v_y1);
yading@11 169 vec_st(v_uyvy_0, (i << 1), dst);
yading@11 170 vec_st(v_uyvy_1, (i << 1) + 16, dst);
yading@11 171 }
yading@11 172 if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
yading@11 173 usrc += chromStride;
yading@11 174 vsrc += chromStride;
yading@11 175 }
yading@11 176 ysrc += lumStride;
yading@11 177 dst += dstStride;
yading@11 178 }
yading@11 179 return srcSliceH;
yading@11 180 }
yading@11 181
yading@11 182 void ff_swscale_get_unscaled_altivec(SwsContext *c)
yading@11 183 {
yading@11 184 if ((av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) && !(c->srcW & 15) &&
yading@11 185 !(c->flags & SWS_BITEXACT) && c->srcFormat == AV_PIX_FMT_YUV420P) {
yading@11 186 enum AVPixelFormat dstFormat = c->dstFormat;
yading@11 187
yading@11 188 // unscaled YV12 -> packed YUV, we want speed
yading@11 189 if (dstFormat == AV_PIX_FMT_YUYV422)
yading@11 190 c->swScale = yv12toyuy2_unscaled_altivec;
yading@11 191 else if (dstFormat == AV_PIX_FMT_UYVY422)
yading@11 192 c->swScale = yv12touyvy_unscaled_altivec;
yading@11 193 }
yading@11 194 }