#define vzero vec_splat_s32(0)

#define yuv2planeX_8(d1, d2, l1, src, x, perm, filter) do {     \
        vector signed short l2  = vec_ld(((x) << 1) + 16, src); \
        vector signed short ls  = vec_perm(l1, l2, perm);       \
        vector signed int   i1  = vec_mule(filter, ls);         \
        vector signed int   i2  = vec_mulo(filter, ls);         \
        vector signed int   vf1 = vec_mergeh(i1, i2);           \
        vector signed int   vf2 = vec_mergel(i1, i2);           \
        d1 = vec_add(d1, vf1);                                  \
        d2 = vec_add(d2, vf2);                                  \
        l1 = l2; /* slide the load window for the next 8 samples */ \
    } while (0)

static void yuv2planeX_16_altivec(const int16_t *filter, int filterSize,
                                  const int16_t **src, uint8_t *dest,
                                  const uint8_t *dither, int offset, int x)
{
    int i, j;
    DECLARE_ALIGNED(16, int, val)[16];
    vector signed int vo1, vo2, vo3, vo4;
    vector unsigned short vs1, vs2;
    vector unsigned char vf;
    /* vec_splat_u32() only takes a 5-bit immediate, so build 19 as 10 + 9. */
    vector unsigned int altivec_vectorShiftInt19 =
        vec_add(vec_splat_u32(10), vec_splat_u32(9));
    /* Preload the dither bias, shifted into the 19-bit accumulator domain. */
    for (i = 0; i < 16; i++)
        val[i] = dither[(x + i + offset) & 7] << 12;

    vo1 = vec_ld(0,  val);
    vo2 = vec_ld(16, val);
    vo3 = vec_ld(32, val);
    vo4 = vec_ld(48, val);
    for (j = 0; j < filterSize; j++) {
        /* Load one 16-bit coefficient and splat it across the vector. */
        vector signed short l1, vLumFilter = vec_ld(j << 1, filter);
        vector unsigned char perm, perm0 = vec_lvsl(j << 1, filter);
        vLumFilter = vec_perm(vLumFilter, vLumFilter, perm0);
        vLumFilter = vec_splat(vLumFilter, 0);

        perm = vec_lvsl(x << 1, src[j]);
        l1   = vec_ld(x << 1, src[j]);

        yuv2planeX_8(vo1, vo2, l1, src[j], x,     perm, vLumFilter);
        yuv2planeX_8(vo3, vo4, l1, src[j], x + 8, perm, vLumFilter);
    }
    vo1 = vec_sra(vo1, altivec_vectorShiftInt19);
    vo2 = vec_sra(vo2, altivec_vectorShiftInt19);
    vo3 = vec_sra(vo3, altivec_vectorShiftInt19);
    vo4 = vec_sra(vo4, altivec_vectorShiftInt19);
    vs1 = vec_packsu(vo1, vo2);
    vs2 = vec_packsu(vo3, vo4);
    vf  = vec_packsu(vs1, vs2);
    vec_st(vf, 0, dest);
}
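/*
 * Illustrative scalar equivalent of one yuv2planeX_8 step above (an added
 * sketch for exposition, not part of the original file): each invocation
 * multiplies eight int16 samples by one splatted coefficient and accumulates
 * the widened 32-bit products, which is what vec_mule/vec_mulo followed by
 * vec_mergeh/vec_mergel achieve in vector form.
 */
#if 0
static void yuv2planeX_8_scalar(int32_t acc[8], const int16_t *samples,
                                int x, int16_t coeff)
{
    for (int k = 0; k < 8; k++)
        acc[k] += samples[x + k] * (int32_t)coeff; /* widened to 32 bits */
}
#endif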
static void yuv2planeX_u(const int16_t *filter, int filterSize,
                         const int16_t **src, uint8_t *dest, int dstW,
                         const uint8_t *dither, int offset, int x)
{
    int i, j;

    for (i = x; i < dstW; i++) {
        int t = dither[(i + offset) & 7] << 12;
        for (j = 0; j < filterSize; j++)
            t += src[j][i] * filter[j];
        dest[i] = av_clip_uint8(t >> 19);
    }
}
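/*
 * Note (added commentary): both paths compute
 * clip(((dither << 12) + sum(src * coeff)) >> 19). In the vector version the
 * dither bias is preloaded into vo1..vo4, vec_sra performs the >> 19, and the
 * two vec_packsu stages provide the unsigned-saturating clamp that
 * av_clip_uint8() performs here.
 */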
static void yuv2planeX_altivec(const int16_t *filter, int filterSize,
                               const int16_t **src, uint8_t *dest, int dstW,
                               const uint8_t *dither, int offset)
{
    int dst_u = -(uintptr_t)dest & 15;
    int i;

    /* Scalar until dest is 16-byte aligned. */
    yuv2planeX_u(filter, filterSize, src, dest, dst_u, dither, offset, 0);

    /* Vector over the aligned middle, 16 output pixels at a time. */
    for (i = dst_u; i < dstW - 15; i += 16)
        yuv2planeX_16_altivec(filter, filterSize, src, dest + i,
                              dither, offset, i);

    /* Scalar for the remaining tail. */
    yuv2planeX_u(filter, filterSize, src, dest, dstW, dither, offset, i);
}
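/*
 * Example of the alignment split above (added, with a made-up address): if
 * dest == 0x1009, then dst_u = -(uintptr_t)dest & 15 == 7, so pixels [0,7)
 * take the scalar path, the vector loop starts on a 16-byte boundary, and
 * anything past the last full 16-pixel group is finished by the scalar
 * tail call.
 */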
static void hScale_altivec_real(SwsContext *c, int16_t *dst, int dstW,
                                const uint8_t *src, const int16_t *filter,
                                const int32_t *filterPos, int filterSize)
{
    register int i;
    DECLARE_ALIGNED(16, int, tempo)[4];

    if (filterSize % 4) {
        /* Filter sizes the vector cases below cannot handle: plain C. */
        for (i = 0; i < dstW; i++) {
            register int j;
            register int srcPos = filterPos[i];
            register int val    = 0;
            for (j = 0; j < filterSize; j++)
                val += ((int)src[srcPos + j]) * filter[filterSize * i + j];
            dst[i] = FFMIN(val >> 7, (1 << 15) - 1);
        }
    } else
        switch (filterSize) {
        case 4:
            for (i = 0; i < dstW; i++) {
                register int srcPos = filterPos[i];

                vector unsigned char src_v0 = vec_ld(srcPos, src);
                vector unsigned char src_v1, src_vF;
                vector signed short src_v, filter_v;
                vector signed int val_vEven, val_s;
                if ((((uintptr_t)src + srcPos) % 16) > 12) {
                    /* The 4 samples cross an alignment boundary. */
                    src_v1 = vec_ld(srcPos + 16, src);
                }
                src_vF = vec_perm(src_v0, src_v1, vec_lvsl(srcPos, src));

                /* Zero-extend the source bytes to 16 bits. */
                src_v = (vector signed short)(vec_mergeh((vector unsigned char)vzero, src_vF));
                /* Put the four samples in the even slots for vec_mule. */
                src_v = vec_mergeh(src_v, (vector signed short)vzero);

                /* i << 3: 4 coefficients * sizeof(int16_t) = 8 bytes per pixel,
                 * so the coefficients sit in either half of the loaded vector;
                 * move the right half into the even slots. */
                filter_v = vec_ld(i << 3, filter);
                if ((i << 3) % 16) // odd case
                    filter_v = vec_mergel(filter_v, (vector signed short)vzero);
                else               // even case
                    filter_v = vec_mergeh(filter_v, (vector signed short)vzero);

                val_vEven = vec_mule(src_v, filter_v);
                val_s     = vec_sums(val_vEven, vzero);
                vec_st(val_s, 0, tempo);
                dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1);
            }
            break;
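        /*
         * Added note: vec_sums() adds the four int32 elements of its first
         * operand (plus element 3 of its second operand, vzero here) and
         * leaves the saturated total in element 3 only, which is why every
         * case stores the result vector and reads back tempo[3].
         */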
        case 8:
            for (i = 0; i < dstW; i++) {
                register int srcPos = filterPos[i];

                vector unsigned char src_v0 = vec_ld(srcPos, src);
                vector unsigned char src_v1, src_vF;
                vector signed short src_v, filter_v;
                vector signed int val_v, val_s;
                if ((((uintptr_t)src + srcPos) % 16) > 8) {
                    /* The 8 samples cross an alignment boundary. */
                    src_v1 = vec_ld(srcPos + 16, src);
                }
                src_vF = vec_perm(src_v0, src_v1, vec_lvsl(srcPos, src));

                /* Zero-extend the 8 source bytes to 16 bits. */
                src_v = (vector signed short)(vec_mergeh((vector unsigned char)vzero, src_vF));
                /* i << 4: 8 coefficients * sizeof(int16_t) = 16 bytes per pixel. */
                filter_v = vec_ld(i << 4, filter);

                val_v = vec_msums(src_v, filter_v, (vector signed int)vzero);
                val_s = vec_sums(val_v, vzero);
                vec_st(val_s, 0, tempo);
                dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1);
            }
            break;
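        /*
         * Added note: vec_msums() multiplies eight int16 pairs and
         * accumulates the sums into four int32 lanes, so eight taps need a
         * single call here, the 16-tap case below chains two, and the
         * default case issues two per 16-sample chunk.
         */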
        case 16:
            for (i = 0; i < dstW; i++) {
                register int srcPos = filterPos[i];

                /* 16 samples always span at most two aligned vectors. */
                vector unsigned char src_v0 = vec_ld(srcPos, src);
                vector unsigned char src_v1 = vec_ld(srcPos + 16, src);
                vector unsigned char src_vF = vec_perm(src_v0, src_v1, vec_lvsl(srcPos, src));

                /* Zero-extend the 16 source bytes into two short vectors. */
                vector signed short src_vA =
                    (vector signed short)(vec_mergeh((vector unsigned char)vzero, src_vF));
                vector signed short src_vB =
                    (vector signed short)(vec_mergel((vector unsigned char)vzero, src_vF));

                /* i << 5: 16 coefficients * sizeof(int16_t) = 32 bytes per pixel. */
                vector signed short filter_v0 = vec_ld(i << 5, filter);
                vector signed short filter_v1 = vec_ld((i << 5) + 16, filter);

                vector signed int val_acc = vec_msums(src_vA, filter_v0, (vector signed int)vzero);
                vector signed int val_v   = vec_msums(src_vB, filter_v1, val_acc);

                vector signed int val_s = vec_sums(val_v, vzero);

                vec_st(val_s, 0, tempo);
                dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1);
            }
            break;
        default:
            for (i = 0; i < dstW; i++) {
                register int j;
                register int srcPos = filterPos[i];

                vector signed int val_s, val_v = (vector signed int)vzero;
                vector signed short filter_v0R = vec_ld(i * 2 * filterSize, filter);
                vector unsigned char permF = vec_lvsl((i * 2 * filterSize), filter);

                vector unsigned char src_v0 = vec_ld(srcPos, src);
                vector unsigned char permS = vec_lvsl(srcPos, src);

                for (j = 0; j < filterSize - 15; j += 16) {
                    vector unsigned char src_v1 = vec_ld(srcPos + j + 16, src);
                    vector unsigned char src_vF = vec_perm(src_v0, src_v1, permS);

                    /* Zero-extend the 16 source bytes into two short vectors. */
                    vector signed short src_vA =
                        (vector signed short)(vec_mergeh((vector unsigned char)vzero, src_vF));
                    vector signed short src_vB =
                        (vector signed short)(vec_mergel((vector unsigned char)vzero, src_vF));

                    vector signed short filter_v1R = vec_ld((i * 2 * filterSize) + (j * 2) + 16, filter);
                    vector signed short filter_v2R = vec_ld((i * 2 * filterSize) + (j * 2) + 32, filter);
                    vector signed short filter_v0  = vec_perm(filter_v0R, filter_v1R, permF);
                    vector signed short filter_v1  = vec_perm(filter_v1R, filter_v2R, permF);

                    vector signed int val_acc = vec_msums(src_vA, filter_v0, val_v);
                    val_v = vec_msums(src_vB, filter_v1, val_acc);

                    /* Slide the load window: reuse the last vectors loaded. */
                    filter_v0R = filter_v2R;
                    src_v0     = src_v1;
                }

                if (j < filterSize - 7) {
                    /* At least 8 taps left: one final 8-sample block. */
                    vector unsigned char src_v1, src_vF;
                    vector signed short src_v, filter_v1R, filter_v;
                    if ((((uintptr_t)src + srcPos) % 16) > 8) {
                        src_v1 = vec_ld(srcPos + j + 16, src);
                    }
                    src_vF = vec_perm(src_v0, src_v1, permS);

                    src_v = (vector signed short)(vec_mergeh((vector unsigned char)vzero, src_vF));
                    filter_v1R = vec_ld((i * 2 * filterSize) + (j * 2) + 16, filter);
                    filter_v   = vec_perm(filter_v0R, filter_v1R, permF);

                    val_v = vec_msums(src_v, filter_v, val_v);
                }

                val_s = vec_sums(val_v, vzero);

                vec_st(val_s, 0, tempo);
                dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1);
            }
            break;
        }
}
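/*
 * For context, a minimal sketch of how these kernels get installed (an added
 * illustration: the real ff_sws_init_swScale_altivec() in this file performs
 * additional format checks, so treat the exact conditions below as
 * assumptions):
 */
#if 0
av_cold void ff_sws_init_swScale_altivec_sketch(SwsContext *c)
{
    if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
        return;

    c->hyScale = c->hcScale = hScale_altivec_real;
    if (!is16BPS(c->dstFormat) && !is9_OR_10BPS(c->dstFormat) &&
        !(c->flags & SWS_FULL_CHR_H_INT))
        c->yuv2planeX = yuv2planeX_altivec;
}
#endif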