38 register vector
unsigned char pixelsv1, pixelsv2;
39 register vector
unsigned char pixelsv1B, pixelsv2B;
40 register vector
unsigned char pixelsv1C, pixelsv2C;
41 register vector
unsigned char pixelsv1D, pixelsv2D;
43 register vector
unsigned char perm = vec_lvsl(0, pixels);
45 register ptrdiff_t line_size_2 = line_size << 1;
46 register ptrdiff_t line_size_3 = line_size + line_size_2;
47 register ptrdiff_t line_size_4 = line_size << 2;
54 for (i = 0; i < h; i += 4) {
55 pixelsv1 = vec_ld( 0, pixels);
56 pixelsv2 = vec_ld(15, pixels);
57 pixelsv1B = vec_ld(line_size, pixels);
58 pixelsv2B = vec_ld(15 + line_size, pixels);
59 pixelsv1C = vec_ld(line_size_2, pixels);
60 pixelsv2C = vec_ld(15 + line_size_2, pixels);
61 pixelsv1D = vec_ld(line_size_3, pixels);
62 pixelsv2D = vec_ld(15 + line_size_3, pixels);
63 vec_st(vec_perm(pixelsv1, pixelsv2, perm),
64 0, (
unsigned char*)block);
65 vec_st(vec_perm(pixelsv1B, pixelsv2B, perm),
66 line_size, (
unsigned char*)block);
67 vec_st(vec_perm(pixelsv1C, pixelsv2C, perm),
68 line_size_2, (
unsigned char*)block);
69 vec_st(vec_perm(pixelsv1D, pixelsv2D, perm),
70 line_size_3, (
unsigned char*)block);
/*
 * op_avg(a, b): treat a and b as four packed bytes each and store in a the
 * per-byte average rounded up. (a|b) - ((a^b)>>1) equals ceil((a+b)/2); the
 * 0xFEFEFEFE mask keeps the shift from leaking bits across byte boundaries.
 * Note: a is evaluated twice and assigned to. Not referenced by the code
 * visible in this part of the file.
 */
#define op_avg(a,b)  a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) )

/**
 * Average a 16-byte-wide, h-row source block into the destination:
 * block[x] = vec_avg(block[x], pixels[x]) for every row.
 *
 * The realignment permute is computed once from the initial value of
 * pixels and reused for every row, so the source alignment must not change
 * from row to row, i.e. this relies on (line_size % 16) == 0. The
 * destination is read and written with vec_ld/vec_st at offset 0, which
 * operate on the enclosing 16-byte-aligned quadword, so block is expected
 * to be 16-byte aligned.
 *
 * @param block     destination rows, updated in place
 * @param pixels    source rows, any alignment
 * @param line_size byte distance between consecutive rows in both buffers
 * @param h         number of rows to process
 */
void ff_avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{
    register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv;
    register vector unsigned char perm = vec_lvsl(0, pixels);
    int i;

    for (i = 0; i < h; i++) {
        /* Two aligned loads straddling the (possibly unaligned) source row. */
        pixelsv1 = vec_ld( 0, pixels);
        pixelsv2 = vec_ld(16, pixels);
        blockv   = vec_ld( 0, block);
        /* Extract the 16 source bytes starting at pixels. */
        pixelsv  = vec_perm(pixelsv1, pixelsv2, perm);
        blockv   = vec_avg(blockv, pixelsv);
        vec_st(blockv, 0, (unsigned char *)block);

        /* Advance both pointers to the next row. */
        pixels += line_size;
        block  += line_size;
    }
}
/**
 * Average an 8-byte-wide, h-row source block into the destination.
 *
 * The destination is accessed as a full 16-byte quadword (vec_ld/vec_st
 * ignore the low four address bits). Only the 8-byte half that block points
 * into is changed: the other half is averaged with itself, which leaves it
 * unmodified.
 *
 * NOTE(review): this presumes block is 8-byte aligned, so that it selects
 * exactly the left or the right half of a quadword - confirm with callers.
 * vcprm() is defined elsewhere; it is used here as a word-granular permute
 * mask in which sN picks word N of the second vec_perm operand.
 *
 * @param block     destination rows, updated in place
 * @param pixels    source rows, any alignment
 * @param line_size byte distance between consecutive rows in both buffers
 * @param h         number of rows to process
 */
static void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, ptrdiff_t line_size, int h)
{
    register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv;
    int i;

    for (i = 0; i < h; i++) {
        /* Non-zero when block lies in the right (upper-address) half of
         * its 16-byte quadword. */
        int rightside = ((unsigned long)block & 0x0000000F);

        blockv   = vec_ld( 0, block);
        pixelsv1 = vec_ld( 0, pixels);
        pixelsv2 = vec_ld(16, pixels);
        /* Realign: first 8 bytes of pixelsv are the 8 source pixels. */
        pixelsv  = vec_perm(pixelsv1, pixelsv2, vec_lvsl(0, pixels));

        /* Place the 8 source bytes in the half being written and copy the
         * existing destination bytes into the other half. */
        if (rightside) {
            pixelsv = vec_perm(blockv, pixelsv, vcprm(0,1,s0,s1));
        } else {
            pixelsv = vec_perm(blockv, pixelsv, vcprm(s0,s1,2,3));
        }

        blockv = vec_avg(blockv, pixelsv);
        vec_st(blockv, 0, block);

        /* Advance both pointers to the next row. */
        pixels += line_size;
        block  += line_size;
    }
}
/**
 * Store the rounded 2x2 average of the source into an 8-byte-wide block:
 * block[x] = (p[x] + p[x+1] + p[x+line_size] + p[x+line_size+1] + 2) >> 2.
 *
 * The horizontal pair sum of each source row is computed once and carried
 * into the next iteration (with the rounding constant already added), so
 * every source row is loaded only once. h + 1 source rows are read.
 *
 * The destination is accessed as a full 16-byte quadword; only the half
 * that block points into is replaced.
 *
 * NOTE(review): presumes block is 8-byte aligned and big-endian lane order
 * for the vec_mergeh zero-extension - confirm. vcprm() is defined
 * elsewhere; sN is used as "word N of the second vec_perm operand".
 *
 * @param block     destination rows
 * @param pixels    source rows, any alignment
 * @param line_size byte distance between consecutive rows in both buffers
 * @param h         number of destination rows to produce
 */
static void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{
    register int i;
    register vector unsigned char pixelsv1, pixelsv2, pixelsavg;
    register vector unsigned char blockv, temp1, temp2;
    register vector unsigned short pixelssum1, pixelssum2, temp3;
    register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
    register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);

    /* First source row: pixelsv1 = bytes at pixels, pixelsv2 = bytes at
     * pixels + 1. */
    temp1 = vec_ld(0, pixels);
    temp2 = vec_ld(16, pixels);
    pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
    if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) {
        /* pixels + 1 is 16-byte aligned: vec_lvsl(1, pixels) would yield a
         * zero shift and select temp1, so take the second load directly. */
        pixelsv2 = temp2;
    } else {
        pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
    }
    /* Zero-extend the first 8 bytes to 16-bit lanes. */
    pixelsv1 = vec_mergeh(vczero, pixelsv1);
    pixelsv2 = vec_mergeh(vczero, pixelsv2);
    pixelssum1 = vec_add((vector unsigned short)pixelsv1,
                         (vector unsigned short)pixelsv2);
    /* Fold the rounding constant into the carried row sum. */
    pixelssum1 = vec_add(pixelssum1, vctwo);

    for (i = 0; i < h ; i++) {
        /* Non-zero when block lies in the right half of its quadword. */
        int rightside = ((unsigned long)block & 0x0000000F);
        blockv = vec_ld(0, block);

        /* Next source row, same realignment scheme as above. */
        temp1 = vec_ld(line_size, pixels);
        temp2 = vec_ld(line_size + 16, pixels);
        pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
        if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F) {
            pixelsv2 = temp2;
        } else {
            pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
        }

        pixelsv1 = vec_mergeh(vczero, pixelsv1);
        pixelsv2 = vec_mergeh(vczero, pixelsv2);
        pixelssum2 = vec_add((vector unsigned short)pixelsv1,
                             (vector unsigned short)pixelsv2);
        /* (row sum + 2) + next row sum, then divide by four. */
        temp3 = vec_add(pixelssum1, pixelssum2);
        temp3 = vec_sra(temp3, vctwo);
        /* The row just read becomes the upper row of the next iteration. */
        pixelssum1 = vec_add(pixelssum2, vctwo);
        pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero);

        /* Merge the 8 result bytes into the half being written. */
        if (rightside) {
            blockv = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1));
        } else {
            blockv = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3));
        }

        vec_st(blockv, 0, block);

        /* Advance both pointers to the next row. */
        block  += line_size;
        pixels += line_size;
    }
}
/**
 * Store the 2x2 source average, with the smaller rounding bias, into an
 * 8-byte-wide block:
 * block[x] = (p[x] + p[x+1] + p[x+line_size] + p[x+line_size+1] + 1) >> 2.
 *
 * The horizontal pair sum of each source row is computed once and carried
 * into the next iteration (with the bias of 1 already added). h + 1 source
 * rows are read.
 *
 * The destination is accessed as a full 16-byte quadword; only the half
 * that block points into is replaced.
 *
 * NOTE(review): presumes block is 8-byte aligned and big-endian lane order
 * for the vec_mergeh zero-extension - confirm. vcprm() is defined
 * elsewhere; sN is used as "word N of the second vec_perm operand".
 *
 * @param block     destination rows
 * @param pixels    source rows, any alignment
 * @param line_size byte distance between consecutive rows in both buffers
 * @param h         number of destination rows to produce
 */
static void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{
    register int i;
    register vector unsigned char pixelsv1, pixelsv2, pixelsavg;
    register vector unsigned char blockv, temp1, temp2;
    register vector unsigned short pixelssum1, pixelssum2, temp3;
    register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
    register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1);
    register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);

    /* First source row: pixelsv1 = bytes at pixels, pixelsv2 = bytes at
     * pixels + 1. */
    temp1 = vec_ld(0, pixels);
    temp2 = vec_ld(16, pixels);
    pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
    if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) {
        /* pixels + 1 is 16-byte aligned: vec_lvsl(1, pixels) would yield a
         * zero shift and select temp1, so take the second load directly. */
        pixelsv2 = temp2;
    } else {
        pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
    }
    /* Zero-extend the first 8 bytes to 16-bit lanes. */
    pixelsv1 = vec_mergeh(vczero, pixelsv1);
    pixelsv2 = vec_mergeh(vczero, pixelsv2);
    pixelssum1 = vec_add((vector unsigned short)pixelsv1,
                         (vector unsigned short)pixelsv2);
    /* Bias of 1 (instead of 2) before the shift by two. */
    pixelssum1 = vec_add(pixelssum1, vcone);

    for (i = 0; i < h ; i++) {
        /* Non-zero when block lies in the right half of its quadword. */
        int rightside = ((unsigned long)block & 0x0000000F);
        blockv = vec_ld(0, block);

        /* Next source row, same realignment scheme as above. */
        temp1 = vec_ld(line_size, pixels);
        temp2 = vec_ld(line_size + 16, pixels);
        pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
        if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F) {
            pixelsv2 = temp2;
        } else {
            pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
        }

        pixelsv1 = vec_mergeh(vczero, pixelsv1);
        pixelsv2 = vec_mergeh(vczero, pixelsv2);
        pixelssum2 = vec_add((vector unsigned short)pixelsv1,
                             (vector unsigned short)pixelsv2);
        /* (row sum + 1) + next row sum, then divide by four. */
        temp3 = vec_add(pixelssum1, pixelssum2);
        temp3 = vec_sra(temp3, vctwo);
        /* The row just read becomes the upper row of the next iteration. */
        pixelssum1 = vec_add(pixelssum2, vcone);
        pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero);

        /* Merge the 8 result bytes into the half being written. */
        if (rightside) {
            blockv = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1));
        } else {
            blockv = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3));
        }

        vec_st(blockv, 0, block);

        /* Advance both pointers to the next row. */
        block  += line_size;
        pixels += line_size;
    }
}
/**
 * Store the rounded 2x2 average of the source into a 16-byte-wide block:
 * block[x] = (p[x] + p[x+1] + p[x+line_size] + p[x+line_size+1] + 2) >> 2.
 *
 * Each 16-byte row is widened to two vectors of eight 16-bit lanes
 * (vec_mergeh for the first 8 bytes, vec_mergel for the last 8). The
 * horizontal pair sums of the current row are carried into the next
 * iteration with the rounding constant already added, so each source row is
 * loaded once. h + 1 source rows are read.
 *
 * The result is written with vec_st at offset 0, which stores to the
 * enclosing 16-byte-aligned quadword, so block is expected to be 16-byte
 * aligned.
 *
 * NOTE(review): big-endian lane order is assumed for the merge-with-zero
 * widening - confirm.
 *
 * @param block     destination rows
 * @param pixels    source rows, any alignment
 * @param line_size byte distance between consecutive rows in both buffers
 * @param h         number of destination rows to produce
 */
static void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, ptrdiff_t line_size, int h)
{
    register int i;
    register vector unsigned char pixelsv1, pixelsv2, pixelsv3, pixelsv4;
    register vector unsigned char blockv, temp1, temp2;
    register vector unsigned short temp3, temp4,
        pixelssum1, pixelssum2, pixelssum3, pixelssum4;
    register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
    register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);

    /* First source row: pixelsv1 = bytes at pixels, pixelsv2 = bytes at
     * pixels + 1. */
    temp1 = vec_ld(0, pixels);
    temp2 = vec_ld(16, pixels);
    pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
    if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) {
        /* pixels + 1 is 16-byte aligned: vec_lvsl(1, pixels) would yield a
         * zero shift and select temp1, so take the second load directly. */
        pixelsv2 = temp2;
    } else {
        pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
    }
    /* Widen: *3/*4 hold the last 8 bytes, *1/*2 the first 8 bytes. */
    pixelsv3 = vec_mergel(vczero, pixelsv1);
    pixelsv4 = vec_mergel(vczero, pixelsv2);
    pixelsv1 = vec_mergeh(vczero, pixelsv1);
    pixelsv2 = vec_mergeh(vczero, pixelsv2);
    pixelssum3 = vec_add((vector unsigned short)pixelsv3,
                         (vector unsigned short)pixelsv4);
    pixelssum3 = vec_add(pixelssum3, vctwo);
    pixelssum1 = vec_add((vector unsigned short)pixelsv1,
                         (vector unsigned short)pixelsv2);
    pixelssum1 = vec_add(pixelssum1, vctwo);

    for (i = 0; i < h ; i++) {
        /* Next source row, same realignment scheme as above. */
        temp1 = vec_ld(line_size, pixels);
        temp2 = vec_ld(line_size + 16, pixels);
        pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
        if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F) {
            pixelsv2 = temp2;
        } else {
            pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
        }

        pixelsv3 = vec_mergel(vczero, pixelsv1);
        pixelsv4 = vec_mergel(vczero, pixelsv2);
        pixelsv1 = vec_mergeh(vczero, pixelsv1);
        pixelsv2 = vec_mergeh(vczero, pixelsv2);

        pixelssum4 = vec_add((vector unsigned short)pixelsv3,
                             (vector unsigned short)pixelsv4);
        pixelssum2 = vec_add((vector unsigned short)pixelsv1,
                             (vector unsigned short)pixelsv2);
        /* (row sum + 2) + next row sum, then divide by four. */
        temp4 = vec_add(pixelssum3, pixelssum4);
        temp4 = vec_sra(temp4, vctwo);
        temp3 = vec_add(pixelssum1, pixelssum2);
        temp3 = vec_sra(temp3, vctwo);

        /* The row just read becomes the upper row of the next iteration. */
        pixelssum3 = vec_add(pixelssum4, vctwo);
        pixelssum1 = vec_add(pixelssum2, vctwo);

        /* Narrow both halves back to 16 bytes and store the whole row. */
        blockv = vec_packsu(temp3, temp4);

        vec_st(blockv, 0, block);

        /* Advance both pointers to the next row. */
        block  += line_size;
        pixels += line_size;
    }
}
/**
 * Store the 2x2 source average, with the smaller rounding bias, into a
 * 16-byte-wide block:
 * block[x] = (p[x] + p[x+1] + p[x+line_size] + p[x+line_size+1] + 1) >> 2.
 *
 * Each 16-byte row is widened to two vectors of eight 16-bit lanes
 * (vec_mergeh for the first 8 bytes, vec_mergel for the last 8). The
 * horizontal pair sums of the current row are carried into the next
 * iteration with the bias of 1 already added, so each source row is loaded
 * once. h + 1 source rows are read.
 *
 * The result is written with vec_st at offset 0, which stores to the
 * enclosing 16-byte-aligned quadword, so block is expected to be 16-byte
 * aligned.
 *
 * NOTE(review): big-endian lane order is assumed for the merge-with-zero
 * widening - confirm.
 *
 * @param block     destination rows
 * @param pixels    source rows, any alignment
 * @param line_size byte distance between consecutive rows in both buffers
 * @param h         number of destination rows to produce
 */
static void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, ptrdiff_t line_size, int h)
{
    register int i;
    register vector unsigned char pixelsv1, pixelsv2, pixelsv3, pixelsv4;
    register vector unsigned char blockv, temp1, temp2;
    register vector unsigned short temp3, temp4,
        pixelssum1, pixelssum2, pixelssum3, pixelssum4;
    register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
    register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1);
    register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);

    /* First source row: pixelsv1 = bytes at pixels, pixelsv2 = bytes at
     * pixels + 1. */
    temp1 = vec_ld(0, pixels);
    temp2 = vec_ld(16, pixels);
    pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
    if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) {
        /* pixels + 1 is 16-byte aligned: vec_lvsl(1, pixels) would yield a
         * zero shift and select temp1, so take the second load directly. */
        pixelsv2 = temp2;
    } else {
        pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
    }
    /* Widen: *3/*4 hold the last 8 bytes, *1/*2 the first 8 bytes. */
    pixelsv3 = vec_mergel(vczero, pixelsv1);
    pixelsv4 = vec_mergel(vczero, pixelsv2);
    pixelsv1 = vec_mergeh(vczero, pixelsv1);
    pixelsv2 = vec_mergeh(vczero, pixelsv2);
    pixelssum3 = vec_add((vector unsigned short)pixelsv3,
                         (vector unsigned short)pixelsv4);
    /* Bias of 1 (instead of 2) before the shift by two. */
    pixelssum3 = vec_add(pixelssum3, vcone);
    pixelssum1 = vec_add((vector unsigned short)pixelsv1,
                         (vector unsigned short)pixelsv2);
    pixelssum1 = vec_add(pixelssum1, vcone);

    for (i = 0; i < h ; i++) {
        /* Next source row, same realignment scheme as above. */
        temp1 = vec_ld(line_size, pixels);
        temp2 = vec_ld(line_size + 16, pixels);
        pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
        if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F) {
            pixelsv2 = temp2;
        } else {
            pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
        }

        pixelsv3 = vec_mergel(vczero, pixelsv1);
        pixelsv4 = vec_mergel(vczero, pixelsv2);
        pixelsv1 = vec_mergeh(vczero, pixelsv1);
        pixelsv2 = vec_mergeh(vczero, pixelsv2);

        pixelssum4 = vec_add((vector unsigned short)pixelsv3,
                             (vector unsigned short)pixelsv4);
        pixelssum2 = vec_add((vector unsigned short)pixelsv1,
                             (vector unsigned short)pixelsv2);
        /* (row sum + 1) + next row sum, then divide by four. */
        temp4 = vec_add(pixelssum3, pixelssum4);
        temp4 = vec_sra(temp4, vctwo);
        temp3 = vec_add(pixelssum1, pixelssum2);
        temp3 = vec_sra(temp3, vctwo);

        /* The row just read becomes the upper row of the next iteration. */
        pixelssum3 = vec_add(pixelssum4, vcone);
        pixelssum1 = vec_add(pixelssum2, vcone);

        /* Narrow both halves back to 16 bytes and store the whole row. */
        blockv = vec_packsu(temp3, temp4);

        vec_st(blockv, 0, block);

        /* Advance both pointers to the next row. */
        block  += line_size;
        pixels += line_size;
    }
}
/**
 * Compute the rounded 2x2 average of the source,
 * (p[x] + p[x+1] + p[x+line_size] + p[x+line_size+1] + 2) >> 2,
 * and average it (vec_avg) with the existing contents of an 8-byte-wide
 * destination block.
 *
 * The horizontal pair sum of each source row is computed once and carried
 * into the next iteration with the rounding constant already added.
 * h + 1 source rows are read.
 *
 * The destination is accessed as a full 16-byte quadword. The half that
 * block does not point into is averaged with itself, which leaves it
 * unmodified.
 *
 * NOTE(review): presumes block is 8-byte aligned and big-endian lane order
 * for the vec_mergeh zero-extension - confirm. vcprm() is defined
 * elsewhere; sN is used as "word N of the second vec_perm operand".
 *
 * @param block     destination rows, updated in place
 * @param pixels    source rows, any alignment
 * @param line_size byte distance between consecutive rows in both buffers
 * @param h         number of destination rows to produce
 */
static void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{
    register int i;
    register vector unsigned char pixelsv1, pixelsv2, pixelsavg;
    register vector unsigned char blockv, temp1, temp2, blocktemp;
    register vector unsigned short pixelssum1, pixelssum2, temp3;

    register const vector unsigned char vczero = (const vector unsigned char)
                                        vec_splat_u8(0);
    register const vector unsigned short vctwo = (const vector unsigned short)
                                         vec_splat_u16(2);

    /* First source row: pixelsv1 = bytes at pixels, pixelsv2 = bytes at
     * pixels + 1. */
    temp1 = vec_ld(0, pixels);
    temp2 = vec_ld(16, pixels);
    pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
    if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) {
        /* pixels + 1 is 16-byte aligned: vec_lvsl(1, pixels) would yield a
         * zero shift and select temp1, so take the second load directly. */
        pixelsv2 = temp2;
    } else {
        pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
    }
    /* Zero-extend the first 8 bytes to 16-bit lanes. */
    pixelsv1 = vec_mergeh(vczero, pixelsv1);
    pixelsv2 = vec_mergeh(vczero, pixelsv2);
    pixelssum1 = vec_add((vector unsigned short)pixelsv1,
                         (vector unsigned short)pixelsv2);
    /* Fold the rounding constant into the carried row sum. */
    pixelssum1 = vec_add(pixelssum1, vctwo);

    for (i = 0; i < h ; i++) {
        /* Non-zero when block lies in the right half of its quadword. */
        int rightside = ((unsigned long)block & 0x0000000F);
        blockv = vec_ld(0, block);

        /* Next source row, same realignment scheme as above. */
        temp1 = vec_ld(line_size, pixels);
        temp2 = vec_ld(line_size + 16, pixels);
        pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
        if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F) {
            pixelsv2 = temp2;
        } else {
            pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
        }

        pixelsv1 = vec_mergeh(vczero, pixelsv1);
        pixelsv2 = vec_mergeh(vczero, pixelsv2);
        pixelssum2 = vec_add((vector unsigned short)pixelsv1,
                             (vector unsigned short)pixelsv2);
        /* (row sum + 2) + next row sum, then divide by four. */
        temp3 = vec_add(pixelssum1, pixelssum2);
        temp3 = vec_sra(temp3, vctwo);
        /* The row just read becomes the upper row of the next iteration. */
        pixelssum1 = vec_add(pixelssum2, vctwo);
        pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero);

        /* Put the 8 interpolated bytes in the half being written and keep
         * the existing destination bytes in the other half. */
        if (rightside) {
            blocktemp = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1));
        } else {
            blocktemp = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3));
        }

        blockv = vec_avg(blocktemp, blockv);
        vec_st(blockv, 0, block);

        /* Advance both pointers to the next row. */
        block  += line_size;
        pixels += line_size;
    }
}
#define AV_CPU_FLAG_ALTIVEC
standard
op_pixels_func avg_pixels_tab[4][4]
Halfpel motion compensation with rounding (a+b+1)>>1.
void ff_avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
void ff_put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
#define vcprm(a, b, c, d)
op_pixels_func put_pixels_tab[4][4]
Halfpel motion compensation with rounding (a+b+1)>>1.
op_pixels_func put_no_rnd_pixels_tab[4][4]
Halfpel motion compensation with no rounding (a+b)>>1.
synthesis window for stochastic i
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Contains misc utility macros and inline functions.
av_cold void ff_hpeldsp_init_ppc(HpelDSPContext *c, int flags)