dsputil_alpha.c
Go to the documentation of this file.
1 /*
2  * Alpha optimized DSP utils
3  * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 #include "libavutil/attributes.h"
23 #include "libavcodec/dsputil.h"
24 #include "dsputil_alpha.h"
25 #include "asm.h"
26 
/* File-scope function pointers sharing the (block, pixels, line_size)
 * signature of the put/add "pixels clamped" routines.
 * NOTE(review): presumably assigned by the init code of this file so that
 * other Alpha routines can call the selected implementation -- the
 * assignment is not visible in this listing; confirm against the full
 * source. */
27 void (*put_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels,
28  int line_size);
29 void (*add_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels,
30  int line_size);
31 
32 #if 0
33 /* These functions were the base for the optimized assembler routines,
34  and remain here for documentation purposes. */
/**
 * Reference C version of the "put pixels clamped" routine (kept for
 * documentation; the assembler routine was derived from it).
 *
 * Processes 8 rows.  For each row, eight 16-bit values are read from
 * block, limited to the range [0, 0x00ff] per 16-bit lane, packed with
 * pkwb() and written as two 32-bit stores to pixels.
 *
 * @param block     source coefficients, advanced by 8 values per row
 * @param pixels    destination, advanced by line_size per row
 * @param line_size distance between the start of consecutive rows
 */
static void put_pixels_clamped_mvi(const int16_t *block, uint8_t *pixels,
                                   ptrdiff_t line_size)
{
    /* Upper clamp bound for every 16-bit lane. */
    const uint64_t upper = zap(-1, 0xaa); /* 0x00ff00ff00ff00ff */
    int row, half;

    for (row = 0; row < 8; row++) {
        /* Each row is handled as two groups of four 16-bit values. */
        for (half = 0; half < 8; half += 4) {
            uint64_t quad = ldq(block + half);

            /* Clamp: first raise to at least 0, then cap at the bound. */
            quad = maxsw4(quad, 0);
            quad = minsw4(quad, upper);
            stl(pkwb(quad), pixels + half);
        }

        pixels += line_size;
        block  += 8;
    }
}
58 
/**
 * Reference C version of the "add pixels clamped" routine (kept for
 * documentation; the assembler routine was derived from it).
 *
 * Processes 8 rows.  For each row, eight 16-bit values from block are
 * added lane-wise to the eight existing bytes at pixels (widened with
 * unpkbw()), limited to [0, 0x00ff] per 16-bit lane, packed with pkwb()
 * and written back to pixels.
 *
 * @param block     source coefficients, advanced by 8 values per row
 * @param pixels    destination that is read and then overwritten,
 *                  advanced by line_size per row
 * @param line_size distance between the start of consecutive rows
 */
void add_pixels_clamped_mvi(const int16_t *block, uint8_t *pixels,
                            ptrdiff_t line_size)
{
    /* The constants are generated by hand so that this function stays a
       leaf function (mainly for the hack value ;-). */
    const uint64_t upper      = zap(-1, 0xaa);  /* 0x00ff00ff00ff00ff */
    const uint64_t high_words = zap(-1, 0x33);
    const uint64_t sign_bits  = high_words ^ (high_words >> 1);
                                                /* 0x8000800080008000 */
    int row;

    for (row = 0; row < 8; row++) {
        /* Both coefficient quadwords are fetched before anything is
           written back to pixels. */
        uint64_t lo = ldq(block);
        uint64_t hi = ldq(block + 4);
        uint64_t lo_sign, hi_sign;

        /* Signed subword add (MMX paddw): set the lane sign bits aside,
           add the remaining bits, then fold the sign bits back in so a
           carry cannot spill into the neighbouring lane. */
        lo_sign = lo & sign_bits;
        lo      = ((lo & ~sign_bits) + unpkbw(ldl(pixels))) ^ lo_sign;
        /* Clamp. */
        lo      = minsw4(maxsw4(lo, 0), upper);

        /* Next 4. */
        hi_sign = hi & sign_bits;
        hi      = ((hi & ~sign_bits) + unpkbw(ldl(pixels + 4))) ^ hi_sign;
        hi      = minsw4(maxsw4(hi, 0), upper);

        stl(pkwb(lo), pixels);
        stl(pkwb(hi), pixels + 4);

        pixels += line_size;
        block  += 8;
    }
}
102 #endif
103 
/**
 * Zero sizeof(int16_t) * 6 * 64 bytes starting at blocks.
 *
 * The area is written with 64-bit stores, eight per loop iteration.
 * NOTE(review): this assumes blocks is suitably aligned for 64-bit
 * accesses -- confirm against the callers.
 *
 * @param blocks start of the coefficient area to clear
 */
static void clear_blocks_axp(int16_t *blocks) {
    uint64_t *quad = (uint64_t *) blocks;
    uint64_t *const stop = quad + sizeof(int16_t) * 6 * 64 / sizeof(uint64_t);

    /* The total size is an exact multiple of the 64 bytes cleared per
       pass, so quad lands exactly on stop. */
    do {
        quad[0] = 0;
        quad[1] = 0;
        quad[2] = 0;
        quad[3] = 0;
        quad[4] = 0;
        quad[5] = 0;
        quad[6] = 0;
        quad[7] = 0;
        quad += 8;
    } while (quad != stop);
}
121 
123 {
124  const int high_bit_depth = avctx->bits_per_raw_sample > 8;
125 
126  if (!high_bit_depth) {
128  }
129 
130  /* amask clears all bits that correspond to present features. */
131  if (amask(AMASK_MVI) == 0) {
134 
135  if (!high_bit_depth)
138  c->sad[0] = pix_abs16x16_mvi_asm;
139  c->sad[1] = pix_abs8x8_mvi;
140  c->pix_abs[0][0] = pix_abs16x16_mvi_asm;
141  c->pix_abs[1][0] = pix_abs8x8_mvi;
142  c->pix_abs[0][1] = pix_abs16x16_x2_mvi;
143  c->pix_abs[0][2] = pix_abs16x16_y2_mvi;
144  c->pix_abs[0][3] = pix_abs16x16_xy2_mvi;
145  }
146 
149 
150  if (!avctx->lowres && avctx->bits_per_raw_sample <= 8 &&
151  (avctx->idct_algo == FF_IDCT_AUTO ||
152  avctx->idct_algo == FF_IDCT_SIMPLEALPHA)) {
156  }
157 }
int pix_abs16x16_x2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
int bits_per_raw_sample
Bits per sample/pixel of internal libavcodec pixel/sample format.
Macro definitions for various function/variable attributes.
void(* get_pixels)(int16_t *block, const uint8_t *pixels, int line_size)
Definition: dsputil.h:129
void ff_simple_idct_add_axp(uint8_t *dest, int line_size, int16_t *block)
uint8_t
#define av_cold
Definition: attributes.h:78
int pix_abs16x16_mvi_asm(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
void put_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels, int line_size)
int lowres
low resolution decoding, 1-> 1/2 size, 2->1/4 size
#define AMASK_MVI
int pix_abs8x8_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
#define FF_IDCT_SIMPLEALPHA
int pix_abs16x16_y2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
void(* clear_blocks)(int16_t *blocks)
Definition: dsputil.h:146
void(* add_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels, int line_size)
Definition: dsputil_alpha.c:29
int pix_abs16x16_xy2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
av_cold void ff_dsputil_init_alpha(DSPContext *c, AVCodecContext *avctx)
int idct_algo
IDCT algorithm, see FF_IDCT_* below.
void(* put_pixels_clamped)(const int16_t *block, uint8_t *pixels, int line_size)
Definition: dsputil.h:131
void(* add_pixels_clamped)(const int16_t *block, uint8_t *pixels, int line_size)
Definition: dsputil.h:133
#define FF_IDCT_AUTO
void ff_simple_idct_put_axp(uint8_t *dest, int line_size, int16_t *block)
typedef void(RENAME(mix_any_func_type))
void(* idct_add)(uint8_t *dest, int line_size, int16_t *block)
block -> idct -> add dest -> clip to unsigned 8 bit -> dest.
Definition: dsputil.h:235
main external API structure.
synthesis window for stochastic i
void diff_pixels_mvi(int16_t *block, const uint8_t *s1, const uint8_t *s2, int stride)
void get_pixels_mvi(int16_t *restrict block, const uint8_t *restrict pixels, int line_size)
void add_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels, int line_size)
void(* idct)(int16_t *block)
Definition: dsputil.h:222
void(* put_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels, int line_size)
Definition: dsputil_alpha.c:27
static double c[64]
me_cmp_func sad[6]
Definition: dsputil.h:151
static void clear_blocks_axp(int16_t *blocks)
DSP utils.
void(* idct_put)(uint8_t *dest, int line_size, int16_t *block)
block -> idct -> clip to unsigned 8 bit -> dest.
Definition: dsputil.h:229
void ff_simple_idct_axp(int16_t *block)
void(* diff_pixels)(int16_t *block, const uint8_t *s1, const uint8_t *s2, int stride)
Definition: dsputil.h:130
me_cmp_func pix_abs[2][4]
Definition: dsputil.h:194
DSPContext.
Definition: dsputil.h:127