/*
 * Alpha optimized DSP utils
 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/attributes.h"
#include "libavcodec/hpeldsp.h"
#include "hpeldsp_alpha.h"
#include "asm.h"
/**
 * Byte-wise average of two 8-byte pixel vectors, rounding down.
 *
 * Classic overflow-free trick: (a + b) / 2 == (a & b) + ((a ^ b) >> 1).
 * Masking the shifted XOR with 0x7f in every byte lane stops a lane's
 * low bit from bleeding into its neighbour (equivalent to masking with
 * 0xfe before the shift, as the companion avg2() does).
 */
static inline uint64_t avg2_no_rnd(uint64_t a, uint64_t b)
{
    uint64_t common = a & b;
    uint64_t diff   = a ^ b;

    return common + ((diff >> 1) & BYTE_VEC(0x7f));
}
/**
 * Byte-wise average of two 8-byte pixel vectors, rounding up.
 *
 * Dual of avg2_no_rnd(): (a + b + 1) / 2 == (a | b) - ((a ^ b) >> 1).
 * The post-shift 0x7f mask per byte lane is equivalent to the
 * pre-shift 0xfe mask and keeps the lanes independent.
 */
static inline uint64_t avg2(uint64_t a, uint64_t b)
{
    uint64_t either = a | b;
    uint64_t diff   = a ^ b;

    return either - ((diff >> 1) & BYTE_VEC(0x7f));
}
#if 0
/* The XY2 routines basically utilize this scheme, but reuse parts in
   each iteration.  Kept (disabled) as reference documentation for the
   lane arithmetic in OP_XY2 below: the average of four bytes is built
   from the high six bits (pre-shifted so their sum cannot overflow a
   byte lane) plus the separately accumulated and rounded low two bits. */
static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4)
{
    uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2)
                + ((l2 & ~BYTE_VEC(0x03)) >> 2)
                + ((l3 & ~BYTE_VEC(0x03)) >> 2)
                + ((l4 & ~BYTE_VEC(0x03)) >> 2);
    uint64_t r2 = ((  (l1 & BYTE_VEC(0x03))
                    + (l2 & BYTE_VEC(0x03))
                    + (l3 & BYTE_VEC(0x03))
                    + (l4 & BYTE_VEC(0x03))
                    + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03);
    return r1 + r2;
}
#endif
/* Straight copy/average: one aligned 8-byte row per iteration.
   LOAD is ldq/uldq (picked by MAKE_OP from the source alignment),
   STORE either stores directly ("put") or averages with the existing
   destination ("avg").  h is the row count; callers pass h >= 1. */
#define OP(LOAD, STORE)                         \
    do {                                        \
        STORE(LOAD(pixels), block);             \
        pixels += line_size;                    \
        block += line_size;                     \
    } while (--h)
/* Horizontal halfpel: average every pixel with its right neighbour.
   pix2 is pix1 shifted right one byte with pixels[8] pulled in as the
   new top byte, i.e. bytes 1..8 of the source row (Alpha is
   little-endian, so ">> 8" drops the leftmost pixel).  AVG2 is bound
   to the rounding or non-rounding variant before expansion. */
#define OP_X2(LOAD, STORE)                                      \
    do {                                                        \
        uint64_t pix1, pix2;                                    \
                                                                \
        pix1 = LOAD(pixels);                                    \
        pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56);        \
        STORE(AVG2(pix1, pix2), block);                         \
        pixels += line_size;                                    \
        block += line_size;                                     \
    } while (--h)
/* Vertical halfpel: average each row with the row below it.  Each
   iteration loads only the next row and reuses the previous load in
   `pix`, so every source row is read exactly once. */
#define OP_Y2(LOAD, STORE)                      \
    do {                                        \
        uint64_t pix = LOAD(pixels);            \
        do {                                    \
            uint64_t next_pix;                  \
                                                \
            pixels += line_size;                \
            next_pix = LOAD(pixels);            \
            STORE(AVG2(pix, next_pix), block);  \
            block += line_size;                 \
            pix = next_pix;                     \
        } while (--h);                          \
    } while (0)
/* 2D halfpel: four-way average of each pixel with its right, lower and
   lower-right neighbours.  Uses the scheme of the disabled avg4()
   above, split per row so the per-row partial sums (pix_l: low two
   bits of the two horizontal neighbours; pix_h: their high six bits
   pre-shifted down by 2) are computed once and reused for the row
   below.  AVG4_ROUNDER is BYTE_VEC(0x02) for rounding, BYTE_VEC(0x01)
   for the no-rounding variant. */
#define OP_XY2(LOAD, STORE)                                                 \
    do {                                                                    \
        uint64_t pix1 = LOAD(pixels);                                       \
        uint64_t pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56);           \
        uint64_t pix_l = (pix1 & BYTE_VEC(0x03))                            \
                       + (pix2 & BYTE_VEC(0x03));                           \
        uint64_t pix_h = ((pix1 & ~BYTE_VEC(0x03)) >> 2)                    \
                       + ((pix2 & ~BYTE_VEC(0x03)) >> 2);                   \
                                                                            \
        do {                                                                \
            uint64_t npix1, npix2;                                          \
            uint64_t npix_l, npix_h;                                        \
            uint64_t avg;                                                   \
                                                                            \
            pixels += line_size;                                            \
            npix1 = LOAD(pixels);                                           \
            npix2 = npix1 >> 8 | ((uint64_t) pixels[8] << 56);              \
            npix_l = (npix1 & BYTE_VEC(0x03))                               \
                   + (npix2 & BYTE_VEC(0x03));                              \
            npix_h = ((npix1 & ~BYTE_VEC(0x03)) >> 2)                       \
                   + ((npix2 & ~BYTE_VEC(0x03)) >> 2);                      \
            avg = (((pix_l + npix_l + AVG4_ROUNDER) >> 2) & BYTE_VEC(0x03)) \
                + pix_h + npix_h;                                           \
            STORE(avg, block);                                              \
                                                                            \
            block += line_size;                                             \
            pix_l = npix_l;                                                 \
            pix_h = npix_h;                                                 \
        } while (--h);                                                      \
    } while (0)
/* Instantiate one 8-pixel-wide function (OPNAME_pixelsSUFF_axp) that
   dispatches on source alignment — uldq for unaligned loads, ldq when
   `pixels` is 8-byte aligned — plus a 16-pixel-wide wrapper that runs
   the 8-wide version on each half.  The destination `block` is assumed
   aligned (it is only ever accessed through STORE/ldq). */
#define MAKE_OP(OPNAME, SUFF, OPKIND, STORE)                                \
static void OPNAME ## _pixels ## SUFF ## _axp                               \
        (uint8_t *restrict block, const uint8_t *restrict pixels,           \
         ptrdiff_t line_size, int h)                                        \
{                                                                           \
    if ((size_t) pixels & 0x7) {                                            \
        OPKIND(uldq, STORE);                                                \
    } else {                                                                \
        OPKIND(ldq, STORE);                                                 \
    }                                                                       \
}                                                                           \
                                                                            \
static void OPNAME ## _pixels16 ## SUFF ## _axp                             \
        (uint8_t *restrict block, const uint8_t *restrict pixels,           \
         ptrdiff_t line_size, int h)                                        \
{                                                                           \
    OPNAME ## _pixels ## SUFF ## _axp(block, pixels, line_size, h);         \
    OPNAME ## _pixels ## SUFF ## _axp(block + 8, pixels + 8, line_size, h); \
}
/* For one OPNAME/STORE pair, emit the full set of halfpel variants:
   plain copy, x2 (horizontal), y2 (vertical) and xy2 (both). */
#define PIXOP(OPNAME, STORE)                    \
    MAKE_OP(OPNAME,     , OP,     STORE)        \
    MAKE_OP(OPNAME, _x2,  OP_X2,  STORE)        \
    MAKE_OP(OPNAME, _y2,  OP_Y2,  STORE)        \
    MAKE_OP(OPNAME, _xy2, OP_XY2, STORE)
/* Rounding primitives.  AVG2/AVG4_ROUNDER are rebound before each
   PIXOP expansion; the define/undef ordering is load-bearing.  AVG4 is
   defined for symmetry — within this file only AVG4_ROUNDER is used by
   OP_XY2 (avg4 itself is the #if 0 reference above). */
#define AVG2 avg2
#define AVG4 avg4
#define AVG4_ROUNDER BYTE_VEC(0x02)
/* "put": plain store to the destination. */
#define STORE(l, b) stq(l, b)
PIXOP(put, STORE);

/* "avg": average the result with what is already in the destination. */
#undef STORE
#define STORE(l, b) stq(AVG2(l, ldq(b)), b);
PIXOP(avg, STORE);

/* Not rounding primitives. */
#undef AVG2
#undef AVG4
#undef AVG4_ROUNDER
#undef STORE
#define AVG2 avg2_no_rnd
#define AVG4 avg4_no_rnd
#define AVG4_ROUNDER BYTE_VEC(0x01)
#define STORE(l, b) stq(l, b)
PIXOP(put_no_rnd, STORE);

#undef STORE
#define STORE(l, b) stq(AVG2(l, ldq(b)), b);
PIXOP(avg_no_rnd, STORE);
/**
 * Copy a 16-pixel-wide block: a 16-wide copy is simply two independent
 * 8-pixel-wide copies done by the hand-written asm routine.
 */
static void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels,
                                 ptrdiff_t line_size, int h)
{
    int half;

    for (half = 0; half < 2; half++)
        put_pixels_axp_asm(block + 8 * half, pixels + 8 * half, line_size, h);
}
177 av_cold void ff_hpeldsp_init_alpha(HpelDSPContext *c, int flags)
|
yading@10
|
178 {
|
yading@10
|
179 c->put_pixels_tab[0][0] = put_pixels16_axp_asm;
|
yading@10
|
180 c->put_pixels_tab[0][1] = put_pixels16_x2_axp;
|
yading@10
|
181 c->put_pixels_tab[0][2] = put_pixels16_y2_axp;
|
yading@10
|
182 c->put_pixels_tab[0][3] = put_pixels16_xy2_axp;
|
yading@10
|
183
|
yading@10
|
184 c->put_no_rnd_pixels_tab[0][0] = put_pixels16_axp_asm;
|
yading@10
|
185 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_axp;
|
yading@10
|
186 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_axp;
|
yading@10
|
187 c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_axp;
|
yading@10
|
188
|
yading@10
|
189 c->avg_pixels_tab[0][0] = avg_pixels16_axp;
|
yading@10
|
190 c->avg_pixels_tab[0][1] = avg_pixels16_x2_axp;
|
yading@10
|
191 c->avg_pixels_tab[0][2] = avg_pixels16_y2_axp;
|
yading@10
|
192 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_axp;
|
yading@10
|
193
|
yading@10
|
194 c->avg_no_rnd_pixels_tab[0] = avg_no_rnd_pixels16_axp;
|
yading@10
|
195 c->avg_no_rnd_pixels_tab[1] = avg_no_rnd_pixels16_x2_axp;
|
yading@10
|
196 c->avg_no_rnd_pixels_tab[2] = avg_no_rnd_pixels16_y2_axp;
|
yading@10
|
197 c->avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels16_xy2_axp;
|
yading@10
|
198
|
yading@10
|
199 c->put_pixels_tab[1][0] = put_pixels_axp_asm;
|
yading@10
|
200 c->put_pixels_tab[1][1] = put_pixels_x2_axp;
|
yading@10
|
201 c->put_pixels_tab[1][2] = put_pixels_y2_axp;
|
yading@10
|
202 c->put_pixels_tab[1][3] = put_pixels_xy2_axp;
|
yading@10
|
203
|
yading@10
|
204 c->put_no_rnd_pixels_tab[1][0] = put_pixels_axp_asm;
|
yading@10
|
205 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels_x2_axp;
|
yading@10
|
206 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels_y2_axp;
|
yading@10
|
207 c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels_xy2_axp;
|
yading@10
|
208
|
yading@10
|
209 c->avg_pixels_tab[1][0] = avg_pixels_axp;
|
yading@10
|
210 c->avg_pixels_tab[1][1] = avg_pixels_x2_axp;
|
yading@10
|
211 c->avg_pixels_tab[1][2] = avg_pixels_y2_axp;
|
yading@10
|
212 c->avg_pixels_tab[1][3] = avg_pixels_xy2_axp;
|
yading@10
|
213 }
|