yading@10
|
1 /*
|
yading@10
|
2 * aligned/packed access motion
|
yading@10
|
3 *
|
yading@10
|
4 * Copyright (c) 2001-2003 BERO <bero@geocities.co.jp>
|
yading@10
|
5 *
|
yading@10
|
6 * This file is part of FFmpeg.
|
yading@10
|
7 *
|
yading@10
|
8 * FFmpeg is free software; you can redistribute it and/or
|
yading@10
|
9 * modify it under the terms of the GNU Lesser General Public
|
yading@10
|
10 * License as published by the Free Software Foundation; either
|
yading@10
|
11 * version 2.1 of the License, or (at your option) any later version.
|
yading@10
|
12 *
|
yading@10
|
13 * FFmpeg is distributed in the hope that it will be useful,
|
yading@10
|
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
yading@10
|
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
yading@10
|
16 * Lesser General Public License for more details.
|
yading@10
|
17 *
|
yading@10
|
18 * You should have received a copy of the GNU Lesser General Public
|
yading@10
|
19 * License along with FFmpeg; if not, write to the Free Software
|
yading@10
|
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
yading@10
|
21 */
|
yading@10
|
22
|
yading@10
|
23 #include "libavutil/attributes.h"
|
yading@10
|
24 #include "libavcodec/avcodec.h"
|
yading@10
|
25 #include "libavcodec/dsputil.h"
|
yading@10
|
26 #include "libavcodec/bit_depth_template.c" // for BYTE_VEC32
|
yading@10
|
27 #include "libavcodec/hpeldsp.h"
|
yading@10
|
28 #include "libavcodec/rnd_avg.h"
|
yading@10
|
29 #include "dsputil_sh4.h"
|
yading@10
|
30
|
yading@10
|
31
|
yading@10
|
/*
 * Word accessors: reinterpret the address p as a pointer to one 32-bit word.
 * LP yields a modifiable lvalue, LPC a read-only one.  The access is a plain
 * load/store through a cast pointer; the callers in this file first subtract
 * the low two address bits from the source pointer so the reads are
 * word-aligned.  The whole expansion is parenthesized so the macros can be
 * used safely inside larger expressions.
 */
#define LP(p)  (*(uint32_t *)(p))
#define LPC(p) (*(const uint32_t *)(p))
|
yading@10
|
34
|
yading@10
|
35
|
yading@10
|
36 #define UNPACK(ph,pl,tt0,tt1) do { \
|
yading@10
|
37 uint32_t t0,t1; t0=tt0;t1=tt1; \
|
yading@10
|
38 ph = ( (t0 & ~BYTE_VEC32(0x03))>>2) + ( (t1 & ~BYTE_VEC32(0x03))>>2); \
|
yading@10
|
39 pl = (t0 & BYTE_VEC32(0x03)) + (t1 & BYTE_VEC32(0x03)); } while(0)
|
yading@10
|
40
|
yading@10
|
/*
 * Combine the partial sums produced by UNPACK for two rows (ph/pl for one
 * row, nph/npl for the other) into one packed result word:
 *   ph + nph + (((pl + npl + bias) >> 2) & BYTE_VEC32(3))
 * rnd_PACK uses a per-byte bias of 2, no_rnd_PACK a per-byte bias of 1.
 * Arguments and the whole expansion are parenthesized so the macros compose
 * correctly inside larger expressions.
 */
#define rnd_PACK(ph, pl, nph, npl) \
    ((ph) + (nph) + ((((pl) + (npl) + BYTE_VEC32(0x02)) >> 2) & BYTE_VEC32(0x03)))
#define no_rnd_PACK(ph, pl, nph, npl) \
    ((ph) + (nph) + ((((pl) + (npl) + BYTE_VEC32(0x01)) >> 2) & BYTE_VEC32(0x03)))
|
yading@10
|
43
|
yading@10
|
/*
 * little-endian
 *
 * a and b are two consecutive 32-bit words read from memory (a at the lower
 * address); ofs is the byte offset (0..3) of the wanted data inside a.
 *   MERGE1 yields the 4 bytes starting ofs bytes into a,
 *   MERGE2 yields the 4 bytes starting ofs + 1 bytes into a.
 * The ofs == 0 (MERGE1) and ofs == 3 (MERGE2) cases are special-cased because
 * the general formula would need a shift by 32 bits.
 * Arguments and the whole expansion are parenthesized so the conditional
 * operator cannot absorb neighbouring operators at the use site.
 */
#define MERGE1(a, b, ofs) \
    (((ofs) == 0) ? (a) : (((a) >> (8 * (ofs))) | ((b) << (32 - 8 * (ofs)))))
#define MERGE2(a, b, ofs) \
    (((ofs) == 3) ? (b) : (((a) >> (8 * ((ofs) + 1))) | ((b) << (32 - 8 * ((ofs) + 1)))))
/* big-endian equivalent, kept for reference only (not compiled):
#define MERGE1(a,b,ofs) (ofs==0)?a:( ((a)<<(8*ofs))|((b)>>(32-8*ofs)) )
#define MERGE2(a,b,ofs) (ofs==3)?b:( ((a)<<(8+8*ofs))|((b)>>(32-8-8*ofs)) )
*/
|
yading@10
|
51
|
yading@10
|
52
|
yading@10
|
/*
 * Store operations selected through the OP macro:
 *   put(d, s) overwrites d with s;
 *   avg(d, s) replaces d with rnd_avg32(s, d), i.e. combines the new value
 *             with what is already stored in d.
 * Both expand to a single parenthesized assignment expression.
 */
#define put(d, s) ((d) = (s))
#define avg(d, s) ((d) = rnd_avg32((s), (d)))
|
yading@10
|
55
|
yading@10
|
/*
 * OP_C4(ofs): process a 4-byte-wide column whose source pointer is ofs bytes
 * (1..3) past a word boundary.  Operates on the enclosing function's
 * ref/dest/stride/height variables: ref is first moved back to the word
 * boundary, then for each row the two surrounding words are loaded, merged
 * with MERGE1 and handed to OP together with the destination word.
 * The loop is do/while(--height), so height must be at least 1 on entry.
 *
 * The body is wrapped in do { } while (0) so that the macro is one statement;
 * previously the leading "ref -= ofs;" was a separate statement and the loop
 * escaped an unbraced if/else at the use site.
 */
#define OP_C4(ofs) do { \
    ref -= ofs; \
    do { \
        OP(LP(dest), MERGE1(LPC(ref), LPC(ref + 4), ofs)); \
        ref  += stride; \
        dest += stride; \
    } while (--height); \
} while (0)
|
yading@10
|
63
|
yading@10
|
/*
 * OP_C40(): word-aligned variant of OP_C4.  For each of `height` rows the
 * source word at ref is passed to OP together with the destination word at
 * dest, then both pointers advance by stride.  Uses the enclosing function's
 * ref/dest/stride/height; the do/while(--height) form runs at least once.
 */
#define OP_C40() \
    do { \
        OP(LP(dest), LPC(ref)); \
        dest += stride; \
        ref  += stride; \
    } while (--height)
|
yading@10
|
70
|
yading@10
|
71
|
yading@10
|
#define OP put

/*
 * Copy a 4-byte-wide, `height`-row block from ref to dest.
 *
 * dest   destination; written one 32-bit word per row
 * ref    source; may have any byte alignment
 * stride distance in bytes between consecutive rows (both buffers)
 * height number of rows; must be >= 1 (the row loops are do/while)
 *
 * The low two bits of ref select the aligned path (OP_C40) or the
 * merge-two-words path (OP_C4) for that byte offset.  The pointer is
 * converted through uintptr_t rather than int so the conversion cannot
 * truncate on targets where pointers are wider than int.
 *
 * NOTE(review): no caller of this static function is visible in this file;
 * confirm whether it is still needed.
 */
static void put_pixels4_c(uint8_t *dest, const uint8_t *ref,
                          const int stride, int height)
{
    switch ((uintptr_t)ref & 3) {
    case 0: OP_C40(); return;
    case 1: OP_C4(1); return;
    case 2: OP_C4(2); return;
    case 3: OP_C4(3); return;
    }
}

#undef OP
|
yading@10
|
#define OP avg

/*
 * Average a 4-byte-wide, `height`-row block from ref into dest: each
 * destination word is replaced by rnd_avg32(source word, destination word)
 * (see the avg() store macro).
 *
 * dest   destination; read and written one 32-bit word per row
 * ref    source; may have any byte alignment
 * stride distance in bytes between consecutive rows (both buffers)
 * height number of rows; must be >= 1 (the row loops are do/while)
 *
 * The low two bits of ref select the aligned path (OP_C40) or the
 * merge-two-words path (OP_C4) for that byte offset.  The pointer is
 * converted through uintptr_t rather than int so the conversion cannot
 * truncate on targets where pointers are wider than int.
 *
 * NOTE(review): no caller of this static function is visible in this file;
 * confirm whether it is still needed.
 */
static void avg_pixels4_c(uint8_t *dest, const uint8_t *ref,
                          const int stride, int height)
{
    switch ((uintptr_t)ref & 3) {
    case 0: OP_C40(); return;
    case 1: OP_C4(1); return;
    case 2: OP_C4(2); return;
    case 3: OP_C4(3); return;
    }
}

#undef OP
|
yading@10
|
100
|
yading@10
|
/*
 * OP_C(ofs, sz, avg2): straight copy path for a source that is ofs bytes
 * (1..3) past a word boundary.  sz is the block width in bytes (8, or 16 for
 * the extra two words); avg2 is accepted for signature symmetry with the
 * other OP_* macros and is not used here.
 *
 * Works on the enclosing function's ref/dest/stride/height.  ref is moved
 * back to the word boundary; every row then loads sz/4 + 1 consecutive words,
 * alternating between two temporaries so each word is loaded once, and hands
 * MERGE1 of each neighbouring pair to OP.  The row loop is
 * do/while(--height), so it executes at least once.
 */
#define OP_C(ofs,sz,avg2) \
{ \
    ref -= ofs; \
    do { \
        uint32_t wa = LPC(ref + 0); \
        uint32_t wb = LPC(ref + 4); \
        OP(LP(dest + 0), MERGE1(wa, wb, ofs)); \
        wa = LPC(ref + 8); \
        OP(LP(dest + 4), MERGE1(wb, wa, ofs)); \
        if (sz == 16) { \
            wb = LPC(ref + 12); \
            OP(LP(dest + 8), MERGE1(wa, wb, ofs)); \
            wa = LPC(ref + 16); \
            OP(LP(dest + 12), MERGE1(wb, wa, ofs)); \
        } \
        dest += stride; \
        ref  += stride; \
    } while (--height); \
}
|
yading@10
|
121
|
yading@10
|
/*
 * aligned
 *
 * OP_C0(sz, avg2): straight copy path for a word-aligned source.  Each row
 * passes sz/4 source words (2 for sz 8, 4 for sz 16) to OP with the matching
 * destination words, then ref and dest advance by stride.  avg2 is unused.
 * Uses the enclosing function's ref/dest/stride/height; the row loop is
 * do/while(--height), so it executes at least once.
 */
#define OP_C0(sz,avg2) \
{ \
    do { \
        OP(LP(dest + 0), LPC(ref + 0)); \
        OP(LP(dest + 4), LPC(ref + 4)); \
        if (sz == 16) { \
            OP(LP(dest + 8),  LPC(ref + 8)); \
            OP(LP(dest + 12), LPC(ref + 12)); \
        } \
        dest += stride; \
        ref  += stride; \
    } while (--height); \
}
|
yading@10
|
136
|
yading@10
|
/*
 * OP_X(ofs, sz, avg2): horizontal interpolation path.  For every output word
 * the 4 bytes at byte offset ofs (MERGE1) and the 4 bytes one byte further
 * (MERGE2) are combined with avg2 and the result is handed to OP.
 *
 * ofs is the byte offset (0..3) of ref from a word boundary, sz the block
 * width in bytes (8 or 16).  Works on the enclosing function's
 * ref/dest/stride/height: ref is moved back by ofs, each row loads sz/4 + 1
 * consecutive words (alternating two temporaries), and the row loop is
 * do/while(--height), so it executes at least once.
 */
#define OP_X(ofs,sz,avg2) \
{ \
    ref -= ofs; \
    do { \
        uint32_t wa = LPC(ref + 0); \
        uint32_t wb = LPC(ref + 4); \
        OP(LP(dest + 0), avg2(MERGE1(wa, wb, ofs), MERGE2(wa, wb, ofs))); \
        wa = LPC(ref + 8); \
        OP(LP(dest + 4), avg2(MERGE1(wb, wa, ofs), MERGE2(wb, wa, ofs))); \
        if (sz == 16) { \
            wb = LPC(ref + 12); \
            OP(LP(dest + 8), avg2(MERGE1(wa, wb, ofs), MERGE2(wa, wb, ofs))); \
            wa = LPC(ref + 16); \
            OP(LP(dest + 12), avg2(MERGE1(wb, wa, ofs), MERGE2(wb, wa, ofs))); \
        } \
        dest += stride; \
        ref  += stride; \
    } while (--height); \
}
|
yading@10
|
157
|
yading@10
|
/*
 * aligned
 *
 * OP_Y0(sz, avg2): vertical interpolation path for a word-aligned source.
 * The words of the first row are loaded up front; each loop iteration then
 * advances ref by one row, loads that row's words, passes
 * avg2(previous row word, current row word) to OP for every destination
 * word, and keeps the current words as "previous" for the next iteration.
 * The source is therefore read for height + 1 rows.
 *
 * sz is the block width in bytes (8 or 16); the third and fourth word
 * temporaries are only touched when sz == 16.  Uses the enclosing function's
 * ref/dest/stride/height; the row loop is do/while(--height).
 */
#define OP_Y0(sz,avg2) \
{ \
    uint32_t up0, up1, up2, up3, cur; \
    \
    up0 = LPC(ref + 0); \
    up1 = LPC(ref + 4); \
    if (sz == 16) { \
        up2 = LPC(ref + 8); \
        up3 = LPC(ref + 12); \
    } \
    do { \
        ref += stride; \
        \
        cur = LPC(ref + 0); \
        OP(LP(dest + 0), avg2(up0, cur)); \
        up0 = cur; \
        cur = LPC(ref + 4); \
        OP(LP(dest + 4), avg2(up1, cur)); \
        up1 = cur; \
        if (sz == 16) { \
            cur = LPC(ref + 8); \
            OP(LP(dest + 8), avg2(up2, cur)); \
            up2 = cur; \
            cur = LPC(ref + 12); \
            OP(LP(dest + 12), avg2(up3, cur)); \
            up3 = cur; \
        } \
        dest += stride; \
    } while (--height); \
}
|
yading@10
|
185
|
yading@10
|
/*
 * OP_Y(ofs, sz, avg2): vertical interpolation path for a source that is ofs
 * bytes (1..3) past a word boundary.
 *
 * ref is moved back to the word boundary.  The first row is loaded and
 * realigned with MERGE1 into the "previous row" words; each loop iteration
 * then advances ref by one row, rebuilds that row's words the same way,
 * passes avg2(previous, current) to OP for every destination word and keeps
 * the current words for the next iteration.  The source is therefore read
 * for height + 1 rows, sz/4 + 1 words per row.
 *
 * sz is the block width in bytes (8 or 16); the third and fourth "previous"
 * words are only touched when sz == 16.  Uses the enclosing function's
 * ref/dest/stride/height; the row loop is do/while(--height).
 */
#define OP_Y(ofs,sz,avg2) \
{ \
    uint32_t up0, up1, up2, up3, cur, wa, wb; \
    \
    ref -= ofs; \
    wa  = LPC(ref + 0); \
    wb  = LPC(ref + 4); \
    up0 = MERGE1(wa, wb, ofs); \
    wa  = LPC(ref + 8); \
    up1 = MERGE1(wb, wa, ofs); \
    if (sz == 16) { \
        wb  = LPC(ref + 12); \
        up2 = MERGE1(wa, wb, ofs); \
        wa  = LPC(ref + 16); \
        up3 = MERGE1(wb, wa, ofs); \
    } \
    do { \
        ref += stride; \
        \
        wa  = LPC(ref + 0); \
        wb  = LPC(ref + 4); \
        cur = MERGE1(wa, wb, ofs); \
        OP(LP(dest + 0), avg2(up0, cur)); \
        up0 = cur; \
        wa  = LPC(ref + 8); \
        cur = MERGE1(wb, wa, ofs); \
        OP(LP(dest + 4), avg2(up1, cur)); \
        up1 = cur; \
        if (sz == 16) { \
            wb  = LPC(ref + 12); \
            cur = MERGE1(wa, wb, ofs); \
            OP(LP(dest + 8), avg2(up2, cur)); \
            up2 = cur; \
            wa  = LPC(ref + 16); \
            cur = MERGE1(wb, wa, ofs); \
            OP(LP(dest + 12), avg2(up3, cur)); \
            up3 = cur; \
        } \
        dest += stride; \
    } while (--height); \
}
|
yading@10
|
223
|
yading@10
|
/*
 * The aligned (offset 0) horizontal and diagonal paths simply reuse the
 * general macros with ofs = 0.
 */
#define OP_X0(sz,avg2) OP_X(0,sz,avg2)
#define OP_XY0(sz,PACK) OP_XY(0,sz,PACK)

/*
 * OP_XY(ofs, sz, PACK): diagonal (horizontal + vertical) interpolation path.
 *
 * For each output word of a row, UNPACK combines the 4 bytes at byte offset
 * ofs (MERGE1) with the 4 bytes one byte further (MERGE2) into a high/low
 * partial-sum pair.  The pairs of the first row are computed up front; each
 * loop iteration advances ref by one row, computes that row's pairs, passes
 * PACK(previous high, previous low, current high, current low) to OP, and
 * keeps the current pair for the next iteration.  The source is therefore
 * read for height + 1 rows, sz/4 + 1 words per row.
 *
 * ofs is the byte offset (0..3) of ref from a word boundary, sz the block
 * width in bytes (8 or 16); the third and fourth pairs are only touched when
 * sz == 16.  Uses the enclosing function's ref/dest/stride/height; the row
 * loop is do/while(--height).
 */
#define OP_XY(ofs,sz,PACK) \
{ \
    uint32_t nhi, nlo, wa, wb; \
    uint32_t hi0, lo0, hi1, lo1, hi2, lo2, hi3, lo3; \
    \
    ref -= ofs; \
    wa = LPC(ref + 0); \
    wb = LPC(ref + 4); \
    UNPACK(hi0, lo0, MERGE1(wa, wb, ofs), MERGE2(wa, wb, ofs)); \
    wa = LPC(ref + 8); \
    UNPACK(hi1, lo1, MERGE1(wb, wa, ofs), MERGE2(wb, wa, ofs)); \
    if (sz == 16) { \
        wb = LPC(ref + 12); \
        UNPACK(hi2, lo2, MERGE1(wa, wb, ofs), MERGE2(wa, wb, ofs)); \
        wa = LPC(ref + 16); \
        UNPACK(hi3, lo3, MERGE1(wb, wa, ofs), MERGE2(wb, wa, ofs)); \
    } \
    do { \
        ref += stride; \
        wa = LPC(ref + 0); \
        wb = LPC(ref + 4); \
        UNPACK(nhi, nlo, MERGE1(wa, wb, ofs), MERGE2(wa, wb, ofs)); \
        OP(LP(dest + 0), PACK(hi0, lo0, nhi, nlo)); \
        hi0 = nhi; \
        lo0 = nlo; \
        wa = LPC(ref + 8); \
        UNPACK(nhi, nlo, MERGE1(wb, wa, ofs), MERGE2(wb, wa, ofs)); \
        OP(LP(dest + 4), PACK(hi1, lo1, nhi, nlo)); \
        hi1 = nhi; \
        lo1 = nlo; \
        if (sz == 16) { \
            wb = LPC(ref + 12); \
            UNPACK(nhi, nlo, MERGE1(wa, wb, ofs), MERGE2(wa, wb, ofs)); \
            OP(LP(dest + 8), PACK(hi2, lo2, nhi, nlo)); \
            hi2 = nhi; \
            lo2 = nlo; \
            wa = LPC(ref + 16); \
            UNPACK(nhi, nlo, MERGE1(wb, wa, ofs), MERGE2(wb, wa, ofs)); \
            OP(LP(dest + 12), PACK(hi3, lo3, nhi, nlo)); \
            hi3 = nhi; \
            lo3 = nlo; \
        } \
        dest += stride; \
    } while (--height); \
}
|
yading@10
|
267
|
yading@10
|
/*
 * DEFFUNC(prefix, op, rnd, xy, sz, OP_N, avgfunc)
 *
 * Defines the function
 *     prefix void <op>_<rnd>_pixels<sz>_<xy>(uint8_t *dest, const uint8_t *ref,
 *                                            const ptrdiff_t stride, int height)
 * whose body dispatches on the low two bits of ref:
 *     offset 0    -> OP_N##0(sz, <rnd>_<avgfunc>)     (aligned variant)
 *     offset 1..3 -> OP_N(offset, sz, <rnd>_<avgfunc>)
 *
 * prefix   storage class, may be empty (external linkage) or "static"
 * op, rnd, xy, sz   pieces of the generated function name; sz is also the
 *          block width in bytes passed to the OP_N macros
 * OP_N     one of the OP_C / OP_X / OP_Y / OP_XY macro families
 * avgfunc  suffix pasted after <rnd>_ to name the combining helper
 *
 * The store operation is whatever OP is defined to at the point of
 * instantiation.  The pointer is converted through uintptr_t rather than int
 * so the conversion cannot truncate on targets where pointers are wider than
 * int.  height must be >= 1 because the OP_N row loops are do/while.
 */
#define DEFFUNC(prefix, op, rnd, xy, sz, OP_N, avgfunc) \
prefix void op##_##rnd##_pixels##sz##_##xy(uint8_t *dest, const uint8_t *ref, \
                                           const ptrdiff_t stride, int height) \
{ \
    switch ((uintptr_t)ref & 3) { \
    case 0: OP_N##0(sz, rnd##_##avgfunc); return; \
    case 1: OP_N(1, sz, rnd##_##avgfunc); return; \
    case 2: OP_N(2, sz, rnd##_##avgfunc); return; \
    case 3: OP_N(3, sz, rnd##_##avgfunc); return; \
    } \
}
|
yading@10
|
279
|
yading@10
|
280 #define OP put
|
yading@10
|
281
|
yading@10
|
282 DEFFUNC( ,ff_put,rnd,o,8,OP_C,avg32)
|
yading@10
|
283 DEFFUNC(static,put, rnd,x,8,OP_X,avg32)
|
yading@10
|
284 DEFFUNC(static,put,no_rnd,x,8,OP_X,avg32)
|
yading@10
|
285 DEFFUNC(static,put, rnd,y,8,OP_Y,avg32)
|
yading@10
|
286 DEFFUNC(static,put,no_rnd,y,8,OP_Y,avg32)
|
yading@10
|
287 DEFFUNC(static,put, rnd,xy,8,OP_XY,PACK)
|
yading@10
|
288 DEFFUNC(static,put,no_rnd,xy,8,OP_XY,PACK)
|
yading@10
|
289 DEFFUNC( ,ff_put,rnd,o,16,OP_C,avg32)
|
yading@10
|
290 DEFFUNC(static,put, rnd,x,16,OP_X,avg32)
|
yading@10
|
291 DEFFUNC(static,put,no_rnd,x,16,OP_X,avg32)
|
yading@10
|
292 DEFFUNC(static,put, rnd,y,16,OP_Y,avg32)
|
yading@10
|
293 DEFFUNC(static,put,no_rnd,y,16,OP_Y,avg32)
|
yading@10
|
294 DEFFUNC(static,put, rnd,xy,16,OP_XY,PACK)
|
yading@10
|
295 DEFFUNC(static,put,no_rnd,xy,16,OP_XY,PACK)
|
yading@10
|
296
|
yading@10
|
297 #undef OP
|
yading@10
|
298 #define OP avg
|
yading@10
|
299
|
yading@10
|
300 DEFFUNC( ,ff_avg,rnd,o,8,OP_C,avg32)
|
yading@10
|
301 DEFFUNC(static,avg, rnd,x,8,OP_X,avg32)
|
yading@10
|
302 DEFFUNC(static,avg, rnd,y,8,OP_Y,avg32)
|
yading@10
|
303 DEFFUNC(static,avg, rnd,xy,8,OP_XY,PACK)
|
yading@10
|
304 DEFFUNC( ,ff_avg,rnd,o,16,OP_C,avg32)
|
yading@10
|
305 DEFFUNC(static,avg, rnd,x,16,OP_X,avg32)
|
yading@10
|
306 DEFFUNC(static,avg,no_rnd,x,16,OP_X,avg32)
|
yading@10
|
307 DEFFUNC(static,avg, rnd,y,16,OP_Y,avg32)
|
yading@10
|
308 DEFFUNC(static,avg,no_rnd,y,16,OP_Y,avg32)
|
yading@10
|
309 DEFFUNC(static,avg, rnd,xy,16,OP_XY,PACK)
|
yading@10
|
310 DEFFUNC(static,avg,no_rnd,xy,16,OP_XY,PACK)
|
yading@10
|
311
|
yading@10
|
312 #undef OP
|
yading@10
|
313
|
yading@10
|
314 #define ff_put_no_rnd_pixels8_o ff_put_rnd_pixels8_o
|
yading@10
|
315 #define ff_put_no_rnd_pixels16_o ff_put_rnd_pixels16_o
|
yading@10
|
316 #define ff_avg_no_rnd_pixels16_o ff_avg_rnd_pixels16_o
|
yading@10
|
317
|
yading@10
|
318 av_cold void ff_hpeldsp_init_sh4(HpelDSPContext *c, int flags)
|
yading@10
|
319 {
|
yading@10
|
320 c->put_pixels_tab[0][0] = ff_put_rnd_pixels16_o;
|
yading@10
|
321 c->put_pixels_tab[0][1] = put_rnd_pixels16_x;
|
yading@10
|
322 c->put_pixels_tab[0][2] = put_rnd_pixels16_y;
|
yading@10
|
323 c->put_pixels_tab[0][3] = put_rnd_pixels16_xy;
|
yading@10
|
324 c->put_pixels_tab[1][0] = ff_put_rnd_pixels8_o;
|
yading@10
|
325 c->put_pixels_tab[1][1] = put_rnd_pixels8_x;
|
yading@10
|
326 c->put_pixels_tab[1][2] = put_rnd_pixels8_y;
|
yading@10
|
327 c->put_pixels_tab[1][3] = put_rnd_pixels8_xy;
|
yading@10
|
328
|
yading@10
|
329 c->put_no_rnd_pixels_tab[0][0] = ff_put_no_rnd_pixels16_o;
|
yading@10
|
330 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x;
|
yading@10
|
331 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y;
|
yading@10
|
332 c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy;
|
yading@10
|
333 c->put_no_rnd_pixels_tab[1][0] = ff_put_no_rnd_pixels8_o;
|
yading@10
|
334 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x;
|
yading@10
|
335 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y;
|
yading@10
|
336 c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy;
|
yading@10
|
337
|
yading@10
|
338 c->avg_pixels_tab[0][0] = ff_avg_rnd_pixels16_o;
|
yading@10
|
339 c->avg_pixels_tab[0][1] = avg_rnd_pixels16_x;
|
yading@10
|
340 c->avg_pixels_tab[0][2] = avg_rnd_pixels16_y;
|
yading@10
|
341 c->avg_pixels_tab[0][3] = avg_rnd_pixels16_xy;
|
yading@10
|
342 c->avg_pixels_tab[1][0] = ff_avg_rnd_pixels8_o;
|
yading@10
|
343 c->avg_pixels_tab[1][1] = avg_rnd_pixels8_x;
|
yading@10
|
344 c->avg_pixels_tab[1][2] = avg_rnd_pixels8_y;
|
yading@10
|
345 c->avg_pixels_tab[1][3] = avg_rnd_pixels8_xy;
|
yading@10
|
346
|
yading@10
|
347 c->avg_no_rnd_pixels_tab[0] = ff_avg_no_rnd_pixels16_o;
|
yading@10
|
348 c->avg_no_rnd_pixels_tab[1] = avg_no_rnd_pixels16_x;
|
yading@10
|
349 c->avg_no_rnd_pixels_tab[2] = avg_no_rnd_pixels16_y;
|
yading@10
|
350 c->avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels16_xy;
|
yading@10
|
351 }
|