|
yading@10
|
1 /*
|
|
yading@10
|
2 * Blackfin Pixel Operations
|
|
yading@10
|
3 * Copyright (C) 2007 Marc Hoffman <marc.hoffman@analog.com>
|
|
yading@10
|
4 *
|
|
yading@10
|
5 * This file is part of FFmpeg.
|
|
yading@10
|
6 *
|
|
yading@10
|
7 * FFmpeg is free software; you can redistribute it and/or
|
|
yading@10
|
8 * modify it under the terms of the GNU Lesser General Public
|
|
yading@10
|
9 * License as published by the Free Software Foundation; either
|
|
yading@10
|
10 * version 2.1 of the License, or (at your option) any later version.
|
|
yading@10
|
11 *
|
|
yading@10
|
12 * FFmpeg is distributed in the hope that it will be useful,
|
|
yading@10
|
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
yading@10
|
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
yading@10
|
15 * Lesser General Public License for more details.
|
|
yading@10
|
16 *
|
|
yading@10
|
17 * You should have received a copy of the GNU Lesser General Public
|
|
yading@10
|
18 * License along with FFmpeg; if not, write to the Free Software
|
|
yading@10
|
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
yading@10
|
20 */
|
|
yading@10
|
21 #include "config_bfin.h"
|
|
yading@10
|
22
|
|
yading@10
|
// put_pixels_clamped: clamp the 16-bit values in `block` to 0..255 and store
// them as bytes at `dest`.
//   R0 = block, R1 = dest, R2 = line_size (see the register comments below).
// The hardware loop runs 8 times (P0 = 8). Each pass stores two 32-bit words:
// the first with [I1++], the second with [I1++M1] where M1 = line_size - 4,
// so the second store also moves I1 on to the start of the next row.
// R7:4 are pushed on entry and popped before RTS.
23 DEFUN(put_pixels_clamped,mL1,
|
|
yading@10
|
24 (int16_t *block, uint8_t *dest, int line_size)):
|
|
yading@10
|
25 [--SP] = (R7:4);
|
|
yading@10
|
// R4 = 0 is the lower clamp bound used by MAX; R5 = 0x00ff00ff (built from
// the two halves below) is the upper bound used by MIN on both 16-bit lanes.
26 R4 = 0;
|
|
yading@10
|
27 R5.l = 0x00ff;
|
|
yading@10
|
28 R5.h = 0x00ff;
|
|
yading@10
|
29 I0 = R0; // block
|
|
yading@10
|
30 I1 = R1; // dest
|
|
yading@10
|
31 R2 += -4; // line_size
|
|
yading@10
|
32 M1 = R2;
|
|
yading@10
|
33 P0 = 8;
|
|
yading@10
|
// Prime the pipeline: fetch the first two words and start clamping the first
// one before the loop is entered.
34 R0 = [I0++];
|
|
yading@10
|
35 R1 = [I0++];
|
|
yading@10
|
36 R2 = MAX(R0, R4) (V);
|
|
yading@10
|
37 LSETUP (ppc$0,ppc$1) LC0=P0;
|
|
yading@10
|
38 ppc$0: R2 = MIN(R2, R5) (V);
|
|
yading@10
|
39 R3 = MAX(R1, R4) (V);
|
|
yading@10
|
40 R3 = MIN(R3, R5) (V) || R0 = [I0++];
|
|
yading@10
|
// BYTEPACK presumably packs the four clamped halfwords of R2/R3 into the four
// bytes of R6 - confirm against the Blackfin instruction reference.
41 R6 = BYTEPACK (R2,R3) || R1 = [I0++];
|
|
yading@10
|
42 R2 = MAX(R0, R4) (V) || [I1++] = R6;
|
|
yading@10
|
43 R2 = MIN(R2, R5) (V);
|
|
yading@10
|
44 R3 = MAX(R1, R4) (V);
|
|
yading@10
|
// The loads on the next two lines fetch data for the following pass.
// NOTE(review): on the last pass these two words are loaded but never
// consumed, so the routine reads two words beyond the 32 it actually uses -
// confirm that over-read is acceptable for every caller.
45 R3 = MIN(R3, R5) (V) || R0 = [I0++];
|
|
yading@10
|
46 R6 = BYTEPACK (R2,R3) || R1 = [I0++];
|
|
yading@10
|
// Second store of the row; the M1 post-modify steps I1 to the next row.
47 ppc$1: R2 = Max(R0, R4) (V) || [I1++M1] = R6;
|
|
yading@10
|
48
|
|
yading@10
|
49 (R7:4) = [SP++];
|
|
yading@10
|
50 RTS;
|
|
yading@10
|
51 DEFUN_END(put_pixels_clamped)
|
|
yading@10
|
52
|
|
yading@10
|
// add_pixels_clamped: combine the 16-bit values in `block` with the bytes
// already stored at `dest` and write the result back to `dest`.
//   R0 = block (read through I3), R1 = dest (read through I1 and written
//   through I2), R2 = line_size.
// The combining is done by BYTEOP3P; NOTE(review): presumably this is an
// add-with-clip to 0..255 as the routine name suggests - confirm the exact
// (LO) / (HI, R) semantics in the Blackfin instruction reference.
// The hardware loop uses LC1 and runs 8 times (P0 = 8). Each pass stores two
// words: [I2++] and then [I2++M0] with M0 = line_size - 4, which steps I2 on
// to the next row. R7:4 are pushed on entry and popped before RTS.
53 DEFUN(add_pixels_clamped,mL1,
|
|
yading@10
|
54 (int16_t *block, uint8_t *dest, int line_size)):
|
|
yading@10
|
55 [-- SP] = (R7:4);
|
|
yading@10
|
// R4 is zeroed here but is not referenced again in this routine.
56 R4 = 0;
|
|
yading@10
|
// NOTE(review): I0 is cleared and never used for addressing below;
// presumably BYTEOP3P takes alignment information from it - TODO confirm.
57 I0 = 0;
|
|
yading@10
|
58 R2 += -4; // line_size
|
|
yading@10
|
59 M0 = R2;
|
|
yading@10
|
60 I1 = R1; // dest
|
|
yading@10
|
61 I3 = R0; // block
|
|
yading@10
|
62 I2 = R1; // dest
|
|
yading@10
|
63 P0 = 8;
|
|
yading@10
|
64 M3 = 2;
|
|
yading@10
|
// Prologue: load the first coefficients and dest word, build the shifted
// copies of the dest word in R2/R3, and compute the first (LO) result in R6
// before the loop starts. The loop body continues this one-step-ahead
// scheduling, so the statement order below must not be changed.
65 R0 = [I3++] || R2 = [I1];
|
|
yading@10
|
66 R2 = R2 << 8 || R0.H = W[I3--] || R3 = [I1++];
|
|
yading@10
|
67 R3 = R3 >> 8 || R1.L = W[I3] || I3 += 4;
|
|
yading@10
|
68 R6 = BYTEOP3P(R1:0, R3:2) (LO) || R1.H = W[I3++] || R2 = [I1];
|
|
yading@10
|
69
|
|
yading@10
|
70 LSETUP(apc$2,apc$3) LC1 = P0;
|
|
yading@10
|
// The dest read pointer I1 advances to the next row here (post-modify M0).
71 apc$2: R7 = BYTEOP3P(R1:0, R3:2) (HI, R) || R0 = [I3++] || R3 = [I1++M0];
|
|
yading@10
|
72 R2 = R2 << 8 || R0.H = W[I3--];
|
|
yading@10
|
73 R3 = R3 >> 8 || R1.L = W[I3] || I3 += 4;
|
|
yading@10
|
// R6 + R7 merges the (LO) and (HI, R) results into the word that is stored.
74 R6 = R6 + R7 (S) || R1.H = W[I3];
|
|
yading@10
|
75 R6 = BYTEOP3P(R1:0, R3:2) (LO) || I3+=M3 || [I2++]=R6;
|
|
yading@10
|
76 R7 = BYTEOP3P(R1:0, R3:2) (HI, R) || R0 = [I3++] || R2 = [I1];
|
|
yading@10
|
77 R2 = R2 << 8 || R0.H = W[I3--] || R3 = [I1++];
|
|
yading@10
|
78 R3 = R3 >> 8 || R1.L = W[I3] || I3 += 4;
|
|
yading@10
|
79 R6 = R6 + R7 (S) || R1.H = W[I3++];
|
|
yading@10
|
// Second store of the row; the M0 post-modify steps I2 to the next row.
80 apc$3: R6 = BYTEOP3P(R1:0, R3:2) (LO) || [I2++M0] = R6 || R2 = [I1];
|
|
yading@10
|
81
|
|
yading@10
|
82 (R7:4) = [SP++];
|
|
yading@10
|
83 RTS;
|
|
yading@10
|
84 DEFUN_END(add_pixels_clamped)
|
|
yading@10
|
85
|
|
yading@10
|
// diff_pixels: write to `block` the 16-bit results that BYTEOP16M produces
// from the bytes of `s1` and `s2` (presumably the per-pixel difference
// s1 - s2 - confirm operand order in the Blackfin instruction reference).
//   R0 = block (written through I3), R1 = s1 (I0), R2 = s2 (I1);
//   stride is the fourth argument and is read from the stack at [fp+20]
//   after `link 0`.
// The hardware loop runs 8 times (p0 = 8). Each pass reads 8 bytes from each
// source and stores four words (R4, R5, R6, R7) to `block`.
// m0 = stride - 8 moves both source pointers from the end of the 8 bytes just
// read to the start of the next row. DISALGNEXCPT is paired with the source
// loads, so the sources presumably need not be 4-byte aligned.
86 DEFUN(diff_pixels,mL1,
|
|
yading@10
|
87 (int16_t *block, uint8_t *s1, uint8_t *s2, int stride)):
|
|
yading@10
|
88 link 0;
|
|
yading@10
|
89 [--sp] = (r7:4);
|
|
yading@10
|
90 p0=8;
|
|
yading@10
|
91 i3=r0; // block
|
|
yading@10
|
92 i0=r1; // s1
|
|
yading@10
|
93 i1=r2; // s2
|
|
yading@10
|
94 r2=[fp+20]; // stride
|
|
yading@10
|
95 r2+=-8;
|
|
yading@10
|
96 m0=r2;
|
|
yading@10
|
97
|
|
yading@10
|
98
|
|
yading@10
|
99 LSETUP(.LS0,.LE0) LC0=P0;
|
|
yading@10
|
// Prime the pipeline with the first word of each source; this instruction
// sits between LSETUP and the loop start label, so it executes only once.
100 DISALGNEXCPT || R0 = [I0++] || R2 =[I1++];
|
|
yading@10
|
101
|
|
yading@10
|
102 .LS0: DISALGNEXCPT || R1 = [I0++] || R3 = [I1++];
|
|
yading@10
|
// First four pixels of the row; the parallel loads step to the next row (M0).
103 (R5,R4) = BYTEOP16M (R1:0,R3:2) || R0 = [I0++M0] || R2 = [I1++M0];
|
|
yading@10
|
// Remaining four pixels, (R) form; the parallel load already fetches the
// first s1 word of the next row.
104 (R7,R6) = BYTEOP16M (R1:0,R3:2) (R)|| R0 = [I0++] || [I3++] = R4;
|
|
yading@10
|
105 DISALGNEXCPT || R2 = [I1++] || [I3++] = R5;
|
|
yading@10
|
106 [i3++]=r6;
|
|
yading@10
|
107 .LE0: [i3++]=r7;
|
|
yading@10
|
108
|
|
yading@10
|
109 (r7:4) = [sp++];
|
|
yading@10
|
110 unlink;
|
|
yading@10
|
111 rts;
|
|
yading@10
|
112 DEFUN_END(diff_pixels)
|
|
yading@10
|
113
|
|
yading@10
|
114 /*
|
|
yading@10
|
115 for (i = 0; i < 16; i++) {
|
|
yading@10
|
116 for (j = 0; j < 16; j++) {
|
|
yading@10
|
117 sum += pix[j];
|
|
yading@10
|
118 }
|
|
yading@10
|
119 pix += line_size;
|
|
yading@10
|
120 }
|
|
yading@10
|
121 */
|
|
yading@10
|
// pix_sum: return in R0 the sum of the bytes of a 16-wide, 16-row area
// starting at `p` (compare the C sketch in the comment preceding this
// routine).
//   R0 = p, R1 = stride.
// Two rows are processed per pass: I0 walks the current row and I1 the row
// one stride further on (i1 = p, then i1 += m1 with m1 = stride). The
// hardware loop runs 8 times (p0 = 8), covering 16 rows.
// m0 = 2 * stride - 16 moves both pointers from the end of the 16 bytes just
// read to the start of the row two strides further on.
// Partial sums are kept in the two 16-bit halves of r6 and folded together at
// the end; the upper half of the returned value is cleared (r0.h = 0).
122 DEFUN(pix_sum,mL1,
|
|
yading@10
|
123 (uint8_t *p, int stride)):
|
|
yading@10
|
124 link 0;
|
|
yading@10
|
125 [--sp] = (r7:4);
|
|
yading@10
|
126 p0=8;
|
|
yading@10
|
127 i0=r0; // s1
|
|
yading@10
|
128 i1=r0;
|
|
yading@10
|
129 m1=r1;
|
|
yading@10
|
130 r1=r1+r1;
|
|
yading@10
|
131 r1+=-16; // stride
|
|
yading@10
|
132 m0=r1;
|
|
yading@10
|
133 i1+=m1;
|
|
yading@10
|
134
|
|
yading@10
|
// r6 is the running accumulator (two independent 16-bit lanes).
135 r6=0;
|
|
yading@10
|
136
|
|
yading@10
|
137 LSETUP(LS$PS,LE$PS) LC0=P0;
|
|
yading@10
|
// Executed once: prime R0/R2 with the first word of each of the two rows.
138 DISALGNEXCPT || R0 = [I0++] || R2 =[I1++];
|
|
yading@10
|
139
|
|
yading@10
|
140 LS$PS: DISALGNEXCPT || R1 = [I0++] || R3 = [I1++];
|
|
yading@10
|
// Each BYTEOP16P below presumably adds four bytes of one row to four bytes of
// the other, giving four 16-bit sums in R5:R4 - confirm in the instruction
// reference. The +|+ lines add those lanes into r6.
141 (R5,R4) = BYTEOP16P (R3:2,R1:0) || R0 = [I0++] || R2 = [I1++];
|
|
yading@10
|
142 r6=r6+|+r5;
|
|
yading@10
|
143 r6=r6+|+r4;
|
|
yading@10
|
144 (R5,R4) = BYTEOP16P (R3:2,R1:0) (R)|| R1 = [I0++] || R3 = [I1++];
|
|
yading@10
|
145 r6=r6+|+r5;
|
|
yading@10
|
146 r6=r6+|+r4;
|
|
yading@10
|
// The m0 post-modify here steps both pointers to the next pair of rows.
147 (R5,R4) = BYTEOP16P (R3:2,R1:0) || R0 = [I0++m0] || R2 = [I1++m0];
|
|
yading@10
|
148 r6=r6+|+r5;
|
|
yading@10
|
149 r6=r6+|+r4;
|
|
yading@10
|
150 (R5,R4) = BYTEOP16P (R3:2,R1:0) (R)|| R0 = [I0++] || R2 = [I1++];
|
|
yading@10
|
151 r6=r6+|+r5;
|
|
yading@10
|
152 LE$PS: r6=r6+|+r4;
|
|
yading@10
|
// Fold the two 16-bit lanes into the low half of the return register.
153 r0.l=r6.l+r6.h;
|
|
yading@10
|
154 r0.h=0;
|
|
yading@10
|
155
|
|
yading@10
|
156 (r7:4) = [sp++];
|
|
yading@10
|
157 unlink;
|
|
yading@10
|
158 rts;
|
|
yading@10
|
159 DEFUN_END(pix_sum)
|
|
yading@10
|
160
|
|
yading@10
|
161
|
|
yading@10
|
// get_pixels: expand the bytes at `pixels` to 16-bit values and store them
// in `block`.
//   R0 = block (written through I3), R1 = pixels (read through I0),
//   R2 = line_size.
// The hardware loop runs 8 times (p0 = 8). Each pass unpacks 8 source bytes
// with two BYTEUNPACK operations and stores four words (R6, R7, R4, R5).
// m0 = line_size - 8 moves I0 from the end of the 8 bytes just read to the
// start of the next source row. DISALGNEXCPT is paired with the source loads,
// so `pixels` presumably need not be 4-byte aligned.
// R7:4 are pushed on entry and popped before RTS.
162 DEFUN(get_pixels,mL1,
|
|
yading@10
|
163 (int16_t *av_restrict block, const uint8_t *pixels, int line_size)):
|
|
yading@10
|
164 [--sp] = (r7:4);
|
|
yading@10
|
165 i3=r0; // dest
|
|
yading@10
|
166 i0=r1; // src0
|
|
yading@10
|
167 p0=8;
|
|
yading@10
|
168 r2+=-8;
|
|
yading@10
|
169 m0=r2;
|
|
yading@10
|
170 LSETUP(gp8$0,gp8$1) LC0=P0;
|
|
yading@10
|
171
|
|
yading@10
|
// Executed once (they precede the loop start label): load the first row.
172 DISALGNEXCPT || R0 = [I0++];
|
|
yading@10
|
173 DISALGNEXCPT || R1 = [I0++];
|
|
yading@10
|
174
|
|
yading@10
|
// First four pixels of the row; the parallel load steps I0 to the next row.
175 gp8$0: (R7,R6) = byteunpack R1:0 || R0 = [I0++M0];
|
|
yading@10
|
// Remaining four pixels, (R) form; the parallel loads on this line and the
// next fetch the following row for the next pass.
176 (R5,R4) = byteunpack R1:0 (R) || R0 = [I0++] || [I3++]=R6;
|
|
yading@10
|
177 DISALGNEXCPT || R1 = [I0++] || [I3++]=R7;
|
|
yading@10
|
178 [I3++]=R4;
|
|
yading@10
|
// NOTE(review): this statement has no trailing ';', unlike every other
// statement in the file - confirm the assembler in use accepts it.
179 gp8$1: [I3++]=R5
|
|
yading@10
|
180
|
|
yading@10
|
181
|
|
yading@10
|
182 (r7:4) = [sp++];
|
|
yading@10
|
183 RTS;
|
|
yading@10
|
184 DEFUN_END(get_pixels)
|
|
yading@10
|
185
|
|
yading@10
|
186
|
|
yading@10
|
187 /* sad = sad16x16 (ubyte *mb, ubyte *refwin, int srcwidth, int refwinwidth, int h) */
|
|
yading@10
|
188 /* 91 cycles */
|
|
yading@10
|
// z_sad16x16: accumulate SAA results over `h` rows of 16 bytes from `blk1`
// and `blk2` and return the total in R0 (SAA is presumably the quad 8-bit
// sum-of-absolute-differences instruction - see the Blackfin reference).
//   R0 = blk1 (I0), R1 = blk2 (I1), R2 = dsz; line_size and h are read from
//   the stack (the inline comments call them rwidth and height).
// M0 = dsz - 16 is the end-of-row step for blk1 and M1 = line_size - 16 the
// end-of-row step for blk2. The loop count comes from h (LC0 = P2).
// NOTE(review): the stack arguments are addressed through SP here, while
// other routines in this file use FP after `link 0`; nothing is pushed after
// the link in this routine, so presumably both address the same slots.
// No R4-R7 registers are used, so nothing is saved besides the link frame.
189 DEFUN(z_sad16x16,mL1,
|
|
yading@10
|
190 (uint8_t *blk1, uint8_t *blk2, int dsz, int line_size, int h)):
|
|
yading@10
|
191 link 0;
|
|
yading@10
|
192 I0 = R0;
|
|
yading@10
|
193 I1 = R1;
|
|
yading@10
|
194
|
|
yading@10
|
// Clear both accumulators; SAA adds into them.
195 A1 = A0 = 0;
|
|
yading@10
|
196 R0 = [sp+20]; // rwidth
|
|
yading@10
|
197 P2 = [sp+24]; // height
|
|
yading@10
|
198 R3 = 16;
|
|
yading@10
|
199 R0 = R0 - R3;
|
|
yading@10
|
200 R3 = R2 - R3;
|
|
yading@10
|
201 M1 = R0;
|
|
yading@10
|
202 M0 = R3;
|
|
yading@10
|
203
|
|
yading@10
|
// Prime R0/R2 with the first word of each block before entering the loop.
204 DISALGNEXCPT || R0 = [I0++] || R2 = [I1++];
|
|
yading@10
|
205 LSETUP (s$16, e$16) LC0=P2;
|
|
yading@10
|
206 s$16: DISALGNEXCPT || R1 = [I0++] || R3 = [I1++];
|
|
yading@10
|
// Four SAA operations per pass cover the 16 bytes of the row.
207 SAA (R1:0,R3:2) || R0 = [I0++] || R2 = [I1++];
|
|
yading@10
|
208 SAA (R1:0,R3:2) (R) || R1 = [I0++] || R3 = [I1++];
|
|
yading@10
|
// End of row: the M0 / M1 post-modifies step each pointer to its next row.
209 SAA (R1:0,R3:2) || R0 = [I0++M0] || R2 = [I1++M1];
|
|
yading@10
|
210 e$16: SAA (R1:0,R3:2) (R) || R0 = [I0++] || R2 = [I1++];
|
|
yading@10
|
211
|
|
yading@10
|
// Fold the four 16-bit accumulator halves into the return value.
212 R3=A1.L+A1.H, R2=A0.L+A0.H ;
|
|
yading@10
|
213 R0 = R2 + R3 ;
|
|
yading@10
|
214 unlink;
|
|
yading@10
|
215 RTS;
|
|
yading@10
|
216 DEFUN_END(z_sad16x16)
|
|
yading@10
|
217
|
|
yading@10
|
218 /* sad = sad8x8 (ubyte *mb, ubyte *refwin, int srcwidth, int refwinwidth, int h) */
|
|
yading@10
|
219 /* 36 cycles */
|
|
yading@10
|
// z_sad8x8: accumulate SAA results over `h` rows of 8 bytes from `blk1` and
// `blk2` and return the total in R0 (SAA is presumably the quad 8-bit
// sum-of-absolute-differences instruction - see the Blackfin reference).
//   R0 = blk1 (I0), R1 = blk2 (I1), R2 = dsz; line_size and h are read from
//   the stack at [sp+12] and [sp+16] (the inline comments call them rwidth
//   and height). No frame is linked, so SP is still at its entry value.
// M0 = dsz - 8 is the end-of-row step for blk1 and M1 = line_size - 8 the
// end-of-row step for blk2. The loop count comes from h (LC0 = P2).
220 DEFUN(z_sad8x8,mL1,
|
|
yading@10
|
221 (uint8_t *blk1, uint8_t *blk2, int dsz, int line_size, int h)):
|
|
yading@10
|
222 I0 = R0;
|
|
yading@10
|
223 I1 = R1;
|
|
yading@10
|
224
|
|
yading@10
|
// Clear both accumulators; SAA adds into them.
225 A1 = A0 = 0;
|
|
yading@10
|
226 r0 = [sp+12]; // rwidth
|
|
yading@10
|
227 P2 = [sp+16]; //height
|
|
yading@10
|
228 R3 = 8;
|
|
yading@10
|
229 R0 = R0 - R3;
|
|
yading@10
|
230 R3 = R2 - R3;
|
|
yading@10
|
231 M0 = R3;
|
|
yading@10
|
232 M1 = R0;
|
|
yading@10
|
233
|
|
yading@10
|
234 LSETUP (s$8, e$8) LC0=P2;
|
|
yading@10
|
// The next two instructions precede the loop start label, so they execute
// once: they load the first 8 bytes of each block.
235 DISALGNEXCPT || R0 = [I0++] || R2 = [I1++];
|
|
yading@10
|
236 DISALGNEXCPT || R1 = [I0++] || R3 = [I1++];
|
|
yading@10
|
// End of row: the M0 / M1 post-modifies step each pointer to its next row.
237 s$8: SAA (R1:0,R3:2) || R0 = [I0++M0] || R2 = [I1++M1];
|
|
yading@10
|
238 SAA (R1:0,R3:2) (R) || R0 = [I0++] || R2 = [I1++];
|
|
yading@10
|
// Completes the load of the next row for the following pass.
239 e$8: DISALGNEXCPT || R1 = [I0++] || R3 = [I1++];
|
|
yading@10
|
240
|
|
yading@10
|
// Fold the four 16-bit accumulator halves into the return value.
241 R3=A1.L+A1.H, R2=A0.L+A0.H ;
|
|
yading@10
|
242 R0 = R2 + R3 ;
|
|
yading@10
|
243 RTS;
|
|
yading@10
|
244 DEFUN_END(z_sad8x8)
|
|
yading@10
|
245
|
|
yading@10
|
// pix_norm1: return in R0 the sum of the squares of the bytes of a 16-wide,
// 16-row area starting at `pix`.
//   R0 = pix, R1 = line_size.
// The hardware loop uses LC1 and runs 16 times (P5 = 16), one row per pass.
// Each pass unpacks 16 bytes into 16-bit lanes (four BYTEUNPACK operations)
// and multiply-accumulates every lane with itself into A0 / A1.
// M0 = line_size - 16 moves I0 from the end of the 16 bytes just read to the
// start of the next row.
// The result is A0 + A1, added with the saturating (S) option.
// R7:4 and P5:3 are pushed on entry and popped before RTS.
246 DEFUN(pix_norm1,mL1,
|
|
yading@10
|
247 (uint8_t * pix, int line_size)):
|
|
yading@10
|
248 [--SP]=(R7:4,P5:3);
|
|
yading@10
|
249
|
|
yading@10
|
250 // Fetch the input arguments.
|
|
yading@10
|
251 P1 = R0; // pix
|
|
yading@10
|
252 P0 = R1; // line_size
|
|
yading@10
|
253 P5 = 16; // loop ctr.
|
|
yading@10
|
254 P0 -= P5;
|
|
yading@10
|
255 M0 = P0; // M0 = line_size-16;
|
|
yading@10
|
256 // Now for the real work.
|
|
yading@10
|
257 A1 = A0 = 0;
|
|
yading@10
|
258 lsetup(_pix_norm1_blkfn_loopStart, _pix_norm1_blkfn_loopEnd) LC1 = P5;
|
|
yading@10
|
// The next two instructions precede the loop start label, so they execute
// once: set up the read pointer and load the first word of the first row.
259 I0 = P1;
|
|
yading@10
|
260 DISALGNEXCPT || r0 = [i0++];
|
|
yading@10
|
261
|
|
yading@10
|
262 _pix_norm1_blkfn_loopStart:
|
|
yading@10
|
263 // following unpacks pix1[0..15] pix1+line_size[0..15]
|
|
yading@10
|
264 DISALGNEXCPT || r1 = [i0++];
|
|
yading@10
|
265
|
|
yading@10
|
// Each byteunpack yields four 16-bit lanes in r5:r4; the two MAC lines after
// it square and accumulate all four lanes.
266 (r5, r4) = byteunpack r1:0 || r0 = [i0++];
|
|
yading@10
|
267 a1 += r5.h * r5.h, a0 += r5.l * r5.l (is);
|
|
yading@10
|
268 a1 += r4.h * r4.h, a0 += r4.l * r4.l (is);
|
|
yading@10
|
269 (r5, r4) = byteunpack r1:0(r) || r1 = [i0++];
|
|
yading@10
|
270 a1 += r5.h * r5.h, a0 += r5.l * r5.l (is);
|
|
yading@10
|
271 a1 += r4.h * r4.h, a0 += r4.l * r4.l (is);
|
|
yading@10
|
// End of row: the M0 post-modify steps I0 to the next row.
272 (r5, r4) = byteunpack r1:0 || r0 = [i0++M0];
|
|
yading@10
|
273 a1 += r5.h * r5.h, a0 += r5.l * r5.l (is);
|
|
yading@10
|
274 a1 += r4.h * r4.h, a0 += r4.l * r4.l (is);
|
|
yading@10
|
// The parallel load here fetches the first word of the next row.
275 (r5, r4) = byteunpack r1:0(r) || r0 = [i0++];
|
|
yading@10
|
276 a1 += r5.h * r5.h, a0 += r5.l * r5.l (is);
|
|
yading@10
|
277 _pix_norm1_blkfn_loopEnd:
|
|
yading@10
|
278 a1 += r4.h * r4.h, a0 += r4.l * r4.l (is);
|
|
yading@10
|
279
|
|
yading@10
|
280
|
|
yading@10
|
281 // Clean up at the end:
|
|
yading@10
|
282 R2 = A0, R3 = A1;
|
|
yading@10
|
283 R0 = R2 + R3 (S);
|
|
yading@10
|
284
|
|
yading@10
|
285 (R7:4,P5:3)=[SP++];
|
|
yading@10
|
286
|
|
yading@10
|
287 RTS;
|
|
yading@10
|
288 DEFUN_END(pix_norm1)
|
|
yading@10
|
289
|
|
yading@10
|
// sse4: accumulate the squares of the BYTEOP16M results (presumably the
// per-pixel differences) of `pix1` and `pix2` over `h` rows of 4 bytes and
// return the total in R0.
//   R0 = v (not read; R0 is overwritten by the first load), R1 = pix1 (I0),
//   R2 = pix2 (I1); line_size and h are read from the stack at [fp+20] and
//   [fp+24] after `link 0`.
// m0 = line_size - 4 is the post-modify applied to both pointers by the load
// inside the loop. The loop count comes from h (LC0 = P0).
// NOTE(review): R0 and R2 are loaded once before the loop and are not
// reloaded inside it, whereas sse8 and sse16 refresh R0/R2 on every pass.
// Verify that rows after the first are computed from the intended data.
// R7:6 are pushed on entry and popped before unlink.
290 DEFUN(sse4,mL1,
|
|
yading@10
|
291 (void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)):
|
|
yading@10
|
292 link 0;
|
|
yading@10
|
293 [--sp] = (r7:6);
|
|
yading@10
|
294 p0=[fp+24]; // h
|
|
yading@10
|
295 i0=r1; // pix1
|
|
yading@10
|
296 i1=r2; // pix2
|
|
yading@10
|
297 r2=[fp+20]; // line_size
|
|
yading@10
|
298 r2+=-4;
|
|
yading@10
|
299 m0=r2;
|
|
yading@10
|
300
|
|
yading@10
|
301 a0=a1=0;
|
|
yading@10
|
302 LSETUP(.S40,.E40) LC0=P0;
|
|
yading@10
|
// Executed once: this load precedes the loop start label.
303 DISALGNEXCPT || R0 = [I0++] || R2 =[I1++];
|
|
yading@10
|
304
|
|
yading@10
|
305 .S40: DISALGNEXCPT || R1 = [I0++M0] || R3 = [I1++M0];
|
|
yading@10
|
306 (R7,R6) = BYTEOP16M (R1:0,R3:2);
|
|
yading@10
|
// Square and accumulate the four 16-bit lanes of R7:R6.
307 a0 += r7.l * r7.l, a1 += r7.h * r7.h (is);
|
|
yading@10
|
308 .E40: a0 += r6.l * r6.l, a1 += r6.h * r6.h (is);
|
|
yading@10
|
// Combine the two accumulators into the return value.
309 a0 += a1;
|
|
yading@10
|
310 r0 = a0;
|
|
yading@10
|
311
|
|
yading@10
|
312 (r7:6) = [sp++];
|
|
yading@10
|
313 unlink;
|
|
yading@10
|
314 rts;
|
|
yading@10
|
315 DEFUN_END(sse4)
|
|
yading@10
|
316
|
|
yading@10
|
// sse8: accumulate the squares of the BYTEOP16M results (presumably the
// per-pixel differences) of `pix1` and `pix2` over `h` rows of 8 bytes and
// return the total in R0.
//   R0 = v (not read; R0 is overwritten by the first load), R1 = pix1 (I0),
//   R2 = pix2 (I1); line_size and h are read from the stack at [fp+20] and
//   [fp+24] after `link 0`.
// m0 = line_size - 8 moves both pointers from the end of the 8 bytes just
// read to the start of the next row. The loop count comes from h (LC0 = P0).
// R7:6 are pushed on entry and popped before unlink.
317 DEFUN(sse8,mL1,
|
|
yading@10
|
318 (void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)):
|
|
yading@10
|
319 link 0;
|
|
yading@10
|
320 [--sp] = (r7:6);
|
|
yading@10
|
321 p0=[fp+24]; // h
|
|
yading@10
|
322 i0=r1; // pix1
|
|
yading@10
|
323 i1=r2; // pix2
|
|
yading@10
|
324 r2=[fp+20]; // line_size
|
|
yading@10
|
325 r2+=-8;
|
|
yading@10
|
326 m0=r2;
|
|
yading@10
|
327
|
|
yading@10
|
328 a0=a1=0;
|
|
yading@10
|
329 LSETUP(.S80,.E80) LC0=P0;
|
|
yading@10
|
// Executed once: this load precedes the loop start label.
330 DISALGNEXCPT || R0 = [I0++] || R2 =[I1++];
|
|
yading@10
|
331
|
|
yading@10
|
332 .S80: DISALGNEXCPT || R1 = [I0++] || R3 = [I1++];
|
|
yading@10
|
// First four pixels of the row; the parallel loads step to the next row (M0).
333 (R7,R6) = BYTEOP16M (R1:0,R3:2) || R0 = [I0++M0] || R2 = [I1++M0];
|
|
yading@10
|
334 a0 += r7.l * r7.l, a1 += r7.h * r7.h (is);
|
|
yading@10
|
335 a0 += r6.l * r6.l, a1 += r6.h * r6.h (is);
|
|
yading@10
|
// Remaining four pixels, (R) form; the parallel loads fetch the first word
// of the next row for the following pass.
336 (R7,R6) = BYTEOP16M (R1:0,R3:2) (R)|| R0 = [I0++] || R2 = [I1++];
|
|
yading@10
|
337 a0 += r7.l * r7.l, a1 += r7.h * r7.h (is);
|
|
yading@10
|
338 .E80: a0 += r6.l * r6.l, a1 += r6.h * r6.h (is);
|
|
yading@10
|
// Combine the two accumulators into the return value.
339 a0 += a1;
|
|
yading@10
|
340 r0 = a0;
|
|
yading@10
|
341
|
|
yading@10
|
342 (r7:6) = [sp++];
|
|
yading@10
|
343 unlink;
|
|
yading@10
|
344 rts;
|
|
yading@10
|
345 DEFUN_END(sse8)
|
|
yading@10
|
346
|
|
yading@10
|
// sse16: accumulate the squares of the BYTEOP16M results (presumably the
// per-pixel differences) of `pix1` and `pix2` over `h` rows of 16 bytes and
// return the total in R0.
//   R0 = v (not read; R0 is overwritten by the first load), R1 = pix1 (I0),
//   R2 = pix2 (I1); line_size and h are read from the stack at [fp+20] and
//   [fp+24] after `link 0`.
// m0 = line_size - 16 moves both pointers from the end of the 16 bytes just
// read to the start of the next row. The loop count comes from h (LC0 = P0).
// R7:6 are pushed on entry and popped before unlink.
347 DEFUN(sse16,mL1,
|
|
yading@10
|
348 (void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)):
|
|
yading@10
|
349 link 0;
|
|
yading@10
|
350 [--sp] = (r7:6);
|
|
yading@10
|
351 p0=[fp+24]; // h
|
|
yading@10
|
352 i0=r1; // pix1
|
|
yading@10
|
353 i1=r2; // pix2
|
|
yading@10
|
354 r2=[fp+20]; // line_size
|
|
yading@10
|
355 r2+=-16;
|
|
yading@10
|
356 m0=r2;
|
|
yading@10
|
357
|
|
yading@10
|
358 a0=a1=0;
|
|
yading@10
|
// Prime R0/R2 with the first word of each source before entering the loop.
359 DISALGNEXCPT || R0 = [I0++] || R2 =[I1++];
|
|
yading@10
|
360 LSETUP(.S160,.E160) LC0=P0;
|
|
yading@10
|
361
|
|
yading@10
|
362 .S160: DISALGNEXCPT || R1 = [I0++] || R3 = [I1++];
|
|
yading@10
|
// Four BYTEOP16M operations per pass cover the 16 bytes of the row; each is
// followed by two MAC lines that square and accumulate its four lanes.
363 (R7,R6) = BYTEOP16M (R1:0,R3:2) || R0 = [I0++] || R2 = [I1++];
|
|
yading@10
|
364 a0 += r7.l * r7.l, a1 += r7.h * r7.h (is);
|
|
yading@10
|
365 a0 += r6.l * r6.l, a1 += r6.h * r6.h (is);
|
|
yading@10
|
366 (R7,R6) = BYTEOP16M (R1:0,R3:2) (R)|| R1 = [I0++] || R3 = [I1++];
|
|
yading@10
|
367 a0 += r7.l * r7.l, a1 += r7.h * r7.h (is);
|
|
yading@10
|
368 a0 += r6.l * r6.l, a1 += r6.h * r6.h (is);
|
|
yading@10
|
// End of row: the M0 post-modify steps both pointers to the next row.
369 (R7,R6) = BYTEOP16M (R1:0,R3:2) || R0 = [I0++M0] || R2 = [I1++M0];
|
|
yading@10
|
370 a0 += r7.l * r7.l, a1 += r7.h * r7.h (is);
|
|
yading@10
|
371 a0 += r6.l * r6.l, a1 += r6.h * r6.h (is);
|
|
yading@10
|
// The parallel loads here fetch the first word of the next row.
372 (R7,R6) = BYTEOP16M (R1:0,R3:2) (R)|| R0 = [I0++] || R2 = [I1++];
|
|
yading@10
|
373 a0 += r7.l * r7.l, a1 += r7.h * r7.h (is);
|
|
yading@10
|
374 .E160: a0 += r6.l * r6.l, a1 += r6.h * r6.h (is);
|
|
yading@10
|
// Combine the two accumulators into the return value.
375 a0 += a1;
|
|
yading@10
|
376 r0 = a0;
|
|
yading@10
|
377
|
|
yading@10
|
378 (r7:6) = [sp++];
|
|
yading@10
|
379 unlink;
|
|
yading@10
|
380 rts;
|
|
yading@10
|
381 DEFUN_END(sse16)
|