/*
 * Fixed-point cosine constants C0..C7 and the column shift.
 * Per the inline notes each Ci is cos(i*M_PI/16)*sqrt(2)*(1<<14) rounded
 * with +0.5; C4 is the one exception and is noted with -0.5.
 * COL_SHIFT is 20 (its inline note reads "6").
 * The same physical line ends with the signature of idct(), which takes a
 * pointer to int16_t named block.
 */
38 #define C0 23170 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 39 #define C1 22725 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 40 #define C2 21407 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 41 #define C3 19266 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 42 #define C4 16383 //cos(i*M_PI/16)*sqrt(2)*(1<<14) - 0.5 43 #define C5 12873 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 44 #define C6 8867 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 45 #define C7 4520 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 48 #define COL_SHIFT 20 // 6 81 static inline void idct(int16_t *block)
/*
 * temp is a constant pointer to int16_t that aliases align_tmp (declared
 * outside this excerpt). It is the second input operand (%1) of the asm
 * statement: the row macros store to offsets of %1 and the column IDCT
 * macros load from them.
 */
84 int16_t *
const temp= (int16_t*)align_tmp;
/*
 * Alternative, simpler variant, opened by "#if 0" (the matching
 * #else/#endif is not visible in this excerpt).
 *
 * Operand numbering, per the asm input list: %0 = block, %1 = temp,
 * %2 = coeffs.
 *
 * ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift)
 *   Loads the quadwords src0/src4/src1/src5 into mm0..mm3, multiplies them
 *   (pmaddwd) with the coefficient quadwords at 16(%2)..104(%2), applies the
 *   pasted `rounder` instruction to mm4 and to mm0, combines the partial
 *   sums with paddd/psubd, shifts right arithmetically by `shift`, packs
 *   with signed saturation and stores four quadwords at dst, 8+dst, 16+dst
 *   and 24+dst.
 *
 * COL_IDCT(src0, src4, src1, src5, dst, shift)
 *   Same multiply/add-subtract structure without a rounder. Every result is
 *   packed and written with movd to dst, 16+dst, 32+dst, 48+dst, 64+dst,
 *   80+dst, 96+dst and 112+dst.
 *
 * DC_COND_ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift)
 *   Starts by OR-ing (wm1010 & src0) with src4, src1 and src5, packing the
 *   result into eax and testing it with "orl %%eax, %%eax". The ROW_IDCT
 *   instruction sequence follows, then a short sequence that shifts mm0 left
 *   by 16, right by 13, packs it and stores the same quadword to dst, 8+dst,
 *   16+dst and 24+dst. In this variant the paddd of d40000 is written with a
 *   leading '#' inside the asm string, which presumably makes the assembler
 *   ignore it.
 *   NOTE(review): the conditional jump and the labels separating the two
 *   sequences are not visible here (the embedded numbering skips 245 and
 *   308-309) -- confirm against the complete file.
 *
 * The text ends with the first row invocation: block offsets 0..24 to
 * temp offset 0, rounder "paddd 8(%2)", shift 11.
 */
87 #if 0 //Alternative, simpler variant 89 #define ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \ 90 "movq " #src0 ", %%mm0 \n\t" \ 91 "movq " #src4 ", %%mm1 \n\t" \ 92 "movq " #src1 ", %%mm2 \n\t" \ 93 "movq " #src5 ", %%mm3 \n\t" \ 94 "movq 16(%2), %%mm4 \n\t" \ 95 "pmaddwd %%mm0, %%mm4 \n\t" \ 96 "movq 24(%2), %%mm5 \n\t" \ 97 "pmaddwd %%mm5, %%mm0 \n\t" \ 98 "movq 32(%2), %%mm5 \n\t" \ 99 "pmaddwd %%mm1, %%mm5 \n\t" \ 100 "movq 40(%2), %%mm6 \n\t" \ 101 "pmaddwd %%mm6, %%mm1 \n\t" \ 102 "movq 48(%2), %%mm7 \n\t" \ 103 "pmaddwd %%mm2, %%mm7 \n\t" \ 104 #rounder ", %%mm4 \n\t"\ 105 "movq %%mm4, %%mm6 \n\t" \ 106 "paddd %%mm5, %%mm4 \n\t" \ 107 "psubd %%mm5, %%mm6 \n\t" \ 108 "movq 56(%2), %%mm5 \n\t" \ 109 "pmaddwd %%mm3, %%mm5 \n\t" \ 110 #rounder ", %%mm0 \n\t"\ 111 "paddd %%mm0, %%mm1 \n\t" \ 112 "paddd %%mm0, %%mm0 \n\t" \ 113 "psubd %%mm1, %%mm0 \n\t" \ 114 "pmaddwd 64(%2), %%mm2 \n\t" \ 115 "paddd %%mm5, %%mm7 \n\t" \ 116 "movq 72(%2), %%mm5 \n\t" \ 117 "pmaddwd %%mm3, %%mm5 \n\t" \ 118 "paddd %%mm4, %%mm7 \n\t" \ 119 "paddd %%mm4, %%mm4 \n\t" \ 120 "psubd %%mm7, %%mm4 \n\t" \ 121 "paddd %%mm2, %%mm5 \n\t" \ 122 "psrad $" #shift ", %%mm7 \n\t"\ 123 "psrad $" #shift ", %%mm4 \n\t"\ 124 "movq %%mm1, %%mm2 \n\t" \ 125 "paddd %%mm5, %%mm1 \n\t" \ 126 "psubd %%mm5, %%mm2 \n\t" \ 127 "psrad $" #shift ", %%mm1 \n\t"\ 128 "psrad $" #shift ", %%mm2 \n\t"\ 129 "packssdw %%mm1, %%mm7 \n\t" \ 130 "packssdw %%mm4, %%mm2 \n\t" \ 131 "movq %%mm7, " #dst " \n\t"\ 132 "movq " #src1 ", %%mm1 \n\t" \ 133 "movq 80(%2), %%mm4 \n\t" \ 134 "movq %%mm2, 24+" #dst " \n\t"\ 135 "pmaddwd %%mm1, %%mm4 \n\t" \ 136 "movq 88(%2), %%mm7 \n\t" \ 137 "pmaddwd 96(%2), %%mm1 \n\t" \ 138 "pmaddwd %%mm3, %%mm7 \n\t" \ 139 "movq %%mm0, %%mm2 \n\t" \ 140 "pmaddwd 104(%2), %%mm3 \n\t" \ 141 "paddd %%mm7, %%mm4 \n\t" \ 142 "paddd %%mm4, %%mm2 \n\t" \ 143 "psubd %%mm4, %%mm0 \n\t" \ 144 "psrad $" #shift ", %%mm2 \n\t"\ 145 "psrad $" #shift ", %%mm0 \n\t"\ 146 "movq %%mm6, %%mm4 \n\t" \ 147 
"paddd %%mm1, %%mm3 \n\t" \ 148 "paddd %%mm3, %%mm6 \n\t" \ 149 "psubd %%mm3, %%mm4 \n\t" \ 150 "psrad $" #shift ", %%mm6 \n\t"\ 151 "packssdw %%mm6, %%mm2 \n\t" \ 152 "movq %%mm2, 8+" #dst " \n\t"\ 153 "psrad $" #shift ", %%mm4 \n\t"\ 154 "packssdw %%mm0, %%mm4 \n\t" \ 155 "movq %%mm4, 16+" #dst " \n\t"\ 157 #define COL_IDCT(src0, src4, src1, src5, dst, shift) \ 158 "movq " #src0 ", %%mm0 \n\t" \ 159 "movq " #src4 ", %%mm1 \n\t" \ 160 "movq " #src1 ", %%mm2 \n\t" \ 161 "movq " #src5 ", %%mm3 \n\t" \ 162 "movq 16(%2), %%mm4 \n\t" \ 163 "pmaddwd %%mm0, %%mm4 \n\t" \ 164 "movq 24(%2), %%mm5 \n\t" \ 165 "pmaddwd %%mm5, %%mm0 \n\t" \ 166 "movq 32(%2), %%mm5 \n\t" \ 167 "pmaddwd %%mm1, %%mm5 \n\t" \ 168 "movq 40(%2), %%mm6 \n\t" \ 169 "pmaddwd %%mm6, %%mm1 \n\t" \ 170 "movq %%mm4, %%mm6 \n\t" \ 171 "movq 48(%2), %%mm7 \n\t" \ 172 "pmaddwd %%mm2, %%mm7 \n\t" \ 173 "paddd %%mm5, %%mm4 \n\t" \ 174 "psubd %%mm5, %%mm6 \n\t" \ 175 "movq %%mm0, %%mm5 \n\t" \ 176 "paddd %%mm1, %%mm0 \n\t" \ 177 "psubd %%mm1, %%mm5 \n\t" \ 178 "movq 56(%2), %%mm1 \n\t" \ 179 "pmaddwd %%mm3, %%mm1 \n\t" \ 180 "pmaddwd 64(%2), %%mm2 \n\t" \ 181 "paddd %%mm1, %%mm7 \n\t" \ 182 "movq 72(%2), %%mm1 \n\t" \ 183 "pmaddwd %%mm3, %%mm1 \n\t" \ 184 "paddd %%mm4, %%mm7 \n\t" \ 185 "paddd %%mm4, %%mm4 \n\t" \ 186 "psubd %%mm7, %%mm4 \n\t" \ 187 "paddd %%mm2, %%mm1 \n\t" \ 188 "psrad $" #shift ", %%mm7 \n\t"\ 189 "psrad $" #shift ", %%mm4 \n\t"\ 190 "movq %%mm0, %%mm2 \n\t" \ 191 "paddd %%mm1, %%mm0 \n\t" \ 192 "psubd %%mm1, %%mm2 \n\t" \ 193 "psrad $" #shift ", %%mm0 \n\t"\ 194 "psrad $" #shift ", %%mm2 \n\t"\ 195 "packssdw %%mm7, %%mm7 \n\t" \ 196 "movd %%mm7, " #dst " \n\t"\ 197 "packssdw %%mm0, %%mm0 \n\t" \ 198 "movd %%mm0, 16+" #dst " \n\t"\ 199 "packssdw %%mm2, %%mm2 \n\t" \ 200 "movd %%mm2, 96+" #dst " \n\t"\ 201 "packssdw %%mm4, %%mm4 \n\t" \ 202 "movd %%mm4, 112+" #dst " \n\t"\ 203 "movq " #src1 ", %%mm0 \n\t" \ 204 "movq 80(%2), %%mm4 \n\t" \ 205 "pmaddwd %%mm0, %%mm4 \n\t" \ 206 "movq 88(%2), 
%%mm7 \n\t" \ 207 "pmaddwd 96(%2), %%mm0 \n\t" \ 208 "pmaddwd %%mm3, %%mm7 \n\t" \ 209 "movq %%mm5, %%mm2 \n\t" \ 210 "pmaddwd 104(%2), %%mm3 \n\t" \ 211 "paddd %%mm7, %%mm4 \n\t" \ 212 "paddd %%mm4, %%mm2 \n\t" \ 213 "psubd %%mm4, %%mm5 \n\t" \ 214 "psrad $" #shift ", %%mm2 \n\t"\ 215 "psrad $" #shift ", %%mm5 \n\t"\ 216 "movq %%mm6, %%mm4 \n\t" \ 217 "paddd %%mm0, %%mm3 \n\t" \ 218 "paddd %%mm3, %%mm6 \n\t" \ 219 "psubd %%mm3, %%mm4 \n\t" \ 220 "psrad $" #shift ", %%mm6 \n\t"\ 221 "psrad $" #shift ", %%mm4 \n\t"\ 222 "packssdw %%mm2, %%mm2 \n\t" \ 223 "packssdw %%mm6, %%mm6 \n\t" \ 224 "movd %%mm2, 32+" #dst " \n\t"\ 225 "packssdw %%mm4, %%mm4 \n\t" \ 226 "packssdw %%mm5, %%mm5 \n\t" \ 227 "movd %%mm6, 48+" #dst " \n\t"\ 228 "movd %%mm4, 64+" #dst " \n\t"\ 229 "movd %%mm5, 80+" #dst " \n\t"\ 232 #define DC_COND_ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \ 233 "movq " #src0 ", %%mm0 \n\t" \ 234 "movq " #src4 ", %%mm1 \n\t" \ 235 "movq " #src1 ", %%mm2 \n\t" \ 236 "movq " #src5 ", %%mm3 \n\t" \ 237 "movq "MANGLE(wm1010)", %%mm4 \n\t"\ 238 "pand %%mm0, %%mm4 \n\t"\ 239 "por %%mm1, %%mm4 \n\t"\ 240 "por %%mm2, %%mm4 \n\t"\ 241 "por %%mm3, %%mm4 \n\t"\ 242 "packssdw %%mm4,%%mm4 \n\t"\ 243 "movd %%mm4, %%eax \n\t"\ 244 "orl %%eax, %%eax \n\t"\ 246 "movq 16(%2), %%mm4 \n\t" \ 247 "pmaddwd %%mm0, %%mm4 \n\t" \ 248 "movq 24(%2), %%mm5 \n\t" \ 249 "pmaddwd %%mm5, %%mm0 \n\t" \ 250 "movq 32(%2), %%mm5 \n\t" \ 251 "pmaddwd %%mm1, %%mm5 \n\t" \ 252 "movq 40(%2), %%mm6 \n\t" \ 253 "pmaddwd %%mm6, %%mm1 \n\t" \ 254 "movq 48(%2), %%mm7 \n\t" \ 255 "pmaddwd %%mm2, %%mm7 \n\t" \ 256 #rounder ", %%mm4 \n\t"\ 257 "movq %%mm4, %%mm6 \n\t" \ 258 "paddd %%mm5, %%mm4 \n\t" \ 259 "psubd %%mm5, %%mm6 \n\t" \ 260 "movq 56(%2), %%mm5 \n\t" \ 261 "pmaddwd %%mm3, %%mm5 \n\t" \ 262 #rounder ", %%mm0 \n\t"\ 263 "paddd %%mm0, %%mm1 \n\t" \ 264 "paddd %%mm0, %%mm0 \n\t" \ 265 "psubd %%mm1, %%mm0 \n\t" \ 266 "pmaddwd 64(%2), %%mm2 \n\t" \ 267 "paddd %%mm5, %%mm7 \n\t" \ 268 "movq 
72(%2), %%mm5 \n\t" \ 269 "pmaddwd %%mm3, %%mm5 \n\t" \ 270 "paddd %%mm4, %%mm7 \n\t" \ 271 "paddd %%mm4, %%mm4 \n\t" \ 272 "psubd %%mm7, %%mm4 \n\t" \ 273 "paddd %%mm2, %%mm5 \n\t" \ 274 "psrad $" #shift ", %%mm7 \n\t"\ 275 "psrad $" #shift ", %%mm4 \n\t"\ 276 "movq %%mm1, %%mm2 \n\t" \ 277 "paddd %%mm5, %%mm1 \n\t" \ 278 "psubd %%mm5, %%mm2 \n\t" \ 279 "psrad $" #shift ", %%mm1 \n\t"\ 280 "psrad $" #shift ", %%mm2 \n\t"\ 281 "packssdw %%mm1, %%mm7 \n\t" \ 282 "packssdw %%mm4, %%mm2 \n\t" \ 283 "movq %%mm7, " #dst " \n\t"\ 284 "movq " #src1 ", %%mm1 \n\t" \ 285 "movq 80(%2), %%mm4 \n\t" \ 286 "movq %%mm2, 24+" #dst " \n\t"\ 287 "pmaddwd %%mm1, %%mm4 \n\t" \ 288 "movq 88(%2), %%mm7 \n\t" \ 289 "pmaddwd 96(%2), %%mm1 \n\t" \ 290 "pmaddwd %%mm3, %%mm7 \n\t" \ 291 "movq %%mm0, %%mm2 \n\t" \ 292 "pmaddwd 104(%2), %%mm3 \n\t" \ 293 "paddd %%mm7, %%mm4 \n\t" \ 294 "paddd %%mm4, %%mm2 \n\t" \ 295 "psubd %%mm4, %%mm0 \n\t" \ 296 "psrad $" #shift ", %%mm2 \n\t"\ 297 "psrad $" #shift ", %%mm0 \n\t"\ 298 "movq %%mm6, %%mm4 \n\t" \ 299 "paddd %%mm1, %%mm3 \n\t" \ 300 "paddd %%mm3, %%mm6 \n\t" \ 301 "psubd %%mm3, %%mm4 \n\t" \ 302 "psrad $" #shift ", %%mm6 \n\t"\ 303 "packssdw %%mm6, %%mm2 \n\t" \ 304 "movq %%mm2, 8+" #dst " \n\t"\ 305 "psrad $" #shift ", %%mm4 \n\t"\ 306 "packssdw %%mm0, %%mm4 \n\t" \ 307 "movq %%mm4, 16+" #dst " \n\t"\ 310 "pslld $16, %%mm0 \n\t"\ 311 "#paddd "MANGLE(d40000)", %%mm0 \n\t"\ 312 "psrad $13, %%mm0 \n\t"\ 313 "packssdw %%mm0, %%mm0 \n\t"\ 314 "movq %%mm0, " #dst " \n\t"\ 315 "movq %%mm0, 8+" #dst " \n\t"\ 316 "movq %%mm0, 16+" #dst " \n\t"\ 317 "movq %%mm0, 24+" #dst " \n\t"\ 322 ROW_IDCT( (%0), 8(%0), 16(%0), 24(%0), 0(%1),paddd 8(%2), 11)
/*
 * Row invocations for the data at byte offsets 32, 64 and 96 of block (%0).
 * Each reads four quadwords (offset, +8, +16, +24) and writes to the same
 * byte offset of temp (%1), using "paddd (%2)" as the rounder instruction
 * and a shift of 11.
 */
327 DC_COND_ROW_IDCT( 32(%0), 40(%0), 48(%0), 56(%0), 32(%1),paddd (%2), 11)
328 DC_COND_ROW_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11)
329 DC_COND_ROW_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11)
/*
 * Column invocations: each reads four quadwords from temp (%1) and writes
 * its results back into block (%0), starting at byte offsets 0, 4, 8 and 12
 * respectively, with a shift of 20.
 */
333 COL_IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
334 COL_IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
335 COL_IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
336 COL_IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
/*
 * Row-pass macros. Operand numbering, per the asm input list:
 * %0 = block, %1 = temp, %2 = coeffs.
 *
 * DC_COND_IDCT(src0, src4, src1, src5, dst, rounder, shift)
 *   ORs (wm1010 & src0) with src4, src1 and src5, packs the result into eax
 *   and tests it with "orl %%eax, %%eax". The full row computation follows
 *   (pmaddwd against 16(%2)..104(%2), `rounder` applied to mm4 and mm0,
 *   arithmetic right shift by `shift`, packssdw, four quadword stores at
 *   dst, 8+dst, 16+dst, 24+dst). A second, short sequence then shifts mm0
 *   left by 16, adds d40000, shifts right by 13, packs, and stores the same
 *   quadword to all four destinations.
 *   NOTE(review): the jump and labels that choose between the two sequences
 *   are not visible here (embedded numbering skips 353 and 416-417) --
 *   confirm against the complete file.
 *
 * Z_COND_IDCT(src0, src4, src1, src5, dst, rounder, shift, bt)
 *   ORs all four inputs together, packs into eax and tests with orl, then
 *   performs the same full row computation. `bt` is not referenced in the
 *   visible text; presumably it is the branch target used when the inputs
 *   are all zero (embedded line 440 is missing) -- TODO confirm.
 *
 * ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift)
 *   The unconditional row computation described above.
 *
 * The text ends with the first row invocation: block offsets 0..24 to
 * temp offset 0, rounder "paddd 8(%2)", shift 11.
 */
340 #define DC_COND_IDCT(src0, src4, src1, src5, dst, rounder, shift) \ 341 "movq " #src0 ", %%mm0 \n\t" \ 342 "movq " #src4 ", %%mm1 \n\t" \ 343 "movq " #src1 ", %%mm2 \n\t" \ 344 "movq " #src5 ", %%mm3 \n\t" \ 345 "movq "MANGLE(wm1010)", %%mm4 \n\t"\ 346 "pand %%mm0, %%mm4 \n\t"\ 347 "por %%mm1, %%mm4 \n\t"\ 348 "por %%mm2, %%mm4 \n\t"\ 349 "por %%mm3, %%mm4 \n\t"\ 350 "packssdw %%mm4,%%mm4 \n\t"\ 351 "movd %%mm4, %%eax \n\t"\ 352 "orl %%eax, %%eax \n\t"\ 354 "movq 16(%2), %%mm4 \n\t" \ 355 "pmaddwd %%mm0, %%mm4 \n\t" \ 356 "movq 24(%2), %%mm5 \n\t" \ 357 "pmaddwd %%mm5, %%mm0 \n\t" \ 358 "movq 32(%2), %%mm5 \n\t" \ 359 "pmaddwd %%mm1, %%mm5 \n\t" \ 360 "movq 40(%2), %%mm6 \n\t" \ 361 "pmaddwd %%mm6, %%mm1 \n\t" \ 362 "movq 48(%2), %%mm7 \n\t" \ 363 "pmaddwd %%mm2, %%mm7 \n\t" \ 364 #rounder ", %%mm4 \n\t"\ 365 "movq %%mm4, %%mm6 \n\t" \ 366 "paddd %%mm5, %%mm4 \n\t" \ 367 "psubd %%mm5, %%mm6 \n\t" \ 368 "movq 56(%2), %%mm5 \n\t" \ 369 "pmaddwd %%mm3, %%mm5 \n\t" \ 370 #rounder ", %%mm0 \n\t"\ 371 "paddd %%mm0, %%mm1 \n\t" \ 372 "paddd %%mm0, %%mm0 \n\t" \ 373 "psubd %%mm1, %%mm0 \n\t" \ 374 "pmaddwd 64(%2), %%mm2 \n\t" \ 375 "paddd %%mm5, %%mm7 \n\t" \ 376 "movq 72(%2), %%mm5 \n\t" \ 377 "pmaddwd %%mm3, %%mm5 \n\t" \ 378 "paddd %%mm4, %%mm7 \n\t" \ 379 "paddd %%mm4, %%mm4 \n\t" \ 380 "psubd %%mm7, %%mm4 \n\t" \ 381 "paddd %%mm2, %%mm5 \n\t" \ 382 "psrad $" #shift ", %%mm7 \n\t"\ 383 "psrad $" #shift ", %%mm4 \n\t"\ 384 "movq %%mm1, %%mm2 \n\t" \ 385 "paddd %%mm5, %%mm1 \n\t" \ 386 "psubd %%mm5, %%mm2 \n\t" \ 387 "psrad $" #shift ", %%mm1 \n\t"\ 388 "psrad $" #shift ", %%mm2 \n\t"\ 389 "packssdw %%mm1, %%mm7 \n\t" \ 390 "packssdw %%mm4, %%mm2 \n\t" \ 391 "movq %%mm7, " #dst " \n\t"\ 392 "movq " #src1 ", %%mm1 \n\t" \ 393 "movq 80(%2), %%mm4 \n\t" \ 394 "movq %%mm2, 24+" #dst " \n\t"\ 395 "pmaddwd %%mm1, %%mm4 \n\t" \ 396 "movq 88(%2), %%mm7 \n\t" \ 397 "pmaddwd 96(%2), %%mm1 \n\t" \ 398 "pmaddwd %%mm3, %%mm7 \n\t" \ 399 "movq %%mm0, %%mm2 \n\t" \ 400 "pmaddwd 
104(%2), %%mm3 \n\t" \ 401 "paddd %%mm7, %%mm4 \n\t" \ 402 "paddd %%mm4, %%mm2 \n\t" \ 403 "psubd %%mm4, %%mm0 \n\t" \ 404 "psrad $" #shift ", %%mm2 \n\t"\ 405 "psrad $" #shift ", %%mm0 \n\t"\ 406 "movq %%mm6, %%mm4 \n\t" \ 407 "paddd %%mm1, %%mm3 \n\t" \ 408 "paddd %%mm3, %%mm6 \n\t" \ 409 "psubd %%mm3, %%mm4 \n\t" \ 410 "psrad $" #shift ", %%mm6 \n\t"\ 411 "packssdw %%mm6, %%mm2 \n\t" \ 412 "movq %%mm2, 8+" #dst " \n\t"\ 413 "psrad $" #shift ", %%mm4 \n\t"\ 414 "packssdw %%mm0, %%mm4 \n\t" \ 415 "movq %%mm4, 16+" #dst " \n\t"\ 418 "pslld $16, %%mm0 \n\t"\ 419 "paddd "MANGLE(d40000)", %%mm0 \n\t"\ 420 "psrad $13, %%mm0 \n\t"\ 421 "packssdw %%mm0, %%mm0 \n\t"\ 422 "movq %%mm0, " #dst " \n\t"\ 423 "movq %%mm0, 8+" #dst " \n\t"\ 424 "movq %%mm0, 16+" #dst " \n\t"\ 425 "movq %%mm0, 24+" #dst " \n\t"\ 428 #define Z_COND_IDCT(src0, src4, src1, src5, dst, rounder, shift, bt) \ 429 "movq " #src0 ", %%mm0 \n\t" \ 430 "movq " #src4 ", %%mm1 \n\t" \ 431 "movq " #src1 ", %%mm2 \n\t" \ 432 "movq " #src5 ", %%mm3 \n\t" \ 433 "movq %%mm0, %%mm4 \n\t"\ 434 "por %%mm1, %%mm4 \n\t"\ 435 "por %%mm2, %%mm4 \n\t"\ 436 "por %%mm3, %%mm4 \n\t"\ 437 "packssdw %%mm4,%%mm4 \n\t"\ 438 "movd %%mm4, %%eax \n\t"\ 439 "orl %%eax, %%eax \n\t"\ 441 "movq 16(%2), %%mm4 \n\t" \ 442 "pmaddwd %%mm0, %%mm4 \n\t" \ 443 "movq 24(%2), %%mm5 \n\t" \ 444 "pmaddwd %%mm5, %%mm0 \n\t" \ 445 "movq 32(%2), %%mm5 \n\t" \ 446 "pmaddwd %%mm1, %%mm5 \n\t" \ 447 "movq 40(%2), %%mm6 \n\t" \ 448 "pmaddwd %%mm6, %%mm1 \n\t" \ 449 "movq 48(%2), %%mm7 \n\t" \ 450 "pmaddwd %%mm2, %%mm7 \n\t" \ 451 #rounder ", %%mm4 \n\t"\ 452 "movq %%mm4, %%mm6 \n\t" \ 453 "paddd %%mm5, %%mm4 \n\t" \ 454 "psubd %%mm5, %%mm6 \n\t" \ 455 "movq 56(%2), %%mm5 \n\t" \ 456 "pmaddwd %%mm3, %%mm5 \n\t" \ 457 #rounder ", %%mm0 \n\t"\ 458 "paddd %%mm0, %%mm1 \n\t" \ 459 "paddd %%mm0, %%mm0 \n\t" \ 460 "psubd %%mm1, %%mm0 \n\t" \ 461 "pmaddwd 64(%2), %%mm2 \n\t" \ 462 "paddd %%mm5, %%mm7 \n\t" \ 463 "movq 72(%2), %%mm5 \n\t" \ 464 "pmaddwd %%mm3, 
%%mm5 \n\t" \ 465 "paddd %%mm4, %%mm7 \n\t" \ 466 "paddd %%mm4, %%mm4 \n\t" \ 467 "psubd %%mm7, %%mm4 \n\t" \ 468 "paddd %%mm2, %%mm5 \n\t" \ 469 "psrad $" #shift ", %%mm7 \n\t"\ 470 "psrad $" #shift ", %%mm4 \n\t"\ 471 "movq %%mm1, %%mm2 \n\t" \ 472 "paddd %%mm5, %%mm1 \n\t" \ 473 "psubd %%mm5, %%mm2 \n\t" \ 474 "psrad $" #shift ", %%mm1 \n\t"\ 475 "psrad $" #shift ", %%mm2 \n\t"\ 476 "packssdw %%mm1, %%mm7 \n\t" \ 477 "packssdw %%mm4, %%mm2 \n\t" \ 478 "movq %%mm7, " #dst " \n\t"\ 479 "movq " #src1 ", %%mm1 \n\t" \ 480 "movq 80(%2), %%mm4 \n\t" \ 481 "movq %%mm2, 24+" #dst " \n\t"\ 482 "pmaddwd %%mm1, %%mm4 \n\t" \ 483 "movq 88(%2), %%mm7 \n\t" \ 484 "pmaddwd 96(%2), %%mm1 \n\t" \ 485 "pmaddwd %%mm3, %%mm7 \n\t" \ 486 "movq %%mm0, %%mm2 \n\t" \ 487 "pmaddwd 104(%2), %%mm3 \n\t" \ 488 "paddd %%mm7, %%mm4 \n\t" \ 489 "paddd %%mm4, %%mm2 \n\t" \ 490 "psubd %%mm4, %%mm0 \n\t" \ 491 "psrad $" #shift ", %%mm2 \n\t"\ 492 "psrad $" #shift ", %%mm0 \n\t"\ 493 "movq %%mm6, %%mm4 \n\t" \ 494 "paddd %%mm1, %%mm3 \n\t" \ 495 "paddd %%mm3, %%mm6 \n\t" \ 496 "psubd %%mm3, %%mm4 \n\t" \ 497 "psrad $" #shift ", %%mm6 \n\t"\ 498 "packssdw %%mm6, %%mm2 \n\t" \ 499 "movq %%mm2, 8+" #dst " \n\t"\ 500 "psrad $" #shift ", %%mm4 \n\t"\ 501 "packssdw %%mm0, %%mm4 \n\t" \ 502 "movq %%mm4, 16+" #dst " \n\t"\ 504 #define ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \ 505 "movq " #src0 ", %%mm0 \n\t" \ 506 "movq " #src4 ", %%mm1 \n\t" \ 507 "movq " #src1 ", %%mm2 \n\t" \ 508 "movq " #src5 ", %%mm3 \n\t" \ 509 "movq 16(%2), %%mm4 \n\t" \ 510 "pmaddwd %%mm0, %%mm4 \n\t" \ 511 "movq 24(%2), %%mm5 \n\t" \ 512 "pmaddwd %%mm5, %%mm0 \n\t" \ 513 "movq 32(%2), %%mm5 \n\t" \ 514 "pmaddwd %%mm1, %%mm5 \n\t" \ 515 "movq 40(%2), %%mm6 \n\t" \ 516 "pmaddwd %%mm6, %%mm1 \n\t" \ 517 "movq 48(%2), %%mm7 \n\t" \ 518 "pmaddwd %%mm2, %%mm7 \n\t" \ 519 #rounder ", %%mm4 \n\t"\ 520 "movq %%mm4, %%mm6 \n\t" \ 521 "paddd %%mm5, %%mm4 \n\t" \ 522 "psubd %%mm5, %%mm6 \n\t" \ 523 "movq 56(%2), %%mm5 \n\t" \ 
524 "pmaddwd %%mm3, %%mm5 \n\t" \ 525 #rounder ", %%mm0 \n\t"\ 526 "paddd %%mm0, %%mm1 \n\t" \ 527 "paddd %%mm0, %%mm0 \n\t" \ 528 "psubd %%mm1, %%mm0 \n\t" \ 529 "pmaddwd 64(%2), %%mm2 \n\t" \ 530 "paddd %%mm5, %%mm7 \n\t" \ 531 "movq 72(%2), %%mm5 \n\t" \ 532 "pmaddwd %%mm3, %%mm5 \n\t" \ 533 "paddd %%mm4, %%mm7 \n\t" \ 534 "paddd %%mm4, %%mm4 \n\t" \ 535 "psubd %%mm7, %%mm4 \n\t" \ 536 "paddd %%mm2, %%mm5 \n\t" \ 537 "psrad $" #shift ", %%mm7 \n\t"\ 538 "psrad $" #shift ", %%mm4 \n\t"\ 539 "movq %%mm1, %%mm2 \n\t" \ 540 "paddd %%mm5, %%mm1 \n\t" \ 541 "psubd %%mm5, %%mm2 \n\t" \ 542 "psrad $" #shift ", %%mm1 \n\t"\ 543 "psrad $" #shift ", %%mm2 \n\t"\ 544 "packssdw %%mm1, %%mm7 \n\t" \ 545 "packssdw %%mm4, %%mm2 \n\t" \ 546 "movq %%mm7, " #dst " \n\t"\ 547 "movq " #src1 ", %%mm1 \n\t" \ 548 "movq 80(%2), %%mm4 \n\t" \ 549 "movq %%mm2, 24+" #dst " \n\t"\ 550 "pmaddwd %%mm1, %%mm4 \n\t" \ 551 "movq 88(%2), %%mm7 \n\t" \ 552 "pmaddwd 96(%2), %%mm1 \n\t" \ 553 "pmaddwd %%mm3, %%mm7 \n\t" \ 554 "movq %%mm0, %%mm2 \n\t" \ 555 "pmaddwd 104(%2), %%mm3 \n\t" \ 556 "paddd %%mm7, %%mm4 \n\t" \ 557 "paddd %%mm4, %%mm2 \n\t" \ 558 "psubd %%mm4, %%mm0 \n\t" \ 559 "psrad $" #shift ", %%mm2 \n\t"\ 560 "psrad $" #shift ", %%mm0 \n\t"\ 561 "movq %%mm6, %%mm4 \n\t" \ 562 "paddd %%mm1, %%mm3 \n\t" \ 563 "paddd %%mm3, %%mm6 \n\t" \ 564 "psubd %%mm3, %%mm4 \n\t" \ 565 "psrad $" #shift ", %%mm6 \n\t"\ 566 "packssdw %%mm6, %%mm2 \n\t" \ 567 "movq %%mm2, 8+" #dst " \n\t"\ 568 "psrad $" #shift ", %%mm4 \n\t"\ 569 "packssdw %%mm0, %%mm4 \n\t" \ 570 "movq %%mm4, 16+" #dst " \n\t"\ 573 DC_COND_IDCT( 0(%0), 8(%0), 16(%0), 24(%0), 0(%1),paddd 8(%2), 11)
/*
 * Row invocations for block byte offsets 32, 64 and 96 (operand %0),
 * written to the same offsets of temp (operand %1) with rounder
 * "paddd (%2)" and shift 11. The last argument (4f, 2f, 1f) is the `bt`
 * parameter; in this text each one is split across two physical lines.
 */
574 Z_COND_IDCT( 32(%0), 40(%0), 48(%0), 56(%0), 32(%1),paddd (%2), 11, 4
f)
575 Z_COND_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11, 2
f)
576 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 1
f)
/*
 * IDCT(src0, src4, src1, src5, dst, shift) -- column pass using all four
 * inputs.
 *   Loads src0/src4/src1/src5 into mm0..mm3, multiplies with the
 *   coefficient quadwords at 16(%2)..104(%2), combines the partial sums
 *   with paddd/psubd, shifts right arithmetically by `shift`, packs with
 *   signed saturation and writes eight 32-bit results with movd to dst,
 *   16+dst, 32+dst, 48+dst, 64+dst, 80+dst, 96+dst and 112+dst.
 *   No rounder instruction is applied in this pass.
 *
 * The four invocations read from temp (%1) and write to block (%0) at byte
 * offsets 0, 4, 8 and 12, with shift 20.
 */
579 #define IDCT(src0, src4, src1, src5, dst, shift) \ 580 "movq " #src0 ", %%mm0 \n\t" \ 581 "movq " #src4 ", %%mm1 \n\t" \ 582 "movq " #src1 ", %%mm2 \n\t" \ 583 "movq " #src5 ", %%mm3 \n\t" \ 584 "movq 16(%2), %%mm4 \n\t" \ 585 "pmaddwd %%mm0, %%mm4 \n\t" \ 586 "movq 24(%2), %%mm5 \n\t" \ 587 "pmaddwd %%mm5, %%mm0 \n\t" \ 588 "movq 32(%2), %%mm5 \n\t" \ 589 "pmaddwd %%mm1, %%mm5 \n\t" \ 590 "movq 40(%2), %%mm6 \n\t" \ 591 "pmaddwd %%mm6, %%mm1 \n\t" \ 592 "movq %%mm4, %%mm6 \n\t" \ 593 "movq 48(%2), %%mm7 \n\t" \ 594 "pmaddwd %%mm2, %%mm7 \n\t" \ 595 "paddd %%mm5, %%mm4 \n\t" \ 596 "psubd %%mm5, %%mm6 \n\t" \ 597 "movq %%mm0, %%mm5 \n\t" \ 598 "paddd %%mm1, %%mm0 \n\t" \ 599 "psubd %%mm1, %%mm5 \n\t" \ 600 "movq 56(%2), %%mm1 \n\t" \ 601 "pmaddwd %%mm3, %%mm1 \n\t" \ 602 "pmaddwd 64(%2), %%mm2 \n\t" \ 603 "paddd %%mm1, %%mm7 \n\t" \ 604 "movq 72(%2), %%mm1 \n\t" \ 605 "pmaddwd %%mm3, %%mm1 \n\t" \ 606 "paddd %%mm4, %%mm7 \n\t" \ 607 "paddd %%mm4, %%mm4 \n\t" \ 608 "psubd %%mm7, %%mm4 \n\t" \ 609 "paddd %%mm2, %%mm1 \n\t" \ 610 "psrad $" #shift ", %%mm7 \n\t"\ 611 "psrad $" #shift ", %%mm4 \n\t"\ 612 "movq %%mm0, %%mm2 \n\t" \ 613 "paddd %%mm1, %%mm0 \n\t" \ 614 "psubd %%mm1, %%mm2 \n\t" \ 615 "psrad $" #shift ", %%mm0 \n\t"\ 616 "psrad $" #shift ", %%mm2 \n\t"\ 617 "packssdw %%mm7, %%mm7 \n\t" \ 618 "movd %%mm7, " #dst " \n\t"\ 619 "packssdw %%mm0, %%mm0 \n\t" \ 620 "movd %%mm0, 16+" #dst " \n\t"\ 621 "packssdw %%mm2, %%mm2 \n\t" \ 622 "movd %%mm2, 96+" #dst " \n\t"\ 623 "packssdw %%mm4, %%mm4 \n\t" \ 624 "movd %%mm4, 112+" #dst " \n\t"\ 625 "movq " #src1 ", %%mm0 \n\t" \ 626 "movq 80(%2), %%mm4 \n\t" \ 627 "pmaddwd %%mm0, %%mm4 \n\t" \ 628 "movq 88(%2), %%mm7 \n\t" \ 629 "pmaddwd 96(%2), %%mm0 \n\t" \ 630 "pmaddwd %%mm3, %%mm7 \n\t" \ 631 "movq %%mm5, %%mm2 \n\t" \ 632 "pmaddwd 104(%2), %%mm3 \n\t" \ 633 "paddd %%mm7, %%mm4 \n\t" \ 634 "paddd %%mm4, %%mm2 \n\t" \ 635 "psubd %%mm4, %%mm5 \n\t" \ 636 "psrad $" #shift ", %%mm2 \n\t"\ 637 "psrad $" #shift ", %%mm5 
\n\t"\ 638 "movq %%mm6, %%mm4 \n\t" \ 639 "paddd %%mm0, %%mm3 \n\t" \ 640 "paddd %%mm3, %%mm6 \n\t" \ 641 "psubd %%mm3, %%mm4 \n\t" \ 642 "psrad $" #shift ", %%mm6 \n\t"\ 643 "psrad $" #shift ", %%mm4 \n\t"\ 644 "packssdw %%mm2, %%mm2 \n\t" \ 645 "packssdw %%mm6, %%mm6 \n\t" \ 646 "movd %%mm2, 32+" #dst " \n\t"\ 647 "packssdw %%mm4, %%mm4 \n\t" \ 648 "packssdw %%mm5, %%mm5 \n\t" \ 649 "movd %%mm6, 48+" #dst " \n\t"\ 650 "movd %%mm4, 64+" #dst " \n\t"\ 651 "movd %%mm5, 80+" #dst " \n\t" 655 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
656 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
657 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
658 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
/*
 * Row invocations for block byte offsets 64 and 96 (operand %0), written to
 * the same offsets of temp (operand %1), rounder "paddd (%2)", shift 11.
 * The `bt` arguments here are 6f and 5f (each split across two physical
 * lines in this text).
 */
663 Z_COND_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11, 6
f)
664 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 5
f)
/*
 * IDCT(src0, src4, src1, src5, dst, shift) -- column pass that loads only
 * src0, src4 and src5; src1 is never referenced in the body.
 *   Uses the coefficient quadwords at 16, 24, 32, 40, 56, 72, 88 and
 *   104(%2), shifts right arithmetically by `shift`, packs with signed
 *   saturation and writes eight 32-bit results with movd to dst, 16+dst,
 *   ..., 112+dst.
 *   NOTE(review): presumably selected when the row data behind src1 was
 *   found to be zero; the branch logic is not visible here -- confirm.
 *
 * The four invocations read from temp (%1) and write to block (%0) at byte
 * offsets 0, 4, 8 and 12, with shift 20.
 */
667 #define IDCT(src0, src4, src1, src5, dst, shift) \ 668 "movq " #src0 ", %%mm0 \n\t" \ 669 "movq " #src4 ", %%mm1 \n\t" \ 670 "movq " #src5 ", %%mm3 \n\t" \ 671 "movq 16(%2), %%mm4 \n\t" \ 672 "pmaddwd %%mm0, %%mm4 \n\t" \ 673 "movq 24(%2), %%mm5 \n\t" \ 674 "pmaddwd %%mm5, %%mm0 \n\t" \ 675 "movq 32(%2), %%mm5 \n\t" \ 676 "pmaddwd %%mm1, %%mm5 \n\t" \ 677 "movq 40(%2), %%mm6 \n\t" \ 678 "pmaddwd %%mm6, %%mm1 \n\t" \ 679 "movq %%mm4, %%mm6 \n\t" \ 680 "paddd %%mm5, %%mm4 \n\t" \ 681 "psubd %%mm5, %%mm6 \n\t" \ 682 "movq %%mm0, %%mm5 \n\t" \ 683 "paddd %%mm1, %%mm0 \n\t" \ 684 "psubd %%mm1, %%mm5 \n\t" \ 685 "movq 56(%2), %%mm1 \n\t" \ 686 "pmaddwd %%mm3, %%mm1 \n\t" \ 687 "movq 72(%2), %%mm7 \n\t" \ 688 "pmaddwd %%mm3, %%mm7 \n\t" \ 689 "paddd %%mm4, %%mm1 \n\t" \ 690 "paddd %%mm4, %%mm4 \n\t" \ 691 "psubd %%mm1, %%mm4 \n\t" \ 692 "psrad $" #shift ", %%mm1 \n\t"\ 693 "psrad $" #shift ", %%mm4 \n\t"\ 694 "movq %%mm0, %%mm2 \n\t" \ 695 "paddd %%mm7, %%mm0 \n\t" \ 696 "psubd %%mm7, %%mm2 \n\t" \ 697 "psrad $" #shift ", %%mm0 \n\t"\ 698 "psrad $" #shift ", %%mm2 \n\t"\ 699 "packssdw %%mm1, %%mm1 \n\t" \ 700 "movd %%mm1, " #dst " \n\t"\ 701 "packssdw %%mm0, %%mm0 \n\t" \ 702 "movd %%mm0, 16+" #dst " \n\t"\ 703 "packssdw %%mm2, %%mm2 \n\t" \ 704 "movd %%mm2, 96+" #dst " \n\t"\ 705 "packssdw %%mm4, %%mm4 \n\t" \ 706 "movd %%mm4, 112+" #dst " \n\t"\ 707 "movq 88(%2), %%mm1 \n\t" \ 708 "pmaddwd %%mm3, %%mm1 \n\t" \ 709 "movq %%mm5, %%mm2 \n\t" \ 710 "pmaddwd 104(%2), %%mm3 \n\t" \ 711 "paddd %%mm1, %%mm2 \n\t" \ 712 "psubd %%mm1, %%mm5 \n\t" \ 713 "psrad $" #shift ", %%mm2 \n\t"\ 714 "psrad $" #shift ", %%mm5 \n\t"\ 715 "movq %%mm6, %%mm1 \n\t" \ 716 "paddd %%mm3, %%mm6 \n\t" \ 717 "psubd %%mm3, %%mm1 \n\t" \ 718 "psrad $" #shift ", %%mm6 \n\t"\ 719 "psrad $" #shift ", %%mm1 \n\t"\ 720 "packssdw %%mm2, %%mm2 \n\t" \ 721 "packssdw %%mm6, %%mm6 \n\t" \ 722 "movd %%mm2, 32+" #dst " \n\t"\ 723 "packssdw %%mm1, %%mm1 \n\t" \ 724 "packssdw %%mm5, %%mm5 \n\t" \ 725 "movd %%mm6, 
48+" #dst " \n\t"\ 726 "movd %%mm1, 64+" #dst " \n\t"\ 727 "movd %%mm5, 80+" #dst " \n\t" 730 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
731 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
732 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
733 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
/*
 * Row invocation for block byte offset 96 (operand %0), written to temp
 * offset 96 (operand %1), rounder "paddd (%2)", shift 11, with `bt` = 7f
 * (split across two physical lines in this text).
 */
738 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 7
f)
/*
 * IDCT(src0, src4, src1, src5, dst, shift) -- column pass that loads only
 * src0 and src5; src4 and src1 are never referenced in the body.
 *   Uses the coefficient quadwords at 16, 24, 56, 72, 88 and 104(%2),
 *   shifts right arithmetically by `shift`, packs with signed saturation
 *   and writes eight 32-bit results with movd to dst, 16+dst, ..., 112+dst.
 *   NOTE(review): presumably selected when the row data behind src4 and
 *   src1 was found to be zero; the branch logic is not visible -- confirm.
 *
 * The four invocations read from temp (%1) and write to block (%0) at byte
 * offsets 0, 4, 8 and 12, with shift 20.
 */
741 #define IDCT(src0, src4, src1, src5, dst, shift) \ 742 "movq " #src0 ", %%mm0 \n\t" \ 743 "movq " #src5 ", %%mm3 \n\t" \ 744 "movq 16(%2), %%mm4 \n\t" \ 745 "pmaddwd %%mm0, %%mm4 \n\t" \ 746 "movq 24(%2), %%mm5 \n\t" \ 747 "pmaddwd %%mm5, %%mm0 \n\t" \ 748 "movq %%mm4, %%mm6 \n\t" \ 749 "movq %%mm0, %%mm5 \n\t" \ 750 "movq 56(%2), %%mm1 \n\t" \ 751 "pmaddwd %%mm3, %%mm1 \n\t" \ 752 "movq 72(%2), %%mm7 \n\t" \ 753 "pmaddwd %%mm3, %%mm7 \n\t" \ 754 "paddd %%mm4, %%mm1 \n\t" \ 755 "paddd %%mm4, %%mm4 \n\t" \ 756 "psubd %%mm1, %%mm4 \n\t" \ 757 "psrad $" #shift ", %%mm1 \n\t"\ 758 "psrad $" #shift ", %%mm4 \n\t"\ 759 "movq %%mm0, %%mm2 \n\t" \ 760 "paddd %%mm7, %%mm0 \n\t" \ 761 "psubd %%mm7, %%mm2 \n\t" \ 762 "psrad $" #shift ", %%mm0 \n\t"\ 763 "psrad $" #shift ", %%mm2 \n\t"\ 764 "packssdw %%mm1, %%mm1 \n\t" \ 765 "movd %%mm1, " #dst " \n\t"\ 766 "packssdw %%mm0, %%mm0 \n\t" \ 767 "movd %%mm0, 16+" #dst " \n\t"\ 768 "packssdw %%mm2, %%mm2 \n\t" \ 769 "movd %%mm2, 96+" #dst " \n\t"\ 770 "packssdw %%mm4, %%mm4 \n\t" \ 771 "movd %%mm4, 112+" #dst " \n\t"\ 772 "movq 88(%2), %%mm1 \n\t" \ 773 "pmaddwd %%mm3, %%mm1 \n\t" \ 774 "movq %%mm5, %%mm2 \n\t" \ 775 "pmaddwd 104(%2), %%mm3 \n\t" \ 776 "paddd %%mm1, %%mm2 \n\t" \ 777 "psubd %%mm1, %%mm5 \n\t" \ 778 "psrad $" #shift ", %%mm2 \n\t"\ 779 "psrad $" #shift ", %%mm5 \n\t"\ 780 "movq %%mm6, %%mm1 \n\t" \ 781 "paddd %%mm3, %%mm6 \n\t" \ 782 "psubd %%mm3, %%mm1 \n\t" \ 783 "psrad $" #shift ", %%mm6 \n\t"\ 784 "psrad $" #shift ", %%mm1 \n\t"\ 785 "packssdw %%mm2, %%mm2 \n\t" \ 786 "packssdw %%mm6, %%mm6 \n\t" \ 787 "movd %%mm2, 32+" #dst " \n\t"\ 788 "packssdw %%mm1, %%mm1 \n\t" \ 789 "packssdw %%mm5, %%mm5 \n\t" \ 790 "movd %%mm6, 48+" #dst " \n\t"\ 791 "movd %%mm1, 64+" #dst " \n\t"\ 792 "movd %%mm5, 80+" #dst " \n\t" 796 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
797 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
798 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
799 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
/*
 * Row invocation for block byte offset 96 (operand %0), written to temp
 * offset 96 (operand %1), rounder "paddd (%2)", shift 11, with `bt` = 3f
 * (split across two physical lines in this text).
 */
804 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 3
f)
/*
 * IDCT(src0, src4, src1, src5, dst, shift) -- column pass that loads src0,
 * src1 and src5; src4 is never referenced in the body.
 *   Uses the coefficient quadwords at 16, 24, 48, 56, 64, 72, 80, 88, 96
 *   and 104(%2), shifts right arithmetically by `shift`, packs with signed
 *   saturation and writes eight 32-bit results with movd to dst, 16+dst,
 *   ..., 112+dst.
 *   NOTE(review): presumably selected when the row data behind src4 was
 *   found to be zero; the branch logic is not visible -- confirm.
 *
 * The four invocations read from temp (%1) and write to block (%0) at byte
 * offsets 0, 4, 8 and 12, with shift 20.
 */
807 #define IDCT(src0, src4, src1, src5, dst, shift) \ 808 "movq " #src0 ", %%mm0 \n\t" \ 809 "movq " #src1 ", %%mm2 \n\t" \ 810 "movq " #src5 ", %%mm3 \n\t" \ 811 "movq 16(%2), %%mm4 \n\t" \ 812 "pmaddwd %%mm0, %%mm4 \n\t" \ 813 "movq 24(%2), %%mm5 \n\t" \ 814 "pmaddwd %%mm5, %%mm0 \n\t" \ 815 "movq %%mm4, %%mm6 \n\t" \ 816 "movq 48(%2), %%mm7 \n\t" \ 817 "pmaddwd %%mm2, %%mm7 \n\t" \ 818 "movq %%mm0, %%mm5 \n\t" \ 819 "movq 56(%2), %%mm1 \n\t" \ 820 "pmaddwd %%mm3, %%mm1 \n\t" \ 821 "pmaddwd 64(%2), %%mm2 \n\t" \ 822 "paddd %%mm1, %%mm7 \n\t" \ 823 "movq 72(%2), %%mm1 \n\t" \ 824 "pmaddwd %%mm3, %%mm1 \n\t" \ 825 "paddd %%mm4, %%mm7 \n\t" \ 826 "paddd %%mm4, %%mm4 \n\t" \ 827 "psubd %%mm7, %%mm4 \n\t" \ 828 "paddd %%mm2, %%mm1 \n\t" \ 829 "psrad $" #shift ", %%mm7 \n\t"\ 830 "psrad $" #shift ", %%mm4 \n\t"\ 831 "movq %%mm0, %%mm2 \n\t" \ 832 "paddd %%mm1, %%mm0 \n\t" \ 833 "psubd %%mm1, %%mm2 \n\t" \ 834 "psrad $" #shift ", %%mm0 \n\t"\ 835 "psrad $" #shift ", %%mm2 \n\t"\ 836 "packssdw %%mm7, %%mm7 \n\t" \ 837 "movd %%mm7, " #dst " \n\t"\ 838 "packssdw %%mm0, %%mm0 \n\t" \ 839 "movd %%mm0, 16+" #dst " \n\t"\ 840 "packssdw %%mm2, %%mm2 \n\t" \ 841 "movd %%mm2, 96+" #dst " \n\t"\ 842 "packssdw %%mm4, %%mm4 \n\t" \ 843 "movd %%mm4, 112+" #dst " \n\t"\ 844 "movq " #src1 ", %%mm0 \n\t" \ 845 "movq 80(%2), %%mm4 \n\t" \ 846 "pmaddwd %%mm0, %%mm4 \n\t" \ 847 "movq 88(%2), %%mm7 \n\t" \ 848 "pmaddwd 96(%2), %%mm0 \n\t" \ 849 "pmaddwd %%mm3, %%mm7 \n\t" \ 850 "movq %%mm5, %%mm2 \n\t" \ 851 "pmaddwd 104(%2), %%mm3 \n\t" \ 852 "paddd %%mm7, %%mm4 \n\t" \ 853 "paddd %%mm4, %%mm2 \n\t" \ 854 "psubd %%mm4, %%mm5 \n\t" \ 855 "psrad $" #shift ", %%mm2 \n\t"\ 856 "psrad $" #shift ", %%mm5 \n\t"\ 857 "movq %%mm6, %%mm4 \n\t" \ 858 "paddd %%mm0, %%mm3 \n\t" \ 859 "paddd %%mm3, %%mm6 \n\t" \ 860 "psubd %%mm3, %%mm4 \n\t" \ 861 "psrad $" #shift ", %%mm6 \n\t"\ 862 "psrad $" #shift ", %%mm4 \n\t"\ 863 "packssdw %%mm2, %%mm2 \n\t" \ 864 "packssdw %%mm6, %%mm6 \n\t" \ 865 "movd %%mm2, 
32+" #dst " \n\t"\ 866 "packssdw %%mm4, %%mm4 \n\t" \ 867 "packssdw %%mm5, %%mm5 \n\t" \ 868 "movd %%mm6, 48+" #dst " \n\t"\ 869 "movd %%mm4, 64+" #dst " \n\t"\ 870 "movd %%mm5, 80+" #dst " \n\t" 873 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
874 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
875 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
876 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
/*
 * IDCT(src0, src4, src1, src5, dst, shift) -- column pass that loads only
 * src0 and src1; src4 and src5 are never referenced in the body.
 *   Uses the coefficient quadwords at 16, 24, 48, 64, 80 and 96(%2),
 *   shifts right arithmetically by `shift`, packs with signed saturation
 *   and writes eight 32-bit results with movd to dst, 16+dst, ..., 112+dst.
 *   NOTE(review): presumably selected when the row data behind src4 and
 *   src5 was found to be zero; the branch logic is not visible -- confirm.
 *
 * The four invocations read from temp (%1) and write to block (%0) at byte
 * offsets 0, 4, 8 and 12, with shift 20.
 */
882 #define IDCT(src0, src4, src1, src5, dst, shift) \ 883 "movq " #src0 ", %%mm0 \n\t" \ 884 "movq " #src1 ", %%mm2 \n\t" \ 885 "movq 16(%2), %%mm4 \n\t" \ 886 "pmaddwd %%mm0, %%mm4 \n\t" \ 887 "movq 24(%2), %%mm5 \n\t" \ 888 "pmaddwd %%mm5, %%mm0 \n\t" \ 889 "movq %%mm4, %%mm6 \n\t" \ 890 "movq 48(%2), %%mm7 \n\t" \ 891 "pmaddwd %%mm2, %%mm7 \n\t" \ 892 "movq %%mm0, %%mm5 \n\t" \ 893 "movq 64(%2), %%mm3 \n\t"\ 894 "pmaddwd %%mm2, %%mm3 \n\t" \ 895 "paddd %%mm4, %%mm7 \n\t" \ 896 "paddd %%mm4, %%mm4 \n\t" \ 897 "psubd %%mm7, %%mm4 \n\t" \ 898 "psrad $" #shift ", %%mm7 \n\t"\ 899 "psrad $" #shift ", %%mm4 \n\t"\ 900 "movq %%mm0, %%mm1 \n\t" \ 901 "paddd %%mm3, %%mm0 \n\t" \ 902 "psubd %%mm3, %%mm1 \n\t" \ 903 "psrad $" #shift ", %%mm0 \n\t"\ 904 "psrad $" #shift ", %%mm1 \n\t"\ 905 "packssdw %%mm7, %%mm7 \n\t" \ 906 "movd %%mm7, " #dst " \n\t"\ 907 "packssdw %%mm0, %%mm0 \n\t" \ 908 "movd %%mm0, 16+" #dst " \n\t"\ 909 "packssdw %%mm1, %%mm1 \n\t" \ 910 "movd %%mm1, 96+" #dst " \n\t"\ 911 "packssdw %%mm4, %%mm4 \n\t" \ 912 "movd %%mm4, 112+" #dst " \n\t"\ 913 "movq 80(%2), %%mm4 \n\t" \ 914 "pmaddwd %%mm2, %%mm4 \n\t" \ 915 "pmaddwd 96(%2), %%mm2 \n\t" \ 916 "movq %%mm5, %%mm1 \n\t" \ 917 "paddd %%mm4, %%mm1 \n\t" \ 918 "psubd %%mm4, %%mm5 \n\t" \ 919 "psrad $" #shift ", %%mm1 \n\t"\ 920 "psrad $" #shift ", %%mm5 \n\t"\ 921 "movq %%mm6, %%mm4 \n\t" \ 922 "paddd %%mm2, %%mm6 \n\t" \ 923 "psubd %%mm2, %%mm4 \n\t" \ 924 "psrad $" #shift ", %%mm6 \n\t"\ 925 "psrad $" #shift ", %%mm4 \n\t"\ 926 "packssdw %%mm1, %%mm1 \n\t" \ 927 "packssdw %%mm6, %%mm6 \n\t" \ 928 "movd %%mm1, 32+" #dst " \n\t"\ 929 "packssdw %%mm4, %%mm4 \n\t" \ 930 "packssdw %%mm5, %%mm5 \n\t" \ 931 "movd %%mm6, 48+" #dst " \n\t"\ 932 "movd %%mm4, 64+" #dst " \n\t"\ 933 "movd %%mm5, 80+" #dst " \n\t" 937 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
938 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
939 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
940 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
/*
 * IDCT(src0, src4, src1, src5, dst, shift) -- column pass that uses only
 * src0 and src4 and handles two adjacent quadwords per invocation: it loads
 * src0/src4 and also 8+src0/8+src4. src1 and src5 are never referenced.
 *   Only the coefficient quadwords at 16, 24, 32 and 40(%2) are used.
 *   Results are packed pairwise and stored as full quadwords (movq), and
 *   each packed value is written twice: mm4 to dst and 112+dst, mm0 to
 *   16+dst and 96+dst, mm5 to 32+dst and 80+dst, mm6 to 48+dst and 64+dst.
 *
 * Because each call writes 8 bytes per destination, only two invocations
 * appear, with dst at block (%0) byte offsets 0 and 8 and shift 20.
 */
946 #define IDCT(src0, src4, src1, src5, dst, shift) \ 947 "movq " #src0 ", %%mm0 \n\t" \ 948 "movq " #src4 ", %%mm1 \n\t" \ 949 "movq 16(%2), %%mm4 \n\t" \ 950 "pmaddwd %%mm0, %%mm4 \n\t" \ 951 "movq 24(%2), %%mm5 \n\t" \ 952 "pmaddwd %%mm5, %%mm0 \n\t" \ 953 "movq 32(%2), %%mm5 \n\t" \ 954 "pmaddwd %%mm1, %%mm5 \n\t" \ 955 "movq 40(%2), %%mm6 \n\t" \ 956 "pmaddwd %%mm6, %%mm1 \n\t" \ 957 "movq %%mm4, %%mm6 \n\t" \ 958 "paddd %%mm5, %%mm4 \n\t" \ 959 "psubd %%mm5, %%mm6 \n\t" \ 960 "movq %%mm0, %%mm5 \n\t" \ 961 "paddd %%mm1, %%mm0 \n\t" \ 962 "psubd %%mm1, %%mm5 \n\t" \ 963 "movq 8+" #src0 ", %%mm2 \n\t" \ 964 "movq 8+" #src4 ", %%mm3 \n\t" \ 965 "movq 16(%2), %%mm1 \n\t" \ 966 "pmaddwd %%mm2, %%mm1 \n\t" \ 967 "movq 24(%2), %%mm7 \n\t" \ 968 "pmaddwd %%mm7, %%mm2 \n\t" \ 969 "movq 32(%2), %%mm7 \n\t" \ 970 "pmaddwd %%mm3, %%mm7 \n\t" \ 971 "pmaddwd 40(%2), %%mm3 \n\t" \ 972 "paddd %%mm1, %%mm7 \n\t" \ 973 "paddd %%mm1, %%mm1 \n\t" \ 974 "psubd %%mm7, %%mm1 \n\t" \ 975 "paddd %%mm2, %%mm3 \n\t" \ 976 "paddd %%mm2, %%mm2 \n\t" \ 977 "psubd %%mm3, %%mm2 \n\t" \ 978 "psrad $" #shift ", %%mm4 \n\t"\ 979 "psrad $" #shift ", %%mm7 \n\t"\ 980 "psrad $" #shift ", %%mm3 \n\t"\ 981 "packssdw %%mm7, %%mm4 \n\t" \ 982 "movq %%mm4, " #dst " \n\t"\ 983 "psrad $" #shift ", %%mm0 \n\t"\ 984 "packssdw %%mm3, %%mm0 \n\t" \ 985 "movq %%mm0, 16+" #dst " \n\t"\ 986 "movq %%mm0, 96+" #dst " \n\t"\ 987 "movq %%mm4, 112+" #dst " \n\t"\ 988 "psrad $" #shift ", %%mm5 \n\t"\ 989 "psrad $" #shift ", %%mm6 \n\t"\ 990 "psrad $" #shift ", %%mm2 \n\t"\ 991 "packssdw %%mm2, %%mm5 \n\t" \ 992 "movq %%mm5, 32+" #dst " \n\t"\ 993 "psrad $" #shift ", %%mm1 \n\t"\ 994 "packssdw %%mm1, %%mm6 \n\t" \ 995 "movq %%mm6, 48+" #dst " \n\t"\ 996 "movq %%mm6, 64+" #dst " \n\t"\ 997 "movq %%mm5, 80+" #dst " \n\t" 1001 IDCT( 0(%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
1003 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
/*
 * The asm text opens with a "# .p2align 4" string (the directive is
 * preceded by '#', so it is presumably inactive), followed by another IDCT
 * definition.
 *
 * IDCT(src0, src4, src1, src5, dst, shift) -- column pass that loads src0,
 * src4 and src1; src5 is never referenced in the body.
 *   Uses the coefficient quadwords at 16, 24, 32, 40, 48, 64, 80 and
 *   96(%2), shifts right arithmetically by `shift`, packs with signed
 *   saturation and writes eight 32-bit results with movd to dst, 16+dst,
 *   ..., 112+dst.
 *   NOTE(review): presumably selected when the row data behind src5 was
 *   found to be zero; the branch logic is not visible -- confirm.
 *
 * The four invocations read from temp (%1) and write to block (%0) at byte
 * offsets 0, 4, 8 and 12, with shift 20.
 */
1008 "
# .p2align 4 \n\t"\ 1011 #define IDCT(src0, src4, src1, src5, dst, shift) \ 1012 "movq " #src0 ", %%mm0 \n\t" \ 1013 "movq " #src4 ", %%mm1 \n\t" \ 1014 "movq " #src1 ", %%mm2 \n\t" \ 1015 "movq 16(%2), %%mm4 \n\t" \ 1016 "pmaddwd %%mm0, %%mm4 \n\t" \ 1017 "movq 24(%2), %%mm5 \n\t" \ 1018 "pmaddwd %%mm5, %%mm0 \n\t" \ 1019 "movq 32(%2), %%mm5 \n\t" \ 1020 "pmaddwd %%mm1, %%mm5 \n\t" \ 1021 "movq 40(%2), %%mm6 \n\t" \ 1022 "pmaddwd %%mm6, %%mm1 \n\t" \ 1023 "movq %%mm4, %%mm6 \n\t" \ 1024 "movq 48(%2), %%mm7 \n\t" \ 1025 "pmaddwd %%mm2, %%mm7 \n\t" \ 1026 "paddd %%mm5, %%mm4 \n\t" \ 1027 "psubd %%mm5, %%mm6 \n\t" \ 1028 "movq %%mm0, %%mm5 \n\t" \ 1029 "paddd %%mm1, %%mm0 \n\t" \ 1030 "psubd %%mm1, %%mm5 \n\t" \ 1031 "movq 64(%2), %%mm1 \n\t"\ 1032 "pmaddwd %%mm2, %%mm1 \n\t" \ 1033 "paddd %%mm4, %%mm7 \n\t" \ 1034 "paddd %%mm4, %%mm4 \n\t" \ 1035 "psubd %%mm7, %%mm4 \n\t" \ 1036 "psrad $" #shift ", %%mm7 \n\t"\ 1037 "psrad $" #shift ", %%mm4 \n\t"\ 1038 "movq %%mm0, %%mm3 \n\t" \ 1039 "paddd %%mm1, %%mm0 \n\t" \ 1040 "psubd %%mm1, %%mm3 \n\t" \ 1041 "psrad $" #shift ", %%mm0 \n\t"\ 1042 "psrad $" #shift ", %%mm3 \n\t"\ 1043 "packssdw %%mm7, %%mm7 \n\t" \ 1044 "movd %%mm7, " #dst " \n\t"\ 1045 "packssdw %%mm0, %%mm0 \n\t" \ 1046 "movd %%mm0, 16+" #dst " \n\t"\ 1047 "packssdw %%mm3, %%mm3 \n\t" \ 1048 "movd %%mm3, 96+" #dst " \n\t"\ 1049 "packssdw %%mm4, %%mm4 \n\t" \ 1050 "movd %%mm4, 112+" #dst " \n\t"\ 1051 "movq 80(%2), %%mm4 \n\t" \ 1052 "pmaddwd %%mm2, %%mm4 \n\t" \ 1053 "pmaddwd 96(%2), %%mm2 \n\t" \ 1054 "movq %%mm5, %%mm3 \n\t" \ 1055 "paddd %%mm4, %%mm3 \n\t" \ 1056 "psubd %%mm4, %%mm5 \n\t" \ 1057 "psrad $" #shift ", %%mm3 \n\t"\ 1058 "psrad $" #shift ", %%mm5 \n\t"\ 1059 "movq %%mm6, %%mm4 \n\t" \ 1060 "paddd %%mm2, %%mm6 \n\t" \ 1061 "psubd %%mm2, %%mm4 \n\t" \ 1062 "psrad $" #shift ", %%mm6 \n\t"\ 1063 "packssdw %%mm3, %%mm3 \n\t" \ 1064 "movd %%mm3, 32+" #dst " \n\t"\ 1065 "psrad $" #shift ", %%mm4 \n\t"\ 1066 "packssdw %%mm6, %%mm6 \n\t" \ 1067 "movd 
%%mm6, 48+" #dst " \n\t"\ 1068 "packssdw %%mm4, %%mm4 \n\t" \ 1069 "packssdw %%mm5, %%mm5 \n\t" \ 1070 "movd %%mm4, 64+" #dst " \n\t"\ 1071 "movd %%mm5, 80+" #dst " \n\t" 1075 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
1076 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
1077 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
1078 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
/*
 * IDCT(src0, src4, src1, src5, dst, shift) -- column pass that uses only
 * src0 (and the adjacent quadword at 8+src0); src4, src1 and src5 are never
 * referenced in the body.
 *   Multiplies each input with the coefficient quadwords at 16(%2) and
 *   24(%2), shifts right arithmetically by `shift`, packs pairwise and
 *   stores full quadwords: mm4 goes to dst, 48+dst, 64+dst and 112+dst;
 *   mm0 goes to 16+dst, 32+dst, 80+dst and 96+dst.
 *   mm7 is loaded from 32(%2) but is not read again inside this macro.
 *
 * Because each call writes 8 bytes per destination, only two invocations
 * appear, with dst at block (%0) byte offsets 0 and 8 and shift 20.
 */
1085 #define IDCT(src0, src4, src1, src5, dst, shift) \ 1086 "movq " #src0 ", %%mm0 \n\t" \ 1087 "movq 16(%2), %%mm4 \n\t" \ 1088 "pmaddwd %%mm0, %%mm4 \n\t" \ 1089 "movq 24(%2), %%mm5 \n\t" \ 1090 "pmaddwd %%mm5, %%mm0 \n\t" \ 1091 "psrad $" #shift ", %%mm4 \n\t"\ 1092 "psrad $" #shift ", %%mm0 \n\t"\ 1093 "movq 8+" #src0 ", %%mm2 \n\t" \ 1094 "movq 16(%2), %%mm1 \n\t" \ 1095 "pmaddwd %%mm2, %%mm1 \n\t" \ 1096 "movq 24(%2), %%mm7 \n\t" \ 1097 "pmaddwd %%mm7, %%mm2 \n\t" \ 1098 "movq 32(%2), %%mm7 \n\t" \ 1099 "psrad $" #shift ", %%mm1 \n\t"\ 1100 "packssdw %%mm1, %%mm4 \n\t" \ 1101 "movq %%mm4, " #dst " \n\t"\ 1102 "psrad $" #shift ", %%mm2 \n\t"\ 1103 "packssdw %%mm2, %%mm0 \n\t" \ 1104 "movq %%mm0, 16+" #dst " \n\t"\ 1105 "movq %%mm0, 96+" #dst " \n\t"\ 1106 "movq %%mm4, 112+" #dst " \n\t"\ 1107 "movq %%mm0, 32+" #dst " \n\t"\ 1108 "movq %%mm4, 48+" #dst " \n\t"\ 1109 "movq %%mm4, 64+" #dst " \n\t"\ 1110 "movq %%mm0, 80+" #dst " \n\t" 1113 IDCT( 0(%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
1115 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
/*
 * Operand section of the asm statement: the output list is empty ("::")
 * and three register inputs follow, which fixes the numbering used
 * throughout the asm text: %0 = block, %1 = temp, %2 = coeffs.
 * NOTE(review): no clobber list is visible in this excerpt, although the
 * asm text writes mm0..mm7, eax and memory -- confirm against the complete
 * file.
 */
1144 ::
"r" (block),
"r" (temp),
"r" (coeffs)
memory handling functions
/*
 * The entries below appear in this excerpt as bare signatures, without
 * bodies or terminating semicolons; only what the signatures show is
 * documented.
 */
/* Shown here with an empty replacement list; takes (n, t, v). */
#define DECLARE_ALIGNED(n, t, v)
/* Takes a pointer to int16_t named block; returns nothing. */
void ff_simple_idct_mmx(int16_t *block)
/* Takes a uint8_t destination, a line size and an int16_t block pointer. */
void ff_simple_idct_put_mmx(uint8_t *dest, int line_size, int16_t *block)
/* Takes a read-only int16_t block, a uint8_t pixel pointer and a line size. */
void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size)
/* Same parameter list as ff_simple_idct_put_mmx. */
void ff_simple_idct_add_mmx(uint8_t *dest, int line_size, int16_t *block)
/* Same parameter list as ff_put_pixels_clamped_mmx. */
void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size)
/* Macro invocation with arguments (8, int, deringThreshold); the macro itself is not visible here. */
DECLARE_ASM_CONST(8, int, deringThreshold)