/*
 * Alpha optimized DSP utils
 * Copyright (c) 2002 Falk Hueffner
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "regdef.h"

/* Some nicer register names.  */
#define ta t10
#define tb t11
#define tc t12
#define td AT
/* Danger: these overlap with the argument list and the return value.
 * In pix_abs16x16_mvi_asm only te is used; tf and tg alias a4 and a3,
 * which that function keeps live as the row counter and the line stride,
 * and th aliases v0, which holds the running result.  */
#define te a5
#define tf a4
#define tg a3
#define th v0

        .set noat
        .set noreorder
        .arch pca56
        .text

/*****************************************************************************
 * pix_abs16x16_mvi_asm
 *
 * Sums the per-byte absolute differences (MVI perr) between two blocks
 * that are 16 bytes wide, one row at a time, and returns the total.
 *
 * Register interface as used by the code below:
 *   a0  not read; overwritten as a scratch register in the aligned loop
 *   a1  pix1: reference block, always read with ldq
 *   a2  pix2: block to compare; its low three bits select the loop
 *   a3  line_size: byte stride added to both pointers after each row
 *   a4  h: number of rows
 *   v0  result: running sum of all perr values
 *
 * NOTE(review): this presumably corresponds to the C prototype
 *   int pix_abs16x16_mvi_asm(void *v, uint8_t *pix1, uint8_t *pix2,
 *                            int line_size, int h)
 * The older three-argument prototype that used to be quoted here does not
 * match the registers the code reads -- confirm against the C declaration.
 *
 * Preconditions that follow from the code:
 *   - Both loops are do/while loops that exit only when h reaches exactly
 *     zero: h must be a positive multiple of 2 on the unaligned path and
 *     a positive multiple of 4 on the aligned path.
 *   - pix1 is read with ldq, i.e. as naturally aligned quadwords.
 *   - The alignment of pix2 is tested once on entry, and the unaligned
 *     loop takes its byte offset from a2 after a2 has been advanced by
 *     whole rows.  This assumes line_size is a multiple of 8 -- TODO
 *     confirm against the callers.
 *
 * Clobbers: t0-t9, ta-td (t10, t11, t12, AT), a0-a2, a4, a5.
 * No stack frame is used (.frame sp, 0).
 *
 * This code is written with a pca56 in mind. For ev6, one should
 * really take the increased latency of 3 cycles for MVI instructions
 * into account.
 *
 * It is important to keep the loading and first use of a register as
 * far apart as possible, because if a register is accessed before it
 * has been fetched from memory, the CPU will stall.
 */
        .align 4
        .globl pix_abs16x16_mvi_asm
        .ent pix_abs16x16_mvi_asm
pix_abs16x16_mvi_asm:
        .frame sp, 0, ra, 0
        .prologue 0

        and     a2, 7, t0       # t0 = pix2 & 7, byte offset in quadword
        clr     v0              # sum = 0
        beq     t0, $aligned    # offset 0: take the plain ldq loop
        .align 4
$unaligned:
        /* Two rows per iteration.  Each row of pix2 is fetched as three
           enclosing quadwords (ldq_u) and realigned into two quadwords
           with extql/extqh.

           Registers:

           line 0:
           t0:  left_u -> left lo -> left
           t1:  mid
           t2:  right_u -> right hi -> right
           t3:  ref left
           t4:  ref right
           line 1:
           t5:  left_u -> left lo -> left
           t6:  mid
           t7:  right_u -> right hi -> right
           t8:  ref left
           t9:  ref right
           temp:
           ta:  left hi
           tb:  right lo
           tc:  error left
           td:  error right  */

        /* load line 0 */
        ldq_u   t0,  0(a2)      # left_u
        ldq_u   t1,  8(a2)      # mid
        ldq_u   t2, 16(a2)      # right_u
        ldq     t3,  0(a1)      # ref left
        ldq     t4,  8(a1)      # ref right
        addq    a1, a3, a1      # pix1 += line_size
        addq    a2, a3, a2      # pix2 += line_size
        /* load line 1 */
        ldq_u   t5,  0(a2)      # left_u
        ldq_u   t6,  8(a2)      # mid
        ldq_u   t7, 16(a2)      # right_u
        ldq     t8,  0(a1)      # ref left
        ldq     t9,  8(a1)      # ref right
        addq    a1, a3, a1      # pix1 += line_size
        addq    a2, a3, a2      # pix2 += line_size
        /* calc line 0 -- a2 already points two rows ahead; only its low
           three bits are used by the extract instructions */
        extql   t0, a2, t0      # left lo
        extqh   t1, a2, ta      # left hi
        extql   t1, a2, tb      # right lo
        or      t0, ta, t0      # left
        extqh   t2, a2, t2      # right hi
        perr    t3, t0, tc      # error left
        or      t2, tb, t2      # right
        perr    t4, t2, td      # error right
        addq    v0, tc, v0      # add error left
        addq    v0, td, v0      # add error right
        /* calc line 1 */
        extql   t5, a2, t5      # left lo
        extqh   t6, a2, ta      # left hi
        extql   t6, a2, tb      # right lo
        or      t5, ta, t5      # left
        extqh   t7, a2, t7      # right hi
        perr    t8, t5, tc      # error left
        or      t7, tb, t7      # right
        perr    t9, t7, td      # error right
        addq    v0, tc, v0      # add error left
        addq    v0, td, v0      # add error right
        /* loop */
        subq    a4,    2, a4    # h -= 2
        bne     a4, $unaligned  # exits only when h hits exactly 0
        ret

        .align 4
$aligned:
        /* Four rows per iteration.  All loads are issued before the first
           perr so that no load result is used right after it was requested.

           Registers:
           line 0:  t0/t1 = left/right,  t2/t3 = ref left/right
           line 1:  t4/t5 = left/right,  t6/t7 = ref left/right
           line 2:  t8/t9 = left/right,  ta/tb = ref left/right
           line 3:  tc/td = left/right,  te/a0 = ref left/right
           t0 and t1 are reused as error left / error right once line 0
           has been consumed.  */

        /* load line 0 */
        ldq     t0,  0(a2)      # left
        ldq     t1,  8(a2)      # right
        addq    a2, a3, a2      # pix2 += line_size
        ldq     t2,  0(a1)      # ref left
        ldq     t3,  8(a1)      # ref right
        addq    a1, a3, a1      # pix1 += line_size
        /* load line 1 */
        ldq     t4,  0(a2)      # left
        ldq     t5,  8(a2)      # right
        addq    a2, a3, a2      # pix2 += line_size
        ldq     t6,  0(a1)      # ref left
        ldq     t7,  8(a1)      # ref right
        addq    a1, a3, a1      # pix1 += line_size
        /* load line 2 */
        ldq     t8,  0(a2)      # left
        ldq     t9,  8(a2)      # right
        addq    a2, a3, a2      # pix2 += line_size
        ldq     ta,  0(a1)      # ref left
        ldq     tb,  8(a1)      # ref right
        addq    a1, a3, a1      # pix1 += line_size
        /* load line 3 */
        ldq     tc,  0(a2)      # left
        ldq     td,  8(a2)      # right
        addq    a2, a3, a2      # pix2 += line_size
        ldq     te,  0(a1)      # ref left
        ldq     a0,  8(a1)      # ref right (a0 is free: never read as an argument)
        /* calc line 0 */
        perr    t0, t2, t0      # error left
        addq    a1, a3, a1      # pix1 += line_size (for line 3, interleaved here)
        perr    t1, t3, t1      # error right
        addq    v0, t0, v0      # add error left
        /* calc line 1 */
        perr    t4, t6, t0      # error left
        addq    v0, t1, v0      # add error right
        perr    t5, t7, t1      # error right
        addq    v0, t0, v0      # add error left
        /* calc line 2 */
        perr    t8, ta, t0      # error left
        addq    v0, t1, v0      # add error right
        perr    t9, tb, t1      # error right
        addq    v0, t0, v0      # add error left
        /* calc line 3 */
        perr    tc, te, t0      # error left
        addq    v0, t1, v0      # add error right
        perr    td, a0, t1      # error right
        addq    v0, t0, v0      # add error left
        addq    v0, t1, v0      # add error right
        /* loop */
        subq    a4,    4, a4    # h -= 4
        bne     a4, $aligned    # exits only when h hits exactly 0
        ret
        .end pix_abs16x16_mvi_asm