annotate ffmpeg/libavcodec/alpha/motion_est_mvi_asm.S @ 13:844d341cf643 tip

Back up before ISMIR
author Yading Song <yading.song@eecs.qmul.ac.uk>
date Thu, 31 Oct 2013 13:17:06 +0000
parents 6840f77b83aa
children
rev   line source
yading@10 1 /*
yading@10 2 * Alpha optimized DSP utils
yading@10 3 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
yading@10 4 *
yading@10 5 * This file is part of FFmpeg.
yading@10 6 *
yading@10 7 * FFmpeg is free software; you can redistribute it and/or
yading@10 8 * modify it under the terms of the GNU Lesser General Public
yading@10 9 * License as published by the Free Software Foundation; either
yading@10 10 * version 2.1 of the License, or (at your option) any later version.
yading@10 11 *
yading@10 12 * FFmpeg is distributed in the hope that it will be useful,
yading@10 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
yading@10 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
yading@10 15 * Lesser General Public License for more details.
yading@10 16 *
yading@10 17 * You should have received a copy of the GNU Lesser General Public
yading@10 18 * License along with FFmpeg; if not, write to the Free Software
yading@10 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
yading@10 20 */
yading@10 21
yading@10 22 #include "regdef.h"
yading@10 23
yading@10 24 /* Some nicer register names for the scratch temporaries used by the SAD loops below. */
yading@10 25 #define ta t10
yading@10 26 #define tb t11
yading@10 27 #define tc t12
yading@10 28 #define td AT /* the assembler temporary; only usable because of ".set noat" below */
yading@10 29 /* Danger: these overlap with the argument list (a3-a5) and the return value (v0) */
yading@10 30 #define te a5
yading@10 31 #define tf a4
yading@10 32 #define tg a3
yading@10 33 #define th v0
yading@10 34
yading@10 35 .set noat # allow explicit use of $at (aliased as td above) without warnings
yading@10 36 .set noreorder # keep the hand-tuned instruction schedule; no assembler reordering
yading@10 37 .arch pca56 # PCA56 implements the MVI extension (the perr instruction used below)
yading@10 38 .text
yading@10 39
yading@10 40 /*****************************************************************************
yading@10 41 * int pix_abs16x16_mvi_asm(uint8_t *pix1, uint8_t *pix2, int line_size)
yading@10 42 * NOTE(review): the body also reads a height counter in a4 (decremented by 2
yading@10 43 * This code is written with a pca56 in mind. For ev6, one should
yading@10 44 * really take the increased latency of 3 cycles for MVI instructions
yading@10 45 * into account.
yading@10 46 *
yading@10 47 * It is important to keep the loading and first use of a register as
yading@10 48 * far apart as possible, because if a register is accessed before it
yading@10 49 * has been fetched from memory, the CPU will stall.
yading@10 50 */
yading@10 51 .align 4
yading@10 52 .globl pix_abs16x16_mvi_asm
yading@10 53 .ent pix_abs16x16_mvi_asm
yading@10 54 pix_abs16x16_mvi_asm:
yading@10 55 .frame sp, 0, ra, 0
yading@10 56 .prologue 0
yading@10 57
yading@10 58 and a2, 7, t0 # t0 = pix2 & 7: is pix2 8-byte aligned?
yading@10 59 clr v0 # v0 = running SAD accumulator / return value
yading@10 60 beq t0, $aligned # aligned pix2 takes the cheaper ldq-only path
yading@10 61 .align 4
yading@10 62 $unaligned:
yading@10 63 /* Registers:
yading@10 64 line 0:
yading@10 65 t0: left_u -> left lo -> left
yading@10 66 t1: mid
yading@10 67 t2: right_u -> right hi -> right
yading@10 68 t3: ref left
yading@10 69 t4: ref right
yading@10 70 line 1:
yading@10 71 t5: left_u -> left lo -> left
yading@10 72 t6: mid
yading@10 73 t7: right_u -> right hi -> right
yading@10 74 t8: ref left
yading@10 75 t9: ref right
yading@10 76 temp:
yading@10 77 ta: left hi
yading@10 78 tb: right lo
yading@10 79 tc: error left
yading@10 80 td: error right */
yading@10 81
yading@10 82 /* load line 0 */
yading@10 83 ldq_u t0, 0(a2) # left_u
yading@10 84 ldq_u t1, 8(a2) # mid
yading@10 85 ldq_u t2, 16(a2) # right_u
yading@10 86 ldq t3, 0(a1) # ref left
yading@10 87 ldq t4, 8(a1) # ref right
yading@10 88 addq a1, a3, a1 # pix1 += line_size
yading@10 89 addq a2, a3, a2 # pix2 += line_size
yading@10 90 /* load line 1 */
yading@10 91 ldq_u t5, 0(a2) # left_u
yading@10 92 ldq_u t6, 8(a2) # mid
yading@10 93 ldq_u t7, 16(a2) # right_u
yading@10 94 ldq t8, 0(a1) # ref left
yading@10 95 ldq t9, 8(a1) # ref right
yading@10 96 addq a1, a3, a1 # pix1 += line_size
yading@10 97 addq a2, a3, a2 # pix2 += line_size
yading@10 98 /* calc line 0. NOTE(review): ext* take only the low 3 bits of a2 as shift count, and a2 has already been incremented twice -- this is correct only if line_size is a multiple of 8; confirm with callers */
yading@10 99 extql t0, a2, t0 # left lo
yading@10 100 extqh t1, a2, ta # left hi
yading@10 101 extql t1, a2, tb # right lo
yading@10 102 or t0, ta, t0 # left
yading@10 103 extqh t2, a2, t2 # right hi
yading@10 104 perr t3, t0, tc # error left
yading@10 105 or t2, tb, t2 # right
yading@10 106 perr t4, t2, td # error right
yading@10 107 addq v0, tc, v0 # add error left
yading@10 108 addq v0, td, v0 # add error right
yading@10 109 /* calc line 1 */
yading@10 110 extql t5, a2, t5 # left lo
yading@10 111 extqh t6, a2, ta # left hi
yading@10 112 extql t6, a2, tb # right lo
yading@10 113 or t5, ta, t5 # left
yading@10 114 extqh t7, a2, t7 # right hi
yading@10 115 perr t8, t5, tc # error left
yading@10 116 or t7, tb, t7 # right
yading@10 117 perr t9, t7, td # error right
yading@10 118 addq v0, tc, v0 # add error left
yading@10 119 addq v0, td, v0 # add error right
yading@10 120 /* loop */
yading@10 121 subq a4, 2, a4 # h -= 2 (two rows per iteration; h assumed even)
yading@10 122 bne a4, $unaligned # loop until h reaches 0
yading@10 123 ret
yading@10 124
yading@10 125 .align 4
yading@10 126 $aligned:
yading@10 127 /* load line 0 */
yading@10 128 ldq t0, 0(a2) # left
yading@10 129 ldq t1, 8(a2) # right
yading@10 130 addq a2, a3, a2 # pix2 += line_size
yading@10 131 ldq t2, 0(a1) # ref left
yading@10 132 ldq t3, 8(a1) # ref right
yading@10 133 addq a1, a3, a1 # pix1 += line_size
yading@10 134 /* load line 1 */
yading@10 135 ldq t4, 0(a2) # left
yading@10 136 ldq t5, 8(a2) # right
yading@10 137 addq a2, a3, a2 # pix2 += line_size
yading@10 138 ldq t6, 0(a1) # ref left
yading@10 139 ldq t7, 8(a1) # ref right
yading@10 140 addq a1, a3, a1 # pix1 += line_size
yading@10 141 /* load line 2 */
yading@10 142 ldq t8, 0(a2) # left
yading@10 143 ldq t9, 8(a2) # right
yading@10 144 addq a2, a3, a2 # pix2 += line_size
yading@10 145 ldq ta, 0(a1) # ref left
yading@10 146 ldq tb, 8(a1) # ref right
yading@10 147 addq a1, a3, a1 # pix1 += line_size
yading@10 148 /* load line 3 */
yading@10 149 ldq tc, 0(a2) # left
yading@10 150 ldq td, 8(a2) # right
yading@10 151 addq a2, a3, a2 # pix2 += line_size
yading@10 152 ldq te, 0(a1) # ref left
yading@10 153 ldq a0, 8(a1) # ref right (clobbers a0 -- incoming first argument evidently dead here; NOTE(review): confirm)
yading@10 154 /* calc line 0 */
yading@10 155 perr t0, t2, t0 # error left
yading@10 156 addq a1, a3, a1 # pix1 += line_size
yading@10 157 perr t1, t3, t1 # error right
yading@10 158 addq v0, t0, v0 # add error left
yading@10 159 /* calc line 1 */
yading@10 160 perr t4, t6, t0 # error left
yading@10 161 addq v0, t1, v0 # add error right
yading@10 162 perr t5, t7, t1 # error right
yading@10 163 addq v0, t0, v0 # add error left
yading@10 164 /* calc line 2 */
yading@10 165 perr t8, ta, t0 # error left
yading@10 166 addq v0, t1, v0 # add error right
yading@10 167 perr t9, tb, t1 # error right
yading@10 168 addq v0, t0, v0 # add error left
yading@10 169 /* calc line 3 */
yading@10 170 perr tc, te, t0 # error left
yading@10 171 addq v0, t1, v0 # add error right
yading@10 172 perr td, a0, t1 # error right
yading@10 173 addq v0, t0, v0 # add error left
yading@10 174 addq v0, t1, v0 # add error right
yading@10 175 /* loop */
yading@10 176 subq a4, 4, a4 # h -= 4 (four rows per iteration; h assumed multiple of 4)
yading@10 177 bne a4, $aligned # loop until h reaches 0
yading@10 178 ret
yading@10 179 .end pix_abs16x16_mvi_asm