annotate ffmpeg/libavcodec/sparc/vis.h @ 13:844d341cf643 tip

Back up before ISMIR
author Yading Song <yading.song@eecs.qmul.ac.uk>
date Thu, 31 Oct 2013 13:17:06 +0000
parents 6840f77b83aa
children
rev   line source
yading@10 1 /*
yading@10 2 * Copyright (C) 2003 David S. Miller <davem@redhat.com>
yading@10 3 *
yading@10 4 * This file is part of FFmpeg.
yading@10 5 *
yading@10 6 * FFmpeg is free software; you can redistribute it and/or
yading@10 7 * modify it under the terms of the GNU Lesser General Public
yading@10 8 * License as published by the Free Software Foundation; either
yading@10 9 * version 2.1 of the License, or (at your option) any later version.
yading@10 10 *
yading@10 11 * FFmpeg is distributed in the hope that it will be useful,
yading@10 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
yading@10 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
yading@10 14 * Lesser General Public License for more details.
yading@10 15 *
yading@10 16 * You should have received a copy of the GNU Lesser General Public
yading@10 17 * License along with FFmpeg; if not, write to the Free Software
yading@10 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
yading@10 19 */
yading@10 20
yading@10 21 /* You may be asking why I hard-code the instruction opcodes and don't
yading@10 22 * use the normal VIS assembler mnemonics for the VIS instructions.
yading@10 23 *
yading@10 24 * The reason is that Sun, in their infinite wisdom, decided that a binary
yading@10 25 * using a VIS instruction will cause it to be marked (in the ELF headers)
yading@10 26 * as doing so, and this prevents the OS from loading such binaries if the
yading@10 27 * current cpu doesn't have VIS. There is no way to easily override this
yading@10 28 * behavior of the assembler that I am aware of.
yading@10 29 *
yading@10 30 * This totally defeats what libmpeg2 is trying to do which is allow a
yading@10 31 * single binary to be created, and then detect the availability of VIS
yading@10 32 * at runtime.
yading@10 33 *
yading@10 34 * I'm not saying that tainting the binary by default is bad, rather I'm
yading@10 35 * saying that not providing a way to override this easily unnecessarily
yading@10 36 * ties people's hands.
yading@10 37 *
yading@10 38 * Thus, we do the opcode encoding by hand and output 32-bit words in
yading@10 39 * the assembler to keep the binary from becoming tainted.
yading@10 40 */
yading@10 41
yading@10 42 #ifndef AVCODEC_SPARC_VIS_H
yading@10 43 #define AVCODEC_SPARC_VIS_H
yading@10 44
#define ACCEL_SPARC_VIS 1
#define ACCEL_SPARC_VIS2 2

/* Report the VIS acceleration levels this build is compiled for.
 * Both VIS 1 and VIS 2 are assumed available, so the result is
 * always ACCEL_SPARC_VIS | ACCEL_SPARC_VIS2. */
static inline int vis_level(void)
{
    return ACCEL_SPARC_VIS | ACCEL_SPARC_VIS2;
}
yading@10 55
/* Hand-assembled VIS instruction field encodings (see the comment at
 * the top of this file for why mnemonics are not used).
 *
 * The base opcode uses an unsigned literal: left-shifting 1 into bit
 * 31 of a signed int is undefined behavior in C, so 0x1U is required
 * even though the resulting value is the same. */
#define vis_opc_base ((0x1U << 31) | (0x36 << 19))
#define vis_opf(X) ((X) << 5)
/* Single-precision registers (%f0-%f31) encode their number directly. */
#define vis_sreg(X) (X)
/* Double-precision registers: bit 5 of the register number is folded
 * into bit 0 of the 5-bit field (SPARC V9 encoding for %f32-%f62). */
#define vis_dreg(X) (((X)&0x1f)|((X)>>5))
#define vis_rs1_s(X) (vis_sreg(X) << 14)
#define vis_rs1_d(X) (vis_dreg(X) << 14)
#define vis_rs2_s(X) (vis_sreg(X) << 0)
#define vis_rs2_d(X) (vis_dreg(X) << 0)
#define vis_rd_s(X) (vis_sreg(X) << 25)
#define vis_rd_d(X) (vis_dreg(X) << 25)
yading@10 66
/*
 * Emitter macros for hand-encoded VIS instructions.  Each expands to
 * a raw ".word" so the assembler never sees a VIS mnemonic (see the
 * comment at the top of this file).  The name suffixes give the
 * operand types: 's' = single-precision FP register, 'd' =
 * double-precision FP register; e.g. vis_ss2d reads two singles and
 * writes a double.  All arguments must be compile-time constants,
 * as required by the "i" asm constraint.
 */

/* single x single -> single */
#define vis_ss2s(opf,rs1,rs2,rd) \
        __asm__ volatile (".word %0" \
                          : : "i" (vis_opc_base | vis_opf(opf) | \
                                   vis_rs1_s(rs1) | \
                                   vis_rs2_s(rs2) | \
                                   vis_rd_s(rd)))

/* double x double -> double */
#define vis_dd2d(opf,rs1,rs2,rd) \
        __asm__ volatile (".word %0" \
                          : : "i" (vis_opc_base | vis_opf(opf) | \
                                   vis_rs1_d(rs1) | \
                                   vis_rs2_d(rs2) | \
                                   vis_rd_d(rd)))

/* single x single -> double */
#define vis_ss2d(opf,rs1,rs2,rd) \
        __asm__ volatile (".word %0" \
                          : : "i" (vis_opc_base | vis_opf(opf) | \
                                   vis_rs1_s(rs1) | \
                                   vis_rs2_s(rs2) | \
                                   vis_rd_d(rd)))

/* single x double -> double */
#define vis_sd2d(opf,rs1,rs2,rd) \
        __asm__ volatile (".word %0" \
                          : : "i" (vis_opc_base | vis_opf(opf) | \
                                   vis_rs1_s(rs1) | \
                                   vis_rs2_d(rs2) | \
                                   vis_rd_d(rd)))

/* double (rs2 only) -> single */
#define vis_d2s(opf,rs2,rd) \
        __asm__ volatile (".word %0" \
                          : : "i" (vis_opc_base | vis_opf(opf) | \
                                   vis_rs2_d(rs2) | \
                                   vis_rd_s(rd)))

/* single (rs2 only) -> double */
#define vis_s2d(opf,rs2,rd) \
        __asm__ volatile (".word %0" \
                          : : "i" (vis_opc_base | vis_opf(opf) | \
                                   vis_rs2_s(rs2) | \
                                   vis_rd_d(rd)))

/* double (rs1 only) -> double */
#define vis_d12d(opf,rs1,rd) \
        __asm__ volatile (".word %0" \
                          : : "i" (vis_opc_base | vis_opf(opf) | \
                                   vis_rs1_d(rs1) | \
                                   vis_rd_d(rd)))

/* double (rs2 only) -> double */
#define vis_d22d(opf,rs2,rd) \
        __asm__ volatile (".word %0" \
                          : : "i" (vis_opc_base | vis_opf(opf) | \
                                   vis_rs2_d(rs2) | \
                                   vis_rd_d(rd)))

/* single (rs1 only) -> single */
#define vis_s12s(opf,rs1,rd) \
        __asm__ volatile (".word %0" \
                          : : "i" (vis_opc_base | vis_opf(opf) | \
                                   vis_rs1_s(rs1) | \
                                   vis_rd_s(rd)))

/* single (rs2 only) -> single */
#define vis_s22s(opf,rs2,rd) \
        __asm__ volatile (".word %0" \
                          : : "i" (vis_opc_base | vis_opf(opf) | \
                                   vis_rs2_s(rs2) | \
                                   vis_rd_s(rd)))

/* no source -> single destination */
#define vis_s(opf,rd) \
        __asm__ volatile (".word %0" \
                          : : "i" (vis_opc_base | vis_opf(opf) | \
                                   vis_rd_s(rd)))

/* no source -> double destination */
#define vis_d(opf,rd) \
        __asm__ volatile (".word %0" \
                          : : "i" (vis_opc_base | vis_opf(opf) | \
                                   vis_rd_d(rd)))
yading@10 140
/* Move between FP registers and memory using real (non-VIS)
 * mnemonics; op, rd and the register number are token-pasted into
 * the asm template, so they must be literals.  The *_2 variants
 * address memory as [mem1 + mem2] with mem1/mem2 in integer
 * registers.
 *
 * NOTE(review): the store forms have no "memory" clobber or memory
 * output operand, so the compiler is not informed that the
 * pointed-to object changes -- confirm callers do not depend on
 * ordering against other accesses to the same object. */
#define vis_r2m(op,rd,mem) \
        __asm__ volatile (#op "\t%%f" #rd ", [%0]" : : "r" (&(mem)) )

#define vis_r2m_2(op,rd,mem1,mem2) \
        __asm__ volatile (#op "\t%%f" #rd ", [%0 + %1]" : : "r" (mem1), "r" (mem2) )

#define vis_m2r(op,mem,rd) \
        __asm__ volatile (#op "\t[%0], %%f" #rd : : "r" (&(mem)) )

#define vis_m2r_2(op,mem1,mem2,rd) \
        __asm__ volatile (#op "\t[%0 + %1], %%f" #rd : : "r" (mem1), "r" (mem2) )
yading@10 152
/* Set the Graphics Status Register from _val.  The value is forced
 * into %g1 via the register-asm variable; 0xa7804000 is the
 * hand-encoded write to %gsr (presumably "wr %g1, %g0, %gsr" --
 * verify against a SPARC V9 disassembler). */
static inline void vis_set_gsr(unsigned int _val)
{
    register unsigned int val __asm__("g1");

    val = _val;
    __asm__ volatile(".word 0xa7804000"
                     : : "r" (val));
}
yading@10 161
/* GSR bit fields: bits 2:0 hold the align offset set by alignaddr,
 * bits 6:3 hold the scale factor used by the pixel pack
 * instructions. */
#define VIS_GSR_ALIGNADDR_MASK  0x0000007
#define VIS_GSR_ALIGNADDR_SHIFT 0
#define VIS_GSR_SCALEFACT_MASK  0x0000078
#define VIS_GSR_SCALEFACT_SHIFT 3

/* Typed load/store helpers: 32-bit (ld/st) single-precision and
 * 64-bit (ldd/std) double-precision register moves.  The *_2 forms
 * address [mem1 + mem2]. */
#define vis_ld32(mem,rs1)         vis_m2r(ld, mem, rs1)
#define vis_ld32_2(mem1,mem2,rs1) vis_m2r_2(ld, mem1, mem2, rs1)
#define vis_st32(rs1,mem)         vis_r2m(st, rs1, mem)
#define vis_st32_2(rs1,mem1,mem2) vis_r2m_2(st, rs1, mem1, mem2)
#define vis_ld64(mem,rs1)         vis_m2r(ldd, mem, rs1)
#define vis_ld64_2(mem1,mem2,rs1) vis_m2r_2(ldd, mem1, mem2, rs1)
#define vis_st64(rs1,mem)         vis_r2m(std, rs1, mem)
#define vis_st64_2(rs1,mem1,mem2) vis_r2m_2(std, rs1, mem1, mem2)
yading@10 175
/* Block load: the address must be in %g1 (forced by the register-asm
 * variable); the destination FP register is OR'd into the rd field
 * of the hand-encoded word.  NOTE(review): 0xc1985e00 looks like
 * ldda with a block-transfer ASI -- confirm against the SPARC V9
 * manual.  The "memory" clobber orders it against surrounding
 * accesses. */
#define vis_ldblk(mem, rd) \
do {    register void *__mem __asm__("g1"); \
        __mem = &(mem); \
        __asm__ volatile(".word 0xc1985e00 | %1" \
                         : \
                         : "r" (__mem), \
                           "i" (vis_rd_d(rd)) \
                         : "memory"); \
} while (0)

/* Block store: same addressing scheme as vis_ldblk; 0xc1b85e00 is
 * the matching store encoding with the source FP register OR'd into
 * the rd field. */
#define vis_stblk(rd, mem) \
do {    register void *__mem __asm__("g1"); \
        __mem = &(mem); \
        __asm__ volatile(".word 0xc1b85e00 | %1" \
                         : \
                         : "r" (__mem), \
                           "i" (vis_rd_d(rd)) \
                         : "memory"); \
} while (0)
yading@10 195
/* Hand-encoded memory barriers; the "memory" clobber also acts as a
 * compiler-level barrier.  0x8143e008 / 0x8143e040 correspond to
 * membar #StoreStore and membar #Sync. */
#define vis_membar_storestore() \
        __asm__ volatile(".word 0x8143e008" : : : "memory")

#define vis_membar_sync() \
        __asm__ volatile(".word 0x8143e040" : : : "memory")
yading@10 201
/* 16- and 32-bit partitioned addition and subtraction.  The normal
 * versions perform 4 x 16-bit or 2 x 32-bit additions or
 * subtractions on double registers.  The 's' versions perform
 * 2 x 16-bit or 1 x 32-bit on single registers.  The hex argument to
 * each emitter is the VIS opf opcode field. */

#define vis_padd16(rs1,rs2,rd)    vis_dd2d(0x50, rs1, rs2, rd)
#define vis_padd16s(rs1,rs2,rd)   vis_ss2s(0x51, rs1, rs2, rd)
#define vis_padd32(rs1,rs2,rd)    vis_dd2d(0x52, rs1, rs2, rd)
#define vis_padd32s(rs1,rs2,rd)   vis_ss2s(0x53, rs1, rs2, rd)
#define vis_psub16(rs1,rs2,rd)    vis_dd2d(0x54, rs1, rs2, rd)
#define vis_psub16s(rs1,rs2,rd)   vis_ss2s(0x55, rs1, rs2, rd)
#define vis_psub32(rs1,rs2,rd)    vis_dd2d(0x56, rs1, rs2, rd)
#define vis_psub32s(rs1,rs2,rd)   vis_ss2s(0x57, rs1, rs2, rd)

/* Pixel formatting instructions. */

#define vis_pack16(rs2,rd)        vis_d2s( 0x3b, rs2, rd)
#define vis_pack32(rs1,rs2,rd)    vis_dd2d(0x3a, rs1, rs2, rd)
#define vis_packfix(rs2,rd)       vis_d2s( 0x3d, rs2, rd)
#define vis_expand(rs2,rd)        vis_s2d( 0x4d, rs2, rd)
#define vis_pmerge(rs1,rs2,rd)    vis_ss2d(0x4b, rs1, rs2, rd)

/* Partitioned multiply instructions. */

#define vis_mul8x16(rs1,rs2,rd)   vis_sd2d(0x31, rs1, rs2, rd)
#define vis_mul8x16au(rs1,rs2,rd) vis_ss2d(0x33, rs1, rs2, rd)
#define vis_mul8x16al(rs1,rs2,rd) vis_ss2d(0x35, rs1, rs2, rd)
#define vis_mul8sux16(rs1,rs2,rd) vis_dd2d(0x36, rs1, rs2, rd)
#define vis_mul8ulx16(rs1,rs2,rd) vis_dd2d(0x37, rs1, rs2, rd)
#define vis_muld8sux16(rs1,rs2,rd) vis_ss2d(0x38, rs1, rs2, rd)
#define vis_muld8ulx16(rs1,rs2,rd) vis_ss2d(0x39, rs1, rs2, rd)
yading@10 234
yading@10 235 /* Alignment instructions. */
yading@10 236
/* Execute the VIS alignaddr instruction (opf 0x18) on the pointer:
 * rs1 = %g1 (the pointer), rs2 = %g0, rd = %g1.  Sets the GSR align
 * field from the low address bits and returns the aligned pointer,
 * for subsequent use with vis_faligndata(). */
static inline const void *vis_alignaddr(const void *_ptr)
{
    register const void *ptr __asm__("g1");

    ptr = _ptr;

    __asm__ volatile(".word %2"
                     : "=&r" (ptr)
                     : "0" (ptr),
                       "i" (vis_opc_base | vis_opf(0x18) |
                            vis_rs1_s(1) |
                            vis_rs2_s(0) |
                            vis_rd_s(1)));

    return ptr;
}
yading@10 253
/* Same as vis_alignaddr() but with rd = %g0, so the aligned pointer
 * result is discarded; only the GSR align-field side effect is
 * wanted. */
static inline void vis_alignaddr_g0(void *_ptr)
{
    register void *ptr __asm__("g1");

    ptr = _ptr;

    __asm__ volatile(".word %2"
                     : "=&r" (ptr)
                     : "0" (ptr),
                       "i" (vis_opc_base | vis_opf(0x18) |
                            vis_rs1_s(1) |
                            vis_rs2_s(0) |
                            vis_rd_s(0)));
}
yading@10 268
/* Little-endian variant of vis_alignaddr(): opf 0x19 (alignaddrl)
 * instead of 0x18.  Same register binding: pointer in and out of
 * %g1, GSR align field updated as a side effect. */
static inline void *vis_alignaddrl(void *_ptr)
{
    register void *ptr __asm__("g1");

    ptr = _ptr;

    __asm__ volatile(".word %2"
                     : "=&r" (ptr)
                     : "0" (ptr),
                       "i" (vis_opc_base | vis_opf(0x19) |
                            vis_rs1_s(1) |
                            vis_rs2_s(0) |
                            vis_rd_s(1)));

    return ptr;
}
yading@10 285
/* Little-endian alignaddrl (opf 0x19) with rd = %g0: the aligned
 * pointer is discarded, only the GSR align field is updated. */
static inline void vis_alignaddrl_g0(void *_ptr)
{
    register void *ptr __asm__("g1");

    ptr = _ptr;

    __asm__ volatile(".word %2"
                     : "=&r" (ptr)
                     : "0" (ptr),
                       "i" (vis_opc_base | vis_opf(0x19) |
                            vis_rs1_s(1) |
                            vis_rs2_s(0) |
                            vis_rd_s(0)));
}
yading@10 300
/* Combine two double registers according to the GSR align field set
 * by a prior alignaddr/alignaddrl. */
#define vis_faligndata(rs1,rs2,rd) vis_dd2d(0x48, rs1, rs2, rd)

/* Logical operate instructions.  Plain names work on double
 * registers, the 's' forms on singles; the hex value is the VIS opf
 * opcode field. */

#define vis_fzero(rd)           vis_d(   0x60,           rd)
#define vis_fzeros(rd)          vis_s(   0x61,           rd)
#define vis_fone(rd)            vis_d(   0x7e,           rd)
#define vis_fones(rd)           vis_s(   0x7f,           rd)
#define vis_src1(rs1,rd)        vis_d12d(0x74, rs1,      rd)
#define vis_src1s(rs1,rd)       vis_s12s(0x75, rs1,      rd)
#define vis_src2(rs2,rd)        vis_d22d(0x78, rs2,      rd)
#define vis_src2s(rs2,rd)       vis_s22s(0x79, rs2,      rd)
#define vis_not1(rs1,rd)        vis_d12d(0x6a, rs1,      rd)
#define vis_not1s(rs1,rd)       vis_s12s(0x6b, rs1,      rd)
#define vis_not2(rs2,rd)        vis_d22d(0x66, rs2,      rd)
#define vis_not2s(rs2,rd)       vis_s22s(0x67, rs2,      rd)
#define vis_or(rs1,rs2,rd)      vis_dd2d(0x7c, rs1, rs2, rd)
#define vis_ors(rs1,rs2,rd)     vis_ss2s(0x7d, rs1, rs2, rd)
#define vis_nor(rs1,rs2,rd)     vis_dd2d(0x62, rs1, rs2, rd)
#define vis_nors(rs1,rs2,rd)    vis_ss2s(0x63, rs1, rs2, rd)
#define vis_and(rs1,rs2,rd)     vis_dd2d(0x70, rs1, rs2, rd)
#define vis_ands(rs1,rs2,rd)    vis_ss2s(0x71, rs1, rs2, rd)
#define vis_nand(rs1,rs2,rd)    vis_dd2d(0x6e, rs1, rs2, rd)
#define vis_nands(rs1,rs2,rd)   vis_ss2s(0x6f, rs1, rs2, rd)
#define vis_xor(rs1,rs2,rd)     vis_dd2d(0x6c, rs1, rs2, rd)
#define vis_xors(rs1,rs2,rd)    vis_ss2s(0x6d, rs1, rs2, rd)
#define vis_xnor(rs1,rs2,rd)    vis_dd2d(0x72, rs1, rs2, rd)
#define vis_xnors(rs1,rs2,rd)   vis_ss2s(0x73, rs1, rs2, rd)
#define vis_ornot1(rs1,rs2,rd)  vis_dd2d(0x7a, rs1, rs2, rd)
#define vis_ornot1s(rs1,rs2,rd) vis_ss2s(0x7b, rs1, rs2, rd)
#define vis_ornot2(rs1,rs2,rd)  vis_dd2d(0x76, rs1, rs2, rd)
#define vis_ornot2s(rs1,rs2,rd) vis_ss2s(0x77, rs1, rs2, rd)
#define vis_andnot1(rs1,rs2,rd) vis_dd2d(0x68, rs1, rs2, rd)
#define vis_andnot1s(rs1,rs2,rd) vis_ss2s(0x69, rs1, rs2, rd)
#define vis_andnot2(rs1,rs2,rd) vis_dd2d(0x64, rs1, rs2, rd)
#define vis_andnot2s(rs1,rs2,rd) vis_ss2s(0x65, rs1, rs2, rd)

/* Pixel component distance. */

#define vis_pdist(rs1,rs2,rd)   vis_dd2d(0x3e, rs1, rs2, rd)
yading@10 341
yading@10 342 #endif /* AVCODEC_SPARC_VIS_H */