ffmpeg/libavcodec/alpha/hpeldsp_alpha.c @ 13:844d341cf643
/*
 * Alpha optimized DSP utils
 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/attributes.h"
#include "libavcodec/hpeldsp.h"
#include "hpeldsp_alpha.h"
#include "asm.h"

static inline uint64_t avg2_no_rnd(uint64_t a, uint64_t b)
{
    return (a & b) + (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
}

static inline uint64_t avg2(uint64_t a, uint64_t b)
{
    return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
}

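/* Illustrative self-check of the two helpers above (a sketch, kept out of
 * the build).  Per byte, avg2() computes (a + b + 1) >> 1 and avg2_no_rnd()
 * computes (a + b) >> 1, via the SWAR identities
 *     (a + b) >> 1     == (a & b) + ((a ^ b) >> 1)
 *     (a + b + 1) >> 1 == (a | b) - ((a ^ b) >> 1)
 * where masking the XOR with 0xfe per byte keeps a bit shifted out of one
 * byte lane from leaking into the top bit of the byte below it. */
#if 0
#include <assert.h>
static void avg2_selftest(void)
{
    /* bytes 0x03 and 0x04: truncating average is 0x03, rounding is 0x04 */
    assert(avg2_no_rnd(BYTE_VEC(0x03), BYTE_VEC(0x04)) == BYTE_VEC(0x03));
    assert(avg2(BYTE_VEC(0x03), BYTE_VEC(0x04))        == BYTE_VEC(0x04));
}
#endif
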
#if 0
/* The XY2 routines basically utilize this scheme, but reuse parts in
   each iteration. */
static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4)
{
    uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2)
                + ((l2 & ~BYTE_VEC(0x03)) >> 2)
                + ((l3 & ~BYTE_VEC(0x03)) >> 2)
                + ((l4 & ~BYTE_VEC(0x03)) >> 2);
    uint64_t r2 = ((  (l1 & BYTE_VEC(0x03))
                    + (l2 & BYTE_VEC(0x03))
                    + (l3 & BYTE_VEC(0x03))
                    + (l4 & BYTE_VEC(0x03))
                    + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03);
    return r1 + r2;
}
#endif
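
/* Worked example of the scheme above: averaging the bytes 1, 2, 3 and 4
 * with the rounder 2 gives (1 + 2 + 3 + 4 + 2) >> 2 = 3; the high parts
 * contribute r1 = 0 + 0 + 0 + 1 = 1 and the low two-bit parts contribute
 * r2 = ((1 + 2 + 3 + 0 + 2) >> 2) & 3 = 2.  Pre-shifting the high parts
 * is what keeps the per-byte sums from overflowing into the neighbouring
 * lane. */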

#define OP(LOAD, STORE)                         \
    do {                                        \
        STORE(LOAD(pixels), block);             \
        pixels += line_size;                    \
        block += line_size;                     \
    } while (--h)

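/* OP_X2 averages each row with itself shifted one pixel to the left; a
 * single 8-byte load covers only eight pixels, so the ninth source byte,
 * pixels[8], is spliced into the top lane of the shifted copy. */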
#define OP_X2(LOAD, STORE)                                      \
    do {                                                        \
        uint64_t pix1, pix2;                                    \
                                                                \
        pix1 = LOAD(pixels);                                    \
        pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56);        \
        STORE(AVG2(pix1, pix2), block);                         \
        pixels += line_size;                                    \
        block += line_size;                                     \
    } while (--h)

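/* OP_Y2 averages each row with the row below it, carrying the previous
 * load over in pix so every source row is loaded exactly once
 * (h + 1 loads for h output rows). */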
#define OP_Y2(LOAD, STORE)                              \
    do {                                                \
        uint64_t pix = LOAD(pixels);                    \
        do {                                            \
            uint64_t next_pix;                          \
                                                        \
            pixels += line_size;                        \
            next_pix = LOAD(pixels);                    \
            STORE(AVG2(pix, next_pix), block);          \
            block += line_size;                         \
            pix = next_pix;                             \
        } while (--h);                                  \
    } while (0)

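/* OP_XY2 computes the four-pixel average: per byte the stored value is
 * (p1 + p2 + p3 + p4 + AVG4_ROUNDER) >> 2, with p1/p2 taken from the
 * current row and p3/p4 from the row below.  As in avg4() above, each
 * byte is split into a pre-shifted high part (pix_h) and an exact low
 * two-bit sum (pix_l); both are carried into the next iteration so each
 * row's horizontal pair is decomposed only once. */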
#define OP_XY2(LOAD, STORE)                                                 \
    do {                                                                    \
        uint64_t pix1 = LOAD(pixels);                                       \
        uint64_t pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56);           \
        uint64_t pix_l = (pix1 & BYTE_VEC(0x03))                            \
                       + (pix2 & BYTE_VEC(0x03));                           \
        uint64_t pix_h = ((pix1 & ~BYTE_VEC(0x03)) >> 2)                    \
                       + ((pix2 & ~BYTE_VEC(0x03)) >> 2);                   \
                                                                            \
        do {                                                                \
            uint64_t npix1, npix2;                                          \
            uint64_t npix_l, npix_h;                                        \
            uint64_t avg;                                                   \
                                                                            \
            pixels += line_size;                                            \
            npix1 = LOAD(pixels);                                           \
            npix2 = npix1 >> 8 | ((uint64_t) pixels[8] << 56);              \
            npix_l = (npix1 & BYTE_VEC(0x03))                               \
                   + (npix2 & BYTE_VEC(0x03));                              \
            npix_h = ((npix1 & ~BYTE_VEC(0x03)) >> 2)                       \
                   + ((npix2 & ~BYTE_VEC(0x03)) >> 2);                      \
            avg = (((pix_l + npix_l + AVG4_ROUNDER) >> 2) & BYTE_VEC(0x03)) \
                + pix_h + npix_h;                                           \
            STORE(avg, block);                                              \
                                                                            \
            block += line_size;                                             \
            pix_l = npix_l;                                                 \
            pix_h = npix_h;                                                 \
        } while (--h);                                                      \
    } while (0)

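/* MAKE_OP emits two functions per operation: an 8-pixel-wide version that
 * dispatches on source alignment (uldq for unaligned loads, ldq when the
 * source is 8-byte aligned), and a 16-pixel-wide version that runs the
 * 8-pixel one on both halves of the block. */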
#define MAKE_OP(OPNAME, SUFF, OPKIND, STORE)                                \
static void OPNAME ## _pixels ## SUFF ## _axp                               \
        (uint8_t *restrict block, const uint8_t *restrict pixels,           \
         ptrdiff_t line_size, int h)                                        \
{                                                                           \
    if ((size_t) pixels & 0x7) {                                            \
        OPKIND(uldq, STORE);                                                \
    } else {                                                                \
        OPKIND(ldq, STORE);                                                 \
    }                                                                       \
}                                                                           \
                                                                            \
static void OPNAME ## _pixels16 ## SUFF ## _axp                             \
        (uint8_t *restrict block, const uint8_t *restrict pixels,           \
         ptrdiff_t line_size, int h)                                        \
{                                                                           \
    OPNAME ## _pixels ## SUFF ## _axp(block, pixels, line_size, h);         \
    OPNAME ## _pixels ## SUFF ## _axp(block + 8, pixels + 8, line_size, h); \
}

#define PIXOP(OPNAME, STORE)                 \
    MAKE_OP(OPNAME, ,     OP,     STORE)     \
    MAKE_OP(OPNAME, _x2,  OP_X2,  STORE)     \
    MAKE_OP(OPNAME, _y2,  OP_Y2,  STORE)     \
    MAKE_OP(OPNAME, _xy2, OP_XY2, STORE)

/* Rounding primitives. */
#define AVG2 avg2
#define AVG4 avg4
#define AVG4_ROUNDER BYTE_VEC(0x02)
#define STORE(l, b) stq(l, b)
PIXOP(put, STORE);

#undef STORE
#define STORE(l, b) stq(AVG2(l, ldq(b)), b)
PIXOP(avg, STORE);

/* Non-rounding primitives. */
#undef AVG2
#undef AVG4
#undef AVG4_ROUNDER
#undef STORE
#define AVG2 avg2_no_rnd
#define AVG4 avg4_no_rnd
#define AVG4_ROUNDER BYTE_VEC(0x01)
#define STORE(l, b) stq(l, b)
PIXOP(put_no_rnd, STORE);

#undef STORE
#define STORE(l, b) stq(AVG2(l, ldq(b)), b)
PIXOP(avg_no_rnd, STORE);
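
/* Hand-expanded illustration, kept out of the build: for an aligned
 * source, PIXOP(put, STORE) with the empty suffix reduces to a plain
 * qword copy loop along the following lines: */
#if 0
static void put_pixels_axp(uint8_t *restrict block,
                           const uint8_t *restrict pixels,
                           ptrdiff_t line_size, int h)
{
    do {
        stq(ldq(pixels), block); /* STORE(LOAD(pixels), block) */
        pixels += line_size;
        block += line_size;
    } while (--h);
}
#endif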

static void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels,
                                 ptrdiff_t line_size, int h)
{
    put_pixels_axp_asm(block, pixels, line_size, h);
    put_pixels_axp_asm(block + 8, pixels + 8, line_size, h);
}
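
/* In the hpel function tables the first index selects the block width
 * ([0] = 16 pixels, [1] = 8 pixels) and the second the half-pel case
 * ([0] = full-pel, [1] = horizontal, [2] = vertical, [3] = both);
 * avg_no_rnd_pixels_tab carries 16-pixel entries only. */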

av_cold void ff_hpeldsp_init_alpha(HpelDSPContext *c, int flags)
{
    c->put_pixels_tab[0][0] = put_pixels16_axp_asm;
    c->put_pixels_tab[0][1] = put_pixels16_x2_axp;
    c->put_pixels_tab[0][2] = put_pixels16_y2_axp;
    c->put_pixels_tab[0][3] = put_pixels16_xy2_axp;

    c->put_no_rnd_pixels_tab[0][0] = put_pixels16_axp_asm;
    c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_axp;
    c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_axp;
    c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_axp;

    c->avg_pixels_tab[0][0] = avg_pixels16_axp;
    c->avg_pixels_tab[0][1] = avg_pixels16_x2_axp;
    c->avg_pixels_tab[0][2] = avg_pixels16_y2_axp;
    c->avg_pixels_tab[0][3] = avg_pixels16_xy2_axp;

    c->avg_no_rnd_pixels_tab[0] = avg_no_rnd_pixels16_axp;
    c->avg_no_rnd_pixels_tab[1] = avg_no_rnd_pixels16_x2_axp;
    c->avg_no_rnd_pixels_tab[2] = avg_no_rnd_pixels16_y2_axp;
    c->avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels16_xy2_axp;

    c->put_pixels_tab[1][0] = put_pixels_axp_asm;
    c->put_pixels_tab[1][1] = put_pixels_x2_axp;
    c->put_pixels_tab[1][2] = put_pixels_y2_axp;
    c->put_pixels_tab[1][3] = put_pixels_xy2_axp;

    c->put_no_rnd_pixels_tab[1][0] = put_pixels_axp_asm;
    c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels_x2_axp;
    c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels_y2_axp;
    c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels_xy2_axp;

    c->avg_pixels_tab[1][0] = avg_pixels_axp;
    c->avg_pixels_tab[1][1] = avg_pixels_x2_axp;
    c->avg_pixels_tab[1][2] = avg_pixels_y2_axp;
    c->avg_pixels_tab[1][3] = avg_pixels_xy2_axp;
}