annotate ffmpeg/libavresample/arm/audio_convert_neon.S @ 13:844d341cf643 tip

Back up before ISMIR
author Yading Song <yading.song@eecs.qmul.ac.uk>
date Thu, 31 Oct 2013 13:17:06 +0000
parents f445c3017523
children
rev   line source
yading@11 1 /*
yading@11 2 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
yading@11 3 *
yading@11 4 * This file is part of FFmpeg
yading@11 5 *
yading@11 6 * FFmpeg is free software; you can redistribute it and/or
yading@11 7 * modify it under the terms of the GNU Lesser General Public
yading@11 8 * License as published by the Free Software Foundation; either
yading@11 9 * version 2.1 of the License, or (at your option) any later version.
yading@11 10 *
yading@11 11 * FFmpeg is distributed in the hope that it will be useful,
yading@11 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
yading@11 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
yading@11 14 * Lesser General Public License for more details.
yading@11 15 *
yading@11 16 * You should have received a copy of the GNU Lesser General Public
yading@11 17 * License along with FFmpeg; if not, write to the Free Software
yading@11 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
yading@11 19 */
yading@11 20
yading@11 21 #include "config.h"
yading@11 22 #include "libavutil/arm/asm.S"
yading@11 23
/*
 * ff_conv_flt_to_s16_neon: convert a contiguous run of 32-bit floats to
 * signed 16-bit integers.
 *
 * Register use, as seen in the code below:
 *   r0  = output pointer (vst1.16 with a 128-bit alignment qualifier,
 *         post-incremented)
 *   r1  = input pointer (vld1.32 with a 128-bit alignment qualifier,
 *         post-incremented)
 *   r2  = sample count
 *   r12 = scratch loop counter
 *   q0-q3, q8, q9 and the condition flags are overwritten.
 * NOTE(review): the C prototype is not visible in this file; presumably
 * (int16_t *dst, const float *src, int len) -- confirm against the caller.
 *
 * Each float is converted to signed Q31 fixed point (vcvt.s32.f32 ..., #31)
 * and then narrowed to 16 bits with a saturating, rounding right shift by
 * 16 (vqrshrn.s32 ..., #16).
 *
 * NOTE(review): the first 8 samples are loaded before the count is tested
 * and the count is only ever reduced in steps of 8 or 16, so the routine
 * relies on the count being a non-zero multiple of 8. No guard for other
 * values exists here; this looks like a caller precondition -- confirm.
 */
yading@11 24 function ff_conv_flt_to_s16_neon, export=1
/*
 * r2 = count - 8. The flags set here are consumed by the "beq 3f" below;
 * the NEON loads and conversions in between leave the condition flags alone.
 * q8/q9 = first 8 samples, converted to Q31.
 */
yading@11 25 subs r2, r2, #8
yading@11 26 vld1.32 {q0}, [r1,:128]!
yading@11 27 vcvt.s32.f32 q8, q0, #31
yading@11 28 vld1.32 {q1}, [r1,:128]!
yading@11 29 vcvt.s32.f32 q9, q1, #31
/* count == 8: only the samples already held in q8/q9 need to be stored. */
yading@11 30 beq 3f
/*
 * r12 = remaining count rounded down to a multiple of 16. Zero means fewer
 * than 16 samples remain, which is handled by the tail at label 2.
 */
yading@11 31 bics r12, r2, #15
yading@11 32 beq 2f
/*
 * Main loop, 16 samples stored per iteration.
 * On entry q8/q9 hold 8 converted samples that have not been stored yet.
 * They are narrowed into q2 (d4/d5) while 8 new samples are loaded,
 * converted (q0/q1) and narrowed into q3 (d6/d7). The following 8 samples
 * are then loaded and converted into q8/q9 for the next pass or for a tail.
 * The flags from "subs r12" are tested by the "bne 1b" at the bottom.
 */
yading@11 33 1: subs r12, r12, #16
yading@11 34 vqrshrn.s32 d4, q8, #16
yading@11 35 vld1.32 {q0}, [r1,:128]!
yading@11 36 vcvt.s32.f32 q0, q0, #31
yading@11 37 vqrshrn.s32 d5, q9, #16
yading@11 38 vld1.32 {q1}, [r1,:128]!
yading@11 39 vcvt.s32.f32 q1, q1, #31
yading@11 40 vqrshrn.s32 d6, q0, #16
yading@11 41 vst1.16 {q2}, [r0,:128]!
yading@11 42 vqrshrn.s32 d7, q1, #16
yading@11 43 vld1.32 {q8}, [r1,:128]!
yading@11 44 vcvt.s32.f32 q8, q8, #31
yading@11 45 vld1.32 {q9}, [r1,:128]!
yading@11 46 vcvt.s32.f32 q9, q9, #31
yading@11 47 vst1.16 {q3}, [r0,:128]!
yading@11 48 bne 1b
/*
 * r2 = (count - 8) modulo 16. Zero: only the 8 pending samples in q8/q9
 * are left (label 3). Otherwise fall through to label 2.
 */
yading@11 49 ands r2, r2, #15
yading@11 50 beq 3f
/*
 * Tail storing 16 samples: the 8 pending in q8/q9 plus 8 more that are
 * loaded and converted here.
 */
yading@11 51 2: vld1.32 {q0}, [r1,:128]!
yading@11 52 vqrshrn.s32 d4, q8, #16
yading@11 53 vcvt.s32.f32 q0, q0, #31
yading@11 54 vld1.32 {q1}, [r1,:128]!
yading@11 55 vqrshrn.s32 d5, q9, #16
yading@11 56 vcvt.s32.f32 q1, q1, #31
yading@11 57 vqrshrn.s32 d6, q0, #16
yading@11 58 vst1.16 {q2}, [r0,:128]!
yading@11 59 vqrshrn.s32 d7, q1, #16
yading@11 60 vst1.16 {q3}, [r0,:128]!
yading@11 61 bx lr
/* Tail storing only the 8 pending samples held in q8/q9. */
yading@11 62 3: vqrshrn.s32 d4, q8, #16
yading@11 63 vqrshrn.s32 d5, q9, #16
yading@11 64 vst1.16 {q2}, [r0,:128]!
yading@11 65 bx lr
yading@11 66 endfunc
yading@11 67
/*
 * ff_conv_fltp_to_s16_2ch_neon: convert two separate float buffers into one
 * interleaved signed 16-bit output.
 *
 * Register use, as seen in the code below:
 *   r0  = output pointer (vst1.16 with a 128-bit alignment qualifier,
 *         post-incremented)
 *   r1  = on entry, address of two consecutive pointers; the ldm replaces
 *         it with the first pointer and puts the second one in r3
 *   r2  = count of samples per input buffer
 *   r3  = second input pointer (overwritten by the ldm)
 *   r12 = scratch loop counter
 *   q0, q1, q8-q13 and the condition flags are overwritten.
 * NOTE(review): the C prototype is not visible in this file; presumably
 * (int16_t *dst, float *const *src, int len) -- confirm against the caller.
 *
 * Both inputs are converted to signed Q31 (vcvt.s32.f32 ..., #31).
 * "vsri.32 qB, qA, #16" then keeps the upper 16 bits of each qB lane and
 * inserts the upper 16 bits of the matching qA lane into its lower half,
 * so every 32-bit lane becomes one output frame. vst1.16 writes the lower
 * halfword first, i.e. the sample from the first buffer precedes the sample
 * from the second buffer in memory. Only the upper 16 bits of the Q31 value
 * are kept; no rounding shift is applied in this routine.
 *
 * NOTE(review): 8 samples per buffer are loaded before the count is tested
 * and the count is only reduced in steps of 8 or 16, so the routine relies
 * on the count being a non-zero multiple of 8 -- confirm with the caller.
 */
yading@11 68 function ff_conv_fltp_to_s16_2ch_neon, export=1
/* r1 = first input pointer, r3 = second input pointer. */
yading@11 69 ldm r1, {r1, r3}
/*
 * r2 = count - 8; the flags are consumed by the "beq 3f" below (the NEON
 * instructions in between leave the condition flags alone).
 * q8/q9 = first 8 samples of the first buffer, q10/q11 = first 8 samples
 * of the second buffer, all in Q31.
 */
yading@11 70 subs r2, r2, #8
yading@11 71 vld1.32 {q0}, [r1,:128]!
yading@11 72 vcvt.s32.f32 q8, q0, #31
yading@11 73 vld1.32 {q1}, [r1,:128]!
yading@11 74 vcvt.s32.f32 q9, q1, #31
yading@11 75 vld1.32 {q10}, [r3,:128]!
yading@11 76 vcvt.s32.f32 q10, q10, #31
yading@11 77 vld1.32 {q11}, [r3,:128]!
yading@11 78 vcvt.s32.f32 q11, q11, #31
/* count == 8: only the frames already held in q8-q11 need to be stored. */
yading@11 79 beq 3f
/*
 * r12 = remaining count rounded down to a multiple of 16. Zero means fewer
 * than 16 frames remain, which is handled by the tail at label 2.
 */
yading@11 80 bics r12, r2, #15
yading@11 81 beq 2f
/*
 * Main loop, 16 frames stored per iteration.
 * On entry q8/q9 (first buffer) and q10/q11 (second buffer) hold 8 pending
 * frames. They are merged into q10/q11 and stored while the next 8 frames
 * are loaded into q0/q1 and q12/q13, merged into q12/q13 and stored. A
 * further 8 frames are then loaded and converted into q8-q11 for the next
 * pass or for a tail. The flags from "subs r12" are tested by "bne 1b".
 */
yading@11 82 1: subs r12, r12, #16
yading@11 83 vld1.32 {q0}, [r1,:128]!
yading@11 84 vcvt.s32.f32 q0, q0, #31
yading@11 85 vsri.32 q10, q8, #16
yading@11 86 vld1.32 {q1}, [r1,:128]!
yading@11 87 vcvt.s32.f32 q1, q1, #31
yading@11 88 vld1.32 {q12}, [r3,:128]!
yading@11 89 vcvt.s32.f32 q12, q12, #31
yading@11 90 vld1.32 {q13}, [r3,:128]!
yading@11 91 vsri.32 q11, q9, #16
yading@11 92 vst1.16 {q10}, [r0,:128]!
yading@11 93 vcvt.s32.f32 q13, q13, #31
yading@11 94 vst1.16 {q11}, [r0,:128]!
yading@11 95 vsri.32 q12, q0, #16
yading@11 96 vld1.32 {q8}, [r1,:128]!
yading@11 97 vsri.32 q13, q1, #16
yading@11 98 vst1.16 {q12}, [r0,:128]!
yading@11 99 vcvt.s32.f32 q8, q8, #31
yading@11 100 vld1.32 {q9}, [r1,:128]!
yading@11 101 vcvt.s32.f32 q9, q9, #31
yading@11 102 vld1.32 {q10}, [r3,:128]!
yading@11 103 vcvt.s32.f32 q10, q10, #31
yading@11 104 vld1.32 {q11}, [r3,:128]!
yading@11 105 vcvt.s32.f32 q11, q11, #31
yading@11 106 vst1.16 {q13}, [r0,:128]!
yading@11 107 bne 1b
/*
 * r2 = (count - 8) modulo 16. Zero: only the 8 pending frames in q8-q11
 * are left (label 3). Otherwise fall through to label 2.
 */
yading@11 108 ands r2, r2, #15
yading@11 109 beq 3f
/*
 * Tail storing 16 frames: the 8 pending in q8-q11 plus 8 more that are
 * loaded, converted and merged here.
 */
yading@11 110 2: vsri.32 q10, q8, #16
yading@11 111 vld1.32 {q0}, [r1,:128]!
yading@11 112 vcvt.s32.f32 q0, q0, #31
yading@11 113 vld1.32 {q1}, [r1,:128]!
yading@11 114 vcvt.s32.f32 q1, q1, #31
yading@11 115 vld1.32 {q12}, [r3,:128]!
yading@11 116 vcvt.s32.f32 q12, q12, #31
yading@11 117 vsri.32 q11, q9, #16
yading@11 118 vld1.32 {q13}, [r3,:128]!
yading@11 119 vcvt.s32.f32 q13, q13, #31
yading@11 120 vst1.16 {q10}, [r0,:128]!
yading@11 121 vsri.32 q12, q0, #16
yading@11 122 vst1.16 {q11}, [r0,:128]!
yading@11 123 vsri.32 q13, q1, #16
yading@11 124 vst1.16 {q12-q13},[r0,:128]!
yading@11 125 bx lr
/* Tail storing only the 8 pending frames held in q8-q11. */
yading@11 126 3: vsri.32 q10, q8, #16
yading@11 127 vsri.32 q11, q9, #16
yading@11 128 vst1.16 {q10-q11},[r0,:128]!
yading@11 129 bx lr
yading@11 130 endfunc
yading@11 131
/*
 * ff_conv_fltp_to_s16_neon: convert several separate float buffers (one per
 * channel) into one interleaved signed 16-bit output.
 *
 * Register use, as seen in the code below:
 *   r0  = output pointer; advanced by 2 bytes per channel already handled
 *   r1  = address of an array of per-channel input pointers; advanced by
 *         the ldm write-back as channels are consumed
 *   r2  = count of samples per channel (kept intact, copied to lr per group)
 *   r3  = number of channels still to be processed
 *   r12 = r3 * 2 at entry = distance in bytes between consecutive output
 *         frames
 *   r4-r8, lr are saved and restored here; q0-q3, q8-q11 and the condition
 *   flags are overwritten.
 * NOTE(review): the C prototype is not visible in this file; presumably
 * (int16_t *dst, float *const *src, int len, int channels) -- confirm.
 *
 * Structure:
 *   - fewer than 2 channels: tail-branch to ff_conv_flt_to_s16_neon with
 *     r1 replaced by the first channel pointer
 *   - exactly 2 channels: tail-branch to ff_conv_fltp_to_s16_2ch_neon
 *   - otherwise: channels are processed in groups of 4 while at least 4
 *     remain, then one group of 2 if at least 2 remain, then a single
 *     channel if one is left. Each group writes its samples into every
 *     frame of the output at stride r12.
 *
 * Samples are converted to signed Q31 (vcvt.s32.f32 ..., #31) and the upper
 * 16 bits of each value are used as the output sample; no rounding shift is
 * applied in this routine.
 *
 * All input loads carry a 128-bit alignment qualifier. The 4- and 2-channel
 * stores carry none; the 1-channel stores carry a 16-bit qualifier.
 *
 * Numeric labels are reused: "Nf"/"Nb" always mean the nearest label N in
 * the forward/backward direction.
 *
 * NOTE(review): every path loads input before testing the count and reduces
 * it in steps of 8 or 16, so the routine relies on the count being a
 * non-zero multiple of 8 -- confirm with the caller.
 */
yading@11 132 function ff_conv_fltp_to_s16_neon, export=1
/* Dispatch on the channel count; itt makes the next two instructions
 * conditional on "lt". */
yading@11 133 cmp r3, #2
yading@11 134 itt lt
yading@11 135 ldrlt r1, [r1]
yading@11 136 blt ff_conv_flt_to_s16_neon
yading@11 137 beq ff_conv_fltp_to_s16_2ch_neon
yading@11 138
/*
 * More than 2 channels. r12 = channels * 2 (frame stride in bytes).
 * lsl does not set flags, so "blt 4f" tests "cmp r3, #4": fewer than 4
 * channels skips straight to the 2-channel section.
 */
yading@11 139 push {r4-r8, lr}
yading@11 140 cmp r3, #4
yading@11 141 lsl r12, r3, #1
yading@11 142 blt 4f
yading@11 143
yading@11 144 @ 4 channels
/*
 * Group setup: r4-r7 = the next four channel pointers (r1 moves past them),
 * lr = frame counter, r8 = output cursor for this group.
 * q8-q11 = first 4 frames of the four channels, in Q31.
 */
yading@11 145 5: ldm r1!, {r4-r7}
yading@11 146 mov lr, r2
yading@11 147 mov r8, r0
yading@11 148 vld1.32 {q8}, [r4,:128]!
yading@11 149 vcvt.s32.f32 q8, q8, #31
yading@11 150 vld1.32 {q9}, [r5,:128]!
yading@11 151 vcvt.s32.f32 q9, q9, #31
yading@11 152 vld1.32 {q10}, [r6,:128]!
yading@11 153 vcvt.s32.f32 q10, q10, #31
yading@11 154 vld1.32 {q11}, [r7,:128]!
yading@11 155 vcvt.s32.f32 q11, q11, #31
/*
 * Inner loop, 8 frames per iteration.
 * q8-q11 hold 4 pending frames; q0-q3 receive the following 4 frames.
 * vsri packs two channels into each 32-bit lane (first channel of the pair
 * in the lower halfword). vzip.32 then pairs the lanes so that each
 * d register holds one complete 4-channel frame:
 *   d18, d22, d19, d23 = pending frames 0..3
 *   d2,  d6,  d3,  d7  = following frames 4..7
 * Each frame is stored as 8 bytes and r8 steps by the frame stride r12.
 * The flags from "subs lr" are tested by the "beq 7f" further down.
 */
yading@11 156 6: subs lr, lr, #8
yading@11 157 vld1.32 {q0}, [r4,:128]!
yading@11 158 vcvt.s32.f32 q0, q0, #31
yading@11 159 vsri.32 q9, q8, #16
yading@11 160 vld1.32 {q1}, [r5,:128]!
yading@11 161 vcvt.s32.f32 q1, q1, #31
yading@11 162 vsri.32 q11, q10, #16
yading@11 163 vld1.32 {q2}, [r6,:128]!
yading@11 164 vcvt.s32.f32 q2, q2, #31
yading@11 165 vzip.32 d18, d22
yading@11 166 vld1.32 {q3}, [r7,:128]!
yading@11 167 vcvt.s32.f32 q3, q3, #31
yading@11 168 vzip.32 d19, d23
yading@11 169 vst1.16 {d18}, [r8], r12
yading@11 170 vsri.32 q1, q0, #16
yading@11 171 vst1.16 {d22}, [r8], r12
yading@11 172 vsri.32 q3, q2, #16
yading@11 173 vst1.16 {d19}, [r8], r12
yading@11 174 vzip.32 d2, d6
yading@11 175 vst1.16 {d23}, [r8], r12
yading@11 176 vzip.32 d3, d7
/* lr == 0: nothing left to preload, only frames 4..7 remain to be stored. */
yading@11 177 beq 7f
/* Preload the next 4 frames into q8-q11 while storing frames 4..7. */
yading@11 178 vld1.32 {q8}, [r4,:128]!
yading@11 179 vcvt.s32.f32 q8, q8, #31
yading@11 180 vst1.16 {d2}, [r8], r12
yading@11 181 vld1.32 {q9}, [r5,:128]!
yading@11 182 vcvt.s32.f32 q9, q9, #31
yading@11 183 vst1.16 {d6}, [r8], r12
yading@11 184 vld1.32 {q10}, [r6,:128]!
yading@11 185 vcvt.s32.f32 q10, q10, #31
yading@11 186 vst1.16 {d3}, [r8], r12
yading@11 187 vld1.32 {q11}, [r7,:128]!
yading@11 188 vcvt.s32.f32 q11, q11, #31
yading@11 189 vst1.16 {d7}, [r8], r12
yading@11 190 b 6b
/*
 * End of a 4-channel group: store the last 4 frames, then r3 -= 4 and
 * return if no channels remain. Otherwise advance r0 by 8 bytes
 * (4 channels of 2 bytes) and start another group while at least 4 channels
 * are left. "add" does not set flags, so "bge 5b" tests "cmp r3, #4".
 */
yading@11 191 7: vst1.16 {d2}, [r8], r12
yading@11 192 vst1.16 {d6}, [r8], r12
yading@11 193 vst1.16 {d3}, [r8], r12
yading@11 194 vst1.16 {d7}, [r8], r12
yading@11 195 subs r3, r3, #4
yading@11 196 it eq
yading@11 197 popeq {r4-r8, pc}
yading@11 198 cmp r3, #4
yading@11 199 add r0, r0, #8
yading@11 200 bge 5b
yading@11 201
yading@11 202 @ 2 channels
/*
 * Fewer than 2 channels left: go to the 1-channel section (next label 4).
 * Otherwise r4/r5 = the next two channel pointers, lr = frame counter,
 * r8 = output cursor. "tst lr, #8" sets Z when bit 3 of the count is clear;
 * that flag is consumed by the "beq 6f" after the preload.
 * Preload 8 frames: q8/q10 = first channel frames 0..3 / 4..7,
 * q9/q11 = second channel frames 0..3 / 4..7.
 */
yading@11 203 4: cmp r3, #2
yading@11 204 blt 4f
yading@11 205 ldm r1!, {r4-r5}
yading@11 206 mov lr, r2
yading@11 207 mov r8, r0
yading@11 208 tst lr, #8
yading@11 209 vld1.32 {q8}, [r4,:128]!
yading@11 210 vcvt.s32.f32 q8, q8, #31
yading@11 211 vld1.32 {q9}, [r5,:128]!
yading@11 212 vcvt.s32.f32 q9, q9, #31
yading@11 213 vld1.32 {q10}, [r4,:128]!
yading@11 214 vcvt.s32.f32 q10, q10, #31
yading@11 215 vld1.32 {q11}, [r5,:128]!
yading@11 216 vcvt.s32.f32 q11, q11, #31
/* Bit 3 of the count clear: go straight to the 16-frame loop. */
yading@11 217 beq 6f
/*
 * Bit 3 of the count set: handle one block of 8 frames first.
 * If that was the whole count (lr becomes 0), label 7 stores it and the
 * group is finished. Otherwise store these 8 frames one 32-bit lane
 * (one frame) at a time and preload the next 8 frames into q8-q11.
 */
yading@11 218 subs lr, lr, #8
yading@11 219 beq 7f
yading@11 220 vsri.32 d18, d16, #16
yading@11 221 vsri.32 d19, d17, #16
yading@11 222 vld1.32 {q8}, [r4,:128]!
yading@11 223 vcvt.s32.f32 q8, q8, #31
yading@11 224 vst1.32 {d18[0]}, [r8], r12
yading@11 225 vsri.32 d22, d20, #16
yading@11 226 vst1.32 {d18[1]}, [r8], r12
yading@11 227 vsri.32 d23, d21, #16
yading@11 228 vst1.32 {d19[0]}, [r8], r12
yading@11 229 vst1.32 {d19[1]}, [r8], r12
yading@11 230 vld1.32 {q9}, [r5,:128]!
yading@11 231 vcvt.s32.f32 q9, q9, #31
yading@11 232 vst1.32 {d22[0]}, [r8], r12
yading@11 233 vst1.32 {d22[1]}, [r8], r12
yading@11 234 vld1.32 {q10}, [r4,:128]!
yading@11 235 vcvt.s32.f32 q10, q10, #31
yading@11 236 vst1.32 {d23[0]}, [r8], r12
yading@11 237 vst1.32 {d23[1]}, [r8], r12
yading@11 238 vld1.32 {q11}, [r5,:128]!
yading@11 239 vcvt.s32.f32 q11, q11, #31
/*
 * Main 2-channel loop, 16 frames per iteration.
 * q8-q11 hold 8 pending frames; q0-q3 receive the following 8 frames
 * (q0/q2 from the first channel, q1/q3 from the second).
 * vsri merges each pair into d18, d19, d22, d23 (pending) and
 * d2, d3, d6, d7 (following); each 32-bit lane is one frame.
 * The flags from "subs lr" are tested by "beq 6f" and later by "bgt 6b".
 */
yading@11 240 6: subs lr, lr, #16
yading@11 241 vld1.32 {q0}, [r4,:128]!
yading@11 242 vcvt.s32.f32 q0, q0, #31
yading@11 243 vsri.32 d18, d16, #16
yading@11 244 vld1.32 {q1}, [r5,:128]!
yading@11 245 vcvt.s32.f32 q1, q1, #31
yading@11 246 vsri.32 d19, d17, #16
yading@11 247 vld1.32 {q2}, [r4,:128]!
yading@11 248 vcvt.s32.f32 q2, q2, #31
yading@11 249 vld1.32 {q3}, [r5,:128]!
yading@11 250 vcvt.s32.f32 q3, q3, #31
yading@11 251 vst1.32 {d18[0]}, [r8], r12
yading@11 252 vsri.32 d22, d20, #16
yading@11 253 vst1.32 {d18[1]}, [r8], r12
yading@11 254 vsri.32 d23, d21, #16
yading@11 255 vst1.32 {d19[0]}, [r8], r12
yading@11 256 vsri.32 d2, d0, #16
yading@11 257 vst1.32 {d19[1]}, [r8], r12
yading@11 258 vsri.32 d3, d1, #16
yading@11 259 vst1.32 {d22[0]}, [r8], r12
yading@11 260 vsri.32 d6, d4, #16
yading@11 261 vst1.32 {d22[1]}, [r8], r12
yading@11 262 vsri.32 d7, d5, #16
yading@11 263 vst1.32 {d23[0]}, [r8], r12
yading@11 264 vst1.32 {d23[1]}, [r8], r12
/* lr == 0: skip the preload and store the last 8 frames at the next
 * label 6. */
yading@11 265 beq 6f
/* Preload the next 8 frames into q8-q11 while storing the second 8 frames
 * of this iteration. */
yading@11 266 vld1.32 {q8}, [r4,:128]!
yading@11 267 vcvt.s32.f32 q8, q8, #31
yading@11 268 vst1.32 {d2[0]}, [r8], r12
yading@11 269 vst1.32 {d2[1]}, [r8], r12
yading@11 270 vld1.32 {q9}, [r5,:128]!
yading@11 271 vcvt.s32.f32 q9, q9, #31
yading@11 272 vst1.32 {d3[0]}, [r8], r12
yading@11 273 vst1.32 {d3[1]}, [r8], r12
yading@11 274 vld1.32 {q10}, [r4,:128]!
yading@11 275 vcvt.s32.f32 q10, q10, #31
yading@11 276 vst1.32 {d6[0]}, [r8], r12
yading@11 277 vst1.32 {d6[1]}, [r8], r12
yading@11 278 vld1.32 {q11}, [r5,:128]!
yading@11 279 vcvt.s32.f32 q11, q11, #31
yading@11 280 vst1.32 {d7[0]}, [r8], r12
yading@11 281 vst1.32 {d7[1]}, [r8], r12
yading@11 282 bgt 6b
/* Final 8 frames of the 2-channel group (reached through "beq 6f"). */
yading@11 283 6: vst1.32 {d2[0]}, [r8], r12
yading@11 284 vst1.32 {d2[1]}, [r8], r12
yading@11 285 vst1.32 {d3[0]}, [r8], r12
yading@11 286 vst1.32 {d3[1]}, [r8], r12
yading@11 287 vst1.32 {d6[0]}, [r8], r12
yading@11 288 vst1.32 {d6[1]}, [r8], r12
yading@11 289 vst1.32 {d7[0]}, [r8], r12
yading@11 290 vst1.32 {d7[1]}, [r8], r12
yading@11 291 b 8f
/* Count was exactly 8: merge and store the 8 preloaded frames. */
yading@11 292 7: vsri.32 d18, d16, #16
yading@11 293 vsri.32 d19, d17, #16
yading@11 294 vst1.32 {d18[0]}, [r8], r12
yading@11 295 vsri.32 d22, d20, #16
yading@11 296 vst1.32 {d18[1]}, [r8], r12
yading@11 297 vsri.32 d23, d21, #16
yading@11 298 vst1.32 {d19[0]}, [r8], r12
yading@11 299 vst1.32 {d19[1]}, [r8], r12
yading@11 300 vst1.32 {d22[0]}, [r8], r12
yading@11 301 vst1.32 {d22[1]}, [r8], r12
yading@11 302 vst1.32 {d23[0]}, [r8], r12
yading@11 303 vst1.32 {d23[1]}, [r8], r12
/*
 * End of the 2-channel group: r3 -= 2, r0 += 4 bytes (2 channels of
 * 2 bytes). "add" does not set flags, so popeq tests the subs result and
 * returns when no channel is left; otherwise fall through to the
 * 1-channel section.
 */
yading@11 304 8: subs r3, r3, #2
yading@11 305 add r0, r0, #4
yading@11 306 it eq
yading@11 307 popeq {r4-r8, pc}
yading@11 308
yading@11 309 @ 1 channel
/*
 * r4 = the remaining channel pointer, lr = frame counter, r5 = output
 * cursor. "tst r2, #8" sets the flags for the "bne 8f" after the preload
 * (mov and the NEON instructions leave them alone).
 * q0/q1 = first 8 samples in Q31.
 */
yading@11 310 4: ldr r4, [r1]
yading@11 311 tst r2, #8
yading@11 312 mov lr, r2
yading@11 313 mov r5, r0
yading@11 314 vld1.32 {q0}, [r4,:128]!
yading@11 315 vcvt.s32.f32 q0, q0, #31
yading@11 316 vld1.32 {q1}, [r4,:128]!
yading@11 317 vcvt.s32.f32 q1, q1, #31
/* Bit 3 of the count set: store one block of 8 samples first (label 8). */
yading@11 318 bne 8f
/*
 * Main 1-channel loop, 16 samples per iteration.
 * q0/q1 hold 8 pending samples; q2/q3 receive the following 8.
 * The odd 16-bit lanes ([1] and [3] of each d register) are the upper
 * halfwords of the 32-bit values; each is stored as one sample and r5
 * steps by the frame stride r12.
 * The flags from "subs lr" are tested by "beq 7f" and later by "bgt 6b".
 */
yading@11 319 6: subs lr, lr, #16
yading@11 320 vld1.32 {q2}, [r4,:128]!
yading@11 321 vcvt.s32.f32 q2, q2, #31
yading@11 322 vld1.32 {q3}, [r4,:128]!
yading@11 323 vcvt.s32.f32 q3, q3, #31
yading@11 324 vst1.16 {d0[1]}, [r5,:16], r12
yading@11 325 vst1.16 {d0[3]}, [r5,:16], r12
yading@11 326 vst1.16 {d1[1]}, [r5,:16], r12
yading@11 327 vst1.16 {d1[3]}, [r5,:16], r12
yading@11 328 vst1.16 {d2[1]}, [r5,:16], r12
yading@11 329 vst1.16 {d2[3]}, [r5,:16], r12
yading@11 330 vst1.16 {d3[1]}, [r5,:16], r12
yading@11 331 vst1.16 {d3[3]}, [r5,:16], r12
/* lr == 0: skip the preload of the next 8 samples. */
yading@11 332 beq 7f
yading@11 333 vld1.32 {q0}, [r4,:128]!
yading@11 334 vcvt.s32.f32 q0, q0, #31
yading@11 335 vld1.32 {q1}, [r4,:128]!
yading@11 336 vcvt.s32.f32 q1, q1, #31
/* Store the second 8 samples (q2/q3); loop while lr is still positive,
 * otherwise return. */
yading@11 337 7: vst1.16 {d4[1]}, [r5,:16], r12
yading@11 338 vst1.16 {d4[3]}, [r5,:16], r12
yading@11 339 vst1.16 {d5[1]}, [r5,:16], r12
yading@11 340 vst1.16 {d5[3]}, [r5,:16], r12
yading@11 341 vst1.16 {d6[1]}, [r5,:16], r12
yading@11 342 vst1.16 {d6[3]}, [r5,:16], r12
yading@11 343 vst1.16 {d7[1]}, [r5,:16], r12
yading@11 344 vst1.16 {d7[3]}, [r5,:16], r12
yading@11 345 bgt 6b
yading@11 346 pop {r4-r8, pc}
/*
 * Leading block of 8 samples (bit 3 of the count set): store q0/q1, return
 * if that was the whole count, otherwise preload 8 more samples and join
 * the 16-sample loop.
 */
yading@11 347 8: subs lr, lr, #8
yading@11 348 vst1.16 {d0[1]}, [r5,:16], r12
yading@11 349 vst1.16 {d0[3]}, [r5,:16], r12
yading@11 350 vst1.16 {d1[1]}, [r5,:16], r12
yading@11 351 vst1.16 {d1[3]}, [r5,:16], r12
yading@11 352 vst1.16 {d2[1]}, [r5,:16], r12
yading@11 353 vst1.16 {d2[3]}, [r5,:16], r12
yading@11 354 vst1.16 {d3[1]}, [r5,:16], r12
yading@11 355 vst1.16 {d3[3]}, [r5,:16], r12
yading@11 356 it eq
yading@11 357 popeq {r4-r8, pc}
yading@11 358 vld1.32 {q0}, [r4,:128]!
yading@11 359 vcvt.s32.f32 q0, q0, #31
yading@11 360 vld1.32 {q1}, [r4,:128]!
yading@11 361 vcvt.s32.f32 q1, q1, #31
yading@11 362 b 6b
yading@11 363 endfunc