annotate src/libmad-0.15.1b/imdct_l_arm.S @ 169:223a55898ab9 tip default

Add null config files
author Chris Cannam <cannam@all-day-breakfast.com>
date Mon, 02 Mar 2020 14:03:47 +0000
parents 545efbb81310
children
rev   line source
cannam@85 1 /*****************************************************************************
cannam@85 2 * Copyright (C) 2000-2001 Andre McCurdy <armccurdy@yahoo.co.uk>
cannam@85 3 *
cannam@85 4 * This program is free software; you can redistribute it and/or modify
cannam@85 5 * it under the terms of the GNU General Public License as published by
cannam@85 6 * the Free Software Foundation; either version 2 of the License, or
cannam@85 7 * (at your option) any later version.
cannam@85 8 *
cannam@85 9 * This program is distributed in the hope that it will be useful,
cannam@85 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
cannam@85 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
cannam@85 12 * GNU General Public License for more details.
cannam@85 13 *
cannam@85 14 * You should have received a copy of the GNU General Public License
cannam@85 15 * along with this program; if not, write to the Free Software
cannam@85 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
cannam@85 17 *
cannam@85 18 *****************************************************************************
cannam@85 19 *
cannam@85 20 * Notes:
cannam@85 21 *
cannam@85 22 *
cannam@85 23 *****************************************************************************
cannam@85 24 *
cannam@85 25 * $Id: imdct_l_arm.S,v 1.7 2001/03/25 20:03:34 rob Rel $
cannam@85 26 *
cannam@85 27 * 2001/03/24: Andre McCurdy <armccurdy@yahoo.co.uk>
cannam@85 28 * - Corrected PIC unsafe loading of address of 'imdct36_long_karray'
cannam@85 29 *
cannam@85 30 * 2000/09/20: Robert Leslie <rob@mars.org>
cannam@85 31 * - Added a global symbol with leading underscore per suggestion of
cannam@85 32 * Simon Burge to support linking with the a.out format.
cannam@85 33 *
cannam@85 34 * 2000/09/15: Robert Leslie <rob@mars.org>
cannam@85 35 * - Fixed a small bug where flags were changed before a conditional branch.
cannam@85 36 *
cannam@85 37 * 2000/09/15: Andre McCurdy <armccurdy@yahoo.co.uk>
cannam@85 38 * - Applied Nicolas Pitre's rounding optimisation in all remaining places.
cannam@85 39 *
cannam@85 40 * 2000/09/09: Nicolas Pitre <nico@cam.org>
cannam@85 41 * - Optimized rounding + scaling operations.
cannam@85 42 *
cannam@85 43 * 2000/08/09: Andre McCurdy <armccurdy@yahoo.co.uk>
cannam@85 44 * - Original created.
cannam@85 45 *
cannam@85 46 ****************************************************************************/
cannam@85 47
cannam@85 48
cannam@85 49 /*
cannam@85 50 On entry:
cannam@85 51
cannam@85 52 r0 = pointer to 18 element input array
cannam@85 53 r1 = pointer to 36 element output array
cannam@85 54 r2 = windowing block type
cannam@85 55
cannam@85 56
cannam@85 57 Stack frame created during execution of the function:
cannam@85 58
cannam@85 59 Initial Holds:
cannam@85 60 Stack
cannam@85 61 pointer
cannam@85 62 minus:
cannam@85 63
cannam@85 64 0
cannam@85 65 4 lr
cannam@85 66 8 r11
cannam@85 67 12 r10
cannam@85 68 16 r9
cannam@85 69 20 r8
cannam@85 70 24 r7
cannam@85 71 28 r6
cannam@85 72 32 r5
cannam@85 73 36 r4
cannam@85 74
cannam@85 75 40 r2 : windowing block type
cannam@85 76
cannam@85 77 44 ct00 high
cannam@85 78 48 ct00 low
cannam@85 79 52 ct01 high
cannam@85 80 56 ct01 low
cannam@85 81 60 ct04 high
cannam@85 82 64 ct04 low
cannam@85 83 68 ct06 high
cannam@85 84 72 ct06 low
cannam@85 85 76 ct05 high
cannam@85 86 80 ct05 low
cannam@85 87 84 ct03 high
cannam@85 88 88 ct03 low
cannam@85 89 92 -ct05 high
cannam@85 90 96 -ct05 low
cannam@85 91 100 -ct07 high
cannam@85 92 104 -ct07 low
cannam@85 93 108 ct07 high
cannam@85 94 112 ct07 low
cannam@85 95 116 ct02 high
cannam@85 96 120 ct02 low
cannam@85 97 */
cannam@85 98 /* Values of the windowing block type argument (arrives in r2) */
cannam@85 99 #define BLOCK_MODE_NORMAL 0
cannam@85 100 #define BLOCK_MODE_START 1
cannam@85 101 #define BLOCK_MODE_STOP 3
cannam@85 102
cannam@85 103 /* Byte offsets of the 18 input words X[0]..X[17], used relative to r0 */
cannam@85 104 #define X0 0x00
cannam@85 105 #define X1 0x04
cannam@85 106 #define X2 0x08
cannam@85 107 #define X3 0x0C
cannam@85 108 #define X4 0x10
cannam@85 109 #define X5 0x14
cannam@85 110 #define X6 0x18
cannam@85 111 #define X7 0x1c
cannam@85 112 #define X8 0x20
cannam@85 113 #define X9 0x24
cannam@85 114 #define X10 0x28
cannam@85 115 #define X11 0x2c
cannam@85 116 #define X12 0x30
cannam@85 117 #define X13 0x34
cannam@85 118 #define X14 0x38
cannam@85 119 #define X15 0x3c
cannam@85 120 #define X16 0x40
cannam@85 121 #define X17 0x44
cannam@85 122 /* Byte offsets of the 36 output words x[0]..x[35], used relative to r1 */
cannam@85 123 #define x0 0x00
cannam@85 124 #define x1 0x04
cannam@85 125 #define x2 0x08
cannam@85 126 #define x3 0x0C
cannam@85 127 #define x4 0x10
cannam@85 128 #define x5 0x14
cannam@85 129 #define x6 0x18
cannam@85 130 #define x7 0x1c
cannam@85 131 #define x8 0x20
cannam@85 132 #define x9 0x24
cannam@85 133 #define x10 0x28
cannam@85 134 #define x11 0x2c
cannam@85 135 #define x12 0x30
cannam@85 136 #define x13 0x34
cannam@85 137 #define x14 0x38
cannam@85 138 #define x15 0x3c
cannam@85 139 #define x16 0x40
cannam@85 140 #define x17 0x44
cannam@85 141 #define x18 0x48
cannam@85 142 #define x19 0x4c
cannam@85 143 #define x20 0x50
cannam@85 144 #define x21 0x54
cannam@85 145 #define x22 0x58
cannam@85 146 #define x23 0x5c
cannam@85 147 #define x24 0x60
cannam@85 148 #define x25 0x64
cannam@85 149 #define x26 0x68
cannam@85 150 #define x27 0x6c
cannam@85 151 #define x28 0x70
cannam@85 152 #define x29 0x74
cannam@85 153 #define x30 0x78
cannam@85 154 #define x31 0x7c
cannam@85 155 #define x32 0x80
cannam@85 156 #define x33 0x84
cannam@85 157 #define x34 0x88
cannam@85 158 #define x35 0x8c
cannam@85 159 /* IMDCT multipliers. Every 64-bit product sum is reduced by taking bits[59..28], so these carry 28 fractional bits. */
cannam@85 160 #define K00 0x0ffc19fd
cannam@85 161 #define K01 0x00b2aa3e
cannam@85 162 #define K02 0x0fdcf549
cannam@85 163 #define K03 0x0216a2a2
cannam@85 164 #define K04 0x0f9ee890
cannam@85 165 #define K05 0x03768962
cannam@85 166 #define K06 0x0f426cb5
cannam@85 167 #define K07 0x04cfb0e2
cannam@85 168 #define K08 0x0ec835e8
cannam@85 169 #define K09 0x061f78aa
cannam@85 170 #define K10 0x0e313245
cannam@85 171 #define K11 0x07635284
cannam@85 172 #define K12 0x0d7e8807
cannam@85 173 #define K13 0x0898c779
cannam@85 174 #define K14 0x0cb19346
cannam@85 175 #define K15 0x09bd7ca0
cannam@85 176 #define K16 0x0bcbe352
cannam@85 177 #define K17 0x0acf37ad
cannam@85 178 /* 32-bit two's-complement negation of K02, so -K02 can be loaded with a single ldr */
cannam@85 179 #define minus_K02 0xf0230ab7
cannam@85 180 /* Window coefficients, referred to as window_l[0..17] in the code comments; same 28-fractional-bit scaling as Knn */
cannam@85 181 #define WL0 0x00b2aa3e
cannam@85 182 #define WL1 0x0216a2a2
cannam@85 183 #define WL2 0x03768962
cannam@85 184 #define WL3 0x04cfb0e2
cannam@85 185 #define WL4 0x061f78aa
cannam@85 186 #define WL5 0x07635284
cannam@85 187 #define WL6 0x0898c779
cannam@85 188 #define WL7 0x09bd7ca0
cannam@85 189 #define WL8 0x0acf37ad
cannam@85 190 #define WL9 0x0bcbe352
cannam@85 191 #define WL10 0x0cb19346
cannam@85 192 #define WL11 0x0d7e8807
cannam@85 193 #define WL12 0x0e313245
cannam@85 194 #define WL13 0x0ec835e8
cannam@85 195 #define WL14 0x0f426cb5
cannam@85 196 #define WL15 0x0f9ee890
cannam@85 197 #define WL16 0x0fdcf549
cannam@85 198 #define WL17 0x0ffc19fd
cannam@85 199
cannam@85 200
cannam@85 201 @*****************************************************************************
cannam@85 202
cannam@85 203
cannam@85 204 .text
cannam@85 205 .align
cannam@85 206
cannam@85 207 .global III_imdct_l
cannam@85 208 .global _III_imdct_l
cannam@85 209 @ Entry: r0 = 18-word input X[], r1 = 36-word output x[], r2 = block type. Both labels name the same entry point.
cannam@85 210 III_imdct_l:
cannam@85 211 _III_imdct_l:
cannam@85 212
cannam@85 213 stmdb sp!, { r2, r4 - r11, lr } @ all callee saved regs, plus arg3 (r2 is stored lowest, at the new sp)
cannam@85 214
cannam@85 215 ldr r4, =K08 @ r4 = K08
cannam@85 216 ldr r5, =K09 @ r5 = K09
cannam@85 217 ldr r8, [r0, #X4] @ r8 = X4
cannam@85 218 ldr r9, [r0, #X13] @ r9 = X13
cannam@85 219 rsb r6, r4, #0 @ r6 = -K08
cannam@85 220 rsb r7, r5, #0 @ r7 = -K09
cannam@85 221
cannam@85 222 smull r2, r3, r4, r8 @ r2..r3 = (X4 * K08) (r2 = low word, r3 = high word)
cannam@85 223 smlal r2, r3, r5, r9 @ r2..r3 = (X4 * K08) + (X13 * K09) = ct01
cannam@85 224
cannam@85 225 smull r10, lr, r8, r5 @ r10..lr = (X4 * K09)
cannam@85 226 smlal r10, lr, r9, r6 @ r10..lr = (X4 * K09) + (X13 * -K08) = ct00
cannam@85 227
cannam@85 228 ldr r8, [r0, #X7] @ r8 = X7
cannam@85 229 ldr r9, [r0, #X16] @ r9 = X16
cannam@85 230
cannam@85 231 stmdb sp!, { r2, r3, r10, lr } @ stack ct00_h, ct00_l, ct01_h, ct01_l (listed from highest address down)
cannam@85 232
cannam@85 233 add r8, r8, r9 @ r8 = (X7 + X16)
cannam@85 234 ldr r9, [r0, #X1] @ r9 = X1
cannam@85 235
cannam@85 236 smlal r2, r3, r6, r8 @ r2..r3 = ct01 + ((X7 + X16) * -K08)
cannam@85 237 smlal r2, r3, r7, r9 @ r2..r3 += (X1 * -K09)
cannam@85 238
cannam@85 239 ldr r7, [r0, #X10] @ r7 = X10 (-K09 in r7 is no longer needed)
cannam@85 240
cannam@85 241 rsbs r10, r10, #0 @ negate low word; the flags carry the borrow into the rsc below
cannam@85 242 rsc lr, lr, #0 @ r10..lr = -ct00
cannam@85 243
cannam@85 244 smlal r2, r3, r5, r7 @ r2..r3 += (X10 * K09) = ct06
cannam@85 245
cannam@85 246 smlal r10, lr, r9, r6 @ r10..lr = -ct00 + (X1 * -K08)
cannam@85 247 smlal r10, lr, r8, r5 @ r10..lr += ((X7 + X16) * K09)
cannam@85 248 smlal r10, lr, r7, r4 @ r10..lr += (X10 * K08) = ct04
cannam@85 249
cannam@85 250 stmdb sp!, { r2, r3, r10, lr } @ stack ct04_h, ct04_l, ct06_h, ct06_l (listed from highest address down)
cannam@85 251
cannam@85 252 @----
cannam@85 253 @ Outputs x22 and x4. On entry r2..r3 = ct06, r10..lr = ct04, r4 = K08, r5 = K09, r6 = -K08.
cannam@85 254 ldr r7, [r0, #X0]
cannam@85 255 ldr r8, [r0, #X11]
cannam@85 256 ldr r9, [r0, #X12]
cannam@85 257 sub r7, r7, r8
cannam@85 258 sub r7, r7, r9 @ r7 = (X0 - X11 - X12) = ct14
cannam@85 259
cannam@85 260 ldr r9, [r0, #X3]
cannam@85 261 ldr r8, [r0, #X8]
cannam@85 262 ldr r11, [r0, #X15]
cannam@85 263 sub r8, r8, r9
cannam@85 264 add r8, r8, r11 @ r8 = (X8 - X3 + X15) = ct16
cannam@85 265
cannam@85 266 add r11, r7, r8 @ r11 = ct14 + ct16 = ct18
cannam@85 267
cannam@85 268 smlal r2, r3, r6, r11 @ r2..r3 = ct06 + (ct18 * -K08); must precede the reload of r6 below
cannam@85 269
cannam@85 270 ldr r6, [r0, #X2]
cannam@85 271 ldr r9, [r0, #X9]
cannam@85 272 ldr r12, [r0, #X14]
cannam@85 273 sub r6, r6, r9
cannam@85 274 sub r6, r6, r12 @ r6 = (X2 - X9 - X14) = ct15
cannam@85 275
cannam@85 276 ldr r9, [r0, #X5]
cannam@85 277 ldr r12, [r0, #X6]
cannam@85 278 sub r9, r9, r12
cannam@85 279 ldr r12, [r0, #X17]
cannam@85 280 sub r9, r9, r12 @ r9 = (X5 - X6 - X17) = ct17
cannam@85 281
cannam@85 282 add r12, r9, r6 @ r12 = ct15 + ct17 = ct19
cannam@85 283
cannam@85 284 smlal r2, r3, r5, r12 @ r2..r3 += (ct19 * K09)
cannam@85 285
cannam@85 286 smlal r10, lr, r11, r5 @ r10..lr = ct04 + (ct18 * K09)
cannam@85 287 smlal r10, lr, r12, r4 @ r10..lr = ct04 + (ct18 * K09) + (ct19 * K08)
cannam@85 288
cannam@85 289 movs r2, r2, lsr #28 @ carry = bit 27, the last bit shifted out
cannam@85 290 adc r2, r2, r3, lsl #4 @ r2 = bits[59..28] of r2..r3, rounded by adding that carry
cannam@85 291 str r2, [r1, #x22] @ store result x22
cannam@85 292
cannam@85 293 movs r10, r10, lsr #28 @ carry = bit 27, the last bit shifted out
cannam@85 294 adc r10, r10, lr, lsl #4 @ r10 = bits[59..28] of r10..lr, rounded by adding that carry
cannam@85 295 str r10, [r1, #x4] @ store result x4
cannam@85 296
cannam@85 297 @----
cannam@85 298 @ Outputs x7 and x1, restarting from the ct06/ct04 values saved on the stack.
cannam@85 299 ldmia sp, { r2, r3, r4, r5 } @ r2..r3 = ct06, r4..r5 = ct04 (dont update sp)
cannam@85 300
cannam@85 301 @ r2..r3 = ct06
cannam@85 302 @ r4..r5 = ct04
cannam@85 303 @ r6 = ct15
cannam@85 304 @ r7 = ct14
cannam@85 305 @ r8 = ct16
cannam@85 306 @ r9 = ct17
cannam@85 307 @ r10 = .
cannam@85 308 @ r11 = .
cannam@85 309 @ r12 = .
cannam@85 310 @ lr = .
cannam@85 311
cannam@85 312 ldr r10, =K03 @ r10 = K03
cannam@85 313 ldr lr, =K15 @ lr = K15
cannam@85 314
cannam@85 315 smlal r2, r3, r10, r7 @ r2..r3 = ct06 + (ct14 * K03)
cannam@85 316 smlal r4, r5, lr, r7 @ r4..r5 = ct04 + (ct14 * K15)
cannam@85 317
cannam@85 318 ldr r12, =K14 @ r12 = K14
cannam@85 319 rsb r10, r10, #0 @ r10 = -K03
cannam@85 320
cannam@85 321 smlal r2, r3, lr, r6 @ r2..r3 += (ct15 * K15)
cannam@85 322 smlal r4, r5, r10, r6 @ r4..r5 += (ct15 * -K03)
cannam@85 323 smlal r2, r3, r12, r8 @ r2..r3 += (ct16 * K14)
cannam@85 324
cannam@85 325 ldr r11, =minus_K02 @ r11 = -K02
cannam@85 326 rsb r12, r12, #0 @ r12 = -K14
cannam@85 327
cannam@85 328 smlal r4, r5, r12, r9 @ r4..r5 += (ct17 * -K14)
cannam@85 329 smlal r2, r3, r11, r9 @ r2..r3 += (ct17 * -K02)
cannam@85 330 smlal r4, r5, r11, r8 @ r4..r5 += (ct16 * -K02)
cannam@85 331
cannam@85 332 movs r2, r2, lsr #28 @ carry = bit 27, the last bit shifted out
cannam@85 333 adc r2, r2, r3, lsl #4 @ r2 = bits[59..28] of r2..r3, rounded by adding that carry
cannam@85 334 str r2, [r1, #x7] @ store result x7
cannam@85 335
cannam@85 336 movs r4, r4, lsr #28 @ carry = bit 27, the last bit shifted out
cannam@85 337 adc r4, r4, r5, lsl #4 @ r4 = bits[59..28] of r4..r5, rounded by adding that carry
cannam@85 338 str r4, [r1, #x1] @ store result x1
cannam@85 339
cannam@85 340 @----
cannam@85 341 @ Outputs x25 and x19, again from the stacked ct06/ct04; constants left in r10-r12/lr by the previous section are reused.
cannam@85 342 ldmia sp, { r2, r3, r4, r5 } @ r2..r3 = ct06, r4..r5 = ct04 (dont update sp)
cannam@85 343
cannam@85 344 @ r2..r3 = ct06
cannam@85 345 @ r4..r5 = ct04
cannam@85 346 @ r6 = ct15
cannam@85 347 @ r7 = ct14
cannam@85 348 @ r8 = ct16
cannam@85 349 @ r9 = ct17
cannam@85 350 @ r10 = -K03
cannam@85 351 @ r11 = -K02
cannam@85 352 @ r12 = -K14
cannam@85 353 @ lr = K15
cannam@85 354
cannam@85 355 rsbs r2, r2, #0 @ negate low word; the flags carry the borrow into the rsc below
cannam@85 356 rsc r3, r3, #0 @ r2..r3 = -ct06
cannam@85 357
cannam@85 358 smlal r2, r3, r12, r7 @ r2..r3 = -ct06 + (ct14 * -K14)
cannam@85 359 smlal r2, r3, r10, r8 @ r2..r3 += (ct16 * -K03)
cannam@85 360
cannam@85 361 smlal r4, r5, r12, r6 @ r4..r5 = ct04 + (ct15 * -K14)
cannam@85 362 smlal r4, r5, r10, r9 @ r4..r5 += (ct17 * -K03)
cannam@85 363 smlal r4, r5, lr, r8 @ r4..r5 += (ct16 * K15)
cannam@85 364 smlal r4, r5, r11, r7 @ r4..r5 += (ct14 * -K02)
cannam@85 365
cannam@85 366 rsb lr, lr, #0 @ lr = -K15
cannam@85 367 rsb r11, r11, #0 @ r11 = K02
cannam@85 368
cannam@85 369 smlal r2, r3, lr, r9 @ r2..r3 += (ct17 * -K15)
cannam@85 370 smlal r2, r3, r11, r6 @ r2..r3 += (ct15 * K02)
cannam@85 371
cannam@85 372 movs r4, r4, lsr #28 @ carry = bit 27, the last bit shifted out
cannam@85 373 adc r4, r4, r5, lsl #4 @ r4 = bits[59..28] of r4..r5, rounded by adding that carry
cannam@85 374 str r4, [r1, #x25] @ store result x25
cannam@85 375
cannam@85 376 movs r2, r2, lsr #28 @ carry = bit 27, the last bit shifted out
cannam@85 377 adc r2, r2, r3, lsl #4 @ r2 = bits[59..28] of r2..r3, rounded by adding that carry
cannam@85 378 str r2, [r1, #x19] @ store result x19
cannam@85 379
cannam@85 380 @----
cannam@85 381 @ Builds ct05, ct03, -ct05, ct07, -ct07 and ct02 and stacks them for the table-driven loop. sp still points at ct06_l here.
cannam@85 382 ldr r2, [sp, #16] @ r2 = ct01_l (16 bytes = the ct06/ct04 pairs below it)
cannam@85 383 ldr r3, [sp, #20] @ r3 = ct01_h
cannam@85 384
cannam@85 385 ldr r6, [r0, #X1]
cannam@85 386 ldr r8, [r0, #X7]
cannam@85 387 ldr r9, [r0, #X10]
cannam@85 388 ldr r7, [r0, #X16]
cannam@85 389
cannam@85 390 rsbs r2, r2, #0 @ negate low word; the flags carry the borrow into the rsc below
cannam@85 391 rsc r3, r3, #0 @ r2..r3 = -ct01
cannam@85 392
cannam@85 393 mov r4, r2
cannam@85 394 mov r5, r3 @ r4..r5 = -ct01
cannam@85 395
cannam@85 396 @ r2..r3 = -ct01
cannam@85 397 @ r4..r5 = -ct01
cannam@85 398 @ r6 = X1
cannam@85 399 @ r7 = X16
cannam@85 400 @ r8 = X7
cannam@85 401 @ r9 = X10
cannam@85 402 @ r10 = -K03
cannam@85 403 @ r11 = K02
cannam@85 404 @ r12 = -K14
cannam@85 405 @ lr = -K15
cannam@85 406
cannam@85 407 smlal r4, r5, r12, r7 @ r4..r5 = -ct01 + (X16 * -K14)
cannam@85 408 smlal r2, r3, lr, r9 @ r2..r3 = -ct01 + (X10 * -K15)
cannam@85 409
cannam@85 410 smlal r4, r5, r10, r8 @ r4..r5 += (X7 * -K03)
cannam@85 411 smlal r2, r3, r10, r7 @ r2..r3 += (X16 * -K03)
cannam@85 412
cannam@85 413 smlal r4, r5, r11, r9 @ r4..r5 += (X10 * K02)
cannam@85 414 smlal r2, r3, r12, r8 @ r2..r3 += (X7 * -K14)
cannam@85 415
cannam@85 416 rsb lr, lr, #0 @ lr = K15
cannam@85 417 rsb r11, r11, #0 @ r11 = -K02
cannam@85 418
cannam@85 419 smlal r4, r5, lr, r6 @ r4..r5 += (X1 * K15) = ct05
cannam@85 420 smlal r2, r3, r11, r6 @ r2..r3 += (X1 * -K02) = ct03
cannam@85 421
cannam@85 422 stmdb sp!, { r2, r3, r4, r5 } @ stack ct05_h, ct05_l, ct03_h, ct03_l (listed from highest address down)
cannam@85 423
cannam@85 424 rsbs r4, r4, #0 @ negate low word; the flags carry the borrow into the rsc below
cannam@85 425 rsc r5, r5, #0 @ r4..r5 = -ct05
cannam@85 426
cannam@85 427 stmdb sp!, { r4, r5 } @ stack -ct05_h, -ct05_l
cannam@85 428
cannam@85 429 ldr r2, [sp, #48] @ r2 = ct00_l (48 = 24 bytes just pushed + 16 for ct06/ct04 + 8 for ct01)
cannam@85 430 ldr r3, [sp, #52] @ r3 = ct00_h
cannam@85 431
cannam@85 432 rsb r10, r10, #0 @ r10 = K03
cannam@85 433
cannam@85 434 rsbs r4, r2, #0 @ negate low word; the flags carry the borrow into the rsc below
cannam@85 435 rsc r5, r3, #0 @ r4..r5 = -ct00
cannam@85 436
cannam@85 437 @ r2..r3 = ct00
cannam@85 438 @ r4..r5 = -ct00
cannam@85 439 @ r6 = X1
cannam@85 440 @ r7 = X16
cannam@85 441 @ r8 = X7
cannam@85 442 @ r9 = X10
cannam@85 443 @ r10 = K03
cannam@85 444 @ r11 = -K02
cannam@85 445 @ r12 = -K14
cannam@85 446 @ lr = K15
cannam@85 447
cannam@85 448 smlal r4, r5, r10, r6 @ r4..r5 = -ct00 + (X1 * K03)
cannam@85 449 smlal r2, r3, r10, r9 @ r2..r3 = ct00 + (X10 * K03)
cannam@85 450
cannam@85 451 smlal r4, r5, r12, r9 @ r4..r5 += (X10 * -K14)
cannam@85 452 smlal r2, r3, r12, r6 @ r2..r3 += (X1 * -K14)
cannam@85 453
cannam@85 454 smlal r4, r5, r11, r7 @ r4..r5 += (X16 * -K02)
cannam@85 455 smlal r4, r5, lr, r8 @ r4..r5 += (X7 * K15) = ct07
cannam@85 456
cannam@85 457 rsb lr, lr, #0 @ lr = -K15
cannam@85 458 rsb r11, r11, #0 @ r11 = K02
cannam@85 459
cannam@85 460 smlal r2, r3, r11, r8 @ r2..r3 += (X7 * K02)
cannam@85 461 smlal r2, r3, lr, r7 @ r2..r3 += (X16 * -K15) = ct02
cannam@85 462
cannam@85 463 rsbs r6, r4, #0 @ negate low word; the flags carry the borrow into the rsc below
cannam@85 464 rsc r7, r5, #0 @ r6..r7 = -ct07
cannam@85 465
cannam@85 466 stmdb sp!, { r2 - r7 } @ stack -ct07_h, -ct07_l, ct07_h, ct07_l, ct02_h, ct02_l (sp now points at ct02_l)
cannam@85 467
cannam@85 468
cannam@85 469 @----
cannam@85 470 @ Table-driven stage: each pass consumes one 13-word row of imdct36_long_karray and produces one output word.
cannam@85 471 adr r2, imdct36_long_karray @ r2 = base address of Knn array; assembler computes the PC-relative offset (PIC safe)
cannam@85 472
cannam@85 473 @ Per pass: sum = 12 coefficients * {X0,X2,X3,X5,X6,X8,X9,X11,X12,X14,X15,X17} + the stacked ctxx named by the row's control word.
cannam@85 474 loop:
cannam@85 475 ldr r12, [r0, #X0]
cannam@85 476
cannam@85 477 ldmia r2!, { r5 - r11 } @ first 7 words from Karray element
cannam@85 478
cannam@85 479 smull r3, r4, r5, r12 @ sum = (Kxx * X0)
cannam@85 480 ldr r12, [r0, #X2]
cannam@85 481 ldr r5, [r0, #X3]
cannam@85 482 smlal r3, r4, r6, r12 @ sum += (Kxx * X2)
cannam@85 483 ldr r12, [r0, #X5]
cannam@85 484 ldr r6, [r0, #X6]
cannam@85 485 smlal r3, r4, r7, r5 @ sum += (Kxx * X3)
cannam@85 486 smlal r3, r4, r8, r12 @ sum += (Kxx * X5)
cannam@85 487 ldr r12, [r0, #X8]
cannam@85 488 ldr r5, [r0, #X9]
cannam@85 489 smlal r3, r4, r9, r6 @ sum += (Kxx * X6)
cannam@85 490 smlal r3, r4, r10, r12 @ sum += (Kxx * X8)
cannam@85 491 smlal r3, r4, r11, r5 @ sum += (Kxx * X9)
cannam@85 492
cannam@85 493 ldmia r2!, { r5 - r10 } @ final 6 words from Karray element (r10 = control word)
cannam@85 494
cannam@85 495 ldr r11, [r0, #X11]
cannam@85 496 ldr r12, [r0, #X12]
cannam@85 497 smlal r3, r4, r5, r11 @ sum += (Kxx * X11)
cannam@85 498 ldr r11, [r0, #X14]
cannam@85 499 ldr r5, [r0, #X15]
cannam@85 500 smlal r3, r4, r6, r12 @ sum += (Kxx * X12)
cannam@85 501 smlal r3, r4, r7, r11 @ sum += (Kxx * X14)
cannam@85 502 ldr r11, [r0, #X17]
cannam@85 503 smlal r3, r4, r8, r5 @ sum += (Kxx * X15)
cannam@85 504 smlal r3, r4, r9, r11 @ sum += (Kxx * X17)
cannam@85 505 @ Control word in r10: bits[31..16] = byte offset from sp of ctxx, bits[15..8] = byte offset into x[], bits[7..0] = non-zero on the last row.
cannam@85 506 add r5, sp, r10, lsr #16 @ create index back into stack for required ctxx
cannam@85 507
cannam@85 508 ldmia r5, { r6, r7 } @ r6..r7 = ctxx (low word, high word)
cannam@85 509
cannam@85 510 mov r8, r10, lsl #16 @ push ctxx index off the top end
cannam@85 511
cannam@85 512 adds r3, r3, r6 @ add low words
cannam@85 513 adc r4, r4, r7 @ add high words, with carry
cannam@85 514 movs r3, r3, lsr #28 @ carry = bit 27, the last bit shifted out
cannam@85 515 adc r3, r3, r4, lsl #4 @ r3 = bits[59..28] of r3..r4, rounded by adding that carry
cannam@85 516
cannam@85 517 str r3, [r1, r8, lsr #24] @ lsr #24 drops the completion flag, leaving the output byte offset
cannam@85 518
cannam@85 519 movs r8, r8, lsl #8 @ push result location index off the top end; Z is set unless the completion flag is present
cannam@85 520 beq loop @ loop back if completion flag not set
cannam@85 521 b imdct_l_windowing @ branch to windowing stage if looping finished
cannam@85 522
cannam@85 523 imdct36_long_karray:
cannam@85 524 @ 12 rows of 13 words: 12 coefficients (for X0,X2,X3,X5,X6,X8,X9,X11,X12,X14,X15,X17) then a control word: [31..16] ctxx stack offset, [15..8] output offset, [7..0] end flag.
cannam@85 525 .word K17, -K13, K10, -K06, -K05, K01, -K00, K04, -K07, K11, K12, -K16, 0x00000000 @ x0, adds ct02
cannam@85 526 .word K13, K07, K16, K01, K10, -K05, K04, -K11, K00, -K17, K06, -K12, 0x00200800 @ x2, adds ct03
cannam@85 527 .word K11, K17, K05, K12, -K01, K06, -K07, K00, -K13, K04, -K16, K10, 0x00200c00 @ x3, adds ct03
cannam@85 528 .word K07, K00, -K12, K05, -K16, -K10, K11, -K17, K04, K13, K01, K06, 0x00001400 @ x5, adds ct02
cannam@85 529 .word K05, K10, -K00, -K17, K07, -K13, K12, K06, -K16, K01, -K11, -K04, 0x00181800 @ x6, adds -ct05
cannam@85 530 .word K01, K05, -K07, -K11, K13, K17, -K16, -K12, K10, K06, -K04, -K00, 0x00102000 @ x8, adds -ct07
cannam@85 531 .word -K16, K12, -K11, K07, K04, -K00, -K01, K05, -K06, K10, K13, -K17, 0x00284800 @ x18, adds ct05
cannam@85 532 .word -K12, K06, K17, -K00, -K11, K04, K05, -K10, K01, K16, -K07, -K13, 0x00085000 @ x20, adds ct07
cannam@85 533 .word -K10, K16, K04, -K13, -K00, K07, K06, -K01, -K12, -K05, K17, K11, 0x00105400 @ x21, adds -ct07
cannam@85 534 .word -K06, -K01, K13, K04, K17, -K11, -K10, -K16, -K05, K12, K00, K07, 0x00185c00 @ x23, adds -ct05
cannam@85 535 .word -K04, -K11, -K01, K16, K06, K12, K13, -K07, -K17, -K00, -K10, -K05, 0x00006000 @ x24, adds ct02
cannam@85 536 .word -K00, -K04, -K06, -K10, -K12, -K16, -K17, -K13, -K11, -K07, -K05, -K01, 0x00206801 @ x26, adds ct03; end flag set
cannam@85 537
cannam@85 538
cannam@85 539 @----
cannam@85 540 @-------------------------------------------------------------------------
cannam@85 541 @----
cannam@85 542
cannam@85 543 imdct_l_windowing:
cannam@85 544 @ Windowing stage entry. r0 is reused for data from here on, so the input array is no longer addressable.
cannam@85 545 ldr r11, [sp, #80] @ fetch function parameter 3 from out of the stack (80 = the 20 words of ctxx temporaries above sp)
cannam@85 546 ldmia r1!, { r0, r2 - r9 } @ load 9 words from x0, update pointer
cannam@85 547
cannam@85 548 @ r0 = x0
cannam@85 549 @ r1 = &x[9]
cannam@85 550 @ r2 = x1
cannam@85 551 @ r3 = x2
cannam@85 552 @ r4 = x3
cannam@85 553 @ r5 = x4
cannam@85 554 @ r6 = x5
cannam@85 555 @ r7 = x6
cannam@85 556 @ r8 = x7
cannam@85 557 @ r9 = x8
cannam@85 558 @ r10 = .
cannam@85 559 @ r11 = window mode: (0 == normal), (1 == start block), (3 == stop block)
cannam@85 560 @ r12 = .
cannam@85 561 @ lr = .
cannam@85 562
cannam@85 563 cmp r11, #BLOCK_MODE_STOP @ setup flags
cannam@85 564 rsb r10, r0, #0 @ r10 = -x0 (DONT change flags !!) - plain rsb, not rsbs, so the cmp result survives
cannam@85 565 beq stop_block_x0_to_x17
cannam@85 566
cannam@85 567
cannam@85 568 @ start and normal blocks are treated the same for x[0]..x[17]
cannam@85 569
cannam@85 570 normal_block_x0_to_x17:
cannam@85 571 @ Forms x[9]..x[17] as the negated mirror of x[8]..x[0], windows them with window_l[9..17], then windows x[0]..x[8] with window_l[0..8].
cannam@85 572 ldr r12, =WL9 @ r12 = window_l[9]
cannam@85 573
cannam@85 574 rsb r0, r9, #0 @ r0 = -x8
cannam@85 575 rsb r9, r2, #0 @ r9 = -x1
cannam@85 576 rsb r2, r8, #0 @ r2 = -x7
cannam@85 577 rsb r8, r3, #0 @ r8 = -x2
cannam@85 578 rsb r3, r7, #0 @ r3 = -x6
cannam@85 579 rsb r7, r4, #0 @ r7 = -x3
cannam@85 580 rsb r4, r6, #0 @ r4 = -x5
cannam@85 581 rsb r6, r5, #0 @ r6 = -x4 (r5 is now free as scratch)
cannam@85 582
cannam@85 583 @ r0 = -x8
cannam@85 584 @ r1 = &x[9]
cannam@85 585 @ r2 = -x7
cannam@85 586 @ r3 = -x6
cannam@85 587 @ r4 = -x5
cannam@85 588 @ r5 = .
cannam@85 589 @ r6 = -x4
cannam@85 590 @ r7 = -x3
cannam@85 591 @ r8 = -x2
cannam@85 592 @ r9 = -x1
cannam@85 593 @ r10 = -x0
cannam@85 594 @ r11 = window mode: (0 == normal), (1 == start block), (3 == stop block)
cannam@85 595 @ r12 = window_l[9]
cannam@85 596 @ lr = .
cannam@85 597
cannam@85 598 smull r5, lr, r12, r0 @ r5..lr = (window_l[9] * (x[9] == -x[8]))
cannam@85 599 ldr r12, =WL10 @ r12 = window_l[10]
cannam@85 600 movs r5, r5, lsr #28 @ carry = bit 27, added by the adc below to round
cannam@85 601 adc r0, r5, lr, lsl #4 @ r0 = bits[59..28] of windowed x9
cannam@85 602
cannam@85 603 smull r5, lr, r12, r2 @ r5..lr = (window_l[10] * (x[10] == -x[7]))
cannam@85 604 ldr r12, =WL11 @ r12 = window_l[11]
cannam@85 605 movs r5, r5, lsr #28
cannam@85 606 adc r2, r5, lr, lsl #4 @ r2 = bits[59..28] of windowed x10
cannam@85 607
cannam@85 608 smull r5, lr, r12, r3 @ r5..lr = (window_l[11] * (x[11] == -x[6]))
cannam@85 609 ldr r12, =WL12 @ r12 = window_l[12]
cannam@85 610 movs r5, r5, lsr #28
cannam@85 611 adc r3, r5, lr, lsl #4 @ r3 = bits[59..28] of windowed x11
cannam@85 612
cannam@85 613 smull r5, lr, r12, r4 @ r5..lr = (window_l[12] * (x[12] == -x[5]))
cannam@85 614 ldr r12, =WL13 @ r12 = window_l[13]
cannam@85 615 movs r5, r5, lsr #28
cannam@85 616 adc r4, r5, lr, lsl #4 @ r4 = bits[59..28] of windowed x12
cannam@85 617
cannam@85 618 smull r5, lr, r12, r6 @ r5..lr = (window_l[13] * (x[13] == -x[4]))
cannam@85 619 ldr r12, =WL14 @ r12 = window_l[14]
cannam@85 620 movs r5, r5, lsr #28
cannam@85 621 adc r6, r5, lr, lsl #4 @ r6 = bits[59..28] of windowed x13
cannam@85 622
cannam@85 623 smull r5, lr, r12, r7 @ r5..lr = (window_l[14] * (x[14] == -x[3]))
cannam@85 624 ldr r12, =WL15 @ r12 = window_l[15]
cannam@85 625 movs r5, r5, lsr #28
cannam@85 626 adc r7, r5, lr, lsl #4 @ r7 = bits[59..28] of windowed x14
cannam@85 627
cannam@85 628 smull r5, lr, r12, r8 @ r5..lr = (window_l[15] * (x[15] == -x[2]))
cannam@85 629 ldr r12, =WL16 @ r12 = window_l[16]
cannam@85 630 movs r5, r5, lsr #28
cannam@85 631 adc r8, r5, lr, lsl #4 @ r8 = bits[59..28] of windowed x15
cannam@85 632
cannam@85 633 smull r5, lr, r12, r9 @ r5..lr = (window_l[16] * (x[16] == -x[1]))
cannam@85 634 ldr r12, =WL17 @ r12 = window_l[17]
cannam@85 635 movs r5, r5, lsr #28
cannam@85 636 adc r9, r5, lr, lsl #4 @ r9 = bits[59..28] of windowed x16
cannam@85 637
cannam@85 638 smull r5, lr, r12, r10 @ r5..lr = (window_l[17] * (x[17] == -x[0]))
cannam@85 639 ldr r12, =WL0 @ r12 = window_l[0]
cannam@85 640 movs r5, r5, lsr #28
cannam@85 641 adc r10, r5, lr, lsl #4 @ r10 = bits[59..28] of windowed x17
cannam@85 642
cannam@85 643
cannam@85 644 stmia r1, { r0, r2 - r4, r6 - r10 } @ store windowed x[9] .. x[17]
cannam@85 645 ldmdb r1!, { r0, r2 - r9 } @ load 9 words downto (and including) x0; r1 = &x[0] afterwards
cannam@85 646
cannam@85 647
cannam@85 648 smull r10, lr, r12, r0 @ r10..lr = (window_l[0] * x[0])
cannam@85 649 ldr r12, =WL1 @ r12 = window_l[1]
cannam@85 650 movs r10, r10, lsr #28
cannam@85 651 adc r0, r10, lr, lsl #4 @ r0 = bits[59..28] of windowed x0
cannam@85 652
cannam@85 653 smull r10, lr, r12, r2 @ r10..lr = (window_l[1] * x[1])
cannam@85 654 ldr r12, =WL2 @ r12 = window_l[2]
cannam@85 655 movs r10, r10, lsr #28
cannam@85 656 adc r2, r10, lr, lsl #4 @ r2 = bits[59..28] of windowed x1
cannam@85 657
cannam@85 658 smull r10, lr, r12, r3 @ r10..lr = (window_l[2] * x[2])
cannam@85 659 ldr r12, =WL3 @ r12 = window_l[3]
cannam@85 660 movs r10, r10, lsr #28
cannam@85 661 adc r3, r10, lr, lsl #4 @ r3 = bits[59..28] of windowed x2
cannam@85 662
cannam@85 663 smull r10, lr, r12, r4 @ r10..lr = (window_l[3] * x[3])
cannam@85 664 ldr r12, =WL4 @ r12 = window_l[4]
cannam@85 665 movs r10, r10, lsr #28
cannam@85 666 adc r4, r10, lr, lsl #4 @ r4 = bits[59..28] of windowed x3
cannam@85 667
cannam@85 668 smull r10, lr, r12, r5 @ r10..lr = (window_l[4] * x[4])
cannam@85 669 ldr r12, =WL5 @ r12 = window_l[5]
cannam@85 670 movs r10, r10, lsr #28
cannam@85 671 adc r5, r10, lr, lsl #4 @ r5 = bits[59..28] of windowed x4
cannam@85 672
cannam@85 673 smull r10, lr, r12, r6 @ r10..lr = (window_l[5] * x[5])
cannam@85 674 ldr r12, =WL6 @ r12 = window_l[6]
cannam@85 675 movs r10, r10, lsr #28
cannam@85 676 adc r6, r10, lr, lsl #4 @ r6 = bits[59..28] of windowed x5
cannam@85 677
cannam@85 678 smull r10, lr, r12, r7 @ r10..lr = (window_l[6] * x[6])
cannam@85 679 ldr r12, =WL7 @ r12 = window_l[7]
cannam@85 680 movs r10, r10, lsr #28
cannam@85 681 adc r7, r10, lr, lsl #4 @ r7 = bits[59..28] of windowed x6
cannam@85 682
cannam@85 683 smull r10, lr, r12, r8 @ r10..lr = (window_l[7] * x[7])
cannam@85 684 ldr r12, =WL8 @ r12 = window_l[8]
cannam@85 685 movs r10, r10, lsr #28
cannam@85 686 adc r8, r10, lr, lsl #4 @ r8 = bits[59..28] of windowed x7
cannam@85 687
cannam@85 688 smull r10, lr, r12, r9 @ r10..lr = (window_l[8] * x[8])
cannam@85 689 movs r10, r10, lsr #28
cannam@85 690 adc r9, r10, lr, lsl #4 @ r9 = bits[59..28] of windowed x8
cannam@85 691
cannam@85 692 stmia r1, { r0, r2 - r9 } @ store windowed x[0] .. x[8]
cannam@85 693
cannam@85 694 cmp r11, #BLOCK_MODE_START @ start blocks branch to their own x[18]..x[35] path
cannam@85 695 beq start_block_x18_to_x35 @ otherwise fall through to the normal-block path
cannam@85 696
cannam@85 697
cannam@85 698 @----
cannam@85 699
cannam@85 700
cannam@85 701 normal_block_x18_to_x35:
cannam@85 702 @ r1 = &x[0] on entry. Fills x[27]..x[35] from the mirror of x[26]..x[18] windowed by window_l[8..0], then windows x[18]..x[26] by window_l[17..9] and returns.
cannam@85 703 ldr r11, =WL3 @ r11 = window_l[3]
cannam@85 704 ldr r12, =WL4 @ r12 = window_l[4]
cannam@85 705
cannam@85 706 add r1, r1, #(18*4) @ r1 = &x[18]
cannam@85 707
cannam@85 708 ldmia r1!, { r0, r2 - r4, r6 - r10 } @ load 9 words from x18, update pointer
cannam@85 709
cannam@85 710 @ r0 = x18
cannam@85 711 @ r1 = &x[27]
cannam@85 712 @ r2 = x19
cannam@85 713 @ r3 = x20
cannam@85 714 @ r4 = x21
cannam@85 715 @ r5 = .
cannam@85 716 @ r6 = x22
cannam@85 717 @ r7 = x23
cannam@85 718 @ r8 = x24
cannam@85 719 @ r9 = x25
cannam@85 720 @ r10 = x26
cannam@85 721 @ r11 = window_l[3]
cannam@85 722 @ r12 = window_l[4]
cannam@85 723 @ lr = .
cannam@85 724
cannam@85 725 smull r5, lr, r12, r6 @ r5..lr = (window_l[4] * (x[22] == x[31]))
cannam@85 726 movs r5, r5, lsr #28 @ carry = bit 27, added by the adc below to round
cannam@85 727 adc r5, r5, lr, lsl #4 @ r5 = bits[59..28] of windowed x31
cannam@85 728
cannam@85 729 smull r6, lr, r11, r4 @ r6..lr = (window_l[3] * (x[21] == x[32]))
cannam@85 730 ldr r12, =WL5 @ r12 = window_l[5]
cannam@85 731 movs r6, r6, lsr #28
cannam@85 732 adc r6, r6, lr, lsl #4 @ r6 = bits[59..28] of windowed x32
cannam@85 733
cannam@85 734 smull r4, lr, r12, r7 @ r4..lr = (window_l[5] * (x[23] == x[30]))
cannam@85 735 ldr r11, =WL1 @ r11 = window_l[1]
cannam@85 736 ldr r12, =WL2 @ r12 = window_l[2]
cannam@85 737 movs r4, r4, lsr #28
cannam@85 738 adc r4, r4, lr, lsl #4 @ r4 = bits[59..28] of windowed x30
cannam@85 739
cannam@85 740 smull r7, lr, r12, r3 @ r7..lr = (window_l[2] * (x[20] == x[33]))
cannam@85 741 ldr r12, =WL6 @ r12 = window_l[6]
cannam@85 742 movs r7, r7, lsr #28
cannam@85 743 adc r7, r7, lr, lsl #4 @ r7 = bits[59..28] of windowed x33
cannam@85 744
cannam@85 745 smull r3, lr, r12, r8 @ r3..lr = (window_l[6] * (x[24] == x[29]))
cannam@85 746 movs r3, r3, lsr #28
cannam@85 747 adc r3, r3, lr, lsl #4 @ r3 = bits[59..28] of windowed x29
cannam@85 748
cannam@85 749 smull r8, lr, r11, r2 @ r8..lr = (window_l[1] * (x[19] == x[34]))
cannam@85 750 ldr r12, =WL7 @ r12 = window_l[7]
cannam@85 751 ldr r11, =WL8 @ r11 = window_l[8]
cannam@85 752 movs r8, r8, lsr #28
cannam@85 753 adc r8, r8, lr, lsl #4 @ r8 = bits[59..28] of windowed x34
cannam@85 754
cannam@85 755 smull r2, lr, r12, r9 @ r2..lr = (window_l[7] * (x[25] == x[28]))
cannam@85 756 ldr r12, =WL0 @ r12 = window_l[0]
cannam@85 757 movs r2, r2, lsr #28
cannam@85 758 adc r2, r2, lr, lsl #4 @ r2 = bits[59..28] of windowed x28
cannam@85 759
cannam@85 760 smull r9, lr, r12, r0 @ r9..lr = (window_l[0] * (x[18] == x[35]))
cannam@85 761 movs r9, r9, lsr #28
cannam@85 762 adc r9, r9, lr, lsl #4 @ r9 = bits[59..28] of windowed x35
cannam@85 763
cannam@85 764 smull r0, lr, r11, r10 @ r0..lr = (window_l[8] * (x[26] == x[27]))
cannam@85 765 ldr r11, =WL16 @ r11 = window_l[16]
cannam@85 766 ldr r12, =WL17 @ r12 = window_l[17]
cannam@85 767 movs r0, r0, lsr #28
cannam@85 768 adc r0, r0, lr, lsl #4 @ r0 = bits[59..28] of windowed x27
cannam@85 769
cannam@85 770
cannam@85 771 stmia r1, { r0, r2 - r9 } @ store windowed x[27] .. x[35]
cannam@85 772 ldmdb r1!, { r0, r2 - r9 } @ load 9 words downto (and including) x18; r1 = &x[18] afterwards
cannam@85 773
cannam@85 774
cannam@85 775 smull r10, lr, r12, r0 @ r10..lr = (window_l[17] * x[18])
cannam@85 776 movs r10, r10, lsr #28
cannam@85 777 adc r0, r10, lr, lsl #4 @ r0 = bits[59..28] of windowed x18
cannam@85 778
cannam@85 779 smull r10, lr, r11, r2 @ r10..lr = (window_l[16] * x[19])
cannam@85 780 ldr r11, =WL14 @ r11 = window_l[14]
cannam@85 781 ldr r12, =WL15 @ r12 = window_l[15]
cannam@85 782 movs r10, r10, lsr #28
cannam@85 783 adc r2, r10, lr, lsl #4 @ r2 = bits[59..28] of windowed x19
cannam@85 784
cannam@85 785 smull r10, lr, r12, r3 @ r10..lr = (window_l[15] * x[20])
cannam@85 786 movs r10, r10, lsr #28
cannam@85 787 adc r3, r10, lr, lsl #4 @ r3 = bits[59..28] of windowed x20
cannam@85 788
cannam@85 789 smull r10, lr, r11, r4 @ r10..lr = (window_l[14] * x[21])
cannam@85 790 ldr r11, =WL12 @ r11 = window_l[12]
cannam@85 791 ldr r12, =WL13 @ r12 = window_l[13]
cannam@85 792 movs r10, r10, lsr #28
cannam@85 793 adc r4, r10, lr, lsl #4 @ r4 = bits[59..28] of windowed x21
cannam@85 794
cannam@85 795 smull r10, lr, r12, r5 @ r10..lr = (window_l[13] * x[22])
cannam@85 796 movs r10, r10, lsr #28
cannam@85 797 adc r5, r10, lr, lsl #4 @ r5 = bits[59..28] of windowed x22
cannam@85 798
cannam@85 799 smull r10, lr, r11, r6 @ r10..lr = (window_l[12] * x[23])
cannam@85 800 ldr r11, =WL10 @ r11 = window_l[10]
cannam@85 801 ldr r12, =WL11 @ r12 = window_l[11]
cannam@85 802 movs r10, r10, lsr #28
cannam@85 803 adc r6, r10, lr, lsl #4 @ r6 = bits[59..28] of windowed x23
cannam@85 804
cannam@85 805 smull r10, lr, r12, r7 @ r10..lr = (window_l[11] * x[24])
cannam@85 806 movs r10, r10, lsr #28
cannam@85 807 adc r7, r10, lr, lsl #4 @ r7 = bits[59..28] of windowed x24
cannam@85 808
cannam@85 809 smull r10, lr, r11, r8 @ r10..lr = (window_l[10] * x[25])
cannam@85 810 ldr r12, =WL9 @ r12 = window_l[9]
cannam@85 811 movs r10, r10, lsr #28
cannam@85 812 adc r8, r10, lr, lsl #4 @ r8 = bits[59..28] of windowed x25
cannam@85 813
cannam@85 814 smull r10, lr, r12, r9 @ r10..lr = (window_l[9] * x[26])
cannam@85 815
cannam@85 816 movs r10, r10, lsr #28
cannam@85 817 adc r9, r10, lr, lsl #4 @ r9 = bits[59..28] of windowed x26
cannam@85 818
cannam@85 819 stmia r1, { r0, r2 - r9 } @ store windowed x[18] .. x[26]
cannam@85 820
cannam@85 821 @----
cannam@85 822 @ NB there are 2 possible exits from this function - this is only one of them
cannam@85 823 @----
cannam@85 824
cannam@85 825 add sp, sp, #(21*4) @ return stack frame: drop the 20 words of ctxx temporaries plus the saved r2
cannam@85 826 ldmia sp!, { r4 - r11, pc } @ restore callee saved regs, and return (saved lr is loaded straight into pc)
cannam@85 827
cannam@85 828 @----
cannam@85 829
cannam@85 830
cannam@85 831 stop_block_x0_to_x17:
cannam@85 832 @ Stop-block handling of x[0] .. x[17]: x[12] .. x[17] are stored without windowing, x[6] .. x[11] are scaled by window_l[1,4,7,10,13,16], and x[0] .. x[5] are zeroed. Register state expected on entry:
cannam@85 833 @ r0 = x0
cannam@85 834 @ r1 = &x[9]
cannam@85 835 @ r2 = x1
cannam@85 836 @ r3 = x2
cannam@85 837 @ r4 = x3
cannam@85 838 @ r5 = x4
cannam@85 839 @ r6 = x5
cannam@85 840 @ r7 = x6
cannam@85 841 @ r8 = x7
cannam@85 842 @ r9 = x8
cannam@85 843 @ r10 = -x0
cannam@85 844 @ r11 = window mode: (0 == normal), (1 == start block), (3 == stop block)
cannam@85 845 @ r12 = .
cannam@85 846 @ lr = .
cannam@85 847 @ Negate x5 .. x1 into ascending register numbers (r0, r2, r3, r5, r6), because stmia writes registers in register-number order
cannam@85 848 rsb r0, r6, #0 @ r0 = -x5
cannam@85 849 rsb r6, r2, #0 @ r6 = -x1
cannam@85 850 rsb r2, r5, #0 @ r2 = -x4
cannam@85 851 rsb r5, r3, #0 @ r5 = -x2
cannam@85 852 rsb r3, r4, #0 @ r3 = -x3
cannam@85 853
cannam@85 854 add r1, r1, #(3*4) @ r1 = &x[12]
cannam@85 855 stmia r1, { r0, r2, r3, r5, r6, r10 } @ store x[12] .. x[17] = -x5 .. -x0 (no window applied), r1 not updated
cannam@85 856
cannam@85 857 ldr r0, =WL1 @ r0 = window_l[1] == window_s[0]
cannam@85 858
cannam@85 859 rsb r10, r9, #0 @ r10 = -x8
cannam@85 860 rsb r12, r8, #0 @ r12 = -x7
cannam@85 861 rsb lr, r7, #0 @ lr = -x6
cannam@85 862 @ Register state at this point:
cannam@85 863 @ r0 = WL1
cannam@85 864 @ r1 = &x[12]
cannam@85 865 @ r2 = .
cannam@85 866 @ r3 = .
cannam@85 867 @ r4 = .
cannam@85 868 @ r5 = .
cannam@85 869 @ r6 = .
cannam@85 870 @ r7 = x6
cannam@85 871 @ r8 = x7
cannam@85 872 @ r9 = x8
cannam@85 873 @ r10 = -x8
cannam@85 874 @ r11 = window mode: (0 == normal), (1 == start block), (3 == stop block)
cannam@85 875 @ r12 = -x7
cannam@85 876 @ lr = -x6
cannam@85 877 @ Each multiply below keeps bits[59..28] of the 64-bit product: movs ... lsr #28 leaves bit 27 in the carry flag and adc adds it back in, rounding the result
cannam@85 878 smull r5, r6, r0, r7 @ r5..r6 = (window_l[1] * x[6])
cannam@85 879 ldr r2, =WL4 @ r2 = window_l[4] == window_s[1]
cannam@85 880 movs r5, r5, lsr #28
cannam@85 881 adc r7, r5, r6, lsl #4 @ r7 = bits[59..28] of windowed x6
cannam@85 882
cannam@85 883 smull r5, r6, r2, r8 @ r5..r6 = (window_l[4] * x[7])
cannam@85 884 ldr r3, =WL7 @ r3 = window_l[7] == window_s[2]
cannam@85 885 movs r5, r5, lsr #28
cannam@85 886 adc r8, r5, r6, lsl #4 @ r8 = bits[59..28] of windowed x7
cannam@85 887
cannam@85 888 smull r5, r6, r3, r9 @ r5..r6 = (window_l[7] * x[8])
cannam@85 889 ldr r4, =WL10 @ r4 = window_l[10] == window_s[3]
cannam@85 890 movs r5, r5, lsr #28
cannam@85 891 adc r9, r5, r6, lsl #4 @ r9 = bits[59..28] of windowed x8
cannam@85 892
cannam@85 893 smull r5, r6, r4, r10 @ r5..r6 = (window_l[10] * (x[9] == -x[8]))
cannam@85 894 ldr r0, =WL13 @ r0 = window_l[13] == window_s[4]
cannam@85 895 movs r5, r5, lsr #28
cannam@85 896 adc r10, r5, r6, lsl #4 @ r10 = bits[59..28] of windowed x9
cannam@85 897
cannam@85 898 smull r5, r6, r0, r12 @ r5..r6 = (window_l[13] * (x[10] == -x[7]))
cannam@85 899 ldr r2, =WL16 @ r2 = window_l[16] == window_s[5]
cannam@85 900 movs r5, r5, lsr #28
cannam@85 901 adc r12, r5, r6, lsl #4 @ r12 = bits[59..28] of windowed x10
cannam@85 902
cannam@85 903 smull r5, r6, r2, lr @ r5..r6 = (window_l[16] * (x[11] == -x[6]))
cannam@85 904
cannam@85 905 ldr r0, =0x00 @ r0 = 0, the value stored to x[0] below (WL13 is no longer needed)
cannam@85 906
cannam@85 907 movs r5, r5, lsr #28
cannam@85 908 adc lr, r5, r6, lsl #4 @ lr = bits[59..28] of windowed x11
cannam@85 909
cannam@85 910 stmdb r1!, { r7 - r10, r12, lr } @ store windowed x[6] .. x[11], leaves r1 = &x[6]
cannam@85 911
cannam@85 912 ldr r5, =0x00 @ r2 - r6 = 0, the values stored to x[1] .. x[5] below
cannam@85 913 ldr r6, =0x00
cannam@85 914 ldr r2, =0x00
cannam@85 915 ldr r3, =0x00
cannam@85 916 ldr r4, =0x00
cannam@85 917
cannam@85 918 stmdb r1!, { r0, r2 - r6 } @ store zeros to x[0] .. x[5], leaves r1 = &x[0]
cannam@85 919
cannam@85 920 b normal_block_x18_to_x35 @ continue with x[18] .. x[35] (presumably the normal long-window path, per the label name - target is outside this section)
cannam@85 921
cannam@85 922
cannam@85 923 @----
cannam@85 924
cannam@85 925
cannam@85 926 start_block_x18_to_x35:
cannam@85 927 @ Start-block handling of x[24] .. x[35]: x[24] .. x[29] are scaled by window_l[16,13,10,7,4,1], x[30] .. x[35] are zeroed, then the function returns. Assumes r1 = &x[0] on entry, as the add below implies.
cannam@85 928 ldr r4, =WL1 @ r4 = window_l[1] == window_s[0]
cannam@85 929
cannam@85 930 add r1, r1, #(24*4) @ r1 = &x[24]
cannam@85 931
cannam@85 932 ldmia r1, { r0, r2, r3 } @ load 3 words from x24, dont update pointer
cannam@85 933
cannam@85 934 @ r0 = x24
cannam@85 935 @ r1 = &x[24]
cannam@85 936 @ r2 = x25
cannam@85 937 @ r3 = x26
cannam@85 938 @ r4 = WL1
cannam@85 939 @ r5 = WL4 (not loaded yet - each of r5 .. r9 is loaded below, before its first use)
cannam@85 940 @ r6 = WL7 (loaded below)
cannam@85 941 @ r7 = WL10 (loaded below)
cannam@85 942 @ r8 = WL13 (loaded below)
cannam@85 943 @ r9 = WL16 (loaded below)
cannam@85 944 @ r10 = .
cannam@85 945 @ r11 = .
cannam@85 946 @ r12 = .
cannam@85 947 @ lr = .
cannam@85 948 @ Each multiply below keeps bits[59..28] of the 64-bit product: movs ... lsr #28 leaves bit 27 in the carry flag and adc adds it back in, rounding the result
cannam@85 949 ldr r5, =WL4 @ r5 = window_l[4] == window_s[1]
cannam@85 950
cannam@85 951 smull r10, r11, r4, r0 @ r10..r11 = (window_l[1] * (x[24] == x[29]))
cannam@85 952 ldr r6, =WL7 @ r6 = window_l[7] == window_s[2]
cannam@85 953 movs r10, r10, lsr #28
cannam@85 954 adc lr, r10, r11, lsl #4 @ lr = bits[59..28] of windowed x29
cannam@85 955
cannam@85 956 smull r10, r11, r5, r2 @ r10..r11 = (window_l[4] * (x[25] == x[28]))
cannam@85 957 ldr r7, =WL10 @ r7 = window_l[10] == window_s[3]
cannam@85 958 movs r10, r10, lsr #28
cannam@85 959 adc r12, r10, r11, lsl #4 @ r12 = bits[59..28] of windowed x28
cannam@85 960
cannam@85 961 smull r10, r11, r6, r3 @ r10..r11 = (window_l[7] * (x[26] == x[27]))
cannam@85 962 ldr r8, =WL13 @ r8 = window_l[13] == window_s[4]
cannam@85 963 movs r10, r10, lsr #28
cannam@85 964 adc r4, r10, r11, lsl #4 @ r4 = bits[59..28] of windowed x27
cannam@85 965
cannam@85 966 smull r10, r11, r7, r3 @ r10..r11 = (window_l[10] * x[26])
cannam@85 967 ldr r9, =WL16 @ r9 = window_l[16] == window_s[5]
cannam@85 968 movs r10, r10, lsr #28
cannam@85 969 adc r3, r10, r11, lsl #4 @ r3 = bits[59..28] of windowed x26
cannam@85 970
cannam@85 971 smull r10, r11, r8, r2 @ r10..r11 = (window_l[13] * x[25])
cannam@85 972 ldr r5, =0x00 @ r5 = 0, the value stored to x[30] below (WL4 is no longer needed)
cannam@85 973 movs r10, r10, lsr #28
cannam@85 974 adc r2, r10, r11, lsl #4 @ r2 = bits[59..28] of windowed x25
cannam@85 975
cannam@85 976 smull r10, r11, r9, r0 @ r10..r11 = (window_l[16] * x[24])
cannam@85 977 ldr r6, =0x00 @ r6 = 0, the value stored to x[31] below (WL7 is no longer needed)
cannam@85 978 movs r10, r10, lsr #28
cannam@85 979 adc r0, r10, r11, lsl #4 @ r0 = bits[59..28] of windowed x24
cannam@85 980
cannam@85 981 stmia r1!, { r0, r2, r3, r4, r12, lr } @ store windowed x[24] .. x[29], leaves r1 = &x[30]
cannam@85 982
cannam@85 983 ldr r7, =0x00 @ r7 - r10 = 0, the values stored to x[32] .. x[35] below
cannam@85 984 ldr r8, =0x00
cannam@85 985 ldr r9, =0x00
cannam@85 986 ldr r10, =0x00
cannam@85 987
cannam@85 988 stmia r1!, { r5 - r10 } @ store zeros to x[30] .. x[35]
cannam@85 989
cannam@85 990 @----
cannam@85 991 @ NB there are 2 possible exits from this function - this is only one of them
cannam@85 992 @----
cannam@85 993
cannam@85 994 add sp, sp, #(21*4) @ release the 21-word stack frame
cannam@85 995 ldmia sp!, { r4 - r11, pc } @ restore callee saved regs, and return
cannam@85 996
cannam@85 997 @----
cannam@85 998 @END
cannam@85 999 @----
cannam@85 1000